1 /* 2 * Copyright 2021 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 */ 23 24 #define SWSMU_CODE_LAYER_L2 25 26 #include <linux/firmware.h> 27 #include "amdgpu.h" 28 #include "amdgpu_smu.h" 29 #include "atomfirmware.h" 30 #include "amdgpu_atomfirmware.h" 31 #include "amdgpu_atombios.h" 32 #include "smu_v13_0_6_pmfw.h" 33 #include "smu13_driver_if_v13_0_6.h" 34 #include "smu_v13_0_6_ppsmc.h" 35 #include "soc15_common.h" 36 #include "atom.h" 37 #include "power_state.h" 38 #include "smu_v13_0.h" 39 #include "smu_v13_0_6_ppt.h" 40 #include "nbio/nbio_7_4_offset.h" 41 #include "nbio/nbio_7_4_sh_mask.h" 42 #include "thm/thm_11_0_2_offset.h" 43 #include "thm/thm_11_0_2_sh_mask.h" 44 #include "amdgpu_xgmi.h" 45 #include <linux/pci.h> 46 #include "amdgpu_ras.h" 47 #include "amdgpu_mca.h" 48 #include "amdgpu_aca.h" 49 #include "smu_cmn.h" 50 #include "mp/mp_13_0_6_offset.h" 51 #include "mp/mp_13_0_6_sh_mask.h" 52 #include "umc_v12_0.h" 53 54 #undef MP1_Public 55 #undef smnMP1_FIRMWARE_FLAGS 56 57 /* TODO: Check final register offsets */ 58 #define MP1_Public 0x03b00000 59 #define smnMP1_FIRMWARE_FLAGS 0x3010028 60 /* 61 * DO NOT use these for err/warn/info/debug messages. 62 * Use dev_err, dev_warn, dev_info and dev_dbg instead. 63 * They are more MGPU friendly. 64 */ 65 #undef pr_err 66 #undef pr_warn 67 #undef pr_info 68 #undef pr_debug 69 70 MODULE_FIRMWARE("amdgpu/smu_13_0_6.bin"); 71 MODULE_FIRMWARE("amdgpu/smu_13_0_14.bin"); 72 73 #define to_amdgpu_device(x) (container_of(x, struct amdgpu_device, pm.smu_i2c)) 74 75 #define SMU_13_0_6_FEA_MAP(smu_feature, smu_13_0_6_feature) \ 76 [smu_feature] = { 1, (smu_13_0_6_feature) } 77 78 #define FEATURE_MASK(feature) (1ULL << feature) 79 static const struct smu_feature_bits smu_v13_0_6_dpm_features = { 80 .bits = { 81 SMU_FEATURE_BIT_INIT(FEATURE_DATA_CALCULATION), 82 SMU_FEATURE_BIT_INIT(FEATURE_DPM_GFXCLK), 83 SMU_FEATURE_BIT_INIT(FEATURE_DPM_UCLK), 84 SMU_FEATURE_BIT_INIT(FEATURE_DPM_SOCCLK), 85 SMU_FEATURE_BIT_INIT(FEATURE_DPM_FCLK), 86 SMU_FEATURE_BIT_INIT(FEATURE_DPM_LCLK), 87 SMU_FEATURE_BIT_INIT(FEATURE_DPM_XGMI), 88 SMU_FEATURE_BIT_INIT(FEATURE_DPM_VCN) 89 } 90 }; 91 92 #define smnPCIE_ESM_CTRL 0x93D0 93 #define smnPCIE_LC_LINK_WIDTH_CNTL 0x1a340288 94 #define PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD_MASK 0x00000070L 95 #define PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD__SHIFT 0x4 96 #define MAX_LINK_WIDTH 6 97 98 #define smnPCIE_LC_SPEED_CNTL 0x1a340290 99 #define PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK 0xE0 100 #define PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT 0x5 101 #define LINK_SPEED_MAX 4 102 #define MCA_BANK_IPID(_ip, _hwid, _type) \ 103 [AMDGPU_MCA_IP_##_ip] = { .hwid = _hwid, .mcatype = _type, } 104 105 struct mca_bank_ipid { 106 enum amdgpu_mca_ip ip; 107 uint16_t hwid; 108 uint16_t mcatype; 109 }; 110 111 struct mca_ras_info { 112 enum amdgpu_ras_block blkid; 113 enum amdgpu_mca_ip ip; 114 int *err_code_array; 115 int err_code_count; 116 int (*get_err_count)(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, 117 enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count); 118 bool (*bank_is_valid)(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, 119 enum amdgpu_mca_error_type type, struct mca_bank_entry *entry); 120 }; 121 122 #define P2S_TABLE_ID_A 0x50325341 123 #define P2S_TABLE_ID_X 0x50325358 124 #define P2S_TABLE_ID_3 0x50325303 125 126 // clang-format off 127 static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COUNT] = { 128 MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 0), 129 MSG_MAP(GetSmuVersion, PPSMC_MSG_GetSmuVersion, 1), 130 MSG_MAP(GetDriverIfVersion, PPSMC_MSG_GetDriverIfVersion, 1), 131 MSG_MAP(EnableAllSmuFeatures, PPSMC_MSG_EnableAllSmuFeatures, 0), 132 MSG_MAP(DisableAllSmuFeatures, PPSMC_MSG_DisableAllSmuFeatures, 0), 133 MSG_MAP(RequestI2cTransaction, PPSMC_MSG_RequestI2cTransaction, 0), 134 MSG_MAP(GetMetricsTable, PPSMC_MSG_GetMetricsTable, 1), 135 MSG_MAP(GetMetricsVersion, PPSMC_MSG_GetMetricsVersion, 1), 136 MSG_MAP(GetEnabledSmuFeaturesHigh, PPSMC_MSG_GetEnabledSmuFeaturesHigh, 1), 137 MSG_MAP(GetEnabledSmuFeaturesLow, PPSMC_MSG_GetEnabledSmuFeaturesLow, 1), 138 MSG_MAP(SetDriverDramAddrHigh, PPSMC_MSG_SetDriverDramAddrHigh, 1), 139 MSG_MAP(SetDriverDramAddrLow, PPSMC_MSG_SetDriverDramAddrLow, 1), 140 MSG_MAP(SetToolsDramAddrHigh, PPSMC_MSG_SetToolsDramAddrHigh, 0), 141 MSG_MAP(SetToolsDramAddrLow, PPSMC_MSG_SetToolsDramAddrLow, 0), 142 MSG_MAP(SetSoftMinByFreq, PPSMC_MSG_SetSoftMinByFreq, 0), 143 MSG_MAP(SetSoftMaxByFreq, PPSMC_MSG_SetSoftMaxByFreq, 1), 144 MSG_MAP(GetMinDpmFreq, PPSMC_MSG_GetMinDpmFreq, 1), 145 MSG_MAP(GetMaxDpmFreq, PPSMC_MSG_GetMaxDpmFreq, 1), 146 MSG_MAP(GetDpmFreqByIndex, PPSMC_MSG_GetDpmFreqByIndex, 1), 147 MSG_MAP(SetPptLimit, PPSMC_MSG_SetPptLimit, 1), 148 MSG_MAP(GetPptLimit, PPSMC_MSG_GetPptLimit, 1), 149 MSG_MAP(GfxDeviceDriverReset, PPSMC_MSG_GfxDriverReset, SMU_MSG_RAS_PRI | SMU_MSG_NO_PRECHECK), 150 MSG_MAP(DramLogSetDramAddrHigh, PPSMC_MSG_DramLogSetDramAddrHigh, 0), 151 MSG_MAP(DramLogSetDramAddrLow, PPSMC_MSG_DramLogSetDramAddrLow, 0), 152 MSG_MAP(DramLogSetDramSize, PPSMC_MSG_DramLogSetDramSize, 0), 153 MSG_MAP(GetDebugData, PPSMC_MSG_GetDebugData, 0), 154 MSG_MAP(SetNumBadHbmPagesRetired, PPSMC_MSG_SetNumBadHbmPagesRetired, 0), 155 MSG_MAP(DFCstateControl, PPSMC_MSG_DFCstateControl, 0), 156 MSG_MAP(GetGmiPwrDnHyst, PPSMC_MSG_GetGmiPwrDnHyst, 0), 157 MSG_MAP(SetGmiPwrDnHyst, PPSMC_MSG_SetGmiPwrDnHyst, 0), 158 MSG_MAP(GmiPwrDnControl, PPSMC_MSG_GmiPwrDnControl, 0), 159 MSG_MAP(EnterGfxoff, PPSMC_MSG_EnterGfxoff, 0), 160 MSG_MAP(ExitGfxoff, PPSMC_MSG_ExitGfxoff, 0), 161 MSG_MAP(EnableDeterminism, PPSMC_MSG_EnableDeterminism, 0), 162 MSG_MAP(DisableDeterminism, PPSMC_MSG_DisableDeterminism, 0), 163 MSG_MAP(GfxDriverResetRecovery, PPSMC_MSG_GfxDriverResetRecovery, 0), 164 MSG_MAP(GetMinGfxclkFrequency, PPSMC_MSG_GetMinGfxDpmFreq, 1), 165 MSG_MAP(GetMaxGfxclkFrequency, PPSMC_MSG_GetMaxGfxDpmFreq, 1), 166 MSG_MAP(SetSoftMinGfxclk, PPSMC_MSG_SetSoftMinGfxClk, 1), 167 MSG_MAP(SetSoftMaxGfxClk, PPSMC_MSG_SetSoftMaxGfxClk, 1), 168 MSG_MAP(PrepareMp1ForUnload, PPSMC_MSG_PrepareForDriverUnload, 0), 169 MSG_MAP(GetCTFLimit, PPSMC_MSG_GetCTFLimit, 0), 170 MSG_MAP(GetThermalLimit, PPSMC_MSG_ReadThrottlerLimit, 0), 171 MSG_MAP(ClearMcaOnRead, PPSMC_MSG_ClearMcaOnRead, 0), 172 MSG_MAP(QueryValidMcaCount, PPSMC_MSG_QueryValidMcaCount, SMU_MSG_RAS_PRI), 173 MSG_MAP(QueryValidMcaCeCount, PPSMC_MSG_QueryValidMcaCeCount, SMU_MSG_RAS_PRI), 174 MSG_MAP(McaBankDumpDW, PPSMC_MSG_McaBankDumpDW, SMU_MSG_RAS_PRI), 175 MSG_MAP(McaBankCeDumpDW, PPSMC_MSG_McaBankCeDumpDW, SMU_MSG_RAS_PRI), 176 MSG_MAP(SelectPLPDMode, PPSMC_MSG_SelectPLPDMode, 0), 177 MSG_MAP(RmaDueToBadPageThreshold, PPSMC_MSG_RmaDueToBadPageThreshold, 0), 178 MSG_MAP(SetThrottlingPolicy, PPSMC_MSG_SetThrottlingPolicy, 0), 179 MSG_MAP(ResetSDMA, PPSMC_MSG_ResetSDMA, 0), 180 MSG_MAP(ResetVCN, PPSMC_MSG_ResetVCN, 0), 181 MSG_MAP(GetStaticMetricsTable, PPSMC_MSG_GetStaticMetricsTable, 1), 182 }; 183 184 // clang-format on 185 static const struct cmn2asic_mapping smu_v13_0_6_clk_map[SMU_CLK_COUNT] = { 186 CLK_MAP(SOCCLK, PPCLK_SOCCLK), 187 CLK_MAP(FCLK, PPCLK_FCLK), 188 CLK_MAP(UCLK, PPCLK_UCLK), 189 CLK_MAP(MCLK, PPCLK_UCLK), 190 CLK_MAP(DCLK, PPCLK_DCLK), 191 CLK_MAP(VCLK, PPCLK_VCLK), 192 CLK_MAP(LCLK, PPCLK_LCLK), 193 }; 194 195 static const struct cmn2asic_mapping smu_v13_0_6_feature_mask_map[SMU_FEATURE_COUNT] = { 196 SMU_13_0_6_FEA_MAP(SMU_FEATURE_DATA_CALCULATIONS_BIT, FEATURE_DATA_CALCULATION), 197 SMU_13_0_6_FEA_MAP(SMU_FEATURE_DPM_GFXCLK_BIT, FEATURE_DPM_GFXCLK), 198 SMU_13_0_6_FEA_MAP(SMU_FEATURE_DPM_UCLK_BIT, FEATURE_DPM_UCLK), 199 SMU_13_0_6_FEA_MAP(SMU_FEATURE_DPM_SOCCLK_BIT, FEATURE_DPM_SOCCLK), 200 SMU_13_0_6_FEA_MAP(SMU_FEATURE_DPM_FCLK_BIT, FEATURE_DPM_FCLK), 201 SMU_13_0_6_FEA_MAP(SMU_FEATURE_DPM_LCLK_BIT, FEATURE_DPM_LCLK), 202 SMU_13_0_6_FEA_MAP(SMU_FEATURE_DPM_VCLK_BIT, FEATURE_DPM_VCN), 203 SMU_13_0_6_FEA_MAP(SMU_FEATURE_DPM_DCLK_BIT, FEATURE_DPM_VCN), 204 SMU_13_0_6_FEA_MAP(SMU_FEATURE_DPM_XGMI_BIT, FEATURE_DPM_XGMI), 205 SMU_13_0_6_FEA_MAP(SMU_FEATURE_DS_GFXCLK_BIT, FEATURE_DS_GFXCLK), 206 SMU_13_0_6_FEA_MAP(SMU_FEATURE_DS_SOCCLK_BIT, FEATURE_DS_SOCCLK), 207 SMU_13_0_6_FEA_MAP(SMU_FEATURE_DS_LCLK_BIT, FEATURE_DS_LCLK), 208 SMU_13_0_6_FEA_MAP(SMU_FEATURE_DS_FCLK_BIT, FEATURE_DS_FCLK), 209 SMU_13_0_6_FEA_MAP(SMU_FEATURE_VCN_DPM_BIT, FEATURE_DPM_VCN), 210 SMU_13_0_6_FEA_MAP(SMU_FEATURE_PPT_BIT, FEATURE_PPT), 211 SMU_13_0_6_FEA_MAP(SMU_FEATURE_TDC_BIT, FEATURE_TDC), 212 SMU_13_0_6_FEA_MAP(SMU_FEATURE_APCC_DFLL_BIT, FEATURE_APCC_DFLL), 213 SMU_13_0_6_FEA_MAP(SMU_FEATURE_MP1_CG_BIT, FEATURE_SMU_CG), 214 SMU_13_0_6_FEA_MAP(SMU_FEATURE_GFXOFF_BIT, FEATURE_GFXOFF), 215 SMU_13_0_6_FEA_MAP(SMU_FEATURE_FW_CTF_BIT, FEATURE_FW_CTF), 216 SMU_13_0_6_FEA_MAP(SMU_FEATURE_THERMAL_BIT, FEATURE_THERMAL), 217 SMU_13_0_6_FEA_MAP(SMU_FEATURE_XGMI_PER_LINK_PWR_DWN_BIT, FEATURE_XGMI_PER_LINK_PWR_DOWN), 218 SMU_13_0_6_FEA_MAP(SMU_FEATURE_DF_CSTATE_BIT, FEATURE_DF_CSTATE), 219 SMU_13_0_6_FEA_MAP(SMU_FEATURE_DS_VCN_BIT, FEATURE_DS_VCN), 220 SMU_13_0_6_FEA_MAP(SMU_FEATURE_DS_MP1CLK_BIT, FEATURE_DS_MP1CLK), 221 SMU_13_0_6_FEA_MAP(SMU_FEATURE_DS_MPIOCLK_BIT, FEATURE_DS_MPIOCLK), 222 SMU_13_0_6_FEA_MAP(SMU_FEATURE_DS_MP0CLK_BIT, FEATURE_DS_MP0CLK), 223 }; 224 225 #define TABLE_PMSTATUSLOG 0 226 #define TABLE_SMU_METRICS 1 227 #define TABLE_I2C_COMMANDS 2 228 #define TABLE_COUNT 3 229 230 static const struct cmn2asic_mapping smu_v13_0_6_table_map[SMU_TABLE_COUNT] = { 231 TAB_MAP(PMSTATUSLOG), 232 TAB_MAP(SMU_METRICS), 233 TAB_MAP(I2C_COMMANDS), 234 }; 235 236 static const uint8_t smu_v13_0_6_throttler_map[] = { 237 [THROTTLER_PPT_BIT] = (SMU_THROTTLER_PPT0_BIT), 238 [THROTTLER_THERMAL_SOCKET_BIT] = (SMU_THROTTLER_TEMP_GPU_BIT), 239 [THROTTLER_THERMAL_HBM_BIT] = (SMU_THROTTLER_TEMP_MEM_BIT), 240 [THROTTLER_THERMAL_VR_BIT] = (SMU_THROTTLER_TEMP_VR_GFX_BIT), 241 [THROTTLER_PROCHOT_BIT] = (SMU_THROTTLER_PROCHOT_GFX_BIT), 242 }; 243 244 #define GET_GPU_METRIC_FIELD(field, version) ((version == METRICS_VERSION_V0) ?\ 245 (metrics_v0->field) : (metrics_v2->field)) 246 #define GET_METRIC_FIELD(field, version) ((version == METRICS_VERSION_V1) ?\ 247 (metrics_v1->field) : GET_GPU_METRIC_FIELD(field, version)) 248 #define METRICS_TABLE_SIZE (max3(sizeof(MetricsTableV0_t),\ 249 sizeof(MetricsTableV1_t),\ 250 sizeof(MetricsTableV2_t))) 251 252 struct smu_v13_0_6_dpm_map { 253 enum smu_clk_type clk_type; 254 uint32_t feature_num; 255 struct smu_dpm_table *dpm_table; 256 uint32_t *freq_table; 257 }; 258 259 static inline int smu_v13_0_6_get_metrics_version(struct smu_context *smu) 260 { 261 if ((smu->adev->flags & AMD_IS_APU) && 262 smu->smc_fw_version <= 0x4556900) 263 return METRICS_VERSION_V1; 264 else if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == 265 IP_VERSION(13, 0, 12)) 266 return METRICS_VERSION_V2; 267 268 return METRICS_VERSION_V0; 269 } 270 271 static inline void smu_v13_0_6_cap_set(struct smu_context *smu, 272 enum smu_v13_0_6_caps cap) 273 { 274 struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; 275 276 dpm_context->caps |= BIT_ULL(cap); 277 } 278 279 static inline void smu_v13_0_6_cap_clear(struct smu_context *smu, 280 enum smu_v13_0_6_caps cap) 281 { 282 struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; 283 284 dpm_context->caps &= ~BIT_ULL(cap); 285 } 286 287 bool smu_v13_0_6_cap_supported(struct smu_context *smu, 288 enum smu_v13_0_6_caps cap) 289 { 290 struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; 291 292 return !!(dpm_context->caps & BIT_ULL(cap)); 293 } 294 295 static void smu_v13_0_14_init_caps(struct smu_context *smu) 296 { 297 enum smu_v13_0_6_caps default_cap_list[] = { SMU_CAP(DPM), 298 SMU_CAP(SET_UCLK_MAX), 299 SMU_CAP(DPM_POLICY), 300 SMU_CAP(PCIE_METRICS), 301 SMU_CAP(CTF_LIMIT), 302 SMU_CAP(MCA_DEBUG_MODE), 303 SMU_CAP(RMA_MSG), 304 SMU_CAP(ACA_SYND) }; 305 uint32_t fw_ver = smu->smc_fw_version; 306 307 for (int i = 0; i < ARRAY_SIZE(default_cap_list); i++) 308 smu_v13_0_6_cap_set(smu, default_cap_list[i]); 309 310 if (fw_ver >= 0x05550E00) 311 smu_v13_0_6_cap_set(smu, SMU_CAP(OTHER_END_METRICS)); 312 if (fw_ver >= 0x05550B00) 313 smu_v13_0_6_cap_set(smu, SMU_CAP(PER_INST_METRICS)); 314 if (fw_ver >= 0x5551200) 315 smu_v13_0_6_cap_set(smu, SMU_CAP(SDMA_RESET)); 316 if (fw_ver >= 0x5551800) 317 smu_v13_0_6_cap_set(smu, SMU_CAP(VCN_RESET)); 318 if (fw_ver >= 0x5551600) { 319 smu_v13_0_6_cap_set(smu, SMU_CAP(STATIC_METRICS)); 320 smu_v13_0_6_cap_set(smu, SMU_CAP(BOARD_VOLTAGE)); 321 smu_v13_0_6_cap_set(smu, SMU_CAP(PLDM_VERSION)); 322 } 323 } 324 325 static void smu_v13_0_12_init_caps(struct smu_context *smu) 326 { 327 enum smu_v13_0_6_caps default_cap_list[] = { SMU_CAP(DPM), 328 SMU_CAP(PCIE_METRICS), 329 SMU_CAP(CTF_LIMIT), 330 SMU_CAP(MCA_DEBUG_MODE), 331 SMU_CAP(RMA_MSG), 332 SMU_CAP(ACA_SYND), 333 SMU_CAP(OTHER_END_METRICS), 334 SMU_CAP(PER_INST_METRICS) }; 335 uint32_t fw_ver = smu->smc_fw_version; 336 337 for (int i = 0; i < ARRAY_SIZE(default_cap_list); i++) 338 smu_v13_0_6_cap_set(smu, default_cap_list[i]); 339 340 if (fw_ver < 0x00561900) 341 smu_v13_0_6_cap_clear(smu, SMU_CAP(DPM)); 342 343 if (fw_ver >= 0x00561700) 344 smu_v13_0_6_cap_set(smu, SMU_CAP(SDMA_RESET)); 345 346 if (fw_ver >= 0x00561E00) 347 smu_v13_0_6_cap_set(smu, SMU_CAP(STATIC_METRICS)); 348 349 if (fw_ver >= 0x00562500) 350 smu_v13_0_6_cap_set(smu, SMU_CAP(HST_LIMIT_METRICS)); 351 352 if (fw_ver >= 0x04560100) { 353 smu_v13_0_6_cap_set(smu, SMU_CAP(BOARD_VOLTAGE)); 354 smu_v13_0_6_cap_set(smu, SMU_CAP(PLDM_VERSION)); 355 } 356 357 if (fw_ver > 0x04560900) 358 smu_v13_0_6_cap_set(smu, SMU_CAP(VCN_RESET)); 359 360 if (fw_ver >= 0x04560D00) { 361 smu_v13_0_6_cap_set(smu, SMU_CAP(FAST_PPT)); 362 if (smu->adev->gmc.xgmi.physical_node_id == 0) 363 smu_v13_0_6_cap_set(smu, SMU_CAP(SYSTEM_POWER_METRICS)); 364 } 365 366 if (fw_ver >= 0x04560700) { 367 if (fw_ver >= 0x04560900) { 368 smu_v13_0_6_cap_set(smu, SMU_CAP(TEMP_METRICS)); 369 if (smu->adev->gmc.xgmi.physical_node_id == 0) 370 smu_v13_0_6_cap_set(smu, SMU_CAP(NPM_METRICS)); 371 } else if (!amdgpu_sriov_vf(smu->adev)) 372 smu_v13_0_6_cap_set(smu, SMU_CAP(TEMP_METRICS)); 373 } else { 374 smu_v13_0_12_tables_fini(smu); 375 } 376 377 if (fw_ver >= 0x04561000) 378 smu_v13_0_6_cap_set(smu, SMU_CAP(TEMP_AID_XCD_HBM)); 379 } 380 381 static void smu_v13_0_6_init_caps(struct smu_context *smu) 382 { 383 enum smu_v13_0_6_caps default_cap_list[] = { SMU_CAP(DPM), 384 SMU_CAP(SET_UCLK_MAX), 385 SMU_CAP(DPM_POLICY), 386 SMU_CAP(PCIE_METRICS), 387 SMU_CAP(CTF_LIMIT), 388 SMU_CAP(MCA_DEBUG_MODE), 389 SMU_CAP(RMA_MSG), 390 SMU_CAP(ACA_SYND) }; 391 struct amdgpu_device *adev = smu->adev; 392 uint32_t fw_ver = smu->smc_fw_version; 393 uint32_t pgm = (fw_ver >> 24) & 0xFF; 394 395 for (int i = 0; i < ARRAY_SIZE(default_cap_list); i++) 396 smu_v13_0_6_cap_set(smu, default_cap_list[i]); 397 398 if (fw_ver < 0x552F00) 399 smu_v13_0_6_cap_clear(smu, SMU_CAP(DPM)); 400 if (fw_ver < 0x554500) 401 smu_v13_0_6_cap_clear(smu, SMU_CAP(CTF_LIMIT)); 402 403 if (adev->flags & AMD_IS_APU) { 404 smu_v13_0_6_cap_clear(smu, SMU_CAP(PCIE_METRICS)); 405 smu_v13_0_6_cap_clear(smu, SMU_CAP(DPM_POLICY)); 406 smu_v13_0_6_cap_clear(smu, SMU_CAP(RMA_MSG)); 407 smu_v13_0_6_cap_clear(smu, SMU_CAP(ACA_SYND)); 408 409 if (fw_ver >= 0x04556A00) 410 smu_v13_0_6_cap_set(smu, SMU_CAP(PER_INST_METRICS)); 411 } else { 412 if (fw_ver >= 0x557600) 413 smu_v13_0_6_cap_set(smu, SMU_CAP(OTHER_END_METRICS)); 414 if (fw_ver < 0x00556000) 415 smu_v13_0_6_cap_clear(smu, SMU_CAP(DPM_POLICY)); 416 if (amdgpu_sriov_vf(adev) && (fw_ver < 0x556600)) 417 smu_v13_0_6_cap_clear(smu, SMU_CAP(SET_UCLK_MAX)); 418 if (fw_ver < 0x556300) 419 smu_v13_0_6_cap_clear(smu, SMU_CAP(PCIE_METRICS)); 420 if (fw_ver < 0x554800) 421 smu_v13_0_6_cap_clear(smu, SMU_CAP(MCA_DEBUG_MODE)); 422 if (fw_ver >= 0x556F00) 423 smu_v13_0_6_cap_set(smu, SMU_CAP(PER_INST_METRICS)); 424 if (fw_ver < 0x00555a00) 425 smu_v13_0_6_cap_clear(smu, SMU_CAP(RMA_MSG)); 426 if (fw_ver < 0x00555600) 427 smu_v13_0_6_cap_clear(smu, SMU_CAP(ACA_SYND)); 428 if ((pgm == 7 && fw_ver >= 0x7550E00) || 429 (pgm == 0 && fw_ver >= 0x00557E00)) 430 smu_v13_0_6_cap_set(smu, SMU_CAP(HST_LIMIT_METRICS)); 431 432 if (amdgpu_sriov_vf(adev)) { 433 if (fw_ver >= 0x00558200) 434 amdgpu_virt_attr_set(&adev->virt.virt_caps, 435 AMDGPU_VIRT_CAP_POWER_LIMIT, 436 AMDGPU_CAP_ATTR_RW); 437 if ((pgm == 0 && fw_ver >= 0x00558000) || 438 (pgm == 7 && fw_ver >= 0x7551000)) { 439 smu_v13_0_6_cap_set(smu, 440 SMU_CAP(STATIC_METRICS)); 441 smu_v13_0_6_cap_set(smu, 442 SMU_CAP(BOARD_VOLTAGE)); 443 smu_v13_0_6_cap_set(smu, SMU_CAP(PLDM_VERSION)); 444 } 445 } else { 446 if ((pgm == 0 && fw_ver >= 0x00557F01) || 447 (pgm == 7 && fw_ver >= 0x7551000)) { 448 smu_v13_0_6_cap_set(smu, 449 SMU_CAP(STATIC_METRICS)); 450 smu_v13_0_6_cap_set(smu, 451 SMU_CAP(BOARD_VOLTAGE)); 452 } 453 if ((pgm == 0 && fw_ver >= 0x00558000) || 454 (pgm == 7 && fw_ver >= 0x7551000)) 455 smu_v13_0_6_cap_set(smu, SMU_CAP(PLDM_VERSION)); 456 } 457 } 458 if (((pgm == 7) && (fw_ver >= 0x7550700)) || 459 ((pgm == 0) && (fw_ver >= 0x00557900)) || 460 ((pgm == 4) && (fw_ver >= 0x4557000))) 461 smu_v13_0_6_cap_set(smu, SMU_CAP(SDMA_RESET)); 462 463 if ((pgm == 0 && fw_ver >= 0x00558200) || 464 (pgm == 4 && fw_ver >= 0x04557100) || 465 (pgm == 7 && fw_ver >= 0x07551400)) 466 smu_v13_0_6_cap_set(smu, SMU_CAP(VCN_RESET)); 467 } 468 469 static void smu_v13_0_x_init_caps(struct smu_context *smu) 470 { 471 switch (amdgpu_ip_version(smu->adev, MP1_HWIP, 0)) { 472 case IP_VERSION(13, 0, 12): 473 return smu_v13_0_12_init_caps(smu); 474 case IP_VERSION(13, 0, 14): 475 return smu_v13_0_14_init_caps(smu); 476 default: 477 return smu_v13_0_6_init_caps(smu); 478 } 479 } 480 481 static int smu_v13_0_6_check_fw_version(struct smu_context *smu) 482 { 483 int r; 484 485 r = smu_cmn_check_fw_version(smu); 486 /* Initialize caps flags once fw version is fetched */ 487 if (!r) 488 smu_v13_0_x_init_caps(smu); 489 490 return r; 491 } 492 493 static int smu_v13_0_6_init_microcode(struct smu_context *smu) 494 { 495 const struct smc_firmware_header_v2_1 *v2_1; 496 const struct common_firmware_header *hdr; 497 struct amdgpu_firmware_info *ucode = NULL; 498 struct smc_soft_pptable_entry *entries; 499 struct amdgpu_device *adev = smu->adev; 500 uint32_t p2s_table_id = P2S_TABLE_ID_A; 501 int ret = 0, i, p2stable_count; 502 int var = (adev->pdev->device & 0xF); 503 char ucode_prefix[15]; 504 505 /* No need to load P2S tables in IOV mode or for smu v13.0.12 */ 506 if (amdgpu_sriov_vf(adev) || 507 (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12))) 508 return 0; 509 510 if (!(adev->flags & AMD_IS_APU)) { 511 p2s_table_id = P2S_TABLE_ID_X; 512 if (var == 0x5) 513 p2s_table_id = P2S_TABLE_ID_3; 514 } 515 516 amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix, 517 sizeof(ucode_prefix)); 518 ret = amdgpu_ucode_request(adev, &adev->pm.fw, AMDGPU_UCODE_REQUIRED, 519 "amdgpu/%s.bin", ucode_prefix); 520 if (ret) 521 goto out; 522 523 hdr = (const struct common_firmware_header *)adev->pm.fw->data; 524 amdgpu_ucode_print_smc_hdr(hdr); 525 526 /* SMU v13.0.6 binary file doesn't carry pptables, instead the entries 527 * are used to carry p2s tables. 528 */ 529 v2_1 = (const struct smc_firmware_header_v2_1 *)adev->pm.fw->data; 530 entries = (struct smc_soft_pptable_entry 531 *)((uint8_t *)v2_1 + 532 le32_to_cpu(v2_1->pptable_entry_offset)); 533 p2stable_count = le32_to_cpu(v2_1->pptable_count); 534 for (i = 0; i < p2stable_count; i++) { 535 if (le32_to_cpu(entries[i].id) == p2s_table_id) { 536 smu->pptable_firmware.data = 537 ((uint8_t *)v2_1 + 538 le32_to_cpu(entries[i].ppt_offset_bytes)); 539 smu->pptable_firmware.size = 540 le32_to_cpu(entries[i].ppt_size_bytes); 541 break; 542 } 543 } 544 545 if (smu->pptable_firmware.data && smu->pptable_firmware.size) { 546 ucode = &adev->firmware.ucode[AMDGPU_UCODE_ID_P2S_TABLE]; 547 ucode->ucode_id = AMDGPU_UCODE_ID_P2S_TABLE; 548 ucode->fw = &smu->pptable_firmware; 549 adev->firmware.fw_size += ALIGN(ucode->fw->size, PAGE_SIZE); 550 } 551 552 return 0; 553 out: 554 amdgpu_ucode_release(&adev->pm.fw); 555 556 return ret; 557 } 558 559 static int smu_v13_0_6_tables_init(struct smu_context *smu) 560 { 561 struct smu_table_context *smu_table = &smu->smu_table; 562 struct smu_table *tables = smu_table->tables; 563 struct smu_v13_0_6_gpu_metrics *gpu_metrics; 564 void *driver_pptable __free(kfree) = NULL; 565 void *metrics_table __free(kfree) = NULL; 566 struct amdgpu_device *adev = smu->adev; 567 int gpu_metrcs_size = METRICS_TABLE_SIZE; 568 int ret; 569 570 if (!(adev->flags & AMD_IS_APU)) 571 SMU_TABLE_INIT(tables, SMU_TABLE_PMSTATUSLOG, SMU13_TOOL_SIZE, 572 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); 573 574 SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, 575 max(gpu_metrcs_size, 576 smu_v13_0_12_get_max_metrics_size()), 577 PAGE_SIZE, 578 AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT); 579 580 SMU_TABLE_INIT(tables, SMU_TABLE_I2C_COMMANDS, sizeof(SwI2cRequest_t), 581 PAGE_SIZE, 582 AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT); 583 584 SMU_TABLE_INIT(tables, SMU_TABLE_PMFW_SYSTEM_METRICS, 585 smu_v13_0_12_get_system_metrics_size(), PAGE_SIZE, 586 AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT); 587 588 metrics_table = kzalloc(METRICS_TABLE_SIZE, GFP_KERNEL); 589 if (!metrics_table) 590 return -ENOMEM; 591 smu_table->metrics_time = 0; 592 593 driver_pptable = kzalloc_obj(struct PPTable_t); 594 if (!driver_pptable) 595 return -ENOMEM; 596 597 ret = smu_driver_table_init(smu, SMU_DRIVER_TABLE_GPU_METRICS, 598 sizeof(struct smu_v13_0_6_gpu_metrics), 599 SMU_GPU_METRICS_CACHE_INTERVAL); 600 if (ret) 601 return ret; 602 603 gpu_metrics = (struct smu_v13_0_6_gpu_metrics *)smu_driver_table_ptr( 604 smu, SMU_DRIVER_TABLE_GPU_METRICS); 605 606 smu_v13_0_6_gpu_metrics_init(gpu_metrics, 1, 9); 607 if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == 608 IP_VERSION(13, 0, 12)) { 609 ret = smu_v13_0_12_tables_init(smu); 610 if (ret) { 611 smu_driver_table_fini(smu, 612 SMU_DRIVER_TABLE_GPU_METRICS); 613 return ret; 614 } 615 } 616 617 smu_table->metrics_table = no_free_ptr(metrics_table); 618 smu_table->driver_pptable = no_free_ptr(driver_pptable); 619 620 return 0; 621 } 622 623 static int smu_v13_0_6_select_policy_soc_pstate(struct smu_context *smu, 624 int policy) 625 { 626 struct amdgpu_device *adev = smu->adev; 627 int ret, param; 628 629 switch (policy) { 630 case SOC_PSTATE_DEFAULT: 631 param = 0; 632 break; 633 case SOC_PSTATE_0: 634 param = 1; 635 break; 636 case SOC_PSTATE_1: 637 param = 2; 638 break; 639 case SOC_PSTATE_2: 640 param = 3; 641 break; 642 default: 643 return -EINVAL; 644 } 645 646 ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetThrottlingPolicy, 647 param, NULL); 648 649 if (ret) 650 dev_err(adev->dev, "select soc pstate policy %d failed", 651 policy); 652 653 return ret; 654 } 655 656 static int smu_v13_0_6_select_plpd_policy(struct smu_context *smu, int level) 657 { 658 struct amdgpu_device *adev = smu->adev; 659 int ret, param; 660 661 switch (level) { 662 case XGMI_PLPD_DEFAULT: 663 param = PPSMC_PLPD_MODE_DEFAULT; 664 break; 665 case XGMI_PLPD_OPTIMIZED: 666 param = PPSMC_PLPD_MODE_OPTIMIZED; 667 break; 668 case XGMI_PLPD_DISALLOW: 669 param = 0; 670 break; 671 default: 672 return -EINVAL; 673 } 674 675 if (level == XGMI_PLPD_DISALLOW) 676 ret = smu_cmn_send_smc_msg_with_param( 677 smu, SMU_MSG_GmiPwrDnControl, param, NULL); 678 else 679 /* change xgmi per-link power down policy */ 680 ret = smu_cmn_send_smc_msg_with_param( 681 smu, SMU_MSG_SelectPLPDMode, param, NULL); 682 683 if (ret) 684 dev_err(adev->dev, 685 "select xgmi per-link power down policy %d failed\n", 686 level); 687 688 return ret; 689 } 690 691 static int smu_v13_0_6_allocate_dpm_context(struct smu_context *smu) 692 { 693 struct smu_dpm_context *smu_dpm = &smu->smu_dpm; 694 struct smu_dpm_policy *policy; 695 696 smu_dpm->dpm_context = 697 kzalloc_obj(struct smu_13_0_dpm_context); 698 if (!smu_dpm->dpm_context) 699 return -ENOMEM; 700 smu_dpm->dpm_context_size = sizeof(struct smu_13_0_dpm_context); 701 702 smu_dpm->dpm_policies = 703 kzalloc_obj(struct smu_dpm_policy_ctxt); 704 if (!smu_dpm->dpm_policies) { 705 kfree(smu_dpm->dpm_context); 706 return -ENOMEM; 707 } 708 709 if (!(smu->adev->flags & AMD_IS_APU)) { 710 policy = &(smu_dpm->dpm_policies->policies[0]); 711 712 policy->policy_type = PP_PM_POLICY_SOC_PSTATE; 713 policy->level_mask = BIT(SOC_PSTATE_DEFAULT) | 714 BIT(SOC_PSTATE_0) | BIT(SOC_PSTATE_1) | 715 BIT(SOC_PSTATE_2); 716 policy->current_level = SOC_PSTATE_DEFAULT; 717 policy->set_policy = smu_v13_0_6_select_policy_soc_pstate; 718 smu_cmn_generic_soc_policy_desc(policy); 719 smu_dpm->dpm_policies->policy_mask |= 720 BIT(PP_PM_POLICY_SOC_PSTATE); 721 } 722 policy = &(smu_dpm->dpm_policies->policies[1]); 723 724 policy->policy_type = PP_PM_POLICY_XGMI_PLPD; 725 policy->level_mask = BIT(XGMI_PLPD_DISALLOW) | BIT(XGMI_PLPD_DEFAULT) | 726 BIT(XGMI_PLPD_OPTIMIZED); 727 policy->current_level = XGMI_PLPD_DEFAULT; 728 policy->set_policy = smu_v13_0_6_select_plpd_policy; 729 smu_cmn_generic_plpd_policy_desc(policy); 730 smu_dpm->dpm_policies->policy_mask |= BIT(PP_PM_POLICY_XGMI_PLPD); 731 732 return 0; 733 } 734 735 static int smu_v13_0_6_init_smc_tables(struct smu_context *smu) 736 { 737 int ret = 0; 738 739 ret = smu_v13_0_6_tables_init(smu); 740 if (ret) 741 return ret; 742 743 ret = smu_v13_0_6_allocate_dpm_context(smu); 744 745 return ret; 746 } 747 748 static int smu_v13_0_6_fini_smc_tables(struct smu_context *smu) 749 { 750 if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12)) 751 smu_v13_0_12_tables_fini(smu); 752 return smu_v13_0_fini_smc_tables(smu); 753 } 754 755 static int smu_v13_0_6_init_allowed_features(struct smu_context *smu) 756 { 757 smu_feature_list_set_all(smu, SMU_FEATURE_LIST_ALLOWED); 758 759 return 0; 760 } 761 762 int smu_v13_0_6_get_metrics_table(struct smu_context *smu, void *metrics_table, 763 bool bypass_cache) 764 { 765 struct smu_table_context *smu_table = &smu->smu_table; 766 uint32_t table_size = smu_table->tables[SMU_TABLE_SMU_METRICS].size; 767 struct smu_table *table = &smu_table->driver_table; 768 int ret; 769 770 if (bypass_cache || !smu_table->metrics_time || 771 time_after(jiffies, 772 smu_table->metrics_time + msecs_to_jiffies(1))) { 773 ret = smu_cmn_send_smc_msg(smu, SMU_MSG_GetMetricsTable, NULL); 774 if (ret) { 775 dev_info(smu->adev->dev, 776 "Failed to export SMU metrics table!\n"); 777 return ret; 778 } 779 780 amdgpu_hdp_invalidate(smu->adev, NULL); 781 ret = smu_cmn_vram_cpy(smu, smu_table->metrics_table, 782 table->cpu_addr, table_size); 783 if (ret) 784 return ret; 785 786 if (!memchr_inv(smu_table->metrics_table, 0xff, 787 min(16, table_size))) 788 return -EHWPOISON; 789 790 smu_table->metrics_time = jiffies; 791 } 792 793 if (metrics_table) 794 memcpy(metrics_table, smu_table->metrics_table, table_size); 795 796 return 0; 797 } 798 799 static ssize_t smu_v13_0_6_get_pm_metrics(struct smu_context *smu, 800 void *metrics, size_t max_size) 801 { 802 struct smu_table_context *smu_tbl_ctxt = &smu->smu_table; 803 uint32_t table_version = smu_tbl_ctxt->tables[SMU_TABLE_SMU_METRICS].version; 804 uint32_t table_size = smu_tbl_ctxt->tables[SMU_TABLE_SMU_METRICS].size; 805 struct amdgpu_pm_metrics *pm_metrics = metrics; 806 uint32_t pmfw_version; 807 int ret; 808 809 if (!pm_metrics || !max_size) 810 return -EINVAL; 811 812 if (max_size < (table_size + sizeof(pm_metrics->common_header))) 813 return -EOVERFLOW; 814 815 /* Don't use cached metrics data */ 816 ret = smu_v13_0_6_get_metrics_table(smu, pm_metrics->data, true); 817 if (ret) 818 return ret; 819 820 smu_cmn_get_smc_version(smu, NULL, &pmfw_version); 821 822 memset(&pm_metrics->common_header, 0, 823 sizeof(pm_metrics->common_header)); 824 pm_metrics->common_header.mp1_ip_discovery_version = 825 amdgpu_ip_version(smu->adev, MP1_HWIP, 0); 826 pm_metrics->common_header.pmfw_version = pmfw_version; 827 pm_metrics->common_header.pmmetrics_version = table_version; 828 pm_metrics->common_header.structure_size = 829 sizeof(pm_metrics->common_header) + table_size; 830 831 return pm_metrics->common_header.structure_size; 832 } 833 834 static void smu_v13_0_6_fill_static_metrics_table(struct smu_context *smu, 835 StaticMetricsTable_t *static_metrics) 836 { 837 struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; 838 839 if (!static_metrics->InputTelemetryVoltageInmV) { 840 dev_warn(smu->adev->dev, "Invalid board voltage %d\n", 841 static_metrics->InputTelemetryVoltageInmV); 842 } 843 844 dpm_context->board_volt = static_metrics->InputTelemetryVoltageInmV; 845 846 if (smu_v13_0_6_cap_supported(smu, SMU_CAP(PLDM_VERSION)) && 847 static_metrics->pldmVersion[0] != 0xFFFFFFFF) 848 smu->adev->firmware.pldm_version = 849 static_metrics->pldmVersion[0]; 850 } 851 852 int smu_v13_0_6_get_static_metrics_table(struct smu_context *smu) 853 { 854 struct smu_table_context *smu_table = &smu->smu_table; 855 uint32_t table_size = smu_table->tables[SMU_TABLE_SMU_METRICS].size; 856 struct smu_table *table = &smu_table->driver_table; 857 int ret; 858 859 ret = smu_cmn_send_smc_msg(smu, SMU_MSG_GetStaticMetricsTable, NULL); 860 if (ret) { 861 dev_info(smu->adev->dev, 862 "Failed to export static metrics table!\n"); 863 return ret; 864 } 865 866 amdgpu_hdp_invalidate(smu->adev, NULL); 867 868 return smu_cmn_vram_cpy(smu, smu_table->metrics_table, 869 table->cpu_addr, table_size); 870 } 871 872 static void smu_v13_0_6_update_caps(struct smu_context *smu) 873 { 874 struct smu_table_context *smu_table = &smu->smu_table; 875 struct PPTable_t *pptable = 876 (struct PPTable_t *)smu_table->driver_pptable; 877 878 if (smu_v13_0_6_cap_supported(smu, SMU_CAP(FAST_PPT)) && 879 !pptable->PPT1Max) 880 smu_v13_0_6_cap_clear(smu, SMU_CAP(FAST_PPT)); 881 } 882 883 static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu) 884 { 885 struct smu_table_context *smu_table = &smu->smu_table; 886 StaticMetricsTable_t *static_metrics = (StaticMetricsTable_t *)smu_table->metrics_table; 887 MetricsTableV0_t *metrics_v0 = (MetricsTableV0_t *)smu_table->metrics_table; 888 MetricsTableV1_t *metrics_v1 = (MetricsTableV1_t *)smu_table->metrics_table; 889 MetricsTableV2_t *metrics_v2 = (MetricsTableV2_t *)smu_table->metrics_table; 890 struct PPTable_t *pptable = 891 (struct PPTable_t *)smu_table->driver_pptable; 892 int version = smu_v13_0_6_get_metrics_version(smu); 893 int ret, i, retry = 100, n; 894 uint32_t table_version; 895 uint16_t max_speed; 896 uint8_t max_width; 897 898 if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12) && 899 smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS))) { 900 ret = smu_v13_0_12_setup_driver_pptable(smu); 901 if (ret) 902 return ret; 903 goto out; 904 } 905 906 /* Store one-time values in driver PPTable */ 907 if (!pptable->Init) { 908 while (--retry) { 909 ret = smu_v13_0_6_get_metrics_table(smu, NULL, true); 910 if (ret) 911 return ret; 912 913 /* Ensure that metrics have been updated */ 914 if (GET_METRIC_FIELD(AccumulationCounter, version)) 915 break; 916 917 usleep_range(1000, 1100); 918 } 919 920 if (!retry) 921 return -ETIME; 922 923 ret = smu_cmn_send_smc_msg(smu, SMU_MSG_GetMetricsVersion, 924 &table_version); 925 if (ret) 926 return ret; 927 smu_table->tables[SMU_TABLE_SMU_METRICS].version = 928 table_version; 929 930 pptable->MaxSocketPowerLimit = 931 SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketPowerLimit, version)); 932 pptable->MaxGfxclkFrequency = 933 SMUQ10_ROUND(GET_METRIC_FIELD(MaxGfxclkFrequency, version)); 934 pptable->MinGfxclkFrequency = 935 SMUQ10_ROUND(GET_METRIC_FIELD(MinGfxclkFrequency, version)); 936 max_width = (uint8_t)GET_METRIC_FIELD(XgmiWidth, version); 937 max_speed = (uint16_t)GET_METRIC_FIELD(XgmiBitrate, version); 938 amgpu_xgmi_set_max_speed_width(smu->adev, max_speed, max_width); 939 940 for (i = 0; i < 4; ++i) { 941 pptable->FclkFrequencyTable[i] = 942 SMUQ10_ROUND(GET_METRIC_FIELD(FclkFrequencyTable, version)[i]); 943 pptable->UclkFrequencyTable[i] = 944 SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequencyTable, version)[i]); 945 pptable->SocclkFrequencyTable[i] = SMUQ10_ROUND( 946 GET_METRIC_FIELD(SocclkFrequencyTable, version)[i]); 947 pptable->VclkFrequencyTable[i] = 948 SMUQ10_ROUND(GET_METRIC_FIELD(VclkFrequencyTable, version)[i]); 949 pptable->DclkFrequencyTable[i] = 950 SMUQ10_ROUND(GET_METRIC_FIELD(DclkFrequencyTable, version)[i]); 951 pptable->LclkFrequencyTable[i] = 952 SMUQ10_ROUND(GET_METRIC_FIELD(LclkFrequencyTable, version)[i]); 953 } 954 955 /* use AID0 serial number by default */ 956 pptable->PublicSerialNumber_AID = 957 GET_METRIC_FIELD(PublicSerialNumber_AID, version)[0]; 958 959 amdgpu_device_set_uid(smu->adev->uid_info, AMDGPU_UID_TYPE_SOC, 960 0, pptable->PublicSerialNumber_AID); 961 n = ARRAY_SIZE(metrics_v0->PublicSerialNumber_AID); 962 for (i = 0; i < n; i++) { 963 amdgpu_device_set_uid( 964 smu->adev->uid_info, AMDGPU_UID_TYPE_AID, i, 965 GET_METRIC_FIELD(PublicSerialNumber_AID, 966 version)[i]); 967 } 968 n = ARRAY_SIZE(metrics_v0->PublicSerialNumber_XCD); 969 for (i = 0; i < n; i++) { 970 amdgpu_device_set_uid( 971 smu->adev->uid_info, AMDGPU_UID_TYPE_XCD, i, 972 GET_METRIC_FIELD(PublicSerialNumber_XCD, 973 version)[i]); 974 } 975 976 pptable->Init = true; 977 if (smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS))) { 978 ret = smu_v13_0_6_get_static_metrics_table(smu); 979 if (ret) 980 return ret; 981 smu_v13_0_6_fill_static_metrics_table(smu, static_metrics); 982 } 983 } 984 out: 985 smu_v13_0_6_update_caps(smu); 986 return 0; 987 } 988 989 static int smu_v13_0_6_get_dpm_ultimate_freq(struct smu_context *smu, 990 enum smu_clk_type clk_type, 991 uint32_t *min, uint32_t *max) 992 { 993 struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; 994 struct smu_table_context *smu_table = &smu->smu_table; 995 struct PPTable_t *pptable = 996 (struct PPTable_t *)smu_table->driver_pptable; 997 struct smu_dpm_table *dpm_table; 998 uint32_t min_clk, max_clk, param; 999 int ret = 0, clk_id = 0; 1000 1001 /* Use dpm tables, if data is already fetched */ 1002 if (pptable->Init) { 1003 switch (clk_type) { 1004 case SMU_MCLK: 1005 case SMU_UCLK: 1006 dpm_table = &dpm_context->dpm_tables.uclk_table; 1007 break; 1008 case SMU_GFXCLK: 1009 case SMU_SCLK: 1010 dpm_table = &dpm_context->dpm_tables.gfx_table; 1011 break; 1012 case SMU_SOCCLK: 1013 dpm_table = &dpm_context->dpm_tables.soc_table; 1014 break; 1015 case SMU_FCLK: 1016 dpm_table = &dpm_context->dpm_tables.fclk_table; 1017 break; 1018 case SMU_VCLK: 1019 dpm_table = &dpm_context->dpm_tables.vclk_table; 1020 break; 1021 case SMU_DCLK: 1022 dpm_table = &dpm_context->dpm_tables.dclk_table; 1023 break; 1024 default: 1025 return -EINVAL; 1026 } 1027 1028 min_clk = SMU_DPM_TABLE_MIN(dpm_table); 1029 max_clk = SMU_DPM_TABLE_MAX(dpm_table); 1030 if (min) 1031 *min = min_clk; 1032 if (max) 1033 *max = max_clk; 1034 1035 if (min_clk && max_clk) 1036 return 0; 1037 } 1038 1039 if (!(clk_type == SMU_GFXCLK || clk_type == SMU_SCLK)) { 1040 clk_id = smu_cmn_to_asic_specific_index( 1041 smu, CMN2ASIC_MAPPING_CLK, clk_type); 1042 if (clk_id < 0) { 1043 ret = -EINVAL; 1044 goto failed; 1045 } 1046 param = (clk_id & 0xffff) << 16; 1047 } 1048 1049 if (max) { 1050 if (clk_type == SMU_GFXCLK || clk_type == SMU_SCLK) 1051 ret = smu_cmn_send_smc_msg( 1052 smu, SMU_MSG_GetMaxGfxclkFrequency, max); 1053 else 1054 ret = smu_cmn_send_smc_msg_with_param( 1055 smu, SMU_MSG_GetMaxDpmFreq, param, max); 1056 if (ret) 1057 goto failed; 1058 } 1059 1060 if (min) { 1061 if (clk_type == SMU_GFXCLK || clk_type == SMU_SCLK) 1062 ret = smu_cmn_send_smc_msg( 1063 smu, SMU_MSG_GetMinGfxclkFrequency, min); 1064 else 1065 ret = smu_cmn_send_smc_msg_with_param( 1066 smu, SMU_MSG_GetMinDpmFreq, param, min); 1067 } 1068 1069 failed: 1070 return ret; 1071 } 1072 1073 static int smu_v13_0_6_get_dpm_level_count(struct smu_context *smu, 1074 enum smu_clk_type clk_type, 1075 uint32_t *levels) 1076 { 1077 int ret; 1078 1079 ret = smu_v13_0_get_dpm_freq_by_index(smu, clk_type, 0xff, levels); 1080 if (!ret) 1081 ++(*levels); 1082 1083 return ret; 1084 } 1085 1086 static void smu_v13_0_6_pm_policy_init(struct smu_context *smu) 1087 { 1088 struct smu_dpm_policy *policy; 1089 1090 policy = smu_get_pm_policy(smu, PP_PM_POLICY_SOC_PSTATE); 1091 if (policy) 1092 policy->current_level = SOC_PSTATE_DEFAULT; 1093 } 1094 1095 static int smu_v13_0_6_set_default_dpm_table(struct smu_context *smu) 1096 { 1097 struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; 1098 struct smu_table_context *smu_table = &smu->smu_table; 1099 struct smu_dpm_table *dpm_table = NULL; 1100 struct PPTable_t *pptable = 1101 (struct PPTable_t *)smu_table->driver_pptable; 1102 uint32_t gfxclkmin, gfxclkmax, levels; 1103 int ret = 0, i, j; 1104 struct smu_v13_0_6_dpm_map dpm_map[] = { 1105 { SMU_SOCCLK, SMU_FEATURE_DPM_SOCCLK_BIT, 1106 &dpm_context->dpm_tables.soc_table, 1107 pptable->SocclkFrequencyTable }, 1108 { SMU_UCLK, SMU_FEATURE_DPM_UCLK_BIT, 1109 &dpm_context->dpm_tables.uclk_table, 1110 pptable->UclkFrequencyTable }, 1111 { SMU_FCLK, SMU_FEATURE_DPM_FCLK_BIT, 1112 &dpm_context->dpm_tables.fclk_table, 1113 pptable->FclkFrequencyTable }, 1114 { SMU_VCLK, SMU_FEATURE_DPM_VCLK_BIT, 1115 &dpm_context->dpm_tables.vclk_table, 1116 pptable->VclkFrequencyTable }, 1117 { SMU_DCLK, SMU_FEATURE_DPM_DCLK_BIT, 1118 &dpm_context->dpm_tables.dclk_table, 1119 pptable->DclkFrequencyTable }, 1120 }; 1121 1122 smu_v13_0_6_setup_driver_pptable(smu); 1123 1124 /* DPM policy not supported in older firmwares */ 1125 if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(DPM_POLICY))) { 1126 struct smu_dpm_context *smu_dpm = &smu->smu_dpm; 1127 1128 smu_dpm->dpm_policies->policy_mask &= 1129 ~BIT(PP_PM_POLICY_SOC_PSTATE); 1130 } 1131 1132 smu_v13_0_6_pm_policy_init(smu); 1133 /* gfxclk dpm table setup */ 1134 dpm_table = &dpm_context->dpm_tables.gfx_table; 1135 dpm_table->clk_type = SMU_GFXCLK; 1136 dpm_table->flags = SMU_DPM_TABLE_FINE_GRAINED; 1137 if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DPM_GFXCLK_BIT)) { 1138 /* In the case of gfxclk, only fine-grained dpm is honored. 1139 * Get min/max values from FW. 1140 */ 1141 ret = smu_v13_0_6_get_dpm_ultimate_freq(smu, SMU_GFXCLK, 1142 &gfxclkmin, &gfxclkmax); 1143 if (ret) 1144 return ret; 1145 dpm_table->count = 2; 1146 dpm_table->dpm_levels[0].value = gfxclkmin; 1147 dpm_table->dpm_levels[0].enabled = true; 1148 dpm_table->dpm_levels[1].value = gfxclkmax; 1149 dpm_table->dpm_levels[1].enabled = true; 1150 } else { 1151 dpm_table->count = 1; 1152 dpm_table->dpm_levels[0].value = pptable->MinGfxclkFrequency; 1153 dpm_table->dpm_levels[0].enabled = true; 1154 } 1155 1156 for (j = 0; j < ARRAY_SIZE(dpm_map); j++) { 1157 dpm_table = dpm_map[j].dpm_table; 1158 levels = 1; 1159 if (smu_cmn_feature_is_enabled(smu, dpm_map[j].feature_num)) { 1160 ret = smu_v13_0_6_get_dpm_level_count( 1161 smu, dpm_map[j].clk_type, &levels); 1162 if (ret) 1163 return ret; 1164 } 1165 dpm_table->count = levels; 1166 dpm_table->clk_type = dpm_map[j].clk_type; 1167 for (i = 0; i < dpm_table->count; ++i) { 1168 dpm_table->dpm_levels[i].value = 1169 dpm_map[j].freq_table[i]; 1170 dpm_table->dpm_levels[i].enabled = true; 1171 } 1172 } 1173 1174 return 0; 1175 } 1176 1177 static int smu_v13_0_6_setup_pptable(struct smu_context *smu) 1178 { 1179 struct smu_table_context *table_context = &smu->smu_table; 1180 1181 /* TODO: PPTable is not available. 1182 * 1) Find an alternate way to get 'PPTable values' here. 1183 * 2) Check if there is SW CTF 1184 */ 1185 table_context->thermal_controller_type = 0; 1186 1187 return 0; 1188 } 1189 1190 static int smu_v13_0_6_check_fw_status(struct smu_context *smu) 1191 { 1192 struct amdgpu_device *adev = smu->adev; 1193 uint32_t mp1_fw_flags; 1194 1195 mp1_fw_flags = 1196 RREG32_PCIE(MP1_Public | (smnMP1_FIRMWARE_FLAGS & 0xffffffff)); 1197 1198 if ((mp1_fw_flags & MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK) >> 1199 MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED__SHIFT) 1200 return 0; 1201 1202 return -EIO; 1203 } 1204 1205 static int smu_v13_0_6_populate_umd_state_clk(struct smu_context *smu) 1206 { 1207 struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; 1208 struct smu_dpm_table *gfx_table = &dpm_context->dpm_tables.gfx_table; 1209 struct smu_dpm_table *mem_table = &dpm_context->dpm_tables.uclk_table; 1210 struct smu_dpm_table *soc_table = &dpm_context->dpm_tables.soc_table; 1211 struct smu_dpm_table *fclk_table = &dpm_context->dpm_tables.fclk_table; 1212 struct smu_umd_pstate_table *pstate_table = &smu->pstate_table; 1213 1214 pstate_table->gfxclk_pstate.min = SMU_DPM_TABLE_MIN(gfx_table); 1215 pstate_table->gfxclk_pstate.peak = SMU_DPM_TABLE_MAX(gfx_table); 1216 pstate_table->gfxclk_pstate.curr.min = SMU_DPM_TABLE_MIN(gfx_table); 1217 pstate_table->gfxclk_pstate.curr.max = SMU_DPM_TABLE_MAX(gfx_table); 1218 1219 pstate_table->uclk_pstate.min = SMU_DPM_TABLE_MIN(mem_table); 1220 pstate_table->uclk_pstate.peak = SMU_DPM_TABLE_MAX(mem_table); 1221 pstate_table->uclk_pstate.curr.min = SMU_DPM_TABLE_MIN(mem_table); 1222 pstate_table->uclk_pstate.curr.max = SMU_DPM_TABLE_MAX(mem_table); 1223 1224 pstate_table->socclk_pstate.min = SMU_DPM_TABLE_MIN(soc_table); 1225 pstate_table->socclk_pstate.peak = SMU_DPM_TABLE_MAX(soc_table); 1226 pstate_table->socclk_pstate.curr.min = SMU_DPM_TABLE_MIN(soc_table); 1227 pstate_table->socclk_pstate.curr.max = SMU_DPM_TABLE_MAX(soc_table); 1228 1229 pstate_table->fclk_pstate.min = SMU_DPM_TABLE_MIN(fclk_table); 1230 pstate_table->fclk_pstate.peak = SMU_DPM_TABLE_MAX(fclk_table); 1231 pstate_table->fclk_pstate.curr.min = SMU_DPM_TABLE_MIN(fclk_table); 1232 pstate_table->fclk_pstate.curr.max = SMU_DPM_TABLE_MAX(fclk_table); 1233 pstate_table->fclk_pstate.standard = SMU_DPM_TABLE_MIN(fclk_table); 1234 1235 if (gfx_table->count > SMU_13_0_6_UMD_PSTATE_GFXCLK_LEVEL && 1236 mem_table->count > SMU_13_0_6_UMD_PSTATE_MCLK_LEVEL && 1237 soc_table->count > SMU_13_0_6_UMD_PSTATE_SOCCLK_LEVEL) { 1238 pstate_table->gfxclk_pstate.standard = 1239 gfx_table->dpm_levels[SMU_13_0_6_UMD_PSTATE_GFXCLK_LEVEL].value; 1240 pstate_table->uclk_pstate.standard = 1241 mem_table->dpm_levels[SMU_13_0_6_UMD_PSTATE_MCLK_LEVEL].value; 1242 pstate_table->socclk_pstate.standard = 1243 soc_table->dpm_levels[SMU_13_0_6_UMD_PSTATE_SOCCLK_LEVEL].value; 1244 } else { 1245 pstate_table->gfxclk_pstate.standard = 1246 pstate_table->gfxclk_pstate.min; 1247 pstate_table->uclk_pstate.standard = 1248 pstate_table->uclk_pstate.min; 1249 pstate_table->socclk_pstate.standard = 1250 pstate_table->socclk_pstate.min; 1251 } 1252 1253 return 0; 1254 } 1255 1256 static uint32_t smu_v13_0_6_get_throttler_status(struct smu_context *smu) 1257 { 1258 struct smu_power_context *smu_power = &smu->smu_power; 1259 struct smu_13_0_power_context *power_context = smu_power->power_context; 1260 uint32_t throttler_status = 0; 1261 1262 throttler_status = atomic_read(&power_context->throttle_status); 1263 dev_dbg(smu->adev->dev, "SMU Throttler status: %u", throttler_status); 1264 1265 return throttler_status; 1266 } 1267 1268 static int smu_v13_0_6_get_smu_metrics_data(struct smu_context *smu, 1269 MetricsMember_t member, 1270 uint32_t *value) 1271 { 1272 struct smu_table_context *smu_table = &smu->smu_table; 1273 MetricsTableV0_t *metrics_v0 = (MetricsTableV0_t *)smu_table->metrics_table; 1274 MetricsTableV1_t *metrics_v1 = (MetricsTableV1_t *)smu_table->metrics_table; 1275 MetricsTableV2_t *metrics_v2 = (MetricsTableV2_t *)smu_table->metrics_table; 1276 int version = smu_v13_0_6_get_metrics_version(smu); 1277 struct amdgpu_device *adev = smu->adev; 1278 int ret = 0; 1279 int xcc_id; 1280 1281 ret = smu_v13_0_6_get_metrics_table(smu, NULL, false); 1282 if (ret) 1283 return ret; 1284 1285 if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12) && 1286 smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS))) 1287 return smu_v13_0_12_get_smu_metrics_data(smu, member, value); 1288 1289 /* For clocks with multiple instances, only report the first one */ 1290 switch (member) { 1291 case METRICS_CURR_GFXCLK: 1292 case METRICS_AVERAGE_GFXCLK: 1293 if (smu_v13_0_6_cap_supported(smu, SMU_CAP(DPM))) { 1294 xcc_id = GET_INST(GC, 0); 1295 *value = SMUQ10_ROUND(GET_METRIC_FIELD(GfxclkFrequency, version)[xcc_id]); 1296 } else { 1297 *value = 0; 1298 } 1299 break; 1300 case METRICS_CURR_SOCCLK: 1301 case METRICS_AVERAGE_SOCCLK: 1302 *value = SMUQ10_ROUND(GET_METRIC_FIELD(SocclkFrequency, version)[0]); 1303 break; 1304 case METRICS_CURR_UCLK: 1305 case METRICS_AVERAGE_UCLK: 1306 *value = SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequency, version)); 1307 break; 1308 case METRICS_CURR_VCLK: 1309 *value = SMUQ10_ROUND(GET_METRIC_FIELD(VclkFrequency, version)[0]); 1310 break; 1311 case METRICS_CURR_DCLK: 1312 *value = SMUQ10_ROUND(GET_METRIC_FIELD(DclkFrequency, version)[0]); 1313 break; 1314 case METRICS_CURR_FCLK: 1315 *value = SMUQ10_ROUND(GET_METRIC_FIELD(FclkFrequency, version)); 1316 break; 1317 case METRICS_AVERAGE_GFXACTIVITY: 1318 *value = SMUQ10_ROUND(GET_METRIC_FIELD(SocketGfxBusy, version)); 1319 break; 1320 case METRICS_AVERAGE_MEMACTIVITY: 1321 *value = SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilization, version)); 1322 break; 1323 case METRICS_CURR_SOCKETPOWER: 1324 *value = SMUQ10_ROUND(GET_METRIC_FIELD(SocketPower, version)) << 8; 1325 break; 1326 case METRICS_TEMPERATURE_HOTSPOT: 1327 *value = SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketTemperature, version)) * 1328 SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; 1329 break; 1330 case METRICS_TEMPERATURE_MEM: 1331 *value = SMUQ10_ROUND(GET_METRIC_FIELD(MaxHbmTemperature, version)) * 1332 SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; 1333 break; 1334 /* This is the max of all VRs and not just SOC VR. 1335 * No need to define another data type for the same. 1336 */ 1337 case METRICS_TEMPERATURE_VRSOC: 1338 *value = SMUQ10_ROUND(GET_METRIC_FIELD(MaxVrTemperature, version)) * 1339 SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; 1340 break; 1341 default: 1342 *value = UINT_MAX; 1343 break; 1344 } 1345 1346 return ret; 1347 } 1348 1349 static int smu_v13_0_6_get_current_clk_freq_by_table(struct smu_context *smu, 1350 enum smu_clk_type clk_type, 1351 uint32_t *value) 1352 { 1353 MetricsMember_t member_type; 1354 1355 if (!value) 1356 return -EINVAL; 1357 1358 switch (clk_type) { 1359 case SMU_GFXCLK: 1360 case SMU_SCLK: 1361 member_type = METRICS_CURR_GFXCLK; 1362 break; 1363 case SMU_UCLK: 1364 case SMU_MCLK: 1365 member_type = METRICS_CURR_UCLK; 1366 break; 1367 case SMU_SOCCLK: 1368 member_type = METRICS_CURR_SOCCLK; 1369 break; 1370 case SMU_VCLK: 1371 member_type = METRICS_CURR_VCLK; 1372 break; 1373 case SMU_DCLK: 1374 member_type = METRICS_CURR_DCLK; 1375 break; 1376 case SMU_FCLK: 1377 member_type = METRICS_CURR_FCLK; 1378 break; 1379 default: 1380 return -EINVAL; 1381 } 1382 1383 return smu_v13_0_6_get_smu_metrics_data(smu, member_type, value); 1384 } 1385 1386 static int smu_v13_0_6_emit_clk_levels(struct smu_context *smu, 1387 enum smu_clk_type type, char *buf, 1388 int *offset) 1389 { 1390 int now, size = *offset, start_offset = *offset; 1391 int ret = 0; 1392 struct smu_umd_pstate_table *pstate_table = &smu->pstate_table; 1393 struct smu_dpm_table *single_dpm_table = NULL; 1394 struct smu_dpm_context *smu_dpm = &smu->smu_dpm; 1395 struct smu_13_0_dpm_context *dpm_context = NULL; 1396 1397 if (amdgpu_ras_intr_triggered()) { 1398 sysfs_emit_at(buf, size, "unavailable\n"); 1399 return -EBUSY; 1400 } 1401 1402 dpm_context = smu_dpm->dpm_context; 1403 1404 switch (type) { 1405 case SMU_OD_SCLK: 1406 size += sysfs_emit_at(buf, size, "%s:\n", "OD_SCLK"); 1407 size += sysfs_emit_at(buf, size, "0: %uMhz\n1: %uMhz\n", 1408 pstate_table->gfxclk_pstate.curr.min, 1409 pstate_table->gfxclk_pstate.curr.max); 1410 break; 1411 case SMU_OD_MCLK: 1412 if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(SET_UCLK_MAX))) 1413 return -EOPNOTSUPP; 1414 1415 size += sysfs_emit_at(buf, size, "%s:\n", "OD_MCLK"); 1416 size += sysfs_emit_at(buf, size, "0: %uMhz\n1: %uMhz\n", 1417 pstate_table->uclk_pstate.curr.min, 1418 pstate_table->uclk_pstate.curr.max); 1419 break; 1420 case SMU_OD_FCLK: 1421 if (!smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DPM_FCLK_BIT)) 1422 return -EOPNOTSUPP; 1423 1424 size += sysfs_emit_at(buf, size, "%s:\n", "OD_FCLK"); 1425 size += sysfs_emit_at(buf, size, "0: %uMhz\n1: %uMhz\n", 1426 pstate_table->fclk_pstate.curr.min, 1427 pstate_table->fclk_pstate.curr.max); 1428 break; 1429 case SMU_SCLK: 1430 case SMU_GFXCLK: 1431 single_dpm_table = &(dpm_context->dpm_tables.gfx_table); 1432 break; 1433 case SMU_MCLK: 1434 case SMU_UCLK: 1435 single_dpm_table = &(dpm_context->dpm_tables.uclk_table); 1436 break; 1437 case SMU_SOCCLK: 1438 single_dpm_table = &(dpm_context->dpm_tables.soc_table); 1439 break; 1440 case SMU_FCLK: 1441 single_dpm_table = &(dpm_context->dpm_tables.fclk_table); 1442 break; 1443 case SMU_VCLK: 1444 single_dpm_table = &(dpm_context->dpm_tables.vclk_table); 1445 break; 1446 case SMU_DCLK: 1447 single_dpm_table = &(dpm_context->dpm_tables.dclk_table); 1448 break; 1449 default: 1450 break; 1451 } 1452 1453 if (single_dpm_table) { 1454 ret = smu_v13_0_6_get_current_clk_freq_by_table(smu, type, 1455 &now); 1456 if (ret) { 1457 dev_err(smu->adev->dev, 1458 "Attempt to get current clk Failed!"); 1459 return ret; 1460 } 1461 return smu_cmn_print_dpm_clk_levels(smu, single_dpm_table, now, 1462 buf, offset); 1463 } 1464 1465 *offset += size - start_offset; 1466 1467 return 0; 1468 } 1469 1470 static int smu_v13_0_6_upload_dpm_level(struct smu_context *smu, bool max, 1471 uint32_t feature_mask, uint32_t level) 1472 { 1473 struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; 1474 uint32_t freq; 1475 int ret = 0; 1476 1477 if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DPM_GFXCLK_BIT) && 1478 (feature_mask & FEATURE_MASK(FEATURE_DPM_GFXCLK))) { 1479 freq = dpm_context->dpm_tables.gfx_table.dpm_levels[level].value; 1480 ret = smu_cmn_send_smc_msg_with_param( 1481 smu, 1482 (max ? SMU_MSG_SetSoftMaxGfxClk : 1483 SMU_MSG_SetSoftMinGfxclk), 1484 freq & 0xffff, NULL); 1485 if (ret) { 1486 dev_err(smu->adev->dev, 1487 "Failed to set soft %s gfxclk !\n", 1488 max ? "max" : "min"); 1489 return ret; 1490 } 1491 } 1492 1493 if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DPM_UCLK_BIT) && 1494 (feature_mask & FEATURE_MASK(FEATURE_DPM_UCLK))) { 1495 freq = dpm_context->dpm_tables.uclk_table.dpm_levels[level] 1496 .value; 1497 ret = smu_cmn_send_smc_msg_with_param( 1498 smu, 1499 (max ? SMU_MSG_SetSoftMaxByFreq : 1500 SMU_MSG_SetSoftMinByFreq), 1501 (PPCLK_UCLK << 16) | (freq & 0xffff), NULL); 1502 if (ret) { 1503 dev_err(smu->adev->dev, 1504 "Failed to set soft %s memclk !\n", 1505 max ? "max" : "min"); 1506 return ret; 1507 } 1508 } 1509 1510 if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DPM_SOCCLK_BIT) && 1511 (feature_mask & FEATURE_MASK(FEATURE_DPM_SOCCLK))) { 1512 freq = dpm_context->dpm_tables.soc_table.dpm_levels[level].value; 1513 ret = smu_cmn_send_smc_msg_with_param( 1514 smu, 1515 (max ? SMU_MSG_SetSoftMaxByFreq : 1516 SMU_MSG_SetSoftMinByFreq), 1517 (PPCLK_SOCCLK << 16) | (freq & 0xffff), NULL); 1518 if (ret) { 1519 dev_err(smu->adev->dev, 1520 "Failed to set soft %s socclk !\n", 1521 max ? "max" : "min"); 1522 return ret; 1523 } 1524 } 1525 1526 return ret; 1527 } 1528 1529 static int smu_v13_0_6_force_clk_levels(struct smu_context *smu, 1530 enum smu_clk_type type, uint32_t mask) 1531 { 1532 struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; 1533 struct smu_dpm_table *single_dpm_table = NULL; 1534 uint32_t soft_min_level, soft_max_level; 1535 int ret = 0; 1536 1537 soft_min_level = mask ? (ffs(mask) - 1) : 0; 1538 soft_max_level = mask ? (fls(mask) - 1) : 0; 1539 1540 switch (type) { 1541 case SMU_SCLK: 1542 single_dpm_table = &(dpm_context->dpm_tables.gfx_table); 1543 if (soft_max_level >= single_dpm_table->count) { 1544 dev_err(smu->adev->dev, 1545 "Clock level specified %d is over max allowed %d\n", 1546 soft_max_level, single_dpm_table->count - 1); 1547 ret = -EINVAL; 1548 break; 1549 } 1550 1551 ret = smu_v13_0_6_upload_dpm_level( 1552 smu, false, FEATURE_MASK(FEATURE_DPM_GFXCLK), 1553 soft_min_level); 1554 if (ret) { 1555 dev_err(smu->adev->dev, 1556 "Failed to upload boot level to lowest!\n"); 1557 break; 1558 } 1559 1560 ret = smu_v13_0_6_upload_dpm_level( 1561 smu, true, FEATURE_MASK(FEATURE_DPM_GFXCLK), 1562 soft_max_level); 1563 if (ret) 1564 dev_err(smu->adev->dev, 1565 "Failed to upload dpm max level to highest!\n"); 1566 1567 break; 1568 1569 case SMU_MCLK: 1570 case SMU_SOCCLK: 1571 case SMU_FCLK: 1572 /* 1573 * Should not arrive here since smu_13_0_6 does not 1574 * support mclk/socclk/fclk softmin/softmax settings 1575 */ 1576 ret = -EINVAL; 1577 break; 1578 1579 default: 1580 break; 1581 } 1582 1583 return ret; 1584 } 1585 1586 static int smu_v13_0_6_get_current_activity_percent(struct smu_context *smu, 1587 enum amd_pp_sensors sensor, 1588 uint32_t *value) 1589 { 1590 int ret = 0; 1591 1592 if (!value) 1593 return -EINVAL; 1594 1595 switch (sensor) { 1596 case AMDGPU_PP_SENSOR_GPU_LOAD: 1597 ret = smu_v13_0_6_get_smu_metrics_data( 1598 smu, METRICS_AVERAGE_GFXACTIVITY, value); 1599 break; 1600 case AMDGPU_PP_SENSOR_MEM_LOAD: 1601 ret = smu_v13_0_6_get_smu_metrics_data( 1602 smu, METRICS_AVERAGE_MEMACTIVITY, value); 1603 break; 1604 default: 1605 dev_err(smu->adev->dev, 1606 "Invalid sensor for retrieving clock activity\n"); 1607 return -EINVAL; 1608 } 1609 1610 return ret; 1611 } 1612 1613 static int smu_v13_0_6_thermal_get_temperature(struct smu_context *smu, 1614 enum amd_pp_sensors sensor, 1615 uint32_t *value) 1616 { 1617 int ret = 0; 1618 1619 if (!value) 1620 return -EINVAL; 1621 1622 switch (sensor) { 1623 case AMDGPU_PP_SENSOR_HOTSPOT_TEMP: 1624 ret = smu_v13_0_6_get_smu_metrics_data( 1625 smu, METRICS_TEMPERATURE_HOTSPOT, value); 1626 break; 1627 case AMDGPU_PP_SENSOR_MEM_TEMP: 1628 ret = smu_v13_0_6_get_smu_metrics_data( 1629 smu, METRICS_TEMPERATURE_MEM, value); 1630 break; 1631 default: 1632 dev_err(smu->adev->dev, "Invalid sensor for retrieving temp\n"); 1633 return -EINVAL; 1634 } 1635 1636 return ret; 1637 } 1638 1639 static int smu_v13_0_6_read_sensor(struct smu_context *smu, 1640 enum amd_pp_sensors sensor, void *data, 1641 uint32_t *size) 1642 { 1643 struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; 1644 int ret = 0; 1645 1646 if (amdgpu_ras_intr_triggered()) 1647 return 0; 1648 1649 if (!data || !size) 1650 return -EINVAL; 1651 1652 switch (sensor) { 1653 case AMDGPU_PP_SENSOR_MEM_LOAD: 1654 case AMDGPU_PP_SENSOR_GPU_LOAD: 1655 ret = smu_v13_0_6_get_current_activity_percent(smu, sensor, 1656 (uint32_t *)data); 1657 *size = 4; 1658 break; 1659 case AMDGPU_PP_SENSOR_GPU_INPUT_POWER: 1660 ret = smu_v13_0_6_get_smu_metrics_data(smu, 1661 METRICS_CURR_SOCKETPOWER, 1662 (uint32_t *)data); 1663 *size = 4; 1664 break; 1665 case AMDGPU_PP_SENSOR_HOTSPOT_TEMP: 1666 case AMDGPU_PP_SENSOR_MEM_TEMP: 1667 ret = smu_v13_0_6_thermal_get_temperature(smu, sensor, 1668 (uint32_t *)data); 1669 *size = 4; 1670 break; 1671 case AMDGPU_PP_SENSOR_GFX_MCLK: 1672 ret = smu_v13_0_6_get_current_clk_freq_by_table( 1673 smu, SMU_UCLK, (uint32_t *)data); 1674 /* the output clock frequency in 10K unit */ 1675 *(uint32_t *)data *= 100; 1676 *size = 4; 1677 break; 1678 case AMDGPU_PP_SENSOR_GFX_SCLK: 1679 ret = smu_v13_0_6_get_current_clk_freq_by_table( 1680 smu, SMU_GFXCLK, (uint32_t *)data); 1681 *(uint32_t *)data *= 100; 1682 *size = 4; 1683 break; 1684 case AMDGPU_PP_SENSOR_VDDGFX: 1685 ret = smu_v13_0_get_gfx_vdd(smu, (uint32_t *)data); 1686 *size = 4; 1687 break; 1688 case AMDGPU_PP_SENSOR_VDDBOARD: 1689 if (smu_v13_0_6_cap_supported(smu, SMU_CAP(BOARD_VOLTAGE))) { 1690 *(uint32_t *)data = dpm_context->board_volt; 1691 *size = 4; 1692 break; 1693 } else { 1694 ret = -EOPNOTSUPP; 1695 break; 1696 } 1697 case AMDGPU_PP_SENSOR_NODEPOWERLIMIT: 1698 case AMDGPU_PP_SENSOR_NODEPOWER: 1699 case AMDGPU_PP_SENSOR_GPPTRESIDENCY: 1700 case AMDGPU_PP_SENSOR_MAXNODEPOWERLIMIT: 1701 ret = smu_v13_0_12_get_npm_data(smu, sensor, (uint32_t *)data); 1702 if (ret) 1703 return ret; 1704 *size = 4; 1705 break; 1706 case AMDGPU_PP_SENSOR_UBB_POWER: 1707 case AMDGPU_PP_SENSOR_UBB_POWER_LIMIT: 1708 ret = smu_v13_0_12_get_system_power(smu, sensor, (uint32_t *)data); 1709 if (ret) 1710 return ret; 1711 *size = 4; 1712 break; 1713 case AMDGPU_PP_SENSOR_GPU_AVG_POWER: 1714 default: 1715 ret = -EOPNOTSUPP; 1716 break; 1717 } 1718 1719 return ret; 1720 } 1721 1722 static int smu_v13_0_6_get_power_limit(struct smu_context *smu, 1723 uint32_t *current_power_limit, 1724 uint32_t *default_power_limit, 1725 uint32_t *max_power_limit, 1726 uint32_t *min_power_limit) 1727 { 1728 struct smu_table_context *smu_table = &smu->smu_table; 1729 struct PPTable_t *pptable = 1730 (struct PPTable_t *)smu_table->driver_pptable; 1731 uint32_t power_limit = 0; 1732 int ret; 1733 1734 ret = smu_cmn_send_smc_msg(smu, SMU_MSG_GetPptLimit, &power_limit); 1735 1736 if (ret) { 1737 dev_err(smu->adev->dev, "Couldn't get PPT limit"); 1738 return -EINVAL; 1739 } 1740 1741 if (current_power_limit) 1742 *current_power_limit = power_limit; 1743 if (default_power_limit) 1744 *default_power_limit = pptable->MaxSocketPowerLimit; 1745 1746 if (max_power_limit) { 1747 *max_power_limit = pptable->MaxSocketPowerLimit; 1748 } 1749 1750 if (min_power_limit) 1751 *min_power_limit = 0; 1752 return 0; 1753 } 1754 1755 static int smu_v13_0_6_set_power_limit(struct smu_context *smu, 1756 enum smu_ppt_limit_type limit_type, 1757 uint32_t limit) 1758 { 1759 struct smu_table_context *smu_table = &smu->smu_table; 1760 struct PPTable_t *pptable = 1761 (struct PPTable_t *)smu_table->driver_pptable; 1762 int ret; 1763 1764 if (limit_type == SMU_FAST_PPT_LIMIT) { 1765 if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(FAST_PPT))) 1766 return -EOPNOTSUPP; 1767 if (limit > pptable->PPT1Max || limit < pptable->PPT1Min) { 1768 dev_err(smu->adev->dev, 1769 "New power limit (%d) should be between min %d max %d\n", 1770 limit, pptable->PPT1Min, pptable->PPT1Max); 1771 return -EINVAL; 1772 } 1773 ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetFastPptLimit, 1774 limit, NULL); 1775 if (ret) 1776 dev_err(smu->adev->dev, "Set fast PPT limit failed!\n"); 1777 return ret; 1778 } 1779 1780 return smu_v13_0_set_power_limit(smu, limit_type, limit); 1781 } 1782 1783 static int smu_v13_0_6_get_ppt_limit(struct smu_context *smu, 1784 uint32_t *ppt_limit, 1785 enum smu_ppt_limit_type type, 1786 enum smu_ppt_limit_level level) 1787 { 1788 struct smu_table_context *smu_table = &smu->smu_table; 1789 struct PPTable_t *pptable = 1790 (struct PPTable_t *)smu_table->driver_pptable; 1791 int ret = 0; 1792 1793 if (type == SMU_FAST_PPT_LIMIT) { 1794 if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(FAST_PPT))) 1795 return -EOPNOTSUPP; 1796 switch (level) { 1797 case SMU_PPT_LIMIT_MAX: 1798 *ppt_limit = pptable->PPT1Max; 1799 break; 1800 case SMU_PPT_LIMIT_CURRENT: 1801 ret = smu_cmn_send_smc_msg(smu, SMU_MSG_GetFastPptLimit, ppt_limit); 1802 if (ret) 1803 dev_err(smu->adev->dev, "Get fast PPT limit failed!\n"); 1804 break; 1805 case SMU_PPT_LIMIT_DEFAULT: 1806 *ppt_limit = pptable->PPT1Default; 1807 break; 1808 case SMU_PPT_LIMIT_MIN: 1809 *ppt_limit = pptable->PPT1Min; 1810 break; 1811 default: 1812 return -EOPNOTSUPP; 1813 } 1814 return ret; 1815 } 1816 return -EOPNOTSUPP; 1817 } 1818 1819 static int smu_v13_0_6_irq_process(struct amdgpu_device *adev, 1820 struct amdgpu_irq_src *source, 1821 struct amdgpu_iv_entry *entry) 1822 { 1823 struct smu_context *smu = adev->powerplay.pp_handle; 1824 struct smu_power_context *smu_power = &smu->smu_power; 1825 struct smu_13_0_power_context *power_context = smu_power->power_context; 1826 uint32_t client_id = entry->client_id; 1827 uint32_t ctxid = entry->src_data[0]; 1828 uint32_t src_id = entry->src_id; 1829 uint32_t data; 1830 1831 if (client_id == SOC15_IH_CLIENTID_MP1) { 1832 if (src_id == IH_INTERRUPT_ID_TO_DRIVER) { 1833 /* ACK SMUToHost interrupt */ 1834 data = RREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT_CTRL); 1835 data = REG_SET_FIELD(data, MP1_SMN_IH_SW_INT_CTRL, INT_ACK, 1); 1836 WREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT_CTRL, data); 1837 /* 1838 * ctxid is used to distinguish different events for SMCToHost 1839 * interrupt. 1840 */ 1841 switch (ctxid) { 1842 case IH_INTERRUPT_CONTEXT_ID_THERMAL_THROTTLING: 1843 /* 1844 * Increment the throttle interrupt counter 1845 */ 1846 atomic64_inc(&smu->throttle_int_counter); 1847 1848 if (!atomic_read(&adev->throttling_logging_enabled)) 1849 return 0; 1850 1851 /* This uses the new method which fixes the 1852 * incorrect throttling status reporting 1853 * through metrics table. For older FWs, 1854 * it will be ignored. 1855 */ 1856 if (__ratelimit(&adev->throttling_logging_rs)) { 1857 atomic_set( 1858 &power_context->throttle_status, 1859 entry->src_data[1]); 1860 schedule_work(&smu->throttling_logging_work); 1861 } 1862 break; 1863 default: 1864 dev_dbg(adev->dev, "Unhandled context id %d from client:%d!\n", 1865 ctxid, client_id); 1866 break; 1867 } 1868 } 1869 } 1870 1871 return 0; 1872 } 1873 1874 static int smu_v13_0_6_set_irq_state(struct amdgpu_device *adev, 1875 struct amdgpu_irq_src *source, 1876 unsigned tyep, 1877 enum amdgpu_interrupt_state state) 1878 { 1879 uint32_t val = 0; 1880 1881 switch (state) { 1882 case AMDGPU_IRQ_STATE_DISABLE: 1883 /* For MP1 SW irqs */ 1884 val = RREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT_CTRL); 1885 val = REG_SET_FIELD(val, MP1_SMN_IH_SW_INT_CTRL, INT_MASK, 1); 1886 WREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT_CTRL, val); 1887 1888 break; 1889 case AMDGPU_IRQ_STATE_ENABLE: 1890 /* For MP1 SW irqs */ 1891 val = RREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT); 1892 val = REG_SET_FIELD(val, MP1_SMN_IH_SW_INT, ID, 0xFE); 1893 val = REG_SET_FIELD(val, MP1_SMN_IH_SW_INT, VALID, 0); 1894 WREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT, val); 1895 1896 val = RREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT_CTRL); 1897 val = REG_SET_FIELD(val, MP1_SMN_IH_SW_INT_CTRL, INT_MASK, 0); 1898 WREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT_CTRL, val); 1899 1900 break; 1901 default: 1902 break; 1903 } 1904 1905 return 0; 1906 } 1907 1908 static const struct amdgpu_irq_src_funcs smu_v13_0_6_irq_funcs = { 1909 .set = smu_v13_0_6_set_irq_state, 1910 .process = smu_v13_0_6_irq_process, 1911 }; 1912 1913 static int smu_v13_0_6_register_irq_handler(struct smu_context *smu) 1914 { 1915 struct amdgpu_device *adev = smu->adev; 1916 struct amdgpu_irq_src *irq_src = &smu->irq_source; 1917 int ret = 0; 1918 1919 if (amdgpu_sriov_vf(adev)) 1920 return 0; 1921 1922 irq_src->num_types = 1; 1923 irq_src->funcs = &smu_v13_0_6_irq_funcs; 1924 1925 ret = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_MP1, 1926 IH_INTERRUPT_ID_TO_DRIVER, 1927 irq_src); 1928 if (ret) 1929 return ret; 1930 1931 return ret; 1932 } 1933 1934 static int smu_v13_0_6_notify_unload(struct smu_context *smu) 1935 { 1936 if (amdgpu_in_reset(smu->adev)) 1937 return 0; 1938 1939 dev_dbg(smu->adev->dev, "Notify PMFW about driver unload"); 1940 /* Ignore return, just intimate FW that driver is not going to be there */ 1941 smu_cmn_send_smc_msg(smu, SMU_MSG_PrepareMp1ForUnload, NULL); 1942 1943 return 0; 1944 } 1945 1946 static int smu_v13_0_6_mca_set_debug_mode(struct smu_context *smu, bool enable) 1947 { 1948 /* NOTE: this ClearMcaOnRead message is only supported for smu version 85.72.0 or higher */ 1949 if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(MCA_DEBUG_MODE))) 1950 return 0; 1951 1952 return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ClearMcaOnRead, 1953 enable ? 0 : ClearMcaOnRead_UE_FLAG_MASK | ClearMcaOnRead_CE_POLL_MASK, 1954 NULL); 1955 } 1956 1957 static int smu_v13_0_6_system_features_control(struct smu_context *smu, 1958 bool enable) 1959 { 1960 struct amdgpu_device *adev = smu->adev; 1961 int ret = 0; 1962 1963 if (amdgpu_sriov_vf(adev)) 1964 return 0; 1965 1966 if (enable) { 1967 if (!(adev->flags & AMD_IS_APU)) 1968 ret = smu_v13_0_system_features_control(smu, enable); 1969 } else { 1970 /* Notify FW that the device is no longer driver managed */ 1971 smu_v13_0_6_notify_unload(smu); 1972 } 1973 1974 return ret; 1975 } 1976 1977 static int smu_v13_0_6_set_gfx_soft_freq_limited_range(struct smu_context *smu, 1978 uint32_t min, 1979 uint32_t max) 1980 { 1981 int ret; 1982 1983 ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMaxGfxClk, 1984 max & 0xffff, NULL); 1985 if (ret) 1986 return ret; 1987 1988 ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMinGfxclk, 1989 min & 0xffff, NULL); 1990 1991 return ret; 1992 } 1993 1994 static int smu_v13_0_6_set_performance_level(struct smu_context *smu, 1995 enum amd_dpm_forced_level level) 1996 { 1997 struct smu_dpm_context *smu_dpm = &(smu->smu_dpm); 1998 struct smu_13_0_dpm_context *dpm_context = smu_dpm->dpm_context; 1999 struct smu_dpm_table *gfx_table = &dpm_context->dpm_tables.gfx_table; 2000 struct smu_dpm_table *uclk_table = &dpm_context->dpm_tables.uclk_table; 2001 struct smu_umd_pstate_table *pstate_table = &smu->pstate_table; 2002 int ret; 2003 2004 /* Disable determinism if switching to another mode */ 2005 if ((smu_dpm->dpm_level == AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) && 2006 (level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM)) { 2007 smu_cmn_send_smc_msg(smu, SMU_MSG_DisableDeterminism, NULL); 2008 pstate_table->gfxclk_pstate.curr.max = 2009 SMU_DPM_TABLE_MAX(gfx_table); 2010 } 2011 2012 switch (level) { 2013 case AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM: 2014 return 0; 2015 2016 case AMD_DPM_FORCED_LEVEL_AUTO: 2017 if ((SMU_DPM_TABLE_MIN(gfx_table) != 2018 pstate_table->gfxclk_pstate.curr.min) || 2019 (SMU_DPM_TABLE_MAX(gfx_table) != 2020 pstate_table->gfxclk_pstate.curr.max)) { 2021 ret = smu_v13_0_6_set_gfx_soft_freq_limited_range( 2022 smu, SMU_DPM_TABLE_MIN(gfx_table), 2023 SMU_DPM_TABLE_MAX(gfx_table)); 2024 if (ret) 2025 return ret; 2026 2027 pstate_table->gfxclk_pstate.curr.min = 2028 SMU_DPM_TABLE_MIN(gfx_table); 2029 pstate_table->gfxclk_pstate.curr.max = 2030 SMU_DPM_TABLE_MAX(gfx_table); 2031 } 2032 2033 if (SMU_DPM_TABLE_MAX(uclk_table) != 2034 pstate_table->uclk_pstate.curr.max) { 2035 /* Min UCLK is not expected to be changed */ 2036 ret = smu_v13_0_set_soft_freq_limited_range( 2037 smu, SMU_UCLK, 0, SMU_DPM_TABLE_MAX(uclk_table), 2038 false); 2039 if (ret) 2040 return ret; 2041 pstate_table->uclk_pstate.curr.max = 2042 SMU_DPM_TABLE_MAX(uclk_table); 2043 } 2044 smu_v13_0_reset_custom_level(smu); 2045 2046 return 0; 2047 case AMD_DPM_FORCED_LEVEL_MANUAL: 2048 return 0; 2049 default: 2050 break; 2051 } 2052 2053 return -EOPNOTSUPP; 2054 } 2055 2056 static int smu_v13_0_6_set_soft_freq_limited_range(struct smu_context *smu, 2057 enum smu_clk_type clk_type, 2058 uint32_t min, uint32_t max, 2059 bool automatic) 2060 { 2061 struct smu_dpm_context *smu_dpm = &(smu->smu_dpm); 2062 struct smu_13_0_dpm_context *dpm_context = smu_dpm->dpm_context; 2063 struct smu_umd_pstate_table *pstate_table = &smu->pstate_table; 2064 struct amdgpu_device *adev = smu->adev; 2065 uint32_t min_clk; 2066 uint32_t max_clk; 2067 int ret = 0; 2068 2069 if (clk_type != SMU_GFXCLK && clk_type != SMU_SCLK && 2070 clk_type != SMU_UCLK && clk_type != SMU_FCLK) 2071 return -EINVAL; 2072 2073 if ((smu_dpm->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) && 2074 (smu_dpm->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM)) 2075 return -EINVAL; 2076 2077 if (smu_dpm->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) { 2078 if (min > max) { 2079 dev_err(smu->adev->dev, 2080 "Minimum clk should be less/equal to the maximum allowed clock\n"); 2081 return -EINVAL; 2082 } 2083 2084 if (clk_type == SMU_GFXCLK) { 2085 if ((min == pstate_table->gfxclk_pstate.curr.min) && 2086 (max == pstate_table->gfxclk_pstate.curr.max)) 2087 return 0; 2088 2089 ret = smu_v13_0_6_set_gfx_soft_freq_limited_range( 2090 smu, min, max); 2091 if (!ret) { 2092 pstate_table->gfxclk_pstate.curr.min = min; 2093 pstate_table->gfxclk_pstate.curr.max = max; 2094 } 2095 } 2096 2097 if (clk_type == SMU_UCLK) { 2098 if (max == pstate_table->uclk_pstate.curr.max) 2099 return 0; 2100 /* For VF, only allowed in FW versions 85.102 or greater */ 2101 if (!smu_v13_0_6_cap_supported(smu, 2102 SMU_CAP(SET_UCLK_MAX))) 2103 return -EOPNOTSUPP; 2104 /* Only max clock limiting is allowed for UCLK */ 2105 ret = smu_v13_0_set_soft_freq_limited_range( 2106 smu, SMU_UCLK, 0, max, false); 2107 if (!ret) 2108 pstate_table->uclk_pstate.curr.max = max; 2109 } 2110 2111 if (clk_type == SMU_FCLK) { 2112 if (max == pstate_table->fclk_pstate.curr.max) 2113 return 0; 2114 2115 ret = smu_v13_0_set_soft_freq_limited_range(smu, SMU_FCLK, 0, max, false); 2116 if (!ret) 2117 pstate_table->fclk_pstate.curr.max = max; 2118 } 2119 2120 return ret; 2121 } 2122 2123 if (smu_dpm->dpm_level == AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) { 2124 min_clk = SMU_DPM_TABLE_MIN(&dpm_context->dpm_tables.gfx_table); 2125 max_clk = SMU_DPM_TABLE_MAX(&dpm_context->dpm_tables.gfx_table); 2126 if (!max || (max < min_clk) || (max > max_clk)) { 2127 dev_warn( 2128 adev->dev, 2129 "Invalid max frequency %d MHz specified for determinism\n", 2130 max); 2131 return -EINVAL; 2132 } 2133 2134 /* Restore default min/max clocks and enable determinism */ 2135 ret = smu_v13_0_6_set_gfx_soft_freq_limited_range(smu, min_clk, 2136 max_clk); 2137 if (!ret) { 2138 usleep_range(500, 1000); 2139 ret = smu_cmn_send_smc_msg_with_param( 2140 smu, SMU_MSG_EnableDeterminism, max, NULL); 2141 if (ret) { 2142 dev_err(adev->dev, 2143 "Failed to enable determinism at GFX clock %d MHz\n", 2144 max); 2145 } else { 2146 pstate_table->gfxclk_pstate.curr.min = min_clk; 2147 pstate_table->gfxclk_pstate.curr.max = max; 2148 } 2149 } 2150 } 2151 2152 return ret; 2153 } 2154 2155 static int smu_v13_0_6_usr_edit_dpm_table(struct smu_context *smu, 2156 enum PP_OD_DPM_TABLE_COMMAND type, 2157 long input[], uint32_t size) 2158 { 2159 struct smu_dpm_context *smu_dpm = &(smu->smu_dpm); 2160 struct smu_13_0_dpm_context *dpm_context = smu_dpm->dpm_context; 2161 struct smu_dpm_table *uclk_table = &dpm_context->dpm_tables.uclk_table; 2162 struct smu_dpm_table *fclk_table = &dpm_context->dpm_tables.fclk_table; 2163 struct smu_umd_pstate_table *pstate_table = &smu->pstate_table; 2164 uint32_t min_clk; 2165 uint32_t max_clk; 2166 int ret = 0; 2167 2168 /* Only allowed in manual or determinism mode */ 2169 if ((smu_dpm->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) && 2170 (smu_dpm->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM)) 2171 return -EINVAL; 2172 2173 switch (type) { 2174 case PP_OD_EDIT_SCLK_VDDC_TABLE: 2175 if (size != 2) { 2176 dev_err(smu->adev->dev, 2177 "Input parameter number not correct\n"); 2178 return -EINVAL; 2179 } 2180 min_clk = SMU_DPM_TABLE_MIN(&dpm_context->dpm_tables.gfx_table); 2181 max_clk = SMU_DPM_TABLE_MAX(&dpm_context->dpm_tables.gfx_table); 2182 if (input[0] == 0) { 2183 if (input[1] < min_clk) { 2184 dev_warn( 2185 smu->adev->dev, 2186 "Minimum GFX clk (%ld) MHz specified is less than the minimum allowed (%d) MHz\n", 2187 input[1], min_clk); 2188 pstate_table->gfxclk_pstate.custom.min = 2189 pstate_table->gfxclk_pstate.curr.min; 2190 return -EINVAL; 2191 } 2192 2193 pstate_table->gfxclk_pstate.custom.min = input[1]; 2194 } else if (input[0] == 1) { 2195 if (input[1] > max_clk) { 2196 dev_warn( 2197 smu->adev->dev, 2198 "Maximum GFX clk (%ld) MHz specified is greater than the maximum allowed (%d) MHz\n", 2199 input[1], max_clk); 2200 pstate_table->gfxclk_pstate.custom.max = 2201 pstate_table->gfxclk_pstate.curr.max; 2202 return -EINVAL; 2203 } 2204 2205 pstate_table->gfxclk_pstate.custom.max = input[1]; 2206 } else { 2207 return -EINVAL; 2208 } 2209 break; 2210 case PP_OD_EDIT_MCLK_VDDC_TABLE: 2211 if (size != 2) { 2212 dev_err(smu->adev->dev, 2213 "Input parameter number not correct\n"); 2214 return -EINVAL; 2215 } 2216 2217 if (!smu_cmn_feature_is_enabled(smu, 2218 SMU_FEATURE_DPM_UCLK_BIT)) { 2219 dev_warn(smu->adev->dev, 2220 "UCLK_LIMITS setting not supported!\n"); 2221 return -EOPNOTSUPP; 2222 } 2223 max_clk = 2224 SMU_DPM_TABLE_MAX(&dpm_context->dpm_tables.uclk_table); 2225 if (input[0] == 0) { 2226 dev_info(smu->adev->dev, 2227 "Setting min UCLK level is not supported"); 2228 return -EINVAL; 2229 } else if (input[0] == 1) { 2230 if (input[1] > max_clk) { 2231 dev_warn( 2232 smu->adev->dev, 2233 "Maximum UCLK (%ld) MHz specified is greater than the maximum allowed (%d) MHz\n", 2234 input[1], max_clk); 2235 pstate_table->uclk_pstate.custom.max = 2236 pstate_table->uclk_pstate.curr.max; 2237 return -EINVAL; 2238 } 2239 2240 pstate_table->uclk_pstate.custom.max = input[1]; 2241 } 2242 break; 2243 case PP_OD_EDIT_FCLK_TABLE: 2244 if (size != 2) { 2245 dev_err(smu->adev->dev, 2246 "Input parameter number not correct\n"); 2247 return -EINVAL; 2248 } 2249 2250 if (!smu_cmn_feature_is_enabled(smu, 2251 SMU_FEATURE_DPM_FCLK_BIT)) { 2252 dev_warn(smu->adev->dev, 2253 "FCLK limits setting not supported!\n"); 2254 return -EOPNOTSUPP; 2255 } 2256 2257 max_clk = SMU_DPM_TABLE_MAX(&dpm_context->dpm_tables.fclk_table); 2258 if (input[0] == 0) { 2259 dev_info(smu->adev->dev, 2260 "Setting min FCLK level is not supported\n"); 2261 return -EOPNOTSUPP; 2262 } else if (input[0] == 1) { 2263 if (input[1] > max_clk) { 2264 dev_warn(smu->adev->dev, 2265 "Maximum FCLK (%ld) MHz specified is greater than the maximum allowed (%d) MHz\n", 2266 input[1], max_clk); 2267 pstate_table->fclk_pstate.custom.max = 2268 pstate_table->fclk_pstate.curr.max; 2269 return -EINVAL; 2270 } 2271 2272 pstate_table->fclk_pstate.custom.max = input[1]; 2273 } else { 2274 return -EINVAL; 2275 } 2276 break; 2277 2278 case PP_OD_RESTORE_DEFAULT_TABLE: 2279 if (size != 0) { 2280 dev_err(smu->adev->dev, 2281 "Input parameter number not correct\n"); 2282 return -EINVAL; 2283 } else { 2284 /* Use the default frequencies for manual and determinism mode */ 2285 min_clk = SMU_DPM_TABLE_MIN( 2286 &dpm_context->dpm_tables.gfx_table); 2287 max_clk = SMU_DPM_TABLE_MAX( 2288 &dpm_context->dpm_tables.gfx_table); 2289 2290 ret = smu_v13_0_6_set_soft_freq_limited_range( 2291 smu, SMU_GFXCLK, min_clk, max_clk, false); 2292 2293 if (ret) 2294 return ret; 2295 2296 if (SMU_DPM_TABLE_MAX(uclk_table) != 2297 pstate_table->uclk_pstate.curr.max) { 2298 min_clk = SMU_DPM_TABLE_MIN(&dpm_context->dpm_tables.uclk_table); 2299 max_clk = SMU_DPM_TABLE_MAX(&dpm_context->dpm_tables.uclk_table); 2300 ret = smu_v13_0_6_set_soft_freq_limited_range(smu, 2301 SMU_UCLK, min_clk, 2302 max_clk, false); 2303 if (ret) 2304 return ret; 2305 } 2306 2307 if (SMU_DPM_TABLE_MAX(fclk_table) != 2308 pstate_table->fclk_pstate.curr.max) { 2309 max_clk = SMU_DPM_TABLE_MAX(&dpm_context->dpm_tables.fclk_table); 2310 min_clk = SMU_DPM_TABLE_MIN(&dpm_context->dpm_tables.fclk_table); 2311 ret = smu_v13_0_6_set_soft_freq_limited_range(smu, 2312 SMU_FCLK, min_clk, 2313 max_clk, false); 2314 if (ret) 2315 return ret; 2316 } 2317 smu_v13_0_reset_custom_level(smu); 2318 } 2319 break; 2320 case PP_OD_COMMIT_DPM_TABLE: 2321 if (size != 0) { 2322 dev_err(smu->adev->dev, 2323 "Input parameter number not correct\n"); 2324 return -EINVAL; 2325 } else { 2326 if (!pstate_table->gfxclk_pstate.custom.min) 2327 pstate_table->gfxclk_pstate.custom.min = 2328 pstate_table->gfxclk_pstate.curr.min; 2329 2330 if (!pstate_table->gfxclk_pstate.custom.max) 2331 pstate_table->gfxclk_pstate.custom.max = 2332 pstate_table->gfxclk_pstate.curr.max; 2333 2334 min_clk = pstate_table->gfxclk_pstate.custom.min; 2335 max_clk = pstate_table->gfxclk_pstate.custom.max; 2336 2337 ret = smu_v13_0_6_set_soft_freq_limited_range( 2338 smu, SMU_GFXCLK, min_clk, max_clk, false); 2339 2340 if (ret) 2341 return ret; 2342 2343 if (pstate_table->fclk_pstate.custom.max) { 2344 min_clk = pstate_table->fclk_pstate.curr.min; 2345 max_clk = pstate_table->fclk_pstate.custom.max; 2346 ret = smu_v13_0_6_set_soft_freq_limited_range(smu, 2347 SMU_FCLK, min_clk, 2348 max_clk, false); 2349 if (ret) 2350 return ret; 2351 } 2352 2353 if (!pstate_table->uclk_pstate.custom.max) 2354 return 0; 2355 2356 min_clk = pstate_table->uclk_pstate.curr.min; 2357 max_clk = pstate_table->uclk_pstate.custom.max; 2358 return smu_v13_0_6_set_soft_freq_limited_range( 2359 smu, SMU_UCLK, min_clk, max_clk, false); 2360 } 2361 break; 2362 default: 2363 return -ENOSYS; 2364 } 2365 2366 return ret; 2367 } 2368 2369 static int smu_v13_0_6_get_enabled_mask(struct smu_context *smu, 2370 struct smu_feature_bits *feature_mask) 2371 { 2372 int ret; 2373 2374 ret = smu_cmn_get_enabled_mask(smu, feature_mask); 2375 2376 if (ret == -EIO && !smu_v13_0_6_cap_supported(smu, SMU_CAP(DPM))) { 2377 smu_feature_bits_clearall(feature_mask); 2378 ret = 0; 2379 } 2380 2381 return ret; 2382 } 2383 2384 static bool smu_v13_0_6_is_dpm_running(struct smu_context *smu) 2385 { 2386 int ret; 2387 struct smu_feature_bits feature_enabled; 2388 2389 if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12)) 2390 return smu_v13_0_12_is_dpm_running(smu); 2391 2392 ret = smu_v13_0_6_get_enabled_mask(smu, &feature_enabled); 2393 2394 if (ret) 2395 return false; 2396 2397 return smu_feature_bits_test_mask(&feature_enabled, 2398 smu_v13_0_6_dpm_features.bits); 2399 } 2400 2401 static int smu_v13_0_6_request_i2c_xfer(struct smu_context *smu, 2402 void *table_data) 2403 { 2404 struct smu_table_context *smu_table = &smu->smu_table; 2405 struct smu_table *table = &smu_table->driver_table; 2406 struct amdgpu_device *adev = smu->adev; 2407 uint32_t table_size; 2408 int ret = 0; 2409 2410 if (!table_data) 2411 return -EINVAL; 2412 2413 table_size = smu_table->tables[SMU_TABLE_I2C_COMMANDS].size; 2414 2415 ret = smu_cmn_vram_cpy(smu, table->cpu_addr, table_data, table_size); 2416 if (ret) 2417 return ret; 2418 2419 /* Flush hdp cache */ 2420 amdgpu_hdp_flush(adev, NULL); 2421 2422 return smu_cmn_send_smc_msg(smu, SMU_MSG_RequestI2cTransaction, 2423 NULL); 2424 } 2425 2426 static int smu_v13_0_6_i2c_xfer(struct i2c_adapter *i2c_adap, 2427 struct i2c_msg *msg, int num_msgs) 2428 { 2429 struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(i2c_adap); 2430 struct amdgpu_device *adev = smu_i2c->adev; 2431 struct smu_context *smu = adev->powerplay.pp_handle; 2432 struct smu_table_context *smu_table = &smu->smu_table; 2433 struct smu_table *table = &smu_table->driver_table; 2434 SwI2cRequest_t *req, *res = (SwI2cRequest_t *)table->cpu_addr; 2435 int i, j, r, c; 2436 u16 dir; 2437 2438 if (!adev->pm.dpm_enabled) 2439 return -EBUSY; 2440 2441 req = kzalloc_obj(*req); 2442 if (!req) 2443 return -ENOMEM; 2444 2445 req->I2CcontrollerPort = smu_i2c->port; 2446 req->I2CSpeed = I2C_SPEED_FAST_400K; 2447 req->SlaveAddress = msg[0].addr << 1; /* wants an 8-bit address */ 2448 dir = msg[0].flags & I2C_M_RD; 2449 2450 for (c = i = 0; i < num_msgs; i++) { 2451 for (j = 0; j < msg[i].len; j++, c++) { 2452 SwI2cCmd_t *cmd = &req->SwI2cCmds[c]; 2453 2454 if (!(msg[i].flags & I2C_M_RD)) { 2455 /* write */ 2456 cmd->CmdConfig |= CMDCONFIG_READWRITE_MASK; 2457 cmd->ReadWriteData = msg[i].buf[j]; 2458 } 2459 2460 if ((dir ^ msg[i].flags) & I2C_M_RD) { 2461 /* The direction changes. 2462 */ 2463 dir = msg[i].flags & I2C_M_RD; 2464 cmd->CmdConfig |= CMDCONFIG_RESTART_MASK; 2465 } 2466 2467 req->NumCmds++; 2468 2469 /* 2470 * Insert STOP if we are at the last byte of either last 2471 * message for the transaction or the client explicitly 2472 * requires a STOP at this particular message. 2473 */ 2474 if ((j == msg[i].len - 1) && 2475 ((i == num_msgs - 1) || (msg[i].flags & I2C_M_STOP))) { 2476 cmd->CmdConfig &= ~CMDCONFIG_RESTART_MASK; 2477 cmd->CmdConfig |= CMDCONFIG_STOP_MASK; 2478 } 2479 } 2480 } 2481 mutex_lock(&adev->pm.mutex); 2482 r = smu_v13_0_6_request_i2c_xfer(smu, req); 2483 if (r) { 2484 /* Retry once, in case of an i2c collision */ 2485 r = smu_v13_0_6_request_i2c_xfer(smu, req); 2486 if (r) 2487 goto fail; 2488 } 2489 2490 for (c = i = 0; i < num_msgs; i++) { 2491 if (!(msg[i].flags & I2C_M_RD)) { 2492 c += msg[i].len; 2493 continue; 2494 } 2495 for (j = 0; j < msg[i].len; j++, c++) { 2496 SwI2cCmd_t *cmd = &res->SwI2cCmds[c]; 2497 2498 msg[i].buf[j] = cmd->ReadWriteData; 2499 } 2500 } 2501 r = num_msgs; 2502 fail: 2503 mutex_unlock(&adev->pm.mutex); 2504 kfree(req); 2505 return r; 2506 } 2507 2508 static u32 smu_v13_0_6_i2c_func(struct i2c_adapter *adap) 2509 { 2510 return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL; 2511 } 2512 2513 static const struct i2c_algorithm smu_v13_0_6_i2c_algo = { 2514 .master_xfer = smu_v13_0_6_i2c_xfer, 2515 .functionality = smu_v13_0_6_i2c_func, 2516 }; 2517 2518 static const struct i2c_adapter_quirks smu_v13_0_6_i2c_control_quirks = { 2519 .flags = I2C_AQ_COMB | I2C_AQ_COMB_SAME_ADDR | I2C_AQ_NO_ZERO_LEN, 2520 .max_read_len = MAX_SW_I2C_COMMANDS, 2521 .max_write_len = MAX_SW_I2C_COMMANDS, 2522 .max_comb_1st_msg_len = 2, 2523 .max_comb_2nd_msg_len = MAX_SW_I2C_COMMANDS - 2, 2524 }; 2525 2526 static int smu_v13_0_6_i2c_control_init(struct smu_context *smu) 2527 { 2528 struct amdgpu_device *adev = smu->adev; 2529 int res, i; 2530 2531 for (i = 0; i < MAX_SMU_I2C_BUSES; i++) { 2532 struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i]; 2533 struct i2c_adapter *control = &smu_i2c->adapter; 2534 2535 smu_i2c->adev = adev; 2536 smu_i2c->port = i; 2537 mutex_init(&smu_i2c->mutex); 2538 control->owner = THIS_MODULE; 2539 control->dev.parent = &adev->pdev->dev; 2540 control->algo = &smu_v13_0_6_i2c_algo; 2541 snprintf(control->name, sizeof(control->name), "AMDGPU SMU %d", i); 2542 control->quirks = &smu_v13_0_6_i2c_control_quirks; 2543 i2c_set_adapdata(control, smu_i2c); 2544 2545 res = devm_i2c_add_adapter(adev->dev, control); 2546 if (res) { 2547 DRM_ERROR("Failed to register hw i2c, err: %d\n", res); 2548 return res; 2549 } 2550 } 2551 2552 adev->pm.ras_eeprom_i2c_bus = &adev->pm.smu_i2c[0].adapter; 2553 adev->pm.fru_eeprom_i2c_bus = &adev->pm.smu_i2c[0].adapter; 2554 2555 return 0; 2556 } 2557 2558 static void smu_v13_0_6_i2c_control_fini(struct smu_context *smu) 2559 { 2560 struct amdgpu_device *adev = smu->adev; 2561 2562 adev->pm.ras_eeprom_i2c_bus = NULL; 2563 adev->pm.fru_eeprom_i2c_bus = NULL; 2564 } 2565 2566 static void smu_v13_0_6_get_unique_id(struct smu_context *smu) 2567 { 2568 struct amdgpu_device *adev = smu->adev; 2569 struct smu_table_context *smu_table = &smu->smu_table; 2570 struct PPTable_t *pptable = 2571 (struct PPTable_t *)smu_table->driver_pptable; 2572 2573 adev->unique_id = pptable->PublicSerialNumber_AID; 2574 } 2575 2576 static int smu_v13_0_6_get_bamaco_support(struct smu_context *smu) 2577 { 2578 /* smu_13_0_6 does not support baco */ 2579 2580 return 0; 2581 } 2582 2583 static const char *const throttling_logging_label[] = { 2584 [THROTTLER_PROCHOT_BIT] = "Prochot", 2585 [THROTTLER_PPT_BIT] = "PPT", 2586 [THROTTLER_THERMAL_SOCKET_BIT] = "SOC", 2587 [THROTTLER_THERMAL_VR_BIT] = "VR", 2588 [THROTTLER_THERMAL_HBM_BIT] = "HBM" 2589 }; 2590 2591 static void smu_v13_0_6_log_thermal_throttling_event(struct smu_context *smu) 2592 { 2593 int throttler_idx, throttling_events = 0, buf_idx = 0; 2594 struct amdgpu_device *adev = smu->adev; 2595 uint32_t throttler_status; 2596 char log_buf[256]; 2597 2598 throttler_status = smu_v13_0_6_get_throttler_status(smu); 2599 if (!throttler_status) 2600 return; 2601 2602 memset(log_buf, 0, sizeof(log_buf)); 2603 for (throttler_idx = 0; 2604 throttler_idx < ARRAY_SIZE(throttling_logging_label); 2605 throttler_idx++) { 2606 if (throttler_status & (1U << throttler_idx)) { 2607 throttling_events++; 2608 buf_idx += snprintf( 2609 log_buf + buf_idx, sizeof(log_buf) - buf_idx, 2610 "%s%s", throttling_events > 1 ? " and " : "", 2611 throttling_logging_label[throttler_idx]); 2612 if (buf_idx >= sizeof(log_buf)) { 2613 dev_err(adev->dev, "buffer overflow!\n"); 2614 log_buf[sizeof(log_buf) - 1] = '\0'; 2615 break; 2616 } 2617 } 2618 } 2619 2620 dev_warn(adev->dev, 2621 "WARN: GPU is throttled, expect performance decrease. %s.\n", 2622 log_buf); 2623 kgd2kfd_smi_event_throttle( 2624 smu->adev->kfd.dev, 2625 smu_cmn_get_indep_throttler_status(throttler_status, 2626 smu_v13_0_6_throttler_map)); 2627 } 2628 2629 static int 2630 smu_v13_0_6_get_current_pcie_link_width_level(struct smu_context *smu) 2631 { 2632 struct amdgpu_device *adev = smu->adev; 2633 2634 return REG_GET_FIELD(RREG32_PCIE(smnPCIE_LC_LINK_WIDTH_CNTL), 2635 PCIE_LC_LINK_WIDTH_CNTL, LC_LINK_WIDTH_RD); 2636 } 2637 2638 static int smu_v13_0_6_get_current_pcie_link_speed(struct smu_context *smu) 2639 { 2640 struct amdgpu_device *adev = smu->adev; 2641 uint32_t speed_level; 2642 uint32_t esm_ctrl; 2643 2644 /* TODO: confirm this on real target */ 2645 esm_ctrl = RREG32_PCIE(smnPCIE_ESM_CTRL); 2646 if ((esm_ctrl >> 15) & 0x1) 2647 return (((esm_ctrl >> 8) & 0x7F) + 128); 2648 2649 speed_level = (RREG32_PCIE(smnPCIE_LC_SPEED_CNTL) & 2650 PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK) 2651 >> PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT; 2652 if (speed_level > LINK_SPEED_MAX) 2653 speed_level = 0; 2654 2655 return pcie_gen_to_speed(speed_level + 1); 2656 } 2657 2658 static ssize_t smu_v13_0_6_get_xcp_metrics(struct smu_context *smu, int xcp_id, 2659 void *table) 2660 { 2661 const u8 num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS_4_0_3; 2662 int version = smu_v13_0_6_get_metrics_version(smu); 2663 struct smu_v13_0_6_partition_metrics *xcp_metrics; 2664 struct smu_table_context *smu_table = &smu->smu_table; 2665 struct amdgpu_device *adev = smu->adev; 2666 int ret, inst, i, j, k, idx; 2667 MetricsTableV0_t *metrics_v0; 2668 MetricsTableV1_t *metrics_v1; 2669 MetricsTableV2_t *metrics_v2; 2670 struct amdgpu_xcp *xcp; 2671 u32 inst_mask; 2672 bool per_inst; 2673 2674 if (!table) 2675 return sizeof(*xcp_metrics); 2676 2677 for_each_xcp(adev->xcp_mgr, xcp, i) { 2678 if (xcp->id == xcp_id) 2679 break; 2680 } 2681 if (i == adev->xcp_mgr->num_xcps) 2682 return -EINVAL; 2683 2684 xcp_metrics = (struct smu_v13_0_6_partition_metrics *)table; 2685 smu_v13_0_6_partition_metrics_init(xcp_metrics, 1, 1); 2686 2687 ret = smu_v13_0_6_get_metrics_table(smu, NULL, false); 2688 if (ret) 2689 return ret; 2690 2691 metrics_v0 = (MetricsTableV0_t *)smu_table->metrics_table; 2692 2693 if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == 2694 IP_VERSION(13, 0, 12) && 2695 smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS))) 2696 return smu_v13_0_12_get_xcp_metrics(smu, xcp, table, 2697 metrics_v0); 2698 2699 metrics_v1 = (MetricsTableV1_t *)smu_table->metrics_table; 2700 metrics_v2 = (MetricsTableV2_t *)smu_table->metrics_table; 2701 2702 per_inst = smu_v13_0_6_cap_supported(smu, SMU_CAP(PER_INST_METRICS)); 2703 2704 amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_VCN, &inst_mask); 2705 idx = 0; 2706 for_each_inst(k, inst_mask) { 2707 /* Both JPEG and VCN has same instances */ 2708 inst = GET_INST(VCN, k); 2709 2710 for (j = 0; j < num_jpeg_rings; ++j) { 2711 xcp_metrics->jpeg_busy[(idx * num_jpeg_rings) + j] = 2712 SMUQ10_ROUND(GET_METRIC_FIELD( 2713 JpegBusy, 2714 version)[(inst * num_jpeg_rings) + j]); 2715 } 2716 xcp_metrics->vcn_busy[idx] = 2717 SMUQ10_ROUND(GET_METRIC_FIELD(VcnBusy, version)[inst]); 2718 2719 xcp_metrics->current_vclk0[idx] = SMUQ10_ROUND( 2720 GET_METRIC_FIELD(VclkFrequency, version)[inst]); 2721 xcp_metrics->current_dclk0[idx] = SMUQ10_ROUND( 2722 GET_METRIC_FIELD(DclkFrequency, version)[inst]); 2723 xcp_metrics->current_socclk[idx] = SMUQ10_ROUND( 2724 GET_METRIC_FIELD(SocclkFrequency, version)[inst]); 2725 2726 idx++; 2727 } 2728 2729 xcp_metrics->current_uclk = 2730 SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequency, version)); 2731 2732 if (per_inst) { 2733 amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_GFX, &inst_mask); 2734 idx = 0; 2735 for_each_inst(k, inst_mask) { 2736 inst = GET_INST(GC, k); 2737 xcp_metrics->current_gfxclk[idx] = 2738 SMUQ10_ROUND(GET_METRIC_FIELD(GfxclkFrequency, 2739 version)[inst]); 2740 2741 xcp_metrics->gfx_busy_inst[idx] = SMUQ10_ROUND( 2742 GET_GPU_METRIC_FIELD(GfxBusy, version)[inst]); 2743 xcp_metrics->gfx_busy_acc[idx] = SMUQ10_ROUND( 2744 GET_GPU_METRIC_FIELD(GfxBusyAcc, 2745 version)[inst]); 2746 if (smu_v13_0_6_cap_supported( 2747 smu, SMU_CAP(HST_LIMIT_METRICS))) { 2748 xcp_metrics->gfx_below_host_limit_ppt_acc 2749 [idx] = SMUQ10_ROUND( 2750 metrics_v0->GfxclkBelowHostLimitPptAcc 2751 [inst]); 2752 xcp_metrics->gfx_below_host_limit_thm_acc 2753 [idx] = SMUQ10_ROUND( 2754 metrics_v0->GfxclkBelowHostLimitThmAcc 2755 [inst]); 2756 xcp_metrics->gfx_low_utilization_acc 2757 [idx] = SMUQ10_ROUND( 2758 metrics_v0 2759 ->GfxclkLowUtilizationAcc[inst]); 2760 xcp_metrics->gfx_below_host_limit_total_acc 2761 [idx] = SMUQ10_ROUND( 2762 metrics_v0->GfxclkBelowHostLimitTotalAcc 2763 [inst]); 2764 } 2765 idx++; 2766 } 2767 } 2768 xcp_metrics->accumulation_counter = GET_METRIC_FIELD(AccumulationCounter, version); 2769 xcp_metrics->firmware_timestamp = GET_METRIC_FIELD(Timestamp, version); 2770 2771 return sizeof(*xcp_metrics); 2772 } 2773 2774 static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table) 2775 { 2776 struct smu_v13_0_6_gpu_metrics *gpu_metrics; 2777 int version = smu_v13_0_6_get_metrics_version(smu); 2778 struct smu_table_context *smu_table = &smu->smu_table; 2779 struct amdgpu_device *adev = smu->adev; 2780 int ret = 0, xcc_id, inst, i, j; 2781 MetricsTableV0_t *metrics_v0; 2782 MetricsTableV1_t *metrics_v1; 2783 MetricsTableV2_t *metrics_v2; 2784 u16 link_width_level; 2785 u8 num_jpeg_rings; 2786 bool per_inst; 2787 2788 ret = smu_v13_0_6_get_metrics_table(smu, NULL, false); 2789 if (ret) 2790 return ret; 2791 2792 metrics_v0 = (MetricsTableV0_t *)smu_table->metrics_table; 2793 gpu_metrics = (struct smu_v13_0_6_gpu_metrics *)smu_driver_table_ptr( 2794 smu, SMU_DRIVER_TABLE_GPU_METRICS); 2795 2796 if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12) && 2797 smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS))) { 2798 smu_v13_0_12_get_gpu_metrics(smu, table, metrics_v0, 2799 gpu_metrics); 2800 goto fill; 2801 } 2802 2803 metrics_v1 = (MetricsTableV1_t *)smu_table->metrics_table; 2804 metrics_v2 = (MetricsTableV2_t *)smu_table->metrics_table; 2805 2806 gpu_metrics->temperature_hotspot = 2807 SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketTemperature, version)); 2808 /* Individual HBM stack temperature is not reported */ 2809 gpu_metrics->temperature_mem = 2810 SMUQ10_ROUND(GET_METRIC_FIELD(MaxHbmTemperature, version)); 2811 /* Reports max temperature of all voltage rails */ 2812 gpu_metrics->temperature_vrsoc = 2813 SMUQ10_ROUND(GET_METRIC_FIELD(MaxVrTemperature, version)); 2814 2815 gpu_metrics->average_gfx_activity = 2816 SMUQ10_ROUND(GET_METRIC_FIELD(SocketGfxBusy, version)); 2817 gpu_metrics->average_umc_activity = 2818 SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilization, version)); 2819 2820 gpu_metrics->mem_max_bandwidth = 2821 SMUQ10_ROUND(GET_METRIC_FIELD(MaxDramBandwidth, version)); 2822 2823 gpu_metrics->curr_socket_power = 2824 SMUQ10_ROUND(GET_METRIC_FIELD(SocketPower, version)); 2825 /* Energy counter reported in 15.259uJ (2^-16) units */ 2826 gpu_metrics->energy_accumulator = GET_METRIC_FIELD(SocketEnergyAcc, version); 2827 2828 for (i = 0; i < MAX_GFX_CLKS; i++) { 2829 xcc_id = GET_INST(GC, i); 2830 if (xcc_id >= 0) 2831 gpu_metrics->current_gfxclk[i] = 2832 SMUQ10_ROUND(GET_METRIC_FIELD(GfxclkFrequency, version)[xcc_id]); 2833 2834 if (i < MAX_CLKS) { 2835 gpu_metrics->current_socclk[i] = 2836 SMUQ10_ROUND(GET_METRIC_FIELD(SocclkFrequency, version)[i]); 2837 inst = GET_INST(VCN, i); 2838 if (inst >= 0) { 2839 gpu_metrics->current_vclk0[i] = 2840 SMUQ10_ROUND(GET_METRIC_FIELD(VclkFrequency, 2841 version)[inst]); 2842 gpu_metrics->current_dclk0[i] = 2843 SMUQ10_ROUND(GET_METRIC_FIELD(DclkFrequency, 2844 version)[inst]); 2845 } 2846 } 2847 } 2848 2849 gpu_metrics->current_uclk = SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequency, version)); 2850 2851 /* Total accumulated cycle counter */ 2852 gpu_metrics->accumulation_counter = GET_METRIC_FIELD(AccumulationCounter, version); 2853 2854 /* Accumulated throttler residencies */ 2855 gpu_metrics->prochot_residency_acc = GET_METRIC_FIELD(ProchotResidencyAcc, version); 2856 gpu_metrics->ppt_residency_acc = GET_METRIC_FIELD(PptResidencyAcc, version); 2857 gpu_metrics->socket_thm_residency_acc = GET_METRIC_FIELD(SocketThmResidencyAcc, version); 2858 gpu_metrics->vr_thm_residency_acc = GET_METRIC_FIELD(VrThmResidencyAcc, version); 2859 gpu_metrics->hbm_thm_residency_acc = 2860 GET_METRIC_FIELD(HbmThmResidencyAcc, version); 2861 2862 /* Clock Lock Status. Each bit corresponds to each GFXCLK instance */ 2863 gpu_metrics->gfxclk_lock_status = GET_METRIC_FIELD(GfxLockXCDMak, 2864 version) >> GET_INST(GC, 0); 2865 2866 if (!(adev->flags & AMD_IS_APU)) { 2867 /*Check smu version, PCIE link speed and width will be reported from pmfw metric 2868 * table for both pf & one vf for smu version 85.99.0 or higher else report only 2869 * for pf from registers 2870 */ 2871 if (smu_v13_0_6_cap_supported(smu, SMU_CAP(PCIE_METRICS))) { 2872 gpu_metrics->pcie_link_width = GET_GPU_METRIC_FIELD(PCIeLinkWidth, version); 2873 gpu_metrics->pcie_link_speed = 2874 pcie_gen_to_speed(GET_GPU_METRIC_FIELD(PCIeLinkSpeed, version)); 2875 } else if (!amdgpu_sriov_vf(adev)) { 2876 link_width_level = smu_v13_0_6_get_current_pcie_link_width_level(smu); 2877 if (link_width_level > MAX_LINK_WIDTH) 2878 link_width_level = 0; 2879 2880 gpu_metrics->pcie_link_width = 2881 DECODE_LANE_WIDTH(link_width_level); 2882 gpu_metrics->pcie_link_speed = 2883 smu_v13_0_6_get_current_pcie_link_speed(smu); 2884 } 2885 2886 gpu_metrics->pcie_bandwidth_acc = 2887 SMUQ10_ROUND(GET_GPU_METRIC_FIELD(PcieBandwidthAcc, version)[0]); 2888 gpu_metrics->pcie_bandwidth_inst = 2889 SMUQ10_ROUND(GET_GPU_METRIC_FIELD(PcieBandwidth, version)[0]); 2890 gpu_metrics->pcie_l0_to_recov_count_acc = 2891 GET_GPU_METRIC_FIELD(PCIeL0ToRecoveryCountAcc, version); 2892 gpu_metrics->pcie_replay_count_acc = 2893 GET_GPU_METRIC_FIELD(PCIenReplayAAcc, version); 2894 gpu_metrics->pcie_replay_rover_count_acc = 2895 GET_GPU_METRIC_FIELD(PCIenReplayARolloverCountAcc, version); 2896 gpu_metrics->pcie_nak_sent_count_acc = 2897 GET_GPU_METRIC_FIELD(PCIeNAKSentCountAcc, version); 2898 gpu_metrics->pcie_nak_rcvd_count_acc = 2899 GET_GPU_METRIC_FIELD(PCIeNAKReceivedCountAcc, version); 2900 if (smu_v13_0_6_cap_supported(smu, SMU_CAP(OTHER_END_METRICS))) 2901 gpu_metrics->pcie_lc_perf_other_end_recovery = 2902 GET_GPU_METRIC_FIELD(PCIeOtherEndRecoveryAcc, version); 2903 2904 } 2905 2906 gpu_metrics->system_clock_counter = ktime_get_boottime_ns(); 2907 2908 gpu_metrics->gfx_activity_acc = 2909 SMUQ10_ROUND(GET_METRIC_FIELD(SocketGfxBusyAcc, version)); 2910 gpu_metrics->mem_activity_acc = 2911 SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilizationAcc, version)); 2912 2913 for (i = 0; i < NUM_XGMI_LINKS; i++) { 2914 j = amdgpu_xgmi_get_ext_link(adev, i); 2915 if (j < 0 || j >= NUM_XGMI_LINKS) 2916 continue; 2917 gpu_metrics->xgmi_read_data_acc[j] = SMUQ10_ROUND( 2918 GET_METRIC_FIELD(XgmiReadDataSizeAcc, version)[i]); 2919 gpu_metrics->xgmi_write_data_acc[j] = SMUQ10_ROUND( 2920 GET_METRIC_FIELD(XgmiWriteDataSizeAcc, version)[i]); 2921 ret = amdgpu_get_xgmi_link_status(adev, i); 2922 if (ret >= 0) 2923 gpu_metrics->xgmi_link_status[j] = ret; 2924 } 2925 2926 per_inst = smu_v13_0_6_cap_supported(smu, SMU_CAP(PER_INST_METRICS)); 2927 2928 num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS_4_0_3; 2929 for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { 2930 inst = GET_INST(JPEG, i); 2931 for (j = 0; j < num_jpeg_rings; ++j) 2932 gpu_metrics->jpeg_busy[(i * num_jpeg_rings) + j] = 2933 SMUQ10_ROUND(GET_METRIC_FIELD( 2934 JpegBusy, 2935 version)[(inst * num_jpeg_rings) + j]); 2936 } 2937 for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { 2938 inst = GET_INST(VCN, i); 2939 gpu_metrics->vcn_busy[i] = 2940 SMUQ10_ROUND(GET_METRIC_FIELD(VcnBusy, version)[inst]); 2941 } 2942 2943 if (per_inst) { 2944 for (i = 0; i < NUM_XCC(adev->gfx.xcc_mask); ++i) { 2945 inst = GET_INST(GC, i); 2946 gpu_metrics->gfx_busy_inst[i] = SMUQ10_ROUND( 2947 GET_GPU_METRIC_FIELD(GfxBusy, version)[inst]); 2948 gpu_metrics->gfx_busy_acc[i] = SMUQ10_ROUND( 2949 GET_GPU_METRIC_FIELD(GfxBusyAcc, 2950 version)[inst]); 2951 if (smu_v13_0_6_cap_supported( 2952 smu, SMU_CAP(HST_LIMIT_METRICS))) { 2953 gpu_metrics->gfx_below_host_limit_ppt_acc 2954 [i] = SMUQ10_ROUND( 2955 metrics_v0->GfxclkBelowHostLimitPptAcc 2956 [inst]); 2957 gpu_metrics->gfx_below_host_limit_thm_acc 2958 [i] = SMUQ10_ROUND( 2959 metrics_v0->GfxclkBelowHostLimitThmAcc 2960 [inst]); 2961 gpu_metrics->gfx_low_utilization_acc 2962 [i] = SMUQ10_ROUND( 2963 metrics_v0 2964 ->GfxclkLowUtilizationAcc[inst]); 2965 gpu_metrics->gfx_below_host_limit_total_acc 2966 [i] = SMUQ10_ROUND( 2967 metrics_v0->GfxclkBelowHostLimitTotalAcc 2968 [inst]); 2969 } 2970 } 2971 } 2972 2973 gpu_metrics->xgmi_link_width = GET_METRIC_FIELD(XgmiWidth, version); 2974 gpu_metrics->xgmi_link_speed = GET_METRIC_FIELD(XgmiBitrate, version); 2975 2976 gpu_metrics->firmware_timestamp = GET_METRIC_FIELD(Timestamp, version); 2977 2978 fill: 2979 *table = gpu_metrics; 2980 2981 smu_driver_table_update_cache_time(smu, SMU_DRIVER_TABLE_GPU_METRICS); 2982 2983 return sizeof(*gpu_metrics); 2984 } 2985 2986 static void smu_v13_0_6_restore_pci_config(struct smu_context *smu) 2987 { 2988 struct amdgpu_device *adev = smu->adev; 2989 int i; 2990 2991 for (i = 0; i < 16; i++) 2992 pci_write_config_dword(adev->pdev, i * 4, 2993 adev->pdev->saved_config_space[i]); 2994 pci_restore_msi_state(adev->pdev); 2995 } 2996 2997 static int smu_v13_0_6_mode2_reset(struct smu_context *smu) 2998 { 2999 struct smu_msg_ctl *ctl = &smu->msg_ctl; 3000 struct amdgpu_device *adev = smu->adev; 3001 int ret = 0; 3002 int timeout = 10; 3003 3004 mutex_lock(&ctl->lock); 3005 3006 ret = smu_msg_send_async_locked(ctl, SMU_MSG_GfxDeviceDriverReset, 3007 SMU_RESET_MODE_2); 3008 if (ret) 3009 goto out; 3010 3011 /* Reset takes a bit longer, wait for 200ms. */ 3012 msleep(200); 3013 3014 dev_dbg(adev->dev, "restore config space...\n"); 3015 /* Restore the config space saved during init */ 3016 amdgpu_device_load_pci_state(adev->pdev); 3017 3018 /* Certain platforms have switches which assign virtual BAR values to 3019 * devices. OS uses the virtual BAR values and device behind the switch 3020 * is assgined another BAR value. When device's config space registers 3021 * are queried, switch returns the virtual BAR values. When mode-2 reset 3022 * is performed, switch is unaware of it, and will continue to return 3023 * the same virtual values to the OS.This affects 3024 * pci_restore_config_space() API as it doesn't write the value saved if 3025 * the current value read from config space is the same as what is 3026 * saved. As a workaround, make sure the config space is restored 3027 * always. 3028 */ 3029 if (!(adev->flags & AMD_IS_APU)) 3030 smu_v13_0_6_restore_pci_config(smu); 3031 3032 dev_dbg(adev->dev, "wait for reset ack\n"); 3033 do { 3034 ret = smu_msg_wait_response(ctl, 0); 3035 /* Wait a bit more time for getting ACK */ 3036 if (ret == -ETIME) { 3037 --timeout; 3038 usleep_range(500, 1000); 3039 continue; 3040 } 3041 3042 if (ret) 3043 goto out; 3044 3045 } while (ret == -ETIME && timeout); 3046 3047 out: 3048 mutex_unlock(&ctl->lock); 3049 3050 if (ret) 3051 dev_err(adev->dev, "failed to send mode2 reset, error code %d", 3052 ret); 3053 3054 return ret; 3055 } 3056 3057 static int smu_v13_0_6_get_thermal_temperature_range(struct smu_context *smu, 3058 struct smu_temperature_range *range) 3059 { 3060 struct amdgpu_device *adev = smu->adev; 3061 u32 aid_temp, xcd_temp, max_temp; 3062 u32 ccd_temp = 0; 3063 int ret; 3064 3065 if (amdgpu_sriov_vf(smu->adev)) 3066 return 0; 3067 3068 if (!range) 3069 return -EINVAL; 3070 3071 /*Check smu version, GetCtfLimit message only supported for smu version 85.69 or higher */ 3072 if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(CTF_LIMIT))) 3073 return 0; 3074 3075 /* Get SOC Max operating temperature */ 3076 ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetCTFLimit, 3077 PPSMC_AID_THM_TYPE, &aid_temp); 3078 if (ret) 3079 goto failed; 3080 if (adev->flags & AMD_IS_APU) { 3081 ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetCTFLimit, 3082 PPSMC_CCD_THM_TYPE, &ccd_temp); 3083 if (ret) 3084 goto failed; 3085 } 3086 ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetCTFLimit, 3087 PPSMC_XCD_THM_TYPE, &xcd_temp); 3088 if (ret) 3089 goto failed; 3090 range->hotspot_emergency_max = max3(aid_temp, xcd_temp, ccd_temp) * 3091 SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; 3092 3093 /* Get HBM Max operating temperature */ 3094 ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetCTFLimit, 3095 PPSMC_HBM_THM_TYPE, &max_temp); 3096 if (ret) 3097 goto failed; 3098 range->mem_emergency_max = 3099 max_temp * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; 3100 3101 /* Get SOC thermal throttle limit */ 3102 ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetThermalLimit, 3103 PPSMC_THROTTLING_LIMIT_TYPE_SOCKET, 3104 &max_temp); 3105 if (ret) 3106 goto failed; 3107 range->hotspot_crit_max = 3108 max_temp * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; 3109 3110 /* Get HBM thermal throttle limit */ 3111 ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetThermalLimit, 3112 PPSMC_THROTTLING_LIMIT_TYPE_HBM, 3113 &max_temp); 3114 if (ret) 3115 goto failed; 3116 3117 range->mem_crit_max = max_temp * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; 3118 3119 failed: 3120 return ret; 3121 } 3122 3123 static int smu_v13_0_6_mode1_reset(struct smu_context *smu) 3124 { 3125 struct amdgpu_device *adev = smu->adev; 3126 u32 fatal_err, param; 3127 int ret = 0; 3128 3129 fatal_err = 0; 3130 param = SMU_RESET_MODE_1; 3131 3132 /* fatal error triggered by ras, PMFW supports the flag */ 3133 if (amdgpu_ras_get_fed_status(adev)) 3134 fatal_err = 1; 3135 3136 param |= (fatal_err << 16); 3137 ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GfxDeviceDriverReset, 3138 param, NULL); 3139 3140 if (!ret) 3141 msleep(SMU13_MODE1_RESET_WAIT_TIME_IN_MS); 3142 3143 return ret; 3144 } 3145 3146 static int smu_v13_0_6_link_reset(struct smu_context *smu) 3147 { 3148 int ret = 0; 3149 3150 ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GfxDeviceDriverReset, 3151 SMU_RESET_MODE_4, NULL); 3152 return ret; 3153 } 3154 3155 static bool smu_v13_0_6_is_mode1_reset_supported(struct smu_context *smu) 3156 { 3157 return true; 3158 } 3159 3160 static inline bool smu_v13_0_6_is_link_reset_supported(struct smu_context *smu) 3161 { 3162 struct amdgpu_device *adev = smu->adev; 3163 int var = (adev->pdev->device & 0xF); 3164 3165 if (var == 0x0 || var == 0x1 || var == 0x3) 3166 return true; 3167 3168 return false; 3169 } 3170 3171 static int smu_v13_0_6_smu_send_hbm_bad_page_num(struct smu_context *smu, 3172 uint32_t size) 3173 { 3174 int ret = 0; 3175 3176 /* message SMU to update the bad page number on SMUBUS */ 3177 ret = smu_cmn_send_smc_msg_with_param( 3178 smu, SMU_MSG_SetNumBadHbmPagesRetired, size, NULL); 3179 if (ret) 3180 dev_err(smu->adev->dev, 3181 "[%s] failed to message SMU to update HBM bad pages number\n", 3182 __func__); 3183 3184 return ret; 3185 } 3186 3187 static int smu_v13_0_6_send_rma_reason(struct smu_context *smu) 3188 { 3189 int ret; 3190 3191 /* NOTE: the message is only valid on dGPU with pmfw 85.90.0 and above */ 3192 if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(RMA_MSG))) 3193 return 0; 3194 3195 ret = smu_cmn_send_smc_msg(smu, SMU_MSG_RmaDueToBadPageThreshold, NULL); 3196 if (ret) 3197 dev_err(smu->adev->dev, 3198 "[%s] failed to send BadPageThreshold event to SMU\n", 3199 __func__); 3200 3201 return ret; 3202 } 3203 3204 /** 3205 * smu_v13_0_6_reset_sdma_is_supported - Check if SDMA reset is supported 3206 * @smu: smu_context pointer 3207 * 3208 * This function checks if the SMU supports resetting the SDMA engine. 3209 * It returns false if the capability is not supported. 3210 */ 3211 static bool smu_v13_0_6_reset_sdma_is_supported(struct smu_context *smu) 3212 { 3213 bool ret = true; 3214 3215 if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(SDMA_RESET))) { 3216 dev_info(smu->adev->dev, 3217 "SDMA reset capability is not supported\n"); 3218 ret = false; 3219 } 3220 3221 return ret; 3222 } 3223 3224 static int smu_v13_0_6_reset_sdma(struct smu_context *smu, uint32_t inst_mask) 3225 { 3226 int ret = 0; 3227 3228 if (!smu_v13_0_6_reset_sdma_is_supported(smu)) 3229 return -EOPNOTSUPP; 3230 3231 ret = smu_cmn_send_smc_msg_with_param(smu, 3232 SMU_MSG_ResetSDMA, inst_mask, NULL); 3233 if (ret) 3234 dev_err(smu->adev->dev, 3235 "failed to send ResetSDMA event with mask 0x%x\n", 3236 inst_mask); 3237 3238 return ret; 3239 } 3240 3241 static bool smu_v13_0_6_reset_vcn_is_supported(struct smu_context *smu) 3242 { 3243 return smu_v13_0_6_cap_supported(smu, SMU_CAP(VCN_RESET)); 3244 } 3245 3246 static int smu_v13_0_6_reset_vcn(struct smu_context *smu, uint32_t inst_mask) 3247 { 3248 int ret = 0; 3249 3250 ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ResetVCN, inst_mask, NULL); 3251 if (ret) 3252 dev_err(smu->adev->dev, 3253 "failed to send ResetVCN event with mask 0x%x\n", 3254 inst_mask); 3255 return ret; 3256 } 3257 3258 static int smu_v13_0_6_ras_send_msg(struct smu_context *smu, enum smu_message_type msg, uint32_t param, uint32_t *read_arg) 3259 { 3260 struct amdgpu_device *adev = smu->adev; 3261 int ret; 3262 3263 if (amdgpu_sriov_vf(adev)) 3264 return -EOPNOTSUPP; 3265 3266 switch (msg) { 3267 case SMU_MSG_QueryValidMcaCount: 3268 case SMU_MSG_QueryValidMcaCeCount: 3269 case SMU_MSG_McaBankDumpDW: 3270 case SMU_MSG_McaBankCeDumpDW: 3271 case SMU_MSG_ClearMcaOnRead: 3272 case SMU_MSG_GetRASTableVersion: 3273 case SMU_MSG_GetBadPageCount: 3274 case SMU_MSG_GetBadPageMcaAddr: 3275 case SMU_MSG_SetTimestamp: 3276 case SMU_MSG_GetTimestamp: 3277 case SMU_MSG_GetBadPageIpid: 3278 case SMU_MSG_EraseRasTable: 3279 ret = smu_cmn_send_smc_msg_with_param(smu, msg, param, read_arg); 3280 break; 3281 default: 3282 ret = -EPERM; 3283 } 3284 3285 return ret; 3286 } 3287 3288 static int smu_v13_0_6_post_init(struct smu_context *smu) 3289 { 3290 if (smu_v13_0_6_is_link_reset_supported(smu)) 3291 smu_feature_cap_set(smu, SMU_FEATURE_CAP_ID__LINK_RESET); 3292 3293 if (smu_v13_0_6_reset_sdma_is_supported(smu)) 3294 smu_feature_cap_set(smu, SMU_FEATURE_CAP_ID__SDMA_RESET); 3295 3296 if (smu_v13_0_6_reset_vcn_is_supported(smu)) 3297 smu_feature_cap_set(smu, SMU_FEATURE_CAP_ID__VCN_RESET); 3298 3299 return 0; 3300 } 3301 3302 static int mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable) 3303 { 3304 struct smu_context *smu = adev->powerplay.pp_handle; 3305 3306 return smu_v13_0_6_mca_set_debug_mode(smu, enable); 3307 } 3308 3309 static int smu_v13_0_6_get_valid_mca_count(struct smu_context *smu, enum amdgpu_mca_error_type type, uint32_t *count) 3310 { 3311 uint32_t msg; 3312 int ret; 3313 3314 if (!count) 3315 return -EINVAL; 3316 3317 switch (type) { 3318 case AMDGPU_MCA_ERROR_TYPE_UE: 3319 msg = SMU_MSG_QueryValidMcaCount; 3320 break; 3321 case AMDGPU_MCA_ERROR_TYPE_CE: 3322 msg = SMU_MSG_QueryValidMcaCeCount; 3323 break; 3324 default: 3325 return -EINVAL; 3326 } 3327 3328 ret = smu_cmn_send_smc_msg(smu, msg, count); 3329 if (ret) { 3330 *count = 0; 3331 return ret; 3332 } 3333 3334 return 0; 3335 } 3336 3337 static int __smu_v13_0_6_mca_dump_bank(struct smu_context *smu, enum amdgpu_mca_error_type type, 3338 int idx, int offset, uint32_t *val) 3339 { 3340 uint32_t msg, param; 3341 3342 switch (type) { 3343 case AMDGPU_MCA_ERROR_TYPE_UE: 3344 msg = SMU_MSG_McaBankDumpDW; 3345 break; 3346 case AMDGPU_MCA_ERROR_TYPE_CE: 3347 msg = SMU_MSG_McaBankCeDumpDW; 3348 break; 3349 default: 3350 return -EINVAL; 3351 } 3352 3353 param = ((idx & 0xffff) << 16) | (offset & 0xfffc); 3354 3355 return smu_cmn_send_smc_msg_with_param(smu, msg, param, val); 3356 } 3357 3358 static int smu_v13_0_6_mca_dump_bank(struct smu_context *smu, enum amdgpu_mca_error_type type, 3359 int idx, int offset, uint32_t *val, int count) 3360 { 3361 int ret, i; 3362 3363 if (!val) 3364 return -EINVAL; 3365 3366 for (i = 0; i < count; i++) { 3367 ret = __smu_v13_0_6_mca_dump_bank(smu, type, idx, offset + (i << 2), &val[i]); 3368 if (ret) 3369 return ret; 3370 } 3371 3372 return 0; 3373 } 3374 3375 static const struct mca_bank_ipid smu_v13_0_6_mca_ipid_table[AMDGPU_MCA_IP_COUNT] = { 3376 MCA_BANK_IPID(UMC, 0x96, 0x0), 3377 MCA_BANK_IPID(SMU, 0x01, 0x1), 3378 MCA_BANK_IPID(MP5, 0x01, 0x2), 3379 MCA_BANK_IPID(PCS_XGMI, 0x50, 0x0), 3380 }; 3381 3382 static void mca_bank_entry_info_decode(struct mca_bank_entry *entry, struct mca_bank_info *info) 3383 { 3384 u64 ipid = entry->regs[MCA_REG_IDX_IPID]; 3385 u32 instidhi, instid; 3386 3387 /* NOTE: All MCA IPID register share the same format, 3388 * so the driver can share the MCMP1 register header file. 3389 * */ 3390 3391 info->hwid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, HardwareID); 3392 info->mcatype = REG_GET_FIELD(ipid, MCMP1_IPIDT0, McaType); 3393 3394 /* 3395 * Unfied DieID Format: SAASS. A:AID, S:Socket. 3396 * Unfied DieID[4] = InstanceId[0] 3397 * Unfied DieID[0:3] = InstanceIdHi[0:3] 3398 */ 3399 instidhi = REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdHi); 3400 instid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdLo); 3401 info->aid = ((instidhi >> 2) & 0x03); 3402 info->socket_id = ((instid & 0x1) << 2) | (instidhi & 0x03); 3403 } 3404 3405 static int mca_bank_read_reg(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, 3406 int idx, int reg_idx, uint64_t *val) 3407 { 3408 struct smu_context *smu = adev->powerplay.pp_handle; 3409 uint32_t data[2] = {0, 0}; 3410 int ret; 3411 3412 if (!val || reg_idx >= MCA_REG_IDX_COUNT) 3413 return -EINVAL; 3414 3415 ret = smu_v13_0_6_mca_dump_bank(smu, type, idx, reg_idx * 8, data, ARRAY_SIZE(data)); 3416 if (ret) 3417 return ret; 3418 3419 *val = (uint64_t)data[1] << 32 | data[0]; 3420 3421 dev_dbg(adev->dev, "mca read bank reg: type:%s, index: %d, reg_idx: %d, val: 0x%016llx\n", 3422 type == AMDGPU_MCA_ERROR_TYPE_UE ? "UE" : "CE", idx, reg_idx, *val); 3423 3424 return 0; 3425 } 3426 3427 static int mca_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, 3428 int idx, struct mca_bank_entry *entry) 3429 { 3430 int i, ret; 3431 3432 /* NOTE: populated all mca register by default */ 3433 for (i = 0; i < ARRAY_SIZE(entry->regs); i++) { 3434 ret = mca_bank_read_reg(adev, type, idx, i, &entry->regs[i]); 3435 if (ret) 3436 return ret; 3437 } 3438 3439 entry->idx = idx; 3440 entry->type = type; 3441 3442 mca_bank_entry_info_decode(entry, &entry->info); 3443 3444 return 0; 3445 } 3446 3447 static int mca_decode_ipid_to_hwip(uint64_t val) 3448 { 3449 const struct mca_bank_ipid *ipid; 3450 uint16_t hwid, mcatype; 3451 int i; 3452 3453 hwid = REG_GET_FIELD(val, MCMP1_IPIDT0, HardwareID); 3454 mcatype = REG_GET_FIELD(val, MCMP1_IPIDT0, McaType); 3455 3456 for (i = 0; i < ARRAY_SIZE(smu_v13_0_6_mca_ipid_table); i++) { 3457 ipid = &smu_v13_0_6_mca_ipid_table[i]; 3458 3459 if (!ipid->hwid) 3460 continue; 3461 3462 if (ipid->hwid == hwid && ipid->mcatype == mcatype) 3463 return i; 3464 } 3465 3466 return AMDGPU_MCA_IP_UNKNOW; 3467 } 3468 3469 static int mca_umc_mca_get_err_count(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, 3470 enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count) 3471 { 3472 uint64_t status0; 3473 uint32_t ext_error_code; 3474 uint32_t odecc_err_cnt; 3475 3476 status0 = entry->regs[MCA_REG_IDX_STATUS]; 3477 ext_error_code = MCA_REG__STATUS__ERRORCODEEXT(status0); 3478 odecc_err_cnt = MCA_REG__MISC0__ERRCNT(entry->regs[MCA_REG_IDX_MISC0]); 3479 3480 if (!REG_GET_FIELD(status0, MCMP1_STATUST0, Val)) { 3481 *count = 0; 3482 return 0; 3483 } 3484 3485 if (umc_v12_0_is_deferred_error(adev, status0) || 3486 umc_v12_0_is_uncorrectable_error(adev, status0) || 3487 umc_v12_0_is_correctable_error(adev, status0)) 3488 *count = (ext_error_code == 0) ? odecc_err_cnt : 1; 3489 3490 amdgpu_umc_update_ecc_status(adev, 3491 entry->regs[MCA_REG_IDX_STATUS], 3492 entry->regs[MCA_REG_IDX_IPID], 3493 entry->regs[MCA_REG_IDX_ADDR]); 3494 3495 return 0; 3496 } 3497 3498 static int mca_pcs_xgmi_mca_get_err_count(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, 3499 enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, 3500 uint32_t *count) 3501 { 3502 u32 ext_error_code; 3503 u32 err_cnt; 3504 3505 ext_error_code = MCA_REG__STATUS__ERRORCODEEXT(entry->regs[MCA_REG_IDX_STATUS]); 3506 err_cnt = MCA_REG__MISC0__ERRCNT(entry->regs[MCA_REG_IDX_MISC0]); 3507 3508 if (type == AMDGPU_MCA_ERROR_TYPE_UE && 3509 (ext_error_code == 0 || ext_error_code == 9)) 3510 *count = err_cnt; 3511 else if (type == AMDGPU_MCA_ERROR_TYPE_CE && ext_error_code == 6) 3512 *count = err_cnt; 3513 3514 return 0; 3515 } 3516 3517 static bool mca_smu_check_error_code(struct amdgpu_device *adev, const struct mca_ras_info *mca_ras, 3518 uint32_t errcode) 3519 { 3520 int i; 3521 3522 if (!mca_ras->err_code_count || !mca_ras->err_code_array) 3523 return true; 3524 3525 for (i = 0; i < mca_ras->err_code_count; i++) { 3526 if (errcode == mca_ras->err_code_array[i]) 3527 return true; 3528 } 3529 3530 return false; 3531 } 3532 3533 static int mca_gfx_mca_get_err_count(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, 3534 enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count) 3535 { 3536 uint64_t status0, misc0; 3537 3538 status0 = entry->regs[MCA_REG_IDX_STATUS]; 3539 if (!REG_GET_FIELD(status0, MCMP1_STATUST0, Val)) { 3540 *count = 0; 3541 return 0; 3542 } 3543 3544 if (type == AMDGPU_MCA_ERROR_TYPE_UE && 3545 REG_GET_FIELD(status0, MCMP1_STATUST0, UC) == 1 && 3546 REG_GET_FIELD(status0, MCMP1_STATUST0, PCC) == 1) { 3547 *count = 1; 3548 return 0; 3549 } else { 3550 misc0 = entry->regs[MCA_REG_IDX_MISC0]; 3551 *count = REG_GET_FIELD(misc0, MCMP1_MISC0T0, ErrCnt); 3552 } 3553 3554 return 0; 3555 } 3556 3557 static int mca_smu_mca_get_err_count(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, 3558 enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count) 3559 { 3560 uint64_t status0, misc0; 3561 3562 status0 = entry->regs[MCA_REG_IDX_STATUS]; 3563 if (!REG_GET_FIELD(status0, MCMP1_STATUST0, Val)) { 3564 *count = 0; 3565 return 0; 3566 } 3567 3568 if (type == AMDGPU_MCA_ERROR_TYPE_UE && 3569 REG_GET_FIELD(status0, MCMP1_STATUST0, UC) == 1 && 3570 REG_GET_FIELD(status0, MCMP1_STATUST0, PCC) == 1) { 3571 if (count) 3572 *count = 1; 3573 return 0; 3574 } 3575 3576 misc0 = entry->regs[MCA_REG_IDX_MISC0]; 3577 *count = REG_GET_FIELD(misc0, MCMP1_MISC0T0, ErrCnt); 3578 3579 return 0; 3580 } 3581 3582 static bool mca_gfx_smu_bank_is_valid(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, 3583 enum amdgpu_mca_error_type type, struct mca_bank_entry *entry) 3584 { 3585 uint32_t instlo; 3586 3587 instlo = REG_GET_FIELD(entry->regs[MCA_REG_IDX_IPID], MCMP1_IPIDT0, InstanceIdLo); 3588 instlo &= GENMASK(31, 1); 3589 switch (instlo) { 3590 case 0x36430400: /* SMNAID XCD 0 */ 3591 case 0x38430400: /* SMNAID XCD 1 */ 3592 case 0x40430400: /* SMNXCD XCD 0, NOTE: FIXME: fix this error later */ 3593 return true; 3594 default: 3595 return false; 3596 } 3597 3598 return false; 3599 }; 3600 3601 static bool mca_smu_bank_is_valid(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, 3602 enum amdgpu_mca_error_type type, struct mca_bank_entry *entry) 3603 { 3604 struct smu_context *smu = adev->powerplay.pp_handle; 3605 uint32_t errcode, instlo; 3606 3607 instlo = REG_GET_FIELD(entry->regs[MCA_REG_IDX_IPID], MCMP1_IPIDT0, InstanceIdLo); 3608 instlo &= GENMASK(31, 1); 3609 if (instlo != 0x03b30400) 3610 return false; 3611 3612 if (smu_v13_0_6_cap_supported(smu, SMU_CAP(ACA_SYND))) { 3613 errcode = MCA_REG__SYND__ERRORINFORMATION(entry->regs[MCA_REG_IDX_SYND]); 3614 errcode &= 0xff; 3615 } else { 3616 errcode = REG_GET_FIELD(entry->regs[MCA_REG_IDX_STATUS], MCMP1_STATUST0, ErrorCode); 3617 } 3618 3619 return mca_smu_check_error_code(adev, mca_ras, errcode); 3620 } 3621 3622 static int sdma_err_codes[] = { CODE_SDMA0, CODE_SDMA1, CODE_SDMA2, CODE_SDMA3 }; 3623 static int mmhub_err_codes[] = { 3624 CODE_DAGB0, CODE_DAGB0 + 1, CODE_DAGB0 + 2, CODE_DAGB0 + 3, CODE_DAGB0 + 4, /* DAGB0-4 */ 3625 CODE_EA0, CODE_EA0 + 1, CODE_EA0 + 2, CODE_EA0 + 3, CODE_EA0 + 4, /* MMEA0-4*/ 3626 CODE_VML2, CODE_VML2_WALKER, CODE_MMCANE, 3627 }; 3628 3629 static int vcn_err_codes[] = { 3630 CODE_VIDD, CODE_VIDV, 3631 }; 3632 static int jpeg_err_codes[] = { 3633 CODE_JPEG0S, CODE_JPEG0D, CODE_JPEG1S, CODE_JPEG1D, 3634 CODE_JPEG2S, CODE_JPEG2D, CODE_JPEG3S, CODE_JPEG3D, 3635 CODE_JPEG4S, CODE_JPEG4D, CODE_JPEG5S, CODE_JPEG5D, 3636 CODE_JPEG6S, CODE_JPEG6D, CODE_JPEG7S, CODE_JPEG7D, 3637 }; 3638 3639 static const struct mca_ras_info mca_ras_table[] = { 3640 { 3641 .blkid = AMDGPU_RAS_BLOCK__UMC, 3642 .ip = AMDGPU_MCA_IP_UMC, 3643 .get_err_count = mca_umc_mca_get_err_count, 3644 }, { 3645 .blkid = AMDGPU_RAS_BLOCK__GFX, 3646 .ip = AMDGPU_MCA_IP_SMU, 3647 .get_err_count = mca_gfx_mca_get_err_count, 3648 .bank_is_valid = mca_gfx_smu_bank_is_valid, 3649 }, { 3650 .blkid = AMDGPU_RAS_BLOCK__SDMA, 3651 .ip = AMDGPU_MCA_IP_SMU, 3652 .err_code_array = sdma_err_codes, 3653 .err_code_count = ARRAY_SIZE(sdma_err_codes), 3654 .get_err_count = mca_smu_mca_get_err_count, 3655 .bank_is_valid = mca_smu_bank_is_valid, 3656 }, { 3657 .blkid = AMDGPU_RAS_BLOCK__MMHUB, 3658 .ip = AMDGPU_MCA_IP_SMU, 3659 .err_code_array = mmhub_err_codes, 3660 .err_code_count = ARRAY_SIZE(mmhub_err_codes), 3661 .get_err_count = mca_smu_mca_get_err_count, 3662 .bank_is_valid = mca_smu_bank_is_valid, 3663 }, { 3664 .blkid = AMDGPU_RAS_BLOCK__XGMI_WAFL, 3665 .ip = AMDGPU_MCA_IP_PCS_XGMI, 3666 .get_err_count = mca_pcs_xgmi_mca_get_err_count, 3667 }, { 3668 .blkid = AMDGPU_RAS_BLOCK__VCN, 3669 .ip = AMDGPU_MCA_IP_SMU, 3670 .err_code_array = vcn_err_codes, 3671 .err_code_count = ARRAY_SIZE(vcn_err_codes), 3672 .get_err_count = mca_smu_mca_get_err_count, 3673 .bank_is_valid = mca_smu_bank_is_valid, 3674 }, { 3675 .blkid = AMDGPU_RAS_BLOCK__JPEG, 3676 .ip = AMDGPU_MCA_IP_SMU, 3677 .err_code_array = jpeg_err_codes, 3678 .err_code_count = ARRAY_SIZE(jpeg_err_codes), 3679 .get_err_count = mca_smu_mca_get_err_count, 3680 .bank_is_valid = mca_smu_bank_is_valid, 3681 }, 3682 }; 3683 3684 static const struct mca_ras_info *mca_get_mca_ras_info(struct amdgpu_device *adev, enum amdgpu_ras_block blkid) 3685 { 3686 int i; 3687 3688 for (i = 0; i < ARRAY_SIZE(mca_ras_table); i++) { 3689 if (mca_ras_table[i].blkid == blkid) 3690 return &mca_ras_table[i]; 3691 } 3692 3693 return NULL; 3694 } 3695 3696 static int mca_get_valid_mca_count(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, uint32_t *count) 3697 { 3698 struct smu_context *smu = adev->powerplay.pp_handle; 3699 int ret; 3700 3701 switch (type) { 3702 case AMDGPU_MCA_ERROR_TYPE_UE: 3703 case AMDGPU_MCA_ERROR_TYPE_CE: 3704 ret = smu_v13_0_6_get_valid_mca_count(smu, type, count); 3705 break; 3706 default: 3707 ret = -EINVAL; 3708 break; 3709 } 3710 3711 return ret; 3712 } 3713 3714 static bool mca_bank_is_valid(struct amdgpu_device *adev, const struct mca_ras_info *mca_ras, 3715 enum amdgpu_mca_error_type type, struct mca_bank_entry *entry) 3716 { 3717 if (mca_decode_ipid_to_hwip(entry->regs[MCA_REG_IDX_IPID]) != mca_ras->ip) 3718 return false; 3719 3720 if (mca_ras->bank_is_valid) 3721 return mca_ras->bank_is_valid(mca_ras, adev, type, entry); 3722 3723 return true; 3724 } 3725 3726 static int mca_smu_parse_mca_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, 3727 struct mca_bank_entry *entry, uint32_t *count) 3728 { 3729 const struct mca_ras_info *mca_ras; 3730 3731 if (!entry || !count) 3732 return -EINVAL; 3733 3734 mca_ras = mca_get_mca_ras_info(adev, blk); 3735 if (!mca_ras) 3736 return -EOPNOTSUPP; 3737 3738 if (!mca_bank_is_valid(adev, mca_ras, type, entry)) { 3739 *count = 0; 3740 return 0; 3741 } 3742 3743 return mca_ras->get_err_count(mca_ras, adev, type, entry, count); 3744 } 3745 3746 static int mca_smu_get_mca_entry(struct amdgpu_device *adev, 3747 enum amdgpu_mca_error_type type, int idx, struct mca_bank_entry *entry) 3748 { 3749 return mca_get_mca_entry(adev, type, idx, entry); 3750 } 3751 3752 static int mca_smu_get_valid_mca_count(struct amdgpu_device *adev, 3753 enum amdgpu_mca_error_type type, uint32_t *count) 3754 { 3755 return mca_get_valid_mca_count(adev, type, count); 3756 } 3757 3758 static const struct amdgpu_mca_smu_funcs smu_v13_0_6_mca_smu_funcs = { 3759 .max_ue_count = 12, 3760 .max_ce_count = 12, 3761 .mca_set_debug_mode = mca_smu_set_debug_mode, 3762 .mca_parse_mca_error_count = mca_smu_parse_mca_error_count, 3763 .mca_get_mca_entry = mca_smu_get_mca_entry, 3764 .mca_get_valid_mca_count = mca_smu_get_valid_mca_count, 3765 }; 3766 3767 static int aca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable) 3768 { 3769 struct smu_context *smu = adev->powerplay.pp_handle; 3770 3771 return smu_v13_0_6_mca_set_debug_mode(smu, enable); 3772 } 3773 3774 static int smu_v13_0_6_get_valid_aca_count(struct smu_context *smu, enum aca_smu_type type, u32 *count) 3775 { 3776 uint32_t msg; 3777 int ret; 3778 3779 if (!count) 3780 return -EINVAL; 3781 3782 switch (type) { 3783 case ACA_SMU_TYPE_UE: 3784 msg = SMU_MSG_QueryValidMcaCount; 3785 break; 3786 case ACA_SMU_TYPE_CE: 3787 msg = SMU_MSG_QueryValidMcaCeCount; 3788 break; 3789 default: 3790 return -EINVAL; 3791 } 3792 3793 ret = smu_cmn_send_smc_msg(smu, msg, count); 3794 if (ret) { 3795 *count = 0; 3796 return ret; 3797 } 3798 3799 return 0; 3800 } 3801 3802 static int aca_smu_get_valid_aca_count(struct amdgpu_device *adev, 3803 enum aca_smu_type type, u32 *count) 3804 { 3805 struct smu_context *smu = adev->powerplay.pp_handle; 3806 int ret; 3807 3808 switch (type) { 3809 case ACA_SMU_TYPE_UE: 3810 case ACA_SMU_TYPE_CE: 3811 ret = smu_v13_0_6_get_valid_aca_count(smu, type, count); 3812 break; 3813 default: 3814 ret = -EINVAL; 3815 break; 3816 } 3817 3818 return ret; 3819 } 3820 3821 static int __smu_v13_0_6_aca_bank_dump(struct smu_context *smu, enum aca_smu_type type, 3822 int idx, int offset, u32 *val) 3823 { 3824 uint32_t msg, param; 3825 3826 switch (type) { 3827 case ACA_SMU_TYPE_UE: 3828 msg = SMU_MSG_McaBankDumpDW; 3829 break; 3830 case ACA_SMU_TYPE_CE: 3831 msg = SMU_MSG_McaBankCeDumpDW; 3832 break; 3833 default: 3834 return -EINVAL; 3835 } 3836 3837 param = ((idx & 0xffff) << 16) | (offset & 0xfffc); 3838 3839 return smu_cmn_send_smc_msg_with_param(smu, msg, param, (uint32_t *)val); 3840 } 3841 3842 static int smu_v13_0_6_aca_bank_dump(struct smu_context *smu, enum aca_smu_type type, 3843 int idx, int offset, u32 *val, int count) 3844 { 3845 int ret, i; 3846 3847 if (!val) 3848 return -EINVAL; 3849 3850 for (i = 0; i < count; i++) { 3851 ret = __smu_v13_0_6_aca_bank_dump(smu, type, idx, offset + (i << 2), &val[i]); 3852 if (ret) 3853 return ret; 3854 } 3855 3856 return 0; 3857 } 3858 3859 static int aca_bank_read_reg(struct amdgpu_device *adev, enum aca_smu_type type, 3860 int idx, int reg_idx, u64 *val) 3861 { 3862 struct smu_context *smu = adev->powerplay.pp_handle; 3863 u32 data[2] = {0, 0}; 3864 int ret; 3865 3866 if (!val || reg_idx >= ACA_REG_IDX_COUNT) 3867 return -EINVAL; 3868 3869 ret = smu_v13_0_6_aca_bank_dump(smu, type, idx, reg_idx * 8, data, ARRAY_SIZE(data)); 3870 if (ret) 3871 return ret; 3872 3873 *val = (u64)data[1] << 32 | data[0]; 3874 3875 dev_dbg(adev->dev, "mca read bank reg: type:%s, index: %d, reg_idx: %d, val: 0x%016llx\n", 3876 type == ACA_SMU_TYPE_UE ? "UE" : "CE", idx, reg_idx, *val); 3877 3878 return 0; 3879 } 3880 3881 static int aca_smu_get_valid_aca_bank(struct amdgpu_device *adev, 3882 enum aca_smu_type type, int idx, struct aca_bank *bank) 3883 { 3884 int i, ret, count; 3885 3886 count = min_t(int, 16, ARRAY_SIZE(bank->regs)); 3887 for (i = 0; i < count; i++) { 3888 ret = aca_bank_read_reg(adev, type, idx, i, &bank->regs[i]); 3889 if (ret) 3890 return ret; 3891 } 3892 3893 return 0; 3894 } 3895 3896 static int aca_smu_parse_error_code(struct amdgpu_device *adev, struct aca_bank *bank) 3897 { 3898 struct smu_context *smu = adev->powerplay.pp_handle; 3899 int error_code; 3900 3901 if (smu_v13_0_6_cap_supported(smu, SMU_CAP(ACA_SYND))) 3902 error_code = ACA_REG__SYND__ERRORINFORMATION(bank->regs[ACA_REG_IDX_SYND]); 3903 else 3904 error_code = ACA_REG__STATUS__ERRORCODE(bank->regs[ACA_REG_IDX_STATUS]); 3905 3906 return error_code & 0xff; 3907 } 3908 3909 static const struct aca_smu_funcs smu_v13_0_6_aca_smu_funcs = { 3910 .max_ue_bank_count = 12, 3911 .max_ce_bank_count = 12, 3912 .set_debug_mode = aca_smu_set_debug_mode, 3913 .get_valid_aca_count = aca_smu_get_valid_aca_count, 3914 .get_valid_aca_bank = aca_smu_get_valid_aca_bank, 3915 .parse_error_code = aca_smu_parse_error_code, 3916 }; 3917 3918 static void smu_v13_0_6_set_temp_funcs(struct smu_context *smu) 3919 { 3920 smu->smu_temp.temp_funcs = (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) 3921 == IP_VERSION(13, 0, 12)) ? &smu_v13_0_12_temp_funcs : NULL; 3922 } 3923 3924 static int smu_v13_0_6_get_ras_smu_drv(struct smu_context *smu, const struct ras_smu_drv **ras_smu_drv) 3925 { 3926 if (!ras_smu_drv) 3927 return -EINVAL; 3928 3929 if (amdgpu_sriov_vf(smu->adev)) 3930 return -EOPNOTSUPP; 3931 3932 if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_HROM_EN_BIT)) 3933 smu_v13_0_6_cap_set(smu, SMU_CAP(RAS_EEPROM)); 3934 3935 switch (amdgpu_ip_version(smu->adev, MP1_HWIP, 0)) { 3936 case IP_VERSION(13, 0, 12): 3937 *ras_smu_drv = &smu_v13_0_12_ras_smu_drv; 3938 break; 3939 default: 3940 *ras_smu_drv = NULL; 3941 break; 3942 } 3943 3944 return 0; 3945 } 3946 3947 static const struct pptable_funcs smu_v13_0_6_ppt_funcs = { 3948 /* init dpm */ 3949 .init_allowed_features = smu_v13_0_6_init_allowed_features, 3950 /* dpm/clk tables */ 3951 .set_default_dpm_table = smu_v13_0_6_set_default_dpm_table, 3952 .populate_umd_state_clk = smu_v13_0_6_populate_umd_state_clk, 3953 .emit_clk_levels = smu_v13_0_6_emit_clk_levels, 3954 .force_clk_levels = smu_v13_0_6_force_clk_levels, 3955 .read_sensor = smu_v13_0_6_read_sensor, 3956 .set_performance_level = smu_v13_0_6_set_performance_level, 3957 .get_power_limit = smu_v13_0_6_get_power_limit, 3958 .is_dpm_running = smu_v13_0_6_is_dpm_running, 3959 .get_unique_id = smu_v13_0_6_get_unique_id, 3960 .init_microcode = smu_v13_0_6_init_microcode, 3961 .fini_microcode = smu_v13_0_fini_microcode, 3962 .init_smc_tables = smu_v13_0_6_init_smc_tables, 3963 .fini_smc_tables = smu_v13_0_6_fini_smc_tables, 3964 .init_power = smu_v13_0_init_power, 3965 .fini_power = smu_v13_0_fini_power, 3966 .check_fw_status = smu_v13_0_6_check_fw_status, 3967 /* pptable related */ 3968 .check_fw_version = smu_v13_0_6_check_fw_version, 3969 .set_driver_table_location = smu_v13_0_set_driver_table_location, 3970 .set_tool_table_location = smu_v13_0_set_tool_table_location, 3971 .notify_memory_pool_location = smu_v13_0_notify_memory_pool_location, 3972 .system_features_control = smu_v13_0_6_system_features_control, 3973 .get_enabled_mask = smu_v13_0_6_get_enabled_mask, 3974 .feature_is_enabled = smu_cmn_feature_is_enabled, 3975 .set_power_limit = smu_v13_0_6_set_power_limit, 3976 .get_ppt_limit = smu_v13_0_6_get_ppt_limit, 3977 .set_xgmi_pstate = smu_v13_0_set_xgmi_pstate, 3978 .register_irq_handler = smu_v13_0_6_register_irq_handler, 3979 .enable_thermal_alert = smu_v13_0_enable_thermal_alert, 3980 .disable_thermal_alert = smu_v13_0_disable_thermal_alert, 3981 .setup_pptable = smu_v13_0_6_setup_pptable, 3982 .get_bamaco_support = smu_v13_0_6_get_bamaco_support, 3983 .get_dpm_ultimate_freq = smu_v13_0_6_get_dpm_ultimate_freq, 3984 .set_soft_freq_limited_range = smu_v13_0_6_set_soft_freq_limited_range, 3985 .od_edit_dpm_table = smu_v13_0_6_usr_edit_dpm_table, 3986 .log_thermal_throttling_event = smu_v13_0_6_log_thermal_throttling_event, 3987 .get_pp_feature_mask = smu_cmn_get_pp_feature_mask, 3988 .get_gpu_metrics = smu_v13_0_6_get_gpu_metrics, 3989 .get_pm_metrics = smu_v13_0_6_get_pm_metrics, 3990 .get_xcp_metrics = smu_v13_0_6_get_xcp_metrics, 3991 .get_thermal_temperature_range = smu_v13_0_6_get_thermal_temperature_range, 3992 .mode1_reset_is_support = smu_v13_0_6_is_mode1_reset_supported, 3993 .mode1_reset = smu_v13_0_6_mode1_reset, 3994 .mode2_reset = smu_v13_0_6_mode2_reset, 3995 .link_reset = smu_v13_0_6_link_reset, 3996 .wait_for_event = smu_v13_0_wait_for_event, 3997 .i2c_init = smu_v13_0_6_i2c_control_init, 3998 .i2c_fini = smu_v13_0_6_i2c_control_fini, 3999 .send_hbm_bad_pages_num = smu_v13_0_6_smu_send_hbm_bad_page_num, 4000 .send_rma_reason = smu_v13_0_6_send_rma_reason, 4001 .reset_sdma = smu_v13_0_6_reset_sdma, 4002 .dpm_reset_vcn = smu_v13_0_6_reset_vcn, 4003 .post_init = smu_v13_0_6_post_init, 4004 .ras_send_msg = smu_v13_0_6_ras_send_msg, 4005 .get_ras_smu_drv = smu_v13_0_6_get_ras_smu_drv, 4006 }; 4007 4008 void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu) 4009 { 4010 const struct cmn2asic_msg_mapping *message_map; 4011 4012 smu->ppt_funcs = &smu_v13_0_6_ppt_funcs; 4013 message_map = (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12)) ? 4014 smu_v13_0_12_message_map : smu_v13_0_6_message_map; 4015 smu->clock_map = smu_v13_0_6_clk_map; 4016 smu->feature_map = (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12)) ? 4017 smu_v13_0_12_feature_mask_map : smu_v13_0_6_feature_mask_map; 4018 smu->table_map = smu_v13_0_6_table_map; 4019 smu->smc_driver_if_version = SMU_IGNORE_IF_VERSION; 4020 smu->smc_fw_caps |= SMU_FW_CAP_RAS_PRI; 4021 smu_v13_0_init_msg_ctl(smu, message_map); 4022 smu_v13_0_6_set_temp_funcs(smu); 4023 amdgpu_mca_smu_init_funcs(smu->adev, &smu_v13_0_6_mca_smu_funcs); 4024 amdgpu_aca_set_smu_funcs(smu->adev, &smu_v13_0_6_aca_smu_funcs); 4025 } 4026 4027