/*
 * Copyright 2021 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#define SWSMU_CODE_LAYER_L2

#include <linux/firmware.h>
#include "amdgpu.h"
#include "amdgpu_smu.h"
#include "atomfirmware.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_atombios.h"
#include "smu_v13_0_6_pmfw.h"
#include "smu13_driver_if_v13_0_6.h"
#include "smu_v13_0_6_ppsmc.h"
#include "soc15_common.h"
#include "atom.h"
#include "power_state.h"
#include "smu_v13_0.h"
#include "smu_v13_0_6_ppt.h"
#include "nbio/nbio_7_4_offset.h"
#include "nbio/nbio_7_4_sh_mask.h"
#include "thm/thm_11_0_2_offset.h"
#include "thm/thm_11_0_2_sh_mask.h"
#include "amdgpu_xgmi.h"
#include <linux/pci.h>
#include "amdgpu_ras.h"
#include "amdgpu_mca.h"
#include "amdgpu_aca.h"
#include "smu_cmn.h"
#include "mp/mp_13_0_6_offset.h"
#include "mp/mp_13_0_6_sh_mask.h"
#include "umc_v12_0.h"

#undef MP1_Public
#undef smnMP1_FIRMWARE_FLAGS

/* TODO: Check final register offsets */
#define MP1_Public 0x03b00000
#define smnMP1_FIRMWARE_FLAGS 0x3010028
/*
 * DO NOT use these for err/warn/info/debug messages.
 * Use dev_err, dev_warn, dev_info and dev_dbg instead.
 * They are more MGPU friendly.
 */
#undef pr_err
#undef pr_warn
#undef pr_info
#undef pr_debug

MODULE_FIRMWARE("amdgpu/smu_13_0_6.bin");
MODULE_FIRMWARE("amdgpu/smu_13_0_14.bin");

#define to_amdgpu_device(x) (container_of(x, struct amdgpu_device, pm.smu_i2c))

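/*
 * Map a generic SMU feature bit to this ASIC's PMFW feature ID; the
 * leading 1 marks the mapping entry as valid.
 */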
#define SMU_13_0_6_FEA_MAP(smu_feature, smu_13_0_6_feature) \
	[smu_feature] = { 1, (smu_13_0_6_feature) }

#define FEATURE_MASK(feature) (1ULL << feature)
static const struct smu_feature_bits smu_v13_0_6_dpm_features = {
	.bits = {
		SMU_FEATURE_BIT_INIT(FEATURE_DATA_CALCULATION),
		SMU_FEATURE_BIT_INIT(FEATURE_DPM_GFXCLK),
		SMU_FEATURE_BIT_INIT(FEATURE_DPM_UCLK),
		SMU_FEATURE_BIT_INIT(FEATURE_DPM_SOCCLK),
		SMU_FEATURE_BIT_INIT(FEATURE_DPM_FCLK),
		SMU_FEATURE_BIT_INIT(FEATURE_DPM_LCLK),
		SMU_FEATURE_BIT_INIT(FEATURE_DPM_XGMI),
		SMU_FEATURE_BIT_INIT(FEATURE_DPM_VCN)
	}
};

#define smnPCIE_ESM_CTRL 0x93D0
#define smnPCIE_LC_LINK_WIDTH_CNTL 0x1a340288
#define PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD_MASK 0x00000070L
#define PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD__SHIFT 0x4
#define MAX_LINK_WIDTH 6

#define smnPCIE_LC_SPEED_CNTL 0x1a340290
#define PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK 0xE0
#define PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT 0x5
#define LINK_SPEED_MAX 4

#define MCA_BANK_IPID(_ip, _hwid, _type) \
	[AMDGPU_MCA_IP_##_ip] = { .hwid = _hwid, .mcatype = _type, }

struct mca_bank_ipid {
	enum amdgpu_mca_ip ip;
	uint16_t hwid;
	uint16_t mcatype;
};

struct mca_ras_info {
	enum amdgpu_ras_block blkid;
	enum amdgpu_mca_ip ip;
	int *err_code_array;
	int err_code_count;
	int (*get_err_count)(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev,
			     enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count);
	bool (*bank_is_valid)(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev,
			      enum amdgpu_mca_error_type type, struct mca_bank_entry *entry);
};

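/*
 * P2S table IDs are ASCII tags; e.g. 0x50325341 decodes to "P2SA" and
 * 0x50325358 to "P2SX".
 */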
#define P2S_TABLE_ID_A 0x50325341
#define P2S_TABLE_ID_X 0x50325358
#define P2S_TABLE_ID_3 0x50325303

// clang-format off
static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COUNT] = {
	MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 0),
	MSG_MAP(GetSmuVersion, PPSMC_MSG_GetSmuVersion, 1),
	MSG_MAP(GetDriverIfVersion, PPSMC_MSG_GetDriverIfVersion, 1),
	MSG_MAP(EnableAllSmuFeatures, PPSMC_MSG_EnableAllSmuFeatures, 0),
	MSG_MAP(DisableAllSmuFeatures, PPSMC_MSG_DisableAllSmuFeatures, 0),
	MSG_MAP(RequestI2cTransaction, PPSMC_MSG_RequestI2cTransaction, 0),
	MSG_MAP(GetMetricsTable, PPSMC_MSG_GetMetricsTable, 1),
	MSG_MAP(GetMetricsVersion, PPSMC_MSG_GetMetricsVersion, 1),
	MSG_MAP(GetEnabledSmuFeaturesHigh, PPSMC_MSG_GetEnabledSmuFeaturesHigh, 1),
	MSG_MAP(GetEnabledSmuFeaturesLow, PPSMC_MSG_GetEnabledSmuFeaturesLow, 1),
	MSG_MAP(SetDriverDramAddrHigh, PPSMC_MSG_SetDriverDramAddrHigh, 1),
	MSG_MAP(SetDriverDramAddrLow, PPSMC_MSG_SetDriverDramAddrLow, 1),
	MSG_MAP(SetToolsDramAddrHigh, PPSMC_MSG_SetToolsDramAddrHigh, 0),
	MSG_MAP(SetToolsDramAddrLow, PPSMC_MSG_SetToolsDramAddrLow, 0),
	MSG_MAP(SetSoftMinByFreq, PPSMC_MSG_SetSoftMinByFreq, 0),
	MSG_MAP(SetSoftMaxByFreq, PPSMC_MSG_SetSoftMaxByFreq, 1),
	MSG_MAP(GetMinDpmFreq, PPSMC_MSG_GetMinDpmFreq, 1),
	MSG_MAP(GetMaxDpmFreq, PPSMC_MSG_GetMaxDpmFreq, 1),
	MSG_MAP(GetDpmFreqByIndex, PPSMC_MSG_GetDpmFreqByIndex, 1),
	MSG_MAP(SetPptLimit, PPSMC_MSG_SetPptLimit, 1),
	MSG_MAP(GetPptLimit, PPSMC_MSG_GetPptLimit, 1),
	MSG_MAP(GfxDeviceDriverReset, PPSMC_MSG_GfxDriverReset, SMU_MSG_RAS_PRI | SMU_MSG_NO_PRECHECK),
	MSG_MAP(DramLogSetDramAddrHigh, PPSMC_MSG_DramLogSetDramAddrHigh, 0),
	MSG_MAP(DramLogSetDramAddrLow, PPSMC_MSG_DramLogSetDramAddrLow, 0),
	MSG_MAP(DramLogSetDramSize, PPSMC_MSG_DramLogSetDramSize, 0),
	MSG_MAP(GetDebugData, PPSMC_MSG_GetDebugData, 0),
	MSG_MAP(SetNumBadHbmPagesRetired, PPSMC_MSG_SetNumBadHbmPagesRetired, 0),
	MSG_MAP(DFCstateControl, PPSMC_MSG_DFCstateControl, 0),
	MSG_MAP(GetGmiPwrDnHyst, PPSMC_MSG_GetGmiPwrDnHyst, 0),
	MSG_MAP(SetGmiPwrDnHyst, PPSMC_MSG_SetGmiPwrDnHyst, 0),
	MSG_MAP(GmiPwrDnControl, PPSMC_MSG_GmiPwrDnControl, 0),
	MSG_MAP(EnterGfxoff, PPSMC_MSG_EnterGfxoff, 0),
	MSG_MAP(ExitGfxoff, PPSMC_MSG_ExitGfxoff, 0),
	MSG_MAP(EnableDeterminism, PPSMC_MSG_EnableDeterminism, 0),
	MSG_MAP(DisableDeterminism, PPSMC_MSG_DisableDeterminism, 0),
	MSG_MAP(GfxDriverResetRecovery, PPSMC_MSG_GfxDriverResetRecovery, 0),
	MSG_MAP(GetMinGfxclkFrequency, PPSMC_MSG_GetMinGfxDpmFreq, 1),
	MSG_MAP(GetMaxGfxclkFrequency, PPSMC_MSG_GetMaxGfxDpmFreq, 1),
	MSG_MAP(SetSoftMinGfxclk, PPSMC_MSG_SetSoftMinGfxClk, 1),
	MSG_MAP(SetSoftMaxGfxClk, PPSMC_MSG_SetSoftMaxGfxClk, 1),
	MSG_MAP(PrepareMp1ForUnload, PPSMC_MSG_PrepareForDriverUnload, 0),
	MSG_MAP(GetCTFLimit, PPSMC_MSG_GetCTFLimit, 0),
	MSG_MAP(GetThermalLimit, PPSMC_MSG_ReadThrottlerLimit, 0),
	MSG_MAP(ClearMcaOnRead, PPSMC_MSG_ClearMcaOnRead, 0),
	MSG_MAP(QueryValidMcaCount, PPSMC_MSG_QueryValidMcaCount, SMU_MSG_RAS_PRI),
	MSG_MAP(QueryValidMcaCeCount, PPSMC_MSG_QueryValidMcaCeCount, SMU_MSG_RAS_PRI),
	MSG_MAP(McaBankDumpDW, PPSMC_MSG_McaBankDumpDW, SMU_MSG_RAS_PRI),
	MSG_MAP(McaBankCeDumpDW, PPSMC_MSG_McaBankCeDumpDW, SMU_MSG_RAS_PRI),
	MSG_MAP(SelectPLPDMode, PPSMC_MSG_SelectPLPDMode, 0),
	MSG_MAP(RmaDueToBadPageThreshold, PPSMC_MSG_RmaDueToBadPageThreshold, 0),
	MSG_MAP(SetThrottlingPolicy, PPSMC_MSG_SetThrottlingPolicy, 0),
	MSG_MAP(ResetSDMA, PPSMC_MSG_ResetSDMA, 0),
	MSG_MAP(ResetVCN, PPSMC_MSG_ResetVCN, 0),
	MSG_MAP(GetStaticMetricsTable, PPSMC_MSG_GetStaticMetricsTable, 1),
};

// clang-format on
static const struct cmn2asic_mapping smu_v13_0_6_clk_map[SMU_CLK_COUNT] = {
	CLK_MAP(SOCCLK, PPCLK_SOCCLK),
	CLK_MAP(FCLK, PPCLK_FCLK),
	CLK_MAP(UCLK, PPCLK_UCLK),
	CLK_MAP(MCLK, PPCLK_UCLK),
	CLK_MAP(DCLK, PPCLK_DCLK),
	CLK_MAP(VCLK, PPCLK_VCLK),
	CLK_MAP(LCLK, PPCLK_LCLK),
};

static const struct cmn2asic_mapping smu_v13_0_6_feature_mask_map[SMU_FEATURE_COUNT] = {
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DATA_CALCULATIONS_BIT, FEATURE_DATA_CALCULATION),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DPM_GFXCLK_BIT, FEATURE_DPM_GFXCLK),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DPM_UCLK_BIT, FEATURE_DPM_UCLK),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DPM_SOCCLK_BIT, FEATURE_DPM_SOCCLK),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DPM_FCLK_BIT, FEATURE_DPM_FCLK),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DPM_LCLK_BIT, FEATURE_DPM_LCLK),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DPM_VCLK_BIT, FEATURE_DPM_VCN),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DPM_DCLK_BIT, FEATURE_DPM_VCN),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DPM_XGMI_BIT, FEATURE_DPM_XGMI),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DS_GFXCLK_BIT, FEATURE_DS_GFXCLK),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DS_SOCCLK_BIT, FEATURE_DS_SOCCLK),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DS_LCLK_BIT, FEATURE_DS_LCLK),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DS_FCLK_BIT, FEATURE_DS_FCLK),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_VCN_DPM_BIT, FEATURE_DPM_VCN),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_PPT_BIT, FEATURE_PPT),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_TDC_BIT, FEATURE_TDC),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_APCC_DFLL_BIT, FEATURE_APCC_DFLL),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_MP1_CG_BIT, FEATURE_SMU_CG),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_GFXOFF_BIT, FEATURE_GFXOFF),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_FW_CTF_BIT, FEATURE_FW_CTF),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_THERMAL_BIT, FEATURE_THERMAL),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_XGMI_PER_LINK_PWR_DWN_BIT, FEATURE_XGMI_PER_LINK_PWR_DOWN),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DF_CSTATE_BIT, FEATURE_DF_CSTATE),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DS_VCN_BIT, FEATURE_DS_VCN),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DS_MP1CLK_BIT, FEATURE_DS_MP1CLK),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DS_MPIOCLK_BIT, FEATURE_DS_MPIOCLK),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DS_MP0CLK_BIT, FEATURE_DS_MP0CLK),
};

#define TABLE_PMSTATUSLOG 0
#define TABLE_SMU_METRICS 1
#define TABLE_I2C_COMMANDS 2
#define TABLE_COUNT 3

static const struct cmn2asic_mapping smu_v13_0_6_table_map[SMU_TABLE_COUNT] = {
	TAB_MAP(PMSTATUSLOG),
	TAB_MAP(SMU_METRICS),
	TAB_MAP(I2C_COMMANDS),
};

static const uint8_t smu_v13_0_6_throttler_map[] = {
	[THROTTLER_PPT_BIT] = (SMU_THROTTLER_PPT0_BIT),
	[THROTTLER_THERMAL_SOCKET_BIT] = (SMU_THROTTLER_TEMP_GPU_BIT),
	[THROTTLER_THERMAL_HBM_BIT] = (SMU_THROTTLER_TEMP_MEM_BIT),
	[THROTTLER_THERMAL_VR_BIT] = (SMU_THROTTLER_TEMP_VR_GFX_BIT),
	[THROTTLER_PROCHOT_BIT] = (SMU_THROTTLER_PROCHOT_GFX_BIT),
};

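/*
 * Three metrics-table layouts (V0/V1/V2) share a single buffer; the macros
 * below read a field from whichever layout matches the reported metrics
 * version, and the buffer is sized for the largest of the three.
 */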
#define GET_GPU_METRIC_FIELD(field, version) ((version == METRICS_VERSION_V0) ?\
		(metrics_v0->field) : (metrics_v2->field))
#define GET_METRIC_FIELD(field, version) ((version == METRICS_VERSION_V1) ?\
		(metrics_v1->field) : GET_GPU_METRIC_FIELD(field, version))
#define METRICS_TABLE_SIZE (max3(sizeof(MetricsTableV0_t),\
				 sizeof(MetricsTableV1_t),\
				 sizeof(MetricsTableV2_t)))

struct smu_v13_0_6_dpm_map {
	enum smu_clk_type clk_type;
	uint32_t feature_num;
	struct smu_dpm_table *dpm_table;
	uint32_t *freq_table;
};

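/*
 * Select the metrics table layout: V1 for APUs on older PMFW, V2 for
 * MP1 v13.0.12, V0 otherwise.
 */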
static inline int smu_v13_0_6_get_metrics_version(struct smu_context *smu)
{
	if ((smu->adev->flags & AMD_IS_APU) &&
	    smu->smc_fw_version <= 0x4556900)
		return METRICS_VERSION_V1;
	else if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) ==
		 IP_VERSION(13, 0, 12))
		return METRICS_VERSION_V2;

	return METRICS_VERSION_V0;
}

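/* Capabilities are tracked as a 64-bit mask in the DPM context. */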
static inline void smu_v13_0_6_cap_set(struct smu_context *smu,
				       enum smu_v13_0_6_caps cap)
{
	struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context;

	dpm_context->caps |= BIT_ULL(cap);
}

static inline void smu_v13_0_6_cap_clear(struct smu_context *smu,
					 enum smu_v13_0_6_caps cap)
{
	struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context;

	dpm_context->caps &= ~BIT_ULL(cap);
}

bool smu_v13_0_6_cap_supported(struct smu_context *smu,
			       enum smu_v13_0_6_caps cap)
{
	struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context;

	return !!(dpm_context->caps & BIT_ULL(cap));
}

static void smu_v13_0_14_init_caps(struct smu_context *smu)
{
	enum smu_v13_0_6_caps default_cap_list[] = { SMU_CAP(DPM),
						     SMU_CAP(SET_UCLK_MAX),
						     SMU_CAP(DPM_POLICY),
						     SMU_CAP(PCIE_METRICS),
						     SMU_CAP(CTF_LIMIT),
						     SMU_CAP(MCA_DEBUG_MODE),
						     SMU_CAP(RMA_MSG),
						     SMU_CAP(ACA_SYND) };
	uint32_t fw_ver = smu->smc_fw_version;

	for (int i = 0; i < ARRAY_SIZE(default_cap_list); i++)
		smu_v13_0_6_cap_set(smu, default_cap_list[i]);

	if (fw_ver >= 0x05550E00)
		smu_v13_0_6_cap_set(smu, SMU_CAP(OTHER_END_METRICS));
	if (fw_ver >= 0x05550B00)
		smu_v13_0_6_cap_set(smu, SMU_CAP(PER_INST_METRICS));
	if (fw_ver >= 0x5551200)
		smu_v13_0_6_cap_set(smu, SMU_CAP(SDMA_RESET));
	if (fw_ver >= 0x5551800)
		smu_v13_0_6_cap_set(smu, SMU_CAP(VCN_RESET));
	if (fw_ver >= 0x5551600) {
		smu_v13_0_6_cap_set(smu, SMU_CAP(STATIC_METRICS));
		smu_v13_0_6_cap_set(smu, SMU_CAP(BOARD_VOLTAGE));
		smu_v13_0_6_cap_set(smu, SMU_CAP(PLDM_VERSION));
	}
}

static void smu_v13_0_12_init_caps(struct smu_context *smu)
{
	enum smu_v13_0_6_caps default_cap_list[] = { SMU_CAP(DPM),
						     SMU_CAP(PCIE_METRICS),
						     SMU_CAP(CTF_LIMIT),
						     SMU_CAP(MCA_DEBUG_MODE),
						     SMU_CAP(RMA_MSG),
						     SMU_CAP(ACA_SYND),
						     SMU_CAP(OTHER_END_METRICS),
						     SMU_CAP(PER_INST_METRICS) };
	uint32_t fw_ver = smu->smc_fw_version;

	for (int i = 0; i < ARRAY_SIZE(default_cap_list); i++)
		smu_v13_0_6_cap_set(smu, default_cap_list[i]);

	if (fw_ver < 0x00561900)
		smu_v13_0_6_cap_clear(smu, SMU_CAP(DPM));

	if (fw_ver >= 0x00561700)
		smu_v13_0_6_cap_set(smu, SMU_CAP(SDMA_RESET));

	if (fw_ver >= 0x00561E00)
		smu_v13_0_6_cap_set(smu, SMU_CAP(STATIC_METRICS));

	if (fw_ver >= 0x00562500)
		smu_v13_0_6_cap_set(smu, SMU_CAP(HST_LIMIT_METRICS));

	if (fw_ver >= 0x04560100) {
		smu_v13_0_6_cap_set(smu, SMU_CAP(BOARD_VOLTAGE));
		smu_v13_0_6_cap_set(smu, SMU_CAP(PLDM_VERSION));
	}

	if (fw_ver > 0x04560900)
		smu_v13_0_6_cap_set(smu, SMU_CAP(VCN_RESET));

	if (fw_ver >= 0x04560D00) {
		smu_v13_0_6_cap_set(smu, SMU_CAP(FAST_PPT));
		if (smu->adev->gmc.xgmi.physical_node_id == 0)
			smu_v13_0_6_cap_set(smu, SMU_CAP(SYSTEM_POWER_METRICS));
	}

	if (fw_ver >= 0x04560700) {
		if (fw_ver >= 0x04560900) {
			smu_v13_0_6_cap_set(smu, SMU_CAP(TEMP_METRICS));
			if (smu->adev->gmc.xgmi.physical_node_id == 0)
				smu_v13_0_6_cap_set(smu, SMU_CAP(NPM_METRICS));
		} else if (!amdgpu_sriov_vf(smu->adev))
			smu_v13_0_6_cap_set(smu, SMU_CAP(TEMP_METRICS));
	} else {
		smu_v13_0_12_tables_fini(smu);
	}
}

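/*
 * The top byte of the PMFW version appears to encode the firmware program
 * (pgm below); the capability gates that follow differ per program and per
 * branch version.
 */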
static void smu_v13_0_6_init_caps(struct smu_context *smu)
{
	enum smu_v13_0_6_caps default_cap_list[] = { SMU_CAP(DPM),
						     SMU_CAP(SET_UCLK_MAX),
						     SMU_CAP(DPM_POLICY),
						     SMU_CAP(PCIE_METRICS),
						     SMU_CAP(CTF_LIMIT),
						     SMU_CAP(MCA_DEBUG_MODE),
						     SMU_CAP(RMA_MSG),
						     SMU_CAP(ACA_SYND) };
	struct amdgpu_device *adev = smu->adev;
	uint32_t fw_ver = smu->smc_fw_version;
	uint32_t pgm = (fw_ver >> 24) & 0xFF;

	for (int i = 0; i < ARRAY_SIZE(default_cap_list); i++)
		smu_v13_0_6_cap_set(smu, default_cap_list[i]);

	if (fw_ver < 0x552F00)
		smu_v13_0_6_cap_clear(smu, SMU_CAP(DPM));
	if (fw_ver < 0x554500)
		smu_v13_0_6_cap_clear(smu, SMU_CAP(CTF_LIMIT));

	if (adev->flags & AMD_IS_APU) {
		smu_v13_0_6_cap_clear(smu, SMU_CAP(PCIE_METRICS));
		smu_v13_0_6_cap_clear(smu, SMU_CAP(DPM_POLICY));
		smu_v13_0_6_cap_clear(smu, SMU_CAP(RMA_MSG));
		smu_v13_0_6_cap_clear(smu, SMU_CAP(ACA_SYND));

		if (fw_ver >= 0x04556A00)
			smu_v13_0_6_cap_set(smu, SMU_CAP(PER_INST_METRICS));
	} else {
		if (fw_ver >= 0x557600)
			smu_v13_0_6_cap_set(smu, SMU_CAP(OTHER_END_METRICS));
		if (fw_ver < 0x00556000)
			smu_v13_0_6_cap_clear(smu, SMU_CAP(DPM_POLICY));
		if (amdgpu_sriov_vf(adev) && (fw_ver < 0x556600))
			smu_v13_0_6_cap_clear(smu, SMU_CAP(SET_UCLK_MAX));
		if (fw_ver < 0x556300)
			smu_v13_0_6_cap_clear(smu, SMU_CAP(PCIE_METRICS));
		if (fw_ver < 0x554800)
			smu_v13_0_6_cap_clear(smu, SMU_CAP(MCA_DEBUG_MODE));
		if (fw_ver >= 0x556F00)
			smu_v13_0_6_cap_set(smu, SMU_CAP(PER_INST_METRICS));
		if (fw_ver < 0x00555a00)
			smu_v13_0_6_cap_clear(smu, SMU_CAP(RMA_MSG));
		if (fw_ver < 0x00555600)
			smu_v13_0_6_cap_clear(smu, SMU_CAP(ACA_SYND));
		if ((pgm == 7 && fw_ver >= 0x7550E00) ||
		    (pgm == 0 && fw_ver >= 0x00557E00))
			smu_v13_0_6_cap_set(smu, SMU_CAP(HST_LIMIT_METRICS));

		if (amdgpu_sriov_vf(adev)) {
			if (fw_ver >= 0x00558200)
				amdgpu_virt_attr_set(&adev->virt.virt_caps,
						     AMDGPU_VIRT_CAP_POWER_LIMIT,
						     AMDGPU_CAP_ATTR_RW);
			if ((pgm == 0 && fw_ver >= 0x00558000) ||
			    (pgm == 7 && fw_ver >= 0x7551000)) {
				smu_v13_0_6_cap_set(smu,
						    SMU_CAP(STATIC_METRICS));
				smu_v13_0_6_cap_set(smu,
						    SMU_CAP(BOARD_VOLTAGE));
				smu_v13_0_6_cap_set(smu, SMU_CAP(PLDM_VERSION));
			}
		} else {
			if ((pgm == 0 && fw_ver >= 0x00557F01) ||
			    (pgm == 7 && fw_ver >= 0x7551000)) {
				smu_v13_0_6_cap_set(smu,
						    SMU_CAP(STATIC_METRICS));
				smu_v13_0_6_cap_set(smu,
						    SMU_CAP(BOARD_VOLTAGE));
			}
			if ((pgm == 0 && fw_ver >= 0x00558000) ||
			    (pgm == 7 && fw_ver >= 0x7551000))
				smu_v13_0_6_cap_set(smu, SMU_CAP(PLDM_VERSION));
		}
	}
	if (((pgm == 7) && (fw_ver >= 0x7550700)) ||
	    ((pgm == 0) && (fw_ver >= 0x00557900)) ||
	    ((pgm == 4) && (fw_ver >= 0x4557000)))
		smu_v13_0_6_cap_set(smu, SMU_CAP(SDMA_RESET));

	if ((pgm == 0 && fw_ver >= 0x00558200) ||
	    (pgm == 7 && fw_ver >= 0x07551400))
		smu_v13_0_6_cap_set(smu, SMU_CAP(VCN_RESET));
}

static void smu_v13_0_x_init_caps(struct smu_context *smu)
{
	switch (amdgpu_ip_version(smu->adev, MP1_HWIP, 0)) {
	case IP_VERSION(13, 0, 12):
		return smu_v13_0_12_init_caps(smu);
	case IP_VERSION(13, 0, 14):
		return smu_v13_0_14_init_caps(smu);
	default:
		return smu_v13_0_6_init_caps(smu);
	}
}

static int smu_v13_0_6_check_fw_version(struct smu_context *smu)
{
	int r;

	r = smu_v13_0_check_fw_version(smu);
	/* Initialize caps flags once fw version is fetched */
	if (!r)
		smu_v13_0_x_init_caps(smu);

	return r;
}

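/*
 * Fetch the SMU firmware image and, on bare metal, pick the P2S table
 * variant matching this SKU from the image's pptable entries and register
 * it as a ucode entry.
 */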
static int smu_v13_0_6_init_microcode(struct smu_context *smu)
{
	const struct smc_firmware_header_v2_1 *v2_1;
	const struct common_firmware_header *hdr;
	struct amdgpu_firmware_info *ucode = NULL;
	struct smc_soft_pptable_entry *entries;
	struct amdgpu_device *adev = smu->adev;
	uint32_t p2s_table_id = P2S_TABLE_ID_A;
	int ret = 0, i, p2stable_count;
	int var = (adev->pdev->device & 0xF);
	char ucode_prefix[15];

	/* No need to load P2S tables in IOV mode or for SMU v13.0.12 */
	if (amdgpu_sriov_vf(adev) ||
	    (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12)))
		return 0;

	if (!(adev->flags & AMD_IS_APU)) {
		p2s_table_id = P2S_TABLE_ID_X;
		if (var == 0x5)
			p2s_table_id = P2S_TABLE_ID_3;
	}

	amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix,
				       sizeof(ucode_prefix));
	ret = amdgpu_ucode_request(adev, &adev->pm.fw, AMDGPU_UCODE_REQUIRED,
				   "amdgpu/%s.bin", ucode_prefix);
	if (ret)
		goto out;

	hdr = (const struct common_firmware_header *)adev->pm.fw->data;
	amdgpu_ucode_print_smc_hdr(hdr);

	/* The SMU v13.0.6 binary doesn't carry pptables; its pptable entries
	 * are used to carry P2S tables instead.
	 */
	v2_1 = (const struct smc_firmware_header_v2_1 *)adev->pm.fw->data;
	entries = (struct smc_soft_pptable_entry
			   *)((uint8_t *)v2_1 +
			      le32_to_cpu(v2_1->pptable_entry_offset));
	p2stable_count = le32_to_cpu(v2_1->pptable_count);
	for (i = 0; i < p2stable_count; i++) {
		if (le32_to_cpu(entries[i].id) == p2s_table_id) {
			smu->pptable_firmware.data =
				((uint8_t *)v2_1 +
				 le32_to_cpu(entries[i].ppt_offset_bytes));
			smu->pptable_firmware.size =
				le32_to_cpu(entries[i].ppt_size_bytes);
			break;
		}
	}

	if (smu->pptable_firmware.data && smu->pptable_firmware.size) {
		ucode = &adev->firmware.ucode[AMDGPU_UCODE_ID_P2S_TABLE];
		ucode->ucode_id = AMDGPU_UCODE_ID_P2S_TABLE;
		ucode->fw = &smu->pptable_firmware;
		adev->firmware.fw_size += ALIGN(ucode->fw->size, PAGE_SIZE);
	}

	return 0;
out:
	amdgpu_ucode_release(&adev->pm.fw);

	return ret;
}

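/*
 * Set up the BO-backed PMFW tables and the driver-side scratch copies of
 * the metrics table and driver pptable.
 */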
static int smu_v13_0_6_tables_init(struct smu_context *smu)
{
	struct smu_table_context *smu_table = &smu->smu_table;
	struct smu_table *tables = smu_table->tables;
	struct smu_v13_0_6_gpu_metrics *gpu_metrics;
	void *driver_pptable __free(kfree) = NULL;
	void *metrics_table __free(kfree) = NULL;
	struct amdgpu_device *adev = smu->adev;
	int gpu_metrics_size = METRICS_TABLE_SIZE;
	int ret;

	if (!(adev->flags & AMD_IS_APU))
		SMU_TABLE_INIT(tables, SMU_TABLE_PMSTATUSLOG, SMU13_TOOL_SIZE,
			       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);

	SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS,
		       max(gpu_metrics_size,
			   smu_v13_0_12_get_max_metrics_size()),
		       PAGE_SIZE,
		       AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT);

	SMU_TABLE_INIT(tables, SMU_TABLE_I2C_COMMANDS, sizeof(SwI2cRequest_t),
		       PAGE_SIZE,
		       AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT);

	SMU_TABLE_INIT(tables, SMU_TABLE_PMFW_SYSTEM_METRICS,
		       smu_v13_0_12_get_system_metrics_size(), PAGE_SIZE,
		       AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT);

	metrics_table = kzalloc(METRICS_TABLE_SIZE, GFP_KERNEL);
	if (!metrics_table)
		return -ENOMEM;
	smu_table->metrics_time = 0;

	driver_pptable = kzalloc_obj(struct PPTable_t);
	if (!driver_pptable)
		return -ENOMEM;

	ret = smu_driver_table_init(smu, SMU_DRIVER_TABLE_GPU_METRICS,
				    sizeof(struct smu_v13_0_6_gpu_metrics),
				    SMU_GPU_METRICS_CACHE_INTERVAL);
	if (ret)
		return ret;

	gpu_metrics = (struct smu_v13_0_6_gpu_metrics *)smu_driver_table_ptr(
		smu, SMU_DRIVER_TABLE_GPU_METRICS);

	smu_v13_0_6_gpu_metrics_init(gpu_metrics, 1, 9);
	if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) ==
	    IP_VERSION(13, 0, 12)) {
		ret = smu_v13_0_12_tables_init(smu);
		if (ret) {
			smu_driver_table_fini(smu,
					      SMU_DRIVER_TABLE_GPU_METRICS);
			return ret;
		}
	}

	smu_table->metrics_table = no_free_ptr(metrics_table);
	smu_table->driver_pptable = no_free_ptr(driver_pptable);

	return 0;
}

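/* PMFW encodes the soc pstate policy as 0 = default and 1..3 = pstate 0..2. */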
static int smu_v13_0_6_select_policy_soc_pstate(struct smu_context *smu,
						int policy)
{
	struct amdgpu_device *adev = smu->adev;
	int ret, param;

	switch (policy) {
	case SOC_PSTATE_DEFAULT:
		param = 0;
		break;
	case SOC_PSTATE_0:
		param = 1;
		break;
	case SOC_PSTATE_1:
		param = 2;
		break;
	case SOC_PSTATE_2:
		param = 3;
		break;
	default:
		return -EINVAL;
	}

	ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetThrottlingPolicy,
					      param, NULL);

	if (ret)
		dev_err(adev->dev, "select soc pstate policy %d failed\n",
			policy);

	return ret;
}

static int smu_v13_0_6_select_plpd_policy(struct smu_context *smu, int level)
{
	struct amdgpu_device *adev = smu->adev;
	int ret, param;

	switch (level) {
	case XGMI_PLPD_DEFAULT:
		param = PPSMC_PLPD_MODE_DEFAULT;
		break;
	case XGMI_PLPD_OPTIMIZED:
		param = PPSMC_PLPD_MODE_OPTIMIZED;
		break;
	case XGMI_PLPD_DISALLOW:
		param = 0;
		break;
	default:
		return -EINVAL;
	}

	if (level == XGMI_PLPD_DISALLOW)
		ret = smu_cmn_send_smc_msg_with_param(
			smu, SMU_MSG_GmiPwrDnControl, param, NULL);
	else
		/* change xgmi per-link power down policy */
		ret = smu_cmn_send_smc_msg_with_param(
			smu, SMU_MSG_SelectPLPDMode, param, NULL);

	if (ret)
		dev_err(adev->dev,
			"select xgmi per-link power down policy %d failed\n",
			level);

	return ret;
}

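/*
 * Two policy slots are populated: SOC pstate (dGPU only) and XGMI per-link
 * power down, each with its supported level mask and set_policy handler.
 */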
static int smu_v13_0_6_allocate_dpm_context(struct smu_context *smu)
{
	struct smu_dpm_context *smu_dpm = &smu->smu_dpm;
	struct smu_dpm_policy *policy;

	smu_dpm->dpm_context =
		kzalloc_obj(struct smu_13_0_dpm_context);
	if (!smu_dpm->dpm_context)
		return -ENOMEM;
	smu_dpm->dpm_context_size = sizeof(struct smu_13_0_dpm_context);

	smu_dpm->dpm_policies =
		kzalloc_obj(struct smu_dpm_policy_ctxt);
	if (!smu_dpm->dpm_policies) {
		kfree(smu_dpm->dpm_context);
		return -ENOMEM;
	}

	if (!(smu->adev->flags & AMD_IS_APU)) {
		policy = &(smu_dpm->dpm_policies->policies[0]);

		policy->policy_type = PP_PM_POLICY_SOC_PSTATE;
		policy->level_mask = BIT(SOC_PSTATE_DEFAULT) |
				     BIT(SOC_PSTATE_0) | BIT(SOC_PSTATE_1) |
				     BIT(SOC_PSTATE_2);
		policy->current_level = SOC_PSTATE_DEFAULT;
		policy->set_policy = smu_v13_0_6_select_policy_soc_pstate;
		smu_cmn_generic_soc_policy_desc(policy);
		smu_dpm->dpm_policies->policy_mask |=
			BIT(PP_PM_POLICY_SOC_PSTATE);
	}
	policy = &(smu_dpm->dpm_policies->policies[1]);

	policy->policy_type = PP_PM_POLICY_XGMI_PLPD;
	policy->level_mask = BIT(XGMI_PLPD_DISALLOW) | BIT(XGMI_PLPD_DEFAULT) |
			     BIT(XGMI_PLPD_OPTIMIZED);
	policy->current_level = XGMI_PLPD_DEFAULT;
	policy->set_policy = smu_v13_0_6_select_plpd_policy;
	smu_cmn_generic_plpd_policy_desc(policy);
	smu_dpm->dpm_policies->policy_mask |= BIT(PP_PM_POLICY_XGMI_PLPD);

	return 0;
}

static int smu_v13_0_6_init_smc_tables(struct smu_context *smu)
{
	int ret = 0;

	ret = smu_v13_0_6_tables_init(smu);
	if (ret)
		return ret;

	ret = smu_v13_0_6_allocate_dpm_context(smu);

	return ret;
}

static int smu_v13_0_6_fini_smc_tables(struct smu_context *smu)
{
	if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12))
		smu_v13_0_12_tables_fini(smu);
	return smu_v13_0_fini_smc_tables(smu);
}

static int smu_v13_0_6_init_allowed_features(struct smu_context *smu)
{
	smu_feature_list_set_all(smu, SMU_FEATURE_LIST_ALLOWED);

	return 0;
}

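/*
 * Fetch the metrics table from PMFW, reusing the cached copy when it is
 * less than 1 ms old unless bypass_cache forces a refresh.
 */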
int smu_v13_0_6_get_metrics_table(struct smu_context *smu, void *metrics_table,
				  bool bypass_cache)
{
	struct smu_table_context *smu_table = &smu->smu_table;
	uint32_t table_size = smu_table->tables[SMU_TABLE_SMU_METRICS].size;
	struct smu_table *table = &smu_table->driver_table;
	int ret;

	if (bypass_cache || !smu_table->metrics_time ||
	    time_after(jiffies,
		       smu_table->metrics_time + msecs_to_jiffies(1))) {
		ret = smu_cmn_send_smc_msg(smu, SMU_MSG_GetMetricsTable, NULL);
		if (ret) {
			dev_info(smu->adev->dev,
				 "Failed to export SMU metrics table!\n");
			return ret;
		}

		amdgpu_hdp_invalidate(smu->adev, NULL);
		memcpy(smu_table->metrics_table, table->cpu_addr, table_size);

		smu_table->metrics_time = jiffies;
	}

	if (metrics_table)
		memcpy(metrics_table, smu_table->metrics_table, table_size);

	return 0;
}

static ssize_t smu_v13_0_6_get_pm_metrics(struct smu_context *smu,
					  void *metrics, size_t max_size)
{
	struct smu_table_context *smu_tbl_ctxt = &smu->smu_table;
	uint32_t table_version = smu_tbl_ctxt->tables[SMU_TABLE_SMU_METRICS].version;
	uint32_t table_size = smu_tbl_ctxt->tables[SMU_TABLE_SMU_METRICS].size;
	struct amdgpu_pm_metrics *pm_metrics = metrics;
	uint32_t pmfw_version;
	int ret;

	if (!pm_metrics || !max_size)
		return -EINVAL;

	if (max_size < (table_size + sizeof(pm_metrics->common_header)))
		return -EOVERFLOW;

	/* Don't use cached metrics data */
	ret = smu_v13_0_6_get_metrics_table(smu, pm_metrics->data, true);
	if (ret)
		return ret;

	smu_cmn_get_smc_version(smu, NULL, &pmfw_version);

	memset(&pm_metrics->common_header, 0,
	       sizeof(pm_metrics->common_header));
	pm_metrics->common_header.mp1_ip_discovery_version =
		amdgpu_ip_version(smu->adev, MP1_HWIP, 0);
	pm_metrics->common_header.pmfw_version = pmfw_version;
	pm_metrics->common_header.pmmetrics_version = table_version;
	pm_metrics->common_header.structure_size =
		sizeof(pm_metrics->common_header) + table_size;

	return pm_metrics->common_header.structure_size;
}

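/*
 * Cache one-time static metrics: board input voltage (0 is treated as
 * invalid) and, when supported, the PLDM version (all-Fs means
 * unpopulated).
 */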
static void smu_v13_0_6_fill_static_metrics_table(struct smu_context *smu,
						  StaticMetricsTable_t *static_metrics)
{
	struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context;

	if (!static_metrics->InputTelemetryVoltageInmV) {
		dev_warn(smu->adev->dev, "Invalid board voltage %d\n",
			 static_metrics->InputTelemetryVoltageInmV);
	}

	dpm_context->board_volt = static_metrics->InputTelemetryVoltageInmV;

	if (smu_v13_0_6_cap_supported(smu, SMU_CAP(PLDM_VERSION)) &&
	    static_metrics->pldmVersion[0] != 0xFFFFFFFF)
		smu->adev->firmware.pldm_version =
			static_metrics->pldmVersion[0];
}

int smu_v13_0_6_get_static_metrics_table(struct smu_context *smu)
{
	struct smu_table_context *smu_table = &smu->smu_table;
	uint32_t table_size = smu_table->tables[SMU_TABLE_SMU_METRICS].size;
	struct smu_table *table = &smu_table->driver_table;
	int ret;

	ret = smu_cmn_send_smc_msg(smu, SMU_MSG_GetStaticMetricsTable, NULL);
	if (ret) {
		dev_info(smu->adev->dev,
			 "Failed to export static metrics table!\n");
		return ret;
	}

	amdgpu_hdp_invalidate(smu->adev, NULL);
	memcpy(smu_table->metrics_table, table->cpu_addr, table_size);

	return 0;
}

static void smu_v13_0_6_update_caps(struct smu_context *smu)
{
	struct smu_table_context *smu_table = &smu->smu_table;
	struct PPTable_t *pptable =
		(struct PPTable_t *)smu_table->driver_pptable;

	if (smu_v13_0_6_cap_supported(smu, SMU_CAP(FAST_PPT)) &&
	    !pptable->PPT1Max)
		smu_v13_0_6_cap_clear(smu, SMU_CAP(FAST_PPT));
}

static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu)
{
	struct smu_table_context *smu_table = &smu->smu_table;
	StaticMetricsTable_t *static_metrics = (StaticMetricsTable_t *)smu_table->metrics_table;
	MetricsTableV0_t *metrics_v0 = (MetricsTableV0_t *)smu_table->metrics_table;
	MetricsTableV1_t *metrics_v1 = (MetricsTableV1_t *)smu_table->metrics_table;
	MetricsTableV2_t *metrics_v2 = (MetricsTableV2_t *)smu_table->metrics_table;
	struct PPTable_t *pptable =
		(struct PPTable_t *)smu_table->driver_pptable;
	int version = smu_v13_0_6_get_metrics_version(smu);
	int ret, i, retry = 100, n;
	uint32_t table_version;
	uint16_t max_speed;
	uint8_t max_width;

	if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12) &&
	    smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS))) {
		ret = smu_v13_0_12_setup_driver_pptable(smu);
		if (ret)
			return ret;
		goto out;
	}

	/* Store one-time values in driver PPTable */
	if (!pptable->Init) {
		while (--retry) {
			ret = smu_v13_0_6_get_metrics_table(smu, NULL, true);
			if (ret)
				return ret;

			/* Ensure that metrics have been updated */
			if (GET_METRIC_FIELD(AccumulationCounter, version))
				break;

			usleep_range(1000, 1100);
		}

		if (!retry)
			return -ETIME;

		ret = smu_cmn_send_smc_msg(smu, SMU_MSG_GetMetricsVersion,
					   &table_version);
		if (ret)
			return ret;
		smu_table->tables[SMU_TABLE_SMU_METRICS].version =
			table_version;

		pptable->MaxSocketPowerLimit =
			SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketPowerLimit, version));
		pptable->MaxGfxclkFrequency =
			SMUQ10_ROUND(GET_METRIC_FIELD(MaxGfxclkFrequency, version));
		pptable->MinGfxclkFrequency =
			SMUQ10_ROUND(GET_METRIC_FIELD(MinGfxclkFrequency, version));
		max_width = (uint8_t)GET_METRIC_FIELD(XgmiWidth, version);
		max_speed = (uint16_t)GET_METRIC_FIELD(XgmiBitrate, version);
		amgpu_xgmi_set_max_speed_width(smu->adev, max_speed, max_width);

		for (i = 0; i < 4; ++i) {
			pptable->FclkFrequencyTable[i] =
				SMUQ10_ROUND(GET_METRIC_FIELD(FclkFrequencyTable, version)[i]);
			pptable->UclkFrequencyTable[i] =
				SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequencyTable, version)[i]);
			pptable->SocclkFrequencyTable[i] = SMUQ10_ROUND(
				GET_METRIC_FIELD(SocclkFrequencyTable, version)[i]);
			pptable->VclkFrequencyTable[i] =
				SMUQ10_ROUND(GET_METRIC_FIELD(VclkFrequencyTable, version)[i]);
			pptable->DclkFrequencyTable[i] =
				SMUQ10_ROUND(GET_METRIC_FIELD(DclkFrequencyTable, version)[i]);
			pptable->LclkFrequencyTable[i] =
				SMUQ10_ROUND(GET_METRIC_FIELD(LclkFrequencyTable, version)[i]);
		}

		/* use AID0 serial number by default */
		pptable->PublicSerialNumber_AID =
			GET_METRIC_FIELD(PublicSerialNumber_AID, version)[0];

		amdgpu_device_set_uid(smu->adev->uid_info, AMDGPU_UID_TYPE_SOC,
				      0, pptable->PublicSerialNumber_AID);
		n = ARRAY_SIZE(metrics_v0->PublicSerialNumber_AID);
		for (i = 0; i < n; i++) {
			amdgpu_device_set_uid(
				smu->adev->uid_info, AMDGPU_UID_TYPE_AID, i,
				GET_METRIC_FIELD(PublicSerialNumber_AID,
						 version)[i]);
		}
		n = ARRAY_SIZE(metrics_v0->PublicSerialNumber_XCD);
		for (i = 0; i < n; i++) {
			amdgpu_device_set_uid(
				smu->adev->uid_info, AMDGPU_UID_TYPE_XCD, i,
				GET_METRIC_FIELD(PublicSerialNumber_XCD,
						 version)[i]);
		}

		pptable->Init = true;
		if (smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS))) {
			ret = smu_v13_0_6_get_static_metrics_table(smu);
			if (ret)
				return ret;
			smu_v13_0_6_fill_static_metrics_table(smu, static_metrics);
		}
	}
out:
	smu_v13_0_6_update_caps(smu);
	return 0;
}

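/*
 * Report min/max for a clock domain, preferring the cached DPM tables and
 * falling back to FW queries; for non-GFX clocks the message param carries
 * the clock ID in the upper 16 bits.
 */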
static int smu_v13_0_6_get_dpm_ultimate_freq(struct smu_context *smu,
					     enum smu_clk_type clk_type,
					     uint32_t *min, uint32_t *max)
{
	struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context;
	struct smu_table_context *smu_table = &smu->smu_table;
	struct PPTable_t *pptable =
		(struct PPTable_t *)smu_table->driver_pptable;
	struct smu_dpm_table *dpm_table;
	uint32_t min_clk, max_clk, param;
	int ret = 0, clk_id = 0;

	/* Use dpm tables, if data is already fetched */
	if (pptable->Init) {
		switch (clk_type) {
		case SMU_MCLK:
		case SMU_UCLK:
			dpm_table = &dpm_context->dpm_tables.uclk_table;
			break;
		case SMU_GFXCLK:
		case SMU_SCLK:
			dpm_table = &dpm_context->dpm_tables.gfx_table;
			break;
		case SMU_SOCCLK:
			dpm_table = &dpm_context->dpm_tables.soc_table;
			break;
		case SMU_FCLK:
			dpm_table = &dpm_context->dpm_tables.fclk_table;
			break;
		case SMU_VCLK:
			dpm_table = &dpm_context->dpm_tables.vclk_table;
			break;
		case SMU_DCLK:
			dpm_table = &dpm_context->dpm_tables.dclk_table;
			break;
		default:
			return -EINVAL;
		}

		min_clk = SMU_DPM_TABLE_MIN(dpm_table);
		max_clk = SMU_DPM_TABLE_MAX(dpm_table);
		if (min)
			*min = min_clk;
		if (max)
			*max = max_clk;

		if (min_clk && max_clk)
			return 0;
	}

	if (!(clk_type == SMU_GFXCLK || clk_type == SMU_SCLK)) {
		clk_id = smu_cmn_to_asic_specific_index(
			smu, CMN2ASIC_MAPPING_CLK, clk_type);
		if (clk_id < 0) {
			ret = -EINVAL;
			goto failed;
		}
		param = (clk_id & 0xffff) << 16;
	}

	if (max) {
		if (clk_type == SMU_GFXCLK || clk_type == SMU_SCLK)
			ret = smu_cmn_send_smc_msg(
				smu, SMU_MSG_GetMaxGfxclkFrequency, max);
		else
			ret = smu_cmn_send_smc_msg_with_param(
				smu, SMU_MSG_GetMaxDpmFreq, param, max);
		if (ret)
			goto failed;
	}

	if (min) {
		if (clk_type == SMU_GFXCLK || clk_type == SMU_SCLK)
			ret = smu_cmn_send_smc_msg(
				smu, SMU_MSG_GetMinGfxclkFrequency, min);
		else
			ret = smu_cmn_send_smc_msg_with_param(
				smu, SMU_MSG_GetMinDpmFreq, param, min);
	}

failed:
	return ret;
}

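/*
 * FW returns the index of the highest DPM level when queried with 0xff,
 * hence the increment to convert an index into a level count.
 */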
static int smu_v13_0_6_get_dpm_level_count(struct smu_context *smu,
					   enum smu_clk_type clk_type,
					   uint32_t *levels)
{
	int ret;

	ret = smu_v13_0_get_dpm_freq_by_index(smu, clk_type, 0xff, levels);
	if (!ret)
		++(*levels);

	return ret;
}

static void smu_v13_0_6_pm_policy_init(struct smu_context *smu)
{
	struct smu_dpm_policy *policy;

	policy = smu_get_pm_policy(smu, PP_PM_POLICY_SOC_PSTATE);
	if (policy)
		policy->current_level = SOC_PSTATE_DEFAULT;
}

static int smu_v13_0_6_set_default_dpm_table(struct smu_context *smu)
{
	struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context;
	struct smu_table_context *smu_table = &smu->smu_table;
	struct smu_dpm_table *dpm_table = NULL;
	struct PPTable_t *pptable =
		(struct PPTable_t *)smu_table->driver_pptable;
	uint32_t gfxclkmin, gfxclkmax, levels;
	int ret = 0, i, j;
	struct smu_v13_0_6_dpm_map dpm_map[] = {
		{ SMU_SOCCLK, SMU_FEATURE_DPM_SOCCLK_BIT,
		  &dpm_context->dpm_tables.soc_table,
		  pptable->SocclkFrequencyTable },
		{ SMU_UCLK, SMU_FEATURE_DPM_UCLK_BIT,
		  &dpm_context->dpm_tables.uclk_table,
		  pptable->UclkFrequencyTable },
		{ SMU_FCLK, SMU_FEATURE_DPM_FCLK_BIT,
		  &dpm_context->dpm_tables.fclk_table,
		  pptable->FclkFrequencyTable },
		{ SMU_VCLK, SMU_FEATURE_DPM_VCLK_BIT,
		  &dpm_context->dpm_tables.vclk_table,
		  pptable->VclkFrequencyTable },
		{ SMU_DCLK, SMU_FEATURE_DPM_DCLK_BIT,
		  &dpm_context->dpm_tables.dclk_table,
		  pptable->DclkFrequencyTable },
	};

	smu_v13_0_6_setup_driver_pptable(smu);

	/* DPM policy not supported in older firmware */
	if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(DPM_POLICY))) {
		struct smu_dpm_context *smu_dpm = &smu->smu_dpm;

		smu_dpm->dpm_policies->policy_mask &=
			~BIT(PP_PM_POLICY_SOC_PSTATE);
	}

	smu_v13_0_6_pm_policy_init(smu);
	/* gfxclk dpm table setup */
	dpm_table = &dpm_context->dpm_tables.gfx_table;
	dpm_table->clk_type = SMU_GFXCLK;
	if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DPM_GFXCLK_BIT)) {
		/* In the case of gfxclk, only fine-grained dpm is honored.
		 * Get min/max values from FW.
		 */
		ret = smu_v13_0_6_get_dpm_ultimate_freq(smu, SMU_GFXCLK,
							&gfxclkmin, &gfxclkmax);
		if (ret)
			return ret;
		dpm_table->count = 2;
		dpm_table->dpm_levels[0].value = gfxclkmin;
		dpm_table->dpm_levels[0].enabled = true;
		dpm_table->dpm_levels[1].value = gfxclkmax;
		dpm_table->dpm_levels[1].enabled = true;
	} else {
		dpm_table->count = 1;
		dpm_table->dpm_levels[0].value = pptable->MinGfxclkFrequency;
		dpm_table->dpm_levels[0].enabled = true;
	}

	for (j = 0; j < ARRAY_SIZE(dpm_map); j++) {
		dpm_table = dpm_map[j].dpm_table;
		levels = 1;
		if (smu_cmn_feature_is_enabled(smu, dpm_map[j].feature_num)) {
			ret = smu_v13_0_6_get_dpm_level_count(
				smu, dpm_map[j].clk_type, &levels);
			if (ret)
				return ret;
		}
		dpm_table->count = levels;
		dpm_table->clk_type = dpm_map[j].clk_type;
		for (i = 0; i < dpm_table->count; ++i) {
			dpm_table->dpm_levels[i].value =
				dpm_map[j].freq_table[i];
			dpm_table->dpm_levels[i].enabled = true;
		}
	}

	return 0;
}

static int smu_v13_0_6_setup_pptable(struct smu_context *smu)
{
	struct smu_table_context *table_context = &smu->smu_table;

	/* TODO: PPTable is not available.
	 * 1) Find an alternate way to get 'PPTable values' here.
	 * 2) Check if there is SW CTF
	 */
	table_context->thermal_controller_type = 0;

	return 0;
}

static int smu_v13_0_6_check_fw_status(struct smu_context *smu)
{
	struct amdgpu_device *adev = smu->adev;
	uint32_t mp1_fw_flags;

	mp1_fw_flags =
		RREG32_PCIE(MP1_Public | (smnMP1_FIRMWARE_FLAGS & 0xffffffff));

	if ((mp1_fw_flags & MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK) >>
	    MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED__SHIFT)
		return 0;

	return -EIO;
}

static int smu_v13_0_6_populate_umd_state_clk(struct smu_context *smu)
{
	struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context;
	struct smu_dpm_table *gfx_table = &dpm_context->dpm_tables.gfx_table;
	struct smu_dpm_table *mem_table = &dpm_context->dpm_tables.uclk_table;
	struct smu_dpm_table *soc_table = &dpm_context->dpm_tables.soc_table;
	struct smu_umd_pstate_table *pstate_table = &smu->pstate_table;

	pstate_table->gfxclk_pstate.min = SMU_DPM_TABLE_MIN(gfx_table);
	pstate_table->gfxclk_pstate.peak = SMU_DPM_TABLE_MAX(gfx_table);
	pstate_table->gfxclk_pstate.curr.min = SMU_DPM_TABLE_MIN(gfx_table);
	pstate_table->gfxclk_pstate.curr.max = SMU_DPM_TABLE_MAX(gfx_table);

	pstate_table->uclk_pstate.min = SMU_DPM_TABLE_MIN(mem_table);
	pstate_table->uclk_pstate.peak = SMU_DPM_TABLE_MAX(mem_table);
	pstate_table->uclk_pstate.curr.min = SMU_DPM_TABLE_MIN(mem_table);
	pstate_table->uclk_pstate.curr.max = SMU_DPM_TABLE_MAX(mem_table);

	pstate_table->socclk_pstate.min = SMU_DPM_TABLE_MIN(soc_table);
	pstate_table->socclk_pstate.peak = SMU_DPM_TABLE_MAX(soc_table);
	pstate_table->socclk_pstate.curr.min = SMU_DPM_TABLE_MIN(soc_table);
	pstate_table->socclk_pstate.curr.max = SMU_DPM_TABLE_MAX(soc_table);

	if (gfx_table->count > SMU_13_0_6_UMD_PSTATE_GFXCLK_LEVEL &&
	    mem_table->count > SMU_13_0_6_UMD_PSTATE_MCLK_LEVEL &&
	    soc_table->count > SMU_13_0_6_UMD_PSTATE_SOCCLK_LEVEL) {
		pstate_table->gfxclk_pstate.standard =
			gfx_table->dpm_levels[SMU_13_0_6_UMD_PSTATE_GFXCLK_LEVEL].value;
		pstate_table->uclk_pstate.standard =
			mem_table->dpm_levels[SMU_13_0_6_UMD_PSTATE_MCLK_LEVEL].value;
		pstate_table->socclk_pstate.standard =
			soc_table->dpm_levels[SMU_13_0_6_UMD_PSTATE_SOCCLK_LEVEL].value;
	} else {
		pstate_table->gfxclk_pstate.standard =
			pstate_table->gfxclk_pstate.min;
		pstate_table->uclk_pstate.standard =
			pstate_table->uclk_pstate.min;
		pstate_table->socclk_pstate.standard =
			pstate_table->socclk_pstate.min;
	}

	return 0;
}

static uint32_t smu_v13_0_6_get_throttler_status(struct smu_context *smu)
{
	struct smu_power_context *smu_power = &smu->smu_power;
	struct smu_13_0_power_context *power_context = smu_power->power_context;
	uint32_t throttler_status = 0;

	throttler_status = atomic_read(&power_context->throttle_status);
	dev_dbg(smu->adev->dev, "SMU Throttler status: %u\n", throttler_status);

	return throttler_status;
}

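/*
 * Metrics fields are Q10 fixed point; SMUQ10_ROUND converts them to plain
 * integers. Socket power is additionally shifted left by 8, presumably to
 * express watts with 8 fractional bits for the power sensor interface.
 */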
static int smu_v13_0_6_get_smu_metrics_data(struct smu_context *smu,
					    MetricsMember_t member,
					    uint32_t *value)
{
	struct smu_table_context *smu_table = &smu->smu_table;
	MetricsTableV0_t *metrics_v0 = (MetricsTableV0_t *)smu_table->metrics_table;
	MetricsTableV1_t *metrics_v1 = (MetricsTableV1_t *)smu_table->metrics_table;
	MetricsTableV2_t *metrics_v2 = (MetricsTableV2_t *)smu_table->metrics_table;
	int version = smu_v13_0_6_get_metrics_version(smu);
	struct amdgpu_device *adev = smu->adev;
	int ret = 0;
	int xcc_id;

	ret = smu_v13_0_6_get_metrics_table(smu, NULL, false);
	if (ret)
		return ret;

	if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12) &&
	    smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS)))
		return smu_v13_0_12_get_smu_metrics_data(smu, member, value);

	/* For clocks with multiple instances, only report the first one */
	switch (member) {
	case METRICS_CURR_GFXCLK:
	case METRICS_AVERAGE_GFXCLK:
		if (smu_v13_0_6_cap_supported(smu, SMU_CAP(DPM))) {
			xcc_id = GET_INST(GC, 0);
			*value = SMUQ10_ROUND(GET_METRIC_FIELD(GfxclkFrequency, version)[xcc_id]);
		} else {
			*value = 0;
		}
		break;
	case METRICS_CURR_SOCCLK:
	case METRICS_AVERAGE_SOCCLK:
		*value = SMUQ10_ROUND(GET_METRIC_FIELD(SocclkFrequency, version)[0]);
		break;
	case METRICS_CURR_UCLK:
	case METRICS_AVERAGE_UCLK:
		*value = SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequency, version));
		break;
	case METRICS_CURR_VCLK:
		*value = SMUQ10_ROUND(GET_METRIC_FIELD(VclkFrequency, version)[0]);
		break;
	case METRICS_CURR_DCLK:
		*value = SMUQ10_ROUND(GET_METRIC_FIELD(DclkFrequency, version)[0]);
		break;
	case METRICS_CURR_FCLK:
		*value = SMUQ10_ROUND(GET_METRIC_FIELD(FclkFrequency, version));
		break;
	case METRICS_AVERAGE_GFXACTIVITY:
		*value = SMUQ10_ROUND(GET_METRIC_FIELD(SocketGfxBusy, version));
		break;
	case METRICS_AVERAGE_MEMACTIVITY:
		*value = SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilization, version));
		break;
	case METRICS_CURR_SOCKETPOWER:
		*value = SMUQ10_ROUND(GET_METRIC_FIELD(SocketPower, version)) << 8;
		break;
	case METRICS_TEMPERATURE_HOTSPOT:
		*value = SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketTemperature, version)) *
			 SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
		break;
	case METRICS_TEMPERATURE_MEM:
		*value = SMUQ10_ROUND(GET_METRIC_FIELD(MaxHbmTemperature, version)) *
			 SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
		break;
	/* This is the max of all VRs and not just SOC VR.
	 * No need to define another data type for the same.
	 */
	case METRICS_TEMPERATURE_VRSOC:
		*value = SMUQ10_ROUND(GET_METRIC_FIELD(MaxVrTemperature, version)) *
			 SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
		break;
	default:
		*value = UINT_MAX;
		break;
	}

	return ret;
}

static int smu_v13_0_6_get_current_clk_freq_by_table(struct smu_context *smu,
						     enum smu_clk_type clk_type,
						     uint32_t *value)
{
	MetricsMember_t member_type;

	if (!value)
		return -EINVAL;

	switch (clk_type) {
	case SMU_GFXCLK:
	case SMU_SCLK:
		member_type = METRICS_CURR_GFXCLK;
		break;
	case SMU_UCLK:
	case SMU_MCLK:
		member_type = METRICS_CURR_UCLK;
		break;
	case SMU_SOCCLK:
		member_type = METRICS_CURR_SOCCLK;
		break;
	case SMU_VCLK:
		member_type = METRICS_CURR_VCLK;
		break;
	case SMU_DCLK:
		member_type = METRICS_CURR_DCLK;
		break;
	case SMU_FCLK:
		member_type = METRICS_CURR_FCLK;
		break;
	default:
		return -EINVAL;
	}

	return smu_v13_0_6_get_smu_metrics_data(smu, member_type, value);
}

static int smu_v13_0_6_emit_clk_levels(struct smu_context *smu,
				       enum smu_clk_type type, char *buf,
				       int *offset)
{
	int now, size = *offset, start_offset = *offset;
	int ret = 0;
	struct smu_umd_pstate_table *pstate_table = &smu->pstate_table;
	struct smu_dpm_table *single_dpm_table = NULL;
	struct smu_dpm_context *smu_dpm = &smu->smu_dpm;
	struct smu_13_0_dpm_context *dpm_context = NULL;

	if (amdgpu_ras_intr_triggered()) {
		sysfs_emit_at(buf, size, "unavailable\n");
		return -EBUSY;
	}

	dpm_context = smu_dpm->dpm_context;

	switch (type) {
	case SMU_OD_SCLK:
		size += sysfs_emit_at(buf, size, "%s:\n", "OD_SCLK");
		size += sysfs_emit_at(buf, size, "0: %uMhz\n1: %uMhz\n",
				      pstate_table->gfxclk_pstate.curr.min,
				      pstate_table->gfxclk_pstate.curr.max);
		break;
	case SMU_OD_MCLK:
		if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(SET_UCLK_MAX)))
			return -EOPNOTSUPP;

		size += sysfs_emit_at(buf, size, "%s:\n", "OD_MCLK");
		size += sysfs_emit_at(buf, size, "0: %uMhz\n1: %uMhz\n",
				      pstate_table->uclk_pstate.curr.min,
				      pstate_table->uclk_pstate.curr.max);
		break;

	case SMU_SCLK:
	case SMU_GFXCLK:
		single_dpm_table = &(dpm_context->dpm_tables.gfx_table);
		break;
	case SMU_MCLK:
	case SMU_UCLK:
		single_dpm_table = &(dpm_context->dpm_tables.uclk_table);
		break;
	case SMU_SOCCLK:
		single_dpm_table = &(dpm_context->dpm_tables.soc_table);
		break;
	case SMU_FCLK:
		single_dpm_table = &(dpm_context->dpm_tables.fclk_table);
		break;
	case SMU_VCLK:
		single_dpm_table = &(dpm_context->dpm_tables.vclk_table);
		break;
	case SMU_DCLK:
		single_dpm_table = &(dpm_context->dpm_tables.dclk_table);
		break;
	default:
		break;
	}

	if (single_dpm_table) {
		ret = smu_v13_0_6_get_current_clk_freq_by_table(smu, type,
								&now);
		if (ret) {
			dev_err(smu->adev->dev,
				"Attempt to get current clk failed!");
			return ret;
		}
		return smu_cmn_print_dpm_clk_levels(smu, single_dpm_table, now,
						    buf, offset);
	}

	*offset += size - start_offset;

	return 0;
}

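/*
 * Soft min/max messages pack the clock ID in the upper 16 bits of the
 * parameter and the target frequency in MHz in the lower 16; gfxclk has
 * dedicated messages that take the frequency alone.
 */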
static int smu_v13_0_6_upload_dpm_level(struct smu_context *smu, bool max,
					uint32_t feature_mask, uint32_t level)
{
	struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context;
	uint32_t freq;
	int ret = 0;

	if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DPM_GFXCLK_BIT) &&
	    (feature_mask & FEATURE_MASK(FEATURE_DPM_GFXCLK))) {
		freq = dpm_context->dpm_tables.gfx_table.dpm_levels[level].value;
		ret = smu_cmn_send_smc_msg_with_param(
			smu,
			(max ? SMU_MSG_SetSoftMaxGfxClk :
			       SMU_MSG_SetSoftMinGfxclk),
			freq & 0xffff, NULL);
		if (ret) {
			dev_err(smu->adev->dev,
				"Failed to set soft %s gfxclk!\n",
				max ? "max" : "min");
			return ret;
		}
	}

	if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DPM_UCLK_BIT) &&
	    (feature_mask & FEATURE_MASK(FEATURE_DPM_UCLK))) {
		freq = dpm_context->dpm_tables.uclk_table.dpm_levels[level]
			       .value;
		ret = smu_cmn_send_smc_msg_with_param(
			smu,
			(max ? SMU_MSG_SetSoftMaxByFreq :
			       SMU_MSG_SetSoftMinByFreq),
			(PPCLK_UCLK << 16) | (freq & 0xffff), NULL);
		if (ret) {
			dev_err(smu->adev->dev,
				"Failed to set soft %s memclk!\n",
				max ? "max" : "min");
			return ret;
		}
	}

	if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DPM_SOCCLK_BIT) &&
	    (feature_mask & FEATURE_MASK(FEATURE_DPM_SOCCLK))) {
		freq = dpm_context->dpm_tables.soc_table.dpm_levels[level].value;
		ret = smu_cmn_send_smc_msg_with_param(
			smu,
			(max ? SMU_MSG_SetSoftMaxByFreq :
			       SMU_MSG_SetSoftMinByFreq),
			(PPCLK_SOCCLK << 16) | (freq & 0xffff), NULL);
		if (ret) {
			dev_err(smu->adev->dev,
				"Failed to set soft %s socclk!\n",
				max ? "max" : "min");
			return ret;
		}
	}

	return ret;
}

static int smu_v13_0_6_force_clk_levels(struct smu_context *smu,
					enum smu_clk_type type, uint32_t mask)
{
	struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context;
	struct smu_dpm_table *single_dpm_table = NULL;
	uint32_t soft_min_level, soft_max_level;
	int ret = 0;

	soft_min_level = mask ? (ffs(mask) - 1) : 0;
	soft_max_level = mask ? (fls(mask) - 1) : 0;

	switch (type) {
	case SMU_SCLK:
		single_dpm_table = &(dpm_context->dpm_tables.gfx_table);
		if (soft_max_level >= single_dpm_table->count) {
			dev_err(smu->adev->dev,
				"Clock level specified %d is over max allowed %d\n",
				soft_max_level, single_dpm_table->count - 1);
			ret = -EINVAL;
			break;
		}

		ret = smu_v13_0_6_upload_dpm_level(
			smu, false, FEATURE_MASK(FEATURE_DPM_GFXCLK),
			soft_min_level);
		if (ret) {
			dev_err(smu->adev->dev,
				"Failed to upload boot level to lowest!\n");
			break;
		}

		ret = smu_v13_0_6_upload_dpm_level(
			smu, true, FEATURE_MASK(FEATURE_DPM_GFXCLK),
			soft_max_level);
		if (ret)
			dev_err(smu->adev->dev,
				"Failed to upload dpm max level to highest!\n");

		break;

	case SMU_MCLK:
	case SMU_SOCCLK:
	case SMU_FCLK:
		/*
		 * Should not arrive here since smu_13_0_6 does not
		 * support mclk/socclk/fclk softmin/softmax settings
		 */
		ret = -EINVAL;
		break;

	default:
		break;
	}

	return ret;
}

smu_v13_0_6_get_current_activity_percent(struct smu_context * smu,enum amd_pp_sensors sensor,uint32_t * value)1559 static int smu_v13_0_6_get_current_activity_percent(struct smu_context *smu,
1560 enum amd_pp_sensors sensor,
1561 uint32_t *value)
1562 {
1563 int ret = 0;
1564
1565 if (!value)
1566 return -EINVAL;
1567
1568 switch (sensor) {
1569 case AMDGPU_PP_SENSOR_GPU_LOAD:
1570 ret = smu_v13_0_6_get_smu_metrics_data(
1571 smu, METRICS_AVERAGE_GFXACTIVITY, value);
1572 break;
1573 case AMDGPU_PP_SENSOR_MEM_LOAD:
1574 ret = smu_v13_0_6_get_smu_metrics_data(
1575 smu, METRICS_AVERAGE_MEMACTIVITY, value);
1576 break;
1577 default:
1578 dev_err(smu->adev->dev,
1579 "Invalid sensor for retrieving clock activity\n");
1580 return -EINVAL;
1581 }
1582
1583 return ret;
1584 }
1585
smu_v13_0_6_thermal_get_temperature(struct smu_context * smu,enum amd_pp_sensors sensor,uint32_t * value)1586 static int smu_v13_0_6_thermal_get_temperature(struct smu_context *smu,
1587 enum amd_pp_sensors sensor,
1588 uint32_t *value)
1589 {
1590 int ret = 0;
1591
1592 if (!value)
1593 return -EINVAL;
1594
1595 switch (sensor) {
1596 case AMDGPU_PP_SENSOR_HOTSPOT_TEMP:
1597 ret = smu_v13_0_6_get_smu_metrics_data(
1598 smu, METRICS_TEMPERATURE_HOTSPOT, value);
1599 break;
1600 case AMDGPU_PP_SENSOR_MEM_TEMP:
1601 ret = smu_v13_0_6_get_smu_metrics_data(
1602 smu, METRICS_TEMPERATURE_MEM, value);
1603 break;
1604 default:
1605 dev_err(smu->adev->dev, "Invalid sensor for retrieving temp\n");
1606 return -EINVAL;
1607 }
1608
1609 return ret;
1610 }
1611
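/**
 * smu_v13_0_6_read_sensor - Top-level sensor read dispatcher
 * @smu: smu_context pointer
 * @sensor: sensor to query
 * @data: output buffer
 * @size: output size in bytes; all supported sensors report 4 bytes
 *
 * Reads are short-circuited to success while a RAS interrupt is being
 * handled. Clock sensors are converted from MHz to the expected 10 kHz
 * unit before returning.
 */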
1612 static int smu_v13_0_6_read_sensor(struct smu_context *smu,
1613 enum amd_pp_sensors sensor, void *data,
1614 uint32_t *size)
1615 {
1616 struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context;
1617 int ret = 0;
1618
1619 if (amdgpu_ras_intr_triggered())
1620 return 0;
1621
1622 if (!data || !size)
1623 return -EINVAL;
1624
1625 switch (sensor) {
1626 case AMDGPU_PP_SENSOR_MEM_LOAD:
1627 case AMDGPU_PP_SENSOR_GPU_LOAD:
1628 ret = smu_v13_0_6_get_current_activity_percent(smu, sensor,
1629 (uint32_t *)data);
1630 *size = 4;
1631 break;
1632 case AMDGPU_PP_SENSOR_GPU_INPUT_POWER:
1633 ret = smu_v13_0_6_get_smu_metrics_data(smu,
1634 METRICS_CURR_SOCKETPOWER,
1635 (uint32_t *)data);
1636 *size = 4;
1637 break;
1638 case AMDGPU_PP_SENSOR_HOTSPOT_TEMP:
1639 case AMDGPU_PP_SENSOR_MEM_TEMP:
1640 ret = smu_v13_0_6_thermal_get_temperature(smu, sensor,
1641 (uint32_t *)data);
1642 *size = 4;
1643 break;
1644 case AMDGPU_PP_SENSOR_GFX_MCLK:
1645 ret = smu_v13_0_6_get_current_clk_freq_by_table(
1646 smu, SMU_UCLK, (uint32_t *)data);
1647 /* the output clock frequency is in units of 10 kHz */
1648 *(uint32_t *)data *= 100;
1649 *size = 4;
1650 break;
1651 case AMDGPU_PP_SENSOR_GFX_SCLK:
1652 ret = smu_v13_0_6_get_current_clk_freq_by_table(
1653 smu, SMU_GFXCLK, (uint32_t *)data);
1654 *(uint32_t *)data *= 100;
1655 *size = 4;
1656 break;
1657 case AMDGPU_PP_SENSOR_VDDGFX:
1658 ret = smu_v13_0_get_gfx_vdd(smu, (uint32_t *)data);
1659 *size = 4;
1660 break;
1661 case AMDGPU_PP_SENSOR_VDDBOARD:
1662 if (smu_v13_0_6_cap_supported(smu, SMU_CAP(BOARD_VOLTAGE))) {
1663 *(uint32_t *)data = dpm_context->board_volt;
1664 *size = 4;
1665 break;
1666 } else {
1667 ret = -EOPNOTSUPP;
1668 break;
1669 }
1670 case AMDGPU_PP_SENSOR_NODEPOWERLIMIT:
1671 case AMDGPU_PP_SENSOR_NODEPOWER:
1672 case AMDGPU_PP_SENSOR_GPPTRESIDENCY:
1673 case AMDGPU_PP_SENSOR_MAXNODEPOWERLIMIT:
1674 ret = smu_v13_0_12_get_npm_data(smu, sensor, (uint32_t *)data);
1675 if (ret)
1676 return ret;
1677 *size = 4;
1678 break;
1679 case AMDGPU_PP_SENSOR_UBB_POWER:
1680 case AMDGPU_PP_SENSOR_UBB_POWER_LIMIT:
1681 ret = smu_v13_0_12_get_system_power(smu, sensor, (uint32_t *)data);
1682 if (ret)
1683 return ret;
1684 *size = 4;
1685 break;
1686 case AMDGPU_PP_SENSOR_GPU_AVG_POWER:
1687 default:
1688 ret = -EOPNOTSUPP;
1689 break;
1690 }
1691
1692 return ret;
1693 }
1694
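/**
 * smu_v13_0_6_get_power_limit - Query socket power (PPT) limits
 * @smu: smu_context pointer
 * @current_power_limit: optional output for the current limit
 * @default_power_limit: optional output for the default limit
 * @max_power_limit: optional output for the maximum allowed limit
 * @min_power_limit: optional output for the minimum allowed limit
 *
 * The current limit is queried with GetPptLimit; default and max both
 * report MaxSocketPowerLimit from the PPTable, and min is always zero.
 */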
1695 static int smu_v13_0_6_get_power_limit(struct smu_context *smu,
1696 uint32_t *current_power_limit,
1697 uint32_t *default_power_limit,
1698 uint32_t *max_power_limit,
1699 uint32_t *min_power_limit)
1700 {
1701 struct smu_table_context *smu_table = &smu->smu_table;
1702 struct PPTable_t *pptable =
1703 (struct PPTable_t *)smu_table->driver_pptable;
1704 uint32_t power_limit = 0;
1705 int ret;
1706
1707 ret = smu_cmn_send_smc_msg(smu, SMU_MSG_GetPptLimit, &power_limit);
1708
1709 if (ret) {
1710 dev_err(smu->adev->dev, "Couldn't get PPT limit");
1711 return -EINVAL;
1712 }
1713
1714 if (current_power_limit)
1715 *current_power_limit = power_limit;
1716 if (default_power_limit)
1717 *default_power_limit = pptable->MaxSocketPowerLimit;
1718
1719 if (max_power_limit) {
1720 *max_power_limit = pptable->MaxSocketPowerLimit;
1721 }
1722
1723 if (min_power_limit)
1724 *min_power_limit = 0;
1725 return 0;
1726 }
1727
1728 static int smu_v13_0_6_set_power_limit(struct smu_context *smu,
1729 enum smu_ppt_limit_type limit_type,
1730 uint32_t limit)
1731 {
1732 struct smu_table_context *smu_table = &smu->smu_table;
1733 struct PPTable_t *pptable =
1734 (struct PPTable_t *)smu_table->driver_pptable;
1735 int ret;
1736
1737 if (limit_type == SMU_FAST_PPT_LIMIT) {
1738 if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(FAST_PPT)))
1739 return -EOPNOTSUPP;
1740 if (limit > pptable->PPT1Max || limit < pptable->PPT1Min) {
1741 dev_err(smu->adev->dev,
1742 "New power limit (%d) should be between min %d max %d\n",
1743 limit, pptable->PPT1Min, pptable->PPT1Max);
1744 return -EINVAL;
1745 }
1746 ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetFastPptLimit,
1747 limit, NULL);
1748 if (ret)
1749 dev_err(smu->adev->dev, "Set fast PPT limit failed!\n");
1750 return ret;
1751 }
1752
1753 return smu_v13_0_set_power_limit(smu, limit_type, limit);
1754 }
1755
1756 static int smu_v13_0_6_get_ppt_limit(struct smu_context *smu,
1757 uint32_t *ppt_limit,
1758 enum smu_ppt_limit_type type,
1759 enum smu_ppt_limit_level level)
1760 {
1761 struct smu_table_context *smu_table = &smu->smu_table;
1762 struct PPTable_t *pptable =
1763 (struct PPTable_t *)smu_table->driver_pptable;
1764 int ret = 0;
1765
1766 if (type == SMU_FAST_PPT_LIMIT) {
1767 if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(FAST_PPT)))
1768 return -EOPNOTSUPP;
1769 switch (level) {
1770 case SMU_PPT_LIMIT_MAX:
1771 *ppt_limit = pptable->PPT1Max;
1772 break;
1773 case SMU_PPT_LIMIT_CURRENT:
1774 ret = smu_cmn_send_smc_msg(smu, SMU_MSG_GetFastPptLimit, ppt_limit);
1775 if (ret)
1776 dev_err(smu->adev->dev, "Get fast PPT limit failed!\n");
1777 break;
1778 case SMU_PPT_LIMIT_DEFAULT:
1779 *ppt_limit = pptable->PPT1Default;
1780 break;
1781 case SMU_PPT_LIMIT_MIN:
1782 *ppt_limit = pptable->PPT1Min;
1783 break;
1784 default:
1785 return -EOPNOTSUPP;
1786 }
1787 return ret;
1788 }
1789 return -EOPNOTSUPP;
1790 }
1791
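/**
 * smu_v13_0_6_irq_process - Process SMU-to-host interrupts from MP1
 * @adev: amdgpu device pointer
 * @source: interrupt source
 * @entry: interrupt vector entry; src_data[0] carries the context id
 *
 * Acknowledges the SMUToHost interrupt and, for thermal throttling
 * events, records the throttle status from src_data[1] and schedules
 * the rate-limited throttling-logging worker.
 */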
1792 static int smu_v13_0_6_irq_process(struct amdgpu_device *adev,
1793 struct amdgpu_irq_src *source,
1794 struct amdgpu_iv_entry *entry)
1795 {
1796 struct smu_context *smu = adev->powerplay.pp_handle;
1797 struct smu_power_context *smu_power = &smu->smu_power;
1798 struct smu_13_0_power_context *power_context = smu_power->power_context;
1799 uint32_t client_id = entry->client_id;
1800 uint32_t ctxid = entry->src_data[0];
1801 uint32_t src_id = entry->src_id;
1802 uint32_t data;
1803
1804 if (client_id == SOC15_IH_CLIENTID_MP1) {
1805 if (src_id == IH_INTERRUPT_ID_TO_DRIVER) {
1806 /* ACK SMUToHost interrupt */
1807 data = RREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT_CTRL);
1808 data = REG_SET_FIELD(data, MP1_SMN_IH_SW_INT_CTRL, INT_ACK, 1);
1809 WREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT_CTRL, data);
1810 /*
1811 * ctxid is used to distinguish different events for SMCToHost
1812 * interrupt.
1813 */
1814 switch (ctxid) {
1815 case IH_INTERRUPT_CONTEXT_ID_THERMAL_THROTTLING:
1816 /*
1817 * Increment the throttle interrupt counter
1818 */
1819 atomic64_inc(&smu->throttle_int_counter);
1820
1821 if (!atomic_read(&adev->throttling_logging_enabled))
1822 return 0;
1823
1824 /* This uses the new method, which fixes the
1825 * incorrect throttling status reporting
1826 * through the metrics table. For older FWs,
1827 * it will be ignored.
1828 */
1829 if (__ratelimit(&adev->throttling_logging_rs)) {
1830 atomic_set(
1831 &power_context->throttle_status,
1832 entry->src_data[1]);
1833 schedule_work(&smu->throttling_logging_work);
1834 }
1835 break;
1836 default:
1837 dev_dbg(adev->dev, "Unhandled context id %d from client:%d!\n",
1838 ctxid, client_id);
1839 break;
1840 }
1841 }
1842 }
1843
1844 return 0;
1845 }
1846
1847 static int smu_v13_0_6_set_irq_state(struct amdgpu_device *adev,
1848 struct amdgpu_irq_src *source,
1849 unsigned type,
1850 enum amdgpu_interrupt_state state)
1851 {
1852 uint32_t val = 0;
1853
1854 switch (state) {
1855 case AMDGPU_IRQ_STATE_DISABLE:
1856 /* For MP1 SW irqs */
1857 val = RREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT_CTRL);
1858 val = REG_SET_FIELD(val, MP1_SMN_IH_SW_INT_CTRL, INT_MASK, 1);
1859 WREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT_CTRL, val);
1860
1861 break;
1862 case AMDGPU_IRQ_STATE_ENABLE:
1863 /* For MP1 SW irqs */
1864 val = RREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT);
1865 val = REG_SET_FIELD(val, MP1_SMN_IH_SW_INT, ID, 0xFE);
1866 val = REG_SET_FIELD(val, MP1_SMN_IH_SW_INT, VALID, 0);
1867 WREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT, val);
1868
1869 val = RREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT_CTRL);
1870 val = REG_SET_FIELD(val, MP1_SMN_IH_SW_INT_CTRL, INT_MASK, 0);
1871 WREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT_CTRL, val);
1872
1873 break;
1874 default:
1875 break;
1876 }
1877
1878 return 0;
1879 }
1880
1881 static const struct amdgpu_irq_src_funcs smu_v13_0_6_irq_funcs = {
1882 .set = smu_v13_0_6_set_irq_state,
1883 .process = smu_v13_0_6_irq_process,
1884 };
1885
1886 static int smu_v13_0_6_register_irq_handler(struct smu_context *smu)
1887 {
1888 struct amdgpu_device *adev = smu->adev;
1889 struct amdgpu_irq_src *irq_src = &smu->irq_source;
1890 int ret = 0;
1891
1892 if (amdgpu_sriov_vf(adev))
1893 return 0;
1894
1895 irq_src->num_types = 1;
1896 irq_src->funcs = &smu_v13_0_6_irq_funcs;
1897
1898 ret = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_MP1,
1899 IH_INTERRUPT_ID_TO_DRIVER,
1900 irq_src);
1901 if (ret)
1902 return ret;
1903
1904 return ret;
1905 }
1906
1907 static int smu_v13_0_6_notify_unload(struct smu_context *smu)
1908 {
1909 if (amdgpu_in_reset(smu->adev))
1910 return 0;
1911
1912 dev_dbg(smu->adev->dev, "Notify PMFW about driver unload");
1913 /* Ignore the return value; just inform FW that the driver is going away */
1914 smu_cmn_send_smc_msg(smu, SMU_MSG_PrepareMp1ForUnload, NULL);
1915
1916 return 0;
1917 }
1918
1919 static int smu_v13_0_6_mca_set_debug_mode(struct smu_context *smu, bool enable)
1920 {
1921 /* NOTE: this ClearMcaOnRead message is only supported for smu version 85.72.0 or higher */
1922 if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(MCA_DEBUG_MODE)))
1923 return 0;
1924
1925 return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ClearMcaOnRead,
1926 enable ? 0 : ClearMcaOnRead_UE_FLAG_MASK | ClearMcaOnRead_CE_POLL_MASK,
1927 NULL);
1928 }
1929
1930 static int smu_v13_0_6_system_features_control(struct smu_context *smu,
1931 bool enable)
1932 {
1933 struct amdgpu_device *adev = smu->adev;
1934 int ret = 0;
1935
1936 if (amdgpu_sriov_vf(adev))
1937 return 0;
1938
1939 if (enable) {
1940 if (!(adev->flags & AMD_IS_APU))
1941 ret = smu_v13_0_system_features_control(smu, enable);
1942 } else {
1943 /* Notify FW that the device is no longer driver managed */
1944 smu_v13_0_6_notify_unload(smu);
1945 }
1946
1947 return ret;
1948 }
1949
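/**
 * smu_v13_0_6_set_gfx_soft_freq_limited_range - Set GFX soft clock range
 * @smu: smu_context pointer
 * @min: soft minimum GFXCLK in MHz
 * @max: soft maximum GFXCLK in MHz
 *
 * Only the lower 16 bits of each frequency are passed to the firmware;
 * the soft max is programmed before the soft min.
 */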
1950 static int smu_v13_0_6_set_gfx_soft_freq_limited_range(struct smu_context *smu,
1951 uint32_t min,
1952 uint32_t max)
1953 {
1954 int ret;
1955
1956 ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMaxGfxClk,
1957 max & 0xffff, NULL);
1958 if (ret)
1959 return ret;
1960
1961 ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMinGfxclk,
1962 min & 0xffff, NULL);
1963
1964 return ret;
1965 }
1966
1967 static int smu_v13_0_6_set_performance_level(struct smu_context *smu,
1968 enum amd_dpm_forced_level level)
1969 {
1970 struct smu_dpm_context *smu_dpm = &(smu->smu_dpm);
1971 struct smu_13_0_dpm_context *dpm_context = smu_dpm->dpm_context;
1972 struct smu_dpm_table *gfx_table = &dpm_context->dpm_tables.gfx_table;
1973 struct smu_dpm_table *uclk_table = &dpm_context->dpm_tables.uclk_table;
1974 struct smu_umd_pstate_table *pstate_table = &smu->pstate_table;
1975 int ret;
1976
1977 /* Disable determinism if switching to another mode */
1978 if ((smu_dpm->dpm_level == AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) &&
1979 (level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM)) {
1980 smu_cmn_send_smc_msg(smu, SMU_MSG_DisableDeterminism, NULL);
1981 pstate_table->gfxclk_pstate.curr.max =
1982 SMU_DPM_TABLE_MAX(gfx_table);
1983 }
1984
1985 switch (level) {
1986 case AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM:
1987 return 0;
1988
1989 case AMD_DPM_FORCED_LEVEL_AUTO:
1990 if ((SMU_DPM_TABLE_MIN(gfx_table) !=
1991 pstate_table->gfxclk_pstate.curr.min) ||
1992 (SMU_DPM_TABLE_MAX(gfx_table) !=
1993 pstate_table->gfxclk_pstate.curr.max)) {
1994 ret = smu_v13_0_6_set_gfx_soft_freq_limited_range(
1995 smu, SMU_DPM_TABLE_MIN(gfx_table),
1996 SMU_DPM_TABLE_MAX(gfx_table));
1997 if (ret)
1998 return ret;
1999
2000 pstate_table->gfxclk_pstate.curr.min =
2001 SMU_DPM_TABLE_MIN(gfx_table);
2002 pstate_table->gfxclk_pstate.curr.max =
2003 SMU_DPM_TABLE_MAX(gfx_table);
2004 }
2005
2006 if (SMU_DPM_TABLE_MAX(uclk_table) !=
2007 pstate_table->uclk_pstate.curr.max) {
2008 /* Min UCLK is not expected to be changed */
2009 ret = smu_v13_0_set_soft_freq_limited_range(
2010 smu, SMU_UCLK, 0, SMU_DPM_TABLE_MAX(uclk_table),
2011 false);
2012 if (ret)
2013 return ret;
2014 pstate_table->uclk_pstate.curr.max =
2015 SMU_DPM_TABLE_MAX(uclk_table);
2016 }
2017 smu_v13_0_reset_custom_level(smu);
2018
2019 return 0;
2020 case AMD_DPM_FORCED_LEVEL_MANUAL:
2021 return 0;
2022 default:
2023 break;
2024 }
2025
2026 return -EOPNOTSUPP;
2027 }
2028
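/**
 * smu_v13_0_6_set_soft_freq_limited_range - Set a soft frequency range
 * @smu: smu_context pointer
 * @clk_type: SMU_GFXCLK/SMU_SCLK or SMU_UCLK
 * @min: requested minimum frequency in MHz
 * @max: requested maximum frequency in MHz
 * @automatic: unused on this SOC
 *
 * In manual mode, GFXCLK accepts a min/max pair while UCLK accepts only
 * a max cap, and only when the firmware advertises SET_UCLK_MAX. In
 * performance-determinism mode, the default range is restored and
 * determinism is enabled at @max.
 */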
2029 static int smu_v13_0_6_set_soft_freq_limited_range(struct smu_context *smu,
2030 enum smu_clk_type clk_type,
2031 uint32_t min, uint32_t max,
2032 bool automatic)
2033 {
2034 struct smu_dpm_context *smu_dpm = &(smu->smu_dpm);
2035 struct smu_13_0_dpm_context *dpm_context = smu_dpm->dpm_context;
2036 struct smu_umd_pstate_table *pstate_table = &smu->pstate_table;
2037 struct amdgpu_device *adev = smu->adev;
2038 uint32_t min_clk;
2039 uint32_t max_clk;
2040 int ret = 0;
2041
2042 if (clk_type != SMU_GFXCLK && clk_type != SMU_SCLK &&
2043 clk_type != SMU_UCLK)
2044 return -EINVAL;
2045
2046 if ((smu_dpm->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) &&
2047 (smu_dpm->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM))
2048 return -EINVAL;
2049
2050 if (smu_dpm->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) {
2051 if (min >= max) {
2052 dev_err(smu->adev->dev,
2053 "Minimum clk should be less than the maximum allowed clock\n");
2054 return -EINVAL;
2055 }
2056
2057 if (clk_type == SMU_GFXCLK) {
2058 if ((min == pstate_table->gfxclk_pstate.curr.min) &&
2059 (max == pstate_table->gfxclk_pstate.curr.max))
2060 return 0;
2061
2062 ret = smu_v13_0_6_set_gfx_soft_freq_limited_range(
2063 smu, min, max);
2064 if (!ret) {
2065 pstate_table->gfxclk_pstate.curr.min = min;
2066 pstate_table->gfxclk_pstate.curr.max = max;
2067 }
2068 }
2069
2070 if (clk_type == SMU_UCLK) {
2071 if (max == pstate_table->uclk_pstate.curr.max)
2072 return 0;
2073 /* For VF, only allowed in FW versions 85.102 or greater */
2074 if (!smu_v13_0_6_cap_supported(smu,
2075 SMU_CAP(SET_UCLK_MAX)))
2076 return -EOPNOTSUPP;
2077 /* Only max clock limiting is allowed for UCLK */
2078 ret = smu_v13_0_set_soft_freq_limited_range(
2079 smu, SMU_UCLK, 0, max, false);
2080 if (!ret)
2081 pstate_table->uclk_pstate.curr.max = max;
2082 }
2083
2084 return ret;
2085 }
2086
2087 if (smu_dpm->dpm_level == AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) {
2088 min_clk = SMU_DPM_TABLE_MIN(&dpm_context->dpm_tables.gfx_table);
2089 max_clk = SMU_DPM_TABLE_MAX(&dpm_context->dpm_tables.gfx_table);
2090 if (!max || (max < min_clk) || (max > max_clk)) {
2091 dev_warn(
2092 adev->dev,
2093 "Invalid max frequency %d MHz specified for determinism\n",
2094 max);
2095 return -EINVAL;
2096 }
2097
2098 /* Restore default min/max clocks and enable determinism */
2099 ret = smu_v13_0_6_set_gfx_soft_freq_limited_range(smu, min_clk,
2100 max_clk);
2101 if (!ret) {
2102 usleep_range(500, 1000);
2103 ret = smu_cmn_send_smc_msg_with_param(
2104 smu, SMU_MSG_EnableDeterminism, max, NULL);
2105 if (ret) {
2106 dev_err(adev->dev,
2107 "Failed to enable determinism at GFX clock %d MHz\n",
2108 max);
2109 } else {
2110 pstate_table->gfxclk_pstate.curr.min = min_clk;
2111 pstate_table->gfxclk_pstate.curr.max = max;
2112 }
2113 }
2114 }
2115
2116 return ret;
2117 }
2118
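/**
 * smu_v13_0_6_usr_edit_dpm_table - Handle pp_od_clk_voltage edits
 * @smu: smu_context pointer
 * @type: OD table command (edit sclk/mclk, restore or commit)
 * @input: command arguments; input[0] selects min (0) or max (1) and
 * input[1] carries the frequency in MHz
 * @size: number of elements in @input
 *
 * Edits are staged in the custom pstate table and applied on
 * PP_OD_COMMIT_DPM_TABLE; PP_OD_RESTORE_DEFAULT_TABLE reverts to the
 * default table limits.
 */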
2119 static int smu_v13_0_6_usr_edit_dpm_table(struct smu_context *smu,
2120 enum PP_OD_DPM_TABLE_COMMAND type,
2121 long input[], uint32_t size)
2122 {
2123 struct smu_dpm_context *smu_dpm = &(smu->smu_dpm);
2124 struct smu_13_0_dpm_context *dpm_context = smu_dpm->dpm_context;
2125 struct smu_dpm_table *uclk_table = &dpm_context->dpm_tables.uclk_table;
2126 struct smu_umd_pstate_table *pstate_table = &smu->pstate_table;
2127 uint32_t min_clk;
2128 uint32_t max_clk;
2129 int ret = 0;
2130
2131 /* Only allowed in manual or determinism mode */
2132 if ((smu_dpm->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) &&
2133 (smu_dpm->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM))
2134 return -EINVAL;
2135
2136 switch (type) {
2137 case PP_OD_EDIT_SCLK_VDDC_TABLE:
2138 if (size != 2) {
2139 dev_err(smu->adev->dev,
2140 "Input parameter number not correct\n");
2141 return -EINVAL;
2142 }
2143 min_clk = SMU_DPM_TABLE_MIN(&dpm_context->dpm_tables.gfx_table);
2144 max_clk = SMU_DPM_TABLE_MAX(&dpm_context->dpm_tables.gfx_table);
2145 if (input[0] == 0) {
2146 if (input[1] < min_clk) {
2147 dev_warn(
2148 smu->adev->dev,
2149 "Minimum GFX clk (%ld) MHz specified is less than the minimum allowed (%d) MHz\n",
2150 input[1], min_clk);
2151 pstate_table->gfxclk_pstate.custom.min =
2152 pstate_table->gfxclk_pstate.curr.min;
2153 return -EINVAL;
2154 }
2155
2156 pstate_table->gfxclk_pstate.custom.min = input[1];
2157 } else if (input[0] == 1) {
2158 if (input[1] > max_clk) {
2159 dev_warn(
2160 smu->adev->dev,
2161 "Maximum GFX clk (%ld) MHz specified is greater than the maximum allowed (%d) MHz\n",
2162 input[1], max_clk);
2163 pstate_table->gfxclk_pstate.custom.max =
2164 pstate_table->gfxclk_pstate.curr.max;
2165 return -EINVAL;
2166 }
2167
2168 pstate_table->gfxclk_pstate.custom.max = input[1];
2169 } else {
2170 return -EINVAL;
2171 }
2172 break;
2173 case PP_OD_EDIT_MCLK_VDDC_TABLE:
2174 if (size != 2) {
2175 dev_err(smu->adev->dev,
2176 "Input parameter number not correct\n");
2177 return -EINVAL;
2178 }
2179
2180 if (!smu_cmn_feature_is_enabled(smu,
2181 SMU_FEATURE_DPM_UCLK_BIT)) {
2182 dev_warn(smu->adev->dev,
2183 "UCLK_LIMITS setting not supported!\n");
2184 return -EOPNOTSUPP;
2185 }
2186 max_clk =
2187 SMU_DPM_TABLE_MAX(&dpm_context->dpm_tables.uclk_table);
2188 if (input[0] == 0) {
2189 dev_info(smu->adev->dev,
2190 "Setting min UCLK level is not supported");
2191 return -EINVAL;
2192 } else if (input[0] == 1) {
2193 if (input[1] > max_clk) {
2194 dev_warn(
2195 smu->adev->dev,
2196 "Maximum UCLK (%ld) MHz specified is greater than the maximum allowed (%d) MHz\n",
2197 input[1], max_clk);
2198 pstate_table->uclk_pstate.custom.max =
2199 pstate_table->uclk_pstate.curr.max;
2200 return -EINVAL;
2201 }
2202
2203 pstate_table->uclk_pstate.custom.max = input[1];
2204 }
2205 break;
2206
2207 case PP_OD_RESTORE_DEFAULT_TABLE:
2208 if (size != 0) {
2209 dev_err(smu->adev->dev,
2210 "Input parameter number not correct\n");
2211 return -EINVAL;
2212 } else {
2213 /* Use the default frequencies for manual and determinism mode */
2214 min_clk = SMU_DPM_TABLE_MIN(
2215 &dpm_context->dpm_tables.gfx_table);
2216 max_clk = SMU_DPM_TABLE_MAX(
2217 &dpm_context->dpm_tables.gfx_table);
2218
2219 ret = smu_v13_0_6_set_soft_freq_limited_range(
2220 smu, SMU_GFXCLK, min_clk, max_clk, false);
2221
2222 if (ret)
2223 return ret;
2224
2225 if (SMU_DPM_TABLE_MAX(uclk_table) !=
2226 pstate_table->uclk_pstate.curr.max) {
2227 min_clk = SMU_DPM_TABLE_MIN(&dpm_context->dpm_tables.uclk_table);
2228 max_clk = SMU_DPM_TABLE_MAX(&dpm_context->dpm_tables.uclk_table);
2229 ret = smu_v13_0_6_set_soft_freq_limited_range(smu,
2230 SMU_UCLK, min_clk,
2231 max_clk, false);
2232 if (ret)
2233 return ret;
2234 }
2235 smu_v13_0_reset_custom_level(smu);
2236 }
2237 break;
2238 case PP_OD_COMMIT_DPM_TABLE:
2239 if (size != 0) {
2240 dev_err(smu->adev->dev,
2241 "Input parameter number not correct\n");
2242 return -EINVAL;
2243 } else {
2244 if (!pstate_table->gfxclk_pstate.custom.min)
2245 pstate_table->gfxclk_pstate.custom.min =
2246 pstate_table->gfxclk_pstate.curr.min;
2247
2248 if (!pstate_table->gfxclk_pstate.custom.max)
2249 pstate_table->gfxclk_pstate.custom.max =
2250 pstate_table->gfxclk_pstate.curr.max;
2251
2252 min_clk = pstate_table->gfxclk_pstate.custom.min;
2253 max_clk = pstate_table->gfxclk_pstate.custom.max;
2254
2255 ret = smu_v13_0_6_set_soft_freq_limited_range(
2256 smu, SMU_GFXCLK, min_clk, max_clk, false);
2257
2258 if (ret)
2259 return ret;
2260
2261 if (!pstate_table->uclk_pstate.custom.max)
2262 return 0;
2263
2264 min_clk = pstate_table->uclk_pstate.curr.min;
2265 max_clk = pstate_table->uclk_pstate.custom.max;
2266 return smu_v13_0_6_set_soft_freq_limited_range(
2267 smu, SMU_UCLK, min_clk, max_clk, false);
2268 }
2269 break;
2270 default:
2271 return -ENOSYS;
2272 }
2273
2274 return ret;
2275 }
2276
2277 static int smu_v13_0_6_get_enabled_mask(struct smu_context *smu,
2278 struct smu_feature_bits *feature_mask)
2279 {
2280 int ret;
2281
2282 ret = smu_cmn_get_enabled_mask(smu, feature_mask);
2283
2284 if (ret == -EIO && !smu_v13_0_6_cap_supported(smu, SMU_CAP(DPM))) {
2285 smu_feature_bits_clearall(feature_mask);
2286 ret = 0;
2287 }
2288
2289 return ret;
2290 }
2291
2292 static bool smu_v13_0_6_is_dpm_running(struct smu_context *smu)
2293 {
2294 int ret;
2295 struct smu_feature_bits feature_enabled;
2296
2297 if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12))
2298 return smu_v13_0_12_is_dpm_running(smu);
2299
2300 ret = smu_v13_0_6_get_enabled_mask(smu, &feature_enabled);
2301
2302 if (ret)
2303 return false;
2304
2305 return smu_feature_bits_test_mask(&feature_enabled,
2306 smu_v13_0_6_dpm_features.bits);
2307 }
2308
2309 static int smu_v13_0_6_request_i2c_xfer(struct smu_context *smu,
2310 void *table_data)
2311 {
2312 struct smu_table_context *smu_table = &smu->smu_table;
2313 struct smu_table *table = &smu_table->driver_table;
2314 struct amdgpu_device *adev = smu->adev;
2315 uint32_t table_size;
2316 int ret = 0;
2317
2318 if (!table_data)
2319 return -EINVAL;
2320
2321 table_size = smu_table->tables[SMU_TABLE_I2C_COMMANDS].size;
2322
2323 memcpy(table->cpu_addr, table_data, table_size);
2324 /* Flush hdp cache */
2325 amdgpu_hdp_flush(adev, NULL);
2326 ret = smu_cmn_send_smc_msg(smu, SMU_MSG_RequestI2cTransaction,
2327 NULL);
2328
2329 return ret;
2330 }
2331
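/**
 * smu_v13_0_6_i2c_xfer - Execute an I2C transaction through the SMU
 * @i2c_adap: software I2C adapter backed by the SMU
 * @msg: array of I2C messages to transfer
 * @num_msgs: number of messages in @msg
 *
 * Flattens the messages into a SwI2cRequest_t command list, inserting
 * RESTART on direction changes and STOP at the end of the transaction,
 * retries once on failure, then copies back any read data.
 *
 * Return: number of messages transferred on success, negative error
 * code on failure.
 */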
2332 static int smu_v13_0_6_i2c_xfer(struct i2c_adapter *i2c_adap,
2333 struct i2c_msg *msg, int num_msgs)
2334 {
2335 struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(i2c_adap);
2336 struct amdgpu_device *adev = smu_i2c->adev;
2337 struct smu_context *smu = adev->powerplay.pp_handle;
2338 struct smu_table_context *smu_table = &smu->smu_table;
2339 struct smu_table *table = &smu_table->driver_table;
2340 SwI2cRequest_t *req, *res = (SwI2cRequest_t *)table->cpu_addr;
2341 int i, j, r, c;
2342 u16 dir;
2343
2344 if (!adev->pm.dpm_enabled)
2345 return -EBUSY;
2346
2347 req = kzalloc_obj(*req);
2348 if (!req)
2349 return -ENOMEM;
2350
2351 req->I2CcontrollerPort = smu_i2c->port;
2352 req->I2CSpeed = I2C_SPEED_FAST_400K;
2353 req->SlaveAddress = msg[0].addr << 1; /* wants an 8-bit address */
2354 dir = msg[0].flags & I2C_M_RD;
2355
2356 for (c = i = 0; i < num_msgs; i++) {
2357 for (j = 0; j < msg[i].len; j++, c++) {
2358 SwI2cCmd_t *cmd = &req->SwI2cCmds[c];
2359
2360 if (!(msg[i].flags & I2C_M_RD)) {
2361 /* write */
2362 cmd->CmdConfig |= CMDCONFIG_READWRITE_MASK;
2363 cmd->ReadWriteData = msg[i].buf[j];
2364 }
2365
2366 if ((dir ^ msg[i].flags) & I2C_M_RD) {
2367 /* The direction changes.
2368 */
2369 dir = msg[i].flags & I2C_M_RD;
2370 cmd->CmdConfig |= CMDCONFIG_RESTART_MASK;
2371 }
2372
2373 req->NumCmds++;
2374
2375 /*
2376 * Insert STOP if we are at the last byte of either last
2377 * message for the transaction or the client explicitly
2378 * requires a STOP at this particular message.
2379 */
2380 if ((j == msg[i].len - 1) &&
2381 ((i == num_msgs - 1) || (msg[i].flags & I2C_M_STOP))) {
2382 cmd->CmdConfig &= ~CMDCONFIG_RESTART_MASK;
2383 cmd->CmdConfig |= CMDCONFIG_STOP_MASK;
2384 }
2385 }
2386 }
2387 mutex_lock(&adev->pm.mutex);
2388 r = smu_v13_0_6_request_i2c_xfer(smu, req);
2389 if (r) {
2390 /* Retry once, in case of an i2c collision */
2391 r = smu_v13_0_6_request_i2c_xfer(smu, req);
2392 if (r)
2393 goto fail;
2394 }
2395
2396 for (c = i = 0; i < num_msgs; i++) {
2397 if (!(msg[i].flags & I2C_M_RD)) {
2398 c += msg[i].len;
2399 continue;
2400 }
2401 for (j = 0; j < msg[i].len; j++, c++) {
2402 SwI2cCmd_t *cmd = &res->SwI2cCmds[c];
2403
2404 msg[i].buf[j] = cmd->ReadWriteData;
2405 }
2406 }
2407 r = num_msgs;
2408 fail:
2409 mutex_unlock(&adev->pm.mutex);
2410 kfree(req);
2411 return r;
2412 }
2413
2414 static u32 smu_v13_0_6_i2c_func(struct i2c_adapter *adap)
2415 {
2416 return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
2417 }
2418
2419 static const struct i2c_algorithm smu_v13_0_6_i2c_algo = {
2420 .master_xfer = smu_v13_0_6_i2c_xfer,
2421 .functionality = smu_v13_0_6_i2c_func,
2422 };
2423
2424 static const struct i2c_adapter_quirks smu_v13_0_6_i2c_control_quirks = {
2425 .flags = I2C_AQ_COMB | I2C_AQ_COMB_SAME_ADDR | I2C_AQ_NO_ZERO_LEN,
2426 .max_read_len = MAX_SW_I2C_COMMANDS,
2427 .max_write_len = MAX_SW_I2C_COMMANDS,
2428 .max_comb_1st_msg_len = 2,
2429 .max_comb_2nd_msg_len = MAX_SW_I2C_COMMANDS - 2,
2430 };
2431
2432 static int smu_v13_0_6_i2c_control_init(struct smu_context *smu)
2433 {
2434 struct amdgpu_device *adev = smu->adev;
2435 int res, i;
2436
2437 for (i = 0; i < MAX_SMU_I2C_BUSES; i++) {
2438 struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i];
2439 struct i2c_adapter *control = &smu_i2c->adapter;
2440
2441 smu_i2c->adev = adev;
2442 smu_i2c->port = i;
2443 mutex_init(&smu_i2c->mutex);
2444 control->owner = THIS_MODULE;
2445 control->dev.parent = &adev->pdev->dev;
2446 control->algo = &smu_v13_0_6_i2c_algo;
2447 snprintf(control->name, sizeof(control->name), "AMDGPU SMU %d", i);
2448 control->quirks = &smu_v13_0_6_i2c_control_quirks;
2449 i2c_set_adapdata(control, smu_i2c);
2450
2451 res = devm_i2c_add_adapter(adev->dev, control);
2452 if (res) {
2453 DRM_ERROR("Failed to register hw i2c, err: %d\n", res);
2454 return res;
2455 }
2456 }
2457
2458 adev->pm.ras_eeprom_i2c_bus = &adev->pm.smu_i2c[0].adapter;
2459 adev->pm.fru_eeprom_i2c_bus = &adev->pm.smu_i2c[0].adapter;
2460
2461 return 0;
2462 }
2463
2464 static void smu_v13_0_6_i2c_control_fini(struct smu_context *smu)
2465 {
2466 struct amdgpu_device *adev = smu->adev;
2467
2468 adev->pm.ras_eeprom_i2c_bus = NULL;
2469 adev->pm.fru_eeprom_i2c_bus = NULL;
2470 }
2471
2472 static void smu_v13_0_6_get_unique_id(struct smu_context *smu)
2473 {
2474 struct amdgpu_device *adev = smu->adev;
2475 struct smu_table_context *smu_table = &smu->smu_table;
2476 struct PPTable_t *pptable =
2477 (struct PPTable_t *)smu_table->driver_pptable;
2478
2479 adev->unique_id = pptable->PublicSerialNumber_AID;
2480 }
2481
2482 static int smu_v13_0_6_get_bamaco_support(struct smu_context *smu)
2483 {
2484 /* smu_13_0_6 does not support baco */
2485
2486 return 0;
2487 }
2488
2489 static const char *const throttling_logging_label[] = {
2490 [THROTTLER_PROCHOT_BIT] = "Prochot",
2491 [THROTTLER_PPT_BIT] = "PPT",
2492 [THROTTLER_THERMAL_SOCKET_BIT] = "SOC",
2493 [THROTTLER_THERMAL_VR_BIT] = "VR",
2494 [THROTTLER_THERMAL_HBM_BIT] = "HBM"
2495 };
2496
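/**
 * smu_v13_0_6_log_thermal_throttling_event - Log active throttlers
 * @smu: smu_context pointer
 *
 * Builds a human-readable list of the asserted throttler bits, warns
 * on the kernel log and forwards the independent throttler status to
 * KFD SMI clients.
 */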
2497 static void smu_v13_0_6_log_thermal_throttling_event(struct smu_context *smu)
2498 {
2499 int throttler_idx, throttling_events = 0, buf_idx = 0;
2500 struct amdgpu_device *adev = smu->adev;
2501 uint32_t throttler_status;
2502 char log_buf[256];
2503
2504 throttler_status = smu_v13_0_6_get_throttler_status(smu);
2505 if (!throttler_status)
2506 return;
2507
2508 memset(log_buf, 0, sizeof(log_buf));
2509 for (throttler_idx = 0;
2510 throttler_idx < ARRAY_SIZE(throttling_logging_label);
2511 throttler_idx++) {
2512 if (throttler_status & (1U << throttler_idx)) {
2513 throttling_events++;
2514 buf_idx += snprintf(
2515 log_buf + buf_idx, sizeof(log_buf) - buf_idx,
2516 "%s%s", throttling_events > 1 ? " and " : "",
2517 throttling_logging_label[throttler_idx]);
2518 if (buf_idx >= sizeof(log_buf)) {
2519 dev_err(adev->dev, "buffer overflow!\n");
2520 log_buf[sizeof(log_buf) - 1] = '\0';
2521 break;
2522 }
2523 }
2524 }
2525
2526 dev_warn(adev->dev,
2527 "WARN: GPU is throttled, expect performance decrease. %s.\n",
2528 log_buf);
2529 kgd2kfd_smi_event_throttle(
2530 smu->adev->kfd.dev,
2531 smu_cmn_get_indep_throttler_status(throttler_status,
2532 smu_v13_0_6_throttler_map));
2533 }
2534
2535 static int
2536 smu_v13_0_6_get_current_pcie_link_width_level(struct smu_context *smu)
2537 {
2538 struct amdgpu_device *adev = smu->adev;
2539
2540 return REG_GET_FIELD(RREG32_PCIE(smnPCIE_LC_LINK_WIDTH_CNTL),
2541 PCIE_LC_LINK_WIDTH_CNTL, LC_LINK_WIDTH_RD);
2542 }
2543
2544 static int smu_v13_0_6_get_current_pcie_link_speed(struct smu_context *smu)
2545 {
2546 struct amdgpu_device *adev = smu->adev;
2547 uint32_t speed_level;
2548 uint32_t esm_ctrl;
2549
2550 /* TODO: confirm this on real target */
2551 esm_ctrl = RREG32_PCIE(smnPCIE_ESM_CTRL);
2552 if ((esm_ctrl >> 15) & 0x1)
2553 return (((esm_ctrl >> 8) & 0x7F) + 128);
2554
2555 speed_level = (RREG32_PCIE(smnPCIE_LC_SPEED_CNTL) &
2556 PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK)
2557 >> PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT;
2558 if (speed_level > LINK_SPEED_MAX)
2559 speed_level = 0;
2560
2561 return pcie_gen_to_speed(speed_level + 1);
2562 }
2563
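/**
 * smu_v13_0_6_get_xcp_metrics - Fill per-partition (XCP) metrics
 * @smu: smu_context pointer
 * @xcp_id: id of the compute partition to report
 * @table: output buffer; pass NULL to query the required size
 *
 * Translates the partition's logical VCN/JPEG/GFX instances into
 * physical instances and fills in their clocks and busy percentages
 * from the cached metrics table.
 */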
2564 static ssize_t smu_v13_0_6_get_xcp_metrics(struct smu_context *smu, int xcp_id,
2565 void *table)
2566 {
2567 const u8 num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS_4_0_3;
2568 int version = smu_v13_0_6_get_metrics_version(smu);
2569 struct smu_v13_0_6_partition_metrics *xcp_metrics;
2570 struct smu_table_context *smu_table = &smu->smu_table;
2571 struct amdgpu_device *adev = smu->adev;
2572 int ret, inst, i, j, k, idx;
2573 MetricsTableV0_t *metrics_v0;
2574 MetricsTableV1_t *metrics_v1;
2575 MetricsTableV2_t *metrics_v2;
2576 struct amdgpu_xcp *xcp;
2577 u32 inst_mask;
2578 bool per_inst;
2579
2580 if (!table)
2581 return sizeof(*xcp_metrics);
2582
2583 for_each_xcp(adev->xcp_mgr, xcp, i) {
2584 if (xcp->id == xcp_id)
2585 break;
2586 }
2587 if (i == adev->xcp_mgr->num_xcps)
2588 return -EINVAL;
2589
2590 xcp_metrics = (struct smu_v13_0_6_partition_metrics *)table;
2591 smu_v13_0_6_partition_metrics_init(xcp_metrics, 1, 1);
2592
2593 ret = smu_v13_0_6_get_metrics_table(smu, NULL, false);
2594 if (ret)
2595 return ret;
2596
2597 metrics_v0 = (MetricsTableV0_t *)smu_table->metrics_table;
2598
2599 if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) ==
2600 IP_VERSION(13, 0, 12) &&
2601 smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS)))
2602 return smu_v13_0_12_get_xcp_metrics(smu, xcp, table,
2603 metrics_v0);
2604
2605 metrics_v1 = (MetricsTableV1_t *)smu_table->metrics_table;
2606 metrics_v2 = (MetricsTableV2_t *)smu_table->metrics_table;
2607
2608 per_inst = smu_v13_0_6_cap_supported(smu, SMU_CAP(PER_INST_METRICS));
2609
2610 amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_VCN, &inst_mask);
2611 idx = 0;
2612 for_each_inst(k, inst_mask) {
2613 /* Both JPEG and VCN have the same instances */
2614 inst = GET_INST(VCN, k);
2615
2616 for (j = 0; j < num_jpeg_rings; ++j) {
2617 xcp_metrics->jpeg_busy[(idx * num_jpeg_rings) + j] =
2618 SMUQ10_ROUND(GET_METRIC_FIELD(
2619 JpegBusy,
2620 version)[(inst * num_jpeg_rings) + j]);
2621 }
2622 xcp_metrics->vcn_busy[idx] =
2623 SMUQ10_ROUND(GET_METRIC_FIELD(VcnBusy, version)[inst]);
2624
2625 xcp_metrics->current_vclk0[idx] = SMUQ10_ROUND(
2626 GET_METRIC_FIELD(VclkFrequency, version)[inst]);
2627 xcp_metrics->current_dclk0[idx] = SMUQ10_ROUND(
2628 GET_METRIC_FIELD(DclkFrequency, version)[inst]);
2629 xcp_metrics->current_socclk[idx] = SMUQ10_ROUND(
2630 GET_METRIC_FIELD(SocclkFrequency, version)[inst]);
2631
2632 idx++;
2633 }
2634
2635 xcp_metrics->current_uclk =
2636 SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequency, version));
2637
2638 if (per_inst) {
2639 amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_GFX, &inst_mask);
2640 idx = 0;
2641 for_each_inst(k, inst_mask) {
2642 inst = GET_INST(GC, k);
2643 xcp_metrics->current_gfxclk[idx] =
2644 SMUQ10_ROUND(GET_METRIC_FIELD(GfxclkFrequency,
2645 version)[inst]);
2646
2647 xcp_metrics->gfx_busy_inst[idx] = SMUQ10_ROUND(
2648 GET_GPU_METRIC_FIELD(GfxBusy, version)[inst]);
2649 xcp_metrics->gfx_busy_acc[idx] = SMUQ10_ROUND(
2650 GET_GPU_METRIC_FIELD(GfxBusyAcc,
2651 version)[inst]);
2652 if (smu_v13_0_6_cap_supported(
2653 smu, SMU_CAP(HST_LIMIT_METRICS))) {
2654 xcp_metrics->gfx_below_host_limit_ppt_acc
2655 [idx] = SMUQ10_ROUND(
2656 metrics_v0->GfxclkBelowHostLimitPptAcc
2657 [inst]);
2658 xcp_metrics->gfx_below_host_limit_thm_acc
2659 [idx] = SMUQ10_ROUND(
2660 metrics_v0->GfxclkBelowHostLimitThmAcc
2661 [inst]);
2662 xcp_metrics->gfx_low_utilization_acc
2663 [idx] = SMUQ10_ROUND(
2664 metrics_v0
2665 ->GfxclkLowUtilizationAcc[inst]);
2666 xcp_metrics->gfx_below_host_limit_total_acc
2667 [idx] = SMUQ10_ROUND(
2668 metrics_v0->GfxclkBelowHostLimitTotalAcc
2669 [inst]);
2670 }
2671 idx++;
2672 }
2673 }
2674 xcp_metrics->accumulation_counter = GET_METRIC_FIELD(AccumulationCounter, version);
2675 xcp_metrics->firmware_timestamp = GET_METRIC_FIELD(Timestamp, version);
2676
2677 return sizeof(*xcp_metrics);
2678 }
2679
2680 static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table)
2681 {
2682 struct smu_v13_0_6_gpu_metrics *gpu_metrics;
2683 int version = smu_v13_0_6_get_metrics_version(smu);
2684 struct smu_table_context *smu_table = &smu->smu_table;
2685 struct amdgpu_device *adev = smu->adev;
2686 int ret = 0, xcc_id, inst, i, j;
2687 MetricsTableV0_t *metrics_v0;
2688 MetricsTableV1_t *metrics_v1;
2689 MetricsTableV2_t *metrics_v2;
2690 u16 link_width_level;
2691 u8 num_jpeg_rings;
2692 bool per_inst;
2693
2694 ret = smu_v13_0_6_get_metrics_table(smu, NULL, false);
2695 if (ret)
2696 return ret;
2697
2698 metrics_v0 = (MetricsTableV0_t *)smu_table->metrics_table;
2699 gpu_metrics = (struct smu_v13_0_6_gpu_metrics *)smu_driver_table_ptr(
2700 smu, SMU_DRIVER_TABLE_GPU_METRICS);
2701
2702 if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12) &&
2703 smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS))) {
2704 smu_v13_0_12_get_gpu_metrics(smu, table, metrics_v0,
2705 gpu_metrics);
2706 goto fill;
2707 }
2708
2709 metrics_v1 = (MetricsTableV1_t *)smu_table->metrics_table;
2710 metrics_v2 = (MetricsTableV2_t *)smu_table->metrics_table;
2711
2712 gpu_metrics->temperature_hotspot =
2713 SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketTemperature, version));
2714 /* Individual HBM stack temperature is not reported */
2715 gpu_metrics->temperature_mem =
2716 SMUQ10_ROUND(GET_METRIC_FIELD(MaxHbmTemperature, version));
2717 /* Reports max temperature of all voltage rails */
2718 gpu_metrics->temperature_vrsoc =
2719 SMUQ10_ROUND(GET_METRIC_FIELD(MaxVrTemperature, version));
2720
2721 gpu_metrics->average_gfx_activity =
2722 SMUQ10_ROUND(GET_METRIC_FIELD(SocketGfxBusy, version));
2723 gpu_metrics->average_umc_activity =
2724 SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilization, version));
2725
2726 gpu_metrics->mem_max_bandwidth =
2727 SMUQ10_ROUND(GET_METRIC_FIELD(MaxDramBandwidth, version));
2728
2729 gpu_metrics->curr_socket_power =
2730 SMUQ10_ROUND(GET_METRIC_FIELD(SocketPower, version));
2731 /* Energy counter reported in 15.259uJ (2^-16) units */
2732 gpu_metrics->energy_accumulator = GET_METRIC_FIELD(SocketEnergyAcc, version);
2733
2734 for (i = 0; i < MAX_GFX_CLKS; i++) {
2735 xcc_id = GET_INST(GC, i);
2736 if (xcc_id >= 0)
2737 gpu_metrics->current_gfxclk[i] =
2738 SMUQ10_ROUND(GET_METRIC_FIELD(GfxclkFrequency, version)[xcc_id]);
2739
2740 if (i < MAX_CLKS) {
2741 gpu_metrics->current_socclk[i] =
2742 SMUQ10_ROUND(GET_METRIC_FIELD(SocclkFrequency, version)[i]);
2743 inst = GET_INST(VCN, i);
2744 if (inst >= 0) {
2745 gpu_metrics->current_vclk0[i] =
2746 SMUQ10_ROUND(GET_METRIC_FIELD(VclkFrequency,
2747 version)[inst]);
2748 gpu_metrics->current_dclk0[i] =
2749 SMUQ10_ROUND(GET_METRIC_FIELD(DclkFrequency,
2750 version)[inst]);
2751 }
2752 }
2753 }
2754
2755 gpu_metrics->current_uclk = SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequency, version));
2756
2757 /* Total accumulated cycle counter */
2758 gpu_metrics->accumulation_counter = GET_METRIC_FIELD(AccumulationCounter, version);
2759
2760 /* Accumulated throttler residencies */
2761 gpu_metrics->prochot_residency_acc = GET_METRIC_FIELD(ProchotResidencyAcc, version);
2762 gpu_metrics->ppt_residency_acc = GET_METRIC_FIELD(PptResidencyAcc, version);
2763 gpu_metrics->socket_thm_residency_acc = GET_METRIC_FIELD(SocketThmResidencyAcc, version);
2764 gpu_metrics->vr_thm_residency_acc = GET_METRIC_FIELD(VrThmResidencyAcc, version);
2765 gpu_metrics->hbm_thm_residency_acc =
2766 GET_METRIC_FIELD(HbmThmResidencyAcc, version);
2767
2768 /* Clock Lock Status. Each bit corresponds to each GFXCLK instance */
2769 gpu_metrics->gfxclk_lock_status = GET_METRIC_FIELD(GfxLockXCDMak,
2770 version) >> GET_INST(GC, 0);
2771
2772 if (!(adev->flags & AMD_IS_APU)) {
2773 /* On SMU version 85.99.0 or higher, PCIe link speed and width are
2774 * reported from the PMFW metrics table for both the PF and one VF;
2775 * otherwise they are read from registers, and only for the PF.
2776 */
2777 if (smu_v13_0_6_cap_supported(smu, SMU_CAP(PCIE_METRICS))) {
2778 gpu_metrics->pcie_link_width = GET_GPU_METRIC_FIELD(PCIeLinkWidth, version);
2779 gpu_metrics->pcie_link_speed =
2780 pcie_gen_to_speed(GET_GPU_METRIC_FIELD(PCIeLinkSpeed, version));
2781 } else if (!amdgpu_sriov_vf(adev)) {
2782 link_width_level = smu_v13_0_6_get_current_pcie_link_width_level(smu);
2783 if (link_width_level > MAX_LINK_WIDTH)
2784 link_width_level = 0;
2785
2786 gpu_metrics->pcie_link_width =
2787 DECODE_LANE_WIDTH(link_width_level);
2788 gpu_metrics->pcie_link_speed =
2789 smu_v13_0_6_get_current_pcie_link_speed(smu);
2790 }
2791
2792 gpu_metrics->pcie_bandwidth_acc =
2793 SMUQ10_ROUND(GET_GPU_METRIC_FIELD(PcieBandwidthAcc, version)[0]);
2794 gpu_metrics->pcie_bandwidth_inst =
2795 SMUQ10_ROUND(GET_GPU_METRIC_FIELD(PcieBandwidth, version)[0]);
2796 gpu_metrics->pcie_l0_to_recov_count_acc =
2797 GET_GPU_METRIC_FIELD(PCIeL0ToRecoveryCountAcc, version);
2798 gpu_metrics->pcie_replay_count_acc =
2799 GET_GPU_METRIC_FIELD(PCIenReplayAAcc, version);
2800 gpu_metrics->pcie_replay_rover_count_acc =
2801 GET_GPU_METRIC_FIELD(PCIenReplayARolloverCountAcc, version);
2802 gpu_metrics->pcie_nak_sent_count_acc =
2803 GET_GPU_METRIC_FIELD(PCIeNAKSentCountAcc, version);
2804 gpu_metrics->pcie_nak_rcvd_count_acc =
2805 GET_GPU_METRIC_FIELD(PCIeNAKReceivedCountAcc, version);
2806 if (smu_v13_0_6_cap_supported(smu, SMU_CAP(OTHER_END_METRICS)))
2807 gpu_metrics->pcie_lc_perf_other_end_recovery =
2808 GET_GPU_METRIC_FIELD(PCIeOtherEndRecoveryAcc, version);
2809
2810 }
2811
2812 gpu_metrics->system_clock_counter = ktime_get_boottime_ns();
2813
2814 gpu_metrics->gfx_activity_acc =
2815 SMUQ10_ROUND(GET_METRIC_FIELD(SocketGfxBusyAcc, version));
2816 gpu_metrics->mem_activity_acc =
2817 SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilizationAcc, version));
2818
2819 for (i = 0; i < NUM_XGMI_LINKS; i++) {
2820 j = amdgpu_xgmi_get_ext_link(adev, i);
2821 if (j < 0 || j >= NUM_XGMI_LINKS)
2822 continue;
2823 gpu_metrics->xgmi_read_data_acc[j] = SMUQ10_ROUND(
2824 GET_METRIC_FIELD(XgmiReadDataSizeAcc, version)[i]);
2825 gpu_metrics->xgmi_write_data_acc[j] = SMUQ10_ROUND(
2826 GET_METRIC_FIELD(XgmiWriteDataSizeAcc, version)[i]);
2827 ret = amdgpu_get_xgmi_link_status(adev, i);
2828 if (ret >= 0)
2829 gpu_metrics->xgmi_link_status[j] = ret;
2830 }
2831
2832 per_inst = smu_v13_0_6_cap_supported(smu, SMU_CAP(PER_INST_METRICS));
2833
2834 num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS_4_0_3;
2835 for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
2836 inst = GET_INST(JPEG, i);
2837 for (j = 0; j < num_jpeg_rings; ++j)
2838 gpu_metrics->jpeg_busy[(i * num_jpeg_rings) + j] =
2839 SMUQ10_ROUND(GET_METRIC_FIELD(
2840 JpegBusy,
2841 version)[(inst * num_jpeg_rings) + j]);
2842 }
2843 for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
2844 inst = GET_INST(VCN, i);
2845 gpu_metrics->vcn_busy[i] =
2846 SMUQ10_ROUND(GET_METRIC_FIELD(VcnBusy, version)[inst]);
2847 }
2848
2849 if (per_inst) {
2850 for (i = 0; i < NUM_XCC(adev->gfx.xcc_mask); ++i) {
2851 inst = GET_INST(GC, i);
2852 gpu_metrics->gfx_busy_inst[i] = SMUQ10_ROUND(
2853 GET_GPU_METRIC_FIELD(GfxBusy, version)[inst]);
2854 gpu_metrics->gfx_busy_acc[i] = SMUQ10_ROUND(
2855 GET_GPU_METRIC_FIELD(GfxBusyAcc,
2856 version)[inst]);
2857 if (smu_v13_0_6_cap_supported(
2858 smu, SMU_CAP(HST_LIMIT_METRICS))) {
2859 gpu_metrics->gfx_below_host_limit_ppt_acc
2860 [i] = SMUQ10_ROUND(
2861 metrics_v0->GfxclkBelowHostLimitPptAcc
2862 [inst]);
2863 gpu_metrics->gfx_below_host_limit_thm_acc
2864 [i] = SMUQ10_ROUND(
2865 metrics_v0->GfxclkBelowHostLimitThmAcc
2866 [inst]);
2867 gpu_metrics->gfx_low_utilization_acc
2868 [i] = SMUQ10_ROUND(
2869 metrics_v0
2870 ->GfxclkLowUtilizationAcc[inst]);
2871 gpu_metrics->gfx_below_host_limit_total_acc
2872 [i] = SMUQ10_ROUND(
2873 metrics_v0->GfxclkBelowHostLimitTotalAcc
2874 [inst]);
2875 }
2876 }
2877 }
2878
2879 gpu_metrics->xgmi_link_width = GET_METRIC_FIELD(XgmiWidth, version);
2880 gpu_metrics->xgmi_link_speed = GET_METRIC_FIELD(XgmiBitrate, version);
2881
2882 gpu_metrics->firmware_timestamp = GET_METRIC_FIELD(Timestamp, version);
2883
2884 fill:
2885 *table = gpu_metrics;
2886
2887 smu_driver_table_update_cache_time(smu, SMU_DRIVER_TABLE_GPU_METRICS);
2888
2889 return sizeof(*gpu_metrics);
2890 }
2891
2892 static void smu_v13_0_6_restore_pci_config(struct smu_context *smu)
2893 {
2894 struct amdgpu_device *adev = smu->adev;
2895 int i;
2896
2897 for (i = 0; i < 16; i++)
2898 pci_write_config_dword(adev->pdev, i * 4,
2899 adev->pdev->saved_config_space[i]);
2900 pci_restore_msi_state(adev->pdev);
2901 }
2902
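/**
 * smu_v13_0_6_mode2_reset - Perform a mode-2 (engine) reset
 * @smu: smu_context pointer
 *
 * Sends the reset message asynchronously, waits 200ms, restores the
 * PCI config space (forcibly on non-APU parts, see the switch
 * workaround below), then polls up to 10 times for the firmware ACK.
 */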
2903 static int smu_v13_0_6_mode2_reset(struct smu_context *smu)
2904 {
2905 struct smu_msg_ctl *ctl = &smu->msg_ctl;
2906 struct amdgpu_device *adev = smu->adev;
2907 int ret = 0;
2908 int timeout = 10;
2909
2910 mutex_lock(&ctl->lock);
2911
2912 ret = smu_msg_send_async_locked(ctl, SMU_MSG_GfxDeviceDriverReset,
2913 SMU_RESET_MODE_2);
2914 if (ret)
2915 goto out;
2916
2917 /* Reset takes a bit longer, wait for 200ms. */
2918 msleep(200);
2919
2920 dev_dbg(adev->dev, "restore config space...\n");
2921 /* Restore the config space saved during init */
2922 amdgpu_device_load_pci_state(adev->pdev);
2923
2924 /* Certain platforms have switches which assign virtual BAR values to
2925 * devices. OS uses the virtual BAR values and device behind the switch
2926 * is assigned another BAR value. When device's config space registers
2927 * are queried, switch returns the virtual BAR values. When mode-2 reset
2928 * is performed, switch is unaware of it, and will continue to return
2929 * the same virtual values to the OS. This affects
2930 * pci_restore_config_space() API as it doesn't write the value saved if
2931 * the current value read from config space is the same as what is
2932 * saved. As a workaround, make sure the config space is restored
2933 * always.
2934 */
2935 if (!(adev->flags & AMD_IS_APU))
2936 smu_v13_0_6_restore_pci_config(smu);
2937
2938 dev_dbg(adev->dev, "wait for reset ack\n");
2939 do {
2940 ret = smu_msg_wait_response(ctl, 0);
2941 /* Wait a bit more time for getting ACK */
2942 if (ret == -ETIME) {
2943 --timeout;
2944 usleep_range(500, 1000);
2945 continue;
2946 }
2947
2948 if (ret)
2949 goto out;
2950
2951 } while (ret == -ETIME && timeout);
2952
2953 out:
2954 mutex_unlock(&ctl->lock);
2955
2956 if (ret)
2957 dev_err(adev->dev, "failed to send mode2 reset, error code %d",
2958 ret);
2959
2960 return ret;
2961 }
2962
2963 static int smu_v13_0_6_get_thermal_temperature_range(struct smu_context *smu,
2964 struct smu_temperature_range *range)
2965 {
2966 struct amdgpu_device *adev = smu->adev;
2967 u32 aid_temp, xcd_temp, max_temp;
2968 u32 ccd_temp = 0;
2969 int ret;
2970
2971 if (amdgpu_sriov_vf(smu->adev))
2972 return 0;
2973
2974 if (!range)
2975 return -EINVAL;
2976
2977 /* The GetCtfLimit message is only supported on SMU version 85.69 or higher */
2978 if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(CTF_LIMIT)))
2979 return 0;
2980
2981 /* Get SOC Max operating temperature */
2982 ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetCTFLimit,
2983 PPSMC_AID_THM_TYPE, &aid_temp);
2984 if (ret)
2985 goto failed;
2986 if (adev->flags & AMD_IS_APU) {
2987 ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetCTFLimit,
2988 PPSMC_CCD_THM_TYPE, &ccd_temp);
2989 if (ret)
2990 goto failed;
2991 }
2992 ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetCTFLimit,
2993 PPSMC_XCD_THM_TYPE, &xcd_temp);
2994 if (ret)
2995 goto failed;
2996 range->hotspot_emergency_max = max3(aid_temp, xcd_temp, ccd_temp) *
2997 SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
2998
2999 /* Get HBM Max operating temperature */
3000 ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetCTFLimit,
3001 PPSMC_HBM_THM_TYPE, &max_temp);
3002 if (ret)
3003 goto failed;
3004 range->mem_emergency_max =
3005 max_temp * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
3006
3007 /* Get SOC thermal throttle limit */
3008 ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetThermalLimit,
3009 PPSMC_THROTTLING_LIMIT_TYPE_SOCKET,
3010 &max_temp);
3011 if (ret)
3012 goto failed;
3013 range->hotspot_crit_max =
3014 max_temp * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
3015
3016 /* Get HBM thermal throttle limit */
3017 ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetThermalLimit,
3018 PPSMC_THROTTLING_LIMIT_TYPE_HBM,
3019 &max_temp);
3020 if (ret)
3021 goto failed;
3022
3023 range->mem_crit_max = max_temp * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
3024
3025 failed:
3026 return ret;
3027 }
3028
3029 static int smu_v13_0_6_mode1_reset(struct smu_context *smu)
3030 {
3031 struct amdgpu_device *adev = smu->adev;
3032 u32 fatal_err, param;
3033 int ret = 0;
3034
3035 fatal_err = 0;
3036 param = SMU_RESET_MODE_1;
3037
3038 /* fatal error triggered by RAS; PMFW supports the flag */
3039 if (amdgpu_ras_get_fed_status(adev))
3040 fatal_err = 1;
3041
3042 param |= (fatal_err << 16);
3043 ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GfxDeviceDriverReset,
3044 param, NULL);
3045
3046 if (!ret)
3047 msleep(SMU13_MODE1_RESET_WAIT_TIME_IN_MS);
3048
3049 return ret;
3050 }
3051
3052 static int smu_v13_0_6_link_reset(struct smu_context *smu)
3053 {
3054 int ret = 0;
3055
3056 ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GfxDeviceDriverReset,
3057 SMU_RESET_MODE_4, NULL);
3058 return ret;
3059 }
3060
3061 static bool smu_v13_0_6_is_mode1_reset_supported(struct smu_context *smu)
3062 {
3063 return true;
3064 }
3065
3066 static inline bool smu_v13_0_6_is_link_reset_supported(struct smu_context *smu)
3067 {
3068 struct amdgpu_device *adev = smu->adev;
3069 int var = (adev->pdev->device & 0xF);
3070
3071 if (var == 0x0 || var == 0x1 || var == 0x3)
3072 return true;
3073
3074 return false;
3075 }
3076
3077 static int smu_v13_0_6_smu_send_hbm_bad_page_num(struct smu_context *smu,
3078 uint32_t size)
3079 {
3080 int ret = 0;
3081
3082 /* message SMU to update the bad page number on SMUBUS */
3083 ret = smu_cmn_send_smc_msg_with_param(
3084 smu, SMU_MSG_SetNumBadHbmPagesRetired, size, NULL);
3085 if (ret)
3086 dev_err(smu->adev->dev,
3087 "[%s] failed to message SMU to update HBM bad pages number\n",
3088 __func__);
3089
3090 return ret;
3091 }
3092
3093 static int smu_v13_0_6_send_rma_reason(struct smu_context *smu)
3094 {
3095 int ret;
3096
3097 /* NOTE: the message is only valid on dGPU with pmfw 85.90.0 and above */
3098 if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(RMA_MSG)))
3099 return 0;
3100
3101 ret = smu_cmn_send_smc_msg(smu, SMU_MSG_RmaDueToBadPageThreshold, NULL);
3102 if (ret)
3103 dev_err(smu->adev->dev,
3104 "[%s] failed to send BadPageThreshold event to SMU\n",
3105 __func__);
3106
3107 return ret;
3108 }
3109
3110 /**
3111 * smu_v13_0_6_reset_sdma_is_supported - Check if SDMA reset is supported
3112 * @smu: smu_context pointer
3113 *
3114 * This function checks if the SMU supports resetting the SDMA engine.
3115 * It returns false if the capability is not supported.
3116 */
3117 static bool smu_v13_0_6_reset_sdma_is_supported(struct smu_context *smu)
3118 {
3119 bool ret = true;
3120
3121 if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(SDMA_RESET))) {
3122 dev_info(smu->adev->dev,
3123 "SDMA reset capability is not supported\n");
3124 ret = false;
3125 }
3126
3127 return ret;
3128 }
3129
3130 static int smu_v13_0_6_reset_sdma(struct smu_context *smu, uint32_t inst_mask)
3131 {
3132 int ret = 0;
3133
3134 if (!smu_v13_0_6_reset_sdma_is_supported(smu))
3135 return -EOPNOTSUPP;
3136
3137 ret = smu_cmn_send_smc_msg_with_param(smu,
3138 SMU_MSG_ResetSDMA, inst_mask, NULL);
3139 if (ret)
3140 dev_err(smu->adev->dev,
3141 "failed to send ResetSDMA event with mask 0x%x\n",
3142 inst_mask);
3143
3144 return ret;
3145 }
3146
3147 static bool smu_v13_0_6_reset_vcn_is_supported(struct smu_context *smu)
3148 {
3149 return smu_v13_0_6_cap_supported(smu, SMU_CAP(VCN_RESET));
3150 }
3151
3152 static int smu_v13_0_6_reset_vcn(struct smu_context *smu, uint32_t inst_mask)
3153 {
3154 int ret = 0;
3155
3156 ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ResetVCN, inst_mask, NULL);
3157 if (ret)
3158 dev_err(smu->adev->dev,
3159 "failed to send ResetVCN event with mask 0x%x\n",
3160 inst_mask);
3161 return ret;
3162 }
3163
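/**
 * smu_v13_0_6_ras_send_msg - Send an MCA message on behalf of RAS
 * @smu: smu_context pointer
 * @msg: message to send; only MCA query/dump/clear messages are allowed
 * @param: message argument
 * @read_arg: optional output for the response
 *
 * Return: 0 on success, -EPERM for a message outside the allow-list,
 * or a negative error code on send failure.
 */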
3164 static int smu_v13_0_6_ras_send_msg(struct smu_context *smu, enum smu_message_type msg, uint32_t param, uint32_t *read_arg)
3165 {
3166 int ret;
3167
3168 switch (msg) {
3169 case SMU_MSG_QueryValidMcaCount:
3170 case SMU_MSG_QueryValidMcaCeCount:
3171 case SMU_MSG_McaBankDumpDW:
3172 case SMU_MSG_McaBankCeDumpDW:
3173 case SMU_MSG_ClearMcaOnRead:
3174 ret = smu_cmn_send_smc_msg_with_param(smu, msg, param, read_arg);
3175 break;
3176 default:
3177 ret = -EPERM;
3178 }
3179
3180 return ret;
3181 }
3182
3183 static int smu_v13_0_6_post_init(struct smu_context *smu)
3184 {
3185 if (smu_v13_0_6_is_link_reset_supported(smu))
3186 smu_feature_cap_set(smu, SMU_FEATURE_CAP_ID__LINK_RESET);
3187
3188 if (smu_v13_0_6_reset_sdma_is_supported(smu))
3189 smu_feature_cap_set(smu, SMU_FEATURE_CAP_ID__SDMA_RESET);
3190
3191 if (smu_v13_0_6_reset_vcn_is_supported(smu))
3192 smu_feature_cap_set(smu, SMU_FEATURE_CAP_ID__VCN_RESET);
3193
3194 return 0;
3195 }
3196
3197 static int mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable)
3198 {
3199 struct smu_context *smu = adev->powerplay.pp_handle;
3200
3201 return smu_v13_0_6_mca_set_debug_mode(smu, enable);
3202 }
3203
3204 static int smu_v13_0_6_get_valid_mca_count(struct smu_context *smu, enum amdgpu_mca_error_type type, uint32_t *count)
3205 {
3206 uint32_t msg;
3207 int ret;
3208
3209 if (!count)
3210 return -EINVAL;
3211
3212 switch (type) {
3213 case AMDGPU_MCA_ERROR_TYPE_UE:
3214 msg = SMU_MSG_QueryValidMcaCount;
3215 break;
3216 case AMDGPU_MCA_ERROR_TYPE_CE:
3217 msg = SMU_MSG_QueryValidMcaCeCount;
3218 break;
3219 default:
3220 return -EINVAL;
3221 }
3222
3223 ret = smu_cmn_send_smc_msg(smu, msg, count);
3224 if (ret) {
3225 *count = 0;
3226 return ret;
3227 }
3228
3229 return 0;
3230 }
3231
3232 static int __smu_v13_0_6_mca_dump_bank(struct smu_context *smu, enum amdgpu_mca_error_type type,
3233 int idx, int offset, uint32_t *val)
3234 {
3235 uint32_t msg, param;
3236
3237 switch (type) {
3238 case AMDGPU_MCA_ERROR_TYPE_UE:
3239 msg = SMU_MSG_McaBankDumpDW;
3240 break;
3241 case AMDGPU_MCA_ERROR_TYPE_CE:
3242 msg = SMU_MSG_McaBankCeDumpDW;
3243 break;
3244 default:
3245 return -EINVAL;
3246 }
3247
3248 param = ((idx & 0xffff) << 16) | (offset & 0xfffc);
3249
3250 return smu_cmn_send_smc_msg_with_param(smu, msg, param, val);
3251 }
3252
3253 static int smu_v13_0_6_mca_dump_bank(struct smu_context *smu, enum amdgpu_mca_error_type type,
3254 int idx, int offset, uint32_t *val, int count)
3255 {
3256 int ret, i;
3257
3258 if (!val)
3259 return -EINVAL;
3260
3261 for (i = 0; i < count; i++) {
3262 ret = __smu_v13_0_6_mca_dump_bank(smu, type, idx, offset + (i << 2), &val[i]);
3263 if (ret)
3264 return ret;
3265 }
3266
3267 return 0;
3268 }
3269
3270 static const struct mca_bank_ipid smu_v13_0_6_mca_ipid_table[AMDGPU_MCA_IP_COUNT] = {
3271 MCA_BANK_IPID(UMC, 0x96, 0x0),
3272 MCA_BANK_IPID(SMU, 0x01, 0x1),
3273 MCA_BANK_IPID(MP5, 0x01, 0x2),
3274 MCA_BANK_IPID(PCS_XGMI, 0x50, 0x0),
3275 };
3276
3277 static void mca_bank_entry_info_decode(struct mca_bank_entry *entry, struct mca_bank_info *info)
3278 {
3279 u64 ipid = entry->regs[MCA_REG_IDX_IPID];
3280 u32 instidhi, instid;
3281
3282 /* NOTE: All MCA IPID registers share the same format,
3283 * so the driver can share the MCMP1 register header file.
3284 */
3285
3286 info->hwid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, HardwareID);
3287 info->mcatype = REG_GET_FIELD(ipid, MCMP1_IPIDT0, McaType);
3288
3289 /*
3290 * Unified DieID Format: SAASS. A:AID, S:Socket.
3291 * Unified DieID[4] = InstanceId[0]
3292 * Unified DieID[0:3] = InstanceIdHi[0:3]
3293 */
3294 instidhi = REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdHi);
3295 instid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdLo);
3296 info->aid = ((instidhi >> 2) & 0x03);
3297 info->socket_id = ((instid & 0x1) << 2) | (instidhi & 0x03);
3298 }
3299
3300 static int mca_bank_read_reg(struct amdgpu_device *adev, enum amdgpu_mca_error_type type,
3301 int idx, int reg_idx, uint64_t *val)
3302 {
3303 struct smu_context *smu = adev->powerplay.pp_handle;
3304 uint32_t data[2] = {0, 0};
3305 int ret;
3306
3307 if (!val || reg_idx >= MCA_REG_IDX_COUNT)
3308 return -EINVAL;
3309
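	/* Each 64-bit MCA register occupies two dwords, so the bank byte offset is reg_idx * 8. */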
	ret = smu_v13_0_6_mca_dump_bank(smu, type, idx, reg_idx * 8, data, ARRAY_SIZE(data));
	if (ret)
		return ret;

	*val = (uint64_t)data[1] << 32 | data[0];

	dev_dbg(adev->dev, "mca read bank reg: type:%s, index: %d, reg_idx: %d, val: 0x%016llx\n",
		type == AMDGPU_MCA_ERROR_TYPE_UE ? "UE" : "CE", idx, reg_idx, *val);

	return 0;
}

static int mca_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_error_type type,
			     int idx, struct mca_bank_entry *entry)
{
	int i, ret;

	/* NOTE: populate all MCA registers by default */
	for (i = 0; i < ARRAY_SIZE(entry->regs); i++) {
		ret = mca_bank_read_reg(adev, type, idx, i, &entry->regs[i]);
		if (ret)
			return ret;
	}

	entry->idx = idx;
	entry->type = type;

	mca_bank_entry_info_decode(entry, &entry->info);

	return 0;
}

static int mca_decode_ipid_to_hwip(uint64_t val)
{
	const struct mca_bank_ipid *ipid;
	uint16_t hwid, mcatype;
	int i;

	hwid = REG_GET_FIELD(val, MCMP1_IPIDT0, HardwareID);
	mcatype = REG_GET_FIELD(val, MCMP1_IPIDT0, McaType);

	for (i = 0; i < ARRAY_SIZE(smu_v13_0_6_mca_ipid_table); i++) {
		ipid = &smu_v13_0_6_mca_ipid_table[i];

		if (!ipid->hwid)
			continue;

		if (ipid->hwid == hwid && ipid->mcatype == mcatype)
			return i;
	}

	return AMDGPU_MCA_IP_UNKNOW;
}

static int mca_umc_mca_get_err_count(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev,
				     enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count)
{
	uint64_t status0;
	uint32_t ext_error_code;
	uint32_t odecc_err_cnt;

	status0 = entry->regs[MCA_REG_IDX_STATUS];
	ext_error_code = MCA_REG__STATUS__ERRORCODEEXT(status0);
	odecc_err_cnt = MCA_REG__MISC0__ERRCNT(entry->regs[MCA_REG_IDX_MISC0]);

	if (!REG_GET_FIELD(status0, MCMP1_STATUST0, Val)) {
		*count = 0;
		return 0;
	}

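	/*
	 * Extended error code 0 reports the on-die ECC error count from
	 * MISC0; any other code is counted as a single error.
	 */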
	if (umc_v12_0_is_deferred_error(adev, status0) ||
	    umc_v12_0_is_uncorrectable_error(adev, status0) ||
	    umc_v12_0_is_correctable_error(adev, status0))
		*count = (ext_error_code == 0) ? odecc_err_cnt : 1;

	amdgpu_umc_update_ecc_status(adev,
				     entry->regs[MCA_REG_IDX_STATUS],
				     entry->regs[MCA_REG_IDX_IPID],
				     entry->regs[MCA_REG_IDX_ADDR]);

	return 0;
}

static int mca_pcs_xgmi_mca_get_err_count(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev,
					  enum amdgpu_mca_error_type type, struct mca_bank_entry *entry,
					  uint32_t *count)
{
	u32 ext_error_code;
	u32 err_cnt;

	ext_error_code = MCA_REG__STATUS__ERRORCODEEXT(entry->regs[MCA_REG_IDX_STATUS]);
	err_cnt = MCA_REG__MISC0__ERRCNT(entry->regs[MCA_REG_IDX_MISC0]);

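	/* Extended error codes 0 and 9 are reported as UE; code 6 as CE. */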
	if (type == AMDGPU_MCA_ERROR_TYPE_UE &&
	    (ext_error_code == 0 || ext_error_code == 9))
		*count = err_cnt;
	else if (type == AMDGPU_MCA_ERROR_TYPE_CE && ext_error_code == 6)
		*count = err_cnt;

	return 0;
}

static bool mca_smu_check_error_code(struct amdgpu_device *adev, const struct mca_ras_info *mca_ras,
				     uint32_t errcode)
{
	int i;

	if (!mca_ras->err_code_count || !mca_ras->err_code_array)
		return true;

	for (i = 0; i < mca_ras->err_code_count; i++) {
		if (errcode == mca_ras->err_code_array[i])
			return true;
	}

	return false;
}

static int mca_gfx_mca_get_err_count(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev,
				     enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count)
{
	uint64_t status0, misc0;

	status0 = entry->regs[MCA_REG_IDX_STATUS];
	if (!REG_GET_FIELD(status0, MCMP1_STATUST0, Val)) {
		*count = 0;
		return 0;
	}

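	/* UC and PCC both set indicates an uncorrected error; count it as one. */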
	if (type == AMDGPU_MCA_ERROR_TYPE_UE &&
	    REG_GET_FIELD(status0, MCMP1_STATUST0, UC) == 1 &&
	    REG_GET_FIELD(status0, MCMP1_STATUST0, PCC) == 1) {
		*count = 1;
		return 0;
	} else {
		misc0 = entry->regs[MCA_REG_IDX_MISC0];
		*count = REG_GET_FIELD(misc0, MCMP1_MISC0T0, ErrCnt);
	}

	return 0;
}

static int mca_smu_mca_get_err_count(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev,
				     enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count)
{
	uint64_t status0, misc0;

	status0 = entry->regs[MCA_REG_IDX_STATUS];
	if (!REG_GET_FIELD(status0, MCMP1_STATUST0, Val)) {
		*count = 0;
		return 0;
	}

	if (type == AMDGPU_MCA_ERROR_TYPE_UE &&
	    REG_GET_FIELD(status0, MCMP1_STATUST0, UC) == 1 &&
	    REG_GET_FIELD(status0, MCMP1_STATUST0, PCC) == 1) {
		if (count)
			*count = 1;
		return 0;
	}

	misc0 = entry->regs[MCA_REG_IDX_MISC0];
	*count = REG_GET_FIELD(misc0, MCMP1_MISC0T0, ErrCnt);

	return 0;
}

static bool mca_gfx_smu_bank_is_valid(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev,
				      enum amdgpu_mca_error_type type, struct mca_bank_entry *entry)
{
	uint32_t instlo;

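	/* Mask off InstanceIdLo bit 0, which feeds the socket ID, before matching. */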
	instlo = REG_GET_FIELD(entry->regs[MCA_REG_IDX_IPID], MCMP1_IPIDT0, InstanceIdLo);
	instlo &= GENMASK(31, 1);
	switch (instlo) {
	case 0x36430400: /* SMNAID XCD 0 */
	case 0x38430400: /* SMNAID XCD 1 */
	case 0x40430400: /* SMNXCD XCD 0, NOTE: FIXME: fix this error later */
		return true;
	default:
		return false;
	}
}

static bool mca_smu_bank_is_valid(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev,
				  enum amdgpu_mca_error_type type, struct mca_bank_entry *entry)
{
	struct smu_context *smu = adev->powerplay.pp_handle;
	uint32_t errcode, instlo;

	instlo = REG_GET_FIELD(entry->regs[MCA_REG_IDX_IPID], MCMP1_IPIDT0, InstanceIdLo);
	instlo &= GENMASK(31, 1);
	if (instlo != 0x03b30400)
		return false;

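	/* With the ACA_SYND capability the error code lives in the syndrome register. */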
	if (smu_v13_0_6_cap_supported(smu, SMU_CAP(ACA_SYND))) {
		errcode = MCA_REG__SYND__ERRORINFORMATION(entry->regs[MCA_REG_IDX_SYND]);
		errcode &= 0xff;
	} else {
		errcode = REG_GET_FIELD(entry->regs[MCA_REG_IDX_STATUS], MCMP1_STATUST0, ErrorCode);
	}

	return mca_smu_check_error_code(adev, mca_ras, errcode);
}

static int sdma_err_codes[] = { CODE_SDMA0, CODE_SDMA1, CODE_SDMA2, CODE_SDMA3 };
static int mmhub_err_codes[] = {
	CODE_DAGB0, CODE_DAGB0 + 1, CODE_DAGB0 + 2, CODE_DAGB0 + 3, CODE_DAGB0 + 4, /* DAGB0-4 */
	CODE_EA0, CODE_EA0 + 1, CODE_EA0 + 2, CODE_EA0 + 3, CODE_EA0 + 4, /* MMEA0-4 */
	CODE_VML2, CODE_VML2_WALKER, CODE_MMCANE,
};

static int vcn_err_codes[] = {
	CODE_VIDD, CODE_VIDV,
};
static int jpeg_err_codes[] = {
	CODE_JPEG0S, CODE_JPEG0D, CODE_JPEG1S, CODE_JPEG1D,
	CODE_JPEG2S, CODE_JPEG2D, CODE_JPEG3S, CODE_JPEG3D,
	CODE_JPEG4S, CODE_JPEG4D, CODE_JPEG5S, CODE_JPEG5D,
	CODE_JPEG6S, CODE_JPEG6D, CODE_JPEG7S, CODE_JPEG7D,
};

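/*
 * Map each RAS block to the MCA IP that reports it; entries without an
 * err_code_array accept any error code (see mca_smu_check_error_code).
 */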
static const struct mca_ras_info mca_ras_table[] = {
	{
		.blkid = AMDGPU_RAS_BLOCK__UMC,
		.ip = AMDGPU_MCA_IP_UMC,
		.get_err_count = mca_umc_mca_get_err_count,
	}, {
		.blkid = AMDGPU_RAS_BLOCK__GFX,
		.ip = AMDGPU_MCA_IP_SMU,
		.get_err_count = mca_gfx_mca_get_err_count,
		.bank_is_valid = mca_gfx_smu_bank_is_valid,
	}, {
		.blkid = AMDGPU_RAS_BLOCK__SDMA,
		.ip = AMDGPU_MCA_IP_SMU,
		.err_code_array = sdma_err_codes,
		.err_code_count = ARRAY_SIZE(sdma_err_codes),
		.get_err_count = mca_smu_mca_get_err_count,
		.bank_is_valid = mca_smu_bank_is_valid,
	}, {
		.blkid = AMDGPU_RAS_BLOCK__MMHUB,
		.ip = AMDGPU_MCA_IP_SMU,
		.err_code_array = mmhub_err_codes,
		.err_code_count = ARRAY_SIZE(mmhub_err_codes),
		.get_err_count = mca_smu_mca_get_err_count,
		.bank_is_valid = mca_smu_bank_is_valid,
	}, {
		.blkid = AMDGPU_RAS_BLOCK__XGMI_WAFL,
		.ip = AMDGPU_MCA_IP_PCS_XGMI,
		.get_err_count = mca_pcs_xgmi_mca_get_err_count,
	}, {
		.blkid = AMDGPU_RAS_BLOCK__VCN,
		.ip = AMDGPU_MCA_IP_SMU,
		.err_code_array = vcn_err_codes,
		.err_code_count = ARRAY_SIZE(vcn_err_codes),
		.get_err_count = mca_smu_mca_get_err_count,
		.bank_is_valid = mca_smu_bank_is_valid,
	}, {
		.blkid = AMDGPU_RAS_BLOCK__JPEG,
		.ip = AMDGPU_MCA_IP_SMU,
		.err_code_array = jpeg_err_codes,
		.err_code_count = ARRAY_SIZE(jpeg_err_codes),
		.get_err_count = mca_smu_mca_get_err_count,
		.bank_is_valid = mca_smu_bank_is_valid,
	},
};

static const struct mca_ras_info *mca_get_mca_ras_info(struct amdgpu_device *adev, enum amdgpu_ras_block blkid)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(mca_ras_table); i++) {
		if (mca_ras_table[i].blkid == blkid)
			return &mca_ras_table[i];
	}

	return NULL;
}

static int mca_get_valid_mca_count(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, uint32_t *count)
{
	struct smu_context *smu = adev->powerplay.pp_handle;
	int ret;

	switch (type) {
	case AMDGPU_MCA_ERROR_TYPE_UE:
	case AMDGPU_MCA_ERROR_TYPE_CE:
		ret = smu_v13_0_6_get_valid_mca_count(smu, type, count);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

static bool mca_bank_is_valid(struct amdgpu_device *adev, const struct mca_ras_info *mca_ras,
			      enum amdgpu_mca_error_type type, struct mca_bank_entry *entry)
{
	if (mca_decode_ipid_to_hwip(entry->regs[MCA_REG_IDX_IPID]) != mca_ras->ip)
		return false;

	if (mca_ras->bank_is_valid)
		return mca_ras->bank_is_valid(mca_ras, adev, type, entry);

	return true;
}

static int mca_smu_parse_mca_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type,
					 struct mca_bank_entry *entry, uint32_t *count)
{
	const struct mca_ras_info *mca_ras;

	if (!entry || !count)
		return -EINVAL;

	mca_ras = mca_get_mca_ras_info(adev, blk);
	if (!mca_ras)
		return -EOPNOTSUPP;

	if (!mca_bank_is_valid(adev, mca_ras, type, entry)) {
		*count = 0;
		return 0;
	}

	return mca_ras->get_err_count(mca_ras, adev, type, entry, count);
}

static int mca_smu_get_mca_entry(struct amdgpu_device *adev,
				 enum amdgpu_mca_error_type type, int idx, struct mca_bank_entry *entry)
{
	return mca_get_mca_entry(adev, type, idx, entry);
}

static int mca_smu_get_valid_mca_count(struct amdgpu_device *adev,
				       enum amdgpu_mca_error_type type, uint32_t *count)
{
	return mca_get_valid_mca_count(adev, type, count);
}

static const struct amdgpu_mca_smu_funcs smu_v13_0_6_mca_smu_funcs = {
	.max_ue_count = 12,
	.max_ce_count = 12,
	.mca_set_debug_mode = mca_smu_set_debug_mode,
	.mca_parse_mca_error_count = mca_smu_parse_mca_error_count,
	.mca_get_mca_entry = mca_smu_get_mca_entry,
	.mca_get_valid_mca_count = mca_smu_get_valid_mca_count,
};

static int aca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable)
{
	struct smu_context *smu = adev->powerplay.pp_handle;

	return smu_v13_0_6_mca_set_debug_mode(smu, enable);
}

static int smu_v13_0_6_get_valid_aca_count(struct smu_context *smu, enum aca_smu_type type, u32 *count)
{
	uint32_t msg;
	int ret;

	if (!count)
		return -EINVAL;

	switch (type) {
	case ACA_SMU_TYPE_UE:
		msg = SMU_MSG_QueryValidMcaCount;
		break;
	case ACA_SMU_TYPE_CE:
		msg = SMU_MSG_QueryValidMcaCeCount;
		break;
	default:
		return -EINVAL;
	}

	ret = smu_cmn_send_smc_msg(smu, msg, count);
	if (ret) {
		*count = 0;
		return ret;
	}

	return 0;
}

static int aca_smu_get_valid_aca_count(struct amdgpu_device *adev,
				       enum aca_smu_type type, u32 *count)
{
	struct smu_context *smu = adev->powerplay.pp_handle;
	int ret;

	switch (type) {
	case ACA_SMU_TYPE_UE:
	case ACA_SMU_TYPE_CE:
		ret = smu_v13_0_6_get_valid_aca_count(smu, type, count);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

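/* ACA mirror of the MCA bank dump: same messages and argument encoding. */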
static int __smu_v13_0_6_aca_bank_dump(struct smu_context *smu, enum aca_smu_type type,
				       int idx, int offset, u32 *val)
{
	uint32_t msg, param;

	switch (type) {
	case ACA_SMU_TYPE_UE:
		msg = SMU_MSG_McaBankDumpDW;
		break;
	case ACA_SMU_TYPE_CE:
		msg = SMU_MSG_McaBankCeDumpDW;
		break;
	default:
		return -EINVAL;
	}

	param = ((idx & 0xffff) << 16) | (offset & 0xfffc);

	return smu_cmn_send_smc_msg_with_param(smu, msg, param, (uint32_t *)val);
}

static int smu_v13_0_6_aca_bank_dump(struct smu_context *smu, enum aca_smu_type type,
				     int idx, int offset, u32 *val, int count)
{
	int ret, i;

	if (!val)
		return -EINVAL;

	for (i = 0; i < count; i++) {
		ret = __smu_v13_0_6_aca_bank_dump(smu, type, idx, offset + (i << 2), &val[i]);
		if (ret)
			return ret;
	}

	return 0;
}

static int aca_bank_read_reg(struct amdgpu_device *adev, enum aca_smu_type type,
			     int idx, int reg_idx, u64 *val)
{
	struct smu_context *smu = adev->powerplay.pp_handle;
	u32 data[2] = {0, 0};
	int ret;

	if (!val || reg_idx >= ACA_REG_IDX_COUNT)
		return -EINVAL;

	ret = smu_v13_0_6_aca_bank_dump(smu, type, idx, reg_idx * 8, data, ARRAY_SIZE(data));
	if (ret)
		return ret;

	*val = (u64)data[1] << 32 | data[0];

	dev_dbg(adev->dev, "aca read bank reg: type:%s, index: %d, reg_idx: %d, val: 0x%016llx\n",
		type == ACA_SMU_TYPE_UE ? "UE" : "CE", idx, reg_idx, *val);

	return 0;
}

static int aca_smu_get_valid_aca_bank(struct amdgpu_device *adev,
				      enum aca_smu_type type, int idx, struct aca_bank *bank)
{
	int i, ret, count;

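	/* Read at most 16 registers per bank, capped by the size of bank->regs. */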
	count = min_t(int, 16, ARRAY_SIZE(bank->regs));
	for (i = 0; i < count; i++) {
		ret = aca_bank_read_reg(adev, type, idx, i, &bank->regs[i]);
		if (ret)
			return ret;
	}

	return 0;
}

static int aca_smu_parse_error_code(struct amdgpu_device *adev, struct aca_bank *bank)
{
	struct smu_context *smu = adev->powerplay.pp_handle;
	int error_code;

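	/* Prefer the syndrome register when the firmware exposes ACA syndromes. */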
	if (smu_v13_0_6_cap_supported(smu, SMU_CAP(ACA_SYND)))
		error_code = ACA_REG__SYND__ERRORINFORMATION(bank->regs[ACA_REG_IDX_SYND]);
	else
		error_code = ACA_REG__STATUS__ERRORCODE(bank->regs[ACA_REG_IDX_STATUS]);

	return error_code & 0xff;
}

static const struct aca_smu_funcs smu_v13_0_6_aca_smu_funcs = {
	.max_ue_bank_count = 12,
	.max_ce_bank_count = 12,
	.set_debug_mode = aca_smu_set_debug_mode,
	.get_valid_aca_count = aca_smu_get_valid_aca_count,
	.get_valid_aca_bank = aca_smu_get_valid_aca_bank,
	.parse_error_code = aca_smu_parse_error_code,
};

static void smu_v13_0_6_set_temp_funcs(struct smu_context *smu)
{
	smu->smu_temp.temp_funcs = (amdgpu_ip_version(smu->adev, MP1_HWIP, 0)
				    == IP_VERSION(13, 0, 12)) ? &smu_v13_0_12_temp_funcs : NULL;
}

static int smu_v13_0_6_get_ras_smu_drv(struct smu_context *smu, const struct ras_smu_drv **ras_smu_drv)
{
	if (!ras_smu_drv)
		return -EINVAL;

	if (amdgpu_sriov_vf(smu->adev))
		return -EOPNOTSUPP;

	if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_HROM_EN_BIT))
		smu_v13_0_6_cap_set(smu, SMU_CAP(RAS_EEPROM));

	switch (amdgpu_ip_version(smu->adev, MP1_HWIP, 0)) {
	case IP_VERSION(13, 0, 12):
		*ras_smu_drv = &smu_v13_0_12_ras_smu_drv;
		break;
	default:
		*ras_smu_drv = NULL;
		break;
	}

	return 0;
}

static const struct pptable_funcs smu_v13_0_6_ppt_funcs = {
	/* init dpm */
	.init_allowed_features = smu_v13_0_6_init_allowed_features,
	/* dpm/clk tables */
	.set_default_dpm_table = smu_v13_0_6_set_default_dpm_table,
	.populate_umd_state_clk = smu_v13_0_6_populate_umd_state_clk,
	.emit_clk_levels = smu_v13_0_6_emit_clk_levels,
	.force_clk_levels = smu_v13_0_6_force_clk_levels,
	.read_sensor = smu_v13_0_6_read_sensor,
	.set_performance_level = smu_v13_0_6_set_performance_level,
	.get_power_limit = smu_v13_0_6_get_power_limit,
	.is_dpm_running = smu_v13_0_6_is_dpm_running,
	.get_unique_id = smu_v13_0_6_get_unique_id,
	.init_microcode = smu_v13_0_6_init_microcode,
	.fini_microcode = smu_v13_0_fini_microcode,
	.init_smc_tables = smu_v13_0_6_init_smc_tables,
	.fini_smc_tables = smu_v13_0_6_fini_smc_tables,
	.init_power = smu_v13_0_init_power,
	.fini_power = smu_v13_0_fini_power,
	.check_fw_status = smu_v13_0_6_check_fw_status,
	/* pptable related */
	.check_fw_version = smu_v13_0_6_check_fw_version,
	.set_driver_table_location = smu_v13_0_set_driver_table_location,
	.set_tool_table_location = smu_v13_0_set_tool_table_location,
	.notify_memory_pool_location = smu_v13_0_notify_memory_pool_location,
	.system_features_control = smu_v13_0_6_system_features_control,
	.get_enabled_mask = smu_v13_0_6_get_enabled_mask,
	.feature_is_enabled = smu_cmn_feature_is_enabled,
	.set_power_limit = smu_v13_0_6_set_power_limit,
	.get_ppt_limit = smu_v13_0_6_get_ppt_limit,
	.set_xgmi_pstate = smu_v13_0_set_xgmi_pstate,
	.register_irq_handler = smu_v13_0_6_register_irq_handler,
	.enable_thermal_alert = smu_v13_0_enable_thermal_alert,
	.disable_thermal_alert = smu_v13_0_disable_thermal_alert,
	.setup_pptable = smu_v13_0_6_setup_pptable,
	.get_bamaco_support = smu_v13_0_6_get_bamaco_support,
	.get_dpm_ultimate_freq = smu_v13_0_6_get_dpm_ultimate_freq,
	.set_soft_freq_limited_range = smu_v13_0_6_set_soft_freq_limited_range,
	.od_edit_dpm_table = smu_v13_0_6_usr_edit_dpm_table,
	.log_thermal_throttling_event = smu_v13_0_6_log_thermal_throttling_event,
	.get_pp_feature_mask = smu_cmn_get_pp_feature_mask,
	.get_gpu_metrics = smu_v13_0_6_get_gpu_metrics,
	.get_pm_metrics = smu_v13_0_6_get_pm_metrics,
	.get_xcp_metrics = smu_v13_0_6_get_xcp_metrics,
	.get_thermal_temperature_range = smu_v13_0_6_get_thermal_temperature_range,
	.mode1_reset_is_support = smu_v13_0_6_is_mode1_reset_supported,
	.mode1_reset = smu_v13_0_6_mode1_reset,
	.mode2_reset = smu_v13_0_6_mode2_reset,
	.link_reset = smu_v13_0_6_link_reset,
	.wait_for_event = smu_v13_0_wait_for_event,
	.i2c_init = smu_v13_0_6_i2c_control_init,
	.i2c_fini = smu_v13_0_6_i2c_control_fini,
	.send_hbm_bad_pages_num = smu_v13_0_6_smu_send_hbm_bad_page_num,
	.send_rma_reason = smu_v13_0_6_send_rma_reason,
	.reset_sdma = smu_v13_0_6_reset_sdma,
	.dpm_reset_vcn = smu_v13_0_6_reset_vcn,
	.post_init = smu_v13_0_6_post_init,
	.ras_send_msg = smu_v13_0_6_ras_send_msg,
	.get_ras_smu_drv = smu_v13_0_6_get_ras_smu_drv,
};

void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu)
{
	const struct cmn2asic_msg_mapping *message_map;

	smu->ppt_funcs = &smu_v13_0_6_ppt_funcs;
	message_map = (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12)) ?
		      smu_v13_0_12_message_map : smu_v13_0_6_message_map;
	smu->clock_map = smu_v13_0_6_clk_map;
	smu->feature_map = (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12)) ?
			   smu_v13_0_12_feature_mask_map : smu_v13_0_6_feature_mask_map;
	smu->table_map = smu_v13_0_6_table_map;
	smu->smc_driver_if_version = SMU_IGNORE_IF_VERSION;
	smu->smc_fw_caps |= SMU_FW_CAP_RAS_PRI;
	smu_v13_0_init_msg_ctl(smu, message_map);
	smu_v13_0_6_set_temp_funcs(smu);
	amdgpu_mca_smu_init_funcs(smu->adev, &smu_v13_0_6_mca_smu_funcs);
	amdgpu_aca_set_smu_funcs(smu->adev, &smu_v13_0_6_aca_smu_funcs);
}