1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2023 Intel Corporation 4 */ 5 6 #include <linux/hwmon-sysfs.h> 7 #include <linux/hwmon.h> 8 #include <linux/jiffies.h> 9 #include <linux/types.h> 10 #include <linux/units.h> 11 12 #include <drm/drm_managed.h> 13 #include "regs/xe_gt_regs.h" 14 #include "regs/xe_mchbar_regs.h" 15 #include "regs/xe_pcode_regs.h" 16 #include "xe_device.h" 17 #include "xe_hwmon.h" 18 #include "xe_mmio.h" 19 #include "xe_pcode.h" 20 #include "xe_pcode_api.h" 21 #include "xe_sriov.h" 22 #include "xe_pm.h" 23 #include "xe_vsec.h" 24 #include "regs/xe_pmt.h" 25 26 enum xe_hwmon_reg { 27 REG_TEMP, 28 REG_PKG_RAPL_LIMIT, 29 REG_PKG_POWER_SKU, 30 REG_PKG_POWER_SKU_UNIT, 31 REG_GT_PERF_STATUS, 32 REG_PKG_ENERGY_STATUS, 33 REG_FAN_SPEED, 34 }; 35 36 enum xe_hwmon_reg_operation { 37 REG_READ32, 38 REG_RMW32, 39 REG_READ64, 40 }; 41 42 #define MAX_VRAM_CHANNELS (16) 43 44 enum xe_hwmon_channel { 45 CHANNEL_CARD, 46 CHANNEL_PKG, 47 CHANNEL_VRAM, 48 CHANNEL_MCTRL, 49 CHANNEL_PCIE, 50 CHANNEL_VRAM_N, 51 CHANNEL_VRAM_N_MAX = CHANNEL_VRAM_N + MAX_VRAM_CHANNELS, 52 CHANNEL_MAX, 53 }; 54 55 enum xe_fan_channel { 56 FAN_1, 57 FAN_2, 58 FAN_3, 59 FAN_MAX, 60 }; 61 62 enum xe_temp_limit { 63 TEMP_LIMIT_PKG_SHUTDOWN, 64 TEMP_LIMIT_PKG_CRIT, 65 TEMP_LIMIT_MEM_SHUTDOWN, 66 TEMP_LIMIT_PKG_MAX, 67 TEMP_LIMIT_MEM_CRIT, 68 TEMP_LIMIT_MAX 69 }; 70 71 /* Attribute index for powerX_xxx_interval sysfs entries */ 72 enum sensor_attr_power { 73 SENSOR_INDEX_PSYS_PL1, 74 SENSOR_INDEX_PKG_PL1, 75 SENSOR_INDEX_PSYS_PL2, 76 SENSOR_INDEX_PKG_PL2, 77 }; 78 79 /* 80 * For platforms that support mailbox commands for power limits, REG_PKG_POWER_SKU_UNIT is 81 * not supported and below are SKU units to be used. 82 */ 83 #define PWR_UNIT 0x3 84 #define ENERGY_UNIT 0xe 85 #define TIME_UNIT 0xa 86 87 /* 88 * SF_* - scale factors for particular quantities according to hwmon spec. 
89 */ 90 #define SF_POWER 1000000 /* microwatts */ 91 #define SF_CURR 1000 /* milliamperes */ 92 #define SF_VOLTAGE 1000 /* millivolts */ 93 #define SF_ENERGY 1000000 /* microjoules */ 94 #define SF_TIME 1000 /* milliseconds */ 95 96 /* 97 * PL*_HWMON_ATTR - mapping of hardware power limits to corresponding hwmon power attribute. 98 */ 99 #define PL1_HWMON_ATTR hwmon_power_max 100 #define PL2_HWMON_ATTR hwmon_power_cap 101 102 #define PWR_ATTR_TO_STR(attr) (((attr) == hwmon_power_max) ? "PL1" : "PL2") 103 104 /* 105 * Timeout for power limit write mailbox command. 106 */ 107 #define PL_WRITE_MBX_TIMEOUT_MS (1) 108 109 /* Index of memory controller in READ_THERMAL_DATA output */ 110 #define TEMP_INDEX_MCTRL 2 111 112 /* Maximum characters in hwmon label name */ 113 #define MAX_LABEL_SIZE 16 114 115 /** 116 * struct xe_hwmon_energy_info - to accumulate energy 117 */ 118 struct xe_hwmon_energy_info { 119 /** @reg_val_prev: previous energy reg val */ 120 u32 reg_val_prev; 121 /** @accum_energy: accumulated energy */ 122 long accum_energy; 123 }; 124 125 /** 126 * struct xe_hwmon_fan_info - to cache previous fan reading 127 */ 128 struct xe_hwmon_fan_info { 129 /** @reg_val_prev: previous fan reg val */ 130 u32 reg_val_prev; 131 /** @time_prev: previous timestamp */ 132 u64 time_prev; 133 }; 134 135 /** 136 * struct xe_hwmon_thermal_info - to store temperature data 137 */ 138 struct xe_hwmon_thermal_info { 139 union { 140 /** @limit: temperatures limits */ 141 u8 limit[TEMP_LIMIT_MAX]; 142 /** @data: temperature limits in dwords */ 143 u32 data[DIV_ROUND_UP(TEMP_LIMIT_MAX, sizeof(u32))]; 144 }; 145 /** @count: no of temperature sensors available for the platform */ 146 u8 count; 147 /** @value: signed value from each sensor */ 148 s8 value[U8_MAX]; 149 /** @vram_label: vram label names */ 150 char vram_label[MAX_VRAM_CHANNELS][MAX_LABEL_SIZE]; 151 }; 152 153 /** 154 * struct xe_hwmon - xe hwmon data structure 155 */ 156 struct xe_hwmon { 157 /** @hwmon_dev: hwmon device 
for xe */ 158 struct device *hwmon_dev; 159 /** @xe: Xe device */ 160 struct xe_device *xe; 161 /** @hwmon_lock: lock for rw attributes*/ 162 struct mutex hwmon_lock; 163 /** @scl_shift_power: pkg power unit */ 164 int scl_shift_power; 165 /** @scl_shift_energy: pkg energy unit */ 166 int scl_shift_energy; 167 /** @scl_shift_time: pkg time unit */ 168 int scl_shift_time; 169 /** @ei: Energy info for energyN_input */ 170 struct xe_hwmon_energy_info ei[CHANNEL_MAX]; 171 /** @fi: Fan info for fanN_input */ 172 struct xe_hwmon_fan_info fi[FAN_MAX]; 173 /** @boot_power_limit_read: is boot power limits read */ 174 bool boot_power_limit_read; 175 /** @pl1_on_boot: power limit PL1 on boot */ 176 u32 pl1_on_boot[CHANNEL_MAX]; 177 /** @pl2_on_boot: power limit PL2 on boot */ 178 u32 pl2_on_boot[CHANNEL_MAX]; 179 /** @temp: Temperature info */ 180 struct xe_hwmon_thermal_info temp; 181 }; 182 183 static int xe_hwmon_pcode_read_power_limit(const struct xe_hwmon *hwmon, u32 attr, int channel, 184 u32 *uval) 185 { 186 struct xe_tile *root_tile = xe_device_get_root_tile(hwmon->xe); 187 u32 val0 = 0, val1 = 0; 188 int ret = 0; 189 190 ret = xe_pcode_read(root_tile, PCODE_MBOX(PCODE_POWER_SETUP, 191 (channel == CHANNEL_CARD) ? 192 READ_PSYSGPU_POWER_LIMIT : 193 READ_PACKAGE_POWER_LIMIT, 194 hwmon->boot_power_limit_read ? 195 READ_PL_FROM_PCODE : READ_PL_FROM_FW), 196 &val0, &val1); 197 198 if (ret) { 199 drm_dbg(&hwmon->xe->drm, "read failed ch %d val0 0x%08x, val1 0x%08x, ret %d\n", 200 channel, val0, val1, ret); 201 *uval = 0; 202 return ret; 203 } 204 205 /* return the value only if limit is enabled */ 206 if (attr == PL1_HWMON_ATTR) 207 *uval = (val0 & PWR_LIM_EN) ? val0 : 0; 208 else if (attr == PL2_HWMON_ATTR) 209 *uval = (val1 & PWR_LIM_EN) ? val1 : 0; 210 else if (attr == hwmon_power_label) 211 *uval = (val0 & PWR_LIM_EN) ? 1 : (val1 & PWR_LIM_EN) ? 
1 : 0; 212 else 213 *uval = 0; 214 215 return ret; 216 } 217 218 static int xe_hwmon_pcode_rmw_power_limit(const struct xe_hwmon *hwmon, u32 attr, u8 channel, 219 u32 clr, u32 set) 220 { 221 struct xe_tile *root_tile = xe_device_get_root_tile(hwmon->xe); 222 u32 val0 = 0, val1 = 0; 223 int ret = 0; 224 225 ret = xe_pcode_read(root_tile, PCODE_MBOX(PCODE_POWER_SETUP, 226 (channel == CHANNEL_CARD) ? 227 READ_PSYSGPU_POWER_LIMIT : 228 READ_PACKAGE_POWER_LIMIT, 229 hwmon->boot_power_limit_read ? 230 READ_PL_FROM_PCODE : READ_PL_FROM_FW), 231 &val0, &val1); 232 233 if (ret) 234 drm_dbg(&hwmon->xe->drm, "read failed ch %d val0 0x%08x, val1 0x%08x, ret %d\n", 235 channel, val0, val1, ret); 236 237 if (attr == PL1_HWMON_ATTR) 238 val0 = (val0 & ~clr) | set; 239 else if (attr == PL2_HWMON_ATTR) 240 val1 = (val1 & ~clr) | set; 241 else 242 return -EIO; 243 244 ret = xe_pcode_write64_timeout(root_tile, PCODE_MBOX(PCODE_POWER_SETUP, 245 (channel == CHANNEL_CARD) ? 246 WRITE_PSYSGPU_POWER_LIMIT : 247 WRITE_PACKAGE_POWER_LIMIT, 0), 248 val0, val1, PL_WRITE_MBX_TIMEOUT_MS); 249 if (ret) 250 drm_dbg(&hwmon->xe->drm, "write failed ch %d val0 0x%08x, val1 0x%08x, ret %d\n", 251 channel, val0, val1, ret); 252 return ret; 253 } 254 255 static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg, 256 int channel) 257 { 258 struct xe_device *xe = hwmon->xe; 259 260 switch (hwmon_reg) { 261 case REG_TEMP: 262 if (xe->info.platform == XE_BATTLEMAGE) { 263 if (channel == CHANNEL_PKG) 264 return BMG_PACKAGE_TEMPERATURE; 265 else if (channel == CHANNEL_VRAM) 266 return BMG_VRAM_TEMPERATURE; 267 else if (in_range(channel, CHANNEL_VRAM_N, CHANNEL_VRAM_N_MAX)) 268 return BMG_VRAM_TEMPERATURE_N(channel - CHANNEL_VRAM_N); 269 } else if (xe->info.platform == XE_DG2) { 270 if (channel == CHANNEL_PKG) 271 return PCU_CR_PACKAGE_TEMPERATURE; 272 else if (channel == CHANNEL_VRAM) 273 return BMG_VRAM_TEMPERATURE; 274 } 275 break; 276 case REG_PKG_RAPL_LIMIT: 277 if 
(xe->info.platform == XE_PVC && channel == CHANNEL_PKG) 278 return PVC_GT0_PACKAGE_RAPL_LIMIT; 279 else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG)) 280 return PCU_CR_PACKAGE_RAPL_LIMIT; 281 break; 282 case REG_PKG_POWER_SKU: 283 if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG) 284 return PVC_GT0_PACKAGE_POWER_SKU; 285 else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG)) 286 return PCU_CR_PACKAGE_POWER_SKU; 287 break; 288 case REG_PKG_POWER_SKU_UNIT: 289 if (xe->info.platform == XE_PVC) 290 return PVC_GT0_PACKAGE_POWER_SKU_UNIT; 291 else if (xe->info.platform == XE_DG2) 292 return PCU_CR_PACKAGE_POWER_SKU_UNIT; 293 break; 294 case REG_GT_PERF_STATUS: 295 if (xe->info.platform == XE_DG2 && channel == CHANNEL_PKG) 296 return GT_PERF_STATUS; 297 break; 298 case REG_PKG_ENERGY_STATUS: 299 if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG) { 300 return PVC_GT0_PLATFORM_ENERGY_STATUS; 301 } else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG)) { 302 return PCU_CR_PACKAGE_ENERGY_STATUS; 303 } 304 break; 305 case REG_FAN_SPEED: 306 if (channel == FAN_1) 307 return BMG_FAN_1_SPEED; 308 else if (channel == FAN_2) 309 return BMG_FAN_2_SPEED; 310 else if (channel == FAN_3) 311 return BMG_FAN_3_SPEED; 312 break; 313 default: 314 drm_warn(&xe->drm, "Unknown xe hwmon reg id: %d\n", hwmon_reg); 315 break; 316 } 317 318 return XE_REG(0); 319 } 320 321 #define PL_DISABLE 0 322 323 /* 324 * HW allows arbitrary PL1 limits to be set but silently clamps these values to 325 * "typical but not guaranteed" min/max values in REG_PKG_POWER_SKU. Follow the 326 * same pattern for sysfs, allow arbitrary PL1 limits to be set but display 327 * clamped values when read. 
328 */ 329 static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *value) 330 { 331 u32 reg_val = 0; 332 struct xe_device *xe = hwmon->xe; 333 struct xe_reg rapl_limit, pkg_power_sku; 334 struct xe_mmio *mmio = xe_root_tile_mmio(xe); 335 336 mutex_lock(&hwmon->hwmon_lock); 337 338 if (hwmon->xe->info.has_mbx_power_limits) { 339 xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, ®_val); 340 } else { 341 rapl_limit = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel); 342 pkg_power_sku = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel); 343 reg_val = xe_mmio_read32(mmio, rapl_limit); 344 } 345 346 /* Check if PL limits are disabled. */ 347 if (!(reg_val & PWR_LIM_EN)) { 348 *value = PL_DISABLE; 349 drm_info(&hwmon->xe->drm, "%s disabled for channel %d, val 0x%08x\n", 350 PWR_ATTR_TO_STR(attr), channel, reg_val); 351 goto unlock; 352 } 353 354 reg_val = REG_FIELD_GET(PWR_LIM_VAL, reg_val); 355 *value = mul_u32_u32(reg_val, SF_POWER) >> hwmon->scl_shift_power; 356 357 /* For platforms with mailbox power limit support clamping would be done by pcode. 
*/ 358 if (!hwmon->xe->info.has_mbx_power_limits) { 359 u64 pkg_pwr, min, max; 360 361 pkg_pwr = xe_mmio_read64_2x32(mmio, pkg_power_sku); 362 min = REG_FIELD_GET(PKG_MIN_PWR, pkg_pwr); 363 max = REG_FIELD_GET(PKG_MAX_PWR, pkg_pwr); 364 min = mul_u64_u32_shr(min, SF_POWER, hwmon->scl_shift_power); 365 max = mul_u64_u32_shr(max, SF_POWER, hwmon->scl_shift_power); 366 if (min && max) 367 *value = clamp_t(u64, *value, min, max); 368 } 369 unlock: 370 mutex_unlock(&hwmon->hwmon_lock); 371 } 372 373 static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, u32 attr, int channel, long value) 374 { 375 struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); 376 int ret = 0; 377 u32 reg_val, max; 378 struct xe_reg rapl_limit; 379 u64 max_supp_power_limit = 0; 380 381 mutex_lock(&hwmon->hwmon_lock); 382 383 rapl_limit = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel); 384 385 /* Disable Power Limit and verify, as limit cannot be disabled on all platforms. */ 386 if (value == PL_DISABLE) { 387 if (hwmon->xe->info.has_mbx_power_limits) { 388 drm_dbg(&hwmon->xe->drm, "disabling %s on channel %d\n", 389 PWR_ATTR_TO_STR(attr), channel); 390 xe_hwmon_pcode_rmw_power_limit(hwmon, attr, channel, PWR_LIM_EN, 0); 391 xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, ®_val); 392 } else { 393 reg_val = xe_mmio_rmw32(mmio, rapl_limit, PWR_LIM_EN, 0); 394 reg_val = xe_mmio_read32(mmio, rapl_limit); 395 } 396 397 if (reg_val & PWR_LIM_EN) { 398 drm_warn(&hwmon->xe->drm, "Power limit disable is not supported!\n"); 399 ret = -EOPNOTSUPP; 400 } 401 goto unlock; 402 } 403 404 /* 405 * If the sysfs value exceeds the maximum pcode supported power limit value, clamp it to 406 * the supported maximum (U12.3 format). 407 * This is to avoid truncation during reg_val calculation below and ensure the valid 408 * power limit is sent for pcode which would clamp it to card-supported value. 
409 */ 410 max_supp_power_limit = ((PWR_LIM_VAL) >> hwmon->scl_shift_power) * SF_POWER; 411 if (value > max_supp_power_limit) { 412 value = max_supp_power_limit; 413 drm_info(&hwmon->xe->drm, 414 "Power limit clamped as selected %s exceeds channel %d limit\n", 415 PWR_ATTR_TO_STR(attr), channel); 416 } 417 418 /* Computation in 64-bits to avoid overflow. Round to nearest. */ 419 reg_val = DIV_ROUND_CLOSEST_ULL((u64)value << hwmon->scl_shift_power, SF_POWER); 420 421 /* 422 * Clamp power limit to GPU firmware default as maximum, as an additional protection to 423 * pcode clamp. 424 */ 425 if (hwmon->xe->info.has_mbx_power_limits) { 426 max = (attr == PL1_HWMON_ATTR) ? 427 hwmon->pl1_on_boot[channel] : hwmon->pl2_on_boot[channel]; 428 max = REG_FIELD_PREP(PWR_LIM_VAL, max); 429 if (reg_val > max) { 430 reg_val = max; 431 drm_dbg(&hwmon->xe->drm, 432 "Clamping power limit to GPU firmware default 0x%x\n", 433 reg_val); 434 } 435 } 436 437 reg_val = PWR_LIM_EN | REG_FIELD_PREP(PWR_LIM_VAL, reg_val); 438 439 if (hwmon->xe->info.has_mbx_power_limits) 440 ret = xe_hwmon_pcode_rmw_power_limit(hwmon, attr, channel, PWR_LIM, reg_val); 441 else 442 reg_val = xe_mmio_rmw32(mmio, rapl_limit, PWR_LIM, reg_val); 443 unlock: 444 mutex_unlock(&hwmon->hwmon_lock); 445 return ret; 446 } 447 448 static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, u32 attr, int channel, 449 long *value) 450 { 451 struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); 452 u32 reg_val; 453 454 if (hwmon->xe->info.has_mbx_power_limits) { 455 /* PL1 is rated max if supported. */ 456 xe_hwmon_pcode_read_power_limit(hwmon, PL1_HWMON_ATTR, channel, ®_val); 457 } else { 458 /* 459 * This sysfs file won't be visible if REG_PKG_POWER_SKU is invalid, so valid check 460 * for this register can be skipped. 461 * See xe_hwmon_power_is_visible. 
462 */ 463 struct xe_reg reg = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel); 464 465 reg_val = xe_mmio_read32(mmio, reg); 466 } 467 468 reg_val = REG_FIELD_GET(PKG_TDP, reg_val); 469 *value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power); 470 } 471 472 /* 473 * xe_hwmon_energy_get - Obtain energy value 474 * 475 * The underlying energy hardware register is 32-bits and is subject to 476 * overflow. How long before overflow? For example, with an example 477 * scaling bit shift of 14 bits (see register *PACKAGE_POWER_SKU_UNIT) and 478 * a power draw of 1000 watts, the 32-bit counter will overflow in 479 * approximately 4.36 minutes. 480 * 481 * Examples: 482 * 1 watt: (2^32 >> 14) / 1 W / (60 * 60 * 24) secs/day -> 3 days 483 * 1000 watts: (2^32 >> 14) / 1000 W / 60 secs/min -> 4.36 minutes 484 * 485 * The function significantly increases overflow duration (from 4.36 486 * minutes) by accumulating the energy register into a 'long' as allowed by 487 * the hwmon API. Using x86_64 128 bit arithmetic (see mul_u64_u32_shr()), 488 * a 'long' of 63 bits, SF_ENERGY of 1e6 (~20 bits) and 489 * hwmon->scl_shift_energy of 14 bits we have 57 (63 - 20 + 14) bits before 490 * energyN_input overflows. This at 1000 W is an overflow duration of 278 years. 
491 */ 492 static void 493 xe_hwmon_energy_get(struct xe_hwmon *hwmon, int channel, long *energy) 494 { 495 struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); 496 struct xe_hwmon_energy_info *ei = &hwmon->ei[channel]; 497 u32 reg_val; 498 int ret = 0; 499 500 /* Energy is supported only for card and pkg */ 501 if (channel > CHANNEL_PKG) { 502 *energy = 0; 503 return; 504 } 505 506 if (hwmon->xe->info.platform == XE_BATTLEMAGE) { 507 u64 pmt_val; 508 509 ret = xe_pmt_telem_read(to_pci_dev(hwmon->xe->drm.dev), 510 xe_mmio_read32(mmio, PUNIT_TELEMETRY_GUID), 511 &pmt_val, BMG_ENERGY_STATUS_PMT_OFFSET, sizeof(pmt_val)); 512 if (ret != sizeof(pmt_val)) { 513 drm_warn(&hwmon->xe->drm, "energy read from pmt failed, ret %d\n", ret); 514 *energy = 0; 515 return; 516 } 517 518 if (channel == CHANNEL_PKG) 519 reg_val = REG_FIELD_GET64(ENERGY_PKG, pmt_val); 520 else 521 reg_val = REG_FIELD_GET64(ENERGY_CARD, pmt_val); 522 } else { 523 reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS, 524 channel)); 525 } 526 527 ei->accum_energy += reg_val - ei->reg_val_prev; 528 ei->reg_val_prev = reg_val; 529 530 *energy = mul_u64_u32_shr(ei->accum_energy, SF_ENERGY, 531 hwmon->scl_shift_energy); 532 } 533 534 static ssize_t 535 xe_hwmon_power_max_interval_show(struct device *dev, struct device_attribute *attr, 536 char *buf) 537 { 538 struct xe_hwmon *hwmon = dev_get_drvdata(dev); 539 struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); 540 u32 reg_val, x, y, x_w = 2; /* 2 bits */ 541 u64 tau4, out; 542 int channel = (to_sensor_dev_attr(attr)->index % 2) ? CHANNEL_PKG : CHANNEL_CARD; 543 u32 power_attr = (to_sensor_dev_attr(attr)->index > 1) ? 
PL2_HWMON_ATTR : PL1_HWMON_ATTR; 544 545 int ret = 0; 546 547 guard(xe_pm_runtime)(hwmon->xe); 548 549 mutex_lock(&hwmon->hwmon_lock); 550 551 if (hwmon->xe->info.has_mbx_power_limits) { 552 ret = xe_hwmon_pcode_read_power_limit(hwmon, power_attr, channel, ®_val); 553 if (ret) { 554 drm_err(&hwmon->xe->drm, 555 "power interval read fail, ch %d, attr %d, val 0x%08x, ret %d\n", 556 channel, power_attr, reg_val, ret); 557 reg_val = 0; 558 } 559 } else { 560 reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, 561 channel)); 562 } 563 564 mutex_unlock(&hwmon->hwmon_lock); 565 566 x = REG_FIELD_GET(PWR_LIM_TIME_X, reg_val); 567 y = REG_FIELD_GET(PWR_LIM_TIME_Y, reg_val); 568 569 /* 570 * tau = (1 + (x / 4)) * power(2,y), x = bits(23:22), y = bits(21:17) 571 * = (4 | x) << (y - 2) 572 * 573 * Here (y - 2) ensures a 1.x fixed point representation of 1.x 574 * As x is 2 bits so 1.x can be 1.0, 1.25, 1.50, 1.75 575 * 576 * As y can be < 2, we compute tau4 = (4 | x) << y 577 * and then add 2 when doing the final right shift to account for units 578 */ 579 tau4 = (u64)((1 << x_w) | x) << y; 580 581 /* val in hwmon interface units (millisec) */ 582 out = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w); 583 584 return sysfs_emit(buf, "%llu\n", out); 585 } 586 587 static ssize_t 588 xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *attr, 589 const char *buf, size_t count) 590 { 591 struct xe_hwmon *hwmon = dev_get_drvdata(dev); 592 struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); 593 u32 x, y, rxy, x_w = 2; /* 2 bits */ 594 u64 tau4, r, max_win; 595 unsigned long val; 596 int channel = (to_sensor_dev_attr(attr)->index % 2) ? CHANNEL_PKG : CHANNEL_CARD; 597 u32 power_attr = (to_sensor_dev_attr(attr)->index > 1) ? 
PL2_HWMON_ATTR : PL1_HWMON_ATTR; 598 int ret; 599 600 ret = kstrtoul(buf, 0, &val); 601 if (ret) 602 return ret; 603 604 /* 605 * Max HW supported tau in '(1 + (x / 4)) * power(2,y)' format, x = 0, y = 0x12. 606 * The hwmon->scl_shift_time default of 0xa results in a max tau of 256 seconds. 607 * 608 * The ideal scenario is for PKG_MAX_WIN to be read from the PKG_PWR_SKU register. 609 * However, it is observed that existing discrete GPUs does not provide correct 610 * PKG_MAX_WIN value, therefore a using default constant value. For future discrete GPUs 611 * this may get resolved, in which case PKG_MAX_WIN should be obtained from PKG_PWR_SKU. 612 */ 613 #define PKG_MAX_WIN_DEFAULT 0x12ull 614 615 /* 616 * val must be < max in hwmon interface units. The steps below are 617 * explained in xe_hwmon_power_max_interval_show() 618 */ 619 r = FIELD_PREP(PKG_MAX_WIN, PKG_MAX_WIN_DEFAULT); 620 x = REG_FIELD_GET(PKG_MAX_WIN_X, r); 621 y = REG_FIELD_GET(PKG_MAX_WIN_Y, r); 622 tau4 = (u64)((1 << x_w) | x) << y; 623 max_win = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w); 624 625 if (val > max_win) 626 return -EINVAL; 627 628 /* val in hw units */ 629 val = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_time, SF_TIME) + 1; 630 631 /* 632 * Convert val to 1.x * power(2,y) 633 * y = ilog2(val) 634 * x = (val - (1 << y)) >> (y - 2) 635 */ 636 if (!val) { 637 y = 0; 638 x = 0; 639 } else { 640 y = ilog2(val); 641 x = (val - (1ul << y)) << x_w >> y; 642 } 643 644 rxy = REG_FIELD_PREP(PWR_LIM_TIME_X, x) | 645 REG_FIELD_PREP(PWR_LIM_TIME_Y, y); 646 647 guard(xe_pm_runtime)(hwmon->xe); 648 649 mutex_lock(&hwmon->hwmon_lock); 650 651 if (hwmon->xe->info.has_mbx_power_limits) 652 xe_hwmon_pcode_rmw_power_limit(hwmon, power_attr, channel, PWR_LIM_TIME, rxy); 653 else 654 r = xe_mmio_rmw32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel), 655 PWR_LIM_TIME, rxy); 656 657 mutex_unlock(&hwmon->hwmon_lock); 658 659 return count; 660 } 661 662 /* PSYS PL1 */ 663 
static SENSOR_DEVICE_ATTR(power1_max_interval, 0664, 664 xe_hwmon_power_max_interval_show, 665 xe_hwmon_power_max_interval_store, SENSOR_INDEX_PSYS_PL1); 666 /* PKG PL1 */ 667 static SENSOR_DEVICE_ATTR(power2_max_interval, 0664, 668 xe_hwmon_power_max_interval_show, 669 xe_hwmon_power_max_interval_store, SENSOR_INDEX_PKG_PL1); 670 /* PSYS PL2 */ 671 static SENSOR_DEVICE_ATTR(power1_cap_interval, 0664, 672 xe_hwmon_power_max_interval_show, 673 xe_hwmon_power_max_interval_store, SENSOR_INDEX_PSYS_PL2); 674 /* PKG PL2 */ 675 static SENSOR_DEVICE_ATTR(power2_cap_interval, 0664, 676 xe_hwmon_power_max_interval_show, 677 xe_hwmon_power_max_interval_store, SENSOR_INDEX_PKG_PL2); 678 679 static struct attribute *hwmon_attributes[] = { 680 &sensor_dev_attr_power1_max_interval.dev_attr.attr, 681 &sensor_dev_attr_power2_max_interval.dev_attr.attr, 682 &sensor_dev_attr_power1_cap_interval.dev_attr.attr, 683 &sensor_dev_attr_power2_cap_interval.dev_attr.attr, 684 NULL 685 }; 686 687 static umode_t xe_hwmon_attributes_visible(struct kobject *kobj, 688 struct attribute *attr, int index) 689 { 690 struct device *dev = kobj_to_dev(kobj); 691 struct xe_hwmon *hwmon = dev_get_drvdata(dev); 692 int ret = 0; 693 int channel = (index % 2) ? CHANNEL_PKG : CHANNEL_CARD; 694 u32 power_attr = (index > 1) ? PL2_HWMON_ATTR : PL1_HWMON_ATTR; 695 u32 uval = 0; 696 struct xe_reg rapl_limit; 697 struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); 698 699 if (hwmon->xe->info.has_mbx_power_limits) { 700 xe_hwmon_pcode_read_power_limit(hwmon, power_attr, channel, &uval); 701 } else if (power_attr != PL2_HWMON_ATTR) { 702 rapl_limit = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel); 703 if (xe_reg_is_valid(rapl_limit)) 704 uval = xe_mmio_read32(mmio, rapl_limit); 705 } 706 ret = (uval & PWR_LIM_EN) ? 
/* Group for the custom powerN_*_interval attributes, gated by visibility above. */
static const struct attribute_group hwmon_attrgroup = {
	.attrs = hwmon_attributes,
	.is_visible = xe_hwmon_attributes_visible,
};

static const struct attribute_group *hwmon_groups[] = {
	&hwmon_attrgroup,
	NULL
};

/*
 * Channel layout advertised to the hwmon core. Per-channel visibility is
 * narrowed at runtime by the *_is_visible callbacks.
 *
 * temp channels follow enum xe_hwmon_channel: card (label only), pkg, vram,
 * mctrl, pcie, then the per-channel vram sensors (CHANNEL_VRAM_N onwards).
 */
static const struct hwmon_channel_info * const hwmon_info[] = {
	HWMON_CHANNEL_INFO(temp,
			   HWMON_T_LABEL,
			   HWMON_T_CRIT | HWMON_T_EMERGENCY | HWMON_T_INPUT | HWMON_T_LABEL |
			   HWMON_T_MAX,
			   HWMON_T_CRIT | HWMON_T_EMERGENCY | HWMON_T_INPUT | HWMON_T_LABEL,
			   HWMON_T_CRIT | HWMON_T_EMERGENCY | HWMON_T_INPUT | HWMON_T_LABEL,
			   HWMON_T_CRIT | HWMON_T_EMERGENCY | HWMON_T_INPUT | HWMON_T_LABEL,
			   HWMON_T_CRIT | HWMON_T_EMERGENCY | HWMON_T_INPUT | HWMON_T_LABEL,
			   HWMON_T_CRIT | HWMON_T_EMERGENCY | HWMON_T_INPUT | HWMON_T_LABEL,
			   HWMON_T_CRIT | HWMON_T_EMERGENCY | HWMON_T_INPUT | HWMON_T_LABEL,
			   HWMON_T_CRIT | HWMON_T_EMERGENCY | HWMON_T_INPUT | HWMON_T_LABEL,
			   HWMON_T_CRIT | HWMON_T_EMERGENCY | HWMON_T_INPUT | HWMON_T_LABEL,
			   HWMON_T_CRIT | HWMON_T_EMERGENCY | HWMON_T_INPUT | HWMON_T_LABEL,
			   HWMON_T_CRIT | HWMON_T_EMERGENCY | HWMON_T_INPUT | HWMON_T_LABEL,
			   HWMON_T_CRIT | HWMON_T_EMERGENCY | HWMON_T_INPUT | HWMON_T_LABEL,
			   HWMON_T_CRIT | HWMON_T_EMERGENCY | HWMON_T_INPUT | HWMON_T_LABEL,
			   HWMON_T_CRIT | HWMON_T_EMERGENCY | HWMON_T_INPUT | HWMON_T_LABEL,
			   HWMON_T_CRIT | HWMON_T_EMERGENCY | HWMON_T_INPUT | HWMON_T_LABEL,
			   HWMON_T_CRIT | HWMON_T_EMERGENCY | HWMON_T_INPUT | HWMON_T_LABEL,
			   HWMON_T_CRIT | HWMON_T_EMERGENCY | HWMON_T_INPUT | HWMON_T_LABEL,
			   HWMON_T_CRIT | HWMON_T_EMERGENCY | HWMON_T_INPUT | HWMON_T_LABEL,
			   HWMON_T_CRIT | HWMON_T_EMERGENCY | HWMON_T_INPUT | HWMON_T_LABEL,
			   HWMON_T_CRIT | HWMON_T_EMERGENCY | HWMON_T_INPUT | HWMON_T_LABEL),
	/* power: channel 1 = card (psys), channel 2 = pkg */
	HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_LABEL | HWMON_P_CRIT |
			   HWMON_P_CAP,
			   HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_LABEL | HWMON_P_CAP),
	HWMON_CHANNEL_INFO(curr, HWMON_C_LABEL, HWMON_C_CRIT | HWMON_C_LABEL),
	HWMON_CHANNEL_INFO(in, HWMON_I_INPUT | HWMON_I_LABEL, HWMON_I_INPUT | HWMON_I_LABEL),
	HWMON_CHANNEL_INFO(energy, HWMON_E_INPUT | HWMON_E_LABEL, HWMON_E_INPUT | HWMON_E_LABEL),
	HWMON_CHANNEL_INFO(fan, HWMON_F_INPUT, HWMON_F_INPUT, HWMON_F_INPUT),
	NULL
};
HWMON_P_CAP), 748 HWMON_CHANNEL_INFO(curr, HWMON_C_LABEL, HWMON_C_CRIT | HWMON_C_LABEL), 749 HWMON_CHANNEL_INFO(in, HWMON_I_INPUT | HWMON_I_LABEL, HWMON_I_INPUT | HWMON_I_LABEL), 750 HWMON_CHANNEL_INFO(energy, HWMON_E_INPUT | HWMON_E_LABEL, HWMON_E_INPUT | HWMON_E_LABEL), 751 HWMON_CHANNEL_INFO(fan, HWMON_F_INPUT, HWMON_F_INPUT, HWMON_F_INPUT), 752 NULL 753 }; 754 755 static int xe_hwmon_pcode_read_thermal_info(struct xe_hwmon *hwmon) 756 { 757 struct xe_tile *root_tile = xe_device_get_root_tile(hwmon->xe); 758 u32 config = 0; 759 int ret; 760 761 ret = xe_pcode_read(root_tile, PCODE_MBOX(PCODE_THERMAL_INFO, READ_THERMAL_LIMITS, 0), 762 &hwmon->temp.data[0], &hwmon->temp.data[1]); 763 if (ret) 764 return ret; 765 766 drm_dbg(&hwmon->xe->drm, "thermal info read val 0x%x val1 0x%x\n", 767 hwmon->temp.data[0], hwmon->temp.data[1]); 768 769 ret = xe_pcode_read(root_tile, PCODE_MBOX(PCODE_THERMAL_INFO, READ_THERMAL_CONFIG, 0), 770 &config, NULL); 771 if (ret) 772 return ret; 773 774 drm_dbg(&hwmon->xe->drm, "thermal config count 0x%x\n", config); 775 hwmon->temp.count = REG_FIELD_GET(TEMP_MASK, config); 776 777 return ret; 778 } 779 780 static int get_mc_temp(struct xe_hwmon *hwmon, long *val) 781 { 782 struct xe_tile *root_tile = xe_device_get_root_tile(hwmon->xe); 783 u32 *dword = (u32 *)hwmon->temp.value; 784 s32 average = 0; 785 int ret, i; 786 787 for (i = 0; i < DIV_ROUND_UP(TEMP_LIMIT_MAX, sizeof(u32)); i++) { 788 ret = xe_pcode_read(root_tile, PCODE_MBOX(PCODE_THERMAL_INFO, READ_THERMAL_DATA, i), 789 (dword + i), NULL); 790 if (ret) 791 return ret; 792 drm_dbg(&hwmon->xe->drm, "thermal data for group %d val 0x%x\n", i, dword[i]); 793 } 794 795 for (i = TEMP_INDEX_MCTRL; i < hwmon->temp.count - 1; i++) 796 average += hwmon->temp.value[i]; 797 798 average /= (hwmon->temp.count - TEMP_INDEX_MCTRL - 1); 799 *val = average * MILLIDEGREE_PER_DEGREE; 800 return 0; 801 } 802 803 static int get_pcie_temp(struct xe_hwmon *hwmon, long *val) 804 { 805 struct xe_tile 
*root_tile = xe_device_get_root_tile(hwmon->xe); 806 u32 data = 0; 807 int ret; 808 809 ret = xe_pcode_read(root_tile, PCODE_MBOX(PCODE_THERMAL_INFO, READ_THERMAL_DATA, 810 PCIE_SENSOR_GROUP_ID), &data, NULL); 811 if (ret) 812 return ret; 813 814 /* Sensor offset is different for G21 */ 815 if (hwmon->xe->info.subplatform != XE_SUBPLATFORM_BATTLEMAGE_G21) 816 data = REG_FIELD_GET(PCIE_SENSOR_MASK, data); 817 818 data = REG_FIELD_GET(TEMP_MASK, data); 819 *val = (s8)data * MILLIDEGREE_PER_DEGREE; 820 821 return 0; 822 } 823 824 /* I1 is exposed as power_crit or as curr_crit depending on bit 31 */ 825 static int xe_hwmon_pcode_read_i1(const struct xe_hwmon *hwmon, u32 *uval) 826 { 827 struct xe_tile *root_tile = xe_device_get_root_tile(hwmon->xe); 828 829 /* Avoid Illegal Subcommand error */ 830 if (hwmon->xe->info.platform == XE_DG2) 831 return -ENXIO; 832 833 return xe_pcode_read(root_tile, PCODE_MBOX(PCODE_POWER_SETUP, 834 POWER_SETUP_SUBCOMMAND_READ_I1, 0), 835 uval, NULL); 836 } 837 838 static int xe_hwmon_pcode_write_i1(const struct xe_hwmon *hwmon, u32 uval) 839 { 840 struct xe_tile *root_tile = xe_device_get_root_tile(hwmon->xe); 841 842 return xe_pcode_write(root_tile, PCODE_MBOX(PCODE_POWER_SETUP, 843 POWER_SETUP_SUBCOMMAND_WRITE_I1, 0), 844 (uval & POWER_SETUP_I1_DATA_MASK)); 845 } 846 847 static int xe_hwmon_pcode_read_fan_control(const struct xe_hwmon *hwmon, u32 subcmd, u32 *uval) 848 { 849 struct xe_tile *root_tile = xe_device_get_root_tile(hwmon->xe); 850 851 /* Platforms that don't return correct value */ 852 if (hwmon->xe->info.platform == XE_DG2 && subcmd == FSC_READ_NUM_FANS) { 853 *uval = 2; 854 return 0; 855 } 856 857 return xe_pcode_read(root_tile, PCODE_MBOX(FAN_SPEED_CONTROL, subcmd, 0), uval, NULL); 858 } 859 860 static int xe_hwmon_power_curr_crit_read(struct xe_hwmon *hwmon, int channel, 861 long *value, u32 scale_factor) 862 { 863 int ret; 864 u32 uval = 0; 865 866 mutex_lock(&hwmon->hwmon_lock); 867 868 ret = 
xe_hwmon_pcode_read_i1(hwmon, &uval); 869 if (ret) 870 goto unlock; 871 872 *value = mul_u64_u32_shr(REG_FIELD_GET(POWER_SETUP_I1_DATA_MASK, uval), 873 scale_factor, POWER_SETUP_I1_SHIFT); 874 unlock: 875 mutex_unlock(&hwmon->hwmon_lock); 876 return ret; 877 } 878 879 static int xe_hwmon_power_curr_crit_write(struct xe_hwmon *hwmon, int channel, 880 long value, u32 scale_factor) 881 { 882 int ret; 883 u32 uval; 884 u64 max_crit_power_curr = 0; 885 886 mutex_lock(&hwmon->hwmon_lock); 887 888 /* 889 * If the sysfs value exceeds the pcode mailbox cmd POWER_SETUP_SUBCOMMAND_WRITE_I1 890 * max supported value, clamp it to the command's max (U10.6 format). 891 * This is to avoid truncation during uval calculation below and ensure the valid power 892 * limit is sent for pcode which would clamp it to card-supported value. 893 */ 894 max_crit_power_curr = (POWER_SETUP_I1_DATA_MASK >> POWER_SETUP_I1_SHIFT) * scale_factor; 895 if (value > max_crit_power_curr) { 896 value = max_crit_power_curr; 897 drm_info(&hwmon->xe->drm, 898 "Power limit clamped as selected exceeds channel %d limit\n", 899 channel); 900 } 901 uval = DIV_ROUND_CLOSEST_ULL(value << POWER_SETUP_I1_SHIFT, scale_factor); 902 ret = xe_hwmon_pcode_write_i1(hwmon, uval); 903 904 mutex_unlock(&hwmon->hwmon_lock); 905 return ret; 906 } 907 908 static void xe_hwmon_get_voltage(struct xe_hwmon *hwmon, int channel, long *value) 909 { 910 struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); 911 u64 reg_val; 912 913 reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_GT_PERF_STATUS, channel)); 914 /* HW register value in units of 2.5 millivolt */ 915 *value = DIV_ROUND_CLOSEST(REG_FIELD_GET(VOLTAGE_MASK, reg_val) * 2500, SF_VOLTAGE); 916 } 917 918 static inline bool is_vram_ch_available(struct xe_hwmon *hwmon, int channel) 919 { 920 struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); 921 int vram_id = channel - CHANNEL_VRAM_N; 922 struct xe_reg vram_reg; 923 924 vram_reg = xe_hwmon_get_reg(hwmon, REG_TEMP, 
/*
 * is_vram_ch_available - Check whether a per-channel VRAM temp sensor is populated
 *
 * A channel is available when its register exists for this platform and reads
 * non-zero. As a side effect, builds the "vram_ch_N" label used by
 * tempN_label for available channels.
 * NOTE(review): assumes a zero register value means "sensor absent" rather
 * than a legitimate 0 reading — confirm against the register spec.
 */
static inline bool is_vram_ch_available(struct xe_hwmon *hwmon, int channel)
{
	struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe);
	int vram_id = channel - CHANNEL_VRAM_N;
	struct xe_reg vram_reg;

	vram_reg = xe_hwmon_get_reg(hwmon, REG_TEMP, channel);
	if (!xe_reg_is_valid(vram_reg) || !xe_mmio_read32(mmio, vram_reg))
		return false;

	/* Create label only for available vram channel */
	sprintf(hwmon->temp.vram_label[vram_id], "vram_ch_%d", vram_id);
	return true;
}

/*
 * xe_hwmon_temp_is_visible - Decide visibility/mode of each tempN_* attribute
 *
 * Limit attributes (emergency/crit/max) are shown only when the corresponding
 * cached limit from pcode is non-zero; mctrl/pcie channels additionally
 * require a non-zero sensor count; per-channel VRAM attributes require the
 * sensor to be populated. All visible attributes are read-only (0444).
 */
static umode_t
xe_hwmon_temp_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel)
{
	switch (attr) {
	case hwmon_temp_emergency:
		switch (channel) {
		case CHANNEL_PKG:
			return hwmon->temp.limit[TEMP_LIMIT_PKG_SHUTDOWN] ? 0444 : 0;
		case CHANNEL_VRAM:
			return hwmon->temp.limit[TEMP_LIMIT_MEM_SHUTDOWN] ? 0444 : 0;
		case CHANNEL_MCTRL:
		case CHANNEL_PCIE:
			return hwmon->temp.count ? 0444 : 0;
		case CHANNEL_VRAM_N...CHANNEL_VRAM_N_MAX:
			return (is_vram_ch_available(hwmon, channel) &&
				hwmon->temp.limit[TEMP_LIMIT_MEM_SHUTDOWN]) ? 0444 : 0;
		default:
			return 0;
		}
	case hwmon_temp_crit:
		switch (channel) {
		case CHANNEL_PKG:
			return hwmon->temp.limit[TEMP_LIMIT_PKG_CRIT] ? 0444 : 0;
		case CHANNEL_VRAM:
			return hwmon->temp.limit[TEMP_LIMIT_MEM_CRIT] ? 0444 : 0;
		case CHANNEL_MCTRL:
		case CHANNEL_PCIE:
			return hwmon->temp.count ? 0444 : 0;
		case CHANNEL_VRAM_N...CHANNEL_VRAM_N_MAX:
			return (is_vram_ch_available(hwmon, channel) &&
				hwmon->temp.limit[TEMP_LIMIT_MEM_CRIT]) ? 0444 : 0;
		default:
			return 0;
		}
	case hwmon_temp_max:
		switch (channel) {
		case CHANNEL_PKG:
			return hwmon->temp.limit[TEMP_LIMIT_PKG_MAX] ? 0444 : 0;
		default:
			return 0;
		}
	case hwmon_temp_input:
	case hwmon_temp_label:
		switch (channel) {
		case CHANNEL_PKG:
		case CHANNEL_VRAM:
			return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_TEMP,
								channel)) ? 0444 : 0;
		case CHANNEL_MCTRL:
		case CHANNEL_PCIE:
			return hwmon->temp.count ? 0444 : 0;
		case CHANNEL_VRAM_N...CHANNEL_VRAM_N_MAX:
			return is_vram_ch_available(hwmon, channel) ? 0444 : 0;
		default:
			return 0;
		}
	default:
		return 0;
	}
}
0444 : 0; 986 default: 987 return 0; 988 } 989 default: 990 return 0; 991 } 992 } 993 994 static int 995 xe_hwmon_temp_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val) 996 { 997 struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); 998 u64 reg_val; 999 1000 switch (attr) { 1001 case hwmon_temp_input: 1002 switch (channel) { 1003 case CHANNEL_PKG: 1004 case CHANNEL_VRAM: 1005 reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_TEMP, channel)); 1006 1007 /* HW register value is in degrees Celsius, convert to millidegrees. */ 1008 *val = REG_FIELD_GET(TEMP_MASK, reg_val) * MILLIDEGREE_PER_DEGREE; 1009 return 0; 1010 case CHANNEL_MCTRL: 1011 return get_mc_temp(hwmon, val); 1012 case CHANNEL_PCIE: 1013 return get_pcie_temp(hwmon, val); 1014 case CHANNEL_VRAM_N...CHANNEL_VRAM_N_MAX: 1015 reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_TEMP, channel)); 1016 /* 1017 * This temperature format is 24 bit [31:8] signed integer and 8 bit 1018 * [7:0] fraction. 1019 */ 1020 *val = (s32)(REG_FIELD_GET(TEMP_MASK_VRAM_N, reg_val)) * 1021 (REG_FIELD_GET(TEMP_SIGN_MASK, reg_val) ? 
-1 : 1) * 1022 MILLIDEGREE_PER_DEGREE; 1023 return 0; 1024 default: 1025 return -EOPNOTSUPP; 1026 } 1027 case hwmon_temp_emergency: 1028 switch (channel) { 1029 case CHANNEL_PKG: 1030 case CHANNEL_MCTRL: 1031 case CHANNEL_PCIE: 1032 *val = hwmon->temp.limit[TEMP_LIMIT_PKG_SHUTDOWN] * MILLIDEGREE_PER_DEGREE; 1033 return 0; 1034 case CHANNEL_VRAM: 1035 case CHANNEL_VRAM_N...CHANNEL_VRAM_N_MAX: 1036 *val = hwmon->temp.limit[TEMP_LIMIT_MEM_SHUTDOWN] * MILLIDEGREE_PER_DEGREE; 1037 return 0; 1038 default: 1039 return -EOPNOTSUPP; 1040 } 1041 case hwmon_temp_crit: 1042 switch (channel) { 1043 case CHANNEL_PKG: 1044 case CHANNEL_MCTRL: 1045 case CHANNEL_PCIE: 1046 *val = hwmon->temp.limit[TEMP_LIMIT_PKG_CRIT] * MILLIDEGREE_PER_DEGREE; 1047 return 0; 1048 case CHANNEL_VRAM: 1049 case CHANNEL_VRAM_N...CHANNEL_VRAM_N_MAX: 1050 *val = hwmon->temp.limit[TEMP_LIMIT_MEM_CRIT] * MILLIDEGREE_PER_DEGREE; 1051 return 0; 1052 default: 1053 return -EOPNOTSUPP; 1054 } 1055 case hwmon_temp_max: 1056 switch (channel) { 1057 case CHANNEL_PKG: 1058 *val = hwmon->temp.limit[TEMP_LIMIT_PKG_MAX] * MILLIDEGREE_PER_DEGREE; 1059 return 0; 1060 default: 1061 return -EOPNOTSUPP; 1062 } 1063 default: 1064 return -EOPNOTSUPP; 1065 } 1066 } 1067 1068 static umode_t 1069 xe_hwmon_power_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel) 1070 { 1071 u32 uval = 0; 1072 struct xe_reg reg; 1073 struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); 1074 1075 switch (attr) { 1076 case hwmon_power_max: 1077 case hwmon_power_cap: 1078 if (hwmon->xe->info.has_mbx_power_limits) { 1079 xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, &uval); 1080 } else if (attr != PL2_HWMON_ATTR) { 1081 reg = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel); 1082 if (xe_reg_is_valid(reg)) 1083 uval = xe_mmio_read32(mmio, reg); 1084 } 1085 if (uval & PWR_LIM_EN) { 1086 drm_info(&hwmon->xe->drm, "%s is supported on channel %d\n", 1087 PWR_ATTR_TO_STR(attr), channel); 1088 return 0664; 1089 } 1090 
drm_dbg(&hwmon->xe->drm, "%s is unsupported on channel %d\n", 1091 PWR_ATTR_TO_STR(attr), channel); 1092 return 0; 1093 case hwmon_power_rated_max: 1094 if (hwmon->xe->info.has_mbx_power_limits) { 1095 return 0; 1096 } else { 1097 reg = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel); 1098 if (xe_reg_is_valid(reg)) 1099 uval = xe_mmio_read32(mmio, reg); 1100 return uval ? 0444 : 0; 1101 } 1102 case hwmon_power_crit: 1103 if (channel == CHANNEL_CARD) { 1104 xe_hwmon_pcode_read_i1(hwmon, &uval); 1105 return (uval & POWER_SETUP_I1_WATTS) ? 0644 : 0; 1106 } 1107 break; 1108 case hwmon_power_label: 1109 if (hwmon->xe->info.has_mbx_power_limits) { 1110 xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, &uval); 1111 } else { 1112 reg = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel); 1113 if (xe_reg_is_valid(reg)) 1114 uval = xe_mmio_read32(mmio, reg); 1115 1116 if (!uval) { 1117 reg = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel); 1118 if (xe_reg_is_valid(reg)) 1119 uval = xe_mmio_read32(mmio, reg); 1120 } 1121 } 1122 if ((!(uval & PWR_LIM_EN)) && channel == CHANNEL_CARD) { 1123 xe_hwmon_pcode_read_i1(hwmon, &uval); 1124 return (uval & POWER_SETUP_I1_WATTS) ? 0444 : 0; 1125 } 1126 return (uval) ? 
0444 : 0; 1127 default: 1128 return 0; 1129 } 1130 return 0; 1131 } 1132 1133 static int 1134 xe_hwmon_power_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val) 1135 { 1136 switch (attr) { 1137 case hwmon_power_max: 1138 case hwmon_power_cap: 1139 xe_hwmon_power_max_read(hwmon, attr, channel, val); 1140 return 0; 1141 case hwmon_power_rated_max: 1142 xe_hwmon_power_rated_max_read(hwmon, attr, channel, val); 1143 return 0; 1144 case hwmon_power_crit: 1145 return xe_hwmon_power_curr_crit_read(hwmon, channel, val, SF_POWER); 1146 default: 1147 return -EOPNOTSUPP; 1148 } 1149 } 1150 1151 static int 1152 xe_hwmon_power_write(struct xe_hwmon *hwmon, u32 attr, int channel, long val) 1153 { 1154 switch (attr) { 1155 case hwmon_power_cap: 1156 case hwmon_power_max: 1157 return xe_hwmon_power_max_write(hwmon, attr, channel, val); 1158 case hwmon_power_crit: 1159 return xe_hwmon_power_curr_crit_write(hwmon, channel, val, SF_POWER); 1160 default: 1161 return -EOPNOTSUPP; 1162 } 1163 } 1164 1165 static umode_t 1166 xe_hwmon_curr_is_visible(const struct xe_hwmon *hwmon, u32 attr, int channel) 1167 { 1168 u32 uval = 0; 1169 1170 /* hwmon sysfs attribute of current available only for package */ 1171 if (channel != CHANNEL_PKG) 1172 return 0; 1173 1174 switch (attr) { 1175 case hwmon_curr_crit: 1176 return (xe_hwmon_pcode_read_i1(hwmon, &uval) || 1177 (uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644; 1178 case hwmon_curr_label: 1179 return (xe_hwmon_pcode_read_i1(hwmon, &uval) || 1180 (uval & POWER_SETUP_I1_WATTS)) ? 
0 : 0444; 1181 break; 1182 default: 1183 return 0; 1184 } 1185 return 0; 1186 } 1187 1188 static int 1189 xe_hwmon_curr_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val) 1190 { 1191 switch (attr) { 1192 case hwmon_curr_crit: 1193 return xe_hwmon_power_curr_crit_read(hwmon, channel, val, SF_CURR); 1194 default: 1195 return -EOPNOTSUPP; 1196 } 1197 } 1198 1199 static int 1200 xe_hwmon_curr_write(struct xe_hwmon *hwmon, u32 attr, int channel, long val) 1201 { 1202 switch (attr) { 1203 case hwmon_curr_crit: 1204 return xe_hwmon_power_curr_crit_write(hwmon, channel, val, SF_CURR); 1205 default: 1206 return -EOPNOTSUPP; 1207 } 1208 } 1209 1210 static umode_t 1211 xe_hwmon_in_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel) 1212 { 1213 switch (attr) { 1214 case hwmon_in_input: 1215 case hwmon_in_label: 1216 return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_GT_PERF_STATUS, 1217 channel)) ? 0444 : 0; 1218 default: 1219 return 0; 1220 } 1221 } 1222 1223 static int 1224 xe_hwmon_in_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val) 1225 { 1226 switch (attr) { 1227 case hwmon_in_input: 1228 xe_hwmon_get_voltage(hwmon, channel, val); 1229 return 0; 1230 default: 1231 return -EOPNOTSUPP; 1232 } 1233 } 1234 1235 static umode_t 1236 xe_hwmon_energy_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel) 1237 { 1238 long energy = 0; 1239 1240 switch (attr) { 1241 case hwmon_energy_input: 1242 case hwmon_energy_label: 1243 if (hwmon->xe->info.platform == XE_BATTLEMAGE) { 1244 xe_hwmon_energy_get(hwmon, channel, &energy); 1245 return energy ? 0444 : 0; 1246 } else { 1247 return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS, 1248 channel)) ? 
0444 : 0; 1249 } 1250 default: 1251 return 0; 1252 } 1253 } 1254 1255 static int 1256 xe_hwmon_energy_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val) 1257 { 1258 switch (attr) { 1259 case hwmon_energy_input: 1260 xe_hwmon_energy_get(hwmon, channel, val); 1261 return 0; 1262 default: 1263 return -EOPNOTSUPP; 1264 } 1265 } 1266 1267 static umode_t 1268 xe_hwmon_fan_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel) 1269 { 1270 u32 uval = 0; 1271 1272 if (!hwmon->xe->info.has_fan_control) 1273 return 0; 1274 1275 switch (attr) { 1276 case hwmon_fan_input: 1277 if (xe_hwmon_pcode_read_fan_control(hwmon, FSC_READ_NUM_FANS, &uval)) 1278 return 0; 1279 1280 return channel < uval ? 0444 : 0; 1281 default: 1282 return 0; 1283 } 1284 } 1285 1286 static int 1287 xe_hwmon_fan_input_read(struct xe_hwmon *hwmon, int channel, long *val) 1288 { 1289 struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); 1290 struct xe_hwmon_fan_info *fi = &hwmon->fi[channel]; 1291 u64 rotations, time_now, time; 1292 u32 reg_val; 1293 int ret = 0; 1294 1295 mutex_lock(&hwmon->hwmon_lock); 1296 1297 reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_FAN_SPEED, channel)); 1298 time_now = get_jiffies_64(); 1299 1300 /* 1301 * HW register value is accumulated count of pulses from PWM fan with the scale 1302 * of 2 pulses per rotation. 1303 */ 1304 rotations = (reg_val - fi->reg_val_prev) / 2; 1305 1306 time = jiffies_delta_to_msecs(time_now - fi->time_prev); 1307 if (unlikely(!time)) { 1308 ret = -EAGAIN; 1309 goto unlock; 1310 } 1311 1312 /* 1313 * Calculate fan speed in RPM by time averaging two subsequent readings in minutes. 
1314 * RPM = number of rotations * msecs per minute / time in msecs 1315 */ 1316 *val = DIV_ROUND_UP_ULL(rotations * (MSEC_PER_SEC * 60), time); 1317 1318 fi->reg_val_prev = reg_val; 1319 fi->time_prev = time_now; 1320 unlock: 1321 mutex_unlock(&hwmon->hwmon_lock); 1322 return ret; 1323 } 1324 1325 static int 1326 xe_hwmon_fan_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val) 1327 { 1328 switch (attr) { 1329 case hwmon_fan_input: 1330 return xe_hwmon_fan_input_read(hwmon, channel, val); 1331 default: 1332 return -EOPNOTSUPP; 1333 } 1334 } 1335 1336 static umode_t 1337 xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type, 1338 u32 attr, int channel) 1339 { 1340 struct xe_hwmon *hwmon = (struct xe_hwmon *)drvdata; 1341 int ret; 1342 1343 switch (type) { 1344 case hwmon_temp: 1345 ret = xe_hwmon_temp_is_visible(hwmon, attr, channel); 1346 break; 1347 case hwmon_power: 1348 ret = xe_hwmon_power_is_visible(hwmon, attr, channel); 1349 break; 1350 case hwmon_curr: 1351 ret = xe_hwmon_curr_is_visible(hwmon, attr, channel); 1352 break; 1353 case hwmon_in: 1354 ret = xe_hwmon_in_is_visible(hwmon, attr, channel); 1355 break; 1356 case hwmon_energy: 1357 ret = xe_hwmon_energy_is_visible(hwmon, attr, channel); 1358 break; 1359 case hwmon_fan: 1360 ret = xe_hwmon_fan_is_visible(hwmon, attr, channel); 1361 break; 1362 default: 1363 ret = 0; 1364 break; 1365 } 1366 1367 return ret; 1368 } 1369 1370 static int 1371 xe_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, 1372 int channel, long *val) 1373 { 1374 struct xe_hwmon *hwmon = dev_get_drvdata(dev); 1375 1376 guard(xe_pm_runtime)(hwmon->xe); 1377 1378 switch (type) { 1379 case hwmon_temp: 1380 return xe_hwmon_temp_read(hwmon, attr, channel, val); 1381 case hwmon_power: 1382 return xe_hwmon_power_read(hwmon, attr, channel, val); 1383 case hwmon_curr: 1384 return xe_hwmon_curr_read(hwmon, attr, channel, val); 1385 case hwmon_in: 1386 return xe_hwmon_in_read(hwmon, attr, channel, 
val); 1387 case hwmon_energy: 1388 return xe_hwmon_energy_read(hwmon, attr, channel, val); 1389 case hwmon_fan: 1390 return xe_hwmon_fan_read(hwmon, attr, channel, val); 1391 default: 1392 return -EOPNOTSUPP; 1393 } 1394 } 1395 1396 static int 1397 xe_hwmon_write(struct device *dev, enum hwmon_sensor_types type, u32 attr, 1398 int channel, long val) 1399 { 1400 struct xe_hwmon *hwmon = dev_get_drvdata(dev); 1401 1402 guard(xe_pm_runtime)(hwmon->xe); 1403 1404 switch (type) { 1405 case hwmon_power: 1406 return xe_hwmon_power_write(hwmon, attr, channel, val); 1407 case hwmon_curr: 1408 return xe_hwmon_curr_write(hwmon, attr, channel, val); 1409 default: 1410 return -EOPNOTSUPP; 1411 } 1412 } 1413 1414 static int xe_hwmon_read_label(struct device *dev, 1415 enum hwmon_sensor_types type, 1416 u32 attr, int channel, const char **str) 1417 { 1418 struct xe_hwmon *hwmon = dev_get_drvdata(dev); 1419 1420 switch (type) { 1421 case hwmon_temp: 1422 if (channel == CHANNEL_PKG) 1423 *str = "pkg"; 1424 else if (channel == CHANNEL_VRAM) 1425 *str = "vram"; 1426 else if (channel == CHANNEL_MCTRL) 1427 *str = "mctrl"; 1428 else if (channel == CHANNEL_PCIE) 1429 *str = "pcie"; 1430 else if (in_range(channel, CHANNEL_VRAM_N, CHANNEL_VRAM_N_MAX)) 1431 *str = hwmon->temp.vram_label[channel - CHANNEL_VRAM_N]; 1432 return 0; 1433 case hwmon_power: 1434 case hwmon_energy: 1435 case hwmon_curr: 1436 case hwmon_in: 1437 if (channel == CHANNEL_CARD) 1438 *str = "card"; 1439 else if (channel == CHANNEL_PKG) 1440 *str = "pkg"; 1441 return 0; 1442 default: 1443 return -EOPNOTSUPP; 1444 } 1445 } 1446 1447 static const struct hwmon_ops hwmon_ops = { 1448 .is_visible = xe_hwmon_is_visible, 1449 .read = xe_hwmon_read, 1450 .write = xe_hwmon_write, 1451 .read_string = xe_hwmon_read_label, 1452 }; 1453 1454 static const struct hwmon_chip_info hwmon_chip_info = { 1455 .ops = &hwmon_ops, 1456 .info = hwmon_info, 1457 }; 1458 1459 static void 1460 xe_hwmon_get_preregistration_info(struct xe_hwmon 
*hwmon) 1461 { 1462 struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); 1463 long energy, fan_speed; 1464 u64 val_sku_unit = 0; 1465 int channel; 1466 struct xe_reg pkg_power_sku_unit; 1467 1468 if (hwmon->xe->info.has_mbx_power_limits) { 1469 /* Check if GPU firmware support mailbox power limits commands. */ 1470 if (xe_hwmon_pcode_read_power_limit(hwmon, PL1_HWMON_ATTR, CHANNEL_CARD, 1471 &hwmon->pl1_on_boot[CHANNEL_CARD]) | 1472 xe_hwmon_pcode_read_power_limit(hwmon, PL1_HWMON_ATTR, CHANNEL_PKG, 1473 &hwmon->pl1_on_boot[CHANNEL_PKG]) | 1474 xe_hwmon_pcode_read_power_limit(hwmon, PL2_HWMON_ATTR, CHANNEL_CARD, 1475 &hwmon->pl2_on_boot[CHANNEL_CARD]) | 1476 xe_hwmon_pcode_read_power_limit(hwmon, PL2_HWMON_ATTR, CHANNEL_PKG, 1477 &hwmon->pl2_on_boot[CHANNEL_PKG])) { 1478 drm_warn(&hwmon->xe->drm, 1479 "Failed to read power limits, check GPU firmware !\n"); 1480 } else { 1481 drm_info(&hwmon->xe->drm, "Using mailbox commands for power limits\n"); 1482 /* Write default limits to read from pcode from now on. */ 1483 xe_hwmon_pcode_rmw_power_limit(hwmon, PL1_HWMON_ATTR, 1484 CHANNEL_CARD, PWR_LIM | PWR_LIM_TIME, 1485 hwmon->pl1_on_boot[CHANNEL_CARD]); 1486 xe_hwmon_pcode_rmw_power_limit(hwmon, PL1_HWMON_ATTR, 1487 CHANNEL_PKG, PWR_LIM | PWR_LIM_TIME, 1488 hwmon->pl1_on_boot[CHANNEL_PKG]); 1489 xe_hwmon_pcode_rmw_power_limit(hwmon, PL2_HWMON_ATTR, 1490 CHANNEL_CARD, PWR_LIM | PWR_LIM_TIME, 1491 hwmon->pl2_on_boot[CHANNEL_CARD]); 1492 xe_hwmon_pcode_rmw_power_limit(hwmon, PL2_HWMON_ATTR, 1493 CHANNEL_PKG, PWR_LIM | PWR_LIM_TIME, 1494 hwmon->pl2_on_boot[CHANNEL_PKG]); 1495 hwmon->scl_shift_power = PWR_UNIT; 1496 hwmon->scl_shift_energy = ENERGY_UNIT; 1497 hwmon->scl_shift_time = TIME_UNIT; 1498 hwmon->boot_power_limit_read = true; 1499 } 1500 } else { 1501 drm_info(&hwmon->xe->drm, "Using register for power limits\n"); 1502 /* 1503 * The contents of register PKG_POWER_SKU_UNIT do not change, 1504 * so read it once and store the shift values. 
1505 */ 1506 pkg_power_sku_unit = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT, 0); 1507 if (xe_reg_is_valid(pkg_power_sku_unit)) { 1508 val_sku_unit = xe_mmio_read32(mmio, pkg_power_sku_unit); 1509 hwmon->scl_shift_power = REG_FIELD_GET(PKG_PWR_UNIT, val_sku_unit); 1510 hwmon->scl_shift_energy = REG_FIELD_GET(PKG_ENERGY_UNIT, val_sku_unit); 1511 hwmon->scl_shift_time = REG_FIELD_GET(PKG_TIME_UNIT, val_sku_unit); 1512 } 1513 } 1514 /* 1515 * Initialize 'struct xe_hwmon_energy_info', i.e. set fields to the 1516 * first value of the energy register read 1517 */ 1518 for (channel = 0; channel < CHANNEL_MAX; channel++) 1519 if (xe_hwmon_is_visible(hwmon, hwmon_energy, hwmon_energy_input, channel)) 1520 xe_hwmon_energy_get(hwmon, channel, &energy); 1521 1522 /* Initialize 'struct xe_hwmon_fan_info' with initial fan register reading. */ 1523 for (channel = 0; channel < FAN_MAX; channel++) 1524 if (xe_hwmon_is_visible(hwmon, hwmon_fan, hwmon_fan_input, channel)) 1525 xe_hwmon_fan_input_read(hwmon, channel, &fan_speed); 1526 1527 if (hwmon->xe->info.has_mbx_thermal_info && xe_hwmon_pcode_read_thermal_info(hwmon)) 1528 drm_warn(&hwmon->xe->drm, "Thermal mailbox not supported by card firmware\n"); 1529 } 1530 1531 int xe_hwmon_register(struct xe_device *xe) 1532 { 1533 struct device *dev = xe->drm.dev; 1534 struct xe_hwmon *hwmon; 1535 int ret; 1536 1537 /* hwmon is available only for dGfx */ 1538 if (!IS_DGFX(xe)) 1539 return 0; 1540 1541 /* hwmon is not available on VFs */ 1542 if (IS_SRIOV_VF(xe)) 1543 return 0; 1544 1545 hwmon = devm_kzalloc(dev, sizeof(*hwmon), GFP_KERNEL); 1546 if (!hwmon) 1547 return -ENOMEM; 1548 1549 ret = devm_mutex_init(dev, &hwmon->hwmon_lock); 1550 if (ret) 1551 return ret; 1552 1553 /* There's only one instance of hwmon per device */ 1554 hwmon->xe = xe; 1555 xe->hwmon = hwmon; 1556 1557 xe_hwmon_get_preregistration_info(hwmon); 1558 1559 drm_dbg(&xe->drm, "Register xe hwmon interface\n"); 1560 1561 /* hwmon_dev points to device hwmon<i> */ 
1562 hwmon->hwmon_dev = devm_hwmon_device_register_with_info(dev, "xe", hwmon, 1563 &hwmon_chip_info, 1564 hwmon_groups); 1565 if (IS_ERR(hwmon->hwmon_dev)) { 1566 drm_err(&xe->drm, "Failed to register xe hwmon (%pe)\n", hwmon->hwmon_dev); 1567 xe->hwmon = NULL; 1568 return PTR_ERR(hwmon->hwmon_dev); 1569 } 1570 1571 return 0; 1572 } 1573 MODULE_IMPORT_NS("INTEL_PMT_TELEMETRY"); 1574