1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Hwmon client for disk and solid state drives with temperature sensors 4 * Copyright (C) 2019 Zodiac Inflight Innovations 5 * 6 * With input from: 7 * Hwmon client for S.M.A.R.T. hard disk drives with temperature sensors. 8 * (C) 2018 Linus Walleij 9 * 10 * hwmon: Driver for SCSI/ATA temperature sensors 11 * by Constantin Baranov <const@mimas.ru>, submitted September 2009 12 * 13 * This drive supports reporting the temperature of SATA drives. It can be 14 * easily extended to report the temperature of SCSI drives. 15 * 16 * The primary means to read drive temperatures and temperature limits 17 * for ATA drives is the SCT Command Transport feature set as specified in 18 * ATA8-ACS. 19 * It can be used to read the current drive temperature, temperature limits, 20 * and historic minimum and maximum temperatures. The SCT Command Transport 21 * feature set is documented in "AT Attachment 8 - ATA/ATAPI Command Set 22 * (ATA8-ACS)". 23 * 24 * If the SCT Command Transport feature set is not available, drive temperatures 25 * may be readable through SMART attributes. Since SMART attributes are not well 26 * defined, this method is only used as fallback mechanism. 27 * 28 * There are three SMART attributes which may report drive temperatures. 29 * Those are defined as follows (from 30 * http://www.cropel.com/library/smart-attribute-list.aspx). 31 * 32 * 190 Temperature Temperature, monitored by a sensor somewhere inside 33 * the drive. Raw value typicaly holds the actual 34 * temperature (hexadecimal) in its rightmost two digits. 35 * 36 * 194 Temperature Temperature, monitored by a sensor somewhere inside 37 * the drive. Raw value typicaly holds the actual 38 * temperature (hexadecimal) in its rightmost two digits. 39 * 40 * 231 Temperature Temperature, monitored by a sensor somewhere inside 41 * the drive. Raw value typicaly holds the actual 42 * temperature (hexadecimal) in its rightmost two digits. 43 * 44 * Wikipedia defines attributes a bit differently. 45 * 46 * 190 Temperature Value is equal to (100-temp. °C), allowing manufacturer 47 * Difference or to set a minimum threshold which corresponds to a 48 * Airflow maximum temperature. This also follows the convention of 49 * Temperature 100 being a best-case value and lower values being 50 * undesirable. However, some older drives may instead 51 * report raw Temperature (identical to 0xC2) or 52 * Temperature minus 50 here. 53 * 194 Temperature or Indicates the device temperature, if the appropriate 54 * Temperature sensor is fitted. Lowest byte of the raw value contains 55 * Celsius the exact temperature value (Celsius degrees). 56 * 231 Life Left Indicates the approximate SSD life left, in terms of 57 * (SSDs) or program/erase cycles or available reserved blocks. 58 * Temperature A normalized value of 100 represents a new drive, with 59 * a threshold value at 10 indicating a need for 60 * replacement. A value of 0 may mean that the drive is 61 * operating in read-only mode to allow data recovery. 62 * Previously (pre-2010) occasionally used for Drive 63 * Temperature (more typically reported at 0xC2). 64 * 65 * Common denominator is that the first raw byte reports the temperature 66 * in degrees C on almost all drives. Some drives may report a fractional 67 * temperature in the second raw byte. 68 * 69 * Known exceptions (from libatasmart): 70 * - SAMSUNG SV0412H and SAMSUNG SV1204H) report the temperature in 10th 71 * degrees C in the first two raw bytes. 72 * - A few Maxtor drives report an unknown or bad value in attribute 194. 73 * - Certain Apple SSD drives report an unknown value in attribute 190. 74 * Only certain firmware versions are affected. 75 * 76 * Those exceptions affect older ATA drives and are currently ignored. 77 * Also, the second raw byte (possibly reporting the fractional temperature) 78 * is currently ignored. 79 * 80 * Many drives also report temperature limits in additional SMART data raw 81 * bytes. The format of those is not well defined and varies widely. 82 * The driver does not currently attempt to report those limits. 83 * 84 * According to data in smartmontools, attribute 231 is rarely used to report 85 * drive temperatures. At the same time, several drives report SSD life left 86 * in attribute 231, but do not support temperature sensors. For this reason, 87 * attribute 231 is currently ignored. 88 * 89 * Following above definitions, temperatures are reported as follows. 90 * If SCT Command Transport is supported, it is used to read the 91 * temperature and, if available, temperature limits. 92 * - Otherwise, if SMART attribute 194 is supported, it is used to read 93 * the temperature. 94 * - Otherwise, if SMART attribute 190 is supported, it is used to read 95 * the temperature. 96 */ 97 98 #include <linux/ata.h> 99 #include <linux/bits.h> 100 #include <linux/device.h> 101 #include <linux/hwmon.h> 102 #include <linux/kernel.h> 103 #include <linux/list.h> 104 #include <linux/module.h> 105 #include <linux/mutex.h> 106 #include <scsi/scsi_cmnd.h> 107 #include <scsi/scsi_device.h> 108 #include <scsi/scsi_driver.h> 109 #include <scsi/scsi_proto.h> 110 111 struct drivetemp_data { 112 struct list_head list; /* list of instantiated devices */ 113 struct mutex lock; /* protect data buffer accesses */ 114 struct scsi_device *sdev; /* SCSI device */ 115 struct device *dev; /* instantiating device */ 116 struct device *hwdev; /* hardware monitoring device */ 117 u8 smartdata[ATA_SECT_SIZE]; /* local buffer */ 118 int (*get_temp)(struct drivetemp_data *st, u32 attr, long *val); 119 bool have_temp_lowest; /* lowest temp in SCT status */ 120 bool have_temp_highest; /* highest temp in SCT status */ 121 bool have_temp_min; /* have min temp */ 122 bool have_temp_max; /* have max temp */ 123 bool have_temp_lcrit; /* have lower critical limit */ 124 bool have_temp_crit; /* have critical limit */ 125 int temp_min; /* min temp */ 126 int temp_max; /* max temp */ 127 int temp_lcrit; /* lower critical limit */ 128 int temp_crit; /* critical limit */ 129 }; 130 131 static LIST_HEAD(drivetemp_devlist); 132 133 #define ATA_MAX_SMART_ATTRS 30 134 #define SMART_TEMP_PROP_190 190 135 #define SMART_TEMP_PROP_194 194 136 137 #define SCT_STATUS_REQ_ADDR 0xe0 138 #define SCT_STATUS_VERSION_LOW 0 /* log byte offsets */ 139 #define SCT_STATUS_VERSION_HIGH 1 140 #define SCT_STATUS_TEMP 200 141 #define SCT_STATUS_TEMP_LOWEST 201 142 #define SCT_STATUS_TEMP_HIGHEST 202 143 #define SCT_READ_LOG_ADDR 0xe1 144 #define SMART_READ_LOG 0xd5 145 #define SMART_WRITE_LOG 0xd6 146 147 #define INVALID_TEMP 0x80 148 149 #define temp_is_valid(temp) ((temp) != INVALID_TEMP) 150 #define temp_from_sct(temp) (((s8)(temp)) * 1000) 151 152 static inline bool ata_id_smart_supported(u16 *id) 153 { 154 return id[ATA_ID_COMMAND_SET_1] & BIT(0); 155 } 156 157 static inline bool ata_id_smart_enabled(u16 *id) 158 { 159 return id[ATA_ID_CFS_ENABLE_1] & BIT(0); 160 } 161 162 static int drivetemp_scsi_command(struct drivetemp_data *st, 163 u8 ata_command, u8 feature, 164 u8 lba_low, u8 lba_mid, u8 lba_high) 165 { 166 u8 scsi_cmd[MAX_COMMAND_SIZE]; 167 enum req_op op; 168 int err; 169 170 memset(scsi_cmd, 0, sizeof(scsi_cmd)); 171 scsi_cmd[0] = ATA_16; 172 if (ata_command == ATA_CMD_SMART && feature == SMART_WRITE_LOG) { 173 scsi_cmd[1] = (5 << 1); /* PIO Data-out */ 174 /* 175 * No off.line or cc, write to dev, block count in sector count 176 * field. 177 */ 178 scsi_cmd[2] = 0x06; 179 op = REQ_OP_DRV_OUT; 180 } else { 181 scsi_cmd[1] = (4 << 1); /* PIO Data-in */ 182 /* 183 * No off.line or cc, read from dev, block count in sector count 184 * field. 185 */ 186 scsi_cmd[2] = 0x0e; 187 op = REQ_OP_DRV_IN; 188 } 189 scsi_cmd[4] = feature; 190 scsi_cmd[6] = 1; /* 1 sector */ 191 scsi_cmd[8] = lba_low; 192 scsi_cmd[10] = lba_mid; 193 scsi_cmd[12] = lba_high; 194 scsi_cmd[14] = ata_command; 195 196 err = scsi_execute_cmd(st->sdev, scsi_cmd, op, st->smartdata, 197 ATA_SECT_SIZE, HZ, 5, NULL); 198 if (err > 0) 199 err = -EIO; 200 return err; 201 } 202 203 static int drivetemp_ata_command(struct drivetemp_data *st, u8 feature, 204 u8 select) 205 { 206 return drivetemp_scsi_command(st, ATA_CMD_SMART, feature, select, 207 ATA_SMART_LBAM_PASS, ATA_SMART_LBAH_PASS); 208 } 209 210 static int drivetemp_get_smarttemp(struct drivetemp_data *st, u32 attr, 211 long *temp) 212 { 213 u8 *buf = st->smartdata; 214 bool have_temp = false; 215 u8 temp_raw; 216 u8 csum; 217 int err; 218 int i; 219 220 err = drivetemp_ata_command(st, ATA_SMART_READ_VALUES, 0); 221 if (err) 222 return err; 223 224 /* Checksum the read value table */ 225 csum = 0; 226 for (i = 0; i < ATA_SECT_SIZE; i++) 227 csum += buf[i]; 228 if (csum) { 229 dev_dbg(&st->sdev->sdev_gendev, 230 "checksum error reading SMART values\n"); 231 return -EIO; 232 } 233 234 for (i = 0; i < ATA_MAX_SMART_ATTRS; i++) { 235 u8 *attr = buf + i * 12; 236 int id = attr[2]; 237 238 if (!id) 239 continue; 240 241 if (id == SMART_TEMP_PROP_190) { 242 temp_raw = attr[7]; 243 have_temp = true; 244 } 245 if (id == SMART_TEMP_PROP_194) { 246 temp_raw = attr[7]; 247 have_temp = true; 248 break; 249 } 250 } 251 252 if (have_temp) { 253 *temp = temp_raw * 1000; 254 return 0; 255 } 256 257 return -ENXIO; 258 } 259 260 static int drivetemp_get_scttemp(struct drivetemp_data *st, u32 attr, long *val) 261 { 262 u8 *buf = st->smartdata; 263 int err; 264 265 err = drivetemp_ata_command(st, SMART_READ_LOG, SCT_STATUS_REQ_ADDR); 266 if (err) 267 return err; 268 switch (attr) { 269 case hwmon_temp_input: 270 if (!temp_is_valid(buf[SCT_STATUS_TEMP])) 271 return -ENODATA; 272 *val = temp_from_sct(buf[SCT_STATUS_TEMP]); 273 break; 274 case hwmon_temp_lowest: 275 if (!temp_is_valid(buf[SCT_STATUS_TEMP_LOWEST])) 276 return -ENODATA; 277 *val = temp_from_sct(buf[SCT_STATUS_TEMP_LOWEST]); 278 break; 279 case hwmon_temp_highest: 280 if (!temp_is_valid(buf[SCT_STATUS_TEMP_HIGHEST])) 281 return -ENODATA; 282 *val = temp_from_sct(buf[SCT_STATUS_TEMP_HIGHEST]); 283 break; 284 default: 285 err = -EINVAL; 286 break; 287 } 288 return err; 289 } 290 291 static const char * const sct_avoid_models[] = { 292 /* 293 * These drives will have WRITE FPDMA QUEUED command timeouts and sometimes just 294 * freeze until power-cycled under heavy write loads when their temperature is 295 * getting polled in SCT mode. The SMART mode seems to be fine, though. 296 * 297 * While only the 3 TB model (DT01ACA3) was actually caught exhibiting the 298 * problem let's play safe here to avoid data corruption and ban the whole 299 * DT01ACAx family. 300 301 * The models from this array are prefix-matched. 302 */ 303 "TOSHIBA DT01ACA", 304 }; 305 306 static bool drivetemp_sct_avoid(struct drivetemp_data *st) 307 { 308 struct scsi_device *sdev = st->sdev; 309 unsigned int ctr; 310 311 if (!sdev->model) 312 return false; 313 314 /* 315 * The "model" field contains just the raw SCSI INQUIRY response 316 * "product identification" field, which has a width of 16 bytes. 317 * This field is space-filled, but is NOT NULL-terminated. 318 */ 319 for (ctr = 0; ctr < ARRAY_SIZE(sct_avoid_models); ctr++) 320 if (!strncmp(sdev->model, sct_avoid_models[ctr], 321 strlen(sct_avoid_models[ctr]))) 322 return true; 323 324 return false; 325 } 326 327 static int drivetemp_identify_sata(struct drivetemp_data *st) 328 { 329 struct scsi_device *sdev = st->sdev; 330 u8 *buf = st->smartdata; 331 struct scsi_vpd *vpd; 332 bool is_ata, is_sata; 333 bool have_sct_data_table; 334 bool have_sct_temp; 335 bool have_smart; 336 bool have_sct; 337 u16 *ata_id; 338 u16 version; 339 long temp; 340 int err; 341 342 /* SCSI-ATA Translation present? */ 343 rcu_read_lock(); 344 vpd = rcu_dereference(sdev->vpd_pg89); 345 346 /* 347 * Verify that ATA IDENTIFY DEVICE data is included in ATA Information 348 * VPD and that the drive implements the SATA protocol. 349 */ 350 if (!vpd || vpd->len < 572 || vpd->data[56] != ATA_CMD_ID_ATA || 351 vpd->data[36] != 0x34) { 352 rcu_read_unlock(); 353 return -ENODEV; 354 } 355 ata_id = (u16 *)&vpd->data[60]; 356 is_ata = ata_id_is_ata(ata_id); 357 is_sata = ata_id_is_sata(ata_id); 358 have_sct = ata_id_sct_supported(ata_id); 359 have_sct_data_table = ata_id_sct_data_tables(ata_id); 360 have_smart = ata_id_smart_supported(ata_id) && 361 ata_id_smart_enabled(ata_id); 362 363 rcu_read_unlock(); 364 365 /* bail out if this is not a SATA device */ 366 if (!is_ata || !is_sata) 367 return -ENODEV; 368 369 if (have_sct && drivetemp_sct_avoid(st)) { 370 dev_notice(&sdev->sdev_gendev, 371 "will avoid using SCT for temperature monitoring\n"); 372 have_sct = false; 373 } 374 375 if (!have_sct) 376 goto skip_sct; 377 378 err = drivetemp_ata_command(st, SMART_READ_LOG, SCT_STATUS_REQ_ADDR); 379 if (err) 380 goto skip_sct; 381 382 version = (buf[SCT_STATUS_VERSION_HIGH] << 8) | 383 buf[SCT_STATUS_VERSION_LOW]; 384 if (version != 2 && version != 3) 385 goto skip_sct; 386 387 have_sct_temp = temp_is_valid(buf[SCT_STATUS_TEMP]); 388 if (!have_sct_temp) 389 goto skip_sct; 390 391 st->have_temp_lowest = temp_is_valid(buf[SCT_STATUS_TEMP_LOWEST]); 392 st->have_temp_highest = temp_is_valid(buf[SCT_STATUS_TEMP_HIGHEST]); 393 394 if (!have_sct_data_table) 395 goto skip_sct_data; 396 397 /* Request and read temperature history table */ 398 memset(buf, '\0', sizeof(st->smartdata)); 399 buf[0] = 5; /* data table command */ 400 buf[2] = 1; /* read table */ 401 buf[4] = 2; /* temperature history table */ 402 403 err = drivetemp_ata_command(st, SMART_WRITE_LOG, SCT_STATUS_REQ_ADDR); 404 if (err) 405 goto skip_sct_data; 406 407 err = drivetemp_ata_command(st, SMART_READ_LOG, SCT_READ_LOG_ADDR); 408 if (err) 409 goto skip_sct_data; 410 411 /* 412 * Temperature limits per AT Attachment 8 - 413 * ATA/ATAPI Command Set (ATA8-ACS) 414 */ 415 st->have_temp_max = temp_is_valid(buf[6]); 416 st->have_temp_crit = temp_is_valid(buf[7]); 417 st->have_temp_min = temp_is_valid(buf[8]); 418 st->have_temp_lcrit = temp_is_valid(buf[9]); 419 420 st->temp_max = temp_from_sct(buf[6]); 421 st->temp_crit = temp_from_sct(buf[7]); 422 st->temp_min = temp_from_sct(buf[8]); 423 st->temp_lcrit = temp_from_sct(buf[9]); 424 425 skip_sct_data: 426 if (have_sct_temp) { 427 st->get_temp = drivetemp_get_scttemp; 428 return 0; 429 } 430 skip_sct: 431 if (!have_smart) 432 return -ENODEV; 433 st->get_temp = drivetemp_get_smarttemp; 434 return drivetemp_get_smarttemp(st, hwmon_temp_input, &temp); 435 } 436 437 static int drivetemp_identify(struct drivetemp_data *st) 438 { 439 struct scsi_device *sdev = st->sdev; 440 441 /* Bail out immediately if there is no inquiry data */ 442 if (!sdev->inquiry || sdev->inquiry_len < 16) 443 return -ENODEV; 444 445 /* Disk device? */ 446 if (sdev->type != TYPE_DISK && sdev->type != TYPE_ZBC) 447 return -ENODEV; 448 449 return drivetemp_identify_sata(st); 450 } 451 452 static int drivetemp_read(struct device *dev, enum hwmon_sensor_types type, 453 u32 attr, int channel, long *val) 454 { 455 struct drivetemp_data *st = dev_get_drvdata(dev); 456 int err = 0; 457 458 if (type != hwmon_temp) 459 return -EINVAL; 460 461 switch (attr) { 462 case hwmon_temp_input: 463 case hwmon_temp_lowest: 464 case hwmon_temp_highest: 465 mutex_lock(&st->lock); 466 err = st->get_temp(st, attr, val); 467 mutex_unlock(&st->lock); 468 break; 469 case hwmon_temp_lcrit: 470 *val = st->temp_lcrit; 471 break; 472 case hwmon_temp_min: 473 *val = st->temp_min; 474 break; 475 case hwmon_temp_max: 476 *val = st->temp_max; 477 break; 478 case hwmon_temp_crit: 479 *val = st->temp_crit; 480 break; 481 default: 482 err = -EINVAL; 483 break; 484 } 485 return err; 486 } 487 488 static umode_t drivetemp_is_visible(const void *data, 489 enum hwmon_sensor_types type, 490 u32 attr, int channel) 491 { 492 const struct drivetemp_data *st = data; 493 494 switch (type) { 495 case hwmon_temp: 496 switch (attr) { 497 case hwmon_temp_input: 498 return 0444; 499 case hwmon_temp_lowest: 500 if (st->have_temp_lowest) 501 return 0444; 502 break; 503 case hwmon_temp_highest: 504 if (st->have_temp_highest) 505 return 0444; 506 break; 507 case hwmon_temp_min: 508 if (st->have_temp_min) 509 return 0444; 510 break; 511 case hwmon_temp_max: 512 if (st->have_temp_max) 513 return 0444; 514 break; 515 case hwmon_temp_lcrit: 516 if (st->have_temp_lcrit) 517 return 0444; 518 break; 519 case hwmon_temp_crit: 520 if (st->have_temp_crit) 521 return 0444; 522 break; 523 default: 524 break; 525 } 526 break; 527 default: 528 break; 529 } 530 return 0; 531 } 532 533 static const struct hwmon_channel_info * const drivetemp_info[] = { 534 HWMON_CHANNEL_INFO(chip, 535 HWMON_C_REGISTER_TZ), 536 HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT | 537 HWMON_T_LOWEST | HWMON_T_HIGHEST | 538 HWMON_T_MIN | HWMON_T_MAX | 539 HWMON_T_LCRIT | HWMON_T_CRIT), 540 NULL 541 }; 542 543 static const struct hwmon_ops drivetemp_ops = { 544 .is_visible = drivetemp_is_visible, 545 .read = drivetemp_read, 546 }; 547 548 static const struct hwmon_chip_info drivetemp_chip_info = { 549 .ops = &drivetemp_ops, 550 .info = drivetemp_info, 551 }; 552 553 /* 554 * The device argument points to sdev->sdev_dev. Its parent is 555 * sdev->sdev_gendev, which we can use to get the scsi_device pointer. 556 */ 557 static int drivetemp_add(struct device *dev) 558 { 559 struct scsi_device *sdev = to_scsi_device(dev->parent); 560 struct drivetemp_data *st; 561 int err; 562 563 st = kzalloc(sizeof(*st), GFP_KERNEL); 564 if (!st) 565 return -ENOMEM; 566 567 st->sdev = sdev; 568 st->dev = dev; 569 mutex_init(&st->lock); 570 571 if (drivetemp_identify(st)) { 572 err = -ENODEV; 573 goto abort; 574 } 575 576 st->hwdev = hwmon_device_register_with_info(dev->parent, "drivetemp", 577 st, &drivetemp_chip_info, 578 NULL); 579 if (IS_ERR(st->hwdev)) { 580 err = PTR_ERR(st->hwdev); 581 goto abort; 582 } 583 584 list_add(&st->list, &drivetemp_devlist); 585 return 0; 586 587 abort: 588 kfree(st); 589 return err; 590 } 591 592 static void drivetemp_remove(struct device *dev) 593 { 594 struct drivetemp_data *st, *tmp; 595 596 list_for_each_entry_safe(st, tmp, &drivetemp_devlist, list) { 597 if (st->dev == dev) { 598 list_del(&st->list); 599 hwmon_device_unregister(st->hwdev); 600 kfree(st); 601 break; 602 } 603 } 604 } 605 606 static struct class_interface drivetemp_interface = { 607 .add_dev = drivetemp_add, 608 .remove_dev = drivetemp_remove, 609 }; 610 611 static int __init drivetemp_init(void) 612 { 613 return scsi_register_interface(&drivetemp_interface); 614 } 615 616 static void __exit drivetemp_exit(void) 617 { 618 scsi_unregister_interface(&drivetemp_interface); 619 } 620 621 module_init(drivetemp_init); 622 module_exit(drivetemp_exit); 623 624 MODULE_AUTHOR("Guenter Roeck <linus@roeck-us.net>"); 625 MODULE_DESCRIPTION("Hard drive temperature monitor"); 626 MODULE_LICENSE("GPL"); 627 MODULE_ALIAS("platform:drivetemp"); 628