#!/bin/sh
#
# Show SMART stats
#
# The script is meant to be invoked under several names (see $helpstr); the
# basename it was invoked as selects which key=value lines are printed.
#
# Environment read: VDEV_UPATH, VDEV_PATH (device to query with smartctl).
# Output: one "key=value" line per requested column; columns that smartctl
# did not report are printed as "key=" so every requested column is present.
#

helpstr="
smart:	Show SMART temperature and error stats (specific to drive type)
smartx:	Show SMART extended drive stats (specific to drive type).
temp:	Show SMART drive temperature in celsius (all drives).
health:	Show reported SMART status (all drives).
r_proc:	Show SMART read GBytes processed over drive lifetime (SAS).
w_proc:	Show SMART write GBytes processed over drive lifetime (SAS).
r_ucor:	Show SMART read uncorrectable errors (SAS).
w_ucor:	Show SMART write uncorrectable errors (SAS).
nonmed:	Show SMART non-medium errors (SAS).
defect:	Show SMART grown defect list (SAS).
hours_on:	Show number of hours drive powered on (all drives).
realloc:	Show SMART reallocated sectors count (ATA).
rep_ucor:	Show SMART reported uncorrectable count (ATA).
cmd_to:	Show SMART command timeout count (ATA).
pend_sec:	Show SMART current pending sector count (ATA).
off_ucor:	Show SMART offline uncorrectable errors (ATA).
ata_err:	Show SMART ATA errors (ATA).
pwr_cyc:	Show SMART power cycle count (ATA).
serial:	Show disk serial number.
nvme_err:	Show SMART NVMe errors (NVMe).
smart_test:	Show SMART self-test results summary.
test_type:	Show SMART self-test type (short, long... ).
test_status:	Show SMART self-test status.
test_progress:	Show SMART self-test percentage done.
test_ended:	Show when the last SMART self-test ended (if supported).
"

# Hack for developer testing
#
# If you set $samples to a directory containing smartctl output text files,
# we will use them instead of running smartctl on the vdevs.  This can be
# useful if you want to test a bunch of different smartctl outputs.  Also, if
# $samples is set, an additional "file" column is added to the zpool output
# showing the filename.
samples=

# get_filename_from_dir DIR
#
# Look in directory DIR and print the name (without the directory part) of
# one regular file directly inside it.  The file is chosen quasi-sequentially
# (based off our PID).  This allows us to return a different filename every
# time this script is invoked (which we do for each vdev), without having to
# maintain state.
#
# Prints nothing when DIR contains no regular files.
get_filename_from_dir()
{
	dir=$1
	# $(( )) normalises the padded number some wc implementations print.
	num_files=$(( $(find "$dir" -maxdepth 1 -type f | wc -l) ))

	# Nothing to choose from; this also avoids a modulo-by-zero below.
	[ "$num_files" -gt 0 ] || return 0

	mod=$(($$ % num_files))

	# List exactly the same set of files that was counted above (top level
	# only), pick entry number mod+1 and strip the leading directory.
	find "$dir" -maxdepth 1 -type f | sed -n "$((mod + 1)){s|.*/||;p;q;}"
}

# emit_columns OUT KEYS
#
# OUT is the newline separated "key=value" text produced by the awk parser,
# KEYS is a "|" separated list of wanted keys.  Print every OUT line whose key
# is one of KEYS (in OUT order), followed by an empty "key=" line for each
# wanted key that OUT did not provide (in KEYS order).
emit_columns()
{
	[ -n "$2" ] || return 0

	# Anchor on "^key=" so that a value which merely contains a key name
	# (for example serial=temp123 when asking for temp) is not selected.
	with_vals=$(printf '%s\n' "$1" | grep -E "^($2)=")
	if [ -n "$with_vals" ]; then
		printf '%s\n' "$with_vals"
		found_keys=$(printf '%s\n' "$with_vals" | cut -d '=' -f 1)
		# -x -F: drop only the keys that were found, compared as whole
		# fixed strings (the pattern list is newline separated).
		without_vals=$(printf '%s\n' "$2" | tr '|' '\n' |
		    grep -v -x -F -e "$found_keys" | sed 's/$/=/')
	else
		without_vals=$(printf '%s\n' "$2" | tr '|' '\n' | sed 's/$/=/')
	fi

	if [ -n "$without_vals" ]; then
		printf '%s\n' "$without_vals"
	fi
}

script="${0##*/}"

if [ "$1" = "-h" ] ; then
	# Print the description that follows "<script>:" at the start of a
	# help line, whatever whitespace separates the two.
	printf '%s\n' "$helpstr" | awk -v key="$script:" '
		index($0, key) == 1 { sub(/^[^:]*:[ \t]*/, ""); print }'
	exit
fi

# Sometimes, UPATH ends up /dev/(null).
# That should be corrected, but for now...
# shellcheck disable=SC2154
if [ ! -b "$VDEV_UPATH" ]; then
	somepath="${VDEV_PATH}"
else
	somepath="${VDEV_UPATH}"
fi

# Do not inherit a stray $out from the environment when the block below is
# skipped.
out=

if [ -b "$somepath" ] && PATH="/usr/sbin:$PATH" command -v smartctl > /dev/null || [ -n "$samples" ] ; then
	if [ -n "$samples" ] ; then
		# cat a smartctl output text file instead of running smartctl
		# on a vdev (only used for developer testing).
		file=$(get_filename_from_dir "$samples")
		echo "file=$file"
		raw_out=$(cat "$samples/$file")
	else
		raw_out=$(sudo smartctl -a "$somepath")
	fi

	# What kind of drive are we?  Look for the right line in smartctl:
	#
	# SAS:
	#	Transport protocol:   SAS
	#
	# SATA:
	#	ATA Version is:   8
	#
	# NVMe:
	#	SMART/Health Information (NVMe Log 0xnn, NSID 0xnn)
	#
	# NOTE: the awk program below is inside a single-quoted shell string,
	# so none of its comments may contain a single quote.
	out=$(echo "$raw_out" | awk '
# SAS specific
/read:/{print "rrd="$4"\nr_cor="$5"\nr_proc="$7"\nr_ucor="$8}
/write:/{print "rwr="$4"\nw_cor="$5"\nw_proc="$7"\nw_ucor="$8}
/Non-medium error count/{print "nonmed="$4}
/Elements in grown defect list/{print "defect="$6}

# SAS common
/SAS/{type="sas"}
/Drive Temperature:/{print "temp="$4}
# Status can be a long string, substitute spaces for "_"
/SMART Health Status:/{printf "health="; for(i=4;i<=NF-1;i++){printf "%s_", $i}; printf "%s\n", $i}
/number of hours powered up/{print "hours_on="$7; hours_on=int($7)}
/Serial number:/{print "serial="$3}

# SATA specific
/Reallocated_Sector_Ct/{print "realloc="$10}
/Reported_Uncorrect/{print "rep_ucor="$10}
/Command_Timeout/{print "cmd_to="$10}
/Current_Pending_Sector/{print "pend_sec="$10}
/Offline_Uncorrectable/{print "off_ucor="$10}
/ATA Error Count:/{print "ata_err="$4}
/Power_Cycle_Count/{print "pwr_cyc="$10}

# SATA common
/SATA/{type="sata"}
/Temperature_Celsius/{print "temp="$10}
/Airflow_Temperature_Cel/{print "temp="$10}
/Current Temperature:/{print "temp="$3}
/SMART overall-health self-assessment test result:/{print "health="$6}
/Power_On_Hours/{print "hours_on="$10; hours_on=int($10)}
/Serial Number:/{print "serial="$3}

# NVMe common
/NVMe/{type="nvme"}
/Temperature:/{print "temp="$2}
/SMART overall-health self-assessment test result:/{print "health="$6}
/Power On Hours:/{gsub("[^0-9]","",$4); print "hours_on="$4}
/Serial Number:/{print "serial="$3}
/Power Cycles:/{print "pwr_cyc="$3}

# NVMe specific
/Media and Data Integrity Errors:/{print "nvme_err="$6}

# SMART self-test info
/Self-test execution status:/{progress=tolower($4)} # SAS
/SMART Self-test log/{test_seen=1} # SAS
/SMART Extended Self-test Log/{test_seen=1} # SATA
/# 1/{
	test_type=tolower($3"_"$4);
	# Status could be one word ("Completed") or multiple ("Completed: read
	# failure").  Look for the ":" to see if we need to grab more words.

	if ($5 ~ ":")
		status=tolower($5""$6"_"$7)
	else
		status=tolower($5)
	if (status=="self")
		status="running";

	if (type == "sas") {
		hours=int($(NF-4))
	} else {
		hours=int($(NF-1))
		# SATA reports percent remaining, rather than percent done
		# Convert it to percent done.
		progress=(100-int($(NF-2)))"%"
	}
	# When we int()-ify "hours", it converts stuff like "NOW" and "-" into
	# 0.  In those cases, set it to hours_on, so they will cancel out in
	# the "hours_ago" calculation later on.
	if (hours == 0)
		hours=hours_on

	if (test_seen) {
		print "test="hours_on
		print "test_type="test_type
		print "test_status="status
		print "test_progress="progress
	}
	# Not all drives report hours_on
	if (hours_on && hours) {
		total_hours_ago=(hours_on-hours)
		days_ago=int(total_hours_ago/24)
		hours_ago=(total_hours_ago % 24)
		if (days_ago != 0)
			ago_str=days_ago"d"
		if (hours_ago !=0)
			ago_str=ago_str""hours_ago"h"
		print "test_ended="ago_str
	}
}

END {print "type="type; ORS="\n"; print ""}
');
fi
type=$(echo "$out" | grep '^type=' | cut -d '=' -f 2)

# If type is not set by now, either we don't have a block device
# or smartctl failed.  Either way, default to ATA and set $out to
# nothing.
if [ -z "$type" ]; then
	type="sata"
	out=
fi

# Map the name we were invoked as to the "|" separated list of columns to
# print.  Any name other than the three summaries is itself the column.
case $script in
smart)
	# Print temperature plus common predictors of drive failure
	if [ "$type" = "sas" ] ; then
		scripts="temp|health|r_ucor|w_ucor"
	elif [ "$type" = "sata" ] ; then
		scripts="temp|health|ata_err|realloc|rep_ucor|cmd_to|pend_sec|off_ucor"
	elif [ "$type" = "nvme" ] ; then
		scripts="temp|health|nvme_err"
	fi
	;;
smartx)
	# Print some other interesting stats
	if [ "$type" = "sas" ] ; then
		scripts="hours_on|defect|nonmed|r_proc|w_proc"
	elif [ "$type" = "sata" ] ; then
		scripts="hours_on|pwr_cyc"
	elif [ "$type" = "nvme" ] ; then
		scripts="hours_on|pwr_cyc"
	fi
	;;
smart_test)
	scripts="test_type|test_status|test_progress|test_ended"
	;;
*)
	scripts="$script"
	;;
esac

emit_columns "$out" "$scripts"