#!/bin/sh
#
# Show SMART stats
#
# Source: /freebsd/sys/contrib/openzfs/cmd/zpool/zpool.d/smart
# (revision 716fd348e01c5f2ba125f878a634a753436c2994)

# Help text, one entry per line in the form "name:<TAB(s)>description".
# The -h handling further down greps this string for the name the script
# was invoked as and prints everything after the tab separator, so the tabs
# between name and description are significant.
helpstr="
smart:		Show SMART temperature and error stats (specific to drive type)
smartx:		Show SMART extended drive stats (specific to drive type).
temp:		Show SMART drive temperature in celsius (all drives).
health:		Show reported SMART status (all drives).
r_proc:		Show SMART read GBytes processed over drive lifetime (SAS).
w_proc:		Show SMART write GBytes processed over drive lifetime (SAS).
r_ucor:		Show SMART read uncorrectable errors (SAS).
w_ucor:		Show SMART write uncorrectable errors (SAS).
nonmed:		Show SMART non-medium errors (SAS).
defect:		Show SMART grown defect list (SAS).
hours_on:	Show number of hours drive powered on (all drives).
realloc:	Show SMART reallocated sectors count (ATA).
rep_ucor:	Show SMART reported uncorrectable count (ATA).
cmd_to:		Show SMART command timeout count (ATA).
pend_sec:	Show SMART current pending sector count (ATA).
off_ucor:	Show SMART offline uncorrectable errors (ATA).
ata_err:	Show SMART ATA errors (ATA).
pwr_cyc:	Show SMART power cycle count (ATA).
serial:		Show disk serial number.
nvme_err:	Show SMART NVMe errors (NVMe).
smart_test:	Show SMART self-test results summary.
test_type:	Show SMART self-test type (short, long... ).
test_status:	Show SMART self-test status.
test_progress:	Show SMART self-test percentage done.
test_ended:	Show when the last SMART self-test ended (if supported).
"

# Hack for developer testing
#
# If you set $samples to a directory containing smartctl output text files,
# we will use them instead of running smartctl on the vdevs.  This can be
# useful if you want to test a bunch of different smartctl outputs.  Also, if
# $samples is set, an additional 'file' column is added to the zpool output
# showing the filename.
samples=

# get_filename_from_dir DIR
#
# Look in directory DIR and print the name (without the directory part) of
# one regular file located directly inside it.  The file is chosen
# quasi-sequentially (our PID modulo the number of files).  This allows us to
# return a different filename every time this script is invoked (which we do
# for each vdev), without having to maintain state.
#
# Prints nothing and returns 1 when DIR contains no regular files.
get_filename_from_dir()
{
	dir=$1
	pid="$$"
	# Count and enumeration below must look at the same set of files, so
	# both use -maxdepth 1.  $(( )) also strips the leading blanks that
	# some wc implementations print.
	num_files=$(( $(find "$dir" -maxdepth 1 -type f | wc -l) ))
	if [ "$num_files" -eq 0 ] ; then
		# Nothing to pick from; also avoids a modulo by zero below.
		return 1
	fi
	mod=$((pid % num_files))
	i=0
	# Strip the directory part with parameter expansion rather than the
	# GNU-only "find -printf".
	find "$dir" -maxdepth 1 -type f | while IFS= read -r file ; do
		if [ "$mod" = "$i" ] ; then
			printf '%s\n' "${file##*/}"
			break
		fi
		i=$((i+1))
	done
}

# Name this script was invoked as (directory part stripped).  It selects the
# help entry printed for -h and the column(s) chosen by the case statement
# further down.
script="${0##*/}"

# -h: print only the description belonging to the invoked name, then exit.
# The match is anchored to the start of the line so that a name can never
# match text inside another entry's description.
if [ "$1" = "-h" ] ; then
	printf '%s\n' "$helpstr" | grep "^$script:" | tr -s '\t' | cut -f 2-
	exit
fi

# Sometimes, UPATH ends up /dev/(null).
# That should be corrected, but for now...
# Use VDEV_UPATH when it names a block device, otherwise fall back to
# VDEV_PATH.  Both variables are expected to come from the environment.
# shellcheck disable=SC2154
if [ -b "$VDEV_UPATH" ]; then
	somepath="${VDEV_UPATH}"
else
	somepath="${VDEV_PATH}"
fi

# smart_parse_raw
#
# Read "smartctl -a" text on stdin and print one "key=value" line per
# recognised statistic, followed by a final "type=<sas|sata|nvme>" line
# ("type=" with an empty value when no drive type was recognised).
#
# What kind of drive are we?  Look for the right line in smartctl:
#
# SAS:
#	Transport protocol:   SAS
#
# SATA:
#	ATA Version is:   8
#
# NVMe:
#       SMART/Health Information (NVMe Log 0xnn, NSID 0xnn)
#
# NOTE: the awk program is inside shell single quotes, so it must not
# contain a single-quote character, not even in its comments.
smart_parse_raw()
{
	awk '
# SAS specific
/read:/{print "rrd="$4"\nr_cor="$5"\nr_proc="$7"\nr_ucor="$8}
/write:/{print "rwr="$4"\nw_cor="$5"\nw_proc="$7"\nw_ucor="$8}
/Non-medium error count/{print "nonmed="$4}
/Elements in grown defect list/{print "defect="$6}

# SAS common
/SAS/{type="sas"}
/Drive Temperature:/{print "temp="$4}
# Status can be a long string, substitute spaces for underscores
/SMART Health Status:/{printf "health="; for(i=4;i<=NF-1;i++){printf "%s_", $i}; printf "%s\n", $i}
/number of hours powered up/{print "hours_on="$7; hours_on=int($7)}
/Serial number:/{print "serial="$3}

# SATA specific
/Reallocated_Sector_Ct/{print "realloc="$10}
/Reported_Uncorrect/{print "rep_ucor="$10}
/Command_Timeout/{print "cmd_to="$10}
/Current_Pending_Sector/{print "pend_sec="$10}
/Offline_Uncorrectable/{print "off_ucor="$10}
/ATA Error Count:/{print "ata_err="$4}
/Power_Cycle_Count/{print "pwr_cyc="$10}

# SATA common
/SATA/{type="sata"}
/Temperature_Celsius/{print "temp="$10}
/Airflow_Temperature_Cel/{print "temp="$10}
/Current Temperature:/{print "temp="$3}
# The next rule and the "Serial Number:" rule below are shared by SATA and
# NVMe output; each appears only once so the value is not printed twice.
/SMART overall-health self-assessment test result:/{print "health="$6}
/Power_On_Hours/{print "hours_on="$10; hours_on=int($10)}
/Serial Number:/{print "serial="$3}

# NVMe common
/NVMe/{type="nvme"}
# Anchored to the start of the line so it does not also fire on the
# "Current Drive Temperature:", "Drive Trip Temperature:" or
# "Current Temperature:" lines, where $2 is a word rather than a number.
# NOTE(review): assumes smartctl prints the NVMe "Temperature:" label at
# column 0 - confirm against the smartctl versions in use.
/^Temperature:/{print "temp="$2}
/Power On Hours:/{gsub("[^0-9]","",$4); print "hours_on="$4}
/Power Cycles:/{print "pwr_cyc="$3}

# NVMe specific
/Media and Data Integrity Errors:/{print "nvme_err="$6}

# SMART self-test info
/Self-test execution status:/{progress=tolower($4)} # SAS
/SMART Self-test log/{test_seen=1} # SAS
/SMART Extended Self-test Log/{test_seen=1} # SATA
/# 1/{
	test_type=tolower($3"_"$4);
	# Status could be one word ("Completed") or multiple ("Completed: read
	# failure").  Look for the ":" to see if we need to grab more words.

	if ($5 ~ ":")
		status=tolower($5""$6"_"$7)
	else
		status=tolower($5)
	if (status=="self")
		status="running";

	if (type == "sas") {
		hours=int($(NF-4))
	} else {
		hours=int($(NF-1))
		# SATA reports percent remaining, rather than percent done
		# Convert it to percent done.
		progress=(100-int($(NF-2)))"%"
	}
	# When we int()-ify "hours", it converts stuff like "NOW" and "-" into
	# 0.  In those cases, set it to hours_on, so they will cancel out in
	# the "hours_ago" calculation later on.
	if (hours == 0)
		hours=hours_on

	if (test_seen) {
		print "test="hours_on
		print "test_type="test_type
		print "test_status="status
		print "test_progress="progress
	}
	# Not all drives report hours_on
	if (hours_on && hours) {
		total_hours_ago=(hours_on-hours)
		days_ago=int(total_hours_ago/24)
		hours_ago=(total_hours_ago % 24)
		if (days_ago != 0)
			ago_str=days_ago"d"
		if (hours_ago !=0)
			ago_str=ago_str""hours_ago"h"
		print "test_ended="ago_str
	}
}

END {print "type="type; ORS="\n"; print ""}
'
}

# Start from a known-empty result so a value inherited from the environment
# can never be mistaken for parsed smartctl output.
out=

# Collect raw smartctl text when we either have a block device plus a
# smartctl binary (looked up with /usr/sbin prepended to PATH), or when the
# developer-testing $samples directory is set; then parse it into $out.
if { [ -b "$somepath" ] && PATH="/usr/sbin:$PATH" command -v smartctl > /dev/null ; } || [ -n "$samples" ] ; then
	if [ -n "$samples" ] ; then
		# cat a smartctl output text file instead of running smartctl
		# on a vdev (only used for developer testing).
		file=$(get_filename_from_dir "$samples")
		echo "file=$file"
		raw_out=$(cat "$samples/$file")
	else
		raw_out=$(sudo smartctl -a "$somepath")
	fi

	out=$(printf '%s\n' "$raw_out" | smart_parse_raw)
fi
# Pull the drive type out of the "type=" line of the parsed output.
type=$(printf '%s\n' "$out" | grep '^type=' | cut -d '=' -f 2)

# If type is not set by now, either we don't have a block device
# or smartctl failed. Either way, default to ATA and set $out to
# nothing.
if [ -z "$type" ]; then
	type="sata"
	out=
fi

# smart_columns_for SCRIPT TYPE
#
# Print the "|"-separated list of column names to report for the script
# name SCRIPT on a drive of TYPE (sas, sata or nvme).  The aggregate names
# smart, smartx and smart_test expand to several columns; any other name
# is printed unchanged as a single column.  For smart and smartx nothing
# is printed when TYPE is not one of the three known types.
smart_columns_for()
{
	case "$1" in
	smart)
		# Print temperature plus common predictors of drive failure
		case "$2" in
		sas)
			echo "temp|health|r_ucor|w_ucor"
			;;
		sata)
			echo "temp|health|ata_err|realloc|rep_ucor|cmd_to|pend_sec|off_ucor"
			;;
		nvme)
			echo "temp|health|nvme_err"
			;;
		esac
		;;
	smartx)
		# Print some other interesting stats
		case "$2" in
		sas)
			echo "hours_on|defect|nonmed|r_proc|w_proc"
			;;
		sata|nvme)
			echo "hours_on|pwr_cyc"
			;;
		esac
		;;
	smart_test)
		echo "test_type|test_status|test_progress|test_ended"
		;;
	*)
		printf '%s\n' "$1"
		;;
	esac
}

scripts=$(smart_columns_for "$script" "$type")

# smart_print_columns OUT COLUMNS
#
# OUT is newline-separated "key=value" text and COLUMNS is a "|"-separated
# list of column names.  Print every OUT line whose key is one of COLUMNS,
# then print "name=" (empty value) for each requested column that had no
# line in OUT.
smart_print_columns()
{
	# Anchor on "^name=" so a column name appearing inside some other
	# line's value (e.g. in a serial number) is not mistaken for a match.
	with_vals=$(printf '%s\n' "$1" | grep -E "^($2)=")
	if [ -n "$with_vals" ]; then
		printf '%s\n' "$with_vals"
		# Drop the names that already have a value.  -x -F compares
		# whole names literally instead of as substring regexes.
		without_vals=$(printf '%s\n' "$2" | tr '|' '\n' |
			grep -v -x -F -e "$(printf '%s\n' "$with_vals" |
			awk -F "=" '{print $1}')" | awk '{print $0"="}')
	else
		without_vals=$(printf '%s\n' "$2" | tr '|' '\n' | awk '{print $0"="}')
	fi

	if [ -n "$without_vals" ]; then
		printf '%s\n' "$without_vals"
	fi
}

smart_print_columns "$out" "$scripts"