#!/bin/sh
# shellcheck disable=SC3014,SC2154,SC2086,SC2034
#
# Turn off disk's enclosure slot if an I/O is hung triggering the deadman.
#
# It's possible for outstanding I/O to a misbehaving SCSI disk to neither
# promptly complete nor return an error. This can occur due to retry and
# recovery actions taken by the SCSI layer, driver, or disk. When it occurs
# the pool will be unresponsive even though there may be sufficient redundancy
# configured to proceed without this single disk.
#
# When a hung I/O is detected by the kmods it will be posted as a deadman
# event. By default an I/O is considered to be hung after 5 minutes. This
# value can be changed with the zfs_deadman_ziotime_ms module parameter.
# If ZED_POWER_OFF_ENCLOSURE_SLOT_ON_DEADMAN is set the disk's enclosure
# slot will be powered off causing the outstanding I/O to fail. The ZED
# will then handle this like a normal disk failure and FAULT the vdev.
#
# We assume the user will be responsible for turning the slot back on
# after replacing the disk.
#
# Note that this script requires that your enclosure be supported by the
# Linux SCSI Enclosure Services (SES) driver. The script will do nothing
# if you have no enclosure, or if your enclosure isn't supported.
#
# Exit codes:
#   0: slot successfully powered off
#   1: enclosure not available
#   2: ZED_POWER_OFF_ENCLOSURE_SLOT_ON_DEADMAN disabled
#   3: System not configured to wait on deadman
#   4: The enclosure sysfs path passed from ZFS does not exist
#   5: Enclosure slot didn't actually turn off after we told it to

# Load zed.rc when it is present, then zed-functions.sh.
if [ -f "${ZED_ZEDLET_DIR}/zed.rc" ] ; then
	. "${ZED_ZEDLET_DIR}/zed.rc"
fi
. "${ZED_ZEDLET_DIR}/zed-functions.sh"

# Exit 1: no JBOD enclosure or NVMe slots
[ -d /sys/class/enclosure ] || exit 1

# Exit 2: powering off the slot on deadman has not been enabled
[ "${ZED_POWER_OFF_ENCLOSURE_SLOT_ON_DEADMAN}" = "1" ] || exit 2

# Exit 3: the pool's failmode is something other than "wait"
[ "$ZEVENT_POOL_FAILMODE" = "wait" ] || exit 3

# Exit 4: the slot has no power_status attribute at the path we were given
[ -f "$ZEVENT_VDEV_ENC_SYSFS_PATH/power_status" ] || exit 4

# Turn off the slot and wait for sysfs to report that the slot is off.
# It can take ~400ms on some enclosures and multiple retries may be needed.
# Each of the 20 attempts writes "off" to the slot's power_status attribute
# and then polls it up to 5 times, 100ms apart. Both loops are left as soon
# as sysfs reports "off".
slot_power_status="$ZEVENT_VDEV_ENC_SYSFS_PATH/power_status"

for i in $(seq 1 20) ; do
	# tee writes the value to sysfs and also copies it to stdout.
	echo "off" | tee "$slot_power_status"

	for j in $(seq 1 5) ; do
		# Compare with the POSIX "=" operator: "==" is not portable
		# under /bin/sh, and a failing test here would keep the
		# loops from ever breaking out early.
		if [ "$(cat "$slot_power_status")" = "off" ] ; then
			break 2
		fi
		sleep 0.1
	done
done

# Re-read the state after the loops; if the slot still is not off, report
# failure (exit code 5) without logging a power-down.
if [ "$(cat "$slot_power_status")" != "off" ] ; then
	exit 5
fi

zed_log_msg "powered down slot $ZEVENT_VDEV_ENC_SYSFS_PATH for $ZEVENT_VDEV_PATH"