#!/bin/sh
# SPDX-License-Identifier: GPL-2.0-only
#
# Basic EEH selftest.
#
# Builds a list of candidate PCI devices (function 0 only, skipping bridges,
# VFs, ahci-bound devices and devices whose PE is not ok), runs eeh_one_dev
# on each of them and counts how many fail. The helpers eeh_supported,
# pe_ok and eeh_one_dev come from eeh-functions.sh.
#
# Exit status: 0 if no device failed, non-zero if at least one failed,
# $KSELFTESTS_SKIP if the test cannot run on this system.

KSELFTESTS_SKIP=4

. ./eeh-functions.sh

if ! eeh_supported ; then
	echo "EEH not supported on this system, skipping"
	exit $KSELFTESTS_SKIP;
fi

# NOTE(review): this only skips when BOTH debugfs files are missing; if the
# intent is "either one missing", the && should be || -- confirm.
if [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_check" ] && \
   [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_break" ] ; then
	echo "debugfs EEH testing files are missing. Is debugfs mounted?"
	exit $KSELFTESTS_SKIP;
fi

# Snapshot the device list so it can be diffed against the state after the
# test has run.
pre_lspci=$(mktemp)
lspci > "$pre_lspci"

# Bump the max freeze count to something absurd so we don't
# trip over it while breaking things.
echo 5000 > /sys/kernel/debug/powerpc/eeh_max_freezes

# Record the devices that we break in here. Assuming everything
# goes to plan we should get them back once the recovery process
# is finished.
devices=""

# Build up a list of candidate devices.
for dev in $(ls -1 /sys/bus/pci/devices/ | grep '\.0$') ; do
	# skip bridges since we can't recover them (yet...)
	if [ -e "/sys/bus/pci/devices/$dev/pci_bus" ] ; then
		echo "$dev, Skipped: bridge"
		continue;
	fi

	# Skip VFs for now since we don't have a reliable way
	# to break them.
	if [ -e "/sys/bus/pci/devices/$dev/physfn" ] ; then
		echo "$dev, Skipped: virtfn"
		continue;
	fi

	if [ "ahci" = "$(basename "$(realpath "/sys/bus/pci/devices/$dev/driver")")" ] ; then
		echo "$dev, Skipped: ahci doesn't support recovery"
		continue
	fi

	# Don't inject errors into an already-frozen PE. This happens with
	# PEs that contain multiple PCI devices (e.g. multi-function cards)
	# and injecting new errors during the recovery process will probably
	# result in the recovery failing and the device being marked as
	# failed.
	if ! pe_ok "$dev" ; then
		echo "$dev, Skipped: Bad initial PE state"
		continue;
	fi

	echo "$dev, Added"

	# Add to the list of devices to check
	devices="$devices $dev"
done

# $devices is deliberately left unquoted: it is a whitespace-separated list.
dev_count="$(echo $devices | wc -w)"
echo "Found ${dev_count} breakable devices..."

failed=0
for dev in $devices ; do
	echo "Breaking $dev..."

	# Re-check the PE state right before touching the device; it may have
	# changed since the candidate list was built.
	if ! pe_ok "$dev" ; then
		echo "Skipping $dev, Initial PE state is not ok"
		failed="$((failed + 1))"
		continue;
	fi

	if ! eeh_one_dev "$dev" ; then
		failed="$((failed + 1))"
	fi
done

echo "$failed devices failed to recover ($dev_count tested)"
lspci | diff -u "$pre_lspci" -
rm -f "$pre_lspci"

# Use the POSIX numeric comparison: "==" is not a valid operator for test
# under a strict /bin/sh (e.g. dash) and would make the script fail even
# when every device recovered.
test "$failed" -eq 0
exit $?