#!/bin/sh
# SPDX-License-Identifier: GPL-2.0-only
#
# Basic EEH selftest.
#
# Collects every function-0 PCI device under /sys/bus/pci/devices that is
# not a bridge, not a VF, not bound to ahci and whose PE state is ok, then
# runs eeh_one_dev on each of them and counts how many calls fail.
#
# Relies on eeh_supported, pe_ok and eeh_one_dev, which are expected to be
# provided by the sourced ./eeh-functions.sh.
#
# Exit status:
#   0                 every tested device recovered
#   1                 at least one device failed (or mktemp failed)
#   $KSELFTESTS_SKIP  EEH or the debugfs test files are unavailable

KSELFTESTS_SKIP=4

. ./eeh-functions.sh

if ! eeh_supported ; then
	echo "EEH not supported on this system, skipping"
	exit "$KSELFTESTS_SKIP"
fi

# NOTE(review): this only skips when BOTH files are missing. If the helpers
# need each of them, '||' would be the right operator - confirm against
# eeh-functions.sh before changing.
if [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_check" ] && \
   [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_break" ] ; then
	echo "debugfs EEH testing files are missing. Is debugfs mounted?"
	exit "$KSELFTESTS_SKIP"
fi

# Snapshot the lspci output so it can be diffed against the state after the
# test. The EXIT trap removes the snapshot on every exit path.
pre_lspci="$(mktemp)" || exit 1
trap 'rm -f "$pre_lspci"' EXIT
lspci > "$pre_lspci"

# Bump the max freeze count to something absurd so we don't
# trip over it while breaking things.
echo 5000 > /sys/kernel/debug/powerpc/eeh_max_freezes

# record the devices that we break in here. Assuming everything
# goes to plan we should get them back once the recover process
# is finished.
devices=""
dev_count=0

# Build up a list of candidate devices (function 0 of each device only).
for dev_path in /sys/bus/pci/devices/*.0 ; do
	# An unmatched glob is left as the literal pattern; ignore it.
	[ -e "$dev_path" ] || continue
	dev="${dev_path##*/}"

	# skip bridges since we can't recover them (yet...)
	if [ -e "/sys/bus/pci/devices/$dev/pci_bus" ] ; then
		echo "$dev, Skipped: bridge"
		continue
	fi

	# Skip VFs for now since we don't have a reliable way
	# to break them.
	if [ -e "/sys/bus/pci/devices/$dev/physfn" ] ; then
		echo "$dev, Skipped: virtfn"
		continue
	fi

	if [ "ahci" = "$(basename "$(realpath "/sys/bus/pci/devices/$dev/driver")")" ] ; then
		echo "$dev, Skipped: ahci doesn't support recovery"
		continue
	fi

	# Don't inject errors into an already-frozen PE. This happens with
	# PEs that contain multiple PCI devices (e.g. multi-function cards)
	# and injecting new errors during the recovery process will probably
	# result in the recovery failing and the device being marked as
	# failed.
	if ! pe_ok "$dev" ; then
		echo "$dev, Skipped: Bad initial PE state"
		continue
	fi

	echo "$dev, Added"

	# Add to this list of device to check
	devices="$devices $dev"
	dev_count="$((dev_count + 1))"
done

echo "Found ${dev_count} breakable devices..."

failed=0
# $devices is deliberately unquoted: it is a space-separated list of PCI
# addresses and must be split into words here.
for dev in $devices ; do
	echo "Breaking $dev..."

	# Re-check the PE state: it may have changed since the list was built.
	if ! pe_ok "$dev" ; then
		echo "Skipping $dev, Initial PE state is not ok"
		failed="$((failed + 1))"
		continue
	fi

	if ! eeh_one_dev "$dev" ; then
		failed="$((failed + 1))"
	fi
done

echo "$failed devices failed to recover ($dev_count tested)"

# Show any difference between the device list before and after the test.
lspci | diff -u "$pre_lspci" -

# Use the POSIX numeric comparison; '==' is not defined for test(1) and
# fails under shells such as dash, which made the script always fail.
[ "$failed" -eq 0 ]
exit $?