xref: /linux/tools/testing/selftests/powerpc/eeh/eeh-basic.sh (revision 85d86c8aa52eb5b3539eebe3adcc2f077118b412)
1*85d86c8aSOliver O'Halloran#!/bin/sh
2*85d86c8aSOliver O'Halloran# SPDX-License-Identifier: GPL-2.0-only
3*85d86c8aSOliver O'Halloran
4*85d86c8aSOliver O'Halloran. ./eeh-functions.sh
5*85d86c8aSOliver O'Halloran
# Skip (rather than pass) when the platform has no EEH support.
# kselftest interprets exit status 4 as "skipped"; exiting 0 would
# report a test that never ran as a success.
if ! eeh_supported ; then
	echo "EEH not supported on this system, skipping"
	exit 4
fi

# Bail out if either debugfs test file is missing, not only when both
# are: the error message treats them as a set, and the injection
# helpers are expected to need both of them.
if [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_check" ] || \
   [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_break" ] ; then
	echo "debugfs EEH testing files are missing. Is debugfs mounted?"
	exit 1
fi
16*85d86c8aSOliver O'Halloran
# Snapshot the PCI device list before breaking anything so it can be
# diffed against the list seen once the test has finished.
pre_lspci="$(mktemp)" || {
	echo "Failed to create a temporary file for the lspci snapshot"
	exit 1
}
lspci > "$pre_lspci"

# Bump the max freeze count to something absurd so we don't
# trip over it while breaking things.
echo 5000 > /sys/kernel/debug/powerpc/eeh_max_freezes
23*85d86c8aSOliver O'Halloran
# Record the devices that we break in here. Assuming everything
# goes to plan we should get them back once the recovery process
# is finished.
devices=""
dev_count=0

# Build up a list of candidate devices. Only sysfs entries whose name
# ends in ".0" (function 0) are considered. A glob is used instead of
# parsing the output of ls.
for dev_path in /sys/bus/pci/devices/*.0 ; do
	# An unmatched glob is left as the literal pattern; skip it.
	[ -e "$dev_path" ] || continue

	# Strip the directory part to get the bare device address.
	dev="${dev_path##*/}"

	# skip bridges since we can't recover them (yet...)
	if [ -e "/sys/bus/pci/devices/$dev/pci_bus" ] ; then
		echo "$dev, Skipped: bridge"
		continue
	fi

	# Skip VFs for now since we don't have a reliable way
	# to break them.
	if [ -e "/sys/bus/pci/devices/$dev/physfn" ] ; then
		echo "$dev, Skipped: virtfn"
		continue
	fi

	# Don't inject errors into an already-frozen PE. This happens with
	# PEs that contain multiple PCI devices (e.g. multi-function cards)
	# and injecting new errors during the recovery process will probably
	# result in the recovery failing and the device being marked as
	# failed.
	if ! pe_ok "$dev" ; then
		echo "$dev, Skipped: Bad initial PE state"
		continue
	fi

	echo "$dev, Added"

	# Add the device to the space-separated list of devices to break.
	devices="$devices $dev"
	dev_count="$((dev_count + 1))"
done

echo "Found ${dev_count} breakable devices..."
62*85d86c8aSOliver O'Halloran
# Break each candidate device in turn and count the ones that do not
# come back.
failed=0
# $devices is a space-separated list; it is left unquoted on purpose so
# that it splits into one word per device.
for dev in $devices ; do
	echo "Breaking $dev..."

	# Check the PE state again right before injecting: breaking an
	# earlier device may have left this device's PE in a bad state.
	if ! pe_ok "$dev" ; then
		echo "Skipping $dev, Initial PE state is not ok"
		failed="$((failed + 1))"
		continue
	fi

	if ! eeh_one_dev "$dev" ; then
		failed="$((failed + 1))"
	fi
done

echo "$failed devices failed to recover ($dev_count tested)"

# Show any difference between the PCI device list taken before the
# test and the current one, then drop the snapshot.
lspci | diff -u "$pre_lspci" -
rm -f "$pre_lspci"

# Do not use the failure count as the exit status: it is truncated to
# 8 bits (256 failures would look like success) and a count of 4 would
# be read by kselftest as "skipped". Report plain pass/fail instead.
if [ "$failed" -ne 0 ] ; then
	exit 1
fi
exit 0
83