#!/bin/sh
#
# Copyright 2015, Daniel Axtens, IBM Corporation
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# Overview:
#   For every chip/core pair listed in the OPAL message log, check that the
#   core FIR reads zero, write an error-injection value to it with putscom,
#   and then wait (up to one minute) for the expected number of
#   "Harmless Hypervisor Maintenance interrupt" lines to show up in dmesg.
#
# Requires: getscom and putscom (in . or $PATH), ppc64_cpu, dmesg, and
# read access to /sys/firmware/opal/msglog.
#
# Exit status: 0 if every injection was followed by the expected HMIs,
# 1 on a missing tool, non-zero FIR, failed injection, timeout or signal.

# do we have ./getscom, ./putscom?
if [ -x ./getscom ] && [ -x ./putscom ]; then
	GETSCOM=./getscom
	PUTSCOM=./putscom
# Both tools are required, so both must be found in $PATH.
elif command -v getscom > /dev/null && command -v putscom > /dev/null; then
	GETSCOM=$(command -v getscom)
	PUTSCOM=$(command -v putscom)
else
	cat <<EOF
Can't find getscom/putscom in . or \$PATH.
See https://github.com/open-power/skiboot.
The tool is in external/xscom-utils
EOF
	exit 1
fi

# We will get 8 HMI events per injection
# todo: deal with things being offline
expected_hmis=8

# Print the number of harmless-HMI lines currently present in dmesg.
# grep -c prints 0 (and returns non-zero) when nothing matches, so the
# output is always a number.
COUNT_HMIS() {
	dmesg | grep -c 'Harmless Hypervisor Maintenance interrupt'
}

# massively expand snooze delay, allowing injection on all cores
ppc64_cpu --smt-snooze-delay=1000000000

# when we exit, restore it
trap "ppc64_cpu --smt-snooze-delay=100" EXIT
# Turn termination signals into a normal exit so that the EXIT trap above
# runs (some /bin/sh implementations skip it when killed by a signal) and
# so that the script does not keep injecting after the delay was restored.
trap "exit 1" HUP INT TERM

# for each chip+core combination
# todo - less fragile parsing
#
# The loop is the last stage of a pipeline and therefore runs in a subshell:
# "exit 1" below leaves the subshell, and because the pipeline is the final
# command of the script its status becomes the script's exit status.
grep -E -o 'OCC: Chip [0-9a-f]+ Core [0-9a-f]' < /sys/firmware/opal/msglog |
while read -r chipcore; do
	# chipcore looks like "OCC: Chip <chip> Core <core>"
	chip=$(echo "$chipcore" | awk '{print $3}')
	core=$(echo "$chipcore" | awk '{print $5}')
	fir="0x1${core}013100"

	# verify that Core FIR is zero as expected
	if [ "$("$GETSCOM" -c "0x${chip}" "$fir")" != 0 ]; then
		echo "FIR was not zero before injection for chip $chip, core $core. Aborting!"
		echo "Result of $GETSCOM -c 0x${chip} $fir:"
		"$GETSCOM" -c "0x${chip}" "$fir"
		echo "If you get a -5 error, the core may be in idle state. Try stress-ng."
		echo "Otherwise, try $PUTSCOM -c 0x${chip} $fir 0"
		exit 1
	fi

	# keep track of the number of HMIs handled
	old_hmis=$(COUNT_HMIS)
	target_hmis=$((old_hmis + expected_hmis))

	# do injection, adding a marker to dmesg for clarity
	echo "Injecting HMI on core $core, chip $chip" | tee /dev/kmsg
	# inject a RegFile recoverable error
	if ! "$PUTSCOM" -c "0x${chip}" "$fir" 2000000000000000 > /dev/null; then
		echo "Error injecting. Aborting!"
		exit 1
	fi

	# now we want to wait for all the HMIs to be processed
	# we expect one per thread on the core
	# Poll every 5 seconds, at most 12 times (one minute in total).
	i=0
	new_hmis=$(COUNT_HMIS)
	while [ "$new_hmis" -lt "$target_hmis" ] && [ "$i" -lt 12 ]; do
		echo "Seen $((new_hmis - old_hmis)) HMI(s) out of $expected_hmis expected, sleeping"
		sleep 5
		i=$((i + 1))
		new_hmis=$(COUNT_HMIS)
	done
	# Decide on the count, not on the iteration counter: the HMIs may have
	# arrived during the final sleep, in which case this is a success.
	if [ "$new_hmis" -lt "$target_hmis" ]; then
		echo "Haven't seen expected $expected_hmis recoveries after 1 min. Aborting."
		exit 1
	fi
	echo "Processed $expected_hmis events; presumed success. Check dmesg."
	echo ""
done