xref: /titanic_52/usr/src/cmd/fm/eversholt/files/common/disk.esc (revision e58a33b62cd4c9a6815fd752ce58b5f389289da1)
124db4641Seschrock/*
224db4641Seschrock * CDDL HEADER START
324db4641Seschrock *
424db4641Seschrock * The contents of this file are subject to the terms of the
524db4641Seschrock * Common Development and Distribution License (the "License").
624db4641Seschrock * You may not use this file except in compliance with the License.
724db4641Seschrock *
824db4641Seschrock * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
924db4641Seschrock * or http://www.opensolaris.org/os/licensing.
1024db4641Seschrock * See the License for the specific language governing permissions
1124db4641Seschrock * and limitations under the License.
1224db4641Seschrock *
1324db4641Seschrock * When distributing Covered Code, include this CDDL HEADER in each
1424db4641Seschrock * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
1524db4641Seschrock * If applicable, add the following below this CDDL HEADER, with the
1624db4641Seschrock * fields enclosed by brackets "[]" replaced with your own identifying
1724db4641Seschrock * information: Portions Copyright [yyyy] [name of copyright owner]
1824db4641Seschrock *
1924db4641Seschrock * CDDL HEADER END
2024db4641Seschrock */
2124db4641Seschrock/*
22*e58a33b6SStephen Hanson * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
2324db4641Seschrock */
2424db4641Seschrock
/*
 * Name the message dictionary used for diagnoses made by these rules.
 */
#pragma dictionary "DISK"

/*
 * P abbreviates the topology path component, disk, that the events in
 * the rules below are declared against.
 */
#define	P	disk

/*
 * Declare P as both an ASRU and a FRU.
 */
asru P;
fru P;
3124db4641Seschrock
/*
 * Overall comments for this file:
 * <disk-as-detector> The disk-as-detector DE provides the mapping between
 * ereports generated by a kernel disk driver sd(7D) and resulting faults.
 */
379e1c849eSDavid Zhang - Sun Microsystems - Beijing China
/*
 * SERD engine for media error fault propagation:
 *
 * This strategy is designed to give a file system, like ZFS, the
 * ability to attempt data recovery/relocation without faulting a disk.
 * This implementation depends on a file system retry to the same lba
 * to trigger a fault when recovery/relocation is not possible.
 *
 * We let the engine propagate one error only once every 1 minute, and
 * then if we still get 2 or more errors within 24 hours for the same
 * LBA, there is a fault.
 */
engine serd.io.scsi.cmd.disk.dev.rqs.merr@P,
    N=1, T=24h;
505dc9a986SDavid Zhang
515dc9a986SDavid Zhang/*
529e1c849eSDavid Zhang - Sun Microsystems - Beijing China * disk-as-detector: fault events.
539e1c849eSDavid Zhang - Sun Microsystems - Beijing China */
/* The merr fault is counted by the merr SERD engine declared above. */
event fault.io.scsi.cmd.disk.dev.rqs.merr@P,
    engine=serd.io.scsi.cmd.disk.dev.rqs.merr@P;

/* The derr fault has no SERD engine attached. */
event fault.io.scsi.cmd.disk.dev.rqs.derr@P;
575dc9a986SDavid Zhang
589e1c849eSDavid Zhang - Sun Microsystems - Beijing China/*
599e1c849eSDavid Zhang - Sun Microsystems - Beijing China * The uderr fault will be defined at some future time.
609e1c849eSDavid Zhang - Sun Microsystems - Beijing China * event fault.io.scsi.cmd.disk.dev.uderr@P;
619e1c849eSDavid Zhang - Sun Microsystems - Beijing China */
629e1c849eSDavid Zhang - Sun Microsystems - Beijing China
639e1c849eSDavid Zhang - Sun Microsystems - Beijing China/*
649e1c849eSDavid Zhang - Sun Microsystems - Beijing China * disk-as-detector: upset events.
659e1c849eSDavid Zhang - Sun Microsystems - Beijing China * NOTE: For now we define an upset to implement discard.
669e1c849eSDavid Zhang - Sun Microsystems - Beijing China */
/* Upset counterparts of the two rqs faults. */
event upset.io.scsi.cmd.disk.dev.rqs.derr@P;
event upset.io.scsi.cmd.disk.dev.rqs.merr@P;

/* Upsets for which this file declares no fault. */
event upset.io.scsi.cmd.disk.dev.serr@P;
event upset.io.scsi.cmd.disk.dev.uderr@P;
event upset.io.scsi.cmd.disk.recovered@P;
event upset.io.scsi.cmd.disk.tran@P;
739e1c849eSDavid Zhang - Sun Microsystems - Beijing China
749e1c849eSDavid Zhang - Sun Microsystems - Beijing China/*
759e1c849eSDavid Zhang - Sun Microsystems - Beijing China * disk-as-detector: ereports from the kernel.
769e1c849eSDavid Zhang - Sun Microsystems - Beijing China *
779e1c849eSDavid Zhang - Sun Microsystems - Beijing China * We don't know the topology for all scsi disks, but the kernel will always
789e1c849eSDavid Zhang - Sun Microsystems - Beijing China * generate ereport telemetry assuming that we do. We define these ereports
799e1c849eSDavid Zhang - Sun Microsystems - Beijing China * with 'discard_if_config_unknown=1', which permits ereports against things
809e1c849eSDavid Zhang - Sun Microsystems - Beijing China * with unknown topology to be silently discarded.  The ereport data is logged
819e1c849eSDavid Zhang - Sun Microsystems - Beijing China * in either case, and can be viewed via 'fmdump -eV'.
829e1c849eSDavid Zhang - Sun Microsystems - Beijing China */
event ereport.io.scsi.cmd.disk.dev.rqs.derr@P,
    discard_if_config_unknown=1;
event ereport.io.scsi.cmd.disk.dev.rqs.merr@P,
    discard_if_config_unknown=1;
event ereport.io.scsi.cmd.disk.dev.serr@P,
    discard_if_config_unknown=1;
event ereport.io.scsi.cmd.disk.dev.uderr@P,
    discard_if_config_unknown=1;
event ereport.io.scsi.cmd.disk.recovered@P,
    discard_if_config_unknown=1;
event ereport.io.scsi.cmd.disk.tran@P,
    discard_if_config_unknown=1;
899e1c849eSDavid Zhang - Sun Microsystems - Beijing China
/*
 * For some ereports we let the 'driver-assessment', communicated as part of
 * the ereport payload, determine fault vs. upset via propagation constraints.
 *
 * DRIVER_ASSESSMENT_FATAL holds when the "driver-assessment" payload
 * property contains "fatal"; DRIVER_ASSESSMENT_NONFATAL is its negation.
 */
#define	DRIVER_ASSESSMENT_FATAL \
	(payloadprop_contains("driver-assessment", "fatal"))
#define	DRIVER_ASSESSMENT_NONFATAL \
	(!payloadprop_contains("driver-assessment", "fatal"))
979e1c849eSDavid Zhang - Sun Microsystems - Beijing China
/*
 * disk-as-detector: propagations from faults (based on
 * DRIVER_ASSESSMENT_FATAL).
 * We need to set additional fault payloads to indicate fault details.
 * The payloads we may need are listed as follows:
 * fault.io.scsi.cmd.disk.dev.rqs.derr
 *     op_code, key, asc, ascq
 * fault.io.scsi.cmd.disk.dev.rqs.merr
 *     op_code, key, asc, ascq, lba
 */
prop fault.io.scsi.cmd.disk.dev.rqs.derr@P ->
    ereport.io.scsi.cmd.disk.dev.rqs.derr@P {
        /* the constraint only holds for a fatal driver-assessment */
        DRIVER_ASSESSMENT_FATAL &&
        /* copy key/asc/ascq from the ereport payload to the fault payload */
        setpayloadprop("key", payloadprop("key")) &&
        setpayloadprop("asc", payloadprop("asc")) &&
        setpayloadprop("ascq", payloadprop("ascq"))
    };
1139e1c849eSDavid Zhang - Sun Microsystems - Beijing China
/*
 * Use setserdsuffix with the LBA taken from the ereport payload, so that
 * the SERD engine only triggers if the fault recurs on the same LBA.
 * The key, asc, ascq and lba payload properties are also copied from the
 * ereport to the fault.
 */
prop fault.io.scsi.cmd.disk.dev.rqs.merr@P ->
    ereport.io.scsi.cmd.disk.dev.rqs.merr@P {
        DRIVER_ASSESSMENT_FATAL &&
        setserdsuffix(payloadprop("lba")) &&
        setpayloadprop("key", payloadprop("key")) &&
        setpayloadprop("asc", payloadprop("asc")) &&
        setpayloadprop("ascq", payloadprop("ascq")) &&
        setpayloadprop("lba", payloadprop("lba"))
    };
1259e1c849eSDavid Zhang - Sun Microsystems - Beijing China
1269e1c849eSDavid Zhang - Sun Microsystems - Beijing China/*
1275dc9a986SDavid Zhang * NOTE: this propagation uses the "may" propagation of eversholt.
1285dc9a986SDavid Zhang * The ereport need never exist. It's just a way of making
1295dc9a986SDavid Zhang * the diagnosis wait for the within time on that ereport
1305dc9a986SDavid Zhang * to complete. Once it has completed the diagnosis continues
1315dc9a986SDavid Zhang * even though the dummy ereport didn't occur.
1325dc9a986SDavid Zhang */
/* Placeholder ereport whose only purpose is its 60 second within() window. */
event ereport.io.scsi.cmd.disk.dev.rqs.merr.dummy@P { within(60s) };

/* The (0) on the fault side makes this a "may" propagation. */
prop fault.io.scsi.cmd.disk.dev.rqs.merr@P (0)->
    ereport.io.scsi.cmd.disk.dev.rqs.merr.dummy@P;
1365dc9a986SDavid Zhang
1375dc9a986SDavid Zhang/*
1389e1c849eSDavid Zhang - Sun Microsystems - Beijing China * The uderr fault will be propagated at some future time.
1399e1c849eSDavid Zhang - Sun Microsystems - Beijing China * prop fault.io.scsi.cmd.disk.dev.uderr@P->
1409e1c849eSDavid Zhang - Sun Microsystems - Beijing China *     ereport.io.scsi.cmd.disk.dev.uderr@P{ DRIVER_ASSESSMENT_FATAL };
1419e1c849eSDavid Zhang - Sun Microsystems - Beijing China */
1429e1c849eSDavid Zhang - Sun Microsystems - Beijing China
1439e1c849eSDavid Zhang - Sun Microsystems - Beijing China/*
1449e1c849eSDavid Zhang - Sun Microsystems - Beijing China * disk-as-detector: propagations from upsets(based on
1459e1c849eSDavid Zhang - Sun Microsystems - Beijing China * DRIVER_ASSESSMENT_NONFATAL).
1469e1c849eSDavid Zhang - Sun Microsystems - Beijing China */
prop upset.io.scsi.cmd.disk.dev.rqs.derr@P ->
    ereport.io.scsi.cmd.disk.dev.rqs.derr@P {
        DRIVER_ASSESSMENT_NONFATAL
    };

prop upset.io.scsi.cmd.disk.dev.rqs.merr@P ->
    ereport.io.scsi.cmd.disk.dev.rqs.merr@P {
        DRIVER_ASSESSMENT_NONFATAL
    };
1529e1c849eSDavid Zhang - Sun Microsystems - Beijing China
1539e1c849eSDavid Zhang - Sun Microsystems - Beijing China/*
1549e1c849eSDavid Zhang - Sun Microsystems - Beijing China * disk-as-detector: propagations from upsets(independent of
1559e1c849eSDavid Zhang - Sun Microsystems - Beijing China * driver-assessment)
1569e1c849eSDavid Zhang - Sun Microsystems - Beijing China */
1579e1c849eSDavid Zhang - Sun Microsystems - Beijing China
/* Each of these ereports maps to its upset with no constraint. */
prop upset.io.scsi.cmd.disk.dev.serr@P -> ereport.io.scsi.cmd.disk.dev.serr@P;
prop upset.io.scsi.cmd.disk.dev.uderr@P -> ereport.io.scsi.cmd.disk.dev.uderr@P;
prop upset.io.scsi.cmd.disk.recovered@P -> ereport.io.scsi.cmd.disk.recovered@P;
prop upset.io.scsi.cmd.disk.tran@P -> ereport.io.scsi.cmd.disk.tran@P;
1699e1c849eSDavid Zhang - Sun Microsystems - Beijing China
1709e1c849eSDavid Zhang - Sun Microsystems - Beijing China/*
1719e1c849eSDavid Zhang - Sun Microsystems - Beijing China * --------------------------------------
1729e1c849eSDavid Zhang - Sun Microsystems - Beijing China * The remainder of this file contains rules associated with the operation of
1739e1c849eSDavid Zhang - Sun Microsystems - Beijing China * cmd/fm/modules/common/disk-monitor/disk_monitor.c code.
1749e1c849eSDavid Zhang - Sun Microsystems - Beijing China *
1759e1c849eSDavid Zhang - Sun Microsystems - Beijing China * The disk DE provides a very simple 1-to-1 mapping between SCSI disk events
1769e1c849eSDavid Zhang - Sun Microsystems - Beijing China * generated by the disk-transport fmd module, and the resulting faults.
1779e1c849eSDavid Zhang - Sun Microsystems - Beijing China */
1789e1c849eSDavid Zhang - Sun Microsystems - Beijing China
/*
 * Fault events.
 *
 * Every disk fault is declared with the same properties: FITrate=10, and
 * the disk itself (P) as both FRU and ASRU.  FITrate is given exactly once
 * per declaration.
 */
event fault.io.disk.over-temperature@P,
    FITrate=10, FRU=P, ASRU=P;
event fault.io.disk.predictive-failure@P,
    FITrate=10, FRU=P, ASRU=P;
event fault.io.disk.self-test-failure@P,
    FITrate=10, FRU=P, ASRU=P;
18824db4641Seschrock
/*
 * ereports, one for each of the disk faults declared in this section.
 */
event ereport.io.scsi.disk.over-temperature@P;

event ereport.io.scsi.disk.predictive-failure@P;

event ereport.io.scsi.disk.self-test-failure@P;
19524db4641Seschrock
/*
 * Propagations.
 *
 * Each disk fault propagates to the ereport with the matching leaf name.
 */
prop fault.io.disk.over-temperature@P -> ereport.io.scsi.disk.over-temperature@P;

/*
 * For predictive-failure, the ereport's additional-sense-code and
 * additional-sense-code-qualifier are copied into the fault payload as
 * asc and ascq.
 */
prop fault.io.disk.predictive-failure@P ->
    ereport.io.scsi.disk.predictive-failure@P {
        setpayloadprop("asc", payloadprop("additional-sense-code")) &&
        setpayloadprop("ascq", payloadprop("additional-sense-code-qualifier"))
    };

prop fault.io.disk.self-test-failure@P -> ereport.io.scsi.disk.self-test-failure@P;
209