xref: /titanic_41/usr/src/cmd/lvm/rpc.metamhd/mhd_failfast.c (revision 4eaa471005973e11a6110b69fe990530b3b95a38)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright (c) 1994, 2000 by Sun Microsystems, Inc.
24  * All rights reserved.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include "mhd_local.h"
30 
31 #include <stropts.h>
32 #include "ff.h"
33 
34 /*
35  * manipulate failfast driver
36  */
37 
38 /*
39  * disarm failfast
40  */
41 int
42 mhd_ff_disarm(
43 	mhd_drive_set_t	*sp,
44 	mhd_error_t	*mhep
45 )
46 {
47 	struct strioctl	si;
48 
49 	MHDPRINTF1(("%s: disarm\n", sp->sr_name));
50 
51 	/* check locks */
52 	assert(MUTEX_HELD(&sp->sr_mx));
53 
54 	/* ignore not open */
55 	if (sp->sr_ff < 0)
56 		return (0);
57 
58 	/* disarm any existing failfast */
59 	(void) memset(&si, 0, sizeof (si));
60 	si.ic_cmd = FAILFAST_DISARM;
61 	si.ic_timout = INFTIM;
62 	if (ioctl(sp->sr_ff, I_STR, &si) != 0)
63 		return (mhd_error(mhep, errno, "/dev/ff"));
64 
65 	/* return success */
66 	return (0);
67 }
68 
69 /*
70  * open failfast
71  */
72 int
73 mhd_ff_open(
74 	mhd_drive_set_t	*sp,
75 	mhd_error_t	*mhep
76 )
77 {
78 	struct strioctl	si;
79 
80 	/* check locks */
81 	assert(MUTEX_HELD(&sp->sr_mx));
82 	assert((sp->sr_ff_mode == MHD_FF_DEBUG) ||
83 	    (sp->sr_ff_mode == MHD_FF_HALT) ||
84 	    (sp->sr_ff_mode == MHD_FF_PANIC));
85 
86 	/* open if not already */
87 	if ((sp->sr_ff < 0) &&
88 	    ((sp->sr_ff = open("/dev/ff", O_RDWR, 0)) < 0)) {
89 		return (mhd_error(mhep, errno, "/dev/ff"));
90 	}
91 
92 	/* disarm any existing failfast */
93 	if (mhd_ff_disarm(sp, mhep) != 0)
94 		return (-1);
95 
96 	/* load setname */
97 	(void) memset(&si, 0, sizeof (si));
98 	si.ic_cmd = FAILFAST_SETNAME;
99 	si.ic_timout = INFTIM;
100 	si.ic_len = strlen(sp->sr_name);
101 	si.ic_dp = sp->sr_name;
102 	if (ioctl(sp->sr_ff, I_STR, &si) != 0)
103 		return (mhd_error(mhep, errno, "/dev/ff"));
104 
105 	/* load failfast mode */
106 	(void) memset(&si, 0, sizeof (si));
107 	switch (sp->sr_ff_mode) {
108 	case MHD_FF_DEBUG:
109 		si.ic_cmd = FAILFAST_DEBUG_MODE;
110 		break;
111 	case MHD_FF_HALT:
112 		si.ic_cmd = FAILFAST_HALT_MODE;
113 		break;
114 	default:
115 		assert(0);
116 		/* FALLTHROUGH */
117 	case MHD_FF_PANIC:
118 		si.ic_cmd = FAILFAST_PANIC_MODE;
119 		break;
120 	}
121 	si.ic_timout = INFTIM;
122 	if (ioctl(sp->sr_ff, I_STR, &si) != 0)
123 		return (mhd_error(mhep, errno, "/dev/ff"));
124 
125 	/* return success */
126 	return (0);
127 }
128 
129 /*
130  * close failfast
131  */
132 int
133 mhd_ff_close(
134 	mhd_drive_set_t	*sp,
135 	mhd_error_t	*mhep
136 )
137 {
138 	int		rval = 0;
139 
140 	/* check locks */
141 	assert(MUTEX_HELD(&sp->sr_mx));
142 
143 	/* ignore not open */
144 	if (sp->sr_ff < 0)
145 		return (0);
146 
147 	/* disarm any existing failfast */
148 	if (mhd_ff_disarm(sp, mhep) != 0)
149 		rval = -1;
150 
151 	/* close device */
152 	if (close(sp->sr_ff) != 0)
153 		rval = mhd_error(mhep, errno, "/dev/ff");
154 	sp->sr_ff = -1;
155 
156 	/* return success */
157 	return (rval);
158 }
159 
160 /*
161  * reset failfast
162  */
163 int
164 mhd_ff_rearm(
165 	mhd_drive_set_t	*sp,
166 	mhd_error_t	*mhep
167 )
168 {
169 	uint_t		ff = sp->sr_timeouts.mh_ff;
170 	struct strioctl	si;
171 
172 	MHDPRINTF1(("%s: rearm\n", sp->sr_name));
173 
174 	/* check locks */
175 	assert(MUTEX_HELD(&sp->sr_mx));
176 	assert(sp->sr_ff >= 0);
177 
178 	/* if timeout is 0, disarm */
179 	if (ff == 0)
180 		return (mhd_ff_disarm(sp, mhep));
181 
182 	/* rearm failfast */
183 	(void) memset(&si, 0, sizeof (si));
184 	si.ic_cmd = FAILFAST_ARM;
185 	si.ic_timout = INFTIM;
186 	si.ic_len = sizeof (ff);
187 	si.ic_dp = (char *)&ff;
188 	if (ioctl(sp->sr_ff, I_STR, &si) != 0)
189 		return (mhd_error(mhep, errno, "/dev/ff"));
190 
191 	/* return success */
192 	return (0);
193 }
194 
195 /*
196  * die right now
197  */
198 void
199 mhd_ff_die(
200 	mhd_drive_set_t	*sp
201 )
202 {
203 	uint_t		ff = 0;
204 	struct strioctl	si;
205 
206 	MHDPRINTF(("%s: die\n", sp->sr_name));
207 
208 	/* check locks */
209 	assert(MUTEX_HELD(&sp->sr_mx));
210 	assert(sp->sr_ff >= 0);
211 
212 	/* rearm failfast for now */
213 	(void) memset(&si, 0, sizeof (si));
214 	si.ic_cmd = FAILFAST_ARM;
215 	si.ic_timout = INFTIM;
216 	si.ic_len = sizeof (ff);
217 	si.ic_dp = (char *)&ff;
218 	if (ioctl(sp->sr_ff, I_STR, &si) != 0)
219 		mhd_perror("/dev/ff");
220 }
221 
222 /*
223  * check set and reset failfast
224  */
225 void
226 mhd_ff_check(
227 	mhd_drive_set_t		*sp
228 )
229 {
230 	mhd_drive_list_t	*dlp = &sp->sr_drives;
231 	mhd_msec_t		ff = sp->sr_timeouts.mh_ff;
232 	mhd_msec_t		now = mhd_time();
233 	uint_t			i, ok, cnt;
234 
235 	/* check locks */
236 	assert(MUTEX_HELD(&sp->sr_mx));
237 	assert(sp->sr_ff >= 0);
238 	assert((sp->sr_ff_mode == MHD_FF_DEBUG) ||
239 	    (sp->sr_ff_mode == MHD_FF_HALT) ||
240 	    (sp->sr_ff_mode == MHD_FF_PANIC));
241 
242 	/* see how many drives are within alloted time */
243 	for (ok = cnt = 0, i = 0; (i < dlp->dl_ndrive); ++i) {
244 		mhd_drive_t	*dp = dlp->dl_drives[i];
245 
246 		if (dp->dr_state != DRIVE_PROBING)
247 			continue;
248 		++cnt;
249 
250 		MHDPRINTF2(("%s: now %llu dr_time %llu diff %llu ff %llu\n",
251 		    dp->dr_rname, now, dp->dr_time, (now - dp->dr_time), ff));
252 		if ((now - dp->dr_time) <= ff)
253 			++ok;
254 	}
255 
256 	/* check for majority */
257 	if ((cnt == 0) || (ok >= ((cnt / 2) + 1))) {
258 		mhd_error_t	status = mhd_null_error;
259 
260 		if (mhd_ff_rearm(sp, &status) == 0)
261 			return;
262 		mhd_clrerror(&status);
263 	}
264 
265 	/* die */
266 	mhd_eprintf("%s: failed majority cnt %d ok %d\n",
267 	    sp->sr_name, cnt, ok);
268 	mhd_ff_die(sp);
269 }
270