xref: /titanic_41/usr/src/lib/lvm/libmeta/common/meta_mn_handlers.c (revision bf85a12b7c81d0745d5a8aff65baeff50006cde9)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <stdlib.h>
28 #include <unistd.h>
29 #include <wait.h>
30 #include <sys/time.h>
31 #include <syslog.h>
32 
33 #include <meta.h>
34 #include <sys/lvm/mdio.h>
35 #include <sys/lvm/md_mddb.h>
36 #include <sys/lvm/md_mirror.h>
37 
38 #define	MAX_N_ARGS 64
39 #define	MAX_ARG_LEN 1024
40 #define	MAX_SLEEPS 99
41 #define	SLEEP_MOD 5
42 
43 /* we reserve 1024 bytes for stdout and the same for stderr */
44 #define	MAX_OUT	1024
45 #define	MAX_ERR	1024
46 #define	JUNK 128 /* used to flush stdout and stderr */
47 
48 
49 /*ARGSUSED*/
50 void
mdmn_do_cmd(md_mn_msg_t * msg,uint_t flags,md_mn_result_t * resp)51 mdmn_do_cmd(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
52 {
53 
54 	/*
55 	 * We are given one string containing all the arguments
56 	 * For execvp() we have to regenerate the arguments again
57 	 */
58 	int	arg;		/* argument that is currently been built */
59 	int	index;		/* runs through arg above */
60 	int	i;		/* helper for for loop */
61 	char	*argv[MAX_N_ARGS]; /* argument array for execvp */
62 	char	*cp;		/* runs through the given command line string */
63 	char	*command = NULL; /* the command we call locally */
64 	int	pout[2];	/* pipe for stdout */
65 	int	perr[2];	/* pipe for stderr */
66 	pid_t	pid;		/* process id */
67 
68 	cp	= msg->msg_event_data;
69 	arg	= 0;
70 	index	= 0;
71 
72 	/* init the args array alloc the first one and null out the rest */
73 	argv[0] = Malloc(MAX_ARG_LEN);
74 	for (i = 1; i < MAX_N_ARGS; i++) {
75 		argv[i] = NULL;
76 	}
77 
78 	resp->mmr_comm_state	= MDMNE_ACK; /* Ok state */;
79 
80 	while (*cp != '\0') {
81 		if (arg == MAX_N_ARGS) {
82 			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
83 			    "PANIC: too many arguments specified\n"));
84 			resp->mmr_comm_state = MDMNE_HANDLER_FAILED;
85 			goto out;
86 		}
87 		if (index == MAX_ARG_LEN) {
88 			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
89 			    "PANIC: argument too long\n"));
90 			resp->mmr_comm_state = MDMNE_HANDLER_FAILED;
91 			goto out;
92 		}
93 
94 		if ((*cp != ' ') && (*cp != '\t')) {
95 			/*
96 			 * No space or tab: copy char into current
97 			 * argv and advance both pointers
98 			 */
99 
100 			argv[arg][index] = *cp;
101 			cp++;	/* next char in command line	*/
102 			index++;	/* next char in argument	*/
103 		} else {
104 			/*
105 			 * space or tab: terminate current argv,
106 			 * advance arg, reset pointer into arg,
107 			 * advance pointer in command line
108 			 */
109 			argv[arg][index] = '\0';
110 			arg++; /* next argument */
111 			argv[arg] = Malloc(MAX_ARG_LEN);
112 			cp++; /* next char in command line */
113 			index = 0; /* starts at char 0 */
114 		}
115 	}
116 	/* terminate the last real argument */
117 	argv[arg][index] = '\0';
118 	/* the last argument is an NULL pointer */
119 	argv[++arg] = NULL;
120 	if (pipe(pout) < 0)  {
121 		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
122 		    "PANIC: pipe failed\n"));
123 		resp->mmr_comm_state = MDMNE_HANDLER_FAILED;
124 		goto out;
125 	}
126 	if (pipe(perr) < 0) {
127 		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
128 		    "PANIC: pipe failed\n"));
129 		(void) close(pout[0]);
130 		(void) close(pout[1]);
131 		resp->mmr_comm_state = MDMNE_HANDLER_FAILED;
132 		goto out;
133 	}
134 	command = Strdup(argv[0]);
135 	(void) strcat(argv[0], ".rpc_call");
136 	pid = fork1();
137 	if (pid == (pid_t)-1) {
138 		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
139 		    "PANIC: fork failed\n"));
140 		resp->mmr_comm_state = MDMNE_HANDLER_FAILED;
141 		(void) close(pout[0]);
142 		(void) close(pout[1]);
143 		(void) close(perr[0]);
144 		(void) close(perr[1]);
145 		goto out;
146 	} else  if (pid == (pid_t)0) {
147 		/* child */
148 		(void) close(0);
149 		/* close the reading channels of pout and perr */
150 		(void) close(pout[0]);
151 		(void) close(perr[0]);
152 		/* redirect stdout */
153 		if (dup2(pout[1], 1) < 0) {
154 			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
155 			    "PANIC: dup2 failed\n"));
156 			resp->mmr_comm_state = MDMNE_HANDLER_FAILED;
157 			return;
158 		}
159 
160 		/* redirect stderr */
161 		if (dup2(perr[1], 2) < 0) {
162 			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
163 			    "PANIC: dup2 failed\n"));
164 			resp->mmr_comm_state = MDMNE_HANDLER_FAILED;
165 			return;
166 		}
167 
168 		(void) execvp(command, (char *const *)argv);
169 		perror("execvp");
170 		_exit(1);
171 	} else {
172 		/* parent process */
173 		int stat_loc;
174 		char *out, *err; /* for stdout and stderr of child */
175 		int i; /* index into the aboves */
176 		char junk[JUNK];
177 		int out_done = 0;
178 		int err_done = 0;
179 		int out_read = 0;
180 		int err_read = 0;
181 		int maxfd;
182 		fd_set	rset;
183 
184 
185 		/* close the writing channels of pout and perr */
186 		(void) close(pout[1]);
187 		(void) close(perr[1]);
188 		resp->mmr_out = Malloc(MAX_OUT);
189 		resp->mmr_err = Malloc(MAX_ERR);
190 		resp->mmr_out_size = MAX_OUT;
191 		resp->mmr_err_size = MAX_ERR;
192 		out = resp->mmr_out;
193 		err = resp->mmr_err;
194 		FD_ZERO(&rset);
195 		while ((out_done == 0) || (err_done == 0)) {
196 			FD_SET(pout[0], &rset);
197 			FD_SET(perr[0], &rset);
198 			maxfd = max(pout[0], perr[0]) + 1;
199 			(void) select(maxfd, &rset, NULL, NULL, NULL);
200 
201 			/*
202 			 * Did the child produce some output to stdout?
203 			 * If so, read it until we either reach the end of the
204 			 * output or until we read MAX_OUT bytes.
205 			 * Whatever comes first.
206 			 * In case we already read MAX_OUT bytes we simply
207 			 * read away the output into a junk buffer.
208 			 * Just to make the child happy
209 			 */
210 			if (FD_ISSET(pout[0], &rset)) {
211 				if (MAX_OUT - out_read - 1 > 0) {
212 					i = read(pout[0], out,
213 					    MAX_OUT - out_read);
214 					out_read += i;
215 					out += i;
216 				} else {
217 					/* buffer full, empty stdout */
218 					i = read(pout[0], junk, JUNK);
219 				}
220 				if (i == 0) {
221 					/* stdout is closed by child */
222 					out_done++;
223 				}
224 			}
225 			/* same comment as above | sed -e 's/stdout/stderr/' */
226 			if (FD_ISSET(perr[0], &rset)) {
227 				if (MAX_ERR - err_read - 1 > 0) {
228 					i = read(perr[0], err,
229 					    MAX_ERR - err_read);
230 					err_read += i;
231 					err += i;
232 				} else {
233 					/* buffer full, empty stderr */
234 					i = read(perr[0], junk, JUNK);
235 				}
236 				if (i == 0) {
237 					/* stderr is closed by child */
238 					err_done++;
239 				}
240 			}
241 		}
242 		resp->mmr_out[out_read] = '\0';
243 		resp->mmr_err[err_read] = '\0';
244 
245 		while (waitpid(pid, &stat_loc, 0) < 0) {
246 			if (errno != EINTR) {
247 				resp->mmr_comm_state = MDMNE_HANDLER_FAILED;
248 				break;
249 			}
250 		}
251 		if (errno == 0)
252 			resp->mmr_exitval = WEXITSTATUS(stat_loc);
253 
254 		(void) close(pout[0]);
255 		(void) close(perr[0]);
256 	}
257 out:
258 	for (i = 0; i < MAX_N_ARGS; i++) {
259 		if (argv[i] != NULL) {
260 			free(argv[i]);
261 		}
262 	}
263 	if (command != NULL) {
264 		Free(command);
265 	}
266 }
267 
268 /*
269  * This is for checking if a metadevice is opened, and for
270  * locking in case it is not and for
271  * unlocking a locked device
272  */
273 /*ARGSUSED*/
274 void
mdmn_do_clu(md_mn_msg_t * msg,uint_t flags,md_mn_result_t * resp)275 mdmn_do_clu(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
276 {
277 	if (msg->msg_type == MD_MN_MSG_CLU_CHECK) {
278 		md_isopen_t	*d;
279 		int		ret;
280 
281 		resp->mmr_comm_state = MDMNE_ACK; /* Ok state */;
282 		resp->mmr_out_size = 0;
283 		resp->mmr_err_size = 0;
284 		resp->mmr_out = NULL;
285 		resp->mmr_err = NULL;
286 		d = (md_isopen_t *)(void *)msg->msg_event_data;
287 		ret = metaioctl(MD_IOCISOPEN, d, &(d->mde), NULL);
288 		/*
289 		 * In case the ioctl succeeded, return the open state of
290 		 * the metadevice. Otherwise we return the error the ioctl
291 		 * produced. As this is not zero, no attempt is made to
292 		 * remove/rename the metadevice later
293 		 */
294 
295 		if (ret == 0) {
296 			resp->mmr_exitval = d->isopen;
297 		} else {
298 			/*
299 			 * When doing a metaclear, one node after the other
300 			 * does the two steps:
301 			 * - check on all nodes if this md is opened.
302 			 * - remove the md locally.
303 			 * When the 2nd node asks all nodes if the md is
304 			 * open it starts with the first node.
305 			 * As this already removed the md, the check
306 			 * returns MDE_UNIT_NOT_SETUP.
307 			 * In order to not keep the 2nd node from proceeding,
308 			 * we map this to an Ok.
309 			 */
310 			if (mdismderror(&(d->mde), MDE_UNIT_NOT_SETUP)) {
311 				mdclrerror(&(d->mde));
312 				ret = 0;
313 			}
314 
315 			resp->mmr_exitval = ret;
316 		}
317 	}
318 }
319 
320 /* handler for MD_MN_MSG_REQUIRE_OWNER */
321 /*ARGSUSED*/
322 void
mdmn_do_req_owner(md_mn_msg_t * msg,uint_t flags,md_mn_result_t * resp)323 mdmn_do_req_owner(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
324 {
325 	md_set_mmown_params_t	setown;
326 	md_mn_req_owner_t	*d;
327 	int			ret, n = 0;
328 
329 	resp->mmr_out_size = 0;
330 	resp->mmr_err_size = 0;
331 	resp->mmr_out = NULL;
332 	resp->mmr_err = NULL;
333 	resp->mmr_comm_state = MDMNE_ACK;
334 	d = (md_mn_req_owner_t *)(void *)msg->msg_event_data;
335 
336 	(void) memset(&setown, 0, sizeof (setown));
337 	MD_SETDRIVERNAME(&setown, MD_MIRROR, MD_MIN2SET(d->mnum))
338 	setown.d.mnum = d->mnum;
339 	setown.d.owner = d->owner;
340 
341 	/* Retry ownership change if we get EAGAIN returned */
342 	while ((ret = metaioctl(MD_MN_SET_MM_OWNER, &setown, &setown.mde, NULL))
343 	    != 0) {
344 		md_sys_error_t	*ip =
345 		    &setown.mde.info.md_error_info_t_u.sys_error;
346 		if (ip->errnum != EAGAIN) {
347 			break;
348 		}
349 		if (n++ >= 10) {
350 			break;
351 		}
352 		(void) sleep(1);
353 	}
354 
355 	resp->mmr_exitval = ret;
356 }
357 
358 /*
359  * handler for MD_MN_MSG_CHOOSE_OWNER
360  * This is called when a mirror resync has no owner. The master node generates
361  * this message which is not broadcast to the other nodes. The message is
362  * required as the kernel does not have access to the nodelist for the set.
363  */
364 /*ARGSUSED*/
365 void
mdmn_do_choose_owner(md_mn_msg_t * msg,uint_t flags,md_mn_result_t * resp)366 mdmn_do_choose_owner(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
367 {
368 	md_mn_msg_chowner_t	chownermsg;
369 	md_mn_msg_chooseid_t	*d;
370 	int			ret = 0;
371 	int			nodecnt;
372 	int			nodeno;
373 	uint_t			nodeid;
374 	uint_t			myflags;
375 	set_t			setno;
376 	mdsetname_t		*sp;
377 	md_set_desc		*sd;
378 	md_mnnode_desc		*nd;
379 	md_error_t		mde = mdnullerror;
380 	md_mn_result_t		*resp1 = NULL;
381 
382 	resp->mmr_out_size = 0;
383 	resp->mmr_err_size = 0;
384 	resp->mmr_out = NULL;
385 	resp->mmr_err = NULL;
386 	resp->mmr_comm_state = MDMNE_ACK;
387 	d = (md_mn_msg_chooseid_t *)(void *)msg->msg_event_data;
388 
389 	/*
390 	 * The node to be chosen will be the resync count for the set
391 	 * modulo the number of live nodes in the set
392 	 */
393 	setno = MD_MIN2SET(d->msg_chooseid_mnum);
394 	if ((sp = metasetnosetname(setno, &mde)) == NULL) {
395 		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
396 		    "MD_MN_MSG_CHOOSE_OWNER: Invalid setno %d\n"), setno);
397 		resp->mmr_exitval = 1;
398 		return;
399 	}
400 	if ((sd = metaget_setdesc(sp, &mde)) == NULL) {
401 		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
402 		    "MD_MN_MSG_CHOOSE_OWNER: Invalid set pointer\n"));
403 		resp->mmr_exitval = 1;
404 		return;
405 	}
406 
407 	/* Count the number of live nodes */
408 	nodecnt = 0;
409 	nd = sd->sd_nodelist;
410 	while (nd) {
411 		if (nd->nd_flags & MD_MN_NODE_ALIVE)
412 			nodecnt++;
413 		nd = nd->nd_next;
414 	}
415 	nodeno = (d->msg_chooseid_rcnt%nodecnt);
416 
417 	/*
418 	 * If we've been called with msg_chooseid_set_node set TRUE then we
419 	 * are simply re-setting the owner id to ensure consistency across
420 	 * the cluster.
421 	 * If the flag is reset (B_FALSE) we are requesting a new owner to be
422 	 * determined.
423 	 */
424 	if (d->msg_chooseid_set_node) {
425 		nodeid = d->msg_chooseid_rcnt;
426 	} else {
427 		/* scan the nodelist looking for the required node */
428 		nodecnt = 0;
429 		nd = sd->sd_nodelist;
430 		while (nd) {
431 			if (nd->nd_flags & MD_MN_NODE_ALIVE) {
432 				if (nodecnt == nodeno)
433 					break;
434 				nodecnt++;
435 			}
436 			nd = nd->nd_next;
437 		}
438 		nodeid = nd->nd_nodeid;
439 	}
440 
441 	/* Send message to all nodes to make ownership change */
442 	chownermsg.msg_chowner_mnum =  d->msg_chooseid_mnum;
443 	chownermsg.msg_chowner_nodeid = nodeid;
444 	myflags = MD_MSGF_NO_LOG;
445 
446 	/* inherit some flags from the parent message */
447 	myflags |= msg->msg_flags & MD_MSGF_INHERIT_BITS;
448 
449 	ret = mdmn_send_message(MD_MIN2SET(d->msg_chooseid_mnum),
450 	    MD_MN_MSG_CHANGE_OWNER, myflags, 0, (char *)&chownermsg,
451 	    sizeof (chownermsg), &resp1, &mde);
452 	if (resp1 != NULL)
453 		free_result(resp1);
454 	resp->mmr_exitval = ret;
455 }
456 
457 /*
458  * Handler for MD_MN_MSG_CHANGE_OWNER
459  * This is called when we are perfoming a resync and wish to change from
460  * no mirror owner to an owner chosen by the master.
461  * This mesage is only relevant for the new owner, the message will be
462  * ignored by all other nodes
463  */
464 /*ARGSUSED*/
465 void
mdmn_do_change_owner(md_mn_msg_t * msg,uint_t flags,md_mn_result_t * resp)466 mdmn_do_change_owner(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
467 {
468 	md_set_mmown_params_t	setown;
469 	md_mn_msg_chowner_t	*d;
470 	int			ret = 0;
471 	set_t			setno;
472 	mdsetname_t		*sp;
473 	md_set_desc		*sd;
474 	md_error_t		mde = mdnullerror;
475 
476 	resp->mmr_out_size = 0;
477 	resp->mmr_err_size = 0;
478 	resp->mmr_out = NULL;
479 	resp->mmr_err = NULL;
480 	resp->mmr_comm_state = MDMNE_ACK;
481 	d = (md_mn_msg_chowner_t *)(void *)msg->msg_event_data;
482 
483 	setno = MD_MIN2SET(d->msg_chowner_mnum);
484 	if ((sp = metasetnosetname(setno, &mde)) == NULL) {
485 		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
486 		    "MD_MN_MSG_CHANGE_OWNER: Invalid setno %d\n"), setno);
487 		resp->mmr_exitval = 1;
488 		return;
489 	}
490 	if ((sd = metaget_setdesc(sp, &mde)) == NULL) {
491 		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
492 		    "MD_MN_MSG_CHANGE_OWNER: Invalid set pointer\n"));
493 		resp->mmr_exitval = 1;
494 		return;
495 	}
496 
497 	if (d->msg_chowner_nodeid == sd->sd_mn_mynode->nd_nodeid) {
498 		/*
499 		 * If we are the chosen owner, issue ioctl to make the
500 		 * ownership change
501 		 */
502 		(void) memset(&setown, 0, sizeof (md_set_mmown_params_t));
503 		setown.d.mnum = d->msg_chowner_mnum;
504 		setown.d.owner = d->msg_chowner_nodeid;
505 		setown.d.flags = MD_MN_MM_SPAWN_THREAD;
506 		MD_SETDRIVERNAME(&setown, MD_MIRROR,
507 		    MD_MIN2SET(d->msg_chowner_mnum));
508 
509 		/*
510 		 * Single shot at changing the the owner, if it fails EAGAIN,
511 		 * another node must have become the owner while we are in the
512 		 * process of making this choice.
513 		 */
514 
515 		ret = metaioctl(MD_MN_SET_MM_OWNER, &setown,
516 		    &(setown.mde), NULL);
517 		if (ret == EAGAIN)
518 			ret = 0;
519 	}
520 	resp->mmr_exitval = ret;
521 }
522 
523 /* handler for MD_MN_MSG_SUSPEND_WRITES */
524 /*ARGSUSED*/
525 void
mdmn_do_susp_write(md_mn_msg_t * msg,uint_t flags,md_mn_result_t * resp)526 mdmn_do_susp_write(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
527 {
528 	/* Suspend writes to a region of a mirror */
529 	md_suspend_wr_params_t	suspwr_ioc;
530 	md_mn_msg_suspwr_t	*d;
531 	int			ret;
532 
533 	resp->mmr_out_size = 0;
534 	resp->mmr_err_size = 0;
535 	resp->mmr_out = NULL;
536 	resp->mmr_err = NULL;
537 	resp->mmr_comm_state = MDMNE_ACK;
538 	d = (md_mn_msg_suspwr_t *)(void *)msg->msg_event_data;
539 
540 	(void) memset(&suspwr_ioc, 0, sizeof (md_suspend_wr_params_t));
541 	MD_SETDRIVERNAME(&suspwr_ioc, MD_MIRROR,
542 	    MD_MIN2SET(d->msg_suspwr_mnum));
543 	suspwr_ioc.mnum = d->msg_suspwr_mnum;
544 	ret = metaioctl(MD_MN_SUSPEND_WRITES, &suspwr_ioc,
545 	    &(suspwr_ioc.mde), NULL);
546 	resp->mmr_exitval = ret;
547 }
548 
549 /*
550  * handler for MD_MN_MSG_STATE_UPDATE_RESWR
551  * This functions update a submirror component state and then resumes writes
552  * to the mirror
553  */
554 /*ARGSUSED*/
555 void
mdmn_do_state_upd_reswr(md_mn_msg_t * msg,uint_t flags,md_mn_result_t * resp)556 mdmn_do_state_upd_reswr(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
557 {
558 	/* Update the state of the component of a mirror */
559 	md_set_state_params_t	setstate_ioc;
560 	md_mn_msg_stch_t	*d;
561 	int			ret;
562 
563 	resp->mmr_out_size = 0;
564 	resp->mmr_err_size = 0;
565 	resp->mmr_out = NULL;
566 	resp->mmr_err = NULL;
567 	resp->mmr_comm_state = MDMNE_ACK;
568 	d = (md_mn_msg_stch_t *)(void *)msg->msg_event_data;
569 
570 	(void) memset(&setstate_ioc, 0, sizeof (md_set_state_params_t));
571 	MD_SETDRIVERNAME(&setstate_ioc, MD_MIRROR,
572 	    MD_MIN2SET(d->msg_stch_mnum));
573 	setstate_ioc.mnum = d->msg_stch_mnum;
574 	setstate_ioc.sm = d->msg_stch_sm;
575 	setstate_ioc.comp = d->msg_stch_comp;
576 	setstate_ioc.state = d->msg_stch_new_state;
577 	setstate_ioc.hs_id = d->msg_stch_hs_id;
578 	ret = metaioctl(MD_MN_SET_STATE, &setstate_ioc,
579 	    &(setstate_ioc.mde), NULL);
580 	resp->mmr_exitval = ret;
581 }
582 
583 /*
584  * submessage generator for MD_MN_MSG_STATE_UPDATE and MD_MN_MSG_STATE_UPDATE2
585  * This generates 2 messages, the first is SUSPEND_WRITES and
586  * depending on the type of the original message the second one is
587  * either STATE_UPDATE_RESWR or STATE_UPDATE_RESWR2 which actually does
588  * the same, but runs on a higher class.
589  */
590 int
mdmn_smgen_state_upd(md_mn_msg_t * msg,md_mn_msg_t * msglist[])591 mdmn_smgen_state_upd(md_mn_msg_t *msg, md_mn_msg_t *msglist[])
592 {
593 	md_mn_msg_t		*nmsg;
594 	md_mn_msg_stch_t	*d;
595 	md_mn_msg_stch_t	*stch_data;
596 	md_mn_msg_suspwr_t	*suspwr_data;
597 
598 	d = (md_mn_msg_stch_t *)(void *)msg->msg_event_data;
599 
600 	nmsg = Zalloc(sizeof (md_mn_msg_t));
601 	MSGID_COPY(&(msg->msg_msgid), &(nmsg->msg_msgid));
602 
603 	nmsg->msg_flags		= MD_MSGF_NO_LOG; /* Don't log submessages */
604 	nmsg->msg_setno		= msg->msg_setno;
605 	nmsg->msg_type		= MD_MN_MSG_SUSPEND_WRITES;
606 	nmsg->msg_event_size	= sizeof (md_mn_msg_suspwr_t);
607 	nmsg->msg_event_data	= Zalloc(sizeof (md_mn_msg_suspwr_t));
608 	suspwr_data = (md_mn_msg_suspwr_t *)(void *)nmsg->msg_event_data;
609 	suspwr_data->msg_suspwr_mnum = d->msg_stch_mnum;
610 	msglist[0] = nmsg;
611 
612 	nmsg = Zalloc(sizeof (md_mn_msg_t));
613 	MSGID_COPY(&(msg->msg_msgid), &(nmsg->msg_msgid));
614 
615 	nmsg->msg_flags		= MD_MSGF_NO_LOG; /* Don't log submessages */
616 	nmsg->msg_setno		= msg->msg_setno;
617 	if (msg->msg_type == MD_MN_MSG_STATE_UPDATE2) {
618 		nmsg->msg_type		= MD_MN_MSG_STATE_UPDATE_RESWR2;
619 	} else {
620 		nmsg->msg_type		= MD_MN_MSG_STATE_UPDATE_RESWR;
621 	}
622 	nmsg->msg_event_size	= sizeof (md_mn_msg_stch_t);
623 	nmsg->msg_event_data	= Zalloc(sizeof (md_mn_msg_stch_t));
624 	stch_data = (md_mn_msg_stch_t *)(void *)nmsg->msg_event_data;
625 	stch_data->msg_stch_mnum = d->msg_stch_mnum;
626 	stch_data->msg_stch_sm = d->msg_stch_sm;
627 	stch_data->msg_stch_comp = d->msg_stch_comp;
628 	stch_data->msg_stch_new_state = d->msg_stch_new_state;
629 	stch_data->msg_stch_hs_id = d->msg_stch_hs_id;
630 	msglist[1] = nmsg;
631 	return (2); /* Return the number of submessages generated */
632 }
633 
634 /*
635  * handler for MD_MN_MSG_ALLOCATE_HOTSPARE and MD_MN_MSG_ALLOCATE_HOTSPARE2
636  * This sends a message to all nodes requesting them to allocate a hotspare
637  * for the specified component. The component is specified by the mnum of
638  * the mirror, the submirror index and the component index.
639  */
640 /*ARGSUSED*/
641 void
mdmn_do_allocate_hotspare(md_mn_msg_t * msg,uint_t flags,md_mn_result_t * resp)642 mdmn_do_allocate_hotspare(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
643 {
644 	/* Allocate a hotspare for a mirror component */
645 	md_alloc_hotsp_params_t allochsp_ioc;
646 	md_mn_msg_allochsp_t    *d;
647 	int			ret;
648 
649 	resp->mmr_out_size = 0;
650 	resp->mmr_err_size = 0;
651 	resp->mmr_out = NULL;
652 	resp->mmr_err = NULL;
653 	resp->mmr_comm_state = MDMNE_ACK;
654 	d = (md_mn_msg_allochsp_t *)((void *)(msg->msg_event_data));
655 
656 	(void) memset(&allochsp_ioc, 0,
657 	    sizeof (md_alloc_hotsp_params_t));
658 	MD_SETDRIVERNAME(&allochsp_ioc, MD_MIRROR,
659 	    MD_MIN2SET(d->msg_allochsp_mnum));
660 	allochsp_ioc.mnum = d->msg_allochsp_mnum;
661 	allochsp_ioc.sm = d->msg_allochsp_sm;
662 	allochsp_ioc.comp = d->msg_allochsp_comp;
663 	allochsp_ioc.hs_id = d->msg_allochsp_hs_id;
664 	ret = metaioctl(MD_MN_ALLOCATE_HOTSPARE, &allochsp_ioc,
665 	    &(allochsp_ioc.mde), NULL);
666 	resp->mmr_exitval = ret;
667 }
668 
669 /*
670  * handler for MD_MN_MSG_RESYNC_STARTING,MD_MN_MSG_RESYNC_FIRST,
671  * MD_MN_MSG_RESYNC_NEXT, MD_MN_MSG_RESYNC_FINISH, MD_MN_MSG_RESYNC_PHASE_DONE
672  */
673 /*ARGSUSED*/
674 void
mdmn_do_resync(md_mn_msg_t * msg,uint_t flags,md_mn_result_t * resp)675 mdmn_do_resync(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
676 {
677 	md_mn_msg_resync_t		*d;
678 	md_mn_rs_params_t		respar;
679 	mddb_setflags_config_t	sf;
680 	md_error_t				ep = mdnullerror;
681 	mdsetname_t				*sp;
682 	int	ret;
683 	int	smi;
684 	int start_flag = 1;
685 	int sleep_count = 0;
686 	unsigned int sleep_time = 2;
687 
688 	resp->mmr_out_size = 0;
689 	resp->mmr_err_size = 0;
690 	resp->mmr_out = NULL;
691 	resp->mmr_err = NULL;
692 	resp->mmr_comm_state = MDMNE_ACK;
693 	d = (md_mn_msg_resync_t *)((void *)(msg->msg_event_data));
694 
695 	(void) memset(&respar, 0, sizeof (respar));
696 	MD_SETDRIVERNAME(&respar, MD_MIRROR,
697 	    MD_MIN2SET(d->msg_resync_mnum))
698 	respar.msg_type = (int)msg->msg_type;
699 	respar.mnum = d->msg_resync_mnum;
700 	respar.rs_type = d->msg_resync_type;
701 	respar.rs_start = d->msg_resync_start;
702 	respar.rs_size = d->msg_resync_rsize;
703 	respar.rs_done = d->msg_resync_done;
704 	respar.rs_2_do = d->msg_resync_2_do;
705 	respar.rs_originator = d->msg_originator;
706 	respar.rs_flags = d->msg_resync_flags;
707 
708 	for (smi = 0; smi < NMIRROR; smi++) {
709 		respar.rs_sm_state[smi] = d->msg_sm_state[smi];
710 		respar.rs_sm_flags[smi] = d->msg_sm_flags[smi];
711 	}
712 
713 	/*
714 	 * Prior to running the resync thread first check that the start_step
715 	 * flag (MD_SET_MN_START_RC) added by metaclust's MC_START step has been
716 	 * removed from the set record flags. Ordinarily, this would be removed
717 	 * at MC_STEP4 in metaclust - need to ensure this has happened on all
718 	 * nodes.
719 	 */
720 	(void) memset(&sf, 0, sizeof (sf));
721 	sf.sf_setno = MD_MIN2SET(d->msg_resync_mnum);
722 	sf.sf_flags = MDDB_NM_GET;
723 	/* Use magic to help protect ioctl against attack. */
724 	sf.sf_magic = MDDB_SETFLAGS_MAGIC;
725 	if ((sp = metasetnosetname(sf.sf_setno, &ep)) == NULL) {
726 		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
727 		    "MDMN_DO_RESYNC: Invalid setno = %d\n"),
728 		    sf.sf_setno);
729 		(void) mdstealerror(&(resp->mmr_ep), &ep);
730 		resp->mmr_exitval = -1;
731 		return;
732 	}
733 
734 	/* start_flag always true initially */
735 	while (start_flag) {
736 		if (metaioctl(MD_MN_GET_SETFLAGS, &sf, &sf.sf_mde, NULL) != 0) {
737 			syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
738 			    "MDMN_DO_RESYNC: Could not get start_step "
739 			    "flag for set %s - returning\n"),
740 			    sp->setname);
741 			(void) mdstealerror(&(resp->mmr_ep), &sf.sf_mde);
742 			resp->mmr_exitval = -1;
743 			return;
744 		}
745 
746 		/* metaioctl returns successfully - is start flag cleared? */
747 		if (sf.sf_setflags & MD_SET_MN_START_RC) {
748 			start_flag = 1;
749 			(void) sleep(sleep_time);
750 			sleep_count++;
751 			if ((sleep_count == 1) ||
752 			    (sleep_count % SLEEP_MOD) == 0) {
753 				syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
754 				    "MDMN_DO_RESYNC: Waiting for start_step "
755 				    "flag for set %s to be cleared\n"),
756 				    sp->setname);
757 			}
758 			if (sleep_count == MAX_SLEEPS) {
759 				syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
760 				    "MDMN_DO_RESYNC: Could not clear "
761 				    "start_step flag for set %s "
762 				    "- returning\n"), sp->setname);
763 				resp->mmr_exitval = -1;
764 				return;
765 			}
766 		} else {
767 			start_flag = 0;
768 		}
769 	}
770 
771 	ret = metaioctl(MD_MN_RESYNC, &respar, &respar.mde, NULL);
772 	if (ret) {
773 		(void) mdstealerror(&(resp->mmr_ep), &respar.mde);
774 	}
775 	resp->mmr_exitval = ret;
776 }
777 
778 /*
779  * handler for MD_MN_MSG_SETSYNC
780  */
781 /*ARGSUSED*/
782 void
mdmn_do_setsync(md_mn_msg_t * msg,uint_t flags,md_mn_result_t * resp)783 mdmn_do_setsync(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
784 {
785 	md_mn_msg_setsync_t	*d;
786 	md_resync_ioctl_t	ri;
787 	int			ret;
788 
789 	resp->mmr_out_size = 0;
790 	resp->mmr_err_size = 0;
791 	resp->mmr_out = NULL;
792 	resp->mmr_err = NULL;
793 	resp->mmr_comm_state = MDMNE_ACK;
794 	d = (md_mn_msg_setsync_t *)((void *)(msg->msg_event_data));
795 
796 	(void) memset(&ri, 0, sizeof (ri));
797 	MD_SETDRIVERNAME(&ri, MD_MIRROR, MD_MIN2SET(d->setsync_mnum))
798 	ri.ri_mnum = d->setsync_mnum;
799 	ri.ri_copysize = d->setsync_copysize;
800 	ri.ri_flags = d->setsync_flags;
801 
802 	ret = metaioctl(MD_MN_SETSYNC, &ri, &ri.mde, NULL);
803 
804 	resp->mmr_exitval = ret;
805 }
806 
807 /*
808  * handler for MD_MN_MSG_SET_CAP. As this handler can deal with both mirrors
809  * and soft partitions, the driver name that is required for the ioctl call
810  * is included in the message.
811  */
812 /*ARGSUSED*/
813 void
mdmn_do_set_cap(md_mn_msg_t * msg,uint_t flags,md_mn_result_t * resp)814 mdmn_do_set_cap(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
815 {
816 	md_mn_msg_setcap_t	*d;
817 	md_mn_setcap_params_t	setcap_ioc;
818 	minor_t			mnum;
819 	int			ret;
820 
821 	resp->mmr_out_size = 0;
822 	resp->mmr_err_size = 0;
823 	resp->mmr_out = NULL;
824 	resp->mmr_err = NULL;
825 	resp->mmr_comm_state = MDMNE_ACK;
826 	d = (md_mn_msg_setcap_t *)((void *)(msg->msg_event_data));
827 	mnum = d->msg_setcap_mnum;
828 
829 	(void) memset(&setcap_ioc, 0, sizeof (setcap_ioc));
830 
831 	MD_SETDRIVERNAME(&setcap_ioc, d->msg_setcap_driver, MD_MIN2SET(mnum));
832 	setcap_ioc.mnum = mnum;
833 	setcap_ioc.sc_set = d->msg_setcap_set;
834 
835 	ret = metaioctl(MD_MN_SET_CAP, &setcap_ioc, &setcap_ioc.mde, NULL);
836 
837 	resp->mmr_exitval = ret;
838 }
839 
840 /*
841  * Dummy handler for various CLASS0 messages like
842  * MD_MN_MSG_VERBOSITY / MD_MN_MSG_RESUME / MD_MN_MSG_SUSPEND ...
843  */
844 /*ARGSUSED*/
845 void
mdmn_do_dummy(md_mn_msg_t * msg,uint_t flags,md_mn_result_t * resp)846 mdmn_do_dummy(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
847 {
848 	resp->mmr_out_size = 0;
849 	resp->mmr_err_size = 0;
850 	resp->mmr_out = NULL;
851 	resp->mmr_err = NULL;
852 	resp->mmr_exitval = 0;
853 	resp->mmr_comm_state = MDMNE_ACK;
854 }
855 
856 /*
857  * Overall description of mdcommd support that keeps all nodes in-sync
858  * with the ondisk diskset mddbs.
859  *
860  * All configuration changes to the mddb - addition/deletion of metadevices
861  * or replicas must use a CLASS1 message to block out these changes.
862  * Changes to the state of existing replicas do not need to block CLASS1
863  * since there is no conflict when just updating the state of a replica.
864  *
865  * Error encountered when master writes to mddbs:
866  *	As the master updates parts of the mddbs, flags are updated describing
867  *	what has been written.  When all locks are dropped (either in
868  *	mddb_setexit or mdioctl), a PARSE message will be generated to all
869  *	nodes with an index list of known good mddbs and the parse flags.
870  *	The master node ignores the parse message since it sent it.
871  *	The slave nodes re-read in the changed part of the mddb using the list
872  *	of known good replicas that was passed.
873  *	PARSE message does not block CLASS1.
874  *	The PARSE message must be the highest class message.  Since this
875  *	message could be sent on any ioctl, this PARSE message class must
876  *	be higher than any other class message that could issue an ioctl.
877  *
878  *	Master		Slave1		Slave2
879  * 	Handles_error
880  *	PARSE		PARSE		PARSE
881  *
882  *
883  * Add/Delete mddbs can occur from the following commands:
884  *	metadb -s set_name -a/-d
885  *	metaset -s set_name -a/-d disk
886  *	metaset -s set_name -b
887  *
888  *	The metadb/metaset command is run on the node executing the command
889  *	and sends an ATTACH/DETACH message to the master node blocking CLASS1
890  *	messages on all nodes until this message is finished.  The master
891  *	node generates 3 submessages of BLOCK, SM_ATTACH/SM_DETACH, UNBLOCK.
892  *	The BLOCK message is only run on the master node and will BLOCK
893  *	the PARSE messages from being sent to the nodes.
894  *	The SM_ATTACH/SM_DETACH message is run on all nodes and actually adds or
895  *	removes the replica(s) from the given disk slice.
896  *	The UNBLOCK message is only run on the master node and allows the
897  *	sending of PARSE messages.
898  *
899  *	Master		Slave1		Slave2
900  *			Add mddb cmd
901  *			ATTACH msg to master
902  *	BLOCK
903  *	ATTACH		ATTACH		ATTACH
904  *	UNBLOCK
905  *	PARSE		PARSE		PARSE
906  *	ATTACH msg finished
907  *
908  * Add/Delete host side information from the following commands:
909  *	metaset -s set_name -a/-d -h
910  *
911  *	The metaset command is run on the node executing the command and
912  *	sends a DB_NEWSIDE/DB_DELSIDE message and a MD_NEWSIDE/MD_DELSIDE
913  *	message whenever a host is added to or deleted from the diskset.
914  *
915  *	The side information contains the major name and minor number
916  *	associated with a disk slice from a certain node's perspective
917  *	in a (failed) effort to support clustered systems that don't have the
918  *	same device name for a physical device. (The original designers of
919  *	SVM eventually took the shortcut of assuming that all device names
920  *	are the same on all systems, but left the side information in the
921  *	mddb and namespace.)  The side information is used for disk slices
922  *	that contain mddbs and/or are components for metadevices.
923  *
924  *	The DB_NEWSIDE/DELSIDE command adds or deletes the side information
925  *	for each mddb for the host being added or deleted.
926  *	The MD_ADDSIDE/MD_DELSIDE command adds or deletes the side information
927  *	for all disk slice components that are in the namespace records for
928  *	the host being added or deleted.
929  *
930  *	The DB_NEWSIDE/DB_DELSIDE message does not change any mddb records
931  *	and only needs to be executed on the master node since the slave
932  *	nodes will be brought up to date by the PARSE message that is
933  *	generated as a result of a change to the mddb.
934  *	The MD_ADDSIDE/MD_DELSIDE message does modify the records in the mddb
935  *	and needs to be run on all nodes.  The message must block class1
936  *	messages so that record changing commands don't interfere.
937  *
938  *	Master		Slave1		Slave2
939  *			Add host
940  *			DB_NEWSIDE msg to master
941  *	DB_NEWSIDE
942  *	PARSE		PARSE		PARSE
943  *	DB_NEWSIDE msg finished
944  *			MD_ADDSIDE msg to master
945  *	MD_ADDSIDE	MD_ADDSIDE	MD_ADDSIDE
946  *	MD_ADDSIDE msg finished
947  *
948  *
949  * Optimized resync record failure:
950  *	When any node sees a failure to write an optimized resync record
951  *	that node notifies the master node of the replica that failed.
952  *	The master node handles the error and updates the rest of the
953  *	nodes using a PARSE message.  The PARSE message also calls
954  *	fixoptrecord on each slave node causing each node to fix up
955  * 	the optimized resync records that are owned by that node (the mirror
956  *	owner code also sets the optimized resync record owner).  The master
957  *	node will fix up all optimized resync records that have no owner or
958  *	are owned by the master node.
959  *
960  *	Master		Slave1		Slave2
961  *					Optimized Record Failure
962  *					OPTRECERR msg to master
963  *	Master handles opt rec failure
964  *	PARSE		PARSE		PARSE
965  *	OPTRECERR msg finished
966  *					Slave rewrites optimized record
967  *
968  */
969 
970 /*
971  * Handler for MD_MN_MSG_MDDB_PARSE which send parse messages to the
972  * slave nodes in order to keep the incore view of the mddbs the
973  * same on all nodes.
974  *
975  * Since master node generated the mddb parse message, do nothing
976  * if this is the master node.
977  *
978  * If this is a slave node, send the parse message down to the kernel
979  * where this node will re-read in parts of the mddbs.
980  *
981  */
982 void
mdmn_do_mddb_parse(md_mn_msg_t * msg,uint_t flags,md_mn_result_t * resp)983 mdmn_do_mddb_parse(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
984 {
985 	md_mn_msg_mddb_parse_t	*d;
986 	mddb_parse_parm_t	mpp;
987 	int			ret = 0;
988 	int			i;
989 
990 	resp->mmr_out_size = 0;
991 	resp->mmr_err_size = 0;
992 	resp->mmr_out = NULL;
993 	resp->mmr_err = NULL;
994 	resp->mmr_comm_state = MDMNE_ACK;
995 	d = (md_mn_msg_mddb_parse_t *)((void *)(msg->msg_event_data));
996 
997 	if (flags & MD_MSGF_ON_MASTER)
998 		return;
999 
1000 	(void) memset(&mpp, 0, sizeof (mpp));
1001 	mpp.c_setno = msg->msg_setno;
1002 	mpp.c_parse_flags = d->msg_parse_flags;
1003 	for (i = 0; i < MDDB_NLB; i++) {
1004 		mpp.c_lb_flags[i] = d->msg_lb_flags[i];
1005 	}
1006 	ret = metaioctl(MD_MN_MDDB_PARSE, &mpp, &mpp.c_mde, NULL);
1007 	if (ret)
1008 		(void) mdstealerror(&(resp->mmr_ep), &mpp.c_mde);
1009 
1010 	resp->mmr_exitval = ret;
1011 }
1012 
1013 /*
1014  * Handler for MD_MN_MSG_MDDB_BLOCK which blocks the generation
1015  * of parse messages from this node.
1016  *
1017  * This is needed when attaching/detaching mddbs on the master and the
1018  * slave node is unable to handle a parse message until the slave node
1019  * has done the attach/detach of the mddbs.  So, master node will block
1020  * the parse messages, execute the attach/detach on all nodes and
1021  * then unblock the parse messages which causes the parse message to
1022  * be sent to all nodes.
1023  */
1024 /*ARGSUSED*/
1025 void
mdmn_do_mddb_block(md_mn_msg_t * msg,uint_t flags,md_mn_result_t * resp)1026 mdmn_do_mddb_block(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
1027 {
1028 	md_mn_msg_mddb_block_t	*d;
1029 	mddb_block_parm_t	mbp;
1030 	int			ret;
1031 
1032 	resp->mmr_out_size = 0;
1033 	resp->mmr_err_size = 0;
1034 	resp->mmr_out = NULL;
1035 	resp->mmr_err = NULL;
1036 	resp->mmr_comm_state = MDMNE_ACK;
1037 	d = (md_mn_msg_mddb_block_t *)((void *)(msg->msg_event_data));
1038 
1039 	(void) memset(&mbp, 0, sizeof (mbp));
1040 	mbp.c_setno = msg->msg_setno;
1041 	mbp.c_blk_flags = d->msg_block_flags;
1042 	ret = metaioctl(MD_MN_MDDB_BLOCK, &mbp, &mbp.c_mde, NULL);
1043 	if (ret)
1044 		(void) mdstealerror(&(resp->mmr_ep), &mbp.c_mde);
1045 
1046 	resp->mmr_exitval = ret;
1047 }
1048 
1049 /*
1050  * Submessage generator for MD_MN_MSG_META_DB_ATTACH which generates
1051  * a BLOCK message on the master node only, a MD_MN_MSG_SM_MDDB_ATTACH
1052  * message on all nodes and then an UNBLOCK message on the master only.
1053  */
1054 int
mdmn_smgen_mddb_attach(md_mn_msg_t * msg,md_mn_msg_t * msglist[])1055 mdmn_smgen_mddb_attach(md_mn_msg_t *msg, md_mn_msg_t *msglist[])
1056 {
1057 	md_mn_msg_t			*nmsg;
1058 	md_mn_msg_meta_db_attach_t	*d;
1059 	md_mn_msg_meta_db_attach_t	*attach_d;
1060 	md_mn_msg_mddb_block_t		*block_d;
1061 
1062 	d = (md_mn_msg_meta_db_attach_t *)(void *)msg->msg_event_data;
1063 
1064 	nmsg = Zalloc(sizeof (md_mn_msg_t));
1065 	MSGID_COPY(&(msg->msg_msgid), &(nmsg->msg_msgid));
1066 
1067 	nmsg->msg_flags		= (MD_MSGF_NO_LOG | MD_MSGF_NO_BCAST);
1068 	nmsg->msg_setno		= msg->msg_setno;
1069 	nmsg->msg_type		= MD_MN_MSG_MDDB_BLOCK;
1070 	nmsg->msg_event_size	= sizeof (md_mn_msg_mddb_block_t);
1071 	nmsg->msg_event_data	= Zalloc(sizeof (md_mn_msg_mddb_block_t));
1072 	block_d = (md_mn_msg_mddb_block_t *)(void *)nmsg->msg_event_data;
1073 	block_d->msg_block_flags = MDDB_BLOCK_PARSE;
1074 	msglist[0] = nmsg;
1075 
1076 	nmsg = Zalloc(sizeof (md_mn_msg_t));
1077 	MSGID_COPY(&(msg->msg_msgid), &(nmsg->msg_msgid));
1078 
1079 	/* Don't log submessages and panic on inconsistent results */
1080 	nmsg->msg_flags = MD_MSGF_NO_LOG |
1081 	    MD_MSGF_PANIC_WHEN_INCONSISTENT;
1082 	nmsg->msg_setno		= msg->msg_setno;
1083 	nmsg->msg_type		= MD_MN_MSG_SM_MDDB_ATTACH;
1084 	nmsg->msg_event_size	= sizeof (md_mn_msg_meta_db_attach_t);
1085 	nmsg->msg_event_data	= Zalloc(sizeof (md_mn_msg_meta_db_attach_t));
1086 	attach_d = (md_mn_msg_meta_db_attach_t *)
1087 	    (void *)nmsg->msg_event_data;
1088 	attach_d->msg_l_dev = d->msg_l_dev;
1089 	attach_d->msg_cnt = d->msg_cnt;
1090 	attach_d->msg_dbsize = d->msg_dbsize;
1091 	(void) strncpy(attach_d->msg_dname, d->msg_dname, 16);
1092 	attach_d->msg_splitname = d->msg_splitname;
1093 	attach_d->msg_options = d->msg_options;
1094 	msglist[1] = nmsg;
1095 
1096 	nmsg = Zalloc(sizeof (md_mn_msg_t));
1097 	MSGID_COPY(&(msg->msg_msgid), &(nmsg->msg_msgid));
1098 
1099 	nmsg->msg_flags		= (MD_MSGF_NO_LOG | MD_MSGF_NO_BCAST);
1100 	nmsg->msg_setno		= msg->msg_setno;
1101 	nmsg->msg_type		= MD_MN_MSG_MDDB_BLOCK;
1102 	nmsg->msg_event_size	= sizeof (md_mn_msg_mddb_block_t);
1103 	nmsg->msg_event_data	= Zalloc(sizeof (md_mn_msg_mddb_block_t));
1104 	block_d = (md_mn_msg_mddb_block_t *)(void *)nmsg->msg_event_data;
1105 	block_d->msg_block_flags = MDDB_UNBLOCK_PARSE;
1106 	msglist[2] = nmsg;
1107 
1108 	return (3); /* Return the number of submessages generated */
1109 }
1110 
1111 /*
1112  * Submessage generator for MD_MN_MSG_META_DB_DETACH which generates
1113  * a BLOCK message on the master node only, a MD_MN_MSG_SM_MDDB_DETACH
1114  * message on all nodes and then an UNBLOCK message on the master only.
1115  */
1116 int
mdmn_smgen_mddb_detach(md_mn_msg_t * msg,md_mn_msg_t * msglist[])1117 mdmn_smgen_mddb_detach(md_mn_msg_t *msg, md_mn_msg_t *msglist[])
1118 {
1119 	md_mn_msg_t			*nmsg;
1120 	md_mn_msg_meta_db_detach_t	*d;
1121 	md_mn_msg_meta_db_detach_t	*detach_d;
1122 	md_mn_msg_mddb_block_t		*block_d;
1123 
1124 	d = (md_mn_msg_meta_db_detach_t *)(void *)msg->msg_event_data;
1125 
1126 	nmsg = Zalloc(sizeof (md_mn_msg_t));
1127 	MSGID_COPY(&(msg->msg_msgid), &(nmsg->msg_msgid));
1128 
1129 	nmsg->msg_flags		= (MD_MSGF_NO_LOG | MD_MSGF_NO_BCAST);
1130 	nmsg->msg_setno		= msg->msg_setno;
1131 	nmsg->msg_type		= MD_MN_MSG_MDDB_BLOCK;
1132 	nmsg->msg_event_size	= sizeof (md_mn_msg_mddb_block_t);
1133 	nmsg->msg_event_data	= Zalloc(sizeof (md_mn_msg_mddb_block_t));
1134 	block_d = (md_mn_msg_mddb_block_t *)(void *)nmsg->msg_event_data;
1135 	block_d->msg_block_flags = MDDB_BLOCK_PARSE;
1136 	msglist[0] = nmsg;
1137 
1138 	nmsg = Zalloc(sizeof (md_mn_msg_t));
1139 	MSGID_COPY(&(msg->msg_msgid), &(nmsg->msg_msgid));
1140 
1141 	/* Don't log submessages and panic on inconsistent results */
1142 	nmsg->msg_flags = MD_MSGF_NO_LOG |
1143 	    MD_MSGF_PANIC_WHEN_INCONSISTENT;
1144 	nmsg->msg_setno		= msg->msg_setno;
1145 	nmsg->msg_type		= MD_MN_MSG_SM_MDDB_DETACH;
1146 	nmsg->msg_event_size	= sizeof (md_mn_msg_meta_db_detach_t);
1147 	nmsg->msg_event_data	= Zalloc(sizeof (md_mn_msg_meta_db_detach_t));
1148 	detach_d = (md_mn_msg_meta_db_detach_t *)
1149 	    (void *)nmsg->msg_event_data;
1150 	detach_d->msg_splitname = d->msg_splitname;
1151 	msglist[1] = nmsg;
1152 
1153 	nmsg = Zalloc(sizeof (md_mn_msg_t));
1154 	MSGID_COPY(&(msg->msg_msgid), &(nmsg->msg_msgid));
1155 
1156 	nmsg->msg_flags		= (MD_MSGF_NO_LOG | MD_MSGF_NO_BCAST);
1157 	nmsg->msg_setno		= msg->msg_setno;
1158 	nmsg->msg_type		= MD_MN_MSG_MDDB_BLOCK;
1159 	nmsg->msg_event_size	= sizeof (md_mn_msg_mddb_block_t);
1160 	nmsg->msg_event_data	= Zalloc(sizeof (md_mn_msg_mddb_block_t));
1161 	block_d = (md_mn_msg_mddb_block_t *)(void *)nmsg->msg_event_data;
1162 	block_d->msg_block_flags = MDDB_UNBLOCK_PARSE;
1163 	msglist[2] = nmsg;
1164 
1165 	return (3); /* Return the number of submessages generated */
1166 }
1167 
1168 /*
1169  * Handler for MD_MN_MSG_SM_MDDB_ATTACH which is used to attach mddbs.
1170  *
1171  * Used when running:
1172  *	metadb -s set_name -a
1173  * 	metaset -s set_name -a/-d disk
1174  *	metaset -s set_name -b
1175  */
1176 /*ARGSUSED*/
1177 void
mdmn_do_sm_mddb_attach(md_mn_msg_t * msg,uint_t flags,md_mn_result_t * resp)1178 mdmn_do_sm_mddb_attach(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
1179 {
1180 	md_mn_msg_meta_db_attach_t	*d;
1181 	struct mddb_config		c;
1182 	int				i;
1183 	int				ret = 0;
1184 	md_error_t			ep = mdnullerror;
1185 	char				*name, *add_name;
1186 	mdname_t			*np;
1187 	mdsetname_t			*sp;
1188 
1189 	resp->mmr_out_size = 0;
1190 	resp->mmr_err_size = 0;
1191 	resp->mmr_out = NULL;
1192 	resp->mmr_err = NULL;
1193 	resp->mmr_comm_state = MDMNE_ACK;
1194 	d = (md_mn_msg_meta_db_attach_t *)((void *)(msg->msg_event_data));
1195 
1196 	(void) memset(&c, 0, sizeof (c));
1197 	c.c_setno = msg->msg_setno;
1198 	c.c_locator.l_dev = meta_cmpldev(d->msg_l_dev);
1199 	(void) strncpy(c.c_locator.l_driver, d->msg_dname,
1200 	    sizeof (c.c_locator.l_driver));
1201 	c.c_devname = d->msg_splitname;
1202 	c.c_locator.l_mnum = meta_getminor(d->msg_l_dev);
1203 	c.c_multi_node = 1;
1204 	if ((sp = metasetnosetname(c.c_setno, &ep)) == NULL) {
1205 		(void) mdstealerror(&(resp->mmr_ep), &ep);
1206 		resp->mmr_exitval = -1;
1207 		return;
1208 	}
1209 	(void) strcpy(c.c_setname, sp->setname);
1210 	c.c_sideno = getmyside(sp, &ep);
1211 	if (c.c_sideno == MD_SIDEWILD) {
1212 		(void) mdstealerror(&(resp->mmr_ep), &ep);
1213 		resp->mmr_exitval = -1;
1214 		return;
1215 	}
1216 
1217 	name = splicename(&d->msg_splitname);
1218 	np = metaname(&sp, name, LOGICAL_DEVICE, &ep);
1219 	Free(name);
1220 	if (np == NULL) {
1221 		(void) mdstealerror(&(resp->mmr_ep), &ep);
1222 		resp->mmr_exitval = -1;
1223 		return;
1224 	}
1225 	/*
1226 	 * All nodes in MN diskset must do meta_check_replica
1227 	 * since this causes the shared namespace to be
1228 	 * populated by the md driver names while checking
1229 	 * to see if this device is already in use as a
1230 	 * metadevice.
1231 	 */
1232 	if (meta_check_replica(sp, np, d->msg_options, 0,
1233 	    (d->msg_cnt * d->msg_dbsize), &ep)) {
1234 		(void) mdstealerror(&(resp->mmr_ep), &ep);
1235 		resp->mmr_exitval = -1;
1236 		return;
1237 	}
1238 
1239 	for (i = 0; i < d->msg_cnt; i++) {
1240 		c.c_locator.l_blkno = i * d->msg_dbsize + 16;
1241 		if (setup_med_cfg(sp, &c,
1242 		    (d->msg_options & MDCHK_SET_FORCE), &ep)) {
1243 			ret = -1;
1244 			(void) mdstealerror(&(resp->mmr_ep), &ep);
1245 			break;
1246 		}
1247 		ret = metaioctl(MD_DB_NEWDEV, &c, &c.c_mde, NULL);
1248 		/* If newdev was successful, continue with attach */
1249 		if (ret == 0) {
1250 			if (meta_db_addsidenms(sp, np, c.c_locator.l_blkno,
1251 			    DB_ADDSIDENMS_NO_BCAST, &ep)) {
1252 				ret = -1;
1253 				(void) mdstealerror(&(resp->mmr_ep), &ep);
1254 				break;
1255 			}
1256 		} else {
1257 			(void) mdstealerror(&(resp->mmr_ep), &c.c_mde);
1258 			break;
1259 		}
1260 	}
1261 	add_name = splicename(&d->msg_splitname);
1262 	if ((np = metaname(&sp, add_name, LOGICAL_DEVICE, &ep)) != NULL) {
1263 		meta_invalidate_name(np);
1264 	} else {
1265 		ret = -1;
1266 		(void) mdstealerror(&(resp->mmr_ep), &ep);
1267 	}
1268 	Free(add_name);
1269 
1270 	resp->mmr_exitval = ret;
1271 }
1272 
1273 /*
1274  * Handler for MD_MN_MSG_SM_MDDB_DETACH which is used to detach mddbs.
1275  *
1276  * Used when running:
1277  *	metadb -s set_name -d
1278  * 	metaset -s set_name -a/-d disk
1279  *	metaset -s set_name -b
1280  */
1281 /*ARGSUSED*/
1282 void
mdmn_do_sm_mddb_detach(md_mn_msg_t * msg,uint_t flags,md_mn_result_t * resp)1283 mdmn_do_sm_mddb_detach(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
1284 {
1285 	md_mn_msg_meta_db_detach_t	*d;
1286 	struct mddb_config		c;
1287 	int				i;
1288 	int				ret = 0;
1289 	md_error_t			ep = mdnullerror;
1290 	char				*name, *del_name;
1291 	mdname_t			*np;
1292 	mdsetname_t			*sp;
1293 
1294 	resp->mmr_out_size = 0;
1295 	resp->mmr_err_size = 0;
1296 	resp->mmr_out = NULL;
1297 	resp->mmr_err = NULL;
1298 	resp->mmr_comm_state = MDMNE_ACK;
1299 	d = (md_mn_msg_meta_db_detach_t *)((void *)(msg->msg_event_data));
1300 
1301 	if ((sp = metasetnosetname(msg->msg_setno, &ep)) == NULL) {
1302 		(void) mdstealerror(&(resp->mmr_ep), &ep);
1303 		resp->mmr_exitval = -1;
1304 		return;
1305 	}
1306 
1307 	(void) memset(&c, 0, sizeof (c));
1308 	c.c_setno = msg->msg_setno;
1309 	if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) {
1310 		resp->mmr_exitval = -1;
1311 		(void) mdstealerror(&(resp->mmr_ep), &c.c_mde);
1312 		return;
1313 	}
1314 	i = 0;
1315 	del_name = splicename(&d->msg_splitname);
1316 	while (i < c.c_dbcnt) {
1317 		c.c_id = i;
1318 		if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) {
1319 			ret = -1;
1320 			(void) mdstealerror(&(resp->mmr_ep), &c.c_mde);
1321 			break;
1322 		}
1323 		name = splicename(&c.c_devname);
1324 		if (strcmp(name, del_name) != 0) {
1325 			Free(name);
1326 			i++;
1327 			continue;
1328 		}
1329 		Free(name);
1330 		/* Found a match - delete mddb */
1331 		if (metaioctl(MD_DB_DELDEV, &c, &c.c_mde, NULL) != 0) {
1332 			ret = -1;
1333 			(void) mdstealerror(&(resp->mmr_ep), &c.c_mde);
1334 			break;
1335 		}
1336 		/* Not incrementing "i" intentionally (dbcnt is changed) */
1337 	}
1338 	if ((np = metaname(&sp, del_name, LOGICAL_DEVICE, &ep)) != NULL) {
1339 		meta_invalidate_name(np);
1340 	} else {
1341 		ret = -1;
1342 		(void) mdstealerror(&(resp->mmr_ep), &ep);
1343 	}
1344 	Free(del_name);
1345 
1346 	resp->mmr_exitval = ret;
1347 }
1348 
1349 /*
1350  * Handler for MD_MN_MSG_META_DB_NEWSIDE which is used to update the
1351  * side information for each diskset mddb when a new host has been
1352  * added to the diskset.  The side information is the /dev/dsk/ctds name
1353  * that the new node would use to access each mddb.
1354  *
1355  * Since this routine makes no changes to the records in the diskset mddb,
1356  * this routine only needs to be run on the master node.  The master node's
1357  * kernel code will detect that portions of the mddb have changed and
1358  * will send a parse message to all nodes to re-parse parts of the mddb.
1359  *
1360  * Used when running:
1361  * 	metaset -s set_name -a -h new_hostname
1362  */
1363 /*ARGSUSED*/
1364 void
mdmn_do_meta_db_newside(md_mn_msg_t * msg,uint_t flags,md_mn_result_t * resp)1365 mdmn_do_meta_db_newside(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
1366 {
1367 	md_mn_msg_meta_db_newside_t	*d;
1368 	struct mddb_config		c;
1369 	int				ret = 0;
1370 	mdsetname_t			*sp;
1371 	md_error_t			ep = mdnullerror;
1372 
1373 	resp->mmr_out_size = 0;
1374 	resp->mmr_err_size = 0;
1375 	resp->mmr_out = NULL;
1376 	resp->mmr_err = NULL;
1377 	resp->mmr_comm_state = MDMNE_ACK;
1378 	d = (md_mn_msg_meta_db_newside_t *)((void *)(msg->msg_event_data));
1379 
1380 	(void) memset(&c, 0, sizeof (c));
1381 	c.c_setno = msg->msg_setno;
1382 	c.c_locator.l_dev = meta_cmpldev(d->msg_l_dev);
1383 	c.c_locator.l_blkno = d->msg_blkno;
1384 	(void) strncpy(c.c_locator.l_driver, d->msg_dname,
1385 	    sizeof (c.c_locator.l_driver));
1386 	c.c_devname = d->msg_splitname;
1387 	c.c_locator.l_mnum = d->msg_mnum;
1388 	c.c_multi_node = 1;
1389 	if ((sp = metasetnosetname(c.c_setno, &ep)) == NULL) {
1390 		(void) mdstealerror(&(resp->mmr_ep), &ep);
1391 		resp->mmr_exitval = -1;
1392 		return;
1393 	}
1394 	(void) strcpy(c.c_setname, sp->setname);
1395 	c.c_sideno = d->msg_sideno;
1396 
1397 	if ((ret = metaioctl(MD_DB_NEWSIDE, &c, &c.c_mde, NULL)) != 0) {
1398 		(void) mdstealerror(&(resp->mmr_ep), &c.c_mde);
1399 	}
1400 	resp->mmr_exitval = ret;
1401 }
1402 
1403 /*
1404  * Handler for MD_MN_MSG_META_DB_DELSIDE which is used to remove the
1405  * side information for each diskset mddb when a host has been
1406  * deleted from the diskset.  The side information is the /dev/dsk/ctds name
1407  * that the node would use to access each mddb.
1408  *
1409  * Since this routine makes no changes to the records in the diskset mddb,
1410  * this routine only needs to be run on the master node.  The master node's
1411  * kernel code will detect that portions of the mddb have changed and
1412  * will send a parse message to all nodes to re-parse parts of the mddb.
1413  *
1414  * Used when running:
1415  * 	metaset -s set_name -d -h hostname
1416  */
1417 /*ARGSUSED*/
1418 void
mdmn_do_meta_db_delside(md_mn_msg_t * msg,uint_t flags,md_mn_result_t * resp)1419 mdmn_do_meta_db_delside(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
1420 {
1421 	md_mn_msg_meta_db_delside_t	*d;
1422 	mddb_config_t			c;
1423 	int				ret = 0;
1424 	mdsetname_t			*sp;
1425 	md_error_t			ep = mdnullerror;
1426 
1427 	resp->mmr_out_size = 0;
1428 	resp->mmr_err_size = 0;
1429 	resp->mmr_out = NULL;
1430 	resp->mmr_err = NULL;
1431 	resp->mmr_comm_state = MDMNE_ACK;
1432 	d = (md_mn_msg_meta_db_delside_t *)((void *)(msg->msg_event_data));
1433 
1434 	(void) memset(&c, 0, sizeof (c));
1435 	c.c_setno = msg->msg_setno;
1436 	c.c_locator.l_dev = meta_cmpldev(d->msg_l_dev);
1437 	c.c_locator.l_blkno = d->msg_blkno;
1438 	c.c_multi_node = 1;
1439 	if ((sp = metasetnosetname(c.c_setno, &ep)) == NULL) {
1440 		(void) mdstealerror(&(resp->mmr_ep), &ep);
1441 		resp->mmr_exitval = -1;
1442 		return;
1443 	}
1444 	(void) strcpy(c.c_setname, sp->setname);
1445 	c.c_sideno = d->msg_sideno;
1446 
1447 	if ((ret = metaioctl(MD_DB_DELSIDE, &c, &c.c_mde, NULL)) != 0) {
1448 		(void) mdstealerror(&(resp->mmr_ep), &c.c_mde);
1449 	}
1450 	resp->mmr_exitval = ret;
1451 }
1452 
1453 /*
1454  * Handler for MD_MN_MSG_META_MD_ADDSIDE which is used to add the
1455  * side information for each diskset metadevice component (if that
1456  * component is a disk) when a host has been added to the diskset.
1457  * The side information is the /dev/dsk/ctds name that the node would
1458  * use to access the metadevice component.
1459  *
1460  * This routine makes changes to the mddb records and must be run
1461  * on all nodes.
1462  *
1463  * Used when running:
1464  * 	metaset -s set_name -a -h new_hostname
1465  */
1466 /*ARGSUSED*/
1467 void
mdmn_do_meta_md_addside(md_mn_msg_t * msg,uint_t flags,md_mn_result_t * resp)1468 mdmn_do_meta_md_addside(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
1469 {
1470 	md_mn_msg_meta_md_addside_t	*d;
1471 	mdnm_params_t			nm;
1472 	mdsetname_t			*sp;
1473 	char				*cname, *dname;
1474 	minor_t				mnum;
1475 	int				done, i;
1476 	md_error_t			ep = mdnullerror;
1477 
1478 	resp->mmr_out_size = 0;
1479 	resp->mmr_err_size = 0;
1480 	resp->mmr_out = NULL;
1481 	resp->mmr_err = NULL;
1482 	resp->mmr_comm_state = MDMNE_ACK;
1483 	d = (md_mn_msg_meta_md_addside_t *)((void *)(msg->msg_event_data));
1484 
1485 	(void) memset(&nm, 0, sizeof (nm));
1486 	if ((sp = metasetnosetname(msg->msg_setno, &ep)) == NULL) {
1487 		(void) mdstealerror(&(resp->mmr_ep), &ep);
1488 		resp->mmr_exitval = -1;
1489 		return;
1490 	}
1491 	/* While loop continues until IOCNXTKEY_NM gives nm.key of KEYWILD */
1492 	/*CONSTCOND*/
1493 	while (1) {
1494 		char	*drvnm = NULL;
1495 
1496 		nm.mde = mdnullerror;
1497 		nm.setno = msg->msg_setno;
1498 		nm.side = d->msg_otherside;
1499 		if (metaioctl(MD_IOCNXTKEY_NM, &nm, &nm.mde, NULL) != 0) {
1500 			(void) mdstealerror(&(resp->mmr_ep), &nm.mde);
1501 			resp->mmr_exitval = -1;
1502 			return;
1503 		}
1504 
1505 		/* Normal exit path is to eventually get a KEYWILD */
1506 		if (nm.key == MD_KEYWILD) {
1507 			resp->mmr_exitval = 0;
1508 			return;
1509 		}
1510 
1511 		/*
1512 		 * Okay we have a valid key
1513 		 * Let's see if it is hsp or not
1514 		 */
1515 		nm.devname = (uintptr_t)meta_getnmentbykey(msg->msg_setno,
1516 		    d->msg_otherside, nm.key, &drvnm, NULL, NULL, &ep);
1517 		if (nm.devname == NULL || drvnm == NULL) {
1518 			if (nm.devname)
1519 				Free((void *)(uintptr_t)nm.devname);
1520 			if (drvnm)
1521 				Free((void *)(uintptr_t)drvnm);
1522 			(void) mdstealerror(&(resp->mmr_ep), &ep);
1523 			resp->mmr_exitval = -1;
1524 			return;
1525 		}
1526 
1527 		/*
1528 		 * If it is hsp add here
1529 		 */
1530 		if (strcmp(drvnm, MD_HOTSPARES) == 0) {
1531 			if (add_name(sp, d->msg_sideno, nm.key, MD_HOTSPARES,
1532 			    minor(NODEV), (char *)(uintptr_t)nm.devname,
1533 			    NULL, NULL, &ep) == -1) {
1534 				Free((void *)(uintptr_t)nm.devname);
1535 				Free((void *)(uintptr_t)drvnm);
1536 				(void) mdstealerror(&(resp->mmr_ep), &ep);
1537 				resp->mmr_exitval = -1;
1538 				return;
1539 			} else {
1540 				Free((void *)(uintptr_t)nm.devname);
1541 				Free((void *)(uintptr_t)drvnm);
1542 				continue;
1543 			}
1544 		}
1545 
1546 		nm.side = d->msg_sideno;
1547 		if ((done = meta_getside_devinfo(sp,
1548 		    (char *)(uintptr_t)nm.devname,
1549 		    d->msg_sideno, &cname, &dname, &mnum, &ep)) == -1) {
1550 			(void) mdstealerror(&(resp->mmr_ep), &ep);
1551 			Free((void *)(uintptr_t)nm.devname);
1552 			resp->mmr_exitval = -1;
1553 			return;
1554 		}
1555 
1556 		Free((void *)(uintptr_t)nm.devname);
1557 		Free((void *)(uintptr_t)drvnm);
1558 
1559 		if (done != 1) {
1560 			Free(cname);
1561 			Free(dname);
1562 			resp->mmr_exitval = -1;
1563 			return;
1564 		}
1565 
1566 		/*
1567 		 * The device reference count can be greater than 1 if
1568 		 * more than one softpart is configured on top of the
1569 		 * same device.  If this is the case then we want to
1570 		 * increment the count to sync up with the other sides.
1571 		 */
1572 		for (i = 0; i < nm.ref_count; i++) {
1573 			if (add_name(sp, d->msg_sideno, nm.key, dname, mnum,
1574 			    cname, NULL, NULL, &ep) == -1) {
1575 				(void) mdstealerror(&(resp->mmr_ep), &ep);
1576 				Free(cname);
1577 				Free(dname);
1578 				resp->mmr_exitval = -1;
1579 				return;
1580 			}
1581 		}
1582 		Free(cname);
1583 		Free(dname);
1584 	}
1585 
1586 	/*NOTREACHED*/
1587 }
1588 /*
1589  * Handler for MD_MN_MSG_META_MD_DELSIDE which is used to delete the
1590  * side information for each diskset metadevice component (if that
1591  * component is a disk) when a host has been removed from the diskset.
1592  * The side information is the /dev/dsk/ctds name that the node would
1593  * use to access the metadevice component.
1594  *
1595  * This routine makes changes to the mddb records and must be run
1596  * on all nodes.
1597  *
1598  * Used when running:
1599  * 	metaset -s set_name -d -h hostname
1600  */
1601 /*ARGSUSED*/
1602 void
mdmn_do_meta_md_delside(md_mn_msg_t * msg,uint_t flags,md_mn_result_t * resp)1603 mdmn_do_meta_md_delside(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
1604 {
1605 	md_mn_msg_meta_md_delside_t	*d;
1606 	mdnm_params_t			nm;
1607 	mdsetname_t			*sp;
1608 	md_error_t			ep = mdnullerror;
1609 	int				i;
1610 
1611 	resp->mmr_out_size = 0;
1612 	resp->mmr_err_size = 0;
1613 	resp->mmr_out = NULL;
1614 	resp->mmr_err = NULL;
1615 	resp->mmr_comm_state = MDMNE_ACK;
1616 	d = (md_mn_msg_meta_md_delside_t *)((void *)(msg->msg_event_data));
1617 
1618 	if ((sp = metasetnosetname(msg->msg_setno, &ep)) == NULL) {
1619 		(void) mdstealerror(&(resp->mmr_ep), &ep);
1620 		resp->mmr_exitval = -1;
1621 		return;
1622 	}
1623 
1624 	(void) memset(&nm, 0, sizeof (nm));
1625 	nm.key = MD_KEYWILD;
1626 	/*CONSTCOND*/
1627 	while (1) {
1628 		nm.mde = mdnullerror;
1629 		nm.setno = msg->msg_setno;
1630 		nm.side = MD_SIDEWILD;
1631 		if (metaioctl(MD_IOCNXTKEY_NM, &nm, &nm.mde, NULL) != 0) {
1632 			(void) mdstealerror(&(resp->mmr_ep), &nm.mde);
1633 			resp->mmr_exitval = -1;
1634 			return;
1635 		}
1636 
1637 		/* Normal exit path is to eventually get a KEYWILD */
1638 		if (nm.key == MD_KEYWILD) {
1639 			resp->mmr_exitval = 0;
1640 			return;
1641 		}
1642 
1643 		/*
1644 		 * The device reference count can be greater than 1 if
1645 		 * more than one softpart is configured on top of the
1646 		 * same device.  If this is the case then we want to
1647 		 * decrement the count to zero so the entry can be
1648 		 * actually removed.
1649 		 */
1650 		for (i = 0; i < nm.ref_count; i++) {
1651 			if (del_name(sp, d->msg_sideno, nm.key, &ep) == -1) {
1652 				(void) mdstealerror(&(resp->mmr_ep), &ep);
1653 				resp->mmr_exitval = -1;
1654 				return;
1655 			}
1656 		}
1657 	}
1658 
1659 	/*NOTREACHED*/
1660 }
1661 
1662 /*
1663  * Handler for MD_MN_MSG_MDDB_OPTRECERR which is used to notify
1664  * the master node that a node has seen an error when attempting to
1665  * write to the optimized resync records that reside on 2 of the diskset
1666  * mddbs.  Master node will mark the failed replica in error and this
1667  * will send a parse message to all nodes to re-read parts of the mddb
1668  * and to fix their optimized resync records based on this information.
1669  */
1670 /*ARGSUSED*/
1671 void
mdmn_do_mddb_optrecerr(md_mn_msg_t * msg,uint_t flags,md_mn_result_t * resp)1672 mdmn_do_mddb_optrecerr(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
1673 {
1674 	md_mn_msg_mddb_optrecerr_t	*d;
1675 	mddb_optrec_parm_t		mop;
1676 	int				ret;
1677 	int				i;
1678 
1679 	resp->mmr_out_size = 0;
1680 	resp->mmr_err_size = 0;
1681 	resp->mmr_out = NULL;
1682 	resp->mmr_err = NULL;
1683 	resp->mmr_comm_state = MDMNE_ACK;
1684 	d = (md_mn_msg_mddb_optrecerr_t *)((void *)(msg->msg_event_data));
1685 
1686 	(void) memset(&mop, 0, sizeof (mop));
1687 	mop.c_setno = msg->msg_setno;
1688 	for (i = 0; i < 2; i++) {
1689 		mop.c_recerr[i] = d->msg_recerr[i];
1690 	}
1691 	ret = metaioctl(MD_MN_MDDB_OPTRECFIX, &mop, &mop.c_mde, NULL);
1692 	if (ret)
1693 		(void) mdstealerror(&(resp->mmr_ep), &mop.c_mde);
1694 
1695 	resp->mmr_exitval = ret;
1696 }
1697 
1698 int
mdmn_smgen_test6(md_mn_msg_t * msg,md_mn_msg_t ** msglist)1699 mdmn_smgen_test6(md_mn_msg_t *msg, md_mn_msg_t **msglist)
1700 {
1701 	md_mn_msg_t	*nmsg;
1702 
1703 	nmsg = Zalloc(sizeof (md_mn_msg_t));
1704 	MSGID_COPY(&(msg->msg_msgid), &(nmsg->msg_msgid));
1705 
1706 	nmsg->msg_flags		= MD_MSGF_NO_LOG; /* Don't log submessages */
1707 	nmsg->msg_setno		= msg->msg_setno;
1708 	nmsg->msg_type		= MD_MN_MSG_TEST2;
1709 	nmsg->msg_event_size	= sizeof ("test2");
1710 	nmsg->msg_event_data	= Strdup("test2");
1711 	msglist[0] = nmsg;
1712 
1713 	nmsg = Zalloc(sizeof (md_mn_msg_t));
1714 	MSGID_COPY(&(msg->msg_msgid), &(nmsg->msg_msgid));
1715 
1716 	nmsg->msg_flags		= MD_MSGF_NO_LOG; /* Don't log submessages */
1717 	nmsg->msg_setno		= msg->msg_setno;
1718 	nmsg->msg_type		= MD_MN_MSG_TEST2;
1719 	nmsg->msg_event_size	= sizeof ("test2");
1720 	nmsg->msg_event_data	= Strdup("test2");
1721 	msglist[1] = nmsg;
1722 
1723 	nmsg = Zalloc(sizeof (md_mn_msg_t));
1724 	MSGID_COPY(&(msg->msg_msgid), &(nmsg->msg_msgid));
1725 
1726 	nmsg->msg_flags		= MD_MSGF_NO_LOG; /* Don't log submessages */
1727 	nmsg->msg_setno		= msg->msg_setno;
1728 	nmsg->msg_type		= MD_MN_MSG_TEST3;
1729 	nmsg->msg_event_size	= sizeof ("test3");
1730 	nmsg->msg_event_data	= Strdup("test3");
1731 	msglist[2] = nmsg;
1732 
1733 	nmsg = Zalloc(sizeof (md_mn_msg_t));
1734 	MSGID_COPY(&(msg->msg_msgid), &(nmsg->msg_msgid));
1735 
1736 	nmsg->msg_flags		= MD_MSGF_NO_LOG; /* Don't log submessages */
1737 	nmsg->msg_setno		= msg->msg_setno;
1738 	nmsg->msg_type		= MD_MN_MSG_TEST4;
1739 	nmsg->msg_event_size	= sizeof ("test4");
1740 	nmsg->msg_event_data	= Strdup("test4");
1741 	msglist[3] = nmsg;
1742 
1743 	return (4); /* Return the number of submessages generated */
1744 }
1745 
1746 /*
1747  * This is to send an MD_IOCSET ioctl to all nodes to create a soft
1748  * partition.
1749  */
1750 /*ARGSUSED*/
1751 void
mdmn_do_iocset(md_mn_msg_t * msg,uint_t flags,md_mn_result_t * resp)1752 mdmn_do_iocset(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
1753 {
1754 	md_mn_msg_iocset_t	*d;
1755 	int			ret;
1756 	set_t			setno;
1757 	mdsetname_t		*sp;
1758 	mdname_t		*np;
1759 	md_error_t		mde = mdnullerror;
1760 
1761 	resp->mmr_comm_state = MDMNE_ACK; /* Ok state */;
1762 	resp->mmr_out_size = 0;
1763 	resp->mmr_err_size = 0;
1764 	resp->mmr_out = NULL;
1765 	resp->mmr_err = NULL;
1766 	d = (md_mn_msg_iocset_t *)(void *)msg->msg_event_data;
1767 
1768 	setno = MD_MIN2SET(d->iocset_params.mnum);
1769 	if ((sp = metasetnosetname(setno, &mde)) == NULL) {
1770 		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
1771 		    "MD_MN_MSG_IOCSET: Invalid setno %d\n"), setno);
1772 		resp->mmr_exitval = 1;
1773 		return;
1774 	}
1775 
1776 	/*
1777 	 * Device should be in the namespace already
1778 	 */
1779 	if ((np = metamnumname(&sp, d->iocset_params.mnum, 1, &mde)) == NULL) {
1780 		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
1781 		    "MD_MN_MSG_IOCSET: Invalid mnum %d\n"),
1782 		    d->iocset_params.mnum);
1783 		resp->mmr_exitval = 1;
1784 		return;
1785 	}
1786 
1787 	/*
1788 	 * Create unit structure
1789 	 */
1790 	d->iocset_params.mdp = (uintptr_t)&d->unit; /* set pointer to unit */
1791 	ret = metaioctl(MD_IOCSET, &(d->iocset_params), &mde, np->cname);
1792 	resp->mmr_exitval = ret;
1793 }
1794 
1795 /*
1796  * This is to update the status of a softpart
1797  */
1798 /*ARGSUSED*/
1799 void
mdmn_do_sp_setstat(md_mn_msg_t * msg,uint_t flags,md_mn_result_t * resp)1800 mdmn_do_sp_setstat(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
1801 {
1802 	md_mn_msg_sp_setstat_t	*d;
1803 	int			ret;
1804 	set_t			setno;
1805 	mdsetname_t		*sp;
1806 	minor_t			mnum;
1807 	md_error_t		mde = mdnullerror;
1808 
1809 	resp->mmr_comm_state = MDMNE_ACK; /* Ok state */;
1810 	resp->mmr_out_size = 0;
1811 	resp->mmr_err_size = 0;
1812 	resp->mmr_out = NULL;
1813 	resp->mmr_err = NULL;
1814 	d = (md_mn_msg_sp_setstat_t *)(void *)msg->msg_event_data;
1815 
1816 	mnum = d->sp_setstat_mnum;
1817 	setno = MD_MIN2SET(mnum);
1818 	if ((sp = metasetnosetname(setno, &mde)) == NULL) {
1819 		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
1820 		    "MD_MN_MSG_IOCSET: Invalid setno %d\n"), setno);
1821 		resp->mmr_exitval = 1;
1822 		return;
1823 	}
1824 
1825 	ret = meta_sp_setstatus(sp, &mnum, 1, d->sp_setstat_status, &mde);
1826 	resp->mmr_exitval = ret;
1827 }
1828 
1829 /*
1830  * This is to add a key to the namespace
1831  */
1832 /*ARGSUSED*/
1833 void
mdmn_do_addkeyname(md_mn_msg_t * msg,uint_t flags,md_mn_result_t * resp)1834 mdmn_do_addkeyname(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
1835 {
1836 	md_mn_msg_addkeyname_t	*d;
1837 	int			ret;
1838 	set_t			setno;
1839 	mdsetname_t		*sp;
1840 	md_error_t		mde = mdnullerror;
1841 	mdname_t		*compnp;
1842 
1843 	resp->mmr_comm_state = MDMNE_ACK; /* Ok state */;
1844 	resp->mmr_out_size = 0;
1845 	resp->mmr_err_size = 0;
1846 	resp->mmr_out = NULL;
1847 	resp->mmr_err = NULL;
1848 	d = (md_mn_msg_addkeyname_t *)(void *)msg->msg_event_data;
1849 
1850 	setno = d->addkeyname_setno;
1851 	if ((sp = metasetnosetname(setno, &mde)) == NULL) {
1852 		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
1853 		    "MD_MN_ADDKEYNAME: Invalid setno %d\n"), setno);
1854 		resp->mmr_exitval = -1;
1855 		return;
1856 	}
1857 
1858 	compnp = metaname(&sp, d->addkeyname_name, UNKNOWN, &mde);
1859 	if (compnp != NULL) {
1860 		ret = add_key_name(sp, compnp, NULL, &mde);
1861 		if (ret < 0)
1862 			resp->mmr_exitval = -1;
1863 		else
1864 			resp->mmr_exitval = compnp->key;
1865 	} else {
1866 		resp->mmr_exitval = -1;
1867 	}
1868 }
1869 
1870 /*
1871  * This is to delete a key from the namespace
1872  */
1873 /*ARGSUSED*/
1874 void
mdmn_do_delkeyname(md_mn_msg_t * msg,uint_t flags,md_mn_result_t * resp)1875 mdmn_do_delkeyname(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
1876 {
1877 	md_mn_msg_delkeyname_t	*d;
1878 	int			ret;
1879 	set_t			setno;
1880 	mdsetname_t		*sp;
1881 	md_error_t		mde = mdnullerror;
1882 	mdname_t		*compnp;
1883 
1884 	resp->mmr_comm_state = MDMNE_ACK; /* Ok state */;
1885 	resp->mmr_out_size = 0;
1886 	resp->mmr_err_size = 0;
1887 	resp->mmr_out = NULL;
1888 	resp->mmr_err = NULL;
1889 	d = (md_mn_msg_delkeyname_t *)(void *)msg->msg_event_data;
1890 
1891 	setno = d->delkeyname_setno;
1892 	if ((sp = metasetnosetname(setno, &mde)) == NULL) {
1893 		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
1894 		    "MD_MN_DELKEYNAME: Invalid setno %d\n"), setno);
1895 		resp->mmr_exitval = -1;
1896 		return;
1897 	}
1898 
1899 	compnp = metadevname(&sp, d->delkeyname_dev, &mde);
1900 	if (compnp != NULL) {
1901 		/*
1902 		 * Reset the key value for the name. This is required because
1903 		 * any previous call of del_key_name for the same component
1904 		 * will have resulted in the key value being reset to MD_KEYBAD
1905 		 * even though there may still be references to this component.
1906 		 */
1907 		compnp->key = d->delkeyname_key;
1908 		ret = del_key_name(sp, compnp, &mde);
1909 		resp->mmr_exitval = ret;
1910 	} else {
1911 		resp->mmr_exitval = -1;
1912 	}
1913 }
1914 
1915 /*
1916  * This is to get the value of tstate from the master node. We use this
1917  * to get the ABR state of a metadevice from the master.
1918  */
1919 /*ARGSUSED*/
1920 void
mdmn_do_get_tstate(md_mn_msg_t * msg,uint_t flags,md_mn_result_t * resp)1921 mdmn_do_get_tstate(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
1922 {
1923 	md_mn_msg_gettstate_t	*d;
1924 	int			ret;
1925 	uint_t			tstate;
1926 	md_error_t		mde = mdnullerror;
1927 
1928 	resp->mmr_comm_state = MDMNE_ACK; /* Ok state */;
1929 	resp->mmr_out_size = 0;
1930 	resp->mmr_err_size = 0;
1931 	resp->mmr_out = NULL;
1932 	resp->mmr_err = NULL;
1933 	d = (md_mn_msg_gettstate_t *)(void *)msg->msg_event_data;
1934 
1935 	ret = meta_get_tstate(d->gettstate_dev, &tstate, &mde);
1936 	if (ret != 0) {
1937 		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
1938 		    "MD_MN_GET_TSTATE: Invalid dev %llx\n"), d->gettstate_dev);
1939 		tstate = 0;
1940 	}
1941 	resp->mmr_exitval = tstate;
1942 }
1943 
1944 /*
1945  * This is to get the mirror ABR state and the state of its submirrors from
1946  * the master node. We need this to ensure consistent output from metastat
1947  * when a new node joins the cluster during a resync. Without this the
1948  * submirror status will be incorrect until the whole resync is complete which
1949  * may take days for very large metadevices.
1950  */
1951 /*ARGSUSED*/
1952 void
mdmn_do_get_mirstate(md_mn_msg_t * msg,uint_t flags,md_mn_result_t * resp)1953 mdmn_do_get_mirstate(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
1954 {
1955 	md_mn_msg_mir_state_t		*d;
1956 	md_mn_msg_mir_state_res_t	*res;		/* Results */
1957 	set_t				setno;
1958 	mdsetname_t			*sp;		/* Set name */
1959 	mdname_t			*mirnp;		/* Mirror name */
1960 	md_error_t			mde = mdnullerror;
1961 	mm_unit_t			*mm;		/* Mirror */
1962 	int				smi;
1963 	uint_t				tstate;
1964 
1965 	resp->mmr_comm_state = MDMNE_ACK;
1966 	resp->mmr_out_size = sizeof (md_mn_msg_mir_state_res_t);
1967 	resp->mmr_err_size = 0;
1968 	resp->mmr_out = Malloc(resp->mmr_out_size);
1969 	resp->mmr_err = NULL;
1970 	d = (md_mn_msg_mir_state_t *)(void *)msg->msg_event_data;
1971 	res = (md_mn_msg_mir_state_res_t *)(void *)resp->mmr_out;
1972 
1973 	/* Validate set information from minor number */
1974 	setno = MD_MIN2SET(d->mir_state_mnum);
1975 	sp = metasetnosetname(setno, &mde);
1976 	if (sp == NULL) {
1977 		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
1978 		    "MD_MN_GET_MIRROR_STATE: Invalid set %d\n"), setno);
1979 		resp->mmr_exitval = 1;	/* Failure */
1980 		Free(resp->mmr_out);
1981 		resp->mmr_out_size = 0;
1982 		return;
1983 	}
1984 
1985 	/* Construct mirror name from minor number */
1986 	mirnp = metamnumname(&sp, d->mir_state_mnum, 0, &mde);
1987 	if (mirnp == NULL) {
1988 		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
1989 		    "MD_MN_GET_MIRROR_STATE: Invalid minor %lx\n"),
1990 		    d->mir_state_mnum);
1991 		resp->mmr_exitval = 2;	/* Failure */
1992 		Free(resp->mmr_out);
1993 		resp->mmr_out_size = 0;
1994 		return;
1995 	}
1996 
1997 	/* Get common mirror structure */
1998 	mm = (mm_unit_t *)meta_get_mdunit(sp, mirnp, &mde);
1999 	if (mm == NULL) {
2000 		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
2001 		    "MD_MN_GET_MIRROR_STATE: Invalid mirror minor %x\n"),
2002 		    d->mir_state_mnum);
2003 		resp->mmr_exitval = 3;	/* Failure */
2004 		Free(resp->mmr_out);
2005 		resp->mmr_out_size = 0;
2006 		return;
2007 	}
2008 
2009 	if (meta_get_tstate(d->mir_state_mnum, &tstate, &mde) != 0) {
2010 		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
2011 		    "MD_MN_GET_MIRROR_STATE: Invalid minor %lx\n"),
2012 		    d->mir_state_mnum);
2013 		resp->mmr_exitval = 4;	/* Failure */
2014 		Free(resp->mmr_out);
2015 		resp->mmr_out_size = 0;
2016 		return;
2017 	}
2018 	/*
2019 	 * Fill in the sm_state/sm_flags value in the results structure which
2020 	 * gets passed back to the message originator
2021 	 */
2022 	resp->mmr_exitval = 0;
2023 	for (smi = 0; (smi < NMIRROR); smi++) {
2024 		mm_submirror_t *mmsp = &mm->un_sm[smi];
2025 		res->sm_state[smi] = mmsp->sm_state;
2026 		res->sm_flags[smi] = mmsp->sm_flags;
2027 	}
2028 	/* Returm value of tstate for mirror */
2029 	res->mir_tstate = tstate;
2030 }
2031 
2032 /*
2033  * This is to issue an ioctl to call poke_hotspares
2034  */
2035 /*ARGSUSED*/
2036 void
mdmn_do_poke_hotspares(md_mn_msg_t * msg,uint_t flags,md_mn_result_t * resp)2037 mdmn_do_poke_hotspares(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
2038 {
2039 
2040 	md_mn_poke_hotspares_t	pokehsp;
2041 	md_mn_msg_pokehsp_t	*d;
2042 
2043 	resp->mmr_out_size = 0;
2044 	resp->mmr_err_size = 0;
2045 	resp->mmr_out = NULL;
2046 	resp->mmr_err = NULL;
2047 	resp->mmr_comm_state = MDMNE_ACK;
2048 	d = (md_mn_msg_pokehsp_t *)(void *)msg->msg_event_data;
2049 
2050 	(void) memset(&pokehsp, 0, sizeof (pokehsp));
2051 	MD_SETDRIVERNAME(&pokehsp, MD_MIRROR, d->pokehsp_setno);
2052 
2053 	resp->mmr_exitval = metaioctl(MD_MN_POKE_HOTSPARES, &pokehsp,
2054 	    &pokehsp.mde, NULL);
2055 }
2056 
2057 /*
2058  * Called to create a softpart during a metarecover operation
2059  */
2060 /*ARGSUSED*/
2061 void
mdmn_do_addmdname(md_mn_msg_t * msg,uint_t flags,md_mn_result_t * resp)2062 mdmn_do_addmdname(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
2063 {
2064 	md_mn_msg_addmdname_t	*d;
2065 	md_error_t		mde = mdnullerror;
2066 	mdsetname_t		*sp;
2067 	int			init = 0;
2068 	mdkey_t			key;
2069 	minor_t			mnum;
2070 
2071 	resp->mmr_comm_state = MDMNE_ACK; /* Ok state */;
2072 	resp->mmr_out_size = 0;
2073 	resp->mmr_err_size = 0;
2074 	resp->mmr_out = NULL;
2075 	resp->mmr_err = NULL;
2076 	d = (md_mn_msg_addmdname_t *)(void *)msg->msg_event_data;
2077 
2078 	if ((sp = metasetnosetname(d->addmdname_setno, &mde)) == NULL) {
2079 		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
2080 		    "MD_MN_MSG_ADDMDNAME: Invalid setno %d\n"),
2081 		    d->addmdname_setno);
2082 		resp->mmr_exitval = 1;
2083 		return;
2084 	}
2085 
2086 	/*
2087 	 * If device node does not exist then init it
2088 	 */
2089 	if (!is_existing_meta_hsp(sp, d->addmdname_name)) {
2090 		if ((key = meta_init_make_device(&sp, d->addmdname_name,
2091 		    &mde)) <= 0) {
2092 			syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
2093 			    "MD_MN_MSG_ADDMDNAME: Invalid name %s\n"),
2094 			    d->addmdname_name);
2095 			resp->mmr_exitval = 1;
2096 			return;
2097 		}
2098 
2099 		init = 1;
2100 	}
2101 
2102 	/*
2103 	 * We should have it
2104 	 */
2105 	if (metaname(&sp, d->addmdname_name, META_DEVICE, &mde) == NULL) {
2106 
2107 		if (init) {
2108 			if (meta_getnmentbykey(sp->setno, MD_SIDEWILD,
2109 			    key, NULL, &mnum, NULL, &mde) != NULL) {
2110 				(void) metaioctl(
2111 				    MD_IOCREM_DEV, &mnum, &mde, NULL);
2112 			}
2113 		(void) del_self_name(sp, key, &mde);
2114 		}
2115 
2116 		resp->mmr_exitval = 1;
2117 		return;
2118 	}
2119 
2120 	resp->mmr_exitval = 0;
2121 }
2122 
2123 /*
2124  * This is used to issue a MD_MN_RR_DIRTY ioctl to the mirror.
2125  */
2126 /*ARGSUSED*/
2127 void
mdmn_do_mark_dirty(md_mn_msg_t * msg,uint_t flags,md_mn_result_t * resp)2128 mdmn_do_mark_dirty(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
2129 {
2130 	md_mn_msg_rr_dirty_t	*d;
2131 	md_mn_rr_dirty_params_t	rp;
2132 	int			ret;
2133 
2134 	resp->mmr_out_size = 0;
2135 	resp->mmr_err_size = 0;
2136 	resp->mmr_out = NULL;
2137 	resp->mmr_err = NULL;
2138 	resp->mmr_comm_state = MDMNE_ACK;
2139 	d = (md_mn_msg_rr_dirty_t *)((void *)(msg->msg_event_data));
2140 
2141 	(void) memset(&rp, 0, sizeof (rp));
2142 	MD_SETDRIVERNAME(&rp, MD_MIRROR, MD_MIN2SET(d->rr_mnum))
2143 	rp.rr_mnum = d->rr_mnum;
2144 	rp.rr_nodeid = d->rr_nodeid;
2145 	rp.rr_start = (ushort_t)((d->rr_range >> 16) & 0xffff);
2146 	rp.rr_end = (ushort_t)(d->rr_range & 0xffff);
2147 
2148 	ret = metaioctl(MD_MN_RR_DIRTY, &rp, &rp.mde, NULL);
2149 
2150 	resp->mmr_exitval = ret;
2151 }
2152 
2153 /*
2154  * This is used to issue a MD_MN_RR_CLEAN ioctl to the mirror.
2155  */
2156 /*ARGSUSED*/
2157 void
mdmn_do_mark_clean(md_mn_msg_t * msg,uint_t flags,md_mn_result_t * resp)2158 mdmn_do_mark_clean(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp)
2159 {
2160 	md_mn_msg_rr_clean_t	*d;
2161 	md_mn_rr_clean_params_t	*rcp;
2162 	int			ret;
2163 
2164 	resp->mmr_out_size = 0;
2165 	resp->mmr_err_size = 0;
2166 	resp->mmr_out = NULL;
2167 	resp->mmr_err = NULL;
2168 	resp->mmr_comm_state = MDMNE_ACK;
2169 	d = (md_mn_msg_rr_clean_t *)((void *)(msg->msg_event_data));
2170 
2171 	rcp = Zalloc(sizeof (struct md_mn_rr_clean_params) +
2172 	    MDMN_MSG_RR_CLEAN_DATA_BYTES(d));
2173 	MD_SETDRIVERNAME(rcp, MD_MIRROR, MD_MIN2SET(d->rr_mnum))
2174 	rcp->rr_mnum = d->rr_mnum;
2175 	rcp->rr_nodeid = d->rr_nodeid;
2176 	rcp->rr_start_size = d->rr_start_size;
2177 	(void) memcpy(MDMN_RR_CLEAN_PARAMS_DATA(rcp), MDMN_MSG_RR_CLEAN_DATA(d),
2178 	    MDMN_MSG_RR_CLEAN_DATA_BYTES(d));
2179 
2180 	ret = metaioctl(MD_MN_RR_CLEAN, rcp, &rcp->mde, NULL);
2181 
2182 	Free(rcp);
2183 
2184 	resp->mmr_exitval = ret;
2185 }
2186