xref: /titanic_41/usr/src/lib/lvm/libmeta/common/meta_mn_comm.c (revision bf85a12b7c81d0745d5a8aff65baeff50006cde9)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <stdlib.h>
28 #include <unistd.h>
29 #include <wait.h>
30 #include <sys/time.h>
31 #include <strings.h>
32 #include <meta.h>
33 #include <syslog.h>
34 
35 extern md_mn_msg_tbl_entry_t  msg_table[];
36 
37 /*
38  * When contacting the local rpc.mdcommd we always want to do that using
39  * the IPv4 version of localhost.
40  */
41 #define	LOCALHOST_IPv4	"127.0.0.1"
42 
43 md_mn_msgclass_t
mdmn_get_message_class(md_mn_msgtype_t msgtype)44 mdmn_get_message_class(md_mn_msgtype_t msgtype)
45 {
46 	return (msg_table[msgtype].mte_class);
47 }
48 
49 void (*
mdmn_get_handler(md_mn_msgtype_t msgtype)50 mdmn_get_handler(md_mn_msgtype_t msgtype))
51 	(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *res)
52 {
53 	return (msg_table[msgtype].mte_handler);
54 }
55 
56 int (*
mdmn_get_submessage_generator(md_mn_msgtype_t msgtype)57 mdmn_get_submessage_generator(md_mn_msgtype_t msgtype))
58 	(md_mn_msg_t *msg, md_mn_msg_t **msglist)
59 {
60 	return (msg_table[msgtype].mte_smgen);
61 }
62 
63 time_t
mdmn_get_timeout(md_mn_msgtype_t msgtype)64 mdmn_get_timeout(md_mn_msgtype_t msgtype)
65 {
66 	return (msg_table[msgtype].mte_timeout);
67 }
68 
69 
70 void
ldump_msg(char * prefix,md_mn_msg_t * msg)71 ldump_msg(char *prefix, md_mn_msg_t *msg)
72 {
73 	(void) fprintf(stderr, "%s &msg       = 0x%x\n", prefix, (uint_t)msg);
74 	(void) fprintf(stderr, "%s ID         = (%d, 0x%llx-%d)\n", prefix,
75 	    MSGID_ELEMS(msg->msg_msgid));
76 	(void) fprintf(stderr, "%s sender     = %d\n", prefix, msg->msg_sender);
77 	(void) fprintf(stderr, "%s flags      = 0x%x\n",
78 	    prefix, msg->msg_flags);
79 	(void) fprintf(stderr, "%s setno      = %d\n", prefix, msg->msg_setno);
80 	(void) fprintf(stderr, "%s recipient  = %d\n",
81 	    prefix, msg->msg_recipient);
82 	(void) fprintf(stderr, "%s type       = %d\n", prefix, msg->msg_type);
83 	(void) fprintf(stderr, "%s size       = %d\n",
84 	    prefix, msg->msg_event_size);
85 }
86 
87 #define	COMMD_PROGNAME	"rpc.mdcommd"
88 
89 extern uint_t meta_rpc_err_mask(void);
90 
91 /*
92  * If a clnt_call gets an RPC error, force the message out here with details.
93  * This would be nice to send to commd_debug(), but we can't call rpc.mdcommd
94  * code from libmeta.
95  */
96 static void
mdmn_handle_RPC_error(CLIENT * clnt,char * ident,md_mn_nodeid_t nid)97 mdmn_handle_RPC_error(CLIENT *clnt, char *ident, md_mn_nodeid_t nid)
98 {
99 	/*
100 	 * This is sized for a max message which would look like this:
101 	 * "mdmn_wakeup_initiator: rpc.mdcommd node 4294967295"
102 	 */
103 	char errstr[51];
104 	struct rpc_err e;
105 
106 	CLNT_GETERR((CLIENT *) clnt, &e);
107 	if (meta_rpc_err_mask() & (1 << e.re_status)) {
108 		if (nid == 0) {
109 			(void) snprintf(errstr, sizeof (errstr),
110 			    "%s: %s node (local)", ident, COMMD_PROGNAME);
111 		} else {
112 			(void) snprintf(errstr, sizeof (errstr),
113 			    "%s: %s node %d", ident, COMMD_PROGNAME, nid);
114 		}
115 		syslog(LOG_WARNING, "mdmn_handle_RPC_error: %s",
116 		    clnt_sperror(clnt, errstr));
117 	}
118 }
119 
120 /* Default timeout can be changed using clnt_control() */
121 static struct timeval TIMEOUT = { 25, 0 };
122 
123 md_mn_result_t *
mdmn_send_2(argp,clnt,nid)124 mdmn_send_2(argp, clnt, nid)
125 	md_mn_msg_t *argp;
126 	CLIENT *clnt;
127 	md_mn_nodeid_t nid;
128 {
129 	enum clnt_stat	res;
130 	md_mn_result_t *clnt_res = Zalloc(sizeof (md_mn_result_t));
131 
132 	res = clnt_call(clnt, mdmn_send,
133 		(xdrproc_t)xdr_md_mn_msg_t, (caddr_t)argp,
134 		(xdrproc_t)xdr_md_mn_result_t, (caddr_t)clnt_res, TIMEOUT);
135 
136 	if (res == RPC_SUCCESS) {
137 		return (clnt_res);
138 	}
139 	mdmn_handle_RPC_error(clnt, "mdmn_send", nid);
140 	Free(clnt_res);
141 	return (NULL);
142 }
143 
144 int *
mdmn_work_2(argp,clnt,nid)145 mdmn_work_2(argp, clnt, nid)
146 	md_mn_msg_t *argp;
147 	CLIENT *clnt;
148 	md_mn_nodeid_t nid;
149 {
150 	enum clnt_stat	res;
151 	int *clnt_res = Zalloc(sizeof (int));
152 
153 	res = clnt_call(clnt, mdmn_work,
154 		(xdrproc_t)xdr_md_mn_msg_t, (caddr_t)argp,
155 		(xdrproc_t)xdr_int, (caddr_t)clnt_res, TIMEOUT);
156 
157 	if (res == RPC_SUCCESS) {
158 		return (clnt_res);
159 	}
160 	mdmn_handle_RPC_error(clnt, "mdmn_work", nid);
161 	Free(clnt_res);
162 	return (NULL);
163 }
164 
165 int *
mdmn_wakeup_initiator_2(argp,clnt,nid)166 mdmn_wakeup_initiator_2(argp, clnt, nid)
167 	md_mn_result_t *argp;
168 	CLIENT *clnt;
169 	md_mn_nodeid_t nid;
170 {
171 	enum clnt_stat	res;
172 	int *clnt_res = Zalloc(sizeof (int));
173 
174 	res = clnt_call(clnt, mdmn_wakeup_initiator,
175 		(xdrproc_t)xdr_md_mn_result_t, (caddr_t)argp,
176 		(xdrproc_t)xdr_int, (caddr_t)clnt_res, TIMEOUT);
177 
178 	if (res == RPC_SUCCESS) {
179 		return (clnt_res);
180 	}
181 	mdmn_handle_RPC_error(clnt, "mdmn_wakeup_initiator", nid);
182 	Free(clnt_res);
183 	return (NULL);
184 }
185 
186 int *
mdmn_wakeup_master_2(argp,clnt,nid)187 mdmn_wakeup_master_2(argp, clnt, nid)
188 	md_mn_result_t *argp;
189 	CLIENT *clnt;
190 	md_mn_nodeid_t nid;
191 {
192 	enum clnt_stat	res;
193 	int *clnt_res = Zalloc(sizeof (int));
194 
195 	res = clnt_call(clnt, mdmn_wakeup_master,
196 		(xdrproc_t)xdr_md_mn_result_t, (caddr_t)argp,
197 		(xdrproc_t)xdr_int, (caddr_t)clnt_res, TIMEOUT);
198 
199 	if (res == RPC_SUCCESS) {
200 		return (clnt_res);
201 	}
202 	mdmn_handle_RPC_error(clnt, "mdmn_wakeup_master", nid);
203 	Free(clnt_res);
204 	return (NULL);
205 }
206 
207 int *
mdmn_comm_lock_2(argp,clnt,nid)208 mdmn_comm_lock_2(argp, clnt, nid)
209 	md_mn_set_and_class_t *argp;
210 	CLIENT *clnt;
211 	md_mn_nodeid_t nid;
212 {
213 	enum clnt_stat	res;
214 	int *clnt_res = Zalloc(sizeof (int));
215 
216 	res = clnt_call(clnt, mdmn_comm_lock,
217 		(xdrproc_t)xdr_md_mn_set_and_class_t, (caddr_t)argp,
218 		(xdrproc_t)xdr_int, (caddr_t)clnt_res, TIMEOUT);
219 
220 	if (res == RPC_SUCCESS) {
221 		return (clnt_res);
222 	}
223 	mdmn_handle_RPC_error(clnt, "mdmn_comm_lock", nid);
224 	Free(clnt_res);
225 	return (NULL);
226 }
227 
228 int *
mdmn_comm_unlock_2(argp,clnt,nid)229 mdmn_comm_unlock_2(argp, clnt, nid)
230 	md_mn_set_and_class_t *argp;
231 	CLIENT *clnt;
232 	md_mn_nodeid_t nid;
233 {
234 	enum clnt_stat	res;
235 	int *clnt_res = Zalloc(sizeof (int));
236 
237 	res = clnt_call(clnt, mdmn_comm_unlock,
238 		(xdrproc_t)xdr_md_mn_set_and_class_t, (caddr_t)argp,
239 		(xdrproc_t)xdr_int, (caddr_t)clnt_res, TIMEOUT);
240 
241 	if (res == RPC_SUCCESS) {
242 		return (clnt_res);
243 	}
244 	mdmn_handle_RPC_error(clnt, "mdmn_comm_unlock", nid);
245 	Free(clnt_res);
246 	return (NULL);
247 }
248 
249 int *
mdmn_comm_suspend_2(argp,clnt,nid)250 mdmn_comm_suspend_2(argp, clnt, nid)
251 	md_mn_set_and_class_t *argp;
252 	CLIENT *clnt;
253 	md_mn_nodeid_t nid;
254 {
255 	enum clnt_stat	res;
256 	int *clnt_res = Zalloc(sizeof (int));
257 
258 	res = clnt_call(clnt, mdmn_comm_suspend,
259 		(xdrproc_t)xdr_md_mn_set_and_class_t, (caddr_t)argp,
260 		(xdrproc_t)xdr_int, (caddr_t)clnt_res, TIMEOUT);
261 
262 	if (res == RPC_SUCCESS) {
263 		return (clnt_res);
264 	}
265 	mdmn_handle_RPC_error(clnt, "mdmn_comm_suspend", nid);
266 	Free(clnt_res);
267 	return (NULL);
268 }
269 
270 int *
mdmn_comm_resume_2(argp,clnt,nid)271 mdmn_comm_resume_2(argp, clnt, nid)
272 	md_mn_set_and_class_t *argp;
273 	CLIENT *clnt;
274 	md_mn_nodeid_t nid;
275 {
276 	enum clnt_stat	res;
277 	int *clnt_res = Zalloc(sizeof (int));
278 
279 	res = clnt_call(clnt, mdmn_comm_resume,
280 		(xdrproc_t)xdr_md_mn_set_and_class_t, (caddr_t)argp,
281 		(xdrproc_t)xdr_int, (caddr_t)clnt_res, TIMEOUT);
282 
283 	if (res == RPC_SUCCESS) {
284 		return (clnt_res);
285 	}
286 	mdmn_handle_RPC_error(clnt, "mdmn_comm_resume", nid);
287 	Free(clnt_res);
288 	return (NULL);
289 }
290 
291 int *
mdmn_comm_reinit_set_2(argp,clnt,nid)292 mdmn_comm_reinit_set_2(argp, clnt, nid)
293 	set_t *argp;
294 	CLIENT *clnt;
295 	md_mn_nodeid_t nid;
296 {
297 	enum clnt_stat	res;
298 	int *clnt_res = Zalloc(sizeof (int));
299 
300 	res = clnt_call(clnt, mdmn_comm_reinit_set,
301 		(xdrproc_t)xdr_set_t, (caddr_t)argp,
302 		(xdrproc_t)xdr_int, (caddr_t)clnt_res, TIMEOUT);
303 
304 	if (res == RPC_SUCCESS) {
305 		return (clnt_res);
306 	}
307 	mdmn_handle_RPC_error(clnt, "mdmn_comm_reinit_set", nid);
308 	Free(clnt_res);
309 	return (NULL);
310 }
311 
312 int *
mdmn_comm_msglock_2(argp,clnt,nid)313 mdmn_comm_msglock_2(argp, clnt, nid)
314 	md_mn_type_and_lock_t *argp;
315 	CLIENT *clnt;
316 	md_mn_nodeid_t nid;
317 {
318 	enum clnt_stat	res;
319 	int *clnt_res = Zalloc(sizeof (int));
320 
321 	res = clnt_call(clnt, mdmn_comm_msglock,
322 		(xdrproc_t)xdr_md_mn_type_and_lock_t, (caddr_t)argp,
323 		(xdrproc_t)xdr_int, (caddr_t)clnt_res, TIMEOUT);
324 
325 	if (res == RPC_SUCCESS) {
326 		return (clnt_res);
327 	}
328 	mdmn_handle_RPC_error(clnt, "mdmn_comm_msglock", nid);
329 	Free(clnt_res);
330 	return (NULL);
331 }
332 
333 
334 #define	USECS_PER_TICK	10000
335 
336 
337 /*
338  * Let the kernel create a clusterwide unique message ID
339  *
340  * returns 0 on success
341  *	   1 on failure
342  */
343 
344 int
mdmn_create_msgid(md_mn_msgid_t * msgid)345 mdmn_create_msgid(md_mn_msgid_t *msgid)
346 {
347 	md_error_t	mde = mdnullerror;
348 
349 	if (msgid == NULL) {
350 		return (1); /* failure */
351 	}
352 
353 	if (metaioctl(MD_IOCGUNIQMSGID, msgid, &mde, NULL) != 0) {
354 		msgid->mid_nid = ~0u;
355 		msgid->mid_time = 0LL;
356 		return (1); /* failure */
357 	}
358 
359 	/*
360 	 * mid_smid and mid_oclass are only used for submessages.
361 	 * mdmn_create_msgid is never called for submessages, as they inherit
362 	 * the message ID from their parent.
363 	 * Thus we can safely null out the following fields.
364 	 */
365 	msgid->mid_smid = 0;
366 	msgid->mid_oclass = 0;
367 
368 	/* if the node_id is not set yet, somethings seems to be wrong */
369 	if (msgid->mid_nid == ~0u) {
370 		return (1); /* failure */
371 	}
372 
373 	return (0); /* success */
374 }
375 
376 md_mn_result_t *
copy_result(md_mn_result_t * res)377 copy_result(md_mn_result_t *res)
378 {
379 	md_mn_result_t *nres;
380 	nres = Zalloc(sizeof (md_mn_result_t));
381 	/* It's MSGID_COPY(from, to); */
382 	MSGID_COPY(&(res->mmr_msgid), &(nres->mmr_msgid));
383 	nres->mmr_msgtype	= res->mmr_msgtype;
384 	nres->mmr_setno		= res->mmr_setno;
385 	nres->mmr_flags		= res->mmr_flags;
386 	nres->mmr_sender	= res->mmr_sender;
387 	nres->mmr_failing_node	= res->mmr_failing_node;
388 	nres->mmr_comm_state	= res->mmr_comm_state;
389 	nres->mmr_exitval	= res->mmr_exitval;
390 	nres->mmr_out_size	= res->mmr_out_size;
391 	nres->mmr_err_size	= res->mmr_err_size;
392 	if (res->mmr_out_size > 0) {
393 		nres->mmr_out = Zalloc(res->mmr_out_size);
394 		bcopy(res->mmr_out, nres->mmr_out, res->mmr_out_size);
395 	}
396 	if (res->mmr_err_size > 0) {
397 		nres->mmr_err = Zalloc(res->mmr_err_size);
398 		bcopy(res->mmr_err, nres->mmr_err, res->mmr_err_size);
399 	}
400 	if (res->mmr_ep.host != '\0') {
401 		nres->mmr_ep.host = strdup(res->mmr_ep.host);
402 	}
403 	if (res->mmr_ep.extra != '\0') {
404 		nres->mmr_ep.extra = strdup(res->mmr_ep.extra);
405 	}
406 	if (res->mmr_ep.name != '\0') {
407 		nres->mmr_ep.name = strdup(res->mmr_ep.name);
408 	}
409 	return (nres);
410 }
411 
412 void
free_result(md_mn_result_t * res)413 free_result(md_mn_result_t *res)
414 {
415 	if (res->mmr_out_size > 0) {
416 		Free(res->mmr_out);
417 	}
418 	if (res->mmr_err_size > 0) {
419 		Free(res->mmr_err);
420 	}
421 	if (res->mmr_ep.host != '\0') {
422 		Free(res->mmr_ep.host);
423 	}
424 	if (res->mmr_ep.extra != '\0') {
425 		Free(res->mmr_ep.extra);
426 	}
427 	if (res->mmr_ep.name != '\0') {
428 		Free(res->mmr_ep.name);
429 	}
430 	Free(res);
431 }
432 
433 
434 /* allocate a new message and copy a given message into it */
435 md_mn_msg_t *
copy_msg(md_mn_msg_t * msg,md_mn_msg_t * dest)436 copy_msg(md_mn_msg_t *msg, md_mn_msg_t *dest)
437 {
438 	md_mn_msg_t *nmsg;
439 
440 	nmsg = dest;
441 
442 	if (nmsg == NULL) {
443 		nmsg = Zalloc(sizeof (md_mn_msg_t));
444 	}
445 	if (nmsg->msg_event_data == NULL) {
446 		nmsg->msg_event_data = Zalloc(msg->msg_event_size);
447 	}
448 	/* It's MSGID_COPY(from, to); */
449 	MSGID_COPY(&(msg->msg_msgid), &(nmsg->msg_msgid));
450 	nmsg->msg_sender	= msg->msg_sender;
451 	nmsg->msg_flags		= msg->msg_flags;
452 	nmsg->msg_setno		= msg->msg_setno;
453 	nmsg->msg_type		= msg->msg_type;
454 	nmsg->msg_recipient	= msg->msg_recipient;
455 	nmsg->msg_event_size	= msg->msg_event_size;
456 	if (msg->msg_event_size > 0) {
457 		bcopy(msg->msg_event_data, nmsg->msg_event_data,
458 		    msg->msg_event_size);
459 	}
460 	return (nmsg);
461 }
462 
463 void
copy_msg_2(md_mn_msg_t * msg,md_mn_msg_od_t * msgod,int direction)464 copy_msg_2(md_mn_msg_t *msg, md_mn_msg_od_t *msgod, int direction)
465 {
466 	assert((direction == MD_MN_COPY_TO_ONDISK) ||
467 	    (direction == MD_MN_COPY_TO_INCORE));
468 
469 	if (direction == MD_MN_COPY_TO_ONDISK) {
470 		MSGID_COPY(&(msg->msg_msgid), &(msgod->msg_msgid));
471 		msgod->msg_sender	= msg->msg_sender;
472 		msgod->msg_flags	= msg->msg_flags;
473 		msgod->msg_setno	= msg->msg_setno;
474 		msgod->msg_type		= msg->msg_type;
475 		msgod->msg_recipient	= msg->msg_recipient;
476 		msgod->msg_od_event_size = msg->msg_event_size;
477 		/* paranoid checks */
478 		if (msg->msg_event_size != 0 && msg->msg_event_data != NULL)
479 			bcopy(msg->msg_event_data,
480 			    &msgod->msg_od_event_data[0], msg->msg_event_size);
481 	} else {
482 		MSGID_COPY(&(msgod->msg_msgid), &(msg->msg_msgid));
483 		msg->msg_sender	= msgod->msg_sender;
484 		msg->msg_flags		= msgod->msg_flags;
485 		msg->msg_setno		= msgod->msg_setno;
486 		msg->msg_type		= msgod->msg_type;
487 		msg->msg_recipient	= msgod->msg_recipient;
488 		msg->msg_event_size	= msgod->msg_od_event_size;
489 		if (msg->msg_event_data == NULL)
490 			msg->msg_event_data = Zalloc(msg->msg_event_size);
491 
492 		bcopy(&msgod->msg_od_event_data[0],
493 		    msg->msg_event_data, msgod->msg_od_event_size);
494 	}
495 }
496 
497 /* Free a message */
498 void
free_msg(md_mn_msg_t * msg)499 free_msg(md_mn_msg_t *msg)
500 {
501 	if (msg->msg_event_size > 0) {
502 		Free(msg->msg_event_data);
503 	}
504 	Free(msg);
505 }
506 
507 
508 /* The following declarations are only for the next two routines */
509 
510 md_mn_client_list_t *mdmn_clients;
511 
512 mutex_t	mcl_mutex;
513 #define	MNGLC_INIT_ONLY	0x0001
514 #define	MNGLC_FOR_REAL	0x0002
515 /*
516  * mdmn_get_local_clnt(flag)
517  * If there is a client in the free pool, get one,
518  * If no client is available, create one.
519  * Every multithreaded application that uses mdmn_send_message must call it
520  * single threaded first with special flags so we do the initialization
521  * stuff in a safe environment.
522  *
523  * Input: MNGLC_INIT_ONLY: just initializes the mutex
524  *        MNGLC_FOR_REAL : do real work
525  * Output:
526  *	An rpc client for sending rpc requests to the local commd
527  *	NULL in case of an error
528  *
529  */
530 static CLIENT *
mdmn_get_local_clnt(uint_t flag)531 mdmn_get_local_clnt(uint_t flag)
532 {
533 	CLIENT *local_daemon;
534 	static int inited = 0;
535 	md_mn_client_list_t *tmp;
536 
537 	if (inited == 0) {
538 		(void) mutex_init(&mcl_mutex, USYNC_THREAD, NULL);
539 		inited = 1;
540 	}
541 
542 	if (flag == MNGLC_INIT_ONLY)
543 		return ((CLIENT *)NULL);
544 
545 	(void) mutex_lock(&mcl_mutex);
546 	if (mdmn_clients == (md_mn_client_list_t *)NULL) {
547 		/* if there is no entry, create a client and return a it */
548 		local_daemon = meta_client_create(LOCALHOST_IPv4, MDMN_COMMD,
549 		    TWO, "tcp");
550 	} else {
551 		/*
552 		 * If there is an entry from a previous put operation,
553 		 * remove it from the head of the list and free the list stuff
554 		 * around it. Then return the client
555 		 */
556 		local_daemon = mdmn_clients->mcl_clnt;
557 		tmp = mdmn_clients;
558 		mdmn_clients = mdmn_clients->mcl_next;
559 		Free(tmp);
560 	}
561 	(void) mutex_unlock(&mcl_mutex);
562 
563 
564 	if (local_daemon == (CLIENT *)NULL) {
565 		clnt_pcreateerror("local_daemon");
566 	}
567 
568 	return (local_daemon);
569 }
570 
571 /*
572  * mdmn_put_local_clnt()
573  * returns a no longer used client to the pool
574  *
575  * Input: an RPC client
576  * Output: void
577  */
578 static void
mdmn_put_local_clnt(CLIENT * local_daemon)579 mdmn_put_local_clnt(CLIENT *local_daemon)
580 {
581 	md_mn_client_list_t *tmp;
582 
583 	(void) mutex_lock(&mcl_mutex);
584 
585 	tmp =  mdmn_clients;
586 	mdmn_clients = (md_mn_client_list_t *)
587 	    malloc(sizeof (md_mn_client_list_t));
588 	mdmn_clients->mcl_clnt = local_daemon;
589 	mdmn_clients->mcl_next = tmp;
590 
591 	(void) mutex_unlock(&mcl_mutex);
592 }
593 
594 /*
595  * This is the regular interface for sending a message.
596  * This function only passes through all arguments to
597  * mdmn_send_message_with_msgid() and adds a NULL for the message ID.
598  *
599  * Normally, you don't have already a message ID for the message you want
600  * to send.  Only in case of replaying a previously logged message,
601  * a msgid is already attached to it.
602  * In that case mdmn_send_message_with_msgid() has to be called directly.
603  *
604  * The recipient argument is almost always unused, and is therefore typically
605  * set to zero, as zero is an invalid cluster nodeid.  The exceptions are the
606  * marking and clearing of the DRL from a node that is not currently the
607  * owner.  In these cases, the recipient argument will be the nodeid of the
608  * mirror owner, and MD_MSGF_DIRECTED will be set in the flags.  Non-owner
609  * nodes will not receive these messages.
610  *
611  * Return values / CAVEAT EMPTOR: see mdmn_send_message_with_msgid()
612  */
613 
614 int
mdmn_send_message(set_t setno,md_mn_msgtype_t type,uint_t flags,md_mn_nodeid_t recipient,char * data,int size,md_mn_result_t ** result,md_error_t * ep)615 mdmn_send_message(
616 		set_t setno,
617 		md_mn_msgtype_t type,
618 		uint_t flags,
619 		md_mn_nodeid_t recipient,
620 		char *data,
621 		int size,
622 		md_mn_result_t **result,
623 		md_error_t *ep)
624 {
625 	return (mdmn_send_message_with_msgid(setno, type, flags,
626 	    recipient, data, size, result, MD_NULL_MSGID, ep));
627 }
628 /*
629  * mdmn_send_message_with_msgid()
630  * Create a message from the given pieces of data and hand it over
631  * to the local commd.
632  * This may fail for various reasons (rpc error / class busy / class locked ...)
633  * Some error types are immediately deadly, others will cause retries
634  * until the request is fulfilled or until the retries are ecxceeded.
635  *
636  * In case an error is returned it is up to the user to decide what to do.
637  *
638  * Returns:
639  *	0 on success
640  *	1 if retries1 exceeded
641  *	2 if retries2 exceeded
642  *	-1 if connecting to the local daemon failed
643  *	-2 if the RPC call to the local daemon failed
644  *	-3 if this node hasn't yet joined the set
645  *	-4 if any other problem occured
646  *
647  * CAVEAT EMPTOR:
648  *	The caller is responsible for calling free_result() when finished with
649  *	the results!
650  */
651 int
mdmn_send_message_with_msgid(set_t setno,md_mn_msgtype_t type,uint_t flags,md_mn_nodeid_t recipient,char * data,int size,md_mn_result_t ** result,md_mn_msgid_t * msgid,md_error_t * ep)652 mdmn_send_message_with_msgid(
653 		set_t setno,
654 		md_mn_msgtype_t type,
655 		uint_t flags,
656 		md_mn_nodeid_t recipient,
657 		char *data,
658 		int size,
659 		md_mn_result_t **result,
660 		md_mn_msgid_t *msgid,
661 		md_error_t *ep)
662 {
663 	uint_t retry1, ticks1, retry2, ticks2;
664 	int retval;
665 
666 	CLIENT *local_daemon;
667 	struct timeval timeout;
668 
669 	md_mn_msg_t msg;
670 	md_mn_result_t *resp;
671 
672 	/*
673 	 * Special case for multithreaded applications:
674 	 * When starting up, the application should call mdmn_send_message
675 	 * single threaded with all parameters set to NULL.
676 	 * When we detect this we know, we safely can do initialization
677 	 * stuff here.
678 	 * We only check for set and type being zero
679 	 */
680 	if ((setno == 0) && (type == 0)) {
681 		/* do all needed initializations here */
682 		(void) mdmn_get_local_clnt(MNGLC_INIT_ONLY);
683 		return (0); /* success */
684 	}
685 
686 
687 	/* did the caller specify space to store the result pointer? */
688 	if (result == (md_mn_result_t **)NULL) {
689 		syslog(LOG_INFO, dgettext(TEXT_DOMAIN,
690 		    "FATAL, can not allocate result structure\n"));
691 		return (-4);
692 	}
693 	*result = NULL;
694 
695 	/* Replay messages already have their msgID */
696 	if ((flags & MD_MSGF_REPLAY_MSG) == 0) {
697 		if (mdmn_create_msgid(&msg.msg_msgid) != 0) {
698 			syslog(LOG_INFO, dgettext(TEXT_DOMAIN,
699 			    "FATAL, can not create message ID\n"));
700 			return (-4);
701 		}
702 	} else {
703 		/* in this case a message ID must be specified */
704 		assert(msgid != MD_NULL_MSGID);
705 		MSGID_COPY(msgid, &msg.msg_msgid);
706 	}
707 
708 
709 	/*
710 	 * When setting the flags, additionally apply the
711 	 * default flags for this message type.
712 	 */
713 	msg.msg_flags		= flags;
714 	msg.msg_setno		= setno;
715 	msg.msg_recipient	= recipient;
716 	msg.msg_type		= type;
717 	msg.msg_event_size	= size;
718 	msg.msg_event_data	= data;
719 
720 	/*
721 	 * For the timeout pick the specific timeout for the message times the
722 	 * the maximum number of nodes.
723 	 * This is a better estimate than 1 hour or 3 days or never.
724 	 */
725 	timeout.tv_sec = mdmn_get_timeout(type) * NNODES;
726 	timeout.tv_usec = 0;
727 
728 	if (flags & MD_MSGF_VERBOSE) {
729 		syslog(LOG_INFO, "send_message: ID=(%d, 0x%llx-%d)\n",
730 		    MSGID_ELEMS(msg.msg_msgid));
731 	}
732 
733 	/* get an RPC client to the local commd */
734 	local_daemon = mdmn_get_local_clnt(MNGLC_FOR_REAL);
735 	if (local_daemon == (CLIENT *)NULL) {
736 		return (-1);
737 	}
738 	clnt_control(local_daemon, CLSET_TIMEOUT, (char *)&timeout);
739 
740 	retry1 = msg_table[type].mte_retry1;
741 	ticks1 = msg_table[type].mte_ticks1;
742 	retry2 = msg_table[type].mte_retry2;
743 	ticks2 = msg_table[type].mte_ticks2;
744 
745 	/*
746 	 * run that loop until:
747 	 * - commstate is Ok
748 	 * - deadly commstate occured
749 	 * - retries1 or retries2 exceeded
750 	 */
751 	for (; ; ) {
752 		*result = mdmn_send_2(&msg, local_daemon, 0);
753 		resp = *result;
754 		if (resp != (md_mn_result_t *)NULL) {
755 			/* Bingo! */
756 			if (resp->mmr_comm_state == MDMNE_ACK) {
757 				retval = 0;
758 				goto out;
759 			}
760 			/* Hmm... what if there's no handler? */
761 			if (resp->mmr_comm_state == MDMNE_NO_HANDLER) {
762 				retval = 0;
763 				goto out;
764 
765 			}
766 			/*
767 			 * This node didn't yet join the disk set. It is not
768 			 * supposed to send any messages then.
769 			 * This is deadly (no retries)
770 			 */
771 			if (resp->mmr_comm_state == MDMNE_NOT_JOINED) {
772 				retval = -3;
773 				goto out;
774 
775 			}
776 			/* these two are deadly too (no retries) */
777 			if ((resp->mmr_comm_state == MDMNE_NO_WAKEUP_ENTRY) ||
778 			    (resp->mmr_comm_state == MDMNE_LOG_FAIL)) {
779 				retval = -4;
780 				goto out;
781 
782 			}
783 			/* Class busy? Use retry1 */
784 			if (resp->mmr_comm_state == MDMNE_CLASS_BUSY) {
785 				if (retry1-- == 0) {
786 					retval = 1; /* retry1 exceeded */
787 					goto out;
788 				}
789 				(void) usleep(ticks1 * USECS_PER_TICK);
790 				free_result(resp);
791 
792 				if (flags & MD_MSGF_VERBOSE)
793 					(void) printf("#Resend1 ID=(%d, "
794 					    "0x%llx-%d)\n",
795 					    MSGID_ELEMS(msg.msg_msgid));
796 				continue;
797 			}
798 			if ((resp->mmr_comm_state == MDMNE_CLASS_LOCKED) ||
799 			    (resp->mmr_comm_state == MDMNE_ABORT)) {
800 				/*
801 				 * Be patient, wait for 1 secs and try again.
802 				 * It's not likely that the ABORT condition ever
803 				 * goes away, but it won't hurt to retry
804 				 */
805 				free_result(resp);
806 				(void) sleep(1);
807 				continue;
808 			}
809 			if (resp->mmr_comm_state == MDMNE_SUSPENDED) {
810 				if (flags & MD_MSGF_FAIL_ON_SUSPEND) {
811 					/* caller wants us to fail here */
812 					(void) mddserror(ep,
813 					    MDE_DS_NOTNOW_RECONFIG, setno,
814 					    mynode(), mynode(), NULL);
815 					retval = -4;
816 					goto out;
817 				} else {
818 					/* wait for 1 secs and try again. */
819 					free_result(resp);
820 					(void) sleep(1);
821 					continue;
822 				}
823 			}
824 		} else {
825 			/*
826 			 * If we get a NULL back from the rpc call, try to
827 			 * reinitialize the client.
828 			 * Depending on retries2 we try again, or not.
829 			 */
830 			syslog(LOG_INFO,
831 			    "send_message: ID=(%d, 0x%llx-%d) resp = NULL\n",
832 			    MSGID_ELEMS(msg.msg_msgid));
833 
834 			clnt_destroy(local_daemon);
835 			local_daemon = mdmn_get_local_clnt(MNGLC_FOR_REAL);
836 
837 			if (local_daemon == (CLIENT *)NULL) {
838 				return (-1);
839 			}
840 			clnt_control(local_daemon, CLSET_TIMEOUT,
841 			    (char *)&timeout);
842 		}
843 
844 		/*
845 		 * If we are here, either resp is zero or resp is non-zero
846 		 * but some commstate not mentioned above occured.
847 		 * In either case we use retry2
848 		 */
849 		if (retry2-- == 0) {
850 			syslog(LOG_INFO, dgettext(TEXT_DOMAIN,
851 			    "send_message: (%d, 0x%llx-%d) retry2 exceeded\n"),
852 			    MSGID_ELEMS(msg.msg_msgid));
853 
854 			retval = 2; /* retry2 exceeded */
855 			goto out;
856 		}
857 		if (flags & MD_MSGF_VERBOSE) {
858 			syslog(LOG_DEBUG, dgettext(TEXT_DOMAIN,
859 			    "send_message: (%d, 0x%llx-%d) resend on retry2\n"),
860 			    MSGID_ELEMS(msg.msg_msgid));
861 		}
862 
863 		(void) usleep(ticks2 * USECS_PER_TICK);
864 
865 		if (resp != (md_mn_result_t *)NULL) {
866 			free_result(resp);
867 		}
868 	}
869 out:
870 	mdmn_put_local_clnt(local_daemon);
871 	return (retval);
872 }
873 
874 /*
875  * suspend the commd for a given set/class combination.
876  *
877  * Parameter:
878  *	set number or 0 (meaning all sets)
879  *	class number or 0 (meaning all classes)
880  *
881  * Returns:
882  *	0 on success (set is suspended and all messages drained)
883  *	MDE_DS_COMMDCTL_SUSPEND_NYD if set is not yet drained
884  *	MDE_DS_COMMDCTL_SUSPEND_FAIL if any failure occurred
885  */
886 int
mdmn_suspend(set_t setno,md_mn_msgclass_t class,long timeout)887 mdmn_suspend(set_t setno, md_mn_msgclass_t class, long timeout)
888 {
889 	int			*resp;
890 	CLIENT			*local_daemon;
891 	md_mn_set_and_class_t	msc;
892 	md_error_t		xep = mdnullerror;
893 
894 	if ((setno >= MD_MAXSETS) || (class >= MD_MN_NCLASSES)) {
895 		return (MDE_DS_COMMDCTL_SUSPEND_FAIL);
896 	}
897 	local_daemon = meta_client_create(LOCALHOST_IPv4, MDMN_COMMD, TWO,
898 	    "tcp");
899 	if (local_daemon == (CLIENT *)NULL) {
900 		clnt_pcreateerror("local_daemon");
901 		return (MDE_DS_COMMDCTL_SUSPEND_FAIL);
902 	}
903 
904 	if (timeout != 0) {
905 		if (cl_sto(local_daemon, LOCALHOST_IPv4, timeout, &xep) != 0) {
906 			clnt_destroy(local_daemon);
907 			return (1);
908 		}
909 	}
910 
911 	msc.msc_set = setno;
912 	msc.msc_class = class;
913 	msc.msc_flags = 0;
914 
915 	resp = mdmn_comm_suspend_2(&msc, local_daemon, 0);
916 	clnt_destroy(local_daemon);
917 
918 	if (resp == NULL) {
919 		return (MDE_DS_COMMDCTL_SUSPEND_FAIL);
920 	}
921 
922 	if (*resp == MDMNE_ACK) {
923 		/* set successfully drained, no outstanding messages */
924 		return (0);
925 	}
926 	if (*resp != MDMNE_SET_NOT_DRAINED) {
927 		/* some error occurred */
928 		return (MDE_DS_COMMDCTL_SUSPEND_FAIL);
929 	}
930 
931 	/* still outstanding messages, return not yet drained failure */
932 	return (MDE_DS_COMMDCTL_SUSPEND_NYD);
933 }
934 
935 /*
936  * resume the commd for a given set/class combination.
937  *
938  * Parameter:
939  *	set number or 0 (meaning all sets)
940  *	class number or 0 (meaning all classes)
941  *
942  * Returns:
943  *	0 on success
944  *	MDE_DS_COMMDCTL_RESUME_FAIL on failure
945  */
946 int
mdmn_resume(set_t setno,md_mn_msgclass_t class,uint_t flags,long timeout)947 mdmn_resume(set_t setno, md_mn_msgclass_t class, uint_t flags, long timeout)
948 {
949 	md_mn_set_and_class_t	msc;
950 	int			ret = MDE_DS_COMMDCTL_RESUME_FAIL;
951 	int			*resp;
952 	CLIENT			*local_daemon;
953 	md_error_t		xep = mdnullerror;
954 
955 	if ((setno >= MD_MAXSETS) || (class >= MD_MN_NCLASSES)) {
956 		return (MDE_DS_COMMDCTL_RESUME_FAIL);
957 	}
958 	local_daemon = meta_client_create(LOCALHOST_IPv4, MDMN_COMMD, TWO,
959 	    "tcp");
960 	if (local_daemon == (CLIENT *)NULL) {
961 		clnt_pcreateerror("local_daemon");
962 		return (MDE_DS_COMMDCTL_RESUME_FAIL);
963 	}
964 
965 	if (timeout != 0) {
966 		if (cl_sto(local_daemon, LOCALHOST_IPv4, timeout, &xep) != 0) {
967 			clnt_destroy(local_daemon);
968 			return (1);
969 		}
970 	}
971 
972 	msc.msc_set = setno;
973 	msc.msc_class = class;
974 	msc.msc_flags = flags;
975 
976 	resp = mdmn_comm_resume_2(&msc, local_daemon, 0);
977 
978 	if (resp != NULL) {
979 		if (*resp == MDMNE_ACK) {
980 			ret = 0;
981 		}
982 		Free(resp);
983 	}
984 
985 	clnt_destroy(local_daemon);
986 	return (ret);
987 }
988 
989 /*
990  * abort all communication
991  *
992  * returns void, because: if *this* get's an error what do you want to do?
993  */
994 void
mdmn_abort(void)995 mdmn_abort(void)
996 {
997 	char *dummy = "abort";
998 	md_mn_result_t	*resultp = NULL;
999 	md_error_t	mdne = mdnullerror;
1000 
1001 	(void) mdmn_send_message(0, /* No set is needed for this message */
1002 	    MD_MN_MSG_ABORT, MD_MSGF_LOCAL_ONLY, 0,
1003 	    dummy, sizeof (dummy), &resultp, &mdne);
1004 
1005 	if (resultp != NULL) {
1006 		Free(resultp);
1007 	}
1008 }
1009 
1010 /*
1011  * trigger the reinitialization for a given set.
1012  *
1013  * Parameter: set number
1014  *
1015  * Returns:
1016  *	0 on success
1017  *	1 on failure
1018  */
1019 int
mdmn_reinit_set(set_t setno,long timeout)1020 mdmn_reinit_set(set_t setno, long timeout)
1021 {
1022 	int		ret = 1;
1023 	int		*resp;
1024 	CLIENT 		*local_daemon;
1025 	md_error_t	xep = mdnullerror;
1026 
1027 	if ((setno == 0) || (setno >= MD_MAXSETS)) {
1028 		return (1);
1029 	}
1030 	local_daemon = meta_client_create(LOCALHOST_IPv4, MDMN_COMMD, TWO,
1031 	    "tcp");
1032 	if (local_daemon == (CLIENT *)NULL) {
1033 		clnt_pcreateerror("local_daemon");
1034 		return (1);
1035 	}
1036 
1037 	if (timeout != 0) {
1038 		if (cl_sto(local_daemon, LOCALHOST_IPv4, timeout, &xep) != 0) {
1039 			clnt_destroy(local_daemon);
1040 			return (1);
1041 		}
1042 	}
1043 
1044 	resp = mdmn_comm_reinit_set_2(&setno, local_daemon, 0);
1045 
1046 	if (resp != NULL) {
1047 		if (*resp == MDMNE_ACK) {
1048 			ret = 0;
1049 		}
1050 		Free(resp);
1051 	}
1052 
1053 	clnt_destroy(local_daemon);
1054 	return (ret);
1055 }
1056 
1057 
1058 /*
1059  * Lock a single message type from being processed on this node
1060  *
1061  * Parameter: md_mn_msgtype_t msgtype, uint_t locktype
1062  *
1063  * Returns:
1064  *	0 on success
1065  *	1 on failure
1066  */
1067 int
mdmn_msgtype_lock(md_mn_msgtype_t msgtype,uint_t locktype)1068 mdmn_msgtype_lock(md_mn_msgtype_t msgtype, uint_t locktype)
1069 {
1070 	int			ret = 1;
1071 	int			*resp;
1072 	CLIENT			*local_daemon;
1073 	md_mn_type_and_lock_t	mmtl;
1074 
1075 
1076 	if ((msgtype == 0) || (msgtype >= MD_MN_NMESSAGES)) {
1077 		return (1);
1078 	}
1079 	local_daemon = meta_client_create(LOCALHOST_IPv4, MDMN_COMMD, TWO,
1080 	    "tcp");
1081 	if (local_daemon == (CLIENT *)NULL) {
1082 		clnt_pcreateerror("local_daemon");
1083 		return (1);
1084 	}
1085 	mmtl.mmtl_type = msgtype;
1086 	mmtl.mmtl_lock = locktype;
1087 
1088 	resp = mdmn_comm_msglock_2(&mmtl, local_daemon, 0);
1089 
1090 	if (resp != NULL) {
1091 		if (*resp == MDMNE_ACK) {
1092 			ret = 0;
1093 		}
1094 		Free(resp);
1095 	}
1096 
1097 	clnt_destroy(local_daemon);
1098 	return (ret);
1099 }
1100