xref: /titanic_41/usr/src/cmd/lvm/rpc.mdcommd/mdmn_subr.c (revision fd9cb95cbb2f626355a60efb9d02c5f0a33c10e6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <unistd.h>
30 #include <sys/types.h>
31 #include <sys/socket.h>
32 #include <netinet/in.h>
33 #include <arpa/inet.h>
34 #include <thread.h>
35 #include "meta.h"
36 #include "mdmn_subr.h"
37 
38 extern int mdmn_init_set(set_t setno, int todo);
39 
40 uint_t mdmn_busy[MD_MAXSETS][MD_MN_NCLASSES];
41 mutex_t	mdmn_busy_mutex[MD_MAXSETS];
42 cond_t	mdmn_busy_cv[MD_MAXSETS];
43 
44 
45 /* the wakeup table for the initiator's side */
46 mdmn_wti_t mdmn_initiator_table[MD_MAXSETS][MD_MN_NCLASSES];
47 
48 /* the wakeup table for the master */
49 mdmn_wtm_t mdmn_master_table[MD_MAXSETS][MD_MN_NCLASSES];
50 
51 /* List of licensed ip addresses */
52 licensed_ip_t   licensed_nodes[NNODES];
53 
54 /* speed up the search for licensed ip addresses */
55 md_mn_nodeid_t maxlicnodes = 0; /* 0 is not a valid node ID */
56 
57 /*
58  * Check if a given set/class combination is currently in use
59  * If in use, returns TRUE
60  * Otherwise returns FALSE
61  *
62  * Must be called with mdmn_busy_mutex held
63  */
64 bool_t
65 mdmn_is_class_busy(set_t setno, md_mn_msgclass_t class)
66 {
67 	if (mdmn_busy[setno][class] & MDMN_BUSY) {
68 		return (TRUE);
69 	} else {
70 		return (FALSE);
71 	}
72 }
73 
74 /*
75  * Mark a given set/class combination as currently in use
76  * If the class was already in use, returns FALSE
77  * Otherwise returns TRUE
78  *
79  * So mdmn_mark_class_busy can be used like
80  * if (mdmn_mark_class_busy(setno, class) == FALSE)
81  * 	failure;
82  * else
83  *	success;
84  *
85  * Must be called with mdmn_busy_mutex held
86  */
87 bool_t
88 mdmn_mark_class_busy(set_t setno, md_mn_msgclass_t class)
89 {
90 	if (mdmn_busy[setno][class] & MDMN_BUSY) {
91 		return (FALSE);
92 	} else {
93 		mdmn_busy[setno][class] |= MDMN_BUSY;
94 		commd_debug(MD_MMV_MISC, "busy: set=%d, class=%d\n",
95 		    setno, class);
96 		return (TRUE);
97 	}
98 }
99 
/*
 * Release a set/class combination by clearing its MDMN_BUSY bit and
 * signal mdmn_busy_cv[setno] so a waiting thread can re-check the state.
 * Always succeeds, thus void.
 *
 * If the class just released carries MDMN_SUSPEND_ALL, a drain of all
 * classes of this set is in progress and stopped at this class because it
 * was busy. In that case the drain is continued here: every following class
 * is marked MDMN_SUSPEND_ALL in turn, until one of them is found busy.
 * The mdmn_mark_class_unbusy() of that busy class then carries on.
 *
 * This routine calls mdmn_mark_class_suspended(), which must be called with
 * mdmn_busy_mutex held for this set.
 */
void
mdmn_mark_class_unbusy(set_t setno, md_mn_msgclass_t class)
{
	md_mn_msgclass_t next;

	commd_debug(MD_MMV_MISC, "unbusy: set=%d, class=%d\n", setno, class);
	mdmn_busy[setno][class] &= ~MDMN_BUSY;

	/* the busy state changed, tell whoever is waiting for that */
	(void) cond_signal(&mdmn_busy_cv[setno]);

	/* no drain of the whole set pending: nothing more to do */
	if ((mdmn_busy[setno][class] & MDMN_SUSPEND_ALL) == 0) {
		return;
	}

	for (next = class + 1; next < MD_MN_NCLASSES; next++) {
		int rval;

		commd_debug(MD_MMV_MISC,
		    "unbusy: suspending set=%d, class=%d\n", setno, next);
		rval = mdmn_mark_class_suspended(setno, next,
		    MDMN_SUSPEND_ALL);
		if (rval == MDMNE_SET_NOT_DRAINED) {
			/* this class is busy; its unbusy will continue */
			break;
		}
	}
}
134 
135 
136 /*
137  * Check if a given set/class combination is locked.
138  */
139 bool_t
140 mdmn_is_class_locked(set_t setno, md_mn_msgclass_t class)
141 {
142 	if (mdmn_busy[setno][class] & MDMN_LOCKED) {
143 		return (TRUE);
144 	} else {
145 		return (FALSE);
146 	}
147 }
148 
149 /*
150  * Mark a given set/class combination as locked.
151  * No checking is done here, so routine can be void.
152  * Locking a locked set/class is ok.
153  *
154  * Must be called with mdmn_busy_mutex held
155  */
156 void
157 mdmn_mark_class_locked(set_t setno, md_mn_msgclass_t class)
158 {
159 	mdmn_busy[setno][class] |= MDMN_LOCKED;
160 }
161 
162 /*
163  * Mark a given set/class combination as unlocked.
164  * No checking is done here, so routine can be void.
165  * Unlocking a unlocked set/class is ok.
166  *
167  * Must be called with mdmn_busy_mutex held
168  */
169 void
170 mdmn_mark_class_unlocked(set_t setno, md_mn_msgclass_t class)
171 {
172 	mdmn_busy[setno][class] &= ~MDMN_LOCKED;
173 }
174 
175 /*
176  * Suspend a set/class combination
177  *
178  * If called during draining all classes of a set susptype is MDMN_SUSPEND_ALL.
179  * If only one class is about to be drained susptype is MDMN_SUSPEND_1.
180  *
181  * Returns:
182  *	MDMNE_ACK if there are no outstanding messages
183  *	MDMNE_SET_NOT_DRAINED otherwise
184  *
185  * Must be called with mdmn_busy_mutex held for this set.
186  */
187 int
188 mdmn_mark_class_suspended(set_t setno, md_mn_msgclass_t class, uint_t susptype)
189 {
190 	/*
191 	 * We use the mdmn_busy array to mark this set is suspended.
192 	 */
193 	mdmn_busy[setno][class] |= susptype;
194 
195 	/*
196 	 * If there are outstanding messages for this set/class we
197 	 * return MDMNE_SET_NOT_DRAINED, otherwise we return MDMNE_ACK
198 	 */
199 	if (mdmn_is_class_busy(setno, class) == TRUE) {
200 		return (MDMNE_SET_NOT_DRAINED);
201 	}
202 	return (MDMNE_ACK);
203 }
204 
205 /*
206  * Resume operation for a set/class combination after it was
207  * previously suspended
208  *
209  * If called from mdmn_comm_resume_svc_1 to resume _one_ specific class
210  * then susptype will be MDMN_SUSPEND_1
211  * Otherwise to resume all classes of one set,
212  * then susptype equals (MDMN_SUSPEND_ALL | MDMN_SUSPEND_1)
213  *
214  * Always succeeds, thus void.
215  *
216  * Must be called with mdmn_busy_mutex held for this set.
217  */
218 void
219 mdmn_mark_class_resumed(set_t setno, md_mn_msgclass_t class, uint_t susptype)
220 {
221 	/* simply the reverse operation to mdmn_mark_set_drained() */
222 	mdmn_busy[setno][class] &= ~susptype;
223 }
224 
225 /*
226  * Check if a drain command was issued for this set/class combination.
227  *
228  * Must be called with mdmn_busy_mutex held for this set.
229  */
230 bool_t
231 mdmn_is_class_suspended(set_t setno, md_mn_msgclass_t class)
232 {
233 	if (mdmn_busy[setno][class] & (MDMN_SUSPEND_ALL | MDMN_SUSPEND_1)) {
234 		return (TRUE);
235 	} else {
236 		return (FALSE);
237 	}
238 }
239 
240 /*
241  * Put a result into the wakeup table for the master
242  * It's ensured that the msg id from the master_table entry and from
243  * result are matching
244  */
245 void
246 mdmn_set_master_table_res(set_t setno, md_mn_msgclass_t class,
247 				md_mn_result_t  *res)
248 {
249 	mdmn_master_table[setno][class].wtm_result = res;
250 }
251 void
252 mdmn_set_master_table_id(set_t setno, md_mn_msgclass_t class, md_mn_msgid_t *id)
253 {
254 	MSGID_COPY(id, &(mdmn_master_table[setno][class].wtm_id));
255 }
256 
257 void
258 mdmn_set_master_table_addr(set_t setno, md_mn_msgclass_t class,
259     md_mn_nodeid_t nid)
260 {
261 	mdmn_master_table[setno][class].wtm_addr = nid;
262 }
263 
264 
265 md_mn_result_t *
266 mdmn_get_master_table_res(set_t setno, md_mn_msgclass_t class)
267 {
268 	return (mdmn_master_table[setno][class].wtm_result);
269 }
270 
271 void
272 mdmn_get_master_table_id(set_t setno, md_mn_msgclass_t class, md_mn_msgid_t *id)
273 {
274 	MSGID_COPY(&(mdmn_master_table[setno][class].wtm_id), id);
275 }
276 
/*
 * Return the address of the condition variable embedded in the master's
 * wakeup table entry for setno/class.
 */
cond_t *
mdmn_get_master_table_cv(set_t setno, md_mn_msgclass_t class)
{
	mdmn_wtm_t *entry = &mdmn_master_table[setno][class];

	return (&(entry->wtm_cv));
}
282 
/*
 * Return the address of the mutex embedded in the master's wakeup table
 * entry for setno/class.
 */
mutex_t *
mdmn_get_master_table_mx(set_t setno, md_mn_msgclass_t class)
{
	mdmn_wtm_t *entry = &mdmn_master_table[setno][class];

	return (&(entry->wtm_mx));
}
288 
289 md_mn_nodeid_t
290 mdmn_get_master_table_addr(set_t setno, md_mn_msgclass_t class)
291 {
292 	return (mdmn_master_table[setno][class].wtm_addr);
293 }
294 
295 
296 
297 /* here come the functions dealing with the wakeup table for the initiators */
298 
299 
300 void
301 mdmn_register_initiator_table(set_t setno, md_mn_msgclass_t class,
302     md_mn_msg_t *msg, SVCXPRT *transp)
303 {
304 	uint_t nnodes	= set_descriptor[setno]->sd_mn_numnodes;
305 	time_t timeout	= mdmn_get_timeout(msg->msg_type);
306 
307 
308 	MSGID_COPY(&(msg->msg_msgid),
309 	    &(mdmn_initiator_table[setno][class].wti_id));
310 	mdmn_initiator_table[setno][class].wti_transp = transp;
311 	mdmn_initiator_table[setno][class].wti_args = (char *)msg;
312 
313 	/*
314 	 * as the point in time where we want to be guaranteed to be woken up
315 	 * again, we chose the
316 	 * current time + nnodes times the timeout value for the message type
317 	 */
318 	mdmn_initiator_table[setno][class].wti_time =
319 	    time((time_t *)NULL) + (nnodes * timeout);
320 }
321 
322 /*
323  * If the set/class combination is currently busy, return MDMNE_CLASS_BUSY
324  * Otherwise return MDMNE_ACK
325  */
326 int
327 mdmn_check_initiator_table(set_t setno, md_mn_msgclass_t class)
328 {
329 	if ((mdmn_initiator_table[setno][class].wti_id.mid_nid == ~0u) &&
330 	    (mdmn_initiator_table[setno][class].wti_transp == (SVCXPRT *)NULL))
331 		return (MDMNE_ACK);
332 	return (MDMNE_CLASS_BUSY);
333 }
334 
335 /*
336  * Remove an entry from the initiator table entirely,
337  * This must be done with mutex held.
338  */
339 void
340 mdmn_unregister_initiator_table(set_t setno, md_mn_msgclass_t class)
341 {
342 	mdmn_initiator_table[setno][class].wti_id.mid_nid = ~0u;
343 	mdmn_initiator_table[setno][class].wti_id.mid_time = 0LL;
344 	mdmn_initiator_table[setno][class].wti_transp = (SVCXPRT *)NULL;
345 	mdmn_initiator_table[setno][class].wti_args = (char *)0;
346 	mdmn_initiator_table[setno][class].wti_time = (time_t)0;
347 }
348 
349 void
350 mdmn_get_initiator_table_id(set_t setno, md_mn_msgclass_t class,
351 				md_mn_msgid_t *mid)
352 {
353 	MSGID_COPY(&(mdmn_initiator_table[setno][class].wti_id), mid);
354 }
355 
356 SVCXPRT *
357 mdmn_get_initiator_table_transp(set_t setno, md_mn_msgclass_t class)
358 {
359 	return (mdmn_initiator_table[setno][class].wti_transp);
360 }
361 
362 char *
363 mdmn_get_initiator_table_args(set_t setno, md_mn_msgclass_t class)
364 {
365 	return (mdmn_initiator_table[setno][class].wti_args);
366 }
367 
/*
 * Return the address of the mutex embedded in the initiator table entry
 * for setno/class.
 */
mutex_t *
mdmn_get_initiator_table_mx(set_t setno, md_mn_msgclass_t class)
{
	mdmn_wti_t *entry = &mdmn_initiator_table[setno][class];

	return (&(entry->wti_mx));
}
373 
374 time_t
375 mdmn_get_initiator_table_time(set_t setno, md_mn_msgclass_t class)
376 {
377 	return (mdmn_initiator_table[setno][class].wti_time);
378 }
379 
380 extern uint_t	md_commd_global_verb;	/* global bitmask for debug classes */
381 extern FILE	*commdout;		/* debug output file for the commd */
382 extern hrtime_t __savetime;
383 
384 
385 /*
386  * Print debug messages to the terminal or to syslog
387  * commd_debug(MD_MMV_SYSLOG,....) is always printed (and always via syslog),
388  * even if md_commd_global_verb is zero.
389  *
390  * Otherwise the correct bit must be set in the bitmask md_commd_global_verb
391  */
392 void
393 commd_debug(uint_t debug_class, const char *message, ...)
394 {
395 	va_list ap;
396 
397 	/* Is this a message for syslog? */
398 	if (debug_class == MD_MMV_SYSLOG) {
399 
400 		va_start(ap, message);
401 		(void) vsyslog(LOG_WARNING, message, ap);
402 		va_end(ap);
403 	} else {
404 		/* Is this debug_class set in the global verbosity state?  */
405 		if ((md_commd_global_verb & debug_class) == 0) {
406 			return;
407 		}
408 		/* Is our output file already functioning? */
409 		if (commdout == NULL) {
410 			return;
411 		}
412 		/* Are timestamps activated ? */
413 		if (md_commd_global_verb & MD_MMV_TIMESTAMP) {
414 			/* print time since last TRESET in usecs */
415 			fprintf(commdout, "[%s]",
416 			    meta_print_hrtime(gethrtime() - __savetime));
417 		}
418 		/* Now print the real message */
419 		va_start(ap, message);
420 		(void) vfprintf(commdout, message, ap);
421 		va_end(ap);
422 	}
423 }
424 
425 
426 void
427 dump_hex(uint_t debug_class, unsigned int *x, int cnt)
428 {
429 	cnt /= sizeof (unsigned int);
430 	while (cnt--) {
431 		commd_debug(debug_class, "0x%8x ", *x++);
432 		if (cnt % 4)
433 			continue;
434 		commd_debug(debug_class, "\n");
435 	}
436 	commd_debug(debug_class, "\n");
437 }
438 
439 /* debug output: dump a message */
440 void
441 dump_msg(uint_t dbc, char *prefix, md_mn_msg_t *msg)
442 {
443 	commd_debug(dbc, "%s &msg	= 0x%x\n", prefix, (int)msg);
444 	commd_debug(dbc, "%s ID	= (%d, 0x%llx-%d)\n", prefix,
445 	    MSGID_ELEMS(msg->msg_msgid));
446 	commd_debug(dbc, "%s sender	= %d\n", prefix, msg->msg_sender);
447 	commd_debug(dbc, "%s flags	= 0x%x\n", prefix, msg->msg_flags);
448 	commd_debug(dbc, "%s setno	= %d\n", prefix, msg->msg_setno);
449 	commd_debug(dbc, "%s type	= %d\n", prefix, msg->msg_type);
450 	commd_debug(dbc, "%s size	= %d\n", prefix, msg->msg_event_size);
451 	if (msg->msg_event_size) {
452 		commd_debug(dbc, "%s data	=\n", prefix);
453 		dump_hex(dbc, (unsigned int *)(void *)msg->msg_event_data,
454 		    msg->msg_event_size);
455 	}
456 }
457 
458 /* debug output: dump a result structure */
459 void
460 dump_result(uint_t dbc, char *prefix, md_mn_result_t *res)
461 {
462 	commd_debug(dbc, "%s &res	= 0x%x\n", prefix, (int)res);
463 	commd_debug(dbc, "%s ID	= (%d, 0x%llx-%d)\n", prefix,
464 	    MSGID_ELEMS(res->mmr_msgid));
465 	commd_debug(dbc, "%s setno	= %d\n", prefix, res->mmr_setno);
466 	commd_debug(dbc, "%s type	= %d\n", prefix, res->mmr_msgtype);
467 	commd_debug(dbc, "%s flags	= 0x%x\n", prefix, res->mmr_flags);
468 	commd_debug(dbc, "%s comm_state= %d\n", prefix, res->mmr_comm_state);
469 	commd_debug(dbc, "%s exitval	= %d\n", prefix, res->mmr_exitval);
470 	commd_debug(dbc, "%s out_size	= %d\n", prefix, res->mmr_out_size);
471 	if (res->mmr_out_size)
472 		commd_debug(dbc, "%s out	= %s\n", prefix, res->mmr_out);
473 	commd_debug(dbc, "%s err_size	= %d\n", prefix, res->mmr_err_size);
474 	if (res->mmr_err_size)
475 		commd_debug(dbc, "%s err	= %s\n", prefix, res->mmr_err);
476 }
477 
478 
479 /*
480  * Here we find out, where to store or find the results for a given msg.
481  *
482  * Per set we have a pointer to a three dimensional array:
483  * mct[set] -> mct_mce[NNODES][MD_MN_NCLASSES][MAX_SUBMESSAGES]
484  * So, for every possible node and for every possible class we can store
485  * MAX_SUBMESSAGES results.
486  * the way to find the correct index is
487  *	submessage +
488  *	class * MAX_SUBMESSAGES +
489  *	nodeid * MAX_SUBMESSAGES * MD_MN_NCLASSES.
490  *
491  * To find the correct address the index has to be multiplied
492  * by the size of one entry.
493  */
494 static md_mn_mce_t *
495 mdmn_get_mce_by_msg(md_mn_msg_t *msg)
496 {
497 	set_t	setno = msg->msg_setno;
498 	int	nodeid = msg->msg_msgid.mid_nid;
499 	int	submsg = msg->msg_msgid.mid_smid;
500 	int	mct_index;
501 	off_t	mct_offset;
502 	md_mn_msgclass_t class;
503 
504 	if (mct[setno] != NULL) {
505 		if (mdmn_init_set(setno, MDMN_SET_MCT) != 0) {
506 			return ((md_mn_mce_t *)MDMN_MCT_ERROR);
507 		}
508 	}
509 
510 	if (submsg == 0) {
511 		class = mdmn_get_message_class(msg->msg_type);
512 	} else {
513 		class = msg->msg_msgid.mid_oclass;
514 	}
515 
516 	mct_index = submsg +
517 		    class * MAX_SUBMESSAGES +
518 		    nodeid * MAX_SUBMESSAGES * MD_MN_NCLASSES;
519 
520 	mct_offset = mct_index * sizeof (md_mn_mce_t);
521 
522 	/* LINTED Pointer alignment */
523 	return ((md_mn_mce_t *)((caddr_t)(mct[setno]) + mct_offset));
524 
525 	/*
526 	 * the lint clean version would be:
527 	 * return (&(mct[setno]->mct_mce[0][0][0]) + mct_index);
528 	 * :-)
529 	 */
530 }
531 
532 /*
533  * mdmn_mark_completion(msg, result, flag)
534  * Stores the result of this message into the mmaped memory MCT[setno]
535  * In case the same message comes along a second time we will know that
536  * this message has already been processed and we can deliver the
537  * results immediately.
538  *
539  * Before a message handler is called, the message in the MCT is flagged
540  * as currently being processed (flag == MDMN_MCT_IN_PROGRESS).
541  * This we need so we don't start a second handler for the same message.
542  *
543  * After a message handler is completed, this routine is called with
544  * flag == MDMN_MCT_DONE and the appropriate result that we store in the MCT.
545  * As MCT[setno] is memory mapped to disks, this information is persistent
546  * even across a crash of the commd.
547  * It doesn't have to be persistent across a reboot, though.
548  *
549  * Returns MDMN_MCT_DONE in case of success
550  * Returns MDMN_MCT_ERROR in case of error creating the mct
551  */
552 int
553 mdmn_mark_completion(md_mn_msg_t *msg, md_mn_result_t *result, uint_t flag)
554 {
555 	md_mn_mce_t	*mce;
556 	uint_t		offset_in_page;
557 
558 	mce = mdmn_get_mce_by_msg(msg);
559 	if (mce == (md_mn_mce_t *)-1) {
560 		return (MDMN_MCT_ERROR);
561 	}
562 	offset_in_page = (uint_t)(caddr_t)mce % sysconf(_SC_PAGESIZE);
563 
564 	memset(mce, 0, sizeof (md_mn_mce_t));
565 
566 	MSGID_COPY(&msg->msg_msgid, &mce->mce_result.mmr_msgid);
567 	if (flag == MDMN_MCT_IN_PROGRESS) {
568 		mce->mce_flags = MDMN_MCT_IN_PROGRESS;
569 		goto mmc_out;
570 	}
571 
572 	/*
573 	 * In case the message flags indicate that the result should not be
574 	 * stored in the MCT, we return a MDMN_MCT_NOT_DONE,
575 	 * so the message will be processed at any rate,
576 	 * even if we process this message twice.
577 	 * this makes sense if the result of the message is a dynamic status
578 	 * and might have changed meanwhile.
579 	 */
580 	if (msg->msg_flags & MD_MSGF_NO_MCT) {
581 		return (MDMN_MCT_DONE);
582 	}
583 
584 	/* This msg is no longer in progress */
585 	mce->mce_flags = MDMN_MCT_DONE;
586 
587 	mce->mce_result.mmr_msgtype	    = result->mmr_msgtype;
588 	mce->mce_result.mmr_setno	    = result->mmr_setno;
589 	mce->mce_result.mmr_flags	    = result->mmr_flags;
590 	mce->mce_result.mmr_sender	    = result->mmr_sender;
591 	mce->mce_result.mmr_failing_node    = result->mmr_failing_node;
592 	mce->mce_result.mmr_comm_state	    = result->mmr_comm_state;
593 	mce->mce_result.mmr_exitval	    = result->mmr_exitval;
594 
595 	/* if mmr_exitval is zero, we store stdout, otherwise stderr */
596 	if (result->mmr_exitval == 0) {
597 		if (result->mmr_out_size > 0) {
598 			memcpy(mce->mce_data, result->mmr_out,
599 			    result->mmr_out_size);
600 			mce->mce_result.mmr_out_size = result->mmr_out_size;
601 		}
602 	} else {
603 		if (result->mmr_err_size > 0) {
604 			mce->mce_result.mmr_err_size = result->mmr_err_size;
605 			memcpy(mce->mce_data, result->mmr_err,
606 			    result->mmr_err_size);
607 		}
608 	}
609 
610 	dump_result(MD_MMV_PROC_S, "mdmn_mark_completion1", result);
611 
612 mmc_out:
613 	/* now flush this entry to disk */
614 	msync((caddr_t)mce - offset_in_page,
615 	    sizeof (md_mn_mce_t) + offset_in_page, MS_SYNC);
616 	return (MDMN_MCT_DONE);
617 }
618 
619 /*
620  * mdmn_check_completion(msg, resultp)
621  * checks if msg has already been processed on this node, and if so copies
622  * the stored result to resultp.
623  *
624  * returns MDMN_MCT_DONE and the result filled out acurately in case the
625  *		msg has already been processed before
626  * returns MDMN_MCT_NOT_DONE if the message has not been processed before
627  * returns MDMN_MCT_IN_PROGRESS if the message is currently being processed
628  *	This can only occur on a slave node.
629  * return MDMN_MCT_ERROR in case of error creating the mct
630  */
631 int
632 mdmn_check_completion(md_mn_msg_t *msg, md_mn_result_t *result)
633 {
634 	md_mn_mce_t	*mce;
635 	size_t		outsize;
636 	size_t		errsize;
637 
638 	mce = mdmn_get_mce_by_msg(msg);
639 	if (mce == (md_mn_mce_t *)MDMN_MCT_ERROR) {
640 		return (MDMN_MCT_ERROR); /* what to do in that case ? */
641 	}
642 	if (MSGID_CMP(&(msg->msg_msgid), &(mce->mce_result.mmr_msgid))) {
643 		/* is the message completed, or in progress? */
644 		if (mce->mce_flags & MDMN_MCT_IN_PROGRESS) {
645 			return (MDMN_MCT_IN_PROGRESS);
646 		}
647 		/*
648 		 * See comment on MD_MSGF_NO_MCT above, if this flag is set
649 		 * for a message no result was stored and so the message has
650 		 * to be processed no matter if this is the 2nd time then.
651 		 */
652 		if (msg->msg_flags & MD_MSGF_NO_MCT) {
653 			return (MDMN_MCT_NOT_DONE);
654 		}
655 
656 		/* Paranoia check: mce_flags must be MDMN_MCT_DONE here */
657 		if ((mce->mce_flags & MDMN_MCT_DONE) == 0) {
658 			commd_debug(MD_MMV_ALL,
659 			    "mdmn_check_completion: msg not done and not in "
660 			    "progress! ID = (%d, 0x%llx-%d)\n",
661 			    MSGID_ELEMS(msg->msg_msgid));
662 			return (MDMN_MCT_NOT_DONE);
663 		}
664 		/*
665 		 * Already processed.
666 		 * Copy saved results data;
667 		 * return only a pointer to any output.
668 		 */
669 		MSGID_COPY(&(mce->mce_result.mmr_msgid), &result->mmr_msgid);
670 		result->mmr_msgtype	    = mce->mce_result.mmr_msgtype;
671 		result->mmr_setno	    = mce->mce_result.mmr_setno;
672 		result->mmr_flags	    = mce->mce_result.mmr_flags;
673 		result->mmr_sender	    = mce->mce_result.mmr_sender;
674 		result->mmr_failing_node    = mce->mce_result.mmr_failing_node;
675 		result->mmr_comm_state	    = mce->mce_result.mmr_comm_state;
676 		result->mmr_exitval	    = mce->mce_result.mmr_exitval;
677 		result->mmr_err		    = NULL;
678 		result->mmr_out		    = NULL;
679 		outsize = result->mmr_out_size = mce->mce_result.mmr_out_size;
680 		errsize = result->mmr_err_size = mce->mce_result.mmr_err_size;
681 		/*
682 		 * if the exit val is zero only stdout was stored (if any)
683 		 * otherwise only stderr was stored (if any)
684 		 */
685 		if (result->mmr_exitval == 0) {
686 			if (outsize != 0) {
687 				result->mmr_out = Zalloc(outsize);
688 				memcpy(result->mmr_out, mce->mce_data, outsize);
689 			}
690 		} else {
691 			if (errsize != 0) {
692 				result->mmr_err = Zalloc(errsize);
693 				memcpy(result->mmr_err, mce->mce_data, errsize);
694 			}
695 		}
696 		commd_debug(MD_MMV_MISC,
697 			    "mdmn_check_completion: msg already processed \n");
698 		dump_result(MD_MMV_MISC, "mdmn_check_completion", result);
699 		return (MDMN_MCT_DONE);
700 	}
701 	commd_debug(MD_MMV_MISC,
702 		    "mdmn_check_completion: msg not yet processed\n");
703 	return (MDMN_MCT_NOT_DONE);
704 }
705 
706 
707 
708 /*
709  * check_license(rqstp, chknid)
710  *
711  * Is this RPC request sent from a licensed host?
712  *
713  * If chknid is non-zero, the caller of check_license() knows the ID of
714  * the sender. Then we check just the one entry of licensed_nodes[]
715  *
716  * If chknid is zero, the sender is not known. In that case the sender must be
717  * the local node.
718  *
719  * If the host is licensed, return TRUE, else return FALSE
720  */
721 bool_t
722 check_license(struct svc_req *rqstp, md_mn_nodeid_t chknid)
723 {
724 	char		buf[INET6_ADDRSTRLEN];
725 	void		*caller = NULL;
726 	in_addr_t	caller_ipv4;
727 	in6_addr_t	caller_ipv6;
728 	struct sockaddr	*ca;
729 
730 
731 	ca = (struct sockaddr *)(void *)svc_getrpccaller(rqstp->rq_xprt)->buf;
732 
733 	if (ca->sa_family == AF_INET) {
734 		caller_ipv4 =
735 		    ((struct sockaddr_in *)(void *)ca)->sin_addr.s_addr;
736 		caller = (void *)&caller_ipv4;
737 
738 		if (chknid == 0) {
739 			/* check against local node */
740 			if (caller_ipv4 == htonl(INADDR_LOOPBACK)) {
741 				return (TRUE);
742 
743 			}
744 		} else {
745 			/* check against one specific node */
746 			if ((caller_ipv4 == licensed_nodes[chknid].lip_ipv4) &&
747 			    (licensed_nodes[chknid].lip_family == AF_INET)) {
748 				return (TRUE);
749 			} else {
750 				commd_debug(MD_MMV_MISC,
751 				    "Bad attempt from %x ln[%d]=%x\n",
752 				    caller_ipv4, chknid,
753 				    licensed_nodes[chknid].lip_ipv4);
754 			}
755 		}
756 	} else if (ca->sa_family == AF_INET6) {
757 		caller_ipv6 = ((struct sockaddr_in6 *)(void *)ca)->sin6_addr;
758 		caller = (void *)&caller_ipv6;
759 
760 		if (chknid == 0) {
761 			/* check against local node */
762 			if (IN6_IS_ADDR_LOOPBACK(&caller_ipv6)) {
763 				return (TRUE);
764 
765 			}
766 		} else {
767 			/* check against one specific node */
768 			if (IN6_ARE_ADDR_EQUAL(&caller_ipv6,
769 			    &(licensed_nodes[chknid].lip_ipv6)) &&
770 			    (licensed_nodes[chknid].lip_family == AF_INET6)) {
771 				return (TRUE);
772 			}
773 		}
774 	}
775 	/* if  we are here, we were contacted by an unlicensed node */
776 	commd_debug(MD_MMV_SYSLOG,
777 	    "Bad attempt to contact rpc.mdcommd from %s\n",
778 	    caller ?
779 	    inet_ntop(ca->sa_family, caller, buf, INET6_ADDRSTRLEN) :
780 	    "unknown");
781 
782 	return (FALSE);
783 }
784 
785 /*
786  * Add a node to the list of licensed nodes.
787  *
788  * Only IPv4 is currently supported.
789  * for IPv6, we need to change md_mnnode_desc.
790  */
791 void
792 add_license(md_mnnode_desc *node)
793 {
794 	md_mn_nodeid_t nid = node->nd_nodeid;
795 	char		buf[INET6_ADDRSTRLEN];
796 
797 	/*
798 	 * If this node is not yet licensed, do it now.
799 	 * For now only IPv4 addresses are supported.
800 	 */
801 	commd_debug(MD_MMV_MISC, "add_lic(%s): ln[%d]=%s, lnc[%d]=%d\n",
802 	    node->nd_priv_ic, nid,
803 	    inet_ntop(AF_INET, (void *)&licensed_nodes[nid].lip_ipv4,
804 	    buf, INET6_ADDRSTRLEN), nid, licensed_nodes[nid].lip_cnt);
805 
806 	if (licensed_nodes[nid].lip_ipv4 == (in_addr_t)0) {
807 		licensed_nodes[nid].lip_family = AF_INET; /* IPv4 */
808 		licensed_nodes[nid].lip_ipv4 = inet_addr(node->nd_priv_ic);
809 		/* keep track of the last entry for faster search */
810 		if (nid > maxlicnodes)
811 			maxlicnodes = nid;
812 
813 	}
814 	/* in any case bump up the reference count */
815 	licensed_nodes[nid].lip_cnt++;
816 }
817 
818 /*
819  * lower the reference count for one node.
820  * If that drops to zero, remove the node from the list of licensed nodes
821  *
822  * Only IPv4 is currently supported.
823  * for IPv6, we need to change md_mnnode_desc.
824  */
825 void
826 rem_license(md_mnnode_desc *node)
827 {
828 	md_mn_nodeid_t nid = node->nd_nodeid;
829 	char		buf[INET6_ADDRSTRLEN];
830 
831 	commd_debug(MD_MMV_MISC, "rem_lic(%s): ln[%d]=%s, lnc[%d]=%d\n",
832 	    node->nd_priv_ic, nid,
833 	    inet_ntop(AF_INET, (void *)&licensed_nodes[nid].lip_ipv4, buf,
834 	    INET6_ADDRSTRLEN), nid, licensed_nodes[nid].lip_cnt);
835 
836 	assert(licensed_nodes[nid].lip_cnt > 0);
837 
838 	/*
839 	 * If this was the last reference to that node, it's license expires
840 	 * For now only IPv4 addresses are supported.
841 	 */
842 	if (--licensed_nodes[nid].lip_cnt == 0) {
843 		licensed_nodes[nid].lip_ipv4 = (in_addr_t)0;
844 	}
845 }
846