1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #include <unistd.h>
28 #include <sys/types.h>
29 #include <sys/socket.h>
30 #include <netinet/in.h>
31 #include <arpa/inet.h>
32 #include <thread.h>
33 #include "meta.h"
34 #include "mdmn_subr.h"
35
36 extern int mdmn_init_set(set_t setno, int todo);
37
/*
 * Per set/class state word.  The bits kept here are MDMN_BUSY,
 * MDMN_LOCKED, MDMN_SUSPEND_1 and MDMN_SUSPEND_ALL; they are set,
 * cleared and tested by the mdmn_*_class_*() routines in this file.
 */
uint_t mdmn_busy[MD_MAXSETS][MD_MN_NCLASSES];
/* one mutex per set; the class state routines expect it to be held */
mutex_t mdmn_busy_mutex[MD_MAXSETS];
/* one cv per set; signalled by mdmn_mark_class_unbusy() */
cond_t mdmn_busy_cv[MD_MAXSETS];


/* the wakeup table for the initiator's side */
mdmn_wti_t mdmn_initiator_table[MD_MAXSETS][MD_MN_NCLASSES];

/* the wakeup table for the master */
mdmn_wtm_t mdmn_master_table[MD_MAXSETS][MD_MN_NCLASSES];

/* List of licensed ip addresses, indexed by node ID */
licensed_ip_t licensed_nodes[NNODES];

/*
 * speed up the search for licensed ip addresses:
 * highest node ID that add_license() has registered so far
 */
md_mn_nodeid_t maxlicnodes = 0; /* 0 is not a valid node ID */
54
55 /*
56 * Check if a given set/class combination is currently in use
57 * If in use, returns TRUE
58 * Otherwise returns FALSE
59 *
60 * Must be called with mdmn_busy_mutex held
61 */
62 bool_t
mdmn_is_class_busy(set_t setno,md_mn_msgclass_t class)63 mdmn_is_class_busy(set_t setno, md_mn_msgclass_t class)
64 {
65 if (mdmn_busy[setno][class] & MDMN_BUSY) {
66 return (TRUE);
67 } else {
68 return (FALSE);
69 }
70 }
71
72 /*
73 * Mark a given set/class combination as currently in use
74 * If the class was already in use, returns FALSE
75 * Otherwise returns TRUE
76 *
77 * So mdmn_mark_class_busy can be used like
78 * if (mdmn_mark_class_busy(setno, class) == FALSE)
79 * failure;
80 * else
81 * success;
82 *
83 * Must be called with mdmn_busy_mutex held
84 */
85 bool_t
mdmn_mark_class_busy(set_t setno,md_mn_msgclass_t class)86 mdmn_mark_class_busy(set_t setno, md_mn_msgclass_t class)
87 {
88 if (mdmn_busy[setno][class] & MDMN_BUSY) {
89 return (FALSE);
90 } else {
91 mdmn_busy[setno][class] |= MDMN_BUSY;
92 commd_debug(MD_MMV_MISC, "busy: set=%d, class=%d\n",
93 setno, class);
94 return (TRUE);
95 }
96 }
97
98 /*
99 * Mark a given set/class combination as currently available
100 * Always succeeds, thus void.
101 *
102 * If this class is marked MDMN_SUSPEND_ALL, we are in the middle of
103 * draining all classes of this set.
104 * We have to mark class+1 as MDMN_SUSPEND_ALL too.
105 * If class+2 wasn't busy, we proceed with class+2, and so on
106 * If any class is busy, we return.
107 * Then the drain process will be continued by the mdmn_mark_class_unbusy() of
108 * that busy class
109 */
110 void
mdmn_mark_class_unbusy(set_t setno,md_mn_msgclass_t class)111 mdmn_mark_class_unbusy(set_t setno, md_mn_msgclass_t class)
112 {
113 commd_debug(MD_MMV_MISC, "unbusy: set=%d, class=%d\n", setno, class);
114 mdmn_busy[setno][class] &= ~MDMN_BUSY;
115 /* something changed, inform threads waiting for that */
116 (void) cond_signal(&mdmn_busy_cv[setno]);
117
118 if ((mdmn_busy[setno][class] & MDMN_SUSPEND_ALL) == 0) {
119 return;
120 }
121
122 while (++class < MD_MN_NCLASSES) {
123 commd_debug(MD_MMV_MISC,
124 "unbusy: suspending set=%d, class=%d\n", setno, class);
125 if (mdmn_mark_class_suspended(setno, class, MDMN_SUSPEND_ALL)
126 == MDMNE_SET_NOT_DRAINED) {
127 break;
128 }
129 }
130
131 }
132
133
134 /*
135 * Check if a given set/class combination is locked.
136 */
137 bool_t
mdmn_is_class_locked(set_t setno,md_mn_msgclass_t class)138 mdmn_is_class_locked(set_t setno, md_mn_msgclass_t class)
139 {
140 if (mdmn_busy[setno][class] & MDMN_LOCKED) {
141 return (TRUE);
142 } else {
143 return (FALSE);
144 }
145 }
146
147 /*
148 * Mark a given set/class combination as locked.
149 * No checking is done here, so routine can be void.
150 * Locking a locked set/class is ok.
151 *
152 * Must be called with mdmn_busy_mutex held
153 */
154 void
mdmn_mark_class_locked(set_t setno,md_mn_msgclass_t class)155 mdmn_mark_class_locked(set_t setno, md_mn_msgclass_t class)
156 {
157 mdmn_busy[setno][class] |= MDMN_LOCKED;
158 }
159
160 /*
161 * Mark a given set/class combination as unlocked.
162 * No checking is done here, so routine can be void.
163 * Unlocking a unlocked set/class is ok.
164 *
165 * Must be called with mdmn_busy_mutex held
166 */
167 void
mdmn_mark_class_unlocked(set_t setno,md_mn_msgclass_t class)168 mdmn_mark_class_unlocked(set_t setno, md_mn_msgclass_t class)
169 {
170 mdmn_busy[setno][class] &= ~MDMN_LOCKED;
171 }
172
173 /*
174 * Suspend a set/class combination
175 *
176 * If called during draining all classes of a set susptype is MDMN_SUSPEND_ALL.
177 * If only one class is about to be drained susptype is MDMN_SUSPEND_1.
178 *
179 * Returns:
180 * MDMNE_ACK if there are no outstanding messages
181 * MDMNE_SET_NOT_DRAINED otherwise
182 *
183 * Must be called with mdmn_busy_mutex held for this set.
184 */
185 int
mdmn_mark_class_suspended(set_t setno,md_mn_msgclass_t class,uint_t susptype)186 mdmn_mark_class_suspended(set_t setno, md_mn_msgclass_t class, uint_t susptype)
187 {
188 /*
189 * We use the mdmn_busy array to mark this set is suspended.
190 */
191 mdmn_busy[setno][class] |= susptype;
192
193 /*
194 * If there are outstanding messages for this set/class we
195 * return MDMNE_SET_NOT_DRAINED, otherwise we return MDMNE_ACK
196 */
197 if (mdmn_is_class_busy(setno, class) == TRUE) {
198 return (MDMNE_SET_NOT_DRAINED);
199 }
200 return (MDMNE_ACK);
201 }
202
203 /*
204 * Resume operation for a set/class combination after it was
205 * previously suspended
206 *
207 * If called from mdmn_comm_resume_svc_1 to resume _one_ specific class
208 * then susptype will be MDMN_SUSPEND_1
209 * Otherwise to resume all classes of one set,
210 * then susptype equals (MDMN_SUSPEND_ALL | MDMN_SUSPEND_1)
211 *
212 * Always succeeds, thus void.
213 *
214 * Must be called with mdmn_busy_mutex held for this set.
215 */
216 void
mdmn_mark_class_resumed(set_t setno,md_mn_msgclass_t class,uint_t susptype)217 mdmn_mark_class_resumed(set_t setno, md_mn_msgclass_t class, uint_t susptype)
218 {
219 /* simply the reverse operation to mdmn_mark_set_drained() */
220 mdmn_busy[setno][class] &= ~susptype;
221 }
222
223 /*
224 * Check if a drain command was issued for this set/class combination.
225 *
226 * Must be called with mdmn_busy_mutex held for this set.
227 */
228 bool_t
mdmn_is_class_suspended(set_t setno,md_mn_msgclass_t class)229 mdmn_is_class_suspended(set_t setno, md_mn_msgclass_t class)
230 {
231 if (mdmn_busy[setno][class] & (MDMN_SUSPEND_ALL | MDMN_SUSPEND_1)) {
232 return (TRUE);
233 } else {
234 return (FALSE);
235 }
236 }
237
238 /*
239 * Put a result into the wakeup table for the master
240 * It's ensured that the msg id from the master_table entry and from
241 * result are matching
242 */
243 void
mdmn_set_master_table_res(set_t setno,md_mn_msgclass_t class,md_mn_result_t * res)244 mdmn_set_master_table_res(set_t setno, md_mn_msgclass_t class,
245 md_mn_result_t *res)
246 {
247 mdmn_master_table[setno][class].wtm_result = res;
248 }
249 void
mdmn_set_master_table_id(set_t setno,md_mn_msgclass_t class,md_mn_msgid_t * id)250 mdmn_set_master_table_id(set_t setno, md_mn_msgclass_t class, md_mn_msgid_t *id)
251 {
252 MSGID_COPY(id, &(mdmn_master_table[setno][class].wtm_id));
253 }
254
255 void
mdmn_set_master_table_addr(set_t setno,md_mn_msgclass_t class,md_mn_nodeid_t nid)256 mdmn_set_master_table_addr(set_t setno, md_mn_msgclass_t class,
257 md_mn_nodeid_t nid)
258 {
259 mdmn_master_table[setno][class].wtm_addr = nid;
260 }
261
262
263 md_mn_result_t *
mdmn_get_master_table_res(set_t setno,md_mn_msgclass_t class)264 mdmn_get_master_table_res(set_t setno, md_mn_msgclass_t class)
265 {
266 return (mdmn_master_table[setno][class].wtm_result);
267 }
268
269 void
mdmn_get_master_table_id(set_t setno,md_mn_msgclass_t class,md_mn_msgid_t * id)270 mdmn_get_master_table_id(set_t setno, md_mn_msgclass_t class, md_mn_msgid_t *id)
271 {
272 MSGID_COPY(&(mdmn_master_table[setno][class].wtm_id), id);
273 }
274
275 cond_t *
mdmn_get_master_table_cv(set_t setno,md_mn_msgclass_t class)276 mdmn_get_master_table_cv(set_t setno, md_mn_msgclass_t class)
277 {
278 return (&(mdmn_master_table[setno][class].wtm_cv));
279 }
280
281 mutex_t *
mdmn_get_master_table_mx(set_t setno,md_mn_msgclass_t class)282 mdmn_get_master_table_mx(set_t setno, md_mn_msgclass_t class)
283 {
284 return (&(mdmn_master_table[setno][class].wtm_mx));
285 }
286
287 md_mn_nodeid_t
mdmn_get_master_table_addr(set_t setno,md_mn_msgclass_t class)288 mdmn_get_master_table_addr(set_t setno, md_mn_msgclass_t class)
289 {
290 return (mdmn_master_table[setno][class].wtm_addr);
291 }
292
293
294
295 /* here come the functions dealing with the wakeup table for the initiators */
296
297
298 void
mdmn_register_initiator_table(set_t setno,md_mn_msgclass_t class,md_mn_msg_t * msg,SVCXPRT * transp)299 mdmn_register_initiator_table(set_t setno, md_mn_msgclass_t class,
300 md_mn_msg_t *msg, SVCXPRT *transp)
301 {
302 uint_t nnodes = set_descriptor[setno]->sd_mn_numnodes;
303 time_t timeout = mdmn_get_timeout(msg->msg_type);
304
305
306 MSGID_COPY(&(msg->msg_msgid),
307 &(mdmn_initiator_table[setno][class].wti_id));
308 mdmn_initiator_table[setno][class].wti_transp = transp;
309 mdmn_initiator_table[setno][class].wti_args = (char *)msg;
310
311 /*
312 * as the point in time where we want to be guaranteed to be woken up
313 * again, we chose the
314 * current time + nnodes times the timeout value for the message type
315 */
316 mdmn_initiator_table[setno][class].wti_time =
317 time((time_t *)NULL) + (nnodes * timeout);
318 }
319
320 /*
321 * If the set/class combination is currently busy, return MDMNE_CLASS_BUSY
322 * Otherwise return MDMNE_ACK
323 */
324 int
mdmn_check_initiator_table(set_t setno,md_mn_msgclass_t class)325 mdmn_check_initiator_table(set_t setno, md_mn_msgclass_t class)
326 {
327 if ((mdmn_initiator_table[setno][class].wti_id.mid_nid == ~0u) &&
328 (mdmn_initiator_table[setno][class].wti_transp == (SVCXPRT *)NULL))
329 return (MDMNE_ACK);
330 return (MDMNE_CLASS_BUSY);
331 }
332
333 /*
334 * Remove an entry from the initiator table entirely,
335 * This must be done with mutex held.
336 */
337 void
mdmn_unregister_initiator_table(set_t setno,md_mn_msgclass_t class)338 mdmn_unregister_initiator_table(set_t setno, md_mn_msgclass_t class)
339 {
340 mdmn_initiator_table[setno][class].wti_id.mid_nid = ~0u;
341 mdmn_initiator_table[setno][class].wti_id.mid_time = 0LL;
342 mdmn_initiator_table[setno][class].wti_transp = (SVCXPRT *)NULL;
343 mdmn_initiator_table[setno][class].wti_args = (char *)0;
344 mdmn_initiator_table[setno][class].wti_time = (time_t)0;
345 }
346
347 void
mdmn_get_initiator_table_id(set_t setno,md_mn_msgclass_t class,md_mn_msgid_t * mid)348 mdmn_get_initiator_table_id(set_t setno, md_mn_msgclass_t class,
349 md_mn_msgid_t *mid)
350 {
351 MSGID_COPY(&(mdmn_initiator_table[setno][class].wti_id), mid);
352 }
353
354 SVCXPRT *
mdmn_get_initiator_table_transp(set_t setno,md_mn_msgclass_t class)355 mdmn_get_initiator_table_transp(set_t setno, md_mn_msgclass_t class)
356 {
357 return (mdmn_initiator_table[setno][class].wti_transp);
358 }
359
360 char *
mdmn_get_initiator_table_args(set_t setno,md_mn_msgclass_t class)361 mdmn_get_initiator_table_args(set_t setno, md_mn_msgclass_t class)
362 {
363 return (mdmn_initiator_table[setno][class].wti_args);
364 }
365
366 mutex_t *
mdmn_get_initiator_table_mx(set_t setno,md_mn_msgclass_t class)367 mdmn_get_initiator_table_mx(set_t setno, md_mn_msgclass_t class)
368 {
369 return (&(mdmn_initiator_table[setno][class].wti_mx));
370 }
371
372 time_t
mdmn_get_initiator_table_time(set_t setno,md_mn_msgclass_t class)373 mdmn_get_initiator_table_time(set_t setno, md_mn_msgclass_t class)
374 {
375 return (mdmn_initiator_table[setno][class].wti_time);
376 }
377
378 extern uint_t md_commd_global_verb; /* global bitmask for debug classes */
379 extern FILE *commdout; /* debug output file for the commd */
380 extern hrtime_t __savetime;
381
382
383 /*
384 * Print debug messages to the terminal or to syslog
385 * commd_debug(MD_MMV_SYSLOG,....) is always printed (and always via syslog),
386 * even if md_commd_global_verb is zero.
387 *
388 * Otherwise the correct bit must be set in the bitmask md_commd_global_verb
389 */
390 void
commd_debug(uint_t debug_class,const char * message,...)391 commd_debug(uint_t debug_class, const char *message, ...)
392 {
393 va_list ap;
394
395 /* Is this a message for syslog? */
396 if (debug_class == MD_MMV_SYSLOG) {
397
398 va_start(ap, message);
399 (void) vsyslog(LOG_WARNING, message, ap);
400 va_end(ap);
401 } else {
402 /* Is this debug_class set in the global verbosity state? */
403 if ((md_commd_global_verb & debug_class) == 0) {
404 return;
405 }
406 /* Is our output file already functioning? */
407 if (commdout == NULL) {
408 return;
409 }
410 /* Are timestamps activated ? */
411 if (md_commd_global_verb & MD_MMV_TIMESTAMP) {
412 /* print time since last TRESET in usecs */
413 (void) fprintf(commdout, "[%s]",
414 meta_print_hrtime(gethrtime() - __savetime));
415 }
416 /* Now print the real message */
417 va_start(ap, message);
418 (void) vfprintf(commdout, message, ap);
419 va_end(ap);
420 }
421 }
422
423
424 void
dump_hex(uint_t debug_class,unsigned int * x,int cnt)425 dump_hex(uint_t debug_class, unsigned int *x, int cnt)
426 {
427 cnt /= sizeof (unsigned int);
428 while (cnt--) {
429 commd_debug(debug_class, "0x%8x ", *x++);
430 if (cnt % 4)
431 continue;
432 commd_debug(debug_class, "\n");
433 }
434 commd_debug(debug_class, "\n");
435 }
436
437 /* debug output: dump a message */
438 void
dump_msg(uint_t dbc,char * prefix,md_mn_msg_t * msg)439 dump_msg(uint_t dbc, char *prefix, md_mn_msg_t *msg)
440 {
441 commd_debug(dbc, "%s &msg = 0x%x\n", prefix, (int)msg);
442 commd_debug(dbc, "%s ID = (%d, 0x%llx-%d)\n", prefix,
443 MSGID_ELEMS(msg->msg_msgid));
444 commd_debug(dbc, "%s sender = %d\n", prefix, msg->msg_sender);
445 commd_debug(dbc, "%s flags = 0x%x\n", prefix, msg->msg_flags);
446 commd_debug(dbc, "%s setno = %d\n", prefix, msg->msg_setno);
447 commd_debug(dbc, "%s recipient = %d\n", prefix, msg->msg_recipient);
448 commd_debug(dbc, "%s type = %d\n", prefix, msg->msg_type);
449 commd_debug(dbc, "%s size = %d\n", prefix, msg->msg_event_size);
450 if (msg->msg_event_size) {
451 commd_debug(dbc, "%s data =\n", prefix);
452 dump_hex(dbc, (unsigned int *)(void *)msg->msg_event_data,
453 msg->msg_event_size);
454 }
455 }
456
457 /* debug output: dump a result structure */
458 void
dump_result(uint_t dbc,char * prefix,md_mn_result_t * res)459 dump_result(uint_t dbc, char *prefix, md_mn_result_t *res)
460 {
461 commd_debug(dbc, "%s &res = 0x%x\n", prefix, (int)res);
462 commd_debug(dbc, "%s ID = (%d, 0x%llx-%d)\n", prefix,
463 MSGID_ELEMS(res->mmr_msgid));
464 commd_debug(dbc, "%s setno = %d\n", prefix, res->mmr_setno);
465 commd_debug(dbc, "%s type = %d\n", prefix, res->mmr_msgtype);
466 commd_debug(dbc, "%s flags = 0x%x\n", prefix, res->mmr_flags);
467 commd_debug(dbc, "%s comm_state= %d\n", prefix, res->mmr_comm_state);
468 commd_debug(dbc, "%s exitval = %d\n", prefix, res->mmr_exitval);
469 commd_debug(dbc, "%s out_size = %d\n", prefix, res->mmr_out_size);
470 if (res->mmr_out_size)
471 commd_debug(dbc, "%s out = %s\n", prefix, res->mmr_out);
472 commd_debug(dbc, "%s err_size = %d\n", prefix, res->mmr_err_size);
473 if (res->mmr_err_size)
474 commd_debug(dbc, "%s err = %s\n", prefix, res->mmr_err);
475 }
476
477
478 /*
479 * Here we find out, where to store or find the results for a given msg.
480 *
481 * Per set we have a pointer to a three dimensional array:
482 * mct[set] -> mct_mce[NNODES][MD_MN_NCLASSES][MAX_SUBMESSAGES]
483 * So, for every possible node and for every possible class we can store
484 * MAX_SUBMESSAGES results.
485 * the way to find the correct index is
486 * submessage +
487 * class * MAX_SUBMESSAGES +
488 * nodeid * MAX_SUBMESSAGES * MD_MN_NCLASSES.
489 *
490 * To find the correct address the index has to be multiplied
491 * by the size of one entry.
492 */
493 static md_mn_mce_t *
mdmn_get_mce_by_msg(md_mn_msg_t * msg)494 mdmn_get_mce_by_msg(md_mn_msg_t *msg)
495 {
496 set_t setno = msg->msg_setno;
497 int nodeid = msg->msg_msgid.mid_nid;
498 int submsg = msg->msg_msgid.mid_smid;
499 int mct_index;
500 off_t mct_offset;
501 md_mn_msgclass_t class;
502
503 if (mct[setno] != NULL) {
504 if (mdmn_init_set(setno, MDMN_SET_MCT) != 0) {
505 return ((md_mn_mce_t *)MDMN_MCT_ERROR);
506 }
507 }
508
509 if (submsg == 0) {
510 class = mdmn_get_message_class(msg->msg_type);
511 } else {
512 class = msg->msg_msgid.mid_oclass;
513 }
514
515 mct_index = submsg + class * MAX_SUBMESSAGES +
516 nodeid * MAX_SUBMESSAGES * MD_MN_NCLASSES;
517
518 mct_offset = mct_index * sizeof (md_mn_mce_t);
519
520 /* LINTED Pointer alignment */
521 return ((md_mn_mce_t *)((caddr_t)(mct[setno]) + mct_offset));
522
523 /*
524 * the lint clean version would be:
525 * return (&(mct[setno]->mct_mce[0][0][0]) + mct_index);
526 * :-)
527 */
528 }
529
530 /*
531 * mdmn_mark_completion(msg, result, flag)
532 * Stores the result of this message into the mmaped memory MCT[setno]
533 * In case the same message comes along a second time we will know that
534 * this message has already been processed and we can deliver the
535 * results immediately.
536 *
537 * Before a message handler is called, the message in the MCT is flagged
538 * as currently being processed (flag == MDMN_MCT_IN_PROGRESS).
539 * This we need so we don't start a second handler for the same message.
540 *
541 * After a message handler is completed, this routine is called with
542 * flag == MDMN_MCT_DONE and the appropriate result that we store in the MCT.
543 * As MCT[setno] is memory mapped to disks, this information is persistent
544 * even across a crash of the commd.
545 * It doesn't have to be persistent across a reboot, though.
546 *
547 * Returns MDMN_MCT_DONE in case of success
548 * Returns MDMN_MCT_ERROR in case of error creating the mct
549 */
550 int
mdmn_mark_completion(md_mn_msg_t * msg,md_mn_result_t * result,uint_t flag)551 mdmn_mark_completion(md_mn_msg_t *msg, md_mn_result_t *result, uint_t flag)
552 {
553 md_mn_mce_t *mce;
554 uint_t offset_in_page;
555
556 mce = mdmn_get_mce_by_msg(msg);
557 if (mce == (md_mn_mce_t *)-1) {
558 return (MDMN_MCT_ERROR);
559 }
560 offset_in_page = (uint_t)(caddr_t)mce % sysconf(_SC_PAGESIZE);
561
562 (void) memset(mce, 0, sizeof (md_mn_mce_t));
563
564 MSGID_COPY(&msg->msg_msgid, &mce->mce_result.mmr_msgid);
565 if (flag == MDMN_MCT_IN_PROGRESS) {
566 mce->mce_flags = MDMN_MCT_IN_PROGRESS;
567 goto mmc_out;
568 }
569
570 /*
571 * In case the message flags indicate that the result should not be
572 * stored in the MCT, we return a MDMN_MCT_NOT_DONE,
573 * so the message will be processed at any rate,
574 * even if we process this message twice.
575 * this makes sense if the result of the message is a dynamic status
576 * and might have changed meanwhile.
577 */
578 if (msg->msg_flags & MD_MSGF_NO_MCT) {
579 return (MDMN_MCT_DONE);
580 }
581
582 /* This msg is no longer in progress */
583 mce->mce_flags = MDMN_MCT_DONE;
584
585 mce->mce_result.mmr_msgtype = result->mmr_msgtype;
586 mce->mce_result.mmr_setno = result->mmr_setno;
587 mce->mce_result.mmr_flags = result->mmr_flags;
588 mce->mce_result.mmr_sender = result->mmr_sender;
589 mce->mce_result.mmr_failing_node = result->mmr_failing_node;
590 mce->mce_result.mmr_comm_state = result->mmr_comm_state;
591 mce->mce_result.mmr_exitval = result->mmr_exitval;
592
593 /* if mmr_exitval is zero, we store stdout, otherwise stderr */
594 if (result->mmr_exitval == 0) {
595 if (result->mmr_out_size > 0) {
596 (void) memcpy(mce->mce_data, result->mmr_out,
597 result->mmr_out_size);
598 mce->mce_result.mmr_out_size = result->mmr_out_size;
599 }
600 } else {
601 if (result->mmr_err_size > 0) {
602 mce->mce_result.mmr_err_size = result->mmr_err_size;
603 (void) memcpy(mce->mce_data, result->mmr_err,
604 result->mmr_err_size);
605 }
606 }
607
608 dump_result(MD_MMV_PROC_S, "mdmn_mark_completion1", result);
609
610 mmc_out:
611 /* now flush this entry to disk */
612 (void) msync((caddr_t)mce - offset_in_page,
613 sizeof (md_mn_mce_t) + offset_in_page, MS_SYNC);
614 return (MDMN_MCT_DONE);
615 }
616
617 /*
618 * mdmn_check_completion(msg, resultp)
619 * checks if msg has already been processed on this node, and if so copies
620 * the stored result to resultp.
621 *
622 * returns MDMN_MCT_DONE and the result filled out acurately in case the
623 * msg has already been processed before
624 * returns MDMN_MCT_NOT_DONE if the message has not been processed before
625 * returns MDMN_MCT_IN_PROGRESS if the message is currently being processed
626 * This can only occur on a slave node.
627 * return MDMN_MCT_ERROR in case of error creating the mct
628 */
629 int
mdmn_check_completion(md_mn_msg_t * msg,md_mn_result_t * result)630 mdmn_check_completion(md_mn_msg_t *msg, md_mn_result_t *result)
631 {
632 md_mn_mce_t *mce;
633 size_t outsize;
634 size_t errsize;
635
636 mce = mdmn_get_mce_by_msg(msg);
637 if (mce == (md_mn_mce_t *)MDMN_MCT_ERROR) {
638 return (MDMN_MCT_ERROR); /* what to do in that case ? */
639 }
640 if (MSGID_CMP(&(msg->msg_msgid), &(mce->mce_result.mmr_msgid))) {
641 /* is the message completed, or in progress? */
642 if (mce->mce_flags & MDMN_MCT_IN_PROGRESS) {
643 return (MDMN_MCT_IN_PROGRESS);
644 }
645 /*
646 * See comment on MD_MSGF_NO_MCT above, if this flag is set
647 * for a message no result was stored and so the message has
648 * to be processed no matter if this is the 2nd time then.
649 */
650 if (msg->msg_flags & MD_MSGF_NO_MCT) {
651 return (MDMN_MCT_NOT_DONE);
652 }
653
654 /* Paranoia check: mce_flags must be MDMN_MCT_DONE here */
655 if ((mce->mce_flags & MDMN_MCT_DONE) == 0) {
656 commd_debug(MD_MMV_ALL,
657 "mdmn_check_completion: msg not done and not in "
658 "progress! ID = (%d, 0x%llx-%d)\n",
659 MSGID_ELEMS(msg->msg_msgid));
660 return (MDMN_MCT_NOT_DONE);
661 }
662 /*
663 * Already processed.
664 * Copy saved results data;
665 * return only a pointer to any output.
666 */
667 MSGID_COPY(&(mce->mce_result.mmr_msgid), &result->mmr_msgid);
668 result->mmr_msgtype = mce->mce_result.mmr_msgtype;
669 result->mmr_setno = mce->mce_result.mmr_setno;
670 result->mmr_flags = mce->mce_result.mmr_flags;
671 result->mmr_sender = mce->mce_result.mmr_sender;
672 result->mmr_failing_node = mce->mce_result.mmr_failing_node;
673 result->mmr_comm_state = mce->mce_result.mmr_comm_state;
674 result->mmr_exitval = mce->mce_result.mmr_exitval;
675 result->mmr_err = NULL;
676 result->mmr_out = NULL;
677 outsize = result->mmr_out_size = mce->mce_result.mmr_out_size;
678 errsize = result->mmr_err_size = mce->mce_result.mmr_err_size;
679 /*
680 * if the exit val is zero only stdout was stored (if any)
681 * otherwise only stderr was stored (if any)
682 */
683 if (result->mmr_exitval == 0) {
684 if (outsize != 0) {
685 result->mmr_out = Zalloc(outsize);
686 (void) memcpy(result->mmr_out, mce->mce_data,
687 outsize);
688 }
689 } else {
690 if (errsize != 0) {
691 result->mmr_err = Zalloc(errsize);
692 (void) memcpy(result->mmr_err, mce->mce_data,
693 errsize);
694 }
695 }
696 commd_debug(MD_MMV_MISC,
697 "mdmn_check_completion: msg already processed \n");
698 dump_result(MD_MMV_MISC, "mdmn_check_completion", result);
699 return (MDMN_MCT_DONE);
700 }
701 commd_debug(MD_MMV_MISC,
702 "mdmn_check_completion: msg not yet processed\n");
703 return (MDMN_MCT_NOT_DONE);
704 }
705
706
707
708 /*
709 * check_license(rqstp, chknid)
710 *
711 * Is this RPC request sent from a licensed host?
712 *
713 * If chknid is non-zero, the caller of check_license() knows the ID of
714 * the sender. Then we check just the one entry of licensed_nodes[]
715 *
716 * If chknid is zero, the sender is not known. In that case the sender must be
717 * the local node.
718 *
719 * If the host is licensed, return TRUE, else return FALSE
720 */
721 bool_t
check_license(struct svc_req * rqstp,md_mn_nodeid_t chknid)722 check_license(struct svc_req *rqstp, md_mn_nodeid_t chknid)
723 {
724 char buf[INET6_ADDRSTRLEN];
725 void *caller = NULL;
726 in_addr_t caller_ipv4;
727 in6_addr_t caller_ipv6;
728 struct sockaddr *ca;
729
730
731 ca = (struct sockaddr *)(void *)svc_getrpccaller(rqstp->rq_xprt)->buf;
732
733 if (ca->sa_family == AF_INET) {
734 caller_ipv4 =
735 ((struct sockaddr_in *)(void *)ca)->sin_addr.s_addr;
736 caller = (void *)&caller_ipv4;
737
738 if (chknid == 0) {
739 /* check against local node */
740 if (caller_ipv4 == htonl(INADDR_LOOPBACK)) {
741 return (TRUE);
742
743 }
744 } else {
745 /* check against one specific node */
746 if ((caller_ipv4 == licensed_nodes[chknid].lip_ipv4) &&
747 (licensed_nodes[chknid].lip_family == AF_INET)) {
748 return (TRUE);
749 } else {
750 commd_debug(MD_MMV_MISC,
751 "Bad attempt from %x ln[%d]=%x\n",
752 caller_ipv4, chknid,
753 licensed_nodes[chknid].lip_ipv4);
754 }
755 }
756 } else if (ca->sa_family == AF_INET6) {
757 caller_ipv6 = ((struct sockaddr_in6 *)(void *)ca)->sin6_addr;
758 caller = (void *)&caller_ipv6;
759
760 if (chknid == 0) {
761 /* check against local node */
762 if (IN6_IS_ADDR_LOOPBACK(&caller_ipv6)) {
763 return (TRUE);
764
765 }
766 } else {
767 /* check against one specific node */
768 if (IN6_ARE_ADDR_EQUAL(&caller_ipv6,
769 &(licensed_nodes[chknid].lip_ipv6)) &&
770 (licensed_nodes[chknid].lip_family == AF_INET6)) {
771 return (TRUE);
772 }
773 }
774 }
775 /* if we are here, we were contacted by an unlicensed node */
776 commd_debug(MD_MMV_SYSLOG,
777 "Bad attempt to contact rpc.mdcommd from %s\n",
778 caller ?
779 inet_ntop(ca->sa_family, caller, buf, INET6_ADDRSTRLEN) :
780 "unknown");
781
782 return (FALSE);
783 }
784
785 /*
786 * Add a node to the list of licensed nodes.
787 *
788 * Only IPv4 is currently supported.
789 * for IPv6, we need to change md_mnnode_desc.
790 */
791 void
add_license(md_mnnode_desc * node)792 add_license(md_mnnode_desc *node)
793 {
794 md_mn_nodeid_t nid = node->nd_nodeid;
795 char buf[INET6_ADDRSTRLEN];
796
797 /*
798 * If this node is not yet licensed, do it now.
799 * For now only IPv4 addresses are supported.
800 */
801 commd_debug(MD_MMV_MISC, "add_lic(%s): ln[%d]=%s, lnc[%d]=%d\n",
802 node->nd_priv_ic, nid,
803 inet_ntop(AF_INET, (void *)&licensed_nodes[nid].lip_ipv4,
804 buf, INET6_ADDRSTRLEN), nid, licensed_nodes[nid].lip_cnt);
805
806 if (licensed_nodes[nid].lip_ipv4 == (in_addr_t)0) {
807 licensed_nodes[nid].lip_family = AF_INET; /* IPv4 */
808 licensed_nodes[nid].lip_ipv4 = inet_addr(node->nd_priv_ic);
809 /* keep track of the last entry for faster search */
810 if (nid > maxlicnodes)
811 maxlicnodes = nid;
812
813 }
814 /* in any case bump up the reference count */
815 licensed_nodes[nid].lip_cnt++;
816 }
817
818 /*
819 * lower the reference count for one node.
820 * If that drops to zero, remove the node from the list of licensed nodes
821 *
822 * Only IPv4 is currently supported.
823 * for IPv6, we need to change md_mnnode_desc.
824 */
825 void
rem_license(md_mnnode_desc * node)826 rem_license(md_mnnode_desc *node)
827 {
828 md_mn_nodeid_t nid = node->nd_nodeid;
829 char buf[INET6_ADDRSTRLEN];
830
831 commd_debug(MD_MMV_MISC, "rem_lic(%s): ln[%d]=%s, lnc[%d]=%d\n",
832 node->nd_priv_ic, nid,
833 inet_ntop(AF_INET, (void *)&licensed_nodes[nid].lip_ipv4, buf,
834 INET6_ADDRSTRLEN), nid, licensed_nodes[nid].lip_cnt);
835
836 assert(licensed_nodes[nid].lip_cnt > 0);
837
838 /*
839 * If this was the last reference to that node, it's license expires
840 * For now only IPv4 addresses are supported.
841 */
842 if (--licensed_nodes[nid].lip_cnt == 0) {
843 licensed_nodes[nid].lip_ipv4 = (in_addr_t)0;
844 }
845 }
846