1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <unistd.h> 28 #include <sys/types.h> 29 #include <sys/socket.h> 30 #include <netinet/in.h> 31 #include <arpa/inet.h> 32 #include <thread.h> 33 #include "meta.h" 34 #include "mdmn_subr.h" 35 36 extern int mdmn_init_set(set_t setno, int todo); 37 38 uint_t mdmn_busy[MD_MAXSETS][MD_MN_NCLASSES]; 39 mutex_t mdmn_busy_mutex[MD_MAXSETS]; 40 cond_t mdmn_busy_cv[MD_MAXSETS]; 41 42 43 /* the wakeup table for the initiator's side */ 44 mdmn_wti_t mdmn_initiator_table[MD_MAXSETS][MD_MN_NCLASSES]; 45 46 /* the wakeup table for the master */ 47 mdmn_wtm_t mdmn_master_table[MD_MAXSETS][MD_MN_NCLASSES]; 48 49 /* List of licensed ip addresses */ 50 licensed_ip_t licensed_nodes[NNODES]; 51 52 /* speed up the search for licensed ip addresses */ 53 md_mn_nodeid_t maxlicnodes = 0; /* 0 is not a valid node ID */ 54 55 /* 56 * Check if a given set/class combination is currently in use 57 * If in use, returns TRUE 58 * Otherwise returns FALSE 59 * 60 * Must be called with mdmn_busy_mutex held 61 */ 62 bool_t 63 mdmn_is_class_busy(set_t setno, md_mn_msgclass_t class) 64 { 65 if (mdmn_busy[setno][class] & MDMN_BUSY) { 66 return (TRUE); 67 } else { 68 return (FALSE); 69 } 70 } 71 72 /* 73 * Mark a given set/class combination as currently in use 74 * If the class was already in use, returns FALSE 75 * Otherwise returns TRUE 76 * 77 * So mdmn_mark_class_busy can be used like 78 * if (mdmn_mark_class_busy(setno, class) == FALSE) 79 * failure; 80 * else 81 * success; 82 * 83 * Must be called with mdmn_busy_mutex held 84 */ 85 bool_t 86 mdmn_mark_class_busy(set_t setno, md_mn_msgclass_t class) 87 { 88 if (mdmn_busy[setno][class] & MDMN_BUSY) { 89 return (FALSE); 90 } else { 91 mdmn_busy[setno][class] |= MDMN_BUSY; 92 commd_debug(MD_MMV_MISC, "busy: set=%d, class=%d\n", 93 setno, class); 94 return (TRUE); 95 } 96 } 97 98 /* 99 * Mark a given set/class combination as currently available 100 * Always succeeds, thus void. 101 * 102 * If this class is marked MDMN_SUSPEND_ALL, we are in the middle of 103 * draining all classes of this set. 104 * We have to mark class+1 as MDMN_SUSPEND_ALL too. 105 * If class+2 wasn't busy, we proceed with class+2, and so on 106 * If any class is busy, we return. 107 * Then the drain process will be continued by the mdmn_mark_class_unbusy() of 108 * that busy class 109 */ 110 void 111 mdmn_mark_class_unbusy(set_t setno, md_mn_msgclass_t class) 112 { 113 commd_debug(MD_MMV_MISC, "unbusy: set=%d, class=%d\n", setno, class); 114 mdmn_busy[setno][class] &= ~MDMN_BUSY; 115 /* something changed, inform threads waiting for that */ 116 (void) cond_signal(&mdmn_busy_cv[setno]); 117 118 if ((mdmn_busy[setno][class] & MDMN_SUSPEND_ALL) == 0) { 119 return; 120 } 121 122 while (++class < MD_MN_NCLASSES) { 123 commd_debug(MD_MMV_MISC, 124 "unbusy: suspending set=%d, class=%d\n", setno, class); 125 if (mdmn_mark_class_suspended(setno, class, MDMN_SUSPEND_ALL) 126 == MDMNE_SET_NOT_DRAINED) { 127 break; 128 } 129 } 130 131 } 132 133 134 /* 135 * Check if a given set/class combination is locked. 136 */ 137 bool_t 138 mdmn_is_class_locked(set_t setno, md_mn_msgclass_t class) 139 { 140 if (mdmn_busy[setno][class] & MDMN_LOCKED) { 141 return (TRUE); 142 } else { 143 return (FALSE); 144 } 145 } 146 147 /* 148 * Mark a given set/class combination as locked. 149 * No checking is done here, so routine can be void. 150 * Locking a locked set/class is ok. 151 * 152 * Must be called with mdmn_busy_mutex held 153 */ 154 void 155 mdmn_mark_class_locked(set_t setno, md_mn_msgclass_t class) 156 { 157 mdmn_busy[setno][class] |= MDMN_LOCKED; 158 } 159 160 /* 161 * Mark a given set/class combination as unlocked. 162 * No checking is done here, so routine can be void. 163 * Unlocking a unlocked set/class is ok. 164 * 165 * Must be called with mdmn_busy_mutex held 166 */ 167 void 168 mdmn_mark_class_unlocked(set_t setno, md_mn_msgclass_t class) 169 { 170 mdmn_busy[setno][class] &= ~MDMN_LOCKED; 171 } 172 173 /* 174 * Suspend a set/class combination 175 * 176 * If called during draining all classes of a set susptype is MDMN_SUSPEND_ALL. 177 * If only one class is about to be drained susptype is MDMN_SUSPEND_1. 178 * 179 * Returns: 180 * MDMNE_ACK if there are no outstanding messages 181 * MDMNE_SET_NOT_DRAINED otherwise 182 * 183 * Must be called with mdmn_busy_mutex held for this set. 184 */ 185 int 186 mdmn_mark_class_suspended(set_t setno, md_mn_msgclass_t class, uint_t susptype) 187 { 188 /* 189 * We use the mdmn_busy array to mark this set is suspended. 190 */ 191 mdmn_busy[setno][class] |= susptype; 192 193 /* 194 * If there are outstanding messages for this set/class we 195 * return MDMNE_SET_NOT_DRAINED, otherwise we return MDMNE_ACK 196 */ 197 if (mdmn_is_class_busy(setno, class) == TRUE) { 198 return (MDMNE_SET_NOT_DRAINED); 199 } 200 return (MDMNE_ACK); 201 } 202 203 /* 204 * Resume operation for a set/class combination after it was 205 * previously suspended 206 * 207 * If called from mdmn_comm_resume_svc_1 to resume _one_ specific class 208 * then susptype will be MDMN_SUSPEND_1 209 * Otherwise to resume all classes of one set, 210 * then susptype equals (MDMN_SUSPEND_ALL | MDMN_SUSPEND_1) 211 * 212 * Always succeeds, thus void. 213 * 214 * Must be called with mdmn_busy_mutex held for this set. 215 */ 216 void 217 mdmn_mark_class_resumed(set_t setno, md_mn_msgclass_t class, uint_t susptype) 218 { 219 /* simply the reverse operation to mdmn_mark_set_drained() */ 220 mdmn_busy[setno][class] &= ~susptype; 221 } 222 223 /* 224 * Check if a drain command was issued for this set/class combination. 225 * 226 * Must be called with mdmn_busy_mutex held for this set. 227 */ 228 bool_t 229 mdmn_is_class_suspended(set_t setno, md_mn_msgclass_t class) 230 { 231 if (mdmn_busy[setno][class] & (MDMN_SUSPEND_ALL | MDMN_SUSPEND_1)) { 232 return (TRUE); 233 } else { 234 return (FALSE); 235 } 236 } 237 238 /* 239 * Put a result into the wakeup table for the master 240 * It's ensured that the msg id from the master_table entry and from 241 * result are matching 242 */ 243 void 244 mdmn_set_master_table_res(set_t setno, md_mn_msgclass_t class, 245 md_mn_result_t *res) 246 { 247 mdmn_master_table[setno][class].wtm_result = res; 248 } 249 void 250 mdmn_set_master_table_id(set_t setno, md_mn_msgclass_t class, md_mn_msgid_t *id) 251 { 252 MSGID_COPY(id, &(mdmn_master_table[setno][class].wtm_id)); 253 } 254 255 void 256 mdmn_set_master_table_addr(set_t setno, md_mn_msgclass_t class, 257 md_mn_nodeid_t nid) 258 { 259 mdmn_master_table[setno][class].wtm_addr = nid; 260 } 261 262 263 md_mn_result_t * 264 mdmn_get_master_table_res(set_t setno, md_mn_msgclass_t class) 265 { 266 return (mdmn_master_table[setno][class].wtm_result); 267 } 268 269 void 270 mdmn_get_master_table_id(set_t setno, md_mn_msgclass_t class, md_mn_msgid_t *id) 271 { 272 MSGID_COPY(&(mdmn_master_table[setno][class].wtm_id), id); 273 } 274 275 cond_t * 276 mdmn_get_master_table_cv(set_t setno, md_mn_msgclass_t class) 277 { 278 return (&(mdmn_master_table[setno][class].wtm_cv)); 279 } 280 281 mutex_t * 282 mdmn_get_master_table_mx(set_t setno, md_mn_msgclass_t class) 283 { 284 return (&(mdmn_master_table[setno][class].wtm_mx)); 285 } 286 287 md_mn_nodeid_t 288 mdmn_get_master_table_addr(set_t setno, md_mn_msgclass_t class) 289 { 290 return (mdmn_master_table[setno][class].wtm_addr); 291 } 292 293 294 295 /* here come the functions dealing with the wakeup table for the initiators */ 296 297 298 void 299 mdmn_register_initiator_table(set_t setno, md_mn_msgclass_t class, 300 md_mn_msg_t *msg, SVCXPRT *transp) 301 { 302 uint_t nnodes = set_descriptor[setno]->sd_mn_numnodes; 303 time_t timeout = mdmn_get_timeout(msg->msg_type); 304 305 306 MSGID_COPY(&(msg->msg_msgid), 307 &(mdmn_initiator_table[setno][class].wti_id)); 308 mdmn_initiator_table[setno][class].wti_transp = transp; 309 mdmn_initiator_table[setno][class].wti_args = (char *)msg; 310 311 /* 312 * as the point in time where we want to be guaranteed to be woken up 313 * again, we chose the 314 * current time + nnodes times the timeout value for the message type 315 */ 316 mdmn_initiator_table[setno][class].wti_time = 317 time((time_t *)NULL) + (nnodes * timeout); 318 } 319 320 /* 321 * If the set/class combination is currently busy, return MDMNE_CLASS_BUSY 322 * Otherwise return MDMNE_ACK 323 */ 324 int 325 mdmn_check_initiator_table(set_t setno, md_mn_msgclass_t class) 326 { 327 if ((mdmn_initiator_table[setno][class].wti_id.mid_nid == ~0u) && 328 (mdmn_initiator_table[setno][class].wti_transp == (SVCXPRT *)NULL)) 329 return (MDMNE_ACK); 330 return (MDMNE_CLASS_BUSY); 331 } 332 333 /* 334 * Remove an entry from the initiator table entirely, 335 * This must be done with mutex held. 336 */ 337 void 338 mdmn_unregister_initiator_table(set_t setno, md_mn_msgclass_t class) 339 { 340 mdmn_initiator_table[setno][class].wti_id.mid_nid = ~0u; 341 mdmn_initiator_table[setno][class].wti_id.mid_time = 0LL; 342 mdmn_initiator_table[setno][class].wti_transp = (SVCXPRT *)NULL; 343 mdmn_initiator_table[setno][class].wti_args = (char *)0; 344 mdmn_initiator_table[setno][class].wti_time = (time_t)0; 345 } 346 347 void 348 mdmn_get_initiator_table_id(set_t setno, md_mn_msgclass_t class, 349 md_mn_msgid_t *mid) 350 { 351 MSGID_COPY(&(mdmn_initiator_table[setno][class].wti_id), mid); 352 } 353 354 SVCXPRT * 355 mdmn_get_initiator_table_transp(set_t setno, md_mn_msgclass_t class) 356 { 357 return (mdmn_initiator_table[setno][class].wti_transp); 358 } 359 360 char * 361 mdmn_get_initiator_table_args(set_t setno, md_mn_msgclass_t class) 362 { 363 return (mdmn_initiator_table[setno][class].wti_args); 364 } 365 366 mutex_t * 367 mdmn_get_initiator_table_mx(set_t setno, md_mn_msgclass_t class) 368 { 369 return (&(mdmn_initiator_table[setno][class].wti_mx)); 370 } 371 372 time_t 373 mdmn_get_initiator_table_time(set_t setno, md_mn_msgclass_t class) 374 { 375 return (mdmn_initiator_table[setno][class].wti_time); 376 } 377 378 extern uint_t md_commd_global_verb; /* global bitmask for debug classes */ 379 extern FILE *commdout; /* debug output file for the commd */ 380 extern hrtime_t __savetime; 381 382 383 /* 384 * Print debug messages to the terminal or to syslog 385 * commd_debug(MD_MMV_SYSLOG,....) is always printed (and always via syslog), 386 * even if md_commd_global_verb is zero. 387 * 388 * Otherwise the correct bit must be set in the bitmask md_commd_global_verb 389 */ 390 void 391 commd_debug(uint_t debug_class, const char *message, ...) 392 { 393 va_list ap; 394 395 /* Is this a message for syslog? */ 396 if (debug_class == MD_MMV_SYSLOG) { 397 398 va_start(ap, message); 399 (void) vsyslog(LOG_WARNING, message, ap); 400 va_end(ap); 401 } else { 402 /* Is this debug_class set in the global verbosity state? */ 403 if ((md_commd_global_verb & debug_class) == 0) { 404 return; 405 } 406 /* Is our output file already functioning? */ 407 if (commdout == NULL) { 408 return; 409 } 410 /* Are timestamps activated ? */ 411 if (md_commd_global_verb & MD_MMV_TIMESTAMP) { 412 /* print time since last TRESET in usecs */ 413 (void) fprintf(commdout, "[%s]", 414 meta_print_hrtime(gethrtime() - __savetime)); 415 } 416 /* Now print the real message */ 417 va_start(ap, message); 418 (void) vfprintf(commdout, message, ap); 419 va_end(ap); 420 } 421 } 422 423 424 void 425 dump_hex(uint_t debug_class, unsigned int *x, int cnt) 426 { 427 cnt /= sizeof (unsigned int); 428 while (cnt--) { 429 commd_debug(debug_class, "0x%8x ", *x++); 430 if (cnt % 4) 431 continue; 432 commd_debug(debug_class, "\n"); 433 } 434 commd_debug(debug_class, "\n"); 435 } 436 437 /* debug output: dump a message */ 438 void 439 dump_msg(uint_t dbc, char *prefix, md_mn_msg_t *msg) 440 { 441 commd_debug(dbc, "%s &msg = 0x%x\n", prefix, (int)msg); 442 commd_debug(dbc, "%s ID = (%d, 0x%llx-%d)\n", prefix, 443 MSGID_ELEMS(msg->msg_msgid)); 444 commd_debug(dbc, "%s sender = %d\n", prefix, msg->msg_sender); 445 commd_debug(dbc, "%s flags = 0x%x\n", prefix, msg->msg_flags); 446 commd_debug(dbc, "%s setno = %d\n", prefix, msg->msg_setno); 447 commd_debug(dbc, "%s recipient = %d\n", prefix, msg->msg_recipient); 448 commd_debug(dbc, "%s type = %d\n", prefix, msg->msg_type); 449 commd_debug(dbc, "%s size = %d\n", prefix, msg->msg_event_size); 450 if (msg->msg_event_size) { 451 commd_debug(dbc, "%s data =\n", prefix); 452 dump_hex(dbc, (unsigned int *)(void *)msg->msg_event_data, 453 msg->msg_event_size); 454 } 455 } 456 457 /* debug output: dump a result structure */ 458 void 459 dump_result(uint_t dbc, char *prefix, md_mn_result_t *res) 460 { 461 commd_debug(dbc, "%s &res = 0x%x\n", prefix, (int)res); 462 commd_debug(dbc, "%s ID = (%d, 0x%llx-%d)\n", prefix, 463 MSGID_ELEMS(res->mmr_msgid)); 464 commd_debug(dbc, "%s setno = %d\n", prefix, res->mmr_setno); 465 commd_debug(dbc, "%s type = %d\n", prefix, res->mmr_msgtype); 466 commd_debug(dbc, "%s flags = 0x%x\n", prefix, res->mmr_flags); 467 commd_debug(dbc, "%s comm_state= %d\n", prefix, res->mmr_comm_state); 468 commd_debug(dbc, "%s exitval = %d\n", prefix, res->mmr_exitval); 469 commd_debug(dbc, "%s out_size = %d\n", prefix, res->mmr_out_size); 470 if (res->mmr_out_size) 471 commd_debug(dbc, "%s out = %s\n", prefix, res->mmr_out); 472 commd_debug(dbc, "%s err_size = %d\n", prefix, res->mmr_err_size); 473 if (res->mmr_err_size) 474 commd_debug(dbc, "%s err = %s\n", prefix, res->mmr_err); 475 } 476 477 478 /* 479 * Here we find out, where to store or find the results for a given msg. 480 * 481 * Per set we have a pointer to a three dimensional array: 482 * mct[set] -> mct_mce[NNODES][MD_MN_NCLASSES][MAX_SUBMESSAGES] 483 * So, for every possible node and for every possible class we can store 484 * MAX_SUBMESSAGES results. 485 * the way to find the correct index is 486 * submessage + 487 * class * MAX_SUBMESSAGES + 488 * nodeid * MAX_SUBMESSAGES * MD_MN_NCLASSES. 489 * 490 * To find the correct address the index has to be multiplied 491 * by the size of one entry. 492 */ 493 static md_mn_mce_t * 494 mdmn_get_mce_by_msg(md_mn_msg_t *msg) 495 { 496 set_t setno = msg->msg_setno; 497 int nodeid = msg->msg_msgid.mid_nid; 498 int submsg = msg->msg_msgid.mid_smid; 499 int mct_index; 500 off_t mct_offset; 501 md_mn_msgclass_t class; 502 503 if (mct[setno] != NULL) { 504 if (mdmn_init_set(setno, MDMN_SET_MCT) != 0) { 505 return ((md_mn_mce_t *)MDMN_MCT_ERROR); 506 } 507 } 508 509 if (submsg == 0) { 510 class = mdmn_get_message_class(msg->msg_type); 511 } else { 512 class = msg->msg_msgid.mid_oclass; 513 } 514 515 mct_index = submsg + class * MAX_SUBMESSAGES + 516 nodeid * MAX_SUBMESSAGES * MD_MN_NCLASSES; 517 518 mct_offset = mct_index * sizeof (md_mn_mce_t); 519 520 /* LINTED Pointer alignment */ 521 return ((md_mn_mce_t *)((caddr_t)(mct[setno]) + mct_offset)); 522 523 /* 524 * the lint clean version would be: 525 * return (&(mct[setno]->mct_mce[0][0][0]) + mct_index); 526 * :-) 527 */ 528 } 529 530 /* 531 * mdmn_mark_completion(msg, result, flag) 532 * Stores the result of this message into the mmaped memory MCT[setno] 533 * In case the same message comes along a second time we will know that 534 * this message has already been processed and we can deliver the 535 * results immediately. 536 * 537 * Before a message handler is called, the message in the MCT is flagged 538 * as currently being processed (flag == MDMN_MCT_IN_PROGRESS). 539 * This we need so we don't start a second handler for the same message. 540 * 541 * After a message handler is completed, this routine is called with 542 * flag == MDMN_MCT_DONE and the appropriate result that we store in the MCT. 543 * As MCT[setno] is memory mapped to disks, this information is persistent 544 * even across a crash of the commd. 545 * It doesn't have to be persistent across a reboot, though. 546 * 547 * Returns MDMN_MCT_DONE in case of success 548 * Returns MDMN_MCT_ERROR in case of error creating the mct 549 */ 550 int 551 mdmn_mark_completion(md_mn_msg_t *msg, md_mn_result_t *result, uint_t flag) 552 { 553 md_mn_mce_t *mce; 554 uint_t offset_in_page; 555 556 mce = mdmn_get_mce_by_msg(msg); 557 if (mce == (md_mn_mce_t *)-1) { 558 return (MDMN_MCT_ERROR); 559 } 560 offset_in_page = (uint_t)(caddr_t)mce % sysconf(_SC_PAGESIZE); 561 562 (void) memset(mce, 0, sizeof (md_mn_mce_t)); 563 564 MSGID_COPY(&msg->msg_msgid, &mce->mce_result.mmr_msgid); 565 if (flag == MDMN_MCT_IN_PROGRESS) { 566 mce->mce_flags = MDMN_MCT_IN_PROGRESS; 567 goto mmc_out; 568 } 569 570 /* 571 * In case the message flags indicate that the result should not be 572 * stored in the MCT, we return a MDMN_MCT_NOT_DONE, 573 * so the message will be processed at any rate, 574 * even if we process this message twice. 575 * this makes sense if the result of the message is a dynamic status 576 * and might have changed meanwhile. 577 */ 578 if (msg->msg_flags & MD_MSGF_NO_MCT) { 579 return (MDMN_MCT_DONE); 580 } 581 582 /* This msg is no longer in progress */ 583 mce->mce_flags = MDMN_MCT_DONE; 584 585 mce->mce_result.mmr_msgtype = result->mmr_msgtype; 586 mce->mce_result.mmr_setno = result->mmr_setno; 587 mce->mce_result.mmr_flags = result->mmr_flags; 588 mce->mce_result.mmr_sender = result->mmr_sender; 589 mce->mce_result.mmr_failing_node = result->mmr_failing_node; 590 mce->mce_result.mmr_comm_state = result->mmr_comm_state; 591 mce->mce_result.mmr_exitval = result->mmr_exitval; 592 593 /* if mmr_exitval is zero, we store stdout, otherwise stderr */ 594 if (result->mmr_exitval == 0) { 595 if (result->mmr_out_size > 0) { 596 (void) memcpy(mce->mce_data, result->mmr_out, 597 result->mmr_out_size); 598 mce->mce_result.mmr_out_size = result->mmr_out_size; 599 } 600 } else { 601 if (result->mmr_err_size > 0) { 602 mce->mce_result.mmr_err_size = result->mmr_err_size; 603 (void) memcpy(mce->mce_data, result->mmr_err, 604 result->mmr_err_size); 605 } 606 } 607 608 dump_result(MD_MMV_PROC_S, "mdmn_mark_completion1", result); 609 610 mmc_out: 611 /* now flush this entry to disk */ 612 (void) msync((caddr_t)mce - offset_in_page, 613 sizeof (md_mn_mce_t) + offset_in_page, MS_SYNC); 614 return (MDMN_MCT_DONE); 615 } 616 617 /* 618 * mdmn_check_completion(msg, resultp) 619 * checks if msg has already been processed on this node, and if so copies 620 * the stored result to resultp. 621 * 622 * returns MDMN_MCT_DONE and the result filled out acurately in case the 623 * msg has already been processed before 624 * returns MDMN_MCT_NOT_DONE if the message has not been processed before 625 * returns MDMN_MCT_IN_PROGRESS if the message is currently being processed 626 * This can only occur on a slave node. 627 * return MDMN_MCT_ERROR in case of error creating the mct 628 */ 629 int 630 mdmn_check_completion(md_mn_msg_t *msg, md_mn_result_t *result) 631 { 632 md_mn_mce_t *mce; 633 size_t outsize; 634 size_t errsize; 635 636 mce = mdmn_get_mce_by_msg(msg); 637 if (mce == (md_mn_mce_t *)MDMN_MCT_ERROR) { 638 return (MDMN_MCT_ERROR); /* what to do in that case ? */ 639 } 640 if (MSGID_CMP(&(msg->msg_msgid), &(mce->mce_result.mmr_msgid))) { 641 /* is the message completed, or in progress? */ 642 if (mce->mce_flags & MDMN_MCT_IN_PROGRESS) { 643 return (MDMN_MCT_IN_PROGRESS); 644 } 645 /* 646 * See comment on MD_MSGF_NO_MCT above, if this flag is set 647 * for a message no result was stored and so the message has 648 * to be processed no matter if this is the 2nd time then. 649 */ 650 if (msg->msg_flags & MD_MSGF_NO_MCT) { 651 return (MDMN_MCT_NOT_DONE); 652 } 653 654 /* Paranoia check: mce_flags must be MDMN_MCT_DONE here */ 655 if ((mce->mce_flags & MDMN_MCT_DONE) == 0) { 656 commd_debug(MD_MMV_ALL, 657 "mdmn_check_completion: msg not done and not in " 658 "progress! ID = (%d, 0x%llx-%d)\n", 659 MSGID_ELEMS(msg->msg_msgid)); 660 return (MDMN_MCT_NOT_DONE); 661 } 662 /* 663 * Already processed. 664 * Copy saved results data; 665 * return only a pointer to any output. 666 */ 667 MSGID_COPY(&(mce->mce_result.mmr_msgid), &result->mmr_msgid); 668 result->mmr_msgtype = mce->mce_result.mmr_msgtype; 669 result->mmr_setno = mce->mce_result.mmr_setno; 670 result->mmr_flags = mce->mce_result.mmr_flags; 671 result->mmr_sender = mce->mce_result.mmr_sender; 672 result->mmr_failing_node = mce->mce_result.mmr_failing_node; 673 result->mmr_comm_state = mce->mce_result.mmr_comm_state; 674 result->mmr_exitval = mce->mce_result.mmr_exitval; 675 result->mmr_err = NULL; 676 result->mmr_out = NULL; 677 outsize = result->mmr_out_size = mce->mce_result.mmr_out_size; 678 errsize = result->mmr_err_size = mce->mce_result.mmr_err_size; 679 /* 680 * if the exit val is zero only stdout was stored (if any) 681 * otherwise only stderr was stored (if any) 682 */ 683 if (result->mmr_exitval == 0) { 684 if (outsize != 0) { 685 result->mmr_out = Zalloc(outsize); 686 (void) memcpy(result->mmr_out, mce->mce_data, 687 outsize); 688 } 689 } else { 690 if (errsize != 0) { 691 result->mmr_err = Zalloc(errsize); 692 (void) memcpy(result->mmr_err, mce->mce_data, 693 errsize); 694 } 695 } 696 commd_debug(MD_MMV_MISC, 697 "mdmn_check_completion: msg already processed \n"); 698 dump_result(MD_MMV_MISC, "mdmn_check_completion", result); 699 return (MDMN_MCT_DONE); 700 } 701 commd_debug(MD_MMV_MISC, 702 "mdmn_check_completion: msg not yet processed\n"); 703 return (MDMN_MCT_NOT_DONE); 704 } 705 706 707 708 /* 709 * check_license(rqstp, chknid) 710 * 711 * Is this RPC request sent from a licensed host? 712 * 713 * If chknid is non-zero, the caller of check_license() knows the ID of 714 * the sender. Then we check just the one entry of licensed_nodes[] 715 * 716 * If chknid is zero, the sender is not known. In that case the sender must be 717 * the local node. 718 * 719 * If the host is licensed, return TRUE, else return FALSE 720 */ 721 bool_t 722 check_license(struct svc_req *rqstp, md_mn_nodeid_t chknid) 723 { 724 char buf[INET6_ADDRSTRLEN]; 725 void *caller = NULL; 726 in_addr_t caller_ipv4; 727 in6_addr_t caller_ipv6; 728 struct sockaddr *ca; 729 730 731 ca = (struct sockaddr *)(void *)svc_getrpccaller(rqstp->rq_xprt)->buf; 732 733 if (ca->sa_family == AF_INET) { 734 caller_ipv4 = 735 ((struct sockaddr_in *)(void *)ca)->sin_addr.s_addr; 736 caller = (void *)&caller_ipv4; 737 738 if (chknid == 0) { 739 /* check against local node */ 740 if (caller_ipv4 == htonl(INADDR_LOOPBACK)) { 741 return (TRUE); 742 743 } 744 } else { 745 /* check against one specific node */ 746 if ((caller_ipv4 == licensed_nodes[chknid].lip_ipv4) && 747 (licensed_nodes[chknid].lip_family == AF_INET)) { 748 return (TRUE); 749 } else { 750 commd_debug(MD_MMV_MISC, 751 "Bad attempt from %x ln[%d]=%x\n", 752 caller_ipv4, chknid, 753 licensed_nodes[chknid].lip_ipv4); 754 } 755 } 756 } else if (ca->sa_family == AF_INET6) { 757 caller_ipv6 = ((struct sockaddr_in6 *)(void *)ca)->sin6_addr; 758 caller = (void *)&caller_ipv6; 759 760 if (chknid == 0) { 761 /* check against local node */ 762 if (IN6_IS_ADDR_LOOPBACK(&caller_ipv6)) { 763 return (TRUE); 764 765 } 766 } else { 767 /* check against one specific node */ 768 if (IN6_ARE_ADDR_EQUAL(&caller_ipv6, 769 &(licensed_nodes[chknid].lip_ipv6)) && 770 (licensed_nodes[chknid].lip_family == AF_INET6)) { 771 return (TRUE); 772 } 773 } 774 } 775 /* if we are here, we were contacted by an unlicensed node */ 776 commd_debug(MD_MMV_SYSLOG, 777 "Bad attempt to contact rpc.mdcommd from %s\n", 778 caller ? 779 inet_ntop(ca->sa_family, caller, buf, INET6_ADDRSTRLEN) : 780 "unknown"); 781 782 return (FALSE); 783 } 784 785 /* 786 * Add a node to the list of licensed nodes. 787 * 788 * Only IPv4 is currently supported. 789 * for IPv6, we need to change md_mnnode_desc. 790 */ 791 void 792 add_license(md_mnnode_desc *node) 793 { 794 md_mn_nodeid_t nid = node->nd_nodeid; 795 char buf[INET6_ADDRSTRLEN]; 796 797 /* 798 * If this node is not yet licensed, do it now. 799 * For now only IPv4 addresses are supported. 800 */ 801 commd_debug(MD_MMV_MISC, "add_lic(%s): ln[%d]=%s, lnc[%d]=%d\n", 802 node->nd_priv_ic, nid, 803 inet_ntop(AF_INET, (void *)&licensed_nodes[nid].lip_ipv4, 804 buf, INET6_ADDRSTRLEN), nid, licensed_nodes[nid].lip_cnt); 805 806 if (licensed_nodes[nid].lip_ipv4 == (in_addr_t)0) { 807 licensed_nodes[nid].lip_family = AF_INET; /* IPv4 */ 808 licensed_nodes[nid].lip_ipv4 = inet_addr(node->nd_priv_ic); 809 /* keep track of the last entry for faster search */ 810 if (nid > maxlicnodes) 811 maxlicnodes = nid; 812 813 } 814 /* in any case bump up the reference count */ 815 licensed_nodes[nid].lip_cnt++; 816 } 817 818 /* 819 * lower the reference count for one node. 820 * If that drops to zero, remove the node from the list of licensed nodes 821 * 822 * Only IPv4 is currently supported. 823 * for IPv6, we need to change md_mnnode_desc. 824 */ 825 void 826 rem_license(md_mnnode_desc *node) 827 { 828 md_mn_nodeid_t nid = node->nd_nodeid; 829 char buf[INET6_ADDRSTRLEN]; 830 831 commd_debug(MD_MMV_MISC, "rem_lic(%s): ln[%d]=%s, lnc[%d]=%d\n", 832 node->nd_priv_ic, nid, 833 inet_ntop(AF_INET, (void *)&licensed_nodes[nid].lip_ipv4, buf, 834 INET6_ADDRSTRLEN), nid, licensed_nodes[nid].lip_cnt); 835 836 assert(licensed_nodes[nid].lip_cnt > 0); 837 838 /* 839 * If this was the last reference to that node, it's license expires 840 * For now only IPv4 addresses are supported. 841 */ 842 if (--licensed_nodes[nid].lip_cnt == 0) { 843 licensed_nodes[nid].lip_ipv4 = (in_addr_t)0; 844 } 845 } 846