/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 *
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include "rcm_impl.h"
#include "rcm_module.h"

/*
 * Global locks
 */
mutex_t rcm_req_lock;	/* protects global dr & info request list */

/*
 * Daemon state file
 */
static int state_fd;
#define	RCM_STATE_FILE	"/var/run/rcm_daemon_state"
#define	N_REQ_CHUNK	10	/* grow 10 entries at a time */

/*
 * Daemon timeout value
 */
#define	RCM_DAEMON_TIMEOUT	300	/* 5 minutes idle time */

/*
 * Struct for a list of outstanding rcm requests
 */
typedef struct {
	int	seq_num;		/* sequence number of request */
	int	state;			/* current state */
	pid_t	pid;			/* pid of initiator */
	uint_t	flag;			/* request flags */
	int	type;			/* resource (device) type */
	timespec_t interval;		/* suspend interval */
	char	device[MAXPATHLEN];	/* name of device or resource */
} req_t;

typedef struct {
	int	n_req;
	int	n_req_max;	/* number of req_t's to follow */
	int	n_seq_max;	/* last sequence number */
	int	idle_timeout;	/* persisted idle timeout value */
	req_t	req[1];
	/* more req_t follows */
} req_list_t;
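
/*
 * Note on the layout above: req_list_t ends in a variable-length
 * array, so req[1] is a placeholder for n_req_max entries allocated
 * contiguously past the struct.  A list sized for N entries therefore
 * occupies
 *
 *	sizeof (req_list_t) + (N - 1) * sizeof (req_t)
 *
 * bytes (this is the formula used in get_req_entry() below).
 * dr_req_list lives in the mmapped state file so that it survives
 * daemon restarts; info_req_list is ordinary heap memory.
 */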

static req_list_t *dr_req_list;
static req_list_t *info_req_list;

static const char *locked_info = "DR operation in progress";
static const char *locked_err = "Resource is busy";

static int rcmd_get_state();
static void add_to_polling_list(pid_t);
static void remove_from_polling_list(pid_t);

void start_polling_thread();
static void stop_polling_thread();

/*
 * Initialize request lists required for locking
 */
void
rcmd_lock_init(void)
{
	int size;
	struct stat fbuf;

	/*
	 * Start the info list with one slot, then grow on demand.
	 */
	info_req_list = s_calloc(1, sizeof (req_list_t));
	info_req_list->n_req_max = 1;

	/*
	 * Open the daemon state file and map in its contents
	 */
	state_fd = open(RCM_STATE_FILE, O_CREAT|O_RDWR, 0600);
	if (state_fd == -1) {
		rcm_log_message(RCM_ERROR, gettext("cannot open %s: %s\n"),
		    RCM_STATE_FILE, strerror(errno));
		rcmd_exit(errno);
	}

	if (fstat(state_fd, &fbuf) != 0) {
		rcm_log_message(RCM_ERROR, gettext("cannot stat %s: %s\n"),
		    RCM_STATE_FILE, strerror(errno));
		rcmd_exit(errno);
	}

	size = fbuf.st_size;
	if (size == 0) {
		size = sizeof (req_list_t);
		if (ftruncate(state_fd, size) != 0) {
			rcm_log_message(RCM_ERROR,
			    gettext("cannot truncate %s: %s\n"),
			    RCM_STATE_FILE, strerror(errno));
			rcmd_exit(errno);
		}
	}

	/*LINTED*/
	dr_req_list = (req_list_t *)mmap(NULL, size, PROT_READ|PROT_WRITE,
	    MAP_SHARED, state_fd, 0);
	if (dr_req_list == MAP_FAILED) {
		rcm_log_message(RCM_ERROR, gettext("cannot mmap %s: %s\n"),
		    RCM_STATE_FILE, strerror(errno));
		rcmd_exit(errno);
	}

	/*
	 * Initial size is one entry
	 */
	if (dr_req_list->n_req_max == 0) {
		dr_req_list->n_req_max = 1;
		(void) fsync(state_fd);
		return;
	}

	rcm_log_message(RCM_DEBUG, "n_req = %d, n_req_max = %d\n",
	    dr_req_list->n_req, dr_req_list->n_req_max);

	/*
	 * Recover the daemon state
	 */
	clean_dr_list();
}

/*
 * Get a unique sequence number--to be called with rcm_req_lock held.
 */
static int
get_seq_number()
{
	int number;

	if (dr_req_list == NULL)
		return (0);

	dr_req_list->n_seq_max++;
	number = (dr_req_list->n_seq_max << SEQ_NUM_SHIFT);
	(void) fsync(state_fd);

	return (number);
}
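
/*
 * Sequence number layout (a sketch; SEQ_NUM_SHIFT and SEQ_NUM_MASK are
 * defined in the daemon's headers):
 *
 *	(base number from n_seq_max) << SEQ_NUM_SHIFT  |  cascade bits
 *
 * The base number identifies one top-level DR operation; the low bits
 * distinguish cascade operations issued on dependent resources.  This
 * is why lookups below compare (seq_num >> SEQ_NUM_SHIFT) and why
 * clean_dr_list() skips entries with (seq_num & SEQ_NUM_MASK) set.
 */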

/*
 * Find an entry in the list with the same resource name and sequence
 * number.  If seq_num == -1, no seq_num matching is required.
 */
static req_t *
find_req_entry(char *device, uint_t flag, int seq_num, req_list_t *list)
{
	int i;

	/*
	 * Look for an entry with the same resource and seq_num.
	 * Also match the RCM_FILESYS field in flag.
	 */
	for (i = 0; i < list->n_req_max; i++) {
		if (list->req[i].state == RCM_STATE_REMOVE)
			/* stale entry */
			continue;
		/*
		 * We need to distinguish a file system root from the
		 * directory it is mounted on.
		 *
		 * Applications are not aware of any difference between
		 * the two, but the system keeps track of it internally
		 * by checking for mount points while traversing the
		 * file path.  In a similar spirit, RCM keeps this
		 * difference as an implementation detail.
		 */
		if ((strcmp(device, list->req[i].device) != 0) ||
		    (list->req[i].flag & RCM_FILESYS) != (flag & RCM_FILESYS))
			/* different resource */
			continue;

		if ((seq_num != -1) && ((seq_num >> SEQ_NUM_SHIFT) !=
		    (list->req[i].seq_num >> SEQ_NUM_SHIFT)))
			/* different base seqnum */
			continue;

		return (&list->req[i]);
	}

	return (NULL);
}

/*
 * Get the next empty req_t entry.  If no entry exists, grow the list.
 */
static req_t *
get_req_entry(req_list_t **listp)
{
	int i;
	int n_req = (*listp)->n_req;
	int n_req_max = (*listp)->n_req_max;

	/*
	 * If the list is full, grow the list and return the first
	 * entry in the new portion.
	 */
	if (n_req == n_req_max) {
		int newsize;

		n_req_max += N_REQ_CHUNK;
		newsize = sizeof (req_list_t) + (n_req_max - 1) *
		    sizeof (req_t);

		if (listp == &info_req_list) {
			*listp = s_realloc(*listp, newsize);
		} else if (ftruncate(state_fd, newsize) != 0) {
			rcm_log_message(RCM_ERROR,
			    gettext("cannot truncate %s: %s\n"),
			    RCM_STATE_FILE, strerror(errno));
			rcmd_exit(errno);
		/*LINTED*/
		} else if ((*listp = (req_list_t *)mmap(NULL, newsize,
		    PROT_READ|PROT_WRITE, MAP_SHARED, state_fd, 0)) ==
		    MAP_FAILED) {
			rcm_log_message(RCM_ERROR,
			    gettext("cannot mmap %s: %s\n"),
			    RCM_STATE_FILE, strerror(errno));
			rcmd_exit(errno);
		}

		/* Initialize the new entries */
		for (i = (*listp)->n_req_max; i < n_req_max; i++) {
			(*listp)->req[i].state = RCM_STATE_REMOVE;
			(void) strcpy((*listp)->req[i].device, "");
		}

		(*listp)->n_req_max = n_req_max;
		(*listp)->n_req++;
		return (&(*listp)->req[n_req]);
	}

	/*
	 * The list contains an empty slot; find it.
	 */
	for (i = 0; i < n_req_max; i++) {
		if (((*listp)->req[i].device[0] == '\0') ||
		    ((*listp)->req[i].state == RCM_STATE_REMOVE)) {
			break;
		}
	}

	assert(i < n_req_max);	/* an empty slot must exist */

	(*listp)->n_req++;
	return (&(*listp)->req[i]);
}

/*
 * When one resource depends on multiple resources, it's possible that
 * rcm_get_info can be called multiple times on the resource, resulting
 * in duplicate information.  By assigning a unique sequence number to
 * each rcm_get_info operation, this duplication can be eliminated.
 *
 * Insert a dr entry in info_req_list
 */
int
info_req_add(char *rsrcname, uint_t flag, int seq_num)
{
	int error = 0;
	char *device;
	req_t *req;

	rcm_log_message(RCM_TRACE2, "info_req_add(%s, %d)\n",
	    rsrcname, seq_num);

	device = resolve_name(rsrcname);
	(void) mutex_lock(&rcm_req_lock);

	/*
	 * Look for an entry with the same resource and seq_num.
	 * If it exists, we return an error so that such
	 * information is not gathered more than once.
	 */
	if (find_req_entry(device, flag, seq_num, info_req_list) != NULL) {
		rcm_log_message(RCM_DEBUG, "getinfo cycle: %s %d\n",
		    device, seq_num);
		error = -1;
		goto out;
	}

	/*
	 * Get an empty entry and fill in seq_num and device.
	 */
	req = get_req_entry(&info_req_list);
	req->seq_num = seq_num;
	req->state = RCM_STATE_ONLINE;	/* mark that the entry is in use */
	req->flag = flag;
	(void) strcpy(req->device, device);

out:
	(void) mutex_unlock(&rcm_req_lock);
	free(device);

	return (error);
}

/*
 * Remove all entries associated with seq_num from info_req_list
 */
void
info_req_remove(int seq_num)
{
	int i;

	rcm_log_message(RCM_TRACE3, "info_req_remove(%d)\n", seq_num);

	seq_num >>= SEQ_NUM_SHIFT;
	(void) mutex_lock(&rcm_req_lock);

	/* remove all entries with seq_num */
	for (i = 0; i < info_req_list->n_req_max; i++) {
		if (info_req_list->req[i].state == RCM_STATE_REMOVE)
			continue;

		if ((info_req_list->req[i].seq_num >> SEQ_NUM_SHIFT) !=
		    seq_num)
			continue;

		info_req_list->req[i].state = RCM_STATE_REMOVE;
		info_req_list->n_req--;
	}

	/*
	 * We don't shrink the info_req_list size for now.
	 */
	(void) mutex_unlock(&rcm_req_lock);
}
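
/*
 * The conflict rules enforced by check_lock() below, illustrated with
 * hypothetical resource names: if a DR operation currently holds
 * /pci@1f/scsi@2, then
 *
 *	- DR of /pci@1f/scsi@2/disk@0 (a descendant) conflicts;
 *	- DR of /pci@1f (an ancestor) conflicts;
 *	- registering for /pci@1f/scsi@2/disk@0 conflicts;
 *	- unmounting a file system rooted at a child directory is still
 *	  allowed (the RCM_FILESYS exception below).
 */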

/*
 * Check for lock conflicts.  There is a conflict if:
 * - an attempt is made to DR a node when either its ancestor or
 *   descendant is in the process of DR
 * - an attempt is made to register for a node when its ancestor is
 *   locked for DR
 */
static int
check_lock(char *device, uint_t flag, int cflag, rcm_info_t **info)
{
	int i, ret = RCM_SUCCESS;

	if (info)
		*info = NULL;

	/*
	 * During daemon initialization, don't check locks
	 */
	if (dr_req_list == NULL)
		return (ret);

	for (i = 0; i < dr_req_list->n_req; i++) {
		req_t *req = &dr_req_list->req[i];
		char *dr_dev = req->device;

		/*
		 * Skip empty entries
		 */
		if ((req->state == RCM_STATE_REMOVE) || (dr_dev[0] == '\0'))
			continue;

		/*
		 * Make sure that none of the ancestors of dr_dev is
		 * being operated upon.
		 */
		if (EQUAL(device, dr_dev) || DESCENDENT(device, dr_dev)) {
			/*
			 * An exception to this is the file system.
			 * We should allow a file system rooted at a
			 * child directory to be unmounted.
			 */
			if ((flag & RCM_FILESYS) && (!EQUAL(device, dr_dev) ||
			    ((dr_req_list->req[i].flag & RCM_FILESYS) == 0)))
				continue;

			assert(info != 0);

			add_busy_rsrc_to_list(dr_dev, dr_req_list->req[i].pid,
			    dr_req_list->req[i].state,
			    dr_req_list->req[i].seq_num, NULL, locked_info,
			    locked_err, NULL, info);
			ret = RCM_CONFLICT;
			break;
		}

		if ((cflag == LOCK_FOR_DR) && DESCENDENT(dr_dev, device)) {
			/*
			 * Check descendants only for DR requests.
			 *
			 * There could be multiple descendants doing DR;
			 * we want to find them all.
			 */
			assert(info != 0);

			add_busy_rsrc_to_list(dr_dev, dr_req_list->req[i].pid,
			    dr_req_list->req[i].state,
			    dr_req_list->req[i].seq_num, NULL, locked_info,
			    locked_err, NULL, info);
			ret = RCM_CONFLICT;
			/* don't break here, need to find all conflicts */
		}
	}

	return (ret);
}

/*
 * Check for lock conflicts for a DR operation or client registration
 */
int
rsrc_check_lock_conflicts(char *rsrcname, uint_t flag, int cflag,
    rcm_info_t **info)
{
	int result;
	char *device;

	device = resolve_name(rsrcname);
	result = check_lock(device, flag, cflag, info);
	free(device);

	return (result);
}

static int
transition_state(int state)
{
	/*
	 * If the resource state is in transition, ask the caller to
	 * try again.
	 */
	switch (state) {
	case RCM_STATE_OFFLINING:
	case RCM_STATE_SUSPENDING:
	case RCM_STATE_RESUMING:
	case RCM_STATE_ONLINING:
	case RCM_STATE_REMOVING:

		return (1);

	default:
		/*FALLTHROUGH*/
		break;
	}
	return (0);
}
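
/*
 * Valid DR state transitions, derived from the checks in
 * dr_req_update_entry() below:
 *
 *	OFFLINE_QUERYING/OFFLINING   <- OFFLINE_FAIL, OFFLINE_QUERYING,
 *	                                OFFLINE_QUERY, OFFLINE_QUERY_FAIL,
 *	                                OFFLINE (re-offline attempts)
 *	SUSPEND_QUERYING/SUSPENDING  <- the suspend counterparts
 *	RESUMING                     <- any suspend state
 *	ONLINING                     <- any offline state
 *	REMOVING                     <- OFFLINE, OFFLINE_FAIL
 *	SUSPEND/SUSPEND_FAIL,
 *	OFFLINE/OFFLINE_FAIL, ONLINE <- the corresponding *ING state(s)
 */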

/*
 * Update a dr entry in dr_req_list
 */
/*ARGSUSED*/
static int
dr_req_update_entry(char *device, pid_t pid, uint_t flag, int state,
    int seq_num, timespec_t *interval, rcm_info_t **infop)
{
	req_t *req;

	/*
	 * Find the request entry.  If not found, return RCM_FAILURE.
	 */
	req = find_req_entry(device, flag, -1, dr_req_list);

	if (req == NULL) {
		switch (state) {
		case RCM_STATE_OFFLINE_QUERYING:
		case RCM_STATE_SUSPEND_QUERYING:
		case RCM_STATE_OFFLINING:
		case RCM_STATE_SUSPENDING:
			/* could be a re-do operation, no error message */
			break;

		default:
			rcm_log_message(RCM_DEBUG,
			    "update non-existing resource %s\n", device);
		}
		return (RCM_FAILURE);
	}

	/*
	 * During initialization, the update is unconditional (forced)
	 * in order to bring the daemon up in a sane state.
	 */
	if (rcmd_get_state() == RCMD_INIT)
		goto update;

	/*
	 * Don't allow an update with a mismatched initiator pid.  This
	 * could happen as part of normal operation.
	 */
	if (pid != req->pid) {
		rcm_log_message(RCM_INFO,
		    gettext("mismatched dr initiator pid: %ld %ld\n"),
		    req->pid, pid);
		goto failure;
	}

	rcm_log_message(RCM_TRACE4,
	    "dr_req_update_entry: state=%d, device=%s\n",
	    req->state, req->device);

	/*
	 * Check that the state transition is valid
	 */
	switch (state) {
	case RCM_STATE_OFFLINE_QUERYING:
	case RCM_STATE_OFFLINING:
		/*
		 * This is the case of re-offlining, which applies only
		 * if a previous attempt failed.
		 */
		if ((req->state != RCM_STATE_OFFLINE_FAIL) &&
		    (req->state != RCM_STATE_OFFLINE_QUERYING) &&
		    (req->state != RCM_STATE_OFFLINE_QUERY) &&
		    (req->state != RCM_STATE_OFFLINE_QUERY_FAIL) &&
		    (req->state != RCM_STATE_OFFLINE)) {
			rcm_log_message(RCM_WARNING,
			    gettext("%s: invalid offlining from state %d\n"),
			    device, req->state);
			goto failure;
		}
		break;

	case RCM_STATE_SUSPEND_QUERYING:
	case RCM_STATE_SUSPENDING:
		/*
		 * This is the case of re-suspending, which applies only
		 * if a previous attempt failed.
		 */
		if ((req->state != RCM_STATE_SUSPEND_FAIL) &&
		    (req->state != RCM_STATE_SUSPEND_QUERYING) &&
		    (req->state != RCM_STATE_SUSPEND_QUERY) &&
		    (req->state != RCM_STATE_SUSPEND_QUERY_FAIL) &&
		    (req->state != RCM_STATE_SUSPEND)) {
			rcm_log_message(RCM_WARNING,
			    gettext("%s: invalid suspending from state %d\n"),
			    device, req->state);
			goto failure;
		}
		break;

	case RCM_STATE_RESUMING:
		if ((req->state != RCM_STATE_SUSPEND) &&
		    (req->state != RCM_STATE_SUSPEND_QUERYING) &&
		    (req->state != RCM_STATE_SUSPEND_QUERY) &&
		    (req->state != RCM_STATE_SUSPEND_QUERY_FAIL) &&
		    (req->state != RCM_STATE_SUSPEND_FAIL)) {
			rcm_log_message(RCM_DEBUG,
			    "%s: invalid resuming from state %d\n",
			    device, req->state);
			goto failure;
		}
		break;

	case RCM_STATE_ONLINING:
		if ((req->state != RCM_STATE_OFFLINE) &&
		    (req->state != RCM_STATE_OFFLINE_QUERYING) &&
		    (req->state != RCM_STATE_OFFLINE_QUERY) &&
		    (req->state != RCM_STATE_OFFLINE_QUERY_FAIL) &&
		    (req->state != RCM_STATE_OFFLINE_FAIL)) {
			rcm_log_message(RCM_INFO,
			    gettext("%s: invalid onlining from state %d\n"),
			    device, req->state);
			goto failure;
		}
		break;

	case RCM_STATE_REMOVING:
		if ((req->state != RCM_STATE_OFFLINE) &&
		    (req->state != RCM_STATE_OFFLINE_FAIL)) {
			rcm_log_message(RCM_INFO,
			    gettext("%s: invalid removing from state %d\n"),
			    device, req->state);
			goto failure;
		}
		break;

	case RCM_STATE_SUSPEND_FAIL:
		assert(req->state == RCM_STATE_SUSPENDING);
		break;

	case RCM_STATE_OFFLINE_FAIL:
		assert(req->state == RCM_STATE_OFFLINING);
		break;

	case RCM_STATE_SUSPEND:
		assert(req->state == RCM_STATE_SUSPENDING);
		break;

	case RCM_STATE_OFFLINE:
		assert(req->state == RCM_STATE_OFFLINING);
		break;

	case RCM_STATE_ONLINE:
		assert((req->state == RCM_STATE_RESUMING) ||
		    (req->state == RCM_STATE_ONLINING));
		break;

	default:	/* shouldn't be here */
		rcm_log_message(RCM_ERROR,
		    gettext("invalid update to dr state: %d\n"), state);
		return (RCM_FAILURE);
	}

update:
	/*
	 * update the state, interval, and sequence number; sync state file
	 */
	req->state = state;
	req->seq_num = seq_num;

	if (interval)
		req->interval = *interval;
	else
		bzero(&req->interval, sizeof (timespec_t));

	(void) fsync(state_fd);
	return (RCM_SUCCESS);

failure:
	if (infop != NULL) {
		add_busy_rsrc_to_list(req->device, req->pid, req->state,
		    req->seq_num, NULL, locked_info, locked_err, NULL, infop);
	}

	/*
	 * A request may be left in a transition state because the operator
	 * typed ctrl-C.  In this case, the daemon thread continues to run
	 * and will eventually put the state in a non-transitional state.
	 *
	 * To be safe, we return EAGAIN to allow librcm to loop and retry.
	 * If we are called from a module, loop & retry could result in a
	 * deadlock.  The caller will check for this case and turn EAGAIN
	 * into RCM_CONFLICT.
	 */
	if (transition_state(req->state)) {
		return (EAGAIN);
	}

	return (RCM_CONFLICT);
}

/*
 * Insert a dr entry in dr_req_list
 */
int
dr_req_add(char *rsrcname, pid_t pid, uint_t flag, int state, int seq_num,
    timespec_t *interval, rcm_info_t **info)
{
	int error;
	char *device;
	req_t *req;

	rcm_log_message(RCM_TRACE3, "dr_req_add(%s, %ld, 0x%x, %d, %d, %p)\n",
	    rsrcname, pid, flag, state, seq_num, (void *)info);

	device = resolve_name(rsrcname);
	if (device == NULL)
		return (EINVAL);

	(void) mutex_lock(&rcm_req_lock);

	/*
	 * In the re-offline/suspend case, attempt to update the dr request.
	 *
	 * If this succeeds, return success;
	 * if this fails because of a conflict, return the error;
	 * if this fails because no entry exists, add a new entry.
	 */
	error = dr_req_update_entry(device, pid, flag, state, seq_num,
	    interval, info);

	switch (error) {
	case RCM_FAILURE:
		/* proceed to add a new entry */
		break;

	case RCM_CONFLICT:
	case RCM_SUCCESS:
	case EAGAIN:
	default:
		goto out;
	}

	/*
	 * Check for lock conflicts
	 */
	error = check_lock(device, flag, LOCK_FOR_DR, info);
	if (error != RCM_SUCCESS) {
		error = RCM_CONFLICT;
		goto out;
	}

	/*
	 * Get an empty request entry, fill in the values, and sync the
	 * state file.
	 */
	req = get_req_entry(&dr_req_list);

	req->seq_num = seq_num;
	req->pid = pid;
	req->flag = flag;
	req->state = state;
	req->type = rsrc_get_type(device);
	(void) strcpy(req->device, device);

	/* cache the interval for failure recovery */
	if (interval)
		req->interval = *interval;
	else
		bzero(&req->interval, sizeof (timespec_t));

	(void) fsync(state_fd);

	/*
	 * Add the initiator pid to the polling list
	 */
	add_to_polling_list(req->pid);

out:
	(void) mutex_unlock(&rcm_req_lock);
	free(device);

	return (error);
}

/*
 * Update a dr entry in dr_req_list
 */
/*ARGSUSED*/
int
dr_req_update(char *rsrcname, pid_t pid, uint_t flag, int state, int seq_num,
    rcm_info_t **info)
{
	int error;
	char *device = resolve_name(rsrcname);

	rcm_log_message(RCM_TRACE3, "dr_req_update(%s, %ld, 0x%x, %d, %d)\n",
	    rsrcname, pid, flag, state, seq_num);

	(void) mutex_lock(&rcm_req_lock);
	error = dr_req_update_entry(device, pid, flag, state, seq_num, NULL,
	    info);
	(void) mutex_unlock(&rcm_req_lock);
	free(device);

	return (error);
}
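
/*
 * Life cycle of a dr_req_list entry, summarizing the functions above
 * and below: dr_req_add() creates the entry when an offline/suspend
 * begins, dr_req_update() moves it through the state machine, and
 * dr_req_remove() retires it when the operation completes or is
 * undone.  clean_dr_list() recovers entries whose initiators died.
 */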

/*
 * This function scans the DR request list for the next non-removed
 * entry that is part of the specified sequence.  The 'device' name
 * of the entry is copied into the provided 'rsrc' buffer.
 *
 * The 'rsrc' buffer is required because the DR request list is only
 * locked for the duration of this lookup.  Giving a direct pointer
 * to something in the list would be unsafe.
 */
int
dr_req_lookup(int seq_num, char *rsrc)
{
	int i;
	int len;
	int base = (seq_num >> SEQ_NUM_SHIFT);
	int retval = RCM_FAILURE;

	if (rsrc == NULL) {
		return (RCM_FAILURE);
	}

	(void) mutex_lock(&rcm_req_lock);

	for (i = 0; i < dr_req_list->n_req_max; i++) {

		/* Skip removed or non-matching entries */
		if ((dr_req_list->req[i].state == RCM_STATE_REMOVE) ||
		    ((dr_req_list->req[i].seq_num >> SEQ_NUM_SHIFT) !=
		    base)) {
			continue;
		}

		/* Copy the next-matching 'device' name into 'rsrc' */
		len = strlcpy(rsrc, dr_req_list->req[i].device, MAXPATHLEN);
		if (len < MAXPATHLEN) {
			retval = RCM_SUCCESS;
		}
		break;
	}

	(void) mutex_unlock(&rcm_req_lock);

	return (retval);
}

/*
 * Remove a dr entry from dr_req_list
 */
void
dr_req_remove(char *rsrcname, uint_t flag)
{
	req_t *req;
	char *device = resolve_name(rsrcname);

	rcm_log_message(RCM_TRACE3, "dr_req_remove(%s)\n", rsrcname);

	(void) mutex_lock(&rcm_req_lock);

	/* find the entry */
	req = find_req_entry(device, flag, -1, dr_req_list);
	free(device);

	if (req == NULL) {
		(void) mutex_unlock(&rcm_req_lock);
		rcm_log_message(RCM_WARNING,
		    gettext("dr_req entry %s not found\n"), rsrcname);
		return;
	}

	req->state = RCM_STATE_REMOVE;
	dr_req_list->n_req--;
	(void) fsync(state_fd);

	/*
	 * Remove the pid from the polling list
	 */
	remove_from_polling_list(req->pid);

	/*
	 * We don't shrink the dr_req_list size for now.
	 * Shouldn't cause big memory leaks.
	 */
	(void) mutex_unlock(&rcm_req_lock);
}
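
/*
 * Each rcm_info_t built by rsrc_dr_info() below carries an nvlist
 * with the entry's resource name (RCM_RSRCNAME), initiator pid
 * (RCM_CLIENT_ID), sequence number (RCM_SEQ_NUM), state
 * (RCM_RSRCSTATE), and the fixed "DR operation in progress" string
 * (RCM_CLIENT_INFO).  File system resources are tagged with an
 * "(fs)" suffix.
 */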

/*
 * Return the list of ongoing dr operation requests
 */
rcm_info_t *
rsrc_dr_info()
{
	int i;
	rcm_info_t *info;
	rcm_info_t *result = NULL;
	char *rsrc;
	int len;

	rcm_log_message(RCM_TRACE2, "rsrc_dr_info()\n");

	(void) mutex_lock(&rcm_req_lock);
	for (i = 0; i < dr_req_list->n_req_max; i++) {
		if (dr_req_list->req[i].state == RCM_STATE_REMOVE)
			continue;

		if (dr_req_list->req[i].device[0] == '\0')
			continue;

		if (dr_req_list->req[i].flag & RCM_FILESYS) {
			len = strlen(dr_req_list->req[i].device) + 5;
			rsrc = s_malloc(len);
			(void) snprintf(rsrc, len, "%s(fs)",
			    dr_req_list->req[i].device);
		} else {
			rsrc = s_strdup(dr_req_list->req[i].device);
		}

		info = s_calloc(1, sizeof (*info));
		if (errno = nvlist_alloc(&(info->info), NV_UNIQUE_NAME, 0)) {
			rcm_log_message(RCM_ERROR,
			    gettext("failed (nvlist_alloc=%s).\n"),
			    strerror(errno));
			rcmd_exit(errno);
		}

		if (errno = nvlist_add_string(info->info, RCM_RSRCNAME,
		    rsrc)) {
			rcm_log_message(RCM_ERROR,
			    gettext("failed (nvlist_add=%s).\n"),
			    strerror(errno));
			rcmd_exit(errno);
		}
		(void) free(rsrc);

		if (errno = nvlist_add_int64(info->info, RCM_CLIENT_ID,
		    dr_req_list->req[i].pid)) {
			rcm_log_message(RCM_ERROR,
			    gettext("failed (nvlist_add=%s).\n"),
			    strerror(errno));
			rcmd_exit(errno);
		}

		if (errno = nvlist_add_int32(info->info, RCM_SEQ_NUM,
		    dr_req_list->req[i].seq_num)) {
			rcm_log_message(RCM_ERROR,
			    gettext("failed (nvlist_add=%s).\n"),
			    strerror(errno));
			rcmd_exit(errno);
		}

		if (errno = nvlist_add_int32(info->info, RCM_RSRCSTATE,
		    dr_req_list->req[i].state)) {
			rcm_log_message(RCM_ERROR,
			    gettext("failed (nvlist_add=%s).\n"),
			    strerror(errno));
			rcmd_exit(errno);
		}

		if (errno = nvlist_add_string(info->info, RCM_CLIENT_INFO,
		    (char *)locked_info)) {
			rcm_log_message(RCM_ERROR,
			    gettext("failed (nvlist_add=%s).\n"),
			    strerror(errno));
			rcmd_exit(errno);
		}

		info->next = result;
		result = info;
	}
	(void) mutex_unlock(&rcm_req_lock);

	return (result);
}
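
/*
 * Recovery policy, as implemented by the switch in clean_dr_list()
 * below: if the initiator is still alive, re-issue (redo) the
 * interrupted offline/suspend; if it has died, undo the operation by
 * onlining or resuming the resource.  Operations caught mid-transition
 * are simply undone.
 */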

/*
 * Eliminate entries whose dr initiator is no longer running
 * and recover daemon state during daemon restart.
 *
 * This routine is called either during daemon initialization, after
 * all modules have registered resources, or from the cleanup thread.
 * In either case, it is the only thread running in the daemon.
 */
void
clean_dr_list()
{
	int i;
	struct clean_list {
		struct clean_list *next;
		char *rsrcname;
		pid_t pid;
		int seq_num;
		int state;
		timespec_t interval;
	} *tmp, *list = NULL;
	char *rsrcnames[2];

	rcm_log_message(RCM_TRACE3,
	    "clean_dr_list(): look for stale dr initiators\n");

	rsrcnames[1] = NULL;

	/*
	 * Make a list of entries to recover.  This is necessary because
	 * the recovery operation will modify dr_req_list.
	 */
	(void) mutex_lock(&rcm_req_lock);
	for (i = 0; i < dr_req_list->n_req_max; i++) {
		/* skip empty entries */
		if (dr_req_list->req[i].state == RCM_STATE_REMOVE)
			continue;

		if (dr_req_list->req[i].device[0] == '\0')
			continue;

		/* skip cascade operations */
		if (dr_req_list->req[i].seq_num & SEQ_NUM_MASK)
			continue;

		/*
		 * In the cleanup case, ignore entries whose initiators
		 * are alive.
		 */
		if ((rcmd_get_state() == RCMD_CLEANUP) &&
		    proc_exist(dr_req_list->req[i].pid))
			continue;

		rcm_log_message(RCM_TRACE1,
		    "found stale entry: %s\n", dr_req_list->req[i].device);

		tmp = s_malloc(sizeof (*tmp));
		tmp->rsrcname = s_strdup(dr_req_list->req[i].device);
		tmp->state = dr_req_list->req[i].state;
		tmp->pid = dr_req_list->req[i].pid;
		tmp->seq_num = dr_req_list->req[i].seq_num;
		tmp->interval = dr_req_list->req[i].interval;
		tmp->next = list;
		list = tmp;
	}
	(void) mutex_unlock(&rcm_req_lock);

	if (list == NULL)
		return;

	/*
	 * If everything worked normally, we shouldn't be here.
	 * Since we are here, something went wrong, so say something.
	 */
	if (rcmd_get_state() == RCMD_INIT) {
		rcm_log_message(RCM_NOTICE, gettext("rcm_daemon died "
		    "unexpectedly, recovering previous daemon state\n"));
	} else {
		rcm_log_message(RCM_INFO, gettext("one or more dr initiators "
		    "died, attempting automatic recovery\n"));
	}

	while (list) {
		tmp = list;
		list = tmp->next;

		switch (tmp->state) {
		case RCM_STATE_OFFLINE_QUERY:
		case RCM_STATE_OFFLINE_QUERY_FAIL:
			rsrcnames[0] = tmp->rsrcname;
			if (proc_exist(tmp->pid)) {
				/* redo */
				(void) process_resource_offline(rsrcnames,
				    tmp->pid, RCM_QUERY, tmp->seq_num, NULL);
			} else {
				/* undo */
				(void) notify_resource_online(rsrcnames,
				    tmp->pid, 0, tmp->seq_num, NULL);
			}
			break;

		case RCM_STATE_OFFLINE:
		case RCM_STATE_OFFLINE_FAIL:
			rsrcnames[0] = tmp->rsrcname;
			if (proc_exist(tmp->pid)) {
				/* redo */
				(void) process_resource_offline(rsrcnames,
				    tmp->pid, 0, tmp->seq_num, NULL);
			} else {
				/* undo */
				(void) notify_resource_online(rsrcnames,
				    tmp->pid, 0, tmp->seq_num, NULL);
			}
			break;

		case RCM_STATE_SUSPEND_QUERY:
		case RCM_STATE_SUSPEND_QUERY_FAIL:
			rsrcnames[0] = tmp->rsrcname;
			if (proc_exist(tmp->pid)) {
				/* redo */
				(void) process_resource_suspend(rsrcnames,
				    tmp->pid, RCM_QUERY, tmp->seq_num,
				    &tmp->interval, NULL);
			} else {
				/* undo */
				(void) notify_resource_resume(rsrcnames,
				    tmp->pid, 0, tmp->seq_num, NULL);
			}
			break;

		case RCM_STATE_SUSPEND:
		case RCM_STATE_SUSPEND_FAIL:
			rsrcnames[0] = tmp->rsrcname;
			if (proc_exist(tmp->pid)) {
				/* redo */
				(void) process_resource_suspend(rsrcnames,
				    tmp->pid, 0, tmp->seq_num, &tmp->interval,
				    NULL);
			} else {
				/* undo */
				(void) notify_resource_resume(rsrcnames,
				    tmp->pid, 0, tmp->seq_num, NULL);
			}
			break;

		case RCM_STATE_OFFLINING:
		case RCM_STATE_ONLINING:
			rsrcnames[0] = tmp->rsrcname;
			(void) notify_resource_online(rsrcnames, tmp->pid, 0,
			    tmp->seq_num, NULL);
			break;

		case RCM_STATE_SUSPENDING:
		case RCM_STATE_RESUMING:
			rsrcnames[0] = tmp->rsrcname;
			(void) notify_resource_resume(rsrcnames, tmp->pid, 0,
			    tmp->seq_num, NULL);
			break;

		case RCM_STATE_REMOVING:
			rsrcnames[0] = tmp->rsrcname;
			(void) notify_resource_remove(rsrcnames, tmp->pid, 0,
			    tmp->seq_num, NULL);
			break;

		default:
			rcm_log_message(RCM_WARNING,
			    gettext("%s in unknown state %d\n"),
			    tmp->rsrcname, tmp->state);
			break;
		}
		free(tmp->rsrcname);
		free(tmp);
	}
}

/*
 * Selective thread blocking based on event type
 */
barrier_t barrier;
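
/*
 * A sketch of the barrier protocol, inferred from its use below
 * (barrier_t itself is defined elsewhere, presumably rcm_impl.h):
 *
 *	state       - RCMD_INIT, RCMD_NORMAL, or RCMD_CLEANUP
 *	thr_count   - number of active daemon threads, or -1 while the
 *	              cleanup thread has exclusive access
 *	wanted      - number of threads waiting on cv
 *	last_update - time of the last thread exit, for the idle timer
 */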

/*
 * Change barrier state:
 *	RCMD_INIT - daemon is initializing, only register allowed
 *	RCMD_NORMAL - normal daemon processing
 *	RCMD_CLEANUP - cleanup thread is waiting or running
 */
int
rcmd_get_state()
{
	return (barrier.state);
}

void
rcmd_set_state(int state)
{
	/*
	 * The state transition is as follows:
	 *	INIT --> NORMAL <---> CLEANUP
	 * The implementation favors the cleanup thread.
	 */

	(void) mutex_lock(&barrier.lock);
	barrier.state = state;

	switch (state) {
	case RCMD_CLEANUP:
		/*
		 * Wait for existing threads to exit
		 */
		barrier.wanted++;
		while (barrier.thr_count != 0)
			(void) cond_wait(&barrier.cv, &barrier.lock);
		barrier.wanted--;
		barrier.thr_count = -1;
		break;

	case RCMD_INIT:
	case RCMD_NORMAL:
	default:
		if (barrier.thr_count == -1)
			barrier.thr_count = 0;
		if (barrier.wanted)
			(void) cond_broadcast(&barrier.cv);
		break;
	}

	(void) mutex_unlock(&barrier.lock);
}

/*
 * Increment the daemon thread count
 */
int
rcmd_thr_incr(int cmd)
{
	int seq_num;

	(void) mutex_lock(&barrier.lock);
	/*
	 * Set the wanted flag
	 */
	barrier.wanted++;

	/*
	 * Wait until it is safe for the daemon to perform the operation.
	 *
	 * NOTE: if a module registers by passing a request to the
	 *	client process, we may need to allow register
	 *	to come through during daemon initialization.
	 */
	while (barrier.state != RCMD_NORMAL)
		(void) cond_wait(&barrier.cv, &barrier.lock);

	if ((cmd == CMD_EVENT) ||
	    (cmd == CMD_REGISTER) ||
	    (cmd == CMD_UNREGISTER)) {
		/*
		 * Event passthru and register ops don't need a sequence
		 * number.
		 */
		seq_num = -1;
	} else {
		/*
		 * Non-register operations get a sequence number
		 */
		seq_num = get_seq_number();
	}
	barrier.wanted--;
	barrier.thr_count++;
	(void) mutex_unlock(&barrier.lock);

	if ((cmd == CMD_OFFLINE) ||
	    (cmd == CMD_SUSPEND) ||
	    (cmd == CMD_GETINFO)) {
		/*
		 * For these operations, we need to ask modules to
		 * register any new resources that came online.
		 *
		 * This is because mount/umount are not instrumented
		 * to register with rcm before using system resources.
		 * Certain registration ops may fail during sync, which
		 * indicates race conditions.  This cannot be avoided
		 * without changing mount/umount.
		 */
		rcmd_db_sync();
	}

	return (seq_num);
}
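
/*
 * Illustrative call pattern for rcmd_thr_incr()/rcmd_thr_decr(), not
 * taken verbatim from any caller:
 *
 *	seq_num = rcmd_thr_incr(CMD_OFFLINE);	(blocks during cleanup)
 *	... perform the DR operation ...
 *	rcmd_thr_decr();		(may wake the cleanup thread)
 */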

/*
 * Decrement the thread count
 */
void
rcmd_thr_decr()
{
	/*
	 * Decrement the thread count and wake up the reload/cleanup
	 * thread.
	 */
	(void) mutex_lock(&barrier.lock);
	barrier.last_update = time(NULL);
	if (--barrier.thr_count == 0)
		(void) cond_broadcast(&barrier.cv);
	(void) mutex_unlock(&barrier.lock);
}

/*
 * Wake up all waiting threads as a result of SIGHUP
 */
static int sighup_received = 0;

void
rcmd_thr_signal()
{
	(void) mutex_lock(&barrier.lock);
	sighup_received = 1;
	(void) cond_broadcast(&barrier.cv);
	(void) mutex_unlock(&barrier.lock);
}

void
rcmd_start_timer(int timeout)
{
	timestruc_t abstime;

	if (timeout == 0)
		timeout = RCM_DAEMON_TIMEOUT;	/* default to 5 minutes */
	else
		dr_req_list->idle_timeout = timeout;	/* persist timeout */

	abstime.tv_nsec = 0;	/* only tv_sec is adjusted below */
	if (timeout > 0) {
		abstime.tv_sec = time(NULL) + timeout;
	}

	(void) mutex_lock(&barrier.lock);
	for (;;) {
		int idletime;
		int is_active;

		if (timeout > 0)
			(void) cond_timedwait(&barrier.cv, &barrier.lock,
			    &abstime);
		else
			(void) cond_wait(&barrier.cv, &barrier.lock);

		/*
		 * If a SIGHUP was received, change the timeout to 0 so
		 * the daemon is shut down at the first possible moment.
		 */
		if (sighup_received)
			timeout = 0;

		/*
		 * If the timeout is negative, never shut down the daemon
		 */
		if (timeout < 0)
			continue;

		/*
		 * Check for ongoing/pending activity
		 */
		is_active = (barrier.thr_count || barrier.wanted ||
		    (dr_req_list->n_req != 0));
		if (is_active) {
			abstime.tv_sec = time(NULL) + timeout;
			continue;
		}

		/*
		 * If the idle time is less than the timeout, continue
		 * to wait.
		 */
		idletime = time(NULL) - barrier.last_update;
		if (idletime < timeout) {
			abstime.tv_sec = barrier.last_update + timeout;
			continue;
		}
		break;
	}

	(void) script_main_fini();

	rcm_log_message(RCM_INFO, gettext("rcm_daemon is shut down.\n"));
}
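
/*
 * Idle timeout semantics for rcmd_start_timer() above:
 *
 *	timeout == 0	use the default (RCM_DAEMON_TIMEOUT, 5 minutes)
 *	timeout > 0	shut down after that many idle seconds, and
 *			persist the value in the mmapped state file
 *	timeout < 0	never shut down
 *
 * A SIGHUP forces the timeout to 0 so the daemon exits as soon as all
 * activity drains.
 */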

/*
 * Code related to polling client pids.
 * Not declared as static so that we can find this structure easily
 * in the core file.
 */
struct {
	int	n_pids;
	int	n_max_pids;
	thread_t poll_tid;	/* poll thread id */
	int	signaled;
	pid_t	*pids;
	int	*refcnt;
	struct pollfd *fds;
	cond_t	cv;	/* the associated lock is rcm_req_lock */
} polllist;

static int
find_pid_index(pid_t pid)
{
	int i;

	for (i = 0; i < polllist.n_pids; i++) {
		if (polllist.pids[i] == pid) {
			return (i);
		}
	}
	return (-1);
}

/*
 * Resize the buffers for new pids
 */
static int
get_pid_index()
{
	const int n_chunk = 10;

	int n_max;
	int index = polllist.n_pids;

	if (polllist.n_pids < polllist.n_max_pids) {
		polllist.n_pids++;
		return (index);
	}

	if (polllist.n_max_pids == 0) {
		n_max = n_chunk;
		polllist.pids = s_calloc(n_max, sizeof (pid_t));
		polllist.refcnt = s_calloc(n_max, sizeof (int));
		polllist.fds = s_calloc(n_max, sizeof (struct pollfd));
	} else {
		n_max = polllist.n_max_pids + n_chunk;
		polllist.pids = s_realloc(polllist.pids,
		    n_max * sizeof (pid_t));
		polllist.refcnt = s_realloc(polllist.refcnt,
		    n_max * sizeof (int));
		polllist.fds = s_realloc(polllist.fds,
		    n_max * sizeof (struct pollfd));
	}
	polllist.n_max_pids = n_max;
	polllist.n_pids++;
	return (index);
}
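
/*
 * How initiator death is detected: each initiator's /proc/<pid>/as is
 * opened and added to polllist.fds with no events requested, so the
 * poll() in pollfunc() blocks until something exceptional happens to
 * a polled process; in practice it returns when an initiator exits
 * (per the comment in stop_polling_thread(), the poll thread "exits
 * on its own when any DR initiator process that it is currently
 * polling exits").  This is a conventional /proc idiom on Solaris.
 */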

/*
 * rcm_req_lock must be held
 */
static void
add_to_polling_list(pid_t pid)
{
	int fd, index;
	char procfile[MAXPATHLEN];

	if (pid == (pid_t)0)
		return;

	rcm_log_message(RCM_TRACE1, "add_to_polling_list(%ld)\n", pid);

	/*
	 * We need to stop the poll thread before manipulating the
	 * polllist since the poll thread may be using polllist.fds[]
	 * and polllist.n_pids.  As an optimization, first check if the
	 * pid is already in the polllist.  If it is, there is no need
	 * to stop the poll thread; just increment the pid reference
	 * count and return.
	 */
	index = find_pid_index(pid);
	if (index != -1) {
		polllist.refcnt[index]++;
		return;
	}

	stop_polling_thread();

	/*
	 * In an attempt to stop the poll thread we may have released
	 * and reacquired rcm_req_lock.  So find the index again.
	 */
	index = find_pid_index(pid);
	if (index != -1) {
		polllist.refcnt[index]++;
		goto done;
	}

	/*
	 * Open a /proc file
	 */
	(void) sprintf(procfile, "/proc/%ld/as", pid);
	if ((fd = open(procfile, O_RDONLY)) == -1) {
		rcm_log_message(RCM_NOTICE, gettext("open(%s): %s\n"),
		    procfile, strerror(errno));
		goto done;
	}

	/*
	 * Add the pid to the polllist
	 */
	index = get_pid_index();
	polllist.pids[index] = pid;
	polllist.refcnt[index] = 1;
	polllist.fds[index].fd = fd;
	polllist.fds[index].events = 0;
	polllist.fds[index].revents = 0;

	rcm_log_message(RCM_DEBUG, "add pid %ld at index %d\n", pid, index);

done:
	start_polling_thread();
}

/*
 * rcm_req_lock must be held
 */
static void
remove_from_polling_list(pid_t pid)
{
	int i, index;

	if (pid == (pid_t)0)
		return;

	rcm_log_message(RCM_TRACE1, "remove_from_polling_list(%ld)\n", pid);

	/*
	 * We need to stop the poll thread before manipulating the
	 * polllist since the poll thread may be using polllist.fds[]
	 * and polllist.n_pids.  As an optimization, first check the
	 * pid reference count.  If it is greater than 1, there is no
	 * need to stop the poll thread.
	 */

	index = find_pid_index(pid);
	if (index == -1) {
		rcm_log_message(RCM_NOTICE,
		    gettext("error removing pid %ld from polling list\n"),
		    pid);
		return;
	}

	/*
	 * Decrement the pid refcnt
	 */
	if (polllist.refcnt[index] > 1) {
		polllist.refcnt[index]--;
		return;
	}

	stop_polling_thread();

	/*
	 * In an attempt to stop the poll thread we may have released
	 * and reacquired rcm_req_lock.  So find the index again.
	 */
	index = find_pid_index(pid);
	if (index == -1) {
		rcm_log_message(RCM_NOTICE,
		    gettext("error removing pid %ld from polling list\n"),
		    pid);
		goto done;
	}

	if (--polllist.refcnt[index] > 0)
		goto done;

	/*
	 * refcnt is down to zero; delete the pid from the polling list
	 */
	(void) close(polllist.fds[index].fd);
	polllist.n_pids--;

	for (i = index; i < polllist.n_pids; i++) {
		polllist.pids[i] = polllist.pids[i + 1];
		polllist.refcnt[i] = polllist.refcnt[i + 1];
		bcopy(&polllist.fds[i + 1], &polllist.fds[i],
		    sizeof (struct pollfd));
	}

	rcm_log_message(RCM_DEBUG, "remove pid %ld at index %d\n", pid,
	    index);

done:
	start_polling_thread();
}

void
init_poll_thread()
{
	polllist.poll_tid = (thread_t)-1;
}

void
cleanup_poll_thread()
{
	(void) mutex_lock(&rcm_req_lock);
	if (polllist.poll_tid == thr_self()) {
		rcm_log_message(RCM_TRACE2,
		    "cleanup_poll_thread: n_pids = %d\n", polllist.n_pids);
		polllist.poll_tid = (thread_t)-1;
		(void) cond_broadcast(&polllist.cv);
	}
	(void) mutex_unlock(&rcm_req_lock);
}
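
/*
 * Poll thread life cycle, summarizing the functions below: the thread
 * is created detached by start_polling_thread() whenever DR initiators
 * exist, blocks in poll() until an initiator exits or the thread is
 * killed with SIGUSR1 by stop_polling_thread(), then clears its slot
 * via cleanup_poll_thread() and flags the barrier for cleanup
 * (need_cleanup is defined elsewhere in the daemon).
 */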

/*ARGSUSED*/
static void *
pollfunc(void *arg)
{
	sigset_t mask;

	rcm_log_message(RCM_TRACE2, "poll thread started. n_pids = %d\n",
	    polllist.n_pids);

	/*
	 * Unblock SIGUSR1 to allow the polling thread to be killed
	 */
	(void) sigemptyset(&mask);
	(void) sigaddset(&mask, SIGUSR1);
	(void) thr_sigsetmask(SIG_UNBLOCK, &mask, NULL);

	(void) poll(polllist.fds, polllist.n_pids, (time_t)-1);

	/*
	 * Block SIGUSR1 to avoid being killed while holding a lock
	 */
	(void) sigemptyset(&mask);
	(void) sigaddset(&mask, SIGUSR1);
	(void) thr_sigsetmask(SIG_BLOCK, &mask, NULL);

	rcm_log_message(RCM_TRACE2, "returned from poll()\n");

	cleanup_poll_thread();

	(void) mutex_lock(&barrier.lock);
	need_cleanup = 1;
	(void) cond_broadcast(&barrier.cv);
	(void) mutex_unlock(&barrier.lock);

	return (NULL);
}

/*
 * rcm_req_lock must be held
 */
void
start_polling_thread()
{
	int err;

	if (rcmd_get_state() != RCMD_NORMAL)
		return;

	if (polllist.poll_tid != (thread_t)-1 || polllist.n_pids == 0)
		return;

	if ((err = thr_create(NULL, 0, pollfunc, NULL, THR_DETACHED,
	    &polllist.poll_tid)) == 0)
		polllist.signaled = 0;
	else
		rcm_log_message(RCM_ERROR,
		    gettext("failed to create polling thread: %s\n"),
		    strerror(err));
}

/*
 * rcm_req_lock must be held
 */
static void
stop_polling_thread()
{
	int err;

	while (polllist.poll_tid != (thread_t)-1) {
		if (polllist.signaled == 0) {
			if ((err = thr_kill(polllist.poll_tid, SIGUSR1)) == 0)
				polllist.signaled = 1;
			else
				/*
				 * thr_kill shouldn't have failed since the
				 * poll thread id and the signal are valid,
				 * so log an error.  Since no signal is sent
				 * when thr_kill fails (as per the man page),
				 * the cond_wait below will wait until the
				 * poll thread exits by some other means.
				 * The poll thread, for example, exits on its
				 * own when any DR initiator process that it
				 * is currently polling exits.
				 */
				rcm_log_message(RCM_ERROR,
				    gettext(
				    "failed to kill polling thread %d: %s\n"),
				    polllist.poll_tid, strerror(err));
		}
		(void) cond_wait(&polllist.cv, &rcm_req_lock);
	}
}