/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 *
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include "rcm_impl.h"
#include "rcm_module.h"

/*
 * Global locks
 */
mutex_t rcm_req_lock;	/* protects global dr & info request list */

/*
 * Daemon state file
 */
static int state_fd;
#define	RCM_STATE_FILE	"/var/run/rcm_daemon_state"
#define	N_REQ_CHUNK	10	/* grow 10 entries at a time */

/*
 * Daemon timeout value
 */
#define	RCM_DAEMON_TIMEOUT	300	/* 5 minutes idle time */

/*
 * Struct for a list of outstanding rcm requests
 */
typedef struct {
	int		seq_num;		/* sequence number of request */
	int		state;			/* current state */
	pid_t		pid;			/* pid of initiator */
	uint_t		flag;			/* request flags */
	int		type;			/* resource(device) type */
	timespec_t	interval;		/* suspend interval */
	char		device[MAXPATHLEN];	/* name of device or resource */
} req_t;

typedef struct {
	int	n_req;
	int	n_req_max;	/* number of req_t's to follow */
	int	n_seq_max;	/* last sequence number */
	int	idle_timeout;	/* persist idle timeout value */
	req_t	req[1];
	/* more req_t follows */
} req_list_t;

static req_list_t *dr_req_list;
static req_list_t *info_req_list;

static const char *locked_info = "DR operation in progress";
static const char *locked_err = "Resource is busy";

static int rcmd_get_state();
static void add_to_polling_list(pid_t);
static void remove_from_polling_list(pid_t);

void start_polling_thread();
static void stop_polling_thread();

/*
 * Initialize request lists required for locking
 */
void
rcmd_lock_init(void)
{
	int size;
	struct stat fbuf;

	/*
	 * Start info list with one slot, then grow on demand.
	 */
	info_req_list = s_calloc(1, sizeof (req_list_t));
	info_req_list->n_req_max = 1;

	/*
	 * Open daemon state file and map in contents
	 */
	state_fd = open(RCM_STATE_FILE, O_CREAT|O_RDWR, 0600);
	if (state_fd == -1) {
		rcm_log_message(RCM_ERROR, gettext("cannot open %s: %s\n"),
		    RCM_STATE_FILE, strerror(errno));
		rcmd_exit(errno);
	}

	if (fstat(state_fd, &fbuf) != 0) {
		rcm_log_message(RCM_ERROR, gettext("cannot stat %s: %s\n"),
		    RCM_STATE_FILE, strerror(errno));
		rcmd_exit(errno);
	}

	size = fbuf.st_size;
	if (size == 0) {
		size = sizeof (req_list_t);
		if (ftruncate(state_fd, size) != 0) {
			rcm_log_message(RCM_ERROR,
			    gettext("cannot truncate %s: %s\n"),
			    RCM_STATE_FILE, strerror(errno));
			rcmd_exit(errno);
		}
	}

	/*LINTED*/
	dr_req_list = (req_list_t *)mmap(NULL, size, PROT_READ|PROT_WRITE,
	    MAP_SHARED, state_fd, 0);
	if (dr_req_list == MAP_FAILED) {
		rcm_log_message(RCM_ERROR, gettext("cannot mmap %s: %s\n"),
		    RCM_STATE_FILE, strerror(errno));
		rcmd_exit(errno);
	}

	/*
	 * Initial size is one entry
	 */
	if (dr_req_list->n_req_max == 0) {
		dr_req_list->n_req_max = 1;
		(void) fsync(state_fd);
		return;
	}

	rcm_log_message(RCM_DEBUG, "n_req = %d, n_req_max = %d\n",
	    dr_req_list->n_req, dr_req_list->n_req_max);

	/*
	 * Recover the daemon state
	 */
	clean_dr_list();
}

/*
 * Get a unique sequence number--to be called with rcm_req_lock held.
 */
static int
get_seq_number()
{
	int number;

	if (dr_req_list == NULL)
		return (0);

	dr_req_list->n_seq_max++;
	number = (dr_req_list->n_seq_max << SEQ_NUM_SHIFT);
	(void) fsync(state_fd);

	return (number);
}
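/*
 * A note on sequence numbers (inferred from how SEQ_NUM_SHIFT and
 * SEQ_NUM_MASK are used in this file): the value produced above keeps
 * the per-operation id in the bits above SEQ_NUM_SHIFT and leaves the
 * low bits zero. Cascade sub-operations spawned by the same DR request
 * share the same base id and differ only in the low bits, which is why
 * comparisons elsewhere shift right by SEQ_NUM_SHIFT to test for "same
 * operation" and clean_dr_list() skips entries with low bits set.
 */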
/*
 * Find entry in list with the same resource name and sequence number.
 * If seq_num == -1, no seq_num matching is required.
 */
static req_t *
find_req_entry(char *device, uint_t flag, int seq_num, req_list_t *list)
{
	int i;

	/*
	 * Look for entry with the same resource and seq_num.
	 * Also match RCM_FILESYS field in flag.
	 */
	for (i = 0; i < list->n_req_max; i++) {
		if (list->req[i].state == RCM_STATE_REMOVE)
			/* stale entry */
			continue;
		/*
		 * We need to distinguish a file system root from the
		 * directory it is mounted on.
		 *
		 * Applications are not aware of any difference between the
		 * two, but the system keeps track of it internally by
		 * checking for mount points while traversing a file path.
		 * In a similar spirit, RCM is keeping this difference as
		 * an implementation detail.
		 */
		if ((strcmp(device, list->req[i].device) != 0) ||
		    (list->req[i].flag & RCM_FILESYS) != (flag & RCM_FILESYS))
			/* different resource */
			continue;

		if ((seq_num != -1) && ((seq_num >> SEQ_NUM_SHIFT) !=
		    (list->req[i].seq_num >> SEQ_NUM_SHIFT)))
			/* different base seqnum */
			continue;

		return (&list->req[i]);
	}

	return (NULL);
}

/*
 * Get the next empty req_t entry. If no entry exists, grow the list.
 */
static req_t *
get_req_entry(req_list_t **listp)
{
	int i;
	int n_req = (*listp)->n_req;
	int n_req_max = (*listp)->n_req_max;

	/*
	 * If the list is full, grow the list and return the first
	 * entry in the new portion.
	 */
	if (n_req == n_req_max) {
		int newsize;

		n_req_max += N_REQ_CHUNK;
		newsize = sizeof (req_list_t) + (n_req_max - 1) *
		    sizeof (req_t);

		if (listp == &info_req_list) {
			*listp = s_realloc(*listp, newsize);
		} else if (ftruncate(state_fd, newsize) != 0) {
			rcm_log_message(RCM_ERROR,
			    gettext("cannot truncate %s: %s\n"),
			    RCM_STATE_FILE, strerror(errno));
			rcmd_exit(errno);
		/*LINTED*/
		} else if ((*listp = (req_list_t *)mmap(NULL, newsize,
		    PROT_READ|PROT_WRITE, MAP_SHARED, state_fd, 0)) ==
		    MAP_FAILED) {
			rcm_log_message(RCM_ERROR,
			    gettext("cannot mmap %s: %s\n"),
			    RCM_STATE_FILE, strerror(errno));
			rcmd_exit(errno);
		}

		/* Initialize the new entries */
		for (i = (*listp)->n_req_max; i < n_req_max; i++) {
			(*listp)->req[i].state = RCM_STATE_REMOVE;
			(void) strcpy((*listp)->req[i].device, "");
		}

		(*listp)->n_req_max = n_req_max;
		(*listp)->n_req++;
		return (&(*listp)->req[n_req]);
	}

	/*
	 * List contains empty slots, find one.
	 */
	for (i = 0; i < n_req_max; i++) {
		if (((*listp)->req[i].device[0] == '\0') ||
		    ((*listp)->req[i].state == RCM_STATE_REMOVE)) {
			break;
		}
	}

	assert(i < n_req_max);	/* empty slot must exist */

	(*listp)->n_req++;
	return (&(*listp)->req[i]);
}
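/*
 * Implementation note: the two request lists grow differently.
 * info_req_list lives on the heap and is grown with s_realloc(), while
 * dr_req_list is backed by the mmap'ed daemon state file and is grown
 * by ftruncate() plus a fresh mmap(), so outstanding DR locks survive
 * an rcm_daemon restart (see clean_dr_list() for the recovery pass).
 */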
/*
 * When one resource depends on multiple resources, it's possible that
 * rcm_get_info can be called multiple times on the resource, resulting
 * in duplicate information. By assigning a unique sequence number to
 * each rcm_get_info operation, this duplication can be eliminated.
 *
 * Insert a dr entry in info_req_list
 */
int
info_req_add(char *rsrcname, uint_t flag, int seq_num)
{
	int error = 0;
	char *device;
	req_t *req;

	rcm_log_message(RCM_TRACE2, "info_req_add(%s, %d)\n",
	    rsrcname, seq_num);

	device = resolve_name(rsrcname);
	(void) mutex_lock(&rcm_req_lock);

	/*
	 * Look for entry with the same resource and seq_num.
	 * If it exists, we return an error so that such
	 * information is not gathered more than once.
	 */
	if (find_req_entry(device, flag, seq_num, info_req_list) != NULL) {
		rcm_log_message(RCM_DEBUG, "getinfo cycle: %s %d\n",
		    device, seq_num);
		error = -1;
		goto out;
	}

	/*
	 * Get empty entry and fill in seq_num and device.
	 */
	req = get_req_entry(&info_req_list);
	req->seq_num = seq_num;
	req->state = RCM_STATE_ONLINE;	/* mark that the entry is in use */
	req->flag = flag;
	(void) strcpy(req->device, device);

out:
	(void) mutex_unlock(&rcm_req_lock);
	free(device);

	return (error);
}

/*
 * Remove all entries associated with seq_num from info_req_list
 */
void
info_req_remove(int seq_num)
{
	int i;

	rcm_log_message(RCM_TRACE3, "info_req_remove(%d)\n", seq_num);

	seq_num >>= SEQ_NUM_SHIFT;
	(void) mutex_lock(&rcm_req_lock);

	/* remove all entries with seq_num */
	for (i = 0; i < info_req_list->n_req_max; i++) {
		if (info_req_list->req[i].state == RCM_STATE_REMOVE)
			continue;

		if ((info_req_list->req[i].seq_num >> SEQ_NUM_SHIFT) != seq_num)
			continue;

		info_req_list->req[i].state = RCM_STATE_REMOVE;
		info_req_list->n_req--;
	}

	/*
	 * We don't shrink the info_req_list size for now.
	 */
	(void) mutex_unlock(&rcm_req_lock);
}

/*
 * Checking lock conflicts. There is a conflict if:
 * - attempt to DR a node when either its ancestor or descendant
 *   is in the process of DR
 * - attempt to register for a node when its ancestor is locked for DR
 */
static int
check_lock(char *device, uint_t flag, int cflag, rcm_info_t **info)
{
	int i, ret = RCM_SUCCESS;

	if (info)
		*info = NULL;

	/*
	 * During daemon initialization, don't check locks
	 */
	if (dr_req_list == NULL)
		return (ret);

	for (i = 0; i < dr_req_list->n_req; i++) {
		req_t *req = &dr_req_list->req[i];
		char *dr_dev = req->device;

		/*
		 * Skip empty entries
		 */
		if ((req->state == RCM_STATE_REMOVE) || (dr_dev[0] == '\0'))
			continue;

		/*
		 * Make sure that none of the ancestors of dr_dev is
		 * being operated upon.
		 */
		if (EQUAL(device, dr_dev) || DESCENDENT(device, dr_dev)) {
			/*
			 * An exception to this is the filesystem.
			 * We should allow a filesystem rooted at a
			 * child directory to be unmounted.
			 */
			if ((flag & RCM_FILESYS) && (!EQUAL(device, dr_dev) ||
			    ((dr_req_list->req[i].flag & RCM_FILESYS) == 0)))
				continue;

			assert(info != 0);

			add_busy_rsrc_to_list(dr_dev, dr_req_list->req[i].pid,
			    dr_req_list->req[i].state,
			    dr_req_list->req[i].seq_num, NULL, locked_info,
			    locked_err, NULL, info);
			ret = RCM_CONFLICT;
			break;
		}

		if ((cflag == LOCK_FOR_DR) && DESCENDENT(dr_dev, device)) {
			/*
			 * Check descendants only for a DR request.
			 *
			 * Could have multiple descendants doing DR;
			 * we want to find them all.
			 */
			assert(info != 0);

			add_busy_rsrc_to_list(dr_dev, dr_req_list->req[i].pid,
			    dr_req_list->req[i].state,
			    dr_req_list->req[i].seq_num, NULL, locked_info,
			    locked_err, NULL, info);
			ret = RCM_CONFLICT;
			/* don't break here, need to find all conflicts */
		}
	}

	return (ret);
}
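/*
 * Illustration of the conflict rules above, using hypothetical resource
 * names: while "/pci@1f,0/ide@d" is locked for DR, a DR request for its
 * descendant "/pci@1f,0/ide@d/disk@0,0" or for its ancestor "/pci@1f,0"
 * returns RCM_CONFLICT. A registration (cflag != LOCK_FOR_DR) conflicts
 * only in the self/descendant-of-locked-node case, so registering for
 * "/pci@1f,0" would still succeed.
 */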
/*
 * Check for lock conflicts for DR operation or client registration
 */
int
rsrc_check_lock_conflicts(char *rsrcname, uint_t flag, int cflag,
    rcm_info_t **info)
{
	int result;
	char *device;

	device = resolve_name(rsrcname);
	result = check_lock(device, flag, cflag, info);
	free(device);

	return (result);
}

static int
transition_state(int state)
{
	/*
	 * If the resource state is in transition, ask caller to
	 * try again.
	 */
	switch (state) {
	case RCM_STATE_OFFLINING:
	case RCM_STATE_SUSPENDING:
	case RCM_STATE_RESUMING:
	case RCM_STATE_ONLINING:
	case RCM_STATE_REMOVING:
		return (1);

	default:
		break;
	}
	return (0);
}

/*
 * Update a dr entry in dr_req_list
 */
/*ARGSUSED*/
static int
dr_req_update_entry(char *device, pid_t pid, uint_t flag, int state,
    int seq_num, timespec_t *interval, rcm_info_t **infop)
{
	req_t *req;

	/*
	 * Find request entry. If not found, return RCM_FAILURE.
	 */
	req = find_req_entry(device, flag, -1, dr_req_list);

	if (req == NULL) {
		switch (state) {
		case RCM_STATE_OFFLINE_QUERYING:
		case RCM_STATE_SUSPEND_QUERYING:
		case RCM_STATE_OFFLINING:
		case RCM_STATE_SUSPENDING:
			/* could be a redo operation, no error message */
			break;

		default:
			rcm_log_message(RCM_DEBUG,
			    "update non-existing resource %s\n", device);
		}
		return (RCM_FAILURE);
	}

	/*
	 * During initialization, update is unconditional (forced)
	 * in order to bring the daemon up in a sane state.
	 */
	if (rcmd_get_state() == RCMD_INIT)
		goto update;

	/*
	 * Don't allow update with mismatched initiator pid. This could
	 * happen as part of normal operation.
	 */
	if (pid != req->pid) {
		rcm_log_message(RCM_INFO,
		    gettext("mismatched dr initiator pid: %ld %ld\n"),
		    req->pid, pid);
		goto failure;
	}

	rcm_log_message(RCM_TRACE4,
	    "dr_req_update_entry: state=%d, device=%s\n",
	    req->state, req->device);
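	/*
	 * Summary of the checks below: an offline or suspend (and their
	 * query variants) may only be re-initiated from a failed, query,
	 * or already-completed state of the same operation; onlining and
	 * resuming are allowed from any offline/suspend state; a remove
	 * is allowed only from OFFLINE or OFFLINE_FAIL; and the result
	 * states (OFFLINE, SUSPEND, ONLINE and the *_FAIL variants) must
	 * follow their corresponding in-transition (*ING) state.
	 */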
	/*
	 * Check that the state transition is valid
	 */
	switch (state) {
	case RCM_STATE_OFFLINE_QUERYING:
	case RCM_STATE_OFFLINING:
		/*
		 * This is the case of re-offlining, which applies only
		 * if a previous attempt failed.
		 */
		if ((req->state != RCM_STATE_OFFLINE_FAIL) &&
		    (req->state != RCM_STATE_OFFLINE_QUERYING) &&
		    (req->state != RCM_STATE_OFFLINE_QUERY) &&
		    (req->state != RCM_STATE_OFFLINE_QUERY_FAIL) &&
		    (req->state != RCM_STATE_OFFLINE)) {
			rcm_log_message(RCM_WARNING,
			    gettext("%s: invalid offlining from state %d\n"),
			    device, req->state);
			goto failure;
		}
		break;

	case RCM_STATE_SUSPEND_QUERYING:
	case RCM_STATE_SUSPENDING:
		/*
		 * This is the case of re-suspending, which applies only
		 * if a previous attempt failed.
		 */
		if ((req->state != RCM_STATE_SUSPEND_FAIL) &&
		    (req->state != RCM_STATE_SUSPEND_QUERYING) &&
		    (req->state != RCM_STATE_SUSPEND_QUERY) &&
		    (req->state != RCM_STATE_SUSPEND_QUERY_FAIL) &&
		    (req->state != RCM_STATE_SUSPEND)) {
			rcm_log_message(RCM_WARNING,
			    gettext("%s: invalid suspending from state %d\n"),
			    device, req->state);
			goto failure;
		}
		break;

	case RCM_STATE_RESUMING:
		if ((req->state != RCM_STATE_SUSPEND) &&
		    (req->state != RCM_STATE_SUSPEND_QUERYING) &&
		    (req->state != RCM_STATE_SUSPEND_QUERY) &&
		    (req->state != RCM_STATE_SUSPEND_QUERY_FAIL) &&
		    (req->state != RCM_STATE_SUSPEND_FAIL)) {
			rcm_log_message(RCM_DEBUG,
			    "%s: invalid resuming from state %d\n",
			    device, req->state);
			goto failure;
		}
		break;

	case RCM_STATE_ONLINING:
		if ((req->state != RCM_STATE_OFFLINE) &&
		    (req->state != RCM_STATE_OFFLINE_QUERYING) &&
		    (req->state != RCM_STATE_OFFLINE_QUERY) &&
		    (req->state != RCM_STATE_OFFLINE_QUERY_FAIL) &&
		    (req->state != RCM_STATE_OFFLINE_FAIL)) {
			rcm_log_message(RCM_INFO,
			    gettext("%s: invalid onlining from state %d\n"),
			    device, req->state);
			goto failure;
		}
		break;

	case RCM_STATE_REMOVING:
		if ((req->state != RCM_STATE_OFFLINE) &&
		    (req->state != RCM_STATE_OFFLINE_FAIL)) {
			rcm_log_message(RCM_INFO,
			    gettext("%s: invalid removing from state %d\n"),
			    device, req->state);
			goto failure;
		}
		break;

	case RCM_STATE_SUSPEND_FAIL:
		assert(req->state == RCM_STATE_SUSPENDING);
		break;

	case RCM_STATE_OFFLINE_FAIL:
		assert(req->state == RCM_STATE_OFFLINING);
		break;

	case RCM_STATE_SUSPEND:
		assert(req->state == RCM_STATE_SUSPENDING);
		break;

	case RCM_STATE_OFFLINE:
		assert(req->state == RCM_STATE_OFFLINING);
		break;

	case RCM_STATE_ONLINE:
		assert((req->state == RCM_STATE_RESUMING) ||
		    (req->state == RCM_STATE_ONLINING));
		break;

	default:	/* shouldn't be here */
		rcm_log_message(RCM_ERROR,
		    gettext("invalid update to dr state: %d\n"), state);
		return (RCM_FAILURE);
	}

update:
	/*
	 * update the state, interval, and sequence number; sync state file
	 */
	req->state = state;
	req->seq_num = seq_num;

	if (interval)
		req->interval = *interval;
	else
		bzero(&req->interval, sizeof (timespec_t));

	(void) fsync(state_fd);
	return (RCM_SUCCESS);

failure:
	if (infop != NULL) {
		add_busy_rsrc_to_list(req->device, req->pid, req->state,
		    req->seq_num, NULL, locked_info, locked_err, NULL, infop);
	}

	/*
	 * A request may be left in a transition state because the operator
	 * typed ctrl-C. In this case, the daemon thread continues to run
	 * and will eventually put the state in a non-transitional state.
	 *
	 * To be safe, we return EAGAIN to allow librcm to loop and retry.
	 * If we are called from a module, loop & retry could result in a
	 * deadlock. The caller will check for this case and turn EAGAIN
	 * into RCM_CONFLICT.
	 */
	if (transition_state(req->state)) {
		return (EAGAIN);
	}

	return (RCM_CONFLICT);
}

/*
 * Insert a dr entry in dr_req_list
 */
int
dr_req_add(char *rsrcname, pid_t pid, uint_t flag, int state, int seq_num,
    timespec_t *interval, rcm_info_t **info)
{
	int error;
	char *device;
	req_t *req;

	rcm_log_message(RCM_TRACE3, "dr_req_add(%s, %ld, 0x%x, %d, %d, %p)\n",
	    rsrcname, pid, flag, state, seq_num, (void *)info);

	device = resolve_name(rsrcname);
	if (device == NULL)
		return (EINVAL);

	(void) mutex_lock(&rcm_req_lock);

	/*
	 * In the re-offline/suspend case, attempt to update the dr request.
	 *
	 * If this succeeds, return success;
	 * if this fails because of a conflict, return the error;
	 * if this fails because no entry exists, add a new entry.
	 */
	error = dr_req_update_entry(device, pid, flag, state, seq_num, interval,
	    info);

	switch (error) {
	case RCM_FAILURE:
		/* proceed to add a new entry */
		break;

	case RCM_CONFLICT:
	case RCM_SUCCESS:
	case EAGAIN:
	default:
		goto out;
	}

	/*
	 * Check for lock conflicts
	 */
	error = check_lock(device, flag, LOCK_FOR_DR, info);
	if (error != RCM_SUCCESS) {
		error = RCM_CONFLICT;
		goto out;
	}

	/*
	 * Get empty request entry, fill in values and sync state file
	 */
	req = get_req_entry(&dr_req_list);

	req->seq_num = seq_num;
	req->pid = pid;
	req->flag = flag;
	req->state = state;
	req->type = rsrc_get_type(device);
	(void) strcpy(req->device, device);

	/* cache interval for failure recovery */
	if (interval)
		req->interval = *interval;
	else
		bzero(&req->interval, sizeof (timespec_t));

	(void) fsync(state_fd);

	/*
	 * Add initiator pid to polling list
	 */
	add_to_polling_list(req->pid);

out:
	(void) mutex_unlock(&rcm_req_lock);
	free(device);

	return (error);
}

/*
 * Update a dr entry in dr_req_list
 */
/*ARGSUSED*/
int
dr_req_update(char *rsrcname, pid_t pid, uint_t flag, int state, int seq_num,
    rcm_info_t **info)
{
	int error;
	char *device = resolve_name(rsrcname);

	rcm_log_message(RCM_TRACE3, "dr_req_update(%s, %ld, 0x%x, %d, %d)\n",
	    rsrcname, pid, flag, state, seq_num);

	(void) mutex_lock(&rcm_req_lock);
	error = dr_req_update_entry(device, pid, flag, state, seq_num, NULL,
	    info);
	(void) mutex_unlock(&rcm_req_lock);
	free(device);

	return (error);
}

/*
 * This function scans the DR request list for the next, non-removed
 * entry that is part of the specified sequence. The 'device' name
 * of the entry is copied into the provided 'rsrc' buffer.
 *
 * The 'rsrc' buffer is required because the DR request list is only
 * locked for the duration of this lookup. Giving a direct pointer
 * to something in the list would be unsafe.
 */
int
dr_req_lookup(int seq_num, char *rsrc)
{
	int i;
	int len;
	int base = (seq_num >> SEQ_NUM_SHIFT);
	int retval = RCM_FAILURE;

	if (rsrc == NULL) {
		return (RCM_FAILURE);
	}

	(void) mutex_lock(&rcm_req_lock);

	for (i = 0; i < dr_req_list->n_req_max; i++) {

		/* Skip removed or non-matching entries */
		if ((dr_req_list->req[i].state == RCM_STATE_REMOVE) ||
		    ((dr_req_list->req[i].seq_num >> SEQ_NUM_SHIFT) != base)) {
			continue;
		}

		/* Copy the next-matching 'device' name into 'rsrc' */
		len = strlcpy(rsrc, dr_req_list->req[i].device, MAXPATHLEN);
		if (len < MAXPATHLEN) {
			retval = RCM_SUCCESS;
		}
		break;
	}

	(void) mutex_unlock(&rcm_req_lock);

	return (retval);
}

/*
 * Remove a dr entry from dr_req_list
 */
void
dr_req_remove(char *rsrcname, uint_t flag)
{
	req_t *req;
	char *device = resolve_name(rsrcname);

	rcm_log_message(RCM_TRACE3, "dr_req_remove(%s)\n", rsrcname);

	(void) mutex_lock(&rcm_req_lock);

	/* find entry */
	req = find_req_entry(device, flag, -1, dr_req_list);
	free(device);

	if (req == NULL) {
		(void) mutex_unlock(&rcm_req_lock);
		rcm_log_message(RCM_WARNING,
		    gettext("dr_req entry %s not found\n"), rsrcname);
		return;
	}

	req->state = RCM_STATE_REMOVE;
	dr_req_list->n_req--;
	(void) fsync(state_fd);

	/*
	 * remove pid from polling list
	 */
	remove_from_polling_list(req->pid);

	/*
	 * We don't shrink the dr_req_list size for now.
	 * Shouldn't cause big memory leaks.
	 */
	(void) mutex_unlock(&rcm_req_lock);
}
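/*
 * Note on the style used below: the nvlist_* routines return 0 on
 * success and an errno value on failure, so the single '=' in
 * "if (errno = nvlist_...())" is an intentional assignment that lets
 * the failure be logged with strerror(errno); it is not a typo.
 */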
/*
 * Return the list of ongoing dr operation requests
 */
rcm_info_t *
rsrc_dr_info()
{
	int i;
	rcm_info_t *info;
	rcm_info_t *result = NULL;
	char *rsrc;
	int len;

	rcm_log_message(RCM_TRACE2, "rsrc_dr_info()\n");

	(void) mutex_lock(&rcm_req_lock);
	for (i = 0; i < dr_req_list->n_req_max; i++) {
		if (dr_req_list->req[i].state == RCM_STATE_REMOVE)
			continue;

		if (dr_req_list->req[i].device[0] == '\0')
			continue;

		if (dr_req_list->req[i].flag & RCM_FILESYS) {
			len = strlen(dr_req_list->req[i].device) + 5;
			rsrc = s_malloc(len);
			(void) snprintf(rsrc, len, "%s(fs)",
			    dr_req_list->req[i].device);
		} else {
			rsrc = s_strdup(dr_req_list->req[i].device);
		}

		info = s_calloc(1, sizeof (*info));
		if (errno = nvlist_alloc(&(info->info), NV_UNIQUE_NAME, 0)) {
			rcm_log_message(RCM_ERROR,
			    gettext("failed (nvlist_alloc=%s).\n"),
			    strerror(errno));
			rcmd_exit(errno);
		}

		if (errno = nvlist_add_string(info->info, RCM_RSRCNAME, rsrc)) {
			rcm_log_message(RCM_ERROR,
			    gettext("failed (nvlist_add=%s).\n"),
			    strerror(errno));
			rcmd_exit(errno);
		}
		free(rsrc);

		if (errno = nvlist_add_int64(info->info, RCM_CLIENT_ID,
		    dr_req_list->req[i].pid)) {
			rcm_log_message(RCM_ERROR,
			    gettext("failed (nvlist_add=%s).\n"),
			    strerror(errno));
			rcmd_exit(errno);
		}

		if (errno = nvlist_add_int32(info->info, RCM_SEQ_NUM,
		    dr_req_list->req[i].seq_num)) {
			rcm_log_message(RCM_ERROR,
			    gettext("failed (nvlist_add=%s).\n"),
			    strerror(errno));
			rcmd_exit(errno);
		}

		if (errno = nvlist_add_int32(info->info, RCM_RSRCSTATE,
		    dr_req_list->req[i].state)) {
			rcm_log_message(RCM_ERROR,
			    gettext("failed (nvlist_add=%s).\n"),
			    strerror(errno));
			rcmd_exit(errno);
		}

		if (errno = nvlist_add_string(info->info, RCM_CLIENT_INFO,
		    (char *)locked_info)) {
			rcm_log_message(RCM_ERROR,
			    gettext("failed (nvlist_add=%s).\n"),
			    strerror(errno));
			rcmd_exit(errno);
		}

		info->next = result;
		result = info;
	}
	(void) mutex_unlock(&rcm_req_lock);

	return (result);
}

/*
 * Eliminate entries whose dr initiator is no longer running
 * and recover daemon state during daemon restart.
 *
 * This routine is called either during daemon initialization, after
 * all modules have registered resources, or from the cleanup thread.
 * In either case, it is the only thread running in the daemon.
 */
void
clean_dr_list()
{
	int i;
	struct clean_list {
		struct clean_list *next;
		char *rsrcname;
		pid_t pid;
		int seq_num;
		int state;
		timespec_t interval;
	} *tmp, *list = NULL;
	char *rsrcnames[2];

	rcm_log_message(RCM_TRACE3,
	    "clean_dr_list(): look for stale dr initiators\n");

	rsrcnames[1] = NULL;

	/*
	 * Make a list of entries to recover. This is necessary because
	 * the recovery operation will modify dr_req_list.
	 */
	(void) mutex_lock(&rcm_req_lock);
	for (i = 0; i < dr_req_list->n_req_max; i++) {
		/* skip empty entries */
		if (dr_req_list->req[i].state == RCM_STATE_REMOVE)
			continue;

		if (dr_req_list->req[i].device[0] == '\0')
			continue;

		/* skip cascade operations */
		if (dr_req_list->req[i].seq_num & SEQ_NUM_MASK)
			continue;

		/*
		 * In the cleanup case, ignore entries with initiators alive
		 */
		if ((rcmd_get_state() == RCMD_CLEANUP) &&
		    proc_exist(dr_req_list->req[i].pid))
			continue;

		rcm_log_message(RCM_TRACE1,
		    "found stale entry: %s\n", dr_req_list->req[i].device);

		tmp = s_malloc(sizeof (*tmp));
		tmp->rsrcname = s_strdup(dr_req_list->req[i].device);
		tmp->state = dr_req_list->req[i].state;
		tmp->pid = dr_req_list->req[i].pid;
		tmp->seq_num = dr_req_list->req[i].seq_num;
		tmp->interval = dr_req_list->req[i].interval;
		tmp->next = list;
		list = tmp;
	}
	(void) mutex_unlock(&rcm_req_lock);

	if (list == NULL)
		return;
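	/*
	 * Recovery policy applied by the switch below, roughly: if the DR
	 * initiator is still alive, the interrupted operation is re-issued
	 * (redo); if the initiator has died, the operation is rolled back
	 * (undo) by onlining or resuming the resource. Resources caught in
	 * a transition (*ING) state are simply returned to a stable state.
	 */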
	/*
	 * If everything worked normally, we shouldn't be here.
	 * Since we are here, something went wrong, so say something.
	 */
	if (rcmd_get_state() == RCMD_INIT) {
		rcm_log_message(RCM_NOTICE, gettext("rcm_daemon died "
		    "unexpectedly, recovering previous daemon state\n"));
	} else {
		rcm_log_message(RCM_INFO, gettext("one or more dr initiator "
		    "died, attempting automatic recovery\n"));
	}

	while (list) {
		tmp = list;
		list = tmp->next;

		switch (tmp->state) {
		case RCM_STATE_OFFLINE_QUERY:
		case RCM_STATE_OFFLINE_QUERY_FAIL:
			rsrcnames[0] = tmp->rsrcname;
			if (proc_exist(tmp->pid)) {
				/* redo */
				(void) process_resource_offline(rsrcnames,
				    tmp->pid, RCM_QUERY, tmp->seq_num, NULL);
			} else {
				/* undo */
				(void) notify_resource_online(rsrcnames,
				    tmp->pid, 0, tmp->seq_num, NULL);
			}
			break;

		case RCM_STATE_OFFLINE:
		case RCM_STATE_OFFLINE_FAIL:
			rsrcnames[0] = tmp->rsrcname;
			if (proc_exist(tmp->pid)) {
				/* redo */
				(void) process_resource_offline(rsrcnames,
				    tmp->pid, 0, tmp->seq_num, NULL);
			} else {
				/* undo */
				(void) notify_resource_online(rsrcnames,
				    tmp->pid, 0, tmp->seq_num, NULL);
			}
			break;

		case RCM_STATE_SUSPEND_QUERY:
		case RCM_STATE_SUSPEND_QUERY_FAIL:
			rsrcnames[0] = tmp->rsrcname;
			if (proc_exist(tmp->pid)) {
				/* redo */
				(void) process_resource_suspend(rsrcnames,
				    tmp->pid, RCM_QUERY, tmp->seq_num,
				    &tmp->interval, NULL);
			} else {
				/* undo */
				(void) notify_resource_resume(rsrcnames,
				    tmp->pid, 0, tmp->seq_num, NULL);
			}
			break;

		case RCM_STATE_SUSPEND:
		case RCM_STATE_SUSPEND_FAIL:
			rsrcnames[0] = tmp->rsrcname;
			if (proc_exist(tmp->pid)) {
				/* redo */
				(void) process_resource_suspend(rsrcnames,
				    tmp->pid, 0, tmp->seq_num, &tmp->interval,
				    NULL);
			} else {
				/* undo */
				(void) notify_resource_resume(rsrcnames,
				    tmp->pid, 0, tmp->seq_num, NULL);
			}
			break;

		case RCM_STATE_OFFLINING:
		case RCM_STATE_ONLINING:
			rsrcnames[0] = tmp->rsrcname;
			(void) notify_resource_online(rsrcnames, tmp->pid, 0,
			    tmp->seq_num, NULL);
			break;

		case RCM_STATE_SUSPENDING:
		case RCM_STATE_RESUMING:
			rsrcnames[0] = tmp->rsrcname;
			(void) notify_resource_resume(rsrcnames, tmp->pid, 0,
			    tmp->seq_num, NULL);
			break;

		case RCM_STATE_REMOVING:
			rsrcnames[0] = tmp->rsrcname;
			(void) notify_resource_remove(rsrcnames, tmp->pid, 0,
			    tmp->seq_num, NULL);
			break;

		default:
			rcm_log_message(RCM_WARNING,
			    gettext("%s in unknown state %d\n"),
			    tmp->rsrcname, tmp->state);
			break;
		}
		free(tmp->rsrcname);
		free(tmp);
	}
}

/*
 * Selective thread blocking based on event type
 */
barrier_t barrier;

/*
 * Barrier states:
 *	RCMD_INIT - daemon is initializing, only register allowed
 *	RCMD_NORMAL - normal daemon processing
 *	RCMD_CLEANUP - cleanup thread is waiting or running
 */
int
rcmd_get_state()
{
	return (barrier.state);
}

void
rcmd_set_state(int state)
{
	/*
	 * The state transition is as follows:
	 *	INIT --> NORMAL <---> CLEANUP
	 * The implementation favors the cleanup thread
	 */

	(void) mutex_lock(&barrier.lock);
	barrier.state = state;

	switch (state) {
	case RCMD_CLEANUP:
		/*
		 * Wait for existing threads to exit
		 */
		barrier.wanted++;
		while (barrier.thr_count != 0)
			(void) cond_wait(&barrier.cv, &barrier.lock);
		barrier.wanted--;
		barrier.thr_count = -1;
		break;

	case RCMD_INIT:
	case RCMD_NORMAL:
	default:
		if (barrier.thr_count == -1)
			barrier.thr_count = 0;
		if (barrier.wanted)
			(void) cond_broadcast(&barrier.cv);
		break;
	}

	(void) mutex_unlock(&barrier.lock);
}
/*
 * Increment daemon thread count
 */
int
rcmd_thr_incr(int cmd)
{
	int seq_num;

	(void) mutex_lock(&barrier.lock);
	/*
	 * Set wanted flag
	 */
	barrier.wanted++;

	/*
	 * Wait till it is safe for the daemon to perform the operation.
	 *
	 * NOTE: if a module registers by passing a request to the
	 *	client process, we may need to allow register
	 *	to come through during daemon initialization.
	 */
	while (barrier.state != RCMD_NORMAL)
		(void) cond_wait(&barrier.cv, &barrier.lock);

	if ((cmd == CMD_EVENT) ||
	    (cmd == CMD_REGISTER) ||
	    (cmd == CMD_UNREGISTER)) {
		/*
		 * Event passthru and register ops don't need a sequence number
		 */
		seq_num = -1;
	} else {
		/*
		 * Non-register operations get a sequence number
		 */
		seq_num = get_seq_number();
	}
	barrier.wanted--;
	barrier.thr_count++;
	(void) mutex_unlock(&barrier.lock);

	if ((cmd == CMD_OFFLINE) ||
	    (cmd == CMD_SUSPEND) ||
	    (cmd == CMD_GETINFO)) {
		/*
		 * For these operations, we need to ask modules to
		 * register any new resources that came online.
		 *
		 * This is because mount/umount are not instrumented
		 * to register with rcm before using system resources.
		 * Certain registration ops may fail during sync, which
		 * indicates race conditions. This cannot be avoided
		 * without changing mount/umount.
		 */
		rcmd_db_sync();
	}

	return (seq_num);
}
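/*
 * Illustrative usage of the barrier (a sketch; the real callers live in
 * other parts of the daemon):
 *
 *	seq_num = rcmd_thr_incr(CMD_OFFLINE);	blocks until RCMD_NORMAL
 *	...perform the DR operation...
 *	rcmd_thr_decr();			wakes reload/cleanup thread
 */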
/*
 * Decrement daemon thread count
 */
void
rcmd_thr_decr()
{
	/*
	 * Decrement thread count and wake up reload/cleanup thread.
	 */
	(void) mutex_lock(&barrier.lock);
	barrier.last_update = time(NULL);
	if (--barrier.thr_count == 0)
		(void) cond_broadcast(&barrier.cv);
	(void) mutex_unlock(&barrier.lock);
}

/*
 * Wake up all waiting threads as a result of SIGHUP
 */
static int sighup_received = 0;

void
rcmd_thr_signal()
{
	(void) mutex_lock(&barrier.lock);
	sighup_received = 1;
	(void) cond_broadcast(&barrier.cv);
	(void) mutex_unlock(&barrier.lock);
}

void
rcmd_start_timer(int timeout)
{
	timestruc_t abstime;

	if (timeout == 0)
		timeout = RCM_DAEMON_TIMEOUT;	/* default to 5 minutes */
	else
		dr_req_list->idle_timeout = timeout;	/* persist timeout */

	if (timeout > 0) {
		abstime.tv_sec = time(NULL) + timeout;
		abstime.tv_nsec = 0;	/* don't pass stack garbage below */
	}

	(void) mutex_lock(&barrier.lock);
	for (;;) {
		int idletime;
		int is_active;

		if (timeout > 0)
			(void) cond_timedwait(&barrier.cv, &barrier.lock,
			    &abstime);
		else
			(void) cond_wait(&barrier.cv, &barrier.lock);

		/*
		 * If sighup received, change timeout to 0 so the daemon is
		 * shut down at the first possible moment
		 */
		if (sighup_received)
			timeout = 0;

		/*
		 * If timeout is negative, never shut down the daemon
		 */
		if (timeout < 0)
			continue;

		/*
		 * Check for ongoing/pending activity
		 */
		is_active = (barrier.thr_count || barrier.wanted ||
		    (dr_req_list->n_req != 0));
		if (is_active) {
			abstime.tv_sec = time(NULL) + timeout;
			continue;
		}

		/*
		 * If idletime is less than timeout, continue to wait
		 */
		idletime = time(NULL) - barrier.last_update;
		if (idletime < timeout) {
			abstime.tv_sec = barrier.last_update + timeout;
			continue;
		}
		break;
	}

	(void) script_main_fini();

	rcm_log_message(RCM_INFO, gettext("rcm_daemon is shut down.\n"));
	rcmd_exit(0);
	/*NOTREACHED*/
}
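/*
 * How initiator polling works (as implemented below): for each DR
 * initiator pid, the daemon opens /proc/<pid>/as and passes the
 * descriptor to poll() with no requested events. When the initiator
 * exits, poll() returns for that descriptor, the poll thread sets
 * need_cleanup, and the cleanup thread can then release locks held by
 * dead initiators via clean_dr_list().
 */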
/*
 * Code related to polling client pids.
 * Not declared as static so that we can find this structure easily
 * in the core file.
 */
struct {
	int		n_pids;
	int		n_max_pids;
	thread_t	poll_tid;	/* poll thread id */
	int		signaled;
	pid_t		*pids;
	int		*refcnt;
	struct pollfd	*fds;
	cond_t		cv;	/* the associated lock is rcm_req_lock */
} polllist;

static int
find_pid_index(pid_t pid)
{
	int i;

	for (i = 0; i < polllist.n_pids; i++) {
		if (polllist.pids[i] == pid) {
			return (i);
		}
	}
	return (-1);
}

/*
 * Resize buffers for new pids
 */
static int
get_pid_index()
{
	const int n_chunk = 10;

	int n_max;
	int index = polllist.n_pids;

	if (polllist.n_pids < polllist.n_max_pids) {
		polllist.n_pids++;
		return (index);
	}

	if (polllist.n_max_pids == 0) {
		n_max = n_chunk;
		polllist.pids = s_calloc(n_max, sizeof (pid_t));
		polllist.refcnt = s_calloc(n_max, sizeof (int));
		polllist.fds = s_calloc(n_max, sizeof (struct pollfd));
	} else {
		n_max = polllist.n_max_pids + n_chunk;
		polllist.pids = s_realloc(polllist.pids,
		    n_max * sizeof (pid_t));
		polllist.refcnt = s_realloc(polllist.refcnt,
		    n_max * sizeof (int));
		polllist.fds = s_realloc(polllist.fds,
		    n_max * sizeof (struct pollfd));
	}
	polllist.n_max_pids = n_max;
	polllist.n_pids++;
	return (index);
}

/*
 * rcm_req_lock must be held
 */
static void
add_to_polling_list(pid_t pid)
{
	int fd, index;
	char procfile[MAXPATHLEN];

	if (pid == (pid_t)0)
		return;

	rcm_log_message(RCM_TRACE1, "add_to_polling_list(%ld)\n", pid);

	/*
	 * Need to stop the poll thread before manipulating the polllist
	 * since the poll thread may possibly be using polllist.fds[] and
	 * polllist.n_pids. As an optimization, first check if the pid
	 * is already in the polllist. If it is, there is no need to
	 * stop the poll thread. Just increment the pid reference count
	 * and return.
	 */
	index = find_pid_index(pid);
	if (index != -1) {
		polllist.refcnt[index]++;
		return;
	}

	stop_polling_thread();

	/*
	 * In an attempt to stop the poll thread we may have released
	 * and reacquired rcm_req_lock. So find the index again.
	 */
	index = find_pid_index(pid);
	if (index != -1) {
		polllist.refcnt[index]++;
		goto done;
	}

	/*
	 * Open a /proc file
	 */
	(void) sprintf(procfile, "/proc/%ld/as", pid);
	if ((fd = open(procfile, O_RDONLY)) == -1) {
		rcm_log_message(RCM_NOTICE, gettext("open(%s): %s\n"),
		    procfile, strerror(errno));
		goto done;
	}

	/*
	 * add pid to polllist
	 */
	index = get_pid_index();
	polllist.pids[index] = pid;
	polllist.refcnt[index] = 1;
	polllist.fds[index].fd = fd;
	polllist.fds[index].events = 0;
	polllist.fds[index].revents = 0;

	rcm_log_message(RCM_DEBUG, "add pid %ld at index %d\n", pid, index);

done:
	start_polling_thread();
}
/*
 * rcm_req_lock must be held
 */
static void
remove_from_polling_list(pid_t pid)
{
	int i, index;

	if (pid == (pid_t)0)
		return;

	rcm_log_message(RCM_TRACE1, "remove_from_polling_list(%ld)\n", pid);

	/*
	 * Need to stop the poll thread before manipulating the polllist
	 * since the poll thread may possibly be using polllist.fds[] and
	 * polllist.n_pids. As an optimization, first check the pid
	 * reference count. If the pid reference count is greater than 1,
	 * there is no need to stop the polling thread.
	 */

	index = find_pid_index(pid);
	if (index == -1) {
		rcm_log_message(RCM_NOTICE,
		    gettext("error removing pid %ld from polling list\n"), pid);
		return;
	}

	/*
	 * decrement the pid refcnt
	 */
	if (polllist.refcnt[index] > 1) {
		polllist.refcnt[index]--;
		return;
	}

	stop_polling_thread();

	/*
	 * In an attempt to stop the poll thread we may have released
	 * and reacquired rcm_req_lock. So find the index again.
	 */
	index = find_pid_index(pid);
	if (index == -1) {
		rcm_log_message(RCM_NOTICE,
		    gettext("error removing pid %ld from polling list\n"), pid);
		goto done;
	}

	if (--polllist.refcnt[index] > 0)
		goto done;

	/*
	 * refcnt down to zero, delete pid from polling list
	 */
	(void) close(polllist.fds[index].fd);
	polllist.n_pids--;

	for (i = index; i < polllist.n_pids; i++) {
		polllist.pids[i] = polllist.pids[i + 1];
		polllist.refcnt[i] = polllist.refcnt[i + 1];
		bcopy(&polllist.fds[i + 1], &polllist.fds[i],
		    sizeof (struct pollfd));
	}

	rcm_log_message(RCM_DEBUG, "remove pid %ld at index %d\n", pid, index);

done:
	start_polling_thread();
}

void
init_poll_thread()
{
	polllist.poll_tid = (thread_t)-1;
}

void
cleanup_poll_thread()
{
	(void) mutex_lock(&rcm_req_lock);
	if (polllist.poll_tid == thr_self()) {
		rcm_log_message(RCM_TRACE2,
		    "cleanup_poll_thread: n_pids = %d\n", polllist.n_pids);
		polllist.poll_tid = (thread_t)-1;
		(void) cond_broadcast(&polllist.cv);
	}
	(void) mutex_unlock(&rcm_req_lock);
}
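/*
 * Poll thread lifecycle (summary): start_polling_thread() creates
 * pollfunc() as a detached thread whenever there are pids to watch and
 * the daemon is in the RCMD_NORMAL state; stop_polling_thread()
 * interrupts it with SIGUSR1 and waits on polllist.cv until
 * cleanup_poll_thread() records the exit. The thread also exits on its
 * own when poll() returns, i.e. when a watched initiator dies.
 */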
n_pids = %d\n", 1556 polllist.n_pids); 1557 1558 /* 1559 * Unblock SIGUSR1 to allow polling thread to be killed 1560 */ 1561 (void) sigemptyset(&mask); 1562 (void) sigaddset(&mask, SIGUSR1); 1563 (void) thr_sigsetmask(SIG_UNBLOCK, &mask, NULL); 1564 1565 (void) poll(polllist.fds, polllist.n_pids, (time_t)-1); 1566 1567 /* 1568 * block SIGUSR1 to avoid being killed while holding a lock 1569 */ 1570 (void) sigemptyset(&mask); 1571 (void) sigaddset(&mask, SIGUSR1); 1572 (void) thr_sigsetmask(SIG_BLOCK, &mask, NULL); 1573 1574 rcm_log_message(RCM_TRACE2, "returned from poll()\n"); 1575 1576 cleanup_poll_thread(); 1577 1578 (void) mutex_lock(&barrier.lock); 1579 need_cleanup = 1; 1580 (void) cond_broadcast(&barrier.cv); 1581 (void) mutex_unlock(&barrier.lock); 1582 1583 return (NULL); 1584 } 1585 1586 /* 1587 * rcm_req_lock must be held 1588 */ 1589 void 1590 start_polling_thread() 1591 { 1592 int err; 1593 1594 if (rcmd_get_state() != RCMD_NORMAL) 1595 return; 1596 1597 if (polllist.poll_tid != (thread_t)-1 || polllist.n_pids == 0) 1598 return; 1599 1600 if ((err = thr_create(NULL, 0, pollfunc, NULL, THR_DETACHED, 1601 &polllist.poll_tid)) == 0) 1602 polllist.signaled = 0; 1603 else 1604 rcm_log_message(RCM_ERROR, 1605 gettext("failed to create polling thread: %s\n"), 1606 strerror(err)); 1607 } 1608 1609 /* 1610 * rcm_req_lock must be held 1611 */ 1612 static void 1613 stop_polling_thread() 1614 { 1615 int err; 1616 1617 while (polllist.poll_tid != (thread_t)-1) { 1618 if (polllist.signaled == 0) { 1619 if ((err = thr_kill(polllist.poll_tid, SIGUSR1)) == 0) 1620 polllist.signaled = 1; 1621 else 1622 /* 1623 * thr_kill shouldn't have failed since the 1624 * poll thread id and the signal are valid. 1625 * So log an error. Since when thr_kill 1626 * fails no signal is sent (as per man page), 1627 * the cond_wait below will wait until the 1628 * the poll thread exits by some other means. 1629 * The poll thread, for example, exits on its 1630 * own when any DR initiator process that it 1631 * is currently polling exits. 1632 */ 1633 rcm_log_message(RCM_ERROR, 1634 gettext( 1635 "fail to kill polling thread %d: %s\n"), 1636 polllist.poll_tid, strerror(err)); 1637 } 1638 (void) cond_wait(&polllist.cv, &rcm_req_lock); 1639 } 1640 } 1641