/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 *
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include "rcm_impl.h"
#include "rcm_module.h"

/*
 * Global locks
 */
mutex_t rcm_req_lock;	/* protects global dr & info request list */

/*
 * Daemon state file
 */
static int state_fd;
#define	RCM_STATE_FILE	"/var/run/rcm_daemon_state"
#define	N_REQ_CHUNK	10	/* grow 10 entries at a time */

/*
 * Daemon timeout value
 */
#define	RCM_DAEMON_TIMEOUT	300	/* 5 minutes idle time */

/*
 * Struct for a list of outstanding rcm requests
 */
typedef struct {
	int	seq_num;		/* sequence number of request */
	int	state;			/* current state */
	pid_t	pid;			/* pid of initiator */
	uint_t	flag;			/* request flags */
	int	type;			/* resource (device) type */
	timespec_t interval;		/* suspend interval */
	char	device[MAXPATHLEN];	/* name of device or resource */
} req_t;

typedef struct {
	int	n_req;		/* number of entries currently in use */
	int	n_req_max;	/* number of req_t slots allocated */
	int	n_seq_max;	/* last base sequence number handed out */
	int	idle_timeout;	/* persisted idle timeout value */
	req_t	req[1];
	/* more req_t entries may follow */
} req_list_t;
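
/*
 * Size note (illustrative): because req_list_t ends in a one-element
 * req[] array, a list holding n entries is allocated as
 *
 *	sizeof (req_list_t) + (n - 1) * sizeof (req_t)
 *
 * which is the same formula used when growing the list in
 * get_req_entry() below.
 */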

static req_list_t *dr_req_list;
static req_list_t *info_req_list;

static const char *locked_info = "DR operation in progress";
static const char *locked_err = "Resource is busy";

static int rcmd_get_state();
static void add_to_polling_list(pid_t);
static void remove_from_polling_list(pid_t);

void start_polling_thread();
static void stop_polling_thread();

/*
 * Initialize request lists required for locking
 */
void
rcmd_lock_init(void)
{
	int size;
	struct stat fbuf;

	/*
	 * Start info list with one slot, then grow on demand.
	 */
	info_req_list = s_calloc(1, sizeof (req_list_t));
	info_req_list->n_req_max = 1;

	/*
	 * Open daemon state file and map in contents
	 */
	state_fd = open(RCM_STATE_FILE, O_CREAT|O_RDWR, 0600);
	if (state_fd == -1) {
		rcm_log_message(RCM_ERROR, gettext("cannot open %s: %s\n"),
		    RCM_STATE_FILE, strerror(errno));
		rcmd_exit(errno);
	}

	if (fstat(state_fd, &fbuf) != 0) {
		rcm_log_message(RCM_ERROR, gettext("cannot stat %s: %s\n"),
		    RCM_STATE_FILE, strerror(errno));
		rcmd_exit(errno);
	}

	size = fbuf.st_size;
	if (size == 0) {
		size = sizeof (req_list_t);
		if (ftruncate(state_fd, size) != 0) {
			rcm_log_message(RCM_ERROR,
			    gettext("cannot truncate %s: %s\n"),
			    RCM_STATE_FILE, strerror(errno));
			rcmd_exit(errno);
		}
	}

	/*LINTED*/
	dr_req_list = (req_list_t *)mmap(NULL, size, PROT_READ|PROT_WRITE,
	    MAP_SHARED, state_fd, 0);
	if (dr_req_list == MAP_FAILED) {
		rcm_log_message(RCM_ERROR, gettext("cannot mmap %s: %s\n"),
		    RCM_STATE_FILE, strerror(errno));
		rcmd_exit(errno);
	}

	/*
	 * Initial size is one entry
	 */
	if (dr_req_list->n_req_max == 0) {
		dr_req_list->n_req_max = 1;
		(void) fsync(state_fd);
		return;
	}

	rcm_log_message(RCM_DEBUG, "n_req = %d, n_req_max = %d\n",
	    dr_req_list->n_req, dr_req_list->n_req_max);

	/*
	 * Recover the daemon state
	 */
	clean_dr_list();
}

/*
 * Get a unique sequence number--to be called with rcm_req_lock held.
 */
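/*
 * Encoding sketch (inferred from the uses of seq_num in this file):
 * the base number occupies the bits above SEQ_NUM_SHIFT, leaving the
 * low bits (SEQ_NUM_MASK) to number cascade operations that share the
 * same base.  Two requests belong to the same DR sequence exactly when
 * (a >> SEQ_NUM_SHIFT) == (b >> SEQ_NUM_SHIFT), which is the comparison
 * made in find_req_entry(), info_req_remove() and dr_req_lookup().
 */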
static int
get_seq_number()
{
	int number;

	if (dr_req_list == NULL)
		return (0);

	dr_req_list->n_seq_max++;
	number = (dr_req_list->n_seq_max << SEQ_NUM_SHIFT);
	(void) fsync(state_fd);

	return (number);
}

/*
 * Find entry in list with the same resource name and sequence number.
 * If seq_num == -1, no seq_num matching is required.
 */
static req_t *
find_req_entry(char *device, uint_t flag, int seq_num, req_list_t *list)
{
	int i;

	/*
	 * Look for entry with the same resource and seq_num.
	 * Also match RCM_FILESYS field in flag.
	 */
	for (i = 0; i < list->n_req_max; i++) {
		if (list->req[i].state == RCM_STATE_REMOVE)
			/* stale entry */
			continue;
		/*
		 * We need to distinguish a file system root from the
		 * directory it is mounted on.
		 *
		 * Applications are not aware of any difference between
		 * the two, but the system keeps track of it internally by
		 * checking for mount points while traversing a file path.
		 * In a similar spirit, RCM keeps this distinction as an
		 * implementation detail.
		 */
		if ((strcmp(device, list->req[i].device) != 0) ||
		    (list->req[i].flag & RCM_FILESYS) != (flag & RCM_FILESYS))
			/* different resource */
			continue;

		if ((seq_num != -1) && ((seq_num >> SEQ_NUM_SHIFT) !=
		    (list->req[i].seq_num >> SEQ_NUM_SHIFT)))
			/* different base seqnum */
			continue;

		return (&list->req[i]);
	}

	return (NULL);
}

/*
 * Get the next empty req_t entry.  If no entry exists, grow the list.
 */
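/*
 * Growth sketch: the two lists grow differently.  info_req_list lives
 * on the heap and is grown with s_realloc(); dr_req_list is backed by
 * the mmap'ed state file, so it is grown by ftruncate()ing the file to
 * the new size and mapping it in again.  This is what lets DR locks
 * survive a daemon restart.
 */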
static req_t *
get_req_entry(req_list_t **listp)
{
	int i;
	int n_req = (*listp)->n_req;
	int n_req_max = (*listp)->n_req_max;

	/*
	 * If the list is full, grow the list and return the first
	 * entry in the new portion.
	 */
	if (n_req == n_req_max) {
		int newsize;

		n_req_max += N_REQ_CHUNK;
		newsize = sizeof (req_list_t) + (n_req_max - 1) *
		    sizeof (req_t);

		if (listp == &info_req_list) {
			*listp = s_realloc(*listp, newsize);
		} else if (ftruncate(state_fd, newsize) != 0) {
			rcm_log_message(RCM_ERROR,
			    gettext("cannot truncate %s: %s\n"),
			    RCM_STATE_FILE, strerror(errno));
			rcmd_exit(errno);
		/*LINTED*/
		} else if ((*listp = (req_list_t *)mmap(NULL, newsize,
		    PROT_READ|PROT_WRITE, MAP_SHARED, state_fd, 0)) ==
		    MAP_FAILED) {
			rcm_log_message(RCM_ERROR,
			    gettext("cannot mmap %s: %s\n"),
			    RCM_STATE_FILE, strerror(errno));
			rcmd_exit(errno);
		}

		/* Initialize the new entries */
		for (i = (*listp)->n_req_max; i < n_req_max; i++) {
			(*listp)->req[i].state = RCM_STATE_REMOVE;
			(void) strcpy((*listp)->req[i].device, "");
		}

		(*listp)->n_req_max = n_req_max;
		(*listp)->n_req++;
		return (&(*listp)->req[n_req]);
	}

	/*
	 * The list contains empty slots; find one.
	 */
	for (i = 0; i < n_req_max; i++) {
		if (((*listp)->req[i].device[0] == '\0') ||
		    ((*listp)->req[i].state == RCM_STATE_REMOVE)) {
			break;
		}
	}

	assert(i < n_req_max);	/* empty slot must exist */

	(*listp)->n_req++;
	return (&(*listp)->req[i]);
}

/*
 * When one resource depends on multiple resources, it's possible that
 * rcm_get_info can be called multiple times on the resource, resulting
 * in duplicate information.  By assigning a unique sequence number to
 * each rcm_get_info operation, this duplication can be eliminated.
 *
 * Insert a dr entry in info_req_list
 */
int
info_req_add(char *rsrcname, uint_t flag, int seq_num)
{
	int error = 0;
	char *device;
	req_t *req;

	rcm_log_message(RCM_TRACE2, "info_req_add(%s, %d)\n",
	    rsrcname, seq_num);

	device = resolve_name(rsrcname);
	(void) mutex_lock(&rcm_req_lock);

	/*
	 * Look for entry with the same resource and seq_num.
	 * If it exists, we return an error so that such
	 * information is not gathered more than once.
	 */
	if (find_req_entry(device, flag, seq_num, info_req_list) != NULL) {
		rcm_log_message(RCM_DEBUG, "getinfo cycle: %s %d \n",
		    device, seq_num);
		error = -1;
		goto out;
	}

	/*
	 * Get empty entry and fill in seq_num and device.
	 */
	req = get_req_entry(&info_req_list);
	req->seq_num = seq_num;
	req->state = RCM_STATE_ONLINE;	/* mark that the entry is in use */
	req->flag = flag;
	(void) strcpy(req->device, device);

out:
	(void) mutex_unlock(&rcm_req_lock);
	free(device);

	return (error);
}

/*
 * Remove all entries associated with seq_num from info_req_list
 */
void
info_req_remove(int seq_num)
{
	int i;

	rcm_log_message(RCM_TRACE3, "info_req_remove(%d)\n", seq_num);

	seq_num >>= SEQ_NUM_SHIFT;
	(void) mutex_lock(&rcm_req_lock);

	/* remove all entries with seq_num */
	for (i = 0; i < info_req_list->n_req_max; i++) {
		if (info_req_list->req[i].state == RCM_STATE_REMOVE)
			continue;

		if ((info_req_list->req[i].seq_num >> SEQ_NUM_SHIFT) != seq_num)
			continue;

		info_req_list->req[i].state = RCM_STATE_REMOVE;
		info_req_list->n_req--;
	}

	/*
	 * We don't shrink the info_req_list size for now.
	 */
	(void) mutex_unlock(&rcm_req_lock);
}

/*
 * Check for lock conflicts.  There is a conflict if:
 * - an attempt is made to DR a node when either its ancestor or
 *   descendant is in the process of DR
 * - an attempt is made to register for a node when its ancestor is
 *   locked for DR
 */
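/*
 * Illustration (assuming EQUAL() is a pathname match and
 * DESCENDENT(x, y) tests whether x lies below y in the resource tree):
 *
 *	EQUAL("/dev/dsk/c0t0d0", "/dev/dsk/c0t0d0")		true
 *	DESCENDENT("/dev/dsk/c0t0d0s0", "/dev/dsk/c0t0d0")	true
 *
 * so a lock on a node conflicts with operations on the node itself and
 * on nodes above or below it.
 */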
static int
check_lock(char *device, uint_t flag, int cflag, rcm_info_t **info)
{
	int i, ret = RCM_SUCCESS;

	if (info)
		*info = NULL;

	/*
	 * During daemon initialization, don't check locks
	 */
	if (dr_req_list == NULL)
		return (ret);

	for (i = 0; i < dr_req_list->n_req; i++) {
		req_t *req = &dr_req_list->req[i];
		char *dr_dev = req->device;

		/*
		 * Skip empty entries
		 */
		if ((req->state == RCM_STATE_REMOVE) || (dr_dev[0] == '\0'))
			continue;

		/*
		 * Make sure that none of the ancestors of dr_dev is
		 * being operated upon.
		 */
		if (EQUAL(device, dr_dev) || DESCENDENT(device, dr_dev)) {
			/*
			 * An exception to this is a file system.
			 * We should allow a file system rooted at a
			 * child directory to be unmounted.
			 */
			if ((flag & RCM_FILESYS) && (!EQUAL(device, dr_dev) ||
			    ((dr_req_list->req[i].flag & RCM_FILESYS) == 0)))
				continue;

			assert(info != 0);

			add_busy_rsrc_to_list(dr_dev, dr_req_list->req[i].pid,
			    dr_req_list->req[i].state,
			    dr_req_list->req[i].seq_num, NULL, locked_info,
			    locked_err, NULL, info);
			ret = RCM_CONFLICT;
			break;
		}

		if ((cflag == LOCK_FOR_DR) && DESCENDENT(dr_dev, device)) {
			/*
			 * Check descendants only for a DR request.
			 *
			 * Multiple descendants could be doing DR;
			 * we want to find them all.
			 */
			assert(info != 0);

			add_busy_rsrc_to_list(dr_dev, dr_req_list->req[i].pid,
			    dr_req_list->req[i].state,
			    dr_req_list->req[i].seq_num, NULL, locked_info,
			    locked_err, NULL, info);
			ret = RCM_CONFLICT;
			/* don't break here, need to find all conflicts */
		}
	}

	return (ret);
}

/*
 * Check for lock conflicts for DR operation or client registration
 */
int
rsrc_check_lock_conflicts(char *rsrcname, uint_t flag, int cflag,
    rcm_info_t **info)
{
	int result;
	char *device;

	device = resolve_name(rsrcname);
	result = check_lock(device, flag, cflag, info);
	free(device);

	return (result);
}

static int
transition_state(int state)
{
	/*
	 * If the resource state is in transition, ask the caller to
	 * try again.
	 */
	switch (state) {
	case RCM_STATE_OFFLINING:
	case RCM_STATE_SUSPENDING:
	case RCM_STATE_RESUMING:
	case RCM_STATE_ONLINING:
	case RCM_STATE_REMOVING:

		return (1);

	default:
		break;
	}
	return (0);
}

/*
 * Update a dr entry in dr_req_list
 */
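/*
 * Transition map enforced by the switch below (new state on the left,
 * states it may be entered from on the right):
 *
 *	OFFLINE_QUERYING, OFFLINING	<- OFFLINE, OFFLINE_FAIL and the
 *					   offline querying states
 *	SUSPEND_QUERYING, SUSPENDING	<- SUSPEND, SUSPEND_FAIL and the
 *					   suspend querying states
 *	RESUMING			<- SUSPEND and its querying/fail states
 *	ONLINING			<- OFFLINE and its querying/fail states
 *	REMOVING			<- OFFLINE, OFFLINE_FAIL
 *	SUSPEND, SUSPEND_FAIL		<- SUSPENDING
 *	OFFLINE, OFFLINE_FAIL		<- OFFLINING
 *	ONLINE				<- RESUMING, ONLINING
 */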
/*ARGSUSED*/
static int
dr_req_update_entry(char *device, pid_t pid, uint_t flag, int state,
    int seq_num, timespec_t *interval, rcm_info_t **infop)
{
	req_t *req;

	/*
	 * Find request entry.  If not found, return RCM_FAILURE
	 */
	req = find_req_entry(device, flag, -1, dr_req_list);

	if (req == NULL) {
		switch (state) {
		case RCM_STATE_OFFLINE_QUERYING:
		case RCM_STATE_SUSPEND_QUERYING:
		case RCM_STATE_OFFLINING:
		case RCM_STATE_SUSPENDING:
			/* could be a redo operation, no error message */
			break;

		default:
			rcm_log_message(RCM_DEBUG,
			    "update non-existing resource %s\n", device);
		}
		return (RCM_FAILURE);
	}

	/*
	 * During initialization, update is unconditional (forced)
	 * in order to bring the daemon up in a sane state.
	 */
	if (rcmd_get_state() == RCMD_INIT)
		goto update;

	/*
	 * Don't allow update with mismatched initiator pid.  This could
	 * happen as part of normal operation.
	 */
	if (pid != req->pid) {
		rcm_log_message(RCM_INFO,
		    gettext("mismatched dr initiator pid: %ld %ld\n"),
		    req->pid, pid);
		goto failure;
	}

	rcm_log_message(RCM_TRACE4,
	    "dr_req_update_entry: state=%d, device=%s\n",
	    req->state, req->device);

	/*
	 * Check that the state transition is valid
	 */
	switch (state) {
	case RCM_STATE_OFFLINE_QUERYING:
	case RCM_STATE_OFFLINING:
		/*
		 * This is the case of re-offlining, which applies only
		 * if a previous attempt failed.
		 */
		if ((req->state != RCM_STATE_OFFLINE_FAIL) &&
		    (req->state != RCM_STATE_OFFLINE_QUERYING) &&
		    (req->state != RCM_STATE_OFFLINE_QUERY) &&
		    (req->state != RCM_STATE_OFFLINE_QUERY_FAIL) &&
		    (req->state != RCM_STATE_OFFLINE)) {
			rcm_log_message(RCM_WARNING,
			    gettext("%s: invalid offlining from state %d\n"),
			    device, req->state);
			goto failure;
		}
		break;

	case RCM_STATE_SUSPEND_QUERYING:
	case RCM_STATE_SUSPENDING:
		/*
		 * This is the case of re-suspending, which applies only
		 * if a previous attempt failed.
		 */
		if ((req->state != RCM_STATE_SUSPEND_FAIL) &&
		    (req->state != RCM_STATE_SUSPEND_QUERYING) &&
		    (req->state != RCM_STATE_SUSPEND_QUERY) &&
		    (req->state != RCM_STATE_SUSPEND_QUERY_FAIL) &&
		    (req->state != RCM_STATE_SUSPEND)) {
			rcm_log_message(RCM_WARNING,
			    gettext("%s: invalid suspending from state %d\n"),
			    device, req->state);
			goto failure;
		}
		break;

	case RCM_STATE_RESUMING:
		if ((req->state != RCM_STATE_SUSPEND) &&
		    (req->state != RCM_STATE_SUSPEND_QUERYING) &&
		    (req->state != RCM_STATE_SUSPEND_QUERY) &&
		    (req->state != RCM_STATE_SUSPEND_QUERY_FAIL) &&
		    (req->state != RCM_STATE_SUSPEND_FAIL)) {
			rcm_log_message(RCM_DEBUG,
			    "%s: invalid resuming from state %d\n",
			    device, req->state);
			goto failure;
		}
		break;

	case RCM_STATE_ONLINING:
		if ((req->state != RCM_STATE_OFFLINE) &&
		    (req->state != RCM_STATE_OFFLINE_QUERYING) &&
		    (req->state != RCM_STATE_OFFLINE_QUERY) &&
		    (req->state != RCM_STATE_OFFLINE_QUERY_FAIL) &&
		    (req->state != RCM_STATE_OFFLINE_FAIL)) {
			rcm_log_message(RCM_INFO,
			    gettext("%s: invalid onlining from state %d\n"),
			    device, req->state);
			goto failure;
		}
		break;

	case RCM_STATE_REMOVING:
		if ((req->state != RCM_STATE_OFFLINE) &&
		    (req->state != RCM_STATE_OFFLINE_FAIL)) {
			rcm_log_message(RCM_INFO,
			    gettext("%s: invalid removing from state %d\n"),
			    device, req->state);
			goto failure;
		}
		break;

	case RCM_STATE_SUSPEND_FAIL:
		assert(req->state == RCM_STATE_SUSPENDING);
		break;

	case RCM_STATE_OFFLINE_FAIL:
		assert(req->state == RCM_STATE_OFFLINING);
		break;

	case RCM_STATE_SUSPEND:
		assert(req->state == RCM_STATE_SUSPENDING);
		break;

	case RCM_STATE_OFFLINE:
		assert(req->state == RCM_STATE_OFFLINING);
		break;

	case RCM_STATE_ONLINE:
		assert((req->state == RCM_STATE_RESUMING) ||
		    (req->state == RCM_STATE_ONLINING));
		break;

	default:	/* shouldn't be here */
		rcm_log_message(RCM_ERROR,
		    gettext("invalid update to dr state: %d\n"), state);
		return (RCM_FAILURE);
	}

update:
	/*
	 * update the state, interval, and sequence number; sync state file
	 */
	req->state = state;
	req->seq_num = seq_num;

	if (interval)
		req->interval = *interval;
	else
		bzero(&req->interval, sizeof (timespec_t));

	(void) fsync(state_fd);
	return (RCM_SUCCESS);

failure:
	if (infop != NULL) {
		add_busy_rsrc_to_list(req->device, req->pid, req->state,
		    req->seq_num, NULL, locked_info, locked_err, NULL, infop);
	}

	/*
	 * A request may be left in a transition state because the operator
	 * typed ctrl-C.  In this case, the daemon thread continues to run
	 * and will eventually put the state in a non-transitional state.
	 *
	 * To be safe, we return EAGAIN to allow librcm to loop and retry.
	 * If we are called from a module, loop & retry could result in a
	 * deadlock.  The caller will check for this case and turn EAGAIN
	 * into RCM_CONFLICT.
	 */
	if (transition_state(req->state)) {
		return (EAGAIN);
	}

	return (RCM_CONFLICT);
}

/*
 * Insert a dr entry in dr_req_list
 */
int
dr_req_add(char *rsrcname, pid_t pid, uint_t flag, int state, int seq_num,
    timespec_t *interval, rcm_info_t **info)
{
	int error;
	char *device;
	req_t *req;

	rcm_log_message(RCM_TRACE3, "dr_req_add(%s, %ld, 0x%x, %d, %d, %p)\n",
	    rsrcname, pid, flag, state, seq_num, (void *)info);

	device = resolve_name(rsrcname);
	if (device == NULL)
		return (EINVAL);

	(void) mutex_lock(&rcm_req_lock);

	/*
	 * In the re-offline/suspend case, attempt to update the dr request.
	 *
	 * If this succeeds, return success.
	 * If this fails because of a conflict, return an error.
	 * If this fails because no entry exists, add a new entry.
	 */
	error = dr_req_update_entry(device, pid, flag, state, seq_num, interval,
	    info);

	switch (error) {
	case RCM_FAILURE:
		/* proceed to add a new entry */
		break;

	case RCM_CONFLICT:
	case RCM_SUCCESS:
	case EAGAIN:
	default:
		goto out;
	}

	/*
	 * Check for lock conflicts
	 */
	error = check_lock(device, flag, LOCK_FOR_DR, info);
	if (error != RCM_SUCCESS) {
		error = RCM_CONFLICT;
		goto out;
	}

	/*
	 * Get empty request entry, fill in values and sync state file
	 */
	req = get_req_entry(&dr_req_list);

	req->seq_num = seq_num;
	req->pid = pid;
	req->flag = flag;
	req->state = state;
	req->type = rsrc_get_type(device);
	(void) strcpy(req->device, device);

	/* cache interval for failure recovery */
	if (interval)
		req->interval = *interval;
	else
		bzero(&req->interval, sizeof (timespec_t));

	(void) fsync(state_fd);

	/*
	 * Add initiator pid to polling list
	 */
	add_to_polling_list(req->pid);

out:
	(void) mutex_unlock(&rcm_req_lock);
	free(device);

	return (error);
}

/*
 * Update a dr entry in dr_req_list
 */
/*ARGSUSED*/
int
dr_req_update(char *rsrcname, pid_t pid, uint_t flag, int state, int seq_num,
    rcm_info_t **info)
{
	int error;
	char *device = resolve_name(rsrcname);

	rcm_log_message(RCM_TRACE3, "dr_req_update(%s, %ld, 0x%x, %d, %d)\n",
	    rsrcname, pid, flag, state, seq_num);

	(void) mutex_lock(&rcm_req_lock);
	error = dr_req_update_entry(device, pid, flag, state, seq_num, NULL,
	    info);
	(void) mutex_unlock(&rcm_req_lock);
	free(device);

	return (error);
}

/*
 * This function scans the DR request list for the next, non-removed
 * entry that is part of the specified sequence.  The 'device' name
 * of the entry is copied into the provided 'rsrc' buffer.
 *
 * The 'rsrc' buffer is required because the DR request list is only
 * locked for the duration of this lookup.  Handing out a direct
 * pointer into the list would be unsafe.
 */
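/*
 * Usage sketch (hypothetical caller): walk every resource in a DR
 * sequence by looking up and removing entries until the lookup fails:
 *
 *	char rsrc[MAXPATHLEN];
 *
 *	while (dr_req_lookup(seq_num, rsrc) == RCM_SUCCESS)
 *		dr_req_remove(rsrc, flag);
 */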
int
dr_req_lookup(int seq_num, char *rsrc)
{
	int i;
	int len;
	int base = (seq_num >> SEQ_NUM_SHIFT);
	int retval = RCM_FAILURE;

	if (rsrc == NULL) {
		return (RCM_FAILURE);
	}

	(void) mutex_lock(&rcm_req_lock);

	for (i = 0; i < dr_req_list->n_req_max; i++) {

		/* Skip removed or non-matching entries */
		if ((dr_req_list->req[i].state == RCM_STATE_REMOVE) ||
		    ((dr_req_list->req[i].seq_num >> SEQ_NUM_SHIFT) != base)) {
			continue;
		}

		/* Copy the next-matching 'device' name into 'rsrc' */
		len = strlcpy(rsrc, dr_req_list->req[i].device, MAXPATHLEN);
		if (len < MAXPATHLEN) {
			retval = RCM_SUCCESS;
		}
		break;
	}

	(void) mutex_unlock(&rcm_req_lock);

	return (retval);
}

/*
 * Remove a dr entry in dr_req_list
 */
void
dr_req_remove(char *rsrcname, uint_t flag)
{
	req_t *req;
	char *device = resolve_name(rsrcname);

	rcm_log_message(RCM_TRACE3, "dr_req_remove(%s)\n", rsrcname);

	(void) mutex_lock(&rcm_req_lock);

	/* find entry */
	req = find_req_entry(device, flag, -1, dr_req_list);
	free(device);

	if (req == NULL) {
		(void) mutex_unlock(&rcm_req_lock);
		rcm_log_message(RCM_WARNING,
		    gettext("dr_req entry %s not found\n"), rsrcname);
		return;
	}

	req->state = RCM_STATE_REMOVE;
	dr_req_list->n_req--;
	(void) fsync(state_fd);

	/*
	 * remove pid from polling list
	 */
	remove_from_polling_list(req->pid);

	/*
	 * We don't shrink the dr_req_list size for now.
	 * Shouldn't cause big memory leaks.
	 */
	(void) mutex_unlock(&rcm_req_lock);
}

/*
 * Return the list of ongoing dr operation requests
 */
rcm_info_t *
rsrc_dr_info()
{
	int i;
	rcm_info_t *info;
	rcm_info_t *result = NULL;
	char *rsrc;
	int len;

	rcm_log_message(RCM_TRACE2, "rsrc_dr_info()\n");

	(void) mutex_lock(&rcm_req_lock);
	for (i = 0; i < dr_req_list->n_req_max; i++) {
		if (dr_req_list->req[i].state == RCM_STATE_REMOVE)
			continue;

		if (dr_req_list->req[i].device[0] == '\0')
			continue;

		if (dr_req_list->req[i].flag & RCM_FILESYS) {
			len = strlen(dr_req_list->req[i].device) + 5;
			rsrc = s_malloc(len);
			(void) snprintf(rsrc, len, "%s(fs)",
			    dr_req_list->req[i].device);
		} else {
			rsrc = s_strdup(dr_req_list->req[i].device);
		}

		info = s_calloc(1, sizeof (*info));
		if (errno = nvlist_alloc(&(info->info), NV_UNIQUE_NAME, 0)) {
			rcm_log_message(RCM_ERROR,
			    gettext("failed (nvlist_alloc=%s).\n"),
			    strerror(errno));
			rcmd_exit(errno);
		}

		if (errno = nvlist_add_string(info->info, RCM_RSRCNAME, rsrc)) {
			rcm_log_message(RCM_ERROR,
			    gettext("failed (nvlist_add=%s).\n"),
			    strerror(errno));
			rcmd_exit(errno);
		}
		(void) free(rsrc);

		if (errno = nvlist_add_int64(info->info, RCM_CLIENT_ID,
		    dr_req_list->req[i].pid)) {
			rcm_log_message(RCM_ERROR,
			    gettext("failed (nvlist_add=%s).\n"),
			    strerror(errno));
			rcmd_exit(errno);
		}

		if (errno = nvlist_add_int32(info->info, RCM_SEQ_NUM,
		    dr_req_list->req[i].seq_num)) {
			rcm_log_message(RCM_ERROR,
			    gettext("failed (nvlist_add=%s).\n"),
			    strerror(errno));
			rcmd_exit(errno);
		}

		if (errno = nvlist_add_int32(info->info, RCM_RSRCSTATE,
		    dr_req_list->req[i].state)) {
			rcm_log_message(RCM_ERROR,
			    gettext("failed (nvlist_add=%s).\n"),
			    strerror(errno));
			rcmd_exit(errno);
		}

		if (errno = nvlist_add_string(info->info, RCM_CLIENT_INFO,
		    (char *)locked_info)) {
			rcm_log_message(RCM_ERROR,
			    gettext("failed (nvlist_add=%s).\n"),
			    strerror(errno));
			rcmd_exit(errno);
		}

		info->next = result;
		result = info;
	}
	(void) mutex_unlock(&rcm_req_lock);

	return (result);
}

/*
 * Eliminate entries whose dr initiator is no longer running
 * and recover daemon state during daemon restart.
 *
 * This routine is called either during daemon initialization,
 * after all modules have registered resources, or from the cleanup
 * thread.  In either case, it is the only thread running in the
 * daemon.
 */
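/*
 * Recovery policy (as implemented below): for each stale entry, if the
 * initiator process is still alive the operation is re-applied (redo);
 * if the initiator is gone, the operation is rolled back (undo).
 * Entries caught in a transition state (OFFLINING, SUSPENDING, ...) are
 * always rolled back to a stable state.
 */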
void
clean_dr_list()
{
	int i;
	struct clean_list {
		struct clean_list *next;
		char *rsrcname;
		pid_t pid;
		int seq_num;
		int state;
		timespec_t interval;
	} *tmp, *list = NULL;
	char *rsrcnames[2];

	rcm_log_message(RCM_TRACE3,
	    "clean_dr_list(): look for stale dr initiators\n");

	rsrcnames[1] = NULL;

	/*
	 * Make a list of entries to recover.  This is necessary because
	 * the recovery operation will modify dr_req_list.
	 */
	(void) mutex_lock(&rcm_req_lock);
	for (i = 0; i < dr_req_list->n_req_max; i++) {
		/* skip empty entries */
		if (dr_req_list->req[i].state == RCM_STATE_REMOVE)
			continue;

		if (dr_req_list->req[i].device[0] == '\0')
			continue;

		/* skip cascade operations */
		if (dr_req_list->req[i].seq_num & SEQ_NUM_MASK)
			continue;

		/*
		 * In the cleanup case, ignore entries with initiators alive
		 */
		if ((rcmd_get_state() == RCMD_CLEANUP) &&
		    proc_exist(dr_req_list->req[i].pid))
			continue;

		rcm_log_message(RCM_TRACE1,
		    "found stale entry: %s\n", dr_req_list->req[i].device);

		tmp = s_malloc(sizeof (*tmp));
		tmp->rsrcname = s_strdup(dr_req_list->req[i].device);
		tmp->state = dr_req_list->req[i].state;
		tmp->pid = dr_req_list->req[i].pid;
		tmp->seq_num = dr_req_list->req[i].seq_num;
		tmp->interval = dr_req_list->req[i].interval;
		tmp->next = list;
		list = tmp;
	}
	(void) mutex_unlock(&rcm_req_lock);

	if (list == NULL)
		return;

	/*
	 * If everything worked normally, we shouldn't be here.
	 * Since we are here, something went wrong, so say something.
	 */
	if (rcmd_get_state() == RCMD_INIT) {
		rcm_log_message(RCM_NOTICE, gettext("rcm_daemon died "
		    "unexpectedly, recovering previous daemon state\n"));
	} else {
		rcm_log_message(RCM_INFO, gettext("one or more dr initiators "
		    "died, attempting automatic recovery\n"));
	}

	while (list) {
		tmp = list;
		list = tmp->next;

		switch (tmp->state) {
		case RCM_STATE_OFFLINE_QUERY:
		case RCM_STATE_OFFLINE_QUERY_FAIL:
			rsrcnames[0] = tmp->rsrcname;
			if (proc_exist(tmp->pid)) {
				/* redo */
				(void) process_resource_offline(rsrcnames,
				    tmp->pid, RCM_QUERY, tmp->seq_num, NULL);
			} else {
				/* undo */
				(void) notify_resource_online(rsrcnames,
				    tmp->pid, 0, tmp->seq_num, NULL);
			}
			break;

		case RCM_STATE_OFFLINE:
		case RCM_STATE_OFFLINE_FAIL:
			rsrcnames[0] = tmp->rsrcname;
			if (proc_exist(tmp->pid)) {
				/* redo */
				(void) process_resource_offline(rsrcnames,
				    tmp->pid, 0, tmp->seq_num, NULL);
			} else {
				/* undo */
				(void) notify_resource_online(rsrcnames,
				    tmp->pid, 0, tmp->seq_num, NULL);
			}
			break;

		case RCM_STATE_SUSPEND_QUERY:
		case RCM_STATE_SUSPEND_QUERY_FAIL:
			rsrcnames[0] = tmp->rsrcname;
			if (proc_exist(tmp->pid)) {
				/* redo */
				(void) process_resource_suspend(rsrcnames,
				    tmp->pid, RCM_QUERY, tmp->seq_num,
				    &tmp->interval, NULL);
			} else {
				/* undo */
				(void) notify_resource_resume(rsrcnames,
				    tmp->pid, 0, tmp->seq_num, NULL);
			}
			break;

		case RCM_STATE_SUSPEND:
		case RCM_STATE_SUSPEND_FAIL:
			rsrcnames[0] = tmp->rsrcname;
			if (proc_exist(tmp->pid)) {
				/* redo */
				(void) process_resource_suspend(rsrcnames,
				    tmp->pid, 0, tmp->seq_num, &tmp->interval,
				    NULL);
			} else {
				/* undo */
				(void) notify_resource_resume(rsrcnames,
				    tmp->pid, 0, tmp->seq_num, NULL);
			}
			break;

		case RCM_STATE_OFFLINING:
		case RCM_STATE_ONLINING:
			rsrcnames[0] = tmp->rsrcname;
			(void) notify_resource_online(rsrcnames, tmp->pid, 0,
			    tmp->seq_num, NULL);
			break;

		case RCM_STATE_SUSPENDING:
		case RCM_STATE_RESUMING:
			rsrcnames[0] = tmp->rsrcname;
			(void) notify_resource_resume(rsrcnames, tmp->pid, 0,
			    tmp->seq_num, NULL);
			break;

		case RCM_STATE_REMOVING:
			rsrcnames[0] = tmp->rsrcname;
			(void) notify_resource_remove(rsrcnames, tmp->pid, 0,
			    tmp->seq_num, NULL);
			break;

		default:
			rcm_log_message(RCM_WARNING,
			    gettext("%s in unknown state %d\n"),
			    tmp->rsrcname, tmp->state);
			break;
		}
		free(tmp->rsrcname);
		free(tmp);
	}
}

/*
 * Selective thread blocking based on daemon state
 */
barrier_t barrier;
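
/*
 * Barrier bookkeeping (derived from the code below): thr_count holds
 * the number of active daemon threads, or -1 while the cleanup thread
 * owns the barrier; wanted counts threads blocked in cond_wait()
 * waiting for the state to return to RCMD_NORMAL.
 */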

/*
 * Barrier states:
 * RCMD_INIT - daemon is initializing, only register allowed
 * RCMD_NORMAL - normal daemon processing
 * RCMD_CLEANUP - cleanup thread is waiting or running
 */
int
rcmd_get_state()
{
	return (barrier.state);
}

void
rcmd_set_state(int state)
{
	/*
	 * The state transition is as follows:
	 *	INIT --> NORMAL <---> CLEANUP
	 * The implementation favors the cleanup thread
	 */

	(void) mutex_lock(&barrier.lock);
	barrier.state = state;

	switch (state) {
	case RCMD_CLEANUP:
		/*
		 * Wait for existing threads to exit
		 */
		barrier.wanted++;
		while (barrier.thr_count != 0)
			(void) cond_wait(&barrier.cv, &barrier.lock);
		barrier.wanted--;
		barrier.thr_count = -1;
		break;

	case RCMD_INIT:
	case RCMD_NORMAL:
	default:
		if (barrier.thr_count == -1)
			barrier.thr_count = 0;
		if (barrier.wanted)
			(void) cond_broadcast(&barrier.cv);
		break;
	}

	(void) mutex_unlock(&barrier.lock);
}

/*
 * Increment daemon thread count
 */
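/*
 * Typical caller pattern (a sketch; error handling omitted):
 *
 *	seq_num = rcmd_thr_incr(CMD_OFFLINE);
 *	(void) process_resource_offline(rsrcnames, pid, flag,
 *	    seq_num, &info);
 *	rcmd_thr_decr();
 */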
int
rcmd_thr_incr(int cmd)
{
	int seq_num;

	(void) mutex_lock(&barrier.lock);
	/*
	 * Set wanted flag
	 */
	barrier.wanted++;

	/*
	 * Wait till it is safe for the daemon to perform the operation
	 *
	 * NOTE: if a module registers by passing a request to the
	 *	client process, we may need to allow registration
	 *	to come through during daemon initialization.
	 */
	while (barrier.state != RCMD_NORMAL)
		(void) cond_wait(&barrier.cv, &barrier.lock);

	if ((cmd == CMD_EVENT) ||
	    (cmd == CMD_REGISTER) ||
	    (cmd == CMD_UNREGISTER)) {
		/*
		 * Event passthru and register ops don't need a sequence number
		 */
		seq_num = -1;
	} else {
		/*
		 * A non-register operation gets a sequence number
		 */
		seq_num = get_seq_number();
	}
	barrier.wanted--;
	barrier.thr_count++;
	(void) mutex_unlock(&barrier.lock);

	if ((cmd == CMD_OFFLINE) ||
	    (cmd == CMD_SUSPEND) ||
	    (cmd == CMD_GETINFO)) {
		/*
		 * For these operations, we need to ask modules to
		 * register any new resources that came online.
		 *
		 * This is because mount/umount are not instrumented
		 * to register with rcm before using system resources.
		 * Certain registration ops may fail during sync, which
		 * indicates race conditions.  This cannot be avoided
		 * without changing mount/umount.
		 */
		rcmd_db_sync();
	}

	return (seq_num);
}

/*
 * Decrement thread count
 */
void
rcmd_thr_decr()
{
	/*
	 * Decrement thread count and wake up reload/cleanup thread.
	 */
	(void) mutex_lock(&barrier.lock);
	barrier.last_update = time(NULL);
	if (--barrier.thr_count == 0)
		(void) cond_broadcast(&barrier.cv);
	(void) mutex_unlock(&barrier.lock);
}

/*
 * Wakeup all waiting threads as a result of SIGHUP
 */
static int sighup_received = 0;

void
rcmd_thr_signal()
{
	(void) mutex_lock(&barrier.lock);
	sighup_received = 1;
	(void) cond_broadcast(&barrier.cv);
	(void) mutex_unlock(&barrier.lock);
}

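/*
 * Daemon idle timer.  The timeout argument selects the behavior:
 * 0 uses the default RCM_DAEMON_TIMEOUT, a positive value is persisted
 * in the state file as the new idle timeout, and a negative value
 * means the daemon never shuts down when idle.  The loop below exits,
 * shutting down the daemon, only when there is no thread activity, no
 * pending request, and the idle time has reached the timeout.
 */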
void
rcmd_start_timer(int timeout)
{
	timestruc_t abstime;

	if (timeout == 0)
		timeout = RCM_DAEMON_TIMEOUT;	/* default to 5 minutes */
	else
		dr_req_list->idle_timeout = timeout;	/* persist timeout */

	if (timeout > 0) {
		abstime.tv_sec = time(NULL) + timeout;
	}

	(void) mutex_lock(&barrier.lock);
	for (;;) {
		int idletime;
		int is_active;

		if (timeout > 0)
			(void) cond_timedwait(&barrier.cv, &barrier.lock,
			    &abstime);
		else
			(void) cond_wait(&barrier.cv, &barrier.lock);

		/*
		 * If a SIGHUP was received, change the timeout to 0 so the
		 * daemon is shut down at the first possible moment
		 */
		if (sighup_received)
			timeout = 0;

		/*
		 * If timeout is negative, never shut down the daemon
		 */
		if (timeout < 0)
			continue;

		/*
		 * Check for ongoing/pending activity
		 */
		is_active = (barrier.thr_count || barrier.wanted ||
		    (dr_req_list->n_req != 0));
		if (is_active) {
			abstime.tv_sec = time(NULL) + timeout;
			continue;
		}

		/*
		 * If idletime is less than timeout, continue to wait
		 */
		idletime = time(NULL) - barrier.last_update;
		if (idletime < timeout) {
			abstime.tv_sec = barrier.last_update + timeout;
			continue;
		}
		break;
	}

	(void) script_main_fini();

	rcm_log_message(RCM_INFO, gettext("rcm_daemon is shut down.\n"));
}

/*
 * Code related to polling client pids.
 * Not declared static so that the structure can be found easily
 * in a core file.
 */
struct {
	int n_pids;
	int n_max_pids;
	thread_t poll_tid;	/* poll thread id */
	int signaled;
	pid_t *pids;
	int *refcnt;
	struct pollfd *fds;
	cond_t cv;	/* the associated lock is rcm_req_lock */
} polllist;
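
/*
 * Polling technique (a sketch of the intent): each DR initiator is
 * watched by holding its /proc/<pid>/as file open and passing that
 * descriptor to poll() with no requested events.  When the initiator
 * exits, poll() returns on that descriptor, waking pollfunc() so the
 * cleanup thread can undo the dead initiator's requests.  The exact
 * revents bits are not examined by this code.
 */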

static int
find_pid_index(pid_t pid)
{
	int i;

	for (i = 0; i < polllist.n_pids; i++) {
		if (polllist.pids[i] == pid) {
			return (i);
		}
	}
	return (-1);
}

/*
 * Return the index for a new pid, growing the buffers as needed
 */
static int
get_pid_index()
{
	const int n_chunk = 10;

	int n_max;
	int index = polllist.n_pids;

	if (polllist.n_pids < polllist.n_max_pids) {
		polllist.n_pids++;
		return (index);
	}

	if (polllist.n_max_pids == 0) {
		n_max = n_chunk;
		polllist.pids = s_calloc(n_max, sizeof (pid_t));
		polllist.refcnt = s_calloc(n_max, sizeof (int));
		polllist.fds = s_calloc(n_max, sizeof (struct pollfd));
	} else {
		n_max = polllist.n_max_pids + n_chunk;
		polllist.pids = s_realloc(polllist.pids,
		    n_max * sizeof (pid_t));
		polllist.refcnt = s_realloc(polllist.refcnt,
		    n_max * sizeof (int));
		polllist.fds = s_realloc(polllist.fds,
		    n_max * sizeof (struct pollfd));
	}
	polllist.n_max_pids = n_max;
	polllist.n_pids++;
	return (index);
}

/*
 * rcm_req_lock must be held
 */
static void
add_to_polling_list(pid_t pid)
{
	int fd, index;
	char procfile[MAXPATHLEN];

	if (pid == (pid_t)0)
		return;

	rcm_log_message(RCM_TRACE1, "add_to_polling_list(%ld)\n", pid);

	/*
	 * Need to stop the poll thread before manipulating the polllist
	 * since the poll thread may be using polllist.fds[] and
	 * polllist.n_pids.  As an optimization, first check whether the
	 * pid is already in the polllist.  If it is, there is no need to
	 * stop the poll thread; just increment the pid reference count
	 * and return.
	 */
	index = find_pid_index(pid);
	if (index != -1) {
		polllist.refcnt[index]++;
		return;
	}

	stop_polling_thread();

	/*
	 * In an attempt to stop the poll thread we may have released
	 * and reacquired rcm_req_lock.  So find the index again.
	 */
	index = find_pid_index(pid);
	if (index != -1) {
		polllist.refcnt[index]++;
		goto done;
	}

	/*
	 * Open a /proc file
	 */
	(void) sprintf(procfile, "/proc/%ld/as", pid);
	if ((fd = open(procfile, O_RDONLY)) == -1) {
		rcm_log_message(RCM_NOTICE, gettext("open(%s): %s\n"),
		    procfile, strerror(errno));
		goto done;
	}

	/*
	 * add pid to polllist
	 */
	index = get_pid_index();
	polllist.pids[index] = pid;
	polllist.refcnt[index] = 1;
	polllist.fds[index].fd = fd;
	polllist.fds[index].events = 0;
	polllist.fds[index].revents = 0;

	rcm_log_message(RCM_DEBUG, "add pid %ld at index %d\n", pid, index);

done:
	start_polling_thread();
}

/*
 * rcm_req_lock must be held
 */
static void
remove_from_polling_list(pid_t pid)
{
	int i, index;

	if (pid == (pid_t)0)
		return;

	rcm_log_message(RCM_TRACE1, "remove_from_polling_list(%ld)\n", pid);

	/*
	 * Need to stop the poll thread before manipulating the polllist
	 * since the poll thread may be using polllist.fds[] and
	 * polllist.n_pids.  As an optimization, first check the pid
	 * reference count.  If it is greater than 1, there is no need
	 * to stop the polling thread.
	 */

	index = find_pid_index(pid);
	if (index == -1) {
		rcm_log_message(RCM_NOTICE,
		    gettext("error removing pid %ld from polling list\n"), pid);
		return;
	}

	/*
	 * decrement the pid refcnt
	 */
	if (polllist.refcnt[index] > 1) {
		polllist.refcnt[index]--;
		return;
	}

	stop_polling_thread();

	/*
	 * In an attempt to stop the poll thread we may have released
	 * and reacquired rcm_req_lock.  So find the index again.
	 */
	index = find_pid_index(pid);
	if (index == -1) {
		rcm_log_message(RCM_NOTICE,
		    gettext("error removing pid %ld from polling list\n"), pid);
		goto done;
	}

	if (--polllist.refcnt[index] > 0)
		goto done;

	/*
	 * refcnt down to zero, delete pid from polling list
	 */
	(void) close(polllist.fds[index].fd);
	polllist.n_pids--;

	for (i = index; i < polllist.n_pids; i++) {
		polllist.pids[i] = polllist.pids[i + 1];
		polllist.refcnt[i] = polllist.refcnt[i + 1];
		bcopy(&polllist.fds[i + 1], &polllist.fds[i],
		    sizeof (struct pollfd));
	}

	rcm_log_message(RCM_DEBUG, "remove pid %ld at index %d\n", pid, index);

done:
	start_polling_thread();
}

void
init_poll_thread()
{
	polllist.poll_tid = (thread_t)-1;
}

void
cleanup_poll_thread()
{
	(void) mutex_lock(&rcm_req_lock);
	if (polllist.poll_tid == thr_self()) {
		rcm_log_message(RCM_TRACE2,
		    "cleanup_poll_thread: n_pids = %d\n", polllist.n_pids);
		polllist.poll_tid = (thread_t)-1;
		(void) cond_broadcast(&polllist.cv);
	}
	(void) mutex_unlock(&rcm_req_lock);
}

/*ARGSUSED*/
static void *
pollfunc(void *arg)
{
	sigset_t mask;

	rcm_log_message(RCM_TRACE2, "poll thread started. n_pids = %d\n",
	    polllist.n_pids);

	/*
	 * Unblock SIGUSR1 to allow the polling thread to be killed
	 */
	(void) sigemptyset(&mask);
	(void) sigaddset(&mask, SIGUSR1);
	(void) thr_sigsetmask(SIG_UNBLOCK, &mask, NULL);

	(void) poll(polllist.fds, polllist.n_pids, (time_t)-1);

	/*
	 * Block SIGUSR1 to avoid being killed while holding a lock
	 */
	(void) sigemptyset(&mask);
	(void) sigaddset(&mask, SIGUSR1);
	(void) thr_sigsetmask(SIG_BLOCK, &mask, NULL);

	rcm_log_message(RCM_TRACE2, "returned from poll()\n");

	cleanup_poll_thread();

	(void) mutex_lock(&barrier.lock);
	need_cleanup = 1;
	(void) cond_broadcast(&barrier.cv);
	(void) mutex_unlock(&barrier.lock);

	return (NULL);
}

/*
 * rcm_req_lock must be held
 */
void
start_polling_thread()
{
	int err;

	if (rcmd_get_state() != RCMD_NORMAL)
		return;

	if (polllist.poll_tid != (thread_t)-1 || polllist.n_pids == 0)
		return;

	if ((err = thr_create(NULL, 0, pollfunc, NULL, THR_DETACHED,
	    &polllist.poll_tid)) == 0)
		polllist.signaled = 0;
	else
		rcm_log_message(RCM_ERROR,
		    gettext("failed to create polling thread: %s\n"),
		    strerror(err));
}

/*
 * rcm_req_lock must be held
 */
static void
stop_polling_thread()
{
	int err;

	while (polllist.poll_tid != (thread_t)-1) {
		if (polllist.signaled == 0) {
			if ((err = thr_kill(polllist.poll_tid, SIGUSR1)) == 0)
				polllist.signaled = 1;
			else
				/*
				 * thr_kill shouldn't have failed since the
				 * poll thread id and the signal are valid,
				 * so log an error.  Since thr_kill sends no
				 * signal when it fails (as per the man page),
				 * the cond_wait below will wait until the
				 * poll thread exits by some other means.
				 * The poll thread, for example, exits on its
				 * own when any DR initiator process that it
				 * is currently polling exits.
				 */
				rcm_log_message(RCM_ERROR,
				    gettext(
				    "failed to kill polling thread %d: %s\n"),
				    polllist.poll_tid, strerror(err));
		}
		(void) cond_wait(&polllist.cv, &rcm_req_lock);
	}
}