xref: /illumos-gate/usr/src/cmd/rcm_daemon/common/rcm_impl.c (revision 2a8bcb4efb45d99ac41c94a75c396b362c414f7f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  *
21  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
22  * Use is subject to license terms.
23  */
24 
25 #include <librcm_impl.h>
26 #include "rcm_impl.h"
27 
28 static int query(char **, int, const char *, int, pid_t, uint_t, timespec_t *,
29     int, rcm_info_t **, int *);
30 static void cancel_query(int, const char *, pid_t, uint_t, int);
31 
32 /*
33  * The following ops are invoked when modules initiate librcm calls which
34  * require daemon processing. Cascaded RCM operations must come through
35  * this path.
36  */
37 librcm_ops_t rcm_ops = {
38 	add_resource_client,
39 	remove_resource_client,
40 	get_resource_info,
41 	process_resource_suspend,
42 	notify_resource_resume,
43 	process_resource_offline,
44 	notify_resource_online,
45 	notify_resource_remove,
46 	request_capacity_change,
47 	notify_capacity_change,
48 	notify_resource_event,
49 	get_resource_state
50 };
51 
52 /*
53  * Process a request or a notification on a subtree
54  */
55 /*ARGSUSED2*/
56 static int
common_resource_op(int cmd,char * rsrcname,pid_t pid,uint_t flag,int seq_num,timespec_t * interval,nvlist_t * nvl,rcm_info_t ** info)57 common_resource_op(int cmd, char *rsrcname, pid_t pid, uint_t flag, int seq_num,
58     timespec_t *interval, nvlist_t *nvl, rcm_info_t **info)
59 {
60 	int error;
61 	rsrc_node_t *node;
62 	tree_walk_arg_t arg;
63 
64 	/*
65 	 * Find the node (root of subtree) in the resource tree, invoke
66 	 * appropriate callbacks for all clients hanging off the subtree,
67 	 * and mark the subtree with the appropriate state.
68 	 *
69 	 * NOTE: It's possible the node doesn't exist, which means no RCM
70 	 * consumer registered for the resource. In this case we silently
71 	 * succeed.
72 	 */
73 	error = rsrc_node_find(rsrcname, 0, &node);
74 	if ((error == RCM_SUCCESS) && (node != NULL)) {
75 		arg.flag = flag;
76 		arg.info = info;
77 		arg.seq_num = seq_num;
78 		arg.interval = interval;
79 		arg.nvl = nvl;
80 		arg.cmd = cmd;
81 
82 		if ((cmd == CMD_NOTIFY_CHANGE) ||
83 		    (cmd == CMD_REQUEST_CHANGE) ||
84 		    (cmd == CMD_EVENT)) {
85 			error = rsrc_client_action_list(node->users, cmd, &arg);
86 		} else {
87 			error = rsrc_tree_action(node, cmd, &arg);
88 		}
89 	} else if ((error == RCM_SUCCESS) && (flag & RCM_RETIRE_REQUEST)) {
90 		/*
91 		 * No matching node, so no client. This means there
92 		 * is no constraint (RCM wise) on this retire. Return
93 		 * RCM_NO_CONSTRAINT to indicate this
94 		 */
95 		rcm_log_message(RCM_TRACE1, "No client. Returning "
96 		    "RCM_NO_CONSTRAINT: %s\n", rsrcname);
97 		error = RCM_NO_CONSTRAINT;
98 	}
99 
100 	return (error);
101 }
102 
103 /*
104  * When a resource is removed, notify all clients who registered for this
105  * particular resource.
106  */
107 int
notify_resource_remove(char ** rsrcnames,pid_t pid,uint_t flag,int seq_num,rcm_info_t ** info)108 notify_resource_remove(char **rsrcnames, pid_t pid, uint_t flag, int seq_num,
109     rcm_info_t **info)
110 {
111 	int i;
112 	int error;
113 	int retval = RCM_SUCCESS;
114 
115 	for (i = 0; rsrcnames[i] != NULL; i++) {
116 
117 		rcm_log_message(RCM_TRACE2,
118 		    "notify_resource_remove(%s, %ld, 0x%x, %d)\n", rsrcnames[i],
119 		    pid, flag, seq_num);
120 
121 		/*
122 		 * Mark state as issuing removal notification. Return failure
123 		 * if no DR request for this node exists.
124 		 */
125 		error = dr_req_update(rsrcnames[i], pid, flag,
126 		    RCM_STATE_REMOVING, seq_num, info);
127 		if (error != RCM_SUCCESS) {
128 			retval = error;
129 			continue;
130 		}
131 
132 		error = common_resource_op(CMD_REMOVE, rsrcnames[i], pid, flag,
133 		    seq_num, NULL, NULL, info);
134 
135 		/*
136 		 * delete the request entry from DR list
137 		 */
138 		dr_req_remove(rsrcnames[i], flag);
139 
140 		if (error != RCM_SUCCESS)
141 			retval = error;
142 	}
143 
144 	return (retval);
145 }
146 
147 /*
148  * Notify users that a resource has been resumed
149  */
150 int
notify_resource_resume(char ** rsrcnames,pid_t pid,uint_t flag,int seq_num,rcm_info_t ** info)151 notify_resource_resume(char **rsrcnames, pid_t pid, uint_t flag, int seq_num,
152     rcm_info_t **info)
153 {
154 	int i;
155 	int error;
156 	rcm_info_t *state_info;
157 	rcm_info_tuple_t *state_tuple;
158 	int retval = RCM_SUCCESS;
159 
160 	for (i = 0; rsrcnames[i] != NULL; i++) {
161 
162 		state_info = NULL;
163 		state_tuple = NULL;
164 
165 		/* Check resource state (was resource actually suspended?) */
166 		if (get_resource_state(rsrcnames[i], pid, &state_info) ||
167 		    ((state_tuple = rcm_info_next(state_info, NULL)) == NULL) ||
168 		    (rcm_info_state(state_tuple) == RCM_STATE_SUSPEND))
169 			flag |= RCM_SUSPENDED;
170 		if (state_info)
171 			rcm_free_info(state_info);
172 
173 		rcm_log_message(RCM_TRACE2,
174 		    "notify_resource_resume(%s, %ld, 0x%x, %d)\n",
175 		    rsrcnames[i], pid, flag, seq_num);
176 
177 		/*
178 		 * Mark state as sending resumption notifications
179 		 */
180 		error = dr_req_update(rsrcnames[i], pid, flag,
181 		    RCM_STATE_RESUMING, seq_num, info);
182 		if (error != RCM_SUCCESS) {
183 			retval = error;
184 			continue;
185 		}
186 
187 		error = common_resource_op(CMD_RESUME, rsrcnames[i], pid, flag,
188 		    seq_num, NULL, NULL, info);
189 
190 		dr_req_remove(rsrcnames[i], flag);
191 
192 		if (error != RCM_SUCCESS)
193 			retval = error;
194 	}
195 
196 	return (retval);
197 }
198 
199 /*
200  * Notify users that an offlined device is again available
201  */
202 int
notify_resource_online(char ** rsrcnames,pid_t pid,uint_t flag,int seq_num,rcm_info_t ** info)203 notify_resource_online(char **rsrcnames, pid_t pid, uint_t flag, int seq_num,
204     rcm_info_t **info)
205 {
206 	int i;
207 	int error;
208 	int retval = RCM_SUCCESS;
209 
210 	for (i = 0; rsrcnames[i] != NULL; i++) {
211 
212 		rcm_log_message(RCM_TRACE2,
213 		    "notify_resource_online(%s, %ld, 0x%x, %d)\n",
214 		    rsrcnames[i], pid, flag, seq_num);
215 
216 		/*
217 		 * Mark state as sending onlining notifications
218 		 */
219 		error = dr_req_update(rsrcnames[i], pid, flag,
220 		    RCM_STATE_ONLINING, seq_num, info);
221 		if (error != RCM_SUCCESS) {
222 			retval = error;
223 			continue;
224 		}
225 
226 		error = common_resource_op(CMD_ONLINE, rsrcnames[i], pid, flag,
227 		    seq_num, NULL, NULL, info);
228 
229 		dr_req_remove(rsrcnames[i], flag);
230 
231 		if (error != RCM_SUCCESS)
232 			retval = error;
233 	}
234 
235 	return (retval);
236 }
237 
/*
 * Offline and suspend requests share the same query/implement logic, which
 * has to distinguish several cases:
 *
 * 1. It is a door call and RCM_QUERY is not set:
 *	run a QUERY; if that succeeds, run the operation.
 *
 * 2. It is a door call and RCM_QUERY is set:
 *	run the QUERY only.
 *
 * 3. It is not a door call:
 *	run the call, but look at the flag to see if the
 *	lock should be kept.
 */
252 
253 /*
254  * Request permission to suspend a resource
255  */
256 int
process_resource_suspend(char ** rsrcnames,pid_t pid,uint_t flag,int seq_num,timespec_t * interval,rcm_info_t ** info)257 process_resource_suspend(char **rsrcnames, pid_t pid, uint_t flag, int seq_num,
258     timespec_t *interval, rcm_info_t **info)
259 {
260 	int i;
261 	int error = RCM_SUCCESS;
262 	int is_doorcall = ((seq_num & SEQ_NUM_MASK) == 0);
263 
264 	/*
265 	 * Query the operation first.  The return value of the query indicates
266 	 * if the operation should proceed and be implemented.
267 	 */
268 	if (query(rsrcnames, CMD_SUSPEND, "suspend", RCM_STATE_SUSPEND_QUERYING,
269 	    pid, flag, interval, seq_num, info, &error) == 0) {
270 		return (error);
271 	}
272 
273 	/*
274 	 * Implement the operation.
275 	 */
276 	for (i = 0; rsrcnames[i] != NULL; i++) {
277 
278 		/* Update the lock from a query state to the suspending state */
279 		if ((error = dr_req_update(rsrcnames[i], pid, flag,
280 		    RCM_STATE_SUSPENDING, seq_num, info)) != RCM_SUCCESS) {
281 
282 			rcm_log_message(RCM_DEBUG,
283 			    "suspend %s denied with error %d\n", rsrcnames[i],
284 			    error);
285 
286 			/*
287 			 * When called from a module, don't return EAGAIN.
288 			 * This is to avoid recursion if module always retries.
289 			 */
290 			if (!is_doorcall && error == EAGAIN) {
291 				return (RCM_CONFLICT);
292 			}
293 
294 			return (error);
295 		}
296 
297 		/* Actually suspend the resource */
298 		error = common_resource_op(CMD_SUSPEND, rsrcnames[i], pid,
299 		    flag, seq_num, interval, NULL, info);
300 		if (error != RCM_SUCCESS) {
301 			(void) dr_req_update(rsrcnames[i], pid, flag,
302 			    RCM_STATE_SUSPEND_FAIL, seq_num, info);
303 			rcm_log_message(RCM_DEBUG,
304 			    "suspend tree failed for %s\n", rsrcnames[i]);
305 			return (error);
306 		}
307 
308 		rcm_log_message(RCM_TRACE3, "suspend tree succeeded for %s\n",
309 		    rsrcnames[i]);
310 
311 		/* Update the lock for the successful suspend */
312 		(void) dr_req_update(rsrcnames[i], pid, flag,
313 		    RCM_STATE_SUSPEND, seq_num, info);
314 	}
315 
316 	return (RCM_SUCCESS);
317 }
318 
319 /*
320  * Process a device removal request, reply is needed
321  */
322 int
process_resource_offline(char ** rsrcnames,pid_t pid,uint_t flag,int seq_num,rcm_info_t ** info)323 process_resource_offline(char **rsrcnames, pid_t pid, uint_t flag, int seq_num,
324     rcm_info_t **info)
325 {
326 	int i;
327 	int error = RCM_SUCCESS;
328 	int is_doorcall = ((seq_num & SEQ_NUM_MASK) == 0);
329 
330 	/*
331 	 * Query the operation first.  The return value of the query indicates
332 	 * if the operation should proceed and be implemented.
333 	 */
334 	if (query(rsrcnames, CMD_OFFLINE, "offline", RCM_STATE_OFFLINE_QUERYING,
335 	    pid, flag, NULL, seq_num, info, &error) == 0) {
336 		return (error);
337 	}
338 
339 	/*
340 	 * Implement the operation.
341 	 */
342 	for (i = 0; rsrcnames[i] != NULL; i++) {
343 
344 		error = dr_req_update(rsrcnames[i], pid, flag,
345 		    RCM_STATE_OFFLINING, seq_num, info);
346 		if (error != RCM_SUCCESS) {
347 			rcm_log_message(RCM_DEBUG,
348 			    "offline %s denied with error %d\n", rsrcnames[i],
349 			    error);
350 
351 			/*
352 			 * When called from a module, don't return EAGAIN.
353 			 * This is to avoid recursion if module always retries.
354 			 */
355 			if (!is_doorcall && error == EAGAIN) {
356 				return (RCM_CONFLICT);
357 			}
358 
359 			return (error);
360 		}
361 
362 		/* Actually offline the resource */
363 		error = common_resource_op(CMD_OFFLINE, rsrcnames[i], pid,
364 		    flag, seq_num, NULL, NULL, info);
365 		if (error != RCM_SUCCESS) {
366 			(void) dr_req_update(rsrcnames[i], pid, flag,
367 			    RCM_STATE_OFFLINE_FAIL, seq_num, info);
368 			rcm_log_message(RCM_DEBUG,
369 			    "offline tree failed for %s\n", rsrcnames[i]);
370 			return (error);
371 		}
372 
373 		rcm_log_message(RCM_TRACE3, "offline tree succeeded for %s\n",
374 		    rsrcnames[i]);
375 
376 		/* Update the lock for the successful offline */
377 		(void) dr_req_update(rsrcnames[i], pid, flag,
378 		    RCM_STATE_OFFLINE, seq_num, info);
379 	}
380 
381 	return (RCM_SUCCESS);
382 }
383 
384 /*
385  * Add a resource client who wishes to interpose on DR, events, or capacity.
386  * Reply needed.
387  */
388 int
add_resource_client(char * modname,char * rsrcname,pid_t pid,uint_t flag,rcm_info_t ** infop)389 add_resource_client(char *modname, char *rsrcname, pid_t pid, uint_t flag,
390     rcm_info_t **infop)
391 {
392 	int error = RCM_SUCCESS;
393 	client_t *user = NULL;
394 	rsrc_node_t *node = NULL;
395 	rcm_info_t *info = NULL;
396 
397 	rcm_log_message(RCM_TRACE2,
398 	    "add_resource_client(%s, %s, %ld, 0x%x)\n",
399 	    modname, rsrcname, pid, flag);
400 
401 	if (strcmp(rsrcname, "/") == 0) {
402 		/*
403 		 * No need to register for /  because it will never go away.
404 		 */
405 		rcm_log_message(RCM_INFO, gettext(
406 		    "registering for / by %s has been turned into a no-op\n"),
407 		    modname);
408 		return (RCM_SUCCESS);
409 	}
410 
411 	/*
412 	 * Hold the rcm_req_lock so no dr request may come in while the
413 	 * registration is in progress.
414 	 */
415 	(void) mutex_lock(&rcm_req_lock);
416 
417 	/*
418 	 * Test if the requested registration is a noop, and return EALREADY
419 	 * if it is.
420 	 */
421 	error = rsrc_node_find(rsrcname, RSRC_NODE_CREATE, &node);
422 	if ((error != RCM_SUCCESS) || (node == NULL)) {
423 		(void) mutex_unlock(&rcm_req_lock);
424 		return (RCM_FAILURE);
425 	}
426 
427 	user = rsrc_client_find(modname, pid, &node->users);
428 	if ((user != NULL) &&
429 	    ((user->flag & (flag & RCM_REGISTER_MASK)) != 0)) {
430 		(void) mutex_unlock(&rcm_req_lock);
431 		if ((flag & RCM_REGISTER_DR) &&
432 		    (user->state == RCM_STATE_REMOVE)) {
433 			user->state = RCM_STATE_ONLINE;
434 			return (RCM_SUCCESS);
435 		}
436 		return (EALREADY);
437 	}
438 
439 	/* If adding a new DR registration, reject if the resource is locked */
440 	if (flag & RCM_REGISTER_DR) {
441 
442 		if (rsrc_check_lock_conflicts(rsrcname, flag, LOCK_FOR_USE,
443 		    &info) != RCM_SUCCESS) {
444 			/*
445 			 * The resource is being DR'ed, so return failure
446 			 */
447 			(void) mutex_unlock(&rcm_req_lock);
448 
449 			/*
450 			 * If caller doesn't care about info, free it
451 			 */
452 			if (infop)
453 				*infop = info;
454 			else
455 				rcm_free_info(info);
456 
457 			return (RCM_CONFLICT);
458 		}
459 	}
460 
461 	/* The registration is new and allowable, so add it */
462 	error = rsrc_node_add_user(node, rsrcname, modname, pid, flag);
463 	(void) mutex_unlock(&rcm_req_lock);
464 
465 	return (error);
466 }
467 
468 /*
469  * Remove a resource client, who no longer wishes to interpose on either
470  * DR, events, or capacity.
471  */
472 int
remove_resource_client(char * modname,char * rsrcname,pid_t pid,uint_t flag)473 remove_resource_client(char *modname, char *rsrcname, pid_t pid, uint_t flag)
474 {
475 	int error;
476 	rsrc_node_t *node;
477 
478 	rcm_log_message(RCM_TRACE2,
479 	    "remove_resource_client(%s, %s, %ld, 0x%x)\n",
480 	    modname, rsrcname, pid, flag);
481 
482 	/*
483 	 * Allow resource client to leave anytime, assume client knows what
484 	 * it is trying to do.
485 	 */
486 	error = rsrc_node_find(rsrcname, 0, &node);
487 	if ((error != RCM_SUCCESS) || (node == NULL)) {
488 		rcm_log_message(RCM_WARNING,
489 		    gettext("resource %s not found\n"), rsrcname);
490 		return (ENOENT);
491 	}
492 
493 	return (rsrc_node_remove_user(node, modname, pid, flag));
494 }
495 
496 /*
497  * Reply is needed
498  */
499 int
get_resource_info(char ** rsrcnames,uint_t flag,int seq_num,rcm_info_t ** info)500 get_resource_info(char **rsrcnames, uint_t flag, int seq_num, rcm_info_t **info)
501 {
502 	int rv = RCM_SUCCESS;
503 
504 	if (flag & RCM_DR_OPERATION) {
505 		*info = rsrc_dr_info();
506 	} else if (flag & RCM_MOD_INFO) {
507 		*info = rsrc_mod_info();
508 	} else {
509 		rv = rsrc_usage_info(rsrcnames, flag, seq_num, info);
510 	}
511 
512 	return (rv);
513 }
514 
515 int
notify_resource_event(char * rsrcname,id_t pid,uint_t flag,int seq_num,nvlist_t * event_data,rcm_info_t ** info)516 notify_resource_event(char *rsrcname, id_t pid, uint_t flag, int seq_num,
517     nvlist_t *event_data, rcm_info_t **info)
518 {
519 	int error;
520 
521 	assert(flag == 0);
522 
523 	rcm_log_message(RCM_TRACE2, "notify_resource_event(%s, %ld, 0x%x)\n",
524 	    rsrcname, pid, flag);
525 
526 	error = common_resource_op(CMD_EVENT, rsrcname, pid, flag, seq_num,
527 	    NULL, event_data, info);
528 
529 	return (error);
530 }
531 
532 int
request_capacity_change(char * rsrcname,id_t pid,uint_t flag,int seq_num,nvlist_t * nvl,rcm_info_t ** info)533 request_capacity_change(char *rsrcname, id_t pid, uint_t flag, int seq_num,
534     nvlist_t *nvl, rcm_info_t **info)
535 {
536 	int error;
537 	int is_doorcall = ((seq_num & SEQ_NUM_MASK) == 0);
538 
539 	rcm_log_message(RCM_TRACE2,
540 	    "request_capacity_change(%s, %ld, 0x%x, %d)\n", rsrcname, pid,
541 	    flag, seq_num);
542 
543 	if (is_doorcall || (flag & RCM_QUERY)) {
544 
545 		error = common_resource_op(CMD_REQUEST_CHANGE, rsrcname, pid,
546 		    flag | RCM_QUERY, seq_num, NULL, nvl, info);
547 
548 		if (error != RCM_SUCCESS) {
549 			rcm_log_message(RCM_DEBUG,
550 			    "request state change query denied\n");
551 			return (error);
552 		}
553 	}
554 
555 	if (flag & RCM_QUERY)
556 		return (RCM_SUCCESS);
557 
558 	error = common_resource_op(CMD_REQUEST_CHANGE, rsrcname, pid, flag,
559 	    seq_num, NULL, nvl, info);
560 
561 	if (error != RCM_SUCCESS) {
562 		rcm_log_message(RCM_DEBUG, "request state change failed\n");
563 		return (RCM_FAILURE);
564 	}
565 
566 	rcm_log_message(RCM_TRACE3, "request state change succeeded\n");
567 
568 	return (error);
569 }
570 
571 int
notify_capacity_change(char * rsrcname,id_t pid,uint_t flag,int seq_num,nvlist_t * nvl,rcm_info_t ** info)572 notify_capacity_change(char *rsrcname, id_t pid, uint_t flag, int seq_num,
573     nvlist_t *nvl, rcm_info_t **info)
574 {
575 	int error;
576 
577 	rcm_log_message(RCM_TRACE2,
578 	    "notify_capacity_change(%s, %ld, 0x%x, %d)\n", rsrcname, pid,
579 	    flag, seq_num);
580 
581 	error = common_resource_op(CMD_NOTIFY_CHANGE, rsrcname, pid, flag,
582 	    seq_num, NULL, nvl, info);
583 
584 	if (error != RCM_SUCCESS) {
585 		rcm_log_message(RCM_DEBUG, "notify state change failed\n");
586 		return (RCM_FAILURE);
587 	}
588 
589 	rcm_log_message(RCM_TRACE3, "notify state change succeeded\n");
590 
591 	return (error);
592 }
593 
594 int
get_resource_state(char * rsrcname,pid_t pid,rcm_info_t ** info)595 get_resource_state(char *rsrcname, pid_t pid, rcm_info_t **info)
596 {
597 	int error;
598 	int state;
599 	char *s;
600 	char *resolved;
601 	rcm_info_t *dr_info = NULL;
602 	rcm_info_tuple_t *dr_info_tuple = NULL;
603 	rsrc_node_t *node;
604 	client_t *client;
605 	char *state_info = gettext("State of resource");
606 
607 	rcm_log_message(RCM_TRACE2, "get_resource_state(%s, %ld)\n",
608 	    rsrcname, pid);
609 
610 	/*
611 	 * Check for locks, first.
612 	 */
613 	dr_info = rsrc_dr_info();
614 	if (dr_info) {
615 		state = RCM_STATE_UNKNOWN;
616 		if ((resolved = resolve_name(rsrcname)) == NULL)
617 			return (RCM_FAILURE);
618 		while (dr_info_tuple = rcm_info_next(dr_info, dr_info_tuple)) {
619 			s = (char *)rcm_info_rsrc(dr_info_tuple);
620 			if (s && (strcmp(resolved, s) == 0)) {
621 				state = rcm_info_state(dr_info_tuple);
622 				break;
623 			}
624 		}
625 		free(resolved);
626 		rcm_free_info(dr_info);
627 		if (state != RCM_STATE_UNKNOWN) {
628 			rcm_log_message(RCM_TRACE2,
629 			    "get_resource_state(%s)=%d\n", rsrcname, state);
630 			add_busy_rsrc_to_list(rsrcname, pid, state, 0, NULL,
631 			    (char *)state_info, NULL, NULL, info);
632 			return (RCM_SUCCESS);
633 		}
634 	}
635 
636 	/*
637 	 * No locks, so look for client states in the resource tree.
638 	 *
639 	 * NOTE: It's possible the node doesn't exist, which means no RCM
640 	 * consumer registered for the resource. In this case we silently
641 	 * succeed.
642 	 */
643 	error = rsrc_node_find(rsrcname, 0, &node);
644 	state = RCM_STATE_ONLINE;
645 
646 	if ((error == RCM_SUCCESS) && (node != NULL)) {
647 		for (client = node->users; client; client = client->next) {
648 			if (client->state == RCM_STATE_OFFLINE_FAIL ||
649 			    client->state == RCM_STATE_OFFLINE_QUERY_FAIL ||
650 			    client->state == RCM_STATE_SUSPEND_FAIL ||
651 			    client->state == RCM_STATE_SUSPEND_QUERY_FAIL) {
652 				state = client->state;
653 				break;
654 			}
655 
656 			if (client->state != RCM_STATE_ONLINE &&
657 			    client->state != RCM_STATE_REMOVE)
658 				state = client->state;
659 		}
660 	}
661 
662 	if (error == RCM_SUCCESS) {
663 		rcm_log_message(RCM_TRACE2, "get_resource_state(%s)=%d\n",
664 		    rsrcname, state);
665 		add_busy_rsrc_to_list(rsrcname, pid, state, 0, NULL,
666 		    (char *)state_info, NULL, NULL, info);
667 	}
668 
669 	return (error);
670 }
671 
672 /*
673  * Perform a query of an offline or suspend.
674  *
675  * The return value of this function indicates whether the operation should
676  * be implemented (0 == No, 1 == Yes).  Note that locks and client state
677  * changes will only persist if the caller is going to implement the operation.
678  */
679 static int
query(char ** rsrcnames,int cmd,const char * opname,int querystate,pid_t pid,uint_t flag,timespec_t * interval,int seq_num,rcm_info_t ** info,int * errorp)680 query(char **rsrcnames, int cmd, const char *opname, int querystate, pid_t pid,
681     uint_t flag, timespec_t *interval, int seq_num, rcm_info_t **info,
682     int *errorp)
683 {
684 	int	i;
685 	int	error;
686 	int	final_error;
687 	int	is_doorcall = ((seq_num & SEQ_NUM_MASK) == 0);
688 
689 	/* Only query for door calls, or when the RCM_QUERY flag is set */
690 	if ((is_doorcall == 0) && ((flag & RCM_QUERY) == 0)) {
691 		return (1);
692 	}
693 
694 	/* Lock all the resources.  Fail the query in the case of a conflict. */
695 	for (i = 0; rsrcnames[i] != NULL; i++) {
696 
697 		rcm_log_message(RCM_TRACE2,
698 		    "process_resource_%s(%s, %ld, 0x%x, %d)\n",
699 		    opname, rsrcnames[i], pid, flag, seq_num);
700 
701 		error = dr_req_add(rsrcnames[i], pid, flag, querystate, seq_num,
702 		    NULL, info);
703 
704 		/* The query goes no further if a resource cannot be locked */
705 		if (error != RCM_SUCCESS) {
706 
707 			rcm_log_message(RCM_DEBUG,
708 			    "%s query %s defined with error %d\n",
709 			    opname, rsrcnames[i], error);
710 
711 			/*
712 			 * Replace EAGAIN with RCM_CONFLICT in the case of
713 			 * module callbacks; to avoid modules from trying
714 			 * again infinitely.
715 			 */
716 			if ((is_doorcall == 0) && (error == EAGAIN)) {
717 				error = RCM_CONFLICT;
718 			}
719 
720 			goto finished;
721 		}
722 	}
723 
724 	/*
725 	 * All the resources were locked above, so use common_resource_op()
726 	 * to pass the query on to the clients.  Accumulate the overall error
727 	 * value in 'final_error', before transferring it to 'error' at the end.
728 	 */
729 	for (final_error = RCM_SUCCESS, i = 0; rsrcnames[i] != NULL; i++) {
730 
731 		/* Log the query (for tracing purposes). */
732 		rcm_log_message(RCM_TRACE2, "querying resource %s\n",
733 		    rsrcnames[i]);
734 
735 		/* Query the resource's clients through common_resource_op(). */
736 		error = common_resource_op(cmd, rsrcnames[i], pid,
737 		    flag | RCM_QUERY, seq_num, interval, NULL, info);
738 
739 		/*
740 		 * If a query fails, don't stop iterating through the loop.
741 		 * Just ensure that 'final_error' is set (if not already),
742 		 * log the error, and continue looping.
743 		 *
744 		 * In the case of a user who manually intervenes and retries
745 		 * the operation, this will maximize the extent of the query
746 		 * so that they experience fewer such iterations overall.
747 		 */
748 		if (error != RCM_SUCCESS) {
749 
750 			/* Log each query that failed along the way */
751 			rcm_log_message(RCM_DEBUG, "%s %s query denied\n",
752 			    opname, rsrcnames[i]);
753 
754 			if (final_error != RCM_FAILURE) {
755 				final_error = error;
756 			}
757 		}
758 	}
759 	error = final_error;
760 
761 	/*
762 	 * Tell the calling function not to proceed any further with the
763 	 * implementation phase of the operation if the query failed, or
764 	 * if the user's intent was to only query the operation.
765 	 */
766 finished:
767 	if ((error != RCM_SUCCESS) || ((flag & RCM_QUERY) != 0)) {
768 
769 		/*
770 		 * Since the operation won't be implemented, cancel the
771 		 * query (unlock resources and reverse client state changes).
772 		 *
773 		 * The cancellation routine cleans up everything for the entire
774 		 * operation, and thus it should only be called from the very
775 		 * root of the operation (e.g. when 'is_doorcall' is TRUE).
776 		 */
777 		if (is_doorcall != 0) {
778 			cancel_query(cmd, opname, pid, flag, seq_num);
779 		}
780 
781 		*errorp = error;
782 		return (0);
783 	}
784 
785 	/* Otherwise, tell the caller to proceed with the implementation. */
786 	*errorp = RCM_SUCCESS;
787 	return (1);
788 }
789 
790 /*
791  * Implementation of a query cancellation.
792  *
793  * The full scope of the query is already noted, so the scope of the operation
794  * does not need to be expanded in the same recursive manner that was used for
795  * the query itself.  (Clients don't have to be called to cross namespaces.)
796  * Instead, the locks added to the DR request list during the query are scanned.
797  */
798 static void
cancel_query(int cmd,const char * opname,pid_t pid,uint_t flag,int seq_num)799 cancel_query(int cmd, const char *opname, pid_t pid, uint_t flag, int seq_num)
800 {
801 	char	rsrc[MAXPATHLEN];
802 
803 	/*
804 	 * Find every lock in the DR request list that is a part of this
805 	 * sequence.  Call common_resource_op() with the QUERY_CANCEL flag to
806 	 * cancel each sub-operation, and then remove each lock from the list.
807 	 *
808 	 * The 'rsrc' buffer is required to retrieve the 'device' fields of
809 	 * matching DR request list entries in a way that's multi-thread safe.
810 	 */
811 	while (dr_req_lookup(seq_num, rsrc) == RCM_SUCCESS) {
812 
813 		rcm_log_message(RCM_TRACE2, "%s query %s cancelled\n",
814 		    opname, rsrc);
815 
816 		(void) common_resource_op(cmd, rsrc, pid,
817 		    flag | RCM_QUERY | RCM_QUERY_CANCEL, seq_num, NULL, NULL,
818 		    NULL);
819 
820 		(void) dr_req_remove(rsrc, flag);
821 	}
822 }
823