xref: /titanic_44/usr/src/cmd/rcm_daemon/common/rcm_impl.c (revision 25e8c5aa2b496d9026e958ac731a610167574f59)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  *
21  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
22  * Use is subject to license terms.
23  */
24 
25 #pragma ident	"%Z%%M%	%I%	%E% SMI"
26 
27 #include <librcm_impl.h>
28 #include "rcm_impl.h"
29 
/*
 * Forward declarations for the file-local helpers that implement the query
 * phase (and its cancellation) of offline and suspend requests.  Both are
 * defined later in this file.
 */
static int query(char **, int, const char *, int, pid_t, uint_t, timespec_t *,
    int, rcm_info_t **, int *);
static void cancel_query(int, const char *, pid_t, uint_t, int);
33 
/*
 * The following ops are invoked when modules initiate librcm calls which
 * require daemon processing. Cascaded RCM operations must come through
 * this path.
 *
 * NOTE: this is a positional initializer, so the entries below must stay in
 * the same order as the members of librcm_ops_t (declared outside this
 * file).  Reordering them silently binds operations to the wrong handlers.
 */
librcm_ops_t rcm_ops = {
	add_resource_client,
	remove_resource_client,
	get_resource_info,
	process_resource_suspend,
	notify_resource_resume,
	process_resource_offline,
	notify_resource_online,
	notify_resource_remove,
	request_capacity_change,
	notify_capacity_change,
	notify_resource_event,
	get_resource_state
};
53 
54 /*
55  * Process a request or a notification on a subtree
56  */
57 /*ARGSUSED2*/
58 static int
common_resource_op(int cmd,char * rsrcname,pid_t pid,uint_t flag,int seq_num,timespec_t * interval,nvlist_t * nvl,rcm_info_t ** info)59 common_resource_op(int cmd, char *rsrcname, pid_t pid, uint_t flag, int seq_num,
60     timespec_t *interval, nvlist_t *nvl, rcm_info_t **info)
61 {
62 	int error;
63 	rsrc_node_t *node;
64 	tree_walk_arg_t arg;
65 
66 	/*
67 	 * Find the node (root of subtree) in the resource tree, invoke
68 	 * appropriate callbacks for all clients hanging off the subtree,
69 	 * and mark the subtree with the appropriate state.
70 	 *
71 	 * NOTE: It's possible the node doesn't exist, which means no RCM
72 	 * consumer registered for the resource. In this case we silently
73 	 * succeed.
74 	 */
75 	error = rsrc_node_find(rsrcname, 0, &node);
76 	if ((error == RCM_SUCCESS) && (node != NULL)) {
77 		arg.flag = flag;
78 		arg.info = info;
79 		arg.seq_num = seq_num;
80 		arg.interval = interval;
81 		arg.nvl = nvl;
82 		arg.cmd = cmd;
83 
84 		if ((cmd == CMD_NOTIFY_CHANGE) ||
85 		    (cmd == CMD_REQUEST_CHANGE) ||
86 		    (cmd == CMD_EVENT)) {
87 			error = rsrc_client_action_list(node->users, cmd, &arg);
88 		} else {
89 			error = rsrc_tree_action(node, cmd, &arg);
90 		}
91 	} else if ((error == RCM_SUCCESS) && (flag & RCM_RETIRE_REQUEST)) {
92 		/*
93 		 * No matching node, so no client. This means there
94 		 * is no constraint (RCM wise) on this retire. Return
95 		 * RCM_NO_CONSTRAINT to indicate this
96 		 */
97 		rcm_log_message(RCM_TRACE1, "No client. Returning "
98 		    "RCM_NO_CONSTRAINT: %s\n", rsrcname);
99 		error = RCM_NO_CONSTRAINT;
100 	}
101 
102 	return (error);
103 }
104 
105 /*
106  * When a resource is removed, notify all clients who registered for this
107  * particular resource.
108  */
109 int
notify_resource_remove(char ** rsrcnames,pid_t pid,uint_t flag,int seq_num,rcm_info_t ** info)110 notify_resource_remove(char **rsrcnames, pid_t pid, uint_t flag, int seq_num,
111     rcm_info_t **info)
112 {
113 	int i;
114 	int error;
115 	int retval = RCM_SUCCESS;
116 
117 	for (i = 0; rsrcnames[i] != NULL; i++) {
118 
119 		rcm_log_message(RCM_TRACE2,
120 		    "notify_resource_remove(%s, %ld, 0x%x, %d)\n", rsrcnames[i],
121 		    pid, flag, seq_num);
122 
123 		/*
124 		 * Mark state as issuing removal notification. Return failure
125 		 * if no DR request for this node exists.
126 		 */
127 		error = dr_req_update(rsrcnames[i], pid, flag,
128 		    RCM_STATE_REMOVING, seq_num, info);
129 		if (error != RCM_SUCCESS) {
130 			retval = error;
131 			continue;
132 		}
133 
134 		error = common_resource_op(CMD_REMOVE, rsrcnames[i], pid, flag,
135 		    seq_num, NULL, NULL, info);
136 
137 		/*
138 		 * delete the request entry from DR list
139 		 */
140 		dr_req_remove(rsrcnames[i], flag);
141 
142 		if (error != RCM_SUCCESS)
143 			retval = error;
144 	}
145 
146 	return (retval);
147 }
148 
149 /*
150  * Notify users that a resource has been resumed
151  */
152 int
notify_resource_resume(char ** rsrcnames,pid_t pid,uint_t flag,int seq_num,rcm_info_t ** info)153 notify_resource_resume(char **rsrcnames, pid_t pid, uint_t flag, int seq_num,
154     rcm_info_t **info)
155 {
156 	int i;
157 	int error;
158 	rcm_info_t *state_info;
159 	rcm_info_tuple_t *state_tuple;
160 	int retval = RCM_SUCCESS;
161 
162 	for (i = 0; rsrcnames[i] != NULL; i++) {
163 
164 		state_info = NULL;
165 		state_tuple = NULL;
166 
167 		/* Check resource state (was resource actually suspended?) */
168 		if (get_resource_state(rsrcnames[i], pid, &state_info) ||
169 		    ((state_tuple = rcm_info_next(state_info, NULL)) == NULL) ||
170 		    (rcm_info_state(state_tuple) == RCM_STATE_SUSPEND))
171 			flag |= RCM_SUSPENDED;
172 		if (state_info)
173 			rcm_free_info(state_info);
174 
175 		rcm_log_message(RCM_TRACE2,
176 		    "notify_resource_resume(%s, %ld, 0x%x, %d)\n",
177 		    rsrcnames[i], pid, flag, seq_num);
178 
179 		/*
180 		 * Mark state as sending resumption notifications
181 		 */
182 		error = dr_req_update(rsrcnames[i], pid, flag,
183 		    RCM_STATE_RESUMING, seq_num, info);
184 		if (error != RCM_SUCCESS) {
185 			retval = error;
186 			continue;
187 		}
188 
189 		error = common_resource_op(CMD_RESUME, rsrcnames[i], pid, flag,
190 		    seq_num, NULL, NULL, info);
191 
192 		dr_req_remove(rsrcnames[i], flag);
193 
194 		if (error != RCM_SUCCESS)
195 			retval = error;
196 	}
197 
198 	return (retval);
199 }
200 
201 /*
202  * Notify users that an offlined device is again available
203  */
204 int
notify_resource_online(char ** rsrcnames,pid_t pid,uint_t flag,int seq_num,rcm_info_t ** info)205 notify_resource_online(char **rsrcnames, pid_t pid, uint_t flag, int seq_num,
206     rcm_info_t **info)
207 {
208 	int i;
209 	int error;
210 	int retval = RCM_SUCCESS;
211 
212 	for (i = 0; rsrcnames[i] != NULL; i++) {
213 
214 		rcm_log_message(RCM_TRACE2,
215 		    "notify_resource_online(%s, %ld, 0x%x, %d)\n",
216 		    rsrcnames[i], pid, flag, seq_num);
217 
218 		/*
219 		 * Mark state as sending onlining notifications
220 		 */
221 		error = dr_req_update(rsrcnames[i], pid, flag,
222 		    RCM_STATE_ONLINING, seq_num, info);
223 		if (error != RCM_SUCCESS) {
224 			retval = error;
225 			continue;
226 		}
227 
228 		error = common_resource_op(CMD_ONLINE, rsrcnames[i], pid, flag,
229 		    seq_num, NULL, NULL, info);
230 
231 		dr_req_remove(rsrcnames[i], flag);
232 
233 		if (error != RCM_SUCCESS)
234 			retval = error;
235 	}
236 
237 	return (retval);
238 }
239 
240 /*
241  * For offline and suspend, need to get the logic correct here. There are
242  * several cases:
243  *
244  * 1. It is a door call and RCM_QUERY is not set:
245  *	run a QUERY; if that succeeds, run the operation.
246  *
247  * 2. It is a door call and RCM_QUERY is set:
248  *	run the QUERY only.
249  *
250  * 3. It is not a door call:
251  *	run the call, but look at the flag to see if the
252  *	lock should be kept.
253  */
254 
255 /*
256  * Request permission to suspend a resource
257  */
258 int
process_resource_suspend(char ** rsrcnames,pid_t pid,uint_t flag,int seq_num,timespec_t * interval,rcm_info_t ** info)259 process_resource_suspend(char **rsrcnames, pid_t pid, uint_t flag, int seq_num,
260     timespec_t *interval, rcm_info_t **info)
261 {
262 	int i;
263 	int error = RCM_SUCCESS;
264 	int is_doorcall = ((seq_num & SEQ_NUM_MASK) == 0);
265 
266 	/*
267 	 * Query the operation first.  The return value of the query indicates
268 	 * if the operation should proceed and be implemented.
269 	 */
270 	if (query(rsrcnames, CMD_SUSPEND, "suspend", RCM_STATE_SUSPEND_QUERYING,
271 	    pid, flag, interval, seq_num, info, &error) == 0) {
272 		return (error);
273 	}
274 
275 	/*
276 	 * Implement the operation.
277 	 */
278 	for (i = 0; rsrcnames[i] != NULL; i++) {
279 
280 		/* Update the lock from a query state to the suspending state */
281 		if ((error = dr_req_update(rsrcnames[i], pid, flag,
282 		    RCM_STATE_SUSPENDING, seq_num, info)) != RCM_SUCCESS) {
283 
284 			rcm_log_message(RCM_DEBUG,
285 			    "suspend %s denied with error %d\n", rsrcnames[i],
286 			    error);
287 
288 			/*
289 			 * When called from a module, don't return EAGAIN.
290 			 * This is to avoid recursion if module always retries.
291 			 */
292 			if (!is_doorcall && error == EAGAIN) {
293 				return (RCM_CONFLICT);
294 			}
295 
296 			return (error);
297 		}
298 
299 		/* Actually suspend the resource */
300 		error = common_resource_op(CMD_SUSPEND, rsrcnames[i], pid,
301 		    flag, seq_num, interval, NULL, info);
302 		if (error != RCM_SUCCESS) {
303 			(void) dr_req_update(rsrcnames[i], pid, flag,
304 			    RCM_STATE_SUSPEND_FAIL, seq_num, info);
305 			rcm_log_message(RCM_DEBUG,
306 			    "suspend tree failed for %s\n", rsrcnames[i]);
307 			return (error);
308 		}
309 
310 		rcm_log_message(RCM_TRACE3, "suspend tree succeeded for %s\n",
311 		    rsrcnames[i]);
312 
313 		/* Update the lock for the successful suspend */
314 		(void) dr_req_update(rsrcnames[i], pid, flag,
315 		    RCM_STATE_SUSPEND, seq_num, info);
316 	}
317 
318 	return (RCM_SUCCESS);
319 }
320 
321 /*
322  * Process a device removal request, reply is needed
323  */
324 int
process_resource_offline(char ** rsrcnames,pid_t pid,uint_t flag,int seq_num,rcm_info_t ** info)325 process_resource_offline(char **rsrcnames, pid_t pid, uint_t flag, int seq_num,
326     rcm_info_t **info)
327 {
328 	int i;
329 	int error = RCM_SUCCESS;
330 	int is_doorcall = ((seq_num & SEQ_NUM_MASK) == 0);
331 
332 	/*
333 	 * Query the operation first.  The return value of the query indicates
334 	 * if the operation should proceed and be implemented.
335 	 */
336 	if (query(rsrcnames, CMD_OFFLINE, "offline", RCM_STATE_OFFLINE_QUERYING,
337 	    pid, flag, NULL, seq_num, info, &error) == 0) {
338 		return (error);
339 	}
340 
341 	/*
342 	 * Implement the operation.
343 	 */
344 	for (i = 0; rsrcnames[i] != NULL; i++) {
345 
346 		error = dr_req_update(rsrcnames[i], pid, flag,
347 		    RCM_STATE_OFFLINING, seq_num, info);
348 		if (error != RCM_SUCCESS) {
349 			rcm_log_message(RCM_DEBUG,
350 			    "offline %s denied with error %d\n", rsrcnames[i],
351 			    error);
352 
353 			/*
354 			 * When called from a module, don't return EAGAIN.
355 			 * This is to avoid recursion if module always retries.
356 			 */
357 			if (!is_doorcall && error == EAGAIN) {
358 				return (RCM_CONFLICT);
359 			}
360 
361 			return (error);
362 		}
363 
364 		/* Actually offline the resource */
365 		error = common_resource_op(CMD_OFFLINE, rsrcnames[i], pid,
366 		    flag, seq_num, NULL, NULL, info);
367 		if (error != RCM_SUCCESS) {
368 			(void) dr_req_update(rsrcnames[i], pid, flag,
369 			    RCM_STATE_OFFLINE_FAIL, seq_num, info);
370 			rcm_log_message(RCM_DEBUG,
371 			    "offline tree failed for %s\n", rsrcnames[i]);
372 			return (error);
373 		}
374 
375 		rcm_log_message(RCM_TRACE3, "offline tree succeeded for %s\n",
376 		    rsrcnames[i]);
377 
378 		/* Update the lock for the successful offline */
379 		(void) dr_req_update(rsrcnames[i], pid, flag,
380 		    RCM_STATE_OFFLINE, seq_num, info);
381 	}
382 
383 	return (RCM_SUCCESS);
384 }
385 
386 /*
387  * Add a resource client who wishes to interpose on DR, events, or capacity.
388  * Reply needed.
389  */
390 int
add_resource_client(char * modname,char * rsrcname,pid_t pid,uint_t flag,rcm_info_t ** infop)391 add_resource_client(char *modname, char *rsrcname, pid_t pid, uint_t flag,
392     rcm_info_t **infop)
393 {
394 	int error = RCM_SUCCESS;
395 	client_t *user = NULL;
396 	rsrc_node_t *node = NULL;
397 	rcm_info_t *info = NULL;
398 
399 	rcm_log_message(RCM_TRACE2,
400 	    "add_resource_client(%s, %s, %ld, 0x%x)\n",
401 	    modname, rsrcname, pid, flag);
402 
403 	if (strcmp(rsrcname, "/") == 0) {
404 		/*
405 		 * No need to register for /  because it will never go away.
406 		 */
407 		rcm_log_message(RCM_INFO, gettext(
408 		    "registering for / by %s has been turned into a no-op\n"),
409 		    modname);
410 		return (RCM_SUCCESS);
411 	}
412 
413 	/*
414 	 * Hold the rcm_req_lock so no dr request may come in while the
415 	 * registration is in progress.
416 	 */
417 	(void) mutex_lock(&rcm_req_lock);
418 
419 	/*
420 	 * Test if the requested registration is a noop, and return EALREADY
421 	 * if it is.
422 	 */
423 	error = rsrc_node_find(rsrcname, RSRC_NODE_CREATE, &node);
424 	if ((error != RCM_SUCCESS) || (node == NULL)) {
425 		(void) mutex_unlock(&rcm_req_lock);
426 		return (RCM_FAILURE);
427 	}
428 
429 	user = rsrc_client_find(modname, pid, &node->users);
430 	if ((user != NULL) &&
431 	    ((user->flag & (flag & RCM_REGISTER_MASK)) != 0)) {
432 		(void) mutex_unlock(&rcm_req_lock);
433 		if ((flag & RCM_REGISTER_DR) &&
434 		    (user->state == RCM_STATE_REMOVE)) {
435 			user->state = RCM_STATE_ONLINE;
436 			return (RCM_SUCCESS);
437 		}
438 		return (EALREADY);
439 	}
440 
441 	/* If adding a new DR registration, reject if the resource is locked */
442 	if (flag & RCM_REGISTER_DR) {
443 
444 		if (rsrc_check_lock_conflicts(rsrcname, flag, LOCK_FOR_USE,
445 		    &info) != RCM_SUCCESS) {
446 			/*
447 			 * The resource is being DR'ed, so return failure
448 			 */
449 			(void) mutex_unlock(&rcm_req_lock);
450 
451 			/*
452 			 * If caller doesn't care about info, free it
453 			 */
454 			if (infop)
455 				*infop = info;
456 			else
457 				rcm_free_info(info);
458 
459 			return (RCM_CONFLICT);
460 		}
461 	}
462 
463 	/* The registration is new and allowable, so add it */
464 	error = rsrc_node_add_user(node, rsrcname, modname, pid, flag);
465 	(void) mutex_unlock(&rcm_req_lock);
466 
467 	return (error);
468 }
469 
470 /*
471  * Remove a resource client, who no longer wishes to interpose on either
472  * DR, events, or capacity.
473  */
474 int
remove_resource_client(char * modname,char * rsrcname,pid_t pid,uint_t flag)475 remove_resource_client(char *modname, char *rsrcname, pid_t pid, uint_t flag)
476 {
477 	int error;
478 	rsrc_node_t *node;
479 
480 	rcm_log_message(RCM_TRACE2,
481 	    "remove_resource_client(%s, %s, %ld, 0x%x)\n",
482 	    modname, rsrcname, pid, flag);
483 
484 	/*
485 	 * Allow resource client to leave anytime, assume client knows what
486 	 * it is trying to do.
487 	 */
488 	error = rsrc_node_find(rsrcname, 0, &node);
489 	if ((error != RCM_SUCCESS) || (node == NULL)) {
490 		rcm_log_message(RCM_WARNING,
491 		    gettext("resource %s not found\n"), rsrcname);
492 		return (ENOENT);
493 	}
494 
495 	return (rsrc_node_remove_user(node, modname, pid, flag));
496 }
497 
498 /*
499  * Reply is needed
500  */
501 int
get_resource_info(char ** rsrcnames,uint_t flag,int seq_num,rcm_info_t ** info)502 get_resource_info(char **rsrcnames, uint_t flag, int seq_num, rcm_info_t **info)
503 {
504 	int rv = RCM_SUCCESS;
505 
506 	if (flag & RCM_DR_OPERATION) {
507 		*info = rsrc_dr_info();
508 	} else if (flag & RCM_MOD_INFO) {
509 		*info = rsrc_mod_info();
510 	} else {
511 		rv = rsrc_usage_info(rsrcnames, flag, seq_num, info);
512 	}
513 
514 	return (rv);
515 }
516 
517 int
notify_resource_event(char * rsrcname,id_t pid,uint_t flag,int seq_num,nvlist_t * event_data,rcm_info_t ** info)518 notify_resource_event(char *rsrcname, id_t pid, uint_t flag, int seq_num,
519     nvlist_t *event_data, rcm_info_t **info)
520 {
521 	int error;
522 
523 	assert(flag == 0);
524 
525 	rcm_log_message(RCM_TRACE2, "notify_resource_event(%s, %ld, 0x%x)\n",
526 	    rsrcname, pid, flag);
527 
528 	error = common_resource_op(CMD_EVENT, rsrcname, pid, flag, seq_num,
529 	    NULL, event_data, info);
530 
531 	return (error);
532 }
533 
534 int
request_capacity_change(char * rsrcname,id_t pid,uint_t flag,int seq_num,nvlist_t * nvl,rcm_info_t ** info)535 request_capacity_change(char *rsrcname, id_t pid, uint_t flag, int seq_num,
536     nvlist_t *nvl, rcm_info_t **info)
537 {
538 	int error;
539 	int is_doorcall = ((seq_num & SEQ_NUM_MASK) == 0);
540 
541 	rcm_log_message(RCM_TRACE2,
542 	    "request_capacity_change(%s, %ld, 0x%x, %d)\n", rsrcname, pid,
543 	    flag, seq_num);
544 
545 	if (is_doorcall || (flag & RCM_QUERY)) {
546 
547 		error = common_resource_op(CMD_REQUEST_CHANGE, rsrcname, pid,
548 		    flag | RCM_QUERY, seq_num, NULL, nvl, info);
549 
550 		if (error != RCM_SUCCESS) {
551 			rcm_log_message(RCM_DEBUG,
552 			    "request state change query denied\n");
553 			return (error);
554 		}
555 	}
556 
557 	if (flag & RCM_QUERY)
558 		return (RCM_SUCCESS);
559 
560 	error = common_resource_op(CMD_REQUEST_CHANGE, rsrcname, pid, flag,
561 	    seq_num, NULL, nvl, info);
562 
563 	if (error != RCM_SUCCESS) {
564 		rcm_log_message(RCM_DEBUG, "request state change failed\n");
565 		return (RCM_FAILURE);
566 	}
567 
568 	rcm_log_message(RCM_TRACE3, "request state change succeeded\n");
569 
570 	return (error);
571 }
572 
573 int
notify_capacity_change(char * rsrcname,id_t pid,uint_t flag,int seq_num,nvlist_t * nvl,rcm_info_t ** info)574 notify_capacity_change(char *rsrcname, id_t pid, uint_t flag, int seq_num,
575     nvlist_t *nvl, rcm_info_t **info)
576 {
577 	int error;
578 
579 	rcm_log_message(RCM_TRACE2,
580 	    "notify_capacity_change(%s, %ld, 0x%x, %d)\n", rsrcname, pid,
581 	    flag, seq_num);
582 
583 	error = common_resource_op(CMD_NOTIFY_CHANGE, rsrcname, pid, flag,
584 	    seq_num, NULL, nvl, info);
585 
586 	if (error != RCM_SUCCESS) {
587 		rcm_log_message(RCM_DEBUG, "notify state change failed\n");
588 		return (RCM_FAILURE);
589 	}
590 
591 	rcm_log_message(RCM_TRACE3, "notify state change succeeded\n");
592 
593 	return (error);
594 }
595 
596 int
get_resource_state(char * rsrcname,pid_t pid,rcm_info_t ** info)597 get_resource_state(char *rsrcname, pid_t pid, rcm_info_t **info)
598 {
599 	int error;
600 	int state;
601 	char *s;
602 	char *resolved;
603 	rcm_info_t *dr_info = NULL;
604 	rcm_info_tuple_t *dr_info_tuple = NULL;
605 	rsrc_node_t *node;
606 	client_t *client;
607 	char *state_info = gettext("State of resource");
608 
609 	rcm_log_message(RCM_TRACE2, "get_resource_state(%s, %ld)\n",
610 	    rsrcname, pid);
611 
612 	/*
613 	 * Check for locks, first.
614 	 */
615 	dr_info = rsrc_dr_info();
616 	if (dr_info) {
617 		state = RCM_STATE_UNKNOWN;
618 		if ((resolved = resolve_name(rsrcname)) == NULL)
619 			return (RCM_FAILURE);
620 		while (dr_info_tuple = rcm_info_next(dr_info, dr_info_tuple)) {
621 			s = (char *)rcm_info_rsrc(dr_info_tuple);
622 			if (s && (strcmp(resolved, s) == 0)) {
623 				state = rcm_info_state(dr_info_tuple);
624 				break;
625 			}
626 		}
627 		free(resolved);
628 		rcm_free_info(dr_info);
629 		if (state != RCM_STATE_UNKNOWN) {
630 			rcm_log_message(RCM_TRACE2,
631 			    "get_resource_state(%s)=%d\n", rsrcname, state);
632 			add_busy_rsrc_to_list(rsrcname, pid, state, 0, NULL,
633 			    (char *)state_info, NULL, NULL, info);
634 			return (RCM_SUCCESS);
635 		}
636 	}
637 
638 	/*
639 	 * No locks, so look for client states in the resource tree.
640 	 *
641 	 * NOTE: It's possible the node doesn't exist, which means no RCM
642 	 * consumer registered for the resource. In this case we silently
643 	 * succeed.
644 	 */
645 	error = rsrc_node_find(rsrcname, 0, &node);
646 	state = RCM_STATE_ONLINE;
647 
648 	if ((error == RCM_SUCCESS) && (node != NULL)) {
649 		for (client = node->users; client; client = client->next) {
650 			if (client->state == RCM_STATE_OFFLINE_FAIL ||
651 			    client->state == RCM_STATE_OFFLINE_QUERY_FAIL ||
652 			    client->state == RCM_STATE_SUSPEND_FAIL ||
653 			    client->state == RCM_STATE_SUSPEND_QUERY_FAIL) {
654 				state = client->state;
655 				break;
656 			}
657 
658 			if (client->state != RCM_STATE_ONLINE &&
659 			    client->state != RCM_STATE_REMOVE)
660 				state = client->state;
661 		}
662 	}
663 
664 	if (error == RCM_SUCCESS) {
665 		rcm_log_message(RCM_TRACE2, "get_resource_state(%s)=%d\n",
666 		    rsrcname, state);
667 		add_busy_rsrc_to_list(rsrcname, pid, state, 0, NULL,
668 		    (char *)state_info, NULL, NULL, info);
669 	}
670 
671 	return (error);
672 }
673 
674 /*
675  * Perform a query of an offline or suspend.
676  *
677  * The return value of this function indicates whether the operation should
678  * be implemented (0 == No, 1 == Yes).  Note that locks and client state
679  * changes will only persist if the caller is going to implement the operation.
680  */
681 static int
query(char ** rsrcnames,int cmd,const char * opname,int querystate,pid_t pid,uint_t flag,timespec_t * interval,int seq_num,rcm_info_t ** info,int * errorp)682 query(char **rsrcnames, int cmd, const char *opname, int querystate, pid_t pid,
683     uint_t flag, timespec_t *interval, int seq_num, rcm_info_t **info,
684     int *errorp)
685 {
686 	int	i;
687 	int	error;
688 	int	final_error;
689 	int	is_doorcall = ((seq_num & SEQ_NUM_MASK) == 0);
690 
691 	/* Only query for door calls, or when the RCM_QUERY flag is set */
692 	if ((is_doorcall == 0) && ((flag & RCM_QUERY) == 0)) {
693 		return (1);
694 	}
695 
696 	/* Lock all the resources.  Fail the query in the case of a conflict. */
697 	for (i = 0; rsrcnames[i] != NULL; i++) {
698 
699 		rcm_log_message(RCM_TRACE2,
700 		    "process_resource_%s(%s, %ld, 0x%x, %d)\n",
701 		    opname, rsrcnames[i], pid, flag, seq_num);
702 
703 		error = dr_req_add(rsrcnames[i], pid, flag, querystate, seq_num,
704 		    NULL, info);
705 
706 		/* The query goes no further if a resource cannot be locked */
707 		if (error != RCM_SUCCESS) {
708 
709 			rcm_log_message(RCM_DEBUG,
710 			    "%s query %s defined with error %d\n",
711 			    opname, rsrcnames[i], error);
712 
713 			/*
714 			 * Replace EAGAIN with RCM_CONFLICT in the case of
715 			 * module callbacks; to avoid modules from trying
716 			 * again infinitely.
717 			 */
718 			if ((is_doorcall == 0) && (error == EAGAIN)) {
719 				error = RCM_CONFLICT;
720 			}
721 
722 			goto finished;
723 		}
724 	}
725 
726 	/*
727 	 * All the resources were locked above, so use common_resource_op()
728 	 * to pass the query on to the clients.  Accumulate the overall error
729 	 * value in 'final_error', before transferring it to 'error' at the end.
730 	 */
731 	for (final_error = RCM_SUCCESS, i = 0; rsrcnames[i] != NULL; i++) {
732 
733 		/* Log the query (for tracing purposes). */
734 		rcm_log_message(RCM_TRACE2, "querying resource %s\n",
735 		    rsrcnames[i]);
736 
737 		/* Query the resource's clients through common_resource_op(). */
738 		error = common_resource_op(cmd, rsrcnames[i], pid,
739 		    flag | RCM_QUERY, seq_num, interval, NULL, info);
740 
741 		/*
742 		 * If a query fails, don't stop iterating through the loop.
743 		 * Just ensure that 'final_error' is set (if not already),
744 		 * log the error, and continue looping.
745 		 *
746 		 * In the case of a user who manually intervenes and retries
747 		 * the operation, this will maximize the extent of the query
748 		 * so that they experience fewer such iterations overall.
749 		 */
750 		if (error != RCM_SUCCESS) {
751 
752 			/* Log each query that failed along the way */
753 			rcm_log_message(RCM_DEBUG, "%s %s query denied\n",
754 			    opname, rsrcnames[i]);
755 
756 			if (final_error != RCM_FAILURE) {
757 				final_error = error;
758 			}
759 		}
760 	}
761 	error = final_error;
762 
763 	/*
764 	 * Tell the calling function not to proceed any further with the
765 	 * implementation phase of the operation if the query failed, or
766 	 * if the user's intent was to only query the operation.
767 	 */
768 finished:
769 	if ((error != RCM_SUCCESS) || ((flag & RCM_QUERY) != 0)) {
770 
771 		/*
772 		 * Since the operation won't be implemented, cancel the
773 		 * query (unlock resources and reverse client state changes).
774 		 *
775 		 * The cancellation routine cleans up everything for the entire
776 		 * operation, and thus it should only be called from the very
777 		 * root of the operation (e.g. when 'is_doorcall' is TRUE).
778 		 */
779 		if (is_doorcall != 0) {
780 			cancel_query(cmd, opname, pid, flag, seq_num);
781 		}
782 
783 		*errorp = error;
784 		return (0);
785 	}
786 
787 	/* Otherwise, tell the caller to proceed with the implementation. */
788 	*errorp = RCM_SUCCESS;
789 	return (1);
790 }
791 
792 /*
793  * Implementation of a query cancellation.
794  *
795  * The full scope of the query is already noted, so the scope of the operation
796  * does not need to be expanded in the same recursive manner that was used for
797  * the query itself.  (Clients don't have to be called to cross namespaces.)
798  * Instead, the locks added to the DR request list during the query are scanned.
799  */
800 static void
cancel_query(int cmd,const char * opname,pid_t pid,uint_t flag,int seq_num)801 cancel_query(int cmd, const char *opname, pid_t pid, uint_t flag, int seq_num)
802 {
803 	char	rsrc[MAXPATHLEN];
804 
805 	/*
806 	 * Find every lock in the DR request list that is a part of this
807 	 * sequence.  Call common_resource_op() with the QUERY_CANCEL flag to
808 	 * cancel each sub-operation, and then remove each lock from the list.
809 	 *
810 	 * The 'rsrc' buffer is required to retrieve the 'device' fields of
811 	 * matching DR request list entries in a way that's multi-thread safe.
812 	 */
813 	while (dr_req_lookup(seq_num, rsrc) == RCM_SUCCESS) {
814 
815 		rcm_log_message(RCM_TRACE2, "%s query %s cancelled\n",
816 		    opname, rsrc);
817 
818 		(void) common_resource_op(cmd, rsrc, pid,
819 		    flag | RCM_QUERY | RCM_QUERY_CANCEL, seq_num, NULL, NULL,
820 		    NULL);
821 
822 		(void) dr_req_remove(rsrc, flag);
823 	}
824 }
825