xref: /titanic_52/usr/src/cmd/rcm_daemon/common/mpxio_rcm.c (revision 9acbbeaf2a1ffe5c14b244867d427714fab43c5c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * RCM module supporting multiplexed I/O controllers (MPxIO).
31  */
32 #include <stdlib.h>
33 #include <stdarg.h>
34 #include <unistd.h>
35 #include <assert.h>
36 #include <syslog.h>
37 #include <string.h>
38 #include <synch.h>
39 #include <libintl.h>
40 #include <locale.h>
41 #include <ctype.h>
42 #include <errno.h>
43 #include <libdevinfo.h>
44 #include <sys/types.h>
45 #include "rcm_module.h"
46 
/*
 * Property name and value strings.  Neither is referenced by the code
 * visible in this file.
 */
#define	MPXIO_PROP_NAME		"mpxio-component"
#define	MPXIO_PROP_CLIENT	"client"

/* Operation codes passed as the 'cmd' argument of get_affected_clients(). */
#define	CMD_GETINFO		0
#define	CMD_OFFLINE		1
#define	CMD_ONLINE		2
#define	CMD_REMOVE		3

/* Values stored in phci_list_t.referenced by refresh_regs(). */
#define	CACHE_NEW		0
#define	CACHE_REFERENCED	1
#define	CACHE_STALE		2

/*
 * Message strings.  Each macro expands to a gettext() call, so the lookup
 * happens at every point of use.  MPXIO_MSG_USAGE is a printf-style format
 * taking one string argument (the PHCI state name).
 */
#define	MPXIO_MSG_CACHEFAIL	gettext("Internal analysis failure.")
#define	MPXIO_MSG_LASTPATH	gettext("Last path to busy resources.")
#define	MPXIO_MSG_USAGE		gettext("SCSI Multipathing PHCI (%s)")
#define	MPXIO_MSG_USAGEUNKNOWN	gettext("SCSI Multipathing PHCI (<unknown>)")
63 
/*
 * One PHCI: its resource name and the path state recorded for it from the
 * libdevinfo snapshot.
 */
typedef struct {
	char *path;		/* heap-allocated resource name */
	di_path_state_t state;	/* value returned by di_path_state() */
} phci_t;
68 
/*
 * Node of the singly linked registrants list (reg_list).  'referenced' holds
 * one of the CACHE_* values and drives the sweep in refresh_regs().
 */
typedef struct phci_list {
	phci_t phci;
	int referenced;
	struct phci_list *next;
} phci_list_t;
74 
/*
 * An MPxIO group: a set of PHCIs together with the clients that are reached
 * through exactly that set.  Groups are chained into group_list.
 */
typedef struct group {
	int offline;		/* not referenced by the code in this file */
	int nphcis;		/* number of entries in phcis[] */
	int nclients;		/* number of entries used in clients[] */
	phci_t *phcis;		/* sorted by path (see build_groups()) */
	char **clients;		/* client resource names, owned by the group */
	struct group *next;
} group_t;
83 
/* Callbacks installed in the mpxio_ops vector. */
static int mpxio_register(rcm_handle_t *);
static int mpxio_unregister(rcm_handle_t *);
static int mpxio_getinfo(rcm_handle_t *, char *, id_t, uint_t, char **, char **,
    nvlist_t *, rcm_info_t **);
static int mpxio_suspend(rcm_handle_t *, char *, id_t, timespec_t *, uint_t,
    char **, rcm_info_t **);
static int mpxio_resume(rcm_handle_t *, char *, id_t, uint_t, char **,
    rcm_info_t **);
static int mpxio_offline(rcm_handle_t *, char *, id_t, uint_t, char **,
    rcm_info_t **);
static int mpxio_online(rcm_handle_t *, char *, id_t, uint_t, char **,
    rcm_info_t **);
static int mpxio_remove(rcm_handle_t *, char *, id_t, uint_t, char **,
    rcm_info_t **);
/* libdevinfo walk callbacks and cache maintenance helpers. */
static int get_nclients(di_node_t, void *);
static int build_groups(di_node_t, void *);
static void refresh_regs(rcm_handle_t *);
static int get_affected_clients(rcm_handle_t *, char *, int, int, char ***);
static int detect_client_change(rcm_handle_t *, int, int, group_t *, char *);
static int merge_clients(int *, char ***, group_t *);
static phci_list_t *lookup_phci(char *);
static int is_client(di_node_t);
static char *get_rsrcname(di_node_t);
static char *s_state(di_path_state_t);
static int compare_phci(const void *, const void *);
/* Deallocation helpers. */
static void free_grouplist();
static void free_group(group_t *);
static void free_clients(int, char **);
static void free_phcis(int, phci_t *);
113 
/*
 * Operations vector returned by rcm_mod_init().  The initializer is
 * positional, so the order of entries must follow the field order of
 * struct rcm_mod_ops.
 */
static struct rcm_mod_ops mpxio_ops =
{
	RCM_MOD_OPS_VERSION,
	mpxio_register,
	mpxio_unregister,
	mpxio_getinfo,
	mpxio_suspend,
	mpxio_resume,
	mpxio_offline,
	mpxio_online,
	mpxio_remove,
	/*
	 * Remaining slots are left unset.  NOTE(review): presumably the
	 * capacity-change and event callbacks -- confirm in rcm_module.h.
	 */
	NULL,
	NULL,
	NULL
};
129 
/*
 * Module-wide caches.  The callbacks in this file take mpxio_lock before
 * reading or modifying either list.
 */
static group_t *group_list;	/* rebuilt by every mpxio_register() call */
static phci_list_t *reg_list;	/* PHCIs tracked by refresh_regs() */
static mutex_t mpxio_lock;

/* NOTE(review): <errno.h> is already included above; likely redundant. */
extern int errno;
135 
136 /*
137  * Return the mod-ops vector for initialization.
138  */
139 struct rcm_mod_ops *
140 rcm_mod_init()
141 {
142 	rcm_log_message(RCM_TRACE1, "MPXIO: rcm_mod_init()\n");
143 
144 	return (&mpxio_ops);
145 }
146 
147 /*
148  * Return name and version number for mod_info.
149  */
150 const char *
151 rcm_mod_info()
152 {
153 	rcm_log_message(RCM_TRACE1, "MPXIO: rcm_mod_info()\n");
154 
155 	return (gettext("RCM MPxIO module %I%"));
156 }
157 
158 /*
159  * Destroy the cache and mutex lock when being unloaded.
160  */
161 int
162 rcm_mod_fini()
163 {
164 	phci_list_t *reg;
165 	phci_list_t *next;
166 
167 	rcm_log_message(RCM_TRACE1, "MPXIO: rcm_mod_fini()\n");
168 
169 	/* Free the cache of MPxIO group information */
170 	free_grouplist();
171 
172 	/* Free the cache of registrants */
173 	reg = reg_list;
174 	while (reg) {
175 		next = reg->next;
176 		free(reg->phci.path);
177 		free(reg);
178 		reg = next;
179 	}
180 
181 	/* Destroy the mutex for locking the caches */
182 	(void) mutex_destroy(&mpxio_lock);
183 
184 	return (RCM_SUCCESS);
185 }
186 
187 /*
188  * During each register callback: totally rebuild the group list from a new
189  * libdevinfo snapshot, and then update the registrants.
190  */
191 static int
192 mpxio_register(rcm_handle_t *hdl)
193 {
194 	int nclients = 0;
195 	di_node_t devroot;
196 
197 	rcm_log_message(RCM_TRACE1, "MPXIO: register()\n");
198 
199 	(void) mutex_lock(&mpxio_lock);
200 
201 	/* Destroy the previous group list */
202 	free_grouplist();
203 
204 	/* Get a current libdevinfo snapshot */
205 	if ((devroot = di_init("/", DINFOCPYALL | DINFOPATH)) == DI_NODE_NIL) {
206 		rcm_log_message(RCM_ERROR,
207 		    "MPXIO: libdevinfo initialization failed (%s).\n",
208 		    strerror(errno));
209 		(void) mutex_unlock(&mpxio_lock);
210 		return (RCM_FAILURE);
211 	}
212 
213 	/*
214 	 * First count the total number of clients.  This'll be a useful
215 	 * upper bound when allocating client arrays within each group.
216 	 */
217 	(void) di_walk_node(devroot, DI_WALK_CLDFIRST, &nclients, get_nclients);
218 
219 	rcm_log_message(RCM_TRACE2, gettext("MPXIO: found %d clients.\n"),
220 	    nclients);
221 
222 	/*
223 	 * Then walk the libdevinfo snapshot, building up the new group list
224 	 * along the way.  Pass in the total number of clients (from above) to
225 	 * assist in group construction.
226 	 */
227 	(void) di_walk_node(devroot, DI_WALK_CLDFIRST, &nclients, build_groups);
228 
229 	/* Now with a new group list constructed, refresh the registrants */
230 	refresh_regs(hdl);
231 
232 	/* Free the libdevinfo snapshot */
233 	di_fini(devroot);
234 
235 	(void) mutex_unlock(&mpxio_lock);
236 
237 	return (0);
238 }
239 
240 /*
241  * Unregister all PHCIs and mark the whole registrants list as stale.
242  */
243 static int
244 mpxio_unregister(rcm_handle_t *hdl)
245 {
246 	phci_list_t *reg;
247 
248 	rcm_log_message(RCM_TRACE1, "MPXIO: unregister()\n");
249 
250 	(void) mutex_lock(&mpxio_lock);
251 
252 	for (reg = reg_list; reg != NULL; reg = reg->next) {
253 		(void) rcm_unregister_interest(hdl, reg->phci.path, 0);
254 		reg->referenced = CACHE_STALE;
255 	}
256 
257 	(void) mutex_unlock(&mpxio_lock);
258 
259 	return (RCM_SUCCESS);
260 }
261 
262 /*
263  * To return usage information, just lookup the PHCI in the cache and return
264  * a string identifying that it's a PHCI and describing its cached MPxIO state.
265  * Recurse with the cached list of disks if dependents are to be included.
266  */
267 static int
268 mpxio_getinfo(rcm_handle_t *hdl, char *rsrc, id_t id, uint_t flags,
269     char **infostr, char **errstr, nvlist_t *props, rcm_info_t **infop)
270 {
271 	size_t len;
272 	int rv = RCM_SUCCESS;
273 	char *buf = NULL;
274 	char **clients = NULL;
275 	phci_list_t *reg;
276 	char c;
277 
278 	rcm_log_message(RCM_TRACE1, "MPXIO: getinfo(%s)\n", rsrc);
279 
280 	*infostr = NULL;
281 	*errstr = NULL;
282 
283 	(void) mutex_lock(&mpxio_lock);
284 
285 	if ((reg = lookup_phci(rsrc)) == NULL) {
286 		*errstr = strdup(MPXIO_MSG_CACHEFAIL);
287 		(void) mutex_unlock(&mpxio_lock);
288 		return (RCM_FAILURE);
289 	}
290 
291 	len = snprintf(&c, 1, MPXIO_MSG_USAGE, s_state(reg->phci.state));
292 	buf = calloc(len + 1, sizeof (char));
293 	if ((buf == NULL) || (snprintf(buf, len + 1, MPXIO_MSG_USAGE,
294 	    s_state(reg->phci.state)) > len + 1)) {
295 		*infostr = strdup(MPXIO_MSG_USAGEUNKNOWN);
296 		*errstr = strdup(gettext("Cannot construct usage string."));
297 		(void) mutex_unlock(&mpxio_lock);
298 		if (buf)
299 			free(buf);
300 		return (RCM_FAILURE);
301 	}
302 	*infostr = buf;
303 
304 	if (flags & RCM_INCLUDE_DEPENDENT) {
305 		rcm_log_message(RCM_TRACE2, "MPXIO: getting clients\n");
306 		if (get_affected_clients(hdl, rsrc, CMD_GETINFO, flags,
307 		    &clients) < 0) {
308 			*errstr = strdup(gettext("Cannot lookup clients."));
309 			(void) mutex_unlock(&mpxio_lock);
310 			return (RCM_FAILURE);
311 		}
312 		if (clients) {
313 			rv = rcm_get_info_list(hdl, clients, flags, infop);
314 			free(clients);
315 		} else {
316 			rcm_log_message(RCM_TRACE2, "MPXIO: none found\n");
317 		}
318 	}
319 
320 	(void) mutex_unlock(&mpxio_lock);
321 	return (rv);
322 }
323 
324 /*
325  * Nothing is implemented for suspend operations.
326  */
327 static int
328 mpxio_suspend(rcm_handle_t *hdl, char *rsrc, id_t id, timespec_t *interval,
329     uint_t flags, char **errstr, rcm_info_t **infop)
330 {
331 	rcm_log_message(RCM_TRACE1, "MPXIO: suspend(%s)\n", rsrc);
332 
333 	return (RCM_SUCCESS);
334 }
335 
336 /*
337  * Nothing is implemented for resume operations.
338  */
339 static int
340 mpxio_resume(rcm_handle_t *hdl, char *rsrc, id_t id, uint_t flags,
341     char **errstr, rcm_info_t **infop)
342 {
343 	rcm_log_message(RCM_TRACE1, "MPXIO: resume(%s)\n", rsrc);
344 
345 	return (RCM_SUCCESS);
346 }
347 
348 /*
349  * MPxIO has no policy against offlining.  If disks will be affected, then
350  * base the return value for this request on the results of offlining the
351  * list of disks.  Otherwise succeed.
352  */
353 static int
354 mpxio_offline(rcm_handle_t *hdl, char *rsrc, id_t id, uint_t flags,
355     char **errstr, rcm_info_t **infop)
356 {
357 	char **clients = NULL;
358 	int rv = RCM_SUCCESS;
359 
360 	rcm_log_message(RCM_TRACE1, "MPXIO: offline(%s)\n", rsrc);
361 
362 	(void) mutex_lock(&mpxio_lock);
363 
364 	if (get_affected_clients(hdl, rsrc, CMD_OFFLINE, flags, &clients) < 0) {
365 		*errstr = strdup(gettext("Cannot lookup clients."));
366 		(void) mutex_unlock(&mpxio_lock);
367 		return (RCM_FAILURE);
368 	}
369 
370 	if (clients) {
371 		rv = rcm_request_offline_list(hdl, clients, flags, infop);
372 		if (rv != RCM_SUCCESS)
373 			*errstr = strdup(MPXIO_MSG_LASTPATH);
374 		free(clients);
375 	}
376 
377 	(void) mutex_unlock(&mpxio_lock);
378 
379 	return (rv);
380 }
381 
382 /*
383  * If disks are affected, then they are probably offline and we need to
384  * propagate this online notification to them.
385  */
386 static int
387 mpxio_online(rcm_handle_t *hdl, char *rsrc, id_t id, uint_t flags,
388     char **errstr, rcm_info_t **infop)
389 {
390 	char **clients;
391 	int rv = RCM_SUCCESS;
392 
393 	rcm_log_message(RCM_TRACE1, "MPXIO: online(%s)\n", rsrc);
394 
395 	(void) mutex_lock(&mpxio_lock);
396 
397 	if (get_affected_clients(hdl, rsrc, CMD_ONLINE, flags, &clients) < 0) {
398 		*errstr = strdup(gettext("Cannot lookup clients."));
399 		(void) mutex_unlock(&mpxio_lock);
400 		return (RCM_FAILURE);
401 	}
402 
403 	if (clients) {
404 		rv = rcm_notify_online_list(hdl, clients, flags, infop);
405 		free(clients);
406 	}
407 
408 	(void) mutex_unlock(&mpxio_lock);
409 
410 	return (rv);
411 }
412 
413 /*
414  * If clients are affected, then they are probably offline and we need to
415  * propagate this removal notification to them.  We can also remove the
416  * cache entry for this PHCI.  If that leaves its group empty, then the
417  * group will be removed during the next register callback.
418  */
419 static int
420 mpxio_remove(rcm_handle_t *hdl, char *rsrc, id_t id, uint_t flags,
421     char **errstr, rcm_info_t **infop)
422 {
423 	char **clients;
424 	int rv = RCM_SUCCESS;
425 
426 	rcm_log_message(RCM_TRACE1, "MPXIO: remove(%s)\n", rsrc);
427 
428 	(void) mutex_lock(&mpxio_lock);
429 
430 	if (get_affected_clients(hdl, rsrc, CMD_REMOVE, flags, &clients) < 0) {
431 		*errstr = strdup(gettext("Cannot lookup clients."));
432 		(void) mutex_unlock(&mpxio_lock);
433 		return (RCM_FAILURE);
434 	}
435 
436 	if (clients) {
437 		rv = rcm_notify_remove_list(hdl, clients, flags, infop);
438 		free(clients);
439 	}
440 
441 	(void) mutex_unlock(&mpxio_lock);
442 
443 	return (rv);
444 }
445 
446 
447 /*
448  * Returns a string representation of a given libdevinfo path state.
449  */
450 static char *
451 s_state(di_path_state_t state)
452 {
453 	switch (state) {
454 	case DI_PATH_STATE_ONLINE:
455 		return ("online");
456 	case DI_PATH_STATE_OFFLINE:
457 		return ("offline");
458 	case DI_PATH_STATE_STANDBY:
459 		return ("standby");
460 	case DI_PATH_STATE_FAULT:
461 		return ("faulted");
462 	default:
463 		return ("<unknown>");
464 	}
465 }
466 
467 static int
468 get_affected_clients(rcm_handle_t *hdl, char *rsrc, int cmd, int flags,
469     char ***clientsp)
470 {
471 	int nclients = 0;
472 	phci_t phci;
473 	group_t *group;
474 	char **clients = NULL;
475 
476 	/* Build a dummy phci_t for use with bsearch(). */
477 	phci.path = rsrc;
478 
479 	/* Analyze the effects upon each group. */
480 	for (group = group_list; group != NULL; group = group->next) {
481 
482 		/* If the PHCI isn't in the group, then no effects.  Skip. */
483 		if (bsearch(&phci, group->phcis, group->nphcis, sizeof (phci_t),
484 		    compare_phci) == NULL)
485 			continue;
486 
487 		/*
488 		 * Merge in the clients.  All clients are merged in for getinfo
489 		 * operations.  Otherwise it's contingent upon a state change
490 		 * being transferred to the clients as a result of changing
491 		 * the PHCI's state.
492 		 */
493 		if ((cmd == CMD_GETINFO) ||
494 		    detect_client_change(hdl, cmd, flags, group, rsrc)) {
495 			if (merge_clients(&nclients, &clients, group) < 0) {
496 				free_clients(nclients, clients);
497 				return (-1);
498 			}
499 		}
500 	}
501 
502 	/* Return the array of affected disks */
503 	*clientsp = clients;
504 	return (0);
505 }
506 
507 /*
508  * Iterates through the members of a PHCI list, returning the entry
509  * corresponding to the named PHCI resource.  Returns NULL when the lookup
510  * fails.
511  */
512 static phci_list_t *
513 lookup_phci(char *rsrc)
514 {
515 	phci_list_t *reg;
516 
517 	for (reg = reg_list; reg != NULL; reg = reg->next) {
518 		if (strcmp(reg->phci.path, rsrc) == 0)
519 			return (reg);
520 	}
521 
522 	return (NULL);
523 }
524 
525 /*
526  * Tests whether or not an operation on a specific PHCI resource would affect
527  * the array of client devices attached to the PHCI's MPxIO group.
528  *
529  * Returns: 1 if clients would be affected, 0 if not.
530  */
531 static int
532 detect_client_change(rcm_handle_t *hdl, int cmd, int flags, group_t *group,
533     char *rsrc)
534 {
535 	int i;
536 	int state;
537 
538 	/*
539 	 * Perform a full set analysis on the set of redundant PHCIs.  When
540 	 * there are no unaffected and online PHCIs, then changing the state
541 	 * of the named PHCI results in a client state change.
542 	 */
543 	for (i = 0; i < group->nphcis; i++) {
544 
545 		/* Filter the named resource out of the analysis */
546 		if (strcmp(group->phcis[i].path, rsrc) == 0)
547 			continue;
548 
549 		/*
550 		 * If we find a path that's in the ONLINE or STANDBY state
551 		 * that would be left over in the system after completing
552 		 * whatever DR or hotplugging operation is in progress, then
553 		 * return a 0.
554 		 */
555 		if ((group->phcis[i].state == DI_PATH_STATE_ONLINE) ||
556 		    (group->phcis[i].state == DI_PATH_STATE_STANDBY)) {
557 			if (rcm_get_rsrcstate(hdl, group->phcis[i].path, &state)
558 			    != RCM_SUCCESS) {
559 				rcm_log_message(RCM_ERROR,
560 				    "MPXIO: Failed to query resource state\n");
561 				continue;
562 			}
563 			rcm_log_message(RCM_TRACE2, "MPXIO: state of %s: %d\n",
564 			    group->phcis[i].path, state);
565 			if (state == RCM_STATE_ONLINE) {
566 				return (0);
567 			}
568 		}
569 	}
570 
571 	/*
572 	 * The analysis above didn't find a redundant path to take over.  So
573 	 * report that the state of the client resources will change.
574 	 */
575 	return (1);
576 }
577 
578 /*
579  * Merges the client disks connected to a particular MPxIO group in with a
580  * previous array of disk clients.  The result is to adjust the 'nclients'
581  * value with the new count of disks in the array, and to adjust the 'disks'
582  * value to be a larger array of disks including its original contents along
583  * with the current group's contents merged in.
584  */
585 static int
586 merge_clients(int *nclients, char ***clientsp, group_t *group)
587 {
588 	int i;
589 	int old_nclients;
590 	char **clients_new;
591 
592 	if (group->nclients) {
593 		old_nclients = *nclients;
594 		*nclients += group->nclients;
595 		clients_new = realloc(*clientsp,
596 		    ((*nclients) + 1) * sizeof (char *));
597 		if (clients_new == NULL) {
598 			rcm_log_message(RCM_ERROR,
599 			    "MPXIO: cannot reallocate client array (%s).\n",
600 			    strerror(errno));
601 			return (-1);
602 		}
603 		for (i = old_nclients; i < (*nclients); i++) {
604 			/*
605 			 * Don't allocate space for individual disks in the
606 			 * merged list.  Just make references to the previously
607 			 * allocated strings in the group_t structs themselves.
608 			 */
609 			clients_new[i] = group->clients[i - old_nclients];
610 		}
611 		clients_new[(*nclients)] = NULL;
612 		*clientsp = clients_new;
613 	}
614 
615 	return (0);
616 }
617 
618 /*
619  * A libdevinfo di_walk_node() callback.  It's passed an integer pointer as an
620  * argument, and it increments the integer each time it encounters an MPxIO
621  * client.  By initializing the integer to zero and doing a libdevinfo walk with
622  * this function, the total count of MPxIO clients in the system can be found.
623  */
624 static int
625 get_nclients(di_node_t dinode, void *arg)
626 {
627 	int *nclients = arg;
628 
629 	if (is_client(dinode))
630 		(*nclients)++;
631 
632 	return (DI_WALK_CONTINUE);
633 }
634 
635 /*
636  * Tests a libdevinfo node to determine if it's an MPxIO client.
637  *
638  * Returns: non-zero for true, 0 for false.
639  */
640 static int
641 is_client(di_node_t dinode)
642 {
643 	return (di_path_next_phci(dinode, DI_PATH_NIL) != DI_PATH_NIL);
644 }
645 
646 /*
647  * After a new group_list has been constructed, this refreshes the RCM
648  * registrations and the reg_list contents.  It uses a clock like algorithm
649  * with reference bits in the reg_list to know which registrants are new or
650  * old.
651  */
652 static void
653 refresh_regs(rcm_handle_t *hdl)
654 {
655 	int i;
656 	group_t *group;
657 	phci_list_t *reg;
658 	phci_list_t *prev_reg;
659 
660 	/*
661 	 * First part of the clock-like algorithm: clear reference bits.
662 	 */
663 	for (reg = reg_list; reg != NULL; reg = reg->next)
664 		reg->referenced = CACHE_STALE;
665 
666 	/*
667 	 * Second part of the clock-like algorithm: set the reference bits
668 	 * on every registrant that's still active.  (Also add new list nodes
669 	 * for new registrants.)
670 	 */
671 	for (group = group_list; group != NULL; group = group->next) {
672 		for (i = 0; i < group->nphcis; i++) {
673 
674 			/*
675 			 * If already stale in the registrants list, just set
676 			 * its reference bit to REFERENCED and update its state.
677 			 */
678 			if ((reg = lookup_phci(group->phcis[i].path)) != NULL) {
679 				if (reg->referenced == CACHE_STALE)
680 					reg->referenced = CACHE_REFERENCED;
681 				reg->phci.state = group->phcis[i].state;
682 				continue;
683 			}
684 
685 			/*
686 			 * Otherwise, build a new list node and mark it NEW.
687 			 */
688 			reg = (phci_list_t *)calloc(1, sizeof (*reg));
689 			if (reg == NULL) {
690 				rcm_log_message(RCM_ERROR,
691 				    "MPXIO: cannot allocate phci_list (%s).\n",
692 				    strerror(errno));
693 				continue;
694 			}
695 			reg->phci.path = strdup(group->phcis[i].path);
696 			if (reg->phci.path == NULL) {
697 				free(reg);
698 				rcm_log_message(RCM_ERROR,
699 				    "MPXIO: cannot allocate phci path (%s).\n",
700 				    strerror(errno));
701 				continue;
702 			}
703 			reg->phci.state = group->phcis[i].state;
704 			reg->referenced = CACHE_NEW;
705 
706 			/* Link it at the head of reg_list */
707 			reg->next = reg_list;
708 			reg_list = reg;
709 		}
710 	}
711 
712 	/*
713 	 * Final part of the clock algorithm: unregister stale entries, and
714 	 * register new entries.  Stale entries get removed from the list.
715 	 */
716 	reg = reg_list;
717 	prev_reg = NULL;
718 	while (reg) {
719 
720 		/* Unregister and remove stale entries. */
721 		if (reg->referenced == CACHE_STALE) {
722 			(void) rcm_unregister_interest(hdl, reg->phci.path, 0);
723 			free(reg->phci.path);
724 			if (prev_reg == NULL) {
725 				reg_list = reg->next;
726 				free(reg);
727 				reg = reg_list;
728 			} else {
729 				prev_reg->next = reg->next;
730 				free(reg);
731 				reg = prev_reg->next;
732 			}
733 			continue;
734 		}
735 
736 		/* Register new entries. */
737 		if (reg->referenced == CACHE_NEW) {
738 			if (rcm_register_interest(hdl, reg->phci.path, 0, NULL)
739 			    != RCM_SUCCESS) {
740 				rcm_log_message(RCM_ERROR,
741 				    "MPXIO: failed to register %s (%s).\n",
742 				    reg->phci.path, strerror(errno));
743 			}
744 		}
745 
746 		prev_reg = reg;
747 		reg = reg->next;
748 	}
749 }
750 
751 
752 /*
753  * A libdevinfo di_walk_node() callback that builds up the MPxIO group list.
754  *
755  * Every node encountered that's a client node is added into a group's client
756  * list.  Whenever a group doesn't already exist with a matching set of
757  * related PHCIs, then a new group is constructed and put at the head of the
758  * group list.
759  */
760 static int
761 build_groups(di_node_t dinode, void *arg)
762 {
763 	int i = 0;
764 	int nphcis = 0;
765 	int *nclients = (int *)arg;
766 	phci_t *phcis;
767 	group_t *group;
768 	di_node_t phcinode;
769 	di_path_t dipath = DI_PATH_NIL;
770 
771 	/* Safety check */
772 	if (nclients == NULL)
773 		return (DI_WALK_TERMINATE);
774 
775 	/*
776 	 * Build a sorted array of PHCIs pertaining to the client.
777 	 */
778 	while ((dipath = di_path_next_phci(dinode, dipath)) != DI_PATH_NIL)
779 		nphcis++;
780 
781 	/* Skip non-clients. */
782 	if (nphcis == 0)
783 		return (DI_WALK_CONTINUE);
784 
785 	if ((phcis = (phci_t *)calloc(nphcis, sizeof (phci_t))) == NULL) {
786 		rcm_log_message(RCM_ERROR,
787 		    "MPXIO: failed to allocate client's PHCIs (%s).\n",
788 		    strerror(errno));
789 		return (DI_WALK_TERMINATE);
790 	}
791 	while ((dipath = di_path_next_phci(dinode, dipath)) != DI_PATH_NIL) {
792 		phcinode = di_path_phci_node(dipath);
793 		if (phcinode == DI_NODE_NIL) {
794 			free_phcis(i, phcis);	/* free preceeding PHCIs */
795 			rcm_log_message(RCM_ERROR,
796 			    "MPXIO: client appears to have no PHCIs.\n");
797 			return (DI_WALK_TERMINATE);
798 		}
799 		if ((phcis[i].path = get_rsrcname(phcinode)) == NULL) {
800 			free_phcis(i, phcis);
801 			return (DI_WALK_TERMINATE);
802 		}
803 		phcis[i].state = di_path_state(dipath);
804 		i++;
805 	}
806 	qsort(phcis, nphcis, sizeof (phci_t), compare_phci);
807 
808 	/*
809 	 * Compare that PHCI set to each existing group's set.  We just add
810 	 * the client to the group and exit successfully once a match is made.
811 	 * Falling out of this loop means no match was found.
812 	 */
813 	for (group = group_list; group != NULL; group = group->next) {
814 
815 		/* There is no match if the number of PHCIs is inequal */
816 		if (nphcis != group->nphcis)
817 			continue;
818 
819 		/* Compare the PHCIs linearly (which is okay; they're sorted) */
820 		for (i = 0; i < nphcis; i++)
821 			if (strcmp(phcis[i].path, group->phcis[i].path) != 0)
822 				break;
823 
824 		/*
825 		 * If the loop above completed, we have a match.  Add the client
826 		 * to the group's disk array in that case, and return
827 		 * successfully.
828 		 */
829 		if (i == nphcis) {
830 			free_phcis(nphcis, phcis);
831 			if ((group->clients[group->nclients] =
832 			    get_rsrcname(dinode)) == NULL)
833 				return (DI_WALK_TERMINATE);
834 			group->nclients++;
835 			return (DI_WALK_CONTINUE);
836 		}
837 	}
838 
839 	/* The loop above didn't find a match.  So build a new group. */
840 	if ((group = (group_t *)calloc(1, sizeof (*group))) == NULL) {
841 		rcm_log_message(RCM_ERROR,
842 		    "MPXIO: failed to allocate PHCI group (%s).\n",
843 		    strerror(errno));
844 		free_phcis(nphcis, phcis);
845 		return (DI_WALK_TERMINATE);
846 	}
847 	if ((group->clients = (char **)calloc(*nclients, sizeof (char *))) ==
848 	    NULL) {
849 		free(group);
850 		free_phcis(nphcis, phcis);
851 		return (DI_WALK_TERMINATE);
852 	}
853 	group->nphcis = nphcis;
854 	group->phcis = phcis;
855 	if ((group->clients[0] = get_rsrcname(dinode)) == NULL) {
856 		free_group(group);
857 		return (DI_WALK_TERMINATE);
858 	}
859 	group->nclients = 1;
860 
861 	/* Link the group into the group list and return successfully. */
862 	group->next = group_list;
863 	group_list = group;
864 	return (DI_WALK_CONTINUE);
865 }
866 
867 /*
868  * For bsearch() and qsort().  Returns the results of a strcmp() on the names
869  * of two phci_t's.
870  */
871 static int
872 compare_phci(const void *arg1, const void *arg2)
873 {
874 	phci_t *p1 = (phci_t *)arg1;
875 	phci_t *p2 = (phci_t *)arg2;
876 
877 	if ((p1 == NULL) || (p2 == NULL)) {
878 		if (p1 != NULL)
879 			return (-1);
880 		else if (p2 != NULL)
881 			return (1);
882 		return (0);
883 	}
884 
885 	return (strcmp(p1->path, p2->path));
886 }
887 
888 /*
889  * Free the whole list of group's in the global group_list.
890  */
891 static void
892 free_grouplist()
893 {
894 	group_t *group = group_list;
895 	group_t *next;
896 
897 	while (group) {
898 		next = group->next;
899 		free_group(group);
900 		group = next;
901 	}
902 
903 	group_list = NULL;
904 }
905 
906 /*
907  * Free the contents of a single group_t.
908  */
909 static void
910 free_group(group_t *group)
911 {
912 	if (group) {
913 		free_phcis(group->nphcis, group->phcis);
914 		free_clients(group->nclients, group->clients);
915 		free(group);
916 	}
917 }
918 
/*
 * Releases an array of client names: the first 'nclients' strings, then the
 * array itself.  A NULL array is ignored; a count of zero or less frees
 * only the array.
 */
static void
free_clients(int nclients, char **clients)
{
	int idx;

	if (clients == NULL)
		return;

	/* free(NULL) is a no-op, so empty slots need no special casing. */
	for (idx = 0; idx < nclients; idx++)
		free(clients[idx]);

	free(clients);
}
936 
937 /*
938  * Free an array of phci_t's.
939  */
940 static void
941 free_phcis(int nphcis, phci_t *phcis)
942 {
943 	int i;
944 
945 	if ((phcis != NULL) && (nphcis > 0)) {
946 		for (i = 0; i < nphcis; i++)
947 			if (phcis[i].path)
948 				free(phcis[i].path);
949 		free(phcis);
950 	}
951 }
952 
953 /*
954  * Converts a libdevinfo node into a /devices path.  Caller must free results.
955  */
956 static char *
957 get_rsrcname(di_node_t dinode)
958 {
959 	int len;
960 	char *rsrcname;
961 	char *devfspath;
962 	char name[MAXPATHLEN];
963 
964 	if ((devfspath = di_devfs_path(dinode)) == NULL) {
965 		rcm_log_message(RCM_ERROR, "MPXIO: resource has null path.\n");
966 		return (NULL);
967 	}
968 
969 	len = snprintf(name, sizeof (name), "/devices%s", devfspath);
970 	di_devfs_path_free(devfspath);
971 	if (len >= sizeof (name)) {
972 		rcm_log_message(RCM_ERROR, "MPXIO: resource path too long.\n");
973 		return (NULL);
974 	}
975 
976 	if ((rsrcname = strdup(name)) == NULL)
977 		rcm_log_message(RCM_ERROR,
978 		    "MPXIO: failed to allocate resource name (%s).\n",
979 		    strerror(errno));
980 
981 	return (rsrcname);
982 }
983