xref: /illumos-gate/usr/src/cmd/rcm_daemon/common/mpxio_rcm.c (revision 2983dda76a6d296fdb560c88114fe41caad1b84f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * RCM module supporting multiplexed I/O controllers (MPxIO).
30  */
31 #include <stdlib.h>
32 #include <stdarg.h>
33 #include <unistd.h>
34 #include <assert.h>
35 #include <syslog.h>
36 #include <string.h>
37 #include <synch.h>
38 #include <libintl.h>
39 #include <locale.h>
40 #include <ctype.h>
41 #include <errno.h>
42 #include <libdevinfo.h>
43 #include <sys/types.h>
44 #include "rcm_module.h"
45 
/*
 * Property name/value strings.  NOTE(review): neither is referenced by the
 * code visible in this file -- confirm before relying on them.
 */
#define	MPXIO_PROP_NAME		"mpxio-component"
#define	MPXIO_PROP_CLIENT	"client"

/* Operation codes passed as the 'cmd' argument of get_affected_clients(). */
#define	CMD_GETINFO		0
#define	CMD_OFFLINE		1
#define	CMD_ONLINE		2
#define	CMD_REMOVE		3

/* Values of phci_list_t.referenced, maintained by refresh_regs(). */
#define	CACHE_NEW		0
#define	CACHE_REFERENCED	1
#define	CACHE_STALE		2

/* Localized strings handed back through the callbacks' infostr/errstr. */
#define	MPXIO_MSG_CACHEFAIL	gettext("Internal analysis failure.")
#define	MPXIO_MSG_LASTPATH	gettext("Last path to busy resources.")
#define	MPXIO_MSG_USAGE		gettext("SCSI Multipathing PHCI (%s)")
#define	MPXIO_MSG_USAGEUNKNOWN	gettext("SCSI Multipathing PHCI (<unknown>)")
62 
/*
 * One PHCI as seen through a client's path: its resource name and the
 * libdevinfo state of that path.
 */
typedef struct {
	char *path;		/* heap-allocated name from get_rsrcname() */
	di_path_state_t state;	/* value of di_path_state() for the path */
} phci_t;
67 
/*
 * Node of the singly linked registrants list (reg_list).
 */
typedef struct phci_list {
	phci_t phci;		/* private copy of the PHCI's path and state */
	int referenced;		/* one of the CACHE_* values */
	struct phci_list *next;
} phci_list_t;
73 
/*
 * An MPxIO group: a set of PHCIs together with every client that is
 * reachable through exactly that set.  Groups are built by build_groups().
 */
typedef struct group {
	int offline;		/* NOTE(review): not referenced in this file */
	int nphcis;		/* number of entries in phcis[] */
	int nclients;		/* number of entries used in clients[] */
	phci_t *phcis;		/* sorted by path (see compare_phci()) */
	char **clients;		/* client resource names owned by the group */
	struct group *next;
} group_t;
82 
/* RCM callbacks referenced from the mpxio_ops vector. */
static int mpxio_register(rcm_handle_t *);
static int mpxio_unregister(rcm_handle_t *);
static int mpxio_getinfo(rcm_handle_t *, char *, id_t, uint_t, char **, char **,
    nvlist_t *, rcm_info_t **);
static int mpxio_suspend(rcm_handle_t *, char *, id_t, timespec_t *, uint_t,
    char **, rcm_info_t **);
static int mpxio_resume(rcm_handle_t *, char *, id_t, uint_t, char **,
    rcm_info_t **);
static int mpxio_offline(rcm_handle_t *, char *, id_t, uint_t, char **,
    rcm_info_t **);
static int mpxio_online(rcm_handle_t *, char *, id_t, uint_t, char **,
    rcm_info_t **);
static int mpxio_remove(rcm_handle_t *, char *, id_t, uint_t, char **,
    rcm_info_t **);
/* Internal helpers: libdevinfo walkers, cache maintenance and cleanup. */
static int get_nclients(di_node_t, void *);
static int build_groups(di_node_t, void *);
static void refresh_regs(rcm_handle_t *);
static int get_affected_clients(rcm_handle_t *, char *, int, int, char ***);
static int detect_client_change(rcm_handle_t *, int, int, group_t *, char *);
static int merge_clients(int *, char ***, group_t *);
static phci_list_t *lookup_phci(char *);
static int is_client(di_node_t);
static char *get_rsrcname(di_node_t);
static char *s_state(di_path_state_t);
static int compare_phci(const void *, const void *);
static void free_grouplist();
static void free_group(group_t *);
static void free_clients(int, char **);
static void free_phcis(int, phci_t *);
112 
/*
 * Callback vector returned by rcm_mod_init().  The initializer is
 * positional, so the entries must stay in this order.
 */
static struct rcm_mod_ops mpxio_ops =
{
	RCM_MOD_OPS_VERSION,
	mpxio_register,
	mpxio_unregister,
	mpxio_getinfo,
	mpxio_suspend,
	mpxio_resume,
	mpxio_offline,
	mpxio_online,
	mpxio_remove,
	NULL,	/* the last three slots are left unset by this module */
	NULL,
	NULL
};
128 
/* Head of the cached MPxIO group list; rebuilt by mpxio_register(). */
static group_t *group_list;
/* Head of the registrants list; kept up to date by refresh_regs(). */
static phci_list_t *reg_list;
/* Held by the RCM callbacks while they use group_list and reg_list. */
static mutex_t mpxio_lock;

extern int errno;
134 
135 /*
136  * Return the mod-ops vector for initialization.
137  */
138 struct rcm_mod_ops *
139 rcm_mod_init()
140 {
141 	rcm_log_message(RCM_TRACE1, "MPXIO: rcm_mod_init()\n");
142 
143 	return (&mpxio_ops);
144 }
145 
146 /*
147  * Return name and version number for mod_info.
148  */
149 const char *
150 rcm_mod_info()
151 {
152 	rcm_log_message(RCM_TRACE1, "MPXIO: rcm_mod_info()\n");
153 
154 	return (gettext("RCM MPxIO module 1.6"));
155 }
156 
157 /*
158  * Destroy the cache and mutex lock when being unloaded.
159  */
160 int
161 rcm_mod_fini()
162 {
163 	phci_list_t *reg;
164 	phci_list_t *next;
165 
166 	rcm_log_message(RCM_TRACE1, "MPXIO: rcm_mod_fini()\n");
167 
168 	/* Free the cache of MPxIO group information */
169 	free_grouplist();
170 
171 	/* Free the cache of registrants */
172 	reg = reg_list;
173 	while (reg) {
174 		next = reg->next;
175 		free(reg->phci.path);
176 		free(reg);
177 		reg = next;
178 	}
179 
180 	/* Destroy the mutex for locking the caches */
181 	(void) mutex_destroy(&mpxio_lock);
182 
183 	return (RCM_SUCCESS);
184 }
185 
186 /*
187  * During each register callback: totally rebuild the group list from a new
188  * libdevinfo snapshot, and then update the registrants.
189  */
190 static int
191 mpxio_register(rcm_handle_t *hdl)
192 {
193 	int nclients = 0;
194 	di_node_t devroot;
195 
196 	rcm_log_message(RCM_TRACE1, "MPXIO: register()\n");
197 
198 	(void) mutex_lock(&mpxio_lock);
199 
200 	/* Destroy the previous group list */
201 	free_grouplist();
202 
203 	/* Get a current libdevinfo snapshot */
204 	if ((devroot = di_init("/", DINFOCPYALL | DINFOPATH)) == DI_NODE_NIL) {
205 		rcm_log_message(RCM_ERROR,
206 		    "MPXIO: libdevinfo initialization failed (%s).\n",
207 		    strerror(errno));
208 		(void) mutex_unlock(&mpxio_lock);
209 		return (RCM_FAILURE);
210 	}
211 
212 	/*
213 	 * First count the total number of clients.  This'll be a useful
214 	 * upper bound when allocating client arrays within each group.
215 	 */
216 	(void) di_walk_node(devroot, DI_WALK_CLDFIRST, &nclients, get_nclients);
217 
218 	rcm_log_message(RCM_TRACE2, gettext("MPXIO: found %d clients.\n"),
219 	    nclients);
220 
221 	/*
222 	 * Then walk the libdevinfo snapshot, building up the new group list
223 	 * along the way.  Pass in the total number of clients (from above) to
224 	 * assist in group construction.
225 	 */
226 	(void) di_walk_node(devroot, DI_WALK_CLDFIRST, &nclients, build_groups);
227 
228 	/* Now with a new group list constructed, refresh the registrants */
229 	refresh_regs(hdl);
230 
231 	/* Free the libdevinfo snapshot */
232 	di_fini(devroot);
233 
234 	(void) mutex_unlock(&mpxio_lock);
235 
236 	return (0);
237 }
238 
239 /*
240  * Unregister all PHCIs and mark the whole registrants list as stale.
241  */
242 static int
243 mpxio_unregister(rcm_handle_t *hdl)
244 {
245 	phci_list_t *reg;
246 
247 	rcm_log_message(RCM_TRACE1, "MPXIO: unregister()\n");
248 
249 	(void) mutex_lock(&mpxio_lock);
250 
251 	for (reg = reg_list; reg != NULL; reg = reg->next) {
252 		(void) rcm_unregister_interest(hdl, reg->phci.path, 0);
253 		reg->referenced = CACHE_STALE;
254 	}
255 
256 	(void) mutex_unlock(&mpxio_lock);
257 
258 	return (RCM_SUCCESS);
259 }
260 
261 /*
262  * To return usage information, just lookup the PHCI in the cache and return
263  * a string identifying that it's a PHCI and describing its cached MPxIO state.
264  * Recurse with the cached list of disks if dependents are to be included.
265  */
266 static int
267 mpxio_getinfo(rcm_handle_t *hdl, char *rsrc, id_t id, uint_t flags,
268     char **infostr, char **errstr, nvlist_t *props, rcm_info_t **infop)
269 {
270 	size_t len;
271 	int rv = RCM_SUCCESS;
272 	char *buf = NULL;
273 	char **clients = NULL;
274 	phci_list_t *reg;
275 	char c;
276 
277 	rcm_log_message(RCM_TRACE1, "MPXIO: getinfo(%s)\n", rsrc);
278 
279 	*infostr = NULL;
280 	*errstr = NULL;
281 
282 	(void) mutex_lock(&mpxio_lock);
283 
284 	if ((reg = lookup_phci(rsrc)) == NULL) {
285 		*errstr = strdup(MPXIO_MSG_CACHEFAIL);
286 		(void) mutex_unlock(&mpxio_lock);
287 		return (RCM_FAILURE);
288 	}
289 
290 	len = snprintf(&c, 1, MPXIO_MSG_USAGE, s_state(reg->phci.state));
291 	buf = calloc(len + 1, sizeof (char));
292 	if ((buf == NULL) || (snprintf(buf, len + 1, MPXIO_MSG_USAGE,
293 	    s_state(reg->phci.state)) > len + 1)) {
294 		*infostr = strdup(MPXIO_MSG_USAGEUNKNOWN);
295 		*errstr = strdup(gettext("Cannot construct usage string."));
296 		(void) mutex_unlock(&mpxio_lock);
297 		if (buf)
298 			free(buf);
299 		return (RCM_FAILURE);
300 	}
301 	*infostr = buf;
302 
303 	if (flags & RCM_INCLUDE_DEPENDENT) {
304 		rcm_log_message(RCM_TRACE2, "MPXIO: getting clients\n");
305 		if (get_affected_clients(hdl, rsrc, CMD_GETINFO, flags,
306 		    &clients) < 0) {
307 			*errstr = strdup(gettext("Cannot lookup clients."));
308 			(void) mutex_unlock(&mpxio_lock);
309 			return (RCM_FAILURE);
310 		}
311 		if (clients) {
312 			rv = rcm_get_info_list(hdl, clients, flags, infop);
313 			free(clients);
314 		} else {
315 			rcm_log_message(RCM_TRACE2, "MPXIO: none found\n");
316 		}
317 	}
318 
319 	(void) mutex_unlock(&mpxio_lock);
320 	return (rv);
321 }
322 
323 /*
324  * Nothing is implemented for suspend operations.
325  */
326 static int
327 mpxio_suspend(rcm_handle_t *hdl, char *rsrc, id_t id, timespec_t *interval,
328     uint_t flags, char **errstr, rcm_info_t **infop)
329 {
330 	rcm_log_message(RCM_TRACE1, "MPXIO: suspend(%s)\n", rsrc);
331 
332 	return (RCM_SUCCESS);
333 }
334 
335 /*
336  * Nothing is implemented for resume operations.
337  */
338 static int
339 mpxio_resume(rcm_handle_t *hdl, char *rsrc, id_t id, uint_t flags,
340     char **errstr, rcm_info_t **infop)
341 {
342 	rcm_log_message(RCM_TRACE1, "MPXIO: resume(%s)\n", rsrc);
343 
344 	return (RCM_SUCCESS);
345 }
346 
347 /*
348  * MPxIO has no policy against offlining.  If disks will be affected, then
349  * base the return value for this request on the results of offlining the
350  * list of disks.  Otherwise succeed.
351  */
352 static int
353 mpxio_offline(rcm_handle_t *hdl, char *rsrc, id_t id, uint_t flags,
354     char **errstr, rcm_info_t **infop)
355 {
356 	char **clients = NULL;
357 	int rv = RCM_SUCCESS;
358 
359 	rcm_log_message(RCM_TRACE1, "MPXIO: offline(%s)\n", rsrc);
360 
361 	(void) mutex_lock(&mpxio_lock);
362 
363 	if (get_affected_clients(hdl, rsrc, CMD_OFFLINE, flags, &clients) < 0) {
364 		*errstr = strdup(gettext("Cannot lookup clients."));
365 		(void) mutex_unlock(&mpxio_lock);
366 		return (RCM_FAILURE);
367 	}
368 
369 	if (clients) {
370 		rv = rcm_request_offline_list(hdl, clients, flags, infop);
371 		if (rv != RCM_SUCCESS)
372 			*errstr = strdup(MPXIO_MSG_LASTPATH);
373 		free(clients);
374 	}
375 
376 	(void) mutex_unlock(&mpxio_lock);
377 
378 	return (rv);
379 }
380 
381 /*
382  * If disks are affected, then they are probably offline and we need to
383  * propagate this online notification to them.
384  */
385 static int
386 mpxio_online(rcm_handle_t *hdl, char *rsrc, id_t id, uint_t flags,
387     char **errstr, rcm_info_t **infop)
388 {
389 	char **clients;
390 	int rv = RCM_SUCCESS;
391 
392 	rcm_log_message(RCM_TRACE1, "MPXIO: online(%s)\n", rsrc);
393 
394 	(void) mutex_lock(&mpxio_lock);
395 
396 	if (get_affected_clients(hdl, rsrc, CMD_ONLINE, flags, &clients) < 0) {
397 		*errstr = strdup(gettext("Cannot lookup clients."));
398 		(void) mutex_unlock(&mpxio_lock);
399 		return (RCM_FAILURE);
400 	}
401 
402 	if (clients) {
403 		rv = rcm_notify_online_list(hdl, clients, flags, infop);
404 		free(clients);
405 	}
406 
407 	(void) mutex_unlock(&mpxio_lock);
408 
409 	return (rv);
410 }
411 
412 /*
413  * If clients are affected, then they are probably offline and we need to
414  * propagate this removal notification to them.  We can also remove the
415  * cache entry for this PHCI.  If that leaves its group empty, then the
416  * group will be removed during the next register callback.
417  */
418 static int
419 mpxio_remove(rcm_handle_t *hdl, char *rsrc, id_t id, uint_t flags,
420     char **errstr, rcm_info_t **infop)
421 {
422 	char **clients;
423 	int rv = RCM_SUCCESS;
424 
425 	rcm_log_message(RCM_TRACE1, "MPXIO: remove(%s)\n", rsrc);
426 
427 	(void) mutex_lock(&mpxio_lock);
428 
429 	if (get_affected_clients(hdl, rsrc, CMD_REMOVE, flags, &clients) < 0) {
430 		*errstr = strdup(gettext("Cannot lookup clients."));
431 		(void) mutex_unlock(&mpxio_lock);
432 		return (RCM_FAILURE);
433 	}
434 
435 	if (clients) {
436 		rv = rcm_notify_remove_list(hdl, clients, flags, infop);
437 		free(clients);
438 	}
439 
440 	(void) mutex_unlock(&mpxio_lock);
441 
442 	return (rv);
443 }
444 
445 
446 /*
447  * Returns a string representation of a given libdevinfo path state.
448  */
449 static char *
450 s_state(di_path_state_t state)
451 {
452 	switch (state) {
453 	case DI_PATH_STATE_ONLINE:
454 		return ("online");
455 	case DI_PATH_STATE_OFFLINE:
456 		return ("offline");
457 	case DI_PATH_STATE_STANDBY:
458 		return ("standby");
459 	case DI_PATH_STATE_FAULT:
460 		return ("faulted");
461 	default:
462 		return ("<unknown>");
463 	}
464 }
465 
466 static int
467 get_affected_clients(rcm_handle_t *hdl, char *rsrc, int cmd, int flags,
468     char ***clientsp)
469 {
470 	int nclients = 0;
471 	phci_t phci;
472 	group_t *group;
473 	char **clients = NULL;
474 
475 	/* Build a dummy phci_t for use with bsearch(). */
476 	phci.path = rsrc;
477 
478 	/* Analyze the effects upon each group. */
479 	for (group = group_list; group != NULL; group = group->next) {
480 
481 		/* If the PHCI isn't in the group, then no effects.  Skip. */
482 		if (bsearch(&phci, group->phcis, group->nphcis, sizeof (phci_t),
483 		    compare_phci) == NULL)
484 			continue;
485 
486 		/*
487 		 * Merge in the clients.  All clients are merged in for getinfo
488 		 * operations.  Otherwise it's contingent upon a state change
489 		 * being transferred to the clients as a result of changing
490 		 * the PHCI's state.
491 		 */
492 		if ((cmd == CMD_GETINFO) ||
493 		    detect_client_change(hdl, cmd, flags, group, rsrc)) {
494 			if (merge_clients(&nclients, &clients, group) < 0) {
495 				free_clients(nclients, clients);
496 				return (-1);
497 			}
498 		}
499 	}
500 
501 	/* Return the array of affected disks */
502 	*clientsp = clients;
503 	return (0);
504 }
505 
506 /*
507  * Iterates through the members of a PHCI list, returning the entry
508  * corresponding to the named PHCI resource.  Returns NULL when the lookup
509  * fails.
510  */
511 static phci_list_t *
512 lookup_phci(char *rsrc)
513 {
514 	phci_list_t *reg;
515 
516 	for (reg = reg_list; reg != NULL; reg = reg->next) {
517 		if (strcmp(reg->phci.path, rsrc) == 0)
518 			return (reg);
519 	}
520 
521 	return (NULL);
522 }
523 
524 /*
525  * Tests whether or not an operation on a specific PHCI resource would affect
526  * the array of client devices attached to the PHCI's MPxIO group.
527  *
528  * Returns: 1 if clients would be affected, 0 if not.
529  */
530 static int
531 detect_client_change(rcm_handle_t *hdl, int cmd, int flags, group_t *group,
532     char *rsrc)
533 {
534 	int i;
535 	int state;
536 
537 	/*
538 	 * Perform a full set analysis on the set of redundant PHCIs.  When
539 	 * there are no unaffected and online PHCIs, then changing the state
540 	 * of the named PHCI results in a client state change.
541 	 */
542 	for (i = 0; i < group->nphcis; i++) {
543 
544 		/* Filter the named resource out of the analysis */
545 		if (strcmp(group->phcis[i].path, rsrc) == 0)
546 			continue;
547 
548 		/*
549 		 * If we find a path that's in the ONLINE or STANDBY state
550 		 * that would be left over in the system after completing
551 		 * whatever DR or hotplugging operation is in progress, then
552 		 * return a 0.
553 		 */
554 		if ((group->phcis[i].state == DI_PATH_STATE_ONLINE) ||
555 		    (group->phcis[i].state == DI_PATH_STATE_STANDBY)) {
556 			if (rcm_get_rsrcstate(hdl, group->phcis[i].path, &state)
557 			    != RCM_SUCCESS) {
558 				rcm_log_message(RCM_ERROR,
559 				    "MPXIO: Failed to query resource state\n");
560 				continue;
561 			}
562 			rcm_log_message(RCM_TRACE2, "MPXIO: state of %s: %d\n",
563 			    group->phcis[i].path, state);
564 			if (state == RCM_STATE_ONLINE) {
565 				return (0);
566 			}
567 		}
568 	}
569 
570 	/*
571 	 * The analysis above didn't find a redundant path to take over.  So
572 	 * report that the state of the client resources will change.
573 	 */
574 	return (1);
575 }
576 
577 /*
578  * Merges the client disks connected to a particular MPxIO group in with a
579  * previous array of disk clients.  The result is to adjust the 'nclients'
580  * value with the new count of disks in the array, and to adjust the 'disks'
581  * value to be a larger array of disks including its original contents along
582  * with the current group's contents merged in.
583  */
584 static int
585 merge_clients(int *nclients, char ***clientsp, group_t *group)
586 {
587 	int i;
588 	int old_nclients;
589 	char **clients_new;
590 
591 	if (group->nclients) {
592 		old_nclients = *nclients;
593 		*nclients += group->nclients;
594 		clients_new = realloc(*clientsp,
595 		    ((*nclients) + 1) * sizeof (char *));
596 		if (clients_new == NULL) {
597 			rcm_log_message(RCM_ERROR,
598 			    "MPXIO: cannot reallocate client array (%s).\n",
599 			    strerror(errno));
600 			return (-1);
601 		}
602 		for (i = old_nclients; i < (*nclients); i++) {
603 			/*
604 			 * Don't allocate space for individual disks in the
605 			 * merged list.  Just make references to the previously
606 			 * allocated strings in the group_t structs themselves.
607 			 */
608 			clients_new[i] = group->clients[i - old_nclients];
609 		}
610 		clients_new[(*nclients)] = NULL;
611 		*clientsp = clients_new;
612 	}
613 
614 	return (0);
615 }
616 
617 /*
618  * A libdevinfo di_walk_node() callback.  It's passed an integer pointer as an
619  * argument, and it increments the integer each time it encounters an MPxIO
620  * client.  By initializing the integer to zero and doing a libdevinfo walk with
621  * this function, the total count of MPxIO clients in the system can be found.
622  */
623 static int
624 get_nclients(di_node_t dinode, void *arg)
625 {
626 	int *nclients = arg;
627 
628 	if (is_client(dinode))
629 		(*nclients)++;
630 
631 	return (DI_WALK_CONTINUE);
632 }
633 
634 /*
635  * Tests a libdevinfo node to determine if it's an MPxIO client.
636  *
637  * Returns: non-zero for true, 0 for false.
638  */
639 static int
640 is_client(di_node_t dinode)
641 {
642 	return (di_path_client_next_path(dinode, DI_PATH_NIL) != DI_PATH_NIL);
643 }
644 
645 /*
646  * After a new group_list has been constructed, this refreshes the RCM
647  * registrations and the reg_list contents.  It uses a clock like algorithm
648  * with reference bits in the reg_list to know which registrants are new or
649  * old.
650  */
651 static void
652 refresh_regs(rcm_handle_t *hdl)
653 {
654 	int i;
655 	group_t *group;
656 	phci_list_t *reg;
657 	phci_list_t *prev_reg;
658 
659 	/*
660 	 * First part of the clock-like algorithm: clear reference bits.
661 	 */
662 	for (reg = reg_list; reg != NULL; reg = reg->next)
663 		reg->referenced = CACHE_STALE;
664 
665 	/*
666 	 * Second part of the clock-like algorithm: set the reference bits
667 	 * on every registrant that's still active.  (Also add new list nodes
668 	 * for new registrants.)
669 	 */
670 	for (group = group_list; group != NULL; group = group->next) {
671 		for (i = 0; i < group->nphcis; i++) {
672 
673 			/*
674 			 * If already stale in the registrants list, just set
675 			 * its reference bit to REFERENCED and update its state.
676 			 */
677 			if ((reg = lookup_phci(group->phcis[i].path)) != NULL) {
678 				if (reg->referenced == CACHE_STALE)
679 					reg->referenced = CACHE_REFERENCED;
680 				reg->phci.state = group->phcis[i].state;
681 				continue;
682 			}
683 
684 			/*
685 			 * Otherwise, build a new list node and mark it NEW.
686 			 */
687 			reg = (phci_list_t *)calloc(1, sizeof (*reg));
688 			if (reg == NULL) {
689 				rcm_log_message(RCM_ERROR,
690 				    "MPXIO: cannot allocate phci_list (%s).\n",
691 				    strerror(errno));
692 				continue;
693 			}
694 			reg->phci.path = strdup(group->phcis[i].path);
695 			if (reg->phci.path == NULL) {
696 				free(reg);
697 				rcm_log_message(RCM_ERROR,
698 				    "MPXIO: cannot allocate phci path (%s).\n",
699 				    strerror(errno));
700 				continue;
701 			}
702 			reg->phci.state = group->phcis[i].state;
703 			reg->referenced = CACHE_NEW;
704 
705 			/* Link it at the head of reg_list */
706 			reg->next = reg_list;
707 			reg_list = reg;
708 		}
709 	}
710 
711 	/*
712 	 * Final part of the clock algorithm: unregister stale entries, and
713 	 * register new entries.  Stale entries get removed from the list.
714 	 */
715 	reg = reg_list;
716 	prev_reg = NULL;
717 	while (reg) {
718 
719 		/* Unregister and remove stale entries. */
720 		if (reg->referenced == CACHE_STALE) {
721 			(void) rcm_unregister_interest(hdl, reg->phci.path, 0);
722 			free(reg->phci.path);
723 			if (prev_reg == NULL) {
724 				reg_list = reg->next;
725 				free(reg);
726 				reg = reg_list;
727 			} else {
728 				prev_reg->next = reg->next;
729 				free(reg);
730 				reg = prev_reg->next;
731 			}
732 			continue;
733 		}
734 
735 		/* Register new entries. */
736 		if (reg->referenced == CACHE_NEW) {
737 			if (rcm_register_interest(hdl, reg->phci.path, 0, NULL)
738 			    != RCM_SUCCESS) {
739 				rcm_log_message(RCM_ERROR,
740 				    "MPXIO: failed to register %s (%s).\n",
741 				    reg->phci.path, strerror(errno));
742 			}
743 		}
744 
745 		prev_reg = reg;
746 		reg = reg->next;
747 	}
748 }
749 
750 
751 /*
752  * A libdevinfo di_walk_node() callback that builds up the MPxIO group list.
753  *
754  * Every node encountered that's a client node is added into a group's client
755  * list.  Whenever a group doesn't already exist with a matching set of
756  * related PHCIs, then a new group is constructed and put at the head of the
757  * group list.
758  */
759 static int
760 build_groups(di_node_t dinode, void *arg)
761 {
762 	int i = 0;
763 	int nphcis = 0;
764 	int *nclients = (int *)arg;
765 	phci_t *phcis;
766 	group_t *group;
767 	di_node_t phcinode;
768 	di_path_t dipath = DI_PATH_NIL;
769 
770 	/* Safety check */
771 	if (nclients == NULL)
772 		return (DI_WALK_TERMINATE);
773 
774 	/*
775 	 * Build a sorted array of PHCIs pertaining to the client.
776 	 */
777 	while ((dipath =
778 	    di_path_client_next_path(dinode, dipath)) != DI_PATH_NIL)
779 		nphcis++;
780 
781 	/* Skip non-clients. */
782 	if (nphcis == 0)
783 		return (DI_WALK_CONTINUE);
784 
785 	if ((phcis = (phci_t *)calloc(nphcis, sizeof (phci_t))) == NULL) {
786 		rcm_log_message(RCM_ERROR,
787 		    "MPXIO: failed to allocate client's PHCIs (%s).\n",
788 		    strerror(errno));
789 		return (DI_WALK_TERMINATE);
790 	}
791 	while ((dipath =
792 	    di_path_client_next_path(dinode, dipath)) != DI_PATH_NIL) {
793 		phcinode = di_path_phci_node(dipath);
794 		if (phcinode == DI_NODE_NIL) {
795 			free_phcis(i, phcis);	/* free preceeding PHCIs */
796 			rcm_log_message(RCM_ERROR,
797 			    "MPXIO: client appears to have no PHCIs.\n");
798 			return (DI_WALK_TERMINATE);
799 		}
800 		if ((phcis[i].path = get_rsrcname(phcinode)) == NULL) {
801 			free_phcis(i, phcis);
802 			return (DI_WALK_TERMINATE);
803 		}
804 		phcis[i].state = di_path_state(dipath);
805 		i++;
806 	}
807 	qsort(phcis, nphcis, sizeof (phci_t), compare_phci);
808 
809 	/*
810 	 * Compare that PHCI set to each existing group's set.  We just add
811 	 * the client to the group and exit successfully once a match is made.
812 	 * Falling out of this loop means no match was found.
813 	 */
814 	for (group = group_list; group != NULL; group = group->next) {
815 
816 		/* There is no match if the number of PHCIs is inequal */
817 		if (nphcis != group->nphcis)
818 			continue;
819 
820 		/* Compare the PHCIs linearly (which is okay; they're sorted) */
821 		for (i = 0; i < nphcis; i++)
822 			if (strcmp(phcis[i].path, group->phcis[i].path) != 0)
823 				break;
824 
825 		/*
826 		 * If the loop above completed, we have a match.  Add the client
827 		 * to the group's disk array in that case, and return
828 		 * successfully.
829 		 */
830 		if (i == nphcis) {
831 			free_phcis(nphcis, phcis);
832 			if ((group->clients[group->nclients] =
833 			    get_rsrcname(dinode)) == NULL)
834 				return (DI_WALK_TERMINATE);
835 			group->nclients++;
836 			return (DI_WALK_CONTINUE);
837 		}
838 	}
839 
840 	/* The loop above didn't find a match.  So build a new group. */
841 	if ((group = (group_t *)calloc(1, sizeof (*group))) == NULL) {
842 		rcm_log_message(RCM_ERROR,
843 		    "MPXIO: failed to allocate PHCI group (%s).\n",
844 		    strerror(errno));
845 		free_phcis(nphcis, phcis);
846 		return (DI_WALK_TERMINATE);
847 	}
848 	if ((group->clients = (char **)calloc(*nclients, sizeof (char *))) ==
849 	    NULL) {
850 		free(group);
851 		free_phcis(nphcis, phcis);
852 		return (DI_WALK_TERMINATE);
853 	}
854 	group->nphcis = nphcis;
855 	group->phcis = phcis;
856 	if ((group->clients[0] = get_rsrcname(dinode)) == NULL) {
857 		free_group(group);
858 		return (DI_WALK_TERMINATE);
859 	}
860 	group->nclients = 1;
861 
862 	/* Link the group into the group list and return successfully. */
863 	group->next = group_list;
864 	group_list = group;
865 	return (DI_WALK_CONTINUE);
866 }
867 
868 /*
869  * For bsearch() and qsort().  Returns the results of a strcmp() on the names
870  * of two phci_t's.
871  */
872 static int
873 compare_phci(const void *arg1, const void *arg2)
874 {
875 	phci_t *p1 = (phci_t *)arg1;
876 	phci_t *p2 = (phci_t *)arg2;
877 
878 	if ((p1 == NULL) || (p2 == NULL)) {
879 		if (p1 != NULL)
880 			return (-1);
881 		else if (p2 != NULL)
882 			return (1);
883 		return (0);
884 	}
885 
886 	return (strcmp(p1->path, p2->path));
887 }
888 
889 /*
890  * Free the whole list of group's in the global group_list.
891  */
892 static void
893 free_grouplist()
894 {
895 	group_t *group = group_list;
896 	group_t *next;
897 
898 	while (group) {
899 		next = group->next;
900 		free_group(group);
901 		group = next;
902 	}
903 
904 	group_list = NULL;
905 }
906 
907 /*
908  * Free the contents of a single group_t.
909  */
910 static void
911 free_group(group_t *group)
912 {
913 	if (group) {
914 		free_phcis(group->nphcis, group->phcis);
915 		free_clients(group->nclients, group->clients);
916 		free(group);
917 	}
918 }
919 
/*
 * Releases an array of client names: the first 'nclients' strings and then
 * the array itself.  A NULL array is ignored; NULL entries are skipped.
 */
static void
free_clients(int nclients, char **clients)
{
	int idx;

	if (clients == NULL)
		return;

	/* free(NULL) is a no-op, so unset entries need no special case. */
	for (idx = 0; idx < nclients; idx++)
		free(clients[idx]);

	free(clients);
}
937 
938 /*
939  * Free an array of phci_t's.
940  */
941 static void
942 free_phcis(int nphcis, phci_t *phcis)
943 {
944 	int i;
945 
946 	if ((phcis != NULL) && (nphcis > 0)) {
947 		for (i = 0; i < nphcis; i++)
948 			if (phcis[i].path)
949 				free(phcis[i].path);
950 		free(phcis);
951 	}
952 }
953 
954 /*
955  * Converts a libdevinfo node into a /devices path.  Caller must free results.
956  */
957 static char *
958 get_rsrcname(di_node_t dinode)
959 {
960 	int len;
961 	char *rsrcname;
962 	char *devfspath;
963 	char name[MAXPATHLEN];
964 
965 	if ((devfspath = di_devfs_path(dinode)) == NULL) {
966 		rcm_log_message(RCM_ERROR, "MPXIO: resource has null path.\n");
967 		return (NULL);
968 	}
969 
970 	len = snprintf(name, sizeof (name), "/devices%s", devfspath);
971 	di_devfs_path_free(devfspath);
972 	if (len >= sizeof (name)) {
973 		rcm_log_message(RCM_ERROR, "MPXIO: resource path too long.\n");
974 		return (NULL);
975 	}
976 
977 	if ((rsrcname = strdup(name)) == NULL)
978 		rcm_log_message(RCM_ERROR,
979 		    "MPXIO: failed to allocate resource name (%s).\n",
980 		    strerror(errno));
981 
982 	return (rsrcname);
983 }
984