xref: /illumos-gate/usr/src/cmd/rcm_daemon/common/mpxio_rcm.c (revision 1da57d551424de5a9d469760be7c4b4d4f10a755)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * RCM module supporting multiplexed I/O controllers (MPxIO).
28  */
29 #include <stdlib.h>
30 #include <stdarg.h>
31 #include <unistd.h>
32 #include <assert.h>
33 #include <syslog.h>
34 #include <string.h>
35 #include <synch.h>
36 #include <libintl.h>
37 #include <locale.h>
38 #include <ctype.h>
39 #include <errno.h>
40 #include <libdevinfo.h>
41 #include <sys/types.h>
42 #include "rcm_module.h"
43 
/* Property names (no use of these two is visible in this file). */
#define	MPXIO_PROP_NAME		"mpxio-component"
#define	MPXIO_PROP_CLIENT	"client"

/* Operation codes passed as the 'cmd' argument of get_affected_clients(). */
enum {
	CMD_GETINFO = 0,
	CMD_OFFLINE = 1,
	CMD_ONLINE = 2,
	CMD_REMOVE = 3
};

/* Values stored in phci_list_t.referenced by the registrant bookkeeping. */
enum {
	CACHE_NEW = 0,
	CACHE_REFERENCED = 1,
	CACHE_STALE = 2
};

/* Localized message strings; each use calls gettext() at that point. */
#define	MPXIO_MSG_CACHEFAIL	gettext("Internal analysis failure.")
#define	MPXIO_MSG_LASTPATH	gettext("Last path to busy resources.")
#define	MPXIO_MSG_USAGE		gettext("SCSI Multipathing PHCI (%s)")
#define	MPXIO_MSG_USAGEUNKNOWN	gettext("SCSI Multipathing PHCI (<unknown>)")
60 
61 typedef struct {
62 	char *path;
63 	di_path_state_t state;
64 } phci_t;
65 
66 typedef struct phci_list {
67 	phci_t phci;
68 	int referenced;
69 	struct phci_list *next;
70 } phci_list_t;
71 
72 typedef struct group {
73 	int offline;
74 	int nphcis;
75 	int nclients;
76 	phci_t *phcis;
77 	char **clients;
78 	struct group *next;
79 } group_t;
80 
81 static int mpxio_register(rcm_handle_t *);
82 static int mpxio_unregister(rcm_handle_t *);
83 static int mpxio_getinfo(rcm_handle_t *, char *, id_t, uint_t, char **, char **,
84     nvlist_t *, rcm_info_t **);
85 static int mpxio_suspend(rcm_handle_t *, char *, id_t, timespec_t *, uint_t,
86     char **, rcm_info_t **);
87 static int mpxio_resume(rcm_handle_t *, char *, id_t, uint_t, char **,
88     rcm_info_t **);
89 static int mpxio_offline(rcm_handle_t *, char *, id_t, uint_t, char **,
90     rcm_info_t **);
91 static int mpxio_online(rcm_handle_t *, char *, id_t, uint_t, char **,
92     rcm_info_t **);
93 static int mpxio_remove(rcm_handle_t *, char *, id_t, uint_t, char **,
94     rcm_info_t **);
95 static int get_nclients(di_node_t, void *);
96 static int build_groups(di_node_t, void *);
97 static void refresh_regs(rcm_handle_t *);
98 static int get_affected_clients(rcm_handle_t *, char *, int, int, char ***);
99 static int detect_client_change(rcm_handle_t *, int, int, group_t *, char *);
100 static int merge_clients(int *, char ***, group_t *);
101 static phci_list_t *lookup_phci(char *);
102 static int is_client(di_node_t);
103 static char *get_rsrcname(di_node_t);
104 static char *s_state(di_path_state_t);
105 static int compare_phci(const void *, const void *);
106 static void free_grouplist();
107 static void free_group(group_t *);
108 static void free_clients(int, char **);
109 static void free_phcis(int, phci_t *);
110 
111 static struct rcm_mod_ops mpxio_ops =
112 {
113 	RCM_MOD_OPS_VERSION,
114 	mpxio_register,
115 	mpxio_unregister,
116 	mpxio_getinfo,
117 	mpxio_suspend,
118 	mpxio_resume,
119 	mpxio_offline,
120 	mpxio_online,
121 	mpxio_remove,
122 	NULL,
123 	NULL,
124 	NULL
125 };
126 
127 static group_t *group_list;
128 static phci_list_t *reg_list;
129 static mutex_t mpxio_lock;
130 
131 extern int errno;
132 
133 /*
134  * Return the mod-ops vector for initialization.
135  */
136 struct rcm_mod_ops *
137 rcm_mod_init()
138 {
139 	rcm_log_message(RCM_TRACE1, "MPXIO: rcm_mod_init()\n");
140 
141 	return (&mpxio_ops);
142 }
143 
144 /*
145  * Return name and version number for mod_info.
146  */
147 const char *
148 rcm_mod_info()
149 {
150 	rcm_log_message(RCM_TRACE1, "MPXIO: rcm_mod_info()\n");
151 
152 	return (gettext("RCM MPxIO module 1.6"));
153 }
154 
155 /*
156  * Destroy the cache and mutex lock when being unloaded.
157  */
158 int
159 rcm_mod_fini()
160 {
161 	phci_list_t *reg;
162 	phci_list_t *next;
163 
164 	rcm_log_message(RCM_TRACE1, "MPXIO: rcm_mod_fini()\n");
165 
166 	/* Free the cache of MPxIO group information */
167 	free_grouplist();
168 
169 	/* Free the cache of registrants */
170 	reg = reg_list;
171 	while (reg) {
172 		next = reg->next;
173 		free(reg->phci.path);
174 		free(reg);
175 		reg = next;
176 	}
177 
178 	/* Destroy the mutex for locking the caches */
179 	(void) mutex_destroy(&mpxio_lock);
180 
181 	return (RCM_SUCCESS);
182 }
183 
184 /*
185  * During each register callback: totally rebuild the group list from a new
186  * libdevinfo snapshot, and then update the registrants.
187  */
188 static int
189 mpxio_register(rcm_handle_t *hdl)
190 {
191 	int nclients = 0;
192 	di_node_t devroot;
193 
194 	rcm_log_message(RCM_TRACE1, "MPXIO: register()\n");
195 
196 	(void) mutex_lock(&mpxio_lock);
197 
198 	/* Destroy the previous group list */
199 	free_grouplist();
200 
201 	/* Get a current libdevinfo snapshot */
202 	if ((devroot = di_init("/", DINFOCPYALL | DINFOPATH)) == DI_NODE_NIL) {
203 		rcm_log_message(RCM_ERROR,
204 		    "MPXIO: libdevinfo initialization failed (%s).\n",
205 		    strerror(errno));
206 		(void) mutex_unlock(&mpxio_lock);
207 		return (RCM_FAILURE);
208 	}
209 
210 	/*
211 	 * First count the total number of clients.  This'll be a useful
212 	 * upper bound when allocating client arrays within each group.
213 	 */
214 	(void) di_walk_node(devroot, DI_WALK_CLDFIRST, &nclients, get_nclients);
215 
216 	rcm_log_message(RCM_TRACE2, gettext("MPXIO: found %d clients.\n"),
217 	    nclients);
218 
219 	/*
220 	 * Then walk the libdevinfo snapshot, building up the new group list
221 	 * along the way.  Pass in the total number of clients (from above) to
222 	 * assist in group construction.
223 	 */
224 	(void) di_walk_node(devroot, DI_WALK_CLDFIRST, &nclients, build_groups);
225 
226 	/* Now with a new group list constructed, refresh the registrants */
227 	refresh_regs(hdl);
228 
229 	/* Free the libdevinfo snapshot */
230 	di_fini(devroot);
231 
232 	(void) mutex_unlock(&mpxio_lock);
233 
234 	return (0);
235 }
236 
237 /*
238  * Unregister all PHCIs and mark the whole registrants list as stale.
239  */
240 static int
241 mpxio_unregister(rcm_handle_t *hdl)
242 {
243 	phci_list_t *reg;
244 
245 	rcm_log_message(RCM_TRACE1, "MPXIO: unregister()\n");
246 
247 	(void) mutex_lock(&mpxio_lock);
248 
249 	for (reg = reg_list; reg != NULL; reg = reg->next) {
250 		(void) rcm_unregister_interest(hdl, reg->phci.path, 0);
251 		reg->referenced = CACHE_STALE;
252 	}
253 
254 	(void) mutex_unlock(&mpxio_lock);
255 
256 	return (RCM_SUCCESS);
257 }
258 
259 /*
260  * To return usage information, just lookup the PHCI in the cache and return
261  * a string identifying that it's a PHCI and describing its cached MPxIO state.
262  * Recurse with the cached list of disks if dependents are to be included.
263  */
264 static int
265 mpxio_getinfo(rcm_handle_t *hdl, char *rsrc, id_t id, uint_t flags,
266     char **infostr, char **errstr, nvlist_t *props, rcm_info_t **infop)
267 {
268 	size_t len;
269 	int rv = RCM_SUCCESS;
270 	char *buf = NULL;
271 	char **clients = NULL;
272 	phci_list_t *reg;
273 	char c;
274 
275 	rcm_log_message(RCM_TRACE1, "MPXIO: getinfo(%s)\n", rsrc);
276 
277 	*infostr = NULL;
278 	*errstr = NULL;
279 
280 	(void) mutex_lock(&mpxio_lock);
281 
282 	if ((reg = lookup_phci(rsrc)) == NULL) {
283 		*errstr = strdup(MPXIO_MSG_CACHEFAIL);
284 		(void) mutex_unlock(&mpxio_lock);
285 		return (RCM_FAILURE);
286 	}
287 
288 	len = snprintf(&c, 1, MPXIO_MSG_USAGE, s_state(reg->phci.state));
289 	buf = calloc(len + 1, sizeof (char));
290 	if ((buf == NULL) || (snprintf(buf, len + 1, MPXIO_MSG_USAGE,
291 	    s_state(reg->phci.state)) > len + 1)) {
292 		*infostr = strdup(MPXIO_MSG_USAGEUNKNOWN);
293 		*errstr = strdup(gettext("Cannot construct usage string."));
294 		(void) mutex_unlock(&mpxio_lock);
295 		if (buf)
296 			free(buf);
297 		return (RCM_FAILURE);
298 	}
299 	*infostr = buf;
300 
301 	if (flags & RCM_INCLUDE_DEPENDENT) {
302 		rcm_log_message(RCM_TRACE2, "MPXIO: getting clients\n");
303 		if (get_affected_clients(hdl, rsrc, CMD_GETINFO, flags,
304 		    &clients) < 0) {
305 			*errstr = strdup(gettext("Cannot lookup clients."));
306 			(void) mutex_unlock(&mpxio_lock);
307 			return (RCM_FAILURE);
308 		}
309 		if (clients) {
310 			rv = rcm_get_info_list(hdl, clients, flags, infop);
311 			free(clients);
312 		} else {
313 			rcm_log_message(RCM_TRACE2, "MPXIO: none found\n");
314 		}
315 	}
316 
317 	(void) mutex_unlock(&mpxio_lock);
318 	return (rv);
319 }
320 
321 /*
322  * Nothing is implemented for suspend operations.
323  */
324 static int
325 mpxio_suspend(rcm_handle_t *hdl, char *rsrc, id_t id, timespec_t *interval,
326     uint_t flags, char **errstr, rcm_info_t **infop)
327 {
328 	rcm_log_message(RCM_TRACE1, "MPXIO: suspend(%s)\n", rsrc);
329 
330 	return (RCM_SUCCESS);
331 }
332 
333 /*
334  * Nothing is implemented for resume operations.
335  */
336 static int
337 mpxio_resume(rcm_handle_t *hdl, char *rsrc, id_t id, uint_t flags,
338     char **errstr, rcm_info_t **infop)
339 {
340 	rcm_log_message(RCM_TRACE1, "MPXIO: resume(%s)\n", rsrc);
341 
342 	return (RCM_SUCCESS);
343 }
344 
345 /*
346  * MPxIO has no policy against offlining.  If disks will be affected, then
347  * base the return value for this request on the results of offlining the
348  * list of disks.  Otherwise succeed.
349  */
350 static int
351 mpxio_offline(rcm_handle_t *hdl, char *rsrc, id_t id, uint_t flags,
352     char **errstr, rcm_info_t **infop)
353 {
354 	char **clients = NULL;
355 	int rv = RCM_SUCCESS;
356 
357 	rcm_log_message(RCM_TRACE1, "MPXIO: offline(%s)\n", rsrc);
358 
359 	(void) mutex_lock(&mpxio_lock);
360 
361 	if (get_affected_clients(hdl, rsrc, CMD_OFFLINE, flags, &clients) < 0) {
362 		*errstr = strdup(gettext("Cannot lookup clients."));
363 		(void) mutex_unlock(&mpxio_lock);
364 		return (RCM_FAILURE);
365 	}
366 
367 	if (clients) {
368 		rv = rcm_request_offline_list(hdl, clients, flags, infop);
369 		if (rv != RCM_SUCCESS)
370 			*errstr = strdup(MPXIO_MSG_LASTPATH);
371 		free(clients);
372 	}
373 
374 	(void) mutex_unlock(&mpxio_lock);
375 
376 	return (rv);
377 }
378 
379 /*
380  * If disks are affected, then they are probably offline and we need to
381  * propagate this online notification to them.
382  */
383 static int
384 mpxio_online(rcm_handle_t *hdl, char *rsrc, id_t id, uint_t flags,
385     char **errstr, rcm_info_t **infop)
386 {
387 	char **clients;
388 	int rv = RCM_SUCCESS;
389 
390 	rcm_log_message(RCM_TRACE1, "MPXIO: online(%s)\n", rsrc);
391 
392 	(void) mutex_lock(&mpxio_lock);
393 
394 	if (get_affected_clients(hdl, rsrc, CMD_ONLINE, flags, &clients) < 0) {
395 		*errstr = strdup(gettext("Cannot lookup clients."));
396 		(void) mutex_unlock(&mpxio_lock);
397 		return (RCM_FAILURE);
398 	}
399 
400 	if (clients) {
401 		rv = rcm_notify_online_list(hdl, clients, flags, infop);
402 		free(clients);
403 	}
404 
405 	(void) mutex_unlock(&mpxio_lock);
406 
407 	return (rv);
408 }
409 
410 /*
411  * If clients are affected, then they are probably offline and we need to
412  * propagate this removal notification to them.  We can also remove the
413  * cache entry for this PHCI.  If that leaves its group empty, then the
414  * group will be removed during the next register callback.
415  */
416 static int
417 mpxio_remove(rcm_handle_t *hdl, char *rsrc, id_t id, uint_t flags,
418     char **errstr, rcm_info_t **infop)
419 {
420 	char **clients;
421 	int rv = RCM_SUCCESS;
422 
423 	rcm_log_message(RCM_TRACE1, "MPXIO: remove(%s)\n", rsrc);
424 
425 	(void) mutex_lock(&mpxio_lock);
426 
427 	if (get_affected_clients(hdl, rsrc, CMD_REMOVE, flags, &clients) < 0) {
428 		*errstr = strdup(gettext("Cannot lookup clients."));
429 		(void) mutex_unlock(&mpxio_lock);
430 		return (RCM_FAILURE);
431 	}
432 
433 	if (clients) {
434 		rv = rcm_notify_remove_list(hdl, clients, flags, infop);
435 		free(clients);
436 	}
437 
438 	(void) mutex_unlock(&mpxio_lock);
439 
440 	return (rv);
441 }
442 
443 
444 /*
445  * Returns a string representation of a given libdevinfo path state.
446  */
447 static char *
448 s_state(di_path_state_t state)
449 {
450 	switch (state) {
451 	case DI_PATH_STATE_ONLINE:
452 		return ("online");
453 	case DI_PATH_STATE_OFFLINE:
454 		return ("offline");
455 	case DI_PATH_STATE_STANDBY:
456 		return ("standby");
457 	case DI_PATH_STATE_FAULT:
458 		return ("faulted");
459 	default:
460 		return ("<unknown>");
461 	}
462 }
463 
464 static int
465 get_affected_clients(rcm_handle_t *hdl, char *rsrc, int cmd, int flags,
466     char ***clientsp)
467 {
468 	int nclients = 0;
469 	phci_t phci;
470 	group_t *group;
471 	char **clients = NULL;
472 
473 	/* Build a dummy phci_t for use with bsearch(). */
474 	phci.path = rsrc;
475 
476 	/* Analyze the effects upon each group. */
477 	for (group = group_list; group != NULL; group = group->next) {
478 
479 		/* If the PHCI isn't in the group, then no effects.  Skip. */
480 		if (bsearch(&phci, group->phcis, group->nphcis, sizeof (phci_t),
481 		    compare_phci) == NULL)
482 			continue;
483 
484 		/*
485 		 * Merge in the clients.  All clients are merged in for getinfo
486 		 * operations.  Otherwise it's contingent upon a state change
487 		 * being transferred to the clients as a result of changing
488 		 * the PHCI's state.
489 		 */
490 		if ((cmd == CMD_GETINFO) ||
491 		    detect_client_change(hdl, cmd, flags, group, rsrc)) {
492 			if (merge_clients(&nclients, &clients, group) < 0) {
493 				free_clients(nclients, clients);
494 				return (-1);
495 			}
496 		}
497 	}
498 
499 	/* Return the array of affected disks */
500 	*clientsp = clients;
501 	return (0);
502 }
503 
504 /*
505  * Iterates through the members of a PHCI list, returning the entry
506  * corresponding to the named PHCI resource.  Returns NULL when the lookup
507  * fails.
508  */
509 static phci_list_t *
510 lookup_phci(char *rsrc)
511 {
512 	phci_list_t *reg;
513 
514 	for (reg = reg_list; reg != NULL; reg = reg->next) {
515 		if (strcmp(reg->phci.path, rsrc) == 0)
516 			return (reg);
517 	}
518 
519 	return (NULL);
520 }
521 
522 /*
523  * Tests whether or not an operation on a specific PHCI resource would affect
524  * the array of client devices attached to the PHCI's MPxIO group.
525  *
526  * Returns: 1 if clients would be affected, 0 if not.
527  */
528 static int
529 detect_client_change(rcm_handle_t *hdl, int cmd, int flags, group_t *group,
530     char *rsrc)
531 {
532 	int i;
533 	int state;
534 
535 	/*
536 	 * Perform a full set analysis on the set of redundant PHCIs.  When
537 	 * there are no unaffected and online PHCIs, then changing the state
538 	 * of the named PHCI results in a client state change.
539 	 */
540 	for (i = 0; i < group->nphcis; i++) {
541 
542 		/* Filter the named resource out of the analysis */
543 		if (strcmp(group->phcis[i].path, rsrc) == 0)
544 			continue;
545 
546 		/*
547 		 * If we find a path that's in the ONLINE or STANDBY state
548 		 * that would be left over in the system after completing
549 		 * whatever DR or hotplugging operation is in progress, then
550 		 * return a 0.
551 		 */
552 		if ((group->phcis[i].state == DI_PATH_STATE_ONLINE) ||
553 		    (group->phcis[i].state == DI_PATH_STATE_STANDBY)) {
554 			if (rcm_get_rsrcstate(hdl, group->phcis[i].path, &state)
555 			    != RCM_SUCCESS) {
556 				rcm_log_message(RCM_ERROR,
557 				    "MPXIO: Failed to query resource state\n");
558 				continue;
559 			}
560 			rcm_log_message(RCM_TRACE2, "MPXIO: state of %s: %d\n",
561 			    group->phcis[i].path, state);
562 			if (state == RCM_STATE_ONLINE) {
563 				return (0);
564 			}
565 		}
566 	}
567 
568 	/*
569 	 * The analysis above didn't find a redundant path to take over.  So
570 	 * report that the state of the client resources will change.
571 	 */
572 	return (1);
573 }
574 
575 /*
576  * Merges the client disks connected to a particular MPxIO group in with a
577  * previous array of disk clients.  The result is to adjust the 'nclients'
578  * value with the new count of disks in the array, and to adjust the 'disks'
579  * value to be a larger array of disks including its original contents along
580  * with the current group's contents merged in.
581  */
582 static int
583 merge_clients(int *nclients, char ***clientsp, group_t *group)
584 {
585 	int i;
586 	int old_nclients;
587 	char **clients_new;
588 
589 	if (group->nclients) {
590 		old_nclients = *nclients;
591 		*nclients += group->nclients;
592 		clients_new = realloc(*clientsp,
593 		    ((*nclients) + 1) * sizeof (char *));
594 		if (clients_new == NULL) {
595 			rcm_log_message(RCM_ERROR,
596 			    "MPXIO: cannot reallocate client array (%s).\n",
597 			    strerror(errno));
598 			return (-1);
599 		}
600 		for (i = old_nclients; i < (*nclients); i++) {
601 			/*
602 			 * Don't allocate space for individual disks in the
603 			 * merged list.  Just make references to the previously
604 			 * allocated strings in the group_t structs themselves.
605 			 */
606 			clients_new[i] = group->clients[i - old_nclients];
607 		}
608 		clients_new[(*nclients)] = NULL;
609 		*clientsp = clients_new;
610 	}
611 
612 	return (0);
613 }
614 
615 /*
616  * A libdevinfo di_walk_node() callback.  It's passed an integer pointer as an
617  * argument, and it increments the integer each time it encounters an MPxIO
618  * client.  By initializing the integer to zero and doing a libdevinfo walk with
619  * this function, the total count of MPxIO clients in the system can be found.
620  */
621 static int
622 get_nclients(di_node_t dinode, void *arg)
623 {
624 	int *nclients = arg;
625 
626 	if (is_client(dinode))
627 		(*nclients)++;
628 
629 	return (DI_WALK_CONTINUE);
630 }
631 
632 /*
633  * Tests a libdevinfo node to determine if it's an MPxIO client.
634  *
635  * Returns: non-zero for true, 0 for false.
636  */
637 static int
638 is_client(di_node_t dinode)
639 {
640 	return (di_path_client_next_path(dinode, DI_PATH_NIL) != DI_PATH_NIL);
641 }
642 
643 /*
644  * After a new group_list has been constructed, this refreshes the RCM
645  * registrations and the reg_list contents.  It uses a clock like algorithm
646  * with reference bits in the reg_list to know which registrants are new or
647  * old.
648  */
649 static void
650 refresh_regs(rcm_handle_t *hdl)
651 {
652 	int i;
653 	group_t *group;
654 	phci_list_t *reg;
655 	phci_list_t *prev_reg;
656 
657 	/*
658 	 * First part of the clock-like algorithm: clear reference bits.
659 	 */
660 	for (reg = reg_list; reg != NULL; reg = reg->next)
661 		reg->referenced = CACHE_STALE;
662 
663 	/*
664 	 * Second part of the clock-like algorithm: set the reference bits
665 	 * on every registrant that's still active.  (Also add new list nodes
666 	 * for new registrants.)
667 	 */
668 	for (group = group_list; group != NULL; group = group->next) {
669 		for (i = 0; i < group->nphcis; i++) {
670 
671 			/*
672 			 * If already stale in the registrants list, just set
673 			 * its reference bit to REFERENCED and update its state.
674 			 */
675 			if ((reg = lookup_phci(group->phcis[i].path)) != NULL) {
676 				if (reg->referenced == CACHE_STALE)
677 					reg->referenced = CACHE_REFERENCED;
678 				reg->phci.state = group->phcis[i].state;
679 				continue;
680 			}
681 
682 			/*
683 			 * Otherwise, build a new list node and mark it NEW.
684 			 */
685 			reg = (phci_list_t *)calloc(1, sizeof (*reg));
686 			if (reg == NULL) {
687 				rcm_log_message(RCM_ERROR,
688 				    "MPXIO: cannot allocate phci_list (%s).\n",
689 				    strerror(errno));
690 				continue;
691 			}
692 			reg->phci.path = strdup(group->phcis[i].path);
693 			if (reg->phci.path == NULL) {
694 				free(reg);
695 				rcm_log_message(RCM_ERROR,
696 				    "MPXIO: cannot allocate phci path (%s).\n",
697 				    strerror(errno));
698 				continue;
699 			}
700 			reg->phci.state = group->phcis[i].state;
701 			reg->referenced = CACHE_NEW;
702 
703 			/* Link it at the head of reg_list */
704 			reg->next = reg_list;
705 			reg_list = reg;
706 		}
707 	}
708 
709 	/*
710 	 * Final part of the clock algorithm: unregister stale entries, and
711 	 * register new entries.  Stale entries get removed from the list.
712 	 */
713 	reg = reg_list;
714 	prev_reg = NULL;
715 	while (reg) {
716 
717 		/* Unregister and remove stale entries. */
718 		if (reg->referenced == CACHE_STALE) {
719 			(void) rcm_unregister_interest(hdl, reg->phci.path, 0);
720 			free(reg->phci.path);
721 			if (prev_reg == NULL) {
722 				reg_list = reg->next;
723 				free(reg);
724 				reg = reg_list;
725 			} else {
726 				prev_reg->next = reg->next;
727 				free(reg);
728 				reg = prev_reg->next;
729 			}
730 			continue;
731 		}
732 
733 		/* Register new entries. */
734 		if (reg->referenced == CACHE_NEW) {
735 			if (rcm_register_interest(hdl, reg->phci.path, 0, NULL)
736 			    != RCM_SUCCESS) {
737 				rcm_log_message(RCM_ERROR,
738 				    "MPXIO: failed to register %s (%s).\n",
739 				    reg->phci.path, strerror(errno));
740 			}
741 		}
742 
743 		prev_reg = reg;
744 		reg = reg->next;
745 	}
746 }
747 
748 
749 /*
750  * A libdevinfo di_walk_node() callback that builds up the MPxIO group list.
751  *
752  * Every node encountered that's a client node is added into a group's client
753  * list.  Whenever a group doesn't already exist with a matching set of
754  * related PHCIs, then a new group is constructed and put at the head of the
755  * group list.
756  */
757 static int
758 build_groups(di_node_t dinode, void *arg)
759 {
760 	int i = 0;
761 	int nphcis = 0;
762 	int *nclients = (int *)arg;
763 	phci_t *phcis;
764 	group_t *group;
765 	di_node_t phcinode;
766 	di_path_t dipath = DI_PATH_NIL;
767 
768 	/* Safety check */
769 	if (nclients == NULL)
770 		return (DI_WALK_TERMINATE);
771 
772 	/*
773 	 * Build a sorted array of PHCIs pertaining to the client.
774 	 */
775 	while ((dipath =
776 	    di_path_client_next_path(dinode, dipath)) != DI_PATH_NIL)
777 		nphcis++;
778 
779 	/* Skip non-clients. */
780 	if (nphcis == 0)
781 		return (DI_WALK_CONTINUE);
782 
783 	if ((phcis = (phci_t *)calloc(nphcis, sizeof (phci_t))) == NULL) {
784 		rcm_log_message(RCM_ERROR,
785 		    "MPXIO: failed to allocate client's PHCIs (%s).\n",
786 		    strerror(errno));
787 		return (DI_WALK_TERMINATE);
788 	}
789 	while ((dipath =
790 	    di_path_client_next_path(dinode, dipath)) != DI_PATH_NIL) {
791 		phcinode = di_path_phci_node(dipath);
792 		if (phcinode == DI_NODE_NIL) {
793 			free_phcis(i, phcis);	/* free preceeding PHCIs */
794 			rcm_log_message(RCM_ERROR,
795 			    "MPXIO: client appears to have no PHCIs.\n");
796 			return (DI_WALK_TERMINATE);
797 		}
798 		if ((phcis[i].path = get_rsrcname(phcinode)) == NULL) {
799 			free_phcis(i, phcis);
800 			return (DI_WALK_TERMINATE);
801 		}
802 		phcis[i].state = di_path_state(dipath);
803 		i++;
804 	}
805 	qsort(phcis, nphcis, sizeof (phci_t), compare_phci);
806 
807 	/*
808 	 * Compare that PHCI set to each existing group's set.  We just add
809 	 * the client to the group and exit successfully once a match is made.
810 	 * Falling out of this loop means no match was found.
811 	 */
812 	for (group = group_list; group != NULL; group = group->next) {
813 
814 		/* There is no match if the number of PHCIs is inequal */
815 		if (nphcis != group->nphcis)
816 			continue;
817 
818 		/* Compare the PHCIs linearly (which is okay; they're sorted) */
819 		for (i = 0; i < nphcis; i++)
820 			if (strcmp(phcis[i].path, group->phcis[i].path) != 0)
821 				break;
822 
823 		/*
824 		 * If the loop above completed, we have a match.  Add the client
825 		 * to the group's disk array in that case, and return
826 		 * successfully.
827 		 */
828 		if (i == nphcis) {
829 			free_phcis(nphcis, phcis);
830 			if ((group->clients[group->nclients] =
831 			    get_rsrcname(dinode)) == NULL)
832 				return (DI_WALK_TERMINATE);
833 			group->nclients++;
834 			return (DI_WALK_CONTINUE);
835 		}
836 	}
837 
838 	/* The loop above didn't find a match.  So build a new group. */
839 	if ((group = (group_t *)calloc(1, sizeof (*group))) == NULL) {
840 		rcm_log_message(RCM_ERROR,
841 		    "MPXIO: failed to allocate PHCI group (%s).\n",
842 		    strerror(errno));
843 		free_phcis(nphcis, phcis);
844 		return (DI_WALK_TERMINATE);
845 	}
846 	if ((group->clients = (char **)calloc(*nclients, sizeof (char *))) ==
847 	    NULL) {
848 		free(group);
849 		free_phcis(nphcis, phcis);
850 		return (DI_WALK_TERMINATE);
851 	}
852 	group->nphcis = nphcis;
853 	group->phcis = phcis;
854 	if ((group->clients[0] = get_rsrcname(dinode)) == NULL) {
855 		free_group(group);
856 		return (DI_WALK_TERMINATE);
857 	}
858 	group->nclients = 1;
859 
860 	/* Link the group into the group list and return successfully. */
861 	group->next = group_list;
862 	group_list = group;
863 	return (DI_WALK_CONTINUE);
864 }
865 
866 /*
867  * For bsearch() and qsort().  Returns the results of a strcmp() on the names
868  * of two phci_t's.
869  */
870 static int
871 compare_phci(const void *arg1, const void *arg2)
872 {
873 	phci_t *p1 = (phci_t *)arg1;
874 	phci_t *p2 = (phci_t *)arg2;
875 
876 	if ((p1 == NULL) || (p2 == NULL)) {
877 		if (p1 != NULL)
878 			return (-1);
879 		else if (p2 != NULL)
880 			return (1);
881 		return (0);
882 	}
883 
884 	return (strcmp(p1->path, p2->path));
885 }
886 
887 /*
888  * Free the whole list of group's in the global group_list.
889  */
890 static void
891 free_grouplist()
892 {
893 	group_t *group = group_list;
894 	group_t *next;
895 
896 	while (group) {
897 		next = group->next;
898 		free_group(group);
899 		group = next;
900 	}
901 
902 	group_list = NULL;
903 }
904 
905 /*
906  * Free the contents of a single group_t.
907  */
908 static void
909 free_group(group_t *group)
910 {
911 	if (group) {
912 		free_phcis(group->nphcis, group->phcis);
913 		free_clients(group->nclients, group->clients);
914 		free(group);
915 	}
916 }
917 
/*
 * Releases an array of client names: the first 'nclients' strings and then
 * the array itself.  A NULL array is ignored; a count of zero or less frees
 * only the array.
 */
static void
free_clients(int nclients, char **clients)
{
	int idx;

	if (clients == NULL)
		return;

	/* free(NULL) is a no-op, so empty slots need no special casing. */
	for (idx = 0; idx < nclients; idx++)
		free(clients[idx]);

	free(clients);
}
935 
936 /*
937  * Free an array of phci_t's.
938  */
939 static void
940 free_phcis(int nphcis, phci_t *phcis)
941 {
942 	int i;
943 
944 	if ((phcis != NULL) && (nphcis > 0)) {
945 		for (i = 0; i < nphcis; i++)
946 			if (phcis[i].path)
947 				free(phcis[i].path);
948 		free(phcis);
949 	}
950 }
951 
952 /*
953  * Converts a libdevinfo node into a /devices path.  Caller must free results.
954  */
955 static char *
956 get_rsrcname(di_node_t dinode)
957 {
958 	int len;
959 	char *rsrcname;
960 	char *devfspath;
961 	char name[MAXPATHLEN];
962 
963 	if ((devfspath = di_devfs_path(dinode)) == NULL) {
964 		rcm_log_message(RCM_ERROR, "MPXIO: resource has null path.\n");
965 		return (NULL);
966 	}
967 
968 	len = snprintf(name, sizeof (name), "/devices%s", devfspath);
969 	di_devfs_path_free(devfspath);
970 	if (len >= sizeof (name)) {
971 		rcm_log_message(RCM_ERROR, "MPXIO: resource path too long.\n");
972 		return (NULL);
973 	}
974 
975 	if ((rsrcname = strdup(name)) == NULL)
976 		rcm_log_message(RCM_ERROR,
977 		    "MPXIO: failed to allocate resource name (%s).\n",
978 		    strerror(errno));
979 
980 	return (rsrcname);
981 }
982