xref: /titanic_50/usr/src/uts/common/os/devid_cache.c (revision 5c5f137104b2d56181283389fa902220f2023809)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 #include <sys/note.h>
26 #include <sys/t_lock.h>
27 #include <sys/cmn_err.h>
28 #include <sys/instance.h>
29 #include <sys/conf.h>
30 #include <sys/stat.h>
31 #include <sys/ddi.h>
32 #include <sys/hwconf.h>
33 #include <sys/sunddi.h>
34 #include <sys/sunndi.h>
35 #include <sys/sunmdi.h>
36 #include <sys/ddi_impldefs.h>
37 #include <sys/ndi_impldefs.h>
38 #include <sys/kobj.h>
39 #include <sys/devcache.h>
40 #include <sys/devid_cache.h>
41 #include <sys/sysmacros.h>
42 
43 /*
44  * Discovery refers to the heroic effort made to discover a device which
45  * cannot be accessed at the physical path where it once resided.  Discovery
46  * involves walking the entire device tree attaching all possible disk
47  * instances, to search for the device referenced by a devid.  Obviously,
48  * full device discovery is something to be avoided where possible.
49  * Note that simply invoking devfsadm(1M) is equivalent to running full
50  * discovery at the devid cache level.
51  *
52  * Reasons why a disk may not be accessible:
53  *	disk powered off
54  *	disk removed or cable disconnected
55  *	disk or adapter broken
56  *
57  * Note that discovery is not needed and cannot succeed in any of these
58  * cases.
59  *
60  * When discovery may succeed:
61  *	Discovery will result in success when a device has been moved
62  *	to a different address.  Note that it's recommended that
63  *	devfsadm(1M) be invoked (no arguments required) whenever a system's
64  *	h/w configuration has been updated.  Alternatively, a
65  *	reconfiguration boot can be used to accomplish the same result.
66  *
67  * Note that discovery is not necessary to be able to correct an access
68  * failure for a device which was powered off.  Assuming the cache has an
69  * entry for such a device, simply powering it on should permit the system
70  * to access it.  If problems persist after powering it on, invoke
71  * devfsadm(1M).
72  *
73  * Discovery prior to mounting root is only of interest when booting
74  * from a filesystem which accesses devices by device id, which of
75  * not all do.
76  *
77  * Tunables
78  *
79  * devid_discovery_boot (default 1)
80  *	Number of times discovery will be attempted prior to mounting root.
81  *	Must be done at least once to recover from corrupted or missing
82  *	devid cache backing store.  Probably there's no reason to ever
83  *	set this to greater than one as a missing device will remain
84  *	unavailable no matter how often the system searches for it.
85  *
86  * devid_discovery_postboot (default 1)
87  *	Number of times discovery will be attempted after mounting root.
88  *	This must be performed at least once to discover any devices
89  *	needed after root is mounted which may have been powered
90  *	off and moved before booting.
91  *	Setting this to a larger positive number will introduce
92  *	some inconsistency in system operation.  Searching for a device
93  *	will take an indeterminate amount of time, sometimes slower,
94  *	sometimes faster.  In addition, the system will sometimes
95  *	discover a newly powered on device, sometimes it won't.
96  *	Use of this option is not therefore recommended.
97  *
98  * devid_discovery_postboot_always (default 0)
99  *	Set to 1, the system will always attempt full discovery.
100  *
101  * devid_discovery_secs (default 0)
102  *	Set to a positive value, the system will attempt full discovery
103  *	but with a minimum delay between attempts.  A device search
104  *	within the period of time specified will result in failure.
105  *
106  * devid_cache_read_disable (default 0)
107  *	Set to 1 to disable reading /etc/devices/devid_cache.
108  *	Devid cache will continue to operate normally but
109  *	at least one discovery attempt will be required.
110  *
111  * devid_cache_write_disable (default 0)
112  *	Set to 1 to disable updates to /etc/devices/devid_cache.
113  *	Any updates to the devid cache will not be preserved across a reboot.
114  *
115  * devid_report_error (default 0)
116  *	Set to 1 to enable some error messages related to devid
117  *	cache failures.
118  *
119  * The devid is packed in the cache file as a byte array.  For
120  * portability, this could be done in the encoded string format.
121  */
122 
123 
/*
 * Tunables.  Each is described in detail in the "Tunables" section of
 * the comment at the top of this file.
 */

/* Remaining discovery attempts permitted before root is mounted. */
int devid_discovery_boot = 1;
/* Remaining discovery attempts permitted after root is mounted. */
int devid_discovery_postboot = 1;
/* Non-zero: always attempt full discovery once booted. */
int devid_discovery_postboot_always = 0;
/* If positive, minimum number of seconds between discovery attempts. */
int devid_discovery_secs = 0;

/* Non-zero: devid_cache_read() does not read the cache file. */
int devid_cache_read_disable = 0;
/* Non-zero: cache changes are not marked dirty, so are not written out. */
int devid_cache_write_disable = 0;

/* Non-zero: enable some devid cache error messages. */
int devid_report_error = 0;
133 
134 
/*
 * State to manage discovery of devices providing a devid
 *
 * devid_discovery_busy is set while one thread is running discovery in
 * e_ddi_devid_discovery(); other callers wait on devid_discovery_cv
 * until it is cleared.  devid_discovery_mutex protects the busy flag
 * and is held across the tunable checks in e_devid_do_discovery().
 * devid_last_discovery is the lbolt value recorded when a discovery
 * completed; it is only updated while devid_discovery_secs is positive.
 */
static int		devid_discovery_busy = 0;
static kmutex_t		devid_discovery_mutex;
static kcondvar_t	devid_discovery_cv;
static clock_t		devid_last_discovery = 0;
142 
143 
/*
 * Debug-only switches, all off by default.  None of them is referenced
 * directly in this file; presumably they are consulted by the
 * DEVID_LOG_*, DEVID_DEBUG* and NVP_DEVID_DEBUG_* macros defined
 * elsewhere -- TODO confirm against the header that defines them.
 */
#ifdef	DEBUG
int nvp_devid_debug = 0;
int devid_debug = 0;
int devid_log_registers = 0;
int devid_log_finds = 0;
int devid_log_lookups = 0;
int devid_log_discovery = 0;
int devid_log_matches = 0;
int devid_log_paths = 0;
int devid_log_failures = 0;
int devid_log_hold = 0;
int devid_log_unregisters = 0;
int devid_log_removes = 0;
int devid_register_debug = 0;
int devid_log_stale = 0;
int devid_log_detaches = 0;
#endif	/* DEBUG */
161 
/*
 * devid cache file registration for cache reads and updates
 *
 * Passed to nvf_register_file() by devid_cache_init().  The handlers
 * named here are the static functions defined later in this file.
 */
static nvf_ops_t devid_cache_ops = {
	"/etc/devices/devid_cache",		/* path to cache */
	devid_cache_unpack_nvlist,		/* read: nvlist to nvp */
	devid_cache_pack_list,			/* write: nvp to nvlist */
	devid_list_free,			/* free data list */
	NULL					/* write complete callback */
};

/*
 * handle to registered devid cache handlers
 *
 * Assigned once in devid_cache_init(); every other routine in this file
 * reaches the cache list and its rwlock through nvf_list(dcfd_handle)
 * and nvf_lock(dcfd_handle).
 */
nvf_handle_t	dcfd_handle;
177 
178 
179 /*
180  * Initialize devid cache file management
181  */
182 void
183 devid_cache_init(void)
184 {
185 	dcfd_handle = nvf_register_file(&devid_cache_ops);
186 	ASSERT(dcfd_handle);
187 
188 	list_create(nvf_list(dcfd_handle), sizeof (nvp_devid_t),
189 	    offsetof(nvp_devid_t, nvp_link));
190 
191 	mutex_init(&devid_discovery_mutex, NULL, MUTEX_DEFAULT, NULL);
192 	cv_init(&devid_discovery_cv, NULL, CV_DRIVER, NULL);
193 }
194 
195 /*
196  * Read and initialize the devid cache from the persistent store
197  */
198 void
199 devid_cache_read(void)
200 {
201 	if (!devid_cache_read_disable) {
202 		rw_enter(nvf_lock(dcfd_handle), RW_WRITER);
203 		ASSERT(list_head(nvf_list(dcfd_handle)) == NULL);
204 		(void) nvf_read_file(dcfd_handle);
205 		rw_exit(nvf_lock(dcfd_handle));
206 	}
207 }
208 
209 static void
210 devid_nvp_free(nvp_devid_t *dp)
211 {
212 	if (dp->nvp_devpath)
213 		kmem_free(dp->nvp_devpath, strlen(dp->nvp_devpath)+1);
214 	if (dp->nvp_devid)
215 		kmem_free(dp->nvp_devid, ddi_devid_sizeof(dp->nvp_devid));
216 
217 	kmem_free(dp, sizeof (nvp_devid_t));
218 }
219 
220 static void
221 devid_list_free(nvf_handle_t fd)
222 {
223 	list_t		*listp;
224 	nvp_devid_t	*np;
225 
226 	ASSERT(RW_WRITE_HELD(nvf_lock(dcfd_handle)));
227 
228 	listp = nvf_list(fd);
229 	while (np = list_head(listp)) {
230 		list_remove(listp, np);
231 		devid_nvp_free(np);
232 	}
233 }
234 
235 /*
236  * Free an nvp element in a list
237  */
238 static void
239 devid_nvp_unlink_and_free(nvf_handle_t fd, nvp_devid_t *np)
240 {
241 	list_remove(nvf_list(fd), np);
242 	devid_nvp_free(np);
243 }
244 
245 /*
246  * Unpack a device path/nvlist pair to the list of devid cache elements.
247  * Used to parse the nvlist format when reading
248  * /etc/devices/devid_cache
249  */
250 static int
251 devid_cache_unpack_nvlist(nvf_handle_t fd, nvlist_t *nvl, char *name)
252 {
253 	nvp_devid_t *np;
254 	ddi_devid_t devidp;
255 	int rval;
256 	uint_t n;
257 
258 	NVP_DEVID_DEBUG_PATH((name));
259 	ASSERT(RW_WRITE_HELD(nvf_lock(dcfd_handle)));
260 
261 	/*
262 	 * check path for a devid
263 	 */
264 	rval = nvlist_lookup_byte_array(nvl,
265 	    DP_DEVID_ID, (uchar_t **)&devidp, &n);
266 	if (rval == 0) {
267 		if (ddi_devid_valid(devidp) == DDI_SUCCESS) {
268 			ASSERT(n == ddi_devid_sizeof(devidp));
269 			np = kmem_zalloc(sizeof (nvp_devid_t), KM_SLEEP);
270 			np->nvp_devpath = i_ddi_strdup(name, KM_SLEEP);
271 			np->nvp_devid = kmem_alloc(n, KM_SLEEP);
272 			(void) bcopy(devidp, np->nvp_devid, n);
273 			list_insert_tail(nvf_list(fd), np);
274 			NVP_DEVID_DEBUG_DEVID((np->nvp_devid));
275 		} else {
276 			DEVIDERR((CE_CONT,
277 			    "%s: invalid devid\n", name));
278 		}
279 	} else {
280 		DEVIDERR((CE_CONT,
281 		    "%s: devid not available\n", name));
282 	}
283 
284 	return (0);
285 }
286 
287 /*
288  * Pack the list of devid cache elements into a single nvlist
289  * Used when writing the nvlist file.
290  */
291 static int
292 devid_cache_pack_list(nvf_handle_t fd, nvlist_t **ret_nvl)
293 {
294 	nvlist_t	*nvl, *sub_nvl;
295 	nvp_devid_t	*np;
296 	int		rval;
297 	list_t		*listp;
298 
299 	ASSERT(RW_WRITE_HELD(nvf_lock(dcfd_handle)));
300 
301 	rval = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP);
302 	if (rval != 0) {
303 		nvf_error("%s: nvlist alloc error %d\n",
304 		    nvf_cache_name(fd), rval);
305 		return (DDI_FAILURE);
306 	}
307 
308 	listp = nvf_list(fd);
309 	for (np = list_head(listp); np; np = list_next(listp, np)) {
310 		if (np->nvp_devid == NULL)
311 			continue;
312 		NVP_DEVID_DEBUG_PATH(np->nvp_devpath);
313 		rval = nvlist_alloc(&sub_nvl, NV_UNIQUE_NAME, KM_SLEEP);
314 		if (rval != 0) {
315 			nvf_error("%s: nvlist alloc error %d\n",
316 			    nvf_cache_name(fd), rval);
317 			sub_nvl = NULL;
318 			goto err;
319 		}
320 
321 		rval = nvlist_add_byte_array(sub_nvl, DP_DEVID_ID,
322 		    (uchar_t *)np->nvp_devid,
323 		    ddi_devid_sizeof(np->nvp_devid));
324 		if (rval == 0) {
325 			NVP_DEVID_DEBUG_DEVID(np->nvp_devid);
326 		} else {
327 			nvf_error(
328 			    "%s: nvlist add error %d (devid)\n",
329 			    nvf_cache_name(fd), rval);
330 			goto err;
331 		}
332 
333 		rval = nvlist_add_nvlist(nvl, np->nvp_devpath, sub_nvl);
334 		if (rval != 0) {
335 			nvf_error("%s: nvlist add error %d (sublist)\n",
336 			    nvf_cache_name(fd), rval);
337 			goto err;
338 		}
339 		nvlist_free(sub_nvl);
340 	}
341 
342 	*ret_nvl = nvl;
343 	return (DDI_SUCCESS);
344 
345 err:
346 	nvlist_free(sub_nvl);
347 	nvlist_free(nvl);
348 	*ret_nvl = NULL;
349 	return (DDI_FAILURE);
350 }
351 
352 static int
353 e_devid_do_discovery(void)
354 {
355 	ASSERT(mutex_owned(&devid_discovery_mutex));
356 
357 	if (i_ddi_io_initialized() == 0) {
358 		if (devid_discovery_boot > 0) {
359 			devid_discovery_boot--;
360 			return (1);
361 		}
362 	} else {
363 		if (devid_discovery_postboot_always > 0)
364 			return (1);
365 		if (devid_discovery_postboot > 0) {
366 			devid_discovery_postboot--;
367 			return (1);
368 		}
369 		if (devid_discovery_secs > 0) {
370 			if ((ddi_get_lbolt() - devid_last_discovery) >
371 			    drv_usectohz(devid_discovery_secs * MICROSEC)) {
372 				return (1);
373 			}
374 		}
375 	}
376 
377 	DEVID_LOG_DISC((CE_CONT, "devid_discovery: no discovery\n"));
378 	return (0);
379 }
380 
381 static void
382 e_ddi_devid_hold_by_major(major_t major)
383 {
384 	DEVID_LOG_DISC((CE_CONT,
385 	    "devid_discovery: ddi_hold_installed_driver %d\n", major));
386 
387 	if (ddi_hold_installed_driver(major) == NULL)
388 		return;
389 
390 	ddi_rele_driver(major);
391 }
392 
/*
 * legacy support - see below
 *
 * Drivers that e_ddi_devid_hold_installed_driver() always holds, in
 * addition to those flagged DN_DEVID_REGISTRANT.
 */
static char *e_ddi_devid_hold_driver_list[] = { "sd", "ssd" };

/* number of entries in e_ddi_devid_hold_driver_list */
#define	N_DRIVERS_TO_HOLD	\
	(sizeof (e_ddi_devid_hold_driver_list) / sizeof (char *))
398 
399 static void
400 e_ddi_devid_hold_installed_driver(ddi_devid_t devid)
401 {
402 	impl_devid_t	*id = (impl_devid_t *)devid;
403 	major_t		major, hint_major;
404 	char		hint[DEVID_HINT_SIZE + 1];
405 	struct devnames	*dnp;
406 	char		**drvp;
407 	int		i;
408 
409 	/* Count non-null bytes */
410 	for (i = 0; i < DEVID_HINT_SIZE; i++)
411 		if (id->did_driver[i] == '\0')
412 			break;
413 
414 	/* Make a copy of the driver hint */
415 	bcopy(id->did_driver, hint, i);
416 	hint[i] = '\0';
417 
418 	/* search for the devid using the hint driver */
419 	hint_major = ddi_name_to_major(hint);
420 	if (hint_major != DDI_MAJOR_T_NONE) {
421 		e_ddi_devid_hold_by_major(hint_major);
422 	}
423 
424 	/*
425 	 * search for the devid with each driver declaring
426 	 * itself as a devid registrant.
427 	 */
428 	for (major = 0; major < devcnt; major++) {
429 		if (major == hint_major)
430 			continue;
431 		dnp = &devnamesp[major];
432 		if (dnp->dn_flags & DN_DEVID_REGISTRANT) {
433 			e_ddi_devid_hold_by_major(major);
434 		}
435 	}
436 
437 	/*
438 	 * Legacy support: may be removed once an upgrade mechanism
439 	 * for driver conf files is available.
440 	 */
441 	drvp = e_ddi_devid_hold_driver_list;
442 	for (i = 0; i < N_DRIVERS_TO_HOLD; i++, drvp++) {
443 		major = ddi_name_to_major(*drvp);
444 		if (major != DDI_MAJOR_T_NONE && major != hint_major) {
445 			e_ddi_devid_hold_by_major(major);
446 		}
447 	}
448 }
449 
450 /*
451  * Return success if discovery was attempted, to indicate
452  * that the desired device may now be available.
453  */
454 int
455 e_ddi_devid_discovery(ddi_devid_t devid)
456 {
457 	int flags;
458 	int rval = DDI_SUCCESS;
459 
460 	mutex_enter(&devid_discovery_mutex);
461 
462 	if (devid_discovery_busy) {
463 		DEVID_LOG_DISC((CE_CONT, "devid_discovery: busy\n"));
464 		while (devid_discovery_busy) {
465 			cv_wait(&devid_discovery_cv, &devid_discovery_mutex);
466 		}
467 	} else if (e_devid_do_discovery()) {
468 		devid_discovery_busy = 1;
469 		mutex_exit(&devid_discovery_mutex);
470 
471 		if (i_ddi_io_initialized() == 0) {
472 			e_ddi_devid_hold_installed_driver(devid);
473 		} else {
474 			DEVID_LOG_DISC((CE_CONT,
475 			    "devid_discovery: ndi_devi_config\n"));
476 			flags = NDI_DEVI_PERSIST | NDI_CONFIG | NDI_NO_EVENT;
477 			if (i_ddi_io_initialized())
478 				flags |= NDI_DRV_CONF_REPROBE;
479 			(void) ndi_devi_config(ddi_root_node(), flags);
480 		}
481 
482 		mutex_enter(&devid_discovery_mutex);
483 		devid_discovery_busy = 0;
484 		cv_broadcast(&devid_discovery_cv);
485 		if (devid_discovery_secs > 0)
486 			devid_last_discovery = ddi_get_lbolt();
487 		DEVID_LOG_DISC((CE_CONT, "devid_discovery: done\n"));
488 	} else {
489 		rval = DDI_FAILURE;
490 		DEVID_LOG_DISC((CE_CONT, "no devid discovery\n"));
491 	}
492 
493 	mutex_exit(&devid_discovery_mutex);
494 
495 	return (rval);
496 }
497 
498 /*
499  * As part of registering a devid for a device,
500  * update the devid cache with this device/devid pair
501  * or note that this combination has registered.
502  *
503  * If a devpath is provided it will be used as the path to register the
504  * devid against, otherwise we use ddi_pathname(dip).  In both cases
505  * we duplicate the path string so that it can be cached/freed indepdently
506  * of the original owner.
507  */
508 static int
509 e_devid_cache_register_cmn(dev_info_t *dip, ddi_devid_t devid, char *devpath)
510 {
511 	nvp_devid_t *np;
512 	nvp_devid_t *new_nvp;
513 	ddi_devid_t new_devid;
514 	int new_devid_size;
515 	char *path, *fullpath;
516 	ddi_devid_t free_devid = NULL;
517 	int pathlen;
518 	list_t *listp;
519 	int is_dirty = 0;
520 
521 
522 	ASSERT(ddi_devid_valid(devid) == DDI_SUCCESS);
523 
524 	if (devpath) {
525 		pathlen = strlen(devpath) + 1;
526 		path = kmem_alloc(pathlen, KM_SLEEP);
527 		bcopy(devpath, path, pathlen);
528 	} else {
529 		/*
530 		 * We are willing to accept DS_BOUND nodes if we can form a full
531 		 * ddi_pathname (i.e. the node is part way to becomming
532 		 * DS_INITIALIZED and devi_addr/ddi_get_name_addr are non-NULL).
533 		 */
534 		if (ddi_get_name_addr(dip) == NULL)
535 			return (DDI_FAILURE);
536 
537 		fullpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
538 		(void) ddi_pathname(dip, fullpath);
539 		pathlen = strlen(fullpath) + 1;
540 		path = kmem_alloc(pathlen, KM_SLEEP);
541 		bcopy(fullpath, path, pathlen);
542 		kmem_free(fullpath, MAXPATHLEN);
543 	}
544 
545 	DEVID_LOG_REG(("register", devid, path));
546 
547 	new_nvp = kmem_zalloc(sizeof (nvp_devid_t), KM_SLEEP);
548 	new_devid_size = ddi_devid_sizeof(devid);
549 	new_devid = kmem_alloc(new_devid_size, KM_SLEEP);
550 	(void) bcopy(devid, new_devid, new_devid_size);
551 
552 	rw_enter(nvf_lock(dcfd_handle), RW_WRITER);
553 
554 	listp = nvf_list(dcfd_handle);
555 	for (np = list_head(listp); np; np = list_next(listp, np)) {
556 		if (strcmp(path, np->nvp_devpath) == 0) {
557 			DEVID_DEBUG2((CE_CONT,
558 			    "register: %s path match\n", path));
559 			if (np->nvp_devid == NULL) {
560 replace:			np->nvp_devid = new_devid;
561 				np->nvp_flags |=
562 				    NVP_DEVID_DIP | NVP_DEVID_REGISTERED;
563 				np->nvp_dip = dip;
564 				if (!devid_cache_write_disable) {
565 					nvf_mark_dirty(dcfd_handle);
566 					is_dirty = 1;
567 				}
568 				rw_exit(nvf_lock(dcfd_handle));
569 				kmem_free(new_nvp, sizeof (nvp_devid_t));
570 				kmem_free(path, pathlen);
571 				goto exit;
572 			}
573 			if (ddi_devid_valid(np->nvp_devid) != DDI_SUCCESS) {
574 				/* replace invalid devid */
575 				free_devid = np->nvp_devid;
576 				goto replace;
577 			}
578 			/*
579 			 * We're registering an already-cached path
580 			 * Does the device's devid match the cache?
581 			 */
582 			if (ddi_devid_compare(devid, np->nvp_devid) != 0) {
583 				DEVID_DEBUG((CE_CONT, "devid register: "
584 				    "devid %s does not match\n", path));
585 				/*
586 				 * Replace cached devid for this path
587 				 * with newly registered devid.  A devid
588 				 * may map to multiple paths but one path
589 				 * should only map to one devid.
590 				 */
591 				devid_nvp_unlink_and_free(dcfd_handle, np);
592 				np = NULL;
593 				break;
594 			} else {
595 				DEVID_DEBUG2((CE_CONT,
596 				    "devid register: %s devid match\n", path));
597 				np->nvp_flags |=
598 				    NVP_DEVID_DIP | NVP_DEVID_REGISTERED;
599 				np->nvp_dip = dip;
600 				rw_exit(nvf_lock(dcfd_handle));
601 				kmem_free(new_nvp, sizeof (nvp_devid_t));
602 				kmem_free(path, pathlen);
603 				kmem_free(new_devid, new_devid_size);
604 				return (DDI_SUCCESS);
605 			}
606 		}
607 	}
608 
609 	/*
610 	 * Add newly registered devid to the cache
611 	 */
612 	ASSERT(np == NULL);
613 
614 	new_nvp->nvp_devpath = path;
615 	new_nvp->nvp_flags = NVP_DEVID_DIP | NVP_DEVID_REGISTERED;
616 	new_nvp->nvp_dip = dip;
617 	new_nvp->nvp_devid = new_devid;
618 
619 	if (!devid_cache_write_disable) {
620 		is_dirty = 1;
621 		nvf_mark_dirty(dcfd_handle);
622 	}
623 	list_insert_tail(nvf_list(dcfd_handle), new_nvp);
624 
625 	rw_exit(nvf_lock(dcfd_handle));
626 
627 exit:
628 	if (free_devid)
629 		kmem_free(free_devid, ddi_devid_sizeof(free_devid));
630 
631 	if (is_dirty)
632 		nvf_wake_daemon();
633 
634 	return (DDI_SUCCESS);
635 }
636 
637 int
638 e_devid_cache_register(dev_info_t *dip, ddi_devid_t devid)
639 {
640 	return (e_devid_cache_register_cmn(dip, devid, NULL));
641 }
642 
643 /*
644  * Unregister a device's devid; the devinfo may hit on multiple entries
645  * arising from both pHCI and vHCI paths.
646  * Called as an instance detachs.
647  * Invalidate the devid's devinfo reference.
648  * Devid-path remains in the cache.
649  */
650 
651 void
652 e_devid_cache_unregister(dev_info_t *dip)
653 {
654 	nvp_devid_t *np;
655 	list_t *listp;
656 
657 	rw_enter(nvf_lock(dcfd_handle), RW_WRITER);
658 
659 	listp = nvf_list(dcfd_handle);
660 	for (np = list_head(listp); np; np = list_next(listp, np)) {
661 		if (np->nvp_devid == NULL)
662 			continue;
663 		if ((np->nvp_flags & NVP_DEVID_DIP) && np->nvp_dip == dip) {
664 			DEVID_LOG_UNREG((CE_CONT,
665 			    "unregister: %s\n", np->nvp_devpath));
666 			np->nvp_flags &= ~NVP_DEVID_DIP;
667 			np->nvp_dip = NULL;
668 		}
669 	}
670 
671 	rw_exit(nvf_lock(dcfd_handle));
672 }
673 
674 int
675 e_devid_cache_pathinfo(mdi_pathinfo_t *pip, ddi_devid_t devid)
676 {
677 	char *path = mdi_pi_pathname(pip);
678 
679 	return (e_devid_cache_register_cmn(mdi_pi_get_client(pip), devid,
680 	    path));
681 }
682 
683 /*
684  * Purge devid cache of stale devids
685  */
686 void
687 devid_cache_cleanup(void)
688 {
689 	nvp_devid_t *np, *next;
690 	list_t *listp;
691 	int is_dirty = 0;
692 
693 	rw_enter(nvf_lock(dcfd_handle), RW_WRITER);
694 
695 	listp = nvf_list(dcfd_handle);
696 	for (np = list_head(listp); np; np = next) {
697 		next = list_next(listp, np);
698 		if (np->nvp_devid == NULL)
699 			continue;
700 		if ((np->nvp_flags & NVP_DEVID_REGISTERED) == 0) {
701 			DEVID_LOG_REMOVE((CE_CONT,
702 			    "cleanup: %s\n", np->nvp_devpath));
703 			if (!devid_cache_write_disable) {
704 				nvf_mark_dirty(dcfd_handle);
705 				is_dirty = 0;
706 			}
707 			devid_nvp_unlink_and_free(dcfd_handle, np);
708 		}
709 	}
710 
711 	rw_exit(nvf_lock(dcfd_handle));
712 
713 	if (is_dirty)
714 		nvf_wake_daemon();
715 }
716 
717 
718 /*
719  * Build a list of dev_t's for a device/devid
720  *
721  * The effect of this function is cumulative, adding dev_t's
722  * for the device to the list of all dev_t's for a given
723  * devid.
724  */
725 static void
726 e_devid_minor_to_devlist(
727 	dev_info_t	*dip,
728 	char		*minor_name,
729 	int		ndevts_alloced,
730 	int		*devtcntp,
731 	dev_t		*devtsp)
732 {
733 	int			circ;
734 	struct ddi_minor_data	*dmdp;
735 	int			minor_all = 0;
736 	int			ndevts = *devtcntp;
737 
738 	ASSERT(i_ddi_devi_attached(dip));
739 
740 	/* are we looking for a set of minor nodes? */
741 	if ((minor_name == DEVID_MINOR_NAME_ALL) ||
742 	    (minor_name == DEVID_MINOR_NAME_ALL_CHR) ||
743 	    (minor_name == DEVID_MINOR_NAME_ALL_BLK))
744 		minor_all = 1;
745 
746 	/* Find matching minor names */
747 	ndi_devi_enter(dip, &circ);
748 	for (dmdp = DEVI(dip)->devi_minor; dmdp; dmdp = dmdp->next) {
749 
750 		/* Skip non-minors, and non matching minor names */
751 		if ((dmdp->type != DDM_MINOR) || ((minor_all == 0) &&
752 		    strcmp(dmdp->ddm_name, minor_name)))
753 			continue;
754 
755 		/* filter out minor_all mismatches */
756 		if (minor_all &&
757 		    (((minor_name == DEVID_MINOR_NAME_ALL_CHR) &&
758 		    (dmdp->ddm_spec_type != S_IFCHR)) ||
759 		    ((minor_name == DEVID_MINOR_NAME_ALL_BLK) &&
760 		    (dmdp->ddm_spec_type != S_IFBLK))))
761 			continue;
762 
763 		if (ndevts < ndevts_alloced)
764 			devtsp[ndevts] = dmdp->ddm_dev;
765 		ndevts++;
766 	}
767 	ndi_devi_exit(dip, circ);
768 
769 	*devtcntp = ndevts;
770 }
771 
772 /*
773  * Search for cached entries matching a devid
774  * Return two lists:
775  *	a list of dev_info nodes, for those devices in the attached state
776  *	a list of pathnames whose instances registered the given devid
777  * If the lists passed in are not sufficient to return the matching
778  * references, return the size of lists required.
779  * The dev_info nodes are returned with a hold that the caller must release.
780  */
781 static int
782 e_devid_cache_devi_path_lists(ddi_devid_t devid, int retmax,
783 	int *retndevis, dev_info_t **retdevis, int *retnpaths, char **retpaths)
784 {
785 	nvp_devid_t *np;
786 	int ndevis, npaths;
787 	dev_info_t *dip, *pdip;
788 	int circ;
789 	int maxdevis = 0;
790 	int maxpaths = 0;
791 	list_t *listp;
792 
793 	ndevis = 0;
794 	npaths = 0;
795 	listp = nvf_list(dcfd_handle);
796 	for (np = list_head(listp); np; np = list_next(listp, np)) {
797 		if (np->nvp_devid == NULL)
798 			continue;
799 		if (ddi_devid_valid(np->nvp_devid) != DDI_SUCCESS) {
800 			DEVIDERR((CE_CONT,
801 			    "find: invalid devid %s\n",
802 			    np->nvp_devpath));
803 			continue;
804 		}
805 		if (ddi_devid_compare(devid, np->nvp_devid) == 0) {
806 			DEVID_DEBUG2((CE_CONT,
807 			    "find: devid match: %s 0x%x\n",
808 			    np->nvp_devpath, np->nvp_flags));
809 			DEVID_LOG_MATCH(("find", devid, np->nvp_devpath));
810 			DEVID_LOG_PATHS((CE_CONT, "%s\n", np->nvp_devpath));
811 
812 			/*
813 			 * Check if we have a cached devinfo reference for this
814 			 * devid.  Place a hold on it to prevent detach
815 			 * Otherwise, use the path instead.
816 			 * Note: returns with a hold on each dev_info
817 			 * node in the list.
818 			 */
819 			dip = NULL;
820 			if (np->nvp_flags & NVP_DEVID_DIP) {
821 				pdip = ddi_get_parent(np->nvp_dip);
822 				if (ndi_devi_tryenter(pdip, &circ)) {
823 					dip = np->nvp_dip;
824 					ndi_hold_devi(dip);
825 					ndi_devi_exit(pdip, circ);
826 					ASSERT(!DEVI_IS_ATTACHING(dip));
827 					ASSERT(!DEVI_IS_DETACHING(dip));
828 				} else {
829 					DEVID_LOG_DETACH((CE_CONT,
830 					    "may be detaching: %s\n",
831 					    np->nvp_devpath));
832 				}
833 			}
834 
835 			if (dip) {
836 				if (ndevis < retmax) {
837 					retdevis[ndevis++] = dip;
838 				} else {
839 					ndi_rele_devi(dip);
840 				}
841 				maxdevis++;
842 			} else {
843 				if (npaths < retmax)
844 					retpaths[npaths++] = np->nvp_devpath;
845 				maxpaths++;
846 			}
847 		}
848 	}
849 
850 	*retndevis = ndevis;
851 	*retnpaths = npaths;
852 	return (maxdevis > maxpaths ? maxdevis : maxpaths);
853 }
854 
855 
/*
 * Search the devid cache, returning dev_t list for all
 * device paths mapping to the device identified by the
 * given devid.
 *
 * Primary interface used by ddi_lyr_devid_to_devlist()
 *
 * devid	devid to look up; must be valid
 * minor_name	minor name (or DEVID_MINOR_NAME_ALL* selector) passed
 *		through to e_devid_minor_to_devlist()
 * retndevts	on success, set to the number of dev_t's returned
 * retdevts	on success, set to a kmem-allocated, sorted array of
 *		unique dev_t's, *retndevts * sizeof (dev_t) bytes long
 *
 * Returns DDI_SUCCESS if at least one dev_t was found.  Returns
 * DDI_FAILURE, leaving *retndevts and *retdevts untouched, if the devid
 * is invalid, is not in the cache, or yields no matching minor nodes.
 */
int
e_devid_cache_to_devt_list(ddi_devid_t devid, char *minor_name,
	int *retndevts, dev_t **retdevts)
{
	char		*path, **paths;
	int		i, j, n;
	dev_t		*devts, *udevts;
	dev_t		tdevt;
	int		ndevts, undevts, ndevts_alloced;
	dev_info_t	*devi, **devis;
	int		ndevis, npaths, nalloced;
	ddi_devid_t	match_devid;

	DEVID_LOG_FIND(("find", devid, NULL));

	ASSERT(ddi_devid_valid(devid) == DDI_SUCCESS);
	if (ddi_devid_valid(devid) != DDI_SUCCESS) {
		DEVID_LOG_ERR(("invalid devid", devid, NULL));
		return (DDI_FAILURE);
	}

	nalloced = 128;

	/*
	 * Collect the matching devinfo nodes and paths from the cache.
	 * If the arrays prove too small, drop the lock and any holds
	 * obtained, then retry with room for n + 128 entries.  The loop
	 * is left with the cache read lock still held.
	 */
	for (;;) {
		paths = kmem_zalloc(nalloced * sizeof (char *), KM_SLEEP);
		devis = kmem_zalloc(nalloced * sizeof (dev_info_t *), KM_SLEEP);

		rw_enter(nvf_lock(dcfd_handle), RW_READER);
		n = e_devid_cache_devi_path_lists(devid, nalloced,
		    &ndevis, devis, &npaths, paths);
		if (n <= nalloced)
			break;
		rw_exit(nvf_lock(dcfd_handle));
		for (i = 0; i < ndevis; i++)
			ndi_rele_devi(devis[i]);
		kmem_free(paths, nalloced * sizeof (char *));
		kmem_free(devis, nalloced * sizeof (dev_info_t *));
		nalloced = n + 128;
	}

	/*
	 * The paths returned point at strings owned by the cache entries;
	 * take private copies before the lock is released.
	 */
	for (i = 0; i < npaths; i++) {
		path = i_ddi_strdup(paths[i], KM_SLEEP);
		paths[i] = path;
	}
	rw_exit(nvf_lock(dcfd_handle));

	if (ndevis == 0 && npaths == 0) {
		DEVID_LOG_ERR(("no devid found", devid, NULL));
		kmem_free(paths, nalloced * sizeof (char *));
		kmem_free(devis, nalloced * sizeof (dev_info_t *));
		return (DDI_FAILURE);
	}

	/*
	 * Gather dev_t's from every node.  Whenever the devts array
	 * overflows, it is freed and the whole collection is restarted
	 * from scratch with 128 more slots.
	 */
	ndevts_alloced = 128;
restart:
	ndevts = 0;
	devts = kmem_alloc(ndevts_alloced * sizeof (dev_t), KM_SLEEP);
	/* first, the nodes already held by the cache search */
	for (i = 0; i < ndevis; i++) {
		ASSERT(!DEVI_IS_ATTACHING(devis[i]));
		ASSERT(!DEVI_IS_DETACHING(devis[i]));
		e_devid_minor_to_devlist(devis[i], minor_name,
		    ndevts_alloced, &ndevts, devts);
		if (ndevts > ndevts_alloced) {
			kmem_free(devts, ndevts_alloced * sizeof (dev_t));
			ndevts_alloced += 128;
			goto restart;
		}
	}
	/* then the entries known only by path: look up and hold each node */
	for (i = 0; i < npaths; i++) {
		DEVID_LOG_LOOKUP((CE_CONT, "lookup %s\n", paths[i]));
		devi = e_ddi_hold_devi_by_path(paths[i], 0);
		if (devi == NULL) {
			DEVID_LOG_STALE(("stale device reference",
			    devid, paths[i]));
			continue;
		}
		/*
		 * Verify the newly attached device registered a matching devid
		 */
		if (i_ddi_devi_get_devid(DDI_DEV_T_ANY, devi,
		    &match_devid) != DDI_SUCCESS) {
			DEVIDERR((CE_CONT,
			    "%s: no devid registered on attach\n",
			    paths[i]));
			ddi_release_devi(devi);
			continue;
		}

		if (ddi_devid_compare(devid, match_devid) != 0) {
			DEVID_LOG_STALE(("new devid registered",
			    devid, paths[i]));
			ddi_release_devi(devi);
			ddi_devid_free(match_devid);
			continue;
		}
		ddi_devid_free(match_devid);

		e_devid_minor_to_devlist(devi, minor_name,
		    ndevts_alloced, &ndevts, devts);
		ddi_release_devi(devi);
		if (ndevts > ndevts_alloced) {
			kmem_free(devts,
			    ndevts_alloced * sizeof (dev_t));
			ndevts_alloced += 128;
			goto restart;
		}
	}

	/* drop hold from e_devid_cache_devi_path_lists */
	for (i = 0; i < ndevis; i++) {
		ndi_rele_devi(devis[i]);
	}
	/* free the private path copies made above */
	for (i = 0; i < npaths; i++) {
		kmem_free(paths[i], strlen(paths[i]) + 1);
	}
	kmem_free(paths, nalloced * sizeof (char *));
	kmem_free(devis, nalloced * sizeof (dev_info_t *));

	if (ndevts == 0) {
		DEVID_LOG_ERR(("no devid found", devid, NULL));
		kmem_free(devts, ndevts_alloced * sizeof (dev_t));
		return (DDI_FAILURE);
	}

	/*
	 * Build the final list of sorted dev_t's with duplicates collapsed so
	 * returned results are consistent. This prevents implementation
	 * artifacts from causing unnecessary changes in SVM namespace.
	 */
	/* bubble sort */
	for (i = 0; i < (ndevts - 1); i++) {
		for (j = 0; j < ((ndevts - 1) - i); j++) {
			if (devts[j + 1] < devts[j]) {
				tdevt = devts[j];
				devts[j] = devts[j + 1];
				devts[j + 1] = tdevt;
			}
		}
	}

	/* determine number of unique values */
	for (undevts = ndevts, i = 1; i < ndevts; i++) {
		if (devts[i - 1] == devts[i])
			undevts--;
	}

	/* allocate unique */
	udevts = kmem_alloc(undevts * sizeof (dev_t), KM_SLEEP);

	/* copy unique */
	udevts[0] = devts[0];
	for (i = 1, j = 1; i < ndevts; i++) {
		if (devts[i - 1] != devts[i])
			udevts[j++] = devts[i];
	}
	ASSERT(j == undevts);

	/* the working array is released; only the unique copy is returned */
	kmem_free(devts, ndevts_alloced * sizeof (dev_t));

	*retndevts = undevts;
	*retdevts = udevts;

	return (DDI_SUCCESS);
}
1027 
1028 void
1029 e_devid_cache_free_devt_list(int ndevts, dev_t *devt_list)
1030 {
1031 	kmem_free(devt_list, ndevts * sizeof (dev_t *));
1032 }
1033 
1034 /*
1035  * If given a full path and NULL ua, search for a cache entry
1036  * whose path matches the full path.  On a cache hit duplicate the
1037  * devid of the matched entry into the given devid (caller
1038  * must free);  nodenamebuf is not touched for this usage.
1039  *
1040  * Given a path and a non-NULL unit address, search the cache for any entry
1041  * matching "<path>/%@<unit-address>" where '%' is a wildcard meaning
1042  * any node name.  The path should not end a '/'.  On a cache hit
1043  * duplicate the devid as before (caller must free) and copy into
1044  * the caller-provided nodenamebuf (if not NULL) the nodename of the
1045  * matched entry.
1046  *
1047  * We must not make use of nvp_dip since that may be NULL for cached
1048  * entries that are not present in the current tree.
1049  */
1050 int
1051 e_devid_cache_path_to_devid(char *path, char *ua,
1052     char *nodenamebuf, ddi_devid_t *devidp)
1053 {
1054 	size_t pathlen, ualen;
1055 	int rv = DDI_FAILURE;
1056 	nvp_devid_t *np;
1057 	list_t *listp;
1058 	char *cand;
1059 
1060 	if (path == NULL || *path == '\0' || (ua && *ua == '\0') ||
1061 	    devidp == NULL)
1062 		return (DDI_FAILURE);
1063 
1064 	*devidp = NULL;
1065 
1066 	if (ua) {
1067 		pathlen = strlen(path);
1068 		ualen = strlen(ua);
1069 	}
1070 
1071 	rw_enter(nvf_lock(dcfd_handle), RW_READER);
1072 
1073 	listp = nvf_list(dcfd_handle);
1074 	for (np = list_head(listp); np; np = list_next(listp, np)) {
1075 		size_t nodelen, candlen, n;
1076 		ddi_devid_t devid_dup;
1077 		char *uasep, *node;
1078 
1079 		if (np->nvp_devid == NULL)
1080 			continue;
1081 
1082 		if (ddi_devid_valid(np->nvp_devid) != DDI_SUCCESS) {
1083 			DEVIDERR((CE_CONT,
1084 			    "pathsearch: invalid devid %s\n",
1085 			    np->nvp_devpath));
1086 			continue;
1087 		}
1088 
1089 		cand = np->nvp_devpath;		/* candidate path */
1090 
1091 		/* If a full pathname was provided the compare is easy */
1092 		if (ua == NULL) {
1093 			if (strcmp(cand, path) == 0)
1094 				goto match;
1095 			else
1096 				continue;
1097 		}
1098 
1099 		/*
1100 		 * The compare for initial path plus ua and unknown nodename
1101 		 * is trickier.
1102 		 *
1103 		 * Does the initial path component match 'path'?
1104 		 */
1105 		if (strncmp(path, cand, pathlen) != 0)
1106 			continue;
1107 
1108 		candlen = strlen(cand);
1109 
1110 		/*
1111 		 * The next character must be a '/' and there must be no
1112 		 * further '/' thereafter.  Begin by checking that the
1113 		 * candidate is long enough to include at mininum a
1114 		 * "/<nodename>@<ua>" after the initial portion already
1115 		 * matched assuming a nodename length of 1.
1116 		 */
1117 		if (candlen < pathlen + 1 + 1 + 1 + ualen ||
1118 		    cand[pathlen] != '/' ||
1119 		    strchr(cand + pathlen + 1, '/') != NULL)
1120 			continue;
1121 
1122 		node = cand + pathlen + 1;	/* <node>@<ua> string */
1123 
1124 		/*
1125 		 * Find the '@' before the unit address.  Check for
1126 		 * unit address match.
1127 		 */
1128 		if ((uasep = strchr(node, '@')) == NULL)
1129 			continue;
1130 
1131 		/*
1132 		 * Check we still have enough length and that ua matches
1133 		 */
1134 		nodelen = (uintptr_t)uasep - (uintptr_t)node;
1135 		if (candlen < pathlen + 1 + nodelen + 1 + ualen ||
1136 		    strncmp(ua, uasep + 1, ualen) != 0)
1137 			continue;
1138 match:
1139 		n = ddi_devid_sizeof(np->nvp_devid);
1140 		devid_dup = kmem_alloc(n, KM_SLEEP);	/* caller must free */
1141 		(void) bcopy(np->nvp_devid, devid_dup, n);
1142 		*devidp = devid_dup;
1143 
1144 		if (ua && nodenamebuf) {
1145 			(void) strncpy(nodenamebuf, node, nodelen);
1146 			nodenamebuf[nodelen] = '\0';
1147 		}
1148 
1149 		rv = DDI_SUCCESS;
1150 		break;
1151 	}
1152 
1153 	rw_exit(nvf_lock(dcfd_handle));
1154 
1155 	return (rv);
1156 }
1157 
#ifdef	DEBUG
/*
 * Debug helper: print a tag (passed as fmt and printed verbatim, not
 * used as a format string), an optional path, and the devid in its
 * encoded string form.
 */
static void
devid_log(char *fmt, ddi_devid_t devid, char *path)
{
	char *encoded;

	encoded = ddi_devid_str_encode(devid, NULL);
	if (path == NULL) {
		cmn_err(CE_CONT, "%s: %s\n", fmt, encoded);
	} else {
		cmn_err(CE_CONT, "%s: %s %s\n", fmt, path, encoded);
	}
	ddi_devid_str_free(encoded);
}
#endif	/* DEBUG */
1171