xref: /titanic_44/usr/src/uts/common/os/devid_cache.c (revision f38cb554a534c6df738be3f4d23327e69888e634)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 #include <sys/note.h>
26 #include <sys/t_lock.h>
27 #include <sys/cmn_err.h>
28 #include <sys/instance.h>
29 #include <sys/conf.h>
30 #include <sys/stat.h>
31 #include <sys/ddi.h>
32 #include <sys/hwconf.h>
33 #include <sys/sunddi.h>
34 #include <sys/sunndi.h>
35 #include <sys/sunmdi.h>
36 #include <sys/ddi_impldefs.h>
37 #include <sys/ndi_impldefs.h>
38 #include <sys/kobj.h>
39 #include <sys/devcache.h>
40 #include <sys/devid_cache.h>
41 #include <sys/sysmacros.h>
42 
43 /*
44  * Discovery refers to the heroic effort made to discover a device which
45  * cannot be accessed at the physical path where it once resided.  Discovery
46  * involves walking the entire device tree attaching all possible disk
47  * instances, to search for the device referenced by a devid.  Obviously,
48  * full device discovery is something to be avoided where possible.
49  * Note that simply invoking devfsadm(1M) is equivalent to running full
50  * discovery at the devid cache level.
51  *
52  * Reasons why a disk may not be accessible:
53  *	disk powered off
54  *	disk removed or cable disconnected
55  *	disk or adapter broken
56  *
57  * Note that discovery is not needed and cannot succeed in any of these
58  * cases.
59  *
60  * When discovery may succeed:
61  *	Discovery will result in success when a device has been moved
62  *	to a different address.  Note that it's recommended that
63  *	devfsadm(1M) be invoked (no arguments required) whenever a system's
64  *	h/w configuration has been updated.  Alternatively, a
65  *	reconfiguration boot can be used to accomplish the same result.
66  *
67  * Note that discovery is not necessary to be able to correct an access
68  * failure for a device which was powered off.  Assuming the cache has an
69  * entry for such a device, simply powering it on should permit the system
70  * to access it.  If problems persist after powering it on, invoke
71  * devfsadm(1M).
72  *
73  * Discovery prior to mounting root is only of interest when booting
74  * from a filesystem which accesses devices by device id, which
75  * not all do.
76  *
77  * Tunables
78  *
79  * devid_discovery_boot (default 1)
80  *	Number of times discovery will be attempted prior to mounting root.
81  *	Must be done at least once to recover from corrupted or missing
82  *	devid cache backing store.  Probably there's no reason to ever
83  *	set this to greater than one as a missing device will remain
84  *	unavailable no matter how often the system searches for it.
85  *
86  * devid_discovery_postboot (default 1)
87  *	Number of times discovery will be attempted after mounting root.
88  *	This must be performed at least once to discover any devices
89  *	needed after root is mounted which may have been powered
90  *	off and moved before booting.
91  *	Setting this to a larger positive number will introduce
92  *	some inconsistency in system operation.  Searching for a device
93  *	will take an indeterminate amount of time, sometimes slower,
94  *	sometimes faster.  In addition, the system will sometimes
95  *	discover a newly powered on device, sometimes it won't.
96  *	Use of this option is therefore not recommended.
97  *
98  * devid_discovery_postboot_always (default 0)
99  *	Set to 1, the system will always attempt full discovery.
100  *
101  * devid_discovery_secs (default 0)
102  *	Set to a positive value, the system will attempt full discovery
103  *	but with a minimum delay between attempts.  A device search
104  *	within the period of time specified will result in failure.
105  *
106  * devid_cache_read_disable (default 0)
107  *	Set to 1 to disable reading /etc/devices/devid_cache.
108  *	Devid cache will continue to operate normally but
109  *	at least one discovery attempt will be required.
110  *
111  * devid_cache_write_disable (default 0)
112  *	Set to 1 to disable updates to /etc/devices/devid_cache.
113  *	Any updates to the devid cache will not be preserved across a reboot.
114  *
115  * devid_report_error (default 0)
116  *	Set to 1 to enable some error messages related to devid
117  *	cache failures.
118  *
119  * The devid is packed in the cache file as a byte array.  For
120  * portability, this could be done in the encoded string format.
121  */
122 
123 
/*
 * Discovery policy tunables; defaults match the "Tunables" section of
 * the block comment above.
 */
int devid_discovery_boot = 1;		/* attempts before root is mounted */
int devid_discovery_postboot = 1;	/* attempts after root is mounted */
int devid_discovery_postboot_always = 0; /* non-zero: always discover */
int devid_discovery_secs = 0;		/* >0: min seconds between attempts */

/* Backing store (/etc/devices/devid_cache) access control */
int devid_cache_read_disable = 0;	/* non-zero: do not read the file */
int devid_cache_write_disable = 0;	/* non-zero: never mark cache dirty */

/* Non-zero enables some devid cache failure messages */
int devid_report_error = 0;
133 
134 
135 /*
136  * State to manage discovery of devices providing a devid
137  */
138 static int		devid_discovery_busy = 0;
139 static kmutex_t		devid_discovery_mutex;
140 static kcondvar_t	devid_discovery_cv;
141 static clock_t		devid_last_discovery = 0;
142 
143 
#ifdef	DEBUG
/*
 * Debug/logging switches, all off by default.
 * NOTE(review): presumably consulted by the DEVID_* / NVP_DEVID_* logging
 * macros declared in <sys/devid_cache.h> -- confirm there.
 */

/* general debug output */
int nvp_devid_debug = 0;
int devid_debug = 0;
int devid_register_debug = 0;

/* per-event logging */
int devid_log_registers = 0;
int devid_log_unregisters = 0;
int devid_log_finds = 0;
int devid_log_lookups = 0;
int devid_log_discovery = 0;
int devid_log_matches = 0;
int devid_log_paths = 0;
int devid_log_failures = 0;
int devid_log_hold = 0;
int devid_log_removes = 0;
int devid_log_stale = 0;
int devid_log_detaches = 0;
#endif	/* DEBUG */
161 
162 /*
163  * devid cache file registration for cache reads and updates
164  */
165 static nvf_ops_t devid_cache_ops = {
166 	"/etc/devices/devid_cache",		/* path to cache */
167 	devid_cache_unpack_nvlist,		/* read: nvlist to nvp */
168 	devid_cache_pack_list,			/* write: nvp to nvlist */
169 	devid_list_free,			/* free data list */
170 	NULL					/* write complete callback */
171 };
172 
173 /*
174  * handle to registered devid cache handlers
175  */
176 nvf_handle_t	dcfd_handle;
177 
178 
179 /*
180  * Initialize devid cache file management
181  */
182 void
183 devid_cache_init(void)
184 {
185 	dcfd_handle = nvf_register_file(&devid_cache_ops);
186 	ASSERT(dcfd_handle);
187 
188 	list_create(nvf_list(dcfd_handle), sizeof (nvp_devid_t),
189 	    offsetof(nvp_devid_t, nvp_link));
190 
191 	mutex_init(&devid_discovery_mutex, NULL, MUTEX_DEFAULT, NULL);
192 	cv_init(&devid_discovery_cv, NULL, CV_DRIVER, NULL);
193 }
194 
195 /*
196  * Read and initialize the devid cache from the persistent store
197  */
198 void
199 devid_cache_read(void)
200 {
201 	if (!devid_cache_read_disable) {
202 		rw_enter(nvf_lock(dcfd_handle), RW_WRITER);
203 		ASSERT(list_head(nvf_list(dcfd_handle)) == NULL);
204 		(void) nvf_read_file(dcfd_handle);
205 		rw_exit(nvf_lock(dcfd_handle));
206 	}
207 }
208 
209 static void
210 devid_nvp_free(nvp_devid_t *dp)
211 {
212 	if (dp->nvp_devpath)
213 		kmem_free(dp->nvp_devpath, strlen(dp->nvp_devpath)+1);
214 	if (dp->nvp_devid)
215 		kmem_free(dp->nvp_devid, ddi_devid_sizeof(dp->nvp_devid));
216 
217 	kmem_free(dp, sizeof (nvp_devid_t));
218 }
219 
220 static void
221 devid_list_free(nvf_handle_t fd)
222 {
223 	list_t		*listp;
224 	nvp_devid_t	*np;
225 
226 	ASSERT(RW_WRITE_HELD(nvf_lock(dcfd_handle)));
227 
228 	listp = nvf_list(fd);
229 	while (np = list_head(listp)) {
230 		list_remove(listp, np);
231 		devid_nvp_free(np);
232 	}
233 }
234 
235 /*
236  * Free an nvp element in a list
237  */
238 static void
239 devid_nvp_unlink_and_free(nvf_handle_t fd, nvp_devid_t *np)
240 {
241 	list_remove(nvf_list(fd), np);
242 	devid_nvp_free(np);
243 }
244 
245 /*
246  * Unpack a device path/nvlist pair to the list of devid cache elements.
247  * Used to parse the nvlist format when reading
248  * /etc/devices/devid_cache
249  */
250 static int
251 devid_cache_unpack_nvlist(nvf_handle_t fd, nvlist_t *nvl, char *name)
252 {
253 	nvp_devid_t *np;
254 	ddi_devid_t devidp;
255 	int rval;
256 	uint_t n;
257 
258 	NVP_DEVID_DEBUG_PATH((name));
259 	ASSERT(RW_WRITE_HELD(nvf_lock(dcfd_handle)));
260 
261 	/*
262 	 * check path for a devid
263 	 */
264 	rval = nvlist_lookup_byte_array(nvl,
265 	    DP_DEVID_ID, (uchar_t **)&devidp, &n);
266 	if (rval == 0) {
267 		if (ddi_devid_valid(devidp) == DDI_SUCCESS) {
268 			ASSERT(n == ddi_devid_sizeof(devidp));
269 			np = kmem_zalloc(sizeof (nvp_devid_t), KM_SLEEP);
270 			np->nvp_devpath = i_ddi_strdup(name, KM_SLEEP);
271 			np->nvp_devid = kmem_alloc(n, KM_SLEEP);
272 			(void) bcopy(devidp, np->nvp_devid, n);
273 			list_insert_tail(nvf_list(fd), np);
274 			NVP_DEVID_DEBUG_DEVID((np->nvp_devid));
275 		} else {
276 			DEVIDERR((CE_CONT,
277 			    "%s: invalid devid\n", name));
278 		}
279 	} else {
280 		DEVIDERR((CE_CONT,
281 		    "%s: devid not available\n", name));
282 	}
283 
284 	return (0);
285 }
286 
287 /*
288  * Pack the list of devid cache elements into a single nvlist
289  * Used when writing the nvlist file.
290  */
291 static int
292 devid_cache_pack_list(nvf_handle_t fd, nvlist_t **ret_nvl)
293 {
294 	nvlist_t	*nvl, *sub_nvl;
295 	nvp_devid_t	*np;
296 	int		rval;
297 	list_t		*listp;
298 
299 	ASSERT(RW_WRITE_HELD(nvf_lock(dcfd_handle)));
300 
301 	rval = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP);
302 	if (rval != 0) {
303 		nvf_error("%s: nvlist alloc error %d\n",
304 		    nvf_cache_name(fd), rval);
305 		return (DDI_FAILURE);
306 	}
307 
308 	listp = nvf_list(fd);
309 	for (np = list_head(listp); np; np = list_next(listp, np)) {
310 		if (np->nvp_devid == NULL)
311 			continue;
312 		NVP_DEVID_DEBUG_PATH(np->nvp_devpath);
313 		rval = nvlist_alloc(&sub_nvl, NV_UNIQUE_NAME, KM_SLEEP);
314 		if (rval != 0) {
315 			nvf_error("%s: nvlist alloc error %d\n",
316 			    nvf_cache_name(fd), rval);
317 			sub_nvl = NULL;
318 			goto err;
319 		}
320 
321 		rval = nvlist_add_byte_array(sub_nvl, DP_DEVID_ID,
322 		    (uchar_t *)np->nvp_devid,
323 		    ddi_devid_sizeof(np->nvp_devid));
324 		if (rval == 0) {
325 			NVP_DEVID_DEBUG_DEVID(np->nvp_devid);
326 		} else {
327 			nvf_error(
328 			    "%s: nvlist add error %d (devid)\n",
329 			    nvf_cache_name(fd), rval);
330 			goto err;
331 		}
332 
333 		rval = nvlist_add_nvlist(nvl, np->nvp_devpath, sub_nvl);
334 		if (rval != 0) {
335 			nvf_error("%s: nvlist add error %d (sublist)\n",
336 			    nvf_cache_name(fd), rval);
337 			goto err;
338 		}
339 		nvlist_free(sub_nvl);
340 	}
341 
342 	*ret_nvl = nvl;
343 	return (DDI_SUCCESS);
344 
345 err:
346 	if (sub_nvl)
347 		nvlist_free(sub_nvl);
348 	nvlist_free(nvl);
349 	*ret_nvl = NULL;
350 	return (DDI_FAILURE);
351 }
352 
353 static int
354 e_devid_do_discovery(void)
355 {
356 	ASSERT(mutex_owned(&devid_discovery_mutex));
357 
358 	if (i_ddi_io_initialized() == 0) {
359 		if (devid_discovery_boot > 0) {
360 			devid_discovery_boot--;
361 			return (1);
362 		}
363 	} else {
364 		if (devid_discovery_postboot_always > 0)
365 			return (1);
366 		if (devid_discovery_postboot > 0) {
367 			devid_discovery_postboot--;
368 			return (1);
369 		}
370 		if (devid_discovery_secs > 0) {
371 			if ((ddi_get_lbolt() - devid_last_discovery) >
372 			    drv_usectohz(devid_discovery_secs * MICROSEC)) {
373 				return (1);
374 			}
375 		}
376 	}
377 
378 	DEVID_LOG_DISC((CE_CONT, "devid_discovery: no discovery\n"));
379 	return (0);
380 }
381 
382 static void
383 e_ddi_devid_hold_by_major(major_t major)
384 {
385 	DEVID_LOG_DISC((CE_CONT,
386 	    "devid_discovery: ddi_hold_installed_driver %d\n", major));
387 
388 	if (ddi_hold_installed_driver(major) == NULL)
389 		return;
390 
391 	ddi_rele_driver(major);
392 }
393 
/*
 * Legacy support: drivers that e_ddi_devid_hold_installed_driver()
 * always holds, in addition to those flagged DN_DEVID_REGISTRANT.
 */
static char *e_ddi_devid_hold_driver_list[] = { "sd", "ssd" };

/* number of entries in e_ddi_devid_hold_driver_list */
#define	N_DRIVERS_TO_HOLD	\
	(sizeof (e_ddi_devid_hold_driver_list) / \
	sizeof (e_ddi_devid_hold_driver_list[0]))
399 
400 static void
401 e_ddi_devid_hold_installed_driver(ddi_devid_t devid)
402 {
403 	impl_devid_t	*id = (impl_devid_t *)devid;
404 	major_t		major, hint_major;
405 	char		hint[DEVID_HINT_SIZE + 1];
406 	struct devnames	*dnp;
407 	char		**drvp;
408 	int		i;
409 
410 	/* Count non-null bytes */
411 	for (i = 0; i < DEVID_HINT_SIZE; i++)
412 		if (id->did_driver[i] == '\0')
413 			break;
414 
415 	/* Make a copy of the driver hint */
416 	bcopy(id->did_driver, hint, i);
417 	hint[i] = '\0';
418 
419 	/* search for the devid using the hint driver */
420 	hint_major = ddi_name_to_major(hint);
421 	if (hint_major != DDI_MAJOR_T_NONE) {
422 		e_ddi_devid_hold_by_major(hint_major);
423 	}
424 
425 	/*
426 	 * search for the devid with each driver declaring
427 	 * itself as a devid registrant.
428 	 */
429 	for (major = 0; major < devcnt; major++) {
430 		if (major == hint_major)
431 			continue;
432 		dnp = &devnamesp[major];
433 		if (dnp->dn_flags & DN_DEVID_REGISTRANT) {
434 			e_ddi_devid_hold_by_major(major);
435 		}
436 	}
437 
438 	/*
439 	 * Legacy support: may be removed once an upgrade mechanism
440 	 * for driver conf files is available.
441 	 */
442 	drvp = e_ddi_devid_hold_driver_list;
443 	for (i = 0; i < N_DRIVERS_TO_HOLD; i++, drvp++) {
444 		major = ddi_name_to_major(*drvp);
445 		if (major != DDI_MAJOR_T_NONE && major != hint_major) {
446 			e_ddi_devid_hold_by_major(major);
447 		}
448 	}
449 }
450 
451 /*
452  * Return success if discovery was attempted, to indicate
453  * that the desired device may now be available.
454  */
455 int
456 e_ddi_devid_discovery(ddi_devid_t devid)
457 {
458 	int flags;
459 	int rval = DDI_SUCCESS;
460 
461 	mutex_enter(&devid_discovery_mutex);
462 
463 	if (devid_discovery_busy) {
464 		DEVID_LOG_DISC((CE_CONT, "devid_discovery: busy\n"));
465 		while (devid_discovery_busy) {
466 			cv_wait(&devid_discovery_cv, &devid_discovery_mutex);
467 		}
468 	} else if (e_devid_do_discovery()) {
469 		devid_discovery_busy = 1;
470 		mutex_exit(&devid_discovery_mutex);
471 
472 		if (i_ddi_io_initialized() == 0) {
473 			e_ddi_devid_hold_installed_driver(devid);
474 		} else {
475 			DEVID_LOG_DISC((CE_CONT,
476 			    "devid_discovery: ndi_devi_config\n"));
477 			flags = NDI_DEVI_PERSIST | NDI_CONFIG | NDI_NO_EVENT;
478 			if (i_ddi_io_initialized())
479 				flags |= NDI_DRV_CONF_REPROBE;
480 			(void) ndi_devi_config(ddi_root_node(), flags);
481 		}
482 
483 		mutex_enter(&devid_discovery_mutex);
484 		devid_discovery_busy = 0;
485 		cv_broadcast(&devid_discovery_cv);
486 		if (devid_discovery_secs > 0)
487 			devid_last_discovery = ddi_get_lbolt();
488 		DEVID_LOG_DISC((CE_CONT, "devid_discovery: done\n"));
489 	} else {
490 		rval = DDI_FAILURE;
491 		DEVID_LOG_DISC((CE_CONT, "no devid discovery\n"));
492 	}
493 
494 	mutex_exit(&devid_discovery_mutex);
495 
496 	return (rval);
497 }
498 
499 /*
500  * As part of registering a devid for a device,
501  * update the devid cache with this device/devid pair
502  * or note that this combination has registered.
503  *
504  * If a devpath is provided it will be used as the path to register the
505  * devid against, otherwise we use ddi_pathname(dip).  In both cases
506  * we duplicate the path string so that it can be cached/freed indepdently
507  * of the original owner.
508  */
509 static int
510 e_devid_cache_register_cmn(dev_info_t *dip, ddi_devid_t devid, char *devpath)
511 {
512 	nvp_devid_t *np;
513 	nvp_devid_t *new_nvp;
514 	ddi_devid_t new_devid;
515 	int new_devid_size;
516 	char *path, *fullpath;
517 	ddi_devid_t free_devid = NULL;
518 	int pathlen;
519 	list_t *listp;
520 	int is_dirty = 0;
521 
522 
523 	ASSERT(ddi_devid_valid(devid) == DDI_SUCCESS);
524 
525 	if (devpath) {
526 		pathlen = strlen(devpath) + 1;
527 		path = kmem_alloc(pathlen, KM_SLEEP);
528 		bcopy(devpath, path, pathlen);
529 	} else {
530 		/*
531 		 * We are willing to accept DS_BOUND nodes if we can form a full
532 		 * ddi_pathname (i.e. the node is part way to becomming
533 		 * DS_INITIALIZED and devi_addr/ddi_get_name_addr are non-NULL).
534 		 */
535 		if (ddi_get_name_addr(dip) == NULL)
536 			return (DDI_FAILURE);
537 
538 		fullpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
539 		(void) ddi_pathname(dip, fullpath);
540 		pathlen = strlen(fullpath) + 1;
541 		path = kmem_alloc(pathlen, KM_SLEEP);
542 		bcopy(fullpath, path, pathlen);
543 		kmem_free(fullpath, MAXPATHLEN);
544 	}
545 
546 	DEVID_LOG_REG(("register", devid, path));
547 
548 	new_nvp = kmem_zalloc(sizeof (nvp_devid_t), KM_SLEEP);
549 	new_devid_size = ddi_devid_sizeof(devid);
550 	new_devid = kmem_alloc(new_devid_size, KM_SLEEP);
551 	(void) bcopy(devid, new_devid, new_devid_size);
552 
553 	rw_enter(nvf_lock(dcfd_handle), RW_WRITER);
554 
555 	listp = nvf_list(dcfd_handle);
556 	for (np = list_head(listp); np; np = list_next(listp, np)) {
557 		if (strcmp(path, np->nvp_devpath) == 0) {
558 			DEVID_DEBUG2((CE_CONT,
559 			    "register: %s path match\n", path));
560 			if (np->nvp_devid == NULL) {
561 replace:			np->nvp_devid = new_devid;
562 				np->nvp_flags |=
563 				    NVP_DEVID_DIP | NVP_DEVID_REGISTERED;
564 				np->nvp_dip = dip;
565 				if (!devid_cache_write_disable) {
566 					nvf_mark_dirty(dcfd_handle);
567 					is_dirty = 1;
568 				}
569 				rw_exit(nvf_lock(dcfd_handle));
570 				kmem_free(new_nvp, sizeof (nvp_devid_t));
571 				kmem_free(path, pathlen);
572 				goto exit;
573 			}
574 			if (ddi_devid_valid(np->nvp_devid) != DDI_SUCCESS) {
575 				/* replace invalid devid */
576 				free_devid = np->nvp_devid;
577 				goto replace;
578 			}
579 			/*
580 			 * We're registering an already-cached path
581 			 * Does the device's devid match the cache?
582 			 */
583 			if (ddi_devid_compare(devid, np->nvp_devid) != 0) {
584 				DEVID_DEBUG((CE_CONT, "devid register: "
585 				    "devid %s does not match\n", path));
586 				/*
587 				 * Replace cached devid for this path
588 				 * with newly registered devid.  A devid
589 				 * may map to multiple paths but one path
590 				 * should only map to one devid.
591 				 */
592 				devid_nvp_unlink_and_free(dcfd_handle, np);
593 				np = NULL;
594 				break;
595 			} else {
596 				DEVID_DEBUG2((CE_CONT,
597 				    "devid register: %s devid match\n", path));
598 				np->nvp_flags |=
599 				    NVP_DEVID_DIP | NVP_DEVID_REGISTERED;
600 				np->nvp_dip = dip;
601 				rw_exit(nvf_lock(dcfd_handle));
602 				kmem_free(new_nvp, sizeof (nvp_devid_t));
603 				kmem_free(path, pathlen);
604 				kmem_free(new_devid, new_devid_size);
605 				return (DDI_SUCCESS);
606 			}
607 		}
608 	}
609 
610 	/*
611 	 * Add newly registered devid to the cache
612 	 */
613 	ASSERT(np == NULL);
614 
615 	new_nvp->nvp_devpath = path;
616 	new_nvp->nvp_flags = NVP_DEVID_DIP | NVP_DEVID_REGISTERED;
617 	new_nvp->nvp_dip = dip;
618 	new_nvp->nvp_devid = new_devid;
619 
620 	if (!devid_cache_write_disable) {
621 		is_dirty = 1;
622 		nvf_mark_dirty(dcfd_handle);
623 	}
624 	list_insert_tail(nvf_list(dcfd_handle), new_nvp);
625 
626 	rw_exit(nvf_lock(dcfd_handle));
627 
628 exit:
629 	if (free_devid)
630 		kmem_free(free_devid, ddi_devid_sizeof(free_devid));
631 
632 	if (is_dirty)
633 		nvf_wake_daemon();
634 
635 	return (DDI_SUCCESS);
636 }
637 
638 int
639 e_devid_cache_register(dev_info_t *dip, ddi_devid_t devid)
640 {
641 	return (e_devid_cache_register_cmn(dip, devid, NULL));
642 }
643 
644 /*
645  * Unregister a device's devid; the devinfo may hit on multiple entries
646  * arising from both pHCI and vHCI paths.
647  * Called as an instance detachs.
648  * Invalidate the devid's devinfo reference.
649  * Devid-path remains in the cache.
650  */
651 
652 void
653 e_devid_cache_unregister(dev_info_t *dip)
654 {
655 	nvp_devid_t *np;
656 	list_t *listp;
657 
658 	rw_enter(nvf_lock(dcfd_handle), RW_WRITER);
659 
660 	listp = nvf_list(dcfd_handle);
661 	for (np = list_head(listp); np; np = list_next(listp, np)) {
662 		if (np->nvp_devid == NULL)
663 			continue;
664 		if ((np->nvp_flags & NVP_DEVID_DIP) && np->nvp_dip == dip) {
665 			DEVID_LOG_UNREG((CE_CONT,
666 			    "unregister: %s\n", np->nvp_devpath));
667 			np->nvp_flags &= ~NVP_DEVID_DIP;
668 			np->nvp_dip = NULL;
669 		}
670 	}
671 
672 	rw_exit(nvf_lock(dcfd_handle));
673 }
674 
675 int
676 e_devid_cache_pathinfo(mdi_pathinfo_t *pip, ddi_devid_t devid)
677 {
678 	char *path = mdi_pi_pathname(pip);
679 
680 	return (e_devid_cache_register_cmn(mdi_pi_get_client(pip), devid,
681 	    path));
682 }
683 
684 /*
685  * Purge devid cache of stale devids
686  */
687 void
688 devid_cache_cleanup(void)
689 {
690 	nvp_devid_t *np, *next;
691 	list_t *listp;
692 	int is_dirty = 0;
693 
694 	rw_enter(nvf_lock(dcfd_handle), RW_WRITER);
695 
696 	listp = nvf_list(dcfd_handle);
697 	for (np = list_head(listp); np; np = next) {
698 		next = list_next(listp, np);
699 		if (np->nvp_devid == NULL)
700 			continue;
701 		if ((np->nvp_flags & NVP_DEVID_REGISTERED) == 0) {
702 			DEVID_LOG_REMOVE((CE_CONT,
703 			    "cleanup: %s\n", np->nvp_devpath));
704 			if (!devid_cache_write_disable) {
705 				nvf_mark_dirty(dcfd_handle);
706 				is_dirty = 0;
707 			}
708 			devid_nvp_unlink_and_free(dcfd_handle, np);
709 		}
710 	}
711 
712 	rw_exit(nvf_lock(dcfd_handle));
713 
714 	if (is_dirty)
715 		nvf_wake_daemon();
716 }
717 
718 
719 /*
720  * Build a list of dev_t's for a device/devid
721  *
722  * The effect of this function is cumulative, adding dev_t's
723  * for the device to the list of all dev_t's for a given
724  * devid.
725  */
726 static void
727 e_devid_minor_to_devlist(
728 	dev_info_t	*dip,
729 	char		*minor_name,
730 	int		ndevts_alloced,
731 	int		*devtcntp,
732 	dev_t		*devtsp)
733 {
734 	int			circ;
735 	struct ddi_minor_data	*dmdp;
736 	int			minor_all = 0;
737 	int			ndevts = *devtcntp;
738 
739 	ASSERT(i_ddi_devi_attached(dip));
740 
741 	/* are we looking for a set of minor nodes? */
742 	if ((minor_name == DEVID_MINOR_NAME_ALL) ||
743 	    (minor_name == DEVID_MINOR_NAME_ALL_CHR) ||
744 	    (minor_name == DEVID_MINOR_NAME_ALL_BLK))
745 		minor_all = 1;
746 
747 	/* Find matching minor names */
748 	ndi_devi_enter(dip, &circ);
749 	for (dmdp = DEVI(dip)->devi_minor; dmdp; dmdp = dmdp->next) {
750 
751 		/* Skip non-minors, and non matching minor names */
752 		if ((dmdp->type != DDM_MINOR) || ((minor_all == 0) &&
753 		    strcmp(dmdp->ddm_name, minor_name)))
754 			continue;
755 
756 		/* filter out minor_all mismatches */
757 		if (minor_all &&
758 		    (((minor_name == DEVID_MINOR_NAME_ALL_CHR) &&
759 		    (dmdp->ddm_spec_type != S_IFCHR)) ||
760 		    ((minor_name == DEVID_MINOR_NAME_ALL_BLK) &&
761 		    (dmdp->ddm_spec_type != S_IFBLK))))
762 			continue;
763 
764 		if (ndevts < ndevts_alloced)
765 			devtsp[ndevts] = dmdp->ddm_dev;
766 		ndevts++;
767 	}
768 	ndi_devi_exit(dip, circ);
769 
770 	*devtcntp = ndevts;
771 }
772 
773 /*
774  * Search for cached entries matching a devid
775  * Return two lists:
776  *	a list of dev_info nodes, for those devices in the attached state
777  *	a list of pathnames whose instances registered the given devid
778  * If the lists passed in are not sufficient to return the matching
779  * references, return the size of lists required.
780  * The dev_info nodes are returned with a hold that the caller must release.
781  */
782 static int
783 e_devid_cache_devi_path_lists(ddi_devid_t devid, int retmax,
784 	int *retndevis, dev_info_t **retdevis, int *retnpaths, char **retpaths)
785 {
786 	nvp_devid_t *np;
787 	int ndevis, npaths;
788 	dev_info_t *dip, *pdip;
789 	int circ;
790 	int maxdevis = 0;
791 	int maxpaths = 0;
792 	list_t *listp;
793 
794 	ndevis = 0;
795 	npaths = 0;
796 	listp = nvf_list(dcfd_handle);
797 	for (np = list_head(listp); np; np = list_next(listp, np)) {
798 		if (np->nvp_devid == NULL)
799 			continue;
800 		if (ddi_devid_valid(np->nvp_devid) != DDI_SUCCESS) {
801 			DEVIDERR((CE_CONT,
802 			    "find: invalid devid %s\n",
803 			    np->nvp_devpath));
804 			continue;
805 		}
806 		if (ddi_devid_compare(devid, np->nvp_devid) == 0) {
807 			DEVID_DEBUG2((CE_CONT,
808 			    "find: devid match: %s 0x%x\n",
809 			    np->nvp_devpath, np->nvp_flags));
810 			DEVID_LOG_MATCH(("find", devid, np->nvp_devpath));
811 			DEVID_LOG_PATHS((CE_CONT, "%s\n", np->nvp_devpath));
812 
813 			/*
814 			 * Check if we have a cached devinfo reference for this
815 			 * devid.  Place a hold on it to prevent detach
816 			 * Otherwise, use the path instead.
817 			 * Note: returns with a hold on each dev_info
818 			 * node in the list.
819 			 */
820 			dip = NULL;
821 			if (np->nvp_flags & NVP_DEVID_DIP) {
822 				pdip = ddi_get_parent(np->nvp_dip);
823 				if (ndi_devi_tryenter(pdip, &circ)) {
824 					dip = np->nvp_dip;
825 					ndi_hold_devi(dip);
826 					ndi_devi_exit(pdip, circ);
827 					ASSERT(!DEVI_IS_ATTACHING(dip));
828 					ASSERT(!DEVI_IS_DETACHING(dip));
829 				} else {
830 					DEVID_LOG_DETACH((CE_CONT,
831 					    "may be detaching: %s\n",
832 					    np->nvp_devpath));
833 				}
834 			}
835 
836 			if (dip) {
837 				if (ndevis < retmax) {
838 					retdevis[ndevis++] = dip;
839 				} else {
840 					ndi_rele_devi(dip);
841 				}
842 				maxdevis++;
843 			} else {
844 				if (npaths < retmax)
845 					retpaths[npaths++] = np->nvp_devpath;
846 				maxpaths++;
847 			}
848 		}
849 	}
850 
851 	*retndevis = ndevis;
852 	*retnpaths = npaths;
853 	return (maxdevis > maxpaths ? maxdevis : maxpaths);
854 }
855 
856 
857 /*
858  * Search the devid cache, returning dev_t list for all
859  * device paths mapping to the device identified by the
860  * given devid.
861  *
862  * Primary interface used by ddi_lyr_devid_to_devlist()
863  */
864 int
865 e_devid_cache_to_devt_list(ddi_devid_t devid, char *minor_name,
866 	int *retndevts, dev_t **retdevts)
867 {
868 	char		*path, **paths;
869 	int		i, j, n;
870 	dev_t		*devts, *udevts;
871 	dev_t		tdevt;
872 	int		ndevts, undevts, ndevts_alloced;
873 	dev_info_t	*devi, **devis;
874 	int		ndevis, npaths, nalloced;
875 	ddi_devid_t	match_devid;
876 
877 	DEVID_LOG_FIND(("find", devid, NULL));
878 
879 	ASSERT(ddi_devid_valid(devid) == DDI_SUCCESS);
880 	if (ddi_devid_valid(devid) != DDI_SUCCESS) {
881 		DEVID_LOG_ERR(("invalid devid", devid, NULL));
882 		return (DDI_FAILURE);
883 	}
884 
885 	nalloced = 128;
886 
887 	for (;;) {
888 		paths = kmem_zalloc(nalloced * sizeof (char *), KM_SLEEP);
889 		devis = kmem_zalloc(nalloced * sizeof (dev_info_t *), KM_SLEEP);
890 
891 		rw_enter(nvf_lock(dcfd_handle), RW_READER);
892 		n = e_devid_cache_devi_path_lists(devid, nalloced,
893 		    &ndevis, devis, &npaths, paths);
894 		if (n <= nalloced)
895 			break;
896 		rw_exit(nvf_lock(dcfd_handle));
897 		for (i = 0; i < ndevis; i++)
898 			ndi_rele_devi(devis[i]);
899 		kmem_free(paths, nalloced * sizeof (char *));
900 		kmem_free(devis, nalloced * sizeof (dev_info_t *));
901 		nalloced = n + 128;
902 	}
903 
904 	for (i = 0; i < npaths; i++) {
905 		path = i_ddi_strdup(paths[i], KM_SLEEP);
906 		paths[i] = path;
907 	}
908 	rw_exit(nvf_lock(dcfd_handle));
909 
910 	if (ndevis == 0 && npaths == 0) {
911 		DEVID_LOG_ERR(("no devid found", devid, NULL));
912 		kmem_free(paths, nalloced * sizeof (char *));
913 		kmem_free(devis, nalloced * sizeof (dev_info_t *));
914 		return (DDI_FAILURE);
915 	}
916 
917 	ndevts_alloced = 128;
918 restart:
919 	ndevts = 0;
920 	devts = kmem_alloc(ndevts_alloced * sizeof (dev_t), KM_SLEEP);
921 	for (i = 0; i < ndevis; i++) {
922 		ASSERT(!DEVI_IS_ATTACHING(devis[i]));
923 		ASSERT(!DEVI_IS_DETACHING(devis[i]));
924 		e_devid_minor_to_devlist(devis[i], minor_name,
925 		    ndevts_alloced, &ndevts, devts);
926 		if (ndevts > ndevts_alloced) {
927 			kmem_free(devts, ndevts_alloced * sizeof (dev_t));
928 			ndevts_alloced += 128;
929 			goto restart;
930 		}
931 	}
932 	for (i = 0; i < npaths; i++) {
933 		DEVID_LOG_LOOKUP((CE_CONT, "lookup %s\n", paths[i]));
934 		devi = e_ddi_hold_devi_by_path(paths[i], 0);
935 		if (devi == NULL) {
936 			DEVID_LOG_STALE(("stale device reference",
937 			    devid, paths[i]));
938 			continue;
939 		}
940 		/*
941 		 * Verify the newly attached device registered a matching devid
942 		 */
943 		if (i_ddi_devi_get_devid(DDI_DEV_T_ANY, devi,
944 		    &match_devid) != DDI_SUCCESS) {
945 			DEVIDERR((CE_CONT,
946 			    "%s: no devid registered on attach\n",
947 			    paths[i]));
948 			ddi_release_devi(devi);
949 			continue;
950 		}
951 
952 		if (ddi_devid_compare(devid, match_devid) != 0) {
953 			DEVID_LOG_STALE(("new devid registered",
954 			    devid, paths[i]));
955 			ddi_release_devi(devi);
956 			ddi_devid_free(match_devid);
957 			continue;
958 		}
959 		ddi_devid_free(match_devid);
960 
961 		e_devid_minor_to_devlist(devi, minor_name,
962 		    ndevts_alloced, &ndevts, devts);
963 		ddi_release_devi(devi);
964 		if (ndevts > ndevts_alloced) {
965 			kmem_free(devts,
966 			    ndevts_alloced * sizeof (dev_t));
967 			ndevts_alloced += 128;
968 			goto restart;
969 		}
970 	}
971 
972 	/* drop hold from e_devid_cache_devi_path_lists */
973 	for (i = 0; i < ndevis; i++) {
974 		ndi_rele_devi(devis[i]);
975 	}
976 	for (i = 0; i < npaths; i++) {
977 		kmem_free(paths[i], strlen(paths[i]) + 1);
978 	}
979 	kmem_free(paths, nalloced * sizeof (char *));
980 	kmem_free(devis, nalloced * sizeof (dev_info_t *));
981 
982 	if (ndevts == 0) {
983 		DEVID_LOG_ERR(("no devid found", devid, NULL));
984 		kmem_free(devts, ndevts_alloced * sizeof (dev_t));
985 		return (DDI_FAILURE);
986 	}
987 
988 	/*
989 	 * Build the final list of sorted dev_t's with duplicates collapsed so
990 	 * returned results are consistent. This prevents implementation
991 	 * artifacts from causing unnecessary changes in SVM namespace.
992 	 */
993 	/* bubble sort */
994 	for (i = 0; i < (ndevts - 1); i++) {
995 		for (j = 0; j < ((ndevts - 1) - i); j++) {
996 			if (devts[j + 1] < devts[j]) {
997 				tdevt = devts[j];
998 				devts[j] = devts[j + 1];
999 				devts[j + 1] = tdevt;
1000 			}
1001 		}
1002 	}
1003 
1004 	/* determine number of unique values */
1005 	for (undevts = ndevts, i = 1; i < ndevts; i++) {
1006 		if (devts[i - 1] == devts[i])
1007 			undevts--;
1008 	}
1009 
1010 	/* allocate unique */
1011 	udevts = kmem_alloc(undevts * sizeof (dev_t), KM_SLEEP);
1012 
1013 	/* copy unique */
1014 	udevts[0] = devts[0];
1015 	for (i = 1, j = 1; i < ndevts; i++) {
1016 		if (devts[i - 1] != devts[i])
1017 			udevts[j++] = devts[i];
1018 	}
1019 	ASSERT(j == undevts);
1020 
1021 	kmem_free(devts, ndevts_alloced * sizeof (dev_t));
1022 
1023 	*retndevts = undevts;
1024 	*retdevts = udevts;
1025 
1026 	return (DDI_SUCCESS);
1027 }
1028 
1029 void
1030 e_devid_cache_free_devt_list(int ndevts, dev_t *devt_list)
1031 {
1032 	kmem_free(devt_list, ndevts * sizeof (dev_t *));
1033 }
1034 
1035 /*
1036  * If given a full path and NULL ua, search for a cache entry
1037  * whose path matches the full path.  On a cache hit duplicate the
1038  * devid of the matched entry into the given devid (caller
1039  * must free);  nodenamebuf is not touched for this usage.
1040  *
1041  * Given a path and a non-NULL unit address, search the cache for any entry
1042  * matching "<path>/%@<unit-address>" where '%' is a wildcard meaning
1043  * any node name.  The path should not end a '/'.  On a cache hit
1044  * duplicate the devid as before (caller must free) and copy into
1045  * the caller-provided nodenamebuf (if not NULL) the nodename of the
1046  * matched entry.
1047  *
1048  * We must not make use of nvp_dip since that may be NULL for cached
1049  * entries that are not present in the current tree.
1050  */
1051 int
1052 e_devid_cache_path_to_devid(char *path, char *ua,
1053     char *nodenamebuf, ddi_devid_t *devidp)
1054 {
1055 	size_t pathlen, ualen;
1056 	int rv = DDI_FAILURE;
1057 	nvp_devid_t *np;
1058 	list_t *listp;
1059 	char *cand;
1060 
1061 	if (path == NULL || *path == '\0' || (ua && *ua == '\0') ||
1062 	    devidp == NULL)
1063 		return (DDI_FAILURE);
1064 
1065 	*devidp = NULL;
1066 
1067 	if (ua) {
1068 		pathlen = strlen(path);
1069 		ualen = strlen(ua);
1070 	}
1071 
1072 	rw_enter(nvf_lock(dcfd_handle), RW_READER);
1073 
1074 	listp = nvf_list(dcfd_handle);
1075 	for (np = list_head(listp); np; np = list_next(listp, np)) {
1076 		size_t nodelen, candlen, n;
1077 		ddi_devid_t devid_dup;
1078 		char *uasep, *node;
1079 
1080 		if (np->nvp_devid == NULL)
1081 			continue;
1082 
1083 		if (ddi_devid_valid(np->nvp_devid) != DDI_SUCCESS) {
1084 			DEVIDERR((CE_CONT,
1085 			    "pathsearch: invalid devid %s\n",
1086 			    np->nvp_devpath));
1087 			continue;
1088 		}
1089 
1090 		cand = np->nvp_devpath;		/* candidate path */
1091 
1092 		/* If a full pathname was provided the compare is easy */
1093 		if (ua == NULL) {
1094 			if (strcmp(cand, path) == 0)
1095 				goto match;
1096 			else
1097 				continue;
1098 		}
1099 
1100 		/*
1101 		 * The compare for initial path plus ua and unknown nodename
1102 		 * is trickier.
1103 		 *
1104 		 * Does the initial path component match 'path'?
1105 		 */
1106 		if (strncmp(path, cand, pathlen) != 0)
1107 			continue;
1108 
1109 		candlen = strlen(cand);
1110 
1111 		/*
1112 		 * The next character must be a '/' and there must be no
1113 		 * further '/' thereafter.  Begin by checking that the
1114 		 * candidate is long enough to include at mininum a
1115 		 * "/<nodename>@<ua>" after the initial portion already
1116 		 * matched assuming a nodename length of 1.
1117 		 */
1118 		if (candlen < pathlen + 1 + 1 + 1 + ualen ||
1119 		    cand[pathlen] != '/' ||
1120 		    strchr(cand + pathlen + 1, '/') != NULL)
1121 			continue;
1122 
1123 		node = cand + pathlen + 1;	/* <node>@<ua> string */
1124 
1125 		/*
1126 		 * Find the '@' before the unit address.  Check for
1127 		 * unit address match.
1128 		 */
1129 		if ((uasep = strchr(node, '@')) == NULL)
1130 			continue;
1131 
1132 		/*
1133 		 * Check we still have enough length and that ua matches
1134 		 */
1135 		nodelen = (uintptr_t)uasep - (uintptr_t)node;
1136 		if (candlen < pathlen + 1 + nodelen + 1 + ualen ||
1137 		    strncmp(ua, uasep + 1, ualen) != 0)
1138 			continue;
1139 match:
1140 		n = ddi_devid_sizeof(np->nvp_devid);
1141 		devid_dup = kmem_alloc(n, KM_SLEEP);	/* caller must free */
1142 		(void) bcopy(np->nvp_devid, devid_dup, n);
1143 		*devidp = devid_dup;
1144 
1145 		if (ua && nodenamebuf) {
1146 			(void) strncpy(nodenamebuf, node, nodelen);
1147 			nodenamebuf[nodelen] = '\0';
1148 		}
1149 
1150 		rv = DDI_SUCCESS;
1151 		break;
1152 	}
1153 
1154 	rw_exit(nvf_lock(dcfd_handle));
1155 
1156 	return (rv);
1157 }
1158 
#ifdef	DEBUG
/*
 * Debug helper: print a tag, an optional path and the string-encoded
 * form of a devid.  Despite its name, 'fmt' is printed as a plain
 * string tag and is not interpreted as a format.
 */
static void
devid_log(char *fmt, ddi_devid_t devid, char *path)
{
	char *encoded = ddi_devid_str_encode(devid, NULL);

	if (path == NULL)
		cmn_err(CE_CONT, "%s: %s\n", fmt, encoded);
	else
		cmn_err(CE_CONT, "%s: %s %s\n", fmt, path, encoded);

	ddi_devid_str_free(encoded);
}
#endif	/* DEBUG */
1172