xref: /illumos-gate/usr/src/uts/common/os/devid_cache.c (revision 2e837a72011f54762249b6612c2a64f171efcd43)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright (c) 2018 by Delphix. All rights reserved.
24  */
25 
26 #include <sys/note.h>
27 #include <sys/t_lock.h>
28 #include <sys/cmn_err.h>
29 #include <sys/instance.h>
30 #include <sys/conf.h>
31 #include <sys/stat.h>
32 #include <sys/ddi.h>
33 #include <sys/hwconf.h>
34 #include <sys/sunddi.h>
35 #include <sys/sunndi.h>
36 #include <sys/sunmdi.h>
37 #include <sys/ddi_impldefs.h>
38 #include <sys/ndi_impldefs.h>
39 #include <sys/kobj.h>
40 #include <sys/devcache.h>
41 #include <sys/devid_cache.h>
42 #include <sys/sysmacros.h>
43 
44 /*
45  * Discovery refers to the heroic effort made to discover a device which
46  * cannot be accessed at the physical path where it once resided.  Discovery
47  * involves walking the entire device tree attaching all possible disk
48  * instances, to search for the device referenced by a devid.  Obviously,
49  * full device discovery is something to be avoided where possible.
50  * Note that simply invoking devfsadm(1M) is equivalent to running full
51  * discovery at the devid cache level.
52  *
53  * Reasons why a disk may not be accessible:
54  *	disk powered off
55  *	disk removed or cable disconnected
56  *	disk or adapter broken
57  *
58  * Note that discovery is not needed and cannot succeed in any of these
59  * cases.
60  *
61  * When discovery may succeed:
62  *	Discovery will result in success when a device has been moved
63  *	to a different address.  Note that it's recommended that
64  *	devfsadm(1M) be invoked (no arguments required) whenever a system's
65  *	h/w configuration has been updated.  Alternatively, a
66  *	reconfiguration boot can be used to accomplish the same result.
67  *
68  * Note that discovery is not necessary to be able to correct an access
69  * failure for a device which was powered off.  Assuming the cache has an
70  * entry for such a device, simply powering it on should permit the system
71  * to access it.  If problems persist after powering it on, invoke
72  * devfsadm(1M).
73  *
 * Discovery prior to mounting root is only of interest when booting
 * from a filesystem which accesses devices by device id, which not
 * all do.
77  *
78  * Tunables
79  *
80  * devid_discovery_boot (default 1)
81  *	Number of times discovery will be attempted prior to mounting root.
82  *	Must be done at least once to recover from corrupted or missing
83  *	devid cache backing store.  Probably there's no reason to ever
84  *	set this to greater than one as a missing device will remain
85  *	unavailable no matter how often the system searches for it.
86  *
87  * devid_discovery_postboot (default 1)
88  *	Number of times discovery will be attempted after mounting root.
89  *	This must be performed at least once to discover any devices
90  *	needed after root is mounted which may have been powered
91  *	off and moved before booting.
92  *	Setting this to a larger positive number will introduce
93  *	some inconsistency in system operation.  Searching for a device
94  *	will take an indeterminate amount of time, sometimes slower,
95  *	sometimes faster.  In addition, the system will sometimes
96  *	discover a newly powered on device, sometimes it won't.
97  *	Use of this option is not therefore recommended.
98  *
99  * devid_discovery_postboot_always (default 0)
100  *	Set to 1, the system will always attempt full discovery.
101  *
102  * devid_discovery_secs (default 0)
103  *	Set to a positive value, the system will attempt full discovery
104  *	but with a minimum delay between attempts.  A device search
105  *	within the period of time specified will result in failure.
106  *
107  * devid_cache_read_disable (default 0)
108  *	Set to 1 to disable reading /etc/devices/devid_cache.
109  *	Devid cache will continue to operate normally but
110  *	at least one discovery attempt will be required.
111  *
112  * devid_cache_write_disable (default 0)
113  *	Set to 1 to disable updates to /etc/devices/devid_cache.
114  *	Any updates to the devid cache will not be preserved across a reboot.
115  *
116  * devid_report_error (default 0)
117  *	Set to 1 to enable some error messages related to devid
118  *	cache failures.
119  *
120  * The devid is packed in the cache file as a byte array.  For
121  * portability, this could be done in the encoded string format.
122  */
123 
124 
/* Full-discovery attempt budgets; each is decremented as it is used. */
int devid_discovery_boot = 1;		/* attempts before I/O is initialized */
int devid_discovery_postboot = 1;	/* attempts after I/O is initialized */

/* Post-boot discovery policy. */
int devid_discovery_postboot_always = 0; /* > 0: always allow discovery */
int devid_discovery_secs = 0;		/* > 0: min seconds between passes */

/* Controls for the /etc/devices/devid_cache backing store. */
int devid_cache_read_disable = 0;	/* nonzero: do not read the file */
int devid_cache_write_disable = 0;	/* nonzero: never mark cache dirty */

/* Set to 1 to enable some devid cache error messages. */
int devid_report_error = 0;
134 
135 
136 /*
137  * State to manage discovery of devices providing a devid
138  */
139 static int		devid_discovery_busy = 0;
140 static kmutex_t		devid_discovery_mutex;
141 static kcondvar_t	devid_discovery_cv;
142 static clock_t		devid_last_discovery = 0;
143 
144 
#ifdef	DEBUG
/*
 * Debug and logging switches.  They exist only in DEBUG builds and all
 * start out disabled.
 */

/* general debug switches */
int nvp_devid_debug = 0;
int devid_debug = 0;
int devid_register_debug = 0;

/* per-event logging switches */
int devid_log_registers = 0;
int devid_log_unregisters = 0;
int devid_log_finds = 0;
int devid_log_lookups = 0;
int devid_log_discovery = 0;
int devid_log_matches = 0;
int devid_log_paths = 0;
int devid_log_failures = 0;
int devid_log_hold = 0;
int devid_log_removes = 0;
int devid_log_stale = 0;
int devid_log_detaches = 0;
#endif	/* DEBUG */
162 
163 /*
164  * devid cache file registration for cache reads and updates
165  */
166 static nvf_ops_t devid_cache_ops = {
167 	"/etc/devices/devid_cache",		/* path to cache */
168 	devid_cache_unpack_nvlist,		/* read: nvlist to nvp */
169 	devid_cache_pack_list,			/* write: nvp to nvlist */
170 	devid_list_free,			/* free data list */
171 	NULL					/* write complete callback */
172 };
173 
174 /*
175  * handle to registered devid cache handlers
176  */
177 nvf_handle_t	dcfd_handle;
178 
179 
180 /*
181  * Initialize devid cache file management
182  */
183 void
184 devid_cache_init(void)
185 {
186 	dcfd_handle = nvf_register_file(&devid_cache_ops);
187 	ASSERT(dcfd_handle);
188 
189 	list_create(nvf_list(dcfd_handle), sizeof (nvp_devid_t),
190 	    offsetof(nvp_devid_t, nvp_link));
191 
192 	mutex_init(&devid_discovery_mutex, NULL, MUTEX_DEFAULT, NULL);
193 	cv_init(&devid_discovery_cv, NULL, CV_DRIVER, NULL);
194 }
195 
196 /*
197  * Read and initialize the devid cache from the persistent store
198  */
199 void
200 devid_cache_read(void)
201 {
202 	if (!devid_cache_read_disable) {
203 		rw_enter(nvf_lock(dcfd_handle), RW_WRITER);
204 		ASSERT(list_head(nvf_list(dcfd_handle)) == NULL);
205 		(void) nvf_read_file(dcfd_handle);
206 		rw_exit(nvf_lock(dcfd_handle));
207 	}
208 }
209 
210 static void
211 devid_nvp_free(nvp_devid_t *dp)
212 {
213 	if (dp->nvp_devpath)
214 		kmem_free(dp->nvp_devpath, strlen(dp->nvp_devpath)+1);
215 	if (dp->nvp_devid)
216 		kmem_free(dp->nvp_devid, ddi_devid_sizeof(dp->nvp_devid));
217 
218 	kmem_free(dp, sizeof (nvp_devid_t));
219 }
220 
221 static void
222 devid_list_free(nvf_handle_t fd)
223 {
224 	list_t		*listp;
225 	nvp_devid_t	*np;
226 
227 	ASSERT(RW_WRITE_HELD(nvf_lock(dcfd_handle)));
228 
229 	listp = nvf_list(fd);
230 	while (np = list_head(listp)) {
231 		list_remove(listp, np);
232 		devid_nvp_free(np);
233 	}
234 }
235 
236 /*
237  * Free an nvp element in a list
238  */
239 static void
240 devid_nvp_unlink_and_free(nvf_handle_t fd, nvp_devid_t *np)
241 {
242 	list_remove(nvf_list(fd), np);
243 	devid_nvp_free(np);
244 }
245 
246 /*
247  * Unpack a device path/nvlist pair to the list of devid cache elements.
248  * Used to parse the nvlist format when reading
249  * /etc/devices/devid_cache
250  */
251 static int
252 devid_cache_unpack_nvlist(nvf_handle_t fd, nvlist_t *nvl, char *name)
253 {
254 	nvp_devid_t *np;
255 	ddi_devid_t devidp;
256 	int rval;
257 	uint_t n;
258 
259 	NVP_DEVID_DEBUG_PATH((name));
260 	ASSERT(RW_WRITE_HELD(nvf_lock(dcfd_handle)));
261 
262 	/*
263 	 * check path for a devid
264 	 */
265 	rval = nvlist_lookup_byte_array(nvl,
266 	    DP_DEVID_ID, (uchar_t **)&devidp, &n);
267 	if (rval == 0) {
268 		if (ddi_devid_valid(devidp) == DDI_SUCCESS) {
269 			ASSERT(n == ddi_devid_sizeof(devidp));
270 			np = kmem_zalloc(sizeof (nvp_devid_t), KM_SLEEP);
271 			np->nvp_devpath = i_ddi_strdup(name, KM_SLEEP);
272 			np->nvp_devid = kmem_alloc(n, KM_SLEEP);
273 			(void) bcopy(devidp, np->nvp_devid, n);
274 			list_insert_tail(nvf_list(fd), np);
275 			NVP_DEVID_DEBUG_DEVID((np->nvp_devid));
276 		} else {
277 			DEVIDERR((CE_CONT,
278 			    "%s: invalid devid\n", name));
279 		}
280 	} else {
281 		DEVIDERR((CE_CONT,
282 		    "%s: devid not available\n", name));
283 	}
284 
285 	return (0);
286 }
287 
288 /*
289  * Pack the list of devid cache elements into a single nvlist
290  * Used when writing the nvlist file.
291  */
292 static int
293 devid_cache_pack_list(nvf_handle_t fd, nvlist_t **ret_nvl)
294 {
295 	nvlist_t	*nvl, *sub_nvl;
296 	nvp_devid_t	*np;
297 	int		rval;
298 	list_t		*listp;
299 
300 	ASSERT(RW_WRITE_HELD(nvf_lock(dcfd_handle)));
301 
302 	rval = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP);
303 	if (rval != 0) {
304 		nvf_error("%s: nvlist alloc error %d\n",
305 		    nvf_cache_name(fd), rval);
306 		return (DDI_FAILURE);
307 	}
308 
309 	listp = nvf_list(fd);
310 	for (np = list_head(listp); np; np = list_next(listp, np)) {
311 		if (np->nvp_devid == NULL)
312 			continue;
313 		NVP_DEVID_DEBUG_PATH(np->nvp_devpath);
314 		rval = nvlist_alloc(&sub_nvl, NV_UNIQUE_NAME, KM_SLEEP);
315 		if (rval != 0) {
316 			nvf_error("%s: nvlist alloc error %d\n",
317 			    nvf_cache_name(fd), rval);
318 			sub_nvl = NULL;
319 			goto err;
320 		}
321 
322 		rval = nvlist_add_byte_array(sub_nvl, DP_DEVID_ID,
323 		    (uchar_t *)np->nvp_devid,
324 		    ddi_devid_sizeof(np->nvp_devid));
325 		if (rval == 0) {
326 			NVP_DEVID_DEBUG_DEVID(np->nvp_devid);
327 		} else {
328 			nvf_error(
329 			    "%s: nvlist add error %d (devid)\n",
330 			    nvf_cache_name(fd), rval);
331 			goto err;
332 		}
333 
334 		rval = nvlist_add_nvlist(nvl, np->nvp_devpath, sub_nvl);
335 		if (rval != 0) {
336 			nvf_error("%s: nvlist add error %d (sublist)\n",
337 			    nvf_cache_name(fd), rval);
338 			goto err;
339 		}
340 		nvlist_free(sub_nvl);
341 	}
342 
343 	*ret_nvl = nvl;
344 	return (DDI_SUCCESS);
345 
346 err:
347 	nvlist_free(sub_nvl);
348 	nvlist_free(nvl);
349 	*ret_nvl = NULL;
350 	return (DDI_FAILURE);
351 }
352 
353 static int
354 e_devid_do_discovery(void)
355 {
356 	ASSERT(mutex_owned(&devid_discovery_mutex));
357 
358 	if (i_ddi_io_initialized() == 0) {
359 		if (devid_discovery_boot > 0) {
360 			devid_discovery_boot--;
361 			return (1);
362 		}
363 	} else {
364 		if (devid_discovery_postboot_always > 0)
365 			return (1);
366 		if (devid_discovery_postboot > 0) {
367 			devid_discovery_postboot--;
368 			return (1);
369 		}
370 		if (devid_discovery_secs > 0) {
371 			if ((ddi_get_lbolt() - devid_last_discovery) >
372 			    drv_usectohz(devid_discovery_secs * MICROSEC)) {
373 				return (1);
374 			}
375 		}
376 	}
377 
378 	DEVID_LOG_DISC((CE_CONT, "devid_discovery: no discovery\n"));
379 	return (0);
380 }
381 
382 static void
383 e_ddi_devid_hold_by_major(major_t major)
384 {
385 	DEVID_LOG_DISC((CE_CONT,
386 	    "devid_discovery: ddi_hold_installed_driver %d\n", major));
387 
388 	if (ddi_hold_installed_driver(major) == NULL)
389 		return;
390 
391 	ddi_rele_driver(major);
392 }
393 
/*
 * Legacy support: drivers that e_ddi_devid_hold_installed_driver() holds
 * by name, whether or not they are flagged as devid registrants.
 */
static char *e_ddi_devid_hold_driver_list[] = {
	"sd",
	"ssd"
};

/* number of entries in e_ddi_devid_hold_driver_list */
#define	N_DRIVERS_TO_HOLD	\
	(sizeof (e_ddi_devid_hold_driver_list) /	\
	sizeof (e_ddi_devid_hold_driver_list[0]))
399 
400 static void
401 e_ddi_devid_hold_installed_driver(ddi_devid_t devid)
402 {
403 	impl_devid_t	*id = (impl_devid_t *)devid;
404 	major_t		major, hint_major;
405 	char		hint[DEVID_HINT_SIZE + 1];
406 	struct devnames	*dnp;
407 	char		**drvp;
408 	int		i;
409 
410 	/* Count non-null bytes */
411 	for (i = 0; i < DEVID_HINT_SIZE; i++)
412 		if (id->did_driver[i] == '\0')
413 			break;
414 
415 	/* Make a copy of the driver hint */
416 	bcopy(id->did_driver, hint, i);
417 	hint[i] = '\0';
418 
419 	/* search for the devid using the hint driver */
420 	hint_major = ddi_name_to_major(hint);
421 	if (hint_major != DDI_MAJOR_T_NONE) {
422 		e_ddi_devid_hold_by_major(hint_major);
423 	}
424 
425 	/*
426 	 * search for the devid with each driver declaring
427 	 * itself as a devid registrant.
428 	 */
429 	for (major = 0; major < devcnt; major++) {
430 		if (major == hint_major)
431 			continue;
432 		dnp = &devnamesp[major];
433 		if (dnp->dn_flags & DN_DEVID_REGISTRANT) {
434 			e_ddi_devid_hold_by_major(major);
435 		}
436 	}
437 
438 	/*
439 	 * Legacy support: may be removed once an upgrade mechanism
440 	 * for driver conf files is available.
441 	 */
442 	drvp = e_ddi_devid_hold_driver_list;
443 	for (i = 0; i < N_DRIVERS_TO_HOLD; i++, drvp++) {
444 		major = ddi_name_to_major(*drvp);
445 		if (major != DDI_MAJOR_T_NONE && major != hint_major) {
446 			e_ddi_devid_hold_by_major(major);
447 		}
448 	}
449 }
450 
451 /*
452  * Return success if discovery was attempted, to indicate
453  * that the desired device may now be available.
454  */
455 int
456 e_ddi_devid_discovery(ddi_devid_t devid)
457 {
458 	int flags;
459 	int rval = DDI_SUCCESS;
460 
461 	mutex_enter(&devid_discovery_mutex);
462 
463 	if (devid_discovery_busy) {
464 		DEVID_LOG_DISC((CE_CONT, "devid_discovery: busy\n"));
465 		while (devid_discovery_busy) {
466 			cv_wait(&devid_discovery_cv, &devid_discovery_mutex);
467 		}
468 	} else if (e_devid_do_discovery()) {
469 		devid_discovery_busy = 1;
470 		mutex_exit(&devid_discovery_mutex);
471 
472 		if (i_ddi_io_initialized() == 0) {
473 			e_ddi_devid_hold_installed_driver(devid);
474 		} else {
475 			DEVID_LOG_DISC((CE_CONT,
476 			    "devid_discovery: ndi_devi_config\n"));
477 			flags = NDI_DEVI_PERSIST | NDI_CONFIG | NDI_NO_EVENT;
478 			if (i_ddi_io_initialized())
479 				flags |= NDI_DRV_CONF_REPROBE;
480 			(void) ndi_devi_config(ddi_root_node(), flags);
481 		}
482 
483 		mutex_enter(&devid_discovery_mutex);
484 		devid_discovery_busy = 0;
485 		cv_broadcast(&devid_discovery_cv);
486 		if (devid_discovery_secs > 0)
487 			devid_last_discovery = ddi_get_lbolt();
488 		DEVID_LOG_DISC((CE_CONT, "devid_discovery: done\n"));
489 	} else {
490 		rval = DDI_FAILURE;
491 		DEVID_LOG_DISC((CE_CONT, "no devid discovery\n"));
492 	}
493 
494 	mutex_exit(&devid_discovery_mutex);
495 
496 	return (rval);
497 }
498 
499 /*
500  * As part of registering a devid for a device,
501  * update the devid cache with this device/devid pair
502  * or note that this combination has registered.
503  *
504  * If a devpath is provided it will be used as the path to register the
505  * devid against, otherwise we use ddi_pathname(dip).  In both cases
506  * we duplicate the path string so that it can be cached/freed indepdently
507  * of the original owner.
508  */
509 static int
510 e_devid_cache_register_cmn(dev_info_t *dip, ddi_devid_t devid, char *devpath)
511 {
512 	nvp_devid_t *np;
513 	nvp_devid_t *new_nvp;
514 	ddi_devid_t new_devid;
515 	int new_devid_size;
516 	char *path, *fullpath;
517 	ddi_devid_t free_devid = NULL;
518 	int pathlen;
519 	list_t *listp;
520 	int is_dirty = 0;
521 
522 
523 	ASSERT(ddi_devid_valid(devid) == DDI_SUCCESS);
524 
525 	if (devpath) {
526 		pathlen = strlen(devpath) + 1;
527 		path = kmem_alloc(pathlen, KM_SLEEP);
528 		bcopy(devpath, path, pathlen);
529 	} else {
530 		/*
531 		 * We are willing to accept DS_BOUND nodes if we can form a full
532 		 * ddi_pathname (i.e. the node is part way to becomming
533 		 * DS_INITIALIZED and devi_addr/ddi_get_name_addr are non-NULL).
534 		 */
535 		if (ddi_get_name_addr(dip) == NULL)
536 			return (DDI_FAILURE);
537 
538 		fullpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
539 		(void) ddi_pathname(dip, fullpath);
540 		pathlen = strlen(fullpath) + 1;
541 		path = kmem_alloc(pathlen, KM_SLEEP);
542 		bcopy(fullpath, path, pathlen);
543 		kmem_free(fullpath, MAXPATHLEN);
544 	}
545 
546 	DEVID_LOG_REG(("register", devid, path));
547 
548 	new_nvp = kmem_zalloc(sizeof (nvp_devid_t), KM_SLEEP);
549 	new_devid_size = ddi_devid_sizeof(devid);
550 	new_devid = kmem_alloc(new_devid_size, KM_SLEEP);
551 	(void) bcopy(devid, new_devid, new_devid_size);
552 
553 	rw_enter(nvf_lock(dcfd_handle), RW_WRITER);
554 
555 	listp = nvf_list(dcfd_handle);
556 	for (np = list_head(listp); np; np = list_next(listp, np)) {
557 		if (strcmp(path, np->nvp_devpath) == 0) {
558 			DEVID_DEBUG2((CE_CONT,
559 			    "register: %s path match\n", path));
560 			if (np->nvp_devid == NULL) {
561 replace:			np->nvp_devid = new_devid;
562 				np->nvp_flags |=
563 				    NVP_DEVID_DIP | NVP_DEVID_REGISTERED;
564 				np->nvp_dip = dip;
565 				if (!devid_cache_write_disable) {
566 					nvf_mark_dirty(dcfd_handle);
567 					is_dirty = 1;
568 				}
569 				rw_exit(nvf_lock(dcfd_handle));
570 				kmem_free(new_nvp, sizeof (nvp_devid_t));
571 				kmem_free(path, pathlen);
572 				goto exit;
573 			}
574 			if (ddi_devid_valid(np->nvp_devid) != DDI_SUCCESS) {
575 				/* replace invalid devid */
576 				free_devid = np->nvp_devid;
577 				goto replace;
578 			}
579 			/*
580 			 * We're registering an already-cached path
581 			 * Does the device's devid match the cache?
582 			 */
583 			if (ddi_devid_compare(devid, np->nvp_devid) != 0) {
584 				DEVID_DEBUG((CE_CONT, "devid register: "
585 				    "devid %s does not match\n", path));
586 				/*
587 				 * We do not expect devids to change, log it.
588 				 */
589 				char *devid_stored =
590 				    ddi_devid_str_encode(np->nvp_devid, NULL);
591 				char *devid_new =
592 				    ddi_devid_str_encode(devid, NULL);
593 
594 				cmn_err(CE_CONT, "devid register: devid for "
595 				    "%s does not match. stored: %s, new: %s.",
596 				    path, devid_stored, devid_new);
597 
598 				ddi_devid_str_free(devid_stored);
599 				ddi_devid_str_free(devid_new);
600 
601 				/*
602 				 * Replace cached devid for this path
603 				 * with newly registered devid.  A devid
604 				 * may map to multiple paths but one path
605 				 * should only map to one devid.
606 				 */
607 				devid_nvp_unlink_and_free(dcfd_handle, np);
608 				np = NULL;
609 				break;
610 			} else {
611 				DEVID_DEBUG2((CE_CONT,
612 				    "devid register: %s devid match\n", path));
613 				np->nvp_flags |=
614 				    NVP_DEVID_DIP | NVP_DEVID_REGISTERED;
615 				np->nvp_dip = dip;
616 				rw_exit(nvf_lock(dcfd_handle));
617 				kmem_free(new_nvp, sizeof (nvp_devid_t));
618 				kmem_free(path, pathlen);
619 				kmem_free(new_devid, new_devid_size);
620 				return (DDI_SUCCESS);
621 			}
622 		}
623 	}
624 
625 	/*
626 	 * Add newly registered devid to the cache
627 	 */
628 	ASSERT(np == NULL);
629 
630 	new_nvp->nvp_devpath = path;
631 	new_nvp->nvp_flags = NVP_DEVID_DIP | NVP_DEVID_REGISTERED;
632 	new_nvp->nvp_dip = dip;
633 	new_nvp->nvp_devid = new_devid;
634 
635 	if (!devid_cache_write_disable) {
636 		is_dirty = 1;
637 		nvf_mark_dirty(dcfd_handle);
638 	}
639 	list_insert_tail(nvf_list(dcfd_handle), new_nvp);
640 
641 	rw_exit(nvf_lock(dcfd_handle));
642 
643 exit:
644 	if (free_devid)
645 		kmem_free(free_devid, ddi_devid_sizeof(free_devid));
646 
647 	if (is_dirty)
648 		nvf_wake_daemon();
649 
650 	return (DDI_SUCCESS);
651 }
652 
653 int
654 e_devid_cache_register(dev_info_t *dip, ddi_devid_t devid)
655 {
656 	return (e_devid_cache_register_cmn(dip, devid, NULL));
657 }
658 
659 /*
660  * Unregister a device's devid; the devinfo may hit on multiple entries
661  * arising from both pHCI and vHCI paths.
662  * Called as an instance detachs.
663  * Invalidate the devid's devinfo reference.
664  * Devid-path remains in the cache.
665  */
666 
667 void
668 e_devid_cache_unregister(dev_info_t *dip)
669 {
670 	nvp_devid_t *np;
671 	list_t *listp;
672 
673 	rw_enter(nvf_lock(dcfd_handle), RW_WRITER);
674 
675 	listp = nvf_list(dcfd_handle);
676 	for (np = list_head(listp); np; np = list_next(listp, np)) {
677 		if (np->nvp_devid == NULL)
678 			continue;
679 		if ((np->nvp_flags & NVP_DEVID_DIP) && np->nvp_dip == dip) {
680 			DEVID_LOG_UNREG((CE_CONT,
681 			    "unregister: %s\n", np->nvp_devpath));
682 			np->nvp_flags &= ~NVP_DEVID_DIP;
683 			np->nvp_dip = NULL;
684 		}
685 	}
686 
687 	rw_exit(nvf_lock(dcfd_handle));
688 }
689 
690 int
691 e_devid_cache_pathinfo(mdi_pathinfo_t *pip, ddi_devid_t devid)
692 {
693 	char *path = mdi_pi_pathname(pip);
694 
695 	return (e_devid_cache_register_cmn(mdi_pi_get_client(pip), devid,
696 	    path));
697 }
698 
699 /*
700  * Purge devid cache of stale devids
701  */
702 void
703 devid_cache_cleanup(void)
704 {
705 	nvp_devid_t *np, *next;
706 	list_t *listp;
707 	int is_dirty = 0;
708 
709 	rw_enter(nvf_lock(dcfd_handle), RW_WRITER);
710 
711 	listp = nvf_list(dcfd_handle);
712 	for (np = list_head(listp); np; np = next) {
713 		next = list_next(listp, np);
714 		if (np->nvp_devid == NULL)
715 			continue;
716 		if ((np->nvp_flags & NVP_DEVID_REGISTERED) == 0) {
717 			DEVID_LOG_REMOVE((CE_CONT,
718 			    "cleanup: %s\n", np->nvp_devpath));
719 			if (!devid_cache_write_disable) {
720 				nvf_mark_dirty(dcfd_handle);
721 				is_dirty = 0;
722 			}
723 			devid_nvp_unlink_and_free(dcfd_handle, np);
724 		}
725 	}
726 
727 	rw_exit(nvf_lock(dcfd_handle));
728 
729 	if (is_dirty)
730 		nvf_wake_daemon();
731 }
732 
733 
734 /*
735  * Build a list of dev_t's for a device/devid
736  *
737  * The effect of this function is cumulative, adding dev_t's
738  * for the device to the list of all dev_t's for a given
739  * devid.
740  */
741 static void
742 e_devid_minor_to_devlist(
743 	dev_info_t	*dip,
744 	char		*minor_name,
745 	int		ndevts_alloced,
746 	int		*devtcntp,
747 	dev_t		*devtsp)
748 {
749 	int			circ;
750 	struct ddi_minor_data	*dmdp;
751 	int			minor_all = 0;
752 	int			ndevts = *devtcntp;
753 
754 	ASSERT(i_ddi_devi_attached(dip));
755 
756 	/* are we looking for a set of minor nodes? */
757 	if ((minor_name == DEVID_MINOR_NAME_ALL) ||
758 	    (minor_name == DEVID_MINOR_NAME_ALL_CHR) ||
759 	    (minor_name == DEVID_MINOR_NAME_ALL_BLK))
760 		minor_all = 1;
761 
762 	/* Find matching minor names */
763 	ndi_devi_enter(dip, &circ);
764 	for (dmdp = DEVI(dip)->devi_minor; dmdp; dmdp = dmdp->next) {
765 
766 		/* Skip non-minors, and non matching minor names */
767 		if ((dmdp->type != DDM_MINOR) || ((minor_all == 0) &&
768 		    strcmp(dmdp->ddm_name, minor_name)))
769 			continue;
770 
771 		/* filter out minor_all mismatches */
772 		if (minor_all &&
773 		    (((minor_name == DEVID_MINOR_NAME_ALL_CHR) &&
774 		    (dmdp->ddm_spec_type != S_IFCHR)) ||
775 		    ((minor_name == DEVID_MINOR_NAME_ALL_BLK) &&
776 		    (dmdp->ddm_spec_type != S_IFBLK))))
777 			continue;
778 
779 		if (ndevts < ndevts_alloced)
780 			devtsp[ndevts] = dmdp->ddm_dev;
781 		ndevts++;
782 	}
783 	ndi_devi_exit(dip, circ);
784 
785 	*devtcntp = ndevts;
786 }
787 
788 /*
789  * Search for cached entries matching a devid
790  * Return two lists:
791  *	a list of dev_info nodes, for those devices in the attached state
792  *	a list of pathnames whose instances registered the given devid
793  * If the lists passed in are not sufficient to return the matching
794  * references, return the size of lists required.
795  * The dev_info nodes are returned with a hold that the caller must release.
796  */
797 static int
798 e_devid_cache_devi_path_lists(ddi_devid_t devid, int retmax,
799     int *retndevis, dev_info_t **retdevis, int *retnpaths, char **retpaths)
800 {
801 	nvp_devid_t *np;
802 	int ndevis, npaths;
803 	dev_info_t *dip, *pdip;
804 	int circ;
805 	int maxdevis = 0;
806 	int maxpaths = 0;
807 	list_t *listp;
808 
809 	ndevis = 0;
810 	npaths = 0;
811 	listp = nvf_list(dcfd_handle);
812 	for (np = list_head(listp); np; np = list_next(listp, np)) {
813 		if (np->nvp_devid == NULL)
814 			continue;
815 		if (ddi_devid_valid(np->nvp_devid) != DDI_SUCCESS) {
816 			DEVIDERR((CE_CONT,
817 			    "find: invalid devid %s\n",
818 			    np->nvp_devpath));
819 			continue;
820 		}
821 		if (ddi_devid_compare(devid, np->nvp_devid) == 0) {
822 			DEVID_DEBUG2((CE_CONT,
823 			    "find: devid match: %s 0x%x\n",
824 			    np->nvp_devpath, np->nvp_flags));
825 			DEVID_LOG_MATCH(("find", devid, np->nvp_devpath));
826 			DEVID_LOG_PATHS((CE_CONT, "%s\n", np->nvp_devpath));
827 
828 			/*
829 			 * Check if we have a cached devinfo reference for this
830 			 * devid.  Place a hold on it to prevent detach
831 			 * Otherwise, use the path instead.
832 			 * Note: returns with a hold on each dev_info
833 			 * node in the list.
834 			 */
835 			dip = NULL;
836 			if (np->nvp_flags & NVP_DEVID_DIP) {
837 				pdip = ddi_get_parent(np->nvp_dip);
838 				if (ndi_devi_tryenter(pdip, &circ)) {
839 					dip = np->nvp_dip;
840 					ndi_hold_devi(dip);
841 					ndi_devi_exit(pdip, circ);
842 					ASSERT(!DEVI_IS_ATTACHING(dip));
843 					ASSERT(!DEVI_IS_DETACHING(dip));
844 				} else {
845 					DEVID_LOG_DETACH((CE_CONT,
846 					    "may be detaching: %s\n",
847 					    np->nvp_devpath));
848 				}
849 			}
850 
851 			if (dip) {
852 				if (ndevis < retmax) {
853 					retdevis[ndevis++] = dip;
854 				} else {
855 					ndi_rele_devi(dip);
856 				}
857 				maxdevis++;
858 			} else {
859 				if (npaths < retmax)
860 					retpaths[npaths++] = np->nvp_devpath;
861 				maxpaths++;
862 			}
863 		}
864 	}
865 
866 	*retndevis = ndevis;
867 	*retnpaths = npaths;
868 	return (maxdevis > maxpaths ? maxdevis : maxpaths);
869 }
870 
871 
872 /*
873  * Search the devid cache, returning dev_t list for all
874  * device paths mapping to the device identified by the
875  * given devid.
876  *
877  * Primary interface used by ddi_lyr_devid_to_devlist()
878  */
879 int
880 e_devid_cache_to_devt_list(ddi_devid_t devid, char *minor_name,
881     int *retndevts, dev_t **retdevts)
882 {
883 	char		*path, **paths;
884 	int		i, j, n;
885 	dev_t		*devts, *udevts;
886 	dev_t		tdevt;
887 	int		ndevts, undevts, ndevts_alloced;
888 	dev_info_t	*devi, **devis;
889 	int		ndevis, npaths, nalloced;
890 	ddi_devid_t	match_devid;
891 
892 	DEVID_LOG_FIND(("find", devid, NULL));
893 
894 	ASSERT(ddi_devid_valid(devid) == DDI_SUCCESS);
895 	if (ddi_devid_valid(devid) != DDI_SUCCESS) {
896 		DEVID_LOG_ERR(("invalid devid", devid, NULL));
897 		return (DDI_FAILURE);
898 	}
899 
900 	nalloced = 128;
901 
902 	for (;;) {
903 		paths = kmem_zalloc(nalloced * sizeof (char *), KM_SLEEP);
904 		devis = kmem_zalloc(nalloced * sizeof (dev_info_t *), KM_SLEEP);
905 
906 		rw_enter(nvf_lock(dcfd_handle), RW_READER);
907 		n = e_devid_cache_devi_path_lists(devid, nalloced,
908 		    &ndevis, devis, &npaths, paths);
909 		if (n <= nalloced)
910 			break;
911 		rw_exit(nvf_lock(dcfd_handle));
912 		for (i = 0; i < ndevis; i++)
913 			ndi_rele_devi(devis[i]);
914 		kmem_free(paths, nalloced * sizeof (char *));
915 		kmem_free(devis, nalloced * sizeof (dev_info_t *));
916 		nalloced = n + 128;
917 	}
918 
919 	for (i = 0; i < npaths; i++) {
920 		path = i_ddi_strdup(paths[i], KM_SLEEP);
921 		paths[i] = path;
922 	}
923 	rw_exit(nvf_lock(dcfd_handle));
924 
925 	if (ndevis == 0 && npaths == 0) {
926 		DEVID_LOG_ERR(("no devid found", devid, NULL));
927 		kmem_free(paths, nalloced * sizeof (char *));
928 		kmem_free(devis, nalloced * sizeof (dev_info_t *));
929 		return (DDI_FAILURE);
930 	}
931 
932 	ndevts_alloced = 128;
933 restart:
934 	ndevts = 0;
935 	devts = kmem_alloc(ndevts_alloced * sizeof (dev_t), KM_SLEEP);
936 	for (i = 0; i < ndevis; i++) {
937 		ASSERT(!DEVI_IS_ATTACHING(devis[i]));
938 		ASSERT(!DEVI_IS_DETACHING(devis[i]));
939 		e_devid_minor_to_devlist(devis[i], minor_name,
940 		    ndevts_alloced, &ndevts, devts);
941 		if (ndevts > ndevts_alloced) {
942 			kmem_free(devts, ndevts_alloced * sizeof (dev_t));
943 			ndevts_alloced += 128;
944 			goto restart;
945 		}
946 	}
947 	for (i = 0; i < npaths; i++) {
948 		DEVID_LOG_LOOKUP((CE_CONT, "lookup %s\n", paths[i]));
949 		devi = e_ddi_hold_devi_by_path(paths[i], 0);
950 		if (devi == NULL) {
951 			DEVID_LOG_STALE(("stale device reference",
952 			    devid, paths[i]));
953 			continue;
954 		}
955 		/*
956 		 * Verify the newly attached device registered a matching devid
957 		 */
958 		if (i_ddi_devi_get_devid(DDI_DEV_T_ANY, devi,
959 		    &match_devid) != DDI_SUCCESS) {
960 			DEVIDERR((CE_CONT,
961 			    "%s: no devid registered on attach\n",
962 			    paths[i]));
963 			ddi_release_devi(devi);
964 			continue;
965 		}
966 
967 		if (ddi_devid_compare(devid, match_devid) != 0) {
968 			DEVID_LOG_STALE(("new devid registered",
969 			    devid, paths[i]));
970 			ddi_release_devi(devi);
971 			ddi_devid_free(match_devid);
972 			continue;
973 		}
974 		ddi_devid_free(match_devid);
975 
976 		e_devid_minor_to_devlist(devi, minor_name,
977 		    ndevts_alloced, &ndevts, devts);
978 		ddi_release_devi(devi);
979 		if (ndevts > ndevts_alloced) {
980 			kmem_free(devts,
981 			    ndevts_alloced * sizeof (dev_t));
982 			ndevts_alloced += 128;
983 			goto restart;
984 		}
985 	}
986 
987 	/* drop hold from e_devid_cache_devi_path_lists */
988 	for (i = 0; i < ndevis; i++) {
989 		ndi_rele_devi(devis[i]);
990 	}
991 	for (i = 0; i < npaths; i++) {
992 		kmem_free(paths[i], strlen(paths[i]) + 1);
993 	}
994 	kmem_free(paths, nalloced * sizeof (char *));
995 	kmem_free(devis, nalloced * sizeof (dev_info_t *));
996 
997 	if (ndevts == 0) {
998 		DEVID_LOG_ERR(("no devid found", devid, NULL));
999 		kmem_free(devts, ndevts_alloced * sizeof (dev_t));
1000 		return (DDI_FAILURE);
1001 	}
1002 
1003 	/*
1004 	 * Build the final list of sorted dev_t's with duplicates collapsed so
1005 	 * returned results are consistent. This prevents implementation
1006 	 * artifacts from causing unnecessary changes in SVM namespace.
1007 	 */
1008 	/* bubble sort */
1009 	for (i = 0; i < (ndevts - 1); i++) {
1010 		for (j = 0; j < ((ndevts - 1) - i); j++) {
1011 			if (devts[j + 1] < devts[j]) {
1012 				tdevt = devts[j];
1013 				devts[j] = devts[j + 1];
1014 				devts[j + 1] = tdevt;
1015 			}
1016 		}
1017 	}
1018 
1019 	/* determine number of unique values */
1020 	for (undevts = ndevts, i = 1; i < ndevts; i++) {
1021 		if (devts[i - 1] == devts[i])
1022 			undevts--;
1023 	}
1024 
1025 	/* allocate unique */
1026 	udevts = kmem_alloc(undevts * sizeof (dev_t), KM_SLEEP);
1027 
1028 	/* copy unique */
1029 	udevts[0] = devts[0];
1030 	for (i = 1, j = 1; i < ndevts; i++) {
1031 		if (devts[i - 1] != devts[i])
1032 			udevts[j++] = devts[i];
1033 	}
1034 	ASSERT(j == undevts);
1035 
1036 	kmem_free(devts, ndevts_alloced * sizeof (dev_t));
1037 
1038 	*retndevts = undevts;
1039 	*retdevts = udevts;
1040 
1041 	return (DDI_SUCCESS);
1042 }
1043 
/*
 * Free a dev_t list of 'ndevts' elements.
 *
 * The size handed to kmem_free() must equal the size used when the list
 * was allocated.  The list is an array of dev_t values (not pointers),
 * allocated as count * sizeof (dev_t), so the same element size is used
 * here.
 */
void
e_devid_cache_free_devt_list(int ndevts, dev_t *devt_list)
{
	kmem_free(devt_list, ndevts * sizeof (dev_t));
}
1049 
1050 /*
1051  * If given a full path and NULL ua, search for a cache entry
1052  * whose path matches the full path.  On a cache hit duplicate the
1053  * devid of the matched entry into the given devid (caller
1054  * must free);  nodenamebuf is not touched for this usage.
1055  *
1056  * Given a path and a non-NULL unit address, search the cache for any entry
1057  * matching "<path>/%@<unit-address>" where '%' is a wildcard meaning
1058  * any node name.  The path should not end a '/'.  On a cache hit
1059  * duplicate the devid as before (caller must free) and copy into
1060  * the caller-provided nodenamebuf (if not NULL) the nodename of the
1061  * matched entry.
1062  *
1063  * We must not make use of nvp_dip since that may be NULL for cached
1064  * entries that are not present in the current tree.
1065  */
1066 int
1067 e_devid_cache_path_to_devid(char *path, char *ua,
1068     char *nodenamebuf, ddi_devid_t *devidp)
1069 {
1070 	size_t pathlen, ualen;
1071 	int rv = DDI_FAILURE;
1072 	nvp_devid_t *np;
1073 	list_t *listp;
1074 	char *cand;
1075 
1076 	if (path == NULL || *path == '\0' || (ua && *ua == '\0') ||
1077 	    devidp == NULL)
1078 		return (DDI_FAILURE);
1079 
1080 	*devidp = NULL;
1081 
1082 	if (ua) {
1083 		pathlen = strlen(path);
1084 		ualen = strlen(ua);
1085 	}
1086 
1087 	rw_enter(nvf_lock(dcfd_handle), RW_READER);
1088 
1089 	listp = nvf_list(dcfd_handle);
1090 	for (np = list_head(listp); np; np = list_next(listp, np)) {
1091 		size_t nodelen, candlen, n;
1092 		ddi_devid_t devid_dup;
1093 		char *uasep, *node;
1094 
1095 		if (np->nvp_devid == NULL)
1096 			continue;
1097 
1098 		if (ddi_devid_valid(np->nvp_devid) != DDI_SUCCESS) {
1099 			DEVIDERR((CE_CONT,
1100 			    "pathsearch: invalid devid %s\n",
1101 			    np->nvp_devpath));
1102 			continue;
1103 		}
1104 
1105 		cand = np->nvp_devpath;		/* candidate path */
1106 
1107 		/* If a full pathname was provided the compare is easy */
1108 		if (ua == NULL) {
1109 			if (strcmp(cand, path) == 0)
1110 				goto match;
1111 			else
1112 				continue;
1113 		}
1114 
1115 		/*
1116 		 * The compare for initial path plus ua and unknown nodename
1117 		 * is trickier.
1118 		 *
1119 		 * Does the initial path component match 'path'?
1120 		 */
1121 		if (strncmp(path, cand, pathlen) != 0)
1122 			continue;
1123 
1124 		candlen = strlen(cand);
1125 
1126 		/*
1127 		 * The next character must be a '/' and there must be no
1128 		 * further '/' thereafter.  Begin by checking that the
1129 		 * candidate is long enough to include at mininum a
1130 		 * "/<nodename>@<ua>" after the initial portion already
1131 		 * matched assuming a nodename length of 1.
1132 		 */
1133 		if (candlen < pathlen + 1 + 1 + 1 + ualen ||
1134 		    cand[pathlen] != '/' ||
1135 		    strchr(cand + pathlen + 1, '/') != NULL)
1136 			continue;
1137 
1138 		node = cand + pathlen + 1;	/* <node>@<ua> string */
1139 
1140 		/*
1141 		 * Find the '@' before the unit address.  Check for
1142 		 * unit address match.
1143 		 */
1144 		if ((uasep = strchr(node, '@')) == NULL)
1145 			continue;
1146 
1147 		/*
1148 		 * Check we still have enough length and that ua matches
1149 		 */
1150 		nodelen = (uintptr_t)uasep - (uintptr_t)node;
1151 		if (candlen < pathlen + 1 + nodelen + 1 + ualen ||
1152 		    strncmp(ua, uasep + 1, ualen) != 0)
1153 			continue;
1154 match:
1155 		n = ddi_devid_sizeof(np->nvp_devid);
1156 		devid_dup = kmem_alloc(n, KM_SLEEP);	/* caller must free */
1157 		(void) bcopy(np->nvp_devid, devid_dup, n);
1158 		*devidp = devid_dup;
1159 
1160 		if (ua && nodenamebuf) {
1161 			(void) strncpy(nodenamebuf, node, nodelen);
1162 			nodenamebuf[nodelen] = '\0';
1163 		}
1164 
1165 		rv = DDI_SUCCESS;
1166 		break;
1167 	}
1168 
1169 	rw_exit(nvf_lock(dcfd_handle));
1170 
1171 	return (rv);
1172 }
1173 
1174 #ifdef	DEBUG
1175 static void
1176 devid_log(char *fmt, ddi_devid_t devid, char *path)
1177 {
1178 	char *devidstr = ddi_devid_str_encode(devid, NULL);
1179 	if (path) {
1180 		cmn_err(CE_CONT, "%s: %s %s\n", fmt, path, devidstr);
1181 	} else {
1182 		cmn_err(CE_CONT, "%s: %s\n", fmt, devidstr);
1183 	}
1184 	ddi_devid_str_free(devidstr);
1185 }
1186 #endif	/* DEBUG */
1187