xref: /titanic_41/usr/src/uts/common/os/sunmdi.c (revision 1724dc7b89ff9c4d1006fdb79d20a4102d226def)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 /*
26  * Multipath driver interface (MDI) implementation; see mdi_impldefs.h for a
27  * more detailed discussion of the overall mpxio architecture.
28  *
29  * Default locking order:
30  *
31  * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_phci_mutex);
32  * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_client_mutex);
33  * _NOTE(LOCK_ORDER(mdi_vhci:vh_phci_mutex, mdi_phci::ph_mutex);
34  * _NOTE(LOCK_ORDER(mdi_vhci:vh_client_mutex, mdi_client::ct_mutex);
35  * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
36  * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex))
37  * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
38  */
39 
40 #include <sys/note.h>
41 #include <sys/types.h>
42 #include <sys/varargs.h>
43 #include <sys/param.h>
44 #include <sys/errno.h>
45 #include <sys/uio.h>
46 #include <sys/buf.h>
47 #include <sys/modctl.h>
48 #include <sys/open.h>
49 #include <sys/kmem.h>
50 #include <sys/poll.h>
51 #include <sys/conf.h>
52 #include <sys/bootconf.h>
53 #include <sys/cmn_err.h>
54 #include <sys/stat.h>
55 #include <sys/ddi.h>
56 #include <sys/sunddi.h>
57 #include <sys/ddipropdefs.h>
58 #include <sys/sunndi.h>
59 #include <sys/ndi_impldefs.h>
60 #include <sys/promif.h>
61 #include <sys/sunmdi.h>
62 #include <sys/mdi_impldefs.h>
63 #include <sys/taskq.h>
64 #include <sys/epm.h>
65 #include <sys/sunpm.h>
66 #include <sys/modhash.h>
67 #include <sys/disp.h>
68 #include <sys/autoconf.h>
69 #include <sys/sysmacros.h>
70 
#ifdef	DEBUG
#include <sys/debug.h>
/*
 * Debug-build message control.  MDI_DEBUG() hands its message to
 * i_mdi_log() only when mdi_debug is at least the level of the message.
 */
int	mdi_debug = 1;
int	mdi_debug_logonly = 0;
/*
 * 'pargs' is a complete, parenthesized i_mdi_log() argument list.  The
 * do/while (0) wrapper makes the expansion a single statement, so an
 * invocation used as the body of an unbraced if/else can not capture the
 * else that belongs to the caller.
 */
#define	MDI_DEBUG(dbglevel, pargs)		\
	do {					\
		if (mdi_debug >= (dbglevel))	\
			i_mdi_log pargs;	\
	_NOTE(CONSTCOND) } while (0)
/* Severity and calling function name: the leading i_mdi_log() arguments */
#define	MDI_WARN	CE_WARN, __func__
#define	MDI_NOTE	CE_NOTE, __func__
#define	MDI_CONT	CE_CONT, __func__
static void i_mdi_log(int, const char *, dev_info_t *, const char *, ...);
#else	/* !DEBUG */
/* Non-debug builds compile every MDI_DEBUG() invocation away */
#define	MDI_DEBUG(dbglevel, pargs)
#endif	/* DEBUG */
int	mdi_debug_consoleonly = 0;
/* Argument handed to delay_random() while mdi_devi_enter() retries */
int	mdi_delay = 3;
85 
86 extern pri_t	minclsyspri;
87 extern int	modrootloaded;
88 
89 /*
90  * Global mutex:
91  * Protects vHCI list and structure members.
92  */
93 kmutex_t	mdi_mutex;
94 
95 /*
96  * Registered vHCI class driver lists
97  */
98 int		mdi_vhci_count;
99 mdi_vhci_t	*mdi_vhci_head;
100 mdi_vhci_t	*mdi_vhci_tail;
101 
102 /*
103  * Client Hash Table size
104  */
105 static int	mdi_client_table_size = CLIENT_HASH_TABLE_SIZE;
106 
107 /*
108  * taskq interface definitions
109  */
110 #define	MDI_TASKQ_N_THREADS	8
111 #define	MDI_TASKQ_PRI		minclsyspri
112 #define	MDI_TASKQ_MINALLOC	(4*mdi_taskq_n_threads)
113 #define	MDI_TASKQ_MAXALLOC	(500*mdi_taskq_n_threads)
114 
115 taskq_t				*mdi_taskq;
116 static uint_t			mdi_taskq_n_threads = MDI_TASKQ_N_THREADS;
117 
118 #define	TICKS_PER_SECOND	(drv_usectohz(1000000))
119 
120 /*
121  * The data should be "quiet" for this interval (in seconds) before the
122  * vhci cached data is flushed to the disk.
123  */
124 static int mdi_vhcache_flush_delay = 10;
125 
126 /* number of seconds the vhcache flush daemon will sleep idle before exiting */
127 static int mdi_vhcache_flush_daemon_idle_time = 60;
128 
129 /*
130  * MDI falls back to discovery of all paths when a bus_config_one fails.
131  * The following parameters can be used to tune this operation.
132  *
133  * mdi_path_discovery_boot
134  *	Number of times path discovery will be attempted during early boot.
135  *	Probably there is no reason to ever set this value to greater than one.
136  *
137  * mdi_path_discovery_postboot
138  *	Number of times path discovery will be attempted after early boot.
139  *	Set it to a minimum of two to allow for discovery of iscsi paths which
140  *	may happen very late during booting.
141  *
142  * mdi_path_discovery_interval
143  *	Minimum number of seconds MDI will wait between successive discovery
144  *	of all paths. Set it to -1 to disable discovery of all paths.
145  */
146 static int mdi_path_discovery_boot = 1;
147 static int mdi_path_discovery_postboot = 2;
148 static int mdi_path_discovery_interval = 10;
149 
150 /*
151  * number of seconds the asynchronous configuration thread will sleep idle
152  * before exiting.
153  */
154 static int mdi_async_config_idle_time = 600;
155 
156 static int mdi_bus_config_cache_hash_size = 256;
157 
158 /* turns off multithreaded configuration for certain operations */
159 static int mdi_mtc_off = 0;
160 
161 /*
162  * The "path" to a pathinfo node is identical to the /devices path to a
163  * devinfo node had the device been enumerated under a pHCI instead of
164  * a vHCI.  This pathinfo "path" is associated with a 'path_instance'.
165  * This association persists across create/delete of the pathinfo nodes,
166  * but not across reboot.
167  */
168 static uint_t		mdi_pathmap_instance = 1;	/* 0 -> any path */
169 static int		mdi_pathmap_hash_size = 256;
170 static kmutex_t		mdi_pathmap_mutex;
171 static mod_hash_t	*mdi_pathmap_bypath;		/* "path"->instance */
172 static mod_hash_t	*mdi_pathmap_byinstance;	/* instance->"path" */
173 static mod_hash_t	*mdi_pathmap_sbyinstance;	/* inst->shortpath */
174 
175 /*
176  * MDI component property name/value string definitions
177  */
178 const char 		*mdi_component_prop = "mpxio-component";
179 const char		*mdi_component_prop_vhci = "vhci";
180 const char		*mdi_component_prop_phci = "phci";
181 const char		*mdi_component_prop_client = "client";
182 
183 /*
184  * MDI client global unique identifier property name
185  */
186 const char		*mdi_client_guid_prop = "client-guid";
187 
188 /*
189  * MDI client load balancing property name/value string definitions
190  */
191 const char		*mdi_load_balance = "load-balance";
192 const char		*mdi_load_balance_none = "none";
193 const char		*mdi_load_balance_rr = "round-robin";
194 const char		*mdi_load_balance_lba = "logical-block";
195 
196 /*
197  * Obsolete vHCI class definition; to be removed after Leadville update
198  */
199 const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI;
200 
201 static char vhci_greeting[] =
202 	"\tThere already exists one vHCI driver for class %s\n"
203 	"\tOnly one vHCI driver for each class is allowed\n";
204 
205 /*
206  * Static function prototypes
207  */
208 static int		i_mdi_phci_offline(dev_info_t *, uint_t);
209 static int		i_mdi_client_offline(dev_info_t *, uint_t);
210 static int		i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t);
211 static void		i_mdi_phci_post_detach(dev_info_t *,
212 			    ddi_detach_cmd_t, int);
213 static int		i_mdi_client_pre_detach(dev_info_t *,
214 			    ddi_detach_cmd_t);
215 static void		i_mdi_client_post_detach(dev_info_t *,
216 			    ddi_detach_cmd_t, int);
217 static void		i_mdi_pm_hold_pip(mdi_pathinfo_t *);
218 static void		i_mdi_pm_rele_pip(mdi_pathinfo_t *);
219 static int 		i_mdi_lba_lb(mdi_client_t *ct,
220 			    mdi_pathinfo_t **ret_pip, struct buf *buf);
221 static void		i_mdi_pm_hold_client(mdi_client_t *, int);
222 static void		i_mdi_pm_rele_client(mdi_client_t *, int);
223 static void		i_mdi_pm_reset_client(mdi_client_t *);
224 static int		i_mdi_power_all_phci(mdi_client_t *);
225 static void		i_mdi_log_sysevent(dev_info_t *, char *, char *);
226 
227 
228 /*
229  * Internal mdi_pathinfo node functions
230  */
231 static void		i_mdi_pi_kstat_destroy(mdi_pathinfo_t *);
232 
233 static mdi_vhci_t	*i_mdi_vhci_class2vhci(char *);
234 static mdi_vhci_t	*i_devi_get_vhci(dev_info_t *);
235 static mdi_phci_t	*i_devi_get_phci(dev_info_t *);
236 static void		i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *);
237 static void		i_mdi_phci_unlock(mdi_phci_t *);
238 static mdi_pathinfo_t	*i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *);
239 static void		i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *);
240 static void		i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *);
241 static void		i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *,
242 			    mdi_client_t *);
243 static void		i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *);
244 static void		i_mdi_client_remove_path(mdi_client_t *,
245 			    mdi_pathinfo_t *);
246 
247 static int		i_mdi_pi_state_change(mdi_pathinfo_t *,
248 			    mdi_pathinfo_state_t, int);
249 static int		i_mdi_pi_offline(mdi_pathinfo_t *, int);
250 static dev_info_t	*i_mdi_devinfo_create(mdi_vhci_t *, char *, char *,
251 			    char **, int);
252 static dev_info_t	*i_mdi_devinfo_find(mdi_vhci_t *, char *, char *);
253 static int		i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int);
254 static int		i_mdi_is_child_present(dev_info_t *, dev_info_t *);
255 static mdi_client_t	*i_mdi_client_alloc(mdi_vhci_t *, char *, char *);
256 static void		i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *);
257 static void		i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *);
258 static mdi_client_t	*i_mdi_client_find(mdi_vhci_t *, char *, char *);
259 static void		i_mdi_client_update_state(mdi_client_t *);
260 static int		i_mdi_client_compute_state(mdi_client_t *,
261 			    mdi_phci_t *);
262 static void		i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *);
263 static void		i_mdi_client_unlock(mdi_client_t *);
264 static int		i_mdi_client_free(mdi_vhci_t *, mdi_client_t *);
265 static mdi_client_t	*i_devi_get_client(dev_info_t *);
266 /*
267  * NOTE: this will be removed once the NWS files are changed to use the new
268  * mdi_{enable,disable}_path interfaces
269  */
270 static int		i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *,
271 				int, int);
272 static mdi_pathinfo_t 	*i_mdi_enable_disable_path(mdi_pathinfo_t *pip,
273 				mdi_vhci_t *vh, int flags, int op);
274 /*
275  * Failover related function prototypes
276  */
277 static int		i_mdi_failover(void *);
278 
279 /*
280  * misc internal functions
281  */
282 static int		i_mdi_get_hash_key(char *);
283 static int		i_map_nvlist_error_to_mdi(int);
284 static void		i_mdi_report_path_state(mdi_client_t *,
285 			    mdi_pathinfo_t *);
286 
287 static void		setup_vhci_cache(mdi_vhci_t *);
288 static int		destroy_vhci_cache(mdi_vhci_t *);
289 static int		stop_vhcache_async_threads(mdi_vhci_config_t *);
290 static boolean_t	stop_vhcache_flush_thread(void *, int);
291 static void		free_string_array(char **, int);
292 static void		free_vhcache_phci(mdi_vhcache_phci_t *);
293 static void		free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *);
294 static void		free_vhcache_client(mdi_vhcache_client_t *);
295 static int		mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *);
296 static nvlist_t		*vhcache_to_mainnvl(mdi_vhci_cache_t *);
297 static void		vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *);
298 static void		vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *);
299 static void		vhcache_pi_add(mdi_vhci_config_t *,
300 			    struct mdi_pathinfo *);
301 static void		vhcache_pi_remove(mdi_vhci_config_t *,
302 			    struct mdi_pathinfo *);
303 static void		free_phclient_path_list(mdi_phys_path_t *);
304 static void		sort_vhcache_paths(mdi_vhcache_client_t *);
305 static int		flush_vhcache(mdi_vhci_config_t *, int);
306 static void		vhcache_dirty(mdi_vhci_config_t *);
307 static void		free_async_client_config(mdi_async_client_config_t *);
308 static void		single_threaded_vhconfig_enter(mdi_vhci_config_t *);
309 static void		single_threaded_vhconfig_exit(mdi_vhci_config_t *);
310 static nvlist_t		*read_on_disk_vhci_cache(char *);
311 extern int		fread_nvlist(char *, nvlist_t **);
312 extern int		fwrite_nvlist(char *, nvlist_t *);
313 
314 /* called once when first vhci registers with mdi */
315 static void
i_mdi_init()316 i_mdi_init()
317 {
318 	static int initialized = 0;
319 
320 	if (initialized)
321 		return;
322 	initialized = 1;
323 
324 	mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL);
325 
326 	/* Create our taskq resources */
327 	mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads,
328 	    MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC,
329 	    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
330 	ASSERT(mdi_taskq != NULL);	/* taskq_create never fails */
331 
332 	/* Allocate ['path_instance' <-> "path"] maps */
333 	mutex_init(&mdi_pathmap_mutex, NULL, MUTEX_DRIVER, NULL);
334 	mdi_pathmap_bypath = mod_hash_create_strhash(
335 	    "mdi_pathmap_bypath", mdi_pathmap_hash_size,
336 	    mod_hash_null_valdtor);
337 	mdi_pathmap_byinstance = mod_hash_create_idhash(
338 	    "mdi_pathmap_byinstance", mdi_pathmap_hash_size,
339 	    mod_hash_null_valdtor);
340 	mdi_pathmap_sbyinstance = mod_hash_create_idhash(
341 	    "mdi_pathmap_sbyinstance", mdi_pathmap_hash_size,
342 	    mod_hash_null_valdtor);
343 }
344 
345 /*
346  * mdi_get_component_type():
347  *		Return mpxio component type
348  * Return Values:
349  *		MDI_COMPONENT_NONE
350  *		MDI_COMPONENT_VHCI
351  *		MDI_COMPONENT_PHCI
352  *		MDI_COMPONENT_CLIENT
353  * XXX This doesn't work under multi-level MPxIO and should be
354  *	removed when clients migrate mdi_component_is_*() interfaces.
355  */
356 int
mdi_get_component_type(dev_info_t * dip)357 mdi_get_component_type(dev_info_t *dip)
358 {
359 	return (DEVI(dip)->devi_mdi_component);
360 }
361 
362 /*
363  * mdi_vhci_register():
364  *		Register a vHCI module with the mpxio framework
365  *		mdi_vhci_register() is called by vHCI drivers to register the
366  *		'class_driver' vHCI driver and its MDI entrypoints with the
367  *		mpxio framework.  The vHCI driver must call this interface as
368  *		part of its attach(9e) handler.
369  *		Competing threads may try to attach mdi_vhci_register() as
370  *		the vHCI drivers are loaded and attached as a result of pHCI
371  *		driver instance registration (mdi_phci_register()) with the
372  *		framework.
373  * Return Values:
374  *		MDI_SUCCESS
375  *		MDI_FAILURE
376  */
377 /*ARGSUSED*/
378 int
mdi_vhci_register(char * class,dev_info_t * vdip,mdi_vhci_ops_t * vops,int flags)379 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops,
380     int flags)
381 {
382 	mdi_vhci_t		*vh = NULL;
383 
384 	/* Registrant can't be older */
385 	ASSERT(vops->vo_revision <= MDI_VHCI_OPS_REV);
386 
387 #ifdef DEBUG
388 	/*
389 	 * IB nexus driver is loaded only when IB hardware is present.
390 	 * In order to be able to do this there is a need to drive the loading
391 	 * and attaching of the IB nexus driver (especially when an IB hardware
392 	 * is dynamically plugged in) when an IB HCA driver (PHCI)
393 	 * is being attached. Unfortunately this gets into the limitations
394 	 * of devfs as there seems to be no clean way to drive configuration
395 	 * of a subtree from another subtree of a devfs. Hence, do not ASSERT
396 	 * for IB.
397 	 */
398 	if (strcmp(class, MDI_HCI_CLASS_IB) != 0)
399 		ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));
400 #endif
401 
402 	i_mdi_init();
403 
404 	mutex_enter(&mdi_mutex);
405 	/*
406 	 * Scan for already registered vhci
407 	 */
408 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
409 		if (strcmp(vh->vh_class, class) == 0) {
410 			/*
411 			 * vHCI has already been created.  Check for valid
412 			 * vHCI ops registration.  We only support one vHCI
413 			 * module per class
414 			 */
415 			if (vh->vh_ops != NULL) {
416 				mutex_exit(&mdi_mutex);
417 				cmn_err(CE_NOTE, vhci_greeting, class);
418 				return (MDI_FAILURE);
419 			}
420 			break;
421 		}
422 	}
423 
424 	/*
425 	 * if not yet created, create the vHCI component
426 	 */
427 	if (vh == NULL) {
428 		struct client_hash	*hash = NULL;
429 		char			*load_balance;
430 
431 		/*
432 		 * Allocate and initialize the mdi extensions
433 		 */
434 		vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP);
435 		hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash),
436 		    KM_SLEEP);
437 		vh->vh_client_table = hash;
438 		vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP);
439 		(void) strcpy(vh->vh_class, class);
440 		vh->vh_lb = LOAD_BALANCE_RR;
441 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip,
442 		    0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) {
443 			if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) {
444 				vh->vh_lb = LOAD_BALANCE_NONE;
445 			} else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA)
446 				    == 0) {
447 				vh->vh_lb = LOAD_BALANCE_LBA;
448 			}
449 			ddi_prop_free(load_balance);
450 		}
451 
452 		mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL);
453 		mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL);
454 
455 		/*
456 		 * Store the vHCI ops vectors
457 		 */
458 		vh->vh_dip = vdip;
459 		vh->vh_ops = vops;
460 
461 		setup_vhci_cache(vh);
462 
463 		if (mdi_vhci_head == NULL) {
464 			mdi_vhci_head = vh;
465 		}
466 		if (mdi_vhci_tail) {
467 			mdi_vhci_tail->vh_next = vh;
468 		}
469 		mdi_vhci_tail = vh;
470 		mdi_vhci_count++;
471 	}
472 
473 	/*
474 	 * Claim the devfs node as a vhci component
475 	 */
476 	DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI;
477 
478 	/*
479 	 * Initialize our back reference from dev_info node
480 	 */
481 	DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh;
482 	mutex_exit(&mdi_mutex);
483 	return (MDI_SUCCESS);
484 }
485 
486 /*
487  * mdi_vhci_unregister():
488  *		Unregister a vHCI module from mpxio framework
489  *		mdi_vhci_unregister() is called from the detach(9E) entrypoint
490  * 		of a vhci to unregister it from the framework.
491  * Return Values:
492  *		MDI_SUCCESS
493  *		MDI_FAILURE
494  */
495 /*ARGSUSED*/
496 int
mdi_vhci_unregister(dev_info_t * vdip,int flags)497 mdi_vhci_unregister(dev_info_t *vdip, int flags)
498 {
499 	mdi_vhci_t	*found, *vh, *prev = NULL;
500 
501 	ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));
502 
503 	/*
504 	 * Check for invalid VHCI
505 	 */
506 	if ((vh = i_devi_get_vhci(vdip)) == NULL)
507 		return (MDI_FAILURE);
508 
509 	/*
510 	 * Scan the list of registered vHCIs for a match
511 	 */
512 	mutex_enter(&mdi_mutex);
513 	for (found = mdi_vhci_head; found != NULL; found = found->vh_next) {
514 		if (found == vh)
515 			break;
516 		prev = found;
517 	}
518 
519 	if (found == NULL) {
520 		mutex_exit(&mdi_mutex);
521 		return (MDI_FAILURE);
522 	}
523 
524 	/*
525 	 * Check the vHCI, pHCI and client count. All the pHCIs and clients
526 	 * should have been unregistered, before a vHCI can be
527 	 * unregistered.
528 	 */
529 	MDI_VHCI_PHCI_LOCK(vh);
530 	if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) {
531 		MDI_VHCI_PHCI_UNLOCK(vh);
532 		mutex_exit(&mdi_mutex);
533 		return (MDI_FAILURE);
534 	}
535 	MDI_VHCI_PHCI_UNLOCK(vh);
536 
537 	if (destroy_vhci_cache(vh) != MDI_SUCCESS) {
538 		mutex_exit(&mdi_mutex);
539 		return (MDI_FAILURE);
540 	}
541 
542 	/*
543 	 * Remove the vHCI from the global list
544 	 */
545 	if (vh == mdi_vhci_head) {
546 		mdi_vhci_head = vh->vh_next;
547 	} else {
548 		prev->vh_next = vh->vh_next;
549 	}
550 	if (vh == mdi_vhci_tail) {
551 		mdi_vhci_tail = prev;
552 	}
553 	mdi_vhci_count--;
554 	mutex_exit(&mdi_mutex);
555 
556 	vh->vh_ops = NULL;
557 	DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI;
558 	DEVI(vdip)->devi_mdi_xhci = NULL;
559 	kmem_free(vh->vh_class, strlen(vh->vh_class)+1);
560 	kmem_free(vh->vh_client_table,
561 	    mdi_client_table_size * sizeof (struct client_hash));
562 	mutex_destroy(&vh->vh_phci_mutex);
563 	mutex_destroy(&vh->vh_client_mutex);
564 
565 	kmem_free(vh, sizeof (mdi_vhci_t));
566 	return (MDI_SUCCESS);
567 }
568 
569 /*
570  * i_mdi_vhci_class2vhci():
571  *		Look for a matching vHCI module given a vHCI class name
572  * Return Values:
573  *		Handle to a vHCI component
574  *		NULL
575  */
576 static mdi_vhci_t *
i_mdi_vhci_class2vhci(char * class)577 i_mdi_vhci_class2vhci(char *class)
578 {
579 	mdi_vhci_t	*vh = NULL;
580 
581 	ASSERT(!MUTEX_HELD(&mdi_mutex));
582 
583 	mutex_enter(&mdi_mutex);
584 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
585 		if (strcmp(vh->vh_class, class) == 0) {
586 			break;
587 		}
588 	}
589 	mutex_exit(&mdi_mutex);
590 	return (vh);
591 }
592 
593 /*
594  * i_devi_get_vhci():
595  *		Utility function to get the handle to a vHCI component
596  * Return Values:
597  *		Handle to a vHCI component
598  *		NULL
599  */
600 mdi_vhci_t *
i_devi_get_vhci(dev_info_t * vdip)601 i_devi_get_vhci(dev_info_t *vdip)
602 {
603 	mdi_vhci_t	*vh = NULL;
604 	if (MDI_VHCI(vdip)) {
605 		vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci;
606 	}
607 	return (vh);
608 }
609 
610 /*
611  * mdi_phci_register():
612  *		Register a pHCI module with mpxio framework
613  *		mdi_phci_register() is called by pHCI drivers to register with
614  *		the mpxio framework and a specific 'class_driver' vHCI.  The
615  *		pHCI driver must call this interface as part of its attach(9e)
616  *		handler.
617  * Return Values:
618  *		MDI_SUCCESS
619  *		MDI_FAILURE
620  */
621 /*ARGSUSED*/
622 int
mdi_phci_register(char * class,dev_info_t * pdip,int flags)623 mdi_phci_register(char *class, dev_info_t *pdip, int flags)
624 {
625 	mdi_phci_t		*ph;
626 	mdi_vhci_t		*vh;
627 	char			*data;
628 
629 	/*
630 	 * Some subsystems, like fcp, perform pHCI registration from a
631 	 * different thread than the one doing the pHCI attach(9E) - the
632 	 * driver attach code is waiting for this other thread to complete.
633 	 * This means we can only ASSERT DEVI_BUSY_CHANGING of parent
634 	 * (indicating that some thread has done an ndi_devi_enter of parent)
635 	 * not DEVI_BUSY_OWNED (which would indicate that we did the enter).
636 	 */
637 	ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));
638 
639 	/*
640 	 * Check for mpxio-disable property. Enable mpxio if the property is
641 	 * missing or not set to "yes".
642 	 * If the property is set to "yes" then emit a brief message.
643 	 */
644 	if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable",
645 	    &data) == DDI_SUCCESS)) {
646 		if (strcmp(data, "yes") == 0) {
647 			MDI_DEBUG(1, (MDI_CONT, pdip,
648 			    "?multipath capabilities disabled via %s.conf.",
649 			    ddi_driver_name(pdip)));
650 			ddi_prop_free(data);
651 			return (MDI_FAILURE);
652 		}
653 		ddi_prop_free(data);
654 	}
655 
656 	/*
657 	 * Search for a matching vHCI
658 	 */
659 	vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
660 	if (vh == NULL) {
661 		return (MDI_FAILURE);
662 	}
663 
664 	ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP);
665 	mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL);
666 	ph->ph_dip = pdip;
667 	ph->ph_vhci = vh;
668 	ph->ph_next = NULL;
669 	ph->ph_unstable = 0;
670 	ph->ph_vprivate = 0;
671 	cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL);
672 
673 	MDI_PHCI_LOCK(ph);
674 	MDI_PHCI_SET_POWER_UP(ph);
675 	MDI_PHCI_UNLOCK(ph);
676 	DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI;
677 	DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph;
678 
679 	vhcache_phci_add(vh->vh_config, ph);
680 
681 	MDI_VHCI_PHCI_LOCK(vh);
682 	if (vh->vh_phci_head == NULL) {
683 		vh->vh_phci_head = ph;
684 	}
685 	if (vh->vh_phci_tail) {
686 		vh->vh_phci_tail->ph_next = ph;
687 	}
688 	vh->vh_phci_tail = ph;
689 	vh->vh_phci_count++;
690 	MDI_VHCI_PHCI_UNLOCK(vh);
691 
692 	i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER);
693 	return (MDI_SUCCESS);
694 }
695 
696 /*
697  * mdi_phci_unregister():
698  *		Unregister a pHCI module from mpxio framework
699  *		mdi_phci_unregister() is called by the pHCI drivers from their
700  *		detach(9E) handler to unregister their instances from the
701  *		framework.
702  * Return Values:
703  *		MDI_SUCCESS
704  *		MDI_FAILURE
705  */
706 /*ARGSUSED*/
707 int
mdi_phci_unregister(dev_info_t * pdip,int flags)708 mdi_phci_unregister(dev_info_t *pdip, int flags)
709 {
710 	mdi_vhci_t		*vh;
711 	mdi_phci_t		*ph;
712 	mdi_phci_t		*tmp;
713 	mdi_phci_t		*prev = NULL;
714 	mdi_pathinfo_t		*pip;
715 
716 	ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));
717 
718 	ph = i_devi_get_phci(pdip);
719 	if (ph == NULL) {
720 		MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid pHCI"));
721 		return (MDI_FAILURE);
722 	}
723 
724 	vh = ph->ph_vhci;
725 	ASSERT(vh != NULL);
726 	if (vh == NULL) {
727 		MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid vHCI"));
728 		return (MDI_FAILURE);
729 	}
730 
731 	MDI_VHCI_PHCI_LOCK(vh);
732 	tmp = vh->vh_phci_head;
733 	while (tmp) {
734 		if (tmp == ph) {
735 			break;
736 		}
737 		prev = tmp;
738 		tmp = tmp->ph_next;
739 	}
740 
741 	if (ph == vh->vh_phci_head) {
742 		vh->vh_phci_head = ph->ph_next;
743 	} else {
744 		prev->ph_next = ph->ph_next;
745 	}
746 
747 	if (ph == vh->vh_phci_tail) {
748 		vh->vh_phci_tail = prev;
749 	}
750 
751 	vh->vh_phci_count--;
752 	MDI_VHCI_PHCI_UNLOCK(vh);
753 
754 	/* Walk remaining pathinfo nodes and disassociate them from pHCI */
755 	MDI_PHCI_LOCK(ph);
756 	for (pip = (mdi_pathinfo_t *)ph->ph_path_head; pip;
757 	    pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link)
758 		MDI_PI(pip)->pi_phci = NULL;
759 	MDI_PHCI_UNLOCK(ph);
760 
761 	i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class,
762 	    ESC_DDI_INITIATOR_UNREGISTER);
763 	vhcache_phci_remove(vh->vh_config, ph);
764 	cv_destroy(&ph->ph_unstable_cv);
765 	mutex_destroy(&ph->ph_mutex);
766 	kmem_free(ph, sizeof (mdi_phci_t));
767 	DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI;
768 	DEVI(pdip)->devi_mdi_xhci = NULL;
769 	return (MDI_SUCCESS);
770 }
771 
772 /*
773  * i_devi_get_phci():
774  * 		Utility function to return the phci extensions.
775  */
776 static mdi_phci_t *
i_devi_get_phci(dev_info_t * pdip)777 i_devi_get_phci(dev_info_t *pdip)
778 {
779 	mdi_phci_t	*ph = NULL;
780 
781 	if (MDI_PHCI(pdip)) {
782 		ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci;
783 	}
784 	return (ph);
785 }
786 
787 /*
788  * Single thread mdi entry into devinfo node for modifying its children.
789  * If necessary we perform an ndi_devi_enter of the vHCI before doing
790  * an ndi_devi_enter of 'dip'.  We maintain circular in two parts: one
791  * for the vHCI and one for the pHCI.
792  */
793 void
mdi_devi_enter(dev_info_t * phci_dip,int * circular)794 mdi_devi_enter(dev_info_t *phci_dip, int *circular)
795 {
796 	dev_info_t	*vdip;
797 	int		vcircular, pcircular;
798 
799 	/* Verify calling context */
800 	ASSERT(MDI_PHCI(phci_dip));
801 	vdip = mdi_devi_get_vdip(phci_dip);
802 	ASSERT(vdip);			/* A pHCI always has a vHCI */
803 
804 	/*
805 	 * If pHCI is detaching then the framework has already entered the
806 	 * vHCI on a threads that went down the code path leading to
807 	 * detach_node().  This framework enter of the vHCI during pHCI
808 	 * detach is done to avoid deadlock with vHCI power management
809 	 * operations which enter the vHCI and the enter down the path
810 	 * to the pHCI. If pHCI is detaching then we piggyback this calls
811 	 * enter of the vHCI on frameworks vHCI enter that has already
812 	 * occurred - this is OK because we know that the framework thread
813 	 * doing detach is waiting for our completion.
814 	 *
815 	 * We should DEVI_IS_DETACHING under an enter of the parent to avoid
816 	 * race with detach - but we can't do that because the framework has
817 	 * already entered the parent, so we have some complexity instead.
818 	 */
819 	for (;;) {
820 		if (ndi_devi_tryenter(vdip, &vcircular)) {
821 			ASSERT(vcircular != -1);
822 			if (DEVI_IS_DETACHING(phci_dip)) {
823 				ndi_devi_exit(vdip, vcircular);
824 				vcircular = -1;
825 			}
826 			break;
827 		} else if (DEVI_IS_DETACHING(phci_dip)) {
828 			vcircular = -1;
829 			break;
830 		} else if (servicing_interrupt()) {
831 			/*
832 			 * Don't delay an interrupt (and ensure adaptive
833 			 * mutex inversion support).
834 			 */
835 			ndi_devi_enter(vdip, &vcircular);
836 			break;
837 		} else {
838 			delay_random(mdi_delay);
839 		}
840 	}
841 
842 	ndi_devi_enter(phci_dip, &pcircular);
843 	*circular = (vcircular << 16) | (pcircular & 0xFFFF);
844 }
845 
846 /*
847  * Attempt to mdi_devi_enter.
848  */
849 int
mdi_devi_tryenter(dev_info_t * phci_dip,int * circular)850 mdi_devi_tryenter(dev_info_t *phci_dip, int *circular)
851 {
852 	dev_info_t	*vdip;
853 	int		vcircular, pcircular;
854 
855 	/* Verify calling context */
856 	ASSERT(MDI_PHCI(phci_dip));
857 	vdip = mdi_devi_get_vdip(phci_dip);
858 	ASSERT(vdip);			/* A pHCI always has a vHCI */
859 
860 	if (ndi_devi_tryenter(vdip, &vcircular)) {
861 		if (ndi_devi_tryenter(phci_dip, &pcircular)) {
862 			*circular = (vcircular << 16) | (pcircular & 0xFFFF);
863 			return (1);	/* locked */
864 		}
865 		ndi_devi_exit(vdip, vcircular);
866 	}
867 	return (0);			/* busy */
868 }
869 
870 /*
871  * Release mdi_devi_enter or successful mdi_devi_tryenter.
872  */
873 void
mdi_devi_exit(dev_info_t * phci_dip,int circular)874 mdi_devi_exit(dev_info_t *phci_dip, int circular)
875 {
876 	dev_info_t	*vdip;
877 	int		vcircular, pcircular;
878 
879 	/* Verify calling context */
880 	ASSERT(MDI_PHCI(phci_dip));
881 	vdip = mdi_devi_get_vdip(phci_dip);
882 	ASSERT(vdip);			/* A pHCI always has a vHCI */
883 
884 	/* extract two circular recursion values from single int */
885 	pcircular = (short)(circular & 0xFFFF);
886 	vcircular = (short)((circular >> 16) & 0xFFFF);
887 
888 	ndi_devi_exit(phci_dip, pcircular);
889 	if (vcircular != -1)
890 		ndi_devi_exit(vdip, vcircular);
891 }
892 
893 /*
894  * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used
895  * around a pHCI drivers calls to mdi_pi_online/offline, after holding
896  * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock
897  * with vHCI power management code during path online/offline.  Each
898  * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must
899  * occur within the scope of an active mdi_devi_enter that establishes the
900  * circular value.
901  */
902 void
mdi_devi_exit_phci(dev_info_t * phci_dip,int circular)903 mdi_devi_exit_phci(dev_info_t *phci_dip, int circular)
904 {
905 	int		pcircular;
906 
907 	/* Verify calling context */
908 	ASSERT(MDI_PHCI(phci_dip));
909 
910 	/* Keep hold on pHCI until we reenter in mdi_devi_enter_phci */
911 	ndi_hold_devi(phci_dip);
912 
913 	pcircular = (short)(circular & 0xFFFF);
914 	ndi_devi_exit(phci_dip, pcircular);
915 }
916 
917 void
mdi_devi_enter_phci(dev_info_t * phci_dip,int * circular)918 mdi_devi_enter_phci(dev_info_t *phci_dip, int *circular)
919 {
920 	int		pcircular;
921 
922 	/* Verify calling context */
923 	ASSERT(MDI_PHCI(phci_dip));
924 
925 	ndi_devi_enter(phci_dip, &pcircular);
926 
927 	/* Drop hold from mdi_devi_exit_phci. */
928 	ndi_rele_devi(phci_dip);
929 
930 	/* verify matching mdi_devi_exit_phci/mdi_devi_enter_phci use */
931 	ASSERT(pcircular == ((short)(*circular & 0xFFFF)));
932 }
933 
934 /*
935  * mdi_devi_get_vdip():
936  *		given a pHCI dip return vHCI dip
937  */
938 dev_info_t *
mdi_devi_get_vdip(dev_info_t * pdip)939 mdi_devi_get_vdip(dev_info_t *pdip)
940 {
941 	mdi_phci_t	*ph;
942 
943 	ph = i_devi_get_phci(pdip);
944 	if (ph && ph->ph_vhci)
945 		return (ph->ph_vhci->vh_dip);
946 	return (NULL);
947 }
948 
949 /*
950  * mdi_devi_pdip_entered():
951  *		Return 1 if we are vHCI and have done an ndi_devi_enter
952  *		of a pHCI
953  */
954 int
mdi_devi_pdip_entered(dev_info_t * vdip)955 mdi_devi_pdip_entered(dev_info_t *vdip)
956 {
957 	mdi_vhci_t	*vh;
958 	mdi_phci_t	*ph;
959 
960 	vh = i_devi_get_vhci(vdip);
961 	if (vh == NULL)
962 		return (0);
963 
964 	MDI_VHCI_PHCI_LOCK(vh);
965 	ph = vh->vh_phci_head;
966 	while (ph) {
967 		if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) {
968 			MDI_VHCI_PHCI_UNLOCK(vh);
969 			return (1);
970 		}
971 		ph = ph->ph_next;
972 	}
973 	MDI_VHCI_PHCI_UNLOCK(vh);
974 	return (0);
975 }
976 
977 /*
978  * mdi_phci_path2devinfo():
979  * 		Utility function to search for a valid phci device given
980  *		the devfs pathname.
981  */
982 dev_info_t *
mdi_phci_path2devinfo(dev_info_t * vdip,caddr_t pathname)983 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname)
984 {
985 	char		*temp_pathname;
986 	mdi_vhci_t	*vh;
987 	mdi_phci_t	*ph;
988 	dev_info_t 	*pdip = NULL;
989 
990 	vh = i_devi_get_vhci(vdip);
991 	ASSERT(vh != NULL);
992 
993 	if (vh == NULL) {
994 		/*
995 		 * Invalid vHCI component, return failure
996 		 */
997 		return (NULL);
998 	}
999 
1000 	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1001 	MDI_VHCI_PHCI_LOCK(vh);
1002 	ph = vh->vh_phci_head;
1003 	while (ph != NULL) {
1004 		pdip = ph->ph_dip;
1005 		ASSERT(pdip != NULL);
1006 		*temp_pathname = '\0';
1007 		(void) ddi_pathname(pdip, temp_pathname);
1008 		if (strcmp(temp_pathname, pathname) == 0) {
1009 			break;
1010 		}
1011 		ph = ph->ph_next;
1012 	}
1013 	if (ph == NULL) {
1014 		pdip = NULL;
1015 	}
1016 	MDI_VHCI_PHCI_UNLOCK(vh);
1017 	kmem_free(temp_pathname, MAXPATHLEN);
1018 	return (pdip);
1019 }
1020 
1021 /*
1022  * mdi_phci_get_path_count():
1023  * 		get number of path information nodes associated with a given
1024  *		pHCI device.
1025  */
1026 int
mdi_phci_get_path_count(dev_info_t * pdip)1027 mdi_phci_get_path_count(dev_info_t *pdip)
1028 {
1029 	mdi_phci_t	*ph;
1030 	int		count = 0;
1031 
1032 	ph = i_devi_get_phci(pdip);
1033 	if (ph != NULL) {
1034 		count = ph->ph_path_count;
1035 	}
1036 	return (count);
1037 }
1038 
1039 /*
1040  * i_mdi_phci_lock():
1041  *		Lock a pHCI device
1042  * Return Values:
1043  *		None
1044  * Note:
1045  *		The default locking order is:
1046  *		_NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
1047  *		But there are number of situations where locks need to be
1048  *		grabbed in reverse order.  This routine implements try and lock
1049  *		mechanism depending on the requested parameter option.
1050  */
1051 static void
i_mdi_phci_lock(mdi_phci_t * ph,mdi_pathinfo_t * pip)1052 i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip)
1053 {
1054 	if (pip) {
1055 		/* Reverse locking is requested. */
1056 		while (MDI_PHCI_TRYLOCK(ph) == 0) {
1057 			if (servicing_interrupt()) {
1058 				MDI_PI_HOLD(pip);
1059 				MDI_PI_UNLOCK(pip);
1060 				MDI_PHCI_LOCK(ph);
1061 				MDI_PI_LOCK(pip);
1062 				MDI_PI_RELE(pip);
1063 				break;
1064 			} else {
1065 				/*
1066 				 * tryenter failed. Try to grab again
1067 				 * after a small delay
1068 				 */
1069 				MDI_PI_HOLD(pip);
1070 				MDI_PI_UNLOCK(pip);
1071 				delay_random(mdi_delay);
1072 				MDI_PI_LOCK(pip);
1073 				MDI_PI_RELE(pip);
1074 			}
1075 		}
1076 	} else {
1077 		MDI_PHCI_LOCK(ph);
1078 	}
1079 }
1080 
1081 /*
1082  * i_mdi_phci_unlock():
1083  *		Unlock the pHCI component
1084  */
1085 static void
i_mdi_phci_unlock(mdi_phci_t * ph)1086 i_mdi_phci_unlock(mdi_phci_t *ph)
1087 {
1088 	MDI_PHCI_UNLOCK(ph);
1089 }
1090 
1091 /*
1092  * i_mdi_devinfo_create():
1093  *		create client device's devinfo node
1094  * Return Values:
1095  *		dev_info
1096  *		NULL
1097  * Notes:
1098  */
1099 static dev_info_t *
i_mdi_devinfo_create(mdi_vhci_t * vh,char * name,char * guid,char ** compatible,int ncompatible)1100 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid,
1101 	char **compatible, int ncompatible)
1102 {
1103 	dev_info_t *cdip = NULL;
1104 
1105 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1106 
1107 	/* Verify for duplicate entry */
1108 	cdip = i_mdi_devinfo_find(vh, name, guid);
1109 	ASSERT(cdip == NULL);
1110 	if (cdip) {
1111 		cmn_err(CE_WARN,
1112 		    "i_mdi_devinfo_create: client %s@%s already exists",
1113 			name ? name : "", guid ? guid : "");
1114 	}
1115 
1116 	ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip);
1117 	if (cdip == NULL)
1118 		goto fail;
1119 
1120 	/*
1121 	 * Create component type and Global unique identifier
1122 	 * properties
1123 	 */
1124 	if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip,
1125 	    MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) {
1126 		goto fail;
1127 	}
1128 
1129 	/* Decorate the node with compatible property */
1130 	if (compatible &&
1131 	    (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip,
1132 	    "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) {
1133 		goto fail;
1134 	}
1135 
1136 	return (cdip);
1137 
1138 fail:
1139 	if (cdip) {
1140 		(void) ndi_prop_remove_all(cdip);
1141 		(void) ndi_devi_free(cdip);
1142 	}
1143 	return (NULL);
1144 }
1145 
1146 /*
1147  * i_mdi_devinfo_find():
1148  *		Find a matching devinfo node for given client node name
1149  *		and its guid.
1150  * Return Values:
1151  *		Handle to a dev_info node or NULL
1152  */
1153 static dev_info_t *
i_mdi_devinfo_find(mdi_vhci_t * vh,caddr_t name,char * guid)1154 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid)
1155 {
1156 	char			*data;
1157 	dev_info_t 		*cdip = NULL;
1158 	dev_info_t 		*ndip = NULL;
1159 	int			circular;
1160 
1161 	ndi_devi_enter(vh->vh_dip, &circular);
1162 	ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child;
1163 	while ((cdip = ndip) != NULL) {
1164 		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
1165 
1166 		if (strcmp(DEVI(cdip)->devi_node_name, name)) {
1167 			continue;
1168 		}
1169 
1170 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip,
1171 		    DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP,
1172 		    &data) != DDI_PROP_SUCCESS) {
1173 			continue;
1174 		}
1175 
1176 		if (strcmp(data, guid) != 0) {
1177 			ddi_prop_free(data);
1178 			continue;
1179 		}
1180 		ddi_prop_free(data);
1181 		break;
1182 	}
1183 	ndi_devi_exit(vh->vh_dip, circular);
1184 	return (cdip);
1185 }
1186 
1187 /*
1188  * i_mdi_devinfo_remove():
1189  *		Remove a client device node
1190  */
1191 static int
i_mdi_devinfo_remove(dev_info_t * vdip,dev_info_t * cdip,int flags)1192 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags)
1193 {
1194 	int	rv = MDI_SUCCESS;
1195 
1196 	if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS ||
1197 	    (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) {
1198 		rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN | NDI_DEVI_REMOVE);
1199 		if (rv != NDI_SUCCESS) {
1200 			MDI_DEBUG(1, (MDI_NOTE, cdip,
1201 			    "!failed: cdip %p", (void *)cdip));
1202 		}
1203 		/*
1204 		 * Convert to MDI error code
1205 		 */
1206 		switch (rv) {
1207 		case NDI_SUCCESS:
1208 			rv = MDI_SUCCESS;
1209 			break;
1210 		case NDI_BUSY:
1211 			rv = MDI_BUSY;
1212 			break;
1213 		default:
1214 			rv = MDI_FAILURE;
1215 			break;
1216 		}
1217 	}
1218 	return (rv);
1219 }
1220 
1221 /*
1222  * i_devi_get_client()
1223  *		Utility function to get mpxio component extensions
1224  */
1225 static mdi_client_t *
i_devi_get_client(dev_info_t * cdip)1226 i_devi_get_client(dev_info_t *cdip)
1227 {
1228 	mdi_client_t	*ct = NULL;
1229 
1230 	if (MDI_CLIENT(cdip)) {
1231 		ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client;
1232 	}
1233 	return (ct);
1234 }
1235 
1236 /*
1237  * i_mdi_is_child_present():
1238  *		Search for the presence of client device dev_info node
1239  */
1240 static int
i_mdi_is_child_present(dev_info_t * vdip,dev_info_t * cdip)1241 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip)
1242 {
1243 	int		rv = MDI_FAILURE;
1244 	struct dev_info	*dip;
1245 	int		circular;
1246 
1247 	ndi_devi_enter(vdip, &circular);
1248 	dip = DEVI(vdip)->devi_child;
1249 	while (dip) {
1250 		if (dip == DEVI(cdip)) {
1251 			rv = MDI_SUCCESS;
1252 			break;
1253 		}
1254 		dip = dip->devi_sibling;
1255 	}
1256 	ndi_devi_exit(vdip, circular);
1257 	return (rv);
1258 }
1259 
1260 
1261 /*
1262  * i_mdi_client_lock():
1263  *		Grab client component lock
1264  * Return Values:
1265  *		None
1266  * Note:
1267  *		The default locking order is:
1268  *		_NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
1269  *		But there are number of situations where locks need to be
1270  *		grabbed in reverse order.  This routine implements try and lock
1271  *		mechanism depending on the requested parameter option.
1272  */
1273 static void
i_mdi_client_lock(mdi_client_t * ct,mdi_pathinfo_t * pip)1274 i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip)
1275 {
1276 	if (pip) {
1277 		/*
1278 		 * Reverse locking is requested.
1279 		 */
1280 		while (MDI_CLIENT_TRYLOCK(ct) == 0) {
1281 			if (servicing_interrupt()) {
1282 				MDI_PI_HOLD(pip);
1283 				MDI_PI_UNLOCK(pip);
1284 				MDI_CLIENT_LOCK(ct);
1285 				MDI_PI_LOCK(pip);
1286 				MDI_PI_RELE(pip);
1287 				break;
1288 			} else {
1289 				/*
1290 				 * tryenter failed. Try to grab again
1291 				 * after a small delay
1292 				 */
1293 				MDI_PI_HOLD(pip);
1294 				MDI_PI_UNLOCK(pip);
1295 				delay_random(mdi_delay);
1296 				MDI_PI_LOCK(pip);
1297 				MDI_PI_RELE(pip);
1298 			}
1299 		}
1300 	} else {
1301 		MDI_CLIENT_LOCK(ct);
1302 	}
1303 }
1304 
1305 /*
1306  * i_mdi_client_unlock():
1307  *		Unlock a client component
1308  */
1309 static void
i_mdi_client_unlock(mdi_client_t * ct)1310 i_mdi_client_unlock(mdi_client_t *ct)
1311 {
1312 	MDI_CLIENT_UNLOCK(ct);
1313 }
1314 
1315 /*
1316  * i_mdi_client_alloc():
1317  * 		Allocate and initialize a client structure.  Caller should
1318  *		hold the vhci client lock.
1319  * Return Values:
1320  *		Handle to a client component
1321  */
1322 /*ARGSUSED*/
1323 static mdi_client_t *
i_mdi_client_alloc(mdi_vhci_t * vh,char * name,char * lguid)1324 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid)
1325 {
1326 	mdi_client_t	*ct;
1327 
1328 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1329 
1330 	/*
1331 	 * Allocate and initialize a component structure.
1332 	 */
1333 	ct = kmem_zalloc(sizeof (*ct), KM_SLEEP);
1334 	mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
1335 	ct->ct_hnext = NULL;
1336 	ct->ct_hprev = NULL;
1337 	ct->ct_dip = NULL;
1338 	ct->ct_vhci = vh;
1339 	ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP);
1340 	(void) strcpy(ct->ct_drvname, name);
1341 	ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP);
1342 	(void) strcpy(ct->ct_guid, lguid);
1343 	ct->ct_cprivate = NULL;
1344 	ct->ct_vprivate = NULL;
1345 	ct->ct_flags = 0;
1346 	ct->ct_state = MDI_CLIENT_STATE_FAILED;
1347 	MDI_CLIENT_LOCK(ct);
1348 	MDI_CLIENT_SET_OFFLINE(ct);
1349 	MDI_CLIENT_SET_DETACH(ct);
1350 	MDI_CLIENT_SET_POWER_UP(ct);
1351 	MDI_CLIENT_UNLOCK(ct);
1352 	ct->ct_failover_flags = 0;
1353 	ct->ct_failover_status = 0;
1354 	cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL);
1355 	ct->ct_unstable = 0;
1356 	cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL);
1357 	cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL);
1358 	ct->ct_lb = vh->vh_lb;
1359 	ct->ct_lb_args =  kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP);
1360 	ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE;
1361 	ct->ct_path_count = 0;
1362 	ct->ct_path_head = NULL;
1363 	ct->ct_path_tail = NULL;
1364 	ct->ct_path_last = NULL;
1365 
1366 	/*
1367 	 * Add this client component to our client hash queue
1368 	 */
1369 	i_mdi_client_enlist_table(vh, ct);
1370 	return (ct);
1371 }
1372 
1373 /*
1374  * i_mdi_client_enlist_table():
1375  *		Attach the client device to the client hash table. Caller
1376  *		should hold the vhci client lock.
1377  */
1378 static void
i_mdi_client_enlist_table(mdi_vhci_t * vh,mdi_client_t * ct)1379 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1380 {
1381 	int 			index;
1382 	struct client_hash	*head;
1383 
1384 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1385 
1386 	index = i_mdi_get_hash_key(ct->ct_guid);
1387 	head = &vh->vh_client_table[index];
1388 	ct->ct_hnext = (mdi_client_t *)head->ct_hash_head;
1389 	head->ct_hash_head = ct;
1390 	head->ct_hash_count++;
1391 	vh->vh_client_count++;
1392 }
1393 
1394 /*
1395  * i_mdi_client_delist_table():
1396  *		Attach the client device to the client hash table.
1397  *		Caller should hold the vhci client lock.
1398  */
1399 static void
i_mdi_client_delist_table(mdi_vhci_t * vh,mdi_client_t * ct)1400 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1401 {
1402 	int			index;
1403 	char			*guid;
1404 	struct client_hash 	*head;
1405 	mdi_client_t		*next;
1406 	mdi_client_t		*last;
1407 
1408 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1409 
1410 	guid = ct->ct_guid;
1411 	index = i_mdi_get_hash_key(guid);
1412 	head = &vh->vh_client_table[index];
1413 
1414 	last = NULL;
1415 	next = (mdi_client_t *)head->ct_hash_head;
1416 	while (next != NULL) {
1417 		if (next == ct) {
1418 			break;
1419 		}
1420 		last = next;
1421 		next = next->ct_hnext;
1422 	}
1423 
1424 	if (next) {
1425 		head->ct_hash_count--;
1426 		if (last == NULL) {
1427 			head->ct_hash_head = ct->ct_hnext;
1428 		} else {
1429 			last->ct_hnext = ct->ct_hnext;
1430 		}
1431 		ct->ct_hnext = NULL;
1432 		vh->vh_client_count--;
1433 	}
1434 }
1435 
1436 
1437 /*
1438  * i_mdi_client_free():
1439  *		Free a client component
1440  */
1441 static int
i_mdi_client_free(mdi_vhci_t * vh,mdi_client_t * ct)1442 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct)
1443 {
1444 	int		rv = MDI_SUCCESS;
1445 	int		flags = ct->ct_flags;
1446 	dev_info_t	*cdip;
1447 	dev_info_t	*vdip;
1448 
1449 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1450 
1451 	vdip = vh->vh_dip;
1452 	cdip = ct->ct_dip;
1453 
1454 	(void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP);
1455 	DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT;
1456 	DEVI(cdip)->devi_mdi_client = NULL;
1457 
1458 	/*
1459 	 * Clear out back ref. to dev_info_t node
1460 	 */
1461 	ct->ct_dip = NULL;
1462 
1463 	/*
1464 	 * Remove this client from our hash queue
1465 	 */
1466 	i_mdi_client_delist_table(vh, ct);
1467 
1468 	/*
1469 	 * Uninitialize and free the component
1470 	 */
1471 	kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1);
1472 	kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1);
1473 	kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t));
1474 	cv_destroy(&ct->ct_failover_cv);
1475 	cv_destroy(&ct->ct_unstable_cv);
1476 	cv_destroy(&ct->ct_powerchange_cv);
1477 	mutex_destroy(&ct->ct_mutex);
1478 	kmem_free(ct, sizeof (*ct));
1479 
1480 	if (cdip != NULL) {
1481 		MDI_VHCI_CLIENT_UNLOCK(vh);
1482 		(void) i_mdi_devinfo_remove(vdip, cdip, flags);
1483 		MDI_VHCI_CLIENT_LOCK(vh);
1484 	}
1485 	return (rv);
1486 }
1487 
1488 /*
1489  * i_mdi_client_find():
1490  * 		Find the client structure corresponding to a given guid
1491  *		Caller should hold the vhci client lock.
1492  */
1493 static mdi_client_t *
i_mdi_client_find(mdi_vhci_t * vh,char * cname,char * guid)1494 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid)
1495 {
1496 	int			index;
1497 	struct client_hash	*head;
1498 	mdi_client_t		*ct;
1499 
1500 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1501 
1502 	index = i_mdi_get_hash_key(guid);
1503 	head = &vh->vh_client_table[index];
1504 
1505 	ct = head->ct_hash_head;
1506 	while (ct != NULL) {
1507 		if (strcmp(ct->ct_guid, guid) == 0 &&
1508 		    (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) {
1509 			break;
1510 		}
1511 		ct = ct->ct_hnext;
1512 	}
1513 	return (ct);
1514 }
1515 
1516 /*
1517  * i_mdi_client_update_state():
1518  *		Compute and update client device state
1519  * Notes:
1520  *		A client device can be in any of three possible states:
1521  *
1522  *		MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more
1523  *		one online/standby paths. Can tolerate failures.
1524  *		MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with
1525  *		no alternate paths available as standby. A failure on the online
1526  *		would result in loss of access to device data.
1527  *		MDI_CLIENT_STATE_FAILED - Client device in failed state with
1528  *		no paths available to access the device.
1529  */
1530 static void
i_mdi_client_update_state(mdi_client_t * ct)1531 i_mdi_client_update_state(mdi_client_t *ct)
1532 {
1533 	int state;
1534 
1535 	ASSERT(MDI_CLIENT_LOCKED(ct));
1536 	state = i_mdi_client_compute_state(ct, NULL);
1537 	MDI_CLIENT_SET_STATE(ct, state);
1538 }
1539 
1540 /*
1541  * i_mdi_client_compute_state():
1542  *		Compute client device state
1543  *
1544  *		mdi_phci_t *	Pointer to pHCI structure which should
1545  *				while computing the new value.  Used by
1546  *				i_mdi_phci_offline() to find the new
1547  *				client state after DR of a pHCI.
1548  */
1549 static int
i_mdi_client_compute_state(mdi_client_t * ct,mdi_phci_t * ph)1550 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph)
1551 {
1552 	int		state;
1553 	int		online_count = 0;
1554 	int		standby_count = 0;
1555 	mdi_pathinfo_t	*pip, *next;
1556 
1557 	ASSERT(MDI_CLIENT_LOCKED(ct));
1558 	pip = ct->ct_path_head;
1559 	while (pip != NULL) {
1560 		MDI_PI_LOCK(pip);
1561 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
1562 		if (MDI_PI(pip)->pi_phci == ph) {
1563 			MDI_PI_UNLOCK(pip);
1564 			pip = next;
1565 			continue;
1566 		}
1567 
1568 		if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1569 				== MDI_PATHINFO_STATE_ONLINE)
1570 			online_count++;
1571 		else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1572 				== MDI_PATHINFO_STATE_STANDBY)
1573 			standby_count++;
1574 		MDI_PI_UNLOCK(pip);
1575 		pip = next;
1576 	}
1577 
1578 	if (online_count == 0) {
1579 		if (standby_count == 0) {
1580 			state = MDI_CLIENT_STATE_FAILED;
1581 			MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip,
1582 			    "client state failed: ct = %p", (void *)ct));
1583 		} else if (standby_count == 1) {
1584 			state = MDI_CLIENT_STATE_DEGRADED;
1585 		} else {
1586 			state = MDI_CLIENT_STATE_OPTIMAL;
1587 		}
1588 	} else if (online_count == 1) {
1589 		if (standby_count == 0) {
1590 			state = MDI_CLIENT_STATE_DEGRADED;
1591 		} else {
1592 			state = MDI_CLIENT_STATE_OPTIMAL;
1593 		}
1594 	} else {
1595 		state = MDI_CLIENT_STATE_OPTIMAL;
1596 	}
1597 	return (state);
1598 }
1599 
1600 /*
1601  * i_mdi_client2devinfo():
1602  *		Utility function
1603  */
1604 dev_info_t *
i_mdi_client2devinfo(mdi_client_t * ct)1605 i_mdi_client2devinfo(mdi_client_t *ct)
1606 {
1607 	return (ct->ct_dip);
1608 }
1609 
1610 /*
1611  * mdi_client_path2_devinfo():
1612  * 		Given the parent devinfo and child devfs pathname, search for
1613  *		a valid devfs node handle.
1614  */
1615 dev_info_t *
mdi_client_path2devinfo(dev_info_t * vdip,char * pathname)1616 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname)
1617 {
1618 	dev_info_t 	*cdip = NULL;
1619 	dev_info_t 	*ndip = NULL;
1620 	char		*temp_pathname;
1621 	int		circular;
1622 
1623 	/*
1624 	 * Allocate temp buffer
1625 	 */
1626 	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1627 
1628 	/*
1629 	 * Lock parent against changes
1630 	 */
1631 	ndi_devi_enter(vdip, &circular);
1632 	ndip = (dev_info_t *)DEVI(vdip)->devi_child;
1633 	while ((cdip = ndip) != NULL) {
1634 		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
1635 
1636 		*temp_pathname = '\0';
1637 		(void) ddi_pathname(cdip, temp_pathname);
1638 		if (strcmp(temp_pathname, pathname) == 0) {
1639 			break;
1640 		}
1641 	}
1642 	/*
1643 	 * Release devinfo lock
1644 	 */
1645 	ndi_devi_exit(vdip, circular);
1646 
1647 	/*
1648 	 * Free the temp buffer
1649 	 */
1650 	kmem_free(temp_pathname, MAXPATHLEN);
1651 	return (cdip);
1652 }
1653 
1654 /*
1655  * mdi_client_get_path_count():
1656  * 		Utility function to get number of path information nodes
1657  *		associated with a given client device.
1658  */
1659 int
mdi_client_get_path_count(dev_info_t * cdip)1660 mdi_client_get_path_count(dev_info_t *cdip)
1661 {
1662 	mdi_client_t	*ct;
1663 	int		count = 0;
1664 
1665 	ct = i_devi_get_client(cdip);
1666 	if (ct != NULL) {
1667 		count = ct->ct_path_count;
1668 	}
1669 	return (count);
1670 }
1671 
1672 
1673 /*
1674  * i_mdi_get_hash_key():
1675  * 		Create a hash using strings as keys
1676  *
1677  */
1678 static int
i_mdi_get_hash_key(char * str)1679 i_mdi_get_hash_key(char *str)
1680 {
1681 	uint32_t	g, hash = 0;
1682 	char		*p;
1683 
1684 	for (p = str; *p != '\0'; p++) {
1685 		g = *p;
1686 		hash += g;
1687 	}
1688 	return (hash % (CLIENT_HASH_TABLE_SIZE - 1));
1689 }
1690 
1691 /*
1692  * mdi_get_lb_policy():
1693  * 		Get current load balancing policy for a given client device
1694  */
1695 client_lb_t
mdi_get_lb_policy(dev_info_t * cdip)1696 mdi_get_lb_policy(dev_info_t *cdip)
1697 {
1698 	client_lb_t	lb = LOAD_BALANCE_NONE;
1699 	mdi_client_t	*ct;
1700 
1701 	ct = i_devi_get_client(cdip);
1702 	if (ct != NULL) {
1703 		lb = ct->ct_lb;
1704 	}
1705 	return (lb);
1706 }
1707 
1708 /*
1709  * mdi_set_lb_region_size():
1710  * 		Set current region size for the load-balance
1711  */
1712 int
mdi_set_lb_region_size(dev_info_t * cdip,int region_size)1713 mdi_set_lb_region_size(dev_info_t *cdip, int region_size)
1714 {
1715 	mdi_client_t	*ct;
1716 	int		rv = MDI_FAILURE;
1717 
1718 	ct = i_devi_get_client(cdip);
1719 	if (ct != NULL && ct->ct_lb_args != NULL) {
1720 		ct->ct_lb_args->region_size = region_size;
1721 		rv = MDI_SUCCESS;
1722 	}
1723 	return (rv);
1724 }
1725 
1726 /*
1727  * mdi_Set_lb_policy():
1728  * 		Set current load balancing policy for a given client device
1729  */
1730 int
mdi_set_lb_policy(dev_info_t * cdip,client_lb_t lb)1731 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb)
1732 {
1733 	mdi_client_t	*ct;
1734 	int		rv = MDI_FAILURE;
1735 
1736 	ct = i_devi_get_client(cdip);
1737 	if (ct != NULL) {
1738 		ct->ct_lb = lb;
1739 		rv = MDI_SUCCESS;
1740 	}
1741 	return (rv);
1742 }
1743 
1744 /*
1745  * mdi_failover():
1746  *		failover function called by the vHCI drivers to initiate
1747  *		a failover operation.  This is typically due to non-availability
1748  *		of online paths to route I/O requests.  Failover can be
1749  *		triggered through user application also.
1750  *
1751  *		The vHCI driver calls mdi_failover() to initiate a failover
1752  *		operation. mdi_failover() calls back into the vHCI driver's
1753  *		vo_failover() entry point to perform the actual failover
1754  *		operation.  The reason for requiring the vHCI driver to
1755  *		initiate failover by calling mdi_failover(), instead of directly
1756  *		executing vo_failover() itself, is to ensure that the mdi
1757  *		framework can keep track of the client state properly.
1758  *		Additionally, mdi_failover() provides as a convenience the
1759  *		option of performing the failover operation synchronously or
1760  *		asynchronously
1761  *
1762  *		Upon successful completion of the failover operation, the
1763  *		paths that were previously ONLINE will be in the STANDBY state,
1764  *		and the newly activated paths will be in the ONLINE state.
1765  *
1766  *		The flags modifier determines whether the activation is done
1767  *		synchronously: MDI_FAILOVER_SYNC
1768  * Return Values:
1769  *		MDI_SUCCESS
1770  *		MDI_FAILURE
1771  *		MDI_BUSY
1772  */
1773 /*ARGSUSED*/
1774 int
mdi_failover(dev_info_t * vdip,dev_info_t * cdip,int flags)1775 mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags)
1776 {
1777 	int			rv;
1778 	mdi_client_t		*ct;
1779 
1780 	ct = i_devi_get_client(cdip);
1781 	ASSERT(ct != NULL);
1782 	if (ct == NULL) {
1783 		/* cdip is not a valid client device. Nothing more to do. */
1784 		return (MDI_FAILURE);
1785 	}
1786 
1787 	MDI_CLIENT_LOCK(ct);
1788 
1789 	if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) {
1790 		/* A path to the client is being freed */
1791 		MDI_CLIENT_UNLOCK(ct);
1792 		return (MDI_BUSY);
1793 	}
1794 
1795 
1796 	if (MDI_CLIENT_IS_FAILED(ct)) {
1797 		/*
1798 		 * Client is in failed state. Nothing more to do.
1799 		 */
1800 		MDI_CLIENT_UNLOCK(ct);
1801 		return (MDI_FAILURE);
1802 	}
1803 
1804 	if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
1805 		/*
1806 		 * Failover is already in progress; return BUSY
1807 		 */
1808 		MDI_CLIENT_UNLOCK(ct);
1809 		return (MDI_BUSY);
1810 	}
1811 	/*
1812 	 * Make sure that mdi_pathinfo node state changes are processed.
1813 	 * We do not allow failovers to progress while client path state
1814 	 * changes are in progress
1815 	 */
1816 	if (ct->ct_unstable) {
1817 		if (flags == MDI_FAILOVER_ASYNC) {
1818 			MDI_CLIENT_UNLOCK(ct);
1819 			return (MDI_BUSY);
1820 		} else {
1821 			while (ct->ct_unstable)
1822 				cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex);
1823 		}
1824 	}
1825 
1826 	/*
1827 	 * Client device is in stable state. Before proceeding, perform sanity
1828 	 * checks again.
1829 	 */
1830 	if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) ||
1831 	    (!i_ddi_devi_attached(cdip))) {
1832 		/*
1833 		 * Client is in failed state. Nothing more to do.
1834 		 */
1835 		MDI_CLIENT_UNLOCK(ct);
1836 		return (MDI_FAILURE);
1837 	}
1838 
1839 	/*
1840 	 * Set the client state as failover in progress.
1841 	 */
1842 	MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct);
1843 	ct->ct_failover_flags = flags;
1844 	MDI_CLIENT_UNLOCK(ct);
1845 
1846 	if (flags == MDI_FAILOVER_ASYNC) {
1847 		/*
1848 		 * Submit the initiate failover request via CPR safe
1849 		 * taskq threads.
1850 		 */
1851 		(void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover,
1852 		    ct, KM_SLEEP);
1853 		return (MDI_ACCEPT);
1854 	} else {
1855 		/*
1856 		 * Synchronous failover mode.  Typically invoked from the user
1857 		 * land.
1858 		 */
1859 		rv = i_mdi_failover(ct);
1860 	}
1861 	return (rv);
1862 }
1863 
1864 /*
1865  * i_mdi_failover():
1866  *		internal failover function. Invokes vHCI drivers failover
1867  *		callback function and process the failover status
1868  * Return Values:
1869  *		None
1870  *
1871  * Note: A client device in failover state can not be detached or freed.
1872  */
1873 static int
i_mdi_failover(void * arg)1874 i_mdi_failover(void *arg)
1875 {
1876 	int		rv = MDI_SUCCESS;
1877 	mdi_client_t	*ct = (mdi_client_t *)arg;
1878 	mdi_vhci_t	*vh = ct->ct_vhci;
1879 
1880 	ASSERT(!MDI_CLIENT_LOCKED(ct));
1881 
1882 	if (vh->vh_ops->vo_failover != NULL) {
1883 		/*
1884 		 * Call vHCI drivers callback routine
1885 		 */
1886 		rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip,
1887 		    ct->ct_failover_flags);
1888 	}
1889 
1890 	MDI_CLIENT_LOCK(ct);
1891 	MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct);
1892 
1893 	/*
1894 	 * Save the failover return status
1895 	 */
1896 	ct->ct_failover_status = rv;
1897 
1898 	/*
1899 	 * As a result of failover, client status would have been changed.
1900 	 * Update the client state and wake up anyone waiting on this client
1901 	 * device.
1902 	 */
1903 	i_mdi_client_update_state(ct);
1904 
1905 	cv_broadcast(&ct->ct_failover_cv);
1906 	MDI_CLIENT_UNLOCK(ct);
1907 	return (rv);
1908 }
1909 
1910 /*
1911  * Load balancing is logical block.
1912  * IOs within the range described by region_size
1913  * would go on the same path. This would improve the
1914  * performance by cache-hit on some of the RAID devices.
1915  * Search only for online paths(At some point we
1916  * may want to balance across target ports).
1917  * If no paths are found then default to round-robin.
1918  */
1919 static int
i_mdi_lba_lb(mdi_client_t * ct,mdi_pathinfo_t ** ret_pip,struct buf * bp)1920 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp)
1921 {
1922 	int		path_index = -1;
1923 	int		online_path_count = 0;
1924 	int		online_nonpref_path_count = 0;
1925 	int 		region_size = ct->ct_lb_args->region_size;
1926 	mdi_pathinfo_t	*pip;
1927 	mdi_pathinfo_t	*next;
1928 	int		preferred, path_cnt;
1929 
1930 	pip = ct->ct_path_head;
1931 	while (pip) {
1932 		MDI_PI_LOCK(pip);
1933 		if (MDI_PI(pip)->pi_state ==
1934 		    MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) {
1935 			online_path_count++;
1936 		} else if (MDI_PI(pip)->pi_state ==
1937 		    MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) {
1938 			online_nonpref_path_count++;
1939 		}
1940 		next = (mdi_pathinfo_t *)
1941 		    MDI_PI(pip)->pi_client_link;
1942 		MDI_PI_UNLOCK(pip);
1943 		pip = next;
1944 	}
1945 	/* if found any online/preferred then use this type */
1946 	if (online_path_count > 0) {
1947 		path_cnt = online_path_count;
1948 		preferred = 1;
1949 	} else if (online_nonpref_path_count > 0) {
1950 		path_cnt = online_nonpref_path_count;
1951 		preferred = 0;
1952 	} else {
1953 		path_cnt = 0;
1954 	}
1955 	if (path_cnt) {
1956 		path_index = (bp->b_blkno >> region_size) % path_cnt;
1957 		pip = ct->ct_path_head;
1958 		while (pip && path_index != -1) {
1959 			MDI_PI_LOCK(pip);
1960 			if (path_index == 0 &&
1961 			    (MDI_PI(pip)->pi_state ==
1962 			    MDI_PATHINFO_STATE_ONLINE) &&
1963 				MDI_PI(pip)->pi_preferred == preferred) {
1964 				MDI_PI_HOLD(pip);
1965 				MDI_PI_UNLOCK(pip);
1966 				*ret_pip = pip;
1967 				return (MDI_SUCCESS);
1968 			}
1969 			path_index --;
1970 			next = (mdi_pathinfo_t *)
1971 			    MDI_PI(pip)->pi_client_link;
1972 			MDI_PI_UNLOCK(pip);
1973 			pip = next;
1974 		}
1975 		MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
1976 		    "lba %llx: path %s %p",
1977 		    bp->b_lblkno, mdi_pi_spathname(pip), (void *)pip));
1978 	}
1979 	return (MDI_FAILURE);
1980 }
1981 
1982 /*
1983  * mdi_select_path():
1984  *		select a path to access a client device.
1985  *
1986  *		mdi_select_path() function is called by the vHCI drivers to
1987  *		select a path to route the I/O request to.  The caller passes
1988  *		the block I/O data transfer structure ("buf") as one of the
1989  *		parameters.  The mpxio framework uses the buf structure
1990  *		contents to maintain per path statistics (total I/O size /
1991  *		count pending).  If more than one online paths are available to
1992  *		select, the framework automatically selects a suitable path
1993  *		for routing I/O request. If a failover operation is active for
1994  *		this client device the call shall be failed with MDI_BUSY error
1995  *		code.
1996  *
1997  *		By default this function returns a suitable path in online
1998  *		state based on the current load balancing policy.  Currently
1999  *		we support LOAD_BALANCE_NONE (Previously selected online path
2000  *		will continue to be used till the path is usable) and
2001  *		LOAD_BALANCE_RR (Online paths will be selected in a round
2002  *		robin fashion), LOAD_BALANCE_LB(Online paths will be selected
2003  *		based on the logical block).  The load balancing policy is
2004  *		selected through the vHCI driver's configuration file
2004  *		(driver.conf).
2005  *
2006  *		vHCI drivers may override this default behavior by specifying
2007  *		appropriate flags.  The meaning of the third argument depends
2008  *		on the flags specified. If MDI_SELECT_PATH_INSTANCE is set
2009  *		then the argument is the "path instance" of the path to select.
2010  *		If MDI_SELECT_PATH_INSTANCE is not set then the argument is
2011  *		"start_pip". A non NULL "start_pip" is the starting point to
2012  *		walk and find the next appropriate path.  The following values
2013  *		are currently defined: MDI_SELECT_ONLINE_PATH (to select an
2014  *		ONLINE path) and/or MDI_SELECT_STANDBY_PATH (to select an
2015  *		STANDBY path).
2016  *
2017  *		The non-standard behavior is used by the scsi_vhci driver,
2018  *		whenever it has to use a STANDBY/FAULTED path.  Eg. during
2019  *		attach of client devices (to avoid an unnecessary failover
2020  *		when the STANDBY path comes up first), during failover
2021  *		(to activate a STANDBY path as ONLINE).
2022  *
2023  *		The selected path is returned in a mdi_hold_path() state
2024  *		(pi_ref_cnt). Caller should release the hold by calling
2025  *		mdi_rele_path().
2026  *
2027  * Return Values:
2028  *		MDI_SUCCESS	- Completed successfully
2029  *		MDI_BUSY 	- Client device is busy failing over
 *		MDI_NOPATH	- Client device is online, but no valid paths are
 *				  available to access this client device
2032  *		MDI_FAILURE	- Invalid client device or state
2033  *		MDI_DEVI_ONLINING
2034  *				- Client device (struct dev_info state) is in
2035  *				  onlining state.
2036  */
2037 
/*ARGSUSED*/
int
mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
    void *arg, mdi_pathinfo_t **ret_pip)
{
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	mdi_pathinfo_t	*head;
	mdi_pathinfo_t	*start;
	client_lb_t	lbp;	/* load balancing policy */
	int		sb = 1;	/* standard behavior */
	int		preferred = 1;	/* preferred path */
	/* cond: current path satisfies the request; cont: keep walking */
	int		cond, cont = 1;
	/* set when a transient path was skipped; yields MDI_BUSY at the end */
	int		retry = 0;
	mdi_pathinfo_t	*start_pip;	/* request starting pathinfo */
	int		path_instance;	/* request specific path instance */

	/* determine type of arg based on flags */
	if (flags & MDI_SELECT_PATH_INSTANCE) {
		path_instance = (int)(intptr_t)arg;
		start_pip = NULL;
	} else {
		path_instance = 0;
		start_pip = (mdi_pathinfo_t *)arg;
	}

	if (flags != 0) {
		/*
		 * disable default behavior
		 */
		sb = 0;
	}

	*ret_pip = NULL;
	ct = i_devi_get_client(cdip);
	if (ct == NULL) {
		/* mdi extensions are NULL, Nothing more to do */
		return (MDI_FAILURE);
	}

	/*
	 * The client lock is held from here until every return below; each
	 * exit path drops it explicitly.
	 */
	MDI_CLIENT_LOCK(ct);

	/* The client state checks are applied to the default behavior only. */
	if (sb) {
		if (MDI_CLIENT_IS_FAILED(ct)) {
			/*
			 * Client is not ready to accept any I/O requests.
			 * Fail this request.
			 */
			MDI_DEBUG(2, (MDI_NOTE, cdip,
			    "client state offline ct = %p", (void *)ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_FAILURE);
		}

		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
			/*
			 * Check for Failover is in progress. If so tell the
			 * caller that this device is busy.
			 */
			MDI_DEBUG(2, (MDI_NOTE, cdip,
			    "client failover in progress ct = %p",
			    (void *)ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_BUSY);
		}

		/*
		 * Check to see whether the client device is attached.
		 * If not so, let the vHCI driver manually select a path
		 * (standby) and let the probe/attach process to continue.
		 */
		if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) {
			MDI_DEBUG(4, (MDI_NOTE, cdip,
			    "devi is onlining ct = %p", (void *)ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_DEVI_ONLINING);
		}
	}

	/*
	 * Cache in the client list head.  If head of the list is NULL
	 * return MDI_NOPATH
	 */
	head = ct->ct_path_head;
	if (head == NULL) {
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_NOPATH);
	}

	/* Caller is specifying a specific pathinfo path by path_instance */
	if (path_instance) {
		/* search for pathinfo with correct path_instance */
		for (pip = head;
		    pip && (mdi_pi_get_path_instance(pip) != path_instance);
		    pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link)
			;

		/* If path can't be selected then MDI_NOPATH is returned. */
		if (pip == NULL) {
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_NOPATH);
		}

		/*
		 * Verify state of path. When asked to select a specific
		 * path_instance, we select the requested path in any
		 * state (ONLINE, OFFLINE, STANDBY, FAULT) other than INIT.
		 * We don't however select paths where the pHCI has detached.
		 * NOTE: last pathinfo node of an opened client device may
		 * exist in an OFFLINE state after the pHCI associated with
		 * that path has detached (but pi_phci will be NULL if that
		 * has occurred).
		 */
		MDI_PI_LOCK(pip);
		if ((MDI_PI(pip)->pi_state == MDI_PATHINFO_STATE_INIT) ||
		    (MDI_PI(pip)->pi_phci == NULL)) {
			MDI_PI_UNLOCK(pip);
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_FAILURE);
		}

		/* Return MDI_BUSY if we have a transient condition */
		if (MDI_PI_IS_TRANSIENT(pip)) {
			MDI_PI_UNLOCK(pip);
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_BUSY);
		}

		/*
		 * Return the path in hold state. Caller should release the
		 * hold by calling mdi_rele_path()
		 */
		MDI_PI_HOLD(pip);
		MDI_PI_UNLOCK(pip);
		*ret_pip = pip;
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_SUCCESS);
	}

	/*
	 * for non default behavior, bypass current
	 * load balancing policy and always use LOAD_BALANCE_RR
	 * except that the start point will be adjusted based
	 * on the provided start_pip
	 */
	lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;

	switch (lbp) {
	case LOAD_BALANCE_NONE:
		/*
		 * Load balancing is None  or Alternate path mode
		 * Start looking for a online mdi_pathinfo node starting from
		 * last known selected path
		 */
		preferred = 1;
		pip = (mdi_pathinfo_t *)ct->ct_path_last;
		if (pip == NULL) {
			pip = head;
		}
		start = pip;
		/*
		 * The list is walked circularly from 'start' up to twice:
		 * first matching preferred paths, then (once the walk wraps
		 * back to 'start') matching non-preferred paths.
		 */
		do {
			MDI_PI_LOCK(pip);
			/*
			 * No need to explicitly check if the path is disabled.
			 * Since we are checking for state == ONLINE and the
			 * same variable is used for DISABLE/ENABLE information.
			 */
			if ((MDI_PI(pip)->pi_state  ==
				MDI_PATHINFO_STATE_ONLINE) &&
				preferred == MDI_PI(pip)->pi_preferred) {
				/*
				 * Return the path in hold state. Caller should
				 * release the hold by calling mdi_rele_path()
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				ct->ct_path_last = pip;
				*ret_pip = pip;
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}

			/*
			 * Path is busy.
			 */
			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
			    MDI_PI_IS_TRANSIENT(pip))
				retry = 1;
			/*
			 * Keep looking for a next available online path
			 */
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (next == NULL) {
				next = head;
			}
			MDI_PI_UNLOCK(pip);
			pip = next;
			if (start == pip && preferred) {
				/* first pass done; retry with non-preferred */
				preferred = 0;
			} else if (start == pip && !preferred) {
				/* second pass done; give up */
				cont = 0;
			}
		} while (cont);
		break;

	case LOAD_BALANCE_LBA:
		/*
		 * Make sure we are looking
		 * for an online path. Otherwise, if it is for a STANDBY
		 * path request, it will go through and fetch an ONLINE
		 * path which is not desirable.
		 */
		if ((ct->ct_lb_args != NULL) &&
			    (ct->ct_lb_args->region_size) && bp &&
				(sb || (flags == MDI_SELECT_ONLINE_PATH))) {
			if (i_mdi_lba_lb(ct, ret_pip, bp)
				    == MDI_SUCCESS) {
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}
		}
		/* LBA selection not applicable or failed; use round robin */
		/* FALLTHROUGH */
	case LOAD_BALANCE_RR:
		/*
		 * Load balancing is Round Robin. Start looking for a online
		 * mdi_pathinfo node starting from last known selected path
		 * as the start point.  If override flags are specified,
		 * process accordingly.
		 * If the search is already in effect(start_pip not null),
		 * then lets just use the same path preference to continue the
		 * traversal.
		 */

		if (start_pip != NULL) {
			preferred = MDI_PI(start_pip)->pi_preferred;
		} else {
			preferred = 1;
		}

		/*
		 * The walk begins at the node following 'start': the last
		 * selected path for standard behavior, the caller supplied
		 * start_pip otherwise.
		 */
		start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
		if (start == NULL) {
			pip = head;
		} else {
			pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
			if (pip == NULL) {
				if ( flags & MDI_SELECT_NO_PREFERRED) {
					/*
					 * Return since we hit the end of list
					 */
					MDI_CLIENT_UNLOCK(ct);
					return (MDI_NOPATH);
				}

				if (!sb) {
					if (preferred == 0) {
						/*
						 * Looks like we have completed
						 * the traversal as preferred
						 * value is 0. Time to bail out.
						 */
						*ret_pip = NULL;
						MDI_CLIENT_UNLOCK(ct);
						return (MDI_NOPATH);
					} else {
						/*
						 * Looks like we reached the
						 * end of the list. Lets enable
						 * traversal of non preferred
						 * paths.
						 */
						preferred = 0;
					}
				}
				pip = head;
			}
		}
		start = pip;
		do {
			MDI_PI_LOCK(pip);
			/*
			 * Compute 'cond': whether this path's state (and,
			 * except for MDI_SELECT_NO_PREFERRED, its preferred
			 * setting) matches what was asked for.  Only the exact
			 * flag combinations listed below are recognized; any
			 * other combination matches nothing.
			 */
			if (sb) {
				cond = ((MDI_PI(pip)->pi_state ==
				    MDI_PATHINFO_STATE_ONLINE &&
					MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
			} else {
				if (flags == MDI_SELECT_ONLINE_PATH) {
					cond = ((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE &&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else if (flags == MDI_SELECT_STANDBY_PATH) {
					cond = ((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY &&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else if (flags == (MDI_SELECT_ONLINE_PATH |
				    MDI_SELECT_STANDBY_PATH)) {
					cond = (((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE ||
					    (MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY)) &&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else if (flags ==
					(MDI_SELECT_STANDBY_PATH |
					MDI_SELECT_ONLINE_PATH |
					MDI_SELECT_USER_DISABLE_PATH)) {
					cond = (((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE ||
					    (MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY) ||
						(MDI_PI(pip)->pi_state ==
					    (MDI_PATHINFO_STATE_ONLINE|
					    MDI_PATHINFO_STATE_USER_DISABLE)) ||
						(MDI_PI(pip)->pi_state ==
					    (MDI_PATHINFO_STATE_STANDBY |
					    MDI_PATHINFO_STATE_USER_DISABLE)))&&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else if (flags ==
				    (MDI_SELECT_STANDBY_PATH |
				    MDI_SELECT_ONLINE_PATH |
				    MDI_SELECT_NO_PREFERRED)) {
					cond = (((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE) ||
					    (MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY))
					    ? 1 : 0);
				} else {
					cond = 0;
				}
			}
			/*
			 * No need to explicitly check if the path is disabled.
			 * Since we are checking for state == ONLINE and the
			 * same variable is used for DISABLE/ENABLE information.
			 */
			if (cond) {
				/*
				 * Return the path in hold state. Caller should
				 * release the hold by calling mdi_rele_path()
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				/*
				 * Only the standard behavior updates the
				 * round robin cursor.
				 */
				if (sb)
					ct->ct_path_last = pip;
				*ret_pip = pip;
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}
			/*
			 * Path is busy.
			 */
			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
			    MDI_PI_IS_TRANSIENT(pip))
				retry = 1;

			/*
			 * Keep looking for a next available online path
			 * (do_again is entered with pip locked).
			 */
do_again:
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (next == NULL) {
				if ( flags & MDI_SELECT_NO_PREFERRED) {
					/*
					 * Bail out since we hit the end of list
					 */
					MDI_PI_UNLOCK(pip);
					break;
				}

				if (!sb) {
					if (preferred == 1) {
						/*
						 * Looks like we reached the
						 * end of the list. Lets enable
						 * traversal of non preferred
						 * paths.
						 */
						preferred = 0;
						next = head;
					} else {
						/*
						 * We have done both the passes
						 * Preferred as well as for
						 * Non-preferred. Bail out now.
						 */
						cont = 0;
					}
				} else {
					/*
					 * Standard behavior case.
					 */
					next = head;
				}
			}
			MDI_PI_UNLOCK(pip);
			if (cont == 0) {
				break;
			}
			pip = next;

			if (!sb) {
				/*
				 * We need to handle the selection of
				 * non-preferred path in the following
				 * case:
				 *
				 * +------+   +------+   +------+   +-----+
				 * | A : 1| - | B : 1| - | C : 0| - |NULL |
				 * +------+   +------+   +------+   +-----+
				 *
				 * If we start the search with B, we need to
				 * skip beyond B to pick C which is non -
				 * preferred in the second pass. The following
				 * test, if true, will allow us to skip over
				 * the 'start'(B in the example) to select
				 * other non preferred elements.
				 */
				if ((start_pip != NULL) && (start_pip == pip) &&
				    (MDI_PI(start_pip)->pi_preferred
				    != preferred)) {
					/*
					 * try again after going past the start
					 * pip
					 */
					MDI_PI_LOCK(pip);
					goto do_again;
				}
			} else {
				/*
				 * Standard behavior case
				 */
				if (start == pip && preferred) {
					/* look for nonpreferred paths */
					preferred = 0;
				} else if (start == pip && !preferred) {
					/*
					 * Exit condition
					 */
					cont = 0;
				}
			}
		} while (cont);
		break;
	}

	/*
	 * No path was selected.  Report MDI_BUSY if at least one path was
	 * skipped only because it was in a transient state, so that the
	 * caller can tell a temporary condition from a real lack of paths.
	 */
	MDI_CLIENT_UNLOCK(ct);
	if (retry == 1) {
		return (MDI_BUSY);
	} else {
		return (MDI_NOPATH);
	}
}
2493 
2494 /*
2495  * For a client, return the next available path to any phci
2496  *
2497  * Note:
2498  *		Caller should hold the branch's devinfo node to get a consistent
2499  *		snap shot of the mdi_pathinfo nodes.
2500  *
2501  *		Please note that even the list is stable the mdi_pathinfo
2502  *		node state and properties are volatile.  The caller should lock
2503  *		and unlock the nodes by calling mdi_pi_lock() and
2504  *		mdi_pi_unlock() functions to get a stable properties.
2505  *
2506  *		If there is a need to use the nodes beyond the hold of the
2507  *		devinfo node period (For ex. I/O), then mdi_pathinfo node
2508  *		need to be held against unexpected removal by calling
2509  *		mdi_hold_path() and should be released by calling
2510  *		mdi_rele_path() on completion.
2511  */
2512 mdi_pathinfo_t *
mdi_get_next_phci_path(dev_info_t * ct_dip,mdi_pathinfo_t * pip)2513 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip)
2514 {
2515 	mdi_client_t *ct;
2516 
2517 	if (!MDI_CLIENT(ct_dip))
2518 		return (NULL);
2519 
2520 	/*
2521 	 * Walk through client link
2522 	 */
2523 	ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client;
2524 	ASSERT(ct != NULL);
2525 
2526 	if (pip == NULL)
2527 		return ((mdi_pathinfo_t *)ct->ct_path_head);
2528 
2529 	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link);
2530 }
2531 
2532 /*
2533  * For a phci, return the next available path to any client
2534  * Note: ditto mdi_get_next_phci_path()
2535  */
2536 mdi_pathinfo_t *
mdi_get_next_client_path(dev_info_t * ph_dip,mdi_pathinfo_t * pip)2537 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip)
2538 {
2539 	mdi_phci_t *ph;
2540 
2541 	if (!MDI_PHCI(ph_dip))
2542 		return (NULL);
2543 
2544 	/*
2545 	 * Walk through pHCI link
2546 	 */
2547 	ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci;
2548 	ASSERT(ph != NULL);
2549 
2550 	if (pip == NULL)
2551 		return ((mdi_pathinfo_t *)ph->ph_path_head);
2552 
2553 	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link);
2554 }
2555 
2556 /*
2557  * mdi_hold_path():
2558  *		Hold the mdi_pathinfo node against unwanted unexpected free.
2559  * Return Values:
2560  *		None
2561  */
2562 void
mdi_hold_path(mdi_pathinfo_t * pip)2563 mdi_hold_path(mdi_pathinfo_t *pip)
2564 {
2565 	if (pip) {
2566 		MDI_PI_LOCK(pip);
2567 		MDI_PI_HOLD(pip);
2568 		MDI_PI_UNLOCK(pip);
2569 	}
2570 }
2571 
2572 
2573 /*
2574  * mdi_rele_path():
2575  *		Release the mdi_pathinfo node which was selected
2576  *		through mdi_select_path() mechanism or manually held by
2577  *		calling mdi_hold_path().
2578  * Return Values:
2579  *		None
2580  */
2581 void
mdi_rele_path(mdi_pathinfo_t * pip)2582 mdi_rele_path(mdi_pathinfo_t *pip)
2583 {
2584 	if (pip) {
2585 		MDI_PI_LOCK(pip);
2586 		MDI_PI_RELE(pip);
2587 		if (MDI_PI(pip)->pi_ref_cnt == 0) {
2588 			cv_broadcast(&MDI_PI(pip)->pi_ref_cv);
2589 		}
2590 		MDI_PI_UNLOCK(pip);
2591 	}
2592 }
2593 
2594 /*
2595  * mdi_pi_lock():
2596  * 		Lock the mdi_pathinfo node.
2597  * Note:
2598  *		The caller should release the lock by calling mdi_pi_unlock()
2599  */
2600 void
mdi_pi_lock(mdi_pathinfo_t * pip)2601 mdi_pi_lock(mdi_pathinfo_t *pip)
2602 {
2603 	ASSERT(pip != NULL);
2604 	if (pip) {
2605 		MDI_PI_LOCK(pip);
2606 	}
2607 }
2608 
2609 
2610 /*
2611  * mdi_pi_unlock():
2612  * 		Unlock the mdi_pathinfo node.
2613  * Note:
2614  *		The mdi_pathinfo node should have been locked with mdi_pi_lock()
2615  */
2616 void
mdi_pi_unlock(mdi_pathinfo_t * pip)2617 mdi_pi_unlock(mdi_pathinfo_t *pip)
2618 {
2619 	ASSERT(pip != NULL);
2620 	if (pip) {
2621 		MDI_PI_UNLOCK(pip);
2622 	}
2623 }
2624 
2625 /*
2626  * mdi_pi_find():
2627  *		Search the list of mdi_pathinfo nodes attached to the
2628  *		pHCI/Client device node whose path address matches "paddr".
2629  *		Returns a pointer to the mdi_pathinfo node if a matching node is
2630  *		found.
2631  * Return Values:
2632  *		mdi_pathinfo node handle
2633  *		NULL
2634  * Notes:
2635  *		Caller need not hold any locks to call this function.
2636  */
2637 mdi_pathinfo_t *
mdi_pi_find(dev_info_t * pdip,char * caddr,char * paddr)2638 mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr)
2639 {
2640 	mdi_phci_t		*ph;
2641 	mdi_vhci_t		*vh;
2642 	mdi_client_t		*ct;
2643 	mdi_pathinfo_t		*pip = NULL;
2644 
2645 	MDI_DEBUG(2, (MDI_NOTE, pdip,
2646 	    "caddr@%s paddr@%s", caddr ? caddr : "", paddr ? paddr : ""));
2647 	if ((pdip == NULL) || (paddr == NULL)) {
2648 		return (NULL);
2649 	}
2650 	ph = i_devi_get_phci(pdip);
2651 	if (ph == NULL) {
2652 		/*
2653 		 * Invalid pHCI device, Nothing more to do.
2654 		 */
2655 		MDI_DEBUG(2, (MDI_WARN, pdip, "invalid phci"));
2656 		return (NULL);
2657 	}
2658 
2659 	vh = ph->ph_vhci;
2660 	if (vh == NULL) {
2661 		/*
2662 		 * Invalid vHCI device, Nothing more to do.
2663 		 */
2664 		MDI_DEBUG(2, (MDI_WARN, pdip, "invalid vhci"));
2665 		return (NULL);
2666 	}
2667 
2668 	/*
2669 	 * Look for pathinfo node identified by paddr.
2670 	 */
2671 	if (caddr == NULL) {
2672 		/*
2673 		 * Find a mdi_pathinfo node under pHCI list for a matching
2674 		 * unit address.
2675 		 */
2676 		MDI_PHCI_LOCK(ph);
2677 		if (MDI_PHCI_IS_OFFLINE(ph)) {
2678 			MDI_DEBUG(2, (MDI_WARN, pdip,
2679 			    "offline phci %p", (void *)ph));
2680 			MDI_PHCI_UNLOCK(ph);
2681 			return (NULL);
2682 		}
2683 		pip = (mdi_pathinfo_t *)ph->ph_path_head;
2684 
2685 		while (pip != NULL) {
2686 			if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2687 				break;
2688 			}
2689 			pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
2690 		}
2691 		MDI_PHCI_UNLOCK(ph);
2692 		MDI_DEBUG(2, (MDI_NOTE, pdip,
2693 		    "found %s %p", mdi_pi_spathname(pip), (void *)pip));
2694 		return (pip);
2695 	}
2696 
2697 	/*
2698 	 * XXX - Is the rest of the code in this function really necessary?
2699 	 * The consumers of mdi_pi_find() can search for the desired pathinfo
2700 	 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of
2701 	 * whether the search is based on the pathinfo nodes attached to
2702 	 * the pHCI or the client node, the result will be the same.
2703 	 */
2704 
2705 	/*
2706 	 * Find the client device corresponding to 'caddr'
2707 	 */
2708 	MDI_VHCI_CLIENT_LOCK(vh);
2709 
2710 	/*
2711 	 * XXX - Passing NULL to the following function works as long as the
2712 	 * the client addresses (caddr) are unique per vhci basis.
2713 	 */
2714 	ct = i_mdi_client_find(vh, NULL, caddr);
2715 	if (ct == NULL) {
2716 		/*
2717 		 * Client not found, Obviously mdi_pathinfo node has not been
2718 		 * created yet.
2719 		 */
2720 		MDI_VHCI_CLIENT_UNLOCK(vh);
2721 		MDI_DEBUG(2, (MDI_NOTE, pdip,
2722 		    "client not found for caddr @%s", caddr ? caddr : ""));
2723 		return (NULL);
2724 	}
2725 
2726 	/*
2727 	 * Hold the client lock and look for a mdi_pathinfo node with matching
2728 	 * pHCI and paddr
2729 	 */
2730 	MDI_CLIENT_LOCK(ct);
2731 
2732 	/*
2733 	 * Release the global mutex as it is no more needed. Note: We always
2734 	 * respect the locking order while acquiring.
2735 	 */
2736 	MDI_VHCI_CLIENT_UNLOCK(vh);
2737 
2738 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
2739 	while (pip != NULL) {
2740 		/*
2741 		 * Compare the unit address
2742 		 */
2743 		if ((MDI_PI(pip)->pi_phci == ph) &&
2744 		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2745 			break;
2746 		}
2747 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2748 	}
2749 	MDI_CLIENT_UNLOCK(ct);
2750 	MDI_DEBUG(2, (MDI_NOTE, pdip,
2751 	    "found: %s %p", mdi_pi_spathname(pip), (void *)pip));
2752 	return (pip);
2753 }
2754 
2755 /*
2756  * mdi_pi_alloc():
2757  *		Allocate and initialize a new instance of a mdi_pathinfo node.
2758  *		The mdi_pathinfo node returned by this function identifies a
2759  *		unique device path is capable of having properties attached
2760  *		and passed to mdi_pi_online() to fully attach and online the
2761  *		path and client device node.
2762  *		The mdi_pathinfo node returned by this function must be
2763  *		destroyed using mdi_pi_free() if the path is no longer
2764  *		operational or if the caller fails to attach a client device
2765  *		node when calling mdi_pi_online(). The framework will not free
2766  *		the resources allocated.
2767  *		This function can be called from both interrupt and kernel
2768  *		contexts.  DDI_NOSLEEP flag should be used while calling
2769  *		from interrupt contexts.
2770  * Return Values:
2771  *		MDI_SUCCESS
2772  *		MDI_FAILURE
2773  *		MDI_NOMEM
2774  */
2775 /*ARGSUSED*/
2776 int
mdi_pi_alloc_compatible(dev_info_t * pdip,char * cname,char * caddr,char * paddr,char ** compatible,int ncompatible,int flags,mdi_pathinfo_t ** ret_pip)2777 mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2778     char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip)
2779 {
2780 	mdi_vhci_t	*vh;
2781 	mdi_phci_t	*ph;
2782 	mdi_client_t	*ct;
2783 	mdi_pathinfo_t	*pip = NULL;
2784 	dev_info_t	*cdip;
2785 	int		rv = MDI_NOMEM;
2786 	int		path_allocated = 0;
2787 
2788 	MDI_DEBUG(2, (MDI_NOTE, pdip,
2789 	    "cname %s: caddr@%s paddr@%s",
2790 	    cname ? cname : "", caddr ? caddr : "", paddr ? paddr : ""));
2791 
2792 	if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL ||
2793 	    ret_pip == NULL) {
2794 		/* Nothing more to do */
2795 		return (MDI_FAILURE);
2796 	}
2797 
2798 	*ret_pip = NULL;
2799 
2800 	/* No allocations on detaching pHCI */
2801 	if (DEVI_IS_DETACHING(pdip)) {
2802 		/* Invalid pHCI device, return failure */
2803 		MDI_DEBUG(1, (MDI_WARN, pdip,
2804 		    "!detaching pHCI=%p", (void *)pdip));
2805 		return (MDI_FAILURE);
2806 	}
2807 
2808 	ph = i_devi_get_phci(pdip);
2809 	ASSERT(ph != NULL);
2810 	if (ph == NULL) {
2811 		/* Invalid pHCI device, return failure */
2812 		MDI_DEBUG(1, (MDI_WARN, pdip,
2813 		    "!invalid pHCI=%p", (void *)pdip));
2814 		return (MDI_FAILURE);
2815 	}
2816 
2817 	MDI_PHCI_LOCK(ph);
2818 	vh = ph->ph_vhci;
2819 	if (vh == NULL) {
2820 		/* Invalid vHCI device, return failure */
2821 		MDI_DEBUG(1, (MDI_WARN, pdip,
2822 		    "!invalid vHCI=%p", (void *)pdip));
2823 		MDI_PHCI_UNLOCK(ph);
2824 		return (MDI_FAILURE);
2825 	}
2826 
2827 	if (MDI_PHCI_IS_READY(ph) == 0) {
2828 		/*
2829 		 * Do not allow new node creation when pHCI is in
2830 		 * offline/suspended states
2831 		 */
2832 		MDI_DEBUG(1, (MDI_WARN, pdip,
2833 		    "pHCI=%p is not ready", (void *)ph));
2834 		MDI_PHCI_UNLOCK(ph);
2835 		return (MDI_BUSY);
2836 	}
2837 	MDI_PHCI_UNSTABLE(ph);
2838 	MDI_PHCI_UNLOCK(ph);
2839 
2840 	/* look for a matching client, create one if not found */
2841 	MDI_VHCI_CLIENT_LOCK(vh);
2842 	ct = i_mdi_client_find(vh, cname, caddr);
2843 	if (ct == NULL) {
2844 		ct = i_mdi_client_alloc(vh, cname, caddr);
2845 		ASSERT(ct != NULL);
2846 	}
2847 
2848 	if (ct->ct_dip == NULL) {
2849 		/*
2850 		 * Allocate a devinfo node
2851 		 */
2852 		ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr,
2853 		    compatible, ncompatible);
2854 		if (ct->ct_dip == NULL) {
2855 			(void) i_mdi_client_free(vh, ct);
2856 			goto fail;
2857 		}
2858 	}
2859 	cdip = ct->ct_dip;
2860 
2861 	DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT;
2862 	DEVI(cdip)->devi_mdi_client = (caddr_t)ct;
2863 
2864 	MDI_CLIENT_LOCK(ct);
2865 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
2866 	while (pip != NULL) {
2867 		/*
2868 		 * Compare the unit address
2869 		 */
2870 		if ((MDI_PI(pip)->pi_phci == ph) &&
2871 		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2872 			break;
2873 		}
2874 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2875 	}
2876 	MDI_CLIENT_UNLOCK(ct);
2877 
2878 	if (pip == NULL) {
2879 		/*
2880 		 * This is a new path for this client device.  Allocate and
2881 		 * initialize a new pathinfo node
2882 		 */
2883 		pip = i_mdi_pi_alloc(ph, paddr, ct);
2884 		ASSERT(pip != NULL);
2885 		path_allocated = 1;
2886 	}
2887 	rv = MDI_SUCCESS;
2888 
2889 fail:
2890 	/*
2891 	 * Release the global mutex.
2892 	 */
2893 	MDI_VHCI_CLIENT_UNLOCK(vh);
2894 
2895 	/*
2896 	 * Mark the pHCI as stable
2897 	 */
2898 	MDI_PHCI_LOCK(ph);
2899 	MDI_PHCI_STABLE(ph);
2900 	MDI_PHCI_UNLOCK(ph);
2901 	*ret_pip = pip;
2902 
2903 	MDI_DEBUG(2, (MDI_NOTE, pdip,
2904 	    "alloc %s %p", mdi_pi_spathname(pip), (void *)pip));
2905 
2906 	if (path_allocated)
2907 		vhcache_pi_add(vh->vh_config, MDI_PI(pip));
2908 
2909 	return (rv);
2910 }
2911 
2912 /*ARGSUSED*/
2913 int
mdi_pi_alloc(dev_info_t * pdip,char * cname,char * caddr,char * paddr,int flags,mdi_pathinfo_t ** ret_pip)2914 mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2915     int flags, mdi_pathinfo_t **ret_pip)
2916 {
2917 	return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0,
2918 	    flags, ret_pip));
2919 }
2920 
2921 /*
2922  * i_mdi_pi_alloc():
2923  *		Allocate a mdi_pathinfo node and add to the pHCI path list
2924  * Return Values:
2925  *		mdi_pathinfo
2926  */
2927 /*ARGSUSED*/
2928 static mdi_pathinfo_t *
i_mdi_pi_alloc(mdi_phci_t * ph,char * paddr,mdi_client_t * ct)2929 i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct)
2930 {
2931 	mdi_pathinfo_t	*pip;
2932 	int		ct_circular;
2933 	int		ph_circular;
2934 	static char	path[MAXPATHLEN];	/* mdi_pathmap_mutex protects */
2935 	char		*path_persistent;
2936 	int		path_instance;
2937 	mod_hash_val_t	hv;
2938 
2939 	ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci));
2940 
2941 	pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP);
2942 	mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL);
2943 	MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT |
2944 	    MDI_PATHINFO_STATE_TRANSIENT;
2945 
2946 	if (MDI_PHCI_IS_USER_DISABLED(ph))
2947 		MDI_PI_SET_USER_DISABLE(pip);
2948 
2949 	if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph))
2950 		MDI_PI_SET_DRV_DISABLE_TRANS(pip);
2951 
2952 	if (MDI_PHCI_IS_DRV_DISABLED(ph))
2953 		MDI_PI_SET_DRV_DISABLE(pip);
2954 
2955 	MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT;
2956 	cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL);
2957 	MDI_PI(pip)->pi_client = ct;
2958 	MDI_PI(pip)->pi_phci = ph;
2959 	MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP);
2960 	(void) strcpy(MDI_PI(pip)->pi_addr, paddr);
2961 
2962         /*
2963 	 * We form the "path" to the pathinfo node, and see if we have
2964 	 * already allocated a 'path_instance' for that "path".  If so,
2965 	 * we use the already allocated 'path_instance'.  If not, we
2966 	 * allocate a new 'path_instance' and associate it with a copy of
2967 	 * the "path" string (which is never freed). The association
2968 	 * between a 'path_instance' this "path" string persists until
2969 	 * reboot.
2970 	 */
2971         mutex_enter(&mdi_pathmap_mutex);
2972 	(void) ddi_pathname(ph->ph_dip, path);
2973 	(void) sprintf(path + strlen(path), "/%s@%s",
2974 	    mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip));
2975         if (mod_hash_find(mdi_pathmap_bypath, (mod_hash_key_t)path, &hv) == 0) {
2976                 path_instance = (uint_t)(intptr_t)hv;
2977         } else {
2978 		/* allocate a new 'path_instance' and persistent "path" */
2979 		path_instance = mdi_pathmap_instance++;
2980 		path_persistent = i_ddi_strdup(path, KM_SLEEP);
2981                 (void) mod_hash_insert(mdi_pathmap_bypath,
2982                     (mod_hash_key_t)path_persistent,
2983                     (mod_hash_val_t)(intptr_t)path_instance);
2984 		(void) mod_hash_insert(mdi_pathmap_byinstance,
2985 		    (mod_hash_key_t)(intptr_t)path_instance,
2986 		    (mod_hash_val_t)path_persistent);
2987 
2988 		/* create shortpath name */
2989 		(void) snprintf(path, sizeof(path), "%s%d/%s@%s",
2990 		    ddi_driver_name(ph->ph_dip), ddi_get_instance(ph->ph_dip),
2991 		    mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip));
2992 		path_persistent = i_ddi_strdup(path, KM_SLEEP);
2993 		(void) mod_hash_insert(mdi_pathmap_sbyinstance,
2994 		    (mod_hash_key_t)(intptr_t)path_instance,
2995 		    (mod_hash_val_t)path_persistent);
2996         }
2997         mutex_exit(&mdi_pathmap_mutex);
2998 	MDI_PI(pip)->pi_path_instance = path_instance;
2999 
3000 	(void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP);
3001 	ASSERT(MDI_PI(pip)->pi_prop != NULL);
3002 	MDI_PI(pip)->pi_pprivate = NULL;
3003 	MDI_PI(pip)->pi_cprivate = NULL;
3004 	MDI_PI(pip)->pi_vprivate = NULL;
3005 	MDI_PI(pip)->pi_client_link = NULL;
3006 	MDI_PI(pip)->pi_phci_link = NULL;
3007 	MDI_PI(pip)->pi_ref_cnt = 0;
3008 	MDI_PI(pip)->pi_kstats = NULL;
3009 	MDI_PI(pip)->pi_preferred = 1;
3010 	cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL);
3011 
3012 	/*
3013 	 * Lock both dev_info nodes against changes in parallel.
3014 	 *
3015 	 * The ndi_devi_enter(Client), is atypical since the client is a leaf.
3016 	 * This atypical operation is done to synchronize pathinfo nodes
3017 	 * during devinfo snapshot (see di_register_pip) by 'pretending' that
3018 	 * the pathinfo nodes are children of the Client.
3019 	 */
3020 	ndi_devi_enter(ct->ct_dip, &ct_circular);
3021 	ndi_devi_enter(ph->ph_dip, &ph_circular);
3022 
3023 	i_mdi_phci_add_path(ph, pip);
3024 	i_mdi_client_add_path(ct, pip);
3025 
3026 	ndi_devi_exit(ph->ph_dip, ph_circular);
3027 	ndi_devi_exit(ct->ct_dip, ct_circular);
3028 
3029 	return (pip);
3030 }
3031 
3032 /*
3033  * mdi_pi_pathname_by_instance():
3034  *	Lookup of "path" by 'path_instance'. Return "path".
3035  *	NOTE: returned "path" remains valid forever (until reboot).
3036  */
3037 char *
mdi_pi_pathname_by_instance(int path_instance)3038 mdi_pi_pathname_by_instance(int path_instance)
3039 {
3040 	char		*path;
3041 	mod_hash_val_t	hv;
3042 
3043 	/* mdi_pathmap lookup of "path" by 'path_instance' */
3044 	mutex_enter(&mdi_pathmap_mutex);
3045 	if (mod_hash_find(mdi_pathmap_byinstance,
3046 	    (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0)
3047 		path = (char *)hv;
3048 	else
3049 		path = NULL;
3050 	mutex_exit(&mdi_pathmap_mutex);
3051 	return (path);
3052 }
3053 
3054 /*
3055  * mdi_pi_spathname_by_instance():
3056  *	Lookup of "shortpath" by 'path_instance'. Return "shortpath".
3057  *	NOTE: returned "shortpath" remains valid forever (until reboot).
3058  */
3059 char *
mdi_pi_spathname_by_instance(int path_instance)3060 mdi_pi_spathname_by_instance(int path_instance)
3061 {
3062 	char		*path;
3063 	mod_hash_val_t	hv;
3064 
3065 	/* mdi_pathmap lookup of "path" by 'path_instance' */
3066 	mutex_enter(&mdi_pathmap_mutex);
3067 	if (mod_hash_find(mdi_pathmap_sbyinstance,
3068 	    (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0)
3069 		path = (char *)hv;
3070 	else
3071 		path = NULL;
3072 	mutex_exit(&mdi_pathmap_mutex);
3073 	return (path);
3074 }
3075 
3076 
3077 /*
3078  * i_mdi_phci_add_path():
3079  * 		Add a mdi_pathinfo node to pHCI list.
3080  * Notes:
3081  *		Caller should per-pHCI mutex
3082  */
3083 static void
i_mdi_phci_add_path(mdi_phci_t * ph,mdi_pathinfo_t * pip)3084 i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
3085 {
3086 	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
3087 
3088 	MDI_PHCI_LOCK(ph);
3089 	if (ph->ph_path_head == NULL) {
3090 		ph->ph_path_head = pip;
3091 	} else {
3092 		MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip);
3093 	}
3094 	ph->ph_path_tail = pip;
3095 	ph->ph_path_count++;
3096 	MDI_PHCI_UNLOCK(ph);
3097 }
3098 
3099 /*
3100  * i_mdi_client_add_path():
3101  *		Add mdi_pathinfo node to client list
3102  */
3103 static void
i_mdi_client_add_path(mdi_client_t * ct,mdi_pathinfo_t * pip)3104 i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
3105 {
3106 	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
3107 
3108 	MDI_CLIENT_LOCK(ct);
3109 	if (ct->ct_path_head == NULL) {
3110 		ct->ct_path_head = pip;
3111 	} else {
3112 		MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip);
3113 	}
3114 	ct->ct_path_tail = pip;
3115 	ct->ct_path_count++;
3116 	MDI_CLIENT_UNLOCK(ct);
3117 }
3118 
3119 /*
3120  * mdi_pi_free():
3121  *		Free the mdi_pathinfo node and also client device node if this
3122  *		is the last path to the device
3123  * Return Values:
3124  *		MDI_SUCCESS
3125  *		MDI_FAILURE
3126  *		MDI_BUSY
3127  */
3128 /*ARGSUSED*/
3129 int
mdi_pi_free(mdi_pathinfo_t * pip,int flags)3130 mdi_pi_free(mdi_pathinfo_t *pip, int flags)
3131 {
3132 	int		rv;
3133 	mdi_vhci_t	*vh;
3134 	mdi_phci_t	*ph;
3135 	mdi_client_t	*ct;
3136 	int		(*f)();
3137 	int		client_held = 0;
3138 
3139 	MDI_PI_LOCK(pip);
3140 	ph = MDI_PI(pip)->pi_phci;
3141 	ASSERT(ph != NULL);
3142 	if (ph == NULL) {
3143 		/*
3144 		 * Invalid pHCI device, return failure
3145 		 */
3146 		MDI_DEBUG(1, (MDI_WARN, NULL,
3147 		    "!invalid pHCI: pip %s %p",
3148 		    mdi_pi_spathname(pip), (void *)pip));
3149 		MDI_PI_UNLOCK(pip);
3150 		return (MDI_FAILURE);
3151 	}
3152 
3153 	vh = ph->ph_vhci;
3154 	ASSERT(vh != NULL);
3155 	if (vh == NULL) {
3156 		/* Invalid pHCI device, return failure */
3157 		MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
3158 		    "!invalid vHCI: pip %s %p",
3159 		    mdi_pi_spathname(pip), (void *)pip));
3160 		MDI_PI_UNLOCK(pip);
3161 		return (MDI_FAILURE);
3162 	}
3163 
3164 	ct = MDI_PI(pip)->pi_client;
3165 	ASSERT(ct != NULL);
3166 	if (ct == NULL) {
3167 		/*
3168 		 * Invalid Client device, return failure
3169 		 */
3170 		MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
3171 		    "!invalid client: pip %s %p",
3172 		    mdi_pi_spathname(pip), (void *)pip));
3173 		MDI_PI_UNLOCK(pip);
3174 		return (MDI_FAILURE);
3175 	}
3176 
3177 	/*
3178 	 * Check to see for busy condition.  A mdi_pathinfo can only be freed
3179 	 * if the node state is either offline or init and the reference count
3180 	 * is zero.
3181 	 */
3182 	if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) ||
3183 	    MDI_PI_IS_INITING(pip))) {
3184 		/*
3185 		 * Node is busy
3186 		 */
3187 		MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
3188 		    "!busy: pip %s %p", mdi_pi_spathname(pip), (void *)pip));
3189 		MDI_PI_UNLOCK(pip);
3190 		return (MDI_BUSY);
3191 	}
3192 
3193 	while (MDI_PI(pip)->pi_ref_cnt != 0) {
3194 		/*
3195 		 * Give a chance for pending I/Os to complete.
3196 		 */
3197 		MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3198 		    "!%d cmds still pending on path: %s %p",
3199 		    MDI_PI(pip)->pi_ref_cnt,
3200 		    mdi_pi_spathname(pip), (void *)pip));
3201 		if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv,
3202 		    &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000),
3203 		    TR_CLOCK_TICK) == -1) {
3204 			/*
3205 			 * The timeout time reached without ref_cnt being zero
3206 			 * being signaled.
3207 			 */
3208 			MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3209 			    "!Timeout reached on path %s %p without the cond",
3210 			    mdi_pi_spathname(pip), (void *)pip));
3211 			MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3212 			    "!%d cmds still pending on path %s %p",
3213 			    MDI_PI(pip)->pi_ref_cnt,
3214 			    mdi_pi_spathname(pip), (void *)pip));
3215 			MDI_PI_UNLOCK(pip);
3216 			return (MDI_BUSY);
3217 		}
3218 	}
3219 	if (MDI_PI(pip)->pi_pm_held) {
3220 		client_held = 1;
3221 	}
3222 	MDI_PI_UNLOCK(pip);
3223 
3224 	vhcache_pi_remove(vh->vh_config, MDI_PI(pip));
3225 
3226 	MDI_CLIENT_LOCK(ct);
3227 
3228 	/* Prevent further failovers till MDI_VHCI_CLIENT_LOCK is held */
3229 	MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct);
3230 
3231 	/*
3232 	 * Wait till failover is complete before removing this node.
3233 	 */
3234 	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
3235 		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);
3236 
3237 	MDI_CLIENT_UNLOCK(ct);
3238 	MDI_VHCI_CLIENT_LOCK(vh);
3239 	MDI_CLIENT_LOCK(ct);
3240 	MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct);
3241 
3242 	if (!MDI_PI_IS_INITING(pip)) {
3243 		f = vh->vh_ops->vo_pi_uninit;
3244 		if (f != NULL) {
3245 			rv = (*f)(vh->vh_dip, pip, 0);
3246 		}
3247 	} else
3248 		rv = MDI_SUCCESS;
3249 
3250 	/*
3251 	 * If vo_pi_uninit() completed successfully.
3252 	 */
3253 	if (rv == MDI_SUCCESS) {
3254 		if (client_held) {
3255 			MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3256 			    "i_mdi_pm_rele_client\n"));
3257 			i_mdi_pm_rele_client(ct, 1);
3258 		}
3259 		i_mdi_pi_free(ph, pip, ct);
3260 		if (ct->ct_path_count == 0) {
3261 			/*
3262 			 * Client lost its last path.
3263 			 * Clean up the client device
3264 			 */
3265 			MDI_CLIENT_UNLOCK(ct);
3266 			(void) i_mdi_client_free(ct->ct_vhci, ct);
3267 			MDI_VHCI_CLIENT_UNLOCK(vh);
3268 			return (rv);
3269 		}
3270 	}
3271 	MDI_CLIENT_UNLOCK(ct);
3272 	MDI_VHCI_CLIENT_UNLOCK(vh);
3273 
3274 	if (rv == MDI_FAILURE)
3275 		vhcache_pi_add(vh->vh_config, MDI_PI(pip));
3276 
3277 	return (rv);
3278 }
3279 
3280 /*
3281  * i_mdi_pi_free():
3282  *		Free the mdi_pathinfo node
3283  */
3284 static void
i_mdi_pi_free(mdi_phci_t * ph,mdi_pathinfo_t * pip,mdi_client_t * ct)3285 i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct)
3286 {
3287 	int	ct_circular;
3288 	int	ph_circular;
3289 
3290 	ASSERT(MDI_CLIENT_LOCKED(ct));
3291 
3292 	/*
3293 	 * remove any per-path kstats
3294 	 */
3295 	i_mdi_pi_kstat_destroy(pip);
3296 
3297 	/* See comments in i_mdi_pi_alloc() */
3298 	ndi_devi_enter(ct->ct_dip, &ct_circular);
3299 	ndi_devi_enter(ph->ph_dip, &ph_circular);
3300 
3301 	i_mdi_client_remove_path(ct, pip);
3302 	i_mdi_phci_remove_path(ph, pip);
3303 
3304 	ndi_devi_exit(ph->ph_dip, ph_circular);
3305 	ndi_devi_exit(ct->ct_dip, ct_circular);
3306 
3307 	mutex_destroy(&MDI_PI(pip)->pi_mutex);
3308 	cv_destroy(&MDI_PI(pip)->pi_state_cv);
3309 	cv_destroy(&MDI_PI(pip)->pi_ref_cv);
3310 	if (MDI_PI(pip)->pi_addr) {
3311 		kmem_free(MDI_PI(pip)->pi_addr,
3312 		    strlen(MDI_PI(pip)->pi_addr) + 1);
3313 		MDI_PI(pip)->pi_addr = NULL;
3314 	}
3315 
3316 	if (MDI_PI(pip)->pi_prop) {
3317 		(void) nvlist_free(MDI_PI(pip)->pi_prop);
3318 		MDI_PI(pip)->pi_prop = NULL;
3319 	}
3320 	kmem_free(pip, sizeof (struct mdi_pathinfo));
3321 }
3322 
3323 
3324 /*
3325  * i_mdi_phci_remove_path():
3326  * 		Remove a mdi_pathinfo node from pHCI list.
3327  * Notes:
3328  *		Caller should hold per-pHCI mutex
3329  */
3330 static void
i_mdi_phci_remove_path(mdi_phci_t * ph,mdi_pathinfo_t * pip)3331 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
3332 {
3333 	mdi_pathinfo_t	*prev = NULL;
3334 	mdi_pathinfo_t	*path = NULL;
3335 
3336 	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
3337 
3338 	MDI_PHCI_LOCK(ph);
3339 	path = ph->ph_path_head;
3340 	while (path != NULL) {
3341 		if (path == pip) {
3342 			break;
3343 		}
3344 		prev = path;
3345 		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
3346 	}
3347 
3348 	if (path) {
3349 		ph->ph_path_count--;
3350 		if (prev) {
3351 			MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link;
3352 		} else {
3353 			ph->ph_path_head =
3354 			    (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
3355 		}
3356 		if (ph->ph_path_tail == path) {
3357 			ph->ph_path_tail = prev;
3358 		}
3359 	}
3360 
3361 	/*
3362 	 * Clear the pHCI link
3363 	 */
3364 	MDI_PI(pip)->pi_phci_link = NULL;
3365 	MDI_PI(pip)->pi_phci = NULL;
3366 	MDI_PHCI_UNLOCK(ph);
3367 }
3368 
3369 /*
3370  * i_mdi_client_remove_path():
3371  * 		Remove a mdi_pathinfo node from client path list.
3372  */
3373 static void
i_mdi_client_remove_path(mdi_client_t * ct,mdi_pathinfo_t * pip)3374 i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
3375 {
3376 	mdi_pathinfo_t	*prev = NULL;
3377 	mdi_pathinfo_t	*path;
3378 
3379 	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
3380 
3381 	ASSERT(MDI_CLIENT_LOCKED(ct));
3382 	path = ct->ct_path_head;
3383 	while (path != NULL) {
3384 		if (path == pip) {
3385 			break;
3386 		}
3387 		prev = path;
3388 		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
3389 	}
3390 
3391 	if (path) {
3392 		ct->ct_path_count--;
3393 		if (prev) {
3394 			MDI_PI(prev)->pi_client_link =
3395 			    MDI_PI(path)->pi_client_link;
3396 		} else {
3397 			ct->ct_path_head =
3398 			    (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
3399 		}
3400 		if (ct->ct_path_tail == path) {
3401 			ct->ct_path_tail = prev;
3402 		}
3403 		if (ct->ct_path_last == path) {
3404 			ct->ct_path_last = ct->ct_path_head;
3405 		}
3406 	}
3407 	MDI_PI(pip)->pi_client_link = NULL;
3408 	MDI_PI(pip)->pi_client = NULL;
3409 }
3410 
3411 /*
3412  * i_mdi_pi_state_change():
3413  *		online a mdi_pathinfo node
3414  *
3415  * Return Values:
3416  *		MDI_SUCCESS
3417  *		MDI_FAILURE
3418  */
3419 /*ARGSUSED*/
3420 static int
i_mdi_pi_state_change(mdi_pathinfo_t * pip,mdi_pathinfo_state_t state,int flag)3421 i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
3422 {
3423 	int		rv = MDI_SUCCESS;
3424 	mdi_vhci_t	*vh;
3425 	mdi_phci_t	*ph;
3426 	mdi_client_t	*ct;
3427 	int		(*f)();
3428 	dev_info_t	*cdip;
3429 
3430 	MDI_PI_LOCK(pip);
3431 
3432 	ph = MDI_PI(pip)->pi_phci;
3433 	ASSERT(ph);
3434 	if (ph == NULL) {
3435 		/*
3436 		 * Invalid pHCI device, fail the request
3437 		 */
3438 		MDI_PI_UNLOCK(pip);
3439 		MDI_DEBUG(1, (MDI_WARN, NULL,
3440 		    "!invalid phci: pip %s %p",
3441 		    mdi_pi_spathname(pip), (void *)pip));
3442 		return (MDI_FAILURE);
3443 	}
3444 
3445 	vh = ph->ph_vhci;
3446 	ASSERT(vh);
3447 	if (vh == NULL) {
3448 		/*
3449 		 * Invalid vHCI device, fail the request
3450 		 */
3451 		MDI_PI_UNLOCK(pip);
3452 		MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
3453 		    "!invalid vhci: pip %s %p",
3454 		    mdi_pi_spathname(pip), (void *)pip));
3455 		return (MDI_FAILURE);
3456 	}
3457 
3458 	ct = MDI_PI(pip)->pi_client;
3459 	ASSERT(ct != NULL);
3460 	if (ct == NULL) {
3461 		/*
3462 		 * Invalid client device, fail the request
3463 		 */
3464 		MDI_PI_UNLOCK(pip);
3465 		MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
3466 		    "!invalid client: pip %s %p",
3467 		    mdi_pi_spathname(pip), (void *)pip));
3468 		return (MDI_FAILURE);
3469 	}
3470 
3471 	/*
3472 	 * If this path has not been initialized yet, Callback vHCI driver's
3473 	 * pathinfo node initialize entry point
3474 	 */
3475 
3476 	if (MDI_PI_IS_INITING(pip)) {
3477 		MDI_PI_UNLOCK(pip);
3478 		f = vh->vh_ops->vo_pi_init;
3479 		if (f != NULL) {
3480 			rv = (*f)(vh->vh_dip, pip, 0);
3481 			if (rv != MDI_SUCCESS) {
3482 				MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
3483 				    "!vo_pi_init failed: vHCI %p, pip %s %p",
3484 				    (void *)vh, mdi_pi_spathname(pip),
3485 				    (void *)pip));
3486 				return (MDI_FAILURE);
3487 			}
3488 		}
3489 		MDI_PI_LOCK(pip);
3490 		MDI_PI_CLEAR_TRANSIENT(pip);
3491 	}
3492 
3493 	/*
3494 	 * Do not allow state transition when pHCI is in offline/suspended
3495 	 * states
3496 	 */
3497 	i_mdi_phci_lock(ph, pip);
3498 	if (MDI_PHCI_IS_READY(ph) == 0) {
3499 		MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
3500 		    "!pHCI not ready, pHCI=%p", (void *)ph));
3501 		MDI_PI_UNLOCK(pip);
3502 		i_mdi_phci_unlock(ph);
3503 		return (MDI_BUSY);
3504 	}
3505 	MDI_PHCI_UNSTABLE(ph);
3506 	i_mdi_phci_unlock(ph);
3507 
3508 	/*
3509 	 * Check if mdi_pathinfo state is in transient state.
3510 	 * If yes, offlining is in progress and wait till transient state is
3511 	 * cleared.
3512 	 */
3513 	if (MDI_PI_IS_TRANSIENT(pip)) {
3514 		while (MDI_PI_IS_TRANSIENT(pip)) {
3515 			cv_wait(&MDI_PI(pip)->pi_state_cv,
3516 			    &MDI_PI(pip)->pi_mutex);
3517 		}
3518 	}
3519 
3520 	/*
3521 	 * Grab the client lock in reverse order sequence and release the
3522 	 * mdi_pathinfo mutex.
3523 	 */
3524 	i_mdi_client_lock(ct, pip);
3525 	MDI_PI_UNLOCK(pip);
3526 
3527 	/*
3528 	 * Wait till failover state is cleared
3529 	 */
3530 	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
3531 		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);
3532 
3533 	/*
3534 	 * Mark the mdi_pathinfo node state as transient
3535 	 */
3536 	MDI_PI_LOCK(pip);
3537 	switch (state) {
3538 	case MDI_PATHINFO_STATE_ONLINE:
3539 		MDI_PI_SET_ONLINING(pip);
3540 		break;
3541 
3542 	case MDI_PATHINFO_STATE_STANDBY:
3543 		MDI_PI_SET_STANDBYING(pip);
3544 		break;
3545 
3546 	case MDI_PATHINFO_STATE_FAULT:
3547 		/*
3548 		 * Mark the pathinfo state as FAULTED
3549 		 */
3550 		MDI_PI_SET_FAULTING(pip);
3551 		MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR);
3552 		break;
3553 
3554 	case MDI_PATHINFO_STATE_OFFLINE:
3555 		/*
3556 		 * ndi_devi_offline() cannot hold pip or ct locks.
3557 		 */
3558 		MDI_PI_UNLOCK(pip);
3559 
3560 		/*
3561 		 * If this is a user initiated path online->offline operation
3562 		 * who's success would transition a client from DEGRADED to
3563 		 * FAILED then only proceed if we can offline the client first.
3564 		 */
3565 		cdip = ct->ct_dip;
3566 		if ((flag & NDI_USER_REQ) &&
3567 		    MDI_PI_IS_ONLINE(pip) &&
3568 		    (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED)) {
3569 			i_mdi_client_unlock(ct);
3570 			rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN);
3571 			if (rv != NDI_SUCCESS) {
3572 				/*
3573 				 * Convert to MDI error code
3574 				 */
3575 				switch (rv) {
3576 				case NDI_BUSY:
3577 					rv = MDI_BUSY;
3578 					break;
3579 				default:
3580 					rv = MDI_FAILURE;
3581 					break;
3582 				}
3583 				goto state_change_exit;
3584 			} else {
3585 				i_mdi_client_lock(ct, NULL);
3586 			}
3587 		}
3588 		/*
3589 		 * Mark the mdi_pathinfo node state as transient
3590 		 */
3591 		MDI_PI_LOCK(pip);
3592 		MDI_PI_SET_OFFLINING(pip);
3593 		break;
3594 	}
3595 	MDI_PI_UNLOCK(pip);
3596 	MDI_CLIENT_UNSTABLE(ct);
3597 	i_mdi_client_unlock(ct);
3598 
3599 	f = vh->vh_ops->vo_pi_state_change;
3600 	if (f != NULL)
3601 		rv = (*f)(vh->vh_dip, pip, state, 0, flag);
3602 
3603 	MDI_CLIENT_LOCK(ct);
3604 	MDI_PI_LOCK(pip);
3605 	if (rv == MDI_NOT_SUPPORTED) {
3606 		MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct);
3607 	}
3608 	if (rv != MDI_SUCCESS) {
3609 		MDI_DEBUG(2, (MDI_WARN, ct->ct_dip,
3610 		    "vo_pi_state_change failed: rv %x", rv));
3611 	}
3612 	if (MDI_PI_IS_TRANSIENT(pip)) {
3613 		if (rv == MDI_SUCCESS) {
3614 			MDI_PI_CLEAR_TRANSIENT(pip);
3615 		} else {
3616 			MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
3617 		}
3618 	}
3619 
3620 	/*
3621 	 * Wake anyone waiting for this mdi_pathinfo node
3622 	 */
3623 	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
3624 	MDI_PI_UNLOCK(pip);
3625 
3626 	/*
3627 	 * Mark the client device as stable
3628 	 */
3629 	MDI_CLIENT_STABLE(ct);
3630 	if (rv == MDI_SUCCESS) {
3631 		if (ct->ct_unstable == 0) {
3632 			cdip = ct->ct_dip;
3633 
3634 			/*
3635 			 * Onlining the mdi_pathinfo node will impact the
3636 			 * client state Update the client and dev_info node
3637 			 * state accordingly
3638 			 */
3639 			rv = NDI_SUCCESS;
3640 			i_mdi_client_update_state(ct);
3641 			switch (MDI_CLIENT_STATE(ct)) {
3642 			case MDI_CLIENT_STATE_OPTIMAL:
3643 			case MDI_CLIENT_STATE_DEGRADED:
3644 				if (cdip && !i_ddi_devi_attached(cdip) &&
3645 				    ((state == MDI_PATHINFO_STATE_ONLINE) ||
3646 				    (state == MDI_PATHINFO_STATE_STANDBY))) {
3647 
3648 					/*
3649 					 * Must do ndi_devi_online() through
3650 					 * hotplug thread for deferred
3651 					 * attach mechanism to work
3652 					 */
3653 					MDI_CLIENT_UNLOCK(ct);
3654 					rv = ndi_devi_online(cdip, 0);
3655 					MDI_CLIENT_LOCK(ct);
3656 					if ((rv != NDI_SUCCESS) &&
3657 					    (MDI_CLIENT_STATE(ct) ==
3658 					    MDI_CLIENT_STATE_DEGRADED)) {
3659 						/*
3660 						 * ndi_devi_online failed.
3661 						 * Reset client flags to
3662 						 * offline.
3663 						 */
3664 						MDI_DEBUG(1, (MDI_WARN, cdip,
3665 						    "!ndi_devi_online failed "
3666 						    "error %x", rv));
3667 						MDI_CLIENT_SET_OFFLINE(ct);
3668 					}
3669 					if (rv != NDI_SUCCESS) {
3670 						/* Reset the path state */
3671 						MDI_PI_LOCK(pip);
3672 						MDI_PI(pip)->pi_state =
3673 						    MDI_PI_OLD_STATE(pip);
3674 						MDI_PI_UNLOCK(pip);
3675 					}
3676 				}
3677 				break;
3678 
3679 			case MDI_CLIENT_STATE_FAILED:
3680 				/*
3681 				 * This is the last path case for
3682 				 * non-user initiated events.
3683 				 */
3684 				if (((flag & NDI_USER_REQ) == 0) &&
3685 				    cdip && (i_ddi_node_state(cdip) >=
3686 				    DS_INITIALIZED)) {
3687 					MDI_CLIENT_UNLOCK(ct);
3688 					rv = ndi_devi_offline(cdip,
3689 					    NDI_DEVFS_CLEAN);
3690 					MDI_CLIENT_LOCK(ct);
3691 
3692 					if (rv != NDI_SUCCESS) {
3693 						/*
3694 						 * ndi_devi_offline failed.
3695 						 * Reset client flags to
3696 						 * online as the path could not
3697 						 * be offlined.
3698 						 */
3699 						MDI_DEBUG(1, (MDI_WARN, cdip,
3700 						    "!ndi_devi_offline failed: "
3701 						    "error %x", rv));
3702 						MDI_CLIENT_SET_ONLINE(ct);
3703 					}
3704 				}
3705 				break;
3706 			}
3707 			/*
3708 			 * Convert to MDI error code
3709 			 */
3710 			switch (rv) {
3711 			case NDI_SUCCESS:
3712 				MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
3713 				i_mdi_report_path_state(ct, pip);
3714 				rv = MDI_SUCCESS;
3715 				break;
3716 			case NDI_BUSY:
3717 				rv = MDI_BUSY;
3718 				break;
3719 			default:
3720 				rv = MDI_FAILURE;
3721 				break;
3722 			}
3723 		}
3724 	}
3725 	MDI_CLIENT_UNLOCK(ct);
3726 
3727 state_change_exit:
3728 	/*
3729 	 * Mark the pHCI as stable again.
3730 	 */
3731 	MDI_PHCI_LOCK(ph);
3732 	MDI_PHCI_STABLE(ph);
3733 	MDI_PHCI_UNLOCK(ph);
3734 	return (rv);
3735 }
3736 
3737 /*
3738  * mdi_pi_online():
3739  *		Place the path_info node in the online state.  The path is
3740  *		now available to be selected by mdi_select_path() for
3741  *		transporting I/O requests to client devices.
3742  * Return Values:
3743  *		MDI_SUCCESS
3744  *		MDI_FAILURE
3745  */
3746 int
mdi_pi_online(mdi_pathinfo_t * pip,int flags)3747 mdi_pi_online(mdi_pathinfo_t *pip, int flags)
3748 {
3749 	mdi_client_t	*ct = MDI_PI(pip)->pi_client;
3750 	int		client_held = 0;
3751 	int		rv;
3752 
3753 	ASSERT(ct != NULL);
3754 	rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags);
3755 	if (rv != MDI_SUCCESS)
3756 		return (rv);
3757 
3758 	MDI_PI_LOCK(pip);
3759 	if (MDI_PI(pip)->pi_pm_held == 0) {
3760 		MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3761 		    "i_mdi_pm_hold_pip %p", (void *)pip));
3762 		i_mdi_pm_hold_pip(pip);
3763 		client_held = 1;
3764 	}
3765 	MDI_PI_UNLOCK(pip);
3766 
3767 	if (client_held) {
3768 		MDI_CLIENT_LOCK(ct);
3769 		if (ct->ct_power_cnt == 0) {
3770 			rv = i_mdi_power_all_phci(ct);
3771 		}
3772 
3773 		MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3774 		    "i_mdi_pm_hold_client %p", (void *)ct));
3775 		i_mdi_pm_hold_client(ct, 1);
3776 		MDI_CLIENT_UNLOCK(ct);
3777 	}
3778 
3779 	return (rv);
3780 }
3781 
3782 /*
3783  * mdi_pi_standby():
3784  *		Place the mdi_pathinfo node in standby state
3785  *
3786  * Return Values:
3787  *		MDI_SUCCESS
3788  *		MDI_FAILURE
3789  */
3790 int
mdi_pi_standby(mdi_pathinfo_t * pip,int flags)3791 mdi_pi_standby(mdi_pathinfo_t *pip, int flags)
3792 {
3793 	return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags));
3794 }
3795 
3796 /*
3797  * mdi_pi_fault():
3798  *		Place the mdi_pathinfo node in fault'ed state
3799  * Return Values:
3800  *		MDI_SUCCESS
3801  *		MDI_FAILURE
3802  */
3803 int
mdi_pi_fault(mdi_pathinfo_t * pip,int flags)3804 mdi_pi_fault(mdi_pathinfo_t *pip, int flags)
3805 {
3806 	return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags));
3807 }
3808 
3809 /*
3810  * mdi_pi_offline():
3811  *		Offline a mdi_pathinfo node.
3812  * Return Values:
3813  *		MDI_SUCCESS
3814  *		MDI_FAILURE
3815  */
3816 int
mdi_pi_offline(mdi_pathinfo_t * pip,int flags)3817 mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3818 {
3819 	int	ret, client_held = 0;
3820 	mdi_client_t	*ct;
3821 
3822 	/*
3823 	 * Original code overloaded NDI_DEVI_REMOVE to this interface, and
3824 	 * used it to mean "user initiated operation" (i.e. devctl). Callers
3825 	 * should now just use NDI_USER_REQ.
3826 	 */
3827 	if (flags & NDI_DEVI_REMOVE) {
3828 		flags &= ~NDI_DEVI_REMOVE;
3829 		flags |= NDI_USER_REQ;
3830 	}
3831 
3832 	ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags);
3833 
3834 	if (ret == MDI_SUCCESS) {
3835 		MDI_PI_LOCK(pip);
3836 		if (MDI_PI(pip)->pi_pm_held) {
3837 			client_held = 1;
3838 		}
3839 		MDI_PI_UNLOCK(pip);
3840 
3841 		if (client_held) {
3842 			ct = MDI_PI(pip)->pi_client;
3843 			MDI_CLIENT_LOCK(ct);
3844 			MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3845 			    "i_mdi_pm_rele_client\n"));
3846 			i_mdi_pm_rele_client(ct, 1);
3847 			MDI_CLIENT_UNLOCK(ct);
3848 		}
3849 	}
3850 
3851 	return (ret);
3852 }
3853 
3854 /*
3855  * i_mdi_pi_offline():
3856  *		Offline a mdi_pathinfo node and call the vHCI driver's callback
3857  */
3858 static int
i_mdi_pi_offline(mdi_pathinfo_t * pip,int flags)3859 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3860 {
3861 	dev_info_t	*vdip = NULL;
3862 	mdi_vhci_t	*vh = NULL;
3863 	mdi_client_t	*ct = NULL;
3864 	int		(*f)();
3865 	int		rv;
3866 
3867 	MDI_PI_LOCK(pip);
3868 	ct = MDI_PI(pip)->pi_client;
3869 	ASSERT(ct != NULL);
3870 
3871 	while (MDI_PI(pip)->pi_ref_cnt != 0) {
3872 		/*
3873 		 * Give a chance for pending I/Os to complete.
3874 		 */
3875 		MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3876 		    "!%d cmds still pending on path %s %p",
3877 		    MDI_PI(pip)->pi_ref_cnt, mdi_pi_spathname(pip),
3878 		    (void *)pip));
3879 		if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv,
3880 		    &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000),
3881 		    TR_CLOCK_TICK) == -1) {
3882 			/*
3883 			 * The timeout time reached without ref_cnt being zero
3884 			 * being signaled.
3885 			 */
3886 			MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3887 			    "!Timeout reached on path %s %p without the cond",
3888 			    mdi_pi_spathname(pip), (void *)pip));
3889 			MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3890 			    "!%d cmds still pending on path %s %p",
3891 			    MDI_PI(pip)->pi_ref_cnt,
3892 			    mdi_pi_spathname(pip), (void *)pip));
3893 		}
3894 	}
3895 	vh = ct->ct_vhci;
3896 	vdip = vh->vh_dip;
3897 
3898 	/*
3899 	 * Notify vHCI that has registered this event
3900 	 */
3901 	ASSERT(vh->vh_ops);
3902 	f = vh->vh_ops->vo_pi_state_change;
3903 
3904 	if (f != NULL) {
3905 		MDI_PI_UNLOCK(pip);
3906 		if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0,
3907 		    flags)) != MDI_SUCCESS) {
3908 			MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
3909 			    "!vo_path_offline failed: vdip %s%d %p: path %s %p",
3910 			    ddi_driver_name(vdip), ddi_get_instance(vdip),
3911 			    (void *)vdip, mdi_pi_spathname(pip), (void *)pip));
3912 		}
3913 		MDI_PI_LOCK(pip);
3914 	}
3915 
3916 	/*
3917 	 * Set the mdi_pathinfo node state and clear the transient condition
3918 	 */
3919 	MDI_PI_SET_OFFLINE(pip);
3920 	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
3921 	MDI_PI_UNLOCK(pip);
3922 
3923 	MDI_CLIENT_LOCK(ct);
3924 	if (rv == MDI_SUCCESS) {
3925 		if (ct->ct_unstable == 0) {
3926 			dev_info_t	*cdip = ct->ct_dip;
3927 
3928 			/*
3929 			 * Onlining the mdi_pathinfo node will impact the
3930 			 * client state Update the client and dev_info node
3931 			 * state accordingly
3932 			 */
3933 			i_mdi_client_update_state(ct);
3934 			rv = NDI_SUCCESS;
3935 			if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
3936 				if (cdip &&
3937 				    (i_ddi_node_state(cdip) >=
3938 				    DS_INITIALIZED)) {
3939 					MDI_CLIENT_UNLOCK(ct);
3940 					rv = ndi_devi_offline(cdip,
3941 					    NDI_DEVFS_CLEAN);
3942 					MDI_CLIENT_LOCK(ct);
3943 					if (rv != NDI_SUCCESS) {
3944 						/*
3945 						 * ndi_devi_offline failed.
3946 						 * Reset client flags to
3947 						 * online.
3948 						 */
3949 						MDI_DEBUG(4, (MDI_WARN, cdip,
3950 						    "ndi_devi_offline failed: "
3951 						    "error %x", rv));
3952 						MDI_CLIENT_SET_ONLINE(ct);
3953 					}
3954 				}
3955 			}
3956 			/*
3957 			 * Convert to MDI error code
3958 			 */
3959 			switch (rv) {
3960 			case NDI_SUCCESS:
3961 				rv = MDI_SUCCESS;
3962 				break;
3963 			case NDI_BUSY:
3964 				rv = MDI_BUSY;
3965 				break;
3966 			default:
3967 				rv = MDI_FAILURE;
3968 				break;
3969 			}
3970 		}
3971 		MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
3972 		i_mdi_report_path_state(ct, pip);
3973 	}
3974 
3975 	MDI_CLIENT_UNLOCK(ct);
3976 
3977 	/*
3978 	 * Change in the mdi_pathinfo node state will impact the client state
3979 	 */
3980 	MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip,
3981 	    "ct = %p pip = %p", (void *)ct, (void *)pip));
3982 	return (rv);
3983 }
3984 
3985 /*
3986  * i_mdi_pi_online():
3987  *		Online a mdi_pathinfo node and call the vHCI driver's callback
3988  */
3989 static int
i_mdi_pi_online(mdi_pathinfo_t * pip,int flags)3990 i_mdi_pi_online(mdi_pathinfo_t *pip, int flags)
3991 {
3992 	mdi_vhci_t	*vh = NULL;
3993 	mdi_client_t	*ct = NULL;
3994 	mdi_phci_t	*ph;
3995 	int		(*f)();
3996 	int		rv;
3997 
3998 	MDI_PI_LOCK(pip);
3999 	ph = MDI_PI(pip)->pi_phci;
4000 	vh = ph->ph_vhci;
4001 	ct = MDI_PI(pip)->pi_client;
4002 	MDI_PI_SET_ONLINING(pip)
4003 	MDI_PI_UNLOCK(pip);
4004 	f = vh->vh_ops->vo_pi_state_change;
4005 	if (f != NULL)
4006 		rv = (*f)(vh->vh_dip, pip, MDI_PATHINFO_STATE_ONLINE, 0,
4007 		    flags);
4008 	MDI_CLIENT_LOCK(ct);
4009 	MDI_PI_LOCK(pip);
4010 	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
4011 	MDI_PI_UNLOCK(pip);
4012 	if (rv == MDI_SUCCESS) {
4013 		dev_info_t	*cdip = ct->ct_dip;
4014 
4015 		rv = MDI_SUCCESS;
4016 		i_mdi_client_update_state(ct);
4017 		if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL ||
4018 		    MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
4019 			if (cdip && !i_ddi_devi_attached(cdip)) {
4020 				MDI_CLIENT_UNLOCK(ct);
4021 				rv = ndi_devi_online(cdip, 0);
4022 				MDI_CLIENT_LOCK(ct);
4023 				if ((rv != NDI_SUCCESS) &&
4024 				    (MDI_CLIENT_STATE(ct) ==
4025 				    MDI_CLIENT_STATE_DEGRADED)) {
4026 					MDI_CLIENT_SET_OFFLINE(ct);
4027 				}
4028 				if (rv != NDI_SUCCESS) {
4029 					/* Reset the path state */
4030 					MDI_PI_LOCK(pip);
4031 					MDI_PI(pip)->pi_state =
4032 					    MDI_PI_OLD_STATE(pip);
4033 					MDI_PI_UNLOCK(pip);
4034 				}
4035 			}
4036 		}
4037 		switch (rv) {
4038 		case NDI_SUCCESS:
4039 			MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
4040 			i_mdi_report_path_state(ct, pip);
4041 			rv = MDI_SUCCESS;
4042 			break;
4043 		case NDI_BUSY:
4044 			rv = MDI_BUSY;
4045 			break;
4046 		default:
4047 			rv = MDI_FAILURE;
4048 			break;
4049 		}
4050 	} else {
4051 		/* Reset the path state */
4052 		MDI_PI_LOCK(pip);
4053 		MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
4054 		MDI_PI_UNLOCK(pip);
4055 	}
4056 	MDI_CLIENT_UNLOCK(ct);
4057 	return (rv);
4058 }
4059 
4060 /*
4061  * mdi_pi_get_node_name():
4062  *              Get the name associated with a mdi_pathinfo node.
4063  *              Since pathinfo nodes are not directly named, we
4064  *              return the node_name of the client.
4065  *
4066  * Return Values:
4067  *              char *
4068  */
4069 char *
mdi_pi_get_node_name(mdi_pathinfo_t * pip)4070 mdi_pi_get_node_name(mdi_pathinfo_t *pip)
4071 {
4072 	mdi_client_t    *ct;
4073 
4074 	if (pip == NULL)
4075 		return (NULL);
4076 	ct = MDI_PI(pip)->pi_client;
4077 	if ((ct == NULL) || (ct->ct_dip == NULL))
4078 		return (NULL);
4079 	return (ddi_node_name(ct->ct_dip));
4080 }
4081 
4082 /*
4083  * mdi_pi_get_addr():
4084  *		Get the unit address associated with a mdi_pathinfo node
4085  *
4086  * Return Values:
4087  *		char *
4088  */
4089 char *
mdi_pi_get_addr(mdi_pathinfo_t * pip)4090 mdi_pi_get_addr(mdi_pathinfo_t *pip)
4091 {
4092 	if (pip == NULL)
4093 		return (NULL);
4094 
4095 	return (MDI_PI(pip)->pi_addr);
4096 }
4097 
4098 /*
4099  * mdi_pi_get_path_instance():
4100  *		Get the 'path_instance' of a mdi_pathinfo node
4101  *
4102  * Return Values:
4103  *		path_instance
4104  */
4105 int
mdi_pi_get_path_instance(mdi_pathinfo_t * pip)4106 mdi_pi_get_path_instance(mdi_pathinfo_t *pip)
4107 {
4108 	if (pip == NULL)
4109 		return (0);
4110 
4111 	return (MDI_PI(pip)->pi_path_instance);
4112 }
4113 
4114 /*
4115  * mdi_pi_pathname():
4116  *		Return pointer to path to pathinfo node.
4117  */
4118 char *
mdi_pi_pathname(mdi_pathinfo_t * pip)4119 mdi_pi_pathname(mdi_pathinfo_t *pip)
4120 {
4121 	if (pip == NULL)
4122 		return (NULL);
4123 	return (mdi_pi_pathname_by_instance(mdi_pi_get_path_instance(pip)));
4124 }
4125 
4126 /*
4127  * mdi_pi_spathname():
4128  *		Return pointer to shortpath to pathinfo node. Used for debug
4129  *		messages, so return "" instead of NULL when unknown.
4130  */
4131 char *
mdi_pi_spathname(mdi_pathinfo_t * pip)4132 mdi_pi_spathname(mdi_pathinfo_t *pip)
4133 {
4134 	char	*spath = "";
4135 
4136 	if (pip) {
4137 		spath = mdi_pi_spathname_by_instance(
4138 		    mdi_pi_get_path_instance(pip));
4139 		if (spath == NULL)
4140 			spath = "";
4141 	}
4142 	return (spath);
4143 }
4144 
4145 char *
mdi_pi_pathname_obp(mdi_pathinfo_t * pip,char * path)4146 mdi_pi_pathname_obp(mdi_pathinfo_t *pip, char *path)
4147 {
4148 	char *obp_path = NULL;
4149 	if ((pip == NULL) || (path == NULL))
4150 		return (NULL);
4151 
4152 	if (mdi_prop_lookup_string(pip, "obp-path", &obp_path) == MDI_SUCCESS) {
4153 		(void) strcpy(path, obp_path);
4154 		(void) mdi_prop_free(obp_path);
4155 	} else {
4156 		path = NULL;
4157 	}
4158 	return (path);
4159 }
4160 
4161 int
mdi_pi_pathname_obp_set(mdi_pathinfo_t * pip,char * component)4162 mdi_pi_pathname_obp_set(mdi_pathinfo_t *pip, char *component)
4163 {
4164 	dev_info_t *pdip;
4165 	char *obp_path = NULL;
4166 	int rc = MDI_FAILURE;
4167 
4168 	if (pip == NULL)
4169 		return (MDI_FAILURE);
4170 
4171 	pdip = mdi_pi_get_phci(pip);
4172 	if (pdip == NULL)
4173 		return (MDI_FAILURE);
4174 
4175 	obp_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
4176 
4177 	if (ddi_pathname_obp(pdip, obp_path) == NULL) {
4178 		(void) ddi_pathname(pdip, obp_path);
4179 	}
4180 
4181 	if (component) {
4182 		(void) strncat(obp_path, "/", MAXPATHLEN);
4183 		(void) strncat(obp_path, component, MAXPATHLEN);
4184 	}
4185 	rc = mdi_prop_update_string(pip, "obp-path", obp_path);
4186 
4187 	if (obp_path)
4188 		kmem_free(obp_path, MAXPATHLEN);
4189 	return (rc);
4190 }
4191 
4192 /*
4193  * mdi_pi_get_client():
4194  *		Get the client devinfo associated with a mdi_pathinfo node
4195  *
4196  * Return Values:
4197  *		Handle to client device dev_info node
4198  */
4199 dev_info_t *
mdi_pi_get_client(mdi_pathinfo_t * pip)4200 mdi_pi_get_client(mdi_pathinfo_t *pip)
4201 {
4202 	dev_info_t	*dip = NULL;
4203 	if (pip) {
4204 		dip = MDI_PI(pip)->pi_client->ct_dip;
4205 	}
4206 	return (dip);
4207 }
4208 
4209 /*
4210  * mdi_pi_get_phci():
4211  *		Get the pHCI devinfo associated with the mdi_pathinfo node
4212  * Return Values:
4213  *		Handle to dev_info node
4214  */
4215 dev_info_t *
mdi_pi_get_phci(mdi_pathinfo_t * pip)4216 mdi_pi_get_phci(mdi_pathinfo_t *pip)
4217 {
4218 	dev_info_t	*dip = NULL;
4219 	mdi_phci_t	*ph;
4220 
4221 	if (pip) {
4222 		ph = MDI_PI(pip)->pi_phci;
4223 		if (ph)
4224 			dip = ph->ph_dip;
4225 	}
4226 	return (dip);
4227 }
4228 
4229 /*
4230  * mdi_pi_get_client_private():
4231  *		Get the client private information associated with the
4232  *		mdi_pathinfo node
4233  */
4234 void *
mdi_pi_get_client_private(mdi_pathinfo_t * pip)4235 mdi_pi_get_client_private(mdi_pathinfo_t *pip)
4236 {
4237 	void *cprivate = NULL;
4238 	if (pip) {
4239 		cprivate = MDI_PI(pip)->pi_cprivate;
4240 	}
4241 	return (cprivate);
4242 }
4243 
4244 /*
4245  * mdi_pi_set_client_private():
4246  *		Set the client private information in the mdi_pathinfo node
4247  */
4248 void
mdi_pi_set_client_private(mdi_pathinfo_t * pip,void * priv)4249 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv)
4250 {
4251 	if (pip) {
4252 		MDI_PI(pip)->pi_cprivate = priv;
4253 	}
4254 }
4255 
4256 /*
4257  * mdi_pi_get_phci_private():
4258  *		Get the pHCI private information associated with the
4259  *		mdi_pathinfo node
4260  */
4261 caddr_t
mdi_pi_get_phci_private(mdi_pathinfo_t * pip)4262 mdi_pi_get_phci_private(mdi_pathinfo_t *pip)
4263 {
4264 	caddr_t	pprivate = NULL;
4265 
4266 	if (pip) {
4267 		pprivate = MDI_PI(pip)->pi_pprivate;
4268 	}
4269 	return (pprivate);
4270 }
4271 
4272 /*
4273  * mdi_pi_set_phci_private():
4274  *		Set the pHCI private information in the mdi_pathinfo node
4275  */
4276 void
mdi_pi_set_phci_private(mdi_pathinfo_t * pip,caddr_t priv)4277 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv)
4278 {
4279 	if (pip) {
4280 		MDI_PI(pip)->pi_pprivate = priv;
4281 	}
4282 }
4283 
4284 /*
4285  * mdi_pi_get_state():
4286  *		Get the mdi_pathinfo node state. Transient states are internal
4287  *		and not provided to the users
4288  */
4289 mdi_pathinfo_state_t
mdi_pi_get_state(mdi_pathinfo_t * pip)4290 mdi_pi_get_state(mdi_pathinfo_t *pip)
4291 {
4292 	mdi_pathinfo_state_t    state = MDI_PATHINFO_STATE_INIT;
4293 
4294 	if (pip) {
4295 		if (MDI_PI_IS_TRANSIENT(pip)) {
4296 			/*
4297 			 * mdi_pathinfo is in state transition.  Return the
4298 			 * last good state.
4299 			 */
4300 			state = MDI_PI_OLD_STATE(pip);
4301 		} else {
4302 			state = MDI_PI_STATE(pip);
4303 		}
4304 	}
4305 	return (state);
4306 }
4307 
4308 /*
4309  * mdi_pi_get_flags():
4310  *		Get the mdi_pathinfo node flags.
4311  */
4312 uint_t
mdi_pi_get_flags(mdi_pathinfo_t * pip)4313 mdi_pi_get_flags(mdi_pathinfo_t *pip)
4314 {
4315 	return (pip ? MDI_PI(pip)->pi_flags : 0);
4316 }
4317 
4318 /*
4319  * Note that the following function needs to be the new interface for
4320  * mdi_pi_get_state when mpxio gets integrated to ON.
4321  */
4322 int
mdi_pi_get_state2(mdi_pathinfo_t * pip,mdi_pathinfo_state_t * state,uint32_t * ext_state)4323 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state,
4324 		uint32_t *ext_state)
4325 {
4326 	*state = MDI_PATHINFO_STATE_INIT;
4327 
4328 	if (pip) {
4329 		if (MDI_PI_IS_TRANSIENT(pip)) {
4330 			/*
4331 			 * mdi_pathinfo is in state transition.  Return the
4332 			 * last good state.
4333 			 */
4334 			*state = MDI_PI_OLD_STATE(pip);
4335 			*ext_state = MDI_PI_OLD_EXT_STATE(pip);
4336 		} else {
4337 			*state = MDI_PI_STATE(pip);
4338 			*ext_state = MDI_PI_EXT_STATE(pip);
4339 		}
4340 	}
4341 	return (MDI_SUCCESS);
4342 }
4343 
4344 /*
4345  * mdi_pi_get_preferred:
4346  *	Get the preferred path flag
4347  */
4348 int
mdi_pi_get_preferred(mdi_pathinfo_t * pip)4349 mdi_pi_get_preferred(mdi_pathinfo_t *pip)
4350 {
4351 	if (pip) {
4352 		return (MDI_PI(pip)->pi_preferred);
4353 	}
4354 	return (0);
4355 }
4356 
4357 /*
4358  * mdi_pi_set_preferred:
4359  *	Set the preferred path flag
4360  */
4361 void
mdi_pi_set_preferred(mdi_pathinfo_t * pip,int preferred)4362 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred)
4363 {
4364 	if (pip) {
4365 		MDI_PI(pip)->pi_preferred = preferred;
4366 	}
4367 }
4368 
4369 /*
4370  * mdi_pi_set_state():
4371  *		Set the mdi_pathinfo node state
4372  */
4373 void
mdi_pi_set_state(mdi_pathinfo_t * pip,mdi_pathinfo_state_t state)4374 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state)
4375 {
4376 	uint32_t	ext_state;
4377 
4378 	if (pip) {
4379 		ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK;
4380 		MDI_PI(pip)->pi_state = state;
4381 		MDI_PI(pip)->pi_state |= ext_state;
4382 
4383 		/* Path has changed state, invalidate DINFOCACHE snap shot. */
4384 		i_ddi_di_cache_invalidate();
4385 	}
4386 }
4387 
4388 /*
4389  * Property functions:
4390  */
4391 int
i_map_nvlist_error_to_mdi(int val)4392 i_map_nvlist_error_to_mdi(int val)
4393 {
4394 	int rv;
4395 
4396 	switch (val) {
4397 	case 0:
4398 		rv = DDI_PROP_SUCCESS;
4399 		break;
4400 	case EINVAL:
4401 	case ENOTSUP:
4402 		rv = DDI_PROP_INVAL_ARG;
4403 		break;
4404 	case ENOMEM:
4405 		rv = DDI_PROP_NO_MEMORY;
4406 		break;
4407 	default:
4408 		rv = DDI_PROP_NOT_FOUND;
4409 		break;
4410 	}
4411 	return (rv);
4412 }
4413 
4414 /*
4415  * mdi_pi_get_next_prop():
4416  * 		Property walk function.  The caller should hold mdi_pi_lock()
4417  *		and release by calling mdi_pi_unlock() at the end of walk to
4418  *		get a consistent value.
4419  */
4420 nvpair_t *
mdi_pi_get_next_prop(mdi_pathinfo_t * pip,nvpair_t * prev)4421 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev)
4422 {
4423 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4424 		return (NULL);
4425 	}
4426 	ASSERT(MDI_PI_LOCKED(pip));
4427 	return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev));
4428 }
4429 
4430 /*
4431  * mdi_prop_remove():
4432  * 		Remove the named property from the named list.
4433  */
4434 int
mdi_prop_remove(mdi_pathinfo_t * pip,char * name)4435 mdi_prop_remove(mdi_pathinfo_t *pip, char *name)
4436 {
4437 	if (pip == NULL) {
4438 		return (DDI_PROP_NOT_FOUND);
4439 	}
4440 	ASSERT(!MDI_PI_LOCKED(pip));
4441 	MDI_PI_LOCK(pip);
4442 	if (MDI_PI(pip)->pi_prop == NULL) {
4443 		MDI_PI_UNLOCK(pip);
4444 		return (DDI_PROP_NOT_FOUND);
4445 	}
4446 	if (name) {
4447 		(void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name);
4448 	} else {
4449 		char		nvp_name[MAXNAMELEN];
4450 		nvpair_t	*nvp;
4451 		nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL);
4452 		while (nvp) {
4453 			nvpair_t	*next;
4454 			next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp);
4455 			(void) snprintf(nvp_name, sizeof(nvp_name), "%s",
4456 			    nvpair_name(nvp));
4457 			(void) nvlist_remove_all(MDI_PI(pip)->pi_prop,
4458 			    nvp_name);
4459 			nvp = next;
4460 		}
4461 	}
4462 	MDI_PI_UNLOCK(pip);
4463 	return (DDI_PROP_SUCCESS);
4464 }
4465 
4466 /*
4467  * mdi_prop_size():
4468  * 		Get buffer size needed to pack the property data.
4469  * 		Caller should hold the mdi_pathinfo_t lock to get a consistent
4470  *		buffer size.
4471  */
4472 int
mdi_prop_size(mdi_pathinfo_t * pip,size_t * buflenp)4473 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp)
4474 {
4475 	int	rv;
4476 	size_t	bufsize;
4477 
4478 	*buflenp = 0;
4479 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4480 		return (DDI_PROP_NOT_FOUND);
4481 	}
4482 	ASSERT(MDI_PI_LOCKED(pip));
4483 	rv = nvlist_size(MDI_PI(pip)->pi_prop,
4484 	    &bufsize, NV_ENCODE_NATIVE);
4485 	*buflenp = bufsize;
4486 	return (i_map_nvlist_error_to_mdi(rv));
4487 }
4488 
4489 /*
4490  * mdi_prop_pack():
4491  * 		pack the property list.  The caller should hold the
4492  *		mdi_pathinfo_t node to get a consistent data
4493  */
4494 int
mdi_prop_pack(mdi_pathinfo_t * pip,char ** bufp,uint_t buflen)4495 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen)
4496 {
4497 	int	rv;
4498 	size_t	bufsize;
4499 
4500 	if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) {
4501 		return (DDI_PROP_NOT_FOUND);
4502 	}
4503 
4504 	ASSERT(MDI_PI_LOCKED(pip));
4505 
4506 	bufsize = buflen;
4507 	rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize,
4508 	    NV_ENCODE_NATIVE, KM_SLEEP);
4509 
4510 	return (i_map_nvlist_error_to_mdi(rv));
4511 }
4512 
4513 /*
4514  * mdi_prop_update_byte():
4515  *		Create/Update a byte property
4516  */
4517 int
mdi_prop_update_byte(mdi_pathinfo_t * pip,char * name,uchar_t data)4518 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data)
4519 {
4520 	int rv;
4521 
4522 	if (pip == NULL) {
4523 		return (DDI_PROP_INVAL_ARG);
4524 	}
4525 	ASSERT(!MDI_PI_LOCKED(pip));
4526 	MDI_PI_LOCK(pip);
4527 	if (MDI_PI(pip)->pi_prop == NULL) {
4528 		MDI_PI_UNLOCK(pip);
4529 		return (DDI_PROP_NOT_FOUND);
4530 	}
4531 	rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data);
4532 	MDI_PI_UNLOCK(pip);
4533 	return (i_map_nvlist_error_to_mdi(rv));
4534 }
4535 
4536 /*
4537  * mdi_prop_update_byte_array():
4538  *		Create/Update a byte array property
4539  */
4540 int
mdi_prop_update_byte_array(mdi_pathinfo_t * pip,char * name,uchar_t * data,uint_t nelements)4541 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data,
4542     uint_t nelements)
4543 {
4544 	int rv;
4545 
4546 	if (pip == NULL) {
4547 		return (DDI_PROP_INVAL_ARG);
4548 	}
4549 	ASSERT(!MDI_PI_LOCKED(pip));
4550 	MDI_PI_LOCK(pip);
4551 	if (MDI_PI(pip)->pi_prop == NULL) {
4552 		MDI_PI_UNLOCK(pip);
4553 		return (DDI_PROP_NOT_FOUND);
4554 	}
4555 	rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements);
4556 	MDI_PI_UNLOCK(pip);
4557 	return (i_map_nvlist_error_to_mdi(rv));
4558 }
4559 
4560 /*
4561  * mdi_prop_update_int():
4562  *		Create/Update a 32 bit integer property
4563  */
4564 int
mdi_prop_update_int(mdi_pathinfo_t * pip,char * name,int data)4565 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data)
4566 {
4567 	int rv;
4568 
4569 	if (pip == NULL) {
4570 		return (DDI_PROP_INVAL_ARG);
4571 	}
4572 	ASSERT(!MDI_PI_LOCKED(pip));
4573 	MDI_PI_LOCK(pip);
4574 	if (MDI_PI(pip)->pi_prop == NULL) {
4575 		MDI_PI_UNLOCK(pip);
4576 		return (DDI_PROP_NOT_FOUND);
4577 	}
4578 	rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data);
4579 	MDI_PI_UNLOCK(pip);
4580 	return (i_map_nvlist_error_to_mdi(rv));
4581 }
4582 
4583 /*
4584  * mdi_prop_update_int64():
4585  *		Create/Update a 64 bit integer property
4586  */
4587 int
mdi_prop_update_int64(mdi_pathinfo_t * pip,char * name,int64_t data)4588 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data)
4589 {
4590 	int rv;
4591 
4592 	if (pip == NULL) {
4593 		return (DDI_PROP_INVAL_ARG);
4594 	}
4595 	ASSERT(!MDI_PI_LOCKED(pip));
4596 	MDI_PI_LOCK(pip);
4597 	if (MDI_PI(pip)->pi_prop == NULL) {
4598 		MDI_PI_UNLOCK(pip);
4599 		return (DDI_PROP_NOT_FOUND);
4600 	}
4601 	rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data);
4602 	MDI_PI_UNLOCK(pip);
4603 	return (i_map_nvlist_error_to_mdi(rv));
4604 }
4605 
4606 /*
4607  * mdi_prop_update_int_array():
4608  *		Create/Update a int array property
4609  */
4610 int
mdi_prop_update_int_array(mdi_pathinfo_t * pip,char * name,int * data,uint_t nelements)4611 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data,
4612 	    uint_t nelements)
4613 {
4614 	int rv;
4615 
4616 	if (pip == NULL) {
4617 		return (DDI_PROP_INVAL_ARG);
4618 	}
4619 	ASSERT(!MDI_PI_LOCKED(pip));
4620 	MDI_PI_LOCK(pip);
4621 	if (MDI_PI(pip)->pi_prop == NULL) {
4622 		MDI_PI_UNLOCK(pip);
4623 		return (DDI_PROP_NOT_FOUND);
4624 	}
4625 	rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data,
4626 	    nelements);
4627 	MDI_PI_UNLOCK(pip);
4628 	return (i_map_nvlist_error_to_mdi(rv));
4629 }
4630 
4631 /*
4632  * mdi_prop_update_string():
4633  *		Create/Update a string property
4634  */
4635 int
mdi_prop_update_string(mdi_pathinfo_t * pip,char * name,char * data)4636 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data)
4637 {
4638 	int rv;
4639 
4640 	if (pip == NULL) {
4641 		return (DDI_PROP_INVAL_ARG);
4642 	}
4643 	ASSERT(!MDI_PI_LOCKED(pip));
4644 	MDI_PI_LOCK(pip);
4645 	if (MDI_PI(pip)->pi_prop == NULL) {
4646 		MDI_PI_UNLOCK(pip);
4647 		return (DDI_PROP_NOT_FOUND);
4648 	}
4649 	rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, data);
4650 	MDI_PI_UNLOCK(pip);
4651 	return (i_map_nvlist_error_to_mdi(rv));
4652 }
4653 
4654 /*
4655  * mdi_prop_update_string_array():
4656  *		Create/Update a string array property
4657  */
4658 int
mdi_prop_update_string_array(mdi_pathinfo_t * pip,char * name,char ** data,uint_t nelements)4659 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data,
4660     uint_t nelements)
4661 {
4662 	int rv;
4663 
4664 	if (pip == NULL) {
4665 		return (DDI_PROP_INVAL_ARG);
4666 	}
4667 	ASSERT(!MDI_PI_LOCKED(pip));
4668 	MDI_PI_LOCK(pip);
4669 	if (MDI_PI(pip)->pi_prop == NULL) {
4670 		MDI_PI_UNLOCK(pip);
4671 		return (DDI_PROP_NOT_FOUND);
4672 	}
4673 	rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data,
4674 	    nelements);
4675 	MDI_PI_UNLOCK(pip);
4676 	return (i_map_nvlist_error_to_mdi(rv));
4677 }
4678 
4679 /*
4680  * mdi_prop_lookup_byte():
4681  * 		Look for byte property identified by name.  The data returned
4682  *		is the actual property and valid as long as mdi_pathinfo_t node
4683  *		is alive.
4684  */
4685 int
mdi_prop_lookup_byte(mdi_pathinfo_t * pip,char * name,uchar_t * data)4686 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data)
4687 {
4688 	int rv;
4689 
4690 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4691 		return (DDI_PROP_NOT_FOUND);
4692 	}
4693 	rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data);
4694 	return (i_map_nvlist_error_to_mdi(rv));
4695 }
4696 
4697 
4698 /*
4699  * mdi_prop_lookup_byte_array():
4700  * 		Look for byte array property identified by name.  The data
4701  *		returned is the actual property and valid as long as
4702  *		mdi_pathinfo_t node is alive.
4703  */
4704 int
mdi_prop_lookup_byte_array(mdi_pathinfo_t * pip,char * name,uchar_t ** data,uint_t * nelements)4705 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data,
4706     uint_t *nelements)
4707 {
4708 	int rv;
4709 
4710 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4711 		return (DDI_PROP_NOT_FOUND);
4712 	}
4713 	rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data,
4714 	    nelements);
4715 	return (i_map_nvlist_error_to_mdi(rv));
4716 }
4717 
4718 /*
4719  * mdi_prop_lookup_int():
4720  * 		Look for int property identified by name.  The data returned
4721  *		is the actual property and valid as long as mdi_pathinfo_t
4722  *		node is alive.
4723  */
4724 int
mdi_prop_lookup_int(mdi_pathinfo_t * pip,char * name,int * data)4725 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data)
4726 {
4727 	int rv;
4728 
4729 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4730 		return (DDI_PROP_NOT_FOUND);
4731 	}
4732 	rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data);
4733 	return (i_map_nvlist_error_to_mdi(rv));
4734 }
4735 
4736 /*
4737  * mdi_prop_lookup_int64():
4738  * 		Look for int64 property identified by name.  The data returned
4739  *		is the actual property and valid as long as mdi_pathinfo_t node
4740  *		is alive.
4741  */
4742 int
mdi_prop_lookup_int64(mdi_pathinfo_t * pip,char * name,int64_t * data)4743 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data)
4744 {
4745 	int rv;
4746 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4747 		return (DDI_PROP_NOT_FOUND);
4748 	}
4749 	rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data);
4750 	return (i_map_nvlist_error_to_mdi(rv));
4751 }
4752 
4753 /*
4754  * mdi_prop_lookup_int_array():
4755  * 		Look for int array property identified by name.  The data
4756  *		returned is the actual property and valid as long as
4757  *		mdi_pathinfo_t node is alive.
4758  */
4759 int
mdi_prop_lookup_int_array(mdi_pathinfo_t * pip,char * name,int ** data,uint_t * nelements)4760 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data,
4761     uint_t *nelements)
4762 {
4763 	int rv;
4764 
4765 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4766 		return (DDI_PROP_NOT_FOUND);
4767 	}
4768 	rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name,
4769 	    (int32_t **)data, nelements);
4770 	return (i_map_nvlist_error_to_mdi(rv));
4771 }
4772 
4773 /*
4774  * mdi_prop_lookup_string():
4775  * 		Look for string property identified by name.  The data
4776  *		returned is the actual property and valid as long as
4777  *		mdi_pathinfo_t node is alive.
4778  */
4779 int
mdi_prop_lookup_string(mdi_pathinfo_t * pip,char * name,char ** data)4780 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data)
4781 {
4782 	int rv;
4783 
4784 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4785 		return (DDI_PROP_NOT_FOUND);
4786 	}
4787 	rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data);
4788 	return (i_map_nvlist_error_to_mdi(rv));
4789 }
4790 
4791 /*
4792  * mdi_prop_lookup_string_array():
4793  * 		Look for string array property identified by name.  The data
4794  *		returned is the actual property and valid as long as
4795  *		mdi_pathinfo_t node is alive.
4796  */
4797 int
mdi_prop_lookup_string_array(mdi_pathinfo_t * pip,char * name,char *** data,uint_t * nelements)4798 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data,
4799     uint_t *nelements)
4800 {
4801 	int rv;
4802 
4803 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4804 		return (DDI_PROP_NOT_FOUND);
4805 	}
4806 	rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data,
4807 	    nelements);
4808 	return (i_map_nvlist_error_to_mdi(rv));
4809 }
4810 
4811 /*
4812  * mdi_prop_free():
4813  * 		Symmetrical function to ddi_prop_free(). nvlist_lookup_xx()
4814  *		functions return the pointer to actual property data and not a
4815  *		copy of it.  So the data returned is valid as long as
4816  *		mdi_pathinfo_t node is valid.
4817  */
4818 /*ARGSUSED*/
4819 int
mdi_prop_free(void * data)4820 mdi_prop_free(void *data)
4821 {
4822 	return (DDI_PROP_SUCCESS);
4823 }
4824 
4825 /*ARGSUSED*/
4826 static void
i_mdi_report_path_state(mdi_client_t * ct,mdi_pathinfo_t * pip)4827 i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip)
4828 {
4829 	char		*ct_path;
4830 	char		*ct_status;
4831 	char		*status;
4832 	dev_info_t	*cdip = ct->ct_dip;
4833 	char		lb_buf[64];
4834 	int		report_lb_c = 0, report_lb_p = 0;
4835 
4836 	ASSERT(MDI_CLIENT_LOCKED(ct));
4837 	if ((cdip == NULL) || (ddi_get_instance(cdip) == -1) ||
4838 	    (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) {
4839 		return;
4840 	}
4841 	if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) {
4842 		ct_status = "optimal";
4843 		report_lb_c = 1;
4844 	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
4845 		ct_status = "degraded";
4846 	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
4847 		ct_status = "failed";
4848 	} else {
4849 		ct_status = "unknown";
4850 	}
4851 
4852 	lb_buf[0] = 0;		/* not interested in load balancing config */
4853 
4854 	if (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip)) {
4855 		status = "removed";
4856 	} else if (MDI_PI_IS_OFFLINE(pip)) {
4857 		status = "offline";
4858 	} else if (MDI_PI_IS_ONLINE(pip)) {
4859 		status = "online";
4860 		report_lb_p = 1;
4861 	} else if (MDI_PI_IS_STANDBY(pip)) {
4862 		status = "standby";
4863 	} else if (MDI_PI_IS_FAULT(pip)) {
4864 		status = "faulted";
4865 	} else {
4866 		status = "unknown";
4867 	}
4868 
4869 	if (cdip) {
4870 		ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
4871 
4872 		/*
4873 		 * NOTE: Keeping "multipath status: %s" and
4874 		 * "Load balancing: %s" format unchanged in case someone
4875 		 * scrubs /var/adm/messages looking for these messages.
4876 		 */
4877 		if (report_lb_c && report_lb_p) {
4878 			if (ct->ct_lb == LOAD_BALANCE_LBA) {
4879 				(void) snprintf(lb_buf, sizeof (lb_buf),
4880 				    "%s, region-size: %d", mdi_load_balance_lba,
4881 				    ct->ct_lb_args->region_size);
4882 			} else if (ct->ct_lb == LOAD_BALANCE_NONE) {
4883 				(void) snprintf(lb_buf, sizeof (lb_buf),
4884 				    "%s", mdi_load_balance_none);
4885 			} else {
4886 				(void) snprintf(lb_buf, sizeof (lb_buf), "%s",
4887 				    mdi_load_balance_rr);
4888 			}
4889 
4890 			cmn_err(mdi_debug_consoleonly ? CE_NOTE : CE_CONT,
4891 			    "?%s (%s%d) multipath status: %s: "
4892 			    "path %d %s is %s: Load balancing: %s\n",
4893 			    ddi_pathname(cdip, ct_path), ddi_driver_name(cdip),
4894 			    ddi_get_instance(cdip), ct_status,
4895 			    mdi_pi_get_path_instance(pip),
4896 			    mdi_pi_spathname(pip), status, lb_buf);
4897 		} else {
4898 			cmn_err(mdi_debug_consoleonly ? CE_NOTE : CE_CONT,
4899 			    "?%s (%s%d) multipath status: %s: "
4900 			    "path %d %s is %s\n",
4901 			    ddi_pathname(cdip, ct_path), ddi_driver_name(cdip),
4902 			    ddi_get_instance(cdip), ct_status,
4903 			    mdi_pi_get_path_instance(pip),
4904 			    mdi_pi_spathname(pip), status);
4905 		}
4906 
4907 		kmem_free(ct_path, MAXPATHLEN);
4908 		MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct);
4909 	}
4910 }
4911 
4912 #ifdef	DEBUG
4913 /*
4914  * i_mdi_log():
4915  *		Utility function for error message management
4916  *
4917  *		NOTE: Implementation takes care of trailing \n for cmn_err,
4918  *		MDI_DEBUG should not terminate fmt strings with \n.
4919  *
4920  *		NOTE: If the level is >= 2, and there is no leading !?^
4921  *		then a leading ! is implied (but can be overriden via
4922  *		mdi_debug_consoleonly). If you are using kmdb on the console,
4923  *		consider setting mdi_debug_consoleonly to 1 as an aid.
4924  */
4925 /*PRINTFLIKE4*/
4926 static void
i_mdi_log(int level,const char * func,dev_info_t * dip,const char * fmt,...)4927 i_mdi_log(int level, const char *func, dev_info_t *dip, const char *fmt, ...)
4928 {
4929 	char		name[MAXNAMELEN];
4930 	char		buf[512];
4931 	char		*bp;
4932 	va_list		ap;
4933 	int		log_only = 0;
4934 	int		boot_only = 0;
4935 	int		console_only = 0;
4936 
4937 	if (dip) {
4938 		(void) snprintf(name, sizeof(name), "%s%d: ",
4939 		    ddi_driver_name(dip), ddi_get_instance(dip));
4940 	} else {
4941 		name[0] = 0;
4942 	}
4943 
4944 	va_start(ap, fmt);
4945 	(void) vsnprintf(buf, sizeof(buf), fmt, ap);
4946 	va_end(ap);
4947 
4948 	switch (buf[0]) {
4949 	case '!':
4950 		bp = &buf[1];
4951 		log_only = 1;
4952 		break;
4953 	case '?':
4954 		bp = &buf[1];
4955 		boot_only = 1;
4956 		break;
4957 	case '^':
4958 		bp = &buf[1];
4959 		console_only = 1;
4960 		break;
4961 	default:
4962 		if (level >= 2)
4963 			log_only = 1;		/* ! implied */
4964 		bp = buf;
4965 		break;
4966 	}
4967 	if (mdi_debug_logonly) {
4968 		log_only = 1;
4969 		boot_only = 0;
4970 		console_only = 0;
4971 	}
4972 	if (mdi_debug_consoleonly) {
4973 		log_only = 0;
4974 		boot_only = 0;
4975 		console_only = 1;
4976 		level = CE_NOTE;
4977 		goto console;
4978 	}
4979 
4980 	switch (level) {
4981 	case CE_NOTE:
4982 		level = CE_CONT;
4983 		/* FALLTHROUGH */
4984 	case CE_CONT:
4985 		if (boot_only) {
4986 			cmn_err(level, "?mdi: %s%s: %s\n", name, func, bp);
4987 		} else if (console_only) {
4988 			cmn_err(level, "^mdi: %s%s: %s\n", name, func, bp);
4989 		} else if (log_only) {
4990 			cmn_err(level, "!mdi: %s%s: %s\n", name, func, bp);
4991 		} else {
4992 			cmn_err(level, "mdi: %s%s: %s\n", name, func, bp);
4993 		}
4994 		break;
4995 
4996 	case CE_WARN:
4997 	case CE_PANIC:
4998 	console:
4999 		if (boot_only) {
5000 			cmn_err(level, "?mdi: %s%s: %s", name, func, bp);
5001 		} else if (console_only) {
5002 			cmn_err(level, "^mdi: %s%s: %s", name, func, bp);
5003 		} else if (log_only) {
5004 			cmn_err(level, "!mdi: %s%s: %s", name, func, bp);
5005 		} else {
5006 			cmn_err(level, "mdi: %s%s: %s", name, func, bp);
5007 		}
5008 		break;
5009 	default:
5010 		cmn_err(level, "mdi: %s%s", name, bp);
5011 		break;
5012 	}
5013 }
5014 #endif	/* DEBUG */
5015 
5016 void
i_mdi_client_online(dev_info_t * ct_dip)5017 i_mdi_client_online(dev_info_t *ct_dip)
5018 {
5019 	mdi_client_t	*ct;
5020 
5021 	/*
5022 	 * Client online notification. Mark client state as online
5023 	 * restore our binding with dev_info node
5024 	 */
5025 	ct = i_devi_get_client(ct_dip);
5026 	ASSERT(ct != NULL);
5027 	MDI_CLIENT_LOCK(ct);
5028 	MDI_CLIENT_SET_ONLINE(ct);
5029 	/* catch for any memory leaks */
5030 	ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip));
5031 	ct->ct_dip = ct_dip;
5032 
5033 	if (ct->ct_power_cnt == 0)
5034 		(void) i_mdi_power_all_phci(ct);
5035 
5036 	MDI_DEBUG(4, (MDI_NOTE, ct_dip,
5037 	    "i_mdi_pm_hold_client %p", (void *)ct));
5038 	i_mdi_pm_hold_client(ct, 1);
5039 
5040 	MDI_CLIENT_UNLOCK(ct);
5041 }
5042 
5043 void
i_mdi_phci_online(dev_info_t * ph_dip)5044 i_mdi_phci_online(dev_info_t *ph_dip)
5045 {
5046 	mdi_phci_t	*ph;
5047 
5048 	/* pHCI online notification. Mark state accordingly */
5049 	ph = i_devi_get_phci(ph_dip);
5050 	ASSERT(ph != NULL);
5051 	MDI_PHCI_LOCK(ph);
5052 	MDI_PHCI_SET_ONLINE(ph);
5053 	MDI_PHCI_UNLOCK(ph);
5054 }
5055 
5056 /*
5057  * mdi_devi_online():
5058  * 		Online notification from NDI framework on pHCI/client
5059  *		device online.
5060  * Return Values:
5061  *		NDI_SUCCESS
5062  *		MDI_FAILURE
5063  */
5064 /*ARGSUSED*/
5065 int
mdi_devi_online(dev_info_t * dip,uint_t flags)5066 mdi_devi_online(dev_info_t *dip, uint_t flags)
5067 {
5068 	if (MDI_PHCI(dip)) {
5069 		i_mdi_phci_online(dip);
5070 	}
5071 
5072 	if (MDI_CLIENT(dip)) {
5073 		i_mdi_client_online(dip);
5074 	}
5075 	return (NDI_SUCCESS);
5076 }
5077 
5078 /*
5079  * mdi_devi_offline():
5080  * 		Offline notification from NDI framework on pHCI/Client device
5081  *		offline.
5082  *
5083  * Return Values:
5084  *		NDI_SUCCESS
5085  *		NDI_FAILURE
5086  */
5087 /*ARGSUSED*/
5088 int
mdi_devi_offline(dev_info_t * dip,uint_t flags)5089 mdi_devi_offline(dev_info_t *dip, uint_t flags)
5090 {
5091 	int		rv = NDI_SUCCESS;
5092 
5093 	if (MDI_CLIENT(dip)) {
5094 		rv = i_mdi_client_offline(dip, flags);
5095 		if (rv != NDI_SUCCESS)
5096 			return (rv);
5097 	}
5098 
5099 	if (MDI_PHCI(dip)) {
5100 		rv = i_mdi_phci_offline(dip, flags);
5101 
5102 		if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) {
5103 			/* set client back online */
5104 			i_mdi_client_online(dip);
5105 		}
5106 	}
5107 
5108 	return (rv);
5109 }
5110 
5111 /*ARGSUSED*/
5112 static int
i_mdi_phci_offline(dev_info_t * dip,uint_t flags)5113 i_mdi_phci_offline(dev_info_t *dip, uint_t flags)
5114 {
5115 	int		rv = NDI_SUCCESS;
5116 	mdi_phci_t	*ph;
5117 	mdi_client_t	*ct;
5118 	mdi_pathinfo_t	*pip;
5119 	mdi_pathinfo_t	*next;
5120 	mdi_pathinfo_t	*failed_pip = NULL;
5121 	dev_info_t	*cdip;
5122 
5123 	/*
5124 	 * pHCI component offline notification
5125 	 * Make sure that this pHCI instance is free to be offlined.
5126 	 * If it is OK to proceed, Offline and remove all the child
5127 	 * mdi_pathinfo nodes.  This process automatically offlines
5128 	 * corresponding client devices, for which this pHCI provides
5129 	 * critical services.
5130 	 */
5131 	ph = i_devi_get_phci(dip);
5132 	MDI_DEBUG(2, (MDI_NOTE, dip,
5133 	    "called %p %p", (void *)dip, (void *)ph));
5134 	if (ph == NULL) {
5135 		return (rv);
5136 	}
5137 
5138 	MDI_PHCI_LOCK(ph);
5139 
5140 	if (MDI_PHCI_IS_OFFLINE(ph)) {
5141 		MDI_DEBUG(1, (MDI_WARN, dip,
5142 		    "!pHCI already offlined: %p", (void *)dip));
5143 		MDI_PHCI_UNLOCK(ph);
5144 		return (NDI_SUCCESS);
5145 	}
5146 
5147 	/*
5148 	 * Check to see if the pHCI can be offlined
5149 	 */
5150 	if (ph->ph_unstable) {
5151 		MDI_DEBUG(1, (MDI_WARN, dip,
5152 		    "!One or more target devices are in transient state. "
5153 		    "This device can not be removed at this moment. "
5154 		    "Please try again later."));
5155 		MDI_PHCI_UNLOCK(ph);
5156 		return (NDI_BUSY);
5157 	}
5158 
5159 	pip = ph->ph_path_head;
5160 	while (pip != NULL) {
5161 		MDI_PI_LOCK(pip);
5162 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5163 
5164 		/*
5165 		 * The mdi_pathinfo state is OK. Check the client state.
5166 		 * If failover in progress fail the pHCI from offlining
5167 		 */
5168 		ct = MDI_PI(pip)->pi_client;
5169 		i_mdi_client_lock(ct, pip);
5170 		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
5171 		    (ct->ct_unstable)) {
5172 			/*
5173 			 * Failover is in progress, Fail the DR
5174 			 */
5175 			MDI_DEBUG(1, (MDI_WARN, dip,
5176 			    "!pHCI device is busy. "
5177 			    "This device can not be removed at this moment. "
5178 			    "Please try again later."));
5179 			MDI_PI_UNLOCK(pip);
5180 			i_mdi_client_unlock(ct);
5181 			MDI_PHCI_UNLOCK(ph);
5182 			return (NDI_BUSY);
5183 		}
5184 		MDI_PI_UNLOCK(pip);
5185 
5186 		/*
5187 		 * Check to see of we are removing the last path of this
5188 		 * client device...
5189 		 */
5190 		cdip = ct->ct_dip;
5191 		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5192 		    (i_mdi_client_compute_state(ct, ph) ==
5193 		    MDI_CLIENT_STATE_FAILED)) {
5194 			i_mdi_client_unlock(ct);
5195 			MDI_PHCI_UNLOCK(ph);
5196 			if (ndi_devi_offline(cdip,
5197 			    NDI_DEVFS_CLEAN) != NDI_SUCCESS) {
5198 				/*
5199 				 * ndi_devi_offline() failed.
5200 				 * This pHCI provides the critical path
5201 				 * to one or more client devices.
5202 				 * Return busy.
5203 				 */
5204 				MDI_PHCI_LOCK(ph);
5205 				MDI_DEBUG(1, (MDI_WARN, dip,
5206 				    "!pHCI device is busy. "
5207 				    "This device can not be removed at this "
5208 				    "moment. Please try again later."));
5209 				failed_pip = pip;
5210 				break;
5211 			} else {
5212 				MDI_PHCI_LOCK(ph);
5213 				pip = next;
5214 			}
5215 		} else {
5216 			i_mdi_client_unlock(ct);
5217 			pip = next;
5218 		}
5219 	}
5220 
5221 	if (failed_pip) {
5222 		pip = ph->ph_path_head;
5223 		while (pip != failed_pip) {
5224 			MDI_PI_LOCK(pip);
5225 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5226 			ct = MDI_PI(pip)->pi_client;
5227 			i_mdi_client_lock(ct, pip);
5228 			cdip = ct->ct_dip;
5229 			switch (MDI_CLIENT_STATE(ct)) {
5230 			case MDI_CLIENT_STATE_OPTIMAL:
5231 			case MDI_CLIENT_STATE_DEGRADED:
5232 				if (cdip) {
5233 					MDI_PI_UNLOCK(pip);
5234 					i_mdi_client_unlock(ct);
5235 					MDI_PHCI_UNLOCK(ph);
5236 					(void) ndi_devi_online(cdip, 0);
5237 					MDI_PHCI_LOCK(ph);
5238 					pip = next;
5239 					continue;
5240 				}
5241 				break;
5242 
5243 			case MDI_CLIENT_STATE_FAILED:
5244 				if (cdip) {
5245 					MDI_PI_UNLOCK(pip);
5246 					i_mdi_client_unlock(ct);
5247 					MDI_PHCI_UNLOCK(ph);
5248 					(void) ndi_devi_offline(cdip,
5249 						NDI_DEVFS_CLEAN);
5250 					MDI_PHCI_LOCK(ph);
5251 					pip = next;
5252 					continue;
5253 				}
5254 				break;
5255 			}
5256 			MDI_PI_UNLOCK(pip);
5257 			i_mdi_client_unlock(ct);
5258 			pip = next;
5259 		}
5260 		MDI_PHCI_UNLOCK(ph);
5261 		return (NDI_BUSY);
5262 	}
5263 
5264 	/*
5265 	 * Mark the pHCI as offline
5266 	 */
5267 	MDI_PHCI_SET_OFFLINE(ph);
5268 
5269 	/*
5270 	 * Mark the child mdi_pathinfo nodes as transient
5271 	 */
5272 	pip = ph->ph_path_head;
5273 	while (pip != NULL) {
5274 		MDI_PI_LOCK(pip);
5275 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5276 		MDI_PI_SET_OFFLINING(pip);
5277 		MDI_PI_UNLOCK(pip);
5278 		pip = next;
5279 	}
5280 	MDI_PHCI_UNLOCK(ph);
5281 	/*
5282 	 * Give a chance for any pending commands to execute
5283 	 */
5284 	delay_random(mdi_delay);
5285 	MDI_PHCI_LOCK(ph);
5286 	pip = ph->ph_path_head;
5287 	while (pip != NULL) {
5288 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5289 		(void) i_mdi_pi_offline(pip, flags);
5290 		MDI_PI_LOCK(pip);
5291 		ct = MDI_PI(pip)->pi_client;
5292 		if (!MDI_PI_IS_OFFLINE(pip)) {
5293 			MDI_DEBUG(1, (MDI_WARN, dip,
5294 			    "!pHCI device is busy. "
5295 			    "This device can not be removed at this moment. "
5296 			    "Please try again later."));
5297 			MDI_PI_UNLOCK(pip);
5298 			MDI_PHCI_SET_ONLINE(ph);
5299 			MDI_PHCI_UNLOCK(ph);
5300 			return (NDI_BUSY);
5301 		}
5302 		MDI_PI_UNLOCK(pip);
5303 		pip = next;
5304 	}
5305 	MDI_PHCI_UNLOCK(ph);
5306 
5307 	return (rv);
5308 }
5309 
5310 void
mdi_phci_mark_retiring(dev_info_t * dip,char ** cons_array)5311 mdi_phci_mark_retiring(dev_info_t *dip, char **cons_array)
5312 {
5313 	mdi_phci_t	*ph;
5314 	mdi_client_t	*ct;
5315 	mdi_pathinfo_t	*pip;
5316 	mdi_pathinfo_t	*next;
5317 	dev_info_t	*cdip;
5318 
5319 	if (!MDI_PHCI(dip))
5320 		return;
5321 
5322 	ph = i_devi_get_phci(dip);
5323 	if (ph == NULL) {
5324 		return;
5325 	}
5326 
5327 	MDI_PHCI_LOCK(ph);
5328 
5329 	if (MDI_PHCI_IS_OFFLINE(ph)) {
5330 		/* has no last path */
5331 		MDI_PHCI_UNLOCK(ph);
5332 		return;
5333 	}
5334 
5335 	pip = ph->ph_path_head;
5336 	while (pip != NULL) {
5337 		MDI_PI_LOCK(pip);
5338 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5339 
5340 		ct = MDI_PI(pip)->pi_client;
5341 		i_mdi_client_lock(ct, pip);
5342 		MDI_PI_UNLOCK(pip);
5343 
5344 		cdip = ct->ct_dip;
5345 		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5346 		    (i_mdi_client_compute_state(ct, ph) ==
5347 		    MDI_CLIENT_STATE_FAILED)) {
5348 			/* Last path. Mark client dip as retiring */
5349 			i_mdi_client_unlock(ct);
5350 			MDI_PHCI_UNLOCK(ph);
5351 			(void) e_ddi_mark_retiring(cdip, cons_array);
5352 			MDI_PHCI_LOCK(ph);
5353 			pip = next;
5354 		} else {
5355 			i_mdi_client_unlock(ct);
5356 			pip = next;
5357 		}
5358 	}
5359 
5360 	MDI_PHCI_UNLOCK(ph);
5361 
5362 	return;
5363 }
5364 
5365 void
mdi_phci_retire_notify(dev_info_t * dip,int * constraint)5366 mdi_phci_retire_notify(dev_info_t *dip, int *constraint)
5367 {
5368 	mdi_phci_t	*ph;
5369 	mdi_client_t	*ct;
5370 	mdi_pathinfo_t	*pip;
5371 	mdi_pathinfo_t	*next;
5372 	dev_info_t	*cdip;
5373 
5374 	if (!MDI_PHCI(dip))
5375 		return;
5376 
5377 	ph = i_devi_get_phci(dip);
5378 	if (ph == NULL)
5379 		return;
5380 
5381 	MDI_PHCI_LOCK(ph);
5382 
5383 	if (MDI_PHCI_IS_OFFLINE(ph)) {
5384 		MDI_PHCI_UNLOCK(ph);
5385 		/* not last path */
5386 		return;
5387 	}
5388 
5389 	if (ph->ph_unstable) {
5390 		MDI_PHCI_UNLOCK(ph);
5391 		/* can't check for constraints */
5392 		*constraint = 0;
5393 		return;
5394 	}
5395 
5396 	pip = ph->ph_path_head;
5397 	while (pip != NULL) {
5398 		MDI_PI_LOCK(pip);
5399 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5400 
5401 		/*
5402 		 * The mdi_pathinfo state is OK. Check the client state.
5403 		 * If failover in progress fail the pHCI from offlining
5404 		 */
5405 		ct = MDI_PI(pip)->pi_client;
5406 		i_mdi_client_lock(ct, pip);
5407 		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
5408 		    (ct->ct_unstable)) {
5409 			/*
5410 			 * Failover is in progress, can't check for constraints
5411 			 */
5412 			MDI_PI_UNLOCK(pip);
5413 			i_mdi_client_unlock(ct);
5414 			MDI_PHCI_UNLOCK(ph);
5415 			*constraint = 0;
5416 			return;
5417 		}
5418 		MDI_PI_UNLOCK(pip);
5419 
5420 		/*
5421 		 * Check to see of we are retiring the last path of this
5422 		 * client device...
5423 		 */
5424 		cdip = ct->ct_dip;
5425 		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5426 		    (i_mdi_client_compute_state(ct, ph) ==
5427 		    MDI_CLIENT_STATE_FAILED)) {
5428 			i_mdi_client_unlock(ct);
5429 			MDI_PHCI_UNLOCK(ph);
5430 			(void) e_ddi_retire_notify(cdip, constraint);
5431 			MDI_PHCI_LOCK(ph);
5432 			pip = next;
5433 		} else {
5434 			i_mdi_client_unlock(ct);
5435 			pip = next;
5436 		}
5437 	}
5438 
5439 	MDI_PHCI_UNLOCK(ph);
5440 
5441 	return;
5442 }
5443 
5444 /*
5445  * offline the path(s) hanging off the pHCI. If the
5446  * last path to any client, check that constraints
5447  * have been applied.
5448  *
5449  * If constraint is 0, we aren't going to retire the
5450  * pHCI. However we still need to go through the paths
5451  * calling e_ddi_retire_finalize() to clear their
5452  * contract barriers.
5453  */
5454 void
mdi_phci_retire_finalize(dev_info_t * dip,int phci_only,void * constraint)5455 mdi_phci_retire_finalize(dev_info_t *dip, int phci_only, void *constraint)
5456 {
5457 	mdi_phci_t	*ph;
5458 	mdi_client_t	*ct;
5459 	mdi_pathinfo_t	*pip;
5460 	mdi_pathinfo_t	*next;
5461 	dev_info_t	*cdip;
5462 	int		unstable = 0;
5463 	int		tmp_constraint;
5464 
5465 	if (!MDI_PHCI(dip))
5466 		return;
5467 
5468 	ph = i_devi_get_phci(dip);
5469 	if (ph == NULL) {
5470 		/* no last path and no pips */
5471 		return;
5472 	}
5473 
5474 	MDI_PHCI_LOCK(ph);
5475 
5476 	if (MDI_PHCI_IS_OFFLINE(ph)) {
5477 		MDI_PHCI_UNLOCK(ph);
5478 		/* no last path and no pips */
5479 		return;
5480 	}
5481 
5482 	/*
5483 	 * Check to see if the pHCI can be offlined
5484 	 */
5485 	if (ph->ph_unstable) {
5486 		unstable = 1;
5487 	}
5488 
5489 	pip = ph->ph_path_head;
5490 	while (pip != NULL) {
5491 		MDI_PI_LOCK(pip);
5492 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5493 
5494 		/*
5495 		 * if failover in progress fail the pHCI from offlining
5496 		 */
5497 		ct = MDI_PI(pip)->pi_client;
5498 		i_mdi_client_lock(ct, pip);
5499 		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
5500 		    (ct->ct_unstable)) {
5501 			unstable = 1;
5502 		}
5503 		MDI_PI_UNLOCK(pip);
5504 
5505 		/*
5506 		 * Check to see of we are removing the last path of this
5507 		 * client device...
5508 		 */
5509 		cdip = ct->ct_dip;
5510 		if (!phci_only && cdip &&
5511 		    (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5512 		    (i_mdi_client_compute_state(ct, ph) ==
5513 		    MDI_CLIENT_STATE_FAILED)) {
5514 			i_mdi_client_unlock(ct);
5515 			MDI_PHCI_UNLOCK(ph);
5516 			/*
5517 			 * This is the last path to this client.
5518 			 *
5519 			 * Constraint will only be set to 1 if this client can
5520 			 * be retired (as already determined by
5521 			 * mdi_phci_retire_notify). However we don't actually
5522 			 * need to retire the client (we just retire the last
5523 			 * path - MPXIO will then fail all I/Os to the client).
5524 			 * But we still need to call e_ddi_retire_finalize so
5525 			 * the contract barriers can be cleared. Therefore we
5526 			 * temporarily set constraint = 0 so that the client
5527 			 * dip is not retired.
5528 			 */
5529 			tmp_constraint = 0;
5530 			(void) e_ddi_retire_finalize(cdip, &tmp_constraint);
5531 			MDI_PHCI_LOCK(ph);
5532 			pip = next;
5533 		} else {
5534 			i_mdi_client_unlock(ct);
5535 			pip = next;
5536 		}
5537 	}
5538 
5539 	if (!phci_only && *((int *)constraint) == 0) {
5540 		MDI_PHCI_UNLOCK(ph);
5541 		return;
5542 	}
5543 
5544 	/*
5545 	 * Cannot offline pip(s)
5546 	 */
5547 	if (unstable) {
5548 		cmn_err(CE_WARN, "%s%d: mdi_phci_retire_finalize: "
5549 		    "pHCI in transient state, cannot retire",
5550 		    ddi_driver_name(dip), ddi_get_instance(dip));
5551 		MDI_PHCI_UNLOCK(ph);
5552 		return;
5553 	}
5554 
5555 	/*
5556 	 * Mark the pHCI as offline
5557 	 */
5558 	MDI_PHCI_SET_OFFLINE(ph);
5559 
5560 	/*
5561 	 * Mark the child mdi_pathinfo nodes as transient
5562 	 */
5563 	pip = ph->ph_path_head;
5564 	while (pip != NULL) {
5565 		MDI_PI_LOCK(pip);
5566 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5567 		MDI_PI_SET_OFFLINING(pip);
5568 		MDI_PI_UNLOCK(pip);
5569 		pip = next;
5570 	}
5571 	MDI_PHCI_UNLOCK(ph);
5572 	/*
5573 	 * Give a chance for any pending commands to execute
5574 	 */
5575 	delay_random(mdi_delay);
5576 	MDI_PHCI_LOCK(ph);
5577 	pip = ph->ph_path_head;
5578 	while (pip != NULL) {
5579 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5580 		(void) i_mdi_pi_offline(pip, 0);
5581 		MDI_PI_LOCK(pip);
5582 		ct = MDI_PI(pip)->pi_client;
5583 		if (!MDI_PI_IS_OFFLINE(pip)) {
5584 			cmn_err(CE_WARN, "mdi_phci_retire_finalize: "
5585 			    "path %d %s busy, cannot offline",
5586 			    mdi_pi_get_path_instance(pip),
5587 			    mdi_pi_spathname(pip));
5588 			MDI_PI_UNLOCK(pip);
5589 			MDI_PHCI_SET_ONLINE(ph);
5590 			MDI_PHCI_UNLOCK(ph);
5591 			return;
5592 		}
5593 		MDI_PI_UNLOCK(pip);
5594 		pip = next;
5595 	}
5596 	MDI_PHCI_UNLOCK(ph);
5597 
5598 	return;
5599 }
5600 
5601 void
mdi_phci_unretire(dev_info_t * dip)5602 mdi_phci_unretire(dev_info_t *dip)
5603 {
5604 	mdi_phci_t	*ph;
5605 	mdi_pathinfo_t	*pip;
5606 	mdi_pathinfo_t	*next;
5607 
5608 	ASSERT(MDI_PHCI(dip));
5609 
5610 	/*
5611 	 * Online the phci
5612 	 */
5613 	i_mdi_phci_online(dip);
5614 
5615 	ph = i_devi_get_phci(dip);
5616 	MDI_PHCI_LOCK(ph);
5617 	pip = ph->ph_path_head;
5618 	while (pip != NULL) {
5619 		MDI_PI_LOCK(pip);
5620 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5621 		MDI_PI_UNLOCK(pip);
5622 		(void) i_mdi_pi_online(pip, 0);
5623 		pip = next;
5624 	}
5625 	MDI_PHCI_UNLOCK(ph);
5626 }
5627 
5628 /*ARGSUSED*/
5629 static int
i_mdi_client_offline(dev_info_t * dip,uint_t flags)5630 i_mdi_client_offline(dev_info_t *dip, uint_t flags)
5631 {
5632 	int		rv = NDI_SUCCESS;
5633 	mdi_client_t	*ct;
5634 
5635 	/*
5636 	 * Client component to go offline.  Make sure that we are
5637 	 * not in failing over state and update client state
5638 	 * accordingly
5639 	 */
5640 	ct = i_devi_get_client(dip);
5641 	MDI_DEBUG(2, (MDI_NOTE, dip,
5642 	    "called %p %p", (void *)dip, (void *)ct));
5643 	if (ct != NULL) {
5644 		MDI_CLIENT_LOCK(ct);
5645 		if (ct->ct_unstable) {
5646 			/*
5647 			 * One or more paths are in transient state,
5648 			 * Dont allow offline of a client device
5649 			 */
5650 			MDI_DEBUG(1, (MDI_WARN, dip,
5651 			    "!One or more paths to "
5652 			    "this device are in transient state. "
5653 			    "This device can not be removed at this moment. "
5654 			    "Please try again later."));
5655 			MDI_CLIENT_UNLOCK(ct);
5656 			return (NDI_BUSY);
5657 		}
5658 		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
5659 			/*
5660 			 * Failover is in progress, Dont allow DR of
5661 			 * a client device
5662 			 */
5663 			MDI_DEBUG(1, (MDI_WARN, dip,
5664 			    "!Client device is Busy. "
5665 			    "This device can not be removed at this moment. "
5666 			    "Please try again later."));
5667 			MDI_CLIENT_UNLOCK(ct);
5668 			return (NDI_BUSY);
5669 		}
5670 		MDI_CLIENT_SET_OFFLINE(ct);
5671 
5672 		/*
5673 		 * Unbind our relationship with the dev_info node
5674 		 */
5675 		if (flags & NDI_DEVI_REMOVE) {
5676 			ct->ct_dip = NULL;
5677 		}
5678 		MDI_CLIENT_UNLOCK(ct);
5679 	}
5680 	return (rv);
5681 }
5682 
5683 /*
5684  * mdi_pre_attach():
5685  *		Pre attach() notification handler
5686  */
5687 /*ARGSUSED*/
5688 int
mdi_pre_attach(dev_info_t * dip,ddi_attach_cmd_t cmd)5689 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
5690 {
5691 	/* don't support old DDI_PM_RESUME */
5692 	if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) &&
5693 	    (cmd == DDI_PM_RESUME))
5694 		return (DDI_FAILURE);
5695 
5696 	return (DDI_SUCCESS);
5697 }
5698 
5699 /*
5700  * mdi_post_attach():
5701  *		Post attach() notification handler
5702  */
5703 /*ARGSUSED*/
5704 void
mdi_post_attach(dev_info_t * dip,ddi_attach_cmd_t cmd,int error)5705 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error)
5706 {
5707 	mdi_phci_t	*ph;
5708 	mdi_client_t	*ct;
5709 	mdi_vhci_t	*vh;
5710 
5711 	if (MDI_PHCI(dip)) {
5712 		ph = i_devi_get_phci(dip);
5713 		ASSERT(ph != NULL);
5714 
5715 		MDI_PHCI_LOCK(ph);
5716 		switch (cmd) {
5717 		case DDI_ATTACH:
5718 			MDI_DEBUG(2, (MDI_NOTE, dip,
5719 			    "phci post_attach called %p", (void *)ph));
5720 			if (error == DDI_SUCCESS) {
5721 				MDI_PHCI_SET_ATTACH(ph);
5722 			} else {
5723 				MDI_DEBUG(1, (MDI_NOTE, dip,
5724 				    "!pHCI post_attach failed: error %d",
5725 				    error));
5726 				MDI_PHCI_SET_DETACH(ph);
5727 			}
5728 			break;
5729 
5730 		case DDI_RESUME:
5731 			MDI_DEBUG(2, (MDI_NOTE, dip,
5732 			    "pHCI post_resume: called %p", (void *)ph));
5733 			if (error == DDI_SUCCESS) {
5734 				MDI_PHCI_SET_RESUME(ph);
5735 			} else {
5736 				MDI_DEBUG(1, (MDI_NOTE, dip,
5737 				    "!pHCI post_resume failed: error %d",
5738 				    error));
5739 				MDI_PHCI_SET_SUSPEND(ph);
5740 			}
5741 			break;
5742 		}
5743 		MDI_PHCI_UNLOCK(ph);
5744 	}
5745 
5746 	if (MDI_CLIENT(dip)) {
5747 		ct = i_devi_get_client(dip);
5748 		ASSERT(ct != NULL);
5749 
5750 		MDI_CLIENT_LOCK(ct);
5751 		switch (cmd) {
5752 		case DDI_ATTACH:
5753 			MDI_DEBUG(2, (MDI_NOTE, dip,
5754 			    "client post_attach called %p", (void *)ct));
5755 			if (error != DDI_SUCCESS) {
5756 				MDI_DEBUG(1, (MDI_NOTE, dip,
5757 				    "!client post_attach failed: error %d",
5758 				    error));
5759 				MDI_CLIENT_SET_DETACH(ct);
5760 				MDI_DEBUG(4, (MDI_WARN, dip,
5761 				    "i_mdi_pm_reset_client"));
5762 				i_mdi_pm_reset_client(ct);
5763 				break;
5764 			}
5765 
5766 			/*
5767 			 * Client device has successfully attached, inform
5768 			 * the vhci.
5769 			 */
5770 			vh = ct->ct_vhci;
5771 			if (vh->vh_ops->vo_client_attached)
5772 				(*vh->vh_ops->vo_client_attached)(dip);
5773 
5774 			MDI_CLIENT_SET_ATTACH(ct);
5775 			break;
5776 
5777 		case DDI_RESUME:
5778 			MDI_DEBUG(2, (MDI_NOTE, dip,
5779 			    "client post_attach: called %p", (void *)ct));
5780 			if (error == DDI_SUCCESS) {
5781 				MDI_CLIENT_SET_RESUME(ct);
5782 			} else {
5783 				MDI_DEBUG(1, (MDI_NOTE, dip,
5784 				    "!client post_resume failed: error %d",
5785 				    error));
5786 				MDI_CLIENT_SET_SUSPEND(ct);
5787 			}
5788 			break;
5789 		}
5790 		MDI_CLIENT_UNLOCK(ct);
5791 	}
5792 }
5793 
5794 /*
5795  * mdi_pre_detach():
5796  *		Pre detach notification handler
5797  */
5798 /*ARGSUSED*/
5799 int
mdi_pre_detach(dev_info_t * dip,ddi_detach_cmd_t cmd)5800 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5801 {
5802 	int rv = DDI_SUCCESS;
5803 
5804 	if (MDI_CLIENT(dip)) {
5805 		(void) i_mdi_client_pre_detach(dip, cmd);
5806 	}
5807 
5808 	if (MDI_PHCI(dip)) {
5809 		rv = i_mdi_phci_pre_detach(dip, cmd);
5810 	}
5811 
5812 	return (rv);
5813 }
5814 
5815 /*ARGSUSED*/
5816 static int
i_mdi_phci_pre_detach(dev_info_t * dip,ddi_detach_cmd_t cmd)5817 i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5818 {
5819 	int		rv = DDI_SUCCESS;
5820 	mdi_phci_t	*ph;
5821 	mdi_client_t	*ct;
5822 	mdi_pathinfo_t	*pip;
5823 	mdi_pathinfo_t	*failed_pip = NULL;
5824 	mdi_pathinfo_t	*next;
5825 
5826 	ph = i_devi_get_phci(dip);
5827 	if (ph == NULL) {
5828 		return (rv);
5829 	}
5830 
5831 	MDI_PHCI_LOCK(ph);
5832 	switch (cmd) {
5833 	case DDI_DETACH:
5834 		MDI_DEBUG(2, (MDI_NOTE, dip,
5835 		    "pHCI pre_detach: called %p", (void *)ph));
5836 		if (!MDI_PHCI_IS_OFFLINE(ph)) {
5837 			/*
5838 			 * mdi_pathinfo nodes are still attached to
5839 			 * this pHCI. Fail the detach for this pHCI.
5840 			 */
5841 			MDI_DEBUG(2, (MDI_WARN, dip,
5842 			    "pHCI pre_detach: paths are still attached %p",
5843 			    (void *)ph));
5844 			rv = DDI_FAILURE;
5845 			break;
5846 		}
5847 		MDI_PHCI_SET_DETACH(ph);
5848 		break;
5849 
5850 	case DDI_SUSPEND:
5851 		/*
5852 		 * pHCI is getting suspended.  Since mpxio client
5853 		 * devices may not be suspended at this point, to avoid
5854 		 * a potential stack overflow, it is important to suspend
5855 		 * client devices before pHCI can be suspended.
5856 		 */
5857 
5858 		MDI_DEBUG(2, (MDI_NOTE, dip,
5859 		    "pHCI pre_suspend: called %p", (void *)ph));
5860 		/*
5861 		 * Suspend all the client devices accessible through this pHCI
5862 		 */
5863 		pip = ph->ph_path_head;
5864 		while (pip != NULL && rv == DDI_SUCCESS) {
5865 			dev_info_t *cdip;
5866 			MDI_PI_LOCK(pip);
5867 			next =
5868 			    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5869 			ct = MDI_PI(pip)->pi_client;
5870 			i_mdi_client_lock(ct, pip);
5871 			cdip = ct->ct_dip;
5872 			MDI_PI_UNLOCK(pip);
5873 			if ((MDI_CLIENT_IS_DETACHED(ct) == 0) &&
5874 			    MDI_CLIENT_IS_SUSPENDED(ct) == 0) {
5875 				i_mdi_client_unlock(ct);
5876 				if ((rv = devi_detach(cdip, DDI_SUSPEND)) !=
5877 				    DDI_SUCCESS) {
5878 					/*
5879 					 * Suspend of one of the client
5880 					 * device has failed.
5881 					 */
5882 					MDI_DEBUG(1, (MDI_WARN, dip,
5883 					    "!suspend of device (%s%d) failed.",
5884 					    ddi_driver_name(cdip),
5885 					    ddi_get_instance(cdip)));
5886 					failed_pip = pip;
5887 					break;
5888 				}
5889 			} else {
5890 				i_mdi_client_unlock(ct);
5891 			}
5892 			pip = next;
5893 		}
5894 
5895 		if (rv == DDI_SUCCESS) {
5896 			/*
5897 			 * Suspend of client devices is complete. Proceed
5898 			 * with pHCI suspend.
5899 			 */
5900 			MDI_PHCI_SET_SUSPEND(ph);
5901 		} else {
5902 			/*
5903 			 * Revert back all the suspended client device states
5904 			 * to converse.
5905 			 */
5906 			pip = ph->ph_path_head;
5907 			while (pip != failed_pip) {
5908 				dev_info_t *cdip;
5909 				MDI_PI_LOCK(pip);
5910 				next =
5911 				    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5912 				ct = MDI_PI(pip)->pi_client;
5913 				i_mdi_client_lock(ct, pip);
5914 				cdip = ct->ct_dip;
5915 				MDI_PI_UNLOCK(pip);
5916 				if (MDI_CLIENT_IS_SUSPENDED(ct)) {
5917 					i_mdi_client_unlock(ct);
5918 					(void) devi_attach(cdip, DDI_RESUME);
5919 				} else {
5920 					i_mdi_client_unlock(ct);
5921 				}
5922 				pip = next;
5923 			}
5924 		}
5925 		break;
5926 
5927 	default:
5928 		rv = DDI_FAILURE;
5929 		break;
5930 	}
5931 	MDI_PHCI_UNLOCK(ph);
5932 	return (rv);
5933 }
5934 
5935 /*ARGSUSED*/
5936 static int
i_mdi_client_pre_detach(dev_info_t * dip,ddi_detach_cmd_t cmd)5937 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5938 {
5939 	int		rv = DDI_SUCCESS;
5940 	mdi_client_t	*ct;
5941 
5942 	ct = i_devi_get_client(dip);
5943 	if (ct == NULL) {
5944 		return (rv);
5945 	}
5946 
5947 	MDI_CLIENT_LOCK(ct);
5948 	switch (cmd) {
5949 	case DDI_DETACH:
5950 		MDI_DEBUG(2, (MDI_NOTE, dip,
5951 		    "client pre_detach: called %p",
5952 		     (void *)ct));
5953 		MDI_CLIENT_SET_DETACH(ct);
5954 		break;
5955 
5956 	case DDI_SUSPEND:
5957 		MDI_DEBUG(2, (MDI_NOTE, dip,
5958 		    "client pre_suspend: called %p",
5959 		    (void *)ct));
5960 		MDI_CLIENT_SET_SUSPEND(ct);
5961 		break;
5962 
5963 	default:
5964 		rv = DDI_FAILURE;
5965 		break;
5966 	}
5967 	MDI_CLIENT_UNLOCK(ct);
5968 	return (rv);
5969 }
5970 
5971 /*
5972  * mdi_post_detach():
5973  *		Post detach notification handler
5974  */
5975 /*ARGSUSED*/
5976 void
mdi_post_detach(dev_info_t * dip,ddi_detach_cmd_t cmd,int error)5977 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5978 {
5979 	/*
5980 	 * Detach/Suspend of mpxio component failed. Update our state
5981 	 * too
5982 	 */
5983 	if (MDI_PHCI(dip))
5984 		i_mdi_phci_post_detach(dip, cmd, error);
5985 
5986 	if (MDI_CLIENT(dip))
5987 		i_mdi_client_post_detach(dip, cmd, error);
5988 }
5989 
5990 /*ARGSUSED*/
5991 static void
i_mdi_phci_post_detach(dev_info_t * dip,ddi_detach_cmd_t cmd,int error)5992 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5993 {
5994 	mdi_phci_t	*ph;
5995 
5996 	/*
5997 	 * Detach/Suspend of phci component failed. Update our state
5998 	 * too
5999 	 */
6000 	ph = i_devi_get_phci(dip);
6001 	if (ph == NULL) {
6002 		return;
6003 	}
6004 
6005 	MDI_PHCI_LOCK(ph);
6006 	/*
6007 	 * Detach of pHCI failed. Restore back converse
6008 	 * state
6009 	 */
6010 	switch (cmd) {
6011 	case DDI_DETACH:
6012 		MDI_DEBUG(2, (MDI_NOTE, dip,
6013 		    "pHCI post_detach: called %p",
6014 		    (void *)ph));
6015 		if (error != DDI_SUCCESS)
6016 			MDI_PHCI_SET_ATTACH(ph);
6017 		break;
6018 
6019 	case DDI_SUSPEND:
6020 		MDI_DEBUG(2, (MDI_NOTE, dip,
6021 		    "pHCI post_suspend: called %p",
6022 		    (void *)ph));
6023 		if (error != DDI_SUCCESS)
6024 			MDI_PHCI_SET_RESUME(ph);
6025 		break;
6026 	}
6027 	MDI_PHCI_UNLOCK(ph);
6028 }
6029 
6030 /*ARGSUSED*/
6031 static void
i_mdi_client_post_detach(dev_info_t * dip,ddi_detach_cmd_t cmd,int error)6032 i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
6033 {
6034 	mdi_client_t	*ct;
6035 
6036 	ct = i_devi_get_client(dip);
6037 	if (ct == NULL) {
6038 		return;
6039 	}
6040 	MDI_CLIENT_LOCK(ct);
6041 	/*
6042 	 * Detach of Client failed. Restore back converse
6043 	 * state
6044 	 */
6045 	switch (cmd) {
6046 	case DDI_DETACH:
6047 		MDI_DEBUG(2, (MDI_NOTE, dip,
6048 		    "client post_detach: called %p", (void *)ct));
6049 		if (DEVI_IS_ATTACHING(dip)) {
6050 			MDI_DEBUG(4, (MDI_NOTE, dip,
6051 			    "i_mdi_pm_rele_client\n"));
6052 			i_mdi_pm_rele_client(ct, ct->ct_path_count);
6053 		} else {
6054 			MDI_DEBUG(4, (MDI_NOTE, dip,
6055 			    "i_mdi_pm_reset_client\n"));
6056 			i_mdi_pm_reset_client(ct);
6057 		}
6058 		if (error != DDI_SUCCESS)
6059 			MDI_CLIENT_SET_ATTACH(ct);
6060 		break;
6061 
6062 	case DDI_SUSPEND:
6063 		MDI_DEBUG(2, (MDI_NOTE, dip,
6064 		    "called %p", (void *)ct));
6065 		if (error != DDI_SUCCESS)
6066 			MDI_CLIENT_SET_RESUME(ct);
6067 		break;
6068 	}
6069 	MDI_CLIENT_UNLOCK(ct);
6070 }
6071 
6072 int
mdi_pi_kstat_exists(mdi_pathinfo_t * pip)6073 mdi_pi_kstat_exists(mdi_pathinfo_t *pip)
6074 {
6075 	return (MDI_PI(pip)->pi_kstats ? 1 : 0);
6076 }
6077 
6078 /*
6079  * create and install per-path (client - pHCI) statistics
6080  * I/O stats supported: nread, nwritten, reads, and writes
6081  * Error stats - hard errors, soft errors, & transport errors
6082  */
6083 int
mdi_pi_kstat_create(mdi_pathinfo_t * pip,char * ksname)6084 mdi_pi_kstat_create(mdi_pathinfo_t *pip, char *ksname)
6085 {
6086 	kstat_t			*kiosp, *kerrsp;
6087 	struct pi_errs		*nsp;
6088 	struct mdi_pi_kstats	*mdi_statp;
6089 
6090 	if (MDI_PI(pip)->pi_kstats != NULL)
6091 		return (MDI_SUCCESS);
6092 
6093 	if ((kiosp = kstat_create("mdi", 0, ksname, "iopath",
6094 	    KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) {
6095 		return (MDI_FAILURE);
6096 	}
6097 
6098 	(void) strcat(ksname, ",err");
6099 	kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors",
6100 	    KSTAT_TYPE_NAMED,
6101 	    sizeof (struct pi_errs) / sizeof (kstat_named_t), 0);
6102 	if (kerrsp == NULL) {
6103 		kstat_delete(kiosp);
6104 		return (MDI_FAILURE);
6105 	}
6106 
6107 	nsp = (struct pi_errs *)kerrsp->ks_data;
6108 	kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32);
6109 	kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32);
6110 	kstat_named_init(&nsp->pi_transerrs, "Transport Errors",
6111 	    KSTAT_DATA_UINT32);
6112 	kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy",
6113 	    KSTAT_DATA_UINT32);
6114 	kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors",
6115 	    KSTAT_DATA_UINT32);
6116 	kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources",
6117 	    KSTAT_DATA_UINT32);
6118 	kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors",
6119 	    KSTAT_DATA_UINT32);
6120 	kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State",
6121 	    KSTAT_DATA_UINT32);
6122 	kstat_named_init(&nsp->pi_failedfrom, "Failed From",
6123 	    KSTAT_DATA_UINT32);
6124 	kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32);
6125 
6126 	mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP);
6127 	mdi_statp->pi_kstat_ref = 1;
6128 	mdi_statp->pi_kstat_iostats = kiosp;
6129 	mdi_statp->pi_kstat_errstats = kerrsp;
6130 	kstat_install(kiosp);
6131 	kstat_install(kerrsp);
6132 	MDI_PI(pip)->pi_kstats = mdi_statp;
6133 	return (MDI_SUCCESS);
6134 }
6135 
6136 /*
6137  * destroy per-path properties
6138  */
6139 static void
i_mdi_pi_kstat_destroy(mdi_pathinfo_t * pip)6140 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip)
6141 {
6142 
6143 	struct mdi_pi_kstats *mdi_statp;
6144 
6145 	if (MDI_PI(pip)->pi_kstats == NULL)
6146 		return;
6147 	if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL)
6148 		return;
6149 
6150 	MDI_PI(pip)->pi_kstats = NULL;
6151 
6152 	/*
6153 	 * the kstat may be shared between multiple pathinfo nodes
6154 	 * decrement this pathinfo's usage, removing the kstats
6155 	 * themselves when the last pathinfo reference is removed.
6156 	 */
6157 	ASSERT(mdi_statp->pi_kstat_ref > 0);
6158 	if (--mdi_statp->pi_kstat_ref != 0)
6159 		return;
6160 
6161 	kstat_delete(mdi_statp->pi_kstat_iostats);
6162 	kstat_delete(mdi_statp->pi_kstat_errstats);
6163 	kmem_free(mdi_statp, sizeof (*mdi_statp));
6164 }
6165 
6166 /*
6167  * update I/O paths KSTATS
6168  */
6169 void
mdi_pi_kstat_iosupdate(mdi_pathinfo_t * pip,struct buf * bp)6170 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp)
6171 {
6172 	kstat_t *iostatp;
6173 	size_t xfer_cnt;
6174 
6175 	ASSERT(pip != NULL);
6176 
6177 	/*
6178 	 * I/O can be driven across a path prior to having path
6179 	 * statistics available, i.e. probe(9e).
6180 	 */
6181 	if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) {
6182 		iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats;
6183 		xfer_cnt = bp->b_bcount - bp->b_resid;
6184 		if (bp->b_flags & B_READ) {
6185 			KSTAT_IO_PTR(iostatp)->reads++;
6186 			KSTAT_IO_PTR(iostatp)->nread += xfer_cnt;
6187 		} else {
6188 			KSTAT_IO_PTR(iostatp)->writes++;
6189 			KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt;
6190 		}
6191 	}
6192 }
6193 
6194 /*
6195  * Enable the path(specific client/target/initiator)
6196  * Enabling a path means that MPxIO may select the enabled path for routing
6197  * future I/O requests, subject to other path state constraints.
6198  */
6199 int
mdi_pi_enable_path(mdi_pathinfo_t * pip,int flags)6200 mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags)
6201 {
6202 	mdi_phci_t	*ph;
6203 
6204 	ph = MDI_PI(pip)->pi_phci;
6205 	if (ph == NULL) {
6206 		MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip),
6207 		    "!failed: path %s %p: NULL ph",
6208 		    mdi_pi_spathname(pip), (void *)pip));
6209 		return (MDI_FAILURE);
6210 	}
6211 
6212 	(void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags,
6213 		MDI_ENABLE_OP);
6214 	MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip,
6215 	    "!returning success pip = %p. ph = %p",
6216 	    (void *)pip, (void *)ph));
6217 	return (MDI_SUCCESS);
6218 
6219 }
6220 
6221 /*
6222  * Disable the path (specific client/target/initiator)
6223  * Disabling a path means that MPxIO will not select the disabled path for
6224  * routing any new I/O requests.
6225  */
6226 int
mdi_pi_disable_path(mdi_pathinfo_t * pip,int flags)6227 mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags)
6228 {
6229 	mdi_phci_t	*ph;
6230 
6231 	ph = MDI_PI(pip)->pi_phci;
6232 	if (ph == NULL) {
6233 		MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip),
6234 		    "!failed: path %s %p: NULL ph",
6235 		    mdi_pi_spathname(pip), (void *)pip));
6236 		return (MDI_FAILURE);
6237 	}
6238 
6239 	(void) i_mdi_enable_disable_path(pip,
6240 	    ph->ph_vhci, flags, MDI_DISABLE_OP);
6241 	MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip,
6242 	    "!returning success pip = %p. ph = %p",
6243 	    (void *)pip, (void *)ph));
6244 	return (MDI_SUCCESS);
6245 }
6246 
6247 /*
6248  * disable the path to a particular pHCI (pHCI specified in the phci_path
6249  * argument) for a particular client (specified in the client_path argument).
6250  * Disabling a path means that MPxIO will not select the disabled path for
6251  * routing any new I/O requests.
6252  * NOTE: this will be removed once the NWS files are changed to use the new
6253  * mdi_{enable,disable}_path interfaces
6254  */
6255 int
mdi_pi_disable(dev_info_t * cdip,dev_info_t * pdip,int flags)6256 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags)
6257 {
6258 	return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP));
6259 }
6260 
6261 /*
6262  * Enable the path to a particular pHCI (pHCI specified in the phci_path
6263  * argument) for a particular client (specified in the client_path argument).
6264  * Enabling a path means that MPxIO may select the enabled path for routing
6265  * future I/O requests, subject to other path state constraints.
6266  * NOTE: this will be removed once the NWS files are changed to use the new
6267  * mdi_{enable,disable}_path interfaces
6268  */
6269 
6270 int
mdi_pi_enable(dev_info_t * cdip,dev_info_t * pdip,int flags)6271 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags)
6272 {
6273 	return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP));
6274 }
6275 
6276 /*
6277  * Common routine for doing enable/disable.
6278  */
6279 static mdi_pathinfo_t *
i_mdi_enable_disable_path(mdi_pathinfo_t * pip,mdi_vhci_t * vh,int flags,int op)6280 i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags,
6281 		int op)
6282 {
6283 	int		sync_flag = 0;
6284 	int		rv;
6285 	mdi_pathinfo_t 	*next;
6286 	int		(*f)() = NULL;
6287 
6288 	/*
6289 	 * Check to make sure the path is not already in the
6290 	 * requested state. If it is just return the next path
6291 	 * as we have nothing to do here.
6292 	 */
6293 	if ((MDI_PI_IS_DISABLE(pip) && op == MDI_DISABLE_OP) ||
6294 	    (!MDI_PI_IS_DISABLE(pip) && op == MDI_ENABLE_OP)) {
6295 		MDI_PI_LOCK(pip);
6296 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
6297 		MDI_PI_UNLOCK(pip);
6298 		return (next);
6299 	}
6300 
6301 	f = vh->vh_ops->vo_pi_state_change;
6302 
6303 	sync_flag = (flags << 8) & 0xf00;
6304 
6305 	/*
6306 	 * Do a callback into the mdi consumer to let it
6307 	 * know that path is about to get enabled/disabled.
6308 	 */
6309 	if (f != NULL) {
6310 		rv = (*f)(vh->vh_dip, pip, 0,
6311 			MDI_PI_EXT_STATE(pip),
6312 			MDI_EXT_STATE_CHANGE | sync_flag |
6313 			op | MDI_BEFORE_STATE_CHANGE);
6314 		if (rv != MDI_SUCCESS) {
6315 			MDI_DEBUG(2, (MDI_WARN, vh->vh_dip,
6316 			    "vo_pi_state_change: failed rv = %x", rv));
6317 		}
6318 	}
6319 	MDI_PI_LOCK(pip);
6320 	next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
6321 
6322 	switch (flags) {
6323 		case USER_DISABLE:
6324 			if (op == MDI_DISABLE_OP) {
6325 				MDI_PI_SET_USER_DISABLE(pip);
6326 			} else {
6327 				MDI_PI_SET_USER_ENABLE(pip);
6328 			}
6329 			break;
6330 		case DRIVER_DISABLE:
6331 			if (op == MDI_DISABLE_OP) {
6332 				MDI_PI_SET_DRV_DISABLE(pip);
6333 			} else {
6334 				MDI_PI_SET_DRV_ENABLE(pip);
6335 			}
6336 			break;
6337 		case DRIVER_DISABLE_TRANSIENT:
6338 			if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) {
6339 				MDI_PI_SET_DRV_DISABLE_TRANS(pip);
6340 			} else {
6341 				MDI_PI_SET_DRV_ENABLE_TRANS(pip);
6342 			}
6343 			break;
6344 	}
6345 	MDI_PI_UNLOCK(pip);
6346 	/*
6347 	 * Do a callback into the mdi consumer to let it
6348 	 * know that path is now enabled/disabled.
6349 	 */
6350 	if (f != NULL) {
6351 		rv = (*f)(vh->vh_dip, pip, 0,
6352 			MDI_PI_EXT_STATE(pip),
6353 			MDI_EXT_STATE_CHANGE | sync_flag |
6354 			op | MDI_AFTER_STATE_CHANGE);
6355 		if (rv != MDI_SUCCESS) {
6356 			MDI_DEBUG(2, (MDI_WARN, vh->vh_dip,
6357 			    "vo_pi_state_change failed: rv = %x", rv));
6358 		}
6359 	}
6360 	return (next);
6361 }
6362 
6363 /*
6364  * Common routine for doing enable/disable.
6365  * NOTE: this will be removed once the NWS files are changed to use the new
6366  * mdi_{enable,disable}_path has been putback
6367  */
6368 int
i_mdi_pi_enable_disable(dev_info_t * cdip,dev_info_t * pdip,int flags,int op)6369 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op)
6370 {
6371 
6372 	mdi_phci_t	*ph;
6373 	mdi_vhci_t	*vh = NULL;
6374 	mdi_client_t	*ct;
6375 	mdi_pathinfo_t	*next, *pip;
6376 	int		found_it;
6377 
6378 	ph = i_devi_get_phci(pdip);
6379 	MDI_DEBUG(5, (MDI_NOTE, cdip ? cdip : pdip,
6380 	    "!op = %d pdip = %p cdip = %p", op, (void *)pdip,
6381 	    (void *)cdip));
6382 	if (ph == NULL) {
6383 		MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6384 		    "!failed: operation %d: NULL ph", op));
6385 		return (MDI_FAILURE);
6386 	}
6387 
6388 	if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) {
6389 		MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6390 		    "!failed: invalid operation %d", op));
6391 		return (MDI_FAILURE);
6392 	}
6393 
6394 	vh = ph->ph_vhci;
6395 
6396 	if (cdip == NULL) {
6397 		/*
6398 		 * Need to mark the Phci as enabled/disabled.
6399 		 */
6400 		MDI_DEBUG(4, (MDI_NOTE, cdip ? cdip : pdip,
6401 		    "op %d for the phci", op));
6402 		MDI_PHCI_LOCK(ph);
6403 		switch (flags) {
6404 			case USER_DISABLE:
6405 				if (op == MDI_DISABLE_OP) {
6406 					MDI_PHCI_SET_USER_DISABLE(ph);
6407 				} else {
6408 					MDI_PHCI_SET_USER_ENABLE(ph);
6409 				}
6410 				break;
6411 			case DRIVER_DISABLE:
6412 				if (op == MDI_DISABLE_OP) {
6413 					MDI_PHCI_SET_DRV_DISABLE(ph);
6414 				} else {
6415 					MDI_PHCI_SET_DRV_ENABLE(ph);
6416 				}
6417 				break;
6418 			case DRIVER_DISABLE_TRANSIENT:
6419 				if (op == MDI_DISABLE_OP) {
6420 					MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph);
6421 				} else {
6422 					MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph);
6423 				}
6424 				break;
6425 			default:
6426 				MDI_PHCI_UNLOCK(ph);
6427 				MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6428 				    "!invalid flag argument= %d", flags));
6429 		}
6430 
6431 		/*
6432 		 * Phci has been disabled. Now try to enable/disable
6433 		 * path info's to each client.
6434 		 */
6435 		pip = ph->ph_path_head;
6436 		while (pip != NULL) {
6437 			pip = i_mdi_enable_disable_path(pip, vh, flags, op);
6438 		}
6439 		MDI_PHCI_UNLOCK(ph);
6440 	} else {
6441 
6442 		/*
6443 		 * Disable a specific client.
6444 		 */
6445 		ct = i_devi_get_client(cdip);
6446 		if (ct == NULL) {
6447 			MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6448 			    "!failed: operation = %d: NULL ct", op));
6449 			return (MDI_FAILURE);
6450 		}
6451 
6452 		MDI_CLIENT_LOCK(ct);
6453 		pip = ct->ct_path_head;
6454 		found_it = 0;
6455 		while (pip != NULL) {
6456 			MDI_PI_LOCK(pip);
6457 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6458 			if (MDI_PI(pip)->pi_phci == ph) {
6459 				MDI_PI_UNLOCK(pip);
6460 				found_it = 1;
6461 				break;
6462 			}
6463 			MDI_PI_UNLOCK(pip);
6464 			pip = next;
6465 		}
6466 
6467 
6468 		MDI_CLIENT_UNLOCK(ct);
6469 		if (found_it == 0) {
6470 			MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6471 			    "!failed. Could not find corresponding pip\n"));
6472 			return (MDI_FAILURE);
6473 		}
6474 
6475 		(void) i_mdi_enable_disable_path(pip, vh, flags, op);
6476 	}
6477 
6478 	MDI_DEBUG(5, (MDI_NOTE, cdip ? cdip : pdip,
6479 	    "!op %d returning success pdip = %p cdip = %p",
6480 	    op, (void *)pdip, (void *)cdip));
6481 	return (MDI_SUCCESS);
6482 }
6483 
6484 /*
6485  * Ensure phci powered up
6486  */
6487 static void
i_mdi_pm_hold_pip(mdi_pathinfo_t * pip)6488 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip)
6489 {
6490 	dev_info_t	*ph_dip;
6491 
6492 	ASSERT(pip != NULL);
6493 	ASSERT(MDI_PI_LOCKED(pip));
6494 
6495 	if (MDI_PI(pip)->pi_pm_held) {
6496 		return;
6497 	}
6498 
6499 	ph_dip = mdi_pi_get_phci(pip);
6500 	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6501 	    "%s %p", mdi_pi_spathname(pip), (void *)pip));
6502 	if (ph_dip == NULL) {
6503 		return;
6504 	}
6505 
6506 	MDI_PI_UNLOCK(pip);
6507 	MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt was %d",
6508 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
6509 	pm_hold_power(ph_dip);
6510 	MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt is %d",
6511 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
6512 	MDI_PI_LOCK(pip);
6513 
6514 	/* If PM_GET_PM_INFO is NULL the pm_hold_power above was a noop */
6515 	if (DEVI(ph_dip)->devi_pm_info)
6516 		MDI_PI(pip)->pi_pm_held = 1;
6517 }
6518 
6519 /*
6520  * Allow phci powered down
6521  */
6522 static void
i_mdi_pm_rele_pip(mdi_pathinfo_t * pip)6523 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip)
6524 {
6525 	dev_info_t	*ph_dip = NULL;
6526 
6527 	ASSERT(pip != NULL);
6528 	ASSERT(MDI_PI_LOCKED(pip));
6529 
6530 	if (MDI_PI(pip)->pi_pm_held == 0) {
6531 		return;
6532 	}
6533 
6534 	ph_dip = mdi_pi_get_phci(pip);
6535 	ASSERT(ph_dip != NULL);
6536 
6537 	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6538 	    "%s %p", mdi_pi_spathname(pip), (void *)pip));
6539 
6540 	MDI_PI_UNLOCK(pip);
6541 	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6542 	    "kidsupcnt was %d", DEVI(ph_dip)->devi_pm_kidsupcnt));
6543 	pm_rele_power(ph_dip);
6544 	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6545 	    "kidsupcnt is %d", DEVI(ph_dip)->devi_pm_kidsupcnt));
6546 	MDI_PI_LOCK(pip);
6547 
6548 	MDI_PI(pip)->pi_pm_held = 0;
6549 }
6550 
6551 static void
i_mdi_pm_hold_client(mdi_client_t * ct,int incr)6552 i_mdi_pm_hold_client(mdi_client_t *ct, int incr)
6553 {
6554 	ASSERT(MDI_CLIENT_LOCKED(ct));
6555 
6556 	ct->ct_power_cnt += incr;
6557 	MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
6558 	    "%p ct_power_cnt = %d incr = %d",
6559 	    (void *)ct, ct->ct_power_cnt, incr));
6560 	ASSERT(ct->ct_power_cnt >= 0);
6561 }
6562 
6563 static void
i_mdi_rele_all_phci(mdi_client_t * ct)6564 i_mdi_rele_all_phci(mdi_client_t *ct)
6565 {
6566 	mdi_pathinfo_t  *pip;
6567 
6568 	ASSERT(MDI_CLIENT_LOCKED(ct));
6569 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
6570 	while (pip != NULL) {
6571 		mdi_hold_path(pip);
6572 		MDI_PI_LOCK(pip);
6573 		i_mdi_pm_rele_pip(pip);
6574 		MDI_PI_UNLOCK(pip);
6575 		mdi_rele_path(pip);
6576 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6577 	}
6578 }
6579 
6580 static void
i_mdi_pm_rele_client(mdi_client_t * ct,int decr)6581 i_mdi_pm_rele_client(mdi_client_t *ct, int decr)
6582 {
6583 	ASSERT(MDI_CLIENT_LOCKED(ct));
6584 
6585 	if (i_ddi_devi_attached(ct->ct_dip)) {
6586 		ct->ct_power_cnt -= decr;
6587 		MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
6588 		    "%p ct_power_cnt = %d decr = %d",
6589 		    (void *)ct, ct->ct_power_cnt, decr));
6590 	}
6591 
6592 	ASSERT(ct->ct_power_cnt >= 0);
6593 	if (ct->ct_power_cnt == 0) {
6594 		i_mdi_rele_all_phci(ct);
6595 		return;
6596 	}
6597 }
6598 
6599 static void
i_mdi_pm_reset_client(mdi_client_t * ct)6600 i_mdi_pm_reset_client(mdi_client_t *ct)
6601 {
6602 	MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
6603 	    "%p ct_power_cnt = %d", (void *)ct, ct->ct_power_cnt));
6604 	ASSERT(MDI_CLIENT_LOCKED(ct));
6605 	ct->ct_power_cnt = 0;
6606 	i_mdi_rele_all_phci(ct);
6607 	ct->ct_powercnt_config = 0;
6608 	ct->ct_powercnt_unconfig = 0;
6609 	ct->ct_powercnt_reset = 1;
6610 }
6611 
6612 static int
i_mdi_power_one_phci(mdi_pathinfo_t * pip)6613 i_mdi_power_one_phci(mdi_pathinfo_t *pip)
6614 {
6615 	int		ret;
6616 	dev_info_t	*ph_dip;
6617 
6618 	MDI_PI_LOCK(pip);
6619 	i_mdi_pm_hold_pip(pip);
6620 
6621 	ph_dip = mdi_pi_get_phci(pip);
6622 	MDI_PI_UNLOCK(pip);
6623 
6624 	/* bring all components of phci to full power */
6625 	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6626 	    "pm_powerup for %s%d %p", ddi_driver_name(ph_dip),
6627 	    ddi_get_instance(ph_dip), (void *)pip));
6628 
6629 	ret = pm_powerup(ph_dip);
6630 
6631 	if (ret == DDI_FAILURE) {
6632 		MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6633 		    "pm_powerup FAILED for %s%d %p",
6634 		    ddi_driver_name(ph_dip), ddi_get_instance(ph_dip),
6635 		    (void *)pip));
6636 
6637 		MDI_PI_LOCK(pip);
6638 		i_mdi_pm_rele_pip(pip);
6639 		MDI_PI_UNLOCK(pip);
6640 		return (MDI_FAILURE);
6641 	}
6642 
6643 	return (MDI_SUCCESS);
6644 }
6645 
6646 static int
i_mdi_power_all_phci(mdi_client_t * ct)6647 i_mdi_power_all_phci(mdi_client_t *ct)
6648 {
6649 	mdi_pathinfo_t  *pip;
6650 	int		succeeded = 0;
6651 
6652 	ASSERT(MDI_CLIENT_LOCKED(ct));
6653 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
6654 	while (pip != NULL) {
6655 		/*
6656 		 * Don't power if MDI_PATHINFO_STATE_FAULT
6657 		 * or MDI_PATHINFO_STATE_OFFLINE.
6658 		 */
6659 		if (MDI_PI_IS_INIT(pip) ||
6660 		    MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) {
6661 			mdi_hold_path(pip);
6662 			MDI_CLIENT_UNLOCK(ct);
6663 			if (i_mdi_power_one_phci(pip) == MDI_SUCCESS)
6664 				succeeded = 1;
6665 
6666 			ASSERT(ct == MDI_PI(pip)->pi_client);
6667 			MDI_CLIENT_LOCK(ct);
6668 			mdi_rele_path(pip);
6669 		}
6670 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6671 	}
6672 
6673 	return (succeeded ? MDI_SUCCESS : MDI_FAILURE);
6674 }
6675 
6676 /*
6677  * mdi_bus_power():
6678  *		1. Place the phci(s) into powered up state so that
6679  *		   client can do power management
6680  *		2. Ensure phci powered up as client power managing
6681  * Return Values:
6682  *		MDI_SUCCESS
6683  *		MDI_FAILURE
6684  */
6685 int
mdi_bus_power(dev_info_t * parent,void * impl_arg,pm_bus_power_op_t op,void * arg,void * result)6686 mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op,
6687     void *arg, void *result)
6688 {
6689 	int			ret = MDI_SUCCESS;
6690 	pm_bp_child_pwrchg_t	*bpc;
6691 	mdi_client_t		*ct;
6692 	dev_info_t		*cdip;
6693 	pm_bp_has_changed_t	*bphc;
6694 
6695 	/*
6696 	 * BUS_POWER_NOINVOL not supported
6697 	 */
6698 	if (op == BUS_POWER_NOINVOL)
6699 		return (MDI_FAILURE);
6700 
6701 	/*
6702 	 * ignore other OPs.
6703 	 * return quickly to save cou cycles on the ct processing
6704 	 */
6705 	switch (op) {
6706 	case BUS_POWER_PRE_NOTIFICATION:
6707 	case BUS_POWER_POST_NOTIFICATION:
6708 		bpc = (pm_bp_child_pwrchg_t *)arg;
6709 		cdip = bpc->bpc_dip;
6710 		break;
6711 	case BUS_POWER_HAS_CHANGED:
6712 		bphc = (pm_bp_has_changed_t *)arg;
6713 		cdip = bphc->bphc_dip;
6714 		break;
6715 	default:
6716 		return (pm_busop_bus_power(parent, impl_arg, op, arg, result));
6717 	}
6718 
6719 	ASSERT(MDI_CLIENT(cdip));
6720 
6721 	ct = i_devi_get_client(cdip);
6722 	if (ct == NULL)
6723 		return (MDI_FAILURE);
6724 
6725 	/*
6726 	 * wait till the mdi_pathinfo node state change are processed
6727 	 */
6728 	MDI_CLIENT_LOCK(ct);
6729 	switch (op) {
6730 	case BUS_POWER_PRE_NOTIFICATION:
6731 		MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6732 		    "BUS_POWER_PRE_NOTIFICATION:"
6733 		    "%s@%s, olevel=%d, nlevel=%d, comp=%d",
6734 		    ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
6735 		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp));
6736 
6737 		/* serialize power level change per client */
6738 		while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6739 			cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6740 
6741 		MDI_CLIENT_SET_POWER_TRANSITION(ct);
6742 
6743 		if (ct->ct_power_cnt == 0) {
6744 			ret = i_mdi_power_all_phci(ct);
6745 		}
6746 
6747 		/*
6748 		 * if new_level > 0:
6749 		 *	- hold phci(s)
6750 		 *	- power up phci(s) if not already
6751 		 * ignore power down
6752 		 */
6753 		if (bpc->bpc_nlevel > 0) {
6754 			if (!DEVI_IS_ATTACHING(ct->ct_dip)) {
6755 				MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6756 				    "i_mdi_pm_hold_client\n"));
6757 				i_mdi_pm_hold_client(ct, ct->ct_path_count);
6758 			}
6759 		}
6760 		break;
6761 	case BUS_POWER_POST_NOTIFICATION:
6762 		MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6763 		    "BUS_POWER_POST_NOTIFICATION:"
6764 		    "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d",
6765 		    ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
6766 		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp,
6767 		    *(int *)result));
6768 
6769 		if (*(int *)result == DDI_SUCCESS) {
6770 			if (bpc->bpc_nlevel > 0) {
6771 				MDI_CLIENT_SET_POWER_UP(ct);
6772 			} else {
6773 				MDI_CLIENT_SET_POWER_DOWN(ct);
6774 			}
6775 		}
6776 
6777 		/* release the hold we did in pre-notification */
6778 		if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) &&
6779 		    !DEVI_IS_ATTACHING(ct->ct_dip)) {
6780 			MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6781 			    "i_mdi_pm_rele_client\n"));
6782 			i_mdi_pm_rele_client(ct, ct->ct_path_count);
6783 		}
6784 
6785 		if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) {
6786 			/* another thread might started attaching */
6787 			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
6788 				MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6789 				    "i_mdi_pm_rele_client\n"));
6790 				i_mdi_pm_rele_client(ct, ct->ct_path_count);
6791 			/* detaching has been taken care in pm_post_unconfig */
6792 			} else if (!DEVI_IS_DETACHING(ct->ct_dip)) {
6793 				MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6794 				    "i_mdi_pm_reset_client\n"));
6795 				i_mdi_pm_reset_client(ct);
6796 			}
6797 		}
6798 
6799 		MDI_CLIENT_CLEAR_POWER_TRANSITION(ct);
6800 		cv_broadcast(&ct->ct_powerchange_cv);
6801 
6802 		break;
6803 
6804 	/* need to do more */
6805 	case BUS_POWER_HAS_CHANGED:
6806 		MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
6807 		    "BUS_POWER_HAS_CHANGED:"
6808 		    "%s@%s, olevel=%d, nlevel=%d, comp=%d",
6809 		    ddi_node_name(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip),
6810 		    bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp));
6811 
6812 		if (bphc->bphc_nlevel > 0 &&
6813 		    bphc->bphc_nlevel > bphc->bphc_olevel) {
6814 			if (ct->ct_power_cnt == 0) {
6815 				ret = i_mdi_power_all_phci(ct);
6816 			}
6817 			MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
6818 			    "i_mdi_pm_hold_client\n"));
6819 			i_mdi_pm_hold_client(ct, ct->ct_path_count);
6820 		}
6821 
6822 		if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) {
6823 			MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
6824 			    "i_mdi_pm_rele_client\n"));
6825 			i_mdi_pm_rele_client(ct, ct->ct_path_count);
6826 		}
6827 		break;
6828 	}
6829 
6830 	MDI_CLIENT_UNLOCK(ct);
6831 	return (ret);
6832 }
6833 
6834 static int
i_mdi_pm_pre_config_one(dev_info_t * child)6835 i_mdi_pm_pre_config_one(dev_info_t *child)
6836 {
6837 	int		ret = MDI_SUCCESS;
6838 	mdi_client_t	*ct;
6839 
6840 	ct = i_devi_get_client(child);
6841 	if (ct == NULL)
6842 		return (MDI_FAILURE);
6843 
6844 	MDI_CLIENT_LOCK(ct);
6845 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6846 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6847 
6848 	if (!MDI_CLIENT_IS_FAILED(ct)) {
6849 		MDI_CLIENT_UNLOCK(ct);
6850 		MDI_DEBUG(4, (MDI_NOTE, child, "already configured\n"));
6851 		return (MDI_SUCCESS);
6852 	}
6853 
6854 	if (ct->ct_powercnt_config) {
6855 		MDI_CLIENT_UNLOCK(ct);
6856 		MDI_DEBUG(4, (MDI_NOTE, child, "already held\n"));
6857 		return (MDI_SUCCESS);
6858 	}
6859 
6860 	if (ct->ct_power_cnt == 0) {
6861 		ret = i_mdi_power_all_phci(ct);
6862 	}
6863 	MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n"));
6864 	i_mdi_pm_hold_client(ct, ct->ct_path_count);
6865 	ct->ct_powercnt_config = 1;
6866 	ct->ct_powercnt_reset = 0;
6867 	MDI_CLIENT_UNLOCK(ct);
6868 	return (ret);
6869 }
6870 
6871 static int
i_mdi_pm_pre_config(dev_info_t * vdip,dev_info_t * child)6872 i_mdi_pm_pre_config(dev_info_t *vdip, dev_info_t *child)
6873 {
6874 	int			ret = MDI_SUCCESS;
6875 	dev_info_t		*cdip;
6876 	int			circ;
6877 
6878 	ASSERT(MDI_VHCI(vdip));
6879 
6880 	/* ndi_devi_config_one */
6881 	if (child) {
6882 		ASSERT(DEVI_BUSY_OWNED(vdip));
6883 		return (i_mdi_pm_pre_config_one(child));
6884 	}
6885 
6886 	/* devi_config_common */
6887 	ndi_devi_enter(vdip, &circ);
6888 	cdip = ddi_get_child(vdip);
6889 	while (cdip) {
6890 		dev_info_t *next = ddi_get_next_sibling(cdip);
6891 
6892 		ret = i_mdi_pm_pre_config_one(cdip);
6893 		if (ret != MDI_SUCCESS)
6894 			break;
6895 		cdip = next;
6896 	}
6897 	ndi_devi_exit(vdip, circ);
6898 	return (ret);
6899 }
6900 
6901 static int
i_mdi_pm_pre_unconfig_one(dev_info_t * child,int * held,int flags)6902 i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags)
6903 {
6904 	int		ret = MDI_SUCCESS;
6905 	mdi_client_t	*ct;
6906 
6907 	ct = i_devi_get_client(child);
6908 	if (ct == NULL)
6909 		return (MDI_FAILURE);
6910 
6911 	MDI_CLIENT_LOCK(ct);
6912 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6913 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6914 
6915 	if (!i_ddi_devi_attached(child)) {
6916 		MDI_DEBUG(4, (MDI_NOTE, child, "node detached already\n"));
6917 		MDI_CLIENT_UNLOCK(ct);
6918 		return (MDI_SUCCESS);
6919 	}
6920 
6921 	if (MDI_CLIENT_IS_POWERED_DOWN(ct) &&
6922 	    (flags & NDI_AUTODETACH)) {
6923 		MDI_DEBUG(4, (MDI_NOTE, child, "auto-modunload\n"));
6924 		MDI_CLIENT_UNLOCK(ct);
6925 		return (MDI_FAILURE);
6926 	}
6927 
6928 	if (ct->ct_powercnt_unconfig) {
6929 		MDI_DEBUG(4, (MDI_NOTE, child, "ct_powercnt_held\n"));
6930 		MDI_CLIENT_UNLOCK(ct);
6931 		*held = 1;
6932 		return (MDI_SUCCESS);
6933 	}
6934 
6935 	if (ct->ct_power_cnt == 0) {
6936 		ret = i_mdi_power_all_phci(ct);
6937 	}
6938 	MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n"));
6939 	i_mdi_pm_hold_client(ct, ct->ct_path_count);
6940 	ct->ct_powercnt_unconfig = 1;
6941 	ct->ct_powercnt_reset = 0;
6942 	MDI_CLIENT_UNLOCK(ct);
6943 	if (ret == MDI_SUCCESS)
6944 		*held = 1;
6945 	return (ret);
6946 }
6947 
6948 static int
i_mdi_pm_pre_unconfig(dev_info_t * vdip,dev_info_t * child,int * held,int flags)6949 i_mdi_pm_pre_unconfig(dev_info_t *vdip, dev_info_t *child, int *held,
6950     int flags)
6951 {
6952 	int			ret = MDI_SUCCESS;
6953 	dev_info_t		*cdip;
6954 	int			circ;
6955 
6956 	ASSERT(MDI_VHCI(vdip));
6957 	*held = 0;
6958 
6959 	/* ndi_devi_unconfig_one */
6960 	if (child) {
6961 		ASSERT(DEVI_BUSY_OWNED(vdip));
6962 		return (i_mdi_pm_pre_unconfig_one(child, held, flags));
6963 	}
6964 
6965 	/* devi_unconfig_common */
6966 	ndi_devi_enter(vdip, &circ);
6967 	cdip = ddi_get_child(vdip);
6968 	while (cdip) {
6969 		dev_info_t *next = ddi_get_next_sibling(cdip);
6970 
6971 		ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags);
6972 		cdip = next;
6973 	}
6974 	ndi_devi_exit(vdip, circ);
6975 
6976 	if (*held)
6977 		ret = MDI_SUCCESS;
6978 
6979 	return (ret);
6980 }
6981 
6982 static void
i_mdi_pm_post_config_one(dev_info_t * child)6983 i_mdi_pm_post_config_one(dev_info_t *child)
6984 {
6985 	mdi_client_t	*ct;
6986 
6987 	ct = i_devi_get_client(child);
6988 	if (ct == NULL)
6989 		return;
6990 
6991 	MDI_CLIENT_LOCK(ct);
6992 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6993 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6994 
6995 	if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) {
6996 		MDI_DEBUG(4, (MDI_NOTE, child, "not configured\n"));
6997 		MDI_CLIENT_UNLOCK(ct);
6998 		return;
6999 	}
7000 
7001 	/* client has not been updated */
7002 	if (MDI_CLIENT_IS_FAILED(ct)) {
7003 		MDI_DEBUG(4, (MDI_NOTE, child, "client failed\n"));
7004 		MDI_CLIENT_UNLOCK(ct);
7005 		return;
7006 	}
7007 
7008 	/* another thread might have powered it down or detached it */
7009 	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
7010 	    !DEVI_IS_ATTACHING(child)) ||
7011 	    (!i_ddi_devi_attached(child) &&
7012 	    !DEVI_IS_ATTACHING(child))) {
7013 		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n"));
7014 		i_mdi_pm_reset_client(ct);
7015 	} else {
7016 		mdi_pathinfo_t  *pip, *next;
7017 		int	valid_path_count = 0;
7018 
7019 		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n"));
7020 		pip = ct->ct_path_head;
7021 		while (pip != NULL) {
7022 			MDI_PI_LOCK(pip);
7023 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
7024 			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
7025 				valid_path_count ++;
7026 			MDI_PI_UNLOCK(pip);
7027 			pip = next;
7028 		}
7029 		i_mdi_pm_rele_client(ct, valid_path_count);
7030 	}
7031 	ct->ct_powercnt_config = 0;
7032 	MDI_CLIENT_UNLOCK(ct);
7033 }
7034 
7035 static void
i_mdi_pm_post_config(dev_info_t * vdip,dev_info_t * child)7036 i_mdi_pm_post_config(dev_info_t *vdip, dev_info_t *child)
7037 {
7038 	int		circ;
7039 	dev_info_t	*cdip;
7040 
7041 	ASSERT(MDI_VHCI(vdip));
7042 
7043 	/* ndi_devi_config_one */
7044 	if (child) {
7045 		ASSERT(DEVI_BUSY_OWNED(vdip));
7046 		i_mdi_pm_post_config_one(child);
7047 		return;
7048 	}
7049 
7050 	/* devi_config_common */
7051 	ndi_devi_enter(vdip, &circ);
7052 	cdip = ddi_get_child(vdip);
7053 	while (cdip) {
7054 		dev_info_t *next = ddi_get_next_sibling(cdip);
7055 
7056 		i_mdi_pm_post_config_one(cdip);
7057 		cdip = next;
7058 	}
7059 	ndi_devi_exit(vdip, circ);
7060 }
7061 
7062 static void
i_mdi_pm_post_unconfig_one(dev_info_t * child)7063 i_mdi_pm_post_unconfig_one(dev_info_t *child)
7064 {
7065 	mdi_client_t	*ct;
7066 
7067 	ct = i_devi_get_client(child);
7068 	if (ct == NULL)
7069 		return;
7070 
7071 	MDI_CLIENT_LOCK(ct);
7072 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
7073 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
7074 
7075 	if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) {
7076 		MDI_DEBUG(4, (MDI_NOTE, child, "not held\n"));
7077 		MDI_CLIENT_UNLOCK(ct);
7078 		return;
7079 	}
7080 
7081 	/* failure detaching or another thread just attached it */
7082 	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
7083 	    i_ddi_devi_attached(child)) ||
7084 	    (!i_ddi_devi_attached(child) &&
7085 	    !DEVI_IS_ATTACHING(child))) {
7086 		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n"));
7087 		i_mdi_pm_reset_client(ct);
7088 	} else {
7089 		mdi_pathinfo_t  *pip, *next;
7090 		int	valid_path_count = 0;
7091 
7092 		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n"));
7093 		pip = ct->ct_path_head;
7094 		while (pip != NULL) {
7095 			MDI_PI_LOCK(pip);
7096 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
7097 			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
7098 				valid_path_count ++;
7099 			MDI_PI_UNLOCK(pip);
7100 			pip = next;
7101 		}
7102 		i_mdi_pm_rele_client(ct, valid_path_count);
7103 		ct->ct_powercnt_unconfig = 0;
7104 	}
7105 
7106 	MDI_CLIENT_UNLOCK(ct);
7107 }
7108 
7109 static void
i_mdi_pm_post_unconfig(dev_info_t * vdip,dev_info_t * child,int held)7110 i_mdi_pm_post_unconfig(dev_info_t *vdip, dev_info_t *child, int held)
7111 {
7112 	int			circ;
7113 	dev_info_t		*cdip;
7114 
7115 	ASSERT(MDI_VHCI(vdip));
7116 
7117 	if (!held) {
7118 		MDI_DEBUG(4, (MDI_NOTE, vdip, "held = %d", held));
7119 		return;
7120 	}
7121 
7122 	if (child) {
7123 		ASSERT(DEVI_BUSY_OWNED(vdip));
7124 		i_mdi_pm_post_unconfig_one(child);
7125 		return;
7126 	}
7127 
7128 	ndi_devi_enter(vdip, &circ);
7129 	cdip = ddi_get_child(vdip);
7130 	while (cdip) {
7131 		dev_info_t *next = ddi_get_next_sibling(cdip);
7132 
7133 		i_mdi_pm_post_unconfig_one(cdip);
7134 		cdip = next;
7135 	}
7136 	ndi_devi_exit(vdip, circ);
7137 }
7138 
7139 int
mdi_power(dev_info_t * vdip,mdi_pm_op_t op,void * args,char * devnm,int flags)7140 mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags)
7141 {
7142 	int			circ, ret = MDI_SUCCESS;
7143 	dev_info_t		*client_dip = NULL;
7144 	mdi_client_t		*ct;
7145 
7146 	/*
7147 	 * Handling ndi_devi_config_one and ndi_devi_unconfig_one.
7148 	 * Power up pHCI for the named client device.
7149 	 * Note: Before the client is enumerated under vhci by phci,
7150 	 * client_dip can be NULL. Then proceed to power up all the
7151 	 * pHCIs.
7152 	 */
7153 	if (devnm != NULL) {
7154 		ndi_devi_enter(vdip, &circ);
7155 		client_dip = ndi_devi_findchild(vdip, devnm);
7156 	}
7157 
7158 	MDI_DEBUG(4, (MDI_NOTE, vdip,
7159 	    "op = %d %s %p", op, devnm ? devnm : "", (void *)client_dip));
7160 
7161 	switch (op) {
7162 	case MDI_PM_PRE_CONFIG:
7163 		ret = i_mdi_pm_pre_config(vdip, client_dip);
7164 		break;
7165 
7166 	case MDI_PM_PRE_UNCONFIG:
7167 		ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args,
7168 		    flags);
7169 		break;
7170 
7171 	case MDI_PM_POST_CONFIG:
7172 		i_mdi_pm_post_config(vdip, client_dip);
7173 		break;
7174 
7175 	case MDI_PM_POST_UNCONFIG:
7176 		i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args);
7177 		break;
7178 
7179 	case MDI_PM_HOLD_POWER:
7180 	case MDI_PM_RELE_POWER:
7181 		ASSERT(args);
7182 
7183 		client_dip = (dev_info_t *)args;
7184 		ASSERT(MDI_CLIENT(client_dip));
7185 
7186 		ct = i_devi_get_client(client_dip);
7187 		MDI_CLIENT_LOCK(ct);
7188 
7189 		if (op == MDI_PM_HOLD_POWER) {
7190 			if (ct->ct_power_cnt == 0) {
7191 				(void) i_mdi_power_all_phci(ct);
7192 				MDI_DEBUG(4, (MDI_NOTE, client_dip,
7193 				    "i_mdi_pm_hold_client\n"));
7194 				i_mdi_pm_hold_client(ct, ct->ct_path_count);
7195 			}
7196 		} else {
7197 			if (DEVI_IS_ATTACHING(client_dip)) {
7198 				MDI_DEBUG(4, (MDI_NOTE, client_dip,
7199 				    "i_mdi_pm_rele_client\n"));
7200 				i_mdi_pm_rele_client(ct, ct->ct_path_count);
7201 			} else {
7202 				MDI_DEBUG(4, (MDI_NOTE, client_dip,
7203 				    "i_mdi_pm_reset_client\n"));
7204 				i_mdi_pm_reset_client(ct);
7205 			}
7206 		}
7207 
7208 		MDI_CLIENT_UNLOCK(ct);
7209 		break;
7210 
7211 	default:
7212 		break;
7213 	}
7214 
7215 	if (devnm)
7216 		ndi_devi_exit(vdip, circ);
7217 
7218 	return (ret);
7219 }
7220 
7221 int
mdi_component_is_vhci(dev_info_t * dip,const char ** mdi_class)7222 mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class)
7223 {
7224 	mdi_vhci_t *vhci;
7225 
7226 	if (!MDI_VHCI(dip))
7227 		return (MDI_FAILURE);
7228 
7229 	if (mdi_class) {
7230 		vhci = DEVI(dip)->devi_mdi_xhci;
7231 		ASSERT(vhci);
7232 		*mdi_class = vhci->vh_class;
7233 	}
7234 
7235 	return (MDI_SUCCESS);
7236 }
7237 
7238 int
mdi_component_is_phci(dev_info_t * dip,const char ** mdi_class)7239 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class)
7240 {
7241 	mdi_phci_t *phci;
7242 
7243 	if (!MDI_PHCI(dip))
7244 		return (MDI_FAILURE);
7245 
7246 	if (mdi_class) {
7247 		phci = DEVI(dip)->devi_mdi_xhci;
7248 		ASSERT(phci);
7249 		*mdi_class = phci->ph_vhci->vh_class;
7250 	}
7251 
7252 	return (MDI_SUCCESS);
7253 }
7254 
7255 int
mdi_component_is_client(dev_info_t * dip,const char ** mdi_class)7256 mdi_component_is_client(dev_info_t *dip, const char **mdi_class)
7257 {
7258 	mdi_client_t *client;
7259 
7260 	if (!MDI_CLIENT(dip))
7261 		return (MDI_FAILURE);
7262 
7263 	if (mdi_class) {
7264 		client = DEVI(dip)->devi_mdi_client;
7265 		ASSERT(client);
7266 		*mdi_class = client->ct_vhci->vh_class;
7267 	}
7268 
7269 	return (MDI_SUCCESS);
7270 }
7271 
7272 void *
mdi_client_get_vhci_private(dev_info_t * dip)7273 mdi_client_get_vhci_private(dev_info_t *dip)
7274 {
7275 	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
7276 	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
7277 		mdi_client_t	*ct;
7278 		ct = i_devi_get_client(dip);
7279 		return (ct->ct_vprivate);
7280 	}
7281 	return (NULL);
7282 }
7283 
7284 void
mdi_client_set_vhci_private(dev_info_t * dip,void * data)7285 mdi_client_set_vhci_private(dev_info_t *dip, void *data)
7286 {
7287 	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
7288 	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
7289 		mdi_client_t	*ct;
7290 		ct = i_devi_get_client(dip);
7291 		ct->ct_vprivate = data;
7292 	}
7293 }
7294 /*
7295  * mdi_pi_get_vhci_private():
7296  *		Get the vhci private information associated with the
7297  *		mdi_pathinfo node
7298  */
7299 void *
mdi_pi_get_vhci_private(mdi_pathinfo_t * pip)7300 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip)
7301 {
7302 	caddr_t	vprivate = NULL;
7303 	if (pip) {
7304 		vprivate = MDI_PI(pip)->pi_vprivate;
7305 	}
7306 	return (vprivate);
7307 }
7308 
7309 /*
7310  * mdi_pi_set_vhci_private():
7311  *		Set the vhci private information in the mdi_pathinfo node
7312  */
7313 void
mdi_pi_set_vhci_private(mdi_pathinfo_t * pip,void * priv)7314 mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv)
7315 {
7316 	if (pip) {
7317 		MDI_PI(pip)->pi_vprivate = priv;
7318 	}
7319 }
7320 
7321 /*
7322  * mdi_phci_get_vhci_private():
7323  *		Get the vhci private information associated with the
7324  *		mdi_phci node
7325  */
7326 void *
mdi_phci_get_vhci_private(dev_info_t * dip)7327 mdi_phci_get_vhci_private(dev_info_t *dip)
7328 {
7329 	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
7330 	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
7331 		mdi_phci_t	*ph;
7332 		ph = i_devi_get_phci(dip);
7333 		return (ph->ph_vprivate);
7334 	}
7335 	return (NULL);
7336 }
7337 
7338 /*
7339  * mdi_phci_set_vhci_private():
7340  *		Set the vhci private information in the mdi_phci node
7341  */
7342 void
mdi_phci_set_vhci_private(dev_info_t * dip,void * priv)7343 mdi_phci_set_vhci_private(dev_info_t *dip, void *priv)
7344 {
7345 	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
7346 	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
7347 		mdi_phci_t	*ph;
7348 		ph = i_devi_get_phci(dip);
7349 		ph->ph_vprivate = priv;
7350 	}
7351 }
7352 
7353 int
mdi_pi_ishidden(mdi_pathinfo_t * pip)7354 mdi_pi_ishidden(mdi_pathinfo_t *pip)
7355 {
7356 	return (MDI_PI_FLAGS_IS_HIDDEN(pip));
7357 }
7358 
7359 int
mdi_pi_device_isremoved(mdi_pathinfo_t * pip)7360 mdi_pi_device_isremoved(mdi_pathinfo_t *pip)
7361 {
7362 	return (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip));
7363 }
7364 
7365 /* Return 1 if all client paths are device_removed */
7366 static int
i_mdi_client_all_devices_removed(mdi_client_t * ct)7367 i_mdi_client_all_devices_removed(mdi_client_t *ct)
7368 {
7369 	mdi_pathinfo_t  *pip;
7370 	int		all_devices_removed = 1;
7371 
7372 	MDI_CLIENT_LOCK(ct);
7373 	for (pip = ct->ct_path_head; pip;
7374 	    pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link) {
7375 		if (!mdi_pi_device_isremoved(pip)) {
7376 			all_devices_removed = 0;
7377 			break;
7378 		}
7379 	}
7380 	MDI_CLIENT_UNLOCK(ct);
7381 	return (all_devices_removed);
7382 }
7383 
7384 /*
7385  * When processing path hotunplug, represent device removal.
7386  */
7387 int
mdi_pi_device_remove(mdi_pathinfo_t * pip)7388 mdi_pi_device_remove(mdi_pathinfo_t *pip)
7389 {
7390 	mdi_client_t	*ct;
7391 
7392 	MDI_PI_LOCK(pip);
7393 	if (mdi_pi_device_isremoved(pip)) {
7394 		MDI_PI_UNLOCK(pip);
7395 		return (0);
7396 	}
7397 	MDI_PI_FLAGS_SET_DEVICE_REMOVED(pip);
7398 	MDI_PI_FLAGS_SET_HIDDEN(pip);
7399 	MDI_PI_UNLOCK(pip);
7400 
7401 	/*
7402 	 * If all paths associated with the client are now DEVICE_REMOVED,
7403 	 * reflect DEVICE_REMOVED in the client.
7404 	 */
7405 	ct = MDI_PI(pip)->pi_client;
7406 	if (ct && ct->ct_dip && i_mdi_client_all_devices_removed(ct))
7407 		(void) ndi_devi_device_remove(ct->ct_dip);
7408 	else
7409 		i_ddi_di_cache_invalidate();
7410 
7411 	return (1);
7412 }
7413 
7414 /*
7415  * When processing hotplug, if a path marked mdi_pi_device_isremoved()
7416  * is now accessible then this interfaces is used to represent device insertion.
7417  */
7418 int
mdi_pi_device_insert(mdi_pathinfo_t * pip)7419 mdi_pi_device_insert(mdi_pathinfo_t *pip)
7420 {
7421 	MDI_PI_LOCK(pip);
7422 	if (!mdi_pi_device_isremoved(pip)) {
7423 		MDI_PI_UNLOCK(pip);
7424 		return (0);
7425 	}
7426 	MDI_PI_FLAGS_CLR_DEVICE_REMOVED(pip);
7427 	MDI_PI_FLAGS_CLR_HIDDEN(pip);
7428 	MDI_PI_UNLOCK(pip);
7429 
7430 	i_ddi_di_cache_invalidate();
7431 
7432 	return (1);
7433 }
7434 
7435 /*
7436  * List of vhci class names:
7437  * A vhci class name must be in this list only if the corresponding vhci
7438  * driver intends to use the mdi provided bus config implementation
7439  * (i.e., mdi_vhci_bus_config()).
7440  */
7441 static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB };
7442 #define	N_VHCI_CLASSES	(sizeof (vhci_class_list) / sizeof (char *))
7443 
7444 /*
7445  * During boot time, the on-disk vhci cache for every vhci class is read
7446  * in the form of an nvlist and stored here.
7447  */
7448 static nvlist_t *vhcache_nvl[N_VHCI_CLASSES];
7449 
7450 /* nvpair names in vhci cache nvlist */
7451 #define	MDI_VHCI_CACHE_VERSION	1
7452 #define	MDI_NVPNAME_VERSION	"version"
7453 #define	MDI_NVPNAME_PHCIS	"phcis"
7454 #define	MDI_NVPNAME_CTADDRMAP	"clientaddrmap"
7455 
7456 /*
7457  * Given vhci class name, return its on-disk vhci cache filename.
7458  * Memory for the returned filename which includes the full path is allocated
7459  * by this function.
7460  */
7461 static char *
vhclass2vhcache_filename(char * vhclass)7462 vhclass2vhcache_filename(char *vhclass)
7463 {
7464 	char *filename;
7465 	int len;
7466 	static char *fmt = "/etc/devices/mdi_%s_cache";
7467 
7468 	/*
7469 	 * fmt contains the on-disk vhci cache file name format;
7470 	 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache".
7471 	 */
7472 
7473 	/* the -1 below is to account for "%s" in the format string */
7474 	len = strlen(fmt) + strlen(vhclass) - 1;
7475 	filename = kmem_alloc(len, KM_SLEEP);
7476 	(void) snprintf(filename, len, fmt, vhclass);
7477 	ASSERT(len == (strlen(filename) + 1));
7478 	return (filename);
7479 }
7480 
7481 /*
7482  * initialize the vhci cache related data structures and read the on-disk
7483  * vhci cached data into memory.
7484  */
7485 static void
setup_vhci_cache(mdi_vhci_t * vh)7486 setup_vhci_cache(mdi_vhci_t *vh)
7487 {
7488 	mdi_vhci_config_t *vhc;
7489 	mdi_vhci_cache_t *vhcache;
7490 	int i;
7491 	nvlist_t *nvl = NULL;
7492 
7493 	vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP);
7494 	vh->vh_config = vhc;
7495 	vhcache = &vhc->vhc_vhcache;
7496 
7497 	vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class);
7498 
7499 	mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL);
7500 	cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL);
7501 
7502 	rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL);
7503 
7504 	/*
7505 	 * Create string hash; same as mod_hash_create_strhash() except that
7506 	 * we use NULL key destructor.
7507 	 */
7508 	vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class,
7509 	    mdi_bus_config_cache_hash_size,
7510 	    mod_hash_null_keydtor, mod_hash_null_valdtor,
7511 	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
7512 
7513 	/*
7514 	 * The on-disk vhci cache is read during booting prior to the
7515 	 * lights-out period by mdi_read_devices_files().
7516 	 */
7517 	for (i = 0; i < N_VHCI_CLASSES; i++) {
7518 		if (strcmp(vhci_class_list[i], vh->vh_class) == 0) {
7519 			nvl = vhcache_nvl[i];
7520 			vhcache_nvl[i] = NULL;
7521 			break;
7522 		}
7523 	}
7524 
7525 	/*
7526 	 * this is to cover the case of some one manually causing unloading
7527 	 * (or detaching) and reloading (or attaching) of a vhci driver.
7528 	 */
7529 	if (nvl == NULL && modrootloaded)
7530 		nvl = read_on_disk_vhci_cache(vh->vh_class);
7531 
7532 	if (nvl != NULL) {
7533 		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7534 		if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS)
7535 			vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
7536 		else  {
7537 			cmn_err(CE_WARN,
7538 			    "%s: data file corrupted, will recreate",
7539 			    vhc->vhc_vhcache_filename);
7540 		}
7541 		rw_exit(&vhcache->vhcache_lock);
7542 		nvlist_free(nvl);
7543 	}
7544 
7545 	vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc,
7546 	    CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush");
7547 
7548 	vhc->vhc_path_discovery_boot = mdi_path_discovery_boot;
7549 	vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot;
7550 }
7551 
7552 /*
7553  * free all vhci cache related resources
7554  */
7555 static int
destroy_vhci_cache(mdi_vhci_t * vh)7556 destroy_vhci_cache(mdi_vhci_t *vh)
7557 {
7558 	mdi_vhci_config_t *vhc = vh->vh_config;
7559 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7560 	mdi_vhcache_phci_t *cphci, *cphci_next;
7561 	mdi_vhcache_client_t *cct, *cct_next;
7562 	mdi_vhcache_pathinfo_t *cpi, *cpi_next;
7563 
7564 	if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS)
7565 		return (MDI_FAILURE);
7566 
7567 	kmem_free(vhc->vhc_vhcache_filename,
7568 	    strlen(vhc->vhc_vhcache_filename) + 1);
7569 
7570 	mod_hash_destroy_strhash(vhcache->vhcache_client_hash);
7571 
7572 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7573 	    cphci = cphci_next) {
7574 		cphci_next = cphci->cphci_next;
7575 		free_vhcache_phci(cphci);
7576 	}
7577 
7578 	for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) {
7579 		cct_next = cct->cct_next;
7580 		for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) {
7581 			cpi_next = cpi->cpi_next;
7582 			free_vhcache_pathinfo(cpi);
7583 		}
7584 		free_vhcache_client(cct);
7585 	}
7586 
7587 	rw_destroy(&vhcache->vhcache_lock);
7588 
7589 	mutex_destroy(&vhc->vhc_lock);
7590 	cv_destroy(&vhc->vhc_cv);
7591 	kmem_free(vhc, sizeof (mdi_vhci_config_t));
7592 	return (MDI_SUCCESS);
7593 }
7594 
7595 /*
7596  * Stop all vhci cache related async threads and free their resources.
7597  */
7598 static int
stop_vhcache_async_threads(mdi_vhci_config_t * vhc)7599 stop_vhcache_async_threads(mdi_vhci_config_t *vhc)
7600 {
7601 	mdi_async_client_config_t *acc, *acc_next;
7602 
7603 	mutex_enter(&vhc->vhc_lock);
7604 	vhc->vhc_flags |= MDI_VHC_EXIT;
7605 	ASSERT(vhc->vhc_acc_thrcount >= 0);
7606 	cv_broadcast(&vhc->vhc_cv);
7607 
7608 	while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) ||
7609 	    vhc->vhc_acc_thrcount != 0) {
7610 		mutex_exit(&vhc->vhc_lock);
7611 		delay_random(mdi_delay);
7612 		mutex_enter(&vhc->vhc_lock);
7613 	}
7614 
7615 	vhc->vhc_flags &= ~MDI_VHC_EXIT;
7616 
7617 	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) {
7618 		acc_next = acc->acc_next;
7619 		free_async_client_config(acc);
7620 	}
7621 	vhc->vhc_acc_list_head = NULL;
7622 	vhc->vhc_acc_list_tail = NULL;
7623 	vhc->vhc_acc_count = 0;
7624 
7625 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
7626 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
7627 		mutex_exit(&vhc->vhc_lock);
7628 		if (flush_vhcache(vhc, 0) != MDI_SUCCESS) {
7629 			vhcache_dirty(vhc);
7630 			return (MDI_FAILURE);
7631 		}
7632 	} else
7633 		mutex_exit(&vhc->vhc_lock);
7634 
7635 	if (callb_delete(vhc->vhc_cbid) != 0)
7636 		return (MDI_FAILURE);
7637 
7638 	return (MDI_SUCCESS);
7639 }
7640 
7641 /*
7642  * Stop vhci cache flush thread
7643  */
7644 /* ARGSUSED */
7645 static boolean_t
stop_vhcache_flush_thread(void * arg,int code)7646 stop_vhcache_flush_thread(void *arg, int code)
7647 {
7648 	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
7649 
7650 	mutex_enter(&vhc->vhc_lock);
7651 	vhc->vhc_flags |= MDI_VHC_EXIT;
7652 	cv_broadcast(&vhc->vhc_cv);
7653 
7654 	while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
7655 		mutex_exit(&vhc->vhc_lock);
7656 		delay_random(mdi_delay);
7657 		mutex_enter(&vhc->vhc_lock);
7658 	}
7659 
7660 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
7661 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
7662 		mutex_exit(&vhc->vhc_lock);
7663 		(void) flush_vhcache(vhc, 1);
7664 	} else
7665 		mutex_exit(&vhc->vhc_lock);
7666 
7667 	return (B_TRUE);
7668 }
7669 
7670 /*
7671  * Enqueue the vhcache phci (cphci) at the tail of the list
7672  */
7673 static void
enqueue_vhcache_phci(mdi_vhci_cache_t * vhcache,mdi_vhcache_phci_t * cphci)7674 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci)
7675 {
7676 	cphci->cphci_next = NULL;
7677 	if (vhcache->vhcache_phci_head == NULL)
7678 		vhcache->vhcache_phci_head = cphci;
7679 	else
7680 		vhcache->vhcache_phci_tail->cphci_next = cphci;
7681 	vhcache->vhcache_phci_tail = cphci;
7682 }
7683 
7684 /*
7685  * Enqueue the vhcache pathinfo (cpi) at the tail of the list
7686  */
7687 static void
enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t * cct,mdi_vhcache_pathinfo_t * cpi)7688 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct,
7689     mdi_vhcache_pathinfo_t *cpi)
7690 {
7691 	cpi->cpi_next = NULL;
7692 	if (cct->cct_cpi_head == NULL)
7693 		cct->cct_cpi_head = cpi;
7694 	else
7695 		cct->cct_cpi_tail->cpi_next = cpi;
7696 	cct->cct_cpi_tail = cpi;
7697 }
7698 
7699 /*
7700  * Enqueue the vhcache pathinfo (cpi) at the correct location in the
7701  * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
7702  * flag set come at the beginning of the list. All cpis which have this
7703  * flag set come at the end of the list.
7704  */
7705 static void
enqueue_vhcache_pathinfo(mdi_vhcache_client_t * cct,mdi_vhcache_pathinfo_t * newcpi)7706 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct,
7707     mdi_vhcache_pathinfo_t *newcpi)
7708 {
7709 	mdi_vhcache_pathinfo_t *cpi, *prev_cpi;
7710 
7711 	if (cct->cct_cpi_head == NULL ||
7712 	    (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))
7713 		enqueue_tail_vhcache_pathinfo(cct, newcpi);
7714 	else {
7715 		for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL &&
7716 		    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST);
7717 		    prev_cpi = cpi, cpi = cpi->cpi_next)
7718 			;
7719 
7720 		if (prev_cpi == NULL)
7721 			cct->cct_cpi_head = newcpi;
7722 		else
7723 			prev_cpi->cpi_next = newcpi;
7724 
7725 		newcpi->cpi_next = cpi;
7726 
7727 		if (cpi == NULL)
7728 			cct->cct_cpi_tail = newcpi;
7729 	}
7730 }
7731 
7732 /*
7733  * Enqueue the vhcache client (cct) at the tail of the list
7734  */
7735 static void
enqueue_vhcache_client(mdi_vhci_cache_t * vhcache,mdi_vhcache_client_t * cct)7736 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache,
7737     mdi_vhcache_client_t *cct)
7738 {
7739 	cct->cct_next = NULL;
7740 	if (vhcache->vhcache_client_head == NULL)
7741 		vhcache->vhcache_client_head = cct;
7742 	else
7743 		vhcache->vhcache_client_tail->cct_next = cct;
7744 	vhcache->vhcache_client_tail = cct;
7745 }
7746 
/*
 * Free an array of nelem strings: every non-NULL string (sized by its
 * strlen() + 1) and then the array itself. A NULL array is ignored.
 */
static void
free_string_array(char **str, int nelem)
{
	int idx;

	if (str == NULL)
		return;

	for (idx = 0; idx < nelem; idx++) {
		if (str[idx] == NULL)
			continue;
		kmem_free(str[idx], strlen(str[idx]) + 1);
	}

	kmem_free(str, sizeof (char *) * nelem);
}
7760 
7761 static void
free_vhcache_phci(mdi_vhcache_phci_t * cphci)7762 free_vhcache_phci(mdi_vhcache_phci_t *cphci)
7763 {
7764 	kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1);
7765 	kmem_free(cphci, sizeof (*cphci));
7766 }
7767 
7768 static void
free_vhcache_pathinfo(mdi_vhcache_pathinfo_t * cpi)7769 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi)
7770 {
7771 	kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1);
7772 	kmem_free(cpi, sizeof (*cpi));
7773 }
7774 
7775 static void
free_vhcache_client(mdi_vhcache_client_t * cct)7776 free_vhcache_client(mdi_vhcache_client_t *cct)
7777 {
7778 	kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1);
7779 	kmem_free(cct, sizeof (*cct));
7780 }
7781 
7782 static char *
vhcache_mknameaddr(char * ct_name,char * ct_addr,int * ret_len)7783 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len)
7784 {
7785 	char *name_addr;
7786 	int len;
7787 
7788 	len = strlen(ct_name) + strlen(ct_addr) + 2;
7789 	name_addr = kmem_alloc(len, KM_SLEEP);
7790 	(void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr);
7791 
7792 	if (ret_len)
7793 		*ret_len = len;
7794 	return (name_addr);
7795 }
7796 
7797 /*
7798  * Copy the contents of paddrnvl to vhci cache.
7799  * paddrnvl nvlist contains path information for a vhci client.
7800  * See the comment in mainnvl_to_vhcache() for the format of this nvlist.
7801  */
7802 static void
paddrnvl_to_vhcache(nvlist_t * nvl,mdi_vhcache_phci_t * cphci_list[],mdi_vhcache_client_t * cct)7803 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[],
7804     mdi_vhcache_client_t *cct)
7805 {
7806 	nvpair_t *nvp = NULL;
7807 	mdi_vhcache_pathinfo_t *cpi;
7808 	uint_t nelem;
7809 	uint32_t *val;
7810 
7811 	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
7812 		ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY);
7813 		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
7814 		cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
7815 		(void) nvpair_value_uint32_array(nvp, &val, &nelem);
7816 		ASSERT(nelem == 2);
7817 		cpi->cpi_cphci = cphci_list[val[0]];
7818 		cpi->cpi_flags = val[1];
7819 		enqueue_tail_vhcache_pathinfo(cct, cpi);
7820 	}
7821 }
7822 
7823 /*
7824  * Copy the contents of caddrmapnvl to vhci cache.
7825  * caddrmapnvl nvlist contains vhci client address to phci client address
7826  * mappings. See the comment in mainnvl_to_vhcache() for the format of
7827  * this nvlist.
7828  */
7829 static void
caddrmapnvl_to_vhcache(mdi_vhci_cache_t * vhcache,nvlist_t * nvl,mdi_vhcache_phci_t * cphci_list[])7830 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl,
7831     mdi_vhcache_phci_t *cphci_list[])
7832 {
7833 	nvpair_t *nvp = NULL;
7834 	nvlist_t *paddrnvl;
7835 	mdi_vhcache_client_t *cct;
7836 
7837 	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
7838 		ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST);
7839 		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
7840 		cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
7841 		(void) nvpair_value_nvlist(nvp, &paddrnvl);
7842 		paddrnvl_to_vhcache(paddrnvl, cphci_list, cct);
7843 		/* the client must contain at least one path */
7844 		ASSERT(cct->cct_cpi_head != NULL);
7845 
7846 		enqueue_vhcache_client(vhcache, cct);
7847 		(void) mod_hash_insert(vhcache->vhcache_client_hash,
7848 		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
7849 	}
7850 }
7851 
7852 /*
7853  * Copy the contents of the main nvlist to vhci cache.
7854  *
7855  * VHCI busconfig cached data is stored in the form of a nvlist on the disk.
7856  * The nvlist contains the mappings between the vhci client addresses and
7857  * their corresponding phci client addresses.
7858  *
7859  * The structure of the nvlist is as follows:
7860  *
7861  * Main nvlist:
7862  *	NAME		TYPE		DATA
7863  *	version		int32		version number
7864  *	phcis		string array	array of phci paths
7865  *	clientaddrmap	nvlist_t	c2paddrs_nvl (see below)
7866  *
7867  * structure of c2paddrs_nvl:
7868  *	NAME		TYPE		DATA
7869  *	caddr1		nvlist_t	paddrs_nvl1
7870  *	caddr2		nvlist_t	paddrs_nvl2
7871  *	...
7872  * where caddr1, caddr2, ... are vhci client name and addresses in the
7873  * form of "<clientname>@<clientaddress>".
7874  * (for example: "ssd@2000002037cd9f72");
7875  * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information.
7876  *
7877  * structure of paddrs_nvl:
7878  *	NAME		TYPE		DATA
7879  *	pi_addr1	uint32_array	(phci-id, cpi_flags)
7880  *	pi_addr2	uint32_array	(phci-id, cpi_flags)
7881  *	...
7882  * where pi_addr1, pi_addr2, ... are bus specific addresses of pathinfo nodes
7883  * (so called pi_addrs, for example: "w2100002037cd9f72,0");
 * phci-ids are integers that identify the pHCIs to which the
 * bus specific address belongs. These integers are used as an index
 * into the phcis string array in the main nvlist to get the pHCI path.
7887  */
7888 static int
mainnvl_to_vhcache(mdi_vhci_cache_t * vhcache,nvlist_t * nvl)7889 mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl)
7890 {
7891 	char **phcis, **phci_namep;
7892 	uint_t nphcis;
7893 	mdi_vhcache_phci_t *cphci, **cphci_list;
7894 	nvlist_t *caddrmapnvl;
7895 	int32_t ver;
7896 	int i;
7897 	size_t cphci_list_size;
7898 
7899 	ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock));
7900 
7901 	if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 ||
7902 	    ver != MDI_VHCI_CACHE_VERSION)
7903 		return (MDI_FAILURE);
7904 
7905 	if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis,
7906 	    &nphcis) != 0)
7907 		return (MDI_SUCCESS);
7908 
7909 	ASSERT(nphcis > 0);
7910 
7911 	cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis;
7912 	cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP);
7913 	for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) {
7914 		cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP);
7915 		cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP);
7916 		enqueue_vhcache_phci(vhcache, cphci);
7917 		cphci_list[i] = cphci;
7918 	}
7919 
7920 	ASSERT(vhcache->vhcache_phci_head != NULL);
7921 
7922 	if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0)
7923 		caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list);
7924 
7925 	kmem_free(cphci_list, cphci_list_size);
7926 	return (MDI_SUCCESS);
7927 }
7928 
7929 /*
7930  * Build paddrnvl for the specified client using the information in the
7931  * vhci cache and add it to the caddrmapnnvl.
7932  * Returns 0 on success, errno on failure.
7933  */
7934 static int
vhcache_to_paddrnvl(mdi_vhci_cache_t * vhcache,mdi_vhcache_client_t * cct,nvlist_t * caddrmapnvl)7935 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct,
7936     nvlist_t *caddrmapnvl)
7937 {
7938 	mdi_vhcache_pathinfo_t *cpi;
7939 	nvlist_t *nvl;
7940 	int err;
7941 	uint32_t val[2];
7942 
7943 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7944 
7945 	if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0)
7946 		return (err);
7947 
7948 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7949 		val[0] = cpi->cpi_cphci->cphci_id;
7950 		val[1] = cpi->cpi_flags;
7951 		if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2))
7952 		    != 0)
7953 			goto out;
7954 	}
7955 
7956 	err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl);
7957 out:
7958 	nvlist_free(nvl);
7959 	return (err);
7960 }
7961 
7962 /*
7963  * Build caddrmapnvl using the information in the vhci cache
7964  * and add it to the mainnvl.
7965  * Returns 0 on success, errno on failure.
7966  */
7967 static int
vhcache_to_caddrmapnvl(mdi_vhci_cache_t * vhcache,nvlist_t * mainnvl)7968 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl)
7969 {
7970 	mdi_vhcache_client_t *cct;
7971 	nvlist_t *nvl;
7972 	int err;
7973 
7974 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7975 
7976 	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0)
7977 		return (err);
7978 
7979 	for (cct = vhcache->vhcache_client_head; cct != NULL;
7980 	    cct = cct->cct_next) {
7981 		if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0)
7982 			goto out;
7983 	}
7984 
7985 	err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl);
7986 out:
7987 	nvlist_free(nvl);
7988 	return (err);
7989 }
7990 
7991 /*
7992  * Build nvlist using the information in the vhci cache.
7993  * See the comment in mainnvl_to_vhcache() for the format of the nvlist.
7994  * Returns nvl on success, NULL on failure.
7995  */
7996 static nvlist_t *
vhcache_to_mainnvl(mdi_vhci_cache_t * vhcache)7997 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache)
7998 {
7999 	mdi_vhcache_phci_t *cphci;
8000 	uint_t phci_count;
8001 	char **phcis;
8002 	nvlist_t *nvl;
8003 	int err, i;
8004 
8005 	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) {
8006 		nvl = NULL;
8007 		goto out;
8008 	}
8009 
8010 	if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION,
8011 	    MDI_VHCI_CACHE_VERSION)) != 0)
8012 		goto out;
8013 
8014 	rw_enter(&vhcache->vhcache_lock, RW_READER);
8015 	if (vhcache->vhcache_phci_head == NULL) {
8016 		rw_exit(&vhcache->vhcache_lock);
8017 		return (nvl);
8018 	}
8019 
8020 	phci_count = 0;
8021 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8022 	    cphci = cphci->cphci_next)
8023 		cphci->cphci_id = phci_count++;
8024 
8025 	/* build phci pathname list */
8026 	phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP);
8027 	for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL;
8028 	    cphci = cphci->cphci_next, i++)
8029 		phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP);
8030 
8031 	err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis,
8032 	    phci_count);
8033 	free_string_array(phcis, phci_count);
8034 
8035 	if (err == 0 &&
8036 	    (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) {
8037 		rw_exit(&vhcache->vhcache_lock);
8038 		return (nvl);
8039 	}
8040 
8041 	rw_exit(&vhcache->vhcache_lock);
8042 out:
8043 	if (nvl)
8044 		nvlist_free(nvl);
8045 	return (NULL);
8046 }
8047 
8048 /*
8049  * Lookup vhcache phci structure for the specified phci path.
8050  */
8051 static mdi_vhcache_phci_t *
lookup_vhcache_phci_by_name(mdi_vhci_cache_t * vhcache,char * phci_path)8052 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path)
8053 {
8054 	mdi_vhcache_phci_t *cphci;
8055 
8056 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8057 
8058 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8059 	    cphci = cphci->cphci_next) {
8060 		if (strcmp(cphci->cphci_path, phci_path) == 0)
8061 			return (cphci);
8062 	}
8063 
8064 	return (NULL);
8065 }
8066 
8067 /*
8068  * Lookup vhcache phci structure for the specified phci.
8069  */
8070 static mdi_vhcache_phci_t *
lookup_vhcache_phci_by_addr(mdi_vhci_cache_t * vhcache,mdi_phci_t * ph)8071 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph)
8072 {
8073 	mdi_vhcache_phci_t *cphci;
8074 
8075 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8076 
8077 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8078 	    cphci = cphci->cphci_next) {
8079 		if (cphci->cphci_phci == ph)
8080 			return (cphci);
8081 	}
8082 
8083 	return (NULL);
8084 }
8085 
8086 /*
8087  * Add the specified phci to the vhci cache if not already present.
8088  */
8089 static void
vhcache_phci_add(mdi_vhci_config_t * vhc,mdi_phci_t * ph)8090 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
8091 {
8092 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8093 	mdi_vhcache_phci_t *cphci;
8094 	char *pathname;
8095 	int cache_updated;
8096 
8097 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8098 
8099 	pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
8100 	(void) ddi_pathname(ph->ph_dip, pathname);
8101 	if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname))
8102 	    != NULL) {
8103 		cphci->cphci_phci = ph;
8104 		cache_updated = 0;
8105 	} else {
8106 		cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP);
8107 		cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP);
8108 		cphci->cphci_phci = ph;
8109 		enqueue_vhcache_phci(vhcache, cphci);
8110 		cache_updated = 1;
8111 	}
8112 
8113 	rw_exit(&vhcache->vhcache_lock);
8114 
8115 	/*
8116 	 * Since a new phci has been added, reset
8117 	 * vhc_path_discovery_cutoff_time to allow for discovery of paths
8118 	 * during next vhcache_discover_paths().
8119 	 */
8120 	mutex_enter(&vhc->vhc_lock);
8121 	vhc->vhc_path_discovery_cutoff_time = 0;
8122 	mutex_exit(&vhc->vhc_lock);
8123 
8124 	kmem_free(pathname, MAXPATHLEN);
8125 	if (cache_updated)
8126 		vhcache_dirty(vhc);
8127 }
8128 
8129 /*
8130  * Remove the reference to the specified phci from the vhci cache.
8131  */
8132 static void
vhcache_phci_remove(mdi_vhci_config_t * vhc,mdi_phci_t * ph)8133 vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
8134 {
8135 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8136 	mdi_vhcache_phci_t *cphci;
8137 
8138 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8139 	if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) {
8140 		/* do not remove the actual mdi_vhcache_phci structure */
8141 		cphci->cphci_phci = NULL;
8142 	}
8143 	rw_exit(&vhcache->vhcache_lock);
8144 }
8145 
8146 static void
init_vhcache_lookup_token(mdi_vhcache_lookup_token_t * dst,mdi_vhcache_lookup_token_t * src)8147 init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst,
8148     mdi_vhcache_lookup_token_t *src)
8149 {
8150 	if (src == NULL) {
8151 		dst->lt_cct = NULL;
8152 		dst->lt_cct_lookup_time = 0;
8153 	} else {
8154 		dst->lt_cct = src->lt_cct;
8155 		dst->lt_cct_lookup_time = src->lt_cct_lookup_time;
8156 	}
8157 }
8158 
8159 /*
8160  * Look up vhcache client for the specified client.
8161  */
8162 static mdi_vhcache_client_t *
lookup_vhcache_client(mdi_vhci_cache_t * vhcache,char * ct_name,char * ct_addr,mdi_vhcache_lookup_token_t * token)8163 lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr,
8164     mdi_vhcache_lookup_token_t *token)
8165 {
8166 	mod_hash_val_t hv;
8167 	char *name_addr;
8168 	int len;
8169 
8170 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8171 
8172 	/*
8173 	 * If no vhcache clean occurred since the last lookup, we can
8174 	 * simply return the cct from the last lookup operation.
8175 	 * It works because ccts are never freed except during the vhcache
8176 	 * cleanup operation.
8177 	 */
8178 	if (token != NULL &&
8179 	    vhcache->vhcache_clean_time < token->lt_cct_lookup_time)
8180 		return (token->lt_cct);
8181 
8182 	name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len);
8183 	if (mod_hash_find(vhcache->vhcache_client_hash,
8184 	    (mod_hash_key_t)name_addr, &hv) == 0) {
8185 		if (token) {
8186 			token->lt_cct = (mdi_vhcache_client_t *)hv;
8187 			token->lt_cct_lookup_time = ddi_get_lbolt64();
8188 		}
8189 	} else {
8190 		if (token) {
8191 			token->lt_cct = NULL;
8192 			token->lt_cct_lookup_time = 0;
8193 		}
8194 		hv = NULL;
8195 	}
8196 	kmem_free(name_addr, len);
8197 	return ((mdi_vhcache_client_t *)hv);
8198 }
8199 
8200 /*
8201  * Add the specified path to the vhci cache if not already present.
8202  * Also add the vhcache client for the client corresponding to this path
8203  * if it doesn't already exist.
8204  */
8205 static void
vhcache_pi_add(mdi_vhci_config_t * vhc,struct mdi_pathinfo * pip)8206 vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
8207 {
8208 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8209 	mdi_vhcache_client_t *cct;
8210 	mdi_vhcache_pathinfo_t *cpi;
8211 	mdi_phci_t *ph = pip->pi_phci;
8212 	mdi_client_t *ct = pip->pi_client;
8213 	int cache_updated = 0;
8214 
8215 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8216 
8217 	/* if vhcache client for this pip doesn't already exist, add it */
8218 	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
8219 	    NULL)) == NULL) {
8220 		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
8221 		cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname,
8222 		    ct->ct_guid, NULL);
8223 		enqueue_vhcache_client(vhcache, cct);
8224 		(void) mod_hash_insert(vhcache->vhcache_client_hash,
8225 		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
8226 		cache_updated = 1;
8227 	}
8228 
8229 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8230 		if (cpi->cpi_cphci->cphci_phci == ph &&
8231 		    strcmp(cpi->cpi_addr, pip->pi_addr) == 0) {
8232 			cpi->cpi_pip = pip;
8233 			if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) {
8234 				cpi->cpi_flags &=
8235 				    ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
8236 				sort_vhcache_paths(cct);
8237 				cache_updated = 1;
8238 			}
8239 			break;
8240 		}
8241 	}
8242 
8243 	if (cpi == NULL) {
8244 		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
8245 		cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP);
8246 		cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph);
8247 		ASSERT(cpi->cpi_cphci != NULL);
8248 		cpi->cpi_pip = pip;
8249 		enqueue_vhcache_pathinfo(cct, cpi);
8250 		cache_updated = 1;
8251 	}
8252 
8253 	rw_exit(&vhcache->vhcache_lock);
8254 
8255 	if (cache_updated)
8256 		vhcache_dirty(vhc);
8257 }
8258 
8259 /*
8260  * Remove the reference to the specified path from the vhci cache.
8261  */
8262 static void
vhcache_pi_remove(mdi_vhci_config_t * vhc,struct mdi_pathinfo * pip)8263 vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
8264 {
8265 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8266 	mdi_client_t *ct = pip->pi_client;
8267 	mdi_vhcache_client_t *cct;
8268 	mdi_vhcache_pathinfo_t *cpi;
8269 
8270 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8271 	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
8272 	    NULL)) != NULL) {
8273 		for (cpi = cct->cct_cpi_head; cpi != NULL;
8274 		    cpi = cpi->cpi_next) {
8275 			if (cpi->cpi_pip == pip) {
8276 				cpi->cpi_pip = NULL;
8277 				break;
8278 			}
8279 		}
8280 	}
8281 	rw_exit(&vhcache->vhcache_lock);
8282 }
8283 
8284 /*
8285  * Flush the vhci cache to disk.
8286  * Returns MDI_SUCCESS on success, MDI_FAILURE on failure.
8287  */
8288 static int
flush_vhcache(mdi_vhci_config_t * vhc,int force_flag)8289 flush_vhcache(mdi_vhci_config_t *vhc, int force_flag)
8290 {
8291 	nvlist_t *nvl;
8292 	int err;
8293 	int rv;
8294 
8295 	/*
8296 	 * It is possible that the system may shutdown before
8297 	 * i_ddi_io_initialized (during stmsboot for example). To allow for
8298 	 * flushing the cache in this case do not check for
8299 	 * i_ddi_io_initialized when force flag is set.
8300 	 */
8301 	if (force_flag == 0 && !i_ddi_io_initialized())
8302 		return (MDI_FAILURE);
8303 
8304 	if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) {
8305 		err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl);
8306 		nvlist_free(nvl);
8307 	} else
8308 		err = EFAULT;
8309 
8310 	rv = MDI_SUCCESS;
8311 	mutex_enter(&vhc->vhc_lock);
8312 	if (err != 0) {
8313 		if (err == EROFS) {
8314 			vhc->vhc_flags |= MDI_VHC_READONLY_FS;
8315 			vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR |
8316 			    MDI_VHC_VHCACHE_DIRTY);
8317 		} else {
8318 			if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) {
8319 				cmn_err(CE_CONT, "%s: update failed\n",
8320 				    vhc->vhc_vhcache_filename);
8321 				vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR;
8322 			}
8323 			rv = MDI_FAILURE;
8324 		}
8325 	} else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) {
8326 		cmn_err(CE_CONT,
8327 		    "%s: update now ok\n", vhc->vhc_vhcache_filename);
8328 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR;
8329 	}
8330 	mutex_exit(&vhc->vhc_lock);
8331 
8332 	return (rv);
8333 }
8334 
8335 /*
8336  * Call flush_vhcache() to flush the vhci cache at the scheduled time.
8337  * Exits itself if left idle for the idle timeout period.
8338  */
8339 static void
vhcache_flush_thread(void * arg)8340 vhcache_flush_thread(void *arg)
8341 {
8342 	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
8343 	clock_t idle_time, quit_at_ticks;
8344 	callb_cpr_t cprinfo;
8345 
8346 	/* number of seconds to sleep idle before exiting */
8347 	idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND;
8348 
8349 	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
8350 	    "mdi_vhcache_flush");
8351 	mutex_enter(&vhc->vhc_lock);
8352 	for (; ; ) {
8353 		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
8354 		    (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) {
8355 			if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) {
8356 				CALLB_CPR_SAFE_BEGIN(&cprinfo);
8357 				(void) cv_timedwait(&vhc->vhc_cv,
8358 				    &vhc->vhc_lock, vhc->vhc_flush_at_ticks);
8359 				CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
8360 			} else {
8361 				vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
8362 				mutex_exit(&vhc->vhc_lock);
8363 
8364 				if (flush_vhcache(vhc, 0) != MDI_SUCCESS)
8365 					vhcache_dirty(vhc);
8366 
8367 				mutex_enter(&vhc->vhc_lock);
8368 			}
8369 		}
8370 
8371 		quit_at_ticks = ddi_get_lbolt() + idle_time;
8372 
8373 		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
8374 		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) &&
8375 		    ddi_get_lbolt() < quit_at_ticks) {
8376 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
8377 			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
8378 			    quit_at_ticks);
8379 			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
8380 		}
8381 
8382 		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
8383 		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY))
8384 			goto out;
8385 	}
8386 
8387 out:
8388 	vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD;
8389 	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
8390 	CALLB_CPR_EXIT(&cprinfo);
8391 }
8392 
8393 /*
8394  * Make vhci cache dirty and schedule flushing by vhcache flush thread.
8395  */
8396 static void
vhcache_dirty(mdi_vhci_config_t * vhc)8397 vhcache_dirty(mdi_vhci_config_t *vhc)
8398 {
8399 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8400 	int create_thread;
8401 
8402 	rw_enter(&vhcache->vhcache_lock, RW_READER);
8403 	/* do not flush cache until the cache is fully built */
8404 	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
8405 		rw_exit(&vhcache->vhcache_lock);
8406 		return;
8407 	}
8408 	rw_exit(&vhcache->vhcache_lock);
8409 
8410 	mutex_enter(&vhc->vhc_lock);
8411 	if (vhc->vhc_flags & MDI_VHC_READONLY_FS) {
8412 		mutex_exit(&vhc->vhc_lock);
8413 		return;
8414 	}
8415 
8416 	vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY;
8417 	vhc->vhc_flush_at_ticks = ddi_get_lbolt() +
8418 	    mdi_vhcache_flush_delay * TICKS_PER_SECOND;
8419 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
8420 		cv_broadcast(&vhc->vhc_cv);
8421 		create_thread = 0;
8422 	} else {
8423 		vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD;
8424 		create_thread = 1;
8425 	}
8426 	mutex_exit(&vhc->vhc_lock);
8427 
8428 	if (create_thread)
8429 		(void) thread_create(NULL, 0, vhcache_flush_thread, vhc,
8430 		    0, &p0, TS_RUN, minclsyspri);
8431 }
8432 
8433 /*
8434  * phci bus config structure - one for for each phci bus config operation that
8435  * we initiate on behalf of a vhci.
8436  */
8437 typedef struct mdi_phci_bus_config_s {
8438 	char *phbc_phci_path;
8439 	struct mdi_vhci_bus_config_s *phbc_vhbusconfig;	/* vhci bus config */
8440 	struct mdi_phci_bus_config_s *phbc_next;
8441 } mdi_phci_bus_config_t;
8442 
8443 /* vhci bus config structure - one for each vhci bus config operation */
8444 typedef struct mdi_vhci_bus_config_s {
8445 	ddi_bus_config_op_t vhbc_op;	/* bus config op */
8446 	major_t vhbc_op_major;		/* bus config op major */
8447 	uint_t vhbc_op_flags;		/* bus config op flags */
8448 	kmutex_t vhbc_lock;
8449 	kcondvar_t vhbc_cv;
8450 	int vhbc_thr_count;
8451 } mdi_vhci_bus_config_t;
8452 
8453 /*
8454  * bus config the specified phci
8455  */
8456 static void
bus_config_phci(void * arg)8457 bus_config_phci(void *arg)
8458 {
8459 	mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg;
8460 	mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig;
8461 	dev_info_t *ph_dip;
8462 
8463 	/*
8464 	 * first configure all path components upto phci and then configure
8465 	 * the phci children.
8466 	 */
8467 	if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0))
8468 	    != NULL) {
8469 		if (vhbc->vhbc_op == BUS_CONFIG_DRIVER ||
8470 		    vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) {
8471 			(void) ndi_devi_config_driver(ph_dip,
8472 			    vhbc->vhbc_op_flags,
8473 			    vhbc->vhbc_op_major);
8474 		} else
8475 			(void) ndi_devi_config(ph_dip,
8476 			    vhbc->vhbc_op_flags);
8477 
8478 		/* release the hold that e_ddi_hold_devi_by_path() placed */
8479 		ndi_rele_devi(ph_dip);
8480 	}
8481 
8482 	kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1);
8483 	kmem_free(phbc, sizeof (*phbc));
8484 
8485 	mutex_enter(&vhbc->vhbc_lock);
8486 	vhbc->vhbc_thr_count--;
8487 	if (vhbc->vhbc_thr_count == 0)
8488 		cv_broadcast(&vhbc->vhbc_cv);
8489 	mutex_exit(&vhbc->vhbc_lock);
8490 }
8491 
8492 /*
8493  * Bus config all phcis associated with the vhci in parallel.
8494  * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL.
8495  */
8496 static void
bus_config_all_phcis(mdi_vhci_cache_t * vhcache,uint_t flags,ddi_bus_config_op_t op,major_t maj)8497 bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags,
8498     ddi_bus_config_op_t op, major_t maj)
8499 {
8500 	mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next;
8501 	mdi_vhci_bus_config_t *vhbc;
8502 	mdi_vhcache_phci_t *cphci;
8503 
8504 	rw_enter(&vhcache->vhcache_lock, RW_READER);
8505 	if (vhcache->vhcache_phci_head == NULL) {
8506 		rw_exit(&vhcache->vhcache_lock);
8507 		return;
8508 	}
8509 
8510 	vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP);
8511 
8512 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8513 	    cphci = cphci->cphci_next) {
8514 		/* skip phcis that haven't attached before root is available */
8515 		if (!modrootloaded && (cphci->cphci_phci == NULL))
8516 			continue;
8517 		phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP);
8518 		phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path,
8519 		    KM_SLEEP);
8520 		phbc->phbc_vhbusconfig = vhbc;
8521 		phbc->phbc_next = phbc_head;
8522 		phbc_head = phbc;
8523 		vhbc->vhbc_thr_count++;
8524 	}
8525 	rw_exit(&vhcache->vhcache_lock);
8526 
8527 	vhbc->vhbc_op = op;
8528 	vhbc->vhbc_op_major = maj;
8529 	vhbc->vhbc_op_flags = NDI_NO_EVENT |
8530 	    (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE));
8531 	mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL);
8532 	cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL);
8533 
8534 	/* now create threads to initiate bus config on all phcis in parallel */
8535 	for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) {
8536 		phbc_next = phbc->phbc_next;
8537 		if (mdi_mtc_off)
8538 			bus_config_phci((void *)phbc);
8539 		else
8540 			(void) thread_create(NULL, 0, bus_config_phci, phbc,
8541 			    0, &p0, TS_RUN, minclsyspri);
8542 	}
8543 
8544 	mutex_enter(&vhbc->vhbc_lock);
8545 	/* wait until all threads exit */
8546 	while (vhbc->vhbc_thr_count > 0)
8547 		cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock);
8548 	mutex_exit(&vhbc->vhbc_lock);
8549 
8550 	mutex_destroy(&vhbc->vhbc_lock);
8551 	cv_destroy(&vhbc->vhbc_cv);
8552 	kmem_free(vhbc, sizeof (*vhbc));
8553 }
8554 
8555 /*
8556  * Single threaded version of bus_config_all_phcis()
8557  */
8558 static void
st_bus_config_all_phcis(mdi_vhci_config_t * vhc,uint_t flags,ddi_bus_config_op_t op,major_t maj)8559 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags,
8560     ddi_bus_config_op_t op, major_t maj)
8561 {
8562 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8563 
8564 	single_threaded_vhconfig_enter(vhc);
8565 	bus_config_all_phcis(vhcache, flags, op, maj);
8566 	single_threaded_vhconfig_exit(vhc);
8567 }
8568 
8569 /*
8570  * Perform BUS_CONFIG_ONE on the specified child of the phci.
8571  * The path includes the child component in addition to the phci path.
8572  */
8573 static int
bus_config_one_phci_child(char * path)8574 bus_config_one_phci_child(char *path)
8575 {
8576 	dev_info_t *ph_dip, *child;
8577 	char *devnm;
8578 	int rv = MDI_FAILURE;
8579 
8580 	/* extract the child component of the phci */
8581 	devnm = strrchr(path, '/');
8582 	*devnm++ = '\0';
8583 
8584 	/*
8585 	 * first configure all path components upto phci and then
8586 	 * configure the phci child.
8587 	 */
8588 	if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) {
8589 		if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) ==
8590 		    NDI_SUCCESS) {
8591 			/*
8592 			 * release the hold that ndi_devi_config_one() placed
8593 			 */
8594 			ndi_rele_devi(child);
8595 			rv = MDI_SUCCESS;
8596 		}
8597 
8598 		/* release the hold that e_ddi_hold_devi_by_path() placed */
8599 		ndi_rele_devi(ph_dip);
8600 	}
8601 
8602 	devnm--;
8603 	*devnm = '/';
8604 	return (rv);
8605 }
8606 
8607 /*
8608  * Build a list of phci client paths for the specified vhci client.
8609  * The list includes only those phci client paths which aren't configured yet.
8610  */
8611 static mdi_phys_path_t *
build_phclient_path_list(mdi_vhcache_client_t * cct,char * ct_name)8612 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name)
8613 {
8614 	mdi_vhcache_pathinfo_t *cpi;
8615 	mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp;
8616 	int config_path, len;
8617 
8618 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8619 		/*
8620 		 * include only those paths that aren't configured.
8621 		 */
8622 		config_path = 0;
8623 		if (cpi->cpi_pip == NULL)
8624 			config_path = 1;
8625 		else {
8626 			MDI_PI_LOCK(cpi->cpi_pip);
8627 			if (MDI_PI_IS_INIT(cpi->cpi_pip))
8628 				config_path = 1;
8629 			MDI_PI_UNLOCK(cpi->cpi_pip);
8630 		}
8631 
8632 		if (config_path) {
8633 			pp = kmem_alloc(sizeof (*pp), KM_SLEEP);
8634 			len = strlen(cpi->cpi_cphci->cphci_path) +
8635 			    strlen(ct_name) + strlen(cpi->cpi_addr) + 3;
8636 			pp->phys_path = kmem_alloc(len, KM_SLEEP);
8637 			(void) snprintf(pp->phys_path, len, "%s/%s@%s",
8638 			    cpi->cpi_cphci->cphci_path, ct_name,
8639 			    cpi->cpi_addr);
8640 			pp->phys_path_next = NULL;
8641 
8642 			if (pp_head == NULL)
8643 				pp_head = pp;
8644 			else
8645 				pp_tail->phys_path_next = pp;
8646 			pp_tail = pp;
8647 		}
8648 	}
8649 
8650 	return (pp_head);
8651 }
8652 
8653 /*
8654  * Free the memory allocated for phci client path list.
8655  */
8656 static void
free_phclient_path_list(mdi_phys_path_t * pp_head)8657 free_phclient_path_list(mdi_phys_path_t *pp_head)
8658 {
8659 	mdi_phys_path_t *pp, *pp_next;
8660 
8661 	for (pp = pp_head; pp != NULL; pp = pp_next) {
8662 		pp_next = pp->phys_path_next;
8663 		kmem_free(pp->phys_path, strlen(pp->phys_path) + 1);
8664 		kmem_free(pp, sizeof (*pp));
8665 	}
8666 }
8667 
8668 /*
8669  * Allocated async client structure and initialize with the specified values.
8670  */
8671 static mdi_async_client_config_t *
alloc_async_client_config(char * ct_name,char * ct_addr,mdi_phys_path_t * pp_head,mdi_vhcache_lookup_token_t * tok)8672 alloc_async_client_config(char *ct_name, char *ct_addr,
8673     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8674 {
8675 	mdi_async_client_config_t *acc;
8676 
8677 	acc = kmem_alloc(sizeof (*acc), KM_SLEEP);
8678 	acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP);
8679 	acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP);
8680 	acc->acc_phclient_path_list_head = pp_head;
8681 	init_vhcache_lookup_token(&acc->acc_token, tok);
8682 	acc->acc_next = NULL;
8683 	return (acc);
8684 }
8685 
8686 /*
8687  * Free the memory allocated for the async client structure and their members.
8688  */
8689 static void
free_async_client_config(mdi_async_client_config_t * acc)8690 free_async_client_config(mdi_async_client_config_t *acc)
8691 {
8692 	if (acc->acc_phclient_path_list_head)
8693 		free_phclient_path_list(acc->acc_phclient_path_list_head);
8694 	kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1);
8695 	kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1);
8696 	kmem_free(acc, sizeof (*acc));
8697 }
8698 
8699 /*
8700  * Sort vhcache pathinfos (cpis) of the specified client.
8701  * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
8702  * flag set come at the beginning of the list. All cpis which have this
8703  * flag set come at the end of the list.
8704  */
8705 static void
sort_vhcache_paths(mdi_vhcache_client_t * cct)8706 sort_vhcache_paths(mdi_vhcache_client_t *cct)
8707 {
8708 	mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head;
8709 
8710 	cpi_head = cct->cct_cpi_head;
8711 	cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
8712 	for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
8713 		cpi_next = cpi->cpi_next;
8714 		enqueue_vhcache_pathinfo(cct, cpi);
8715 	}
8716 }
8717 
8718 /*
8719  * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for
8720  * every vhcache pathinfo of the specified client. If not adjust the flag
8721  * setting appropriately.
8722  *
8723  * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the
8724  * on-disk vhci cache. So every time this flag is updated the cache must be
8725  * flushed.
8726  */
8727 static void
adjust_sort_vhcache_paths(mdi_vhci_config_t * vhc,char * ct_name,char * ct_addr,mdi_vhcache_lookup_token_t * tok)8728 adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8729     mdi_vhcache_lookup_token_t *tok)
8730 {
8731 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8732 	mdi_vhcache_client_t *cct;
8733 	mdi_vhcache_pathinfo_t *cpi;
8734 
8735 	rw_enter(&vhcache->vhcache_lock, RW_READER);
8736 	if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok))
8737 	    == NULL) {
8738 		rw_exit(&vhcache->vhcache_lock);
8739 		return;
8740 	}
8741 
8742 	/*
8743 	 * to avoid unnecessary on-disk cache updates, first check if an
8744 	 * update is really needed. If no update is needed simply return.
8745 	 */
8746 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8747 		if ((cpi->cpi_pip != NULL &&
8748 		    (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) ||
8749 		    (cpi->cpi_pip == NULL &&
8750 		    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) {
8751 			break;
8752 		}
8753 	}
8754 	if (cpi == NULL) {
8755 		rw_exit(&vhcache->vhcache_lock);
8756 		return;
8757 	}
8758 
8759 	if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) {
8760 		rw_exit(&vhcache->vhcache_lock);
8761 		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8762 		if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr,
8763 		    tok)) == NULL) {
8764 			rw_exit(&vhcache->vhcache_lock);
8765 			return;
8766 		}
8767 	}
8768 
8769 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8770 		if (cpi->cpi_pip != NULL)
8771 			cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
8772 		else
8773 			cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
8774 	}
8775 	sort_vhcache_paths(cct);
8776 
8777 	rw_exit(&vhcache->vhcache_lock);
8778 	vhcache_dirty(vhc);
8779 }
8780 
8781 /*
8782  * Configure all specified paths of the client.
8783  */
8784 static void
config_client_paths_sync(mdi_vhci_config_t * vhc,char * ct_name,char * ct_addr,mdi_phys_path_t * pp_head,mdi_vhcache_lookup_token_t * tok)8785 config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8786     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8787 {
8788 	mdi_phys_path_t *pp;
8789 
8790 	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next)
8791 		(void) bus_config_one_phci_child(pp->phys_path);
8792 	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok);
8793 }
8794 
8795 /*
8796  * Dequeue elements from vhci async client config list and bus configure
8797  * their corresponding phci clients.
8798  */
8799 static void
config_client_paths_thread(void * arg)8800 config_client_paths_thread(void *arg)
8801 {
8802 	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
8803 	mdi_async_client_config_t *acc;
8804 	clock_t quit_at_ticks;
8805 	clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND;
8806 	callb_cpr_t cprinfo;
8807 
8808 	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
8809 	    "mdi_config_client_paths");
8810 
8811 	for (; ; ) {
8812 		quit_at_ticks = ddi_get_lbolt() + idle_time;
8813 
8814 		mutex_enter(&vhc->vhc_lock);
8815 		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
8816 		    vhc->vhc_acc_list_head == NULL &&
8817 		    ddi_get_lbolt() < quit_at_ticks) {
8818 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
8819 			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
8820 			    quit_at_ticks);
8821 			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
8822 		}
8823 
8824 		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
8825 		    vhc->vhc_acc_list_head == NULL)
8826 			goto out;
8827 
8828 		acc = vhc->vhc_acc_list_head;
8829 		vhc->vhc_acc_list_head = acc->acc_next;
8830 		if (vhc->vhc_acc_list_head == NULL)
8831 			vhc->vhc_acc_list_tail = NULL;
8832 		vhc->vhc_acc_count--;
8833 		mutex_exit(&vhc->vhc_lock);
8834 
8835 		config_client_paths_sync(vhc, acc->acc_ct_name,
8836 		    acc->acc_ct_addr, acc->acc_phclient_path_list_head,
8837 		    &acc->acc_token);
8838 
8839 		free_async_client_config(acc);
8840 	}
8841 
8842 out:
8843 	vhc->vhc_acc_thrcount--;
8844 	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
8845 	CALLB_CPR_EXIT(&cprinfo);
8846 }
8847 
8848 /*
8849  * Arrange for all the phci client paths (pp_head) for the specified client
8850  * to be bus configured asynchronously by a thread.
8851  */
8852 static void
config_client_paths_async(mdi_vhci_config_t * vhc,char * ct_name,char * ct_addr,mdi_phys_path_t * pp_head,mdi_vhcache_lookup_token_t * tok)8853 config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8854     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8855 {
8856 	mdi_async_client_config_t *acc, *newacc;
8857 	int create_thread;
8858 
8859 	if (pp_head == NULL)
8860 		return;
8861 
8862 	if (mdi_mtc_off) {
8863 		config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok);
8864 		free_phclient_path_list(pp_head);
8865 		return;
8866 	}
8867 
8868 	newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok);
8869 	ASSERT(newacc);
8870 
8871 	mutex_enter(&vhc->vhc_lock);
8872 	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) {
8873 		if (strcmp(ct_name, acc->acc_ct_name) == 0 &&
8874 		    strcmp(ct_addr, acc->acc_ct_addr) == 0) {
8875 			free_async_client_config(newacc);
8876 			mutex_exit(&vhc->vhc_lock);
8877 			return;
8878 		}
8879 	}
8880 
8881 	if (vhc->vhc_acc_list_head == NULL)
8882 		vhc->vhc_acc_list_head = newacc;
8883 	else
8884 		vhc->vhc_acc_list_tail->acc_next = newacc;
8885 	vhc->vhc_acc_list_tail = newacc;
8886 	vhc->vhc_acc_count++;
8887 	if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) {
8888 		cv_broadcast(&vhc->vhc_cv);
8889 		create_thread = 0;
8890 	} else {
8891 		vhc->vhc_acc_thrcount++;
8892 		create_thread = 1;
8893 	}
8894 	mutex_exit(&vhc->vhc_lock);
8895 
8896 	if (create_thread)
8897 		(void) thread_create(NULL, 0, config_client_paths_thread, vhc,
8898 		    0, &p0, TS_RUN, minclsyspri);
8899 }
8900 
8901 /*
8902  * Return number of online paths for the specified client.
8903  */
8904 static int
nonline_paths(mdi_vhcache_client_t * cct)8905 nonline_paths(mdi_vhcache_client_t *cct)
8906 {
8907 	mdi_vhcache_pathinfo_t *cpi;
8908 	int online_count = 0;
8909 
8910 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8911 		if (cpi->cpi_pip != NULL) {
8912 			MDI_PI_LOCK(cpi->cpi_pip);
8913 			if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE)
8914 				online_count++;
8915 			MDI_PI_UNLOCK(cpi->cpi_pip);
8916 		}
8917 	}
8918 
8919 	return (online_count);
8920 }
8921 
8922 /*
8923  * Bus configure all paths for the specified vhci client.
8924  * If at least one path for the client is already online, the remaining paths
8925  * will be configured asynchronously. Otherwise, it synchronously configures
8926  * the paths until at least one path is online and then rest of the paths
8927  * will be configured asynchronously.
8928  */
8929 static void
config_client_paths(mdi_vhci_config_t * vhc,char * ct_name,char * ct_addr)8930 config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr)
8931 {
8932 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8933 	mdi_phys_path_t *pp_head, *pp;
8934 	mdi_vhcache_client_t *cct;
8935 	mdi_vhcache_lookup_token_t tok;
8936 
8937 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8938 
8939 	init_vhcache_lookup_token(&tok, NULL);
8940 
8941 	if (ct_name == NULL || ct_addr == NULL ||
8942 	    (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok))
8943 	    == NULL ||
8944 	    (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) {
8945 		rw_exit(&vhcache->vhcache_lock);
8946 		return;
8947 	}
8948 
8949 	/* if at least one path is online, configure the rest asynchronously */
8950 	if (nonline_paths(cct) > 0) {
8951 		rw_exit(&vhcache->vhcache_lock);
8952 		config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok);
8953 		return;
8954 	}
8955 
8956 	rw_exit(&vhcache->vhcache_lock);
8957 
8958 	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) {
8959 		if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) {
8960 			rw_enter(&vhcache->vhcache_lock, RW_READER);
8961 
8962 			if ((cct = lookup_vhcache_client(vhcache, ct_name,
8963 			    ct_addr, &tok)) == NULL) {
8964 				rw_exit(&vhcache->vhcache_lock);
8965 				goto out;
8966 			}
8967 
8968 			if (nonline_paths(cct) > 0 &&
8969 			    pp->phys_path_next != NULL) {
8970 				rw_exit(&vhcache->vhcache_lock);
8971 				config_client_paths_async(vhc, ct_name, ct_addr,
8972 				    pp->phys_path_next, &tok);
8973 				pp->phys_path_next = NULL;
8974 				goto out;
8975 			}
8976 
8977 			rw_exit(&vhcache->vhcache_lock);
8978 		}
8979 	}
8980 
8981 	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok);
8982 out:
8983 	free_phclient_path_list(pp_head);
8984 }
8985 
8986 static void
single_threaded_vhconfig_enter(mdi_vhci_config_t * vhc)8987 single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc)
8988 {
8989 	mutex_enter(&vhc->vhc_lock);
8990 	while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED)
8991 		cv_wait(&vhc->vhc_cv, &vhc->vhc_lock);
8992 	vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED;
8993 	mutex_exit(&vhc->vhc_lock);
8994 }
8995 
8996 static void
single_threaded_vhconfig_exit(mdi_vhci_config_t * vhc)8997 single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc)
8998 {
8999 	mutex_enter(&vhc->vhc_lock);
9000 	vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED;
9001 	cv_broadcast(&vhc->vhc_cv);
9002 	mutex_exit(&vhc->vhc_lock);
9003 }
9004 
/*
 * Entry of the built-in phci driver tables below.
 */
typedef struct mdi_phci_driver_info {
	char	*phdriver_name;	/* name of the phci driver */

	/* set to non zero if the phci driver supports root device */
	int	phdriver_root_support;
} mdi_phci_driver_info_t;

/*
 * vhci class and root support capability of a phci driver can be
 * specified using ddi-vhci-class and ddi-no-root-support properties in the
 * phci driver.conf file. The built-in tables below contain this information
 * for those phci drivers whose driver.conf files don't yet contain this info.
 *
 * All phci drivers except iscsi have root device support.
 */
static mdi_phci_driver_info_t scsi_phci_driver_list[] = {
	{ "fp", 1 },
	{ "iscsi", 0 },
	{ "ibsrp", 1 }
};

/* fully braced so that the single entry is explicit */
static mdi_phci_driver_info_t ib_phci_driver_list[] = {
	{ "tavor", 1 }
};
9027 
9028 static void *
mdi_realloc(void * old_ptr,size_t old_size,size_t new_size)9029 mdi_realloc(void *old_ptr, size_t old_size, size_t new_size)
9030 {
9031 	void *new_ptr;
9032 
9033 	new_ptr = kmem_zalloc(new_size, KM_SLEEP);
9034 	if (old_ptr) {
9035 		bcopy(old_ptr, new_ptr, MIN(old_size, new_size));
9036 		kmem_free(old_ptr, old_size);
9037 	}
9038 	return (new_ptr);
9039 }
9040 
9041 static void
add_to_phci_list(char *** driver_list,int ** root_support_list,int * cur_elements,int * max_elements,char * driver_name,int root_support)9042 add_to_phci_list(char ***driver_list, int **root_support_list,
9043     int *cur_elements, int *max_elements, char *driver_name, int root_support)
9044 {
9045 	ASSERT(*cur_elements <= *max_elements);
9046 	if (*cur_elements == *max_elements) {
9047 		*max_elements += 10;
9048 		*driver_list = mdi_realloc(*driver_list,
9049 		    sizeof (char *) * (*cur_elements),
9050 		    sizeof (char *) * (*max_elements));
9051 		*root_support_list = mdi_realloc(*root_support_list,
9052 		    sizeof (int) * (*cur_elements),
9053 		    sizeof (int) * (*max_elements));
9054 	}
9055 	(*driver_list)[*cur_elements] = i_ddi_strdup(driver_name, KM_SLEEP);
9056 	(*root_support_list)[*cur_elements] = root_support;
9057 	(*cur_elements)++;
9058 }
9059 
9060 static void
get_phci_driver_list(char * vhci_class,char *** driver_list,int ** root_support_list,int * cur_elements,int * max_elements)9061 get_phci_driver_list(char *vhci_class, char ***driver_list,
9062     int **root_support_list, int *cur_elements, int *max_elements)
9063 {
9064 	mdi_phci_driver_info_t	*st_driver_list, *p;
9065 	int		st_ndrivers, root_support, i, j, driver_conf_count;
9066 	major_t		m;
9067 	struct devnames	*dnp;
9068 	ddi_prop_t	*propp;
9069 
9070 	*driver_list = NULL;
9071 	*root_support_list = NULL;
9072 	*cur_elements = 0;
9073 	*max_elements = 0;
9074 
9075 	/* add the phci drivers derived from the phci driver.conf files */
9076 	for (m = 0; m < devcnt; m++) {
9077 		dnp = &devnamesp[m];
9078 
9079 		if (dnp->dn_flags & DN_PHCI_DRIVER) {
9080 			LOCK_DEV_OPS(&dnp->dn_lock);
9081 			if (dnp->dn_global_prop_ptr != NULL &&
9082 			    (propp = i_ddi_prop_search(DDI_DEV_T_ANY,
9083 			    DDI_VHCI_CLASS, DDI_PROP_TYPE_STRING,
9084 			    &dnp->dn_global_prop_ptr->prop_list)) != NULL &&
9085 			    strcmp(propp->prop_val, vhci_class) == 0) {
9086 
9087 				root_support = (i_ddi_prop_search(DDI_DEV_T_ANY,
9088 				    DDI_NO_ROOT_SUPPORT, DDI_PROP_TYPE_INT,
9089 				    &dnp->dn_global_prop_ptr->prop_list)
9090 				    == NULL) ? 1 : 0;
9091 
9092 				add_to_phci_list(driver_list, root_support_list,
9093 				    cur_elements, max_elements, dnp->dn_name,
9094 				    root_support);
9095 
9096 				UNLOCK_DEV_OPS(&dnp->dn_lock);
9097 			} else
9098 				UNLOCK_DEV_OPS(&dnp->dn_lock);
9099 		}
9100 	}
9101 
9102 	driver_conf_count = *cur_elements;
9103 
9104 	/* add the phci drivers specified in the built-in tables */
9105 	if (strcmp(vhci_class, MDI_HCI_CLASS_SCSI) == 0) {
9106 		st_driver_list = scsi_phci_driver_list;
9107 		st_ndrivers = sizeof (scsi_phci_driver_list) /
9108 		    sizeof (mdi_phci_driver_info_t);
9109 	} else if (strcmp(vhci_class, MDI_HCI_CLASS_IB) == 0) {
9110 		st_driver_list = ib_phci_driver_list;
9111 		st_ndrivers = sizeof (ib_phci_driver_list) /
9112 		    sizeof (mdi_phci_driver_info_t);
9113 	} else {
9114 		st_driver_list = NULL;
9115 		st_ndrivers = 0;
9116 	}
9117 
9118 	for (i = 0, p = st_driver_list; i < st_ndrivers; i++, p++) {
9119 		/* add this phci driver if not already added before */
9120 		for (j = 0; j < driver_conf_count; j++) {
9121 			if (strcmp((*driver_list)[j], p->phdriver_name) == 0)
9122 				break;
9123 		}
9124 		if (j == driver_conf_count) {
9125 			add_to_phci_list(driver_list, root_support_list,
9126 			    cur_elements, max_elements, p->phdriver_name,
9127 			    p->phdriver_root_support);
9128 		}
9129 	}
9130 }
9131 
9132 /*
9133  * Attach the phci driver instances associated with the specified vhci class.
9134  * If root is mounted attach all phci driver instances.
9135  * If root is not mounted, attach the instances of only those phci
9136  * drivers that have the root support.
9137  */
9138 static void
attach_phci_drivers(char * vhci_class)9139 attach_phci_drivers(char *vhci_class)
9140 {
9141 	char	**driver_list, **p;
9142 	int	*root_support_list;
9143 	int	cur_elements, max_elements, i;
9144 	major_t	m;
9145 
9146 	get_phci_driver_list(vhci_class, &driver_list, &root_support_list,
9147 	    &cur_elements, &max_elements);
9148 
9149 	for (i = 0; i < cur_elements; i++) {
9150 		if (modrootloaded || root_support_list[i]) {
9151 			m = ddi_name_to_major(driver_list[i]);
9152 			if (m != DDI_MAJOR_T_NONE &&
9153 			    ddi_hold_installed_driver(m))
9154 				ddi_rele_driver(m);
9155 		}
9156 	}
9157 
9158 	if (driver_list) {
9159 		for (i = 0, p = driver_list; i < cur_elements; i++, p++)
9160 			kmem_free(*p, strlen(*p) + 1);
9161 		kmem_free(driver_list, sizeof (char *) * max_elements);
9162 		kmem_free(root_support_list, sizeof (int) * max_elements);
9163 	}
9164 }
9165 
9166 /*
9167  * Build vhci cache:
9168  *
9169  * Attach phci driver instances and then drive BUS_CONFIG_ALL on
9170  * the phci driver instances. During this process the cache gets built.
9171  *
9172  * Cache is built fully if the root is mounted.
9173  * If the root is not mounted, phci drivers that do not have root support
9174  * are not attached. As a result the cache is built partially. The entries
9175  * in the cache reflect only those phci drivers that have root support.
9176  */
9177 static int
build_vhci_cache(mdi_vhci_t * vh)9178 build_vhci_cache(mdi_vhci_t *vh)
9179 {
9180 	mdi_vhci_config_t *vhc = vh->vh_config;
9181 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
9182 
9183 	single_threaded_vhconfig_enter(vhc);
9184 
9185 	rw_enter(&vhcache->vhcache_lock, RW_READER);
9186 	if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) {
9187 		rw_exit(&vhcache->vhcache_lock);
9188 		single_threaded_vhconfig_exit(vhc);
9189 		return (0);
9190 	}
9191 	rw_exit(&vhcache->vhcache_lock);
9192 
9193 	attach_phci_drivers(vh->vh_class);
9194 	bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT,
9195 	    BUS_CONFIG_ALL, DDI_MAJOR_T_NONE);
9196 
9197 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
9198 	vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
9199 	rw_exit(&vhcache->vhcache_lock);
9200 
9201 	single_threaded_vhconfig_exit(vhc);
9202 	vhcache_dirty(vhc);
9203 	return (1);
9204 }
9205 
9206 /*
9207  * Determine if discovery of paths is needed.
9208  */
9209 static int
vhcache_do_discovery(mdi_vhci_config_t * vhc)9210 vhcache_do_discovery(mdi_vhci_config_t *vhc)
9211 {
9212 	int rv = 1;
9213 
9214 	mutex_enter(&vhc->vhc_lock);
9215 	if (i_ddi_io_initialized() == 0) {
9216 		if (vhc->vhc_path_discovery_boot > 0) {
9217 			vhc->vhc_path_discovery_boot--;
9218 			goto out;
9219 		}
9220 	} else {
9221 		if (vhc->vhc_path_discovery_postboot > 0) {
9222 			vhc->vhc_path_discovery_postboot--;
9223 			goto out;
9224 		}
9225 	}
9226 
9227 	/*
9228 	 * Do full path discovery at most once per mdi_path_discovery_interval.
9229 	 * This is to avoid a series of full path discoveries when opening
9230 	 * stale /dev/[r]dsk links.
9231 	 */
9232 	if (mdi_path_discovery_interval != -1 &&
9233 	    ddi_get_lbolt64() >= vhc->vhc_path_discovery_cutoff_time)
9234 		goto out;
9235 
9236 	rv = 0;
9237 out:
9238 	mutex_exit(&vhc->vhc_lock);
9239 	return (rv);
9240 }
9241 
9242 /*
9243  * Discover all paths:
9244  *
9245  * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci
9246  * driver instances. During this process all paths will be discovered.
9247  */
9248 static int
vhcache_discover_paths(mdi_vhci_t * vh)9249 vhcache_discover_paths(mdi_vhci_t *vh)
9250 {
9251 	mdi_vhci_config_t *vhc = vh->vh_config;
9252 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
9253 	int rv = 0;
9254 
9255 	single_threaded_vhconfig_enter(vhc);
9256 
9257 	if (vhcache_do_discovery(vhc)) {
9258 		attach_phci_drivers(vh->vh_class);
9259 		bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE |
9260 		    NDI_NO_EVENT, BUS_CONFIG_ALL, DDI_MAJOR_T_NONE);
9261 
9262 		mutex_enter(&vhc->vhc_lock);
9263 		vhc->vhc_path_discovery_cutoff_time = ddi_get_lbolt64() +
9264 		    mdi_path_discovery_interval * TICKS_PER_SECOND;
9265 		mutex_exit(&vhc->vhc_lock);
9266 		rv = 1;
9267 	}
9268 
9269 	single_threaded_vhconfig_exit(vhc);
9270 	return (rv);
9271 }
9272 
9273 /*
9274  * Generic vhci bus config implementation:
9275  *
9276  * Parameters
9277  *	vdip	vhci dip
9278  *	flags	bus config flags
9279  *	op	bus config operation
9280  *	The remaining parameters are bus config operation specific
9281  *
9282  * for BUS_CONFIG_ONE
9283  *	arg	pointer to name@addr
9284  *	child	upon successful return from this function, *child will be
9285  *		set to the configured and held devinfo child node of vdip.
9286  *	ct_addr	pointer to client address (i.e. GUID)
9287  *
9288  * for BUS_CONFIG_DRIVER
9289  *	arg	major number of the driver
9290  *	child and ct_addr parameters are ignored
9291  *
9292  * for BUS_CONFIG_ALL
9293  *	arg, child, and ct_addr parameters are ignored
9294  *
9295  * Note that for the rest of the bus config operations, this function simply
9296  * calls the framework provided default bus config routine.
9297  */
9298 int
mdi_vhci_bus_config(dev_info_t * vdip,uint_t flags,ddi_bus_config_op_t op,void * arg,dev_info_t ** child,char * ct_addr)9299 mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op,
9300     void *arg, dev_info_t **child, char *ct_addr)
9301 {
9302 	mdi_vhci_t *vh = i_devi_get_vhci(vdip);
9303 	mdi_vhci_config_t *vhc = vh->vh_config;
9304 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
9305 	int rv = 0;
9306 	int params_valid = 0;
9307 	char *cp;
9308 
9309 	/*
9310 	 * To bus config vhcis we relay operation, possibly using another
9311 	 * thread, to phcis. The phci driver then interacts with MDI to cause
9312 	 * vhci child nodes to be enumerated under the vhci node.  Adding a
9313 	 * vhci child requires an ndi_devi_enter of the vhci. Since another
9314 	 * thread may be adding the child, to avoid deadlock we can't wait
9315 	 * for the relayed operations to complete if we have already entered
9316 	 * the vhci node.
9317 	 */
9318 	if (DEVI_BUSY_OWNED(vdip)) {
9319 		MDI_DEBUG(2, (MDI_NOTE, vdip,
9320 		    "vhci dip is busy owned %p", (void *)vdip));
9321 		goto default_bus_config;
9322 	}
9323 
9324 	rw_enter(&vhcache->vhcache_lock, RW_READER);
9325 	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
9326 		rw_exit(&vhcache->vhcache_lock);
9327 		rv = build_vhci_cache(vh);
9328 		rw_enter(&vhcache->vhcache_lock, RW_READER);
9329 	}
9330 
9331 	switch (op) {
9332 	case BUS_CONFIG_ONE:
9333 		if (arg != NULL && ct_addr != NULL) {
9334 			/* extract node name */
9335 			cp = (char *)arg;
9336 			while (*cp != '\0' && *cp != '@')
9337 				cp++;
9338 			if (*cp == '@') {
9339 				params_valid = 1;
9340 				*cp = '\0';
9341 				config_client_paths(vhc, (char *)arg, ct_addr);
9342 				/* config_client_paths() releases cache_lock */
9343 				*cp = '@';
9344 				break;
9345 			}
9346 		}
9347 
9348 		rw_exit(&vhcache->vhcache_lock);
9349 		break;
9350 
9351 	case BUS_CONFIG_DRIVER:
9352 		rw_exit(&vhcache->vhcache_lock);
9353 		if (rv == 0)
9354 			st_bus_config_all_phcis(vhc, flags, op,
9355 			    (major_t)(uintptr_t)arg);
9356 		break;
9357 
9358 	case BUS_CONFIG_ALL:
9359 		rw_exit(&vhcache->vhcache_lock);
9360 		if (rv == 0)
9361 			st_bus_config_all_phcis(vhc, flags, op, -1);
9362 		break;
9363 
9364 	default:
9365 		rw_exit(&vhcache->vhcache_lock);
9366 		break;
9367 	}
9368 
9369 
9370 default_bus_config:
9371 	/*
9372 	 * All requested child nodes are enumerated under the vhci.
9373 	 * Now configure them.
9374 	 */
9375 	if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
9376 	    NDI_SUCCESS) {
9377 		return (MDI_SUCCESS);
9378 	} else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) {
9379 		/* discover all paths and try configuring again */
9380 		if (vhcache_discover_paths(vh) &&
9381 		    ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
9382 		    NDI_SUCCESS)
9383 			return (MDI_SUCCESS);
9384 	}
9385 
9386 	return (MDI_FAILURE);
9387 }
9388 
9389 /*
9390  * Read the on-disk vhci cache into an nvlist for the specified vhci class.
9391  */
9392 static nvlist_t *
read_on_disk_vhci_cache(char * vhci_class)9393 read_on_disk_vhci_cache(char *vhci_class)
9394 {
9395 	nvlist_t *nvl;
9396 	int err;
9397 	char *filename;
9398 
9399 	filename = vhclass2vhcache_filename(vhci_class);
9400 
9401 	if ((err = fread_nvlist(filename, &nvl)) == 0) {
9402 		kmem_free(filename, strlen(filename) + 1);
9403 		return (nvl);
9404 	} else if (err == EIO)
9405 		cmn_err(CE_WARN, "%s: I/O error, will recreate", filename);
9406 	else if (err == EINVAL)
9407 		cmn_err(CE_WARN,
9408 		    "%s: data file corrupted, will recreate", filename);
9409 
9410 	kmem_free(filename, strlen(filename) + 1);
9411 	return (NULL);
9412 }
9413 
9414 /*
9415  * Read on-disk vhci cache into nvlists for all vhci classes.
9416  * Called during booting by i_ddi_read_devices_files().
9417  */
9418 void
mdi_read_devices_files(void)9419 mdi_read_devices_files(void)
9420 {
9421 	int i;
9422 
9423 	for (i = 0; i < N_VHCI_CLASSES; i++)
9424 		vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]);
9425 }
9426 
9427 /*
9428  * Remove all stale entries from vhci cache.
9429  */
9430 static void
clean_vhcache(mdi_vhci_config_t * vhc)9431 clean_vhcache(mdi_vhci_config_t *vhc)
9432 {
9433 	mdi_vhci_cache_t	*vhcache = &vhc->vhc_vhcache;
9434 	mdi_vhcache_phci_t	*phci, *nxt_phci;
9435 	mdi_vhcache_client_t	*client, *nxt_client;
9436 	mdi_vhcache_pathinfo_t	*path, *nxt_path;
9437 
9438 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
9439 
9440 	client = vhcache->vhcache_client_head;
9441 	vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL;
9442 	for ( ; client != NULL; client = nxt_client) {
9443 		nxt_client = client->cct_next;
9444 
9445 		path = client->cct_cpi_head;
9446 		client->cct_cpi_head = client->cct_cpi_tail = NULL;
9447 		for ( ; path != NULL; path = nxt_path) {
9448 			nxt_path = path->cpi_next;
9449 			if ((path->cpi_cphci->cphci_phci != NULL) &&
9450 			    (path->cpi_pip != NULL)) {
9451 				enqueue_tail_vhcache_pathinfo(client, path);
9452 			} else if (path->cpi_pip != NULL) {
9453 				/* Not valid to have a path without a phci. */
9454 				free_vhcache_pathinfo(path);
9455 			}
9456 		}
9457 
9458 		if (client->cct_cpi_head != NULL)
9459 			enqueue_vhcache_client(vhcache, client);
9460 		else {
9461 			(void) mod_hash_destroy(vhcache->vhcache_client_hash,
9462 			    (mod_hash_key_t)client->cct_name_addr);
9463 			free_vhcache_client(client);
9464 		}
9465 	}
9466 
9467 	phci = vhcache->vhcache_phci_head;
9468 	vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL;
9469 	for ( ; phci != NULL; phci = nxt_phci) {
9470 
9471 		nxt_phci = phci->cphci_next;
9472 		if (phci->cphci_phci != NULL)
9473 			enqueue_vhcache_phci(vhcache, phci);
9474 		else
9475 			free_vhcache_phci(phci);
9476 	}
9477 
9478 	vhcache->vhcache_clean_time = ddi_get_lbolt64();
9479 	rw_exit(&vhcache->vhcache_lock);
9480 	vhcache_dirty(vhc);
9481 }
9482 
9483 /*
9484  * Remove all stale entries from vhci cache.
9485  * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C
9486  */
9487 void
mdi_clean_vhcache(void)9488 mdi_clean_vhcache(void)
9489 {
9490 	mdi_vhci_t *vh;
9491 
9492 	mutex_enter(&mdi_mutex);
9493 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
9494 		vh->vh_refcnt++;
9495 		mutex_exit(&mdi_mutex);
9496 		clean_vhcache(vh->vh_config);
9497 		mutex_enter(&mdi_mutex);
9498 		vh->vh_refcnt--;
9499 	}
9500 	mutex_exit(&mdi_mutex);
9501 }
9502 
9503 /*
9504  * mdi_vhci_walk_clients():
9505  *		Walker routine to traverse client dev_info nodes
9506  * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree
9507  * below the client, including nexus devices, which we dont want.
9508  * So we just traverse the immediate siblings, starting from 1st client.
9509  */
9510 void
mdi_vhci_walk_clients(dev_info_t * vdip,int (* f)(dev_info_t *,void *),void * arg)9511 mdi_vhci_walk_clients(dev_info_t *vdip,
9512     int (*f)(dev_info_t *, void *), void *arg)
9513 {
9514 	mdi_vhci_t	*vh = i_devi_get_vhci(vdip);
9515 	dev_info_t	*cdip;
9516 	mdi_client_t	*ct;
9517 
9518 	MDI_VHCI_CLIENT_LOCK(vh);
9519 	cdip = ddi_get_child(vdip);
9520 	while (cdip) {
9521 		ct = i_devi_get_client(cdip);
9522 		MDI_CLIENT_LOCK(ct);
9523 
9524 		if (((*f)(cdip, arg)) == DDI_WALK_CONTINUE)
9525 			cdip = ddi_get_next_sibling(cdip);
9526 		else
9527 			cdip = NULL;
9528 
9529 		MDI_CLIENT_UNLOCK(ct);
9530 	}
9531 	MDI_VHCI_CLIENT_UNLOCK(vh);
9532 }
9533 
9534 /*
9535  * mdi_vhci_walk_phcis():
9536  *		Walker routine to traverse phci dev_info nodes
9537  */
9538 void
mdi_vhci_walk_phcis(dev_info_t * vdip,int (* f)(dev_info_t *,void *),void * arg)9539 mdi_vhci_walk_phcis(dev_info_t *vdip,
9540     int (*f)(dev_info_t *, void *), void *arg)
9541 {
9542 	mdi_vhci_t	*vh = i_devi_get_vhci(vdip);
9543 	mdi_phci_t	*ph, *next;
9544 
9545 	MDI_VHCI_PHCI_LOCK(vh);
9546 	ph = vh->vh_phci_head;
9547 	while (ph) {
9548 		MDI_PHCI_LOCK(ph);
9549 
9550 		if (((*f)(ph->ph_dip, arg)) == DDI_WALK_CONTINUE)
9551 			next = ph->ph_next;
9552 		else
9553 			next = NULL;
9554 
9555 		MDI_PHCI_UNLOCK(ph);
9556 		ph = next;
9557 	}
9558 	MDI_VHCI_PHCI_UNLOCK(vh);
9559 }
9560 
9561 
9562 /*
9563  * mdi_walk_vhcis():
9564  *		Walker routine to traverse vhci dev_info nodes
9565  */
9566 void
mdi_walk_vhcis(int (* f)(dev_info_t *,void *),void * arg)9567 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg)
9568 {
9569 	mdi_vhci_t	*vh = NULL;
9570 
9571 	mutex_enter(&mdi_mutex);
9572 	/*
9573 	 * Scan for already registered vhci
9574 	 */
9575 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
9576 		vh->vh_refcnt++;
9577 		mutex_exit(&mdi_mutex);
9578 		if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) {
9579 			mutex_enter(&mdi_mutex);
9580 			vh->vh_refcnt--;
9581 			break;
9582 		} else {
9583 			mutex_enter(&mdi_mutex);
9584 			vh->vh_refcnt--;
9585 		}
9586 	}
9587 
9588 	mutex_exit(&mdi_mutex);
9589 }
9590 
9591 /*
9592  * i_mdi_log_sysevent():
9593  *		Logs events for pickup by syseventd
9594  */
9595 static void
i_mdi_log_sysevent(dev_info_t * dip,char * ph_vh_class,char * subclass)9596 i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass)
9597 {
9598 	char		*path_name;
9599 	nvlist_t	*attr_list;
9600 
9601 	if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE,
9602 	    KM_SLEEP) != DDI_SUCCESS) {
9603 		goto alloc_failed;
9604 	}
9605 
9606 	path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
9607 	(void) ddi_pathname(dip, path_name);
9608 
9609 	if (nvlist_add_string(attr_list, DDI_DRIVER_NAME,
9610 	    ddi_driver_name(dip)) != DDI_SUCCESS) {
9611 		goto error;
9612 	}
9613 
9614 	if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR,
9615 	    (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) {
9616 		goto error;
9617 	}
9618 
9619 	if (nvlist_add_int32(attr_list, DDI_INSTANCE,
9620 	    (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) {
9621 		goto error;
9622 	}
9623 
9624 	if (nvlist_add_string(attr_list, DDI_PATHNAME,
9625 	    path_name) != DDI_SUCCESS) {
9626 		goto error;
9627 	}
9628 
9629 	if (nvlist_add_string(attr_list, DDI_CLASS,
9630 	    ph_vh_class) != DDI_SUCCESS) {
9631 		goto error;
9632 	}
9633 
9634 	(void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass,
9635 	    attr_list, NULL, DDI_SLEEP);
9636 
9637 error:
9638 	kmem_free(path_name, MAXPATHLEN);
9639 	nvlist_free(attr_list);
9640 	return;
9641 
9642 alloc_failed:
9643 	MDI_DEBUG(1, (MDI_WARN, dip, "!unable to send sysevent"));
9644 }
9645 
9646 char **
mdi_get_phci_driver_list(char * vhci_class,int * ndrivers)9647 mdi_get_phci_driver_list(char *vhci_class, int	*ndrivers)
9648 {
9649 	char	**driver_list, **ret_driver_list = NULL;
9650 	int	*root_support_list;
9651 	int	cur_elements, max_elements;
9652 
9653 	get_phci_driver_list(vhci_class, &driver_list, &root_support_list,
9654 	    &cur_elements, &max_elements);
9655 
9656 
9657 	if (driver_list) {
9658 		kmem_free(root_support_list, sizeof (int) * max_elements);
9659 		ret_driver_list = mdi_realloc(driver_list, sizeof (char *)
9660 		    * max_elements, sizeof (char *) * cur_elements);
9661 	}
9662 	*ndrivers = cur_elements;
9663 
9664 	return (ret_driver_list);
9665 
9666 }
9667 
/*
 * mdi_free_phci_driver_list():
 *		Free a driver name array of ndrivers entries: each name
 *		string first, then the array itself.  A NULL driver_list is
 *		a no-op.
 */
void
mdi_free_phci_driver_list(char **driver_list, int ndrivers)
{
	int	idx;

	if (driver_list == NULL)
		return;

	/* Each string is freed with its exact length (plus the NUL). */
	for (idx = 0; idx < ndrivers; idx++)
		kmem_free(driver_list[idx], strlen(driver_list[idx]) + 1);

	kmem_free(driver_list, sizeof (char *) * ndrivers);
}
9680 
9681 /*
9682  * mdi_is_dev_supported():
9683  *		function called by pHCI bus config operation to determine if a
9684  *		device should be represented as a child of the vHCI or the
9685  *		pHCI.  This decision is made by the vHCI, using cinfo idenity
9686  *		information passed by the pHCI - specifics of the cinfo
9687  *		representation are by agreement between the pHCI and vHCI.
9688  * Return Values:
9689  *		MDI_SUCCESS
9690  *		MDI_FAILURE
9691  */
9692 int
mdi_is_dev_supported(char * class,dev_info_t * pdip,void * cinfo)9693 mdi_is_dev_supported(char *class, dev_info_t *pdip, void *cinfo)
9694 {
9695 	mdi_vhci_t	*vh;
9696 
9697 	ASSERT(class && pdip);
9698 
9699 	/*
9700 	 * For dev_supported, mdi_phci_register() must have established pdip as
9701 	 * a pHCI.
9702 	 *
9703 	 * NOTE: mdi_phci_register() does "mpxio-disable" processing, and
9704 	 * MDI_PHCI(pdip) will return false if mpxio is disabled.
9705 	 */
9706 	if (!MDI_PHCI(pdip))
9707 		return (MDI_FAILURE);
9708 
9709 	/* Return MDI_FAILURE if vHCI does not support asking the question. */
9710 	vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
9711 	if ((vh == NULL) || (vh->vh_ops->vo_is_dev_supported == NULL)) {
9712 		return (MDI_FAILURE);
9713 	}
9714 
9715 	/* Return vHCI answer */
9716 	return (vh->vh_ops->vo_is_dev_supported(vh->vh_dip, pdip, cinfo));
9717 }
9718 
9719 int
mdi_dc_return_dev_state(mdi_pathinfo_t * pip,struct devctl_iocdata * dcp)9720 mdi_dc_return_dev_state(mdi_pathinfo_t *pip, struct devctl_iocdata *dcp)
9721 {
9722 	uint_t devstate = 0;
9723 	dev_info_t *cdip;
9724 
9725 	if ((pip == NULL) || (dcp == NULL))
9726 		return (MDI_FAILURE);
9727 
9728 	cdip = mdi_pi_get_client(pip);
9729 
9730 	switch (mdi_pi_get_state(pip)) {
9731 	case MDI_PATHINFO_STATE_INIT:
9732 		devstate = DEVICE_DOWN;
9733 		break;
9734 	case MDI_PATHINFO_STATE_ONLINE:
9735 		devstate = DEVICE_ONLINE;
9736 		if ((cdip) && (devi_stillreferenced(cdip) == DEVI_REFERENCED))
9737 			devstate |= DEVICE_BUSY;
9738 		break;
9739 	case MDI_PATHINFO_STATE_STANDBY:
9740 		devstate = DEVICE_ONLINE;
9741 		break;
9742 	case MDI_PATHINFO_STATE_FAULT:
9743 		devstate = DEVICE_DOWN;
9744 		break;
9745 	case MDI_PATHINFO_STATE_OFFLINE:
9746 		devstate = DEVICE_OFFLINE;
9747 		break;
9748 	default:
9749 		ASSERT(MDI_PI(pip)->pi_state);
9750 	}
9751 
9752 	if (copyout(&devstate, dcp->cpyout_buf, sizeof (uint_t)) != 0)
9753 		return (MDI_FAILURE);
9754 
9755 	return (MDI_SUCCESS);
9756 }
9757