1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25 /*
26 * Multipath driver interface (MDI) implementation; see mdi_impldefs.h for a
27 * more detailed discussion of the overall mpxio architecture.
28 *
29 * Default locking order:
30 *
31 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_phci_mutex);
32 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_client_mutex);
33 * _NOTE(LOCK_ORDER(mdi_vhci:vh_phci_mutex, mdi_phci::ph_mutex);
34 * _NOTE(LOCK_ORDER(mdi_vhci:vh_client_mutex, mdi_client::ct_mutex);
35 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
36 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex))
37 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
38 */
39
40 #include <sys/note.h>
41 #include <sys/types.h>
42 #include <sys/varargs.h>
43 #include <sys/param.h>
44 #include <sys/errno.h>
45 #include <sys/uio.h>
46 #include <sys/buf.h>
47 #include <sys/modctl.h>
48 #include <sys/open.h>
49 #include <sys/kmem.h>
50 #include <sys/poll.h>
51 #include <sys/conf.h>
52 #include <sys/bootconf.h>
53 #include <sys/cmn_err.h>
54 #include <sys/stat.h>
55 #include <sys/ddi.h>
56 #include <sys/sunddi.h>
57 #include <sys/ddipropdefs.h>
58 #include <sys/sunndi.h>
59 #include <sys/ndi_impldefs.h>
60 #include <sys/promif.h>
61 #include <sys/sunmdi.h>
62 #include <sys/mdi_impldefs.h>
63 #include <sys/taskq.h>
64 #include <sys/epm.h>
65 #include <sys/sunpm.h>
66 #include <sys/modhash.h>
67 #include <sys/disp.h>
68 #include <sys/autoconf.h>
69 #include <sys/sysmacros.h>
70
71 #ifdef DEBUG
72 #include <sys/debug.h>
73 int mdi_debug = 1;
74 int mdi_debug_logonly = 0;
75 #define MDI_DEBUG(dbglevel, pargs) if (mdi_debug >= (dbglevel)) i_mdi_log pargs
76 #define MDI_WARN CE_WARN, __func__
77 #define MDI_NOTE CE_NOTE, __func__
78 #define MDI_CONT CE_CONT, __func__
79 static void i_mdi_log(int, const char *, dev_info_t *, const char *, ...);
80 #else /* !DEBUG */
81 #define MDI_DEBUG(dbglevel, pargs)
82 #endif /* DEBUG */
83 int mdi_debug_consoleonly = 0;
84 int mdi_delay = 3;
85
86 extern pri_t minclsyspri;
87 extern int modrootloaded;
88
89 /*
90 * Global mutex:
91 * Protects vHCI list and structure members.
92 */
93 kmutex_t mdi_mutex;
94
95 /*
96 * Registered vHCI class driver lists
97 */
98 int mdi_vhci_count;
99 mdi_vhci_t *mdi_vhci_head;
100 mdi_vhci_t *mdi_vhci_tail;
101
102 /*
103 * Client Hash Table size
104 */
105 static int mdi_client_table_size = CLIENT_HASH_TABLE_SIZE;
106
107 /*
108 * taskq interface definitions
109 */
110 #define MDI_TASKQ_N_THREADS 8
111 #define MDI_TASKQ_PRI minclsyspri
112 #define MDI_TASKQ_MINALLOC (4*mdi_taskq_n_threads)
113 #define MDI_TASKQ_MAXALLOC (500*mdi_taskq_n_threads)
114
115 taskq_t *mdi_taskq;
116 static uint_t mdi_taskq_n_threads = MDI_TASKQ_N_THREADS;
117
118 #define TICKS_PER_SECOND (drv_usectohz(1000000))
119
120 /*
121 * The data should be "quiet" for this interval (in seconds) before the
122 * vhci cached data is flushed to the disk.
123 */
124 static int mdi_vhcache_flush_delay = 10;
125
126 /* number of seconds the vhcache flush daemon will sleep idle before exiting */
127 static int mdi_vhcache_flush_daemon_idle_time = 60;
128
129 /*
130 * MDI falls back to discovery of all paths when a bus_config_one fails.
131 * The following parameters can be used to tune this operation.
132 *
133 * mdi_path_discovery_boot
134 * Number of times path discovery will be attempted during early boot.
135 * Probably there is no reason to ever set this value to greater than one.
136 *
137 * mdi_path_discovery_postboot
138 * Number of times path discovery will be attempted after early boot.
139 * Set it to a minimum of two to allow for discovery of iscsi paths which
140 * may happen very late during booting.
141 *
142 * mdi_path_discovery_interval
143 * Minimum number of seconds MDI will wait between successive discovery
144 * of all paths. Set it to -1 to disable discovery of all paths.
145 */
146 static int mdi_path_discovery_boot = 1;
147 static int mdi_path_discovery_postboot = 2;
148 static int mdi_path_discovery_interval = 10;
149
150 /*
151 * number of seconds the asynchronous configuration thread will sleep idle
152 * before exiting.
153 */
154 static int mdi_async_config_idle_time = 600;
155
156 static int mdi_bus_config_cache_hash_size = 256;
157
158 /* turns off multithreaded configuration for certain operations */
159 static int mdi_mtc_off = 0;
160
161 /*
162 * The "path" to a pathinfo node is identical to the /devices path to a
163 * devinfo node had the device been enumerated under a pHCI instead of
164 * a vHCI. This pathinfo "path" is associated with a 'path_instance'.
165 * This association persists across create/delete of the pathinfo nodes,
166 * but not across reboot.
167 */
168 static uint_t mdi_pathmap_instance = 1; /* 0 -> any path */
169 static int mdi_pathmap_hash_size = 256;
170 static kmutex_t mdi_pathmap_mutex;
171 static mod_hash_t *mdi_pathmap_bypath; /* "path"->instance */
172 static mod_hash_t *mdi_pathmap_byinstance; /* instance->"path" */
173 static mod_hash_t *mdi_pathmap_sbyinstance; /* inst->shortpath */
174
175 /*
176 * MDI component property name/value string definitions
177 */
178 const char *mdi_component_prop = "mpxio-component";
179 const char *mdi_component_prop_vhci = "vhci";
180 const char *mdi_component_prop_phci = "phci";
181 const char *mdi_component_prop_client = "client";
182
183 /*
184 * MDI client global unique identifier property name
185 */
186 const char *mdi_client_guid_prop = "client-guid";
187
188 /*
189 * MDI client load balancing property name/value string definitions
190 */
191 const char *mdi_load_balance = "load-balance";
192 const char *mdi_load_balance_none = "none";
193 const char *mdi_load_balance_rr = "round-robin";
194 const char *mdi_load_balance_lba = "logical-block";
195
196 /*
197 * Obsolete vHCI class definition; to be removed after Leadville update
198 */
199 const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI;
200
201 static char vhci_greeting[] =
202 "\tThere already exists one vHCI driver for class %s\n"
203 "\tOnly one vHCI driver for each class is allowed\n";
204
205 /*
206 * Static function prototypes
207 */
208 static int i_mdi_phci_offline(dev_info_t *, uint_t);
209 static int i_mdi_client_offline(dev_info_t *, uint_t);
210 static int i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t);
211 static void i_mdi_phci_post_detach(dev_info_t *,
212 ddi_detach_cmd_t, int);
213 static int i_mdi_client_pre_detach(dev_info_t *,
214 ddi_detach_cmd_t);
215 static void i_mdi_client_post_detach(dev_info_t *,
216 ddi_detach_cmd_t, int);
217 static void i_mdi_pm_hold_pip(mdi_pathinfo_t *);
218 static void i_mdi_pm_rele_pip(mdi_pathinfo_t *);
219 static int i_mdi_lba_lb(mdi_client_t *ct,
220 mdi_pathinfo_t **ret_pip, struct buf *buf);
221 static void i_mdi_pm_hold_client(mdi_client_t *, int);
222 static void i_mdi_pm_rele_client(mdi_client_t *, int);
223 static void i_mdi_pm_reset_client(mdi_client_t *);
224 static int i_mdi_power_all_phci(mdi_client_t *);
225 static void i_mdi_log_sysevent(dev_info_t *, char *, char *);
226
227
228 /*
229 * Internal mdi_pathinfo node functions
230 */
231 static void i_mdi_pi_kstat_destroy(mdi_pathinfo_t *);
232
233 static mdi_vhci_t *i_mdi_vhci_class2vhci(char *);
234 static mdi_vhci_t *i_devi_get_vhci(dev_info_t *);
235 static mdi_phci_t *i_devi_get_phci(dev_info_t *);
236 static void i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *);
237 static void i_mdi_phci_unlock(mdi_phci_t *);
238 static mdi_pathinfo_t *i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *);
239 static void i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *);
240 static void i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *);
241 static void i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *,
242 mdi_client_t *);
243 static void i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *);
244 static void i_mdi_client_remove_path(mdi_client_t *,
245 mdi_pathinfo_t *);
246
247 static int i_mdi_pi_state_change(mdi_pathinfo_t *,
248 mdi_pathinfo_state_t, int);
249 static int i_mdi_pi_offline(mdi_pathinfo_t *, int);
250 static dev_info_t *i_mdi_devinfo_create(mdi_vhci_t *, char *, char *,
251 char **, int);
252 static dev_info_t *i_mdi_devinfo_find(mdi_vhci_t *, char *, char *);
253 static int i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int);
254 static int i_mdi_is_child_present(dev_info_t *, dev_info_t *);
255 static mdi_client_t *i_mdi_client_alloc(mdi_vhci_t *, char *, char *);
256 static void i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *);
257 static void i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *);
258 static mdi_client_t *i_mdi_client_find(mdi_vhci_t *, char *, char *);
259 static void i_mdi_client_update_state(mdi_client_t *);
260 static int i_mdi_client_compute_state(mdi_client_t *,
261 mdi_phci_t *);
262 static void i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *);
263 static void i_mdi_client_unlock(mdi_client_t *);
264 static int i_mdi_client_free(mdi_vhci_t *, mdi_client_t *);
265 static mdi_client_t *i_devi_get_client(dev_info_t *);
266 /*
267 * NOTE: this will be removed once the NWS files are changed to use the new
268 * mdi_{enable,disable}_path interfaces
269 */
270 static int i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *,
271 int, int);
272 static mdi_pathinfo_t *i_mdi_enable_disable_path(mdi_pathinfo_t *pip,
273 mdi_vhci_t *vh, int flags, int op);
274 /*
275 * Failover related function prototypes
276 */
277 static int i_mdi_failover(void *);
278
279 /*
280 * misc internal functions
281 */
282 static int i_mdi_get_hash_key(char *);
283 static int i_map_nvlist_error_to_mdi(int);
284 static void i_mdi_report_path_state(mdi_client_t *,
285 mdi_pathinfo_t *);
286
287 static void setup_vhci_cache(mdi_vhci_t *);
288 static int destroy_vhci_cache(mdi_vhci_t *);
289 static int stop_vhcache_async_threads(mdi_vhci_config_t *);
290 static boolean_t stop_vhcache_flush_thread(void *, int);
291 static void free_string_array(char **, int);
292 static void free_vhcache_phci(mdi_vhcache_phci_t *);
293 static void free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *);
294 static void free_vhcache_client(mdi_vhcache_client_t *);
295 static int mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *);
296 static nvlist_t *vhcache_to_mainnvl(mdi_vhci_cache_t *);
297 static void vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *);
298 static void vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *);
299 static void vhcache_pi_add(mdi_vhci_config_t *,
300 struct mdi_pathinfo *);
301 static void vhcache_pi_remove(mdi_vhci_config_t *,
302 struct mdi_pathinfo *);
303 static void free_phclient_path_list(mdi_phys_path_t *);
304 static void sort_vhcache_paths(mdi_vhcache_client_t *);
305 static int flush_vhcache(mdi_vhci_config_t *, int);
306 static void vhcache_dirty(mdi_vhci_config_t *);
307 static void free_async_client_config(mdi_async_client_config_t *);
308 static void single_threaded_vhconfig_enter(mdi_vhci_config_t *);
309 static void single_threaded_vhconfig_exit(mdi_vhci_config_t *);
310 static nvlist_t *read_on_disk_vhci_cache(char *);
311 extern int fread_nvlist(char *, nvlist_t **);
312 extern int fwrite_nvlist(char *, nvlist_t *);
313
314 /* called once when first vhci registers with mdi */
static void
i_mdi_init()
{
	static int initialized = 0;

	/*
	 * One-shot guard: only the first caller performs the setup below.
	 * NOTE(review): no lock protects 'initialized'; presumably the
	 * first vHCI registration is effectively single-threaded -- confirm.
	 */
	if (initialized)
		return;
	initialized = 1;

	/* Global mutex protecting the vHCI list (see mdi_mutex above). */
	mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL);

	/* Create our taskq resources */
	mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads,
	    MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC,
	    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
	ASSERT(mdi_taskq != NULL);	/* taskq_create never fails */

	/* Allocate ['path_instance' <-> "path"] maps */
	mutex_init(&mdi_pathmap_mutex, NULL, MUTEX_DRIVER, NULL);
	mdi_pathmap_bypath = mod_hash_create_strhash(
	    "mdi_pathmap_bypath", mdi_pathmap_hash_size,
	    mod_hash_null_valdtor);
	mdi_pathmap_byinstance = mod_hash_create_idhash(
	    "mdi_pathmap_byinstance", mdi_pathmap_hash_size,
	    mod_hash_null_valdtor);
	mdi_pathmap_sbyinstance = mod_hash_create_idhash(
	    "mdi_pathmap_sbyinstance", mdi_pathmap_hash_size,
	    mod_hash_null_valdtor);
}
344
345 /*
346 * mdi_get_component_type():
347 * Return mpxio component type
348 * Return Values:
349 * MDI_COMPONENT_NONE
350 * MDI_COMPONENT_VHCI
351 * MDI_COMPONENT_PHCI
352 * MDI_COMPONENT_CLIENT
353 * XXX This doesn't work under multi-level MPxIO and should be
354 * removed when clients migrate mdi_component_is_*() interfaces.
355 */
356 int
mdi_get_component_type(dev_info_t * dip)357 mdi_get_component_type(dev_info_t *dip)
358 {
359 return (DEVI(dip)->devi_mdi_component);
360 }
361
362 /*
363 * mdi_vhci_register():
364 * Register a vHCI module with the mpxio framework
365 * mdi_vhci_register() is called by vHCI drivers to register the
366 * 'class_driver' vHCI driver and its MDI entrypoints with the
367 * mpxio framework. The vHCI driver must call this interface as
368 * part of its attach(9e) handler.
 *		Competing threads may concurrently call mdi_vhci_register() as
370 * the vHCI drivers are loaded and attached as a result of pHCI
371 * driver instance registration (mdi_phci_register()) with the
372 * framework.
373 * Return Values:
374 * MDI_SUCCESS
375 * MDI_FAILURE
376 */
/*ARGSUSED*/
int
mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops,
    int flags)
{
	mdi_vhci_t		*vh = NULL;

	/* Registrant can't be older */
	ASSERT(vops->vo_revision <= MDI_VHCI_OPS_REV);

#ifdef DEBUG
	/*
	 * IB nexus driver is loaded only when IB hardware is present.
	 * In order to be able to do this there is a need to drive the loading
	 * and attaching of the IB nexus driver (especially when an IB hardware
	 * is dynamically plugged in) when an IB HCA driver (PHCI)
	 * is being attached. Unfortunately this gets into the limitations
	 * of devfs as there seems to be no clean way to drive configuration
	 * of a subtree from another subtree of a devfs. Hence, do not ASSERT
	 * for IB.
	 */
	if (strcmp(class, MDI_HCI_CLASS_IB) != 0)
		ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));
#endif

	/* One-time framework initialization (mutexes, taskq, path maps). */
	i_mdi_init();

	mutex_enter(&mdi_mutex);
	/*
	 * Scan for already registered vhci
	 */
	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
		if (strcmp(vh->vh_class, class) == 0) {
			/*
			 * vHCI has already been created. Check for valid
			 * vHCI ops registration. We only support one vHCI
			 * module per class
			 */
			if (vh->vh_ops != NULL) {
				mutex_exit(&mdi_mutex);
				cmn_err(CE_NOTE, vhci_greeting, class);
				return (MDI_FAILURE);
			}
			/*
			 * NOTE(review): this reuse path leaves vh_ops unset
			 * (it is assigned only in the allocation branch
			 * below).  It appears unreachable in practice since
			 * mdi_vhci_unregister() frees the entry -- confirm.
			 */
			break;
		}
	}

	/*
	 * if not yet created, create the vHCI component
	 */
	if (vh == NULL) {
		struct client_hash	*hash = NULL;
		char			*load_balance;

		/*
		 * Allocate and initialize the mdi extensions
		 */
		vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP);
		hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash),
		    KM_SLEEP);
		vh->vh_client_table = hash;
		vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP);
		(void) strcpy(vh->vh_class, class);
		/*
		 * Load-balancing policy defaults to round-robin unless the
		 * vHCI's .conf overrides it via the LOAD_BALANCE_PROP
		 * property ("none" or "logical-block").
		 */
		vh->vh_lb = LOAD_BALANCE_RR;
		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip,
		    0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) {
			if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) {
				vh->vh_lb = LOAD_BALANCE_NONE;
			} else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA)
			    == 0) {
				vh->vh_lb = LOAD_BALANCE_LBA;
			}
			ddi_prop_free(load_balance);
		}

		mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL);

		/*
		 * Store the vHCI ops vectors
		 */
		vh->vh_dip = vdip;
		vh->vh_ops = vops;

		setup_vhci_cache(vh);

		/* Append to the global vHCI list (protected by mdi_mutex). */
		if (mdi_vhci_head == NULL) {
			mdi_vhci_head = vh;
		}
		if (mdi_vhci_tail) {
			mdi_vhci_tail->vh_next = vh;
		}
		mdi_vhci_tail = vh;
		mdi_vhci_count++;
	}

	/*
	 * Claim the devfs node as a vhci component
	 */
	DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI;

	/*
	 * Initialize our back reference from dev_info node
	 */
	DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh;
	mutex_exit(&mdi_mutex);
	return (MDI_SUCCESS);
}
485
486 /*
487 * mdi_vhci_unregister():
488 * Unregister a vHCI module from mpxio framework
489 * mdi_vhci_unregister() is called from the detach(9E) entrypoint
490 * of a vhci to unregister it from the framework.
491 * Return Values:
492 * MDI_SUCCESS
493 * MDI_FAILURE
494 */
/*ARGSUSED*/
int
mdi_vhci_unregister(dev_info_t *vdip, int flags)
{
	mdi_vhci_t	*found, *vh, *prev = NULL;

	/* Caller holds parent busy; we are in detach(9E) context. */
	ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));

	/*
	 * Check for invalid VHCI
	 */
	if ((vh = i_devi_get_vhci(vdip)) == NULL)
		return (MDI_FAILURE);

	/*
	 * Scan the list of registered vHCIs for a match
	 */
	mutex_enter(&mdi_mutex);
	for (found = mdi_vhci_head; found != NULL; found = found->vh_next) {
		if (found == vh)
			break;
		prev = found;	/* remember predecessor for the unlink below */
	}

	if (found == NULL) {
		mutex_exit(&mdi_mutex);
		return (MDI_FAILURE);
	}

	/*
	 * Check the vHCI, pHCI and client count. All the pHCIs and clients
	 * should have been unregistered, before a vHCI can be
	 * unregistered.
	 */
	MDI_VHCI_PHCI_LOCK(vh);
	if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) {
		MDI_VHCI_PHCI_UNLOCK(vh);
		mutex_exit(&mdi_mutex);
		return (MDI_FAILURE);
	}
	MDI_VHCI_PHCI_UNLOCK(vh);

	/* The vhci cache must be torn down before vh can be freed. */
	if (destroy_vhci_cache(vh) != MDI_SUCCESS) {
		mutex_exit(&mdi_mutex);
		return (MDI_FAILURE);
	}

	/*
	 * Remove the vHCI from the global list
	 */
	if (vh == mdi_vhci_head) {
		mdi_vhci_head = vh->vh_next;
	} else {
		prev->vh_next = vh->vh_next;
	}
	if (vh == mdi_vhci_tail) {
		mdi_vhci_tail = prev;
	}
	mdi_vhci_count--;
	mutex_exit(&mdi_mutex);

	/* Clear devinfo back-references, then free all vHCI resources. */
	vh->vh_ops = NULL;
	DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI;
	DEVI(vdip)->devi_mdi_xhci = NULL;
	kmem_free(vh->vh_class, strlen(vh->vh_class)+1);
	kmem_free(vh->vh_client_table,
	    mdi_client_table_size * sizeof (struct client_hash));
	mutex_destroy(&vh->vh_phci_mutex);
	mutex_destroy(&vh->vh_client_mutex);

	kmem_free(vh, sizeof (mdi_vhci_t));
	return (MDI_SUCCESS);
}
568
569 /*
570 * i_mdi_vhci_class2vhci():
571 * Look for a matching vHCI module given a vHCI class name
572 * Return Values:
573 * Handle to a vHCI component
574 * NULL
575 */
576 static mdi_vhci_t *
i_mdi_vhci_class2vhci(char * class)577 i_mdi_vhci_class2vhci(char *class)
578 {
579 mdi_vhci_t *vh = NULL;
580
581 ASSERT(!MUTEX_HELD(&mdi_mutex));
582
583 mutex_enter(&mdi_mutex);
584 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
585 if (strcmp(vh->vh_class, class) == 0) {
586 break;
587 }
588 }
589 mutex_exit(&mdi_mutex);
590 return (vh);
591 }
592
593 /*
594 * i_devi_get_vhci():
595 * Utility function to get the handle to a vHCI component
596 * Return Values:
597 * Handle to a vHCI component
598 * NULL
599 */
600 mdi_vhci_t *
i_devi_get_vhci(dev_info_t * vdip)601 i_devi_get_vhci(dev_info_t *vdip)
602 {
603 mdi_vhci_t *vh = NULL;
604 if (MDI_VHCI(vdip)) {
605 vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci;
606 }
607 return (vh);
608 }
609
610 /*
611 * mdi_phci_register():
612 * Register a pHCI module with mpxio framework
613 * mdi_phci_register() is called by pHCI drivers to register with
614 * the mpxio framework and a specific 'class_driver' vHCI. The
615 * pHCI driver must call this interface as part of its attach(9e)
616 * handler.
617 * Return Values:
618 * MDI_SUCCESS
619 * MDI_FAILURE
620 */
/*ARGSUSED*/
int
mdi_phci_register(char *class, dev_info_t *pdip, int flags)
{
	mdi_phci_t		*ph;
	mdi_vhci_t		*vh;
	char			*data;

	/*
	 * Some subsystems, like fcp, perform pHCI registration from a
	 * different thread than the one doing the pHCI attach(9E) - the
	 * driver attach code is waiting for this other thread to complete.
	 * This means we can only ASSERT DEVI_BUSY_CHANGING of parent
	 * (indicating that some thread has done an ndi_devi_enter of parent)
	 * not DEVI_BUSY_OWNED (which would indicate that we did the enter).
	 */
	ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));

	/*
	 * Check for mpxio-disable property. Enable mpxio if the property is
	 * missing or not set to "yes".
	 * If the property is set to "yes" then emit a brief message.
	 */
	if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable",
	    &data) == DDI_SUCCESS)) {
		if (strcmp(data, "yes") == 0) {
			MDI_DEBUG(1, (MDI_CONT, pdip,
			    "?multipath capabilities disabled via %s.conf.",
			    ddi_driver_name(pdip)));
			ddi_prop_free(data);
			return (MDI_FAILURE);
		}
		ddi_prop_free(data);
	}

	/*
	 * Search for a matching vHCI
	 */
	vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
	if (vh == NULL) {
		/* No vHCI of this class has registered yet. */
		return (MDI_FAILURE);
	}

	/* Allocate and initialize the pHCI extension. */
	ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP);
	mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL);
	ph->ph_dip = pdip;
	ph->ph_vhci = vh;
	ph->ph_next = NULL;
	ph->ph_unstable = 0;
	ph->ph_vprivate = 0;
	cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL);

	/* New pHCIs start in the powered-up state. */
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_SET_POWER_UP(ph);
	MDI_PHCI_UNLOCK(ph);

	/* Claim the devfs node and set the back reference to ph. */
	DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI;
	DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph;

	/* Record this pHCI in the vhci cache. */
	vhcache_phci_add(vh->vh_config, ph);

	/* Append to the vHCI's pHCI list. */
	MDI_VHCI_PHCI_LOCK(vh);
	if (vh->vh_phci_head == NULL) {
		vh->vh_phci_head = ph;
	}
	if (vh->vh_phci_tail) {
		vh->vh_phci_tail->ph_next = ph;
	}
	vh->vh_phci_tail = ph;
	vh->vh_phci_count++;
	MDI_VHCI_PHCI_UNLOCK(vh);

	/* Announce initiator registration via sysevent. */
	i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER);
	return (MDI_SUCCESS);
}
695
696 /*
697 * mdi_phci_unregister():
698 * Unregister a pHCI module from mpxio framework
699 * mdi_phci_unregister() is called by the pHCI drivers from their
700 * detach(9E) handler to unregister their instances from the
701 * framework.
702 * Return Values:
703 * MDI_SUCCESS
704 * MDI_FAILURE
705 */
/*ARGSUSED*/
int
mdi_phci_unregister(dev_info_t *pdip, int flags)
{
	mdi_vhci_t		*vh;
	mdi_phci_t		*ph;
	mdi_phci_t		*tmp;
	mdi_phci_t		*prev = NULL;
	mdi_pathinfo_t		*pip;

	/* See mdi_phci_register() for why this is not DEVI_BUSY_OWNED. */
	ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));

	ph = i_devi_get_phci(pdip);
	if (ph == NULL) {
		MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid pHCI"));
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	/* ASSERT on DEBUG kernels; graceful failure on non-DEBUG. */
	ASSERT(vh != NULL);
	if (vh == NULL) {
		MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid vHCI"));
		return (MDI_FAILURE);
	}

	/*
	 * Unlink ph from the vHCI's pHCI list.  The unlink code below
	 * assumes ph is present on the list (it was added there by
	 * mdi_phci_register()).
	 */
	MDI_VHCI_PHCI_LOCK(vh);
	tmp = vh->vh_phci_head;
	while (tmp) {
		if (tmp == ph) {
			break;
		}
		prev = tmp;
		tmp = tmp->ph_next;
	}

	if (ph == vh->vh_phci_head) {
		vh->vh_phci_head = ph->ph_next;
	} else {
		prev->ph_next = ph->ph_next;
	}

	if (ph == vh->vh_phci_tail) {
		vh->vh_phci_tail = prev;
	}

	vh->vh_phci_count--;
	MDI_VHCI_PHCI_UNLOCK(vh);

	/* Walk remaining pathinfo nodes and disassociate them from pHCI */
	MDI_PHCI_LOCK(ph);
	for (pip = (mdi_pathinfo_t *)ph->ph_path_head; pip;
	    pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link)
		MDI_PI(pip)->pi_phci = NULL;
	MDI_PHCI_UNLOCK(ph);

	/* Announce unregistration, drop the cache entry, then free ph. */
	i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class,
	    ESC_DDI_INITIATOR_UNREGISTER);
	vhcache_phci_remove(vh->vh_config, ph);
	cv_destroy(&ph->ph_unstable_cv);
	mutex_destroy(&ph->ph_mutex);
	kmem_free(ph, sizeof (mdi_phci_t));
	DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI;
	DEVI(pdip)->devi_mdi_xhci = NULL;
	return (MDI_SUCCESS);
}
771
772 /*
773 * i_devi_get_phci():
774 * Utility function to return the phci extensions.
775 */
776 static mdi_phci_t *
i_devi_get_phci(dev_info_t * pdip)777 i_devi_get_phci(dev_info_t *pdip)
778 {
779 mdi_phci_t *ph = NULL;
780
781 if (MDI_PHCI(pdip)) {
782 ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci;
783 }
784 return (ph);
785 }
786
787 /*
788 * Single thread mdi entry into devinfo node for modifying its children.
789 * If necessary we perform an ndi_devi_enter of the vHCI before doing
790 * an ndi_devi_enter of 'dip'. We maintain circular in two parts: one
791 * for the vHCI and one for the pHCI.
792 */
void
mdi_devi_enter(dev_info_t *phci_dip, int *circular)
{
	dev_info_t	*vdip;
	int		vcircular, pcircular;

	/* Verify calling context */
	ASSERT(MDI_PHCI(phci_dip));
	vdip = mdi_devi_get_vdip(phci_dip);
	ASSERT(vdip);			/* A pHCI always has a vHCI */

	/*
	 * If pHCI is detaching then the framework has already entered the
	 * vHCI on a thread that went down the code path leading to
	 * detach_node(). This framework enter of the vHCI during pHCI
	 * detach is done to avoid deadlock with vHCI power management
	 * operations which enter the vHCI and the enter down the path
	 * to the pHCI. If pHCI is detaching then we piggyback this call's
	 * enter of the vHCI on the framework's vHCI enter that has already
	 * occurred - this is OK because we know that the framework thread
	 * doing detach is waiting for our completion.
	 *
	 * We should DEVI_IS_DETACHING under an enter of the parent to avoid
	 * race with detach - but we can't do that because the framework has
	 * already entered the parent, so we have some complexity instead.
	 */
	for (;;) {
		if (ndi_devi_tryenter(vdip, &vcircular)) {
			ASSERT(vcircular != -1);
			if (DEVI_IS_DETACHING(phci_dip)) {
				/* undo: framework already holds the vHCI */
				ndi_devi_exit(vdip, vcircular);
				vcircular = -1;	/* mark vHCI not entered */
			}
			break;
		} else if (DEVI_IS_DETACHING(phci_dip)) {
			vcircular = -1;	/* piggyback on framework's enter */
			break;
		} else if (servicing_interrupt()) {
			/*
			 * Don't delay an interrupt (and ensure adaptive
			 * mutex inversion support).
			 */
			ndi_devi_enter(vdip, &vcircular);
			break;
		} else {
			delay_random(mdi_delay);	/* back off, retry */
		}
	}

	/* Enter the pHCI and pack both circular values into one int. */
	ndi_devi_enter(phci_dip, &pcircular);
	*circular = (vcircular << 16) | (pcircular & 0xFFFF);
}
845
846 /*
847 * Attempt to mdi_devi_enter.
848 */
849 int
mdi_devi_tryenter(dev_info_t * phci_dip,int * circular)850 mdi_devi_tryenter(dev_info_t *phci_dip, int *circular)
851 {
852 dev_info_t *vdip;
853 int vcircular, pcircular;
854
855 /* Verify calling context */
856 ASSERT(MDI_PHCI(phci_dip));
857 vdip = mdi_devi_get_vdip(phci_dip);
858 ASSERT(vdip); /* A pHCI always has a vHCI */
859
860 if (ndi_devi_tryenter(vdip, &vcircular)) {
861 if (ndi_devi_tryenter(phci_dip, &pcircular)) {
862 *circular = (vcircular << 16) | (pcircular & 0xFFFF);
863 return (1); /* locked */
864 }
865 ndi_devi_exit(vdip, vcircular);
866 }
867 return (0); /* busy */
868 }
869
870 /*
871 * Release mdi_devi_enter or successful mdi_devi_tryenter.
872 */
873 void
mdi_devi_exit(dev_info_t * phci_dip,int circular)874 mdi_devi_exit(dev_info_t *phci_dip, int circular)
875 {
876 dev_info_t *vdip;
877 int vcircular, pcircular;
878
879 /* Verify calling context */
880 ASSERT(MDI_PHCI(phci_dip));
881 vdip = mdi_devi_get_vdip(phci_dip);
882 ASSERT(vdip); /* A pHCI always has a vHCI */
883
884 /* extract two circular recursion values from single int */
885 pcircular = (short)(circular & 0xFFFF);
886 vcircular = (short)((circular >> 16) & 0xFFFF);
887
888 ndi_devi_exit(phci_dip, pcircular);
889 if (vcircular != -1)
890 ndi_devi_exit(vdip, vcircular);
891 }
892
893 /*
894 * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used
895 * around a pHCI drivers calls to mdi_pi_online/offline, after holding
896 * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock
897 * with vHCI power management code during path online/offline. Each
898 * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must
899 * occur within the scope of an active mdi_devi_enter that establishes the
900 * circular value.
901 */
902 void
mdi_devi_exit_phci(dev_info_t * phci_dip,int circular)903 mdi_devi_exit_phci(dev_info_t *phci_dip, int circular)
904 {
905 int pcircular;
906
907 /* Verify calling context */
908 ASSERT(MDI_PHCI(phci_dip));
909
910 /* Keep hold on pHCI until we reenter in mdi_devi_enter_phci */
911 ndi_hold_devi(phci_dip);
912
913 pcircular = (short)(circular & 0xFFFF);
914 ndi_devi_exit(phci_dip, pcircular);
915 }
916
917 void
mdi_devi_enter_phci(dev_info_t * phci_dip,int * circular)918 mdi_devi_enter_phci(dev_info_t *phci_dip, int *circular)
919 {
920 int pcircular;
921
922 /* Verify calling context */
923 ASSERT(MDI_PHCI(phci_dip));
924
925 ndi_devi_enter(phci_dip, &pcircular);
926
927 /* Drop hold from mdi_devi_exit_phci. */
928 ndi_rele_devi(phci_dip);
929
930 /* verify matching mdi_devi_exit_phci/mdi_devi_enter_phci use */
931 ASSERT(pcircular == ((short)(*circular & 0xFFFF)));
932 }
933
934 /*
935 * mdi_devi_get_vdip():
936 * given a pHCI dip return vHCI dip
937 */
938 dev_info_t *
mdi_devi_get_vdip(dev_info_t * pdip)939 mdi_devi_get_vdip(dev_info_t *pdip)
940 {
941 mdi_phci_t *ph;
942
943 ph = i_devi_get_phci(pdip);
944 if (ph && ph->ph_vhci)
945 return (ph->ph_vhci->vh_dip);
946 return (NULL);
947 }
948
949 /*
950 * mdi_devi_pdip_entered():
951 * Return 1 if we are vHCI and have done an ndi_devi_enter
952 * of a pHCI
953 */
954 int
mdi_devi_pdip_entered(dev_info_t * vdip)955 mdi_devi_pdip_entered(dev_info_t *vdip)
956 {
957 mdi_vhci_t *vh;
958 mdi_phci_t *ph;
959
960 vh = i_devi_get_vhci(vdip);
961 if (vh == NULL)
962 return (0);
963
964 MDI_VHCI_PHCI_LOCK(vh);
965 ph = vh->vh_phci_head;
966 while (ph) {
967 if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) {
968 MDI_VHCI_PHCI_UNLOCK(vh);
969 return (1);
970 }
971 ph = ph->ph_next;
972 }
973 MDI_VHCI_PHCI_UNLOCK(vh);
974 return (0);
975 }
976
977 /*
978 * mdi_phci_path2devinfo():
979 * Utility function to search for a valid phci device given
980 * the devfs pathname.
981 */
982 dev_info_t *
mdi_phci_path2devinfo(dev_info_t * vdip,caddr_t pathname)983 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname)
984 {
985 char *temp_pathname;
986 mdi_vhci_t *vh;
987 mdi_phci_t *ph;
988 dev_info_t *pdip = NULL;
989
990 vh = i_devi_get_vhci(vdip);
991 ASSERT(vh != NULL);
992
993 if (vh == NULL) {
994 /*
995 * Invalid vHCI component, return failure
996 */
997 return (NULL);
998 }
999
1000 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1001 MDI_VHCI_PHCI_LOCK(vh);
1002 ph = vh->vh_phci_head;
1003 while (ph != NULL) {
1004 pdip = ph->ph_dip;
1005 ASSERT(pdip != NULL);
1006 *temp_pathname = '\0';
1007 (void) ddi_pathname(pdip, temp_pathname);
1008 if (strcmp(temp_pathname, pathname) == 0) {
1009 break;
1010 }
1011 ph = ph->ph_next;
1012 }
1013 if (ph == NULL) {
1014 pdip = NULL;
1015 }
1016 MDI_VHCI_PHCI_UNLOCK(vh);
1017 kmem_free(temp_pathname, MAXPATHLEN);
1018 return (pdip);
1019 }
1020
1021 /*
1022 * mdi_phci_get_path_count():
1023 * get number of path information nodes associated with a given
1024 * pHCI device.
1025 */
1026 int
mdi_phci_get_path_count(dev_info_t * pdip)1027 mdi_phci_get_path_count(dev_info_t *pdip)
1028 {
1029 mdi_phci_t *ph;
1030 int count = 0;
1031
1032 ph = i_devi_get_phci(pdip);
1033 if (ph != NULL) {
1034 count = ph->ph_path_count;
1035 }
1036 return (count);
1037 }
1038
1039 /*
1040 * i_mdi_phci_lock():
1041 * Lock a pHCI device
1042 * Return Values:
1043 * None
1044 * Note:
1045 * The default locking order is:
1046 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
1047 * But there are number of situations where locks need to be
1048 * grabbed in reverse order. This routine implements try and lock
1049 * mechanism depending on the requested parameter option.
1050 */
1051 static void
i_mdi_phci_lock(mdi_phci_t * ph,mdi_pathinfo_t * pip)1052 i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip)
1053 {
1054 if (pip) {
1055 /* Reverse locking is requested. */
1056 while (MDI_PHCI_TRYLOCK(ph) == 0) {
1057 if (servicing_interrupt()) {
1058 MDI_PI_HOLD(pip);
1059 MDI_PI_UNLOCK(pip);
1060 MDI_PHCI_LOCK(ph);
1061 MDI_PI_LOCK(pip);
1062 MDI_PI_RELE(pip);
1063 break;
1064 } else {
1065 /*
1066 * tryenter failed. Try to grab again
1067 * after a small delay
1068 */
1069 MDI_PI_HOLD(pip);
1070 MDI_PI_UNLOCK(pip);
1071 delay_random(mdi_delay);
1072 MDI_PI_LOCK(pip);
1073 MDI_PI_RELE(pip);
1074 }
1075 }
1076 } else {
1077 MDI_PHCI_LOCK(ph);
1078 }
1079 }
1080
1081 /*
1082 * i_mdi_phci_unlock():
1083 * Unlock the pHCI component
1084 */
1085 static void
i_mdi_phci_unlock(mdi_phci_t * ph)1086 i_mdi_phci_unlock(mdi_phci_t *ph)
1087 {
1088 MDI_PHCI_UNLOCK(ph);
1089 }
1090
1091 /*
1092 * i_mdi_devinfo_create():
1093 * create client device's devinfo node
1094 * Return Values:
1095 * dev_info
1096 * NULL
1097 * Notes:
1098 */
1099 static dev_info_t *
i_mdi_devinfo_create(mdi_vhci_t * vh,char * name,char * guid,char ** compatible,int ncompatible)1100 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid,
1101 char **compatible, int ncompatible)
1102 {
1103 dev_info_t *cdip = NULL;
1104
1105 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1106
1107 /* Verify for duplicate entry */
1108 cdip = i_mdi_devinfo_find(vh, name, guid);
1109 ASSERT(cdip == NULL);
1110 if (cdip) {
1111 cmn_err(CE_WARN,
1112 "i_mdi_devinfo_create: client %s@%s already exists",
1113 name ? name : "", guid ? guid : "");
1114 }
1115
1116 ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip);
1117 if (cdip == NULL)
1118 goto fail;
1119
1120 /*
1121 * Create component type and Global unique identifier
1122 * properties
1123 */
1124 if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip,
1125 MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) {
1126 goto fail;
1127 }
1128
1129 /* Decorate the node with compatible property */
1130 if (compatible &&
1131 (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip,
1132 "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) {
1133 goto fail;
1134 }
1135
1136 return (cdip);
1137
1138 fail:
1139 if (cdip) {
1140 (void) ndi_prop_remove_all(cdip);
1141 (void) ndi_devi_free(cdip);
1142 }
1143 return (NULL);
1144 }
1145
1146 /*
1147 * i_mdi_devinfo_find():
1148 * Find a matching devinfo node for given client node name
1149 * and its guid.
1150 * Return Values:
1151 * Handle to a dev_info node or NULL
1152 */
1153 static dev_info_t *
i_mdi_devinfo_find(mdi_vhci_t * vh,caddr_t name,char * guid)1154 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid)
1155 {
1156 char *data;
1157 dev_info_t *cdip = NULL;
1158 dev_info_t *ndip = NULL;
1159 int circular;
1160
1161 ndi_devi_enter(vh->vh_dip, &circular);
1162 ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child;
1163 while ((cdip = ndip) != NULL) {
1164 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
1165
1166 if (strcmp(DEVI(cdip)->devi_node_name, name)) {
1167 continue;
1168 }
1169
1170 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip,
1171 DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP,
1172 &data) != DDI_PROP_SUCCESS) {
1173 continue;
1174 }
1175
1176 if (strcmp(data, guid) != 0) {
1177 ddi_prop_free(data);
1178 continue;
1179 }
1180 ddi_prop_free(data);
1181 break;
1182 }
1183 ndi_devi_exit(vh->vh_dip, circular);
1184 return (cdip);
1185 }
1186
1187 /*
1188 * i_mdi_devinfo_remove():
1189 * Remove a client device node
1190 */
1191 static int
i_mdi_devinfo_remove(dev_info_t * vdip,dev_info_t * cdip,int flags)1192 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags)
1193 {
1194 int rv = MDI_SUCCESS;
1195
1196 if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS ||
1197 (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) {
1198 rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN | NDI_DEVI_REMOVE);
1199 if (rv != NDI_SUCCESS) {
1200 MDI_DEBUG(1, (MDI_NOTE, cdip,
1201 "!failed: cdip %p", (void *)cdip));
1202 }
1203 /*
1204 * Convert to MDI error code
1205 */
1206 switch (rv) {
1207 case NDI_SUCCESS:
1208 rv = MDI_SUCCESS;
1209 break;
1210 case NDI_BUSY:
1211 rv = MDI_BUSY;
1212 break;
1213 default:
1214 rv = MDI_FAILURE;
1215 break;
1216 }
1217 }
1218 return (rv);
1219 }
1220
1221 /*
1222 * i_devi_get_client()
1223 * Utility function to get mpxio component extensions
1224 */
1225 static mdi_client_t *
i_devi_get_client(dev_info_t * cdip)1226 i_devi_get_client(dev_info_t *cdip)
1227 {
1228 mdi_client_t *ct = NULL;
1229
1230 if (MDI_CLIENT(cdip)) {
1231 ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client;
1232 }
1233 return (ct);
1234 }
1235
1236 /*
1237 * i_mdi_is_child_present():
1238 * Search for the presence of client device dev_info node
1239 */
1240 static int
i_mdi_is_child_present(dev_info_t * vdip,dev_info_t * cdip)1241 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip)
1242 {
1243 int rv = MDI_FAILURE;
1244 struct dev_info *dip;
1245 int circular;
1246
1247 ndi_devi_enter(vdip, &circular);
1248 dip = DEVI(vdip)->devi_child;
1249 while (dip) {
1250 if (dip == DEVI(cdip)) {
1251 rv = MDI_SUCCESS;
1252 break;
1253 }
1254 dip = dip->devi_sibling;
1255 }
1256 ndi_devi_exit(vdip, circular);
1257 return (rv);
1258 }
1259
1260
1261 /*
1262 * i_mdi_client_lock():
1263 * Grab client component lock
1264 * Return Values:
1265 * None
1266 * Note:
1267 * The default locking order is:
1268 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
1269 * But there are number of situations where locks need to be
1270 * grabbed in reverse order. This routine implements try and lock
1271 * mechanism depending on the requested parameter option.
1272 */
1273 static void
i_mdi_client_lock(mdi_client_t * ct,mdi_pathinfo_t * pip)1274 i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip)
1275 {
1276 if (pip) {
1277 /*
1278 * Reverse locking is requested.
1279 */
1280 while (MDI_CLIENT_TRYLOCK(ct) == 0) {
1281 if (servicing_interrupt()) {
1282 MDI_PI_HOLD(pip);
1283 MDI_PI_UNLOCK(pip);
1284 MDI_CLIENT_LOCK(ct);
1285 MDI_PI_LOCK(pip);
1286 MDI_PI_RELE(pip);
1287 break;
1288 } else {
1289 /*
1290 * tryenter failed. Try to grab again
1291 * after a small delay
1292 */
1293 MDI_PI_HOLD(pip);
1294 MDI_PI_UNLOCK(pip);
1295 delay_random(mdi_delay);
1296 MDI_PI_LOCK(pip);
1297 MDI_PI_RELE(pip);
1298 }
1299 }
1300 } else {
1301 MDI_CLIENT_LOCK(ct);
1302 }
1303 }
1304
1305 /*
1306 * i_mdi_client_unlock():
1307 * Unlock a client component
1308 */
1309 static void
i_mdi_client_unlock(mdi_client_t * ct)1310 i_mdi_client_unlock(mdi_client_t *ct)
1311 {
1312 MDI_CLIENT_UNLOCK(ct);
1313 }
1314
1315 /*
1316 * i_mdi_client_alloc():
1317 * Allocate and initialize a client structure. Caller should
1318 * hold the vhci client lock.
1319 * Return Values:
1320 * Handle to a client component
1321 */
1322 /*ARGSUSED*/
1323 static mdi_client_t *
i_mdi_client_alloc(mdi_vhci_t * vh,char * name,char * lguid)1324 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid)
1325 {
1326 mdi_client_t *ct;
1327
1328 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1329
1330 /*
1331 * Allocate and initialize a component structure.
1332 */
1333 ct = kmem_zalloc(sizeof (*ct), KM_SLEEP);
1334 mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
1335 ct->ct_hnext = NULL;
1336 ct->ct_hprev = NULL;
1337 ct->ct_dip = NULL;
1338 ct->ct_vhci = vh;
1339 ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP);
1340 (void) strcpy(ct->ct_drvname, name);
1341 ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP);
1342 (void) strcpy(ct->ct_guid, lguid);
1343 ct->ct_cprivate = NULL;
1344 ct->ct_vprivate = NULL;
1345 ct->ct_flags = 0;
1346 ct->ct_state = MDI_CLIENT_STATE_FAILED;
1347 MDI_CLIENT_LOCK(ct);
1348 MDI_CLIENT_SET_OFFLINE(ct);
1349 MDI_CLIENT_SET_DETACH(ct);
1350 MDI_CLIENT_SET_POWER_UP(ct);
1351 MDI_CLIENT_UNLOCK(ct);
1352 ct->ct_failover_flags = 0;
1353 ct->ct_failover_status = 0;
1354 cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL);
1355 ct->ct_unstable = 0;
1356 cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL);
1357 cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL);
1358 ct->ct_lb = vh->vh_lb;
1359 ct->ct_lb_args = kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP);
1360 ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE;
1361 ct->ct_path_count = 0;
1362 ct->ct_path_head = NULL;
1363 ct->ct_path_tail = NULL;
1364 ct->ct_path_last = NULL;
1365
1366 /*
1367 * Add this client component to our client hash queue
1368 */
1369 i_mdi_client_enlist_table(vh, ct);
1370 return (ct);
1371 }
1372
1373 /*
1374 * i_mdi_client_enlist_table():
1375 * Attach the client device to the client hash table. Caller
1376 * should hold the vhci client lock.
1377 */
1378 static void
i_mdi_client_enlist_table(mdi_vhci_t * vh,mdi_client_t * ct)1379 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1380 {
1381 int index;
1382 struct client_hash *head;
1383
1384 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1385
1386 index = i_mdi_get_hash_key(ct->ct_guid);
1387 head = &vh->vh_client_table[index];
1388 ct->ct_hnext = (mdi_client_t *)head->ct_hash_head;
1389 head->ct_hash_head = ct;
1390 head->ct_hash_count++;
1391 vh->vh_client_count++;
1392 }
1393
1394 /*
1395 * i_mdi_client_delist_table():
1396 * Attach the client device to the client hash table.
1397 * Caller should hold the vhci client lock.
1398 */
1399 static void
i_mdi_client_delist_table(mdi_vhci_t * vh,mdi_client_t * ct)1400 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1401 {
1402 int index;
1403 char *guid;
1404 struct client_hash *head;
1405 mdi_client_t *next;
1406 mdi_client_t *last;
1407
1408 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1409
1410 guid = ct->ct_guid;
1411 index = i_mdi_get_hash_key(guid);
1412 head = &vh->vh_client_table[index];
1413
1414 last = NULL;
1415 next = (mdi_client_t *)head->ct_hash_head;
1416 while (next != NULL) {
1417 if (next == ct) {
1418 break;
1419 }
1420 last = next;
1421 next = next->ct_hnext;
1422 }
1423
1424 if (next) {
1425 head->ct_hash_count--;
1426 if (last == NULL) {
1427 head->ct_hash_head = ct->ct_hnext;
1428 } else {
1429 last->ct_hnext = ct->ct_hnext;
1430 }
1431 ct->ct_hnext = NULL;
1432 vh->vh_client_count--;
1433 }
1434 }
1435
1436
1437 /*
1438 * i_mdi_client_free():
1439 * Free a client component
1440 */
1441 static int
i_mdi_client_free(mdi_vhci_t * vh,mdi_client_t * ct)1442 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct)
1443 {
1444 int rv = MDI_SUCCESS;
1445 int flags = ct->ct_flags;
1446 dev_info_t *cdip;
1447 dev_info_t *vdip;
1448
1449 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1450
1451 vdip = vh->vh_dip;
1452 cdip = ct->ct_dip;
1453
1454 (void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP);
1455 DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT;
1456 DEVI(cdip)->devi_mdi_client = NULL;
1457
1458 /*
1459 * Clear out back ref. to dev_info_t node
1460 */
1461 ct->ct_dip = NULL;
1462
1463 /*
1464 * Remove this client from our hash queue
1465 */
1466 i_mdi_client_delist_table(vh, ct);
1467
1468 /*
1469 * Uninitialize and free the component
1470 */
1471 kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1);
1472 kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1);
1473 kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t));
1474 cv_destroy(&ct->ct_failover_cv);
1475 cv_destroy(&ct->ct_unstable_cv);
1476 cv_destroy(&ct->ct_powerchange_cv);
1477 mutex_destroy(&ct->ct_mutex);
1478 kmem_free(ct, sizeof (*ct));
1479
1480 if (cdip != NULL) {
1481 MDI_VHCI_CLIENT_UNLOCK(vh);
1482 (void) i_mdi_devinfo_remove(vdip, cdip, flags);
1483 MDI_VHCI_CLIENT_LOCK(vh);
1484 }
1485 return (rv);
1486 }
1487
1488 /*
1489 * i_mdi_client_find():
1490 * Find the client structure corresponding to a given guid
1491 * Caller should hold the vhci client lock.
1492 */
1493 static mdi_client_t *
i_mdi_client_find(mdi_vhci_t * vh,char * cname,char * guid)1494 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid)
1495 {
1496 int index;
1497 struct client_hash *head;
1498 mdi_client_t *ct;
1499
1500 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1501
1502 index = i_mdi_get_hash_key(guid);
1503 head = &vh->vh_client_table[index];
1504
1505 ct = head->ct_hash_head;
1506 while (ct != NULL) {
1507 if (strcmp(ct->ct_guid, guid) == 0 &&
1508 (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) {
1509 break;
1510 }
1511 ct = ct->ct_hnext;
1512 }
1513 return (ct);
1514 }
1515
1516 /*
1517 * i_mdi_client_update_state():
1518 * Compute and update client device state
1519 * Notes:
1520 * A client device can be in any of three possible states:
1521 *
1522 * MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more
1523 * one online/standby paths. Can tolerate failures.
1524 * MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with
1525 * no alternate paths available as standby. A failure on the online
1526 * would result in loss of access to device data.
1527 * MDI_CLIENT_STATE_FAILED - Client device in failed state with
1528 * no paths available to access the device.
1529 */
1530 static void
i_mdi_client_update_state(mdi_client_t * ct)1531 i_mdi_client_update_state(mdi_client_t *ct)
1532 {
1533 int state;
1534
1535 ASSERT(MDI_CLIENT_LOCKED(ct));
1536 state = i_mdi_client_compute_state(ct, NULL);
1537 MDI_CLIENT_SET_STATE(ct, state);
1538 }
1539
1540 /*
1541 * i_mdi_client_compute_state():
1542 * Compute client device state
1543 *
1544 * mdi_phci_t * Pointer to pHCI structure which should
1545 * while computing the new value. Used by
1546 * i_mdi_phci_offline() to find the new
1547 * client state after DR of a pHCI.
1548 */
1549 static int
i_mdi_client_compute_state(mdi_client_t * ct,mdi_phci_t * ph)1550 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph)
1551 {
1552 int state;
1553 int online_count = 0;
1554 int standby_count = 0;
1555 mdi_pathinfo_t *pip, *next;
1556
1557 ASSERT(MDI_CLIENT_LOCKED(ct));
1558 pip = ct->ct_path_head;
1559 while (pip != NULL) {
1560 MDI_PI_LOCK(pip);
1561 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
1562 if (MDI_PI(pip)->pi_phci == ph) {
1563 MDI_PI_UNLOCK(pip);
1564 pip = next;
1565 continue;
1566 }
1567
1568 if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1569 == MDI_PATHINFO_STATE_ONLINE)
1570 online_count++;
1571 else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1572 == MDI_PATHINFO_STATE_STANDBY)
1573 standby_count++;
1574 MDI_PI_UNLOCK(pip);
1575 pip = next;
1576 }
1577
1578 if (online_count == 0) {
1579 if (standby_count == 0) {
1580 state = MDI_CLIENT_STATE_FAILED;
1581 MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip,
1582 "client state failed: ct = %p", (void *)ct));
1583 } else if (standby_count == 1) {
1584 state = MDI_CLIENT_STATE_DEGRADED;
1585 } else {
1586 state = MDI_CLIENT_STATE_OPTIMAL;
1587 }
1588 } else if (online_count == 1) {
1589 if (standby_count == 0) {
1590 state = MDI_CLIENT_STATE_DEGRADED;
1591 } else {
1592 state = MDI_CLIENT_STATE_OPTIMAL;
1593 }
1594 } else {
1595 state = MDI_CLIENT_STATE_OPTIMAL;
1596 }
1597 return (state);
1598 }
1599
1600 /*
1601 * i_mdi_client2devinfo():
1602 * Utility function
1603 */
1604 dev_info_t *
i_mdi_client2devinfo(mdi_client_t * ct)1605 i_mdi_client2devinfo(mdi_client_t *ct)
1606 {
1607 return (ct->ct_dip);
1608 }
1609
1610 /*
1611 * mdi_client_path2_devinfo():
1612 * Given the parent devinfo and child devfs pathname, search for
1613 * a valid devfs node handle.
1614 */
1615 dev_info_t *
mdi_client_path2devinfo(dev_info_t * vdip,char * pathname)1616 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname)
1617 {
1618 dev_info_t *cdip = NULL;
1619 dev_info_t *ndip = NULL;
1620 char *temp_pathname;
1621 int circular;
1622
1623 /*
1624 * Allocate temp buffer
1625 */
1626 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1627
1628 /*
1629 * Lock parent against changes
1630 */
1631 ndi_devi_enter(vdip, &circular);
1632 ndip = (dev_info_t *)DEVI(vdip)->devi_child;
1633 while ((cdip = ndip) != NULL) {
1634 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
1635
1636 *temp_pathname = '\0';
1637 (void) ddi_pathname(cdip, temp_pathname);
1638 if (strcmp(temp_pathname, pathname) == 0) {
1639 break;
1640 }
1641 }
1642 /*
1643 * Release devinfo lock
1644 */
1645 ndi_devi_exit(vdip, circular);
1646
1647 /*
1648 * Free the temp buffer
1649 */
1650 kmem_free(temp_pathname, MAXPATHLEN);
1651 return (cdip);
1652 }
1653
1654 /*
1655 * mdi_client_get_path_count():
1656 * Utility function to get number of path information nodes
1657 * associated with a given client device.
1658 */
1659 int
mdi_client_get_path_count(dev_info_t * cdip)1660 mdi_client_get_path_count(dev_info_t *cdip)
1661 {
1662 mdi_client_t *ct;
1663 int count = 0;
1664
1665 ct = i_devi_get_client(cdip);
1666 if (ct != NULL) {
1667 count = ct->ct_path_count;
1668 }
1669 return (count);
1670 }
1671
1672
1673 /*
1674 * i_mdi_get_hash_key():
1675 * Create a hash using strings as keys
1676 *
1677 */
1678 static int
i_mdi_get_hash_key(char * str)1679 i_mdi_get_hash_key(char *str)
1680 {
1681 uint32_t g, hash = 0;
1682 char *p;
1683
1684 for (p = str; *p != '\0'; p++) {
1685 g = *p;
1686 hash += g;
1687 }
1688 return (hash % (CLIENT_HASH_TABLE_SIZE - 1));
1689 }
1690
1691 /*
1692 * mdi_get_lb_policy():
1693 * Get current load balancing policy for a given client device
1694 */
1695 client_lb_t
mdi_get_lb_policy(dev_info_t * cdip)1696 mdi_get_lb_policy(dev_info_t *cdip)
1697 {
1698 client_lb_t lb = LOAD_BALANCE_NONE;
1699 mdi_client_t *ct;
1700
1701 ct = i_devi_get_client(cdip);
1702 if (ct != NULL) {
1703 lb = ct->ct_lb;
1704 }
1705 return (lb);
1706 }
1707
1708 /*
1709 * mdi_set_lb_region_size():
1710 * Set current region size for the load-balance
1711 */
1712 int
mdi_set_lb_region_size(dev_info_t * cdip,int region_size)1713 mdi_set_lb_region_size(dev_info_t *cdip, int region_size)
1714 {
1715 mdi_client_t *ct;
1716 int rv = MDI_FAILURE;
1717
1718 ct = i_devi_get_client(cdip);
1719 if (ct != NULL && ct->ct_lb_args != NULL) {
1720 ct->ct_lb_args->region_size = region_size;
1721 rv = MDI_SUCCESS;
1722 }
1723 return (rv);
1724 }
1725
1726 /*
1727 * mdi_Set_lb_policy():
1728 * Set current load balancing policy for a given client device
1729 */
1730 int
mdi_set_lb_policy(dev_info_t * cdip,client_lb_t lb)1731 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb)
1732 {
1733 mdi_client_t *ct;
1734 int rv = MDI_FAILURE;
1735
1736 ct = i_devi_get_client(cdip);
1737 if (ct != NULL) {
1738 ct->ct_lb = lb;
1739 rv = MDI_SUCCESS;
1740 }
1741 return (rv);
1742 }
1743
1744 /*
1745 * mdi_failover():
1746 * failover function called by the vHCI drivers to initiate
1747 * a failover operation. This is typically due to non-availability
1748 * of online paths to route I/O requests. Failover can be
1749 * triggered through user application also.
1750 *
1751 * The vHCI driver calls mdi_failover() to initiate a failover
1752 * operation. mdi_failover() calls back into the vHCI driver's
1753 * vo_failover() entry point to perform the actual failover
1754 * operation. The reason for requiring the vHCI driver to
1755 * initiate failover by calling mdi_failover(), instead of directly
1756 * executing vo_failover() itself, is to ensure that the mdi
1757 * framework can keep track of the client state properly.
1758 * Additionally, mdi_failover() provides as a convenience the
1759 * option of performing the failover operation synchronously or
1760 * asynchronously
1761 *
1762 * Upon successful completion of the failover operation, the
1763 * paths that were previously ONLINE will be in the STANDBY state,
1764 * and the newly activated paths will be in the ONLINE state.
1765 *
1766 * The flags modifier determines whether the activation is done
1767 * synchronously: MDI_FAILOVER_SYNC
1768 * Return Values:
1769 * MDI_SUCCESS
1770 * MDI_FAILURE
1771 * MDI_BUSY
1772 */
1773 /*ARGSUSED*/
1774 int
mdi_failover(dev_info_t * vdip,dev_info_t * cdip,int flags)1775 mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags)
1776 {
1777 int rv;
1778 mdi_client_t *ct;
1779
1780 ct = i_devi_get_client(cdip);
1781 ASSERT(ct != NULL);
1782 if (ct == NULL) {
1783 /* cdip is not a valid client device. Nothing more to do. */
1784 return (MDI_FAILURE);
1785 }
1786
1787 MDI_CLIENT_LOCK(ct);
1788
1789 if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) {
1790 /* A path to the client is being freed */
1791 MDI_CLIENT_UNLOCK(ct);
1792 return (MDI_BUSY);
1793 }
1794
1795
1796 if (MDI_CLIENT_IS_FAILED(ct)) {
1797 /*
1798 * Client is in failed state. Nothing more to do.
1799 */
1800 MDI_CLIENT_UNLOCK(ct);
1801 return (MDI_FAILURE);
1802 }
1803
1804 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
1805 /*
1806 * Failover is already in progress; return BUSY
1807 */
1808 MDI_CLIENT_UNLOCK(ct);
1809 return (MDI_BUSY);
1810 }
1811 /*
1812 * Make sure that mdi_pathinfo node state changes are processed.
1813 * We do not allow failovers to progress while client path state
1814 * changes are in progress
1815 */
1816 if (ct->ct_unstable) {
1817 if (flags == MDI_FAILOVER_ASYNC) {
1818 MDI_CLIENT_UNLOCK(ct);
1819 return (MDI_BUSY);
1820 } else {
1821 while (ct->ct_unstable)
1822 cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex);
1823 }
1824 }
1825
1826 /*
1827 * Client device is in stable state. Before proceeding, perform sanity
1828 * checks again.
1829 */
1830 if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) ||
1831 (!i_ddi_devi_attached(cdip))) {
1832 /*
1833 * Client is in failed state. Nothing more to do.
1834 */
1835 MDI_CLIENT_UNLOCK(ct);
1836 return (MDI_FAILURE);
1837 }
1838
1839 /*
1840 * Set the client state as failover in progress.
1841 */
1842 MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct);
1843 ct->ct_failover_flags = flags;
1844 MDI_CLIENT_UNLOCK(ct);
1845
1846 if (flags == MDI_FAILOVER_ASYNC) {
1847 /*
1848 * Submit the initiate failover request via CPR safe
1849 * taskq threads.
1850 */
1851 (void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover,
1852 ct, KM_SLEEP);
1853 return (MDI_ACCEPT);
1854 } else {
1855 /*
1856 * Synchronous failover mode. Typically invoked from the user
1857 * land.
1858 */
1859 rv = i_mdi_failover(ct);
1860 }
1861 return (rv);
1862 }
1863
1864 /*
1865 * i_mdi_failover():
1866 * internal failover function. Invokes vHCI drivers failover
1867 * callback function and process the failover status
1868 * Return Values:
1869 * None
1870 *
1871 * Note: A client device in failover state can not be detached or freed.
1872 */
1873 static int
i_mdi_failover(void * arg)1874 i_mdi_failover(void *arg)
1875 {
1876 int rv = MDI_SUCCESS;
1877 mdi_client_t *ct = (mdi_client_t *)arg;
1878 mdi_vhci_t *vh = ct->ct_vhci;
1879
1880 ASSERT(!MDI_CLIENT_LOCKED(ct));
1881
1882 if (vh->vh_ops->vo_failover != NULL) {
1883 /*
1884 * Call vHCI drivers callback routine
1885 */
1886 rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip,
1887 ct->ct_failover_flags);
1888 }
1889
1890 MDI_CLIENT_LOCK(ct);
1891 MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct);
1892
1893 /*
1894 * Save the failover return status
1895 */
1896 ct->ct_failover_status = rv;
1897
1898 /*
1899 * As a result of failover, client status would have been changed.
1900 * Update the client state and wake up anyone waiting on this client
1901 * device.
1902 */
1903 i_mdi_client_update_state(ct);
1904
1905 cv_broadcast(&ct->ct_failover_cv);
1906 MDI_CLIENT_UNLOCK(ct);
1907 return (rv);
1908 }
1909
1910 /*
1911 * Load balancing is logical block.
1912 * IOs within the range described by region_size
1913 * would go on the same path. This would improve the
1914 * performance by cache-hit on some of the RAID devices.
1915 * Search only for online paths(At some point we
1916 * may want to balance across target ports).
1917 * If no paths are found then default to round-robin.
1918 */
1919 static int
i_mdi_lba_lb(mdi_client_t * ct,mdi_pathinfo_t ** ret_pip,struct buf * bp)1920 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp)
1921 {
1922 int path_index = -1;
1923 int online_path_count = 0;
1924 int online_nonpref_path_count = 0;
1925 int region_size = ct->ct_lb_args->region_size;
1926 mdi_pathinfo_t *pip;
1927 mdi_pathinfo_t *next;
1928 int preferred, path_cnt;
1929
1930 pip = ct->ct_path_head;
1931 while (pip) {
1932 MDI_PI_LOCK(pip);
1933 if (MDI_PI(pip)->pi_state ==
1934 MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) {
1935 online_path_count++;
1936 } else if (MDI_PI(pip)->pi_state ==
1937 MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) {
1938 online_nonpref_path_count++;
1939 }
1940 next = (mdi_pathinfo_t *)
1941 MDI_PI(pip)->pi_client_link;
1942 MDI_PI_UNLOCK(pip);
1943 pip = next;
1944 }
1945 /* if found any online/preferred then use this type */
1946 if (online_path_count > 0) {
1947 path_cnt = online_path_count;
1948 preferred = 1;
1949 } else if (online_nonpref_path_count > 0) {
1950 path_cnt = online_nonpref_path_count;
1951 preferred = 0;
1952 } else {
1953 path_cnt = 0;
1954 }
1955 if (path_cnt) {
1956 path_index = (bp->b_blkno >> region_size) % path_cnt;
1957 pip = ct->ct_path_head;
1958 while (pip && path_index != -1) {
1959 MDI_PI_LOCK(pip);
1960 if (path_index == 0 &&
1961 (MDI_PI(pip)->pi_state ==
1962 MDI_PATHINFO_STATE_ONLINE) &&
1963 MDI_PI(pip)->pi_preferred == preferred) {
1964 MDI_PI_HOLD(pip);
1965 MDI_PI_UNLOCK(pip);
1966 *ret_pip = pip;
1967 return (MDI_SUCCESS);
1968 }
1969 path_index --;
1970 next = (mdi_pathinfo_t *)
1971 MDI_PI(pip)->pi_client_link;
1972 MDI_PI_UNLOCK(pip);
1973 pip = next;
1974 }
1975 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
1976 "lba %llx: path %s %p",
1977 bp->b_lblkno, mdi_pi_spathname(pip), (void *)pip));
1978 }
1979 return (MDI_FAILURE);
1980 }
1981
1982 /*
1983 * mdi_select_path():
1984 * select a path to access a client device.
1985 *
1986 * mdi_select_path() function is called by the vHCI drivers to
1987 * select a path to route the I/O request to. The caller passes
1988 * the block I/O data transfer structure ("buf") as one of the
1989 * parameters. The mpxio framework uses the buf structure
1990 * contents to maintain per path statistics (total I/O size /
1991 * count pending). If more than one online paths are available to
1992 * select, the framework automatically selects a suitable path
1993 * for routing I/O request. If a failover operation is active for
1994 * this client device the call shall be failed with MDI_BUSY error
1995 * code.
1996 *
1997 * By default this function returns a suitable path in online
1998 * state based on the current load balancing policy. Currently
1999 * we support LOAD_BALANCE_NONE (Previously selected online path
2000 * will continue to be used till the path is usable) and
2001 * LOAD_BALANCE_RR (Online paths will be selected in a round
2002 * robin fashion), LOAD_BALANCE_LB(Online paths will be selected
 * based on the logical block). The load balancing policy can be
 * specified through the vHCI driver's configuration file (driver.conf).
2005 *
2006 * vHCI drivers may override this default behavior by specifying
 * appropriate flags. The meaning of the third argument depends
2008 * on the flags specified. If MDI_SELECT_PATH_INSTANCE is set
2009 * then the argument is the "path instance" of the path to select.
2010 * If MDI_SELECT_PATH_INSTANCE is not set then the argument is
2011 * "start_pip". A non NULL "start_pip" is the starting point to
2012 * walk and find the next appropriate path. The following values
2013 * are currently defined: MDI_SELECT_ONLINE_PATH (to select an
2014 * ONLINE path) and/or MDI_SELECT_STANDBY_PATH (to select an
2015 * STANDBY path).
2016 *
2017 * The non-standard behavior is used by the scsi_vhci driver,
2018 * whenever it has to use a STANDBY/FAULTED path. Eg. during
2019 * attach of client devices (to avoid an unnecessary failover
2020 * when the STANDBY path comes up first), during failover
2021 * (to activate a STANDBY path as ONLINE).
2022 *
 * The selected path is returned in a mdi_hold_path() state
2024 * (pi_ref_cnt). Caller should release the hold by calling
2025 * mdi_rele_path().
2026 *
2027 * Return Values:
2028 * MDI_SUCCESS - Completed successfully
2029 * MDI_BUSY - Client device is busy failing over
 * MDI_NOPATH	- Client device is online, but no valid paths are
2031 * available to access this client device
2032 * MDI_FAILURE - Invalid client device or state
2033 * MDI_DEVI_ONLINING
2034 * - Client device (struct dev_info state) is in
2035 * onlining state.
2036 */
2037
2038 /*ARGSUSED*/
2039 int
mdi_select_path(dev_info_t * cdip,struct buf * bp,int flags,void * arg,mdi_pathinfo_t ** ret_pip)2040 mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
2041 void *arg, mdi_pathinfo_t **ret_pip)
2042 {
2043 mdi_client_t *ct;
2044 mdi_pathinfo_t *pip;
2045 mdi_pathinfo_t *next;
2046 mdi_pathinfo_t *head;
2047 mdi_pathinfo_t *start;
2048 client_lb_t lbp; /* load balancing policy */
2049 int sb = 1; /* standard behavior */
2050 int preferred = 1; /* preferred path */
2051 int cond, cont = 1;
2052 int retry = 0;
2053 mdi_pathinfo_t *start_pip; /* request starting pathinfo */
2054 int path_instance; /* request specific path instance */
2055
2056 /* determine type of arg based on flags */
2057 if (flags & MDI_SELECT_PATH_INSTANCE) {
2058 path_instance = (int)(intptr_t)arg;
2059 start_pip = NULL;
2060 } else {
2061 path_instance = 0;
2062 start_pip = (mdi_pathinfo_t *)arg;
2063 }
2064
2065 if (flags != 0) {
2066 /*
2067 * disable default behavior
2068 */
2069 sb = 0;
2070 }
2071
2072 *ret_pip = NULL;
2073 ct = i_devi_get_client(cdip);
2074 if (ct == NULL) {
2075 /* mdi extensions are NULL, Nothing more to do */
2076 return (MDI_FAILURE);
2077 }
2078
2079 MDI_CLIENT_LOCK(ct);
2080
2081 if (sb) {
2082 if (MDI_CLIENT_IS_FAILED(ct)) {
2083 /*
2084 * Client is not ready to accept any I/O requests.
2085 * Fail this request.
2086 */
2087 MDI_DEBUG(2, (MDI_NOTE, cdip,
2088 "client state offline ct = %p", (void *)ct));
2089 MDI_CLIENT_UNLOCK(ct);
2090 return (MDI_FAILURE);
2091 }
2092
2093 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
2094 /*
2095 * Check for Failover is in progress. If so tell the
2096 * caller that this device is busy.
2097 */
2098 MDI_DEBUG(2, (MDI_NOTE, cdip,
2099 "client failover in progress ct = %p",
2100 (void *)ct));
2101 MDI_CLIENT_UNLOCK(ct);
2102 return (MDI_BUSY);
2103 }
2104
2105 /*
2106 * Check to see whether the client device is attached.
2107 * If not so, let the vHCI driver manually select a path
2108 * (standby) and let the probe/attach process to continue.
2109 */
2110 if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) {
2111 MDI_DEBUG(4, (MDI_NOTE, cdip,
2112 "devi is onlining ct = %p", (void *)ct));
2113 MDI_CLIENT_UNLOCK(ct);
2114 return (MDI_DEVI_ONLINING);
2115 }
2116 }
2117
2118 /*
2119 * Cache in the client list head. If head of the list is NULL
2120 * return MDI_NOPATH
2121 */
2122 head = ct->ct_path_head;
2123 if (head == NULL) {
2124 MDI_CLIENT_UNLOCK(ct);
2125 return (MDI_NOPATH);
2126 }
2127
2128 /* Caller is specifying a specific pathinfo path by path_instance */
2129 if (path_instance) {
2130 /* search for pathinfo with correct path_instance */
2131 for (pip = head;
2132 pip && (mdi_pi_get_path_instance(pip) != path_instance);
2133 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link)
2134 ;
2135
2136 /* If path can't be selected then MDI_NOPATH is returned. */
2137 if (pip == NULL) {
2138 MDI_CLIENT_UNLOCK(ct);
2139 return (MDI_NOPATH);
2140 }
2141
2142 /*
2143 * Verify state of path. When asked to select a specific
2144 * path_instance, we select the requested path in any
2145 * state (ONLINE, OFFLINE, STANDBY, FAULT) other than INIT.
2146 * We don't however select paths where the pHCI has detached.
2147 * NOTE: last pathinfo node of an opened client device may
2148 * exist in an OFFLINE state after the pHCI associated with
2149 * that path has detached (but pi_phci will be NULL if that
2150 * has occurred).
2151 */
2152 MDI_PI_LOCK(pip);
2153 if ((MDI_PI(pip)->pi_state == MDI_PATHINFO_STATE_INIT) ||
2154 (MDI_PI(pip)->pi_phci == NULL)) {
2155 MDI_PI_UNLOCK(pip);
2156 MDI_CLIENT_UNLOCK(ct);
2157 return (MDI_FAILURE);
2158 }
2159
2160 /* Return MDI_BUSY if we have a transient condition */
2161 if (MDI_PI_IS_TRANSIENT(pip)) {
2162 MDI_PI_UNLOCK(pip);
2163 MDI_CLIENT_UNLOCK(ct);
2164 return (MDI_BUSY);
2165 }
2166
2167 /*
2168 * Return the path in hold state. Caller should release the
2169 * lock by calling mdi_rele_path()
2170 */
2171 MDI_PI_HOLD(pip);
2172 MDI_PI_UNLOCK(pip);
2173 *ret_pip = pip;
2174 MDI_CLIENT_UNLOCK(ct);
2175 return (MDI_SUCCESS);
2176 }
2177
2178 /*
2179 * for non default behavior, bypass current
2180 * load balancing policy and always use LOAD_BALANCE_RR
2181 * except that the start point will be adjusted based
2182 * on the provided start_pip
2183 */
2184 lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;
2185
2186 switch (lbp) {
2187 case LOAD_BALANCE_NONE:
2188 /*
2189 * Load balancing is None or Alternate path mode
2190 * Start looking for a online mdi_pathinfo node starting from
2191 * last known selected path
2192 */
2193 preferred = 1;
2194 pip = (mdi_pathinfo_t *)ct->ct_path_last;
2195 if (pip == NULL) {
2196 pip = head;
2197 }
2198 start = pip;
2199 do {
2200 MDI_PI_LOCK(pip);
2201 /*
2202 * No need to explicitly check if the path is disabled.
2203 * Since we are checking for state == ONLINE and the
2204 * same variable is used for DISABLE/ENABLE information.
2205 */
2206 if ((MDI_PI(pip)->pi_state ==
2207 MDI_PATHINFO_STATE_ONLINE) &&
2208 preferred == MDI_PI(pip)->pi_preferred) {
2209 /*
2210 * Return the path in hold state. Caller should
2211 * release the lock by calling mdi_rele_path()
2212 */
2213 MDI_PI_HOLD(pip);
2214 MDI_PI_UNLOCK(pip);
2215 ct->ct_path_last = pip;
2216 *ret_pip = pip;
2217 MDI_CLIENT_UNLOCK(ct);
2218 return (MDI_SUCCESS);
2219 }
2220
2221 /*
2222 * Path is busy.
2223 */
2224 if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
2225 MDI_PI_IS_TRANSIENT(pip))
2226 retry = 1;
2227 /*
2228 * Keep looking for a next available online path
2229 */
2230 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2231 if (next == NULL) {
2232 next = head;
2233 }
2234 MDI_PI_UNLOCK(pip);
2235 pip = next;
2236 if (start == pip && preferred) {
2237 preferred = 0;
2238 } else if (start == pip && !preferred) {
2239 cont = 0;
2240 }
2241 } while (cont);
2242 break;
2243
2244 case LOAD_BALANCE_LBA:
2245 /*
2246 * Make sure we are looking
2247 * for an online path. Otherwise, if it is for a STANDBY
2248 * path request, it will go through and fetch an ONLINE
2249 * path which is not desirable.
2250 */
2251 if ((ct->ct_lb_args != NULL) &&
2252 (ct->ct_lb_args->region_size) && bp &&
2253 (sb || (flags == MDI_SELECT_ONLINE_PATH))) {
2254 if (i_mdi_lba_lb(ct, ret_pip, bp)
2255 == MDI_SUCCESS) {
2256 MDI_CLIENT_UNLOCK(ct);
2257 return (MDI_SUCCESS);
2258 }
2259 }
2260 /* FALLTHROUGH */
2261 case LOAD_BALANCE_RR:
2262 /*
2263 * Load balancing is Round Robin. Start looking for a online
2264 * mdi_pathinfo node starting from last known selected path
2265 * as the start point. If override flags are specified,
2266 * process accordingly.
2267 * If the search is already in effect(start_pip not null),
2268 * then lets just use the same path preference to continue the
2269 * traversal.
2270 */
2271
2272 if (start_pip != NULL) {
2273 preferred = MDI_PI(start_pip)->pi_preferred;
2274 } else {
2275 preferred = 1;
2276 }
2277
2278 start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
2279 if (start == NULL) {
2280 pip = head;
2281 } else {
2282 pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
2283 if (pip == NULL) {
2284 if ( flags & MDI_SELECT_NO_PREFERRED) {
2285 /*
2286 * Return since we hit the end of list
2287 */
2288 MDI_CLIENT_UNLOCK(ct);
2289 return (MDI_NOPATH);
2290 }
2291
2292 if (!sb) {
2293 if (preferred == 0) {
2294 /*
2295 * Looks like we have completed
2296 * the traversal as preferred
2297 * value is 0. Time to bail out.
2298 */
2299 *ret_pip = NULL;
2300 MDI_CLIENT_UNLOCK(ct);
2301 return (MDI_NOPATH);
2302 } else {
2303 /*
2304 * Looks like we reached the
2305 * end of the list. Lets enable
2306 * traversal of non preferred
2307 * paths.
2308 */
2309 preferred = 0;
2310 }
2311 }
2312 pip = head;
2313 }
2314 }
2315 start = pip;
2316 do {
2317 MDI_PI_LOCK(pip);
2318 if (sb) {
2319 cond = ((MDI_PI(pip)->pi_state ==
2320 MDI_PATHINFO_STATE_ONLINE &&
2321 MDI_PI(pip)->pi_preferred ==
2322 preferred) ? 1 : 0);
2323 } else {
2324 if (flags == MDI_SELECT_ONLINE_PATH) {
2325 cond = ((MDI_PI(pip)->pi_state ==
2326 MDI_PATHINFO_STATE_ONLINE &&
2327 MDI_PI(pip)->pi_preferred ==
2328 preferred) ? 1 : 0);
2329 } else if (flags == MDI_SELECT_STANDBY_PATH) {
2330 cond = ((MDI_PI(pip)->pi_state ==
2331 MDI_PATHINFO_STATE_STANDBY &&
2332 MDI_PI(pip)->pi_preferred ==
2333 preferred) ? 1 : 0);
2334 } else if (flags == (MDI_SELECT_ONLINE_PATH |
2335 MDI_SELECT_STANDBY_PATH)) {
2336 cond = (((MDI_PI(pip)->pi_state ==
2337 MDI_PATHINFO_STATE_ONLINE ||
2338 (MDI_PI(pip)->pi_state ==
2339 MDI_PATHINFO_STATE_STANDBY)) &&
2340 MDI_PI(pip)->pi_preferred ==
2341 preferred) ? 1 : 0);
2342 } else if (flags ==
2343 (MDI_SELECT_STANDBY_PATH |
2344 MDI_SELECT_ONLINE_PATH |
2345 MDI_SELECT_USER_DISABLE_PATH)) {
2346 cond = (((MDI_PI(pip)->pi_state ==
2347 MDI_PATHINFO_STATE_ONLINE ||
2348 (MDI_PI(pip)->pi_state ==
2349 MDI_PATHINFO_STATE_STANDBY) ||
2350 (MDI_PI(pip)->pi_state ==
2351 (MDI_PATHINFO_STATE_ONLINE|
2352 MDI_PATHINFO_STATE_USER_DISABLE)) ||
2353 (MDI_PI(pip)->pi_state ==
2354 (MDI_PATHINFO_STATE_STANDBY |
2355 MDI_PATHINFO_STATE_USER_DISABLE)))&&
2356 MDI_PI(pip)->pi_preferred ==
2357 preferred) ? 1 : 0);
2358 } else if (flags ==
2359 (MDI_SELECT_STANDBY_PATH |
2360 MDI_SELECT_ONLINE_PATH |
2361 MDI_SELECT_NO_PREFERRED)) {
2362 cond = (((MDI_PI(pip)->pi_state ==
2363 MDI_PATHINFO_STATE_ONLINE) ||
2364 (MDI_PI(pip)->pi_state ==
2365 MDI_PATHINFO_STATE_STANDBY))
2366 ? 1 : 0);
2367 } else {
2368 cond = 0;
2369 }
2370 }
2371 /*
2372 * No need to explicitly check if the path is disabled.
2373 * Since we are checking for state == ONLINE and the
2374 * same variable is used for DISABLE/ENABLE information.
2375 */
2376 if (cond) {
2377 /*
2378 * Return the path in hold state. Caller should
2379 * release the lock by calling mdi_rele_path()
2380 */
2381 MDI_PI_HOLD(pip);
2382 MDI_PI_UNLOCK(pip);
2383 if (sb)
2384 ct->ct_path_last = pip;
2385 *ret_pip = pip;
2386 MDI_CLIENT_UNLOCK(ct);
2387 return (MDI_SUCCESS);
2388 }
2389 /*
2390 * Path is busy.
2391 */
2392 if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
2393 MDI_PI_IS_TRANSIENT(pip))
2394 retry = 1;
2395
2396 /*
2397 * Keep looking for a next available online path
2398 */
2399 do_again:
2400 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2401 if (next == NULL) {
2402 if ( flags & MDI_SELECT_NO_PREFERRED) {
2403 /*
2404 * Bail out since we hit the end of list
2405 */
2406 MDI_PI_UNLOCK(pip);
2407 break;
2408 }
2409
2410 if (!sb) {
2411 if (preferred == 1) {
2412 /*
2413 * Looks like we reached the
2414 * end of the list. Lets enable
2415 * traversal of non preferred
2416 * paths.
2417 */
2418 preferred = 0;
2419 next = head;
2420 } else {
2421 /*
2422 * We have done both the passes
2423 * Preferred as well as for
2424 * Non-preferred. Bail out now.
2425 */
2426 cont = 0;
2427 }
2428 } else {
2429 /*
2430 * Standard behavior case.
2431 */
2432 next = head;
2433 }
2434 }
2435 MDI_PI_UNLOCK(pip);
2436 if (cont == 0) {
2437 break;
2438 }
2439 pip = next;
2440
2441 if (!sb) {
2442 /*
2443 * We need to handle the selection of
2444 * non-preferred path in the following
2445 * case:
2446 *
2447 * +------+ +------+ +------+ +-----+
2448 * | A : 1| - | B : 1| - | C : 0| - |NULL |
2449 * +------+ +------+ +------+ +-----+
2450 *
2451 * If we start the search with B, we need to
2452 * skip beyond B to pick C which is non -
2453 * preferred in the second pass. The following
2454 * test, if true, will allow us to skip over
2455 * the 'start'(B in the example) to select
2456 * other non preferred elements.
2457 */
2458 if ((start_pip != NULL) && (start_pip == pip) &&
2459 (MDI_PI(start_pip)->pi_preferred
2460 != preferred)) {
2461 /*
2462 * try again after going past the start
2463 * pip
2464 */
2465 MDI_PI_LOCK(pip);
2466 goto do_again;
2467 }
2468 } else {
2469 /*
2470 * Standard behavior case
2471 */
2472 if (start == pip && preferred) {
2473 /* look for nonpreferred paths */
2474 preferred = 0;
2475 } else if (start == pip && !preferred) {
2476 /*
2477 * Exit condition
2478 */
2479 cont = 0;
2480 }
2481 }
2482 } while (cont);
2483 break;
2484 }
2485
2486 MDI_CLIENT_UNLOCK(ct);
2487 if (retry == 1) {
2488 return (MDI_BUSY);
2489 } else {
2490 return (MDI_NOPATH);
2491 }
2492 }
2493
2494 /*
2495 * For a client, return the next available path to any phci
2496 *
2497 * Note:
2498 * Caller should hold the branch's devinfo node to get a consistent
2499 * snap shot of the mdi_pathinfo nodes.
2500 *
2501 * Please note that even the list is stable the mdi_pathinfo
2502 * node state and properties are volatile. The caller should lock
2503 * and unlock the nodes by calling mdi_pi_lock() and
2504 * mdi_pi_unlock() functions to get a stable properties.
2505 *
2506 * If there is a need to use the nodes beyond the hold of the
2507 * devinfo node period (For ex. I/O), then mdi_pathinfo node
2508 * need to be held against unexpected removal by calling
2509 * mdi_hold_path() and should be released by calling
2510 * mdi_rele_path() on completion.
2511 */
2512 mdi_pathinfo_t *
mdi_get_next_phci_path(dev_info_t * ct_dip,mdi_pathinfo_t * pip)2513 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip)
2514 {
2515 mdi_client_t *ct;
2516
2517 if (!MDI_CLIENT(ct_dip))
2518 return (NULL);
2519
2520 /*
2521 * Walk through client link
2522 */
2523 ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client;
2524 ASSERT(ct != NULL);
2525
2526 if (pip == NULL)
2527 return ((mdi_pathinfo_t *)ct->ct_path_head);
2528
2529 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link);
2530 }
2531
2532 /*
2533 * For a phci, return the next available path to any client
2534 * Note: ditto mdi_get_next_phci_path()
2535 */
2536 mdi_pathinfo_t *
mdi_get_next_client_path(dev_info_t * ph_dip,mdi_pathinfo_t * pip)2537 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip)
2538 {
2539 mdi_phci_t *ph;
2540
2541 if (!MDI_PHCI(ph_dip))
2542 return (NULL);
2543
2544 /*
2545 * Walk through pHCI link
2546 */
2547 ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci;
2548 ASSERT(ph != NULL);
2549
2550 if (pip == NULL)
2551 return ((mdi_pathinfo_t *)ph->ph_path_head);
2552
2553 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link);
2554 }
2555
2556 /*
2557 * mdi_hold_path():
2558 * Hold the mdi_pathinfo node against unwanted unexpected free.
2559 * Return Values:
2560 * None
2561 */
2562 void
mdi_hold_path(mdi_pathinfo_t * pip)2563 mdi_hold_path(mdi_pathinfo_t *pip)
2564 {
2565 if (pip) {
2566 MDI_PI_LOCK(pip);
2567 MDI_PI_HOLD(pip);
2568 MDI_PI_UNLOCK(pip);
2569 }
2570 }
2571
2572
2573 /*
2574 * mdi_rele_path():
2575 * Release the mdi_pathinfo node which was selected
2576 * through mdi_select_path() mechanism or manually held by
2577 * calling mdi_hold_path().
2578 * Return Values:
2579 * None
2580 */
2581 void
mdi_rele_path(mdi_pathinfo_t * pip)2582 mdi_rele_path(mdi_pathinfo_t *pip)
2583 {
2584 if (pip) {
2585 MDI_PI_LOCK(pip);
2586 MDI_PI_RELE(pip);
2587 if (MDI_PI(pip)->pi_ref_cnt == 0) {
2588 cv_broadcast(&MDI_PI(pip)->pi_ref_cv);
2589 }
2590 MDI_PI_UNLOCK(pip);
2591 }
2592 }
2593
2594 /*
2595 * mdi_pi_lock():
2596 * Lock the mdi_pathinfo node.
2597 * Note:
2598 * The caller should release the lock by calling mdi_pi_unlock()
2599 */
2600 void
mdi_pi_lock(mdi_pathinfo_t * pip)2601 mdi_pi_lock(mdi_pathinfo_t *pip)
2602 {
2603 ASSERT(pip != NULL);
2604 if (pip) {
2605 MDI_PI_LOCK(pip);
2606 }
2607 }
2608
2609
2610 /*
2611 * mdi_pi_unlock():
2612 * Unlock the mdi_pathinfo node.
2613 * Note:
2614 * The mdi_pathinfo node should have been locked with mdi_pi_lock()
2615 */
2616 void
mdi_pi_unlock(mdi_pathinfo_t * pip)2617 mdi_pi_unlock(mdi_pathinfo_t *pip)
2618 {
2619 ASSERT(pip != NULL);
2620 if (pip) {
2621 MDI_PI_UNLOCK(pip);
2622 }
2623 }
2624
2625 /*
2626 * mdi_pi_find():
2627 * Search the list of mdi_pathinfo nodes attached to the
2628 * pHCI/Client device node whose path address matches "paddr".
2629 * Returns a pointer to the mdi_pathinfo node if a matching node is
2630 * found.
2631 * Return Values:
2632 * mdi_pathinfo node handle
2633 * NULL
2634 * Notes:
2635 * Caller need not hold any locks to call this function.
2636 */
mdi_pathinfo_t *
mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr)
{
	mdi_phci_t *ph;
	mdi_vhci_t *vh;
	mdi_client_t *ct;
	mdi_pathinfo_t *pip = NULL;

	MDI_DEBUG(2, (MDI_NOTE, pdip,
	    "caddr@%s paddr@%s", caddr ? caddr : "", paddr ? paddr : ""));
	/* A pHCI dip and a path address are mandatory; caddr is optional. */
	if ((pdip == NULL) || (paddr == NULL)) {
		return (NULL);
	}
	ph = i_devi_get_phci(pdip);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, Nothing more to do.
		 */
		MDI_DEBUG(2, (MDI_WARN, pdip, "invalid phci"));
		return (NULL);
	}

	vh = ph->ph_vhci;
	if (vh == NULL) {
		/*
		 * Invalid vHCI device, Nothing more to do.
		 */
		MDI_DEBUG(2, (MDI_WARN, pdip, "invalid vhci"));
		return (NULL);
	}

	/*
	 * Look for pathinfo node identified by paddr.
	 */
	if (caddr == NULL) {
		/*
		 * Find a mdi_pathinfo node under pHCI list for a matching
		 * unit address.
		 */
		MDI_PHCI_LOCK(ph);
		if (MDI_PHCI_IS_OFFLINE(ph)) {
			MDI_DEBUG(2, (MDI_WARN, pdip,
			    "offline phci %p", (void *)ph));
			MDI_PHCI_UNLOCK(ph);
			return (NULL);
		}
		pip = (mdi_pathinfo_t *)ph->ph_path_head;

		/* Linear scan of the pHCI's pathinfo list by unit address. */
		while (pip != NULL) {
			if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
				break;
			}
			pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		}
		MDI_PHCI_UNLOCK(ph);
		/*
		 * NOTE(review): pip may be NULL here; assumes
		 * mdi_pi_spathname() tolerates NULL — confirm.
		 */
		MDI_DEBUG(2, (MDI_NOTE, pdip,
		    "found %s %p", mdi_pi_spathname(pip), (void *)pip));
		return (pip);
	}

	/*
	 * XXX - Is the rest of the code in this function really necessary?
	 * The consumers of mdi_pi_find() can search for the desired pathinfo
	 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of
	 * whether the search is based on the pathinfo nodes attached to
	 * the pHCI or the client node, the result will be the same.
	 */

	/*
	 * Find the client device corresponding to 'caddr'
	 */
	MDI_VHCI_CLIENT_LOCK(vh);

	/*
	 * XXX - Passing NULL to the following function works as long as the
	 * the client addresses (caddr) are unique per vhci basis.
	 */
	ct = i_mdi_client_find(vh, NULL, caddr);
	if (ct == NULL) {
		/*
		 * Client not found, Obviously mdi_pathinfo node has not been
		 * created yet.
		 */
		MDI_VHCI_CLIENT_UNLOCK(vh);
		MDI_DEBUG(2, (MDI_NOTE, pdip,
		    "client not found for caddr @%s", caddr ? caddr : ""));
		return (NULL);
	}

	/*
	 * Hold the client lock and look for a mdi_pathinfo node with matching
	 * pHCI and paddr
	 */
	MDI_CLIENT_LOCK(ct);

	/*
	 * Release the global mutex as it is no more needed. Note: We always
	 * respect the locking order while acquiring.
	 */
	MDI_VHCI_CLIENT_UNLOCK(vh);

	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		/*
		 * Compare the unit address
		 */
		if ((MDI_PI(pip)->pi_phci == ph) &&
		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
			break;
		}
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}
	MDI_CLIENT_UNLOCK(ct);
	MDI_DEBUG(2, (MDI_NOTE, pdip,
	    "found: %s %p", mdi_pi_spathname(pip), (void *)pip));
	return (pip);
}
2754
2755 /*
2756 * mdi_pi_alloc():
2757 * Allocate and initialize a new instance of a mdi_pathinfo node.
 *		The mdi_pathinfo node returned by this function identifies a
 *		unique device path and is capable of having properties attached
2760 * and passed to mdi_pi_online() to fully attach and online the
2761 * path and client device node.
2762 * The mdi_pathinfo node returned by this function must be
2763 * destroyed using mdi_pi_free() if the path is no longer
2764 * operational or if the caller fails to attach a client device
2765 * node when calling mdi_pi_online(). The framework will not free
2766 * the resources allocated.
2767 * This function can be called from both interrupt and kernel
2768 * contexts. DDI_NOSLEEP flag should be used while calling
2769 * from interrupt contexts.
2770 * Return Values:
2771 * MDI_SUCCESS
2772 * MDI_FAILURE
2773 * MDI_NOMEM
2774 */
/*ARGSUSED*/
int
mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
    char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip)
{
	mdi_vhci_t *vh;
	mdi_phci_t *ph;
	mdi_client_t *ct;
	mdi_pathinfo_t *pip = NULL;
	dev_info_t *cdip;
	int rv = MDI_NOMEM;	/* the 'fail' label reports MDI_NOMEM */
	int path_allocated = 0;

	MDI_DEBUG(2, (MDI_NOTE, pdip,
	    "cname %s: caddr@%s paddr@%s",
	    cname ? cname : "", caddr ? caddr : "", paddr ? paddr : ""));

	if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL ||
	    ret_pip == NULL) {
		/* Nothing more to do */
		return (MDI_FAILURE);
	}

	*ret_pip = NULL;

	/* No allocations on detaching pHCI */
	if (DEVI_IS_DETACHING(pdip)) {
		/* Invalid pHCI device, return failure */
		MDI_DEBUG(1, (MDI_WARN, pdip,
		    "!detaching pHCI=%p", (void *)pdip));
		return (MDI_FAILURE);
	}

	ph = i_devi_get_phci(pdip);
	ASSERT(ph != NULL);
	if (ph == NULL) {
		/* Invalid pHCI device, return failure */
		MDI_DEBUG(1, (MDI_WARN, pdip,
		    "!invalid pHCI=%p", (void *)pdip));
		return (MDI_FAILURE);
	}

	MDI_PHCI_LOCK(ph);
	vh = ph->ph_vhci;
	if (vh == NULL) {
		/* Invalid vHCI device, return failure */
		MDI_DEBUG(1, (MDI_WARN, pdip,
		    "!invalid vHCI=%p", (void *)pdip));
		MDI_PHCI_UNLOCK(ph);
		return (MDI_FAILURE);
	}

	if (MDI_PHCI_IS_READY(ph) == 0) {
		/*
		 * Do not allow new node creation when pHCI is in
		 * offline/suspended states
		 */
		MDI_DEBUG(1, (MDI_WARN, pdip,
		    "pHCI=%p is not ready", (void *)ph));
		MDI_PHCI_UNLOCK(ph);
		return (MDI_BUSY);
	}
	/* Mark the pHCI unstable until the 'fail' epilogue re-stabilizes it. */
	MDI_PHCI_UNSTABLE(ph);
	MDI_PHCI_UNLOCK(ph);

	/* look for a matching client, create one if not found */
	MDI_VHCI_CLIENT_LOCK(vh);
	ct = i_mdi_client_find(vh, cname, caddr);
	if (ct == NULL) {
		/*
		 * NOTE(review): ASSERT implies i_mdi_client_alloc() cannot
		 * fail (presumably KM_SLEEP allocation) — confirm.
		 */
		ct = i_mdi_client_alloc(vh, cname, caddr);
		ASSERT(ct != NULL);
	}

	if (ct->ct_dip == NULL) {
		/*
		 * Allocate a devinfo node
		 */
		ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr,
		    compatible, ncompatible);
		if (ct->ct_dip == NULL) {
			(void) i_mdi_client_free(vh, ct);
			goto fail;
		}
	}
	cdip = ct->ct_dip;

	DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT;
	DEVI(cdip)->devi_mdi_client = (caddr_t)ct;

	/* Check whether a pathinfo already exists for this pHCI + paddr. */
	MDI_CLIENT_LOCK(ct);
	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		/*
		 * Compare the unit address
		 */
		if ((MDI_PI(pip)->pi_phci == ph) &&
		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
			break;
		}
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}
	MDI_CLIENT_UNLOCK(ct);

	if (pip == NULL) {
		/*
		 * This is a new path for this client device. Allocate and
		 * initialize a new pathinfo node
		 */
		pip = i_mdi_pi_alloc(ph, paddr, ct);
		ASSERT(pip != NULL);
		path_allocated = 1;
	}
	rv = MDI_SUCCESS;

fail:
	/* Common epilogue for both success and devinfo-create failure. */
	/*
	 * Release the global mutex.
	 */
	MDI_VHCI_CLIENT_UNLOCK(vh);

	/*
	 * Mark the pHCI as stable
	 */
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_STABLE(ph);
	MDI_PHCI_UNLOCK(ph);
	/* On failure pip is still NULL here. */
	*ret_pip = pip;

	MDI_DEBUG(2, (MDI_NOTE, pdip,
	    "alloc %s %p", mdi_pi_spathname(pip), (void *)pip));

	if (path_allocated)
		vhcache_pi_add(vh->vh_config, MDI_PI(pip));

	return (rv);
}
2911
2912 /*ARGSUSED*/
2913 int
mdi_pi_alloc(dev_info_t * pdip,char * cname,char * caddr,char * paddr,int flags,mdi_pathinfo_t ** ret_pip)2914 mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2915 int flags, mdi_pathinfo_t **ret_pip)
2916 {
2917 return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0,
2918 flags, ret_pip));
2919 }
2920
2921 /*
2922 * i_mdi_pi_alloc():
2923 * Allocate a mdi_pathinfo node and add to the pHCI path list
2924 * Return Values:
2925 * mdi_pathinfo
2926 */
2927 /*ARGSUSED*/
2928 static mdi_pathinfo_t *
i_mdi_pi_alloc(mdi_phci_t * ph,char * paddr,mdi_client_t * ct)2929 i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct)
2930 {
2931 mdi_pathinfo_t *pip;
2932 int ct_circular;
2933 int ph_circular;
2934 static char path[MAXPATHLEN]; /* mdi_pathmap_mutex protects */
2935 char *path_persistent;
2936 int path_instance;
2937 mod_hash_val_t hv;
2938
2939 ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci));
2940
2941 pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP);
2942 mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL);
2943 MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT |
2944 MDI_PATHINFO_STATE_TRANSIENT;
2945
2946 if (MDI_PHCI_IS_USER_DISABLED(ph))
2947 MDI_PI_SET_USER_DISABLE(pip);
2948
2949 if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph))
2950 MDI_PI_SET_DRV_DISABLE_TRANS(pip);
2951
2952 if (MDI_PHCI_IS_DRV_DISABLED(ph))
2953 MDI_PI_SET_DRV_DISABLE(pip);
2954
2955 MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT;
2956 cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL);
2957 MDI_PI(pip)->pi_client = ct;
2958 MDI_PI(pip)->pi_phci = ph;
2959 MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP);
2960 (void) strcpy(MDI_PI(pip)->pi_addr, paddr);
2961
2962 /*
2963 * We form the "path" to the pathinfo node, and see if we have
2964 * already allocated a 'path_instance' for that "path". If so,
2965 * we use the already allocated 'path_instance'. If not, we
2966 * allocate a new 'path_instance' and associate it with a copy of
2967 * the "path" string (which is never freed). The association
2968 * between a 'path_instance' this "path" string persists until
2969 * reboot.
2970 */
2971 mutex_enter(&mdi_pathmap_mutex);
2972 (void) ddi_pathname(ph->ph_dip, path);
2973 (void) sprintf(path + strlen(path), "/%s@%s",
2974 mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip));
2975 if (mod_hash_find(mdi_pathmap_bypath, (mod_hash_key_t)path, &hv) == 0) {
2976 path_instance = (uint_t)(intptr_t)hv;
2977 } else {
2978 /* allocate a new 'path_instance' and persistent "path" */
2979 path_instance = mdi_pathmap_instance++;
2980 path_persistent = i_ddi_strdup(path, KM_SLEEP);
2981 (void) mod_hash_insert(mdi_pathmap_bypath,
2982 (mod_hash_key_t)path_persistent,
2983 (mod_hash_val_t)(intptr_t)path_instance);
2984 (void) mod_hash_insert(mdi_pathmap_byinstance,
2985 (mod_hash_key_t)(intptr_t)path_instance,
2986 (mod_hash_val_t)path_persistent);
2987
2988 /* create shortpath name */
2989 (void) snprintf(path, sizeof(path), "%s%d/%s@%s",
2990 ddi_driver_name(ph->ph_dip), ddi_get_instance(ph->ph_dip),
2991 mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip));
2992 path_persistent = i_ddi_strdup(path, KM_SLEEP);
2993 (void) mod_hash_insert(mdi_pathmap_sbyinstance,
2994 (mod_hash_key_t)(intptr_t)path_instance,
2995 (mod_hash_val_t)path_persistent);
2996 }
2997 mutex_exit(&mdi_pathmap_mutex);
2998 MDI_PI(pip)->pi_path_instance = path_instance;
2999
3000 (void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP);
3001 ASSERT(MDI_PI(pip)->pi_prop != NULL);
3002 MDI_PI(pip)->pi_pprivate = NULL;
3003 MDI_PI(pip)->pi_cprivate = NULL;
3004 MDI_PI(pip)->pi_vprivate = NULL;
3005 MDI_PI(pip)->pi_client_link = NULL;
3006 MDI_PI(pip)->pi_phci_link = NULL;
3007 MDI_PI(pip)->pi_ref_cnt = 0;
3008 MDI_PI(pip)->pi_kstats = NULL;
3009 MDI_PI(pip)->pi_preferred = 1;
3010 cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL);
3011
3012 /*
3013 * Lock both dev_info nodes against changes in parallel.
3014 *
3015 * The ndi_devi_enter(Client), is atypical since the client is a leaf.
3016 * This atypical operation is done to synchronize pathinfo nodes
3017 * during devinfo snapshot (see di_register_pip) by 'pretending' that
3018 * the pathinfo nodes are children of the Client.
3019 */
3020 ndi_devi_enter(ct->ct_dip, &ct_circular);
3021 ndi_devi_enter(ph->ph_dip, &ph_circular);
3022
3023 i_mdi_phci_add_path(ph, pip);
3024 i_mdi_client_add_path(ct, pip);
3025
3026 ndi_devi_exit(ph->ph_dip, ph_circular);
3027 ndi_devi_exit(ct->ct_dip, ct_circular);
3028
3029 return (pip);
3030 }
3031
3032 /*
3033 * mdi_pi_pathname_by_instance():
3034 * Lookup of "path" by 'path_instance'. Return "path".
3035 * NOTE: returned "path" remains valid forever (until reboot).
3036 */
3037 char *
mdi_pi_pathname_by_instance(int path_instance)3038 mdi_pi_pathname_by_instance(int path_instance)
3039 {
3040 char *path;
3041 mod_hash_val_t hv;
3042
3043 /* mdi_pathmap lookup of "path" by 'path_instance' */
3044 mutex_enter(&mdi_pathmap_mutex);
3045 if (mod_hash_find(mdi_pathmap_byinstance,
3046 (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0)
3047 path = (char *)hv;
3048 else
3049 path = NULL;
3050 mutex_exit(&mdi_pathmap_mutex);
3051 return (path);
3052 }
3053
3054 /*
3055 * mdi_pi_spathname_by_instance():
3056 * Lookup of "shortpath" by 'path_instance'. Return "shortpath".
3057 * NOTE: returned "shortpath" remains valid forever (until reboot).
3058 */
3059 char *
mdi_pi_spathname_by_instance(int path_instance)3060 mdi_pi_spathname_by_instance(int path_instance)
3061 {
3062 char *path;
3063 mod_hash_val_t hv;
3064
3065 /* mdi_pathmap lookup of "path" by 'path_instance' */
3066 mutex_enter(&mdi_pathmap_mutex);
3067 if (mod_hash_find(mdi_pathmap_sbyinstance,
3068 (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0)
3069 path = (char *)hv;
3070 else
3071 path = NULL;
3072 mutex_exit(&mdi_pathmap_mutex);
3073 return (path);
3074 }
3075
3076
3077 /*
3078 * i_mdi_phci_add_path():
3079 * Add a mdi_pathinfo node to pHCI list.
 * Notes:
 *	This function acquires the per-pHCI mutex itself; the caller
 *	must not already hold it.
3082 */
3083 static void
i_mdi_phci_add_path(mdi_phci_t * ph,mdi_pathinfo_t * pip)3084 i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
3085 {
3086 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
3087
3088 MDI_PHCI_LOCK(ph);
3089 if (ph->ph_path_head == NULL) {
3090 ph->ph_path_head = pip;
3091 } else {
3092 MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip);
3093 }
3094 ph->ph_path_tail = pip;
3095 ph->ph_path_count++;
3096 MDI_PHCI_UNLOCK(ph);
3097 }
3098
3099 /*
3100 * i_mdi_client_add_path():
3101 * Add mdi_pathinfo node to client list
3102 */
3103 static void
i_mdi_client_add_path(mdi_client_t * ct,mdi_pathinfo_t * pip)3104 i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
3105 {
3106 ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
3107
3108 MDI_CLIENT_LOCK(ct);
3109 if (ct->ct_path_head == NULL) {
3110 ct->ct_path_head = pip;
3111 } else {
3112 MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip);
3113 }
3114 ct->ct_path_tail = pip;
3115 ct->ct_path_count++;
3116 MDI_CLIENT_UNLOCK(ct);
3117 }
3118
3119 /*
3120 * mdi_pi_free():
3121 * Free the mdi_pathinfo node and also client device node if this
3122 * is the last path to the device
3123 * Return Values:
3124 * MDI_SUCCESS
3125 * MDI_FAILURE
3126 * MDI_BUSY
3127 */
3128 /*ARGSUSED*/
3129 int
mdi_pi_free(mdi_pathinfo_t * pip,int flags)3130 mdi_pi_free(mdi_pathinfo_t *pip, int flags)
3131 {
3132 int rv;
3133 mdi_vhci_t *vh;
3134 mdi_phci_t *ph;
3135 mdi_client_t *ct;
3136 int (*f)();
3137 int client_held = 0;
3138
3139 MDI_PI_LOCK(pip);
3140 ph = MDI_PI(pip)->pi_phci;
3141 ASSERT(ph != NULL);
3142 if (ph == NULL) {
3143 /*
3144 * Invalid pHCI device, return failure
3145 */
3146 MDI_DEBUG(1, (MDI_WARN, NULL,
3147 "!invalid pHCI: pip %s %p",
3148 mdi_pi_spathname(pip), (void *)pip));
3149 MDI_PI_UNLOCK(pip);
3150 return (MDI_FAILURE);
3151 }
3152
3153 vh = ph->ph_vhci;
3154 ASSERT(vh != NULL);
3155 if (vh == NULL) {
3156 /* Invalid pHCI device, return failure */
3157 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
3158 "!invalid vHCI: pip %s %p",
3159 mdi_pi_spathname(pip), (void *)pip));
3160 MDI_PI_UNLOCK(pip);
3161 return (MDI_FAILURE);
3162 }
3163
3164 ct = MDI_PI(pip)->pi_client;
3165 ASSERT(ct != NULL);
3166 if (ct == NULL) {
3167 /*
3168 * Invalid Client device, return failure
3169 */
3170 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
3171 "!invalid client: pip %s %p",
3172 mdi_pi_spathname(pip), (void *)pip));
3173 MDI_PI_UNLOCK(pip);
3174 return (MDI_FAILURE);
3175 }
3176
3177 /*
3178 * Check to see for busy condition. A mdi_pathinfo can only be freed
3179 * if the node state is either offline or init and the reference count
3180 * is zero.
3181 */
3182 if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) ||
3183 MDI_PI_IS_INITING(pip))) {
3184 /*
3185 * Node is busy
3186 */
3187 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
3188 "!busy: pip %s %p", mdi_pi_spathname(pip), (void *)pip));
3189 MDI_PI_UNLOCK(pip);
3190 return (MDI_BUSY);
3191 }
3192
3193 while (MDI_PI(pip)->pi_ref_cnt != 0) {
3194 /*
3195 * Give a chance for pending I/Os to complete.
3196 */
3197 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3198 "!%d cmds still pending on path: %s %p",
3199 MDI_PI(pip)->pi_ref_cnt,
3200 mdi_pi_spathname(pip), (void *)pip));
3201 if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv,
3202 &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000),
3203 TR_CLOCK_TICK) == -1) {
3204 /*
3205 * The timeout time reached without ref_cnt being zero
3206 * being signaled.
3207 */
3208 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3209 "!Timeout reached on path %s %p without the cond",
3210 mdi_pi_spathname(pip), (void *)pip));
3211 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3212 "!%d cmds still pending on path %s %p",
3213 MDI_PI(pip)->pi_ref_cnt,
3214 mdi_pi_spathname(pip), (void *)pip));
3215 MDI_PI_UNLOCK(pip);
3216 return (MDI_BUSY);
3217 }
3218 }
3219 if (MDI_PI(pip)->pi_pm_held) {
3220 client_held = 1;
3221 }
3222 MDI_PI_UNLOCK(pip);
3223
3224 vhcache_pi_remove(vh->vh_config, MDI_PI(pip));
3225
3226 MDI_CLIENT_LOCK(ct);
3227
3228 /* Prevent further failovers till MDI_VHCI_CLIENT_LOCK is held */
3229 MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct);
3230
3231 /*
3232 * Wait till failover is complete before removing this node.
3233 */
3234 while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
3235 cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);
3236
3237 MDI_CLIENT_UNLOCK(ct);
3238 MDI_VHCI_CLIENT_LOCK(vh);
3239 MDI_CLIENT_LOCK(ct);
3240 MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct);
3241
3242 if (!MDI_PI_IS_INITING(pip)) {
3243 f = vh->vh_ops->vo_pi_uninit;
3244 if (f != NULL) {
3245 rv = (*f)(vh->vh_dip, pip, 0);
3246 }
3247 } else
3248 rv = MDI_SUCCESS;
3249
3250 /*
3251 * If vo_pi_uninit() completed successfully.
3252 */
3253 if (rv == MDI_SUCCESS) {
3254 if (client_held) {
3255 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3256 "i_mdi_pm_rele_client\n"));
3257 i_mdi_pm_rele_client(ct, 1);
3258 }
3259 i_mdi_pi_free(ph, pip, ct);
3260 if (ct->ct_path_count == 0) {
3261 /*
3262 * Client lost its last path.
3263 * Clean up the client device
3264 */
3265 MDI_CLIENT_UNLOCK(ct);
3266 (void) i_mdi_client_free(ct->ct_vhci, ct);
3267 MDI_VHCI_CLIENT_UNLOCK(vh);
3268 return (rv);
3269 }
3270 }
3271 MDI_CLIENT_UNLOCK(ct);
3272 MDI_VHCI_CLIENT_UNLOCK(vh);
3273
3274 if (rv == MDI_FAILURE)
3275 vhcache_pi_add(vh->vh_config, MDI_PI(pip));
3276
3277 return (rv);
3278 }
3279
/*
 * i_mdi_pi_free():
 *	Free the mdi_pathinfo node and all resources owned by it.
 *
 *	Caller must hold the client mutex (asserted below).  The node
 *	must already be uninitialized (vo_pi_uninit done) and have a
 *	zero reference count; see the caller for those guarantees.
 */
static void
i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct)
{
    int ct_circular;
    int ph_circular;

    ASSERT(MDI_CLIENT_LOCKED(ct));

    /*
     * remove any per-path kstats
     */
    i_mdi_pi_kstat_destroy(pip);

    /*
     * See comments in i_mdi_pi_alloc(): the client and pHCI devinfo
     * nodes are entered in the same order used at allocation time so
     * list manipulation is serialized against devinfo tree activity.
     */
    ndi_devi_enter(ct->ct_dip, &ct_circular);
    ndi_devi_enter(ph->ph_dip, &ph_circular);

    /* Unlink the node from both the client and pHCI path lists. */
    i_mdi_client_remove_path(ct, pip);
    i_mdi_phci_remove_path(ph, pip);

    ndi_devi_exit(ph->ph_dip, ph_circular);
    ndi_devi_exit(ct->ct_dip, ct_circular);

    /* Tear down the node's synchronization objects ... */
    mutex_destroy(&MDI_PI(pip)->pi_mutex);
    cv_destroy(&MDI_PI(pip)->pi_state_cv);
    cv_destroy(&MDI_PI(pip)->pi_ref_cv);

    /* ... its unit-address string ... */
    if (MDI_PI(pip)->pi_addr) {
        kmem_free(MDI_PI(pip)->pi_addr,
            strlen(MDI_PI(pip)->pi_addr) + 1);
        MDI_PI(pip)->pi_addr = NULL;
    }

    /* ... its property list ... */
    if (MDI_PI(pip)->pi_prop) {
        (void) nvlist_free(MDI_PI(pip)->pi_prop);
        MDI_PI(pip)->pi_prop = NULL;
    }

    /* ... and finally the node itself. */
    kmem_free(pip, sizeof (struct mdi_pathinfo));
}
3322
3323
3324 /*
3325 * i_mdi_phci_remove_path():
3326 * Remove a mdi_pathinfo node from pHCI list.
3327 * Notes:
3328 * Caller should hold per-pHCI mutex
3329 */
3330 static void
i_mdi_phci_remove_path(mdi_phci_t * ph,mdi_pathinfo_t * pip)3331 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
3332 {
3333 mdi_pathinfo_t *prev = NULL;
3334 mdi_pathinfo_t *path = NULL;
3335
3336 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
3337
3338 MDI_PHCI_LOCK(ph);
3339 path = ph->ph_path_head;
3340 while (path != NULL) {
3341 if (path == pip) {
3342 break;
3343 }
3344 prev = path;
3345 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
3346 }
3347
3348 if (path) {
3349 ph->ph_path_count--;
3350 if (prev) {
3351 MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link;
3352 } else {
3353 ph->ph_path_head =
3354 (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
3355 }
3356 if (ph->ph_path_tail == path) {
3357 ph->ph_path_tail = prev;
3358 }
3359 }
3360
3361 /*
3362 * Clear the pHCI link
3363 */
3364 MDI_PI(pip)->pi_phci_link = NULL;
3365 MDI_PI(pip)->pi_phci = NULL;
3366 MDI_PHCI_UNLOCK(ph);
3367 }
3368
3369 /*
3370 * i_mdi_client_remove_path():
3371 * Remove a mdi_pathinfo node from client path list.
3372 */
3373 static void
i_mdi_client_remove_path(mdi_client_t * ct,mdi_pathinfo_t * pip)3374 i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
3375 {
3376 mdi_pathinfo_t *prev = NULL;
3377 mdi_pathinfo_t *path;
3378
3379 ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
3380
3381 ASSERT(MDI_CLIENT_LOCKED(ct));
3382 path = ct->ct_path_head;
3383 while (path != NULL) {
3384 if (path == pip) {
3385 break;
3386 }
3387 prev = path;
3388 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
3389 }
3390
3391 if (path) {
3392 ct->ct_path_count--;
3393 if (prev) {
3394 MDI_PI(prev)->pi_client_link =
3395 MDI_PI(path)->pi_client_link;
3396 } else {
3397 ct->ct_path_head =
3398 (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
3399 }
3400 if (ct->ct_path_tail == path) {
3401 ct->ct_path_tail = prev;
3402 }
3403 if (ct->ct_path_last == path) {
3404 ct->ct_path_last = ct->ct_path_head;
3405 }
3406 }
3407 MDI_PI(pip)->pi_client_link = NULL;
3408 MDI_PI(pip)->pi_client = NULL;
3409 }
3410
/*
 * i_mdi_pi_state_change():
 *	Transition a mdi_pathinfo node to the requested state (online,
 *	standby, fault or offline) and drive the resulting client
 *	state update.
 *
 *	If the path is still initializing, the vHCI's vo_pi_init entry
 *	point is called first.  The vHCI's vo_pi_state_change callback
 *	performs the actual transition; based on the aggregate client
 *	state afterwards, the client devinfo node may be onlined or
 *	offlined here as well.
 *
 * Return Values:
 *	MDI_SUCCESS
 *	MDI_FAILURE
 *	MDI_BUSY	pHCI not ready, or client offline was refused
 */
/*ARGSUSED*/
static int
i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
{
    int rv = MDI_SUCCESS;
    mdi_vhci_t *vh;
    mdi_phci_t *ph;
    mdi_client_t *ct;
    int (*f)();
    dev_info_t *cdip;

    MDI_PI_LOCK(pip);

    ph = MDI_PI(pip)->pi_phci;
    ASSERT(ph);
    if (ph == NULL) {
        /*
         * Invalid pHCI device, fail the request
         */
        MDI_PI_UNLOCK(pip);
        MDI_DEBUG(1, (MDI_WARN, NULL,
            "!invalid phci: pip %s %p",
            mdi_pi_spathname(pip), (void *)pip));
        return (MDI_FAILURE);
    }

    vh = ph->ph_vhci;
    ASSERT(vh);
    if (vh == NULL) {
        /*
         * Invalid vHCI device, fail the request
         */
        MDI_PI_UNLOCK(pip);
        MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
            "!invalid vhci: pip %s %p",
            mdi_pi_spathname(pip), (void *)pip));
        return (MDI_FAILURE);
    }

    ct = MDI_PI(pip)->pi_client;
    ASSERT(ct != NULL);
    if (ct == NULL) {
        /*
         * Invalid client device, fail the request
         */
        MDI_PI_UNLOCK(pip);
        MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
            "!invalid client: pip %s %p",
            mdi_pi_spathname(pip), (void *)pip));
        return (MDI_FAILURE);
    }

    /*
     * If this path has not been initialized yet, Callback vHCI driver's
     * pathinfo node initialize entry point
     */

    if (MDI_PI_IS_INITING(pip)) {
        /* vo_pi_init is called without the pip mutex held */
        MDI_PI_UNLOCK(pip);
        f = vh->vh_ops->vo_pi_init;
        if (f != NULL) {
            rv = (*f)(vh->vh_dip, pip, 0);
            if (rv != MDI_SUCCESS) {
                MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
                    "!vo_pi_init failed: vHCI %p, pip %s %p",
                    (void *)vh, mdi_pi_spathname(pip),
                    (void *)pip));
                return (MDI_FAILURE);
            }
        }
        MDI_PI_LOCK(pip);
        /* Initialization done; path is no longer transient. */
        MDI_PI_CLEAR_TRANSIENT(pip);
    }

    /*
     * Do not allow state transition when pHCI is in offline/suspended
     * states
     */
    i_mdi_phci_lock(ph, pip);
    if (MDI_PHCI_IS_READY(ph) == 0) {
        MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
            "!pHCI not ready, pHCI=%p", (void *)ph));
        MDI_PI_UNLOCK(pip);
        i_mdi_phci_unlock(ph);
        return (MDI_BUSY);
    }
    /* Mark the pHCI unstable until this transition completes. */
    MDI_PHCI_UNSTABLE(ph);
    i_mdi_phci_unlock(ph);

    /*
     * Check if mdi_pathinfo state is in transient state.
     * If yes, offlining is in progress and wait till transient state is
     * cleared.
     */
    if (MDI_PI_IS_TRANSIENT(pip)) {
        while (MDI_PI_IS_TRANSIENT(pip)) {
            cv_wait(&MDI_PI(pip)->pi_state_cv,
                &MDI_PI(pip)->pi_mutex);
        }
    }

    /*
     * Grab the client lock in reverse order sequence and release the
     * mdi_pathinfo mutex.
     */
    i_mdi_client_lock(ct, pip);
    MDI_PI_UNLOCK(pip);

    /*
     * Wait till failover state is cleared
     */
    while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
        cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

    /*
     * Mark the mdi_pathinfo node state as transient
     */
    MDI_PI_LOCK(pip);
    switch (state) {
    case MDI_PATHINFO_STATE_ONLINE:
        MDI_PI_SET_ONLINING(pip);
        break;

    case MDI_PATHINFO_STATE_STANDBY:
        MDI_PI_SET_STANDBYING(pip);
        break;

    case MDI_PATHINFO_STATE_FAULT:
        /*
         * Mark the pathinfo state as FAULTED
         */
        MDI_PI_SET_FAULTING(pip);
        MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR);
        break;

    case MDI_PATHINFO_STATE_OFFLINE:
        /*
         * ndi_devi_offline() cannot hold pip or ct locks.
         */
        MDI_PI_UNLOCK(pip);

        /*
         * If this is a user initiated path online->offline operation
         * who's success would transition a client from DEGRADED to
         * FAILED then only proceed if we can offline the client first.
         */
        cdip = ct->ct_dip;
        if ((flag & NDI_USER_REQ) &&
            MDI_PI_IS_ONLINE(pip) &&
            (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED)) {
            i_mdi_client_unlock(ct);
            rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN);
            if (rv != NDI_SUCCESS) {
                /*
                 * Convert to MDI error code
                 */
                switch (rv) {
                case NDI_BUSY:
                    rv = MDI_BUSY;
                    break;
                default:
                    rv = MDI_FAILURE;
                    break;
                }
                /* Client could not be offlined; abandon transition. */
                goto state_change_exit;
            } else {
                /* Re-acquire the client lock dropped above. */
                i_mdi_client_lock(ct, NULL);
            }
        }
        /*
         * Mark the mdi_pathinfo node state as transient
         */
        MDI_PI_LOCK(pip);
        MDI_PI_SET_OFFLINING(pip);
        break;
    }
    MDI_PI_UNLOCK(pip);
    MDI_CLIENT_UNSTABLE(ct);
    i_mdi_client_unlock(ct);

    /* Call the vHCI back to effect the state change, locks dropped. */
    f = vh->vh_ops->vo_pi_state_change;
    if (f != NULL)
        rv = (*f)(vh->vh_dip, pip, state, 0, flag);

    MDI_CLIENT_LOCK(ct);
    MDI_PI_LOCK(pip);
    if (rv == MDI_NOT_SUPPORTED) {
        MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct);
    }
    if (rv != MDI_SUCCESS) {
        MDI_DEBUG(2, (MDI_WARN, ct->ct_dip,
            "vo_pi_state_change failed: rv %x", rv));
    }
    if (MDI_PI_IS_TRANSIENT(pip)) {
        if (rv == MDI_SUCCESS) {
            /* Transition complete; commit the new state. */
            MDI_PI_CLEAR_TRANSIENT(pip);
        } else {
            /* Transition failed; roll back to the prior state. */
            MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
        }
    }

    /*
     * Wake anyone waiting for this mdi_pathinfo node
     */
    cv_broadcast(&MDI_PI(pip)->pi_state_cv);
    MDI_PI_UNLOCK(pip);

    /*
     * Mark the client device as stable
     */
    MDI_CLIENT_STABLE(ct);
    if (rv == MDI_SUCCESS) {
        if (ct->ct_unstable == 0) {
            cdip = ct->ct_dip;

            /*
             * Onlining the mdi_pathinfo node will impact the
             * client state Update the client and dev_info node
             * state accordingly
             */
            rv = NDI_SUCCESS;
            i_mdi_client_update_state(ct);
            switch (MDI_CLIENT_STATE(ct)) {
            case MDI_CLIENT_STATE_OPTIMAL:
            case MDI_CLIENT_STATE_DEGRADED:
                if (cdip && !i_ddi_devi_attached(cdip) &&
                    ((state == MDI_PATHINFO_STATE_ONLINE) ||
                    (state == MDI_PATHINFO_STATE_STANDBY))) {

                    /*
                     * Must do ndi_devi_online() through
                     * hotplug thread for deferred
                     * attach mechanism to work
                     */
                    MDI_CLIENT_UNLOCK(ct);
                    rv = ndi_devi_online(cdip, 0);
                    MDI_CLIENT_LOCK(ct);
                    if ((rv != NDI_SUCCESS) &&
                        (MDI_CLIENT_STATE(ct) ==
                        MDI_CLIENT_STATE_DEGRADED)) {
                        /*
                         * ndi_devi_online failed.
                         * Reset client flags to
                         * offline.
                         */
                        MDI_DEBUG(1, (MDI_WARN, cdip,
                            "!ndi_devi_online failed "
                            "error %x", rv));
                        MDI_CLIENT_SET_OFFLINE(ct);
                    }
                    if (rv != NDI_SUCCESS) {
                        /* Reset the path state */
                        MDI_PI_LOCK(pip);
                        MDI_PI(pip)->pi_state =
                            MDI_PI_OLD_STATE(pip);
                        MDI_PI_UNLOCK(pip);
                    }
                }
                break;

            case MDI_CLIENT_STATE_FAILED:
                /*
                 * This is the last path case for
                 * non-user initiated events.
                 */
                if (((flag & NDI_USER_REQ) == 0) &&
                    cdip && (i_ddi_node_state(cdip) >=
                    DS_INITIALIZED)) {
                    MDI_CLIENT_UNLOCK(ct);
                    rv = ndi_devi_offline(cdip,
                        NDI_DEVFS_CLEAN);
                    MDI_CLIENT_LOCK(ct);

                    if (rv != NDI_SUCCESS) {
                        /*
                         * ndi_devi_offline failed.
                         * Reset client flags to
                         * online as the path could not
                         * be offlined.
                         */
                        MDI_DEBUG(1, (MDI_WARN, cdip,
                            "!ndi_devi_offline failed: "
                            "error %x", rv));
                        MDI_CLIENT_SET_ONLINE(ct);
                    }
                }
                break;
            }
            /*
             * Convert to MDI error code
             */
            switch (rv) {
            case NDI_SUCCESS:
                MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
                i_mdi_report_path_state(ct, pip);
                rv = MDI_SUCCESS;
                break;
            case NDI_BUSY:
                rv = MDI_BUSY;
                break;
            default:
                rv = MDI_FAILURE;
                break;
            }
        }
    }
    MDI_CLIENT_UNLOCK(ct);

state_change_exit:
    /*
     * Mark the pHCI as stable again.
     */
    MDI_PHCI_LOCK(ph);
    MDI_PHCI_STABLE(ph);
    MDI_PHCI_UNLOCK(ph);
    return (rv);
}
3736
3737 /*
3738 * mdi_pi_online():
3739 * Place the path_info node in the online state. The path is
3740 * now available to be selected by mdi_select_path() for
3741 * transporting I/O requests to client devices.
3742 * Return Values:
3743 * MDI_SUCCESS
3744 * MDI_FAILURE
3745 */
3746 int
mdi_pi_online(mdi_pathinfo_t * pip,int flags)3747 mdi_pi_online(mdi_pathinfo_t *pip, int flags)
3748 {
3749 mdi_client_t *ct = MDI_PI(pip)->pi_client;
3750 int client_held = 0;
3751 int rv;
3752
3753 ASSERT(ct != NULL);
3754 rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags);
3755 if (rv != MDI_SUCCESS)
3756 return (rv);
3757
3758 MDI_PI_LOCK(pip);
3759 if (MDI_PI(pip)->pi_pm_held == 0) {
3760 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3761 "i_mdi_pm_hold_pip %p", (void *)pip));
3762 i_mdi_pm_hold_pip(pip);
3763 client_held = 1;
3764 }
3765 MDI_PI_UNLOCK(pip);
3766
3767 if (client_held) {
3768 MDI_CLIENT_LOCK(ct);
3769 if (ct->ct_power_cnt == 0) {
3770 rv = i_mdi_power_all_phci(ct);
3771 }
3772
3773 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3774 "i_mdi_pm_hold_client %p", (void *)ct));
3775 i_mdi_pm_hold_client(ct, 1);
3776 MDI_CLIENT_UNLOCK(ct);
3777 }
3778
3779 return (rv);
3780 }
3781
3782 /*
3783 * mdi_pi_standby():
3784 * Place the mdi_pathinfo node in standby state
3785 *
3786 * Return Values:
3787 * MDI_SUCCESS
3788 * MDI_FAILURE
3789 */
3790 int
mdi_pi_standby(mdi_pathinfo_t * pip,int flags)3791 mdi_pi_standby(mdi_pathinfo_t *pip, int flags)
3792 {
3793 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags));
3794 }
3795
3796 /*
3797 * mdi_pi_fault():
3798 * Place the mdi_pathinfo node in fault'ed state
3799 * Return Values:
3800 * MDI_SUCCESS
3801 * MDI_FAILURE
3802 */
3803 int
mdi_pi_fault(mdi_pathinfo_t * pip,int flags)3804 mdi_pi_fault(mdi_pathinfo_t *pip, int flags)
3805 {
3806 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags));
3807 }
3808
3809 /*
3810 * mdi_pi_offline():
3811 * Offline a mdi_pathinfo node.
3812 * Return Values:
3813 * MDI_SUCCESS
3814 * MDI_FAILURE
3815 */
3816 int
mdi_pi_offline(mdi_pathinfo_t * pip,int flags)3817 mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3818 {
3819 int ret, client_held = 0;
3820 mdi_client_t *ct;
3821
3822 /*
3823 * Original code overloaded NDI_DEVI_REMOVE to this interface, and
3824 * used it to mean "user initiated operation" (i.e. devctl). Callers
3825 * should now just use NDI_USER_REQ.
3826 */
3827 if (flags & NDI_DEVI_REMOVE) {
3828 flags &= ~NDI_DEVI_REMOVE;
3829 flags |= NDI_USER_REQ;
3830 }
3831
3832 ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags);
3833
3834 if (ret == MDI_SUCCESS) {
3835 MDI_PI_LOCK(pip);
3836 if (MDI_PI(pip)->pi_pm_held) {
3837 client_held = 1;
3838 }
3839 MDI_PI_UNLOCK(pip);
3840
3841 if (client_held) {
3842 ct = MDI_PI(pip)->pi_client;
3843 MDI_CLIENT_LOCK(ct);
3844 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3845 "i_mdi_pm_rele_client\n"));
3846 i_mdi_pm_rele_client(ct, 1);
3847 MDI_CLIENT_UNLOCK(ct);
3848 }
3849 }
3850
3851 return (ret);
3852 }
3853
3854 /*
3855 * i_mdi_pi_offline():
3856 * Offline a mdi_pathinfo node and call the vHCI driver's callback
3857 */
3858 static int
i_mdi_pi_offline(mdi_pathinfo_t * pip,int flags)3859 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3860 {
3861 dev_info_t *vdip = NULL;
3862 mdi_vhci_t *vh = NULL;
3863 mdi_client_t *ct = NULL;
3864 int (*f)();
3865 int rv;
3866
3867 MDI_PI_LOCK(pip);
3868 ct = MDI_PI(pip)->pi_client;
3869 ASSERT(ct != NULL);
3870
3871 while (MDI_PI(pip)->pi_ref_cnt != 0) {
3872 /*
3873 * Give a chance for pending I/Os to complete.
3874 */
3875 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3876 "!%d cmds still pending on path %s %p",
3877 MDI_PI(pip)->pi_ref_cnt, mdi_pi_spathname(pip),
3878 (void *)pip));
3879 if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv,
3880 &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000),
3881 TR_CLOCK_TICK) == -1) {
3882 /*
3883 * The timeout time reached without ref_cnt being zero
3884 * being signaled.
3885 */
3886 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3887 "!Timeout reached on path %s %p without the cond",
3888 mdi_pi_spathname(pip), (void *)pip));
3889 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3890 "!%d cmds still pending on path %s %p",
3891 MDI_PI(pip)->pi_ref_cnt,
3892 mdi_pi_spathname(pip), (void *)pip));
3893 }
3894 }
3895 vh = ct->ct_vhci;
3896 vdip = vh->vh_dip;
3897
3898 /*
3899 * Notify vHCI that has registered this event
3900 */
3901 ASSERT(vh->vh_ops);
3902 f = vh->vh_ops->vo_pi_state_change;
3903
3904 if (f != NULL) {
3905 MDI_PI_UNLOCK(pip);
3906 if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0,
3907 flags)) != MDI_SUCCESS) {
3908 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
3909 "!vo_path_offline failed: vdip %s%d %p: path %s %p",
3910 ddi_driver_name(vdip), ddi_get_instance(vdip),
3911 (void *)vdip, mdi_pi_spathname(pip), (void *)pip));
3912 }
3913 MDI_PI_LOCK(pip);
3914 }
3915
3916 /*
3917 * Set the mdi_pathinfo node state and clear the transient condition
3918 */
3919 MDI_PI_SET_OFFLINE(pip);
3920 cv_broadcast(&MDI_PI(pip)->pi_state_cv);
3921 MDI_PI_UNLOCK(pip);
3922
3923 MDI_CLIENT_LOCK(ct);
3924 if (rv == MDI_SUCCESS) {
3925 if (ct->ct_unstable == 0) {
3926 dev_info_t *cdip = ct->ct_dip;
3927
3928 /*
3929 * Onlining the mdi_pathinfo node will impact the
3930 * client state Update the client and dev_info node
3931 * state accordingly
3932 */
3933 i_mdi_client_update_state(ct);
3934 rv = NDI_SUCCESS;
3935 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
3936 if (cdip &&
3937 (i_ddi_node_state(cdip) >=
3938 DS_INITIALIZED)) {
3939 MDI_CLIENT_UNLOCK(ct);
3940 rv = ndi_devi_offline(cdip,
3941 NDI_DEVFS_CLEAN);
3942 MDI_CLIENT_LOCK(ct);
3943 if (rv != NDI_SUCCESS) {
3944 /*
3945 * ndi_devi_offline failed.
3946 * Reset client flags to
3947 * online.
3948 */
3949 MDI_DEBUG(4, (MDI_WARN, cdip,
3950 "ndi_devi_offline failed: "
3951 "error %x", rv));
3952 MDI_CLIENT_SET_ONLINE(ct);
3953 }
3954 }
3955 }
3956 /*
3957 * Convert to MDI error code
3958 */
3959 switch (rv) {
3960 case NDI_SUCCESS:
3961 rv = MDI_SUCCESS;
3962 break;
3963 case NDI_BUSY:
3964 rv = MDI_BUSY;
3965 break;
3966 default:
3967 rv = MDI_FAILURE;
3968 break;
3969 }
3970 }
3971 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
3972 i_mdi_report_path_state(ct, pip);
3973 }
3974
3975 MDI_CLIENT_UNLOCK(ct);
3976
3977 /*
3978 * Change in the mdi_pathinfo node state will impact the client state
3979 */
3980 MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip,
3981 "ct = %p pip = %p", (void *)ct, (void *)pip));
3982 return (rv);
3983 }
3984
3985 /*
3986 * i_mdi_pi_online():
3987 * Online a mdi_pathinfo node and call the vHCI driver's callback
3988 */
3989 static int
i_mdi_pi_online(mdi_pathinfo_t * pip,int flags)3990 i_mdi_pi_online(mdi_pathinfo_t *pip, int flags)
3991 {
3992 mdi_vhci_t *vh = NULL;
3993 mdi_client_t *ct = NULL;
3994 mdi_phci_t *ph;
3995 int (*f)();
3996 int rv;
3997
3998 MDI_PI_LOCK(pip);
3999 ph = MDI_PI(pip)->pi_phci;
4000 vh = ph->ph_vhci;
4001 ct = MDI_PI(pip)->pi_client;
4002 MDI_PI_SET_ONLINING(pip)
4003 MDI_PI_UNLOCK(pip);
4004 f = vh->vh_ops->vo_pi_state_change;
4005 if (f != NULL)
4006 rv = (*f)(vh->vh_dip, pip, MDI_PATHINFO_STATE_ONLINE, 0,
4007 flags);
4008 MDI_CLIENT_LOCK(ct);
4009 MDI_PI_LOCK(pip);
4010 cv_broadcast(&MDI_PI(pip)->pi_state_cv);
4011 MDI_PI_UNLOCK(pip);
4012 if (rv == MDI_SUCCESS) {
4013 dev_info_t *cdip = ct->ct_dip;
4014
4015 rv = MDI_SUCCESS;
4016 i_mdi_client_update_state(ct);
4017 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL ||
4018 MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
4019 if (cdip && !i_ddi_devi_attached(cdip)) {
4020 MDI_CLIENT_UNLOCK(ct);
4021 rv = ndi_devi_online(cdip, 0);
4022 MDI_CLIENT_LOCK(ct);
4023 if ((rv != NDI_SUCCESS) &&
4024 (MDI_CLIENT_STATE(ct) ==
4025 MDI_CLIENT_STATE_DEGRADED)) {
4026 MDI_CLIENT_SET_OFFLINE(ct);
4027 }
4028 if (rv != NDI_SUCCESS) {
4029 /* Reset the path state */
4030 MDI_PI_LOCK(pip);
4031 MDI_PI(pip)->pi_state =
4032 MDI_PI_OLD_STATE(pip);
4033 MDI_PI_UNLOCK(pip);
4034 }
4035 }
4036 }
4037 switch (rv) {
4038 case NDI_SUCCESS:
4039 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
4040 i_mdi_report_path_state(ct, pip);
4041 rv = MDI_SUCCESS;
4042 break;
4043 case NDI_BUSY:
4044 rv = MDI_BUSY;
4045 break;
4046 default:
4047 rv = MDI_FAILURE;
4048 break;
4049 }
4050 } else {
4051 /* Reset the path state */
4052 MDI_PI_LOCK(pip);
4053 MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
4054 MDI_PI_UNLOCK(pip);
4055 }
4056 MDI_CLIENT_UNLOCK(ct);
4057 return (rv);
4058 }
4059
4060 /*
4061 * mdi_pi_get_node_name():
4062 * Get the name associated with a mdi_pathinfo node.
4063 * Since pathinfo nodes are not directly named, we
4064 * return the node_name of the client.
4065 *
4066 * Return Values:
4067 * char *
4068 */
4069 char *
mdi_pi_get_node_name(mdi_pathinfo_t * pip)4070 mdi_pi_get_node_name(mdi_pathinfo_t *pip)
4071 {
4072 mdi_client_t *ct;
4073
4074 if (pip == NULL)
4075 return (NULL);
4076 ct = MDI_PI(pip)->pi_client;
4077 if ((ct == NULL) || (ct->ct_dip == NULL))
4078 return (NULL);
4079 return (ddi_node_name(ct->ct_dip));
4080 }
4081
4082 /*
4083 * mdi_pi_get_addr():
4084 * Get the unit address associated with a mdi_pathinfo node
4085 *
4086 * Return Values:
4087 * char *
4088 */
4089 char *
mdi_pi_get_addr(mdi_pathinfo_t * pip)4090 mdi_pi_get_addr(mdi_pathinfo_t *pip)
4091 {
4092 if (pip == NULL)
4093 return (NULL);
4094
4095 return (MDI_PI(pip)->pi_addr);
4096 }
4097
4098 /*
4099 * mdi_pi_get_path_instance():
4100 * Get the 'path_instance' of a mdi_pathinfo node
4101 *
4102 * Return Values:
4103 * path_instance
4104 */
4105 int
mdi_pi_get_path_instance(mdi_pathinfo_t * pip)4106 mdi_pi_get_path_instance(mdi_pathinfo_t *pip)
4107 {
4108 if (pip == NULL)
4109 return (0);
4110
4111 return (MDI_PI(pip)->pi_path_instance);
4112 }
4113
4114 /*
4115 * mdi_pi_pathname():
4116 * Return pointer to path to pathinfo node.
4117 */
4118 char *
mdi_pi_pathname(mdi_pathinfo_t * pip)4119 mdi_pi_pathname(mdi_pathinfo_t *pip)
4120 {
4121 if (pip == NULL)
4122 return (NULL);
4123 return (mdi_pi_pathname_by_instance(mdi_pi_get_path_instance(pip)));
4124 }
4125
4126 /*
4127 * mdi_pi_spathname():
4128 * Return pointer to shortpath to pathinfo node. Used for debug
4129 * messages, so return "" instead of NULL when unknown.
4130 */
4131 char *
mdi_pi_spathname(mdi_pathinfo_t * pip)4132 mdi_pi_spathname(mdi_pathinfo_t *pip)
4133 {
4134 char *spath = "";
4135
4136 if (pip) {
4137 spath = mdi_pi_spathname_by_instance(
4138 mdi_pi_get_path_instance(pip));
4139 if (spath == NULL)
4140 spath = "";
4141 }
4142 return (spath);
4143 }
4144
4145 char *
mdi_pi_pathname_obp(mdi_pathinfo_t * pip,char * path)4146 mdi_pi_pathname_obp(mdi_pathinfo_t *pip, char *path)
4147 {
4148 char *obp_path = NULL;
4149 if ((pip == NULL) || (path == NULL))
4150 return (NULL);
4151
4152 if (mdi_prop_lookup_string(pip, "obp-path", &obp_path) == MDI_SUCCESS) {
4153 (void) strcpy(path, obp_path);
4154 (void) mdi_prop_free(obp_path);
4155 } else {
4156 path = NULL;
4157 }
4158 return (path);
4159 }
4160
4161 int
mdi_pi_pathname_obp_set(mdi_pathinfo_t * pip,char * component)4162 mdi_pi_pathname_obp_set(mdi_pathinfo_t *pip, char *component)
4163 {
4164 dev_info_t *pdip;
4165 char *obp_path = NULL;
4166 int rc = MDI_FAILURE;
4167
4168 if (pip == NULL)
4169 return (MDI_FAILURE);
4170
4171 pdip = mdi_pi_get_phci(pip);
4172 if (pdip == NULL)
4173 return (MDI_FAILURE);
4174
4175 obp_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
4176
4177 if (ddi_pathname_obp(pdip, obp_path) == NULL) {
4178 (void) ddi_pathname(pdip, obp_path);
4179 }
4180
4181 if (component) {
4182 (void) strncat(obp_path, "/", MAXPATHLEN);
4183 (void) strncat(obp_path, component, MAXPATHLEN);
4184 }
4185 rc = mdi_prop_update_string(pip, "obp-path", obp_path);
4186
4187 if (obp_path)
4188 kmem_free(obp_path, MAXPATHLEN);
4189 return (rc);
4190 }
4191
4192 /*
4193 * mdi_pi_get_client():
4194 * Get the client devinfo associated with a mdi_pathinfo node
4195 *
4196 * Return Values:
4197 * Handle to client device dev_info node
4198 */
4199 dev_info_t *
mdi_pi_get_client(mdi_pathinfo_t * pip)4200 mdi_pi_get_client(mdi_pathinfo_t *pip)
4201 {
4202 dev_info_t *dip = NULL;
4203 if (pip) {
4204 dip = MDI_PI(pip)->pi_client->ct_dip;
4205 }
4206 return (dip);
4207 }
4208
4209 /*
4210 * mdi_pi_get_phci():
4211 * Get the pHCI devinfo associated with the mdi_pathinfo node
4212 * Return Values:
4213 * Handle to dev_info node
4214 */
4215 dev_info_t *
mdi_pi_get_phci(mdi_pathinfo_t * pip)4216 mdi_pi_get_phci(mdi_pathinfo_t *pip)
4217 {
4218 dev_info_t *dip = NULL;
4219 mdi_phci_t *ph;
4220
4221 if (pip) {
4222 ph = MDI_PI(pip)->pi_phci;
4223 if (ph)
4224 dip = ph->ph_dip;
4225 }
4226 return (dip);
4227 }
4228
4229 /*
4230 * mdi_pi_get_client_private():
4231 * Get the client private information associated with the
4232 * mdi_pathinfo node
4233 */
4234 void *
mdi_pi_get_client_private(mdi_pathinfo_t * pip)4235 mdi_pi_get_client_private(mdi_pathinfo_t *pip)
4236 {
4237 void *cprivate = NULL;
4238 if (pip) {
4239 cprivate = MDI_PI(pip)->pi_cprivate;
4240 }
4241 return (cprivate);
4242 }
4243
4244 /*
4245 * mdi_pi_set_client_private():
4246 * Set the client private information in the mdi_pathinfo node
4247 */
4248 void
mdi_pi_set_client_private(mdi_pathinfo_t * pip,void * priv)4249 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv)
4250 {
4251 if (pip) {
4252 MDI_PI(pip)->pi_cprivate = priv;
4253 }
4254 }
4255
4256 /*
4257 * mdi_pi_get_phci_private():
4258 * Get the pHCI private information associated with the
4259 * mdi_pathinfo node
4260 */
4261 caddr_t
mdi_pi_get_phci_private(mdi_pathinfo_t * pip)4262 mdi_pi_get_phci_private(mdi_pathinfo_t *pip)
4263 {
4264 caddr_t pprivate = NULL;
4265
4266 if (pip) {
4267 pprivate = MDI_PI(pip)->pi_pprivate;
4268 }
4269 return (pprivate);
4270 }
4271
4272 /*
4273 * mdi_pi_set_phci_private():
4274 * Set the pHCI private information in the mdi_pathinfo node
4275 */
4276 void
mdi_pi_set_phci_private(mdi_pathinfo_t * pip,caddr_t priv)4277 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv)
4278 {
4279 if (pip) {
4280 MDI_PI(pip)->pi_pprivate = priv;
4281 }
4282 }
4283
4284 /*
4285 * mdi_pi_get_state():
4286 * Get the mdi_pathinfo node state. Transient states are internal
4287 * and not provided to the users
4288 */
4289 mdi_pathinfo_state_t
mdi_pi_get_state(mdi_pathinfo_t * pip)4290 mdi_pi_get_state(mdi_pathinfo_t *pip)
4291 {
4292 mdi_pathinfo_state_t state = MDI_PATHINFO_STATE_INIT;
4293
4294 if (pip) {
4295 if (MDI_PI_IS_TRANSIENT(pip)) {
4296 /*
4297 * mdi_pathinfo is in state transition. Return the
4298 * last good state.
4299 */
4300 state = MDI_PI_OLD_STATE(pip);
4301 } else {
4302 state = MDI_PI_STATE(pip);
4303 }
4304 }
4305 return (state);
4306 }
4307
4308 /*
4309 * mdi_pi_get_flags():
4310 * Get the mdi_pathinfo node flags.
4311 */
4312 uint_t
mdi_pi_get_flags(mdi_pathinfo_t * pip)4313 mdi_pi_get_flags(mdi_pathinfo_t *pip)
4314 {
4315 return (pip ? MDI_PI(pip)->pi_flags : 0);
4316 }
4317
4318 /*
4319 * Note that the following function needs to be the new interface for
4320 * mdi_pi_get_state when mpxio gets integrated to ON.
4321 */
4322 int
mdi_pi_get_state2(mdi_pathinfo_t * pip,mdi_pathinfo_state_t * state,uint32_t * ext_state)4323 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state,
4324 uint32_t *ext_state)
4325 {
4326 *state = MDI_PATHINFO_STATE_INIT;
4327
4328 if (pip) {
4329 if (MDI_PI_IS_TRANSIENT(pip)) {
4330 /*
4331 * mdi_pathinfo is in state transition. Return the
4332 * last good state.
4333 */
4334 *state = MDI_PI_OLD_STATE(pip);
4335 *ext_state = MDI_PI_OLD_EXT_STATE(pip);
4336 } else {
4337 *state = MDI_PI_STATE(pip);
4338 *ext_state = MDI_PI_EXT_STATE(pip);
4339 }
4340 }
4341 return (MDI_SUCCESS);
4342 }
4343
4344 /*
4345 * mdi_pi_get_preferred:
4346 * Get the preferred path flag
4347 */
4348 int
mdi_pi_get_preferred(mdi_pathinfo_t * pip)4349 mdi_pi_get_preferred(mdi_pathinfo_t *pip)
4350 {
4351 if (pip) {
4352 return (MDI_PI(pip)->pi_preferred);
4353 }
4354 return (0);
4355 }
4356
4357 /*
4358 * mdi_pi_set_preferred:
4359 * Set the preferred path flag
4360 */
4361 void
mdi_pi_set_preferred(mdi_pathinfo_t * pip,int preferred)4362 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred)
4363 {
4364 if (pip) {
4365 MDI_PI(pip)->pi_preferred = preferred;
4366 }
4367 }
4368
4369 /*
4370 * mdi_pi_set_state():
4371 * Set the mdi_pathinfo node state
4372 */
4373 void
mdi_pi_set_state(mdi_pathinfo_t * pip,mdi_pathinfo_state_t state)4374 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state)
4375 {
4376 uint32_t ext_state;
4377
4378 if (pip) {
4379 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK;
4380 MDI_PI(pip)->pi_state = state;
4381 MDI_PI(pip)->pi_state |= ext_state;
4382
4383 /* Path has changed state, invalidate DINFOCACHE snap shot. */
4384 i_ddi_di_cache_invalidate();
4385 }
4386 }
4387
4388 /*
4389 * Property functions:
4390 */
4391 int
i_map_nvlist_error_to_mdi(int val)4392 i_map_nvlist_error_to_mdi(int val)
4393 {
4394 int rv;
4395
4396 switch (val) {
4397 case 0:
4398 rv = DDI_PROP_SUCCESS;
4399 break;
4400 case EINVAL:
4401 case ENOTSUP:
4402 rv = DDI_PROP_INVAL_ARG;
4403 break;
4404 case ENOMEM:
4405 rv = DDI_PROP_NO_MEMORY;
4406 break;
4407 default:
4408 rv = DDI_PROP_NOT_FOUND;
4409 break;
4410 }
4411 return (rv);
4412 }
4413
4414 /*
4415 * mdi_pi_get_next_prop():
4416 * Property walk function. The caller should hold mdi_pi_lock()
4417 * and release by calling mdi_pi_unlock() at the end of walk to
4418 * get a consistent value.
4419 */
4420 nvpair_t *
mdi_pi_get_next_prop(mdi_pathinfo_t * pip,nvpair_t * prev)4421 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev)
4422 {
4423 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4424 return (NULL);
4425 }
4426 ASSERT(MDI_PI_LOCKED(pip));
4427 return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev));
4428 }
4429
4430 /*
4431 * mdi_prop_remove():
4432 * Remove the named property from the named list.
4433 */
4434 int
mdi_prop_remove(mdi_pathinfo_t * pip,char * name)4435 mdi_prop_remove(mdi_pathinfo_t *pip, char *name)
4436 {
4437 if (pip == NULL) {
4438 return (DDI_PROP_NOT_FOUND);
4439 }
4440 ASSERT(!MDI_PI_LOCKED(pip));
4441 MDI_PI_LOCK(pip);
4442 if (MDI_PI(pip)->pi_prop == NULL) {
4443 MDI_PI_UNLOCK(pip);
4444 return (DDI_PROP_NOT_FOUND);
4445 }
4446 if (name) {
4447 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name);
4448 } else {
4449 char nvp_name[MAXNAMELEN];
4450 nvpair_t *nvp;
4451 nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL);
4452 while (nvp) {
4453 nvpair_t *next;
4454 next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp);
4455 (void) snprintf(nvp_name, sizeof(nvp_name), "%s",
4456 nvpair_name(nvp));
4457 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop,
4458 nvp_name);
4459 nvp = next;
4460 }
4461 }
4462 MDI_PI_UNLOCK(pip);
4463 return (DDI_PROP_SUCCESS);
4464 }
4465
4466 /*
4467 * mdi_prop_size():
4468 * Get buffer size needed to pack the property data.
4469 * Caller should hold the mdi_pathinfo_t lock to get a consistent
4470 * buffer size.
4471 */
4472 int
mdi_prop_size(mdi_pathinfo_t * pip,size_t * buflenp)4473 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp)
4474 {
4475 int rv;
4476 size_t bufsize;
4477
4478 *buflenp = 0;
4479 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4480 return (DDI_PROP_NOT_FOUND);
4481 }
4482 ASSERT(MDI_PI_LOCKED(pip));
4483 rv = nvlist_size(MDI_PI(pip)->pi_prop,
4484 &bufsize, NV_ENCODE_NATIVE);
4485 *buflenp = bufsize;
4486 return (i_map_nvlist_error_to_mdi(rv));
4487 }
4488
4489 /*
4490 * mdi_prop_pack():
4491 * pack the property list. The caller should hold the
4492 * mdi_pathinfo_t node to get a consistent data
4493 */
4494 int
mdi_prop_pack(mdi_pathinfo_t * pip,char ** bufp,uint_t buflen)4495 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen)
4496 {
4497 int rv;
4498 size_t bufsize;
4499
4500 if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) {
4501 return (DDI_PROP_NOT_FOUND);
4502 }
4503
4504 ASSERT(MDI_PI_LOCKED(pip));
4505
4506 bufsize = buflen;
4507 rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize,
4508 NV_ENCODE_NATIVE, KM_SLEEP);
4509
4510 return (i_map_nvlist_error_to_mdi(rv));
4511 }
4512
4513 /*
4514 * mdi_prop_update_byte():
4515 * Create/Update a byte property
4516 */
4517 int
mdi_prop_update_byte(mdi_pathinfo_t * pip,char * name,uchar_t data)4518 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data)
4519 {
4520 int rv;
4521
4522 if (pip == NULL) {
4523 return (DDI_PROP_INVAL_ARG);
4524 }
4525 ASSERT(!MDI_PI_LOCKED(pip));
4526 MDI_PI_LOCK(pip);
4527 if (MDI_PI(pip)->pi_prop == NULL) {
4528 MDI_PI_UNLOCK(pip);
4529 return (DDI_PROP_NOT_FOUND);
4530 }
4531 rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data);
4532 MDI_PI_UNLOCK(pip);
4533 return (i_map_nvlist_error_to_mdi(rv));
4534 }
4535
4536 /*
4537 * mdi_prop_update_byte_array():
4538 * Create/Update a byte array property
4539 */
4540 int
mdi_prop_update_byte_array(mdi_pathinfo_t * pip,char * name,uchar_t * data,uint_t nelements)4541 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data,
4542 uint_t nelements)
4543 {
4544 int rv;
4545
4546 if (pip == NULL) {
4547 return (DDI_PROP_INVAL_ARG);
4548 }
4549 ASSERT(!MDI_PI_LOCKED(pip));
4550 MDI_PI_LOCK(pip);
4551 if (MDI_PI(pip)->pi_prop == NULL) {
4552 MDI_PI_UNLOCK(pip);
4553 return (DDI_PROP_NOT_FOUND);
4554 }
4555 rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements);
4556 MDI_PI_UNLOCK(pip);
4557 return (i_map_nvlist_error_to_mdi(rv));
4558 }
4559
4560 /*
4561 * mdi_prop_update_int():
4562 * Create/Update a 32 bit integer property
4563 */
4564 int
mdi_prop_update_int(mdi_pathinfo_t * pip,char * name,int data)4565 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data)
4566 {
4567 int rv;
4568
4569 if (pip == NULL) {
4570 return (DDI_PROP_INVAL_ARG);
4571 }
4572 ASSERT(!MDI_PI_LOCKED(pip));
4573 MDI_PI_LOCK(pip);
4574 if (MDI_PI(pip)->pi_prop == NULL) {
4575 MDI_PI_UNLOCK(pip);
4576 return (DDI_PROP_NOT_FOUND);
4577 }
4578 rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data);
4579 MDI_PI_UNLOCK(pip);
4580 return (i_map_nvlist_error_to_mdi(rv));
4581 }
4582
4583 /*
4584 * mdi_prop_update_int64():
4585 * Create/Update a 64 bit integer property
4586 */
4587 int
mdi_prop_update_int64(mdi_pathinfo_t * pip,char * name,int64_t data)4588 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data)
4589 {
4590 int rv;
4591
4592 if (pip == NULL) {
4593 return (DDI_PROP_INVAL_ARG);
4594 }
4595 ASSERT(!MDI_PI_LOCKED(pip));
4596 MDI_PI_LOCK(pip);
4597 if (MDI_PI(pip)->pi_prop == NULL) {
4598 MDI_PI_UNLOCK(pip);
4599 return (DDI_PROP_NOT_FOUND);
4600 }
4601 rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data);
4602 MDI_PI_UNLOCK(pip);
4603 return (i_map_nvlist_error_to_mdi(rv));
4604 }
4605
4606 /*
4607 * mdi_prop_update_int_array():
4608 * Create/Update a int array property
4609 */
4610 int
mdi_prop_update_int_array(mdi_pathinfo_t * pip,char * name,int * data,uint_t nelements)4611 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data,
4612 uint_t nelements)
4613 {
4614 int rv;
4615
4616 if (pip == NULL) {
4617 return (DDI_PROP_INVAL_ARG);
4618 }
4619 ASSERT(!MDI_PI_LOCKED(pip));
4620 MDI_PI_LOCK(pip);
4621 if (MDI_PI(pip)->pi_prop == NULL) {
4622 MDI_PI_UNLOCK(pip);
4623 return (DDI_PROP_NOT_FOUND);
4624 }
4625 rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data,
4626 nelements);
4627 MDI_PI_UNLOCK(pip);
4628 return (i_map_nvlist_error_to_mdi(rv));
4629 }
4630
4631 /*
4632 * mdi_prop_update_string():
4633 * Create/Update a string property
4634 */
4635 int
mdi_prop_update_string(mdi_pathinfo_t * pip,char * name,char * data)4636 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data)
4637 {
4638 int rv;
4639
4640 if (pip == NULL) {
4641 return (DDI_PROP_INVAL_ARG);
4642 }
4643 ASSERT(!MDI_PI_LOCKED(pip));
4644 MDI_PI_LOCK(pip);
4645 if (MDI_PI(pip)->pi_prop == NULL) {
4646 MDI_PI_UNLOCK(pip);
4647 return (DDI_PROP_NOT_FOUND);
4648 }
4649 rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, data);
4650 MDI_PI_UNLOCK(pip);
4651 return (i_map_nvlist_error_to_mdi(rv));
4652 }
4653
4654 /*
4655 * mdi_prop_update_string_array():
4656 * Create/Update a string array property
4657 */
4658 int
mdi_prop_update_string_array(mdi_pathinfo_t * pip,char * name,char ** data,uint_t nelements)4659 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data,
4660 uint_t nelements)
4661 {
4662 int rv;
4663
4664 if (pip == NULL) {
4665 return (DDI_PROP_INVAL_ARG);
4666 }
4667 ASSERT(!MDI_PI_LOCKED(pip));
4668 MDI_PI_LOCK(pip);
4669 if (MDI_PI(pip)->pi_prop == NULL) {
4670 MDI_PI_UNLOCK(pip);
4671 return (DDI_PROP_NOT_FOUND);
4672 }
4673 rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data,
4674 nelements);
4675 MDI_PI_UNLOCK(pip);
4676 return (i_map_nvlist_error_to_mdi(rv));
4677 }
4678
4679 /*
4680 * mdi_prop_lookup_byte():
4681 * Look for byte property identified by name. The data returned
4682 * is the actual property and valid as long as mdi_pathinfo_t node
4683 * is alive.
4684 */
4685 int
mdi_prop_lookup_byte(mdi_pathinfo_t * pip,char * name,uchar_t * data)4686 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data)
4687 {
4688 int rv;
4689
4690 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4691 return (DDI_PROP_NOT_FOUND);
4692 }
4693 rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data);
4694 return (i_map_nvlist_error_to_mdi(rv));
4695 }
4696
4697
4698 /*
4699 * mdi_prop_lookup_byte_array():
4700 * Look for byte array property identified by name. The data
4701 * returned is the actual property and valid as long as
4702 * mdi_pathinfo_t node is alive.
4703 */
4704 int
mdi_prop_lookup_byte_array(mdi_pathinfo_t * pip,char * name,uchar_t ** data,uint_t * nelements)4705 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data,
4706 uint_t *nelements)
4707 {
4708 int rv;
4709
4710 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4711 return (DDI_PROP_NOT_FOUND);
4712 }
4713 rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data,
4714 nelements);
4715 return (i_map_nvlist_error_to_mdi(rv));
4716 }
4717
4718 /*
4719 * mdi_prop_lookup_int():
4720 * Look for int property identified by name. The data returned
4721 * is the actual property and valid as long as mdi_pathinfo_t
4722 * node is alive.
4723 */
4724 int
mdi_prop_lookup_int(mdi_pathinfo_t * pip,char * name,int * data)4725 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data)
4726 {
4727 int rv;
4728
4729 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4730 return (DDI_PROP_NOT_FOUND);
4731 }
4732 rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data);
4733 return (i_map_nvlist_error_to_mdi(rv));
4734 }
4735
4736 /*
4737 * mdi_prop_lookup_int64():
4738 * Look for int64 property identified by name. The data returned
4739 * is the actual property and valid as long as mdi_pathinfo_t node
4740 * is alive.
4741 */
4742 int
mdi_prop_lookup_int64(mdi_pathinfo_t * pip,char * name,int64_t * data)4743 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data)
4744 {
4745 int rv;
4746 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4747 return (DDI_PROP_NOT_FOUND);
4748 }
4749 rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data);
4750 return (i_map_nvlist_error_to_mdi(rv));
4751 }
4752
4753 /*
4754 * mdi_prop_lookup_int_array():
4755 * Look for int array property identified by name. The data
4756 * returned is the actual property and valid as long as
4757 * mdi_pathinfo_t node is alive.
4758 */
4759 int
mdi_prop_lookup_int_array(mdi_pathinfo_t * pip,char * name,int ** data,uint_t * nelements)4760 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data,
4761 uint_t *nelements)
4762 {
4763 int rv;
4764
4765 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4766 return (DDI_PROP_NOT_FOUND);
4767 }
4768 rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name,
4769 (int32_t **)data, nelements);
4770 return (i_map_nvlist_error_to_mdi(rv));
4771 }
4772
4773 /*
4774 * mdi_prop_lookup_string():
4775 * Look for string property identified by name. The data
4776 * returned is the actual property and valid as long as
4777 * mdi_pathinfo_t node is alive.
4778 */
4779 int
mdi_prop_lookup_string(mdi_pathinfo_t * pip,char * name,char ** data)4780 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data)
4781 {
4782 int rv;
4783
4784 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4785 return (DDI_PROP_NOT_FOUND);
4786 }
4787 rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data);
4788 return (i_map_nvlist_error_to_mdi(rv));
4789 }
4790
4791 /*
4792 * mdi_prop_lookup_string_array():
4793 * Look for string array property identified by name. The data
4794 * returned is the actual property and valid as long as
4795 * mdi_pathinfo_t node is alive.
4796 */
4797 int
mdi_prop_lookup_string_array(mdi_pathinfo_t * pip,char * name,char *** data,uint_t * nelements)4798 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data,
4799 uint_t *nelements)
4800 {
4801 int rv;
4802
4803 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4804 return (DDI_PROP_NOT_FOUND);
4805 }
4806 rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data,
4807 nelements);
4808 return (i_map_nvlist_error_to_mdi(rv));
4809 }
4810
4811 /*
4812 * mdi_prop_free():
4813 * Symmetrical function to ddi_prop_free(). nvlist_lookup_xx()
4814 * functions return the pointer to actual property data and not a
4815 * copy of it. So the data returned is valid as long as
4816 * mdi_pathinfo_t node is valid.
4817 */
4818 /*ARGSUSED*/
4819 int
mdi_prop_free(void * data)4820 mdi_prop_free(void *data)
4821 {
4822 return (DDI_PROP_SUCCESS);
4823 }
4824
/*
 * i_mdi_report_path_state():
 *	Emit a console/log message describing the client's overall
 *	multipath state and the state of one of its paths.  Only runs
 *	when the client has an initialized dev_info node and its
 *	REPORT_DEV flag is set; the flag is cleared after reporting.
 *	Caller must hold the client lock.
 */
/*ARGSUSED*/
static void
i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	char *ct_path;
	char *ct_status;
	char *status;
	dev_info_t *cdip = ct->ct_dip;
	char lb_buf[64];
	int report_lb_c = 0, report_lb_p = 0;

	ASSERT(MDI_CLIENT_LOCKED(ct));
	/* Nothing to report without a usable client dip or pending flag. */
	if ((cdip == NULL) || (ddi_get_instance(cdip) == -1) ||
	    (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) {
		return;
	}

	/* Translate the client multipath state to a display string. */
	if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) {
		ct_status = "optimal";
		/* load-balancing config is only reported when optimal */
		report_lb_c = 1;
	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
		ct_status = "degraded";
	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
		ct_status = "failed";
	} else {
		ct_status = "unknown";
	}

	lb_buf[0] = 0;		/* not interested in load balancing config */

	/* Translate the path state to a display string. */
	if (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip)) {
		status = "removed";
	} else if (MDI_PI_IS_OFFLINE(pip)) {
		status = "offline";
	} else if (MDI_PI_IS_ONLINE(pip)) {
		status = "online";
		/* load-balancing is only meaningful for an online path */
		report_lb_p = 1;
	} else if (MDI_PI_IS_STANDBY(pip)) {
		status = "standby";
	} else if (MDI_PI_IS_FAULT(pip)) {
		status = "faulted";
	} else {
		status = "unknown";
	}

	if (cdip) {
		ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);

		/*
		 * NOTE: Keeping "multipath status: %s" and
		 * "Load balancing: %s" format unchanged in case someone
		 * scrubs /var/adm/messages looking for these messages.
		 */
		if (report_lb_c && report_lb_p) {
			/* Describe the active load-balancing policy. */
			if (ct->ct_lb == LOAD_BALANCE_LBA) {
				(void) snprintf(lb_buf, sizeof (lb_buf),
				    "%s, region-size: %d", mdi_load_balance_lba,
				    ct->ct_lb_args->region_size);
			} else if (ct->ct_lb == LOAD_BALANCE_NONE) {
				(void) snprintf(lb_buf, sizeof (lb_buf),
				    "%s", mdi_load_balance_none);
			} else {
				(void) snprintf(lb_buf, sizeof (lb_buf), "%s",
				    mdi_load_balance_rr);
			}

			cmn_err(mdi_debug_consoleonly ? CE_NOTE : CE_CONT,
			    "?%s (%s%d) multipath status: %s: "
			    "path %d %s is %s: Load balancing: %s\n",
			    ddi_pathname(cdip, ct_path), ddi_driver_name(cdip),
			    ddi_get_instance(cdip), ct_status,
			    mdi_pi_get_path_instance(pip),
			    mdi_pi_spathname(pip), status, lb_buf);
		} else {
			cmn_err(mdi_debug_consoleonly ? CE_NOTE : CE_CONT,
			    "?%s (%s%d) multipath status: %s: "
			    "path %d %s is %s\n",
			    ddi_pathname(cdip, ct_path), ddi_driver_name(cdip),
			    ddi_get_instance(cdip), ct_status,
			    mdi_pi_get_path_instance(pip),
			    mdi_pi_spathname(pip), status);
		}

		kmem_free(ct_path, MAXPATHLEN);
		/* Reported; don't report again until the flag is re-set. */
		MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct);
	}
}
4911
4912 #ifdef DEBUG
4913 /*
4914 * i_mdi_log():
4915 * Utility function for error message management
4916 *
4917 * NOTE: Implementation takes care of trailing \n for cmn_err,
4918 * MDI_DEBUG should not terminate fmt strings with \n.
4919 *
4920 * NOTE: If the level is >= 2, and there is no leading !?^
4921 * then a leading ! is implied (but can be overriden via
4922 * mdi_debug_consoleonly). If you are using kmdb on the console,
4923 * consider setting mdi_debug_consoleonly to 1 as an aid.
4924 */
4925 /*PRINTFLIKE4*/
4926 static void
i_mdi_log(int level,const char * func,dev_info_t * dip,const char * fmt,...)4927 i_mdi_log(int level, const char *func, dev_info_t *dip, const char *fmt, ...)
4928 {
4929 char name[MAXNAMELEN];
4930 char buf[512];
4931 char *bp;
4932 va_list ap;
4933 int log_only = 0;
4934 int boot_only = 0;
4935 int console_only = 0;
4936
4937 if (dip) {
4938 (void) snprintf(name, sizeof(name), "%s%d: ",
4939 ddi_driver_name(dip), ddi_get_instance(dip));
4940 } else {
4941 name[0] = 0;
4942 }
4943
4944 va_start(ap, fmt);
4945 (void) vsnprintf(buf, sizeof(buf), fmt, ap);
4946 va_end(ap);
4947
4948 switch (buf[0]) {
4949 case '!':
4950 bp = &buf[1];
4951 log_only = 1;
4952 break;
4953 case '?':
4954 bp = &buf[1];
4955 boot_only = 1;
4956 break;
4957 case '^':
4958 bp = &buf[1];
4959 console_only = 1;
4960 break;
4961 default:
4962 if (level >= 2)
4963 log_only = 1; /* ! implied */
4964 bp = buf;
4965 break;
4966 }
4967 if (mdi_debug_logonly) {
4968 log_only = 1;
4969 boot_only = 0;
4970 console_only = 0;
4971 }
4972 if (mdi_debug_consoleonly) {
4973 log_only = 0;
4974 boot_only = 0;
4975 console_only = 1;
4976 level = CE_NOTE;
4977 goto console;
4978 }
4979
4980 switch (level) {
4981 case CE_NOTE:
4982 level = CE_CONT;
4983 /* FALLTHROUGH */
4984 case CE_CONT:
4985 if (boot_only) {
4986 cmn_err(level, "?mdi: %s%s: %s\n", name, func, bp);
4987 } else if (console_only) {
4988 cmn_err(level, "^mdi: %s%s: %s\n", name, func, bp);
4989 } else if (log_only) {
4990 cmn_err(level, "!mdi: %s%s: %s\n", name, func, bp);
4991 } else {
4992 cmn_err(level, "mdi: %s%s: %s\n", name, func, bp);
4993 }
4994 break;
4995
4996 case CE_WARN:
4997 case CE_PANIC:
4998 console:
4999 if (boot_only) {
5000 cmn_err(level, "?mdi: %s%s: %s", name, func, bp);
5001 } else if (console_only) {
5002 cmn_err(level, "^mdi: %s%s: %s", name, func, bp);
5003 } else if (log_only) {
5004 cmn_err(level, "!mdi: %s%s: %s", name, func, bp);
5005 } else {
5006 cmn_err(level, "mdi: %s%s: %s", name, func, bp);
5007 }
5008 break;
5009 default:
5010 cmn_err(level, "mdi: %s%s", name, bp);
5011 break;
5012 }
5013 }
5014 #endif /* DEBUG */
5015
/*
 * i_mdi_client_online():
 *	Client online notification.  Mark the client state as online,
 *	restore the binding between the mdi_client_t and its dev_info
 *	node, power up the pHCIs if needed, and take a PM hold on the
 *	client.
 */
void
i_mdi_client_online(dev_info_t *ct_dip)
{
	mdi_client_t	*ct;

	/*
	 * Client online notification. Mark client state as online
	 * restore our binding with dev_info node
	 */
	ct = i_devi_get_client(ct_dip);
	ASSERT(ct != NULL);
	MDI_CLIENT_LOCK(ct);
	MDI_CLIENT_SET_ONLINE(ct);
	/* catch for any memory leaks */
	ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip));
	ct->ct_dip = ct_dip;

	/* No power held yet; power up all pHCIs serving this client. */
	if (ct->ct_power_cnt == 0)
		(void) i_mdi_power_all_phci(ct);

	MDI_DEBUG(4, (MDI_NOTE, ct_dip,
	    "i_mdi_pm_hold_client %p", (void *)ct));
	i_mdi_pm_hold_client(ct, 1);

	MDI_CLIENT_UNLOCK(ct);
}
5042
5043 void
i_mdi_phci_online(dev_info_t * ph_dip)5044 i_mdi_phci_online(dev_info_t *ph_dip)
5045 {
5046 mdi_phci_t *ph;
5047
5048 /* pHCI online notification. Mark state accordingly */
5049 ph = i_devi_get_phci(ph_dip);
5050 ASSERT(ph != NULL);
5051 MDI_PHCI_LOCK(ph);
5052 MDI_PHCI_SET_ONLINE(ph);
5053 MDI_PHCI_UNLOCK(ph);
5054 }
5055
5056 /*
5057 * mdi_devi_online():
5058 * Online notification from NDI framework on pHCI/client
5059 * device online.
5060 * Return Values:
5061 * NDI_SUCCESS
5062 * MDI_FAILURE
5063 */
5064 /*ARGSUSED*/
5065 int
mdi_devi_online(dev_info_t * dip,uint_t flags)5066 mdi_devi_online(dev_info_t *dip, uint_t flags)
5067 {
5068 if (MDI_PHCI(dip)) {
5069 i_mdi_phci_online(dip);
5070 }
5071
5072 if (MDI_CLIENT(dip)) {
5073 i_mdi_client_online(dip);
5074 }
5075 return (NDI_SUCCESS);
5076 }
5077
5078 /*
5079 * mdi_devi_offline():
5080 * Offline notification from NDI framework on pHCI/Client device
5081 * offline.
5082 *
5083 * Return Values:
5084 * NDI_SUCCESS
5085 * NDI_FAILURE
5086 */
5087 /*ARGSUSED*/
5088 int
mdi_devi_offline(dev_info_t * dip,uint_t flags)5089 mdi_devi_offline(dev_info_t *dip, uint_t flags)
5090 {
5091 int rv = NDI_SUCCESS;
5092
5093 if (MDI_CLIENT(dip)) {
5094 rv = i_mdi_client_offline(dip, flags);
5095 if (rv != NDI_SUCCESS)
5096 return (rv);
5097 }
5098
5099 if (MDI_PHCI(dip)) {
5100 rv = i_mdi_phci_offline(dip, flags);
5101
5102 if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) {
5103 /* set client back online */
5104 i_mdi_client_online(dip);
5105 }
5106 }
5107
5108 return (rv);
5109 }
5110
/*
 * i_mdi_phci_offline():
 *	pHCI component offline notification.  Verifies the pHCI is free
 *	to be offlined, offlines clients for which this pHCI provides
 *	the last usable path, then marks the pHCI offline and offlines
 *	every child mdi_pathinfo node.  On partial failure, previously
 *	offlined clients are rolled back online and NDI_BUSY returned.
 *
 *	Locking: the pHCI lock is dropped and re-taken around calls out
 *	to the NDI (ndi_devi_offline/online), so the path list may be
 *	re-walked via the saved 'next' pointer.
 */
/*ARGSUSED*/
static int
i_mdi_phci_offline(dev_info_t *dip, uint_t flags)
{
	int rv = NDI_SUCCESS;
	mdi_phci_t *ph;
	mdi_client_t *ct;
	mdi_pathinfo_t *pip;
	mdi_pathinfo_t *next;
	mdi_pathinfo_t *failed_pip = NULL;
	dev_info_t *cdip;

	/*
	 * pHCI component offline notification
	 * Make sure that this pHCI instance is free to be offlined.
	 * If it is OK to proceed, Offline and remove all the child
	 * mdi_pathinfo nodes. This process automatically offlines
	 * corresponding client devices, for which this pHCI provides
	 * critical services.
	 */
	ph = i_devi_get_phci(dip);
	MDI_DEBUG(2, (MDI_NOTE, dip,
	    "called %p %p", (void *)dip, (void *)ph));
	if (ph == NULL) {
		return (rv);
	}

	MDI_PHCI_LOCK(ph);

	if (MDI_PHCI_IS_OFFLINE(ph)) {
		/* Already offline: nothing more to do. */
		MDI_DEBUG(1, (MDI_WARN, dip,
		    "!pHCI already offlined: %p", (void *)dip));
		MDI_PHCI_UNLOCK(ph);
		return (NDI_SUCCESS);
	}

	/*
	 * Check to see if the pHCI can be offlined
	 */
	if (ph->ph_unstable) {
		MDI_DEBUG(1, (MDI_WARN, dip,
		    "!One or more target devices are in transient state. "
		    "This device can not be removed at this moment. "
		    "Please try again later."));
		MDI_PHCI_UNLOCK(ph);
		return (NDI_BUSY);
	}

	/*
	 * First pass: offline any client for which this pHCI holds the
	 * last usable path.  Fail with NDI_BUSY if any client is in
	 * failover or unstable.
	 */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;

		/*
		 * The mdi_pathinfo state is OK. Check the client state.
		 * If failover in progress fail the pHCI from offlining
		 */
		ct = MDI_PI(pip)->pi_client;
		i_mdi_client_lock(ct, pip);
		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
		    (ct->ct_unstable)) {
			/*
			 * Failover is in progress, Fail the DR
			 */
			MDI_DEBUG(1, (MDI_WARN, dip,
			    "!pHCI device is busy. "
			    "This device can not be removed at this moment. "
			    "Please try again later."));
			MDI_PI_UNLOCK(pip);
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			return (NDI_BUSY);
		}
		MDI_PI_UNLOCK(pip);

		/*
		 * Check to see of we are removing the last path of this
		 * client device...
		 */
		cdip = ct->ct_dip;
		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
		    (i_mdi_client_compute_state(ct, ph) ==
		    MDI_CLIENT_STATE_FAILED)) {
			/* Locks dropped to call out to the NDI framework. */
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			if (ndi_devi_offline(cdip,
			    NDI_DEVFS_CLEAN) != NDI_SUCCESS) {
				/*
				 * ndi_devi_offline() failed.
				 * This pHCI provides the critical path
				 * to one or more client devices.
				 * Return busy.
				 */
				MDI_PHCI_LOCK(ph);
				MDI_DEBUG(1, (MDI_WARN, dip,
				    "!pHCI device is busy. "
				    "This device can not be removed at this "
				    "moment. Please try again later."));
				/* Remember where to stop the rollback walk. */
				failed_pip = pip;
				break;
			} else {
				MDI_PHCI_LOCK(ph);
				pip = next;
			}
		} else {
			i_mdi_client_unlock(ct);
			pip = next;
		}
	}

	if (failed_pip) {
		/*
		 * Rollback: restore the clients processed before the
		 * failing path to a state matching their computed
		 * multipath state, then fail the offline.
		 */
		pip = ph->ph_path_head;
		while (pip != failed_pip) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
			ct = MDI_PI(pip)->pi_client;
			i_mdi_client_lock(ct, pip);
			cdip = ct->ct_dip;
			switch (MDI_CLIENT_STATE(ct)) {
			case MDI_CLIENT_STATE_OPTIMAL:
			case MDI_CLIENT_STATE_DEGRADED:
				if (cdip) {
					MDI_PI_UNLOCK(pip);
					i_mdi_client_unlock(ct);
					MDI_PHCI_UNLOCK(ph);
					(void) ndi_devi_online(cdip, 0);
					MDI_PHCI_LOCK(ph);
					pip = next;
					continue;
				}
				break;

			case MDI_CLIENT_STATE_FAILED:
				if (cdip) {
					MDI_PI_UNLOCK(pip);
					i_mdi_client_unlock(ct);
					MDI_PHCI_UNLOCK(ph);
					(void) ndi_devi_offline(cdip,
					    NDI_DEVFS_CLEAN);
					MDI_PHCI_LOCK(ph);
					pip = next;
					continue;
				}
				break;
			}
			MDI_PI_UNLOCK(pip);
			i_mdi_client_unlock(ct);
			pip = next;
		}
		MDI_PHCI_UNLOCK(ph);
		return (NDI_BUSY);
	}

	/*
	 * Mark the pHCI as offline
	 */
	MDI_PHCI_SET_OFFLINE(ph);

	/*
	 * Mark the child mdi_pathinfo nodes as transient
	 */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		MDI_PI_SET_OFFLINING(pip);
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);
	/*
	 * Give a chance for any pending commands to execute
	 */
	delay_random(mdi_delay);
	MDI_PHCI_LOCK(ph);
	/* Final pass: offline every path; back out if any path stays busy. */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		(void) i_mdi_pi_offline(pip, flags);
		MDI_PI_LOCK(pip);
		ct = MDI_PI(pip)->pi_client;
		if (!MDI_PI_IS_OFFLINE(pip)) {
			MDI_DEBUG(1, (MDI_WARN, dip,
			    "!pHCI device is busy. "
			    "This device can not be removed at this moment. "
			    "Please try again later."));
			MDI_PI_UNLOCK(pip);
			/* Undo the offline marking made above. */
			MDI_PHCI_SET_ONLINE(ph);
			MDI_PHCI_UNLOCK(ph);
			return (NDI_BUSY);
		}
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);

	return (rv);
}
5309
/*
 * mdi_phci_mark_retiring():
 *	Walk the paths of a pHCI that is being retired.  For each
 *	client for which this pHCI provides the last usable path, mark
 *	the client dip as retiring too (via e_ddi_mark_retiring).
 *	cons_array is passed through to the retire framework.
 */
void
mdi_phci_mark_retiring(dev_info_t *dip, char **cons_array)
{
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	dev_info_t	*cdip;

	if (!MDI_PHCI(dip))
		return;

	ph = i_devi_get_phci(dip);
	if (ph == NULL) {
		return;
	}

	MDI_PHCI_LOCK(ph);

	if (MDI_PHCI_IS_OFFLINE(ph)) {
		/* has no last path */
		MDI_PHCI_UNLOCK(ph);
		return;
	}

	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		/* Save the link before dropping locks; list may be rewalked. */
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;

		ct = MDI_PI(pip)->pi_client;
		i_mdi_client_lock(ct, pip);
		MDI_PI_UNLOCK(pip);

		cdip = ct->ct_dip;
		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
		    (i_mdi_client_compute_state(ct, ph) ==
		    MDI_CLIENT_STATE_FAILED)) {
			/* Last path. Mark client dip as retiring */
			i_mdi_client_unlock(ct);
			/* Drop the pHCI lock to call into the DDI framework. */
			MDI_PHCI_UNLOCK(ph);
			(void) e_ddi_mark_retiring(cdip, cons_array);
			MDI_PHCI_LOCK(ph);
			pip = next;
		} else {
			i_mdi_client_unlock(ct);
			pip = next;
		}
	}

	MDI_PHCI_UNLOCK(ph);

	return;
}
5364
/*
 * mdi_phci_retire_notify():
 *	Retire-notify pass for a pHCI.  For each client for which this
 *	pHCI provides the last usable path, run the retire-notify
 *	constraint check (e_ddi_retire_notify).  If the pHCI or any
 *	client is in a transient/failover state the constraints cannot
 *	be evaluated, so *constraint is cleared to veto the retire.
 */
void
mdi_phci_retire_notify(dev_info_t *dip, int *constraint)
{
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	dev_info_t	*cdip;

	if (!MDI_PHCI(dip))
		return;

	ph = i_devi_get_phci(dip);
	if (ph == NULL)
		return;

	MDI_PHCI_LOCK(ph);

	if (MDI_PHCI_IS_OFFLINE(ph)) {
		MDI_PHCI_UNLOCK(ph);
		/* not last path */
		return;
	}

	if (ph->ph_unstable) {
		MDI_PHCI_UNLOCK(ph);
		/* can't check for constraints */
		*constraint = 0;
		return;
	}

	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;

		/*
		 * The mdi_pathinfo state is OK. Check the client state.
		 * If failover in progress fail the pHCI from offlining
		 */
		ct = MDI_PI(pip)->pi_client;
		i_mdi_client_lock(ct, pip);
		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
		    (ct->ct_unstable)) {
			/*
			 * Failover is in progress, can't check for constraints
			 */
			MDI_PI_UNLOCK(pip);
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			*constraint = 0;
			return;
		}
		MDI_PI_UNLOCK(pip);

		/*
		 * Check to see of we are retiring the last path of this
		 * client device...
		 */
		cdip = ct->ct_dip;
		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
		    (i_mdi_client_compute_state(ct, ph) ==
		    MDI_CLIENT_STATE_FAILED)) {
			i_mdi_client_unlock(ct);
			/* Drop the pHCI lock to call into the DDI framework. */
			MDI_PHCI_UNLOCK(ph);
			(void) e_ddi_retire_notify(cdip, constraint);
			MDI_PHCI_LOCK(ph);
			pip = next;
		} else {
			i_mdi_client_unlock(ct);
			pip = next;
		}
	}

	MDI_PHCI_UNLOCK(ph);

	return;
}
5443
5444 /*
5445 * offline the path(s) hanging off the pHCI. If the
5446 * last path to any client, check that constraints
5447 * have been applied.
5448 *
5449 * If constraint is 0, we aren't going to retire the
5450 * pHCI. However we still need to go through the paths
5451 * calling e_ddi_retire_finalize() to clear their
5452 * contract barriers.
5453 */
5454 void
mdi_phci_retire_finalize(dev_info_t * dip,int phci_only,void * constraint)5455 mdi_phci_retire_finalize(dev_info_t *dip, int phci_only, void *constraint)
5456 {
5457 mdi_phci_t *ph;
5458 mdi_client_t *ct;
5459 mdi_pathinfo_t *pip;
5460 mdi_pathinfo_t *next;
5461 dev_info_t *cdip;
5462 int unstable = 0;
5463 int tmp_constraint;
5464
5465 if (!MDI_PHCI(dip))
5466 return;
5467
5468 ph = i_devi_get_phci(dip);
5469 if (ph == NULL) {
5470 /* no last path and no pips */
5471 return;
5472 }
5473
5474 MDI_PHCI_LOCK(ph);
5475
5476 if (MDI_PHCI_IS_OFFLINE(ph)) {
5477 MDI_PHCI_UNLOCK(ph);
5478 /* no last path and no pips */
5479 return;
5480 }
5481
5482 /*
5483 * Check to see if the pHCI can be offlined
5484 */
5485 if (ph->ph_unstable) {
5486 unstable = 1;
5487 }
5488
5489 pip = ph->ph_path_head;
5490 while (pip != NULL) {
5491 MDI_PI_LOCK(pip);
5492 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5493
5494 /*
5495 * if failover in progress fail the pHCI from offlining
5496 */
5497 ct = MDI_PI(pip)->pi_client;
5498 i_mdi_client_lock(ct, pip);
5499 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
5500 (ct->ct_unstable)) {
5501 unstable = 1;
5502 }
5503 MDI_PI_UNLOCK(pip);
5504
5505 /*
5506 * Check to see of we are removing the last path of this
5507 * client device...
5508 */
5509 cdip = ct->ct_dip;
5510 if (!phci_only && cdip &&
5511 (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5512 (i_mdi_client_compute_state(ct, ph) ==
5513 MDI_CLIENT_STATE_FAILED)) {
5514 i_mdi_client_unlock(ct);
5515 MDI_PHCI_UNLOCK(ph);
5516 /*
5517 * This is the last path to this client.
5518 *
5519 * Constraint will only be set to 1 if this client can
5520 * be retired (as already determined by
5521 * mdi_phci_retire_notify). However we don't actually
5522 * need to retire the client (we just retire the last
5523 * path - MPXIO will then fail all I/Os to the client).
5524 * But we still need to call e_ddi_retire_finalize so
5525 * the contract barriers can be cleared. Therefore we
5526 * temporarily set constraint = 0 so that the client
5527 * dip is not retired.
5528 */
5529 tmp_constraint = 0;
5530 (void) e_ddi_retire_finalize(cdip, &tmp_constraint);
5531 MDI_PHCI_LOCK(ph);
5532 pip = next;
5533 } else {
5534 i_mdi_client_unlock(ct);
5535 pip = next;
5536 }
5537 }
5538
5539 if (!phci_only && *((int *)constraint) == 0) {
5540 MDI_PHCI_UNLOCK(ph);
5541 return;
5542 }
5543
5544 /*
5545 * Cannot offline pip(s)
5546 */
5547 if (unstable) {
5548 cmn_err(CE_WARN, "%s%d: mdi_phci_retire_finalize: "
5549 "pHCI in transient state, cannot retire",
5550 ddi_driver_name(dip), ddi_get_instance(dip));
5551 MDI_PHCI_UNLOCK(ph);
5552 return;
5553 }
5554
5555 /*
5556 * Mark the pHCI as offline
5557 */
5558 MDI_PHCI_SET_OFFLINE(ph);
5559
5560 /*
5561 * Mark the child mdi_pathinfo nodes as transient
5562 */
5563 pip = ph->ph_path_head;
5564 while (pip != NULL) {
5565 MDI_PI_LOCK(pip);
5566 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5567 MDI_PI_SET_OFFLINING(pip);
5568 MDI_PI_UNLOCK(pip);
5569 pip = next;
5570 }
5571 MDI_PHCI_UNLOCK(ph);
5572 /*
5573 * Give a chance for any pending commands to execute
5574 */
5575 delay_random(mdi_delay);
5576 MDI_PHCI_LOCK(ph);
5577 pip = ph->ph_path_head;
5578 while (pip != NULL) {
5579 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5580 (void) i_mdi_pi_offline(pip, 0);
5581 MDI_PI_LOCK(pip);
5582 ct = MDI_PI(pip)->pi_client;
5583 if (!MDI_PI_IS_OFFLINE(pip)) {
5584 cmn_err(CE_WARN, "mdi_phci_retire_finalize: "
5585 "path %d %s busy, cannot offline",
5586 mdi_pi_get_path_instance(pip),
5587 mdi_pi_spathname(pip));
5588 MDI_PI_UNLOCK(pip);
5589 MDI_PHCI_SET_ONLINE(ph);
5590 MDI_PHCI_UNLOCK(ph);
5591 return;
5592 }
5593 MDI_PI_UNLOCK(pip);
5594 pip = next;
5595 }
5596 MDI_PHCI_UNLOCK(ph);
5597
5598 return;
5599 }
5600
/*
 * mdi_phci_unretire():
 *	Undo a pHCI retire: put the pHCI instance back online and then
 *	online each of its mdi_pathinfo nodes.
 */
void
mdi_phci_unretire(dev_info_t *dip)
{
	mdi_phci_t	*ph;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;

	ASSERT(MDI_PHCI(dip));

	/*
	 * Online the phci
	 */
	i_mdi_phci_online(dip);

	ph = i_devi_get_phci(dip);
	MDI_PHCI_LOCK(ph);
	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		/* Save the link first; i_mdi_pi_online may block. */
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		MDI_PI_UNLOCK(pip);
		/*
		 * NOTE(review): i_mdi_pi_online is called with the pHCI
		 * lock still held, unlike the offline paths above which
		 * drop it first — presumably safe here; confirm against
		 * i_mdi_pi_online's locking contract.
		 */
		(void) i_mdi_pi_online(pip, 0);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);
}
5627
/*
 * i_mdi_client_offline():
 *	Client component offline notification.  Refuse the offline
 *	(NDI_BUSY) while any path is transient or a failover is in
 *	progress; otherwise mark the client offline and, on
 *	NDI_DEVI_REMOVE, drop the binding to the dev_info node.
 */
/*ARGSUSED*/
static int
i_mdi_client_offline(dev_info_t *dip, uint_t flags)
{
	int rv = NDI_SUCCESS;
	mdi_client_t	*ct;

	/*
	 * Client component to go offline.  Make sure that we are
	 * not in failing over state and update client state
	 * accordingly
	 */
	ct = i_devi_get_client(dip);
	MDI_DEBUG(2, (MDI_NOTE, dip,
	    "called %p %p", (void *)dip, (void *)ct));
	if (ct != NULL) {
		MDI_CLIENT_LOCK(ct);
		if (ct->ct_unstable) {
			/*
			 * One or more paths are in transient state,
			 * Dont allow offline of a client device
			 */
			MDI_DEBUG(1, (MDI_WARN, dip,
			    "!One or more paths to "
			    "this device are in transient state. "
			    "This device can not be removed at this moment. "
			    "Please try again later."));
			MDI_CLIENT_UNLOCK(ct);
			return (NDI_BUSY);
		}
		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
			/*
			 * Failover is in progress, Dont allow DR of
			 * a client device
			 */
			MDI_DEBUG(1, (MDI_WARN, dip,
			    "!Client device is Busy. "
			    "This device can not be removed at this moment. "
			    "Please try again later."));
			MDI_CLIENT_UNLOCK(ct);
			return (NDI_BUSY);
		}
		MDI_CLIENT_SET_OFFLINE(ct);

		/*
		 * Unbind our relationship with the dev_info node
		 */
		if (flags & NDI_DEVI_REMOVE) {
			ct->ct_dip = NULL;
		}
		MDI_CLIENT_UNLOCK(ct);
	}
	return (rv);
}
5682
5683 /*
5684 * mdi_pre_attach():
5685 * Pre attach() notification handler
5686 */
5687 /*ARGSUSED*/
5688 int
mdi_pre_attach(dev_info_t * dip,ddi_attach_cmd_t cmd)5689 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
5690 {
5691 /* don't support old DDI_PM_RESUME */
5692 if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) &&
5693 (cmd == DDI_PM_RESUME))
5694 return (DDI_FAILURE);
5695
5696 return (DDI_SUCCESS);
5697 }
5698
/*
 * mdi_post_attach():
 *		Post attach() notification handler.
 *		Synchronizes the MDI pHCI and/or client state with the
 *		outcome of an attach(9E) or resume: on success the
 *		corresponding ATTACH/RESUME state is recorded; on failure the
 *		converse DETACH/SUSPEND state is restored.  A node may be
 *		both a pHCI and a client, so both branches can run.
 */
/*ARGSUSED*/
void
mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error)
{
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_vhci_t	*vh;

	if (MDI_PHCI(dip)) {
		ph = i_devi_get_phci(dip);
		ASSERT(ph != NULL);

		MDI_PHCI_LOCK(ph);
		switch (cmd) {
		case DDI_ATTACH:
			MDI_DEBUG(2, (MDI_NOTE, dip,
			    "phci post_attach called %p", (void *)ph));
			if (error == DDI_SUCCESS) {
				MDI_PHCI_SET_ATTACH(ph);
			} else {
				MDI_DEBUG(1, (MDI_NOTE, dip,
				    "!pHCI post_attach failed: error %d",
				    error));
				MDI_PHCI_SET_DETACH(ph);
			}
			break;

		case DDI_RESUME:
			MDI_DEBUG(2, (MDI_NOTE, dip,
			    "pHCI post_resume: called %p", (void *)ph));
			if (error == DDI_SUCCESS) {
				MDI_PHCI_SET_RESUME(ph);
			} else {
				MDI_DEBUG(1, (MDI_NOTE, dip,
				    "!pHCI post_resume failed: error %d",
				    error));
				MDI_PHCI_SET_SUSPEND(ph);
			}
			break;
		}
		MDI_PHCI_UNLOCK(ph);
	}

	if (MDI_CLIENT(dip)) {
		ct = i_devi_get_client(dip);
		ASSERT(ct != NULL);

		MDI_CLIENT_LOCK(ct);
		switch (cmd) {
		case DDI_ATTACH:
			MDI_DEBUG(2, (MDI_NOTE, dip,
			    "client post_attach called %p", (void *)ct));
			if (error != DDI_SUCCESS) {
				MDI_DEBUG(1, (MDI_NOTE, dip,
				    "!client post_attach failed: error %d",
				    error));
				MDI_CLIENT_SET_DETACH(ct);
				/* drop all PM holds taken during config */
				MDI_DEBUG(4, (MDI_WARN, dip,
				    "i_mdi_pm_reset_client"));
				i_mdi_pm_reset_client(ct);
				break;
			}

			/*
			 * Client device has successfully attached, inform
			 * the vhci.
			 */
			vh = ct->ct_vhci;
			if (vh->vh_ops->vo_client_attached)
				(*vh->vh_ops->vo_client_attached)(dip);

			MDI_CLIENT_SET_ATTACH(ct);
			break;

		case DDI_RESUME:
			MDI_DEBUG(2, (MDI_NOTE, dip,
			    "client post_attach: called %p", (void *)ct));
			if (error == DDI_SUCCESS) {
				MDI_CLIENT_SET_RESUME(ct);
			} else {
				MDI_DEBUG(1, (MDI_NOTE, dip,
				    "!client post_resume failed: error %d",
				    error));
				MDI_CLIENT_SET_SUSPEND(ct);
			}
			break;
		}
		MDI_CLIENT_UNLOCK(ct);
	}
}
5793
5794 /*
5795 * mdi_pre_detach():
5796 * Pre detach notification handler
5797 */
5798 /*ARGSUSED*/
5799 int
mdi_pre_detach(dev_info_t * dip,ddi_detach_cmd_t cmd)5800 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5801 {
5802 int rv = DDI_SUCCESS;
5803
5804 if (MDI_CLIENT(dip)) {
5805 (void) i_mdi_client_pre_detach(dip, cmd);
5806 }
5807
5808 if (MDI_PHCI(dip)) {
5809 rv = i_mdi_phci_pre_detach(dip, cmd);
5810 }
5811
5812 return (rv);
5813 }
5814
/*
 * i_mdi_phci_pre_detach():
 *		Pre detach handler for a pHCI node.
 *		DDI_DETACH: refuse (DDI_FAILURE) while any mdi_pathinfo node
 *		is still attached; otherwise mark the pHCI detached.
 *		DDI_SUSPEND: first suspend every client reachable through
 *		this pHCI (clients stack above the pHCI, so they must go
 *		down first); if any client suspend fails, resume the clients
 *		already suspended and fail the pHCI suspend.
 */
/*ARGSUSED*/
static int
i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int		rv = DDI_SUCCESS;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*failed_pip = NULL;
	mdi_pathinfo_t	*next;

	ph = i_devi_get_phci(dip);
	if (ph == NULL) {
		/* not MDI-managed; nothing to veto */
		return (rv);
	}

	MDI_PHCI_LOCK(ph);
	switch (cmd) {
	case DDI_DETACH:
		MDI_DEBUG(2, (MDI_NOTE, dip,
		    "pHCI pre_detach: called %p", (void *)ph));
		if (!MDI_PHCI_IS_OFFLINE(ph)) {
			/*
			 * mdi_pathinfo nodes are still attached to
			 * this pHCI. Fail the detach for this pHCI.
			 */
			MDI_DEBUG(2, (MDI_WARN, dip,
			    "pHCI pre_detach: paths are still attached %p",
			    (void *)ph));
			rv = DDI_FAILURE;
			break;
		}
		MDI_PHCI_SET_DETACH(ph);
		break;

	case DDI_SUSPEND:
		/*
		 * pHCI is getting suspended. Since mpxio client
		 * devices may not be suspended at this point, to avoid
		 * a potential stack overflow, it is important to suspend
		 * client devices before pHCI can be suspended.
		 */

		MDI_DEBUG(2, (MDI_NOTE, dip,
		    "pHCI pre_suspend: called %p", (void *)ph));
		/*
		 * Suspend all the client devices accessible through this pHCI
		 */
		pip = ph->ph_path_head;
		while (pip != NULL && rv == DDI_SUCCESS) {
			dev_info_t *cdip;
			MDI_PI_LOCK(pip);
			/* capture next link before dropping the pi lock */
			next =
			    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
			ct = MDI_PI(pip)->pi_client;
			i_mdi_client_lock(ct, pip);
			cdip = ct->ct_dip;
			MDI_PI_UNLOCK(pip);
			if ((MDI_CLIENT_IS_DETACHED(ct) == 0) &&
			    MDI_CLIENT_IS_SUSPENDED(ct) == 0) {
				i_mdi_client_unlock(ct);
				if ((rv = devi_detach(cdip, DDI_SUSPEND)) !=
				    DDI_SUCCESS) {
					/*
					 * Suspend of one of the client
					 * device has failed.
					 */
					MDI_DEBUG(1, (MDI_WARN, dip,
					    "!suspend of device (%s%d) failed.",
					    ddi_driver_name(cdip),
					    ddi_get_instance(cdip)));
					failed_pip = pip;
					break;
				}
			} else {
				i_mdi_client_unlock(ct);
			}
			pip = next;
		}

		if (rv == DDI_SUCCESS) {
			/*
			 * Suspend of client devices is complete. Proceed
			 * with pHCI suspend.
			 */
			MDI_PHCI_SET_SUSPEND(ph);
		} else {
			/*
			 * Revert back all the suspended client device states
			 * to converse.  Walk the list again up to (but not
			 * including) the path whose client failed to suspend.
			 */
			pip = ph->ph_path_head;
			while (pip != failed_pip) {
				dev_info_t *cdip;
				MDI_PI_LOCK(pip);
				next =
				    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
				ct = MDI_PI(pip)->pi_client;
				i_mdi_client_lock(ct, pip);
				cdip = ct->ct_dip;
				MDI_PI_UNLOCK(pip);
				if (MDI_CLIENT_IS_SUSPENDED(ct)) {
					i_mdi_client_unlock(ct);
					(void) devi_attach(cdip, DDI_RESUME);
				} else {
					i_mdi_client_unlock(ct);
				}
				pip = next;
			}
		}
		break;

	default:
		rv = DDI_FAILURE;
		break;
	}
	MDI_PHCI_UNLOCK(ph);
	return (rv);
}
5934
5935 /*ARGSUSED*/
5936 static int
i_mdi_client_pre_detach(dev_info_t * dip,ddi_detach_cmd_t cmd)5937 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5938 {
5939 int rv = DDI_SUCCESS;
5940 mdi_client_t *ct;
5941
5942 ct = i_devi_get_client(dip);
5943 if (ct == NULL) {
5944 return (rv);
5945 }
5946
5947 MDI_CLIENT_LOCK(ct);
5948 switch (cmd) {
5949 case DDI_DETACH:
5950 MDI_DEBUG(2, (MDI_NOTE, dip,
5951 "client pre_detach: called %p",
5952 (void *)ct));
5953 MDI_CLIENT_SET_DETACH(ct);
5954 break;
5955
5956 case DDI_SUSPEND:
5957 MDI_DEBUG(2, (MDI_NOTE, dip,
5958 "client pre_suspend: called %p",
5959 (void *)ct));
5960 MDI_CLIENT_SET_SUSPEND(ct);
5961 break;
5962
5963 default:
5964 rv = DDI_FAILURE;
5965 break;
5966 }
5967 MDI_CLIENT_UNLOCK(ct);
5968 return (rv);
5969 }
5970
5971 /*
5972 * mdi_post_detach():
5973 * Post detach notification handler
5974 */
5975 /*ARGSUSED*/
5976 void
mdi_post_detach(dev_info_t * dip,ddi_detach_cmd_t cmd,int error)5977 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5978 {
5979 /*
5980 * Detach/Suspend of mpxio component failed. Update our state
5981 * too
5982 */
5983 if (MDI_PHCI(dip))
5984 i_mdi_phci_post_detach(dip, cmd, error);
5985
5986 if (MDI_CLIENT(dip))
5987 i_mdi_client_post_detach(dip, cmd, error);
5988 }
5989
5990 /*ARGSUSED*/
5991 static void
i_mdi_phci_post_detach(dev_info_t * dip,ddi_detach_cmd_t cmd,int error)5992 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5993 {
5994 mdi_phci_t *ph;
5995
5996 /*
5997 * Detach/Suspend of phci component failed. Update our state
5998 * too
5999 */
6000 ph = i_devi_get_phci(dip);
6001 if (ph == NULL) {
6002 return;
6003 }
6004
6005 MDI_PHCI_LOCK(ph);
6006 /*
6007 * Detach of pHCI failed. Restore back converse
6008 * state
6009 */
6010 switch (cmd) {
6011 case DDI_DETACH:
6012 MDI_DEBUG(2, (MDI_NOTE, dip,
6013 "pHCI post_detach: called %p",
6014 (void *)ph));
6015 if (error != DDI_SUCCESS)
6016 MDI_PHCI_SET_ATTACH(ph);
6017 break;
6018
6019 case DDI_SUSPEND:
6020 MDI_DEBUG(2, (MDI_NOTE, dip,
6021 "pHCI post_suspend: called %p",
6022 (void *)ph));
6023 if (error != DDI_SUCCESS)
6024 MDI_PHCI_SET_RESUME(ph);
6025 break;
6026 }
6027 MDI_PHCI_UNLOCK(ph);
6028 }
6029
/*
 * i_mdi_client_post_detach():
 *		Post detach/suspend handler for a client node.
 *		On DDI_DETACH the client's power-management holds are
 *		released (or fully reset when the node is not re-attaching),
 *		and a failed detach restores the ATTACH state.  On
 *		DDI_SUSPEND a failure restores the RESUME state.
 */
/*ARGSUSED*/
static void
i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
{
	mdi_client_t	*ct;

	ct = i_devi_get_client(dip);
	if (ct == NULL) {
		return;
	}
	MDI_CLIENT_LOCK(ct);
	/*
	 * Detach of Client failed. Restore back converse
	 * state
	 */
	switch (cmd) {
	case DDI_DETACH:
		MDI_DEBUG(2, (MDI_NOTE, dip,
		    "client post_detach: called %p", (void *)ct));
		if (DEVI_IS_ATTACHING(dip)) {
			/* re-attach underway: drop only the per-path holds */
			MDI_DEBUG(4, (MDI_NOTE, dip,
			    "i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		} else {
			/* node going away: zero all PM accounting */
			MDI_DEBUG(4, (MDI_NOTE, dip,
			    "i_mdi_pm_reset_client\n"));
			i_mdi_pm_reset_client(ct);
		}
		if (error != DDI_SUCCESS)
			MDI_CLIENT_SET_ATTACH(ct);
		break;

	case DDI_SUSPEND:
		MDI_DEBUG(2, (MDI_NOTE, dip,
		    "called %p", (void *)ct));
		if (error != DDI_SUCCESS)
			MDI_CLIENT_SET_RESUME(ct);
		break;
	}
	MDI_CLIENT_UNLOCK(ct);
}
6071
6072 int
mdi_pi_kstat_exists(mdi_pathinfo_t * pip)6073 mdi_pi_kstat_exists(mdi_pathinfo_t *pip)
6074 {
6075 return (MDI_PI(pip)->pi_kstats ? 1 : 0);
6076 }
6077
/*
 * create and install per-path (client - pHCI) statistics
 * I/O stats supported: nread, nwritten, reads, and writes
 * Error stats - hard errors, soft errors, & transport errors
 *
 * NOTE(review): ksname is modified in place (",err" is appended); callers
 * must pass a buffer with room for the suffix -- TODO confirm all callers
 * size the buffer accordingly.
 */
int
mdi_pi_kstat_create(mdi_pathinfo_t *pip, char *ksname)
{
	kstat_t			*kiosp, *kerrsp;
	struct pi_errs		*nsp;
	struct mdi_pi_kstats	*mdi_statp;

	/* already created for this path: nothing to do */
	if (MDI_PI(pip)->pi_kstats != NULL)
		return (MDI_SUCCESS);

	if ((kiosp = kstat_create("mdi", 0, ksname, "iopath",
	    KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) {
		return (MDI_FAILURE);
	}

	/* companion named kstat for error counters, "<ksname>,err" */
	(void) strcat(ksname, ",err");
	kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors",
	    KSTAT_TYPE_NAMED,
	    sizeof (struct pi_errs) / sizeof (kstat_named_t), 0);
	if (kerrsp == NULL) {
		kstat_delete(kiosp);
		return (MDI_FAILURE);
	}

	nsp = (struct pi_errs *)kerrsp->ks_data;
	kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_transerrs, "Transport Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_failedfrom, "Failed From",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32);

	/* wrapper holds both kstats plus a share count (see destroy) */
	mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP);
	mdi_statp->pi_kstat_ref = 1;
	mdi_statp->pi_kstat_iostats = kiosp;
	mdi_statp->pi_kstat_errstats = kerrsp;
	kstat_install(kiosp);
	kstat_install(kerrsp);
	MDI_PI(pip)->pi_kstats = mdi_statp;
	return (MDI_SUCCESS);
}
6135
6136 /*
6137 * destroy per-path properties
6138 */
6139 static void
i_mdi_pi_kstat_destroy(mdi_pathinfo_t * pip)6140 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip)
6141 {
6142
6143 struct mdi_pi_kstats *mdi_statp;
6144
6145 if (MDI_PI(pip)->pi_kstats == NULL)
6146 return;
6147 if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL)
6148 return;
6149
6150 MDI_PI(pip)->pi_kstats = NULL;
6151
6152 /*
6153 * the kstat may be shared between multiple pathinfo nodes
6154 * decrement this pathinfo's usage, removing the kstats
6155 * themselves when the last pathinfo reference is removed.
6156 */
6157 ASSERT(mdi_statp->pi_kstat_ref > 0);
6158 if (--mdi_statp->pi_kstat_ref != 0)
6159 return;
6160
6161 kstat_delete(mdi_statp->pi_kstat_iostats);
6162 kstat_delete(mdi_statp->pi_kstat_errstats);
6163 kmem_free(mdi_statp, sizeof (*mdi_statp));
6164 }
6165
6166 /*
6167 * update I/O paths KSTATS
6168 */
6169 void
mdi_pi_kstat_iosupdate(mdi_pathinfo_t * pip,struct buf * bp)6170 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp)
6171 {
6172 kstat_t *iostatp;
6173 size_t xfer_cnt;
6174
6175 ASSERT(pip != NULL);
6176
6177 /*
6178 * I/O can be driven across a path prior to having path
6179 * statistics available, i.e. probe(9e).
6180 */
6181 if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) {
6182 iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats;
6183 xfer_cnt = bp->b_bcount - bp->b_resid;
6184 if (bp->b_flags & B_READ) {
6185 KSTAT_IO_PTR(iostatp)->reads++;
6186 KSTAT_IO_PTR(iostatp)->nread += xfer_cnt;
6187 } else {
6188 KSTAT_IO_PTR(iostatp)->writes++;
6189 KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt;
6190 }
6191 }
6192 }
6193
6194 /*
6195 * Enable the path(specific client/target/initiator)
6196 * Enabling a path means that MPxIO may select the enabled path for routing
6197 * future I/O requests, subject to other path state constraints.
6198 */
6199 int
mdi_pi_enable_path(mdi_pathinfo_t * pip,int flags)6200 mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags)
6201 {
6202 mdi_phci_t *ph;
6203
6204 ph = MDI_PI(pip)->pi_phci;
6205 if (ph == NULL) {
6206 MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip),
6207 "!failed: path %s %p: NULL ph",
6208 mdi_pi_spathname(pip), (void *)pip));
6209 return (MDI_FAILURE);
6210 }
6211
6212 (void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags,
6213 MDI_ENABLE_OP);
6214 MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip,
6215 "!returning success pip = %p. ph = %p",
6216 (void *)pip, (void *)ph));
6217 return (MDI_SUCCESS);
6218
6219 }
6220
6221 /*
6222 * Disable the path (specific client/target/initiator)
6223 * Disabling a path means that MPxIO will not select the disabled path for
6224 * routing any new I/O requests.
6225 */
6226 int
mdi_pi_disable_path(mdi_pathinfo_t * pip,int flags)6227 mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags)
6228 {
6229 mdi_phci_t *ph;
6230
6231 ph = MDI_PI(pip)->pi_phci;
6232 if (ph == NULL) {
6233 MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip),
6234 "!failed: path %s %p: NULL ph",
6235 mdi_pi_spathname(pip), (void *)pip));
6236 return (MDI_FAILURE);
6237 }
6238
6239 (void) i_mdi_enable_disable_path(pip,
6240 ph->ph_vhci, flags, MDI_DISABLE_OP);
6241 MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip,
6242 "!returning success pip = %p. ph = %p",
6243 (void *)pip, (void *)ph));
6244 return (MDI_SUCCESS);
6245 }
6246
6247 /*
6248 * disable the path to a particular pHCI (pHCI specified in the phci_path
6249 * argument) for a particular client (specified in the client_path argument).
6250 * Disabling a path means that MPxIO will not select the disabled path for
6251 * routing any new I/O requests.
6252 * NOTE: this will be removed once the NWS files are changed to use the new
6253 * mdi_{enable,disable}_path interfaces
6254 */
6255 int
mdi_pi_disable(dev_info_t * cdip,dev_info_t * pdip,int flags)6256 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags)
6257 {
6258 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP));
6259 }
6260
6261 /*
6262 * Enable the path to a particular pHCI (pHCI specified in the phci_path
6263 * argument) for a particular client (specified in the client_path argument).
6264 * Enabling a path means that MPxIO may select the enabled path for routing
6265 * future I/O requests, subject to other path state constraints.
6266 * NOTE: this will be removed once the NWS files are changed to use the new
6267 * mdi_{enable,disable}_path interfaces
6268 */
6269
6270 int
mdi_pi_enable(dev_info_t * cdip,dev_info_t * pdip,int flags)6271 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags)
6272 {
6273 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP));
6274 }
6275
6276 /*
6277 * Common routine for doing enable/disable.
6278 */
6279 static mdi_pathinfo_t *
i_mdi_enable_disable_path(mdi_pathinfo_t * pip,mdi_vhci_t * vh,int flags,int op)6280 i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags,
6281 int op)
6282 {
6283 int sync_flag = 0;
6284 int rv;
6285 mdi_pathinfo_t *next;
6286 int (*f)() = NULL;
6287
6288 /*
6289 * Check to make sure the path is not already in the
6290 * requested state. If it is just return the next path
6291 * as we have nothing to do here.
6292 */
6293 if ((MDI_PI_IS_DISABLE(pip) && op == MDI_DISABLE_OP) ||
6294 (!MDI_PI_IS_DISABLE(pip) && op == MDI_ENABLE_OP)) {
6295 MDI_PI_LOCK(pip);
6296 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
6297 MDI_PI_UNLOCK(pip);
6298 return (next);
6299 }
6300
6301 f = vh->vh_ops->vo_pi_state_change;
6302
6303 sync_flag = (flags << 8) & 0xf00;
6304
6305 /*
6306 * Do a callback into the mdi consumer to let it
6307 * know that path is about to get enabled/disabled.
6308 */
6309 if (f != NULL) {
6310 rv = (*f)(vh->vh_dip, pip, 0,
6311 MDI_PI_EXT_STATE(pip),
6312 MDI_EXT_STATE_CHANGE | sync_flag |
6313 op | MDI_BEFORE_STATE_CHANGE);
6314 if (rv != MDI_SUCCESS) {
6315 MDI_DEBUG(2, (MDI_WARN, vh->vh_dip,
6316 "vo_pi_state_change: failed rv = %x", rv));
6317 }
6318 }
6319 MDI_PI_LOCK(pip);
6320 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
6321
6322 switch (flags) {
6323 case USER_DISABLE:
6324 if (op == MDI_DISABLE_OP) {
6325 MDI_PI_SET_USER_DISABLE(pip);
6326 } else {
6327 MDI_PI_SET_USER_ENABLE(pip);
6328 }
6329 break;
6330 case DRIVER_DISABLE:
6331 if (op == MDI_DISABLE_OP) {
6332 MDI_PI_SET_DRV_DISABLE(pip);
6333 } else {
6334 MDI_PI_SET_DRV_ENABLE(pip);
6335 }
6336 break;
6337 case DRIVER_DISABLE_TRANSIENT:
6338 if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) {
6339 MDI_PI_SET_DRV_DISABLE_TRANS(pip);
6340 } else {
6341 MDI_PI_SET_DRV_ENABLE_TRANS(pip);
6342 }
6343 break;
6344 }
6345 MDI_PI_UNLOCK(pip);
6346 /*
6347 * Do a callback into the mdi consumer to let it
6348 * know that path is now enabled/disabled.
6349 */
6350 if (f != NULL) {
6351 rv = (*f)(vh->vh_dip, pip, 0,
6352 MDI_PI_EXT_STATE(pip),
6353 MDI_EXT_STATE_CHANGE | sync_flag |
6354 op | MDI_AFTER_STATE_CHANGE);
6355 if (rv != MDI_SUCCESS) {
6356 MDI_DEBUG(2, (MDI_WARN, vh->vh_dip,
6357 "vo_pi_state_change failed: rv = %x", rv));
6358 }
6359 }
6360 return (next);
6361 }
6362
6363 /*
6364 * Common routine for doing enable/disable.
6365 * NOTE: this will be removed once the NWS files are changed to use the new
6366 * mdi_{enable,disable}_path has been putback
6367 */
6368 int
i_mdi_pi_enable_disable(dev_info_t * cdip,dev_info_t * pdip,int flags,int op)6369 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op)
6370 {
6371
6372 mdi_phci_t *ph;
6373 mdi_vhci_t *vh = NULL;
6374 mdi_client_t *ct;
6375 mdi_pathinfo_t *next, *pip;
6376 int found_it;
6377
6378 ph = i_devi_get_phci(pdip);
6379 MDI_DEBUG(5, (MDI_NOTE, cdip ? cdip : pdip,
6380 "!op = %d pdip = %p cdip = %p", op, (void *)pdip,
6381 (void *)cdip));
6382 if (ph == NULL) {
6383 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6384 "!failed: operation %d: NULL ph", op));
6385 return (MDI_FAILURE);
6386 }
6387
6388 if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) {
6389 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6390 "!failed: invalid operation %d", op));
6391 return (MDI_FAILURE);
6392 }
6393
6394 vh = ph->ph_vhci;
6395
6396 if (cdip == NULL) {
6397 /*
6398 * Need to mark the Phci as enabled/disabled.
6399 */
6400 MDI_DEBUG(4, (MDI_NOTE, cdip ? cdip : pdip,
6401 "op %d for the phci", op));
6402 MDI_PHCI_LOCK(ph);
6403 switch (flags) {
6404 case USER_DISABLE:
6405 if (op == MDI_DISABLE_OP) {
6406 MDI_PHCI_SET_USER_DISABLE(ph);
6407 } else {
6408 MDI_PHCI_SET_USER_ENABLE(ph);
6409 }
6410 break;
6411 case DRIVER_DISABLE:
6412 if (op == MDI_DISABLE_OP) {
6413 MDI_PHCI_SET_DRV_DISABLE(ph);
6414 } else {
6415 MDI_PHCI_SET_DRV_ENABLE(ph);
6416 }
6417 break;
6418 case DRIVER_DISABLE_TRANSIENT:
6419 if (op == MDI_DISABLE_OP) {
6420 MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph);
6421 } else {
6422 MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph);
6423 }
6424 break;
6425 default:
6426 MDI_PHCI_UNLOCK(ph);
6427 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6428 "!invalid flag argument= %d", flags));
6429 }
6430
6431 /*
6432 * Phci has been disabled. Now try to enable/disable
6433 * path info's to each client.
6434 */
6435 pip = ph->ph_path_head;
6436 while (pip != NULL) {
6437 pip = i_mdi_enable_disable_path(pip, vh, flags, op);
6438 }
6439 MDI_PHCI_UNLOCK(ph);
6440 } else {
6441
6442 /*
6443 * Disable a specific client.
6444 */
6445 ct = i_devi_get_client(cdip);
6446 if (ct == NULL) {
6447 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6448 "!failed: operation = %d: NULL ct", op));
6449 return (MDI_FAILURE);
6450 }
6451
6452 MDI_CLIENT_LOCK(ct);
6453 pip = ct->ct_path_head;
6454 found_it = 0;
6455 while (pip != NULL) {
6456 MDI_PI_LOCK(pip);
6457 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6458 if (MDI_PI(pip)->pi_phci == ph) {
6459 MDI_PI_UNLOCK(pip);
6460 found_it = 1;
6461 break;
6462 }
6463 MDI_PI_UNLOCK(pip);
6464 pip = next;
6465 }
6466
6467
6468 MDI_CLIENT_UNLOCK(ct);
6469 if (found_it == 0) {
6470 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6471 "!failed. Could not find corresponding pip\n"));
6472 return (MDI_FAILURE);
6473 }
6474
6475 (void) i_mdi_enable_disable_path(pip, vh, flags, op);
6476 }
6477
6478 MDI_DEBUG(5, (MDI_NOTE, cdip ? cdip : pdip,
6479 "!op %d returning success pdip = %p cdip = %p",
6480 op, (void *)pdip, (void *)cdip));
6481 return (MDI_SUCCESS);
6482 }
6483
/*
 * Ensure phci powered up
 *
 * Takes a power hold on the path's pHCI (pm_hold_power) so it cannot be
 * powered down while the client is power-managing.  Idempotent per path
 * via pi_pm_held.  Note the pi lock is dropped around pm_hold_power and
 * reacquired, since that call can block.
 */
static void
i_mdi_pm_hold_pip(mdi_pathinfo_t *pip)
{
	dev_info_t	*ph_dip;

	ASSERT(pip != NULL);
	ASSERT(MDI_PI_LOCKED(pip));

	/* hold already taken for this path: nothing to do */
	if (MDI_PI(pip)->pi_pm_held) {
		return;
	}

	ph_dip = mdi_pi_get_phci(pip);
	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
	    "%s %p", mdi_pi_spathname(pip), (void *)pip));
	if (ph_dip == NULL) {
		return;
	}

	MDI_PI_UNLOCK(pip);
	MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt was %d",
	    DEVI(ph_dip)->devi_pm_kidsupcnt));
	pm_hold_power(ph_dip);
	MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt is %d",
	    DEVI(ph_dip)->devi_pm_kidsupcnt));
	MDI_PI_LOCK(pip);

	/* If PM_GET_PM_INFO is NULL the pm_hold_power above was a noop */
	if (DEVI(ph_dip)->devi_pm_info)
		MDI_PI(pip)->pi_pm_held = 1;
}
6518
/*
 * Allow phci powered down
 *
 * Releases the power hold taken by i_mdi_pm_hold_pip().  No-op if no hold
 * is recorded (pi_pm_held == 0).  As in the hold path, the pi lock is
 * dropped around the potentially-blocking pm_rele_power call.
 */
static void
i_mdi_pm_rele_pip(mdi_pathinfo_t *pip)
{
	dev_info_t	*ph_dip = NULL;

	ASSERT(pip != NULL);
	ASSERT(MDI_PI_LOCKED(pip));

	if (MDI_PI(pip)->pi_pm_held == 0) {
		return;
	}

	ph_dip = mdi_pi_get_phci(pip);
	ASSERT(ph_dip != NULL);

	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
	    "%s %p", mdi_pi_spathname(pip), (void *)pip));

	MDI_PI_UNLOCK(pip);
	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
	    "kidsupcnt was %d", DEVI(ph_dip)->devi_pm_kidsupcnt));
	pm_rele_power(ph_dip);
	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
	    "kidsupcnt is %d", DEVI(ph_dip)->devi_pm_kidsupcnt));
	MDI_PI_LOCK(pip);

	MDI_PI(pip)->pi_pm_held = 0;
}
6550
6551 static void
i_mdi_pm_hold_client(mdi_client_t * ct,int incr)6552 i_mdi_pm_hold_client(mdi_client_t *ct, int incr)
6553 {
6554 ASSERT(MDI_CLIENT_LOCKED(ct));
6555
6556 ct->ct_power_cnt += incr;
6557 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
6558 "%p ct_power_cnt = %d incr = %d",
6559 (void *)ct, ct->ct_power_cnt, incr));
6560 ASSERT(ct->ct_power_cnt >= 0);
6561 }
6562
/*
 * i_mdi_rele_all_phci():
 *		Release the pHCI power hold on every path of the client.
 *		Caller must hold the client lock, which keeps the path list
 *		stable across the walk.
 *
 * NOTE(review): pi_client_link is read after mdi_rele_path() and without
 * the pi lock; presumably safe because the held client lock prevents path
 * removal -- verify against mdi_pi_free()'s locking.
 */
static void
i_mdi_rele_all_phci(mdi_client_t *ct)
{
	mdi_pathinfo_t	*pip;

	ASSERT(MDI_CLIENT_LOCKED(ct));
	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		mdi_hold_path(pip);
		MDI_PI_LOCK(pip);
		i_mdi_pm_rele_pip(pip);
		MDI_PI_UNLOCK(pip);
		mdi_rele_path(pip);
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}
}
6579
6580 static void
i_mdi_pm_rele_client(mdi_client_t * ct,int decr)6581 i_mdi_pm_rele_client(mdi_client_t *ct, int decr)
6582 {
6583 ASSERT(MDI_CLIENT_LOCKED(ct));
6584
6585 if (i_ddi_devi_attached(ct->ct_dip)) {
6586 ct->ct_power_cnt -= decr;
6587 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
6588 "%p ct_power_cnt = %d decr = %d",
6589 (void *)ct, ct->ct_power_cnt, decr));
6590 }
6591
6592 ASSERT(ct->ct_power_cnt >= 0);
6593 if (ct->ct_power_cnt == 0) {
6594 i_mdi_rele_all_phci(ct);
6595 return;
6596 }
6597 }
6598
6599 static void
i_mdi_pm_reset_client(mdi_client_t * ct)6600 i_mdi_pm_reset_client(mdi_client_t *ct)
6601 {
6602 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
6603 "%p ct_power_cnt = %d", (void *)ct, ct->ct_power_cnt));
6604 ASSERT(MDI_CLIENT_LOCKED(ct));
6605 ct->ct_power_cnt = 0;
6606 i_mdi_rele_all_phci(ct);
6607 ct->ct_powercnt_config = 0;
6608 ct->ct_powercnt_unconfig = 0;
6609 ct->ct_powercnt_reset = 1;
6610 }
6611
6612 static int
i_mdi_power_one_phci(mdi_pathinfo_t * pip)6613 i_mdi_power_one_phci(mdi_pathinfo_t *pip)
6614 {
6615 int ret;
6616 dev_info_t *ph_dip;
6617
6618 MDI_PI_LOCK(pip);
6619 i_mdi_pm_hold_pip(pip);
6620
6621 ph_dip = mdi_pi_get_phci(pip);
6622 MDI_PI_UNLOCK(pip);
6623
6624 /* bring all components of phci to full power */
6625 MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6626 "pm_powerup for %s%d %p", ddi_driver_name(ph_dip),
6627 ddi_get_instance(ph_dip), (void *)pip));
6628
6629 ret = pm_powerup(ph_dip);
6630
6631 if (ret == DDI_FAILURE) {
6632 MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6633 "pm_powerup FAILED for %s%d %p",
6634 ddi_driver_name(ph_dip), ddi_get_instance(ph_dip),
6635 (void *)pip));
6636
6637 MDI_PI_LOCK(pip);
6638 i_mdi_pm_rele_pip(pip);
6639 MDI_PI_UNLOCK(pip);
6640 return (MDI_FAILURE);
6641 }
6642
6643 return (MDI_SUCCESS);
6644 }
6645
/*
 * i_mdi_power_all_phci():
 *		Power up the pHCI of every usable path (INIT, ONLINE or
 *		STANDBY) of the client.  Returns MDI_SUCCESS if at least one
 *		pHCI was powered up.  The client lock is dropped around each
 *		(blocking) powerup; the path is held across the drop so it
 *		cannot disappear.
 */
static int
i_mdi_power_all_phci(mdi_client_t *ct)
{
	mdi_pathinfo_t	*pip;
	int		succeeded = 0;

	ASSERT(MDI_CLIENT_LOCKED(ct));
	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		/*
		 * Don't power if MDI_PATHINFO_STATE_FAULT
		 * or MDI_PATHINFO_STATE_OFFLINE.
		 */
		if (MDI_PI_IS_INIT(pip) ||
		    MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) {
			mdi_hold_path(pip);
			MDI_CLIENT_UNLOCK(ct);
			if (i_mdi_power_one_phci(pip) == MDI_SUCCESS)
				succeeded = 1;

			ASSERT(ct == MDI_PI(pip)->pi_client);
			MDI_CLIENT_LOCK(ct);
			mdi_rele_path(pip);
		}
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}

	return (succeeded ? MDI_SUCCESS : MDI_FAILURE);
}
6675
/*
 * mdi_bus_power():
 *		1. Place the phci(s) into powered up state so that
 *		client can do power management
 *		2. Ensure phci powered up as client power managing
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *
 * Only PRE/POST_NOTIFICATION and HAS_CHANGED are handled here; all other
 * ops are forwarded to the framework's pm_busop_bus_power().  Power-level
 * changes are serialized per client via ct_powerchange_cv.
 */
int
mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op,
    void *arg, void *result)
{
	int			ret = MDI_SUCCESS;
	pm_bp_child_pwrchg_t	*bpc;
	mdi_client_t		*ct;
	dev_info_t		*cdip;
	pm_bp_has_changed_t	*bphc;

	/*
	 * BUS_POWER_NOINVOL not supported
	 */
	if (op == BUS_POWER_NOINVOL)
		return (MDI_FAILURE);

	/*
	 * ignore other OPs.
	 * return quickly to save cpu cycles on the ct processing
	 */
	switch (op) {
	case BUS_POWER_PRE_NOTIFICATION:
	case BUS_POWER_POST_NOTIFICATION:
		bpc = (pm_bp_child_pwrchg_t *)arg;
		cdip = bpc->bpc_dip;
		break;
	case BUS_POWER_HAS_CHANGED:
		bphc = (pm_bp_has_changed_t *)arg;
		cdip = bphc->bphc_dip;
		break;
	default:
		return (pm_busop_bus_power(parent, impl_arg, op, arg, result));
	}

	ASSERT(MDI_CLIENT(cdip));

	ct = i_devi_get_client(cdip);
	if (ct == NULL)
		return (MDI_FAILURE);

	/*
	 * wait till the mdi_pathinfo node state change are processed
	 */
	MDI_CLIENT_LOCK(ct);
	switch (op) {
	case BUS_POWER_PRE_NOTIFICATION:
		MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
		    "BUS_POWER_PRE_NOTIFICATION:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d",
		    ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp));

		/* serialize power level change per client */
		while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
			cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

		MDI_CLIENT_SET_POWER_TRANSITION(ct);

		/* no holds yet: make sure every usable pHCI is powered */
		if (ct->ct_power_cnt == 0) {
			ret = i_mdi_power_all_phci(ct);
		}

		/*
		 * if new_level > 0:
		 *	- hold phci(s)
		 *	- power up phci(s) if not already
		 * ignore power down
		 */
		if (bpc->bpc_nlevel > 0) {
			if (!DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
				    "i_mdi_pm_hold_client\n"));
				i_mdi_pm_hold_client(ct, ct->ct_path_count);
			}
		}
		break;
	case BUS_POWER_POST_NOTIFICATION:
		MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
		    "BUS_POWER_POST_NOTIFICATION:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d",
		    ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp,
		    *(int *)result));

		if (*(int *)result == DDI_SUCCESS) {
			if (bpc->bpc_nlevel > 0) {
				MDI_CLIENT_SET_POWER_UP(ct);
			} else {
				MDI_CLIENT_SET_POWER_DOWN(ct);
			}
		}

		/* release the hold we did in pre-notification */
		if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) &&
		    !DEVI_IS_ATTACHING(ct->ct_dip)) {
			MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
			    "i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		}

		if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) {
			/* another thread might started attaching */
			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
				    "i_mdi_pm_rele_client\n"));
				i_mdi_pm_rele_client(ct, ct->ct_path_count);
			/* detaching has been taken care in pm_post_unconfig */
			} else if (!DEVI_IS_DETACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
				    "i_mdi_pm_reset_client\n"));
				i_mdi_pm_reset_client(ct);
			}
		}

		/* transition finished: wake any waiting power requests */
		MDI_CLIENT_CLEAR_POWER_TRANSITION(ct);
		cv_broadcast(&ct->ct_powerchange_cv);

		break;

	/* need to do more */
	case BUS_POWER_HAS_CHANGED:
		MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
		    "BUS_POWER_HAS_CHANGED:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d",
		    ddi_node_name(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip),
		    bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp));

		/* power went up: take holds mirroring PRE_NOTIFICATION */
		if (bphc->bphc_nlevel > 0 &&
		    bphc->bphc_nlevel > bphc->bphc_olevel) {
			if (ct->ct_power_cnt == 0) {
				ret = i_mdi_power_all_phci(ct);
			}
			MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
			    "i_mdi_pm_hold_client\n"));
			i_mdi_pm_hold_client(ct, ct->ct_path_count);
		}

		/* power went to 0 from a known level: drop the holds */
		if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) {
			MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
			    "i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		}
		break;
	}

	MDI_CLIENT_UNLOCK(ct);
	return (ret);
}
6833
/*
 * Power-management bookkeeping performed before configuring a single
 * client: wait out any in-flight power transition and, unless the
 * client is already usable (not failed) or already holds a pre-config
 * PM hold, power up all pHCIs and take a hold on every path.
 * Returns MDI_SUCCESS or MDI_FAILURE (no client attached to child).
 */
static int
i_mdi_pm_pre_config_one(dev_info_t *child)
{
	int		ret = MDI_SUCCESS;
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return (MDI_FAILURE);

	MDI_CLIENT_LOCK(ct);
	/* serialize against a concurrent power-state change */
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	/* a non-failed client is already configured; nothing to do */
	if (!MDI_CLIENT_IS_FAILED(ct)) {
		MDI_CLIENT_UNLOCK(ct);
		MDI_DEBUG(4, (MDI_NOTE, child, "already configured\n"));
		return (MDI_SUCCESS);
	}

	/* an earlier pre-config hold is still outstanding */
	if (ct->ct_powercnt_config) {
		MDI_CLIENT_UNLOCK(ct);
		MDI_DEBUG(4, (MDI_NOTE, child, "already held\n"));
		return (MDI_SUCCESS);
	}

	if (ct->ct_power_cnt == 0) {
		ret = i_mdi_power_all_phci(ct);
	}
	MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n"));
	i_mdi_pm_hold_client(ct, ct->ct_path_count);
	/* record the hold so the post-config stage can undo it */
	ct->ct_powercnt_config = 1;
	ct->ct_powercnt_reset = 0;
	MDI_CLIENT_UNLOCK(ct);
	return (ret);
}
6870
6871 static int
i_mdi_pm_pre_config(dev_info_t * vdip,dev_info_t * child)6872 i_mdi_pm_pre_config(dev_info_t *vdip, dev_info_t *child)
6873 {
6874 int ret = MDI_SUCCESS;
6875 dev_info_t *cdip;
6876 int circ;
6877
6878 ASSERT(MDI_VHCI(vdip));
6879
6880 /* ndi_devi_config_one */
6881 if (child) {
6882 ASSERT(DEVI_BUSY_OWNED(vdip));
6883 return (i_mdi_pm_pre_config_one(child));
6884 }
6885
6886 /* devi_config_common */
6887 ndi_devi_enter(vdip, &circ);
6888 cdip = ddi_get_child(vdip);
6889 while (cdip) {
6890 dev_info_t *next = ddi_get_next_sibling(cdip);
6891
6892 ret = i_mdi_pm_pre_config_one(cdip);
6893 if (ret != MDI_SUCCESS)
6894 break;
6895 cdip = next;
6896 }
6897 ndi_devi_exit(vdip, circ);
6898 return (ret);
6899 }
6900
/*
 * Pre-unconfig PM bookkeeping for one client: take (or confirm) a PM
 * hold so the device can be powered for the unconfigure operation.
 * On success *held is set to 1 so the caller later runs the
 * post-unconfig stage to release the hold.
 */
static int
i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags)
{
	int		ret = MDI_SUCCESS;
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return (MDI_FAILURE);

	MDI_CLIENT_LOCK(ct);
	/* serialize against a concurrent power-state change */
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	/* nothing to unconfigure if the node is already detached */
	if (!i_ddi_devi_attached(child)) {
		MDI_DEBUG(4, (MDI_NOTE, child, "node detached already\n"));
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_SUCCESS);
	}

	/* refuse auto-detach (modunload) of a powered-down client */
	if (MDI_CLIENT_IS_POWERED_DOWN(ct) &&
	    (flags & NDI_AUTODETACH)) {
		MDI_DEBUG(4, (MDI_NOTE, child, "auto-modunload\n"));
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_FAILURE);
	}

	/* a hold from an earlier pre-unconfig is still outstanding */
	if (ct->ct_powercnt_unconfig) {
		MDI_DEBUG(4, (MDI_NOTE, child, "ct_powercnt_held\n"));
		MDI_CLIENT_UNLOCK(ct);
		*held = 1;
		return (MDI_SUCCESS);
	}

	if (ct->ct_power_cnt == 0) {
		ret = i_mdi_power_all_phci(ct);
	}
	MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n"));
	i_mdi_pm_hold_client(ct, ct->ct_path_count);
	/* record the hold so the post-unconfig stage can undo it */
	ct->ct_powercnt_unconfig = 1;
	ct->ct_powercnt_reset = 0;
	MDI_CLIENT_UNLOCK(ct);
	if (ret == MDI_SUCCESS)
		*held = 1;
	return (ret);
}
6947
6948 static int
i_mdi_pm_pre_unconfig(dev_info_t * vdip,dev_info_t * child,int * held,int flags)6949 i_mdi_pm_pre_unconfig(dev_info_t *vdip, dev_info_t *child, int *held,
6950 int flags)
6951 {
6952 int ret = MDI_SUCCESS;
6953 dev_info_t *cdip;
6954 int circ;
6955
6956 ASSERT(MDI_VHCI(vdip));
6957 *held = 0;
6958
6959 /* ndi_devi_unconfig_one */
6960 if (child) {
6961 ASSERT(DEVI_BUSY_OWNED(vdip));
6962 return (i_mdi_pm_pre_unconfig_one(child, held, flags));
6963 }
6964
6965 /* devi_unconfig_common */
6966 ndi_devi_enter(vdip, &circ);
6967 cdip = ddi_get_child(vdip);
6968 while (cdip) {
6969 dev_info_t *next = ddi_get_next_sibling(cdip);
6970
6971 ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags);
6972 cdip = next;
6973 }
6974 ndi_devi_exit(vdip, circ);
6975
6976 if (*held)
6977 ret = MDI_SUCCESS;
6978
6979 return (ret);
6980 }
6981
/*
 * Settle the PM hold taken by i_mdi_pm_pre_config_one() after the
 * configure attempt completes. If the client did not come up (powered
 * down, or not attached, while nobody is attaching), reset the PM
 * bookkeeping entirely; otherwise release one hold per valid (online
 * or standby) path.
 */
static void
i_mdi_pm_post_config_one(dev_info_t *child)
{
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return;

	MDI_CLIENT_LOCK(ct);
	/* serialize against a concurrent power-state change */
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	/* no matching pre-config hold to undo */
	if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) {
		MDI_DEBUG(4, (MDI_NOTE, child, "not configured\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* client has not been updated */
	if (MDI_CLIENT_IS_FAILED(ct)) {
		MDI_DEBUG(4, (MDI_NOTE, child, "client failed\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* another thread might have powered it down or detached it */
	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
	    !DEVI_IS_ATTACHING(child)) ||
	    (!i_ddi_devi_attached(child) &&
	    !DEVI_IS_ATTACHING(child))) {
		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n"));
		i_mdi_pm_reset_client(ct);
	} else {
		mdi_pathinfo_t  *pip, *next;
		int	valid_path_count = 0;

		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n"));
		/* count paths currently online or standby */
		pip = ct->ct_path_head;
		while (pip != NULL) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
				valid_path_count ++;
			MDI_PI_UNLOCK(pip);
			pip = next;
		}
		i_mdi_pm_rele_client(ct, valid_path_count);
	}
	ct->ct_powercnt_config = 0;
	MDI_CLIENT_UNLOCK(ct);
}
7034
7035 static void
i_mdi_pm_post_config(dev_info_t * vdip,dev_info_t * child)7036 i_mdi_pm_post_config(dev_info_t *vdip, dev_info_t *child)
7037 {
7038 int circ;
7039 dev_info_t *cdip;
7040
7041 ASSERT(MDI_VHCI(vdip));
7042
7043 /* ndi_devi_config_one */
7044 if (child) {
7045 ASSERT(DEVI_BUSY_OWNED(vdip));
7046 i_mdi_pm_post_config_one(child);
7047 return;
7048 }
7049
7050 /* devi_config_common */
7051 ndi_devi_enter(vdip, &circ);
7052 cdip = ddi_get_child(vdip);
7053 while (cdip) {
7054 dev_info_t *next = ddi_get_next_sibling(cdip);
7055
7056 i_mdi_pm_post_config_one(cdip);
7057 cdip = next;
7058 }
7059 ndi_devi_exit(vdip, circ);
7060 }
7061
/*
 * Settle the PM hold taken by i_mdi_pm_pre_unconfig_one() after the
 * unconfigure attempt completes. On detach failure (still attached
 * though powered down) or when nobody is attaching a detached node,
 * reset the PM bookkeeping; otherwise release one hold per valid path.
 * Note: ct_powercnt_unconfig is cleared only in the release branch;
 * in the reset branch the flag handling is left to the reset path.
 */
static void
i_mdi_pm_post_unconfig_one(dev_info_t *child)
{
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return;

	MDI_CLIENT_LOCK(ct);
	/* serialize against a concurrent power-state change */
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	/* no matching pre-unconfig hold to undo */
	if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) {
		MDI_DEBUG(4, (MDI_NOTE, child, "not held\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* failure detaching or another thread just attached it */
	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
	    i_ddi_devi_attached(child)) ||
	    (!i_ddi_devi_attached(child) &&
	    !DEVI_IS_ATTACHING(child))) {
		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n"));
		i_mdi_pm_reset_client(ct);
	} else {
		mdi_pathinfo_t  *pip, *next;
		int	valid_path_count = 0;

		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n"));
		/* count paths currently online or standby */
		pip = ct->ct_path_head;
		while (pip != NULL) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
				valid_path_count ++;
			MDI_PI_UNLOCK(pip);
			pip = next;
		}
		i_mdi_pm_rele_client(ct, valid_path_count);
		ct->ct_powercnt_unconfig = 0;
	}

	MDI_CLIENT_UNLOCK(ct);
}
7108
7109 static void
i_mdi_pm_post_unconfig(dev_info_t * vdip,dev_info_t * child,int held)7110 i_mdi_pm_post_unconfig(dev_info_t *vdip, dev_info_t *child, int held)
7111 {
7112 int circ;
7113 dev_info_t *cdip;
7114
7115 ASSERT(MDI_VHCI(vdip));
7116
7117 if (!held) {
7118 MDI_DEBUG(4, (MDI_NOTE, vdip, "held = %d", held));
7119 return;
7120 }
7121
7122 if (child) {
7123 ASSERT(DEVI_BUSY_OWNED(vdip));
7124 i_mdi_pm_post_unconfig_one(child);
7125 return;
7126 }
7127
7128 ndi_devi_enter(vdip, &circ);
7129 cdip = ddi_get_child(vdip);
7130 while (cdip) {
7131 dev_info_t *next = ddi_get_next_sibling(cdip);
7132
7133 i_mdi_pm_post_unconfig_one(cdip);
7134 cdip = next;
7135 }
7136 ndi_devi_exit(vdip, circ);
7137 }
7138
/*
 * MDI power-management entry point invoked by the vHCI driver.
 *
 * op selects the phase: pre/post (un)config bracket bus_config /
 * bus_unconfig operations, while MDI_PM_HOLD_POWER / MDI_PM_RELE_POWER
 * manage holds on a single client (args is then the client dev_info_t).
 * For the config/unconfig ops, args is the "held" flag (int *) passed
 * between the pre and post stages, devnm optionally names one client,
 * and flags carries NDI_* flags (e.g. NDI_AUTODETACH).
 */
int
mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags)
{
	int circ, ret = MDI_SUCCESS;
	dev_info_t *client_dip = NULL;
	mdi_client_t *ct;

	/*
	 * Handling ndi_devi_config_one and ndi_devi_unconfig_one.
	 * Power up pHCI for the named client device.
	 * Note: Before the client is enumerated under vhci by phci,
	 * client_dip can be NULL. Then proceed to power up all the
	 * pHCIs.
	 */
	if (devnm != NULL) {
		/* hold the vHCI busy while we operate on the named child */
		ndi_devi_enter(vdip, &circ);
		client_dip = ndi_devi_findchild(vdip, devnm);
	}

	MDI_DEBUG(4, (MDI_NOTE, vdip,
	    "op = %d %s %p", op, devnm ? devnm : "", (void *)client_dip));

	switch (op) {
	case MDI_PM_PRE_CONFIG:
		ret = i_mdi_pm_pre_config(vdip, client_dip);
		break;

	case MDI_PM_PRE_UNCONFIG:
		/* args returns the "held" flag consumed by POST_UNCONFIG */
		ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args,
		    flags);
		break;

	case MDI_PM_POST_CONFIG:
		i_mdi_pm_post_config(vdip, client_dip);
		break;

	case MDI_PM_POST_UNCONFIG:
		i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args);
		break;

	case MDI_PM_HOLD_POWER:
	case MDI_PM_RELE_POWER:
		/* here args is the client dev_info_t itself */
		ASSERT(args);

		client_dip = (dev_info_t *)args;
		ASSERT(MDI_CLIENT(client_dip));

		ct = i_devi_get_client(client_dip);
		MDI_CLIENT_LOCK(ct);

		if (op == MDI_PM_HOLD_POWER) {
			/* only take the hold once */
			if (ct->ct_power_cnt == 0) {
				(void) i_mdi_power_all_phci(ct);
				MDI_DEBUG(4, (MDI_NOTE, client_dip,
				    "i_mdi_pm_hold_client\n"));
				i_mdi_pm_hold_client(ct, ct->ct_path_count);
			}
		} else {
			if (DEVI_IS_ATTACHING(client_dip)) {
				MDI_DEBUG(4, (MDI_NOTE, client_dip,
				    "i_mdi_pm_rele_client\n"));
				i_mdi_pm_rele_client(ct, ct->ct_path_count);
			} else {
				MDI_DEBUG(4, (MDI_NOTE, client_dip,
				    "i_mdi_pm_reset_client\n"));
				i_mdi_pm_reset_client(ct);
			}
		}

		MDI_CLIENT_UNLOCK(ct);
		break;

	default:
		break;
	}

	if (devnm)
		ndi_devi_exit(vdip, circ);

	return (ret);
}
7220
7221 int
mdi_component_is_vhci(dev_info_t * dip,const char ** mdi_class)7222 mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class)
7223 {
7224 mdi_vhci_t *vhci;
7225
7226 if (!MDI_VHCI(dip))
7227 return (MDI_FAILURE);
7228
7229 if (mdi_class) {
7230 vhci = DEVI(dip)->devi_mdi_xhci;
7231 ASSERT(vhci);
7232 *mdi_class = vhci->vh_class;
7233 }
7234
7235 return (MDI_SUCCESS);
7236 }
7237
7238 int
mdi_component_is_phci(dev_info_t * dip,const char ** mdi_class)7239 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class)
7240 {
7241 mdi_phci_t *phci;
7242
7243 if (!MDI_PHCI(dip))
7244 return (MDI_FAILURE);
7245
7246 if (mdi_class) {
7247 phci = DEVI(dip)->devi_mdi_xhci;
7248 ASSERT(phci);
7249 *mdi_class = phci->ph_vhci->vh_class;
7250 }
7251
7252 return (MDI_SUCCESS);
7253 }
7254
7255 int
mdi_component_is_client(dev_info_t * dip,const char ** mdi_class)7256 mdi_component_is_client(dev_info_t *dip, const char **mdi_class)
7257 {
7258 mdi_client_t *client;
7259
7260 if (!MDI_CLIENT(dip))
7261 return (MDI_FAILURE);
7262
7263 if (mdi_class) {
7264 client = DEVI(dip)->devi_mdi_client;
7265 ASSERT(client);
7266 *mdi_class = client->ct_vhci->vh_class;
7267 }
7268
7269 return (MDI_SUCCESS);
7270 }
7271
7272 void *
mdi_client_get_vhci_private(dev_info_t * dip)7273 mdi_client_get_vhci_private(dev_info_t *dip)
7274 {
7275 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
7276 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
7277 mdi_client_t *ct;
7278 ct = i_devi_get_client(dip);
7279 return (ct->ct_vprivate);
7280 }
7281 return (NULL);
7282 }
7283
7284 void
mdi_client_set_vhci_private(dev_info_t * dip,void * data)7285 mdi_client_set_vhci_private(dev_info_t *dip, void *data)
7286 {
7287 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
7288 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
7289 mdi_client_t *ct;
7290 ct = i_devi_get_client(dip);
7291 ct->ct_vprivate = data;
7292 }
7293 }
7294 /*
7295 * mdi_pi_get_vhci_private():
7296 * Get the vhci private information associated with the
7297 * mdi_pathinfo node
7298 */
7299 void *
mdi_pi_get_vhci_private(mdi_pathinfo_t * pip)7300 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip)
7301 {
7302 caddr_t vprivate = NULL;
7303 if (pip) {
7304 vprivate = MDI_PI(pip)->pi_vprivate;
7305 }
7306 return (vprivate);
7307 }
7308
7309 /*
7310 * mdi_pi_set_vhci_private():
7311 * Set the vhci private information in the mdi_pathinfo node
7312 */
7313 void
mdi_pi_set_vhci_private(mdi_pathinfo_t * pip,void * priv)7314 mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv)
7315 {
7316 if (pip) {
7317 MDI_PI(pip)->pi_vprivate = priv;
7318 }
7319 }
7320
7321 /*
7322 * mdi_phci_get_vhci_private():
7323 * Get the vhci private information associated with the
7324 * mdi_phci node
7325 */
7326 void *
mdi_phci_get_vhci_private(dev_info_t * dip)7327 mdi_phci_get_vhci_private(dev_info_t *dip)
7328 {
7329 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
7330 if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
7331 mdi_phci_t *ph;
7332 ph = i_devi_get_phci(dip);
7333 return (ph->ph_vprivate);
7334 }
7335 return (NULL);
7336 }
7337
7338 /*
7339 * mdi_phci_set_vhci_private():
7340 * Set the vhci private information in the mdi_phci node
7341 */
7342 void
mdi_phci_set_vhci_private(dev_info_t * dip,void * priv)7343 mdi_phci_set_vhci_private(dev_info_t *dip, void *priv)
7344 {
7345 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
7346 if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
7347 mdi_phci_t *ph;
7348 ph = i_devi_get_phci(dip);
7349 ph->ph_vprivate = priv;
7350 }
7351 }
7352
/* Return non-zero if the pathinfo node carries the HIDDEN flag */
int
mdi_pi_ishidden(mdi_pathinfo_t *pip)
{
	return (MDI_PI_FLAGS_IS_HIDDEN(pip));
}
7358
/* Return non-zero if the pathinfo node is marked DEVICE_REMOVED */
int
mdi_pi_device_isremoved(mdi_pathinfo_t *pip)
{
	return (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip));
}
7364
7365 /* Return 1 if all client paths are device_removed */
7366 static int
i_mdi_client_all_devices_removed(mdi_client_t * ct)7367 i_mdi_client_all_devices_removed(mdi_client_t *ct)
7368 {
7369 mdi_pathinfo_t *pip;
7370 int all_devices_removed = 1;
7371
7372 MDI_CLIENT_LOCK(ct);
7373 for (pip = ct->ct_path_head; pip;
7374 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link) {
7375 if (!mdi_pi_device_isremoved(pip)) {
7376 all_devices_removed = 0;
7377 break;
7378 }
7379 }
7380 MDI_CLIENT_UNLOCK(ct);
7381 return (all_devices_removed);
7382 }
7383
7384 /*
7385 * When processing path hotunplug, represent device removal.
7386 */
7387 int
mdi_pi_device_remove(mdi_pathinfo_t * pip)7388 mdi_pi_device_remove(mdi_pathinfo_t *pip)
7389 {
7390 mdi_client_t *ct;
7391
7392 MDI_PI_LOCK(pip);
7393 if (mdi_pi_device_isremoved(pip)) {
7394 MDI_PI_UNLOCK(pip);
7395 return (0);
7396 }
7397 MDI_PI_FLAGS_SET_DEVICE_REMOVED(pip);
7398 MDI_PI_FLAGS_SET_HIDDEN(pip);
7399 MDI_PI_UNLOCK(pip);
7400
7401 /*
7402 * If all paths associated with the client are now DEVICE_REMOVED,
7403 * reflect DEVICE_REMOVED in the client.
7404 */
7405 ct = MDI_PI(pip)->pi_client;
7406 if (ct && ct->ct_dip && i_mdi_client_all_devices_removed(ct))
7407 (void) ndi_devi_device_remove(ct->ct_dip);
7408 else
7409 i_ddi_di_cache_invalidate();
7410
7411 return (1);
7412 }
7413
7414 /*
7415 * When processing hotplug, if a path marked mdi_pi_device_isremoved()
7416 * is now accessible then this interfaces is used to represent device insertion.
7417 */
7418 int
mdi_pi_device_insert(mdi_pathinfo_t * pip)7419 mdi_pi_device_insert(mdi_pathinfo_t *pip)
7420 {
7421 MDI_PI_LOCK(pip);
7422 if (!mdi_pi_device_isremoved(pip)) {
7423 MDI_PI_UNLOCK(pip);
7424 return (0);
7425 }
7426 MDI_PI_FLAGS_CLR_DEVICE_REMOVED(pip);
7427 MDI_PI_FLAGS_CLR_HIDDEN(pip);
7428 MDI_PI_UNLOCK(pip);
7429
7430 i_ddi_di_cache_invalidate();
7431
7432 return (1);
7433 }
7434
7435 /*
7436 * List of vhci class names:
7437 * A vhci class name must be in this list only if the corresponding vhci
7438 * driver intends to use the mdi provided bus config implementation
7439 * (i.e., mdi_vhci_bus_config()).
7440 */
7441 static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB };
7442 #define N_VHCI_CLASSES (sizeof (vhci_class_list) / sizeof (char *))
7443
7444 /*
7445 * During boot time, the on-disk vhci cache for every vhci class is read
7446 * in the form of an nvlist and stored here.
7447 */
7448 static nvlist_t *vhcache_nvl[N_VHCI_CLASSES];
7449
7450 /* nvpair names in vhci cache nvlist */
7451 #define MDI_VHCI_CACHE_VERSION 1
7452 #define MDI_NVPNAME_VERSION "version"
7453 #define MDI_NVPNAME_PHCIS "phcis"
7454 #define MDI_NVPNAME_CTADDRMAP "clientaddrmap"
7455
7456 /*
7457 * Given vhci class name, return its on-disk vhci cache filename.
7458 * Memory for the returned filename which includes the full path is allocated
7459 * by this function.
7460 */
7461 static char *
vhclass2vhcache_filename(char * vhclass)7462 vhclass2vhcache_filename(char *vhclass)
7463 {
7464 char *filename;
7465 int len;
7466 static char *fmt = "/etc/devices/mdi_%s_cache";
7467
7468 /*
7469 * fmt contains the on-disk vhci cache file name format;
7470 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache".
7471 */
7472
7473 /* the -1 below is to account for "%s" in the format string */
7474 len = strlen(fmt) + strlen(vhclass) - 1;
7475 filename = kmem_alloc(len, KM_SLEEP);
7476 (void) snprintf(filename, len, fmt, vhclass);
7477 ASSERT(len == (strlen(filename) + 1));
7478 return (filename);
7479 }
7480
7481 /*
7482 * initialize the vhci cache related data structures and read the on-disk
7483 * vhci cached data into memory.
7484 */
7485 static void
setup_vhci_cache(mdi_vhci_t * vh)7486 setup_vhci_cache(mdi_vhci_t *vh)
7487 {
7488 mdi_vhci_config_t *vhc;
7489 mdi_vhci_cache_t *vhcache;
7490 int i;
7491 nvlist_t *nvl = NULL;
7492
7493 vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP);
7494 vh->vh_config = vhc;
7495 vhcache = &vhc->vhc_vhcache;
7496
7497 vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class);
7498
7499 mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL);
7500 cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL);
7501
7502 rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL);
7503
7504 /*
7505 * Create string hash; same as mod_hash_create_strhash() except that
7506 * we use NULL key destructor.
7507 */
7508 vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class,
7509 mdi_bus_config_cache_hash_size,
7510 mod_hash_null_keydtor, mod_hash_null_valdtor,
7511 mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
7512
7513 /*
7514 * The on-disk vhci cache is read during booting prior to the
7515 * lights-out period by mdi_read_devices_files().
7516 */
7517 for (i = 0; i < N_VHCI_CLASSES; i++) {
7518 if (strcmp(vhci_class_list[i], vh->vh_class) == 0) {
7519 nvl = vhcache_nvl[i];
7520 vhcache_nvl[i] = NULL;
7521 break;
7522 }
7523 }
7524
7525 /*
7526 * this is to cover the case of some one manually causing unloading
7527 * (or detaching) and reloading (or attaching) of a vhci driver.
7528 */
7529 if (nvl == NULL && modrootloaded)
7530 nvl = read_on_disk_vhci_cache(vh->vh_class);
7531
7532 if (nvl != NULL) {
7533 rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7534 if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS)
7535 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
7536 else {
7537 cmn_err(CE_WARN,
7538 "%s: data file corrupted, will recreate",
7539 vhc->vhc_vhcache_filename);
7540 }
7541 rw_exit(&vhcache->vhcache_lock);
7542 nvlist_free(nvl);
7543 }
7544
7545 vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc,
7546 CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush");
7547
7548 vhc->vhc_path_discovery_boot = mdi_path_discovery_boot;
7549 vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot;
7550 }
7551
7552 /*
7553 * free all vhci cache related resources
7554 */
7555 static int
destroy_vhci_cache(mdi_vhci_t * vh)7556 destroy_vhci_cache(mdi_vhci_t *vh)
7557 {
7558 mdi_vhci_config_t *vhc = vh->vh_config;
7559 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7560 mdi_vhcache_phci_t *cphci, *cphci_next;
7561 mdi_vhcache_client_t *cct, *cct_next;
7562 mdi_vhcache_pathinfo_t *cpi, *cpi_next;
7563
7564 if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS)
7565 return (MDI_FAILURE);
7566
7567 kmem_free(vhc->vhc_vhcache_filename,
7568 strlen(vhc->vhc_vhcache_filename) + 1);
7569
7570 mod_hash_destroy_strhash(vhcache->vhcache_client_hash);
7571
7572 for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7573 cphci = cphci_next) {
7574 cphci_next = cphci->cphci_next;
7575 free_vhcache_phci(cphci);
7576 }
7577
7578 for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) {
7579 cct_next = cct->cct_next;
7580 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) {
7581 cpi_next = cpi->cpi_next;
7582 free_vhcache_pathinfo(cpi);
7583 }
7584 free_vhcache_client(cct);
7585 }
7586
7587 rw_destroy(&vhcache->vhcache_lock);
7588
7589 mutex_destroy(&vhc->vhc_lock);
7590 cv_destroy(&vhc->vhc_cv);
7591 kmem_free(vhc, sizeof (mdi_vhci_config_t));
7592 return (MDI_SUCCESS);
7593 }
7594
7595 /*
7596 * Stop all vhci cache related async threads and free their resources.
7597 */
7598 static int
stop_vhcache_async_threads(mdi_vhci_config_t * vhc)7599 stop_vhcache_async_threads(mdi_vhci_config_t *vhc)
7600 {
7601 mdi_async_client_config_t *acc, *acc_next;
7602
7603 mutex_enter(&vhc->vhc_lock);
7604 vhc->vhc_flags |= MDI_VHC_EXIT;
7605 ASSERT(vhc->vhc_acc_thrcount >= 0);
7606 cv_broadcast(&vhc->vhc_cv);
7607
7608 while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) ||
7609 vhc->vhc_acc_thrcount != 0) {
7610 mutex_exit(&vhc->vhc_lock);
7611 delay_random(mdi_delay);
7612 mutex_enter(&vhc->vhc_lock);
7613 }
7614
7615 vhc->vhc_flags &= ~MDI_VHC_EXIT;
7616
7617 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) {
7618 acc_next = acc->acc_next;
7619 free_async_client_config(acc);
7620 }
7621 vhc->vhc_acc_list_head = NULL;
7622 vhc->vhc_acc_list_tail = NULL;
7623 vhc->vhc_acc_count = 0;
7624
7625 if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
7626 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
7627 mutex_exit(&vhc->vhc_lock);
7628 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) {
7629 vhcache_dirty(vhc);
7630 return (MDI_FAILURE);
7631 }
7632 } else
7633 mutex_exit(&vhc->vhc_lock);
7634
7635 if (callb_delete(vhc->vhc_cbid) != 0)
7636 return (MDI_FAILURE);
7637
7638 return (MDI_SUCCESS);
7639 }
7640
7641 /*
7642 * Stop vhci cache flush thread
7643 */
7644 /* ARGSUSED */
7645 static boolean_t
stop_vhcache_flush_thread(void * arg,int code)7646 stop_vhcache_flush_thread(void *arg, int code)
7647 {
7648 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
7649
7650 mutex_enter(&vhc->vhc_lock);
7651 vhc->vhc_flags |= MDI_VHC_EXIT;
7652 cv_broadcast(&vhc->vhc_cv);
7653
7654 while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
7655 mutex_exit(&vhc->vhc_lock);
7656 delay_random(mdi_delay);
7657 mutex_enter(&vhc->vhc_lock);
7658 }
7659
7660 if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
7661 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
7662 mutex_exit(&vhc->vhc_lock);
7663 (void) flush_vhcache(vhc, 1);
7664 } else
7665 mutex_exit(&vhc->vhc_lock);
7666
7667 return (B_TRUE);
7668 }
7669
7670 /*
7671 * Enqueue the vhcache phci (cphci) at the tail of the list
7672 */
7673 static void
enqueue_vhcache_phci(mdi_vhci_cache_t * vhcache,mdi_vhcache_phci_t * cphci)7674 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci)
7675 {
7676 cphci->cphci_next = NULL;
7677 if (vhcache->vhcache_phci_head == NULL)
7678 vhcache->vhcache_phci_head = cphci;
7679 else
7680 vhcache->vhcache_phci_tail->cphci_next = cphci;
7681 vhcache->vhcache_phci_tail = cphci;
7682 }
7683
7684 /*
7685 * Enqueue the vhcache pathinfo (cpi) at the tail of the list
7686 */
7687 static void
enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t * cct,mdi_vhcache_pathinfo_t * cpi)7688 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct,
7689 mdi_vhcache_pathinfo_t *cpi)
7690 {
7691 cpi->cpi_next = NULL;
7692 if (cct->cct_cpi_head == NULL)
7693 cct->cct_cpi_head = cpi;
7694 else
7695 cct->cct_cpi_tail->cpi_next = cpi;
7696 cct->cct_cpi_tail = cpi;
7697 }
7698
7699 /*
7700 * Enqueue the vhcache pathinfo (cpi) at the correct location in the
7701 * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
7702 * flag set come at the beginning of the list. All cpis which have this
7703 * flag set come at the end of the list.
7704 */
7705 static void
enqueue_vhcache_pathinfo(mdi_vhcache_client_t * cct,mdi_vhcache_pathinfo_t * newcpi)7706 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct,
7707 mdi_vhcache_pathinfo_t *newcpi)
7708 {
7709 mdi_vhcache_pathinfo_t *cpi, *prev_cpi;
7710
7711 if (cct->cct_cpi_head == NULL ||
7712 (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))
7713 enqueue_tail_vhcache_pathinfo(cct, newcpi);
7714 else {
7715 for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL &&
7716 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST);
7717 prev_cpi = cpi, cpi = cpi->cpi_next)
7718 ;
7719
7720 if (prev_cpi == NULL)
7721 cct->cct_cpi_head = newcpi;
7722 else
7723 prev_cpi->cpi_next = newcpi;
7724
7725 newcpi->cpi_next = cpi;
7726
7727 if (cpi == NULL)
7728 cct->cct_cpi_tail = newcpi;
7729 }
7730 }
7731
7732 /*
7733 * Enqueue the vhcache client (cct) at the tail of the list
7734 */
7735 static void
enqueue_vhcache_client(mdi_vhci_cache_t * vhcache,mdi_vhcache_client_t * cct)7736 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache,
7737 mdi_vhcache_client_t *cct)
7738 {
7739 cct->cct_next = NULL;
7740 if (vhcache->vhcache_client_head == NULL)
7741 vhcache->vhcache_client_head = cct;
7742 else
7743 vhcache->vhcache_client_tail->cct_next = cct;
7744 vhcache->vhcache_client_tail = cct;
7745 }
7746
7747 static void
free_string_array(char ** str,int nelem)7748 free_string_array(char **str, int nelem)
7749 {
7750 int i;
7751
7752 if (str) {
7753 for (i = 0; i < nelem; i++) {
7754 if (str[i])
7755 kmem_free(str[i], strlen(str[i]) + 1);
7756 }
7757 kmem_free(str, sizeof (char *) * nelem);
7758 }
7759 }
7760
/* Free a vhcache phci entry, including its kmem-allocated path string */
static void
free_vhcache_phci(mdi_vhcache_phci_t *cphci)
{
	kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1);
	kmem_free(cphci, sizeof (*cphci));
}
7767
/* Free a vhcache pathinfo entry, including its kmem-allocated address */
static void
free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi)
{
	kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1);
	kmem_free(cpi, sizeof (*cpi));
}
7774
/* Free a vhcache client entry, including its kmem-allocated name@addr */
static void
free_vhcache_client(mdi_vhcache_client_t *cct)
{
	kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1);
	kmem_free(cct, sizeof (*cct));
}
7781
7782 static char *
vhcache_mknameaddr(char * ct_name,char * ct_addr,int * ret_len)7783 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len)
7784 {
7785 char *name_addr;
7786 int len;
7787
7788 len = strlen(ct_name) + strlen(ct_addr) + 2;
7789 name_addr = kmem_alloc(len, KM_SLEEP);
7790 (void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr);
7791
7792 if (ret_len)
7793 *ret_len = len;
7794 return (name_addr);
7795 }
7796
7797 /*
7798 * Copy the contents of paddrnvl to vhci cache.
7799 * paddrnvl nvlist contains path information for a vhci client.
7800 * See the comment in mainnvl_to_vhcache() for the format of this nvlist.
7801 */
7802 static void
paddrnvl_to_vhcache(nvlist_t * nvl,mdi_vhcache_phci_t * cphci_list[],mdi_vhcache_client_t * cct)7803 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[],
7804 mdi_vhcache_client_t *cct)
7805 {
7806 nvpair_t *nvp = NULL;
7807 mdi_vhcache_pathinfo_t *cpi;
7808 uint_t nelem;
7809 uint32_t *val;
7810
7811 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
7812 ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY);
7813 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
7814 cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
7815 (void) nvpair_value_uint32_array(nvp, &val, &nelem);
7816 ASSERT(nelem == 2);
7817 cpi->cpi_cphci = cphci_list[val[0]];
7818 cpi->cpi_flags = val[1];
7819 enqueue_tail_vhcache_pathinfo(cct, cpi);
7820 }
7821 }
7822
7823 /*
7824 * Copy the contents of caddrmapnvl to vhci cache.
7825 * caddrmapnvl nvlist contains vhci client address to phci client address
7826 * mappings. See the comment in mainnvl_to_vhcache() for the format of
7827 * this nvlist.
7828 */
7829 static void
caddrmapnvl_to_vhcache(mdi_vhci_cache_t * vhcache,nvlist_t * nvl,mdi_vhcache_phci_t * cphci_list[])7830 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl,
7831 mdi_vhcache_phci_t *cphci_list[])
7832 {
7833 nvpair_t *nvp = NULL;
7834 nvlist_t *paddrnvl;
7835 mdi_vhcache_client_t *cct;
7836
7837 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
7838 ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST);
7839 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
7840 cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
7841 (void) nvpair_value_nvlist(nvp, &paddrnvl);
7842 paddrnvl_to_vhcache(paddrnvl, cphci_list, cct);
7843 /* the client must contain at least one path */
7844 ASSERT(cct->cct_cpi_head != NULL);
7845
7846 enqueue_vhcache_client(vhcache, cct);
7847 (void) mod_hash_insert(vhcache->vhcache_client_hash,
7848 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
7849 }
7850 }
7851
7852 /*
7853 * Copy the contents of the main nvlist to vhci cache.
7854 *
7855 * VHCI busconfig cached data is stored in the form of a nvlist on the disk.
7856 * The nvlist contains the mappings between the vhci client addresses and
7857 * their corresponding phci client addresses.
7858 *
7859 * The structure of the nvlist is as follows:
7860 *
7861 * Main nvlist:
7862 * NAME TYPE DATA
7863 * version int32 version number
7864 * phcis string array array of phci paths
7865 * clientaddrmap nvlist_t c2paddrs_nvl (see below)
7866 *
7867 * structure of c2paddrs_nvl:
7868 * NAME TYPE DATA
7869 * caddr1 nvlist_t paddrs_nvl1
7870 * caddr2 nvlist_t paddrs_nvl2
7871 * ...
7872 * where caddr1, caddr2, ... are vhci client name and addresses in the
7873 * form of "<clientname>@<clientaddress>".
7874 * (for example: "ssd@2000002037cd9f72");
7875 * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information.
7876 *
7877 * structure of paddrs_nvl:
7878 * NAME TYPE DATA
7879 * pi_addr1 uint32_array (phci-id, cpi_flags)
7880 * pi_addr2 uint32_array (phci-id, cpi_flags)
7881 * ...
7882 * where pi_addr1, pi_addr2, ... are bus specific addresses of pathinfo nodes
7883 * (so called pi_addrs, for example: "w2100002037cd9f72,0");
7884 * phci-ids are integers that identify pHCIs to which the
7885 * the bus specific address belongs to. These integers are used as an index
7886 * into to the phcis string array in the main nvlist to get the pHCI path.
7887 */
7888 static int
mainnvl_to_vhcache(mdi_vhci_cache_t * vhcache,nvlist_t * nvl)7889 mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl)
7890 {
7891 char **phcis, **phci_namep;
7892 uint_t nphcis;
7893 mdi_vhcache_phci_t *cphci, **cphci_list;
7894 nvlist_t *caddrmapnvl;
7895 int32_t ver;
7896 int i;
7897 size_t cphci_list_size;
7898
7899 ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock));
7900
7901 if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 ||
7902 ver != MDI_VHCI_CACHE_VERSION)
7903 return (MDI_FAILURE);
7904
7905 if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis,
7906 &nphcis) != 0)
7907 return (MDI_SUCCESS);
7908
7909 ASSERT(nphcis > 0);
7910
7911 cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis;
7912 cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP);
7913 for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) {
7914 cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP);
7915 cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP);
7916 enqueue_vhcache_phci(vhcache, cphci);
7917 cphci_list[i] = cphci;
7918 }
7919
7920 ASSERT(vhcache->vhcache_phci_head != NULL);
7921
7922 if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0)
7923 caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list);
7924
7925 kmem_free(cphci_list, cphci_list_size);
7926 return (MDI_SUCCESS);
7927 }
7928
7929 /*
7930 * Build paddrnvl for the specified client using the information in the
 * vhci cache and add it to the caddrmapnvl.
7932 * Returns 0 on success, errno on failure.
7933 */
7934 static int
vhcache_to_paddrnvl(mdi_vhci_cache_t * vhcache,mdi_vhcache_client_t * cct,nvlist_t * caddrmapnvl)7935 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct,
7936 nvlist_t *caddrmapnvl)
7937 {
7938 mdi_vhcache_pathinfo_t *cpi;
7939 nvlist_t *nvl;
7940 int err;
7941 uint32_t val[2];
7942
7943 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7944
7945 if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0)
7946 return (err);
7947
7948 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7949 val[0] = cpi->cpi_cphci->cphci_id;
7950 val[1] = cpi->cpi_flags;
7951 if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2))
7952 != 0)
7953 goto out;
7954 }
7955
7956 err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl);
7957 out:
7958 nvlist_free(nvl);
7959 return (err);
7960 }
7961
7962 /*
7963 * Build caddrmapnvl using the information in the vhci cache
7964 * and add it to the mainnvl.
7965 * Returns 0 on success, errno on failure.
7966 */
7967 static int
vhcache_to_caddrmapnvl(mdi_vhci_cache_t * vhcache,nvlist_t * mainnvl)7968 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl)
7969 {
7970 mdi_vhcache_client_t *cct;
7971 nvlist_t *nvl;
7972 int err;
7973
7974 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7975
7976 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0)
7977 return (err);
7978
7979 for (cct = vhcache->vhcache_client_head; cct != NULL;
7980 cct = cct->cct_next) {
7981 if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0)
7982 goto out;
7983 }
7984
7985 err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl);
7986 out:
7987 nvlist_free(nvl);
7988 return (err);
7989 }
7990
7991 /*
7992 * Build nvlist using the information in the vhci cache.
7993 * See the comment in mainnvl_to_vhcache() for the format of the nvlist.
7994 * Returns nvl on success, NULL on failure.
7995 */
static nvlist_t *
vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache)
{
	mdi_vhcache_phci_t *cphci;
	uint_t phci_count;
	char **phcis;
	nvlist_t *nvl;
	int err, i;

	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) {
		nvl = NULL;
		goto out;
	}

	if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION,
	    MDI_VHCI_CACHE_VERSION)) != 0)
		goto out;

	rw_enter(&vhcache->vhcache_lock, RW_READER);
	/* an empty cache is represented by an nvlist with just the version */
	if (vhcache->vhcache_phci_head == NULL) {
		rw_exit(&vhcache->vhcache_lock);
		return (nvl);
	}

	/*
	 * Number the phcis; cphci_id is the index of the phci in the
	 * phcis string array built below, and is what the per-client
	 * paddrnvl entries refer to.
	 */
	phci_count = 0;
	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
	    cphci = cphci->cphci_next)
		cphci->cphci_id = phci_count++;

	/* build phci pathname list */
	phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP);
	for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL;
	    cphci = cphci->cphci_next, i++)
		phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP);

	err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis,
	    phci_count);
	/* our copy of the strings is no longer needed once added */
	free_string_array(phcis, phci_count);

	if (err == 0 &&
	    (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) {
		rw_exit(&vhcache->vhcache_lock);
		return (nvl);
	}

	rw_exit(&vhcache->vhcache_lock);
out:
	/* any failure discards the partially-built nvlist */
	if (nvl)
		nvlist_free(nvl);
	return (NULL);
}
8047
8048 /*
8049 * Lookup vhcache phci structure for the specified phci path.
8050 */
8051 static mdi_vhcache_phci_t *
lookup_vhcache_phci_by_name(mdi_vhci_cache_t * vhcache,char * phci_path)8052 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path)
8053 {
8054 mdi_vhcache_phci_t *cphci;
8055
8056 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8057
8058 for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8059 cphci = cphci->cphci_next) {
8060 if (strcmp(cphci->cphci_path, phci_path) == 0)
8061 return (cphci);
8062 }
8063
8064 return (NULL);
8065 }
8066
8067 /*
8068 * Lookup vhcache phci structure for the specified phci.
8069 */
8070 static mdi_vhcache_phci_t *
lookup_vhcache_phci_by_addr(mdi_vhci_cache_t * vhcache,mdi_phci_t * ph)8071 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph)
8072 {
8073 mdi_vhcache_phci_t *cphci;
8074
8075 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8076
8077 for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8078 cphci = cphci->cphci_next) {
8079 if (cphci->cphci_phci == ph)
8080 return (cphci);
8081 }
8082
8083 return (NULL);
8084 }
8085
8086 /*
8087 * Add the specified phci to the vhci cache if not already present.
8088 */
8089 static void
vhcache_phci_add(mdi_vhci_config_t * vhc,mdi_phci_t * ph)8090 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
8091 {
8092 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8093 mdi_vhcache_phci_t *cphci;
8094 char *pathname;
8095 int cache_updated;
8096
8097 rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8098
8099 pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
8100 (void) ddi_pathname(ph->ph_dip, pathname);
8101 if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname))
8102 != NULL) {
8103 cphci->cphci_phci = ph;
8104 cache_updated = 0;
8105 } else {
8106 cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP);
8107 cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP);
8108 cphci->cphci_phci = ph;
8109 enqueue_vhcache_phci(vhcache, cphci);
8110 cache_updated = 1;
8111 }
8112
8113 rw_exit(&vhcache->vhcache_lock);
8114
8115 /*
8116 * Since a new phci has been added, reset
8117 * vhc_path_discovery_cutoff_time to allow for discovery of paths
8118 * during next vhcache_discover_paths().
8119 */
8120 mutex_enter(&vhc->vhc_lock);
8121 vhc->vhc_path_discovery_cutoff_time = 0;
8122 mutex_exit(&vhc->vhc_lock);
8123
8124 kmem_free(pathname, MAXPATHLEN);
8125 if (cache_updated)
8126 vhcache_dirty(vhc);
8127 }
8128
8129 /*
8130 * Remove the reference to the specified phci from the vhci cache.
8131 */
8132 static void
vhcache_phci_remove(mdi_vhci_config_t * vhc,mdi_phci_t * ph)8133 vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
8134 {
8135 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8136 mdi_vhcache_phci_t *cphci;
8137
8138 rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8139 if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) {
8140 /* do not remove the actual mdi_vhcache_phci structure */
8141 cphci->cphci_phci = NULL;
8142 }
8143 rw_exit(&vhcache->vhcache_lock);
8144 }
8145
8146 static void
init_vhcache_lookup_token(mdi_vhcache_lookup_token_t * dst,mdi_vhcache_lookup_token_t * src)8147 init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst,
8148 mdi_vhcache_lookup_token_t *src)
8149 {
8150 if (src == NULL) {
8151 dst->lt_cct = NULL;
8152 dst->lt_cct_lookup_time = 0;
8153 } else {
8154 dst->lt_cct = src->lt_cct;
8155 dst->lt_cct_lookup_time = src->lt_cct_lookup_time;
8156 }
8157 }
8158
8159 /*
8160 * Look up vhcache client for the specified client.
8161 */
8162 static mdi_vhcache_client_t *
lookup_vhcache_client(mdi_vhci_cache_t * vhcache,char * ct_name,char * ct_addr,mdi_vhcache_lookup_token_t * token)8163 lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr,
8164 mdi_vhcache_lookup_token_t *token)
8165 {
8166 mod_hash_val_t hv;
8167 char *name_addr;
8168 int len;
8169
8170 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8171
8172 /*
8173 * If no vhcache clean occurred since the last lookup, we can
8174 * simply return the cct from the last lookup operation.
8175 * It works because ccts are never freed except during the vhcache
8176 * cleanup operation.
8177 */
8178 if (token != NULL &&
8179 vhcache->vhcache_clean_time < token->lt_cct_lookup_time)
8180 return (token->lt_cct);
8181
8182 name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len);
8183 if (mod_hash_find(vhcache->vhcache_client_hash,
8184 (mod_hash_key_t)name_addr, &hv) == 0) {
8185 if (token) {
8186 token->lt_cct = (mdi_vhcache_client_t *)hv;
8187 token->lt_cct_lookup_time = ddi_get_lbolt64();
8188 }
8189 } else {
8190 if (token) {
8191 token->lt_cct = NULL;
8192 token->lt_cct_lookup_time = 0;
8193 }
8194 hv = NULL;
8195 }
8196 kmem_free(name_addr, len);
8197 return ((mdi_vhcache_client_t *)hv);
8198 }
8199
8200 /*
8201 * Add the specified path to the vhci cache if not already present.
8202 * Also add the vhcache client for the client corresponding to this path
8203 * if it doesn't already exist.
8204 */
static void
vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_client_t *cct;
	mdi_vhcache_pathinfo_t *cpi;
	mdi_phci_t *ph = pip->pi_phci;
	mdi_client_t *ct = pip->pi_client;
	int cache_updated = 0;

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);

	/* if vhcache client for this pip doesn't already exist, add it */
	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
	    NULL)) == NULL) {
		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
		cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname,
		    ct->ct_guid, NULL);
		enqueue_vhcache_client(vhcache, cct);
		(void) mod_hash_insert(vhcache->vhcache_client_hash,
		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
		cache_updated = 1;
	}

	/* look for an existing cache entry matching this phci + address */
	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		if (cpi->cpi_cphci->cphci_phci == ph &&
		    strcmp(cpi->cpi_addr, pip->pi_addr) == 0) {
			cpi->cpi_pip = pip;
			if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) {
				/*
				 * The path exists after all.  Clearing the
				 * hint changes the sort order of the cpi
				 * list, and since the hint is persisted in
				 * the on-disk cache, the cache must be
				 * flushed (cache_updated).
				 */
				cpi->cpi_flags &=
				    ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
				sort_vhcache_paths(cct);
				cache_updated = 1;
			}
			break;
		}
	}

	/* no matching entry found; create one bound to this pip */
	if (cpi == NULL) {
		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
		cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP);
		cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph);
		ASSERT(cpi->cpi_cphci != NULL);
		cpi->cpi_pip = pip;
		enqueue_vhcache_pathinfo(cct, cpi);
		cache_updated = 1;
	}

	rw_exit(&vhcache->vhcache_lock);

	if (cache_updated)
		vhcache_dirty(vhc);
}
8258
8259 /*
8260 * Remove the reference to the specified path from the vhci cache.
8261 */
8262 static void
vhcache_pi_remove(mdi_vhci_config_t * vhc,struct mdi_pathinfo * pip)8263 vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
8264 {
8265 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8266 mdi_client_t *ct = pip->pi_client;
8267 mdi_vhcache_client_t *cct;
8268 mdi_vhcache_pathinfo_t *cpi;
8269
8270 rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8271 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
8272 NULL)) != NULL) {
8273 for (cpi = cct->cct_cpi_head; cpi != NULL;
8274 cpi = cpi->cpi_next) {
8275 if (cpi->cpi_pip == pip) {
8276 cpi->cpi_pip = NULL;
8277 break;
8278 }
8279 }
8280 }
8281 rw_exit(&vhcache->vhcache_lock);
8282 }
8283
8284 /*
8285 * Flush the vhci cache to disk.
8286 * Returns MDI_SUCCESS on success, MDI_FAILURE on failure.
8287 */
static int
flush_vhcache(mdi_vhci_config_t *vhc, int force_flag)
{
	nvlist_t *nvl;
	int err;
	int rv;

	/*
	 * It is possible that the system may shutdown before
	 * i_ddi_io_initialized (during stmsboot for example). To allow for
	 * flushing the cache in this case do not check for
	 * i_ddi_io_initialized when force flag is set.
	 */
	if (force_flag == 0 && !i_ddi_io_initialized())
		return (MDI_FAILURE);

	/* serialize the cache into an nvlist and write it to disk */
	if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) {
		err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl);
		nvlist_free(nvl);
	} else
		err = EFAULT;

	rv = MDI_SUCCESS;
	mutex_enter(&vhc->vhc_lock);
	if (err != 0) {
		if (err == EROFS) {
			/*
			 * Read-only filesystem: flushing can never succeed,
			 * so give up (clear DIRTY too, so no retry is
			 * scheduled) without reporting failure.
			 */
			vhc->vhc_flags |= MDI_VHC_READONLY_FS;
			vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR |
			    MDI_VHC_VHCACHE_DIRTY);
		} else {
			/* warn on the first failure only, not every retry */
			if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) {
				cmn_err(CE_CONT, "%s: update failed\n",
				    vhc->vhc_vhcache_filename);
				vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR;
			}
			rv = MDI_FAILURE;
		}
	} else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) {
		/* a previously reported failure has now recovered */
		cmn_err(CE_CONT,
		    "%s: update now ok\n", vhc->vhc_vhcache_filename);
		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR;
	}
	mutex_exit(&vhc->vhc_lock);

	return (rv);
}
8334
8335 /*
8336 * Call flush_vhcache() to flush the vhci cache at the scheduled time.
8337 * Exits itself if left idle for the idle timeout period.
8338 */
static void
vhcache_flush_thread(void *arg)
{
	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
	clock_t idle_time, quit_at_ticks;
	callb_cpr_t cprinfo;

	/* number of seconds to sleep idle before exiting */
	idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND;

	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
	    "mdi_vhcache_flush");
	mutex_enter(&vhc->vhc_lock);
	for (; ; ) {
		/*
		 * While the cache is dirty, wait until the scheduled flush
		 * time (vhc_flush_at_ticks) arrives, then flush.
		 */
		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
		    (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) {
			if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) {
				CALLB_CPR_SAFE_BEGIN(&cprinfo);
				(void) cv_timedwait(&vhc->vhc_cv,
				    &vhc->vhc_lock, vhc->vhc_flush_at_ticks);
				CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
			} else {
				/*
				 * Clear DIRTY before dropping the lock to
				 * flush; if the flush fails (or the cache
				 * is dirtied again meanwhile) vhcache_dirty()
				 * re-sets the flag and a new pass runs.
				 */
				vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
				mutex_exit(&vhc->vhc_lock);

				if (flush_vhcache(vhc, 0) != MDI_SUCCESS)
					vhcache_dirty(vhc);

				mutex_enter(&vhc->vhc_lock);
			}
		}

		/* cache is clean; linger for the idle period before exiting */
		quit_at_ticks = ddi_get_lbolt() + idle_time;

		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) &&
		    ddi_get_lbolt() < quit_at_ticks) {
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
			    quit_at_ticks);
			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
		}

		/* exit when asked to, or when the idle timeout expired clean */
		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY))
			goto out;
	}

out:
	vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD;
	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
	CALLB_CPR_EXIT(&cprinfo);
}
8392
8393 /*
8394 * Make vhci cache dirty and schedule flushing by vhcache flush thread.
8395 */
8396 static void
vhcache_dirty(mdi_vhci_config_t * vhc)8397 vhcache_dirty(mdi_vhci_config_t *vhc)
8398 {
8399 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8400 int create_thread;
8401
8402 rw_enter(&vhcache->vhcache_lock, RW_READER);
8403 /* do not flush cache until the cache is fully built */
8404 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
8405 rw_exit(&vhcache->vhcache_lock);
8406 return;
8407 }
8408 rw_exit(&vhcache->vhcache_lock);
8409
8410 mutex_enter(&vhc->vhc_lock);
8411 if (vhc->vhc_flags & MDI_VHC_READONLY_FS) {
8412 mutex_exit(&vhc->vhc_lock);
8413 return;
8414 }
8415
8416 vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY;
8417 vhc->vhc_flush_at_ticks = ddi_get_lbolt() +
8418 mdi_vhcache_flush_delay * TICKS_PER_SECOND;
8419 if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
8420 cv_broadcast(&vhc->vhc_cv);
8421 create_thread = 0;
8422 } else {
8423 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD;
8424 create_thread = 1;
8425 }
8426 mutex_exit(&vhc->vhc_lock);
8427
8428 if (create_thread)
8429 (void) thread_create(NULL, 0, vhcache_flush_thread, vhc,
8430 0, &p0, TS_RUN, minclsyspri);
8431 }
8432
/*
 * phci bus config structure - one for each phci bus config operation that
 * we initiate on behalf of a vhci.
 */
typedef struct mdi_phci_bus_config_s {
	char *phbc_phci_path;				/* path of the phci */
	struct mdi_vhci_bus_config_s *phbc_vhbusconfig;	/* vhci bus config */
	struct mdi_phci_bus_config_s *phbc_next;	/* next in work list */
} mdi_phci_bus_config_t;
8442
/* vhci bus config structure - one for each vhci bus config operation */
typedef struct mdi_vhci_bus_config_s {
	ddi_bus_config_op_t vhbc_op;	/* bus config op */
	major_t vhbc_op_major;		/* bus config op major */
	uint_t vhbc_op_flags;		/* bus config op flags */
	kmutex_t vhbc_lock;		/* protects vhbc_thr_count */
	kcondvar_t vhbc_cv;		/* signaled when thr_count drops to 0 */
	int vhbc_thr_count;		/* # of outstanding config threads */
} mdi_vhci_bus_config_t;
8452
8453 /*
8454 * bus config the specified phci
8455 */
8456 static void
bus_config_phci(void * arg)8457 bus_config_phci(void *arg)
8458 {
8459 mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg;
8460 mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig;
8461 dev_info_t *ph_dip;
8462
8463 /*
8464 * first configure all path components upto phci and then configure
8465 * the phci children.
8466 */
8467 if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0))
8468 != NULL) {
8469 if (vhbc->vhbc_op == BUS_CONFIG_DRIVER ||
8470 vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) {
8471 (void) ndi_devi_config_driver(ph_dip,
8472 vhbc->vhbc_op_flags,
8473 vhbc->vhbc_op_major);
8474 } else
8475 (void) ndi_devi_config(ph_dip,
8476 vhbc->vhbc_op_flags);
8477
8478 /* release the hold that e_ddi_hold_devi_by_path() placed */
8479 ndi_rele_devi(ph_dip);
8480 }
8481
8482 kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1);
8483 kmem_free(phbc, sizeof (*phbc));
8484
8485 mutex_enter(&vhbc->vhbc_lock);
8486 vhbc->vhbc_thr_count--;
8487 if (vhbc->vhbc_thr_count == 0)
8488 cv_broadcast(&vhbc->vhbc_cv);
8489 mutex_exit(&vhbc->vhbc_lock);
8490 }
8491
8492 /*
8493 * Bus config all phcis associated with the vhci in parallel.
8494 * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL.
8495 */
static void
bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags,
    ddi_bus_config_op_t op, major_t maj)
{
	mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next;
	mdi_vhci_bus_config_t *vhbc;
	mdi_vhcache_phci_t *cphci;

	rw_enter(&vhcache->vhcache_lock, RW_READER);
	if (vhcache->vhcache_phci_head == NULL) {
		/* no phcis cached; nothing to configure */
		rw_exit(&vhcache->vhcache_lock);
		return;
	}

	vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP);

	/*
	 * Build the per-phci work list while holding the cache lock.
	 * Each entry carries its own copy of the phci path so the list
	 * remains usable after the lock is dropped.
	 */
	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
	    cphci = cphci->cphci_next) {
		/* skip phcis that haven't attached before root is available */
		if (!modrootloaded && (cphci->cphci_phci == NULL))
			continue;
		phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP);
		phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path,
		    KM_SLEEP);
		phbc->phbc_vhbusconfig = vhbc;
		phbc->phbc_next = phbc_head;
		phbc_head = phbc;
		vhbc->vhbc_thr_count++;
	}
	rw_exit(&vhcache->vhcache_lock);

	vhbc->vhbc_op = op;
	vhbc->vhbc_op_major = maj;
	vhbc->vhbc_op_flags = NDI_NO_EVENT |
	    (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE));
	mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL);

	/* now create threads to initiate bus config on all phcis in parallel */
	for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) {
		phbc_next = phbc->phbc_next;
		if (mdi_mtc_off)
			/* multi-threaded config disabled; run inline */
			bus_config_phci((void *)phbc);
		else
			(void) thread_create(NULL, 0, bus_config_phci, phbc,
			    0, &p0, TS_RUN, minclsyspri);
	}

	mutex_enter(&vhbc->vhbc_lock);
	/* wait until all threads exit */
	while (vhbc->vhbc_thr_count > 0)
		cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock);
	mutex_exit(&vhbc->vhbc_lock);

	mutex_destroy(&vhbc->vhbc_lock);
	cv_destroy(&vhbc->vhbc_cv);
	kmem_free(vhbc, sizeof (*vhbc));
}
8554
8555 /*
8556 * Single threaded version of bus_config_all_phcis()
8557 */
8558 static void
st_bus_config_all_phcis(mdi_vhci_config_t * vhc,uint_t flags,ddi_bus_config_op_t op,major_t maj)8559 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags,
8560 ddi_bus_config_op_t op, major_t maj)
8561 {
8562 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8563
8564 single_threaded_vhconfig_enter(vhc);
8565 bus_config_all_phcis(vhcache, flags, op, maj);
8566 single_threaded_vhconfig_exit(vhc);
8567 }
8568
8569 /*
8570 * Perform BUS_CONFIG_ONE on the specified child of the phci.
8571 * The path includes the child component in addition to the phci path.
8572 */
static int
bus_config_one_phci_child(char *path)
{
	dev_info_t *ph_dip, *child;
	char *devnm;
	int rv = MDI_FAILURE;

	/*
	 * extract the child component of the phci by splitting the path
	 * in place at the last '/'; the path is restored before returning.
	 * NOTE(review): assumes path always contains a '/' — the strrchr()
	 * result is not checked for NULL; confirm against callers.
	 */
	devnm = strrchr(path, '/');
	*devnm++ = '\0';

	/*
	 * first configure all path components upto phci and then
	 * configure the phci child.
	 */
	if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) {
		if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) ==
		    NDI_SUCCESS) {
			/*
			 * release the hold that ndi_devi_config_one() placed
			 */
			ndi_rele_devi(child);
			rv = MDI_SUCCESS;
		}

		/* release the hold that e_ddi_hold_devi_by_path() placed */
		ndi_rele_devi(ph_dip);
	}

	/* restore the '/' overwritten above */
	devnm--;
	*devnm = '/';
	return (rv);
}
8606
8607 /*
8608 * Build a list of phci client paths for the specified vhci client.
8609 * The list includes only those phci client paths which aren't configured yet.
8610 */
8611 static mdi_phys_path_t *
build_phclient_path_list(mdi_vhcache_client_t * cct,char * ct_name)8612 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name)
8613 {
8614 mdi_vhcache_pathinfo_t *cpi;
8615 mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp;
8616 int config_path, len;
8617
8618 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8619 /*
8620 * include only those paths that aren't configured.
8621 */
8622 config_path = 0;
8623 if (cpi->cpi_pip == NULL)
8624 config_path = 1;
8625 else {
8626 MDI_PI_LOCK(cpi->cpi_pip);
8627 if (MDI_PI_IS_INIT(cpi->cpi_pip))
8628 config_path = 1;
8629 MDI_PI_UNLOCK(cpi->cpi_pip);
8630 }
8631
8632 if (config_path) {
8633 pp = kmem_alloc(sizeof (*pp), KM_SLEEP);
8634 len = strlen(cpi->cpi_cphci->cphci_path) +
8635 strlen(ct_name) + strlen(cpi->cpi_addr) + 3;
8636 pp->phys_path = kmem_alloc(len, KM_SLEEP);
8637 (void) snprintf(pp->phys_path, len, "%s/%s@%s",
8638 cpi->cpi_cphci->cphci_path, ct_name,
8639 cpi->cpi_addr);
8640 pp->phys_path_next = NULL;
8641
8642 if (pp_head == NULL)
8643 pp_head = pp;
8644 else
8645 pp_tail->phys_path_next = pp;
8646 pp_tail = pp;
8647 }
8648 }
8649
8650 return (pp_head);
8651 }
8652
8653 /*
8654 * Free the memory allocated for phci client path list.
8655 */
8656 static void
free_phclient_path_list(mdi_phys_path_t * pp_head)8657 free_phclient_path_list(mdi_phys_path_t *pp_head)
8658 {
8659 mdi_phys_path_t *pp, *pp_next;
8660
8661 for (pp = pp_head; pp != NULL; pp = pp_next) {
8662 pp_next = pp->phys_path_next;
8663 kmem_free(pp->phys_path, strlen(pp->phys_path) + 1);
8664 kmem_free(pp, sizeof (*pp));
8665 }
8666 }
8667
8668 /*
 * Allocate an async client structure and initialize it with the specified
8670 */
8671 static mdi_async_client_config_t *
alloc_async_client_config(char * ct_name,char * ct_addr,mdi_phys_path_t * pp_head,mdi_vhcache_lookup_token_t * tok)8672 alloc_async_client_config(char *ct_name, char *ct_addr,
8673 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8674 {
8675 mdi_async_client_config_t *acc;
8676
8677 acc = kmem_alloc(sizeof (*acc), KM_SLEEP);
8678 acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP);
8679 acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP);
8680 acc->acc_phclient_path_list_head = pp_head;
8681 init_vhcache_lookup_token(&acc->acc_token, tok);
8682 acc->acc_next = NULL;
8683 return (acc);
8684 }
8685
8686 /*
8687 * Free the memory allocated for the async client structure and their members.
8688 */
8689 static void
free_async_client_config(mdi_async_client_config_t * acc)8690 free_async_client_config(mdi_async_client_config_t *acc)
8691 {
8692 if (acc->acc_phclient_path_list_head)
8693 free_phclient_path_list(acc->acc_phclient_path_list_head);
8694 kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1);
8695 kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1);
8696 kmem_free(acc, sizeof (*acc));
8697 }
8698
8699 /*
8700 * Sort vhcache pathinfos (cpis) of the specified client.
8701 * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
8702 * flag set come at the beginning of the list. All cpis which have this
8703 * flag set come at the end of the list.
8704 */
8705 static void
sort_vhcache_paths(mdi_vhcache_client_t * cct)8706 sort_vhcache_paths(mdi_vhcache_client_t *cct)
8707 {
8708 mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head;
8709
8710 cpi_head = cct->cct_cpi_head;
8711 cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
8712 for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
8713 cpi_next = cpi->cpi_next;
8714 enqueue_vhcache_pathinfo(cct, cpi);
8715 }
8716 }
8717
8718 /*
8719 * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for
8720 * every vhcache pathinfo of the specified client. If not adjust the flag
8721 * setting appropriately.
8722 *
8723 * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the
8724 * on-disk vhci cache. So every time this flag is updated the cache must be
8725 * flushed.
8726 */
static void
adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
    mdi_vhcache_lookup_token_t *tok)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_client_t *cct;
	mdi_vhcache_pathinfo_t *cpi;

	/* start as a reader; upgrade to writer only if an update is needed */
	rw_enter(&vhcache->vhcache_lock, RW_READER);
	if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok))
	    == NULL) {
		rw_exit(&vhcache->vhcache_lock);
		return;
	}

	/*
	 * to avoid unnecessary on-disk cache updates, first check if an
	 * update is really needed. If no update is needed simply return.
	 */
	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		/* hint is stale if it disagrees with whether a pip exists */
		if ((cpi->cpi_pip != NULL &&
		    (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) ||
		    (cpi->cpi_pip == NULL &&
		    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) {
			break;
		}
	}
	if (cpi == NULL) {
		/* all hints already correct; no update needed */
		rw_exit(&vhcache->vhcache_lock);
		return;
	}

	if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) {
		/*
		 * Could not upgrade atomically: drop the lock and reacquire
		 * as writer.  The client must be looked up again because it
		 * may have been removed while the lock was not held.
		 */
		rw_exit(&vhcache->vhcache_lock);
		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
		if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr,
		    tok)) == NULL) {
			rw_exit(&vhcache->vhcache_lock);
			return;
		}
	}

	/* bring every hint in line with the current pip binding */
	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		if (cpi->cpi_pip != NULL)
			cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
		else
			cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
	}
	sort_vhcache_paths(cct);

	rw_exit(&vhcache->vhcache_lock);
	/* the hint flags are persisted, so the on-disk cache must be updated */
	vhcache_dirty(vhc);
}
8780
8781 /*
8782 * Configure all specified paths of the client.
8783 */
8784 static void
config_client_paths_sync(mdi_vhci_config_t * vhc,char * ct_name,char * ct_addr,mdi_phys_path_t * pp_head,mdi_vhcache_lookup_token_t * tok)8785 config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8786 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8787 {
8788 mdi_phys_path_t *pp;
8789
8790 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next)
8791 (void) bus_config_one_phci_child(pp->phys_path);
8792 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok);
8793 }
8794
8795 /*
8796 * Dequeue elements from vhci async client config list and bus configure
8797 * their corresponding phci clients.
8798 */
static void
config_client_paths_thread(void *arg)
{
	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
	mdi_async_client_config_t *acc;
	clock_t quit_at_ticks;
	clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND;
	callb_cpr_t cprinfo;

	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
	    "mdi_config_client_paths");

	for (; ; ) {
		/* exit if the work list stays empty for the idle period */
		quit_at_ticks = ddi_get_lbolt() + idle_time;

		mutex_enter(&vhc->vhc_lock);
		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
		    vhc->vhc_acc_list_head == NULL &&
		    ddi_get_lbolt() < quit_at_ticks) {
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
			    quit_at_ticks);
			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
		}

		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
		    vhc->vhc_acc_list_head == NULL)
			goto out;

		/* dequeue the head of the async client config list */
		acc = vhc->vhc_acc_list_head;
		vhc->vhc_acc_list_head = acc->acc_next;
		if (vhc->vhc_acc_list_head == NULL)
			vhc->vhc_acc_list_tail = NULL;
		vhc->vhc_acc_count--;
		mutex_exit(&vhc->vhc_lock);

		/* configure the paths without holding vhc_lock */
		config_client_paths_sync(vhc, acc->acc_ct_name,
		    acc->acc_ct_addr, acc->acc_phclient_path_list_head,
		    &acc->acc_token);

		free_async_client_config(acc);
	}

out:
	vhc->vhc_acc_thrcount--;
	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
	CALLB_CPR_EXIT(&cprinfo);
}
8847
8848 /*
8849 * Arrange for all the phci client paths (pp_head) for the specified client
8850 * to be bus configured asynchronously by a thread.
8851 */
static void
config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
    mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
{
	mdi_async_client_config_t *acc, *newacc;
	int create_thread;

	if (pp_head == NULL)
		return;

	if (mdi_mtc_off) {
		/* multi-threaded config disabled; configure synchronously */
		config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok);
		free_phclient_path_list(pp_head);
		return;
	}

	/* newacc takes ownership of pp_head */
	newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok);
	ASSERT(newacc);

	mutex_enter(&vhc->vhc_lock);
	/* drop the request if this client is already queued */
	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) {
		if (strcmp(ct_name, acc->acc_ct_name) == 0 &&
		    strcmp(ct_addr, acc->acc_ct_addr) == 0) {
			free_async_client_config(newacc);
			mutex_exit(&vhc->vhc_lock);
			return;
		}
	}

	/* append the request to the work list */
	if (vhc->vhc_acc_list_head == NULL)
		vhc->vhc_acc_list_head = newacc;
	else
		vhc->vhc_acc_list_tail->acc_next = newacc;
	vhc->vhc_acc_list_tail = newacc;
	vhc->vhc_acc_count++;
	/*
	 * Grow the worker pool only when there are more queued requests
	 * than threads; otherwise just wake the existing threads.
	 */
	if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) {
		cv_broadcast(&vhc->vhc_cv);
		create_thread = 0;
	} else {
		vhc->vhc_acc_thrcount++;
		create_thread = 1;
	}
	mutex_exit(&vhc->vhc_lock);

	if (create_thread)
		(void) thread_create(NULL, 0, config_client_paths_thread, vhc,
		    0, &p0, TS_RUN, minclsyspri);
}
8900
8901 /*
8902 * Return number of online paths for the specified client.
8903 */
8904 static int
nonline_paths(mdi_vhcache_client_t * cct)8905 nonline_paths(mdi_vhcache_client_t *cct)
8906 {
8907 mdi_vhcache_pathinfo_t *cpi;
8908 int online_count = 0;
8909
8910 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8911 if (cpi->cpi_pip != NULL) {
8912 MDI_PI_LOCK(cpi->cpi_pip);
8913 if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE)
8914 online_count++;
8915 MDI_PI_UNLOCK(cpi->cpi_pip);
8916 }
8917 }
8918
8919 return (online_count);
8920 }
8921
/*
 * Bus configure all paths for the specified vhci client.
 * If at least one path for the client is already online, the remaining paths
 * will be configured asynchronously. Otherwise, it synchronously configures
 * the paths until at least one path is online and then rest of the paths
 * will be configured asynchronously.
 *
 * Entered with vhcache_lock held (reader); the lock is released on every
 * return path (and re-acquired/re-released around the synchronous configs).
 */
static void
config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_phys_path_t *pp_head, *pp;
	mdi_vhcache_client_t *cct;
	mdi_vhcache_lookup_token_t tok;

	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

	init_vhcache_lookup_token(&tok, NULL);

	/* nothing to do if the client or its path list is not in the cache */
	if (ct_name == NULL || ct_addr == NULL ||
	    (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok))
	    == NULL ||
	    (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) {
		rw_exit(&vhcache->vhcache_lock);
		return;
	}

	/* if at least one path is online, configure the rest asynchronously */
	if (nonline_paths(cct) > 0) {
		rw_exit(&vhcache->vhcache_lock);
		config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok);
		return;
	}

	rw_exit(&vhcache->vhcache_lock);

	/*
	 * No path is online yet: configure one path at a time synchronously
	 * until a path comes online, then hand the remainder of the list to
	 * the async machinery.
	 */
	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) {
		if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) {
			/*
			 * re-lookup the client; the cache may have changed
			 * while the lock was dropped
			 */
			rw_enter(&vhcache->vhcache_lock, RW_READER);

			if ((cct = lookup_vhcache_client(vhcache, ct_name,
			    ct_addr, &tok)) == NULL) {
				rw_exit(&vhcache->vhcache_lock);
				goto out;
			}

			if (nonline_paths(cct) > 0 &&
			    pp->phys_path_next != NULL) {
				rw_exit(&vhcache->vhcache_lock);
				config_client_paths_async(vhc, ct_name, ct_addr,
				    pp->phys_path_next, &tok);
				/* detach the tail; the async side owns it */
				pp->phys_path_next = NULL;
				goto out;
			}

			rw_exit(&vhcache->vhcache_lock);
		}
	}

	/* all paths were configured synchronously; re-sort the cached paths */
	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok);
out:
	free_phclient_path_list(pp_head);
}
8985
/*
 * Serialize vhci configuration operations: block until no other thread
 * holds MDI_VHC_SINGLE_THREADED, then claim the flag for this thread.
 * Paired with single_threaded_vhconfig_exit().
 */
static void
single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc)
{
	mutex_enter(&vhc->vhc_lock);
	while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED)
		cv_wait(&vhc->vhc_cv, &vhc->vhc_lock);
	vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED;
	mutex_exit(&vhc->vhc_lock);
}
8995
/*
 * Release the single-threaded vhci configuration flag and wake all
 * threads waiting on vhc_cv (the cv is shared with other waiters,
 * hence cv_broadcast rather than cv_signal).
 */
static void
single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc)
{
	mutex_enter(&vhc->vhc_lock);
	vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED;
	cv_broadcast(&vhc->vhc_cv);
	mutex_exit(&vhc->vhc_lock);
}
9004
/*
 * One entry of the built-in phci driver tables below: a phci driver name
 * plus whether that driver can support the root device.
 */
typedef struct mdi_phci_driver_info {
	char	*phdriver_name;		/* name of the phci driver */

	/* set to non zero if the phci driver supports root device */
	int	phdriver_root_support;
} mdi_phci_driver_info_t;
9011
9012 /*
9013 * vhci class and root support capability of a phci driver can be
9014 * specified using ddi-vhci-class and ddi-no-root-support properties in the
9015 * phci driver.conf file. The built-in tables below contain this information
9016 * for those phci drivers whose driver.conf files don't yet contain this info.
9017 *
 * All phci drivers except iscsi have root device support.
9019 */
/* built-in phci drivers for the SCSI vhci class */
static mdi_phci_driver_info_t scsi_phci_driver_list[] = {
	{ "fp", 1 },		/* fibre channel; root support */
	{ "iscsi", 0 },		/* iscsi; no root support */
	{ "ibsrp", 1 }		/* SRP over InfiniBand; root support */
};
9025
9026 static mdi_phci_driver_info_t ib_phci_driver_list[] = { "tavor", 1 };
9027
9028 static void *
mdi_realloc(void * old_ptr,size_t old_size,size_t new_size)9029 mdi_realloc(void *old_ptr, size_t old_size, size_t new_size)
9030 {
9031 void *new_ptr;
9032
9033 new_ptr = kmem_zalloc(new_size, KM_SLEEP);
9034 if (old_ptr) {
9035 bcopy(old_ptr, new_ptr, MIN(old_size, new_size));
9036 kmem_free(old_ptr, old_size);
9037 }
9038 return (new_ptr);
9039 }
9040
9041 static void
add_to_phci_list(char *** driver_list,int ** root_support_list,int * cur_elements,int * max_elements,char * driver_name,int root_support)9042 add_to_phci_list(char ***driver_list, int **root_support_list,
9043 int *cur_elements, int *max_elements, char *driver_name, int root_support)
9044 {
9045 ASSERT(*cur_elements <= *max_elements);
9046 if (*cur_elements == *max_elements) {
9047 *max_elements += 10;
9048 *driver_list = mdi_realloc(*driver_list,
9049 sizeof (char *) * (*cur_elements),
9050 sizeof (char *) * (*max_elements));
9051 *root_support_list = mdi_realloc(*root_support_list,
9052 sizeof (int) * (*cur_elements),
9053 sizeof (int) * (*max_elements));
9054 }
9055 (*driver_list)[*cur_elements] = i_ddi_strdup(driver_name, KM_SLEEP);
9056 (*root_support_list)[*cur_elements] = root_support;
9057 (*cur_elements)++;
9058 }
9059
/*
 * Build the list of phci drivers (and their root-support flags) associated
 * with the given vhci class.  The result is the union of:
 *  - drivers whose driver.conf carries a matching ddi-vhci-class property
 *    (root support implied unless ddi-no-root-support is also present), and
 *  - the built-in tables above, for drivers not covered by driver.conf.
 *
 * On return, *driver_list and *root_support_list are parallel arrays with
 * *cur_elements valid entries out of *max_elements allocated; the caller
 * frees them (see attach_phci_drivers()).
 */
static void
get_phci_driver_list(char *vhci_class, char ***driver_list,
    int **root_support_list, int *cur_elements, int *max_elements)
{
	mdi_phci_driver_info_t	*st_driver_list, *p;
	int		st_ndrivers, root_support, i, j, driver_conf_count;
	major_t		m;
	struct devnames	*dnp;
	ddi_prop_t	*propp;

	*driver_list = NULL;
	*root_support_list = NULL;
	*cur_elements = 0;
	*max_elements = 0;

	/* add the phci drivers derived from the phci driver.conf files */
	for (m = 0; m < devcnt; m++) {
		dnp = &devnamesp[m];

		if (dnp->dn_flags & DN_PHCI_DRIVER) {
			/* property list must be examined under dn_lock */
			LOCK_DEV_OPS(&dnp->dn_lock);
			if (dnp->dn_global_prop_ptr != NULL &&
			    (propp = i_ddi_prop_search(DDI_DEV_T_ANY,
			    DDI_VHCI_CLASS, DDI_PROP_TYPE_STRING,
			    &dnp->dn_global_prop_ptr->prop_list)) != NULL &&
			    strcmp(propp->prop_val, vhci_class) == 0) {

				/* root support unless explicitly disclaimed */
				root_support = (i_ddi_prop_search(DDI_DEV_T_ANY,
				    DDI_NO_ROOT_SUPPORT, DDI_PROP_TYPE_INT,
				    &dnp->dn_global_prop_ptr->prop_list)
				    == NULL) ? 1 : 0;

				add_to_phci_list(driver_list, root_support_list,
				    cur_elements, max_elements, dnp->dn_name,
				    root_support);

				UNLOCK_DEV_OPS(&dnp->dn_lock);
			} else
				UNLOCK_DEV_OPS(&dnp->dn_lock);
		}
	}

	/* entries below this index came from driver.conf files */
	driver_conf_count = *cur_elements;

	/* add the phci drivers specified in the built-in tables */
	if (strcmp(vhci_class, MDI_HCI_CLASS_SCSI) == 0) {
		st_driver_list = scsi_phci_driver_list;
		st_ndrivers = sizeof (scsi_phci_driver_list) /
		    sizeof (mdi_phci_driver_info_t);
	} else if (strcmp(vhci_class, MDI_HCI_CLASS_IB) == 0) {
		st_driver_list = ib_phci_driver_list;
		st_ndrivers = sizeof (ib_phci_driver_list) /
		    sizeof (mdi_phci_driver_info_t);
	} else {
		st_driver_list = NULL;
		st_ndrivers = 0;
	}

	for (i = 0, p = st_driver_list; i < st_ndrivers; i++, p++) {
		/* add this phci driver if not already added before */
		for (j = 0; j < driver_conf_count; j++) {
			if (strcmp((*driver_list)[j], p->phdriver_name) == 0)
				break;
		}
		if (j == driver_conf_count) {
			add_to_phci_list(driver_list, root_support_list,
			    cur_elements, max_elements, p->phdriver_name,
			    p->phdriver_root_support);
		}
	}
}
9131
9132 /*
9133 * Attach the phci driver instances associated with the specified vhci class.
9134 * If root is mounted attach all phci driver instances.
9135 * If root is not mounted, attach the instances of only those phci
9136 * drivers that have the root support.
9137 */
9138 static void
attach_phci_drivers(char * vhci_class)9139 attach_phci_drivers(char *vhci_class)
9140 {
9141 char **driver_list, **p;
9142 int *root_support_list;
9143 int cur_elements, max_elements, i;
9144 major_t m;
9145
9146 get_phci_driver_list(vhci_class, &driver_list, &root_support_list,
9147 &cur_elements, &max_elements);
9148
9149 for (i = 0; i < cur_elements; i++) {
9150 if (modrootloaded || root_support_list[i]) {
9151 m = ddi_name_to_major(driver_list[i]);
9152 if (m != DDI_MAJOR_T_NONE &&
9153 ddi_hold_installed_driver(m))
9154 ddi_rele_driver(m);
9155 }
9156 }
9157
9158 if (driver_list) {
9159 for (i = 0, p = driver_list; i < cur_elements; i++, p++)
9160 kmem_free(*p, strlen(*p) + 1);
9161 kmem_free(driver_list, sizeof (char *) * max_elements);
9162 kmem_free(root_support_list, sizeof (int) * max_elements);
9163 }
9164 }
9165
/*
 * Build vhci cache:
 *
 * Attach phci driver instances and then drive BUS_CONFIG_ALL on
 * the phci driver instances. During this process the cache gets built.
 *
 * Cache is built fully if the root is mounted.
 * If the root is not mounted, phci drivers that do not have root support
 * are not attached. As a result the cache is built partially. The entries
 * in the cache reflect only those phci drivers that have root support.
 *
 * Returns 1 if this call built the cache, 0 if it was already built.
 */
static int
build_vhci_cache(mdi_vhci_t *vh)
{
	mdi_vhci_config_t *vhc = vh->vh_config;
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;

	single_threaded_vhconfig_enter(vhc);

	/* someone may have completed the setup while we waited */
	rw_enter(&vhcache->vhcache_lock, RW_READER);
	if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) {
		rw_exit(&vhcache->vhcache_lock);
		single_threaded_vhconfig_exit(vhc);
		return (0);
	}
	rw_exit(&vhcache->vhcache_lock);

	attach_phci_drivers(vh->vh_class);
	bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT,
	    BUS_CONFIG_ALL, DDI_MAJOR_T_NONE);

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
	vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
	rw_exit(&vhcache->vhcache_lock);

	single_threaded_vhconfig_exit(vhc);
	/* schedule the updated cache to be flushed to disk */
	vhcache_dirty(vhc);
	return (1);
}
9205
/*
 * Determine if discovery of paths is needed.
 * Returns 1 if a full path discovery should be performed, 0 otherwise.
 */
static int
vhcache_do_discovery(mdi_vhci_config_t *vhc)
{
	int rv = 1;

	mutex_enter(&vhc->vhc_lock);
	if (i_ddi_io_initialized() == 0) {
		/* early in boot: honor the boot-time discovery budget */
		if (vhc->vhc_path_discovery_boot > 0) {
			vhc->vhc_path_discovery_boot--;
			goto out;
		}
	} else {
		/* after boot: honor the post-boot discovery budget */
		if (vhc->vhc_path_discovery_postboot > 0) {
			vhc->vhc_path_discovery_postboot--;
			goto out;
		}
	}

	/*
	 * Do full path discovery at most once per mdi_path_discovery_interval.
	 * This is to avoid a series of full path discoveries when opening
	 * stale /dev/[r]dsk links.
	 */
	if (mdi_path_discovery_interval != -1 &&
	    ddi_get_lbolt64() >= vhc->vhc_path_discovery_cutoff_time)
		goto out;

	rv = 0;
out:
	mutex_exit(&vhc->vhc_lock);
	return (rv);
}
9241
/*
 * Discover all paths:
 *
 * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci
 * driver instances. During this process all paths will be discovered.
 *
 * Returns 1 if discovery was performed, 0 if it was rate-limited away.
 */
static int
vhcache_discover_paths(mdi_vhci_t *vh)
{
	mdi_vhci_config_t *vhc = vh->vh_config;
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	int rv = 0;

	single_threaded_vhconfig_enter(vhc);

	if (vhcache_do_discovery(vhc)) {
		attach_phci_drivers(vh->vh_class);
		bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE |
		    NDI_NO_EVENT, BUS_CONFIG_ALL, DDI_MAJOR_T_NONE);

		/* push the next allowed full-discovery time forward */
		mutex_enter(&vhc->vhc_lock);
		vhc->vhc_path_discovery_cutoff_time = ddi_get_lbolt64() +
		    mdi_path_discovery_interval * TICKS_PER_SECOND;
		mutex_exit(&vhc->vhc_lock);
		rv = 1;
	}

	single_threaded_vhconfig_exit(vhc);
	return (rv);
}
9272
9273 /*
9274 * Generic vhci bus config implementation:
9275 *
9276 * Parameters
9277 * vdip vhci dip
9278 * flags bus config flags
9279 * op bus config operation
9280 * The remaining parameters are bus config operation specific
9281 *
9282 * for BUS_CONFIG_ONE
9283 * arg pointer to name@addr
9284 * child upon successful return from this function, *child will be
9285 * set to the configured and held devinfo child node of vdip.
9286 * ct_addr pointer to client address (i.e. GUID)
9287 *
9288 * for BUS_CONFIG_DRIVER
9289 * arg major number of the driver
9290 * child and ct_addr parameters are ignored
9291 *
9292 * for BUS_CONFIG_ALL
9293 * arg, child, and ct_addr parameters are ignored
9294 *
9295 * Note that for the rest of the bus config operations, this function simply
9296 * calls the framework provided default bus config routine.
9297 */
int
mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op,
    void *arg, dev_info_t **child, char *ct_addr)
{
	mdi_vhci_t *vh = i_devi_get_vhci(vdip);
	mdi_vhci_config_t *vhc = vh->vh_config;
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	int rv = 0;
	int params_valid = 0;
	char *cp;

	/*
	 * To bus config vhcis we relay operation, possibly using another
	 * thread, to phcis. The phci driver then interacts with MDI to cause
	 * vhci child nodes to be enumerated under the vhci node. Adding a
	 * vhci child requires an ndi_devi_enter of the vhci. Since another
	 * thread may be adding the child, to avoid deadlock we can't wait
	 * for the relayed operations to complete if we have already entered
	 * the vhci node.
	 */
	if (DEVI_BUSY_OWNED(vdip)) {
		MDI_DEBUG(2, (MDI_NOTE, vdip,
		    "vhci dip is busy owned %p", (void *)vdip));
		goto default_bus_config;
	}

	rw_enter(&vhcache->vhcache_lock, RW_READER);
	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
		rw_exit(&vhcache->vhcache_lock);
		/*
		 * rv == 1 means build_vhci_cache() just did a full
		 * BUS_CONFIG_ALL, so the DRIVER/ALL cases below can skip
		 * redoing it.
		 */
		rv = build_vhci_cache(vh);
		rw_enter(&vhcache->vhcache_lock, RW_READER);
	}

	switch (op) {
	case BUS_CONFIG_ONE:
		if (arg != NULL && ct_addr != NULL) {
			/* extract node name */
			cp = (char *)arg;
			while (*cp != '\0' && *cp != '@')
				cp++;
			if (*cp == '@') {
				params_valid = 1;
				/* temporarily split "name@addr" at the '@' */
				*cp = '\0';
				config_client_paths(vhc, (char *)arg, ct_addr);
				/* config_client_paths() releases cache_lock */
				*cp = '@';
				break;
			}
		}

		rw_exit(&vhcache->vhcache_lock);
		break;

	case BUS_CONFIG_DRIVER:
		rw_exit(&vhcache->vhcache_lock);
		if (rv == 0)
			st_bus_config_all_phcis(vhc, flags, op,
			    (major_t)(uintptr_t)arg);
		break;

	case BUS_CONFIG_ALL:
		rw_exit(&vhcache->vhcache_lock);
		if (rv == 0)
			st_bus_config_all_phcis(vhc, flags, op, -1);
		break;

	default:
		rw_exit(&vhcache->vhcache_lock);
		break;
	}


default_bus_config:
	/*
	 * All requested child nodes are enumerated under the vhci.
	 * Now configure them.
	 */
	if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
	    NDI_SUCCESS) {
		return (MDI_SUCCESS);
	} else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) {
		/* discover all paths and try configuring again */
		if (vhcache_discover_paths(vh) &&
		    ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
		    NDI_SUCCESS)
			return (MDI_SUCCESS);
	}

	return (MDI_FAILURE);
}
9388
9389 /*
9390 * Read the on-disk vhci cache into an nvlist for the specified vhci class.
9391 */
9392 static nvlist_t *
read_on_disk_vhci_cache(char * vhci_class)9393 read_on_disk_vhci_cache(char *vhci_class)
9394 {
9395 nvlist_t *nvl;
9396 int err;
9397 char *filename;
9398
9399 filename = vhclass2vhcache_filename(vhci_class);
9400
9401 if ((err = fread_nvlist(filename, &nvl)) == 0) {
9402 kmem_free(filename, strlen(filename) + 1);
9403 return (nvl);
9404 } else if (err == EIO)
9405 cmn_err(CE_WARN, "%s: I/O error, will recreate", filename);
9406 else if (err == EINVAL)
9407 cmn_err(CE_WARN,
9408 "%s: data file corrupted, will recreate", filename);
9409
9410 kmem_free(filename, strlen(filename) + 1);
9411 return (NULL);
9412 }
9413
9414 /*
9415 * Read on-disk vhci cache into nvlists for all vhci classes.
9416 * Called during booting by i_ddi_read_devices_files().
9417 */
9418 void
mdi_read_devices_files(void)9419 mdi_read_devices_files(void)
9420 {
9421 int i;
9422
9423 for (i = 0; i < N_VHCI_CLASSES; i++)
9424 vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]);
9425 }
9426
9427 /*
9428 * Remove all stale entries from vhci cache.
9429 */
9430 static void
clean_vhcache(mdi_vhci_config_t * vhc)9431 clean_vhcache(mdi_vhci_config_t *vhc)
9432 {
9433 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
9434 mdi_vhcache_phci_t *phci, *nxt_phci;
9435 mdi_vhcache_client_t *client, *nxt_client;
9436 mdi_vhcache_pathinfo_t *path, *nxt_path;
9437
9438 rw_enter(&vhcache->vhcache_lock, RW_WRITER);
9439
9440 client = vhcache->vhcache_client_head;
9441 vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL;
9442 for ( ; client != NULL; client = nxt_client) {
9443 nxt_client = client->cct_next;
9444
9445 path = client->cct_cpi_head;
9446 client->cct_cpi_head = client->cct_cpi_tail = NULL;
9447 for ( ; path != NULL; path = nxt_path) {
9448 nxt_path = path->cpi_next;
9449 if ((path->cpi_cphci->cphci_phci != NULL) &&
9450 (path->cpi_pip != NULL)) {
9451 enqueue_tail_vhcache_pathinfo(client, path);
9452 } else if (path->cpi_pip != NULL) {
9453 /* Not valid to have a path without a phci. */
9454 free_vhcache_pathinfo(path);
9455 }
9456 }
9457
9458 if (client->cct_cpi_head != NULL)
9459 enqueue_vhcache_client(vhcache, client);
9460 else {
9461 (void) mod_hash_destroy(vhcache->vhcache_client_hash,
9462 (mod_hash_key_t)client->cct_name_addr);
9463 free_vhcache_client(client);
9464 }
9465 }
9466
9467 phci = vhcache->vhcache_phci_head;
9468 vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL;
9469 for ( ; phci != NULL; phci = nxt_phci) {
9470
9471 nxt_phci = phci->cphci_next;
9472 if (phci->cphci_phci != NULL)
9473 enqueue_vhcache_phci(vhcache, phci);
9474 else
9475 free_vhcache_phci(phci);
9476 }
9477
9478 vhcache->vhcache_clean_time = ddi_get_lbolt64();
9479 rw_exit(&vhcache->vhcache_lock);
9480 vhcache_dirty(vhc);
9481 }
9482
9483 /*
9484 * Remove all stale entries from vhci cache.
9485 * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C
9486 */
9487 void
mdi_clean_vhcache(void)9488 mdi_clean_vhcache(void)
9489 {
9490 mdi_vhci_t *vh;
9491
9492 mutex_enter(&mdi_mutex);
9493 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
9494 vh->vh_refcnt++;
9495 mutex_exit(&mdi_mutex);
9496 clean_vhcache(vh->vh_config);
9497 mutex_enter(&mdi_mutex);
9498 vh->vh_refcnt--;
9499 }
9500 mutex_exit(&mdi_mutex);
9501 }
9502
9503 /*
9504 * mdi_vhci_walk_clients():
9505 * Walker routine to traverse client dev_info nodes
9506 * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree
 * below the client, including nexus devices, which we don't want.
9508 * So we just traverse the immediate siblings, starting from 1st client.
9509 */
9510 void
mdi_vhci_walk_clients(dev_info_t * vdip,int (* f)(dev_info_t *,void *),void * arg)9511 mdi_vhci_walk_clients(dev_info_t *vdip,
9512 int (*f)(dev_info_t *, void *), void *arg)
9513 {
9514 mdi_vhci_t *vh = i_devi_get_vhci(vdip);
9515 dev_info_t *cdip;
9516 mdi_client_t *ct;
9517
9518 MDI_VHCI_CLIENT_LOCK(vh);
9519 cdip = ddi_get_child(vdip);
9520 while (cdip) {
9521 ct = i_devi_get_client(cdip);
9522 MDI_CLIENT_LOCK(ct);
9523
9524 if (((*f)(cdip, arg)) == DDI_WALK_CONTINUE)
9525 cdip = ddi_get_next_sibling(cdip);
9526 else
9527 cdip = NULL;
9528
9529 MDI_CLIENT_UNLOCK(ct);
9530 }
9531 MDI_VHCI_CLIENT_UNLOCK(vh);
9532 }
9533
9534 /*
9535 * mdi_vhci_walk_phcis():
9536 * Walker routine to traverse phci dev_info nodes
9537 */
9538 void
mdi_vhci_walk_phcis(dev_info_t * vdip,int (* f)(dev_info_t *,void *),void * arg)9539 mdi_vhci_walk_phcis(dev_info_t *vdip,
9540 int (*f)(dev_info_t *, void *), void *arg)
9541 {
9542 mdi_vhci_t *vh = i_devi_get_vhci(vdip);
9543 mdi_phci_t *ph, *next;
9544
9545 MDI_VHCI_PHCI_LOCK(vh);
9546 ph = vh->vh_phci_head;
9547 while (ph) {
9548 MDI_PHCI_LOCK(ph);
9549
9550 if (((*f)(ph->ph_dip, arg)) == DDI_WALK_CONTINUE)
9551 next = ph->ph_next;
9552 else
9553 next = NULL;
9554
9555 MDI_PHCI_UNLOCK(ph);
9556 ph = next;
9557 }
9558 MDI_VHCI_PHCI_UNLOCK(vh);
9559 }
9560
9561
9562 /*
9563 * mdi_walk_vhcis():
9564 * Walker routine to traverse vhci dev_info nodes
9565 */
9566 void
mdi_walk_vhcis(int (* f)(dev_info_t *,void *),void * arg)9567 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg)
9568 {
9569 mdi_vhci_t *vh = NULL;
9570
9571 mutex_enter(&mdi_mutex);
9572 /*
9573 * Scan for already registered vhci
9574 */
9575 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
9576 vh->vh_refcnt++;
9577 mutex_exit(&mdi_mutex);
9578 if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) {
9579 mutex_enter(&mdi_mutex);
9580 vh->vh_refcnt--;
9581 break;
9582 } else {
9583 mutex_enter(&mdi_mutex);
9584 vh->vh_refcnt--;
9585 }
9586 }
9587
9588 mutex_exit(&mdi_mutex);
9589 }
9590
/*
 * i_mdi_log_sysevent():
 *		Logs events for pickup by syseventd
 *
 * Builds an EC_DDI sysevent attribute list (driver name, major, instance,
 * devfs path and ph/vh class) for dip and posts it with the given subclass.
 * Best effort: failures only produce a debug message.
 */
static void
i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass)
{
	char *path_name;
	nvlist_t *attr_list;

	/*
	 * nvlist_alloc() returns 0 on success, so comparing against
	 * DDI_SUCCESS (0) works here.
	 */
	if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE,
	    KM_SLEEP) != DDI_SUCCESS) {
		goto alloc_failed;
	}

	path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	(void) ddi_pathname(dip, path_name);

	if (nvlist_add_string(attr_list, DDI_DRIVER_NAME,
	    ddi_driver_name(dip)) != DDI_SUCCESS) {
		goto error;
	}

	if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR,
	    (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) {
		goto error;
	}

	if (nvlist_add_int32(attr_list, DDI_INSTANCE,
	    (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) {
		goto error;
	}

	if (nvlist_add_string(attr_list, DDI_PATHNAME,
	    path_name) != DDI_SUCCESS) {
		goto error;
	}

	if (nvlist_add_string(attr_list, DDI_CLASS,
	    ph_vh_class) != DDI_SUCCESS) {
		goto error;
	}

	(void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass,
	    attr_list, NULL, DDI_SLEEP);

	/* fall through: success and failure share the same cleanup */
error:
	kmem_free(path_name, MAXPATHLEN);
	nvlist_free(attr_list);
	return;

alloc_failed:
	MDI_DEBUG(1, (MDI_WARN, dip, "!unable to send sysevent"));
}
9645
9646 char **
mdi_get_phci_driver_list(char * vhci_class,int * ndrivers)9647 mdi_get_phci_driver_list(char *vhci_class, int *ndrivers)
9648 {
9649 char **driver_list, **ret_driver_list = NULL;
9650 int *root_support_list;
9651 int cur_elements, max_elements;
9652
9653 get_phci_driver_list(vhci_class, &driver_list, &root_support_list,
9654 &cur_elements, &max_elements);
9655
9656
9657 if (driver_list) {
9658 kmem_free(root_support_list, sizeof (int) * max_elements);
9659 ret_driver_list = mdi_realloc(driver_list, sizeof (char *)
9660 * max_elements, sizeof (char *) * cur_elements);
9661 }
9662 *ndrivers = cur_elements;
9663
9664 return (ret_driver_list);
9665
9666 }
9667
9668 void
mdi_free_phci_driver_list(char ** driver_list,int ndrivers)9669 mdi_free_phci_driver_list(char **driver_list, int ndrivers)
9670 {
9671 char **p;
9672 int i;
9673
9674 if (driver_list) {
9675 for (i = 0, p = driver_list; i < ndrivers; i++, p++)
9676 kmem_free(*p, strlen(*p) + 1);
9677 kmem_free(driver_list, sizeof (char *) * ndrivers);
9678 }
9679 }
9680
9681 /*
9682 * mdi_is_dev_supported():
9683 * function called by pHCI bus config operation to determine if a
9684 * device should be represented as a child of the vHCI or the
 * pHCI. This decision is made by the vHCI, using cinfo identity
9686 * information passed by the pHCI - specifics of the cinfo
9687 * representation are by agreement between the pHCI and vHCI.
9688 * Return Values:
9689 * MDI_SUCCESS
9690 * MDI_FAILURE
9691 */
9692 int
mdi_is_dev_supported(char * class,dev_info_t * pdip,void * cinfo)9693 mdi_is_dev_supported(char *class, dev_info_t *pdip, void *cinfo)
9694 {
9695 mdi_vhci_t *vh;
9696
9697 ASSERT(class && pdip);
9698
9699 /*
9700 * For dev_supported, mdi_phci_register() must have established pdip as
9701 * a pHCI.
9702 *
9703 * NOTE: mdi_phci_register() does "mpxio-disable" processing, and
9704 * MDI_PHCI(pdip) will return false if mpxio is disabled.
9705 */
9706 if (!MDI_PHCI(pdip))
9707 return (MDI_FAILURE);
9708
9709 /* Return MDI_FAILURE if vHCI does not support asking the question. */
9710 vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
9711 if ((vh == NULL) || (vh->vh_ops->vo_is_dev_supported == NULL)) {
9712 return (MDI_FAILURE);
9713 }
9714
9715 /* Return vHCI answer */
9716 return (vh->vh_ops->vo_is_dev_supported(vh->vh_dip, pdip, cinfo));
9717 }
9718
9719 int
mdi_dc_return_dev_state(mdi_pathinfo_t * pip,struct devctl_iocdata * dcp)9720 mdi_dc_return_dev_state(mdi_pathinfo_t *pip, struct devctl_iocdata *dcp)
9721 {
9722 uint_t devstate = 0;
9723 dev_info_t *cdip;
9724
9725 if ((pip == NULL) || (dcp == NULL))
9726 return (MDI_FAILURE);
9727
9728 cdip = mdi_pi_get_client(pip);
9729
9730 switch (mdi_pi_get_state(pip)) {
9731 case MDI_PATHINFO_STATE_INIT:
9732 devstate = DEVICE_DOWN;
9733 break;
9734 case MDI_PATHINFO_STATE_ONLINE:
9735 devstate = DEVICE_ONLINE;
9736 if ((cdip) && (devi_stillreferenced(cdip) == DEVI_REFERENCED))
9737 devstate |= DEVICE_BUSY;
9738 break;
9739 case MDI_PATHINFO_STATE_STANDBY:
9740 devstate = DEVICE_ONLINE;
9741 break;
9742 case MDI_PATHINFO_STATE_FAULT:
9743 devstate = DEVICE_DOWN;
9744 break;
9745 case MDI_PATHINFO_STATE_OFFLINE:
9746 devstate = DEVICE_OFFLINE;
9747 break;
9748 default:
9749 ASSERT(MDI_PI(pip)->pi_state);
9750 }
9751
9752 if (copyout(&devstate, dcp->cpyout_buf, sizeof (uint_t)) != 0)
9753 return (MDI_FAILURE);
9754
9755 return (MDI_SUCCESS);
9756 }
9757