1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2014 Nexenta Systems Inc. All rights reserved.
24 * Copyright (c) 2018, Joyent, Inc.
25 * Copyright 2023 Oxide Computer Company
26 */
27
28 /*
29 * Multipath driver interface (MDI) implementation; see mdi_impldefs.h for a
30 * more detailed discussion of the overall mpxio architecture.
31 *
32 * Default locking order:
33 *
 *   _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci::vh_phci_mutex))
 *   _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci::vh_client_mutex))
 *   _NOTE(LOCK_ORDER(mdi_vhci::vh_phci_mutex, mdi_phci::ph_mutex))
 *   _NOTE(LOCK_ORDER(mdi_vhci::vh_client_mutex, mdi_client::ct_mutex))
 *   _NOTE(LOCK_ORDER(mdi_phci::ph_mutex, mdi_pathinfo::pi_mutex))
 *   _NOTE(LOCK_ORDER(mdi_phci::ph_mutex, mdi_client::ct_mutex))
 *   _NOTE(LOCK_ORDER(mdi_client::ct_mutex, mdi_pathinfo::pi_mutex))
41 */
42
43 #include <sys/note.h>
44 #include <sys/types.h>
45 #include <sys/varargs.h>
46 #include <sys/param.h>
47 #include <sys/errno.h>
48 #include <sys/uio.h>
49 #include <sys/buf.h>
50 #include <sys/modctl.h>
51 #include <sys/open.h>
52 #include <sys/kmem.h>
53 #include <sys/poll.h>
54 #include <sys/conf.h>
55 #include <sys/bootconf.h>
56 #include <sys/cmn_err.h>
57 #include <sys/stat.h>
58 #include <sys/ddi.h>
59 #include <sys/sunddi.h>
60 #include <sys/ddipropdefs.h>
61 #include <sys/sunndi.h>
62 #include <sys/ndi_impldefs.h>
63 #include <sys/promif.h>
64 #include <sys/sunmdi.h>
65 #include <sys/mdi_impldefs.h>
66 #include <sys/taskq.h>
67 #include <sys/epm.h>
68 #include <sys/sunpm.h>
69 #include <sys/modhash.h>
70 #include <sys/disp.h>
71 #include <sys/autoconf.h>
72 #include <sys/sysmacros.h>
73
#ifdef	DEBUG
#include <sys/debug.h>
/* MDI_DEBUG() messages are emitted when their level is <= mdi_debug. */
int	mdi_debug = 1;
/*
 * NOTE(review): presumably consumed by i_mdi_log() to steer where messages
 * go; the consumer is outside this part of the file - confirm there.
 */
int	mdi_debug_logonly = 0;

/*
 * MDI_DEBUG(level, (MDI_WARN | MDI_NOTE | MDI_CONT, dip, fmt, ...));
 *
 * 'pargs' is a complete, parenthesised argument list for i_mdi_log().
 * The expansion is wrapped in do { } while (0) so that the macro is a
 * single statement: a bare "if (...) call" expansion would capture the
 * 'else' of an enclosing if/else written around the macro.
 */
#define	MDI_DEBUG(dbglevel, pargs)		\
	do {					\
		if (mdi_debug >= (dbglevel))	\
			i_mdi_log pargs;	\
	} while (0)

/* First two i_mdi_log() arguments: cmn_err level and calling function. */
#define	MDI_WARN	CE_WARN, __func__
#define	MDI_NOTE	CE_NOTE, __func__
#define	MDI_CONT	CE_CONT, __func__
static void	i_mdi_log(int, const char *, dev_info_t *, const char *, ...);
#else	/* !DEBUG */
/* Debug logging compiles away entirely in non-DEBUG builds. */
#define	MDI_DEBUG(dbglevel, pargs)
#endif	/* DEBUG */
/*
 * NOTE(review): companion to mdi_debug_logonly, but defined for all build
 * types; its consumer is not visible here - confirm its meaning there.
 */
int	mdi_debug_consoleonly = 0;
/* Argument handed to delay_random() by the try-lock back-off loops. */
int	mdi_delay = 3;
88
89 extern pri_t minclsyspri;
90 extern int modrootloaded;
91
92 /*
93 * Global mutex:
94 * Protects vHCI list and structure members.
95 */
96 kmutex_t mdi_mutex;
97
98 /*
99 * Registered vHCI class driver lists
100 */
101 int mdi_vhci_count;
102 mdi_vhci_t *mdi_vhci_head;
103 mdi_vhci_t *mdi_vhci_tail;
104
105 /*
106 * Client Hash Table size
107 */
108 static int mdi_client_table_size = CLIENT_HASH_TABLE_SIZE;
109
110 /*
111 * taskq interface definitions
112 */
113 #define MDI_TASKQ_N_THREADS 8
114 #define MDI_TASKQ_PRI minclsyspri
115 #define MDI_TASKQ_MINALLOC (4*mdi_taskq_n_threads)
116 #define MDI_TASKQ_MAXALLOC (500*mdi_taskq_n_threads)
117
118 taskq_t *mdi_taskq;
119 static uint_t mdi_taskq_n_threads = MDI_TASKQ_N_THREADS;
120
121 #define TICKS_PER_SECOND (drv_usectohz(1000000))
122
123 /*
124 * The data should be "quiet" for this interval (in seconds) before the
125 * vhci cached data is flushed to the disk.
126 */
127 static int mdi_vhcache_flush_delay = 10;
128
129 /* number of seconds the vhcache flush daemon will sleep idle before exiting */
130 static int mdi_vhcache_flush_daemon_idle_time = 60;
131
132 /*
133 * MDI falls back to discovery of all paths when a bus_config_one fails.
134 * The following parameters can be used to tune this operation.
135 *
136 * mdi_path_discovery_boot
137 * Number of times path discovery will be attempted during early boot.
138 * Probably there is no reason to ever set this value to greater than one.
139 *
140 * mdi_path_discovery_postboot
141 * Number of times path discovery will be attempted after early boot.
142 * Set it to a minimum of two to allow for discovery of iscsi paths which
143 * may happen very late during booting.
144 *
145 * mdi_path_discovery_interval
146 * Minimum number of seconds MDI will wait between successive discovery
147 * of all paths. Set it to -1 to disable discovery of all paths.
148 */
149 static int mdi_path_discovery_boot = 1;
150 static int mdi_path_discovery_postboot = 2;
151 static int mdi_path_discovery_interval = 10;
152
153 /*
154 * number of seconds the asynchronous configuration thread will sleep idle
155 * before exiting.
156 */
157 static int mdi_async_config_idle_time = 600;
158
159 static int mdi_bus_config_cache_hash_size = 256;
160
161 /* turns off multithreaded configuration for certain operations */
162 static int mdi_mtc_off = 0;
163
164 /*
165 * The "path" to a pathinfo node is identical to the /devices path to a
166 * devinfo node had the device been enumerated under a pHCI instead of
167 * a vHCI. This pathinfo "path" is associated with a 'path_instance'.
168 * This association persists across create/delete of the pathinfo nodes,
169 * but not across reboot.
170 */
171 static uint_t mdi_pathmap_instance = 1; /* 0 -> any path */
172 static int mdi_pathmap_hash_size = 256;
173 static kmutex_t mdi_pathmap_mutex;
174 static mod_hash_t *mdi_pathmap_bypath; /* "path"->instance */
175 static mod_hash_t *mdi_pathmap_byinstance; /* instance->"path" */
176 static mod_hash_t *mdi_pathmap_sbyinstance; /* inst->shortpath */
177
178 /*
179 * MDI component property name/value string definitions
180 */
181 const char *mdi_component_prop = "mpxio-component";
182 const char *mdi_component_prop_vhci = "vhci";
183 const char *mdi_component_prop_phci = "phci";
184 const char *mdi_component_prop_client = "client";
185
186 /*
187 * MDI client global unique identifier property name
188 */
189 const char *mdi_client_guid_prop = "client-guid";
190
191 /*
192 * MDI client load balancing property name/value string definitions
193 */
194 const char *mdi_load_balance = "load-balance";
195 const char *mdi_load_balance_none = "none";
196 const char *mdi_load_balance_rr = "round-robin";
197 const char *mdi_load_balance_lba = "logical-block";
198
199 /*
200 * Obsolete vHCI class definition; to be removed after Leadville update
201 */
202 const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI;
203
204 static char vhci_greeting[] =
205 "\tThere already exists one vHCI driver for class %s\n"
206 "\tOnly one vHCI driver for each class is allowed\n";
207
208 /*
209 * Static function prototypes
210 */
211 static int i_mdi_phci_offline(dev_info_t *, uint_t);
212 static int i_mdi_client_offline(dev_info_t *, uint_t);
213 static int i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t);
214 static void i_mdi_phci_post_detach(dev_info_t *,
215 ddi_detach_cmd_t, int);
216 static int i_mdi_client_pre_detach(dev_info_t *,
217 ddi_detach_cmd_t);
218 static void i_mdi_client_post_detach(dev_info_t *,
219 ddi_detach_cmd_t, int);
220 static void i_mdi_pm_hold_pip(mdi_pathinfo_t *);
221 static void i_mdi_pm_rele_pip(mdi_pathinfo_t *);
222 static int i_mdi_lba_lb(mdi_client_t *ct,
223 mdi_pathinfo_t **ret_pip, struct buf *buf);
224 static void i_mdi_pm_hold_client(mdi_client_t *, int);
225 static void i_mdi_pm_rele_client(mdi_client_t *, int);
226 static void i_mdi_pm_reset_client(mdi_client_t *);
227 static int i_mdi_power_all_phci(mdi_client_t *);
228 static void i_mdi_log_sysevent(dev_info_t *, char *, char *);
229
230
231 /*
232 * Internal mdi_pathinfo node functions
233 */
234 static void i_mdi_pi_kstat_destroy(mdi_pathinfo_t *);
235
236 static mdi_vhci_t *i_mdi_vhci_class2vhci(char *);
237 static mdi_vhci_t *i_devi_get_vhci(dev_info_t *);
238 static mdi_phci_t *i_devi_get_phci(dev_info_t *);
239 static void i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *);
240 static void i_mdi_phci_unlock(mdi_phci_t *);
241 static mdi_pathinfo_t *i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *);
242 static void i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *);
243 static void i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *);
244 static void i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *,
245 mdi_client_t *);
246 static void i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *);
247 static void i_mdi_client_remove_path(mdi_client_t *,
248 mdi_pathinfo_t *);
249
250 static int i_mdi_pi_state_change(mdi_pathinfo_t *,
251 mdi_pathinfo_state_t, int);
252 static int i_mdi_pi_offline(mdi_pathinfo_t *, int);
253 static dev_info_t *i_mdi_devinfo_create(mdi_vhci_t *, char *, char *,
254 char **, int);
255 static dev_info_t *i_mdi_devinfo_find(mdi_vhci_t *, char *, char *);
256 static int i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int);
257 static int i_mdi_is_child_present(dev_info_t *, dev_info_t *);
258 static mdi_client_t *i_mdi_client_alloc(mdi_vhci_t *, char *, char *);
259 static void i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *);
260 static void i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *);
261 static mdi_client_t *i_mdi_client_find(mdi_vhci_t *, char *, char *);
262 static void i_mdi_client_update_state(mdi_client_t *);
263 static int i_mdi_client_compute_state(mdi_client_t *,
264 mdi_phci_t *);
265 static void i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *);
266 static void i_mdi_client_unlock(mdi_client_t *);
267 static int i_mdi_client_free(mdi_vhci_t *, mdi_client_t *);
268 static mdi_client_t *i_devi_get_client(dev_info_t *);
269 /*
270 * NOTE: this will be removed once the NWS files are changed to use the new
271 * mdi_{enable,disable}_path interfaces
272 */
273 static int i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *,
274 int, int);
275 static mdi_pathinfo_t *i_mdi_enable_disable_path(mdi_pathinfo_t *pip,
276 mdi_vhci_t *vh, int flags, int op);
277 /*
278 * Failover related function prototypes
279 */
280 static int i_mdi_failover(void *);
281
282 /*
283 * misc internal functions
284 */
285 static int i_mdi_get_hash_key(char *);
286 static int i_map_nvlist_error_to_mdi(int);
287 static void i_mdi_report_path_state(mdi_client_t *,
288 mdi_pathinfo_t *);
289
290 static void setup_vhci_cache(mdi_vhci_t *);
291 static int destroy_vhci_cache(mdi_vhci_t *);
292 static int stop_vhcache_async_threads(mdi_vhci_config_t *);
293 static boolean_t stop_vhcache_flush_thread(void *, int);
294 static void free_string_array(char **, int);
295 static void free_vhcache_phci(mdi_vhcache_phci_t *);
296 static void free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *);
297 static void free_vhcache_client(mdi_vhcache_client_t *);
298 static int mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *);
299 static nvlist_t *vhcache_to_mainnvl(mdi_vhci_cache_t *);
300 static void vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *);
301 static void vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *);
302 static void vhcache_pi_add(mdi_vhci_config_t *,
303 struct mdi_pathinfo *);
304 static void vhcache_pi_remove(mdi_vhci_config_t *,
305 struct mdi_pathinfo *);
306 static void free_phclient_path_list(mdi_phys_path_t *);
307 static void sort_vhcache_paths(mdi_vhcache_client_t *);
308 static int flush_vhcache(mdi_vhci_config_t *, int);
309 static void vhcache_dirty(mdi_vhci_config_t *);
310 static void free_async_client_config(mdi_async_client_config_t *);
311 static void single_threaded_vhconfig_enter(mdi_vhci_config_t *);
312 static void single_threaded_vhconfig_exit(mdi_vhci_config_t *);
313 static nvlist_t *read_on_disk_vhci_cache(char *);
314 extern int fread_nvlist(char *, nvlist_t **);
315 extern int fwrite_nvlist(char *, nvlist_t *);
316
317 /* called once when first vhci registers with mdi */
318 static void
i_mdi_init()319 i_mdi_init()
320 {
321 static int initialized = 0;
322
323 if (initialized)
324 return;
325 initialized = 1;
326
327 mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL);
328
329 /* Create our taskq resources */
330 mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads,
331 MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC,
332 TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
333 ASSERT(mdi_taskq != NULL); /* taskq_create never fails */
334
335 /* Allocate ['path_instance' <-> "path"] maps */
336 mutex_init(&mdi_pathmap_mutex, NULL, MUTEX_DRIVER, NULL);
337 mdi_pathmap_bypath = mod_hash_create_strhash(
338 "mdi_pathmap_bypath", mdi_pathmap_hash_size,
339 mod_hash_null_valdtor);
340 mdi_pathmap_byinstance = mod_hash_create_idhash(
341 "mdi_pathmap_byinstance", mdi_pathmap_hash_size,
342 mod_hash_null_valdtor);
343 mdi_pathmap_sbyinstance = mod_hash_create_idhash(
344 "mdi_pathmap_sbyinstance", mdi_pathmap_hash_size,
345 mod_hash_null_valdtor);
346 }
347
348 /*
349 * mdi_get_component_type():
350 * Return mpxio component type
351 * Return Values:
352 * MDI_COMPONENT_NONE
353 * MDI_COMPONENT_VHCI
354 * MDI_COMPONENT_PHCI
355 * MDI_COMPONENT_CLIENT
356 * XXX This doesn't work under multi-level MPxIO and should be
357 * removed when clients migrate mdi_component_is_*() interfaces.
358 */
359 int
mdi_get_component_type(dev_info_t * dip)360 mdi_get_component_type(dev_info_t *dip)
361 {
362 return (DEVI(dip)->devi_mdi_component);
363 }
364
365 /*
366 * mdi_vhci_register():
367 * Register a vHCI module with the mpxio framework
368 * mdi_vhci_register() is called by vHCI drivers to register the
369 * 'class_driver' vHCI driver and its MDI entrypoints with the
370 * mpxio framework. The vHCI driver must call this interface as
371 * part of its attach(9e) handler.
372 * Competing threads may try to attach mdi_vhci_register() as
373 * the vHCI drivers are loaded and attached as a result of pHCI
374 * driver instance registration (mdi_phci_register()) with the
375 * framework.
376 * Return Values:
377 * MDI_SUCCESS
378 * MDI_FAILURE
379 */
380 /*ARGSUSED*/
381 int
mdi_vhci_register(char * class,dev_info_t * vdip,mdi_vhci_ops_t * vops,int flags)382 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops,
383 int flags)
384 {
385 mdi_vhci_t *vh = NULL;
386
387 /* Registrant can't be older */
388 ASSERT(vops->vo_revision <= MDI_VHCI_OPS_REV);
389
390 #ifdef DEBUG
391 /*
392 * IB nexus driver is loaded only when IB hardware is present.
393 * In order to be able to do this there is a need to drive the loading
394 * and attaching of the IB nexus driver (especially when an IB hardware
395 * is dynamically plugged in) when an IB HCA driver (PHCI)
396 * is being attached. Unfortunately this gets into the limitations
397 * of devfs as there seems to be no clean way to drive configuration
398 * of a subtree from another subtree of a devfs. Hence, do not ASSERT
399 * for IB.
400 */
401 if (strcmp(class, MDI_HCI_CLASS_IB) != 0)
402 ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));
403 #endif
404
405 i_mdi_init();
406
407 mutex_enter(&mdi_mutex);
408 /*
409 * Scan for already registered vhci
410 */
411 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
412 if (strcmp(vh->vh_class, class) == 0) {
413 /*
414 * vHCI has already been created. Check for valid
415 * vHCI ops registration. We only support one vHCI
416 * module per class
417 */
418 if (vh->vh_ops != NULL) {
419 mutex_exit(&mdi_mutex);
420 cmn_err(CE_NOTE, vhci_greeting, class);
421 return (MDI_FAILURE);
422 }
423 break;
424 }
425 }
426
427 /*
428 * if not yet created, create the vHCI component
429 */
430 if (vh == NULL) {
431 struct client_hash *hash = NULL;
432 char *load_balance;
433
434 /*
435 * Allocate and initialize the mdi extensions
436 */
437 vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP);
438 hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash),
439 KM_SLEEP);
440 vh->vh_client_table = hash;
441 vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP);
442 (void) strcpy(vh->vh_class, class);
443 vh->vh_lb = LOAD_BALANCE_RR;
444 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip,
445 0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) {
446 if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) {
447 vh->vh_lb = LOAD_BALANCE_NONE;
448 } else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA)
449 == 0) {
450 vh->vh_lb = LOAD_BALANCE_LBA;
451 }
452 ddi_prop_free(load_balance);
453 }
454
455 mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL);
456 mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL);
457
458 /*
459 * Store the vHCI ops vectors
460 */
461 vh->vh_dip = vdip;
462 vh->vh_ops = vops;
463
464 setup_vhci_cache(vh);
465
466 if (mdi_vhci_head == NULL) {
467 mdi_vhci_head = vh;
468 }
469 if (mdi_vhci_tail) {
470 mdi_vhci_tail->vh_next = vh;
471 }
472 mdi_vhci_tail = vh;
473 mdi_vhci_count++;
474 }
475
476 /*
477 * Claim the devfs node as a vhci component
478 */
479 DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI;
480
481 /*
482 * Initialize our back reference from dev_info node
483 */
484 DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh;
485 mutex_exit(&mdi_mutex);
486 return (MDI_SUCCESS);
487 }
488
489 /*
490 * mdi_vhci_unregister():
491 * Unregister a vHCI module from mpxio framework
492 * mdi_vhci_unregister() is called from the detach(9E) entrypoint
493 * of a vhci to unregister it from the framework.
494 * Return Values:
495 * MDI_SUCCESS
496 * MDI_FAILURE
497 */
498 /*ARGSUSED*/
499 int
mdi_vhci_unregister(dev_info_t * vdip,int flags)500 mdi_vhci_unregister(dev_info_t *vdip, int flags)
501 {
502 mdi_vhci_t *found, *vh, *prev = NULL;
503
504 ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));
505
506 /*
507 * Check for invalid VHCI
508 */
509 if ((vh = i_devi_get_vhci(vdip)) == NULL)
510 return (MDI_FAILURE);
511
512 /*
513 * Scan the list of registered vHCIs for a match
514 */
515 mutex_enter(&mdi_mutex);
516 for (found = mdi_vhci_head; found != NULL; found = found->vh_next) {
517 if (found == vh)
518 break;
519 prev = found;
520 }
521
522 if (found == NULL) {
523 mutex_exit(&mdi_mutex);
524 return (MDI_FAILURE);
525 }
526
527 /*
528 * Check the vHCI, pHCI and client count. All the pHCIs and clients
529 * should have been unregistered, before a vHCI can be
530 * unregistered.
531 */
532 MDI_VHCI_PHCI_LOCK(vh);
533 if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) {
534 MDI_VHCI_PHCI_UNLOCK(vh);
535 mutex_exit(&mdi_mutex);
536 return (MDI_FAILURE);
537 }
538 MDI_VHCI_PHCI_UNLOCK(vh);
539
540 if (destroy_vhci_cache(vh) != MDI_SUCCESS) {
541 mutex_exit(&mdi_mutex);
542 return (MDI_FAILURE);
543 }
544
545 /*
546 * Remove the vHCI from the global list
547 */
548 if (vh == mdi_vhci_head) {
549 mdi_vhci_head = vh->vh_next;
550 } else {
551 prev->vh_next = vh->vh_next;
552 }
553 if (vh == mdi_vhci_tail) {
554 mdi_vhci_tail = prev;
555 }
556 mdi_vhci_count--;
557 mutex_exit(&mdi_mutex);
558
559 vh->vh_ops = NULL;
560 DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI;
561 DEVI(vdip)->devi_mdi_xhci = NULL;
562 kmem_free(vh->vh_class, strlen(vh->vh_class)+1);
563 kmem_free(vh->vh_client_table,
564 mdi_client_table_size * sizeof (struct client_hash));
565 mutex_destroy(&vh->vh_phci_mutex);
566 mutex_destroy(&vh->vh_client_mutex);
567
568 kmem_free(vh, sizeof (mdi_vhci_t));
569 return (MDI_SUCCESS);
570 }
571
572 /*
573 * i_mdi_vhci_class2vhci():
574 * Look for a matching vHCI module given a vHCI class name
575 * Return Values:
576 * Handle to a vHCI component
577 * NULL
578 */
579 static mdi_vhci_t *
i_mdi_vhci_class2vhci(char * class)580 i_mdi_vhci_class2vhci(char *class)
581 {
582 mdi_vhci_t *vh = NULL;
583
584 ASSERT(!MUTEX_HELD(&mdi_mutex));
585
586 mutex_enter(&mdi_mutex);
587 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
588 if (strcmp(vh->vh_class, class) == 0) {
589 break;
590 }
591 }
592 mutex_exit(&mdi_mutex);
593 return (vh);
594 }
595
596 /*
597 * i_devi_get_vhci():
598 * Utility function to get the handle to a vHCI component
599 * Return Values:
600 * Handle to a vHCI component
601 * NULL
602 */
603 mdi_vhci_t *
i_devi_get_vhci(dev_info_t * vdip)604 i_devi_get_vhci(dev_info_t *vdip)
605 {
606 mdi_vhci_t *vh = NULL;
607 if (MDI_VHCI(vdip)) {
608 vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci;
609 }
610 return (vh);
611 }
612
613 /*
614 * mdi_phci_register():
615 * Register a pHCI module with mpxio framework
616 * mdi_phci_register() is called by pHCI drivers to register with
617 * the mpxio framework and a specific 'class_driver' vHCI. The
618 * pHCI driver must call this interface as part of its attach(9e)
619 * handler.
620 * Return Values:
621 * MDI_SUCCESS
622 * MDI_FAILURE
623 */
624 /*ARGSUSED*/
625 int
mdi_phci_register(char * class,dev_info_t * pdip,int flags)626 mdi_phci_register(char *class, dev_info_t *pdip, int flags)
627 {
628 mdi_phci_t *ph;
629 mdi_vhci_t *vh;
630 char *data;
631
632 /*
633 * Some subsystems, like fcp, perform pHCI registration from a
634 * different thread than the one doing the pHCI attach(9E) - the
635 * driver attach code is waiting for this other thread to complete.
636 * This means we can only ASSERT DEVI_BUSY_CHANGING of parent
637 * (indicating that some thread has done an ndi_devi_enter of parent)
638 * not DEVI_BUSY_OWNED (which would indicate that we did the enter).
639 */
640 ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));
641
642 /*
643 * Check for mpxio-disable property. Enable mpxio if the property is
644 * missing or not set to "yes".
645 * If the property is set to "yes" then emit a brief message.
646 */
647 if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable",
648 &data) == DDI_SUCCESS)) {
649 if (strcmp(data, "yes") == 0) {
650 MDI_DEBUG(1, (MDI_CONT, pdip,
651 "?multipath capabilities disabled via %s.conf.",
652 ddi_driver_name(pdip)));
653 ddi_prop_free(data);
654 return (MDI_FAILURE);
655 }
656 ddi_prop_free(data);
657 }
658
659 /*
660 * Search for a matching vHCI
661 */
662 vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
663 if (vh == NULL) {
664 return (MDI_FAILURE);
665 }
666
667 ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP);
668 mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL);
669 ph->ph_dip = pdip;
670 ph->ph_vhci = vh;
671 ph->ph_next = NULL;
672 ph->ph_unstable = 0;
673 ph->ph_vprivate = 0;
674 cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL);
675
676 MDI_PHCI_LOCK(ph);
677 MDI_PHCI_SET_POWER_UP(ph);
678 MDI_PHCI_UNLOCK(ph);
679 DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI;
680 DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph;
681
682 vhcache_phci_add(vh->vh_config, ph);
683
684 MDI_VHCI_PHCI_LOCK(vh);
685 if (vh->vh_phci_head == NULL) {
686 vh->vh_phci_head = ph;
687 }
688 if (vh->vh_phci_tail) {
689 vh->vh_phci_tail->ph_next = ph;
690 }
691 vh->vh_phci_tail = ph;
692 vh->vh_phci_count++;
693 MDI_VHCI_PHCI_UNLOCK(vh);
694
695 i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER);
696 return (MDI_SUCCESS);
697 }
698
699 /*
700 * mdi_phci_unregister():
701 * Unregister a pHCI module from mpxio framework
702 * mdi_phci_unregister() is called by the pHCI drivers from their
703 * detach(9E) handler to unregister their instances from the
704 * framework.
705 * Return Values:
706 * MDI_SUCCESS
707 * MDI_FAILURE
708 */
709 /*ARGSUSED*/
710 int
mdi_phci_unregister(dev_info_t * pdip,int flags)711 mdi_phci_unregister(dev_info_t *pdip, int flags)
712 {
713 mdi_vhci_t *vh;
714 mdi_phci_t *ph;
715 mdi_phci_t *tmp;
716 mdi_phci_t *prev = NULL;
717 mdi_pathinfo_t *pip;
718
719 ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));
720
721 ph = i_devi_get_phci(pdip);
722 if (ph == NULL) {
723 MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid pHCI"));
724 return (MDI_FAILURE);
725 }
726
727 vh = ph->ph_vhci;
728 ASSERT(vh != NULL);
729 if (vh == NULL) {
730 MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid vHCI"));
731 return (MDI_FAILURE);
732 }
733
734 MDI_VHCI_PHCI_LOCK(vh);
735 tmp = vh->vh_phci_head;
736 while (tmp) {
737 if (tmp == ph) {
738 break;
739 }
740 prev = tmp;
741 tmp = tmp->ph_next;
742 }
743
744 if (ph == vh->vh_phci_head) {
745 vh->vh_phci_head = ph->ph_next;
746 } else {
747 prev->ph_next = ph->ph_next;
748 }
749
750 if (ph == vh->vh_phci_tail) {
751 vh->vh_phci_tail = prev;
752 }
753
754 vh->vh_phci_count--;
755 MDI_VHCI_PHCI_UNLOCK(vh);
756
757 /* Walk remaining pathinfo nodes and disassociate them from pHCI */
758 MDI_PHCI_LOCK(ph);
759 for (pip = (mdi_pathinfo_t *)ph->ph_path_head; pip;
760 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link)
761 MDI_PI(pip)->pi_phci = NULL;
762 MDI_PHCI_UNLOCK(ph);
763
764 i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class,
765 ESC_DDI_INITIATOR_UNREGISTER);
766 vhcache_phci_remove(vh->vh_config, ph);
767 cv_destroy(&ph->ph_unstable_cv);
768 mutex_destroy(&ph->ph_mutex);
769 kmem_free(ph, sizeof (mdi_phci_t));
770 DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI;
771 DEVI(pdip)->devi_mdi_xhci = NULL;
772 return (MDI_SUCCESS);
773 }
774
775 /*
776 * i_devi_get_phci():
777 * Utility function to return the phci extensions.
778 */
779 static mdi_phci_t *
i_devi_get_phci(dev_info_t * pdip)780 i_devi_get_phci(dev_info_t *pdip)
781 {
782 mdi_phci_t *ph = NULL;
783
784 if (MDI_PHCI(pdip)) {
785 ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci;
786 }
787 return (ph);
788 }
789
790 /*
791 * Single thread mdi entry into devinfo node for modifying its children.
792 * If necessary we perform an ndi_devi_enter of the vHCI before doing
793 * an ndi_devi_enter of 'dip'. If we enter the vHCI, we set *enteredvp
794 * to true, otherwise it is unconditionally set to false.
795 */
796 void
mdi_devi_enter(dev_info_t * phci_dip,boolean_t * enteredvp)797 mdi_devi_enter(dev_info_t *phci_dip, boolean_t *enteredvp)
798 {
799 dev_info_t *vdip;
800
801 /* Verify calling context */
802 ASSERT3P(enteredvp, !=, NULL);
803 ASSERT(MDI_PHCI(phci_dip));
804 vdip = mdi_devi_get_vdip(phci_dip);
805 ASSERT3P(vdip, !=, NULL); /* A pHCI always has a vHCI */
806
807 /*
808 * If pHCI is detaching then the framework has already entered the
809 * vHCI on a thread that went down the code path leading to
810 * detach_node(). This framework enter of the vHCI during pHCI
811 * detach is done to avoid deadlock with vHCI power management
812 * operations which enter the vHCI and then enter down the path
813 * to the pHCI. If pHCI is detaching then we piggyback this call's
814 * enter of the vHCI on the framework's vHCI enter that has already
815 * occurred - this is OK because we know that the framework thread
816 * doing detach is waiting for our completion.
817 *
818 * We should check DEVI_IS_DETACHING under an enter of the parent to
819 * avoid a race with detach, but we can't because the framework has
820 * already entered the parent, so we have this complexity instead.
821 */
822 *enteredvp = B_FALSE;
823 for (;;) {
824 if (panicstr != NULL)
825 return;
826
827 if (ndi_devi_tryenter(vdip)) {
828 *enteredvp = B_TRUE;
829 if (DEVI_IS_DETACHING(phci_dip)) {
830 ndi_devi_exit(vdip);
831 *enteredvp = B_FALSE;
832 }
833 break;
834 } else if (DEVI_IS_DETACHING(phci_dip)) {
835 *enteredvp = B_FALSE;
836 break;
837 } else if (servicing_interrupt()) {
838 /*
839 * Don't delay an interrupt (and ensure adaptive
840 * mutex inversion support).
841 */
842 ndi_devi_enter(vdip);
843 *enteredvp = B_TRUE;
844 break;
845 } else {
846 delay_random(mdi_delay);
847 }
848 }
849
850 ndi_devi_enter(phci_dip);
851 }
852
853 /*
854 * Attempt to mdi_devi_enter.
855 */
856 int
mdi_devi_tryenter(dev_info_t * phci_dip,boolean_t * enteredvp)857 mdi_devi_tryenter(dev_info_t *phci_dip, boolean_t *enteredvp)
858 {
859 dev_info_t *vdip;
860
861 /* Verify calling context */
862 ASSERT(MDI_PHCI(phci_dip));
863 vdip = mdi_devi_get_vdip(phci_dip);
864 ASSERT3P(vdip, !=, NULL); /* A pHCI always has a vHCI */
865
866 *enteredvp = B_FALSE;
867 if (ndi_devi_tryenter(vdip)) {
868 if (ndi_devi_tryenter(phci_dip)) {
869 *enteredvp = B_TRUE;
870 return (1); /* locked */
871 }
872 ndi_devi_exit(vdip);
873 }
874 return (0); /* busy */
875 }
876
877 /*
878 * Release mdi_devi_enter or successful mdi_devi_tryenter.
879 */
880 void
mdi_devi_exit(dev_info_t * phci_dip,boolean_t enteredv)881 mdi_devi_exit(dev_info_t *phci_dip, boolean_t enteredv)
882 {
883 dev_info_t *vdip;
884
885 /* Verify calling context */
886 ASSERT(MDI_PHCI(phci_dip));
887 vdip = mdi_devi_get_vdip(phci_dip);
888 ASSERT3P(vdip, !=, NULL); /* A pHCI always has a vHCI */
889
890 ndi_devi_exit(phci_dip);
891 if (enteredv)
892 ndi_devi_exit(vdip);
893 }
894
895 /*
896 * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used
897 * around a pHCI drivers calls to mdi_pi_online/offline, after holding
898 * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock
899 * with vHCI power management code during path online/offline. Each
900 * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must
901 * occur within the scope of an active mdi_devi_enter that establishes the
902 * circular value.
903 */
904 void
mdi_devi_exit_phci(dev_info_t * phci_dip)905 mdi_devi_exit_phci(dev_info_t *phci_dip)
906 {
907 /* Verify calling context */
908 ASSERT(MDI_PHCI(phci_dip));
909
910 /* Keep hold on pHCI until we reenter in mdi_devi_enter_phci */
911 ndi_hold_devi(phci_dip);
912
913 ndi_devi_exit(phci_dip);
914 }
915
916 void
mdi_devi_enter_phci(dev_info_t * phci_dip)917 mdi_devi_enter_phci(dev_info_t *phci_dip)
918 {
919 /* Verify calling context */
920 ASSERT(MDI_PHCI(phci_dip));
921
922 ndi_devi_enter(phci_dip);
923
924 /* Drop hold from mdi_devi_exit_phci. */
925 ndi_rele_devi(phci_dip);
926 }
927
928 /*
929 * mdi_devi_get_vdip():
930 * given a pHCI dip return vHCI dip
931 * Returns:
932 * the vHCI dip if it exists
933 * else NULL
934 */
935 dev_info_t *
mdi_devi_get_vdip(dev_info_t * pdip)936 mdi_devi_get_vdip(dev_info_t *pdip)
937 {
938 mdi_phci_t *ph;
939
940 ph = i_devi_get_phci(pdip);
941 if (ph && ph->ph_vhci)
942 return (ph->ph_vhci->vh_dip);
943 return (NULL);
944 }
945
946 /*
947 * mdi_devi_pdip_entered():
948 * Return 1 if we are vHCI and have done an ndi_devi_enter
949 * of a pHCI
950 */
951 int
mdi_devi_pdip_entered(dev_info_t * vdip)952 mdi_devi_pdip_entered(dev_info_t *vdip)
953 {
954 mdi_vhci_t *vh;
955 mdi_phci_t *ph;
956
957 vh = i_devi_get_vhci(vdip);
958 if (vh == NULL)
959 return (0);
960
961 MDI_VHCI_PHCI_LOCK(vh);
962 ph = vh->vh_phci_head;
963 while (ph) {
964 if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) {
965 MDI_VHCI_PHCI_UNLOCK(vh);
966 return (1);
967 }
968 ph = ph->ph_next;
969 }
970 MDI_VHCI_PHCI_UNLOCK(vh);
971 return (0);
972 }
973
974 /*
975 * mdi_phci_path2devinfo():
976 * Utility function to search for a valid phci device given
977 * the devfs pathname.
978 */
979 dev_info_t *
mdi_phci_path2devinfo(dev_info_t * vdip,caddr_t pathname)980 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname)
981 {
982 char *temp_pathname;
983 mdi_vhci_t *vh;
984 mdi_phci_t *ph;
985 dev_info_t *pdip = NULL;
986
987 vh = i_devi_get_vhci(vdip);
988 ASSERT(vh != NULL);
989
990 if (vh == NULL) {
991 /*
992 * Invalid vHCI component, return failure
993 */
994 return (NULL);
995 }
996
997 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
998 MDI_VHCI_PHCI_LOCK(vh);
999 ph = vh->vh_phci_head;
1000 while (ph != NULL) {
1001 pdip = ph->ph_dip;
1002 ASSERT(pdip != NULL);
1003 *temp_pathname = '\0';
1004 (void) ddi_pathname(pdip, temp_pathname);
1005 if (strcmp(temp_pathname, pathname) == 0) {
1006 break;
1007 }
1008 ph = ph->ph_next;
1009 }
1010 if (ph == NULL) {
1011 pdip = NULL;
1012 }
1013 MDI_VHCI_PHCI_UNLOCK(vh);
1014 kmem_free(temp_pathname, MAXPATHLEN);
1015 return (pdip);
1016 }
1017
1018 /*
1019 * mdi_phci_get_path_count():
1020 * get number of path information nodes associated with a given
1021 * pHCI device.
1022 */
1023 int
mdi_phci_get_path_count(dev_info_t * pdip)1024 mdi_phci_get_path_count(dev_info_t *pdip)
1025 {
1026 mdi_phci_t *ph;
1027 int count = 0;
1028
1029 ph = i_devi_get_phci(pdip);
1030 if (ph != NULL) {
1031 count = ph->ph_path_count;
1032 }
1033 return (count);
1034 }
1035
1036 /*
1037 * i_mdi_phci_lock():
1038 * Lock a pHCI device
1039 * Return Values:
1040 * None
1041 * Note:
1042 * The default locking order is:
1043 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
1044 * But there are number of situations where locks need to be
1045 * grabbed in reverse order. This routine implements try and lock
1046 * mechanism depending on the requested parameter option.
1047 */
1048 static void
i_mdi_phci_lock(mdi_phci_t * ph,mdi_pathinfo_t * pip)1049 i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip)
1050 {
1051 if (pip) {
1052 /* Reverse locking is requested. */
1053 while (MDI_PHCI_TRYLOCK(ph) == 0) {
1054 if (servicing_interrupt()) {
1055 MDI_PI_HOLD(pip);
1056 MDI_PI_UNLOCK(pip);
1057 MDI_PHCI_LOCK(ph);
1058 MDI_PI_LOCK(pip);
1059 MDI_PI_RELE(pip);
1060 break;
1061 } else {
1062 /*
1063 * tryenter failed. Try to grab again
1064 * after a small delay
1065 */
1066 MDI_PI_HOLD(pip);
1067 MDI_PI_UNLOCK(pip);
1068 delay_random(mdi_delay);
1069 MDI_PI_LOCK(pip);
1070 MDI_PI_RELE(pip);
1071 }
1072 }
1073 } else {
1074 MDI_PHCI_LOCK(ph);
1075 }
1076 }
1077
1078 /*
1079 * i_mdi_phci_unlock():
1080 * Unlock the pHCI component
1081 */
1082 static void
i_mdi_phci_unlock(mdi_phci_t * ph)1083 i_mdi_phci_unlock(mdi_phci_t *ph)
1084 {
1085 MDI_PHCI_UNLOCK(ph);
1086 }
1087
1088 /*
1089 * i_mdi_devinfo_create():
1090 * create client device's devinfo node
1091 * Return Values:
1092 * dev_info
1093 * NULL
1094 * Notes:
1095 */
1096 static dev_info_t *
i_mdi_devinfo_create(mdi_vhci_t * vh,char * name,char * guid,char ** compatible,int ncompatible)1097 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid,
1098 char **compatible, int ncompatible)
1099 {
1100 dev_info_t *cdip = NULL;
1101
1102 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1103
1104 /* Verify for duplicate entry */
1105 cdip = i_mdi_devinfo_find(vh, name, guid);
1106 ASSERT(cdip == NULL);
1107 if (cdip) {
1108 cmn_err(CE_WARN,
1109 "i_mdi_devinfo_create: client %s@%s already exists",
1110 name ? name : "", guid ? guid : "");
1111 }
1112
1113 ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip);
1114 if (cdip == NULL)
1115 goto fail;
1116
1117 /*
1118 * Create component type and Global unique identifier
1119 * properties
1120 */
1121 if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip,
1122 MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) {
1123 goto fail;
1124 }
1125
1126 /* Decorate the node with compatible property */
1127 if (compatible &&
1128 (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip,
1129 "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) {
1130 goto fail;
1131 }
1132
1133 return (cdip);
1134
1135 fail:
1136 if (cdip) {
1137 (void) ndi_prop_remove_all(cdip);
1138 (void) ndi_devi_free(cdip);
1139 }
1140 return (NULL);
1141 }
1142
1143 /*
1144 * i_mdi_devinfo_find():
1145 * Find a matching devinfo node for given client node name
1146 * and its guid.
1147 * Return Values:
1148 * Handle to a dev_info node or NULL
1149 */
1150 static dev_info_t *
i_mdi_devinfo_find(mdi_vhci_t * vh,caddr_t name,char * guid)1151 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid)
1152 {
1153 char *data;
1154 dev_info_t *cdip = NULL;
1155 dev_info_t *ndip = NULL;
1156
1157 ndi_devi_enter(vh->vh_dip);
1158 ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child;
1159 while ((cdip = ndip) != NULL) {
1160 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
1161
1162 if (strcmp(DEVI(cdip)->devi_node_name, name)) {
1163 continue;
1164 }
1165
1166 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip,
1167 DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP,
1168 &data) != DDI_PROP_SUCCESS) {
1169 continue;
1170 }
1171
1172 if (strcmp(data, guid) != 0) {
1173 ddi_prop_free(data);
1174 continue;
1175 }
1176 ddi_prop_free(data);
1177 break;
1178 }
1179 ndi_devi_exit(vh->vh_dip);
1180 return (cdip);
1181 }
1182
1183 /*
1184 * i_mdi_devinfo_remove():
1185 * Remove a client device node
1186 */
1187 static int
i_mdi_devinfo_remove(dev_info_t * vdip,dev_info_t * cdip,int flags)1188 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags)
1189 {
1190 int rv = MDI_SUCCESS;
1191
1192 if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS ||
1193 (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) {
1194 rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN | NDI_DEVI_REMOVE);
1195 if (rv != NDI_SUCCESS) {
1196 MDI_DEBUG(1, (MDI_NOTE, cdip,
1197 "!failed: cdip %p", (void *)cdip));
1198 }
1199 /*
1200 * Convert to MDI error code
1201 */
1202 switch (rv) {
1203 case NDI_SUCCESS:
1204 rv = MDI_SUCCESS;
1205 break;
1206 case NDI_BUSY:
1207 rv = MDI_BUSY;
1208 break;
1209 default:
1210 rv = MDI_FAILURE;
1211 break;
1212 }
1213 }
1214 return (rv);
1215 }
1216
1217 /*
1218 * i_devi_get_client()
1219 * Utility function to get mpxio component extensions
1220 */
1221 static mdi_client_t *
i_devi_get_client(dev_info_t * cdip)1222 i_devi_get_client(dev_info_t *cdip)
1223 {
1224 mdi_client_t *ct = NULL;
1225
1226 if (MDI_CLIENT(cdip)) {
1227 ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client;
1228 }
1229 return (ct);
1230 }
1231
1232 /*
1233 * i_mdi_is_child_present():
1234 * Search for the presence of client device dev_info node
1235 */
1236 static int
i_mdi_is_child_present(dev_info_t * vdip,dev_info_t * cdip)1237 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip)
1238 {
1239 int rv = MDI_FAILURE;
1240 struct dev_info *dip;
1241
1242 ndi_devi_enter(vdip);
1243 dip = DEVI(vdip)->devi_child;
1244 while (dip) {
1245 if (dip == DEVI(cdip)) {
1246 rv = MDI_SUCCESS;
1247 break;
1248 }
1249 dip = dip->devi_sibling;
1250 }
1251 ndi_devi_exit(vdip);
1252 return (rv);
1253 }
1254
1255
1256 /*
1257 * i_mdi_client_lock():
1258 * Grab client component lock
1259 * Return Values:
1260 * None
1261 * Note:
1262 * The default locking order is:
1263 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
1264 * But there are number of situations where locks need to be
1265 * grabbed in reverse order. This routine implements try and lock
1266 * mechanism depending on the requested parameter option.
1267 */
1268 static void
i_mdi_client_lock(mdi_client_t * ct,mdi_pathinfo_t * pip)1269 i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip)
1270 {
1271 if (pip) {
1272 /*
1273 * Reverse locking is requested.
1274 */
1275 while (MDI_CLIENT_TRYLOCK(ct) == 0) {
1276 if (servicing_interrupt()) {
1277 MDI_PI_HOLD(pip);
1278 MDI_PI_UNLOCK(pip);
1279 MDI_CLIENT_LOCK(ct);
1280 MDI_PI_LOCK(pip);
1281 MDI_PI_RELE(pip);
1282 break;
1283 } else {
1284 /*
1285 * tryenter failed. Try to grab again
1286 * after a small delay
1287 */
1288 MDI_PI_HOLD(pip);
1289 MDI_PI_UNLOCK(pip);
1290 delay_random(mdi_delay);
1291 MDI_PI_LOCK(pip);
1292 MDI_PI_RELE(pip);
1293 }
1294 }
1295 } else {
1296 MDI_CLIENT_LOCK(ct);
1297 }
1298 }
1299
1300 /*
1301 * i_mdi_client_unlock():
1302 * Unlock a client component
1303 */
1304 static void
i_mdi_client_unlock(mdi_client_t * ct)1305 i_mdi_client_unlock(mdi_client_t *ct)
1306 {
1307 MDI_CLIENT_UNLOCK(ct);
1308 }
1309
1310 /*
1311 * i_mdi_client_alloc():
1312 * Allocate and initialize a client structure. Caller should
1313 * hold the vhci client lock.
1314 * Return Values:
1315 * Handle to a client component
1316 */
1317 /*ARGSUSED*/
1318 static mdi_client_t *
i_mdi_client_alloc(mdi_vhci_t * vh,char * name,char * lguid)1319 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid)
1320 {
1321 mdi_client_t *ct;
1322
1323 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1324
1325 /*
1326 * Allocate and initialize a component structure.
1327 */
1328 ct = kmem_zalloc(sizeof (*ct), KM_SLEEP);
1329 mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
1330 ct->ct_hnext = NULL;
1331 ct->ct_hprev = NULL;
1332 ct->ct_dip = NULL;
1333 ct->ct_vhci = vh;
1334 ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP);
1335 (void) strcpy(ct->ct_drvname, name);
1336 ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP);
1337 (void) strcpy(ct->ct_guid, lguid);
1338 ct->ct_cprivate = NULL;
1339 ct->ct_vprivate = NULL;
1340 ct->ct_flags = 0;
1341 ct->ct_state = MDI_CLIENT_STATE_FAILED;
1342 MDI_CLIENT_LOCK(ct);
1343 MDI_CLIENT_SET_OFFLINE(ct);
1344 MDI_CLIENT_SET_DETACH(ct);
1345 MDI_CLIENT_SET_POWER_UP(ct);
1346 MDI_CLIENT_UNLOCK(ct);
1347 ct->ct_failover_flags = 0;
1348 ct->ct_failover_status = 0;
1349 cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL);
1350 ct->ct_unstable = 0;
1351 cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL);
1352 cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL);
1353 ct->ct_lb = vh->vh_lb;
1354 ct->ct_lb_args = kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP);
1355 ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE;
1356 ct->ct_path_count = 0;
1357 ct->ct_path_head = NULL;
1358 ct->ct_path_tail = NULL;
1359 ct->ct_path_last = NULL;
1360
1361 /*
1362 * Add this client component to our client hash queue
1363 */
1364 i_mdi_client_enlist_table(vh, ct);
1365 return (ct);
1366 }
1367
1368 /*
1369 * i_mdi_client_enlist_table():
1370 * Attach the client device to the client hash table. Caller
1371 * should hold the vhci client lock.
1372 */
1373 static void
i_mdi_client_enlist_table(mdi_vhci_t * vh,mdi_client_t * ct)1374 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1375 {
1376 int index;
1377 struct client_hash *head;
1378
1379 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1380
1381 index = i_mdi_get_hash_key(ct->ct_guid);
1382 head = &vh->vh_client_table[index];
1383 ct->ct_hnext = (mdi_client_t *)head->ct_hash_head;
1384 head->ct_hash_head = ct;
1385 head->ct_hash_count++;
1386 vh->vh_client_count++;
1387 }
1388
1389 /*
1390 * i_mdi_client_delist_table():
1391 * Attach the client device to the client hash table.
1392 * Caller should hold the vhci client lock.
1393 */
1394 static void
i_mdi_client_delist_table(mdi_vhci_t * vh,mdi_client_t * ct)1395 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1396 {
1397 int index;
1398 char *guid;
1399 struct client_hash *head;
1400 mdi_client_t *next;
1401 mdi_client_t *last;
1402
1403 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1404
1405 guid = ct->ct_guid;
1406 index = i_mdi_get_hash_key(guid);
1407 head = &vh->vh_client_table[index];
1408
1409 last = NULL;
1410 next = (mdi_client_t *)head->ct_hash_head;
1411 while (next != NULL) {
1412 if (next == ct) {
1413 break;
1414 }
1415 last = next;
1416 next = next->ct_hnext;
1417 }
1418
1419 if (next) {
1420 head->ct_hash_count--;
1421 if (last == NULL) {
1422 head->ct_hash_head = ct->ct_hnext;
1423 } else {
1424 last->ct_hnext = ct->ct_hnext;
1425 }
1426 ct->ct_hnext = NULL;
1427 vh->vh_client_count--;
1428 }
1429 }
1430
1431
1432 /*
1433 * i_mdi_client_free():
1434 * Free a client component
1435 */
1436 static int
i_mdi_client_free(mdi_vhci_t * vh,mdi_client_t * ct)1437 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct)
1438 {
1439 int rv = MDI_SUCCESS;
1440 int flags = ct->ct_flags;
1441 dev_info_t *cdip;
1442 dev_info_t *vdip;
1443
1444 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1445
1446 vdip = vh->vh_dip;
1447 cdip = ct->ct_dip;
1448
1449 (void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP);
1450 DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT;
1451 DEVI(cdip)->devi_mdi_client = NULL;
1452
1453 /*
1454 * Clear out back ref. to dev_info_t node
1455 */
1456 ct->ct_dip = NULL;
1457
1458 /*
1459 * Remove this client from our hash queue
1460 */
1461 i_mdi_client_delist_table(vh, ct);
1462
1463 /*
1464 * Uninitialize and free the component
1465 */
1466 kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1);
1467 kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1);
1468 kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t));
1469 cv_destroy(&ct->ct_failover_cv);
1470 cv_destroy(&ct->ct_unstable_cv);
1471 cv_destroy(&ct->ct_powerchange_cv);
1472 mutex_destroy(&ct->ct_mutex);
1473 kmem_free(ct, sizeof (*ct));
1474
1475 MDI_VHCI_CLIENT_UNLOCK(vh);
1476 (void) i_mdi_devinfo_remove(vdip, cdip, flags);
1477 MDI_VHCI_CLIENT_LOCK(vh);
1478
1479 return (rv);
1480 }
1481
1482 /*
1483 * i_mdi_client_find():
1484 * Find the client structure corresponding to a given guid
1485 * Caller should hold the vhci client lock.
1486 */
1487 static mdi_client_t *
i_mdi_client_find(mdi_vhci_t * vh,char * cname,char * guid)1488 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid)
1489 {
1490 int index;
1491 struct client_hash *head;
1492 mdi_client_t *ct;
1493
1494 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1495
1496 index = i_mdi_get_hash_key(guid);
1497 head = &vh->vh_client_table[index];
1498
1499 ct = head->ct_hash_head;
1500 while (ct != NULL) {
1501 if (strcmp(ct->ct_guid, guid) == 0 &&
1502 (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) {
1503 break;
1504 }
1505 ct = ct->ct_hnext;
1506 }
1507 return (ct);
1508 }
1509
1510 /*
1511 * i_mdi_client_update_state():
1512 * Compute and update client device state
1513 * Notes:
1514 * A client device can be in any of three possible states:
1515 *
1516 * MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more
1517 * one online/standby paths. Can tolerate failures.
1518 * MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with
1519 * no alternate paths available as standby. A failure on the online
1520 * would result in loss of access to device data.
1521 * MDI_CLIENT_STATE_FAILED - Client device in failed state with
1522 * no paths available to access the device.
1523 */
1524 static void
i_mdi_client_update_state(mdi_client_t * ct)1525 i_mdi_client_update_state(mdi_client_t *ct)
1526 {
1527 int state;
1528
1529 ASSERT(MDI_CLIENT_LOCKED(ct));
1530 state = i_mdi_client_compute_state(ct, NULL);
1531 MDI_CLIENT_SET_STATE(ct, state);
1532 }
1533
1534 /*
1535 * i_mdi_client_compute_state():
1536 * Compute client device state
1537 *
1538 * mdi_phci_t * Pointer to pHCI structure which should
1539 * while computing the new value. Used by
1540 * i_mdi_phci_offline() to find the new
1541 * client state after DR of a pHCI.
1542 */
1543 static int
i_mdi_client_compute_state(mdi_client_t * ct,mdi_phci_t * ph)1544 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph)
1545 {
1546 int state;
1547 int online_count = 0;
1548 int standby_count = 0;
1549 mdi_pathinfo_t *pip, *next;
1550
1551 ASSERT(MDI_CLIENT_LOCKED(ct));
1552 pip = ct->ct_path_head;
1553 while (pip != NULL) {
1554 MDI_PI_LOCK(pip);
1555 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
1556 if (MDI_PI(pip)->pi_phci == ph) {
1557 MDI_PI_UNLOCK(pip);
1558 pip = next;
1559 continue;
1560 }
1561
1562 if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1563 == MDI_PATHINFO_STATE_ONLINE)
1564 online_count++;
1565 else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1566 == MDI_PATHINFO_STATE_STANDBY)
1567 standby_count++;
1568 MDI_PI_UNLOCK(pip);
1569 pip = next;
1570 }
1571
1572 if (online_count == 0) {
1573 if (standby_count == 0) {
1574 state = MDI_CLIENT_STATE_FAILED;
1575 MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip,
1576 "client state failed: ct = %p", (void *)ct));
1577 } else if (standby_count == 1) {
1578 state = MDI_CLIENT_STATE_DEGRADED;
1579 } else {
1580 state = MDI_CLIENT_STATE_OPTIMAL;
1581 }
1582 } else if (online_count == 1) {
1583 if (standby_count == 0) {
1584 state = MDI_CLIENT_STATE_DEGRADED;
1585 } else {
1586 state = MDI_CLIENT_STATE_OPTIMAL;
1587 }
1588 } else {
1589 state = MDI_CLIENT_STATE_OPTIMAL;
1590 }
1591 return (state);
1592 }
1593
1594 /*
1595 * i_mdi_client2devinfo():
1596 * Utility function
1597 */
1598 dev_info_t *
i_mdi_client2devinfo(mdi_client_t * ct)1599 i_mdi_client2devinfo(mdi_client_t *ct)
1600 {
1601 return (ct->ct_dip);
1602 }
1603
1604 /*
1605 * mdi_client_path2_devinfo():
1606 * Given the parent devinfo and child devfs pathname, search for
1607 * a valid devfs node handle.
1608 */
1609 dev_info_t *
mdi_client_path2devinfo(dev_info_t * vdip,char * pathname)1610 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname)
1611 {
1612 dev_info_t *cdip = NULL;
1613 dev_info_t *ndip = NULL;
1614 char *temp_pathname;
1615
1616 /*
1617 * Allocate temp buffer
1618 */
1619 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1620
1621 /*
1622 * Lock parent against changes
1623 */
1624 ndi_devi_enter(vdip);
1625 ndip = (dev_info_t *)DEVI(vdip)->devi_child;
1626 while ((cdip = ndip) != NULL) {
1627 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
1628
1629 *temp_pathname = '\0';
1630 (void) ddi_pathname(cdip, temp_pathname);
1631 if (strcmp(temp_pathname, pathname) == 0) {
1632 break;
1633 }
1634 }
1635 /*
1636 * Release devinfo lock
1637 */
1638 ndi_devi_exit(vdip);
1639
1640 /*
1641 * Free the temp buffer
1642 */
1643 kmem_free(temp_pathname, MAXPATHLEN);
1644 return (cdip);
1645 }
1646
1647 /*
1648 * mdi_client_get_path_count():
1649 * Utility function to get number of path information nodes
1650 * associated with a given client device.
1651 */
1652 int
mdi_client_get_path_count(dev_info_t * cdip)1653 mdi_client_get_path_count(dev_info_t *cdip)
1654 {
1655 mdi_client_t *ct;
1656 int count = 0;
1657
1658 ct = i_devi_get_client(cdip);
1659 if (ct != NULL) {
1660 count = ct->ct_path_count;
1661 }
1662 return (count);
1663 }
1664
1665
1666 /*
1667 * i_mdi_get_hash_key():
1668 * Create a hash using strings as keys
1669 *
1670 */
1671 static int
i_mdi_get_hash_key(char * str)1672 i_mdi_get_hash_key(char *str)
1673 {
1674 uint32_t g, hash = 0;
1675 char *p;
1676
1677 for (p = str; *p != '\0'; p++) {
1678 g = *p;
1679 hash += g;
1680 }
1681 return (hash % (CLIENT_HASH_TABLE_SIZE - 1));
1682 }
1683
1684 /*
1685 * mdi_get_lb_policy():
1686 * Get current load balancing policy for a given client device
1687 */
1688 client_lb_t
mdi_get_lb_policy(dev_info_t * cdip)1689 mdi_get_lb_policy(dev_info_t *cdip)
1690 {
1691 client_lb_t lb = LOAD_BALANCE_NONE;
1692 mdi_client_t *ct;
1693
1694 ct = i_devi_get_client(cdip);
1695 if (ct != NULL) {
1696 lb = ct->ct_lb;
1697 }
1698 return (lb);
1699 }
1700
1701 /*
1702 * mdi_set_lb_region_size():
1703 * Set current region size for the load-balance
1704 */
1705 int
mdi_set_lb_region_size(dev_info_t * cdip,int region_size)1706 mdi_set_lb_region_size(dev_info_t *cdip, int region_size)
1707 {
1708 mdi_client_t *ct;
1709 int rv = MDI_FAILURE;
1710
1711 ct = i_devi_get_client(cdip);
1712 if (ct != NULL && ct->ct_lb_args != NULL) {
1713 ct->ct_lb_args->region_size = region_size;
1714 rv = MDI_SUCCESS;
1715 }
1716 return (rv);
1717 }
1718
1719 /*
1720 * mdi_Set_lb_policy():
1721 * Set current load balancing policy for a given client device
1722 */
1723 int
mdi_set_lb_policy(dev_info_t * cdip,client_lb_t lb)1724 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb)
1725 {
1726 mdi_client_t *ct;
1727 int rv = MDI_FAILURE;
1728
1729 ct = i_devi_get_client(cdip);
1730 if (ct != NULL) {
1731 ct->ct_lb = lb;
1732 rv = MDI_SUCCESS;
1733 }
1734 return (rv);
1735 }
1736
/*
 * mdi_failover_cb():
 *		Void-returning wrapper around i_mdi_failover(), used as the
 *		function handed to taskq_dispatch() by mdi_failover().  The
 *		failover status is dropped here.
 */
static void
mdi_failover_cb(void *arg)
{
	(void) i_mdi_failover(arg);
}
1742
1743 /*
1744 * mdi_failover():
1745 * failover function called by the vHCI drivers to initiate
1746 * a failover operation. This is typically due to non-availability
1747 * of online paths to route I/O requests. Failover can be
1748 * triggered through user application also.
1749 *
1750 * The vHCI driver calls mdi_failover() to initiate a failover
1751 * operation. mdi_failover() calls back into the vHCI driver's
1752 * vo_failover() entry point to perform the actual failover
1753 * operation. The reason for requiring the vHCI driver to
1754 * initiate failover by calling mdi_failover(), instead of directly
1755 * executing vo_failover() itself, is to ensure that the mdi
1756 * framework can keep track of the client state properly.
1757 * Additionally, mdi_failover() provides as a convenience the
1758 * option of performing the failover operation synchronously or
1759 * asynchronously
1760 *
1761 * Upon successful completion of the failover operation, the
1762 * paths that were previously ONLINE will be in the STANDBY state,
1763 * and the newly activated paths will be in the ONLINE state.
1764 *
1765 * The flags modifier determines whether the activation is done
1766 * synchronously: MDI_FAILOVER_SYNC
1767 * Return Values:
1768 * MDI_SUCCESS
1769 * MDI_FAILURE
1770 * MDI_BUSY
1771 */
1772 /*ARGSUSED*/
1773 int
mdi_failover(dev_info_t * vdip,dev_info_t * cdip,int flags)1774 mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags)
1775 {
1776 int rv;
1777 mdi_client_t *ct;
1778
1779 ct = i_devi_get_client(cdip);
1780 ASSERT(ct != NULL);
1781 if (ct == NULL) {
1782 /* cdip is not a valid client device. Nothing more to do. */
1783 return (MDI_FAILURE);
1784 }
1785
1786 MDI_CLIENT_LOCK(ct);
1787
1788 if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) {
1789 /* A path to the client is being freed */
1790 MDI_CLIENT_UNLOCK(ct);
1791 return (MDI_BUSY);
1792 }
1793
1794
1795 if (MDI_CLIENT_IS_FAILED(ct)) {
1796 /*
1797 * Client is in failed state. Nothing more to do.
1798 */
1799 MDI_CLIENT_UNLOCK(ct);
1800 return (MDI_FAILURE);
1801 }
1802
1803 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
1804 /*
1805 * Failover is already in progress; return BUSY
1806 */
1807 MDI_CLIENT_UNLOCK(ct);
1808 return (MDI_BUSY);
1809 }
1810 /*
1811 * Make sure that mdi_pathinfo node state changes are processed.
1812 * We do not allow failovers to progress while client path state
1813 * changes are in progress
1814 */
1815 if (ct->ct_unstable) {
1816 if (flags == MDI_FAILOVER_ASYNC) {
1817 MDI_CLIENT_UNLOCK(ct);
1818 return (MDI_BUSY);
1819 } else {
1820 while (ct->ct_unstable)
1821 cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex);
1822 }
1823 }
1824
1825 /*
1826 * Client device is in stable state. Before proceeding, perform sanity
1827 * checks again.
1828 */
1829 if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) ||
1830 (!i_ddi_devi_attached(cdip))) {
1831 /*
1832 * Client is in failed state. Nothing more to do.
1833 */
1834 MDI_CLIENT_UNLOCK(ct);
1835 return (MDI_FAILURE);
1836 }
1837
1838 /*
1839 * Set the client state as failover in progress.
1840 */
1841 MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct);
1842 ct->ct_failover_flags = flags;
1843 MDI_CLIENT_UNLOCK(ct);
1844
1845 if (flags == MDI_FAILOVER_ASYNC) {
1846 /*
1847 * Submit the initiate failover request via CPR safe
1848 * taskq threads.
1849 */
1850 (void) taskq_dispatch(mdi_taskq, mdi_failover_cb, ct, KM_SLEEP);
1851 return (MDI_ACCEPT);
1852 } else {
1853 /*
1854 * Synchronous failover mode. Typically invoked from the user
1855 * land.
1856 */
1857 rv = i_mdi_failover(ct);
1858 }
1859 return (rv);
1860 }
1861
1862 /*
1863 * i_mdi_failover():
1864 * internal failover function. Invokes vHCI drivers failover
1865 * callback function and process the failover status
1866 * Return Values:
1867 * None
1868 *
1869 * Note: A client device in failover state can not be detached or freed.
1870 */
1871 static int
i_mdi_failover(void * arg)1872 i_mdi_failover(void *arg)
1873 {
1874 int rv = MDI_SUCCESS;
1875 mdi_client_t *ct = (mdi_client_t *)arg;
1876 mdi_vhci_t *vh = ct->ct_vhci;
1877
1878 ASSERT(!MDI_CLIENT_LOCKED(ct));
1879
1880 if (vh->vh_ops->vo_failover != NULL) {
1881 /*
1882 * Call vHCI drivers callback routine
1883 */
1884 rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip,
1885 ct->ct_failover_flags);
1886 }
1887
1888 MDI_CLIENT_LOCK(ct);
1889 MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct);
1890
1891 /*
1892 * Save the failover return status
1893 */
1894 ct->ct_failover_status = rv;
1895
1896 /*
1897 * As a result of failover, client status would have been changed.
1898 * Update the client state and wake up anyone waiting on this client
1899 * device.
1900 */
1901 i_mdi_client_update_state(ct);
1902
1903 cv_broadcast(&ct->ct_failover_cv);
1904 MDI_CLIENT_UNLOCK(ct);
1905 return (rv);
1906 }
1907
1908 /*
1909 * Load balancing is logical block.
1910 * IOs within the range described by region_size
1911 * would go on the same path. This would improve the
1912 * performance by cache-hit on some of the RAID devices.
1913 * Search only for online paths(At some point we
1914 * may want to balance across target ports).
1915 * If no paths are found then default to round-robin.
1916 */
1917 static int
i_mdi_lba_lb(mdi_client_t * ct,mdi_pathinfo_t ** ret_pip,struct buf * bp)1918 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp)
1919 {
1920 int path_index = -1;
1921 int online_path_count = 0;
1922 int online_nonpref_path_count = 0;
1923 int region_size = ct->ct_lb_args->region_size;
1924 mdi_pathinfo_t *pip;
1925 mdi_pathinfo_t *next;
1926 int preferred, path_cnt;
1927
1928 pip = ct->ct_path_head;
1929 while (pip) {
1930 MDI_PI_LOCK(pip);
1931 if (MDI_PI(pip)->pi_state ==
1932 MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) {
1933 online_path_count++;
1934 } else if (MDI_PI(pip)->pi_state ==
1935 MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) {
1936 online_nonpref_path_count++;
1937 }
1938 next = (mdi_pathinfo_t *)
1939 MDI_PI(pip)->pi_client_link;
1940 MDI_PI_UNLOCK(pip);
1941 pip = next;
1942 }
1943 /* if found any online/preferred then use this type */
1944 if (online_path_count > 0) {
1945 path_cnt = online_path_count;
1946 preferred = 1;
1947 } else if (online_nonpref_path_count > 0) {
1948 path_cnt = online_nonpref_path_count;
1949 preferred = 0;
1950 } else {
1951 path_cnt = 0;
1952 }
1953 if (path_cnt) {
1954 path_index = (bp->b_blkno >> region_size) % path_cnt;
1955 pip = ct->ct_path_head;
1956 while (pip && path_index != -1) {
1957 MDI_PI_LOCK(pip);
1958 if (path_index == 0 &&
1959 (MDI_PI(pip)->pi_state ==
1960 MDI_PATHINFO_STATE_ONLINE) &&
1961 MDI_PI(pip)->pi_preferred == preferred) {
1962 MDI_PI_HOLD(pip);
1963 MDI_PI_UNLOCK(pip);
1964 *ret_pip = pip;
1965 return (MDI_SUCCESS);
1966 }
1967 path_index --;
1968 next = (mdi_pathinfo_t *)
1969 MDI_PI(pip)->pi_client_link;
1970 MDI_PI_UNLOCK(pip);
1971 pip = next;
1972 }
1973 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
1974 "lba %llx: path %s %p",
1975 bp->b_lblkno, mdi_pi_spathname(pip), (void *)pip));
1976 }
1977 return (MDI_FAILURE);
1978 }
1979
1980 /*
1981 * mdi_select_path():
1982 * select a path to access a client device.
1983 *
1984 * mdi_select_path() function is called by the vHCI drivers to
1985 * select a path to route the I/O request to. The caller passes
1986 * the block I/O data transfer structure ("buf") as one of the
1987 * parameters. The mpxio framework uses the buf structure
1988 * contents to maintain per path statistics (total I/O size /
1989 * count pending). If more than one online paths are available to
1990 * select, the framework automatically selects a suitable path
1991 * for routing I/O request. If a failover operation is active for
1992 * this client device the call shall be failed with MDI_BUSY error
1993 * code.
1994 *
1995 * By default this function returns a suitable path in online
1996 * state based on the current load balancing policy. Currently
1997 * we support LOAD_BALANCE_NONE (Previously selected online path
1998 * will continue to be used till the path is usable) and
1999 * LOAD_BALANCE_RR (Online paths will be selected in a round
2000 * robin fashion), LOAD_BALANCE_LB(Online paths will be selected
 *		based on the logical block). The load balancing policy is
 *		set through the vHCI driver's configuration file
 *		(driver.conf).
2003 *
2004 * vHCI drivers may override this default behavior by specifying
 *		appropriate flags. The meaning of the "arg" argument depends
2006 * on the flags specified. If MDI_SELECT_PATH_INSTANCE is set
2007 * then the argument is the "path instance" of the path to select.
2008 * If MDI_SELECT_PATH_INSTANCE is not set then the argument is
2009 * "start_pip". A non NULL "start_pip" is the starting point to
2010 * walk and find the next appropriate path. The following values
2011 * are currently defined: MDI_SELECT_ONLINE_PATH (to select an
2012 * ONLINE path) and/or MDI_SELECT_STANDBY_PATH (to select an
2013 * STANDBY path).
2014 *
2015 * The non-standard behavior is used by the scsi_vhci driver,
2016 * whenever it has to use a STANDBY/FAULTED path. Eg. during
2017 * attach of client devices (to avoid an unnecessary failover
2018 * when the STANDBY path comes up first), during failover
2019 * (to activate a STANDBY path as ONLINE).
2020 *
 *		The selected path is returned in a mdi_hold_path() state
2022 * (pi_ref_cnt). Caller should release the hold by calling
2023 * mdi_rele_path().
2024 *
2025 * Return Values:
2026 * MDI_SUCCESS - Completed successfully
2027 * MDI_BUSY - Client device is busy failing over
2028 * MDI_NOPATH - Client device is online, but no valid path are
2029 * available to access this client device
2030 * MDI_FAILURE - Invalid client device or state
2031 * MDI_DEVI_ONLINING
2032 * - Client device (struct dev_info state) is in
2033 * onlining state.
2034 */
2035
2036 /*ARGSUSED*/
2037 int
mdi_select_path(dev_info_t * cdip,struct buf * bp,int flags,void * arg,mdi_pathinfo_t ** ret_pip)2038 mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
2039 void *arg, mdi_pathinfo_t **ret_pip)
2040 {
2041 mdi_client_t *ct;
2042 mdi_pathinfo_t *pip;
2043 mdi_pathinfo_t *next;
2044 mdi_pathinfo_t *head;
2045 mdi_pathinfo_t *start;
2046 client_lb_t lbp; /* load balancing policy */
2047 int sb = 1; /* standard behavior */
2048 int preferred = 1; /* preferred path */
2049 int cond, cont = 1;
2050 int retry = 0;
2051 mdi_pathinfo_t *start_pip; /* request starting pathinfo */
2052 int path_instance; /* request specific path instance */
2053
2054 /* determine type of arg based on flags */
2055 if (flags & MDI_SELECT_PATH_INSTANCE) {
2056 path_instance = (int)(intptr_t)arg;
2057 start_pip = NULL;
2058 } else {
2059 path_instance = 0;
2060 start_pip = (mdi_pathinfo_t *)arg;
2061 }
2062
2063 if (flags != 0) {
2064 /*
2065 * disable default behavior
2066 */
2067 sb = 0;
2068 }
2069
2070 *ret_pip = NULL;
2071 ct = i_devi_get_client(cdip);
2072 if (ct == NULL) {
2073 /* mdi extensions are NULL, Nothing more to do */
2074 return (MDI_FAILURE);
2075 }
2076
2077 MDI_CLIENT_LOCK(ct);
2078
2079 if (sb) {
2080 if (MDI_CLIENT_IS_FAILED(ct)) {
2081 /*
2082 * Client is not ready to accept any I/O requests.
2083 * Fail this request.
2084 */
2085 MDI_DEBUG(2, (MDI_NOTE, cdip,
2086 "client state offline ct = %p", (void *)ct));
2087 MDI_CLIENT_UNLOCK(ct);
2088 return (MDI_FAILURE);
2089 }
2090
2091 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
2092 /*
2093 * Check for Failover is in progress. If so tell the
2094 * caller that this device is busy.
2095 */
2096 MDI_DEBUG(2, (MDI_NOTE, cdip,
2097 "client failover in progress ct = %p",
2098 (void *)ct));
2099 MDI_CLIENT_UNLOCK(ct);
2100 return (MDI_BUSY);
2101 }
2102
2103 /*
2104 * Check to see whether the client device is attached.
2105 * If not so, let the vHCI driver manually select a path
2106 * (standby) and let the probe/attach process to continue.
2107 */
2108 if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) {
2109 MDI_DEBUG(4, (MDI_NOTE, cdip,
2110 "devi is onlining ct = %p", (void *)ct));
2111 MDI_CLIENT_UNLOCK(ct);
2112 return (MDI_DEVI_ONLINING);
2113 }
2114 }
2115
2116 /*
2117 * Cache in the client list head. If head of the list is NULL
2118 * return MDI_NOPATH
2119 */
2120 head = ct->ct_path_head;
2121 if (head == NULL) {
2122 MDI_CLIENT_UNLOCK(ct);
2123 return (MDI_NOPATH);
2124 }
2125
2126 /* Caller is specifying a specific pathinfo path by path_instance */
2127 if (path_instance) {
2128 /* search for pathinfo with correct path_instance */
2129 for (pip = head;
2130 pip && (mdi_pi_get_path_instance(pip) != path_instance);
2131 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link)
2132 ;
2133
2134 /* If path can't be selected then MDI_NOPATH is returned. */
2135 if (pip == NULL) {
2136 MDI_CLIENT_UNLOCK(ct);
2137 return (MDI_NOPATH);
2138 }
2139
2140 /*
2141 * Verify state of path. When asked to select a specific
2142 * path_instance, we select the requested path in any
2143 * state (ONLINE, OFFLINE, STANDBY, FAULT) other than INIT.
2144 * We don't however select paths where the pHCI has detached.
2145 * NOTE: last pathinfo node of an opened client device may
2146 * exist in an OFFLINE state after the pHCI associated with
2147 * that path has detached (but pi_phci will be NULL if that
2148 * has occurred).
2149 */
2150 MDI_PI_LOCK(pip);
2151 if ((MDI_PI(pip)->pi_state == MDI_PATHINFO_STATE_INIT) ||
2152 (MDI_PI(pip)->pi_phci == NULL)) {
2153 MDI_PI_UNLOCK(pip);
2154 MDI_CLIENT_UNLOCK(ct);
2155 return (MDI_FAILURE);
2156 }
2157
2158 /* Return MDI_BUSY if we have a transient condition */
2159 if (MDI_PI_IS_TRANSIENT(pip)) {
2160 MDI_PI_UNLOCK(pip);
2161 MDI_CLIENT_UNLOCK(ct);
2162 return (MDI_BUSY);
2163 }
2164
2165 /*
2166 * Return the path in hold state. Caller should release the
2167 * lock by calling mdi_rele_path()
2168 */
2169 MDI_PI_HOLD(pip);
2170 MDI_PI_UNLOCK(pip);
2171 *ret_pip = pip;
2172 MDI_CLIENT_UNLOCK(ct);
2173 return (MDI_SUCCESS);
2174 }
2175
2176 /*
2177 * for non default behavior, bypass current
2178 * load balancing policy and always use LOAD_BALANCE_RR
2179 * except that the start point will be adjusted based
2180 * on the provided start_pip
2181 */
2182 lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;
2183
2184 switch (lbp) {
2185 case LOAD_BALANCE_NONE:
2186 /*
2187 * Load balancing is None or Alternate path mode
2188 * Start looking for a online mdi_pathinfo node starting from
2189 * last known selected path
2190 */
2191 preferred = 1;
2192 pip = (mdi_pathinfo_t *)ct->ct_path_last;
2193 if (pip == NULL) {
2194 pip = head;
2195 }
2196 start = pip;
2197 do {
2198 MDI_PI_LOCK(pip);
2199 /*
2200 * No need to explicitly check if the path is disabled.
2201 * Since we are checking for state == ONLINE and the
2202 * same variable is used for DISABLE/ENABLE information.
2203 */
2204 if ((MDI_PI(pip)->pi_state ==
2205 MDI_PATHINFO_STATE_ONLINE) &&
2206 preferred == MDI_PI(pip)->pi_preferred) {
2207 /*
2208 * Return the path in hold state. Caller should
2209 * release the lock by calling mdi_rele_path()
2210 */
2211 MDI_PI_HOLD(pip);
2212 MDI_PI_UNLOCK(pip);
2213 ct->ct_path_last = pip;
2214 *ret_pip = pip;
2215 MDI_CLIENT_UNLOCK(ct);
2216 return (MDI_SUCCESS);
2217 }
2218
2219 /*
2220 * Path is busy.
2221 */
2222 if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
2223 MDI_PI_IS_TRANSIENT(pip))
2224 retry = 1;
2225 /*
2226 * Keep looking for a next available online path
2227 */
2228 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2229 if (next == NULL) {
2230 next = head;
2231 }
2232 MDI_PI_UNLOCK(pip);
2233 pip = next;
2234 if (start == pip && preferred) {
2235 preferred = 0;
2236 } else if (start == pip && !preferred) {
2237 cont = 0;
2238 }
2239 } while (cont);
2240 break;
2241
2242 case LOAD_BALANCE_LBA:
2243 /*
2244 * Make sure we are looking
2245 * for an online path. Otherwise, if it is for a STANDBY
2246 * path request, it will go through and fetch an ONLINE
2247 * path which is not desirable.
2248 */
2249 if ((ct->ct_lb_args != NULL) &&
2250 (ct->ct_lb_args->region_size) && bp &&
2251 (sb || (flags == MDI_SELECT_ONLINE_PATH))) {
2252 if (i_mdi_lba_lb(ct, ret_pip, bp)
2253 == MDI_SUCCESS) {
2254 MDI_CLIENT_UNLOCK(ct);
2255 return (MDI_SUCCESS);
2256 }
2257 }
2258 /* FALLTHROUGH */
2259 case LOAD_BALANCE_RR:
2260 /*
2261 * Load balancing is Round Robin. Start looking for a online
2262 * mdi_pathinfo node starting from last known selected path
2263 * as the start point. If override flags are specified,
2264 * process accordingly.
2265 * If the search is already in effect(start_pip not null),
2266 * then lets just use the same path preference to continue the
2267 * traversal.
2268 */
2269
2270 if (start_pip != NULL) {
2271 preferred = MDI_PI(start_pip)->pi_preferred;
2272 } else {
2273 preferred = 1;
2274 }
2275
2276 start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
2277 if (start == NULL) {
2278 pip = head;
2279 } else {
2280 pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
2281 if (pip == NULL) {
2282 if ( flags & MDI_SELECT_NO_PREFERRED) {
2283 /*
2284 * Return since we hit the end of list
2285 */
2286 MDI_CLIENT_UNLOCK(ct);
2287 return (MDI_NOPATH);
2288 }
2289
2290 if (!sb) {
2291 if (preferred == 0) {
2292 /*
2293 * Looks like we have completed
2294 * the traversal as preferred
2295 * value is 0. Time to bail out.
2296 */
2297 *ret_pip = NULL;
2298 MDI_CLIENT_UNLOCK(ct);
2299 return (MDI_NOPATH);
2300 } else {
2301 /*
2302 * Looks like we reached the
2303 * end of the list. Lets enable
2304 * traversal of non preferred
2305 * paths.
2306 */
2307 preferred = 0;
2308 }
2309 }
2310 pip = head;
2311 }
2312 }
2313 start = pip;
2314 do {
2315 MDI_PI_LOCK(pip);
2316 if (sb) {
2317 cond = ((MDI_PI(pip)->pi_state ==
2318 MDI_PATHINFO_STATE_ONLINE &&
2319 MDI_PI(pip)->pi_preferred ==
2320 preferred) ? 1 : 0);
2321 } else {
2322 if (flags == MDI_SELECT_ONLINE_PATH) {
2323 cond = ((MDI_PI(pip)->pi_state ==
2324 MDI_PATHINFO_STATE_ONLINE &&
2325 MDI_PI(pip)->pi_preferred ==
2326 preferred) ? 1 : 0);
2327 } else if (flags == MDI_SELECT_STANDBY_PATH) {
2328 cond = ((MDI_PI(pip)->pi_state ==
2329 MDI_PATHINFO_STATE_STANDBY &&
2330 MDI_PI(pip)->pi_preferred ==
2331 preferred) ? 1 : 0);
2332 } else if (flags == (MDI_SELECT_ONLINE_PATH |
2333 MDI_SELECT_STANDBY_PATH)) {
2334 cond = (((MDI_PI(pip)->pi_state ==
2335 MDI_PATHINFO_STATE_ONLINE ||
2336 (MDI_PI(pip)->pi_state ==
2337 MDI_PATHINFO_STATE_STANDBY)) &&
2338 MDI_PI(pip)->pi_preferred ==
2339 preferred) ? 1 : 0);
2340 } else if (flags ==
2341 (MDI_SELECT_STANDBY_PATH |
2342 MDI_SELECT_ONLINE_PATH |
2343 MDI_SELECT_USER_DISABLE_PATH)) {
2344 cond = (((MDI_PI(pip)->pi_state ==
2345 MDI_PATHINFO_STATE_ONLINE ||
2346 (MDI_PI(pip)->pi_state ==
2347 MDI_PATHINFO_STATE_STANDBY) ||
2348 (MDI_PI(pip)->pi_state ==
2349 (MDI_PATHINFO_STATE_ONLINE|
2350 MDI_PATHINFO_STATE_USER_DISABLE)) ||
2351 (MDI_PI(pip)->pi_state ==
2352 (MDI_PATHINFO_STATE_STANDBY |
2353 MDI_PATHINFO_STATE_USER_DISABLE)))&&
2354 MDI_PI(pip)->pi_preferred ==
2355 preferred) ? 1 : 0);
2356 } else if (flags ==
2357 (MDI_SELECT_STANDBY_PATH |
2358 MDI_SELECT_ONLINE_PATH |
2359 MDI_SELECT_NO_PREFERRED)) {
2360 cond = (((MDI_PI(pip)->pi_state ==
2361 MDI_PATHINFO_STATE_ONLINE) ||
2362 (MDI_PI(pip)->pi_state ==
2363 MDI_PATHINFO_STATE_STANDBY))
2364 ? 1 : 0);
2365 } else {
2366 cond = 0;
2367 }
2368 }
2369 /*
2370 * No need to explicitly check if the path is disabled.
2371 * Since we are checking for state == ONLINE and the
2372 * same variable is used for DISABLE/ENABLE information.
2373 */
2374 if (cond) {
2375 /*
2376 * Return the path in hold state. Caller should
2377 * release the lock by calling mdi_rele_path()
2378 */
2379 MDI_PI_HOLD(pip);
2380 MDI_PI_UNLOCK(pip);
2381 if (sb)
2382 ct->ct_path_last = pip;
2383 *ret_pip = pip;
2384 MDI_CLIENT_UNLOCK(ct);
2385 return (MDI_SUCCESS);
2386 }
2387 /*
2388 * Path is busy.
2389 */
2390 if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
2391 MDI_PI_IS_TRANSIENT(pip))
2392 retry = 1;
2393
2394 /*
2395 * Keep looking for a next available online path
2396 */
2397 do_again:
2398 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2399 if (next == NULL) {
2400 if ( flags & MDI_SELECT_NO_PREFERRED) {
2401 /*
2402 * Bail out since we hit the end of list
2403 */
2404 MDI_PI_UNLOCK(pip);
2405 break;
2406 }
2407
2408 if (!sb) {
2409 if (preferred == 1) {
2410 /*
2411 * Looks like we reached the
2412 * end of the list. Lets enable
2413 * traversal of non preferred
2414 * paths.
2415 */
2416 preferred = 0;
2417 next = head;
2418 } else {
2419 /*
2420 * We have done both the passes
2421 * Preferred as well as for
2422 * Non-preferred. Bail out now.
2423 */
2424 cont = 0;
2425 }
2426 } else {
2427 /*
2428 * Standard behavior case.
2429 */
2430 next = head;
2431 }
2432 }
2433 MDI_PI_UNLOCK(pip);
2434 if (cont == 0) {
2435 break;
2436 }
2437 pip = next;
2438
2439 if (!sb) {
2440 /*
2441 * We need to handle the selection of
2442 * non-preferred path in the following
2443 * case:
2444 *
2445 * +------+ +------+ +------+ +-----+
2446 * | A : 1| - | B : 1| - | C : 0| - |NULL |
2447 * +------+ +------+ +------+ +-----+
2448 *
2449 * If we start the search with B, we need to
2450 * skip beyond B to pick C which is non -
2451 * preferred in the second pass. The following
2452 * test, if true, will allow us to skip over
2453 * the 'start'(B in the example) to select
2454 * other non preferred elements.
2455 */
2456 if ((start_pip != NULL) && (start_pip == pip) &&
2457 (MDI_PI(start_pip)->pi_preferred
2458 != preferred)) {
2459 /*
2460 * try again after going past the start
2461 * pip
2462 */
2463 MDI_PI_LOCK(pip);
2464 goto do_again;
2465 }
2466 } else {
2467 /*
2468 * Standard behavior case
2469 */
2470 if (start == pip && preferred) {
2471 /* look for nonpreferred paths */
2472 preferred = 0;
2473 } else if (start == pip && !preferred) {
2474 /*
2475 * Exit condition
2476 */
2477 cont = 0;
2478 }
2479 }
2480 } while (cont);
2481 break;
2482 }
2483
2484 MDI_CLIENT_UNLOCK(ct);
2485 if (retry == 1) {
2486 return (MDI_BUSY);
2487 } else {
2488 return (MDI_NOPATH);
2489 }
2490 }
2491
2492 /*
2493 * For a client, return the next available path to any phci
2494 *
2495 * Note:
2496 * Caller should hold the branch's devinfo node to get a consistent
2497 * snap shot of the mdi_pathinfo nodes.
2498 *
2499 * Please note that even the list is stable the mdi_pathinfo
2500 * node state and properties are volatile. The caller should lock
2501 * and unlock the nodes by calling mdi_pi_lock() and
2502 * mdi_pi_unlock() functions to get a stable properties.
2503 *
2504 * If there is a need to use the nodes beyond the hold of the
2505 * devinfo node period (For ex. I/O), then mdi_pathinfo node
2506 * need to be held against unexpected removal by calling
2507 * mdi_hold_path() and should be released by calling
2508 * mdi_rele_path() on completion.
2509 */
2510 mdi_pathinfo_t *
mdi_get_next_phci_path(dev_info_t * ct_dip,mdi_pathinfo_t * pip)2511 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip)
2512 {
2513 mdi_client_t *ct;
2514
2515 if (!MDI_CLIENT(ct_dip))
2516 return (NULL);
2517
2518 /*
2519 * Walk through client link
2520 */
2521 ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client;
2522 ASSERT(ct != NULL);
2523
2524 if (pip == NULL)
2525 return ((mdi_pathinfo_t *)ct->ct_path_head);
2526
2527 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link);
2528 }
2529
2530 /*
2531 * For a phci, return the next available path to any client
2532 * Note: ditto mdi_get_next_phci_path()
2533 */
2534 mdi_pathinfo_t *
mdi_get_next_client_path(dev_info_t * ph_dip,mdi_pathinfo_t * pip)2535 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip)
2536 {
2537 mdi_phci_t *ph;
2538
2539 if (!MDI_PHCI(ph_dip))
2540 return (NULL);
2541
2542 /*
2543 * Walk through pHCI link
2544 */
2545 ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci;
2546 ASSERT(ph != NULL);
2547
2548 if (pip == NULL)
2549 return ((mdi_pathinfo_t *)ph->ph_path_head);
2550
2551 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link);
2552 }
2553
2554 /*
2555 * mdi_hold_path():
2556 * Hold the mdi_pathinfo node against unwanted unexpected free.
2557 * Return Values:
2558 * None
2559 */
2560 void
mdi_hold_path(mdi_pathinfo_t * pip)2561 mdi_hold_path(mdi_pathinfo_t *pip)
2562 {
2563 if (pip) {
2564 MDI_PI_LOCK(pip);
2565 MDI_PI_HOLD(pip);
2566 MDI_PI_UNLOCK(pip);
2567 }
2568 }
2569
2570
2571 /*
2572 * mdi_rele_path():
2573 * Release the mdi_pathinfo node which was selected
2574 * through mdi_select_path() mechanism or manually held by
2575 * calling mdi_hold_path().
2576 * Return Values:
2577 * None
2578 */
2579 void
mdi_rele_path(mdi_pathinfo_t * pip)2580 mdi_rele_path(mdi_pathinfo_t *pip)
2581 {
2582 if (pip) {
2583 MDI_PI_LOCK(pip);
2584 MDI_PI_RELE(pip);
2585 if (MDI_PI(pip)->pi_ref_cnt == 0) {
2586 cv_broadcast(&MDI_PI(pip)->pi_ref_cv);
2587 }
2588 MDI_PI_UNLOCK(pip);
2589 }
2590 }
2591
2592 /*
2593 * mdi_pi_lock():
2594 * Lock the mdi_pathinfo node.
2595 * Note:
2596 * The caller should release the lock by calling mdi_pi_unlock()
2597 */
2598 void
mdi_pi_lock(mdi_pathinfo_t * pip)2599 mdi_pi_lock(mdi_pathinfo_t *pip)
2600 {
2601 ASSERT(pip != NULL);
2602 if (pip) {
2603 MDI_PI_LOCK(pip);
2604 }
2605 }
2606
2607
2608 /*
2609 * mdi_pi_unlock():
2610 * Unlock the mdi_pathinfo node.
2611 * Note:
2612 * The mdi_pathinfo node should have been locked with mdi_pi_lock()
2613 */
2614 void
mdi_pi_unlock(mdi_pathinfo_t * pip)2615 mdi_pi_unlock(mdi_pathinfo_t *pip)
2616 {
2617 ASSERT(pip != NULL);
2618 if (pip) {
2619 MDI_PI_UNLOCK(pip);
2620 }
2621 }
2622
2623 /*
2624 * mdi_pi_find():
2625 * Search the list of mdi_pathinfo nodes attached to the
2626 * pHCI/Client device node whose path address matches "paddr".
2627 * Returns a pointer to the mdi_pathinfo node if a matching node is
2628 * found.
2629 * Return Values:
2630 * mdi_pathinfo node handle
2631 * NULL
2632 * Notes:
2633 * Caller need not hold any locks to call this function.
2634 */
2635 mdi_pathinfo_t *
mdi_pi_find(dev_info_t * pdip,char * caddr,char * paddr)2636 mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr)
2637 {
2638 mdi_phci_t *ph;
2639 mdi_vhci_t *vh;
2640 mdi_client_t *ct;
2641 mdi_pathinfo_t *pip = NULL;
2642
2643 MDI_DEBUG(2, (MDI_NOTE, pdip,
2644 "caddr@%s paddr@%s", caddr ? caddr : "", paddr ? paddr : ""));
2645 if ((pdip == NULL) || (paddr == NULL)) {
2646 return (NULL);
2647 }
2648 ph = i_devi_get_phci(pdip);
2649 if (ph == NULL) {
2650 /*
2651 * Invalid pHCI device, Nothing more to do.
2652 */
2653 MDI_DEBUG(2, (MDI_WARN, pdip, "invalid phci"));
2654 return (NULL);
2655 }
2656
2657 vh = ph->ph_vhci;
2658 if (vh == NULL) {
2659 /*
2660 * Invalid vHCI device, Nothing more to do.
2661 */
2662 MDI_DEBUG(2, (MDI_WARN, pdip, "invalid vhci"));
2663 return (NULL);
2664 }
2665
2666 /*
2667 * Look for pathinfo node identified by paddr.
2668 */
2669 if (caddr == NULL) {
2670 /*
2671 * Find a mdi_pathinfo node under pHCI list for a matching
2672 * unit address.
2673 */
2674 MDI_PHCI_LOCK(ph);
2675 if (MDI_PHCI_IS_OFFLINE(ph)) {
2676 MDI_DEBUG(2, (MDI_WARN, pdip,
2677 "offline phci %p", (void *)ph));
2678 MDI_PHCI_UNLOCK(ph);
2679 return (NULL);
2680 }
2681 pip = (mdi_pathinfo_t *)ph->ph_path_head;
2682
2683 while (pip != NULL) {
2684 if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2685 break;
2686 }
2687 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
2688 }
2689 MDI_PHCI_UNLOCK(ph);
2690 MDI_DEBUG(2, (MDI_NOTE, pdip,
2691 "found %s %p", mdi_pi_spathname(pip), (void *)pip));
2692 return (pip);
2693 }
2694
2695 /*
2696 * XXX - Is the rest of the code in this function really necessary?
2697 * The consumers of mdi_pi_find() can search for the desired pathinfo
2698 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of
2699 * whether the search is based on the pathinfo nodes attached to
2700 * the pHCI or the client node, the result will be the same.
2701 */
2702
2703 /*
2704 * Find the client device corresponding to 'caddr'
2705 */
2706 MDI_VHCI_CLIENT_LOCK(vh);
2707
2708 /*
2709 * XXX - Passing NULL to the following function works as long as the
2710 * the client addresses (caddr) are unique per vhci basis.
2711 */
2712 ct = i_mdi_client_find(vh, NULL, caddr);
2713 if (ct == NULL) {
2714 /*
2715 * Client not found, Obviously mdi_pathinfo node has not been
2716 * created yet.
2717 */
2718 MDI_VHCI_CLIENT_UNLOCK(vh);
2719 MDI_DEBUG(2, (MDI_NOTE, pdip,
2720 "client not found for caddr @%s", caddr ? caddr : ""));
2721 return (NULL);
2722 }
2723
2724 /*
2725 * Hold the client lock and look for a mdi_pathinfo node with matching
2726 * pHCI and paddr
2727 */
2728 MDI_CLIENT_LOCK(ct);
2729
2730 /*
2731 * Release the global mutex as it is no more needed. Note: We always
2732 * respect the locking order while acquiring.
2733 */
2734 MDI_VHCI_CLIENT_UNLOCK(vh);
2735
2736 pip = (mdi_pathinfo_t *)ct->ct_path_head;
2737 while (pip != NULL) {
2738 /*
2739 * Compare the unit address
2740 */
2741 if ((MDI_PI(pip)->pi_phci == ph) &&
2742 strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2743 break;
2744 }
2745 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2746 }
2747 MDI_CLIENT_UNLOCK(ct);
2748 MDI_DEBUG(2, (MDI_NOTE, pdip,
2749 "found: %s %p", mdi_pi_spathname(pip), (void *)pip));
2750 return (pip);
2751 }
2752
2753 /*
2754 * mdi_pi_alloc():
2755 * Allocate and initialize a new instance of a mdi_pathinfo node.
2756 * The mdi_pathinfo node returned by this function identifies a
2757 * unique device path is capable of having properties attached
2758 * and passed to mdi_pi_online() to fully attach and online the
2759 * path and client device node.
2760 * The mdi_pathinfo node returned by this function must be
2761 * destroyed using mdi_pi_free() if the path is no longer
2762 * operational or if the caller fails to attach a client device
2763 * node when calling mdi_pi_online(). The framework will not free
2764 * the resources allocated.
2765 * This function can be called from both interrupt and kernel
2766 * contexts. DDI_NOSLEEP flag should be used while calling
2767 * from interrupt contexts.
2768 * Return Values:
2769 * MDI_SUCCESS
2770 * MDI_FAILURE
2771 * MDI_NOMEM
2772 */
2773 /*ARGSUSED*/
2774 int
mdi_pi_alloc_compatible(dev_info_t * pdip,char * cname,char * caddr,char * paddr,char ** compatible,int ncompatible,int flags,mdi_pathinfo_t ** ret_pip)2775 mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2776 char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip)
2777 {
2778 mdi_vhci_t *vh;
2779 mdi_phci_t *ph;
2780 mdi_client_t *ct;
2781 mdi_pathinfo_t *pip = NULL;
2782 dev_info_t *cdip;
2783 int rv = MDI_NOMEM;
2784 int path_allocated = 0;
2785
2786 MDI_DEBUG(2, (MDI_NOTE, pdip,
2787 "cname %s: caddr@%s paddr@%s",
2788 cname ? cname : "", caddr ? caddr : "", paddr ? paddr : ""));
2789
2790 if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL ||
2791 ret_pip == NULL) {
2792 /* Nothing more to do */
2793 return (MDI_FAILURE);
2794 }
2795
2796 *ret_pip = NULL;
2797
2798 /* No allocations on detaching pHCI */
2799 if (DEVI_IS_DETACHING(pdip)) {
2800 /* Invalid pHCI device, return failure */
2801 MDI_DEBUG(1, (MDI_WARN, pdip,
2802 "!detaching pHCI=%p", (void *)pdip));
2803 return (MDI_FAILURE);
2804 }
2805
2806 ph = i_devi_get_phci(pdip);
2807 ASSERT(ph != NULL);
2808 if (ph == NULL) {
2809 /* Invalid pHCI device, return failure */
2810 MDI_DEBUG(1, (MDI_WARN, pdip,
2811 "!invalid pHCI=%p", (void *)pdip));
2812 return (MDI_FAILURE);
2813 }
2814
2815 MDI_PHCI_LOCK(ph);
2816 vh = ph->ph_vhci;
2817 if (vh == NULL) {
2818 /* Invalid vHCI device, return failure */
2819 MDI_DEBUG(1, (MDI_WARN, pdip,
2820 "!invalid vHCI=%p", (void *)pdip));
2821 MDI_PHCI_UNLOCK(ph);
2822 return (MDI_FAILURE);
2823 }
2824
2825 if (MDI_PHCI_IS_READY(ph) == 0) {
2826 /*
2827 * Do not allow new node creation when pHCI is in
2828 * offline/suspended states
2829 */
2830 MDI_DEBUG(1, (MDI_WARN, pdip,
2831 "pHCI=%p is not ready", (void *)ph));
2832 MDI_PHCI_UNLOCK(ph);
2833 return (MDI_BUSY);
2834 }
2835 MDI_PHCI_UNSTABLE(ph);
2836 MDI_PHCI_UNLOCK(ph);
2837
2838 /* look for a matching client, create one if not found */
2839 MDI_VHCI_CLIENT_LOCK(vh);
2840 ct = i_mdi_client_find(vh, cname, caddr);
2841 if (ct == NULL) {
2842 ct = i_mdi_client_alloc(vh, cname, caddr);
2843 ASSERT(ct != NULL);
2844 }
2845
2846 if (ct->ct_dip == NULL) {
2847 /*
2848 * Allocate a devinfo node
2849 */
2850 ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr,
2851 compatible, ncompatible);
2852 if (ct->ct_dip == NULL) {
2853 (void) i_mdi_client_free(vh, ct);
2854 goto fail;
2855 }
2856 }
2857 cdip = ct->ct_dip;
2858
2859 DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT;
2860 DEVI(cdip)->devi_mdi_client = (caddr_t)ct;
2861
2862 MDI_CLIENT_LOCK(ct);
2863 pip = (mdi_pathinfo_t *)ct->ct_path_head;
2864 while (pip != NULL) {
2865 /*
2866 * Compare the unit address
2867 */
2868 if ((MDI_PI(pip)->pi_phci == ph) &&
2869 strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2870 break;
2871 }
2872 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2873 }
2874 MDI_CLIENT_UNLOCK(ct);
2875
2876 if (pip == NULL) {
2877 /*
2878 * This is a new path for this client device. Allocate and
2879 * initialize a new pathinfo node
2880 */
2881 pip = i_mdi_pi_alloc(ph, paddr, ct);
2882 ASSERT(pip != NULL);
2883 path_allocated = 1;
2884 }
2885 rv = MDI_SUCCESS;
2886
2887 fail:
2888 /*
2889 * Release the global mutex.
2890 */
2891 MDI_VHCI_CLIENT_UNLOCK(vh);
2892
2893 /*
2894 * Mark the pHCI as stable
2895 */
2896 MDI_PHCI_LOCK(ph);
2897 MDI_PHCI_STABLE(ph);
2898 MDI_PHCI_UNLOCK(ph);
2899 *ret_pip = pip;
2900
2901 MDI_DEBUG(2, (MDI_NOTE, pdip,
2902 "alloc %s %p", mdi_pi_spathname(pip), (void *)pip));
2903
2904 if (path_allocated)
2905 vhcache_pi_add(vh->vh_config, MDI_PI(pip));
2906
2907 return (rv);
2908 }
2909
2910 /*ARGSUSED*/
2911 int
mdi_pi_alloc(dev_info_t * pdip,char * cname,char * caddr,char * paddr,int flags,mdi_pathinfo_t ** ret_pip)2912 mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2913 int flags, mdi_pathinfo_t **ret_pip)
2914 {
2915 return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0,
2916 flags, ret_pip));
2917 }
2918
2919 /*
2920 * i_mdi_pi_alloc():
2921 * Allocate a mdi_pathinfo node and add to the pHCI path list
2922 * Return Values:
2923 * mdi_pathinfo
2924 */
2925 /*ARGSUSED*/
2926 static mdi_pathinfo_t *
i_mdi_pi_alloc(mdi_phci_t * ph,char * paddr,mdi_client_t * ct)2927 i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct)
2928 {
2929 mdi_pathinfo_t *pip;
2930 static char path[MAXPATHLEN]; /* mdi_pathmap_mutex protects */
2931 char *path_persistent;
2932 int path_instance;
2933 mod_hash_val_t hv;
2934
2935 ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci));
2936
2937 pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP);
2938 mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL);
2939 MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT |
2940 MDI_PATHINFO_STATE_TRANSIENT;
2941
2942 if (MDI_PHCI_IS_USER_DISABLED(ph))
2943 MDI_PI_SET_USER_DISABLE(pip);
2944
2945 if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph))
2946 MDI_PI_SET_DRV_DISABLE_TRANS(pip);
2947
2948 if (MDI_PHCI_IS_DRV_DISABLED(ph))
2949 MDI_PI_SET_DRV_DISABLE(pip);
2950
2951 MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT;
2952 cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL);
2953 MDI_PI(pip)->pi_client = ct;
2954 MDI_PI(pip)->pi_phci = ph;
2955 MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP);
2956 (void) strcpy(MDI_PI(pip)->pi_addr, paddr);
2957
2958 /*
2959 * We form the "path" to the pathinfo node, and see if we have
2960 * already allocated a 'path_instance' for that "path". If so,
2961 * we use the already allocated 'path_instance'. If not, we
2962 * allocate a new 'path_instance' and associate it with a copy of
2963 * the "path" string (which is never freed). The association
2964 * between a 'path_instance' this "path" string persists until
2965 * reboot.
2966 */
2967 mutex_enter(&mdi_pathmap_mutex);
2968 (void) ddi_pathname(ph->ph_dip, path);
2969 (void) sprintf(path + strlen(path), "/%s@%s",
2970 mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip));
2971 if (mod_hash_find(mdi_pathmap_bypath, (mod_hash_key_t)path, &hv) == 0) {
2972 path_instance = (uint_t)(intptr_t)hv;
2973 } else {
2974 /* allocate a new 'path_instance' and persistent "path" */
2975 path_instance = mdi_pathmap_instance++;
2976 path_persistent = i_ddi_strdup(path, KM_SLEEP);
2977 (void) mod_hash_insert(mdi_pathmap_bypath,
2978 (mod_hash_key_t)path_persistent,
2979 (mod_hash_val_t)(intptr_t)path_instance);
2980 (void) mod_hash_insert(mdi_pathmap_byinstance,
2981 (mod_hash_key_t)(intptr_t)path_instance,
2982 (mod_hash_val_t)path_persistent);
2983
2984 /* create shortpath name */
2985 (void) snprintf(path, sizeof(path), "%s%d/%s@%s",
2986 ddi_driver_name(ph->ph_dip), ddi_get_instance(ph->ph_dip),
2987 mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip));
2988 path_persistent = i_ddi_strdup(path, KM_SLEEP);
2989 (void) mod_hash_insert(mdi_pathmap_sbyinstance,
2990 (mod_hash_key_t)(intptr_t)path_instance,
2991 (mod_hash_val_t)path_persistent);
2992 }
2993 mutex_exit(&mdi_pathmap_mutex);
2994 MDI_PI(pip)->pi_path_instance = path_instance;
2995
2996 (void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP);
2997 ASSERT(MDI_PI(pip)->pi_prop != NULL);
2998 MDI_PI(pip)->pi_pprivate = NULL;
2999 MDI_PI(pip)->pi_cprivate = NULL;
3000 MDI_PI(pip)->pi_vprivate = NULL;
3001 MDI_PI(pip)->pi_client_link = NULL;
3002 MDI_PI(pip)->pi_phci_link = NULL;
3003 MDI_PI(pip)->pi_ref_cnt = 0;
3004 MDI_PI(pip)->pi_kstats = NULL;
3005 MDI_PI(pip)->pi_preferred = 1;
3006 cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL);
3007
3008 /*
3009 * Lock both dev_info nodes against changes in parallel.
3010 *
3011 * The ndi_devi_enter(Client), is atypical since the client is a leaf.
3012 * This atypical operation is done to synchronize pathinfo nodes
3013 * during devinfo snapshot (see di_register_pip) by 'pretending' that
3014 * the pathinfo nodes are children of the Client.
3015 */
3016 ndi_devi_enter(ct->ct_dip);
3017 ndi_devi_enter(ph->ph_dip);
3018
3019 i_mdi_phci_add_path(ph, pip);
3020 i_mdi_client_add_path(ct, pip);
3021
3022 ndi_devi_exit(ph->ph_dip);
3023 ndi_devi_exit(ct->ct_dip);
3024
3025 return (pip);
3026 }
3027
3028 /*
3029 * mdi_pi_pathname_by_instance():
3030 * Lookup of "path" by 'path_instance'. Return "path".
3031 * NOTE: returned "path" remains valid forever (until reboot).
3032 */
3033 char *
mdi_pi_pathname_by_instance(int path_instance)3034 mdi_pi_pathname_by_instance(int path_instance)
3035 {
3036 char *path;
3037 mod_hash_val_t hv;
3038
3039 /* mdi_pathmap lookup of "path" by 'path_instance' */
3040 mutex_enter(&mdi_pathmap_mutex);
3041 if (mod_hash_find(mdi_pathmap_byinstance,
3042 (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0)
3043 path = (char *)hv;
3044 else
3045 path = NULL;
3046 mutex_exit(&mdi_pathmap_mutex);
3047 return (path);
3048 }
3049
3050 /*
3051 * mdi_pi_spathname_by_instance():
3052 * Lookup of "shortpath" by 'path_instance'. Return "shortpath".
3053 * NOTE: returned "shortpath" remains valid forever (until reboot).
3054 */
3055 char *
mdi_pi_spathname_by_instance(int path_instance)3056 mdi_pi_spathname_by_instance(int path_instance)
3057 {
3058 char *path;
3059 mod_hash_val_t hv;
3060
3061 /* mdi_pathmap lookup of "path" by 'path_instance' */
3062 mutex_enter(&mdi_pathmap_mutex);
3063 if (mod_hash_find(mdi_pathmap_sbyinstance,
3064 (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0)
3065 path = (char *)hv;
3066 else
3067 path = NULL;
3068 mutex_exit(&mdi_pathmap_mutex);
3069 return (path);
3070 }
3071
3072
3073 /*
3074 * i_mdi_phci_add_path():
3075 * Add a mdi_pathinfo node to pHCI list.
3076 * Notes:
3077 * Caller should per-pHCI mutex
3078 */
3079 static void
i_mdi_phci_add_path(mdi_phci_t * ph,mdi_pathinfo_t * pip)3080 i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
3081 {
3082 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
3083
3084 MDI_PHCI_LOCK(ph);
3085 if (ph->ph_path_head == NULL) {
3086 ph->ph_path_head = pip;
3087 } else {
3088 MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip);
3089 }
3090 ph->ph_path_tail = pip;
3091 ph->ph_path_count++;
3092 MDI_PHCI_UNLOCK(ph);
3093 }
3094
3095 /*
3096 * i_mdi_client_add_path():
3097 * Add mdi_pathinfo node to client list
3098 */
3099 static void
i_mdi_client_add_path(mdi_client_t * ct,mdi_pathinfo_t * pip)3100 i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
3101 {
3102 ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
3103
3104 MDI_CLIENT_LOCK(ct);
3105 if (ct->ct_path_head == NULL) {
3106 ct->ct_path_head = pip;
3107 } else {
3108 MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip);
3109 }
3110 ct->ct_path_tail = pip;
3111 ct->ct_path_count++;
3112 MDI_CLIENT_UNLOCK(ct);
3113 }
3114
3115 /*
3116 * mdi_pi_free():
3117 * Free the mdi_pathinfo node and also client device node if this
3118 * is the last path to the device
3119 * Return Values:
3120 * MDI_SUCCESS
3121 * MDI_FAILURE
3122 * MDI_BUSY
3123 */
3124 /*ARGSUSED*/
3125 int
mdi_pi_free(mdi_pathinfo_t * pip,int flags)3126 mdi_pi_free(mdi_pathinfo_t *pip, int flags)
3127 {
3128 int rv;
3129 mdi_vhci_t *vh;
3130 mdi_phci_t *ph;
3131 mdi_client_t *ct;
3132 int (*f)();
3133 int client_held = 0;
3134
3135 MDI_PI_LOCK(pip);
3136 ph = MDI_PI(pip)->pi_phci;
3137 ASSERT(ph != NULL);
3138 if (ph == NULL) {
3139 /*
3140 * Invalid pHCI device, return failure
3141 */
3142 MDI_DEBUG(1, (MDI_WARN, NULL,
3143 "!invalid pHCI: pip %s %p",
3144 mdi_pi_spathname(pip), (void *)pip));
3145 MDI_PI_UNLOCK(pip);
3146 return (MDI_FAILURE);
3147 }
3148
3149 vh = ph->ph_vhci;
3150 ASSERT(vh != NULL);
3151 if (vh == NULL) {
3152 /* Invalid pHCI device, return failure */
3153 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
3154 "!invalid vHCI: pip %s %p",
3155 mdi_pi_spathname(pip), (void *)pip));
3156 MDI_PI_UNLOCK(pip);
3157 return (MDI_FAILURE);
3158 }
3159
3160 ct = MDI_PI(pip)->pi_client;
3161 ASSERT(ct != NULL);
3162 if (ct == NULL) {
3163 /*
3164 * Invalid Client device, return failure
3165 */
3166 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
3167 "!invalid client: pip %s %p",
3168 mdi_pi_spathname(pip), (void *)pip));
3169 MDI_PI_UNLOCK(pip);
3170 return (MDI_FAILURE);
3171 }
3172
3173 /*
3174 * Check to see for busy condition. A mdi_pathinfo can only be freed
3175 * if the node state is either offline or init and the reference count
3176 * is zero.
3177 */
3178 if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) ||
3179 MDI_PI_IS_INITING(pip))) {
3180 /*
3181 * Node is busy
3182 */
3183 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
3184 "!busy: pip %s %p", mdi_pi_spathname(pip), (void *)pip));
3185 MDI_PI_UNLOCK(pip);
3186 return (MDI_BUSY);
3187 }
3188
3189 while (MDI_PI(pip)->pi_ref_cnt != 0) {
3190 /*
3191 * Give a chance for pending I/Os to complete.
3192 */
3193 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3194 "!%d cmds still pending on path: %s %p",
3195 MDI_PI(pip)->pi_ref_cnt,
3196 mdi_pi_spathname(pip), (void *)pip));
3197 if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv,
3198 &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000),
3199 TR_CLOCK_TICK) == -1) {
3200 /*
3201 * The timeout time reached without ref_cnt being zero
3202 * being signaled.
3203 */
3204 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3205 "!Timeout reached on path %s %p without the cond",
3206 mdi_pi_spathname(pip), (void *)pip));
3207 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3208 "!%d cmds still pending on path %s %p",
3209 MDI_PI(pip)->pi_ref_cnt,
3210 mdi_pi_spathname(pip), (void *)pip));
3211 MDI_PI_UNLOCK(pip);
3212 return (MDI_BUSY);
3213 }
3214 }
3215 if (MDI_PI(pip)->pi_pm_held) {
3216 client_held = 1;
3217 }
3218 MDI_PI_UNLOCK(pip);
3219
3220 vhcache_pi_remove(vh->vh_config, MDI_PI(pip));
3221
3222 MDI_CLIENT_LOCK(ct);
3223
3224 /* Prevent further failovers till MDI_VHCI_CLIENT_LOCK is held */
3225 MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct);
3226
3227 /*
3228 * Wait till failover is complete before removing this node.
3229 */
3230 while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
3231 cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);
3232
3233 MDI_CLIENT_UNLOCK(ct);
3234 MDI_VHCI_CLIENT_LOCK(vh);
3235 MDI_CLIENT_LOCK(ct);
3236 MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct);
3237
3238 rv = MDI_SUCCESS;
3239 if (!MDI_PI_IS_INITING(pip)) {
3240 f = vh->vh_ops->vo_pi_uninit;
3241 if (f != NULL) {
3242 rv = (*f)(vh->vh_dip, pip, 0);
3243 }
3244 }
3245
3246 /*
3247 * If vo_pi_uninit() completed successfully.
3248 */
3249 if (rv == MDI_SUCCESS) {
3250 if (client_held) {
3251 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3252 "i_mdi_pm_rele_client\n"));
3253 i_mdi_pm_rele_client(ct, 1);
3254 }
3255 i_mdi_pi_free(ph, pip, ct);
3256 if (ct->ct_path_count == 0) {
3257 /*
3258 * Client lost its last path.
3259 * Clean up the client device
3260 */
3261 MDI_CLIENT_UNLOCK(ct);
3262 (void) i_mdi_client_free(ct->ct_vhci, ct);
3263 MDI_VHCI_CLIENT_UNLOCK(vh);
3264 return (rv);
3265 }
3266 }
3267 MDI_CLIENT_UNLOCK(ct);
3268 MDI_VHCI_CLIENT_UNLOCK(vh);
3269
3270 if (rv == MDI_FAILURE)
3271 vhcache_pi_add(vh->vh_config, MDI_PI(pip));
3272
3273 return (rv);
3274 }
3275
3276 /*
3277 * i_mdi_pi_free():
3278 * Free the mdi_pathinfo node
3279 */
3280 static void
i_mdi_pi_free(mdi_phci_t * ph,mdi_pathinfo_t * pip,mdi_client_t * ct)3281 i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct)
3282 {
3283 ASSERT(MDI_CLIENT_LOCKED(ct));
3284
3285 /*
3286 * remove any per-path kstats
3287 */
3288 i_mdi_pi_kstat_destroy(pip);
3289
3290 /* See comments in i_mdi_pi_alloc() */
3291 ndi_devi_enter(ct->ct_dip);
3292 ndi_devi_enter(ph->ph_dip);
3293
3294 i_mdi_client_remove_path(ct, pip);
3295 i_mdi_phci_remove_path(ph, pip);
3296
3297 ndi_devi_exit(ph->ph_dip);
3298 ndi_devi_exit(ct->ct_dip);
3299
3300 mutex_destroy(&MDI_PI(pip)->pi_mutex);
3301 cv_destroy(&MDI_PI(pip)->pi_state_cv);
3302 cv_destroy(&MDI_PI(pip)->pi_ref_cv);
3303 if (MDI_PI(pip)->pi_addr) {
3304 kmem_free(MDI_PI(pip)->pi_addr,
3305 strlen(MDI_PI(pip)->pi_addr) + 1);
3306 MDI_PI(pip)->pi_addr = NULL;
3307 }
3308
3309 if (MDI_PI(pip)->pi_prop) {
3310 (void) nvlist_free(MDI_PI(pip)->pi_prop);
3311 MDI_PI(pip)->pi_prop = NULL;
3312 }
3313 kmem_free(pip, sizeof (struct mdi_pathinfo));
3314 }
3315
3316
3317 /*
3318 * i_mdi_phci_remove_path():
3319 * Remove a mdi_pathinfo node from pHCI list.
3320 * Notes:
3321 * Caller should hold per-pHCI mutex
3322 */
3323 static void
i_mdi_phci_remove_path(mdi_phci_t * ph,mdi_pathinfo_t * pip)3324 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
3325 {
3326 mdi_pathinfo_t *prev = NULL;
3327 mdi_pathinfo_t *path = NULL;
3328
3329 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
3330
3331 MDI_PHCI_LOCK(ph);
3332 path = ph->ph_path_head;
3333 while (path != NULL) {
3334 if (path == pip) {
3335 break;
3336 }
3337 prev = path;
3338 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
3339 }
3340
3341 if (path) {
3342 ph->ph_path_count--;
3343 if (prev) {
3344 MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link;
3345 } else {
3346 ph->ph_path_head =
3347 (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
3348 }
3349 if (ph->ph_path_tail == path) {
3350 ph->ph_path_tail = prev;
3351 }
3352 }
3353
3354 /*
3355 * Clear the pHCI link
3356 */
3357 MDI_PI(pip)->pi_phci_link = NULL;
3358 MDI_PI(pip)->pi_phci = NULL;
3359 MDI_PHCI_UNLOCK(ph);
3360 }
3361
3362 /*
3363 * i_mdi_client_remove_path():
3364 * Remove a mdi_pathinfo node from client path list.
3365 */
3366 static void
i_mdi_client_remove_path(mdi_client_t * ct,mdi_pathinfo_t * pip)3367 i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
3368 {
3369 mdi_pathinfo_t *prev = NULL;
3370 mdi_pathinfo_t *path;
3371
3372 ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
3373
3374 ASSERT(MDI_CLIENT_LOCKED(ct));
3375 path = ct->ct_path_head;
3376 while (path != NULL) {
3377 if (path == pip) {
3378 break;
3379 }
3380 prev = path;
3381 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
3382 }
3383
3384 if (path) {
3385 ct->ct_path_count--;
3386 if (prev) {
3387 MDI_PI(prev)->pi_client_link =
3388 MDI_PI(path)->pi_client_link;
3389 } else {
3390 ct->ct_path_head =
3391 (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
3392 }
3393 if (ct->ct_path_tail == path) {
3394 ct->ct_path_tail = prev;
3395 }
3396 if (ct->ct_path_last == path) {
3397 ct->ct_path_last = ct->ct_path_head;
3398 }
3399 }
3400 MDI_PI(pip)->pi_client_link = NULL;
3401 MDI_PI(pip)->pi_client = NULL;
3402 }
3403
3404 /*
3405 * i_mdi_pi_state_change():
3406 * online a mdi_pathinfo node
3407 *
3408 * Return Values:
3409 * MDI_SUCCESS
3410 * MDI_FAILURE
3411 */
3412 /*ARGSUSED*/
3413 static int
i_mdi_pi_state_change(mdi_pathinfo_t * pip,mdi_pathinfo_state_t state,int flag)3414 i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
3415 {
3416 int rv = MDI_SUCCESS;
3417 mdi_vhci_t *vh;
3418 mdi_phci_t *ph;
3419 mdi_client_t *ct;
3420 int (*f)();
3421 dev_info_t *cdip;
3422
3423 MDI_PI_LOCK(pip);
3424
3425 ph = MDI_PI(pip)->pi_phci;
3426 ASSERT(ph);
3427 if (ph == NULL) {
3428 /*
3429 * Invalid pHCI device, fail the request
3430 */
3431 MDI_PI_UNLOCK(pip);
3432 MDI_DEBUG(1, (MDI_WARN, NULL,
3433 "!invalid phci: pip %s %p",
3434 mdi_pi_spathname(pip), (void *)pip));
3435 return (MDI_FAILURE);
3436 }
3437
3438 vh = ph->ph_vhci;
3439 ASSERT(vh);
3440 if (vh == NULL) {
3441 /*
3442 * Invalid vHCI device, fail the request
3443 */
3444 MDI_PI_UNLOCK(pip);
3445 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
3446 "!invalid vhci: pip %s %p",
3447 mdi_pi_spathname(pip), (void *)pip));
3448 return (MDI_FAILURE);
3449 }
3450
3451 ct = MDI_PI(pip)->pi_client;
3452 ASSERT(ct != NULL);
3453 if (ct == NULL) {
3454 /*
3455 * Invalid client device, fail the request
3456 */
3457 MDI_PI_UNLOCK(pip);
3458 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
3459 "!invalid client: pip %s %p",
3460 mdi_pi_spathname(pip), (void *)pip));
3461 return (MDI_FAILURE);
3462 }
3463
3464 /*
3465 * If this path has not been initialized yet, Callback vHCI driver's
3466 * pathinfo node initialize entry point
3467 */
3468
3469 if (MDI_PI_IS_INITING(pip)) {
3470 MDI_PI_UNLOCK(pip);
3471 f = vh->vh_ops->vo_pi_init;
3472 if (f != NULL) {
3473 rv = (*f)(vh->vh_dip, pip, 0);
3474 if (rv != MDI_SUCCESS) {
3475 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
3476 "!vo_pi_init failed: vHCI %p, pip %s %p",
3477 (void *)vh, mdi_pi_spathname(pip),
3478 (void *)pip));
3479 return (MDI_FAILURE);
3480 }
3481 }
3482 MDI_PI_LOCK(pip);
3483 MDI_PI_CLEAR_TRANSIENT(pip);
3484 }
3485
3486 /*
3487 * Do not allow state transition when pHCI is in offline/suspended
3488 * states
3489 */
3490 i_mdi_phci_lock(ph, pip);
3491 if (MDI_PHCI_IS_READY(ph) == 0) {
3492 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
3493 "!pHCI not ready, pHCI=%p", (void *)ph));
3494 MDI_PI_UNLOCK(pip);
3495 i_mdi_phci_unlock(ph);
3496 return (MDI_BUSY);
3497 }
3498 MDI_PHCI_UNSTABLE(ph);
3499 i_mdi_phci_unlock(ph);
3500
3501 /*
3502 * Check if mdi_pathinfo state is in transient state.
3503 * If yes, offlining is in progress and wait till transient state is
3504 * cleared.
3505 */
3506 if (MDI_PI_IS_TRANSIENT(pip)) {
3507 while (MDI_PI_IS_TRANSIENT(pip)) {
3508 cv_wait(&MDI_PI(pip)->pi_state_cv,
3509 &MDI_PI(pip)->pi_mutex);
3510 }
3511 }
3512
3513 /*
3514 * Grab the client lock in reverse order sequence and release the
3515 * mdi_pathinfo mutex.
3516 */
3517 i_mdi_client_lock(ct, pip);
3518 MDI_PI_UNLOCK(pip);
3519
3520 /*
3521 * Wait till failover state is cleared
3522 */
3523 while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
3524 cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);
3525
3526 /*
3527 * Mark the mdi_pathinfo node state as transient
3528 */
3529 MDI_PI_LOCK(pip);
3530 switch (state) {
3531 case MDI_PATHINFO_STATE_ONLINE:
3532 MDI_PI_SET_ONLINING(pip);
3533 break;
3534
3535 case MDI_PATHINFO_STATE_STANDBY:
3536 MDI_PI_SET_STANDBYING(pip);
3537 break;
3538
3539 case MDI_PATHINFO_STATE_FAULT:
3540 /*
3541 * Mark the pathinfo state as FAULTED
3542 */
3543 MDI_PI_SET_FAULTING(pip);
3544 MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR);
3545 break;
3546
3547 case MDI_PATHINFO_STATE_OFFLINE:
3548 /*
3549 * ndi_devi_offline() cannot hold pip or ct locks.
3550 */
3551 MDI_PI_UNLOCK(pip);
3552
3553 /*
3554 * If this is a user initiated path online->offline operation
3555 * who's success would transition a client from DEGRADED to
3556 * FAILED then only proceed if we can offline the client first.
3557 */
3558 cdip = ct->ct_dip;
3559 if ((flag & NDI_USER_REQ) &&
3560 MDI_PI_IS_ONLINE(pip) &&
3561 (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED)) {
3562 i_mdi_client_unlock(ct);
3563 rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN);
3564 if (rv != NDI_SUCCESS) {
3565 /*
3566 * Convert to MDI error code
3567 */
3568 switch (rv) {
3569 case NDI_BUSY:
3570 rv = MDI_BUSY;
3571 break;
3572 default:
3573 rv = MDI_FAILURE;
3574 break;
3575 }
3576 goto state_change_exit;
3577 } else {
3578 i_mdi_client_lock(ct, NULL);
3579 }
3580 }
3581 /*
3582 * Mark the mdi_pathinfo node state as transient
3583 */
3584 MDI_PI_LOCK(pip);
3585 MDI_PI_SET_OFFLINING(pip);
3586 break;
3587
3588 case MDI_PATHINFO_STATE_INIT:
3589 /*
3590 * Callers are not allowed to ask us to change the state to the
3591 * initial state.
3592 */
3593 rv = MDI_FAILURE;
3594 MDI_PI_UNLOCK(pip);
3595 goto state_change_exit;
3596
3597 }
3598 MDI_PI_UNLOCK(pip);
3599 MDI_CLIENT_UNSTABLE(ct);
3600 i_mdi_client_unlock(ct);
3601
3602 f = vh->vh_ops->vo_pi_state_change;
3603 if (f != NULL)
3604 rv = (*f)(vh->vh_dip, pip, state, 0, flag);
3605
3606 MDI_CLIENT_LOCK(ct);
3607 MDI_PI_LOCK(pip);
3608 if (rv == MDI_NOT_SUPPORTED) {
3609 MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct);
3610 }
3611 if (rv != MDI_SUCCESS) {
3612 MDI_DEBUG(2, (MDI_WARN, ct->ct_dip,
3613 "vo_pi_state_change failed: rv %x", rv));
3614 }
3615 if (MDI_PI_IS_TRANSIENT(pip)) {
3616 if (rv == MDI_SUCCESS) {
3617 MDI_PI_CLEAR_TRANSIENT(pip);
3618 } else {
3619 MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
3620 }
3621 }
3622
3623 /*
3624 * Wake anyone waiting for this mdi_pathinfo node
3625 */
3626 cv_broadcast(&MDI_PI(pip)->pi_state_cv);
3627 MDI_PI_UNLOCK(pip);
3628
3629 /*
3630 * Mark the client device as stable
3631 */
3632 MDI_CLIENT_STABLE(ct);
3633 if (rv == MDI_SUCCESS) {
3634 if (ct->ct_unstable == 0) {
3635 cdip = ct->ct_dip;
3636
3637 /*
3638 * Onlining the mdi_pathinfo node will impact the
3639 * client state Update the client and dev_info node
3640 * state accordingly
3641 */
3642 rv = NDI_SUCCESS;
3643 i_mdi_client_update_state(ct);
3644 switch (MDI_CLIENT_STATE(ct)) {
3645 case MDI_CLIENT_STATE_OPTIMAL:
3646 case MDI_CLIENT_STATE_DEGRADED:
3647 if (cdip && !i_ddi_devi_attached(cdip) &&
3648 ((state == MDI_PATHINFO_STATE_ONLINE) ||
3649 (state == MDI_PATHINFO_STATE_STANDBY))) {
3650
3651 /*
3652 * Must do ndi_devi_online() through
3653 * hotplug thread for deferred
3654 * attach mechanism to work
3655 */
3656 MDI_CLIENT_UNLOCK(ct);
3657 rv = ndi_devi_online(cdip, 0);
3658 MDI_CLIENT_LOCK(ct);
3659 if ((rv != NDI_SUCCESS) &&
3660 (MDI_CLIENT_STATE(ct) ==
3661 MDI_CLIENT_STATE_DEGRADED)) {
3662 MDI_DEBUG(1, (MDI_WARN, cdip,
3663 "!ndi_devi_online failed "
3664 "error %x", rv));
3665 }
3666 rv = NDI_SUCCESS;
3667 }
3668 break;
3669
3670 case MDI_CLIENT_STATE_FAILED:
3671 /*
3672 * This is the last path case for
3673 * non-user initiated events.
3674 */
3675 if (((flag & NDI_USER_REQ) == 0) &&
3676 cdip && (i_ddi_node_state(cdip) >=
3677 DS_INITIALIZED)) {
3678 MDI_CLIENT_UNLOCK(ct);
3679 rv = ndi_devi_offline(cdip,
3680 NDI_DEVFS_CLEAN);
3681 MDI_CLIENT_LOCK(ct);
3682
3683 if (rv != NDI_SUCCESS) {
3684 /*
3685 * ndi_devi_offline failed.
3686 * Reset client flags to
3687 * online as the path could not
3688 * be offlined.
3689 */
3690 MDI_DEBUG(1, (MDI_WARN, cdip,
3691 "!ndi_devi_offline failed: "
3692 "error %x", rv));
3693 MDI_CLIENT_SET_ONLINE(ct);
3694 }
3695 }
3696 break;
3697 }
3698 /*
3699 * Convert to MDI error code
3700 */
3701 switch (rv) {
3702 case NDI_SUCCESS:
3703 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
3704 i_mdi_report_path_state(ct, pip);
3705 rv = MDI_SUCCESS;
3706 break;
3707 case NDI_BUSY:
3708 rv = MDI_BUSY;
3709 break;
3710 default:
3711 rv = MDI_FAILURE;
3712 break;
3713 }
3714 }
3715 }
3716 MDI_CLIENT_UNLOCK(ct);
3717
3718 state_change_exit:
3719 /*
3720 * Mark the pHCI as stable again.
3721 */
3722 MDI_PHCI_LOCK(ph);
3723 MDI_PHCI_STABLE(ph);
3724 MDI_PHCI_UNLOCK(ph);
3725 return (rv);
3726 }
3727
3728 /*
3729 * mdi_pi_online():
3730 * Place the path_info node in the online state. The path is
3731 * now available to be selected by mdi_select_path() for
3732 * transporting I/O requests to client devices.
3733 * Return Values:
3734 * MDI_SUCCESS
3735 * MDI_FAILURE
3736 */
3737 int
mdi_pi_online(mdi_pathinfo_t * pip,int flags)3738 mdi_pi_online(mdi_pathinfo_t *pip, int flags)
3739 {
3740 mdi_client_t *ct = MDI_PI(pip)->pi_client;
3741 int client_held = 0;
3742 int rv;
3743
3744 ASSERT(ct != NULL);
3745 rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags);
3746 if (rv != MDI_SUCCESS)
3747 return (rv);
3748
3749 MDI_PI_LOCK(pip);
3750 if (MDI_PI(pip)->pi_pm_held == 0) {
3751 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3752 "i_mdi_pm_hold_pip %p", (void *)pip));
3753 i_mdi_pm_hold_pip(pip);
3754 client_held = 1;
3755 }
3756 MDI_PI_UNLOCK(pip);
3757
3758 if (client_held) {
3759 MDI_CLIENT_LOCK(ct);
3760 if (ct->ct_power_cnt == 0) {
3761 rv = i_mdi_power_all_phci(ct);
3762 }
3763
3764 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3765 "i_mdi_pm_hold_client %p", (void *)ct));
3766 i_mdi_pm_hold_client(ct, 1);
3767 MDI_CLIENT_UNLOCK(ct);
3768 }
3769
3770 return (rv);
3771 }
3772
3773 /*
3774 * mdi_pi_standby():
3775 * Place the mdi_pathinfo node in standby state
3776 *
3777 * Return Values:
3778 * MDI_SUCCESS
3779 * MDI_FAILURE
3780 */
3781 int
mdi_pi_standby(mdi_pathinfo_t * pip,int flags)3782 mdi_pi_standby(mdi_pathinfo_t *pip, int flags)
3783 {
3784 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags));
3785 }
3786
3787 /*
3788 * mdi_pi_fault():
3789 * Place the mdi_pathinfo node in fault'ed state
3790 * Return Values:
3791 * MDI_SUCCESS
3792 * MDI_FAILURE
3793 */
3794 int
mdi_pi_fault(mdi_pathinfo_t * pip,int flags)3795 mdi_pi_fault(mdi_pathinfo_t *pip, int flags)
3796 {
3797 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags));
3798 }
3799
3800 /*
3801 * mdi_pi_offline():
3802 * Offline a mdi_pathinfo node.
3803 * Return Values:
3804 * MDI_SUCCESS
3805 * MDI_FAILURE
3806 */
3807 int
mdi_pi_offline(mdi_pathinfo_t * pip,int flags)3808 mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3809 {
3810 int ret, client_held = 0;
3811 mdi_client_t *ct;
3812
3813 /*
3814 * Original code overloaded NDI_DEVI_REMOVE to this interface, and
3815 * used it to mean "user initiated operation" (i.e. devctl). Callers
3816 * should now just use NDI_USER_REQ.
3817 */
3818 if (flags & NDI_DEVI_REMOVE) {
3819 flags &= ~NDI_DEVI_REMOVE;
3820 flags |= NDI_USER_REQ;
3821 }
3822
3823 ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags);
3824
3825 if (ret == MDI_SUCCESS) {
3826 MDI_PI_LOCK(pip);
3827 if (MDI_PI(pip)->pi_pm_held) {
3828 client_held = 1;
3829 }
3830 MDI_PI_UNLOCK(pip);
3831
3832 if (client_held) {
3833 ct = MDI_PI(pip)->pi_client;
3834 MDI_CLIENT_LOCK(ct);
3835 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3836 "i_mdi_pm_rele_client\n"));
3837 i_mdi_pm_rele_client(ct, 1);
3838 MDI_CLIENT_UNLOCK(ct);
3839 }
3840 }
3841
3842 return (ret);
3843 }
3844
/*
 * i_mdi_pi_offline():
 *		Offline a mdi_pathinfo node and call the vHCI driver's callback
 *
 *		Waits, in 60 second intervals and logging each timeout, until
 *		the path's reference count reaches zero.  The vHCI's
 *		vo_pi_state_change() entry point (if any) is then called with
 *		the pathinfo lock dropped.  The node is marked offline and
 *		waiters on pi_state_cv are woken regardless of what the
 *		callback returned.  If the callback succeeded and the client
 *		is not unstable, the client state is recomputed and a FAILED
 *		client's devinfo node is offlined.
 *
 * Return Values:
 *		the callback's error if it failed; otherwise MDI_SUCCESS,
 *		MDI_BUSY or MDI_FAILURE
 */
static int
i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
{
	dev_info_t	*vdip = NULL;
	mdi_vhci_t	*vh = NULL;
	mdi_client_t	*ct = NULL;
	int		(*f)();
	int		rv;

	MDI_PI_LOCK(pip);
	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);

	/* Unlike the timeout in the caller's path, this loop never gives up. */
	while (MDI_PI(pip)->pi_ref_cnt != 0) {
		/*
		 * Give a chance for pending I/Os to complete.
		 */
		MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
		    "!%d cmds still pending on path %s %p",
		    MDI_PI(pip)->pi_ref_cnt, mdi_pi_spathname(pip),
		    (void *)pip));
		if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv,
		    &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000),
		    TR_CLOCK_TICK) == -1) {
			/*
			 * The timeout time reached without ref_cnt being zero
			 * being signaled.
			 */
			MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
			    "!Timeout reached on path %s %p without the cond",
			    mdi_pi_spathname(pip), (void *)pip));
			MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
			    "!%d cmds still pending on path %s %p",
			    MDI_PI(pip)->pi_ref_cnt,
			    mdi_pi_spathname(pip), (void *)pip));
		}
	}
	vh = ct->ct_vhci;
	vdip = vh->vh_dip;

	/*
	 * Notify vHCI that has registered this event
	 */
	ASSERT(vh->vh_ops);
	f = vh->vh_ops->vo_pi_state_change;

	rv = MDI_SUCCESS;
	if (f != NULL) {
		/* The vHCI callback is made without the pathinfo lock. */
		MDI_PI_UNLOCK(pip);
		if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0,
		    flags)) != MDI_SUCCESS) {
			MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
			    "!vo_path_offline failed: vdip %s%d %p: path %s %p",
			    ddi_driver_name(vdip), ddi_get_instance(vdip),
			    (void *)vdip, mdi_pi_spathname(pip), (void *)pip));
		}
		MDI_PI_LOCK(pip);
	}

	/*
	 * Set the mdi_pathinfo node state and clear the transient condition
	 */
	MDI_PI_SET_OFFLINE(pip);
	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
	MDI_PI_UNLOCK(pip);

	MDI_CLIENT_LOCK(ct);
	if (rv == MDI_SUCCESS) {
		if (ct->ct_unstable == 0) {
			dev_info_t	*cdip = ct->ct_dip;

			/*
			 * Offlining the mdi_pathinfo node will impact the
			 * client state.  Update the client and dev_info node
			 * state accordingly
			 */
			i_mdi_client_update_state(ct);
			rv = NDI_SUCCESS;
			if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
				if (cdip &&
				    (i_ddi_node_state(cdip) >=
				    DS_INITIALIZED)) {
					/*
					 * ndi_devi_offline() is called
					 * without the client lock.
					 */
					MDI_CLIENT_UNLOCK(ct);
					rv = ndi_devi_offline(cdip,
					    NDI_DEVFS_CLEAN);
					MDI_CLIENT_LOCK(ct);
					if (rv != NDI_SUCCESS) {
						/*
						 * ndi_devi_offline failed.
						 * Reset client flags to
						 * online.
						 */
						MDI_DEBUG(4, (MDI_WARN, cdip,
						    "ndi_devi_offline failed: "
						    "error %x", rv));
						MDI_CLIENT_SET_ONLINE(ct);
					}
				}
			}
			/*
			 * Convert to MDI error code
			 */
			switch (rv) {
			case NDI_SUCCESS:
				rv = MDI_SUCCESS;
				break;
			case NDI_BUSY:
				rv = MDI_BUSY;
				break;
			default:
				rv = MDI_FAILURE;
				break;
			}
		}
		/*
		 * The path state is reported whenever the vHCI callback
		 * succeeded, even if the client offline above did not.
		 */
		MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
		i_mdi_report_path_state(ct, pip);
	}

	MDI_CLIENT_UNLOCK(ct);

	/*
	 * Change in the mdi_pathinfo node state will impact the client state
	 */
	MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip,
	    "ct = %p pip = %p", (void *)ct, (void *)pip));
	return (rv);
}
3976
/*
 * i_mdi_pi_online():
 *		Online a mdi_pathinfo node and call the vHCI driver's callback
 *
 *		The node is placed in the onlining state and the vHCI's
 *		vo_pi_state_change() entry point (if any) is called with the
 *		pathinfo lock dropped.  On success the client state is
 *		recomputed and, for an OPTIMAL or DEGRADED client whose
 *		devinfo node is not attached, the client is onlined.  If the
 *		callback or the client online fails, the path state is reset
 *		to its previous value.
 *
 * Return Values:
 *		the callback's error if it failed; otherwise MDI_SUCCESS,
 *		MDI_BUSY or MDI_FAILURE
 */
static int
i_mdi_pi_online(mdi_pathinfo_t *pip, int flags)
{
	mdi_vhci_t	*vh = NULL;
	mdi_client_t	*ct = NULL;
	mdi_phci_t	*ph;
	int		(*f)();
	int		rv;

	MDI_PI_LOCK(pip);
	ph = MDI_PI(pip)->pi_phci;
	vh = ph->ph_vhci;
	ct = MDI_PI(pip)->pi_client;
	MDI_PI_SET_ONLINING(pip)
	MDI_PI_UNLOCK(pip);
	/* The vHCI callback is made without the pathinfo lock. */
	f = vh->vh_ops->vo_pi_state_change;
	rv = MDI_SUCCESS;
	if (f != NULL)
		rv = (*f)(vh->vh_dip, pip, MDI_PATHINFO_STATE_ONLINE, 0, flags);
	MDI_CLIENT_LOCK(ct);
	/* Wake anyone waiting on this node's state. */
	MDI_PI_LOCK(pip);
	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
	MDI_PI_UNLOCK(pip);
	if (rv == MDI_SUCCESS) {
		dev_info_t	*cdip = ct->ct_dip;

		i_mdi_client_update_state(ct);
		if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL ||
		    MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
			if (cdip && !i_ddi_devi_attached(cdip)) {
				/*
				 * ndi_devi_online() is called without the
				 * client lock.
				 */
				MDI_CLIENT_UNLOCK(ct);
				rv = ndi_devi_online(cdip, 0);
				MDI_CLIENT_LOCK(ct);
				if ((rv != NDI_SUCCESS) &&
				    (MDI_CLIENT_STATE(ct) ==
				    MDI_CLIENT_STATE_DEGRADED)) {
					MDI_CLIENT_SET_OFFLINE(ct);
				}
				if (rv != NDI_SUCCESS) {
					/* Reset the path state */
					MDI_PI_LOCK(pip);
					MDI_PI(pip)->pi_state =
					    MDI_PI_OLD_STATE(pip);
					MDI_PI_UNLOCK(pip);
				}
			}
		}
		/* Convert the NDI result to an MDI error code. */
		switch (rv) {
		case NDI_SUCCESS:
			MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
			i_mdi_report_path_state(ct, pip);
			rv = MDI_SUCCESS;
			break;
		case NDI_BUSY:
			rv = MDI_BUSY;
			break;
		default:
			rv = MDI_FAILURE;
			break;
		}
	} else {
		/* Reset the path state */
		MDI_PI_LOCK(pip);
		MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
		MDI_PI_UNLOCK(pip);
	}
	MDI_CLIENT_UNLOCK(ct);
	return (rv);
}
4050
4051 /*
4052 * mdi_pi_get_node_name():
4053 * Get the name associated with a mdi_pathinfo node.
4054 * Since pathinfo nodes are not directly named, we
4055 * return the node_name of the client.
4056 *
4057 * Return Values:
4058 * char *
4059 */
4060 char *
mdi_pi_get_node_name(mdi_pathinfo_t * pip)4061 mdi_pi_get_node_name(mdi_pathinfo_t *pip)
4062 {
4063 mdi_client_t *ct;
4064
4065 if (pip == NULL)
4066 return (NULL);
4067 ct = MDI_PI(pip)->pi_client;
4068 if ((ct == NULL) || (ct->ct_dip == NULL))
4069 return (NULL);
4070 return (ddi_node_name(ct->ct_dip));
4071 }
4072
4073 /*
4074 * mdi_pi_get_addr():
4075 * Get the unit address associated with a mdi_pathinfo node
4076 *
4077 * Return Values:
4078 * char *
4079 */
4080 char *
mdi_pi_get_addr(mdi_pathinfo_t * pip)4081 mdi_pi_get_addr(mdi_pathinfo_t *pip)
4082 {
4083 if (pip == NULL)
4084 return (NULL);
4085
4086 return (MDI_PI(pip)->pi_addr);
4087 }
4088
4089 /*
4090 * mdi_pi_get_path_instance():
4091 * Get the 'path_instance' of a mdi_pathinfo node
4092 *
4093 * Return Values:
4094 * path_instance
4095 */
4096 int
mdi_pi_get_path_instance(mdi_pathinfo_t * pip)4097 mdi_pi_get_path_instance(mdi_pathinfo_t *pip)
4098 {
4099 if (pip == NULL)
4100 return (0);
4101
4102 return (MDI_PI(pip)->pi_path_instance);
4103 }
4104
4105 /*
4106 * mdi_pi_pathname():
4107 * Return pointer to path to pathinfo node.
4108 */
4109 char *
mdi_pi_pathname(mdi_pathinfo_t * pip)4110 mdi_pi_pathname(mdi_pathinfo_t *pip)
4111 {
4112 if (pip == NULL)
4113 return (NULL);
4114 return (mdi_pi_pathname_by_instance(mdi_pi_get_path_instance(pip)));
4115 }
4116
4117 /*
4118 * mdi_pi_spathname():
4119 * Return pointer to shortpath to pathinfo node. Used for debug
4120 * messages, so return "" instead of NULL when unknown.
4121 */
4122 char *
mdi_pi_spathname(mdi_pathinfo_t * pip)4123 mdi_pi_spathname(mdi_pathinfo_t *pip)
4124 {
4125 char *spath = "";
4126
4127 if (pip) {
4128 spath = mdi_pi_spathname_by_instance(
4129 mdi_pi_get_path_instance(pip));
4130 if (spath == NULL)
4131 spath = "";
4132 }
4133 return (spath);
4134 }
4135
4136 char *
mdi_pi_pathname_obp(mdi_pathinfo_t * pip,char * path)4137 mdi_pi_pathname_obp(mdi_pathinfo_t *pip, char *path)
4138 {
4139 char *obp_path = NULL;
4140 if ((pip == NULL) || (path == NULL))
4141 return (NULL);
4142
4143 if (mdi_prop_lookup_string(pip, "obp-path", &obp_path) == MDI_SUCCESS) {
4144 (void) strcpy(path, obp_path);
4145 (void) mdi_prop_free(obp_path);
4146 } else {
4147 path = NULL;
4148 }
4149 return (path);
4150 }
4151
4152 int
mdi_pi_pathname_obp_set(mdi_pathinfo_t * pip,char * component)4153 mdi_pi_pathname_obp_set(mdi_pathinfo_t *pip, char *component)
4154 {
4155 dev_info_t *pdip;
4156 char *obp_path = NULL;
4157 int rc = MDI_FAILURE;
4158
4159 if (pip == NULL)
4160 return (MDI_FAILURE);
4161
4162 pdip = mdi_pi_get_phci(pip);
4163 if (pdip == NULL)
4164 return (MDI_FAILURE);
4165
4166 obp_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
4167
4168 if (ddi_pathname_obp(pdip, obp_path) == NULL) {
4169 (void) ddi_pathname(pdip, obp_path);
4170 }
4171
4172 if (component) {
4173 (void) strncat(obp_path, "/", MAXPATHLEN);
4174 (void) strncat(obp_path, component, MAXPATHLEN);
4175 }
4176 rc = mdi_prop_update_string(pip, "obp-path", obp_path);
4177
4178 if (obp_path)
4179 kmem_free(obp_path, MAXPATHLEN);
4180 return (rc);
4181 }
4182
4183 /*
4184 * mdi_pi_get_client():
4185 * Get the client devinfo associated with a mdi_pathinfo node
4186 *
4187 * Return Values:
4188 * Handle to client device dev_info node
4189 */
4190 dev_info_t *
mdi_pi_get_client(mdi_pathinfo_t * pip)4191 mdi_pi_get_client(mdi_pathinfo_t *pip)
4192 {
4193 dev_info_t *dip = NULL;
4194 if (pip) {
4195 dip = MDI_PI(pip)->pi_client->ct_dip;
4196 }
4197 return (dip);
4198 }
4199
4200 /*
4201 * mdi_pi_get_phci():
4202 * Get the pHCI devinfo associated with the mdi_pathinfo node
4203 * Return Values:
4204 * Handle to dev_info node
4205 */
4206 dev_info_t *
mdi_pi_get_phci(mdi_pathinfo_t * pip)4207 mdi_pi_get_phci(mdi_pathinfo_t *pip)
4208 {
4209 dev_info_t *dip = NULL;
4210 mdi_phci_t *ph;
4211
4212 if (pip) {
4213 ph = MDI_PI(pip)->pi_phci;
4214 if (ph)
4215 dip = ph->ph_dip;
4216 }
4217 return (dip);
4218 }
4219
4220 /*
4221 * mdi_pi_get_client_private():
4222 * Get the client private information associated with the
4223 * mdi_pathinfo node
4224 */
4225 void *
mdi_pi_get_client_private(mdi_pathinfo_t * pip)4226 mdi_pi_get_client_private(mdi_pathinfo_t *pip)
4227 {
4228 void *cprivate = NULL;
4229 if (pip) {
4230 cprivate = MDI_PI(pip)->pi_cprivate;
4231 }
4232 return (cprivate);
4233 }
4234
4235 /*
4236 * mdi_pi_set_client_private():
4237 * Set the client private information in the mdi_pathinfo node
4238 */
4239 void
mdi_pi_set_client_private(mdi_pathinfo_t * pip,void * priv)4240 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv)
4241 {
4242 if (pip) {
4243 MDI_PI(pip)->pi_cprivate = priv;
4244 }
4245 }
4246
4247 /*
4248 * mdi_pi_get_phci_private():
4249 * Get the pHCI private information associated with the
4250 * mdi_pathinfo node
4251 */
4252 caddr_t
mdi_pi_get_phci_private(mdi_pathinfo_t * pip)4253 mdi_pi_get_phci_private(mdi_pathinfo_t *pip)
4254 {
4255 caddr_t pprivate = NULL;
4256
4257 if (pip) {
4258 pprivate = MDI_PI(pip)->pi_pprivate;
4259 }
4260 return (pprivate);
4261 }
4262
4263 /*
4264 * mdi_pi_set_phci_private():
4265 * Set the pHCI private information in the mdi_pathinfo node
4266 */
4267 void
mdi_pi_set_phci_private(mdi_pathinfo_t * pip,caddr_t priv)4268 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv)
4269 {
4270 if (pip) {
4271 MDI_PI(pip)->pi_pprivate = priv;
4272 }
4273 }
4274
4275 /*
4276 * mdi_pi_get_state():
4277 * Get the mdi_pathinfo node state. Transient states are internal
4278 * and not provided to the users
4279 */
4280 mdi_pathinfo_state_t
mdi_pi_get_state(mdi_pathinfo_t * pip)4281 mdi_pi_get_state(mdi_pathinfo_t *pip)
4282 {
4283 mdi_pathinfo_state_t state = MDI_PATHINFO_STATE_INIT;
4284
4285 if (pip) {
4286 if (MDI_PI_IS_TRANSIENT(pip)) {
4287 /*
4288 * mdi_pathinfo is in state transition. Return the
4289 * last good state.
4290 */
4291 state = MDI_PI_OLD_STATE(pip);
4292 } else {
4293 state = MDI_PI_STATE(pip);
4294 }
4295 }
4296 return (state);
4297 }
4298
4299 /*
4300 * mdi_pi_get_flags():
4301 * Get the mdi_pathinfo node flags.
4302 */
4303 uint_t
mdi_pi_get_flags(mdi_pathinfo_t * pip)4304 mdi_pi_get_flags(mdi_pathinfo_t *pip)
4305 {
4306 return (pip ? MDI_PI(pip)->pi_flags : 0);
4307 }
4308
4309 /*
4310 * Note that the following function needs to be the new interface for
4311 * mdi_pi_get_state when mpxio gets integrated to ON.
4312 */
4313 int
mdi_pi_get_state2(mdi_pathinfo_t * pip,mdi_pathinfo_state_t * state,uint32_t * ext_state)4314 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state,
4315 uint32_t *ext_state)
4316 {
4317 *state = MDI_PATHINFO_STATE_INIT;
4318
4319 if (pip) {
4320 if (MDI_PI_IS_TRANSIENT(pip)) {
4321 /*
4322 * mdi_pathinfo is in state transition. Return the
4323 * last good state.
4324 */
4325 *state = MDI_PI_OLD_STATE(pip);
4326 *ext_state = MDI_PI_OLD_EXT_STATE(pip);
4327 } else {
4328 *state = MDI_PI_STATE(pip);
4329 *ext_state = MDI_PI_EXT_STATE(pip);
4330 }
4331 }
4332 return (MDI_SUCCESS);
4333 }
4334
4335 /*
4336 * mdi_pi_get_preferred:
4337 * Get the preferred path flag
4338 */
4339 int
mdi_pi_get_preferred(mdi_pathinfo_t * pip)4340 mdi_pi_get_preferred(mdi_pathinfo_t *pip)
4341 {
4342 if (pip) {
4343 return (MDI_PI(pip)->pi_preferred);
4344 }
4345 return (0);
4346 }
4347
4348 /*
4349 * mdi_pi_set_preferred:
4350 * Set the preferred path flag
4351 */
4352 void
mdi_pi_set_preferred(mdi_pathinfo_t * pip,int preferred)4353 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred)
4354 {
4355 if (pip) {
4356 MDI_PI(pip)->pi_preferred = preferred;
4357 }
4358 }
4359
4360 /*
4361 * mdi_pi_set_state():
4362 * Set the mdi_pathinfo node state
4363 */
4364 void
mdi_pi_set_state(mdi_pathinfo_t * pip,mdi_pathinfo_state_t state)4365 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state)
4366 {
4367 uint32_t ext_state;
4368
4369 if (pip) {
4370 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK;
4371 MDI_PI(pip)->pi_state = state;
4372 MDI_PI(pip)->pi_state |= ext_state;
4373
4374 /* Path has changed state, invalidate DINFOCACHE snap shot. */
4375 i_ddi_di_cache_invalidate();
4376 }
4377 }
4378
4379 /*
4380 * Property functions:
4381 */
4382 int
i_map_nvlist_error_to_mdi(int val)4383 i_map_nvlist_error_to_mdi(int val)
4384 {
4385 int rv;
4386
4387 switch (val) {
4388 case 0:
4389 rv = DDI_PROP_SUCCESS;
4390 break;
4391 case EINVAL:
4392 case ENOTSUP:
4393 rv = DDI_PROP_INVAL_ARG;
4394 break;
4395 case ENOMEM:
4396 rv = DDI_PROP_NO_MEMORY;
4397 break;
4398 default:
4399 rv = DDI_PROP_NOT_FOUND;
4400 break;
4401 }
4402 return (rv);
4403 }
4404
4405 /*
4406 * mdi_pi_get_next_prop():
4407 * Property walk function. The caller should hold mdi_pi_lock()
4408 * and release by calling mdi_pi_unlock() at the end of walk to
4409 * get a consistent value.
4410 */
4411 nvpair_t *
mdi_pi_get_next_prop(mdi_pathinfo_t * pip,nvpair_t * prev)4412 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev)
4413 {
4414 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4415 return (NULL);
4416 }
4417 ASSERT(MDI_PI_LOCKED(pip));
4418 return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev));
4419 }
4420
4421 /*
4422 * mdi_prop_remove():
4423 * Remove the named property from the named list.
4424 */
4425 int
mdi_prop_remove(mdi_pathinfo_t * pip,char * name)4426 mdi_prop_remove(mdi_pathinfo_t *pip, char *name)
4427 {
4428 if (pip == NULL) {
4429 return (DDI_PROP_NOT_FOUND);
4430 }
4431 ASSERT(!MDI_PI_LOCKED(pip));
4432 MDI_PI_LOCK(pip);
4433 if (MDI_PI(pip)->pi_prop == NULL) {
4434 MDI_PI_UNLOCK(pip);
4435 return (DDI_PROP_NOT_FOUND);
4436 }
4437 if (name) {
4438 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name);
4439 } else {
4440 char nvp_name[MAXNAMELEN];
4441 nvpair_t *nvp;
4442 nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL);
4443 while (nvp) {
4444 nvpair_t *next;
4445 next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp);
4446 (void) snprintf(nvp_name, sizeof(nvp_name), "%s",
4447 nvpair_name(nvp));
4448 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop,
4449 nvp_name);
4450 nvp = next;
4451 }
4452 }
4453 MDI_PI_UNLOCK(pip);
4454 return (DDI_PROP_SUCCESS);
4455 }
4456
4457 /*
4458 * mdi_prop_size():
4459 * Get buffer size needed to pack the property data.
4460 * Caller should hold the mdi_pathinfo_t lock to get a consistent
4461 * buffer size.
4462 */
4463 int
mdi_prop_size(mdi_pathinfo_t * pip,size_t * buflenp)4464 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp)
4465 {
4466 int rv;
4467 size_t bufsize;
4468
4469 *buflenp = 0;
4470 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4471 return (DDI_PROP_NOT_FOUND);
4472 }
4473 ASSERT(MDI_PI_LOCKED(pip));
4474 rv = nvlist_size(MDI_PI(pip)->pi_prop,
4475 &bufsize, NV_ENCODE_NATIVE);
4476 *buflenp = bufsize;
4477 return (i_map_nvlist_error_to_mdi(rv));
4478 }
4479
4480 /*
4481 * mdi_prop_pack():
4482 * pack the property list. The caller should hold the
4483 * mdi_pathinfo_t node to get a consistent data
4484 */
4485 int
mdi_prop_pack(mdi_pathinfo_t * pip,char ** bufp,uint_t buflen)4486 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen)
4487 {
4488 int rv;
4489 size_t bufsize;
4490
4491 if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) {
4492 return (DDI_PROP_NOT_FOUND);
4493 }
4494
4495 ASSERT(MDI_PI_LOCKED(pip));
4496
4497 bufsize = buflen;
4498 rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize,
4499 NV_ENCODE_NATIVE, KM_SLEEP);
4500
4501 return (i_map_nvlist_error_to_mdi(rv));
4502 }
4503
4504 /*
4505 * mdi_prop_update_byte():
4506 * Create/Update a byte property
4507 */
4508 int
mdi_prop_update_byte(mdi_pathinfo_t * pip,char * name,uchar_t data)4509 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data)
4510 {
4511 int rv;
4512
4513 if (pip == NULL) {
4514 return (DDI_PROP_INVAL_ARG);
4515 }
4516 ASSERT(!MDI_PI_LOCKED(pip));
4517 MDI_PI_LOCK(pip);
4518 if (MDI_PI(pip)->pi_prop == NULL) {
4519 MDI_PI_UNLOCK(pip);
4520 return (DDI_PROP_NOT_FOUND);
4521 }
4522 rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data);
4523 MDI_PI_UNLOCK(pip);
4524 return (i_map_nvlist_error_to_mdi(rv));
4525 }
4526
4527 /*
4528 * mdi_prop_update_byte_array():
4529 * Create/Update a byte array property
4530 */
4531 int
mdi_prop_update_byte_array(mdi_pathinfo_t * pip,char * name,uchar_t * data,uint_t nelements)4532 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data,
4533 uint_t nelements)
4534 {
4535 int rv;
4536
4537 if (pip == NULL) {
4538 return (DDI_PROP_INVAL_ARG);
4539 }
4540 ASSERT(!MDI_PI_LOCKED(pip));
4541 MDI_PI_LOCK(pip);
4542 if (MDI_PI(pip)->pi_prop == NULL) {
4543 MDI_PI_UNLOCK(pip);
4544 return (DDI_PROP_NOT_FOUND);
4545 }
4546 rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements);
4547 MDI_PI_UNLOCK(pip);
4548 return (i_map_nvlist_error_to_mdi(rv));
4549 }
4550
4551 /*
4552 * mdi_prop_update_int():
4553 * Create/Update a 32 bit integer property
4554 */
4555 int
mdi_prop_update_int(mdi_pathinfo_t * pip,char * name,int data)4556 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data)
4557 {
4558 int rv;
4559
4560 if (pip == NULL) {
4561 return (DDI_PROP_INVAL_ARG);
4562 }
4563 ASSERT(!MDI_PI_LOCKED(pip));
4564 MDI_PI_LOCK(pip);
4565 if (MDI_PI(pip)->pi_prop == NULL) {
4566 MDI_PI_UNLOCK(pip);
4567 return (DDI_PROP_NOT_FOUND);
4568 }
4569 rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data);
4570 MDI_PI_UNLOCK(pip);
4571 return (i_map_nvlist_error_to_mdi(rv));
4572 }
4573
4574 /*
4575 * mdi_prop_update_int64():
4576 * Create/Update a 64 bit integer property
4577 */
4578 int
mdi_prop_update_int64(mdi_pathinfo_t * pip,char * name,int64_t data)4579 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data)
4580 {
4581 int rv;
4582
4583 if (pip == NULL) {
4584 return (DDI_PROP_INVAL_ARG);
4585 }
4586 ASSERT(!MDI_PI_LOCKED(pip));
4587 MDI_PI_LOCK(pip);
4588 if (MDI_PI(pip)->pi_prop == NULL) {
4589 MDI_PI_UNLOCK(pip);
4590 return (DDI_PROP_NOT_FOUND);
4591 }
4592 rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data);
4593 MDI_PI_UNLOCK(pip);
4594 return (i_map_nvlist_error_to_mdi(rv));
4595 }
4596
4597 /*
4598 * mdi_prop_update_int_array():
4599 * Create/Update a int array property
4600 */
4601 int
mdi_prop_update_int_array(mdi_pathinfo_t * pip,char * name,int * data,uint_t nelements)4602 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data,
4603 uint_t nelements)
4604 {
4605 int rv;
4606
4607 if (pip == NULL) {
4608 return (DDI_PROP_INVAL_ARG);
4609 }
4610 ASSERT(!MDI_PI_LOCKED(pip));
4611 MDI_PI_LOCK(pip);
4612 if (MDI_PI(pip)->pi_prop == NULL) {
4613 MDI_PI_UNLOCK(pip);
4614 return (DDI_PROP_NOT_FOUND);
4615 }
4616 rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data,
4617 nelements);
4618 MDI_PI_UNLOCK(pip);
4619 return (i_map_nvlist_error_to_mdi(rv));
4620 }
4621
4622 /*
4623 * mdi_prop_update_string():
4624 * Create/Update a string property
4625 */
4626 int
mdi_prop_update_string(mdi_pathinfo_t * pip,char * name,char * data)4627 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data)
4628 {
4629 int rv;
4630
4631 if (pip == NULL) {
4632 return (DDI_PROP_INVAL_ARG);
4633 }
4634 ASSERT(!MDI_PI_LOCKED(pip));
4635 MDI_PI_LOCK(pip);
4636 if (MDI_PI(pip)->pi_prop == NULL) {
4637 MDI_PI_UNLOCK(pip);
4638 return (DDI_PROP_NOT_FOUND);
4639 }
4640 rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, data);
4641 MDI_PI_UNLOCK(pip);
4642 return (i_map_nvlist_error_to_mdi(rv));
4643 }
4644
4645 /*
4646 * mdi_prop_update_string_array():
4647 * Create/Update a string array property
4648 */
4649 int
mdi_prop_update_string_array(mdi_pathinfo_t * pip,char * name,char ** data,uint_t nelements)4650 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data,
4651 uint_t nelements)
4652 {
4653 int rv;
4654
4655 if (pip == NULL) {
4656 return (DDI_PROP_INVAL_ARG);
4657 }
4658 ASSERT(!MDI_PI_LOCKED(pip));
4659 MDI_PI_LOCK(pip);
4660 if (MDI_PI(pip)->pi_prop == NULL) {
4661 MDI_PI_UNLOCK(pip);
4662 return (DDI_PROP_NOT_FOUND);
4663 }
4664 rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data,
4665 nelements);
4666 MDI_PI_UNLOCK(pip);
4667 return (i_map_nvlist_error_to_mdi(rv));
4668 }
4669
4670 /*
4671 * mdi_prop_lookup_byte():
4672 * Look for byte property identified by name. The data returned
4673 * is the actual property and valid as long as mdi_pathinfo_t node
4674 * is alive.
4675 */
4676 int
mdi_prop_lookup_byte(mdi_pathinfo_t * pip,char * name,uchar_t * data)4677 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data)
4678 {
4679 int rv;
4680
4681 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4682 return (DDI_PROP_NOT_FOUND);
4683 }
4684 rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data);
4685 return (i_map_nvlist_error_to_mdi(rv));
4686 }
4687
4688
4689 /*
4690 * mdi_prop_lookup_byte_array():
4691 * Look for byte array property identified by name. The data
4692 * returned is the actual property and valid as long as
4693 * mdi_pathinfo_t node is alive.
4694 */
4695 int
mdi_prop_lookup_byte_array(mdi_pathinfo_t * pip,char * name,uchar_t ** data,uint_t * nelements)4696 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data,
4697 uint_t *nelements)
4698 {
4699 int rv;
4700
4701 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4702 return (DDI_PROP_NOT_FOUND);
4703 }
4704 rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data,
4705 nelements);
4706 return (i_map_nvlist_error_to_mdi(rv));
4707 }
4708
4709 /*
4710 * mdi_prop_lookup_int():
4711 * Look for int property identified by name. The data returned
4712 * is the actual property and valid as long as mdi_pathinfo_t
4713 * node is alive.
4714 */
4715 int
mdi_prop_lookup_int(mdi_pathinfo_t * pip,char * name,int * data)4716 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data)
4717 {
4718 int rv;
4719
4720 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4721 return (DDI_PROP_NOT_FOUND);
4722 }
4723 rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data);
4724 return (i_map_nvlist_error_to_mdi(rv));
4725 }
4726
4727 /*
4728 * mdi_prop_lookup_int64():
4729 * Look for int64 property identified by name. The data returned
4730 * is the actual property and valid as long as mdi_pathinfo_t node
4731 * is alive.
4732 */
4733 int
mdi_prop_lookup_int64(mdi_pathinfo_t * pip,char * name,int64_t * data)4734 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data)
4735 {
4736 int rv;
4737 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4738 return (DDI_PROP_NOT_FOUND);
4739 }
4740 rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data);
4741 return (i_map_nvlist_error_to_mdi(rv));
4742 }
4743
4744 /*
4745 * mdi_prop_lookup_int_array():
4746 * Look for int array property identified by name. The data
4747 * returned is the actual property and valid as long as
4748 * mdi_pathinfo_t node is alive.
4749 */
4750 int
mdi_prop_lookup_int_array(mdi_pathinfo_t * pip,char * name,int ** data,uint_t * nelements)4751 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data,
4752 uint_t *nelements)
4753 {
4754 int rv;
4755
4756 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4757 return (DDI_PROP_NOT_FOUND);
4758 }
4759 rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name,
4760 (int32_t **)data, nelements);
4761 return (i_map_nvlist_error_to_mdi(rv));
4762 }
4763
4764 /*
4765 * mdi_prop_lookup_string():
4766 * Look for string property identified by name. The data
4767 * returned is the actual property and valid as long as
4768 * mdi_pathinfo_t node is alive.
4769 */
4770 int
mdi_prop_lookup_string(mdi_pathinfo_t * pip,char * name,char ** data)4771 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data)
4772 {
4773 int rv;
4774
4775 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4776 return (DDI_PROP_NOT_FOUND);
4777 }
4778 rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data);
4779 return (i_map_nvlist_error_to_mdi(rv));
4780 }
4781
4782 /*
4783 * mdi_prop_lookup_string_array():
4784 * Look for string array property identified by name. The data
4785 * returned is the actual property and valid as long as
4786 * mdi_pathinfo_t node is alive.
4787 */
4788 int
mdi_prop_lookup_string_array(mdi_pathinfo_t * pip,char * name,char *** data,uint_t * nelements)4789 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data,
4790 uint_t *nelements)
4791 {
4792 int rv;
4793
4794 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4795 return (DDI_PROP_NOT_FOUND);
4796 }
4797 rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data,
4798 nelements);
4799 return (i_map_nvlist_error_to_mdi(rv));
4800 }
4801
4802 /*
4803 * mdi_prop_free():
4804 * Symmetrical function to ddi_prop_free(). nvlist_lookup_xx()
4805 * functions return the pointer to actual property data and not a
4806 * copy of it. So the data returned is valid as long as
4807 * mdi_pathinfo_t node is valid.
4808 */
4809 /*ARGSUSED*/
4810 int
mdi_prop_free(void * data)4811 mdi_prop_free(void *data)
4812 {
4813 return (DDI_PROP_SUCCESS);
4814 }
4815
4816 /*ARGSUSED*/
4817 static void
i_mdi_report_path_state(mdi_client_t * ct,mdi_pathinfo_t * pip)4818 i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip)
4819 {
4820 char *ct_path;
4821 char *ct_status;
4822 char *status;
4823 dev_info_t *cdip = ct->ct_dip;
4824 char lb_buf[64];
4825 int report_lb_c = 0, report_lb_p = 0;
4826
4827 ASSERT(MDI_CLIENT_LOCKED(ct));
4828 if ((cdip == NULL) || (ddi_get_instance(cdip) == -1) ||
4829 (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) {
4830 return;
4831 }
4832 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) {
4833 ct_status = "optimal";
4834 report_lb_c = 1;
4835 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
4836 ct_status = "degraded";
4837 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
4838 ct_status = "failed";
4839 } else {
4840 ct_status = "unknown";
4841 }
4842
4843 lb_buf[0] = 0; /* not interested in load balancing config */
4844
4845 if (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip)) {
4846 status = "removed";
4847 } else if (MDI_PI_IS_OFFLINE(pip)) {
4848 status = "offline";
4849 } else if (MDI_PI_IS_ONLINE(pip)) {
4850 status = "online";
4851 report_lb_p = 1;
4852 } else if (MDI_PI_IS_STANDBY(pip)) {
4853 status = "standby";
4854 } else if (MDI_PI_IS_FAULT(pip)) {
4855 status = "faulted";
4856 } else {
4857 status = "unknown";
4858 }
4859
4860 if (cdip) {
4861 ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
4862
4863 /*
4864 * NOTE: Keeping "multipath status: %s" and
4865 * "Load balancing: %s" format unchanged in case someone
4866 * scrubs /var/adm/messages looking for these messages.
4867 */
4868 if (report_lb_c && report_lb_p) {
4869 if (ct->ct_lb == LOAD_BALANCE_LBA) {
4870 (void) snprintf(lb_buf, sizeof (lb_buf),
4871 "%s, region-size: %d", mdi_load_balance_lba,
4872 ct->ct_lb_args->region_size);
4873 } else if (ct->ct_lb == LOAD_BALANCE_NONE) {
4874 (void) snprintf(lb_buf, sizeof (lb_buf),
4875 "%s", mdi_load_balance_none);
4876 } else {
4877 (void) snprintf(lb_buf, sizeof (lb_buf), "%s",
4878 mdi_load_balance_rr);
4879 }
4880
4881 cmn_err(mdi_debug_consoleonly ? CE_NOTE : CE_CONT,
4882 "?%s (%s%d) multipath status: %s: "
4883 "path %d %s is %s: Load balancing: %s\n",
4884 ddi_pathname(cdip, ct_path), ddi_driver_name(cdip),
4885 ddi_get_instance(cdip), ct_status,
4886 mdi_pi_get_path_instance(pip),
4887 mdi_pi_spathname(pip), status, lb_buf);
4888 } else {
4889 cmn_err(mdi_debug_consoleonly ? CE_NOTE : CE_CONT,
4890 "?%s (%s%d) multipath status: %s: "
4891 "path %d %s is %s\n",
4892 ddi_pathname(cdip, ct_path), ddi_driver_name(cdip),
4893 ddi_get_instance(cdip), ct_status,
4894 mdi_pi_get_path_instance(pip),
4895 mdi_pi_spathname(pip), status);
4896 }
4897
4898 kmem_free(ct_path, MAXPATHLEN);
4899 MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct);
4900 }
4901 }
4902
4903 #ifdef DEBUG
4904 /*
4905 * i_mdi_log():
4906 * Utility function for error message management
4907 *
4908 * NOTE: Implementation takes care of trailing \n for cmn_err,
4909 * MDI_DEBUG should not terminate fmt strings with \n.
4910 *
4911 * NOTE: If the level is >= 2, and there is no leading !?^
4912 * then a leading ! is implied (but can be overriden via
4913 * mdi_debug_consoleonly). If you are using kmdb on the console,
4914 * consider setting mdi_debug_consoleonly to 1 as an aid.
4915 */
4916 /*PRINTFLIKE4*/
4917 static void
i_mdi_log(int level,const char * func,dev_info_t * dip,const char * fmt,...)4918 i_mdi_log(int level, const char *func, dev_info_t *dip, const char *fmt, ...)
4919 {
4920 char name[MAXNAMELEN];
4921 char buf[512];
4922 char *bp;
4923 va_list ap;
4924 int log_only = 0;
4925 int boot_only = 0;
4926 int console_only = 0;
4927
4928 if (dip) {
4929 (void) snprintf(name, sizeof(name), "%s%d: ",
4930 ddi_driver_name(dip), ddi_get_instance(dip));
4931 } else {
4932 name[0] = 0;
4933 }
4934
4935 va_start(ap, fmt);
4936 (void) vsnprintf(buf, sizeof(buf), fmt, ap);
4937 va_end(ap);
4938
4939 switch (buf[0]) {
4940 case '!':
4941 bp = &buf[1];
4942 log_only = 1;
4943 break;
4944 case '?':
4945 bp = &buf[1];
4946 boot_only = 1;
4947 break;
4948 case '^':
4949 bp = &buf[1];
4950 console_only = 1;
4951 break;
4952 default:
4953 if (level >= 2)
4954 log_only = 1; /* ! implied */
4955 bp = buf;
4956 break;
4957 }
4958 if (mdi_debug_logonly) {
4959 log_only = 1;
4960 boot_only = 0;
4961 console_only = 0;
4962 }
4963 if (mdi_debug_consoleonly) {
4964 log_only = 0;
4965 boot_only = 0;
4966 console_only = 1;
4967 level = CE_NOTE;
4968 goto console;
4969 }
4970
4971 switch (level) {
4972 case CE_NOTE:
4973 level = CE_CONT;
4974 /* FALLTHROUGH */
4975 case CE_CONT:
4976 if (boot_only) {
4977 cmn_err(level, "?mdi: %s%s: %s\n", name, func, bp);
4978 } else if (console_only) {
4979 cmn_err(level, "^mdi: %s%s: %s\n", name, func, bp);
4980 } else if (log_only) {
4981 cmn_err(level, "!mdi: %s%s: %s\n", name, func, bp);
4982 } else {
4983 cmn_err(level, "mdi: %s%s: %s\n", name, func, bp);
4984 }
4985 break;
4986
4987 case CE_WARN:
4988 case CE_PANIC:
4989 console:
4990 if (boot_only) {
4991 cmn_err(level, "?mdi: %s%s: %s", name, func, bp);
4992 } else if (console_only) {
4993 cmn_err(level, "^mdi: %s%s: %s", name, func, bp);
4994 } else if (log_only) {
4995 cmn_err(level, "!mdi: %s%s: %s", name, func, bp);
4996 } else {
4997 cmn_err(level, "mdi: %s%s: %s", name, func, bp);
4998 }
4999 break;
5000 default:
5001 cmn_err(level, "mdi: %s%s", name, bp);
5002 break;
5003 }
5004 }
5005 #endif /* DEBUG */
5006
5007 void
i_mdi_client_online(dev_info_t * ct_dip)5008 i_mdi_client_online(dev_info_t *ct_dip)
5009 {
5010 mdi_client_t *ct;
5011
5012 /*
5013 * Client online notification. Mark client state as online
5014 * restore our binding with dev_info node
5015 */
5016 ct = i_devi_get_client(ct_dip);
5017 ASSERT(ct != NULL);
5018 MDI_CLIENT_LOCK(ct);
5019 MDI_CLIENT_SET_ONLINE(ct);
5020 /* catch for any memory leaks */
5021 ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip));
5022 ct->ct_dip = ct_dip;
5023
5024 if (ct->ct_power_cnt == 0)
5025 (void) i_mdi_power_all_phci(ct);
5026
5027 MDI_DEBUG(4, (MDI_NOTE, ct_dip,
5028 "i_mdi_pm_hold_client %p", (void *)ct));
5029 i_mdi_pm_hold_client(ct, 1);
5030
5031 MDI_CLIENT_UNLOCK(ct);
5032 }
5033
5034 void
i_mdi_phci_online(dev_info_t * ph_dip)5035 i_mdi_phci_online(dev_info_t *ph_dip)
5036 {
5037 mdi_phci_t *ph;
5038
5039 /* pHCI online notification. Mark state accordingly */
5040 ph = i_devi_get_phci(ph_dip);
5041 ASSERT(ph != NULL);
5042 MDI_PHCI_LOCK(ph);
5043 MDI_PHCI_SET_ONLINE(ph);
5044 MDI_PHCI_UNLOCK(ph);
5045 }
5046
5047 /*
5048 * mdi_devi_online():
5049 * Online notification from NDI framework on pHCI/client
5050 * device online.
5051 * Return Values:
5052 * NDI_SUCCESS
5053 * MDI_FAILURE
5054 */
5055 /*ARGSUSED*/
5056 int
mdi_devi_online(dev_info_t * dip,uint_t flags)5057 mdi_devi_online(dev_info_t *dip, uint_t flags)
5058 {
5059 if (MDI_PHCI(dip)) {
5060 i_mdi_phci_online(dip);
5061 }
5062
5063 if (MDI_CLIENT(dip)) {
5064 i_mdi_client_online(dip);
5065 }
5066 return (NDI_SUCCESS);
5067 }
5068
5069 /*
5070 * mdi_devi_offline():
5071 * Offline notification from NDI framework on pHCI/Client device
5072 * offline.
5073 *
5074 * Return Values:
5075 * NDI_SUCCESS
5076 * NDI_FAILURE
5077 */
5078 /*ARGSUSED*/
5079 int
mdi_devi_offline(dev_info_t * dip,uint_t flags)5080 mdi_devi_offline(dev_info_t *dip, uint_t flags)
5081 {
5082 int rv = NDI_SUCCESS;
5083
5084 if (MDI_CLIENT(dip)) {
5085 rv = i_mdi_client_offline(dip, flags);
5086 if (rv != NDI_SUCCESS)
5087 return (rv);
5088 }
5089
5090 if (MDI_PHCI(dip)) {
5091 rv = i_mdi_phci_offline(dip, flags);
5092
5093 if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) {
5094 /* set client back online */
5095 i_mdi_client_online(dip);
5096 }
5097 }
5098
5099 return (rv);
5100 }
5101
5102 /*ARGSUSED*/
5103 static int
i_mdi_phci_offline(dev_info_t * dip,uint_t flags)5104 i_mdi_phci_offline(dev_info_t *dip, uint_t flags)
5105 {
5106 int rv = NDI_SUCCESS;
5107 mdi_phci_t *ph;
5108 mdi_client_t *ct;
5109 mdi_pathinfo_t *pip;
5110 mdi_pathinfo_t *next;
5111 mdi_pathinfo_t *failed_pip = NULL;
5112 dev_info_t *cdip;
5113
5114 /*
5115 * pHCI component offline notification
5116 * Make sure that this pHCI instance is free to be offlined.
5117 * If it is OK to proceed, Offline and remove all the child
5118 * mdi_pathinfo nodes. This process automatically offlines
5119 * corresponding client devices, for which this pHCI provides
5120 * critical services.
5121 */
5122 ph = i_devi_get_phci(dip);
5123 MDI_DEBUG(2, (MDI_NOTE, dip,
5124 "called %p %p", (void *)dip, (void *)ph));
5125 if (ph == NULL) {
5126 return (rv);
5127 }
5128
5129 MDI_PHCI_LOCK(ph);
5130
5131 if (MDI_PHCI_IS_OFFLINE(ph)) {
5132 MDI_DEBUG(1, (MDI_WARN, dip,
5133 "!pHCI already offlined: %p", (void *)dip));
5134 MDI_PHCI_UNLOCK(ph);
5135 return (NDI_SUCCESS);
5136 }
5137
5138 /*
5139 * Check to see if the pHCI can be offlined
5140 */
5141 if (ph->ph_unstable) {
5142 MDI_DEBUG(1, (MDI_WARN, dip,
5143 "!One or more target devices are in transient state. "
5144 "This device can not be removed at this moment. "
5145 "Please try again later."));
5146 MDI_PHCI_UNLOCK(ph);
5147 return (NDI_BUSY);
5148 }
5149
5150 pip = ph->ph_path_head;
5151 while (pip != NULL) {
5152 MDI_PI_LOCK(pip);
5153 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5154
5155 /*
5156 * The mdi_pathinfo state is OK. Check the client state.
5157 * If failover in progress fail the pHCI from offlining
5158 */
5159 ct = MDI_PI(pip)->pi_client;
5160 i_mdi_client_lock(ct, pip);
5161 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
5162 (ct->ct_unstable)) {
5163 /*
5164 * Failover is in progress, Fail the DR
5165 */
5166 MDI_DEBUG(1, (MDI_WARN, dip,
5167 "!pHCI device is busy. "
5168 "This device can not be removed at this moment. "
5169 "Please try again later."));
5170 MDI_PI_UNLOCK(pip);
5171 i_mdi_client_unlock(ct);
5172 MDI_PHCI_UNLOCK(ph);
5173 return (NDI_BUSY);
5174 }
5175 MDI_PI_UNLOCK(pip);
5176
5177 /*
5178 * Check to see of we are removing the last path of this
5179 * client device...
5180 */
5181 cdip = ct->ct_dip;
5182 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5183 (i_mdi_client_compute_state(ct, ph) ==
5184 MDI_CLIENT_STATE_FAILED)) {
5185 i_mdi_client_unlock(ct);
5186 MDI_PHCI_UNLOCK(ph);
5187 if (ndi_devi_offline(cdip,
5188 NDI_DEVFS_CLEAN) != NDI_SUCCESS) {
5189 /*
5190 * ndi_devi_offline() failed.
5191 * This pHCI provides the critical path
5192 * to one or more client devices.
5193 * Return busy.
5194 */
5195 MDI_PHCI_LOCK(ph);
5196 MDI_DEBUG(1, (MDI_WARN, dip,
5197 "!pHCI device is busy. "
5198 "This device can not be removed at this "
5199 "moment. Please try again later."));
5200 failed_pip = pip;
5201 break;
5202 } else {
5203 MDI_PHCI_LOCK(ph);
5204 pip = next;
5205 }
5206 } else {
5207 i_mdi_client_unlock(ct);
5208 pip = next;
5209 }
5210 }
5211
5212 if (failed_pip) {
5213 pip = ph->ph_path_head;
5214 while (pip != failed_pip) {
5215 MDI_PI_LOCK(pip);
5216 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5217 ct = MDI_PI(pip)->pi_client;
5218 i_mdi_client_lock(ct, pip);
5219 cdip = ct->ct_dip;
5220 switch (MDI_CLIENT_STATE(ct)) {
5221 case MDI_CLIENT_STATE_OPTIMAL:
5222 case MDI_CLIENT_STATE_DEGRADED:
5223 if (cdip) {
5224 MDI_PI_UNLOCK(pip);
5225 i_mdi_client_unlock(ct);
5226 MDI_PHCI_UNLOCK(ph);
5227 (void) ndi_devi_online(cdip, 0);
5228 MDI_PHCI_LOCK(ph);
5229 pip = next;
5230 continue;
5231 }
5232 break;
5233
5234 case MDI_CLIENT_STATE_FAILED:
5235 if (cdip) {
5236 MDI_PI_UNLOCK(pip);
5237 i_mdi_client_unlock(ct);
5238 MDI_PHCI_UNLOCK(ph);
5239 (void) ndi_devi_offline(cdip,
5240 NDI_DEVFS_CLEAN);
5241 MDI_PHCI_LOCK(ph);
5242 pip = next;
5243 continue;
5244 }
5245 break;
5246 }
5247 MDI_PI_UNLOCK(pip);
5248 i_mdi_client_unlock(ct);
5249 pip = next;
5250 }
5251 MDI_PHCI_UNLOCK(ph);
5252 return (NDI_BUSY);
5253 }
5254
5255 /*
5256 * Mark the pHCI as offline
5257 */
5258 MDI_PHCI_SET_OFFLINE(ph);
5259
5260 /*
5261 * Mark the child mdi_pathinfo nodes as transient
5262 */
5263 pip = ph->ph_path_head;
5264 while (pip != NULL) {
5265 MDI_PI_LOCK(pip);
5266 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5267 MDI_PI_SET_OFFLINING(pip);
5268 MDI_PI_UNLOCK(pip);
5269 pip = next;
5270 }
5271 MDI_PHCI_UNLOCK(ph);
5272 /*
5273 * Give a chance for any pending commands to execute
5274 */
5275 delay_random(mdi_delay);
5276 MDI_PHCI_LOCK(ph);
5277 pip = ph->ph_path_head;
5278 while (pip != NULL) {
5279 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5280 (void) i_mdi_pi_offline(pip, flags);
5281 MDI_PI_LOCK(pip);
5282 ct = MDI_PI(pip)->pi_client;
5283 if (!MDI_PI_IS_OFFLINE(pip)) {
5284 MDI_DEBUG(1, (MDI_WARN, dip,
5285 "!pHCI device is busy. "
5286 "This device can not be removed at this moment. "
5287 "Please try again later."));
5288 MDI_PI_UNLOCK(pip);
5289 MDI_PHCI_SET_ONLINE(ph);
5290 MDI_PHCI_UNLOCK(ph);
5291 return (NDI_BUSY);
5292 }
5293 MDI_PI_UNLOCK(pip);
5294 pip = next;
5295 }
5296 MDI_PHCI_UNLOCK(ph);
5297
5298 return (rv);
5299 }
5300
5301 void
mdi_phci_mark_retiring(dev_info_t * dip,char ** cons_array)5302 mdi_phci_mark_retiring(dev_info_t *dip, char **cons_array)
5303 {
5304 mdi_phci_t *ph;
5305 mdi_client_t *ct;
5306 mdi_pathinfo_t *pip;
5307 mdi_pathinfo_t *next;
5308 dev_info_t *cdip;
5309
5310 if (!MDI_PHCI(dip))
5311 return;
5312
5313 ph = i_devi_get_phci(dip);
5314 if (ph == NULL) {
5315 return;
5316 }
5317
5318 MDI_PHCI_LOCK(ph);
5319
5320 if (MDI_PHCI_IS_OFFLINE(ph)) {
5321 /* has no last path */
5322 MDI_PHCI_UNLOCK(ph);
5323 return;
5324 }
5325
5326 pip = ph->ph_path_head;
5327 while (pip != NULL) {
5328 MDI_PI_LOCK(pip);
5329 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5330
5331 ct = MDI_PI(pip)->pi_client;
5332 i_mdi_client_lock(ct, pip);
5333 MDI_PI_UNLOCK(pip);
5334
5335 cdip = ct->ct_dip;
5336 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5337 (i_mdi_client_compute_state(ct, ph) ==
5338 MDI_CLIENT_STATE_FAILED)) {
5339 /* Last path. Mark client dip as retiring */
5340 i_mdi_client_unlock(ct);
5341 MDI_PHCI_UNLOCK(ph);
5342 (void) e_ddi_mark_retiring(cdip, cons_array);
5343 MDI_PHCI_LOCK(ph);
5344 pip = next;
5345 } else {
5346 i_mdi_client_unlock(ct);
5347 pip = next;
5348 }
5349 }
5350
5351 MDI_PHCI_UNLOCK(ph);
5352
5353 return;
5354 }
5355
5356 void
mdi_phci_retire_notify(dev_info_t * dip,int * constraint)5357 mdi_phci_retire_notify(dev_info_t *dip, int *constraint)
5358 {
5359 mdi_phci_t *ph;
5360 mdi_client_t *ct;
5361 mdi_pathinfo_t *pip;
5362 mdi_pathinfo_t *next;
5363 dev_info_t *cdip;
5364
5365 if (!MDI_PHCI(dip))
5366 return;
5367
5368 ph = i_devi_get_phci(dip);
5369 if (ph == NULL)
5370 return;
5371
5372 MDI_PHCI_LOCK(ph);
5373
5374 if (MDI_PHCI_IS_OFFLINE(ph)) {
5375 MDI_PHCI_UNLOCK(ph);
5376 /* not last path */
5377 return;
5378 }
5379
5380 if (ph->ph_unstable) {
5381 MDI_PHCI_UNLOCK(ph);
5382 /* can't check for constraints */
5383 *constraint = 0;
5384 return;
5385 }
5386
5387 pip = ph->ph_path_head;
5388 while (pip != NULL) {
5389 MDI_PI_LOCK(pip);
5390 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5391
5392 /*
5393 * The mdi_pathinfo state is OK. Check the client state.
5394 * If failover in progress fail the pHCI from offlining
5395 */
5396 ct = MDI_PI(pip)->pi_client;
5397 i_mdi_client_lock(ct, pip);
5398 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
5399 (ct->ct_unstable)) {
5400 /*
5401 * Failover is in progress, can't check for constraints
5402 */
5403 MDI_PI_UNLOCK(pip);
5404 i_mdi_client_unlock(ct);
5405 MDI_PHCI_UNLOCK(ph);
5406 *constraint = 0;
5407 return;
5408 }
5409 MDI_PI_UNLOCK(pip);
5410
5411 /*
5412 * Check to see of we are retiring the last path of this
5413 * client device...
5414 */
5415 cdip = ct->ct_dip;
5416 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5417 (i_mdi_client_compute_state(ct, ph) ==
5418 MDI_CLIENT_STATE_FAILED)) {
5419 i_mdi_client_unlock(ct);
5420 MDI_PHCI_UNLOCK(ph);
5421 (void) e_ddi_retire_notify(cdip, constraint);
5422 MDI_PHCI_LOCK(ph);
5423 pip = next;
5424 } else {
5425 i_mdi_client_unlock(ct);
5426 pip = next;
5427 }
5428 }
5429
5430 MDI_PHCI_UNLOCK(ph);
5431
5432 return;
5433 }
5434
5435 /*
5436 * offline the path(s) hanging off the pHCI. If the
5437 * last path to any client, check that constraints
5438 * have been applied.
5439 *
5440 * If constraint is 0, we aren't going to retire the
5441 * pHCI. However we still need to go through the paths
5442 * calling e_ddi_retire_finalize() to clear their
5443 * contract barriers.
5444 */
5445 void
mdi_phci_retire_finalize(dev_info_t * dip,int phci_only,void * constraint)5446 mdi_phci_retire_finalize(dev_info_t *dip, int phci_only, void *constraint)
5447 {
5448 mdi_phci_t *ph;
5449 mdi_client_t *ct;
5450 mdi_pathinfo_t *pip;
5451 mdi_pathinfo_t *next;
5452 dev_info_t *cdip;
5453 int unstable = 0;
5454 int tmp_constraint;
5455
5456 if (!MDI_PHCI(dip))
5457 return;
5458
5459 ph = i_devi_get_phci(dip);
5460 if (ph == NULL) {
5461 /* no last path and no pips */
5462 return;
5463 }
5464
5465 MDI_PHCI_LOCK(ph);
5466
5467 if (MDI_PHCI_IS_OFFLINE(ph)) {
5468 MDI_PHCI_UNLOCK(ph);
5469 /* no last path and no pips */
5470 return;
5471 }
5472
5473 /*
5474 * Check to see if the pHCI can be offlined
5475 */
5476 if (ph->ph_unstable) {
5477 unstable = 1;
5478 }
5479
5480 pip = ph->ph_path_head;
5481 while (pip != NULL) {
5482 MDI_PI_LOCK(pip);
5483 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5484
5485 /*
5486 * if failover in progress fail the pHCI from offlining
5487 */
5488 ct = MDI_PI(pip)->pi_client;
5489 i_mdi_client_lock(ct, pip);
5490 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
5491 (ct->ct_unstable)) {
5492 unstable = 1;
5493 }
5494 MDI_PI_UNLOCK(pip);
5495
5496 /*
5497 * Check to see of we are removing the last path of this
5498 * client device...
5499 */
5500 cdip = ct->ct_dip;
5501 if (!phci_only && cdip &&
5502 (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5503 (i_mdi_client_compute_state(ct, ph) ==
5504 MDI_CLIENT_STATE_FAILED)) {
5505 i_mdi_client_unlock(ct);
5506 MDI_PHCI_UNLOCK(ph);
5507 /*
5508 * This is the last path to this client.
5509 *
5510 * Constraint will only be set to 1 if this client can
5511 * be retired (as already determined by
5512 * mdi_phci_retire_notify). However we don't actually
5513 * need to retire the client (we just retire the last
5514 * path - MPXIO will then fail all I/Os to the client).
5515 * But we still need to call e_ddi_retire_finalize so
5516 * the contract barriers can be cleared. Therefore we
5517 * temporarily set constraint = 0 so that the client
5518 * dip is not retired.
5519 */
5520 tmp_constraint = 0;
5521 (void) e_ddi_retire_finalize(cdip, &tmp_constraint);
5522 MDI_PHCI_LOCK(ph);
5523 pip = next;
5524 } else {
5525 i_mdi_client_unlock(ct);
5526 pip = next;
5527 }
5528 }
5529
5530 if (!phci_only && *((int *)constraint) == 0) {
5531 MDI_PHCI_UNLOCK(ph);
5532 return;
5533 }
5534
5535 /*
5536 * Cannot offline pip(s)
5537 */
5538 if (unstable) {
5539 cmn_err(CE_WARN, "%s%d: mdi_phci_retire_finalize: "
5540 "pHCI in transient state, cannot retire",
5541 ddi_driver_name(dip), ddi_get_instance(dip));
5542 MDI_PHCI_UNLOCK(ph);
5543 return;
5544 }
5545
5546 /*
5547 * Mark the pHCI as offline
5548 */
5549 MDI_PHCI_SET_OFFLINE(ph);
5550
5551 /*
5552 * Mark the child mdi_pathinfo nodes as transient
5553 */
5554 pip = ph->ph_path_head;
5555 while (pip != NULL) {
5556 MDI_PI_LOCK(pip);
5557 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5558 MDI_PI_SET_OFFLINING(pip);
5559 MDI_PI_UNLOCK(pip);
5560 pip = next;
5561 }
5562 MDI_PHCI_UNLOCK(ph);
5563 /*
5564 * Give a chance for any pending commands to execute
5565 */
5566 delay_random(mdi_delay);
5567 MDI_PHCI_LOCK(ph);
5568 pip = ph->ph_path_head;
5569 while (pip != NULL) {
5570 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5571 (void) i_mdi_pi_offline(pip, 0);
5572 MDI_PI_LOCK(pip);
5573 ct = MDI_PI(pip)->pi_client;
5574 if (!MDI_PI_IS_OFFLINE(pip)) {
5575 cmn_err(CE_WARN, "mdi_phci_retire_finalize: "
5576 "path %d %s busy, cannot offline",
5577 mdi_pi_get_path_instance(pip),
5578 mdi_pi_spathname(pip));
5579 MDI_PI_UNLOCK(pip);
5580 MDI_PHCI_SET_ONLINE(ph);
5581 MDI_PHCI_UNLOCK(ph);
5582 return;
5583 }
5584 MDI_PI_UNLOCK(pip);
5585 pip = next;
5586 }
5587 MDI_PHCI_UNLOCK(ph);
5588
5589 return;
5590 }
5591
5592 void
mdi_phci_unretire(dev_info_t * dip)5593 mdi_phci_unretire(dev_info_t *dip)
5594 {
5595 mdi_phci_t *ph;
5596 mdi_pathinfo_t *pip;
5597 mdi_pathinfo_t *next;
5598
5599 ASSERT(MDI_PHCI(dip));
5600
5601 /*
5602 * Online the phci
5603 */
5604 i_mdi_phci_online(dip);
5605
5606 ph = i_devi_get_phci(dip);
5607 MDI_PHCI_LOCK(ph);
5608 pip = ph->ph_path_head;
5609 while (pip != NULL) {
5610 MDI_PI_LOCK(pip);
5611 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5612 MDI_PI_UNLOCK(pip);
5613 (void) i_mdi_pi_online(pip, 0);
5614 pip = next;
5615 }
5616 MDI_PHCI_UNLOCK(ph);
5617 }
5618
5619 /*ARGSUSED*/
5620 static int
i_mdi_client_offline(dev_info_t * dip,uint_t flags)5621 i_mdi_client_offline(dev_info_t *dip, uint_t flags)
5622 {
5623 int rv = NDI_SUCCESS;
5624 mdi_client_t *ct;
5625
5626 /*
5627 * Client component to go offline. Make sure that we are
5628 * not in failing over state and update client state
5629 * accordingly
5630 */
5631 ct = i_devi_get_client(dip);
5632 MDI_DEBUG(2, (MDI_NOTE, dip,
5633 "called %p %p", (void *)dip, (void *)ct));
5634 if (ct != NULL) {
5635 MDI_CLIENT_LOCK(ct);
5636 if (ct->ct_unstable) {
5637 /*
5638 * One or more paths are in transient state,
5639 * Dont allow offline of a client device
5640 */
5641 MDI_DEBUG(1, (MDI_WARN, dip,
5642 "!One or more paths to "
5643 "this device are in transient state. "
5644 "This device can not be removed at this moment. "
5645 "Please try again later."));
5646 MDI_CLIENT_UNLOCK(ct);
5647 return (NDI_BUSY);
5648 }
5649 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
5650 /*
5651 * Failover is in progress, Dont allow DR of
5652 * a client device
5653 */
5654 MDI_DEBUG(1, (MDI_WARN, dip,
5655 "!Client device is Busy. "
5656 "This device can not be removed at this moment. "
5657 "Please try again later."));
5658 MDI_CLIENT_UNLOCK(ct);
5659 return (NDI_BUSY);
5660 }
5661 MDI_CLIENT_SET_OFFLINE(ct);
5662
5663 /*
5664 * Unbind our relationship with the dev_info node
5665 */
5666 if (flags & NDI_DEVI_REMOVE) {
5667 ct->ct_dip = NULL;
5668 }
5669 MDI_CLIENT_UNLOCK(ct);
5670 }
5671 return (rv);
5672 }
5673
5674 /*
5675 * mdi_pre_attach():
5676 * Pre attach() notification handler
5677 */
5678 /*ARGSUSED*/
5679 int
mdi_pre_attach(dev_info_t * dip,ddi_attach_cmd_t cmd)5680 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
5681 {
5682 /* don't support old DDI_PM_RESUME */
5683 if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) &&
5684 (cmd == DDI_PM_RESUME))
5685 return (DDI_FAILURE);
5686
5687 return (DDI_SUCCESS);
5688 }
5689
5690 /*
5691 * mdi_post_attach():
5692 * Post attach() notification handler
5693 */
5694 /*ARGSUSED*/
5695 void
mdi_post_attach(dev_info_t * dip,ddi_attach_cmd_t cmd,int error)5696 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error)
5697 {
5698 mdi_phci_t *ph;
5699 mdi_client_t *ct;
5700 mdi_vhci_t *vh;
5701
5702 if (MDI_PHCI(dip)) {
5703 ph = i_devi_get_phci(dip);
5704 ASSERT(ph != NULL);
5705
5706 MDI_PHCI_LOCK(ph);
5707 switch (cmd) {
5708 case DDI_ATTACH:
5709 MDI_DEBUG(2, (MDI_NOTE, dip,
5710 "phci post_attach called %p", (void *)ph));
5711 if (error == DDI_SUCCESS) {
5712 MDI_PHCI_SET_ATTACH(ph);
5713 } else {
5714 MDI_DEBUG(1, (MDI_NOTE, dip,
5715 "!pHCI post_attach failed: error %d",
5716 error));
5717 MDI_PHCI_SET_DETACH(ph);
5718 }
5719 break;
5720
5721 case DDI_RESUME:
5722 case DDI_PM_RESUME:
5723 MDI_DEBUG(2, (MDI_NOTE, dip,
5724 "pHCI post_resume: called %p", (void *)ph));
5725 if (error == DDI_SUCCESS) {
5726 MDI_PHCI_SET_RESUME(ph);
5727 } else {
5728 MDI_DEBUG(1, (MDI_NOTE, dip,
5729 "!pHCI post_resume failed: error %d",
5730 error));
5731 MDI_PHCI_SET_SUSPEND(ph);
5732 }
5733 break;
5734 }
5735 MDI_PHCI_UNLOCK(ph);
5736 }
5737
5738 if (MDI_CLIENT(dip)) {
5739 ct = i_devi_get_client(dip);
5740 ASSERT(ct != NULL);
5741
5742 MDI_CLIENT_LOCK(ct);
5743 switch (cmd) {
5744 case DDI_ATTACH:
5745 MDI_DEBUG(2, (MDI_NOTE, dip,
5746 "client post_attach called %p", (void *)ct));
5747 if (error != DDI_SUCCESS) {
5748 MDI_DEBUG(1, (MDI_NOTE, dip,
5749 "!client post_attach failed: error %d",
5750 error));
5751 MDI_CLIENT_SET_DETACH(ct);
5752 MDI_DEBUG(4, (MDI_WARN, dip,
5753 "i_mdi_pm_reset_client"));
5754 i_mdi_pm_reset_client(ct);
5755 break;
5756 }
5757
5758 /*
5759 * Client device has successfully attached, inform
5760 * the vhci.
5761 */
5762 vh = ct->ct_vhci;
5763 if (vh->vh_ops->vo_client_attached)
5764 (*vh->vh_ops->vo_client_attached)(dip);
5765
5766 MDI_CLIENT_SET_ATTACH(ct);
5767 break;
5768
5769 case DDI_RESUME:
5770 case DDI_PM_RESUME:
5771 MDI_DEBUG(2, (MDI_NOTE, dip,
5772 "client post_attach: called %p", (void *)ct));
5773 if (error == DDI_SUCCESS) {
5774 MDI_CLIENT_SET_RESUME(ct);
5775 } else {
5776 MDI_DEBUG(1, (MDI_NOTE, dip,
5777 "!client post_resume failed: error %d",
5778 error));
5779 MDI_CLIENT_SET_SUSPEND(ct);
5780 }
5781 break;
5782 }
5783 MDI_CLIENT_UNLOCK(ct);
5784 }
5785 }
5786
5787 /*
5788 * mdi_pre_detach():
5789 * Pre detach notification handler
5790 */
5791 /*ARGSUSED*/
5792 int
mdi_pre_detach(dev_info_t * dip,ddi_detach_cmd_t cmd)5793 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5794 {
5795 int rv = DDI_SUCCESS;
5796
5797 if (MDI_CLIENT(dip)) {
5798 (void) i_mdi_client_pre_detach(dip, cmd);
5799 }
5800
5801 if (MDI_PHCI(dip)) {
5802 rv = i_mdi_phci_pre_detach(dip, cmd);
5803 }
5804
5805 return (rv);
5806 }
5807
5808 /*ARGSUSED*/
5809 static int
i_mdi_phci_pre_detach(dev_info_t * dip,ddi_detach_cmd_t cmd)5810 i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5811 {
5812 int rv = DDI_SUCCESS;
5813 mdi_phci_t *ph;
5814 mdi_client_t *ct;
5815 mdi_pathinfo_t *pip;
5816 mdi_pathinfo_t *failed_pip = NULL;
5817 mdi_pathinfo_t *next;
5818
5819 ph = i_devi_get_phci(dip);
5820 if (ph == NULL) {
5821 return (rv);
5822 }
5823
5824 MDI_PHCI_LOCK(ph);
5825 switch (cmd) {
5826 case DDI_DETACH:
5827 MDI_DEBUG(2, (MDI_NOTE, dip,
5828 "pHCI pre_detach: called %p", (void *)ph));
5829 if (!MDI_PHCI_IS_OFFLINE(ph)) {
5830 /*
5831 * mdi_pathinfo nodes are still attached to
5832 * this pHCI. Fail the detach for this pHCI.
5833 */
5834 MDI_DEBUG(2, (MDI_WARN, dip,
5835 "pHCI pre_detach: paths are still attached %p",
5836 (void *)ph));
5837 rv = DDI_FAILURE;
5838 break;
5839 }
5840 MDI_PHCI_SET_DETACH(ph);
5841 break;
5842
5843 case DDI_SUSPEND:
5844 /*
5845 * pHCI is getting suspended. Since mpxio client
5846 * devices may not be suspended at this point, to avoid
5847 * a potential stack overflow, it is important to suspend
5848 * client devices before pHCI can be suspended.
5849 */
5850
5851 MDI_DEBUG(2, (MDI_NOTE, dip,
5852 "pHCI pre_suspend: called %p", (void *)ph));
5853 /*
5854 * Suspend all the client devices accessible through this pHCI
5855 */
5856 pip = ph->ph_path_head;
5857 while (pip != NULL && rv == DDI_SUCCESS) {
5858 dev_info_t *cdip;
5859 MDI_PI_LOCK(pip);
5860 next =
5861 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5862 ct = MDI_PI(pip)->pi_client;
5863 i_mdi_client_lock(ct, pip);
5864 cdip = ct->ct_dip;
5865 MDI_PI_UNLOCK(pip);
5866 if ((MDI_CLIENT_IS_DETACHED(ct) == 0) &&
5867 MDI_CLIENT_IS_SUSPENDED(ct) == 0) {
5868 i_mdi_client_unlock(ct);
5869 if ((rv = devi_detach(cdip, DDI_SUSPEND)) !=
5870 DDI_SUCCESS) {
5871 /*
5872 * Suspend of one of the client
5873 * device has failed.
5874 */
5875 MDI_DEBUG(1, (MDI_WARN, dip,
5876 "!suspend of device (%s%d) failed.",
5877 ddi_driver_name(cdip),
5878 ddi_get_instance(cdip)));
5879 failed_pip = pip;
5880 break;
5881 }
5882 } else {
5883 i_mdi_client_unlock(ct);
5884 }
5885 pip = next;
5886 }
5887
5888 if (rv == DDI_SUCCESS) {
5889 /*
5890 * Suspend of client devices is complete. Proceed
5891 * with pHCI suspend.
5892 */
5893 MDI_PHCI_SET_SUSPEND(ph);
5894 } else {
5895 /*
5896 * Revert back all the suspended client device states
5897 * to converse.
5898 */
5899 pip = ph->ph_path_head;
5900 while (pip != failed_pip) {
5901 dev_info_t *cdip;
5902 MDI_PI_LOCK(pip);
5903 next =
5904 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5905 ct = MDI_PI(pip)->pi_client;
5906 i_mdi_client_lock(ct, pip);
5907 cdip = ct->ct_dip;
5908 MDI_PI_UNLOCK(pip);
5909 if (MDI_CLIENT_IS_SUSPENDED(ct)) {
5910 i_mdi_client_unlock(ct);
5911 (void) devi_attach(cdip, DDI_RESUME);
5912 } else {
5913 i_mdi_client_unlock(ct);
5914 }
5915 pip = next;
5916 }
5917 }
5918 break;
5919
5920 default:
5921 rv = DDI_FAILURE;
5922 break;
5923 }
5924 MDI_PHCI_UNLOCK(ph);
5925 return (rv);
5926 }
5927
5928 /*ARGSUSED*/
5929 static int
i_mdi_client_pre_detach(dev_info_t * dip,ddi_detach_cmd_t cmd)5930 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5931 {
5932 int rv = DDI_SUCCESS;
5933 mdi_client_t *ct;
5934
5935 ct = i_devi_get_client(dip);
5936 if (ct == NULL) {
5937 return (rv);
5938 }
5939
5940 MDI_CLIENT_LOCK(ct);
5941 switch (cmd) {
5942 case DDI_DETACH:
5943 MDI_DEBUG(2, (MDI_NOTE, dip,
5944 "client pre_detach: called %p",
5945 (void *)ct));
5946 MDI_CLIENT_SET_DETACH(ct);
5947 break;
5948
5949 case DDI_SUSPEND:
5950 MDI_DEBUG(2, (MDI_NOTE, dip,
5951 "client pre_suspend: called %p",
5952 (void *)ct));
5953 MDI_CLIENT_SET_SUSPEND(ct);
5954 break;
5955
5956 default:
5957 rv = DDI_FAILURE;
5958 break;
5959 }
5960 MDI_CLIENT_UNLOCK(ct);
5961 return (rv);
5962 }
5963
5964 /*
5965 * mdi_post_detach():
5966 * Post detach notification handler
5967 */
5968 /*ARGSUSED*/
5969 void
mdi_post_detach(dev_info_t * dip,ddi_detach_cmd_t cmd,int error)5970 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5971 {
5972 /*
5973 * Detach/Suspend of mpxio component failed. Update our state
5974 * too
5975 */
5976 if (MDI_PHCI(dip))
5977 i_mdi_phci_post_detach(dip, cmd, error);
5978
5979 if (MDI_CLIENT(dip))
5980 i_mdi_client_post_detach(dip, cmd, error);
5981 }
5982
5983 /*ARGSUSED*/
5984 static void
i_mdi_phci_post_detach(dev_info_t * dip,ddi_detach_cmd_t cmd,int error)5985 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5986 {
5987 mdi_phci_t *ph;
5988
5989 /*
5990 * Detach/Suspend of phci component failed. Update our state
5991 * too
5992 */
5993 ph = i_devi_get_phci(dip);
5994 if (ph == NULL) {
5995 return;
5996 }
5997
5998 MDI_PHCI_LOCK(ph);
5999 /*
6000 * Detach of pHCI failed. Restore back converse
6001 * state
6002 */
6003 switch (cmd) {
6004 case DDI_DETACH:
6005 MDI_DEBUG(2, (MDI_NOTE, dip,
6006 "pHCI post_detach: called %p",
6007 (void *)ph));
6008 if (error != DDI_SUCCESS)
6009 MDI_PHCI_SET_ATTACH(ph);
6010 break;
6011
6012 case DDI_SUSPEND:
6013 case DDI_PM_SUSPEND:
6014 MDI_DEBUG(2, (MDI_NOTE, dip,
6015 "pHCI post_suspend: called %p",
6016 (void *)ph));
6017 if (error != DDI_SUCCESS)
6018 MDI_PHCI_SET_RESUME(ph);
6019 break;
6020 case DDI_HOTPLUG_DETACH:
6021 break;
6022 }
6023 MDI_PHCI_UNLOCK(ph);
6024 }
6025
6026 /*ARGSUSED*/
6027 static void
i_mdi_client_post_detach(dev_info_t * dip,ddi_detach_cmd_t cmd,int error)6028 i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
6029 {
6030 mdi_client_t *ct;
6031
6032 ct = i_devi_get_client(dip);
6033 if (ct == NULL) {
6034 return;
6035 }
6036 MDI_CLIENT_LOCK(ct);
6037 /*
6038 * Detach of Client failed. Restore back converse
6039 * state
6040 */
6041 switch (cmd) {
6042 case DDI_DETACH:
6043 MDI_DEBUG(2, (MDI_NOTE, dip,
6044 "client post_detach: called %p", (void *)ct));
6045 if (DEVI_IS_ATTACHING(dip)) {
6046 MDI_DEBUG(4, (MDI_NOTE, dip,
6047 "i_mdi_pm_rele_client\n"));
6048 i_mdi_pm_rele_client(ct, ct->ct_path_count);
6049 } else {
6050 MDI_DEBUG(4, (MDI_NOTE, dip,
6051 "i_mdi_pm_reset_client\n"));
6052 i_mdi_pm_reset_client(ct);
6053 }
6054 if (error != DDI_SUCCESS)
6055 MDI_CLIENT_SET_ATTACH(ct);
6056 break;
6057
6058 case DDI_SUSPEND:
6059 case DDI_PM_SUSPEND:
6060 MDI_DEBUG(2, (MDI_NOTE, dip,
6061 "called %p", (void *)ct));
6062 if (error != DDI_SUCCESS)
6063 MDI_CLIENT_SET_RESUME(ct);
6064 break;
6065 case DDI_HOTPLUG_DETACH:
6066 break;
6067 }
6068 MDI_CLIENT_UNLOCK(ct);
6069 }
6070
6071 int
mdi_pi_kstat_exists(mdi_pathinfo_t * pip)6072 mdi_pi_kstat_exists(mdi_pathinfo_t *pip)
6073 {
6074 return (MDI_PI(pip)->pi_kstats ? 1 : 0);
6075 }
6076
6077 /*
6078 * create and install per-path (client - pHCI) statistics
6079 * I/O stats supported: nread, nwritten, reads, and writes
6080 * Error stats - hard errors, soft errors, & transport errors
6081 */
6082 int
mdi_pi_kstat_create(mdi_pathinfo_t * pip,char * ksname)6083 mdi_pi_kstat_create(mdi_pathinfo_t *pip, char *ksname)
6084 {
6085 kstat_t *kiosp, *kerrsp;
6086 struct pi_errs *nsp;
6087 struct mdi_pi_kstats *mdi_statp;
6088
6089 if (MDI_PI(pip)->pi_kstats != NULL)
6090 return (MDI_SUCCESS);
6091
6092 if ((kiosp = kstat_create("mdi", 0, ksname, "iopath",
6093 KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) {
6094 return (MDI_FAILURE);
6095 }
6096
6097 (void) strcat(ksname, ",err");
6098 kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors",
6099 KSTAT_TYPE_NAMED,
6100 sizeof (struct pi_errs) / sizeof (kstat_named_t), 0);
6101 if (kerrsp == NULL) {
6102 kstat_delete(kiosp);
6103 return (MDI_FAILURE);
6104 }
6105
6106 nsp = (struct pi_errs *)kerrsp->ks_data;
6107 kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32);
6108 kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32);
6109 kstat_named_init(&nsp->pi_transerrs, "Transport Errors",
6110 KSTAT_DATA_UINT32);
6111 kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy",
6112 KSTAT_DATA_UINT32);
6113 kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors",
6114 KSTAT_DATA_UINT32);
6115 kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources",
6116 KSTAT_DATA_UINT32);
6117 kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors",
6118 KSTAT_DATA_UINT32);
6119 kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State",
6120 KSTAT_DATA_UINT32);
6121 kstat_named_init(&nsp->pi_failedfrom, "Failed From",
6122 KSTAT_DATA_UINT32);
6123 kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32);
6124
6125 mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP);
6126 mdi_statp->pi_kstat_ref = 1;
6127 mdi_statp->pi_kstat_iostats = kiosp;
6128 mdi_statp->pi_kstat_errstats = kerrsp;
6129 kstat_install(kiosp);
6130 kstat_install(kerrsp);
6131 MDI_PI(pip)->pi_kstats = mdi_statp;
6132 return (MDI_SUCCESS);
6133 }
6134
6135 /*
6136 * destroy per-path properties
6137 */
6138 static void
i_mdi_pi_kstat_destroy(mdi_pathinfo_t * pip)6139 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip)
6140 {
6141
6142 struct mdi_pi_kstats *mdi_statp;
6143
6144 if (MDI_PI(pip)->pi_kstats == NULL)
6145 return;
6146 if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL)
6147 return;
6148
6149 MDI_PI(pip)->pi_kstats = NULL;
6150
6151 /*
6152 * the kstat may be shared between multiple pathinfo nodes
6153 * decrement this pathinfo's usage, removing the kstats
6154 * themselves when the last pathinfo reference is removed.
6155 */
6156 ASSERT(mdi_statp->pi_kstat_ref > 0);
6157 if (--mdi_statp->pi_kstat_ref != 0)
6158 return;
6159
6160 kstat_delete(mdi_statp->pi_kstat_iostats);
6161 kstat_delete(mdi_statp->pi_kstat_errstats);
6162 kmem_free(mdi_statp, sizeof (*mdi_statp));
6163 }
6164
6165 /*
6166 * update I/O paths KSTATS
6167 */
6168 void
mdi_pi_kstat_iosupdate(mdi_pathinfo_t * pip,struct buf * bp)6169 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp)
6170 {
6171 kstat_t *iostatp;
6172 size_t xfer_cnt;
6173
6174 ASSERT(pip != NULL);
6175
6176 /*
6177 * I/O can be driven across a path prior to having path
6178 * statistics available, i.e. probe(9e).
6179 */
6180 if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) {
6181 iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats;
6182 xfer_cnt = bp->b_bcount - bp->b_resid;
6183 if (bp->b_flags & B_READ) {
6184 KSTAT_IO_PTR(iostatp)->reads++;
6185 KSTAT_IO_PTR(iostatp)->nread += xfer_cnt;
6186 } else {
6187 KSTAT_IO_PTR(iostatp)->writes++;
6188 KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt;
6189 }
6190 }
6191 }
6192
6193 /*
6194 * Enable the path(specific client/target/initiator)
6195 * Enabling a path means that MPxIO may select the enabled path for routing
6196 * future I/O requests, subject to other path state constraints.
6197 */
6198 int
mdi_pi_enable_path(mdi_pathinfo_t * pip,int flags)6199 mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags)
6200 {
6201 mdi_phci_t *ph;
6202
6203 ph = MDI_PI(pip)->pi_phci;
6204 if (ph == NULL) {
6205 MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip),
6206 "!failed: path %s %p: NULL ph",
6207 mdi_pi_spathname(pip), (void *)pip));
6208 return (MDI_FAILURE);
6209 }
6210
6211 (void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags,
6212 MDI_ENABLE_OP);
6213 MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip,
6214 "!returning success pip = %p. ph = %p",
6215 (void *)pip, (void *)ph));
6216 return (MDI_SUCCESS);
6217
6218 }
6219
6220 /*
6221 * Disable the path (specific client/target/initiator)
6222 * Disabling a path means that MPxIO will not select the disabled path for
6223 * routing any new I/O requests.
6224 */
6225 int
mdi_pi_disable_path(mdi_pathinfo_t * pip,int flags)6226 mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags)
6227 {
6228 mdi_phci_t *ph;
6229
6230 ph = MDI_PI(pip)->pi_phci;
6231 if (ph == NULL) {
6232 MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip),
6233 "!failed: path %s %p: NULL ph",
6234 mdi_pi_spathname(pip), (void *)pip));
6235 return (MDI_FAILURE);
6236 }
6237
6238 (void) i_mdi_enable_disable_path(pip,
6239 ph->ph_vhci, flags, MDI_DISABLE_OP);
6240 MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip,
6241 "!returning success pip = %p. ph = %p",
6242 (void *)pip, (void *)ph));
6243 return (MDI_SUCCESS);
6244 }
6245
6246 /*
6247 * disable the path to a particular pHCI (pHCI specified in the phci_path
6248 * argument) for a particular client (specified in the client_path argument).
6249 * Disabling a path means that MPxIO will not select the disabled path for
6250 * routing any new I/O requests.
6251 * NOTE: this will be removed once the NWS files are changed to use the new
6252 * mdi_{enable,disable}_path interfaces
6253 */
6254 int
mdi_pi_disable(dev_info_t * cdip,dev_info_t * pdip,int flags)6255 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags)
6256 {
6257 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP));
6258 }
6259
6260 /*
6261 * Enable the path to a particular pHCI (pHCI specified in the phci_path
6262 * argument) for a particular client (specified in the client_path argument).
6263 * Enabling a path means that MPxIO may select the enabled path for routing
6264 * future I/O requests, subject to other path state constraints.
6265 * NOTE: this will be removed once the NWS files are changed to use the new
6266 * mdi_{enable,disable}_path interfaces
6267 */
6268
6269 int
mdi_pi_enable(dev_info_t * cdip,dev_info_t * pdip,int flags)6270 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags)
6271 {
6272 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP));
6273 }
6274
6275 /*
6276 * Common routine for doing enable/disable.
6277 */
6278 static mdi_pathinfo_t *
i_mdi_enable_disable_path(mdi_pathinfo_t * pip,mdi_vhci_t * vh,int flags,int op)6279 i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags,
6280 int op)
6281 {
6282 int sync_flag = 0;
6283 int rv;
6284 mdi_pathinfo_t *next;
6285 int (*f)() = NULL;
6286
6287 /*
6288 * Check to make sure the path is not already in the
6289 * requested state. If it is just return the next path
6290 * as we have nothing to do here.
6291 */
6292 if ((MDI_PI_IS_DISABLE(pip) && op == MDI_DISABLE_OP) ||
6293 (!MDI_PI_IS_DISABLE(pip) && op == MDI_ENABLE_OP)) {
6294 MDI_PI_LOCK(pip);
6295 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
6296 MDI_PI_UNLOCK(pip);
6297 return (next);
6298 }
6299
6300 f = vh->vh_ops->vo_pi_state_change;
6301
6302 sync_flag = (flags << 8) & 0xf00;
6303
6304 /*
6305 * Do a callback into the mdi consumer to let it
6306 * know that path is about to get enabled/disabled.
6307 */
6308 rv = MDI_SUCCESS;
6309 if (f != NULL) {
6310 rv = (*f)(vh->vh_dip, pip, 0,
6311 MDI_PI_EXT_STATE(pip),
6312 MDI_EXT_STATE_CHANGE | sync_flag |
6313 op | MDI_BEFORE_STATE_CHANGE);
6314 if (rv != MDI_SUCCESS) {
6315 MDI_DEBUG(2, (MDI_WARN, vh->vh_dip,
6316 "vo_pi_state_change: failed rv = %x", rv));
6317 }
6318 }
6319 MDI_PI_LOCK(pip);
6320 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
6321
6322 switch (flags) {
6323 case USER_DISABLE:
6324 if (op == MDI_DISABLE_OP) {
6325 MDI_PI_SET_USER_DISABLE(pip);
6326 } else {
6327 MDI_PI_SET_USER_ENABLE(pip);
6328 }
6329 break;
6330 case DRIVER_DISABLE:
6331 if (op == MDI_DISABLE_OP) {
6332 MDI_PI_SET_DRV_DISABLE(pip);
6333 } else {
6334 MDI_PI_SET_DRV_ENABLE(pip);
6335 }
6336 break;
6337 case DRIVER_DISABLE_TRANSIENT:
6338 if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) {
6339 MDI_PI_SET_DRV_DISABLE_TRANS(pip);
6340 } else {
6341 MDI_PI_SET_DRV_ENABLE_TRANS(pip);
6342 }
6343 break;
6344 }
6345 MDI_PI_UNLOCK(pip);
6346 /*
6347 * Do a callback into the mdi consumer to let it
6348 * know that path is now enabled/disabled.
6349 */
6350 if (f != NULL) {
6351 rv = (*f)(vh->vh_dip, pip, 0,
6352 MDI_PI_EXT_STATE(pip),
6353 MDI_EXT_STATE_CHANGE | sync_flag |
6354 op | MDI_AFTER_STATE_CHANGE);
6355 if (rv != MDI_SUCCESS) {
6356 MDI_DEBUG(2, (MDI_WARN, vh->vh_dip,
6357 "vo_pi_state_change failed: rv = %x", rv));
6358 }
6359 }
6360 return (next);
6361 }
6362
6363 /*
6364 * Common routine for doing enable/disable.
6365 * NOTE: this will be removed once the NWS files are changed to use the new
6366 * mdi_{enable,disable}_path has been putback
6367 */
6368 int
i_mdi_pi_enable_disable(dev_info_t * cdip,dev_info_t * pdip,int flags,int op)6369 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op)
6370 {
6371
6372 mdi_phci_t *ph;
6373 mdi_vhci_t *vh = NULL;
6374 mdi_client_t *ct;
6375 mdi_pathinfo_t *next, *pip;
6376 int found_it;
6377
6378 ph = i_devi_get_phci(pdip);
6379 MDI_DEBUG(5, (MDI_NOTE, cdip ? cdip : pdip,
6380 "!op = %d pdip = %p cdip = %p", op, (void *)pdip,
6381 (void *)cdip));
6382 if (ph == NULL) {
6383 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6384 "!failed: operation %d: NULL ph", op));
6385 return (MDI_FAILURE);
6386 }
6387
6388 if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) {
6389 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6390 "!failed: invalid operation %d", op));
6391 return (MDI_FAILURE);
6392 }
6393
6394 vh = ph->ph_vhci;
6395
6396 if (cdip == NULL) {
6397 /*
6398 * Need to mark the Phci as enabled/disabled.
6399 */
6400 MDI_DEBUG(4, (MDI_NOTE, cdip ? cdip : pdip,
6401 "op %d for the phci", op));
6402 MDI_PHCI_LOCK(ph);
6403 switch (flags) {
6404 case USER_DISABLE:
6405 if (op == MDI_DISABLE_OP) {
6406 MDI_PHCI_SET_USER_DISABLE(ph);
6407 } else {
6408 MDI_PHCI_SET_USER_ENABLE(ph);
6409 }
6410 break;
6411 case DRIVER_DISABLE:
6412 if (op == MDI_DISABLE_OP) {
6413 MDI_PHCI_SET_DRV_DISABLE(ph);
6414 } else {
6415 MDI_PHCI_SET_DRV_ENABLE(ph);
6416 }
6417 break;
6418 case DRIVER_DISABLE_TRANSIENT:
6419 if (op == MDI_DISABLE_OP) {
6420 MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph);
6421 } else {
6422 MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph);
6423 }
6424 break;
6425 default:
6426 MDI_PHCI_UNLOCK(ph);
6427 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6428 "!invalid flag argument= %d", flags));
6429 }
6430
6431 /*
6432 * Phci has been disabled. Now try to enable/disable
6433 * path info's to each client.
6434 */
6435 pip = ph->ph_path_head;
6436 while (pip != NULL) {
6437 pip = i_mdi_enable_disable_path(pip, vh, flags, op);
6438 }
6439 MDI_PHCI_UNLOCK(ph);
6440 } else {
6441
6442 /*
6443 * Disable a specific client.
6444 */
6445 ct = i_devi_get_client(cdip);
6446 if (ct == NULL) {
6447 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6448 "!failed: operation = %d: NULL ct", op));
6449 return (MDI_FAILURE);
6450 }
6451
6452 MDI_CLIENT_LOCK(ct);
6453 pip = ct->ct_path_head;
6454 found_it = 0;
6455 while (pip != NULL) {
6456 MDI_PI_LOCK(pip);
6457 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6458 if (MDI_PI(pip)->pi_phci == ph) {
6459 MDI_PI_UNLOCK(pip);
6460 found_it = 1;
6461 break;
6462 }
6463 MDI_PI_UNLOCK(pip);
6464 pip = next;
6465 }
6466
6467
6468 MDI_CLIENT_UNLOCK(ct);
6469 if (found_it == 0) {
6470 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6471 "!failed. Could not find corresponding pip\n"));
6472 return (MDI_FAILURE);
6473 }
6474
6475 (void) i_mdi_enable_disable_path(pip, vh, flags, op);
6476 }
6477
6478 MDI_DEBUG(5, (MDI_NOTE, cdip ? cdip : pdip,
6479 "!op %d returning success pdip = %p cdip = %p",
6480 op, (void *)pdip, (void *)cdip));
6481 return (MDI_SUCCESS);
6482 }
6483
6484 /*
6485 * Ensure phci powered up
6486 */
6487 static void
i_mdi_pm_hold_pip(mdi_pathinfo_t * pip)6488 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip)
6489 {
6490 dev_info_t *ph_dip;
6491
6492 ASSERT(pip != NULL);
6493 ASSERT(MDI_PI_LOCKED(pip));
6494
6495 if (MDI_PI(pip)->pi_pm_held) {
6496 return;
6497 }
6498
6499 ph_dip = mdi_pi_get_phci(pip);
6500 MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6501 "%s %p", mdi_pi_spathname(pip), (void *)pip));
6502 if (ph_dip == NULL) {
6503 return;
6504 }
6505
6506 MDI_PI_UNLOCK(pip);
6507 MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt was %d",
6508 DEVI(ph_dip)->devi_pm_kidsupcnt));
6509 pm_hold_power(ph_dip);
6510 MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt is %d",
6511 DEVI(ph_dip)->devi_pm_kidsupcnt));
6512 MDI_PI_LOCK(pip);
6513
6514 /* If PM_GET_PM_INFO is NULL the pm_hold_power above was a noop */
6515 if (DEVI(ph_dip)->devi_pm_info)
6516 MDI_PI(pip)->pi_pm_held = 1;
6517 }
6518
6519 /*
6520 * Allow phci powered down
6521 */
6522 static void
i_mdi_pm_rele_pip(mdi_pathinfo_t * pip)6523 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip)
6524 {
6525 dev_info_t *ph_dip = NULL;
6526
6527 ASSERT(pip != NULL);
6528 ASSERT(MDI_PI_LOCKED(pip));
6529
6530 if (MDI_PI(pip)->pi_pm_held == 0) {
6531 return;
6532 }
6533
6534 ph_dip = mdi_pi_get_phci(pip);
6535 ASSERT(ph_dip != NULL);
6536
6537 MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6538 "%s %p", mdi_pi_spathname(pip), (void *)pip));
6539
6540 MDI_PI_UNLOCK(pip);
6541 MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6542 "kidsupcnt was %d", DEVI(ph_dip)->devi_pm_kidsupcnt));
6543 pm_rele_power(ph_dip);
6544 MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6545 "kidsupcnt is %d", DEVI(ph_dip)->devi_pm_kidsupcnt));
6546 MDI_PI_LOCK(pip);
6547
6548 MDI_PI(pip)->pi_pm_held = 0;
6549 }
6550
6551 static void
i_mdi_pm_hold_client(mdi_client_t * ct,int incr)6552 i_mdi_pm_hold_client(mdi_client_t *ct, int incr)
6553 {
6554 ASSERT(MDI_CLIENT_LOCKED(ct));
6555
6556 ct->ct_power_cnt += incr;
6557 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
6558 "%p ct_power_cnt = %d incr = %d",
6559 (void *)ct, ct->ct_power_cnt, incr));
6560 ASSERT(ct->ct_power_cnt >= 0);
6561 }
6562
6563 static void
i_mdi_rele_all_phci(mdi_client_t * ct)6564 i_mdi_rele_all_phci(mdi_client_t *ct)
6565 {
6566 mdi_pathinfo_t *pip;
6567
6568 ASSERT(MDI_CLIENT_LOCKED(ct));
6569 pip = (mdi_pathinfo_t *)ct->ct_path_head;
6570 while (pip != NULL) {
6571 mdi_hold_path(pip);
6572 MDI_PI_LOCK(pip);
6573 i_mdi_pm_rele_pip(pip);
6574 MDI_PI_UNLOCK(pip);
6575 mdi_rele_path(pip);
6576 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6577 }
6578 }
6579
6580 static void
i_mdi_pm_rele_client(mdi_client_t * ct,int decr)6581 i_mdi_pm_rele_client(mdi_client_t *ct, int decr)
6582 {
6583 ASSERT(MDI_CLIENT_LOCKED(ct));
6584
6585 if (i_ddi_devi_attached(ct->ct_dip)) {
6586 ct->ct_power_cnt -= decr;
6587 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
6588 "%p ct_power_cnt = %d decr = %d",
6589 (void *)ct, ct->ct_power_cnt, decr));
6590 }
6591
6592 ASSERT(ct->ct_power_cnt >= 0);
6593 if (ct->ct_power_cnt == 0) {
6594 i_mdi_rele_all_phci(ct);
6595 return;
6596 }
6597 }
6598
6599 static void
i_mdi_pm_reset_client(mdi_client_t * ct)6600 i_mdi_pm_reset_client(mdi_client_t *ct)
6601 {
6602 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
6603 "%p ct_power_cnt = %d", (void *)ct, ct->ct_power_cnt));
6604 ASSERT(MDI_CLIENT_LOCKED(ct));
6605 ct->ct_power_cnt = 0;
6606 i_mdi_rele_all_phci(ct);
6607 ct->ct_powercnt_config = 0;
6608 ct->ct_powercnt_unconfig = 0;
6609 ct->ct_powercnt_reset = 1;
6610 }
6611
6612 static int
i_mdi_power_one_phci(mdi_pathinfo_t * pip)6613 i_mdi_power_one_phci(mdi_pathinfo_t *pip)
6614 {
6615 int ret;
6616 dev_info_t *ph_dip;
6617
6618 MDI_PI_LOCK(pip);
6619 i_mdi_pm_hold_pip(pip);
6620
6621 ph_dip = mdi_pi_get_phci(pip);
6622 MDI_PI_UNLOCK(pip);
6623
6624 /* bring all components of phci to full power */
6625 MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6626 "pm_powerup for %s%d %p", ddi_driver_name(ph_dip),
6627 ddi_get_instance(ph_dip), (void *)pip));
6628
6629 ret = pm_powerup(ph_dip);
6630
6631 if (ret == DDI_FAILURE) {
6632 MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6633 "pm_powerup FAILED for %s%d %p",
6634 ddi_driver_name(ph_dip), ddi_get_instance(ph_dip),
6635 (void *)pip));
6636
6637 MDI_PI_LOCK(pip);
6638 i_mdi_pm_rele_pip(pip);
6639 MDI_PI_UNLOCK(pip);
6640 return (MDI_FAILURE);
6641 }
6642
6643 return (MDI_SUCCESS);
6644 }
6645
6646 static int
i_mdi_power_all_phci(mdi_client_t * ct)6647 i_mdi_power_all_phci(mdi_client_t *ct)
6648 {
6649 mdi_pathinfo_t *pip;
6650 int succeeded = 0;
6651
6652 ASSERT(MDI_CLIENT_LOCKED(ct));
6653 pip = (mdi_pathinfo_t *)ct->ct_path_head;
6654 while (pip != NULL) {
6655 /*
6656 * Don't power if MDI_PATHINFO_STATE_FAULT
6657 * or MDI_PATHINFO_STATE_OFFLINE.
6658 */
6659 if (MDI_PI_IS_INIT(pip) ||
6660 MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) {
6661 mdi_hold_path(pip);
6662 MDI_CLIENT_UNLOCK(ct);
6663 if (i_mdi_power_one_phci(pip) == MDI_SUCCESS)
6664 succeeded = 1;
6665
6666 ASSERT(ct == MDI_PI(pip)->pi_client);
6667 MDI_CLIENT_LOCK(ct);
6668 mdi_rele_path(pip);
6669 }
6670 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6671 }
6672
6673 return (succeeded ? MDI_SUCCESS : MDI_FAILURE);
6674 }
6675
6676 /*
6677 * mdi_bus_power():
6678 * 1. Place the phci(s) into powered up state so that
6679 * client can do power management
6680 * 2. Ensure phci powered up as client power managing
6681 * Return Values:
6682 * MDI_SUCCESS
6683 * MDI_FAILURE
6684 */
6685 int
mdi_bus_power(dev_info_t * parent,void * impl_arg,pm_bus_power_op_t op,void * arg,void * result)6686 mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op,
6687 void *arg, void *result)
6688 {
6689 int ret = MDI_SUCCESS;
6690 pm_bp_child_pwrchg_t *bpc;
6691 mdi_client_t *ct;
6692 dev_info_t *cdip;
6693 pm_bp_has_changed_t *bphc;
6694
6695 /*
6696 * BUS_POWER_NOINVOL not supported
6697 */
6698 if (op == BUS_POWER_NOINVOL)
6699 return (MDI_FAILURE);
6700
6701 /*
6702 * ignore other OPs.
6703 * return quickly to save cou cycles on the ct processing
6704 */
6705 switch (op) {
6706 case BUS_POWER_PRE_NOTIFICATION:
6707 case BUS_POWER_POST_NOTIFICATION:
6708 bpc = (pm_bp_child_pwrchg_t *)arg;
6709 cdip = bpc->bpc_dip;
6710 break;
6711 case BUS_POWER_HAS_CHANGED:
6712 bphc = (pm_bp_has_changed_t *)arg;
6713 cdip = bphc->bphc_dip;
6714 break;
6715 default:
6716 return (pm_busop_bus_power(parent, impl_arg, op, arg, result));
6717 }
6718
6719 ASSERT(MDI_CLIENT(cdip));
6720
6721 ct = i_devi_get_client(cdip);
6722 if (ct == NULL)
6723 return (MDI_FAILURE);
6724
6725 /*
6726 * wait till the mdi_pathinfo node state change are processed
6727 */
6728 MDI_CLIENT_LOCK(ct);
6729 switch (op) {
6730 case BUS_POWER_PRE_NOTIFICATION:
6731 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6732 "BUS_POWER_PRE_NOTIFICATION:"
6733 "%s@%s, olevel=%d, nlevel=%d, comp=%d",
6734 ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
6735 bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp));
6736
6737 /* serialize power level change per client */
6738 while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6739 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6740
6741 MDI_CLIENT_SET_POWER_TRANSITION(ct);
6742
6743 if (ct->ct_power_cnt == 0) {
6744 ret = i_mdi_power_all_phci(ct);
6745 }
6746
6747 /*
6748 * if new_level > 0:
6749 * - hold phci(s)
6750 * - power up phci(s) if not already
6751 * ignore power down
6752 */
6753 if (bpc->bpc_nlevel > 0) {
6754 if (!DEVI_IS_ATTACHING(ct->ct_dip)) {
6755 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6756 "i_mdi_pm_hold_client\n"));
6757 i_mdi_pm_hold_client(ct, ct->ct_path_count);
6758 }
6759 }
6760 break;
6761 case BUS_POWER_POST_NOTIFICATION:
6762 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6763 "BUS_POWER_POST_NOTIFICATION:"
6764 "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d",
6765 ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
6766 bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp,
6767 *(int *)result));
6768
6769 if (*(int *)result == DDI_SUCCESS) {
6770 if (bpc->bpc_nlevel > 0) {
6771 MDI_CLIENT_SET_POWER_UP(ct);
6772 } else {
6773 MDI_CLIENT_SET_POWER_DOWN(ct);
6774 }
6775 }
6776
6777 /* release the hold we did in pre-notification */
6778 if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) &&
6779 !DEVI_IS_ATTACHING(ct->ct_dip)) {
6780 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6781 "i_mdi_pm_rele_client\n"));
6782 i_mdi_pm_rele_client(ct, ct->ct_path_count);
6783 }
6784
6785 if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) {
6786 /* another thread might started attaching */
6787 if (DEVI_IS_ATTACHING(ct->ct_dip)) {
6788 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6789 "i_mdi_pm_rele_client\n"));
6790 i_mdi_pm_rele_client(ct, ct->ct_path_count);
6791 /* detaching has been taken care in pm_post_unconfig */
6792 } else if (!DEVI_IS_DETACHING(ct->ct_dip)) {
6793 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6794 "i_mdi_pm_reset_client\n"));
6795 i_mdi_pm_reset_client(ct);
6796 }
6797 }
6798
6799 MDI_CLIENT_CLEAR_POWER_TRANSITION(ct);
6800 cv_broadcast(&ct->ct_powerchange_cv);
6801
6802 break;
6803
6804 /* need to do more */
6805 case BUS_POWER_HAS_CHANGED:
6806 MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
6807 "BUS_POWER_HAS_CHANGED:"
6808 "%s@%s, olevel=%d, nlevel=%d, comp=%d",
6809 ddi_node_name(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip),
6810 bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp));
6811
6812 if (bphc->bphc_nlevel > 0 &&
6813 bphc->bphc_nlevel > bphc->bphc_olevel) {
6814 if (ct->ct_power_cnt == 0) {
6815 ret = i_mdi_power_all_phci(ct);
6816 }
6817 MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
6818 "i_mdi_pm_hold_client\n"));
6819 i_mdi_pm_hold_client(ct, ct->ct_path_count);
6820 }
6821
6822 if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) {
6823 MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
6824 "i_mdi_pm_rele_client\n"));
6825 i_mdi_pm_rele_client(ct, ct->ct_path_count);
6826 }
6827 break;
6828 default:
6829 dev_err(parent, CE_WARN, "!unhandled bus power operation: 0x%x",
6830 op);
6831 break;
6832 }
6833
6834 MDI_CLIENT_UNLOCK(ct);
6835 return (ret);
6836 }
6837
6838 static int
i_mdi_pm_pre_config_one(dev_info_t * child)6839 i_mdi_pm_pre_config_one(dev_info_t *child)
6840 {
6841 int ret = MDI_SUCCESS;
6842 mdi_client_t *ct;
6843
6844 ct = i_devi_get_client(child);
6845 if (ct == NULL)
6846 return (MDI_FAILURE);
6847
6848 MDI_CLIENT_LOCK(ct);
6849 while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6850 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6851
6852 if (!MDI_CLIENT_IS_FAILED(ct)) {
6853 MDI_CLIENT_UNLOCK(ct);
6854 MDI_DEBUG(4, (MDI_NOTE, child, "already configured\n"));
6855 return (MDI_SUCCESS);
6856 }
6857
6858 if (ct->ct_powercnt_config) {
6859 MDI_CLIENT_UNLOCK(ct);
6860 MDI_DEBUG(4, (MDI_NOTE, child, "already held\n"));
6861 return (MDI_SUCCESS);
6862 }
6863
6864 if (ct->ct_power_cnt == 0) {
6865 ret = i_mdi_power_all_phci(ct);
6866 }
6867 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n"));
6868 i_mdi_pm_hold_client(ct, ct->ct_path_count);
6869 ct->ct_powercnt_config = 1;
6870 ct->ct_powercnt_reset = 0;
6871 MDI_CLIENT_UNLOCK(ct);
6872 return (ret);
6873 }
6874
6875 static int
i_mdi_pm_pre_config(dev_info_t * vdip,dev_info_t * child)6876 i_mdi_pm_pre_config(dev_info_t *vdip, dev_info_t *child)
6877 {
6878 int ret = MDI_SUCCESS;
6879 dev_info_t *cdip;
6880
6881 ASSERT(MDI_VHCI(vdip));
6882
6883 /* ndi_devi_config_one */
6884 if (child) {
6885 ASSERT(DEVI_BUSY_OWNED(vdip));
6886 return (i_mdi_pm_pre_config_one(child));
6887 }
6888
6889 /* devi_config_common */
6890 ndi_devi_enter(vdip);
6891 cdip = ddi_get_child(vdip);
6892 while (cdip) {
6893 dev_info_t *next = ddi_get_next_sibling(cdip);
6894
6895 ret = i_mdi_pm_pre_config_one(cdip);
6896 if (ret != MDI_SUCCESS)
6897 break;
6898 cdip = next;
6899 }
6900 ndi_devi_exit(vdip);
6901 return (ret);
6902 }
6903
6904 static int
i_mdi_pm_pre_unconfig_one(dev_info_t * child,int * held,int flags)6905 i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags)
6906 {
6907 int ret = MDI_SUCCESS;
6908 mdi_client_t *ct;
6909
6910 ct = i_devi_get_client(child);
6911 if (ct == NULL)
6912 return (MDI_FAILURE);
6913
6914 MDI_CLIENT_LOCK(ct);
6915 while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6916 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6917
6918 if (!i_ddi_devi_attached(child)) {
6919 MDI_DEBUG(4, (MDI_NOTE, child, "node detached already\n"));
6920 MDI_CLIENT_UNLOCK(ct);
6921 return (MDI_SUCCESS);
6922 }
6923
6924 if (MDI_CLIENT_IS_POWERED_DOWN(ct) &&
6925 (flags & NDI_AUTODETACH)) {
6926 MDI_DEBUG(4, (MDI_NOTE, child, "auto-modunload\n"));
6927 MDI_CLIENT_UNLOCK(ct);
6928 return (MDI_FAILURE);
6929 }
6930
6931 if (ct->ct_powercnt_unconfig) {
6932 MDI_DEBUG(4, (MDI_NOTE, child, "ct_powercnt_held\n"));
6933 MDI_CLIENT_UNLOCK(ct);
6934 *held = 1;
6935 return (MDI_SUCCESS);
6936 }
6937
6938 if (ct->ct_power_cnt == 0) {
6939 ret = i_mdi_power_all_phci(ct);
6940 }
6941 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n"));
6942 i_mdi_pm_hold_client(ct, ct->ct_path_count);
6943 ct->ct_powercnt_unconfig = 1;
6944 ct->ct_powercnt_reset = 0;
6945 MDI_CLIENT_UNLOCK(ct);
6946 if (ret == MDI_SUCCESS)
6947 *held = 1;
6948 return (ret);
6949 }
6950
6951 static int
i_mdi_pm_pre_unconfig(dev_info_t * vdip,dev_info_t * child,int * held,int flags)6952 i_mdi_pm_pre_unconfig(dev_info_t *vdip, dev_info_t *child, int *held,
6953 int flags)
6954 {
6955 int ret = MDI_SUCCESS;
6956 dev_info_t *cdip;
6957
6958 ASSERT(MDI_VHCI(vdip));
6959 *held = 0;
6960
6961 /* ndi_devi_unconfig_one */
6962 if (child) {
6963 ASSERT(DEVI_BUSY_OWNED(vdip));
6964 return (i_mdi_pm_pre_unconfig_one(child, held, flags));
6965 }
6966
6967 /* devi_unconfig_common */
6968 ndi_devi_enter(vdip);
6969 cdip = ddi_get_child(vdip);
6970 while (cdip) {
6971 dev_info_t *next = ddi_get_next_sibling(cdip);
6972
6973 ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags);
6974 cdip = next;
6975 }
6976 ndi_devi_exit(vdip);
6977
6978 if (*held)
6979 ret = MDI_SUCCESS;
6980
6981 return (ret);
6982 }
6983
6984 static void
i_mdi_pm_post_config_one(dev_info_t * child)6985 i_mdi_pm_post_config_one(dev_info_t *child)
6986 {
6987 mdi_client_t *ct;
6988
6989 ct = i_devi_get_client(child);
6990 if (ct == NULL)
6991 return;
6992
6993 MDI_CLIENT_LOCK(ct);
6994 while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6995 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6996
6997 if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) {
6998 MDI_DEBUG(4, (MDI_NOTE, child, "not configured\n"));
6999 MDI_CLIENT_UNLOCK(ct);
7000 return;
7001 }
7002
7003 /* client has not been updated */
7004 if (MDI_CLIENT_IS_FAILED(ct)) {
7005 MDI_DEBUG(4, (MDI_NOTE, child, "client failed\n"));
7006 MDI_CLIENT_UNLOCK(ct);
7007 return;
7008 }
7009
7010 /* another thread might have powered it down or detached it */
7011 if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
7012 !DEVI_IS_ATTACHING(child)) ||
7013 (!i_ddi_devi_attached(child) &&
7014 !DEVI_IS_ATTACHING(child))) {
7015 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n"));
7016 i_mdi_pm_reset_client(ct);
7017 } else {
7018 mdi_pathinfo_t *pip, *next;
7019 int valid_path_count = 0;
7020
7021 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n"));
7022 pip = ct->ct_path_head;
7023 while (pip != NULL) {
7024 MDI_PI_LOCK(pip);
7025 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
7026 if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
7027 valid_path_count ++;
7028 MDI_PI_UNLOCK(pip);
7029 pip = next;
7030 }
7031 i_mdi_pm_rele_client(ct, valid_path_count);
7032 }
7033 ct->ct_powercnt_config = 0;
7034 MDI_CLIENT_UNLOCK(ct);
7035 }
7036
7037 static void
i_mdi_pm_post_config(dev_info_t * vdip,dev_info_t * child)7038 i_mdi_pm_post_config(dev_info_t *vdip, dev_info_t *child)
7039 {
7040 dev_info_t *cdip;
7041
7042 ASSERT(MDI_VHCI(vdip));
7043
7044 /* ndi_devi_config_one */
7045 if (child) {
7046 ASSERT(DEVI_BUSY_OWNED(vdip));
7047 i_mdi_pm_post_config_one(child);
7048 return;
7049 }
7050
7051 /* devi_config_common */
7052 ndi_devi_enter(vdip);
7053 cdip = ddi_get_child(vdip);
7054 while (cdip) {
7055 dev_info_t *next = ddi_get_next_sibling(cdip);
7056
7057 i_mdi_pm_post_config_one(cdip);
7058 cdip = next;
7059 }
7060 ndi_devi_exit(vdip);
7061 }
7062
7063 static void
i_mdi_pm_post_unconfig_one(dev_info_t * child)7064 i_mdi_pm_post_unconfig_one(dev_info_t *child)
7065 {
7066 mdi_client_t *ct;
7067
7068 ct = i_devi_get_client(child);
7069 if (ct == NULL)
7070 return;
7071
7072 MDI_CLIENT_LOCK(ct);
7073 while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
7074 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
7075
7076 if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) {
7077 MDI_DEBUG(4, (MDI_NOTE, child, "not held\n"));
7078 MDI_CLIENT_UNLOCK(ct);
7079 return;
7080 }
7081
7082 /* failure detaching or another thread just attached it */
7083 if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
7084 i_ddi_devi_attached(child)) ||
7085 (!i_ddi_devi_attached(child) &&
7086 !DEVI_IS_ATTACHING(child))) {
7087 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n"));
7088 i_mdi_pm_reset_client(ct);
7089 } else {
7090 mdi_pathinfo_t *pip, *next;
7091 int valid_path_count = 0;
7092
7093 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n"));
7094 pip = ct->ct_path_head;
7095 while (pip != NULL) {
7096 MDI_PI_LOCK(pip);
7097 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
7098 if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
7099 valid_path_count ++;
7100 MDI_PI_UNLOCK(pip);
7101 pip = next;
7102 }
7103 i_mdi_pm_rele_client(ct, valid_path_count);
7104 ct->ct_powercnt_unconfig = 0;
7105 }
7106
7107 MDI_CLIENT_UNLOCK(ct);
7108 }
7109
7110 static void
i_mdi_pm_post_unconfig(dev_info_t * vdip,dev_info_t * child,int held)7111 i_mdi_pm_post_unconfig(dev_info_t *vdip, dev_info_t *child, int held)
7112 {
7113 dev_info_t *cdip;
7114
7115 ASSERT(MDI_VHCI(vdip));
7116
7117 if (!held) {
7118 MDI_DEBUG(4, (MDI_NOTE, vdip, "held = %d", held));
7119 return;
7120 }
7121
7122 if (child) {
7123 ASSERT(DEVI_BUSY_OWNED(vdip));
7124 i_mdi_pm_post_unconfig_one(child);
7125 return;
7126 }
7127
7128 ndi_devi_enter(vdip);
7129 cdip = ddi_get_child(vdip);
7130 while (cdip) {
7131 dev_info_t *next = ddi_get_next_sibling(cdip);
7132
7133 i_mdi_pm_post_unconfig_one(cdip);
7134 cdip = next;
7135 }
7136 ndi_devi_exit(vdip);
7137 }
7138
7139 int
mdi_power(dev_info_t * vdip,mdi_pm_op_t op,void * args,char * devnm,int flags)7140 mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags)
7141 {
7142 int ret = MDI_SUCCESS;
7143 dev_info_t *client_dip = NULL;
7144 mdi_client_t *ct;
7145
7146 /*
7147 * Handling ndi_devi_config_one and ndi_devi_unconfig_one.
7148 * Power up pHCI for the named client device.
7149 * Note: Before the client is enumerated under vhci by phci,
7150 * client_dip can be NULL. Then proceed to power up all the
7151 * pHCIs.
7152 */
7153 if (devnm != NULL) {
7154 ndi_devi_enter(vdip);
7155 client_dip = ndi_devi_findchild(vdip, devnm);
7156 }
7157
7158 MDI_DEBUG(4, (MDI_NOTE, vdip,
7159 "op = %d %s %p", op, devnm ? devnm : "", (void *)client_dip));
7160
7161 switch (op) {
7162 case MDI_PM_PRE_CONFIG:
7163 ret = i_mdi_pm_pre_config(vdip, client_dip);
7164 break;
7165
7166 case MDI_PM_PRE_UNCONFIG:
7167 ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args,
7168 flags);
7169 break;
7170
7171 case MDI_PM_POST_CONFIG:
7172 i_mdi_pm_post_config(vdip, client_dip);
7173 break;
7174
7175 case MDI_PM_POST_UNCONFIG:
7176 i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args);
7177 break;
7178
7179 case MDI_PM_HOLD_POWER:
7180 case MDI_PM_RELE_POWER:
7181 ASSERT(args);
7182
7183 client_dip = (dev_info_t *)args;
7184 ASSERT(MDI_CLIENT(client_dip));
7185
7186 ct = i_devi_get_client(client_dip);
7187 MDI_CLIENT_LOCK(ct);
7188
7189 if (op == MDI_PM_HOLD_POWER) {
7190 if (ct->ct_power_cnt == 0) {
7191 (void) i_mdi_power_all_phci(ct);
7192 MDI_DEBUG(4, (MDI_NOTE, client_dip,
7193 "i_mdi_pm_hold_client\n"));
7194 i_mdi_pm_hold_client(ct, ct->ct_path_count);
7195 }
7196 } else {
7197 if (DEVI_IS_ATTACHING(client_dip)) {
7198 MDI_DEBUG(4, (MDI_NOTE, client_dip,
7199 "i_mdi_pm_rele_client\n"));
7200 i_mdi_pm_rele_client(ct, ct->ct_path_count);
7201 } else {
7202 MDI_DEBUG(4, (MDI_NOTE, client_dip,
7203 "i_mdi_pm_reset_client\n"));
7204 i_mdi_pm_reset_client(ct);
7205 }
7206 }
7207
7208 MDI_CLIENT_UNLOCK(ct);
7209 break;
7210
7211 default:
7212 break;
7213 }
7214
7215 if (devnm)
7216 ndi_devi_exit(vdip);
7217
7218 return (ret);
7219 }
7220
7221 int
mdi_component_is_vhci(dev_info_t * dip,const char ** mdi_class)7222 mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class)
7223 {
7224 mdi_vhci_t *vhci;
7225
7226 if (!MDI_VHCI(dip))
7227 return (MDI_FAILURE);
7228
7229 if (mdi_class) {
7230 vhci = DEVI(dip)->devi_mdi_xhci;
7231 ASSERT(vhci);
7232 *mdi_class = vhci->vh_class;
7233 }
7234
7235 return (MDI_SUCCESS);
7236 }
7237
7238 int
mdi_component_is_phci(dev_info_t * dip,const char ** mdi_class)7239 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class)
7240 {
7241 mdi_phci_t *phci;
7242
7243 if (!MDI_PHCI(dip))
7244 return (MDI_FAILURE);
7245
7246 if (mdi_class) {
7247 phci = DEVI(dip)->devi_mdi_xhci;
7248 ASSERT(phci);
7249 *mdi_class = phci->ph_vhci->vh_class;
7250 }
7251
7252 return (MDI_SUCCESS);
7253 }
7254
7255 int
mdi_component_is_client(dev_info_t * dip,const char ** mdi_class)7256 mdi_component_is_client(dev_info_t *dip, const char **mdi_class)
7257 {
7258 mdi_client_t *client;
7259
7260 if (!MDI_CLIENT(dip))
7261 return (MDI_FAILURE);
7262
7263 if (mdi_class) {
7264 client = DEVI(dip)->devi_mdi_client;
7265 ASSERT(client);
7266 *mdi_class = client->ct_vhci->vh_class;
7267 }
7268
7269 return (MDI_SUCCESS);
7270 }
7271
7272 void *
mdi_client_get_vhci_private(dev_info_t * dip)7273 mdi_client_get_vhci_private(dev_info_t *dip)
7274 {
7275 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
7276 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
7277 mdi_client_t *ct;
7278 ct = i_devi_get_client(dip);
7279 return (ct->ct_vprivate);
7280 }
7281 return (NULL);
7282 }
7283
7284 void
mdi_client_set_vhci_private(dev_info_t * dip,void * data)7285 mdi_client_set_vhci_private(dev_info_t *dip, void *data)
7286 {
7287 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
7288 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
7289 mdi_client_t *ct;
7290 ct = i_devi_get_client(dip);
7291 ct->ct_vprivate = data;
7292 }
7293 }
7294 /*
7295 * mdi_pi_get_vhci_private():
7296 * Get the vhci private information associated with the
7297 * mdi_pathinfo node
7298 */
7299 void *
mdi_pi_get_vhci_private(mdi_pathinfo_t * pip)7300 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip)
7301 {
7302 caddr_t vprivate = NULL;
7303 if (pip) {
7304 vprivate = MDI_PI(pip)->pi_vprivate;
7305 }
7306 return (vprivate);
7307 }
7308
7309 /*
7310 * mdi_pi_set_vhci_private():
7311 * Set the vhci private information in the mdi_pathinfo node
7312 */
7313 void
mdi_pi_set_vhci_private(mdi_pathinfo_t * pip,void * priv)7314 mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv)
7315 {
7316 if (pip) {
7317 MDI_PI(pip)->pi_vprivate = priv;
7318 }
7319 }
7320
7321 /*
7322 * mdi_phci_get_vhci_private():
7323 * Get the vhci private information associated with the
7324 * mdi_phci node
7325 */
7326 void *
mdi_phci_get_vhci_private(dev_info_t * dip)7327 mdi_phci_get_vhci_private(dev_info_t *dip)
7328 {
7329 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
7330 if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
7331 mdi_phci_t *ph;
7332 ph = i_devi_get_phci(dip);
7333 return (ph->ph_vprivate);
7334 }
7335 return (NULL);
7336 }
7337
7338 /*
7339 * mdi_phci_set_vhci_private():
7340 * Set the vhci private information in the mdi_phci node
7341 */
7342 void
mdi_phci_set_vhci_private(dev_info_t * dip,void * priv)7343 mdi_phci_set_vhci_private(dev_info_t *dip, void *priv)
7344 {
7345 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
7346 if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
7347 mdi_phci_t *ph;
7348 ph = i_devi_get_phci(dip);
7349 ph->ph_vprivate = priv;
7350 }
7351 }
7352
7353 int
mdi_pi_ishidden(mdi_pathinfo_t * pip)7354 mdi_pi_ishidden(mdi_pathinfo_t *pip)
7355 {
7356 return (MDI_PI_FLAGS_IS_HIDDEN(pip));
7357 }
7358
7359 int
mdi_pi_device_isremoved(mdi_pathinfo_t * pip)7360 mdi_pi_device_isremoved(mdi_pathinfo_t *pip)
7361 {
7362 return (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip));
7363 }
7364
7365 /* Return 1 if all client paths are device_removed */
7366 static int
i_mdi_client_all_devices_removed(mdi_client_t * ct)7367 i_mdi_client_all_devices_removed(mdi_client_t *ct)
7368 {
7369 mdi_pathinfo_t *pip;
7370 int all_devices_removed = 1;
7371
7372 MDI_CLIENT_LOCK(ct);
7373 for (pip = ct->ct_path_head; pip;
7374 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link) {
7375 if (!mdi_pi_device_isremoved(pip)) {
7376 all_devices_removed = 0;
7377 break;
7378 }
7379 }
7380 MDI_CLIENT_UNLOCK(ct);
7381 return (all_devices_removed);
7382 }
7383
7384 /*
7385 * When processing path hotunplug, represent device removal.
7386 */
7387 int
mdi_pi_device_remove(mdi_pathinfo_t * pip)7388 mdi_pi_device_remove(mdi_pathinfo_t *pip)
7389 {
7390 mdi_client_t *ct;
7391
7392 MDI_PI_LOCK(pip);
7393 if (mdi_pi_device_isremoved(pip)) {
7394 MDI_PI_UNLOCK(pip);
7395 return (0);
7396 }
7397 MDI_PI_FLAGS_SET_DEVICE_REMOVED(pip);
7398 MDI_PI_FLAGS_SET_HIDDEN(pip);
7399 MDI_PI_UNLOCK(pip);
7400
7401 /*
7402 * If all paths associated with the client are now DEVICE_REMOVED,
7403 * reflect DEVICE_REMOVED in the client.
7404 */
7405 ct = MDI_PI(pip)->pi_client;
7406 if (ct && ct->ct_dip && i_mdi_client_all_devices_removed(ct))
7407 (void) ndi_devi_device_remove(ct->ct_dip);
7408 else
7409 i_ddi_di_cache_invalidate();
7410
7411 return (1);
7412 }
7413
7414 /*
7415 * When processing hotplug, if a path marked mdi_pi_device_isremoved()
7416 * is now accessible then this interfaces is used to represent device insertion.
7417 */
7418 int
mdi_pi_device_insert(mdi_pathinfo_t * pip)7419 mdi_pi_device_insert(mdi_pathinfo_t *pip)
7420 {
7421 MDI_PI_LOCK(pip);
7422 if (!mdi_pi_device_isremoved(pip)) {
7423 MDI_PI_UNLOCK(pip);
7424 return (0);
7425 }
7426 MDI_PI_FLAGS_CLR_DEVICE_REMOVED(pip);
7427 MDI_PI_FLAGS_CLR_HIDDEN(pip);
7428 MDI_PI_UNLOCK(pip);
7429
7430 i_ddi_di_cache_invalidate();
7431
7432 return (1);
7433 }
7434
7435 /*
7436 * List of vhci class names:
7437 * A vhci class name must be in this list only if the corresponding vhci
7438 * driver intends to use the mdi provided bus config implementation
7439 * (i.e., mdi_vhci_bus_config()).
7440 */
7441 static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB };
7442 #define N_VHCI_CLASSES (sizeof (vhci_class_list) / sizeof (char *))
7443
7444 /*
7445 * During boot time, the on-disk vhci cache for every vhci class is read
7446 * in the form of an nvlist and stored here.
7447 */
7448 static nvlist_t *vhcache_nvl[N_VHCI_CLASSES];
7449
7450 /* nvpair names in vhci cache nvlist */
7451 #define MDI_VHCI_CACHE_VERSION 1
7452 #define MDI_NVPNAME_VERSION "version"
7453 #define MDI_NVPNAME_PHCIS "phcis"
7454 #define MDI_NVPNAME_CTADDRMAP "clientaddrmap"
7455
7456 /*
7457 * Given vhci class name, return its on-disk vhci cache filename.
7458 * Memory for the returned filename which includes the full path is allocated
7459 * by this function.
7460 */
7461 static char *
vhclass2vhcache_filename(char * vhclass)7462 vhclass2vhcache_filename(char *vhclass)
7463 {
7464 char *filename;
7465 int len;
7466 static char *fmt = "/etc/devices/mdi_%s_cache";
7467
7468 /*
7469 * fmt contains the on-disk vhci cache file name format;
7470 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache".
7471 */
7472
7473 /* the -1 below is to account for "%s" in the format string */
7474 len = strlen(fmt) + strlen(vhclass) - 1;
7475 filename = kmem_alloc(len, KM_SLEEP);
7476 (void) snprintf(filename, len, fmt, vhclass);
7477 ASSERT(len == (strlen(filename) + 1));
7478 return (filename);
7479 }
7480
7481 /*
7482 * initialize the vhci cache related data structures and read the on-disk
7483 * vhci cached data into memory.
7484 */
7485 static void
setup_vhci_cache(mdi_vhci_t * vh)7486 setup_vhci_cache(mdi_vhci_t *vh)
7487 {
7488 mdi_vhci_config_t *vhc;
7489 mdi_vhci_cache_t *vhcache;
7490 int i;
7491 nvlist_t *nvl = NULL;
7492
7493 vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP);
7494 vh->vh_config = vhc;
7495 vhcache = &vhc->vhc_vhcache;
7496
7497 vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class);
7498
7499 mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL);
7500 cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL);
7501
7502 rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL);
7503
7504 /*
7505 * Create string hash; same as mod_hash_create_strhash() except that
7506 * we use NULL key destructor.
7507 */
7508 vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class,
7509 mdi_bus_config_cache_hash_size,
7510 mod_hash_null_keydtor, mod_hash_null_valdtor,
7511 mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
7512
7513 /*
7514 * The on-disk vhci cache is read during booting prior to the
7515 * lights-out period by mdi_read_devices_files().
7516 */
7517 for (i = 0; i < N_VHCI_CLASSES; i++) {
7518 if (strcmp(vhci_class_list[i], vh->vh_class) == 0) {
7519 nvl = vhcache_nvl[i];
7520 vhcache_nvl[i] = NULL;
7521 break;
7522 }
7523 }
7524
7525 /*
7526 * this is to cover the case of some one manually causing unloading
7527 * (or detaching) and reloading (or attaching) of a vhci driver.
7528 */
7529 if (nvl == NULL && modrootloaded)
7530 nvl = read_on_disk_vhci_cache(vh->vh_class);
7531
7532 if (nvl != NULL) {
7533 rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7534 if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS)
7535 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
7536 else {
7537 cmn_err(CE_WARN,
7538 "%s: data file corrupted, will recreate",
7539 vhc->vhc_vhcache_filename);
7540 }
7541 rw_exit(&vhcache->vhcache_lock);
7542 nvlist_free(nvl);
7543 }
7544
7545 vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc,
7546 CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush");
7547
7548 vhc->vhc_path_discovery_boot = mdi_path_discovery_boot;
7549 vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot;
7550 }
7551
7552 /*
7553 * free all vhci cache related resources
7554 */
7555 static int
destroy_vhci_cache(mdi_vhci_t * vh)7556 destroy_vhci_cache(mdi_vhci_t *vh)
7557 {
7558 mdi_vhci_config_t *vhc = vh->vh_config;
7559 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7560 mdi_vhcache_phci_t *cphci, *cphci_next;
7561 mdi_vhcache_client_t *cct, *cct_next;
7562 mdi_vhcache_pathinfo_t *cpi, *cpi_next;
7563
7564 if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS)
7565 return (MDI_FAILURE);
7566
7567 kmem_free(vhc->vhc_vhcache_filename,
7568 strlen(vhc->vhc_vhcache_filename) + 1);
7569
7570 mod_hash_destroy_strhash(vhcache->vhcache_client_hash);
7571
7572 for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7573 cphci = cphci_next) {
7574 cphci_next = cphci->cphci_next;
7575 free_vhcache_phci(cphci);
7576 }
7577
7578 for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) {
7579 cct_next = cct->cct_next;
7580 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) {
7581 cpi_next = cpi->cpi_next;
7582 free_vhcache_pathinfo(cpi);
7583 }
7584 free_vhcache_client(cct);
7585 }
7586
7587 rw_destroy(&vhcache->vhcache_lock);
7588
7589 mutex_destroy(&vhc->vhc_lock);
7590 cv_destroy(&vhc->vhc_cv);
7591 kmem_free(vhc, sizeof (mdi_vhci_config_t));
7592 return (MDI_SUCCESS);
7593 }
7594
7595 /*
7596 * Stop all vhci cache related async threads and free their resources.
7597 */
7598 static int
stop_vhcache_async_threads(mdi_vhci_config_t * vhc)7599 stop_vhcache_async_threads(mdi_vhci_config_t *vhc)
7600 {
7601 mdi_async_client_config_t *acc, *acc_next;
7602
7603 mutex_enter(&vhc->vhc_lock);
7604 vhc->vhc_flags |= MDI_VHC_EXIT;
7605 ASSERT(vhc->vhc_acc_thrcount >= 0);
7606 cv_broadcast(&vhc->vhc_cv);
7607
7608 while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) ||
7609 vhc->vhc_acc_thrcount != 0) {
7610 mutex_exit(&vhc->vhc_lock);
7611 delay_random(mdi_delay);
7612 mutex_enter(&vhc->vhc_lock);
7613 }
7614
7615 vhc->vhc_flags &= ~MDI_VHC_EXIT;
7616
7617 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) {
7618 acc_next = acc->acc_next;
7619 free_async_client_config(acc);
7620 }
7621 vhc->vhc_acc_list_head = NULL;
7622 vhc->vhc_acc_list_tail = NULL;
7623 vhc->vhc_acc_count = 0;
7624
7625 if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
7626 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
7627 mutex_exit(&vhc->vhc_lock);
7628 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) {
7629 vhcache_dirty(vhc);
7630 return (MDI_FAILURE);
7631 }
7632 } else
7633 mutex_exit(&vhc->vhc_lock);
7634
7635 if (callb_delete(vhc->vhc_cbid) != 0)
7636 return (MDI_FAILURE);
7637
7638 return (MDI_SUCCESS);
7639 }
7640
7641 /*
7642 * Stop vhci cache flush thread
7643 */
7644 /* ARGSUSED */
7645 static boolean_t
stop_vhcache_flush_thread(void * arg,int code)7646 stop_vhcache_flush_thread(void *arg, int code)
7647 {
7648 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
7649
7650 mutex_enter(&vhc->vhc_lock);
7651 vhc->vhc_flags |= MDI_VHC_EXIT;
7652 cv_broadcast(&vhc->vhc_cv);
7653
7654 while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
7655 mutex_exit(&vhc->vhc_lock);
7656 delay_random(mdi_delay);
7657 mutex_enter(&vhc->vhc_lock);
7658 }
7659
7660 if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
7661 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
7662 mutex_exit(&vhc->vhc_lock);
7663 (void) flush_vhcache(vhc, 1);
7664 } else
7665 mutex_exit(&vhc->vhc_lock);
7666
7667 return (B_TRUE);
7668 }
7669
7670 /*
7671 * Enqueue the vhcache phci (cphci) at the tail of the list
7672 */
7673 static void
enqueue_vhcache_phci(mdi_vhci_cache_t * vhcache,mdi_vhcache_phci_t * cphci)7674 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci)
7675 {
7676 cphci->cphci_next = NULL;
7677 if (vhcache->vhcache_phci_head == NULL)
7678 vhcache->vhcache_phci_head = cphci;
7679 else
7680 vhcache->vhcache_phci_tail->cphci_next = cphci;
7681 vhcache->vhcache_phci_tail = cphci;
7682 }
7683
7684 /*
7685 * Enqueue the vhcache pathinfo (cpi) at the tail of the list
7686 */
7687 static void
enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t * cct,mdi_vhcache_pathinfo_t * cpi)7688 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct,
7689 mdi_vhcache_pathinfo_t *cpi)
7690 {
7691 cpi->cpi_next = NULL;
7692 if (cct->cct_cpi_head == NULL)
7693 cct->cct_cpi_head = cpi;
7694 else
7695 cct->cct_cpi_tail->cpi_next = cpi;
7696 cct->cct_cpi_tail = cpi;
7697 }
7698
7699 /*
7700 * Enqueue the vhcache pathinfo (cpi) at the correct location in the
7701 * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
7702 * flag set come at the beginning of the list. All cpis which have this
7703 * flag set come at the end of the list.
7704 */
7705 static void
enqueue_vhcache_pathinfo(mdi_vhcache_client_t * cct,mdi_vhcache_pathinfo_t * newcpi)7706 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct,
7707 mdi_vhcache_pathinfo_t *newcpi)
7708 {
7709 mdi_vhcache_pathinfo_t *cpi, *prev_cpi;
7710
7711 if (cct->cct_cpi_head == NULL ||
7712 (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))
7713 enqueue_tail_vhcache_pathinfo(cct, newcpi);
7714 else {
7715 for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL &&
7716 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST);
7717 prev_cpi = cpi, cpi = cpi->cpi_next)
7718 ;
7719
7720 if (prev_cpi == NULL)
7721 cct->cct_cpi_head = newcpi;
7722 else
7723 prev_cpi->cpi_next = newcpi;
7724
7725 newcpi->cpi_next = cpi;
7726
7727 if (cpi == NULL)
7728 cct->cct_cpi_tail = newcpi;
7729 }
7730 }
7731
7732 /*
7733 * Enqueue the vhcache client (cct) at the tail of the list
7734 */
7735 static void
enqueue_vhcache_client(mdi_vhci_cache_t * vhcache,mdi_vhcache_client_t * cct)7736 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache,
7737 mdi_vhcache_client_t *cct)
7738 {
7739 cct->cct_next = NULL;
7740 if (vhcache->vhcache_client_head == NULL)
7741 vhcache->vhcache_client_head = cct;
7742 else
7743 vhcache->vhcache_client_tail->cct_next = cct;
7744 vhcache->vhcache_client_tail = cct;
7745 }
7746
/*
 * free_string_array():
 *		Free an array of `nelem' strings together with the array
 *		itself.  Each non-NULL element is freed as a buffer of
 *		strlen() + 1 bytes.  A NULL `str' is ignored.
 */
static void
free_string_array(char **str, int nelem)
{
	int	idx;

	if (str == NULL)
		return;

	for (idx = 0; idx < nelem; idx++) {
		if (str[idx] != NULL)
			kmem_free(str[idx], strlen(str[idx]) + 1);
	}
	kmem_free(str, sizeof (char *) * nelem);
}
7760
7761 static void
free_vhcache_phci(mdi_vhcache_phci_t * cphci)7762 free_vhcache_phci(mdi_vhcache_phci_t *cphci)
7763 {
7764 kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1);
7765 kmem_free(cphci, sizeof (*cphci));
7766 }
7767
7768 static void
free_vhcache_pathinfo(mdi_vhcache_pathinfo_t * cpi)7769 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi)
7770 {
7771 kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1);
7772 kmem_free(cpi, sizeof (*cpi));
7773 }
7774
7775 static void
free_vhcache_client(mdi_vhcache_client_t * cct)7776 free_vhcache_client(mdi_vhcache_client_t *cct)
7777 {
7778 kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1);
7779 kmem_free(cct, sizeof (*cct));
7780 }
7781
7782 static char *
vhcache_mknameaddr(char * ct_name,char * ct_addr,int * ret_len)7783 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len)
7784 {
7785 char *name_addr;
7786 int len;
7787
7788 len = strlen(ct_name) + strlen(ct_addr) + 2;
7789 name_addr = kmem_alloc(len, KM_SLEEP);
7790 (void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr);
7791
7792 if (ret_len)
7793 *ret_len = len;
7794 return (name_addr);
7795 }
7796
7797 /*
7798 * Copy the contents of paddrnvl to vhci cache.
7799 * paddrnvl nvlist contains path information for a vhci client.
7800 * See the comment in mainnvl_to_vhcache() for the format of this nvlist.
7801 */
7802 static void
paddrnvl_to_vhcache(nvlist_t * nvl,mdi_vhcache_phci_t * cphci_list[],mdi_vhcache_client_t * cct)7803 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[],
7804 mdi_vhcache_client_t *cct)
7805 {
7806 nvpair_t *nvp = NULL;
7807 mdi_vhcache_pathinfo_t *cpi;
7808 uint_t nelem;
7809 uint32_t *val;
7810
7811 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
7812 ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY);
7813 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
7814 cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
7815 (void) nvpair_value_uint32_array(nvp, &val, &nelem);
7816 ASSERT(nelem == 2);
7817 cpi->cpi_cphci = cphci_list[val[0]];
7818 cpi->cpi_flags = val[1];
7819 enqueue_tail_vhcache_pathinfo(cct, cpi);
7820 }
7821 }
7822
7823 /*
7824 * Copy the contents of caddrmapnvl to vhci cache.
7825 * caddrmapnvl nvlist contains vhci client address to phci client address
7826 * mappings. See the comment in mainnvl_to_vhcache() for the format of
7827 * this nvlist.
7828 */
7829 static void
caddrmapnvl_to_vhcache(mdi_vhci_cache_t * vhcache,nvlist_t * nvl,mdi_vhcache_phci_t * cphci_list[])7830 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl,
7831 mdi_vhcache_phci_t *cphci_list[])
7832 {
7833 nvpair_t *nvp = NULL;
7834 nvlist_t *paddrnvl;
7835 mdi_vhcache_client_t *cct;
7836
7837 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
7838 ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST);
7839 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
7840 cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
7841 (void) nvpair_value_nvlist(nvp, &paddrnvl);
7842 paddrnvl_to_vhcache(paddrnvl, cphci_list, cct);
7843 /* the client must contain at least one path */
7844 ASSERT(cct->cct_cpi_head != NULL);
7845
7846 enqueue_vhcache_client(vhcache, cct);
7847 (void) mod_hash_insert(vhcache->vhcache_client_hash,
7848 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
7849 }
7850 }
7851
7852 /*
7853 * Copy the contents of the main nvlist to vhci cache.
7854 *
7855 * VHCI busconfig cached data is stored in the form of a nvlist on the disk.
7856 * The nvlist contains the mappings between the vhci client addresses and
7857 * their corresponding phci client addresses.
7858 *
7859 * The structure of the nvlist is as follows:
7860 *
7861 * Main nvlist:
7862 * NAME TYPE DATA
7863 * version int32 version number
7864 * phcis string array array of phci paths
7865 * clientaddrmap nvlist_t c2paddrs_nvl (see below)
7866 *
7867 * structure of c2paddrs_nvl:
7868 * NAME TYPE DATA
7869 * caddr1 nvlist_t paddrs_nvl1
7870 * caddr2 nvlist_t paddrs_nvl2
7871 * ...
7872 * where caddr1, caddr2, ... are vhci client name and addresses in the
7873 * form of "<clientname>@<clientaddress>".
7874 * (for example: "ssd@2000002037cd9f72");
7875 * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information.
7876 *
7877 * structure of paddrs_nvl:
7878 * NAME TYPE DATA
7879 * pi_addr1 uint32_array (phci-id, cpi_flags)
7880 * pi_addr2 uint32_array (phci-id, cpi_flags)
7881 * ...
7882 * where pi_addr1, pi_addr2, ... are bus specific addresses of pathinfo nodes
7883 * (so called pi_addrs, for example: "w2100002037cd9f72,0");
7884 * phci-ids are integers that identify pHCIs to which the
7885 * the bus specific address belongs to. These integers are used as an index
7886 * into to the phcis string array in the main nvlist to get the pHCI path.
7887 */
7888 static int
mainnvl_to_vhcache(mdi_vhci_cache_t * vhcache,nvlist_t * nvl)7889 mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl)
7890 {
7891 char **phcis, **phci_namep;
7892 uint_t nphcis;
7893 mdi_vhcache_phci_t *cphci, **cphci_list;
7894 nvlist_t *caddrmapnvl;
7895 int32_t ver;
7896 int i;
7897 size_t cphci_list_size;
7898
7899 ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock));
7900
7901 if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 ||
7902 ver != MDI_VHCI_CACHE_VERSION)
7903 return (MDI_FAILURE);
7904
7905 if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis,
7906 &nphcis) != 0)
7907 return (MDI_SUCCESS);
7908
7909 ASSERT(nphcis > 0);
7910
7911 cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis;
7912 cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP);
7913 for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) {
7914 cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP);
7915 cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP);
7916 enqueue_vhcache_phci(vhcache, cphci);
7917 cphci_list[i] = cphci;
7918 }
7919
7920 ASSERT(vhcache->vhcache_phci_head != NULL);
7921
7922 if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0)
7923 caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list);
7924
7925 kmem_free(cphci_list, cphci_list_size);
7926 return (MDI_SUCCESS);
7927 }
7928
7929 /*
7930 * Build paddrnvl for the specified client using the information in the
7931 * vhci cache and add it to the caddrmapnnvl.
7932 * Returns 0 on success, errno on failure.
7933 */
7934 static int
vhcache_to_paddrnvl(mdi_vhci_cache_t * vhcache,mdi_vhcache_client_t * cct,nvlist_t * caddrmapnvl)7935 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct,
7936 nvlist_t *caddrmapnvl)
7937 {
7938 mdi_vhcache_pathinfo_t *cpi;
7939 nvlist_t *nvl;
7940 int err;
7941 uint32_t val[2];
7942
7943 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7944
7945 if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0)
7946 return (err);
7947
7948 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7949 val[0] = cpi->cpi_cphci->cphci_id;
7950 val[1] = cpi->cpi_flags;
7951 if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2))
7952 != 0)
7953 goto out;
7954 }
7955
7956 err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl);
7957 out:
7958 nvlist_free(nvl);
7959 return (err);
7960 }
7961
7962 /*
7963 * Build caddrmapnvl using the information in the vhci cache
7964 * and add it to the mainnvl.
7965 * Returns 0 on success, errno on failure.
7966 */
7967 static int
vhcache_to_caddrmapnvl(mdi_vhci_cache_t * vhcache,nvlist_t * mainnvl)7968 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl)
7969 {
7970 mdi_vhcache_client_t *cct;
7971 nvlist_t *nvl;
7972 int err;
7973
7974 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7975
7976 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0)
7977 return (err);
7978
7979 for (cct = vhcache->vhcache_client_head; cct != NULL;
7980 cct = cct->cct_next) {
7981 if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0)
7982 goto out;
7983 }
7984
7985 err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl);
7986 out:
7987 nvlist_free(nvl);
7988 return (err);
7989 }
7990
7991 /*
7992 * Build nvlist using the information in the vhci cache.
7993 * See the comment in mainnvl_to_vhcache() for the format of the nvlist.
7994 * Returns nvl on success, NULL on failure.
7995 */
7996 static nvlist_t *
vhcache_to_mainnvl(mdi_vhci_cache_t * vhcache)7997 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache)
7998 {
7999 mdi_vhcache_phci_t *cphci;
8000 uint_t phci_count;
8001 char **phcis;
8002 nvlist_t *nvl;
8003 int err, i;
8004
8005 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) {
8006 nvl = NULL;
8007 goto out;
8008 }
8009
8010 if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION,
8011 MDI_VHCI_CACHE_VERSION)) != 0)
8012 goto out;
8013
8014 rw_enter(&vhcache->vhcache_lock, RW_READER);
8015 if (vhcache->vhcache_phci_head == NULL) {
8016 rw_exit(&vhcache->vhcache_lock);
8017 return (nvl);
8018 }
8019
8020 phci_count = 0;
8021 for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8022 cphci = cphci->cphci_next)
8023 cphci->cphci_id = phci_count++;
8024
8025 /* build phci pathname list */
8026 phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP);
8027 for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL;
8028 cphci = cphci->cphci_next, i++)
8029 phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP);
8030
8031 err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis,
8032 phci_count);
8033 free_string_array(phcis, phci_count);
8034
8035 if (err == 0 &&
8036 (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) {
8037 rw_exit(&vhcache->vhcache_lock);
8038 return (nvl);
8039 }
8040
8041 rw_exit(&vhcache->vhcache_lock);
8042 out:
8043 nvlist_free(nvl);
8044 return (NULL);
8045 }
8046
8047 /*
8048 * Lookup vhcache phci structure for the specified phci path.
8049 */
8050 static mdi_vhcache_phci_t *
lookup_vhcache_phci_by_name(mdi_vhci_cache_t * vhcache,char * phci_path)8051 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path)
8052 {
8053 mdi_vhcache_phci_t *cphci;
8054
8055 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8056
8057 for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8058 cphci = cphci->cphci_next) {
8059 if (strcmp(cphci->cphci_path, phci_path) == 0)
8060 return (cphci);
8061 }
8062
8063 return (NULL);
8064 }
8065
8066 /*
8067 * Lookup vhcache phci structure for the specified phci.
8068 */
8069 static mdi_vhcache_phci_t *
lookup_vhcache_phci_by_addr(mdi_vhci_cache_t * vhcache,mdi_phci_t * ph)8070 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph)
8071 {
8072 mdi_vhcache_phci_t *cphci;
8073
8074 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8075
8076 for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8077 cphci = cphci->cphci_next) {
8078 if (cphci->cphci_phci == ph)
8079 return (cphci);
8080 }
8081
8082 return (NULL);
8083 }
8084
8085 /*
8086 * Add the specified phci to the vhci cache if not already present.
8087 */
8088 static void
vhcache_phci_add(mdi_vhci_config_t * vhc,mdi_phci_t * ph)8089 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
8090 {
8091 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8092 mdi_vhcache_phci_t *cphci;
8093 char *pathname;
8094 int cache_updated;
8095
8096 rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8097
8098 pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
8099 (void) ddi_pathname(ph->ph_dip, pathname);
8100 if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname))
8101 != NULL) {
8102 cphci->cphci_phci = ph;
8103 cache_updated = 0;
8104 } else {
8105 cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP);
8106 cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP);
8107 cphci->cphci_phci = ph;
8108 enqueue_vhcache_phci(vhcache, cphci);
8109 cache_updated = 1;
8110 }
8111
8112 rw_exit(&vhcache->vhcache_lock);
8113
8114 /*
8115 * Since a new phci has been added, reset
8116 * vhc_path_discovery_cutoff_time to allow for discovery of paths
8117 * during next vhcache_discover_paths().
8118 */
8119 mutex_enter(&vhc->vhc_lock);
8120 vhc->vhc_path_discovery_cutoff_time = 0;
8121 mutex_exit(&vhc->vhc_lock);
8122
8123 kmem_free(pathname, MAXPATHLEN);
8124 if (cache_updated)
8125 vhcache_dirty(vhc);
8126 }
8127
8128 /*
8129 * Remove the reference to the specified phci from the vhci cache.
8130 */
8131 static void
vhcache_phci_remove(mdi_vhci_config_t * vhc,mdi_phci_t * ph)8132 vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
8133 {
8134 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8135 mdi_vhcache_phci_t *cphci;
8136
8137 rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8138 if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) {
8139 /* do not remove the actual mdi_vhcache_phci structure */
8140 cphci->cphci_phci = NULL;
8141 }
8142 rw_exit(&vhcache->vhcache_lock);
8143 }
8144
8145 static void
init_vhcache_lookup_token(mdi_vhcache_lookup_token_t * dst,mdi_vhcache_lookup_token_t * src)8146 init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst,
8147 mdi_vhcache_lookup_token_t *src)
8148 {
8149 if (src == NULL) {
8150 dst->lt_cct = NULL;
8151 dst->lt_cct_lookup_time = 0;
8152 } else {
8153 dst->lt_cct = src->lt_cct;
8154 dst->lt_cct_lookup_time = src->lt_cct_lookup_time;
8155 }
8156 }
8157
8158 /*
8159 * Look up vhcache client for the specified client.
8160 */
8161 static mdi_vhcache_client_t *
lookup_vhcache_client(mdi_vhci_cache_t * vhcache,char * ct_name,char * ct_addr,mdi_vhcache_lookup_token_t * token)8162 lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr,
8163 mdi_vhcache_lookup_token_t *token)
8164 {
8165 mod_hash_val_t hv;
8166 char *name_addr;
8167 int len;
8168
8169 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8170
8171 /*
8172 * If no vhcache clean occurred since the last lookup, we can
8173 * simply return the cct from the last lookup operation.
8174 * It works because ccts are never freed except during the vhcache
8175 * cleanup operation.
8176 */
8177 if (token != NULL &&
8178 vhcache->vhcache_clean_time < token->lt_cct_lookup_time)
8179 return (token->lt_cct);
8180
8181 name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len);
8182 if (mod_hash_find(vhcache->vhcache_client_hash,
8183 (mod_hash_key_t)name_addr, &hv) == 0) {
8184 if (token) {
8185 token->lt_cct = (mdi_vhcache_client_t *)hv;
8186 token->lt_cct_lookup_time = ddi_get_lbolt64();
8187 }
8188 } else {
8189 if (token) {
8190 token->lt_cct = NULL;
8191 token->lt_cct_lookup_time = 0;
8192 }
8193 hv = NULL;
8194 }
8195 kmem_free(name_addr, len);
8196 return ((mdi_vhcache_client_t *)hv);
8197 }
8198
8199 /*
8200 * Add the specified path to the vhci cache if not already present.
8201 * Also add the vhcache client for the client corresponding to this path
8202 * if it doesn't already exist.
8203 */
8204 static void
vhcache_pi_add(mdi_vhci_config_t * vhc,struct mdi_pathinfo * pip)8205 vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
8206 {
8207 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8208 mdi_vhcache_client_t *cct;
8209 mdi_vhcache_pathinfo_t *cpi;
8210 mdi_phci_t *ph = pip->pi_phci;
8211 mdi_client_t *ct = pip->pi_client;
8212 int cache_updated = 0;
8213
8214 rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8215
8216 /* if vhcache client for this pip doesn't already exist, add it */
8217 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
8218 NULL)) == NULL) {
8219 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
8220 cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname,
8221 ct->ct_guid, NULL);
8222 enqueue_vhcache_client(vhcache, cct);
8223 (void) mod_hash_insert(vhcache->vhcache_client_hash,
8224 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
8225 cache_updated = 1;
8226 }
8227
8228 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8229 if (cpi->cpi_cphci->cphci_phci == ph &&
8230 strcmp(cpi->cpi_addr, pip->pi_addr) == 0) {
8231 cpi->cpi_pip = pip;
8232 if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) {
8233 cpi->cpi_flags &=
8234 ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
8235 sort_vhcache_paths(cct);
8236 cache_updated = 1;
8237 }
8238 break;
8239 }
8240 }
8241
8242 if (cpi == NULL) {
8243 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
8244 cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP);
8245 cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph);
8246 ASSERT(cpi->cpi_cphci != NULL);
8247 cpi->cpi_pip = pip;
8248 enqueue_vhcache_pathinfo(cct, cpi);
8249 cache_updated = 1;
8250 }
8251
8252 rw_exit(&vhcache->vhcache_lock);
8253
8254 if (cache_updated)
8255 vhcache_dirty(vhc);
8256 }
8257
8258 /*
8259 * Remove the reference to the specified path from the vhci cache.
8260 */
8261 static void
vhcache_pi_remove(mdi_vhci_config_t * vhc,struct mdi_pathinfo * pip)8262 vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
8263 {
8264 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8265 mdi_client_t *ct = pip->pi_client;
8266 mdi_vhcache_client_t *cct;
8267 mdi_vhcache_pathinfo_t *cpi;
8268
8269 rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8270 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
8271 NULL)) != NULL) {
8272 for (cpi = cct->cct_cpi_head; cpi != NULL;
8273 cpi = cpi->cpi_next) {
8274 if (cpi->cpi_pip == pip) {
8275 cpi->cpi_pip = NULL;
8276 break;
8277 }
8278 }
8279 }
8280 rw_exit(&vhcache->vhcache_lock);
8281 }
8282
8283 /*
8284 * Flush the vhci cache to disk.
8285 * Returns MDI_SUCCESS on success, MDI_FAILURE on failure.
8286 */
8287 static int
flush_vhcache(mdi_vhci_config_t * vhc,int force_flag)8288 flush_vhcache(mdi_vhci_config_t *vhc, int force_flag)
8289 {
8290 nvlist_t *nvl;
8291 int err;
8292 int rv;
8293
8294 /*
8295 * It is possible that the system may shutdown before
8296 * i_ddi_io_initialized (during stmsboot for example). To allow for
8297 * flushing the cache in this case do not check for
8298 * i_ddi_io_initialized when force flag is set.
8299 */
8300 if (force_flag == 0 && !i_ddi_io_initialized())
8301 return (MDI_FAILURE);
8302
8303 if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) {
8304 err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl);
8305 nvlist_free(nvl);
8306 } else
8307 err = EFAULT;
8308
8309 rv = MDI_SUCCESS;
8310 mutex_enter(&vhc->vhc_lock);
8311 if (err != 0) {
8312 if (err == EROFS) {
8313 vhc->vhc_flags |= MDI_VHC_READONLY_FS;
8314 vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR |
8315 MDI_VHC_VHCACHE_DIRTY);
8316 } else {
8317 if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) {
8318 cmn_err(CE_CONT, "%s: update failed\n",
8319 vhc->vhc_vhcache_filename);
8320 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR;
8321 }
8322 rv = MDI_FAILURE;
8323 }
8324 } else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) {
8325 cmn_err(CE_CONT,
8326 "%s: update now ok\n", vhc->vhc_vhcache_filename);
8327 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR;
8328 }
8329 mutex_exit(&vhc->vhc_lock);
8330
8331 return (rv);
8332 }
8333
/*
 * Body of the vhcache flush thread.
 *
 * Calls flush_vhcache() to flush the vhci cache at the scheduled time
 * (vhc_flush_at_ticks) for as long as MDI_VHC_VHCACHE_DIRTY is set.
 * Exits itself if left idle for the idle timeout period, or as soon as
 * MDI_VHC_EXIT is seen.  On exit it clears MDI_VHC_VHCACHE_FLUSH_THREAD.
 *
 * arg is the mdi_vhci_config_t whose cache is to be flushed.
 */
static void
vhcache_flush_thread(void *arg)
{
	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
	clock_t idle_time, quit_at_ticks;
	callb_cpr_t cprinfo;

	/* idle period before exiting, converted from seconds to ticks */
	idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND;

	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
	    "mdi_vhcache_flush");
	mutex_enter(&vhc->vhc_lock);
	for (; ; ) {
		/* phase 1: the cache is dirty; wait for the flush time */
		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
		    (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) {
			if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) {
				CALLB_CPR_SAFE_BEGIN(&cprinfo);
				(void) cv_timedwait(&vhc->vhc_cv,
				    &vhc->vhc_lock, vhc->vhc_flush_at_ticks);
				CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
			} else {
				/*
				 * Clear the dirty flag before dropping the
				 * lock for the flush itself.
				 */
				vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
				mutex_exit(&vhc->vhc_lock);

				/* a failed flush is rescheduled */
				if (flush_vhcache(vhc, 0) != MDI_SUCCESS)
					vhcache_dirty(vhc);

				mutex_enter(&vhc->vhc_lock);
			}
		}

		quit_at_ticks = ddi_get_lbolt() + idle_time;

		/* phase 2: nothing to flush; idle until dirty or timed out */
		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) &&
		    ddi_get_lbolt() < quit_at_ticks) {
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
			    quit_at_ticks);
			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
		}

		/* leave on exit request or if still clean after idling */
		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY))
			goto out;
	}

out:
	/* vhc_lock is held here */
	vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD;
	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
	CALLB_CPR_EXIT(&cprinfo);
}
8391
8392 /*
8393 * Make vhci cache dirty and schedule flushing by vhcache flush thread.
8394 */
8395 static void
vhcache_dirty(mdi_vhci_config_t * vhc)8396 vhcache_dirty(mdi_vhci_config_t *vhc)
8397 {
8398 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8399 int create_thread;
8400
8401 rw_enter(&vhcache->vhcache_lock, RW_READER);
8402 /* do not flush cache until the cache is fully built */
8403 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
8404 rw_exit(&vhcache->vhcache_lock);
8405 return;
8406 }
8407 rw_exit(&vhcache->vhcache_lock);
8408
8409 mutex_enter(&vhc->vhc_lock);
8410 if (vhc->vhc_flags & MDI_VHC_READONLY_FS) {
8411 mutex_exit(&vhc->vhc_lock);
8412 return;
8413 }
8414
8415 vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY;
8416 vhc->vhc_flush_at_ticks = ddi_get_lbolt() +
8417 mdi_vhcache_flush_delay * TICKS_PER_SECOND;
8418 if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
8419 cv_broadcast(&vhc->vhc_cv);
8420 create_thread = 0;
8421 } else {
8422 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD;
8423 create_thread = 1;
8424 }
8425 mutex_exit(&vhc->vhc_lock);
8426
8427 if (create_thread)
8428 (void) thread_create(NULL, 0, vhcache_flush_thread, vhc,
8429 0, &p0, TS_RUN, minclsyspri);
8430 }
8431
/*
 * phci bus config structure - one for each phci bus config operation that
 * we initiate on behalf of a vhci.  Allocated in bus_config_all_phcis() and
 * freed, together with phbc_phci_path, by bus_config_phci().
 */
typedef struct mdi_phci_bus_config_s {
	char *phbc_phci_path;	/* copy of the phci's device path */
	struct mdi_vhci_bus_config_s *phbc_vhbusconfig;	/* vhci bus config */
	struct mdi_phci_bus_config_s *phbc_next;	/* next in list */
} mdi_phci_bus_config_t;
8441
/*
 * vhci bus config structure - one for each vhci bus config operation.
 * Shared by all the mdi_phci_bus_config_t requests of one
 * bus_config_all_phcis() call.
 */
typedef struct mdi_vhci_bus_config_s {
	ddi_bus_config_op_t vhbc_op;	/* bus config op */
	major_t vhbc_op_major;		/* bus config op major */
	uint_t vhbc_op_flags;		/* bus config op flags */
	kmutex_t vhbc_lock;	/* held to decrement/wait on vhbc_thr_count */
	kcondvar_t vhbc_cv;	/* broadcast when vhbc_thr_count reaches 0 */
	int vhbc_thr_count;	/* phci requests not yet completed */
} mdi_vhci_bus_config_t;
8451
8452 /*
8453 * bus config the specified phci
8454 */
8455 static void
bus_config_phci(void * arg)8456 bus_config_phci(void *arg)
8457 {
8458 mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg;
8459 mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig;
8460 dev_info_t *ph_dip;
8461
8462 /*
8463 * first configure all path components upto phci and then configure
8464 * the phci children.
8465 */
8466 if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0))
8467 != NULL) {
8468 if (vhbc->vhbc_op == BUS_CONFIG_DRIVER ||
8469 vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) {
8470 (void) ndi_devi_config_driver(ph_dip,
8471 vhbc->vhbc_op_flags,
8472 vhbc->vhbc_op_major);
8473 } else
8474 (void) ndi_devi_config(ph_dip,
8475 vhbc->vhbc_op_flags);
8476
8477 /* release the hold that e_ddi_hold_devi_by_path() placed */
8478 ndi_rele_devi(ph_dip);
8479 }
8480
8481 kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1);
8482 kmem_free(phbc, sizeof (*phbc));
8483
8484 mutex_enter(&vhbc->vhbc_lock);
8485 vhbc->vhbc_thr_count--;
8486 if (vhbc->vhbc_thr_count == 0)
8487 cv_broadcast(&vhbc->vhbc_cv);
8488 mutex_exit(&vhbc->vhbc_lock);
8489 }
8490
/*
 * Bus config all phcis associated with the vhci in parallel.
 * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL.
 *
 * One mdi_phci_bus_config_t request is built per cached phci while the
 * vhcache lock is held as reader; the requests are then handed to
 * bus_config_phci() after the lock is dropped.  The function does not
 * return until every request has completed.
 */
static void
bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags,
    ddi_bus_config_op_t op, major_t maj)
{
	mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next;
	mdi_vhci_bus_config_t *vhbc;
	mdi_vhcache_phci_t *cphci;

	rw_enter(&vhcache->vhcache_lock, RW_READER);
	if (vhcache->vhcache_phci_head == NULL) {
		/* no phcis in the cache: nothing to configure */
		rw_exit(&vhcache->vhcache_lock);
		return;
	}

	vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP);

	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
	    cphci = cphci->cphci_next) {
		/* skip phcis that haven't attached before root is available */
		if (!modrootloaded && (cphci->cphci_phci == NULL))
			continue;
		phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP);
		/* private copy of the path, usable after the lock is dropped */
		phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path,
		    KM_SLEEP);
		phbc->phbc_vhbusconfig = vhbc;
		phbc->phbc_next = phbc_head;
		phbc_head = phbc;
		vhbc->vhbc_thr_count++;
	}
	rw_exit(&vhcache->vhcache_lock);

	/* only the two reprobe bits of the caller's flags are passed on */
	vhbc->vhbc_op = op;
	vhbc->vhbc_op_major = maj;
	vhbc->vhbc_op_flags = NDI_NO_EVENT |
	    (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE));
	mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL);

	/*
	 * now create threads to initiate bus config on all phcis in parallel
	 * (or run each request inline when mdi_mtc_off is set).
	 * bus_config_phci() frees phbc, so the next pointer is saved first.
	 */
	for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) {
		phbc_next = phbc->phbc_next;
		if (mdi_mtc_off)
			bus_config_phci((void *)phbc);
		else
			(void) thread_create(NULL, 0, bus_config_phci, phbc,
			    0, &p0, TS_RUN, minclsyspri);
	}

	mutex_enter(&vhbc->vhbc_lock);
	/* wait until all threads exit */
	while (vhbc->vhbc_thr_count > 0)
		cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock);
	mutex_exit(&vhbc->vhbc_lock);

	mutex_destroy(&vhbc->vhbc_lock);
	cv_destroy(&vhbc->vhbc_cv);
	kmem_free(vhbc, sizeof (*vhbc));
}
8553
8554 /*
8555 * Single threaded version of bus_config_all_phcis()
8556 */
8557 static void
st_bus_config_all_phcis(mdi_vhci_config_t * vhc,uint_t flags,ddi_bus_config_op_t op,major_t maj)8558 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags,
8559 ddi_bus_config_op_t op, major_t maj)
8560 {
8561 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8562
8563 single_threaded_vhconfig_enter(vhc);
8564 bus_config_all_phcis(vhcache, flags, op, maj);
8565 single_threaded_vhconfig_exit(vhc);
8566 }
8567
8568 /*
8569 * Perform BUS_CONFIG_ONE on the specified child of the phci.
8570 * The path includes the child component in addition to the phci path.
8571 */
8572 static int
bus_config_one_phci_child(char * path)8573 bus_config_one_phci_child(char *path)
8574 {
8575 dev_info_t *ph_dip, *child;
8576 char *devnm;
8577 int rv = MDI_FAILURE;
8578
8579 /* extract the child component of the phci */
8580 devnm = strrchr(path, '/');
8581 *devnm++ = '\0';
8582
8583 /*
8584 * first configure all path components upto phci and then
8585 * configure the phci child.
8586 */
8587 if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) {
8588 if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) ==
8589 NDI_SUCCESS) {
8590 /*
8591 * release the hold that ndi_devi_config_one() placed
8592 */
8593 ndi_rele_devi(child);
8594 rv = MDI_SUCCESS;
8595 }
8596
8597 /* release the hold that e_ddi_hold_devi_by_path() placed */
8598 ndi_rele_devi(ph_dip);
8599 }
8600
8601 devnm--;
8602 *devnm = '/';
8603 return (rv);
8604 }
8605
8606 /*
8607 * Build a list of phci client paths for the specified vhci client.
8608 * The list includes only those phci client paths which aren't configured yet.
8609 */
8610 static mdi_phys_path_t *
build_phclient_path_list(mdi_vhcache_client_t * cct,char * ct_name)8611 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name)
8612 {
8613 mdi_vhcache_pathinfo_t *cpi;
8614 mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp;
8615 int config_path, len;
8616
8617 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8618 /*
8619 * include only those paths that aren't configured.
8620 */
8621 config_path = 0;
8622 if (cpi->cpi_pip == NULL)
8623 config_path = 1;
8624 else {
8625 MDI_PI_LOCK(cpi->cpi_pip);
8626 if (MDI_PI_IS_INIT(cpi->cpi_pip))
8627 config_path = 1;
8628 MDI_PI_UNLOCK(cpi->cpi_pip);
8629 }
8630
8631 if (config_path) {
8632 pp = kmem_alloc(sizeof (*pp), KM_SLEEP);
8633 len = strlen(cpi->cpi_cphci->cphci_path) +
8634 strlen(ct_name) + strlen(cpi->cpi_addr) + 3;
8635 pp->phys_path = kmem_alloc(len, KM_SLEEP);
8636 (void) snprintf(pp->phys_path, len, "%s/%s@%s",
8637 cpi->cpi_cphci->cphci_path, ct_name,
8638 cpi->cpi_addr);
8639 pp->phys_path_next = NULL;
8640
8641 if (pp_head == NULL)
8642 pp_head = pp;
8643 else
8644 pp_tail->phys_path_next = pp;
8645 pp_tail = pp;
8646 }
8647 }
8648
8649 return (pp_head);
8650 }
8651
8652 /*
8653 * Free the memory allocated for phci client path list.
8654 */
8655 static void
free_phclient_path_list(mdi_phys_path_t * pp_head)8656 free_phclient_path_list(mdi_phys_path_t *pp_head)
8657 {
8658 mdi_phys_path_t *pp, *pp_next;
8659
8660 for (pp = pp_head; pp != NULL; pp = pp_next) {
8661 pp_next = pp->phys_path_next;
8662 kmem_free(pp->phys_path, strlen(pp->phys_path) + 1);
8663 kmem_free(pp, sizeof (*pp));
8664 }
8665 }
8666
8667 /*
8668 * Allocated async client structure and initialize with the specified values.
8669 */
8670 static mdi_async_client_config_t *
alloc_async_client_config(char * ct_name,char * ct_addr,mdi_phys_path_t * pp_head,mdi_vhcache_lookup_token_t * tok)8671 alloc_async_client_config(char *ct_name, char *ct_addr,
8672 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8673 {
8674 mdi_async_client_config_t *acc;
8675
8676 acc = kmem_alloc(sizeof (*acc), KM_SLEEP);
8677 acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP);
8678 acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP);
8679 acc->acc_phclient_path_list_head = pp_head;
8680 init_vhcache_lookup_token(&acc->acc_token, tok);
8681 acc->acc_next = NULL;
8682 return (acc);
8683 }
8684
8685 /*
8686 * Free the memory allocated for the async client structure and their members.
8687 */
8688 static void
free_async_client_config(mdi_async_client_config_t * acc)8689 free_async_client_config(mdi_async_client_config_t *acc)
8690 {
8691 if (acc->acc_phclient_path_list_head)
8692 free_phclient_path_list(acc->acc_phclient_path_list_head);
8693 kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1);
8694 kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1);
8695 kmem_free(acc, sizeof (*acc));
8696 }
8697
8698 /*
8699 * Sort vhcache pathinfos (cpis) of the specified client.
8700 * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
8701 * flag set come at the beginning of the list. All cpis which have this
8702 * flag set come at the end of the list.
8703 */
8704 static void
sort_vhcache_paths(mdi_vhcache_client_t * cct)8705 sort_vhcache_paths(mdi_vhcache_client_t *cct)
8706 {
8707 mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head;
8708
8709 cpi_head = cct->cct_cpi_head;
8710 cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
8711 for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
8712 cpi_next = cpi->cpi_next;
8713 enqueue_vhcache_pathinfo(cct, cpi);
8714 }
8715 }
8716
/*
 * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for
 * every vhcache pathinfo of the specified client. If not adjust the flag
 * setting appropriately.
 *
 * The flag is considered correct when it is set exactly on those cpis that
 * have no pathinfo attached (cpi_pip == NULL).
 *
 * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the
 * on-disk vhci cache. So every time this flag is updated the cache must be
 * flushed.
 *
 * Takes the vhcache lock itself; returns silently if the client is not in
 * the cache.
 */
static void
adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
    mdi_vhcache_lookup_token_t *tok)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_client_t *cct;
	mdi_vhcache_pathinfo_t *cpi;

	rw_enter(&vhcache->vhcache_lock, RW_READER);
	if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok))
	    == NULL) {
		rw_exit(&vhcache->vhcache_lock);
		return;
	}

	/*
	 * to avoid unnecessary on-disk cache updates, first check if an
	 * update is really needed. If no update is needed simply return.
	 * The loop stops at the first cpi whose flag disagrees with cpi_pip.
	 */
	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		if ((cpi->cpi_pip != NULL &&
		    (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) ||
		    (cpi->cpi_pip == NULL &&
		    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) {
			break;
		}
	}
	if (cpi == NULL) {
		/* every flag already matches */
		rw_exit(&vhcache->vhcache_lock);
		return;
	}

	/*
	 * An update is needed, which requires the writer lock.  If the
	 * in-place upgrade fails the lock is dropped and re-taken as writer,
	 * so the client has to be looked up again.
	 */
	if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) {
		rw_exit(&vhcache->vhcache_lock);
		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
		if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr,
		    tok)) == NULL) {
			rw_exit(&vhcache->vhcache_lock);
			return;
		}
	}

	/* recompute the flag on every cpi, then restore the list order */
	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		if (cpi->cpi_pip != NULL)
			cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
		else
			cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
	}
	sort_vhcache_paths(cct);

	rw_exit(&vhcache->vhcache_lock);
	/* schedule the flush after the vhcache lock has been dropped */
	vhcache_dirty(vhc);
}
8779
8780 /*
8781 * Configure all specified paths of the client.
8782 */
8783 static void
config_client_paths_sync(mdi_vhci_config_t * vhc,char * ct_name,char * ct_addr,mdi_phys_path_t * pp_head,mdi_vhcache_lookup_token_t * tok)8784 config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8785 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8786 {
8787 mdi_phys_path_t *pp;
8788
8789 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next)
8790 (void) bus_config_one_phci_child(pp->phys_path);
8791 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok);
8792 }
8793
/*
 * Body of an async client config thread.
 *
 * Dequeue elements from vhci async client config list and bus configure
 * their corresponding phci clients.  The thread exits when MDI_VHC_EXIT is
 * set or when the list has stayed empty for mdi_async_config_idle_time
 * seconds; on exit it decrements vhc_acc_thrcount.
 *
 * arg is the mdi_vhci_config_t that owns the list.
 */
static void
config_client_paths_thread(void *arg)
{
	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
	mdi_async_client_config_t *acc;
	clock_t quit_at_ticks;
	clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND;
	callb_cpr_t cprinfo;

	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
	    "mdi_config_client_paths");

	for (; ; ) {
		/* the idle deadline restarts after each processed element */
		quit_at_ticks = ddi_get_lbolt() + idle_time;

		mutex_enter(&vhc->vhc_lock);
		/* wait for work, an exit request, or the idle deadline */
		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
		    vhc->vhc_acc_list_head == NULL &&
		    ddi_get_lbolt() < quit_at_ticks) {
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
			    quit_at_ticks);
			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
		}

		/* leave with vhc_lock still held */
		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
		    vhc->vhc_acc_list_head == NULL)
			goto out;

		/* unlink the element at the head of the list */
		acc = vhc->vhc_acc_list_head;
		vhc->vhc_acc_list_head = acc->acc_next;
		if (vhc->vhc_acc_list_head == NULL)
			vhc->vhc_acc_list_tail = NULL;
		vhc->vhc_acc_count--;
		mutex_exit(&vhc->vhc_lock);

		/* the bus config work itself runs without vhc_lock */
		config_client_paths_sync(vhc, acc->acc_ct_name,
		    acc->acc_ct_addr, acc->acc_phclient_path_list_head,
		    &acc->acc_token);

		free_async_client_config(acc);
	}

out:
	vhc->vhc_acc_thrcount--;
	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
	CALLB_CPR_EXIT(&cprinfo);
}
8846
8847 /*
8848 * Arrange for all the phci client paths (pp_head) for the specified client
8849 * to be bus configured asynchronously by a thread.
8850 */
8851 static void
config_client_paths_async(mdi_vhci_config_t * vhc,char * ct_name,char * ct_addr,mdi_phys_path_t * pp_head,mdi_vhcache_lookup_token_t * tok)8852 config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8853 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8854 {
8855 mdi_async_client_config_t *acc, *newacc;
8856 int create_thread;
8857
8858 if (pp_head == NULL)
8859 return;
8860
8861 if (mdi_mtc_off) {
8862 config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok);
8863 free_phclient_path_list(pp_head);
8864 return;
8865 }
8866
8867 newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok);
8868 ASSERT(newacc);
8869
8870 mutex_enter(&vhc->vhc_lock);
8871 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) {
8872 if (strcmp(ct_name, acc->acc_ct_name) == 0 &&
8873 strcmp(ct_addr, acc->acc_ct_addr) == 0) {
8874 free_async_client_config(newacc);
8875 mutex_exit(&vhc->vhc_lock);
8876 return;
8877 }
8878 }
8879
8880 if (vhc->vhc_acc_list_head == NULL)
8881 vhc->vhc_acc_list_head = newacc;
8882 else
8883 vhc->vhc_acc_list_tail->acc_next = newacc;
8884 vhc->vhc_acc_list_tail = newacc;
8885 vhc->vhc_acc_count++;
8886 if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) {
8887 cv_broadcast(&vhc->vhc_cv);
8888 create_thread = 0;
8889 } else {
8890 vhc->vhc_acc_thrcount++;
8891 create_thread = 1;
8892 }
8893 mutex_exit(&vhc->vhc_lock);
8894
8895 if (create_thread)
8896 (void) thread_create(NULL, 0, config_client_paths_thread, vhc,
8897 0, &p0, TS_RUN, minclsyspri);
8898 }
8899
8900 /*
8901 * Return number of online paths for the specified client.
8902 */
8903 static int
nonline_paths(mdi_vhcache_client_t * cct)8904 nonline_paths(mdi_vhcache_client_t *cct)
8905 {
8906 mdi_vhcache_pathinfo_t *cpi;
8907 int online_count = 0;
8908
8909 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8910 if (cpi->cpi_pip != NULL) {
8911 MDI_PI_LOCK(cpi->cpi_pip);
8912 if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE)
8913 online_count++;
8914 MDI_PI_UNLOCK(cpi->cpi_pip);
8915 }
8916 }
8917
8918 return (online_count);
8919 }
8920
/*
 * Bus configure all paths for the specified vhci client.
 * If at least one path for the client is already online, the remaining paths
 * will be configured asynchronously. Otherwise, it synchronously configures
 * the paths until at least one path is online and then rest of the paths
 * will be configured asynchronously.
 *
 * Locking: the caller must hold vhcache_lock on entry (asserted below).
 * The lock is always released before this function returns.
 */
static void
config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_phys_path_t *pp_head, *pp;
	mdi_vhcache_client_t *cct;
	mdi_vhcache_lookup_token_t tok;

	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

	init_vhcache_lookup_token(&tok, NULL);

	/*
	 * Nothing to do if the client is not named, is not in the cache, or
	 * has no phci paths to configure.
	 */
	if (ct_name == NULL || ct_addr == NULL ||
	    (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok))
	    == NULL ||
	    (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) {
		rw_exit(&vhcache->vhcache_lock);
		return;
	}

	/* if at least one path is online, configure the rest asynchronously */
	if (nonline_paths(cct) > 0) {
		rw_exit(&vhcache->vhcache_lock);
		/* The whole list is handed off; it is not freed here. */
		config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok);
		return;
	}

	/* The cache lock is not held across the bus config calls below. */
	rw_exit(&vhcache->vhcache_lock);

	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) {
		if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) {
			rw_enter(&vhcache->vhcache_lock, RW_READER);

			/*
			 * The lock was dropped, so look the client up again
			 * rather than reusing the earlier cct pointer.
			 */
			if ((cct = lookup_vhcache_client(vhcache, ct_name,
			    ct_addr, &tok)) == NULL) {
				rw_exit(&vhcache->vhcache_lock);
				goto out;
			}

			/*
			 * A path is online now; pass the not yet configured
			 * remainder of the list to the async code.
			 */
			if (nonline_paths(cct) > 0 &&
			    pp->phys_path_next != NULL) {
				rw_exit(&vhcache->vhcache_lock);
				config_client_paths_async(vhc, ct_name, ct_addr,
				    pp->phys_path_next, &tok);
				/*
				 * Detach the handed-off tail so that only the
				 * entries up to and including pp are freed
				 * at "out".
				 */
				pp->phys_path_next = NULL;
				goto out;
			}

			rw_exit(&vhcache->vhcache_lock);
		}
	}

	/* Reached only when every path was tried synchronously. */
	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok);
out:
	free_phclient_path_list(pp_head);
}
8984
8985 static void
single_threaded_vhconfig_enter(mdi_vhci_config_t * vhc)8986 single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc)
8987 {
8988 mutex_enter(&vhc->vhc_lock);
8989 while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED)
8990 cv_wait(&vhc->vhc_cv, &vhc->vhc_lock);
8991 vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED;
8992 mutex_exit(&vhc->vhc_lock);
8993 }
8994
8995 static void
single_threaded_vhconfig_exit(mdi_vhci_config_t * vhc)8996 single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc)
8997 {
8998 mutex_enter(&vhc->vhc_lock);
8999 vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED;
9000 cv_broadcast(&vhc->vhc_cv);
9001 mutex_exit(&vhc->vhc_lock);
9002 }
9003
/*
 * One entry of a built-in phci driver table.
 */
typedef struct mdi_phci_driver_info {
	/* name of the phci driver */
	char	*phdriver_name;

	/* set to non zero if the phci driver supports root device */
	int	phdriver_root_support;
} mdi_phci_driver_info_t;

/*
 * vhci class and root support capability of a phci driver can be
 * specified using ddi-vhci-class and ddi-no-root-support properties in the
 * phci driver.conf file. The built-in tables below contain this information
 * for those phci drivers whose driver.conf files don't yet contain this info.
 *
 * All phci drivers except iscsi have root device support.
 */
static mdi_phci_driver_info_t scsi_phci_driver_list[] = {
	{ "fp",		1 },
	{ "iscsi",	0 },
	{ "ibsrp",	1 },
};

static mdi_phci_driver_info_t ib_phci_driver_list[] = {
	{ "tavor",	1 },
};
9026
9027 static void *
mdi_realloc(void * old_ptr,size_t old_size,size_t new_size)9028 mdi_realloc(void *old_ptr, size_t old_size, size_t new_size)
9029 {
9030 void *new_ptr;
9031
9032 new_ptr = kmem_zalloc(new_size, KM_SLEEP);
9033 if (old_ptr) {
9034 bcopy(old_ptr, new_ptr, MIN(old_size, new_size));
9035 kmem_free(old_ptr, old_size);
9036 }
9037 return (new_ptr);
9038 }
9039
9040 static void
add_to_phci_list(char *** driver_list,int ** root_support_list,int * cur_elements,int * max_elements,char * driver_name,int root_support)9041 add_to_phci_list(char ***driver_list, int **root_support_list,
9042 int *cur_elements, int *max_elements, char *driver_name, int root_support)
9043 {
9044 ASSERT(*cur_elements <= *max_elements);
9045 if (*cur_elements == *max_elements) {
9046 *max_elements += 10;
9047 *driver_list = mdi_realloc(*driver_list,
9048 sizeof (char *) * (*cur_elements),
9049 sizeof (char *) * (*max_elements));
9050 *root_support_list = mdi_realloc(*root_support_list,
9051 sizeof (int) * (*cur_elements),
9052 sizeof (int) * (*max_elements));
9053 }
9054 (*driver_list)[*cur_elements] = i_ddi_strdup(driver_name, KM_SLEEP);
9055 (*root_support_list)[*cur_elements] = root_support;
9056 (*cur_elements)++;
9057 }
9058
9059 static void
get_phci_driver_list(char * vhci_class,char *** driver_list,int ** root_support_list,int * cur_elements,int * max_elements)9060 get_phci_driver_list(char *vhci_class, char ***driver_list,
9061 int **root_support_list, int *cur_elements, int *max_elements)
9062 {
9063 mdi_phci_driver_info_t *st_driver_list, *p;
9064 int st_ndrivers, root_support, i, j, driver_conf_count;
9065 major_t m;
9066 struct devnames *dnp;
9067 ddi_prop_t *propp;
9068
9069 *driver_list = NULL;
9070 *root_support_list = NULL;
9071 *cur_elements = 0;
9072 *max_elements = 0;
9073
9074 /* add the phci drivers derived from the phci driver.conf files */
9075 for (m = 0; m < devcnt; m++) {
9076 dnp = &devnamesp[m];
9077
9078 if (dnp->dn_flags & DN_PHCI_DRIVER) {
9079 LOCK_DEV_OPS(&dnp->dn_lock);
9080 if (dnp->dn_global_prop_ptr != NULL &&
9081 (propp = i_ddi_prop_search(DDI_DEV_T_ANY,
9082 DDI_VHCI_CLASS, DDI_PROP_TYPE_STRING,
9083 &dnp->dn_global_prop_ptr->prop_list)) != NULL &&
9084 strcmp(propp->prop_val, vhci_class) == 0) {
9085
9086 root_support = (i_ddi_prop_search(DDI_DEV_T_ANY,
9087 DDI_NO_ROOT_SUPPORT, DDI_PROP_TYPE_INT,
9088 &dnp->dn_global_prop_ptr->prop_list)
9089 == NULL) ? 1 : 0;
9090
9091 add_to_phci_list(driver_list, root_support_list,
9092 cur_elements, max_elements, dnp->dn_name,
9093 root_support);
9094
9095 UNLOCK_DEV_OPS(&dnp->dn_lock);
9096 } else
9097 UNLOCK_DEV_OPS(&dnp->dn_lock);
9098 }
9099 }
9100
9101 driver_conf_count = *cur_elements;
9102
9103 /* add the phci drivers specified in the built-in tables */
9104 if (strcmp(vhci_class, MDI_HCI_CLASS_SCSI) == 0) {
9105 st_driver_list = scsi_phci_driver_list;
9106 st_ndrivers = sizeof (scsi_phci_driver_list) /
9107 sizeof (mdi_phci_driver_info_t);
9108 } else if (strcmp(vhci_class, MDI_HCI_CLASS_IB) == 0) {
9109 st_driver_list = ib_phci_driver_list;
9110 st_ndrivers = sizeof (ib_phci_driver_list) /
9111 sizeof (mdi_phci_driver_info_t);
9112 } else {
9113 st_driver_list = NULL;
9114 st_ndrivers = 0;
9115 }
9116
9117 for (i = 0, p = st_driver_list; i < st_ndrivers; i++, p++) {
9118 /* add this phci driver if not already added before */
9119 for (j = 0; j < driver_conf_count; j++) {
9120 if (strcmp((*driver_list)[j], p->phdriver_name) == 0)
9121 break;
9122 }
9123 if (j == driver_conf_count) {
9124 add_to_phci_list(driver_list, root_support_list,
9125 cur_elements, max_elements, p->phdriver_name,
9126 p->phdriver_root_support);
9127 }
9128 }
9129 }
9130
9131 /*
9132 * Attach the phci driver instances associated with the specified vhci class.
9133 * If root is mounted attach all phci driver instances.
9134 * If root is not mounted, attach the instances of only those phci
9135 * drivers that have the root support.
9136 */
9137 static void
attach_phci_drivers(char * vhci_class)9138 attach_phci_drivers(char *vhci_class)
9139 {
9140 char **driver_list, **p;
9141 int *root_support_list;
9142 int cur_elements, max_elements, i;
9143 major_t m;
9144
9145 get_phci_driver_list(vhci_class, &driver_list, &root_support_list,
9146 &cur_elements, &max_elements);
9147
9148 for (i = 0; i < cur_elements; i++) {
9149 if (modrootloaded || root_support_list[i]) {
9150 m = ddi_name_to_major(driver_list[i]);
9151 if (m != DDI_MAJOR_T_NONE &&
9152 ddi_hold_installed_driver(m))
9153 ddi_rele_driver(m);
9154 }
9155 }
9156
9157 if (driver_list) {
9158 for (i = 0, p = driver_list; i < cur_elements; i++, p++)
9159 kmem_free(*p, strlen(*p) + 1);
9160 kmem_free(driver_list, sizeof (char *) * max_elements);
9161 kmem_free(root_support_list, sizeof (int) * max_elements);
9162 }
9163 }
9164
9165 /*
9166 * Build vhci cache:
9167 *
9168 * Attach phci driver instances and then drive BUS_CONFIG_ALL on
9169 * the phci driver instances. During this process the cache gets built.
9170 *
9171 * Cache is built fully if the root is mounted.
9172 * If the root is not mounted, phci drivers that do not have root support
9173 * are not attached. As a result the cache is built partially. The entries
9174 * in the cache reflect only those phci drivers that have root support.
9175 */
9176 static int
build_vhci_cache(mdi_vhci_t * vh)9177 build_vhci_cache(mdi_vhci_t *vh)
9178 {
9179 mdi_vhci_config_t *vhc = vh->vh_config;
9180 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
9181
9182 single_threaded_vhconfig_enter(vhc);
9183
9184 rw_enter(&vhcache->vhcache_lock, RW_READER);
9185 if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) {
9186 rw_exit(&vhcache->vhcache_lock);
9187 single_threaded_vhconfig_exit(vhc);
9188 return (0);
9189 }
9190 rw_exit(&vhcache->vhcache_lock);
9191
9192 attach_phci_drivers(vh->vh_class);
9193 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT,
9194 BUS_CONFIG_ALL, DDI_MAJOR_T_NONE);
9195
9196 rw_enter(&vhcache->vhcache_lock, RW_WRITER);
9197 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
9198 rw_exit(&vhcache->vhcache_lock);
9199
9200 single_threaded_vhconfig_exit(vhc);
9201 vhcache_dirty(vhc);
9202 return (1);
9203 }
9204
9205 /*
9206 * Determine if discovery of paths is needed.
9207 */
9208 static int
vhcache_do_discovery(mdi_vhci_config_t * vhc)9209 vhcache_do_discovery(mdi_vhci_config_t *vhc)
9210 {
9211 int rv = 1;
9212
9213 mutex_enter(&vhc->vhc_lock);
9214 if (i_ddi_io_initialized() == 0) {
9215 if (vhc->vhc_path_discovery_boot > 0) {
9216 vhc->vhc_path_discovery_boot--;
9217 goto out;
9218 }
9219 } else {
9220 if (vhc->vhc_path_discovery_postboot > 0) {
9221 vhc->vhc_path_discovery_postboot--;
9222 goto out;
9223 }
9224 }
9225
9226 /*
9227 * Do full path discovery at most once per mdi_path_discovery_interval.
9228 * This is to avoid a series of full path discoveries when opening
9229 * stale /dev/[r]dsk links.
9230 */
9231 if (mdi_path_discovery_interval != -1 &&
9232 ddi_get_lbolt64() >= vhc->vhc_path_discovery_cutoff_time)
9233 goto out;
9234
9235 rv = 0;
9236 out:
9237 mutex_exit(&vhc->vhc_lock);
9238 return (rv);
9239 }
9240
9241 /*
9242 * Discover all paths:
9243 *
9244 * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci
9245 * driver instances. During this process all paths will be discovered.
9246 */
9247 static int
vhcache_discover_paths(mdi_vhci_t * vh)9248 vhcache_discover_paths(mdi_vhci_t *vh)
9249 {
9250 mdi_vhci_config_t *vhc = vh->vh_config;
9251 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
9252 int rv = 0;
9253
9254 single_threaded_vhconfig_enter(vhc);
9255
9256 if (vhcache_do_discovery(vhc)) {
9257 attach_phci_drivers(vh->vh_class);
9258 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE |
9259 NDI_NO_EVENT, BUS_CONFIG_ALL, DDI_MAJOR_T_NONE);
9260
9261 mutex_enter(&vhc->vhc_lock);
9262 vhc->vhc_path_discovery_cutoff_time = ddi_get_lbolt64() +
9263 mdi_path_discovery_interval * TICKS_PER_SECOND;
9264 mutex_exit(&vhc->vhc_lock);
9265 rv = 1;
9266 }
9267
9268 single_threaded_vhconfig_exit(vhc);
9269 return (rv);
9270 }
9271
/*
 * Generic vhci bus config implementation:
 *
 * Parameters
 *	vdip	vhci dip
 *	flags	bus config flags
 *	op	bus config operation
 *	The remaining parameters are bus config operation specific
 *
 * for BUS_CONFIG_ONE
 *	arg	pointer to name@addr
 *	child	upon successful return from this function, *child will be
 *		set to the configured and held devinfo child node of vdip.
 *	ct_addr	pointer to client address (i.e. GUID)
 *
 * for BUS_CONFIG_DRIVER
 *	arg	major number of the driver
 *	child and ct_addr parameters are ignored
 *
 * for BUS_CONFIG_ALL
 *	arg, child, and ct_addr parameters are ignored
 *
 * Note that for the rest of the bus config operations, this function simply
 * calls the framework provided default bus config routine.
 *
 * Return Values:
 *	MDI_SUCCESS	ndi_busop_bus_config() succeeded
 *	MDI_FAILURE	otherwise
 */
int
mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op,
    void *arg, dev_info_t **child, char *ct_addr)
{
	mdi_vhci_t *vh = i_devi_get_vhci(vdip);
	mdi_vhci_config_t *vhc = vh->vh_config;
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	int rv = 0;		/* set to 1 if this call built the cache */
	int params_valid = 0;	/* BUS_CONFIG_ONE arg parsed as name@addr */
	char *cp;

	/*
	 * To bus config vhcis we relay operation, possibly using another
	 * thread, to phcis. The phci driver then interacts with MDI to cause
	 * vhci child nodes to be enumerated under the vhci node.  Adding a
	 * vhci child requires an ndi_devi_enter of the vhci. Since another
	 * thread may be adding the child, to avoid deadlock we can't wait
	 * for the relayed operations to complete if we have already entered
	 * the vhci node.
	 */
	if (DEVI_BUSY_OWNED(vdip)) {
		MDI_DEBUG(2, (MDI_NOTE, vdip,
		    "vhci dip is busy owned %p", (void *)vdip));
		goto default_bus_config;
	}

	/*
	 * Build the cache if that has not been done yet.  The cache lock is
	 * dropped around build_vhci_cache() and taken again afterwards, so
	 * it is held as reader when the switch below is entered.
	 */
	rw_enter(&vhcache->vhcache_lock, RW_READER);
	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
		rw_exit(&vhcache->vhcache_lock);
		rv = build_vhci_cache(vh);
		rw_enter(&vhcache->vhcache_lock, RW_READER);
	}

	/* Every arm of this switch releases vhcache_lock. */
	switch (op) {
	case BUS_CONFIG_ONE:
		if (arg != NULL && ct_addr != NULL) {
			/* extract node name */
			cp = (char *)arg;
			while (*cp != '\0' && *cp != '@')
				cp++;
			if (*cp == '@') {
				params_valid = 1;
				/*
				 * Temporarily terminate arg at the '@' so it
				 * can be passed as the node name; the '@' is
				 * put back once the call returns.
				 */
				*cp = '\0';
				config_client_paths(vhc, (char *)arg, ct_addr);
				/* config_client_paths() releases cache_lock */
				*cp = '@';
				break;
			}
		}

		rw_exit(&vhcache->vhcache_lock);
		break;

	case BUS_CONFIG_DRIVER:
		rw_exit(&vhcache->vhcache_lock);
		/*
		 * Skipped when the cache was just built, since building it
		 * already drove BUS_CONFIG_ALL on the phcis.
		 */
		if (rv == 0)
			st_bus_config_all_phcis(vhc, flags, op,
			    (major_t)(uintptr_t)arg);
		break;

	case BUS_CONFIG_ALL:
		rw_exit(&vhcache->vhcache_lock);
		/* As above: nothing more to do right after a cache build. */
		if (rv == 0)
			st_bus_config_all_phcis(vhc, flags, op, -1);
		break;

	default:
		rw_exit(&vhcache->vhcache_lock);
		break;
	}


default_bus_config:
	/*
	 * All requested child nodes are enumerated under the vhci.
	 * Now configure them.
	 */
	if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
	    NDI_SUCCESS) {
		return (MDI_SUCCESS);
	} else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) {
		/* discover all paths and try configuring again */
		if (vhcache_discover_paths(vh) &&
		    ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
		    NDI_SUCCESS)
			return (MDI_SUCCESS);
	}

	return (MDI_FAILURE);
}
9387
9388 /*
9389 * Read the on-disk vhci cache into an nvlist for the specified vhci class.
9390 */
9391 static nvlist_t *
read_on_disk_vhci_cache(char * vhci_class)9392 read_on_disk_vhci_cache(char *vhci_class)
9393 {
9394 nvlist_t *nvl;
9395 int err;
9396 char *filename;
9397
9398 filename = vhclass2vhcache_filename(vhci_class);
9399
9400 if ((err = fread_nvlist(filename, &nvl)) == 0) {
9401 kmem_free(filename, strlen(filename) + 1);
9402 return (nvl);
9403 } else if (err == EIO)
9404 cmn_err(CE_WARN, "%s: I/O error, will recreate", filename);
9405 else if (err == EINVAL)
9406 cmn_err(CE_WARN,
9407 "%s: data file corrupted, will recreate", filename);
9408
9409 kmem_free(filename, strlen(filename) + 1);
9410 return (NULL);
9411 }
9412
9413 /*
9414 * Read on-disk vhci cache into nvlists for all vhci classes.
9415 * Called during booting by i_ddi_read_devices_files().
9416 */
9417 void
mdi_read_devices_files(void)9418 mdi_read_devices_files(void)
9419 {
9420 int i;
9421
9422 for (i = 0; i < N_VHCI_CLASSES; i++)
9423 vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]);
9424 }
9425
9426 /*
9427 * Remove all stale entries from vhci cache.
9428 */
9429 static void
clean_vhcache(mdi_vhci_config_t * vhc)9430 clean_vhcache(mdi_vhci_config_t *vhc)
9431 {
9432 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
9433 mdi_vhcache_phci_t *phci, *nxt_phci;
9434 mdi_vhcache_client_t *client, *nxt_client;
9435 mdi_vhcache_pathinfo_t *path, *nxt_path;
9436
9437 rw_enter(&vhcache->vhcache_lock, RW_WRITER);
9438
9439 client = vhcache->vhcache_client_head;
9440 vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL;
9441 for ( ; client != NULL; client = nxt_client) {
9442 nxt_client = client->cct_next;
9443
9444 path = client->cct_cpi_head;
9445 client->cct_cpi_head = client->cct_cpi_tail = NULL;
9446 for ( ; path != NULL; path = nxt_path) {
9447 nxt_path = path->cpi_next;
9448 if ((path->cpi_cphci->cphci_phci != NULL) &&
9449 (path->cpi_pip != NULL)) {
9450 enqueue_tail_vhcache_pathinfo(client, path);
9451 } else if (path->cpi_pip != NULL) {
9452 /* Not valid to have a path without a phci. */
9453 free_vhcache_pathinfo(path);
9454 }
9455 }
9456
9457 if (client->cct_cpi_head != NULL)
9458 enqueue_vhcache_client(vhcache, client);
9459 else {
9460 (void) mod_hash_destroy(vhcache->vhcache_client_hash,
9461 (mod_hash_key_t)client->cct_name_addr);
9462 free_vhcache_client(client);
9463 }
9464 }
9465
9466 phci = vhcache->vhcache_phci_head;
9467 vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL;
9468 for ( ; phci != NULL; phci = nxt_phci) {
9469
9470 nxt_phci = phci->cphci_next;
9471 if (phci->cphci_phci != NULL)
9472 enqueue_vhcache_phci(vhcache, phci);
9473 else
9474 free_vhcache_phci(phci);
9475 }
9476
9477 vhcache->vhcache_clean_time = ddi_get_lbolt64();
9478 rw_exit(&vhcache->vhcache_lock);
9479 vhcache_dirty(vhc);
9480 }
9481
9482 /*
9483 * Remove all stale entries from vhci cache.
9484 * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C
9485 */
9486 void
mdi_clean_vhcache(void)9487 mdi_clean_vhcache(void)
9488 {
9489 mdi_vhci_t *vh;
9490
9491 mutex_enter(&mdi_mutex);
9492 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
9493 vh->vh_refcnt++;
9494 mutex_exit(&mdi_mutex);
9495 clean_vhcache(vh->vh_config);
9496 mutex_enter(&mdi_mutex);
9497 vh->vh_refcnt--;
9498 }
9499 mutex_exit(&mdi_mutex);
9500 }
9501
9502 /*
9503 * mdi_vhci_walk_clients():
9504 * Walker routine to traverse client dev_info nodes
9505 * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree
9506 * below the client, including nexus devices, which we dont want.
9507 * So we just traverse the immediate siblings, starting from 1st client.
9508 */
9509 void
mdi_vhci_walk_clients(dev_info_t * vdip,int (* f)(dev_info_t *,void *),void * arg)9510 mdi_vhci_walk_clients(dev_info_t *vdip,
9511 int (*f)(dev_info_t *, void *), void *arg)
9512 {
9513 mdi_vhci_t *vh = i_devi_get_vhci(vdip);
9514 dev_info_t *cdip;
9515 mdi_client_t *ct;
9516
9517 MDI_VHCI_CLIENT_LOCK(vh);
9518 cdip = ddi_get_child(vdip);
9519 while (cdip) {
9520 ct = i_devi_get_client(cdip);
9521 MDI_CLIENT_LOCK(ct);
9522
9523 if (((*f)(cdip, arg)) == DDI_WALK_CONTINUE)
9524 cdip = ddi_get_next_sibling(cdip);
9525 else
9526 cdip = NULL;
9527
9528 MDI_CLIENT_UNLOCK(ct);
9529 }
9530 MDI_VHCI_CLIENT_UNLOCK(vh);
9531 }
9532
9533 /*
9534 * mdi_vhci_walk_phcis():
9535 * Walker routine to traverse phci dev_info nodes
9536 */
9537 void
mdi_vhci_walk_phcis(dev_info_t * vdip,int (* f)(dev_info_t *,void *),void * arg)9538 mdi_vhci_walk_phcis(dev_info_t *vdip,
9539 int (*f)(dev_info_t *, void *), void *arg)
9540 {
9541 mdi_vhci_t *vh = i_devi_get_vhci(vdip);
9542 mdi_phci_t *ph, *next;
9543
9544 MDI_VHCI_PHCI_LOCK(vh);
9545 ph = vh->vh_phci_head;
9546 while (ph) {
9547 MDI_PHCI_LOCK(ph);
9548
9549 if (((*f)(ph->ph_dip, arg)) == DDI_WALK_CONTINUE)
9550 next = ph->ph_next;
9551 else
9552 next = NULL;
9553
9554 MDI_PHCI_UNLOCK(ph);
9555 ph = next;
9556 }
9557 MDI_VHCI_PHCI_UNLOCK(vh);
9558 }
9559
9560
9561 /*
9562 * mdi_walk_vhcis():
9563 * Walker routine to traverse vhci dev_info nodes
9564 */
9565 void
mdi_walk_vhcis(int (* f)(dev_info_t *,void *),void * arg)9566 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg)
9567 {
9568 mdi_vhci_t *vh = NULL;
9569
9570 mutex_enter(&mdi_mutex);
9571 /*
9572 * Scan for already registered vhci
9573 */
9574 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
9575 vh->vh_refcnt++;
9576 mutex_exit(&mdi_mutex);
9577 if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) {
9578 mutex_enter(&mdi_mutex);
9579 vh->vh_refcnt--;
9580 break;
9581 } else {
9582 mutex_enter(&mdi_mutex);
9583 vh->vh_refcnt--;
9584 }
9585 }
9586
9587 mutex_exit(&mdi_mutex);
9588 }
9589
9590 /*
9591 * i_mdi_log_sysevent():
9592 * Logs events for pickup by syseventd
9593 */
9594 static void
i_mdi_log_sysevent(dev_info_t * dip,char * ph_vh_class,char * subclass)9595 i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass)
9596 {
9597 char *path_name;
9598 nvlist_t *attr_list;
9599
9600 if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE,
9601 KM_SLEEP) != DDI_SUCCESS) {
9602 goto alloc_failed;
9603 }
9604
9605 path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
9606 (void) ddi_pathname(dip, path_name);
9607
9608 if (nvlist_add_string(attr_list, DDI_DRIVER_NAME,
9609 ddi_driver_name(dip)) != DDI_SUCCESS) {
9610 goto error;
9611 }
9612
9613 if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR,
9614 (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) {
9615 goto error;
9616 }
9617
9618 if (nvlist_add_int32(attr_list, DDI_INSTANCE,
9619 (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) {
9620 goto error;
9621 }
9622
9623 if (nvlist_add_string(attr_list, DDI_PATHNAME,
9624 path_name) != DDI_SUCCESS) {
9625 goto error;
9626 }
9627
9628 if (nvlist_add_string(attr_list, DDI_CLASS,
9629 ph_vh_class) != DDI_SUCCESS) {
9630 goto error;
9631 }
9632
9633 (void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass,
9634 attr_list, NULL, DDI_SLEEP);
9635
9636 error:
9637 kmem_free(path_name, MAXPATHLEN);
9638 nvlist_free(attr_list);
9639 return;
9640
9641 alloc_failed:
9642 MDI_DEBUG(1, (MDI_WARN, dip, "!unable to send sysevent"));
9643 }
9644
/*
 * Return an array with the names of the phci drivers for vhci_class and
 * store the number of names in *ndrivers.  Returns NULL when there are none.
 * The array is sized to exactly *ndrivers entries; release it with
 * mdi_free_phci_driver_list().
 */
char **
mdi_get_phci_driver_list(char *vhci_class, int *ndrivers)
{
	char	**names;
	int	*root_flags;
	int	nused, nalloc;

	get_phci_driver_list(vhci_class, &names, &root_flags,
	    &nused, &nalloc);

	*ndrivers = nused;
	if (names == NULL)
		return (NULL);

	/* Callers only get the names; drop the root support flags. */
	kmem_free(root_flags, sizeof (int) * nalloc);

	/* Shrink the name array from its allocated to its used size. */
	return (mdi_realloc(names, sizeof (char *) * nalloc,
	    sizeof (char *) * nused));
}
9666
/*
 * Free a driver name array of ndrivers entries, including the name strings
 * it points to.  A NULL driver_list is ignored.
 */
void
mdi_free_phci_driver_list(char **driver_list, int ndrivers)
{
	int idx;

	if (driver_list == NULL)
		return;

	for (idx = 0; idx < ndrivers; idx++)
		kmem_free(driver_list[idx], strlen(driver_list[idx]) + 1);
	kmem_free(driver_list, sizeof (char *) * ndrivers);
}
9679
9680 /*
9681 * mdi_is_dev_supported():
9682 * function called by pHCI bus config operation to determine if a
9683 * device should be represented as a child of the vHCI or the
9684 * pHCI. This decision is made by the vHCI, using cinfo idenity
9685 * information passed by the pHCI - specifics of the cinfo
9686 * representation are by agreement between the pHCI and vHCI.
9687 * Return Values:
9688 * MDI_SUCCESS
9689 * MDI_FAILURE
9690 */
9691 int
mdi_is_dev_supported(char * class,dev_info_t * pdip,void * cinfo)9692 mdi_is_dev_supported(char *class, dev_info_t *pdip, void *cinfo)
9693 {
9694 mdi_vhci_t *vh;
9695
9696 ASSERT(class && pdip);
9697
9698 /*
9699 * For dev_supported, mdi_phci_register() must have established pdip as
9700 * a pHCI.
9701 *
9702 * NOTE: mdi_phci_register() does "mpxio-disable" processing, and
9703 * MDI_PHCI(pdip) will return false if mpxio is disabled.
9704 */
9705 if (!MDI_PHCI(pdip))
9706 return (MDI_FAILURE);
9707
9708 /* Return MDI_FAILURE if vHCI does not support asking the question. */
9709 vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
9710 if ((vh == NULL) || (vh->vh_ops->vo_is_dev_supported == NULL)) {
9711 return (MDI_FAILURE);
9712 }
9713
9714 /* Return vHCI answer */
9715 return (vh->vh_ops->vo_is_dev_supported(vh->vh_dip, pdip, cinfo));
9716 }
9717
9718 int
mdi_dc_return_dev_state(mdi_pathinfo_t * pip,struct devctl_iocdata * dcp)9719 mdi_dc_return_dev_state(mdi_pathinfo_t *pip, struct devctl_iocdata *dcp)
9720 {
9721 uint_t devstate = 0;
9722 dev_info_t *cdip;
9723
9724 if ((pip == NULL) || (dcp == NULL))
9725 return (MDI_FAILURE);
9726
9727 cdip = mdi_pi_get_client(pip);
9728
9729 switch (mdi_pi_get_state(pip)) {
9730 case MDI_PATHINFO_STATE_INIT:
9731 devstate = DEVICE_DOWN;
9732 break;
9733 case MDI_PATHINFO_STATE_ONLINE:
9734 devstate = DEVICE_ONLINE;
9735 if ((cdip) && (devi_stillreferenced(cdip) == DEVI_REFERENCED))
9736 devstate |= DEVICE_BUSY;
9737 break;
9738 case MDI_PATHINFO_STATE_STANDBY:
9739 devstate = DEVICE_ONLINE;
9740 break;
9741 case MDI_PATHINFO_STATE_FAULT:
9742 devstate = DEVICE_DOWN;
9743 break;
9744 case MDI_PATHINFO_STATE_OFFLINE:
9745 devstate = DEVICE_OFFLINE;
9746 break;
9747 default:
9748 ASSERT(MDI_PI(pip)->pi_state);
9749 }
9750
9751 if (copyout(&devstate, dcp->cpyout_buf, sizeof (uint_t)) != 0)
9752 return (MDI_FAILURE);
9753
9754 return (MDI_SUCCESS);
9755 }
9756