/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License, Version 1.0 only * (the "License"). You may not use this file except in compliance * with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2005 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #pragma ident "%Z%%M% %I% %E% SMI" /* * Multipath driver interface (MDI) implementation; see mdi_impl.h for a more * detailed discussion of the overall mpxio architecture. 
* * Default locking order: * * _NOTE(LOCK_ORDER(mdi_mutex, mdi_phci::ph_mutex)) * _NOTE(LOCK_ORDER(mdi_mutex, mdi_client::ct_mutex)) * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex)) * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DEBUG #include int mdi_debug = 1; #define MDI_DEBUG(level, stmnt) \ if (mdi_debug >= (level)) i_mdi_log stmnt static void i_mdi_log(int, dev_info_t *, const char *fmt, ...); #else /* !DEBUG */ #define MDI_DEBUG(level, stmnt) #endif /* DEBUG */ extern pri_t minclsyspri; extern int modrootloaded; /* * Global mutex: * Protects vHCI list and structure members, pHCI and Client lists. */ kmutex_t mdi_mutex; /* * Registered vHCI class driver lists */ int mdi_vhci_count; mdi_vhci_t *mdi_vhci_head; mdi_vhci_t *mdi_vhci_tail; /* * Client Hash Table size */ static int mdi_client_table_size = CLIENT_HASH_TABLE_SIZE; /* * taskq interface definitions */ #define MDI_TASKQ_N_THREADS 8 #define MDI_TASKQ_PRI minclsyspri #define MDI_TASKQ_MINALLOC (4*mdi_taskq_n_threads) #define MDI_TASKQ_MAXALLOC (500*mdi_taskq_n_threads) taskq_t *mdi_taskq; static uint_t mdi_taskq_n_threads = MDI_TASKQ_N_THREADS; static int mdi_max_bus_config_threads = 100; /* * To reduce unnecessary BUS_CONFIG_ALLs, do not BUS_CONFIG_ALL phcis in the * context of a BUS_CONFIG_ONE if a BUS_CONFIG_ALL has already been performed * in the last mdi_bus_config_timeout seconds. 
*/ static int mdi_bus_config_timeout = 60; /* in seconds */ /* * MDI component property name/value string definitions */ const char *mdi_component_prop = "mpxio-component"; const char *mdi_component_prop_vhci = "vhci"; const char *mdi_component_prop_phci = "phci"; const char *mdi_component_prop_client = "client"; /* * MDI client global unique identifier property name */ const char *mdi_client_guid_prop = "client-guid"; /* * MDI client load balancing property name/value string definitions */ const char *mdi_load_balance = "load-balance"; const char *mdi_load_balance_none = "none"; const char *mdi_load_balance_rr = "round-robin"; const char *mdi_load_balance_lba = "logical-block"; /* * Obsolete vHCI class definition; to be removed after Leadville update */ const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI; static char vhci_greeting[] = "\tThere already exists one vHCI driver for class %s\n" "\tOnly one vHCI driver for each class is allowed\n"; /* * Static function prototypes */ static int i_mdi_phci_offline(dev_info_t *, uint_t); static int i_mdi_client_offline(dev_info_t *, uint_t); static int i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t); static void i_mdi_phci_post_detach(dev_info_t *, ddi_detach_cmd_t, int); static int i_mdi_client_pre_detach(dev_info_t *, ddi_detach_cmd_t); static void i_mdi_client_post_detach(dev_info_t *, ddi_detach_cmd_t, int); static void i_mdi_pm_hold_pip(mdi_pathinfo_t *); static void i_mdi_pm_rele_pip(mdi_pathinfo_t *); static int i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *buf); static void i_mdi_pm_hold_client(mdi_client_t *, int); static void i_mdi_pm_rele_client(mdi_client_t *, int); static void i_mdi_pm_reset_client(mdi_client_t *); static void i_mdi_pm_hold_all_phci(mdi_client_t *); static int i_mdi_power_all_phci(mdi_client_t *); /* * Internal mdi_pathinfo node functions */ static int i_mdi_pi_kstat_create(mdi_pathinfo_t *); static void i_mdi_pi_kstat_destroy(mdi_pathinfo_t *); static mdi_vhci_t 
*i_mdi_vhci_class2vhci(char *); static mdi_vhci_t *i_devi_get_vhci(dev_info_t *); static mdi_phci_t *i_devi_get_phci(dev_info_t *); static void i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *); static void i_mdi_phci_get_client_lock(mdi_phci_t *, mdi_client_t *); static void i_mdi_phci_unlock(mdi_phci_t *); static mdi_pathinfo_t *i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *, int); static void i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *); static void i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *); static void i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *, mdi_client_t *); static void i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *); static void i_mdi_client_remove_path(mdi_client_t *, mdi_pathinfo_t *); static int i_mdi_pi_state_change(mdi_pathinfo_t *, mdi_pathinfo_state_t, int); static int i_mdi_pi_offline(mdi_pathinfo_t *, int); static dev_info_t *i_mdi_devinfo_create(mdi_vhci_t *, char *, char *, char **, int, int); static dev_info_t *i_mdi_devinfo_find(mdi_vhci_t *, char *, char *); static int i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int); static int i_mdi_is_child_present(dev_info_t *, dev_info_t *); static mdi_client_t *i_mdi_client_alloc(mdi_vhci_t *, char *, char *, int); static void i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *); static void i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *); static mdi_client_t *i_mdi_client_find(mdi_vhci_t *, char *); static void i_mdi_client_update_state(mdi_client_t *); static int i_mdi_client_compute_state(mdi_client_t *, mdi_phci_t *); static void i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *); static void i_mdi_client_unlock(mdi_client_t *); static int i_mdi_client_free(mdi_vhci_t *, mdi_client_t *); static mdi_client_t *i_devi_get_client(dev_info_t *); static int i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *, int, int); /* * Failover related function prototypes */ static int i_mdi_failover(void *); /* * misc internal functions */ static int 
i_mdi_get_hash_key(char *); static int i_map_nvlist_error_to_mdi(int); static void i_mdi_report_path_state(mdi_client_t *, mdi_pathinfo_t *); /* called once when first vhci registers with mdi */ static void i_mdi_init() { static int initialized = 0; if (initialized) return; initialized = 1; mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL); /* * Create our taskq resources */ mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads, MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC, TASKQ_PREPOPULATE | TASKQ_CPR_SAFE); ASSERT(mdi_taskq != NULL); /* taskq_create never fails */ } /* * mdi_get_component_type(): * Return mpxio component type * Return Values: * MDI_COMPONENT_NONE * MDI_COMPONENT_VHCI * MDI_COMPONENT_PHCI * MDI_COMPONENT_CLIENT * XXX This doesn't work under multi-level MPxIO and should be * removed when clients migrate mdi_is_*() interfaces. */ int mdi_get_component_type(dev_info_t *dip) { return (DEVI(dip)->devi_mdi_component); } /* * mdi_vhci_register(): * Register a vHCI module with the mpxio framework * mdi_vhci_register() is called by vHCI drivers to register the * 'class_driver' vHCI driver and its MDI entrypoints with the * mpxio framework. The vHCI driver must call this interface as * part of its attach(9e) handler. * Competing threads may try to attach mdi_vhci_register() as * the vHCI drivers are loaded and attached as a result of pHCI * driver instance registration (mdi_phci_register()) with the * framework. * Return Values: * MDI_SUCCESS * MDI_FAILURE */ /*ARGSUSED*/ int mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops, int flags) { mdi_vhci_t *vh = NULL; ASSERT(vops->vo_revision == MDI_VHCI_OPS_REV); i_mdi_init(); mutex_enter(&mdi_mutex); /* * Scan for already registered vhci */ for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { if (strcmp(vh->vh_class, class) == 0) { /* * vHCI has already been created. Check for valid * vHCI ops registration. 
We only support one vHCI * module per class */ if (vh->vh_ops != NULL) { mutex_exit(&mdi_mutex); cmn_err(CE_NOTE, vhci_greeting, class); return (MDI_FAILURE); } break; } } /* * if not yet created, create the vHCI component */ if (vh == NULL) { struct client_hash *hash = NULL; char *load_balance; /* * Allocate and initialize the mdi extensions */ vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP); hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash), KM_SLEEP); vh->vh_client_table = hash; vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP); (void) strcpy(vh->vh_class, class); vh->vh_lb = LOAD_BALANCE_RR; if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip, 0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) { if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) { vh->vh_lb = LOAD_BALANCE_NONE; } else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA) == 0) { vh->vh_lb = LOAD_BALANCE_LBA; } ddi_prop_free(load_balance); } /* * Store the vHCI ops vectors */ vh->vh_dip = vdip; vh->vh_ops = vops; /* * other members of vh_bus_config are initialized by * the above kmem_zalloc of the vhci structure. */ cv_init(&vh->vh_bus_config.vhc_cv, NULL, CV_DRIVER, NULL); if (mdi_vhci_head == NULL) { mdi_vhci_head = vh; } if (mdi_vhci_tail) { mdi_vhci_tail->vh_next = vh; } mdi_vhci_tail = vh; mdi_vhci_count++; } /* * Claim the devfs node as a vhci component */ DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI; /* * Initialize our back reference from dev_info node */ DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh; mutex_exit(&mdi_mutex); return (MDI_SUCCESS); } /* * mdi_vhci_unregister(): * Unregister a vHCI module from mpxio framework * mdi_vhci_unregister() is called from the detach(9E) entrypoint * of a vhci to unregister it from the framework. 
* Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */
/*ARGSUSED*/
int
mdi_vhci_unregister(dev_info_t *vdip, int flags)
{
	mdi_vhci_t	*found, *vh, *prev = NULL;
	mdi_phci_config_t *phc, *next_phc;

	/*
	 * Check for invalid VHCI
	 */
	if ((vh = i_devi_get_vhci(vdip)) == NULL)
		return (MDI_FAILURE);

	mutex_enter(&mdi_mutex);

	/*
	 * Scan the list of registered vHCIs for a match
	 * ('prev' trails 'found' so the entry can be unlinked below)
	 */
	for (found = mdi_vhci_head; found != NULL; found = found->vh_next) {
		if (found == vh)
			break;
		prev = found;
	}

	if (found == NULL) {
		mutex_exit(&mdi_mutex);
		return (MDI_FAILURE);
	}

	/*
	 * Check the pHCI and client count. All the pHCIs and clients
	 * should have been unregistered, before a vHCI can be
	 * unregistered.
	 */
	if (vh->vh_phci_count || vh->vh_client_count) {
		MDI_DEBUG(1, (CE_NOTE, NULL,
		    "!mdi_vhci_unregister: pHCI in registered state.\n"));
		mutex_exit(&mdi_mutex);
		return (MDI_FAILURE);
	}

	/*
	 * Remove the vHCI from the global list
	 */
	if (vh == mdi_vhci_head) {
		mdi_vhci_head = vh->vh_next;
	} else {
		prev->vh_next = vh->vh_next;
	}
	if (vh == mdi_vhci_tail) {
		mdi_vhci_tail = prev;
	}

	vh->vh_ops = NULL;
	mdi_vhci_count--;
	mutex_exit(&mdi_mutex);

	/*
	 * The vHCI is off the global list; the remaining teardown runs
	 * without mdi_mutex.  Drop the devinfo back reference first, then
	 * release everything mdi_vhci_register() allocated.
	 */
	DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI;
	DEVI(vdip)->devi_mdi_xhci = NULL;
	kmem_free(vh->vh_class, strlen(vh->vh_class)+1);
	kmem_free(vh->vh_client_table,
	    mdi_client_table_size * sizeof (struct client_hash));

	/*
	 * there must be no more tasks on the bus config taskq as the vhci
	 * driver can not be detached while bus config is in progress.
	 */
	ASSERT(vh->vh_bus_config.vhc_start_time == 0);

	if (vh->vh_bus_config.vhc_taskq != NULL)
		taskq_destroy(vh->vh_bus_config.vhc_taskq);

	/* Free the list of per-pHCI bus config entries. */
	for (phc = vh->vh_bus_config.vhc_phc; phc != NULL; phc = next_phc) {
		next_phc = phc->phc_next;
		kmem_free(phc, sizeof (*phc));
	}

	cv_destroy(&vh->vh_bus_config.vhc_cv);
	kmem_free(vh, sizeof (mdi_vhci_t));
	return (MDI_SUCCESS);
}

/*
 * i_mdi_vhci_class2vhci():
 *		Look for a matching vHCI module given a vHCI class name.
 *		Takes and drops mdi_mutex internally, so the caller must not
 *		already hold it (asserted below).
 * Return Values:
 *		Handle to a vHCI component
 *		NULL
 */
static mdi_vhci_t *
i_mdi_vhci_class2vhci(char *class)
{
	mdi_vhci_t	*vh = NULL;

	ASSERT(!MUTEX_HELD(&mdi_mutex));

	mutex_enter(&mdi_mutex);
	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
		if (strcmp(vh->vh_class, class) == 0) {
			break;
		}
	}
	mutex_exit(&mdi_mutex);
	/* vh is NULL here when the loop ran off the end of the list. */
	return (vh);
}

/*
 * i_devi_get_vhci():
 *		Utility function to get the handle to a vHCI component.
 *		The back reference is only returned when MDI_VHCI(vdip) is
 *		true for the node.
 * Return Values:
 *		Handle to a vHCI component
 *		NULL
 */
mdi_vhci_t *
i_devi_get_vhci(dev_info_t *vdip)
{
	mdi_vhci_t	*vh = NULL;
	if (MDI_VHCI(vdip)) {
		vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci;
	}
	return (vh);
}

/*
 * mdi_phci_register():
 *		Register a pHCI module with mpxio framework
 *		mdi_phci_register() is called by pHCI drivers to register with
 *		the mpxio framework and a specific 'class_driver' vHCI.  The
 *		pHCI driver must call this interface as part of its attach(9e)
 *		handler.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE	(mpxio-disable is "yes" on the pHCI node, or
 *				no vHCI is registered for 'class')
 */
/*ARGSUSED*/
int
mdi_phci_register(char *class, dev_info_t *pdip, int flags)
{
	mdi_phci_t		*ph;
	mdi_vhci_t		*vh;
	char			*data;
	char			*pathname;

	/*
	 * The pathname is only consumed by the MDI_DEBUG message below,
	 * which expands to nothing in non-DEBUG builds.
	 */
	pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	(void) ddi_pathname(pdip, pathname);

	/*
	 * Check for mpxio-disable property. Enable mpxio if the property is
	 * missing or not set to "yes".
	 * If the property is set to "yes" then emit a brief message.
	 */
	if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable",
	    &data) == DDI_SUCCESS)) {
		if (strcmp(data, "yes") == 0) {
			MDI_DEBUG(1, (CE_CONT, pdip,
			    "?%s (%s%d) multipath capabilities "
			    "disabled via %s.conf.\n", pathname,
			    ddi_driver_name(pdip), ddi_get_instance(pdip),
			    ddi_driver_name(pdip)));
			ddi_prop_free(data);
			kmem_free(pathname, MAXPATHLEN);
			return (MDI_FAILURE);
		}
		ddi_prop_free(data);
	}

	kmem_free(pathname, MAXPATHLEN);

	/*
	 * Search for a matching vHCI
	 */
	vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
	if (vh == NULL) {
		return (MDI_FAILURE);
	}

	/* Build the pHCI extension before it becomes visible on the list. */
	ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP);
	mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL);
	ph->ph_dip = pdip;
	ph->ph_vhci = vh;
	ph->ph_next = NULL;
	ph->ph_unstable = 0;
	ph->ph_vprivate = 0;
	cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL);
	cv_init(&ph->ph_powerchange_cv, NULL, CV_DRIVER, NULL);

	MDI_PHCI_SET_POWER_UP(ph);
	DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI;
	DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph;

	/* Append the new pHCI to the tail of the vHCI's pHCI list. */
	mutex_enter(&mdi_mutex);
	if (vh->vh_phci_head == NULL) {
		vh->vh_phci_head = ph;
	}
	if (vh->vh_phci_tail) {
		vh->vh_phci_tail->ph_next = ph;
	}
	vh->vh_phci_tail = ph;
	vh->vh_phci_count++;
	/* to force discovery of all phci children during busconfig */
	vh->vh_bus_config.vhc_cutoff_time = -1;
	mutex_exit(&mdi_mutex);
	return (MDI_SUCCESS);
}

/*
 * mdi_phci_unregister():
 *		Unregister a pHCI module from mpxio framework
 *		mdi_phci_unregister() is called by the pHCI drivers from their
 *		detach(9E) handler to unregister their instances from the
 *		framework.
* Return Values: * MDI_SUCCESS * MDI_FAILURE */ /*ARGSUSED*/ int mdi_phci_unregister(dev_info_t *pdip, int flags) { mdi_vhci_t *vh; mdi_phci_t *ph; mdi_phci_t *tmp; mdi_phci_t *prev = NULL; ph = i_devi_get_phci(pdip); if (ph == NULL) { MDI_DEBUG(1, (CE_WARN, pdip, "!pHCI unregister: Not a valid pHCI")); return (MDI_FAILURE); } vh = ph->ph_vhci; ASSERT(vh != NULL); if (vh == NULL) { MDI_DEBUG(1, (CE_WARN, pdip, "!pHCI unregister: Not a valid vHCI")); return (MDI_FAILURE); } mutex_enter(&mdi_mutex); tmp = vh->vh_phci_head; while (tmp) { if (tmp == ph) { break; } prev = tmp; tmp = tmp->ph_next; } if (ph == vh->vh_phci_head) { vh->vh_phci_head = ph->ph_next; } else { prev->ph_next = ph->ph_next; } if (ph == vh->vh_phci_tail) { vh->vh_phci_tail = prev; } vh->vh_phci_count--; /* * If no busconfig is in progress, release the phci busconfig resources. * We only need vh->vh_phci_count of busconfig resources. */ if (vh->vh_bus_config.vhc_start_time == 0 && vh->vh_bus_config.vhc_phc_cnt > vh->vh_phci_count) { int count; count = vh->vh_bus_config.vhc_phc_cnt - vh->vh_phci_count; while (count--) { mdi_phci_config_t *phc; phc = vh->vh_bus_config.vhc_phc; vh->vh_bus_config.vhc_phc = phc->phc_next; kmem_free(phc, sizeof (*phc)); } vh->vh_bus_config.vhc_phc_cnt = vh->vh_phci_count; } mutex_exit(&mdi_mutex); cv_destroy(&ph->ph_unstable_cv); cv_destroy(&ph->ph_powerchange_cv); mutex_destroy(&ph->ph_mutex); kmem_free(ph, sizeof (mdi_phci_t)); DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI; DEVI(pdip)->devi_mdi_xhci = NULL; return (MDI_SUCCESS); } /* * i_devi_get_phci(): * Utility function to return the phci extensions. */ static mdi_phci_t * i_devi_get_phci(dev_info_t *pdip) { mdi_phci_t *ph = NULL; if (MDI_PHCI(pdip)) { ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci; } return (ph); } /* * mdi_phci_path2devinfo(): * Utility function to search for a valid phci device given * the devfs pathname. 
*/ dev_info_t * mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname) { char *temp_pathname; mdi_vhci_t *vh; mdi_phci_t *ph; dev_info_t *pdip = NULL; vh = i_devi_get_vhci(vdip); ASSERT(vh != NULL); if (vh == NULL) { /* * Invalid vHCI component, return failure */ return (NULL); } temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); mutex_enter(&mdi_mutex); ph = vh->vh_phci_head; while (ph != NULL) { pdip = ph->ph_dip; ASSERT(pdip != NULL); *temp_pathname = '\0'; (void) ddi_pathname(pdip, temp_pathname); if (strcmp(temp_pathname, pathname) == 0) { break; } ph = ph->ph_next; } if (ph == NULL) { pdip = NULL; } mutex_exit(&mdi_mutex); kmem_free(temp_pathname, MAXPATHLEN); return (pdip); } /* * mdi_phci_get_path_count(): * get number of path information nodes associated with a given * pHCI device. */ int mdi_phci_get_path_count(dev_info_t *pdip) { mdi_phci_t *ph; int count = 0; ph = i_devi_get_phci(pdip); if (ph != NULL) { count = ph->ph_path_count; } return (count); } /* * i_mdi_phci_lock(): * Lock a pHCI device * Return Values: * None * Note: * The default locking order is: * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) * But there are number of situations where locks need to be * grabbed in reverse order. This routine implements try and lock * mechanism depending on the requested parameter option. */ static void i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip) { if (pip) { /* Reverse locking is requested. */ while (MDI_PHCI_TRYLOCK(ph) == 0) { /* * tryenter failed. Try to grab again * after a small delay */ MDI_PI_HOLD(pip); MDI_PI_UNLOCK(pip); delay(1); MDI_PI_LOCK(pip); MDI_PI_RELE(pip); } } else { MDI_PHCI_LOCK(ph); } } /* * i_mdi_phci_get_client_lock(): * Lock a pHCI device * Return Values: * None * Note: * The default locking order is: * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex)) * But there are number of situations where locks need to be * grabbed in reverse order. 
This routine implements try and lock * mechanism depending on the requested parameter option. */ static void i_mdi_phci_get_client_lock(mdi_phci_t *ph, mdi_client_t *ct) { if (ct) { /* Reverse locking is requested. */ while (MDI_PHCI_TRYLOCK(ph) == 0) { /* * tryenter failed. Try to grab again * after a small delay */ MDI_CLIENT_UNLOCK(ct); delay(1); MDI_CLIENT_LOCK(ct); } } else { MDI_PHCI_LOCK(ph); } } /* * i_mdi_phci_unlock(): * Unlock the pHCI component */ static void i_mdi_phci_unlock(mdi_phci_t *ph) { MDI_PHCI_UNLOCK(ph); } /* * i_mdi_devinfo_create(): * create client device's devinfo node * Return Values: * dev_info * NULL * Notes: */ static dev_info_t * i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid, char **compatible, int ncompatible, int flags) { dev_info_t *cdip = NULL; ASSERT(MUTEX_HELD(&mdi_mutex)); /* Verify for duplicate entry */ cdip = i_mdi_devinfo_find(vh, name, guid); ASSERT(cdip == NULL); if (cdip) { cmn_err(CE_WARN, "i_mdi_devinfo_create: client dip %p already exists", (void *)cdip); } if (flags == DDI_SLEEP) { ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip); } else { (void) ndi_devi_alloc(vh->vh_dip, name, DEVI_SID_NODEID, &cdip); } if (cdip == NULL) goto fail; /* * Create component type and Global unique identifier * properties */ if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) { goto fail; } /* Decorate the node with compatible property */ if (compatible && (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip, "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) { goto fail; } return (cdip); fail: if (cdip) { (void) ndi_prop_remove_all(cdip); (void) ndi_devi_free(cdip); } return (NULL); } /* * i_mdi_devinfo_find(): * Find a matching devinfo node for given client node name * and its guid. 
* Return Values: * Handle to a dev_info node or NULL */ static dev_info_t * i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid) { char *data; dev_info_t *cdip = NULL; dev_info_t *ndip = NULL; int circular; ndi_devi_enter(vh->vh_dip, &circular); ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child; while ((cdip = ndip) != NULL) { ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; if (strcmp(DEVI(cdip)->devi_node_name, name)) { continue; } if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP, &data) != DDI_PROP_SUCCESS) { continue; } if (strcmp(data, guid) != 0) { ddi_prop_free(data); continue; } ddi_prop_free(data); break; } ndi_devi_exit(vh->vh_dip, circular); return (cdip); } /* * i_mdi_devinfo_remove(): * Remove a client device node */ static int i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags) { int rv = MDI_SUCCESS; if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS || (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) { rv = ndi_devi_offline(cdip, NDI_DEVI_REMOVE); if (rv != NDI_SUCCESS) { MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_devinfo_remove:" " failed. 
cdip = %p\n", cdip)); } /* * Convert to MDI error code */ switch (rv) { case NDI_SUCCESS: rv = MDI_SUCCESS; break; case NDI_BUSY: rv = MDI_BUSY; break; default: rv = MDI_FAILURE; break; } } return (rv); } /* * i_devi_get_client() * Utility function to get mpxio component extensions */ static mdi_client_t * i_devi_get_client(dev_info_t *cdip) { mdi_client_t *ct = NULL; if (MDI_CLIENT(cdip)) { ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client; } return (ct); } /* * i_mdi_is_child_present(): * Search for the presence of client device dev_info node */ static int i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip) { int rv = MDI_FAILURE; struct dev_info *dip; int circular; ndi_devi_enter(vdip, &circular); dip = DEVI(vdip)->devi_child; while (dip) { if (dip == DEVI(cdip)) { rv = MDI_SUCCESS; break; } dip = dip->devi_sibling; } ndi_devi_exit(vdip, circular); return (rv); } /* * i_mdi_client_lock(): * Grab client component lock * Return Values: * None * Note: * The default locking order is: * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) * But there are number of situations where locks need to be * grabbed in reverse order. This routine implements try and lock * mechanism depending on the requested parameter option. */ static void i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip) { if (pip) { /* * Reverse locking is requested. */ while (MDI_CLIENT_TRYLOCK(ct) == 0) { /* * tryenter failed. Try to grab again * after a small delay */ MDI_PI_HOLD(pip); MDI_PI_UNLOCK(pip); delay(1); MDI_PI_LOCK(pip); MDI_PI_RELE(pip); } } else { MDI_CLIENT_LOCK(ct); } } /* * i_mdi_client_unlock(): * Unlock a client component */ static void i_mdi_client_unlock(mdi_client_t *ct) { MDI_CLIENT_UNLOCK(ct); } /* * i_mdi_client_alloc(): * Allocate and initialize a client structure. Caller should * hold the global mdi_mutex. 
* Return Values: * Handle to a client component */ /*ARGSUSED*/ static mdi_client_t * i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid, int flags) { mdi_client_t *ct; char *drvname = NULL; char *guid = NULL; client_lb_args_t *lb_args = NULL; ASSERT(MUTEX_HELD(&mdi_mutex)); /* * Allocate and initialize a component structure. */ ct = kmem_zalloc(sizeof (*ct), (flags == DDI_SLEEP) ? KM_SLEEP : KM_NOSLEEP); if (ct == NULL) goto fail; mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL); ct->ct_hnext = NULL; ct->ct_hprev = NULL; ct->ct_dip = NULL; ct->ct_vhci = vh; drvname = kmem_alloc(strlen(name) + 1, (flags == DDI_SLEEP) ? KM_SLEEP : KM_NOSLEEP); if (drvname == NULL) goto fail; ct->ct_drvname = drvname; (void) strcpy(ct->ct_drvname, name); guid = kmem_alloc(strlen(lguid) + 1, (flags == DDI_SLEEP) ? KM_SLEEP : KM_NOSLEEP); if (guid == NULL) goto fail; ct->ct_guid = guid; (void) strcpy(ct->ct_guid, lguid); ct->ct_cprivate = NULL; ct->ct_vprivate = NULL; ct->ct_flags = 0; ct->ct_state = MDI_CLIENT_STATE_FAILED; MDI_CLIENT_SET_OFFLINE(ct); MDI_CLIENT_SET_DETACH(ct); MDI_CLIENT_SET_POWER_UP(ct); ct->ct_failover_flags = 0; ct->ct_failover_status = 0; cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL); ct->ct_unstable = 0; cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL); cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL); ct->ct_lb = vh->vh_lb; lb_args = kmem_zalloc(sizeof (client_lb_args_t), (flags == DDI_SLEEP) ? 
KM_SLEEP : KM_NOSLEEP); if (lb_args == NULL) goto fail; ct->ct_lb_args = lb_args; ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE; ct->ct_path_count = 0; ct->ct_path_head = NULL; ct->ct_path_tail = NULL; ct->ct_path_last = NULL; /* * Add this client component to our client hash queue */ i_mdi_client_enlist_table(vh, ct); return (ct); fail: if (guid) kmem_free(guid, strlen(lguid) + 1); if (drvname) kmem_free(drvname, strlen(name) + 1); if (lb_args) kmem_free(lb_args, sizeof (client_lb_args_t)); kmem_free(ct, sizeof (*ct)); return (NULL); } /* * i_mdi_client_enlist_table(): * Attach the client device to the client hash table. Caller * should hold the mdi_mutex */ static void i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct) { int index; struct client_hash *head; ASSERT(MUTEX_HELD(&mdi_mutex)); index = i_mdi_get_hash_key(ct->ct_guid); head = &vh->vh_client_table[index]; ct->ct_hnext = (mdi_client_t *)head->ct_hash_head; head->ct_hash_head = ct; head->ct_hash_count++; vh->vh_client_count++; } /* * i_mdi_client_delist_table(): * Attach the client device to the client hash table. 
* Caller should hold the mdi_mutex */ static void i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct) { int index; char *guid; struct client_hash *head; mdi_client_t *next; mdi_client_t *last; ASSERT(MUTEX_HELD(&mdi_mutex)); guid = ct->ct_guid; index = i_mdi_get_hash_key(guid); head = &vh->vh_client_table[index]; last = NULL; next = (mdi_client_t *)head->ct_hash_head; while (next != NULL) { if (next == ct) { break; } last = next; next = next->ct_hnext; } if (next) { head->ct_hash_count--; if (last == NULL) { head->ct_hash_head = ct->ct_hnext; } else { last->ct_hnext = ct->ct_hnext; } ct->ct_hnext = NULL; vh->vh_client_count--; } } /* * i_mdi_client_free(): * Free a client component */ static int i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct) { int rv = MDI_SUCCESS; int flags = ct->ct_flags; dev_info_t *cdip; dev_info_t *vdip; ASSERT(MUTEX_HELD(&mdi_mutex)); vdip = vh->vh_dip; cdip = ct->ct_dip; (void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP); DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT; DEVI(cdip)->devi_mdi_client = NULL; /* * Clear out back ref. 
to dev_info_t node */ ct->ct_dip = NULL; /* * Remove this client from our hash queue */ i_mdi_client_delist_table(vh, ct); /* * Uninitialize and free the component */ kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1); kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1); kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t)); cv_destroy(&ct->ct_failover_cv); cv_destroy(&ct->ct_unstable_cv); cv_destroy(&ct->ct_powerchange_cv); mutex_destroy(&ct->ct_mutex); kmem_free(ct, sizeof (*ct)); if (cdip != NULL) { mutex_exit(&mdi_mutex); (void) i_mdi_devinfo_remove(vdip, cdip, flags); mutex_enter(&mdi_mutex); } return (rv); } /* * i_mdi_client_find(): * Find the client structure corresponding to a given guid * Caller should hold the mdi_mutex */ static mdi_client_t * i_mdi_client_find(mdi_vhci_t *vh, char *guid) { int index; struct client_hash *head; mdi_client_t *ct; ASSERT(MUTEX_HELD(&mdi_mutex)); index = i_mdi_get_hash_key(guid); head = &vh->vh_client_table[index]; ct = head->ct_hash_head; while (ct != NULL) { if (strcmp(ct->ct_guid, guid) == 0) { break; } ct = ct->ct_hnext; } return (ct); } /* * i_mdi_client_update_state(): * Compute and update client device state * Notes: * A client device can be in any of three possible states: * * MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more * one online/standby paths. Can tolerate failures. * MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with * no alternate paths available as standby. A failure on the online * would result in loss of access to device data. * MDI_CLIENT_STATE_FAILED - Client device in failed state with * no paths available to access the device. */ static void i_mdi_client_update_state(mdi_client_t *ct) { int state; ASSERT(MUTEX_HELD(&ct->ct_mutex)); state = i_mdi_client_compute_state(ct, NULL); MDI_CLIENT_SET_STATE(ct, state); } /* * i_mdi_client_compute_state(): * Compute client device state * * mdi_phci_t * Pointer to pHCI structure which should * while computing the new value. 
Used by * i_mdi_phci_offline() to find the new * client state after DR of a pHCI. */ static int i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph) { int state; int online_count = 0; int standby_count = 0; mdi_pathinfo_t *pip, *next; ASSERT(MUTEX_HELD(&ct->ct_mutex)); pip = ct->ct_path_head; while (pip != NULL) { MDI_PI_LOCK(pip); next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; if (MDI_PI(pip)->pi_phci == ph) { MDI_PI_UNLOCK(pip); pip = next; continue; } if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) == MDI_PATHINFO_STATE_ONLINE) online_count++; else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) == MDI_PATHINFO_STATE_STANDBY) standby_count++; MDI_PI_UNLOCK(pip); pip = next; } if (online_count == 0) { if (standby_count == 0) { state = MDI_CLIENT_STATE_FAILED; MDI_DEBUG(2, (CE_NOTE, NULL, "!client state: failed" " ct = %p\n", ct)); } else if (standby_count == 1) { state = MDI_CLIENT_STATE_DEGRADED; } else { state = MDI_CLIENT_STATE_OPTIMAL; } } else if (online_count == 1) { if (standby_count == 0) { state = MDI_CLIENT_STATE_DEGRADED; } else { state = MDI_CLIENT_STATE_OPTIMAL; } } else { state = MDI_CLIENT_STATE_OPTIMAL; } return (state); } /* * i_mdi_client2devinfo(): * Utility function */ dev_info_t * i_mdi_client2devinfo(mdi_client_t *ct) { return (ct->ct_dip); } /* * mdi_client_path2_devinfo(): * Given the parent devinfo and child devfs pathname, search for * a valid devfs node handle. 
*/ dev_info_t * mdi_client_path2devinfo(dev_info_t *vdip, char *pathname) { dev_info_t *cdip = NULL; dev_info_t *ndip = NULL; char *temp_pathname; int circular; /* * Allocate temp buffer */ temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); /* * Lock parent against changes */ ndi_devi_enter(vdip, &circular); ndip = (dev_info_t *)DEVI(vdip)->devi_child; while ((cdip = ndip) != NULL) { ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; *temp_pathname = '\0'; (void) ddi_pathname(cdip, temp_pathname); if (strcmp(temp_pathname, pathname) == 0) { break; } } /* * Release devinfo lock */ ndi_devi_exit(vdip, circular); /* * Free the temp buffer */ kmem_free(temp_pathname, MAXPATHLEN); return (cdip); } /* * mdi_client_get_path_count(): * Utility function to get number of path information nodes * associated with a given client device. */ int mdi_client_get_path_count(dev_info_t *cdip) { mdi_client_t *ct; int count = 0; ct = i_devi_get_client(cdip); if (ct != NULL) { count = ct->ct_path_count; } return (count); } /* * i_mdi_get_hash_key(): * Create a hash using strings as keys * */ static int i_mdi_get_hash_key(char *str) { uint32_t g, hash = 0; char *p; for (p = str; *p != '\0'; p++) { g = *p; hash += g; } return (hash % (CLIENT_HASH_TABLE_SIZE - 1)); } /* * mdi_get_lb_policy(): * Get current load balancing policy for a given client device */ client_lb_t mdi_get_lb_policy(dev_info_t *cdip) { client_lb_t lb = LOAD_BALANCE_NONE; mdi_client_t *ct; ct = i_devi_get_client(cdip); if (ct != NULL) { lb = ct->ct_lb; } return (lb); } /* * mdi_set_lb_region_size(): * Set current region size for the load-balance */ int mdi_set_lb_region_size(dev_info_t *cdip, int region_size) { mdi_client_t *ct; int rv = MDI_FAILURE; ct = i_devi_get_client(cdip); if (ct != NULL && ct->ct_lb_args != NULL) { ct->ct_lb_args->region_size = region_size; rv = MDI_SUCCESS; } return (rv); } /* * mdi_Set_lb_policy(): * Set current load balancing policy for a given client device */ int mdi_set_lb_policy(dev_info_t 
*cdip, client_lb_t lb) { mdi_client_t *ct; int rv = MDI_FAILURE; ct = i_devi_get_client(cdip); if (ct != NULL) { ct->ct_lb = lb; rv = MDI_SUCCESS; } return (rv); } /* * mdi_failover(): * failover function called by the vHCI drivers to initiate * a failover operation. This is typically due to non-availability * of online paths to route I/O requests. Failover can be * triggered through user application also. * * The vHCI driver calls mdi_failover() to initiate a failover * operation. mdi_failover() calls back into the vHCI driver's * vo_failover() entry point to perform the actual failover * operation. The reason for requiring the vHCI driver to * initiate failover by calling mdi_failover(), instead of directly * executing vo_failover() itself, is to ensure that the mdi * framework can keep track of the client state properly. * Additionally, mdi_failover() provides as a convenience the * option of performing the failover operation synchronously or * asynchronously * * Upon successful completion of the failover operation, the * paths that were previously ONLINE will be in the STANDBY state, * and the newly activated paths will be in the ONLINE state. * * The flags modifier determines whether the activation is done * synchronously: MDI_FAILOVER_SYNC * Return Values: * MDI_SUCCESS * MDI_FAILURE * MDI_BUSY */ /*ARGSUSED*/ int mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags) { int rv; mdi_client_t *ct; ct = i_devi_get_client(cdip); ASSERT(ct != NULL); if (ct == NULL) { /* cdip is not a valid client device. Nothing more to do. */ return (MDI_FAILURE); } MDI_CLIENT_LOCK(ct); if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) { /* A path to the client is being freed */ MDI_CLIENT_UNLOCK(ct); return (MDI_BUSY); } if (MDI_CLIENT_IS_FAILED(ct)) { /* * Client is in failed state. Nothing more to do. 
*/ MDI_CLIENT_UNLOCK(ct); return (MDI_FAILURE); } if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { /* * Failover is already in progress; return BUSY */ MDI_CLIENT_UNLOCK(ct); return (MDI_BUSY); } /* * Make sure that mdi_pathinfo node state changes are processed. * We do not allow failovers to progress while client path state * changes are in progress */ if (ct->ct_unstable) { if (flags == MDI_FAILOVER_ASYNC) { MDI_CLIENT_UNLOCK(ct); return (MDI_BUSY); } else { while (ct->ct_unstable) cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex); } } /* * Client device is in stable state. Before proceeding, perform sanity * checks again. */ if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) || (i_ddi_node_state(ct->ct_dip) < DS_READY)) { /* * Client is in failed state. Nothing more to do. */ MDI_CLIENT_UNLOCK(ct); return (MDI_FAILURE); } /* * Set the client state as failover in progress. */ MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct); ct->ct_failover_flags = flags; MDI_CLIENT_UNLOCK(ct); if (flags == MDI_FAILOVER_ASYNC) { /* * Submit the initiate failover request via CPR safe * taskq threads. */ (void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover, ct, KM_SLEEP); return (MDI_ACCEPT); } else { /* * Synchronous failover mode. Typically invoked from the user * land. */ rv = i_mdi_failover(ct); } return (rv); } /* * i_mdi_failover(): * internal failover function. Invokes vHCI drivers failover * callback function and process the failover status * Return Values: * None * * Note: A client device in failover state can not be detached or freed. 
*/
static int
i_mdi_failover(void *arg)
{
	mdi_client_t	*client = (mdi_client_t *)arg;
	mdi_vhci_t	*vhci = client->ct_vhci;
	int		status = MDI_SUCCESS;

	/* the driver callback below runs without the client lock */
	ASSERT(!MUTEX_HELD(&client->ct_mutex));

	/*
	 * Hand the request to the vHCI driver, if it registered a
	 * failover entry point.
	 */
	if (vhci->vh_ops->vo_failover != NULL) {
		status = (*vhci->vh_ops->vo_failover)(vhci->vh_dip,
		    client->ct_dip, client->ct_failover_flags);
	}

	MDI_CLIENT_LOCK(client);
	MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(client);

	/* remember the outcome for later inspection */
	client->ct_failover_status = status;

	/*
	 * Path states may have changed during the failover: recompute the
	 * client state, then wake every thread sleeping on ct_failover_cv.
	 */
	i_mdi_client_update_state(client);

	cv_broadcast(&client->ct_failover_cv);
	MDI_CLIENT_UNLOCK(client);
	return (status);
}

/*
 * Load balancing is logical block.
 * IOs within the range described by region_size
 * would go on the same path. This would improve the
 * performance by cache-hit on some of the RAID devices.
 * Search only for online paths(At some point we
 * may want to balance across target ports).
 * If no paths are found then default to round-robin.
*/
/*
 * i_mdi_lba_lb():
 *		ct	client whose path list is searched; ct->ct_lb_args is
 *			dereferenced without a NULL check
 *		ret_pip	receives the selected path, returned with a hold
 *			(MDI_PI_HOLD) taken on it
 *		bp	buf whose b_blkno, shifted right by the region size,
 *			selects the path
 * Return Values:
 *		MDI_SUCCESS	*ret_pip holds the chosen ONLINE path
 *		MDI_FAILURE	no ONLINE path could be selected; *ret_pip is
 *				not written
 *
 * ONLINE paths marked preferred are used when at least one exists;
 * otherwise the ONLINE non-preferred paths are used.  The index computed
 * from the block number counts only paths of the chosen kind, so paths in
 * other states, or with the other preference, that sit in between on the
 * list do not consume an index slot.
 */
static int
i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp)
{
	int		path_index;
	int		online_path_count = 0;
	int		online_nonpref_path_count = 0;
	int		region_size = ct->ct_lb_args->region_size;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	int		preferred = 0;
	int		path_cnt;

	/* first pass: count the ONLINE paths of each preference */
	pip = ct->ct_path_head;
	while (pip) {
		MDI_PI_LOCK(pip);
		if (MDI_PI(pip)->pi_state ==
		    MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) {
			online_path_count++;
		} else if (MDI_PI(pip)->pi_state ==
		    MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) {
			online_nonpref_path_count++;
		}
		next = (mdi_pathinfo_t *)
		    MDI_PI(pip)->pi_client_link;
		MDI_PI_UNLOCK(pip);
		pip = next;
	}

	/* if found any online/preferred then use this type */
	if (online_path_count > 0) {
		path_cnt = online_path_count;
		preferred = 1;
	} else if (online_nonpref_path_count > 0) {
		path_cnt = online_nonpref_path_count;
		preferred = 0;
	} else {
		path_cnt = 0;
	}

	if (path_cnt == 0) {
		return (MDI_FAILURE);
	}

	/*
	 * Second pass: pick the path_index'th path of the chosen kind.  A
	 * path may have changed state since the first pass (the pathinfo
	 * locks were dropped), in which case the walk can run off the end of
	 * the list and the caller falls back to its own selection.
	 */
	path_index = (bp->b_blkno >> region_size) % path_cnt;
	pip = ct->ct_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		if ((MDI_PI(pip)->pi_state == MDI_PATHINFO_STATE_ONLINE) &&
		    MDI_PI(pip)->pi_preferred == preferred) {
			if (path_index == 0) {
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				*ret_pip = pip;
				return (MDI_SUCCESS);
			}
			path_index--;
		}
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
		MDI_PI_UNLOCK(pip);
		pip = next;
	}

	MDI_DEBUG(4, (CE_NOTE, NULL, "!lba %lx, no pip !!\n",
	    (long)bp->b_blkno));
	return (MDI_FAILURE);
}

/*
 * mdi_select_path():
 *		select a path to access a client device.
 *
 *		mdi_select_path() function is called by the vHCI drivers to
 *		select a path to route the I/O request to.  The caller passes
 *		the block I/O data transfer structure ("buf") as one of the
 *		parameters.  The mpxio framework uses the buf structure
 *		contents to maintain per path statistics (total I/O size /
 *		count pending).
If more than one online path is available to
 *		select, the framework automatically selects a suitable path
 *		for routing I/O request. If a failover operation is active for
 *		this client device the call shall be failed with MDI_BUSY error
 *		code.
 *
 *		By default this function returns a suitable path in online
 *		state based on the current load balancing policy. Currently
 *		we support LOAD_BALANCE_NONE (Previously selected online path
 *		will continue to be used till the path is usable),
 *		LOAD_BALANCE_RR (Online paths will be selected in a round
 *		robin fashion) and LOAD_BALANCE_LBA (Online paths will be
 *		selected based on the logical block). The load balancing
 *		policy is set through the vHCI driver's configuration file
 *		(driver.conf).
 *
 *		vHCI drivers may override this default behavior by specifying
 *		appropriate flags. If start_pip is specified (non NULL) it is
 *		used as the start point to walk and find the next appropriate
 *		path. The following values are currently defined:
 *		MDI_SELECT_ONLINE_PATH (to select an ONLINE path) and/or
 *		MDI_SELECT_STANDBY_PATH (to select a STANDBY path).
 *
 *		The non-standard behavior is used by the scsi_vhci driver,
 *		whenever it has to use a STANDBY/FAULTED path.  Eg. during
 *		attach of client devices (to avoid an unnecessary failover
 *		when the STANDBY path comes up first), during failover
 *		(to activate a STANDBY path as ONLINE).
 *
 *		The selected path is returned in a held state (ref_cnt).
 *		Caller should release the hold by calling mdi_rele_path().
 *
 * Return Values:
 *		MDI_SUCCESS	- Completed successfully
 *		MDI_BUSY 	- Client device is busy failing over
 *		MDI_NOPATH	- Client device is online, but no valid paths
 *				  are available to access this client device
 *		MDI_FAILURE	- Invalid client device or state
 *		MDI_DEVI_ONLINING
 *				- Client device (struct dev_info state) is in
 *				  onlining state.
*/
/*
 * Parameter use, as visible in the body below:
 *	cdip		client devinfo node; MDI_FAILURE is returned when no
 *			client structure is attached to it.
 *	bp		only consulted for LOAD_BALANCE_LBA, where it is handed
 *			to i_mdi_lba_lb().
 *	flags		0 selects the standard behavior ("sb"); any non-zero
 *			value skips the client state checks and forces the
 *			round-robin walk, with the path state to match taken
 *			from the MDI_SELECT_* value.
 *	start_pip	in non-standard mode the walk resumes after this node;
 *			when non-NULL it also seeds the 'preferred' value used
 *			by the round-robin walk.
 *	ret_pip		cleared to NULL on entry; on MDI_SUCCESS it points at
 *			the selected path, which has been held (MDI_PI_HOLD).
 *
 * When no path matches, MDI_BUSY is returned instead of MDI_NOPATH if any
 * path visited by the NONE/RR walks was in a transient state ('retry').
 */
/*ARGSUSED*/
int
mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
    mdi_pathinfo_t *start_pip, mdi_pathinfo_t **ret_pip)
{
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	mdi_pathinfo_t	*head;
	mdi_pathinfo_t	*start;
	client_lb_t	lbp;	/* load balancing policy */
	int		sb = 1;	/* standard behavior */
	int		preferred = 1;	/* preferred path */
	int		cond, cont = 1;
	int		retry = 0;

	if (flags != 0) {
		/*
		 * disable default behavior
		 */
		sb = 0;
	}

	*ret_pip = NULL;
	ct = i_devi_get_client(cdip);
	if (ct == NULL) {
		/* mdi extensions are NULL, Nothing more to do */
		return (MDI_FAILURE);
	}

	MDI_CLIENT_LOCK(ct);

	if (sb) {
		if (MDI_CLIENT_IS_FAILED(ct)) {
			/*
			 * Client is not ready to accept any I/O requests.
			 * Fail this request.
			 */
			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
			    "client state offline ct = %p\n", ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_FAILURE);
		}

		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
			/*
			 * Check for Failover is in progress. If so tell the
			 * caller that this device is busy.
			 */
			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
			    "client failover in progress ct = %p\n", ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_BUSY);
		}

		/*
		 * Check to see whether the client device is attached.
		 * If not so, let the vHCI driver manually select a path
		 * (standby) and let the probe/attach process to continue.
		 */
		if ((MDI_CLIENT_IS_DETACHED(ct)) ||
		    i_ddi_node_state(cdip) < DS_READY) {
			MDI_DEBUG(4, (CE_NOTE, cdip, "!Devi is onlining\n"));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_DEVI_ONLINING);
		}
	}

	/*
	 * Cache in the client list head.  If head of the list is NULL
	 * return MDI_NOPATH
	 */
	head = ct->ct_path_head;
	if (head == NULL) {
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_NOPATH);
	}

	/*
	 * for non default behavior, bypass current
	 * load balancing policy and always use LOAD_BALANCE_RR
	 * except that the start point will be adjusted based
	 * on the provided start_pip
	 */
	lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;

	/*
	 * Note: there is no default case; a policy value other than the three
	 * handled below leaves the switch without visiting any path.
	 */
	switch (lbp) {
	case LOAD_BALANCE_NONE:
		/*
		 * Load balancing is None  or Alternate path mode
		 * Start looking for a online mdi_pathinfo node starting from
		 * last known selected path
		 */
		preferred = 1;
		pip = (mdi_pathinfo_t *)ct->ct_path_last;
		if (pip == NULL) {
			pip = head;
		}
		start = pip;
		do {
			MDI_PI_LOCK(pip);
			/*
			 * No need to explicitly check if the path is disabled.
			 * Since we are checking for state == ONLINE and the
			 * same variable is used for DISABLE/ENABLE information.
			 */
			if (MDI_PI(pip)->pi_state  ==
			    MDI_PATHINFO_STATE_ONLINE &&
			    preferred == MDI_PI(pip)->pi_preferred) {
				/*
				 * Return the path in hold state. Caller should
				 * release the hold by calling mdi_rele_path()
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				ct->ct_path_last = pip;
				*ret_pip = pip;
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}

			/*
			 * Path is busy.
			 */
			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
			    MDI_PI_IS_TRANSIENT(pip))
				retry = 1;
			/*
			 * Keep looking for a next available online path
			 * (the list is treated as circular: wrap to head)
			 */
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (next == NULL) {
				next = head;
			}
			MDI_PI_UNLOCK(pip);
			pip = next;
			/*
			 * Back at the starting node: the first full circle
			 * switches to non-preferred paths, the second one
			 * ends the search.
			 */
			if (start == pip && preferred) {
				preferred = 0;
			} else if (start == pip && !preferred) {
				cont = 0;
			}
		} while (cont);
		break;

	case LOAD_BALANCE_LBA:
		/*
		 * Make sure we are looking
		 * for an online path. Otherwise, if it is for a STANDBY
		 * path request, it will go through and fetch an ONLINE
		 * path which is not desirable.
		 */
		if ((ct->ct_lb_args != NULL) &&
			    (ct->ct_lb_args->region_size) && bp &&
				(sb || (flags == MDI_SELECT_ONLINE_PATH))) {
			if (i_mdi_lba_lb(ct, ret_pip, bp)
				    == MDI_SUCCESS) {
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}
		}
		/* FALLTHROUGH */
	case LOAD_BALANCE_RR:
		/*
		 * Load balancing is Round Robin. Start looking for a online
		 * mdi_pathinfo node starting from last known selected path
		 * as the start point.  If override flags are specified,
		 * process accordingly.
		 * If the search is already in effect(start_pip not null),
		 * then lets just use the same path preference to continue the
		 * traversal.
		 */

		if (start_pip != NULL) {
			preferred = MDI_PI(start_pip)->pi_preferred;
		} else {
			preferred = 1;
		}

		/*
		 * Standard mode resumes after the last selected path;
		 * non-standard mode resumes after the caller's start_pip.
		 */
		start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
		if (start == NULL) {
			pip = head;
		} else {
			pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
			if (pip == NULL) {
				if (!sb) {
					if (preferred == 0) {
						/*
						 * Looks like we have completed
						 * the traversal as preferred
						 * value is 0. Time to bail out.
						 */
						*ret_pip = NULL;
						MDI_CLIENT_UNLOCK(ct);
						return (MDI_NOPATH);
					} else {
						/*
						 * Looks like we reached the
						 * end of the list. Lets enable
						 * traversal of non preferred
						 * paths.
						 */
						preferred = 0;
					}
				}
				pip = head;
			}
		}
		start = pip;
		do {
			MDI_PI_LOCK(pip);
			/*
			 * 'cond' is 1 when this path has the wanted state
			 * (ONLINE in standard mode, otherwise as selected by
			 * flags) and the preference of the current pass.
			 */
			if (sb) {
				cond = ((MDI_PI(pip)->pi_state ==
				    MDI_PATHINFO_STATE_ONLINE &&
					MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
			} else {
				if (flags == MDI_SELECT_ONLINE_PATH) {
					cond = ((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE &&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else if (flags == MDI_SELECT_STANDBY_PATH) {
					cond = ((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY &&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else if (flags == (MDI_SELECT_ONLINE_PATH |
				    MDI_SELECT_STANDBY_PATH)) {
					cond = (((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE ||
					    (MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY)) &&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else {
					/* unrecognized flags match nothing */
					cond = 0;
				}
			}
			/*
			 * No need to explicitly check if the path is disabled.
			 * Since we are checking for state == ONLINE and the
			 * same variable is used for DISABLE/ENABLE information.
			 */
			if (cond) {
				/*
				 * Return the path in hold state. Caller should
				 * release the hold by calling mdi_rele_path()
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				/* only standard mode moves the RR cursor */
				if (sb)
					ct->ct_path_last = pip;
				*ret_pip = pip;
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}
			/*
			 * Path is busy.
			 */
			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
			    MDI_PI_IS_TRANSIENT(pip))
				retry = 1;

			/*
			 * Keep looking for a next available online path
			 * (do_again is entered with pip locked)
			 */
do_again:
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (next == NULL) {
				if (!sb) {
					if (preferred == 1) {
						/*
						 * Looks like we reached the
						 * end of the list. Lets enable
						 * traversal of non preferred
						 * paths.
						 */
						preferred = 0;
						next = head;
					} else {
						/*
						 * We have done both the passes
						 * Preferred as well as for
						 * Non-preferred. Bail out now.
						 */
						cont = 0;
					}
				} else {
					/*
					 * Standard behavior case.
					 */
					next = head;
				}
			}
			MDI_PI_UNLOCK(pip);
			if (cont == 0) {
				break;
			}
			pip = next;

			if (!sb) {
				/*
				 * We need to handle the selection of
				 * non-preferred path in the following
				 * case:
				 *
				 * +------+   +------+   +------+   +-----+
				 * | A : 1| - | B : 1| - | C : 0| - |NULL |
				 * +------+   +------+   +------+   +-----+
				 *
				 * If we start the search with B, we need to
				 * skip beyond B to pick C which is non -
				 * preferred in the second pass. The following
				 * test, if true, will allow us to skip over
				 * the 'start'(B in the example) to select
				 * other non preferred elements.
				 */
				if ((start_pip != NULL) &&
				    (start_pip == pip) &&
				    (MDI_PI(start_pip)->pi_preferred
				    != preferred)) {
					/*
					 * try again after going past the start
					 * pip
					 */
					MDI_PI_LOCK(pip);
					goto do_again;
				}
			} else {
				/*
				 * Standard behavior case
				 */
				if (start == pip && preferred) {
					/* look for nonpreferred paths */
					preferred = 0;
				} else if (start == pip && !preferred) {
					/*
					 * Exit condition
					 */
					cont = 0;
				}
			}
		} while (cont);
		break;
	}

	MDI_CLIENT_UNLOCK(ct);
	/*
	 * Nothing matched: report BUSY if a transient path was seen so the
	 * caller can tell it apart from "no path at all".
	 */
	if (retry == 1) {
		return (MDI_BUSY);
	} else {
		return (MDI_NOPATH);
	}
}

/*
 * For a client, return the next available path to any phci
 *
 * Note:
 *		Caller should hold the branch's devinfo node to get a consistent
 *		snap shot of the mdi_pathinfo nodes.
 *
 *		Please note that even the list is stable the mdi_pathinfo
 *		node state and properties are volatile.
The caller should lock * and unlock the nodes by calling mdi_pi_lock() and * mdi_pi_unlock() functions to get a stable properties. * * If there is a need to use the nodes beyond the hold of the * devinfo node period (For ex. I/O), then mdi_pathinfo node * need to be held against unexpected removal by calling * mdi_hold_path() and should be released by calling * mdi_rele_path() on completion. */ mdi_pathinfo_t * mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip) { mdi_client_t *ct; if (!MDI_CLIENT(ct_dip)) return (NULL); /* * Walk through client link */ ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client; ASSERT(ct != NULL); if (pip == NULL) return ((mdi_pathinfo_t *)ct->ct_path_head); return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link); } /* * For a phci, return the next available path to any client * Note: ditto mdi_get_next_phci_path() */ mdi_pathinfo_t * mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip) { mdi_phci_t *ph; if (!MDI_PHCI(ph_dip)) return (NULL); /* * Walk through pHCI link */ ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci; ASSERT(ph != NULL); if (pip == NULL) return ((mdi_pathinfo_t *)ph->ph_path_head); return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link); } /* * mdi_get_nextpath(): * mdi_pathinfo node walker function. Get the next node from the * client or pHCI device list. * * XXX This is wrapper function for compatibility purposes only. * * It doesn't work under Multi-level MPxIO, where a dip * is both client and phci (which link should next_path follow?). * Once Leadville is modified to call mdi_get_next_phci/client_path, * this interface should be removed. */ void mdi_get_next_path(dev_info_t *dip, mdi_pathinfo_t *pip, mdi_pathinfo_t **ret_pip) { if (MDI_CLIENT(dip)) { *ret_pip = mdi_get_next_phci_path(dip, pip); } else if (MDI_PHCI(dip)) { *ret_pip = mdi_get_next_client_path(dip, pip); } else { *ret_pip = NULL; } } /* * mdi_hold_path(): * Hold the mdi_pathinfo node against unwanted unexpected free. 
* Return Values: * None */ void mdi_hold_path(mdi_pathinfo_t *pip) { if (pip) { MDI_PI_LOCK(pip); MDI_PI_HOLD(pip); MDI_PI_UNLOCK(pip); } } /* * mdi_rele_path(): * Release the mdi_pathinfo node which was selected * through mdi_select_path() mechanism or manually held by * calling mdi_hold_path(). * Return Values: * None */ void mdi_rele_path(mdi_pathinfo_t *pip) { if (pip) { MDI_PI_LOCK(pip); MDI_PI_RELE(pip); if (MDI_PI(pip)->pi_ref_cnt == 0) { cv_broadcast(&MDI_PI(pip)->pi_ref_cv); } MDI_PI_UNLOCK(pip); } } /* * mdi_pi_lock(): * Lock the mdi_pathinfo node. * Note: * The caller should release the lock by calling mdi_pi_unlock() */ void mdi_pi_lock(mdi_pathinfo_t *pip) { ASSERT(pip != NULL); if (pip) { MDI_PI_LOCK(pip); } } /* * mdi_pi_unlock(): * Unlock the mdi_pathinfo node. * Note: * The mdi_pathinfo node should have been locked with mdi_pi_lock() */ void mdi_pi_unlock(mdi_pathinfo_t *pip) { ASSERT(pip != NULL); if (pip) { MDI_PI_UNLOCK(pip); } } /* * mdi_pi_find(): * Search the list of mdi_pathinfo nodes attached to the * pHCI/Client device node whose path address matches "paddr". * Returns a pointer to the mdi_pathinfo node if a matching node is * found. * Return Values: * mdi_pathinfo node handle * NULL * Notes: * Caller need not hold any locks to call this function. */ mdi_pathinfo_t * mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr) { mdi_phci_t *ph; mdi_vhci_t *vh; mdi_client_t *ct; mdi_pathinfo_t *pip = NULL; if ((pdip == NULL) || (paddr == NULL)) { return (NULL); } ph = i_devi_get_phci(pdip); if (ph == NULL) { /* * Invalid pHCI device, Nothing more to do. */ MDI_DEBUG(2, (CE_WARN, NULL, "!mdi_pi_find: invalid phci")); return (NULL); } vh = ph->ph_vhci; if (vh == NULL) { /* * Invalid vHCI device, Nothing more to do. */ MDI_DEBUG(2, (CE_WARN, NULL, "!mdi_pi_find: invalid phci")); return (NULL); } /* * Look for client device identified by caddr (guid) */ if (caddr == NULL) { /* * Find a mdi_pathinfo node under pHCI list for a matching * unit address. 
*/ mutex_enter(&ph->ph_mutex); pip = (mdi_pathinfo_t *)ph->ph_path_head; while (pip != NULL) { if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { break; } pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; } mutex_exit(&ph->ph_mutex); return (pip); } /* * Find the client device corresponding to 'caddr' */ mutex_enter(&mdi_mutex); ct = i_mdi_client_find(vh, caddr); if (ct == NULL) { /* * Client not found, Obviously mdi_pathinfo node has not been * created yet. */ mutex_exit(&mdi_mutex); return (pip); } /* * Hold the client lock and look for a mdi_pathinfo node with matching * pHCI and paddr */ MDI_CLIENT_LOCK(ct); /* * Release the global mutex as it is no more needed. Note: We always * respect the locking order while acquiring. */ mutex_exit(&mdi_mutex); pip = (mdi_pathinfo_t *)ct->ct_path_head; while (pip != NULL) { /* * Compare the unit address */ if ((MDI_PI(pip)->pi_phci == ph) && strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { break; } pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; } MDI_CLIENT_UNLOCK(ct); return (pip); } /* * mdi_pi_alloc(): * Allocate and initialize a new instance of a mdi_pathinfo node. * The mdi_pathinfo node returned by this function identifies a * unique device path is capable of having properties attached * and passed to mdi_pi_online() to fully attach and online the * path and client device node. * The mdi_pathinfo node returned by this function must be * destroyed using mdi_pi_free() if the path is no longer * operational or if the caller fails to attach a client device * node when calling mdi_pi_online(). The framework will not free * the resources allocated. * This function can be called from both interrupt and kernel * contexts. DDI_NOSLEEP flag should be used while calling * from interrupt contexts. 
* Return Values: * MDI_SUCCESS * MDI_FAILURE * MDI_NOMEM */ /*ARGSUSED*/ int mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr, char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip) { mdi_vhci_t *vh; mdi_phci_t *ph; mdi_client_t *ct; mdi_pathinfo_t *pip = NULL; dev_info_t *cdip; int rv = MDI_NOMEM; if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL || ret_pip == NULL) { /* Nothing more to do */ return (MDI_FAILURE); } *ret_pip = NULL; ph = i_devi_get_phci(pdip); ASSERT(ph != NULL); if (ph == NULL) { /* Invalid pHCI device, return failure */ MDI_DEBUG(1, (CE_WARN, NULL, "!mdi_pi_alloc: invalid pHCI=%p", pdip)); return (MDI_FAILURE); } MDI_PHCI_LOCK(ph); vh = ph->ph_vhci; if (vh == NULL) { /* Invalid vHCI device, return failure */ MDI_DEBUG(1, (CE_WARN, NULL, "!mdi_pi_alloc: invalid pHCI=%p", pdip)); MDI_PHCI_UNLOCK(ph); return (MDI_FAILURE); } if (MDI_PHCI_IS_READY(ph) == 0) { /* * Do not allow new node creation when pHCI is in * offline/suspended states */ MDI_DEBUG(1, (CE_WARN, NULL, "mdi_pi_alloc: pHCI=%p is not ready", ph)); MDI_PHCI_UNLOCK(ph); return (MDI_BUSY); } MDI_PHCI_UNSTABLE(ph); MDI_PHCI_UNLOCK(ph); /* * Look for a client device with matching guid identified by caddr, * If not found create one */ mutex_enter(&mdi_mutex); ct = i_mdi_client_find(vh, caddr); if (ct == NULL) { ct = i_mdi_client_alloc(vh, cname, caddr, flags); if (ct == NULL) goto fail; } if (ct->ct_dip == NULL) { /* * Allocate a devinfo node */ ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr, compatible, ncompatible, flags); if (ct->ct_dip == NULL) { (void) i_mdi_client_free(vh, ct); goto fail; } } cdip = ct->ct_dip; DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT; DEVI(cdip)->devi_mdi_client = (caddr_t)ct; pip = (mdi_pathinfo_t *)ct->ct_path_head; while (pip != NULL) { /* * Compare the unit address */ if ((MDI_PI(pip)->pi_phci == ph) && strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { break; } pip = (mdi_pathinfo_t 
*)MDI_PI(pip)->pi_client_link; } if (pip == NULL) { /* * This is a new path for this client device. Allocate and * initialize a new pathinfo node */ pip = i_mdi_pi_alloc(ph, paddr, ct, flags); if (pip == NULL) { (void) i_mdi_client_free(vh, ct); goto fail; } } rv = MDI_SUCCESS; fail: /* * Release the global mutex. */ mutex_exit(&mdi_mutex); /* * Mark the pHCI as stable */ MDI_PHCI_LOCK(ph); MDI_PHCI_STABLE(ph); MDI_PHCI_UNLOCK(ph); *ret_pip = pip; return (rv); } /*ARGSUSED*/ int mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr, int flags, mdi_pathinfo_t **ret_pip) { return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0, flags, ret_pip)); } /* * i_mdi_pi_alloc(): * Allocate a mdi_pathinfo node and add to the pHCI path list * Return Values: * mdi_pathinfo */ /*ARGSUSED*/ static mdi_pathinfo_t * i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct, int flags) { mdi_pathinfo_t *pip = NULL; char *pi_addr = NULL; nvlist_t *pi_prop = NULL; int ct_circular; int ph_circular; pip = kmem_zalloc(sizeof (struct mdi_pathinfo), (flags == DDI_SLEEP) ? KM_SLEEP : KM_NOSLEEP); if (pip == NULL) goto fail; mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL); MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT | MDI_PATHINFO_STATE_TRANSIENT; if (MDI_PHCI_IS_USER_DISABLED(ph)) MDI_PI_SET_USER_DISABLE(pip); if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph)) MDI_PI_SET_DRV_DISABLE_TRANS(pip); if (MDI_PHCI_IS_DRV_DISABLED(ph)) MDI_PI_SET_DRV_DISABLE(pip); MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT; cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL); MDI_PI(pip)->pi_client = ct; MDI_PI(pip)->pi_phci = ph; pi_addr = MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, (flags == DDI_SLEEP) ? KM_SLEEP : KM_NOSLEEP); if (pi_addr == NULL) goto fail; (void) strcpy(MDI_PI(pip)->pi_addr, paddr); (void) nvlist_alloc(&pi_prop, NV_UNIQUE_NAME, (flags == DDI_SLEEP) ? 
KM_SLEEP : KM_NOSLEEP); if (pi_prop == NULL) goto fail; MDI_PI(pip)->pi_prop = pi_prop; MDI_PI(pip)->pi_pprivate = NULL; MDI_PI(pip)->pi_cprivate = NULL; MDI_PI(pip)->pi_vprivate = NULL; MDI_PI(pip)->pi_client_link = NULL; MDI_PI(pip)->pi_phci_link = NULL; MDI_PI(pip)->pi_ref_cnt = 0; MDI_PI(pip)->pi_kstats = NULL; MDI_PI(pip)->pi_preferred = 1; cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL); /* * Lock both dev_info nodes against changes in parallel. */ ndi_devi_enter(ct->ct_dip, &ct_circular); ndi_devi_enter(ph->ph_dip, &ph_circular); i_mdi_phci_add_path(ph, pip); i_mdi_client_add_path(ct, pip); ndi_devi_exit(ph->ph_dip, ph_circular); ndi_devi_exit(ct->ct_dip, ct_circular); return (pip); fail: if (pi_prop) (void) nvlist_free(pi_prop); if (pi_addr) kmem_free(pi_addr, strlen(paddr) + 1); kmem_free(pip, sizeof (struct mdi_pathinfo)); return (NULL); } /* * i_mdi_phci_add_path(): * Add a mdi_pathinfo node to pHCI list. * Notes: * Caller should per-pHCI mutex */ static void i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip) { ASSERT(DEVI_BUSY_OWNED(ph->ph_dip)); if (ph->ph_path_head == NULL) { ph->ph_path_head = pip; } else { MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip); } ph->ph_path_tail = pip; ph->ph_path_count++; } /* * i_mdi_client_add_path(): * Add mdi_pathinfo node to client list */ static void i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip) { ASSERT(DEVI_BUSY_OWNED(ct->ct_dip)); if (ct->ct_path_head == NULL) { ct->ct_path_head = pip; } else { MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip); } ct->ct_path_tail = pip; ct->ct_path_count++; } /* * mdi_pi_free(): * Free the mdi_pathinfo node and also client device node if this * is the last path to the device * Return Values: * MDI_SUCCESS * MDI_FAILURE * MDI_BUSY */ /*ARGSUSED*/ int mdi_pi_free(mdi_pathinfo_t *pip, int flags) { int rv = MDI_SUCCESS; mdi_vhci_t *vh; mdi_phci_t *ph; mdi_client_t *ct; int (*f)(); int client_held = 0; MDI_PI_LOCK(pip); ph = 
MDI_PI(pip)->pi_phci; ASSERT(ph != NULL); if (ph == NULL) { /* * Invalid pHCI device, return failure */ MDI_DEBUG(1, (CE_WARN, NULL, "!mdi_pi_free: invalid pHCI")); MDI_PI_UNLOCK(pip); return (MDI_FAILURE); } vh = ph->ph_vhci; ASSERT(vh != NULL); if (vh == NULL) { /* Invalid pHCI device, return failure */ MDI_DEBUG(1, (CE_WARN, NULL, "!mdi_pi_free: invalid vHCI")); MDI_PI_UNLOCK(pip); return (MDI_FAILURE); } ct = MDI_PI(pip)->pi_client; ASSERT(ct != NULL); if (ct == NULL) { /* * Invalid Client device, return failure */ MDI_DEBUG(1, (CE_WARN, NULL, "!mdi_pi_free: invalid client")); MDI_PI_UNLOCK(pip); return (MDI_FAILURE); } /* * Check to see for busy condition. A mdi_pathinfo can only be freed * if the node state is either offline or init and the reference count * is zero. */ if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) || MDI_PI_IS_INITING(pip))) { /* * Node is busy */ MDI_DEBUG(1, (CE_WARN, NULL, "!mdi_pi_free: pathinfo node is busy pip=%p", pip)); MDI_PI_UNLOCK(pip); return (MDI_BUSY); } while (MDI_PI(pip)->pi_ref_cnt != 0) { /* * Give a chance for pending I/Os to complete. */ MDI_DEBUG(1, (CE_NOTE, ct->ct_vhci->vh_dip, "!i_mdi_pi_free: " "%d cmds still pending on path: %p\n", MDI_PI(pip)->pi_ref_cnt, pip)); if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv, &MDI_PI(pip)->pi_mutex, ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) { /* * The timeout time reached without ref_cnt being zero * being signaled. */ MDI_DEBUG(1, (CE_NOTE, ct->ct_vhci->vh_dip, "!i_mdi_pi_free: " "Timeout reached on path %p without the cond\n", pip)); MDI_DEBUG(1, (CE_NOTE, ct->ct_vhci->vh_dip, "!i_mdi_pi_free: " "%d cmds still pending on path: %p\n", MDI_PI(pip)->pi_ref_cnt, pip)); MDI_PI_UNLOCK(pip); return (MDI_BUSY); } } if (MDI_PI(pip)->pi_pm_held) { client_held = 1; } MDI_PI_UNLOCK(pip); MDI_CLIENT_LOCK(ct); /* Prevent further failovers till mdi_mutex is held */ MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct); /* * Wait till failover is complete before removing this node. 
*/ while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) cv_wait(&ct->ct_failover_cv, &ct->ct_mutex); MDI_CLIENT_UNLOCK(ct); mutex_enter(&mdi_mutex); MDI_CLIENT_LOCK(ct); MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct); if (!MDI_PI_IS_INITING(pip)) { f = vh->vh_ops->vo_pi_uninit; if (f != NULL) { rv = (*f)(vh->vh_dip, pip, 0); } } /* * If vo_pi_uninit() completed successfully. */ if (rv == MDI_SUCCESS) { if (client_held) { MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_free " "i_mdi_pm_rele_client\n")); i_mdi_pm_rele_client(ct, 1); } i_mdi_pi_free(ph, pip, ct); if (ct->ct_path_count == 0) { /* * Client lost its last path. * Clean up the client device */ MDI_CLIENT_UNLOCK(ct); (void) i_mdi_client_free(ct->ct_vhci, ct); mutex_exit(&mdi_mutex); return (rv); } } MDI_CLIENT_UNLOCK(ct); mutex_exit(&mdi_mutex); return (rv); } /* * i_mdi_pi_free(): * Free the mdi_pathinfo node */ static void i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct) { int ct_circular; int ph_circular; /* * remove any per-path kstats */ i_mdi_pi_kstat_destroy(pip); ndi_devi_enter(ct->ct_dip, &ct_circular); ndi_devi_enter(ph->ph_dip, &ph_circular); i_mdi_client_remove_path(ct, pip); i_mdi_phci_remove_path(ph, pip); ndi_devi_exit(ph->ph_dip, ph_circular); ndi_devi_exit(ct->ct_dip, ct_circular); mutex_destroy(&MDI_PI(pip)->pi_mutex); cv_destroy(&MDI_PI(pip)->pi_state_cv); cv_destroy(&MDI_PI(pip)->pi_ref_cv); if (MDI_PI(pip)->pi_addr) { kmem_free(MDI_PI(pip)->pi_addr, strlen(MDI_PI(pip)->pi_addr) + 1); MDI_PI(pip)->pi_addr = NULL; } if (MDI_PI(pip)->pi_prop) { (void) nvlist_free(MDI_PI(pip)->pi_prop); MDI_PI(pip)->pi_prop = NULL; } kmem_free(pip, sizeof (struct mdi_pathinfo)); } /* * i_mdi_phci_remove_path(): * Remove a mdi_pathinfo node from pHCI list. 
* Notes: * Caller should hold per-pHCI mutex */ static void i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip) { mdi_pathinfo_t *prev = NULL; mdi_pathinfo_t *path = NULL; ASSERT(DEVI_BUSY_OWNED(ph->ph_dip)); path = ph->ph_path_head; while (path != NULL) { if (path == pip) { break; } prev = path; path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link; } if (path) { ph->ph_path_count--; if (prev) { MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link; } else { ph->ph_path_head = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link; } if (ph->ph_path_tail == path) { ph->ph_path_tail = prev; } } /* * Clear the pHCI link */ MDI_PI(pip)->pi_phci_link = NULL; MDI_PI(pip)->pi_phci = NULL; } /* * i_mdi_client_remove_path(): * Remove a mdi_pathinfo node from client path list. */ static void i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip) { mdi_pathinfo_t *prev = NULL; mdi_pathinfo_t *path; ASSERT(DEVI_BUSY_OWNED(ct->ct_dip)); path = ct->ct_path_head; while (path != NULL) { if (path == pip) { break; } prev = path; path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link; } if (path) { ct->ct_path_count--; if (prev) { MDI_PI(prev)->pi_client_link = MDI_PI(path)->pi_client_link; } else { ct->ct_path_head = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link; } if (ct->ct_path_tail == path) { ct->ct_path_tail = prev; } if (ct->ct_path_last == path) { ct->ct_path_last = ct->ct_path_head; } } MDI_PI(pip)->pi_client_link = NULL; MDI_PI(pip)->pi_client = NULL; } /* * i_mdi_pi_state_change(): * online a mdi_pathinfo node * * Return Values: * MDI_SUCCESS * MDI_FAILURE */ /*ARGSUSED*/ static int i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag) { int rv = MDI_SUCCESS; mdi_vhci_t *vh; mdi_phci_t *ph; mdi_client_t *ct; int (*f)(); dev_info_t *cdip; MDI_PI_LOCK(pip); ph = MDI_PI(pip)->pi_phci; ASSERT(ph); if (ph == NULL) { /* * Invalid pHCI device, fail the request */ MDI_PI_UNLOCK(pip); MDI_DEBUG(1, (CE_WARN, NULL, "!mdi_pi_state_change: invalid 
phci"));
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh);
	if (vh == NULL) {
		/*
		 * Invalid vHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid vhci"));
		return (MDI_FAILURE);
	}

	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/*
		 * Invalid client device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid client"));
		return (MDI_FAILURE);
	}

	/*
	 * If this path has not been initialized yet, Callback vHCI driver's
	 * pathinfo node initialize entry point
	 *
	 * The pathinfo lock is dropped across the callback; on callback
	 * failure we return without re-acquiring it.
	 */
	if (MDI_PI_IS_INITING(pip)) {
		MDI_PI_UNLOCK(pip);
		f = vh->vh_ops->vo_pi_init;
		if (f != NULL) {
			rv = (*f)(vh->vh_dip, pip, 0);
			if (rv != MDI_SUCCESS) {
				MDI_DEBUG(1, (CE_WARN, vh->vh_dip,
				    "!vo_pi_init: failed vHCI=0x%p, pip=0x%p",
				    vh, pip));
				return (MDI_FAILURE);
			}
		}
		MDI_PI_LOCK(pip);
		MDI_PI_CLEAR_TRANSIENT(pip);
	}

	/*
	 * Do not allow state transition when pHCI is in offline/suspended
	 * states
	 */
	i_mdi_phci_lock(ph, pip);
	if (MDI_PHCI_IS_READY(ph) == 0) {
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: pHCI not ready, pHCI=%p", ph));
		MDI_PI_UNLOCK(pip);
		i_mdi_phci_unlock(ph);
		return (MDI_BUSY);
	}
	/*
	 * Paired with the MDI_PHCI_STABLE() at state_change_exit; every
	 * return path below this point goes through that label.
	 */
	MDI_PHCI_UNSTABLE(ph);
	i_mdi_phci_unlock(ph);

	/*
	 * Check if mdi_pathinfo state is in transient state.
	 * If yes, offlining is in progress and wait till transient state is
	 * cleared.
	 */
	if (MDI_PI_IS_TRANSIENT(pip)) {
		while (MDI_PI_IS_TRANSIENT(pip)) {
			cv_wait(&MDI_PI(pip)->pi_state_cv,
			    &MDI_PI(pip)->pi_mutex);
		}
	}

	/*
	 * Grab the client lock in reverse order sequence and release the
	 * mdi_pathinfo mutex.
	 */
	i_mdi_client_lock(ct, pip);
	MDI_PI_UNLOCK(pip);

	/*
	 * Wait till failover state is cleared
	 */
	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

	/*
	 * Mark the mdi_pathinfo node state as transient
	 */
	MDI_PI_LOCK(pip);
	switch (state) {
	case MDI_PATHINFO_STATE_ONLINE:
		MDI_PI_SET_ONLINING(pip);
		break;

	case MDI_PATHINFO_STATE_STANDBY:
		MDI_PI_SET_STANDBYING(pip);
		break;

	case MDI_PATHINFO_STATE_FAULT:
		/*
		 * Mark the pathinfo state as FAULTED
		 */
		MDI_PI_SET_FAULTING(pip);
		MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR);
		break;

	case MDI_PATHINFO_STATE_OFFLINE:
		/*
		 * ndi_devi_offline() cannot hold pip or ct locks.
		 */
		MDI_PI_UNLOCK(pip);
		/*
		 * Do not offline if path will become last path and path
		 * is busy for user initiated events.
		 */
		cdip = ct->ct_dip;
		if ((flag & NDI_DEVI_REMOVE) &&
		    (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED)) {
			i_mdi_client_unlock(ct);
			rv = ndi_devi_offline(cdip, 0);
			if (rv != NDI_SUCCESS) {
				/*
				 * Convert to MDI error code
				 */
				switch (rv) {
				case NDI_BUSY:
					rv = MDI_BUSY;
					break;
				default:
					rv = MDI_FAILURE;
					break;
				}
				/*
				 * Neither the client nor the pathinfo lock
				 * is held here; only the pHCI still has to
				 * be marked stable.
				 */
				goto state_change_exit;
			} else {
				i_mdi_client_lock(ct, NULL);
			}
		}
		/*
		 * Mark the mdi_pathinfo node state as transient
		 */
		MDI_PI_LOCK(pip);
		MDI_PI_SET_OFFLINING(pip);
		break;
	}
	MDI_PI_UNLOCK(pip);
	MDI_CLIENT_UNSTABLE(ct);
	i_mdi_client_unlock(ct);

	/*
	 * Call the vHCI's state change entry point, if it registered one,
	 * with no MDI locks held.
	 */
	f = vh->vh_ops->vo_pi_state_change;
	if (f != NULL) {
		rv = (*f)(vh->vh_dip, pip, state, 0, flag);
		if (rv == MDI_NOT_SUPPORTED) {
			MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct);
		}
		if (rv != MDI_SUCCESS) {
			MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
			    "!vo_pi_state_change: failed rv = %x", rv));
		}
	}
	MDI_CLIENT_LOCK(ct);
	MDI_PI_LOCK(pip);
	/*
	 * Commit the transition on success, otherwise fall back to the
	 * state the path had before it was marked transient.
	 */
	if (MDI_PI_IS_TRANSIENT(pip)) {
		if (rv == MDI_SUCCESS) {
			MDI_PI_CLEAR_TRANSIENT(pip);
		} else {
			MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
		}
	}

	/*
	 * Wake anyone waiting for this mdi_pathinfo node
	 */
	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
	MDI_PI_UNLOCK(pip);

	/*
	 * Mark the client device as stable
	 */
	MDI_CLIENT_STABLE(ct);
	if (rv == MDI_SUCCESS) {
		if (ct->ct_unstable == 0) {
			cdip = ct->ct_dip;

			/*
			 * Onlining the mdi_pathinfo node will impact the
			 * client state Update the client and dev_info node
			 * state accordingly
			 *
			 * From here until the conversion switch below, rv
			 * carries NDI_* result codes.
			 */
			rv = NDI_SUCCESS;
			i_mdi_client_update_state(ct);
			switch (MDI_CLIENT_STATE(ct)) {
			case MDI_CLIENT_STATE_OPTIMAL:
			case MDI_CLIENT_STATE_DEGRADED:
				if (cdip &&
				    (i_ddi_node_state(cdip) < DS_READY) &&
				    ((state == MDI_PATHINFO_STATE_ONLINE) ||
				    (state == MDI_PATHINFO_STATE_STANDBY))) {

					i_mdi_client_unlock(ct);
					/*
					 * Must do ndi_devi_online() through
					 * hotplug thread for deferred
					 * attach mechanism to work
					 */
					rv = ndi_devi_online(cdip, 0);
					i_mdi_client_lock(ct, NULL);
					if ((rv != NDI_SUCCESS) &&
					    (MDI_CLIENT_STATE(ct) ==
					    MDI_CLIENT_STATE_DEGRADED)) {
						/*
						 * ndi_devi_online failed.
						 * Reset client flags to
						 * offline.
						 */
						MDI_DEBUG(1, (CE_WARN, cdip,
						    "!ndi_devi_online: failed "
						    " Error: %x", rv));
						MDI_CLIENT_SET_OFFLINE(ct);
					}
					if (rv != NDI_SUCCESS) {
						/* Reset the path state */
						MDI_PI_LOCK(pip);
						MDI_PI(pip)->pi_state =
						    MDI_PI_OLD_STATE(pip);
						MDI_PI_UNLOCK(pip);
					}
				}
				break;

			case MDI_CLIENT_STATE_FAILED:
				/*
				 * This is the last path case for
				 * non-user initiated events.
				 */
				if (((flag & NDI_DEVI_REMOVE) == 0) &&
				    cdip && (i_ddi_node_state(cdip) >=
				    DS_INITIALIZED)) {
					i_mdi_client_unlock(ct);
					rv = ndi_devi_offline(cdip, 0);
					i_mdi_client_lock(ct, NULL);

					if (rv != NDI_SUCCESS) {
						/*
						 * ndi_devi_offline failed.
						 * Reset client flags to
						 * online as the path could not
						 * be offlined.
						 */
						MDI_DEBUG(1, (CE_WARN, cdip,
						    "!ndi_devi_offline: failed "
						    " Error: %x", rv));
						MDI_CLIENT_SET_ONLINE(ct);
					}
				}
				break;
			}
			/*
			 * Convert to MDI error code
			 */
			switch (rv) {
			case NDI_SUCCESS:
				MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
				i_mdi_report_path_state(ct, pip);
				rv = MDI_SUCCESS;
				break;
			case NDI_BUSY:
				rv = MDI_BUSY;
				break;
			default:
				rv = MDI_FAILURE;
				break;
			}
		}
	}
	MDI_CLIENT_UNLOCK(ct);

state_change_exit:
	/*
	 * Mark the pHCI as stable again.
	 */
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_STABLE(ph);
	MDI_PHCI_UNLOCK(ph);
	return (rv);
}

/*
 * mdi_pi_online():
 *		Place the path_info node in the online state.  The path is
 *		now available to be selected by mdi_select_path() for
 *		transporting I/O requests to client devices.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */
int
mdi_pi_online(mdi_pathinfo_t *pip, int flags)
{
	mdi_client_t *ct = MDI_PI(pip)->pi_client;
	dev_info_t *cdip;
	int		client_held = 0;
	int rv;

	ASSERT(ct != NULL);
	rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags);
	if (rv != MDI_SUCCESS)
		return (rv);

	/*
	 * Take the power management hold on the path only if it is not
	 * already held; client_held records that we took it.
	 */
	MDI_PI_LOCK(pip);
	if (MDI_PI(pip)->pi_pm_held == 0) {
		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online "
		    "i_mdi_pm_hold_pip\n"));
		i_mdi_pm_hold_pip(pip);
		client_held = 1;
	}
	MDI_PI_UNLOCK(pip);

	if (client_held) {
		MDI_CLIENT_LOCK(ct);
		if (ct->ct_power_cnt == 0) {
			rv = i_mdi_power_all_phci(ct);
		}

		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online "
		    "i_mdi_pm_hold_client\n"));
		i_mdi_pm_hold_client(ct, 1);
		MDI_CLIENT_UNLOCK(ct);
	}

	/*
	 * Create the per-path (pathinfo) IO and error kstats which
	 * are reported via iostat(1m).
	 *
	 * Defer creating the per-path kstats if device is not yet
	 * attached;  the names of the kstats are constructed in part
	 * using the devices instance number which is assigned during
	 * process of attaching the client device.
	 *
	 * The framework post_attach handler, mdi_post_attach(), is
	 * responsible for initializing the client's pathinfo list
	 * once successfully attached.
*/ cdip = ct->ct_dip; ASSERT(cdip); if (cdip == NULL || (i_ddi_node_state(cdip) < DS_ATTACHED)) return (rv); MDI_CLIENT_LOCK(ct); rv = i_mdi_pi_kstat_create(pip); MDI_CLIENT_UNLOCK(ct); return (rv); } /* * mdi_pi_standby(): * Place the mdi_pathinfo node in standby state * * Return Values: * MDI_SUCCESS * MDI_FAILURE */ int mdi_pi_standby(mdi_pathinfo_t *pip, int flags) { return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags)); } /* * mdi_pi_fault(): * Place the mdi_pathinfo node in fault'ed state * Return Values: * MDI_SUCCESS * MDI_FAILURE */ int mdi_pi_fault(mdi_pathinfo_t *pip, int flags) { return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags)); } /* * mdi_pi_offline(): * Offline a mdi_pathinfo node. * Return Values: * MDI_SUCCESS * MDI_FAILURE */ int mdi_pi_offline(mdi_pathinfo_t *pip, int flags) { int ret, client_held = 0; mdi_client_t *ct; ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags); if (ret == MDI_SUCCESS) { MDI_PI_LOCK(pip); if (MDI_PI(pip)->pi_pm_held) { client_held = 1; } MDI_PI_UNLOCK(pip); if (client_held) { ct = MDI_PI(pip)->pi_client; MDI_CLIENT_LOCK(ct); MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_offline i_mdi_pm_rele_client\n")); i_mdi_pm_rele_client(ct, 1); MDI_CLIENT_UNLOCK(ct); } } return (ret); } /* * i_mdi_pi_offline(): * Offline a mdi_pathinfo node and call the vHCI driver's callback */ static int i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags) { dev_info_t *vdip = NULL; mdi_vhci_t *vh = NULL; mdi_client_t *ct = NULL; int (*f)(); int rv; MDI_PI_LOCK(pip); ct = MDI_PI(pip)->pi_client; ASSERT(ct != NULL); while (MDI_PI(pip)->pi_ref_cnt != 0) { /* * Give a chance for pending I/Os to complete. 
*/ MDI_DEBUG(1, (CE_NOTE, vdip, "!i_mdi_pi_offline: " "%d cmds still pending on path: %p\n", MDI_PI(pip)->pi_ref_cnt, pip)); if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv, &MDI_PI(pip)->pi_mutex, ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) { /* * The timeout time reached without ref_cnt being zero * being signaled. */ MDI_DEBUG(1, (CE_NOTE, vdip, "!i_mdi_pi_offline: " "Timeout reached on path %p without the cond\n", pip)); MDI_DEBUG(1, (CE_NOTE, vdip, "!i_mdi_pi_offline: " "%d cmds still pending on path: %p\n", MDI_PI(pip)->pi_ref_cnt, pip)); } } vh = ct->ct_vhci; vdip = vh->vh_dip; /* * Notify vHCI that has registered this event */ ASSERT(vh->vh_ops); f = vh->vh_ops->vo_pi_state_change; if (f != NULL) { MDI_PI_UNLOCK(pip); if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0, flags)) != MDI_SUCCESS) { MDI_DEBUG(1, (CE_WARN, vdip, "!vo_path_offline failed " "vdip 0x%x, pip 0x%x", vdip, pip)); } MDI_PI_LOCK(pip); } /* * Set the mdi_pathinfo node state and clear the transient condition */ MDI_PI_SET_OFFLINE(pip); cv_broadcast(&MDI_PI(pip)->pi_state_cv); MDI_PI_UNLOCK(pip); MDI_CLIENT_LOCK(ct); if (rv == MDI_SUCCESS) { if (ct->ct_unstable == 0) { dev_info_t *cdip = ct->ct_dip; /* * Onlining the mdi_pathinfo node will impact the * client state Update the client and dev_info node * state accordingly */ i_mdi_client_update_state(ct); rv = NDI_SUCCESS; if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED)) { MDI_CLIENT_UNLOCK(ct); rv = ndi_devi_offline(cdip, 0); MDI_CLIENT_LOCK(ct); if (rv != NDI_SUCCESS) { /* * ndi_devi_offline failed. * Reset client flags to * online. 
*/ MDI_DEBUG(4, (CE_WARN, cdip, "!ndi_devi_offline: failed " " Error: %x", rv)); MDI_CLIENT_SET_ONLINE(ct); } } } /* * Convert to MDI error code */ switch (rv) { case NDI_SUCCESS: rv = MDI_SUCCESS; break; case NDI_BUSY: rv = MDI_BUSY; break; default: rv = MDI_FAILURE; break; } } MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); i_mdi_report_path_state(ct, pip); } MDI_CLIENT_UNLOCK(ct); /* * Change in the mdi_pathinfo node state will impact the client state */ MDI_DEBUG(2, (CE_NOTE, NULL, "!i_mdi_pi_offline ct = %p pip = %p", ct, pip)); return (rv); } /* * mdi_pi_get_addr(): * Get the unit address associated with a mdi_pathinfo node * * Return Values: * char * */ char * mdi_pi_get_addr(mdi_pathinfo_t *pip) { if (pip == NULL) return (NULL); return (MDI_PI(pip)->pi_addr); } /* * mdi_pi_get_client(): * Get the client devinfo associated with a mdi_pathinfo node * * Return Values: * Handle to client device dev_info node */ dev_info_t * mdi_pi_get_client(mdi_pathinfo_t *pip) { dev_info_t *dip = NULL; if (pip) { dip = MDI_PI(pip)->pi_client->ct_dip; } return (dip); } /* * mdi_pi_get_phci(): * Get the pHCI devinfo associated with the mdi_pathinfo node * Return Values: * Handle to dev_info node */ dev_info_t * mdi_pi_get_phci(mdi_pathinfo_t *pip) { dev_info_t *dip = NULL; if (pip) { dip = MDI_PI(pip)->pi_phci->ph_dip; } return (dip); } /* * mdi_pi_get_client_private(): * Get the client private information associated with the * mdi_pathinfo node */ void * mdi_pi_get_client_private(mdi_pathinfo_t *pip) { void *cprivate = NULL; if (pip) { cprivate = MDI_PI(pip)->pi_cprivate; } return (cprivate); } /* * mdi_pi_set_client_private(): * Set the client private information in the mdi_pathinfo node */ void mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv) { if (pip) { MDI_PI(pip)->pi_cprivate = priv; } } /* * mdi_pi_get_phci_private(): * Get the pHCI private information associated with the * mdi_pathinfo node */ caddr_t mdi_pi_get_phci_private(mdi_pathinfo_t *pip) { caddr_t pprivate = 
NULL; if (pip) { pprivate = MDI_PI(pip)->pi_pprivate; } return (pprivate); } /* * mdi_pi_set_phci_private(): * Set the pHCI private information in the mdi_pathinfo node */ void mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv) { if (pip) { MDI_PI(pip)->pi_pprivate = priv; } } /* * mdi_pi_get_state(): * Get the mdi_pathinfo node state. Transient states are internal * and not provided to the users */ mdi_pathinfo_state_t mdi_pi_get_state(mdi_pathinfo_t *pip) { mdi_pathinfo_state_t state = MDI_PATHINFO_STATE_INIT; if (pip) { if (MDI_PI_IS_TRANSIENT(pip)) { /* * mdi_pathinfo is in state transition. Return the * last good state. */ state = MDI_PI_OLD_STATE(pip); } else { state = MDI_PI_STATE(pip); } } return (state); } /* * Note that the following function needs to be the new interface for * mdi_pi_get_state when mpxio gets integrated to ON. */ int mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state, uint32_t *ext_state) { *state = MDI_PATHINFO_STATE_INIT; if (pip) { if (MDI_PI_IS_TRANSIENT(pip)) { /* * mdi_pathinfo is in state transition. Return the * last good state. 
*/ *state = MDI_PI_OLD_STATE(pip); *ext_state = MDI_PI_OLD_EXT_STATE(pip); } else { *state = MDI_PI_STATE(pip); *ext_state = MDI_PI_EXT_STATE(pip); } } return (MDI_SUCCESS); } /* * mdi_pi_get_preferred: * Get the preferred path flag */ int mdi_pi_get_preferred(mdi_pathinfo_t *pip) { if (pip) { return (MDI_PI(pip)->pi_preferred); } return (0); } /* * mdi_pi_set_preferred: * Set the preferred path flag */ void mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred) { if (pip) { MDI_PI(pip)->pi_preferred = preferred; } } /* * mdi_pi_set_state(): * Set the mdi_pathinfo node state */ void mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state) { uint32_t ext_state; if (pip) { ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; MDI_PI(pip)->pi_state = state; MDI_PI(pip)->pi_state |= ext_state; } } /* * Property functions: */ int i_map_nvlist_error_to_mdi(int val) { int rv; switch (val) { case 0: rv = DDI_PROP_SUCCESS; break; case EINVAL: case ENOTSUP: rv = DDI_PROP_INVAL_ARG; break; case ENOMEM: rv = DDI_PROP_NO_MEMORY; break; default: rv = DDI_PROP_NOT_FOUND; break; } return (rv); } /* * mdi_pi_get_next_prop(): * Property walk function. The caller should hold mdi_pi_lock() * and release by calling mdi_pi_unlock() at the end of walk to * get a consistent value. */ nvpair_t * mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev) { if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { return (NULL); } ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev)); } /* * mdi_prop_remove(): * Remove the named property from the named list. 
*/
/*
 * When name is NULL every property on the path is removed.  Takes the
 * pathinfo lock itself, so the caller must not hold it.
 *
 * Return Values:
 *		DDI_PROP_SUCCESS
 *		DDI_PROP_NOT_FOUND	pip is NULL or has no property list
 */
int
mdi_prop_remove(mdi_pathinfo_t *pip, char *name)
{
	nvlist_t	*props;

	if (pip == NULL)
		return (DDI_PROP_NOT_FOUND);

	ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
	MDI_PI_LOCK(pip);

	props = MDI_PI(pip)->pi_prop;
	if (props == NULL) {
		MDI_PI_UNLOCK(pip);
		return (DDI_PROP_NOT_FOUND);
	}

	if (name != NULL) {
		(void) nvlist_remove_all(props, name);
	} else {
		char		nvp_name[MAXNAMELEN];
		nvpair_t	*nvp;
		nvpair_t	*next;

		/*
		 * The successor is fetched before the current pair is
		 * removed, and the name is copied out because removal
		 * releases the pair that owns it.
		 *
		 * NOTE(review): this presumes the list never holds two
		 * pairs with the same name; otherwise "next" could be
		 * removed together with nvp.  Confirm against how
		 * pi_prop is allocated.
		 */
		for (nvp = nvlist_next_nvpair(props, NULL); nvp != NULL;
		    nvp = next) {
			next = nvlist_next_nvpair(props, nvp);
			(void) snprintf(nvp_name, MAXNAMELEN, "%s",
			    nvpair_name(nvp));
			(void) nvlist_remove_all(props, nvp_name);
		}
	}

	MDI_PI_UNLOCK(pip);
	return (DDI_PROP_SUCCESS);
}

/*
 * mdi_prop_size():
 *		Get buffer size needed to pack the property data.
 *		Caller should hold the mdi_pathinfo_t lock to get a consistent
 *		buffer size.
 *
 *		*buflenp is always written: 0 when there is nothing to
 *		size or nvlist_size() stored no result, otherwise the
 *		size it reported.
 *
 * Return Values:
 *		DDI_PROP_NOT_FOUND when pip is NULL or has no property
 *		list, otherwise the nvlist_size() result mapped through
 *		i_map_nvlist_error_to_mdi().
 */
int
mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp)
{
	int	rv;
	/*
	 * Start from 0 so that *buflenp is well defined even if
	 * nvlist_size() fails without storing a size.
	 */
	size_t	bufsize = 0;

	*buflenp = 0;
	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
		return (DDI_PROP_NOT_FOUND);
	}
	ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex));
	rv = nvlist_size(MDI_PI(pip)->pi_prop,
	    &bufsize, NV_ENCODE_NATIVE);
	*buflenp = bufsize;
	return (i_map_nvlist_error_to_mdi(rv));
}

/*
 * mdi_prop_pack():
 *		pack the property list.
The caller should hold the * mdi_pathinfo_t node to get a consistent data */ int mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen) { int rv; size_t bufsize; if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) { return (DDI_PROP_NOT_FOUND); } ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); bufsize = buflen; rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize, NV_ENCODE_NATIVE, KM_SLEEP); return (i_map_nvlist_error_to_mdi(rv)); } /* * mdi_prop_update_byte(): * Create/Update a byte property */ int mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data) { int rv; if (pip == NULL) { return (DDI_PROP_INVAL_ARG); } ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); MDI_PI_LOCK(pip); if (MDI_PI(pip)->pi_prop == NULL) { MDI_PI_UNLOCK(pip); return (DDI_PROP_NOT_FOUND); } rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data); MDI_PI_UNLOCK(pip); return (i_map_nvlist_error_to_mdi(rv)); } /* * mdi_prop_update_byte_array(): * Create/Update a byte array property */ int mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data, uint_t nelements) { int rv; if (pip == NULL) { return (DDI_PROP_INVAL_ARG); } ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); MDI_PI_LOCK(pip); if (MDI_PI(pip)->pi_prop == NULL) { MDI_PI_UNLOCK(pip); return (DDI_PROP_NOT_FOUND); } rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements); MDI_PI_UNLOCK(pip); return (i_map_nvlist_error_to_mdi(rv)); } /* * mdi_prop_update_int(): * Create/Update a 32 bit integer property */ int mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data) { int rv; if (pip == NULL) { return (DDI_PROP_INVAL_ARG); } ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); MDI_PI_LOCK(pip); if (MDI_PI(pip)->pi_prop == NULL) { MDI_PI_UNLOCK(pip); return (DDI_PROP_NOT_FOUND); } rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data); MDI_PI_UNLOCK(pip); return (i_map_nvlist_error_to_mdi(rv)); } /* * mdi_prop_update_int64(): * Create/Update a 64 bit integer property */ int 
mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data) { int rv; if (pip == NULL) { return (DDI_PROP_INVAL_ARG); } ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); MDI_PI_LOCK(pip); if (MDI_PI(pip)->pi_prop == NULL) { MDI_PI_UNLOCK(pip); return (DDI_PROP_NOT_FOUND); } rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data); MDI_PI_UNLOCK(pip); return (i_map_nvlist_error_to_mdi(rv)); } /* * mdi_prop_update_int_array(): * Create/Update a int array property */ int mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data, uint_t nelements) { int rv; if (pip == NULL) { return (DDI_PROP_INVAL_ARG); } ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); MDI_PI_LOCK(pip); if (MDI_PI(pip)->pi_prop == NULL) { MDI_PI_UNLOCK(pip); return (DDI_PROP_NOT_FOUND); } rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data, nelements); MDI_PI_UNLOCK(pip); return (i_map_nvlist_error_to_mdi(rv)); } /* * mdi_prop_update_string(): * Create/Update a string property */ int mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data) { int rv; if (pip == NULL) { return (DDI_PROP_INVAL_ARG); } ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); MDI_PI_LOCK(pip); if (MDI_PI(pip)->pi_prop == NULL) { MDI_PI_UNLOCK(pip); return (DDI_PROP_NOT_FOUND); } rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, data); MDI_PI_UNLOCK(pip); return (i_map_nvlist_error_to_mdi(rv)); } /* * mdi_prop_update_string_array(): * Create/Update a string array property */ int mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data, uint_t nelements) { int rv; if (pip == NULL) { return (DDI_PROP_INVAL_ARG); } ASSERT(!MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); MDI_PI_LOCK(pip); if (MDI_PI(pip)->pi_prop == NULL) { MDI_PI_UNLOCK(pip); return (DDI_PROP_NOT_FOUND); } rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data, nelements); MDI_PI_UNLOCK(pip); return (i_map_nvlist_error_to_mdi(rv)); } /* * mdi_prop_lookup_byte(): * Look for byte property identified by name. 
The data returned * is the actual property and valid as long as mdi_pathinfo_t node * is alive. */ int mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data) { int rv; if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { return (DDI_PROP_NOT_FOUND); } rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data); return (i_map_nvlist_error_to_mdi(rv)); } /* * mdi_prop_lookup_byte_array(): * Look for byte array property identified by name. The data * returned is the actual property and valid as long as * mdi_pathinfo_t node is alive. */ int mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data, uint_t *nelements) { int rv; if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { return (DDI_PROP_NOT_FOUND); } rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements); return (i_map_nvlist_error_to_mdi(rv)); } /* * mdi_prop_lookup_int(): * Look for int property identified by name. The data returned * is the actual property and valid as long as mdi_pathinfo_t * node is alive. */ int mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data) { int rv; if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { return (DDI_PROP_NOT_FOUND); } rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data); return (i_map_nvlist_error_to_mdi(rv)); } /* * mdi_prop_lookup_int64(): * Look for int64 property identified by name. The data returned * is the actual property and valid as long as mdi_pathinfo_t node * is alive. */ int mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data) { int rv; if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { return (DDI_PROP_NOT_FOUND); } rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data); return (i_map_nvlist_error_to_mdi(rv)); } /* * mdi_prop_lookup_int_array(): * Look for int array property identified by name. The data * returned is the actual property and valid as long as * mdi_pathinfo_t node is alive. 
*/ int mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data, uint_t *nelements) { int rv; if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { return (DDI_PROP_NOT_FOUND); } rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t **)data, nelements); return (i_map_nvlist_error_to_mdi(rv)); } /* * mdi_prop_lookup_string(): * Look for string property identified by name. The data * returned is the actual property and valid as long as * mdi_pathinfo_t node is alive. */ int mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data) { int rv; if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { return (DDI_PROP_NOT_FOUND); } rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data); return (i_map_nvlist_error_to_mdi(rv)); } /* * mdi_prop_lookup_string_array(): * Look for string array property identified by name. The data * returned is the actual property and valid as long as * mdi_pathinfo_t node is alive. */ int mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data, uint_t *nelements) { int rv; if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { return (DDI_PROP_NOT_FOUND); } rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data, nelements); return (i_map_nvlist_error_to_mdi(rv)); } /* * mdi_prop_free(): * Symmetrical function to ddi_prop_free(). nvlist_lookup_xx() * functions return the pointer to actual property data and not a * copy of it. So the data returned is valid as long as * mdi_pathinfo_t node is valid. 
*/ /*ARGSUSED*/ int mdi_prop_free(void *data) { return (DDI_PROP_SUCCESS); } /*ARGSUSED*/ static void i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip) { char *phci_path, *ct_path; char *ct_status; char *status; dev_info_t *dip = ct->ct_dip; char lb_buf[64]; ASSERT(MUTEX_HELD(&ct->ct_mutex)); if ((dip == NULL) || (ddi_get_instance(dip) == -1) || (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) { return; } if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) { ct_status = "optimal"; } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) { ct_status = "degraded"; } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { ct_status = "failed"; } else { ct_status = "unknown"; } if (MDI_PI_IS_OFFLINE(pip)) { status = "offline"; } else if (MDI_PI_IS_ONLINE(pip)) { status = "online"; } else if (MDI_PI_IS_STANDBY(pip)) { status = "standby"; } else if (MDI_PI_IS_FAULT(pip)) { status = "faulted"; } else { status = "unknown"; } if (ct->ct_lb == LOAD_BALANCE_LBA) { (void) snprintf(lb_buf, sizeof (lb_buf), "%s, region-size: %d", mdi_load_balance_lba, ct->ct_lb_args->region_size); } else if (ct->ct_lb == LOAD_BALANCE_NONE) { (void) snprintf(lb_buf, sizeof (lb_buf), "%s", mdi_load_balance_none); } else { (void) snprintf(lb_buf, sizeof (lb_buf), "%s", mdi_load_balance_rr); } if (dip) { ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP); phci_path = kmem_alloc(MAXPATHLEN, KM_SLEEP); cmn_err(CE_CONT, "?%s (%s%d) multipath status: %s, " "path %s (%s%d) to target address: %s is %s" " Load balancing: %s\n", ddi_pathname(dip, ct_path), ddi_driver_name(dip), ddi_get_instance(dip), ct_status, ddi_pathname(MDI_PI(pip)->pi_phci->ph_dip, phci_path), ddi_driver_name(MDI_PI(pip)->pi_phci->ph_dip), ddi_get_instance(MDI_PI(pip)->pi_phci->ph_dip), MDI_PI(pip)->pi_addr, status, lb_buf); kmem_free(phci_path, MAXPATHLEN); kmem_free(ct_path, MAXPATHLEN); MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct); } } #ifdef DEBUG /* * i_mdi_log(): * Utility function for error message management * */ 
/*VARARGS3*/ static void i_mdi_log(int level, dev_info_t *dip, const char *fmt, ...) { char buf[MAXNAMELEN]; char name[MAXNAMELEN]; va_list ap; int log_only = 0; int boot_only = 0; int console_only = 0; if (dip) { if (level == CE_PANIC || level == CE_WARN || level == CE_NOTE) { (void) snprintf(name, MAXNAMELEN, "%s%d:\n", ddi_node_name(dip), ddi_get_instance(dip)); } else { (void) snprintf(name, MAXNAMELEN, "%s%d:", ddi_node_name(dip), ddi_get_instance(dip)); } } else { name[0] = '\0'; } va_start(ap, fmt); (void) vsnprintf(buf, MAXNAMELEN, fmt, ap); va_end(ap); switch (buf[0]) { case '!': log_only = 1; break; case '?': boot_only = 1; break; case '^': console_only = 1; break; } switch (level) { case CE_NOTE: level = CE_CONT; /* FALLTHROUGH */ case CE_CONT: case CE_WARN: case CE_PANIC: if (boot_only) { cmn_err(level, "?%s\t%s", name, &buf[1]); } else if (console_only) { cmn_err(level, "^%s\t%s", name, &buf[1]); } else if (log_only) { cmn_err(level, "!%s\t%s", name, &buf[1]); } else { cmn_err(level, "%s\t%s", name, buf); } break; default: cmn_err(level, "%s\t%s", name, buf); break; } } #endif /* DEBUG */ void i_mdi_client_online(dev_info_t *ct_dip) { mdi_client_t *ct; /* * Client online notification. Mark client state as online * restore our binding with dev_info node */ ct = i_devi_get_client(ct_dip); ASSERT(ct != NULL); MDI_CLIENT_LOCK(ct); MDI_CLIENT_SET_ONLINE(ct); /* catch for any memory leaks */ ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip)); ct->ct_dip = ct_dip; if (ct->ct_power_cnt == 0) (void) i_mdi_power_all_phci(ct); MDI_DEBUG(4, (CE_NOTE, ct_dip, "i_mdi_client_online " "i_mdi_pm_hold_client\n")); i_mdi_pm_hold_client(ct, 1); MDI_CLIENT_UNLOCK(ct); } void i_mdi_phci_online(dev_info_t *ph_dip) { mdi_phci_t *ph; /* pHCI online notification. 
Mark state accordingly */ ph = i_devi_get_phci(ph_dip); ASSERT(ph != NULL); MDI_PHCI_LOCK(ph); MDI_PHCI_SET_ONLINE(ph); MDI_PHCI_UNLOCK(ph); } /* * mdi_devi_online(): * Online notification from NDI framework on pHCI/client * device online. * Return Values: * NDI_SUCCESS * MDI_FAILURE */ /*ARGSUSED*/ int mdi_devi_online(dev_info_t *dip, uint_t flags) { if (MDI_PHCI(dip)) { i_mdi_phci_online(dip); } if (MDI_CLIENT(dip)) { i_mdi_client_online(dip); } return (NDI_SUCCESS); } /* * mdi_devi_offline(): * Offline notification from NDI framework on pHCI/Client device * offline. * * Return Values: * NDI_SUCCESS * NDI_FAILURE */ /*ARGSUSED*/ int mdi_devi_offline(dev_info_t *dip, uint_t flags) { int rv = NDI_SUCCESS; if (MDI_CLIENT(dip)) { rv = i_mdi_client_offline(dip, flags); if (rv != NDI_SUCCESS) return (rv); } if (MDI_PHCI(dip)) { rv = i_mdi_phci_offline(dip, flags); if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) { /* set client back online */ i_mdi_client_online(dip); } } return (rv); } /*ARGSUSED*/ static int i_mdi_phci_offline(dev_info_t *dip, uint_t flags) { int rv = NDI_SUCCESS; mdi_phci_t *ph; mdi_client_t *ct; mdi_pathinfo_t *pip; mdi_pathinfo_t *next; mdi_pathinfo_t *failed_pip = NULL; dev_info_t *cdip; /* * pHCI component offline notification * Make sure that this pHCI instance is free to be offlined. * If it is OK to proceed, Offline and remove all the child * mdi_pathinfo nodes. This process automatically offlines * corresponding client devices, for which this pHCI provides * critical services. */ MDI_DEBUG(2, (CE_NOTE, dip, "!mdi_phci_offline called %p\n", dip)); ph = i_devi_get_phci(dip); if (ph == NULL) { return (rv); } MDI_PHCI_LOCK(ph); if (MDI_PHCI_IS_OFFLINE(ph)) { MDI_DEBUG(1, (CE_WARN, dip, "!pHCI %p already offlined", ph)); MDI_PHCI_UNLOCK(ph); return (NDI_SUCCESS); } /* * Check to see if the pHCI can be offlined */ if (ph->ph_unstable) { MDI_DEBUG(1, (CE_WARN, dip, "!One or more target devices are in transient " "state. 
This device can not be removed at "
		    "this moment. Please try again later."));
		MDI_PHCI_UNLOCK(ph);
		return (NDI_BUSY);
	}

	/*
	 * First pass: for each path on this pHCI make sure its client is
	 * neither failing over nor unstable, and offline any client for
	 * which this pHCI is the last usable path.  The first client that
	 * cannot be offlined is remembered in failed_pip.
	 */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		/*
		 * The mdi_pathinfo state is OK. Check the client state.
		 * If failover in progress fail the pHCI from offlining
		 */
		ct = MDI_PI(pip)->pi_client;
		i_mdi_client_lock(ct, pip);
		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
		    (ct->ct_unstable)) {
			/*
			 * Failover is in progress, Fail the DR
			 */
			MDI_DEBUG(1, (CE_WARN, dip,
			    "!pHCI device (%s%d) is Busy. %s",
			    ddi_driver_name(dip), ddi_get_instance(dip),
			    "This device can not be removed at "
			    "this moment. Please try again later."));
			MDI_PI_UNLOCK(pip);
			MDI_CLIENT_UNLOCK(ct);
			MDI_PHCI_UNLOCK(ph);
			return (NDI_BUSY);
		}
		MDI_PI_UNLOCK(pip);

		/*
		 * Check to see of we are removing the last path of this
		 * client device...
		 */
		cdip = ct->ct_dip;
		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
		    (i_mdi_client_compute_state(ct, ph) ==
		    MDI_CLIENT_STATE_FAILED)) {
			/*
			 * Both the client and the pHCI lock are dropped
			 * around ndi_devi_offline(); the pHCI lock is
			 * re-taken on either outcome.
			 */
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			if (ndi_devi_offline(cdip, 0) != NDI_SUCCESS) {
				/*
				 * ndi_devi_offline() failed.
				 * This pHCI provides the critical path
				 * to one or more client devices.
				 * Return busy.
				 */
				MDI_PHCI_LOCK(ph);
				MDI_DEBUG(1, (CE_WARN, dip,
				    "!pHCI device (%s%d) is Busy. %s",
				    ddi_driver_name(dip), ddi_get_instance(dip),
				    "This device can not be removed at "
				    "this moment. Please try again later."));
				failed_pip = pip;
				break;
			} else {
				MDI_PHCI_LOCK(ph);
				pip = next;
			}
		} else {
			i_mdi_client_unlock(ct);
			pip = next;
		}
	}

	/*
	 * Back-out pass: a client could not be offlined.  Revisit every
	 * path that precedes failed_pip on the list and bring its client
	 * devinfo node in line with the client's current state, then
	 * report busy.
	 */
	if (failed_pip) {
		pip = ph->ph_path_head;
		while (pip != failed_pip) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
			ct = MDI_PI(pip)->pi_client;
			i_mdi_client_lock(ct, pip);
			cdip = ct->ct_dip;
			switch (MDI_CLIENT_STATE(ct)) {
			case MDI_CLIENT_STATE_OPTIMAL:
			case MDI_CLIENT_STATE_DEGRADED:
				if (cdip) {
					MDI_PI_UNLOCK(pip);
					i_mdi_client_unlock(ct);
					MDI_PHCI_UNLOCK(ph);
					(void) ndi_devi_online(cdip, 0);
					MDI_PHCI_LOCK(ph);
					pip = next;
					continue;
				}
				break;

			case MDI_CLIENT_STATE_FAILED:
				if (cdip) {
					MDI_PI_UNLOCK(pip);
					i_mdi_client_unlock(ct);
					MDI_PHCI_UNLOCK(ph);
					(void) ndi_devi_offline(cdip, 0);
					MDI_PHCI_LOCK(ph);
					pip = next;
					continue;
				}
				break;
			}
			MDI_PI_UNLOCK(pip);
			i_mdi_client_unlock(ct);
			pip = next;
		}
		MDI_PHCI_UNLOCK(ph);
		return (NDI_BUSY);
	}

	/*
	 * Mark the pHCI as offline
	 */
	MDI_PHCI_SET_OFFLINE(ph);

	/*
	 * Mark the child mdi_pathinfo nodes as transient
	 */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		MDI_PI_SET_OFFLINING(pip);
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);
	/*
	 * Give a chance for any pending commands to execute
	 */
	delay(1);
	MDI_PHCI_LOCK(ph);
	/*
	 * Final pass: offline every path.  If any path is not offline
	 * afterwards the pHCI is put back online and the request fails.
	 */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		(void) i_mdi_pi_offline(pip, flags);
		MDI_PI_LOCK(pip);
		ct = MDI_PI(pip)->pi_client;
		if (!MDI_PI_IS_OFFLINE(pip)) {
			MDI_DEBUG(1, (CE_WARN, dip,
			    "!pHCI device (%s%d) is Busy. %s",
			    ddi_driver_name(dip), ddi_get_instance(dip),
			    "This device can not be removed at "
			    "this moment. Please try again later."));
			MDI_PI_UNLOCK(pip);
			MDI_PHCI_SET_ONLINE(ph);
			MDI_PHCI_UNLOCK(ph);
			return (NDI_BUSY);
		}
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);

	return (rv);
}

/*
 * i_mdi_client_offline():
 *		Client offline notification.  Refuses with NDI_BUSY while
 *		the client is unstable or failing over; a devinfo node with
 *		no client structure is treated as success.
 */
/*ARGSUSED*/
static int
i_mdi_client_offline(dev_info_t *dip, uint_t flags)
{
	int		rv = NDI_SUCCESS;
	mdi_client_t	*ct;

	/*
	 * Client component to go offline.  Make sure that we are
	 * not in failing over state and update client state
	 * accordingly
	 */
	MDI_DEBUG(2, (CE_NOTE, dip, "!i_mdi_client_offline called %p\n",
	    dip));
	ct = i_devi_get_client(dip);
	if (ct != NULL) {
		MDI_CLIENT_LOCK(ct);
		if (ct->ct_unstable) {
			/*
			 * One or more paths are in transient state,
			 * Dont allow offline of a client device
			 */
			MDI_DEBUG(1, (CE_WARN, dip,
			    "!One or more paths to this device is "
			    "in transient state. This device can not "
			    "be removed at this moment. "
			    "Please try again later."));
			MDI_CLIENT_UNLOCK(ct);
			return (NDI_BUSY);
		}
		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
			/*
			 * Failover is in progress, Dont allow DR of
			 * a client device
			 */
			MDI_DEBUG(1, (CE_WARN, dip,
			    "!Client device (%s%d) is Busy. %s",
			    ddi_driver_name(dip), ddi_get_instance(dip),
			    "This device can not be removed at "
			    "this moment.
Please try again later.")); MDI_CLIENT_UNLOCK(ct); return (NDI_BUSY); } MDI_CLIENT_SET_OFFLINE(ct); /* * Unbind our relationship with the dev_info node */ if (flags & NDI_DEVI_REMOVE) { ct->ct_dip = NULL; } MDI_CLIENT_UNLOCK(ct); } return (rv); } /* * mdi_pre_attach(): * Pre attach() notification handler */ /*ARGSUSED*/ int mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) { /* don't support old DDI_PM_RESUME */ if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) && (cmd == DDI_PM_RESUME)) return (DDI_FAILURE); return (DDI_SUCCESS); } /* * mdi_post_attach(): * Post attach() notification handler */ /*ARGSUSED*/ void mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error) { mdi_phci_t *ph; mdi_client_t *ct; mdi_pathinfo_t *pip; if (MDI_PHCI(dip)) { ph = i_devi_get_phci(dip); ASSERT(ph != NULL); MDI_PHCI_LOCK(ph); switch (cmd) { case DDI_ATTACH: MDI_DEBUG(2, (CE_NOTE, dip, "!pHCI post_attach: called %p\n", ph)); if (error == DDI_SUCCESS) { MDI_PHCI_SET_ATTACH(ph); } else { MDI_DEBUG(1, (CE_NOTE, dip, "!pHCI post_attach: failed error=%d\n", error)); MDI_PHCI_SET_DETACH(ph); } break; case DDI_RESUME: MDI_DEBUG(2, (CE_NOTE, dip, "!pHCI post_resume: called %p\n", ph)); if (error == DDI_SUCCESS) { MDI_PHCI_SET_RESUME(ph); } else { MDI_DEBUG(1, (CE_NOTE, dip, "!pHCI post_resume: failed error=%d\n", error)); MDI_PHCI_SET_SUSPEND(ph); } break; } MDI_PHCI_UNLOCK(ph); } if (MDI_CLIENT(dip)) { ct = i_devi_get_client(dip); ASSERT(ct != NULL); MDI_CLIENT_LOCK(ct); switch (cmd) { case DDI_ATTACH: MDI_DEBUG(2, (CE_NOTE, dip, "!Client post_attach: called %p\n", ct)); if (error != DDI_SUCCESS) { MDI_DEBUG(1, (CE_NOTE, dip, "!Client post_attach: failed error=%d\n", error)); MDI_CLIENT_SET_DETACH(ct); MDI_DEBUG(4, (CE_WARN, dip, "mdi_post_attach i_mdi_pm_reset_client\n")); i_mdi_pm_reset_client(ct); break; } /* * Client device has successfully attached. * Create kstats for any pathinfo structures * initially associated with this client. 
*/ for (pip = ct->ct_path_head; pip != NULL; pip = (mdi_pathinfo_t *) MDI_PI(pip)->pi_client_link) { (void) i_mdi_pi_kstat_create(pip); i_mdi_report_path_state(ct, pip); } MDI_CLIENT_SET_ATTACH(ct); break; case DDI_RESUME: MDI_DEBUG(2, (CE_NOTE, dip, "!Client post_attach: called %p\n", ct)); if (error == DDI_SUCCESS) { MDI_CLIENT_SET_RESUME(ct); } else { MDI_DEBUG(1, (CE_NOTE, dip, "!Client post_resume: failed error=%d\n", error)); MDI_CLIENT_SET_SUSPEND(ct); } break; } MDI_CLIENT_UNLOCK(ct); } } /* * mdi_pre_detach(): * Pre detach notification handler */ /*ARGSUSED*/ int mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) { int rv = DDI_SUCCESS; if (MDI_CLIENT(dip)) { (void) i_mdi_client_pre_detach(dip, cmd); } if (MDI_PHCI(dip)) { rv = i_mdi_phci_pre_detach(dip, cmd); } return (rv); } /*ARGSUSED*/ static int i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) { int rv = DDI_SUCCESS; mdi_phci_t *ph; mdi_client_t *ct; mdi_pathinfo_t *pip; mdi_pathinfo_t *failed_pip = NULL; mdi_pathinfo_t *next; ph = i_devi_get_phci(dip); if (ph == NULL) { return (rv); } MDI_PHCI_LOCK(ph); switch (cmd) { case DDI_DETACH: MDI_DEBUG(2, (CE_NOTE, dip, "!pHCI pre_detach: called %p\n", ph)); if (!MDI_PHCI_IS_OFFLINE(ph)) { /* * mdi_pathinfo nodes are still attached to * this pHCI. Fail the detach for this pHCI. */ MDI_DEBUG(2, (CE_WARN, dip, "!pHCI pre_detach: " "mdi_pathinfo nodes are still attached " "%p\n", ph)); rv = DDI_FAILURE; break; } MDI_PHCI_SET_DETACH(ph); break; case DDI_SUSPEND: /* * pHCI is getting suspended. Since mpxio client * devices may not be suspended at this point, to avoid * a potential stack overflow, it is important to suspend * client devices before pHCI can be suspended. 
*/
		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!pHCI pre_suspend: called %p\n", ph));
		/*
		 * Suspend all the client devices accessible through this pHCI
		 */
		pip = ph->ph_path_head;
		while (pip != NULL && rv == DDI_SUCCESS) {
			dev_info_t *cdip;
			MDI_PI_LOCK(pip);
			/* remember the successor before dropping any lock */
			next =
			    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
			ct = MDI_PI(pip)->pi_client;
			i_mdi_client_lock(ct, pip);
			cdip = ct->ct_dip;
			MDI_PI_UNLOCK(pip);
			if ((MDI_CLIENT_IS_DETACHED(ct) == 0) &&
			    MDI_CLIENT_IS_SUSPENDED(ct) == 0) {
				/* client lock is dropped before the suspend */
				i_mdi_client_unlock(ct);
				if ((rv = devi_detach(cdip, DDI_SUSPEND)) !=
				    DDI_SUCCESS) {
					/*
					 * Suspend of one of the client
					 * device has failed.
					 */
					MDI_DEBUG(1, (CE_WARN, dip,
					    "!Suspend of device (%s%d) failed.",
					    ddi_driver_name(cdip),
					    ddi_get_instance(cdip)));
					/* the undo pass below stops here */
					failed_pip = pip;
					break;
				}
			} else {
				i_mdi_client_unlock(ct);
			}
			pip = next;
		}

		if (rv == DDI_SUCCESS) {
			/*
			 * Suspend of client devices is complete. Proceed
			 * with pHCI suspend.
			 */
			MDI_PHCI_SET_SUSPEND(ph);
		} else {
			/*
			 * Revert back all the suspended client device states
			 * to converse.
			 */
			pip = ph->ph_path_head;
			/* walk only the paths visited before the failure */
			while (pip != failed_pip) {
				dev_info_t *cdip;
				MDI_PI_LOCK(pip);
				next =
				    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
				ct = MDI_PI(pip)->pi_client;
				i_mdi_client_lock(ct, pip);
				cdip = ct->ct_dip;
				MDI_PI_UNLOCK(pip);
				if (MDI_CLIENT_IS_SUSPENDED(ct)) {
					i_mdi_client_unlock(ct);
					(void) devi_attach(cdip, DDI_RESUME);
				} else {
					i_mdi_client_unlock(ct);
				}
				pip = next;
			}
		}
		break;

	default:
		rv = DDI_FAILURE;
		break;
	}
	MDI_PHCI_UNLOCK(ph);
	return (rv);
}

/*
 * i_mdi_client_pre_detach():
 *		Client side of mdi_pre_detach().
 *
 *		Marks the client detached (DDI_DETACH) or suspended
 *		(DDI_SUSPEND) under the client lock before the operation is
 *		attempted.  Any other command fails.  A dip without a client
 *		structure returns DDI_SUCCESS.
 * Return Values:
 *		DDI_SUCCESS
 *		DDI_FAILURE
 */
/*ARGSUSED*/
static int
i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int		rv = DDI_SUCCESS;
	mdi_client_t	*ct;

	ct = i_devi_get_client(dip);
	if (ct == NULL) {
		return (rv);
	}

	MDI_CLIENT_LOCK(ct);
	switch (cmd) {
	case DDI_DETACH:
		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!Client pre_detach: called %p\n", ct));
		MDI_CLIENT_SET_DETACH(ct);
		break;

	case DDI_SUSPEND:
		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!Client pre_suspend: called %p\n", ct));
		MDI_CLIENT_SET_SUSPEND(ct);
		break;

	default:
		rv = DDI_FAILURE;
		break;
	}
	MDI_CLIENT_UNLOCK(ct);
	return (rv);
}

/*
 * mdi_post_detach():
 *		Post detach notification handler.
 *
 *		Forwards the command and its outcome ("error") to the pHCI
 *		and/or client post-detach handlers, depending on which MDI
 *		roles the node has.
 */
/*ARGSUSED*/
void
mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
{
	/*
	 * Detach/Suspend of mpxio component failed. Update our state
	 * too
	 */
	if (MDI_PHCI(dip))
		i_mdi_phci_post_detach(dip, cmd, error);

	if (MDI_CLIENT(dip))
		i_mdi_client_post_detach(dip, cmd, error);
}

/*
 * i_mdi_phci_post_detach():
 *		pHCI side of mdi_post_detach().  When the detach or suspend
 *		did not succeed, the state set by the pre-detach handler is
 *		undone (attached, respectively resumed).  Nothing is done for
 *		a dip without a pHCI structure.
 */
/*ARGSUSED*/
static void
i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
{
	mdi_phci_t	*ph;

	/*
	 * Detach/Suspend of phci component failed. Update our state
	 * too
	 */
	ph = i_devi_get_phci(dip);
	if (ph == NULL) {
		return;
	}

	MDI_PHCI_LOCK(ph);
	/*
	 * Detach of pHCI failed.
Restore back converse * state */ switch (cmd) { case DDI_DETACH: MDI_DEBUG(2, (CE_NOTE, dip, "!pHCI post_detach: called %p\n", ph)); if (error != DDI_SUCCESS) MDI_PHCI_SET_ATTACH(ph); break; case DDI_SUSPEND: MDI_DEBUG(2, (CE_NOTE, dip, "!pHCI post_suspend: called %p\n", ph)); if (error != DDI_SUCCESS) MDI_PHCI_SET_RESUME(ph); break; } MDI_PHCI_UNLOCK(ph); } /*ARGSUSED*/ static void i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) { mdi_client_t *ct; ct = i_devi_get_client(dip); if (ct == NULL) { return; } MDI_CLIENT_LOCK(ct); /* * Detach of Client failed. Restore back converse * state */ switch (cmd) { case DDI_DETACH: MDI_DEBUG(2, (CE_NOTE, dip, "!Client post_detach: called %p\n", ct)); if (DEVI_IS_ATTACHING(ct->ct_dip)) { MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach " "i_mdi_pm_rele_client\n")); i_mdi_pm_rele_client(ct, ct->ct_path_count); } else { MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach " "i_mdi_pm_reset_client\n")); i_mdi_pm_reset_client(ct); } if (error != DDI_SUCCESS) MDI_CLIENT_SET_ATTACH(ct); break; case DDI_SUSPEND: MDI_DEBUG(2, (CE_NOTE, dip, "!Client post_suspend: called %p\n", ct)); if (error != DDI_SUCCESS) MDI_CLIENT_SET_RESUME(ct); break; } MDI_CLIENT_UNLOCK(ct); } /* * create and install per-path (client - pHCI) statistics * I/O stats supported: nread, nwritten, reads, and writes * Error stats - hard errors, soft errors, & transport errors */ static int i_mdi_pi_kstat_create(mdi_pathinfo_t *pip) { dev_info_t *client = MDI_PI(pip)->pi_client->ct_dip; dev_info_t *ppath = MDI_PI(pip)->pi_phci->ph_dip; char ksname[KSTAT_STRLEN]; mdi_pathinfo_t *cpip; const char *err_postfix = ",err"; kstat_t *kiosp, *kerrsp; struct pi_errs *nsp; struct mdi_pi_kstats *mdi_statp; ASSERT(client != NULL && ppath != NULL); ASSERT(mutex_owned(&(MDI_PI(pip)->pi_client->ct_mutex))); if (MDI_PI(pip)->pi_kstats != NULL) return (MDI_SUCCESS); for (cpip = MDI_PI(pip)->pi_client->ct_path_head; cpip != NULL; cpip = (mdi_pathinfo_t 
*)(MDI_PI(cpip)->pi_client_link)) { if (cpip == pip) continue; /* * We have found a different path with same parent * kstats for a given client-pHCI are common */ if ((MDI_PI(cpip)->pi_phci->ph_dip == ppath) && (MDI_PI(cpip)->pi_kstats != NULL)) { MDI_PI(cpip)->pi_kstats->pi_kstat_ref++; MDI_PI(pip)->pi_kstats = MDI_PI(cpip)->pi_kstats; return (MDI_SUCCESS); } } /* * stats are named as follows: TGTx.HBAy, e.g. "ssd0.fp0" * clamp length of name against max length of error kstat name */ if (snprintf(ksname, KSTAT_STRLEN, "%s%d.%s%d", ddi_driver_name(client), ddi_get_instance(client), ddi_driver_name(ppath), ddi_get_instance(ppath)) > (KSTAT_STRLEN - strlen(err_postfix))) { return (MDI_FAILURE); } if ((kiosp = kstat_create("mdi", 0, ksname, "iopath", KSTAT_TYPE_IO, 1, 0)) == NULL) { return (MDI_FAILURE); } (void) strcat(ksname, err_postfix); kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors", KSTAT_TYPE_NAMED, sizeof (struct pi_errs) / sizeof (kstat_named_t), 0); if (kerrsp == NULL) { kstat_delete(kiosp); return (MDI_FAILURE); } nsp = (struct pi_errs *)kerrsp->ks_data; kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32); kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32); kstat_named_init(&nsp->pi_transerrs, "Transport Errors", KSTAT_DATA_UINT32); kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy", KSTAT_DATA_UINT32); kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors", KSTAT_DATA_UINT32); kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources", KSTAT_DATA_UINT32); kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors", KSTAT_DATA_UINT32); kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State", KSTAT_DATA_UINT32); kstat_named_init(&nsp->pi_failedfrom, "Failed From", KSTAT_DATA_UINT32); kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32); mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP); mdi_statp->pi_kstat_ref = 1; mdi_statp->pi_kstat_iostats = kiosp; 
mdi_statp->pi_kstat_errstats = kerrsp; kstat_install(kiosp); kstat_install(kerrsp); MDI_PI(pip)->pi_kstats = mdi_statp; return (MDI_SUCCESS); } /* * destroy per-path properties */ static void i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip) { struct mdi_pi_kstats *mdi_statp; if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL) return; MDI_PI(pip)->pi_kstats = NULL; /* * the kstat may be shared between multiple pathinfo nodes * decrement this pathinfo's usage, removing the kstats * themselves when the last pathinfo reference is removed. */ ASSERT(mdi_statp->pi_kstat_ref > 0); if (--mdi_statp->pi_kstat_ref != 0) return; kstat_delete(mdi_statp->pi_kstat_iostats); kstat_delete(mdi_statp->pi_kstat_errstats); kmem_free(mdi_statp, sizeof (*mdi_statp)); } /* * update I/O paths KSTATS */ void mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp) { kstat_t *iostatp; size_t xfer_cnt; ASSERT(pip != NULL); /* * I/O can be driven across a path prior to having path * statistics available, i.e. probe(9e). */ if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) { iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats; xfer_cnt = bp->b_bcount - bp->b_resid; if (bp->b_flags & B_READ) { KSTAT_IO_PTR(iostatp)->reads++; KSTAT_IO_PTR(iostatp)->nread += xfer_cnt; } else { KSTAT_IO_PTR(iostatp)->writes++; KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt; } } } /* * disable the path to a particular pHCI (pHCI specified in the phci_path * argument) for a particular client (specified in the client_path argument). * Disabling a path means that MPxIO will not select the disabled path for * routing any new I/O requests. */ int mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags) { return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP)); } /* * Enable the path to a particular pHCI (pHCI specified in the phci_path * argument) for a particular client (specified in the client_path argument). 
* Enabling a path means that MPxIO may select the enabled path for routing * future I/O requests, subject to other path state constraints. */ int mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags) { return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP)); } /* * Common routine for doing enable/disable. */ int i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op) { mdi_phci_t *ph; mdi_vhci_t *vh = NULL; mdi_client_t *ct; mdi_pathinfo_t *next, *pip; int found_it; int (*f)() = NULL; int rv; int sync_flag = 0; ph = i_devi_get_phci(pdip); MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" " Operation = %d pdip = %p cdip = %p\n", op, pdip, cdip)); if (ph == NULL) { MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" " failed. ph = NULL operation = %d\n", op)); return (MDI_FAILURE); } if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) { MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" " Invalid operation = %d\n", op)); return (MDI_FAILURE); } sync_flag = (flags << 8) & 0xf00; vh = ph->ph_vhci; f = vh->vh_ops->vo_pi_state_change; if (cdip == NULL) { /* * Need to mark the Phci as enabled/disabled. */ MDI_DEBUG(3, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" "Operation %d for the phci\n", op)); MDI_PHCI_LOCK(ph); switch (flags) { case USER_DISABLE: if (op == MDI_DISABLE_OP) MDI_PHCI_SET_USER_DISABLE(ph); else MDI_PHCI_SET_USER_ENABLE(ph); break; case DRIVER_DISABLE: if (op == MDI_DISABLE_OP) MDI_PHCI_SET_DRV_DISABLE(ph); else MDI_PHCI_SET_DRV_ENABLE(ph); break; case DRIVER_DISABLE_TRANSIENT: if (op == MDI_DISABLE_OP) MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph); else MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph); break; default: MDI_PHCI_UNLOCK(ph); MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" " Invalid flag argument= %d\n", flags)); } /* * Phci has been disabled. Now try to enable/disable * path info's to each client. 
*/ pip = ph->ph_path_head; while (pip != NULL) { /* * Do a callback into the mdi consumer to let it * know that path is about to be enabled/disabled. */ if (f != NULL) { rv = (*f)(vh->vh_dip, pip, 0, MDI_PI_EXT_STATE(pip), MDI_EXT_STATE_CHANGE | sync_flag | op | MDI_BEFORE_STATE_CHANGE); if (rv != MDI_SUCCESS) { MDI_DEBUG(2, (CE_WARN, vh->vh_dip, "!vo_pi_state_change: failed rv = %x", rv)); } } MDI_PI_LOCK(pip); next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; switch (flags) { case USER_DISABLE: if (op == MDI_DISABLE_OP) MDI_PI_SET_USER_DISABLE(pip); else MDI_PI_SET_USER_ENABLE(pip); break; case DRIVER_DISABLE: if (op == MDI_DISABLE_OP) MDI_PI_SET_DRV_DISABLE(pip); else MDI_PI_SET_DRV_ENABLE(pip); break; case DRIVER_DISABLE_TRANSIENT: if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) MDI_PI_SET_DRV_DISABLE_TRANS(pip); else MDI_PI_SET_DRV_ENABLE_TRANS(pip); break; } MDI_PI_UNLOCK(pip); /* * Do a callback into the mdi consumer to let it * know that path is now enabled/disabled. */ if (f != NULL) { rv = (*f)(vh->vh_dip, pip, 0, MDI_PI_EXT_STATE(pip), MDI_EXT_STATE_CHANGE | sync_flag | op | MDI_AFTER_STATE_CHANGE); if (rv != MDI_SUCCESS) { MDI_DEBUG(2, (CE_WARN, vh->vh_dip, "!vo_pi_state_change: failed rv = %x", rv)); } } pip = next; } MDI_PHCI_UNLOCK(ph); } else { /* * Disable a specific client. */ ct = i_devi_get_client(cdip); if (ct == NULL) { MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" " failed. ct = NULL operation = %d\n", op)); return (MDI_FAILURE); } MDI_CLIENT_LOCK(ct); pip = ct->ct_path_head; found_it = 0; while (pip != NULL) { MDI_PI_LOCK(pip); next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; if (MDI_PI(pip)->pi_phci == ph) { MDI_PI_UNLOCK(pip); found_it = 1; break; } MDI_PI_UNLOCK(pip); pip = next; } MDI_CLIENT_UNLOCK(ct); if (found_it == 0) { MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" " failed. 
Could not find corresponding pip\n")); return (MDI_FAILURE); } /* * Do a callback into the mdi consumer to let it * know that path is about to get enabled/disabled. */ if (f != NULL) { rv = (*f)(vh->vh_dip, pip, 0, MDI_PI_EXT_STATE(pip), MDI_EXT_STATE_CHANGE | sync_flag | op | MDI_BEFORE_STATE_CHANGE); if (rv != MDI_SUCCESS) { MDI_DEBUG(2, (CE_WARN, vh->vh_dip, "!vo_pi_state_change: failed rv = %x", rv)); } } MDI_PI_LOCK(pip); switch (flags) { case USER_DISABLE: if (op == MDI_DISABLE_OP) MDI_PI_SET_USER_DISABLE(pip); else MDI_PI_SET_USER_ENABLE(pip); break; case DRIVER_DISABLE: if (op == MDI_DISABLE_OP) MDI_PI_SET_DRV_DISABLE(pip); else MDI_PI_SET_DRV_ENABLE(pip); break; case DRIVER_DISABLE_TRANSIENT: if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) MDI_PI_SET_DRV_DISABLE_TRANS(pip); else MDI_PI_SET_DRV_ENABLE_TRANS(pip); break; } MDI_PI_UNLOCK(pip); /* * Do a callback into the mdi consumer to let it * know that path is now enabled/disabled. */ if (f != NULL) { rv = (*f)(vh->vh_dip, pip, 0, MDI_PI_EXT_STATE(pip), MDI_EXT_STATE_CHANGE | sync_flag | op | MDI_AFTER_STATE_CHANGE); if (rv != MDI_SUCCESS) { MDI_DEBUG(2, (CE_WARN, vh->vh_dip, "!vo_pi_state_change: failed rv = %x", rv)); } } } MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" " Returning success pdip = %p cdip = %p\n", op, pdip, cdip)); return (MDI_SUCCESS); } /*ARGSUSED3*/ int mdi_devi_config_one(dev_info_t *pdip, char *devnm, dev_info_t **cdipp, int flags, clock_t timeout) { mdi_pathinfo_t *pip; dev_info_t *dip; clock_t interval = drv_usectohz(100000); /* 0.1 sec */ char *paddr; MDI_DEBUG(2, (CE_NOTE, NULL, "configure device %s", devnm)); if (!MDI_PHCI(pdip)) return (MDI_FAILURE); paddr = strchr(devnm, '@'); if (paddr == NULL) return (MDI_FAILURE); paddr++; /* skip '@' */ pip = mdi_pi_find(pdip, NULL, paddr); while (pip == NULL && timeout > 0) { if (interval > timeout) interval = timeout; if (flags & NDI_DEVI_DEBUG) { cmn_err(CE_CONT, "%s%d: %s timeout %ld %ld\n", ddi_driver_name(pdip), 
ddi_get_instance(pdip), paddr, interval, timeout); } delay(interval); timeout -= interval; interval += interval; pip = mdi_pi_find(pdip, NULL, paddr); } if (pip == NULL) return (MDI_FAILURE); dip = mdi_pi_get_client(pip); if (ndi_devi_online(dip, flags) != NDI_SUCCESS) return (MDI_FAILURE); *cdipp = dip; /* TODO: holding should happen inside search functions */ ndi_hold_devi(dip); return (MDI_SUCCESS); } /* * Ensure phci powered up */ static void i_mdi_pm_hold_pip(mdi_pathinfo_t *pip) { dev_info_t *ph_dip; ASSERT(pip != NULL); ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); if (MDI_PI(pip)->pi_pm_held) { return; } ph_dip = mdi_pi_get_phci(pip); MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_hold_pip for %s%d\n", ddi_get_name(ph_dip), ddi_get_instance(ph_dip))); if (ph_dip == NULL) { return; } MDI_PI_UNLOCK(pip); MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n", DEVI(ph_dip)->devi_pm_kidsupcnt)); pm_hold_power(ph_dip); MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n", DEVI(ph_dip)->devi_pm_kidsupcnt)); MDI_PI_LOCK(pip); MDI_PI(pip)->pi_pm_held = 1; } /* * Allow phci powered down */ static void i_mdi_pm_rele_pip(mdi_pathinfo_t *pip) { dev_info_t *ph_dip = NULL; ASSERT(pip != NULL); ASSERT(MUTEX_HELD(&MDI_PI(pip)->pi_mutex)); if (MDI_PI(pip)->pi_pm_held == 0) { return; } ph_dip = mdi_pi_get_phci(pip); ASSERT(ph_dip != NULL); MDI_PI_UNLOCK(pip); MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_rele_pip for %s%d\n", ddi_get_name(ph_dip), ddi_get_instance(ph_dip))); MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n", DEVI(ph_dip)->devi_pm_kidsupcnt)); pm_rele_power(ph_dip); MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n", DEVI(ph_dip)->devi_pm_kidsupcnt)); MDI_PI_LOCK(pip); MDI_PI(pip)->pi_pm_held = 0; } static void i_mdi_pm_hold_client(mdi_client_t *ct, int incr) { ASSERT(ct); ct->ct_power_cnt += incr; MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_hold_client " "ct_power_cnt = %d incr = %d\n", ct->ct_power_cnt, incr)); ASSERT(ct->ct_power_cnt >= 0); } static void 
i_mdi_rele_all_phci(mdi_client_t *ct)
{
	/*
	 * Release the power hold of every path on the client's path list.
	 * The caller must hold the client's ct_mutex.  Each path is held
	 * across the call because i_mdi_pm_rele_pip() temporarily drops the
	 * path lock.
	 */
	mdi_pathinfo_t  *pip;

	ASSERT(mutex_owned(&ct->ct_mutex));
	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		mdi_hold_path(pip);
		MDI_PI_LOCK(pip);
		i_mdi_pm_rele_pip(pip);
		MDI_PI_UNLOCK(pip);
		mdi_rele_path(pip);
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}
}

/*
 * Subtract "decr" from the client's power hold count and, once the count
 * is zero, release the power holds of all of the client's paths.  The
 * count is only decremented while the client node is at least DS_READY.
 */
static void
i_mdi_pm_rele_client(mdi_client_t *ct, int decr)
{
	ASSERT(ct);

	if (i_ddi_node_state(ct->ct_dip) >= DS_READY) {
		ct->ct_power_cnt -= decr;
		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_rele_client "
		    "ct_power_cnt = %d decr = %d\n", ct->ct_power_cnt, decr));
	}

	ASSERT(ct->ct_power_cnt >= 0);
	if (ct->ct_power_cnt == 0) {
		i_mdi_rele_all_phci(ct);
		return;
	}
}

/*
 * Drop the client's power accounting back to its idle state: zero the hold
 * count, release the power holds of all paths, and flag the count as
 * reset (ct_powercnt_reset) and no longer held (ct_powercnt_held).
 */
static void
i_mdi_pm_reset_client(mdi_client_t *ct)
{
	MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_reset_client "
	    "ct_power_cnt = %d\n", ct->ct_power_cnt));
	ct->ct_power_cnt = 0;
	i_mdi_rele_all_phci(ct);
	ct->ct_powercnt_reset = 1;
	ct->ct_powercnt_held = 0;
}

/*
 * Take a power hold on the pHCI of every path on the client's path list.
 * Mirror image of i_mdi_rele_all_phci(); the caller must hold the client's
 * ct_mutex.
 */
static void
i_mdi_pm_hold_all_phci(mdi_client_t *ct)
{
	mdi_pathinfo_t  *pip;

	ASSERT(mutex_owned(&ct->ct_mutex));
	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		mdi_hold_path(pip);
		MDI_PI_LOCK(pip);
		i_mdi_pm_hold_pip(pip);
		MDI_PI_UNLOCK(pip);
		mdi_rele_path(pip);
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}
}

/*
 * Take a power hold on the path's pHCI and bring the pHCI to full power.
 * If pm_powerup() fails the hold is released again.
 *
 * Returns MDI_SUCCESS when the pHCI was powered up, MDI_FAILURE otherwise.
 */
static int
i_mdi_power_one_phci(mdi_pathinfo_t *pip)
{
	int		ret;
	dev_info_t	*ph_dip;

	MDI_PI_LOCK(pip);
	i_mdi_pm_hold_pip(pip);

	ph_dip = mdi_pi_get_phci(pip);
	MDI_PI_UNLOCK(pip);

	/* bring all components of phci to full power */
	MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci "
	    "pm_powerup for %s%d\n", ddi_get_name(ph_dip),
	    ddi_get_instance(ph_dip)));

	ret = pm_powerup(ph_dip);

	if (ret == DDI_FAILURE) {
		MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci "
		    "pm_powerup FAILED for %s%d\n",
		    ddi_get_name(ph_dip), ddi_get_instance(ph_dip)));

		/* undo the hold taken above */
		MDI_PI_LOCK(pip);
		i_mdi_pm_rele_pip(pip);
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	return (MDI_SUCCESS);
}

/*
 * Power up the pHCI of every path on the client's path list.
 */
static int
i_mdi_power_all_phci(mdi_client_t *ct)
{
mdi_pathinfo_t  *pip;
	int		succeeded = 0;

	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		/*
		 * The client lock is dropped around the power-up of each
		 * pHCI; the path is kept held across that window.
		 */
		mdi_hold_path(pip);
		MDI_CLIENT_UNLOCK(ct);
		if (i_mdi_power_one_phci(pip) == MDI_SUCCESS)
			succeeded = 1;

		ASSERT(ct == MDI_PI(pip)->pi_client);
		MDI_CLIENT_LOCK(ct);
		mdi_rele_path(pip);
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}

	/* success as soon as at least one pHCI could be powered up */
	return (succeeded ? MDI_SUCCESS : MDI_FAILURE);
}

/*
 * mdi_bus_power():
 *		1. Place the phci(s) into powered up state so that
 *		   client can do power management
 *		2. Ensure phci powered up as client power managing
 *
 *		Only BUS_POWER_PRE_NOTIFICATION, BUS_POWER_POST_NOTIFICATION
 *		and BUS_POWER_HAS_CHANGED are processed here.
 *		BUS_POWER_NOINVOL is rejected and every other operation is
 *		passed on to pm_busop_bus_power().
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */
int
mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op,
    void *arg, void *result)
{
	int			ret = MDI_SUCCESS;
	pm_bp_child_pwrchg_t	*bpc;
	mdi_client_t		*ct;
	dev_info_t		*cdip;
	pm_bp_has_changed_t	*bphc;

	/*
	 * BUS_POWER_NOINVOL not supported
	 */
	if (op == BUS_POWER_NOINVOL)
		return (MDI_FAILURE);

	/*
	 * ignore other OPs.
	 * return quickly to save cpu cycles on the ct processing
	 */
	switch (op) {
	case BUS_POWER_PRE_NOTIFICATION:
	case BUS_POWER_POST_NOTIFICATION:
		bpc = (pm_bp_child_pwrchg_t *)arg;
		cdip = bpc->bpc_dip;
		break;
	case BUS_POWER_HAS_CHANGED:
		bphc = (pm_bp_has_changed_t *)arg;
		cdip = bphc->bphc_dip;
		break;
	default:
		return (pm_busop_bus_power(parent, impl_arg, op, arg, result));
	}

	ASSERT(MDI_CLIENT(cdip));

	ct = i_devi_get_client(cdip);
	if (ct == NULL)
		return (MDI_FAILURE);

	/*
	 * wait till the mdi_pathinfo node state change are processed
	 */
	MDI_CLIENT_LOCK(ct);
	switch (op) {
	case BUS_POWER_PRE_NOTIFICATION:
		MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power "
		    "BUS_POWER_PRE_NOTIFICATION:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d\n",
		    PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp));

		/* serialize power level change per client */
		while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
			cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

		/* cleared again by the matching POST_NOTIFICATION */
		MDI_CLIENT_SET_POWER_TRANSITION(ct);

		if (ct->ct_power_cnt == 0) {
			ret = i_mdi_power_all_phci(ct);
		}

		/*
		 * if new_level > 0:
		 *	- hold phci(s)
		 *	- power up phci(s) if not already
		 * ignore power down
		 */
		if (bpc->bpc_nlevel > 0) {
			if (!DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
				    "mdi_bus_power i_mdi_pm_hold_client\n"));
				i_mdi_pm_hold_client(ct, ct->ct_path_count);
			}
		}
		break;
	case BUS_POWER_POST_NOTIFICATION:
		MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power "
		    "BUS_POWER_POST_NOTIFICATION:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d\n",
		    PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp,
		    *(int *)result));

		/* record the new power state only if the change worked */
		if (*(int *)result == DDI_SUCCESS) {
			if (bpc->bpc_nlevel > 0) {
				MDI_CLIENT_SET_POWER_UP(ct);
			} else {
				MDI_CLIENT_SET_POWER_DOWN(ct);
			}
		}

		/* release the hold we did in pre-notification */
		if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) &&
		    !DEVI_IS_ATTACHING(ct->ct_dip)) {
			MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
			    "mdi_bus_power i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		}

		if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) {
			/* another thread might started attaching */
			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
				    "mdi_bus_power i_mdi_pm_rele_client\n"));
				i_mdi_pm_rele_client(ct, ct->ct_path_count);
			/* detaching has been taken care in pm_post_unconfig */
			} else if (!DEVI_IS_DETACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
				    "mdi_bus_power i_mdi_pm_reset_client\n"));
				i_mdi_pm_reset_client(ct);
			}
		}

		/* let the next waiter in PRE_NOTIFICATION proceed */
		MDI_CLIENT_CLEAR_POWER_TRANSITION(ct);
		cv_broadcast(&ct->ct_powerchange_cv);

		break;

	/* need to do more */
	case BUS_POWER_HAS_CHANGED:
		MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, "mdi_bus_power "
		    "BUS_POWER_HAS_CHANGED:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d\n",
		    PM_NAME(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip),
		    bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp));

		/* power level went up: power and hold the pHCIs */
		if (bphc->bphc_nlevel > 0 &&
		    bphc->bphc_nlevel > bphc->bphc_olevel) {
			if (ct->ct_power_cnt == 0) {
				ret = i_mdi_power_all_phci(ct);
			}
			MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip,
			    "mdi_bus_power i_mdi_pm_hold_client\n"));
			i_mdi_pm_hold_client(ct, ct->ct_path_count);
		}

		/* power level dropped to 0 from a known level: release */
		if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) {
			MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip,
			    "mdi_bus_power i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		}
		break;
	}

	MDI_CLIENT_UNLOCK(ct);
	return (ret);
}

/*
 * Power management preparation for configuring one client node.
 *
 * Waits for any power transition on the client to finish.  If the client
 * is in the failed state and does not already have its power count held,
 * the pHCIs are powered up (when the hold count is zero), ct_path_count
 * holds are added and the count is flagged as held.  A client that is not
 * failed, or whose count is already held, needs nothing and succeeds.
 *
 * Returns MDI_FAILURE if child has no client structure, otherwise
 * MDI_SUCCESS or the result of powering up the pHCIs.
 */
static int
i_mdi_pm_pre_config_one(dev_info_t *child)
{
	int		ret = MDI_SUCCESS;
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return (MDI_FAILURE);

	MDI_CLIENT_LOCK(ct);
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	if (!MDI_CLIENT_IS_FAILED(ct)) {
		MDI_CLIENT_UNLOCK(ct);
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_pre_config_one already configured\n"));
		return (MDI_SUCCESS);
	}

	if (ct->ct_powercnt_held) {
		MDI_CLIENT_UNLOCK(ct);
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_pre_config_one ALREADY held\n"));
		return (MDI_SUCCESS);
	}

	if (ct->ct_power_cnt == 0) {
		ret = i_mdi_power_all_phci(ct);
	}
	MDI_DEBUG(4, (CE_NOTE, child,
	    "i_mdi_pm_pre_config_one i_mdi_pm_hold_client\n"));
	i_mdi_pm_hold_client(ct, ct->ct_path_count);
	ct->ct_powercnt_held = 1;
	ct->ct_powercnt_reset = 0;
	MDI_CLIENT_UNLOCK(ct);
	return (ret);
}

/*
 * Power management preparation before a config operation under the vHCI
 * "parent".  With a specific child only that client is prepared; otherwise
 * every child of the vHCI is prepared in turn, stopping at the first
 * failure.
 */
static int
i_mdi_pm_pre_config(dev_info_t *parent, dev_info_t *child)
{
	int			ret = MDI_SUCCESS;
	dev_info_t		*cdip;
	int			circ;

	ASSERT(MDI_VHCI(parent));

	/* ndi_devi_config_one */
	if (child) {
		return (i_mdi_pm_pre_config_one(child));
	}

	/* devi_config_common */
	ndi_devi_enter(parent, &circ);
	cdip = ddi_get_child(parent);
	while (cdip) {
		dev_info_t *next = ddi_get_next_sibling(cdip);

		ret = i_mdi_pm_pre_config_one(cdip);
		if (ret != MDI_SUCCESS)
			break;
		cdip = next;
	}
	ndi_devi_exit(parent, circ);
	return (ret);
}

/*
 * Power management preparation for unconfiguring one client node.
 * *held is set to 1 when the client's power count is (or already was) held
 * on behalf of the unconfig operation.
 */
static int
i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags)
{
	int		ret = MDI_SUCCESS;
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return (MDI_FAILURE);

	MDI_CLIENT_LOCK(ct);
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv,
&ct->ct_mutex);

	/* nothing to hold for a node that is no longer attached */
	if (i_ddi_node_state(ct->ct_dip) < DS_READY) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_pre_unconfig node detached already\n"));
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_SUCCESS);
	}

	/* refuse an auto-detach of a client that is powered down */
	if (MDI_CLIENT_IS_POWERED_DOWN(ct) &&
	    (flags & NDI_AUTODETACH)) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_pre_unconfig auto-modunload\n"));
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_FAILURE);
	}

	if (ct->ct_powercnt_held) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_pre_unconfig ct_powercnt_held\n"));
		MDI_CLIENT_UNLOCK(ct);
		*held = 1;
		return (MDI_SUCCESS);
	}

	if (ct->ct_power_cnt == 0) {
		ret = i_mdi_power_all_phci(ct);
	}
	MDI_DEBUG(4, (CE_NOTE, child,
	    "i_mdi_pm_pre_unconfig i_mdi_pm_hold_client\n"));
	i_mdi_pm_hold_client(ct, ct->ct_path_count);
	ct->ct_powercnt_held = 1;
	ct->ct_powercnt_reset = 0;
	MDI_CLIENT_UNLOCK(ct);
	if (ret == MDI_SUCCESS)
		*held = 1;
	return (ret);
}

/*
 * Power management preparation before an unconfig operation under the vHCI
 * "parent".  *held is cleared first.  With a specific child only that
 * client is prepared; otherwise every child is prepared, and the overall
 * result is MDI_SUCCESS if any of them reported a hold, else the result of
 * the last child processed.
 */
static int
i_mdi_pm_pre_unconfig(dev_info_t *parent, dev_info_t *child, int *held,
    int flags)
{
	int			ret = MDI_SUCCESS;
	dev_info_t		*cdip;
	int			circ;

	ASSERT(MDI_VHCI(parent));
	*held = 0;

	/* ndi_devi_unconfig_one */
	if (child) {
		return (i_mdi_pm_pre_unconfig_one(child, held, flags));
	}

	/* devi_unconfig_common */
	ndi_devi_enter(parent, &circ);
	cdip = ddi_get_child(parent);
	while (cdip) {
		dev_info_t *next = ddi_get_next_sibling(cdip);

		ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags);
		cdip = next;
	}
	ndi_devi_exit(parent, circ);

	if (*held)
		ret = MDI_SUCCESS;

	return (ret);
}

/*
 * Power management cleanup after configuring one client node.
 *
 * Does nothing unless the client's power count is held and has not been
 * reset since, and leaves a still-failed client alone.  Otherwise the
 * accounting is either reset (client powered down or below DS_READY, and
 * not attaching) or reduced by the number of paths that are currently
 * ONLINE or STANDBY.  In both cases the "held" flag is cleared.
 */
static void
i_mdi_pm_post_config_one(dev_info_t *child)
{
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return;

	MDI_CLIENT_LOCK(ct);
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	if (ct->ct_powercnt_reset || !ct->ct_powercnt_held) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_config_one NOT held\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* client has not been updated */
	if (MDI_CLIENT_IS_FAILED(ct)) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_config_one NOT configured\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* another thread might have powered it down or detached it */
	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
	    !DEVI_IS_ATTACHING(ct->ct_dip)) ||
	    (i_ddi_node_state(ct->ct_dip) < DS_READY &&
	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_config i_mdi_pm_reset_client\n"));
		i_mdi_pm_reset_client(ct);
	} else {
		mdi_pathinfo_t	*pip, *next;
		int	valid_path_count = 0;

		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_config i_mdi_pm_rele_client\n"));
		/* count the paths that are ONLINE or STANDBY */
		pip = ct->ct_path_head;
		while (pip != NULL) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
				== MDI_PATHINFO_STATE_ONLINE ||
			    (MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
				== MDI_PATHINFO_STATE_STANDBY)
				valid_path_count ++;
			MDI_PI_UNLOCK(pip);
			pip = next;
		}
		i_mdi_pm_rele_client(ct, valid_path_count);
	}
	ct->ct_powercnt_held = 0;
	MDI_CLIENT_UNLOCK(ct);
}

/*
 * Power management cleanup after a config operation under the vHCI
 * "parent": applied to the given child only, or to every child of the
 * vHCI when child is NULL.
 */
static void
i_mdi_pm_post_config(dev_info_t *parent, dev_info_t *child)
{
	int		circ;
	dev_info_t	*cdip;
	ASSERT(MDI_VHCI(parent));

	/* ndi_devi_config_one */
	if (child) {
		i_mdi_pm_post_config_one(child);
		return;
	}

	/* devi_config_common */
	ndi_devi_enter(parent, &circ);
	cdip = ddi_get_child(parent);
	while (cdip) {
		dev_info_t *next = ddi_get_next_sibling(cdip);

		i_mdi_pm_post_config_one(cdip);
		cdip = next;
	}
	ndi_devi_exit(parent, circ);
}

/*
 * Power management cleanup after unconfiguring one client node.
 *
 * Does nothing unless the client's power count is held.  The accounting is
 * reset when the client is powered down while still DS_READY, or when the
 * node is not DS_READY and not attaching; otherwise it is left as is.
 */
static void
i_mdi_pm_post_unconfig_one(dev_info_t *child)
{
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return;

	MDI_CLIENT_LOCK(ct);
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	if (!ct->ct_powercnt_held) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_unconfig NOT held\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* failure detaching or another thread just attached it */
	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
	    i_ddi_node_state(ct->ct_dip) == DS_READY) ||
	    (i_ddi_node_state(ct->ct_dip) != DS_READY &&
	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_unconfig i_mdi_pm_reset_client\n"));
		i_mdi_pm_reset_client(ct);
	}

	MDI_DEBUG(4, (CE_NOTE, child,
	    "i_mdi_pm_post_unconfig not changed\n"));
	MDI_CLIENT_UNLOCK(ct);
}

/*
 * Power management cleanup after an unconfig operation under the vHCI
 * "parent".  Skipped entirely when the matching pre-unconfig step did not
 * report a hold ("held" is 0); otherwise applied to the given child, or to
 * every child of the vHCI when child is NULL.
 */
static void
i_mdi_pm_post_unconfig(dev_info_t *parent, dev_info_t *child, int held)
{
	int			circ;
	dev_info_t		*cdip;

	ASSERT(MDI_VHCI(parent));

	if (!held) {
		MDI_DEBUG(4, (CE_NOTE, parent,
		    "i_mdi_pm_post_unconfig held = %d\n", held));
		return;
	}

	if (child) {
		i_mdi_pm_post_unconfig_one(child);
		return;
	}

	ndi_devi_enter(parent, &circ);
	cdip = ddi_get_child(parent);
	while (cdip) {
		dev_info_t *next = ddi_get_next_sibling(cdip);

		i_mdi_pm_post_unconfig_one(cdip);
		cdip = next;
	}
	ndi_devi_exit(parent, circ);
}

/*
 * mdi_power():
 *		Entry point for the MDI power management operations
 *		(mdi_pm_op_t) issued against the vHCI node vdip.
 *
 *		For the config/unconfig operations devnm, when given, names
 *		the client child to operate on; args carries the "held"
 *		indication (an int) between the PRE_UNCONFIG and
 *		POST_UNCONFIG steps.  For MDI_PM_HOLD_POWER and
 *		MDI_PM_RELE_POWER args is the client dev_info node itself.
 *		Unknown operations are ignored.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */
int
mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags)
{
	int			circ, ret = MDI_SUCCESS;
	dev_info_t		*client_dip = NULL;
	mdi_client_t		*ct;

	/*
	 * Handling ndi_devi_config_one and ndi_devi_unconfig_one.
	 * Power up pHCI for the named client device.
	 * Note: Before the client is enumerated under vhci by phci,
	 * client_dip can be NULL. Then proceed to power up all the
	 * pHCIs.
*/
	if (devnm != NULL) {
		/* the child lookup is done with the vHCI node entered */
		ndi_devi_enter(vdip, &circ);
		client_dip = ndi_devi_findchild(vdip, devnm);
		ndi_devi_exit(vdip, circ);
	}

	MDI_DEBUG(4, (CE_NOTE, vdip, "mdi_power op = %d\n", op));

	switch (op) {
	case MDI_PM_PRE_CONFIG:
		ret = i_mdi_pm_pre_config(vdip, client_dip);
		break;

	case MDI_PM_PRE_UNCONFIG:
		/* args is the caller's int "held" flag, filled in by callee */
		ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args,
		    flags);
		break;

	case MDI_PM_POST_CONFIG:
		i_mdi_pm_post_config(vdip, client_dip);
		break;

	case MDI_PM_POST_UNCONFIG:
		/*
		 * args is read as an int "held" flag here.
		 * NOTE(review): args is dereferenced without a NULL check --
		 * presumably callers always pass the flag; verify.
		 */
		i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args);
		break;

	case MDI_PM_HOLD_POWER:
	case MDI_PM_RELE_POWER:
		/*
		 * For these two operations args carries the client dip and
		 * replaces whatever the devnm lookup above produced.
		 * NOTE(review): args and the result of i_devi_get_client()
		 * are only guarded by ASSERT() before use.
		 */
		ASSERT(args);

		client_dip = (dev_info_t *)args;
		ASSERT(MDI_CLIENT(client_dip));

		ct = i_devi_get_client(client_dip);
		MDI_CLIENT_LOCK(ct);

		if (op == MDI_PM_HOLD_POWER) {
			/* power up and take the hold only from a zero count */
			if (ct->ct_power_cnt == 0) {
				(void) i_mdi_power_all_phci(ct);
				MDI_DEBUG(4, (CE_NOTE, client_dip,
				    "mdi_power i_mdi_pm_hold_client\n"));
				i_mdi_pm_hold_client(ct, ct->ct_path_count);
			}
		} else {
			/* release while attaching, otherwise reset the count */
			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, client_dip,
				    "mdi_power i_mdi_pm_rele_client\n"));
				i_mdi_pm_rele_client(ct, ct->ct_path_count);
			} else {
				MDI_DEBUG(4, (CE_NOTE, client_dip,
				    "mdi_power i_mdi_pm_reset_client\n"));
				i_mdi_pm_reset_client(ct);
			}
		}

		MDI_CLIENT_UNLOCK(ct);
		break;

	default:
		break;
	}

	return (ret);
}

/*
 * mdi_component_is_vhci():
 *	Return MDI_SUCCESS if dip is a vHCI node (per MDI_VHCI()), otherwise
 *	MDI_FAILURE.  On success, if mdi_class is non-NULL, *mdi_class is set
 *	to the vHCI's class string (vh_class).
 */
int
mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class)
{
	mdi_vhci_t *vhci;

	if (!MDI_VHCI(dip))
		return (MDI_FAILURE);

	if (mdi_class) {
		vhci = DEVI(dip)->devi_mdi_xhci;
		ASSERT(vhci);
		*mdi_class = vhci->vh_class;
	}

	return (MDI_SUCCESS);
}

/*
 * mdi_component_is_phci():
 *	Return MDI_SUCCESS if dip is a pHCI node (per MDI_PHCI()), otherwise
 *	MDI_FAILURE.  On success, if mdi_class is non-NULL, *mdi_class is set
 *	to the class string of the vHCI the pHCI points to (ph_vhci).
 */
int
mdi_component_is_phci(dev_info_t *dip, const char **mdi_class)
{
	mdi_phci_t *phci;

	if (!MDI_PHCI(dip))
		return (MDI_FAILURE);

	if (mdi_class) {
		phci = DEVI(dip)->devi_mdi_xhci;
		ASSERT(phci);
		*mdi_class = phci->ph_vhci->vh_class;
	}

	return (MDI_SUCCESS);
}

/*
 * mdi_component_is_client():
 *	Return MDI_SUCCESS if dip is a client node (per MDI_CLIENT()),
 *	otherwise MDI_FAILURE.  On success, if mdi_class is non-NULL,
 *	*mdi_class is set to the class string of the client's vHCI (ct_vhci).
 */
int
mdi_component_is_client(dev_info_t *dip, const char **mdi_class)
{
	mdi_client_t *client;

	if (!MDI_CLIENT(dip))
		return (MDI_FAILURE);

	if (mdi_class) {
		client = DEVI(dip)->devi_mdi_client;
		ASSERT(client);
		*mdi_class
= client->ct_vhci->vh_class;
	}

	return (MDI_SUCCESS);
}

/*
 * XXX This list should include all phci drivers needed during boot time
 * though it currently contains "fp" only.
 * Hopefully, the mechanism provided here will be replaced with a better
 * mechanism by vhci driven enumeration project.
 */
static char *phci_driver_list[] = { "fp" };
/* number of entries in phci_driver_list[] */
#define N_PHCI_DRIVERS (sizeof (phci_driver_list) / sizeof (char *))

/*
 * i_mdi_attach_phci_drivers():
 *	For every driver name in phci_driver_list[] that maps to a valid
 *	major number, call ddi_hold_installed_driver() and, if that
 *	succeeds, immediately drop the hold again with ddi_rele_driver().
 *
 *	NOTE(review): the empty parameter list is not a prototype; "(void)"
 *	would state the intent explicitly.
 */
static void
i_mdi_attach_phci_drivers()
{
	int i;
	major_t m;

	for (i = 0; i < N_PHCI_DRIVERS; i++) {
		m = ddi_name_to_major(phci_driver_list[i]);
		/* (major_t)-1 is the "no such driver" result checked for here */
		if (m != (major_t)-1) {
			if (ddi_hold_installed_driver(m) != NULL)
				ddi_rele_driver(m);
		}
	}
}

/*
 * bus config the specified phci
 *
 * arg is the mdi_phci_config_t filled in by i_mdi_bus_config_all_phcis();
 * the function has the void (*)(void *) shape used with taskq_dispatch()
 * there.  The operation, flags and major are read from the shared
 * mdi_vhci_config_t that phc_vhc points to.
 */
static void
i_mdi_phci_bus_config(void *arg)
{
	mdi_phci_config_t *phc = (mdi_phci_config_t *)arg;
	mdi_vhci_config_t *vhc;
	dev_info_t *ph_dip;
	int rv;

	ASSERT(phc);
	vhc = phc->phc_vhc;
	ASSERT(vhc->vhc_op == BUS_CONFIG_ALL ||
	    vhc->vhc_op == BUS_CONFIG_DRIVER);

	/*
	 * Must have already held the phci parent in
	 * i_mdi_bus_config_all_phcis().
	 * First configure the phci itself.
	 */
	/* phc_devnm + 1 skips the first character of the stored name */
	rv = ndi_devi_config_one(phc->phc_parent_dip, phc->phc_devnm + 1,
	    &ph_dip, vhc->vhc_flags);

	/* release the hold that i_mdi_bus_config_all_phcis() placed */
	ndi_rele_devi(phc->phc_parent_dip);

	if (rv == NDI_SUCCESS) {
		/* now bus config the phci */
		if (vhc->vhc_op == BUS_CONFIG_DRIVER) {
			(void) ndi_devi_config_driver(ph_dip, vhc->vhc_flags,
			    vhc->vhc_major);
		} else
			(void) ndi_devi_config(ph_dip, vhc->vhc_flags);

		/* release the hold that ndi_devi_config_one() placed */
		ndi_rele_devi(ph_dip);
	}
}

/*
 * Bus config all registered phcis associated with the vhci in parallel.
 * This process guarantees that the child nodes are enumerated under the vhci,
 * but not necessarily attached.
 * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL.
*/
/*
 *	vdip	 - vhci dev_info node; asserted not to be busy-owned.
 *	flags	 - stored in vhc_flags and used by the per-phci worker.
 *	op	 - BUS_CONFIG_ALL or BUS_CONFIG_DRIVER (asserted).
 *	maj	 - stored in vhc_major; the worker uses it for
 *		   BUS_CONFIG_DRIVER.
 *	optimize - when non-zero, the request may return MDI_SUCCESS without
 *		   doing any work (see the cutoff-time checks below).
 *
 *	Returns MDI_SUCCESS, or MDI_FAILURE if taskq_create() returns NULL.
 */
static int
i_mdi_bus_config_all_phcis(dev_info_t *vdip, uint_t flags,
    ddi_bus_config_op_t op, major_t maj, int optimize)
{
	mdi_vhci_t *vh;
	mdi_phci_t *ph;
	mdi_phci_config_t *phc;
	int64_t req_time;
	int phci_count, rv;
	/* function-wide flag: the phci driver attach below runs only once */
	static int first_time = 1;

	ASSERT(op == BUS_CONFIG_ALL || op == BUS_CONFIG_DRIVER);
	ASSERT(!DEVI_BUSY_OWNED(vdip));

	MDI_DEBUG(2, (CE_NOTE, vdip,
	    "!MDI: %s on all phcis: major = %d, flags = 0x%x, optimize = %d\n",
	    (op == BUS_CONFIG_DRIVER) ? "BUS_CONFIG_DRIVER" : "BUS_CONFIG_ALL",
	    (int)maj, flags, optimize));

	vh = i_devi_get_vhci(vdip);
	ASSERT(vh);

	mutex_enter(&mdi_mutex);

	/* arrival time of this request, compared against start times below */
	req_time = lbolt64;

	/*
	 * Reduce unnecessary BUS_CONFIG_ALLs when opening stale
	 * /dev/[r]dsk links.
	 */
	if (optimize && (req_time < vh->vh_bus_config.vhc_cutoff_time)) {
		mutex_exit(&mdi_mutex);
		return (MDI_SUCCESS);
	}

	/*
	 * To initiate bus configs on all phcis in parallel, create a taskq
	 * with multiple threads. Since creation of a taskq is a heavy weight
	 * operation, taskq is created once per vhci and destroyed only when
	 * vhci unregisters with mdi.
	 *
	 * If multiple bus config requests arrive at a time, bus configs on
	 * phcis are initiated on behalf of one of the requests. Other requests
	 * wait until the bus configs on phcis is done.
	 *
	 * When a BUS_CONFIG_ALL on phcis completes, the following is done
	 * to avoid more of unnecessary bus configs.
	 *
	 *	o all BUS_CONFIG_ALL requests currently waiting with optimize
	 *	flag set are returned, i.e., no new BUS_CONFIG_ALL is initiated
	 *	on phcis on behalf of these requests.
	 *
	 *	o all BUS_CONFIG_ALL or BUS_CONFIG_DRIVER requests currently
	 *	waiting but have arrived prior to initiating BUS_CONFIG_ALL on
	 *	phcis are also returned.
	 *
	 * In other cases a new BUS_CONFIG_ALL or BUS_CONFIG_DRIVER is
	 * initiated on phcis on behalf of a new request.
	 */

	/* check if a bus config on phcis is in progress */
	while (vh->vh_bus_config.vhc_start_time != 0) {
		ddi_bus_config_op_t current_op;
		int64_t start_time;

		/* snapshot the in-progress request before sleeping */
		current_op = vh->vh_bus_config.vhc_op;
		start_time = vh->vh_bus_config.vhc_start_time;

		/* wait until the current bus configs on phcis are done */
		while (vh->vh_bus_config.vhc_start_time == start_time)
			cv_wait(&vh->vh_bus_config.vhc_cv, &mdi_mutex);

		/* the finished BUS_CONFIG_ALL may satisfy this request too */
		if (current_op == BUS_CONFIG_ALL &&
		    vh->vh_bus_config.vhc_cutoff_time > 0 &&
		    (optimize || req_time < start_time)) {
			mutex_exit(&mdi_mutex);
			return (MDI_SUCCESS);
		}
	}

	/*
	 * At this point we are single threaded until vh_bus_config.start_time
	 * is reset to 0 at the end of this function.
	 */

	vh->vh_bus_config.vhc_op = op;
	vh->vh_bus_config.vhc_major = maj;
	vh->vh_bus_config.vhc_flags = flags;
	vh->vh_bus_config.vhc_start_time = lbolt64;

	if (first_time && strcmp(vh->vh_class, MDI_HCI_CLASS_SCSI) == 0) {
		/* mdi_mutex is dropped around the driver attach call */
		mutex_exit(&mdi_mutex);
		i_mdi_attach_phci_drivers();
		mutex_enter(&mdi_mutex);
		first_time = 0;
	}

	ASSERT(vh->vh_phci_count >= 0);
	if (vh->vh_phci_count == 0) {
		rv = MDI_SUCCESS;
		/* mdi_mutex is still held here, hence out1 rather than out */
		goto out1;
	}

	/*
	 * Create a taskq to initiate bus configs in parallel on phcis.
	 * Taskq allocation can be done in mdi_vhci_register() routine
	 * instead of here. For most systems, doing it here on demand saves
	 * resources as this code path is never called most of the times.
	 */
	if (vh->vh_bus_config.vhc_taskq == NULL) {
		/*
		 * it is ok even if vh->vh_phci_count changes after we release
		 * the mdi_mutex as phci_count is used just as an
		 * advisory number to taskq_create.
		 */
		phci_count = vh->vh_phci_count;
		mutex_exit(&mdi_mutex);

		/*
		 * As we are single threaded, it is ok to access the
		 * vh_bus_config.taskq member of vh outside of mdi_mutex
		 */
		if ((vh->vh_bus_config.vhc_taskq = taskq_create(
		    "mdi_bus_config_taskq", mdi_max_bus_config_threads,
		    MDI_TASKQ_PRI, phci_count, INT_MAX,
		    TASKQ_PREPOPULATE | TASKQ_DYNAMIC)) == NULL) {
			rv = MDI_FAILURE;
			/* mdi_mutex is not held here; out re-acquires it */
			goto out;
		}

		mutex_enter(&mdi_mutex);
	}

	/* allocate at least vh->vh_phci_count phci bus config structures */
	while (vh->vh_bus_config.vhc_phc_cnt < vh->vh_phci_count) {
		int count;

		count = vh->vh_phci_count - vh->vh_bus_config.vhc_phc_cnt;
		/* the KM_SLEEP allocations are done with mdi_mutex dropped */
		mutex_exit(&mdi_mutex);
		while (count--) {
			phc = kmem_alloc(sizeof (*phc), KM_SLEEP);
			phc->phc_vhc = &vh->vh_bus_config;
			/*
			 * there is no need to hold a lock here as we
			 * are single threaded and no one else manipulates
			 * the list while we are here.
			 */
			phc->phc_next = vh->vh_bus_config.vhc_phc;
			vh->vh_bus_config.vhc_phc = phc;
			vh->vh_bus_config.vhc_phc_cnt++;
		}
		mutex_enter(&mdi_mutex);
		/*
		 * as new phcis could register with mdi after we dropped
		 * the mdi_mutex, we need to recheck the vh->vh_phci_count.
		 * Hence the while loop.
		 */
	}

	/* walk the phci list and the config-structure list in lock step */
	for (ph = vh->vh_phci_head, phc = vh->vh_bus_config.vhc_phc;
	    ph != NULL; ph = ph->ph_next, phc = phc->phc_next) {

		ASSERT(phc != NULL);

		/* build a phci config handle to be passed to a taskq thread */
		MDI_PHCI_LOCK(ph);
		ASSERT(ph->ph_dip);

		/*
		 * We need to hold the phci dip before bus configuring the phci.
		 * But placing a hold on the phci dip is not safe here due to
		 * the race with phci detach. To get around this race,
		 * we place a hold on the phci dip's parent and note down
		 * the phci's name@addr. Later, in i_mdi_phci_bus_config(),
		 * we'll first configure the phci itself before bus
		 * configuring the phci.
		 */
		phc->phc_parent_dip = ddi_get_parent(ph->ph_dip);
		ndi_hold_devi(phc->phc_parent_dip);
		(void) ddi_deviname(ph->ph_dip, phc->phc_devnm);
		MDI_PHCI_UNLOCK(ph);
	}

	/* number of handles prepared above; taken while mdi_mutex is held */
	phci_count = vh->vh_phci_count;
	/*
	 * NOTE(review): -1 is treated as a special cutoff value here and at
	 * out1 below; its meaning is set outside this function -- confirm.
	 */
	if (vh->vh_bus_config.vhc_cutoff_time == -1)
		vh->vh_bus_config.vhc_cutoff_time = 0;
	mutex_exit(&mdi_mutex);

	MDI_DEBUG(2, (CE_NOTE, vdip,
	    "!MDI: initiating %s on all phcis, major = %d, flags = 0x%x\n",
	    (op == BUS_CONFIG_DRIVER) ? "BUS_CONFIG_DRIVER" : "BUS_CONFIG_ALL",
	    (int)maj, flags));

	/*
	 * again, no need to hold a lock here as we are single threaded and
	 * no one else manipulates the list while we are here.
	 */
	for (phc = vh->vh_bus_config.vhc_phc; phci_count--;
	    phc = phc->phc_next) {
		(void) taskq_dispatch(vh->vh_bus_config.vhc_taskq,
		    i_mdi_phci_bus_config, phc, TQ_SLEEP);
	}

	/* wait until all phci bus configs are done */
	taskq_wait(vh->vh_bus_config.vhc_taskq);
	rv = MDI_SUCCESS;

out:
	/* entered without mdi_mutex held */
	mutex_enter(&mdi_mutex);
out1:
	/* entered with mdi_mutex held; marks the bus config as finished */
	vh->vh_bus_config.vhc_start_time = 0;
	if (op == BUS_CONFIG_ALL && vh->vh_bus_config.vhc_cutoff_time != -1) {
		/* mdi_bus_config_timeout is scaled by 1000000 for usec input */
		vh->vh_bus_config.vhc_cutoff_time = lbolt64 +
		    (int64_t)drv_usectohz(mdi_bus_config_timeout * 1000000);
	}
	/* wake every request sleeping in the wait loop above */
	cv_broadcast(&vh->vh_bus_config.vhc_cv);
	mutex_exit(&mdi_mutex);

	MDI_DEBUG(2, (CE_NOTE, vdip, "!MDI: %s on all phcis %s\n",
	    (op == BUS_CONFIG_DRIVER) ? "BUS_CONFIG_DRIVER" : "BUS_CONFIG_ALL",
	    (rv == MDI_SUCCESS) ? "successful" : "failed"));

	return (rv);
}

/*
 * A simple bus config implementation for vhcis with the assumption that all
 * phcis are always registered with MDI.
 *
 * BUS_CONFIG_ALL
 *
 *	Do BUS_CONFIG_ALL on all phcis associated with the vhci.
 *
 * BUS_CONFIG_DRIVER
 *
 *	Do BUS_CONFIG_DRIVER on all phcis associated with the vhci.
 *
 * BUS_CONFIG_ONE
 *
 *	If the requested child has already been enumerated under the vhci
 *	configure the child and return. Otherwise do BUS_CONFIG_ALL on all
 *	phcis associated with the vhci.
*/
/*
 *	vdip	- vhci dev_info node.
 *	flags, op, arg, child - handed on to ndi_busop_bus_config(); for
 *		  BUS_CONFIG_DRIVER arg is additionally interpreted as the
 *		  driver major number.
 *
 *	Returns MDI_SUCCESS only when the final ndi_busop_bus_config() call
 *	(or, for BUS_CONFIG_ONE, the initial direct attempt) returns
 *	NDI_SUCCESS; MDI_FAILURE otherwise.
 */
int
mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op,
    void *arg, dev_info_t **child)
{
	int rv = MDI_SUCCESS;

	/*
	 * While bus configuring phcis, the phci driver interactions with MDI
	 * cause child nodes to be enumerated under the vhci node for which
	 * they need to ndi_devi_enter the vhci node.
	 *
	 * Unfortunately, to avoid the deadlock, we ourselves cannot wait
	 * for the bus config operations on phcis to finish while holding the
	 * ndi_devi_enter lock. To avoid this deadlock, skip bus configs on
	 * phcis and call the default framework provided bus config function
	 * if we are called with ndi_devi_enter lock held.
	 */
	if (DEVI_BUSY_OWNED(vdip)) {
		MDI_DEBUG(2, (CE_NOTE, vdip,
		    "!MDI: vhci bus config: vhci dip is busy owned\n"));
		/* rv is still MDI_SUCCESS, so the default path below runs */
		goto default_bus_config;
	}

	switch (op) {
	case BUS_CONFIG_ONE:
		/*
		 * First try to directly configure the requested child.
		 * This will work only if the requested child has already
		 * been enumerated under vhci, which is usually the most common
		 * case.
		 */
		if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
		    NDI_SUCCESS) {
			return (MDI_SUCCESS);
		}

		MDI_DEBUG(2, (CE_NOTE, vdip, "!MDI: BUS_CONFIG_ONE on %s: "
		    "will do BUS_CONFIG_ALL on all phcis\n", (char *)arg));

		/* now do BUS_CONFIG_ALL on all phcis */
		/* last argument 1 == optimize: allows a recent result reuse */
		rv = i_mdi_bus_config_all_phcis(vdip, flags,
		    BUS_CONFIG_ALL, -1, 1);
		break;

	case BUS_CONFIG_DRIVER:
		/* arg carries the driver major number for this operation */
		rv = i_mdi_bus_config_all_phcis(vdip, flags, op,
		    (major_t)(uintptr_t)arg, 0);
		break;

	case BUS_CONFIG_ALL:
		rv = i_mdi_bus_config_all_phcis(vdip, flags, op, -1, 0);
		break;

	default:
		/* other operations go straight to the default bus config */
		break;
	}

default_bus_config:
	/*
	 * i_mdi_bus_config_all_phcis() guarantees that child nodes are
	 * enumerated under the vhci, but not necessarily attached.
	 * Now configure the appropriate child nodes.
*/
	if (rv == MDI_SUCCESS &&
	    ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
	    NDI_SUCCESS) {
		return (MDI_SUCCESS);
	}

	return (MDI_FAILURE);
}

/*
 * mdi_client_get_vhci_private():
 *		Get the vhci private information (ct_vprivate) associated
 *		with the client node dip.  Returns NULL if dip is not an
 *		MDI client (which also trips the ASSERT on DEBUG kernels).
 */
void *
mdi_client_get_vhci_private(dev_info_t *dip)
{
	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
		mdi_client_t *ct;
		ct = i_devi_get_client(dip);
		return (ct->ct_vprivate);
	}
	return (NULL);
}

/*
 * mdi_client_set_vhci_private():
 *		Set the vhci private information (ct_vprivate) of the client
 *		node dip to data.  Does nothing if dip is not an MDI client.
 */
void
mdi_client_set_vhci_private(dev_info_t *dip, void *data)
{
	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
		mdi_client_t *ct;
		ct = i_devi_get_client(dip);
		ct->ct_vprivate = data;
	}
}

/*
 * mdi_pi_get_vhci_private():
 *		Get the vhci private information associated with the
 *		mdi_pathinfo node.  Returns NULL when pip is NULL.
 */
void *
mdi_pi_get_vhci_private(mdi_pathinfo_t *pip)
{
	caddr_t	vprivate = NULL;
	if (pip) {
		vprivate = MDI_PI(pip)->pi_vprivate;
	}
	return (vprivate);
}

/*
 * mdi_pi_set_vhci_private():
 *		Set the vhci private information in the mdi_pathinfo node.
 *		A NULL pip is ignored.
 */
void
mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv)
{
	if (pip) {
		MDI_PI(pip)->pi_vprivate = priv;
	}
}

/*
 * mdi_phci_get_vhci_private():
 *		Get the vhci private information associated with the
 *		mdi_phci node.  Returns NULL if dip is not a pHCI (which
 *		also trips the ASSERT on DEBUG kernels).
 */
void *
mdi_phci_get_vhci_private(dev_info_t *dip)
{
	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
		mdi_phci_t *ph;
		ph = i_devi_get_phci(dip);
		return (ph->ph_vprivate);
	}
	return (NULL);
}

/*
 * mdi_phci_set_vhci_private():
 *		Set the vhci private information in the mdi_phci node.
 *		Does nothing if dip is not a pHCI.
 */
void
mdi_phci_set_vhci_private(dev_info_t *dip, void *priv)
{
	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
		mdi_phci_t *ph;
		ph = i_devi_get_phci(dip);
		ph->ph_vprivate = priv;
	}
}