/* * This file and its contents are supplied under the terms of the * Common Development and Distribution License ("CDDL"), version 1.0. * You may only use this file in accordance with the terms of version * 1.0 of the CDDL. * * A full copy of the text of the CDDL should have accompanied this * source. A copy of the CDDL is also available via the Internet at * http://www.illumos.org/license/CDDL. */ /* * Copyright 2016 Joyent, Inc. * Copyright 2022 MNX Cloud, Inc. */ /* * Overlay device target cache management * * For more information, see the big theory statement in * uts/common/io/overlay/overlay.c */ #include <sys/types.h> #include <sys/ethernet.h> #include <sys/kmem.h> #include <sys/policy.h> #include <sys/sysmacros.h> #include <sys/stream.h> #include <sys/strsun.h> #include <sys/strsubr.h> #include <sys/mac_provider.h> #include <sys/mac_client.h> #include <sys/mac_client_priv.h> #include <sys/vlan.h> #include <sys/crc32.h> #include <sys/cred.h> #include <sys/file.h> #include <sys/errno.h> #include <sys/ddi.h> #include <sys/sunddi.h> #include <sys/overlay_impl.h> #include <sys/sdt.h> /* * This is total straw man, but at least it's a prime number. Here we're * going to have to go through and do a lot of evaluation and understanding as * to how these target caches should grow and shrink, as well as, memory * pressure and evictions. This just gives us a starting point that'll be 'good * enough', until it's not. */ #define OVERLAY_HSIZE 823 /* * We use this data structure to keep track of what requests have been actively * allocated to a given instance so we know what to put back on the pending * list. */ typedef struct overlay_target_hdl { minor_t oth_minor; /* RO */ zoneid_t oth_zoneid; /* RO */ int oth_oflags; /* RO */ list_node_t oth_link; /* overlay_target_lock */ kmutex_t oth_lock; list_t oth_outstanding; /* oth_lock */ } overlay_target_hdl_t; typedef int (*overlay_target_copyin_f)(const void *, void **, size_t *, int); typedef int (*overlay_target_ioctl_f)(overlay_target_hdl_t *, void *); typedef int (*overlay_target_copyout_f)(void *, void *, size_t, int); typedef struct overlay_target_ioctl { int oti_cmd; /* ioctl id */ boolean_t oti_write; /* ioctl requires FWRITE */ boolean_t oti_ncopyout; /* copyout data? */ overlay_target_copyin_f oti_copyin; /* copyin func */ overlay_target_ioctl_f oti_func; /* function to call */ overlay_target_copyout_f oti_copyout; /* copyin func */ size_t oti_size; /* size of user level structure */ } overlay_target_ioctl_t; static kmem_cache_t *overlay_target_cache; static kmem_cache_t *overlay_entry_cache; static id_space_t *overlay_thdl_idspace; static void *overlay_thdl_state; /* * When we support overlay devices in the NGZ, then all of these need to become * zone aware, by plugging into the netstack engine and becoming per-netstack * data. */ static list_t overlay_thdl_list; static kmutex_t overlay_target_lock; static kcondvar_t overlay_target_condvar; static list_t overlay_target_list; static boolean_t overlay_target_excl; /* * Outstanding data per hash table entry. */ static int overlay_ent_size = 128 * 1024; /* ARGSUSED */ static int overlay_target_cache_constructor(void *buf, void *arg, int kmflgs) { overlay_target_t *ott = buf; mutex_init(&ott->ott_lock, NULL, MUTEX_DRIVER, NULL); cv_init(&ott->ott_cond, NULL, CV_DRIVER, NULL); return (0); } /* ARGSUSED */ static void overlay_target_cache_destructor(void *buf, void *arg) { overlay_target_t *ott = buf; cv_destroy(&ott->ott_cond); mutex_destroy(&ott->ott_lock); } /* ARGSUSED */ static int overlay_entry_cache_constructor(void *buf, void *arg, int kmflgs) { overlay_target_entry_t *ote = buf; bzero(ote, sizeof (overlay_target_entry_t)); mutex_init(&ote->ote_lock, NULL, MUTEX_DRIVER, NULL); return (0); } /* ARGSUSED */ static void overlay_entry_cache_destructor(void *buf, void *arg) { overlay_target_entry_t *ote = buf; mutex_destroy(&ote->ote_lock); } static uint64_t overlay_mac_hash(const void *v) { uint32_t crc; CRC32(crc, v, ETHERADDRL, -1U, crc32_table); return (crc); } static int overlay_mac_cmp(const void *a, const void *b) { return (bcmp(a, b, ETHERADDRL)); } /* ARGSUSED */ static void overlay_target_entry_dtor(void *arg) { overlay_target_entry_t *ote = arg; ote->ote_flags = 0; bzero(ote->ote_addr, ETHERADDRL); ote->ote_ott = NULL; ote->ote_odd = NULL; freemsgchain(ote->ote_chead); ote->ote_chead = ote->ote_ctail = NULL; ote->ote_mbsize = 0; ote->ote_vtime = 0; kmem_cache_free(overlay_entry_cache, ote); } static int overlay_mac_avl(const void *a, const void *b) { int i; const overlay_target_entry_t *l, *r; l = a; r = b; for (i = 0; i < ETHERADDRL; i++) { if (l->ote_addr[i] > r->ote_addr[i]) return (1); else if (l->ote_addr[i] < r->ote_addr[i]) return (-1); } return (0); } void overlay_target_init(void) { int ret; ret = ddi_soft_state_init(&overlay_thdl_state, sizeof (overlay_target_hdl_t), 1); VERIFY(ret == 0); overlay_target_cache = kmem_cache_create("overlay_target", sizeof (overlay_target_t), 0, overlay_target_cache_constructor, overlay_target_cache_destructor, NULL, NULL, NULL, 0); overlay_entry_cache = kmem_cache_create("overlay_entry", sizeof (overlay_target_entry_t), 0, overlay_entry_cache_constructor, overlay_entry_cache_destructor, NULL, NULL, NULL, 0); mutex_init(&overlay_target_lock, NULL, MUTEX_DRIVER, NULL); cv_init(&overlay_target_condvar, NULL, CV_DRIVER, NULL); list_create(&overlay_target_list, sizeof (overlay_target_entry_t), offsetof(overlay_target_entry_t, ote_qlink)); list_create(&overlay_thdl_list, sizeof (overlay_target_hdl_t), offsetof(overlay_target_hdl_t, oth_link)); overlay_thdl_idspace = id_space_create("overlay_target_minors", 1, INT32_MAX); } void overlay_target_fini(void) { id_space_destroy(overlay_thdl_idspace); list_destroy(&overlay_thdl_list); list_destroy(&overlay_target_list); cv_destroy(&overlay_target_condvar); mutex_destroy(&overlay_target_lock); kmem_cache_destroy(overlay_entry_cache); kmem_cache_destroy(overlay_target_cache); ddi_soft_state_fini(&overlay_thdl_state); } void overlay_target_free(overlay_dev_t *odd) { if (odd->odd_target == NULL) return; if (odd->odd_target->ott_mode == OVERLAY_TARGET_DYNAMIC) { refhash_t *rp = odd->odd_target->ott_u.ott_dyn.ott_dhash; avl_tree_t *ap = &odd->odd_target->ott_u.ott_dyn.ott_tree; overlay_target_entry_t *ote; /* * Our AVL tree and hashtable contain the same elements, * therefore we should just remove it from the tree, but then * delete the entries when we remove them from the hash table * (which happens through the refhash dtor). */ while ((ote = avl_first(ap)) != NULL) avl_remove(ap, ote); avl_destroy(ap); for (ote = refhash_first(rp); ote != NULL; ote = refhash_next(rp, ote)) { refhash_remove(rp, ote); } refhash_destroy(rp); } ASSERT(odd->odd_target->ott_ocount == 0); kmem_cache_free(overlay_target_cache, odd->odd_target); } int overlay_target_busy() { int ret; mutex_enter(&overlay_target_lock); ret = !list_is_empty(&overlay_thdl_list); mutex_exit(&overlay_target_lock); return (ret); } static void overlay_target_queue(overlay_target_entry_t *entry) { mutex_enter(&overlay_target_lock); mutex_enter(&entry->ote_ott->ott_lock); if (entry->ote_ott->ott_flags & OVERLAY_T_TEARDOWN) { mutex_exit(&entry->ote_ott->ott_lock); mutex_exit(&overlay_target_lock); return; } entry->ote_ott->ott_ocount++; mutex_exit(&entry->ote_ott->ott_lock); list_insert_tail(&overlay_target_list, entry); cv_signal(&overlay_target_condvar); mutex_exit(&overlay_target_lock); } void overlay_target_quiesce(overlay_target_t *ott) { if (ott == NULL) return; mutex_enter(&ott->ott_lock); ott->ott_flags |= OVERLAY_T_TEARDOWN; while (ott->ott_ocount != 0) cv_wait(&ott->ott_cond, &ott->ott_lock); mutex_exit(&ott->ott_lock); } /* * This functions assumes that the destination mode is OVERLAY_PLUGIN_D_IP | * OVERLAY_PLUGIN_D_PORT. As we don't have an implementation of anything else at * this time, say for NVGRE, we drop all packets that mcuh this. */ int overlay_target_lookup(overlay_dev_t *odd, mblk_t *mp, struct sockaddr *sock, socklen_t *slenp) { int ret; struct sockaddr_in6 *v6; overlay_target_t *ott; mac_header_info_t mhi; overlay_target_entry_t *entry; ASSERT(odd->odd_target != NULL); /* * At this point, the overlay device is in a mux which means that it's * been activated. At this point, parts of the target, such as the mode * and the destination are now read-only and we don't have to worry * about synchronization for them. */ ott = odd->odd_target; if (ott->ott_dest != (OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT)) return (OVERLAY_TARGET_DROP); v6 = (struct sockaddr_in6 *)sock; bzero(v6, sizeof (struct sockaddr_in6)); v6->sin6_family = AF_INET6; if (ott->ott_mode == OVERLAY_TARGET_POINT) { mutex_enter(&ott->ott_lock); bcopy(&ott->ott_u.ott_point.otp_ip, &v6->sin6_addr, sizeof (struct in6_addr)); v6->sin6_port = htons(ott->ott_u.ott_point.otp_port); mutex_exit(&ott->ott_lock); *slenp = sizeof (struct sockaddr_in6); return (OVERLAY_TARGET_OK); } ASSERT(ott->ott_mode == OVERLAY_TARGET_DYNAMIC); /* * Note we only want the MAC address here, therefore we won't bother * using mac_vlan_header_info(). If any caller needs the vlan info at * this point, this should change to a call to mac_vlan_header_info(). */ if (mac_header_info(odd->odd_mh, mp, &mhi) != 0) return (OVERLAY_TARGET_DROP); mutex_enter(&ott->ott_lock); entry = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash, mhi.mhi_daddr); if (entry == NULL) { entry = kmem_cache_alloc(overlay_entry_cache, KM_NOSLEEP_LAZY); if (entry == NULL) { mutex_exit(&ott->ott_lock); return (OVERLAY_TARGET_DROP); } bcopy(mhi.mhi_daddr, entry->ote_addr, ETHERADDRL); entry->ote_chead = entry->ote_ctail = mp; entry->ote_mbsize = msgsize(mp); entry->ote_flags |= OVERLAY_ENTRY_F_PENDING; entry->ote_ott = ott; entry->ote_odd = odd; refhash_insert(ott->ott_u.ott_dyn.ott_dhash, entry); avl_add(&ott->ott_u.ott_dyn.ott_tree, entry); mutex_exit(&ott->ott_lock); overlay_target_queue(entry); return (OVERLAY_TARGET_ASYNC); } refhash_hold(ott->ott_u.ott_dyn.ott_dhash, entry); mutex_exit(&ott->ott_lock); mutex_enter(&entry->ote_lock); if (entry->ote_flags & OVERLAY_ENTRY_F_DROP) { ret = OVERLAY_TARGET_DROP; } else if (entry->ote_flags & OVERLAY_ENTRY_F_VALID) { bcopy(&entry->ote_dest.otp_ip, &v6->sin6_addr, sizeof (struct in6_addr)); v6->sin6_port = htons(entry->ote_dest.otp_port); *slenp = sizeof (struct sockaddr_in6); ret = OVERLAY_TARGET_OK; } else { size_t mlen = msgsize(mp); if (mlen + entry->ote_mbsize > overlay_ent_size) { ret = OVERLAY_TARGET_DROP; } else { if (entry->ote_ctail != NULL) { ASSERT(entry->ote_ctail->b_next == NULL); entry->ote_ctail->b_next = mp; entry->ote_ctail = mp; } else { entry->ote_chead = mp; entry->ote_ctail = mp; } entry->ote_mbsize += mlen; if ((entry->ote_flags & OVERLAY_ENTRY_F_PENDING) == 0) { entry->ote_flags |= OVERLAY_ENTRY_F_PENDING; overlay_target_queue(entry); } ret = OVERLAY_TARGET_ASYNC; } } mutex_exit(&entry->ote_lock); mutex_enter(&ott->ott_lock); refhash_rele(ott->ott_u.ott_dyn.ott_dhash, entry); mutex_exit(&ott->ott_lock); return (ret); } /* ARGSUSED */ static int overlay_target_info(overlay_target_hdl_t *thdl, void *arg) { overlay_dev_t *odd; overlay_targ_info_t *oti = arg; odd = overlay_hold_by_dlid(oti->oti_linkid); if (odd == NULL) return (ENOENT); mutex_enter(&odd->odd_lock); oti->oti_flags = 0; oti->oti_needs = odd->odd_plugin->ovp_dest; if (odd->odd_flags & OVERLAY_F_DEGRADED) oti->oti_flags |= OVERLAY_TARG_INFO_F_DEGRADED; if (odd->odd_flags & OVERLAY_F_ACTIVATED) oti->oti_flags |= OVERLAY_TARG_INFO_F_ACTIVE; oti->oti_vnetid = odd->odd_vid; mutex_exit(&odd->odd_lock); overlay_hold_rele(odd); return (0); } /* ARGSUSED */ static int overlay_target_associate(overlay_target_hdl_t *thdl, void *arg) { overlay_dev_t *odd; overlay_target_t *ott; overlay_targ_associate_t *ota = arg; odd = overlay_hold_by_dlid(ota->ota_linkid); if (odd == NULL) return (ENOENT); if (ota->ota_id == 0) { overlay_hold_rele(odd); return (EINVAL); } if (ota->ota_mode != OVERLAY_TARGET_POINT && ota->ota_mode != OVERLAY_TARGET_DYNAMIC) { overlay_hold_rele(odd); return (EINVAL); } if (ota->ota_provides != odd->odd_plugin->ovp_dest) { overlay_hold_rele(odd); return (EINVAL); } if (ota->ota_mode == OVERLAY_TARGET_POINT) { if (ota->ota_provides & OVERLAY_PLUGIN_D_IP) { if (IN6_IS_ADDR_UNSPECIFIED(&ota->ota_point.otp_ip) || IN6_IS_ADDR_V4COMPAT(&ota->ota_point.otp_ip) || IN6_IS_ADDR_V4MAPPED_ANY(&ota->ota_point.otp_ip)) { overlay_hold_rele(odd); return (EINVAL); } } if (ota->ota_provides & OVERLAY_PLUGIN_D_PORT) { if (ota->ota_point.otp_port == 0) { overlay_hold_rele(odd); return (EINVAL); } } } ott = kmem_cache_alloc(overlay_target_cache, KM_SLEEP); ott->ott_flags = 0; ott->ott_ocount = 0; ott->ott_mode = ota->ota_mode; ott->ott_dest = ota->ota_provides; ott->ott_id = ota->ota_id; if (ott->ott_mode == OVERLAY_TARGET_POINT) { bcopy(&ota->ota_point, &ott->ott_u.ott_point, sizeof (overlay_target_point_t)); } else { ott->ott_u.ott_dyn.ott_dhash = refhash_create(OVERLAY_HSIZE, overlay_mac_hash, overlay_mac_cmp, overlay_target_entry_dtor, sizeof (overlay_target_entry_t), offsetof(overlay_target_entry_t, ote_reflink), offsetof(overlay_target_entry_t, ote_addr), KM_SLEEP); avl_create(&ott->ott_u.ott_dyn.ott_tree, overlay_mac_avl, sizeof (overlay_target_entry_t), offsetof(overlay_target_entry_t, ote_avllink)); } mutex_enter(&odd->odd_lock); if (odd->odd_flags & OVERLAY_F_VARPD) { mutex_exit(&odd->odd_lock); kmem_cache_free(overlay_target_cache, ott); overlay_hold_rele(odd); return (EEXIST); } odd->odd_flags |= OVERLAY_F_VARPD; odd->odd_target = ott; mutex_exit(&odd->odd_lock); overlay_hold_rele(odd); return (0); } /* ARGSUSED */ static int overlay_target_degrade(overlay_target_hdl_t *thdl, void *arg) { overlay_dev_t *odd; overlay_targ_degrade_t *otd = arg; odd = overlay_hold_by_dlid(otd->otd_linkid); if (odd == NULL) return (ENOENT); overlay_fm_degrade(odd, otd->otd_buf); overlay_hold_rele(odd); return (0); } /* ARGSUSED */ static int overlay_target_restore(overlay_target_hdl_t *thdl, void *arg) { overlay_dev_t *odd; overlay_targ_id_t *otid = arg; odd = overlay_hold_by_dlid(otid->otid_linkid); if (odd == NULL) return (ENOENT); overlay_fm_restore(odd); overlay_hold_rele(odd); return (0); } /* ARGSUSED */ static int overlay_target_disassociate(overlay_target_hdl_t *thdl, void *arg) { overlay_dev_t *odd; overlay_targ_id_t *otid = arg; odd = overlay_hold_by_dlid(otid->otid_linkid); if (odd == NULL) return (ENOENT); mutex_enter(&odd->odd_lock); odd->odd_flags &= ~OVERLAY_F_VARPD; mutex_exit(&odd->odd_lock); overlay_hold_rele(odd); return (0); } static int overlay_target_lookup_request(overlay_target_hdl_t *thdl, void *arg) { overlay_targ_lookup_t *otl = arg; overlay_target_entry_t *entry; clock_t ret, timeout; mac_header_info_t mhi; timeout = ddi_get_lbolt() + drv_usectohz(MICROSEC); again: mutex_enter(&overlay_target_lock); while (list_is_empty(&overlay_target_list)) { ret = cv_timedwait(&overlay_target_condvar, &overlay_target_lock, timeout); if (ret == -1) { mutex_exit(&overlay_target_lock); return (ETIME); } } entry = list_remove_head(&overlay_target_list); mutex_exit(&overlay_target_lock); mutex_enter(&entry->ote_lock); if (entry->ote_flags & OVERLAY_ENTRY_F_VALID) { ASSERT(entry->ote_chead == NULL); mutex_exit(&entry->ote_lock); goto again; } ASSERT(entry->ote_chead != NULL); /* * If we have a bogon that doesn't have a valid mac header, drop it and * try again. */ if (mac_vlan_header_info(entry->ote_odd->odd_mh, entry->ote_chead, &mhi) != 0) { boolean_t queue = B_FALSE; mblk_t *mp = entry->ote_chead; entry->ote_chead = mp->b_next; mp->b_next = NULL; if (entry->ote_ctail == mp) entry->ote_ctail = entry->ote_chead; entry->ote_mbsize -= msgsize(mp); if (entry->ote_chead != NULL) queue = B_TRUE; mutex_exit(&entry->ote_lock); if (queue == B_TRUE) overlay_target_queue(entry); freemsg(mp); goto again; } otl->otl_dlid = entry->ote_odd->odd_linkid; otl->otl_reqid = (uintptr_t)entry; otl->otl_varpdid = entry->ote_ott->ott_id; otl->otl_vnetid = entry->ote_odd->odd_vid; otl->otl_hdrsize = mhi.mhi_hdrsize; otl->otl_pktsize = msgsize(entry->ote_chead) - otl->otl_hdrsize; bcopy(mhi.mhi_daddr, otl->otl_dstaddr, ETHERADDRL); bcopy(mhi.mhi_saddr, otl->otl_srcaddr, ETHERADDRL); otl->otl_dsttype = mhi.mhi_dsttype; otl->otl_sap = mhi.mhi_bindsap; otl->otl_vlan = VLAN_ID(mhi.mhi_tci); mutex_exit(&entry->ote_lock); mutex_enter(&thdl->oth_lock); list_insert_tail(&thdl->oth_outstanding, entry); mutex_exit(&thdl->oth_lock); return (0); } static int overlay_target_lookup_respond(overlay_target_hdl_t *thdl, void *arg) { const overlay_targ_resp_t *otr = arg; overlay_target_entry_t *entry; mblk_t *mp; mutex_enter(&thdl->oth_lock); for (entry = list_head(&thdl->oth_outstanding); entry != NULL; entry = list_next(&thdl->oth_outstanding, entry)) { if ((uintptr_t)entry == otr->otr_reqid) break; } if (entry == NULL) { mutex_exit(&thdl->oth_lock); return (EINVAL); } list_remove(&thdl->oth_outstanding, entry); mutex_exit(&thdl->oth_lock); mutex_enter(&entry->ote_lock); bcopy(&otr->otr_answer, &entry->ote_dest, sizeof (overlay_target_point_t)); entry->ote_flags &= ~OVERLAY_ENTRY_F_PENDING; entry->ote_flags |= OVERLAY_ENTRY_F_VALID; mp = entry->ote_chead; entry->ote_chead = NULL; entry->ote_ctail = NULL; entry->ote_mbsize = 0; entry->ote_vtime = gethrtime(); mutex_exit(&entry->ote_lock); /* * For now do an in-situ drain. */ mp = overlay_m_tx(entry->ote_odd, mp); freemsgchain(mp); mutex_enter(&entry->ote_ott->ott_lock); entry->ote_ott->ott_ocount--; cv_signal(&entry->ote_ott->ott_cond); mutex_exit(&entry->ote_ott->ott_lock); return (0); } static int overlay_target_lookup_drop(overlay_target_hdl_t *thdl, void *arg) { const overlay_targ_resp_t *otr = arg; overlay_target_entry_t *entry; mblk_t *mp; boolean_t queue = B_FALSE; mutex_enter(&thdl->oth_lock); for (entry = list_head(&thdl->oth_outstanding); entry != NULL; entry = list_next(&thdl->oth_outstanding, entry)) { if ((uintptr_t)entry == otr->otr_reqid) break; } if (entry == NULL) { mutex_exit(&thdl->oth_lock); return (EINVAL); } list_remove(&thdl->oth_outstanding, entry); mutex_exit(&thdl->oth_lock); mutex_enter(&entry->ote_lock); /* Safeguard against a confused varpd */ if (entry->ote_flags & OVERLAY_ENTRY_F_VALID) { entry->ote_flags &= ~OVERLAY_ENTRY_F_PENDING; DTRACE_PROBE1(overlay__target__valid__drop, overlay_target_entry_t *, entry); mutex_exit(&entry->ote_lock); goto done; } mp = entry->ote_chead; if (mp != NULL) { entry->ote_chead = mp->b_next; mp->b_next = NULL; if (entry->ote_ctail == mp) entry->ote_ctail = entry->ote_chead; entry->ote_mbsize -= msgsize(mp); } if (entry->ote_chead != NULL) { queue = B_TRUE; entry->ote_flags |= OVERLAY_ENTRY_F_PENDING; } else { entry->ote_flags &= ~OVERLAY_ENTRY_F_PENDING; } mutex_exit(&entry->ote_lock); if (queue == B_TRUE) overlay_target_queue(entry); freemsg(mp); done: mutex_enter(&entry->ote_ott->ott_lock); entry->ote_ott->ott_ocount--; cv_signal(&entry->ote_ott->ott_cond); mutex_exit(&entry->ote_ott->ott_lock); return (0); } /* ARGSUSED */ static int overlay_target_pkt_copyin(const void *ubuf, void **outp, size_t *bsize, int flags) { overlay_targ_pkt_t *pkt; overlay_targ_pkt32_t *pkt32; pkt = kmem_alloc(sizeof (overlay_targ_pkt_t), KM_SLEEP); *outp = pkt; *bsize = sizeof (overlay_targ_pkt_t); if (ddi_model_convert_from(flags & FMODELS) == DDI_MODEL_ILP32) { uintptr_t addr; if (ddi_copyin(ubuf, pkt, sizeof (overlay_targ_pkt32_t), flags & FKIOCTL) != 0) { kmem_free(pkt, *bsize); return (EFAULT); } pkt32 = (overlay_targ_pkt32_t *)pkt; addr = pkt32->otp_buf; pkt->otp_buf = (void *)addr; } else { if (ddi_copyin(ubuf, pkt, *bsize, flags & FKIOCTL) != 0) { kmem_free(pkt, *bsize); return (EFAULT); } } return (0); } static int overlay_target_pkt_copyout(void *ubuf, void *buf, size_t bufsize, int flags) { if (ddi_model_convert_from(flags & FMODELS) == DDI_MODEL_ILP32) { overlay_targ_pkt_t *pkt = buf; overlay_targ_pkt32_t *pkt32 = buf; uintptr_t addr = (uintptr_t)pkt->otp_buf; pkt32->otp_buf = (caddr32_t)addr; if (ddi_copyout(buf, ubuf, sizeof (overlay_targ_pkt32_t), flags & FKIOCTL) != 0) return (EFAULT); } else { if (ddi_copyout(buf, ubuf, bufsize, flags & FKIOCTL) != 0) return (EFAULT); } return (0); } static int overlay_target_packet(overlay_target_hdl_t *thdl, void *arg) { overlay_targ_pkt_t *pkt = arg; overlay_target_entry_t *entry; mblk_t *mp; size_t mlen; size_t boff; mutex_enter(&thdl->oth_lock); for (entry = list_head(&thdl->oth_outstanding); entry != NULL; entry = list_next(&thdl->oth_outstanding, entry)) { if ((uintptr_t)entry == pkt->otp_reqid) break; } if (entry == NULL) { mutex_exit(&thdl->oth_lock); return (EINVAL); } mutex_enter(&entry->ote_lock); mutex_exit(&thdl->oth_lock); mp = entry->ote_chead; /* Protect against a rogue varpd */ if (mp == NULL) { mutex_exit(&entry->ote_lock); return (EINVAL); } mlen = MIN(msgsize(mp), pkt->otp_size); pkt->otp_size = mlen; boff = 0; while (mlen > 0) { size_t wlen = MIN(MBLKL(mp), mlen); if (ddi_copyout(mp->b_rptr, (void *)((uintptr_t)pkt->otp_buf + boff), wlen, 0) != 0) { mutex_exit(&entry->ote_lock); return (EFAULT); } mlen -= wlen; boff += wlen; mp = mp->b_cont; } mutex_exit(&entry->ote_lock); return (0); } static int overlay_target_inject(overlay_target_hdl_t *thdl, void *arg) { overlay_targ_pkt_t *pkt = arg; overlay_target_entry_t *entry; overlay_dev_t *odd; mblk_t *mp; if (pkt->otp_size > ETHERMAX + VLAN_TAGSZ) return (EINVAL); mp = allocb(pkt->otp_size, 0); if (mp == NULL) return (ENOMEM); if (ddi_copyin(pkt->otp_buf, mp->b_rptr, pkt->otp_size, 0) != 0) { freeb(mp); return (EFAULT); } mp->b_wptr += pkt->otp_size; if (pkt->otp_linkid != UINT64_MAX) { odd = overlay_hold_by_dlid(pkt->otp_linkid); if (odd == NULL) { freeb(mp); return (ENOENT); } } else { mutex_enter(&thdl->oth_lock); for (entry = list_head(&thdl->oth_outstanding); entry != NULL; entry = list_next(&thdl->oth_outstanding, entry)) { if ((uintptr_t)entry == pkt->otp_reqid) break; } if (entry == NULL) { mutex_exit(&thdl->oth_lock); freeb(mp); return (ENOENT); } odd = entry->ote_odd; mutex_exit(&thdl->oth_lock); } mutex_enter(&odd->odd_lock); if ((odd->odd_flags & OVERLAY_F_MDDROP) || !(odd->odd_flags & OVERLAY_F_IN_MUX)) { /* Can't do receive... */ mutex_exit(&odd->odd_lock); OVERLAY_FREEMSG(mp, "dev dropped"); freeb(mp); return (EBUSY); } overlay_io_start(odd, OVERLAY_F_IN_RX); mutex_exit(&odd->odd_lock); mac_rx(odd->odd_mh, NULL, mp); mutex_enter(&odd->odd_lock); overlay_io_done(odd, OVERLAY_F_IN_RX); mutex_exit(&odd->odd_lock); return (0); } static int overlay_target_resend(overlay_target_hdl_t *thdl, void *arg) { overlay_targ_pkt_t *pkt = arg; overlay_target_entry_t *entry; overlay_dev_t *odd; mblk_t *mp; if (pkt->otp_size > ETHERMAX + VLAN_TAGSZ) return (EINVAL); mp = allocb(pkt->otp_size, 0); if (mp == NULL) return (ENOMEM); if (ddi_copyin(pkt->otp_buf, mp->b_rptr, pkt->otp_size, 0) != 0) { freeb(mp); return (EFAULT); } mp->b_wptr += pkt->otp_size; if (pkt->otp_linkid != UINT64_MAX) { odd = overlay_hold_by_dlid(pkt->otp_linkid); if (odd == NULL) { freeb(mp); return (ENOENT); } } else { mutex_enter(&thdl->oth_lock); for (entry = list_head(&thdl->oth_outstanding); entry != NULL; entry = list_next(&thdl->oth_outstanding, entry)) { if ((uintptr_t)entry == pkt->otp_reqid) break; } if (entry == NULL) { mutex_exit(&thdl->oth_lock); freeb(mp); return (ENOENT); } odd = entry->ote_odd; mutex_exit(&thdl->oth_lock); } mp = overlay_m_tx(odd, mp); freemsgchain(mp); return (0); } typedef struct overlay_targ_list_int { boolean_t otli_count; uint32_t otli_cur; uint32_t otli_nents; uint32_t otli_ents[]; } overlay_targ_list_int_t; static int overlay_target_list_copyin(const void *ubuf, void **outp, size_t *bsize, int flags) { overlay_targ_list_t n; overlay_targ_list_int_t *otl; if (ddi_copyin(ubuf, &n, sizeof (overlay_targ_list_t), flags & FKIOCTL) != 0) return (EFAULT); /* */ if (n.otl_nents >= INT32_MAX / sizeof (uint32_t)) return (EINVAL); *bsize = sizeof (overlay_targ_list_int_t) + sizeof (uint32_t) * n.otl_nents; otl = kmem_zalloc(*bsize, KM_SLEEP); otl->otli_cur = 0; otl->otli_nents = n.otl_nents; if (otl->otli_nents != 0) { otl->otli_count = B_FALSE; if (ddi_copyin((void *)((uintptr_t)ubuf + offsetof(overlay_targ_list_t, otl_ents)), otl->otli_ents, n.otl_nents * sizeof (uint32_t), flags & FKIOCTL) != 0) { kmem_free(otl, *bsize); return (EFAULT); } } else { otl->otli_count = B_TRUE; } *outp = otl; return (0); } static int overlay_target_ioctl_list_cb(overlay_dev_t *odd, void *arg) { overlay_targ_list_int_t *otl = arg; if (otl->otli_cur < otl->otli_nents) otl->otli_ents[otl->otli_cur] = odd->odd_linkid; otl->otli_cur++; return (0); } /* ARGSUSED */ static int overlay_target_ioctl_list(overlay_target_hdl_t *thdl, void *arg) { overlay_dev_iter(overlay_target_ioctl_list_cb, arg); return (0); } /* ARGSUSED */ static int overlay_target_list_copyout(void *ubuf, void *buf, size_t bufsize, int flags) { overlay_targ_list_int_t *otl = buf; if (ddi_copyout(&otl->otli_cur, ubuf, sizeof (uint32_t), flags & FKIOCTL) != 0) return (EFAULT); if (otl->otli_count == B_FALSE) { if (ddi_copyout(otl->otli_ents, (void *)((uintptr_t)ubuf + offsetof(overlay_targ_list_t, otl_ents)), sizeof (uint32_t) * otl->otli_nents, flags & FKIOCTL) != 0) return (EFAULT); } return (0); } /* ARGSUSED */ static int overlay_target_cache_get(overlay_target_hdl_t *thdl, void *arg) { int ret = 0; overlay_dev_t *odd; overlay_target_t *ott; overlay_targ_cache_t *otc = arg; odd = overlay_hold_by_dlid(otc->otc_linkid); if (odd == NULL) return (ENOENT); mutex_enter(&odd->odd_lock); if (!(odd->odd_flags & OVERLAY_F_VARPD)) { mutex_exit(&odd->odd_lock); overlay_hold_rele(odd); return (ENXIO); } ott = odd->odd_target; if (ott->ott_mode != OVERLAY_TARGET_POINT && ott->ott_mode != OVERLAY_TARGET_DYNAMIC) { mutex_exit(&odd->odd_lock); overlay_hold_rele(odd); return (ENOTSUP); } mutex_enter(&ott->ott_lock); mutex_exit(&odd->odd_lock); if (ott->ott_mode == OVERLAY_TARGET_POINT) { otc->otc_entry.otce_flags = 0; bcopy(&ott->ott_u.ott_point, &otc->otc_entry.otce_dest, sizeof (overlay_target_point_t)); } else { overlay_target_entry_t *ote; ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash, otc->otc_entry.otce_mac); if (ote != NULL) { mutex_enter(&ote->ote_lock); if ((ote->ote_flags & OVERLAY_ENTRY_F_VALID_MASK) != 0) { if (ote->ote_flags & OVERLAY_ENTRY_F_DROP) { otc->otc_entry.otce_flags = OVERLAY_TARGET_CACHE_DROP; } else { otc->otc_entry.otce_flags = 0; bcopy(&ote->ote_dest, &otc->otc_entry.otce_dest, sizeof (overlay_target_point_t)); } ret = 0; } else { ret = ENOENT; } mutex_exit(&ote->ote_lock); } else { ret = ENOENT; } } mutex_exit(&ott->ott_lock); overlay_hold_rele(odd); return (ret); } /* ARGSUSED */ static int overlay_target_cache_set(overlay_target_hdl_t *thdl, void *arg) { overlay_dev_t *odd; overlay_target_t *ott; overlay_target_entry_t *ote; overlay_targ_cache_t *otc = arg; mblk_t *mp = NULL; if (otc->otc_entry.otce_flags & ~OVERLAY_TARGET_CACHE_DROP) return (EINVAL); odd = overlay_hold_by_dlid(otc->otc_linkid); if (odd == NULL) return (ENOENT); mutex_enter(&odd->odd_lock); if (!(odd->odd_flags & OVERLAY_F_VARPD)) { mutex_exit(&odd->odd_lock); overlay_hold_rele(odd); return (ENXIO); } ott = odd->odd_target; if (ott->ott_mode != OVERLAY_TARGET_DYNAMIC) { mutex_exit(&odd->odd_lock); overlay_hold_rele(odd); return (ENOTSUP); } mutex_enter(&ott->ott_lock); mutex_exit(&odd->odd_lock); ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash, otc->otc_entry.otce_mac); if (ote == NULL) { ote = kmem_cache_alloc(overlay_entry_cache, KM_SLEEP); bcopy(otc->otc_entry.otce_mac, ote->ote_addr, ETHERADDRL); ote->ote_chead = ote->ote_ctail = NULL; ote->ote_mbsize = 0; ote->ote_ott = ott; ote->ote_odd = odd; mutex_enter(&ote->ote_lock); refhash_insert(ott->ott_u.ott_dyn.ott_dhash, ote); avl_add(&ott->ott_u.ott_dyn.ott_tree, ote); } else { mutex_enter(&ote->ote_lock); } if (otc->otc_entry.otce_flags & OVERLAY_TARGET_CACHE_DROP) { ote->ote_flags |= OVERLAY_ENTRY_F_DROP; } else { ote->ote_flags |= OVERLAY_ENTRY_F_VALID; bcopy(&otc->otc_entry.otce_dest, &ote->ote_dest, sizeof (overlay_target_point_t)); mp = ote->ote_chead; ote->ote_chead = NULL; ote->ote_ctail = NULL; ote->ote_mbsize = 0; ote->ote_vtime = gethrtime(); } mutex_exit(&ote->ote_lock); mutex_exit(&ott->ott_lock); if (mp != NULL) { mp = overlay_m_tx(ote->ote_odd, mp); freemsgchain(mp); } overlay_hold_rele(odd); return (0); } /* ARGSUSED */ static int overlay_target_cache_remove(overlay_target_hdl_t *thdl, void *arg) { int ret = 0; overlay_dev_t *odd; overlay_target_t *ott; overlay_target_entry_t *ote; overlay_targ_cache_t *otc = arg; odd = overlay_hold_by_dlid(otc->otc_linkid); if (odd == NULL) return (ENOENT); mutex_enter(&odd->odd_lock); if (!(odd->odd_flags & OVERLAY_F_VARPD)) { mutex_exit(&odd->odd_lock); overlay_hold_rele(odd); return (ENXIO); } ott = odd->odd_target; if (ott->ott_mode != OVERLAY_TARGET_DYNAMIC) { mutex_exit(&odd->odd_lock); overlay_hold_rele(odd); return (ENOTSUP); } mutex_enter(&ott->ott_lock); mutex_exit(&odd->odd_lock); ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash, otc->otc_entry.otce_mac); if (ote != NULL) { mutex_enter(&ote->ote_lock); ote->ote_flags &= ~OVERLAY_ENTRY_F_VALID_MASK; mutex_exit(&ote->ote_lock); ret = 0; } else { ret = ENOENT; } mutex_exit(&ott->ott_lock); overlay_hold_rele(odd); return (ret); } /* ARGSUSED */ static int overlay_target_cache_flush(overlay_target_hdl_t *thdl, void *arg) { avl_tree_t *avl; overlay_dev_t *odd; overlay_target_t *ott; overlay_target_entry_t *ote; overlay_targ_cache_t *otc = arg; odd = overlay_hold_by_dlid(otc->otc_linkid); if (odd == NULL) return (ENOENT); mutex_enter(&odd->odd_lock); if (!(odd->odd_flags & OVERLAY_F_VARPD)) { mutex_exit(&odd->odd_lock); overlay_hold_rele(odd); return (ENXIO); } ott = odd->odd_target; if (ott->ott_mode != OVERLAY_TARGET_DYNAMIC) { mutex_exit(&odd->odd_lock); overlay_hold_rele(odd); return (ENOTSUP); } mutex_enter(&ott->ott_lock); mutex_exit(&odd->odd_lock); avl = &ott->ott_u.ott_dyn.ott_tree; for (ote = avl_first(avl); ote != NULL; ote = AVL_NEXT(avl, ote)) { mutex_enter(&ote->ote_lock); ote->ote_flags &= ~OVERLAY_ENTRY_F_VALID_MASK; mutex_exit(&ote->ote_lock); } ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash, otc->otc_entry.otce_mac); mutex_exit(&ott->ott_lock); overlay_hold_rele(odd); return (0); } static int overlay_target_cache_iter_copyin(const void *ubuf, void **outp, size_t *bsize, int flags) { overlay_targ_cache_iter_t base, *iter; if (ddi_copyin(ubuf, &base, sizeof (overlay_targ_cache_iter_t), flags & FKIOCTL) != 0) return (EFAULT); if (base.otci_count > OVERLAY_TARGET_ITER_MAX) return (E2BIG); if (base.otci_count == 0) return (EINVAL); *bsize = sizeof (overlay_targ_cache_iter_t) + base.otci_count * sizeof (overlay_targ_cache_entry_t); iter = kmem_alloc(*bsize, KM_SLEEP); bcopy(&base, iter, sizeof (overlay_targ_cache_iter_t)); *outp = iter; return (0); } typedef struct overlay_targ_cache_marker { uint8_t otcm_mac[ETHERADDRL]; uint16_t otcm_done; } overlay_targ_cache_marker_t; /* ARGSUSED */ static int overlay_target_cache_iter(overlay_target_hdl_t *thdl, void *arg) { overlay_dev_t *odd; overlay_target_t *ott; overlay_target_entry_t lookup, *ent; overlay_targ_cache_marker_t *mark; avl_index_t where; avl_tree_t *avl; uint16_t written = 0; overlay_targ_cache_iter_t *iter = arg; mark = (void *)&iter->otci_marker; if (mark->otcm_done != 0) { iter->otci_count = 0; return (0); } odd = overlay_hold_by_dlid(iter->otci_linkid); if (odd == NULL) return (ENOENT); mutex_enter(&odd->odd_lock); if (!(odd->odd_flags & OVERLAY_F_VARPD)) { mutex_exit(&odd->odd_lock); overlay_hold_rele(odd); return (ENXIO); } ott = odd->odd_target; if (ott->ott_mode != OVERLAY_TARGET_DYNAMIC && ott->ott_mode != OVERLAY_TARGET_POINT) { mutex_exit(&odd->odd_lock); overlay_hold_rele(odd); return (ENOTSUP); } /* * Holding this lock across the entire iteration probably isn't very * good. We should perhaps add an r/w lock for the avl tree. But we'll * wait until we now it's necessary before we do more. */ mutex_enter(&ott->ott_lock); mutex_exit(&odd->odd_lock); if (ott->ott_mode == OVERLAY_TARGET_POINT) { overlay_targ_cache_entry_t *out = &iter->otci_ents[0]; bzero(out->otce_mac, ETHERADDRL); out->otce_flags = 0; bcopy(&ott->ott_u.ott_point, &out->otce_dest, sizeof (overlay_target_point_t)); written++; mark->otcm_done = 1; } avl = &ott->ott_u.ott_dyn.ott_tree; bcopy(mark->otcm_mac, lookup.ote_addr, ETHERADDRL); ent = avl_find(avl, &lookup, &where); /* * NULL ent means that the entry does not exist, so we want to start * with the closest node in the tree. This means that we implicitly rely * on the tree's order and the first node will be the mac 00:00:00:00:00 * and the last will be ff:ff:ff:ff:ff:ff. */ if (ent == NULL) { ent = avl_nearest(avl, where, AVL_AFTER); if (ent == NULL) { mark->otcm_done = 1; goto done; } } for (; ent != NULL && written < iter->otci_count; ent = AVL_NEXT(avl, ent)) { overlay_targ_cache_entry_t *out = &iter->otci_ents[written]; mutex_enter(&ent->ote_lock); if ((ent->ote_flags & OVERLAY_ENTRY_F_VALID_MASK) == 0) { mutex_exit(&ent->ote_lock); continue; } bcopy(ent->ote_addr, out->otce_mac, ETHERADDRL); out->otce_flags = 0; if (ent->ote_flags & OVERLAY_ENTRY_F_DROP) out->otce_flags |= OVERLAY_TARGET_CACHE_DROP; if (ent->ote_flags & OVERLAY_ENTRY_F_VALID) bcopy(&ent->ote_dest, &out->otce_dest, sizeof (overlay_target_point_t)); written++; mutex_exit(&ent->ote_lock); } if (ent != NULL) { bcopy(ent->ote_addr, mark->otcm_mac, ETHERADDRL); } else { mark->otcm_done = 1; } done: iter->otci_count = written; mutex_exit(&ott->ott_lock); overlay_hold_rele(odd); return (0); } /* ARGSUSED */ static int overlay_target_cache_iter_copyout(void *ubuf, void *buf, size_t bufsize, int flags) { size_t outsize; const overlay_targ_cache_iter_t *iter = buf; outsize = sizeof (overlay_targ_cache_iter_t) + iter->otci_count * sizeof (overlay_targ_cache_entry_t); if (ddi_copyout(buf, ubuf, outsize, flags & FKIOCTL) != 0) return (EFAULT); return (0); } static overlay_target_ioctl_t overlay_target_ioctab[] = { { OVERLAY_TARG_INFO, B_TRUE, B_TRUE, NULL, overlay_target_info, NULL, sizeof (overlay_targ_info_t) }, { OVERLAY_TARG_ASSOCIATE, B_TRUE, B_FALSE, NULL, overlay_target_associate, NULL, sizeof (overlay_targ_associate_t) }, { OVERLAY_TARG_DISASSOCIATE, B_TRUE, B_FALSE, NULL, overlay_target_disassociate, NULL, sizeof (overlay_targ_id_t) }, { OVERLAY_TARG_DEGRADE, B_TRUE, B_FALSE, NULL, overlay_target_degrade, NULL, sizeof (overlay_targ_degrade_t) }, { OVERLAY_TARG_RESTORE, B_TRUE, B_FALSE, NULL, overlay_target_restore, NULL, sizeof (overlay_targ_id_t) }, { OVERLAY_TARG_LOOKUP, B_FALSE, B_TRUE, NULL, overlay_target_lookup_request, NULL, sizeof (overlay_targ_lookup_t) }, { OVERLAY_TARG_RESPOND, B_TRUE, B_FALSE, NULL, overlay_target_lookup_respond, NULL, sizeof (overlay_targ_resp_t) }, { OVERLAY_TARG_DROP, B_TRUE, B_FALSE, NULL, overlay_target_lookup_drop, NULL, sizeof (overlay_targ_resp_t) }, { OVERLAY_TARG_PKT, B_TRUE, B_TRUE, overlay_target_pkt_copyin, overlay_target_packet, overlay_target_pkt_copyout, sizeof (overlay_targ_pkt_t) }, { OVERLAY_TARG_INJECT, B_TRUE, B_FALSE, overlay_target_pkt_copyin, overlay_target_inject, NULL, sizeof (overlay_targ_pkt_t) }, { OVERLAY_TARG_RESEND, B_TRUE, B_FALSE, overlay_target_pkt_copyin, overlay_target_resend, NULL, sizeof (overlay_targ_pkt_t) }, { OVERLAY_TARG_LIST, B_FALSE, B_TRUE, overlay_target_list_copyin, overlay_target_ioctl_list, overlay_target_list_copyout, sizeof (overlay_targ_list_t) }, { OVERLAY_TARG_CACHE_GET, B_FALSE, B_TRUE, NULL, overlay_target_cache_get, NULL, sizeof (overlay_targ_cache_t) }, { OVERLAY_TARG_CACHE_SET, B_TRUE, B_TRUE, NULL, overlay_target_cache_set, NULL, sizeof (overlay_targ_cache_t) }, { OVERLAY_TARG_CACHE_REMOVE, B_TRUE, B_TRUE, NULL, overlay_target_cache_remove, NULL, sizeof (overlay_targ_cache_t) }, { OVERLAY_TARG_CACHE_FLUSH, B_TRUE, B_TRUE, NULL, overlay_target_cache_flush, NULL, sizeof (overlay_targ_cache_t) }, { OVERLAY_TARG_CACHE_ITER, B_FALSE, B_TRUE, overlay_target_cache_iter_copyin, overlay_target_cache_iter, overlay_target_cache_iter_copyout, sizeof (overlay_targ_cache_iter_t) }, { 0 } }; int overlay_target_open(dev_t *devp, int flags, int otype, cred_t *credp) { minor_t mid; overlay_target_hdl_t *thdl; if (secpolicy_dl_config(credp) != 0) return (EPERM); if (getminor(*devp) != 0) return (ENXIO); if (otype & OTYP_BLK) return (EINVAL); if (flags & ~(FREAD | FWRITE | FEXCL)) return (EINVAL); if ((flags & FWRITE) && !(flags & FEXCL)) return (EINVAL); if (!(flags & FREAD) && !(flags & FWRITE)) return (EINVAL); if (crgetzoneid(credp) != GLOBAL_ZONEID) return (EPERM); mid = id_alloc(overlay_thdl_idspace); if (ddi_soft_state_zalloc(overlay_thdl_state, mid) != 0) { id_free(overlay_thdl_idspace, mid); return (ENXIO); } thdl = ddi_get_soft_state(overlay_thdl_state, mid); VERIFY(thdl != NULL); thdl->oth_minor = mid; thdl->oth_zoneid = crgetzoneid(credp); thdl->oth_oflags = flags; mutex_init(&thdl->oth_lock, NULL, MUTEX_DRIVER, NULL); list_create(&thdl->oth_outstanding, sizeof (overlay_target_entry_t), offsetof(overlay_target_entry_t, ote_qlink)); *devp = makedevice(getmajor(*devp), mid); mutex_enter(&overlay_target_lock); if ((flags & FEXCL) && overlay_target_excl == B_TRUE) { mutex_exit(&overlay_target_lock); list_destroy(&thdl->oth_outstanding); mutex_destroy(&thdl->oth_lock); ddi_soft_state_free(overlay_thdl_state, mid); id_free(overlay_thdl_idspace, mid); return (EEXIST); } else if ((flags & FEXCL) != 0) { VERIFY(overlay_target_excl == B_FALSE); overlay_target_excl = B_TRUE; } list_insert_tail(&overlay_thdl_list, thdl); mutex_exit(&overlay_target_lock); return (0); } /* ARGSUSED */ int overlay_target_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) { overlay_target_ioctl_t *ioc; overlay_target_hdl_t *thdl; if (secpolicy_dl_config(credp) != 0) return (EPERM); if ((thdl = ddi_get_soft_state(overlay_thdl_state, getminor(dev))) == NULL) return (ENXIO); for (ioc = &overlay_target_ioctab[0]; ioc->oti_cmd != 0; ioc++) { int ret; caddr_t buf; size_t bufsize; if (ioc->oti_cmd != cmd) continue; if (ioc->oti_write == B_TRUE && !(mode & FWRITE)) return (EBADF); if (ioc->oti_copyin == NULL) { bufsize = ioc->oti_size; buf = kmem_alloc(bufsize, KM_SLEEP); if (ddi_copyin((void *)(uintptr_t)arg, buf, bufsize, mode & FKIOCTL) != 0) { kmem_free(buf, bufsize); return (EFAULT); } } else { if ((ret = ioc->oti_copyin((void *)(uintptr_t)arg, (void **)&buf, &bufsize, mode)) != 0) return (ret); } ret = ioc->oti_func(thdl, buf); if (ret == 0 && ioc->oti_size != 0 && ioc->oti_ncopyout == B_TRUE) { if (ioc->oti_copyout == NULL) { if (ddi_copyout(buf, (void *)(uintptr_t)arg, bufsize, mode & FKIOCTL) != 0) ret = EFAULT; } else { ret = ioc->oti_copyout((void *)(uintptr_t)arg, buf, bufsize, mode); } } kmem_free(buf, bufsize); return (ret); } return (ENOTTY); } /* ARGSUSED */ int overlay_target_close(dev_t dev, int flags, int otype, cred_t *credp) { overlay_target_hdl_t *thdl; overlay_target_entry_t *entry; minor_t mid = getminor(dev); if ((thdl = ddi_get_soft_state(overlay_thdl_state, mid)) == NULL) return (ENXIO); mutex_enter(&overlay_target_lock); list_remove(&overlay_thdl_list, thdl); mutex_enter(&thdl->oth_lock); while ((entry = list_remove_head(&thdl->oth_outstanding)) != NULL) list_insert_tail(&overlay_target_list, entry); cv_signal(&overlay_target_condvar); mutex_exit(&thdl->oth_lock); if ((thdl->oth_oflags & FEXCL) != 0) { VERIFY(overlay_target_excl == B_TRUE); overlay_target_excl = B_FALSE; } mutex_exit(&overlay_target_lock); list_destroy(&thdl->oth_outstanding); mutex_destroy(&thdl->oth_lock); mid = thdl->oth_minor; ddi_soft_state_free(overlay_thdl_state, mid); id_free(overlay_thdl_idspace, mid); return (0); }