/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #pragma ident "%Z%%M% %I% %E% SMI" /* * sun4v LDC Link Layer Shared Memory Routines */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* LDC variables used by shared memory routines */ extern ldc_soft_state_t *ldcssp; extern int ldc_max_retries; extern clock_t ldc_delay; #ifdef DEBUG extern int ldcdbg; #endif /* LDC internal functions used by shared memory routines */ extern void i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset); extern int i_ldc_h2v_error(int h_error); #ifdef DEBUG extern void ldcdebug(int64_t id, const char *fmt, ...); #endif /* Memory synchronization internal functions */ static int i_ldc_mem_acquire_release(ldc_mem_handle_t mhandle, uint8_t direction, uint64_t offset, size_t size); static int i_ldc_dring_acquire_release(ldc_dring_handle_t dhandle, uint8_t direction, uint64_t start, uint64_t end); static int i_ldc_mem_map(ldc_mem_handle_t mhandle, ldc_mem_cookie_t *cookie, uint32_t ccount, uint8_t mtype, uint8_t perm, caddr_t 
*vaddr, caddr_t *raddr); static int i_ldc_mem_bind_handle(ldc_mem_handle_t mhandle, caddr_t vaddr, size_t len, uint8_t mtype, uint8_t perm, ldc_mem_cookie_t *cookie, uint32_t *ccount); /* * LDC framework supports mapping remote domain's memory * either directly or via shadow memory pages. Default * support is currently implemented via shadow copy. * Direct map can be enabled by setting 'ldc_shmem_enabled' */ int ldc_shmem_enabled = 0; /* * Use of directly mapped shared memory for LDC descriptor * rings is permitted if this variable is non-zero. */ int ldc_dring_shmem_enabled = 1; /* * The major and minor versions required to use directly * mapped shared memory for LDC descriptor rings. The * ldc_dring_shmem_hv_force variable, if set to a non-zero * value, overrides the hypervisor API version check. */ static int ldc_dring_shmem_hv_major = 1; static int ldc_dring_shmem_hv_minor = 1; static int ldc_dring_shmem_hv_force = 0; /* * The results of the hypervisor service group API check. * A non-zero value indicates the HV includes support for * descriptor ring shared memory. */ static int ldc_dring_shmem_hv_ok = 0; /* * Pages exported for remote access over each channel is * maintained in a table registered with the Hypervisor. * The default number of entries in the table is set to * 'ldc_mtbl_entries'. */ uint64_t ldc_maptable_entries = LDC_MTBL_ENTRIES; #define IDX2COOKIE(idx, pg_szc, pg_shift) \ (((pg_szc) << LDC_COOKIE_PGSZC_SHIFT) | ((idx) << (pg_shift))) /* * Sets ldc_dring_shmem_hv_ok to a non-zero value if the HV LDC * API version supports directly mapped shared memory or if it has * been explicitly enabled via ldc_dring_shmem_hv_force. 
*/ void i_ldc_mem_set_hsvc_vers(uint64_t major, uint64_t minor) { if ((major == ldc_dring_shmem_hv_major && minor >= ldc_dring_shmem_hv_minor) || (major > ldc_dring_shmem_hv_major) || (ldc_dring_shmem_hv_force != 0)) { ldc_dring_shmem_hv_ok = 1; } } /* * Allocate a memory handle for the channel and link it into the list * Also choose which memory table to use if this is the first handle * being assigned to this channel */ int ldc_mem_alloc_handle(ldc_handle_t handle, ldc_mem_handle_t *mhandle) { ldc_chan_t *ldcp; ldc_mhdl_t *mhdl; if (handle == NULL) { DWARN(DBG_ALL_LDCS, "ldc_mem_alloc_handle: invalid channel handle\n"); return (EINVAL); } ldcp = (ldc_chan_t *)handle; mutex_enter(&ldcp->lock); /* check to see if channel is initalized */ if ((ldcp->tstate & ~TS_IN_RESET) < TS_INIT) { DWARN(ldcp->id, "ldc_mem_alloc_handle: (0x%llx) channel not initialized\n", ldcp->id); mutex_exit(&ldcp->lock); return (EINVAL); } /* allocate handle for channel */ mhdl = kmem_cache_alloc(ldcssp->memhdl_cache, KM_SLEEP); /* initialize the lock */ mutex_init(&mhdl->lock, NULL, MUTEX_DRIVER, NULL); mhdl->myshadow = B_FALSE; mhdl->memseg = NULL; mhdl->ldcp = ldcp; mhdl->status = LDC_UNBOUND; /* insert memory handle (@ head) into list */ if (ldcp->mhdl_list == NULL) { ldcp->mhdl_list = mhdl; mhdl->next = NULL; } else { /* insert @ head */ mhdl->next = ldcp->mhdl_list; ldcp->mhdl_list = mhdl; } /* return the handle */ *mhandle = (ldc_mem_handle_t)mhdl; mutex_exit(&ldcp->lock); D1(ldcp->id, "ldc_mem_alloc_handle: (0x%llx) allocated handle 0x%llx\n", ldcp->id, mhdl); return (0); } /* * Free memory handle for the channel and unlink it from the list */ int ldc_mem_free_handle(ldc_mem_handle_t mhandle) { ldc_mhdl_t *mhdl, *phdl; ldc_chan_t *ldcp; if (mhandle == NULL) { DWARN(DBG_ALL_LDCS, "ldc_mem_free_handle: invalid memory handle\n"); return (EINVAL); } mhdl = (ldc_mhdl_t *)mhandle; mutex_enter(&mhdl->lock); ldcp = mhdl->ldcp; if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED) { 
DWARN(ldcp->id, "ldc_mem_free_handle: cannot free, 0x%llx hdl bound\n", mhdl); mutex_exit(&mhdl->lock); return (EINVAL); } mutex_exit(&mhdl->lock); mutex_enter(&ldcp->mlist_lock); phdl = ldcp->mhdl_list; /* first handle */ if (phdl == mhdl) { ldcp->mhdl_list = mhdl->next; mutex_destroy(&mhdl->lock); kmem_cache_free(ldcssp->memhdl_cache, mhdl); D1(ldcp->id, "ldc_mem_free_handle: (0x%llx) freed handle 0x%llx\n", ldcp->id, mhdl); } else { /* walk the list - unlink and free */ while (phdl != NULL) { if (phdl->next == mhdl) { phdl->next = mhdl->next; mutex_destroy(&mhdl->lock); kmem_cache_free(ldcssp->memhdl_cache, mhdl); D1(ldcp->id, "ldc_mem_free_handle: (0x%llx) freed " "handle 0x%llx\n", ldcp->id, mhdl); break; } phdl = phdl->next; } } if (phdl == NULL) { DWARN(ldcp->id, "ldc_mem_free_handle: invalid handle 0x%llx\n", mhdl); mutex_exit(&ldcp->mlist_lock); return (EINVAL); } mutex_exit(&ldcp->mlist_lock); return (0); } /* * Bind a memory handle to a virtual address. * The virtual address is converted to the corresponding real addresses. * Returns pointer to the first ldc_mem_cookie and the total number * of cookies for this virtual address. Other cookies can be obtained * using the ldc_mem_nextcookie() call. If the pages are stored in * consecutive locations in the table, a single cookie corresponding to * the first location is returned. The cookie size spans all the entries. * * If the VA corresponds to a page that is already being exported, reuse * the page and do not export it again. Bump the page's use count. */ int ldc_mem_bind_handle(ldc_mem_handle_t mhandle, caddr_t vaddr, size_t len, uint8_t mtype, uint8_t perm, ldc_mem_cookie_t *cookie, uint32_t *ccount) { /* * Check if direct shared memory map is enabled, if not change * the mapping type to SHADOW_MAP. 
*/ if (ldc_shmem_enabled == 0) mtype = LDC_SHADOW_MAP; return (i_ldc_mem_bind_handle(mhandle, vaddr, len, mtype, perm, cookie, ccount)); } static int i_ldc_mem_bind_handle(ldc_mem_handle_t mhandle, caddr_t vaddr, size_t len, uint8_t mtype, uint8_t perm, ldc_mem_cookie_t *cookie, uint32_t *ccount) { ldc_mhdl_t *mhdl; ldc_chan_t *ldcp; ldc_mtbl_t *mtbl; ldc_memseg_t *memseg; ldc_mte_t tmp_mte; uint64_t index, prev_index = 0; int64_t cookie_idx; uintptr_t raddr, ra_aligned; uint64_t psize, poffset, v_offset; uint64_t pg_shift, pg_size, pg_size_code, pg_mask; pgcnt_t npages; caddr_t v_align, addr; int i, rv; if (mhandle == NULL) { DWARN(DBG_ALL_LDCS, "ldc_mem_bind_handle: invalid memory handle\n"); return (EINVAL); } mhdl = (ldc_mhdl_t *)mhandle; ldcp = mhdl->ldcp; /* clear count */ *ccount = 0; mutex_enter(&mhdl->lock); if (mhdl->status == LDC_BOUND || mhdl->memseg != NULL) { DWARN(ldcp->id, "ldc_mem_bind_handle: (0x%x) handle already bound\n", mhandle); mutex_exit(&mhdl->lock); return (EINVAL); } /* Force address and size to be 8-byte aligned */ if ((((uintptr_t)vaddr | len) & 0x7) != 0) { DWARN(ldcp->id, "ldc_mem_bind_handle: addr/size is not 8-byte aligned\n"); mutex_exit(&mhdl->lock); return (EINVAL); } mutex_enter(&ldcp->lock); /* * If this channel is binding a memory handle for the * first time allocate it a memory map table and initialize it */ if ((mtbl = ldcp->mtbl) == NULL) { /* Allocate and initialize the map table structure */ mtbl = kmem_zalloc(sizeof (ldc_mtbl_t), KM_SLEEP); mtbl->num_entries = mtbl->num_avail = ldc_maptable_entries; mtbl->size = ldc_maptable_entries * sizeof (ldc_mte_slot_t); mtbl->next_entry = NULL; mtbl->contigmem = B_TRUE; /* Allocate the table itself */ mtbl->table = (ldc_mte_slot_t *) contig_mem_alloc_align(mtbl->size, MMU_PAGESIZE); if (mtbl->table == NULL) { /* allocate a page of memory using kmem_alloc */ mtbl->table = kmem_alloc(MMU_PAGESIZE, KM_SLEEP); mtbl->size = MMU_PAGESIZE; mtbl->contigmem = B_FALSE; mtbl->num_entries = 
mtbl->num_avail = mtbl->size / sizeof (ldc_mte_slot_t); DWARN(ldcp->id, "ldc_mem_bind_handle: (0x%llx) reduced tbl size " "to %lx entries\n", ldcp->id, mtbl->num_entries); } /* zero out the memory */ bzero(mtbl->table, mtbl->size); /* initialize the lock */ mutex_init(&mtbl->lock, NULL, MUTEX_DRIVER, NULL); /* register table for this channel */ rv = hv_ldc_set_map_table(ldcp->id, va_to_pa(mtbl->table), mtbl->num_entries); if (rv != 0) { DWARN(DBG_ALL_LDCS, "ldc_mem_bind_handle: (0x%lx) err %d mapping tbl", ldcp->id, rv); if (mtbl->contigmem) contig_mem_free(mtbl->table, mtbl->size); else kmem_free(mtbl->table, mtbl->size); mutex_destroy(&mtbl->lock); kmem_free(mtbl, sizeof (ldc_mtbl_t)); mutex_exit(&ldcp->lock); mutex_exit(&mhdl->lock); return (EIO); } ldcp->mtbl = mtbl; D1(ldcp->id, "ldc_mem_bind_handle: (0x%llx) alloc'd map table 0x%llx\n", ldcp->id, ldcp->mtbl->table); } mutex_exit(&ldcp->lock); /* FUTURE: get the page size, pgsz code, and shift */ pg_size = MMU_PAGESIZE; pg_size_code = page_szc(pg_size); pg_shift = page_get_shift(pg_size_code); pg_mask = ~(pg_size - 1); D1(ldcp->id, "ldc_mem_bind_handle: (0x%llx) binding " "va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n", ldcp->id, vaddr, pg_size, pg_size_code, pg_shift); /* aligned VA and its offset */ v_align = (caddr_t)(((uintptr_t)vaddr) & ~(pg_size - 1)); v_offset = ((uintptr_t)vaddr) & (pg_size - 1); npages = (len+v_offset)/pg_size; npages = ((len+v_offset)%pg_size == 0) ? 
npages : npages+1; D1(ldcp->id, "ldc_mem_bind_handle: binding " "(0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n", ldcp->id, vaddr, v_align, v_offset, npages); /* lock the memory table - exclusive access to channel */ mutex_enter(&mtbl->lock); if (npages > mtbl->num_avail) { D1(ldcp->id, "ldc_mem_bind_handle: (0x%llx) no table entries\n", ldcp->id); mutex_exit(&mtbl->lock); mutex_exit(&mhdl->lock); return (ENOMEM); } /* Allocate a memseg structure */ memseg = mhdl->memseg = kmem_cache_alloc(ldcssp->memseg_cache, KM_SLEEP); /* Allocate memory to store all pages and cookies */ memseg->pages = kmem_zalloc((sizeof (ldc_page_t) * npages), KM_SLEEP); memseg->cookies = kmem_zalloc((sizeof (ldc_mem_cookie_t) * npages), KM_SLEEP); D2(ldcp->id, "ldc_mem_bind_handle: (0x%llx) processing 0x%llx pages\n", ldcp->id, npages); addr = v_align; /* * Table slots are used in a round-robin manner. The algorithm permits * inserting duplicate entries. Slots allocated earlier will typically * get freed before we get back to reusing the slot.Inserting duplicate * entries should be OK as we only lookup entries using the cookie addr * i.e. tbl index, during export, unexport and copy operation. * * One implementation what was tried was to search for a duplicate * page entry first and reuse it. The search overhead is very high and * in the vnet case dropped the perf by almost half, 50 to 24 mbps. * So it does make sense to avoid searching for duplicates. * * But during the process of searching for a free slot, if we find a * duplicate entry we will go ahead and use it, and bump its use count. */ /* index to start searching from */ index = mtbl->next_entry; cookie_idx = -1; tmp_mte.ll = 0; /* initialise fields to 0 */ if (mtype & LDC_DIRECT_MAP) { tmp_mte.mte_r = (perm & LDC_MEM_R) ? 1 : 0; tmp_mte.mte_w = (perm & LDC_MEM_W) ? 1 : 0; tmp_mte.mte_x = (perm & LDC_MEM_X) ? 1 : 0; } if (mtype & LDC_SHADOW_MAP) { tmp_mte.mte_cr = (perm & LDC_MEM_R) ? 1 : 0; tmp_mte.mte_cw = (perm & LDC_MEM_W) ? 
1 : 0; } if (mtype & LDC_IO_MAP) { tmp_mte.mte_ir = (perm & LDC_MEM_R) ? 1 : 0; tmp_mte.mte_iw = (perm & LDC_MEM_W) ? 1 : 0; } D1(ldcp->id, "ldc_mem_bind_handle mte=0x%llx\n", tmp_mte.ll); tmp_mte.mte_pgszc = pg_size_code; /* initialize each mem table entry */ for (i = 0; i < npages; i++) { /* check if slot is available in the table */ while (mtbl->table[index].entry.ll != 0) { index = (index + 1) % mtbl->num_entries; if (index == mtbl->next_entry) { /* we have looped around */ DWARN(DBG_ALL_LDCS, "ldc_mem_bind_handle: (0x%llx) cannot find " "entry\n", ldcp->id); *ccount = 0; /* NOTE: free memory, remove previous entries */ /* this shouldnt happen as num_avail was ok */ mutex_exit(&mtbl->lock); mutex_exit(&mhdl->lock); return (ENOMEM); } } /* get the real address */ raddr = va_to_pa((void *)addr); ra_aligned = ((uintptr_t)raddr & pg_mask); /* build the mte */ tmp_mte.mte_rpfn = ra_aligned >> pg_shift; D1(ldcp->id, "ldc_mem_bind_handle mte=0x%llx\n", tmp_mte.ll); /* update entry in table */ mtbl->table[index].entry = tmp_mte; D2(ldcp->id, "ldc_mem_bind_handle: (0x%llx) stored MTE 0x%llx" " into loc 0x%llx\n", ldcp->id, tmp_mte.ll, index); /* calculate the size and offset for this export range */ if (i == 0) { /* first page */ psize = min((pg_size - v_offset), len); poffset = v_offset; } else if (i == (npages - 1)) { /* last page */ psize = (((uintptr_t)(vaddr + len)) & ((uint64_t)(pg_size-1))); if (psize == 0) psize = pg_size; poffset = 0; } else { /* middle pages */ psize = pg_size; poffset = 0; } /* store entry for this page */ memseg->pages[i].index = index; memseg->pages[i].raddr = raddr; memseg->pages[i].offset = poffset; memseg->pages[i].size = psize; memseg->pages[i].mte = &(mtbl->table[index]); /* create the cookie */ if (i == 0 || (index != prev_index + 1)) { cookie_idx++; memseg->cookies[cookie_idx].addr = IDX2COOKIE(index, pg_size_code, pg_shift); memseg->cookies[cookie_idx].addr |= poffset; memseg->cookies[cookie_idx].size = psize; } else { 
memseg->cookies[cookie_idx].size += psize; } D1(ldcp->id, "ldc_mem_bind_handle: bound " "(0x%llx) va=0x%llx, idx=0x%llx, " "ra=0x%llx(sz=0x%x,off=0x%x)\n", ldcp->id, addr, index, raddr, psize, poffset); /* decrement number of available entries */ mtbl->num_avail--; /* increment va by page size */ addr += pg_size; /* increment index */ prev_index = index; index = (index + 1) % mtbl->num_entries; /* save the next slot */ mtbl->next_entry = index; } mutex_exit(&mtbl->lock); /* memory handle = bound */ mhdl->mtype = mtype; mhdl->perm = perm; mhdl->status = LDC_BOUND; /* update memseg_t */ memseg->vaddr = vaddr; memseg->raddr = memseg->pages[0].raddr; memseg->size = len; memseg->npages = npages; memseg->ncookies = cookie_idx + 1; memseg->next_cookie = (memseg->ncookies > 1) ? 1 : 0; /* return count and first cookie */ *ccount = memseg->ncookies; cookie->addr = memseg->cookies[0].addr; cookie->size = memseg->cookies[0].size; D1(ldcp->id, "ldc_mem_bind_handle: (0x%llx) bound 0x%llx, va=0x%llx, " "pgs=0x%llx cookies=0x%llx\n", ldcp->id, mhdl, vaddr, npages, memseg->ncookies); mutex_exit(&mhdl->lock); return (0); } /* * Return the next cookie associated with the specified memory handle */ int ldc_mem_nextcookie(ldc_mem_handle_t mhandle, ldc_mem_cookie_t *cookie) { ldc_mhdl_t *mhdl; ldc_chan_t *ldcp; ldc_memseg_t *memseg; if (mhandle == NULL) { DWARN(DBG_ALL_LDCS, "ldc_mem_nextcookie: invalid memory handle\n"); return (EINVAL); } mhdl = (ldc_mhdl_t *)mhandle; mutex_enter(&mhdl->lock); ldcp = mhdl->ldcp; memseg = mhdl->memseg; if (cookie == 0) { DWARN(ldcp->id, "ldc_mem_nextcookie:(0x%llx) invalid cookie arg\n", ldcp->id); mutex_exit(&mhdl->lock); return (EINVAL); } if (memseg->next_cookie != 0) { cookie->addr = memseg->cookies[memseg->next_cookie].addr; cookie->size = memseg->cookies[memseg->next_cookie].size; memseg->next_cookie++; if (memseg->next_cookie == memseg->ncookies) memseg->next_cookie = 0; } else { DWARN(ldcp->id, "ldc_mem_nextcookie:(0x%llx) no more cookies\n", 
ldcp->id); cookie->addr = 0; cookie->size = 0; mutex_exit(&mhdl->lock); return (EINVAL); } D1(ldcp->id, "ldc_mem_nextcookie: (0x%llx) cookie addr=0x%llx,sz=0x%llx\n", ldcp->id, cookie->addr, cookie->size); mutex_exit(&mhdl->lock); return (0); } /* * Unbind the virtual memory region associated with the specified * memory handle. Allassociated cookies are freed and the corresponding * RA space is no longer exported. */ int ldc_mem_unbind_handle(ldc_mem_handle_t mhandle) { ldc_mhdl_t *mhdl; ldc_chan_t *ldcp; ldc_mtbl_t *mtbl; ldc_memseg_t *memseg; uint64_t cookie_addr; uint64_t pg_shift, pg_size_code; int i, rv, retries; if (mhandle == NULL) { DWARN(DBG_ALL_LDCS, "ldc_mem_unbind_handle: invalid memory handle\n"); return (EINVAL); } mhdl = (ldc_mhdl_t *)mhandle; mutex_enter(&mhdl->lock); if (mhdl->status == LDC_UNBOUND) { DWARN(DBG_ALL_LDCS, "ldc_mem_unbind_handle: (0x%x) handle is not bound\n", mhandle); mutex_exit(&mhdl->lock); return (EINVAL); } ldcp = mhdl->ldcp; mtbl = ldcp->mtbl; memseg = mhdl->memseg; /* lock the memory table - exclusive access to channel */ mutex_enter(&mtbl->lock); /* undo the pages exported */ for (i = 0; i < memseg->npages; i++) { /* clear the entry from the table */ memseg->pages[i].mte->entry.ll = 0; /* check for mapped pages, revocation cookie != 0 */ if (memseg->pages[i].mte->cookie) { pg_size_code = page_szc(memseg->pages[i].size); pg_shift = page_get_shift(pg_size_code); cookie_addr = IDX2COOKIE(memseg->pages[i].index, pg_size_code, pg_shift); D1(ldcp->id, "ldc_mem_unbind_handle: (0x%llx) revoke " "cookie 0x%llx, rcookie 0x%llx\n", ldcp->id, cookie_addr, memseg->pages[i].mte->cookie); retries = 0; do { rv = hv_ldc_revoke(ldcp->id, cookie_addr, memseg->pages[i].mte->cookie); if (rv != H_EWOULDBLOCK) break; drv_usecwait(ldc_delay); } while (retries++ < ldc_max_retries); if (rv) { DWARN(ldcp->id, "ldc_mem_unbind_handle: (0x%llx) cannot " "revoke mapping, cookie %llx\n", ldcp->id, cookie_addr); } } mtbl->num_avail++; } 
mutex_exit(&mtbl->lock); /* free the allocated memseg and page structures */ kmem_free(memseg->pages, (sizeof (ldc_page_t) * memseg->npages)); kmem_free(memseg->cookies, (sizeof (ldc_mem_cookie_t) * memseg->npages)); kmem_cache_free(ldcssp->memseg_cache, memseg); /* uninitialize the memory handle */ mhdl->memseg = NULL; mhdl->status = LDC_UNBOUND; D1(ldcp->id, "ldc_mem_unbind_handle: (0x%llx) unbound handle 0x%llx\n", ldcp->id, mhdl); mutex_exit(&mhdl->lock); return (0); } /* * Get information about the dring. The base address of the descriptor * ring along with the type and permission are returned back. */ int ldc_mem_info(ldc_mem_handle_t mhandle, ldc_mem_info_t *minfo) { ldc_mhdl_t *mhdl; if (mhandle == NULL) { DWARN(DBG_ALL_LDCS, "ldc_mem_info: invalid memory handle\n"); return (EINVAL); } mhdl = (ldc_mhdl_t *)mhandle; if (minfo == NULL) { DWARN(DBG_ALL_LDCS, "ldc_mem_info: invalid args\n"); return (EINVAL); } mutex_enter(&mhdl->lock); minfo->status = mhdl->status; if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED) { minfo->vaddr = mhdl->memseg->vaddr; minfo->raddr = mhdl->memseg->raddr; minfo->mtype = mhdl->mtype; minfo->perm = mhdl->perm; } mutex_exit(&mhdl->lock); return (0); } /* * Copy data either from or to the client specified virtual address * space to or from the exported memory associated with the cookies. * The direction argument determines whether the data is read from or * written to exported memory. 
*/
/*
 * ldc_mem_copy() arguments and results:
 *
 * handle	channel over which the memory was exported; must be TS_UP
 * vaddr	local buffer; must be 8-byte aligned
 * off		byte offset into the exported region at which to start
 * size		in: number of bytes to copy (8-byte multiple);
 *		out: on a copy error, the number of bytes copied so far
 * cookies	array of 'ccount' cookies describing the exported region
 * direction	passed through to hv_ldc_copy()
 *
 * Returns 0 on success (a zero length request succeeds without calling
 * the hypervisor), EINVAL for bad arguments or a request that does not
 * fit in the exported region, ECONNRESET if the channel is not up or is
 * found down/reset after a failed copy, and EIO for any other copy
 * failure.
 */
int
ldc_mem_copy(ldc_handle_t handle, caddr_t vaddr, uint64_t off, size_t *size,
    ldc_mem_cookie_t *cookies, uint32_t ccount, uint8_t direction)
{
	ldc_chan_t	*ldcp;
	uint64_t	local_voff, local_valign;
	uint64_t	cookie_addr, cookie_size;
	uint64_t	pg_shift, pg_size, pg_size_code;
	uint64_t	export_caddr, export_poff, export_psize, export_size;
	uint64_t	local_ra, local_poff, local_psize;
	uint64_t	copy_size, copied_len = 0, total_bal = 0, idx = 0;
	pgcnt_t		npages;
	size_t		len;
	uint32_t	i;
	int		rv = 0;
	uint64_t	chid;

	if (handle == NULL) {
		DWARN(DBG_ALL_LDCS, "ldc_mem_copy: invalid channel handle\n");
		return (EINVAL);
	}
	if (size == NULL || cookies == NULL || ccount == 0) {
		/* '*size' and 'cookies[0]' are both read below */
		DWARN(DBG_ALL_LDCS, "ldc_mem_copy: invalid args\n");
		return (EINVAL);
	}
	ldcp = (ldc_chan_t *)handle;
	chid = ldcp->id;
	len = *size;

	/* check to see if channel is UP */
	if (ldcp->tstate != TS_UP) {
		DWARN(chid, "ldc_mem_copy: (0x%llx) channel is not UP\n",
		    chid);
		return (ECONNRESET);
	}

	/* Force address and size to be 8-byte aligned */
	if ((((uintptr_t)vaddr | len) & 0x7) != 0) {
		DWARN(chid,
		    "ldc_mem_copy: addr/sz is not 8-byte aligned\n");
		return (EINVAL);
	}

	/* Find the size of the exported memory */
	export_size = 0;
	for (i = 0; i < ccount; i++)
		export_size += cookies[i].size;

	/* check to see if offset is valid */
	if (off > export_size) {
		DWARN(chid,
		    "ldc_mem_copy: (0x%llx) start offset > export mem size\n",
		    chid);
		return (EINVAL);
	}

	/*
	 * Check to see if the export size is smaller than the size we
	 * are requesting to copy - if so flag an error
	 */
	if ((export_size - off) < *size) {
		DWARN(chid,
		    "ldc_mem_copy: (0x%llx) copy size > export mem size\n",
		    chid);
		return (EINVAL);
	}

	/*
	 * Nothing to copy. Returning here also keeps the cookie search
	 * below in bounds: with off == export_size it would otherwise
	 * run off the end of the cookie array.
	 */
	if (len == 0)
		return (0);

	total_bal = min(export_size, *size);

	/* FUTURE: get the page size, pgsz code, and shift */
	pg_size = MMU_PAGESIZE;
	pg_size_code = page_szc(pg_size);
	pg_shift = page_get_shift(pg_size_code);

	D1(chid, "ldc_mem_copy: copying data "
	    "(0x%llx) va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n",
	    chid, vaddr, pg_size, pg_size_code, pg_shift);

	/* aligned VA and its offset */
	local_valign = (((uintptr_t)vaddr) & ~(pg_size - 1));
	local_voff = ((uintptr_t)vaddr) & (pg_size - 1);

	npages = (len + local_voff) / pg_size;
	npages = ((len + local_voff) % pg_size == 0) ? npages : npages + 1;

	D1(chid,
	    "ldc_mem_copy: (0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n",
	    chid, vaddr, local_valign, local_voff, npages);

	/* first local chunk: from vaddr to the end of its page (or len) */
	local_ra = va_to_pa((void *)local_valign);
	local_poff = local_voff;
	local_psize = min(len, (pg_size - local_voff));

	len -= local_psize;

	/*
	 * find the first cookie in the list of cookies
	 * if the offset passed in is not zero
	 */
	for (idx = 0; idx < ccount; idx++) {
		cookie_size = cookies[idx].size;
		if (off < cookie_size)
			break;
		off -= cookie_size;
	}

	/* off < export_size at this point, so a cookie was found */
	ASSERT(idx < ccount);

	cookie_addr = cookies[idx].addr + off;
	cookie_size = cookies[idx].size - off;

	export_caddr = cookie_addr & ~(pg_size - 1);
	export_poff = cookie_addr & (pg_size - 1);
	export_psize = min(cookie_size, (pg_size - export_poff));

	/*
	 * Each pass copies the largest piece that stays within both the
	 * current exported page and the current local page.
	 */
	for (;;) {

		copy_size = min(export_psize, local_psize);

		D1(chid,
		    "ldc_mem_copy:(0x%llx) dir=0x%x, caddr=0x%llx,"
		    " loc_ra=0x%llx, exp_poff=0x%llx, loc_poff=0x%llx,"
		    " exp_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx,"
		    " total_bal=0x%llx\n",
		    chid, direction, export_caddr, local_ra, export_poff,
		    local_poff, export_psize, local_psize, copy_size,
		    total_bal);

		rv = hv_ldc_copy(chid, direction,
		    (export_caddr + export_poff), (local_ra + local_poff),
		    copy_size, &copied_len);

		if (rv != 0) {
			int		error = EIO;
			uint64_t	rx_hd, rx_tl;

			DWARN(chid,
			    "ldc_mem_copy: (0x%llx) err %d during copy\n",
			    (unsigned long long)chid, rv);
			DWARN(chid,
			    "ldc_mem_copy: (0x%llx) dir=0x%x, caddr=0x%lx, "
			    "loc_ra=0x%lx, exp_poff=0x%lx, loc_poff=0x%lx,"
			    " exp_psz=0x%lx, loc_psz=0x%lx, copy_sz=0x%lx,"
			    " copied_len=0x%lx, total_bal=0x%lx\n",
			    chid, direction, export_caddr, local_ra,
			    export_poff, local_poff, export_psize, local_psize,
			    copy_size, copied_len, total_bal);

			/* report how much was copied before the failure */
			*size = *size - total_bal;

			/*
			 * check if reason for copy error was due to
			 * a channel reset. we need to grab the lock
			 * just in case we have to do a reset.
			 */
			mutex_enter(&ldcp->lock);
			mutex_enter(&ldcp->tx_lock);

			/* only the refreshed link_state is of interest */
			(void) hv_ldc_rx_get_state(ldcp->id,
			    &rx_hd, &rx_tl, &(ldcp->link_state));
			if (ldcp->link_state == LDC_CHANNEL_DOWN ||
			    ldcp->link_state == LDC_CHANNEL_RESET) {
				i_ldc_reset(ldcp, B_FALSE);
				error = ECONNRESET;
			}

			mutex_exit(&ldcp->tx_lock);
			mutex_exit(&ldcp->lock);

			return (error);
		}

		ASSERT(copied_len <= copy_size);

		D2(chid, "ldc_mem_copy: copied=0x%llx\n", copied_len);
		export_poff += copied_len;
		local_poff += copied_len;
		export_psize -= copied_len;
		local_psize -= copied_len;
		cookie_size -= copied_len;

		total_bal -= copied_len;

		/* short copy: retry the remainder of the same piece */
		if (copy_size != copied_len)
			continue;

		if (export_psize == 0 && total_bal != 0) {

			if (cookie_size == 0) {
				/* current cookie used up; move to the next */
				idx++;
				cookie_addr = cookies[idx].addr;
				cookie_size = cookies[idx].size;

				export_caddr = cookie_addr & ~(pg_size - 1);
				export_poff = cookie_addr & (pg_size - 1);
				export_psize =
				    min(cookie_size, (pg_size - export_poff));
			} else {
				/* next page within the same cookie */
				export_caddr += pg_size;
				export_poff = 0;
				export_psize = min(cookie_size, pg_size);
			}
		}

		if (local_psize == 0 && total_bal != 0) {
			/* advance to the next local page */
			local_valign += pg_size;
			local_ra = va_to_pa((void *)local_valign);
			local_poff = 0;
			local_psize = min(pg_size, len);
			len -= local_psize;
		}

		/* check if we are all done */
		if (total_bal == 0)
			break;
	}


	D1(chid,
	    "ldc_mem_copy: (0x%llx) done copying sz=0x%llx\n",
	    chid, *size);

	return (0);
}

/*
 * Copy data either from or to the client specified virtual address
 * space to or from HV physical memory.
 *
 * The direction argument determines whether the data is read from or
 * written to HV memory.
direction values are LDC_COPY_IN/OUT similar * to the ldc_mem_copy interface */ int ldc_mem_rdwr_cookie(ldc_handle_t handle, caddr_t vaddr, size_t *size, caddr_t paddr, uint8_t direction) { ldc_chan_t *ldcp; uint64_t local_voff, local_valign; uint64_t pg_shift, pg_size, pg_size_code; uint64_t target_pa, target_poff, target_psize, target_size; uint64_t local_ra, local_poff, local_psize; uint64_t copy_size, copied_len = 0; pgcnt_t npages; size_t len = *size; int rv = 0; if (handle == NULL) { DWARN(DBG_ALL_LDCS, "ldc_mem_rdwr_cookie: invalid channel handle\n"); return (EINVAL); } ldcp = (ldc_chan_t *)handle; mutex_enter(&ldcp->lock); /* check to see if channel is UP */ if (ldcp->tstate != TS_UP) { DWARN(ldcp->id, "ldc_mem_rdwr_cookie: (0x%llx) channel is not UP\n", ldcp->id); mutex_exit(&ldcp->lock); return (ECONNRESET); } /* Force address and size to be 8-byte aligned */ if ((((uintptr_t)vaddr | len) & 0x7) != 0) { DWARN(ldcp->id, "ldc_mem_rdwr_cookie: addr/size is not 8-byte aligned\n"); mutex_exit(&ldcp->lock); return (EINVAL); } target_size = *size; /* FUTURE: get the page size, pgsz code, and shift */ pg_size = MMU_PAGESIZE; pg_size_code = page_szc(pg_size); pg_shift = page_get_shift(pg_size_code); D1(ldcp->id, "ldc_mem_rdwr_cookie: copying data " "(0x%llx) va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n", ldcp->id, vaddr, pg_size, pg_size_code, pg_shift); /* aligned VA and its offset */ local_valign = ((uintptr_t)vaddr) & ~(pg_size - 1); local_voff = ((uintptr_t)vaddr) & (pg_size - 1); npages = (len + local_voff) / pg_size; npages = ((len + local_voff) % pg_size == 0) ? 
npages : npages+1; D1(ldcp->id, "ldc_mem_rdwr_cookie: (0x%llx) v=0x%llx, " "val=0x%llx,off=0x%x,pgs=0x%x\n", ldcp->id, vaddr, local_valign, local_voff, npages); local_ra = va_to_pa((void *)local_valign); local_poff = local_voff; local_psize = min(len, (pg_size - local_voff)); len -= local_psize; target_pa = ((uintptr_t)paddr) & ~(pg_size - 1); target_poff = ((uintptr_t)paddr) & (pg_size - 1); target_psize = pg_size - target_poff; for (;;) { copy_size = min(target_psize, local_psize); D1(ldcp->id, "ldc_mem_rdwr_cookie: (0x%llx) dir=0x%x, tar_pa=0x%llx," " loc_ra=0x%llx, tar_poff=0x%llx, loc_poff=0x%llx," " tar_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx," " total_bal=0x%llx\n", ldcp->id, direction, target_pa, local_ra, target_poff, local_poff, target_psize, local_psize, copy_size, target_size); rv = hv_ldc_copy(ldcp->id, direction, (target_pa + target_poff), (local_ra + local_poff), copy_size, &copied_len); if (rv != 0) { DWARN(DBG_ALL_LDCS, "ldc_mem_rdwr_cookie: (0x%lx) err %d during copy\n", ldcp->id, rv); DWARN(DBG_ALL_LDCS, "ldc_mem_rdwr_cookie: (0x%llx) dir=%lld, " "tar_pa=0x%llx, loc_ra=0x%llx, tar_poff=0x%llx, " "loc_poff=0x%llx, tar_psz=0x%llx, loc_psz=0x%llx, " "copy_sz=0x%llx, total_bal=0x%llx\n", ldcp->id, direction, target_pa, local_ra, target_poff, local_poff, target_psize, local_psize, copy_size, target_size); *size = *size - target_size; mutex_exit(&ldcp->lock); return (i_ldc_h2v_error(rv)); } D2(ldcp->id, "ldc_mem_rdwr_cookie: copied=0x%llx\n", copied_len); target_poff += copied_len; local_poff += copied_len; target_psize -= copied_len; local_psize -= copied_len; target_size -= copied_len; if (copy_size != copied_len) continue; if (target_psize == 0 && target_size != 0) { target_pa += pg_size; target_poff = 0; target_psize = min(pg_size, target_size); } if (local_psize == 0 && target_size != 0) { local_valign += pg_size; local_ra = va_to_pa((void *)local_valign); local_poff = 0; local_psize = min(pg_size, len); len -= local_psize; } /* check if we are 
all done */ if (target_size == 0) break; } mutex_exit(&ldcp->lock); D1(ldcp->id, "ldc_mem_rdwr_cookie: (0x%llx) done copying sz=0x%llx\n", ldcp->id, *size); return (0); } /* * Map an exported memory segment into the local address space. If the * memory range was exported for direct map access, a HV call is made * to allocate a RA range. If the map is done via a shadow copy, local * shadow memory is allocated and the base VA is returned in 'vaddr'. If * the mapping is a direct map then the RA is returned in 'raddr'. */ int ldc_mem_map(ldc_mem_handle_t mhandle, ldc_mem_cookie_t *cookie, uint32_t ccount, uint8_t mtype, uint8_t perm, caddr_t *vaddr, caddr_t *raddr) { /* * Check if direct map over shared memory is enabled, if not change * the mapping type to SHADOW_MAP. */ if (ldc_shmem_enabled == 0) mtype = LDC_SHADOW_MAP; return (i_ldc_mem_map(mhandle, cookie, ccount, mtype, perm, vaddr, raddr)); } static int i_ldc_mem_map(ldc_mem_handle_t mhandle, ldc_mem_cookie_t *cookie, uint32_t ccount, uint8_t mtype, uint8_t perm, caddr_t *vaddr, caddr_t *raddr) { int i, j, idx, rv, retries; ldc_chan_t *ldcp; ldc_mhdl_t *mhdl; ldc_memseg_t *memseg; caddr_t tmpaddr; uint64_t map_perm = perm; uint64_t pg_size, pg_shift, pg_size_code, pg_mask; uint64_t exp_size = 0, base_off, map_size, npages; uint64_t cookie_addr, cookie_off, cookie_size; tte_t ldc_tte; if (mhandle == NULL) { DWARN(DBG_ALL_LDCS, "ldc_mem_map: invalid memory handle\n"); return (EINVAL); } mhdl = (ldc_mhdl_t *)mhandle; mutex_enter(&mhdl->lock); if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED || mhdl->memseg != NULL) { DWARN(DBG_ALL_LDCS, "ldc_mem_map: (0x%llx) handle bound/mapped\n", mhandle); mutex_exit(&mhdl->lock); return (EINVAL); } ldcp = mhdl->ldcp; mutex_enter(&ldcp->lock); if (ldcp->tstate != TS_UP) { DWARN(ldcp->id, "ldc_mem_dring_map: (0x%llx) channel is not UP\n", ldcp->id); mutex_exit(&ldcp->lock); mutex_exit(&mhdl->lock); return (ECONNRESET); } if ((mtype & 
(LDC_SHADOW_MAP|LDC_DIRECT_MAP|LDC_IO_MAP)) == 0) { DWARN(ldcp->id, "ldc_mem_map: invalid map type\n"); mutex_exit(&ldcp->lock); mutex_exit(&mhdl->lock); return (EINVAL); } D1(ldcp->id, "ldc_mem_map: (0x%llx) cookie = 0x%llx,0x%llx\n", ldcp->id, cookie->addr, cookie->size); /* FUTURE: get the page size, pgsz code, and shift */ pg_size = MMU_PAGESIZE; pg_size_code = page_szc(pg_size); pg_shift = page_get_shift(pg_size_code); pg_mask = ~(pg_size - 1); /* calculate the number of pages in the exported cookie */ base_off = cookie[0].addr & (pg_size - 1); for (idx = 0; idx < ccount; idx++) exp_size += cookie[idx].size; map_size = P2ROUNDUP((exp_size + base_off), pg_size); npages = (map_size >> pg_shift); /* Allocate memseg structure */ memseg = mhdl->memseg = kmem_cache_alloc(ldcssp->memseg_cache, KM_SLEEP); /* Allocate memory to store all pages and cookies */ memseg->pages = kmem_zalloc((sizeof (ldc_page_t) * npages), KM_SLEEP); memseg->cookies = kmem_zalloc((sizeof (ldc_mem_cookie_t) * ccount), KM_SLEEP); D2(ldcp->id, "ldc_mem_map: (0x%llx) exp_size=0x%llx, map_size=0x%llx," "pages=0x%llx\n", ldcp->id, exp_size, map_size, npages); /* * Check to see if the client is requesting direct or shadow map * If direct map is requested, try to map remote memory first, * and if that fails, revert to shadow map */ if (mtype == LDC_DIRECT_MAP) { /* Allocate kernel virtual space for mapping */ memseg->vaddr = vmem_xalloc(heap_arena, map_size, pg_size, 0, 0, NULL, NULL, VM_NOSLEEP); if (memseg->vaddr == NULL) { DWARN(DBG_ALL_LDCS, "ldc_mem_map: (0x%lx) memory map failed\n", ldcp->id); kmem_free(memseg->cookies, (sizeof (ldc_mem_cookie_t) * ccount)); kmem_free(memseg->pages, (sizeof (ldc_page_t) * npages)); kmem_cache_free(ldcssp->memseg_cache, memseg); mutex_exit(&ldcp->lock); mutex_exit(&mhdl->lock); return (ENOMEM); } /* Unload previous mapping */ hat_unload(kas.a_hat, memseg->vaddr, map_size, HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK); /* for each cookie passed in - map into address 
space */ idx = 0; cookie_size = 0; tmpaddr = memseg->vaddr; for (i = 0; i < npages; i++) { if (cookie_size == 0) { ASSERT(idx < ccount); cookie_addr = cookie[idx].addr & pg_mask; cookie_off = cookie[idx].addr & (pg_size - 1); cookie_size = P2ROUNDUP((cookie_off + cookie[idx].size), pg_size); idx++; } D1(ldcp->id, "ldc_mem_map: (0x%llx) mapping " "cookie 0x%llx, bal=0x%llx\n", ldcp->id, cookie_addr, cookie_size); /* map the cookie into address space */ for (retries = 0; retries < ldc_max_retries; retries++) { rv = hv_ldc_mapin(ldcp->id, cookie_addr, &memseg->pages[i].raddr, &map_perm); if (rv != H_EWOULDBLOCK && rv != H_ETOOMANY) break; drv_usecwait(ldc_delay); } if (rv || memseg->pages[i].raddr == 0) { DWARN(ldcp->id, "ldc_mem_map: (0x%llx) hv mapin err %d\n", ldcp->id, rv); /* remove previous mapins */ hat_unload(kas.a_hat, memseg->vaddr, map_size, HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK); for (j = 0; j < i; j++) { rv = hv_ldc_unmap( memseg->pages[j].raddr); if (rv) { DWARN(ldcp->id, "ldc_mem_map: (0x%llx) " "cannot unmap ra=0x%llx\n", ldcp->id, memseg->pages[j].raddr); } } /* free kernel virtual space */ vmem_free(heap_arena, (void *)memseg->vaddr, map_size); /* direct map failed - revert to shadow map */ mtype = LDC_SHADOW_MAP; break; } else { D1(ldcp->id, "ldc_mem_map: (0x%llx) vtop map 0x%llx -> " "0x%llx, cookie=0x%llx, perm=0x%llx\n", ldcp->id, tmpaddr, memseg->pages[i].raddr, cookie_addr, perm); /* * NOTE: Calling hat_devload directly, causes it * to look for page_t using the pfn. 
Since this * addr is greater than the memlist, it treates * it as non-memory */ sfmmu_memtte(&ldc_tte, (pfn_t)(memseg->pages[i].raddr >> pg_shift), PROT_READ | PROT_WRITE | HAT_NOSYNC, TTE8K); D1(ldcp->id, "ldc_mem_map: (0x%llx) ra 0x%llx -> " "tte 0x%llx\n", ldcp->id, memseg->pages[i].raddr, ldc_tte); sfmmu_tteload(kas.a_hat, &ldc_tte, tmpaddr, NULL, HAT_LOAD_LOCK); cookie_size -= pg_size; cookie_addr += pg_size; tmpaddr += pg_size; } } } if (mtype == LDC_SHADOW_MAP) { if (*vaddr == NULL) { memseg->vaddr = kmem_zalloc(exp_size, KM_SLEEP); mhdl->myshadow = B_TRUE; D1(ldcp->id, "ldc_mem_map: (0x%llx) allocated " "shadow page va=0x%llx\n", ldcp->id, memseg->vaddr); } else { /* * Use client supplied memory for memseg->vaddr * WARNING: assuming that client mem is >= exp_size */ memseg->vaddr = *vaddr; } /* Save all page and cookie information */ for (i = 0, tmpaddr = memseg->vaddr; i < npages; i++) { memseg->pages[i].raddr = va_to_pa(tmpaddr); memseg->pages[i].size = pg_size; tmpaddr += pg_size; } } /* save all cookies */ bcopy(cookie, memseg->cookies, ccount * sizeof (ldc_mem_cookie_t)); /* update memseg_t */ memseg->raddr = memseg->pages[0].raddr; memseg->size = (mtype == LDC_SHADOW_MAP) ? exp_size : map_size; memseg->npages = npages; memseg->ncookies = ccount; memseg->next_cookie = 0; /* memory handle = mapped */ mhdl->mtype = mtype; mhdl->perm = perm; mhdl->status = LDC_MAPPED; D1(ldcp->id, "ldc_mem_map: (0x%llx) mapped 0x%llx, ra=0x%llx, " "va=0x%llx, pgs=0x%llx cookies=0x%llx\n", ldcp->id, mhdl, memseg->raddr, memseg->vaddr, memseg->npages, memseg->ncookies); if (mtype == LDC_SHADOW_MAP) base_off = 0; if (raddr) *raddr = (caddr_t)(memseg->raddr | base_off); if (vaddr) *vaddr = (caddr_t)((uintptr_t)memseg->vaddr | base_off); mutex_exit(&ldcp->lock); mutex_exit(&mhdl->lock); return (0); } /* * Unmap a memory segment. Free shadow memory (if any). 
*/
/*
 * Tears down a mapping created by ldc_mem_map()/i_ldc_mem_map():
 *  - a shadow buffer allocated by the map routine is freed;
 *  - a direct map has its translations unloaded, each page handed back
 *    to the HV with hv_ldc_unmap(), and its kernel VA range released.
 * The memseg, page and cookie arrays are then freed and the handle is
 * returned to the LDC_UNBOUND state.
 *
 * Returns 0 on success, EINVAL if the handle is NULL or not mapped.
 */
int
ldc_mem_unmap(ldc_mem_handle_t mhandle)
{
	int		i, rv;
	ldc_mhdl_t	*mhdl = (ldc_mhdl_t *)mhandle;
	ldc_chan_t	*ldcp;
	ldc_memseg_t	*memseg;

	if (mhdl == NULL) {
		DWARN(DBG_ALL_LDCS,
		    "ldc_mem_unmap: (0x%llx) handle is not mapped\n",
		    mhandle);
		return (EINVAL);
	}

	mutex_enter(&mhdl->lock);

	/* check the state under the lock so it cannot change under us */
	if (mhdl->status != LDC_MAPPED) {
		DWARN(DBG_ALL_LDCS,
		    "ldc_mem_unmap: (0x%llx) handle is not mapped\n",
		    mhandle);
		mutex_exit(&mhdl->lock);
		return (EINVAL);
	}

	ldcp = mhdl->ldcp;
	memseg = mhdl->memseg;

	D1(ldcp->id, "ldc_mem_unmap: (0x%llx) unmapping handle 0x%llx\n",
	    ldcp->id, mhdl);

	/* if we allocated shadow memory - free it */
	if (mhdl->mtype == LDC_SHADOW_MAP && mhdl->myshadow) {
		kmem_free(memseg->vaddr, memseg->size);
	} else if (mhdl->mtype == LDC_DIRECT_MAP) {

		/* unmap in the case of DIRECT_MAP */
		hat_unload(kas.a_hat, memseg->vaddr, memseg->size,
		    HAT_UNLOAD_UNLOCK);

		for (i = 0; i < memseg->npages; i++) {
			rv = hv_ldc_unmap(memseg->pages[i].raddr);
			if (rv) {
				DWARN(DBG_ALL_LDCS,
				    "ldc_mem_unmap: (0x%lx) hv unmap err %d\n",
				    ldcp->id, rv);
			}
		}

		vmem_free(heap_arena, (void *)memseg->vaddr, memseg->size);
	}

	/* free the allocated memseg and page structures */
	kmem_free(memseg->pages, (sizeof (ldc_page_t) * memseg->npages));
	kmem_free(memseg->cookies,
	    (sizeof (ldc_mem_cookie_t) * memseg->ncookies));
	kmem_cache_free(ldcssp->memseg_cache, memseg);

	/*
	 * uninitialize the memory handle; clearing 'myshadow' keeps a
	 * later mapping that uses client memory from being freed here
	 */
	mhdl->memseg = NULL;
	mhdl->myshadow = B_FALSE;
	mhdl->status = LDC_UNBOUND;

	D1(ldcp->id, "ldc_mem_unmap: (0x%llx) unmapped handle 0x%llx\n",
	    ldcp->id, mhdl);

	mutex_exit(&mhdl->lock);
	return (0);
}

/*
 * Internal entry point for LDC mapped memory entry consistency
 * semantics. Acquire copies the contents of the remote memory
 * into the local shadow copy. The release operation copies the local
 * contents into the remote memory. The offset and size specify the
 * bounds for the memory range being synchronized.
*/ static int i_ldc_mem_acquire_release(ldc_mem_handle_t mhandle, uint8_t direction, uint64_t offset, size_t size) { int err; ldc_mhdl_t *mhdl; ldc_chan_t *ldcp; ldc_memseg_t *memseg; caddr_t local_vaddr; size_t copy_size; if (mhandle == NULL) { DWARN(DBG_ALL_LDCS, "i_ldc_mem_acquire_release: invalid memory handle\n"); return (EINVAL); } mhdl = (ldc_mhdl_t *)mhandle; mutex_enter(&mhdl->lock); if (mhdl->status != LDC_MAPPED || mhdl->ldcp == NULL) { DWARN(DBG_ALL_LDCS, "i_ldc_mem_acquire_release: not mapped memory\n"); mutex_exit(&mhdl->lock); return (EINVAL); } /* do nothing for direct map */ if (mhdl->mtype == LDC_DIRECT_MAP) { mutex_exit(&mhdl->lock); return (0); } /* do nothing if COPY_IN+MEM_W and COPY_OUT+MEM_R */ if ((direction == LDC_COPY_IN && (mhdl->perm & LDC_MEM_R) == 0) || (direction == LDC_COPY_OUT && (mhdl->perm & LDC_MEM_W) == 0)) { mutex_exit(&mhdl->lock); return (0); } if (offset >= mhdl->memseg->size || (offset + size) > mhdl->memseg->size) { DWARN(DBG_ALL_LDCS, "i_ldc_mem_acquire_release: memory out of range\n"); mutex_exit(&mhdl->lock); return (EINVAL); } /* get the channel handle and memory segment */ ldcp = mhdl->ldcp; memseg = mhdl->memseg; if (mhdl->mtype == LDC_SHADOW_MAP) { local_vaddr = memseg->vaddr + offset; copy_size = size; /* copy to/from remote from/to local memory */ err = ldc_mem_copy((ldc_handle_t)ldcp, local_vaddr, offset, ©_size, memseg->cookies, memseg->ncookies, direction); if (err || copy_size != size) { DWARN(ldcp->id, "i_ldc_mem_acquire_release: copy failed\n"); mutex_exit(&mhdl->lock); return (err); } } mutex_exit(&mhdl->lock); return (0); } /* * Ensure that the contents in the remote memory seg are consistent * with the contents if of local segment */ int ldc_mem_acquire(ldc_mem_handle_t mhandle, uint64_t offset, uint64_t size) { return (i_ldc_mem_acquire_release(mhandle, LDC_COPY_IN, offset, size)); } /* * Ensure that the contents in the local memory seg are consistent * with the contents if of remote segment */ int 
ldc_mem_release(ldc_mem_handle_t mhandle, uint64_t offset, uint64_t size) { return (i_ldc_mem_acquire_release(mhandle, LDC_COPY_OUT, offset, size)); } /* * Allocate a descriptor ring. The size of each each descriptor * must be 8-byte aligned and the entire ring should be a multiple * of MMU_PAGESIZE. */ int ldc_mem_dring_create(uint32_t len, uint32_t dsize, ldc_dring_handle_t *dhandle) { ldc_dring_t *dringp; size_t size = (dsize * len); D1(DBG_ALL_LDCS, "ldc_mem_dring_create: len=0x%x, size=0x%x\n", len, dsize); if (dhandle == NULL) { DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid dhandle\n"); return (EINVAL); } if (len == 0) { DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid length\n"); return (EINVAL); } /* descriptor size should be 8-byte aligned */ if (dsize == 0 || (dsize & 0x7)) { DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid size\n"); return (EINVAL); } *dhandle = 0; /* Allocate a desc ring structure */ dringp = kmem_zalloc(sizeof (ldc_dring_t), KM_SLEEP); /* Initialize dring */ dringp->length = len; dringp->dsize = dsize; /* round off to multiple of pagesize */ dringp->size = (size & MMU_PAGEMASK); if (size & MMU_PAGEOFFSET) dringp->size += MMU_PAGESIZE; dringp->status = LDC_UNBOUND; /* allocate descriptor ring memory */ dringp->base = kmem_zalloc(dringp->size, KM_SLEEP); /* initialize the desc ring lock */ mutex_init(&dringp->lock, NULL, MUTEX_DRIVER, NULL); /* Add descriptor ring to the head of global list */ mutex_enter(&ldcssp->lock); dringp->next = ldcssp->dring_list; ldcssp->dring_list = dringp; mutex_exit(&ldcssp->lock); *dhandle = (ldc_dring_handle_t)dringp; D1(DBG_ALL_LDCS, "ldc_mem_dring_create: dring allocated\n"); return (0); } /* * Destroy a descriptor ring. 
*/
/*
 * The ring must not be bound to a channel and must be present on the
 * global descriptor ring list. On success the ring memory, its lock and
 * the ring object itself are freed, so the handle must not be used
 * again.
 *
 * Returns 0 on success, EINVAL for a NULL handle or a ring that is not
 * on the global list, and EACCES if the ring is still bound.
 */
int
ldc_mem_dring_destroy(ldc_dring_handle_t dhandle)
{
	ldc_dring_t	*dringp;
	ldc_dring_t	*tmp_dringp;

	D1(DBG_ALL_LDCS, "ldc_mem_dring_destroy: entered\n");

	if (dhandle == NULL) {
		DWARN(DBG_ALL_LDCS,
		    "ldc_mem_dring_destroy: invalid desc ring handle\n");
		return (EINVAL);
	}
	dringp = (ldc_dring_t *)dhandle;

	mutex_enter(&dringp->lock);

	/*
	 * Test the state while holding the ring lock so that a bind in
	 * progress cannot slip in between the check and the teardown.
	 */
	if (dringp->status == LDC_BOUND) {
		DWARN(DBG_ALL_LDCS,
		    "ldc_mem_dring_destroy: desc ring is bound\n");
		mutex_exit(&dringp->lock);
		return (EACCES);
	}

	mutex_enter(&ldcssp->lock);

	/* remove from linked list - if not bound */
	tmp_dringp = ldcssp->dring_list;
	if (tmp_dringp == dringp) {
		ldcssp->dring_list = dringp->next;
		dringp->next = NULL;

	} else {
		while (tmp_dringp != NULL) {
			if (tmp_dringp->next == dringp) {
				tmp_dringp->next = dringp->next;
				dringp->next = NULL;
				break;
			}
			tmp_dringp = tmp_dringp->next;
		}
		/* walked off the end (or list empty): ring is not ours */
		if (tmp_dringp == NULL) {
			DWARN(DBG_ALL_LDCS,
			    "ldc_mem_dring_destroy: invalid descriptor\n");
			mutex_exit(&ldcssp->lock);
			mutex_exit(&dringp->lock);
			return (EINVAL);
		}

	}

	mutex_exit(&ldcssp->lock);

	/* free the descriptor ring */
	kmem_free(dringp->base, dringp->size);

	mutex_exit(&dringp->lock);

	/* destroy dring lock */
	mutex_destroy(&dringp->lock);

	/* free desc ring object */
	kmem_free(dringp, sizeof (ldc_dring_t));

	return (0);
}

/*
 * Bind a previously allocated dring to a channel. The channel should
 * be OPEN in order to bind the ring to the channel. Returns back a
 * descriptor ring cookie. The descriptor ring is exported for remote
 * access by the client at the other end of the channel. An entry for
 * dring pages is stored in map table (via call to ldc_mem_bind_handle).
*/
/*
 * handle  - channel to export the ring on
 * dhandle - ring created by ldc_mem_dring_create()
 * mtype   - LDC_SHADOW_MAP and/or LDC_DIRECT_MAP; forced to shadow when
 *           direct-mapped drings are not supported/enabled
 * perm    - must include at least one of the LDC_MEM_RW bits
 * cookie  - out: first cookie for the exported ring
 * ccount  - out: number of cookies (only a single cookie is supported)
 *
 * Returns 0 on success, EINVAL for bad arguments or an already bound
 * ring, EAGAIN if the bind produced more than one cookie, or the error
 * from handle allocation/binding. On every failure the ring is left
 * unbound with no memory handle attached.
 */
int
ldc_mem_dring_bind(ldc_handle_t handle, ldc_dring_handle_t dhandle,
    uint8_t mtype, uint8_t perm, ldc_mem_cookie_t *cookie, uint32_t *ccount)
{
	int		err;
	ldc_chan_t	*ldcp;
	ldc_dring_t	*dringp;
	ldc_mem_handle_t mhandle;

	/* check to see if channel is initialized */
	if (handle == NULL) {
		DWARN(DBG_ALL_LDCS,
		    "ldc_mem_dring_bind: invalid channel handle\n");
		return (EINVAL);
	}
	ldcp = (ldc_chan_t *)handle;

	if (dhandle == NULL) {
		DWARN(DBG_ALL_LDCS,
		    "ldc_mem_dring_bind: invalid desc ring handle\n");
		return (EINVAL);
	}
	dringp = (ldc_dring_t *)dhandle;

	if (cookie == NULL) {
		DWARN(ldcp->id,
		    "ldc_mem_dring_bind: invalid cookie arg\n");
		return (EINVAL);
	}

	/* '*ccount' is read after the bind, so it must be valid too */
	if (ccount == NULL) {
		DWARN(ldcp->id,
		    "ldc_mem_dring_bind: invalid cookie count arg\n");
		return (EINVAL);
	}

	/* ensure the mtype is valid */
	if ((mtype & (LDC_SHADOW_MAP|LDC_DIRECT_MAP)) == 0) {
		DWARN(ldcp->id, "ldc_mem_dring_bind: invalid map type\n");
		return (EINVAL);
	}

	/* no need to bind as direct map if it's not HV supported or enabled */
	if (!ldc_dring_shmem_hv_ok || !ldc_dring_shmem_enabled) {
		mtype = LDC_SHADOW_MAP;
	}

	mutex_enter(&dringp->lock);

	if (dringp->status == LDC_BOUND) {
		DWARN(DBG_ALL_LDCS,
		    "ldc_mem_dring_bind: (0x%llx) descriptor ring is bound\n",
		    ldcp->id);
		mutex_exit(&dringp->lock);
		return (EINVAL);
	}

	if ((perm & LDC_MEM_RW) == 0) {
		DWARN(DBG_ALL_LDCS,
		    "ldc_mem_dring_bind: invalid permissions\n");
		mutex_exit(&dringp->lock);
		return (EINVAL);
	}

	dringp->ldcp = ldcp;

	/* create an memory handle */
	err = ldc_mem_alloc_handle(handle, &mhandle);
	if (err || mhandle == NULL) {
		DWARN(DBG_ALL_LDCS,
		    "ldc_mem_dring_bind: (0x%llx) error allocating mhandle\n",
		    ldcp->id);
		dringp->ldcp = NULL;
		mutex_exit(&dringp->lock);
		/* never report success without a usable handle */
		return (err ? err : ENOMEM);
	}
	dringp->mhdl = mhandle;

	/* bind the descriptor ring to channel */
	err = i_ldc_mem_bind_handle(mhandle, dringp->base, dringp->size,
	    mtype, perm, cookie, ccount);
	if (err) {
		DWARN(ldcp->id,
		    "ldc_mem_dring_bind: (0x%llx) error binding mhandle\n",
		    ldcp->id);
		/* release the handle allocated above and detach the ring */
		(void) ldc_mem_free_handle(mhandle);
		dringp->ldcp = NULL;
		dringp->mhdl = NULL;
		mutex_exit(&dringp->lock);
		return (err);
	}

	/*
	 * For now return error if we get more than one cookie
	 * FUTURE: Return multiple cookies ..
	 */
	if (*ccount > 1) {
		(void) ldc_mem_unbind_handle(mhandle);
		(void) ldc_mem_free_handle(mhandle);

		dringp->ldcp = NULL;
		dringp->mhdl = NULL;
		*ccount = 0;

		mutex_exit(&dringp->lock);
		return (EAGAIN);
	}

	/* Add descriptor ring to channel's exported dring list */
	mutex_enter(&ldcp->exp_dlist_lock);
	dringp->ch_next = ldcp->exp_dring_list;
	ldcp->exp_dring_list = dringp;
	mutex_exit(&ldcp->exp_dlist_lock);

	dringp->status = LDC_BOUND;

	mutex_exit(&dringp->lock);

	return (0);
}

/*
 * Return the next cookie associated with the specified dring handle.
 *
 * Returns EINVAL for a NULL handle, a ring that is not bound, or a NULL
 * cookie pointer; otherwise returns whatever ldc_mem_nextcookie()
 * returns for the ring's memory handle.
 */
int
ldc_mem_dring_nextcookie(ldc_dring_handle_t dhandle, ldc_mem_cookie_t *cookie)
{
	int		rv = 0;
	ldc_dring_t	*dringp;
	ldc_chan_t	*ldcp;

	if (dhandle == NULL) {
		DWARN(DBG_ALL_LDCS,
		    "ldc_mem_dring_nextcookie: invalid desc ring handle\n");
		return (EINVAL);
	}
	dringp = (ldc_dring_t *)dhandle;
	mutex_enter(&dringp->lock);

	if (dringp->status != LDC_BOUND) {
		DWARN(DBG_ALL_LDCS,
		    "ldc_mem_dring_nextcookie: descriptor ring 0x%llx "
		    "is not bound\n", dringp);
		mutex_exit(&dringp->lock);
		return (EINVAL);
	}

	ldcp = dringp->ldcp;

	if (cookie == NULL) {
		DWARN(ldcp->id,
		    "ldc_mem_dring_nextcookie:(0x%llx) invalid cookie arg\n",
		    ldcp->id);
		mutex_exit(&dringp->lock);
		return (EINVAL);
	}

	rv = ldc_mem_nextcookie((ldc_mem_handle_t)dringp->mhdl, cookie);
	mutex_exit(&dringp->lock);

	return (rv);
}

/*
 * Unbind a previously bound dring from a channel.
*/ int ldc_mem_dring_unbind(ldc_dring_handle_t dhandle) { ldc_dring_t *dringp; ldc_dring_t *tmp_dringp; ldc_chan_t *ldcp; if (dhandle == NULL) { DWARN(DBG_ALL_LDCS, "ldc_mem_dring_unbind: invalid desc ring handle\n"); return (EINVAL); } dringp = (ldc_dring_t *)dhandle; mutex_enter(&dringp->lock); if (dringp->status == LDC_UNBOUND) { DWARN(DBG_ALL_LDCS, "ldc_mem_dring_bind: descriptor ring 0x%llx is unbound\n", dringp); mutex_exit(&dringp->lock); return (EINVAL); } ldcp = dringp->ldcp; mutex_enter(&ldcp->exp_dlist_lock); tmp_dringp = ldcp->exp_dring_list; if (tmp_dringp == dringp) { ldcp->exp_dring_list = dringp->ch_next; dringp->ch_next = NULL; } else { while (tmp_dringp != NULL) { if (tmp_dringp->ch_next == dringp) { tmp_dringp->ch_next = dringp->ch_next; dringp->ch_next = NULL; break; } tmp_dringp = tmp_dringp->ch_next; } if (tmp_dringp == NULL) { DWARN(DBG_ALL_LDCS, "ldc_mem_dring_unbind: invalid descriptor\n"); mutex_exit(&ldcp->exp_dlist_lock); mutex_exit(&dringp->lock); return (EINVAL); } } mutex_exit(&ldcp->exp_dlist_lock); (void) ldc_mem_unbind_handle((ldc_mem_handle_t)dringp->mhdl); (void) ldc_mem_free_handle((ldc_mem_handle_t)dringp->mhdl); dringp->ldcp = NULL; dringp->mhdl = NULL; dringp->status = LDC_UNBOUND; mutex_exit(&dringp->lock); return (0); } #ifdef DEBUG void i_ldc_mem_inject_dring_clear(ldc_chan_t *ldcp) { ldc_dring_t *dp; ldc_mhdl_t *mhdl; ldc_mtbl_t *mtbl; ldc_memseg_t *memseg; uint64_t cookie_addr; uint64_t pg_shift, pg_size_code; int i, rv, retries; /* has a map table been allocated? 
*/ if ((mtbl = ldcp->mtbl) == NULL) return; /* lock the memory table - exclusive access to channel */ mutex_enter(&mtbl->lock); /* lock the exported dring list */ mutex_enter(&ldcp->exp_dlist_lock); for (dp = ldcp->exp_dring_list; dp != NULL; dp = dp->ch_next) { if ((mhdl = (ldc_mhdl_t *)dp->mhdl) == NULL) continue; if ((memseg = mhdl->memseg) == NULL) continue; /* undo the pages exported */ for (i = 0; i < memseg->npages; i++) { /* clear the entry from the table */ memseg->pages[i].mte->entry.ll = 0; pg_size_code = page_szc(memseg->pages[i].size); pg_shift = page_get_shift(pg_size_code); cookie_addr = IDX2COOKIE(memseg->pages[i].index, pg_size_code, pg_shift); retries = 0; do { rv = hv_ldc_revoke(ldcp->id, cookie_addr, memseg->pages[i].mte->cookie); if (rv != H_EWOULDBLOCK) break; drv_usecwait(ldc_delay); } while (retries++ < ldc_max_retries); if (rv != 0) { DWARN(ldcp->id, "i_ldc_mem_inject_dring_clear(): " "hv_ldc_revoke failed: " "channel: 0x%lx, cookie addr: 0x%p," "cookie: 0x%lx, rv: %d", ldcp->id, cookie_addr, memseg->pages[i].mte->cookie, rv); } mtbl->num_avail++; } } mutex_exit(&ldcp->exp_dlist_lock); mutex_exit(&mtbl->lock); } #endif /* * Get information about the dring. The base address of the descriptor * ring along with the type and permission are returned back. */ int ldc_mem_dring_info(ldc_dring_handle_t dhandle, ldc_mem_info_t *minfo) { ldc_dring_t *dringp; int rv; if (dhandle == NULL) { DWARN(DBG_ALL_LDCS, "ldc_mem_dring_info: invalid desc ring handle\n"); return (EINVAL); } dringp = (ldc_dring_t *)dhandle; mutex_enter(&dringp->lock); if (dringp->mhdl) { rv = ldc_mem_info(dringp->mhdl, minfo); if (rv) { DWARN(DBG_ALL_LDCS, "ldc_mem_dring_info: error reading mem info\n"); mutex_exit(&dringp->lock); return (rv); } } else { minfo->vaddr = dringp->base; minfo->raddr = NULL; minfo->status = dringp->status; } mutex_exit(&dringp->lock); return (0); } /* * Map an exported descriptor ring into the local address space. 
If the * descriptor ring was exported for direct map access, a HV call is made * to allocate a RA range. If the map is done via a shadow copy, local * shadow memory is allocated. */ int ldc_mem_dring_map(ldc_handle_t handle, ldc_mem_cookie_t *cookie, uint32_t ccount, uint32_t len, uint32_t dsize, uint8_t mtype, ldc_dring_handle_t *dhandle) { int err; ldc_chan_t *ldcp = (ldc_chan_t *)handle; ldc_mem_handle_t mhandle; ldc_dring_t *dringp; size_t dring_size; if (dhandle == NULL) { DWARN(DBG_ALL_LDCS, "ldc_mem_dring_map: invalid dhandle\n"); return (EINVAL); } /* check to see if channel is initalized */ if (handle == NULL) { DWARN(DBG_ALL_LDCS, "ldc_mem_dring_map: invalid channel handle\n"); return (EINVAL); } ldcp = (ldc_chan_t *)handle; if (cookie == NULL) { DWARN(ldcp->id, "ldc_mem_dring_map: (0x%llx) invalid cookie\n", ldcp->id); return (EINVAL); } /* FUTURE: For now we support only one cookie per dring */ ASSERT(ccount == 1); if (cookie->size < (dsize * len)) { DWARN(ldcp->id, "ldc_mem_dring_map: (0x%llx) invalid dsize/len\n", ldcp->id); return (EINVAL); } /* ensure the mtype is valid */ if ((mtype & (LDC_SHADOW_MAP|LDC_DIRECT_MAP)) == 0) { DWARN(ldcp->id, "ldc_mem_dring_map: invalid map type\n"); return (EINVAL); } /* do not attempt direct map if it's not HV supported or enabled */ if (!ldc_dring_shmem_hv_ok || !ldc_dring_shmem_enabled) { mtype = LDC_SHADOW_MAP; } *dhandle = 0; /* Allocate an dring structure */ dringp = kmem_zalloc(sizeof (ldc_dring_t), KM_SLEEP); D1(ldcp->id, "ldc_mem_dring_map: 0x%x,0x%x,0x%x,0x%llx,0x%llx\n", mtype, len, dsize, cookie->addr, cookie->size); /* Initialize dring */ dringp->length = len; dringp->dsize = dsize; /* round of to multiple of page size */ dring_size = len * dsize; dringp->size = (dring_size & MMU_PAGEMASK); if (dring_size & MMU_PAGEOFFSET) dringp->size += MMU_PAGESIZE; dringp->ldcp = ldcp; /* create an memory handle */ err = ldc_mem_alloc_handle(handle, &mhandle); if (err || mhandle == NULL) { DWARN(DBG_ALL_LDCS, 
"ldc_mem_dring_map: cannot alloc hdl err=%d\n", err); kmem_free(dringp, sizeof (ldc_dring_t)); return (ENOMEM); } dringp->mhdl = mhandle; dringp->base = NULL; /* map the dring into local memory */ err = i_ldc_mem_map(mhandle, cookie, ccount, mtype, LDC_MEM_RW, &(dringp->base), NULL); if (err || dringp->base == NULL) { DWARN(DBG_ALL_LDCS, "ldc_mem_dring_map: cannot map desc ring err=%d\n", err); (void) ldc_mem_free_handle(mhandle); kmem_free(dringp, sizeof (ldc_dring_t)); return (ENOMEM); } /* initialize the desc ring lock */ mutex_init(&dringp->lock, NULL, MUTEX_DRIVER, NULL); /* Add descriptor ring to channel's imported dring list */ mutex_enter(&ldcp->imp_dlist_lock); dringp->ch_next = ldcp->imp_dring_list; ldcp->imp_dring_list = dringp; mutex_exit(&ldcp->imp_dlist_lock); dringp->status = LDC_MAPPED; *dhandle = (ldc_dring_handle_t)dringp; return (0); } /* * Unmap a descriptor ring. Free shadow memory (if any). */ int ldc_mem_dring_unmap(ldc_dring_handle_t dhandle) { ldc_dring_t *dringp; ldc_dring_t *tmp_dringp; ldc_chan_t *ldcp; if (dhandle == NULL) { DWARN(DBG_ALL_LDCS, "ldc_mem_dring_unmap: invalid desc ring handle\n"); return (EINVAL); } dringp = (ldc_dring_t *)dhandle; if (dringp->status != LDC_MAPPED) { DWARN(DBG_ALL_LDCS, "ldc_mem_dring_unmap: not a mapped desc ring\n"); return (EINVAL); } mutex_enter(&dringp->lock); ldcp = dringp->ldcp; mutex_enter(&ldcp->imp_dlist_lock); /* find and unlink the desc ring from channel import list */ tmp_dringp = ldcp->imp_dring_list; if (tmp_dringp == dringp) { ldcp->imp_dring_list = dringp->ch_next; dringp->ch_next = NULL; } else { while (tmp_dringp != NULL) { if (tmp_dringp->ch_next == dringp) { tmp_dringp->ch_next = dringp->ch_next; dringp->ch_next = NULL; break; } tmp_dringp = tmp_dringp->ch_next; } if (tmp_dringp == NULL) { DWARN(DBG_ALL_LDCS, "ldc_mem_dring_unmap: invalid descriptor\n"); mutex_exit(&ldcp->imp_dlist_lock); mutex_exit(&dringp->lock); return (EINVAL); } } mutex_exit(&ldcp->imp_dlist_lock); /* do a LDC 
memory handle unmap and free */ (void) ldc_mem_unmap(dringp->mhdl); (void) ldc_mem_free_handle((ldc_mem_handle_t)dringp->mhdl); dringp->status = 0; dringp->ldcp = NULL; mutex_exit(&dringp->lock); /* destroy dring lock */ mutex_destroy(&dringp->lock); /* free desc ring object */ kmem_free(dringp, sizeof (ldc_dring_t)); return (0); } /* * Internal entry point for descriptor ring access entry consistency * semantics. Acquire copies the contents of the remote descriptor ring * into the local shadow copy. The release operation copies the local * contents into the remote dring. The start and end locations specify * bounds for the entries being synchronized. */ static int i_ldc_dring_acquire_release(ldc_dring_handle_t dhandle, uint8_t direction, uint64_t start, uint64_t end) { int err; ldc_dring_t *dringp; ldc_chan_t *ldcp; ldc_mhdl_t *mhdl; uint64_t soff; size_t copy_size; if (dhandle == NULL) { DWARN(DBG_ALL_LDCS, "i_ldc_dring_acquire_release: invalid desc ring handle\n"); return (EINVAL); } dringp = (ldc_dring_t *)dhandle; mutex_enter(&dringp->lock); if (dringp->status != LDC_MAPPED || dringp->ldcp == NULL) { DWARN(DBG_ALL_LDCS, "i_ldc_dring_acquire_release: not a mapped desc ring\n"); mutex_exit(&dringp->lock); return (EINVAL); } if (start >= dringp->length || end >= dringp->length) { DWARN(DBG_ALL_LDCS, "i_ldc_dring_acquire_release: index out of range\n"); mutex_exit(&dringp->lock); return (EINVAL); } mhdl = (ldc_mhdl_t *)dringp->mhdl; if (mhdl == NULL) { DWARN(DBG_ALL_LDCS, "i_ldc_dring_acquire_release: invalid memory handle\n"); mutex_exit(&dringp->lock); return (EINVAL); } if (mhdl->mtype != LDC_SHADOW_MAP) { DWARN(DBG_ALL_LDCS, "i_ldc_dring_acquire_release: invalid mtype: %d\n", mhdl->mtype); mutex_exit(&dringp->lock); return (EINVAL); } /* get the channel handle */ ldcp = dringp->ldcp; copy_size = (start <= end) ? 
(((end - start) + 1) * dringp->dsize) : ((dringp->length - start) * dringp->dsize); /* Calculate the relative offset for the first desc */ soff = (start * dringp->dsize); /* copy to/from remote from/to local memory */ D1(ldcp->id, "i_ldc_dring_acquire_release: c1 off=0x%llx sz=0x%llx\n", soff, copy_size); err = i_ldc_mem_acquire_release((ldc_mem_handle_t)dringp->mhdl, direction, soff, copy_size); if (err) { DWARN(ldcp->id, "i_ldc_dring_acquire_release: copy failed\n"); mutex_exit(&dringp->lock); return (err); } /* do the balance */ if (start > end) { copy_size = ((end + 1) * dringp->dsize); soff = 0; /* copy to/from remote from/to local memory */ D1(ldcp->id, "i_ldc_dring_acquire_release: c2 " "off=0x%llx sz=0x%llx\n", soff, copy_size); err = i_ldc_mem_acquire_release((ldc_mem_handle_t)dringp->mhdl, direction, soff, copy_size); if (err) { DWARN(ldcp->id, "i_ldc_dring_acquire_release: copy failed\n"); mutex_exit(&dringp->lock); return (err); } } mutex_exit(&dringp->lock); return (0); } /* * Ensure that the contents in the local dring are consistent * with the contents if of remote dring */ int ldc_mem_dring_acquire(ldc_dring_handle_t dhandle, uint64_t start, uint64_t end) { return (i_ldc_dring_acquire_release(dhandle, LDC_COPY_IN, start, end)); } /* * Ensure that the contents in the remote dring are consistent * with the contents if of local dring */ int ldc_mem_dring_release(ldc_dring_handle_t dhandle, uint64_t start, uint64_t end) { return (i_ldc_dring_acquire_release(dhandle, LDC_COPY_OUT, start, end)); }