/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Note: This is the backend part of the split PV disk driver. This driver
 * is not a nexus driver, nor is it a leaf driver (block/char/stream driver).
 * Currently, it does not create any minor node. So, although it runs in the
 * backend domain, it will not be used directly from within dom0.
 * It simply gets block I/O requests issued by the frontend from a shared page
 * (blkif ring buffer - defined by Xen) between the backend and frontend
 * domains, generates a buf, and pushes it down to the underlying disk target
 * driver via the ldi interface. When the buf is done, this driver will
 * generate a response and put it into the ring buffer to inform the frontend
 * of the status of the I/O request issued by it. When a new virtual device
 * entry is added in xenstore, there will be a watch event sent from Xen to
 * the xvdi framework, which will, in turn, create the devinfo node and try to
 * attach this driver (see xvdi_create_dev). When the frontend peer changes
 * its state to XenbusStateClose, an event will also be sent from Xen to the
 * xvdi framework, which will detach and remove this devinfo node
 * (see i_xvdi_oestate_handler).
* I/O requests taken from the ring buffer and events coming from xenstore
 * cannot be trusted. We verify them in xdb_get_buf() and
 * xdb_check_state_transition().
 *
 * Virtual device configuration is read/written from/to the database via
 * xenbus_* interfaces. The driver also uses xvdi_* to interact with the
 * hypervisor. There is an on-going effort to make xvdi_* cover all xenbus_*.
 */

/*
 * NOTE(review): the header names of the #include directives below are
 * missing in this copy of the file; restore them from the original source
 * before building.
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

static xdb_t *xdb_statep;
static int xdb_debug = 0;

static void xdb_close(dev_info_t *);
static int xdb_push_response(xdb_t *, uint64_t, uint8_t, uint16_t);
static int xdb_get_request(xdb_t *, blkif_request_t *);
static void blkif_get_x86_32_req(blkif_request_t *, blkif_x86_32_request_t *);
static void blkif_get_x86_64_req(blkif_request_t *, blkif_x86_64_request_t *);
static int xdb_biodone(buf_t *);

#ifdef DEBUG
/*
 * debug aid functions
 */

/*
 * Record `va' in the first empty (zero) slot of vdp->page_addrs.
 * debug_enter() is called first if `va' is already recorded, i.e. the
 * same I/O page VA is being mapped twice.  Asserts that a free slot
 * was found.
 */
static void
logva(xdb_t *vdp, uint64_t va)
{
	uint64_t *page_addrs;
	int i;

	page_addrs = vdp->page_addrs;
	for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++) {
		if (page_addrs[i] == va)
			debug_enter("VA remapping found!");
	}

	for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++) {
		if (page_addrs[i] == 0) {
			page_addrs[i] = va;
			break;
		}
	}
	ASSERT(i < XDB_MAX_IO_PAGES(vdp));
}

/*
 * Clear the first slot of vdp->page_addrs that holds `va'.  Asserts that
 * `va' had been recorded.
 */
static void
unlogva(xdb_t *vdp, uint64_t va)
{
	uint64_t *page_addrs;
	int i;

	page_addrs = vdp->page_addrs;
	for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++) {
		if (page_addrs[i] == va) {
			page_addrs[i] = 0;
			break;
		}
	}
	ASSERT(i < XDB_MAX_IO_PAGES(vdp));
}

/*
 * Print the fields of a frontend request (operation, segment count, handle,
 * id, start sector and every segment) through XDB_DBPRINT.
 */
static void
xdb_dump_request_oe(blkif_request_t *req)
{
	int i;

	/*
	 * Exploit the public interface definitions for BLKIF_OP_READ
	 * etc..
*/ char *op_name[] = { "read", "write", "barrier", "flush" }; XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "op=%s", op_name[req->operation])); XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "num of segments=%d", req->nr_segments)); XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "handle=%d", req->handle)); XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "id=%llu", (unsigned long long)req->id)); XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "start sector=%llu", (unsigned long long)req->sector_number)); for (i = 0; i < req->nr_segments; i++) { XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "gref=%d, first sec=%d," "last sec=%d", req->seg[i].gref, req->seg[i].first_sect, req->seg[i].last_sect)); } } #endif /* DEBUG */ /* * Statistics. */ static char *xdb_stats[] = { "rd_reqs", "wr_reqs", "br_reqs", "fl_reqs", "oo_reqs" }; static int xdb_kstat_update(kstat_t *ksp, int flag) { xdb_t *vdp; kstat_named_t *knp; if (flag != KSTAT_READ) return (EACCES); vdp = ksp->ks_private; knp = ksp->ks_data; /* * Assignment order should match that of the names in * xdb_stats. */ (knp++)->value.ui64 = vdp->xs_stat_req_reads; (knp++)->value.ui64 = vdp->xs_stat_req_writes; (knp++)->value.ui64 = vdp->xs_stat_req_barriers; (knp++)->value.ui64 = vdp->xs_stat_req_flushes; (knp++)->value.ui64 = 0; /* oo_req */ return (0); } static boolean_t xdb_kstat_init(xdb_t *vdp) { int nstat = sizeof (xdb_stats) / sizeof (xdb_stats[0]); char **cp = xdb_stats; kstat_named_t *knp; if ((vdp->xs_kstats = kstat_create("xdb", ddi_get_instance(vdp->xs_dip), "req_statistics", "block", KSTAT_TYPE_NAMED, nstat, 0)) == NULL) return (B_FALSE); vdp->xs_kstats->ks_private = vdp; vdp->xs_kstats->ks_update = xdb_kstat_update; knp = vdp->xs_kstats->ks_data; while (nstat > 0) { kstat_named_init(knp, *cp, KSTAT_DATA_UINT64); knp++; cp++; nstat--; } kstat_install(vdp->xs_kstats); return (B_TRUE); } static char * i_pathname(dev_info_t *dip) { char *path, *rv; path = kmem_alloc(MAXPATHLEN, KM_SLEEP); (void) ddi_pathname(dip, path); rv = strdup(path); kmem_free(path, MAXPATHLEN); return (rv); } static 
buf_t *
xdb_get_buf(xdb_t *vdp, blkif_request_t *req, xdb_request_t *xreq)
{
	/*
	 * Build the buf used to service (part of) one frontend request.
	 *
	 * req != NULL: a new request.  Its fields are read from the ring
	 * through the access handle, the segment count and sector bounds
	 * are clamped, the granted pages are mapped at the I/O VA window
	 * belonging to xreq, and the buf embedded in xreq is initialised.
	 *
	 * req == NULL: the buf embedded in xreq is reset and reused for the
	 * next run of segments, starting at xreq->xr_curseg.
	 *
	 * Returns the buf, or NULL if the grant pages could not be mapped.
	 */
	buf_t *bp;
	uint8_t segs, curseg;
	int sectors;
	int i, err;
	gnttab_map_grant_ref_t mapops[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	ddi_acc_handle_t acchdl;

	acchdl = vdp->xs_ring_hdl;
	bp = XDB_XREQ2BP(xreq);
	curseg = xreq->xr_curseg;
	/* init a new xdb request */
	if (req != NULL) {
		ASSERT(MUTEX_HELD(&vdp->xs_iomutex));
		boolean_t pagemapok = B_TRUE;
		uint8_t op = ddi_get8(acchdl, &req->operation);

		xreq->xr_vdp = vdp;
		xreq->xr_op = op;
		xreq->xr_id = ddi_get64(acchdl, &req->id);
		segs = xreq->xr_buf_pages = ddi_get8(acchdl, &req->nr_segments);
		if (segs == 0) {
			/* nothing to map: hand back an empty buf */
			if (op != BLKIF_OP_FLUSH_DISKCACHE)
				cmn_err(CE_WARN, "!non-BLKIF_OP_FLUSH_DISKCACHE"
				    " is seen from domain %d with zero "
				    "length data buffer!", vdp->xs_peer);
			bioinit(bp);
			bp->b_bcount = 0;
			bp->b_lblkno = 0;
			bp->b_un.b_addr = NULL;
			return (bp);
		} else if (op == BLKIF_OP_FLUSH_DISKCACHE) {
			cmn_err(CE_WARN, "!BLKIF_OP_FLUSH_DISKCACHE"
			    " is seen from domain %d with non-zero "
			    "length data buffer!", vdp->xs_peer);
		}

		/*
		 * segs should be no bigger than BLKIF_MAX_SEGMENTS_PER_REQUEST
		 * according to the definition of blk interface by Xen
		 * we do sanity check here
		 */
		if (segs > BLKIF_MAX_SEGMENTS_PER_REQUEST)
			segs = xreq->xr_buf_pages =
			    BLKIF_MAX_SEGMENTS_PER_REQUEST;

		for (i = 0; i < segs; i++) {
			uint8_t fs, ls;

			mapops[i].host_addr =
			    (uint64_t)(uintptr_t)XDB_IOPAGE_VA(
			    vdp->xs_iopage_va, xreq->xr_idx, i);
			mapops[i].dom = vdp->xs_peer;
			mapops[i].ref = ddi_get32(acchdl, &req->seg[i].gref);
			mapops[i].flags = GNTMAP_host_map;
			/* only a read request needs the page mapped writable */
			if (op != BLKIF_OP_READ)
				mapops[i].flags |= GNTMAP_readonly;

			fs = ddi_get8(acchdl, &req->seg[i].first_sect);
			ls = ddi_get8(acchdl, &req->seg[i].last_sect);

			/*
			 * first_sect should be no bigger than last_sect and
			 * both of them should be no bigger than
			 * XB_LAST_SECTOR_IN_SEG according to definition
			 * of blk interface by Xen, so sanity check again
			 */
			if (fs > XB_LAST_SECTOR_IN_SEG)
				fs = XB_LAST_SECTOR_IN_SEG;
			if (ls > XB_LAST_SECTOR_IN_SEG)
				ls = XB_LAST_SECTOR_IN_SEG;
			if (fs > ls)
				fs = ls;

			xreq->xr_segs[i].fs = fs;
			xreq->xr_segs[i].ls = ls;
		}

		/* map in io pages */
		err = xen_map_gref(GNTTABOP_map_grant_ref, mapops, i, B_FALSE);
		if (err != 0)
			return (NULL);
		for (i = 0; i < segs; i++) {
			/*
			 * Although HYPERVISOR_grant_table_op() returned no
			 * error, mapping of each single page can fail. So,
			 * we have to do the check here and handle the error
			 * if needed
			 */
			if (mapops[i].status != GNTST_okay) {
				int j;
				/* undo the bookkeeping done for pages 0..i-1 */
				for (j = 0; j < i; j++) {
#ifdef DEBUG
					unlogva(vdp, mapops[j].host_addr);
#endif
					xen_release_pfn(
					    xreq->xr_plist[j].p_pagenum);
				}
				pagemapok = B_FALSE;
				break;
			}
			/* record page mapping handle for unmapping later */
			xreq->xr_page_hdls[i] = mapops[i].handle;
#ifdef DEBUG
			logva(vdp, mapops[i].host_addr);
#endif
			/*
			 * Pass the MFNs down using the shadow list (xr_pplist)
			 *
			 * This is pretty ugly since we have implicit knowledge
			 * of how the rootnex binds buffers.
			 * The GNTTABOP_map_grant_ref op makes us do some ugly
			 * stuff since we're not allowed to touch these PTEs
			 * from the VM.
			 *
			 * Obviously, these aren't real page_t's. The rootnex
			 * only needs p_pagenum.
			 * Also, don't use btop() here or 32 bit PAE breaks.
			 */
			xreq->xr_pplist[i] = &xreq->xr_plist[i];
			xreq->xr_plist[i].p_pagenum =
			    xen_assign_pfn(mapops[i].dev_bus_addr >> PAGESHIFT);
		}

		/*
		 * not all pages mapped in successfully, unmap those mapped-in
		 * page and return failure
		 */
		if (!pagemapok) {
			gnttab_unmap_grant_ref_t unmapop;

			for (i = 0; i < segs; i++) {
				if (mapops[i].status != GNTST_okay)
					continue;
				unmapop.host_addr =
				    (uint64_t)(uintptr_t)XDB_IOPAGE_VA(
				    vdp->xs_iopage_va, xreq->xr_idx, i);
				unmapop.dev_bus_addr = NULL;
				unmapop.handle = mapops[i].handle;
				(void) HYPERVISOR_grant_table_op(
				    GNTTABOP_unmap_grant_ref, &unmapop, 1);
			}

			return (NULL);
		}
		bioinit(bp);
		bp->b_lblkno = ddi_get64(acchdl, &req->sector_number);
		bp->b_flags = B_BUSY | B_SHADOW | B_PHYS;
		bp->b_flags |= (ddi_get8(acchdl, &req->operation) ==
		    BLKIF_OP_READ) ? B_READ : (B_WRITE | B_ASYNC);
	} else {
		uint64_t blkst;
		int isread;

		/* reuse this buf */
		/* continue right after the blocks covered by the last pass */
		blkst = bp->b_lblkno + bp->b_bcount / DEV_BSIZE;
		isread = bp->b_flags & B_READ;
		bioreset(bp);
		bp->b_lblkno = blkst;
		bp->b_flags = B_BUSY | B_SHADOW | B_PHYS;
		bp->b_flags |= isread ? B_READ : (B_WRITE | B_ASYNC);
		XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "reuse buf, xreq is %d!!",
		    xreq->xr_idx));
	}

	/* form a buf */
	bp->b_un.b_addr = XDB_IOPAGE_VA(vdp->xs_iopage_va, xreq->xr_idx,
	    curseg) + xreq->xr_segs[curseg].fs * DEV_BSIZE;
	bp->b_shadow = &xreq->xr_pplist[curseg];
	bp->b_iodone = xdb_biodone;
	sectors = 0;

	/*
	 * Run through the segments. There are XB_NUM_SECTORS_PER_SEG sectors
	 * per segment. On some OSes (e.g. Linux), there may be empty gaps
	 * between segments. (i.e. the first segment may end on sector 6 and
	 * the second segment start on sector 4).
	 *
	 * if a segment's first sector is not set to 0, and this is not the
	 * first segment in our buf, end this buf now.
	 *
	 * if a segment's last sector is not set to XB_LAST_SECTOR_IN_SEG, and
	 * this is not the last segment in the request, add this segment into
	 * the buf, then end this buf (updating the pointer to point to the
	 * next segment next time around).
*/ for (i = curseg; i < xreq->xr_buf_pages; i++) { if ((xreq->xr_segs[i].fs != 0) && (i != curseg)) { break; } sectors += (xreq->xr_segs[i].ls - xreq->xr_segs[i].fs + 1); if ((xreq->xr_segs[i].ls != XB_LAST_SECTOR_IN_SEG) && (i != (xreq->xr_buf_pages - 1))) { i++; break; } } xreq->xr_curseg = i; bp->b_bcount = sectors * DEV_BSIZE; bp->b_bufsize = bp->b_bcount; return (bp); } static xdb_request_t * xdb_get_req(xdb_t *vdp) { xdb_request_t *req; int idx; ASSERT(MUTEX_HELD(&vdp->xs_iomutex)); ASSERT(vdp->xs_free_req != -1); req = &vdp->xs_req[vdp->xs_free_req]; vdp->xs_free_req = req->xr_next; idx = req->xr_idx; bzero(req, sizeof (xdb_request_t)); req->xr_idx = idx; return (req); } static void xdb_free_req(xdb_request_t *req) { xdb_t *vdp = req->xr_vdp; ASSERT(MUTEX_HELD(&vdp->xs_iomutex)); req->xr_next = vdp->xs_free_req; vdp->xs_free_req = req->xr_idx; } static void xdb_response(xdb_t *vdp, blkif_request_t *req, boolean_t ok) { ddi_acc_handle_t acchdl = vdp->xs_ring_hdl; if (xdb_push_response(vdp, ddi_get64(acchdl, &req->id), ddi_get8(acchdl, &req->operation), ok)) xvdi_notify_oe(vdp->xs_dip); } static void xdb_init_ioreqs(xdb_t *vdp) { int i; ASSERT(vdp->xs_nentry); if (vdp->xs_req == NULL) vdp->xs_req = kmem_alloc(vdp->xs_nentry * sizeof (xdb_request_t), KM_SLEEP); #ifdef DEBUG if (vdp->page_addrs == NULL) vdp->page_addrs = kmem_zalloc(XDB_MAX_IO_PAGES(vdp) * sizeof (uint64_t), KM_SLEEP); #endif for (i = 0; i < vdp->xs_nentry; i++) { vdp->xs_req[i].xr_idx = i; vdp->xs_req[i].xr_next = i + 1; } vdp->xs_req[vdp->xs_nentry - 1].xr_next = -1; vdp->xs_free_req = 0; /* alloc va in host dom for io page mapping */ vdp->xs_iopage_va = vmem_xalloc(heap_arena, XDB_MAX_IO_PAGES(vdp) * PAGESIZE, PAGESIZE, 0, 0, 0, 0, VM_SLEEP); for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++) hat_prepare_mapping(kas.a_hat, vdp->xs_iopage_va + i * PAGESIZE, NULL); } static void xdb_uninit_ioreqs(xdb_t *vdp) { int i; for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++) hat_release_mapping(kas.a_hat, 
vdp->xs_iopage_va + i * PAGESIZE);
	/* give back the VA range reserved by xdb_init_ioreqs() */
	vmem_xfree(heap_arena, vdp->xs_iopage_va,
	    XDB_MAX_IO_PAGES(vdp) * PAGESIZE);
	if (vdp->xs_req != NULL) {
		kmem_free(vdp->xs_req, vdp->xs_nentry * sizeof (xdb_request_t));
		vdp->xs_req = NULL;
	}
#ifdef DEBUG
	if (vdp->page_addrs != NULL) {
		kmem_free(vdp->page_addrs, XDB_MAX_IO_PAGES(vdp) *
		    sizeof (uint64_t));
		vdp->page_addrs = NULL;
	}
#endif
}

/*
 * Interrupt handler for the event channel.  With xs_iomutex held, drains
 * the request ring: each supported request gets a request slot and a buf
 * which is appended to the xs_f_iobuf/xs_l_iobuf list; unsupported or
 * unmappable requests are answered through xdb_response(vdp, reqp,
 * B_FALSE).  If anything was taken off the ring, xs_iocv is broadcast
 * so that the queued bufs get pushed to the underlying driver.
 *
 * Returns DDI_INTR_CLAIMED if at least one request was taken off the ring,
 * DDI_INTR_UNCLAIMED otherwise (including when not connected).
 */
static uint_t
xdb_intr(caddr_t arg)
{
	xdb_t *vdp = (xdb_t *)arg;
	dev_info_t *dip = vdp->xs_dip;
	blkif_request_t req, *reqp = &req;
	xdb_request_t *xreq;
	buf_t *bp;
	uint8_t op;
	int ret = DDI_INTR_UNCLAIMED;

	XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE,
	    "xdb@%s: I/O request received from dom %d",
	    ddi_get_name_addr(dip), vdp->xs_peer));

	mutex_enter(&vdp->xs_iomutex);

	/* shouldn't touch ring buffer if not in connected state */
	if (!vdp->xs_if_connected) {
		mutex_exit(&vdp->xs_iomutex);
		return (DDI_INTR_UNCLAIMED);
	}
	ASSERT(vdp->xs_hp_connected && vdp->xs_fe_initialised);

	/*
	 * We'll loop till there is no more request in the ring
	 * We won't get stuck in this loop forever since the size of the ring
	 * buffer is limited, and the frontend will stop pushing requests into
	 * it when the ring buffer is full
	 */

	/* req_event will be increased in xvdi_ring_get_request() */
	while (xdb_get_request(vdp, reqp)) {
		ret = DDI_INTR_CLAIMED;

		op = ddi_get8(vdp->xs_ring_hdl, &reqp->operation);
		if (op == BLKIF_OP_READ ||
		    op == BLKIF_OP_WRITE ||
		    op == BLKIF_OP_WRITE_BARRIER ||
		    op == BLKIF_OP_FLUSH_DISKCACHE) {
#ifdef DEBUG
			xdb_dump_request_oe(reqp);
#endif
			xreq = xdb_get_req(vdp);
			ASSERT(xreq);

			/* a barrier is counted as a barrier and as a write */
			switch (op) {
			case BLKIF_OP_READ:
				vdp->xs_stat_req_reads++;
				break;
			case BLKIF_OP_WRITE_BARRIER:
				vdp->xs_stat_req_barriers++;
				/* FALLTHRU */
			case BLKIF_OP_WRITE:
				vdp->xs_stat_req_writes++;
				break;
			case BLKIF_OP_FLUSH_DISKCACHE:
				vdp->xs_stat_req_flushes++;
				break;
			}

			xreq->xr_curseg = 0; /* start from first segment */
			bp = xdb_get_buf(vdp, reqp, xreq);
			if (bp == NULL) {
				/* failed to form a buf */
				xdb_free_req(xreq);
				xdb_response(vdp, reqp, B_FALSE);
				continue;
			}
			bp->av_forw = NULL;

			XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE,
			    " buf %p, blkno %lld, size %lu, addr %p",
			    (void *)bp, (longlong_t)bp->b_blkno,
			    (ulong_t)bp->b_bcount, (void *)bp->b_un.b_addr));

			/* send bp to underlying blk driver */
			if (vdp->xs_f_iobuf == NULL) {
				vdp->xs_f_iobuf = vdp->xs_l_iobuf = bp;
			} else {
				vdp->xs_l_iobuf->av_forw = bp;
				vdp->xs_l_iobuf = bp;
			}
		} else {
			xdb_response(vdp, reqp, B_FALSE);
			XDB_DBPRINT(XDB_DBG_IO, (CE_WARN, "xdb@%s: "
			    "Unsupported cmd received from dom %d",
			    ddi_get_name_addr(dip), vdp->xs_peer));
		}
	}
	/* notify our taskq to push buf to underlying blk driver */
	if (ret == DDI_INTR_CLAIMED)
		cv_broadcast(&vdp->xs_iocv);

	mutex_exit(&vdp->xs_iomutex);

	return (ret);
}

/*
 * b_iodone callback for bufs built by xdb_get_buf().
 *
 * If the I/O succeeded and the request still has unprocessed segments, the
 * buf is rebuilt for the next run of segments and resubmitted through
 * ldi_strategy(); the function then returns without completing the request.
 * Otherwise the grant mappings of the request are removed, the write cache
 * is flushed for barrier/flush requests, a response carrying the error
 * status is pushed to the frontend (if still connected), and the buf and
 * request slot are released.  Always returns DDI_SUCCESS.
 */
static int
xdb_biodone(buf_t *bp)
{
	int i, err, bioerr;
	uint8_t segs;
	gnttab_unmap_grant_ref_t unmapops[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	xdb_request_t *xreq = XDB_BP2XREQ(bp);
	xdb_t *vdp = xreq->xr_vdp;
	buf_t *nbp;

	bioerr = geterror(bp);
	if (bioerr)
		XDB_DBPRINT(XDB_DBG_IO, (CE_WARN, "xdb@%s: I/O error %d",
		    ddi_get_name_addr(vdp->xs_dip), bioerr));

	/* check if we are done w/ this I/O request */
	if ((bioerr == 0) && (xreq->xr_curseg < xreq->xr_buf_pages)) {
		nbp = xdb_get_buf(vdp, NULL, xreq);
		if (nbp) {
			err = ldi_strategy(vdp->xs_ldi_hdl, nbp);
			if (err == 0) {
				XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE,
				    "sent buf to backend ok"));
				return (DDI_SUCCESS);
			}
			bioerr = EIO;
			XDB_DBPRINT(XDB_DBG_IO, (CE_WARN, "xdb@%s: "
			    "sent buf to backend dev failed, err=%d",
			    ddi_get_name_addr(vdp->xs_dip), err));
		} else {
			bioerr = EIO;
		}
	}

	/* unmap io pages */
	segs = xreq->xr_buf_pages;
	/*
	 * segs should be no bigger than BLKIF_MAX_SEGMENTS_PER_REQUEST
	 * according to the definition of blk interface by Xen
	 */
	ASSERT(segs <= BLKIF_MAX_SEGMENTS_PER_REQUEST);
	for (i = 0; i < segs; i++) {
		unmapops[i].host_addr = (uint64_t)(uintptr_t)XDB_IOPAGE_VA(
		    vdp->xs_iopage_va, xreq->xr_idx, i);
#ifdef DEBUG
		mutex_enter(&vdp->xs_iomutex);
		unlogva(vdp, unmapops[i].host_addr);
		mutex_exit(&vdp->xs_iomutex);
#endif
		unmapops[i].dev_bus_addr = NULL;
		unmapops[i].handle = xreq->xr_page_hdls[i];
	}
	err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
	    unmapops, segs);
	ASSERT(!err);

	/*
	 * If we have reached a barrier write or a cache flush, then we must
	 * flush all our I/Os.
	 */
	if (xreq->xr_op == BLKIF_OP_WRITE_BARRIER ||
	    xreq->xr_op == BLKIF_OP_FLUSH_DISKCACHE) {
		/*
		 * XXX At this point the write did succeed, so I don't
		 * believe we should report an error because the flush
		 * failed. However, this is a debatable point, so
		 * maybe we need to think more carefully about this.
		 * For now, just cast to void.
		 */
		(void) ldi_ioctl(vdp->xs_ldi_hdl,
		    DKIOCFLUSHWRITECACHE, NULL, FKIOCTL, kcred, NULL);
	}

	mutex_enter(&vdp->xs_iomutex);

	/* send response back to frontend */
	if (vdp->xs_if_connected) {
		ASSERT(vdp->xs_hp_connected && vdp->xs_fe_initialised);
		if (xdb_push_response(vdp, xreq->xr_id, xreq->xr_op, bioerr))
			xvdi_notify_oe(vdp->xs_dip);
		XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE,
		    "sent resp back to frontend, id=%llu",
		    (unsigned long long)xreq->xr_id));
	}
	/* free io resources */
	biofini(bp);
	xdb_free_req(xreq);

	vdp->xs_ionum--;
	if (!vdp->xs_if_connected && (vdp->xs_ionum == 0)) {
		/* we're closing, someone is waiting for I/O clean-up */
		cv_signal(&vdp->xs_ionumcv);
	}

	mutex_exit(&vdp->xs_iomutex);

	return (DDI_SUCCESS);
}

/*
 * Connect to the frontend: read the ring reference, event channel and ring
 * protocol that the frontend published, map the ring and bind the event
 * channel.  Caller holds xs_cbmutex.  Returns DDI_SUCCESS or DDI_FAILURE.
 */
static int
xdb_bindto_frontend(xdb_t *vdp)
{
	int err;
	char *oename;
	grant_ref_t gref;
	evtchn_port_t evtchn;
	dev_info_t *dip = vdp->xs_dip;
	char protocol[64] = "";

	ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));

	/*
	 * Switch to the XenbusStateInitialised state.  This lets the
	 * frontend know that we're about to negotiate a connection.
*/ (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitialised); /* * Gather info from frontend */ oename = xvdi_get_oename(dip); if (oename == NULL) return (DDI_FAILURE); err = xenbus_gather(XBT_NULL, oename, XBP_RING_REF, "%lu", &gref, XBP_EVENT_CHAN, "%u", &evtchn, NULL); if (err != 0) { xvdi_dev_error(dip, err, "Getting ring-ref and evtchn from frontend"); return (DDI_FAILURE); } vdp->xs_blk_protocol = BLKIF_PROTOCOL_NATIVE; vdp->xs_nentry = BLKIF_RING_SIZE; vdp->xs_entrysize = sizeof (union blkif_sring_entry); err = xenbus_gather(XBT_NULL, oename, XBP_PROTOCOL, "%63s", protocol, NULL); if (err) (void) strcpy(protocol, "unspecified, assuming native"); else { /* * We must check for NATIVE first, so that the fast path * is taken for copying data from the guest to the host. */ if (strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE) != 0) { if (strcmp(protocol, XEN_IO_PROTO_ABI_X86_32) == 0) { vdp->xs_blk_protocol = BLKIF_PROTOCOL_X86_32; vdp->xs_nentry = BLKIF_X86_32_RING_SIZE; vdp->xs_entrysize = sizeof (union blkif_x86_32_sring_entry); } else if (strcmp(protocol, XEN_IO_PROTO_ABI_X86_64) == 0) { vdp->xs_blk_protocol = BLKIF_PROTOCOL_X86_64; vdp->xs_nentry = BLKIF_X86_64_RING_SIZE; vdp->xs_entrysize = sizeof (union blkif_x86_64_sring_entry); } else { xvdi_fatal_error(dip, err, "unknown protocol"); return (DDI_FAILURE); } } } #ifdef DEBUG cmn_err(CE_NOTE, "!xdb@%s: blkif protocol '%s' ", ddi_get_name_addr(dip), protocol); #endif /* * Map and init ring. The ring parameters must match those which * have been allocated in the front end. 
*/ if (xvdi_map_ring(dip, vdp->xs_nentry, vdp->xs_entrysize, gref, &vdp->xs_ring) != DDI_SUCCESS) return (DDI_FAILURE); /* * This will be removed after we use shadow I/O ring request since * we don't need to access the ring itself directly, thus the access * handle is not needed */ vdp->xs_ring_hdl = vdp->xs_ring->xr_acc_hdl; /* bind event channel */ err = xvdi_bind_evtchn(dip, evtchn); if (err != DDI_SUCCESS) { xvdi_unmap_ring(vdp->xs_ring); return (DDI_FAILURE); } return (DDI_SUCCESS); } static void xdb_unbindfrom_frontend(xdb_t *vdp) { ASSERT(MUTEX_HELD(&vdp->xs_cbmutex)); xvdi_free_evtchn(vdp->xs_dip); xvdi_unmap_ring(vdp->xs_ring); } /* * xdb_params_change() initiates a allows change to the underlying device/file * that the backend is accessing. It does this by disconnecting from the * frontend, closing the old device, clearing a bunch of xenbus parameters, * and switching back to the XenbusStateInitialising state. The frontend * should notice this transition to the XenbusStateInitialising state and * should attempt to reconnect to us (the backend). */ static void xdb_params_change(xdb_t *vdp, char *params, boolean_t update_xs) { xenbus_transaction_t xbt; dev_info_t *dip = vdp->xs_dip; char *xsname; int err; ASSERT(MUTEX_HELD(&vdp->xs_cbmutex)); ASSERT(vdp->xs_params_path != NULL); if ((xsname = xvdi_get_xsname(dip)) == NULL) return; if (strcmp(vdp->xs_params_path, params) == 0) return; /* * Close the device we're currently accessing and update the * path which points to our backend device/file. */ xdb_close(dip); vdp->xs_fe_initialised = B_FALSE; trans_retry: if ((err = xenbus_transaction_start(&xbt)) != 0) { xvdi_dev_error(dip, err, "params change transaction init"); goto errout; } /* * Delete all the xenbus properties that are connection dependant * and go back to the initializing state so that the frontend * driver can re-negotiate a connection. 
*/ if (((err = xenbus_rm(xbt, xsname, XBP_FB)) != 0) || ((err = xenbus_rm(xbt, xsname, XBP_INFO)) != 0) || ((err = xenbus_rm(xbt, xsname, "sector-size")) != 0) || ((err = xenbus_rm(xbt, xsname, XBP_SECTORS)) != 0) || ((err = xenbus_rm(xbt, xsname, "instance")) != 0) || ((err = xenbus_rm(xbt, xsname, "node")) != 0) || (update_xs && ((err = xenbus_printf(xbt, xsname, "params", "%s", params)) != 0)) || ((err = xvdi_switch_state(dip, xbt, XenbusStateInitialising) > 0))) { (void) xenbus_transaction_end(xbt, 1); xvdi_dev_error(dip, err, "params change transaction setup"); goto errout; } if ((err = xenbus_transaction_end(xbt, 0)) != 0) { if (err == EAGAIN) { /* transaction is ended, don't need to abort it */ goto trans_retry; } xvdi_dev_error(dip, err, "params change transaction commit"); goto errout; } /* Change the device that we plan to access */ strfree(vdp->xs_params_path); vdp->xs_params_path = strdup(params); return; errout: (void) xvdi_switch_state(dip, xbt, XenbusStateInitialising); } /* * xdb_watch_params_cb() - This callback is invoked whenever there * is an update to the following xenbus parameter: * /local/domain/0/backend/vbd///params * * This normally happens during xm block-configure operations, which * are used to change CD device images for HVM domUs. */ /*ARGSUSED*/ static void xdb_watch_params_cb(dev_info_t *dip, const char *path, void *arg) { xdb_t *vdp = (xdb_t *)ddi_get_driver_private(dip); char *xsname, *oename, *str, *str2; if (((xsname = xvdi_get_xsname(dip)) == NULL) || ((oename = xvdi_get_oename(dip)) == NULL)) { return; } mutex_enter(&vdp->xs_cbmutex); if (xenbus_read_str(xsname, "params", &str) != 0) { mutex_exit(&vdp->xs_cbmutex); return; } if (strcmp(vdp->xs_params_path, str) == 0) { /* Nothing todo */ mutex_exit(&vdp->xs_cbmutex); strfree(str); return; } /* * If the frontend isn't a cd device, doesn't support media * requests, or has locked the media, then we can't change * the params value. restore the current value. 
*/ str2 = NULL; if (!XDB_IS_FE_CD(vdp) || (xenbus_read_str(oename, XBP_MEDIA_REQ, &str2) != 0) || (strcmp(str2, XBV_MEDIA_REQ_LOCK) == 0)) { if (str2 != NULL) strfree(str2); strfree(str); str = i_pathname(dip); cmn_err(CE_NOTE, "!%s: media locked, ignoring params update", str); strfree(str); mutex_exit(&vdp->xs_cbmutex); return; } XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "block-configure params request: \"%s\"", str)); xdb_params_change(vdp, str, B_FALSE); mutex_exit(&vdp->xs_cbmutex); strfree(str); } /* * xdb_watch_media_req_cb() - This callback is invoked whenever there * is an update to the following xenbus parameter: * /local/domain//device/vbd//media-req * * Media requests are only supported on CD devices and are issued by * the frontend. Currently the only supported media request operaions * are "lock" and "eject". A "lock" prevents the backend from changing * the backing device/file (via xm block-configure). An "eject" requests * tells the backend device that it should disconnect from the frontend * and closing the backing device/file that is currently in use. 
*/
/*ARGSUSED*/
static void
xdb_watch_media_req_cb(dev_info_t *dip, const char *path, void *arg)
{
	xdb_t *vdp = (xdb_t *)ddi_get_driver_private(dip);
	char *oename, *str;

	mutex_enter(&vdp->xs_cbmutex);

	if ((oename = xvdi_get_oename(dip)) == NULL) {
		mutex_exit(&vdp->xs_cbmutex);
		return;
	}

	if (xenbus_read_str(oename, XBP_MEDIA_REQ, &str) != 0) {
		mutex_exit(&vdp->xs_cbmutex);
		return;
	}

	if (!XDB_IS_FE_CD(vdp)) {
		xvdi_dev_error(dip, EINVAL,
		    "media-req only supported for cdrom devices");
		mutex_exit(&vdp->xs_cbmutex);
		/* the request string was read above; don't leak it */
		strfree(str);
		return;
	}

	/* "eject" is the only request that needs any action here */
	if (strcmp(str, XBV_MEDIA_REQ_EJECT) != 0) {
		mutex_exit(&vdp->xs_cbmutex);
		strfree(str);
		return;
	}
	strfree(str);

	XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "media eject request"));

	/* switch to an empty backing path, then acknowledge the request */
	xdb_params_change(vdp, "", B_TRUE);
	(void) xenbus_printf(XBT_NULL, oename,
	    XBP_MEDIA_REQ, "%s", XBV_MEDIA_REQ_NONE);
	mutex_exit(&vdp->xs_cbmutex);
}

/*
 * If we're dealing with a cdrom device, let the frontend know that
 * we support media requests via XBP_MEDIA_REQ_SUP, and setup a watch
 * to handle those frontend media request changes, which modify the
 * following xenstore parameter:
 *	/local/domain//device/vbd//media-req
 *
 * Returns B_TRUE on success (including when the frontend is not a cdrom
 * and nothing needs to be done), B_FALSE otherwise.  Caller holds
 * xs_cbmutex.
 */
static boolean_t
xdb_media_req_init(xdb_t *vdp)
{
	dev_info_t *dip = vdp->xs_dip;
	char *xsname, *oename;

	ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));

	if (((xsname = xvdi_get_xsname(dip)) == NULL) ||
	    ((oename = xvdi_get_oename(dip)) == NULL))
		return (B_FALSE);

	if (!XDB_IS_FE_CD(vdp))
		return (B_TRUE);

	if (xenbus_printf(XBT_NULL, xsname, XBP_MEDIA_REQ_SUP, "%d", 1) != 0)
		return (B_FALSE);

	if (xvdi_add_xb_watch_handler(dip, oename,
	    XBP_MEDIA_REQ, xdb_watch_media_req_cb, NULL) != DDI_SUCCESS) {
		xvdi_dev_error(dip, EAGAIN,
		    "Failed to register watch for cdrom media requests");
		return (B_FALSE);
	}

	return (B_TRUE);
}

/*
 * Get our params value.
Also, if we're using "params" then setup a * watch to handle xm block-configure operations which modify the * following xenstore parameter: * /local/domain/0/backend/vbd///params */ static boolean_t xdb_params_init(xdb_t *vdp) { dev_info_t *dip = vdp->xs_dip; char *str, *xsname; int err, watch_params = B_FALSE; ASSERT(MUTEX_HELD(&vdp->xs_cbmutex)); ASSERT(vdp->xs_params_path == NULL); if ((xsname = xvdi_get_xsname(dip)) == NULL) return (B_FALSE); if ((err = xenbus_read_str(xsname, "dynamic-device-path", &str)) == ENOENT) { err = xenbus_read_str(xsname, "params", &str); watch_params = B_TRUE; } if (err != 0) return (B_FALSE); vdp->xs_params_path = str; /* * If we got our backing store path from "dynamic-device-path" then * there's no reason to watch "params" */ if (!watch_params) return (B_TRUE); if (xvdi_add_xb_watch_handler(dip, xsname, "params", xdb_watch_params_cb, NULL) != DDI_SUCCESS) { strfree(vdp->xs_params_path); vdp->xs_params_path = NULL; return (B_FALSE); } return (B_TRUE); } #define LOFI_CTRL_NODE "/dev/lofictl" #define LOFI_DEV_NODE "/devices/pseudo/lofi@0:" #define LOFI_MODE (FREAD | FWRITE | FEXCL) static int xdb_setup_node(xdb_t *vdp, char *path) { dev_info_t *dip = vdp->xs_dip; char *xsname, *str; ldi_handle_t ldi_hdl; struct lofi_ioctl *li; int minor, err; ASSERT(MUTEX_HELD(&vdp->xs_cbmutex)); if ((xsname = xvdi_get_xsname(dip)) == NULL) return (DDI_FAILURE); if ((err = xenbus_read_str(xsname, "type", &str)) != 0) { xvdi_dev_error(dip, err, "Getting type from backend device"); return (DDI_FAILURE); } if (strcmp(str, "file") == 0) vdp->xs_type |= XDB_DEV_BE_LOFI; strfree(str); if (!XDB_IS_BE_LOFI(vdp)) { (void) strlcpy(path, vdp->xs_params_path, MAXPATHLEN); ASSERT(vdp->xs_lofi_path == NULL); return (DDI_SUCCESS); } do { err = ldi_open_by_name(LOFI_CTRL_NODE, LOFI_MODE, kcred, &ldi_hdl, vdp->xs_ldi_li); } while (err == EBUSY); if (err != 0) { return (DDI_FAILURE); } li = kmem_zalloc(sizeof (*li), KM_SLEEP); (void) strlcpy(li->li_filename, 
vdp->xs_params_path, sizeof (li->li_filename)); err = ldi_ioctl(ldi_hdl, LOFI_MAP_FILE, (intptr_t)li, LOFI_MODE | FKIOCTL, kcred, &minor); (void) ldi_close(ldi_hdl, LOFI_MODE, kcred); kmem_free(li, sizeof (*li)); if (err != 0) { cmn_err(CE_WARN, "xdb@%s: Failed to create lofi dev for %s", ddi_get_name_addr(dip), vdp->xs_params_path); return (DDI_FAILURE); } /* * return '/devices/...' instead of '/dev/lofi/...' since the * former is available immediately after calling ldi_ioctl */ (void) snprintf(path, MAXPATHLEN, LOFI_DEV_NODE "%d", minor); (void) xenbus_printf(XBT_NULL, xsname, "node", "%s", path); ASSERT(vdp->xs_lofi_path == NULL); vdp->xs_lofi_path = strdup(path); return (DDI_SUCCESS); } static void xdb_teardown_node(xdb_t *vdp) { dev_info_t *dip = vdp->xs_dip; ldi_handle_t ldi_hdl; struct lofi_ioctl *li; int err; ASSERT(MUTEX_HELD(&vdp->xs_cbmutex)); if (!XDB_IS_BE_LOFI(vdp)) return; vdp->xs_type &= ~XDB_DEV_BE_LOFI; ASSERT(vdp->xs_lofi_path != NULL); li = kmem_zalloc(sizeof (*li), KM_SLEEP); (void) strlcpy(li->li_filename, vdp->xs_params_path, sizeof (li->li_filename)); do { err = ldi_open_by_name(LOFI_CTRL_NODE, LOFI_MODE, kcred, &ldi_hdl, vdp->xs_ldi_li); } while (err == EBUSY); if (err != 0) { kmem_free(li, sizeof (*li)); return; } if (ldi_ioctl(ldi_hdl, LOFI_UNMAP_FILE, (intptr_t)li, LOFI_MODE | FKIOCTL, kcred, NULL) != 0) { cmn_err(CE_WARN, "xdb@%s: Failed to delete lofi dev for %s", ddi_get_name_addr(dip), li->li_filename); } (void) ldi_close(ldi_hdl, LOFI_MODE, kcred); kmem_free(li, sizeof (*li)); strfree(vdp->xs_lofi_path); vdp->xs_lofi_path = NULL; } static int xdb_open_device(xdb_t *vdp) { dev_info_t *dip = vdp->xs_dip; uint64_t devsize; int blksize; char *nodepath; ASSERT(MUTEX_HELD(&vdp->xs_cbmutex)); if (strlen(vdp->xs_params_path) == 0) { /* * it's possible to have no backing device when dealing * with a pv cdrom drive that has no virtual cd associated * with it. 
*/
		ASSERT(XDB_IS_FE_CD(vdp));
		ASSERT(vdp->xs_sectors == 0);
		ASSERT(vdp->xs_ldi_li == NULL);
		ASSERT(vdp->xs_ldi_hdl == NULL);
		return (DDI_SUCCESS);
	}

	/*
	 * NOTE(review): the failure paths below release xs_ldi_li (and close
	 * xs_ldi_hdl) without resetting the fields to NULL, unlike
	 * xdb_close_device(); confirm nothing relies on them afterwards.
	 */
	if (ldi_ident_from_dip(dip, &vdp->xs_ldi_li) != 0)
		return (DDI_FAILURE);

	nodepath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);

	/* try to open backend device */
	if (xdb_setup_node(vdp, nodepath) != DDI_SUCCESS) {
		xvdi_dev_error(dip, ENXIO,
		    "Getting device path of backend device");
		ldi_ident_release(vdp->xs_ldi_li);
		kmem_free(nodepath, MAXPATHLEN);
		return (DDI_FAILURE);
	}

	/* open read-only or read-write depending on XDB_IS_RO() */
	if (ldi_open_by_name(nodepath,
	    FREAD | (XDB_IS_RO(vdp) ? 0 : FWRITE),
	    kcred, &vdp->xs_ldi_hdl, vdp->xs_ldi_li) != 0) {
		xdb_teardown_node(vdp);
		ldi_ident_release(vdp->xs_ldi_li);
		cmn_err(CE_WARN, "xdb@%s: Failed to open: %s",
		    ddi_get_name_addr(dip), nodepath);
		kmem_free(nodepath, MAXPATHLEN);
		return (DDI_FAILURE);
	}

	if (ldi_get_size(vdp->xs_ldi_hdl, &devsize) != DDI_SUCCESS) {
		(void) ldi_close(vdp->xs_ldi_hdl,
		    FREAD | (XDB_IS_RO(vdp) ? 0 : FWRITE), kcred);
		xdb_teardown_node(vdp);
		ldi_ident_release(vdp->xs_ldi_li);
		kmem_free(nodepath, MAXPATHLEN);
		return (DDI_FAILURE);
	}

	/*
	 * Take the block size from the "blksize" property, falling back to
	 * "device-blksize" when the first lookup yields DEV_BSIZE.
	 *
	 * NOTE(review): blksize is used as a divisor below without a zero
	 * check - confirm the properties can never be 0.
	 */
	blksize = ldi_prop_get_int64(vdp->xs_ldi_hdl,
	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
	    "blksize", DEV_BSIZE);
	if (blksize == DEV_BSIZE)
		blksize = ldi_prop_get_int(vdp->xs_ldi_hdl,
		    LDI_DEV_T_ANY | DDI_PROP_DONTPASS |
		    DDI_PROP_NOTPROM, "device-blksize", DEV_BSIZE);

	vdp->xs_sec_size = blksize;
	vdp->xs_sectors = devsize / blksize;

	/* check if the underlying device is a CD/DVD disc */
	if (ldi_prop_get_int(vdp->xs_ldi_hdl, LDI_DEV_T_ANY | DDI_PROP_DONTPASS,
	    INQUIRY_DEVICE_TYPE, DTYPE_DIRECT) == DTYPE_RODIRECT)
		vdp->xs_type |= XDB_DEV_BE_CD;

	/* check if the underlying device is a removable disk */
	if (ldi_prop_exists(vdp->xs_ldi_hdl,
	    LDI_DEV_T_ANY | DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
	    "removable-media"))
		vdp->xs_type |= XDB_DEV_BE_RMB;

	kmem_free(nodepath, MAXPATHLEN);
	return (DDI_SUCCESS);
}

/*
 * Close the backing device opened by xdb_open_device(): close the LDI
 * handle, tear down any lofi node, release the LDI identifier and reset
 * the per-device state (CD/RMB type bits, sector count, handles).
 * Nothing is done when there is no backing device (empty params path).
 * Caller holds xs_cbmutex.
 */
static void
xdb_close_device(xdb_t *vdp)
{
	ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));

	if (strlen(vdp->xs_params_path) == 0) {
		ASSERT(XDB_IS_FE_CD(vdp));
		ASSERT(vdp->xs_sectors == 0);
		ASSERT(vdp->xs_ldi_li == NULL);
		ASSERT(vdp->xs_ldi_hdl == NULL);
		return;
	}

	(void) ldi_close(vdp->xs_ldi_hdl,
	    FREAD | (XDB_IS_RO(vdp) ? 0 : FWRITE), kcred);
	xdb_teardown_node(vdp);
	ldi_ident_release(vdp->xs_ldi_li);
	vdp->xs_type &= ~(XDB_DEV_BE_CD | XDB_DEV_BE_RMB);
	vdp->xs_sectors = 0;
	vdp->xs_ldi_li = NULL;
	vdp->xs_ldi_hdl = NULL;
}

/*
 * Kick-off connect process
 * If xs_fe_initialised == B_TRUE and xs_hp_connected == B_TRUE
 * the xs_if_connected will be changed to B_TRUE on success,
 */
static void
xdb_start_connect(xdb_t *vdp)
{
	xenbus_transaction_t xbt;
	dev_info_t *dip = vdp->xs_dip;
	boolean_t fb_exists;
	int err, instance = ddi_get_instance(dip);
	uint64_t sectors;
	uint_t dinfo, ssize;
	char *xsname;

	ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));

	if (((xsname = xvdi_get_xsname(dip)) == NULL) ||
	    ((vdp->xs_peer = xvdi_get_oeid(dip)) == (domid_t)-1))
		return;

	mutex_enter(&vdp->xs_iomutex);
	/*
	 * if the hotplug scripts haven't run or if the frontend is not
	 * initialized, then we can't try to connect.
	 */
	if (!vdp->xs_hp_connected || !vdp->xs_fe_initialised) {
		ASSERT(!vdp->xs_if_connected);
		mutex_exit(&vdp->xs_iomutex);
		return;
	}

	/* If we're already connected then there's nothing todo */
	if (vdp->xs_if_connected) {
		mutex_exit(&vdp->xs_iomutex);
		return;
	}
	mutex_exit(&vdp->xs_iomutex);

	/*
	 * Start connect to frontend only when backend device are ready
	 * and frontend has moved to XenbusStateInitialised, which means
	 * ready to connect.
*/ XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "xdb@%s: starting connection process", ddi_get_name_addr(dip))); if (xdb_open_device(vdp) != DDI_SUCCESS) return; if (xdb_bindto_frontend(vdp) != DDI_SUCCESS) { xdb_close_device(vdp); return; } /* init i/o requests */ xdb_init_ioreqs(vdp); if (ddi_add_intr(dip, 0, NULL, NULL, xdb_intr, (caddr_t)vdp) != DDI_SUCCESS) { xdb_uninit_ioreqs(vdp); xdb_unbindfrom_frontend(vdp); xdb_close_device(vdp); return; } dinfo = 0; if (XDB_IS_RO(vdp)) dinfo |= VDISK_READONLY; if (XDB_IS_BE_RMB(vdp)) dinfo |= VDISK_REMOVABLE; if (XDB_IS_BE_CD(vdp)) dinfo |= VDISK_CDROM; if (XDB_IS_FE_CD(vdp)) dinfo |= VDISK_REMOVABLE | VDISK_CDROM; /* * we can recieve intr any time from now on * mark that we're ready to take intr */ mutex_enter(&vdp->xs_iomutex); ASSERT(vdp->xs_fe_initialised); vdp->xs_if_connected = B_TRUE; mutex_exit(&vdp->xs_iomutex); trans_retry: /* write into xenstore the info needed by frontend */ if ((err = xenbus_transaction_start(&xbt)) != 0) { xvdi_dev_error(dip, err, "connect transaction init"); goto errout; } /* If feature-barrier isn't present in xenstore, add it. */ fb_exists = xenbus_exists(xsname, XBP_FB); ssize = (vdp->xs_sec_size == 0) ? 
DEV_BSIZE : vdp->xs_sec_size; sectors = vdp->xs_sectors; if (((!fb_exists && (err = xenbus_printf(xbt, xsname, XBP_FB, "%d", 1)))) || (err = xenbus_printf(xbt, xsname, XBP_INFO, "%u", dinfo)) || (err = xenbus_printf(xbt, xsname, XBP_SECTOR_SIZE, "%u", ssize)) || (err = xenbus_printf(xbt, xsname, XBP_SECTORS, "%"PRIu64, sectors)) || (err = xenbus_printf(xbt, xsname, "instance", "%d", instance)) || ((err = xvdi_switch_state(dip, xbt, XenbusStateConnected)) > 0)) { (void) xenbus_transaction_end(xbt, 1); xvdi_dev_error(dip, err, "connect transaction setup"); goto errout; } if ((err = xenbus_transaction_end(xbt, 0)) != 0) { if (err == EAGAIN) { /* transaction is ended, don't need to abort it */ goto trans_retry; } xvdi_dev_error(dip, err, "connect transaction commit"); goto errout; } return; errout: xdb_close(dip); } /* * Disconnect from frontend and close backend device */ static void xdb_close(dev_info_t *dip) { xdb_t *vdp = (xdb_t *)ddi_get_driver_private(dip); ASSERT(MUTEX_HELD(&vdp->xs_cbmutex)); mutex_enter(&vdp->xs_iomutex); /* * if the hotplug scripts haven't run or if the frontend is not * initialized, then we can't be connected, so there's no * connection to close. 
*/ if (!vdp->xs_hp_connected || !vdp->xs_fe_initialised) { ASSERT(!vdp->xs_if_connected); mutex_exit(&vdp->xs_iomutex); return; } /* if we're not connected, there's nothing to do */ if (!vdp->xs_if_connected) { cv_broadcast(&vdp->xs_iocv); mutex_exit(&vdp->xs_iomutex); return; } XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "closing while connected")); vdp->xs_if_connected = B_FALSE; cv_broadcast(&vdp->xs_iocv); mutex_exit(&vdp->xs_iomutex); /* stop accepting I/O request from frontend */ ddi_remove_intr(dip, 0, NULL); /* clear all on-going I/Os, if any */ mutex_enter(&vdp->xs_iomutex); while (vdp->xs_ionum > 0) cv_wait(&vdp->xs_ionumcv, &vdp->xs_iomutex); mutex_exit(&vdp->xs_iomutex); /* clean up resources and close this interface */ xdb_uninit_ioreqs(vdp); xdb_unbindfrom_frontend(vdp); xdb_close_device(vdp); vdp->xs_peer = (domid_t)-1; } static void xdb_send_buf(void *arg) { xdb_t *vdp = (xdb_t *)arg; buf_t *bp; int err; mutex_enter(&vdp->xs_iomutex); while (vdp->xs_send_buf) { if ((bp = vdp->xs_f_iobuf) == NULL) { /* wait for some io to send */ XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "send buf waiting for io")); cv_wait(&vdp->xs_iocv, &vdp->xs_iomutex); continue; } vdp->xs_f_iobuf = bp->av_forw; bp->av_forw = NULL; vdp->xs_ionum++; mutex_exit(&vdp->xs_iomutex); if (bp->b_bcount == 0) { /* no I/O needs to be done */ (void) xdb_biodone(bp); mutex_enter(&vdp->xs_iomutex); continue; } err = EIO; if (vdp->xs_ldi_hdl != NULL) err = ldi_strategy(vdp->xs_ldi_hdl, bp); if (err != 0) { bp->b_flags |= B_ERROR; (void) xdb_biodone(bp); XDB_DBPRINT(XDB_DBG_IO, (CE_WARN, "xdb@%s: sent buf to backend devfailed, err=%d", ddi_get_name_addr(vdp->xs_dip), err)); } else { XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "sent buf to backend ok")); } mutex_enter(&vdp->xs_iomutex); } XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "send buf finishing")); mutex_exit(&vdp->xs_iomutex); } /*ARGSUSED*/ static void xdb_hp_state_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg, void *impl_data) { xendev_hotplug_state_t state = 
*(xendev_hotplug_state_t *)impl_data; xdb_t *vdp = (xdb_t *)ddi_get_driver_private(dip); XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "xdb@%s: " "hotplug status change to %d!", ddi_get_name_addr(dip), state)); if (state != Connected) return; mutex_enter(&vdp->xs_cbmutex); /* If hotplug script have already run, there's nothing todo */ if (vdp->xs_hp_connected) { mutex_exit(&vdp->xs_cbmutex); return; } vdp->xs_hp_connected = B_TRUE; xdb_start_connect(vdp); mutex_exit(&vdp->xs_cbmutex); } /*ARGSUSED*/ static void xdb_oe_state_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg, void *impl_data) { XenbusState new_state = *(XenbusState *)impl_data; xdb_t *vdp = (xdb_t *)ddi_get_driver_private(dip); XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "xdb@%s: " "otherend state change to %d!", ddi_get_name_addr(dip), new_state)); mutex_enter(&vdp->xs_cbmutex); /* * Now it'd really be nice if there was a well defined state * transition model for xen frontend drivers, but unfortunatly * there isn't. So we're stuck with assuming that all state * transitions are possible, and we'll just have to deal with * them regardless of what state we're in. */ switch (new_state) { case XenbusStateUnknown: case XenbusStateInitialising: case XenbusStateInitWait: /* tear down our connection to the frontend */ xdb_close(dip); vdp->xs_fe_initialised = B_FALSE; break; case XenbusStateInitialised: /* * If we were conected, then we need to drop the connection * and re-negotiate it. 
*/ xdb_close(dip); vdp->xs_fe_initialised = B_TRUE; xdb_start_connect(vdp); break; case XenbusStateConnected: /* nothing todo here other than congratulate the frontend */ break; case XenbusStateClosing: /* monkey see monkey do */ (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosing); break; case XenbusStateClosed: /* tear down our connection to the frontend */ xdb_close(dip); vdp->xs_fe_initialised = B_FALSE; (void) xvdi_switch_state(dip, XBT_NULL, new_state); break; } mutex_exit(&vdp->xs_cbmutex); } static int xdb_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) { ddi_iblock_cookie_t ibc; xdb_t *vdp; int instance = ddi_get_instance(dip); char *xsname, *oename; char *str; switch (cmd) { case DDI_RESUME: return (DDI_FAILURE); case DDI_ATTACH: break; default: return (DDI_FAILURE); } /* DDI_ATTACH */ if (((xsname = xvdi_get_xsname(dip)) == NULL) || ((oename = xvdi_get_oename(dip)) == NULL)) return (DDI_FAILURE); /* * Disable auto-detach. This is necessary so that we don't get * detached while we're disconnected from the front end. 
*/ (void) ddi_prop_update_int(DDI_DEV_T_NONE, dip, DDI_NO_AUTODETACH, 1); if (ddi_get_iblock_cookie(dip, 0, &ibc) != DDI_SUCCESS) return (DDI_FAILURE); if (ddi_soft_state_zalloc(xdb_statep, instance) != DDI_SUCCESS) return (DDI_FAILURE); vdp = ddi_get_soft_state(xdb_statep, instance); vdp->xs_dip = dip; mutex_init(&vdp->xs_iomutex, NULL, MUTEX_DRIVER, (void *)ibc); mutex_init(&vdp->xs_cbmutex, NULL, MUTEX_DRIVER, (void *)ibc); cv_init(&vdp->xs_iocv, NULL, CV_DRIVER, NULL); cv_init(&vdp->xs_ionumcv, NULL, CV_DRIVER, NULL); ddi_set_driver_private(dip, vdp); if (!xdb_kstat_init(vdp)) goto errout1; /* Check if the frontend device is supposed to be a cdrom */ if (xenbus_read_str(oename, XBP_DEV_TYPE, &str) != 0) return (DDI_FAILURE); if (strcmp(str, XBV_DEV_TYPE_CD) == 0) vdp->xs_type |= XDB_DEV_FE_CD; strfree(str); /* Check if the frontend device is supposed to be read only */ if (xenbus_read_str(xsname, "mode", &str) != 0) return (DDI_FAILURE); if ((strcmp(str, "r") == NULL) || (strcmp(str, "ro") == NULL)) vdp->xs_type |= XDB_DEV_RO; strfree(str); mutex_enter(&vdp->xs_cbmutex); if (!xdb_media_req_init(vdp) || !xdb_params_init(vdp)) { xvdi_remove_xb_watch_handlers(dip); mutex_exit(&vdp->xs_cbmutex); goto errout2; } mutex_exit(&vdp->xs_cbmutex); vdp->xs_send_buf = B_TRUE; vdp->xs_iotaskq = ddi_taskq_create(dip, "xdb_iotask", 1, TASKQ_DEFAULTPRI, 0); (void) ddi_taskq_dispatch(vdp->xs_iotaskq, xdb_send_buf, vdp, DDI_SLEEP); /* Watch frontend and hotplug state change */ if ((xvdi_add_event_handler(dip, XS_OE_STATE, xdb_oe_state_change, NULL) != DDI_SUCCESS) || (xvdi_add_event_handler(dip, XS_HP_STATE, xdb_hp_state_change, NULL) != DDI_SUCCESS)) goto errout3; /* * Kick-off hotplug script */ if (xvdi_post_event(dip, XEN_HP_ADD) != DDI_SUCCESS) { cmn_err(CE_WARN, "xdb@%s: failed to start hotplug script", ddi_get_name_addr(dip)); goto errout3; } /* * start waiting for hotplug event and otherend state event * mainly for debugging, frontend will not take any op seeing this */ 
(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitWait); XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "xdb@%s: attached!", ddi_get_name_addr(dip))); return (DDI_SUCCESS); errout3: ASSERT(vdp->xs_hp_connected && vdp->xs_if_connected); xvdi_remove_event_handler(dip, NULL); /* Disconnect from the backend */ mutex_enter(&vdp->xs_cbmutex); mutex_enter(&vdp->xs_iomutex); vdp->xs_send_buf = B_FALSE; cv_broadcast(&vdp->xs_iocv); mutex_exit(&vdp->xs_iomutex); mutex_exit(&vdp->xs_cbmutex); /* wait for all io to dtrain and destroy io taskq */ ddi_taskq_destroy(vdp->xs_iotaskq); /* tear down block-configure watch */ mutex_enter(&vdp->xs_cbmutex); xvdi_remove_xb_watch_handlers(dip); mutex_exit(&vdp->xs_cbmutex); errout2: /* remove kstats */ kstat_delete(vdp->xs_kstats); errout1: /* free up driver state */ ddi_set_driver_private(dip, NULL); cv_destroy(&vdp->xs_iocv); cv_destroy(&vdp->xs_ionumcv); mutex_destroy(&vdp->xs_cbmutex); mutex_destroy(&vdp->xs_iomutex); ddi_soft_state_free(xdb_statep, instance); return (DDI_FAILURE); } /*ARGSUSED*/ static int xdb_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) { int instance = ddi_get_instance(dip); xdb_t *vdp = XDB_INST2SOFTS(instance); switch (cmd) { case DDI_SUSPEND: return (DDI_FAILURE); case DDI_DETACH: break; default: return (DDI_FAILURE); } /* DDI_DETACH handling */ /* refuse to detach if we're still in use by the frontend */ mutex_enter(&vdp->xs_iomutex); if (vdp->xs_if_connected) { mutex_exit(&vdp->xs_iomutex); return (DDI_FAILURE); } vdp->xs_send_buf = B_FALSE; cv_broadcast(&vdp->xs_iocv); mutex_exit(&vdp->xs_iomutex); xvdi_remove_event_handler(dip, NULL); (void) xvdi_post_event(dip, XEN_HP_REMOVE); ddi_taskq_destroy(vdp->xs_iotaskq); mutex_enter(&vdp->xs_cbmutex); xvdi_remove_xb_watch_handlers(dip); mutex_exit(&vdp->xs_cbmutex); cv_destroy(&vdp->xs_iocv); cv_destroy(&vdp->xs_ionumcv); mutex_destroy(&vdp->xs_cbmutex); mutex_destroy(&vdp->xs_iomutex); kstat_delete(vdp->xs_kstats); ddi_set_driver_private(dip, NULL); 
ddi_soft_state_free(xdb_statep, instance); XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "xdb@%s: detached!", ddi_get_name_addr(dip))); return (DDI_SUCCESS); } static struct dev_ops xdb_dev_ops = { DEVO_REV, /* devo_rev */ 0, /* devo_refcnt */ ddi_getinfo_1to1, /* devo_getinfo */ nulldev, /* devo_identify */ nulldev, /* devo_probe */ xdb_attach, /* devo_attach */ xdb_detach, /* devo_detach */ nodev, /* devo_reset */ NULL, /* devo_cb_ops */ NULL, /* devo_bus_ops */ NULL, /* power */ ddi_quiesce_not_needed, /* quiesce */ }; /* * Module linkage information for the kernel. */ static struct modldrv modldrv = { &mod_driverops, /* Type of module. */ "vbd backend driver", /* Name of the module */ &xdb_dev_ops /* driver ops */ }; static struct modlinkage xdb_modlinkage = { MODREV_1, &modldrv, NULL }; int _init(void) { int rv; if ((rv = ddi_soft_state_init((void **)&xdb_statep, sizeof (xdb_t), 0)) == 0) if ((rv = mod_install(&xdb_modlinkage)) != 0) ddi_soft_state_fini((void **)&xdb_statep); return (rv); } int _fini(void) { int rv; if ((rv = mod_remove(&xdb_modlinkage)) != 0) return (rv); ddi_soft_state_fini((void **)&xdb_statep); return (rv); } int _info(struct modinfo *modinfop) { return (mod_info(&xdb_modlinkage, modinfop)); } static int xdb_get_request(xdb_t *vdp, blkif_request_t *req) { void *src = xvdi_ring_get_request(vdp->xs_ring); if (src == NULL) return (0); switch (vdp->xs_blk_protocol) { case BLKIF_PROTOCOL_NATIVE: (void) memcpy(req, src, sizeof (*req)); break; case BLKIF_PROTOCOL_X86_32: blkif_get_x86_32_req(req, src); break; case BLKIF_PROTOCOL_X86_64: blkif_get_x86_64_req(req, src); break; default: cmn_err(CE_PANIC, "xdb@%s: unrecognised protocol: %d", ddi_get_name_addr(vdp->xs_dip), vdp->xs_blk_protocol); } return (1); } static int xdb_push_response(xdb_t *vdp, uint64_t id, uint8_t op, uint16_t status) { ddi_acc_handle_t acchdl = vdp->xs_ring_hdl; blkif_response_t *rsp = xvdi_ring_get_response(vdp->xs_ring); blkif_x86_32_response_t *rsp_32 = (blkif_x86_32_response_t 
*)rsp; blkif_x86_64_response_t *rsp_64 = (blkif_x86_64_response_t *)rsp; ASSERT(rsp); switch (vdp->xs_blk_protocol) { case BLKIF_PROTOCOL_NATIVE: ddi_put64(acchdl, &rsp->id, id); ddi_put8(acchdl, &rsp->operation, op); ddi_put16(acchdl, (uint16_t *)&rsp->status, status == 0 ? BLKIF_RSP_OKAY : BLKIF_RSP_ERROR); break; case BLKIF_PROTOCOL_X86_32: ddi_put64(acchdl, &rsp_32->id, id); ddi_put8(acchdl, &rsp_32->operation, op); ddi_put16(acchdl, (uint16_t *)&rsp_32->status, status == 0 ? BLKIF_RSP_OKAY : BLKIF_RSP_ERROR); break; case BLKIF_PROTOCOL_X86_64: ddi_put64(acchdl, &rsp_64->id, id); ddi_put8(acchdl, &rsp_64->operation, op); ddi_put16(acchdl, (uint16_t *)&rsp_64->status, status == 0 ? BLKIF_RSP_OKAY : BLKIF_RSP_ERROR); break; default: cmn_err(CE_PANIC, "xdb@%s: unrecognised protocol: %d", ddi_get_name_addr(vdp->xs_dip), vdp->xs_blk_protocol); } return (xvdi_ring_push_response(vdp->xs_ring)); } static void blkif_get_x86_32_req(blkif_request_t *dst, blkif_x86_32_request_t *src) { int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; dst->operation = src->operation; dst->nr_segments = src->nr_segments; dst->handle = src->handle; dst->id = src->id; dst->sector_number = src->sector_number; if (n > src->nr_segments) n = src->nr_segments; for (i = 0; i < n; i++) dst->seg[i] = src->seg[i]; } static void blkif_get_x86_64_req(blkif_request_t *dst, blkif_x86_64_request_t *src) { int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; dst->operation = src->operation; dst->nr_segments = src->nr_segments; dst->handle = src->handle; dst->id = src->id; dst->sector_number = src->sector_number; if (n > src->nr_segments) n = src->nr_segments; for (i = 0; i < n; i++) dst->seg[i] = src->seg[i]; }