17eea693dSMark Johnson /* 27eea693dSMark Johnson * CDDL HEADER START 37eea693dSMark Johnson * 47eea693dSMark Johnson * The contents of this file are subject to the terms of the 57eea693dSMark Johnson * Common Development and Distribution License (the "License"). 67eea693dSMark Johnson * You may not use this file except in compliance with the License. 77eea693dSMark Johnson * 87eea693dSMark Johnson * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97eea693dSMark Johnson * or http://www.opensolaris.org/os/licensing. 107eea693dSMark Johnson * See the License for the specific language governing permissions 117eea693dSMark Johnson * and limitations under the License. 127eea693dSMark Johnson * 137eea693dSMark Johnson * When distributing Covered Code, include this CDDL HEADER in each 147eea693dSMark Johnson * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157eea693dSMark Johnson * If applicable, add the following below this CDDL HEADER, with the 167eea693dSMark Johnson * fields enclosed by brackets "[]" replaced with your own identifying 177eea693dSMark Johnson * information: Portions Copyright [yyyy] [name of copyright owner] 187eea693dSMark Johnson * 197eea693dSMark Johnson * CDDL HEADER END 207eea693dSMark Johnson */ 217eea693dSMark Johnson 227eea693dSMark Johnson /* 23d3d50737SRafael Vanoni * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 247eea693dSMark Johnson * Use is subject to license terms. 257eea693dSMark Johnson */ 267eea693dSMark Johnson 277eea693dSMark Johnson 287eea693dSMark Johnson #include <sys/errno.h> 297eea693dSMark Johnson #include <sys/types.h> 307eea693dSMark Johnson #include <sys/conf.h> 317eea693dSMark Johnson #include <sys/kmem.h> 327eea693dSMark Johnson #include <sys/ddi.h> 337eea693dSMark Johnson #include <sys/stat.h> 347eea693dSMark Johnson #include <sys/sunddi.h> 357eea693dSMark Johnson #include <sys/file.h> 367eea693dSMark Johnson #include <sys/open.h> 377eea693dSMark Johnson #include <sys/modctl.h> 387eea693dSMark Johnson #include <sys/ddi_impldefs.h> 397eea693dSMark Johnson #include <sys/sysmacros.h> 407eea693dSMark Johnson #include <sys/ddidevmap.h> 417eea693dSMark Johnson #include <sys/policy.h> 427eea693dSMark Johnson 437eea693dSMark Johnson #include <sys/vmsystm.h> 447eea693dSMark Johnson #include <vm/hat_i86.h> 457eea693dSMark Johnson #include <vm/hat_pte.h> 467eea693dSMark Johnson #include <vm/seg_kmem.h> 477eea693dSMark Johnson #include <vm/seg_mf.h> 487eea693dSMark Johnson 497eea693dSMark Johnson #include <xen/io/blkif_impl.h> 507eea693dSMark Johnson #include <xen/io/blk_common.h> 517eea693dSMark Johnson #include <xen/io/xpvtap.h> 527eea693dSMark Johnson 537eea693dSMark Johnson 547eea693dSMark Johnson static int xpvtap_open(dev_t *devp, int flag, int otyp, cred_t *cred); 557eea693dSMark Johnson static int xpvtap_close(dev_t devp, int flag, int otyp, cred_t *cred); 567eea693dSMark Johnson static int xpvtap_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, 577eea693dSMark Johnson cred_t *cred, int *rval); 587eea693dSMark Johnson static int xpvtap_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, 597eea693dSMark Johnson size_t len, size_t *maplen, uint_t model); 607eea693dSMark Johnson static int xpvtap_segmap(dev_t dev, off_t off, struct as *asp, caddr_t *addrp, 617eea693dSMark Johnson off_t len, unsigned int prot, unsigned int maxprot, unsigned int flags, 627eea693dSMark Johnson cred_t *cred_p); 637eea693dSMark Johnson static int xpvtap_chpoll(dev_t dev, short events, int anyyet, short *reventsp, 647eea693dSMark Johnson struct pollhead **phpp); 657eea693dSMark Johnson 667eea693dSMark Johnson static struct cb_ops xpvtap_cb_ops = { 677eea693dSMark Johnson xpvtap_open, /* cb_open */ 687eea693dSMark Johnson xpvtap_close, /* cb_close */ 697eea693dSMark Johnson nodev, /* cb_strategy */ 707eea693dSMark Johnson nodev, /* cb_print */ 717eea693dSMark Johnson nodev, /* cb_dump */ 727eea693dSMark Johnson nodev, /* cb_read */ 737eea693dSMark Johnson nodev, /* cb_write */ 747eea693dSMark Johnson xpvtap_ioctl, /* cb_ioctl */ 757eea693dSMark Johnson xpvtap_devmap, /* cb_devmap */ 767eea693dSMark Johnson nodev, /* cb_mmap */ 777eea693dSMark Johnson xpvtap_segmap, /* cb_segmap */ 787eea693dSMark Johnson xpvtap_chpoll, /* cb_chpoll */ 797eea693dSMark Johnson ddi_prop_op, /* cb_prop_op */ 807eea693dSMark Johnson NULL, /* cb_stream */ 817eea693dSMark Johnson D_NEW | D_MP | D_64BIT | D_DEVMAP, /* cb_flag */ 827eea693dSMark Johnson CB_REV 837eea693dSMark Johnson }; 847eea693dSMark Johnson 857eea693dSMark Johnson static int xpvtap_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, 867eea693dSMark Johnson void **result); 877eea693dSMark Johnson static int xpvtap_attach(dev_info_t *devi, ddi_attach_cmd_t cmd); 887eea693dSMark Johnson static int xpvtap_detach(dev_info_t *devi, ddi_detach_cmd_t cmd); 897eea693dSMark Johnson 907eea693dSMark Johnson static struct dev_ops xpvtap_dev_ops = { 917eea693dSMark Johnson DEVO_REV, /* devo_rev */ 927eea693dSMark Johnson 0, /* devo_refcnt */ 937eea693dSMark Johnson xpvtap_getinfo, /* devo_getinfo */ 947eea693dSMark Johnson nulldev, /* devo_identify */ 957eea693dSMark Johnson nulldev, /* devo_probe */ 967eea693dSMark Johnson xpvtap_attach, /* devo_attach */ 977eea693dSMark Johnson xpvtap_detach, /* devo_detach */ 987eea693dSMark Johnson nodev, /* devo_reset */ 997eea693dSMark Johnson &xpvtap_cb_ops, /* devo_cb_ops */ 1007eea693dSMark Johnson NULL, /* devo_bus_ops */ 1017eea693dSMark Johnson NULL /* power */ 1027eea693dSMark Johnson }; 1037eea693dSMark Johnson 1047eea693dSMark Johnson 1057eea693dSMark Johnson static struct modldrv xpvtap_modldrv = { 1067eea693dSMark Johnson &mod_driverops, /* Type of module. This one is a driver */ 1077eea693dSMark Johnson "xpvtap driver", /* Name of the module. */ 1087eea693dSMark Johnson &xpvtap_dev_ops, /* driver ops */ 1097eea693dSMark Johnson }; 1107eea693dSMark Johnson 1117eea693dSMark Johnson static struct modlinkage xpvtap_modlinkage = { 1127eea693dSMark Johnson MODREV_1, 1137eea693dSMark Johnson (void *) &xpvtap_modldrv, 1147eea693dSMark Johnson NULL 1157eea693dSMark Johnson }; 1167eea693dSMark Johnson 1177eea693dSMark Johnson 1187eea693dSMark Johnson void *xpvtap_statep; 1197eea693dSMark Johnson 1207eea693dSMark Johnson 1217eea693dSMark Johnson static xpvtap_state_t *xpvtap_drv_init(int instance); 1227eea693dSMark Johnson static void xpvtap_drv_fini(xpvtap_state_t *state); 1237eea693dSMark Johnson static uint_t xpvtap_intr(caddr_t arg); 1247eea693dSMark Johnson 1257eea693dSMark Johnson typedef void (*xpvtap_rs_cleanup_t)(xpvtap_state_t *state, uint_t rs); 1267eea693dSMark Johnson static void xpvtap_rs_init(uint_t min_val, uint_t max_val, 1277eea693dSMark Johnson xpvtap_rs_hdl_t *handle); 1287eea693dSMark Johnson static void xpvtap_rs_fini(xpvtap_rs_hdl_t *handle); 1297eea693dSMark Johnson static int xpvtap_rs_alloc(xpvtap_rs_hdl_t handle, uint_t *rs); 1307eea693dSMark Johnson static void xpvtap_rs_free(xpvtap_rs_hdl_t handle, uint_t rs); 1317eea693dSMark Johnson static void xpvtap_rs_flush(xpvtap_rs_hdl_t handle, 1327eea693dSMark Johnson xpvtap_rs_cleanup_t callback, void *arg); 1337eea693dSMark Johnson 1347eea693dSMark Johnson static int xpvtap_segmf_register(xpvtap_state_t *state); 1357eea693dSMark Johnson static void xpvtap_segmf_unregister(struct as *as, void *arg, uint_t event); 1367eea693dSMark Johnson 1377eea693dSMark Johnson static int xpvtap_user_init(xpvtap_state_t *state); 1387eea693dSMark Johnson static void xpvtap_user_fini(xpvtap_state_t *state); 1397eea693dSMark Johnson static int xpvtap_user_ring_init(xpvtap_state_t *state); 1407eea693dSMark Johnson static void xpvtap_user_ring_fini(xpvtap_state_t *state); 1417eea693dSMark Johnson static int xpvtap_user_thread_init(xpvtap_state_t *state); 1427eea693dSMark Johnson static void xpvtap_user_thread_fini(xpvtap_state_t *state); 1437eea693dSMark Johnson static void xpvtap_user_thread_start(caddr_t arg); 1447eea693dSMark Johnson static void xpvtap_user_thread_stop(xpvtap_state_t *state); 1457eea693dSMark Johnson static void xpvtap_user_thread(void *arg); 1467eea693dSMark Johnson 1477eea693dSMark Johnson static void xpvtap_user_app_stop(caddr_t arg); 1487eea693dSMark Johnson 1497eea693dSMark Johnson static int xpvtap_user_request_map(xpvtap_state_t *state, blkif_request_t *req, 1507eea693dSMark Johnson uint_t *uid); 1517eea693dSMark Johnson static int xpvtap_user_request_push(xpvtap_state_t *state, 1527eea693dSMark Johnson blkif_request_t *req, uint_t uid); 1537eea693dSMark Johnson static int xpvtap_user_response_get(xpvtap_state_t *state, 1547eea693dSMark Johnson blkif_response_t *resp, uint_t *uid); 1557eea693dSMark Johnson static void xpvtap_user_request_unmap(xpvtap_state_t *state, uint_t uid); 1567eea693dSMark Johnson 1577eea693dSMark Johnson 1587eea693dSMark Johnson /* 1597eea693dSMark Johnson * _init() 1607eea693dSMark Johnson */ 1617eea693dSMark Johnson int 1627eea693dSMark Johnson _init(void) 1637eea693dSMark Johnson { 1647eea693dSMark Johnson int e; 1657eea693dSMark Johnson 1667eea693dSMark Johnson e = ddi_soft_state_init(&xpvtap_statep, sizeof (xpvtap_state_t), 1); 1677eea693dSMark Johnson if (e != 0) { 1687eea693dSMark Johnson return (e); 1697eea693dSMark Johnson } 1707eea693dSMark Johnson 1717eea693dSMark Johnson e = mod_install(&xpvtap_modlinkage); 1727eea693dSMark Johnson if (e != 0) { 1737eea693dSMark Johnson ddi_soft_state_fini(&xpvtap_statep); 1747eea693dSMark Johnson return (e); 1757eea693dSMark Johnson } 1767eea693dSMark Johnson 1777eea693dSMark Johnson return (0); 1787eea693dSMark Johnson } 1797eea693dSMark Johnson 1807eea693dSMark Johnson 1817eea693dSMark Johnson /* 1827eea693dSMark Johnson * _info() 1837eea693dSMark Johnson */ 1847eea693dSMark Johnson int 1857eea693dSMark Johnson _info(struct modinfo *modinfop) 1867eea693dSMark Johnson { 1877eea693dSMark Johnson return (mod_info(&xpvtap_modlinkage, modinfop)); 1887eea693dSMark Johnson } 1897eea693dSMark Johnson 1907eea693dSMark Johnson 1917eea693dSMark Johnson /* 1927eea693dSMark Johnson * _fini() 1937eea693dSMark Johnson */ 1947eea693dSMark Johnson int 1957eea693dSMark Johnson _fini(void) 1967eea693dSMark Johnson { 1977eea693dSMark Johnson int e; 1987eea693dSMark Johnson 1997eea693dSMark Johnson e = mod_remove(&xpvtap_modlinkage); 2007eea693dSMark Johnson if (e != 0) { 2017eea693dSMark Johnson return (e); 2027eea693dSMark Johnson } 2037eea693dSMark Johnson 2047eea693dSMark Johnson ddi_soft_state_fini(&xpvtap_statep); 2057eea693dSMark Johnson 2067eea693dSMark Johnson return (0); 2077eea693dSMark Johnson } 2087eea693dSMark Johnson 2097eea693dSMark Johnson 2107eea693dSMark Johnson /* 2117eea693dSMark Johnson * xpvtap_attach() 2127eea693dSMark Johnson */ 2137eea693dSMark Johnson static int 2147eea693dSMark Johnson xpvtap_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 2157eea693dSMark Johnson { 2167eea693dSMark Johnson blk_ringinit_args_t args; 2177eea693dSMark Johnson xpvtap_state_t *state; 2187eea693dSMark Johnson int instance; 2197eea693dSMark Johnson int e; 2207eea693dSMark Johnson 2217eea693dSMark Johnson 2227eea693dSMark Johnson switch (cmd) { 2237eea693dSMark Johnson case DDI_ATTACH: 2247eea693dSMark Johnson break; 2257eea693dSMark Johnson 2267eea693dSMark Johnson case DDI_RESUME: 2277eea693dSMark Johnson return (DDI_SUCCESS); 2287eea693dSMark Johnson 2297eea693dSMark Johnson default: 2307eea693dSMark Johnson return (DDI_FAILURE); 2317eea693dSMark Johnson } 2327eea693dSMark Johnson 2337eea693dSMark Johnson /* initialize our state info */ 2347eea693dSMark Johnson instance = ddi_get_instance(dip); 2357eea693dSMark Johnson state = xpvtap_drv_init(instance); 2367eea693dSMark Johnson if (state == NULL) { 2377eea693dSMark Johnson return (DDI_FAILURE); 2387eea693dSMark Johnson } 2397eea693dSMark Johnson state->bt_dip = dip; 2407eea693dSMark Johnson 2417eea693dSMark Johnson /* Initialize the guest ring */ 2427eea693dSMark Johnson args.ar_dip = state->bt_dip; 2437eea693dSMark Johnson args.ar_intr = xpvtap_intr; 2447eea693dSMark Johnson args.ar_intr_arg = (caddr_t)state; 2457eea693dSMark Johnson args.ar_ringup = xpvtap_user_thread_start; 2467eea693dSMark Johnson args.ar_ringup_arg = (caddr_t)state; 2477eea693dSMark Johnson args.ar_ringdown = xpvtap_user_app_stop; 2487eea693dSMark Johnson args.ar_ringdown_arg = (caddr_t)state; 2497eea693dSMark Johnson e = blk_ring_init(&args, &state->bt_guest_ring); 2507eea693dSMark Johnson if (e != DDI_SUCCESS) { 2517eea693dSMark Johnson goto attachfail_ringinit; 2527eea693dSMark Johnson } 2537eea693dSMark Johnson 2547eea693dSMark Johnson /* create the minor node (for ioctl/mmap) */ 2557eea693dSMark Johnson e = ddi_create_minor_node(dip, "xpvtap", S_IFCHR, instance, 2567eea693dSMark Johnson DDI_PSEUDO, 0); 2577eea693dSMark Johnson if (e != DDI_SUCCESS) { 2587eea693dSMark Johnson goto attachfail_minor_node; 2597eea693dSMark Johnson } 2607eea693dSMark Johnson 2617eea693dSMark Johnson /* Report that driver was loaded */ 2627eea693dSMark Johnson ddi_report_dev(dip); 2637eea693dSMark Johnson 2647eea693dSMark Johnson return (DDI_SUCCESS); 2657eea693dSMark Johnson 2667eea693dSMark Johnson attachfail_minor_node: 2677eea693dSMark Johnson blk_ring_fini(&state->bt_guest_ring); 2687eea693dSMark Johnson attachfail_ringinit: 2697eea693dSMark Johnson xpvtap_drv_fini(state); 2707eea693dSMark Johnson return (DDI_FAILURE); 2717eea693dSMark Johnson } 2727eea693dSMark Johnson 2737eea693dSMark Johnson 2747eea693dSMark Johnson /* 2757eea693dSMark Johnson * xpvtap_detach() 2767eea693dSMark Johnson */ 2777eea693dSMark Johnson static int 2787eea693dSMark Johnson xpvtap_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 2797eea693dSMark Johnson { 2807eea693dSMark Johnson xpvtap_state_t *state; 2817eea693dSMark Johnson int instance; 2827eea693dSMark Johnson 2837eea693dSMark Johnson 2847eea693dSMark Johnson instance = ddi_get_instance(dip); 2857eea693dSMark Johnson state = ddi_get_soft_state(xpvtap_statep, instance); 2867eea693dSMark Johnson if (state == NULL) { 2877eea693dSMark Johnson return (DDI_FAILURE); 2887eea693dSMark Johnson } 2897eea693dSMark Johnson 2907eea693dSMark Johnson switch (cmd) { 2917eea693dSMark Johnson case DDI_DETACH: 2927eea693dSMark Johnson break; 2937eea693dSMark Johnson 2947eea693dSMark Johnson case DDI_SUSPEND: 2957eea693dSMark Johnson default: 2967eea693dSMark Johnson return (DDI_FAILURE); 2977eea693dSMark Johnson } 2987eea693dSMark Johnson 2997eea693dSMark Johnson xpvtap_user_thread_stop(state); 3007eea693dSMark Johnson blk_ring_fini(&state->bt_guest_ring); 3017eea693dSMark Johnson xpvtap_drv_fini(state); 3027eea693dSMark Johnson ddi_remove_minor_node(dip, NULL); 3037eea693dSMark Johnson 3047eea693dSMark Johnson return (DDI_SUCCESS); 3057eea693dSMark Johnson } 3067eea693dSMark Johnson 3077eea693dSMark Johnson 3087eea693dSMark Johnson /* 3097eea693dSMark Johnson * xpvtap_getinfo() 3107eea693dSMark Johnson */ 3117eea693dSMark Johnson /*ARGSUSED*/ 3127eea693dSMark Johnson static int 3137eea693dSMark Johnson xpvtap_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result) 3147eea693dSMark Johnson { 3157eea693dSMark Johnson xpvtap_state_t *state; 3167eea693dSMark Johnson int instance; 3177eea693dSMark Johnson dev_t dev; 3187eea693dSMark Johnson int e; 3197eea693dSMark Johnson 3207eea693dSMark Johnson 3217eea693dSMark Johnson dev = (dev_t)arg; 3227eea693dSMark Johnson instance = getminor(dev); 3237eea693dSMark Johnson 3247eea693dSMark Johnson switch (cmd) { 3257eea693dSMark Johnson case DDI_INFO_DEVT2DEVINFO: 3267eea693dSMark Johnson state = ddi_get_soft_state(xpvtap_statep, instance); 3277eea693dSMark Johnson if (state == NULL) { 3287eea693dSMark Johnson return (DDI_FAILURE); 3297eea693dSMark Johnson } 3307eea693dSMark Johnson *result = (void *)state->bt_dip; 3317eea693dSMark Johnson e = DDI_SUCCESS; 3327eea693dSMark Johnson break; 3337eea693dSMark Johnson 3347eea693dSMark Johnson case DDI_INFO_DEVT2INSTANCE: 3357eea693dSMark Johnson *result = (void *)(uintptr_t)instance; 3367eea693dSMark Johnson e = DDI_SUCCESS; 3377eea693dSMark Johnson break; 3387eea693dSMark Johnson 3397eea693dSMark Johnson default: 3407eea693dSMark Johnson e = DDI_FAILURE; 3417eea693dSMark Johnson break; 3427eea693dSMark Johnson } 3437eea693dSMark Johnson 3447eea693dSMark Johnson return (e); 3457eea693dSMark Johnson } 3467eea693dSMark Johnson 3477eea693dSMark Johnson 3487eea693dSMark Johnson /* 3497eea693dSMark Johnson * xpvtap_open() 3507eea693dSMark Johnson */ 3517eea693dSMark Johnson /*ARGSUSED*/ 3527eea693dSMark Johnson static int 3537eea693dSMark Johnson xpvtap_open(dev_t *devp, int flag, int otyp, cred_t *cred) 3547eea693dSMark Johnson { 3557eea693dSMark Johnson xpvtap_state_t *state; 3567eea693dSMark Johnson int instance; 3577eea693dSMark Johnson 3587eea693dSMark Johnson 3597eea693dSMark Johnson if (secpolicy_xvm_control(cred)) { 3607eea693dSMark Johnson return (EPERM); 3617eea693dSMark Johnson } 3627eea693dSMark Johnson 3637eea693dSMark Johnson instance = getminor(*devp); 3647eea693dSMark Johnson state = ddi_get_soft_state(xpvtap_statep, instance); 3657eea693dSMark Johnson if (state == NULL) { 3667eea693dSMark Johnson return (ENXIO); 3677eea693dSMark Johnson } 3687eea693dSMark Johnson 3697eea693dSMark Johnson /* we should only be opened once */ 3707eea693dSMark Johnson mutex_enter(&state->bt_open.bo_mutex); 3717eea693dSMark Johnson if (state->bt_open.bo_opened) { 3727eea693dSMark Johnson mutex_exit(&state->bt_open.bo_mutex); 3737eea693dSMark Johnson return (EBUSY); 3747eea693dSMark Johnson } 3757eea693dSMark Johnson state->bt_open.bo_opened = B_TRUE; 3767eea693dSMark Johnson mutex_exit(&state->bt_open.bo_mutex); 3777eea693dSMark Johnson 3787eea693dSMark Johnson /* 3797eea693dSMark Johnson * save the apps address space. need it for mapping/unmapping grefs 3807eea693dSMark Johnson * since will be doing it in a separate kernel thread. 3817eea693dSMark Johnson */ 3827eea693dSMark Johnson state->bt_map.um_as = curproc->p_as; 3837eea693dSMark Johnson 3847eea693dSMark Johnson return (0); 3857eea693dSMark Johnson } 3867eea693dSMark Johnson 3877eea693dSMark Johnson 3887eea693dSMark Johnson /* 3897eea693dSMark Johnson * xpvtap_close() 3907eea693dSMark Johnson */ 3917eea693dSMark Johnson /*ARGSUSED*/ 3927eea693dSMark Johnson static int 3937eea693dSMark Johnson xpvtap_close(dev_t devp, int flag, int otyp, cred_t *cred) 3947eea693dSMark Johnson { 3957eea693dSMark Johnson xpvtap_state_t *state; 3967eea693dSMark Johnson int instance; 3977eea693dSMark Johnson 3987eea693dSMark Johnson 3997eea693dSMark Johnson instance = getminor(devp); 4007eea693dSMark Johnson state = ddi_get_soft_state(xpvtap_statep, instance); 4017eea693dSMark Johnson if (state == NULL) { 4027eea693dSMark Johnson return (ENXIO); 4037eea693dSMark Johnson } 4047eea693dSMark Johnson 4057eea693dSMark Johnson /* 4067eea693dSMark Johnson * wake thread so it can cleanup and wait for it to exit so we can 4077eea693dSMark Johnson * be sure it's not in the middle of processing a request/response. 4087eea693dSMark Johnson */ 4097eea693dSMark Johnson mutex_enter(&state->bt_thread.ut_mutex); 4107eea693dSMark Johnson state->bt_thread.ut_wake = B_TRUE; 4117eea693dSMark Johnson state->bt_thread.ut_exit = B_TRUE; 4127eea693dSMark Johnson cv_signal(&state->bt_thread.ut_wake_cv); 4137eea693dSMark Johnson if (!state->bt_thread.ut_exit_done) { 4147eea693dSMark Johnson cv_wait(&state->bt_thread.ut_exit_done_cv, 4157eea693dSMark Johnson &state->bt_thread.ut_mutex); 4167eea693dSMark Johnson } 4177eea693dSMark Johnson ASSERT(state->bt_thread.ut_exit_done); 4187eea693dSMark Johnson mutex_exit(&state->bt_thread.ut_mutex); 4197eea693dSMark Johnson 4207eea693dSMark Johnson state->bt_map.um_as = NULL; 4217eea693dSMark Johnson state->bt_map.um_guest_pages = NULL; 4227eea693dSMark Johnson 4237eea693dSMark Johnson /* 4247eea693dSMark Johnson * when the ring is brought down, a userland hotplug script is run 4257eea693dSMark Johnson * which tries to bring the userland app down. We'll wait for a bit 4267eea693dSMark Johnson * for the user app to exit. Notify the thread waiting that the app 4277eea693dSMark Johnson * has closed the driver. 4287eea693dSMark Johnson */ 4297eea693dSMark Johnson mutex_enter(&state->bt_open.bo_mutex); 4307eea693dSMark Johnson ASSERT(state->bt_open.bo_opened); 4317eea693dSMark Johnson state->bt_open.bo_opened = B_FALSE; 4327eea693dSMark Johnson cv_signal(&state->bt_open.bo_exit_cv); 4337eea693dSMark Johnson mutex_exit(&state->bt_open.bo_mutex); 4347eea693dSMark Johnson 4357eea693dSMark Johnson return (0); 4367eea693dSMark Johnson } 4377eea693dSMark Johnson 4387eea693dSMark Johnson 4397eea693dSMark Johnson /* 4407eea693dSMark Johnson * xpvtap_ioctl() 4417eea693dSMark Johnson */ 4427eea693dSMark Johnson /*ARGSUSED*/ 4437eea693dSMark Johnson static int 4447eea693dSMark Johnson xpvtap_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred, 4457eea693dSMark Johnson int *rval) 4467eea693dSMark Johnson { 4477eea693dSMark Johnson xpvtap_state_t *state; 4487eea693dSMark Johnson int instance; 4497eea693dSMark Johnson 4507eea693dSMark Johnson 4517eea693dSMark Johnson if (secpolicy_xvm_control(cred)) { 4527eea693dSMark Johnson return (EPERM); 4537eea693dSMark Johnson } 4547eea693dSMark Johnson 4557eea693dSMark Johnson instance = getminor(dev); 4567eea693dSMark Johnson if (instance == -1) { 4577eea693dSMark Johnson return (EBADF); 4587eea693dSMark Johnson } 4597eea693dSMark Johnson 4607eea693dSMark Johnson state = ddi_get_soft_state(xpvtap_statep, instance); 4617eea693dSMark Johnson if (state == NULL) { 4627eea693dSMark Johnson return (EBADF); 4637eea693dSMark Johnson } 4647eea693dSMark Johnson 4657eea693dSMark Johnson switch (cmd) { 4667eea693dSMark Johnson case XPVTAP_IOCTL_RESP_PUSH: 4677eea693dSMark Johnson /* 4687eea693dSMark Johnson * wake thread, thread handles guest requests and user app 4697eea693dSMark Johnson * responses. 4707eea693dSMark Johnson */ 4717eea693dSMark Johnson mutex_enter(&state->bt_thread.ut_mutex); 4727eea693dSMark Johnson state->bt_thread.ut_wake = B_TRUE; 4737eea693dSMark Johnson cv_signal(&state->bt_thread.ut_wake_cv); 4747eea693dSMark Johnson mutex_exit(&state->bt_thread.ut_mutex); 4757eea693dSMark Johnson break; 4767eea693dSMark Johnson 4777eea693dSMark Johnson default: 4787eea693dSMark Johnson cmn_err(CE_WARN, "ioctl(%d) not supported\n", cmd); 4797eea693dSMark Johnson return (ENXIO); 4807eea693dSMark Johnson } 4817eea693dSMark Johnson 4827eea693dSMark Johnson return (0); 4837eea693dSMark Johnson } 4847eea693dSMark Johnson 4857eea693dSMark Johnson 4867eea693dSMark Johnson /* 4877eea693dSMark Johnson * xpvtap_segmap() 4887eea693dSMark Johnson */ 4897eea693dSMark Johnson /*ARGSUSED*/ 4907eea693dSMark Johnson static int 4917eea693dSMark Johnson xpvtap_segmap(dev_t dev, off_t off, struct as *asp, caddr_t *addrp, 4927eea693dSMark Johnson off_t len, unsigned int prot, unsigned int maxprot, unsigned int flags, 4937eea693dSMark Johnson cred_t *cred_p) 4947eea693dSMark Johnson { 4957eea693dSMark Johnson struct segmf_crargs a; 4967eea693dSMark Johnson xpvtap_state_t *state; 4977eea693dSMark Johnson int instance; 4987eea693dSMark Johnson int e; 4997eea693dSMark Johnson 5007eea693dSMark Johnson 5017eea693dSMark Johnson if (secpolicy_xvm_control(cred_p)) { 5027eea693dSMark Johnson return (EPERM); 5037eea693dSMark Johnson } 5047eea693dSMark Johnson 5057eea693dSMark Johnson instance = getminor(dev); 5067eea693dSMark Johnson state = ddi_get_soft_state(xpvtap_statep, instance); 5077eea693dSMark Johnson if (state == NULL) { 5087eea693dSMark Johnson return (EBADF); 5097eea693dSMark Johnson } 5107eea693dSMark Johnson 5117eea693dSMark Johnson /* the user app should be doing a MAP_SHARED mapping */ 5127eea693dSMark Johnson if ((flags & MAP_TYPE) != MAP_SHARED) { 5137eea693dSMark Johnson return (EINVAL); 5147eea693dSMark Johnson } 5157eea693dSMark Johnson 5167eea693dSMark Johnson /* 5177eea693dSMark Johnson * if this is the user ring (offset = 0), devmap it (which ends up in 5187eea693dSMark Johnson * xpvtap_devmap). devmap will alloc and map the ring into the 5197eea693dSMark Johnson * app's VA space. 5207eea693dSMark Johnson */ 5217eea693dSMark Johnson if (off == 0) { 5227eea693dSMark Johnson e = devmap_setup(dev, (offset_t)off, asp, addrp, (size_t)len, 5237eea693dSMark Johnson prot, maxprot, flags, cred_p); 5247eea693dSMark Johnson return (e); 5257eea693dSMark Johnson } 5267eea693dSMark Johnson 5277eea693dSMark Johnson /* this should be the mmap for the gref pages (offset = PAGESIZE) */ 5287eea693dSMark Johnson if (off != PAGESIZE) { 5297eea693dSMark Johnson return (EINVAL); 5307eea693dSMark Johnson } 5317eea693dSMark Johnson 5327eea693dSMark Johnson /* make sure we get the size we're expecting */ 5337eea693dSMark Johnson if (len != XPVTAP_GREF_BUFSIZE) { 5347eea693dSMark Johnson return (EINVAL); 5357eea693dSMark Johnson } 5367eea693dSMark Johnson 5377eea693dSMark Johnson /* 5387eea693dSMark Johnson * reserve user app VA space for the gref pages and use segmf to 5397eea693dSMark Johnson * manage the backing store for the physical memory. segmf will 5407eea693dSMark Johnson * map in/out the grefs and fault them in/out. 5417eea693dSMark Johnson */ 5427eea693dSMark Johnson ASSERT(asp == state->bt_map.um_as); 5437eea693dSMark Johnson as_rangelock(asp); 5447eea693dSMark Johnson if ((flags & MAP_FIXED) == 0) { 5457eea693dSMark Johnson map_addr(addrp, len, 0, 0, flags); 5467eea693dSMark Johnson if (*addrp == NULL) { 5477eea693dSMark Johnson as_rangeunlock(asp); 5487eea693dSMark Johnson return (ENOMEM); 5497eea693dSMark Johnson } 5507eea693dSMark Johnson } else { 5517eea693dSMark Johnson /* User specified address */ 5527eea693dSMark Johnson (void) as_unmap(asp, *addrp, len); 5537eea693dSMark Johnson } 5547eea693dSMark Johnson a.dev = dev; 5557eea693dSMark Johnson a.prot = (uchar_t)prot; 5567eea693dSMark Johnson a.maxprot = (uchar_t)maxprot; 5577eea693dSMark Johnson e = as_map(asp, *addrp, len, segmf_create, &a); 5587eea693dSMark Johnson if (e != 0) { 5597eea693dSMark Johnson as_rangeunlock(asp); 5607eea693dSMark Johnson return (e); 5617eea693dSMark Johnson } 5627eea693dSMark Johnson as_rangeunlock(asp); 5637eea693dSMark Johnson 5647eea693dSMark Johnson /* 5657eea693dSMark Johnson * Stash user base address, and compute address where the request 5667eea693dSMark Johnson * array will end up. 5677eea693dSMark Johnson */ 5687eea693dSMark Johnson state->bt_map.um_guest_pages = (caddr_t)*addrp; 5697eea693dSMark Johnson state->bt_map.um_guest_size = (size_t)len; 5707eea693dSMark Johnson 5717eea693dSMark Johnson /* register an as callback so we can cleanup when the app goes away */ 5727eea693dSMark Johnson e = as_add_callback(asp, xpvtap_segmf_unregister, state, 5737eea693dSMark Johnson AS_UNMAP_EVENT, *addrp, len, KM_SLEEP); 5747eea693dSMark Johnson if (e != 0) { 5757eea693dSMark Johnson (void) as_unmap(asp, *addrp, len); 5767eea693dSMark Johnson return (EINVAL); 5777eea693dSMark Johnson } 5787eea693dSMark Johnson 5797eea693dSMark Johnson /* wake thread to see if there are requests already queued up */ 5807eea693dSMark Johnson mutex_enter(&state->bt_thread.ut_mutex); 5817eea693dSMark Johnson state->bt_thread.ut_wake = B_TRUE; 5827eea693dSMark Johnson cv_signal(&state->bt_thread.ut_wake_cv); 5837eea693dSMark Johnson mutex_exit(&state->bt_thread.ut_mutex); 5847eea693dSMark Johnson 5857eea693dSMark Johnson return (0); 5867eea693dSMark Johnson } 5877eea693dSMark Johnson 5887eea693dSMark Johnson 5897eea693dSMark Johnson /* 5907eea693dSMark Johnson * xpvtap_devmap() 5917eea693dSMark Johnson */ 5927eea693dSMark Johnson /*ARGSUSED*/ 5937eea693dSMark Johnson static int 5947eea693dSMark Johnson xpvtap_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len, 5957eea693dSMark Johnson size_t *maplen, uint_t model) 5967eea693dSMark Johnson { 5977eea693dSMark Johnson xpvtap_user_ring_t *usring; 5987eea693dSMark Johnson xpvtap_state_t *state; 5997eea693dSMark Johnson int instance; 6007eea693dSMark Johnson int e; 6017eea693dSMark Johnson 6027eea693dSMark Johnson 6037eea693dSMark Johnson instance = getminor(dev); 6047eea693dSMark Johnson state = ddi_get_soft_state(xpvtap_statep, instance); 6057eea693dSMark Johnson if (state == NULL) { 6067eea693dSMark Johnson return (EBADF); 6077eea693dSMark Johnson } 6087eea693dSMark Johnson 6097eea693dSMark Johnson /* we should only get here if the offset was == 0 */ 6107eea693dSMark Johnson if (off != 0) { 6117eea693dSMark Johnson return (EINVAL); 6127eea693dSMark Johnson } 6137eea693dSMark Johnson 6147eea693dSMark Johnson /* we should only be mapping in one page */ 6157eea693dSMark Johnson if (len != PAGESIZE) { 6167eea693dSMark Johnson return (EINVAL); 6177eea693dSMark Johnson } 6187eea693dSMark Johnson 6197eea693dSMark Johnson /* 6207eea693dSMark Johnson * we already allocated the user ring during driver attach, all we 6217eea693dSMark Johnson * need to do is map it into the user app's VA. 6227eea693dSMark Johnson */ 6237eea693dSMark Johnson usring = &state->bt_user_ring; 6247eea693dSMark Johnson e = devmap_umem_setup(dhp, state->bt_dip, NULL, usring->ur_cookie, 0, 6257eea693dSMark Johnson PAGESIZE, PROT_ALL, DEVMAP_DEFAULTS, NULL); 6267eea693dSMark Johnson if (e < 0) { 6277eea693dSMark Johnson return (e); 6287eea693dSMark Johnson } 6297eea693dSMark Johnson 6307eea693dSMark Johnson /* return the size to compete the devmap */ 6317eea693dSMark Johnson *maplen = PAGESIZE; 6327eea693dSMark Johnson 6337eea693dSMark Johnson return (0); 6347eea693dSMark Johnson } 6357eea693dSMark Johnson 6367eea693dSMark Johnson 6377eea693dSMark Johnson /* 6387eea693dSMark Johnson * xpvtap_chpoll() 6397eea693dSMark Johnson */ 6407eea693dSMark Johnson static int 6417eea693dSMark Johnson xpvtap_chpoll(dev_t dev, short events, int anyyet, short *reventsp, 6427eea693dSMark Johnson struct pollhead **phpp) 6437eea693dSMark Johnson { 6447eea693dSMark Johnson xpvtap_user_ring_t *usring; 6457eea693dSMark Johnson xpvtap_state_t *state; 6467eea693dSMark Johnson int instance; 6477eea693dSMark Johnson 6487eea693dSMark Johnson 6497eea693dSMark Johnson instance = getminor(dev); 6507eea693dSMark Johnson if (instance == -1) { 6517eea693dSMark Johnson return (EBADF); 6527eea693dSMark Johnson } 6537eea693dSMark Johnson state = ddi_get_soft_state(xpvtap_statep, instance); 6547eea693dSMark Johnson if (state == NULL) { 6557eea693dSMark Johnson return (EBADF); 6567eea693dSMark Johnson } 6577eea693dSMark Johnson 6587eea693dSMark Johnson if (((events & (POLLIN | POLLRDNORM)) == 0) && !anyyet) { 6597eea693dSMark Johnson *reventsp = 0; 6607eea693dSMark Johnson return (EINVAL); 6617eea693dSMark Johnson } 6627eea693dSMark Johnson 6637eea693dSMark Johnson /* 6647eea693dSMark Johnson * if we pushed requests on the user ring since the last poll, wakeup 6657eea693dSMark Johnson * the user app 6667eea693dSMark Johnson */ 6677eea693dSMark Johnson usring = &state->bt_user_ring; 6687eea693dSMark Johnson if (usring->ur_prod_polled != usring->ur_ring.req_prod_pvt) { 6697eea693dSMark Johnson 6707eea693dSMark Johnson /* 6717eea693dSMark Johnson * XXX - is this faster here or xpvtap_user_request_push?? 6727eea693dSMark Johnson * prelim data says here. Because less membars or because 6737eea693dSMark Johnson * user thread will spin in poll requests before getting to 6747eea693dSMark Johnson * responses? 6757eea693dSMark Johnson */ 6767eea693dSMark Johnson RING_PUSH_REQUESTS(&usring->ur_ring); 6777eea693dSMark Johnson 6787eea693dSMark Johnson usring->ur_prod_polled = usring->ur_ring.sring->req_prod; 6797eea693dSMark Johnson *reventsp = POLLIN | POLLRDNORM; 6807eea693dSMark Johnson 6817eea693dSMark Johnson /* no new requests */ 6827eea693dSMark Johnson } else { 6837eea693dSMark Johnson *reventsp = 0; 6847eea693dSMark Johnson if (!anyyet) { 6857eea693dSMark Johnson *phpp = &state->bt_pollhead; 6867eea693dSMark Johnson } 6877eea693dSMark Johnson } 6887eea693dSMark Johnson 6897eea693dSMark Johnson return (0); 6907eea693dSMark Johnson } 6917eea693dSMark Johnson 6927eea693dSMark Johnson 6937eea693dSMark Johnson /* 6947eea693dSMark Johnson * xpvtap_drv_init() 6957eea693dSMark Johnson */ 6967eea693dSMark Johnson static xpvtap_state_t * 6977eea693dSMark Johnson xpvtap_drv_init(int instance) 6987eea693dSMark Johnson { 6997eea693dSMark Johnson xpvtap_state_t *state; 7007eea693dSMark Johnson int e; 7017eea693dSMark Johnson 7027eea693dSMark Johnson 7037eea693dSMark Johnson e = ddi_soft_state_zalloc(xpvtap_statep, instance); 7047eea693dSMark Johnson if (e != DDI_SUCCESS) { 7057eea693dSMark Johnson return (NULL); 7067eea693dSMark Johnson } 7077eea693dSMark Johnson state = ddi_get_soft_state(xpvtap_statep, instance); 7087eea693dSMark Johnson if (state == NULL) { 7097eea693dSMark Johnson goto drvinitfail_get_soft_state; 7107eea693dSMark Johnson } 7117eea693dSMark Johnson 7127eea693dSMark Johnson state->bt_instance = instance; 7137eea693dSMark Johnson mutex_init(&state->bt_open.bo_mutex, NULL, MUTEX_DRIVER, NULL); 7147eea693dSMark Johnson cv_init(&state->bt_open.bo_exit_cv, NULL, CV_DRIVER, NULL); 7157eea693dSMark Johnson state->bt_open.bo_opened = B_FALSE; 7167eea693dSMark Johnson state->bt_map.um_registered = B_FALSE; 7177eea693dSMark Johnson 7187eea693dSMark Johnson /* initialize user ring, thread, mapping state */ 7197eea693dSMark Johnson e = xpvtap_user_init(state); 7207eea693dSMark Johnson if (e != DDI_SUCCESS) { 7217eea693dSMark Johnson goto drvinitfail_userinit; 7227eea693dSMark Johnson } 7237eea693dSMark Johnson 7247eea693dSMark Johnson return (state); 7257eea693dSMark Johnson 7267eea693dSMark Johnson drvinitfail_userinit: 7277eea693dSMark Johnson cv_destroy(&state->bt_open.bo_exit_cv); 7287eea693dSMark Johnson mutex_destroy(&state->bt_open.bo_mutex); 7297eea693dSMark Johnson drvinitfail_get_soft_state: 7307eea693dSMark Johnson (void) ddi_soft_state_free(xpvtap_statep, instance); 7317eea693dSMark Johnson return (NULL); 7327eea693dSMark Johnson } 7337eea693dSMark Johnson 7347eea693dSMark Johnson 7357eea693dSMark Johnson /* 7367eea693dSMark Johnson * xpvtap_drv_fini() 7377eea693dSMark Johnson */ 7387eea693dSMark Johnson static void 7397eea693dSMark Johnson xpvtap_drv_fini(xpvtap_state_t *state) 7407eea693dSMark Johnson { 7417eea693dSMark Johnson xpvtap_user_fini(state); 7427eea693dSMark Johnson cv_destroy(&state->bt_open.bo_exit_cv); 7437eea693dSMark Johnson mutex_destroy(&state->bt_open.bo_mutex); 7447eea693dSMark Johnson (void) ddi_soft_state_free(xpvtap_statep, state->bt_instance); 7457eea693dSMark Johnson } 7467eea693dSMark Johnson 7477eea693dSMark Johnson 7487eea693dSMark Johnson /* 7497eea693dSMark Johnson * xpvtap_intr() 7507eea693dSMark Johnson * this routine will be called when we have a request on the guest ring. 7517eea693dSMark Johnson */ 7527eea693dSMark Johnson static uint_t 7537eea693dSMark Johnson xpvtap_intr(caddr_t arg) 7547eea693dSMark Johnson { 7557eea693dSMark Johnson xpvtap_state_t *state; 7567eea693dSMark Johnson 7577eea693dSMark Johnson 7587eea693dSMark Johnson state = (xpvtap_state_t *)arg; 7597eea693dSMark Johnson 7607eea693dSMark Johnson /* wake thread, thread handles guest requests and user app responses */ 7617eea693dSMark Johnson mutex_enter(&state->bt_thread.ut_mutex); 7627eea693dSMark Johnson state->bt_thread.ut_wake = B_TRUE; 7637eea693dSMark Johnson cv_signal(&state->bt_thread.ut_wake_cv); 7647eea693dSMark Johnson mutex_exit(&state->bt_thread.ut_mutex); 7657eea693dSMark Johnson 7667eea693dSMark Johnson return (DDI_INTR_CLAIMED); 7677eea693dSMark Johnson } 7687eea693dSMark Johnson 7697eea693dSMark Johnson 7707eea693dSMark Johnson /* 7717eea693dSMark Johnson * xpvtap_segmf_register() 7727eea693dSMark Johnson */ 7737eea693dSMark Johnson static int 7747eea693dSMark Johnson xpvtap_segmf_register(xpvtap_state_t *state) 7757eea693dSMark Johnson { 7767eea693dSMark Johnson struct seg *seg; 7777eea693dSMark Johnson uint64_t pte_ma; 7787eea693dSMark Johnson struct as *as; 7797eea693dSMark Johnson caddr_t uaddr; 7807eea693dSMark Johnson uint_t pgcnt; 7817eea693dSMark Johnson int i; 7827eea693dSMark Johnson 7837eea693dSMark Johnson 7847eea693dSMark Johnson as = state->bt_map.um_as; 7857eea693dSMark Johnson pgcnt = btopr(state->bt_map.um_guest_size); 7867eea693dSMark Johnson uaddr = state->bt_map.um_guest_pages; 7877eea693dSMark Johnson 7887eea693dSMark Johnson if (pgcnt == 0) { 7897eea693dSMark Johnson return (DDI_FAILURE); 7907eea693dSMark Johnson } 7917eea693dSMark Johnson 792*dc32d872SJosef 'Jeff' Sipek AS_LOCK_ENTER(as, RW_READER); 7937eea693dSMark Johnson 7947eea693dSMark Johnson seg = as_findseg(as, state->bt_map.um_guest_pages, 0); 7957eea693dSMark Johnson if ((seg == NULL) || ((uaddr + state->bt_map.um_guest_size) > 7967eea693dSMark Johnson (seg->s_base + seg->s_size))) { 797*dc32d872SJosef 'Jeff' Sipek AS_LOCK_EXIT(as); 7987eea693dSMark Johnson return (DDI_FAILURE); 7997eea693dSMark Johnson } 8007eea693dSMark Johnson 8017eea693dSMark Johnson /* 8027eea693dSMark Johnson * lock down the htables so the HAT can't steal them. Register the 8037eea693dSMark Johnson * PTE MA's for each gref page with seg_mf so we can do user space 8047eea693dSMark Johnson * gref mappings. 8057eea693dSMark Johnson */ 8067eea693dSMark Johnson for (i = 0; i < pgcnt; i++) { 8077eea693dSMark Johnson hat_prepare_mapping(as->a_hat, uaddr, &pte_ma); 8087eea693dSMark Johnson hat_devload(as->a_hat, uaddr, PAGESIZE, (pfn_t)0, 8097eea693dSMark Johnson PROT_READ | PROT_WRITE | PROT_USER | HAT_UNORDERED_OK, 8107eea693dSMark Johnson HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK); 8117eea693dSMark Johnson hat_release_mapping(as->a_hat, uaddr); 8127eea693dSMark Johnson segmf_add_gref_pte(seg, uaddr, pte_ma); 8137eea693dSMark Johnson uaddr += PAGESIZE; 8147eea693dSMark Johnson } 8157eea693dSMark Johnson 8167eea693dSMark Johnson state->bt_map.um_registered = B_TRUE; 8177eea693dSMark Johnson 818*dc32d872SJosef 'Jeff' Sipek AS_LOCK_EXIT(as); 8197eea693dSMark Johnson 8207eea693dSMark Johnson return (DDI_SUCCESS); 8217eea693dSMark Johnson } 8227eea693dSMark Johnson 8237eea693dSMark Johnson 8247eea693dSMark Johnson /* 8257eea693dSMark Johnson * xpvtap_segmf_unregister() 8267eea693dSMark Johnson * as_callback routine 8277eea693dSMark Johnson */ 8287eea693dSMark Johnson /*ARGSUSED*/ 8297eea693dSMark Johnson static void 8307eea693dSMark Johnson xpvtap_segmf_unregister(struct as *as, void *arg, uint_t event) 8317eea693dSMark Johnson { 8327eea693dSMark Johnson xpvtap_state_t *state; 8337eea693dSMark Johnson caddr_t uaddr; 8347eea693dSMark Johnson uint_t pgcnt; 8357eea693dSMark Johnson int i; 8367eea693dSMark Johnson 8377eea693dSMark Johnson 8387eea693dSMark Johnson state = (xpvtap_state_t *)arg; 8397eea693dSMark Johnson if (!state->bt_map.um_registered) { 84055501136SMark Johnson /* remove the callback (which is this routine) */ 84155501136SMark Johnson (void) as_delete_callback(as, arg); 8427eea693dSMark Johnson return; 8437eea693dSMark Johnson } 8447eea693dSMark Johnson 8457eea693dSMark Johnson pgcnt = btopr(state->bt_map.um_guest_size); 8467eea693dSMark Johnson uaddr = state->bt_map.um_guest_pages; 8477eea693dSMark Johnson 8487eea693dSMark Johnson /* unmap any outstanding req's grefs */ 8497eea693dSMark Johnson xpvtap_rs_flush(state->bt_map.um_rs, xpvtap_user_request_unmap, state); 8507eea693dSMark Johnson 8517eea693dSMark Johnson /* Unlock the gref pages */ 8527eea693dSMark Johnson for (i = 0; i < pgcnt; i++) { 853*dc32d872SJosef 'Jeff' Sipek AS_LOCK_ENTER(as, RW_WRITER); 8547eea693dSMark Johnson hat_prepare_mapping(as->a_hat, uaddr, NULL); 8557eea693dSMark Johnson hat_unload(as->a_hat, uaddr, PAGESIZE, HAT_UNLOAD_UNLOCK); 8567eea693dSMark Johnson hat_release_mapping(as->a_hat, uaddr); 857*dc32d872SJosef 'Jeff' Sipek AS_LOCK_EXIT(as); 8587eea693dSMark Johnson uaddr += PAGESIZE; 8597eea693dSMark Johnson } 8607eea693dSMark Johnson 8617eea693dSMark Johnson /* remove the callback (which is this routine) */ 8627eea693dSMark Johnson (void) as_delete_callback(as, arg); 8637eea693dSMark Johnson 8647eea693dSMark Johnson state->bt_map.um_registered = B_FALSE; 8657eea693dSMark Johnson } 8667eea693dSMark Johnson 8677eea693dSMark Johnson 8687eea693dSMark Johnson /* 8697eea693dSMark Johnson * xpvtap_user_init() 8707eea693dSMark Johnson */ 8717eea693dSMark Johnson static int 8727eea693dSMark Johnson xpvtap_user_init(xpvtap_state_t *state) 8737eea693dSMark Johnson { 8747eea693dSMark Johnson xpvtap_user_map_t *map; 8757eea693dSMark Johnson int e; 8767eea693dSMark Johnson 8777eea693dSMark Johnson 8787eea693dSMark Johnson map = &state->bt_map; 8797eea693dSMark Johnson 8807eea693dSMark Johnson /* Setup the ring between the driver and user app */ 8817eea693dSMark Johnson e = xpvtap_user_ring_init(state); 8827eea693dSMark Johnson if (e != DDI_SUCCESS) { 8837eea693dSMark Johnson return (DDI_FAILURE); 8847eea693dSMark Johnson } 8857eea693dSMark Johnson 8867eea693dSMark Johnson /* 8877eea693dSMark Johnson * the user ring can handle BLKIF_RING_SIZE outstanding requests. This 8887eea693dSMark Johnson * is the same number of requests as the guest ring. Initialize the 8897eea693dSMark Johnson * state we use to track request IDs to the user app. These IDs will 8907eea693dSMark Johnson * also identify which group of gref pages correspond with the 8917eea693dSMark Johnson * request. 8927eea693dSMark Johnson */ 8937eea693dSMark Johnson xpvtap_rs_init(0, (BLKIF_RING_SIZE - 1), &map->um_rs); 8947eea693dSMark Johnson 8957eea693dSMark Johnson /* 8967eea693dSMark Johnson * allocate the space to store a copy of each outstanding requests. We 8977eea693dSMark Johnson * will need to reference the ID and the number of segments when we 8987eea693dSMark Johnson * get the response from the user app. 8997eea693dSMark Johnson */ 9007eea693dSMark Johnson map->um_outstanding_reqs = kmem_zalloc( 9017eea693dSMark Johnson sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE, 9027eea693dSMark Johnson KM_SLEEP); 9037eea693dSMark Johnson 9047eea693dSMark Johnson /* 9057eea693dSMark Johnson * initialize the thread we use to process guest requests and user 9067eea693dSMark Johnson * responses. 9077eea693dSMark Johnson */ 9087eea693dSMark Johnson e = xpvtap_user_thread_init(state); 9097eea693dSMark Johnson if (e != DDI_SUCCESS) { 9107eea693dSMark Johnson goto userinitfail_user_thread_init; 9117eea693dSMark Johnson } 9127eea693dSMark Johnson 9137eea693dSMark Johnson return (DDI_SUCCESS); 9147eea693dSMark Johnson 9157eea693dSMark Johnson userinitfail_user_thread_init: 9167eea693dSMark Johnson xpvtap_rs_fini(&map->um_rs); 9177eea693dSMark Johnson kmem_free(map->um_outstanding_reqs, 9187eea693dSMark Johnson sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE); 9197eea693dSMark Johnson xpvtap_user_ring_fini(state); 9207eea693dSMark Johnson return (DDI_FAILURE); 9217eea693dSMark Johnson } 9227eea693dSMark Johnson 9237eea693dSMark Johnson 9247eea693dSMark Johnson /* 9257eea693dSMark Johnson * xpvtap_user_ring_init() 9267eea693dSMark Johnson */ 9277eea693dSMark Johnson static int 9287eea693dSMark Johnson xpvtap_user_ring_init(xpvtap_state_t *state) 9297eea693dSMark Johnson { 9307eea693dSMark Johnson xpvtap_user_ring_t *usring; 9317eea693dSMark Johnson 9327eea693dSMark Johnson 9337eea693dSMark Johnson usring = &state->bt_user_ring; 9347eea693dSMark Johnson 9357eea693dSMark Johnson /* alocate and initialize the page for the shared user ring */ 9367eea693dSMark Johnson usring->ur_sring = (blkif_sring_t *)ddi_umem_alloc(PAGESIZE, 9377eea693dSMark Johnson DDI_UMEM_SLEEP, &usring->ur_cookie); 9387eea693dSMark Johnson SHARED_RING_INIT(usring->ur_sring); 9397eea693dSMark Johnson FRONT_RING_INIT(&usring->ur_ring, usring->ur_sring, PAGESIZE); 9407eea693dSMark Johnson usring->ur_prod_polled = 0; 9417eea693dSMark Johnson 9427eea693dSMark Johnson return (DDI_SUCCESS); 9437eea693dSMark Johnson } 9447eea693dSMark Johnson 9457eea693dSMark Johnson 9467eea693dSMark Johnson /* 9477eea693dSMark Johnson * xpvtap_user_thread_init() 9487eea693dSMark Johnson */ 9497eea693dSMark Johnson static int 9507eea693dSMark Johnson xpvtap_user_thread_init(xpvtap_state_t *state) 9517eea693dSMark Johnson { 9527eea693dSMark Johnson xpvtap_user_thread_t *thread; 9537eea693dSMark Johnson char taskqname[32]; 9547eea693dSMark Johnson 9557eea693dSMark Johnson 9567eea693dSMark Johnson thread = &state->bt_thread; 9577eea693dSMark Johnson 9587eea693dSMark Johnson mutex_init(&thread->ut_mutex, NULL, MUTEX_DRIVER, NULL); 9597eea693dSMark Johnson cv_init(&thread->ut_wake_cv, NULL, CV_DRIVER, NULL); 9607eea693dSMark Johnson cv_init(&thread->ut_exit_done_cv, NULL, CV_DRIVER, NULL); 9617eea693dSMark Johnson thread->ut_wake = B_FALSE; 9627eea693dSMark Johnson thread->ut_exit = B_FALSE; 9637eea693dSMark Johnson thread->ut_exit_done = B_TRUE; 9647eea693dSMark Johnson 9657eea693dSMark Johnson /* create but don't start the user thread */ 9667eea693dSMark Johnson (void) sprintf(taskqname, "xvptap_%d", state->bt_instance); 9677eea693dSMark Johnson thread->ut_taskq = ddi_taskq_create(state->bt_dip, taskqname, 1, 9687eea693dSMark Johnson TASKQ_DEFAULTPRI, 0); 9697eea693dSMark Johnson if (thread->ut_taskq == NULL) { 9707eea693dSMark Johnson goto userinitthrfail_taskq_create; 9717eea693dSMark Johnson } 9727eea693dSMark Johnson 9737eea693dSMark Johnson return (DDI_SUCCESS); 9747eea693dSMark Johnson 9757eea693dSMark Johnson userinitthrfail_taskq_dispatch: 9767eea693dSMark Johnson ddi_taskq_destroy(thread->ut_taskq); 9777eea693dSMark Johnson userinitthrfail_taskq_create: 9787eea693dSMark Johnson cv_destroy(&thread->ut_exit_done_cv); 9797eea693dSMark Johnson cv_destroy(&thread->ut_wake_cv); 9807eea693dSMark Johnson mutex_destroy(&thread->ut_mutex); 9817eea693dSMark Johnson 9827eea693dSMark Johnson return (DDI_FAILURE); 9837eea693dSMark Johnson } 9847eea693dSMark Johnson 9857eea693dSMark Johnson 9867eea693dSMark Johnson /* 9877eea693dSMark Johnson * xpvtap_user_thread_start() 9887eea693dSMark Johnson */ 9897eea693dSMark Johnson static void 9907eea693dSMark Johnson xpvtap_user_thread_start(caddr_t arg) 9917eea693dSMark Johnson { 9927eea693dSMark Johnson xpvtap_user_thread_t *thread; 9937eea693dSMark Johnson xpvtap_state_t *state; 9947eea693dSMark Johnson int e; 9957eea693dSMark Johnson 9967eea693dSMark Johnson 9977eea693dSMark Johnson state = (xpvtap_state_t *)arg; 9987eea693dSMark Johnson thread = &state->bt_thread; 9997eea693dSMark Johnson 10007eea693dSMark Johnson /* start the user thread */ 10017eea693dSMark Johnson thread->ut_exit_done = B_FALSE; 10027eea693dSMark Johnson e = ddi_taskq_dispatch(thread->ut_taskq, xpvtap_user_thread, state, 10037eea693dSMark Johnson DDI_SLEEP); 10047eea693dSMark Johnson if (e != DDI_SUCCESS) { 10057eea693dSMark Johnson thread->ut_exit_done = B_TRUE; 10067eea693dSMark Johnson cmn_err(CE_WARN, "Unable to start user thread\n"); 10077eea693dSMark Johnson } 10087eea693dSMark Johnson } 10097eea693dSMark Johnson 10107eea693dSMark Johnson 10117eea693dSMark Johnson /* 10127eea693dSMark Johnson * xpvtap_user_thread_stop() 10137eea693dSMark Johnson */ 10147eea693dSMark Johnson static void 10157eea693dSMark Johnson xpvtap_user_thread_stop(xpvtap_state_t *state) 10167eea693dSMark Johnson { 10177eea693dSMark Johnson /* wake thread so it can exit */ 10187eea693dSMark Johnson mutex_enter(&state->bt_thread.ut_mutex); 10197eea693dSMark Johnson state->bt_thread.ut_wake = B_TRUE; 10207eea693dSMark Johnson state->bt_thread.ut_exit = B_TRUE; 10217eea693dSMark Johnson cv_signal(&state->bt_thread.ut_wake_cv); 10227eea693dSMark Johnson if (!state->bt_thread.ut_exit_done) { 10237eea693dSMark Johnson cv_wait(&state->bt_thread.ut_exit_done_cv, 10247eea693dSMark Johnson &state->bt_thread.ut_mutex); 10257eea693dSMark Johnson } 10267eea693dSMark Johnson mutex_exit(&state->bt_thread.ut_mutex); 10277eea693dSMark Johnson ASSERT(state->bt_thread.ut_exit_done); 10287eea693dSMark Johnson } 10297eea693dSMark Johnson 10307eea693dSMark Johnson 10317eea693dSMark Johnson /* 10327eea693dSMark Johnson * xpvtap_user_fini() 10337eea693dSMark Johnson */ 10347eea693dSMark Johnson static void 10357eea693dSMark Johnson xpvtap_user_fini(xpvtap_state_t *state) 10367eea693dSMark Johnson { 10377eea693dSMark Johnson xpvtap_user_map_t *map; 10387eea693dSMark Johnson 10397eea693dSMark Johnson 10407eea693dSMark Johnson map = &state->bt_map; 10417eea693dSMark Johnson 10427eea693dSMark Johnson xpvtap_user_thread_fini(state); 10437eea693dSMark Johnson xpvtap_rs_fini(&map->um_rs); 10447eea693dSMark Johnson kmem_free(map->um_outstanding_reqs, 10457eea693dSMark Johnson sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE); 10467eea693dSMark Johnson xpvtap_user_ring_fini(state); 10477eea693dSMark Johnson } 10487eea693dSMark Johnson 10497eea693dSMark Johnson 10507eea693dSMark Johnson /* 10517eea693dSMark Johnson * xpvtap_user_ring_fini() 10527eea693dSMark Johnson */ 10537eea693dSMark Johnson static void 10547eea693dSMark Johnson xpvtap_user_ring_fini(xpvtap_state_t *state) 10557eea693dSMark Johnson { 10567eea693dSMark Johnson ddi_umem_free(state->bt_user_ring.ur_cookie); 10577eea693dSMark Johnson } 10587eea693dSMark Johnson 10597eea693dSMark Johnson 10607eea693dSMark Johnson /* 10617eea693dSMark Johnson * xpvtap_user_thread_fini() 10627eea693dSMark Johnson */ 10637eea693dSMark Johnson static void 10647eea693dSMark Johnson xpvtap_user_thread_fini(xpvtap_state_t *state) 10657eea693dSMark Johnson { 10667eea693dSMark Johnson ddi_taskq_destroy(state->bt_thread.ut_taskq); 10677eea693dSMark Johnson cv_destroy(&state->bt_thread.ut_exit_done_cv); 10687eea693dSMark Johnson cv_destroy(&state->bt_thread.ut_wake_cv); 10697eea693dSMark Johnson mutex_destroy(&state->bt_thread.ut_mutex); 10707eea693dSMark Johnson } 10717eea693dSMark Johnson 10727eea693dSMark Johnson 10737eea693dSMark Johnson /* 10747eea693dSMark Johnson * xpvtap_user_thread() 10757eea693dSMark Johnson */ 10767eea693dSMark Johnson static void 10777eea693dSMark Johnson xpvtap_user_thread(void *arg) 10787eea693dSMark Johnson { 10797eea693dSMark Johnson xpvtap_user_thread_t *thread; 10807eea693dSMark Johnson blkif_response_t resp; 10817eea693dSMark Johnson xpvtap_state_t *state; 10827eea693dSMark Johnson blkif_request_t req; 10837eea693dSMark Johnson boolean_t b; 10847eea693dSMark Johnson uint_t uid; 10857eea693dSMark Johnson int e; 10867eea693dSMark Johnson 10877eea693dSMark Johnson 10887eea693dSMark Johnson state = (xpvtap_state_t *)arg; 10897eea693dSMark Johnson thread = &state->bt_thread; 10907eea693dSMark Johnson 10917eea693dSMark Johnson xpvtap_thread_start: 10927eea693dSMark Johnson /* See if we are supposed to exit */ 10937eea693dSMark Johnson mutex_enter(&thread->ut_mutex); 10947eea693dSMark Johnson if (thread->ut_exit) { 10957eea693dSMark Johnson thread->ut_exit_done = B_TRUE; 10967eea693dSMark Johnson cv_signal(&state->bt_thread.ut_exit_done_cv); 10977eea693dSMark Johnson mutex_exit(&thread->ut_mutex); 10987eea693dSMark Johnson return; 10997eea693dSMark Johnson } 11007eea693dSMark Johnson 11017eea693dSMark Johnson /* 11027eea693dSMark Johnson * if we aren't supposed to be awake, wait until someone wakes us. 11037eea693dSMark Johnson * when we wake up, check for a kill or someone telling us to exit. 11047eea693dSMark Johnson */ 11057eea693dSMark Johnson if (!thread->ut_wake) { 11067eea693dSMark Johnson e = cv_wait_sig(&thread->ut_wake_cv, &thread->ut_mutex); 11077eea693dSMark Johnson if ((e == 0) || (thread->ut_exit)) { 11087eea693dSMark Johnson thread->ut_exit = B_TRUE; 11097eea693dSMark Johnson mutex_exit(&thread->ut_mutex); 11107eea693dSMark Johnson goto xpvtap_thread_start; 11117eea693dSMark Johnson } 11127eea693dSMark Johnson } 11137eea693dSMark Johnson 11147eea693dSMark Johnson /* if someone didn't wake us, go back to the start of the thread */ 11157eea693dSMark Johnson if (!thread->ut_wake) { 11167eea693dSMark Johnson mutex_exit(&thread->ut_mutex); 11177eea693dSMark Johnson goto xpvtap_thread_start; 11187eea693dSMark Johnson } 11197eea693dSMark Johnson 11207eea693dSMark Johnson /* we are awake */ 11217eea693dSMark Johnson thread->ut_wake = B_FALSE; 11227eea693dSMark Johnson mutex_exit(&thread->ut_mutex); 11237eea693dSMark Johnson 11247eea693dSMark Johnson /* process requests from the guest */ 11257eea693dSMark Johnson do { 11267eea693dSMark Johnson /* 11277eea693dSMark Johnson * check for requests from the guest. if we don't have any, 11287eea693dSMark Johnson * break out of the loop. 11297eea693dSMark Johnson */ 11307eea693dSMark Johnson e = blk_ring_request_get(state->bt_guest_ring, &req); 11317eea693dSMark Johnson if (e == B_FALSE) { 11327eea693dSMark Johnson break; 11337eea693dSMark Johnson } 11347eea693dSMark Johnson 11357eea693dSMark Johnson /* we got a request, map the grefs into the user app's VA */ 11367eea693dSMark Johnson e = xpvtap_user_request_map(state, &req, &uid); 11377eea693dSMark Johnson if (e != DDI_SUCCESS) { 11387eea693dSMark Johnson /* 11397eea693dSMark Johnson * If we couldn't map the request (e.g. user app hasn't 11407eea693dSMark Johnson * opened the device yet), requeue it and try again 11417eea693dSMark Johnson * later 11427eea693dSMark Johnson */ 11437eea693dSMark Johnson blk_ring_request_requeue(state->bt_guest_ring); 11447eea693dSMark Johnson break; 11457eea693dSMark Johnson } 11467eea693dSMark Johnson 11477eea693dSMark Johnson /* push the request to the user app */ 11487eea693dSMark Johnson e = xpvtap_user_request_push(state, &req, uid); 11497eea693dSMark Johnson if (e != DDI_SUCCESS) { 11507eea693dSMark Johnson resp.id = req.id; 11517eea693dSMark Johnson resp.operation = req.operation; 11527eea693dSMark Johnson resp.status = BLKIF_RSP_ERROR; 11537eea693dSMark Johnson blk_ring_response_put(state->bt_guest_ring, &resp); 11547eea693dSMark Johnson } 11557eea693dSMark Johnson } while (!thread->ut_exit); 11567eea693dSMark Johnson 11577eea693dSMark Johnson /* process reponses from the user app */ 11587eea693dSMark Johnson do { 11597eea693dSMark Johnson /* 11607eea693dSMark Johnson * check for responses from the user app. if we don't have any, 11617eea693dSMark Johnson * break out of the loop. 11627eea693dSMark Johnson */ 11637eea693dSMark Johnson b = xpvtap_user_response_get(state, &resp, &uid); 11647eea693dSMark Johnson if (b != B_TRUE) { 11657eea693dSMark Johnson break; 11667eea693dSMark Johnson } 11677eea693dSMark Johnson 11687eea693dSMark Johnson /* 11697eea693dSMark Johnson * if we got a response, unmap the grefs from the matching 11707eea693dSMark Johnson * request. 11717eea693dSMark Johnson */ 11727eea693dSMark Johnson xpvtap_user_request_unmap(state, uid); 11737eea693dSMark Johnson 11747eea693dSMark Johnson /* push the response to the guest */ 11757eea693dSMark Johnson blk_ring_response_put(state->bt_guest_ring, &resp); 11767eea693dSMark Johnson } while (!thread->ut_exit); 11777eea693dSMark Johnson 11787eea693dSMark Johnson goto xpvtap_thread_start; 11797eea693dSMark Johnson } 11807eea693dSMark Johnson 11817eea693dSMark Johnson 11827eea693dSMark Johnson /* 11837eea693dSMark Johnson * xpvtap_user_request_map() 11847eea693dSMark Johnson */ 11857eea693dSMark Johnson static int 11867eea693dSMark Johnson xpvtap_user_request_map(xpvtap_state_t *state, blkif_request_t *req, 11877eea693dSMark Johnson uint_t *uid) 11887eea693dSMark Johnson { 11897eea693dSMark Johnson grant_ref_t gref[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 11907eea693dSMark Johnson struct seg *seg; 11917eea693dSMark Johnson struct as *as; 11927eea693dSMark Johnson domid_t domid; 11937eea693dSMark Johnson caddr_t uaddr; 11947eea693dSMark Johnson uint_t flags; 11957eea693dSMark Johnson int i; 11967eea693dSMark Johnson int e; 11977eea693dSMark Johnson 11987eea693dSMark Johnson 11997eea693dSMark Johnson domid = xvdi_get_oeid(state->bt_dip); 12007eea693dSMark Johnson 12017eea693dSMark Johnson as = state->bt_map.um_as; 12027eea693dSMark Johnson if ((as == NULL) || (state->bt_map.um_guest_pages == NULL)) { 12037eea693dSMark Johnson return (DDI_FAILURE); 12047eea693dSMark Johnson } 12057eea693dSMark Johnson 12067eea693dSMark Johnson /* has to happen after segmap returns */ 12077eea693dSMark Johnson if (!state->bt_map.um_registered) { 12087eea693dSMark Johnson /* register the pte's with segmf */ 12097eea693dSMark Johnson e = xpvtap_segmf_register(state); 12107eea693dSMark Johnson if (e != DDI_SUCCESS) { 12117eea693dSMark Johnson return (DDI_FAILURE); 12127eea693dSMark Johnson } 12137eea693dSMark Johnson } 12147eea693dSMark Johnson 12157eea693dSMark Johnson /* alloc an ID for the user ring */ 12167eea693dSMark Johnson e = xpvtap_rs_alloc(state->bt_map.um_rs, uid); 12177eea693dSMark Johnson if (e != DDI_SUCCESS) { 12187eea693dSMark Johnson return (DDI_FAILURE); 12197eea693dSMark Johnson } 12207eea693dSMark Johnson 12217eea693dSMark Johnson /* if we don't have any segments to map, we're done */ 12227eea693dSMark Johnson if ((req->operation == BLKIF_OP_WRITE_BARRIER) || 12237eea693dSMark Johnson (req->operation == BLKIF_OP_FLUSH_DISKCACHE) || 12247eea693dSMark Johnson (req->nr_segments == 0)) { 12257eea693dSMark Johnson return (DDI_SUCCESS); 12267eea693dSMark Johnson } 12277eea693dSMark Johnson 12287eea693dSMark Johnson /* get the apps gref address */ 12297eea693dSMark Johnson uaddr = XPVTAP_GREF_REQADDR(state->bt_map.um_guest_pages, *uid); 12307eea693dSMark Johnson 1231*dc32d872SJosef 'Jeff' Sipek AS_LOCK_ENTER(as, RW_READER); 12327eea693dSMark Johnson seg = as_findseg(as, state->bt_map.um_guest_pages, 0); 12337eea693dSMark Johnson if ((seg == NULL) || ((uaddr + mmu_ptob(req->nr_segments)) > 12347eea693dSMark Johnson (seg->s_base + seg->s_size))) { 1235*dc32d872SJosef 'Jeff' Sipek AS_LOCK_EXIT(as); 12367eea693dSMark Johnson return (DDI_FAILURE); 12377eea693dSMark Johnson } 12387eea693dSMark Johnson 12397eea693dSMark Johnson /* if we are reading from disk, we are writing into memory */ 12407eea693dSMark Johnson flags = 0; 12417eea693dSMark Johnson if (req->operation == BLKIF_OP_READ) { 12427eea693dSMark Johnson flags |= SEGMF_GREF_WR; 12437eea693dSMark Johnson } 12447eea693dSMark Johnson 12457eea693dSMark Johnson /* Load the grefs into seg_mf */ 12467eea693dSMark Johnson for (i = 0; i < req->nr_segments; i++) { 12477eea693dSMark Johnson gref[i] = req->seg[i].gref; 12487eea693dSMark Johnson } 12497eea693dSMark Johnson (void) segmf_add_grefs(seg, uaddr, flags, gref, req->nr_segments, 12507eea693dSMark Johnson domid); 12517eea693dSMark Johnson 1252*dc32d872SJosef 'Jeff' Sipek AS_LOCK_EXIT(as); 12537eea693dSMark Johnson 12547eea693dSMark Johnson return (DDI_SUCCESS); 12557eea693dSMark Johnson } 12567eea693dSMark Johnson 12577eea693dSMark Johnson 12587eea693dSMark Johnson /* 12597eea693dSMark Johnson * xpvtap_user_request_push() 12607eea693dSMark Johnson */ 12617eea693dSMark Johnson static int 12627eea693dSMark Johnson xpvtap_user_request_push(xpvtap_state_t *state, blkif_request_t *req, 12637eea693dSMark Johnson uint_t uid) 12647eea693dSMark Johnson { 12657eea693dSMark Johnson blkif_request_t *outstanding_req; 12667eea693dSMark Johnson blkif_front_ring_t *uring; 12677eea693dSMark Johnson blkif_request_t *target; 12687eea693dSMark Johnson xpvtap_user_map_t *map; 12697eea693dSMark Johnson 12707eea693dSMark Johnson 12717eea693dSMark Johnson uring = &state->bt_user_ring.ur_ring; 12727eea693dSMark Johnson map = &state->bt_map; 12737eea693dSMark Johnson 12747eea693dSMark Johnson target = RING_GET_REQUEST(uring, uring->req_prod_pvt); 12757eea693dSMark Johnson 12767eea693dSMark Johnson /* 12777eea693dSMark Johnson * Save request from the frontend. used for ID mapping and unmap 12787eea693dSMark Johnson * on response/cleanup 12797eea693dSMark Johnson */ 12807eea693dSMark Johnson outstanding_req = &map->um_outstanding_reqs[uid]; 12817eea693dSMark Johnson bcopy(req, outstanding_req, sizeof (*outstanding_req)); 12827eea693dSMark Johnson 12837eea693dSMark Johnson /* put the request on the user ring */ 12847eea693dSMark Johnson bcopy(req, target, sizeof (*req)); 12857eea693dSMark Johnson target->id = (uint64_t)uid; 12867eea693dSMark Johnson uring->req_prod_pvt++; 12877eea693dSMark Johnson 12887eea693dSMark Johnson pollwakeup(&state->bt_pollhead, POLLIN | POLLRDNORM); 12897eea693dSMark Johnson 12907eea693dSMark Johnson return (DDI_SUCCESS); 12917eea693dSMark Johnson } 12927eea693dSMark Johnson 12937eea693dSMark Johnson 12947eea693dSMark Johnson static void 12957eea693dSMark Johnson xpvtap_user_request_unmap(xpvtap_state_t *state, uint_t uid) 12967eea693dSMark Johnson { 12977eea693dSMark Johnson blkif_request_t *req; 12987eea693dSMark Johnson struct seg *seg; 12997eea693dSMark Johnson struct as *as; 13007eea693dSMark Johnson caddr_t uaddr; 13017eea693dSMark Johnson int e; 13027eea693dSMark Johnson 13037eea693dSMark Johnson 13047eea693dSMark Johnson as = state->bt_map.um_as; 13057eea693dSMark Johnson if (as == NULL) { 13067eea693dSMark Johnson return; 13077eea693dSMark Johnson } 13087eea693dSMark Johnson 13097eea693dSMark Johnson /* get a copy of the original request */ 13107eea693dSMark Johnson req = &state->bt_map.um_outstanding_reqs[uid]; 13117eea693dSMark Johnson 13127eea693dSMark Johnson /* unmap the grefs for this request */ 13137eea693dSMark Johnson if ((req->operation != BLKIF_OP_WRITE_BARRIER) && 13147eea693dSMark Johnson (req->operation != BLKIF_OP_FLUSH_DISKCACHE) && 13157eea693dSMark Johnson (req->nr_segments != 0)) { 13167eea693dSMark Johnson uaddr = XPVTAP_GREF_REQADDR(state->bt_map.um_guest_pages, uid); 1317*dc32d872SJosef 'Jeff' Sipek AS_LOCK_ENTER(as, RW_READER); 13187eea693dSMark Johnson seg = as_findseg(as, state->bt_map.um_guest_pages, 0); 13197eea693dSMark Johnson if ((seg == NULL) || ((uaddr + mmu_ptob(req->nr_segments)) > 13207eea693dSMark Johnson (seg->s_base + seg->s_size))) { 1321*dc32d872SJosef 'Jeff' Sipek AS_LOCK_EXIT(as); 13227eea693dSMark Johnson xpvtap_rs_free(state->bt_map.um_rs, uid); 13237eea693dSMark Johnson return; 13247eea693dSMark Johnson } 13257eea693dSMark Johnson 13267eea693dSMark Johnson e = segmf_release_grefs(seg, uaddr, req->nr_segments); 13277eea693dSMark Johnson if (e != 0) { 13287eea693dSMark Johnson cmn_err(CE_WARN, "unable to release grefs"); 13297eea693dSMark Johnson } 13307eea693dSMark Johnson 1331*dc32d872SJosef 'Jeff' Sipek AS_LOCK_EXIT(as); 13327eea693dSMark Johnson } 13337eea693dSMark Johnson 13347eea693dSMark Johnson /* free up the user ring id */ 13357eea693dSMark Johnson xpvtap_rs_free(state->bt_map.um_rs, uid); 13367eea693dSMark Johnson } 13377eea693dSMark Johnson 13387eea693dSMark Johnson 13397eea693dSMark Johnson static int 13407eea693dSMark Johnson xpvtap_user_response_get(xpvtap_state_t *state, blkif_response_t *resp, 13417eea693dSMark Johnson uint_t *uid) 13427eea693dSMark Johnson { 13437eea693dSMark Johnson blkif_front_ring_t *uring; 13447eea693dSMark Johnson blkif_response_t *target; 13457eea693dSMark Johnson 13467eea693dSMark Johnson 13477eea693dSMark Johnson uring = &state->bt_user_ring.ur_ring; 13487eea693dSMark Johnson 13497eea693dSMark Johnson if (!RING_HAS_UNCONSUMED_RESPONSES(uring)) { 13507eea693dSMark Johnson return (B_FALSE); 13517eea693dSMark Johnson } 13527eea693dSMark Johnson 13537eea693dSMark Johnson target = NULL; 13547eea693dSMark Johnson target = RING_GET_RESPONSE(uring, uring->rsp_cons); 13557eea693dSMark Johnson if (target == NULL) { 13567eea693dSMark Johnson return (B_FALSE); 13577eea693dSMark Johnson } 13587eea693dSMark Johnson 13597eea693dSMark Johnson /* copy out the user app response */ 13607eea693dSMark Johnson bcopy(target, resp, sizeof (*resp)); 13617eea693dSMark Johnson uring->rsp_cons++; 13627eea693dSMark Johnson 13637eea693dSMark Johnson /* restore the quests id from the original request */ 13647eea693dSMark Johnson *uid = (uint_t)resp->id; 13657eea693dSMark Johnson resp->id = state->bt_map.um_outstanding_reqs[*uid].id; 13667eea693dSMark Johnson 13677eea693dSMark Johnson return (B_TRUE); 13687eea693dSMark Johnson } 13697eea693dSMark Johnson 13707eea693dSMark Johnson 13717eea693dSMark Johnson /* 13727eea693dSMark Johnson * xpvtap_user_app_stop() 13737eea693dSMark Johnson */ 13747eea693dSMark Johnson static void xpvtap_user_app_stop(caddr_t arg) 13757eea693dSMark Johnson { 13767eea693dSMark Johnson xpvtap_state_t *state; 13777eea693dSMark Johnson clock_t rc; 13787eea693dSMark Johnson 13797eea693dSMark Johnson state = (xpvtap_state_t *)arg; 13807eea693dSMark Johnson 13817eea693dSMark Johnson /* 13827eea693dSMark Johnson * Give the app 10 secs to exit. If it doesn't exit, it's not a serious 13837eea693dSMark Johnson * problem, we just won't auto-detach the driver. 13847eea693dSMark Johnson */ 13857eea693dSMark Johnson mutex_enter(&state->bt_open.bo_mutex); 13867eea693dSMark Johnson if (state->bt_open.bo_opened) { 1387d3d50737SRafael Vanoni rc = cv_reltimedwait(&state->bt_open.bo_exit_cv, 1388d3d50737SRafael Vanoni &state->bt_open.bo_mutex, drv_usectohz(10000000), 1389d3d50737SRafael Vanoni TR_CLOCK_TICK); 13907eea693dSMark Johnson if (rc <= 0) { 13917eea693dSMark Johnson cmn_err(CE_NOTE, "!user process still has driver open, " 13927eea693dSMark Johnson "deferring detach\n"); 13937eea693dSMark Johnson } 13947eea693dSMark Johnson } 13957eea693dSMark Johnson mutex_exit(&state->bt_open.bo_mutex); 13967eea693dSMark Johnson } 13977eea693dSMark Johnson 13987eea693dSMark Johnson 13997eea693dSMark Johnson /* 14007eea693dSMark Johnson * xpvtap_rs_init() 14017eea693dSMark Johnson * Initialize the resource structure. init() returns a handle to be used 14027eea693dSMark Johnson * for the rest of the resource functions. This code is written assuming 14037eea693dSMark Johnson * that min_val will be close to 0. Therefore, we will allocate the free 14047eea693dSMark Johnson * buffer only taking max_val into account. 14057eea693dSMark Johnson */ 14067eea693dSMark Johnson static void 14077eea693dSMark Johnson xpvtap_rs_init(uint_t min_val, uint_t max_val, xpvtap_rs_hdl_t *handle) 14087eea693dSMark Johnson { 14097eea693dSMark Johnson xpvtap_rs_t *rstruct; 14107eea693dSMark Johnson uint_t array_size; 14117eea693dSMark Johnson uint_t index; 14127eea693dSMark Johnson 14137eea693dSMark Johnson 14147eea693dSMark Johnson ASSERT(handle != NULL); 14157eea693dSMark Johnson ASSERT(min_val < max_val); 14167eea693dSMark Johnson 14177eea693dSMark Johnson /* alloc space for resource structure */ 14187eea693dSMark Johnson rstruct = kmem_alloc(sizeof (xpvtap_rs_t), KM_SLEEP); 14197eea693dSMark Johnson 14207eea693dSMark Johnson /* 14217eea693dSMark Johnson * Test to see if the max value is 64-bit aligned. If so, we don't need 14227eea693dSMark Johnson * to allocate an extra 64-bit word. alloc space for free buffer 14237eea693dSMark Johnson * (8 bytes per uint64_t). 14247eea693dSMark Johnson */ 14257eea693dSMark Johnson if ((max_val & 0x3F) == 0) { 14267eea693dSMark Johnson rstruct->rs_free_size = (max_val >> 6) * 8; 14277eea693dSMark Johnson } else { 14287eea693dSMark Johnson rstruct->rs_free_size = ((max_val >> 6) + 1) * 8; 14297eea693dSMark Johnson } 14307eea693dSMark Johnson rstruct->rs_free = kmem_alloc(rstruct->rs_free_size, KM_SLEEP); 14317eea693dSMark Johnson 14327eea693dSMark Johnson /* Initialize resource structure */ 14337eea693dSMark Johnson rstruct->rs_min = min_val; 14347eea693dSMark Johnson rstruct->rs_last = min_val; 14357eea693dSMark Johnson rstruct->rs_max = max_val; 14367eea693dSMark Johnson mutex_init(&rstruct->rs_mutex, NULL, MUTEX_DRIVER, NULL); 14377eea693dSMark Johnson rstruct->rs_flushing = B_FALSE; 14387eea693dSMark Johnson 14397eea693dSMark Johnson /* Mark all resources as free */ 14407eea693dSMark Johnson array_size = rstruct->rs_free_size >> 3; 14417eea693dSMark Johnson for (index = 0; index < array_size; index++) { 14427eea693dSMark Johnson rstruct->rs_free[index] = (uint64_t)0xFFFFFFFFFFFFFFFF; 14437eea693dSMark Johnson } 14447eea693dSMark Johnson 14457eea693dSMark Johnson /* setup handle which is returned from this function */ 14467eea693dSMark Johnson *handle = rstruct; 14477eea693dSMark Johnson } 14487eea693dSMark Johnson 14497eea693dSMark Johnson 14507eea693dSMark Johnson /* 14517eea693dSMark Johnson * xpvtap_rs_fini() 14527eea693dSMark Johnson * Frees up the space allocated in init(). Notice that a pointer to the 14537eea693dSMark Johnson * handle is used for the parameter. fini() will set the handle to NULL 14547eea693dSMark Johnson * before returning. 14557eea693dSMark Johnson */ 14567eea693dSMark Johnson static void 14577eea693dSMark Johnson xpvtap_rs_fini(xpvtap_rs_hdl_t *handle) 14587eea693dSMark Johnson { 14597eea693dSMark Johnson xpvtap_rs_t *rstruct; 14607eea693dSMark Johnson 14617eea693dSMark Johnson 14627eea693dSMark Johnson ASSERT(handle != NULL); 14637eea693dSMark Johnson 14647eea693dSMark Johnson rstruct = (xpvtap_rs_t *)*handle; 14657eea693dSMark Johnson 14667eea693dSMark Johnson mutex_destroy(&rstruct->rs_mutex); 14677eea693dSMark Johnson kmem_free(rstruct->rs_free, rstruct->rs_free_size); 14687eea693dSMark Johnson kmem_free(rstruct, sizeof (xpvtap_rs_t)); 14697eea693dSMark Johnson 14707eea693dSMark Johnson /* set handle to null. This helps catch bugs. */ 14717eea693dSMark Johnson *handle = NULL; 14727eea693dSMark Johnson } 14737eea693dSMark Johnson 14747eea693dSMark Johnson 14757eea693dSMark Johnson /* 14767eea693dSMark Johnson * xpvtap_rs_alloc() 14777eea693dSMark Johnson * alloc a resource. If alloc fails, we are out of resources. 14787eea693dSMark Johnson */ 14797eea693dSMark Johnson static int 14807eea693dSMark Johnson xpvtap_rs_alloc(xpvtap_rs_hdl_t handle, uint_t *resource) 14817eea693dSMark Johnson { 14827eea693dSMark Johnson xpvtap_rs_t *rstruct; 14837eea693dSMark Johnson uint_t array_idx; 14847eea693dSMark Johnson uint64_t free; 14857eea693dSMark Johnson uint_t index; 14867eea693dSMark Johnson uint_t last; 14877eea693dSMark Johnson uint_t min; 14887eea693dSMark Johnson uint_t max; 14897eea693dSMark Johnson 14907eea693dSMark Johnson 14917eea693dSMark Johnson ASSERT(handle != NULL); 14927eea693dSMark Johnson ASSERT(resource != NULL); 14937eea693dSMark Johnson 14947eea693dSMark Johnson rstruct = (xpvtap_rs_t *)handle; 14957eea693dSMark Johnson 14967eea693dSMark Johnson mutex_enter(&rstruct->rs_mutex); 14977eea693dSMark Johnson min = rstruct->rs_min; 14987eea693dSMark Johnson max = rstruct->rs_max; 14997eea693dSMark Johnson 15007eea693dSMark Johnson /* 15017eea693dSMark Johnson * Find a free resource. This will return out of the loop once it finds 15027eea693dSMark Johnson * a free resource. There are a total of 'max'-'min'+1 resources. 15037eea693dSMark Johnson * Performs a round robin allocation. 15047eea693dSMark Johnson */ 15057eea693dSMark Johnson for (index = min; index <= max; index++) { 15067eea693dSMark Johnson 15077eea693dSMark Johnson array_idx = rstruct->rs_last >> 6; 15087eea693dSMark Johnson free = rstruct->rs_free[array_idx]; 15097eea693dSMark Johnson last = rstruct->rs_last & 0x3F; 15107eea693dSMark Johnson 15117eea693dSMark Johnson /* if the next resource to check is free */ 15127eea693dSMark Johnson if ((free & ((uint64_t)1 << last)) != 0) { 15137eea693dSMark Johnson /* we are using this resource */ 15147eea693dSMark Johnson *resource = rstruct->rs_last; 15157eea693dSMark Johnson 15167eea693dSMark Johnson /* take it out of the free list */ 15177eea693dSMark Johnson rstruct->rs_free[array_idx] &= ~((uint64_t)1 << last); 15187eea693dSMark Johnson 15197eea693dSMark Johnson /* 15207eea693dSMark Johnson * increment the last count so we start checking the 15217eea693dSMark Johnson * next resource on the next alloc(). Note the rollover 15227eea693dSMark Johnson * at 'max'+1. 15237eea693dSMark Johnson */ 15247eea693dSMark Johnson rstruct->rs_last++; 15257eea693dSMark Johnson if (rstruct->rs_last > max) { 15267eea693dSMark Johnson rstruct->rs_last = rstruct->rs_min; 15277eea693dSMark Johnson } 15287eea693dSMark Johnson 15297eea693dSMark Johnson /* unlock the resource structure */ 15307eea693dSMark Johnson mutex_exit(&rstruct->rs_mutex); 15317eea693dSMark Johnson 15327eea693dSMark Johnson return (DDI_SUCCESS); 15337eea693dSMark Johnson } 15347eea693dSMark Johnson 15357eea693dSMark Johnson /* 15367eea693dSMark Johnson * This resource is not free, lets go to the next one. Note the 15377eea693dSMark Johnson * rollover at 'max'. 15387eea693dSMark Johnson */ 15397eea693dSMark Johnson rstruct->rs_last++; 15407eea693dSMark Johnson if (rstruct->rs_last > max) { 15417eea693dSMark Johnson rstruct->rs_last = rstruct->rs_min; 15427eea693dSMark Johnson } 15437eea693dSMark Johnson } 15447eea693dSMark Johnson 15457eea693dSMark Johnson mutex_exit(&rstruct->rs_mutex); 15467eea693dSMark Johnson 15477eea693dSMark Johnson return (DDI_FAILURE); 15487eea693dSMark Johnson } 15497eea693dSMark Johnson 15507eea693dSMark Johnson 15517eea693dSMark Johnson /* 15527eea693dSMark Johnson * xpvtap_rs_free() 15537eea693dSMark Johnson * Free the previously alloc'd resource. Once a resource has been free'd, 15547eea693dSMark Johnson * it can be used again when alloc is called. 15557eea693dSMark Johnson */ 15567eea693dSMark Johnson static void 15577eea693dSMark Johnson xpvtap_rs_free(xpvtap_rs_hdl_t handle, uint_t resource) 15587eea693dSMark Johnson { 15597eea693dSMark Johnson xpvtap_rs_t *rstruct; 15607eea693dSMark Johnson uint_t array_idx; 15617eea693dSMark Johnson uint_t offset; 15627eea693dSMark Johnson 15637eea693dSMark Johnson 15647eea693dSMark Johnson ASSERT(handle != NULL); 15657eea693dSMark Johnson 15667eea693dSMark Johnson rstruct = (xpvtap_rs_t *)handle; 15677eea693dSMark Johnson ASSERT(resource >= rstruct->rs_min); 15687eea693dSMark Johnson ASSERT(resource <= rstruct->rs_max); 15697eea693dSMark Johnson 15707eea693dSMark Johnson if (!rstruct->rs_flushing) { 15717eea693dSMark Johnson mutex_enter(&rstruct->rs_mutex); 15727eea693dSMark Johnson } 15737eea693dSMark Johnson 15747eea693dSMark Johnson /* Put the resource back in the free list */ 15757eea693dSMark Johnson array_idx = resource >> 6; 15767eea693dSMark Johnson offset = resource & 0x3F; 15777eea693dSMark Johnson rstruct->rs_free[array_idx] |= ((uint64_t)1 << offset); 15787eea693dSMark Johnson 15797eea693dSMark Johnson if (!rstruct->rs_flushing) { 15807eea693dSMark Johnson mutex_exit(&rstruct->rs_mutex); 15817eea693dSMark Johnson } 15827eea693dSMark Johnson } 15837eea693dSMark Johnson 15847eea693dSMark Johnson 15857eea693dSMark Johnson /* 15867eea693dSMark Johnson * xpvtap_rs_flush() 15877eea693dSMark Johnson */ 15887eea693dSMark Johnson static void 15897eea693dSMark Johnson xpvtap_rs_flush(xpvtap_rs_hdl_t handle, xpvtap_rs_cleanup_t callback, 15907eea693dSMark Johnson void *arg) 15917eea693dSMark Johnson { 15927eea693dSMark Johnson xpvtap_rs_t *rstruct; 15937eea693dSMark Johnson uint_t array_idx; 15947eea693dSMark Johnson uint64_t free; 15957eea693dSMark Johnson uint_t index; 15967eea693dSMark Johnson uint_t last; 15977eea693dSMark Johnson uint_t min; 15987eea693dSMark Johnson uint_t max; 15997eea693dSMark Johnson 16007eea693dSMark Johnson 16017eea693dSMark Johnson ASSERT(handle != NULL); 16027eea693dSMark Johnson 16037eea693dSMark Johnson rstruct = (xpvtap_rs_t *)handle; 16047eea693dSMark Johnson 16057eea693dSMark Johnson mutex_enter(&rstruct->rs_mutex); 16067eea693dSMark Johnson min = rstruct->rs_min; 16077eea693dSMark Johnson max = rstruct->rs_max; 16087eea693dSMark Johnson 16097eea693dSMark Johnson rstruct->rs_flushing = B_TRUE; 16107eea693dSMark Johnson 16117eea693dSMark Johnson /* 16127eea693dSMark Johnson * for all resources not free, call the callback routine to clean it 16137eea693dSMark Johnson * up. 16147eea693dSMark Johnson */ 16157eea693dSMark Johnson for (index = min; index <= max; index++) { 16167eea693dSMark Johnson 16177eea693dSMark Johnson array_idx = rstruct->rs_last >> 6; 16187eea693dSMark Johnson free = rstruct->rs_free[array_idx]; 16197eea693dSMark Johnson last = rstruct->rs_last & 0x3F; 16207eea693dSMark Johnson 16217eea693dSMark Johnson /* if the next resource to check is not free */ 16227eea693dSMark Johnson if ((free & ((uint64_t)1 << last)) == 0) { 16237eea693dSMark Johnson /* call the callback to cleanup */ 16247eea693dSMark Johnson (*callback)(arg, rstruct->rs_last); 16257eea693dSMark Johnson 16267eea693dSMark Johnson /* put it back in the free list */ 16277eea693dSMark Johnson rstruct->rs_free[array_idx] |= ((uint64_t)1 << last); 16287eea693dSMark Johnson } 16297eea693dSMark Johnson 16307eea693dSMark Johnson /* go to the next one. Note the rollover at 'max' */ 16317eea693dSMark Johnson rstruct->rs_last++; 16327eea693dSMark Johnson if (rstruct->rs_last > max) { 16337eea693dSMark Johnson rstruct->rs_last = rstruct->rs_min; 16347eea693dSMark Johnson } 16357eea693dSMark Johnson } 16367eea693dSMark Johnson 16377eea693dSMark Johnson mutex_exit(&rstruct->rs_mutex); 16387eea693dSMark Johnson } 1639