/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License"). You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Device Strategy
 *
 * This file provides flow-control objects (dsngl, dmult, duplx, adapt)
 * and queue objects (qfifo, qsort, qmerge, qtag).
 */
#include <sys/dktp/cm.h>
#include <sys/kstat.h>

#include <sys/dktp/quetypes.h>
#include <sys/dktp/queue.h>
#include <sys/dktp/tgcom.h>
#include <sys/dktp/fctypes.h>
#include <sys/dktp/flowctrl.h>
#include <sys/param.h>
#include <vm/page.h>
#include <sys/modctl.h>

/*
 * Object Management
 */

/* Forward declaration; the definition lives with the qmerge queue code. */
static struct buf *qmerge_nextbp(struct que_data *qfp, struct buf *bp_merge,
    int *can_merge);

/* Module description handed to the module framework through modlinkage. */
static struct modlmisc modlmisc = {
	&mod_miscops,	/* Type of module */
	"Device Strategy Objects"
};

static struct modlinkage modlinkage = {
	MODREV_1, &modlmisc, NULL
};

/* Module load entry point: returns the result of mod_install(). */
int
_init(void)
{
	return (mod_install(&modlinkage));
}

/* Module unload entry point: returns the result of mod_remove(). */
int
_fini(void)
{
	return (mod_remove(&modlinkage));
}

/* Module information entry point: returns the result of mod_info(). */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}


/*
 * Common Flow Control functions
 */

/*
 * Local static data
 */
#ifdef	FLC_DEBUG
#define	DENT	0x0001
#define	DERR	0x0002
#define	DIO	0x0004
static	int	flc_debug = DENT|DERR|DIO;

#include <sys/thread.h>
/* Incremented by the FLC_DEBUG check in dmult_deque(). */
static	int	flc_malloc_intr = 0;
#endif	/* FLC_DEBUG */

/* When zero, fc_start_kstat() returns without creating a kstat. */
static	int	flc_kstat = 1;

static struct flc_obj
*fc_create(struct flc_objops *fcopsp); static int fc_init(opaque_t queuep, opaque_t tgcom_objp, opaque_t que_objp, void *lkarg); static int fc_free(struct flc_obj *flcobjp); static int fc_start_kstat(opaque_t queuep, char *devtype, int instance); static int fc_stop_kstat(opaque_t queuep); static struct flc_obj * fc_create(struct flc_objops *fcopsp) { struct flc_obj *flcobjp; struct fc_data *fcdp; flcobjp = kmem_zalloc((sizeof (*flcobjp) + sizeof (*fcdp)), KM_NOSLEEP); if (!flcobjp) return (NULL); fcdp = (struct fc_data *)(flcobjp+1); flcobjp->flc_data = (opaque_t)fcdp; flcobjp->flc_ops = fcopsp; return ((opaque_t)flcobjp); } static int dmult_maxcnt = DMULT_MAXCNT; static int fc_init(opaque_t queuep, opaque_t tgcom_objp, opaque_t que_objp, void *lkarg) { struct fc_data *fcdp = (struct fc_data *)queuep; mutex_init(&fcdp->ds_mutex, NULL, MUTEX_DRIVER, lkarg); fcdp->ds_queobjp = que_objp; fcdp->ds_tgcomobjp = tgcom_objp; fcdp->ds_waitcnt = dmult_maxcnt; QUE_INIT(que_objp, lkarg); TGCOM_INIT(tgcom_objp); return (DDI_SUCCESS); } static int fc_free(struct flc_obj *flcobjp) { struct fc_data *fcdp; fcdp = (struct fc_data *)flcobjp->flc_data; if (fcdp->ds_queobjp) QUE_FREE(fcdp->ds_queobjp); if (fcdp->ds_tgcomobjp) { TGCOM_FREE(fcdp->ds_tgcomobjp); mutex_destroy(&fcdp->ds_mutex); } kmem_free(flcobjp, (sizeof (*flcobjp) + sizeof (*fcdp))); return (0); } /*ARGSUSED*/ static int fc_start_kstat(opaque_t queuep, char *devtype, int instance) { struct fc_data *fcdp = (struct fc_data *)queuep; if (!flc_kstat) return (0); if (!fcdp->ds_kstat) { if (fcdp->ds_kstat = kstat_create("cmdk", instance, NULL, "disk", KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) { kstat_install(fcdp->ds_kstat); } } return (0); } static int fc_stop_kstat(opaque_t queuep) { struct fc_data *fcdp = (struct fc_data *)queuep; if (fcdp->ds_kstat) { kstat_delete(fcdp->ds_kstat); fcdp->ds_kstat = NULL; } return (0); } /* * Single Command per Device */ /* * Local Function Prototypes */ static int dsngl_restart(); static 
int dsngl_enque(opaque_t, struct buf *);
static int dsngl_deque(opaque_t, struct buf *);

/* Operations vector for the single-command-per-device flow control. */
struct flc_objops dsngl_ops = {
	fc_init,
	fc_free,
	dsngl_enque,
	dsngl_deque,
	fc_start_kstat,
	fc_stop_kstat,
	0, 0
};

/* Create a flow-control object that uses dsngl_ops. */
struct flc_obj *
dsngl_create()
{
	return (fc_create((struct flc_objops *)&dsngl_ops));
}

/*
 * Submit in_bp.  A NULL in_bp is ignored.  If a request is already parked
 * in ds_bp or a command is outstanding, in_bp is added to the queue.
 * Otherwise a packet is set up with TGCOM_PKT(); on failure the request is
 * parked in ds_bp (dsngl_restart is the callback handed to TGCOM_PKT), on
 * success it is counted as outstanding and transported after ds_mutex has
 * been dropped.  Always returns 0.
 */
static int
dsngl_enque(opaque_t queuep, struct buf *in_bp)
{
	struct fc_data *dsnglp = (struct fc_data *)queuep;
	opaque_t tgcom_objp;
	opaque_t que_objp;

	que_objp = dsnglp->ds_queobjp;
	tgcom_objp = dsnglp->ds_tgcomobjp;

	if (!in_bp)
		return (0);
	mutex_enter(&dsnglp->ds_mutex);
	if (dsnglp->ds_bp || dsnglp->ds_outcnt) {
		/* busy: queue behind the work already in progress */
		QUE_ADD(que_objp, in_bp);
		if (dsnglp->ds_kstat) {
			kstat_waitq_enter(KSTAT_IO_PTR(dsnglp->ds_kstat));
		}
		mutex_exit(&dsnglp->ds_mutex);
		return (0);
	}
	if (dsnglp->ds_kstat) {
		kstat_waitq_enter(KSTAT_IO_PTR(dsnglp->ds_kstat));
	}
	if (TGCOM_PKT(tgcom_objp, in_bp, dsngl_restart,
	    (caddr_t)dsnglp) != DDI_SUCCESS) {
		/* packet setup failed: park the request in ds_bp */
		dsnglp->ds_bp = in_bp;
		mutex_exit(&dsnglp->ds_mutex);
		return (0);
	}
	dsnglp->ds_outcnt++;
	if (dsnglp->ds_kstat)
		kstat_waitq_to_runq(KSTAT_IO_PTR(dsnglp->ds_kstat));
	mutex_exit(&dsnglp->ds_mutex);
	/* transport is issued without ds_mutex held */
	TGCOM_TRANSPORT(tgcom_objp, in_bp);
	return (0);
}

/*
 * Account for a completed request (when in_bp is non-NULL) and then try to
 * start further work.  dsngl_restart() calls this with in_bp == NULL, in
 * which case only the start-more-work loop runs.  Always returns 0.
 */
static int
dsngl_deque(opaque_t queuep, struct buf *in_bp)
{
	struct fc_data *dsnglp = (struct fc_data *)queuep;
	opaque_t tgcom_objp;
	opaque_t que_objp;
	struct	 buf *bp;

	que_objp = dsnglp->ds_queobjp;
	tgcom_objp = dsnglp->ds_tgcomobjp;

	mutex_enter(&dsnglp->ds_mutex);
	if (in_bp) {
		dsnglp->ds_outcnt--;
		if (dsnglp->ds_kstat) {
			/* bytes transferred = requested count - residual */
			if (in_bp->b_flags & B_READ) {
				KSTAT_IO_PTR(dsnglp->ds_kstat)->reads++;
				KSTAT_IO_PTR(dsnglp->ds_kstat)->nread +=
				    (in_bp->b_bcount - in_bp->b_resid);
			} else {
				KSTAT_IO_PTR(dsnglp->ds_kstat)->writes++;
				KSTAT_IO_PTR(dsnglp->ds_kstat)->nwritten +=
				    (in_bp->b_bcount - in_bp->b_resid);
			}
			kstat_runq_exit(KSTAT_IO_PTR(dsnglp->ds_kstat));
		}
	}
	for (;;) {
		if (!dsnglp->ds_bp)
			dsnglp->ds_bp = QUE_DEL(que_objp);
		/*
		 * Stop when nothing is pending, packet setup fails, or a
		 * command is still outstanding.  Note the evaluation order:
		 * TGCOM_PKT() is attempted before ds_outcnt is examined.
		 */
		if (!dsnglp->ds_bp ||
		    (TGCOM_PKT(tgcom_objp, dsnglp->ds_bp, dsngl_restart,
		    (caddr_t)dsnglp) != DDI_SUCCESS) ||
		    dsnglp->ds_outcnt) {
			mutex_exit(&dsnglp->ds_mutex);
			return (0);
		}
		dsnglp->ds_outcnt++;
		bp = dsnglp->ds_bp;
		/* pre-fetch the next request into ds_bp */
		dsnglp->ds_bp = QUE_DEL(que_objp);
		if (dsnglp->ds_kstat)
			kstat_waitq_to_runq(KSTAT_IO_PTR(dsnglp->ds_kstat));
		mutex_exit(&dsnglp->ds_mutex);

		TGCOM_TRANSPORT(tgcom_objp, bp);

		/* give up rather than block if the mutex is now contended */
		if (!mutex_tryenter(&dsnglp->ds_mutex))
			return (0);
	}
}

/*
 * Callback handed to TGCOM_PKT(): re-runs the start-more-work loop of
 * dsngl_deque().  Always returns -1.
 */
static int
dsngl_restart(struct fc_data *dsnglp)
{
	(void) dsngl_deque(dsnglp, NULL);
	return (-1);
}


/*
 * Multiple Commands per Device
 */
/*
 * Local Function Prototypes
 */
static int dmult_restart();
static int dmult_enque(opaque_t, struct buf *);
static int dmult_deque(opaque_t, struct buf *);

/* Operations vector for the multiple-commands-per-device flow control. */
struct flc_objops dmult_ops = {
	fc_init,
	fc_free,
	dmult_enque,
	dmult_deque,
	fc_start_kstat,
	fc_stop_kstat,
	0, 0
};

/* Create a flow-control object that uses dmult_ops. */
struct flc_obj *
dmult_create()
{
	return (fc_create((struct flc_objops *)&dmult_ops));

}


/*
 * Some of the object management functions QUE_ADD() and QUE_DEL()
 * do not acquire lock.
 * They depend on dmult_enque(), dmult_deque() to do all locking.
 * If this changes we have to grab locks in qmerge_add() and qmerge_del().
*/ static int dmult_enque(opaque_t queuep, struct buf *in_bp) { struct fc_data *dmultp = (struct fc_data *)queuep; opaque_t tgcom_objp; opaque_t que_objp; que_objp = dmultp->ds_queobjp; tgcom_objp = dmultp->ds_tgcomobjp; if (!in_bp) return (0); mutex_enter(&dmultp->ds_mutex); if ((dmultp->ds_outcnt >= dmultp->ds_waitcnt) || dmultp->ds_bp) { QUE_ADD(que_objp, in_bp); if (dmultp->ds_kstat) { kstat_waitq_enter(KSTAT_IO_PTR(dmultp->ds_kstat)); } mutex_exit(&dmultp->ds_mutex); return (0); } if (dmultp->ds_kstat) { kstat_waitq_enter(KSTAT_IO_PTR(dmultp->ds_kstat)); } if (TGCOM_PKT(tgcom_objp, in_bp, dmult_restart, (caddr_t)dmultp) != DDI_SUCCESS) { dmultp->ds_bp = in_bp; mutex_exit(&dmultp->ds_mutex); return (0); } dmultp->ds_outcnt++; if (dmultp->ds_kstat) kstat_waitq_to_runq(KSTAT_IO_PTR(dmultp->ds_kstat)); mutex_exit(&dmultp->ds_mutex); TGCOM_TRANSPORT(tgcom_objp, in_bp); return (0); } static int dmult_deque(opaque_t queuep, struct buf *in_bp) { struct fc_data *dmultp = (struct fc_data *)queuep; opaque_t tgcom_objp; opaque_t que_objp; struct buf *bp; que_objp = dmultp->ds_queobjp; tgcom_objp = dmultp->ds_tgcomobjp; mutex_enter(&dmultp->ds_mutex); if (in_bp) { dmultp->ds_outcnt--; if (dmultp->ds_kstat) { if (in_bp->b_flags & B_READ) { KSTAT_IO_PTR(dmultp->ds_kstat)->reads++; KSTAT_IO_PTR(dmultp->ds_kstat)->nread += (in_bp->b_bcount - in_bp->b_resid); } else { KSTAT_IO_PTR(dmultp->ds_kstat)->writes++; KSTAT_IO_PTR(dmultp->ds_kstat)->nwritten += (in_bp->b_bcount - in_bp->b_resid); } kstat_runq_exit(KSTAT_IO_PTR(dmultp->ds_kstat)); } } for (;;) { #ifdef FLC_DEBUG if ((curthread->t_intr) && (!dmultp->ds_bp) && (!dmultp->ds_outcnt)) flc_malloc_intr++; #endif if (!dmultp->ds_bp) dmultp->ds_bp = QUE_DEL(que_objp); if (!dmultp->ds_bp || (TGCOM_PKT(tgcom_objp, dmultp->ds_bp, dmult_restart, (caddr_t)dmultp) != DDI_SUCCESS) || (dmultp->ds_outcnt >= dmultp->ds_waitcnt)) { mutex_exit(&dmultp->ds_mutex); return (0); } dmultp->ds_outcnt++; bp = dmultp->ds_bp; dmultp->ds_bp = 
QUE_DEL(que_objp); if (dmultp->ds_kstat) kstat_waitq_to_runq(KSTAT_IO_PTR(dmultp->ds_kstat)); mutex_exit(&dmultp->ds_mutex); TGCOM_TRANSPORT(tgcom_objp, bp); if (!mutex_tryenter(&dmultp->ds_mutex)) return (0); } } static int dmult_restart(struct fc_data *dmultp) { (void) dmult_deque(dmultp, NULL); return (-1); } /* * Duplexed Commands per Device: Read Queue and Write Queue */ /* * Local Function Prototypes */ static int duplx_restart(); static int duplx_init(opaque_t queuep, opaque_t tgcom_objp, opaque_t que_objp, void *lkarg); static int duplx_free(struct flc_obj *flcobjp); static int duplx_enque(opaque_t queuep, struct buf *bp); static int duplx_deque(opaque_t queuep, struct buf *bp); struct flc_objops duplx_ops = { duplx_init, duplx_free, duplx_enque, duplx_deque, fc_start_kstat, fc_stop_kstat, 0, 0 }; struct flc_obj * duplx_create() { struct flc_obj *flcobjp; struct duplx_data *fcdp; flcobjp = kmem_zalloc((sizeof (*flcobjp) + sizeof (*fcdp)), KM_NOSLEEP); if (!flcobjp) return (NULL); fcdp = (struct duplx_data *)(flcobjp+1); flcobjp->flc_data = (opaque_t)fcdp; flcobjp->flc_ops = &duplx_ops; fcdp->ds_writeq.fc_qobjp = qfifo_create(); if (!(fcdp->ds_writeq.fc_qobjp = qfifo_create())) { kmem_free(flcobjp, (sizeof (*flcobjp) + sizeof (*fcdp))); return (NULL); } return (flcobjp); } static int duplx_free(struct flc_obj *flcobjp) { struct duplx_data *fcdp; fcdp = (struct duplx_data *)flcobjp->flc_data; if (fcdp->ds_writeq.fc_qobjp) { QUE_FREE(fcdp->ds_writeq.fc_qobjp); } if (fcdp->ds_readq.fc_qobjp) QUE_FREE(fcdp->ds_readq.fc_qobjp); if (fcdp->ds_tgcomobjp) { TGCOM_FREE(fcdp->ds_tgcomobjp); mutex_destroy(&fcdp->ds_mutex); } kmem_free(flcobjp, (sizeof (*flcobjp) + sizeof (*fcdp))); return (0); } static int duplx_init(opaque_t queuep, opaque_t tgcom_objp, opaque_t que_objp, void *lkarg) { struct duplx_data *fcdp = (struct duplx_data *)queuep; fcdp->ds_tgcomobjp = tgcom_objp; fcdp->ds_readq.fc_qobjp = que_objp; QUE_INIT(que_objp, lkarg); QUE_INIT(fcdp->ds_writeq.fc_qobjp, 
lkarg); TGCOM_INIT(tgcom_objp); mutex_init(&fcdp->ds_mutex, NULL, MUTEX_DRIVER, lkarg); fcdp->ds_writeq.fc_maxcnt = DUPLX_MAXCNT; fcdp->ds_readq.fc_maxcnt = DUPLX_MAXCNT; /* queues point to each other for round robin */ fcdp->ds_readq.next = &fcdp->ds_writeq; fcdp->ds_writeq.next = &fcdp->ds_readq; return (DDI_SUCCESS); } static int duplx_enque(opaque_t queuep, struct buf *in_bp) { struct duplx_data *duplxp = (struct duplx_data *)queuep; opaque_t tgcom_objp; struct fc_que *activeq; struct buf *bp; mutex_enter(&duplxp->ds_mutex); if (in_bp) { if (duplxp->ds_kstat) { kstat_waitq_enter(KSTAT_IO_PTR(duplxp->ds_kstat)); } if (in_bp->b_flags & B_READ) activeq = &duplxp->ds_readq; else activeq = &duplxp->ds_writeq; QUE_ADD(activeq->fc_qobjp, in_bp); } else { activeq = &duplxp->ds_readq; } tgcom_objp = duplxp->ds_tgcomobjp; for (;;) { if (!activeq->fc_bp) activeq->fc_bp = QUE_DEL(activeq->fc_qobjp); if (!activeq->fc_bp || (TGCOM_PKT(tgcom_objp, activeq->fc_bp, duplx_restart, (caddr_t)duplxp) != DDI_SUCCESS) || (activeq->fc_outcnt >= activeq->fc_maxcnt)) { /* switch read/write queues */ activeq = activeq->next; if (!activeq->fc_bp) activeq->fc_bp = QUE_DEL(activeq->fc_qobjp); if (!activeq->fc_bp || (TGCOM_PKT(tgcom_objp, activeq->fc_bp, duplx_restart, (caddr_t)duplxp) != DDI_SUCCESS) || (activeq->fc_outcnt >= activeq->fc_maxcnt)) { mutex_exit(&duplxp->ds_mutex); return (0); } } activeq->fc_outcnt++; bp = activeq->fc_bp; activeq->fc_bp = NULL; if (duplxp->ds_kstat) kstat_waitq_to_runq(KSTAT_IO_PTR(duplxp->ds_kstat)); mutex_exit(&duplxp->ds_mutex); TGCOM_TRANSPORT(tgcom_objp, bp); if (!mutex_tryenter(&duplxp->ds_mutex)) return (0); activeq = activeq->next; } } static int duplx_deque(opaque_t queuep, struct buf *in_bp) { struct duplx_data *duplxp = (struct duplx_data *)queuep; opaque_t tgcom_objp; struct fc_que *activeq; struct buf *bp; mutex_enter(&duplxp->ds_mutex); tgcom_objp = duplxp->ds_tgcomobjp; if (in_bp->b_flags & B_READ) activeq = &duplxp->ds_readq; else activeq = 
&duplxp->ds_writeq; activeq->fc_outcnt--; if (duplxp->ds_kstat) { if (in_bp->b_flags & B_READ) { KSTAT_IO_PTR(duplxp->ds_kstat)->reads++; KSTAT_IO_PTR(duplxp->ds_kstat)->nread += (in_bp->b_bcount - in_bp->b_resid); } else { KSTAT_IO_PTR(duplxp->ds_kstat)->writes++; KSTAT_IO_PTR(duplxp->ds_kstat)->nwritten += (in_bp->b_bcount - in_bp->b_resid); } kstat_runq_exit(KSTAT_IO_PTR(duplxp->ds_kstat)); } for (;;) { /* if needed, try to pull request off a queue */ if (!activeq->fc_bp) activeq->fc_bp = QUE_DEL(activeq->fc_qobjp); if (!activeq->fc_bp || (TGCOM_PKT(tgcom_objp, activeq->fc_bp, duplx_restart, (caddr_t)duplxp) != DDI_SUCCESS) || (activeq->fc_outcnt >= activeq->fc_maxcnt)) { activeq = activeq->next; if (!activeq->fc_bp) activeq->fc_bp = QUE_DEL(activeq->fc_qobjp); if (!activeq->fc_bp || (TGCOM_PKT(tgcom_objp, activeq->fc_bp, duplx_restart, (caddr_t)duplxp) != DDI_SUCCESS) || (activeq->fc_outcnt >= activeq->fc_maxcnt)) { mutex_exit(&duplxp->ds_mutex); return (0); } } activeq->fc_outcnt++; bp = activeq->fc_bp; activeq->fc_bp = NULL; if (duplxp->ds_kstat) kstat_waitq_to_runq(KSTAT_IO_PTR(duplxp->ds_kstat)); mutex_exit(&duplxp->ds_mutex); TGCOM_TRANSPORT(tgcom_objp, bp); if (!mutex_tryenter(&duplxp->ds_mutex)) return (0); activeq = activeq->next; } } static int duplx_restart(struct duplx_data *duplxp) { (void) duplx_enque(duplxp, NULL); return (-1); } /* * Tagged queueing flow control */ /* * Local Function Prototypes */ struct flc_objops adapt_ops = { fc_init, fc_free, dmult_enque, dmult_deque, fc_start_kstat, fc_stop_kstat, 0, 0 }; struct flc_obj * adapt_create() { return (fc_create((struct flc_objops *)&adapt_ops)); } /* * Common Queue functions */ /* * Local static data */ #ifdef Q_DEBUG #define DENT 0x0001 #define DERR 0x0002 #define DIO 0x0004 static int que_debug = DENT|DERR|DIO; #endif /* Q_DEBUG */ /* * Local Function Prototypes */ static struct que_obj *que_create(struct que_objops *qopsp); static int que_init(struct que_data *qfp, void *lkarg); static int 
que_free(struct que_obj *queobjp); static struct buf *que_del(struct que_data *qfp); static struct que_obj * que_create(struct que_objops *qopsp) { struct que_data *qfp; struct que_obj *queobjp; queobjp = kmem_zalloc((sizeof (*queobjp) + sizeof (*qfp)), KM_NOSLEEP); if (!queobjp) return (NULL); queobjp->que_ops = qopsp; qfp = (struct que_data *)(queobjp+1); queobjp->que_data = (opaque_t)qfp; return ((opaque_t)queobjp); } static int que_init(struct que_data *qfp, void *lkarg) { mutex_init(&qfp->q_mutex, NULL, MUTEX_DRIVER, lkarg); return (DDI_SUCCESS); } static int que_free(struct que_obj *queobjp) { struct que_data *qfp; qfp = (struct que_data *)queobjp->que_data; mutex_destroy(&qfp->q_mutex); kmem_free(queobjp, (sizeof (*queobjp) + sizeof (struct que_data))); return (0); } static struct buf * que_del(struct que_data *qfp) { struct buf *bp; bp = qfp->q_tab.b_actf; if (bp) { qfp->q_tab.b_actf = bp->av_forw; if (!qfp->q_tab.b_actf) qfp->q_tab.b_actl = NULL; bp->av_forw = 0; } return (bp); } /* * Qmerge * Local Function Prototypes */ static int qmerge_add(), qmerge_free(); static struct buf *qmerge_del(struct que_data *qfp); struct que_objops qmerge_ops = { que_init, qmerge_free, qmerge_add, qmerge_del, 0, 0 }; /* fields in diskhd */ #define hd_cnt b_back #define hd_private b_forw #define hd_flags b_flags #define hd_sync_next av_forw #define hd_async_next av_back #define hd_sync2async sync_async_ratio #define QNEAR_FORWARD 0x01 #define QNEAR_BACKWARD 0x02 #define QNEAR_ASYNCONLY 0x04 #define QNEAR_ASYNCALSO 0x08 #define DBLK(bp) ((unsigned long)(bp)->b_private) #define BP_LT_BP(a, b) (DBLK(a) < DBLK(b)) #define BP_GT_BP(a, b) (DBLK(a) > DBLK(b)) #define BP_LT_HD(a, b) (DBLK(a) < (unsigned long)((b)->hd_private)) #define BP_GT_HD(a, b) (DBLK(a) > (unsigned long)((b)->hd_private)) #define QNEAR_ASYNC (QNEAR_ASYNCONLY|QNEAR_ASYNCALSO) #define SYNC2ASYNC(a) ((a)->q_tab.hd_cnt) /* * qmerge implements a two priority queue, the low priority queue holding ASYNC * write 
requests, while the rest are queued in the high priority sync queue. * Requests on the async queue would be merged if possible. * By default qmerge2wayscan is 1, indicating an elevator algorithm. When * this variable is set to zero, it has the following side effects. * 1. We assume fairness is the number one issue. * 2. The next request to be picked indicates current head position. * * qmerge_sync2async indicates the ratio of scans of high prioriy * sync queue to low priority async queue. * * When qmerge variables have the following values it defaults to qsort * * qmerge1pri = 1, qmerge2wayscan = 0, qmerge_max_merge = 0 * */ static int qmerge_max_merge = 128 * 1024; static intptr_t qmerge_sync2async = 4; static int qmerge2wayscan = 1; static int qmerge1pri = 0; static int qmerge_merge = 0; /* * Local static data */ struct que_obj * qmerge_create() { struct que_data *qfp; struct que_obj *queobjp; queobjp = kmem_zalloc((sizeof (*queobjp) + sizeof (*qfp)), KM_NOSLEEP); if (!queobjp) return (NULL); queobjp->que_ops = &qmerge_ops; qfp = (struct que_data *)(queobjp+1); qfp->q_tab.hd_private = qfp->q_tab.hd_private = 0; qfp->q_tab.hd_sync_next = qfp->q_tab.hd_async_next = NULL; qfp->q_tab.hd_cnt = (void *)qmerge_sync2async; queobjp->que_data = (opaque_t)qfp; return ((opaque_t)queobjp); } static int qmerge_free(struct que_obj *queobjp) { struct que_data *qfp; qfp = (struct que_data *)queobjp->que_data; mutex_destroy(&qfp->q_mutex); kmem_free(queobjp, (sizeof (*queobjp) + sizeof (*qfp))); return (0); } static int qmerge_can_merge(bp1, bp2) struct buf *bp1, *bp2; { const int paw_flags = B_PAGEIO | B_ASYNC | B_WRITE; if ((bp1->b_un.b_addr != 0) || (bp2->b_un.b_addr != 0) || ((bp1->b_flags & (paw_flags | B_REMAPPED)) != paw_flags) || ((bp2->b_flags & (paw_flags | B_REMAPPED)) != paw_flags) || (bp1->b_bcount & PAGEOFFSET) || (bp2->b_bcount & PAGEOFFSET) || (bp1->b_bcount + bp2->b_bcount > qmerge_max_merge)) return (0); if ((DBLK(bp2) + bp2->b_bcount / DEV_BSIZE == DBLK(bp1)) || 
(DBLK(bp1) + bp1->b_bcount / DEV_BSIZE == DBLK(bp2))) return (1); else return (0); } static void qmerge_mergesetup(bp_merge, bp) struct buf *bp_merge, *bp; { struct buf *bp1; struct page *pp, *pp_merge, *pp_merge_prev; int forward; qmerge_merge++; forward = DBLK(bp_merge) < DBLK(bp); bp_merge->b_bcount += bp->b_bcount; pp = bp->b_pages; pp_merge = bp_merge->b_pages; pp_merge_prev = pp_merge->p_prev; pp_merge->p_prev->p_next = pp; pp_merge->p_prev = pp->p_prev; pp->p_prev->p_next = pp_merge; pp->p_prev = pp_merge_prev; bp1 = bp_merge->b_forw; bp1->av_back->av_forw = bp; bp->av_back = bp1->av_back; bp1->av_back = bp; bp->av_forw = bp1; if (!forward) { bp_merge->b_forw = bp; bp_merge->b_pages = pp; bp_merge->b_private = bp->b_private; } } static void que_insert(struct que_data *qfp, struct buf *bp) { struct buf *bp1, *bp_start, *lowest_bp, *highest_bp; uintptr_t highest_blk, lowest_blk; struct buf **async_bpp, **sync_bpp, **bpp; struct diskhd *dp = &qfp->q_tab; sync_bpp = &dp->hd_sync_next; async_bpp = &dp->hd_async_next; /* * The ioctl used by the format utility requires that bp->av_back be * preserved. 
*/ if (bp->av_back) bp->b_error = (intptr_t)bp->av_back; if (!qmerge1pri && ((bp->b_flags & (B_ASYNC|B_READ|B_FREE)) == B_ASYNC)) { bpp = &dp->hd_async_next; } else { bpp = &dp->hd_sync_next; } if ((bp1 = *bpp) == NULL) { *bpp = bp; bp->av_forw = bp->av_back = bp; if ((bpp == async_bpp) && (*sync_bpp == NULL)) { dp->hd_flags |= QNEAR_ASYNCONLY; } else if (bpp == sync_bpp) { dp->hd_flags &= ~QNEAR_ASYNCONLY; if (*async_bpp) { dp->hd_flags |= QNEAR_ASYNCALSO; } } return; } bp_start = bp1; if (DBLK(bp) < DBLK(bp1)) { lowest_blk = DBLK(bp1); lowest_bp = bp1; do { if (DBLK(bp) > DBLK(bp1)) { bp->av_forw = bp1->av_forw; bp1->av_forw->av_back = bp; bp1->av_forw = bp; bp->av_back = bp1; if (((bpp == async_bpp) && (dp->hd_flags & QNEAR_ASYNC)) || (bpp == sync_bpp)) { if (!(dp->hd_flags & QNEAR_BACKWARD) && BP_GT_HD(bp, dp)) { *bpp = bp; } } return; } else if (DBLK(bp1) < lowest_blk) { lowest_bp = bp1; lowest_blk = DBLK(bp1); } } while ((DBLK(bp1->av_back) < DBLK(bp1)) && ((bp1 = bp1->av_back) != bp_start)); bp->av_forw = lowest_bp; lowest_bp->av_back->av_forw = bp; bp->av_back = lowest_bp->av_back; lowest_bp->av_back = bp; if ((bpp == async_bpp) && !(dp->hd_flags & QNEAR_ASYNC)) { *bpp = bp; } else if (!(dp->hd_flags & QNEAR_BACKWARD) && BP_GT_HD(bp, dp)) { *bpp = bp; } } else { highest_blk = DBLK(bp1); highest_bp = bp1; do { if (DBLK(bp) < DBLK(bp1)) { bp->av_forw = bp1; bp1->av_back->av_forw = bp; bp->av_back = bp1->av_back; bp1->av_back = bp; if (((bpp == async_bpp) && (dp->hd_flags & QNEAR_ASYNC)) || (bpp == sync_bpp)) { if ((dp->hd_flags & QNEAR_BACKWARD) && BP_LT_HD(bp, dp)) { *bpp = bp; } } return; } else if (DBLK(bp1) > highest_blk) { highest_bp = bp1; highest_blk = DBLK(bp1); } } while ((DBLK(bp1->av_forw) > DBLK(bp1)) && ((bp1 = bp1->av_forw) != bp_start)); bp->av_back = highest_bp; highest_bp->av_forw->av_back = bp; bp->av_forw = highest_bp->av_forw; highest_bp->av_forw = bp; if (((bpp == sync_bpp) || ((bpp == async_bpp) && (dp->hd_flags & QNEAR_ASYNC))) && 
(dp->hd_flags & QNEAR_BACKWARD) && (BP_LT_HD(bp, dp))) *bpp = bp; } } /* * dmult_enque() holds dmultp->ds_mutex lock, so we dont grab * lock here. If dmult_enque() changes we will have to visit * this function again */ static int qmerge_add(struct que_data *qfp, struct buf *bp) { que_insert(qfp, bp); return (++qfp->q_cnt); } static int qmerge_iodone(struct buf *bp) { struct buf *bp1; struct page *pp, *pp1, *tmp_pp; if (bp->b_flags & B_REMAPPED) bp_mapout(bp); bp1 = bp->b_forw; do { bp->b_forw = bp1->av_forw; bp1->av_forw->av_back = bp1->av_back; bp1->av_back->av_forw = bp1->av_forw; pp = (page_t *)bp1->b_pages; pp1 = bp->b_forw->b_pages; tmp_pp = pp->p_prev; pp->p_prev = pp1->p_prev; pp->p_prev->p_next = pp; pp1->p_prev = tmp_pp; pp1->p_prev->p_next = pp1; if (bp->b_flags & B_ERROR) { bp1->b_error = bp->b_error; bp1->b_flags |= B_ERROR; } biodone(bp1); } while ((bp1 = bp->b_forw) != bp->b_forw->av_forw); biodone(bp1); kmem_free(bp, sizeof (*bp)); return (0); } static struct buf * qmerge_nextbp(struct que_data *qfp, struct buf *bp_merge, int *can_merge) { intptr_t private, cnt; int flags; struct buf *sync_bp, *async_bp, *bp; struct buf **sync_bpp, **async_bpp, **bpp; struct diskhd *dp = &qfp->q_tab; if (qfp->q_cnt == 0) { return (NULL); } flags = qfp->q_tab.hd_flags; sync_bpp = &qfp->q_tab.hd_sync_next; async_bpp = &qfp->q_tab.hd_async_next; begin_nextbp: if (flags & QNEAR_ASYNCONLY) { bp = *async_bpp; private = DBLK(bp); if (bp_merge && !qmerge_can_merge(bp, bp_merge)) { return (NULL); } else if (bp->av_forw == bp) { bp->av_forw = bp->av_back = NULL; flags &= ~(QNEAR_ASYNCONLY | QNEAR_BACKWARD); private = 0; } else if (flags & QNEAR_BACKWARD) { if (DBLK(bp) < DBLK(bp->av_back)) { flags &= ~QNEAR_BACKWARD; private = 0; } } else if (DBLK(bp) > DBLK(bp->av_forw)) { if (qmerge2wayscan) { flags |= QNEAR_BACKWARD; } else { private = 0; } } else if (qmerge2wayscan == 0) { private = DBLK(bp->av_forw); } bpp = async_bpp; } else if (flags & QNEAR_ASYNCALSO) { sync_bp = 
*sync_bpp; async_bp = *async_bpp; if (flags & QNEAR_BACKWARD) { if (BP_GT_HD(sync_bp, dp) && BP_GT_HD(async_bp, dp)) { flags &= ~(QNEAR_BACKWARD|QNEAR_ASYNCALSO); *sync_bpp = sync_bp->av_forw; *async_bpp = async_bp->av_forw; SYNC2ASYNC(qfp) = (void *)qmerge_sync2async; qfp->q_tab.hd_private = 0; goto begin_nextbp; } if (BP_LT_HD(async_bp, dp) && BP_LT_HD(sync_bp, dp)) { if (BP_GT_BP(async_bp, sync_bp)) { bpp = async_bpp; bp = *async_bpp; } else { bpp = sync_bpp; bp = *sync_bpp; } } else if (BP_LT_HD(async_bp, dp)) { bpp = async_bpp; bp = *async_bpp; } else { bpp = sync_bpp; bp = *sync_bpp; } } else { if (BP_LT_HD(sync_bp, dp) && BP_LT_HD(async_bp, dp)) { if (qmerge2wayscan) { flags |= QNEAR_BACKWARD; *sync_bpp = sync_bp->av_back; *async_bpp = async_bp->av_back; goto begin_nextbp; } else { flags &= ~QNEAR_ASYNCALSO; SYNC2ASYNC(qfp) = (void *)qmerge_sync2async; qfp->q_tab.hd_private = 0; goto begin_nextbp; } } if (BP_GT_HD(async_bp, dp) && BP_GT_HD(sync_bp, dp)) { if (BP_LT_BP(async_bp, sync_bp)) { bpp = async_bpp; bp = *async_bpp; } else { bpp = sync_bpp; bp = *sync_bpp; } } else if (BP_GT_HD(async_bp, dp)) { bpp = async_bpp; bp = *async_bpp; } else { bpp = sync_bpp; bp = *sync_bpp; } } if (bp_merge && !qmerge_can_merge(bp, bp_merge)) { return (NULL); } else if (bp->av_forw == bp) { bp->av_forw = bp->av_back = NULL; flags &= ~QNEAR_ASYNCALSO; if (bpp == async_bpp) { SYNC2ASYNC(qfp) = (void *)qmerge_sync2async; } else { flags |= QNEAR_ASYNCONLY; } } private = DBLK(bp); } else { bp = *sync_bpp; private = DBLK(bp); if (bp_merge && !qmerge_can_merge(bp, bp_merge)) { return (NULL); } else if (bp->av_forw == bp) { private = 0; SYNC2ASYNC(qfp) = (void *)qmerge_sync2async; bp->av_forw = bp->av_back = NULL; flags &= ~QNEAR_BACKWARD; if (*async_bpp) flags |= QNEAR_ASYNCONLY; } else if (flags & QNEAR_BACKWARD) { if (DBLK(bp) < DBLK(bp->av_back)) { flags &= ~QNEAR_BACKWARD; cnt = (intptr_t)SYNC2ASYNC(qfp); if (cnt > 0) { cnt--; SYNC2ASYNC(qfp) = (void *)cnt; } else { if 
(*async_bpp) flags |= QNEAR_ASYNCALSO; SYNC2ASYNC(qfp) = (void *)qmerge_sync2async; } private = 0; } } else if (DBLK(bp) > DBLK(bp->av_forw)) { private = 0; if (qmerge2wayscan) { flags |= QNEAR_BACKWARD; private = DBLK(bp); } else { cnt = (intptr_t)SYNC2ASYNC(qfp); if (cnt > 0) { cnt--; SYNC2ASYNC(qfp) = (void *)cnt; } else { if (*async_bpp) flags |= QNEAR_ASYNCALSO; SYNC2ASYNC(qfp) = (void *)qmerge_sync2async; } } } else if (qmerge2wayscan == 0) { private = DBLK(bp->av_forw); } bpp = sync_bpp; } if (bp->av_forw) { *can_merge = !(bp->b_flags & B_READ); if (flags & QNEAR_BACKWARD) { *bpp = bp->av_back; if ((DBLK(bp->av_back) + bp->av_back->b_bcount / DEV_BSIZE) != DBLK(bp)) *can_merge = 0; } else { *bpp = bp->av_forw; if ((DBLK(bp) + bp->b_bcount / DEV_BSIZE) != DBLK(bp->av_forw)) *can_merge = 0; } bp->av_forw->av_back = bp->av_back; bp->av_back->av_forw = bp->av_forw; bp->av_forw = bp->av_back = NULL; } else { *bpp = NULL; *can_merge = 0; } qfp->q_tab.hd_private = (void *)private; qfp->q_cnt--; qfp->q_tab.hd_flags = flags; if (bp->b_error) { bp->av_back = (void *)(intptr_t)bp->b_error; bp->b_error = 0; } return (bp); } static struct buf * qmerge_del(struct que_data *qfp) { struct buf *bp, *next_bp, *bp_merge; int alloc_mergebp, merge; if (qfp->q_cnt == 0) { return (NULL); } bp_merge = bp = qmerge_nextbp(qfp, NULL, &merge); alloc_mergebp = 1; while (merge && (next_bp = qmerge_nextbp(qfp, bp_merge, &merge))) { if (alloc_mergebp) { bp_merge = kmem_alloc(sizeof (*bp_merge), KM_NOSLEEP); if (bp_merge == NULL) { mutex_exit(&qfp->q_mutex); return (bp); } bcopy(bp, bp_merge, sizeof (*bp_merge)); bp_merge->b_iodone = qmerge_iodone; bp_merge->b_forw = bp; bp_merge->b_back = (struct buf *)qfp; bp->av_forw = bp->av_back = bp; alloc_mergebp = 0; } qmerge_mergesetup(bp_merge, next_bp); } return (bp_merge); } /* * FIFO Queue functions */ /* * Local Function Prototypes */ static int qfifo_add(); struct que_objops qfifo_ops = { que_init, que_free, qfifo_add, que_del, 0, 0 }; /* * 
Local static data */ struct que_obj * qfifo_create() { return (que_create((struct que_objops *)&qfifo_ops)); } static int qfifo_add(struct que_data *qfp, struct buf *bp) { if (!qfp->q_tab.b_actf) qfp->q_tab.b_actf = bp; else qfp->q_tab.b_actl->av_forw = bp; qfp->q_tab.b_actl = bp; bp->av_forw = NULL; return (0); } /* * One-Way-Scan Queue functions */ /* * Local Function Prototypes */ static int qsort_add(); static struct buf *qsort_del(); static void oneway_scan_binary(struct diskhd *dp, struct buf *bp); struct que_objops qsort_ops = { que_init, que_free, qsort_add, qsort_del, 0, 0 }; /* * Local static data */ struct que_obj * qsort_create() { return (que_create((struct que_objops *)&qsort_ops)); } static int qsort_add(struct que_data *qfp, struct buf *bp) { qfp->q_cnt++; oneway_scan_binary(&qfp->q_tab, bp); return (0); } #define b_pasf b_forw #define b_pasl b_back static void oneway_scan_binary(struct diskhd *dp, struct buf *bp) { struct buf *ap; ap = dp->b_actf; if (ap == NULL) { dp->b_actf = bp; bp->av_forw = NULL; return; } if (DBLK(bp) < DBLK(ap)) { ap = dp->b_pasf; if ((ap == NULL) || (DBLK(bp) < DBLK(ap))) { dp->b_pasf = bp; bp->av_forw = ap; return; } } while (ap->av_forw) { if (DBLK(bp) < DBLK(ap->av_forw)) break; ap = ap->av_forw; } bp->av_forw = ap->av_forw; ap->av_forw = bp; } static struct buf * qsort_del(struct que_data *qfp) { struct buf *bp; if (qfp->q_cnt == 0) { return (NULL); } qfp->q_cnt--; bp = qfp->q_tab.b_actf; qfp->q_tab.b_actf = bp->av_forw; bp->av_forw = 0; if (!qfp->q_tab.b_actf && qfp->q_tab.b_pasf) { qfp->q_tab.b_actf = qfp->q_tab.b_pasf; qfp->q_tab.b_pasf = NULL; } return (bp); } /* * Tagged queueing */ /* * Local Function Prototypes */ struct que_objops qtag_ops = { que_init, que_free, qsort_add, qsort_del, 0, 0 }; /* * Local static data */ struct que_obj * qtag_create() { return (que_create((struct que_objops *)&qtag_ops)); }