xref: /freebsd/sys/kern/kern_alq.c (revision 5050aa86cff105784877fb886a7b1d25bca5813b)
19454b2d8SWarner Losh /*-
29405072aSJeff Roberson  * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
3d28f42f9SLawrence Stewart  * Copyright (c) 2008-2009, Lawrence Stewart <lstewart@freebsd.org>
4d28f42f9SLawrence Stewart  * Copyright (c) 2009-2010, The FreeBSD Foundation
59405072aSJeff Roberson  * All rights reserved.
69405072aSJeff Roberson  *
7d28f42f9SLawrence Stewart  * Portions of this software were developed at the Centre for Advanced
8d28f42f9SLawrence Stewart  * Internet Architectures, Swinburne University of Technology, Melbourne,
9d28f42f9SLawrence Stewart  * Australia by Lawrence Stewart under sponsorship from the FreeBSD Foundation.
10d28f42f9SLawrence Stewart  *
119405072aSJeff Roberson  * Redistribution and use in source and binary forms, with or without
129405072aSJeff Roberson  * modification, are permitted provided that the following conditions
139405072aSJeff Roberson  * are met:
149405072aSJeff Roberson  * 1. Redistributions of source code must retain the above copyright
159405072aSJeff Roberson  *    notice unmodified, this list of conditions, and the following
169405072aSJeff Roberson  *    disclaimer.
179405072aSJeff Roberson  * 2. Redistributions in binary form must reproduce the above copyright
189405072aSJeff Roberson  *    notice, this list of conditions and the following disclaimer in the
199405072aSJeff Roberson  *    documentation and/or other materials provided with the distribution.
209405072aSJeff Roberson  *
219405072aSJeff Roberson  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
229405072aSJeff Roberson  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
239405072aSJeff Roberson  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
249405072aSJeff Roberson  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
259405072aSJeff Roberson  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
269405072aSJeff Roberson  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
279405072aSJeff Roberson  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
289405072aSJeff Roberson  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
299405072aSJeff Roberson  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
309405072aSJeff Roberson  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
319405072aSJeff Roberson  */
329405072aSJeff Roberson 
33677b542eSDavid E. O'Brien #include <sys/cdefs.h>
34677b542eSDavid E. O'Brien __FBSDID("$FreeBSD$");
35677b542eSDavid E. O'Brien 
36d28f42f9SLawrence Stewart #include "opt_mac.h"
37d28f42f9SLawrence Stewart 
389405072aSJeff Roberson #include <sys/param.h>
399405072aSJeff Roberson #include <sys/systm.h>
409405072aSJeff Roberson #include <sys/kernel.h>
419405072aSJeff Roberson #include <sys/kthread.h>
429405072aSJeff Roberson #include <sys/lock.h>
4333f19beeSJohn Baldwin #include <sys/mount.h>
449405072aSJeff Roberson #include <sys/mutex.h>
459405072aSJeff Roberson #include <sys/namei.h>
469405072aSJeff Roberson #include <sys/proc.h>
479405072aSJeff Roberson #include <sys/vnode.h>
489405072aSJeff Roberson #include <sys/alq.h>
499405072aSJeff Roberson #include <sys/malloc.h>
509405072aSJeff Roberson #include <sys/unistd.h>
519405072aSJeff Roberson #include <sys/fcntl.h>
529405072aSJeff Roberson #include <sys/eventhandler.h>
539405072aSJeff Roberson 
54aed55708SRobert Watson #include <security/mac/mac_framework.h>
55aed55708SRobert Watson 
569405072aSJeff Roberson /* Async. Logging Queue */
579405072aSJeff Roberson struct alq {
587d11e744SLawrence Stewart 	char	*aq_entbuf;		/* Buffer for stored entries */
599405072aSJeff Roberson 	int	aq_entmax;		/* Max entries */
609405072aSJeff Roberson 	int	aq_entlen;		/* Entry length */
617d11e744SLawrence Stewart 	int	aq_freebytes;		/* Bytes available in buffer */
627d11e744SLawrence Stewart 	int	aq_buflen;		/* Total length of our buffer */
637d11e744SLawrence Stewart 	int	aq_writehead;		/* Location for next write */
647d11e744SLawrence Stewart 	int	aq_writetail;		/* Flush starts at this location */
657d11e744SLawrence Stewart 	int	aq_wrapearly;		/* # bytes left blank at end of buf */
669405072aSJeff Roberson 	int	aq_flags;		/* Queue flags */
677d11e744SLawrence Stewart 	int	aq_waiters;		/* Num threads waiting for resources
687d11e744SLawrence Stewart 					 * NB: Used as a wait channel so must
697d11e744SLawrence Stewart 					 * not be first field in the alq struct
707d11e744SLawrence Stewart 					 */
717d11e744SLawrence Stewart 	struct	ale	aq_getpost;	/* ALE for use by get/post */
729405072aSJeff Roberson 	struct mtx	aq_mtx;		/* Queue lock */
739405072aSJeff Roberson 	struct vnode	*aq_vp;		/* Open vnode handle */
749e9256e2SJeff Roberson 	struct ucred	*aq_cred;	/* Credentials of the opening thread */
759405072aSJeff Roberson 	LIST_ENTRY(alq)	aq_act;		/* List of active queues */
769405072aSJeff Roberson 	LIST_ENTRY(alq)	aq_link;	/* List of all queues */
779405072aSJeff Roberson };
789405072aSJeff Roberson 
799405072aSJeff Roberson #define	AQ_WANTED	0x0001		/* Wakeup sleeper when io is done */
809405072aSJeff Roberson #define	AQ_ACTIVE	0x0002		/* on the active list */
819405072aSJeff Roberson #define	AQ_FLUSHING	0x0004		/* doing IO */
829405072aSJeff Roberson #define	AQ_SHUTDOWN	0x0008		/* Queue no longer valid */
837d11e744SLawrence Stewart #define	AQ_ORDERED	0x0010		/* Queue enforces ordered writes */
847d11e744SLawrence Stewart #define	AQ_LEGACY	0x0020		/* Legacy queue (fixed length writes) */
859405072aSJeff Roberson 
869405072aSJeff Roberson #define	ALQ_LOCK(alq)	mtx_lock_spin(&(alq)->aq_mtx)
879405072aSJeff Roberson #define	ALQ_UNLOCK(alq)	mtx_unlock_spin(&(alq)->aq_mtx)
889405072aSJeff Roberson 
897d11e744SLawrence Stewart #define HAS_PENDING_DATA(alq) ((alq)->aq_freebytes != (alq)->aq_buflen)
907d11e744SLawrence Stewart 
919405072aSJeff Roberson static MALLOC_DEFINE(M_ALD, "ALD", "ALD");
929405072aSJeff Roberson 
939405072aSJeff Roberson /*
949405072aSJeff Roberson  * The ald_mtx protects the ald_queues list and the ald_active list.
959405072aSJeff Roberson  */
969405072aSJeff Roberson static struct mtx ald_mtx;
979405072aSJeff Roberson static LIST_HEAD(, alq) ald_queues;
989405072aSJeff Roberson static LIST_HEAD(, alq) ald_active;
999405072aSJeff Roberson static int ald_shutingdown = 0;
100a414302fSJeff Roberson struct thread *ald_thread;
101a414302fSJeff Roberson static struct proc *ald_proc;
1029405072aSJeff Roberson 
1039405072aSJeff Roberson #define	ALD_LOCK()	mtx_lock(&ald_mtx)
1049405072aSJeff Roberson #define	ALD_UNLOCK()	mtx_unlock(&ald_mtx)
1059405072aSJeff Roberson 
1069405072aSJeff Roberson /* Daemon functions */
1079405072aSJeff Roberson static int ald_add(struct alq *);
1089405072aSJeff Roberson static int ald_rem(struct alq *);
1099405072aSJeff Roberson static void ald_startup(void *);
1109405072aSJeff Roberson static void ald_daemon(void);
1119405072aSJeff Roberson static void ald_shutdown(void *, int);
1129405072aSJeff Roberson static void ald_activate(struct alq *);
1139405072aSJeff Roberson static void ald_deactivate(struct alq *);
1149405072aSJeff Roberson 
1159405072aSJeff Roberson /* Internal queue functions */
1169405072aSJeff Roberson static void alq_shutdown(struct alq *);
117c0ea37a8SLawrence Stewart static void alq_destroy(struct alq *);
1189405072aSJeff Roberson static int alq_doio(struct alq *);
1199405072aSJeff Roberson 
1209405072aSJeff Roberson 
1219405072aSJeff Roberson /*
1229405072aSJeff Roberson  * Add a new queue to the global list.  Fail if we're shutting down.
1239405072aSJeff Roberson  */
1249405072aSJeff Roberson static int
1259405072aSJeff Roberson ald_add(struct alq *alq)
1269405072aSJeff Roberson {
1279405072aSJeff Roberson 	int error;
1289405072aSJeff Roberson 
1299405072aSJeff Roberson 	error = 0;
1309405072aSJeff Roberson 
1319405072aSJeff Roberson 	ALD_LOCK();
1329405072aSJeff Roberson 	if (ald_shutingdown) {
1339405072aSJeff Roberson 		error = EBUSY;
1349405072aSJeff Roberson 		goto done;
1359405072aSJeff Roberson 	}
1369405072aSJeff Roberson 	LIST_INSERT_HEAD(&ald_queues, alq, aq_link);
1379405072aSJeff Roberson done:
1389405072aSJeff Roberson 	ALD_UNLOCK();
1399405072aSJeff Roberson 	return (error);
1409405072aSJeff Roberson }
1419405072aSJeff Roberson 
1429405072aSJeff Roberson /*
1439405072aSJeff Roberson  * Remove a queue from the global list unless we're shutting down.  If so,
1449405072aSJeff Roberson  * the ald will take care of cleaning up it's resources.
1459405072aSJeff Roberson  */
1469405072aSJeff Roberson static int
1479405072aSJeff Roberson ald_rem(struct alq *alq)
1489405072aSJeff Roberson {
1499405072aSJeff Roberson 	int error;
1509405072aSJeff Roberson 
1519405072aSJeff Roberson 	error = 0;
1529405072aSJeff Roberson 
1539405072aSJeff Roberson 	ALD_LOCK();
1549405072aSJeff Roberson 	if (ald_shutingdown) {
1559405072aSJeff Roberson 		error = EBUSY;
1569405072aSJeff Roberson 		goto done;
1579405072aSJeff Roberson 	}
1589405072aSJeff Roberson 	LIST_REMOVE(alq, aq_link);
1599405072aSJeff Roberson done:
1609405072aSJeff Roberson 	ALD_UNLOCK();
1619405072aSJeff Roberson 	return (error);
1629405072aSJeff Roberson }
1639405072aSJeff Roberson 
1649405072aSJeff Roberson /*
1659405072aSJeff Roberson  * Put a queue on the active list.  This will schedule it for writing.
1669405072aSJeff Roberson  */
1679405072aSJeff Roberson static void
1689405072aSJeff Roberson ald_activate(struct alq *alq)
1699405072aSJeff Roberson {
1709405072aSJeff Roberson 	LIST_INSERT_HEAD(&ald_active, alq, aq_act);
1719405072aSJeff Roberson 	wakeup(&ald_active);
1729405072aSJeff Roberson }
1739405072aSJeff Roberson 
1749405072aSJeff Roberson static void
1759405072aSJeff Roberson ald_deactivate(struct alq *alq)
1769405072aSJeff Roberson {
1779405072aSJeff Roberson 	LIST_REMOVE(alq, aq_act);
1789405072aSJeff Roberson 	alq->aq_flags &= ~AQ_ACTIVE;
1799405072aSJeff Roberson }
1809405072aSJeff Roberson 
1819405072aSJeff Roberson static void
1829405072aSJeff Roberson ald_startup(void *unused)
1839405072aSJeff Roberson {
1849405072aSJeff Roberson 	mtx_init(&ald_mtx, "ALDmtx", NULL, MTX_DEF|MTX_QUIET);
1859405072aSJeff Roberson 	LIST_INIT(&ald_queues);
1869405072aSJeff Roberson 	LIST_INIT(&ald_active);
1879405072aSJeff Roberson }
1889405072aSJeff Roberson 
1899405072aSJeff Roberson static void
1909405072aSJeff Roberson ald_daemon(void)
1919405072aSJeff Roberson {
1929405072aSJeff Roberson 	int needwakeup;
1939405072aSJeff Roberson 	struct alq *alq;
1949405072aSJeff Roberson 
195a414302fSJeff Roberson 	ald_thread = FIRST_THREAD_IN_PROC(ald_proc);
196a414302fSJeff Roberson 
1979405072aSJeff Roberson 	EVENTHANDLER_REGISTER(shutdown_pre_sync, ald_shutdown, NULL,
1989405072aSJeff Roberson 	    SHUTDOWN_PRI_FIRST);
1999405072aSJeff Roberson 
2009405072aSJeff Roberson 	ALD_LOCK();
2019405072aSJeff Roberson 
2029405072aSJeff Roberson 	for (;;) {
203d28f42f9SLawrence Stewart 		while ((alq = LIST_FIRST(&ald_active)) == NULL &&
204d28f42f9SLawrence Stewart 		    !ald_shutingdown)
2059ffad7a9SLawrence Stewart 			mtx_sleep(&ald_active, &ald_mtx, PWAIT, "aldslp", 0);
2069405072aSJeff Roberson 
207d28f42f9SLawrence Stewart 		/* Don't shutdown until all active ALQs are flushed. */
208d28f42f9SLawrence Stewart 		if (ald_shutingdown && alq == NULL) {
209d28f42f9SLawrence Stewart 			ALD_UNLOCK();
210d28f42f9SLawrence Stewart 			break;
211d28f42f9SLawrence Stewart 		}
212d28f42f9SLawrence Stewart 
2139405072aSJeff Roberson 		ALQ_LOCK(alq);
2149405072aSJeff Roberson 		ald_deactivate(alq);
2159405072aSJeff Roberson 		ALD_UNLOCK();
2169405072aSJeff Roberson 		needwakeup = alq_doio(alq);
2179405072aSJeff Roberson 		ALQ_UNLOCK(alq);
2189405072aSJeff Roberson 		if (needwakeup)
2197d11e744SLawrence Stewart 			wakeup_one(alq);
2209405072aSJeff Roberson 		ALD_LOCK();
2219405072aSJeff Roberson 	}
222d28f42f9SLawrence Stewart 
223d28f42f9SLawrence Stewart 	kproc_exit(0);
2249405072aSJeff Roberson }
2259405072aSJeff Roberson 
2269405072aSJeff Roberson static void
2279405072aSJeff Roberson ald_shutdown(void *arg, int howto)
2289405072aSJeff Roberson {
2299405072aSJeff Roberson 	struct alq *alq;
2309405072aSJeff Roberson 
2319405072aSJeff Roberson 	ALD_LOCK();
232d28f42f9SLawrence Stewart 
233d28f42f9SLawrence Stewart 	/* Ensure no new queues can be created. */
2349405072aSJeff Roberson 	ald_shutingdown = 1;
2359405072aSJeff Roberson 
236d28f42f9SLawrence Stewart 	/* Shutdown all ALQs prior to terminating the ald_daemon. */
2379405072aSJeff Roberson 	while ((alq = LIST_FIRST(&ald_queues)) != NULL) {
2389405072aSJeff Roberson 		LIST_REMOVE(alq, aq_link);
2399405072aSJeff Roberson 		ALD_UNLOCK();
2409405072aSJeff Roberson 		alq_shutdown(alq);
2419405072aSJeff Roberson 		ALD_LOCK();
2429405072aSJeff Roberson 	}
243d28f42f9SLawrence Stewart 
244d28f42f9SLawrence Stewart 	/* At this point, all ALQs are flushed and shutdown. */
245d28f42f9SLawrence Stewart 
246d28f42f9SLawrence Stewart 	/*
247d28f42f9SLawrence Stewart 	 * Wake ald_daemon so that it exits. It won't be able to do
2489ffad7a9SLawrence Stewart 	 * anything until we mtx_sleep because we hold the ald_mtx.
249d28f42f9SLawrence Stewart 	 */
250d28f42f9SLawrence Stewart 	wakeup(&ald_active);
251d28f42f9SLawrence Stewart 
252d28f42f9SLawrence Stewart 	/* Wait for ald_daemon to exit. */
2539ffad7a9SLawrence Stewart 	mtx_sleep(ald_proc, &ald_mtx, PWAIT, "aldslp", 0);
254d28f42f9SLawrence Stewart 
2559405072aSJeff Roberson 	ALD_UNLOCK();
2569405072aSJeff Roberson }
2579405072aSJeff Roberson 
2589405072aSJeff Roberson static void
2599405072aSJeff Roberson alq_shutdown(struct alq *alq)
2609405072aSJeff Roberson {
2619405072aSJeff Roberson 	ALQ_LOCK(alq);
2629405072aSJeff Roberson 
2639405072aSJeff Roberson 	/* Stop any new writers. */
2649405072aSJeff Roberson 	alq->aq_flags |= AQ_SHUTDOWN;
2659405072aSJeff Roberson 
2667d11e744SLawrence Stewart 	/*
2677d11e744SLawrence Stewart 	 * If the ALQ isn't active but has unwritten data (possible if
2687d11e744SLawrence Stewart 	 * the ALQ_NOACTIVATE flag has been used), explicitly activate the
2697d11e744SLawrence Stewart 	 * ALQ here so that the pending data gets flushed by the ald_daemon.
2707d11e744SLawrence Stewart 	 */
2717d11e744SLawrence Stewart 	if (!(alq->aq_flags & AQ_ACTIVE) && HAS_PENDING_DATA(alq)) {
2727d11e744SLawrence Stewart 		alq->aq_flags |= AQ_ACTIVE;
2737d11e744SLawrence Stewart 		ALQ_UNLOCK(alq);
2747d11e744SLawrence Stewart 		ALD_LOCK();
2757d11e744SLawrence Stewart 		ald_activate(alq);
2767d11e744SLawrence Stewart 		ALD_UNLOCK();
2777d11e744SLawrence Stewart 		ALQ_LOCK(alq);
2787d11e744SLawrence Stewart 	}
2797d11e744SLawrence Stewart 
2809405072aSJeff Roberson 	/* Drain IO */
28197c11ef2SLawrence Stewart 	while (alq->aq_flags & AQ_ACTIVE) {
2829405072aSJeff Roberson 		alq->aq_flags |= AQ_WANTED;
283bff2d4d5SRoman Divacky 		msleep_spin(alq, &alq->aq_mtx, "aldclose", 0);
2849405072aSJeff Roberson 	}
2859405072aSJeff Roberson 	ALQ_UNLOCK(alq);
2869405072aSJeff Roberson 
287a414302fSJeff Roberson 	vn_close(alq->aq_vp, FWRITE, alq->aq_cred,
2889e9256e2SJeff Roberson 	    curthread);
2899e9256e2SJeff Roberson 	crfree(alq->aq_cred);
2909405072aSJeff Roberson }
2919405072aSJeff Roberson 
292c0ea37a8SLawrence Stewart void
293c0ea37a8SLawrence Stewart alq_destroy(struct alq *alq)
294c0ea37a8SLawrence Stewart {
295c0ea37a8SLawrence Stewart 	/* Drain all pending IO. */
296c0ea37a8SLawrence Stewart 	alq_shutdown(alq);
297c0ea37a8SLawrence Stewart 
298c0ea37a8SLawrence Stewart 	mtx_destroy(&alq->aq_mtx);
299c0ea37a8SLawrence Stewart 	free(alq->aq_entbuf, M_ALD);
300c0ea37a8SLawrence Stewart 	free(alq, M_ALD);
301c0ea37a8SLawrence Stewart }
302c0ea37a8SLawrence Stewart 
3039405072aSJeff Roberson /*
3049405072aSJeff Roberson  * Flush all pending data to disk.  This operation will block.
3059405072aSJeff Roberson  */
3069405072aSJeff Roberson static int
3079405072aSJeff Roberson alq_doio(struct alq *alq)
3089405072aSJeff Roberson {
3099405072aSJeff Roberson 	struct thread *td;
3109405072aSJeff Roberson 	struct mount *mp;
3119405072aSJeff Roberson 	struct vnode *vp;
3129405072aSJeff Roberson 	struct uio auio;
3139405072aSJeff Roberson 	struct iovec aiov[2];
3149405072aSJeff Roberson 	int totlen;
3159405072aSJeff Roberson 	int iov;
3167d11e744SLawrence Stewart 	int wrapearly;
3177d11e744SLawrence Stewart 
3187d11e744SLawrence Stewart 	KASSERT((HAS_PENDING_DATA(alq)), ("%s: queue empty!", __func__));
3199405072aSJeff Roberson 
3209405072aSJeff Roberson 	vp = alq->aq_vp;
3219405072aSJeff Roberson 	td = curthread;
3229405072aSJeff Roberson 	totlen = 0;
3237d11e744SLawrence Stewart 	iov = 1;
3247d11e744SLawrence Stewart 	wrapearly = alq->aq_wrapearly;
3259405072aSJeff Roberson 
3269405072aSJeff Roberson 	bzero(&aiov, sizeof(aiov));
3279405072aSJeff Roberson 	bzero(&auio, sizeof(auio));
3289405072aSJeff Roberson 
3297d11e744SLawrence Stewart 	/* Start the write from the location of our buffer tail pointer. */
3307d11e744SLawrence Stewart 	aiov[0].iov_base = alq->aq_entbuf + alq->aq_writetail;
3317d11e744SLawrence Stewart 
3327d11e744SLawrence Stewart 	if (alq->aq_writetail < alq->aq_writehead) {
3337d11e744SLawrence Stewart 		/* Buffer not wrapped. */
3347d11e744SLawrence Stewart 		totlen = aiov[0].iov_len = alq->aq_writehead - alq->aq_writetail;
3357d11e744SLawrence Stewart 	} else if (alq->aq_writehead == 0) {
3367d11e744SLawrence Stewart 		/* Buffer not wrapped (special case to avoid an empty iov). */
3377d11e744SLawrence Stewart 		totlen = aiov[0].iov_len = alq->aq_buflen - alq->aq_writetail -
3387d11e744SLawrence Stewart 		    wrapearly;
3397d11e744SLawrence Stewart 	} else {
3407d11e744SLawrence Stewart 		/*
3417d11e744SLawrence Stewart 		 * Buffer wrapped, requires 2 aiov entries:
3427d11e744SLawrence Stewart 		 * - first is from writetail to end of buffer
3437d11e744SLawrence Stewart 		 * - second is from start of buffer to writehead
3447d11e744SLawrence Stewart 		 */
3457d11e744SLawrence Stewart 		aiov[0].iov_len = alq->aq_buflen - alq->aq_writetail -
3467d11e744SLawrence Stewart 		    wrapearly;
3479405072aSJeff Roberson 		iov++;
3487d11e744SLawrence Stewart 		aiov[1].iov_base = alq->aq_entbuf;
3497d11e744SLawrence Stewart 		aiov[1].iov_len =  alq->aq_writehead;
3507d11e744SLawrence Stewart 		totlen = aiov[0].iov_len + aiov[1].iov_len;
3517d11e744SLawrence Stewart 	}
3529405072aSJeff Roberson 
3539405072aSJeff Roberson 	alq->aq_flags |= AQ_FLUSHING;
3549405072aSJeff Roberson 	ALQ_UNLOCK(alq);
3559405072aSJeff Roberson 
3569405072aSJeff Roberson 	auio.uio_iov = &aiov[0];
3579405072aSJeff Roberson 	auio.uio_offset = 0;
3589405072aSJeff Roberson 	auio.uio_segflg = UIO_SYSSPACE;
3599405072aSJeff Roberson 	auio.uio_rw = UIO_WRITE;
3607d11e744SLawrence Stewart 	auio.uio_iovcnt = iov;
3619405072aSJeff Roberson 	auio.uio_resid = totlen;
3629405072aSJeff Roberson 	auio.uio_td = td;
3639405072aSJeff Roberson 
3649405072aSJeff Roberson 	/*
3659405072aSJeff Roberson 	 * Do all of the junk required to write now.
3669405072aSJeff Roberson 	 */
3679405072aSJeff Roberson 	vn_start_write(vp, &mp, V_WAIT);
368cb05b60aSAttilio Rao 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
36967536f03SRobert Watson 	/*
37067536f03SRobert Watson 	 * XXX: VOP_WRITE error checks are ignored.
37167536f03SRobert Watson 	 */
37267536f03SRobert Watson #ifdef MAC
37330d239bcSRobert Watson 	if (mac_vnode_check_write(alq->aq_cred, NOCRED, vp) == 0)
37467536f03SRobert Watson #endif
3759e9256e2SJeff Roberson 		VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, alq->aq_cred);
37622db15c0SAttilio Rao 	VOP_UNLOCK(vp, 0);
3779405072aSJeff Roberson 	vn_finished_write(mp);
3789405072aSJeff Roberson 
3799405072aSJeff Roberson 	ALQ_LOCK(alq);
3809405072aSJeff Roberson 	alq->aq_flags &= ~AQ_FLUSHING;
3819405072aSJeff Roberson 
3827d11e744SLawrence Stewart 	/* Adjust writetail as required, taking into account wrapping. */
3837d11e744SLawrence Stewart 	alq->aq_writetail = (alq->aq_writetail + totlen + wrapearly) %
3847d11e744SLawrence Stewart 	    alq->aq_buflen;
3857d11e744SLawrence Stewart 	alq->aq_freebytes += totlen + wrapearly;
3867d11e744SLawrence Stewart 
3877d11e744SLawrence Stewart 	/*
3887d11e744SLawrence Stewart 	 * If we just flushed part of the buffer which wrapped, reset the
3897d11e744SLawrence Stewart 	 * wrapearly indicator.
3907d11e744SLawrence Stewart 	 */
3917d11e744SLawrence Stewart 	if (wrapearly)
3927d11e744SLawrence Stewart 		alq->aq_wrapearly = 0;
3937d11e744SLawrence Stewart 
3947d11e744SLawrence Stewart 	/*
3957d11e744SLawrence Stewart 	 * If we just flushed the buffer completely, reset indexes to 0 to
3967d11e744SLawrence Stewart 	 * minimise buffer wraps.
3977d11e744SLawrence Stewart 	 * This is also required to ensure alq_getn() can't wedge itself.
3987d11e744SLawrence Stewart 	 */
3997d11e744SLawrence Stewart 	if (!HAS_PENDING_DATA(alq))
4007d11e744SLawrence Stewart 		alq->aq_writehead = alq->aq_writetail = 0;
4017d11e744SLawrence Stewart 
4027d11e744SLawrence Stewart 	KASSERT((alq->aq_writetail >= 0 && alq->aq_writetail < alq->aq_buflen),
4037d11e744SLawrence Stewart 	    ("%s: aq_writetail < 0 || aq_writetail >= aq_buflen", __func__));
4049405072aSJeff Roberson 
4059405072aSJeff Roberson 	if (alq->aq_flags & AQ_WANTED) {
4069405072aSJeff Roberson 		alq->aq_flags &= ~AQ_WANTED;
4079405072aSJeff Roberson 		return (1);
4089405072aSJeff Roberson 	}
4099405072aSJeff Roberson 
4109405072aSJeff Roberson 	return(0);
4119405072aSJeff Roberson }
4129405072aSJeff Roberson 
4139405072aSJeff Roberson static struct kproc_desc ald_kp = {
4149405072aSJeff Roberson         "ALQ Daemon",
4159405072aSJeff Roberson         ald_daemon,
416a414302fSJeff Roberson         &ald_proc
4179405072aSJeff Roberson };
4189405072aSJeff Roberson 
419237fdd78SRobert Watson SYSINIT(aldthread, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, kproc_start, &ald_kp);
420237fdd78SRobert Watson SYSINIT(ald, SI_SUB_LOCK, SI_ORDER_ANY, ald_startup, NULL);
4219405072aSJeff Roberson 
4229405072aSJeff Roberson 
4239405072aSJeff Roberson /* User visible queue functions */
4249405072aSJeff Roberson 
4259405072aSJeff Roberson /*
4269405072aSJeff Roberson  * Create the queue data structure, allocate the buffer, and open the file.
4279405072aSJeff Roberson  */
4287d11e744SLawrence Stewart 
4299405072aSJeff Roberson int
4307d11e744SLawrence Stewart alq_open_flags(struct alq **alqp, const char *file, struct ucred *cred, int cmode,
4317d11e744SLawrence Stewart     int size, int flags)
4329405072aSJeff Roberson {
4339405072aSJeff Roberson 	struct thread *td;
4349405072aSJeff Roberson 	struct nameidata nd;
4359405072aSJeff Roberson 	struct alq *alq;
4367d11e744SLawrence Stewart 	int oflags;
4379405072aSJeff Roberson 	int error;
4387d11e744SLawrence Stewart 
4397d11e744SLawrence Stewart 	KASSERT((size > 0), ("%s: size <= 0", __func__));
4409405072aSJeff Roberson 
4419405072aSJeff Roberson 	*alqp = NULL;
4429405072aSJeff Roberson 	td = curthread;
4439405072aSJeff Roberson 
444*5050aa86SKonstantin Belousov 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, file, td);
4457d11e744SLawrence Stewart 	oflags = FWRITE | O_NOFOLLOW | O_CREAT;
4469405072aSJeff Roberson 
4477d11e744SLawrence Stewart 	error = vn_open_cred(&nd, &oflags, cmode, 0, cred, NULL);
4489405072aSJeff Roberson 	if (error)
4499405072aSJeff Roberson 		return (error);
4509405072aSJeff Roberson 
451f220f7afSPawel Jakub Dawidek 	NDFREE(&nd, NDF_ONLY_PNBUF);
4529405072aSJeff Roberson 	/* We just unlock so we hold a reference */
45322db15c0SAttilio Rao 	VOP_UNLOCK(nd.ni_vp, 0);
4549405072aSJeff Roberson 
455a163d034SWarner Losh 	alq = malloc(sizeof(*alq), M_ALD, M_WAITOK|M_ZERO);
4569405072aSJeff Roberson 	alq->aq_vp = nd.ni_vp;
4574b090e41SRobert Watson 	alq->aq_cred = crhold(cred);
4589405072aSJeff Roberson 
4599405072aSJeff Roberson 	mtx_init(&alq->aq_mtx, "ALD Queue", NULL, MTX_SPIN|MTX_QUIET);
4609405072aSJeff Roberson 
4617d11e744SLawrence Stewart 	alq->aq_buflen = size;
4627d11e744SLawrence Stewart 	alq->aq_entmax = 0;
4637d11e744SLawrence Stewart 	alq->aq_entlen = 0;
4649405072aSJeff Roberson 
4657d11e744SLawrence Stewart 	alq->aq_freebytes = alq->aq_buflen;
4667d11e744SLawrence Stewart 	alq->aq_entbuf = malloc(alq->aq_buflen, M_ALD, M_WAITOK|M_ZERO);
4677d11e744SLawrence Stewart 	alq->aq_writehead = alq->aq_writetail = 0;
4687d11e744SLawrence Stewart 	if (flags & ALQ_ORDERED)
4697d11e744SLawrence Stewart 		alq->aq_flags |= AQ_ORDERED;
4709405072aSJeff Roberson 
471c0ea37a8SLawrence Stewart 	if ((error = ald_add(alq)) != 0) {
472c0ea37a8SLawrence Stewart 		alq_destroy(alq);
4739405072aSJeff Roberson 		return (error);
474c0ea37a8SLawrence Stewart 	}
475c0ea37a8SLawrence Stewart 
4769405072aSJeff Roberson 	*alqp = alq;
4779405072aSJeff Roberson 
4789405072aSJeff Roberson 	return (0);
4799405072aSJeff Roberson }
4809405072aSJeff Roberson 
4817d11e744SLawrence Stewart int
4827d11e744SLawrence Stewart alq_open(struct alq **alqp, const char *file, struct ucred *cred, int cmode,
4837d11e744SLawrence Stewart     int size, int count)
4847d11e744SLawrence Stewart {
4857d11e744SLawrence Stewart 	int ret;
4867d11e744SLawrence Stewart 
4877d11e744SLawrence Stewart 	KASSERT((count >= 0), ("%s: count < 0", __func__));
4887d11e744SLawrence Stewart 
4897d11e744SLawrence Stewart 	if (count > 0) {
4907d11e744SLawrence Stewart 		ret = alq_open_flags(alqp, file, cred, cmode, size*count, 0);
4917d11e744SLawrence Stewart 		(*alqp)->aq_flags |= AQ_LEGACY;
4927d11e744SLawrence Stewart 		(*alqp)->aq_entmax = count;
4937d11e744SLawrence Stewart 		(*alqp)->aq_entlen = size;
4947d11e744SLawrence Stewart 	} else
4957d11e744SLawrence Stewart 		ret = alq_open_flags(alqp, file, cred, cmode, size, 0);
4967d11e744SLawrence Stewart 
4977d11e744SLawrence Stewart 	return (ret);
4987d11e744SLawrence Stewart }
4997d11e744SLawrence Stewart 
5007d11e744SLawrence Stewart 
5019405072aSJeff Roberson /*
5029405072aSJeff Roberson  * Copy a new entry into the queue.  If the operation would block either
5039405072aSJeff Roberson  * wait or return an error depending on the value of waitok.
5049405072aSJeff Roberson  */
5059405072aSJeff Roberson int
5067d11e744SLawrence Stewart alq_writen(struct alq *alq, void *data, int len, int flags)
5079405072aSJeff Roberson {
5087d11e744SLawrence Stewart 	int activate, copy, ret;
5097d11e744SLawrence Stewart 	void *waitchan;
5109405072aSJeff Roberson 
5117d11e744SLawrence Stewart 	KASSERT((len > 0 && len <= alq->aq_buflen),
5127d11e744SLawrence Stewart 	    ("%s: len <= 0 || len > aq_buflen", __func__));
5139405072aSJeff Roberson 
5147d11e744SLawrence Stewart 	activate = ret = 0;
5157d11e744SLawrence Stewart 	copy = len;
5167d11e744SLawrence Stewart 	waitchan = NULL;
5179405072aSJeff Roberson 
5189405072aSJeff Roberson 	ALQ_LOCK(alq);
5199405072aSJeff Roberson 
5207d11e744SLawrence Stewart 	/*
5217d11e744SLawrence Stewart 	 * Fail to perform the write and return EWOULDBLOCK if:
5227d11e744SLawrence Stewart 	 * - The message is larger than our underlying buffer.
5237d11e744SLawrence Stewart 	 * - The ALQ is being shutdown.
5247d11e744SLawrence Stewart 	 * - There is insufficient free space in our underlying buffer
5257d11e744SLawrence Stewart 	 *   to accept the message and the user can't wait for space.
5267d11e744SLawrence Stewart 	 * - There is insufficient free space in our underlying buffer
5277d11e744SLawrence Stewart 	 *   to accept the message and the alq is inactive due to prior
5287d11e744SLawrence Stewart 	 *   use of the ALQ_NOACTIVATE flag (which would lead to deadlock).
5297d11e744SLawrence Stewart 	 */
5307d11e744SLawrence Stewart 	if (len > alq->aq_buflen ||
5317d11e744SLawrence Stewart 	    alq->aq_flags & AQ_SHUTDOWN ||
5327d11e744SLawrence Stewart 	    (((flags & ALQ_NOWAIT) || (!(alq->aq_flags & AQ_ACTIVE) &&
5337d11e744SLawrence Stewart 	    HAS_PENDING_DATA(alq))) && alq->aq_freebytes < len)) {
5349405072aSJeff Roberson 		ALQ_UNLOCK(alq);
5357d11e744SLawrence Stewart 		return (EWOULDBLOCK);
5369405072aSJeff Roberson 	}
5379405072aSJeff Roberson 
5387d11e744SLawrence Stewart 	/*
5397d11e744SLawrence Stewart 	 * If we want ordered writes and there is already at least one thread
5407d11e744SLawrence Stewart 	 * waiting for resources to become available, sleep until we're woken.
5417d11e744SLawrence Stewart 	 */
5427d11e744SLawrence Stewart 	if (alq->aq_flags & AQ_ORDERED && alq->aq_waiters > 0) {
5437d11e744SLawrence Stewart 		KASSERT(!(flags & ALQ_NOWAIT),
5447d11e744SLawrence Stewart 		    ("%s: ALQ_NOWAIT set but incorrectly ignored!", __func__));
5457d11e744SLawrence Stewart 		alq->aq_waiters++;
5467d11e744SLawrence Stewart 		msleep_spin(&alq->aq_waiters, &alq->aq_mtx, "alqwnord", 0);
5477d11e744SLawrence Stewart 		alq->aq_waiters--;
5487d11e744SLawrence Stewart 	}
5499405072aSJeff Roberson 
5507d11e744SLawrence Stewart 	/*
5517d11e744SLawrence Stewart 	 * (ALQ_WAITOK && aq_freebytes < len) or aq_freebytes >= len, either
5527d11e744SLawrence Stewart 	 * enter while loop and sleep until we have enough free bytes (former)
5537d11e744SLawrence Stewart 	 * or skip (latter). If AQ_ORDERED is set, only 1 thread at a time will
5547d11e744SLawrence Stewart 	 * be in this loop. Otherwise, multiple threads may be sleeping here
5557d11e744SLawrence Stewart 	 * competing for ALQ resources.
5567d11e744SLawrence Stewart 	 */
5577d11e744SLawrence Stewart 	while (alq->aq_freebytes < len && !(alq->aq_flags & AQ_SHUTDOWN)) {
5587d11e744SLawrence Stewart 		KASSERT(!(flags & ALQ_NOWAIT),
5597d11e744SLawrence Stewart 		    ("%s: ALQ_NOWAIT set but incorrectly ignored!", __func__));
5607d11e744SLawrence Stewart 		alq->aq_flags |= AQ_WANTED;
5617d11e744SLawrence Stewart 		alq->aq_waiters++;
5627d11e744SLawrence Stewart 		if (waitchan)
5637d11e744SLawrence Stewart 			wakeup(waitchan);
5647d11e744SLawrence Stewart 		msleep_spin(alq, &alq->aq_mtx, "alqwnres", 0);
5657d11e744SLawrence Stewart 		alq->aq_waiters--;
5669405072aSJeff Roberson 
5677d11e744SLawrence Stewart 		/*
5687d11e744SLawrence Stewart 		 * If we're the first thread to wake after an AQ_WANTED wakeup
5697d11e744SLawrence Stewart 		 * but there isn't enough free space for us, we're going to loop
5707d11e744SLawrence Stewart 		 * and sleep again. If there are other threads waiting in this
5717d11e744SLawrence Stewart 		 * loop, schedule a wakeup so that they can see if the space
5727d11e744SLawrence Stewart 		 * they require is available.
5737d11e744SLawrence Stewart 		 */
5747d11e744SLawrence Stewart 		if (alq->aq_waiters > 0 && !(alq->aq_flags & AQ_ORDERED) &&
5757d11e744SLawrence Stewart 		    alq->aq_freebytes < len && !(alq->aq_flags & AQ_WANTED))
5767d11e744SLawrence Stewart 			waitchan = alq;
5777d11e744SLawrence Stewart 		else
5787d11e744SLawrence Stewart 			waitchan = NULL;
5797d11e744SLawrence Stewart 	}
5809405072aSJeff Roberson 
5817d11e744SLawrence Stewart 	/*
5827d11e744SLawrence Stewart 	 * If there are waiters, we need to signal the waiting threads after we
5837d11e744SLawrence Stewart 	 * complete our work. The alq ptr is used as a wait channel for threads
5847d11e744SLawrence Stewart 	 * requiring resources to be freed up. In the AQ_ORDERED case, threads
5857d11e744SLawrence Stewart 	 * are not allowed to concurrently compete for resources in the above
5867d11e744SLawrence Stewart 	 * while loop, so we use a different wait channel in this case.
5877d11e744SLawrence Stewart 	 */
5887d11e744SLawrence Stewart 	if (alq->aq_waiters > 0) {
5897d11e744SLawrence Stewart 		if (alq->aq_flags & AQ_ORDERED)
5907d11e744SLawrence Stewart 			waitchan = &alq->aq_waiters;
5917d11e744SLawrence Stewart 		else
5927d11e744SLawrence Stewart 			waitchan = alq;
5937d11e744SLawrence Stewart 	} else
5947d11e744SLawrence Stewart 		waitchan = NULL;
5957d11e744SLawrence Stewart 
5967d11e744SLawrence Stewart 	/* Bail if we're shutting down. */
5977d11e744SLawrence Stewart 	if (alq->aq_flags & AQ_SHUTDOWN) {
5987d11e744SLawrence Stewart 		ret = EWOULDBLOCK;
5997d11e744SLawrence Stewart 		goto unlock;
6007d11e744SLawrence Stewart 	}
6017d11e744SLawrence Stewart 
6027d11e744SLawrence Stewart 	/*
6037d11e744SLawrence Stewart 	 * If we need to wrap the buffer to accommodate the write,
6047d11e744SLawrence Stewart 	 * we'll need 2 calls to bcopy.
6057d11e744SLawrence Stewart 	 */
6067d11e744SLawrence Stewart 	if ((alq->aq_buflen - alq->aq_writehead) < len)
6077d11e744SLawrence Stewart 		copy = alq->aq_buflen - alq->aq_writehead;
6087d11e744SLawrence Stewart 
6097d11e744SLawrence Stewart 	/* Copy message (or part thereof if wrap required) to the buffer. */
6107d11e744SLawrence Stewart 	bcopy(data, alq->aq_entbuf + alq->aq_writehead, copy);
6117d11e744SLawrence Stewart 	alq->aq_writehead += copy;
6127d11e744SLawrence Stewart 
6137d11e744SLawrence Stewart 	if (alq->aq_writehead >= alq->aq_buflen) {
6147d11e744SLawrence Stewart 		KASSERT((alq->aq_writehead == alq->aq_buflen),
6157d11e744SLawrence Stewart 		    ("%s: alq->aq_writehead (%d) > alq->aq_buflen (%d)",
6167d11e744SLawrence Stewart 		    __func__,
6177d11e744SLawrence Stewart 		    alq->aq_writehead,
6187d11e744SLawrence Stewart 		    alq->aq_buflen));
6197d11e744SLawrence Stewart 		alq->aq_writehead = 0;
6207d11e744SLawrence Stewart 	}
6217d11e744SLawrence Stewart 
6227d11e744SLawrence Stewart 	if (copy != len) {
6237d11e744SLawrence Stewart 		/*
6247d11e744SLawrence Stewart 		 * Wrap the buffer by copying the remainder of our message
6257d11e744SLawrence Stewart 		 * to the start of the buffer and resetting aq_writehead.
6267d11e744SLawrence Stewart 		 */
6277d11e744SLawrence Stewart 		bcopy(((uint8_t *)data)+copy, alq->aq_entbuf, len - copy);
6287d11e744SLawrence Stewart 		alq->aq_writehead = len - copy;
6297d11e744SLawrence Stewart 	}
6307d11e744SLawrence Stewart 
6317d11e744SLawrence Stewart 	KASSERT((alq->aq_writehead >= 0 && alq->aq_writehead < alq->aq_buflen),
6327d11e744SLawrence Stewart 	    ("%s: aq_writehead < 0 || aq_writehead >= aq_buflen", __func__));
6337d11e744SLawrence Stewart 
6347d11e744SLawrence Stewart 	alq->aq_freebytes -= len;
6357d11e744SLawrence Stewart 
6367d11e744SLawrence Stewart 	if (!(alq->aq_flags & AQ_ACTIVE) && !(flags & ALQ_NOACTIVATE)) {
6379405072aSJeff Roberson 		alq->aq_flags |= AQ_ACTIVE;
6389405072aSJeff Roberson 		activate = 1;
6397d11e744SLawrence Stewart 	}
6409405072aSJeff Roberson 
6417d11e744SLawrence Stewart 	KASSERT((HAS_PENDING_DATA(alq)), ("%s: queue empty!", __func__));
6427d11e744SLawrence Stewart 
6437d11e744SLawrence Stewart unlock:
6449405072aSJeff Roberson 	ALQ_UNLOCK(alq);
6457d11e744SLawrence Stewart 
6469405072aSJeff Roberson 	if (activate) {
6479405072aSJeff Roberson 		ALD_LOCK();
6489405072aSJeff Roberson 		ald_activate(alq);
6499405072aSJeff Roberson 		ALD_UNLOCK();
6509405072aSJeff Roberson 	}
6517d11e744SLawrence Stewart 
6527d11e744SLawrence Stewart 	/* NB: We rely on wakeup_one waking threads in a FIFO manner. */
6537d11e744SLawrence Stewart 	if (waitchan != NULL)
6547d11e744SLawrence Stewart 		wakeup_one(waitchan);
6557d11e744SLawrence Stewart 
6567d11e744SLawrence Stewart 	return (ret);
6577d11e744SLawrence Stewart }
6587d11e744SLawrence Stewart 
6597d11e744SLawrence Stewart int
6607d11e744SLawrence Stewart alq_write(struct alq *alq, void *data, int flags)
6617d11e744SLawrence Stewart {
6627d11e744SLawrence Stewart 	/* Should only be called in fixed length message (legacy) mode. */
6637d11e744SLawrence Stewart 	KASSERT((alq->aq_flags & AQ_LEGACY),
6647d11e744SLawrence Stewart 	    ("%s: fixed length write on variable length queue", __func__));
6657d11e744SLawrence Stewart 	return (alq_writen(alq, data, alq->aq_entlen, flags));
6667d11e744SLawrence Stewart }
6677d11e744SLawrence Stewart 
6687d11e744SLawrence Stewart /*
6697d11e744SLawrence Stewart  * Retrieve a pointer for the ALQ to write directly into, avoiding bcopy.
6707d11e744SLawrence Stewart  */
6717d11e744SLawrence Stewart struct ale *
6727d11e744SLawrence Stewart alq_getn(struct alq *alq, int len, int flags)
6737d11e744SLawrence Stewart {
6747d11e744SLawrence Stewart 	int contigbytes;
6757d11e744SLawrence Stewart 	void *waitchan;
6767d11e744SLawrence Stewart 
6777d11e744SLawrence Stewart 	KASSERT((len > 0 && len <= alq->aq_buflen),
6787d11e744SLawrence Stewart 	    ("%s: len <= 0 || len > alq->aq_buflen", __func__));
6797d11e744SLawrence Stewart 
6807d11e744SLawrence Stewart 	waitchan = NULL;
6817d11e744SLawrence Stewart 
6827d11e744SLawrence Stewart 	ALQ_LOCK(alq);
6837d11e744SLawrence Stewart 
6847d11e744SLawrence Stewart 	/*
6857d11e744SLawrence Stewart 	 * Determine the number of free contiguous bytes.
6867d11e744SLawrence Stewart 	 * We ensure elsewhere that if aq_writehead == aq_writetail because
6877d11e744SLawrence Stewart 	 * the buffer is empty, they will both be set to 0 and therefore
6887d11e744SLawrence Stewart 	 * aq_freebytes == aq_buflen and is fully contiguous.
6897d11e744SLawrence Stewart 	 * If they are equal and the buffer is not empty, aq_freebytes will
6907d11e744SLawrence Stewart 	 * be 0 indicating the buffer is full.
6917d11e744SLawrence Stewart 	 */
6927d11e744SLawrence Stewart 	if (alq->aq_writehead <= alq->aq_writetail)
6937d11e744SLawrence Stewart 		contigbytes = alq->aq_freebytes;
6947d11e744SLawrence Stewart 	else {
6957d11e744SLawrence Stewart 		contigbytes = alq->aq_buflen - alq->aq_writehead;
6967d11e744SLawrence Stewart 
6977d11e744SLawrence Stewart 		if (contigbytes < len) {
6987d11e744SLawrence Stewart 			/*
6997d11e744SLawrence Stewart 			 * Insufficient space at end of buffer to handle a
7007d11e744SLawrence Stewart 			 * contiguous write. Wrap early if there's space at
7017d11e744SLawrence Stewart 			 * the beginning. This will leave a hole at the end
7027d11e744SLawrence Stewart 			 * of the buffer which we will have to skip over when
7037d11e744SLawrence Stewart 			 * flushing the buffer to disk.
7047d11e744SLawrence Stewart 			 */
7057d11e744SLawrence Stewart 			if (alq->aq_writetail >= len || flags & ALQ_WAITOK) {
7067d11e744SLawrence Stewart 				/* Keep track of # bytes left blank. */
7077d11e744SLawrence Stewart 				alq->aq_wrapearly = contigbytes;
7087d11e744SLawrence Stewart 				/* Do the wrap and adjust counters. */
7097d11e744SLawrence Stewart 				contigbytes = alq->aq_freebytes =
7107d11e744SLawrence Stewart 				    alq->aq_writetail;
7117d11e744SLawrence Stewart 				alq->aq_writehead = 0;
7127d11e744SLawrence Stewart 			}
7137d11e744SLawrence Stewart 		}
7147d11e744SLawrence Stewart 	}
7157d11e744SLawrence Stewart 
7167d11e744SLawrence Stewart 	/*
7177d11e744SLawrence Stewart 	 * Return a NULL ALE if:
7187d11e744SLawrence Stewart 	 * - The message is larger than our underlying buffer.
7197d11e744SLawrence Stewart 	 * - The ALQ is being shutdown.
7207d11e744SLawrence Stewart 	 * - There is insufficient free space in our underlying buffer
7217d11e744SLawrence Stewart 	 *   to accept the message and the user can't wait for space.
7227d11e744SLawrence Stewart 	 * - There is insufficient free space in our underlying buffer
7237d11e744SLawrence Stewart 	 *   to accept the message and the alq is inactive due to prior
7247d11e744SLawrence Stewart 	 *   use of the ALQ_NOACTIVATE flag (which would lead to deadlock).
7257d11e744SLawrence Stewart 	 */
7267d11e744SLawrence Stewart 	if (len > alq->aq_buflen ||
7277d11e744SLawrence Stewart 	    alq->aq_flags & AQ_SHUTDOWN ||
7287d11e744SLawrence Stewart 	    (((flags & ALQ_NOWAIT) || (!(alq->aq_flags & AQ_ACTIVE) &&
7297d11e744SLawrence Stewart 	    HAS_PENDING_DATA(alq))) && contigbytes < len)) {
7307d11e744SLawrence Stewart 		ALQ_UNLOCK(alq);
7317d11e744SLawrence Stewart 		return (NULL);
7327d11e744SLawrence Stewart 	}
7337d11e744SLawrence Stewart 
7347d11e744SLawrence Stewart 	/*
7357d11e744SLawrence Stewart 	 * If we want ordered writes and there is already at least one thread
7367d11e744SLawrence Stewart 	 * waiting for resources to become available, sleep until we're woken.
7377d11e744SLawrence Stewart 	 */
7387d11e744SLawrence Stewart 	if (alq->aq_flags & AQ_ORDERED && alq->aq_waiters > 0) {
7397d11e744SLawrence Stewart 		KASSERT(!(flags & ALQ_NOWAIT),
7407d11e744SLawrence Stewart 		    ("%s: ALQ_NOWAIT set but incorrectly ignored!", __func__));
7417d11e744SLawrence Stewart 		alq->aq_waiters++;
7427d11e744SLawrence Stewart 		msleep_spin(&alq->aq_waiters, &alq->aq_mtx, "alqgnord", 0);
7437d11e744SLawrence Stewart 		alq->aq_waiters--;
7447d11e744SLawrence Stewart 	}
7457d11e744SLawrence Stewart 
7467d11e744SLawrence Stewart 	/*
7477d11e744SLawrence Stewart 	 * (ALQ_WAITOK && contigbytes < len) or contigbytes >= len, either enter
7487d11e744SLawrence Stewart 	 * while loop and sleep until we have enough contiguous free bytes
7497d11e744SLawrence Stewart 	 * (former) or skip (latter). If AQ_ORDERED is set, only 1 thread at a
7507d11e744SLawrence Stewart 	 * time will be in this loop. Otherwise, multiple threads may be
7517d11e744SLawrence Stewart 	 * sleeping here competing for ALQ resources.
7527d11e744SLawrence Stewart 	 */
7537d11e744SLawrence Stewart 	while (contigbytes < len && !(alq->aq_flags & AQ_SHUTDOWN)) {
7547d11e744SLawrence Stewart 		KASSERT(!(flags & ALQ_NOWAIT),
7557d11e744SLawrence Stewart 		    ("%s: ALQ_NOWAIT set but incorrectly ignored!", __func__));
7567d11e744SLawrence Stewart 		alq->aq_flags |= AQ_WANTED;
7577d11e744SLawrence Stewart 		alq->aq_waiters++;
7587d11e744SLawrence Stewart 		if (waitchan)
7597d11e744SLawrence Stewart 			wakeup(waitchan);
7607d11e744SLawrence Stewart 		msleep_spin(alq, &alq->aq_mtx, "alqgnres", 0);
7617d11e744SLawrence Stewart 		alq->aq_waiters--;
7627d11e744SLawrence Stewart 
7637d11e744SLawrence Stewart 		if (alq->aq_writehead <= alq->aq_writetail)
7647d11e744SLawrence Stewart 			contigbytes = alq->aq_freebytes;
7657d11e744SLawrence Stewart 		else
7667d11e744SLawrence Stewart 			contigbytes = alq->aq_buflen - alq->aq_writehead;
7677d11e744SLawrence Stewart 
7687d11e744SLawrence Stewart 		/*
7697d11e744SLawrence Stewart 		 * If we're the first thread to wake after an AQ_WANTED wakeup
7707d11e744SLawrence Stewart 		 * but there isn't enough free space for us, we're going to loop
7717d11e744SLawrence Stewart 		 * and sleep again. If there are other threads waiting in this
7727d11e744SLawrence Stewart 		 * loop, schedule a wakeup so that they can see if the space
7737d11e744SLawrence Stewart 		 * they require is available.
7747d11e744SLawrence Stewart 		 */
7757d11e744SLawrence Stewart 		if (alq->aq_waiters > 0 && !(alq->aq_flags & AQ_ORDERED) &&
7767d11e744SLawrence Stewart 		    contigbytes < len && !(alq->aq_flags & AQ_WANTED))
7777d11e744SLawrence Stewart 			waitchan = alq;
7787d11e744SLawrence Stewart 		else
7797d11e744SLawrence Stewart 			waitchan = NULL;
7807d11e744SLawrence Stewart 	}
7817d11e744SLawrence Stewart 
7827d11e744SLawrence Stewart 	/*
7837d11e744SLawrence Stewart 	 * If there are waiters, we need to signal the waiting threads after we
7847d11e744SLawrence Stewart 	 * complete our work. The alq ptr is used as a wait channel for threads
7857d11e744SLawrence Stewart 	 * requiring resources to be freed up. In the AQ_ORDERED case, threads
7867d11e744SLawrence Stewart 	 * are not allowed to concurrently compete for resources in the above
7877d11e744SLawrence Stewart 	 * while loop, so we use a different wait channel in this case.
7887d11e744SLawrence Stewart 	 */
7897d11e744SLawrence Stewart 	if (alq->aq_waiters > 0) {
7907d11e744SLawrence Stewart 		if (alq->aq_flags & AQ_ORDERED)
7917d11e744SLawrence Stewart 			waitchan = &alq->aq_waiters;
7927d11e744SLawrence Stewart 		else
7937d11e744SLawrence Stewart 			waitchan = alq;
7947d11e744SLawrence Stewart 	} else
7957d11e744SLawrence Stewart 		waitchan = NULL;
7967d11e744SLawrence Stewart 
7977d11e744SLawrence Stewart 	/* Bail if we're shutting down. */
7987d11e744SLawrence Stewart 	if (alq->aq_flags & AQ_SHUTDOWN) {
7997d11e744SLawrence Stewart 		ALQ_UNLOCK(alq);
8007d11e744SLawrence Stewart 		if (waitchan != NULL)
8017d11e744SLawrence Stewart 			wakeup_one(waitchan);
8027d11e744SLawrence Stewart 		return (NULL);
8037d11e744SLawrence Stewart 	}
8047d11e744SLawrence Stewart 
8057d11e744SLawrence Stewart 	/*
8067d11e744SLawrence Stewart 	 * If we are here, we have a contiguous number of bytes >= len
8077d11e744SLawrence Stewart 	 * available in our buffer starting at aq_writehead.
8087d11e744SLawrence Stewart 	 */
8097d11e744SLawrence Stewart 	alq->aq_getpost.ae_data = alq->aq_entbuf + alq->aq_writehead;
8107d11e744SLawrence Stewart 	alq->aq_getpost.ae_bytesused = len;
8117d11e744SLawrence Stewart 
8127d11e744SLawrence Stewart 	return (&alq->aq_getpost);
8137d11e744SLawrence Stewart }
8147d11e744SLawrence Stewart 
8157d11e744SLawrence Stewart struct ale *
8167d11e744SLawrence Stewart alq_get(struct alq *alq, int flags)
8177d11e744SLawrence Stewart {
8187d11e744SLawrence Stewart 	/* Should only be called in fixed length message (legacy) mode. */
8197d11e744SLawrence Stewart 	KASSERT((alq->aq_flags & AQ_LEGACY),
8207d11e744SLawrence Stewart 	    ("%s: fixed length get on variable length queue", __func__));
8217d11e744SLawrence Stewart 	return (alq_getn(alq, alq->aq_entlen, flags));
8227d11e744SLawrence Stewart }
8237d11e744SLawrence Stewart 
8247d11e744SLawrence Stewart void
8257d11e744SLawrence Stewart alq_post_flags(struct alq *alq, struct ale *ale, int flags)
8267d11e744SLawrence Stewart {
8277d11e744SLawrence Stewart 	int activate;
8287d11e744SLawrence Stewart 	void *waitchan;
8297d11e744SLawrence Stewart 
8307d11e744SLawrence Stewart 	activate = 0;
8317d11e744SLawrence Stewart 
8327d11e744SLawrence Stewart 	if (ale->ae_bytesused > 0) {
8337d11e744SLawrence Stewart 		if (!(alq->aq_flags & AQ_ACTIVE) &&
8347d11e744SLawrence Stewart 		    !(flags & ALQ_NOACTIVATE)) {
8357d11e744SLawrence Stewart 			alq->aq_flags |= AQ_ACTIVE;
8367d11e744SLawrence Stewart 			activate = 1;
8377d11e744SLawrence Stewart 		}
8387d11e744SLawrence Stewart 
8397d11e744SLawrence Stewart 		alq->aq_writehead += ale->ae_bytesused;
8407d11e744SLawrence Stewart 		alq->aq_freebytes -= ale->ae_bytesused;
8417d11e744SLawrence Stewart 
8427d11e744SLawrence Stewart 		/* Wrap aq_writehead if we filled to the end of the buffer. */
8437d11e744SLawrence Stewart 		if (alq->aq_writehead == alq->aq_buflen)
8447d11e744SLawrence Stewart 			alq->aq_writehead = 0;
8457d11e744SLawrence Stewart 
8467d11e744SLawrence Stewart 		KASSERT((alq->aq_writehead >= 0 &&
8477d11e744SLawrence Stewart 		    alq->aq_writehead < alq->aq_buflen),
8487d11e744SLawrence Stewart 		    ("%s: aq_writehead < 0 || aq_writehead >= aq_buflen",
8497d11e744SLawrence Stewart 		    __func__));
8507d11e744SLawrence Stewart 
8517d11e744SLawrence Stewart 		KASSERT((HAS_PENDING_DATA(alq)), ("%s: queue empty!", __func__));
8527d11e744SLawrence Stewart 	}
8537d11e744SLawrence Stewart 
8547d11e744SLawrence Stewart 	/*
8557d11e744SLawrence Stewart 	 * If there are waiters, we need to signal the waiting threads after we
8567d11e744SLawrence Stewart 	 * complete our work. The alq ptr is used as a wait channel for threads
8577d11e744SLawrence Stewart 	 * requiring resources to be freed up. In the AQ_ORDERED case, threads
8587d11e744SLawrence Stewart 	 * are not allowed to concurrently compete for resources in the
8597d11e744SLawrence Stewart 	 * alq_getn() while loop, so we use a different wait channel in this case.
8607d11e744SLawrence Stewart 	 */
8617d11e744SLawrence Stewart 	if (alq->aq_waiters > 0) {
8627d11e744SLawrence Stewart 		if (alq->aq_flags & AQ_ORDERED)
8637d11e744SLawrence Stewart 			waitchan = &alq->aq_waiters;
8647d11e744SLawrence Stewart 		else
8657d11e744SLawrence Stewart 			waitchan = alq;
8667d11e744SLawrence Stewart 	} else
8677d11e744SLawrence Stewart 		waitchan = NULL;
8687d11e744SLawrence Stewart 
8697d11e744SLawrence Stewart 	ALQ_UNLOCK(alq);
8707d11e744SLawrence Stewart 
8717d11e744SLawrence Stewart 	if (activate) {
8727d11e744SLawrence Stewart 		ALD_LOCK();
8737d11e744SLawrence Stewart 		ald_activate(alq);
8747d11e744SLawrence Stewart 		ALD_UNLOCK();
8757d11e744SLawrence Stewart 	}
8767d11e744SLawrence Stewart 
8777d11e744SLawrence Stewart 	/* NB: We rely on wakeup_one waking threads in a FIFO manner. */
8787d11e744SLawrence Stewart 	if (waitchan != NULL)
8797d11e744SLawrence Stewart 		wakeup_one(waitchan);
8809405072aSJeff Roberson }
8819405072aSJeff Roberson 
8829405072aSJeff Roberson void
8839405072aSJeff Roberson alq_flush(struct alq *alq)
8849405072aSJeff Roberson {
8859405072aSJeff Roberson 	int needwakeup = 0;
8869405072aSJeff Roberson 
8879405072aSJeff Roberson 	ALD_LOCK();
8889405072aSJeff Roberson 	ALQ_LOCK(alq);
8897d11e744SLawrence Stewart 
8907d11e744SLawrence Stewart 	/*
8917d11e744SLawrence Stewart 	 * Pull the lever iff there is data to flush and we're
8927d11e744SLawrence Stewart 	 * not already in the middle of a flush operation.
8937d11e744SLawrence Stewart 	 */
8947d11e744SLawrence Stewart 	if (HAS_PENDING_DATA(alq) && !(alq->aq_flags & AQ_FLUSHING)) {
8957d11e744SLawrence Stewart 		if (alq->aq_flags & AQ_ACTIVE)
8969405072aSJeff Roberson 			ald_deactivate(alq);
8977d11e744SLawrence Stewart 
8989405072aSJeff Roberson 		ALD_UNLOCK();
8999405072aSJeff Roberson 		needwakeup = alq_doio(alq);
9009405072aSJeff Roberson 	} else
9019405072aSJeff Roberson 		ALD_UNLOCK();
9027d11e744SLawrence Stewart 
9039405072aSJeff Roberson 	ALQ_UNLOCK(alq);
9049405072aSJeff Roberson 
9059405072aSJeff Roberson 	if (needwakeup)
9067d11e744SLawrence Stewart 		wakeup_one(alq);
9079405072aSJeff Roberson }
9089405072aSJeff Roberson 
/*
 * Flush remaining data, close the file and free all resources.
 *
 * The teardown itself is done by alq_destroy(), and only when ald_rem()
 * returns 0 for this queue.
 */
void
alq_close(struct alq *alq)
{
	/* Already shutting down: do not flush and destroy the alq again. */
	if (ald_rem(alq) != 0)
		return;

	alq_destroy(alq);
}
919d28f42f9SLawrence Stewart 
920d28f42f9SLawrence Stewart static int
921d28f42f9SLawrence Stewart alq_load_handler(module_t mod, int what, void *arg)
922d28f42f9SLawrence Stewart {
923d28f42f9SLawrence Stewart 	int ret;
924d28f42f9SLawrence Stewart 
925d28f42f9SLawrence Stewart 	ret = 0;
926d28f42f9SLawrence Stewart 
927d28f42f9SLawrence Stewart 	switch (what) {
928d28f42f9SLawrence Stewart 	case MOD_LOAD:
929d28f42f9SLawrence Stewart 	case MOD_SHUTDOWN:
930d28f42f9SLawrence Stewart 		break;
931d28f42f9SLawrence Stewart 
932d28f42f9SLawrence Stewart 	case MOD_QUIESCE:
933d28f42f9SLawrence Stewart 		ALD_LOCK();
934d28f42f9SLawrence Stewart 		/* Only allow unload if there are no open queues. */
935d28f42f9SLawrence Stewart 		if (LIST_FIRST(&ald_queues) == NULL) {
936d28f42f9SLawrence Stewart 			ald_shutingdown = 1;
937d28f42f9SLawrence Stewart 			ALD_UNLOCK();
938d28f42f9SLawrence Stewart 			ald_shutdown(NULL, 0);
939d28f42f9SLawrence Stewart 			mtx_destroy(&ald_mtx);
940d28f42f9SLawrence Stewart 		} else {
941d28f42f9SLawrence Stewart 			ALD_UNLOCK();
942d28f42f9SLawrence Stewart 			ret = EBUSY;
943d28f42f9SLawrence Stewart 		}
944d28f42f9SLawrence Stewart 		break;
945d28f42f9SLawrence Stewart 
946d28f42f9SLawrence Stewart 	case MOD_UNLOAD:
947d28f42f9SLawrence Stewart 		/* If MOD_QUIESCE failed we must fail here too. */
948d28f42f9SLawrence Stewart 		if (ald_shutingdown == 0)
949d28f42f9SLawrence Stewart 			ret = EBUSY;
950d28f42f9SLawrence Stewart 		break;
951d28f42f9SLawrence Stewart 
952d28f42f9SLawrence Stewart 	default:
953d28f42f9SLawrence Stewart 		ret = EINVAL;
954d28f42f9SLawrence Stewart 		break;
955d28f42f9SLawrence Stewart 	}
956d28f42f9SLawrence Stewart 
957d28f42f9SLawrence Stewart 	return (ret);
958d28f42f9SLawrence Stewart }
959d28f42f9SLawrence Stewart 
960d28f42f9SLawrence Stewart static moduledata_t alq_mod =
961d28f42f9SLawrence Stewart {
962d28f42f9SLawrence Stewart 	"alq",
963d28f42f9SLawrence Stewart 	alq_load_handler,
964d28f42f9SLawrence Stewart 	NULL
965d28f42f9SLawrence Stewart };
966d28f42f9SLawrence Stewart 
967d28f42f9SLawrence Stewart DECLARE_MODULE(alq, alq_mod, SI_SUB_SMP, SI_ORDER_ANY);
968d28f42f9SLawrence Stewart MODULE_VERSION(alq, 1);
969