xref: /freebsd/sys/kern/kern_alq.c (revision 47cedcbd7248ce8e2c695abfade9806adb60aa4d)
19454b2d8SWarner Losh /*-
29405072aSJeff Roberson  * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
3d28f42f9SLawrence Stewart  * Copyright (c) 2008-2009, Lawrence Stewart <lstewart@freebsd.org>
4d28f42f9SLawrence Stewart  * Copyright (c) 2009-2010, The FreeBSD Foundation
59405072aSJeff Roberson  * All rights reserved.
69405072aSJeff Roberson  *
7d28f42f9SLawrence Stewart  * Portions of this software were developed at the Centre for Advanced
8d28f42f9SLawrence Stewart  * Internet Architectures, Swinburne University of Technology, Melbourne,
9d28f42f9SLawrence Stewart  * Australia by Lawrence Stewart under sponsorship from the FreeBSD Foundation.
10d28f42f9SLawrence Stewart  *
119405072aSJeff Roberson  * Redistribution and use in source and binary forms, with or without
129405072aSJeff Roberson  * modification, are permitted provided that the following conditions
139405072aSJeff Roberson  * are met:
149405072aSJeff Roberson  * 1. Redistributions of source code must retain the above copyright
159405072aSJeff Roberson  *    notice unmodified, this list of conditions, and the following
169405072aSJeff Roberson  *    disclaimer.
179405072aSJeff Roberson  * 2. Redistributions in binary form must reproduce the above copyright
189405072aSJeff Roberson  *    notice, this list of conditions and the following disclaimer in the
199405072aSJeff Roberson  *    documentation and/or other materials provided with the distribution.
209405072aSJeff Roberson  *
219405072aSJeff Roberson  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
229405072aSJeff Roberson  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
239405072aSJeff Roberson  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
249405072aSJeff Roberson  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
259405072aSJeff Roberson  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
269405072aSJeff Roberson  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
279405072aSJeff Roberson  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
289405072aSJeff Roberson  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
299405072aSJeff Roberson  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
309405072aSJeff Roberson  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
319405072aSJeff Roberson  */
329405072aSJeff Roberson 
33677b542eSDavid E. O'Brien #include <sys/cdefs.h>
34677b542eSDavid E. O'Brien __FBSDID("$FreeBSD$");
35677b542eSDavid E. O'Brien 
36d28f42f9SLawrence Stewart #include "opt_mac.h"
37d28f42f9SLawrence Stewart 
389405072aSJeff Roberson #include <sys/param.h>
399405072aSJeff Roberson #include <sys/systm.h>
409405072aSJeff Roberson #include <sys/kernel.h>
419405072aSJeff Roberson #include <sys/kthread.h>
429405072aSJeff Roberson #include <sys/lock.h>
4333f19beeSJohn Baldwin #include <sys/mount.h>
449405072aSJeff Roberson #include <sys/mutex.h>
459405072aSJeff Roberson #include <sys/namei.h>
469405072aSJeff Roberson #include <sys/proc.h>
479405072aSJeff Roberson #include <sys/vnode.h>
489405072aSJeff Roberson #include <sys/alq.h>
499405072aSJeff Roberson #include <sys/malloc.h>
509405072aSJeff Roberson #include <sys/unistd.h>
519405072aSJeff Roberson #include <sys/fcntl.h>
529405072aSJeff Roberson #include <sys/eventhandler.h>
539405072aSJeff Roberson 
54aed55708SRobert Watson #include <security/mac/mac_framework.h>
55aed55708SRobert Watson 
569405072aSJeff Roberson /* Async. Logging Queue */
579405072aSJeff Roberson struct alq {
587d11e744SLawrence Stewart 	char	*aq_entbuf;		/* Buffer for stored entries */
599405072aSJeff Roberson 	int	aq_entmax;		/* Max entries */
609405072aSJeff Roberson 	int	aq_entlen;		/* Entry length */
617d11e744SLawrence Stewart 	int	aq_freebytes;		/* Bytes available in buffer */
627d11e744SLawrence Stewart 	int	aq_buflen;		/* Total length of our buffer */
637d11e744SLawrence Stewart 	int	aq_writehead;		/* Location for next write */
647d11e744SLawrence Stewart 	int	aq_writetail;		/* Flush starts at this location */
657d11e744SLawrence Stewart 	int	aq_wrapearly;		/* # bytes left blank at end of buf */
669405072aSJeff Roberson 	int	aq_flags;		/* Queue flags */
677d11e744SLawrence Stewart 	int	aq_waiters;		/* Num threads waiting for resources
687d11e744SLawrence Stewart 					 * NB: Used as a wait channel so must
697d11e744SLawrence Stewart 					 * not be first field in the alq struct
707d11e744SLawrence Stewart 					 */
717d11e744SLawrence Stewart 	struct	ale	aq_getpost;	/* ALE for use by get/post */
729405072aSJeff Roberson 	struct mtx	aq_mtx;		/* Queue lock */
739405072aSJeff Roberson 	struct vnode	*aq_vp;		/* Open vnode handle */
749e9256e2SJeff Roberson 	struct ucred	*aq_cred;	/* Credentials of the opening thread */
759405072aSJeff Roberson 	LIST_ENTRY(alq)	aq_act;		/* List of active queues */
769405072aSJeff Roberson 	LIST_ENTRY(alq)	aq_link;	/* List of all queues */
779405072aSJeff Roberson };
789405072aSJeff Roberson 
/* Bits stored in alq->aq_flags. */
#define	AQ_WANTED	0x0001		/* A sleeper wants a wakeup after IO */
#define	AQ_ACTIVE	0x0002		/* Queue is on the active list */
#define	AQ_FLUSHING	0x0004		/* IO is in progress */
#define	AQ_SHUTDOWN	0x0008		/* Queue is no longer valid */
#define	AQ_ORDERED	0x0010		/* Writes are kept in order */
#define	AQ_LEGACY	0x0020		/* Legacy queue (fixed length writes) */

/* The per-queue lock is a spin mutex. */
#define	ALQ_LOCK(alq)	mtx_lock_spin(&(alq)->aq_mtx)
#define	ALQ_UNLOCK(alq)	mtx_unlock_spin(&(alq)->aq_mtx)

/* True while the buffer holds bytes that have not been flushed yet. */
#define	HAS_PENDING_DATA(alq) ((alq)->aq_freebytes != (alq)->aq_buflen)
907d11e744SLawrence Stewart 
919405072aSJeff Roberson static MALLOC_DEFINE(M_ALD, "ALD", "ALD");
929405072aSJeff Roberson 
939405072aSJeff Roberson /*
949405072aSJeff Roberson  * The ald_mtx protects the ald_queues list and the ald_active list.
959405072aSJeff Roberson  */
969405072aSJeff Roberson static struct mtx ald_mtx;
979405072aSJeff Roberson static LIST_HEAD(, alq) ald_queues;
989405072aSJeff Roberson static LIST_HEAD(, alq) ald_active;
999405072aSJeff Roberson static int ald_shutingdown = 0;
100a414302fSJeff Roberson struct thread *ald_thread;
101a414302fSJeff Roberson static struct proc *ald_proc;
1027639c9beSLawrence Stewart static eventhandler_tag alq_eventhandler_tag = NULL;
1039405072aSJeff Roberson 
1049405072aSJeff Roberson #define	ALD_LOCK()	mtx_lock(&ald_mtx)
1059405072aSJeff Roberson #define	ALD_UNLOCK()	mtx_unlock(&ald_mtx)
1069405072aSJeff Roberson 
1079405072aSJeff Roberson /* Daemon functions */
1089405072aSJeff Roberson static int ald_add(struct alq *);
1099405072aSJeff Roberson static int ald_rem(struct alq *);
1109405072aSJeff Roberson static void ald_startup(void *);
1119405072aSJeff Roberson static void ald_daemon(void);
1129405072aSJeff Roberson static void ald_shutdown(void *, int);
1139405072aSJeff Roberson static void ald_activate(struct alq *);
1149405072aSJeff Roberson static void ald_deactivate(struct alq *);
1159405072aSJeff Roberson 
1169405072aSJeff Roberson /* Internal queue functions */
1179405072aSJeff Roberson static void alq_shutdown(struct alq *);
118c0ea37a8SLawrence Stewart static void alq_destroy(struct alq *);
1199405072aSJeff Roberson static int alq_doio(struct alq *);
1209405072aSJeff Roberson 
1219405072aSJeff Roberson 
1229405072aSJeff Roberson /*
1239405072aSJeff Roberson  * Add a new queue to the global list.  Fail if we're shutting down.
1249405072aSJeff Roberson  */
1259405072aSJeff Roberson static int
1269405072aSJeff Roberson ald_add(struct alq *alq)
1279405072aSJeff Roberson {
1289405072aSJeff Roberson 	int error;
1299405072aSJeff Roberson 
1309405072aSJeff Roberson 	error = 0;
1319405072aSJeff Roberson 
1329405072aSJeff Roberson 	ALD_LOCK();
1339405072aSJeff Roberson 	if (ald_shutingdown) {
1349405072aSJeff Roberson 		error = EBUSY;
1359405072aSJeff Roberson 		goto done;
1369405072aSJeff Roberson 	}
1379405072aSJeff Roberson 	LIST_INSERT_HEAD(&ald_queues, alq, aq_link);
1389405072aSJeff Roberson done:
1399405072aSJeff Roberson 	ALD_UNLOCK();
1409405072aSJeff Roberson 	return (error);
1419405072aSJeff Roberson }
1429405072aSJeff Roberson 
1439405072aSJeff Roberson /*
1449405072aSJeff Roberson  * Remove a queue from the global list unless we're shutting down.  If so,
1459405072aSJeff Roberson  * the ald will take care of cleaning up it's resources.
1469405072aSJeff Roberson  */
1479405072aSJeff Roberson static int
1489405072aSJeff Roberson ald_rem(struct alq *alq)
1499405072aSJeff Roberson {
1509405072aSJeff Roberson 	int error;
1519405072aSJeff Roberson 
1529405072aSJeff Roberson 	error = 0;
1539405072aSJeff Roberson 
1549405072aSJeff Roberson 	ALD_LOCK();
1559405072aSJeff Roberson 	if (ald_shutingdown) {
1569405072aSJeff Roberson 		error = EBUSY;
1579405072aSJeff Roberson 		goto done;
1589405072aSJeff Roberson 	}
1599405072aSJeff Roberson 	LIST_REMOVE(alq, aq_link);
1609405072aSJeff Roberson done:
1619405072aSJeff Roberson 	ALD_UNLOCK();
1629405072aSJeff Roberson 	return (error);
1639405072aSJeff Roberson }
1649405072aSJeff Roberson 
1659405072aSJeff Roberson /*
1669405072aSJeff Roberson  * Put a queue on the active list.  This will schedule it for writing.
1679405072aSJeff Roberson  */
1689405072aSJeff Roberson static void
1699405072aSJeff Roberson ald_activate(struct alq *alq)
1709405072aSJeff Roberson {
1719405072aSJeff Roberson 	LIST_INSERT_HEAD(&ald_active, alq, aq_act);
1729405072aSJeff Roberson 	wakeup(&ald_active);
1739405072aSJeff Roberson }
1749405072aSJeff Roberson 
1759405072aSJeff Roberson static void
1769405072aSJeff Roberson ald_deactivate(struct alq *alq)
1779405072aSJeff Roberson {
1789405072aSJeff Roberson 	LIST_REMOVE(alq, aq_act);
1799405072aSJeff Roberson 	alq->aq_flags &= ~AQ_ACTIVE;
1809405072aSJeff Roberson }
1819405072aSJeff Roberson 
1829405072aSJeff Roberson static void
1839405072aSJeff Roberson ald_startup(void *unused)
1849405072aSJeff Roberson {
1859405072aSJeff Roberson 	mtx_init(&ald_mtx, "ALDmtx", NULL, MTX_DEF|MTX_QUIET);
1869405072aSJeff Roberson 	LIST_INIT(&ald_queues);
1879405072aSJeff Roberson 	LIST_INIT(&ald_active);
1889405072aSJeff Roberson }
1899405072aSJeff Roberson 
1909405072aSJeff Roberson static void
1919405072aSJeff Roberson ald_daemon(void)
1929405072aSJeff Roberson {
1939405072aSJeff Roberson 	int needwakeup;
1949405072aSJeff Roberson 	struct alq *alq;
1959405072aSJeff Roberson 
196a414302fSJeff Roberson 	ald_thread = FIRST_THREAD_IN_PROC(ald_proc);
197a414302fSJeff Roberson 
1987639c9beSLawrence Stewart 	alq_eventhandler_tag = EVENTHANDLER_REGISTER(shutdown_pre_sync,
1997639c9beSLawrence Stewart 	    ald_shutdown, NULL, SHUTDOWN_PRI_FIRST);
2009405072aSJeff Roberson 
2019405072aSJeff Roberson 	ALD_LOCK();
2029405072aSJeff Roberson 
2039405072aSJeff Roberson 	for (;;) {
204d28f42f9SLawrence Stewart 		while ((alq = LIST_FIRST(&ald_active)) == NULL &&
205d28f42f9SLawrence Stewart 		    !ald_shutingdown)
2069ffad7a9SLawrence Stewart 			mtx_sleep(&ald_active, &ald_mtx, PWAIT, "aldslp", 0);
2079405072aSJeff Roberson 
208d28f42f9SLawrence Stewart 		/* Don't shutdown until all active ALQs are flushed. */
209d28f42f9SLawrence Stewart 		if (ald_shutingdown && alq == NULL) {
210d28f42f9SLawrence Stewart 			ALD_UNLOCK();
211d28f42f9SLawrence Stewart 			break;
212d28f42f9SLawrence Stewart 		}
213d28f42f9SLawrence Stewart 
2149405072aSJeff Roberson 		ALQ_LOCK(alq);
2159405072aSJeff Roberson 		ald_deactivate(alq);
2169405072aSJeff Roberson 		ALD_UNLOCK();
2179405072aSJeff Roberson 		needwakeup = alq_doio(alq);
2189405072aSJeff Roberson 		ALQ_UNLOCK(alq);
2199405072aSJeff Roberson 		if (needwakeup)
2207d11e744SLawrence Stewart 			wakeup_one(alq);
2219405072aSJeff Roberson 		ALD_LOCK();
2229405072aSJeff Roberson 	}
223d28f42f9SLawrence Stewart 
224d28f42f9SLawrence Stewart 	kproc_exit(0);
2259405072aSJeff Roberson }
2269405072aSJeff Roberson 
2279405072aSJeff Roberson static void
2289405072aSJeff Roberson ald_shutdown(void *arg, int howto)
2299405072aSJeff Roberson {
2309405072aSJeff Roberson 	struct alq *alq;
2319405072aSJeff Roberson 
2329405072aSJeff Roberson 	ALD_LOCK();
233d28f42f9SLawrence Stewart 
234d28f42f9SLawrence Stewart 	/* Ensure no new queues can be created. */
2359405072aSJeff Roberson 	ald_shutingdown = 1;
2369405072aSJeff Roberson 
237d28f42f9SLawrence Stewart 	/* Shutdown all ALQs prior to terminating the ald_daemon. */
2389405072aSJeff Roberson 	while ((alq = LIST_FIRST(&ald_queues)) != NULL) {
2399405072aSJeff Roberson 		LIST_REMOVE(alq, aq_link);
2409405072aSJeff Roberson 		ALD_UNLOCK();
2419405072aSJeff Roberson 		alq_shutdown(alq);
2429405072aSJeff Roberson 		ALD_LOCK();
2439405072aSJeff Roberson 	}
244d28f42f9SLawrence Stewart 
245d28f42f9SLawrence Stewart 	/* At this point, all ALQs are flushed and shutdown. */
246d28f42f9SLawrence Stewart 
247d28f42f9SLawrence Stewart 	/*
248d28f42f9SLawrence Stewart 	 * Wake ald_daemon so that it exits. It won't be able to do
2499ffad7a9SLawrence Stewart 	 * anything until we mtx_sleep because we hold the ald_mtx.
250d28f42f9SLawrence Stewart 	 */
251d28f42f9SLawrence Stewart 	wakeup(&ald_active);
252d28f42f9SLawrence Stewart 
253d28f42f9SLawrence Stewart 	/* Wait for ald_daemon to exit. */
2549ffad7a9SLawrence Stewart 	mtx_sleep(ald_proc, &ald_mtx, PWAIT, "aldslp", 0);
255d28f42f9SLawrence Stewart 
2569405072aSJeff Roberson 	ALD_UNLOCK();
2579405072aSJeff Roberson }
2589405072aSJeff Roberson 
2599405072aSJeff Roberson static void
2609405072aSJeff Roberson alq_shutdown(struct alq *alq)
2619405072aSJeff Roberson {
2629405072aSJeff Roberson 	ALQ_LOCK(alq);
2639405072aSJeff Roberson 
2649405072aSJeff Roberson 	/* Stop any new writers. */
2659405072aSJeff Roberson 	alq->aq_flags |= AQ_SHUTDOWN;
2669405072aSJeff Roberson 
2677d11e744SLawrence Stewart 	/*
2687d11e744SLawrence Stewart 	 * If the ALQ isn't active but has unwritten data (possible if
2697d11e744SLawrence Stewart 	 * the ALQ_NOACTIVATE flag has been used), explicitly activate the
2707d11e744SLawrence Stewart 	 * ALQ here so that the pending data gets flushed by the ald_daemon.
2717d11e744SLawrence Stewart 	 */
2727d11e744SLawrence Stewart 	if (!(alq->aq_flags & AQ_ACTIVE) && HAS_PENDING_DATA(alq)) {
2737d11e744SLawrence Stewart 		alq->aq_flags |= AQ_ACTIVE;
2747d11e744SLawrence Stewart 		ALQ_UNLOCK(alq);
2757d11e744SLawrence Stewart 		ALD_LOCK();
2767d11e744SLawrence Stewart 		ald_activate(alq);
2777d11e744SLawrence Stewart 		ALD_UNLOCK();
2787d11e744SLawrence Stewart 		ALQ_LOCK(alq);
2797d11e744SLawrence Stewart 	}
2807d11e744SLawrence Stewart 
2819405072aSJeff Roberson 	/* Drain IO */
28297c11ef2SLawrence Stewart 	while (alq->aq_flags & AQ_ACTIVE) {
2839405072aSJeff Roberson 		alq->aq_flags |= AQ_WANTED;
284bff2d4d5SRoman Divacky 		msleep_spin(alq, &alq->aq_mtx, "aldclose", 0);
2859405072aSJeff Roberson 	}
2869405072aSJeff Roberson 	ALQ_UNLOCK(alq);
2879405072aSJeff Roberson 
288a414302fSJeff Roberson 	vn_close(alq->aq_vp, FWRITE, alq->aq_cred,
2899e9256e2SJeff Roberson 	    curthread);
2909e9256e2SJeff Roberson 	crfree(alq->aq_cred);
2919405072aSJeff Roberson }
2929405072aSJeff Roberson 
293c0ea37a8SLawrence Stewart void
294c0ea37a8SLawrence Stewart alq_destroy(struct alq *alq)
295c0ea37a8SLawrence Stewart {
296c0ea37a8SLawrence Stewart 	/* Drain all pending IO. */
297c0ea37a8SLawrence Stewart 	alq_shutdown(alq);
298c0ea37a8SLawrence Stewart 
299c0ea37a8SLawrence Stewart 	mtx_destroy(&alq->aq_mtx);
300c0ea37a8SLawrence Stewart 	free(alq->aq_entbuf, M_ALD);
301c0ea37a8SLawrence Stewart 	free(alq, M_ALD);
302c0ea37a8SLawrence Stewart }
303c0ea37a8SLawrence Stewart 
3049405072aSJeff Roberson /*
3059405072aSJeff Roberson  * Flush all pending data to disk.  This operation will block.
3069405072aSJeff Roberson  */
3079405072aSJeff Roberson static int
3089405072aSJeff Roberson alq_doio(struct alq *alq)
3099405072aSJeff Roberson {
3109405072aSJeff Roberson 	struct thread *td;
3119405072aSJeff Roberson 	struct mount *mp;
3129405072aSJeff Roberson 	struct vnode *vp;
3139405072aSJeff Roberson 	struct uio auio;
3149405072aSJeff Roberson 	struct iovec aiov[2];
3159405072aSJeff Roberson 	int totlen;
3169405072aSJeff Roberson 	int iov;
3177d11e744SLawrence Stewart 	int wrapearly;
3187d11e744SLawrence Stewart 
3197d11e744SLawrence Stewart 	KASSERT((HAS_PENDING_DATA(alq)), ("%s: queue empty!", __func__));
3209405072aSJeff Roberson 
3219405072aSJeff Roberson 	vp = alq->aq_vp;
3229405072aSJeff Roberson 	td = curthread;
3239405072aSJeff Roberson 	totlen = 0;
3247d11e744SLawrence Stewart 	iov = 1;
3257d11e744SLawrence Stewart 	wrapearly = alq->aq_wrapearly;
3269405072aSJeff Roberson 
3279405072aSJeff Roberson 	bzero(&aiov, sizeof(aiov));
3289405072aSJeff Roberson 	bzero(&auio, sizeof(auio));
3299405072aSJeff Roberson 
3307d11e744SLawrence Stewart 	/* Start the write from the location of our buffer tail pointer. */
3317d11e744SLawrence Stewart 	aiov[0].iov_base = alq->aq_entbuf + alq->aq_writetail;
3327d11e744SLawrence Stewart 
3337d11e744SLawrence Stewart 	if (alq->aq_writetail < alq->aq_writehead) {
3347d11e744SLawrence Stewart 		/* Buffer not wrapped. */
3357d11e744SLawrence Stewart 		totlen = aiov[0].iov_len = alq->aq_writehead - alq->aq_writetail;
3367d11e744SLawrence Stewart 	} else if (alq->aq_writehead == 0) {
3377d11e744SLawrence Stewart 		/* Buffer not wrapped (special case to avoid an empty iov). */
3387d11e744SLawrence Stewart 		totlen = aiov[0].iov_len = alq->aq_buflen - alq->aq_writetail -
3397d11e744SLawrence Stewart 		    wrapearly;
3407d11e744SLawrence Stewart 	} else {
3417d11e744SLawrence Stewart 		/*
3427d11e744SLawrence Stewart 		 * Buffer wrapped, requires 2 aiov entries:
3437d11e744SLawrence Stewart 		 * - first is from writetail to end of buffer
3447d11e744SLawrence Stewart 		 * - second is from start of buffer to writehead
3457d11e744SLawrence Stewart 		 */
3467d11e744SLawrence Stewart 		aiov[0].iov_len = alq->aq_buflen - alq->aq_writetail -
3477d11e744SLawrence Stewart 		    wrapearly;
3489405072aSJeff Roberson 		iov++;
3497d11e744SLawrence Stewart 		aiov[1].iov_base = alq->aq_entbuf;
3507d11e744SLawrence Stewart 		aiov[1].iov_len =  alq->aq_writehead;
3517d11e744SLawrence Stewart 		totlen = aiov[0].iov_len + aiov[1].iov_len;
3527d11e744SLawrence Stewart 	}
3539405072aSJeff Roberson 
3549405072aSJeff Roberson 	alq->aq_flags |= AQ_FLUSHING;
3559405072aSJeff Roberson 	ALQ_UNLOCK(alq);
3569405072aSJeff Roberson 
3579405072aSJeff Roberson 	auio.uio_iov = &aiov[0];
3589405072aSJeff Roberson 	auio.uio_offset = 0;
3599405072aSJeff Roberson 	auio.uio_segflg = UIO_SYSSPACE;
3609405072aSJeff Roberson 	auio.uio_rw = UIO_WRITE;
3617d11e744SLawrence Stewart 	auio.uio_iovcnt = iov;
3629405072aSJeff Roberson 	auio.uio_resid = totlen;
3639405072aSJeff Roberson 	auio.uio_td = td;
3649405072aSJeff Roberson 
3659405072aSJeff Roberson 	/*
3669405072aSJeff Roberson 	 * Do all of the junk required to write now.
3679405072aSJeff Roberson 	 */
3689405072aSJeff Roberson 	vn_start_write(vp, &mp, V_WAIT);
369cb05b60aSAttilio Rao 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
37067536f03SRobert Watson 	/*
37167536f03SRobert Watson 	 * XXX: VOP_WRITE error checks are ignored.
37267536f03SRobert Watson 	 */
37367536f03SRobert Watson #ifdef MAC
37430d239bcSRobert Watson 	if (mac_vnode_check_write(alq->aq_cred, NOCRED, vp) == 0)
37567536f03SRobert Watson #endif
3769e9256e2SJeff Roberson 		VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, alq->aq_cred);
37722db15c0SAttilio Rao 	VOP_UNLOCK(vp, 0);
3789405072aSJeff Roberson 	vn_finished_write(mp);
3799405072aSJeff Roberson 
3809405072aSJeff Roberson 	ALQ_LOCK(alq);
3819405072aSJeff Roberson 	alq->aq_flags &= ~AQ_FLUSHING;
3829405072aSJeff Roberson 
3837d11e744SLawrence Stewart 	/* Adjust writetail as required, taking into account wrapping. */
3847d11e744SLawrence Stewart 	alq->aq_writetail = (alq->aq_writetail + totlen + wrapearly) %
3857d11e744SLawrence Stewart 	    alq->aq_buflen;
3867d11e744SLawrence Stewart 	alq->aq_freebytes += totlen + wrapearly;
3877d11e744SLawrence Stewart 
3887d11e744SLawrence Stewart 	/*
3897d11e744SLawrence Stewart 	 * If we just flushed part of the buffer which wrapped, reset the
3907d11e744SLawrence Stewart 	 * wrapearly indicator.
3917d11e744SLawrence Stewart 	 */
3927d11e744SLawrence Stewart 	if (wrapearly)
3937d11e744SLawrence Stewart 		alq->aq_wrapearly = 0;
3947d11e744SLawrence Stewart 
3957d11e744SLawrence Stewart 	/*
3967d11e744SLawrence Stewart 	 * If we just flushed the buffer completely, reset indexes to 0 to
3977d11e744SLawrence Stewart 	 * minimise buffer wraps.
3987d11e744SLawrence Stewart 	 * This is also required to ensure alq_getn() can't wedge itself.
3997d11e744SLawrence Stewart 	 */
4007d11e744SLawrence Stewart 	if (!HAS_PENDING_DATA(alq))
4017d11e744SLawrence Stewart 		alq->aq_writehead = alq->aq_writetail = 0;
4027d11e744SLawrence Stewart 
4037d11e744SLawrence Stewart 	KASSERT((alq->aq_writetail >= 0 && alq->aq_writetail < alq->aq_buflen),
4047d11e744SLawrence Stewart 	    ("%s: aq_writetail < 0 || aq_writetail >= aq_buflen", __func__));
4059405072aSJeff Roberson 
4069405072aSJeff Roberson 	if (alq->aq_flags & AQ_WANTED) {
4079405072aSJeff Roberson 		alq->aq_flags &= ~AQ_WANTED;
4089405072aSJeff Roberson 		return (1);
4099405072aSJeff Roberson 	}
4109405072aSJeff Roberson 
4119405072aSJeff Roberson 	return(0);
4129405072aSJeff Roberson }
4139405072aSJeff Roberson 
4149405072aSJeff Roberson static struct kproc_desc ald_kp = {
4159405072aSJeff Roberson         "ALQ Daemon",
4169405072aSJeff Roberson         ald_daemon,
417a414302fSJeff Roberson         &ald_proc
4189405072aSJeff Roberson };
4199405072aSJeff Roberson 
420237fdd78SRobert Watson SYSINIT(aldthread, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, kproc_start, &ald_kp);
421237fdd78SRobert Watson SYSINIT(ald, SI_SUB_LOCK, SI_ORDER_ANY, ald_startup, NULL);
4229405072aSJeff Roberson 
4239405072aSJeff Roberson 
4249405072aSJeff Roberson /* User visible queue functions */
4259405072aSJeff Roberson 
4269405072aSJeff Roberson /*
4279405072aSJeff Roberson  * Create the queue data structure, allocate the buffer, and open the file.
4289405072aSJeff Roberson  */
4297d11e744SLawrence Stewart 
4309405072aSJeff Roberson int
4317d11e744SLawrence Stewart alq_open_flags(struct alq **alqp, const char *file, struct ucred *cred, int cmode,
4327d11e744SLawrence Stewart     int size, int flags)
4339405072aSJeff Roberson {
4349405072aSJeff Roberson 	struct thread *td;
4359405072aSJeff Roberson 	struct nameidata nd;
4369405072aSJeff Roberson 	struct alq *alq;
4377d11e744SLawrence Stewart 	int oflags;
4389405072aSJeff Roberson 	int error;
4397d11e744SLawrence Stewart 
4407d11e744SLawrence Stewart 	KASSERT((size > 0), ("%s: size <= 0", __func__));
4419405072aSJeff Roberson 
4429405072aSJeff Roberson 	*alqp = NULL;
4439405072aSJeff Roberson 	td = curthread;
4449405072aSJeff Roberson 
4455050aa86SKonstantin Belousov 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, file, td);
4467d11e744SLawrence Stewart 	oflags = FWRITE | O_NOFOLLOW | O_CREAT;
4479405072aSJeff Roberson 
4487d11e744SLawrence Stewart 	error = vn_open_cred(&nd, &oflags, cmode, 0, cred, NULL);
4499405072aSJeff Roberson 	if (error)
4509405072aSJeff Roberson 		return (error);
4519405072aSJeff Roberson 
452f220f7afSPawel Jakub Dawidek 	NDFREE(&nd, NDF_ONLY_PNBUF);
4539405072aSJeff Roberson 	/* We just unlock so we hold a reference */
45422db15c0SAttilio Rao 	VOP_UNLOCK(nd.ni_vp, 0);
4559405072aSJeff Roberson 
456a163d034SWarner Losh 	alq = malloc(sizeof(*alq), M_ALD, M_WAITOK|M_ZERO);
4579405072aSJeff Roberson 	alq->aq_vp = nd.ni_vp;
4584b090e41SRobert Watson 	alq->aq_cred = crhold(cred);
4599405072aSJeff Roberson 
4609405072aSJeff Roberson 	mtx_init(&alq->aq_mtx, "ALD Queue", NULL, MTX_SPIN|MTX_QUIET);
4619405072aSJeff Roberson 
4627d11e744SLawrence Stewart 	alq->aq_buflen = size;
4637d11e744SLawrence Stewart 	alq->aq_entmax = 0;
4647d11e744SLawrence Stewart 	alq->aq_entlen = 0;
4659405072aSJeff Roberson 
4667d11e744SLawrence Stewart 	alq->aq_freebytes = alq->aq_buflen;
4677d11e744SLawrence Stewart 	alq->aq_entbuf = malloc(alq->aq_buflen, M_ALD, M_WAITOK|M_ZERO);
4687d11e744SLawrence Stewart 	alq->aq_writehead = alq->aq_writetail = 0;
4697d11e744SLawrence Stewart 	if (flags & ALQ_ORDERED)
4707d11e744SLawrence Stewart 		alq->aq_flags |= AQ_ORDERED;
4719405072aSJeff Roberson 
472c0ea37a8SLawrence Stewart 	if ((error = ald_add(alq)) != 0) {
473c0ea37a8SLawrence Stewart 		alq_destroy(alq);
4749405072aSJeff Roberson 		return (error);
475c0ea37a8SLawrence Stewart 	}
476c0ea37a8SLawrence Stewart 
4779405072aSJeff Roberson 	*alqp = alq;
4789405072aSJeff Roberson 
4799405072aSJeff Roberson 	return (0);
4809405072aSJeff Roberson }
4819405072aSJeff Roberson 
4827d11e744SLawrence Stewart int
4837d11e744SLawrence Stewart alq_open(struct alq **alqp, const char *file, struct ucred *cred, int cmode,
4847d11e744SLawrence Stewart     int size, int count)
4857d11e744SLawrence Stewart {
4867d11e744SLawrence Stewart 	int ret;
4877d11e744SLawrence Stewart 
4887d11e744SLawrence Stewart 	KASSERT((count >= 0), ("%s: count < 0", __func__));
4897d11e744SLawrence Stewart 
4907d11e744SLawrence Stewart 	if (count > 0) {
4916b57eff4SDmitry Chagin 		if ((ret = alq_open_flags(alqp, file, cred, cmode,
4926b57eff4SDmitry Chagin 		    size*count, 0)) == 0) {
4937d11e744SLawrence Stewart 			(*alqp)->aq_flags |= AQ_LEGACY;
4947d11e744SLawrence Stewart 			(*alqp)->aq_entmax = count;
4957d11e744SLawrence Stewart 			(*alqp)->aq_entlen = size;
4966b57eff4SDmitry Chagin 		}
4977d11e744SLawrence Stewart 	} else
4987d11e744SLawrence Stewart 		ret = alq_open_flags(alqp, file, cred, cmode, size, 0);
4997d11e744SLawrence Stewart 
5007d11e744SLawrence Stewart 	return (ret);
5017d11e744SLawrence Stewart }
5027d11e744SLawrence Stewart 
5037d11e744SLawrence Stewart 
/*
 * Copy a new entry into the queue.  If the operation would block, either
 * wait or return an error, depending on whether ALQ_NOWAIT is set in flags.
 */
5089405072aSJeff Roberson int
5097d11e744SLawrence Stewart alq_writen(struct alq *alq, void *data, int len, int flags)
5109405072aSJeff Roberson {
5117d11e744SLawrence Stewart 	int activate, copy, ret;
5127d11e744SLawrence Stewart 	void *waitchan;
5139405072aSJeff Roberson 
5147d11e744SLawrence Stewart 	KASSERT((len > 0 && len <= alq->aq_buflen),
5157d11e744SLawrence Stewart 	    ("%s: len <= 0 || len > aq_buflen", __func__));
5169405072aSJeff Roberson 
5177d11e744SLawrence Stewart 	activate = ret = 0;
5187d11e744SLawrence Stewart 	copy = len;
5197d11e744SLawrence Stewart 	waitchan = NULL;
5209405072aSJeff Roberson 
5219405072aSJeff Roberson 	ALQ_LOCK(alq);
5229405072aSJeff Roberson 
5237d11e744SLawrence Stewart 	/*
5247d11e744SLawrence Stewart 	 * Fail to perform the write and return EWOULDBLOCK if:
5257d11e744SLawrence Stewart 	 * - The message is larger than our underlying buffer.
5267d11e744SLawrence Stewart 	 * - The ALQ is being shutdown.
5277d11e744SLawrence Stewart 	 * - There is insufficient free space in our underlying buffer
5287d11e744SLawrence Stewart 	 *   to accept the message and the user can't wait for space.
5297d11e744SLawrence Stewart 	 * - There is insufficient free space in our underlying buffer
5307d11e744SLawrence Stewart 	 *   to accept the message and the alq is inactive due to prior
5317d11e744SLawrence Stewart 	 *   use of the ALQ_NOACTIVATE flag (which would lead to deadlock).
5327d11e744SLawrence Stewart 	 */
5337d11e744SLawrence Stewart 	if (len > alq->aq_buflen ||
5347d11e744SLawrence Stewart 	    alq->aq_flags & AQ_SHUTDOWN ||
5357d11e744SLawrence Stewart 	    (((flags & ALQ_NOWAIT) || (!(alq->aq_flags & AQ_ACTIVE) &&
5367d11e744SLawrence Stewart 	    HAS_PENDING_DATA(alq))) && alq->aq_freebytes < len)) {
5379405072aSJeff Roberson 		ALQ_UNLOCK(alq);
5387d11e744SLawrence Stewart 		return (EWOULDBLOCK);
5399405072aSJeff Roberson 	}
5409405072aSJeff Roberson 
5417d11e744SLawrence Stewart 	/*
5427d11e744SLawrence Stewart 	 * If we want ordered writes and there is already at least one thread
5437d11e744SLawrence Stewart 	 * waiting for resources to become available, sleep until we're woken.
5447d11e744SLawrence Stewart 	 */
5457d11e744SLawrence Stewart 	if (alq->aq_flags & AQ_ORDERED && alq->aq_waiters > 0) {
5467d11e744SLawrence Stewart 		KASSERT(!(flags & ALQ_NOWAIT),
5477d11e744SLawrence Stewart 		    ("%s: ALQ_NOWAIT set but incorrectly ignored!", __func__));
5487d11e744SLawrence Stewart 		alq->aq_waiters++;
5497d11e744SLawrence Stewart 		msleep_spin(&alq->aq_waiters, &alq->aq_mtx, "alqwnord", 0);
5507d11e744SLawrence Stewart 		alq->aq_waiters--;
5517d11e744SLawrence Stewart 	}
5529405072aSJeff Roberson 
5537d11e744SLawrence Stewart 	/*
5547d11e744SLawrence Stewart 	 * (ALQ_WAITOK && aq_freebytes < len) or aq_freebytes >= len, either
5557d11e744SLawrence Stewart 	 * enter while loop and sleep until we have enough free bytes (former)
5567d11e744SLawrence Stewart 	 * or skip (latter). If AQ_ORDERED is set, only 1 thread at a time will
5577d11e744SLawrence Stewart 	 * be in this loop. Otherwise, multiple threads may be sleeping here
5587d11e744SLawrence Stewart 	 * competing for ALQ resources.
5597d11e744SLawrence Stewart 	 */
5607d11e744SLawrence Stewart 	while (alq->aq_freebytes < len && !(alq->aq_flags & AQ_SHUTDOWN)) {
5617d11e744SLawrence Stewart 		KASSERT(!(flags & ALQ_NOWAIT),
5627d11e744SLawrence Stewart 		    ("%s: ALQ_NOWAIT set but incorrectly ignored!", __func__));
5637d11e744SLawrence Stewart 		alq->aq_flags |= AQ_WANTED;
5647d11e744SLawrence Stewart 		alq->aq_waiters++;
5657d11e744SLawrence Stewart 		if (waitchan)
5667d11e744SLawrence Stewart 			wakeup(waitchan);
5677d11e744SLawrence Stewart 		msleep_spin(alq, &alq->aq_mtx, "alqwnres", 0);
5687d11e744SLawrence Stewart 		alq->aq_waiters--;
5699405072aSJeff Roberson 
5707d11e744SLawrence Stewart 		/*
5717d11e744SLawrence Stewart 		 * If we're the first thread to wake after an AQ_WANTED wakeup
5727d11e744SLawrence Stewart 		 * but there isn't enough free space for us, we're going to loop
5737d11e744SLawrence Stewart 		 * and sleep again. If there are other threads waiting in this
5747d11e744SLawrence Stewart 		 * loop, schedule a wakeup so that they can see if the space
5757d11e744SLawrence Stewart 		 * they require is available.
5767d11e744SLawrence Stewart 		 */
5777d11e744SLawrence Stewart 		if (alq->aq_waiters > 0 && !(alq->aq_flags & AQ_ORDERED) &&
5787d11e744SLawrence Stewart 		    alq->aq_freebytes < len && !(alq->aq_flags & AQ_WANTED))
5797d11e744SLawrence Stewart 			waitchan = alq;
5807d11e744SLawrence Stewart 		else
5817d11e744SLawrence Stewart 			waitchan = NULL;
5827d11e744SLawrence Stewart 	}
5839405072aSJeff Roberson 
5847d11e744SLawrence Stewart 	/*
5857d11e744SLawrence Stewart 	 * If there are waiters, we need to signal the waiting threads after we
5867d11e744SLawrence Stewart 	 * complete our work. The alq ptr is used as a wait channel for threads
5877d11e744SLawrence Stewart 	 * requiring resources to be freed up. In the AQ_ORDERED case, threads
5887d11e744SLawrence Stewart 	 * are not allowed to concurrently compete for resources in the above
5897d11e744SLawrence Stewart 	 * while loop, so we use a different wait channel in this case.
5907d11e744SLawrence Stewart 	 */
5917d11e744SLawrence Stewart 	if (alq->aq_waiters > 0) {
5927d11e744SLawrence Stewart 		if (alq->aq_flags & AQ_ORDERED)
5937d11e744SLawrence Stewart 			waitchan = &alq->aq_waiters;
5947d11e744SLawrence Stewart 		else
5957d11e744SLawrence Stewart 			waitchan = alq;
5967d11e744SLawrence Stewart 	} else
5977d11e744SLawrence Stewart 		waitchan = NULL;
5987d11e744SLawrence Stewart 
5997d11e744SLawrence Stewart 	/* Bail if we're shutting down. */
6007d11e744SLawrence Stewart 	if (alq->aq_flags & AQ_SHUTDOWN) {
6017d11e744SLawrence Stewart 		ret = EWOULDBLOCK;
6027d11e744SLawrence Stewart 		goto unlock;
6037d11e744SLawrence Stewart 	}
6047d11e744SLawrence Stewart 
6057d11e744SLawrence Stewart 	/*
6067d11e744SLawrence Stewart 	 * If we need to wrap the buffer to accommodate the write,
6077d11e744SLawrence Stewart 	 * we'll need 2 calls to bcopy.
6087d11e744SLawrence Stewart 	 */
6097d11e744SLawrence Stewart 	if ((alq->aq_buflen - alq->aq_writehead) < len)
6107d11e744SLawrence Stewart 		copy = alq->aq_buflen - alq->aq_writehead;
6117d11e744SLawrence Stewart 
6127d11e744SLawrence Stewart 	/* Copy message (or part thereof if wrap required) to the buffer. */
6137d11e744SLawrence Stewart 	bcopy(data, alq->aq_entbuf + alq->aq_writehead, copy);
6147d11e744SLawrence Stewart 	alq->aq_writehead += copy;
6157d11e744SLawrence Stewart 
6167d11e744SLawrence Stewart 	if (alq->aq_writehead >= alq->aq_buflen) {
6177d11e744SLawrence Stewart 		KASSERT((alq->aq_writehead == alq->aq_buflen),
6187d11e744SLawrence Stewart 		    ("%s: alq->aq_writehead (%d) > alq->aq_buflen (%d)",
6197d11e744SLawrence Stewart 		    __func__,
6207d11e744SLawrence Stewart 		    alq->aq_writehead,
6217d11e744SLawrence Stewart 		    alq->aq_buflen));
6227d11e744SLawrence Stewart 		alq->aq_writehead = 0;
6237d11e744SLawrence Stewart 	}
6247d11e744SLawrence Stewart 
6257d11e744SLawrence Stewart 	if (copy != len) {
6267d11e744SLawrence Stewart 		/*
6277d11e744SLawrence Stewart 		 * Wrap the buffer by copying the remainder of our message
6287d11e744SLawrence Stewart 		 * to the start of the buffer and resetting aq_writehead.
6297d11e744SLawrence Stewart 		 */
6307d11e744SLawrence Stewart 		bcopy(((uint8_t *)data)+copy, alq->aq_entbuf, len - copy);
6317d11e744SLawrence Stewart 		alq->aq_writehead = len - copy;
6327d11e744SLawrence Stewart 	}
6337d11e744SLawrence Stewart 
6347d11e744SLawrence Stewart 	KASSERT((alq->aq_writehead >= 0 && alq->aq_writehead < alq->aq_buflen),
6357d11e744SLawrence Stewart 	    ("%s: aq_writehead < 0 || aq_writehead >= aq_buflen", __func__));
6367d11e744SLawrence Stewart 
6377d11e744SLawrence Stewart 	alq->aq_freebytes -= len;
6387d11e744SLawrence Stewart 
6397d11e744SLawrence Stewart 	if (!(alq->aq_flags & AQ_ACTIVE) && !(flags & ALQ_NOACTIVATE)) {
6409405072aSJeff Roberson 		alq->aq_flags |= AQ_ACTIVE;
6419405072aSJeff Roberson 		activate = 1;
6427d11e744SLawrence Stewart 	}
6439405072aSJeff Roberson 
6447d11e744SLawrence Stewart 	KASSERT((HAS_PENDING_DATA(alq)), ("%s: queue empty!", __func__));
6457d11e744SLawrence Stewart 
6467d11e744SLawrence Stewart unlock:
6479405072aSJeff Roberson 	ALQ_UNLOCK(alq);
6487d11e744SLawrence Stewart 
6499405072aSJeff Roberson 	if (activate) {
6509405072aSJeff Roberson 		ALD_LOCK();
6519405072aSJeff Roberson 		ald_activate(alq);
6529405072aSJeff Roberson 		ALD_UNLOCK();
6539405072aSJeff Roberson 	}
6547d11e744SLawrence Stewart 
6557d11e744SLawrence Stewart 	/* NB: We rely on wakeup_one waking threads in a FIFO manner. */
6567d11e744SLawrence Stewart 	if (waitchan != NULL)
6577d11e744SLawrence Stewart 		wakeup_one(waitchan);
6587d11e744SLawrence Stewart 
6597d11e744SLawrence Stewart 	return (ret);
6607d11e744SLawrence Stewart }
6617d11e744SLawrence Stewart 
6627d11e744SLawrence Stewart int
6637d11e744SLawrence Stewart alq_write(struct alq *alq, void *data, int flags)
6647d11e744SLawrence Stewart {
6657d11e744SLawrence Stewart 	/* Should only be called in fixed length message (legacy) mode. */
6667d11e744SLawrence Stewart 	KASSERT((alq->aq_flags & AQ_LEGACY),
6677d11e744SLawrence Stewart 	    ("%s: fixed length write on variable length queue", __func__));
6687d11e744SLawrence Stewart 	return (alq_writen(alq, data, alq->aq_entlen, flags));
6697d11e744SLawrence Stewart }
6707d11e744SLawrence Stewart 
6717d11e744SLawrence Stewart /*
6727d11e744SLawrence Stewart  * Retrieve a pointer for the ALQ to write directly into, avoiding bcopy.
6737d11e744SLawrence Stewart  */
6747d11e744SLawrence Stewart struct ale *
6757d11e744SLawrence Stewart alq_getn(struct alq *alq, int len, int flags)
6767d11e744SLawrence Stewart {
6777d11e744SLawrence Stewart 	int contigbytes;
6787d11e744SLawrence Stewart 	void *waitchan;
6797d11e744SLawrence Stewart 
6807d11e744SLawrence Stewart 	KASSERT((len > 0 && len <= alq->aq_buflen),
6817d11e744SLawrence Stewart 	    ("%s: len <= 0 || len > alq->aq_buflen", __func__));
6827d11e744SLawrence Stewart 
6837d11e744SLawrence Stewart 	waitchan = NULL;
6847d11e744SLawrence Stewart 
6857d11e744SLawrence Stewart 	ALQ_LOCK(alq);
6867d11e744SLawrence Stewart 
6877d11e744SLawrence Stewart 	/*
6887d11e744SLawrence Stewart 	 * Determine the number of free contiguous bytes.
6897d11e744SLawrence Stewart 	 * We ensure elsewhere that if aq_writehead == aq_writetail because
6907d11e744SLawrence Stewart 	 * the buffer is empty, they will both be set to 0 and therefore
6917d11e744SLawrence Stewart 	 * aq_freebytes == aq_buflen and is fully contiguous.
6927d11e744SLawrence Stewart 	 * If they are equal and the buffer is not empty, aq_freebytes will
6937d11e744SLawrence Stewart 	 * be 0 indicating the buffer is full.
6947d11e744SLawrence Stewart 	 */
6957d11e744SLawrence Stewart 	if (alq->aq_writehead <= alq->aq_writetail)
6967d11e744SLawrence Stewart 		contigbytes = alq->aq_freebytes;
6977d11e744SLawrence Stewart 	else {
6987d11e744SLawrence Stewart 		contigbytes = alq->aq_buflen - alq->aq_writehead;
6997d11e744SLawrence Stewart 
7007d11e744SLawrence Stewart 		if (contigbytes < len) {
7017d11e744SLawrence Stewart 			/*
7027d11e744SLawrence Stewart 			 * Insufficient space at end of buffer to handle a
7037d11e744SLawrence Stewart 			 * contiguous write. Wrap early if there's space at
7047d11e744SLawrence Stewart 			 * the beginning. This will leave a hole at the end
7057d11e744SLawrence Stewart 			 * of the buffer which we will have to skip over when
7067d11e744SLawrence Stewart 			 * flushing the buffer to disk.
7077d11e744SLawrence Stewart 			 */
7087d11e744SLawrence Stewart 			if (alq->aq_writetail >= len || flags & ALQ_WAITOK) {
7097d11e744SLawrence Stewart 				/* Keep track of # bytes left blank. */
7107d11e744SLawrence Stewart 				alq->aq_wrapearly = contigbytes;
7117d11e744SLawrence Stewart 				/* Do the wrap and adjust counters. */
7127d11e744SLawrence Stewart 				contigbytes = alq->aq_freebytes =
7137d11e744SLawrence Stewart 				    alq->aq_writetail;
7147d11e744SLawrence Stewart 				alq->aq_writehead = 0;
7157d11e744SLawrence Stewart 			}
7167d11e744SLawrence Stewart 		}
7177d11e744SLawrence Stewart 	}
7187d11e744SLawrence Stewart 
7197d11e744SLawrence Stewart 	/*
7207d11e744SLawrence Stewart 	 * Return a NULL ALE if:
7217d11e744SLawrence Stewart 	 * - The message is larger than our underlying buffer.
7227d11e744SLawrence Stewart 	 * - The ALQ is being shutdown.
7237d11e744SLawrence Stewart 	 * - There is insufficient free space in our underlying buffer
7247d11e744SLawrence Stewart 	 *   to accept the message and the user can't wait for space.
7257d11e744SLawrence Stewart 	 * - There is insufficient free space in our underlying buffer
7267d11e744SLawrence Stewart 	 *   to accept the message and the alq is inactive due to prior
7277d11e744SLawrence Stewart 	 *   use of the ALQ_NOACTIVATE flag (which would lead to deadlock).
7287d11e744SLawrence Stewart 	 */
7297d11e744SLawrence Stewart 	if (len > alq->aq_buflen ||
7307d11e744SLawrence Stewart 	    alq->aq_flags & AQ_SHUTDOWN ||
7317d11e744SLawrence Stewart 	    (((flags & ALQ_NOWAIT) || (!(alq->aq_flags & AQ_ACTIVE) &&
7327d11e744SLawrence Stewart 	    HAS_PENDING_DATA(alq))) && contigbytes < len)) {
7337d11e744SLawrence Stewart 		ALQ_UNLOCK(alq);
7347d11e744SLawrence Stewart 		return (NULL);
7357d11e744SLawrence Stewart 	}
7367d11e744SLawrence Stewart 
7377d11e744SLawrence Stewart 	/*
7387d11e744SLawrence Stewart 	 * If we want ordered writes and there is already at least one thread
7397d11e744SLawrence Stewart 	 * waiting for resources to become available, sleep until we're woken.
7407d11e744SLawrence Stewart 	 */
7417d11e744SLawrence Stewart 	if (alq->aq_flags & AQ_ORDERED && alq->aq_waiters > 0) {
7427d11e744SLawrence Stewart 		KASSERT(!(flags & ALQ_NOWAIT),
7437d11e744SLawrence Stewart 		    ("%s: ALQ_NOWAIT set but incorrectly ignored!", __func__));
7447d11e744SLawrence Stewart 		alq->aq_waiters++;
7457d11e744SLawrence Stewart 		msleep_spin(&alq->aq_waiters, &alq->aq_mtx, "alqgnord", 0);
7467d11e744SLawrence Stewart 		alq->aq_waiters--;
7477d11e744SLawrence Stewart 	}
7487d11e744SLawrence Stewart 
7497d11e744SLawrence Stewart 	/*
7507d11e744SLawrence Stewart 	 * (ALQ_WAITOK && contigbytes < len) or contigbytes >= len, either enter
7517d11e744SLawrence Stewart 	 * while loop and sleep until we have enough contiguous free bytes
7527d11e744SLawrence Stewart 	 * (former) or skip (latter). If AQ_ORDERED is set, only 1 thread at a
7537d11e744SLawrence Stewart 	 * time will be in this loop. Otherwise, multiple threads may be
7547d11e744SLawrence Stewart 	 * sleeping here competing for ALQ resources.
7557d11e744SLawrence Stewart 	 */
7567d11e744SLawrence Stewart 	while (contigbytes < len && !(alq->aq_flags & AQ_SHUTDOWN)) {
7577d11e744SLawrence Stewart 		KASSERT(!(flags & ALQ_NOWAIT),
7587d11e744SLawrence Stewart 		    ("%s: ALQ_NOWAIT set but incorrectly ignored!", __func__));
7597d11e744SLawrence Stewart 		alq->aq_flags |= AQ_WANTED;
7607d11e744SLawrence Stewart 		alq->aq_waiters++;
7617d11e744SLawrence Stewart 		if (waitchan)
7627d11e744SLawrence Stewart 			wakeup(waitchan);
7637d11e744SLawrence Stewart 		msleep_spin(alq, &alq->aq_mtx, "alqgnres", 0);
7647d11e744SLawrence Stewart 		alq->aq_waiters--;
7657d11e744SLawrence Stewart 
7667d11e744SLawrence Stewart 		if (alq->aq_writehead <= alq->aq_writetail)
7677d11e744SLawrence Stewart 			contigbytes = alq->aq_freebytes;
7687d11e744SLawrence Stewart 		else
7697d11e744SLawrence Stewart 			contigbytes = alq->aq_buflen - alq->aq_writehead;
7707d11e744SLawrence Stewart 
7717d11e744SLawrence Stewart 		/*
7727d11e744SLawrence Stewart 		 * If we're the first thread to wake after an AQ_WANTED wakeup
7737d11e744SLawrence Stewart 		 * but there isn't enough free space for us, we're going to loop
7747d11e744SLawrence Stewart 		 * and sleep again. If there are other threads waiting in this
7757d11e744SLawrence Stewart 		 * loop, schedule a wakeup so that they can see if the space
7767d11e744SLawrence Stewart 		 * they require is available.
7777d11e744SLawrence Stewart 		 */
7787d11e744SLawrence Stewart 		if (alq->aq_waiters > 0 && !(alq->aq_flags & AQ_ORDERED) &&
7797d11e744SLawrence Stewart 		    contigbytes < len && !(alq->aq_flags & AQ_WANTED))
7807d11e744SLawrence Stewart 			waitchan = alq;
7817d11e744SLawrence Stewart 		else
7827d11e744SLawrence Stewart 			waitchan = NULL;
7837d11e744SLawrence Stewart 	}
7847d11e744SLawrence Stewart 
7857d11e744SLawrence Stewart 	/*
7867d11e744SLawrence Stewart 	 * If there are waiters, we need to signal the waiting threads after we
7877d11e744SLawrence Stewart 	 * complete our work. The alq ptr is used as a wait channel for threads
7887d11e744SLawrence Stewart 	 * requiring resources to be freed up. In the AQ_ORDERED case, threads
7897d11e744SLawrence Stewart 	 * are not allowed to concurrently compete for resources in the above
7907d11e744SLawrence Stewart 	 * while loop, so we use a different wait channel in this case.
7917d11e744SLawrence Stewart 	 */
7927d11e744SLawrence Stewart 	if (alq->aq_waiters > 0) {
7937d11e744SLawrence Stewart 		if (alq->aq_flags & AQ_ORDERED)
7947d11e744SLawrence Stewart 			waitchan = &alq->aq_waiters;
7957d11e744SLawrence Stewart 		else
7967d11e744SLawrence Stewart 			waitchan = alq;
7977d11e744SLawrence Stewart 	} else
7987d11e744SLawrence Stewart 		waitchan = NULL;
7997d11e744SLawrence Stewart 
8007d11e744SLawrence Stewart 	/* Bail if we're shutting down. */
8017d11e744SLawrence Stewart 	if (alq->aq_flags & AQ_SHUTDOWN) {
8027d11e744SLawrence Stewart 		ALQ_UNLOCK(alq);
8037d11e744SLawrence Stewart 		if (waitchan != NULL)
8047d11e744SLawrence Stewart 			wakeup_one(waitchan);
8057d11e744SLawrence Stewart 		return (NULL);
8067d11e744SLawrence Stewart 	}
8077d11e744SLawrence Stewart 
8087d11e744SLawrence Stewart 	/*
8097d11e744SLawrence Stewart 	 * If we are here, we have a contiguous number of bytes >= len
8107d11e744SLawrence Stewart 	 * available in our buffer starting at aq_writehead.
8117d11e744SLawrence Stewart 	 */
8127d11e744SLawrence Stewart 	alq->aq_getpost.ae_data = alq->aq_entbuf + alq->aq_writehead;
8137d11e744SLawrence Stewart 	alq->aq_getpost.ae_bytesused = len;
8147d11e744SLawrence Stewart 
8157d11e744SLawrence Stewart 	return (&alq->aq_getpost);
8167d11e744SLawrence Stewart }
8177d11e744SLawrence Stewart 
8187d11e744SLawrence Stewart struct ale *
8197d11e744SLawrence Stewart alq_get(struct alq *alq, int flags)
8207d11e744SLawrence Stewart {
8217d11e744SLawrence Stewart 	/* Should only be called in fixed length message (legacy) mode. */
8227d11e744SLawrence Stewart 	KASSERT((alq->aq_flags & AQ_LEGACY),
8237d11e744SLawrence Stewart 	    ("%s: fixed length get on variable length queue", __func__));
8247d11e744SLawrence Stewart 	return (alq_getn(alq, alq->aq_entlen, flags));
8257d11e744SLawrence Stewart }
8267d11e744SLawrence Stewart 
8277d11e744SLawrence Stewart void
8287d11e744SLawrence Stewart alq_post_flags(struct alq *alq, struct ale *ale, int flags)
8297d11e744SLawrence Stewart {
8307d11e744SLawrence Stewart 	int activate;
8317d11e744SLawrence Stewart 	void *waitchan;
8327d11e744SLawrence Stewart 
8337d11e744SLawrence Stewart 	activate = 0;
8347d11e744SLawrence Stewart 
8357d11e744SLawrence Stewart 	if (ale->ae_bytesused > 0) {
8367d11e744SLawrence Stewart 		if (!(alq->aq_flags & AQ_ACTIVE) &&
8377d11e744SLawrence Stewart 		    !(flags & ALQ_NOACTIVATE)) {
8387d11e744SLawrence Stewart 			alq->aq_flags |= AQ_ACTIVE;
8397d11e744SLawrence Stewart 			activate = 1;
8407d11e744SLawrence Stewart 		}
8417d11e744SLawrence Stewart 
8427d11e744SLawrence Stewart 		alq->aq_writehead += ale->ae_bytesused;
8437d11e744SLawrence Stewart 		alq->aq_freebytes -= ale->ae_bytesused;
8447d11e744SLawrence Stewart 
8457d11e744SLawrence Stewart 		/* Wrap aq_writehead if we filled to the end of the buffer. */
8467d11e744SLawrence Stewart 		if (alq->aq_writehead == alq->aq_buflen)
8477d11e744SLawrence Stewart 			alq->aq_writehead = 0;
8487d11e744SLawrence Stewart 
8497d11e744SLawrence Stewart 		KASSERT((alq->aq_writehead >= 0 &&
8507d11e744SLawrence Stewart 		    alq->aq_writehead < alq->aq_buflen),
8517d11e744SLawrence Stewart 		    ("%s: aq_writehead < 0 || aq_writehead >= aq_buflen",
8527d11e744SLawrence Stewart 		    __func__));
8537d11e744SLawrence Stewart 
8547d11e744SLawrence Stewart 		KASSERT((HAS_PENDING_DATA(alq)), ("%s: queue empty!", __func__));
8557d11e744SLawrence Stewart 	}
8567d11e744SLawrence Stewart 
8577d11e744SLawrence Stewart 	/*
8587d11e744SLawrence Stewart 	 * If there are waiters, we need to signal the waiting threads after we
8597d11e744SLawrence Stewart 	 * complete our work. The alq ptr is used as a wait channel for threads
8607d11e744SLawrence Stewart 	 * requiring resources to be freed up. In the AQ_ORDERED case, threads
8617d11e744SLawrence Stewart 	 * are not allowed to concurrently compete for resources in the
8627d11e744SLawrence Stewart 	 * alq_getn() while loop, so we use a different wait channel in this case.
8637d11e744SLawrence Stewart 	 */
8647d11e744SLawrence Stewart 	if (alq->aq_waiters > 0) {
8657d11e744SLawrence Stewart 		if (alq->aq_flags & AQ_ORDERED)
8667d11e744SLawrence Stewart 			waitchan = &alq->aq_waiters;
8677d11e744SLawrence Stewart 		else
8687d11e744SLawrence Stewart 			waitchan = alq;
8697d11e744SLawrence Stewart 	} else
8707d11e744SLawrence Stewart 		waitchan = NULL;
8717d11e744SLawrence Stewart 
8727d11e744SLawrence Stewart 	ALQ_UNLOCK(alq);
8737d11e744SLawrence Stewart 
8747d11e744SLawrence Stewart 	if (activate) {
8757d11e744SLawrence Stewart 		ALD_LOCK();
8767d11e744SLawrence Stewart 		ald_activate(alq);
8777d11e744SLawrence Stewart 		ALD_UNLOCK();
8787d11e744SLawrence Stewart 	}
8797d11e744SLawrence Stewart 
8807d11e744SLawrence Stewart 	/* NB: We rely on wakeup_one waking threads in a FIFO manner. */
8817d11e744SLawrence Stewart 	if (waitchan != NULL)
8827d11e744SLawrence Stewart 		wakeup_one(waitchan);
8839405072aSJeff Roberson }
8849405072aSJeff Roberson 
8859405072aSJeff Roberson void
8869405072aSJeff Roberson alq_flush(struct alq *alq)
8879405072aSJeff Roberson {
8889405072aSJeff Roberson 	int needwakeup = 0;
8899405072aSJeff Roberson 
8909405072aSJeff Roberson 	ALD_LOCK();
8919405072aSJeff Roberson 	ALQ_LOCK(alq);
8927d11e744SLawrence Stewart 
8937d11e744SLawrence Stewart 	/*
8947d11e744SLawrence Stewart 	 * Pull the lever iff there is data to flush and we're
8957d11e744SLawrence Stewart 	 * not already in the middle of a flush operation.
8967d11e744SLawrence Stewart 	 */
8977d11e744SLawrence Stewart 	if (HAS_PENDING_DATA(alq) && !(alq->aq_flags & AQ_FLUSHING)) {
8987d11e744SLawrence Stewart 		if (alq->aq_flags & AQ_ACTIVE)
8999405072aSJeff Roberson 			ald_deactivate(alq);
9007d11e744SLawrence Stewart 
9019405072aSJeff Roberson 		ALD_UNLOCK();
9029405072aSJeff Roberson 		needwakeup = alq_doio(alq);
9039405072aSJeff Roberson 	} else
9049405072aSJeff Roberson 		ALD_UNLOCK();
9057d11e744SLawrence Stewart 
9069405072aSJeff Roberson 	ALQ_UNLOCK(alq);
9079405072aSJeff Roberson 
9089405072aSJeff Roberson 	if (needwakeup)
9097d11e744SLawrence Stewart 		wakeup_one(alq);
9109405072aSJeff Roberson }
9119405072aSJeff Roberson 
/*
 * Flush remaining data, close the file and free all resources.
 *
 * The queue is torn down via alq_destroy() only when ald_rem() returns 0;
 * any other return leaves the queue untouched here (it is already on its
 * way out through the shutdown path).
 */
void
alq_close(struct alq *alq)
{
	/* Already shutting down: nothing for us to flush or destroy. */
	if (ald_rem(alq) != 0)
		return;

	alq_destroy(alq);
}
922d28f42f9SLawrence Stewart 
923d28f42f9SLawrence Stewart static int
924d28f42f9SLawrence Stewart alq_load_handler(module_t mod, int what, void *arg)
925d28f42f9SLawrence Stewart {
926d28f42f9SLawrence Stewart 	int ret;
927d28f42f9SLawrence Stewart 
928d28f42f9SLawrence Stewart 	ret = 0;
929d28f42f9SLawrence Stewart 
930d28f42f9SLawrence Stewart 	switch (what) {
931d28f42f9SLawrence Stewart 	case MOD_LOAD:
932d28f42f9SLawrence Stewart 	case MOD_SHUTDOWN:
933d28f42f9SLawrence Stewart 		break;
934d28f42f9SLawrence Stewart 
935d28f42f9SLawrence Stewart 	case MOD_QUIESCE:
936d28f42f9SLawrence Stewart 		ALD_LOCK();
937d28f42f9SLawrence Stewart 		/* Only allow unload if there are no open queues. */
938d28f42f9SLawrence Stewart 		if (LIST_FIRST(&ald_queues) == NULL) {
939d28f42f9SLawrence Stewart 			ald_shutingdown = 1;
940d28f42f9SLawrence Stewart 			ALD_UNLOCK();
941ec41a9a1SLawrence Stewart 			EVENTHANDLER_DEREGISTER(shutdown_pre_sync,
942ec41a9a1SLawrence Stewart 			    alq_eventhandler_tag);
943d28f42f9SLawrence Stewart 			ald_shutdown(NULL, 0);
944d28f42f9SLawrence Stewart 			mtx_destroy(&ald_mtx);
945d28f42f9SLawrence Stewart 		} else {
946d28f42f9SLawrence Stewart 			ALD_UNLOCK();
947d28f42f9SLawrence Stewart 			ret = EBUSY;
948d28f42f9SLawrence Stewart 		}
949d28f42f9SLawrence Stewart 		break;
950d28f42f9SLawrence Stewart 
951d28f42f9SLawrence Stewart 	case MOD_UNLOAD:
952d28f42f9SLawrence Stewart 		/* If MOD_QUIESCE failed we must fail here too. */
953d28f42f9SLawrence Stewart 		if (ald_shutingdown == 0)
954d28f42f9SLawrence Stewart 			ret = EBUSY;
955d28f42f9SLawrence Stewart 		break;
956d28f42f9SLawrence Stewart 
957d28f42f9SLawrence Stewart 	default:
958d28f42f9SLawrence Stewart 		ret = EINVAL;
959d28f42f9SLawrence Stewart 		break;
960d28f42f9SLawrence Stewart 	}
961d28f42f9SLawrence Stewart 
962d28f42f9SLawrence Stewart 	return (ret);
963d28f42f9SLawrence Stewart }
964d28f42f9SLawrence Stewart 
965d28f42f9SLawrence Stewart static moduledata_t alq_mod =
966d28f42f9SLawrence Stewart {
967d28f42f9SLawrence Stewart 	"alq",
968d28f42f9SLawrence Stewart 	alq_load_handler,
969d28f42f9SLawrence Stewart 	NULL
970d28f42f9SLawrence Stewart };
971d28f42f9SLawrence Stewart 
972*47cedcbdSJohn Baldwin DECLARE_MODULE(alq, alq_mod, SI_SUB_LAST, SI_ORDER_ANY);
973d28f42f9SLawrence Stewart MODULE_VERSION(alq, 1);
974