xref: /freebsd/sys/kern/kern_alq.c (revision bb92cd7bcd16f3f36cdbda18d8193619892715fb)
19454b2d8SWarner Losh /*-
28a36da99SPedro F. Giffuni  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
38a36da99SPedro F. Giffuni  *
49405072aSJeff Roberson  * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
5d28f42f9SLawrence Stewart  * Copyright (c) 2008-2009, Lawrence Stewart <lstewart@freebsd.org>
6d28f42f9SLawrence Stewart  * Copyright (c) 2009-2010, The FreeBSD Foundation
79405072aSJeff Roberson  * All rights reserved.
89405072aSJeff Roberson  *
9d28f42f9SLawrence Stewart  * Portions of this software were developed at the Centre for Advanced
10d28f42f9SLawrence Stewart  * Internet Architectures, Swinburne University of Technology, Melbourne,
11d28f42f9SLawrence Stewart  * Australia by Lawrence Stewart under sponsorship from the FreeBSD Foundation.
12d28f42f9SLawrence Stewart  *
139405072aSJeff Roberson  * Redistribution and use in source and binary forms, with or without
149405072aSJeff Roberson  * modification, are permitted provided that the following conditions
159405072aSJeff Roberson  * are met:
169405072aSJeff Roberson  * 1. Redistributions of source code must retain the above copyright
179405072aSJeff Roberson  *    notice unmodified, this list of conditions, and the following
189405072aSJeff Roberson  *    disclaimer.
199405072aSJeff Roberson  * 2. Redistributions in binary form must reproduce the above copyright
209405072aSJeff Roberson  *    notice, this list of conditions and the following disclaimer in the
219405072aSJeff Roberson  *    documentation and/or other materials provided with the distribution.
229405072aSJeff Roberson  *
239405072aSJeff Roberson  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
249405072aSJeff Roberson  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
259405072aSJeff Roberson  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
269405072aSJeff Roberson  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
279405072aSJeff Roberson  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
289405072aSJeff Roberson  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
299405072aSJeff Roberson  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
309405072aSJeff Roberson  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
319405072aSJeff Roberson  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
329405072aSJeff Roberson  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
339405072aSJeff Roberson  */
349405072aSJeff Roberson 
35677b542eSDavid E. O'Brien #include <sys/cdefs.h>
36677b542eSDavid E. O'Brien __FBSDID("$FreeBSD$");
37677b542eSDavid E. O'Brien 
38d28f42f9SLawrence Stewart #include "opt_mac.h"
39d28f42f9SLawrence Stewart 
409405072aSJeff Roberson #include <sys/param.h>
419405072aSJeff Roberson #include <sys/systm.h>
429405072aSJeff Roberson #include <sys/kernel.h>
439405072aSJeff Roberson #include <sys/kthread.h>
449405072aSJeff Roberson #include <sys/lock.h>
4533f19beeSJohn Baldwin #include <sys/mount.h>
469405072aSJeff Roberson #include <sys/mutex.h>
479405072aSJeff Roberson #include <sys/namei.h>
489405072aSJeff Roberson #include <sys/proc.h>
499405072aSJeff Roberson #include <sys/vnode.h>
509405072aSJeff Roberson #include <sys/alq.h>
519405072aSJeff Roberson #include <sys/malloc.h>
529405072aSJeff Roberson #include <sys/unistd.h>
539405072aSJeff Roberson #include <sys/fcntl.h>
549405072aSJeff Roberson #include <sys/eventhandler.h>
559405072aSJeff Roberson 
56aed55708SRobert Watson #include <security/mac/mac_framework.h>
57aed55708SRobert Watson 
589405072aSJeff Roberson /* Async. Logging Queue */
599405072aSJeff Roberson struct alq {
607d11e744SLawrence Stewart 	char	*aq_entbuf;		/* Buffer for stored entries */
619405072aSJeff Roberson 	int	aq_entmax;		/* Max entries */
629405072aSJeff Roberson 	int	aq_entlen;		/* Entry length */
637d11e744SLawrence Stewart 	int	aq_freebytes;		/* Bytes available in buffer */
647d11e744SLawrence Stewart 	int	aq_buflen;		/* Total length of our buffer */
657d11e744SLawrence Stewart 	int	aq_writehead;		/* Location for next write */
667d11e744SLawrence Stewart 	int	aq_writetail;		/* Flush starts at this location */
677d11e744SLawrence Stewart 	int	aq_wrapearly;		/* # bytes left blank at end of buf */
689405072aSJeff Roberson 	int	aq_flags;		/* Queue flags */
697d11e744SLawrence Stewart 	int	aq_waiters;		/* Num threads waiting for resources
707d11e744SLawrence Stewart 					 * NB: Used as a wait channel so must
717d11e744SLawrence Stewart 					 * not be first field in the alq struct
727d11e744SLawrence Stewart 					 */
737d11e744SLawrence Stewart 	struct	ale	aq_getpost;	/* ALE for use by get/post */
749405072aSJeff Roberson 	struct mtx	aq_mtx;		/* Queue lock */
759405072aSJeff Roberson 	struct vnode	*aq_vp;		/* Open vnode handle */
769e9256e2SJeff Roberson 	struct ucred	*aq_cred;	/* Credentials of the opening thread */
779405072aSJeff Roberson 	LIST_ENTRY(alq)	aq_act;		/* List of active queues */
789405072aSJeff Roberson 	LIST_ENTRY(alq)	aq_link;	/* List of all queues */
799405072aSJeff Roberson };
809405072aSJeff Roberson 
819405072aSJeff Roberson #define	AQ_WANTED	0x0001		/* Wakeup sleeper when io is done */
829405072aSJeff Roberson #define	AQ_ACTIVE	0x0002		/* on the active list */
839405072aSJeff Roberson #define	AQ_FLUSHING	0x0004		/* doing IO */
849405072aSJeff Roberson #define	AQ_SHUTDOWN	0x0008		/* Queue no longer valid */
857d11e744SLawrence Stewart #define	AQ_ORDERED	0x0010		/* Queue enforces ordered writes */
867d11e744SLawrence Stewart #define	AQ_LEGACY	0x0020		/* Legacy queue (fixed length writes) */
879405072aSJeff Roberson 
889405072aSJeff Roberson #define	ALQ_LOCK(alq)	mtx_lock_spin(&(alq)->aq_mtx)
899405072aSJeff Roberson #define	ALQ_UNLOCK(alq)	mtx_unlock_spin(&(alq)->aq_mtx)
909405072aSJeff Roberson 
917d11e744SLawrence Stewart #define HAS_PENDING_DATA(alq) ((alq)->aq_freebytes != (alq)->aq_buflen)
927d11e744SLawrence Stewart 
939405072aSJeff Roberson static MALLOC_DEFINE(M_ALD, "ALD", "ALD");
949405072aSJeff Roberson 
959405072aSJeff Roberson /*
969405072aSJeff Roberson  * The ald_mtx protects the ald_queues list and the ald_active list.
979405072aSJeff Roberson  */
989405072aSJeff Roberson static struct mtx ald_mtx;
999405072aSJeff Roberson static LIST_HEAD(, alq) ald_queues;
1009405072aSJeff Roberson static LIST_HEAD(, alq) ald_active;
1019405072aSJeff Roberson static int ald_shutingdown = 0;
102a414302fSJeff Roberson struct thread *ald_thread;
103a414302fSJeff Roberson static struct proc *ald_proc;
1047639c9beSLawrence Stewart static eventhandler_tag alq_eventhandler_tag = NULL;
1059405072aSJeff Roberson 
1069405072aSJeff Roberson #define	ALD_LOCK()	mtx_lock(&ald_mtx)
1079405072aSJeff Roberson #define	ALD_UNLOCK()	mtx_unlock(&ald_mtx)
1089405072aSJeff Roberson 
/* Daemon (ald) functions. */
static void ald_startup(void *);
static void ald_daemon(void);
static void ald_shutdown(void *, int);
static int ald_add(struct alq *);
static int ald_rem(struct alq *);
static void ald_activate(struct alq *);
static void ald_deactivate(struct alq *);

/* Internal per-queue functions. */
static int alq_doio(struct alq *);
static void alq_shutdown(struct alq *);
static void alq_destroy(struct alq *);
1229405072aSJeff Roberson 
1239405072aSJeff Roberson /*
1249405072aSJeff Roberson  * Add a new queue to the global list.  Fail if we're shutting down.
1259405072aSJeff Roberson  */
1269405072aSJeff Roberson static int
1279405072aSJeff Roberson ald_add(struct alq *alq)
1289405072aSJeff Roberson {
1299405072aSJeff Roberson 	int error;
1309405072aSJeff Roberson 
1319405072aSJeff Roberson 	error = 0;
1329405072aSJeff Roberson 
1339405072aSJeff Roberson 	ALD_LOCK();
1349405072aSJeff Roberson 	if (ald_shutingdown) {
1359405072aSJeff Roberson 		error = EBUSY;
1369405072aSJeff Roberson 		goto done;
1379405072aSJeff Roberson 	}
1389405072aSJeff Roberson 	LIST_INSERT_HEAD(&ald_queues, alq, aq_link);
1399405072aSJeff Roberson done:
1409405072aSJeff Roberson 	ALD_UNLOCK();
1419405072aSJeff Roberson 	return (error);
1429405072aSJeff Roberson }
1439405072aSJeff Roberson 
1449405072aSJeff Roberson /*
1459405072aSJeff Roberson  * Remove a queue from the global list unless we're shutting down.  If so,
1469405072aSJeff Roberson  * the ald will take care of cleaning up it's resources.
1479405072aSJeff Roberson  */
1489405072aSJeff Roberson static int
1499405072aSJeff Roberson ald_rem(struct alq *alq)
1509405072aSJeff Roberson {
1519405072aSJeff Roberson 	int error;
1529405072aSJeff Roberson 
1539405072aSJeff Roberson 	error = 0;
1549405072aSJeff Roberson 
1559405072aSJeff Roberson 	ALD_LOCK();
1569405072aSJeff Roberson 	if (ald_shutingdown) {
1579405072aSJeff Roberson 		error = EBUSY;
1589405072aSJeff Roberson 		goto done;
1599405072aSJeff Roberson 	}
1609405072aSJeff Roberson 	LIST_REMOVE(alq, aq_link);
1619405072aSJeff Roberson done:
1629405072aSJeff Roberson 	ALD_UNLOCK();
1639405072aSJeff Roberson 	return (error);
1649405072aSJeff Roberson }
1659405072aSJeff Roberson 
1669405072aSJeff Roberson /*
1679405072aSJeff Roberson  * Put a queue on the active list.  This will schedule it for writing.
1689405072aSJeff Roberson  */
1699405072aSJeff Roberson static void
1709405072aSJeff Roberson ald_activate(struct alq *alq)
1719405072aSJeff Roberson {
1729405072aSJeff Roberson 	LIST_INSERT_HEAD(&ald_active, alq, aq_act);
1739405072aSJeff Roberson 	wakeup(&ald_active);
1749405072aSJeff Roberson }
1759405072aSJeff Roberson 
1769405072aSJeff Roberson static void
1779405072aSJeff Roberson ald_deactivate(struct alq *alq)
1789405072aSJeff Roberson {
1799405072aSJeff Roberson 	LIST_REMOVE(alq, aq_act);
1809405072aSJeff Roberson 	alq->aq_flags &= ~AQ_ACTIVE;
1819405072aSJeff Roberson }
1829405072aSJeff Roberson 
1839405072aSJeff Roberson static void
1849405072aSJeff Roberson ald_startup(void *unused)
1859405072aSJeff Roberson {
1869405072aSJeff Roberson 	mtx_init(&ald_mtx, "ALDmtx", NULL, MTX_DEF|MTX_QUIET);
1879405072aSJeff Roberson 	LIST_INIT(&ald_queues);
1889405072aSJeff Roberson 	LIST_INIT(&ald_active);
1899405072aSJeff Roberson }
1909405072aSJeff Roberson 
1919405072aSJeff Roberson static void
1929405072aSJeff Roberson ald_daemon(void)
1939405072aSJeff Roberson {
1949405072aSJeff Roberson 	int needwakeup;
1959405072aSJeff Roberson 	struct alq *alq;
1969405072aSJeff Roberson 
197a414302fSJeff Roberson 	ald_thread = FIRST_THREAD_IN_PROC(ald_proc);
198a414302fSJeff Roberson 
1997639c9beSLawrence Stewart 	alq_eventhandler_tag = EVENTHANDLER_REGISTER(shutdown_pre_sync,
2007639c9beSLawrence Stewart 	    ald_shutdown, NULL, SHUTDOWN_PRI_FIRST);
2019405072aSJeff Roberson 
2029405072aSJeff Roberson 	ALD_LOCK();
2039405072aSJeff Roberson 
2049405072aSJeff Roberson 	for (;;) {
205d28f42f9SLawrence Stewart 		while ((alq = LIST_FIRST(&ald_active)) == NULL &&
206d28f42f9SLawrence Stewart 		    !ald_shutingdown)
2079ffad7a9SLawrence Stewart 			mtx_sleep(&ald_active, &ald_mtx, PWAIT, "aldslp", 0);
2089405072aSJeff Roberson 
209d28f42f9SLawrence Stewart 		/* Don't shutdown until all active ALQs are flushed. */
210d28f42f9SLawrence Stewart 		if (ald_shutingdown && alq == NULL) {
211d28f42f9SLawrence Stewart 			ALD_UNLOCK();
212d28f42f9SLawrence Stewart 			break;
213d28f42f9SLawrence Stewart 		}
214d28f42f9SLawrence Stewart 
2159405072aSJeff Roberson 		ALQ_LOCK(alq);
2169405072aSJeff Roberson 		ald_deactivate(alq);
2179405072aSJeff Roberson 		ALD_UNLOCK();
2189405072aSJeff Roberson 		needwakeup = alq_doio(alq);
2199405072aSJeff Roberson 		ALQ_UNLOCK(alq);
2209405072aSJeff Roberson 		if (needwakeup)
2217d11e744SLawrence Stewart 			wakeup_one(alq);
2229405072aSJeff Roberson 		ALD_LOCK();
2239405072aSJeff Roberson 	}
224d28f42f9SLawrence Stewart 
225d28f42f9SLawrence Stewart 	kproc_exit(0);
2269405072aSJeff Roberson }
2279405072aSJeff Roberson 
2289405072aSJeff Roberson static void
2299405072aSJeff Roberson ald_shutdown(void *arg, int howto)
2309405072aSJeff Roberson {
2319405072aSJeff Roberson 	struct alq *alq;
2329405072aSJeff Roberson 
2339405072aSJeff Roberson 	ALD_LOCK();
234d28f42f9SLawrence Stewart 
235d28f42f9SLawrence Stewart 	/* Ensure no new queues can be created. */
2369405072aSJeff Roberson 	ald_shutingdown = 1;
2379405072aSJeff Roberson 
238d28f42f9SLawrence Stewart 	/* Shutdown all ALQs prior to terminating the ald_daemon. */
2399405072aSJeff Roberson 	while ((alq = LIST_FIRST(&ald_queues)) != NULL) {
2409405072aSJeff Roberson 		LIST_REMOVE(alq, aq_link);
2419405072aSJeff Roberson 		ALD_UNLOCK();
2429405072aSJeff Roberson 		alq_shutdown(alq);
2439405072aSJeff Roberson 		ALD_LOCK();
2449405072aSJeff Roberson 	}
245d28f42f9SLawrence Stewart 
246d28f42f9SLawrence Stewart 	/* At this point, all ALQs are flushed and shutdown. */
247d28f42f9SLawrence Stewart 
248d28f42f9SLawrence Stewart 	/*
249d28f42f9SLawrence Stewart 	 * Wake ald_daemon so that it exits. It won't be able to do
2509ffad7a9SLawrence Stewart 	 * anything until we mtx_sleep because we hold the ald_mtx.
251d28f42f9SLawrence Stewart 	 */
252d28f42f9SLawrence Stewart 	wakeup(&ald_active);
253d28f42f9SLawrence Stewart 
254d28f42f9SLawrence Stewart 	/* Wait for ald_daemon to exit. */
2559ffad7a9SLawrence Stewart 	mtx_sleep(ald_proc, &ald_mtx, PWAIT, "aldslp", 0);
256d28f42f9SLawrence Stewart 
2579405072aSJeff Roberson 	ALD_UNLOCK();
2589405072aSJeff Roberson }
2599405072aSJeff Roberson 
2609405072aSJeff Roberson static void
2619405072aSJeff Roberson alq_shutdown(struct alq *alq)
2629405072aSJeff Roberson {
2639405072aSJeff Roberson 	ALQ_LOCK(alq);
2649405072aSJeff Roberson 
2659405072aSJeff Roberson 	/* Stop any new writers. */
2669405072aSJeff Roberson 	alq->aq_flags |= AQ_SHUTDOWN;
2679405072aSJeff Roberson 
2687d11e744SLawrence Stewart 	/*
2697d11e744SLawrence Stewart 	 * If the ALQ isn't active but has unwritten data (possible if
2707d11e744SLawrence Stewart 	 * the ALQ_NOACTIVATE flag has been used), explicitly activate the
2717d11e744SLawrence Stewart 	 * ALQ here so that the pending data gets flushed by the ald_daemon.
2727d11e744SLawrence Stewart 	 */
2737d11e744SLawrence Stewart 	if (!(alq->aq_flags & AQ_ACTIVE) && HAS_PENDING_DATA(alq)) {
2747d11e744SLawrence Stewart 		alq->aq_flags |= AQ_ACTIVE;
2757d11e744SLawrence Stewart 		ALQ_UNLOCK(alq);
2767d11e744SLawrence Stewart 		ALD_LOCK();
2777d11e744SLawrence Stewart 		ald_activate(alq);
2787d11e744SLawrence Stewart 		ALD_UNLOCK();
2797d11e744SLawrence Stewart 		ALQ_LOCK(alq);
2807d11e744SLawrence Stewart 	}
2817d11e744SLawrence Stewart 
2829405072aSJeff Roberson 	/* Drain IO */
28397c11ef2SLawrence Stewart 	while (alq->aq_flags & AQ_ACTIVE) {
2849405072aSJeff Roberson 		alq->aq_flags |= AQ_WANTED;
285bff2d4d5SRoman Divacky 		msleep_spin(alq, &alq->aq_mtx, "aldclose", 0);
2869405072aSJeff Roberson 	}
2879405072aSJeff Roberson 	ALQ_UNLOCK(alq);
2889405072aSJeff Roberson 
289a414302fSJeff Roberson 	vn_close(alq->aq_vp, FWRITE, alq->aq_cred,
2909e9256e2SJeff Roberson 	    curthread);
2919e9256e2SJeff Roberson 	crfree(alq->aq_cred);
2929405072aSJeff Roberson }
2939405072aSJeff Roberson 
294c0ea37a8SLawrence Stewart void
295c0ea37a8SLawrence Stewart alq_destroy(struct alq *alq)
296c0ea37a8SLawrence Stewart {
297c0ea37a8SLawrence Stewart 	/* Drain all pending IO. */
298c0ea37a8SLawrence Stewart 	alq_shutdown(alq);
299c0ea37a8SLawrence Stewart 
300c0ea37a8SLawrence Stewart 	mtx_destroy(&alq->aq_mtx);
301c0ea37a8SLawrence Stewart 	free(alq->aq_entbuf, M_ALD);
302c0ea37a8SLawrence Stewart 	free(alq, M_ALD);
303c0ea37a8SLawrence Stewart }
304c0ea37a8SLawrence Stewart 
3059405072aSJeff Roberson /*
3069405072aSJeff Roberson  * Flush all pending data to disk.  This operation will block.
3079405072aSJeff Roberson  */
3089405072aSJeff Roberson static int
3099405072aSJeff Roberson alq_doio(struct alq *alq)
3109405072aSJeff Roberson {
3119405072aSJeff Roberson 	struct thread *td;
3129405072aSJeff Roberson 	struct mount *mp;
3139405072aSJeff Roberson 	struct vnode *vp;
3149405072aSJeff Roberson 	struct uio auio;
3159405072aSJeff Roberson 	struct iovec aiov[2];
3169405072aSJeff Roberson 	int totlen;
3179405072aSJeff Roberson 	int iov;
3187d11e744SLawrence Stewart 	int wrapearly;
3197d11e744SLawrence Stewart 
3207d11e744SLawrence Stewart 	KASSERT((HAS_PENDING_DATA(alq)), ("%s: queue empty!", __func__));
3219405072aSJeff Roberson 
3229405072aSJeff Roberson 	vp = alq->aq_vp;
3239405072aSJeff Roberson 	td = curthread;
3249405072aSJeff Roberson 	totlen = 0;
3257d11e744SLawrence Stewart 	iov = 1;
3267d11e744SLawrence Stewart 	wrapearly = alq->aq_wrapearly;
3279405072aSJeff Roberson 
3289405072aSJeff Roberson 	bzero(&aiov, sizeof(aiov));
3299405072aSJeff Roberson 	bzero(&auio, sizeof(auio));
3309405072aSJeff Roberson 
3317d11e744SLawrence Stewart 	/* Start the write from the location of our buffer tail pointer. */
3327d11e744SLawrence Stewart 	aiov[0].iov_base = alq->aq_entbuf + alq->aq_writetail;
3337d11e744SLawrence Stewart 
3347d11e744SLawrence Stewart 	if (alq->aq_writetail < alq->aq_writehead) {
3357d11e744SLawrence Stewart 		/* Buffer not wrapped. */
3367d11e744SLawrence Stewart 		totlen = aiov[0].iov_len = alq->aq_writehead - alq->aq_writetail;
3377d11e744SLawrence Stewart 	} else if (alq->aq_writehead == 0) {
3387d11e744SLawrence Stewart 		/* Buffer not wrapped (special case to avoid an empty iov). */
3397d11e744SLawrence Stewart 		totlen = aiov[0].iov_len = alq->aq_buflen - alq->aq_writetail -
3407d11e744SLawrence Stewart 		    wrapearly;
3417d11e744SLawrence Stewart 	} else {
3427d11e744SLawrence Stewart 		/*
3437d11e744SLawrence Stewart 		 * Buffer wrapped, requires 2 aiov entries:
3447d11e744SLawrence Stewart 		 * - first is from writetail to end of buffer
3457d11e744SLawrence Stewart 		 * - second is from start of buffer to writehead
3467d11e744SLawrence Stewart 		 */
3477d11e744SLawrence Stewart 		aiov[0].iov_len = alq->aq_buflen - alq->aq_writetail -
3487d11e744SLawrence Stewart 		    wrapearly;
3499405072aSJeff Roberson 		iov++;
3507d11e744SLawrence Stewart 		aiov[1].iov_base = alq->aq_entbuf;
3517d11e744SLawrence Stewart 		aiov[1].iov_len =  alq->aq_writehead;
3527d11e744SLawrence Stewart 		totlen = aiov[0].iov_len + aiov[1].iov_len;
3537d11e744SLawrence Stewart 	}
3549405072aSJeff Roberson 
3559405072aSJeff Roberson 	alq->aq_flags |= AQ_FLUSHING;
3569405072aSJeff Roberson 	ALQ_UNLOCK(alq);
3579405072aSJeff Roberson 
3589405072aSJeff Roberson 	auio.uio_iov = &aiov[0];
3599405072aSJeff Roberson 	auio.uio_offset = 0;
3609405072aSJeff Roberson 	auio.uio_segflg = UIO_SYSSPACE;
3619405072aSJeff Roberson 	auio.uio_rw = UIO_WRITE;
3627d11e744SLawrence Stewart 	auio.uio_iovcnt = iov;
3639405072aSJeff Roberson 	auio.uio_resid = totlen;
3649405072aSJeff Roberson 	auio.uio_td = td;
3659405072aSJeff Roberson 
3669405072aSJeff Roberson 	/*
3679405072aSJeff Roberson 	 * Do all of the junk required to write now.
3689405072aSJeff Roberson 	 */
3699405072aSJeff Roberson 	vn_start_write(vp, &mp, V_WAIT);
370cb05b60aSAttilio Rao 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
37167536f03SRobert Watson 	/*
37267536f03SRobert Watson 	 * XXX: VOP_WRITE error checks are ignored.
37367536f03SRobert Watson 	 */
37467536f03SRobert Watson #ifdef MAC
37530d239bcSRobert Watson 	if (mac_vnode_check_write(alq->aq_cred, NOCRED, vp) == 0)
37667536f03SRobert Watson #endif
3779e9256e2SJeff Roberson 		VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, alq->aq_cred);
378b249ce48SMateusz Guzik 	VOP_UNLOCK(vp);
3799405072aSJeff Roberson 	vn_finished_write(mp);
3809405072aSJeff Roberson 
3819405072aSJeff Roberson 	ALQ_LOCK(alq);
3829405072aSJeff Roberson 	alq->aq_flags &= ~AQ_FLUSHING;
3839405072aSJeff Roberson 
3847d11e744SLawrence Stewart 	/* Adjust writetail as required, taking into account wrapping. */
3857d11e744SLawrence Stewart 	alq->aq_writetail = (alq->aq_writetail + totlen + wrapearly) %
3867d11e744SLawrence Stewart 	    alq->aq_buflen;
3877d11e744SLawrence Stewart 	alq->aq_freebytes += totlen + wrapearly;
3887d11e744SLawrence Stewart 
3897d11e744SLawrence Stewart 	/*
3907d11e744SLawrence Stewart 	 * If we just flushed part of the buffer which wrapped, reset the
3917d11e744SLawrence Stewart 	 * wrapearly indicator.
3927d11e744SLawrence Stewart 	 */
3937d11e744SLawrence Stewart 	if (wrapearly)
3947d11e744SLawrence Stewart 		alq->aq_wrapearly = 0;
3957d11e744SLawrence Stewart 
3967d11e744SLawrence Stewart 	/*
3977d11e744SLawrence Stewart 	 * If we just flushed the buffer completely, reset indexes to 0 to
3987d11e744SLawrence Stewart 	 * minimise buffer wraps.
3997d11e744SLawrence Stewart 	 * This is also required to ensure alq_getn() can't wedge itself.
4007d11e744SLawrence Stewart 	 */
4017d11e744SLawrence Stewart 	if (!HAS_PENDING_DATA(alq))
4027d11e744SLawrence Stewart 		alq->aq_writehead = alq->aq_writetail = 0;
4037d11e744SLawrence Stewart 
4047d11e744SLawrence Stewart 	KASSERT((alq->aq_writetail >= 0 && alq->aq_writetail < alq->aq_buflen),
4057d11e744SLawrence Stewart 	    ("%s: aq_writetail < 0 || aq_writetail >= aq_buflen", __func__));
4069405072aSJeff Roberson 
4079405072aSJeff Roberson 	if (alq->aq_flags & AQ_WANTED) {
4089405072aSJeff Roberson 		alq->aq_flags &= ~AQ_WANTED;
4099405072aSJeff Roberson 		return (1);
4109405072aSJeff Roberson 	}
4119405072aSJeff Roberson 
4129405072aSJeff Roberson 	return(0);
4139405072aSJeff Roberson }
4149405072aSJeff Roberson 
4159405072aSJeff Roberson static struct kproc_desc ald_kp = {
4169405072aSJeff Roberson         "ALQ Daemon",
4179405072aSJeff Roberson         ald_daemon,
418a414302fSJeff Roberson         &ald_proc
4199405072aSJeff Roberson };
4209405072aSJeff Roberson 
421237fdd78SRobert Watson SYSINIT(aldthread, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, kproc_start, &ald_kp);
422237fdd78SRobert Watson SYSINIT(ald, SI_SUB_LOCK, SI_ORDER_ANY, ald_startup, NULL);
4239405072aSJeff Roberson 
4249405072aSJeff Roberson /* User visible queue functions */
4259405072aSJeff Roberson 
4269405072aSJeff Roberson /*
4279405072aSJeff Roberson  * Create the queue data structure, allocate the buffer, and open the file.
4289405072aSJeff Roberson  */
4297d11e744SLawrence Stewart 
4309405072aSJeff Roberson int
4317d11e744SLawrence Stewart alq_open_flags(struct alq **alqp, const char *file, struct ucred *cred, int cmode,
4327d11e744SLawrence Stewart     int size, int flags)
4339405072aSJeff Roberson {
4349405072aSJeff Roberson 	struct nameidata nd;
4359405072aSJeff Roberson 	struct alq *alq;
4367d11e744SLawrence Stewart 	int oflags;
4379405072aSJeff Roberson 	int error;
4387d11e744SLawrence Stewart 
4397d11e744SLawrence Stewart 	KASSERT((size > 0), ("%s: size <= 0", __func__));
4409405072aSJeff Roberson 
4419405072aSJeff Roberson 	*alqp = NULL;
4429405072aSJeff Roberson 
4437e1d3eefSMateusz Guzik 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, file);
4447d11e744SLawrence Stewart 	oflags = FWRITE | O_NOFOLLOW | O_CREAT;
4459405072aSJeff Roberson 
4467d11e744SLawrence Stewart 	error = vn_open_cred(&nd, &oflags, cmode, 0, cred, NULL);
4479405072aSJeff Roberson 	if (error)
4489405072aSJeff Roberson 		return (error);
4499405072aSJeff Roberson 
450*bb92cd7bSMateusz Guzik 	NDFREE_PNBUF(&nd);
4519405072aSJeff Roberson 	/* We just unlock so we hold a reference */
452b249ce48SMateusz Guzik 	VOP_UNLOCK(nd.ni_vp);
4539405072aSJeff Roberson 
454a163d034SWarner Losh 	alq = malloc(sizeof(*alq), M_ALD, M_WAITOK|M_ZERO);
4559405072aSJeff Roberson 	alq->aq_vp = nd.ni_vp;
4564b090e41SRobert Watson 	alq->aq_cred = crhold(cred);
4579405072aSJeff Roberson 
4589405072aSJeff Roberson 	mtx_init(&alq->aq_mtx, "ALD Queue", NULL, MTX_SPIN|MTX_QUIET);
4599405072aSJeff Roberson 
4607d11e744SLawrence Stewart 	alq->aq_buflen = size;
4617d11e744SLawrence Stewart 	alq->aq_entmax = 0;
4627d11e744SLawrence Stewart 	alq->aq_entlen = 0;
4639405072aSJeff Roberson 
4647d11e744SLawrence Stewart 	alq->aq_freebytes = alq->aq_buflen;
4657d11e744SLawrence Stewart 	alq->aq_entbuf = malloc(alq->aq_buflen, M_ALD, M_WAITOK|M_ZERO);
4667d11e744SLawrence Stewart 	alq->aq_writehead = alq->aq_writetail = 0;
4677d11e744SLawrence Stewart 	if (flags & ALQ_ORDERED)
4687d11e744SLawrence Stewart 		alq->aq_flags |= AQ_ORDERED;
4699405072aSJeff Roberson 
470c0ea37a8SLawrence Stewart 	if ((error = ald_add(alq)) != 0) {
471c0ea37a8SLawrence Stewart 		alq_destroy(alq);
4729405072aSJeff Roberson 		return (error);
473c0ea37a8SLawrence Stewart 	}
474c0ea37a8SLawrence Stewart 
4759405072aSJeff Roberson 	*alqp = alq;
4769405072aSJeff Roberson 
4779405072aSJeff Roberson 	return (0);
4789405072aSJeff Roberson }
4799405072aSJeff Roberson 
4807d11e744SLawrence Stewart int
4817d11e744SLawrence Stewart alq_open(struct alq **alqp, const char *file, struct ucred *cred, int cmode,
4827d11e744SLawrence Stewart     int size, int count)
4837d11e744SLawrence Stewart {
4847d11e744SLawrence Stewart 	int ret;
4857d11e744SLawrence Stewart 
4867d11e744SLawrence Stewart 	KASSERT((count >= 0), ("%s: count < 0", __func__));
4877d11e744SLawrence Stewart 
4887d11e744SLawrence Stewart 	if (count > 0) {
4896b57eff4SDmitry Chagin 		if ((ret = alq_open_flags(alqp, file, cred, cmode,
4906b57eff4SDmitry Chagin 		    size*count, 0)) == 0) {
4917d11e744SLawrence Stewart 			(*alqp)->aq_flags |= AQ_LEGACY;
4927d11e744SLawrence Stewart 			(*alqp)->aq_entmax = count;
4937d11e744SLawrence Stewart 			(*alqp)->aq_entlen = size;
4946b57eff4SDmitry Chagin 		}
4957d11e744SLawrence Stewart 	} else
4967d11e744SLawrence Stewart 		ret = alq_open_flags(alqp, file, cred, cmode, size, 0);
4977d11e744SLawrence Stewart 
4987d11e744SLawrence Stewart 	return (ret);
4997d11e744SLawrence Stewart }
5007d11e744SLawrence Stewart 
5019405072aSJeff Roberson /*
5029405072aSJeff Roberson  * Copy a new entry into the queue.  If the operation would block either
5039405072aSJeff Roberson  * wait or return an error depending on the value of waitok.
5049405072aSJeff Roberson  */
5059405072aSJeff Roberson int
5067d11e744SLawrence Stewart alq_writen(struct alq *alq, void *data, int len, int flags)
5079405072aSJeff Roberson {
5087d11e744SLawrence Stewart 	int activate, copy, ret;
5097d11e744SLawrence Stewart 	void *waitchan;
5109405072aSJeff Roberson 
5117d11e744SLawrence Stewart 	KASSERT((len > 0 && len <= alq->aq_buflen),
5127d11e744SLawrence Stewart 	    ("%s: len <= 0 || len > aq_buflen", __func__));
5139405072aSJeff Roberson 
5147d11e744SLawrence Stewart 	activate = ret = 0;
5157d11e744SLawrence Stewart 	copy = len;
5167d11e744SLawrence Stewart 	waitchan = NULL;
5179405072aSJeff Roberson 
5189405072aSJeff Roberson 	ALQ_LOCK(alq);
5199405072aSJeff Roberson 
5207d11e744SLawrence Stewart 	/*
5217d11e744SLawrence Stewart 	 * Fail to perform the write and return EWOULDBLOCK if:
5227d11e744SLawrence Stewart 	 * - The message is larger than our underlying buffer.
5237d11e744SLawrence Stewart 	 * - The ALQ is being shutdown.
5247d11e744SLawrence Stewart 	 * - There is insufficient free space in our underlying buffer
5257d11e744SLawrence Stewart 	 *   to accept the message and the user can't wait for space.
5267d11e744SLawrence Stewart 	 * - There is insufficient free space in our underlying buffer
5277d11e744SLawrence Stewart 	 *   to accept the message and the alq is inactive due to prior
5287d11e744SLawrence Stewart 	 *   use of the ALQ_NOACTIVATE flag (which would lead to deadlock).
5297d11e744SLawrence Stewart 	 */
5307d11e744SLawrence Stewart 	if (len > alq->aq_buflen ||
5317d11e744SLawrence Stewart 	    alq->aq_flags & AQ_SHUTDOWN ||
5327d11e744SLawrence Stewart 	    (((flags & ALQ_NOWAIT) || (!(alq->aq_flags & AQ_ACTIVE) &&
5337d11e744SLawrence Stewart 	    HAS_PENDING_DATA(alq))) && alq->aq_freebytes < len)) {
5349405072aSJeff Roberson 		ALQ_UNLOCK(alq);
5357d11e744SLawrence Stewart 		return (EWOULDBLOCK);
5369405072aSJeff Roberson 	}
5379405072aSJeff Roberson 
5387d11e744SLawrence Stewart 	/*
5397d11e744SLawrence Stewart 	 * If we want ordered writes and there is already at least one thread
5407d11e744SLawrence Stewart 	 * waiting for resources to become available, sleep until we're woken.
5417d11e744SLawrence Stewart 	 */
5427d11e744SLawrence Stewart 	if (alq->aq_flags & AQ_ORDERED && alq->aq_waiters > 0) {
5437d11e744SLawrence Stewart 		KASSERT(!(flags & ALQ_NOWAIT),
5447d11e744SLawrence Stewart 		    ("%s: ALQ_NOWAIT set but incorrectly ignored!", __func__));
5457d11e744SLawrence Stewart 		alq->aq_waiters++;
5467d11e744SLawrence Stewart 		msleep_spin(&alq->aq_waiters, &alq->aq_mtx, "alqwnord", 0);
5477d11e744SLawrence Stewart 		alq->aq_waiters--;
5487d11e744SLawrence Stewart 	}
5499405072aSJeff Roberson 
5507d11e744SLawrence Stewart 	/*
5517d11e744SLawrence Stewart 	 * (ALQ_WAITOK && aq_freebytes < len) or aq_freebytes >= len, either
5527d11e744SLawrence Stewart 	 * enter while loop and sleep until we have enough free bytes (former)
5537d11e744SLawrence Stewart 	 * or skip (latter). If AQ_ORDERED is set, only 1 thread at a time will
5547d11e744SLawrence Stewart 	 * be in this loop. Otherwise, multiple threads may be sleeping here
5557d11e744SLawrence Stewart 	 * competing for ALQ resources.
5567d11e744SLawrence Stewart 	 */
5577d11e744SLawrence Stewart 	while (alq->aq_freebytes < len && !(alq->aq_flags & AQ_SHUTDOWN)) {
5587d11e744SLawrence Stewart 		KASSERT(!(flags & ALQ_NOWAIT),
5597d11e744SLawrence Stewart 		    ("%s: ALQ_NOWAIT set but incorrectly ignored!", __func__));
5607d11e744SLawrence Stewart 		alq->aq_flags |= AQ_WANTED;
5617d11e744SLawrence Stewart 		alq->aq_waiters++;
5627d11e744SLawrence Stewart 		if (waitchan)
5637d11e744SLawrence Stewart 			wakeup(waitchan);
5647d11e744SLawrence Stewart 		msleep_spin(alq, &alq->aq_mtx, "alqwnres", 0);
5657d11e744SLawrence Stewart 		alq->aq_waiters--;
5669405072aSJeff Roberson 
5677d11e744SLawrence Stewart 		/*
5687d11e744SLawrence Stewart 		 * If we're the first thread to wake after an AQ_WANTED wakeup
5697d11e744SLawrence Stewart 		 * but there isn't enough free space for us, we're going to loop
5707d11e744SLawrence Stewart 		 * and sleep again. If there are other threads waiting in this
5717d11e744SLawrence Stewart 		 * loop, schedule a wakeup so that they can see if the space
5727d11e744SLawrence Stewart 		 * they require is available.
5737d11e744SLawrence Stewart 		 */
5747d11e744SLawrence Stewart 		if (alq->aq_waiters > 0 && !(alq->aq_flags & AQ_ORDERED) &&
5757d11e744SLawrence Stewart 		    alq->aq_freebytes < len && !(alq->aq_flags & AQ_WANTED))
5767d11e744SLawrence Stewart 			waitchan = alq;
5777d11e744SLawrence Stewart 		else
5787d11e744SLawrence Stewart 			waitchan = NULL;
5797d11e744SLawrence Stewart 	}
5809405072aSJeff Roberson 
5817d11e744SLawrence Stewart 	/*
5827d11e744SLawrence Stewart 	 * If there are waiters, we need to signal the waiting threads after we
5837d11e744SLawrence Stewart 	 * complete our work. The alq ptr is used as a wait channel for threads
5847d11e744SLawrence Stewart 	 * requiring resources to be freed up. In the AQ_ORDERED case, threads
5857d11e744SLawrence Stewart 	 * are not allowed to concurrently compete for resources in the above
5867d11e744SLawrence Stewart 	 * while loop, so we use a different wait channel in this case.
5877d11e744SLawrence Stewart 	 */
5887d11e744SLawrence Stewart 	if (alq->aq_waiters > 0) {
5897d11e744SLawrence Stewart 		if (alq->aq_flags & AQ_ORDERED)
5907d11e744SLawrence Stewart 			waitchan = &alq->aq_waiters;
5917d11e744SLawrence Stewart 		else
5927d11e744SLawrence Stewart 			waitchan = alq;
5937d11e744SLawrence Stewart 	} else
5947d11e744SLawrence Stewart 		waitchan = NULL;
5957d11e744SLawrence Stewart 
5967d11e744SLawrence Stewart 	/* Bail if we're shutting down. */
5977d11e744SLawrence Stewart 	if (alq->aq_flags & AQ_SHUTDOWN) {
5987d11e744SLawrence Stewart 		ret = EWOULDBLOCK;
5997d11e744SLawrence Stewart 		goto unlock;
6007d11e744SLawrence Stewart 	}
6017d11e744SLawrence Stewart 
6027d11e744SLawrence Stewart 	/*
6037d11e744SLawrence Stewart 	 * If we need to wrap the buffer to accommodate the write,
6047d11e744SLawrence Stewart 	 * we'll need 2 calls to bcopy.
6057d11e744SLawrence Stewart 	 */
6067d11e744SLawrence Stewart 	if ((alq->aq_buflen - alq->aq_writehead) < len)
6077d11e744SLawrence Stewart 		copy = alq->aq_buflen - alq->aq_writehead;
6087d11e744SLawrence Stewart 
6097d11e744SLawrence Stewart 	/* Copy message (or part thereof if wrap required) to the buffer. */
6107d11e744SLawrence Stewart 	bcopy(data, alq->aq_entbuf + alq->aq_writehead, copy);
6117d11e744SLawrence Stewart 	alq->aq_writehead += copy;
6127d11e744SLawrence Stewart 
6137d11e744SLawrence Stewart 	if (alq->aq_writehead >= alq->aq_buflen) {
6147d11e744SLawrence Stewart 		KASSERT((alq->aq_writehead == alq->aq_buflen),
6157d11e744SLawrence Stewart 		    ("%s: alq->aq_writehead (%d) > alq->aq_buflen (%d)",
6167d11e744SLawrence Stewart 		    __func__,
6177d11e744SLawrence Stewart 		    alq->aq_writehead,
6187d11e744SLawrence Stewart 		    alq->aq_buflen));
6197d11e744SLawrence Stewart 		alq->aq_writehead = 0;
6207d11e744SLawrence Stewart 	}
6217d11e744SLawrence Stewart 
6227d11e744SLawrence Stewart 	if (copy != len) {
6237d11e744SLawrence Stewart 		/*
6247d11e744SLawrence Stewart 		 * Wrap the buffer by copying the remainder of our message
6257d11e744SLawrence Stewart 		 * to the start of the buffer and resetting aq_writehead.
6267d11e744SLawrence Stewart 		 */
6277d11e744SLawrence Stewart 		bcopy(((uint8_t *)data)+copy, alq->aq_entbuf, len - copy);
6287d11e744SLawrence Stewart 		alq->aq_writehead = len - copy;
6297d11e744SLawrence Stewart 	}
6307d11e744SLawrence Stewart 
6317d11e744SLawrence Stewart 	KASSERT((alq->aq_writehead >= 0 && alq->aq_writehead < alq->aq_buflen),
6327d11e744SLawrence Stewart 	    ("%s: aq_writehead < 0 || aq_writehead >= aq_buflen", __func__));
6337d11e744SLawrence Stewart 
6347d11e744SLawrence Stewart 	alq->aq_freebytes -= len;
6357d11e744SLawrence Stewart 
6367d11e744SLawrence Stewart 	if (!(alq->aq_flags & AQ_ACTIVE) && !(flags & ALQ_NOACTIVATE)) {
6379405072aSJeff Roberson 		alq->aq_flags |= AQ_ACTIVE;
6389405072aSJeff Roberson 		activate = 1;
6397d11e744SLawrence Stewart 	}
6409405072aSJeff Roberson 
6417d11e744SLawrence Stewart 	KASSERT((HAS_PENDING_DATA(alq)), ("%s: queue empty!", __func__));
6427d11e744SLawrence Stewart 
6437d11e744SLawrence Stewart unlock:
6449405072aSJeff Roberson 	ALQ_UNLOCK(alq);
6457d11e744SLawrence Stewart 
6469405072aSJeff Roberson 	if (activate) {
6479405072aSJeff Roberson 		ALD_LOCK();
6489405072aSJeff Roberson 		ald_activate(alq);
6499405072aSJeff Roberson 		ALD_UNLOCK();
6509405072aSJeff Roberson 	}
6517d11e744SLawrence Stewart 
6527d11e744SLawrence Stewart 	/* NB: We rely on wakeup_one waking threads in a FIFO manner. */
6537d11e744SLawrence Stewart 	if (waitchan != NULL)
6547d11e744SLawrence Stewart 		wakeup_one(waitchan);
6557d11e744SLawrence Stewart 
6567d11e744SLawrence Stewart 	return (ret);
6577d11e744SLawrence Stewart }
6587d11e744SLawrence Stewart 
6597d11e744SLawrence Stewart int
6607d11e744SLawrence Stewart alq_write(struct alq *alq, void *data, int flags)
6617d11e744SLawrence Stewart {
6627d11e744SLawrence Stewart 	/* Should only be called in fixed length message (legacy) mode. */
6637d11e744SLawrence Stewart 	KASSERT((alq->aq_flags & AQ_LEGACY),
6647d11e744SLawrence Stewart 	    ("%s: fixed length write on variable length queue", __func__));
6657d11e744SLawrence Stewart 	return (alq_writen(alq, data, alq->aq_entlen, flags));
6667d11e744SLawrence Stewart }
6677d11e744SLawrence Stewart 
6687d11e744SLawrence Stewart /*
6697d11e744SLawrence Stewart  * Retrieve a pointer for the ALQ to write directly into, avoiding bcopy.
6707d11e744SLawrence Stewart  */
6717d11e744SLawrence Stewart struct ale *
6727d11e744SLawrence Stewart alq_getn(struct alq *alq, int len, int flags)
6737d11e744SLawrence Stewart {
6747d11e744SLawrence Stewart 	int contigbytes;
6757d11e744SLawrence Stewart 	void *waitchan;
6767d11e744SLawrence Stewart 
6777d11e744SLawrence Stewart 	KASSERT((len > 0 && len <= alq->aq_buflen),
6787d11e744SLawrence Stewart 	    ("%s: len <= 0 || len > alq->aq_buflen", __func__));
6797d11e744SLawrence Stewart 
6807d11e744SLawrence Stewart 	waitchan = NULL;
6817d11e744SLawrence Stewart 
6827d11e744SLawrence Stewart 	ALQ_LOCK(alq);
6837d11e744SLawrence Stewart 
6847d11e744SLawrence Stewart 	/*
6857d11e744SLawrence Stewart 	 * Determine the number of free contiguous bytes.
6867d11e744SLawrence Stewart 	 * We ensure elsewhere that if aq_writehead == aq_writetail because
6877d11e744SLawrence Stewart 	 * the buffer is empty, they will both be set to 0 and therefore
6887d11e744SLawrence Stewart 	 * aq_freebytes == aq_buflen and is fully contiguous.
6897d11e744SLawrence Stewart 	 * If they are equal and the buffer is not empty, aq_freebytes will
6907d11e744SLawrence Stewart 	 * be 0 indicating the buffer is full.
6917d11e744SLawrence Stewart 	 */
6927d11e744SLawrence Stewart 	if (alq->aq_writehead <= alq->aq_writetail)
6937d11e744SLawrence Stewart 		contigbytes = alq->aq_freebytes;
6947d11e744SLawrence Stewart 	else {
6957d11e744SLawrence Stewart 		contigbytes = alq->aq_buflen - alq->aq_writehead;
6967d11e744SLawrence Stewart 
6977d11e744SLawrence Stewart 		if (contigbytes < len) {
6987d11e744SLawrence Stewart 			/*
6997d11e744SLawrence Stewart 			 * Insufficient space at end of buffer to handle a
7007d11e744SLawrence Stewart 			 * contiguous write. Wrap early if there's space at
7017d11e744SLawrence Stewart 			 * the beginning. This will leave a hole at the end
7027d11e744SLawrence Stewart 			 * of the buffer which we will have to skip over when
7037d11e744SLawrence Stewart 			 * flushing the buffer to disk.
7047d11e744SLawrence Stewart 			 */
7057d11e744SLawrence Stewart 			if (alq->aq_writetail >= len || flags & ALQ_WAITOK) {
7067d11e744SLawrence Stewart 				/* Keep track of # bytes left blank. */
7077d11e744SLawrence Stewart 				alq->aq_wrapearly = contigbytes;
7087d11e744SLawrence Stewart 				/* Do the wrap and adjust counters. */
7097d11e744SLawrence Stewart 				contigbytes = alq->aq_freebytes =
7107d11e744SLawrence Stewart 				    alq->aq_writetail;
7117d11e744SLawrence Stewart 				alq->aq_writehead = 0;
7127d11e744SLawrence Stewart 			}
7137d11e744SLawrence Stewart 		}
7147d11e744SLawrence Stewart 	}
7157d11e744SLawrence Stewart 
7167d11e744SLawrence Stewart 	/*
7177d11e744SLawrence Stewart 	 * Return a NULL ALE if:
7187d11e744SLawrence Stewart 	 * - The message is larger than our underlying buffer.
7197d11e744SLawrence Stewart 	 * - The ALQ is being shutdown.
7207d11e744SLawrence Stewart 	 * - There is insufficient free space in our underlying buffer
7217d11e744SLawrence Stewart 	 *   to accept the message and the user can't wait for space.
7227d11e744SLawrence Stewart 	 * - There is insufficient free space in our underlying buffer
7237d11e744SLawrence Stewart 	 *   to accept the message and the alq is inactive due to prior
7247d11e744SLawrence Stewart 	 *   use of the ALQ_NOACTIVATE flag (which would lead to deadlock).
7257d11e744SLawrence Stewart 	 */
7267d11e744SLawrence Stewart 	if (len > alq->aq_buflen ||
7277d11e744SLawrence Stewart 	    alq->aq_flags & AQ_SHUTDOWN ||
7287d11e744SLawrence Stewart 	    (((flags & ALQ_NOWAIT) || (!(alq->aq_flags & AQ_ACTIVE) &&
7297d11e744SLawrence Stewart 	    HAS_PENDING_DATA(alq))) && contigbytes < len)) {
7307d11e744SLawrence Stewart 		ALQ_UNLOCK(alq);
7317d11e744SLawrence Stewart 		return (NULL);
7327d11e744SLawrence Stewart 	}
7337d11e744SLawrence Stewart 
7347d11e744SLawrence Stewart 	/*
7357d11e744SLawrence Stewart 	 * If we want ordered writes and there is already at least one thread
7367d11e744SLawrence Stewart 	 * waiting for resources to become available, sleep until we're woken.
7377d11e744SLawrence Stewart 	 */
7387d11e744SLawrence Stewart 	if (alq->aq_flags & AQ_ORDERED && alq->aq_waiters > 0) {
7397d11e744SLawrence Stewart 		KASSERT(!(flags & ALQ_NOWAIT),
7407d11e744SLawrence Stewart 		    ("%s: ALQ_NOWAIT set but incorrectly ignored!", __func__));
7417d11e744SLawrence Stewart 		alq->aq_waiters++;
7427d11e744SLawrence Stewart 		msleep_spin(&alq->aq_waiters, &alq->aq_mtx, "alqgnord", 0);
7437d11e744SLawrence Stewart 		alq->aq_waiters--;
7447d11e744SLawrence Stewart 	}
7457d11e744SLawrence Stewart 
7467d11e744SLawrence Stewart 	/*
7477d11e744SLawrence Stewart 	 * (ALQ_WAITOK && contigbytes < len) or contigbytes >= len, either enter
7487d11e744SLawrence Stewart 	 * while loop and sleep until we have enough contiguous free bytes
7497d11e744SLawrence Stewart 	 * (former) or skip (latter). If AQ_ORDERED is set, only 1 thread at a
7507d11e744SLawrence Stewart 	 * time will be in this loop. Otherwise, multiple threads may be
7517d11e744SLawrence Stewart 	 * sleeping here competing for ALQ resources.
7527d11e744SLawrence Stewart 	 */
7537d11e744SLawrence Stewart 	while (contigbytes < len && !(alq->aq_flags & AQ_SHUTDOWN)) {
7547d11e744SLawrence Stewart 		KASSERT(!(flags & ALQ_NOWAIT),
7557d11e744SLawrence Stewart 		    ("%s: ALQ_NOWAIT set but incorrectly ignored!", __func__));
7567d11e744SLawrence Stewart 		alq->aq_flags |= AQ_WANTED;
7577d11e744SLawrence Stewart 		alq->aq_waiters++;
7587d11e744SLawrence Stewart 		if (waitchan)
7597d11e744SLawrence Stewart 			wakeup(waitchan);
7607d11e744SLawrence Stewart 		msleep_spin(alq, &alq->aq_mtx, "alqgnres", 0);
7617d11e744SLawrence Stewart 		alq->aq_waiters--;
7627d11e744SLawrence Stewart 
7637d11e744SLawrence Stewart 		if (alq->aq_writehead <= alq->aq_writetail)
7647d11e744SLawrence Stewart 			contigbytes = alq->aq_freebytes;
7657d11e744SLawrence Stewart 		else
7667d11e744SLawrence Stewart 			contigbytes = alq->aq_buflen - alq->aq_writehead;
7677d11e744SLawrence Stewart 
7687d11e744SLawrence Stewart 		/*
7697d11e744SLawrence Stewart 		 * If we're the first thread to wake after an AQ_WANTED wakeup
7707d11e744SLawrence Stewart 		 * but there isn't enough free space for us, we're going to loop
7717d11e744SLawrence Stewart 		 * and sleep again. If there are other threads waiting in this
7727d11e744SLawrence Stewart 		 * loop, schedule a wakeup so that they can see if the space
7737d11e744SLawrence Stewart 		 * they require is available.
7747d11e744SLawrence Stewart 		 */
7757d11e744SLawrence Stewart 		if (alq->aq_waiters > 0 && !(alq->aq_flags & AQ_ORDERED) &&
7767d11e744SLawrence Stewart 		    contigbytes < len && !(alq->aq_flags & AQ_WANTED))
7777d11e744SLawrence Stewart 			waitchan = alq;
7787d11e744SLawrence Stewart 		else
7797d11e744SLawrence Stewart 			waitchan = NULL;
7807d11e744SLawrence Stewart 	}
7817d11e744SLawrence Stewart 
7827d11e744SLawrence Stewart 	/*
7837d11e744SLawrence Stewart 	 * If there are waiters, we need to signal the waiting threads after we
7847d11e744SLawrence Stewart 	 * complete our work. The alq ptr is used as a wait channel for threads
7857d11e744SLawrence Stewart 	 * requiring resources to be freed up. In the AQ_ORDERED case, threads
7867d11e744SLawrence Stewart 	 * are not allowed to concurrently compete for resources in the above
7877d11e744SLawrence Stewart 	 * while loop, so we use a different wait channel in this case.
7887d11e744SLawrence Stewart 	 */
7897d11e744SLawrence Stewart 	if (alq->aq_waiters > 0) {
7907d11e744SLawrence Stewart 		if (alq->aq_flags & AQ_ORDERED)
7917d11e744SLawrence Stewart 			waitchan = &alq->aq_waiters;
7927d11e744SLawrence Stewart 		else
7937d11e744SLawrence Stewart 			waitchan = alq;
7947d11e744SLawrence Stewart 	} else
7957d11e744SLawrence Stewart 		waitchan = NULL;
7967d11e744SLawrence Stewart 
7977d11e744SLawrence Stewart 	/* Bail if we're shutting down. */
7987d11e744SLawrence Stewart 	if (alq->aq_flags & AQ_SHUTDOWN) {
7997d11e744SLawrence Stewart 		ALQ_UNLOCK(alq);
8007d11e744SLawrence Stewart 		if (waitchan != NULL)
8017d11e744SLawrence Stewart 			wakeup_one(waitchan);
8027d11e744SLawrence Stewart 		return (NULL);
8037d11e744SLawrence Stewart 	}
8047d11e744SLawrence Stewart 
8057d11e744SLawrence Stewart 	/*
8067d11e744SLawrence Stewart 	 * If we are here, we have a contiguous number of bytes >= len
8077d11e744SLawrence Stewart 	 * available in our buffer starting at aq_writehead.
8087d11e744SLawrence Stewart 	 */
8097d11e744SLawrence Stewart 	alq->aq_getpost.ae_data = alq->aq_entbuf + alq->aq_writehead;
8107d11e744SLawrence Stewart 	alq->aq_getpost.ae_bytesused = len;
8117d11e744SLawrence Stewart 
8127d11e744SLawrence Stewart 	return (&alq->aq_getpost);
8137d11e744SLawrence Stewart }
8147d11e744SLawrence Stewart 
8157d11e744SLawrence Stewart struct ale *
8167d11e744SLawrence Stewart alq_get(struct alq *alq, int flags)
8177d11e744SLawrence Stewart {
8187d11e744SLawrence Stewart 	/* Should only be called in fixed length message (legacy) mode. */
8197d11e744SLawrence Stewart 	KASSERT((alq->aq_flags & AQ_LEGACY),
8207d11e744SLawrence Stewart 	    ("%s: fixed length get on variable length queue", __func__));
8217d11e744SLawrence Stewart 	return (alq_getn(alq, alq->aq_entlen, flags));
8227d11e744SLawrence Stewart }
8237d11e744SLawrence Stewart 
8247d11e744SLawrence Stewart void
8257d11e744SLawrence Stewart alq_post_flags(struct alq *alq, struct ale *ale, int flags)
8267d11e744SLawrence Stewart {
8277d11e744SLawrence Stewart 	int activate;
8287d11e744SLawrence Stewart 	void *waitchan;
8297d11e744SLawrence Stewart 
8307d11e744SLawrence Stewart 	activate = 0;
8317d11e744SLawrence Stewart 
8327d11e744SLawrence Stewart 	if (ale->ae_bytesused > 0) {
8337d11e744SLawrence Stewart 		if (!(alq->aq_flags & AQ_ACTIVE) &&
8347d11e744SLawrence Stewart 		    !(flags & ALQ_NOACTIVATE)) {
8357d11e744SLawrence Stewart 			alq->aq_flags |= AQ_ACTIVE;
8367d11e744SLawrence Stewart 			activate = 1;
8377d11e744SLawrence Stewart 		}
8387d11e744SLawrence Stewart 
8397d11e744SLawrence Stewart 		alq->aq_writehead += ale->ae_bytesused;
8407d11e744SLawrence Stewart 		alq->aq_freebytes -= ale->ae_bytesused;
8417d11e744SLawrence Stewart 
8427d11e744SLawrence Stewart 		/* Wrap aq_writehead if we filled to the end of the buffer. */
8437d11e744SLawrence Stewart 		if (alq->aq_writehead == alq->aq_buflen)
8447d11e744SLawrence Stewart 			alq->aq_writehead = 0;
8457d11e744SLawrence Stewart 
8467d11e744SLawrence Stewart 		KASSERT((alq->aq_writehead >= 0 &&
8477d11e744SLawrence Stewart 		    alq->aq_writehead < alq->aq_buflen),
8487d11e744SLawrence Stewart 		    ("%s: aq_writehead < 0 || aq_writehead >= aq_buflen",
8497d11e744SLawrence Stewart 		    __func__));
8507d11e744SLawrence Stewart 
8517d11e744SLawrence Stewart 		KASSERT((HAS_PENDING_DATA(alq)), ("%s: queue empty!", __func__));
8527d11e744SLawrence Stewart 	}
8537d11e744SLawrence Stewart 
8547d11e744SLawrence Stewart 	/*
8557d11e744SLawrence Stewart 	 * If there are waiters, we need to signal the waiting threads after we
8567d11e744SLawrence Stewart 	 * complete our work. The alq ptr is used as a wait channel for threads
8577d11e744SLawrence Stewart 	 * requiring resources to be freed up. In the AQ_ORDERED case, threads
8587d11e744SLawrence Stewart 	 * are not allowed to concurrently compete for resources in the
8597d11e744SLawrence Stewart 	 * alq_getn() while loop, so we use a different wait channel in this case.
8607d11e744SLawrence Stewart 	 */
8617d11e744SLawrence Stewart 	if (alq->aq_waiters > 0) {
8627d11e744SLawrence Stewart 		if (alq->aq_flags & AQ_ORDERED)
8637d11e744SLawrence Stewart 			waitchan = &alq->aq_waiters;
8647d11e744SLawrence Stewart 		else
8657d11e744SLawrence Stewart 			waitchan = alq;
8667d11e744SLawrence Stewart 	} else
8677d11e744SLawrence Stewart 		waitchan = NULL;
8687d11e744SLawrence Stewart 
8697d11e744SLawrence Stewart 	ALQ_UNLOCK(alq);
8707d11e744SLawrence Stewart 
8717d11e744SLawrence Stewart 	if (activate) {
8727d11e744SLawrence Stewart 		ALD_LOCK();
8737d11e744SLawrence Stewart 		ald_activate(alq);
8747d11e744SLawrence Stewart 		ALD_UNLOCK();
8757d11e744SLawrence Stewart 	}
8767d11e744SLawrence Stewart 
8777d11e744SLawrence Stewart 	/* NB: We rely on wakeup_one waking threads in a FIFO manner. */
8787d11e744SLawrence Stewart 	if (waitchan != NULL)
8797d11e744SLawrence Stewart 		wakeup_one(waitchan);
8809405072aSJeff Roberson }
8819405072aSJeff Roberson 
8829405072aSJeff Roberson void
8839405072aSJeff Roberson alq_flush(struct alq *alq)
8849405072aSJeff Roberson {
8859405072aSJeff Roberson 	int needwakeup = 0;
8869405072aSJeff Roberson 
8879405072aSJeff Roberson 	ALD_LOCK();
8889405072aSJeff Roberson 	ALQ_LOCK(alq);
8897d11e744SLawrence Stewart 
8907d11e744SLawrence Stewart 	/*
8917d11e744SLawrence Stewart 	 * Pull the lever iff there is data to flush and we're
8927d11e744SLawrence Stewart 	 * not already in the middle of a flush operation.
8937d11e744SLawrence Stewart 	 */
8947d11e744SLawrence Stewart 	if (HAS_PENDING_DATA(alq) && !(alq->aq_flags & AQ_FLUSHING)) {
8957d11e744SLawrence Stewart 		if (alq->aq_flags & AQ_ACTIVE)
8969405072aSJeff Roberson 			ald_deactivate(alq);
8977d11e744SLawrence Stewart 
8989405072aSJeff Roberson 		ALD_UNLOCK();
8999405072aSJeff Roberson 		needwakeup = alq_doio(alq);
9009405072aSJeff Roberson 	} else
9019405072aSJeff Roberson 		ALD_UNLOCK();
9027d11e744SLawrence Stewart 
9039405072aSJeff Roberson 	ALQ_UNLOCK(alq);
9049405072aSJeff Roberson 
9059405072aSJeff Roberson 	if (needwakeup)
9067d11e744SLawrence Stewart 		wakeup_one(alq);
9079405072aSJeff Roberson }
9089405072aSJeff Roberson 
/*
 * Close an ALQ: flush what is left, close the file and release all
 * resources.  The actual work is done by alq_destroy(), which is only
 * called when ald_rem() returns 0.
 */
void
alq_close(struct alq *alq)
{
	/* Non-zero from ald_rem(): already shutting down, leave the alq alone. */
	if (ald_rem(alq) != 0)
		return;

	alq_destroy(alq);
}
919d28f42f9SLawrence Stewart 
920d28f42f9SLawrence Stewart static int
921d28f42f9SLawrence Stewart alq_load_handler(module_t mod, int what, void *arg)
922d28f42f9SLawrence Stewart {
923d28f42f9SLawrence Stewart 	int ret;
924d28f42f9SLawrence Stewart 
925d28f42f9SLawrence Stewart 	ret = 0;
926d28f42f9SLawrence Stewart 
927d28f42f9SLawrence Stewart 	switch (what) {
928d28f42f9SLawrence Stewart 	case MOD_LOAD:
929d28f42f9SLawrence Stewart 	case MOD_SHUTDOWN:
930d28f42f9SLawrence Stewart 		break;
931d28f42f9SLawrence Stewart 
932d28f42f9SLawrence Stewart 	case MOD_QUIESCE:
933d28f42f9SLawrence Stewart 		ALD_LOCK();
934d28f42f9SLawrence Stewart 		/* Only allow unload if there are no open queues. */
935d28f42f9SLawrence Stewart 		if (LIST_FIRST(&ald_queues) == NULL) {
936d28f42f9SLawrence Stewart 			ald_shutingdown = 1;
937d28f42f9SLawrence Stewart 			ALD_UNLOCK();
938ec41a9a1SLawrence Stewart 			EVENTHANDLER_DEREGISTER(shutdown_pre_sync,
939ec41a9a1SLawrence Stewart 			    alq_eventhandler_tag);
940d28f42f9SLawrence Stewart 			ald_shutdown(NULL, 0);
941d28f42f9SLawrence Stewart 			mtx_destroy(&ald_mtx);
942d28f42f9SLawrence Stewart 		} else {
943d28f42f9SLawrence Stewart 			ALD_UNLOCK();
944d28f42f9SLawrence Stewart 			ret = EBUSY;
945d28f42f9SLawrence Stewart 		}
946d28f42f9SLawrence Stewart 		break;
947d28f42f9SLawrence Stewart 
948d28f42f9SLawrence Stewart 	case MOD_UNLOAD:
949d28f42f9SLawrence Stewart 		/* If MOD_QUIESCE failed we must fail here too. */
950d28f42f9SLawrence Stewart 		if (ald_shutingdown == 0)
951d28f42f9SLawrence Stewart 			ret = EBUSY;
952d28f42f9SLawrence Stewart 		break;
953d28f42f9SLawrence Stewart 
954d28f42f9SLawrence Stewart 	default:
955d28f42f9SLawrence Stewart 		ret = EINVAL;
956d28f42f9SLawrence Stewart 		break;
957d28f42f9SLawrence Stewart 	}
958d28f42f9SLawrence Stewart 
959d28f42f9SLawrence Stewart 	return (ret);
960d28f42f9SLawrence Stewart }
961d28f42f9SLawrence Stewart 
962d28f42f9SLawrence Stewart static moduledata_t alq_mod =
963d28f42f9SLawrence Stewart {
964d28f42f9SLawrence Stewart 	"alq",
965d28f42f9SLawrence Stewart 	alq_load_handler,
966d28f42f9SLawrence Stewart 	NULL
967d28f42f9SLawrence Stewart };
968d28f42f9SLawrence Stewart 
96947cedcbdSJohn Baldwin DECLARE_MODULE(alq, alq_mod, SI_SUB_LAST, SI_ORDER_ANY);
970d28f42f9SLawrence Stewart MODULE_VERSION(alq, 1);
971