xref: /freebsd/sys/kern/kern_alq.c (revision d79a9edb5ce162c1ba49e12e5c93b894e6a25ad2)
19454b2d8SWarner Losh /*-
24d846d26SWarner Losh  * SPDX-License-Identifier: BSD-2-Clause
38a36da99SPedro F. Giffuni  *
49405072aSJeff Roberson  * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
5d28f42f9SLawrence Stewart  * Copyright (c) 2008-2009, Lawrence Stewart <lstewart@freebsd.org>
6d28f42f9SLawrence Stewart  * Copyright (c) 2009-2010, The FreeBSD Foundation
79405072aSJeff Roberson  * All rights reserved.
89405072aSJeff Roberson  *
9d28f42f9SLawrence Stewart  * Portions of this software were developed at the Centre for Advanced
10d28f42f9SLawrence Stewart  * Internet Architectures, Swinburne University of Technology, Melbourne,
11d28f42f9SLawrence Stewart  * Australia by Lawrence Stewart under sponsorship from the FreeBSD Foundation.
12d28f42f9SLawrence Stewart  *
139405072aSJeff Roberson  * Redistribution and use in source and binary forms, with or without
149405072aSJeff Roberson  * modification, are permitted provided that the following conditions
159405072aSJeff Roberson  * are met:
169405072aSJeff Roberson  * 1. Redistributions of source code must retain the above copyright
179405072aSJeff Roberson  *    notice unmodified, this list of conditions, and the following
189405072aSJeff Roberson  *    disclaimer.
199405072aSJeff Roberson  * 2. Redistributions in binary form must reproduce the above copyright
209405072aSJeff Roberson  *    notice, this list of conditions and the following disclaimer in the
219405072aSJeff Roberson  *    documentation and/or other materials provided with the distribution.
229405072aSJeff Roberson  *
239405072aSJeff Roberson  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
249405072aSJeff Roberson  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
259405072aSJeff Roberson  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
269405072aSJeff Roberson  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
279405072aSJeff Roberson  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
289405072aSJeff Roberson  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
299405072aSJeff Roberson  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
309405072aSJeff Roberson  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
319405072aSJeff Roberson  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
329405072aSJeff Roberson  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
339405072aSJeff Roberson  */
349405072aSJeff Roberson 
#include <sys/cdefs.h>
#include "opt_mac.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/alq.h>
#include <sys/eventhandler.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/reboot.h>
#include <sys/unistd.h>
#include <sys/vnode.h>

#include <security/mac/mac_framework.h>
56aed55708SRobert Watson 
579405072aSJeff Roberson /* Async. Logging Queue */
589405072aSJeff Roberson struct alq {
597d11e744SLawrence Stewart 	char	*aq_entbuf;		/* Buffer for stored entries */
609405072aSJeff Roberson 	int	aq_entmax;		/* Max entries */
619405072aSJeff Roberson 	int	aq_entlen;		/* Entry length */
627d11e744SLawrence Stewart 	int	aq_freebytes;		/* Bytes available in buffer */
637d11e744SLawrence Stewart 	int	aq_buflen;		/* Total length of our buffer */
647d11e744SLawrence Stewart 	int	aq_writehead;		/* Location for next write */
657d11e744SLawrence Stewart 	int	aq_writetail;		/* Flush starts at this location */
667d11e744SLawrence Stewart 	int	aq_wrapearly;		/* # bytes left blank at end of buf */
679405072aSJeff Roberson 	int	aq_flags;		/* Queue flags */
687d11e744SLawrence Stewart 	int	aq_waiters;		/* Num threads waiting for resources
697d11e744SLawrence Stewart 					 * NB: Used as a wait channel so must
707d11e744SLawrence Stewart 					 * not be first field in the alq struct
717d11e744SLawrence Stewart 					 */
727d11e744SLawrence Stewart 	struct	ale	aq_getpost;	/* ALE for use by get/post */
739405072aSJeff Roberson 	struct mtx	aq_mtx;		/* Queue lock */
749405072aSJeff Roberson 	struct vnode	*aq_vp;		/* Open vnode handle */
759e9256e2SJeff Roberson 	struct ucred	*aq_cred;	/* Credentials of the opening thread */
769405072aSJeff Roberson 	LIST_ENTRY(alq)	aq_act;		/* List of active queues */
779405072aSJeff Roberson 	LIST_ENTRY(alq)	aq_link;	/* List of all queues */
789405072aSJeff Roberson };
799405072aSJeff Roberson 
/* Bits stored in aq_flags. */
#define	AQ_WANTED	0x0001		/* Wake up the sleeper when I/O is done */
#define	AQ_ACTIVE	0x0002		/* Queue is on the active list */
#define	AQ_FLUSHING	0x0004		/* I/O is in progress */
#define	AQ_SHUTDOWN	0x0008		/* Queue is no longer valid */
#define	AQ_ORDERED	0x0010		/* Queue enforces ordered writes */
#define	AQ_LEGACY	0x0020		/* Legacy queue (fixed length writes) */

/* Acquire/release the per-queue spin mutex. */
#define	ALQ_LOCK(alq)	mtx_lock_spin(&(alq)->aq_mtx)
#define	ALQ_UNLOCK(alq)	mtx_unlock_spin(&(alq)->aq_mtx)

/* True while the buffer holds bytes that have not been flushed yet. */
#define	HAS_PENDING_DATA(alq)	((alq)->aq_freebytes != (alq)->aq_buflen)
917d11e744SLawrence Stewart 
929405072aSJeff Roberson static MALLOC_DEFINE(M_ALD, "ALD", "ALD");
939405072aSJeff Roberson 
949405072aSJeff Roberson /*
959405072aSJeff Roberson  * The ald_mtx protects the ald_queues list and the ald_active list.
969405072aSJeff Roberson  */
979405072aSJeff Roberson static struct mtx ald_mtx;
989405072aSJeff Roberson static LIST_HEAD(, alq) ald_queues;
999405072aSJeff Roberson static LIST_HEAD(, alq) ald_active;
1009405072aSJeff Roberson static int ald_shutingdown = 0;
101a414302fSJeff Roberson struct thread *ald_thread;
102a414302fSJeff Roberson static struct proc *ald_proc;
1037639c9beSLawrence Stewart static eventhandler_tag alq_eventhandler_tag = NULL;
1049405072aSJeff Roberson 
1059405072aSJeff Roberson #define	ALD_LOCK()	mtx_lock(&ald_mtx)
1069405072aSJeff Roberson #define	ALD_UNLOCK()	mtx_unlock(&ald_mtx)
1079405072aSJeff Roberson 
1089405072aSJeff Roberson /* Daemon functions */
1099405072aSJeff Roberson static int ald_add(struct alq *);
1109405072aSJeff Roberson static int ald_rem(struct alq *);
1119405072aSJeff Roberson static void ald_startup(void *);
1129405072aSJeff Roberson static void ald_daemon(void);
1139405072aSJeff Roberson static void ald_shutdown(void *, int);
1149405072aSJeff Roberson static void ald_activate(struct alq *);
1159405072aSJeff Roberson static void ald_deactivate(struct alq *);
1169405072aSJeff Roberson 
1179405072aSJeff Roberson /* Internal queue functions */
1189405072aSJeff Roberson static void alq_shutdown(struct alq *);
119c0ea37a8SLawrence Stewart static void alq_destroy(struct alq *);
1209405072aSJeff Roberson static int alq_doio(struct alq *);
1219405072aSJeff Roberson 
1229405072aSJeff Roberson /*
1239405072aSJeff Roberson  * Add a new queue to the global list.  Fail if we're shutting down.
1249405072aSJeff Roberson  */
1259405072aSJeff Roberson static int
ald_add(struct alq * alq)1269405072aSJeff Roberson ald_add(struct alq *alq)
1279405072aSJeff Roberson {
1289405072aSJeff Roberson 	int error;
1299405072aSJeff Roberson 
1309405072aSJeff Roberson 	error = 0;
1319405072aSJeff Roberson 
1329405072aSJeff Roberson 	ALD_LOCK();
1339405072aSJeff Roberson 	if (ald_shutingdown) {
1349405072aSJeff Roberson 		error = EBUSY;
1359405072aSJeff Roberson 		goto done;
1369405072aSJeff Roberson 	}
1379405072aSJeff Roberson 	LIST_INSERT_HEAD(&ald_queues, alq, aq_link);
1389405072aSJeff Roberson done:
1399405072aSJeff Roberson 	ALD_UNLOCK();
1409405072aSJeff Roberson 	return (error);
1419405072aSJeff Roberson }
1429405072aSJeff Roberson 
1439405072aSJeff Roberson /*
1449405072aSJeff Roberson  * Remove a queue from the global list unless we're shutting down.  If so,
1459405072aSJeff Roberson  * the ald will take care of cleaning up it's resources.
1469405072aSJeff Roberson  */
1479405072aSJeff Roberson static int
ald_rem(struct alq * alq)1489405072aSJeff Roberson ald_rem(struct alq *alq)
1499405072aSJeff Roberson {
1509405072aSJeff Roberson 	int error;
1519405072aSJeff Roberson 
1529405072aSJeff Roberson 	error = 0;
1539405072aSJeff Roberson 
1549405072aSJeff Roberson 	ALD_LOCK();
1559405072aSJeff Roberson 	if (ald_shutingdown) {
1569405072aSJeff Roberson 		error = EBUSY;
1579405072aSJeff Roberson 		goto done;
1589405072aSJeff Roberson 	}
1599405072aSJeff Roberson 	LIST_REMOVE(alq, aq_link);
1609405072aSJeff Roberson done:
1619405072aSJeff Roberson 	ALD_UNLOCK();
1629405072aSJeff Roberson 	return (error);
1639405072aSJeff Roberson }
1649405072aSJeff Roberson 
1659405072aSJeff Roberson /*
1669405072aSJeff Roberson  * Put a queue on the active list.  This will schedule it for writing.
1679405072aSJeff Roberson  */
1689405072aSJeff Roberson static void
ald_activate(struct alq * alq)1699405072aSJeff Roberson ald_activate(struct alq *alq)
1709405072aSJeff Roberson {
1719405072aSJeff Roberson 	LIST_INSERT_HEAD(&ald_active, alq, aq_act);
1729405072aSJeff Roberson 	wakeup(&ald_active);
1739405072aSJeff Roberson }
1749405072aSJeff Roberson 
1759405072aSJeff Roberson static void
ald_deactivate(struct alq * alq)1769405072aSJeff Roberson ald_deactivate(struct alq *alq)
1779405072aSJeff Roberson {
1789405072aSJeff Roberson 	LIST_REMOVE(alq, aq_act);
1799405072aSJeff Roberson 	alq->aq_flags &= ~AQ_ACTIVE;
1809405072aSJeff Roberson }
1819405072aSJeff Roberson 
1829405072aSJeff Roberson static void
ald_startup(void * unused)1839405072aSJeff Roberson ald_startup(void *unused)
1849405072aSJeff Roberson {
1859405072aSJeff Roberson 	mtx_init(&ald_mtx, "ALDmtx", NULL, MTX_DEF|MTX_QUIET);
1869405072aSJeff Roberson 	LIST_INIT(&ald_queues);
1879405072aSJeff Roberson 	LIST_INIT(&ald_active);
1889405072aSJeff Roberson }
1899405072aSJeff Roberson 
1909405072aSJeff Roberson static void
ald_daemon(void)1919405072aSJeff Roberson ald_daemon(void)
1929405072aSJeff Roberson {
1939405072aSJeff Roberson 	int needwakeup;
1949405072aSJeff Roberson 	struct alq *alq;
1959405072aSJeff Roberson 
196a414302fSJeff Roberson 	ald_thread = FIRST_THREAD_IN_PROC(ald_proc);
197a414302fSJeff Roberson 
1987639c9beSLawrence Stewart 	alq_eventhandler_tag = EVENTHANDLER_REGISTER(shutdown_pre_sync,
1997639c9beSLawrence Stewart 	    ald_shutdown, NULL, SHUTDOWN_PRI_FIRST);
2009405072aSJeff Roberson 
2019405072aSJeff Roberson 	ALD_LOCK();
2029405072aSJeff Roberson 
2039405072aSJeff Roberson 	for (;;) {
204d28f42f9SLawrence Stewart 		while ((alq = LIST_FIRST(&ald_active)) == NULL &&
205d28f42f9SLawrence Stewart 		    !ald_shutingdown)
2069ffad7a9SLawrence Stewart 			mtx_sleep(&ald_active, &ald_mtx, PWAIT, "aldslp", 0);
2079405072aSJeff Roberson 
208d28f42f9SLawrence Stewart 		/* Don't shutdown until all active ALQs are flushed. */
209d28f42f9SLawrence Stewart 		if (ald_shutingdown && alq == NULL) {
210d28f42f9SLawrence Stewart 			ALD_UNLOCK();
211d28f42f9SLawrence Stewart 			break;
212d28f42f9SLawrence Stewart 		}
213d28f42f9SLawrence Stewart 
2149405072aSJeff Roberson 		ALQ_LOCK(alq);
2159405072aSJeff Roberson 		ald_deactivate(alq);
2169405072aSJeff Roberson 		ALD_UNLOCK();
2179405072aSJeff Roberson 		needwakeup = alq_doio(alq);
2189405072aSJeff Roberson 		ALQ_UNLOCK(alq);
2199405072aSJeff Roberson 		if (needwakeup)
2207d11e744SLawrence Stewart 			wakeup_one(alq);
2219405072aSJeff Roberson 		ALD_LOCK();
2229405072aSJeff Roberson 	}
223d28f42f9SLawrence Stewart 
224d28f42f9SLawrence Stewart 	kproc_exit(0);
2259405072aSJeff Roberson }
2269405072aSJeff Roberson 
2279405072aSJeff Roberson static void
ald_shutdown(void * arg,int howto)2289405072aSJeff Roberson ald_shutdown(void *arg, int howto)
2299405072aSJeff Roberson {
2309405072aSJeff Roberson 	struct alq *alq;
2319405072aSJeff Roberson 
232*d79a9edbSMitchell Horne 	if ((howto & RB_NOSYNC) != 0 || SCHEDULER_STOPPED())
233*d79a9edbSMitchell Horne 		return;
234*d79a9edbSMitchell Horne 
2359405072aSJeff Roberson 	ALD_LOCK();
236d28f42f9SLawrence Stewart 
237d28f42f9SLawrence Stewart 	/* Ensure no new queues can be created. */
2389405072aSJeff Roberson 	ald_shutingdown = 1;
2399405072aSJeff Roberson 
240d28f42f9SLawrence Stewart 	/* Shutdown all ALQs prior to terminating the ald_daemon. */
2419405072aSJeff Roberson 	while ((alq = LIST_FIRST(&ald_queues)) != NULL) {
2429405072aSJeff Roberson 		LIST_REMOVE(alq, aq_link);
2439405072aSJeff Roberson 		ALD_UNLOCK();
2449405072aSJeff Roberson 		alq_shutdown(alq);
2459405072aSJeff Roberson 		ALD_LOCK();
2469405072aSJeff Roberson 	}
247d28f42f9SLawrence Stewart 
248d28f42f9SLawrence Stewart 	/* At this point, all ALQs are flushed and shutdown. */
249d28f42f9SLawrence Stewart 
250d28f42f9SLawrence Stewart 	/*
251d28f42f9SLawrence Stewart 	 * Wake ald_daemon so that it exits. It won't be able to do
2529ffad7a9SLawrence Stewart 	 * anything until we mtx_sleep because we hold the ald_mtx.
253d28f42f9SLawrence Stewart 	 */
254d28f42f9SLawrence Stewart 	wakeup(&ald_active);
255d28f42f9SLawrence Stewart 
256d28f42f9SLawrence Stewart 	/* Wait for ald_daemon to exit. */
2579ffad7a9SLawrence Stewart 	mtx_sleep(ald_proc, &ald_mtx, PWAIT, "aldslp", 0);
258d28f42f9SLawrence Stewart 
2599405072aSJeff Roberson 	ALD_UNLOCK();
2609405072aSJeff Roberson }
2619405072aSJeff Roberson 
2629405072aSJeff Roberson static void
alq_shutdown(struct alq * alq)2639405072aSJeff Roberson alq_shutdown(struct alq *alq)
2649405072aSJeff Roberson {
2659405072aSJeff Roberson 	ALQ_LOCK(alq);
2669405072aSJeff Roberson 
2679405072aSJeff Roberson 	/* Stop any new writers. */
2689405072aSJeff Roberson 	alq->aq_flags |= AQ_SHUTDOWN;
2699405072aSJeff Roberson 
2707d11e744SLawrence Stewart 	/*
2717d11e744SLawrence Stewart 	 * If the ALQ isn't active but has unwritten data (possible if
2727d11e744SLawrence Stewart 	 * the ALQ_NOACTIVATE flag has been used), explicitly activate the
2737d11e744SLawrence Stewart 	 * ALQ here so that the pending data gets flushed by the ald_daemon.
2747d11e744SLawrence Stewart 	 */
2757d11e744SLawrence Stewart 	if (!(alq->aq_flags & AQ_ACTIVE) && HAS_PENDING_DATA(alq)) {
2767d11e744SLawrence Stewart 		alq->aq_flags |= AQ_ACTIVE;
2777d11e744SLawrence Stewart 		ALQ_UNLOCK(alq);
2787d11e744SLawrence Stewart 		ALD_LOCK();
2797d11e744SLawrence Stewart 		ald_activate(alq);
2807d11e744SLawrence Stewart 		ALD_UNLOCK();
2817d11e744SLawrence Stewart 		ALQ_LOCK(alq);
2827d11e744SLawrence Stewart 	}
2837d11e744SLawrence Stewart 
2849405072aSJeff Roberson 	/* Drain IO */
28597c11ef2SLawrence Stewart 	while (alq->aq_flags & AQ_ACTIVE) {
2869405072aSJeff Roberson 		alq->aq_flags |= AQ_WANTED;
287bff2d4d5SRoman Divacky 		msleep_spin(alq, &alq->aq_mtx, "aldclose", 0);
2889405072aSJeff Roberson 	}
2899405072aSJeff Roberson 	ALQ_UNLOCK(alq);
2909405072aSJeff Roberson 
291a414302fSJeff Roberson 	vn_close(alq->aq_vp, FWRITE, alq->aq_cred,
2929e9256e2SJeff Roberson 	    curthread);
2939e9256e2SJeff Roberson 	crfree(alq->aq_cred);
2949405072aSJeff Roberson }
2959405072aSJeff Roberson 
296c0ea37a8SLawrence Stewart void
alq_destroy(struct alq * alq)297c0ea37a8SLawrence Stewart alq_destroy(struct alq *alq)
298c0ea37a8SLawrence Stewart {
299c0ea37a8SLawrence Stewart 	/* Drain all pending IO. */
300c0ea37a8SLawrence Stewart 	alq_shutdown(alq);
301c0ea37a8SLawrence Stewart 
302c0ea37a8SLawrence Stewart 	mtx_destroy(&alq->aq_mtx);
303c0ea37a8SLawrence Stewart 	free(alq->aq_entbuf, M_ALD);
304c0ea37a8SLawrence Stewart 	free(alq, M_ALD);
305c0ea37a8SLawrence Stewart }
306c0ea37a8SLawrence Stewart 
3079405072aSJeff Roberson /*
3089405072aSJeff Roberson  * Flush all pending data to disk.  This operation will block.
3099405072aSJeff Roberson  */
3109405072aSJeff Roberson static int
alq_doio(struct alq * alq)3119405072aSJeff Roberson alq_doio(struct alq *alq)
3129405072aSJeff Roberson {
3139405072aSJeff Roberson 	struct thread *td;
3149405072aSJeff Roberson 	struct mount *mp;
3159405072aSJeff Roberson 	struct vnode *vp;
3169405072aSJeff Roberson 	struct uio auio;
3179405072aSJeff Roberson 	struct iovec aiov[2];
3189405072aSJeff Roberson 	int totlen;
3199405072aSJeff Roberson 	int iov;
3207d11e744SLawrence Stewart 	int wrapearly;
3217d11e744SLawrence Stewart 
3227d11e744SLawrence Stewart 	KASSERT((HAS_PENDING_DATA(alq)), ("%s: queue empty!", __func__));
3239405072aSJeff Roberson 
3249405072aSJeff Roberson 	vp = alq->aq_vp;
3259405072aSJeff Roberson 	td = curthread;
3269405072aSJeff Roberson 	totlen = 0;
3277d11e744SLawrence Stewart 	iov = 1;
3287d11e744SLawrence Stewart 	wrapearly = alq->aq_wrapearly;
3299405072aSJeff Roberson 
3309405072aSJeff Roberson 	bzero(&aiov, sizeof(aiov));
3319405072aSJeff Roberson 	bzero(&auio, sizeof(auio));
3329405072aSJeff Roberson 
3337d11e744SLawrence Stewart 	/* Start the write from the location of our buffer tail pointer. */
3347d11e744SLawrence Stewart 	aiov[0].iov_base = alq->aq_entbuf + alq->aq_writetail;
3357d11e744SLawrence Stewart 
3367d11e744SLawrence Stewart 	if (alq->aq_writetail < alq->aq_writehead) {
3377d11e744SLawrence Stewart 		/* Buffer not wrapped. */
3387d11e744SLawrence Stewart 		totlen = aiov[0].iov_len = alq->aq_writehead - alq->aq_writetail;
3397d11e744SLawrence Stewart 	} else if (alq->aq_writehead == 0) {
3407d11e744SLawrence Stewart 		/* Buffer not wrapped (special case to avoid an empty iov). */
3417d11e744SLawrence Stewart 		totlen = aiov[0].iov_len = alq->aq_buflen - alq->aq_writetail -
3427d11e744SLawrence Stewart 		    wrapearly;
3437d11e744SLawrence Stewart 	} else {
3447d11e744SLawrence Stewart 		/*
3457d11e744SLawrence Stewart 		 * Buffer wrapped, requires 2 aiov entries:
3467d11e744SLawrence Stewart 		 * - first is from writetail to end of buffer
3477d11e744SLawrence Stewart 		 * - second is from start of buffer to writehead
3487d11e744SLawrence Stewart 		 */
3497d11e744SLawrence Stewart 		aiov[0].iov_len = alq->aq_buflen - alq->aq_writetail -
3507d11e744SLawrence Stewart 		    wrapearly;
3519405072aSJeff Roberson 		iov++;
3527d11e744SLawrence Stewart 		aiov[1].iov_base = alq->aq_entbuf;
3537d11e744SLawrence Stewart 		aiov[1].iov_len =  alq->aq_writehead;
3547d11e744SLawrence Stewart 		totlen = aiov[0].iov_len + aiov[1].iov_len;
3557d11e744SLawrence Stewart 	}
3569405072aSJeff Roberson 
3579405072aSJeff Roberson 	alq->aq_flags |= AQ_FLUSHING;
3589405072aSJeff Roberson 	ALQ_UNLOCK(alq);
3599405072aSJeff Roberson 
3609405072aSJeff Roberson 	auio.uio_iov = &aiov[0];
3619405072aSJeff Roberson 	auio.uio_offset = 0;
3629405072aSJeff Roberson 	auio.uio_segflg = UIO_SYSSPACE;
3639405072aSJeff Roberson 	auio.uio_rw = UIO_WRITE;
3647d11e744SLawrence Stewart 	auio.uio_iovcnt = iov;
3659405072aSJeff Roberson 	auio.uio_resid = totlen;
3669405072aSJeff Roberson 	auio.uio_td = td;
3679405072aSJeff Roberson 
3689405072aSJeff Roberson 	/*
3699405072aSJeff Roberson 	 * Do all of the junk required to write now.
3709405072aSJeff Roberson 	 */
3719405072aSJeff Roberson 	vn_start_write(vp, &mp, V_WAIT);
372cb05b60aSAttilio Rao 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
37367536f03SRobert Watson 	/*
37467536f03SRobert Watson 	 * XXX: VOP_WRITE error checks are ignored.
37567536f03SRobert Watson 	 */
37667536f03SRobert Watson #ifdef MAC
37730d239bcSRobert Watson 	if (mac_vnode_check_write(alq->aq_cred, NOCRED, vp) == 0)
37867536f03SRobert Watson #endif
3799e9256e2SJeff Roberson 		VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, alq->aq_cred);
380b249ce48SMateusz Guzik 	VOP_UNLOCK(vp);
3819405072aSJeff Roberson 	vn_finished_write(mp);
3829405072aSJeff Roberson 
3839405072aSJeff Roberson 	ALQ_LOCK(alq);
3849405072aSJeff Roberson 	alq->aq_flags &= ~AQ_FLUSHING;
3859405072aSJeff Roberson 
3867d11e744SLawrence Stewart 	/* Adjust writetail as required, taking into account wrapping. */
3877d11e744SLawrence Stewart 	alq->aq_writetail = (alq->aq_writetail + totlen + wrapearly) %
3887d11e744SLawrence Stewart 	    alq->aq_buflen;
3897d11e744SLawrence Stewart 	alq->aq_freebytes += totlen + wrapearly;
3907d11e744SLawrence Stewart 
3917d11e744SLawrence Stewart 	/*
3927d11e744SLawrence Stewart 	 * If we just flushed part of the buffer which wrapped, reset the
3937d11e744SLawrence Stewart 	 * wrapearly indicator.
3947d11e744SLawrence Stewart 	 */
3957d11e744SLawrence Stewart 	if (wrapearly)
3967d11e744SLawrence Stewart 		alq->aq_wrapearly = 0;
3977d11e744SLawrence Stewart 
3987d11e744SLawrence Stewart 	/*
3997d11e744SLawrence Stewart 	 * If we just flushed the buffer completely, reset indexes to 0 to
4007d11e744SLawrence Stewart 	 * minimise buffer wraps.
4017d11e744SLawrence Stewart 	 * This is also required to ensure alq_getn() can't wedge itself.
4027d11e744SLawrence Stewart 	 */
4037d11e744SLawrence Stewart 	if (!HAS_PENDING_DATA(alq))
4047d11e744SLawrence Stewart 		alq->aq_writehead = alq->aq_writetail = 0;
4057d11e744SLawrence Stewart 
4067d11e744SLawrence Stewart 	KASSERT((alq->aq_writetail >= 0 && alq->aq_writetail < alq->aq_buflen),
4077d11e744SLawrence Stewart 	    ("%s: aq_writetail < 0 || aq_writetail >= aq_buflen", __func__));
4089405072aSJeff Roberson 
4099405072aSJeff Roberson 	if (alq->aq_flags & AQ_WANTED) {
4109405072aSJeff Roberson 		alq->aq_flags &= ~AQ_WANTED;
4119405072aSJeff Roberson 		return (1);
4129405072aSJeff Roberson 	}
4139405072aSJeff Roberson 
4149405072aSJeff Roberson 	return(0);
4159405072aSJeff Roberson }
4169405072aSJeff Roberson 
4179405072aSJeff Roberson static struct kproc_desc ald_kp = {
4189405072aSJeff Roberson         "ALQ Daemon",
4199405072aSJeff Roberson         ald_daemon,
420a414302fSJeff Roberson         &ald_proc
4219405072aSJeff Roberson };
4229405072aSJeff Roberson 
423237fdd78SRobert Watson SYSINIT(aldthread, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, kproc_start, &ald_kp);
424237fdd78SRobert Watson SYSINIT(ald, SI_SUB_LOCK, SI_ORDER_ANY, ald_startup, NULL);
4259405072aSJeff Roberson 
4269405072aSJeff Roberson /* User visible queue functions */
4279405072aSJeff Roberson 
4289405072aSJeff Roberson /*
4299405072aSJeff Roberson  * Create the queue data structure, allocate the buffer, and open the file.
4309405072aSJeff Roberson  */
4317d11e744SLawrence Stewart 
4329405072aSJeff Roberson int
alq_open_flags(struct alq ** alqp,const char * file,struct ucred * cred,int cmode,int size,int flags)4337d11e744SLawrence Stewart alq_open_flags(struct alq **alqp, const char *file, struct ucred *cred, int cmode,
4347d11e744SLawrence Stewart     int size, int flags)
4359405072aSJeff Roberson {
4369405072aSJeff Roberson 	struct nameidata nd;
4379405072aSJeff Roberson 	struct alq *alq;
4387d11e744SLawrence Stewart 	int oflags;
4399405072aSJeff Roberson 	int error;
4407d11e744SLawrence Stewart 
4417d11e744SLawrence Stewart 	KASSERT((size > 0), ("%s: size <= 0", __func__));
4429405072aSJeff Roberson 
4439405072aSJeff Roberson 	*alqp = NULL;
4449405072aSJeff Roberson 
4457e1d3eefSMateusz Guzik 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, file);
4467d11e744SLawrence Stewart 	oflags = FWRITE | O_NOFOLLOW | O_CREAT;
4479405072aSJeff Roberson 
4487d11e744SLawrence Stewart 	error = vn_open_cred(&nd, &oflags, cmode, 0, cred, NULL);
4499405072aSJeff Roberson 	if (error)
4509405072aSJeff Roberson 		return (error);
4519405072aSJeff Roberson 
452bb92cd7bSMateusz Guzik 	NDFREE_PNBUF(&nd);
4539405072aSJeff Roberson 	/* We just unlock so we hold a reference */
454b249ce48SMateusz Guzik 	VOP_UNLOCK(nd.ni_vp);
4559405072aSJeff Roberson 
456a163d034SWarner Losh 	alq = malloc(sizeof(*alq), M_ALD, M_WAITOK|M_ZERO);
4579405072aSJeff Roberson 	alq->aq_vp = nd.ni_vp;
4584b090e41SRobert Watson 	alq->aq_cred = crhold(cred);
4599405072aSJeff Roberson 
4609405072aSJeff Roberson 	mtx_init(&alq->aq_mtx, "ALD Queue", NULL, MTX_SPIN|MTX_QUIET);
4619405072aSJeff Roberson 
4627d11e744SLawrence Stewart 	alq->aq_buflen = size;
4637d11e744SLawrence Stewart 	alq->aq_entmax = 0;
4647d11e744SLawrence Stewart 	alq->aq_entlen = 0;
4659405072aSJeff Roberson 
4667d11e744SLawrence Stewart 	alq->aq_freebytes = alq->aq_buflen;
4677d11e744SLawrence Stewart 	alq->aq_entbuf = malloc(alq->aq_buflen, M_ALD, M_WAITOK|M_ZERO);
4687d11e744SLawrence Stewart 	alq->aq_writehead = alq->aq_writetail = 0;
4697d11e744SLawrence Stewart 	if (flags & ALQ_ORDERED)
4707d11e744SLawrence Stewart 		alq->aq_flags |= AQ_ORDERED;
4719405072aSJeff Roberson 
472c0ea37a8SLawrence Stewart 	if ((error = ald_add(alq)) != 0) {
473c0ea37a8SLawrence Stewart 		alq_destroy(alq);
4749405072aSJeff Roberson 		return (error);
475c0ea37a8SLawrence Stewart 	}
476c0ea37a8SLawrence Stewart 
4779405072aSJeff Roberson 	*alqp = alq;
4789405072aSJeff Roberson 
4799405072aSJeff Roberson 	return (0);
4809405072aSJeff Roberson }
4819405072aSJeff Roberson 
4827d11e744SLawrence Stewart int
alq_open(struct alq ** alqp,const char * file,struct ucred * cred,int cmode,int size,int count)4837d11e744SLawrence Stewart alq_open(struct alq **alqp, const char *file, struct ucred *cred, int cmode,
4847d11e744SLawrence Stewart     int size, int count)
4857d11e744SLawrence Stewart {
4867d11e744SLawrence Stewart 	int ret;
4877d11e744SLawrence Stewart 
4887d11e744SLawrence Stewart 	KASSERT((count >= 0), ("%s: count < 0", __func__));
4897d11e744SLawrence Stewart 
4907d11e744SLawrence Stewart 	if (count > 0) {
4916b57eff4SDmitry Chagin 		if ((ret = alq_open_flags(alqp, file, cred, cmode,
4926b57eff4SDmitry Chagin 		    size*count, 0)) == 0) {
4937d11e744SLawrence Stewart 			(*alqp)->aq_flags |= AQ_LEGACY;
4947d11e744SLawrence Stewart 			(*alqp)->aq_entmax = count;
4957d11e744SLawrence Stewart 			(*alqp)->aq_entlen = size;
4966b57eff4SDmitry Chagin 		}
4977d11e744SLawrence Stewart 	} else
4987d11e744SLawrence Stewart 		ret = alq_open_flags(alqp, file, cred, cmode, size, 0);
4997d11e744SLawrence Stewart 
5007d11e744SLawrence Stewart 	return (ret);
5017d11e744SLawrence Stewart }
5027d11e744SLawrence Stewart 
5039405072aSJeff Roberson /*
5049405072aSJeff Roberson  * Copy a new entry into the queue.  If the operation would block either
5059405072aSJeff Roberson  * wait or return an error depending on the value of waitok.
5069405072aSJeff Roberson  */
5079405072aSJeff Roberson int
alq_writen(struct alq * alq,void * data,int len,int flags)5087d11e744SLawrence Stewart alq_writen(struct alq *alq, void *data, int len, int flags)
5099405072aSJeff Roberson {
5107d11e744SLawrence Stewart 	int activate, copy, ret;
5117d11e744SLawrence Stewart 	void *waitchan;
5129405072aSJeff Roberson 
5137d11e744SLawrence Stewart 	KASSERT((len > 0 && len <= alq->aq_buflen),
5147d11e744SLawrence Stewart 	    ("%s: len <= 0 || len > aq_buflen", __func__));
5159405072aSJeff Roberson 
5167d11e744SLawrence Stewart 	activate = ret = 0;
5177d11e744SLawrence Stewart 	copy = len;
5187d11e744SLawrence Stewart 	waitchan = NULL;
5199405072aSJeff Roberson 
5209405072aSJeff Roberson 	ALQ_LOCK(alq);
5219405072aSJeff Roberson 
5227d11e744SLawrence Stewart 	/*
5237d11e744SLawrence Stewart 	 * Fail to perform the write and return EWOULDBLOCK if:
5247d11e744SLawrence Stewart 	 * - The message is larger than our underlying buffer.
5257d11e744SLawrence Stewart 	 * - The ALQ is being shutdown.
5267d11e744SLawrence Stewart 	 * - There is insufficient free space in our underlying buffer
5277d11e744SLawrence Stewart 	 *   to accept the message and the user can't wait for space.
5287d11e744SLawrence Stewart 	 * - There is insufficient free space in our underlying buffer
5297d11e744SLawrence Stewart 	 *   to accept the message and the alq is inactive due to prior
5307d11e744SLawrence Stewart 	 *   use of the ALQ_NOACTIVATE flag (which would lead to deadlock).
5317d11e744SLawrence Stewart 	 */
5327d11e744SLawrence Stewart 	if (len > alq->aq_buflen ||
5337d11e744SLawrence Stewart 	    alq->aq_flags & AQ_SHUTDOWN ||
5347d11e744SLawrence Stewart 	    (((flags & ALQ_NOWAIT) || (!(alq->aq_flags & AQ_ACTIVE) &&
5357d11e744SLawrence Stewart 	    HAS_PENDING_DATA(alq))) && alq->aq_freebytes < len)) {
5369405072aSJeff Roberson 		ALQ_UNLOCK(alq);
5377d11e744SLawrence Stewart 		return (EWOULDBLOCK);
5389405072aSJeff Roberson 	}
5399405072aSJeff Roberson 
5407d11e744SLawrence Stewart 	/*
5417d11e744SLawrence Stewart 	 * If we want ordered writes and there is already at least one thread
5427d11e744SLawrence Stewart 	 * waiting for resources to become available, sleep until we're woken.
5437d11e744SLawrence Stewart 	 */
5447d11e744SLawrence Stewart 	if (alq->aq_flags & AQ_ORDERED && alq->aq_waiters > 0) {
5457d11e744SLawrence Stewart 		KASSERT(!(flags & ALQ_NOWAIT),
5467d11e744SLawrence Stewart 		    ("%s: ALQ_NOWAIT set but incorrectly ignored!", __func__));
5477d11e744SLawrence Stewart 		alq->aq_waiters++;
5487d11e744SLawrence Stewart 		msleep_spin(&alq->aq_waiters, &alq->aq_mtx, "alqwnord", 0);
5497d11e744SLawrence Stewart 		alq->aq_waiters--;
5507d11e744SLawrence Stewart 	}
5519405072aSJeff Roberson 
5527d11e744SLawrence Stewart 	/*
5537d11e744SLawrence Stewart 	 * (ALQ_WAITOK && aq_freebytes < len) or aq_freebytes >= len, either
5547d11e744SLawrence Stewart 	 * enter while loop and sleep until we have enough free bytes (former)
5557d11e744SLawrence Stewart 	 * or skip (latter). If AQ_ORDERED is set, only 1 thread at a time will
5567d11e744SLawrence Stewart 	 * be in this loop. Otherwise, multiple threads may be sleeping here
5577d11e744SLawrence Stewart 	 * competing for ALQ resources.
5587d11e744SLawrence Stewart 	 */
5597d11e744SLawrence Stewart 	while (alq->aq_freebytes < len && !(alq->aq_flags & AQ_SHUTDOWN)) {
5607d11e744SLawrence Stewart 		KASSERT(!(flags & ALQ_NOWAIT),
5617d11e744SLawrence Stewart 		    ("%s: ALQ_NOWAIT set but incorrectly ignored!", __func__));
5627d11e744SLawrence Stewart 		alq->aq_flags |= AQ_WANTED;
5637d11e744SLawrence Stewart 		alq->aq_waiters++;
5647d11e744SLawrence Stewart 		if (waitchan)
5657d11e744SLawrence Stewart 			wakeup(waitchan);
5667d11e744SLawrence Stewart 		msleep_spin(alq, &alq->aq_mtx, "alqwnres", 0);
5677d11e744SLawrence Stewart 		alq->aq_waiters--;
5689405072aSJeff Roberson 
5697d11e744SLawrence Stewart 		/*
5707d11e744SLawrence Stewart 		 * If we're the first thread to wake after an AQ_WANTED wakeup
5717d11e744SLawrence Stewart 		 * but there isn't enough free space for us, we're going to loop
5727d11e744SLawrence Stewart 		 * and sleep again. If there are other threads waiting in this
5737d11e744SLawrence Stewart 		 * loop, schedule a wakeup so that they can see if the space
5747d11e744SLawrence Stewart 		 * they require is available.
5757d11e744SLawrence Stewart 		 */
5767d11e744SLawrence Stewart 		if (alq->aq_waiters > 0 && !(alq->aq_flags & AQ_ORDERED) &&
5777d11e744SLawrence Stewart 		    alq->aq_freebytes < len && !(alq->aq_flags & AQ_WANTED))
5787d11e744SLawrence Stewart 			waitchan = alq;
5797d11e744SLawrence Stewart 		else
5807d11e744SLawrence Stewart 			waitchan = NULL;
5817d11e744SLawrence Stewart 	}
5829405072aSJeff Roberson 
5837d11e744SLawrence Stewart 	/*
5847d11e744SLawrence Stewart 	 * If there are waiters, we need to signal the waiting threads after we
5857d11e744SLawrence Stewart 	 * complete our work. The alq ptr is used as a wait channel for threads
5867d11e744SLawrence Stewart 	 * requiring resources to be freed up. In the AQ_ORDERED case, threads
5877d11e744SLawrence Stewart 	 * are not allowed to concurrently compete for resources in the above
5887d11e744SLawrence Stewart 	 * while loop, so we use a different wait channel in this case.
5897d11e744SLawrence Stewart 	 */
5907d11e744SLawrence Stewart 	if (alq->aq_waiters > 0) {
5917d11e744SLawrence Stewart 		if (alq->aq_flags & AQ_ORDERED)
5927d11e744SLawrence Stewart 			waitchan = &alq->aq_waiters;
5937d11e744SLawrence Stewart 		else
5947d11e744SLawrence Stewart 			waitchan = alq;
5957d11e744SLawrence Stewart 	} else
5967d11e744SLawrence Stewart 		waitchan = NULL;
5977d11e744SLawrence Stewart 
5987d11e744SLawrence Stewart 	/* Bail if we're shutting down. */
5997d11e744SLawrence Stewart 	if (alq->aq_flags & AQ_SHUTDOWN) {
6007d11e744SLawrence Stewart 		ret = EWOULDBLOCK;
6017d11e744SLawrence Stewart 		goto unlock;
6027d11e744SLawrence Stewart 	}
6037d11e744SLawrence Stewart 
6047d11e744SLawrence Stewart 	/*
6057d11e744SLawrence Stewart 	 * If we need to wrap the buffer to accommodate the write,
6067d11e744SLawrence Stewart 	 * we'll need 2 calls to bcopy.
6077d11e744SLawrence Stewart 	 */
6087d11e744SLawrence Stewart 	if ((alq->aq_buflen - alq->aq_writehead) < len)
6097d11e744SLawrence Stewart 		copy = alq->aq_buflen - alq->aq_writehead;
6107d11e744SLawrence Stewart 
6117d11e744SLawrence Stewart 	/* Copy message (or part thereof if wrap required) to the buffer. */
6127d11e744SLawrence Stewart 	bcopy(data, alq->aq_entbuf + alq->aq_writehead, copy);
6137d11e744SLawrence Stewart 	alq->aq_writehead += copy;
6147d11e744SLawrence Stewart 
6157d11e744SLawrence Stewart 	if (alq->aq_writehead >= alq->aq_buflen) {
6167d11e744SLawrence Stewart 		KASSERT((alq->aq_writehead == alq->aq_buflen),
6177d11e744SLawrence Stewart 		    ("%s: alq->aq_writehead (%d) > alq->aq_buflen (%d)",
6187d11e744SLawrence Stewart 		    __func__,
6197d11e744SLawrence Stewart 		    alq->aq_writehead,
6207d11e744SLawrence Stewart 		    alq->aq_buflen));
6217d11e744SLawrence Stewart 		alq->aq_writehead = 0;
6227d11e744SLawrence Stewart 	}
6237d11e744SLawrence Stewart 
6247d11e744SLawrence Stewart 	if (copy != len) {
6257d11e744SLawrence Stewart 		/*
6267d11e744SLawrence Stewart 		 * Wrap the buffer by copying the remainder of our message
6277d11e744SLawrence Stewart 		 * to the start of the buffer and resetting aq_writehead.
6287d11e744SLawrence Stewart 		 */
6297d11e744SLawrence Stewart 		bcopy(((uint8_t *)data)+copy, alq->aq_entbuf, len - copy);
6307d11e744SLawrence Stewart 		alq->aq_writehead = len - copy;
6317d11e744SLawrence Stewart 	}
6327d11e744SLawrence Stewart 
6337d11e744SLawrence Stewart 	KASSERT((alq->aq_writehead >= 0 && alq->aq_writehead < alq->aq_buflen),
6347d11e744SLawrence Stewart 	    ("%s: aq_writehead < 0 || aq_writehead >= aq_buflen", __func__));
6357d11e744SLawrence Stewart 
6367d11e744SLawrence Stewart 	alq->aq_freebytes -= len;
6377d11e744SLawrence Stewart 
6387d11e744SLawrence Stewart 	if (!(alq->aq_flags & AQ_ACTIVE) && !(flags & ALQ_NOACTIVATE)) {
6399405072aSJeff Roberson 		alq->aq_flags |= AQ_ACTIVE;
6409405072aSJeff Roberson 		activate = 1;
6417d11e744SLawrence Stewart 	}
6429405072aSJeff Roberson 
6437d11e744SLawrence Stewart 	KASSERT((HAS_PENDING_DATA(alq)), ("%s: queue empty!", __func__));
6447d11e744SLawrence Stewart 
6457d11e744SLawrence Stewart unlock:
6469405072aSJeff Roberson 	ALQ_UNLOCK(alq);
6477d11e744SLawrence Stewart 
6489405072aSJeff Roberson 	if (activate) {
6499405072aSJeff Roberson 		ALD_LOCK();
6509405072aSJeff Roberson 		ald_activate(alq);
6519405072aSJeff Roberson 		ALD_UNLOCK();
6529405072aSJeff Roberson 	}
6537d11e744SLawrence Stewart 
6547d11e744SLawrence Stewart 	/* NB: We rely on wakeup_one waking threads in a FIFO manner. */
6557d11e744SLawrence Stewart 	if (waitchan != NULL)
6567d11e744SLawrence Stewart 		wakeup_one(waitchan);
6577d11e744SLawrence Stewart 
6587d11e744SLawrence Stewart 	return (ret);
6597d11e744SLawrence Stewart }
6607d11e744SLawrence Stewart 
6617d11e744SLawrence Stewart int
alq_write(struct alq * alq,void * data,int flags)6627d11e744SLawrence Stewart alq_write(struct alq *alq, void *data, int flags)
6637d11e744SLawrence Stewart {
6647d11e744SLawrence Stewart 	/* Should only be called in fixed length message (legacy) mode. */
6657d11e744SLawrence Stewart 	KASSERT((alq->aq_flags & AQ_LEGACY),
6667d11e744SLawrence Stewart 	    ("%s: fixed length write on variable length queue", __func__));
6677d11e744SLawrence Stewart 	return (alq_writen(alq, data, alq->aq_entlen, flags));
6687d11e744SLawrence Stewart }
6697d11e744SLawrence Stewart 
6707d11e744SLawrence Stewart /*
6717d11e744SLawrence Stewart  * Retrieve a pointer for the ALQ to write directly into, avoiding bcopy.
6727d11e744SLawrence Stewart  */
6737d11e744SLawrence Stewart struct ale *
alq_getn(struct alq * alq,int len,int flags)6747d11e744SLawrence Stewart alq_getn(struct alq *alq, int len, int flags)
6757d11e744SLawrence Stewart {
6767d11e744SLawrence Stewart 	int contigbytes;
6777d11e744SLawrence Stewart 	void *waitchan;
6787d11e744SLawrence Stewart 
6797d11e744SLawrence Stewart 	KASSERT((len > 0 && len <= alq->aq_buflen),
6807d11e744SLawrence Stewart 	    ("%s: len <= 0 || len > alq->aq_buflen", __func__));
6817d11e744SLawrence Stewart 
6827d11e744SLawrence Stewart 	waitchan = NULL;
6837d11e744SLawrence Stewart 
6847d11e744SLawrence Stewart 	ALQ_LOCK(alq);
6857d11e744SLawrence Stewart 
6867d11e744SLawrence Stewart 	/*
6877d11e744SLawrence Stewart 	 * Determine the number of free contiguous bytes.
6887d11e744SLawrence Stewart 	 * We ensure elsewhere that if aq_writehead == aq_writetail because
6897d11e744SLawrence Stewart 	 * the buffer is empty, they will both be set to 0 and therefore
6907d11e744SLawrence Stewart 	 * aq_freebytes == aq_buflen and is fully contiguous.
6917d11e744SLawrence Stewart 	 * If they are equal and the buffer is not empty, aq_freebytes will
6927d11e744SLawrence Stewart 	 * be 0 indicating the buffer is full.
6937d11e744SLawrence Stewart 	 */
6947d11e744SLawrence Stewart 	if (alq->aq_writehead <= alq->aq_writetail)
6957d11e744SLawrence Stewart 		contigbytes = alq->aq_freebytes;
6967d11e744SLawrence Stewart 	else {
6977d11e744SLawrence Stewart 		contigbytes = alq->aq_buflen - alq->aq_writehead;
6987d11e744SLawrence Stewart 
6997d11e744SLawrence Stewart 		if (contigbytes < len) {
7007d11e744SLawrence Stewart 			/*
7017d11e744SLawrence Stewart 			 * Insufficient space at end of buffer to handle a
7027d11e744SLawrence Stewart 			 * contiguous write. Wrap early if there's space at
7037d11e744SLawrence Stewart 			 * the beginning. This will leave a hole at the end
7047d11e744SLawrence Stewart 			 * of the buffer which we will have to skip over when
7057d11e744SLawrence Stewart 			 * flushing the buffer to disk.
7067d11e744SLawrence Stewart 			 */
7077d11e744SLawrence Stewart 			if (alq->aq_writetail >= len || flags & ALQ_WAITOK) {
7087d11e744SLawrence Stewart 				/* Keep track of # bytes left blank. */
7097d11e744SLawrence Stewart 				alq->aq_wrapearly = contigbytes;
7107d11e744SLawrence Stewart 				/* Do the wrap and adjust counters. */
7117d11e744SLawrence Stewart 				contigbytes = alq->aq_freebytes =
7127d11e744SLawrence Stewart 				    alq->aq_writetail;
7137d11e744SLawrence Stewart 				alq->aq_writehead = 0;
7147d11e744SLawrence Stewart 			}
7157d11e744SLawrence Stewart 		}
7167d11e744SLawrence Stewart 	}
7177d11e744SLawrence Stewart 
7187d11e744SLawrence Stewart 	/*
7197d11e744SLawrence Stewart 	 * Return a NULL ALE if:
7207d11e744SLawrence Stewart 	 * - The message is larger than our underlying buffer.
7217d11e744SLawrence Stewart 	 * - The ALQ is being shutdown.
7227d11e744SLawrence Stewart 	 * - There is insufficient free space in our underlying buffer
7237d11e744SLawrence Stewart 	 *   to accept the message and the user can't wait for space.
7247d11e744SLawrence Stewart 	 * - There is insufficient free space in our underlying buffer
7257d11e744SLawrence Stewart 	 *   to accept the message and the alq is inactive due to prior
7267d11e744SLawrence Stewart 	 *   use of the ALQ_NOACTIVATE flag (which would lead to deadlock).
7277d11e744SLawrence Stewart 	 */
7287d11e744SLawrence Stewart 	if (len > alq->aq_buflen ||
7297d11e744SLawrence Stewart 	    alq->aq_flags & AQ_SHUTDOWN ||
7307d11e744SLawrence Stewart 	    (((flags & ALQ_NOWAIT) || (!(alq->aq_flags & AQ_ACTIVE) &&
7317d11e744SLawrence Stewart 	    HAS_PENDING_DATA(alq))) && contigbytes < len)) {
7327d11e744SLawrence Stewart 		ALQ_UNLOCK(alq);
7337d11e744SLawrence Stewart 		return (NULL);
7347d11e744SLawrence Stewart 	}
7357d11e744SLawrence Stewart 
7367d11e744SLawrence Stewart 	/*
7377d11e744SLawrence Stewart 	 * If we want ordered writes and there is already at least one thread
7387d11e744SLawrence Stewart 	 * waiting for resources to become available, sleep until we're woken.
7397d11e744SLawrence Stewart 	 */
7407d11e744SLawrence Stewart 	if (alq->aq_flags & AQ_ORDERED && alq->aq_waiters > 0) {
7417d11e744SLawrence Stewart 		KASSERT(!(flags & ALQ_NOWAIT),
7427d11e744SLawrence Stewart 		    ("%s: ALQ_NOWAIT set but incorrectly ignored!", __func__));
7437d11e744SLawrence Stewart 		alq->aq_waiters++;
7447d11e744SLawrence Stewart 		msleep_spin(&alq->aq_waiters, &alq->aq_mtx, "alqgnord", 0);
7457d11e744SLawrence Stewart 		alq->aq_waiters--;
7467d11e744SLawrence Stewart 	}
7477d11e744SLawrence Stewart 
7487d11e744SLawrence Stewart 	/*
7497d11e744SLawrence Stewart 	 * (ALQ_WAITOK && contigbytes < len) or contigbytes >= len, either enter
7507d11e744SLawrence Stewart 	 * while loop and sleep until we have enough contiguous free bytes
7517d11e744SLawrence Stewart 	 * (former) or skip (latter). If AQ_ORDERED is set, only 1 thread at a
7527d11e744SLawrence Stewart 	 * time will be in this loop. Otherwise, multiple threads may be
7537d11e744SLawrence Stewart 	 * sleeping here competing for ALQ resources.
7547d11e744SLawrence Stewart 	 */
7557d11e744SLawrence Stewart 	while (contigbytes < len && !(alq->aq_flags & AQ_SHUTDOWN)) {
7567d11e744SLawrence Stewart 		KASSERT(!(flags & ALQ_NOWAIT),
7577d11e744SLawrence Stewart 		    ("%s: ALQ_NOWAIT set but incorrectly ignored!", __func__));
7587d11e744SLawrence Stewart 		alq->aq_flags |= AQ_WANTED;
7597d11e744SLawrence Stewart 		alq->aq_waiters++;
7607d11e744SLawrence Stewart 		if (waitchan)
7617d11e744SLawrence Stewart 			wakeup(waitchan);
7627d11e744SLawrence Stewart 		msleep_spin(alq, &alq->aq_mtx, "alqgnres", 0);
7637d11e744SLawrence Stewart 		alq->aq_waiters--;
7647d11e744SLawrence Stewart 
7657d11e744SLawrence Stewart 		if (alq->aq_writehead <= alq->aq_writetail)
7667d11e744SLawrence Stewart 			contigbytes = alq->aq_freebytes;
7677d11e744SLawrence Stewart 		else
7687d11e744SLawrence Stewart 			contigbytes = alq->aq_buflen - alq->aq_writehead;
7697d11e744SLawrence Stewart 
7707d11e744SLawrence Stewart 		/*
7717d11e744SLawrence Stewart 		 * If we're the first thread to wake after an AQ_WANTED wakeup
7727d11e744SLawrence Stewart 		 * but there isn't enough free space for us, we're going to loop
7737d11e744SLawrence Stewart 		 * and sleep again. If there are other threads waiting in this
7747d11e744SLawrence Stewart 		 * loop, schedule a wakeup so that they can see if the space
7757d11e744SLawrence Stewart 		 * they require is available.
7767d11e744SLawrence Stewart 		 */
7777d11e744SLawrence Stewart 		if (alq->aq_waiters > 0 && !(alq->aq_flags & AQ_ORDERED) &&
7787d11e744SLawrence Stewart 		    contigbytes < len && !(alq->aq_flags & AQ_WANTED))
7797d11e744SLawrence Stewart 			waitchan = alq;
7807d11e744SLawrence Stewart 		else
7817d11e744SLawrence Stewart 			waitchan = NULL;
7827d11e744SLawrence Stewart 	}
7837d11e744SLawrence Stewart 
7847d11e744SLawrence Stewart 	/*
7857d11e744SLawrence Stewart 	 * If there are waiters, we need to signal the waiting threads after we
7867d11e744SLawrence Stewart 	 * complete our work. The alq ptr is used as a wait channel for threads
7877d11e744SLawrence Stewart 	 * requiring resources to be freed up. In the AQ_ORDERED case, threads
7887d11e744SLawrence Stewart 	 * are not allowed to concurrently compete for resources in the above
7897d11e744SLawrence Stewart 	 * while loop, so we use a different wait channel in this case.
7907d11e744SLawrence Stewart 	 */
7917d11e744SLawrence Stewart 	if (alq->aq_waiters > 0) {
7927d11e744SLawrence Stewart 		if (alq->aq_flags & AQ_ORDERED)
7937d11e744SLawrence Stewart 			waitchan = &alq->aq_waiters;
7947d11e744SLawrence Stewart 		else
7957d11e744SLawrence Stewart 			waitchan = alq;
7967d11e744SLawrence Stewart 	} else
7977d11e744SLawrence Stewart 		waitchan = NULL;
7987d11e744SLawrence Stewart 
7997d11e744SLawrence Stewart 	/* Bail if we're shutting down. */
8007d11e744SLawrence Stewart 	if (alq->aq_flags & AQ_SHUTDOWN) {
8017d11e744SLawrence Stewart 		ALQ_UNLOCK(alq);
8027d11e744SLawrence Stewart 		if (waitchan != NULL)
8037d11e744SLawrence Stewart 			wakeup_one(waitchan);
8047d11e744SLawrence Stewart 		return (NULL);
8057d11e744SLawrence Stewart 	}
8067d11e744SLawrence Stewart 
8077d11e744SLawrence Stewart 	/*
8087d11e744SLawrence Stewart 	 * If we are here, we have a contiguous number of bytes >= len
8097d11e744SLawrence Stewart 	 * available in our buffer starting at aq_writehead.
8107d11e744SLawrence Stewart 	 */
8117d11e744SLawrence Stewart 	alq->aq_getpost.ae_data = alq->aq_entbuf + alq->aq_writehead;
8127d11e744SLawrence Stewart 	alq->aq_getpost.ae_bytesused = len;
8137d11e744SLawrence Stewart 
8147d11e744SLawrence Stewart 	return (&alq->aq_getpost);
8157d11e744SLawrence Stewart }
8167d11e744SLawrence Stewart 
8177d11e744SLawrence Stewart struct ale *
alq_get(struct alq * alq,int flags)8187d11e744SLawrence Stewart alq_get(struct alq *alq, int flags)
8197d11e744SLawrence Stewart {
8207d11e744SLawrence Stewart 	/* Should only be called in fixed length message (legacy) mode. */
8217d11e744SLawrence Stewart 	KASSERT((alq->aq_flags & AQ_LEGACY),
8227d11e744SLawrence Stewart 	    ("%s: fixed length get on variable length queue", __func__));
8237d11e744SLawrence Stewart 	return (alq_getn(alq, alq->aq_entlen, flags));
8247d11e744SLawrence Stewart }
8257d11e744SLawrence Stewart 
8267d11e744SLawrence Stewart void
alq_post_flags(struct alq * alq,struct ale * ale,int flags)8277d11e744SLawrence Stewart alq_post_flags(struct alq *alq, struct ale *ale, int flags)
8287d11e744SLawrence Stewart {
8297d11e744SLawrence Stewart 	int activate;
8307d11e744SLawrence Stewart 	void *waitchan;
8317d11e744SLawrence Stewart 
8327d11e744SLawrence Stewart 	activate = 0;
8337d11e744SLawrence Stewart 
8347d11e744SLawrence Stewart 	if (ale->ae_bytesused > 0) {
8357d11e744SLawrence Stewart 		if (!(alq->aq_flags & AQ_ACTIVE) &&
8367d11e744SLawrence Stewart 		    !(flags & ALQ_NOACTIVATE)) {
8377d11e744SLawrence Stewart 			alq->aq_flags |= AQ_ACTIVE;
8387d11e744SLawrence Stewart 			activate = 1;
8397d11e744SLawrence Stewart 		}
8407d11e744SLawrence Stewart 
8417d11e744SLawrence Stewart 		alq->aq_writehead += ale->ae_bytesused;
8427d11e744SLawrence Stewart 		alq->aq_freebytes -= ale->ae_bytesused;
8437d11e744SLawrence Stewart 
8447d11e744SLawrence Stewart 		/* Wrap aq_writehead if we filled to the end of the buffer. */
8457d11e744SLawrence Stewart 		if (alq->aq_writehead == alq->aq_buflen)
8467d11e744SLawrence Stewart 			alq->aq_writehead = 0;
8477d11e744SLawrence Stewart 
8487d11e744SLawrence Stewart 		KASSERT((alq->aq_writehead >= 0 &&
8497d11e744SLawrence Stewart 		    alq->aq_writehead < alq->aq_buflen),
8507d11e744SLawrence Stewart 		    ("%s: aq_writehead < 0 || aq_writehead >= aq_buflen",
8517d11e744SLawrence Stewart 		    __func__));
8527d11e744SLawrence Stewart 
8537d11e744SLawrence Stewart 		KASSERT((HAS_PENDING_DATA(alq)), ("%s: queue empty!", __func__));
8547d11e744SLawrence Stewart 	}
8557d11e744SLawrence Stewart 
8567d11e744SLawrence Stewart 	/*
8577d11e744SLawrence Stewart 	 * If there are waiters, we need to signal the waiting threads after we
8587d11e744SLawrence Stewart 	 * complete our work. The alq ptr is used as a wait channel for threads
8597d11e744SLawrence Stewart 	 * requiring resources to be freed up. In the AQ_ORDERED case, threads
8607d11e744SLawrence Stewart 	 * are not allowed to concurrently compete for resources in the
8617d11e744SLawrence Stewart 	 * alq_getn() while loop, so we use a different wait channel in this case.
8627d11e744SLawrence Stewart 	 */
8637d11e744SLawrence Stewart 	if (alq->aq_waiters > 0) {
8647d11e744SLawrence Stewart 		if (alq->aq_flags & AQ_ORDERED)
8657d11e744SLawrence Stewart 			waitchan = &alq->aq_waiters;
8667d11e744SLawrence Stewart 		else
8677d11e744SLawrence Stewart 			waitchan = alq;
8687d11e744SLawrence Stewart 	} else
8697d11e744SLawrence Stewart 		waitchan = NULL;
8707d11e744SLawrence Stewart 
8717d11e744SLawrence Stewart 	ALQ_UNLOCK(alq);
8727d11e744SLawrence Stewart 
8737d11e744SLawrence Stewart 	if (activate) {
8747d11e744SLawrence Stewart 		ALD_LOCK();
8757d11e744SLawrence Stewart 		ald_activate(alq);
8767d11e744SLawrence Stewart 		ALD_UNLOCK();
8777d11e744SLawrence Stewart 	}
8787d11e744SLawrence Stewart 
8797d11e744SLawrence Stewart 	/* NB: We rely on wakeup_one waking threads in a FIFO manner. */
8807d11e744SLawrence Stewart 	if (waitchan != NULL)
8817d11e744SLawrence Stewart 		wakeup_one(waitchan);
8829405072aSJeff Roberson }
8839405072aSJeff Roberson 
8849405072aSJeff Roberson void
alq_flush(struct alq * alq)8859405072aSJeff Roberson alq_flush(struct alq *alq)
8869405072aSJeff Roberson {
8879405072aSJeff Roberson 	int needwakeup = 0;
8889405072aSJeff Roberson 
8899405072aSJeff Roberson 	ALD_LOCK();
8909405072aSJeff Roberson 	ALQ_LOCK(alq);
8917d11e744SLawrence Stewart 
8927d11e744SLawrence Stewart 	/*
8937d11e744SLawrence Stewart 	 * Pull the lever iff there is data to flush and we're
8947d11e744SLawrence Stewart 	 * not already in the middle of a flush operation.
8957d11e744SLawrence Stewart 	 */
8967d11e744SLawrence Stewart 	if (HAS_PENDING_DATA(alq) && !(alq->aq_flags & AQ_FLUSHING)) {
8977d11e744SLawrence Stewart 		if (alq->aq_flags & AQ_ACTIVE)
8989405072aSJeff Roberson 			ald_deactivate(alq);
8997d11e744SLawrence Stewart 
9009405072aSJeff Roberson 		ALD_UNLOCK();
9019405072aSJeff Roberson 		needwakeup = alq_doio(alq);
9029405072aSJeff Roberson 	} else
9039405072aSJeff Roberson 		ALD_UNLOCK();
9047d11e744SLawrence Stewart 
9059405072aSJeff Roberson 	ALQ_UNLOCK(alq);
9069405072aSJeff Roberson 
9079405072aSJeff Roberson 	if (needwakeup)
9087d11e744SLawrence Stewart 		wakeup_one(alq);
9099405072aSJeff Roberson }
9109405072aSJeff Roberson 
/*
 * Flush remaining data, close the file and free all resources.
 */
void
alq_close(struct alq *alq)
{
	/* A nonzero ald_rem() means we are already shutting down: do nothing. */
	if (ald_rem(alq) != 0)
		return;

	alq_destroy(alq);
}
921d28f42f9SLawrence Stewart 
922d28f42f9SLawrence Stewart static int
alq_load_handler(module_t mod,int what,void * arg)923d28f42f9SLawrence Stewart alq_load_handler(module_t mod, int what, void *arg)
924d28f42f9SLawrence Stewart {
925d28f42f9SLawrence Stewart 	int ret;
926d28f42f9SLawrence Stewart 
927d28f42f9SLawrence Stewart 	ret = 0;
928d28f42f9SLawrence Stewart 
929d28f42f9SLawrence Stewart 	switch (what) {
930d28f42f9SLawrence Stewart 	case MOD_LOAD:
931d28f42f9SLawrence Stewart 	case MOD_SHUTDOWN:
932d28f42f9SLawrence Stewart 		break;
933d28f42f9SLawrence Stewart 
934d28f42f9SLawrence Stewart 	case MOD_QUIESCE:
935d28f42f9SLawrence Stewart 		ALD_LOCK();
936d28f42f9SLawrence Stewart 		/* Only allow unload if there are no open queues. */
937d28f42f9SLawrence Stewart 		if (LIST_FIRST(&ald_queues) == NULL) {
938d28f42f9SLawrence Stewart 			ald_shutingdown = 1;
939d28f42f9SLawrence Stewart 			ALD_UNLOCK();
940ec41a9a1SLawrence Stewart 			EVENTHANDLER_DEREGISTER(shutdown_pre_sync,
941ec41a9a1SLawrence Stewart 			    alq_eventhandler_tag);
942d28f42f9SLawrence Stewart 			ald_shutdown(NULL, 0);
943d28f42f9SLawrence Stewart 			mtx_destroy(&ald_mtx);
944d28f42f9SLawrence Stewart 		} else {
945d28f42f9SLawrence Stewart 			ALD_UNLOCK();
946d28f42f9SLawrence Stewart 			ret = EBUSY;
947d28f42f9SLawrence Stewart 		}
948d28f42f9SLawrence Stewart 		break;
949d28f42f9SLawrence Stewart 
950d28f42f9SLawrence Stewart 	case MOD_UNLOAD:
951d28f42f9SLawrence Stewart 		/* If MOD_QUIESCE failed we must fail here too. */
952d28f42f9SLawrence Stewart 		if (ald_shutingdown == 0)
953d28f42f9SLawrence Stewart 			ret = EBUSY;
954d28f42f9SLawrence Stewart 		break;
955d28f42f9SLawrence Stewart 
956d28f42f9SLawrence Stewart 	default:
957d28f42f9SLawrence Stewart 		ret = EINVAL;
958d28f42f9SLawrence Stewart 		break;
959d28f42f9SLawrence Stewart 	}
960d28f42f9SLawrence Stewart 
961d28f42f9SLawrence Stewart 	return (ret);
962d28f42f9SLawrence Stewart }
963d28f42f9SLawrence Stewart 
964d28f42f9SLawrence Stewart static moduledata_t alq_mod =
965d28f42f9SLawrence Stewart {
966d28f42f9SLawrence Stewart 	"alq",
967d28f42f9SLawrence Stewart 	alq_load_handler,
968d28f42f9SLawrence Stewart 	NULL
969d28f42f9SLawrence Stewart };
970d28f42f9SLawrence Stewart 
97147cedcbdSJohn Baldwin DECLARE_MODULE(alq, alq_mod, SI_SUB_LAST, SI_ORDER_ANY);
972d28f42f9SLawrence Stewart MODULE_VERSION(alq, 1);
973