xref: /freebsd/sys/kern/kern_alq.c (revision e4e9813eb92cd7c4d4b819a8fbed5cbd3d92f5d8)
1 /*-
2  * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice unmodified, this list of conditions, and the following
10  *    disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29 
30 #include "opt_mac.h"
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/kernel.h>
35 #include <sys/kthread.h>
36 #include <sys/lock.h>
37 #include <sys/mac.h>
38 #include <sys/mount.h>
39 #include <sys/mutex.h>
40 #include <sys/namei.h>
41 #include <sys/proc.h>
42 #include <sys/vnode.h>
43 #include <sys/alq.h>
44 #include <sys/malloc.h>
45 #include <sys/unistd.h>
46 #include <sys/fcntl.h>
47 #include <sys/eventhandler.h>
48 
49 /* Async. Logging Queue */
50 struct alq {
51 	int	aq_entmax;		/* Max entries */
52 	int	aq_entlen;		/* Entry length */
53 	char	*aq_entbuf;		/* Buffer for stored entries */
54 	int	aq_flags;		/* Queue flags */
55 	struct mtx	aq_mtx;		/* Queue lock */
56 	struct vnode	*aq_vp;		/* Open vnode handle */
57 	struct ucred	*aq_cred;	/* Credentials of the opening thread */
58 	struct ale	*aq_first;	/* First ent */
59 	struct ale	*aq_entfree;	/* First free ent */
60 	struct ale	*aq_entvalid;	/* First ent valid for writing */
61 	LIST_ENTRY(alq)	aq_act;		/* List of active queues */
62 	LIST_ENTRY(alq)	aq_link;	/* List of all queues */
63 };
64 
/* Bits kept in aq_flags. */
#define	AQ_WANTED	0x0001		/* Someone sleeps on the queue; wake on IO completion */
#define	AQ_ACTIVE	0x0002		/* Queue is linked on the active list */
#define	AQ_FLUSHING	0x0004		/* A write to the vnode is in progress */
#define	AQ_SHUTDOWN	0x0008		/* Queue is being torn down; no new entries */

/* The per-queue mutex is taken with the spin-mutex primitives. */
#define	ALQ_LOCK(alq)	mtx_lock_spin(&(alq)->aq_mtx)
#define	ALQ_UNLOCK(alq)	mtx_unlock_spin(&(alq)->aq_mtx)
72 
73 static MALLOC_DEFINE(M_ALD, "ALD", "ALD");
74 
75 /*
76  * The ald_mtx protects the ald_queues list and the ald_active list.
77  */
78 static struct mtx ald_mtx;
79 static LIST_HEAD(, alq) ald_queues;
80 static LIST_HEAD(, alq) ald_active;
81 static int ald_shutingdown = 0;
82 struct thread *ald_thread;
83 static struct proc *ald_proc;
84 
85 #define	ALD_LOCK()	mtx_lock(&ald_mtx)
86 #define	ALD_UNLOCK()	mtx_unlock(&ald_mtx)
87 
/* Daemon-side helpers: global list management and the worker itself. */
static int	ald_add(struct alq *alq);
static int	ald_rem(struct alq *alq);
static void	ald_startup(void *unused);
static void	ald_daemon(void);
static void	ald_shutdown(void *arg, int howto);
static void	ald_activate(struct alq *alq);
static void	ald_deactivate(struct alq *alq);

/* Per-queue helpers: teardown and the actual write to the vnode. */
static void	alq_shutdown(struct alq *alq);
static int	alq_doio(struct alq *alq);

100 
101 
102 /*
103  * Add a new queue to the global list.  Fail if we're shutting down.
104  */
105 static int
106 ald_add(struct alq *alq)
107 {
108 	int error;
109 
110 	error = 0;
111 
112 	ALD_LOCK();
113 	if (ald_shutingdown) {
114 		error = EBUSY;
115 		goto done;
116 	}
117 	LIST_INSERT_HEAD(&ald_queues, alq, aq_link);
118 done:
119 	ALD_UNLOCK();
120 	return (error);
121 }
122 
123 /*
124  * Remove a queue from the global list unless we're shutting down.  If so,
125  * the ald will take care of cleaning up it's resources.
126  */
127 static int
128 ald_rem(struct alq *alq)
129 {
130 	int error;
131 
132 	error = 0;
133 
134 	ALD_LOCK();
135 	if (ald_shutingdown) {
136 		error = EBUSY;
137 		goto done;
138 	}
139 	LIST_REMOVE(alq, aq_link);
140 done:
141 	ALD_UNLOCK();
142 	return (error);
143 }
144 
145 /*
146  * Put a queue on the active list.  This will schedule it for writing.
147  */
148 static void
149 ald_activate(struct alq *alq)
150 {
151 	LIST_INSERT_HEAD(&ald_active, alq, aq_act);
152 	wakeup(&ald_active);
153 }
154 
155 static void
156 ald_deactivate(struct alq *alq)
157 {
158 	LIST_REMOVE(alq, aq_act);
159 	alq->aq_flags &= ~AQ_ACTIVE;
160 }
161 
162 static void
163 ald_startup(void *unused)
164 {
165 	mtx_init(&ald_mtx, "ALDmtx", NULL, MTX_DEF|MTX_QUIET);
166 	LIST_INIT(&ald_queues);
167 	LIST_INIT(&ald_active);
168 }
169 
170 static void
171 ald_daemon(void)
172 {
173 	int needwakeup;
174 	struct alq *alq;
175 
176 	ald_thread = FIRST_THREAD_IN_PROC(ald_proc);
177 
178 	EVENTHANDLER_REGISTER(shutdown_pre_sync, ald_shutdown, NULL,
179 	    SHUTDOWN_PRI_FIRST);
180 
181 	ALD_LOCK();
182 
183 	for (;;) {
184 		while ((alq = LIST_FIRST(&ald_active)) == NULL)
185 			msleep(&ald_active, &ald_mtx, PWAIT, "aldslp", 0);
186 
187 		ALQ_LOCK(alq);
188 		ald_deactivate(alq);
189 		ALD_UNLOCK();
190 		needwakeup = alq_doio(alq);
191 		ALQ_UNLOCK(alq);
192 		if (needwakeup)
193 			wakeup(alq);
194 		ALD_LOCK();
195 	}
196 }
197 
198 static void
199 ald_shutdown(void *arg, int howto)
200 {
201 	struct alq *alq;
202 
203 	ALD_LOCK();
204 	ald_shutingdown = 1;
205 
206 	while ((alq = LIST_FIRST(&ald_queues)) != NULL) {
207 		LIST_REMOVE(alq, aq_link);
208 		ALD_UNLOCK();
209 		alq_shutdown(alq);
210 		ALD_LOCK();
211 	}
212 	ALD_UNLOCK();
213 }
214 
215 static void
216 alq_shutdown(struct alq *alq)
217 {
218 	ALQ_LOCK(alq);
219 
220 	/* Stop any new writers. */
221 	alq->aq_flags |= AQ_SHUTDOWN;
222 
223 	/* Drain IO */
224 	while (alq->aq_flags & (AQ_FLUSHING|AQ_ACTIVE)) {
225 		alq->aq_flags |= AQ_WANTED;
226 		ALQ_UNLOCK(alq);
227 		tsleep(alq, PWAIT, "aldclose", 0);
228 		ALQ_LOCK(alq);
229 	}
230 	ALQ_UNLOCK(alq);
231 
232 	vn_close(alq->aq_vp, FWRITE, alq->aq_cred,
233 	    curthread);
234 	crfree(alq->aq_cred);
235 }
236 
237 /*
238  * Flush all pending data to disk.  This operation will block.
239  */
240 static int
241 alq_doio(struct alq *alq)
242 {
243 	struct thread *td;
244 	struct mount *mp;
245 	struct vnode *vp;
246 	struct uio auio;
247 	struct iovec aiov[2];
248 	struct ale *ale;
249 	struct ale *alstart;
250 	int totlen;
251 	int iov;
252 	int vfslocked;
253 
254 	vp = alq->aq_vp;
255 	td = curthread;
256 	totlen = 0;
257 	iov = 0;
258 
259 	alstart = ale = alq->aq_entvalid;
260 	alq->aq_entvalid = NULL;
261 
262 	bzero(&aiov, sizeof(aiov));
263 	bzero(&auio, sizeof(auio));
264 
265 	do {
266 		if (aiov[iov].iov_base == NULL)
267 			aiov[iov].iov_base = ale->ae_data;
268 		aiov[iov].iov_len += alq->aq_entlen;
269 		totlen += alq->aq_entlen;
270 		/* Check to see if we're wrapping the buffer */
271 		if (ale->ae_data + alq->aq_entlen != ale->ae_next->ae_data)
272 			iov++;
273 		ale->ae_flags &= ~AE_VALID;
274 		ale = ale->ae_next;
275 	} while (ale->ae_flags & AE_VALID);
276 
277 	alq->aq_flags |= AQ_FLUSHING;
278 	ALQ_UNLOCK(alq);
279 
280 	if (iov == 2 || aiov[iov].iov_base == NULL)
281 		iov--;
282 
283 	auio.uio_iov = &aiov[0];
284 	auio.uio_offset = 0;
285 	auio.uio_segflg = UIO_SYSSPACE;
286 	auio.uio_rw = UIO_WRITE;
287 	auio.uio_iovcnt = iov + 1;
288 	auio.uio_resid = totlen;
289 	auio.uio_td = td;
290 
291 	/*
292 	 * Do all of the junk required to write now.
293 	 */
294 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
295 	vn_start_write(vp, &mp, V_WAIT);
296 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
297 	VOP_LEASE(vp, td, alq->aq_cred, LEASE_WRITE);
298 	/*
299 	 * XXX: VOP_WRITE error checks are ignored.
300 	 */
301 #ifdef MAC
302 	if (mac_check_vnode_write(alq->aq_cred, NOCRED, vp) == 0)
303 #endif
304 		VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, alq->aq_cred);
305 	VOP_UNLOCK(vp, 0, td);
306 	vn_finished_write(mp);
307 	VFS_UNLOCK_GIANT(vfslocked);
308 
309 	ALQ_LOCK(alq);
310 	alq->aq_flags &= ~AQ_FLUSHING;
311 
312 	if (alq->aq_entfree == NULL)
313 		alq->aq_entfree = alstart;
314 
315 	if (alq->aq_flags & AQ_WANTED) {
316 		alq->aq_flags &= ~AQ_WANTED;
317 		return (1);
318 	}
319 
320 	return(0);
321 }
322 
323 static struct kproc_desc ald_kp = {
324         "ALQ Daemon",
325         ald_daemon,
326         &ald_proc
327 };
328 
329 SYSINIT(aldthread, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, kproc_start, &ald_kp)
330 SYSINIT(ald, SI_SUB_LOCK, SI_ORDER_ANY, ald_startup, NULL)
331 
332 
333 /* User visible queue functions */
334 
335 /*
336  * Create the queue data structure, allocate the buffer, and open the file.
337  */
338 int
339 alq_open(struct alq **alqp, const char *file, struct ucred *cred, int cmode,
340     int size, int count)
341 {
342 	struct thread *td;
343 	struct nameidata nd;
344 	struct ale *ale;
345 	struct ale *alp;
346 	struct alq *alq;
347 	char *bufp;
348 	int flags;
349 	int error;
350 	int i, vfslocked;
351 
352 	*alqp = NULL;
353 	td = curthread;
354 
355 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_SYSSPACE, file, td);
356 	flags = FWRITE | O_NOFOLLOW | O_CREAT;
357 
358 	error = vn_open_cred(&nd, &flags, cmode, cred, -1);
359 	if (error)
360 		return (error);
361 
362 	vfslocked = NDHASGIANT(&nd);
363 	NDFREE(&nd, NDF_ONLY_PNBUF);
364 	/* We just unlock so we hold a reference */
365 	VOP_UNLOCK(nd.ni_vp, 0, td);
366 	VFS_UNLOCK_GIANT(vfslocked);
367 
368 	alq = malloc(sizeof(*alq), M_ALD, M_WAITOK|M_ZERO);
369 	alq->aq_entbuf = malloc(count * size, M_ALD, M_WAITOK|M_ZERO);
370 	alq->aq_first = malloc(sizeof(*ale) * count, M_ALD, M_WAITOK|M_ZERO);
371 	alq->aq_vp = nd.ni_vp;
372 	alq->aq_cred = crhold(cred);
373 	alq->aq_entmax = count;
374 	alq->aq_entlen = size;
375 	alq->aq_entfree = alq->aq_first;
376 
377 	mtx_init(&alq->aq_mtx, "ALD Queue", NULL, MTX_SPIN|MTX_QUIET);
378 
379 	bufp = alq->aq_entbuf;
380 	ale = alq->aq_first;
381 	alp = NULL;
382 
383 	/* Match up entries with buffers */
384 	for (i = 0; i < count; i++) {
385 		if (alp)
386 			alp->ae_next = ale;
387 		ale->ae_data = bufp;
388 		alp = ale;
389 		ale++;
390 		bufp += size;
391 	}
392 
393 	alp->ae_next = alq->aq_first;
394 
395 	if ((error = ald_add(alq)) != 0)
396 		return (error);
397 	*alqp = alq;
398 
399 	return (0);
400 }
401 
402 /*
403  * Copy a new entry into the queue.  If the operation would block either
404  * wait or return an error depending on the value of waitok.
405  */
406 int
407 alq_write(struct alq *alq, void *data, int waitok)
408 {
409 	struct ale *ale;
410 
411 	if ((ale = alq_get(alq, waitok)) == NULL)
412 		return (EWOULDBLOCK);
413 
414 	bcopy(data, ale->ae_data, alq->aq_entlen);
415 	alq_post(alq, ale);
416 
417 	return (0);
418 }
419 
420 struct ale *
421 alq_get(struct alq *alq, int waitok)
422 {
423 	struct ale *ale;
424 	struct ale *aln;
425 
426 	ale = NULL;
427 
428 	ALQ_LOCK(alq);
429 
430 	/* Loop until we get an entry or we're shutting down */
431 	while ((alq->aq_flags & AQ_SHUTDOWN) == 0 &&
432 	    (ale = alq->aq_entfree) == NULL &&
433 	    (waitok & ALQ_WAITOK)) {
434 		alq->aq_flags |= AQ_WANTED;
435 		ALQ_UNLOCK(alq);
436 		tsleep(alq, PWAIT, "alqget", 0);
437 		ALQ_LOCK(alq);
438 	}
439 
440 	if (ale != NULL) {
441 		aln = ale->ae_next;
442 		if ((aln->ae_flags & AE_VALID) == 0)
443 			alq->aq_entfree = aln;
444 		else
445 			alq->aq_entfree = NULL;
446 	} else
447 		ALQ_UNLOCK(alq);
448 
449 
450 	return (ale);
451 }
452 
453 void
454 alq_post(struct alq *alq, struct ale *ale)
455 {
456 	int activate;
457 
458 	ale->ae_flags |= AE_VALID;
459 
460 	if (alq->aq_entvalid == NULL)
461 		alq->aq_entvalid = ale;
462 
463 	if ((alq->aq_flags & AQ_ACTIVE) == 0) {
464 		alq->aq_flags |= AQ_ACTIVE;
465 		activate = 1;
466 	} else
467 		activate = 0;
468 
469 	ALQ_UNLOCK(alq);
470 	if (activate) {
471 		ALD_LOCK();
472 		ald_activate(alq);
473 		ALD_UNLOCK();
474 	}
475 }
476 
477 void
478 alq_flush(struct alq *alq)
479 {
480 	int needwakeup = 0;
481 
482 	ALD_LOCK();
483 	ALQ_LOCK(alq);
484 	if (alq->aq_flags & AQ_ACTIVE) {
485 		ald_deactivate(alq);
486 		ALD_UNLOCK();
487 		needwakeup = alq_doio(alq);
488 	} else
489 		ALD_UNLOCK();
490 	ALQ_UNLOCK(alq);
491 
492 	if (needwakeup)
493 		wakeup(alq);
494 }
495 
496 /*
497  * Flush remaining data, close the file and free all resources.
498  */
499 void
500 alq_close(struct alq *alq)
501 {
502 	/*
503 	 * If we're already shuting down someone else will flush and close
504 	 * the vnode.
505 	 */
506 	if (ald_rem(alq) != 0)
507 		return;
508 
509 	/*
510 	 * Drain all pending IO.
511 	 */
512 	alq_shutdown(alq);
513 
514 	mtx_destroy(&alq->aq_mtx);
515 	free(alq->aq_first, M_ALD);
516 	free(alq->aq_entbuf, M_ALD);
517 	free(alq, M_ALD);
518 }
519