/*-
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * Copyright (c) 2008-2009, Lawrence Stewart <lstewart@freebsd.org>
 * Copyright (c) 2009-2010, The FreeBSD Foundation
 * All rights reserved.
 *
 * Portions of this software were developed at the Centre for Advanced
 * Internet Architectures, Swinburne University of Technology, Melbourne,
 * Australia by Lawrence Stewart under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_mac.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/lock.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/alq.h>
#include <sys/malloc.h>
#include <sys/unistd.h>
#include <sys/fcntl.h>
#include <sys/eventhandler.h>

#include <security/mac/mac_framework.h>

/* Async. Logging Queue */
struct alq {
	int	aq_entmax;		/* Max entries */
	int	aq_entlen;		/* Entry length */
	char	*aq_entbuf;		/* Buffer for stored entries */
	int	aq_flags;		/* Queue flags */
	struct mtx	aq_mtx;		/* Queue lock */
	struct vnode	*aq_vp;		/* Open vnode handle */
	struct ucred	*aq_cred;	/* Credentials of the opening thread */
	struct ale	*aq_first;	/* First entry */
	struct ale	*aq_entfree;	/* First free entry */
	struct ale	*aq_entvalid;	/* First entry valid for writing */
	LIST_ENTRY(alq)	aq_act;		/* List of active queues */
	LIST_ENTRY(alq)	aq_link;	/* List of all queues */
};

#define	AQ_WANTED	0x0001		/* Wake up sleeper when I/O is done */
#define	AQ_ACTIVE	0x0002		/* Queue is on the active list */
#define	AQ_FLUSHING	0x0004		/* Queue is being flushed */
#define	AQ_SHUTDOWN	0x0008		/* Queue is no longer valid */

#define	ALQ_LOCK(alq)	mtx_lock_spin(&(alq)->aq_mtx)
#define	ALQ_UNLOCK(alq)	mtx_unlock_spin(&(alq)->aq_mtx)

static MALLOC_DEFINE(M_ALD, "ALD", "ALD");

/*
 * The ald_mtx protects the ald_queues list and the ald_active list.
 */
static struct mtx ald_mtx;
static LIST_HEAD(, alq) ald_queues;
static LIST_HEAD(, alq) ald_active;
static int ald_shuttingdown = 0;
struct thread *ald_thread;
static struct proc *ald_proc;

#define	ALD_LOCK()	mtx_lock(&ald_mtx)
#define	ALD_UNLOCK()	mtx_unlock(&ald_mtx)

/* Daemon functions */
static int ald_add(struct alq *);
static int ald_rem(struct alq *);
static void ald_startup(void *);
static void ald_daemon(void);
static void ald_shutdown(void *, int);
static void ald_activate(struct alq *);
static void ald_deactivate(struct alq *);

/* Internal queue functions */
static void alq_shutdown(struct alq *);
static void alq_destroy(struct alq *);
static int alq_doio(struct alq *);

/*
 * Add a new queue to the global list.  Fail if we're shutting down.
 */
static int
ald_add(struct alq *alq)
{
	int error;

	error = 0;

	ALD_LOCK();
	if (ald_shuttingdown) {
		error = EBUSY;
		goto done;
	}
	LIST_INSERT_HEAD(&ald_queues, alq, aq_link);
done:
	ALD_UNLOCK();
	return (error);
}

/*
 * Remove a queue from the global list unless we're shutting down.  If so,
 * the ald will take care of cleaning up its resources.
 */
static int
ald_rem(struct alq *alq)
{
	int error;

	error = 0;

	ALD_LOCK();
	if (ald_shuttingdown) {
		error = EBUSY;
		goto done;
	}
	LIST_REMOVE(alq, aq_link);
done:
	ALD_UNLOCK();
	return (error);
}

/*
 * Put a queue on the active list.  This will schedule it for writing.
 */
static void
ald_activate(struct alq *alq)
{
	LIST_INSERT_HEAD(&ald_active, alq, aq_act);
	wakeup(&ald_active);
}

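/*
 * Remove a queue from the active list and clear its active flag.
 */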
static void
ald_deactivate(struct alq *alq)
{
	LIST_REMOVE(alq, aq_act);
	alq->aq_flags &= ~AQ_ACTIVE;
}

static void
ald_startup(void *unused)
{
	mtx_init(&ald_mtx, "ALDmtx", NULL, MTX_DEF|MTX_QUIET);
	LIST_INIT(&ald_queues);
	LIST_INIT(&ald_active);
}

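/*
 * The ald daemon's main loop: sleep until a queue is placed on the active
 * list, flush it via alq_doio(), and repeat until shutdown is requested.
 */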
static void
ald_daemon(void)
{
	int needwakeup;
	struct alq *alq;

	ald_thread = FIRST_THREAD_IN_PROC(ald_proc);

	EVENTHANDLER_REGISTER(shutdown_pre_sync, ald_shutdown, NULL,
	    SHUTDOWN_PRI_FIRST);

	ALD_LOCK();

	for (;;) {
		while ((alq = LIST_FIRST(&ald_active)) == NULL &&
		    !ald_shuttingdown)
			mtx_sleep(&ald_active, &ald_mtx, PWAIT, "aldslp", 0);

		/* Don't shut down until all active ALQs are flushed. */
		if (ald_shuttingdown && alq == NULL) {
			ALD_UNLOCK();
			break;
		}

		ALQ_LOCK(alq);
		ald_deactivate(alq);
		ALD_UNLOCK();
		needwakeup = alq_doio(alq);
		ALQ_UNLOCK(alq);
		if (needwakeup)
			wakeup(alq);
		ALD_LOCK();
	}

	kproc_exit(0);
}

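/*
 * Shutdown handler, registered on shutdown_pre_sync and also called from
 * the module event handler: flush and close every remaining queue, then
 * wait for ald_daemon to exit.
 */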
static void
ald_shutdown(void *arg, int howto)
{
	struct alq *alq;

	ALD_LOCK();

	/* Ensure no new queues can be created. */
	ald_shuttingdown = 1;

	/* Shut down all ALQs prior to terminating the ald_daemon. */
	while ((alq = LIST_FIRST(&ald_queues)) != NULL) {
		LIST_REMOVE(alq, aq_link);
		ALD_UNLOCK();
		alq_shutdown(alq);
		ALD_LOCK();
	}

	/* At this point, all ALQs are flushed and shut down. */

	/*
	 * Wake ald_daemon so that it exits. It can't make progress until
	 * we mtx_sleep() below, because we hold the ald_mtx.
	 */
	wakeup(&ald_active);

	/* Wait for ald_daemon to exit. */
	mtx_sleep(ald_proc, &ald_mtx, PWAIT, "aldslp", 0);

	ALD_UNLOCK();
}

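/*
 * Mark a queue as shutting down, wait for any in-progress flush to
 * complete, and close the backing vnode.
 */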
static void
alq_shutdown(struct alq *alq)
{
	ALQ_LOCK(alq);

	/* Stop any new writers. */
	alq->aq_flags |= AQ_SHUTDOWN;

	/* Drain I/O. */
	while (alq->aq_flags & AQ_ACTIVE) {
		alq->aq_flags |= AQ_WANTED;
		msleep_spin(alq, &alq->aq_mtx, "aldclose", 0);
	}
	ALQ_UNLOCK(alq);

	vn_close(alq->aq_vp, FWRITE, alq->aq_cred, curthread);
	crfree(alq->aq_cred);
}

static void
alq_destroy(struct alq *alq)
{
	/* Drain all pending I/O. */
	alq_shutdown(alq);

	mtx_destroy(&alq->aq_mtx);
	free(alq->aq_first, M_ALD);
	free(alq->aq_entbuf, M_ALD);
	free(alq, M_ALD);
}

/*
 * Flush all pending data to disk.  This operation will block.
 */
static int
alq_doio(struct alq *alq)
{
	struct thread *td;
	struct mount *mp;
	struct vnode *vp;
	struct uio auio;
	struct iovec aiov[2];
	struct ale *ale;
	struct ale *alstart;
	int totlen;
	int iov;
	int vfslocked;

	vp = alq->aq_vp;
	td = curthread;
	totlen = 0;
	iov = 0;

	alstart = ale = alq->aq_entvalid;
	alq->aq_entvalid = NULL;

	bzero(&aiov, sizeof(aiov));
	bzero(&auio, sizeof(auio));

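	/*
	 * Walk the list of valid entries, coalescing contiguous entries into
	 * at most two iovecs: one run up to the end of the circular buffer
	 * and, if the valid region wraps, a second run from its start.
	 */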
	do {
		if (aiov[iov].iov_base == NULL)
			aiov[iov].iov_base = ale->ae_data;
		aiov[iov].iov_len += alq->aq_entlen;
		totlen += alq->aq_entlen;
		/* Check to see if we're wrapping the buffer */
		if (ale->ae_data + alq->aq_entlen != ale->ae_next->ae_data)
			iov++;
		ale->ae_flags &= ~AE_VALID;
		ale = ale->ae_next;
	} while (ale->ae_flags & AE_VALID);

	alq->aq_flags |= AQ_FLUSHING;
	ALQ_UNLOCK(alq);

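	/*
	 * The loop above may leave iov pointing one past the last populated
	 * iovec (when the final entry ended exactly at a discontinuity), so
	 * trim it back to the index of the last iovec actually in use.
	 */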
	if (iov == 2 || aiov[iov].iov_base == NULL)
		iov--;

	auio.uio_iov = &aiov[0];
	auio.uio_offset = 0;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = UIO_WRITE;
	auio.uio_iovcnt = iov + 1;
	auio.uio_resid = totlen;
	auio.uio_td = td;

	/*
	 * Do all of the junk required to write now.
	 */
	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	vn_start_write(vp, &mp, V_WAIT);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	/*
	 * XXX: VOP_WRITE error checks are ignored.
	 */
#ifdef MAC
	if (mac_vnode_check_write(alq->aq_cred, NOCRED, vp) == 0)
#endif
		VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, alq->aq_cred);
	VOP_UNLOCK(vp, 0);
	vn_finished_write(mp);
	VFS_UNLOCK_GIANT(vfslocked);

	ALQ_LOCK(alq);
	alq->aq_flags &= ~AQ_FLUSHING;

	if (alq->aq_entfree == NULL)
		alq->aq_entfree = alstart;

	if (alq->aq_flags & AQ_WANTED) {
		alq->aq_flags &= ~AQ_WANTED;
		return (1);
	}

	return (0);
}

static struct kproc_desc ald_kp = {
	"ALQ Daemon",
	ald_daemon,
	&ald_proc
};

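/*
 * The ald_mtx and queue lists are initialized early, at SI_SUB_LOCK; the
 * daemon process itself is started later, once kernel processes can run.
 */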
SYSINIT(aldthread, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, kproc_start, &ald_kp);
SYSINIT(ald, SI_SUB_LOCK, SI_ORDER_ANY, ald_startup, NULL);

/* User visible queue functions */

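/*
 * Example usage of the public API (an illustrative sketch only; the file
 * name, record size and queue depth below are hypothetical):
 *
 *	struct alq *alq;
 *	char rec[32];
 *	int error;
 *
 *	error = alq_open(&alq, "/var/log/sample.alq", curthread->td_ucred,
 *	    0600, sizeof(rec), 512);
 *	if (error == 0) {
 *		(void)alq_write(alq, rec, ALQ_WAITOK);
 *		alq_close(alq);
 *	}
 */
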
/*
 * Create the queue data structure, allocate the buffer, and open the file.
 */
int
alq_open(struct alq **alqp, const char *file, struct ucred *cred, int cmode,
    int size, int count)
{
	struct thread *td;
	struct nameidata nd;
	struct ale *ale;
	struct ale *alp;
	struct alq *alq;
	char *bufp;
	int flags;
	int error;
	int i, vfslocked;

	*alqp = NULL;
	td = curthread;

	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_SYSSPACE, file, td);
	flags = FWRITE | O_NOFOLLOW | O_CREAT;

	error = vn_open_cred(&nd, &flags, cmode, 0, cred, NULL);
	if (error)
		return (error);

	vfslocked = NDHASGIANT(&nd);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	/* We just unlock; we still hold a reference on the vnode. */
	VOP_UNLOCK(nd.ni_vp, 0);
	VFS_UNLOCK_GIANT(vfslocked);

	alq = malloc(sizeof(*alq), M_ALD, M_WAITOK|M_ZERO);
	alq->aq_entbuf = malloc(count * size, M_ALD, M_WAITOK|M_ZERO);
	alq->aq_first = malloc(sizeof(*ale) * count, M_ALD, M_WAITOK|M_ZERO);
	alq->aq_vp = nd.ni_vp;
	alq->aq_cred = crhold(cred);
	alq->aq_entmax = count;
	alq->aq_entlen = size;
	alq->aq_entfree = alq->aq_first;

	mtx_init(&alq->aq_mtx, "ALD Queue", NULL, MTX_SPIN|MTX_QUIET);

	bufp = alq->aq_entbuf;
	ale = alq->aq_first;
	alp = NULL;

	/* Match up entries with buffers */
	for (i = 0; i < count; i++) {
		if (alp)
			alp->ae_next = ale;
		ale->ae_data = bufp;
		alp = ale;
		ale++;
		bufp += size;
	}

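	/* Close the ring: the last entry points back at the first. */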
	alp->ae_next = alq->aq_first;

	if ((error = ald_add(alq)) != 0) {
		alq_destroy(alq);
		return (error);
	}

	*alqp = alq;

	return (0);
}

/*
 * Copy a new entry into the queue.  If the operation would block, either
 * wait or return an error, depending on the value of waitok.
 */
int
alq_write(struct alq *alq, void *data, int waitok)
{
	struct ale *ale;

	if ((ale = alq_get(alq, waitok)) == NULL)
		return (EWOULDBLOCK);

	bcopy(data, ale->ae_data, alq->aq_entlen);
	alq_post(alq, ale);

	return (0);
}

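/*
 * Obtain the next free entry, sleeping for one if ALQ_WAITOK is set.
 * Returns NULL if the queue is shutting down or if no entry is free and
 * the caller cannot wait.  On success the queue remains locked; the
 * matching alq_post() call releases the lock.
 */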
struct ale *
alq_get(struct alq *alq, int waitok)
{
	struct ale *ale;
	struct ale *aln;

	ale = NULL;

	ALQ_LOCK(alq);

	/* Loop until we get an entry or we're shutting down */
	while ((alq->aq_flags & AQ_SHUTDOWN) == 0 &&
	    (ale = alq->aq_entfree) == NULL &&
	    (waitok & ALQ_WAITOK)) {
		alq->aq_flags |= AQ_WANTED;
		msleep_spin(alq, &alq->aq_mtx, "alqget", 0);
	}

	if (ale != NULL) {
		aln = ale->ae_next;
		if ((aln->ae_flags & AE_VALID) == 0)
			alq->aq_entfree = aln;
		else
			alq->aq_entfree = NULL;
	} else
		ALQ_UNLOCK(alq);

	return (ale);
}

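/*
 * Mark an entry obtained from alq_get() as valid and, if the queue is not
 * already active, schedule it for writing.  Releases the queue lock that
 * alq_get() returned held.
 */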
void
alq_post(struct alq *alq, struct ale *ale)
{
	int activate;

	ale->ae_flags |= AE_VALID;

	if (alq->aq_entvalid == NULL)
		alq->aq_entvalid = ale;

	if ((alq->aq_flags & AQ_ACTIVE) == 0) {
		alq->aq_flags |= AQ_ACTIVE;
		activate = 1;
	} else
		activate = 0;

	ALQ_UNLOCK(alq);
	if (activate) {
		ALD_LOCK();
		ald_activate(alq);
		ALD_UNLOCK();
	}
}

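/*
 * Force a synchronous flush of any pending entries for this queue.
 */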
void
alq_flush(struct alq *alq)
{
	int needwakeup = 0;

	ALD_LOCK();
	ALQ_LOCK(alq);
	if (alq->aq_flags & AQ_ACTIVE) {
		ald_deactivate(alq);
		ALD_UNLOCK();
		needwakeup = alq_doio(alq);
	} else
		ALD_UNLOCK();
	ALQ_UNLOCK(alq);

	if (needwakeup)
		wakeup(alq);
}

/*
 * Flush remaining data, close the file and free all resources.
 */
void
alq_close(struct alq *alq)
{
	/* Only flush and destroy alq if not already shutting down. */
	if (ald_rem(alq) == 0)
		alq_destroy(alq);
}

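/*
 * Module event handler.  Unload is only permitted when no queues are open;
 * the actual teardown happens at MOD_QUIESCE so that MOD_UNLOAD cannot
 * fail once quiescing has succeeded.
 */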
static int
alq_load_handler(module_t mod, int what, void *arg)
{
	int ret;

	ret = 0;

	switch (what) {
	case MOD_LOAD:
	case MOD_SHUTDOWN:
		break;

	case MOD_QUIESCE:
		ALD_LOCK();
		/* Only allow unload if there are no open queues. */
		if (LIST_FIRST(&ald_queues) == NULL) {
			ald_shuttingdown = 1;
			ALD_UNLOCK();
			ald_shutdown(NULL, 0);
			mtx_destroy(&ald_mtx);
		} else {
			ALD_UNLOCK();
			ret = EBUSY;
		}
		break;

	case MOD_UNLOAD:
		/* If MOD_QUIESCE failed we must fail here too. */
		if (ald_shuttingdown == 0)
			ret = EBUSY;
		break;

	default:
		ret = EINVAL;
		break;
	}

	return (ret);
}

static moduledata_t alq_mod =
{
	"alq",
	alq_load_handler,
	NULL
};

DECLARE_MODULE(alq, alq_mod, SI_SUB_SMP, SI_ORDER_ANY);
MODULE_VERSION(alq, 1);