xref: /freebsd/sys/kern/kern_alq.c (revision c0020399a650364d0134f79f3fa319f84064372d)
1 /*-
2  * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice unmodified, this list of conditions, and the following
10  *    disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29 
30 #include "opt_mac.h"
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/kernel.h>
35 #include <sys/kthread.h>
36 #include <sys/lock.h>
37 #include <sys/mount.h>
38 #include <sys/mutex.h>
39 #include <sys/namei.h>
40 #include <sys/proc.h>
41 #include <sys/vnode.h>
42 #include <sys/alq.h>
43 #include <sys/malloc.h>
44 #include <sys/unistd.h>
45 #include <sys/fcntl.h>
46 #include <sys/eventhandler.h>
47 
48 #include <security/mac/mac_framework.h>
49 
50 /* Async. Logging Queue */
51 struct alq {
52 	int	aq_entmax;		/* Max entries */
53 	int	aq_entlen;		/* Entry length */
54 	char	*aq_entbuf;		/* Buffer for stored entries */
55 	int	aq_flags;		/* Queue flags */
56 	struct mtx	aq_mtx;		/* Queue lock */
57 	struct vnode	*aq_vp;		/* Open vnode handle */
58 	struct ucred	*aq_cred;	/* Credentials of the opening thread */
59 	struct ale	*aq_first;	/* First ent */
60 	struct ale	*aq_entfree;	/* First free ent */
61 	struct ale	*aq_entvalid;	/* First ent valid for writing */
62 	LIST_ENTRY(alq)	aq_act;		/* List of active queues */
63 	LIST_ENTRY(alq)	aq_link;	/* List of all queues */
64 };
65 
/* Bits stored in aq_flags. */
#define	AQ_WANTED	0x0001		/* A sleeper wants a wakeup on the queue */
#define	AQ_ACTIVE	0x0002		/* Queue is linked on the active list */
#define	AQ_FLUSHING	0x0004		/* A write to the vnode is in progress */
#define	AQ_SHUTDOWN	0x0008		/* Queue is closing; no new entries */

/* The per-queue lock is a spin mutex. */
#define	ALQ_LOCK(alq)	mtx_lock_spin(&(alq)->aq_mtx)
#define	ALQ_UNLOCK(alq)	mtx_unlock_spin(&(alq)->aq_mtx)
73 
74 static MALLOC_DEFINE(M_ALD, "ALD", "ALD");
75 
76 /*
77  * The ald_mtx protects the ald_queues list and the ald_active list.
78  */
79 static struct mtx ald_mtx;
80 static LIST_HEAD(, alq) ald_queues;
81 static LIST_HEAD(, alq) ald_active;
82 static int ald_shutingdown = 0;
83 struct thread *ald_thread;
84 static struct proc *ald_proc;
85 
86 #define	ALD_LOCK()	mtx_lock(&ald_mtx)
87 #define	ALD_UNLOCK()	mtx_unlock(&ald_mtx)
88 
/* Daemon-side helpers */
static int ald_add(struct alq *alq);
static int ald_rem(struct alq *alq);
static void ald_startup(void *unused);
static void ald_daemon(void);
static void ald_shutdown(void *arg, int howto);
static void ald_activate(struct alq *alq);
static void ald_deactivate(struct alq *alq);

/* Per-queue helpers */
static void alq_shutdown(struct alq *alq);
static int alq_doio(struct alq *alq);

101 
102 
103 /*
104  * Add a new queue to the global list.  Fail if we're shutting down.
105  */
106 static int
107 ald_add(struct alq *alq)
108 {
109 	int error;
110 
111 	error = 0;
112 
113 	ALD_LOCK();
114 	if (ald_shutingdown) {
115 		error = EBUSY;
116 		goto done;
117 	}
118 	LIST_INSERT_HEAD(&ald_queues, alq, aq_link);
119 done:
120 	ALD_UNLOCK();
121 	return (error);
122 }
123 
124 /*
125  * Remove a queue from the global list unless we're shutting down.  If so,
126  * the ald will take care of cleaning up it's resources.
127  */
128 static int
129 ald_rem(struct alq *alq)
130 {
131 	int error;
132 
133 	error = 0;
134 
135 	ALD_LOCK();
136 	if (ald_shutingdown) {
137 		error = EBUSY;
138 		goto done;
139 	}
140 	LIST_REMOVE(alq, aq_link);
141 done:
142 	ALD_UNLOCK();
143 	return (error);
144 }
145 
146 /*
147  * Put a queue on the active list.  This will schedule it for writing.
148  */
149 static void
150 ald_activate(struct alq *alq)
151 {
152 	LIST_INSERT_HEAD(&ald_active, alq, aq_act);
153 	wakeup(&ald_active);
154 }
155 
156 static void
157 ald_deactivate(struct alq *alq)
158 {
159 	LIST_REMOVE(alq, aq_act);
160 	alq->aq_flags &= ~AQ_ACTIVE;
161 }
162 
163 static void
164 ald_startup(void *unused)
165 {
166 	mtx_init(&ald_mtx, "ALDmtx", NULL, MTX_DEF|MTX_QUIET);
167 	LIST_INIT(&ald_queues);
168 	LIST_INIT(&ald_active);
169 }
170 
171 static void
172 ald_daemon(void)
173 {
174 	int needwakeup;
175 	struct alq *alq;
176 
177 	ald_thread = FIRST_THREAD_IN_PROC(ald_proc);
178 
179 	EVENTHANDLER_REGISTER(shutdown_pre_sync, ald_shutdown, NULL,
180 	    SHUTDOWN_PRI_FIRST);
181 
182 	ALD_LOCK();
183 
184 	for (;;) {
185 		while ((alq = LIST_FIRST(&ald_active)) == NULL)
186 			msleep(&ald_active, &ald_mtx, PWAIT, "aldslp", 0);
187 
188 		ALQ_LOCK(alq);
189 		ald_deactivate(alq);
190 		ALD_UNLOCK();
191 		needwakeup = alq_doio(alq);
192 		ALQ_UNLOCK(alq);
193 		if (needwakeup)
194 			wakeup(alq);
195 		ALD_LOCK();
196 	}
197 }
198 
199 static void
200 ald_shutdown(void *arg, int howto)
201 {
202 	struct alq *alq;
203 
204 	ALD_LOCK();
205 	ald_shutingdown = 1;
206 
207 	while ((alq = LIST_FIRST(&ald_queues)) != NULL) {
208 		LIST_REMOVE(alq, aq_link);
209 		ALD_UNLOCK();
210 		alq_shutdown(alq);
211 		ALD_LOCK();
212 	}
213 	ALD_UNLOCK();
214 }
215 
216 static void
217 alq_shutdown(struct alq *alq)
218 {
219 	ALQ_LOCK(alq);
220 
221 	/* Stop any new writers. */
222 	alq->aq_flags |= AQ_SHUTDOWN;
223 
224 	/* Drain IO */
225 	while (alq->aq_flags & (AQ_FLUSHING|AQ_ACTIVE)) {
226 		alq->aq_flags |= AQ_WANTED;
227 		msleep_spin(alq, &alq->aq_mtx, "aldclose", 0);
228 	}
229 	ALQ_UNLOCK(alq);
230 
231 	vn_close(alq->aq_vp, FWRITE, alq->aq_cred,
232 	    curthread);
233 	crfree(alq->aq_cred);
234 }
235 
236 /*
237  * Flush all pending data to disk.  This operation will block.
238  */
239 static int
240 alq_doio(struct alq *alq)
241 {
242 	struct thread *td;
243 	struct mount *mp;
244 	struct vnode *vp;
245 	struct uio auio;
246 	struct iovec aiov[2];
247 	struct ale *ale;
248 	struct ale *alstart;
249 	int totlen;
250 	int iov;
251 	int vfslocked;
252 
253 	vp = alq->aq_vp;
254 	td = curthread;
255 	totlen = 0;
256 	iov = 0;
257 
258 	alstart = ale = alq->aq_entvalid;
259 	alq->aq_entvalid = NULL;
260 
261 	bzero(&aiov, sizeof(aiov));
262 	bzero(&auio, sizeof(auio));
263 
264 	do {
265 		if (aiov[iov].iov_base == NULL)
266 			aiov[iov].iov_base = ale->ae_data;
267 		aiov[iov].iov_len += alq->aq_entlen;
268 		totlen += alq->aq_entlen;
269 		/* Check to see if we're wrapping the buffer */
270 		if (ale->ae_data + alq->aq_entlen != ale->ae_next->ae_data)
271 			iov++;
272 		ale->ae_flags &= ~AE_VALID;
273 		ale = ale->ae_next;
274 	} while (ale->ae_flags & AE_VALID);
275 
276 	alq->aq_flags |= AQ_FLUSHING;
277 	ALQ_UNLOCK(alq);
278 
279 	if (iov == 2 || aiov[iov].iov_base == NULL)
280 		iov--;
281 
282 	auio.uio_iov = &aiov[0];
283 	auio.uio_offset = 0;
284 	auio.uio_segflg = UIO_SYSSPACE;
285 	auio.uio_rw = UIO_WRITE;
286 	auio.uio_iovcnt = iov + 1;
287 	auio.uio_resid = totlen;
288 	auio.uio_td = td;
289 
290 	/*
291 	 * Do all of the junk required to write now.
292 	 */
293 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
294 	vn_start_write(vp, &mp, V_WAIT);
295 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
296 	/*
297 	 * XXX: VOP_WRITE error checks are ignored.
298 	 */
299 #ifdef MAC
300 	if (mac_vnode_check_write(alq->aq_cred, NOCRED, vp) == 0)
301 #endif
302 		VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, alq->aq_cred);
303 	VOP_UNLOCK(vp, 0);
304 	vn_finished_write(mp);
305 	VFS_UNLOCK_GIANT(vfslocked);
306 
307 	ALQ_LOCK(alq);
308 	alq->aq_flags &= ~AQ_FLUSHING;
309 
310 	if (alq->aq_entfree == NULL)
311 		alq->aq_entfree = alstart;
312 
313 	if (alq->aq_flags & AQ_WANTED) {
314 		alq->aq_flags &= ~AQ_WANTED;
315 		return (1);
316 	}
317 
318 	return(0);
319 }
320 
321 static struct kproc_desc ald_kp = {
322         "ALQ Daemon",
323         ald_daemon,
324         &ald_proc
325 };
326 
327 SYSINIT(aldthread, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, kproc_start, &ald_kp);
328 SYSINIT(ald, SI_SUB_LOCK, SI_ORDER_ANY, ald_startup, NULL);
329 
330 
331 /* User visible queue functions */
332 
333 /*
334  * Create the queue data structure, allocate the buffer, and open the file.
335  */
336 int
337 alq_open(struct alq **alqp, const char *file, struct ucred *cred, int cmode,
338     int size, int count)
339 {
340 	struct thread *td;
341 	struct nameidata nd;
342 	struct ale *ale;
343 	struct ale *alp;
344 	struct alq *alq;
345 	char *bufp;
346 	int flags;
347 	int error;
348 	int i, vfslocked;
349 
350 	*alqp = NULL;
351 	td = curthread;
352 
353 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_SYSSPACE, file, td);
354 	flags = FWRITE | O_NOFOLLOW | O_CREAT;
355 
356 	error = vn_open_cred(&nd, &flags, cmode, cred, NULL);
357 	if (error)
358 		return (error);
359 
360 	vfslocked = NDHASGIANT(&nd);
361 	NDFREE(&nd, NDF_ONLY_PNBUF);
362 	/* We just unlock so we hold a reference */
363 	VOP_UNLOCK(nd.ni_vp, 0);
364 	VFS_UNLOCK_GIANT(vfslocked);
365 
366 	alq = malloc(sizeof(*alq), M_ALD, M_WAITOK|M_ZERO);
367 	alq->aq_entbuf = malloc(count * size, M_ALD, M_WAITOK|M_ZERO);
368 	alq->aq_first = malloc(sizeof(*ale) * count, M_ALD, M_WAITOK|M_ZERO);
369 	alq->aq_vp = nd.ni_vp;
370 	alq->aq_cred = crhold(cred);
371 	alq->aq_entmax = count;
372 	alq->aq_entlen = size;
373 	alq->aq_entfree = alq->aq_first;
374 
375 	mtx_init(&alq->aq_mtx, "ALD Queue", NULL, MTX_SPIN|MTX_QUIET);
376 
377 	bufp = alq->aq_entbuf;
378 	ale = alq->aq_first;
379 	alp = NULL;
380 
381 	/* Match up entries with buffers */
382 	for (i = 0; i < count; i++) {
383 		if (alp)
384 			alp->ae_next = ale;
385 		ale->ae_data = bufp;
386 		alp = ale;
387 		ale++;
388 		bufp += size;
389 	}
390 
391 	alp->ae_next = alq->aq_first;
392 
393 	if ((error = ald_add(alq)) != 0)
394 		return (error);
395 	*alqp = alq;
396 
397 	return (0);
398 }
399 
400 /*
401  * Copy a new entry into the queue.  If the operation would block either
402  * wait or return an error depending on the value of waitok.
403  */
404 int
405 alq_write(struct alq *alq, void *data, int waitok)
406 {
407 	struct ale *ale;
408 
409 	if ((ale = alq_get(alq, waitok)) == NULL)
410 		return (EWOULDBLOCK);
411 
412 	bcopy(data, ale->ae_data, alq->aq_entlen);
413 	alq_post(alq, ale);
414 
415 	return (0);
416 }
417 
418 struct ale *
419 alq_get(struct alq *alq, int waitok)
420 {
421 	struct ale *ale;
422 	struct ale *aln;
423 
424 	ale = NULL;
425 
426 	ALQ_LOCK(alq);
427 
428 	/* Loop until we get an entry or we're shutting down */
429 	while ((alq->aq_flags & AQ_SHUTDOWN) == 0 &&
430 	    (ale = alq->aq_entfree) == NULL &&
431 	    (waitok & ALQ_WAITOK)) {
432 		alq->aq_flags |= AQ_WANTED;
433 		msleep_spin(alq, &alq->aq_mtx, "alqget", 0);
434 	}
435 
436 	if (ale != NULL) {
437 		aln = ale->ae_next;
438 		if ((aln->ae_flags & AE_VALID) == 0)
439 			alq->aq_entfree = aln;
440 		else
441 			alq->aq_entfree = NULL;
442 	} else
443 		ALQ_UNLOCK(alq);
444 
445 
446 	return (ale);
447 }
448 
449 void
450 alq_post(struct alq *alq, struct ale *ale)
451 {
452 	int activate;
453 
454 	ale->ae_flags |= AE_VALID;
455 
456 	if (alq->aq_entvalid == NULL)
457 		alq->aq_entvalid = ale;
458 
459 	if ((alq->aq_flags & AQ_ACTIVE) == 0) {
460 		alq->aq_flags |= AQ_ACTIVE;
461 		activate = 1;
462 	} else
463 		activate = 0;
464 
465 	ALQ_UNLOCK(alq);
466 	if (activate) {
467 		ALD_LOCK();
468 		ald_activate(alq);
469 		ALD_UNLOCK();
470 	}
471 }
472 
473 void
474 alq_flush(struct alq *alq)
475 {
476 	int needwakeup = 0;
477 
478 	ALD_LOCK();
479 	ALQ_LOCK(alq);
480 	if (alq->aq_flags & AQ_ACTIVE) {
481 		ald_deactivate(alq);
482 		ALD_UNLOCK();
483 		needwakeup = alq_doio(alq);
484 	} else
485 		ALD_UNLOCK();
486 	ALQ_UNLOCK(alq);
487 
488 	if (needwakeup)
489 		wakeup(alq);
490 }
491 
492 /*
493  * Flush remaining data, close the file and free all resources.
494  */
495 void
496 alq_close(struct alq *alq)
497 {
498 	/*
499 	 * If we're already shuting down someone else will flush and close
500 	 * the vnode.
501 	 */
502 	if (ald_rem(alq) != 0)
503 		return;
504 
505 	/*
506 	 * Drain all pending IO.
507 	 */
508 	alq_shutdown(alq);
509 
510 	mtx_destroy(&alq->aq_mtx);
511 	free(alq->aq_first, M_ALD);
512 	free(alq->aq_entbuf, M_ALD);
513 	free(alq, M_ALD);
514 }
515