xref: /freebsd/tests/sys/aio/aio_test.c (revision 19cca0b9613d7c3058e41baf0204245119732235)
1 /*-
2  * Copyright (c) 2004 Robert N. M. Watson
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28 
29 /*
30  * Regression test to do some very basic AIO exercising on several types of
31  * file descriptors.  Currently, the tests consist of initializing a fixed
32  * size buffer with pseudo-random data, writing it to one fd using AIO, then
33  * reading it from a second descriptor using AIO.  For some targets, the same
34  * fd is used for write and read (i.e., file, md device), but for others the
35  * operation is performed on a peer (pty, socket, fifo, etc).  For each file
36  * descriptor type, several completion methods are tested.  This test program
37  * does not attempt to exercise error cases or more subtle asynchronous
38  * behavior, just make sure that the basic operations work on some basic object
39  * types.
40  */
41 
42 #include <sys/param.h>
43 #include <sys/module.h>
44 #include <sys/resource.h>
45 #include <sys/socket.h>
46 #include <sys/stat.h>
47 #include <sys/mdioctl.h>
48 
49 #include <aio.h>
50 #include <err.h>
51 #include <errno.h>
52 #include <fcntl.h>
53 #include <libutil.h>
54 #include <limits.h>
55 #include <semaphore.h>
56 #include <stdint.h>
57 #include <stdio.h>
58 #include <stdlib.h>
59 #include <string.h>
60 #include <termios.h>
61 #include <unistd.h>
62 
63 #include <atf-c.h>
64 
65 #include "freebsd_test_suite/macros.h"
66 #include "local.h"
67 
68 /*
69  * GLOBAL_MAX sets the largest usable buffer size to be read and written, as
70  * it sizes ac_buffer in the aio_context structure.  It is also the default
71  * size for file I/O.  For other types, we use smaller blocks or we risk
72  * blocking (and we run in a single process/thread so that would be bad).
73  */
74 #define	GLOBAL_MAX	16384
75 
76 #define	BUFFER_MAX	GLOBAL_MAX
77 
/*
 * A completion function will block until the aio has completed, then return
 * the result of the aio.  errno will be set appropriately.
 */
typedef ssize_t (*completion)(struct aiocb*);

/*
 * State shared by the read/write helpers for a single test run.
 */
struct aio_context {
	/* Descriptor read back from, and descriptor written to. */
	int		 ac_read_fd, ac_write_fd;
	/* Seed handed to aio_fill_buffer()/aio_test_buffer(). */
	long		 ac_seed;
	/* Data that is written out and later read back into place. */
	char		 ac_buffer[GLOBAL_MAX];
	/* Number of bytes of ac_buffer actually transferred. */
	int		 ac_buflen;
	/* Not referenced in the visible part of this file. */
	int		 ac_seconds;
};

/*
 * Posted by sigusr1_handler()/thr_handler() and waited on by
 * poll_signaled(); (re)initialized by setup_signal()/setup_thread().
 */
static sem_t		completions;
93 
94 
/*
 * Populate the first len bytes of buffer with the low byte of successive
 * random() values.  The PRNG is seeded with srandom(seed) first, so a given
 * seed always produces the same contents.  Nothing is written when len is
 * zero or negative.
 */
static void
aio_fill_buffer(char *buffer, int len, long seed)
{
	int remaining;

	srandom(seed);
	for (remaining = len; remaining > 0; remaining--)
		*buffer++ = random() & 0xff;
}
111 
/*
 * Check whether the first len bytes of buffer are what aio_fill_buffer()
 * produces for seed.  Returns (1) when every byte matches (including the
 * trivial case of len <= 0) and (0) at the first byte that differs.
 */
static int
aio_test_buffer(char *buffer, int len, long seed)
{
	int idx = 0;

	srandom(seed);
	while (idx < len) {
		char expected = random() & 0xff;

		if (expected != buffer[idx])
			return (0);
		idx++;
	}
	return (1);
}
130 
131 /*
132  * Initialize a testing context given the file descriptors provided by the
133  * test setup.
134  */
135 static void
136 aio_context_init(struct aio_context *ac, int read_fd,
137     int write_fd, int buflen)
138 {
139 
140 	ATF_REQUIRE_MSG(buflen <= BUFFER_MAX,
141 	    "aio_context_init: buffer too large (%d > %d)",
142 	    buflen, BUFFER_MAX);
143 	bzero(ac, sizeof(*ac));
144 	ac->ac_read_fd = read_fd;
145 	ac->ac_write_fd = write_fd;
146 	ac->ac_buflen = buflen;
147 	srandomdev();
148 	ac->ac_seed = random();
149 	aio_fill_buffer(ac->ac_buffer, buflen, ac->ac_seed);
150 	ATF_REQUIRE_MSG(aio_test_buffer(ac->ac_buffer, buflen,
151 	    ac->ac_seed) != 0, "aio_test_buffer: internal error");
152 }
153 
/*
 * Completion method: poll aio_error() every 25ms until the request is no
 * longer EINPROGRESS, then collect its result.
 *
 * Returns the value of aio_return() on success.  On failure returns -1 with
 * errno set to the request's error, so that callers which test "< 0" and
 * print strerror(errno) report the real cause instead of mistaking a
 * positive errno value for a byte count.
 */
static ssize_t
poll(struct aiocb *aio)
{
	int error;

	while ((error = aio_error(aio)) == EINPROGRESS)
		usleep(25000);
	if (error == 0)
		return (aio_return(aio));
	if (error != -1) {
		/*
		 * The request itself failed.  Reap it so it does not stay
		 * queued, then hand its error back through errno.
		 */
		(void)aio_return(aio);
		errno = error;
	}
	/* error == -1: aio_error() itself failed and already set errno. */
	return (-1);
}
166 
/*
 * SIGUSR1 handler installed by setup_signal(): posts the completions
 * semaphore so that poll_signaled() wakes up.  The signal number is unused.
 */
static void
sigusr1_handler(int sig __unused)
{
	ATF_REQUIRE_EQ(0, sem_post(&completions));
}
172 
/*
 * SIGEV_THREAD notification function installed by setup_thread(): posts the
 * completions semaphore so that poll_signaled() wakes up.  The sigval
 * argument is unused.
 */
static void
thr_handler(union sigval sv __unused)
{
	ATF_REQUIRE_EQ(0, sem_post(&completions));
}
178 
179 static ssize_t
180 poll_signaled(struct aiocb *aio)
181 {
182 	int error;
183 
184 	ATF_REQUIRE_EQ(0, sem_wait(&completions));
185 	error = aio_error(aio);
186 	switch (error) {
187 		case EINPROGRESS:
188 			errno = EINTR;
189 			return (-1);
190 		case 0:
191 			return (aio_return(aio));
192 		default:
193 			return (error);
194 	}
195 }
196 
/*
 * Set up signal-based completion notification: initialize the completions
 * semaphore to zero, install sigusr1_handler() for SIGUSR1 and return a
 * sigevent that requests SIGUSR1 delivery.
 *
 * The returned pointer refers to function-local static storage.  This isn't
 * thread safe, but it's ok since ATF runs each testcase in a separate
 * process.
 */
static struct sigevent*
setup_signal(void)
{
	static struct sigevent sev;

	ATF_REQUIRE_EQ(0, sem_init(&completions, false, 0));
	sev.sigev_notify = SIGEV_SIGNAL;
	sev.sigev_signo = SIGUSR1;
	ATF_REQUIRE(SIG_ERR != signal(SIGUSR1, sigusr1_handler));
	return (&sev);
}
213 
/*
 * Set up thread-based completion notification: initialize the completions
 * semaphore to zero and return a sigevent that asks for thr_handler() to be
 * invoked via SIGEV_THREAD with default (NULL) attributes.
 *
 * The returned pointer refers to function-local static storage.  This isn't
 * thread safe, but it's ok since ATF runs each testcase in a separate
 * process.
 */
static struct sigevent*
setup_thread(void)
{
	static struct sigevent sev;

	ATF_REQUIRE_EQ(0, sem_init(&completions, false, 0));
	sev.sigev_notify = SIGEV_THREAD;
	sev.sigev_notify_function = thr_handler;
	sev.sigev_notify_attributes = NULL;
	return (&sev);
}
230 
/*
 * Completion method: block in aio_suspend() (no timeout) on the single
 * request, then return its aio_return() value.  If aio_suspend() fails, its
 * return value is passed back unchanged.
 */
static ssize_t
suspend(struct aiocb *aio)
{
	const struct aiocb *const waitlist[1] = { aio };
	int rc;

	rc = aio_suspend(waitlist, 1, NULL);
	if (rc != 0)
		return (rc);
	return (aio_return(aio));
}
243 
/*
 * Completion method: block in aio_waitcomplete() (no timeout) and return its
 * result.  The test fails if the request it hands back is not the one we
 * were asked to wait for.
 */
static ssize_t
waitcomplete(struct aiocb *aio)
{
	struct aiocb *aiop;
	ssize_t ret;

	ret = aio_waitcomplete(&aiop, NULL);
	ATF_REQUIRE_EQ(aio, aiop);
	return (ret);
}
254 
255 /*
256  * Perform a simple write test of our initialized data buffer to the provided
257  * file descriptor.
258  */
259 static void
260 aio_write_test(struct aio_context *ac, completion comp, struct sigevent *sev)
261 {
262 	struct aiocb aio;
263 	ssize_t len;
264 
265 	bzero(&aio, sizeof(aio));
266 	aio.aio_buf = ac->ac_buffer;
267 	aio.aio_nbytes = ac->ac_buflen;
268 	aio.aio_fildes = ac->ac_write_fd;
269 	aio.aio_offset = 0;
270 	if (sev)
271 		aio.aio_sigevent = *sev;
272 
273 	if (aio_write(&aio) < 0)
274 		atf_tc_fail("aio_write failed: %s", strerror(errno));
275 
276 	len = comp(&aio);
277 	if (len < 0)
278 		atf_tc_fail("aio failed: %s", strerror(errno));
279 
280 	if (len != ac->ac_buflen)
281 		atf_tc_fail("aio short write (%jd)", (intmax_t)len);
282 }
283 
284 /*
285  * Perform a vectored I/O test of our initialized data buffer to the provided
286  * file descriptor.
287  *
288  * To vectorize the linear buffer, chop it up into two pieces of dissimilar
289  * size, and swap their offsets.
290  */
291 static void
292 aio_writev_test(struct aio_context *ac, completion comp, struct sigevent *sev)
293 {
294 	struct aiocb aio;
295 	struct iovec iov[2];
296 	size_t len0, len1;
297 	ssize_t len;
298 
299 	bzero(&aio, sizeof(aio));
300 
301 	aio.aio_fildes = ac->ac_write_fd;
302 	aio.aio_offset = 0;
303 	len0 = ac->ac_buflen * 3 / 4;
304 	len1 = ac->ac_buflen / 4;
305 	iov[0].iov_base = ac->ac_buffer + len1;
306 	iov[0].iov_len = len0;
307 	iov[1].iov_base = ac->ac_buffer;
308 	iov[1].iov_len = len1;
309 	aio.aio_iov = iov;
310 	aio.aio_iovcnt = 2;
311 	if (sev)
312 		aio.aio_sigevent = *sev;
313 
314 	if (aio_writev(&aio) < 0)
315 		atf_tc_fail("aio_writev failed: %s", strerror(errno));
316 
317 	len = comp(&aio);
318 	if (len < 0)
319 		atf_tc_fail("aio failed: %s", strerror(errno));
320 
321 	if (len != ac->ac_buflen)
322 		atf_tc_fail("aio short write (%jd)", (intmax_t)len);
323 }
324 
325 /*
326  * Perform a simple read test of our initialized data buffer from the
327  * provided file descriptor.
328  */
329 static void
330 aio_read_test(struct aio_context *ac, completion comp, struct sigevent *sev)
331 {
332 	struct aiocb aio;
333 	ssize_t len;
334 
335 	bzero(ac->ac_buffer, ac->ac_buflen);
336 	bzero(&aio, sizeof(aio));
337 	aio.aio_buf = ac->ac_buffer;
338 	aio.aio_nbytes = ac->ac_buflen;
339 	aio.aio_fildes = ac->ac_read_fd;
340 	aio.aio_offset = 0;
341 	if (sev)
342 		aio.aio_sigevent = *sev;
343 
344 	if (aio_read(&aio) < 0)
345 		atf_tc_fail("aio_read failed: %s", strerror(errno));
346 
347 	len = comp(&aio);
348 	if (len < 0)
349 		atf_tc_fail("aio failed: %s", strerror(errno));
350 
351 	ATF_REQUIRE_EQ_MSG(len, ac->ac_buflen,
352 	    "aio short read (%jd)", (intmax_t)len);
353 
354 	if (aio_test_buffer(ac->ac_buffer, ac->ac_buflen, ac->ac_seed) == 0)
355 		atf_tc_fail("buffer mismatched");
356 }
357 
358 static void
359 aio_readv_test(struct aio_context *ac, completion comp, struct sigevent *sev)
360 {
361 	struct aiocb aio;
362 	struct iovec iov[2];
363 	size_t len0, len1;
364 	ssize_t len;
365 
366 	bzero(ac->ac_buffer, ac->ac_buflen);
367 	bzero(&aio, sizeof(aio));
368 	aio.aio_fildes = ac->ac_read_fd;
369 	aio.aio_offset = 0;
370 	len0 = ac->ac_buflen * 3 / 4;
371 	len1 = ac->ac_buflen / 4;
372 	iov[0].iov_base = ac->ac_buffer + len1;
373 	iov[0].iov_len = len0;
374 	iov[1].iov_base = ac->ac_buffer;
375 	iov[1].iov_len = len1;
376 	aio.aio_iov = iov;
377 	aio.aio_iovcnt = 2;
378 	if (sev)
379 		aio.aio_sigevent = *sev;
380 
381 	if (aio_readv(&aio) < 0)
382 		atf_tc_fail("aio_read failed: %s", strerror(errno));
383 
384 	len = comp(&aio);
385 	if (len < 0)
386 		atf_tc_fail("aio failed: %s", strerror(errno));
387 
388 	ATF_REQUIRE_EQ_MSG(len, ac->ac_buflen,
389 	    "aio short read (%jd)", (intmax_t)len);
390 
391 	if (aio_test_buffer(ac->ac_buffer, ac->ac_buflen, ac->ac_seed) == 0)
392 		atf_tc_fail("buffer mismatched");
393 }
394 
395 /*
396  * Series of type-specific tests for AIO.  For now, we just make sure we can
397  * issue a write and then a read to each type.  We assume that once a write
398  * is issued, a read can follow.
399  */
400 
401 /*
402  * Test with a classic file.  Assumes we can create a moderate size temporary
403  * file.
404  */
405 #define	FILE_LEN	GLOBAL_MAX
406 #define	FILE_PATHNAME	"testfile"
407 
408 static void
409 aio_file_test(completion comp, struct sigevent *sev, bool vectored)
410 {
411 	struct aio_context ac;
412 	int fd;
413 
414 	ATF_REQUIRE_KERNEL_MODULE("aio");
415 	ATF_REQUIRE_UNSAFE_AIO();
416 
417 	fd = open(FILE_PATHNAME, O_RDWR | O_CREAT, 0600);
418 	ATF_REQUIRE_MSG(fd != -1, "open failed: %s", strerror(errno));
419 
420 	aio_context_init(&ac, fd, fd, FILE_LEN);
421 	if (vectored) {
422 		aio_writev_test(&ac, comp, sev);
423 		aio_readv_test(&ac, comp, sev);
424 	} else {
425 		aio_write_test(&ac, comp, sev);
426 		aio_read_test(&ac, comp, sev);
427 	}
428 	close(fd);
429 }
430 
/* Regular file; completion detected by polling aio_error(). */
ATF_TC_WITHOUT_HEAD(file_poll);
ATF_TC_BODY(file_poll, tc)
{
	aio_file_test(poll, NULL, false);
}

/* Regular file; completion announced by SIGUSR1 (SIGEV_SIGNAL). */
ATF_TC_WITHOUT_HEAD(file_signal);
ATF_TC_BODY(file_signal, tc)
{
	aio_file_test(poll_signaled, setup_signal(), false);
}

/* Regular file; completion awaited with aio_suspend(). */
ATF_TC_WITHOUT_HEAD(file_suspend);
ATF_TC_BODY(file_suspend, tc)
{
	aio_file_test(suspend, NULL, false);
}

/* Regular file; completion announced by a SIGEV_THREAD callback. */
ATF_TC_WITHOUT_HEAD(file_thread);
ATF_TC_BODY(file_thread, tc)
{
	aio_file_test(poll_signaled, setup_thread(), false);
}

/* Regular file; completion awaited with aio_waitcomplete(). */
ATF_TC_WITHOUT_HEAD(file_waitcomplete);
ATF_TC_BODY(file_waitcomplete, tc)
{
	aio_file_test(waitcomplete, NULL, false);
}
460 
461 #define	FIFO_LEN	256
462 #define	FIFO_PATHNAME	"testfifo"
463 
464 static void
465 aio_fifo_test(completion comp, struct sigevent *sev)
466 {
467 	int error, read_fd = -1, write_fd = -1;
468 	struct aio_context ac;
469 
470 	ATF_REQUIRE_KERNEL_MODULE("aio");
471 	ATF_REQUIRE_UNSAFE_AIO();
472 
473 	ATF_REQUIRE_MSG(mkfifo(FIFO_PATHNAME, 0600) != -1,
474 	    "mkfifo failed: %s", strerror(errno));
475 
476 	read_fd = open(FIFO_PATHNAME, O_RDONLY | O_NONBLOCK);
477 	if (read_fd == -1) {
478 		error = errno;
479 		errno = error;
480 		atf_tc_fail("read_fd open failed: %s",
481 		    strerror(errno));
482 	}
483 
484 	write_fd = open(FIFO_PATHNAME, O_WRONLY);
485 	if (write_fd == -1) {
486 		error = errno;
487 		errno = error;
488 		atf_tc_fail("write_fd open failed: %s",
489 		    strerror(errno));
490 	}
491 
492 	aio_context_init(&ac, read_fd, write_fd, FIFO_LEN);
493 	aio_write_test(&ac, comp, sev);
494 	aio_read_test(&ac, comp, sev);
495 
496 	close(read_fd);
497 	close(write_fd);
498 }
499 
/* FIFO; completion detected by polling aio_error(). */
ATF_TC_WITHOUT_HEAD(fifo_poll);
ATF_TC_BODY(fifo_poll, tc)
{
	aio_fifo_test(poll, NULL);
}

/* FIFO; completion announced by SIGUSR1 (SIGEV_SIGNAL). */
ATF_TC_WITHOUT_HEAD(fifo_signal);
ATF_TC_BODY(fifo_signal, tc)
{
	aio_fifo_test(poll_signaled, setup_signal());
}

/* FIFO; completion awaited with aio_suspend(). */
ATF_TC_WITHOUT_HEAD(fifo_suspend);
ATF_TC_BODY(fifo_suspend, tc)
{
	aio_fifo_test(suspend, NULL);
}

/* FIFO; completion announced by a SIGEV_THREAD callback. */
ATF_TC_WITHOUT_HEAD(fifo_thread);
ATF_TC_BODY(fifo_thread, tc)
{
	aio_fifo_test(poll_signaled, setup_thread());
}

/* FIFO; completion awaited with aio_waitcomplete(). */
ATF_TC_WITHOUT_HEAD(fifo_waitcomplete);
ATF_TC_BODY(fifo_waitcomplete, tc)
{
	aio_fifo_test(waitcomplete, NULL);
}
529 
530 #define	UNIX_SOCKETPAIR_LEN	256
531 static void
532 aio_unix_socketpair_test(completion comp, struct sigevent *sev, bool vectored)
533 {
534 	struct aio_context ac;
535 	struct rusage ru_before, ru_after;
536 	int sockets[2];
537 
538 	ATF_REQUIRE_KERNEL_MODULE("aio");
539 
540 	ATF_REQUIRE_MSG(socketpair(PF_UNIX, SOCK_STREAM, 0, sockets) != -1,
541 	    "socketpair failed: %s", strerror(errno));
542 
543 	aio_context_init(&ac, sockets[0], sockets[1], UNIX_SOCKETPAIR_LEN);
544 	ATF_REQUIRE_MSG(getrusage(RUSAGE_SELF, &ru_before) != -1,
545 	    "getrusage failed: %s", strerror(errno));
546 	if (vectored) {
547 		aio_writev_test(&ac, comp, sev);
548 		aio_readv_test(&ac, comp, sev);
549 	} else {
550 		aio_write_test(&ac, comp, sev);
551 		aio_read_test(&ac, comp, sev);
552 	}
553 	ATF_REQUIRE_MSG(getrusage(RUSAGE_SELF, &ru_after) != -1,
554 	    "getrusage failed: %s", strerror(errno));
555 	ATF_REQUIRE(ru_after.ru_msgsnd == ru_before.ru_msgsnd + 1);
556 	ATF_REQUIRE(ru_after.ru_msgrcv == ru_before.ru_msgrcv + 1);
557 
558 	close(sockets[0]);
559 	close(sockets[1]);
560 }
561 
/* Socketpair; completion detected by polling aio_error(). */
ATF_TC_WITHOUT_HEAD(socket_poll);
ATF_TC_BODY(socket_poll, tc)
{
	aio_unix_socketpair_test(poll, NULL, false);
}

/* Socketpair; completion announced by SIGUSR1 (SIGEV_SIGNAL). */
ATF_TC_WITHOUT_HEAD(socket_signal);
ATF_TC_BODY(socket_signal, tc)
{
	aio_unix_socketpair_test(poll_signaled, setup_signal(), false);
}

/* Socketpair; completion awaited with aio_suspend(). */
ATF_TC_WITHOUT_HEAD(socket_suspend);
ATF_TC_BODY(socket_suspend, tc)
{
	aio_unix_socketpair_test(suspend, NULL, false);
}

/* Socketpair; completion announced by a SIGEV_THREAD callback. */
ATF_TC_WITHOUT_HEAD(socket_thread);
ATF_TC_BODY(socket_thread, tc)
{
	aio_unix_socketpair_test(poll_signaled, setup_thread(), false);
}

/* Socketpair; completion awaited with aio_waitcomplete(). */
ATF_TC_WITHOUT_HEAD(socket_waitcomplete);
ATF_TC_BODY(socket_waitcomplete, tc)
{
	aio_unix_socketpair_test(waitcomplete, NULL, false);
}
591 
/*
 * Pair of pty descriptors.  NOTE(review): nothing in the visible part of
 * this file references this structure; confirm whether it is still needed.
 */
struct aio_pty_arg {
	int	apa_read_fd;	/* descriptor to read from */
	int	apa_write_fd;	/* descriptor to write to */
};
596 
597 #define	PTY_LEN		256
598 static void
599 aio_pty_test(completion comp, struct sigevent *sev)
600 {
601 	struct aio_context ac;
602 	int read_fd, write_fd;
603 	struct termios ts;
604 	int error;
605 
606 	ATF_REQUIRE_KERNEL_MODULE("aio");
607 	ATF_REQUIRE_UNSAFE_AIO();
608 
609 	ATF_REQUIRE_MSG(openpty(&read_fd, &write_fd, NULL, NULL, NULL) == 0,
610 	    "openpty failed: %s", strerror(errno));
611 
612 
613 	if (tcgetattr(write_fd, &ts) < 0) {
614 		error = errno;
615 		errno = error;
616 		atf_tc_fail("tcgetattr failed: %s", strerror(errno));
617 	}
618 	cfmakeraw(&ts);
619 	if (tcsetattr(write_fd, TCSANOW, &ts) < 0) {
620 		error = errno;
621 		errno = error;
622 		atf_tc_fail("tcsetattr failed: %s", strerror(errno));
623 	}
624 	aio_context_init(&ac, read_fd, write_fd, PTY_LEN);
625 
626 	aio_write_test(&ac, comp, sev);
627 	aio_read_test(&ac, comp, sev);
628 
629 	close(read_fd);
630 	close(write_fd);
631 }
632 
/* Pty; completion detected by polling aio_error(). */
ATF_TC_WITHOUT_HEAD(pty_poll);
ATF_TC_BODY(pty_poll, tc)
{
	aio_pty_test(poll, NULL);
}

/* Pty; completion announced by SIGUSR1 (SIGEV_SIGNAL). */
ATF_TC_WITHOUT_HEAD(pty_signal);
ATF_TC_BODY(pty_signal, tc)
{
	aio_pty_test(poll_signaled, setup_signal());
}

/* Pty; completion awaited with aio_suspend(). */
ATF_TC_WITHOUT_HEAD(pty_suspend);
ATF_TC_BODY(pty_suspend, tc)
{
	aio_pty_test(suspend, NULL);
}

/* Pty; completion announced by a SIGEV_THREAD callback. */
ATF_TC_WITHOUT_HEAD(pty_thread);
ATF_TC_BODY(pty_thread, tc)
{
	aio_pty_test(poll_signaled, setup_thread());
}

/* Pty; completion awaited with aio_waitcomplete(). */
ATF_TC_WITHOUT_HEAD(pty_waitcomplete);
ATF_TC_BODY(pty_waitcomplete, tc)
{
	aio_pty_test(waitcomplete, NULL);
}
662 
663 #define	PIPE_LEN	256
664 static void
665 aio_pipe_test(completion comp, struct sigevent *sev)
666 {
667 	struct aio_context ac;
668 	int pipes[2];
669 
670 	ATF_REQUIRE_KERNEL_MODULE("aio");
671 	ATF_REQUIRE_UNSAFE_AIO();
672 
673 	ATF_REQUIRE_MSG(pipe(pipes) != -1,
674 	    "pipe failed: %s", strerror(errno));
675 
676 	aio_context_init(&ac, pipes[0], pipes[1], PIPE_LEN);
677 	aio_write_test(&ac, comp, sev);
678 	aio_read_test(&ac, comp, sev);
679 
680 	close(pipes[0]);
681 	close(pipes[1]);
682 }
683 
/* Pipe; completion detected by polling aio_error(). */
ATF_TC_WITHOUT_HEAD(pipe_poll);
ATF_TC_BODY(pipe_poll, tc)
{
	aio_pipe_test(poll, NULL);
}

/* Pipe; completion announced by SIGUSR1 (SIGEV_SIGNAL). */
ATF_TC_WITHOUT_HEAD(pipe_signal);
ATF_TC_BODY(pipe_signal, tc)
{
	aio_pipe_test(poll_signaled, setup_signal());
}

/* Pipe; completion awaited with aio_suspend(). */
ATF_TC_WITHOUT_HEAD(pipe_suspend);
ATF_TC_BODY(pipe_suspend, tc)
{
	aio_pipe_test(suspend, NULL);
}

/* Pipe; completion announced by a SIGEV_THREAD callback. */
ATF_TC_WITHOUT_HEAD(pipe_thread);
ATF_TC_BODY(pipe_thread, tc)
{
	aio_pipe_test(poll_signaled, setup_thread());
}

/* Pipe; completion awaited with aio_waitcomplete(). */
ATF_TC_WITHOUT_HEAD(pipe_waitcomplete);
ATF_TC_BODY(pipe_waitcomplete, tc)
{
	aio_pipe_test(waitcomplete, NULL);
}
713 
714 #define	MD_LEN		GLOBAL_MAX
715 #define	MDUNIT_LINK	"mdunit_link"
716 
717 static int
718 aio_md_setup(void)
719 {
720 	int error, fd, mdctl_fd, unit;
721 	char pathname[PATH_MAX];
722 	struct md_ioctl mdio;
723 	char buf[80];
724 
725 	ATF_REQUIRE_KERNEL_MODULE("aio");
726 
727 	mdctl_fd = open("/dev/" MDCTL_NAME, O_RDWR, 0);
728 	ATF_REQUIRE_MSG(mdctl_fd != -1,
729 	    "opening /dev/%s failed: %s", MDCTL_NAME, strerror(errno));
730 
731 	bzero(&mdio, sizeof(mdio));
732 	mdio.md_version = MDIOVERSION;
733 	mdio.md_type = MD_MALLOC;
734 	mdio.md_options = MD_AUTOUNIT | MD_COMPRESS;
735 	mdio.md_mediasize = GLOBAL_MAX;
736 	mdio.md_sectorsize = 512;
737 
738 	if (ioctl(mdctl_fd, MDIOCATTACH, &mdio) < 0) {
739 		error = errno;
740 		errno = error;
741 		atf_tc_fail("ioctl MDIOCATTACH failed: %s", strerror(errno));
742 	}
743 	close(mdctl_fd);
744 
745 	/* Store the md unit number in a symlink for future cleanup */
746 	unit = mdio.md_unit;
747 	snprintf(buf, sizeof(buf), "%d", unit);
748 	ATF_REQUIRE_EQ(0, symlink(buf, MDUNIT_LINK));
749 	snprintf(pathname, PATH_MAX, "/dev/md%d", unit);
750 	fd = open(pathname, O_RDWR);
751 	ATF_REQUIRE_MSG(fd != -1,
752 	    "opening %s failed: %s", pathname, strerror(errno));
753 
754 	return (fd);
755 }
756 
757 static void
758 aio_md_cleanup(void)
759 {
760 	struct md_ioctl mdio;
761 	int mdctl_fd, error, n, unit;
762 	char buf[80];
763 
764 	mdctl_fd = open("/dev/" MDCTL_NAME, O_RDWR, 0);
765 	ATF_REQUIRE(mdctl_fd >= 0);
766 	n = readlink(MDUNIT_LINK, buf, sizeof(buf));
767 	if (n > 0) {
768 		if (sscanf(buf, "%d", &unit) == 1 && unit >= 0) {
769 			bzero(&mdio, sizeof(mdio));
770 			mdio.md_version = MDIOVERSION;
771 			mdio.md_unit = unit;
772 			if (ioctl(mdctl_fd, MDIOCDETACH, &mdio) == -1) {
773 				error = errno;
774 				close(mdctl_fd);
775 				errno = error;
776 				atf_tc_fail("ioctl MDIOCDETACH failed: %s",
777 				    strerror(errno));
778 			}
779 		}
780 	}
781 
782 	close(mdctl_fd);
783 }
784 
785 static void
786 aio_md_test(completion comp, struct sigevent *sev, bool vectored)
787 {
788 	struct aio_context ac;
789 	int fd;
790 
791 	fd = aio_md_setup();
792 	aio_context_init(&ac, fd, fd, MD_LEN);
793 	if (vectored) {
794 		aio_writev_test(&ac, comp, sev);
795 		aio_readv_test(&ac, comp, sev);
796 	} else {
797 		aio_write_test(&ac, comp, sev);
798 		aio_read_test(&ac, comp, sev);
799 	}
800 
801 	close(fd);
802 }
803 
/*
 * Memory disk test cases.  Each one declares require.user=root in its head
 * and detaches the md device in its cleanup routine via aio_md_cleanup().
 */

/* Memory disk; completion detected by polling aio_error(). */
ATF_TC_WITH_CLEANUP(md_poll);
ATF_TC_HEAD(md_poll, tc)
{

	atf_tc_set_md_var(tc, "require.user", "root");
}
ATF_TC_BODY(md_poll, tc)
{
	aio_md_test(poll, NULL, false);
}
ATF_TC_CLEANUP(md_poll, tc)
{
	aio_md_cleanup();
}

/* Memory disk; completion announced by SIGUSR1 (SIGEV_SIGNAL). */
ATF_TC_WITH_CLEANUP(md_signal);
ATF_TC_HEAD(md_signal, tc)
{

	atf_tc_set_md_var(tc, "require.user", "root");
}
ATF_TC_BODY(md_signal, tc)
{
	aio_md_test(poll_signaled, setup_signal(), false);
}
ATF_TC_CLEANUP(md_signal, tc)
{
	aio_md_cleanup();
}

/* Memory disk; completion awaited with aio_suspend(). */
ATF_TC_WITH_CLEANUP(md_suspend);
ATF_TC_HEAD(md_suspend, tc)
{

	atf_tc_set_md_var(tc, "require.user", "root");
}
ATF_TC_BODY(md_suspend, tc)
{
	aio_md_test(suspend, NULL, false);
}
ATF_TC_CLEANUP(md_suspend, tc)
{
	aio_md_cleanup();
}

/* Memory disk; completion announced by a SIGEV_THREAD callback. */
ATF_TC_WITH_CLEANUP(md_thread);
ATF_TC_HEAD(md_thread, tc)
{

	atf_tc_set_md_var(tc, "require.user", "root");
}
ATF_TC_BODY(md_thread, tc)
{
	aio_md_test(poll_signaled, setup_thread(), false);
}
ATF_TC_CLEANUP(md_thread, tc)
{
	aio_md_cleanup();
}

/* Memory disk; completion awaited with aio_waitcomplete(). */
ATF_TC_WITH_CLEANUP(md_waitcomplete);
ATF_TC_HEAD(md_waitcomplete, tc)
{

	atf_tc_set_md_var(tc, "require.user", "root");
}
ATF_TC_BODY(md_waitcomplete, tc)
{
	aio_md_test(waitcomplete, NULL, false);
}
ATF_TC_CLEANUP(md_waitcomplete, tc)
{
	aio_md_cleanup();
}
878 
#define	ZVOL_VDEV_PATHNAME	"test_vdev"
#define POOL_SIZE		(1 << 28)	/* 256 MB */
#define ZVOL_SIZE		"64m"
#define POOL_NAME		"aio_testpool"
#define ZVOL_NAME		"aio_testvol"

/*
 * Run cmd through system(3) and fail the test unless it reports status 0.
 * "what" names the step in the failure message.  errno is only consulted
 * when system() itself returns -1; a command that merely exits non-zero
 * says nothing about errno, so the raw status is reported instead.
 */
static void
aio_zvol_run(const char *what, const char *cmd)
{
	int status;

	status = system(cmd);
	if (status == -1)
		atf_tc_fail("%s failed: %s", what, strerror(errno));
	if (status != 0)
		atf_tc_fail("%s failed: system() returned %d", what, status);
}

/*
 * Create a ZFS pool on a POOL_SIZE file vdev in the current directory,
 * create a ZVOL_SIZE volume in it with volmode=dev, and open the volume's
 * device node read-write.
 *
 * The pool is named POOL_NAME.<pid>; the pid is also written to "pidfile"
 * so that aio_zvol_cleanup() can reconstruct the name.
 *
 * Returns the open descriptor; any failure fails the test.
 */
static int
aio_zvol_setup(void)
{
	FILE *pidfile;
	int fd;
	pid_t pid;
	char pool_name[80];
	char cmd[160];
	char zvol_name[160];
	char devname[160];

	ATF_REQUIRE_KERNEL_MODULE("aio");
	ATF_REQUIRE_KERNEL_MODULE("zfs");

	/* Backing file for the pool. */
	fd = open(ZVOL_VDEV_PATHNAME, O_RDWR | O_CREAT, 0600);
	ATF_REQUIRE_MSG(fd != -1, "open failed: %s", strerror(errno));
	ATF_REQUIRE_EQ_MSG(0,
	    ftruncate(fd, POOL_SIZE), "ftruncate failed: %s", strerror(errno));
	close(fd);

	pid = getpid();
	pidfile = fopen("pidfile", "w");
	ATF_REQUIRE_MSG(NULL != pidfile, "fopen: %s", strerror(errno));
	fprintf(pidfile, "%d", pid);
	fclose(pidfile);

	snprintf(pool_name, sizeof(pool_name), POOL_NAME ".%d", pid);
	snprintf(zvol_name, sizeof(zvol_name), "%s/" ZVOL_NAME, pool_name);
	snprintf(cmd, sizeof(cmd), "zpool create %s $PWD/" ZVOL_VDEV_PATHNAME,
	    pool_name);
	aio_zvol_run("zpool create", cmd);
	snprintf(cmd, sizeof(cmd),
	    "zfs create -o volblocksize=8192 -o volmode=dev -V "
		ZVOL_SIZE " %s", zvol_name);
	aio_zvol_run("zfs create", cmd);
	/*
	 * XXX Due to bug 251828, we need an extra "zfs set" here
	 * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=251828
	 */
	snprintf(cmd, sizeof(cmd), "zfs set volmode=dev %s", zvol_name);
	aio_zvol_run("zfs set", cmd);

	snprintf(devname, sizeof(devname), "/dev/zvol/%s", zvol_name);
	/* Retry the open only when it was interrupted by a signal. */
	do {
		fd = open(devname, O_RDWR);
	} while (fd == -1 && errno == EINTR) ;
	ATF_REQUIRE_MSG(fd != -1, "open failed: %s", strerror(errno));
	return (fd);
}
937 
/*
 * Tear down what aio_zvol_setup() created: read the pid stored in "pidfile"
 * and run "zpool destroy" on POOL_NAME.<pid>.
 *
 * A missing pidfile is taken to mean setup never got that far and is
 * silently ignored.  The result of "zpool destroy" is not checked.
 */
static void
aio_zvol_cleanup(void)
{
	FILE *pidfile;
	pid_t testpid;
	char cmd[160];

	pidfile = fopen("pidfile", "r");
	if (pidfile == NULL && errno == ENOENT) {
		/* Setup probably failed */
		return;
	}
	ATF_REQUIRE_MSG(NULL != pidfile, "fopen: %s", strerror(errno));
	ATF_REQUIRE_EQ(1, fscanf(pidfile, "%d", &testpid));
	fclose(pidfile);

	snprintf(cmd, sizeof(cmd), "zpool destroy " POOL_NAME ".%d", testpid);
	system(cmd);
}
957 
958 
/*
 * Check the size limit of aio_read(): a request of the maximum supported
 * size on an empty file must complete with 0 bytes, and a request one byte
 * larger must be rejected with EINVAL, either at submission or at
 * completion.
 */
ATF_TC_WITHOUT_HEAD(aio_large_read_test);
ATF_TC_BODY(aio_large_read_test, tc)
{
	struct aiocb cb, *cbp;
	ssize_t nread;
	size_t len;
	int fd;
#ifdef __LP64__
	int clamped;
#endif

	ATF_REQUIRE_KERNEL_MODULE("aio");
	ATF_REQUIRE_UNSAFE_AIO();

#ifdef __LP64__
	/* On LP64 the limit depends on the debug.iosize_max_clamp sysctl. */
	len = sizeof(clamped);
	if (sysctlbyname("debug.iosize_max_clamp", &clamped, &len, NULL, 0) ==
	    -1)
		atf_libc_error(errno, "Failed to read debug.iosize_max_clamp");
#endif

	/* Determine the maximum supported read(2) size. */
	len = SSIZE_MAX;
#ifdef __LP64__
	if (clamped)
		len = INT_MAX;
#endif

	fd = open(FILE_PATHNAME, O_RDWR | O_CREAT, 0600);
	ATF_REQUIRE_MSG(fd != -1, "open failed: %s", strerror(errno));

	/* Remove the name right away; the open descriptor is all we use. */
	unlink(FILE_PATHNAME);

	/* First request: exactly the maximum size, with a NULL buffer. */
	memset(&cb, 0, sizeof(cb));
	cb.aio_nbytes = len;
	cb.aio_fildes = fd;
	cb.aio_buf = NULL;
	if (aio_read(&cb) == -1)
		atf_tc_fail("aio_read() of maximum read size failed: %s",
		    strerror(errno));

	nread = aio_waitcomplete(&cbp, NULL);
	if (nread == -1)
		atf_tc_fail("aio_waitcomplete() failed: %s", strerror(errno));
	if (nread != 0)
		atf_tc_fail("aio_read() from empty file returned data: %zd",
		    nread);

	/* Second request: one byte over the maximum; EINVAL is expected. */
	memset(&cb, 0, sizeof(cb));
	cb.aio_nbytes = len + 1;
	cb.aio_fildes = fd;
	cb.aio_buf = NULL;
	if (aio_read(&cb) == -1) {
		if (errno == EINVAL)
			goto finished;
		atf_tc_fail("aio_read() of too large read size failed: %s",
		    strerror(errno));
	}

	nread = aio_waitcomplete(&cbp, NULL);
	if (nread == -1) {
		if (errno == EINVAL)
			goto finished;
		atf_tc_fail("aio_waitcomplete() failed: %s", strerror(errno));
	}
	/* The oversized request completed without error: that is a failure. */
	atf_tc_fail("aio_read() of too large read size returned: %zd", nread);

finished:
	close(fd);
}
1029 
1030 /*
1031  * This tests for a bug where arriving socket data can wakeup multiple
1032  * AIO read requests resulting in an uncancellable request.
1033  */
1034 ATF_TC_WITHOUT_HEAD(aio_socket_two_reads);
1035 ATF_TC_BODY(aio_socket_two_reads, tc)
1036 {
1037 	struct ioreq {
1038 		struct aiocb iocb;
1039 		char buffer[1024];
1040 	} ioreq[2];
1041 	struct aiocb *iocb;
1042 	unsigned i;
1043 	int s[2];
1044 	char c;
1045 
1046 	ATF_REQUIRE_KERNEL_MODULE("aio");
1047 #if __FreeBSD_version < 1100101
1048 	aft_tc_skip("kernel version %d is too old (%d required)",
1049 	    __FreeBSD_version, 1100101);
1050 #endif
1051 
1052 	ATF_REQUIRE(socketpair(PF_UNIX, SOCK_STREAM, 0, s) != -1);
1053 
1054 	/* Queue two read requests. */
1055 	memset(&ioreq, 0, sizeof(ioreq));
1056 	for (i = 0; i < nitems(ioreq); i++) {
1057 		ioreq[i].iocb.aio_nbytes = sizeof(ioreq[i].buffer);
1058 		ioreq[i].iocb.aio_fildes = s[0];
1059 		ioreq[i].iocb.aio_buf = ioreq[i].buffer;
1060 		ATF_REQUIRE(aio_read(&ioreq[i].iocb) == 0);
1061 	}
1062 
1063 	/* Send a single byte.  This should complete one request. */
1064 	c = 0xc3;
1065 	ATF_REQUIRE(write(s[1], &c, sizeof(c)) == 1);
1066 
1067 	ATF_REQUIRE(aio_waitcomplete(&iocb, NULL) == 1);
1068 
1069 	/* Determine which request completed and verify the data was read. */
1070 	if (iocb == &ioreq[0].iocb)
1071 		i = 0;
1072 	else
1073 		i = 1;
1074 	ATF_REQUIRE(ioreq[i].buffer[0] == c);
1075 
1076 	i ^= 1;
1077 
1078 	/*
1079 	 * Try to cancel the other request.  On broken systems this
1080 	 * will fail and the process will hang on exit.
1081 	 */
1082 	ATF_REQUIRE(aio_error(&ioreq[i].iocb) == EINPROGRESS);
1083 	ATF_REQUIRE(aio_cancel(s[0], &ioreq[i].iocb) == AIO_CANCELED);
1084 
1085 	close(s[1]);
1086 	close(s[0]);
1087 }
1088 
1089 static void
1090 aio_socket_blocking_short_write_test(bool vectored)
1091 {
1092 	struct aiocb iocb, *iocbp;
1093 	struct iovec iov[2];
1094 	char *buffer[2];
1095 	ssize_t done, r;
1096 	int buffer_size, sb_size;
1097 	socklen_t len;
1098 	int s[2];
1099 
1100 	ATF_REQUIRE_KERNEL_MODULE("aio");
1101 
1102 	ATF_REQUIRE(socketpair(PF_UNIX, SOCK_STREAM, 0, s) != -1);
1103 
1104 	len = sizeof(sb_size);
1105 	ATF_REQUIRE(getsockopt(s[0], SOL_SOCKET, SO_RCVBUF, &sb_size, &len) !=
1106 	    -1);
1107 	ATF_REQUIRE(len == sizeof(sb_size));
1108 	buffer_size = sb_size;
1109 
1110 	ATF_REQUIRE(getsockopt(s[1], SOL_SOCKET, SO_SNDBUF, &sb_size, &len) !=
1111 	    -1);
1112 	ATF_REQUIRE(len == sizeof(sb_size));
1113 	if (sb_size > buffer_size)
1114 		buffer_size = sb_size;
1115 
1116 	/*
1117 	 * Use twice the size of the MAX(receive buffer, send buffer)
1118 	 * to ensure that the write is split up into multiple writes
1119 	 * internally.
1120 	 */
1121 	buffer_size *= 2;
1122 
1123 	buffer[0] = malloc(buffer_size);
1124 	ATF_REQUIRE(buffer[0] != NULL);
1125 	buffer[1] = malloc(buffer_size);
1126 	ATF_REQUIRE(buffer[1] != NULL);
1127 
1128 	srandomdev();
1129 	aio_fill_buffer(buffer[1], buffer_size, random());
1130 
1131 	memset(&iocb, 0, sizeof(iocb));
1132 	iocb.aio_fildes = s[1];
1133 	if (vectored) {
1134 		iov[0].iov_base = buffer[1];
1135 		iov[0].iov_len = buffer_size / 2 + 1;
1136 		iov[1].iov_base = buffer[1] + buffer_size / 2 + 1;
1137 		iov[1].iov_len = buffer_size / 2 - 1;
1138 		iocb.aio_iov = iov;
1139 		iocb.aio_iovcnt = 2;
1140 		r = aio_writev(&iocb);
1141 		ATF_CHECK_EQ_MSG(0, r, "aio_writev returned %zd", r);
1142 	} else {
1143 		iocb.aio_buf = buffer[1];
1144 		iocb.aio_nbytes = buffer_size;
1145 		r = aio_write(&iocb);
1146 		ATF_CHECK_EQ_MSG(0, r, "aio_writev returned %zd", r);
1147 	}
1148 
1149 	done = recv(s[0], buffer[0], buffer_size, MSG_WAITALL);
1150 	ATF_REQUIRE(done == buffer_size);
1151 
1152 	done = aio_waitcomplete(&iocbp, NULL);
1153 	ATF_REQUIRE(iocbp == &iocb);
1154 	ATF_REQUIRE(done == buffer_size);
1155 
1156 	ATF_REQUIRE(memcmp(buffer[0], buffer[1], buffer_size) == 0);
1157 
1158 	close(s[1]);
1159 	close(s[0]);
1160 }
1161 
/*
 * Check that an aio_write() of a "large" buffer on a blocking socket is
 * not completed short.  The shared helper is run in its non-vectored mode.
 */
ATF_TC_WITHOUT_HEAD(aio_socket_blocking_short_write);
ATF_TC_BODY(aio_socket_blocking_short_write, tc)
{
	const bool vectored = false;

	aio_socket_blocking_short_write_test(vectored);
}
1171 
/*
 * Vectored flavour of aio_socket_blocking_short_write: additionally checks
 * that a partially completed vectored send is retried correctly.
 */
ATF_TC_WITHOUT_HEAD(aio_socket_blocking_short_write_vectored);
ATF_TC_BODY(aio_socket_blocking_short_write_vectored, tc)
{
	const bool vectored = true;

	aio_socket_blocking_short_write_test(vectored);
}
1181 
/*
 * This test verifies that cancelling a partially completed socket write
 * returns a short write rather than ECANCELED.
 *
 * A write of three socket-buffer sizes is queued on one end of a UNIX
 * stream socketpair, one buffer's worth is drained from the peer, and the
 * write is then cancelled while it is still in progress.
 */
ATF_TC_WITHOUT_HEAD(aio_socket_short_write_cancel);
ATF_TC_BODY(aio_socket_short_write_cancel, tc)
{
	struct aiocb iocb, *iocbp;
	char *buffer[2];
	ssize_t done;
	int buffer_size, sb_size, write_size;
	socklen_t len;
	int s[2];

	ATF_REQUIRE_KERNEL_MODULE("aio");

	ATF_REQUIRE(socketpair(PF_UNIX, SOCK_STREAM, 0, s) != -1);

	/* buffer_size = MAX(receive buffer of s[0], send buffer of s[1]). */
	len = sizeof(sb_size);
	ATF_REQUIRE(getsockopt(s[0], SOL_SOCKET, SO_RCVBUF, &sb_size, &len) !=
	    -1);
	ATF_REQUIRE(len == sizeof(sb_size));
	buffer_size = sb_size;

	/* len is a value-result argument; reset it before reusing it. */
	len = sizeof(sb_size);
	ATF_REQUIRE(getsockopt(s[1], SOL_SOCKET, SO_SNDBUF, &sb_size, &len) !=
	    -1);
	ATF_REQUIRE(len == sizeof(sb_size));
	if (sb_size > buffer_size)
		buffer_size = sb_size;

	/*
	 * Use three times the size of the MAX(receive buffer, send
	 * buffer) for the write to ensure that the write is split up
	 * into multiple writes internally.  The recv() ensures that
	 * the write has partially completed, but a remaining size of
	 * two buffers should ensure that the write has not completed
	 * fully when it is cancelled.
	 */
	write_size = buffer_size * 3;
	buffer[0] = malloc(buffer_size);
	ATF_REQUIRE(buffer[0] != NULL);
	buffer[1] = malloc(write_size);
	ATF_REQUIRE(buffer[1] != NULL);

	srandomdev();
	aio_fill_buffer(buffer[1], write_size, random());

	memset(&iocb, 0, sizeof(iocb));
	iocb.aio_fildes = s[1];
	iocb.aio_buf = buffer[1];
	iocb.aio_nbytes = write_size;
	ATF_REQUIRE(aio_write(&iocb) == 0);

	/* Drain exactly one buffer's worth so the write is partially done. */
	done = recv(s[0], buffer[0], buffer_size, MSG_WAITALL);
	ATF_REQUIRE(done == buffer_size);

	ATF_REQUIRE(aio_error(&iocb) == EINPROGRESS);
	ATF_REQUIRE(aio_cancel(s[1], &iocb) == AIO_NOTCANCELED);

	/* The request must complete with a short, non-error byte count. */
	done = aio_waitcomplete(&iocbp, NULL);
	ATF_REQUIRE(iocbp == &iocb);
	ATF_REQUIRE(done >= buffer_size && done <= buffer_size * 2);

	ATF_REQUIRE(memcmp(buffer[0], buffer[1], buffer_size) == 0);

	/* Release the data buffers; the original leaked both of them. */
	free(buffer[1]);
	free(buffer[0]);

	close(s[1]);
	close(s[0]);
}
1249 
1250 /*
1251  * test aio_fsync's behavior with bad inputs
1252  */
1253 ATF_TC_WITHOUT_HEAD(aio_fsync_errors);
1254 ATF_TC_BODY(aio_fsync_errors, tc)
1255 {
1256 	int fd;
1257 	struct aiocb iocb;
1258 
1259 	ATF_REQUIRE_KERNEL_MODULE("aio");
1260 	ATF_REQUIRE_UNSAFE_AIO();
1261 
1262 	fd = open(FILE_PATHNAME, O_RDWR | O_CREAT, 0600);
1263 	ATF_REQUIRE_MSG(fd != -1, "open failed: %s", strerror(errno));
1264 	unlink(FILE_PATHNAME);
1265 
1266 	/* aio_fsync should return EINVAL unless op is O_SYNC or O_DSYNC */
1267 	memset(&iocb, 0, sizeof(iocb));
1268 	iocb.aio_fildes = fd;
1269 	ATF_CHECK_EQ(-1, aio_fsync(666, &iocb));
1270 	ATF_CHECK_EQ(EINVAL, errno);
1271 
1272 	/* aio_fsync should return EBADF if fd is not a valid descriptor */
1273 	memset(&iocb, 0, sizeof(iocb));
1274 	iocb.aio_fildes = 666;
1275 	ATF_CHECK_EQ(-1, aio_fsync(O_SYNC, &iocb));
1276 	ATF_CHECK_EQ(EBADF, errno);
1277 
1278 	/* aio_fsync should return EINVAL if sigev_notify is invalid */
1279 	memset(&iocb, 0, sizeof(iocb));
1280 	iocb.aio_fildes = fd;
1281 	iocb.aio_sigevent.sigev_notify = 666;
1282 	ATF_CHECK_EQ(-1, aio_fsync(666, &iocb));
1283 	ATF_CHECK_EQ(EINVAL, errno);
1284 }
1285 
1286 /*
1287  * This test just performs a basic test of aio_fsync().
1288  */
1289 static void
1290 aio_fsync_test(int op)
1291 {
1292 	struct aiocb synccb, *iocbp;
1293 	struct {
1294 		struct aiocb iocb;
1295 		bool done;
1296 		char *buffer;
1297 	} buffers[16];
1298 	struct stat sb;
1299 	ssize_t rval;
1300 	unsigned i;
1301 	int fd;
1302 
1303 	ATF_REQUIRE_KERNEL_MODULE("aio");
1304 	ATF_REQUIRE_UNSAFE_AIO();
1305 
1306 	fd = open(FILE_PATHNAME, O_RDWR | O_CREAT, 0600);
1307 	ATF_REQUIRE_MSG(fd != -1, "open failed: %s", strerror(errno));
1308 	unlink(FILE_PATHNAME);
1309 
1310 	ATF_REQUIRE(fstat(fd, &sb) == 0);
1311 	ATF_REQUIRE(sb.st_blksize != 0);
1312 	ATF_REQUIRE(ftruncate(fd, sb.st_blksize * nitems(buffers)) == 0);
1313 
1314 	/*
1315 	 * Queue several asynchronous write requests.  Hopefully this
1316 	 * forces the aio_fsync() request to be deferred.  There is no
1317 	 * reliable way to guarantee that however.
1318 	 */
1319 	srandomdev();
1320 	for (i = 0; i < nitems(buffers); i++) {
1321 		buffers[i].done = false;
1322 		memset(&buffers[i].iocb, 0, sizeof(buffers[i].iocb));
1323 		buffers[i].buffer = malloc(sb.st_blksize);
1324 		aio_fill_buffer(buffers[i].buffer, sb.st_blksize, random());
1325 		buffers[i].iocb.aio_fildes = fd;
1326 		buffers[i].iocb.aio_buf = buffers[i].buffer;
1327 		buffers[i].iocb.aio_nbytes = sb.st_blksize;
1328 		buffers[i].iocb.aio_offset = sb.st_blksize * i;
1329 		ATF_REQUIRE(aio_write(&buffers[i].iocb) == 0);
1330 	}
1331 
1332 	/* Queue the aio_fsync request. */
1333 	memset(&synccb, 0, sizeof(synccb));
1334 	synccb.aio_fildes = fd;
1335 	ATF_REQUIRE(aio_fsync(op, &synccb) == 0);
1336 
1337 	/* Wait for requests to complete. */
1338 	for (;;) {
1339 	next:
1340 		rval = aio_waitcomplete(&iocbp, NULL);
1341 		ATF_REQUIRE(iocbp != NULL);
1342 		if (iocbp == &synccb) {
1343 			ATF_REQUIRE(rval == 0);
1344 			break;
1345 		}
1346 
1347 		for (i = 0; i < nitems(buffers); i++) {
1348 			if (iocbp == &buffers[i].iocb) {
1349 				ATF_REQUIRE(buffers[i].done == false);
1350 				ATF_REQUIRE(rval == sb.st_blksize);
1351 				buffers[i].done = true;
1352 				goto next;
1353 			}
1354 		}
1355 
1356 		ATF_REQUIRE_MSG(false, "unmatched AIO request");
1357 	}
1358 
1359 	for (i = 0; i < nitems(buffers); i++)
1360 		ATF_REQUIRE_MSG(buffers[i].done,
1361 		    "AIO request %u did not complete", i);
1362 
1363 	close(fd);
1364 }
1365 
/* Run the basic aio_fsync() test with op set to O_SYNC. */
ATF_TC_WITHOUT_HEAD(aio_fsync_sync_test);
ATF_TC_BODY(aio_fsync_sync_test, tc)
{
	const int op = O_SYNC;

	aio_fsync_test(op);
}
1371 
1372 ATF_TC_WITHOUT_HEAD(aio_fsync_dsync_test);
1373 ATF_TC_BODY(aio_fsync_dsync_test, tc)
1374 {
1375 	aio_fsync_test(O_DSYNC);
1376 }
1377 
1378 /*
1379  * We shouldn't be able to DoS the system by setting iov_len to an insane
1380  * value
1381  */
1382 ATF_TC_WITHOUT_HEAD(aio_writev_dos_iov_len);
1383 ATF_TC_BODY(aio_writev_dos_iov_len, tc)
1384 {
1385 	struct aiocb aio;
1386 	const struct aiocb *const iocbs[] = {&aio};
1387 	const char *wbuf = "Hello, world!";
1388 	struct iovec iov[1];
1389 	ssize_t len, r;
1390 	int fd;
1391 
1392 	ATF_REQUIRE_KERNEL_MODULE("aio");
1393 	ATF_REQUIRE_UNSAFE_AIO();
1394 
1395 	fd = open("testfile", O_RDWR | O_CREAT, 0600);
1396 	ATF_REQUIRE_MSG(fd != -1, "open failed: %s", strerror(errno));
1397 
1398 	len = strlen(wbuf);
1399 	iov[0].iov_base = __DECONST(void*, wbuf);
1400 	iov[0].iov_len = 1 << 30;
1401 	bzero(&aio, sizeof(aio));
1402 	aio.aio_fildes = fd;
1403 	aio.aio_offset = 0;
1404 	aio.aio_iov = iov;
1405 	aio.aio_iovcnt = 1;
1406 
1407 	r = aio_writev(&aio);
1408 	ATF_CHECK_EQ_MSG(0, r, "aio_writev returned %zd", r);
1409 	ATF_REQUIRE_EQ(0, aio_suspend(iocbs, 1, NULL));
1410 	r = aio_return(&aio);
1411 	ATF_CHECK_EQ_MSG(-1, r, "aio_return returned %zd", r);
1412 	ATF_CHECK_MSG(errno == EFAULT || errno == EINVAL,
1413 	    "aio_writev: %s", strerror(errno));
1414 
1415 	close(fd);
1416 }
1417 
1418 /*
1419  * We shouldn't be able to DoS the system by setting aio_iovcnt to an insane
1420  * value
1421  */
1422 ATF_TC_WITHOUT_HEAD(aio_writev_dos_iovcnt);
1423 ATF_TC_BODY(aio_writev_dos_iovcnt, tc)
1424 {
1425 	struct aiocb aio;
1426 	const char *wbuf = "Hello, world!";
1427 	struct iovec iov[1];
1428 	ssize_t len;
1429 	int fd;
1430 
1431 	ATF_REQUIRE_KERNEL_MODULE("aio");
1432 	ATF_REQUIRE_UNSAFE_AIO();
1433 
1434 	fd = open("testfile", O_RDWR | O_CREAT, 0600);
1435 	ATF_REQUIRE_MSG(fd != -1, "open failed: %s", strerror(errno));
1436 
1437 	len = strlen(wbuf);
1438 	iov[0].iov_base = __DECONST(void*, wbuf);
1439 	iov[0].iov_len = len;
1440 	bzero(&aio, sizeof(aio));
1441 	aio.aio_fildes = fd;
1442 	aio.aio_offset = 0;
1443 	aio.aio_iov = iov;
1444 	aio.aio_iovcnt = 1 << 30;
1445 
1446 	ATF_REQUIRE_EQ(-1, aio_writev(&aio));
1447 	ATF_CHECK_EQ(EINVAL, errno);
1448 
1449 	close(fd);
1450 }
1451 
1452 ATF_TC_WITH_CLEANUP(aio_writev_efault);
1453 ATF_TC_HEAD(aio_writev_efault, tc)
1454 {
1455 	atf_tc_set_md_var(tc, "descr",
1456 	    "Vectored AIO should gracefully handle invalid addresses");
1457 	atf_tc_set_md_var(tc, "require.user", "root");
1458 }
1459 ATF_TC_BODY(aio_writev_efault, tc)
1460 {
1461 	struct aiocb aio;
1462 	ssize_t buflen;
1463 	char *buffer;
1464 	struct iovec iov[2];
1465 	long seed;
1466 	int fd;
1467 
1468 	ATF_REQUIRE_KERNEL_MODULE("aio");
1469 	ATF_REQUIRE_UNSAFE_AIO();
1470 
1471 	fd = aio_md_setup();
1472 
1473 	seed = random();
1474 	buflen = 4096;
1475 	buffer = malloc(buflen);
1476 	aio_fill_buffer(buffer, buflen, seed);
1477 	iov[0].iov_base = buffer;
1478 	iov[0].iov_len = buflen;
1479 	iov[1].iov_base = (void*)-1;	/* Invalid! */
1480 	iov[1].iov_len = buflen;
1481 	bzero(&aio, sizeof(aio));
1482 	aio.aio_fildes = fd;
1483 	aio.aio_offset = 0;
1484 	aio.aio_iov = iov;
1485 	aio.aio_iovcnt = nitems(iov);
1486 
1487 	ATF_REQUIRE_EQ(-1, aio_writev(&aio));
1488 	ATF_CHECK_EQ(EFAULT, errno);
1489 
1490 	close(fd);
1491 }
1492 ATF_TC_CLEANUP(aio_writev_efault, tc)
1493 {
1494 	aio_md_cleanup();
1495 }
1496 
1497 ATF_TC_WITHOUT_HEAD(aio_writev_empty_file_poll);
1498 ATF_TC_BODY(aio_writev_empty_file_poll, tc)
1499 {
1500 	struct aiocb aio;
1501 	int fd;
1502 
1503 	ATF_REQUIRE_KERNEL_MODULE("aio");
1504 	ATF_REQUIRE_UNSAFE_AIO();
1505 
1506 	fd = open("testfile", O_RDWR | O_CREAT, 0600);
1507 	ATF_REQUIRE_MSG(fd != -1, "open failed: %s", strerror(errno));
1508 
1509 	bzero(&aio, sizeof(aio));
1510 	aio.aio_fildes = fd;
1511 	aio.aio_offset = 0;
1512 	aio.aio_iovcnt = 0;
1513 
1514 	ATF_REQUIRE_EQ(0, aio_writev(&aio));
1515 	ATF_REQUIRE_EQ(0, suspend(&aio));
1516 
1517 	close(fd);
1518 }
1519 
1520 ATF_TC_WITHOUT_HEAD(aio_writev_empty_file_signal);
1521 ATF_TC_BODY(aio_writev_empty_file_signal, tc)
1522 {
1523 	struct aiocb aio;
1524 	int fd;
1525 
1526 	ATF_REQUIRE_KERNEL_MODULE("aio");
1527 	ATF_REQUIRE_UNSAFE_AIO();
1528 
1529 	fd = open("testfile", O_RDWR | O_CREAT, 0600);
1530 	ATF_REQUIRE_MSG(fd != -1, "open failed: %s", strerror(errno));
1531 
1532 	bzero(&aio, sizeof(aio));
1533 	aio.aio_fildes = fd;
1534 	aio.aio_offset = 0;
1535 	aio.aio_iovcnt = 0;
1536 	aio.aio_sigevent = *setup_signal();
1537 
1538 	ATF_REQUIRE_EQ(0, aio_writev(&aio));
1539 	ATF_REQUIRE_EQ(0, poll_signaled(&aio));
1540 
1541 	close(fd);
1542 }
1543 
1544 // aio_writev and aio_readv should still work even if the iovcnt is greater
1545 // than the number of buffered AIO operations permitted per process.
1546 ATF_TC_WITH_CLEANUP(vectored_big_iovcnt);
1547 ATF_TC_HEAD(vectored_big_iovcnt, tc)
1548 {
1549 	atf_tc_set_md_var(tc, "descr",
1550 	    "Vectored AIO should still work even if the iovcnt is greater than "
1551 	    "the number of buffered AIO operations permitted by the process");
1552 	atf_tc_set_md_var(tc, "require.user", "root");
1553 }
1554 ATF_TC_BODY(vectored_big_iovcnt, tc)
1555 {
1556 	struct aiocb aio;
1557 	struct iovec *iov;
1558 	ssize_t len, buflen;
1559 	char *buffer;
1560 	const char *oid = "vfs.aio.max_buf_aio";
1561 	long seed;
1562 	int max_buf_aio;
1563 	int fd, i;
1564 	ssize_t sysctl_len = sizeof(max_buf_aio);
1565 
1566 	ATF_REQUIRE_KERNEL_MODULE("aio");
1567 	ATF_REQUIRE_UNSAFE_AIO();
1568 
1569 	if (sysctlbyname(oid, &max_buf_aio, &sysctl_len, NULL, 0) == -1)
1570 		atf_libc_error(errno, "Failed to read %s", oid);
1571 
1572 	seed = random();
1573 	buflen = 512 * (max_buf_aio + 1);
1574 	buffer = malloc(buflen);
1575 	aio_fill_buffer(buffer, buflen, seed);
1576 	iov = calloc(max_buf_aio + 1, sizeof(struct iovec));
1577 
1578 	fd = aio_md_setup();
1579 
1580 	bzero(&aio, sizeof(aio));
1581 	aio.aio_fildes = fd;
1582 	aio.aio_offset = 0;
1583 	for (i = 0; i < max_buf_aio + 1; i++) {
1584 		iov[i].iov_base = &buffer[i * 512];
1585 		iov[i].iov_len = 512;
1586 	}
1587 	aio.aio_iov = iov;
1588 	aio.aio_iovcnt = max_buf_aio + 1;
1589 
1590 	if (aio_writev(&aio) < 0)
1591 		atf_tc_fail("aio_writev failed: %s", strerror(errno));
1592 
1593 	len = poll(&aio);
1594 	if (len < 0)
1595 		atf_tc_fail("aio failed: %s", strerror(errno));
1596 
1597 	if (len != buflen)
1598 		atf_tc_fail("aio short write (%jd)", (intmax_t)len);
1599 
1600 	bzero(&aio, sizeof(aio));
1601 	aio.aio_fildes = fd;
1602 	aio.aio_offset = 0;
1603 	aio.aio_iov = iov;
1604 	aio.aio_iovcnt = max_buf_aio + 1;
1605 
1606 	if (aio_readv(&aio) < 0)
1607 		atf_tc_fail("aio_readv failed: %s", strerror(errno));
1608 
1609 	len = poll(&aio);
1610 	if (len < 0)
1611 		atf_tc_fail("aio failed: %s", strerror(errno));
1612 
1613 	if (len != buflen)
1614 		atf_tc_fail("aio short read (%jd)", (intmax_t)len);
1615 
1616 	if (aio_test_buffer(buffer, buflen, seed) == 0)
1617 		atf_tc_fail("buffer mismatched");
1618 
1619 	close(fd);
1620 }
1621 ATF_TC_CLEANUP(vectored_big_iovcnt, tc)
1622 {
1623 	aio_md_cleanup();
1624 }
1625 
1626 ATF_TC_WITHOUT_HEAD(vectored_file_poll);
1627 ATF_TC_BODY(vectored_file_poll, tc)
1628 {
1629 	aio_file_test(poll, NULL, true);
1630 }
1631 
1632 ATF_TC_WITH_CLEANUP(vectored_md_poll);
1633 ATF_TC_HEAD(vectored_md_poll, tc)
1634 {
1635 	atf_tc_set_md_var(tc, "require.user", "root");
1636 }
1637 ATF_TC_BODY(vectored_md_poll, tc)
1638 {
1639 	aio_md_test(poll, NULL, true);
1640 }
1641 ATF_TC_CLEANUP(vectored_md_poll, tc)
1642 {
1643 	aio_md_cleanup();
1644 }
1645 
1646 ATF_TC_WITHOUT_HEAD(vectored_socket_poll);
1647 ATF_TC_BODY(vectored_socket_poll, tc)
1648 {
1649 	aio_unix_socketpair_test(poll, NULL, true);
1650 }
1651 
1652 // aio_writev and aio_readv should still work even if the iov contains elements
1653 // that aren't a multiple of the device's sector size, and even if the total
1654 // amount if I/O _is_ a multiple of the device's sector size.
1655 ATF_TC_WITH_CLEANUP(vectored_unaligned);
1656 ATF_TC_HEAD(vectored_unaligned, tc)
1657 {
1658 	atf_tc_set_md_var(tc, "descr",
1659 	    "Vectored AIO should still work even if the iov contains elements "
1660 	    "that aren't a multiple of the sector size.");
1661 	atf_tc_set_md_var(tc, "require.user", "root");
1662 }
1663 ATF_TC_BODY(vectored_unaligned, tc)
1664 {
1665 	struct aio_context ac;
1666 	struct aiocb aio;
1667 	struct iovec iov[3];
1668 	ssize_t len, total_len;
1669 	int fd;
1670 
1671 	ATF_REQUIRE_KERNEL_MODULE("aio");
1672 	ATF_REQUIRE_UNSAFE_AIO();
1673 
1674 	/*
1675 	 * Use a zvol with volmode=dev, so it will allow .d_write with
1676 	 * unaligned uio.  geom devices use physio, which doesn't allow that.
1677 	 */
1678 	fd = aio_zvol_setup();
1679 	aio_context_init(&ac, fd, fd, FILE_LEN);
1680 
1681 	/* Break the buffer into 3 parts:
1682 	 * * A 4kB part, aligned to 4kB
1683 	 * * Two other parts that add up to 4kB:
1684 	 *   - 256B
1685 	 *   - 4kB - 256B
1686 	 */
1687 	iov[0].iov_base = ac.ac_buffer;
1688 	iov[0].iov_len = 4096;
1689 	iov[1].iov_base = (void*)((uintptr_t)iov[0].iov_base + iov[0].iov_len);
1690 	iov[1].iov_len = 256;
1691 	iov[2].iov_base = (void*)((uintptr_t)iov[1].iov_base + iov[1].iov_len);
1692 	iov[2].iov_len = 4096 - iov[1].iov_len;
1693 	total_len = iov[0].iov_len + iov[1].iov_len + iov[2].iov_len;
1694 	bzero(&aio, sizeof(aio));
1695 	aio.aio_fildes = ac.ac_write_fd;
1696 	aio.aio_offset = 0;
1697 	aio.aio_iov = iov;
1698 	aio.aio_iovcnt = 3;
1699 
1700 	if (aio_writev(&aio) < 0)
1701 		atf_tc_fail("aio_writev failed: %s", strerror(errno));
1702 
1703 	len = poll(&aio);
1704 	if (len < 0)
1705 		atf_tc_fail("aio failed: %s", strerror(errno));
1706 
1707 	if (len != total_len)
1708 		atf_tc_fail("aio short write (%jd)", (intmax_t)len);
1709 
1710 	bzero(&aio, sizeof(aio));
1711 	aio.aio_fildes = ac.ac_read_fd;
1712 	aio.aio_offset = 0;
1713 	aio.aio_iov = iov;
1714 	aio.aio_iovcnt = 3;
1715 
1716 	if (aio_readv(&aio) < 0)
1717 		atf_tc_fail("aio_readv failed: %s", strerror(errno));
1718 	len = poll(&aio);
1719 
1720 	ATF_REQUIRE_MSG(aio_test_buffer(ac.ac_buffer, total_len,
1721 	    ac.ac_seed) != 0, "aio_test_buffer: internal error");
1722 
1723 	close(fd);
1724 }
1725 ATF_TC_CLEANUP(vectored_unaligned, tc)
1726 {
1727 	aio_zvol_cleanup();
1728 }
1729 
1730 static void
1731 aio_zvol_test(completion comp, struct sigevent *sev, bool vectored)
1732 {
1733 	struct aio_context ac;
1734 	int fd;
1735 
1736 	fd = aio_zvol_setup();
1737 	aio_context_init(&ac, fd, fd, MD_LEN);
1738 	if (vectored) {
1739 		aio_writev_test(&ac, comp, sev);
1740 		aio_readv_test(&ac, comp, sev);
1741 	} else {
1742 		aio_write_test(&ac, comp, sev);
1743 		aio_read_test(&ac, comp, sev);
1744 	}
1745 
1746 	close(fd);
1747 }
1748 
1749 /*
1750  * Note that unlike md, the zvol is not a geom device, does not allow unmapped
1751  * buffers, and does not use physio.
1752  */
1753 ATF_TC_WITH_CLEANUP(vectored_zvol_poll);
1754 ATF_TC_HEAD(vectored_zvol_poll, tc)
1755 {
1756 	atf_tc_set_md_var(tc, "require.user", "root");
1757 }
1758 ATF_TC_BODY(vectored_zvol_poll, tc)
1759 {
1760 	aio_zvol_test(poll, NULL, true);
1761 }
1762 ATF_TC_CLEANUP(vectored_zvol_poll, tc)
1763 {
1764 	aio_zvol_cleanup();
1765 }
1766 
1767 ATF_TP_ADD_TCS(tp)
1768 {
1769 
1770 	ATF_TP_ADD_TC(tp, file_poll);
1771 	ATF_TP_ADD_TC(tp, file_signal);
1772 	ATF_TP_ADD_TC(tp, file_suspend);
1773 	ATF_TP_ADD_TC(tp, file_thread);
1774 	ATF_TP_ADD_TC(tp, file_waitcomplete);
1775 	ATF_TP_ADD_TC(tp, fifo_poll);
1776 	ATF_TP_ADD_TC(tp, fifo_signal);
1777 	ATF_TP_ADD_TC(tp, fifo_suspend);
1778 	ATF_TP_ADD_TC(tp, fifo_thread);
1779 	ATF_TP_ADD_TC(tp, fifo_waitcomplete);
1780 	ATF_TP_ADD_TC(tp, socket_poll);
1781 	ATF_TP_ADD_TC(tp, socket_signal);
1782 	ATF_TP_ADD_TC(tp, socket_suspend);
1783 	ATF_TP_ADD_TC(tp, socket_thread);
1784 	ATF_TP_ADD_TC(tp, socket_waitcomplete);
1785 	ATF_TP_ADD_TC(tp, pty_poll);
1786 	ATF_TP_ADD_TC(tp, pty_signal);
1787 	ATF_TP_ADD_TC(tp, pty_suspend);
1788 	ATF_TP_ADD_TC(tp, pty_thread);
1789 	ATF_TP_ADD_TC(tp, pty_waitcomplete);
1790 	ATF_TP_ADD_TC(tp, pipe_poll);
1791 	ATF_TP_ADD_TC(tp, pipe_signal);
1792 	ATF_TP_ADD_TC(tp, pipe_suspend);
1793 	ATF_TP_ADD_TC(tp, pipe_thread);
1794 	ATF_TP_ADD_TC(tp, pipe_waitcomplete);
1795 	ATF_TP_ADD_TC(tp, md_poll);
1796 	ATF_TP_ADD_TC(tp, md_signal);
1797 	ATF_TP_ADD_TC(tp, md_suspend);
1798 	ATF_TP_ADD_TC(tp, md_thread);
1799 	ATF_TP_ADD_TC(tp, md_waitcomplete);
1800 	ATF_TP_ADD_TC(tp, aio_fsync_errors);
1801 	ATF_TP_ADD_TC(tp, aio_fsync_sync_test);
1802 	ATF_TP_ADD_TC(tp, aio_fsync_dsync_test);
1803 	ATF_TP_ADD_TC(tp, aio_large_read_test);
1804 	ATF_TP_ADD_TC(tp, aio_socket_two_reads);
1805 	ATF_TP_ADD_TC(tp, aio_socket_blocking_short_write);
1806 	ATF_TP_ADD_TC(tp, aio_socket_blocking_short_write_vectored);
1807 	ATF_TP_ADD_TC(tp, aio_socket_short_write_cancel);
1808 	ATF_TP_ADD_TC(tp, aio_writev_dos_iov_len);
1809 	ATF_TP_ADD_TC(tp, aio_writev_dos_iovcnt);
1810 	ATF_TP_ADD_TC(tp, aio_writev_efault);
1811 	ATF_TP_ADD_TC(tp, aio_writev_empty_file_poll);
1812 	ATF_TP_ADD_TC(tp, aio_writev_empty_file_signal);
1813 	ATF_TP_ADD_TC(tp, vectored_big_iovcnt);
1814 	ATF_TP_ADD_TC(tp, vectored_file_poll);
1815 	ATF_TP_ADD_TC(tp, vectored_md_poll);
1816 	ATF_TP_ADD_TC(tp, vectored_zvol_poll);
1817 	ATF_TP_ADD_TC(tp, vectored_unaligned);
1818 	ATF_TP_ADD_TC(tp, vectored_socket_poll);
1819 
1820 	return (atf_no_error());
1821 }
1822