xref: /freebsd/sys/kern/kern_physio.c (revision 473c90ac04cec0abbb414978c53e9c259c9129e8)
19454b2d8SWarner Losh /*-
226f9a767SRodney W. Grimes  * Copyright (c) 1994 John S. Dyson
326f9a767SRodney W. Grimes  * All rights reserved.
4df8bae1dSRodney W. Grimes  *
5df8bae1dSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
6df8bae1dSRodney W. Grimes  * modification, are permitted provided that the following conditions
7df8bae1dSRodney W. Grimes  * are met:
8df8bae1dSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
926f9a767SRodney W. Grimes  *    notice immediately at the beginning of the file, without modification,
1026f9a767SRodney W. Grimes  *    this list of conditions, and the following disclaimer.
11df8bae1dSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
12df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
13df8bae1dSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
1426f9a767SRodney W. Grimes  * 3. Absolutely no warranty of function or purpose is made by the author
1526f9a767SRodney W. Grimes  *    John S. Dyson.
1626f9a767SRodney W. Grimes  * 4. Modifications may be freely made to this file if the above conditions
1726f9a767SRodney W. Grimes  *    are met.
18df8bae1dSRodney W. Grimes  */
19df8bae1dSRodney W. Grimes 
20df8bae1dSRodney W. Grimes #include <sys/param.h>
21df8bae1dSRodney W. Grimes #include <sys/systm.h>
229626b608SPoul-Henning Kamp #include <sys/bio.h>
23df8bae1dSRodney W. Grimes #include <sys/buf.h>
24df8bae1dSRodney W. Grimes #include <sys/conf.h>
25869fd29aSAlexander Motin #include <sys/malloc.h>
26df8bae1dSRodney W. Grimes #include <sys/proc.h>
27ae34b6ffSEdward Tomasz Napierala #include <sys/racct.h>
2883f6b501SAlexander Motin #include <sys/rwlock.h>
2908637435SBruce Evans #include <sys/uio.h>
30869fd29aSAlexander Motin #include <geom/geom.h>
3108637435SBruce Evans 
3226f9a767SRodney W. Grimes #include <vm/vm.h>
3383f6b501SAlexander Motin #include <vm/vm_object.h>
34869fd29aSAlexander Motin #include <vm/vm_page.h>
3583f6b501SAlexander Motin #include <vm/vm_pager.h>
36efeaf95aSDavid Greenman #include <vm/vm_extern.h>
37869fd29aSAlexander Motin #include <vm/vm_map.h>
38df8bae1dSRodney W. Grimes 
39c48d1775SPoul-Henning Kamp int
4089c9c53dSPoul-Henning Kamp physio(struct cdev *dev, struct uio *uio, int ioflag)
41df8bae1dSRodney W. Grimes {
42cb3450e2SHans Petter Selasky 	struct cdevsw *csw;
43869fd29aSAlexander Motin 	struct buf *pbuf;
44869fd29aSAlexander Motin 	struct bio *bp;
45869fd29aSAlexander Motin 	struct vm_page **pages;
4616e4a0c8SBrooks Davis 	char *base, *sa;
47869fd29aSAlexander Motin 	u_int iolen, poff;
48869fd29aSAlexander Motin 	int error, i, npages, maxpages;
49869fd29aSAlexander Motin 	vm_prot_t prot;
5026f9a767SRodney W. Grimes 
51cb3450e2SHans Petter Selasky 	csw = dev->si_devsw;
52cd6ba3f0SMatt Macy 	npages = 0;
53cd6ba3f0SMatt Macy 	sa = NULL;
54cb3450e2SHans Petter Selasky 	/* check if character device is being destroyed */
55cb3450e2SHans Petter Selasky 	if (csw == NULL)
56cb3450e2SHans Petter Selasky 		return (ENXIO);
57cb3450e2SHans Petter Selasky 
587179e74fSPoul-Henning Kamp 	/* XXX: sanity check */
597179e74fSPoul-Henning Kamp 	if (dev->si_iosize_max < PAGE_SIZE) {
607179e74fSPoul-Henning Kamp 		printf("WARNING: %s si_iosize_max=%d, using DFLTPHYS.\n",
617179e74fSPoul-Henning Kamp 		    devtoname(dev), dev->si_iosize_max);
627179e74fSPoul-Henning Kamp 		dev->si_iosize_max = DFLTPHYS;
637179e74fSPoul-Henning Kamp 	}
647179e74fSPoul-Henning Kamp 
6593729c17SKenneth D. Merry 	/*
6693729c17SKenneth D. Merry 	 * If the driver does not want I/O to be split, that means that we
6793729c17SKenneth D. Merry 	 * need to reject any requests that will not fit into one buffer.
6893729c17SKenneth D. Merry 	 */
69880e57b6SKenneth D. Merry 	if (dev->si_flags & SI_NOSPLIT &&
70cd853791SKonstantin Belousov 	    (uio->uio_resid > dev->si_iosize_max || uio->uio_resid > maxphys ||
71880e57b6SKenneth D. Merry 	    uio->uio_iovcnt > 1)) {
7293729c17SKenneth D. Merry 		/*
7393729c17SKenneth D. Merry 		 * Tell the user why his I/O was rejected.
7493729c17SKenneth D. Merry 		 */
7593729c17SKenneth D. Merry 		if (uio->uio_resid > dev->si_iosize_max)
76880e57b6SKenneth D. Merry 			uprintf("%s: request size=%zd > si_iosize_max=%d; "
7793729c17SKenneth D. Merry 			    "cannot split request\n", devtoname(dev),
7893729c17SKenneth D. Merry 			    uio->uio_resid, dev->si_iosize_max);
79cd853791SKonstantin Belousov 		if (uio->uio_resid > maxphys)
80cd853791SKonstantin Belousov 			uprintf("%s: request size=%zd > maxphys=%lu; "
8193729c17SKenneth D. Merry 			    "cannot split request\n", devtoname(dev),
82cd853791SKonstantin Belousov 			    uio->uio_resid, maxphys);
8393729c17SKenneth D. Merry 		if (uio->uio_iovcnt > 1)
84880e57b6SKenneth D. Merry 			uprintf("%s: request vectors=%d > 1; "
8593729c17SKenneth D. Merry 			    "cannot split request\n", devtoname(dev),
8693729c17SKenneth D. Merry 			    uio->uio_iovcnt);
87869fd29aSAlexander Motin 		return (EFBIG);
8893729c17SKenneth D. Merry 	}
8993729c17SKenneth D. Merry 
90869fd29aSAlexander Motin 	/*
91869fd29aSAlexander Motin 	 * Keep the process UPAGES from being swapped.  Processes swapped
92869fd29aSAlexander Motin 	 * out while holding pbufs, used by swapper, may lead to deadlock.
93869fd29aSAlexander Motin 	 */
94869fd29aSAlexander Motin 	PHOLD(curproc);
95869fd29aSAlexander Motin 
96869fd29aSAlexander Motin 	bp = g_alloc_bio();
97869fd29aSAlexander Motin 	if (uio->uio_segflg != UIO_USERSPACE) {
98869fd29aSAlexander Motin 		pbuf = NULL;
99869fd29aSAlexander Motin 		pages = NULL;
100869fd29aSAlexander Motin 	} else if ((dev->si_flags & SI_UNMAPPED) && unmapped_buf_allowed) {
101869fd29aSAlexander Motin 		pbuf = NULL;
102cd853791SKonstantin Belousov 		maxpages = btoc(MIN(uio->uio_resid, maxphys)) + 1;
103869fd29aSAlexander Motin 		pages = malloc(sizeof(*pages) * maxpages, M_DEVBUF, M_WAITOK);
104869fd29aSAlexander Motin 	} else {
105756a5412SGleb Smirnoff 		pbuf = uma_zalloc(pbuf_zone, M_WAITOK);
106cd853791SKonstantin Belousov 		MPASS((pbuf->b_flags & B_MAXPHYS) != 0);
107869fd29aSAlexander Motin 		sa = pbuf->b_data;
10883f6b501SAlexander Motin 		maxpages = PBUF_PAGES;
109869fd29aSAlexander Motin 		pages = pbuf->b_pages;
110869fd29aSAlexander Motin 	}
111869fd29aSAlexander Motin 	prot = VM_PROT_READ;
112869fd29aSAlexander Motin 	if (uio->uio_rw == UIO_READ)
113869fd29aSAlexander Motin 		prot |= VM_PROT_WRITE;	/* Less backwards than it looks */
114869fd29aSAlexander Motin 	error = 0;
11526f9a767SRodney W. Grimes 	for (i = 0; i < uio->uio_iovcnt; i++) {
116ae34b6ffSEdward Tomasz Napierala #ifdef RACCT
117ae34b6ffSEdward Tomasz Napierala 		if (racct_enable) {
118ae34b6ffSEdward Tomasz Napierala 			PROC_LOCK(curproc);
119*473c90acSJohn Baldwin 			switch (uio->uio_rw) {
120*473c90acSJohn Baldwin 			case UIO_READ:
121ae34b6ffSEdward Tomasz Napierala 				racct_add_force(curproc, RACCT_READBPS,
122ae34b6ffSEdward Tomasz Napierala 				    uio->uio_iov[i].iov_len);
123ae34b6ffSEdward Tomasz Napierala 				racct_add_force(curproc, RACCT_READIOPS, 1);
124*473c90acSJohn Baldwin 				break;
125*473c90acSJohn Baldwin 			case UIO_WRITE:
126ae34b6ffSEdward Tomasz Napierala 				racct_add_force(curproc, RACCT_WRITEBPS,
127ae34b6ffSEdward Tomasz Napierala 				    uio->uio_iov[i].iov_len);
128ae34b6ffSEdward Tomasz Napierala 				racct_add_force(curproc, RACCT_WRITEIOPS, 1);
129*473c90acSJohn Baldwin 				break;
130ae34b6ffSEdward Tomasz Napierala 			}
131ae34b6ffSEdward Tomasz Napierala 			PROC_UNLOCK(curproc);
132ae34b6ffSEdward Tomasz Napierala 		}
133ae34b6ffSEdward Tomasz Napierala #endif /* RACCT */
134ae34b6ffSEdward Tomasz Napierala 
13526f9a767SRodney W. Grimes 		while (uio->uio_iov[i].iov_len) {
136c55f5707SWarner Losh 			g_reset_bio(bp);
137*473c90acSJohn Baldwin 			switch (uio->uio_rw) {
138*473c90acSJohn Baldwin 			case UIO_READ:
139869fd29aSAlexander Motin 				bp->bio_cmd = BIO_READ;
140eea7f71cSKonstantin Belousov 				curthread->td_ru.ru_inblock++;
141*473c90acSJohn Baldwin 				break;
142*473c90acSJohn Baldwin 			case UIO_WRITE:
143869fd29aSAlexander Motin 				bp->bio_cmd = BIO_WRITE;
144eea7f71cSKonstantin Belousov 				curthread->td_ru.ru_oublock++;
145*473c90acSJohn Baldwin 				break;
146eea7f71cSKonstantin Belousov 			}
147869fd29aSAlexander Motin 			bp->bio_offset = uio->uio_offset;
14816e4a0c8SBrooks Davis 			base = uio->uio_iov[i].iov_base;
149869fd29aSAlexander Motin 			bp->bio_length = uio->uio_iov[i].iov_len;
150869fd29aSAlexander Motin 			if (bp->bio_length > dev->si_iosize_max)
151869fd29aSAlexander Motin 				bp->bio_length = dev->si_iosize_max;
152cd853791SKonstantin Belousov 			if (bp->bio_length > maxphys)
153cd853791SKonstantin Belousov 				bp->bio_length = maxphys;
154869fd29aSAlexander Motin 			bp->bio_bcount = bp->bio_length;
155869fd29aSAlexander Motin 			bp->bio_dev = dev;
15626f9a767SRodney W. Grimes 
157869fd29aSAlexander Motin 			if (pages) {
158869fd29aSAlexander Motin 				if ((npages = vm_fault_quick_hold_pages(
159869fd29aSAlexander Motin 				    &curproc->p_vmspace->vm_map,
16016e4a0c8SBrooks Davis 				    (vm_offset_t)base, bp->bio_length,
161869fd29aSAlexander Motin 				    prot, pages, maxpages)) < 0) {
1622d5c7e45SMatthew Dillon 					error = EFAULT;
1632d5c7e45SMatthew Dillon 					goto doerror;
1642d5c7e45SMatthew Dillon 				}
16583f6b501SAlexander Motin 				poff = (vm_offset_t)base & PAGE_MASK;
166cd6ba3f0SMatt Macy 				if (pbuf && sa) {
167869fd29aSAlexander Motin 					pmap_qenter((vm_offset_t)sa,
168869fd29aSAlexander Motin 					    pages, npages);
169869fd29aSAlexander Motin 					bp->bio_data = sa + poff;
170869fd29aSAlexander Motin 				} else {
171869fd29aSAlexander Motin 					bp->bio_ma = pages;
172869fd29aSAlexander Motin 					bp->bio_ma_n = npages;
173869fd29aSAlexander Motin 					bp->bio_ma_offset = poff;
174869fd29aSAlexander Motin 					bp->bio_data = unmapped_buf;
175869fd29aSAlexander Motin 					bp->bio_flags |= BIO_UNMAPPED;
176869fd29aSAlexander Motin 				}
17716e4a0c8SBrooks Davis 			} else
17816e4a0c8SBrooks Davis 				bp->bio_data = base;
17926f9a767SRodney W. Grimes 
180cb3450e2SHans Petter Selasky 			csw->d_strategy(bp);
181749ffa4eSJeff Roberson 			if (uio->uio_rw == UIO_READ)
182869fd29aSAlexander Motin 				biowait(bp, "physrd");
183749ffa4eSJeff Roberson 			else
184869fd29aSAlexander Motin 				biowait(bp, "physwr");
18526f9a767SRodney W. Grimes 
186869fd29aSAlexander Motin 			if (pages) {
187869fd29aSAlexander Motin 				if (pbuf)
188869fd29aSAlexander Motin 					pmap_qremove((vm_offset_t)sa, npages);
189869fd29aSAlexander Motin 				vm_page_unhold_pages(pages, npages);
190869fd29aSAlexander Motin 			}
191869fd29aSAlexander Motin 
192869fd29aSAlexander Motin 			iolen = bp->bio_length - bp->bio_resid;
193869fd29aSAlexander Motin 			if (iolen == 0 && !(bp->bio_flags & BIO_ERROR))
1947d7bb69dSDavid Greenman 				goto doerror;	/* EOF */
19526f9a767SRodney W. Grimes 			uio->uio_iov[i].iov_len -= iolen;
1962b7f24d2SMike Barcroft 			uio->uio_iov[i].iov_base =
1972b7f24d2SMike Barcroft 			    (char *)uio->uio_iov[i].iov_base + iolen;
19826f9a767SRodney W. Grimes 			uio->uio_resid -= iolen;
19926f9a767SRodney W. Grimes 			uio->uio_offset += iolen;
200869fd29aSAlexander Motin 			if (bp->bio_flags & BIO_ERROR) {
201869fd29aSAlexander Motin 				error = bp->bio_error;
20226f9a767SRodney W. Grimes 				goto doerror;
20326f9a767SRodney W. Grimes 			}
20426f9a767SRodney W. Grimes 		}
20526f9a767SRodney W. Grimes 	}
20626f9a767SRodney W. Grimes doerror:
207869fd29aSAlexander Motin 	if (pbuf)
208756a5412SGleb Smirnoff 		uma_zfree(pbuf_zone, pbuf);
209869fd29aSAlexander Motin 	else if (pages)
210869fd29aSAlexander Motin 		free(pages, M_DEVBUF);
211869fd29aSAlexander Motin 	g_destroy_bio(bp);
21257dc5948SPeter Wemm 	PRELE(curproc);
21326f9a767SRodney W. Grimes 	return (error);
214df8bae1dSRodney W. Grimes }
215