xref: /illumos-gate/usr/src/uts/common/os/bp_map.c (revision 95dd938966e7f45c67d0003e88ead5f5f2ddaecb)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/sysmacros.h>
30 #include <sys/systm.h>
31 #include <sys/mman.h>
32 #include <sys/buf.h>
33 #include <sys/vmem.h>
34 #include <sys/cmn_err.h>
35 #include <sys/debug.h>
36 #include <sys/machparam.h>
37 #include <vm/page.h>
38 #include <vm/seg_kmem.h>
39 #include <vm/seg_kpm.h>
40 
/*
 * BP_FLUSH(addr, size): on sparc, hand the range [addr, addr + size) to
 * flush_instr_mem(); on every other platform it is a no-op.
 *
 * Both variants expand to a single expression without a trailing semicolon,
 * so "BP_FLUSH(a, s);" is exactly one statement and is safe in an unbraced
 * if/else.  The arguments are parenthesized so that arbitrary expressions
 * can be passed.
 */
#ifdef __sparc
#include <sys/cpu_module.h>
#define	BP_FLUSH(addr, size)	flush_instr_mem((void *)(addr), (size))
#else
#define	BP_FLUSH(addr, size)	((void)0)
#endif
47 
48 int bp_force_copy = 0;
49 typedef enum {
50 	BP_COPYIN	= 0,
51 	BP_COPYOUT	= 1
52 } bp_copydir_t;
53 static int bp_copy_common(bp_copydir_t dir, struct buf *bp, void *driverbuf,
54     offset_t offset, size_t size);
55 
56 static vmem_t *bp_map_arena;
57 static size_t bp_align;
58 static uint_t bp_devload_flags = PROT_READ | PROT_WRITE | HAT_NOSYNC;
59 int	bp_max_cache = 1 << 17;		/* 128K default; tunable */
60 int	bp_mapin_kpm_enable = 1;	/* enable default; tunable */
61 
62 static void *
63 bp_vmem_alloc(vmem_t *vmp, size_t size, int vmflag)
64 {
65 	return (vmem_xalloc(vmp, size, bp_align, 0, 0, NULL, NULL, vmflag));
66 }
67 
68 void
69 bp_init(size_t align, uint_t devload_flags)
70 {
71 	bp_align = MAX(align, PAGESIZE);
72 	bp_devload_flags |= devload_flags;
73 
74 	if (bp_align <= bp_max_cache)
75 		bp_map_arena = vmem_create("bp_map", NULL, 0, bp_align,
76 		    bp_vmem_alloc, vmem_free, heap_arena,
77 		    MIN(8 * bp_align, bp_max_cache), VM_SLEEP);
78 }
79 
80 /*
81  * common routine so can be called with/without VM_SLEEP
82  */
83 void *
84 bp_mapin_common(struct buf *bp, int flag)
85 {
86 	struct as	*as;
87 	pfn_t		pfnum;
88 	page_t		*pp;
89 	page_t		**pplist;
90 	caddr_t		kaddr;
91 	caddr_t		addr;
92 	uintptr_t	off;
93 	size_t		size;
94 	pgcnt_t		npages;
95 	int		color;
96 
97 	/* return if already mapped in, no pageio/physio, or physio to kas */
98 	if ((bp->b_flags & B_REMAPPED) ||
99 	    !(bp->b_flags & (B_PAGEIO | B_PHYS)) ||
100 	    (((bp->b_flags & (B_PAGEIO | B_PHYS)) == B_PHYS) &&
101 	    ((bp->b_proc == NULL) || (bp->b_proc->p_as == &kas))))
102 		return (bp->b_un.b_addr);
103 
104 	ASSERT((bp->b_flags & (B_PAGEIO | B_PHYS)) != (B_PAGEIO | B_PHYS));
105 
106 	addr = (caddr_t)bp->b_un.b_addr;
107 	off = (uintptr_t)addr & PAGEOFFSET;
108 	size = P2ROUNDUP(bp->b_bcount + off, PAGESIZE);
109 	npages = btop(size);
110 
111 	/* Fastpath single page IO to locked memory by using kpm. */
112 	if ((bp->b_flags & (B_SHADOW | B_PAGEIO)) && (npages == 1) &&
113 	    kpm_enable && bp_mapin_kpm_enable) {
114 		if (bp->b_flags & B_SHADOW)
115 			pp = *bp->b_shadow;
116 		else
117 			pp = bp->b_pages;
118 		kaddr = hat_kpm_mapin(pp, NULL);
119 		bp->b_un.b_addr = kaddr + off;
120 		bp->b_flags |= B_REMAPPED;
121 		return (bp->b_un.b_addr);
122 	}
123 
124 	/*
125 	 * Allocate kernel virtual space for remapping.
126 	 */
127 	color = bp_color(bp);
128 	ASSERT(color < bp_align);
129 
130 	if (bp_map_arena != NULL) {
131 		kaddr = (caddr_t)vmem_alloc(bp_map_arena,
132 		    P2ROUNDUP(color + size, bp_align), flag);
133 		if (kaddr == NULL)
134 			return (NULL);
135 		kaddr += color;
136 	} else {
137 		kaddr = vmem_xalloc(heap_arena, size, bp_align, color,
138 		    0, NULL, NULL, flag);
139 		if (kaddr == NULL)
140 			return (NULL);
141 	}
142 
143 	ASSERT(P2PHASE((uintptr_t)kaddr, bp_align) == color);
144 
145 	/*
146 	 * Map bp into the virtual space we just allocated.
147 	 */
148 	if (bp->b_flags & B_PAGEIO) {
149 		pp = bp->b_pages;
150 		pplist = NULL;
151 	} else if (bp->b_flags & B_SHADOW) {
152 		pp = NULL;
153 		pplist = bp->b_shadow;
154 	} else {
155 		pp = NULL;
156 		pplist = NULL;
157 		if (bp->b_proc == NULL || (as = bp->b_proc->p_as) == NULL)
158 			as = &kas;
159 	}
160 
161 	bp->b_flags |= B_REMAPPED;
162 	bp->b_un.b_addr = kaddr + off;
163 
164 	while (npages-- != 0) {
165 		if (pp) {
166 			pfnum = pp->p_pagenum;
167 			pp = pp->p_next;
168 		} else if (pplist == NULL) {
169 			pfnum = hat_getpfnum(as->a_hat,
170 			    (caddr_t)((uintptr_t)addr & MMU_PAGEMASK));
171 			if (pfnum == PFN_INVALID)
172 				panic("bp_mapin_common: hat_getpfnum for"
173 				    " addr %p failed\n", (void *)addr);
174 			addr += PAGESIZE;
175 		} else {
176 			pfnum = (*pplist)->p_pagenum;
177 			pplist++;
178 		}
179 
180 		hat_devload(kas.a_hat, kaddr, PAGESIZE, pfnum,
181 		    bp_devload_flags, HAT_LOAD_LOCK);
182 
183 		kaddr += PAGESIZE;
184 	}
185 	return (bp->b_un.b_addr);
186 }
187 
188 /*
189  * Convert bp for pageio/physio to a kernel addressable location.
190  */
191 void
192 bp_mapin(struct buf *bp)
193 {
194 	(void) bp_mapin_common(bp, VM_SLEEP);
195 }
196 
197 /*
198  * Release all the resources associated with a previous bp_mapin() call.
199  */
200 void
201 bp_mapout(struct buf *bp)
202 {
203 	caddr_t		addr;
204 	uintptr_t	off;
205 	uintptr_t	base;
206 	uintptr_t	color;
207 	size_t		size;
208 	pgcnt_t		npages;
209 	page_t		*pp;
210 
211 	if ((bp->b_flags & B_REMAPPED) == 0)
212 		return;
213 
214 	addr = bp->b_un.b_addr;
215 	off = (uintptr_t)addr & PAGEOFFSET;
216 	size = P2ROUNDUP(bp->b_bcount + off, PAGESIZE);
217 	npages = btop(size);
218 
219 	bp->b_un.b_addr = (caddr_t)off;		/* debugging aid */
220 
221 	if ((bp->b_flags & (B_SHADOW | B_PAGEIO)) && (npages == 1) &&
222 	    kpm_enable && bp_mapin_kpm_enable) {
223 		if (bp->b_flags & B_SHADOW)
224 			pp = *bp->b_shadow;
225 		else
226 			pp = bp->b_pages;
227 		addr = (caddr_t)((uintptr_t)addr & MMU_PAGEMASK);
228 		hat_kpm_mapout(pp, NULL, addr);
229 		bp->b_flags &= ~B_REMAPPED;
230 		return;
231 	}
232 
233 	base = (uintptr_t)addr & MMU_PAGEMASK;
234 	BP_FLUSH(base, size);
235 	hat_unload(kas.a_hat, (void *)base, size,
236 	    HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK);
237 	if (bp_map_arena != NULL) {
238 		color = P2PHASE(base, bp_align);
239 		vmem_free(bp_map_arena, (void *)(base - color),
240 		    P2ROUNDUP(color + size, bp_align));
241 	} else
242 		vmem_free(heap_arena, (void *)base, size);
243 	bp->b_flags &= ~B_REMAPPED;
244 }
245 
246 /*
247  * copy data from a KVA into a buf_t which may not be mapped in. offset
248  * is relative to the buf_t only.
249  */
250 int
251 bp_copyout(void *driverbuf, struct buf *bp, offset_t offset, size_t size)
252 {
253 	return (bp_copy_common(BP_COPYOUT, bp, driverbuf, offset, size));
254 }
255 
256 /*
257  * copy data from a buf_t which may not be mapped in, into a KVA.. offset
258  * is relative to the buf_t only.
259  */
260 int
261 bp_copyin(struct buf *bp, void *driverbuf, offset_t offset, size_t size)
262 {
263 	return (bp_copy_common(BP_COPYIN, bp, driverbuf, offset, size));
264 }
265 
266 
/*
 * Copy sz bytes between driverbuf and baddr (an address inside the buf_t's
 * data): BP_COPYIN copies baddr -> driverbuf, anything else copies
 * driverbuf -> baddr.  Every argument and the whole expansion are
 * parenthesized so the macro behaves as a single expression no matter what
 * expressions are passed in.
 */
#define	BP_COPY(dir, driverbuf, baddr, sz)	\
	(((dir) == BP_COPYIN) ? \
	bcopy((baddr), (driverbuf), (sz)) : bcopy((driverbuf), (baddr), (sz)))
270 
271 static int
272 bp_copy_common(bp_copydir_t dir, struct buf *bp, void *driverbuf,
273     offset_t offset, size_t size)
274 {
275 	page_t **pplist;
276 	uintptr_t poff;
277 	uintptr_t voff;
278 	struct as *as;
279 	caddr_t kaddr;
280 	caddr_t addr;
281 	page_t *page;
282 	size_t psize;
283 	page_t *pp;
284 	pfn_t pfn;
285 
286 
287 	ASSERT((offset + size) <= bp->b_bcount);
288 
289 	/* if the buf_t already has a KVA, just do a bcopy */
290 	if (!(bp->b_flags & (B_PHYS | B_PAGEIO))) {
291 		BP_COPY(dir, driverbuf, bp->b_un.b_addr + offset, size);
292 		return (0);
293 	}
294 
295 	/* if we don't have kpm enabled, we need to do the slow path */
296 	if (!kpm_enable || bp_force_copy) {
297 		bp_mapin(bp);
298 		BP_COPY(dir, driverbuf, bp->b_un.b_addr + offset, size);
299 		bp_mapout(bp);
300 		return (0);
301 	}
302 
303 	/*
304 	 * kpm is enabled, and we need to map in the buf_t for the copy
305 	 */
306 
307 	/* setup pp, plist, and make sure 'as' is right */
308 	if (bp->b_flags & B_PAGEIO) {
309 		pp = bp->b_pages;
310 		pplist = NULL;
311 	} else if (bp->b_flags & B_SHADOW) {
312 		pp = NULL;
313 		pplist = bp->b_shadow;
314 	} else {
315 		pp = NULL;
316 		pplist = NULL;
317 		if (bp->b_proc == NULL || (as = bp->b_proc->p_as) == NULL) {
318 			as = &kas;
319 		}
320 	}
321 
322 	/*
323 	 * locals for the address, the offset into the first page, and the
324 	 * size of the first page we are going to copy.
325 	 */
326 	addr = (caddr_t)bp->b_un.b_addr;
327 	poff = (uintptr_t)addr & PAGEOFFSET;
328 	psize = MIN(PAGESIZE - poff, size);
329 
330 	/*
331 	 * we always start with a 0 offset into the driverbuf provided. The
332 	 * offset passed in only applies to the buf_t.
333 	 */
334 	voff = 0;
335 
336 	/* Loop until we've copied al the data */
337 	while (size > 0) {
338 
339 		/*
340 		 * for a pp or pplist, get the pfn, then go to the next page_t
341 		 * for the next time around the loop.
342 		 */
343 		if (pp) {
344 			page = pp;
345 			pp = pp->p_next;
346 		} else if (pplist != NULL) {
347 			page = (*pplist);
348 			pplist++;
349 
350 		/*
351 		 * We have a user VA. If we are going to copy this page, (e.g.
352 		 * the offset into the buf_t where we start to copy is
353 		 * within this page), get the pfn. Don't waste the cycles
354 		 * getting the pfn if we're not copying this page.
355 		 */
356 		} else if (offset < psize) {
357 			pfn = hat_getpfnum(as->a_hat,
358 			    (caddr_t)((uintptr_t)addr & PAGEMASK));
359 			if (pfn == PFN_INVALID) {
360 				return (-1);
361 			}
362 			page = page_numtopp_nolock(pfn);
363 			addr += psize - offset;
364 		} else {
365 			addr += psize;
366 		}
367 
368 		/*
369 		 * if we have an initial offset into the buf_t passed in,
370 		 * and it falls within the current page, account for it in
371 		 * the page size (how much we will copy) and the offset into the
372 		 * page (where we'll start copying from).
373 		 */
374 		if ((offset > 0) && (offset < psize)) {
375 			psize -= offset;
376 			poff += offset;
377 			offset = 0;
378 
379 		/*
380 		 * if we have an initial offset into the buf_t passed in,
381 		 * and it's not within the current page, skip this page.
382 		 * We don't have to worry about the first page offset and size
383 		 * anymore. psize will normally be PAGESIZE now unless we are
384 		 * on the last page.
385 		 */
386 		} else if (offset >= psize) {
387 			offset -= psize;
388 			psize = MIN(PAGESIZE, size);
389 			poff = 0;
390 			continue;
391 		}
392 
393 		/*
394 		 * get a kpm mapping to the page, them copy in/out of the
395 		 * page. update size left and offset into the driverbuf passed
396 		 * in for the next time around the loop.
397 		 */
398 		kaddr = hat_kpm_mapin(page, NULL) + poff;
399 		BP_COPY(dir, (void *)((uintptr_t)driverbuf + voff), kaddr,
400 		    psize);
401 		hat_kpm_mapout(page, NULL, kaddr - poff);
402 
403 		size -= psize;
404 		voff += psize;
405 
406 		poff = 0;
407 		psize = MIN(PAGESIZE, size);
408 	}
409 
410 	return (0);
411 }
412