xref: /illumos-gate/usr/src/uts/common/os/bp_map.c (revision 584b574a3b16c6772c8204ec1d1c957c56f22a87)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/sysmacros.h>
28 #include <sys/systm.h>
29 #include <sys/mman.h>
30 #include <sys/buf.h>
31 #include <sys/vmem.h>
32 #include <sys/cmn_err.h>
33 #include <sys/debug.h>
34 #include <sys/machparam.h>
35 #include <vm/page.h>
36 #include <vm/seg_kmem.h>
37 #include <vm/seg_kpm.h>
38 
#ifdef __sparc
#include <sys/cpu_module.h>
/*
 * BP_FLUSH(addr, size): on sparc, hand the range to flush_instr_mem();
 * on every other platform it expands to a no-op expression.
 *
 * The expansion is a single expression with no trailing semicolon so that
 * "BP_FLUSH(a, s);" is exactly one statement (safe as an unbraced if/else
 * body), and both arguments are parenthesized so arbitrary expressions can
 * be passed.
 */
#define	BP_FLUSH(addr, size)	flush_instr_mem((void *)(addr), (size))
#else
#define	BP_FLUSH(addr, size)	((void)0)
#endif
45 
46 int bp_force_copy = 0;
47 typedef enum {
48 	BP_COPYIN	= 0,
49 	BP_COPYOUT	= 1
50 } bp_copydir_t;
51 static int bp_copy_common(bp_copydir_t dir, struct buf *bp, void *driverbuf,
52     offset_t offset, size_t size);
53 
54 static vmem_t *bp_map_arena;
55 static size_t bp_align;
56 static uint_t bp_devload_flags = PROT_READ | PROT_WRITE | HAT_NOSYNC;
57 int	bp_max_cache = 1 << 17;		/* 128K default; tunable */
58 int	bp_mapin_kpm_enable = 1;	/* enable default; tunable */
59 
60 static void *
bp_vmem_alloc(vmem_t * vmp,size_t size,int vmflag)61 bp_vmem_alloc(vmem_t *vmp, size_t size, int vmflag)
62 {
63 	return (vmem_xalloc(vmp, size, bp_align, 0, 0, NULL, NULL, vmflag));
64 }
65 
66 void
bp_init(size_t align,uint_t devload_flags)67 bp_init(size_t align, uint_t devload_flags)
68 {
69 	bp_align = MAX(align, PAGESIZE);
70 	bp_devload_flags |= devload_flags;
71 
72 	if (bp_align <= bp_max_cache)
73 		bp_map_arena = vmem_create("bp_map", NULL, 0, bp_align,
74 		    bp_vmem_alloc, vmem_free, heap_arena,
75 		    MIN(8 * bp_align, bp_max_cache), VM_SLEEP);
76 }
77 
78 /*
79  * common routine so can be called with/without VM_SLEEP
80  */
81 void *
bp_mapin_common(struct buf * bp,int flag)82 bp_mapin_common(struct buf *bp, int flag)
83 {
84 	struct as	*as;
85 	pfn_t		pfnum;
86 	page_t		*pp;
87 	page_t		**pplist;
88 	caddr_t		kaddr;
89 	caddr_t		addr;
90 	uintptr_t	off;
91 	size_t		size;
92 	pgcnt_t		npages;
93 	int		color;
94 
95 	as = NULL;
96 	/* return if already mapped in, no pageio/physio, or physio to kas */
97 	if ((bp->b_flags & B_REMAPPED) ||
98 	    !(bp->b_flags & (B_PAGEIO | B_PHYS)) ||
99 	    (((bp->b_flags & (B_PAGEIO | B_PHYS)) == B_PHYS) &&
100 	    ((bp->b_proc == NULL) || (bp->b_proc->p_as == &kas))))
101 		return (bp->b_un.b_addr);
102 
103 	ASSERT((bp->b_flags & (B_PAGEIO | B_PHYS)) != (B_PAGEIO | B_PHYS));
104 
105 	addr = (caddr_t)bp->b_un.b_addr;
106 	off = (uintptr_t)addr & PAGEOFFSET;
107 	size = P2ROUNDUP(bp->b_bcount + off, PAGESIZE);
108 	npages = btop(size);
109 
110 	/* Fastpath single page IO to locked memory by using kpm. */
111 	if ((bp->b_flags & (B_SHADOW | B_PAGEIO)) && (npages == 1) &&
112 	    kpm_enable && bp_mapin_kpm_enable) {
113 		if (bp->b_flags & B_SHADOW)
114 			pp = *bp->b_shadow;
115 		else
116 			pp = bp->b_pages;
117 		kaddr = hat_kpm_mapin(pp, NULL);
118 		bp->b_un.b_addr = kaddr + off;
119 		bp->b_flags |= B_REMAPPED;
120 		return (bp->b_un.b_addr);
121 	}
122 
123 	/*
124 	 * Allocate kernel virtual space for remapping.
125 	 */
126 	color = bp_color(bp);
127 	ASSERT(color < bp_align);
128 
129 	if (bp_map_arena != NULL) {
130 		kaddr = (caddr_t)vmem_alloc(bp_map_arena,
131 		    P2ROUNDUP(color + size, bp_align), flag);
132 		if (kaddr == NULL)
133 			return (NULL);
134 		kaddr += color;
135 	} else {
136 		kaddr = vmem_xalloc(heap_arena, size, bp_align, color,
137 		    0, NULL, NULL, flag);
138 		if (kaddr == NULL)
139 			return (NULL);
140 	}
141 
142 	ASSERT(P2PHASE((uintptr_t)kaddr, bp_align) == color);
143 
144 	/*
145 	 * Map bp into the virtual space we just allocated.
146 	 */
147 	if (bp->b_flags & B_PAGEIO) {
148 		pp = bp->b_pages;
149 		pplist = NULL;
150 	} else if (bp->b_flags & B_SHADOW) {
151 		pp = NULL;
152 		pplist = bp->b_shadow;
153 	} else {
154 		pp = NULL;
155 		pplist = NULL;
156 		if (bp->b_proc == NULL || (as = bp->b_proc->p_as) == NULL)
157 			as = &kas;
158 	}
159 
160 	bp->b_flags |= B_REMAPPED;
161 	bp->b_un.b_addr = kaddr + off;
162 
163 	while (npages-- != 0) {
164 		if (pp) {
165 			pfnum = pp->p_pagenum;
166 			pp = pp->p_next;
167 		} else if (pplist == NULL) {
168 			pfnum = hat_getpfnum(as->a_hat,
169 			    (caddr_t)((uintptr_t)addr & MMU_PAGEMASK));
170 			if (pfnum == PFN_INVALID)
171 				panic("bp_mapin_common: hat_getpfnum for"
172 				    " addr %p failed\n", (void *)addr);
173 			addr += PAGESIZE;
174 		} else {
175 			pfnum = (*pplist)->p_pagenum;
176 			pplist++;
177 		}
178 
179 		hat_devload(kas.a_hat, kaddr, PAGESIZE, pfnum,
180 		    bp_devload_flags, HAT_LOAD_LOCK);
181 
182 		kaddr += PAGESIZE;
183 	}
184 	return (bp->b_un.b_addr);
185 }
186 
187 /*
188  * Convert bp for pageio/physio to a kernel addressable location.
189  */
190 void
bp_mapin(struct buf * bp)191 bp_mapin(struct buf *bp)
192 {
193 	(void) bp_mapin_common(bp, VM_SLEEP);
194 }
195 
196 /*
197  * Release all the resources associated with a previous bp_mapin() call.
198  */
199 void
bp_mapout(struct buf * bp)200 bp_mapout(struct buf *bp)
201 {
202 	caddr_t		addr;
203 	uintptr_t	off;
204 	uintptr_t	base;
205 	uintptr_t	color;
206 	size_t		size;
207 	pgcnt_t		npages;
208 	page_t		*pp;
209 
210 	if ((bp->b_flags & B_REMAPPED) == 0)
211 		return;
212 
213 	addr = bp->b_un.b_addr;
214 	off = (uintptr_t)addr & PAGEOFFSET;
215 	size = P2ROUNDUP(bp->b_bcount + off, PAGESIZE);
216 	npages = btop(size);
217 
218 	bp->b_un.b_addr = (caddr_t)off;		/* debugging aid */
219 
220 	if ((bp->b_flags & (B_SHADOW | B_PAGEIO)) && (npages == 1) &&
221 	    kpm_enable && bp_mapin_kpm_enable) {
222 		if (bp->b_flags & B_SHADOW)
223 			pp = *bp->b_shadow;
224 		else
225 			pp = bp->b_pages;
226 		addr = (caddr_t)((uintptr_t)addr & MMU_PAGEMASK);
227 		hat_kpm_mapout(pp, NULL, addr);
228 		bp->b_flags &= ~B_REMAPPED;
229 		return;
230 	}
231 
232 	base = (uintptr_t)addr & MMU_PAGEMASK;
233 	BP_FLUSH(base, size);
234 	hat_unload(kas.a_hat, (void *)base, size,
235 	    HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK);
236 	if (bp_map_arena != NULL) {
237 		color = P2PHASE(base, bp_align);
238 		vmem_free(bp_map_arena, (void *)(base - color),
239 		    P2ROUNDUP(color + size, bp_align));
240 	} else
241 		vmem_free(heap_arena, (void *)base, size);
242 	bp->b_flags &= ~B_REMAPPED;
243 }
244 
245 /*
246  * copy data from a KVA into a buf_t which may not be mapped in. offset
247  * is relative to the buf_t only.
248  */
249 int
bp_copyout(void * driverbuf,struct buf * bp,offset_t offset,size_t size)250 bp_copyout(void *driverbuf, struct buf *bp, offset_t offset, size_t size)
251 {
252 	return (bp_copy_common(BP_COPYOUT, bp, driverbuf, offset, size));
253 }
254 
255 /*
256  * copy data from a buf_t which may not be mapped in, into a KVA.. offset
257  * is relative to the buf_t only.
258  */
259 int
bp_copyin(struct buf * bp,void * driverbuf,offset_t offset,size_t size)260 bp_copyin(struct buf *bp, void *driverbuf, offset_t offset, size_t size)
261 {
262 	return (bp_copy_common(BP_COPYIN, bp, driverbuf, offset, size));
263 }
264 
265 
/*
 * BP_COPY(dir, driverbuf, baddr, sz): bcopy() sz bytes between driverbuf and
 * baddr.  BP_COPYIN copies baddr -> driverbuf; any other dir copies
 * driverbuf -> baddr.
 *
 * Every argument and the expansion as a whole are parenthesized so that an
 * expression argument (for example a conditional passed as dir) cannot
 * re-associate with the operators inside the macro.
 */
#define	BP_COPY(dir, driverbuf, baddr, sz)	\
	(((dir) == BP_COPYIN) ? \
	bcopy((baddr), (driverbuf), (sz)) : bcopy((driverbuf), (baddr), (sz)))
269 
270 static int
bp_copy_common(bp_copydir_t dir,struct buf * bp,void * driverbuf,offset_t offset,size_t size)271 bp_copy_common(bp_copydir_t dir, struct buf *bp, void *driverbuf,
272     offset_t offset, size_t size)
273 {
274 	page_t **pplist;
275 	uintptr_t poff;
276 	uintptr_t voff;
277 	struct as *as;
278 	caddr_t kaddr;
279 	caddr_t addr;
280 	page_t *page;
281 	size_t psize;
282 	page_t *pp;
283 	pfn_t pfn;
284 
285 	ASSERT((offset + size) <= bp->b_bcount);
286 	as = NULL;
287 
288 	/* if the buf_t already has a KVA, just do a bcopy */
289 	if (!(bp->b_flags & (B_PHYS | B_PAGEIO))) {
290 		BP_COPY(dir, driverbuf, bp->b_un.b_addr + offset, size);
291 		return (0);
292 	}
293 
294 	/* if we don't have kpm enabled, we need to do the slow path */
295 	if (!kpm_enable || bp_force_copy) {
296 		bp_mapin(bp);
297 		BP_COPY(dir, driverbuf, bp->b_un.b_addr + offset, size);
298 		bp_mapout(bp);
299 		return (0);
300 	}
301 
302 	/*
303 	 * kpm is enabled, and we need to map in the buf_t for the copy
304 	 */
305 
306 	/* setup pp, plist, and make sure 'as' is right */
307 	if (bp->b_flags & B_PAGEIO) {
308 		pp = bp->b_pages;
309 		pplist = NULL;
310 	} else if (bp->b_flags & B_SHADOW) {
311 		pp = NULL;
312 		pplist = bp->b_shadow;
313 	} else {
314 		pp = NULL;
315 		pplist = NULL;
316 		if (bp->b_proc == NULL || (as = bp->b_proc->p_as) == NULL) {
317 			as = &kas;
318 		}
319 	}
320 
321 	/*
322 	 * locals for the address, the offset into the first page, and the
323 	 * size of the first page we are going to copy.
324 	 */
325 	addr = (caddr_t)bp->b_un.b_addr;
326 	poff = (uintptr_t)addr & PAGEOFFSET;
327 	psize = MIN(PAGESIZE - poff, size);
328 
329 	/*
330 	 * we always start with a 0 offset into the driverbuf provided. The
331 	 * offset passed in only applies to the buf_t.
332 	 */
333 	voff = 0;
334 
335 	/* Loop until we've copied al the data */
336 	while (size > 0) {
337 
338 		/*
339 		 * for a pp or pplist, get the pfn, then go to the next page_t
340 		 * for the next time around the loop.
341 		 */
342 		if (pp) {
343 			page = pp;
344 			pp = pp->p_next;
345 		} else if (pplist != NULL) {
346 			page = (*pplist);
347 			pplist++;
348 
349 		/*
350 		 * We have a user VA. If we are going to copy this page, (e.g.
351 		 * the offset into the buf_t where we start to copy is
352 		 * within this page), get the pfn. Don't waste the cycles
353 		 * getting the pfn if we're not copying this page.
354 		 */
355 		} else if (offset < psize) {
356 			pfn = hat_getpfnum(as->a_hat,
357 			    (caddr_t)((uintptr_t)addr & PAGEMASK));
358 			if (pfn == PFN_INVALID) {
359 				return (-1);
360 			}
361 			page = page_numtopp_nolock(pfn);
362 			addr += psize - offset;
363 		} else {
364 			addr += psize;
365 		}
366 
367 		/*
368 		 * if we have an initial offset into the buf_t passed in,
369 		 * and it falls within the current page, account for it in
370 		 * the page size (how much we will copy) and the offset into the
371 		 * page (where we'll start copying from).
372 		 */
373 		if ((offset > 0) && (offset < psize)) {
374 			psize -= offset;
375 			poff += offset;
376 			offset = 0;
377 
378 		/*
379 		 * if we have an initial offset into the buf_t passed in,
380 		 * and it's not within the current page, skip this page.
381 		 * We don't have to worry about the first page offset and size
382 		 * anymore. psize will normally be PAGESIZE now unless we are
383 		 * on the last page.
384 		 */
385 		} else if (offset >= psize) {
386 			offset -= psize;
387 			psize = MIN(PAGESIZE, size);
388 			poff = 0;
389 			continue;
390 		}
391 
392 		/*
393 		 * get a kpm mapping to the page, them copy in/out of the
394 		 * page. update size left and offset into the driverbuf passed
395 		 * in for the next time around the loop.
396 		 */
397 		kaddr = hat_kpm_mapin(page, NULL) + poff;
398 		BP_COPY(dir, (void *)((uintptr_t)driverbuf + voff), kaddr,
399 		    psize);
400 		hat_kpm_mapout(page, NULL, kaddr - poff);
401 
402 		size -= psize;
403 		voff += psize;
404 
405 		poff = 0;
406 		psize = MIN(PAGESIZE, size);
407 	}
408 
409 	return (0);
410 }
411