xref: /illumos-gate/usr/src/uts/common/os/bp_map.c (revision 584b574a3b16c6772c8204ec1d1c957c56f22a87)
1  /*
2   * CDDL HEADER START
3   *
4   * The contents of this file are subject to the terms of the
5   * Common Development and Distribution License (the "License").
6   * You may not use this file except in compliance with the License.
7   *
8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9   * or http://www.opensolaris.org/os/licensing.
10   * See the License for the specific language governing permissions
11   * and limitations under the License.
12   *
13   * When distributing Covered Code, include this CDDL HEADER in each
14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15   * If applicable, add the following below this CDDL HEADER, with the
16   * fields enclosed by brackets "[]" replaced with your own identifying
17   * information: Portions Copyright [yyyy] [name of copyright owner]
18   *
19   * CDDL HEADER END
20   */
21  /*
22   * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23   * Use is subject to license terms.
24   */
25  
26  #include <sys/types.h>
27  #include <sys/sysmacros.h>
28  #include <sys/systm.h>
29  #include <sys/mman.h>
30  #include <sys/buf.h>
31  #include <sys/vmem.h>
32  #include <sys/cmn_err.h>
33  #include <sys/debug.h>
34  #include <sys/machparam.h>
35  #include <vm/page.h>
36  #include <vm/seg_kmem.h>
37  #include <vm/seg_kpm.h>
38  
#ifdef __sparc
#include <sys/cpu_module.h>
/*
 * On sparc, flush the instruction cache over a VA range before its
 * translations are torn down (used by bp_mapout()).  Other platforms
 * need no flush, so the macro expands to nothing.
 */
#define	BP_FLUSH(addr, size)	flush_instr_mem((void *)addr, size);
#else
#define	BP_FLUSH(addr, size)
#endif

/* tunable: force bp_copy_common() down the slow bp_mapin/bp_mapout path */
int bp_force_copy = 0;

/* direction of a bp_copy_common() transfer relative to the buf_t */
typedef enum {
	BP_COPYIN	= 0,	/* buf_t -> driver buffer */
	BP_COPYOUT	= 1	/* driver buffer -> buf_t */
} bp_copydir_t;
static int bp_copy_common(bp_copydir_t dir, struct buf *bp, void *driverbuf,
    offset_t offset, size_t size);

static vmem_t *bp_map_arena;	/* VA arena for small mappings; see bp_init() */
static size_t bp_align;		/* alignment for bp VA allocations (>= PAGESIZE) */
static uint_t bp_devload_flags = PROT_READ | PROT_WRITE | HAT_NOSYNC;
int	bp_max_cache = 1 << 17;		/* 128K default; tunable */
int	bp_mapin_kpm_enable = 1;	/* enable default; tunable */
59  
60  static void *
bp_vmem_alloc(vmem_t * vmp,size_t size,int vmflag)61  bp_vmem_alloc(vmem_t *vmp, size_t size, int vmflag)
62  {
63  	return (vmem_xalloc(vmp, size, bp_align, 0, 0, NULL, NULL, vmflag));
64  }
65  
66  void
bp_init(size_t align,uint_t devload_flags)67  bp_init(size_t align, uint_t devload_flags)
68  {
69  	bp_align = MAX(align, PAGESIZE);
70  	bp_devload_flags |= devload_flags;
71  
72  	if (bp_align <= bp_max_cache)
73  		bp_map_arena = vmem_create("bp_map", NULL, 0, bp_align,
74  		    bp_vmem_alloc, vmem_free, heap_arena,
75  		    MIN(8 * bp_align, bp_max_cache), VM_SLEEP);
76  }
77  
78  /*
79   * common routine so can be called with/without VM_SLEEP
80   */
81  void *
bp_mapin_common(struct buf * bp,int flag)82  bp_mapin_common(struct buf *bp, int flag)
83  {
84  	struct as	*as;
85  	pfn_t		pfnum;
86  	page_t		*pp;
87  	page_t		**pplist;
88  	caddr_t		kaddr;
89  	caddr_t		addr;
90  	uintptr_t	off;
91  	size_t		size;
92  	pgcnt_t		npages;
93  	int		color;
94  
95  	as = NULL;
96  	/* return if already mapped in, no pageio/physio, or physio to kas */
97  	if ((bp->b_flags & B_REMAPPED) ||
98  	    !(bp->b_flags & (B_PAGEIO | B_PHYS)) ||
99  	    (((bp->b_flags & (B_PAGEIO | B_PHYS)) == B_PHYS) &&
100  	    ((bp->b_proc == NULL) || (bp->b_proc->p_as == &kas))))
101  		return (bp->b_un.b_addr);
102  
103  	ASSERT((bp->b_flags & (B_PAGEIO | B_PHYS)) != (B_PAGEIO | B_PHYS));
104  
105  	addr = (caddr_t)bp->b_un.b_addr;
106  	off = (uintptr_t)addr & PAGEOFFSET;
107  	size = P2ROUNDUP(bp->b_bcount + off, PAGESIZE);
108  	npages = btop(size);
109  
110  	/* Fastpath single page IO to locked memory by using kpm. */
111  	if ((bp->b_flags & (B_SHADOW | B_PAGEIO)) && (npages == 1) &&
112  	    kpm_enable && bp_mapin_kpm_enable) {
113  		if (bp->b_flags & B_SHADOW)
114  			pp = *bp->b_shadow;
115  		else
116  			pp = bp->b_pages;
117  		kaddr = hat_kpm_mapin(pp, NULL);
118  		bp->b_un.b_addr = kaddr + off;
119  		bp->b_flags |= B_REMAPPED;
120  		return (bp->b_un.b_addr);
121  	}
122  
123  	/*
124  	 * Allocate kernel virtual space for remapping.
125  	 */
126  	color = bp_color(bp);
127  	ASSERT(color < bp_align);
128  
129  	if (bp_map_arena != NULL) {
130  		kaddr = (caddr_t)vmem_alloc(bp_map_arena,
131  		    P2ROUNDUP(color + size, bp_align), flag);
132  		if (kaddr == NULL)
133  			return (NULL);
134  		kaddr += color;
135  	} else {
136  		kaddr = vmem_xalloc(heap_arena, size, bp_align, color,
137  		    0, NULL, NULL, flag);
138  		if (kaddr == NULL)
139  			return (NULL);
140  	}
141  
142  	ASSERT(P2PHASE((uintptr_t)kaddr, bp_align) == color);
143  
144  	/*
145  	 * Map bp into the virtual space we just allocated.
146  	 */
147  	if (bp->b_flags & B_PAGEIO) {
148  		pp = bp->b_pages;
149  		pplist = NULL;
150  	} else if (bp->b_flags & B_SHADOW) {
151  		pp = NULL;
152  		pplist = bp->b_shadow;
153  	} else {
154  		pp = NULL;
155  		pplist = NULL;
156  		if (bp->b_proc == NULL || (as = bp->b_proc->p_as) == NULL)
157  			as = &kas;
158  	}
159  
160  	bp->b_flags |= B_REMAPPED;
161  	bp->b_un.b_addr = kaddr + off;
162  
163  	while (npages-- != 0) {
164  		if (pp) {
165  			pfnum = pp->p_pagenum;
166  			pp = pp->p_next;
167  		} else if (pplist == NULL) {
168  			pfnum = hat_getpfnum(as->a_hat,
169  			    (caddr_t)((uintptr_t)addr & MMU_PAGEMASK));
170  			if (pfnum == PFN_INVALID)
171  				panic("bp_mapin_common: hat_getpfnum for"
172  				    " addr %p failed\n", (void *)addr);
173  			addr += PAGESIZE;
174  		} else {
175  			pfnum = (*pplist)->p_pagenum;
176  			pplist++;
177  		}
178  
179  		hat_devload(kas.a_hat, kaddr, PAGESIZE, pfnum,
180  		    bp_devload_flags, HAT_LOAD_LOCK);
181  
182  		kaddr += PAGESIZE;
183  	}
184  	return (bp->b_un.b_addr);
185  }
186  
187  /*
188   * Convert bp for pageio/physio to a kernel addressable location.
189   */
190  void
bp_mapin(struct buf * bp)191  bp_mapin(struct buf *bp)
192  {
193  	(void) bp_mapin_common(bp, VM_SLEEP);
194  }
195  
196  /*
197   * Release all the resources associated with a previous bp_mapin() call.
198   */
199  void
bp_mapout(struct buf * bp)200  bp_mapout(struct buf *bp)
201  {
202  	caddr_t		addr;
203  	uintptr_t	off;
204  	uintptr_t	base;
205  	uintptr_t	color;
206  	size_t		size;
207  	pgcnt_t		npages;
208  	page_t		*pp;
209  
210  	if ((bp->b_flags & B_REMAPPED) == 0)
211  		return;
212  
213  	addr = bp->b_un.b_addr;
214  	off = (uintptr_t)addr & PAGEOFFSET;
215  	size = P2ROUNDUP(bp->b_bcount + off, PAGESIZE);
216  	npages = btop(size);
217  
218  	bp->b_un.b_addr = (caddr_t)off;		/* debugging aid */
219  
220  	if ((bp->b_flags & (B_SHADOW | B_PAGEIO)) && (npages == 1) &&
221  	    kpm_enable && bp_mapin_kpm_enable) {
222  		if (bp->b_flags & B_SHADOW)
223  			pp = *bp->b_shadow;
224  		else
225  			pp = bp->b_pages;
226  		addr = (caddr_t)((uintptr_t)addr & MMU_PAGEMASK);
227  		hat_kpm_mapout(pp, NULL, addr);
228  		bp->b_flags &= ~B_REMAPPED;
229  		return;
230  	}
231  
232  	base = (uintptr_t)addr & MMU_PAGEMASK;
233  	BP_FLUSH(base, size);
234  	hat_unload(kas.a_hat, (void *)base, size,
235  	    HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK);
236  	if (bp_map_arena != NULL) {
237  		color = P2PHASE(base, bp_align);
238  		vmem_free(bp_map_arena, (void *)(base - color),
239  		    P2ROUNDUP(color + size, bp_align));
240  	} else
241  		vmem_free(heap_arena, (void *)base, size);
242  	bp->b_flags &= ~B_REMAPPED;
243  }
244  
245  /*
246   * copy data from a KVA into a buf_t which may not be mapped in. offset
247   * is relative to the buf_t only.
248   */
249  int
bp_copyout(void * driverbuf,struct buf * bp,offset_t offset,size_t size)250  bp_copyout(void *driverbuf, struct buf *bp, offset_t offset, size_t size)
251  {
252  	return (bp_copy_common(BP_COPYOUT, bp, driverbuf, offset, size));
253  }
254  
255  /*
256   * copy data from a buf_t which may not be mapped in, into a KVA.. offset
257   * is relative to the buf_t only.
258   */
259  int
bp_copyin(struct buf * bp,void * driverbuf,offset_t offset,size_t size)260  bp_copyin(struct buf *bp, void *driverbuf, offset_t offset, size_t size)
261  {
262  	return (bp_copy_common(BP_COPYIN, bp, driverbuf, offset, size));
263  }
264  
265  
/*
 * Copy 'sz' bytes between the driver buffer and the buf_t kernel address
 * 'baddr': BP_COPYIN copies baddr -> driverbuf, BP_COPYOUT the reverse.
 */
#define	BP_COPY(dir, driverbuf, baddr, sz)	\
	(dir == BP_COPYIN) ? \
	bcopy(baddr, driverbuf, sz) :  bcopy(driverbuf, baddr, sz)
269  
/*
 * Worker for bp_copyin()/bp_copyout(): copy 'size' bytes between
 * 'driverbuf' and the data underlying 'bp', starting 'offset' bytes into
 * the buf_t.  If the buf already has a KVA the copy is a plain bcopy();
 * otherwise each page is transiently mapped via kpm (or, when kpm is
 * unavailable or bp_force_copy is set, the whole buf is mapped in and
 * back out around one bcopy).  Returns 0 on success, -1 if a user VA
 * cannot be translated to a pfn.
 */
static int
bp_copy_common(bp_copydir_t dir, struct buf *bp, void *driverbuf,
    offset_t offset, size_t size)
{
	page_t **pplist;
	uintptr_t poff;
	uintptr_t voff;
	struct as *as;
	caddr_t kaddr;
	caddr_t addr;
	page_t *page;
	size_t psize;
	page_t *pp;
	pfn_t pfn;

	ASSERT((offset + size) <= bp->b_bcount);
	as = NULL;

	/* if the buf_t already has a KVA, just do a bcopy */
	if (!(bp->b_flags & (B_PHYS | B_PAGEIO))) {
		BP_COPY(dir, driverbuf, bp->b_un.b_addr + offset, size);
		return (0);
	}

	/* if we don't have kpm enabled, we need to do the slow path */
	if (!kpm_enable || bp_force_copy) {
		bp_mapin(bp);
		BP_COPY(dir, driverbuf, bp->b_un.b_addr + offset, size);
		bp_mapout(bp);
		return (0);
	}

	/*
	 * kpm is enabled, and we need to map in the buf_t for the copy
	 */

	/* setup pp, plist, and make sure 'as' is right */
	if (bp->b_flags & B_PAGEIO) {
		/* pages are on the b_pages linked list */
		pp = bp->b_pages;
		pplist = NULL;
	} else if (bp->b_flags & B_SHADOW) {
		/* pages are in the b_shadow page_t pointer array */
		pp = NULL;
		pplist = bp->b_shadow;
	} else {
		/* physio by VA: pfns come from the process (or kernel) hat */
		pp = NULL;
		pplist = NULL;
		if (bp->b_proc == NULL || (as = bp->b_proc->p_as) == NULL) {
			as = &kas;
		}
	}

	/*
	 * locals for the address, the offset into the first page, and the
	 * size of the first page we are going to copy.
	 */
	addr = (caddr_t)bp->b_un.b_addr;
	poff = (uintptr_t)addr & PAGEOFFSET;
	psize = MIN(PAGESIZE - poff, size);

	/*
	 * we always start with a 0 offset into the driverbuf provided. The
	 * offset passed in only applies to the buf_t.
	 */
	voff = 0;

	/* Loop until we've copied all the data */
	while (size > 0) {

		/*
		 * for a pp or pplist, get the pfn, then go to the next page_t
		 * for the next time around the loop.
		 */
		if (pp) {
			page = pp;
			pp = pp->p_next;
		} else if (pplist != NULL) {
			page = (*pplist);
			pplist++;

		/*
		 * We have a user VA. If we are going to copy this page, (e.g.
		 * the offset into the buf_t where we start to copy is
		 * within this page), get the pfn. Don't waste the cycles
		 * getting the pfn if we're not copying this page.
		 */
		} else if (offset < psize) {
			pfn = hat_getpfnum(as->a_hat,
			    (caddr_t)((uintptr_t)addr & PAGEMASK));
			if (pfn == PFN_INVALID) {
				return (-1);
			}
			page = page_numtopp_nolock(pfn);
			addr += psize - offset;
		} else {
			/* skipping this page entirely; just advance the VA */
			addr += psize;
		}

		/*
		 * if we have an initial offset into the buf_t passed in,
		 * and it falls within the current page, account for it in
		 * the page size (how much we will copy) and the offset into the
		 * page (where we'll start copying from).
		 */
		if ((offset > 0) && (offset < psize)) {
			psize -= offset;
			poff += offset;
			offset = 0;

		/*
		 * if we have an initial offset into the buf_t passed in,
		 * and it's not within the current page, skip this page.
		 * We don't have to worry about the first page offset and size
		 * anymore. psize will normally be PAGESIZE now unless we are
		 * on the last page.
		 */
		} else if (offset >= psize) {
			offset -= psize;
			psize = MIN(PAGESIZE, size);
			poff = 0;
			continue;
		}

		/*
		 * get a kpm mapping to the page, then copy in/out of the
		 * page. update size left and offset into the driverbuf passed
		 * in for the next time around the loop.
		 */
		kaddr = hat_kpm_mapin(page, NULL) + poff;
		BP_COPY(dir, (void *)((uintptr_t)driverbuf + voff), kaddr,
		    psize);
		hat_kpm_mapout(page, NULL, kaddr - poff);

		size -= psize;
		voff += psize;

		/* subsequent pages are copied from their start */
		poff = 0;
		psize = MIN(PAGESIZE, size);
	}

	return (0);
}
411