1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #include <sys/types.h>
27 #include <sys/sysmacros.h>
28 #include <sys/systm.h>
29 #include <sys/mman.h>
30 #include <sys/buf.h>
31 #include <sys/vmem.h>
32 #include <sys/cmn_err.h>
33 #include <sys/debug.h>
34 #include <sys/machparam.h>
35 #include <vm/page.h>
36 #include <vm/seg_kmem.h>
37 #include <vm/seg_kpm.h>
38
/*
 * BP_FLUSH(addr, size): on sparc, calls flush_instr_mem() on the given range;
 * on every other platform it is a no-op.  bp_mapout() invokes it on a mapping
 * just before that mapping is unloaded.
 *
 * The expansion is a single expression without a trailing semicolon and with
 * its arguments parenthesized, so "BP_FLUSH(a, s);" is exactly one statement
 * and expression arguments are cast/evaluated as a whole.
 */
#ifdef __sparc
#include <sys/cpu_module.h>
#define	BP_FLUSH(addr, size)	flush_instr_mem((void *)(addr), (size))
#else
#define	BP_FLUSH(addr, size)	((void)0)
#endif
45
46 int bp_force_copy = 0;
47 typedef enum {
48 BP_COPYIN = 0,
49 BP_COPYOUT = 1
50 } bp_copydir_t;
51 static int bp_copy_common(bp_copydir_t dir, struct buf *bp, void *driverbuf,
52 offset_t offset, size_t size);
53
54 static vmem_t *bp_map_arena;
55 static size_t bp_align;
56 static uint_t bp_devload_flags = PROT_READ | PROT_WRITE | HAT_NOSYNC;
57 int bp_max_cache = 1 << 17; /* 128K default; tunable */
58 int bp_mapin_kpm_enable = 1; /* enable default; tunable */
59
60 static void *
bp_vmem_alloc(vmem_t * vmp,size_t size,int vmflag)61 bp_vmem_alloc(vmem_t *vmp, size_t size, int vmflag)
62 {
63 return (vmem_xalloc(vmp, size, bp_align, 0, 0, NULL, NULL, vmflag));
64 }
65
66 void
bp_init(size_t align,uint_t devload_flags)67 bp_init(size_t align, uint_t devload_flags)
68 {
69 bp_align = MAX(align, PAGESIZE);
70 bp_devload_flags |= devload_flags;
71
72 if (bp_align <= bp_max_cache)
73 bp_map_arena = vmem_create("bp_map", NULL, 0, bp_align,
74 bp_vmem_alloc, vmem_free, heap_arena,
75 MIN(8 * bp_align, bp_max_cache), VM_SLEEP);
76 }
77
78 /*
79 * common routine so can be called with/without VM_SLEEP
80 */
81 void *
bp_mapin_common(struct buf * bp,int flag)82 bp_mapin_common(struct buf *bp, int flag)
83 {
84 struct as *as;
85 pfn_t pfnum;
86 page_t *pp;
87 page_t **pplist;
88 caddr_t kaddr;
89 caddr_t addr;
90 uintptr_t off;
91 size_t size;
92 pgcnt_t npages;
93 int color;
94
95 as = NULL;
96 /* return if already mapped in, no pageio/physio, or physio to kas */
97 if ((bp->b_flags & B_REMAPPED) ||
98 !(bp->b_flags & (B_PAGEIO | B_PHYS)) ||
99 (((bp->b_flags & (B_PAGEIO | B_PHYS)) == B_PHYS) &&
100 ((bp->b_proc == NULL) || (bp->b_proc->p_as == &kas))))
101 return (bp->b_un.b_addr);
102
103 ASSERT((bp->b_flags & (B_PAGEIO | B_PHYS)) != (B_PAGEIO | B_PHYS));
104
105 addr = (caddr_t)bp->b_un.b_addr;
106 off = (uintptr_t)addr & PAGEOFFSET;
107 size = P2ROUNDUP(bp->b_bcount + off, PAGESIZE);
108 npages = btop(size);
109
110 /* Fastpath single page IO to locked memory by using kpm. */
111 if ((bp->b_flags & (B_SHADOW | B_PAGEIO)) && (npages == 1) &&
112 kpm_enable && bp_mapin_kpm_enable) {
113 if (bp->b_flags & B_SHADOW)
114 pp = *bp->b_shadow;
115 else
116 pp = bp->b_pages;
117 kaddr = hat_kpm_mapin(pp, NULL);
118 bp->b_un.b_addr = kaddr + off;
119 bp->b_flags |= B_REMAPPED;
120 return (bp->b_un.b_addr);
121 }
122
123 /*
124 * Allocate kernel virtual space for remapping.
125 */
126 color = bp_color(bp);
127 ASSERT(color < bp_align);
128
129 if (bp_map_arena != NULL) {
130 kaddr = (caddr_t)vmem_alloc(bp_map_arena,
131 P2ROUNDUP(color + size, bp_align), flag);
132 if (kaddr == NULL)
133 return (NULL);
134 kaddr += color;
135 } else {
136 kaddr = vmem_xalloc(heap_arena, size, bp_align, color,
137 0, NULL, NULL, flag);
138 if (kaddr == NULL)
139 return (NULL);
140 }
141
142 ASSERT(P2PHASE((uintptr_t)kaddr, bp_align) == color);
143
144 /*
145 * Map bp into the virtual space we just allocated.
146 */
147 if (bp->b_flags & B_PAGEIO) {
148 pp = bp->b_pages;
149 pplist = NULL;
150 } else if (bp->b_flags & B_SHADOW) {
151 pp = NULL;
152 pplist = bp->b_shadow;
153 } else {
154 pp = NULL;
155 pplist = NULL;
156 if (bp->b_proc == NULL || (as = bp->b_proc->p_as) == NULL)
157 as = &kas;
158 }
159
160 bp->b_flags |= B_REMAPPED;
161 bp->b_un.b_addr = kaddr + off;
162
163 while (npages-- != 0) {
164 if (pp) {
165 pfnum = pp->p_pagenum;
166 pp = pp->p_next;
167 } else if (pplist == NULL) {
168 pfnum = hat_getpfnum(as->a_hat,
169 (caddr_t)((uintptr_t)addr & MMU_PAGEMASK));
170 if (pfnum == PFN_INVALID)
171 panic("bp_mapin_common: hat_getpfnum for"
172 " addr %p failed\n", (void *)addr);
173 addr += PAGESIZE;
174 } else {
175 pfnum = (*pplist)->p_pagenum;
176 pplist++;
177 }
178
179 hat_devload(kas.a_hat, kaddr, PAGESIZE, pfnum,
180 bp_devload_flags, HAT_LOAD_LOCK);
181
182 kaddr += PAGESIZE;
183 }
184 return (bp->b_un.b_addr);
185 }
186
187 /*
188 * Convert bp for pageio/physio to a kernel addressable location.
189 */
190 void
bp_mapin(struct buf * bp)191 bp_mapin(struct buf *bp)
192 {
193 (void) bp_mapin_common(bp, VM_SLEEP);
194 }
195
196 /*
197 * Release all the resources associated with a previous bp_mapin() call.
198 */
199 void
bp_mapout(struct buf * bp)200 bp_mapout(struct buf *bp)
201 {
202 caddr_t addr;
203 uintptr_t off;
204 uintptr_t base;
205 uintptr_t color;
206 size_t size;
207 pgcnt_t npages;
208 page_t *pp;
209
210 if ((bp->b_flags & B_REMAPPED) == 0)
211 return;
212
213 addr = bp->b_un.b_addr;
214 off = (uintptr_t)addr & PAGEOFFSET;
215 size = P2ROUNDUP(bp->b_bcount + off, PAGESIZE);
216 npages = btop(size);
217
218 bp->b_un.b_addr = (caddr_t)off; /* debugging aid */
219
220 if ((bp->b_flags & (B_SHADOW | B_PAGEIO)) && (npages == 1) &&
221 kpm_enable && bp_mapin_kpm_enable) {
222 if (bp->b_flags & B_SHADOW)
223 pp = *bp->b_shadow;
224 else
225 pp = bp->b_pages;
226 addr = (caddr_t)((uintptr_t)addr & MMU_PAGEMASK);
227 hat_kpm_mapout(pp, NULL, addr);
228 bp->b_flags &= ~B_REMAPPED;
229 return;
230 }
231
232 base = (uintptr_t)addr & MMU_PAGEMASK;
233 BP_FLUSH(base, size);
234 hat_unload(kas.a_hat, (void *)base, size,
235 HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK);
236 if (bp_map_arena != NULL) {
237 color = P2PHASE(base, bp_align);
238 vmem_free(bp_map_arena, (void *)(base - color),
239 P2ROUNDUP(color + size, bp_align));
240 } else
241 vmem_free(heap_arena, (void *)base, size);
242 bp->b_flags &= ~B_REMAPPED;
243 }
244
245 /*
246 * copy data from a KVA into a buf_t which may not be mapped in. offset
247 * is relative to the buf_t only.
248 */
249 int
bp_copyout(void * driverbuf,struct buf * bp,offset_t offset,size_t size)250 bp_copyout(void *driverbuf, struct buf *bp, offset_t offset, size_t size)
251 {
252 return (bp_copy_common(BP_COPYOUT, bp, driverbuf, offset, size));
253 }
254
255 /*
256 * copy data from a buf_t which may not be mapped in, into a KVA.. offset
257 * is relative to the buf_t only.
258 */
259 int
bp_copyin(struct buf * bp,void * driverbuf,offset_t offset,size_t size)260 bp_copyin(struct buf *bp, void *driverbuf, offset_t offset, size_t size)
261 {
262 return (bp_copy_common(BP_COPYIN, bp, driverbuf, offset, size));
263 }
264
265
/*
 * BP_COPY(dir, driverbuf, baddr, sz): bcopy() 'sz' bytes between the driver
 * buffer and the buf_t address 'baddr'.  BP_COPYIN copies baddr -> driverbuf;
 * any other direction copies driverbuf -> baddr.
 *
 * Every argument and the whole expansion are parenthesized so that an
 * expression passed as an argument (e.g. a conditional for 'dir') cannot
 * re-associate with the operators inside the macro.
 */
#define	BP_COPY(dir, driverbuf, baddr, sz)			\
	(((dir) == BP_COPYIN) ?					\
	bcopy((baddr), (driverbuf), (sz)) :			\
	bcopy((driverbuf), (baddr), (sz)))
269
270 static int
bp_copy_common(bp_copydir_t dir,struct buf * bp,void * driverbuf,offset_t offset,size_t size)271 bp_copy_common(bp_copydir_t dir, struct buf *bp, void *driverbuf,
272 offset_t offset, size_t size)
273 {
274 page_t **pplist;
275 uintptr_t poff;
276 uintptr_t voff;
277 struct as *as;
278 caddr_t kaddr;
279 caddr_t addr;
280 page_t *page;
281 size_t psize;
282 page_t *pp;
283 pfn_t pfn;
284
285 ASSERT((offset + size) <= bp->b_bcount);
286 as = NULL;
287
288 /* if the buf_t already has a KVA, just do a bcopy */
289 if (!(bp->b_flags & (B_PHYS | B_PAGEIO))) {
290 BP_COPY(dir, driverbuf, bp->b_un.b_addr + offset, size);
291 return (0);
292 }
293
294 /* if we don't have kpm enabled, we need to do the slow path */
295 if (!kpm_enable || bp_force_copy) {
296 bp_mapin(bp);
297 BP_COPY(dir, driverbuf, bp->b_un.b_addr + offset, size);
298 bp_mapout(bp);
299 return (0);
300 }
301
302 /*
303 * kpm is enabled, and we need to map in the buf_t for the copy
304 */
305
306 /* setup pp, plist, and make sure 'as' is right */
307 if (bp->b_flags & B_PAGEIO) {
308 pp = bp->b_pages;
309 pplist = NULL;
310 } else if (bp->b_flags & B_SHADOW) {
311 pp = NULL;
312 pplist = bp->b_shadow;
313 } else {
314 pp = NULL;
315 pplist = NULL;
316 if (bp->b_proc == NULL || (as = bp->b_proc->p_as) == NULL) {
317 as = &kas;
318 }
319 }
320
321 /*
322 * locals for the address, the offset into the first page, and the
323 * size of the first page we are going to copy.
324 */
325 addr = (caddr_t)bp->b_un.b_addr;
326 poff = (uintptr_t)addr & PAGEOFFSET;
327 psize = MIN(PAGESIZE - poff, size);
328
329 /*
330 * we always start with a 0 offset into the driverbuf provided. The
331 * offset passed in only applies to the buf_t.
332 */
333 voff = 0;
334
335 /* Loop until we've copied al the data */
336 while (size > 0) {
337
338 /*
339 * for a pp or pplist, get the pfn, then go to the next page_t
340 * for the next time around the loop.
341 */
342 if (pp) {
343 page = pp;
344 pp = pp->p_next;
345 } else if (pplist != NULL) {
346 page = (*pplist);
347 pplist++;
348
349 /*
350 * We have a user VA. If we are going to copy this page, (e.g.
351 * the offset into the buf_t where we start to copy is
352 * within this page), get the pfn. Don't waste the cycles
353 * getting the pfn if we're not copying this page.
354 */
355 } else if (offset < psize) {
356 pfn = hat_getpfnum(as->a_hat,
357 (caddr_t)((uintptr_t)addr & PAGEMASK));
358 if (pfn == PFN_INVALID) {
359 return (-1);
360 }
361 page = page_numtopp_nolock(pfn);
362 addr += psize - offset;
363 } else {
364 addr += psize;
365 }
366
367 /*
368 * if we have an initial offset into the buf_t passed in,
369 * and it falls within the current page, account for it in
370 * the page size (how much we will copy) and the offset into the
371 * page (where we'll start copying from).
372 */
373 if ((offset > 0) && (offset < psize)) {
374 psize -= offset;
375 poff += offset;
376 offset = 0;
377
378 /*
379 * if we have an initial offset into the buf_t passed in,
380 * and it's not within the current page, skip this page.
381 * We don't have to worry about the first page offset and size
382 * anymore. psize will normally be PAGESIZE now unless we are
383 * on the last page.
384 */
385 } else if (offset >= psize) {
386 offset -= psize;
387 psize = MIN(PAGESIZE, size);
388 poff = 0;
389 continue;
390 }
391
392 /*
393 * get a kpm mapping to the page, them copy in/out of the
394 * page. update size left and offset into the driverbuf passed
395 * in for the next time around the loop.
396 */
397 kaddr = hat_kpm_mapin(page, NULL) + poff;
398 BP_COPY(dir, (void *)((uintptr_t)driverbuf + voff), kaddr,
399 psize);
400 hat_kpm_mapout(page, NULL, kaddr - poff);
401
402 size -= psize;
403 voff += psize;
404
405 poff = 0;
406 psize = MIN(PAGESIZE, size);
407 }
408
409 return (0);
410 }
411