1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/sysmacros.h> 30 #include <sys/systm.h> 31 #include <sys/mman.h> 32 #include <sys/buf.h> 33 #include <sys/vmem.h> 34 #include <sys/cmn_err.h> 35 #include <sys/debug.h> 36 #include <sys/machparam.h> 37 #include <vm/page.h> 38 #include <vm/seg_kmem.h> 39 #include <vm/seg_kpm.h> 40 41 #ifdef __sparc 42 #include <sys/cpu_module.h> 43 #define BP_FLUSH(addr, size) flush_instr_mem((void *)addr, size); 44 #else 45 #define BP_FLUSH(addr, size) 46 #endif 47 48 int bp_force_copy = 0; 49 typedef enum { 50 BP_COPYIN = 0, 51 BP_COPYOUT = 1 52 } bp_copydir_t; 53 static int bp_copy_common(bp_copydir_t dir, struct buf *bp, void *driverbuf, 54 offset_t offset, size_t size); 55 56 static vmem_t *bp_map_arena; 57 static size_t bp_align; 58 static uint_t bp_devload_flags = PROT_READ | PROT_WRITE | HAT_NOSYNC; 59 int bp_max_cache = 1 << 17; /* 128K default; tunable */ 60 int bp_mapin_kpm_enable = 1; /* enable default; tunable */ 61 62 static void * 63 bp_vmem_alloc(vmem_t *vmp, size_t size, int vmflag) 64 { 65 return (vmem_xalloc(vmp, size, bp_align, 0, 0, NULL, NULL, vmflag)); 66 } 67 68 void 69 bp_init(size_t align, uint_t devload_flags) 70 { 71 bp_align = MAX(align, PAGESIZE); 72 bp_devload_flags |= devload_flags; 73 74 if (bp_align <= bp_max_cache) 75 bp_map_arena = vmem_create("bp_map", NULL, 0, bp_align, 76 bp_vmem_alloc, vmem_free, heap_arena, 77 MIN(8 * bp_align, bp_max_cache), VM_SLEEP); 78 } 79 80 /* 81 * common routine so can be called with/without VM_SLEEP 82 */ 83 void * 84 bp_mapin_common(struct buf *bp, int flag) 85 { 86 struct as *as; 87 pfn_t pfnum; 88 page_t *pp; 89 page_t **pplist; 90 caddr_t kaddr; 91 caddr_t addr; 92 uintptr_t off; 93 size_t size; 94 pgcnt_t npages; 95 int color; 96 97 /* return if already mapped in, no pageio/physio, or physio to kas */ 98 if ((bp->b_flags & B_REMAPPED) || 99 !(bp->b_flags & (B_PAGEIO | B_PHYS)) || 100 (((bp->b_flags & (B_PAGEIO | B_PHYS)) == B_PHYS) && 101 ((bp->b_proc == NULL) || (bp->b_proc->p_as == &kas)))) 102 return (bp->b_un.b_addr); 103 104 ASSERT((bp->b_flags & (B_PAGEIO | B_PHYS)) != (B_PAGEIO | B_PHYS)); 105 106 addr = (caddr_t)bp->b_un.b_addr; 107 off = (uintptr_t)addr & PAGEOFFSET; 108 size = P2ROUNDUP(bp->b_bcount + off, PAGESIZE); 109 npages = btop(size); 110 111 /* Fastpath single page IO to locked memory by using kpm. */ 112 if ((bp->b_flags & (B_SHADOW | B_PAGEIO)) && (npages == 1) && 113 kpm_enable && bp_mapin_kpm_enable) { 114 if (bp->b_flags & B_SHADOW) 115 pp = *bp->b_shadow; 116 else 117 pp = bp->b_pages; 118 kaddr = hat_kpm_mapin(pp, NULL); 119 bp->b_un.b_addr = kaddr + off; 120 bp->b_flags |= B_REMAPPED; 121 return (bp->b_un.b_addr); 122 } 123 124 /* 125 * Allocate kernel virtual space for remapping. 126 */ 127 color = bp_color(bp); 128 ASSERT(color < bp_align); 129 130 if (bp_map_arena != NULL) { 131 kaddr = (caddr_t)vmem_alloc(bp_map_arena, 132 P2ROUNDUP(color + size, bp_align), flag); 133 if (kaddr == NULL) 134 return (NULL); 135 kaddr += color; 136 } else { 137 kaddr = vmem_xalloc(heap_arena, size, bp_align, color, 138 0, NULL, NULL, flag); 139 if (kaddr == NULL) 140 return (NULL); 141 } 142 143 ASSERT(P2PHASE((uintptr_t)kaddr, bp_align) == color); 144 145 /* 146 * Map bp into the virtual space we just allocated. 147 */ 148 if (bp->b_flags & B_PAGEIO) { 149 pp = bp->b_pages; 150 pplist = NULL; 151 } else if (bp->b_flags & B_SHADOW) { 152 pp = NULL; 153 pplist = bp->b_shadow; 154 } else { 155 pp = NULL; 156 pplist = NULL; 157 if (bp->b_proc == NULL || (as = bp->b_proc->p_as) == NULL) 158 as = &kas; 159 } 160 161 bp->b_flags |= B_REMAPPED; 162 bp->b_un.b_addr = kaddr + off; 163 164 while (npages-- != 0) { 165 if (pp) { 166 pfnum = pp->p_pagenum; 167 pp = pp->p_next; 168 } else if (pplist == NULL) { 169 pfnum = hat_getpfnum(as->a_hat, 170 (caddr_t)((uintptr_t)addr & MMU_PAGEMASK)); 171 if (pfnum == PFN_INVALID) 172 panic("bp_mapin_common: hat_getpfnum for" 173 " addr %p failed\n", (void *)addr); 174 addr += PAGESIZE; 175 } else { 176 pfnum = (*pplist)->p_pagenum; 177 pplist++; 178 } 179 180 hat_devload(kas.a_hat, kaddr, PAGESIZE, pfnum, 181 bp_devload_flags, HAT_LOAD_LOCK); 182 183 kaddr += PAGESIZE; 184 } 185 return (bp->b_un.b_addr); 186 } 187 188 /* 189 * Convert bp for pageio/physio to a kernel addressable location. 190 */ 191 void 192 bp_mapin(struct buf *bp) 193 { 194 (void) bp_mapin_common(bp, VM_SLEEP); 195 } 196 197 /* 198 * Release all the resources associated with a previous bp_mapin() call. 199 */ 200 void 201 bp_mapout(struct buf *bp) 202 { 203 caddr_t addr; 204 uintptr_t off; 205 uintptr_t base; 206 uintptr_t color; 207 size_t size; 208 pgcnt_t npages; 209 page_t *pp; 210 211 if ((bp->b_flags & B_REMAPPED) == 0) 212 return; 213 214 addr = bp->b_un.b_addr; 215 off = (uintptr_t)addr & PAGEOFFSET; 216 size = P2ROUNDUP(bp->b_bcount + off, PAGESIZE); 217 npages = btop(size); 218 219 bp->b_un.b_addr = (caddr_t)off; /* debugging aid */ 220 221 if ((bp->b_flags & (B_SHADOW | B_PAGEIO)) && (npages == 1) && 222 kpm_enable && bp_mapin_kpm_enable) { 223 if (bp->b_flags & B_SHADOW) 224 pp = *bp->b_shadow; 225 else 226 pp = bp->b_pages; 227 addr = (caddr_t)((uintptr_t)addr & MMU_PAGEMASK); 228 hat_kpm_mapout(pp, NULL, addr); 229 bp->b_flags &= ~B_REMAPPED; 230 return; 231 } 232 233 base = (uintptr_t)addr & MMU_PAGEMASK; 234 BP_FLUSH(base, size); 235 hat_unload(kas.a_hat, (void *)base, size, 236 HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK); 237 if (bp_map_arena != NULL) { 238 color = P2PHASE(base, bp_align); 239 vmem_free(bp_map_arena, (void *)(base - color), 240 P2ROUNDUP(color + size, bp_align)); 241 } else 242 vmem_free(heap_arena, (void *)base, size); 243 bp->b_flags &= ~B_REMAPPED; 244 } 245 246 /* 247 * copy data from a KVA into a buf_t which may not be mapped in. offset 248 * is relative to the buf_t only. 249 */ 250 int 251 bp_copyout(void *driverbuf, struct buf *bp, offset_t offset, size_t size) 252 { 253 return (bp_copy_common(BP_COPYOUT, bp, driverbuf, offset, size)); 254 } 255 256 /* 257 * copy data from a buf_t which may not be mapped in, into a KVA.. offset 258 * is relative to the buf_t only. 259 */ 260 int 261 bp_copyin(struct buf *bp, void *driverbuf, offset_t offset, size_t size) 262 { 263 return (bp_copy_common(BP_COPYIN, bp, driverbuf, offset, size)); 264 } 265 266 267 #define BP_COPY(dir, driverbuf, baddr, sz) \ 268 (dir == BP_COPYIN) ? \ 269 bcopy(baddr, driverbuf, sz) : bcopy(driverbuf, baddr, sz) 270 271 static int 272 bp_copy_common(bp_copydir_t dir, struct buf *bp, void *driverbuf, 273 offset_t offset, size_t size) 274 { 275 page_t **pplist; 276 uintptr_t poff; 277 uintptr_t voff; 278 struct as *as; 279 caddr_t kaddr; 280 caddr_t addr; 281 page_t *page; 282 size_t psize; 283 page_t *pp; 284 pfn_t pfn; 285 286 287 ASSERT((offset + size) <= bp->b_bcount); 288 289 /* if the buf_t already has a KVA, just do a bcopy */ 290 if (!(bp->b_flags & (B_PHYS | B_PAGEIO))) { 291 BP_COPY(dir, driverbuf, bp->b_un.b_addr + offset, size); 292 return (0); 293 } 294 295 /* if we don't have kpm enabled, we need to do the slow path */ 296 if (!kpm_enable || bp_force_copy) { 297 bp_mapin(bp); 298 BP_COPY(dir, driverbuf, bp->b_un.b_addr + offset, size); 299 bp_mapout(bp); 300 return (0); 301 } 302 303 /* 304 * kpm is enabled, and we need to map in the buf_t for the copy 305 */ 306 307 /* setup pp, plist, and make sure 'as' is right */ 308 if (bp->b_flags & B_PAGEIO) { 309 pp = bp->b_pages; 310 pplist = NULL; 311 } else if (bp->b_flags & B_SHADOW) { 312 pp = NULL; 313 pplist = bp->b_shadow; 314 } else { 315 pp = NULL; 316 pplist = NULL; 317 if (bp->b_proc == NULL || (as = bp->b_proc->p_as) == NULL) { 318 as = &kas; 319 } 320 } 321 322 /* 323 * locals for the address, the offset into the first page, and the 324 * size of the first page we are going to copy. 325 */ 326 addr = (caddr_t)bp->b_un.b_addr; 327 poff = (uintptr_t)addr & PAGEOFFSET; 328 psize = MIN(PAGESIZE - poff, size); 329 330 /* 331 * we always start with a 0 offset into the driverbuf provided. The 332 * offset passed in only applies to the buf_t. 333 */ 334 voff = 0; 335 336 /* Loop until we've copied al the data */ 337 while (size > 0) { 338 339 /* 340 * for a pp or pplist, get the pfn, then go to the next page_t 341 * for the next time around the loop. 342 */ 343 if (pp) { 344 page = pp; 345 pp = pp->p_next; 346 } else if (pplist != NULL) { 347 page = (*pplist); 348 pplist++; 349 350 /* 351 * We have a user VA. If we are going to copy this page, (e.g. 352 * the offset into the buf_t where we start to copy is 353 * within this page), get the pfn. Don't waste the cycles 354 * getting the pfn if we're not copying this page. 355 */ 356 } else if (offset < psize) { 357 pfn = hat_getpfnum(as->a_hat, 358 (caddr_t)((uintptr_t)addr & PAGEMASK)); 359 if (pfn == PFN_INVALID) { 360 return (-1); 361 } 362 page = page_numtopp_nolock(pfn); 363 addr += psize - offset; 364 } else { 365 addr += psize; 366 } 367 368 /* 369 * if we have an initial offset into the buf_t passed in, 370 * and it falls within the current page, account for it in 371 * the page size (how much we will copy) and the offset into the 372 * page (where we'll start copying from). 373 */ 374 if ((offset > 0) && (offset < psize)) { 375 psize -= offset; 376 poff += offset; 377 offset = 0; 378 379 /* 380 * if we have an initial offset into the buf_t passed in, 381 * and it's not within the current page, skip this page. 382 * We don't have to worry about the first page offset and size 383 * anymore. psize will normally be PAGESIZE now unless we are 384 * on the last page. 385 */ 386 } else if (offset >= psize) { 387 offset -= psize; 388 psize = MIN(PAGESIZE, size); 389 poff = 0; 390 continue; 391 } 392 393 /* 394 * get a kpm mapping to the page, them copy in/out of the 395 * page. update size left and offset into the driverbuf passed 396 * in for the next time around the loop. 397 */ 398 kaddr = hat_kpm_mapin(page, NULL) + poff; 399 BP_COPY(dir, (void *)((uintptr_t)driverbuf + voff), kaddr, 400 psize); 401 hat_kpm_mapout(page, NULL, kaddr - poff); 402 403 size -= psize; 404 voff += psize; 405 406 poff = 0; 407 psize = MIN(PAGESIZE, size); 408 } 409 410 return (0); 411 } 412