1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/sysmacros.h> 28 #include <sys/systm.h> 29 #include <sys/mman.h> 30 #include <sys/buf.h> 31 #include <sys/vmem.h> 32 #include <sys/cmn_err.h> 33 #include <sys/debug.h> 34 #include <sys/machparam.h> 35 #include <vm/page.h> 36 #include <vm/seg_kmem.h> 37 #include <vm/seg_kpm.h> 38 39 #ifdef __sparc 40 #include <sys/cpu_module.h> 41 #define BP_FLUSH(addr, size) flush_instr_mem((void *)addr, size); 42 #else 43 #define BP_FLUSH(addr, size) 44 #endif 45 46 int bp_force_copy = 0; 47 typedef enum { 48 BP_COPYIN = 0, 49 BP_COPYOUT = 1 50 } bp_copydir_t; 51 static int bp_copy_common(bp_copydir_t dir, struct buf *bp, void *driverbuf, 52 offset_t offset, size_t size); 53 54 static vmem_t *bp_map_arena; 55 static size_t bp_align; 56 static uint_t bp_devload_flags = PROT_READ | PROT_WRITE | HAT_NOSYNC; 57 int bp_max_cache = 1 << 17; /* 128K default; tunable */ 58 int bp_mapin_kpm_enable = 1; /* enable default; tunable */ 59 60 static void * 61 bp_vmem_alloc(vmem_t *vmp, size_t size, int vmflag) 62 { 63 return (vmem_xalloc(vmp, size, bp_align, 0, 0, NULL, NULL, vmflag)); 64 } 65 66 void 67 bp_init(size_t align, uint_t devload_flags) 68 { 69 bp_align = MAX(align, PAGESIZE); 70 bp_devload_flags |= devload_flags; 71 72 if (bp_align <= bp_max_cache) 73 bp_map_arena = vmem_create("bp_map", NULL, 0, bp_align, 74 bp_vmem_alloc, vmem_free, heap_arena, 75 MIN(8 * bp_align, bp_max_cache), VM_SLEEP); 76 } 77 78 /* 79 * common routine so can be called with/without VM_SLEEP 80 */ 81 void * 82 bp_mapin_common(struct buf *bp, int flag) 83 { 84 struct as *as; 85 pfn_t pfnum; 86 page_t *pp; 87 page_t **pplist; 88 caddr_t kaddr; 89 caddr_t addr; 90 uintptr_t off; 91 size_t size; 92 pgcnt_t npages; 93 int color; 94 95 as = NULL; 96 /* return if already mapped in, no pageio/physio, or physio to kas */ 97 if ((bp->b_flags & B_REMAPPED) || 98 !(bp->b_flags & (B_PAGEIO | B_PHYS)) || 99 (((bp->b_flags & (B_PAGEIO | B_PHYS)) == B_PHYS) && 100 ((bp->b_proc == NULL) || (bp->b_proc->p_as == &kas)))) 101 return (bp->b_un.b_addr); 102 103 ASSERT((bp->b_flags & (B_PAGEIO | B_PHYS)) != (B_PAGEIO | B_PHYS)); 104 105 addr = (caddr_t)bp->b_un.b_addr; 106 off = (uintptr_t)addr & PAGEOFFSET; 107 size = P2ROUNDUP(bp->b_bcount + off, PAGESIZE); 108 npages = btop(size); 109 110 /* Fastpath single page IO to locked memory by using kpm. */ 111 if ((bp->b_flags & (B_SHADOW | B_PAGEIO)) && (npages == 1) && 112 kpm_enable && bp_mapin_kpm_enable) { 113 if (bp->b_flags & B_SHADOW) 114 pp = *bp->b_shadow; 115 else 116 pp = bp->b_pages; 117 kaddr = hat_kpm_mapin(pp, NULL); 118 bp->b_un.b_addr = kaddr + off; 119 bp->b_flags |= B_REMAPPED; 120 return (bp->b_un.b_addr); 121 } 122 123 /* 124 * Allocate kernel virtual space for remapping. 125 */ 126 color = bp_color(bp); 127 ASSERT(color < bp_align); 128 129 if (bp_map_arena != NULL) { 130 kaddr = (caddr_t)vmem_alloc(bp_map_arena, 131 P2ROUNDUP(color + size, bp_align), flag); 132 if (kaddr == NULL) 133 return (NULL); 134 kaddr += color; 135 } else { 136 kaddr = vmem_xalloc(heap_arena, size, bp_align, color, 137 0, NULL, NULL, flag); 138 if (kaddr == NULL) 139 return (NULL); 140 } 141 142 ASSERT(P2PHASE((uintptr_t)kaddr, bp_align) == color); 143 144 /* 145 * Map bp into the virtual space we just allocated. 146 */ 147 if (bp->b_flags & B_PAGEIO) { 148 pp = bp->b_pages; 149 pplist = NULL; 150 } else if (bp->b_flags & B_SHADOW) { 151 pp = NULL; 152 pplist = bp->b_shadow; 153 } else { 154 pp = NULL; 155 pplist = NULL; 156 if (bp->b_proc == NULL || (as = bp->b_proc->p_as) == NULL) 157 as = &kas; 158 } 159 160 bp->b_flags |= B_REMAPPED; 161 bp->b_un.b_addr = kaddr + off; 162 163 while (npages-- != 0) { 164 if (pp) { 165 pfnum = pp->p_pagenum; 166 pp = pp->p_next; 167 } else if (pplist == NULL) { 168 pfnum = hat_getpfnum(as->a_hat, 169 (caddr_t)((uintptr_t)addr & MMU_PAGEMASK)); 170 if (pfnum == PFN_INVALID) 171 panic("bp_mapin_common: hat_getpfnum for" 172 " addr %p failed\n", (void *)addr); 173 addr += PAGESIZE; 174 } else { 175 pfnum = (*pplist)->p_pagenum; 176 pplist++; 177 } 178 179 hat_devload(kas.a_hat, kaddr, PAGESIZE, pfnum, 180 bp_devload_flags, HAT_LOAD_LOCK); 181 182 kaddr += PAGESIZE; 183 } 184 return (bp->b_un.b_addr); 185 } 186 187 /* 188 * Convert bp for pageio/physio to a kernel addressable location. 189 */ 190 void 191 bp_mapin(struct buf *bp) 192 { 193 (void) bp_mapin_common(bp, VM_SLEEP); 194 } 195 196 /* 197 * Release all the resources associated with a previous bp_mapin() call. 198 */ 199 void 200 bp_mapout(struct buf *bp) 201 { 202 caddr_t addr; 203 uintptr_t off; 204 uintptr_t base; 205 uintptr_t color; 206 size_t size; 207 pgcnt_t npages; 208 page_t *pp; 209 210 if ((bp->b_flags & B_REMAPPED) == 0) 211 return; 212 213 addr = bp->b_un.b_addr; 214 off = (uintptr_t)addr & PAGEOFFSET; 215 size = P2ROUNDUP(bp->b_bcount + off, PAGESIZE); 216 npages = btop(size); 217 218 bp->b_un.b_addr = (caddr_t)off; /* debugging aid */ 219 220 if ((bp->b_flags & (B_SHADOW | B_PAGEIO)) && (npages == 1) && 221 kpm_enable && bp_mapin_kpm_enable) { 222 if (bp->b_flags & B_SHADOW) 223 pp = *bp->b_shadow; 224 else 225 pp = bp->b_pages; 226 addr = (caddr_t)((uintptr_t)addr & MMU_PAGEMASK); 227 hat_kpm_mapout(pp, NULL, addr); 228 bp->b_flags &= ~B_REMAPPED; 229 return; 230 } 231 232 base = (uintptr_t)addr & MMU_PAGEMASK; 233 BP_FLUSH(base, size); 234 hat_unload(kas.a_hat, (void *)base, size, 235 HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK); 236 if (bp_map_arena != NULL) { 237 color = P2PHASE(base, bp_align); 238 vmem_free(bp_map_arena, (void *)(base - color), 239 P2ROUNDUP(color + size, bp_align)); 240 } else 241 vmem_free(heap_arena, (void *)base, size); 242 bp->b_flags &= ~B_REMAPPED; 243 } 244 245 /* 246 * copy data from a KVA into a buf_t which may not be mapped in. offset 247 * is relative to the buf_t only. 248 */ 249 int 250 bp_copyout(void *driverbuf, struct buf *bp, offset_t offset, size_t size) 251 { 252 return (bp_copy_common(BP_COPYOUT, bp, driverbuf, offset, size)); 253 } 254 255 /* 256 * copy data from a buf_t which may not be mapped in, into a KVA.. offset 257 * is relative to the buf_t only. 258 */ 259 int 260 bp_copyin(struct buf *bp, void *driverbuf, offset_t offset, size_t size) 261 { 262 return (bp_copy_common(BP_COPYIN, bp, driverbuf, offset, size)); 263 } 264 265 266 #define BP_COPY(dir, driverbuf, baddr, sz) \ 267 (dir == BP_COPYIN) ? \ 268 bcopy(baddr, driverbuf, sz) : bcopy(driverbuf, baddr, sz) 269 270 static int 271 bp_copy_common(bp_copydir_t dir, struct buf *bp, void *driverbuf, 272 offset_t offset, size_t size) 273 { 274 page_t **pplist; 275 uintptr_t poff; 276 uintptr_t voff; 277 struct as *as; 278 caddr_t kaddr; 279 caddr_t addr; 280 page_t *page; 281 size_t psize; 282 page_t *pp; 283 pfn_t pfn; 284 285 ASSERT((offset + size) <= bp->b_bcount); 286 as = NULL; 287 288 /* if the buf_t already has a KVA, just do a bcopy */ 289 if (!(bp->b_flags & (B_PHYS | B_PAGEIO))) { 290 BP_COPY(dir, driverbuf, bp->b_un.b_addr + offset, size); 291 return (0); 292 } 293 294 /* if we don't have kpm enabled, we need to do the slow path */ 295 if (!kpm_enable || bp_force_copy) { 296 bp_mapin(bp); 297 BP_COPY(dir, driverbuf, bp->b_un.b_addr + offset, size); 298 bp_mapout(bp); 299 return (0); 300 } 301 302 /* 303 * kpm is enabled, and we need to map in the buf_t for the copy 304 */ 305 306 /* setup pp, plist, and make sure 'as' is right */ 307 if (bp->b_flags & B_PAGEIO) { 308 pp = bp->b_pages; 309 pplist = NULL; 310 } else if (bp->b_flags & B_SHADOW) { 311 pp = NULL; 312 pplist = bp->b_shadow; 313 } else { 314 pp = NULL; 315 pplist = NULL; 316 if (bp->b_proc == NULL || (as = bp->b_proc->p_as) == NULL) { 317 as = &kas; 318 } 319 } 320 321 /* 322 * locals for the address, the offset into the first page, and the 323 * size of the first page we are going to copy. 324 */ 325 addr = (caddr_t)bp->b_un.b_addr; 326 poff = (uintptr_t)addr & PAGEOFFSET; 327 psize = MIN(PAGESIZE - poff, size); 328 329 /* 330 * we always start with a 0 offset into the driverbuf provided. The 331 * offset passed in only applies to the buf_t. 332 */ 333 voff = 0; 334 335 /* Loop until we've copied al the data */ 336 while (size > 0) { 337 338 /* 339 * for a pp or pplist, get the pfn, then go to the next page_t 340 * for the next time around the loop. 341 */ 342 if (pp) { 343 page = pp; 344 pp = pp->p_next; 345 } else if (pplist != NULL) { 346 page = (*pplist); 347 pplist++; 348 349 /* 350 * We have a user VA. If we are going to copy this page, (e.g. 351 * the offset into the buf_t where we start to copy is 352 * within this page), get the pfn. Don't waste the cycles 353 * getting the pfn if we're not copying this page. 354 */ 355 } else if (offset < psize) { 356 pfn = hat_getpfnum(as->a_hat, 357 (caddr_t)((uintptr_t)addr & PAGEMASK)); 358 if (pfn == PFN_INVALID) { 359 return (-1); 360 } 361 page = page_numtopp_nolock(pfn); 362 addr += psize - offset; 363 } else { 364 addr += psize; 365 } 366 367 /* 368 * if we have an initial offset into the buf_t passed in, 369 * and it falls within the current page, account for it in 370 * the page size (how much we will copy) and the offset into the 371 * page (where we'll start copying from). 372 */ 373 if ((offset > 0) && (offset < psize)) { 374 psize -= offset; 375 poff += offset; 376 offset = 0; 377 378 /* 379 * if we have an initial offset into the buf_t passed in, 380 * and it's not within the current page, skip this page. 381 * We don't have to worry about the first page offset and size 382 * anymore. psize will normally be PAGESIZE now unless we are 383 * on the last page. 384 */ 385 } else if (offset >= psize) { 386 offset -= psize; 387 psize = MIN(PAGESIZE, size); 388 poff = 0; 389 continue; 390 } 391 392 /* 393 * get a kpm mapping to the page, them copy in/out of the 394 * page. update size left and offset into the driverbuf passed 395 * in for the next time around the loop. 396 */ 397 kaddr = hat_kpm_mapin(page, NULL) + poff; 398 BP_COPY(dir, (void *)((uintptr_t)driverbuf + voff), kaddr, 399 psize); 400 hat_kpm_mapout(page, NULL, kaddr - poff); 401 402 size -= psize; 403 voff += psize; 404 405 poff = 0; 406 psize = MIN(PAGESIZE, size); 407 } 408 409 return (0); 410 } 411