1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 27 /* All Rights Reserved */ 28 29 /* 30 * University Copyright- Copyright (c) 1982, 1986, 1988 31 * The Regents of the University of California 32 * All Rights Reserved 33 * 34 * University Acknowledgment- Portions of this document are derived from 35 * software developed by the University of California, Berkeley, and its 36 * contributors. 
37 */ 38 39 #include <sys/types.h> 40 #include <sys/sysmacros.h> 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/uio.h> 44 #include <sys/errno.h> 45 #include <sys/vmsystm.h> 46 #include <sys/cmn_err.h> 47 #include <vm/as.h> 48 #include <vm/page.h> 49 50 #include <sys/dcopy.h> 51 52 int64_t uioa_maxpoll = -1; /* <0 = noblock, 0 = block, >0 = block after */ 53 #define UIO_DCOPY_CHANNEL 0 54 #define UIO_DCOPY_CMD 1 55 56 /* 57 * Move "n" bytes at byte address "p"; "rw" indicates the direction 58 * of the move, and the I/O parameters are provided in "uio", which is 59 * update to reflect the data which was moved. Returns 0 on success or 60 * a non-zero errno on failure. 61 */ 62 int 63 uiomove(void *p, size_t n, enum uio_rw rw, struct uio *uio) 64 { 65 struct iovec *iov; 66 ulong_t cnt; 67 int error; 68 69 while (n && uio->uio_resid) { 70 iov = uio->uio_iov; 71 cnt = MIN(iov->iov_len, n); 72 if (cnt == 0l) { 73 uio->uio_iov++; 74 uio->uio_iovcnt--; 75 continue; 76 } 77 switch (uio->uio_segflg) { 78 79 case UIO_USERSPACE: 80 case UIO_USERISPACE: 81 if (rw == UIO_READ) { 82 error = xcopyout_nta(p, iov->iov_base, cnt, 83 (uio->uio_extflg & UIO_COPY_CACHED)); 84 } else { 85 error = xcopyin_nta(iov->iov_base, p, cnt, 86 (uio->uio_extflg & UIO_COPY_CACHED)); 87 } 88 89 if (error) 90 return (error); 91 break; 92 93 case UIO_SYSSPACE: 94 if (rw == UIO_READ) 95 error = kcopy_nta(p, iov->iov_base, cnt, 96 (uio->uio_extflg & UIO_COPY_CACHED)); 97 else 98 error = kcopy_nta(iov->iov_base, p, cnt, 99 (uio->uio_extflg & UIO_COPY_CACHED)); 100 if (error) 101 return (error); 102 break; 103 } 104 iov->iov_base += cnt; 105 iov->iov_len -= cnt; 106 uio->uio_resid -= cnt; 107 uio->uio_loffset += cnt; 108 p = (caddr_t)p + cnt; 109 n -= cnt; 110 } 111 return (0); 112 } 113 114 /* 115 * transfer a character value into the address space 116 * delineated by a uio and update fields within the 117 * uio for next character. Return 0 for success, EFAULT 118 * for error. 
119 */ 120 int 121 ureadc(int val, struct uio *uiop) 122 { 123 struct iovec *iovp; 124 unsigned char c; 125 126 /* 127 * first determine if uio is valid. uiop should be 128 * non-NULL and the resid count > 0. 129 */ 130 if (!(uiop && uiop->uio_resid > 0)) 131 return (EFAULT); 132 133 /* 134 * scan through iovecs until one is found that is non-empty. 135 * Return EFAULT if none found. 136 */ 137 while (uiop->uio_iovcnt > 0) { 138 iovp = uiop->uio_iov; 139 if (iovp->iov_len <= 0) { 140 uiop->uio_iovcnt--; 141 uiop->uio_iov++; 142 } else 143 break; 144 } 145 146 if (uiop->uio_iovcnt <= 0) 147 return (EFAULT); 148 149 /* 150 * Transfer character to uio space. 151 */ 152 153 c = (unsigned char) (val & 0xFF); 154 155 switch (uiop->uio_segflg) { 156 157 case UIO_USERISPACE: 158 case UIO_USERSPACE: 159 if (copyout(&c, iovp->iov_base, sizeof (unsigned char))) 160 return (EFAULT); 161 break; 162 163 case UIO_SYSSPACE: /* can do direct copy since kernel-kernel */ 164 *iovp->iov_base = c; 165 break; 166 167 default: 168 return (EFAULT); /* invalid segflg value */ 169 } 170 171 /* 172 * bump up/down iovec and uio members to reflect transfer. 173 */ 174 iovp->iov_base++; 175 iovp->iov_len--; 176 uiop->uio_resid--; 177 uiop->uio_loffset++; 178 return (0); /* success */ 179 } 180 181 /* 182 * return a character value from the address space 183 * delineated by a uio and update fields within the 184 * uio for next character. Return the character for success, 185 * -1 for error. 186 */ 187 int 188 uwritec(struct uio *uiop) 189 { 190 struct iovec *iovp; 191 unsigned char c; 192 193 /* 194 * verify we were passed a valid uio structure. 
195 * (1) non-NULL uiop, (2) positive resid count 196 * (3) there is an iovec with positive length 197 */ 198 199 if (!(uiop && uiop->uio_resid > 0)) 200 return (-1); 201 202 while (uiop->uio_iovcnt > 0) { 203 iovp = uiop->uio_iov; 204 if (iovp->iov_len <= 0) { 205 uiop->uio_iovcnt--; 206 uiop->uio_iov++; 207 } else 208 break; 209 } 210 211 if (uiop->uio_iovcnt <= 0) 212 return (-1); 213 214 /* 215 * Get the character from the uio address space. 216 */ 217 switch (uiop->uio_segflg) { 218 219 case UIO_USERISPACE: 220 case UIO_USERSPACE: 221 if (copyin(iovp->iov_base, &c, sizeof (unsigned char))) 222 return (-1); 223 break; 224 225 case UIO_SYSSPACE: 226 c = *iovp->iov_base; 227 break; 228 229 default: 230 return (-1); /* invalid segflg */ 231 } 232 233 /* 234 * Adjust fields of iovec and uio appropriately. 235 */ 236 iovp->iov_base++; 237 iovp->iov_len--; 238 uiop->uio_resid--; 239 uiop->uio_loffset++; 240 return ((int)c & 0xFF); /* success */ 241 } 242 243 /* 244 * Drop the next n chars out of *uiop. 245 */ 246 void 247 uioskip(uio_t *uiop, size_t n) 248 { 249 if (n > uiop->uio_resid) 250 return; 251 while (n != 0) { 252 register iovec_t *iovp = uiop->uio_iov; 253 register size_t niovb = MIN(iovp->iov_len, n); 254 255 if (niovb == 0) { 256 uiop->uio_iov++; 257 uiop->uio_iovcnt--; 258 continue; 259 } 260 iovp->iov_base += niovb; 261 uiop->uio_loffset += niovb; 262 iovp->iov_len -= niovb; 263 uiop->uio_resid -= niovb; 264 n -= niovb; 265 } 266 } 267 268 /* 269 * Dup the suio into the duio and diovec of size diov_cnt. If diov 270 * is too small to dup suio then an error will be returned, else 0. 
271 */ 272 int 273 uiodup(uio_t *suio, uio_t *duio, iovec_t *diov, int diov_cnt) 274 { 275 int ix; 276 iovec_t *siov = suio->uio_iov; 277 278 *duio = *suio; 279 for (ix = 0; ix < suio->uio_iovcnt; ix++) { 280 diov[ix] = siov[ix]; 281 if (ix >= diov_cnt) 282 return (1); 283 } 284 duio->uio_iov = diov; 285 return (0); 286 } 287 288 /* 289 * Shadow state for checking if a platform has hardware asynchronous 290 * copy capability and minimum copy size, e.g. Intel's I/OAT dma engine, 291 * 292 * Dcopy does a call-back to uioa_dcopy_enable() when a dma device calls 293 * into dcopy to register and uioa_dcopy_disable() when the device calls 294 * into dcopy to unregister. 295 */ 296 uioasync_t uioasync = {B_FALSE, 1024}; 297 298 void 299 uioa_dcopy_enable() 300 { 301 uioasync.enabled = B_TRUE; 302 } 303 304 void 305 uioa_dcopy_disable() 306 { 307 uioasync.enabled = B_FALSE; 308 } 309 310 /* 311 * Schedule an asynchronous move of "n" bytes at byte address "p", 312 * "rw" indicates the direction of the move, I/O parameters and 313 * async state are provided in "uioa" which is update to reflect 314 * the data which is to be moved. 315 * 316 * Returns 0 on success or a non-zero errno on failure. 317 * 318 * Note, while the uioasync APIs are general purpose in design 319 * the current implementation is Intel I/OAT specific. 
 */
int
uioamove(void *p, size_t n, enum uio_rw rw, uioa_t *uioa)
{
	int soff, doff;		/* intra-page offsets, source/destination */
	uint64_t pa;		/* physical address for the dcopy command */
	int cnt;
	iovec_t *iov;
	dcopy_handle_t channel;
	dcopy_cmd_t cmd;
	int ret = 0;
	int dcopy_flags;

	if (!(uioa->uioa_state & UIOA_ENABLED)) {
		/* The uioa_t isn't enabled */
		return (ENXIO);
	}

	if (uioa->uio_segflg != UIO_USERSPACE || rw != UIO_READ) {
		/* Only support to user-land from kernel */
		return (ENOTSUP);
	}

	/* Hardware state (channel, last posted command) set by uioainit() */
	channel = uioa->uioa_hwst[UIO_DCOPY_CHANNEL];
	cmd = uioa->uioa_hwst[UIO_DCOPY_CMD];
	dcopy_flags = DCOPY_NOSLEEP;

	/*
	 * While source bytes and destination bytes.
	 */
	while (n > 0 && uioa->uio_resid > 0) {
		iov = uioa->uio_iov;
		if (iov->iov_len == 0l) {
			/*
			 * Exhausted iovec; advance to the next one and
			 * to its locked-page state in uioa_locked[].
			 */
			uioa->uio_iov++;
			uioa->uio_iovcnt--;
			uioa->uioa_lcur++;
			uioa->uioa_lppp = uioa->uioa_lcur->uioa_ppp;
			continue;
		}
		/*
		 * While source bytes schedule an async
		 * dma for destination page by page.
		 */
		while (n > 0) {
			/* Addr offset in page src/dst */
			soff = (uintptr_t)p & PAGEOFFSET;
			doff = (uintptr_t)iov->iov_base & PAGEOFFSET;
			/* Min copy count src and dst and page sized */
			cnt = MIN(n, iov->iov_len);
			cnt = MIN(cnt, PAGESIZE - soff);
			cnt = MIN(cnt, PAGESIZE - doff);
			/* XXX if next page(s) contiguous could use multipage */

			/*
			 * if we have an old command, we want to link all
			 * other commands to the next command we alloced so
			 * we only need to track the last command but can
			 * still free them all.
			 */
			if (cmd != NULL) {
				dcopy_flags |= DCOPY_ALLOC_LINK;
			}
			ret = dcopy_cmd_alloc(channel, dcopy_flags, &cmd);
			if (ret != DCOPY_SUCCESS) {
				/* Error of some sort */
				return (EIO);
			}
			/* Remember the newest command for uioafini() */
			uioa->uioa_hwst[UIO_DCOPY_CMD] = cmd;

			ASSERT(cmd->dp_version == DCOPY_CMD_V0);
			if (uioa_maxpoll >= 0) {
				/* Blocking (>0 may be) used in uioafini() */
				cmd->dp_flags = DCOPY_CMD_INTR;
			} else {
				/* Non blocking uioafini() so no intr */
				cmd->dp_flags = DCOPY_CMD_NOFLAGS;
			}
			cmd->dp_cmd = DCOPY_CMD_COPY;
			/* Source is kernel memory, translate via kas */
			pa = ptob((uint64_t)hat_getpfnum(kas.a_hat, p));
			cmd->dp.copy.cc_source = pa + soff;
			if (uioa->uioa_lcur->uioa_pfncnt == 0) {
				/* Have a (page_t **) */
				pa = ptob((uint64_t)(
				    *(page_t **)uioa->uioa_lppp)->p_pagenum);
			} else {
				/* Have a (pfn_t *) */
				pa = ptob((uint64_t)(
				    *(pfn_t *)uioa->uioa_lppp));
			}
			cmd->dp.copy.cc_dest = pa + doff;
			cmd->dp.copy.cc_size = cnt;
			ret = dcopy_cmd_post(cmd);
			if (ret != DCOPY_SUCCESS) {
				/* Error of some sort */
				return (EIO);
			}
			ret = 0;

			/* If UIOA_POLL not set, set it */
			if (!(uioa->uioa_state & UIOA_POLL))
				uioa->uioa_state |= UIOA_POLL;

			/* Update iov, uio, and local pointers/counters */
			iov->iov_base += cnt;
			iov->iov_len -= cnt;
			uioa->uio_resid -= cnt;
			uioa->uioa_mbytes += cnt;
			uioa->uio_loffset += cnt;
			p = (caddr_t)p + cnt;
			n -= cnt;

			/* End of iovec? */
			if (iov->iov_len == 0) {
				/* Yup, next iovec */
				break;
			}

			/* Next dst addr page? */
			if (doff + cnt == PAGESIZE) {
				/* Yup, next page_t */
				uioa->uioa_lppp++;
			}
		}
	}

	return (ret);
}

/*
 * Initialize a uioa_t for a given uio_t for the current user context,
 * copy the common uio_t to the uioa_t, walk the shared iovec_t and
 * lock down the user-land page(s) containing iovec_t data, then mapin
 * user-land pages using segkpm.
454 */ 455 int 456 uioainit(uio_t *uiop, uioa_t *uioap) 457 { 458 caddr_t addr; 459 page_t **pages; 460 int off; 461 int len; 462 proc_t *procp = ttoproc(curthread); 463 struct as *as = procp->p_as; 464 iovec_t *iov = uiop->uio_iov; 465 int32_t iovcnt = uiop->uio_iovcnt; 466 uioa_page_t *locked = uioap->uioa_locked; 467 dcopy_handle_t channel; 468 int error; 469 470 if (! (uioap->uioa_state & UIOA_ALLOC)) { 471 /* Can only init() a freshly allocated uioa_t */ 472 return (EINVAL); 473 } 474 475 error = dcopy_alloc(DCOPY_NOSLEEP, &channel); 476 if (error == DCOPY_NORESOURCES) { 477 /* Turn off uioa */ 478 uioasync.enabled = B_FALSE; 479 return (ENODEV); 480 } 481 if (error != DCOPY_SUCCESS) { 482 /* Alloc failed */ 483 return (EIO); 484 } 485 486 uioap->uioa_hwst[UIO_DCOPY_CHANNEL] = channel; 487 uioap->uioa_hwst[UIO_DCOPY_CMD] = NULL; 488 489 /* Indicate uioa_t (will be) initialized */ 490 uioap->uioa_state = UIOA_INIT; 491 492 uioap->uioa_mbytes = 0; 493 494 uioap->uioa_mbytes = 0; 495 496 /* uio_t/uioa_t uio_t common struct copy */ 497 *((uio_t *)uioap) = *uiop; 498 499 /* initialize *uiop->uio_iov */ 500 if (iovcnt > UIOA_IOV_MAX) { 501 /* Too big? */ 502 return (E2BIG); 503 } 504 uioap->uio_iov = iov; 505 uioap->uio_iovcnt = iovcnt; 506 507 /* Mark the uioap as such */ 508 uioap->uio_extflg |= UIO_ASYNC; 509 510 /* 511 * For each iovec_t, lock-down the page(s) backing the iovec_t 512 * and save the page_t list for phys addr use in uioamove(). 
513 */ 514 iov = uiop->uio_iov; 515 iovcnt = uiop->uio_iovcnt; 516 while (iovcnt > 0) { 517 addr = iov->iov_base; 518 off = (uintptr_t)addr & PAGEOFFSET; 519 addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 520 len = iov->iov_len + off; 521 522 /* Lock down page(s) for the iov span */ 523 if ((error = as_pagelock(as, &pages, 524 iov->iov_base, iov->iov_len, S_WRITE)) != 0) { 525 /* Error */ 526 goto cleanup; 527 } 528 529 if (pages == NULL) { 530 /* 531 * Need page_t list, really only need 532 * a pfn list so build one. 533 */ 534 pfn_t *pfnp; 535 int pcnt = len >> PAGESHIFT; 536 537 if (off) 538 pcnt++; 539 if ((pfnp = kmem_alloc(pcnt * sizeof (pfnp), 540 KM_NOSLEEP)) == NULL) { 541 error = ENOMEM; 542 goto cleanup; 543 } 544 locked->uioa_ppp = (void **)pfnp; 545 locked->uioa_pfncnt = pcnt; 546 AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 547 while (pcnt-- > 0) { 548 *pfnp++ = hat_getpfnum(as->a_hat, addr); 549 addr += PAGESIZE; 550 } 551 AS_LOCK_EXIT(as, &as->a_lock); 552 } else { 553 /* Have a page_t list, save it */ 554 locked->uioa_ppp = (void **)pages; 555 locked->uioa_pfncnt = 0; 556 } 557 /* Save for as_pageunlock() in uioafini() */ 558 locked->uioa_base = iov->iov_base; 559 locked->uioa_len = iov->iov_len; 560 locked++; 561 562 /* Next iovec_t */ 563 iov++; 564 iovcnt--; 565 } 566 /* Initialize curret pointer into uioa_locked[] and it's uioa_ppp */ 567 uioap->uioa_lcur = uioap->uioa_locked; 568 uioap->uioa_lppp = uioap->uioa_lcur->uioa_ppp; 569 return (0); 570 571 cleanup: 572 /* Unlock any previously locked page_t(s) */ 573 while (locked > uioap->uioa_locked) { 574 locked--; 575 as_pageunlock(as, (page_t **)locked->uioa_ppp, 576 locked->uioa_base, locked->uioa_len, S_WRITE); 577 } 578 579 /* Last indicate uioa_t still in alloc state */ 580 uioap->uioa_state = UIOA_ALLOC; 581 uioap->uioa_mbytes = 0; 582 583 return (error); 584 } 585 586 /* 587 * Finish processing of a uioa_t by cleanup any pending "uioap" actions. 
 */
int
uioafini(uio_t *uiop, uioa_t *uioap)
{
	int32_t iovcnt = uiop->uio_iovcnt;
	uioa_page_t *locked = uioap->uioa_locked;
	struct as *as = ttoproc(curthread)->p_as;
	dcopy_handle_t channel;
	dcopy_cmd_t cmd;
	int ret = 0;

	ASSERT(uioap->uio_extflg & UIO_ASYNC);

	if (!(uioap->uioa_state & (UIOA_ENABLED|UIOA_FINI))) {
		/* Must be an active uioa_t */
		return (EINVAL);
	}

	channel = uioap->uioa_hwst[UIO_DCOPY_CHANNEL];
	cmd = uioap->uioa_hwst[UIO_DCOPY_CMD];

	/* XXX - why do we get cmd == NULL sometimes? */
	if (cmd != NULL) {
		if (uioap->uioa_state & UIOA_POLL) {
			/* Wait for last dcopy() to finish */
			int64_t poll = 1;
			int poll_flag = DCOPY_POLL_NOFLAGS;

			do {
				/*
				 * uioa_maxpoll == 0: always block;
				 * uioa_maxpoll > 0: spin for up to
				 * maxpoll iterations, then block;
				 * uioa_maxpoll < 0: never block.
				 */
				if (uioa_maxpoll == 0 ||
				    (uioa_maxpoll > 0 &&
				    poll >= uioa_maxpoll)) {
					/* Always block or after maxpoll */
					poll_flag = DCOPY_POLL_BLOCK;
				} else {
					/* No block, poll */
					poll++;
				}
				ret = dcopy_cmd_poll(cmd, poll_flag);
			} while (ret == DCOPY_PENDING);

			if (ret == DCOPY_COMPLETED) {
				/* Poll/block succeeded */
				ret = 0;
			} else {
				/* Poll/block failed */
				ret = EIO;
			}
		}
		/* Frees the whole linked command chain built by uioamove() */
		dcopy_cmd_free(&cmd);
	}

	dcopy_free(&channel);

	/* Unlock all page(s) iovec_t by iovec_t */
	while (iovcnt-- > 0) {
		page_t **pages;

		if (locked->uioa_pfncnt == 0) {
			/* A as_pagelock() returned (page_t **) */
			pages = (page_t **)locked->uioa_ppp;
		} else {
			/*
			 * Our pfn_t array; sizeof (pfn_t *) matches the
			 * pointer-sized sizeof (pfnp) used by the
			 * kmem_alloc() in uioainit().
			 */
			pages = NULL;
			kmem_free(locked->uioa_ppp, locked->uioa_pfncnt *
			    sizeof (pfn_t *));
		}
		as_pageunlock(as, pages, locked->uioa_base, locked->uioa_len,
		    S_WRITE);

		locked++;
	}
	/* uioa_t->uio_t common struct copy */
	*uiop = *((uio_t *)uioap);

	/*
	 * Last, reset uioa state to alloc.
	 *
	 * Note, we only initialize the state here, all other members
	 * will be initialized in a subsequent uioainit().
	 */
	uioap->uioa_state = UIOA_ALLOC;
	uioap->uioa_mbytes = 0;

	uioap->uioa_hwst[UIO_DCOPY_CMD] = NULL;
	uioap->uioa_hwst[UIO_DCOPY_CHANNEL] = NULL;

	return (ret);
}