/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
37 */ 38 39 #include <sys/types.h> 40 #include <sys/sysmacros.h> 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/uio.h> 44 #include <sys/errno.h> 45 #include <sys/vmsystm.h> 46 #include <sys/cmn_err.h> 47 #include <vm/as.h> 48 #include <vm/page.h> 49 50 #include <sys/dcopy.h> 51 52 int64_t uioa_maxpoll = -1; /* <0 = noblock, 0 = block, >0 = block after */ 53 #define UIO_DCOPY_CHANNEL 0 54 #define UIO_DCOPY_CMD 1 55 56 /* 57 * Move "n" bytes at byte address "p"; "rw" indicates the direction 58 * of the move, and the I/O parameters are provided in "uio", which is 59 * update to reflect the data which was moved. Returns 0 on success or 60 * a non-zero errno on failure. 61 */ 62 int 63 uiomove(void *p, size_t n, enum uio_rw rw, struct uio *uio) 64 { 65 struct iovec *iov; 66 ulong_t cnt; 67 int error; 68 69 while (n && uio->uio_resid) { 70 iov = uio->uio_iov; 71 cnt = MIN(iov->iov_len, n); 72 if (cnt == 0l) { 73 uio->uio_iov++; 74 uio->uio_iovcnt--; 75 continue; 76 } 77 switch (uio->uio_segflg) { 78 79 case UIO_USERSPACE: 80 case UIO_USERISPACE: 81 if (rw == UIO_READ) { 82 error = xcopyout_nta(p, iov->iov_base, cnt, 83 (uio->uio_extflg & UIO_COPY_CACHED)); 84 } else { 85 error = xcopyin_nta(iov->iov_base, p, cnt, 86 (uio->uio_extflg & UIO_COPY_CACHED)); 87 } 88 89 if (error) 90 return (error); 91 break; 92 93 case UIO_SYSSPACE: 94 if (rw == UIO_READ) 95 error = kcopy_nta(p, iov->iov_base, cnt, 96 (uio->uio_extflg & UIO_COPY_CACHED)); 97 else 98 error = kcopy_nta(iov->iov_base, p, cnt, 99 (uio->uio_extflg & UIO_COPY_CACHED)); 100 if (error) 101 return (error); 102 break; 103 } 104 iov->iov_base += cnt; 105 iov->iov_len -= cnt; 106 uio->uio_resid -= cnt; 107 uio->uio_loffset += cnt; 108 p = (caddr_t)p + cnt; 109 n -= cnt; 110 } 111 return (0); 112 } 113 114 /* 115 * Fault in the pages of the first n bytes specified by the uio structure. 116 * 1 byte in each page is touched and the uio struct is unmodified. 
Any 117 * error will terminate the process as this is only a best attempt to get 118 * the pages resident. 119 */ 120 void 121 uio_prefaultpages(ssize_t n, struct uio *uio) 122 { 123 struct iovec *iov; 124 ulong_t cnt, incr; 125 caddr_t p; 126 uint8_t tmp; 127 int iovcnt; 128 129 iov = uio->uio_iov; 130 iovcnt = uio->uio_iovcnt; 131 132 while ((n > 0) && (iovcnt > 0)) { 133 cnt = MIN(iov->iov_len, n); 134 if (cnt == 0) { 135 /* empty iov entry */ 136 iov++; 137 iovcnt--; 138 continue; 139 } 140 n -= cnt; 141 /* 142 * touch each page in this segment. 143 */ 144 p = iov->iov_base; 145 while (cnt) { 146 switch (uio->uio_segflg) { 147 case UIO_USERSPACE: 148 case UIO_USERISPACE: 149 if (fuword8(p, &tmp)) 150 return; 151 break; 152 case UIO_SYSSPACE: 153 if (kcopy(p, &tmp, 1)) 154 return; 155 break; 156 } 157 incr = MIN(cnt, PAGESIZE); 158 p += incr; 159 cnt -= incr; 160 } 161 /* 162 * touch the last byte in case it straddles a page. 163 */ 164 p--; 165 switch (uio->uio_segflg) { 166 case UIO_USERSPACE: 167 case UIO_USERISPACE: 168 if (fuword8(p, &tmp)) 169 return; 170 break; 171 case UIO_SYSSPACE: 172 if (kcopy(p, &tmp, 1)) 173 return; 174 break; 175 } 176 iov++; 177 iovcnt--; 178 } 179 } 180 181 /* 182 * transfer a character value into the address space 183 * delineated by a uio and update fields within the 184 * uio for next character. Return 0 for success, EFAULT 185 * for error. 186 */ 187 int 188 ureadc(int val, struct uio *uiop) 189 { 190 struct iovec *iovp; 191 unsigned char c; 192 193 /* 194 * first determine if uio is valid. uiop should be 195 * non-NULL and the resid count > 0. 196 */ 197 if (!(uiop && uiop->uio_resid > 0)) 198 return (EFAULT); 199 200 /* 201 * scan through iovecs until one is found that is non-empty. 202 * Return EFAULT if none found. 
203 */ 204 while (uiop->uio_iovcnt > 0) { 205 iovp = uiop->uio_iov; 206 if (iovp->iov_len <= 0) { 207 uiop->uio_iovcnt--; 208 uiop->uio_iov++; 209 } else 210 break; 211 } 212 213 if (uiop->uio_iovcnt <= 0) 214 return (EFAULT); 215 216 /* 217 * Transfer character to uio space. 218 */ 219 220 c = (unsigned char) (val & 0xFF); 221 222 switch (uiop->uio_segflg) { 223 224 case UIO_USERISPACE: 225 case UIO_USERSPACE: 226 if (copyout(&c, iovp->iov_base, sizeof (unsigned char))) 227 return (EFAULT); 228 break; 229 230 case UIO_SYSSPACE: /* can do direct copy since kernel-kernel */ 231 *iovp->iov_base = c; 232 break; 233 234 default: 235 return (EFAULT); /* invalid segflg value */ 236 } 237 238 /* 239 * bump up/down iovec and uio members to reflect transfer. 240 */ 241 iovp->iov_base++; 242 iovp->iov_len--; 243 uiop->uio_resid--; 244 uiop->uio_loffset++; 245 return (0); /* success */ 246 } 247 248 /* 249 * return a character value from the address space 250 * delineated by a uio and update fields within the 251 * uio for next character. Return the character for success, 252 * -1 for error. 253 */ 254 int 255 uwritec(struct uio *uiop) 256 { 257 struct iovec *iovp; 258 unsigned char c; 259 260 /* 261 * verify we were passed a valid uio structure. 262 * (1) non-NULL uiop, (2) positive resid count 263 * (3) there is an iovec with positive length 264 */ 265 266 if (!(uiop && uiop->uio_resid > 0)) 267 return (-1); 268 269 while (uiop->uio_iovcnt > 0) { 270 iovp = uiop->uio_iov; 271 if (iovp->iov_len <= 0) { 272 uiop->uio_iovcnt--; 273 uiop->uio_iov++; 274 } else 275 break; 276 } 277 278 if (uiop->uio_iovcnt <= 0) 279 return (-1); 280 281 /* 282 * Get the character from the uio address space. 
283 */ 284 switch (uiop->uio_segflg) { 285 286 case UIO_USERISPACE: 287 case UIO_USERSPACE: 288 if (copyin(iovp->iov_base, &c, sizeof (unsigned char))) 289 return (-1); 290 break; 291 292 case UIO_SYSSPACE: 293 c = *iovp->iov_base; 294 break; 295 296 default: 297 return (-1); /* invalid segflg */ 298 } 299 300 /* 301 * Adjust fields of iovec and uio appropriately. 302 */ 303 iovp->iov_base++; 304 iovp->iov_len--; 305 uiop->uio_resid--; 306 uiop->uio_loffset++; 307 return ((int)c & 0xFF); /* success */ 308 } 309 310 /* 311 * Drop the next n chars out of *uiop. 312 */ 313 void 314 uioskip(uio_t *uiop, size_t n) 315 { 316 if (n > uiop->uio_resid) 317 return; 318 while (n != 0) { 319 register iovec_t *iovp = uiop->uio_iov; 320 register size_t niovb = MIN(iovp->iov_len, n); 321 322 if (niovb == 0) { 323 uiop->uio_iov++; 324 uiop->uio_iovcnt--; 325 continue; 326 } 327 iovp->iov_base += niovb; 328 uiop->uio_loffset += niovb; 329 iovp->iov_len -= niovb; 330 uiop->uio_resid -= niovb; 331 n -= niovb; 332 } 333 } 334 335 /* 336 * Dup the suio into the duio and diovec of size diov_cnt. If diov 337 * is too small to dup suio then an error will be returned, else 0. 338 */ 339 int 340 uiodup(uio_t *suio, uio_t *duio, iovec_t *diov, int diov_cnt) 341 { 342 int ix; 343 iovec_t *siov = suio->uio_iov; 344 345 *duio = *suio; 346 for (ix = 0; ix < suio->uio_iovcnt; ix++) { 347 diov[ix] = siov[ix]; 348 if (ix >= diov_cnt) 349 return (1); 350 } 351 duio->uio_iov = diov; 352 return (0); 353 } 354 355 /* 356 * Shadow state for checking if a platform has hardware asynchronous 357 * copy capability and minimum copy size, e.g. Intel's I/OAT dma engine, 358 * 359 * Dcopy does a call-back to uioa_dcopy_enable() when a dma device calls 360 * into dcopy to register and uioa_dcopy_disable() when the device calls 361 * into dcopy to unregister. 
362 */ 363 uioasync_t uioasync = {B_FALSE, 1024}; 364 365 void 366 uioa_dcopy_enable() 367 { 368 uioasync.enabled = B_TRUE; 369 } 370 371 void 372 uioa_dcopy_disable() 373 { 374 uioasync.enabled = B_FALSE; 375 } 376 377 /* 378 * Schedule an asynchronous move of "n" bytes at byte address "p", 379 * "rw" indicates the direction of the move, I/O parameters and 380 * async state are provided in "uioa" which is update to reflect 381 * the data which is to be moved. 382 * 383 * Returns 0 on success or a non-zero errno on failure. 384 * 385 * Note, while the uioasync APIs are general purpose in design 386 * the current implementation is Intel I/OAT specific. 387 */ 388 int 389 uioamove(void *p, size_t n, enum uio_rw rw, uioa_t *uioa) 390 { 391 int soff, doff; 392 uint64_t pa; 393 int cnt; 394 iovec_t *iov; 395 dcopy_handle_t channel; 396 dcopy_cmd_t cmd; 397 int ret = 0; 398 int dcopy_flags; 399 400 if (!(uioa->uioa_state & UIOA_ENABLED)) { 401 /* The uioa_t isn't enabled */ 402 return (ENXIO); 403 } 404 405 if (uioa->uio_segflg != UIO_USERSPACE || rw != UIO_READ) { 406 /* Only support to user-land from kernel */ 407 return (ENOTSUP); 408 } 409 410 411 channel = uioa->uioa_hwst[UIO_DCOPY_CHANNEL]; 412 cmd = uioa->uioa_hwst[UIO_DCOPY_CMD]; 413 dcopy_flags = DCOPY_NOSLEEP; 414 415 /* 416 * While source bytes and destination bytes. 417 */ 418 while (n > 0 && uioa->uio_resid > 0) { 419 iov = uioa->uio_iov; 420 if (iov->iov_len == 0l) { 421 uioa->uio_iov++; 422 uioa->uio_iovcnt--; 423 uioa->uioa_lcur++; 424 uioa->uioa_lppp = uioa->uioa_lcur->uioa_ppp; 425 continue; 426 } 427 /* 428 * While source bytes schedule an async 429 * dma for destination page by page. 
430 */ 431 while (n > 0) { 432 /* Addr offset in page src/dst */ 433 soff = (uintptr_t)p & PAGEOFFSET; 434 doff = (uintptr_t)iov->iov_base & PAGEOFFSET; 435 /* Min copy count src and dst and page sized */ 436 cnt = MIN(n, iov->iov_len); 437 cnt = MIN(cnt, PAGESIZE - soff); 438 cnt = MIN(cnt, PAGESIZE - doff); 439 /* XXX if next page(s) contiguous could use multipage */ 440 441 /* 442 * if we have an old command, we want to link all 443 * other commands to the next command we alloced so 444 * we only need to track the last command but can 445 * still free them all. 446 */ 447 if (cmd != NULL) { 448 dcopy_flags |= DCOPY_ALLOC_LINK; 449 } 450 ret = dcopy_cmd_alloc(channel, dcopy_flags, &cmd); 451 if (ret != DCOPY_SUCCESS) { 452 /* Error of some sort */ 453 return (EIO); 454 } 455 uioa->uioa_hwst[UIO_DCOPY_CMD] = cmd; 456 457 ASSERT(cmd->dp_version == DCOPY_CMD_V0); 458 if (uioa_maxpoll >= 0) { 459 /* Blocking (>0 may be) used in uioafini() */ 460 cmd->dp_flags = DCOPY_CMD_INTR; 461 } else { 462 /* Non blocking uioafini() so no intr */ 463 cmd->dp_flags = DCOPY_CMD_NOFLAGS; 464 } 465 cmd->dp_cmd = DCOPY_CMD_COPY; 466 pa = ptob((uint64_t)hat_getpfnum(kas.a_hat, p)); 467 cmd->dp.copy.cc_source = pa + soff; 468 if (uioa->uioa_lcur->uioa_pfncnt == 0) { 469 /* Have a (page_t **) */ 470 pa = ptob((uint64_t)( 471 *(page_t **)uioa->uioa_lppp)->p_pagenum); 472 } else { 473 /* Have a (pfn_t *) */ 474 pa = ptob((uint64_t)( 475 *(pfn_t *)uioa->uioa_lppp)); 476 } 477 cmd->dp.copy.cc_dest = pa + doff; 478 cmd->dp.copy.cc_size = cnt; 479 ret = dcopy_cmd_post(cmd); 480 if (ret != DCOPY_SUCCESS) { 481 /* Error of some sort */ 482 return (EIO); 483 } 484 ret = 0; 485 486 /* If UIOA_POLL not set, set it */ 487 if (!(uioa->uioa_state & UIOA_POLL)) 488 uioa->uioa_state |= UIOA_POLL; 489 490 /* Update iov, uio, and local pointers/counters */ 491 iov->iov_base += cnt; 492 iov->iov_len -= cnt; 493 uioa->uio_resid -= cnt; 494 uioa->uioa_mbytes += cnt; 495 uioa->uio_loffset += cnt; 496 p = 
(caddr_t)p + cnt; 497 n -= cnt; 498 499 /* End of iovec? */ 500 if (iov->iov_len == 0) { 501 /* Yup, next iovec */ 502 break; 503 } 504 505 /* Next dst addr page? */ 506 if (doff + cnt == PAGESIZE) { 507 /* Yup, next page_t */ 508 uioa->uioa_lppp++; 509 } 510 } 511 } 512 513 return (ret); 514 } 515 516 /* 517 * Initialize a uioa_t for a given uio_t for the current user context, 518 * copy the common uio_t to the uioa_t, walk the shared iovec_t and 519 * lock down the user-land page(s) containing iovec_t data, then mapin 520 * user-land pages using segkpm. 521 */ 522 int 523 uioainit(uio_t *uiop, uioa_t *uioap) 524 { 525 caddr_t addr; 526 page_t **pages; 527 int off; 528 int len; 529 proc_t *procp = ttoproc(curthread); 530 struct as *as = procp->p_as; 531 iovec_t *iov = uiop->uio_iov; 532 int32_t iovcnt = uiop->uio_iovcnt; 533 uioa_page_t *locked = uioap->uioa_locked; 534 dcopy_handle_t channel; 535 int error; 536 537 if (! (uioap->uioa_state & UIOA_ALLOC)) { 538 /* Can only init() a freshly allocated uioa_t */ 539 return (EINVAL); 540 } 541 542 error = dcopy_alloc(DCOPY_NOSLEEP, &channel); 543 if (error == DCOPY_NORESOURCES) { 544 /* Turn off uioa */ 545 uioasync.enabled = B_FALSE; 546 return (ENODEV); 547 } 548 if (error != DCOPY_SUCCESS) { 549 /* Alloc failed */ 550 return (EIO); 551 } 552 553 uioap->uioa_hwst[UIO_DCOPY_CHANNEL] = channel; 554 uioap->uioa_hwst[UIO_DCOPY_CMD] = NULL; 555 556 /* Indicate uioa_t (will be) initialized */ 557 uioap->uioa_state = UIOA_INIT; 558 559 uioap->uioa_mbytes = 0; 560 561 /* uio_t/uioa_t uio_t common struct copy */ 562 *((uio_t *)uioap) = *uiop; 563 564 /* initialize *uiop->uio_iov */ 565 if (iovcnt > UIOA_IOV_MAX) { 566 /* Too big? */ 567 return (E2BIG); 568 } 569 uioap->uio_iov = iov; 570 uioap->uio_iovcnt = iovcnt; 571 572 /* Mark the uioap as such */ 573 uioap->uio_extflg |= UIO_ASYNC; 574 575 /* 576 * For each iovec_t, lock-down the page(s) backing the iovec_t 577 * and save the page_t list for phys addr use in uioamove(). 
578 */ 579 iov = uiop->uio_iov; 580 iovcnt = uiop->uio_iovcnt; 581 while (iovcnt > 0) { 582 addr = iov->iov_base; 583 off = (uintptr_t)addr & PAGEOFFSET; 584 addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 585 len = iov->iov_len + off; 586 587 /* Lock down page(s) for the iov span */ 588 if ((error = as_pagelock(as, &pages, 589 iov->iov_base, iov->iov_len, S_WRITE)) != 0) { 590 /* Error */ 591 goto cleanup; 592 } 593 594 if (pages == NULL) { 595 /* 596 * Need page_t list, really only need 597 * a pfn list so build one. 598 */ 599 pfn_t *pfnp; 600 int pcnt = len >> PAGESHIFT; 601 602 if (off) 603 pcnt++; 604 if ((pfnp = kmem_alloc(pcnt * sizeof (pfnp), 605 KM_NOSLEEP)) == NULL) { 606 error = ENOMEM; 607 goto cleanup; 608 } 609 locked->uioa_ppp = (void **)pfnp; 610 locked->uioa_pfncnt = pcnt; 611 AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 612 while (pcnt-- > 0) { 613 *pfnp++ = hat_getpfnum(as->a_hat, addr); 614 addr += PAGESIZE; 615 } 616 AS_LOCK_EXIT(as, &as->a_lock); 617 } else { 618 /* Have a page_t list, save it */ 619 locked->uioa_ppp = (void **)pages; 620 locked->uioa_pfncnt = 0; 621 } 622 /* Save for as_pageunlock() in uioafini() */ 623 locked->uioa_base = iov->iov_base; 624 locked->uioa_len = iov->iov_len; 625 locked++; 626 627 /* Next iovec_t */ 628 iov++; 629 iovcnt--; 630 } 631 /* Initialize curret pointer into uioa_locked[] and it's uioa_ppp */ 632 uioap->uioa_lcur = uioap->uioa_locked; 633 uioap->uioa_lppp = uioap->uioa_lcur->uioa_ppp; 634 return (0); 635 636 cleanup: 637 /* Unlock any previously locked page_t(s) */ 638 while (locked > uioap->uioa_locked) { 639 locked--; 640 as_pageunlock(as, (page_t **)locked->uioa_ppp, 641 locked->uioa_base, locked->uioa_len, S_WRITE); 642 } 643 644 /* Last indicate uioa_t still in alloc state */ 645 uioap->uioa_state = UIOA_ALLOC; 646 uioap->uioa_mbytes = 0; 647 648 return (error); 649 } 650 651 /* 652 * Finish processing of a uioa_t by cleanup any pending "uioap" actions. 
653 */ 654 int 655 uioafini(uio_t *uiop, uioa_t *uioap) 656 { 657 int32_t iovcnt = uiop->uio_iovcnt; 658 uioa_page_t *locked = uioap->uioa_locked; 659 struct as *as = ttoproc(curthread)->p_as; 660 dcopy_handle_t channel; 661 dcopy_cmd_t cmd; 662 int ret = 0; 663 664 ASSERT(uioap->uio_extflg & UIO_ASYNC); 665 666 if (!(uioap->uioa_state & (UIOA_ENABLED|UIOA_FINI))) { 667 /* Must be an active uioa_t */ 668 return (EINVAL); 669 } 670 671 channel = uioap->uioa_hwst[UIO_DCOPY_CHANNEL]; 672 cmd = uioap->uioa_hwst[UIO_DCOPY_CMD]; 673 674 /* XXX - why do we get cmd == NULL sometimes? */ 675 if (cmd != NULL) { 676 if (uioap->uioa_state & UIOA_POLL) { 677 /* Wait for last dcopy() to finish */ 678 int64_t poll = 1; 679 int poll_flag = DCOPY_POLL_NOFLAGS; 680 681 do { 682 if (uioa_maxpoll == 0 || 683 (uioa_maxpoll > 0 && 684 poll >= uioa_maxpoll)) { 685 /* Always block or after maxpoll */ 686 poll_flag = DCOPY_POLL_BLOCK; 687 } else { 688 /* No block, poll */ 689 poll++; 690 } 691 ret = dcopy_cmd_poll(cmd, poll_flag); 692 } while (ret == DCOPY_PENDING); 693 694 if (ret == DCOPY_COMPLETED) { 695 /* Poll/block succeeded */ 696 ret = 0; 697 } else { 698 /* Poll/block failed */ 699 ret = EIO; 700 } 701 } 702 dcopy_cmd_free(&cmd); 703 } 704 705 dcopy_free(&channel); 706 707 /* Unlock all page(s) iovec_t by iovec_t */ 708 while (iovcnt-- > 0) { 709 page_t **pages; 710 711 if (locked->uioa_pfncnt == 0) { 712 /* A as_pagelock() returned (page_t **) */ 713 pages = (page_t **)locked->uioa_ppp; 714 } else { 715 /* Our pfn_t array */ 716 pages = NULL; 717 kmem_free(locked->uioa_ppp, locked->uioa_pfncnt * 718 sizeof (pfn_t *)); 719 } 720 as_pageunlock(as, pages, locked->uioa_base, locked->uioa_len, 721 S_WRITE); 722 723 locked++; 724 } 725 /* uioa_t->uio_t common struct copy */ 726 *uiop = *((uio_t *)uioap); 727 728 /* 729 * Last, reset uioa state to alloc. 730 * 731 * Note, we only initialize the state here, all other members 732 * will be initialized in a subsequent uioainit(). 
733 */ 734 uioap->uioa_state = UIOA_ALLOC; 735 uioap->uioa_mbytes = 0; 736 737 uioap->uioa_hwst[UIO_DCOPY_CMD] = NULL; 738 uioap->uioa_hwst[UIO_DCOPY_CHANNEL] = NULL; 739 740 return (ret); 741 } 742