/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/uio.h>
#include <sys/errno.h>
#include <sys/vmsystm.h>
#include <sys/cmn_err.h>
#include <vm/as.h>
#include <vm/page.h>

#include <sys/dcopy.h>

int64_t uioa_maxpoll = -1;	/* <0 = noblock, 0 = block, >0 = block after */
#define	UIO_DCOPY_CHANNEL	0
#define	UIO_DCOPY_CMD		1

/*
 * Move "n" bytes at byte address "p"; "rw" indicates the direction
 * of the move, and the I/O parameters are provided in "uio", which is
 * updated to reflect the data which was moved.  Returns 0 on success
 * or a non-zero errno on failure.
 */
int
uiomove(void *p, size_t n, enum uio_rw rw, struct uio *uio)
{
	struct iovec *iov;
	ulong_t cnt;
	int error;

	while (n && uio->uio_resid) {
		iov = uio->uio_iov;
		cnt = MIN(iov->iov_len, n);
		if (cnt == 0) {
			/* Empty iovec, advance to the next one */
			uio->uio_iov++;
			uio->uio_iovcnt--;
			continue;
		}
		switch (uio->uio_segflg) {

		case UIO_USERSPACE:
		case UIO_USERISPACE:
			if (rw == UIO_READ) {
				error = xcopyout_nta(p, iov->iov_base, cnt,
				    (uio->uio_extflg & UIO_COPY_CACHED));
			} else {
				error = xcopyin_nta(iov->iov_base, p, cnt,
				    (uio->uio_extflg & UIO_COPY_CACHED));
			}

			if (error)
				return (error);
			break;

		case UIO_SYSSPACE:
			if (rw == UIO_READ)
				error = kcopy_nta(p, iov->iov_base, cnt,
				    (uio->uio_extflg & UIO_COPY_CACHED));
			else
				error = kcopy_nta(iov->iov_base, p, cnt,
				    (uio->uio_extflg & UIO_COPY_CACHED));
			if (error)
				return (error);
			break;
		}
		iov->iov_base += cnt;
		iov->iov_len -= cnt;
		uio->uio_resid -= cnt;
		uio->uio_loffset += cnt;
		p = (caddr_t)p + cnt;
		n -= cnt;
	}
	return (0);
}
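
/*
 * Example (illustrative sketch, compiled out; not part of the original
 * source): a typical character driver read(9E) routine returns bytes
 * from a kernel-resident buffer with uiomove().  The mydev_* names are
 * hypothetical.
 */
#if 0
static char mydev_buf[1024];
static size_t mydev_size = sizeof (mydev_buf);

static int
mydev_read(dev_t dev, struct uio *uiop, cred_t *credp)
{
	offset_t off = uiop->uio_loffset;
	size_t cnt;

	if (off < 0 || off >= mydev_size)
		return (0);	/* EOF, uio_resid left unchanged */
	cnt = MIN(uiop->uio_resid, mydev_size - (size_t)off);

	/* UIO_READ: move from the kernel buffer out to the caller */
	return (uiomove(&mydev_buf[off], cnt, UIO_READ, uiop));
}
#endif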

/*
 * Transfer a character value into the address space delineated by a
 * uio and update fields within the uio for the next character.
 * Returns 0 for success, EFAULT for error.
 */
int
ureadc(int val, struct uio *uiop)
{
	struct iovec *iovp;
	unsigned char c;

	/*
	 * First determine if the uio is valid: uiop should be
	 * non-NULL and the resid count > 0.
	 */
	if (!(uiop && uiop->uio_resid > 0))
		return (EFAULT);

	/*
	 * Scan through iovecs until one is found that is non-empty.
	 * Return EFAULT if none found.
	 */
	while (uiop->uio_iovcnt > 0) {
		iovp = uiop->uio_iov;
		if (iovp->iov_len <= 0) {
			uiop->uio_iovcnt--;
			uiop->uio_iov++;
		} else
			break;
	}

	if (uiop->uio_iovcnt <= 0)
		return (EFAULT);

	/*
	 * Transfer character to uio space.
	 */

	c = (unsigned char) (val & 0xFF);

	switch (uiop->uio_segflg) {

	case UIO_USERISPACE:
	case UIO_USERSPACE:
		if (copyout(&c, iovp->iov_base, sizeof (unsigned char)))
			return (EFAULT);
		break;

	case UIO_SYSSPACE: /* can do direct copy since kernel-kernel */
		*iovp->iov_base = c;
		break;

	default:
		return (EFAULT); /* invalid segflg value */
	}

	/*
	 * Bump up/down iovec and uio members to reflect the transfer.
	 */
	iovp->iov_base++;
	iovp->iov_len--;
	uiop->uio_resid--;
	uiop->uio_loffset++;
	return (0);	/* success */
}

/*
 * Return a character value from the address space delineated by a
 * uio and update fields within the uio for the next character.
 * Returns the character for success, -1 for error.
 */
int
uwritec(struct uio *uiop)
{
	struct iovec *iovp;
	unsigned char c;

	/*
	 * Verify we were passed a valid uio structure:
	 * (1) non-NULL uiop, (2) positive resid count,
	 * (3) there is an iovec with positive length.
	 */

	if (!(uiop && uiop->uio_resid > 0))
		return (-1);

	while (uiop->uio_iovcnt > 0) {
		iovp = uiop->uio_iov;
		if (iovp->iov_len <= 0) {
			uiop->uio_iovcnt--;
			uiop->uio_iov++;
		} else
			break;
	}

	if (uiop->uio_iovcnt <= 0)
		return (-1);

	/*
	 * Get the character from the uio address space.
	 */
	switch (uiop->uio_segflg) {

	case UIO_USERISPACE:
	case UIO_USERSPACE:
		if (copyin(iovp->iov_base, &c, sizeof (unsigned char)))
			return (-1);
		break;

	case UIO_SYSSPACE:
		c = *iovp->iov_base;
		break;

	default:
		return (-1);	/* invalid segflg */
	}

	/*
	 * Adjust fields of iovec and uio appropriately.
	 */
	iovp->iov_base++;
	iovp->iov_len--;
	uiop->uio_resid--;
	uiop->uio_loffset++;
	return ((int)c & 0xFF);	/* success */
}

/*
 * Drop the next n chars out of *uiop.
 */
void
uioskip(uio_t *uiop, size_t n)
{
	if (n > uiop->uio_resid)
		return;
	while (n != 0) {
		register iovec_t *iovp = uiop->uio_iov;
		register size_t niovb = MIN(iovp->iov_len, n);

		if (niovb == 0) {
			/* Empty iovec, advance to the next one */
			uiop->uio_iov++;
			uiop->uio_iovcnt--;
			continue;
		}
		iovp->iov_base += niovb;
		uiop->uio_loffset += niovb;
		iovp->iov_len -= niovb;
		uiop->uio_resid -= niovb;
		n -= niovb;
	}
}
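
/*
 * Example (illustrative sketch, compiled out): ureadc()/uwritec() move
 * one character per call, which suits byte-oriented devices.  Here a
 * hypothetical write(9E) routine drains the caller's buffer a byte at
 * a time; mydev_putbyte() is an assumed helper.
 */
#if 0
static int
mydev_write(dev_t dev, struct uio *uiop, cred_t *credp)
{
	int c;

	while (uiop->uio_resid > 0) {
		/* uwritec() returns the next byte, or -1 on fault */
		if ((c = uwritec(uiop)) == -1)
			return (EFAULT);
		mydev_putbyte((uchar_t)c);
	}
	return (0);
}
#endif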

/*
 * Dup the suio into the duio and diovec of size diov_cnt.  If diov
 * is too small to dup suio then an error will be returned, else 0.
 */
int
uiodup(uio_t *suio, uio_t *duio, iovec_t *diov, int diov_cnt)
{
	int ix;
	iovec_t *siov = suio->uio_iov;

	*duio = *suio;
	for (ix = 0; ix < suio->uio_iovcnt; ix++) {
		/* Bounds check before the copy so diov can't overflow */
		if (ix >= diov_cnt)
			return (1);
		diov[ix] = siov[ix];
	}
	duio->uio_iov = diov;
	return (0);
}
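
/*
 * Example (illustrative sketch, compiled out): uiodup() pairs well
 * with uioskip() for "trial" transfers; the copy is attempted against
 * a duplicate, so the caller's uio_t is only advanced on success.
 * mydev_xfer() is hypothetical and assumes len <= uio_resid.
 */
#if 0
static int
mydev_xfer(void *buf, size_t len, struct uio *uiop)
{
	uio_t tuio;
	iovec_t tiov[DEF_IOV_MAX];
	int error;

	/* Work against the duplicate; the caller's uio is untouched */
	if (uiodup(uiop, &tuio, tiov, DEF_IOV_MAX) != 0)
		return (EINVAL);
	if ((error = uiomove(buf, len, UIO_READ, &tuio)) != 0)
		return (error);

	/* Success: now advance the caller's uio past the moved bytes */
	uioskip(uiop, len);
	return (0);
}
#endif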

/*
 * Shadow state for checking if a platform has hardware asynchronous
 * copy capability and the minimum copy size, e.g. Intel's I/OAT DMA
 * engine.
 *
 * Dcopy calls back into uioa_dcopy_enable() when a DMA device
 * registers with dcopy, and into uioa_dcopy_disable() when the device
 * unregisters.
 */
uioasync_t uioasync = {B_FALSE, 1024};

void
uioa_dcopy_enable()
{
	uioasync.enabled = B_TRUE;
}

void
uioa_dcopy_disable()
{
	uioasync.enabled = B_FALSE;
}

/*
 * Schedule an asynchronous move of "n" bytes at byte address "p";
 * "rw" indicates the direction of the move, and the I/O parameters
 * and async state are provided in "uioa", which is updated to reflect
 * the data which is to be moved.
 *
 * Returns 0 on success or a non-zero errno on failure.
 *
 * Note, while the uioasync APIs are general purpose in design,
 * the current implementation is Intel I/OAT specific.
 */
int
uioamove(void *p, size_t n, enum uio_rw rw, uioa_t *uioa)
{
	int soff, doff;
	uint64_t pa;
	int cnt;
	iovec_t *iov;
	dcopy_handle_t channel;
	dcopy_cmd_t cmd;
	int ret = 0;
	int dcopy_flags;

	if (!(uioa->uioa_state & UIOA_ENABLED)) {
		/* The uioa_t isn't enabled */
		return (ENXIO);
	}

	if (uioa->uio_segflg != UIO_USERSPACE || rw != UIO_READ) {
		/* Only kernel to user-land moves are supported */
		return (ENOTSUP);
	}

	channel = uioa->uioa_hwst[UIO_DCOPY_CHANNEL];
	cmd = uioa->uioa_hwst[UIO_DCOPY_CMD];
	dcopy_flags = DCOPY_NOSLEEP;

	/*
	 * While there are source bytes and destination bytes remaining.
	 */
	while (n > 0 && uioa->uio_resid > 0) {
		iov = uioa->uio_iov;
		if (iov->iov_len == 0) {
			/* Empty iovec, advance to the next one */
			uioa->uio_iov++;
			uioa->uio_iovcnt--;
			uioa->uioa_lcur++;
			uioa->uioa_lppp = uioa->uioa_lcur->uioa_ppp;
			continue;
		}
		/*
		 * While there are source bytes, schedule an async
		 * DMA for the destination, page by page.
		 */
		while (n > 0) {
			/* Addr offset in page src/dst */
			soff = (uintptr_t)p & PAGEOFFSET;
			doff = (uintptr_t)iov->iov_base & PAGEOFFSET;
			/* Min copy count src and dst and page sized */
			cnt = MIN(n, iov->iov_len);
			cnt = MIN(cnt, PAGESIZE - soff);
			cnt = MIN(cnt, PAGESIZE - doff);
			/* XXX if next page(s) contiguous could use multipage */

			/*
			 * If we have an old command, we want to link all
			 * other commands to the next command we alloc so
			 * we only need to track the last command but can
			 * still free them all.
			 */
			if (cmd != NULL) {
				dcopy_flags |= DCOPY_ALLOC_LINK;
			}
			ret = dcopy_cmd_alloc(channel, dcopy_flags, &cmd);
			if (ret != DCOPY_SUCCESS) {
				/* Error of some sort */
				return (EIO);
			}
			uioa->uioa_hwst[UIO_DCOPY_CMD] = cmd;

			ASSERT(cmd->dp_version == DCOPY_CMD_V0);
			if (uioa_maxpoll >= 0) {
				/* Blocking (possibly after maxpoll polls) in uioafini() */
				cmd->dp_flags = DCOPY_CMD_INTR;
			} else {
				/* Non-blocking uioafini(), so no intr */
				cmd->dp_flags = DCOPY_CMD_NOFLAGS;
			}
			cmd->dp_cmd = DCOPY_CMD_COPY;
			pa = ptob((uint64_t)hat_getpfnum(kas.a_hat, p));
			cmd->dp.copy.cc_source = pa + soff;
			if (uioa->uioa_lcur->uioa_pfncnt == 0) {
				/* Have a (page_t **) */
				pa = ptob((uint64_t)(
				    *(page_t **)uioa->uioa_lppp)->p_pagenum);
			} else {
				/* Have a (pfn_t *) */
				pa = ptob((uint64_t)(
				    *(pfn_t *)uioa->uioa_lppp));
			}
			cmd->dp.copy.cc_dest = pa + doff;
			cmd->dp.copy.cc_size = cnt;
			ret = dcopy_cmd_post(cmd);
			if (ret != DCOPY_SUCCESS) {
				/* Error of some sort */
				return (EIO);
			}
			ret = 0;

			/* If UIOA_POLL not set, set it */
			if (!(uioa->uioa_state & UIOA_POLL))
				uioa->uioa_state |= UIOA_POLL;

			/* Update iov, uio, and local pointers/counters */
			iov->iov_base += cnt;
			iov->iov_len -= cnt;
			uioa->uio_resid -= cnt;
			uioa->uio_loffset += cnt;
			p = (caddr_t)p + cnt;
			n -= cnt;

			/* End of iovec? */
			if (iov->iov_len == 0) {
				/* Yup, next iovec */
				break;
			}

			/* Next dst addr page? */
			if (doff + cnt == PAGESIZE) {
				/* Yup, next page_t */
				uioa->uioa_lppp++;
			}
		}
	}

	return (ret);
}
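
/*
 * Example (illustrative sketch, compiled out): callers consult the
 * uioasync shadow state before taking the asynchronous path; copies
 * smaller than uioasync.mincnt aren't worth the dcopy setup cost.
 * mydev_copyout() is hypothetical.
 */
#if 0
static int
mydev_copyout(void *buf, size_t len, uioa_t *uioap)
{
	if (uioasync.enabled && len >= uioasync.mincnt)
		return (uioamove(buf, len, UIO_READ, uioap));

	/* Hardware copy unavailable or not worthwhile: go synchronous */
	return (uiomove(buf, len, UIO_READ, (uio_t *)uioap));
}
#endif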

/*
 * Initialize a uioa_t for a given uio_t for the current user context:
 * copy the common uio_t into the uioa_t, walk the shared iovec_t and
 * lock down the user-land page(s) containing the iovec_t data, then
 * map in the user-land pages using segkpm.
 */
int
uioainit(uio_t *uiop, uioa_t *uioap)
{
	caddr_t addr;
	page_t **pages;
	int off;
	int len;
	proc_t *procp = ttoproc(curthread);
	struct as *as = procp->p_as;
	iovec_t *iov = uiop->uio_iov;
	int32_t iovcnt = uiop->uio_iovcnt;
	uioa_page_t *locked = uioap->uioa_locked;
	dcopy_handle_t channel;
	int error;

	if (!(uioap->uioa_state & UIOA_ALLOC)) {
		/* Can only init() a freshly allocated uioa_t */
		return (EINVAL);
	}

	error = dcopy_alloc(DCOPY_NOSLEEP, &channel);
	if (error == DCOPY_NORESOURCES) {
		/* Turn off uioa */
		uioasync.enabled = B_FALSE;
		return (ENODEV);
	}
	if (error != DCOPY_SUCCESS) {
		/* Alloc failed */
		return (EIO);
	}

	uioap->uioa_hwst[UIO_DCOPY_CHANNEL] = channel;
	uioap->uioa_hwst[UIO_DCOPY_CMD] = NULL;

	/* Indicate uioa_t (will be) initialized */
	uioap->uioa_state = UIOA_INIT;

	/* uio_t/uioa_t uio_t common struct copy */
	*((uio_t *)uioap) = *uiop;

	/* Initialize *uiop->uio_iov */
	if (iovcnt > UIOA_IOV_MAX) {
		/* Too big? */
		return (E2BIG);
	}
	uioap->uio_iov = iov;
	uioap->uio_iovcnt = iovcnt;

	/* Mark the uioap as such */
	uioap->uio_extflg |= UIO_ASYNC;

	/*
	 * For each iovec_t, lock down the page(s) backing the iovec_t
	 * and save the page_t list for phys addr use in uioamove().
	 */
	iov = uiop->uio_iov;
	iovcnt = uiop->uio_iovcnt;
	while (iovcnt > 0) {
		addr = iov->iov_base;
		off = (uintptr_t)addr & PAGEOFFSET;
		addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
		len = iov->iov_len + off;

		/* Lock down page(s) for the iov span */
		if ((error = as_pagelock(as, &pages,
		    iov->iov_base, iov->iov_len, S_WRITE)) != 0) {
			/* Error */
			goto cleanup;
		}

		if (pages == NULL) {
			/*
			 * Need a page_t list; really only need
			 * a pfn list, so build one.
			 */
			pfn_t *pfnp;
			int pcnt = len >> PAGESHIFT;

			if (off)
				pcnt++;
			if ((pfnp = kmem_alloc(pcnt * sizeof (pfn_t),
			    KM_NOSLEEP)) == NULL) {
				error = ENOMEM;
				goto cleanup;
			}
			locked->uioa_ppp = (void **)pfnp;
			locked->uioa_pfncnt = pcnt;
			AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
			while (pcnt-- > 0) {
				*pfnp++ = hat_getpfnum(as->a_hat, addr);
				addr += PAGESIZE;
			}
			AS_LOCK_EXIT(as, &as->a_lock);
		} else {
			/* Have a page_t list, save it */
			locked->uioa_ppp = (void **)pages;
			locked->uioa_pfncnt = 0;
		}
		/* Save for as_pageunlock() in uioafini() */
		locked->uioa_base = iov->iov_base;
		locked->uioa_len = iov->iov_len;
		locked++;

		/* Next iovec_t */
		iov++;
		iovcnt--;
	}
	/* Initialize current pointer into uioa_locked[] and its uioa_ppp */
	uioap->uioa_lcur = uioap->uioa_locked;
	uioap->uioa_lppp = uioap->uioa_lcur->uioa_ppp;
	return (0);

cleanup:
	/* Unlock any previously locked page_t(s), free any pfn lists */
	while (locked > uioap->uioa_locked) {
		locked--;
		if (locked->uioa_pfncnt == 0) {
			/* as_pagelock() returned a (page_t **) */
			pages = (page_t **)locked->uioa_ppp;
		} else {
			/* Our pfn_t array, free it */
			pages = NULL;
			kmem_free(locked->uioa_ppp,
			    locked->uioa_pfncnt * sizeof (pfn_t));
		}
		as_pageunlock(as, pages, locked->uioa_base,
		    locked->uioa_len, S_WRITE);
	}

	/* Last, indicate the uioa_t is still in the alloc state */
	uioap->uioa_state = UIOA_ALLOC;

	return (error);
}
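
/*
 * Example (illustrative sketch, compiled out): uioainit() requires a
 * uioa_t in the UIOA_ALLOC state and returns ENODEV when no dcopy DMA
 * engine is registered, in which case callers should fall back to the
 * synchronous uiomove() path.  mydev_uioa_enable() is hypothetical,
 * and the direct state assignments are a simplification.
 */
#if 0
static int
mydev_uioa_enable(uio_t *uiop, uioa_t *uioap)
{
	int error;

	uioap->uioa_state = UIOA_ALLOC;		/* freshly allocated */
	if ((error = uioainit(uiop, uioap)) != 0)
		return (error);			/* e.g. ENODEV: go sync */
	uioap->uioa_state = UIOA_ENABLED;	/* allow uioamove() */
	return (0);
}
#endif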

/*
 * Finish processing of a uioa_t by cleaning up any pending "uioap"
 * actions.
 */
int
uioafini(uio_t *uiop, uioa_t *uioap)
{
	int32_t iovcnt = uiop->uio_iovcnt;
	uioa_page_t *locked = uioap->uioa_locked;
	struct as *as = ttoproc(curthread)->p_as;
	dcopy_handle_t channel;
	dcopy_cmd_t cmd;
	int ret = 0;

	ASSERT(uioap->uio_extflg & UIO_ASYNC);

	if (!(uioap->uioa_state & (UIOA_ENABLED|UIOA_FINI))) {
		/* Must be an active uioa_t */
		return (EINVAL);
	}

	channel = uioap->uioa_hwst[UIO_DCOPY_CHANNEL];
	cmd = uioap->uioa_hwst[UIO_DCOPY_CMD];

	/* XXX - why do we get cmd == NULL sometimes? */
	if (cmd != NULL) {
		if (uioap->uioa_state & UIOA_POLL) {
			/* Wait for the last dcopy() to finish */
			int64_t poll = 1;
			int poll_flag = DCOPY_POLL_NOFLAGS;

			do {
				if (uioa_maxpoll == 0 ||
				    (uioa_maxpoll > 0 &&
				    poll >= uioa_maxpoll)) {
					/* Always block, or block after maxpoll */
					poll_flag = DCOPY_POLL_BLOCK;
				} else {
					/* Don't block, poll */
					poll++;
				}
				ret = dcopy_cmd_poll(cmd, poll_flag);
			} while (ret == DCOPY_PENDING);

			if (ret == DCOPY_COMPLETED) {
				/* Poll/block succeeded */
				ret = 0;
			} else {
				/* Poll/block failed */
				ret = EIO;
			}
		}
		dcopy_cmd_free(&cmd);
	}

	dcopy_free(&channel);

	/* Unlock all page(s), iovec_t by iovec_t */
	while (iovcnt-- > 0) {
		page_t **pages;

		if (locked->uioa_pfncnt == 0) {
			/* as_pagelock() returned a (page_t **) */
			pages = (page_t **)locked->uioa_ppp;
		} else {
			/* Our pfn_t array, free it */
			pages = NULL;
			kmem_free(locked->uioa_ppp,
			    locked->uioa_pfncnt * sizeof (pfn_t));
		}
		as_pageunlock(as, pages, locked->uioa_base, locked->uioa_len,
		    S_WRITE);

		locked++;
	}
	/* uioa_t->uio_t common struct copy */
	*uiop = *((uio_t *)uioap);

	/*
	 * Last, reset the uioa state to alloc.
	 *
	 * Note, we only initialize the state here; all other members
	 * will be initialized in a subsequent uioainit().
	 */
	uioap->uioa_state = UIOA_ALLOC;

	uioap->uioa_hwst[UIO_DCOPY_CMD] = NULL;
	uioap->uioa_hwst[UIO_DCOPY_CHANNEL] = NULL;

	return (ret);
}
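
/*
 * Example (illustrative sketch, compiled out): the overall uioa_t life
 * cycle, using the hypothetical helper above: init and enable, schedule
 * asynchronous copies with uioamove(), then uioafini() to poll/block on
 * outstanding dcopy commands and unlock the user pages.
 */
#if 0
static int
mydev_async_read(uio_t *uiop, uioa_t *uioap, void *buf, size_t len)
{
	int error, ret;

	if (mydev_uioa_enable(uiop, uioap) != 0) {
		/* No DMA engine available: synchronous copy instead */
		return (uiomove(buf, len, UIO_READ, uiop));
	}
	error = uioamove(buf, len, UIO_READ, uioap);

	/* Always fini: reaps the dcopy command(s) and unlocks pages */
	ret = uioafini(uiop, uioap);
	return (error != 0 ? error : ret);
}
#endif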