/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
37 */ 38 39 #pragma ident "%Z%%M% %I% %E% SMI" 40 41 #include <sys/types.h> 42 #include <sys/sysmacros.h> 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/uio.h> 46 #include <sys/errno.h> 47 #include <sys/vmsystm.h> 48 #include <sys/cmn_err.h> 49 #include <vm/as.h> 50 #include <vm/page.h> 51 52 #include <sys/dcopy.h> 53 54 int64_t uioa_maxpoll = -1; /* <0 = noblock, 0 = block, >0 = block after */ 55 #define UIO_DCOPY_CHANNEL 0 56 #define UIO_DCOPY_CMD 1 57 58 /* 59 * Move "n" bytes at byte address "p"; "rw" indicates the direction 60 * of the move, and the I/O parameters are provided in "uio", which is 61 * update to reflect the data which was moved. Returns 0 on success or 62 * a non-zero errno on failure. 63 */ 64 int 65 uiomove(void *p, size_t n, enum uio_rw rw, struct uio *uio) 66 { 67 struct iovec *iov; 68 ulong_t cnt; 69 int error; 70 71 while (n && uio->uio_resid) { 72 iov = uio->uio_iov; 73 cnt = MIN(iov->iov_len, n); 74 if (cnt == 0l) { 75 uio->uio_iov++; 76 uio->uio_iovcnt--; 77 continue; 78 } 79 switch (uio->uio_segflg) { 80 81 case UIO_USERSPACE: 82 case UIO_USERISPACE: 83 if (rw == UIO_READ) { 84 error = xcopyout_nta(p, iov->iov_base, cnt, 85 (uio->uio_extflg & UIO_COPY_CACHED)); 86 } else { 87 error = xcopyin_nta(iov->iov_base, p, cnt, 88 (uio->uio_extflg & UIO_COPY_CACHED)); 89 } 90 91 if (error) 92 return (error); 93 break; 94 95 case UIO_SYSSPACE: 96 if (rw == UIO_READ) 97 error = kcopy_nta(p, iov->iov_base, cnt, 98 (uio->uio_extflg & UIO_COPY_CACHED)); 99 else 100 error = kcopy_nta(iov->iov_base, p, cnt, 101 (uio->uio_extflg & UIO_COPY_CACHED)); 102 if (error) 103 return (error); 104 break; 105 } 106 iov->iov_base += cnt; 107 iov->iov_len -= cnt; 108 uio->uio_resid -= cnt; 109 uio->uio_loffset += cnt; 110 p = (caddr_t)p + cnt; 111 n -= cnt; 112 } 113 return (0); 114 } 115 116 /* 117 * transfer a character value into the address space 118 * delineated by a uio and update fields within the 119 * uio for next character. 
Return 0 for success, EFAULT 120 * for error. 121 */ 122 int 123 ureadc(int val, struct uio *uiop) 124 { 125 struct iovec *iovp; 126 unsigned char c; 127 128 /* 129 * first determine if uio is valid. uiop should be 130 * non-NULL and the resid count > 0. 131 */ 132 if (!(uiop && uiop->uio_resid > 0)) 133 return (EFAULT); 134 135 /* 136 * scan through iovecs until one is found that is non-empty. 137 * Return EFAULT if none found. 138 */ 139 while (uiop->uio_iovcnt > 0) { 140 iovp = uiop->uio_iov; 141 if (iovp->iov_len <= 0) { 142 uiop->uio_iovcnt--; 143 uiop->uio_iov++; 144 } else 145 break; 146 } 147 148 if (uiop->uio_iovcnt <= 0) 149 return (EFAULT); 150 151 /* 152 * Transfer character to uio space. 153 */ 154 155 c = (unsigned char) (val & 0xFF); 156 157 switch (uiop->uio_segflg) { 158 159 case UIO_USERISPACE: 160 case UIO_USERSPACE: 161 if (copyout(&c, iovp->iov_base, sizeof (unsigned char))) 162 return (EFAULT); 163 break; 164 165 case UIO_SYSSPACE: /* can do direct copy since kernel-kernel */ 166 *iovp->iov_base = c; 167 break; 168 169 default: 170 return (EFAULT); /* invalid segflg value */ 171 } 172 173 /* 174 * bump up/down iovec and uio members to reflect transfer. 175 */ 176 iovp->iov_base++; 177 iovp->iov_len--; 178 uiop->uio_resid--; 179 uiop->uio_loffset++; 180 return (0); /* success */ 181 } 182 183 /* 184 * return a character value from the address space 185 * delineated by a uio and update fields within the 186 * uio for next character. Return the character for success, 187 * -1 for error. 188 */ 189 int 190 uwritec(struct uio *uiop) 191 { 192 struct iovec *iovp; 193 unsigned char c; 194 195 /* 196 * verify we were passed a valid uio structure. 
197 * (1) non-NULL uiop, (2) positive resid count 198 * (3) there is an iovec with positive length 199 */ 200 201 if (!(uiop && uiop->uio_resid > 0)) 202 return (-1); 203 204 while (uiop->uio_iovcnt > 0) { 205 iovp = uiop->uio_iov; 206 if (iovp->iov_len <= 0) { 207 uiop->uio_iovcnt--; 208 uiop->uio_iov++; 209 } else 210 break; 211 } 212 213 if (uiop->uio_iovcnt <= 0) 214 return (-1); 215 216 /* 217 * Get the character from the uio address space. 218 */ 219 switch (uiop->uio_segflg) { 220 221 case UIO_USERISPACE: 222 case UIO_USERSPACE: 223 if (copyin(iovp->iov_base, &c, sizeof (unsigned char))) 224 return (-1); 225 break; 226 227 case UIO_SYSSPACE: 228 c = *iovp->iov_base; 229 break; 230 231 default: 232 return (-1); /* invalid segflg */ 233 } 234 235 /* 236 * Adjust fields of iovec and uio appropriately. 237 */ 238 iovp->iov_base++; 239 iovp->iov_len--; 240 uiop->uio_resid--; 241 uiop->uio_loffset++; 242 return ((int)c & 0xFF); /* success */ 243 } 244 245 /* 246 * Drop the next n chars out of *uiop. 247 */ 248 void 249 uioskip(uio_t *uiop, size_t n) 250 { 251 if (n > uiop->uio_resid) 252 return; 253 while (n != 0) { 254 register iovec_t *iovp = uiop->uio_iov; 255 register size_t niovb = MIN(iovp->iov_len, n); 256 257 if (niovb == 0) { 258 uiop->uio_iov++; 259 uiop->uio_iovcnt--; 260 continue; 261 } 262 iovp->iov_base += niovb; 263 uiop->uio_loffset += niovb; 264 iovp->iov_len -= niovb; 265 uiop->uio_resid -= niovb; 266 n -= niovb; 267 } 268 } 269 270 /* 271 * Dup the suio into the duio and diovec of size diov_cnt. If diov 272 * is too small to dup suio then an error will be returned, else 0. 
273 */ 274 int 275 uiodup(uio_t *suio, uio_t *duio, iovec_t *diov, int diov_cnt) 276 { 277 int ix; 278 iovec_t *siov = suio->uio_iov; 279 280 *duio = *suio; 281 for (ix = 0; ix < suio->uio_iovcnt; ix++) { 282 diov[ix] = siov[ix]; 283 if (ix >= diov_cnt) 284 return (1); 285 } 286 duio->uio_iov = diov; 287 return (0); 288 } 289 290 /* 291 * Shadow state for checking if a platform has hardware asynchronous 292 * copy capability and minimum copy size, e.g. Intel's I/OAT dma engine, 293 * /dev/dcopy. 294 */ 295 uioasync_t uioasync = {B_TRUE, 1024}; 296 297 /* 298 * Schedule an asynchronous move of "n" bytes at byte address "p", 299 * "rw" indicates the direction of the move, I/O parameters and 300 * async state are provided in "uioa" which is update to reflect 301 * the data which is to be moved. 302 * 303 * Returns 0 on success or a non-zero errno on failure. 304 * 305 * Note, while the uioasync APIs are general purpose in design 306 * the current implementation is Intel I/OAT specific. 307 */ 308 int 309 uioamove(void *p, size_t n, enum uio_rw rw, uioa_t *uioa) 310 { 311 int soff, doff; 312 uint64_t pa; 313 int cnt; 314 iovec_t *iov; 315 dcopy_handle_t channel; 316 dcopy_cmd_t cmd; 317 int ret = 0; 318 int dcopy_flags; 319 320 if (!(uioa->uioa_state & UIOA_ENABLED)) { 321 /* The uioa_t isn't enabled */ 322 return (ENXIO); 323 } 324 325 if (uioa->uio_segflg != UIO_USERSPACE || rw != UIO_READ) { 326 /* Only support to user-land from kernel */ 327 return (ENOTSUP); 328 } 329 330 331 channel = uioa->uioa_hwst[UIO_DCOPY_CHANNEL]; 332 cmd = uioa->uioa_hwst[UIO_DCOPY_CMD]; 333 dcopy_flags = DCOPY_NOSLEEP; 334 335 /* 336 * While source bytes and destination bytes. 
337 */ 338 while (n > 0 && uioa->uio_resid > 0) { 339 iov = uioa->uio_iov; 340 if (iov->iov_len == 0l) { 341 uioa->uio_iov++; 342 uioa->uio_iovcnt--; 343 uioa->uioa_lcur++; 344 uioa->uioa_lppp = uioa->uioa_lcur->uioa_ppp; 345 continue; 346 } 347 /* 348 * While source bytes schedule an async 349 * dma for destination page by page. 350 */ 351 while (n > 0) { 352 /* Addr offset in page src/dst */ 353 soff = (uintptr_t)p & PAGEOFFSET; 354 doff = (uintptr_t)iov->iov_base & PAGEOFFSET; 355 /* Min copy count src and dst and page sized */ 356 cnt = MIN(n, iov->iov_len); 357 cnt = MIN(cnt, PAGESIZE - soff); 358 cnt = MIN(cnt, PAGESIZE - doff); 359 /* XXX if next page(s) contiguous could use multipage */ 360 361 /* 362 * if we have an old command, we want to link all 363 * other commands to the next command we alloced so 364 * we only need to track the last command but can 365 * still free them all. 366 */ 367 if (cmd != NULL) { 368 dcopy_flags |= DCOPY_ALLOC_LINK; 369 } 370 ret = dcopy_cmd_alloc(channel, dcopy_flags, &cmd); 371 if (ret != DCOPY_SUCCESS) { 372 /* Error of some sort */ 373 return (EIO); 374 } 375 uioa->uioa_hwst[UIO_DCOPY_CMD] = cmd; 376 377 ASSERT(cmd->dp_version == DCOPY_CMD_V0); 378 if (uioa_maxpoll >= 0) { 379 /* Blocking (>0 may be) used in uioafini() */ 380 cmd->dp_flags = DCOPY_CMD_INTR; 381 } else { 382 /* Non blocking uioafini() so no intr */ 383 cmd->dp_flags = DCOPY_CMD_NOFLAGS; 384 } 385 cmd->dp_cmd = DCOPY_CMD_COPY; 386 pa = ptob((uint64_t)hat_getpfnum(kas.a_hat, p)); 387 cmd->dp.copy.cc_source = pa + soff; 388 if (uioa->uioa_lcur->uioa_pfncnt == 0) { 389 /* Have a (page_t **) */ 390 pa = ptob((uint64_t)( 391 *(page_t **)uioa->uioa_lppp)->p_pagenum); 392 } else { 393 /* Have a (pfn_t *) */ 394 pa = ptob((uint64_t)( 395 *(pfn_t *)uioa->uioa_lppp)); 396 } 397 cmd->dp.copy.cc_dest = pa + doff; 398 cmd->dp.copy.cc_size = cnt; 399 ret = dcopy_cmd_post(cmd); 400 if (ret != DCOPY_SUCCESS) { 401 /* Error of some sort */ 402 return (EIO); 403 } 404 ret = 
0; 405 406 /* If UIOA_POLL not set, set it */ 407 if (!(uioa->uioa_state & UIOA_POLL)) 408 uioa->uioa_state |= UIOA_POLL; 409 410 /* Update iov, uio, and local pointers/counters */ 411 iov->iov_base += cnt; 412 iov->iov_len -= cnt; 413 uioa->uio_resid -= cnt; 414 uioa->uio_loffset += cnt; 415 p = (caddr_t)p + cnt; 416 n -= cnt; 417 418 /* End of iovec? */ 419 if (iov->iov_len == 0) { 420 /* Yup, next iovec */ 421 break; 422 } 423 424 /* Next dst addr page? */ 425 if (doff + cnt == PAGESIZE) { 426 /* Yup, next page_t */ 427 uioa->uioa_lppp++; 428 } 429 } 430 } 431 432 return (ret); 433 } 434 435 /* 436 * Initialize a uioa_t for a given uio_t for the current user context, 437 * copy the common uio_t to the uioa_t, walk the shared iovec_t and 438 * lock down the user-land page(s) containing iovec_t data, then mapin 439 * user-land pages using segkpm. 440 */ 441 int 442 uioainit(uio_t *uiop, uioa_t *uioap) 443 { 444 caddr_t addr; 445 page_t **pages; 446 int off; 447 int len; 448 proc_t *procp = ttoproc(curthread); 449 struct as *as = procp->p_as; 450 iovec_t *iov = uiop->uio_iov; 451 int32_t iovcnt = uiop->uio_iovcnt; 452 uioa_page_t *locked = uioap->uioa_locked; 453 dcopy_handle_t channel; 454 int error; 455 456 if (! (uioap->uioa_state & UIOA_ALLOC)) { 457 /* Can only init() a freshly allocated uioa_t */ 458 return (EINVAL); 459 } 460 461 error = dcopy_alloc(DCOPY_NOSLEEP, &channel); 462 if (error == DCOPY_NORESOURCES) { 463 /* Turn off uioa */ 464 uioasync.enabled = B_FALSE; 465 return (ENODEV); 466 } 467 if (error != DCOPY_SUCCESS) { 468 /* Alloc failed */ 469 return (EIO); 470 } 471 472 uioap->uioa_hwst[UIO_DCOPY_CHANNEL] = channel; 473 uioap->uioa_hwst[UIO_DCOPY_CMD] = NULL; 474 475 /* Indicate uioa_t (will be) initialized */ 476 uioap->uioa_state = UIOA_INIT; 477 478 /* uio_t/uioa_t uio_t common struct copy */ 479 *((uio_t *)uioap) = *uiop; 480 481 /* initialize *uiop->uio_iov */ 482 if (iovcnt > UIOA_IOV_MAX) { 483 /* Too big? 
*/ 484 return (E2BIG); 485 } 486 uioap->uio_iov = iov; 487 uioap->uio_iovcnt = iovcnt; 488 489 /* Mark the uioap as such */ 490 uioap->uio_extflg |= UIO_ASYNC; 491 492 /* 493 * For each iovec_t, lock-down the page(s) backing the iovec_t 494 * and save the page_t list for phys addr use in uioamove(). 495 */ 496 iov = uiop->uio_iov; 497 iovcnt = uiop->uio_iovcnt; 498 while (iovcnt > 0) { 499 addr = iov->iov_base; 500 off = (uintptr_t)addr & PAGEOFFSET; 501 addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 502 len = iov->iov_len + off; 503 504 /* Lock down page(s) for the iov span */ 505 if ((error = as_pagelock(as, &pages, 506 iov->iov_base, iov->iov_len, S_WRITE)) != 0) { 507 /* Error */ 508 goto cleanup; 509 } 510 511 if (pages == NULL) { 512 /* 513 * Need page_t list, really only need 514 * a pfn list so build one. 515 */ 516 pfn_t *pfnp; 517 int pcnt = len >> PAGESHIFT; 518 519 if (off) 520 pcnt++; 521 if ((pfnp = kmem_alloc(pcnt * sizeof (pfnp), 522 KM_NOSLEEP)) == NULL) { 523 error = ENOMEM; 524 goto cleanup; 525 } 526 locked->uioa_ppp = (void **)pfnp; 527 locked->uioa_pfncnt = pcnt; 528 AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 529 while (pcnt-- > 0) { 530 *pfnp++ = hat_getpfnum(as->a_hat, addr); 531 addr += PAGESIZE; 532 } 533 AS_LOCK_EXIT(as, &as->a_lock); 534 } else { 535 /* Have a page_t list, save it */ 536 locked->uioa_ppp = (void **)pages; 537 locked->uioa_pfncnt = 0; 538 } 539 /* Save for as_pageunlock() in uioafini() */ 540 locked->uioa_base = iov->iov_base; 541 locked->uioa_len = iov->iov_len; 542 locked++; 543 544 /* Next iovec_t */ 545 iov++; 546 iovcnt--; 547 } 548 /* Initialize curret pointer into uioa_locked[] and it's uioa_ppp */ 549 uioap->uioa_lcur = uioap->uioa_locked; 550 uioap->uioa_lppp = uioap->uioa_lcur->uioa_ppp; 551 return (0); 552 553 cleanup: 554 /* Unlock any previously locked page_t(s) */ 555 while (locked > uioap->uioa_locked) { 556 locked--; 557 as_pageunlock(as, (page_t **)locked->uioa_ppp, 558 locked->uioa_base, 
locked->uioa_len, S_WRITE); 559 } 560 561 /* Last indicate uioa_t still in alloc state */ 562 uioap->uioa_state = UIOA_ALLOC; 563 564 return (error); 565 } 566 567 /* 568 * Finish processing of a uioa_t by cleanup any pending "uioap" actions. 569 */ 570 int 571 uioafini(uio_t *uiop, uioa_t *uioap) 572 { 573 int32_t iovcnt = uiop->uio_iovcnt; 574 uioa_page_t *locked = uioap->uioa_locked; 575 struct as *as = ttoproc(curthread)->p_as; 576 dcopy_handle_t channel; 577 dcopy_cmd_t cmd; 578 int ret = 0; 579 580 ASSERT(uioap->uio_extflg & UIO_ASYNC); 581 582 if (!(uioap->uioa_state & (UIOA_ENABLED|UIOA_FINI))) { 583 /* Must be an active uioa_t */ 584 return (EINVAL); 585 } 586 587 channel = uioap->uioa_hwst[UIO_DCOPY_CHANNEL]; 588 cmd = uioap->uioa_hwst[UIO_DCOPY_CMD]; 589 590 /* XXX - why do we get cmd == NULL sometimes? */ 591 if (cmd != NULL) { 592 if (uioap->uioa_state & UIOA_POLL) { 593 /* Wait for last dcopy() to finish */ 594 int64_t poll = 1; 595 int poll_flag = DCOPY_POLL_NOFLAGS; 596 597 do { 598 if (uioa_maxpoll == 0 || 599 (uioa_maxpoll > 0 && 600 poll >= uioa_maxpoll)) { 601 /* Always block or after maxpoll */ 602 poll_flag = DCOPY_POLL_BLOCK; 603 } else { 604 /* No block, poll */ 605 poll++; 606 } 607 ret = dcopy_cmd_poll(cmd, poll_flag); 608 } while (ret == DCOPY_PENDING); 609 610 if (ret == DCOPY_COMPLETED) { 611 /* Poll/block succeeded */ 612 ret = 0; 613 } else { 614 /* Poll/block failed */ 615 ret = EIO; 616 } 617 } 618 dcopy_cmd_free(&cmd); 619 } 620 621 dcopy_free(&channel); 622 623 /* Unlock all page(s) iovec_t by iovec_t */ 624 while (iovcnt-- > 0) { 625 page_t **pages; 626 627 if (locked->uioa_pfncnt == 0) { 628 /* A as_pagelock() returned (page_t **) */ 629 pages = (page_t **)locked->uioa_ppp; 630 } else { 631 /* Our pfn_t array */ 632 pages = NULL; 633 kmem_free(locked->uioa_ppp, locked->uioa_pfncnt); 634 } 635 as_pageunlock(as, pages, locked->uioa_base, locked->uioa_len, 636 S_WRITE); 637 638 locked++; 639 } 640 /* uioa_t->uio_t common struct 
copy */ 641 *uiop = *((uio_t *)uioap); 642 643 /* 644 * Last, reset uioa state to alloc. 645 * 646 * Note, we only initialize the state here, all other members 647 * will be initialized in a subsequent uioainit(). 648 */ 649 uioap->uioa_state = UIOA_ALLOC; 650 651 uioap->uioa_hwst[UIO_DCOPY_CMD] = NULL; 652 uioap->uioa_hwst[UIO_DCOPY_CHANNEL] = NULL; 653 654 return (ret); 655 } 656