/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */
/*
 * Copyright (c) 2015 by Chunwei Chen. All rights reserved.
 */

#ifdef _KERNEL

#include <sys/types.h>
#include <sys/uio_impl.h>
#include <sys/sysmacros.h>
#include <sys/string.h>
#include <linux/kmap_compat.h>
#include <linux/uaccess.h>
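
/*
 * Overview: the helpers in this file move data between a kernel buffer
 * and the memory described by a zfs_uio_t.  The uio may wrap a plain
 * iovec array (UIO_USERSPACE or UIO_SYSSPACE), a bio_vec array
 * (UIO_BVEC, optionally backed by a blk-mq struct request), or an
 * iov_iter (UIO_ITER); zfs_uiomove() dispatches on uio_segflg to the
 * matching implementation below.
 */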

/*
 * Move "n" bytes at byte address "p"; "rw" indicates the direction
 * of the move, and the I/O parameters are provided in "uio", which is
 * updated to reflect the data which was moved.  Returns 0 on success or
 * a non-zero errno on failure.
 */
static int
zfs_uiomove_iov(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
{
	const struct iovec *iov = uio->uio_iov;
	size_t skip = uio->uio_skip;
	ulong_t cnt;

	while (n && uio->uio_resid) {
		cnt = MIN(iov->iov_len - skip, n);
		switch (uio->uio_segflg) {
		case UIO_USERSPACE:
			/*
			 * p = kernel data pointer
			 * iov->iov_base = user data pointer
			 */
			if (rw == UIO_READ) {
				if (copy_to_user(iov->iov_base + skip, p, cnt))
					return (EFAULT);
			} else {
				unsigned long b_left = 0;
				if (uio->uio_fault_disable) {
					if (!zfs_access_ok(VERIFY_READ,
					    (iov->iov_base + skip), cnt)) {
						return (EFAULT);
					}
					pagefault_disable();
					b_left =
					    __copy_from_user_inatomic(p,
					    (iov->iov_base + skip), cnt);
					pagefault_enable();
				} else {
					b_left =
					    copy_from_user(p,
					    (iov->iov_base + skip), cnt);
				}
				if (b_left > 0) {
					unsigned long c_bytes =
					    cnt - b_left;
					uio->uio_skip += c_bytes;
					ASSERT3U(uio->uio_skip, <,
					    iov->iov_len);
					uio->uio_resid -= c_bytes;
					uio->uio_loffset += c_bytes;
					return (EFAULT);
				}
			}
			break;
		case UIO_SYSSPACE:
			if (rw == UIO_READ)
				memcpy(iov->iov_base + skip, p, cnt);
			else
				memcpy(p, iov->iov_base + skip, cnt);
			break;
		default:
			ASSERT(0);
		}
		skip += cnt;
		if (skip == iov->iov_len) {
			skip = 0;
			uio->uio_iov = (++iov);
			uio->uio_iovcnt--;
		}
		uio->uio_skip = skip;
		uio->uio_resid -= cnt;
		uio->uio_loffset += cnt;
		p = (caddr_t)p + cnt;
		n -= cnt;
	}
	return (0);
}

static int
zfs_uiomove_bvec_impl(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
{
	const struct bio_vec *bv = uio->uio_bvec;
	size_t skip = uio->uio_skip;
	ulong_t cnt;

	while (n && uio->uio_resid) {
		void *paddr;
		cnt = MIN(bv->bv_len - skip, n);

		paddr = zfs_kmap_atomic(bv->bv_page);
		if (rw == UIO_READ) {
			/* Copy from buffer 'p' to the bvec data */
			memcpy(paddr + bv->bv_offset + skip, p, cnt);
		} else {
			/* Copy from bvec data to buffer 'p' */
			memcpy(p, paddr + bv->bv_offset + skip, cnt);
		}
		zfs_kunmap_atomic(paddr);

		skip += cnt;
		if (skip == bv->bv_len) {
			skip = 0;
			uio->uio_bvec = (++bv);
			uio->uio_iovcnt--;
		}
		uio->uio_skip = skip;
		uio->uio_resid -= cnt;
		uio->uio_loffset += cnt;
		p = (caddr_t)p + cnt;
		n -= cnt;
	}
	return (0);
}
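
/*
 * Example: given two 4 KiB bio_vecs, uio_skip == 1024, and n == 4096,
 * zfs_uiomove_bvec_impl() copies the remaining 3072 bytes of the first
 * bvec, advances to the second bvec (resetting uio_skip to 0 and
 * decrementing uio_iovcnt), then copies 1024 bytes more, leaving
 * uio_skip == 1024 within the second bvec.
 */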

#ifdef HAVE_BLK_MQ
static void
zfs_copy_bvec(void *p, size_t skip, size_t cnt, zfs_uio_rw_t rw,
    struct bio_vec *bv)
{
	void *paddr;

	paddr = zfs_kmap_atomic(bv->bv_page);
	if (rw == UIO_READ) {
		/* Copy from buffer 'p' to the bvec data */
		memcpy(paddr + bv->bv_offset + skip, p, cnt);
	} else {
		/* Copy from bvec data to buffer 'p' */
		memcpy(p, paddr + bv->bv_offset + skip, cnt);
	}
	zfs_kunmap_atomic(paddr);
}

/*
 * Copy 'n' bytes of data between the buffer p[] and the data represented
 * by the request in the uio.
 */
static int
zfs_uiomove_bvec_rq(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
{
	struct request *rq = uio->rq;
	struct bio_vec bv;
	struct req_iterator iter;
	size_t this_seg_start;	/* logical offset */
	size_t this_seg_end;	/* logical offset */
	size_t skip_in_seg;
	size_t copy_from_seg;
	size_t orig_loffset;
	int copied = 0;

	/*
	 * Get the original logical offset of this entire request (because
	 * uio->uio_loffset will be modified over time).
	 */
	orig_loffset = io_offset(NULL, rq);
	this_seg_start = orig_loffset;

	rq_for_each_segment(bv, rq, iter) {
		if (uio->iter.bio) {
			/*
			 * If uio->iter.bio is present, then we know we've saved
			 * uio->iter from a previous call to this function, and
			 * we can skip ahead in this rq_for_each_segment() loop
			 * to where we last left off.  That way, we don't need
			 * to iterate over tons of segments we've already
			 * processed - we can just restore the "saved state".
			 */
			iter = uio->iter;
			bv = uio->bv;
			this_seg_start = uio->uio_loffset;
			memset(&uio->iter, 0, sizeof (uio->iter));
			continue;
		}

		/*
		 * Lookup what the logical offset of the last byte of this
		 * segment is.
		 */
		this_seg_end = this_seg_start + bv.bv_len - 1;

		/*
		 * We only need to operate on segments that have data we're
		 * copying.
		 */
		if (uio->uio_loffset >= this_seg_start &&
		    uio->uio_loffset <= this_seg_end) {
			/*
			 * Some, or all, of the data in this segment needs to be
			 * copied.
			 */

			/*
			 * We may not be copying from the first byte in the
			 * segment.  Figure out how many bytes to skip copying
			 * from the beginning of this segment.
			 */
			skip_in_seg = uio->uio_loffset - this_seg_start;

			/*
			 * Calculate the total number of bytes from this
			 * segment that we will be copying.
			 */
			copy_from_seg = MIN(bv.bv_len - skip_in_seg, n);

			/* Copy the bytes */
			zfs_copy_bvec(p, skip_in_seg, copy_from_seg, rw, &bv);
			p = ((char *)p) + copy_from_seg;

			n -= copy_from_seg;
			uio->uio_resid -= copy_from_seg;
			uio->uio_loffset += copy_from_seg;
			copied = 1;	/* We copied some data */
		}

		if (n == 0) {
			/*
			 * All done copying.  Save our 'iter' value to the uio.
			 * This allows us to "save our state" and skip ahead in
			 * the rq_for_each_segment() loop the next time we call
			 * zfs_uiomove_bvec_rq() on this uio (which we will be
			 * doing for any remaining data in the uio).
			 */
			uio->iter = iter;	/* make a copy of the struct data */
			uio->bv = bv;
			return (0);
		}

		this_seg_start = this_seg_end + 1;
	}

	if (!copied) {
		/* Didn't copy anything */
		uio->uio_resid = 0;
	}
	return (0);
}
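
/*
 * Example: for a request whose segments cover logical offsets
 * [0, 4095] and [4096, 8191], a first call to zfs_uiomove_bvec_rq()
 * with n == 4096 copies the first segment and saves 'iter' and 'bv'
 * in the uio; a second call for the remaining bytes restores that
 * saved state and resumes at the second segment rather than rescanning
 * the request from the beginning.
 */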
#endif

static int
zfs_uiomove_bvec(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
{
#ifdef HAVE_BLK_MQ
	if (uio->rq != NULL)
		return (zfs_uiomove_bvec_rq(p, n, rw, uio));
#else
	ASSERT3P(uio->rq, ==, NULL);
#endif
	return (zfs_uiomove_bvec_impl(p, n, rw, uio));
}

#if defined(HAVE_VFS_IOV_ITER)
static int
zfs_uiomove_iter(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio,
    boolean_t revert)
{
	size_t cnt = MIN(n, uio->uio_resid);

	if (uio->uio_skip)
		iov_iter_advance(uio->uio_iter, uio->uio_skip);

	if (rw == UIO_READ)
		cnt = copy_to_iter(p, cnt, uio->uio_iter);
	else
		cnt = copy_from_iter(p, cnt, uio->uio_iter);

	/*
	 * When operating on a full pipe no bytes are processed.
	 * In which case return EFAULT which is converted to EAGAIN
	 * by the kernel's generic_file_splice_read() function.
	 */
	if (cnt == 0)
		return (EFAULT);

	/*
	 * Revert advancing the uio_iter.  This is set by zfs_uiocopy()
	 * to avoid consuming the uio and its iov_iter structure.
	 */
	if (revert)
		iov_iter_revert(uio->uio_iter, cnt);

	uio->uio_resid -= cnt;
	uio->uio_loffset += cnt;

	return (0);
}
#endif

int
zfs_uiomove(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
{
	if (uio->uio_segflg == UIO_BVEC)
		return (zfs_uiomove_bvec(p, n, rw, uio));
#if defined(HAVE_VFS_IOV_ITER)
	else if (uio->uio_segflg == UIO_ITER)
		return (zfs_uiomove_iter(p, n, rw, uio, B_FALSE));
#endif
	else
		return (zfs_uiomove_iov(p, n, rw, uio));
}
EXPORT_SYMBOL(zfs_uiomove);

/*
 * Fault in the pages of the first n bytes specified by the uio structure.
 * 1 byte in each page is touched and the uio struct is unmodified.  Any
 * error terminates the prefault operation, as this is only a best-effort
 * attempt to get the pages resident.
 */
int
zfs_uio_prefaultpages(ssize_t n, zfs_uio_t *uio)
{
	if (uio->uio_segflg == UIO_SYSSPACE || uio->uio_segflg == UIO_BVEC) {
		/* There's never a need to fault in kernel pages */
		return (0);
#if defined(HAVE_VFS_IOV_ITER)
	} else if (uio->uio_segflg == UIO_ITER) {
		/*
		 * Since at least the Linux 4.9 kernel,
		 * iov_iter_fault_in_readable() can be relied on to fault
		 * in user pages when referenced.
		 */
		if (iov_iter_fault_in_readable(uio->uio_iter, n))
			return (EFAULT);
#endif
	} else {
		/* Fault in all user pages */
		ASSERT3S(uio->uio_segflg, ==, UIO_USERSPACE);
		const struct iovec *iov = uio->uio_iov;
		int iovcnt = uio->uio_iovcnt;
		size_t skip = uio->uio_skip;
		uint8_t tmp;
		caddr_t p;

		for (; n > 0 && iovcnt > 0; iov++, iovcnt--, skip = 0) {
			ulong_t cnt = MIN(iov->iov_len - skip, n);
			/* empty iov */
			if (cnt == 0)
				continue;
			n -= cnt;
			/* touch each page in this segment. */
			p = iov->iov_base + skip;
			while (cnt) {
				if (copy_from_user(&tmp, p, 1))
					return (EFAULT);
				ulong_t incr = MIN(cnt, PAGESIZE);
				p += incr;
				cnt -= incr;
			}
			/* touch the last byte in case it straddles a page. */
			p--;
			if (copy_from_user(&tmp, p, 1))
				return (EFAULT);
		}
	}

	return (0);
}
EXPORT_SYMBOL(zfs_uio_prefaultpages);

/*
 * The same as zfs_uiomove() but doesn't modify the uio structure.
 * Return in cbytes how many bytes were copied.
 */
int
zfs_uiocopy(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio, size_t *cbytes)
{
	zfs_uio_t uio_copy;
	int ret;

	memcpy(&uio_copy, uio, sizeof (zfs_uio_t));

	if (uio->uio_segflg == UIO_BVEC)
		ret = zfs_uiomove_bvec(p, n, rw, &uio_copy);
#if defined(HAVE_VFS_IOV_ITER)
	else if (uio->uio_segflg == UIO_ITER)
		ret = zfs_uiomove_iter(p, n, rw, &uio_copy, B_TRUE);
#endif
	else
		ret = zfs_uiomove_iov(p, n, rw, &uio_copy);

	*cbytes = uio->uio_resid - uio_copy.uio_resid;

	return (ret);
}
EXPORT_SYMBOL(zfs_uiocopy);
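
/*
 * Example: zfs_uiocopy() lets a caller peek at uio data without
 * consuming it.  Copying 512 bytes out of a uio with uio_resid == 4096
 * operates on a private copy, so the caller's uio is left untouched
 * (uio_resid still 4096) and *cbytes reports the bytes actually moved,
 * here 512 on success.
 */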

/*
 * Drop the next n chars out of *uio.
 */
void
zfs_uioskip(zfs_uio_t *uio, size_t n)
{
	if (n > uio->uio_resid)
		return;
	/*
	 * When using a uio with a struct request, we simply
	 * use uio_loffset as a pointer to the next logical byte to
	 * copy in the request.  We don't have to do any fancy
	 * accounting with uio_bvec/uio_iovcnt since we don't use
	 * them.
	 */
	if (uio->uio_segflg == UIO_BVEC && uio->rq == NULL) {
		uio->uio_skip += n;
		while (uio->uio_iovcnt &&
		    uio->uio_skip >= uio->uio_bvec->bv_len) {
			uio->uio_skip -= uio->uio_bvec->bv_len;
			uio->uio_bvec++;
			uio->uio_iovcnt--;
		}
#if defined(HAVE_VFS_IOV_ITER)
	} else if (uio->uio_segflg == UIO_ITER) {
		iov_iter_advance(uio->uio_iter, n);
#endif
	} else {
		uio->uio_skip += n;
		while (uio->uio_iovcnt &&
		    uio->uio_skip >= uio->uio_iov->iov_len) {
			uio->uio_skip -= uio->uio_iov->iov_len;
			uio->uio_iov++;
			uio->uio_iovcnt--;
		}
	}
	uio->uio_loffset += n;
	uio->uio_resid -= n;
}
EXPORT_SYMBOL(zfs_uioskip);

#endif /* _KERNEL */