/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */
/*
 * Copyright (c) 2015 by Chunwei Chen. All rights reserved.
 */

#ifdef _KERNEL

#include <sys/types.h>
#include <sys/uio_impl.h>
#include <sys/sysmacros.h>
#include <sys/string.h>
#include <linux/kmap_compat.h>
#include <linux/uaccess.h>

/*
 * Move "n" bytes at byte address "p"; "rw" indicates the direction
 * of the move, and the I/O parameters are provided in "uio", which is
 * updated to reflect the data which was moved. Returns 0 on success or
 * a non-zero errno on failure.
 */
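/*
 * Minimal illustrative sketch: how a hypothetical caller might drive
 * zfs_uiomove() over a single kernel-space iovec. Only zfs_uio_t fields
 * referenced in this file are initialized here; real callers typically
 * use the zfs_uio initializer helpers from the uio headers. The helper
 * name is hypothetical, and the block is kept under #if 0 so it is
 * never compiled.
 */
#if 0
static int
example_uiomove_sysspace(void *src, void *dst, size_t len)
{
	struct iovec iov = { .iov_base = dst, .iov_len = len };
	zfs_uio_t uio = { 0 };

	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_skip = 0;
	uio.uio_resid = len;
	uio.uio_loffset = 0;
	uio.uio_segflg = UIO_SYSSPACE;

	/* UIO_READ copies from "src" into the iovec destination. */
	return (zfs_uiomove(src, len, UIO_READ, &uio));
}
#endif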
static int
zfs_uiomove_iov(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
{
	const struct iovec *iov = uio->uio_iov;
	size_t skip = uio->uio_skip;
	ulong_t cnt;

	while (n && uio->uio_resid) {
		cnt = MIN(iov->iov_len - skip, n);
		switch (uio->uio_segflg) {
		case UIO_USERSPACE:
			/*
			 * p = kernel data pointer
			 * iov->iov_base = user data pointer
			 */
			if (rw == UIO_READ) {
				if (copy_to_user(iov->iov_base+skip, p, cnt))
					return (EFAULT);
			} else {
				unsigned long b_left = 0;
				if (uio->uio_fault_disable) {
					if (!zfs_access_ok(VERIFY_READ,
					    (iov->iov_base + skip), cnt)) {
						return (EFAULT);
					}
					pagefault_disable();
					b_left =
					    __copy_from_user_inatomic(p,
					    (iov->iov_base + skip), cnt);
					pagefault_enable();
				} else {
					b_left =
					    copy_from_user(p,
					    (iov->iov_base + skip), cnt);
				}
				if (b_left > 0) {
					unsigned long c_bytes =
					    cnt - b_left;
					uio->uio_skip += c_bytes;
					ASSERT3U(uio->uio_skip, <,
					    iov->iov_len);
					uio->uio_resid -= c_bytes;
					uio->uio_loffset += c_bytes;
					return (EFAULT);
				}
			}
			break;
		case UIO_SYSSPACE:
			if (rw == UIO_READ)
				memcpy(iov->iov_base + skip, p, cnt);
			else
				memcpy(p, iov->iov_base + skip, cnt);
			break;
		default:
			ASSERT(0);
		}
		skip += cnt;
		if (skip == iov->iov_len) {
			skip = 0;
			uio->uio_iov = (++iov);
			uio->uio_iovcnt--;
		}
		uio->uio_skip = skip;
		uio->uio_resid -= cnt;
		uio->uio_loffset += cnt;
		p = (caddr_t)p + cnt;
		n -= cnt;
	}
	return (0);
}

static int
zfs_uiomove_bvec_impl(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
{
	const struct bio_vec *bv = uio->uio_bvec;
	size_t skip = uio->uio_skip;
	ulong_t cnt;

	while (n && uio->uio_resid) {
		void *paddr;
		cnt = MIN(bv->bv_len - skip, n);

		paddr = zfs_kmap_local(bv->bv_page);
		if (rw == UIO_READ) {
			/* Copy from buffer 'p' to the bvec data */
			memcpy(paddr + bv->bv_offset + skip, p, cnt);
		} else {
			/* Copy from bvec data to buffer 'p' */
			memcpy(p, paddr + bv->bv_offset + skip, cnt);
		}
		zfs_kunmap_local(paddr);

		skip += cnt;
		if (skip == bv->bv_len) {
			skip = 0;
			uio->uio_bvec = (++bv);
			uio->uio_iovcnt--;
		}
		uio->uio_skip = skip;
		uio->uio_resid -= cnt;
		uio->uio_loffset += cnt;
		p = (caddr_t)p + cnt;
		n -= cnt;
	}
	return (0);
}

#ifdef HAVE_BLK_MQ
static void
zfs_copy_bvec(void *p, size_t skip, size_t cnt, zfs_uio_rw_t rw,
    struct bio_vec *bv)
{
	void *paddr;

	paddr = zfs_kmap_local(bv->bv_page);
	if (rw == UIO_READ) {
		/* Copy from buffer 'p' to the bvec data */
		memcpy(paddr + bv->bv_offset + skip, p, cnt);
	} else {
		/* Copy from bvec data to buffer 'p' */
		memcpy(p, paddr + bv->bv_offset + skip, cnt);
	}
	zfs_kunmap_local(paddr);
}
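/*
 * Worked example (hypothetical numbers) of the segment-window
 * arithmetic used by zfs_uiomove_bvec_rq() below. For a request
 * starting at logical offset 0 with two 512-byte segments, and a copy
 * of n = 600 bytes beginning at uio_loffset = 100:
 *
 *	Segment 0 spans [0, 511]:    skip_in_seg = 100, copy_from_seg = 412
 *	Segment 1 spans [512, 1023]: skip_in_seg = 0,   copy_from_seg = 188
 *
 * After both segments, n reaches 0 and uio_loffset is 700.
 */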
/*
 * Copy 'n' bytes of data between the buffer p[] and the data represented
 * by the request in the uio.
 */
static int
zfs_uiomove_bvec_rq(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
{
	struct request *rq = uio->rq;
	struct bio_vec bv;
	struct req_iterator iter;
	size_t this_seg_start;	/* logical offset */
	size_t this_seg_end;	/* logical offset */
	size_t skip_in_seg;
	size_t copy_from_seg;
	size_t orig_loffset;
	int copied = 0;

	/*
	 * Get the original logical offset of this entire request (because
	 * uio->uio_loffset will be modified over time).
	 */
	orig_loffset = io_offset(NULL, rq);
	this_seg_start = orig_loffset;

	rq_for_each_segment(bv, rq, iter) {
		/*
		 * Lookup what the logical offset of the last byte of this
		 * segment is.
		 */
		this_seg_end = this_seg_start + bv.bv_len - 1;

		/*
		 * We only need to operate on segments that have data we're
		 * copying.
		 */
		if (uio->uio_loffset >= this_seg_start &&
		    uio->uio_loffset <= this_seg_end) {
			/*
			 * Some, or all, of the data in this segment needs to
			 * be copied.
			 */

			/*
			 * We may not be copying from the first byte in the
			 * segment. Figure out how many bytes to skip copying
			 * from the beginning of this segment.
			 */
			skip_in_seg = uio->uio_loffset - this_seg_start;

			/*
			 * Calculate the total number of bytes from this
			 * segment that we will be copying.
			 */
			copy_from_seg = MIN(bv.bv_len - skip_in_seg, n);

			/* Copy the bytes */
			zfs_copy_bvec(p, skip_in_seg, copy_from_seg, rw, &bv);
			p = ((char *)p) + copy_from_seg;

			n -= copy_from_seg;
			uio->uio_resid -= copy_from_seg;
			uio->uio_loffset += copy_from_seg;
			copied = 1;	/* We copied some data */
		}

		this_seg_start = this_seg_end + 1;
	}

	if (!copied) {
		/* Didn't copy anything */
		uio->uio_resid = 0;
	}
	return (0);
}
#endif	/* HAVE_BLK_MQ */

static int
zfs_uiomove_bvec(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
{
#ifdef HAVE_BLK_MQ
	if (uio->rq != NULL)
		return (zfs_uiomove_bvec_rq(p, n, rw, uio));
#else
	ASSERT3P(uio->rq, ==, NULL);
#endif
	return (zfs_uiomove_bvec_impl(p, n, rw, uio));
}

#if defined(HAVE_VFS_IOV_ITER)
static int
zfs_uiomove_iter(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio,
    boolean_t revert)
{
	size_t cnt = MIN(n, uio->uio_resid);

	if (uio->uio_skip)
		iov_iter_advance(uio->uio_iter, uio->uio_skip);

	if (rw == UIO_READ)
		cnt = copy_to_iter(p, cnt, uio->uio_iter);
	else
		cnt = copy_from_iter(p, cnt, uio->uio_iter);

	/*
	 * When operating on a full pipe no bytes are processed. In this
	 * case return EFAULT, which is converted to EAGAIN by the kernel's
	 * generic_file_splice_read() function.
	 */
	if (cnt == 0)
		return (EFAULT);

	/*
	 * Revert advancing the uio_iter. The revert flag is set by
	 * zfs_uiocopy() to avoid consuming the uio and its iov_iter
	 * structure.
	 */
	if (revert)
		iov_iter_revert(uio->uio_iter, cnt);

	uio->uio_resid -= cnt;
	uio->uio_loffset += cnt;

	return (0);
}
#endif

int
zfs_uiomove(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
{
	if (uio->uio_segflg == UIO_BVEC)
		return (zfs_uiomove_bvec(p, n, rw, uio));
#if defined(HAVE_VFS_IOV_ITER)
	else if (uio->uio_segflg == UIO_ITER)
		return (zfs_uiomove_iter(p, n, rw, uio, B_FALSE));
#endif
	else
		return (zfs_uiomove_iov(p, n, rw, uio));
}
EXPORT_SYMBOL(zfs_uiomove);
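/*
 * Illustrative sketch of the usual pattern around
 * zfs_uio_prefaultpages(), defined below: write paths typically
 * prefault the user buffer before entering a context where a page
 * fault would be unsafe or costly, then handle EFAULT if the pages
 * are evicted again before the copy. The helper name is hypothetical,
 * and the block is kept under #if 0 so it is never compiled.
 */
#if 0
static int
example_prefault_then_move(void *buf, size_t len, zfs_uio_t *uio)
{
	/* Best effort: touch one byte per page of the user buffer. */
	if (zfs_uio_prefaultpages((ssize_t)len, uio) != 0)
		return (EFAULT);

	/* The copy may still fault; callers must handle EFAULT. */
	return (zfs_uiomove(buf, len, UIO_WRITE, uio));
}
#endif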
/*
 * Fault in the pages of the first n bytes specified by the uio structure.
 * 1 byte in each page is touched and the uio struct is unmodified. Any
 * error terminates the routine, as this is only a best-effort attempt to
 * get the pages resident.
 */
int
zfs_uio_prefaultpages(ssize_t n, zfs_uio_t *uio)
{
	if (uio->uio_segflg == UIO_SYSSPACE || uio->uio_segflg == UIO_BVEC) {
		/* There's never a need to fault in kernel pages */
		return (0);
#if defined(HAVE_VFS_IOV_ITER)
	} else if (uio->uio_segflg == UIO_ITER) {
		/*
		 * Since at least the Linux 4.9 kernel,
		 * iov_iter_fault_in_readable() can be relied on to fault in
		 * user pages when referenced.
		 */
		if (iov_iter_fault_in_readable(uio->uio_iter, n))
			return (EFAULT);
#endif
	} else {
		/* Fault in all user pages */
		ASSERT3S(uio->uio_segflg, ==, UIO_USERSPACE);
		const struct iovec *iov = uio->uio_iov;
		int iovcnt = uio->uio_iovcnt;
		size_t skip = uio->uio_skip;
		uint8_t tmp;
		caddr_t p;

		for (; n > 0 && iovcnt > 0; iov++, iovcnt--, skip = 0) {
			ulong_t cnt = MIN(iov->iov_len - skip, n);
			/* empty iov */
			if (cnt == 0)
				continue;
			n -= cnt;
			/* touch each page in this segment. */
			p = iov->iov_base + skip;
			while (cnt) {
				if (copy_from_user(&tmp, p, 1))
					return (EFAULT);
				ulong_t incr = MIN(cnt, PAGESIZE);
				p += incr;
				cnt -= incr;
			}
			/* touch the last byte in case it straddles a page. */
			p--;
			if (copy_from_user(&tmp, p, 1))
				return (EFAULT);
		}
	}

	return (0);
}
EXPORT_SYMBOL(zfs_uio_prefaultpages);

/*
 * The same as zfs_uiomove() but doesn't modify the uio structure.
 * Returns in cbytes how many bytes were copied.
 */
int
zfs_uiocopy(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio, size_t *cbytes)
{
	zfs_uio_t uio_copy;
	int ret;

	memcpy(&uio_copy, uio, sizeof (zfs_uio_t));

	if (uio->uio_segflg == UIO_BVEC)
		ret = zfs_uiomove_bvec(p, n, rw, &uio_copy);
#if defined(HAVE_VFS_IOV_ITER)
	else if (uio->uio_segflg == UIO_ITER)
		ret = zfs_uiomove_iter(p, n, rw, &uio_copy, B_TRUE);
#endif
	else
		ret = zfs_uiomove_iov(p, n, rw, &uio_copy);

	*cbytes = uio->uio_resid - uio_copy.uio_resid;

	return (ret);
}
EXPORT_SYMBOL(zfs_uiocopy);

/*
 * Drop the next n bytes out of *uio.
 */
void
zfs_uioskip(zfs_uio_t *uio, size_t n)
{
	if (n > uio->uio_resid)
		return;
	/*
	 * When using a uio with a struct request, we simply
	 * use uio_loffset as a pointer to the next logical byte to
	 * copy in the request. We don't have to do any fancy
	 * accounting with uio_bvec/uio_iovcnt since we don't use
	 * them.
	 */
	if (uio->uio_segflg == UIO_BVEC && uio->rq == NULL) {
		uio->uio_skip += n;
		while (uio->uio_iovcnt &&
		    uio->uio_skip >= uio->uio_bvec->bv_len) {
			uio->uio_skip -= uio->uio_bvec->bv_len;
			uio->uio_bvec++;
			uio->uio_iovcnt--;
		}
#if defined(HAVE_VFS_IOV_ITER)
	} else if (uio->uio_segflg == UIO_ITER) {
		iov_iter_advance(uio->uio_iter, n);
#endif
	} else {
		uio->uio_skip += n;
		while (uio->uio_iovcnt &&
		    uio->uio_skip >= uio->uio_iov->iov_len) {
			uio->uio_skip -= uio->uio_iov->iov_len;
			uio->uio_iov++;
			uio->uio_iovcnt--;
		}
	}
	uio->uio_loffset += n;
	uio->uio_resid -= n;
}
EXPORT_SYMBOL(zfs_uioskip);

#endif	/* _KERNEL */
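/*
 * Illustrative sketch: zfs_uiocopy() is the non-consuming variant of
 * zfs_uiomove(), useful for "peeking" at uio data while leaving the
 * uio's offsets untouched. The helper name and the full-copy policy
 * are hypothetical; the block is kept under #if 0 so it is never
 * compiled.
 */
#if 0
static int
example_peek_uio(void *buf, size_t len, zfs_uio_t *uio)
{
	size_t cbytes = 0;
	int error;

	/* Copy out of the uio without advancing its offsets. */
	error = zfs_uiocopy(buf, len, UIO_WRITE, uio, &cbytes);
	if (error == 0 && cbytes != len)
		error = EFAULT;	/* hypothetical policy: require a full copy */
	return (error);
}
#endif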