1 /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 /* I/O iterator iteration building functions. 3 * 4 * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved. 5 * Written by David Howells (dhowells@redhat.com) 6 */ 7 8 #ifndef _LINUX_IOV_ITER_H 9 #define _LINUX_IOV_ITER_H 10 11 #include <linux/uio.h> 12 #include <linux/bvec.h> 13 #include <linux/folio_queue.h> 14 15 typedef size_t (*iov_step_f)(void *iter_base, size_t progress, size_t len, 16 void *priv, void *priv2); 17 typedef size_t (*iov_ustep_f)(void __user *iter_base, size_t progress, size_t len, 18 void *priv, void *priv2); 19 20 /* 21 * Handle ITER_UBUF. 22 */ 23 static __always_inline 24 size_t iterate_ubuf(struct iov_iter *iter, size_t len, void *priv, void *priv2, 25 iov_ustep_f step) 26 { 27 void __user *base = iter->ubuf; 28 size_t progress = 0, remain; 29 30 remain = step(base + iter->iov_offset, 0, len, priv, priv2); 31 progress = len - remain; 32 iter->iov_offset += progress; 33 iter->count -= progress; 34 return progress; 35 } 36 37 /* 38 * Handle ITER_IOVEC. 39 */ 40 static __always_inline 41 size_t iterate_iovec(struct iov_iter *iter, size_t len, void *priv, void *priv2, 42 iov_ustep_f step) 43 { 44 const struct iovec *p = iter->__iov; 45 size_t progress = 0, skip = iter->iov_offset; 46 47 do { 48 size_t remain, consumed; 49 size_t part = min(len, p->iov_len - skip); 50 51 if (likely(part)) { 52 remain = step(p->iov_base + skip, progress, part, priv, priv2); 53 consumed = part - remain; 54 progress += consumed; 55 skip += consumed; 56 len -= consumed; 57 if (skip < p->iov_len) 58 break; 59 } 60 p++; 61 skip = 0; 62 } while (len); 63 64 iter->nr_segs -= p - iter->__iov; 65 iter->__iov = p; 66 iter->iov_offset = skip; 67 iter->count -= progress; 68 return progress; 69 } 70 71 /* 72 * Handle ITER_KVEC. 73 */ 74 static __always_inline 75 size_t iterate_kvec(struct iov_iter *iter, size_t len, void *priv, void *priv2, 76 iov_step_f step) 77 { 78 const struct kvec *p = iter->kvec; 79 size_t progress = 0, skip = iter->iov_offset; 80 81 do { 82 size_t remain, consumed; 83 size_t part = min(len, p->iov_len - skip); 84 85 if (likely(part)) { 86 remain = step(p->iov_base + skip, progress, part, priv, priv2); 87 consumed = part - remain; 88 progress += consumed; 89 skip += consumed; 90 len -= consumed; 91 if (skip < p->iov_len) 92 break; 93 } 94 p++; 95 skip = 0; 96 } while (len); 97 98 iter->nr_segs -= p - iter->kvec; 99 iter->kvec = p; 100 iter->iov_offset = skip; 101 iter->count -= progress; 102 return progress; 103 } 104 105 /* 106 * Handle ITER_BVEC. 107 */ 108 static __always_inline 109 size_t iterate_bvec(struct iov_iter *iter, size_t len, void *priv, void *priv2, 110 iov_step_f step) 111 { 112 const struct bio_vec *p = iter->bvec; 113 size_t progress = 0, skip = iter->iov_offset; 114 115 do { 116 size_t remain, consumed; 117 size_t offset = p->bv_offset + skip, part; 118 void *kaddr = kmap_local_page(p->bv_page + offset / PAGE_SIZE); 119 120 part = min3(len, 121 (size_t)(p->bv_len - skip), 122 (size_t)(PAGE_SIZE - offset % PAGE_SIZE)); 123 remain = step(kaddr + offset % PAGE_SIZE, progress, part, priv, priv2); 124 kunmap_local(kaddr); 125 consumed = part - remain; 126 len -= consumed; 127 progress += consumed; 128 skip += consumed; 129 if (skip >= p->bv_len) { 130 skip = 0; 131 p++; 132 } 133 if (remain) 134 break; 135 } while (len); 136 137 iter->nr_segs -= p - iter->bvec; 138 iter->bvec = p; 139 iter->iov_offset = skip; 140 iter->count -= progress; 141 return progress; 142 } 143 144 /* 145 * Handle ITER_FOLIOQ. 146 */ 147 static __always_inline 148 size_t iterate_folioq(struct iov_iter *iter, size_t len, void *priv, void *priv2, 149 iov_step_f step) 150 { 151 const struct folio_queue *folioq = iter->folioq; 152 unsigned int slot = iter->folioq_slot; 153 size_t progress = 0, skip = iter->iov_offset; 154 155 if (slot == folioq_nr_slots(folioq)) { 156 /* The iterator may have been extended. */ 157 folioq = folioq->next; 158 slot = 0; 159 } 160 161 do { 162 struct folio *folio = folioq_folio(folioq, slot); 163 size_t part, remain = 0, consumed; 164 size_t fsize; 165 void *base; 166 167 if (!folio) 168 break; 169 170 fsize = folioq_folio_size(folioq, slot); 171 if (skip < fsize) { 172 base = kmap_local_folio(folio, skip); 173 part = umin(len, PAGE_SIZE - skip % PAGE_SIZE); 174 remain = step(base, progress, part, priv, priv2); 175 kunmap_local(base); 176 consumed = part - remain; 177 len -= consumed; 178 progress += consumed; 179 skip += consumed; 180 } 181 if (skip >= fsize) { 182 skip = 0; 183 slot++; 184 if (slot == folioq_nr_slots(folioq) && folioq->next) { 185 folioq = folioq->next; 186 slot = 0; 187 } 188 } 189 if (remain) 190 break; 191 } while (len); 192 193 iter->folioq_slot = slot; 194 iter->folioq = folioq; 195 iter->iov_offset = skip; 196 iter->count -= progress; 197 return progress; 198 } 199 200 /* 201 * Handle ITER_XARRAY. 202 */ 203 static __always_inline 204 size_t iterate_xarray(struct iov_iter *iter, size_t len, void *priv, void *priv2, 205 iov_step_f step) 206 { 207 struct folio *folio; 208 size_t progress = 0; 209 loff_t start = iter->xarray_start + iter->iov_offset; 210 pgoff_t index = start / PAGE_SIZE; 211 XA_STATE(xas, iter->xarray, index); 212 213 rcu_read_lock(); 214 xas_for_each(&xas, folio, ULONG_MAX) { 215 size_t remain, consumed, offset, part, flen; 216 217 if (xas_retry(&xas, folio)) 218 continue; 219 if (WARN_ON(xa_is_value(folio))) 220 break; 221 if (WARN_ON(folio_test_hugetlb(folio))) 222 break; 223 224 offset = offset_in_folio(folio, start + progress); 225 flen = min(folio_size(folio) - offset, len); 226 227 while (flen) { 228 void *base = kmap_local_folio(folio, offset); 229 230 part = min_t(size_t, flen, 231 PAGE_SIZE - offset_in_page(offset)); 232 remain = step(base, progress, part, priv, priv2); 233 kunmap_local(base); 234 235 consumed = part - remain; 236 progress += consumed; 237 len -= consumed; 238 239 if (remain || len == 0) 240 goto out; 241 flen -= consumed; 242 offset += consumed; 243 } 244 } 245 246 out: 247 rcu_read_unlock(); 248 iter->iov_offset += progress; 249 iter->count -= progress; 250 return progress; 251 } 252 253 /* 254 * Handle ITER_DISCARD. 255 */ 256 static __always_inline 257 size_t iterate_discard(struct iov_iter *iter, size_t len, void *priv, void *priv2, 258 iov_step_f step) 259 { 260 size_t progress = len; 261 262 iter->count -= progress; 263 return progress; 264 } 265 266 /** 267 * iterate_and_advance2 - Iterate over an iterator 268 * @iter: The iterator to iterate over. 269 * @len: The amount to iterate over. 270 * @priv: Data for the step functions. 271 * @priv2: More data for the step functions. 272 * @ustep: Function for UBUF/IOVEC iterators; given __user addresses. 273 * @step: Function for other iterators; given kernel addresses. 274 * 275 * Iterate over the next part of an iterator, up to the specified length. The 276 * buffer is presented in segments, which for kernel iteration are broken up by 277 * physical pages and mapped, with the mapped address being presented. 278 * 279 * Two step functions, @step and @ustep, must be provided, one for handling 280 * mapped kernel addresses and the other is given user addresses which have the 281 * potential to fault since no pinning is performed. 282 * 283 * The step functions are passed the address and length of the segment, @priv, 284 * @priv2 and the amount of data so far iterated over (which can, for example, 285 * be added to @priv to point to the right part of a second buffer). The step 286 * functions should return the amount of the segment they didn't process (ie. 0 287 * indicates complete processsing). 288 * 289 * This function returns the amount of data processed (ie. 0 means nothing was 290 * processed and the value of @len means processes to completion). 291 */ 292 static __always_inline 293 size_t iterate_and_advance2(struct iov_iter *iter, size_t len, void *priv, 294 void *priv2, iov_ustep_f ustep, iov_step_f step) 295 { 296 if (unlikely(iter->count < len)) 297 len = iter->count; 298 if (unlikely(!len)) 299 return 0; 300 301 if (likely(iter_is_ubuf(iter))) 302 return iterate_ubuf(iter, len, priv, priv2, ustep); 303 if (likely(iter_is_iovec(iter))) 304 return iterate_iovec(iter, len, priv, priv2, ustep); 305 if (iov_iter_is_bvec(iter)) 306 return iterate_bvec(iter, len, priv, priv2, step); 307 if (iov_iter_is_kvec(iter)) 308 return iterate_kvec(iter, len, priv, priv2, step); 309 if (iov_iter_is_folioq(iter)) 310 return iterate_folioq(iter, len, priv, priv2, step); 311 if (iov_iter_is_xarray(iter)) 312 return iterate_xarray(iter, len, priv, priv2, step); 313 return iterate_discard(iter, len, priv, priv2, step); 314 } 315 316 /** 317 * iterate_and_advance - Iterate over an iterator 318 * @iter: The iterator to iterate over. 319 * @len: The amount to iterate over. 320 * @priv: Data for the step functions. 321 * @ustep: Function for UBUF/IOVEC iterators; given __user addresses. 322 * @step: Function for other iterators; given kernel addresses. 323 * 324 * As iterate_and_advance2(), but priv2 is always NULL. 325 */ 326 static __always_inline 327 size_t iterate_and_advance(struct iov_iter *iter, size_t len, void *priv, 328 iov_ustep_f ustep, iov_step_f step) 329 { 330 return iterate_and_advance2(iter, len, priv, NULL, ustep, step); 331 } 332 333 /** 334 * iterate_and_advance_kernel - Iterate over a kernel-internal iterator 335 * @iter: The iterator to iterate over. 336 * @len: The amount to iterate over. 337 * @priv: Data for the step functions. 338 * @priv2: More data for the step functions. 339 * @step: Function for other iterators; given kernel addresses. 340 * 341 * Iterate over the next part of an iterator, up to the specified length. The 342 * buffer is presented in segments, which for kernel iteration are broken up by 343 * physical pages and mapped, with the mapped address being presented. 344 * 345 * [!] Note This will only handle BVEC, KVEC, FOLIOQ, XARRAY and DISCARD-type 346 * iterators; it will not handle UBUF or IOVEC-type iterators. 347 * 348 * A step functions, @step, must be provided, one for handling mapped kernel 349 * addresses and the other is given user addresses which have the potential to 350 * fault since no pinning is performed. 351 * 352 * The step functions are passed the address and length of the segment, @priv, 353 * @priv2 and the amount of data so far iterated over (which can, for example, 354 * be added to @priv to point to the right part of a second buffer). The step 355 * functions should return the amount of the segment they didn't process (ie. 0 356 * indicates complete processsing). 357 * 358 * This function returns the amount of data processed (ie. 0 means nothing was 359 * processed and the value of @len means processes to completion). 360 */ 361 static __always_inline 362 size_t iterate_and_advance_kernel(struct iov_iter *iter, size_t len, void *priv, 363 void *priv2, iov_step_f step) 364 { 365 if (unlikely(iter->count < len)) 366 len = iter->count; 367 if (unlikely(!len)) 368 return 0; 369 if (iov_iter_is_bvec(iter)) 370 return iterate_bvec(iter, len, priv, priv2, step); 371 if (iov_iter_is_kvec(iter)) 372 return iterate_kvec(iter, len, priv, priv2, step); 373 if (iov_iter_is_folioq(iter)) 374 return iterate_folioq(iter, len, priv, priv2, step); 375 if (iov_iter_is_xarray(iter)) 376 return iterate_xarray(iter, len, priv, priv2, step); 377 return iterate_discard(iter, len, priv, priv2, step); 378 } 379 380 #endif /* _LINUX_IOV_ITER_H */ 381