// SPDX-License-Identifier: GPL-2.0-only
/* Network filesystem read subrequest result collection, assessment and
 * retrying.
 *
 * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/export.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/task_io_accounting_ops.h>
#include "internal.h"

/* Notes made in the collector */
#define HIT_PENDING	0x01	/* A front op was still pending */
#define MADE_PROGRESS	0x04	/* Made progress cleaning up a stream or the folio set */
#define BUFFERED	0x08	/* The pagecache needs cleaning up */
#define NEED_RETRY	0x10	/* A front op requests retrying */
#define COPY_TO_CACHE	0x40	/* Need to copy subrequest to cache */
#define ABANDON_SREQ	0x80	/* Need to abandon untransferred part of subrequest */

/*
 * Clear the unread part of an I/O request.
 */
static void netfs_clear_unread(struct netfs_io_subrequest *subreq)
{
	netfs_reset_iter(subreq);
	WARN_ON_ONCE(subreq->len - subreq->transferred != iov_iter_count(&subreq->io_iter));
	iov_iter_zero(iov_iter_count(&subreq->io_iter), &subreq->io_iter);
	if (subreq->start + subreq->transferred >= subreq->rreq->i_size)
		__set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
}

/*
 * Flush, mark and unlock a folio that's now completely read.  If we want to
 * cache the folio, we set the group to NETFS_FOLIO_COPY_TO_CACHE, mark it
 * dirty and let writeback handle it.
 */
static void netfs_unlock_read_folio(struct netfs_io_request *rreq,
				    struct folio_queue *folioq,
				    int slot)
{
	struct netfs_folio *finfo;
	struct folio *folio = folioq_folio(folioq, slot);

	if (unlikely(folio_pos(folio) < rreq->abandon_to)) {
		trace_netfs_folio(folio, netfs_folio_trace_abandon);
		goto just_unlock;
	}

	flush_dcache_folio(folio);
	folio_mark_uptodate(folio);

	if (!test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)) {
		finfo = netfs_folio_info(folio);
		if (finfo) {
			trace_netfs_folio(folio, netfs_folio_trace_filled_gaps);
			if (finfo->netfs_group)
				folio_change_private(folio, finfo->netfs_group);
			else
				folio_detach_private(folio);
			kfree(finfo);
		}

		if (test_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags)) {
			if (!WARN_ON_ONCE(folio_get_private(folio) != NULL)) {
				trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
				folio_attach_private(folio, NETFS_FOLIO_COPY_TO_CACHE);
				folio_mark_dirty(folio);
			}
		} else {
			trace_netfs_folio(folio, netfs_folio_trace_read_done);
		}

		folioq_clear(folioq, slot);
	} else {
		// TODO: Use of PG_private_2 is deprecated.
		if (test_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags))
			netfs_pgpriv2_copy_to_cache(rreq, folio);
	}

just_unlock:
	if (folio->index == rreq->no_unlock_folio &&
	    test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags)) {
		_debug("no unlock");
	} else {
		trace_netfs_folio(folio, netfs_folio_trace_read_unlock);
		folio_unlock(folio);
	}

	folioq_clear(folioq, slot);
}

/*
 * Unlock any folios we've finished with.
 */
static void netfs_read_unlock_folios(struct netfs_io_request *rreq,
				     unsigned int *notes)
{
	struct folio_queue *folioq = rreq->buffer.tail;
	unsigned long long collected_to = rreq->collected_to;
	unsigned int slot = rreq->buffer.first_tail_slot;

	if (rreq->cleaned_to >= rreq->collected_to)
		return;

	// TODO: Begin decryption

	if (slot >= folioq_nr_slots(folioq)) {
		folioq = rolling_buffer_delete_spent(&rreq->buffer);
		if (!folioq) {
			rreq->front_folio_order = 0;
			return;
		}
		slot = 0;
	}

	for (;;) {
		struct folio *folio;
		unsigned long long fpos, fend;
		unsigned int order;
		size_t fsize;

		if (*notes & COPY_TO_CACHE)
			set_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags);

		folio = folioq_folio(folioq, slot);
		if (WARN_ONCE(!folio_test_locked(folio),
			      "R=%08x: folio %lx is not locked\n",
			      rreq->debug_id, folio->index))
			trace_netfs_folio(folio, netfs_folio_trace_not_locked);

		order = folioq_folio_order(folioq, slot);
		rreq->front_folio_order = order;
		fsize = PAGE_SIZE << order;
		fpos = folio_pos(folio);
		fend = umin(fpos + fsize, rreq->i_size);

		trace_netfs_collect_folio(rreq, folio, fend, collected_to);

		/* Unlock any folio we've transferred all of. */
		if (collected_to < fend)
			break;

		netfs_unlock_read_folio(rreq, folioq, slot);
		WRITE_ONCE(rreq->cleaned_to, fpos + fsize);
		*notes |= MADE_PROGRESS;

		clear_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags);

		/* Clean up the head folioq.  If we clear an entire folioq, then
		 * we can get rid of it provided it's not also the tail folioq
		 * being filled by the issuer.
		 */
		folioq_clear(folioq, slot);
		slot++;
		if (slot >= folioq_nr_slots(folioq)) {
			folioq = rolling_buffer_delete_spent(&rreq->buffer);
			if (!folioq)
				goto done;
			slot = 0;
			trace_netfs_folioq(folioq, netfs_trace_folioq_read_progress);
		}

		if (fpos + fsize >= collected_to)
			break;
	}

	rreq->buffer.tail = folioq;
done:
	rreq->buffer.first_tail_slot = slot;
}

/*
 * Collect and assess the results of various read subrequests.  We may need to
 * retry some of the results.
 *
 * Note that we have a sequence of subrequests, which may be drawing on
 * different sources and may or may not be the same size or starting position
 * and may not even correspond in boundary alignment.
 */
static void netfs_collect_read_results(struct netfs_io_request *rreq)
{
	struct netfs_io_subrequest *front, *remove;
	struct netfs_io_stream *stream = &rreq->io_streams[0];
	unsigned int notes;

	_enter("%llx-%llx", rreq->start, rreq->start + rreq->len);
	trace_netfs_rreq(rreq, netfs_rreq_trace_collect);
	trace_netfs_collect(rreq);

reassess:
	if (rreq->origin == NETFS_READAHEAD ||
	    rreq->origin == NETFS_READPAGE ||
	    rreq->origin == NETFS_READ_FOR_WRITE)
		notes = BUFFERED;
	else
		notes = 0;

	/* Remove completed subrequests from the front of the stream and
	 * advance the completion point.  We stop when we hit something that's
	 * in progress.  The issuer thread may be adding stuff to the tail
	 * whilst we're doing this.
	 */
	front = READ_ONCE(stream->front);
	while (front) {
		size_t transferred;

		trace_netfs_collect_sreq(rreq, front);
		_debug("sreq [%x] %llx %zx/%zx",
		       front->debug_index, front->start, front->transferred, front->len);

		if (stream->collected_to < front->start) {
			trace_netfs_collect_gap(rreq, stream, front->start, 'F');
			stream->collected_to = front->start;
		}

		if (netfs_check_subreq_in_progress(front))
			notes |= HIT_PENDING;
		smp_rmb(); /* Read counters after IN_PROGRESS flag. */
		transferred = READ_ONCE(front->transferred);

		/* If we can now collect the next folio, do so.  We don't want
		 * to defer this as we have to decide whether we need to copy
		 * to the cache or not, and that may differ between adjacent
		 * subreqs.
		 */
		if (notes & BUFFERED) {
			size_t fsize = PAGE_SIZE << rreq->front_folio_order;

			/* Clear the tail of a short read. */
			if (!(notes & HIT_PENDING) &&
			    front->error == 0 &&
			    transferred < front->len &&
			    (test_bit(NETFS_SREQ_HIT_EOF, &front->flags) ||
			     test_bit(NETFS_SREQ_CLEAR_TAIL, &front->flags))) {
				netfs_clear_unread(front);
				transferred = front->transferred = front->len;
				trace_netfs_sreq(front, netfs_sreq_trace_clear);
			}

			stream->collected_to = front->start + transferred;
			rreq->collected_to = stream->collected_to;

			if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &front->flags))
				notes |= COPY_TO_CACHE;

			if (test_bit(NETFS_SREQ_FAILED, &front->flags)) {
				rreq->abandon_to = front->start + front->len;
				front->transferred = front->len;
				transferred = front->len;
				trace_netfs_rreq(rreq, netfs_rreq_trace_set_abandon);
			}
			if (front->start + transferred >= rreq->cleaned_to + fsize ||
			    test_bit(NETFS_SREQ_HIT_EOF, &front->flags))
				netfs_read_unlock_folios(rreq, &notes);
		} else {
			stream->collected_to = front->start + transferred;
			rreq->collected_to = stream->collected_to;
		}

		/* Stall if the front is still undergoing I/O. */
		if (notes & HIT_PENDING)
			break;

		if (test_bit(NETFS_SREQ_FAILED, &front->flags)) {
			if (!stream->failed) {
				stream->error = front->error;
				rreq->error = front->error;
				set_bit(NETFS_RREQ_FAILED, &rreq->flags);
				stream->failed = true;
			}
			notes |= MADE_PROGRESS | ABANDON_SREQ;
		} else if (test_bit(NETFS_SREQ_NEED_RETRY, &front->flags)) {
			stream->need_retry = true;
			notes |= NEED_RETRY | MADE_PROGRESS;
			break;
		} else if (test_bit(NETFS_RREQ_SHORT_TRANSFER, &rreq->flags)) {
			notes |= MADE_PROGRESS;
		} else {
			if (!stream->failed) {
				stream->transferred += transferred;
				stream->transferred_valid = true;
			}
			if (front->transferred < front->len)
				set_bit(NETFS_RREQ_SHORT_TRANSFER, &rreq->flags);
			notes |= MADE_PROGRESS;
		}

		/* Remove if completely consumed. */
		stream->source = front->source;
		spin_lock(&rreq->lock);

		remove = front;
		trace_netfs_sreq(front,
				 notes & ABANDON_SREQ ?
				 netfs_sreq_trace_abandoned : netfs_sreq_trace_consumed);
		list_del_init(&front->rreq_link);
		front = list_first_entry_or_null(&stream->subrequests,
						 struct netfs_io_subrequest, rreq_link);
		stream->front = front;
		spin_unlock(&rreq->lock);
		netfs_put_subrequest(remove,
				     notes & ABANDON_SREQ ?
				     netfs_sreq_trace_put_abandon :
				     netfs_sreq_trace_put_done);
	}

	trace_netfs_collect_stream(rreq, stream);
	trace_netfs_collect_state(rreq, rreq->collected_to, notes);

	if (!(notes & BUFFERED))
		rreq->cleaned_to = rreq->collected_to;

	if (notes & NEED_RETRY)
		goto need_retry;
	if (notes & MADE_PROGRESS) {
		netfs_wake_rreq_flag(rreq, NETFS_RREQ_PAUSE, netfs_rreq_trace_unpause);
		//cond_resched();
		goto reassess;
	}

out:
	_leave(" = %x", notes);
	return;

need_retry:
	/* Okay...  We're going to have to retry parts of the stream.  Note
	 * that any partially completed op will have had any wholly transferred
	 * folios removed from it.
	 */
	_debug("retry");
	netfs_retry_reads(rreq);
	goto out;
}

/*
 * Do page flushing and suchlike after DIO.
 */
static void netfs_rreq_assess_dio(struct netfs_io_request *rreq)
{
	unsigned int i;

	if (rreq->origin == NETFS_UNBUFFERED_READ ||
	    rreq->origin == NETFS_DIO_READ) {
		for (i = 0; i < rreq->direct_bv_count; i++) {
			flush_dcache_page(rreq->direct_bv[i].bv_page);
			// TODO: cifs marks pages in the destination buffer
			// dirty under some circumstances after a read.  Do we
			// need to do that too?
			set_page_dirty(rreq->direct_bv[i].bv_page);
		}
	}

	if (rreq->iocb) {
		rreq->iocb->ki_pos += rreq->transferred;
		if (rreq->iocb->ki_complete) {
			trace_netfs_rreq(rreq, netfs_rreq_trace_ki_complete);
			rreq->iocb->ki_complete(
				rreq->iocb, rreq->error ? rreq->error : rreq->transferred);
		}
	}
	if (rreq->netfs_ops->done)
		rreq->netfs_ops->done(rreq);
	if (rreq->origin == NETFS_UNBUFFERED_READ ||
	    rreq->origin == NETFS_DIO_READ)
		inode_dio_end(rreq->inode);
}

/*
 * Do processing after reading a monolithic single object.
 */
static void netfs_rreq_assess_single(struct netfs_io_request *rreq)
{
	struct netfs_io_stream *stream = &rreq->io_streams[0];

	if (!rreq->error && stream->source == NETFS_DOWNLOAD_FROM_SERVER &&
	    fscache_resources_valid(&rreq->cache_resources)) {
		trace_netfs_rreq(rreq, netfs_rreq_trace_dirty);
		netfs_single_mark_inode_dirty(rreq->inode);
	}

	if (rreq->iocb) {
		rreq->iocb->ki_pos += rreq->transferred;
		if (rreq->iocb->ki_complete) {
			trace_netfs_rreq(rreq, netfs_rreq_trace_ki_complete);
			rreq->iocb->ki_complete(
				rreq->iocb, rreq->error ? rreq->error : rreq->transferred);
		}
	}
	if (rreq->netfs_ops->done)
		rreq->netfs_ops->done(rreq);
}

/*
 * Perform the collection of subrequests and folios.
 *
 * Note that we're in normal kernel thread context at this point, possibly
 * running on a workqueue.
 */
bool netfs_read_collection(struct netfs_io_request *rreq)
{
	struct netfs_io_stream *stream = &rreq->io_streams[0];

	netfs_collect_read_results(rreq);

	/* We're done when the app thread has finished posting subreqs and the
	 * queue is empty.
	 */
	if (!test_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags))
		return false;
	smp_rmb(); /* Read ALL_QUEUED before subreq lists. */

	if (!list_empty(&stream->subrequests))
		return false;

	/* Okay, declare that all I/O is complete. */
	rreq->transferred = stream->transferred;
	trace_netfs_rreq(rreq, netfs_rreq_trace_complete);

	//netfs_rreq_is_still_valid(rreq);

	switch (rreq->origin) {
	case NETFS_UNBUFFERED_READ:
	case NETFS_DIO_READ:
	case NETFS_READ_GAPS:
		netfs_rreq_assess_dio(rreq);
		break;
	case NETFS_READ_SINGLE:
		netfs_rreq_assess_single(rreq);
		break;
	default:
		break;
	}
	task_io_account_read(rreq->transferred);

	netfs_wake_rreq_flag(rreq, NETFS_RREQ_IN_PROGRESS, netfs_rreq_trace_wake_ip);
	/* As we cleared NETFS_RREQ_IN_PROGRESS, we acquired its ref. */

	trace_netfs_rreq(rreq, netfs_rreq_trace_done);
	netfs_clear_subrequests(rreq);
	netfs_unlock_abandoned_read_pages(rreq);
	if (unlikely(rreq->copy_to_cache))
		netfs_pgpriv2_end_copy_to_cache(rreq);
	return true;
}

void netfs_read_collection_worker(struct work_struct *work)
{
	struct netfs_io_request *rreq = container_of(work, struct netfs_io_request, work);

	netfs_see_request(rreq, netfs_rreq_trace_see_work);
	if (netfs_check_rreq_in_progress(rreq)) {
		if (netfs_read_collection(rreq))
			/* Drop the ref from the IN_PROGRESS flag. */
			netfs_put_request(rreq, netfs_rreq_trace_put_work_ip);
		else
			netfs_see_request(rreq, netfs_rreq_trace_see_work_complete);
	}
}

/**
 * netfs_read_subreq_progress - Note progress of a read operation.
 * @subreq: The read request that has made progress.
 *
 * This tells the read side of netfs lib that a contributory I/O operation has
 * made some progress and that it may be possible to unlock some folios.
 *
 * Before calling, the filesystem should update subreq->transferred to track
 * the amount of data copied into the output buffer.
 */
void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq)
{
	struct netfs_io_request *rreq = subreq->rreq;
	struct netfs_io_stream *stream = &rreq->io_streams[0];
	size_t fsize = PAGE_SIZE << rreq->front_folio_order;

	trace_netfs_sreq(subreq, netfs_sreq_trace_progress);

	/* If we are at the head of the queue, wake up the collector,
	 * getting a ref to it if we were the ones to do so.
	 */
	if (subreq->start + subreq->transferred > rreq->cleaned_to + fsize &&
	    (rreq->origin == NETFS_READAHEAD ||
	     rreq->origin == NETFS_READPAGE ||
	     rreq->origin == NETFS_READ_FOR_WRITE) &&
	    list_is_first(&subreq->rreq_link, &stream->subrequests)
	    ) {
		__set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
		netfs_wake_collector(rreq);
	}
}
EXPORT_SYMBOL(netfs_read_subreq_progress);
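
/*
 * A minimal usage sketch, assuming a hypothetical filesystem (the myfs_*
 * name below is illustrative, not an in-tree API): as data trickles in, the
 * filesystem's receive path records how much of the subrequest has been
 * filled and then pokes the collector so that any folios that are now fully
 * read can be unlocked early.
 *
 *	static void myfs_data_received(struct netfs_io_subrequest *subreq,
 *				       size_t received)
 *	{
 *		// Account the bytes now present in the output buffer.
 *		subreq->transferred += received;
 *
 *		// Let netfs unlock any folios that are now complete.
 *		netfs_read_subreq_progress(subreq);
 *	}
 */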

/**
 * netfs_read_subreq_terminated - Note the termination of an I/O operation.
 * @subreq: The I/O request that has terminated.
 *
 * This tells the read helper that a contributory I/O operation has terminated,
 * one way or another, and that it should integrate the results.
 *
 * The caller indicates the outcome of the operation through @subreq->error,
 * supplying 0 to indicate a successful or retryable transfer (if
 * NETFS_SREQ_NEED_RETRY is set) or a negative error code.  The helper will
 * look after reissuing I/O operations as appropriate and writing downloaded
 * data to the cache.
 *
 * Before calling, the filesystem should update subreq->transferred to track
 * the amount of data copied into the output buffer.
 */
void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq)
{
	struct netfs_io_request *rreq = subreq->rreq;

	switch (subreq->source) {
	case NETFS_READ_FROM_CACHE:
		netfs_stat(&netfs_n_rh_read_done);
		break;
	case NETFS_DOWNLOAD_FROM_SERVER:
		netfs_stat(&netfs_n_rh_download_done);
		break;
	default:
		break;
	}

	/* Deal with retry requests, short reads and errors.  If we retry
	 * but don't make progress, we abandon the attempt.
	 */
	if (!subreq->error && subreq->transferred < subreq->len) {
		if (test_bit(NETFS_SREQ_HIT_EOF, &subreq->flags)) {
			trace_netfs_sreq(subreq, netfs_sreq_trace_hit_eof);
		} else if (test_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags)) {
			trace_netfs_sreq(subreq, netfs_sreq_trace_need_clear);
		} else if (test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) {
			trace_netfs_sreq(subreq, netfs_sreq_trace_need_retry);
		} else if (test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags)) {
			__set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
			trace_netfs_sreq(subreq, netfs_sreq_trace_partial_read);
		} else {
			__set_bit(NETFS_SREQ_FAILED, &subreq->flags);
			subreq->error = -ENODATA;
			trace_netfs_sreq(subreq, netfs_sreq_trace_short);
		}
	}

	if (unlikely(subreq->error < 0)) {
		trace_netfs_failure(rreq, subreq, subreq->error, netfs_fail_read);
		if (subreq->source == NETFS_READ_FROM_CACHE) {
			netfs_stat(&netfs_n_rh_read_failed);
			__set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
		} else {
			netfs_stat(&netfs_n_rh_download_failed);
			__set_bit(NETFS_SREQ_FAILED, &subreq->flags);
		}
		trace_netfs_rreq(rreq, netfs_rreq_trace_set_pause);
		set_bit(NETFS_RREQ_PAUSE, &rreq->flags);
	}

	trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);
	netfs_subreq_clear_in_progress(subreq);
	netfs_put_subrequest(subreq, netfs_sreq_trace_put_terminated);
}
EXPORT_SYMBOL(netfs_read_subreq_terminated);
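
/*
 * A minimal sketch of how a filesystem's completion path might feed a result
 * into this helper (the myfs_* name is hypothetical, not an in-tree API):
 * record the outcome in subreq->error and subreq->transferred, flag any
 * progress made, then hand the subrequest back to netfs, which deals with
 * retries, tail clearing and folio unlocking.
 *
 *	static void myfs_read_done(struct netfs_io_subrequest *subreq,
 *				   ssize_t result)
 *	{
 *		if (result < 0) {
 *			// Negative error code; netfs decides on retry/failure.
 *			subreq->error = result;
 *		} else {
 *			// Account the bytes copied into the output buffer.
 *			subreq->error = 0;
 *			subreq->transferred += result;
 *			if (result > 0)
 *				__set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
 *		}
 *		netfs_read_subreq_terminated(subreq);
 *	}
 */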

/*
 * Handle termination of a read from the cache.
 */
void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error)
{
	struct netfs_io_subrequest *subreq = priv;

	if (transferred_or_error > 0) {
		subreq->error = 0;
		if (transferred_or_error > 0) {
			subreq->transferred += transferred_or_error;
			__set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
		}
	} else {
		subreq->error = transferred_or_error;
	}
	netfs_read_subreq_terminated(subreq);
}
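
/*
 * For reference, a sketch of how this callback is typically wired up when a
 * read is issued against the cache: it is passed as the termination function
 * with the subrequest as the private pointer, so the cache backend reports
 * the byte count or error straight back into the subrequest above.  The
 * exact read-op arguments shown here (hole handling in particular) are
 * illustrative rather than definitive.
 *
 *	struct netfs_cache_resources *cres = &rreq->cache_resources;
 *
 *	cres->ops->read(cres, subreq->start, &subreq->io_iter,
 *			NETFS_READ_HOLE_IGNORE,
 *			netfs_cache_read_terminated, subreq);
 */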