1 // SPDX-License-Identifier: GPL-2.0-only 2 /* Network filesystem read subrequest result collection, assessment and 3 * retrying. 4 * 5 * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved. 6 * Written by David Howells (dhowells@redhat.com) 7 */ 8 9 #include <linux/export.h> 10 #include <linux/fs.h> 11 #include <linux/mm.h> 12 #include <linux/pagemap.h> 13 #include <linux/slab.h> 14 #include <linux/task_io_accounting_ops.h> 15 #include "internal.h" 16 17 /* Notes made in the collector */ 18 #define HIT_PENDING 0x01 /* A front op was still pending */ 19 #define MADE_PROGRESS 0x04 /* Made progress cleaning up a stream or the folio set */ 20 #define BUFFERED 0x08 /* The pagecache needs cleaning up */ 21 #define NEED_RETRY 0x10 /* A front op requests retrying */ 22 #define COPY_TO_CACHE 0x40 /* Need to copy subrequest to cache */ 23 #define ABANDON_SREQ 0x80 /* Need to abandon untransferred part of subrequest */ 24 25 /* 26 * Clear the unread part of an I/O request. 27 */ 28 static void netfs_clear_unread(struct netfs_io_subrequest *subreq) 29 { 30 netfs_reset_iter(subreq); 31 WARN_ON_ONCE(subreq->len - subreq->transferred != iov_iter_count(&subreq->io_iter)); 32 iov_iter_zero(iov_iter_count(&subreq->io_iter), &subreq->io_iter); 33 if (subreq->start + subreq->transferred >= subreq->rreq->i_size) 34 __set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags); 35 } 36 37 /* 38 * Flush, mark and unlock a folio that's now completely read. If we want to 39 * cache the folio, we set the group to NETFS_FOLIO_COPY_TO_CACHE, mark it 40 * dirty and let writeback handle it. 41 */ 42 static void netfs_unlock_read_folio(struct netfs_io_request *rreq, 43 struct folio_queue *folioq, 44 int slot) 45 { 46 struct netfs_folio *finfo; 47 struct folio *folio = folioq_folio(folioq, slot); 48 49 if (unlikely(folio_pos(folio) < rreq->abandon_to)) { 50 trace_netfs_folio(folio, netfs_folio_trace_abandon); 51 goto just_unlock; 52 } 53 54 flush_dcache_folio(folio); 55 folio_mark_uptodate(folio); 56 57 if (!test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)) { 58 finfo = netfs_folio_info(folio); 59 if (finfo) { 60 trace_netfs_folio(folio, netfs_folio_trace_filled_gaps); 61 if (finfo->netfs_group) 62 folio_change_private(folio, finfo->netfs_group); 63 else 64 folio_detach_private(folio); 65 kfree(finfo); 66 } 67 68 if (test_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags)) { 69 if (!WARN_ON_ONCE(folio_get_private(folio) != NULL)) { 70 trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache); 71 folio_attach_private(folio, NETFS_FOLIO_COPY_TO_CACHE); 72 folio_mark_dirty(folio); 73 } 74 } else { 75 trace_netfs_folio(folio, netfs_folio_trace_read_done); 76 } 77 78 folioq_clear(folioq, slot); 79 } else { 80 // TODO: Use of PG_private_2 is deprecated. 81 if (test_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags)) 82 netfs_pgpriv2_copy_to_cache(rreq, folio); 83 } 84 85 just_unlock: 86 if (folio == rreq->no_unlock_folio && 87 test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags)) { 88 _debug("no unlock"); 89 } else { 90 trace_netfs_folio(folio, netfs_folio_trace_read_unlock); 91 folio_unlock(folio); 92 } 93 94 folioq_clear(folioq, slot); 95 } 96 97 /* 98 * Unlock any folios we've finished with. 99 */ 100 static void netfs_read_unlock_folios(struct netfs_io_request *rreq, 101 unsigned int *notes) 102 { 103 struct folio_queue *folioq = rreq->buffer.tail; 104 unsigned long long collected_to = rreq->collected_to; 105 unsigned int slot = rreq->buffer.first_tail_slot; 106 107 if (rreq->cleaned_to >= rreq->collected_to) 108 return; 109 110 // TODO: Begin decryption 111 112 if (slot >= folioq_nr_slots(folioq)) { 113 folioq = rolling_buffer_delete_spent(&rreq->buffer); 114 if (!folioq) { 115 rreq->front_folio_order = 0; 116 return; 117 } 118 slot = 0; 119 } 120 121 for (;;) { 122 struct folio *folio; 123 unsigned long long fpos, fend; 124 unsigned int order; 125 size_t fsize; 126 127 if (*notes & COPY_TO_CACHE) 128 set_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags); 129 130 folio = folioq_folio(folioq, slot); 131 if (WARN_ONCE(!folio_test_locked(folio), 132 "R=%08x: folio %lx is not locked\n", 133 rreq->debug_id, folio->index)) 134 trace_netfs_folio(folio, netfs_folio_trace_not_locked); 135 136 order = folioq_folio_order(folioq, slot); 137 rreq->front_folio_order = order; 138 fsize = PAGE_SIZE << order; 139 fpos = folio_pos(folio); 140 fend = fpos + fsize; 141 142 trace_netfs_collect_folio(rreq, folio, fend, collected_to); 143 144 /* Unlock any folio we've transferred all of. */ 145 if (collected_to < fend) 146 break; 147 148 netfs_unlock_read_folio(rreq, folioq, slot); 149 WRITE_ONCE(rreq->cleaned_to, fpos + fsize); 150 *notes |= MADE_PROGRESS; 151 152 clear_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags); 153 154 /* Clean up the head folioq. If we clear an entire folioq, then 155 * we can get rid of it provided it's not also the tail folioq 156 * being filled by the issuer. 157 */ 158 folioq_clear(folioq, slot); 159 slot++; 160 if (slot >= folioq_nr_slots(folioq)) { 161 folioq = rolling_buffer_delete_spent(&rreq->buffer); 162 if (!folioq) 163 goto done; 164 slot = 0; 165 trace_netfs_folioq(folioq, netfs_trace_folioq_read_progress); 166 } 167 168 if (fpos + fsize >= collected_to) 169 break; 170 } 171 172 rreq->buffer.tail = folioq; 173 done: 174 rreq->buffer.first_tail_slot = slot; 175 } 176 177 /* 178 * Collect and assess the results of various read subrequests. We may need to 179 * retry some of the results. 180 * 181 * Note that we have a sequence of subrequests, which may be drawing on 182 * different sources and may or may not be the same size or starting position 183 * and may not even correspond in boundary alignment. 184 */ 185 static void netfs_collect_read_results(struct netfs_io_request *rreq) 186 { 187 struct netfs_io_subrequest *front, *remove; 188 struct netfs_io_stream *stream = &rreq->io_streams[0]; 189 unsigned int notes; 190 191 _enter("%llx-%llx", rreq->start, rreq->start + rreq->len); 192 trace_netfs_rreq(rreq, netfs_rreq_trace_collect); 193 trace_netfs_collect(rreq); 194 195 reassess: 196 if (rreq->origin == NETFS_READAHEAD || 197 rreq->origin == NETFS_READPAGE || 198 rreq->origin == NETFS_READ_FOR_WRITE) 199 notes = BUFFERED; 200 else 201 notes = 0; 202 203 /* Remove completed subrequests from the front of the stream and 204 * advance the completion point. We stop when we hit something that's 205 * in progress. The issuer thread may be adding stuff to the tail 206 * whilst we're doing this. 207 */ 208 front = list_first_entry_or_null_acquire(&stream->subrequests, 209 struct netfs_io_subrequest, rreq_link); 210 /* Read first subreq pointer before IN_PROGRESS flag. */ 211 212 while (front) { 213 size_t transferred; 214 215 trace_netfs_collect_sreq(rreq, front); 216 _debug("sreq [%x] %llx %zx/%zx", 217 front->debug_index, front->start, front->transferred, front->len); 218 219 if (stream->collected_to < front->start) { 220 trace_netfs_collect_gap(rreq, stream, front->start, 'F'); 221 stream->collected_to = front->start; 222 } 223 224 if (netfs_check_subreq_in_progress(front)) 225 notes |= HIT_PENDING; 226 smp_rmb(); /* Read counters after IN_PROGRESS flag. */ 227 transferred = READ_ONCE(front->transferred); 228 229 /* If we can now collect the next folio, do so. We don't want 230 * to defer this as we have to decide whether we need to copy 231 * to the cache or not, and that may differ between adjacent 232 * subreqs. 233 */ 234 if (notes & BUFFERED) { 235 size_t fsize = PAGE_SIZE << rreq->front_folio_order; 236 237 /* Clear the tail of a short read. */ 238 if (!(notes & HIT_PENDING) && 239 front->error == 0 && 240 transferred < front->len && 241 (test_bit(NETFS_SREQ_HIT_EOF, &front->flags) || 242 test_bit(NETFS_SREQ_CLEAR_TAIL, &front->flags))) { 243 netfs_clear_unread(front); 244 transferred = front->transferred = front->len; 245 trace_netfs_sreq(front, netfs_sreq_trace_clear); 246 } 247 248 stream->collected_to = front->start + transferred; 249 rreq->collected_to = stream->collected_to; 250 251 if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &front->flags)) 252 notes |= COPY_TO_CACHE; 253 254 if (test_bit(NETFS_SREQ_FAILED, &front->flags)) { 255 rreq->abandon_to = front->start + front->len; 256 front->transferred = front->len; 257 transferred = front->len; 258 trace_netfs_rreq(rreq, netfs_rreq_trace_set_abandon); 259 } 260 if (front->start + transferred >= rreq->cleaned_to + fsize || 261 test_bit(NETFS_SREQ_HIT_EOF, &front->flags)) 262 netfs_read_unlock_folios(rreq, ¬es); 263 } else { 264 stream->collected_to = front->start + transferred; 265 rreq->collected_to = stream->collected_to; 266 } 267 268 /* Stall if the front is still undergoing I/O. */ 269 if (notes & HIT_PENDING) 270 break; 271 272 if (test_bit(NETFS_SREQ_FAILED, &front->flags)) { 273 if (!stream->failed) { 274 stream->error = front->error; 275 rreq->error = front->error; 276 set_bit(NETFS_RREQ_FAILED, &rreq->flags); 277 stream->failed = true; 278 } 279 notes |= MADE_PROGRESS | ABANDON_SREQ; 280 } else if (test_bit(NETFS_SREQ_NEED_RETRY, &front->flags)) { 281 stream->need_retry = true; 282 notes |= NEED_RETRY | MADE_PROGRESS; 283 break; 284 } else if (test_bit(NETFS_RREQ_SHORT_TRANSFER, &rreq->flags)) { 285 notes |= MADE_PROGRESS; 286 } else { 287 if (!stream->failed) { 288 stream->transferred += transferred; 289 stream->transferred_valid = true; 290 } 291 if (front->transferred < front->len) 292 set_bit(NETFS_RREQ_SHORT_TRANSFER, &rreq->flags); 293 notes |= MADE_PROGRESS; 294 } 295 296 /* Remove if completely consumed. */ 297 stream->source = front->source; 298 spin_lock(&rreq->lock); 299 300 remove = front; 301 trace_netfs_sreq(front, 302 notes & ABANDON_SREQ ? 303 netfs_sreq_trace_abandoned : netfs_sreq_trace_consumed); 304 list_del_init(&front->rreq_link); 305 front = list_first_entry_or_null(&stream->subrequests, 306 struct netfs_io_subrequest, rreq_link); 307 spin_unlock(&rreq->lock); 308 netfs_put_subrequest(remove, 309 notes & ABANDON_SREQ ? 310 netfs_sreq_trace_put_abandon : 311 netfs_sreq_trace_put_done); 312 } 313 314 trace_netfs_collect_stream(rreq, stream); 315 trace_netfs_collect_state(rreq, rreq->collected_to, notes); 316 317 if (!(notes & BUFFERED)) 318 rreq->cleaned_to = rreq->collected_to; 319 320 if (notes & NEED_RETRY) 321 goto need_retry; 322 if (notes & MADE_PROGRESS) { 323 netfs_wake_rreq_flag(rreq, NETFS_RREQ_PAUSE, netfs_rreq_trace_unpause); 324 //cond_resched(); 325 goto reassess; 326 } 327 328 out: 329 _leave(" = %x", notes); 330 return; 331 332 need_retry: 333 /* Okay... We're going to have to retry parts of the stream. Note 334 * that any partially completed op will have had any wholly transferred 335 * folios removed from it. 336 */ 337 _debug("retry"); 338 netfs_retry_reads(rreq); 339 goto out; 340 } 341 342 /* 343 * Do page flushing and suchlike after DIO. 344 */ 345 static void netfs_rreq_assess_dio(struct netfs_io_request *rreq) 346 { 347 unsigned int i; 348 349 if (rreq->origin == NETFS_UNBUFFERED_READ || 350 rreq->origin == NETFS_DIO_READ) { 351 for (i = 0; i < rreq->direct_bv_count; i++) { 352 flush_dcache_page(rreq->direct_bv[i].bv_page); 353 // TODO: cifs marks pages in the destination buffer 354 // dirty under some circumstances after a read. Do we 355 // need to do that too? 356 set_page_dirty(rreq->direct_bv[i].bv_page); 357 } 358 } 359 360 if (rreq->iocb) { 361 rreq->iocb->ki_pos += rreq->transferred; 362 if (rreq->iocb->ki_complete) { 363 trace_netfs_rreq(rreq, netfs_rreq_trace_ki_complete); 364 rreq->iocb->ki_complete( 365 rreq->iocb, rreq->error ? rreq->error : rreq->transferred); 366 } 367 } 368 if (rreq->netfs_ops->done) 369 rreq->netfs_ops->done(rreq); 370 if (rreq->origin == NETFS_UNBUFFERED_READ || 371 rreq->origin == NETFS_DIO_READ) 372 inode_dio_end(rreq->inode); 373 } 374 375 /* 376 * Do processing after reading a monolithic single object. 377 */ 378 static void netfs_rreq_assess_single(struct netfs_io_request *rreq) 379 { 380 struct netfs_io_stream *stream = &rreq->io_streams[0]; 381 382 if (!rreq->error && stream->source == NETFS_DOWNLOAD_FROM_SERVER && 383 fscache_resources_valid(&rreq->cache_resources)) { 384 trace_netfs_rreq(rreq, netfs_rreq_trace_dirty); 385 netfs_single_mark_inode_dirty(rreq->inode); 386 } 387 388 if (rreq->iocb) { 389 rreq->iocb->ki_pos += rreq->transferred; 390 if (rreq->iocb->ki_complete) { 391 trace_netfs_rreq(rreq, netfs_rreq_trace_ki_complete); 392 rreq->iocb->ki_complete( 393 rreq->iocb, rreq->error ? rreq->error : rreq->transferred); 394 } 395 } 396 if (rreq->netfs_ops->done) 397 rreq->netfs_ops->done(rreq); 398 } 399 400 /* 401 * Perform the collection of subrequests and folios. 402 * 403 * Note that we're in normal kernel thread context at this point, possibly 404 * running on a workqueue. 405 */ 406 bool netfs_read_collection(struct netfs_io_request *rreq) 407 { 408 struct netfs_io_stream *stream = &rreq->io_streams[0]; 409 410 netfs_collect_read_results(rreq); 411 412 /* We're done when the app thread has finished posting subreqs and the 413 * queue is empty. 414 */ 415 if (!test_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags)) 416 return false; 417 smp_rmb(); /* Read ALL_QUEUED before subreq lists. */ 418 419 if (!list_empty(&stream->subrequests)) 420 return false; 421 422 /* Okay, declare that all I/O is complete. */ 423 rreq->transferred = stream->transferred; 424 trace_netfs_rreq(rreq, netfs_rreq_trace_complete); 425 426 //netfs_rreq_is_still_valid(rreq); 427 428 switch (rreq->origin) { 429 case NETFS_UNBUFFERED_READ: 430 case NETFS_DIO_READ: 431 case NETFS_READ_GAPS: 432 netfs_rreq_assess_dio(rreq); 433 break; 434 case NETFS_READ_SINGLE: 435 netfs_rreq_assess_single(rreq); 436 break; 437 default: 438 break; 439 } 440 task_io_account_read(rreq->transferred); 441 442 netfs_wake_rreq_flag(rreq, NETFS_RREQ_IN_PROGRESS, netfs_rreq_trace_wake_ip); 443 /* As we cleared NETFS_RREQ_IN_PROGRESS, we acquired its ref. */ 444 445 trace_netfs_rreq(rreq, netfs_rreq_trace_done); 446 netfs_clear_subrequests(rreq); 447 netfs_unlock_abandoned_read_pages(rreq); 448 if (unlikely(rreq->copy_to_cache)) 449 netfs_pgpriv2_end_copy_to_cache(rreq); 450 return true; 451 } 452 453 void netfs_read_collection_worker(struct work_struct *work) 454 { 455 struct netfs_io_request *rreq = container_of(work, struct netfs_io_request, work); 456 457 netfs_see_request(rreq, netfs_rreq_trace_see_work); 458 if (netfs_check_rreq_in_progress(rreq)) { 459 if (netfs_read_collection(rreq)) 460 /* Drop the ref from the IN_PROGRESS flag. */ 461 netfs_put_request(rreq, netfs_rreq_trace_put_work_ip); 462 else 463 netfs_see_request(rreq, netfs_rreq_trace_see_work_complete); 464 } 465 } 466 467 /** 468 * netfs_read_subreq_progress - Note progress of a read operation. 469 * @subreq: The read request that has terminated. 470 * 471 * This tells the read side of netfs lib that a contributory I/O operation has 472 * made some progress and that it may be possible to unlock some folios. 473 * 474 * Before calling, the filesystem should update subreq->transferred to track 475 * the amount of data copied into the output buffer. 476 */ 477 void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq) 478 { 479 struct netfs_io_request *rreq = subreq->rreq; 480 struct netfs_io_stream *stream = &rreq->io_streams[0]; 481 size_t fsize = PAGE_SIZE << rreq->front_folio_order; 482 483 trace_netfs_sreq(subreq, netfs_sreq_trace_progress); 484 485 /* If we are at the head of the queue, wake up the collector, 486 * getting a ref to it if we were the ones to do so. 487 */ 488 if (subreq->start + subreq->transferred > rreq->cleaned_to + fsize && 489 (rreq->origin == NETFS_READAHEAD || 490 rreq->origin == NETFS_READPAGE || 491 rreq->origin == NETFS_READ_FOR_WRITE) && 492 list_is_first(&subreq->rreq_link, &stream->subrequests) 493 ) { 494 __set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags); 495 netfs_wake_collector(rreq); 496 } 497 } 498 EXPORT_SYMBOL(netfs_read_subreq_progress); 499 500 /** 501 * netfs_read_subreq_terminated - Note the termination of an I/O operation. 502 * @subreq: The I/O request that has terminated. 503 * 504 * This tells the read helper that a contributory I/O operation has terminated, 505 * one way or another, and that it should integrate the results. 506 * 507 * The caller indicates the outcome of the operation through @subreq->error, 508 * supplying 0 to indicate a successful or retryable transfer (if 509 * NETFS_SREQ_NEED_RETRY is set) or a negative error code. The helper will 510 * look after reissuing I/O operations as appropriate and writing downloaded 511 * data to the cache. 512 * 513 * Before calling, the filesystem should update subreq->transferred to track 514 * the amount of data copied into the output buffer. 515 */ 516 void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq) 517 { 518 struct netfs_io_request *rreq = subreq->rreq; 519 520 switch (subreq->source) { 521 case NETFS_READ_FROM_CACHE: 522 netfs_stat(&netfs_n_rh_read_done); 523 break; 524 case NETFS_DOWNLOAD_FROM_SERVER: 525 netfs_stat(&netfs_n_rh_download_done); 526 break; 527 default: 528 break; 529 } 530 531 /* Deal with retry requests, short reads and errors. If we retry 532 * but don't make progress, we abandon the attempt. 533 */ 534 if (!subreq->error && subreq->transferred < subreq->len) { 535 if (test_bit(NETFS_SREQ_HIT_EOF, &subreq->flags)) { 536 trace_netfs_sreq(subreq, netfs_sreq_trace_hit_eof); 537 } else if (test_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags)) { 538 trace_netfs_sreq(subreq, netfs_sreq_trace_need_clear); 539 } else if (test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) { 540 trace_netfs_sreq(subreq, netfs_sreq_trace_need_retry); 541 } else if (test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags)) { 542 __set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags); 543 trace_netfs_sreq(subreq, netfs_sreq_trace_partial_read); 544 } else { 545 __set_bit(NETFS_SREQ_FAILED, &subreq->flags); 546 subreq->error = -ENODATA; 547 trace_netfs_sreq(subreq, netfs_sreq_trace_short); 548 } 549 } 550 551 /* If need retry is set, error should not matter unless we hit too many 552 * retries. Pause the generation of new subreqs 553 */ 554 if (test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) { 555 trace_netfs_rreq(rreq, netfs_rreq_trace_set_pause); 556 set_bit(NETFS_RREQ_PAUSE, &rreq->flags); 557 goto skip_error_checks; 558 } 559 560 if (unlikely(subreq->error < 0)) { 561 trace_netfs_failure(rreq, subreq, subreq->error, netfs_fail_read); 562 if (subreq->source == NETFS_READ_FROM_CACHE) { 563 netfs_stat(&netfs_n_rh_read_failed); 564 __set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags); 565 } else { 566 netfs_stat(&netfs_n_rh_download_failed); 567 __set_bit(NETFS_SREQ_FAILED, &subreq->flags); 568 } 569 trace_netfs_rreq(rreq, netfs_rreq_trace_set_pause); 570 set_bit(NETFS_RREQ_PAUSE, &rreq->flags); 571 } 572 573 skip_error_checks: 574 trace_netfs_sreq(subreq, netfs_sreq_trace_terminated); 575 netfs_subreq_clear_in_progress(subreq); 576 netfs_put_subrequest(subreq, netfs_sreq_trace_put_terminated); 577 } 578 EXPORT_SYMBOL(netfs_read_subreq_terminated); 579 580 /* 581 * Cancel a read subrequest due to preparation failure. 582 */ 583 void netfs_cancel_read(struct netfs_io_subrequest *subreq, int error) 584 { 585 trace_netfs_sreq(subreq, netfs_sreq_trace_cancel); 586 subreq->error = error; 587 __set_bit(NETFS_SREQ_FAILED, &subreq->flags); 588 netfs_read_subreq_terminated(subreq); 589 } 590 591 /* 592 * Handle termination of a read from the cache. 593 */ 594 void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error) 595 { 596 struct netfs_io_subrequest *subreq = priv; 597 598 if (transferred_or_error > 0) { 599 subreq->error = 0; 600 if (transferred_or_error > 0) { 601 subreq->transferred += transferred_or_error; 602 __set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags); 603 } 604 } else { 605 subreq->error = transferred_or_error; 606 } 607 netfs_read_subreq_terminated(subreq); 608 } 609