// SPDX-License-Identifier: GPL-2.0-only
/* Network filesystem read subrequest result collection, assessment and
 * retrying.
 *
 * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/export.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/task_io_accounting_ops.h>
#include "internal.h"

/* Notes made in the collector */
#define HIT_PENDING		0x01	/* A front op was still pending */
#define MADE_PROGRESS		0x04	/* Made progress cleaning up a stream or the folio set */
#define BUFFERED		0x08	/* The pagecache needs cleaning up */
#define NEED_RETRY		0x10	/* A front op requests retrying */
#define COPY_TO_CACHE		0x40	/* Need to copy subrequest to cache */
#define ABANDON_SREQ		0x80	/* Need to abandon untransferred part of subrequest */

/*
 * Clear the unread part of an I/O request.
 */
static void netfs_clear_unread(struct netfs_io_subrequest *subreq)
{
	netfs_reset_iter(subreq);
	WARN_ON_ONCE(subreq->len - subreq->transferred != iov_iter_count(&subreq->io_iter));
	iov_iter_zero(iov_iter_count(&subreq->io_iter), &subreq->io_iter);
	if (subreq->start + subreq->transferred >= subreq->rreq->i_size)
		__set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
}

/*
 * Flush, mark and unlock a folio that's now completely read. If we want to
 * cache the folio, we set the group to NETFS_FOLIO_COPY_TO_CACHE, mark it
 * dirty and let writeback handle it.
 */
static void netfs_unlock_read_folio(struct netfs_io_request *rreq,
				    struct folio_queue *folioq,
				    int slot)
{
	struct netfs_folio *finfo;
	struct folio *folio = folioq_folio(folioq, slot);

	if (unlikely(folio_pos(folio) < rreq->abandon_to)) {
		trace_netfs_folio(folio, netfs_folio_trace_abandon);
		goto just_unlock;
	}

	flush_dcache_folio(folio);
	folio_mark_uptodate(folio);

	if (!test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)) {
		finfo = netfs_folio_info(folio);
		if (finfo) {
			trace_netfs_folio(folio, netfs_folio_trace_filled_gaps);
			if (finfo->netfs_group)
				folio_change_private(folio, finfo->netfs_group);
			else
				folio_detach_private(folio);
			kfree(finfo);
		}

		if (test_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags)) {
			if (!WARN_ON_ONCE(folio_get_private(folio) != NULL)) {
				trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
				folio_attach_private(folio, NETFS_FOLIO_COPY_TO_CACHE);
				folio_mark_dirty(folio);
			}
		} else {
			trace_netfs_folio(folio, netfs_folio_trace_read_done);
		}

		folioq_clear(folioq, slot);
	} else {
		// TODO: Use of PG_private_2 is deprecated.
		if (test_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags))
			netfs_pgpriv2_copy_to_cache(rreq, folio);
	}

just_unlock:
	if (folio->index == rreq->no_unlock_folio &&
	    test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags)) {
		_debug("no unlock");
	} else {
		trace_netfs_folio(folio, netfs_folio_trace_read_unlock);
		folio_unlock(folio);
	}

	folioq_clear(folioq, slot);
}
/*
 * Unlock any folios we've finished with.
 */
static void netfs_read_unlock_folios(struct netfs_io_request *rreq,
				     unsigned int *notes)
{
	struct folio_queue *folioq = rreq->buffer.tail;
	unsigned long long collected_to = rreq->collected_to;
	unsigned int slot = rreq->buffer.first_tail_slot;

	if (rreq->cleaned_to >= rreq->collected_to)
		return;

	// TODO: Begin decryption

	if (slot >= folioq_nr_slots(folioq)) {
		folioq = rolling_buffer_delete_spent(&rreq->buffer);
		if (!folioq) {
			rreq->front_folio_order = 0;
			return;
		}
		slot = 0;
	}

	for (;;) {
		struct folio *folio;
		unsigned long long fpos, fend;
		unsigned int order;
		size_t fsize;

		if (*notes & COPY_TO_CACHE)
			set_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags);

		folio = folioq_folio(folioq, slot);
		if (WARN_ONCE(!folio_test_locked(folio),
			      "R=%08x: folio %lx is not locked\n",
			      rreq->debug_id, folio->index))
			trace_netfs_folio(folio, netfs_folio_trace_not_locked);

		order = folioq_folio_order(folioq, slot);
		rreq->front_folio_order = order;
		fsize = PAGE_SIZE << order;
		fpos = folio_pos(folio);
		fend = umin(fpos + fsize, rreq->i_size);

		trace_netfs_collect_folio(rreq, folio, fend, collected_to);

		/* Unlock any folio we've transferred all of. */
		if (collected_to < fend)
			break;

		netfs_unlock_read_folio(rreq, folioq, slot);
		WRITE_ONCE(rreq->cleaned_to, fpos + fsize);
		*notes |= MADE_PROGRESS;

		clear_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags);

		/* Clean up the head folioq. If we clear an entire folioq, then
		 * we can get rid of it provided it's not also the tail folioq
		 * being filled by the issuer.
		 */
		folioq_clear(folioq, slot);
		slot++;
		if (slot >= folioq_nr_slots(folioq)) {
			folioq = rolling_buffer_delete_spent(&rreq->buffer);
			if (!folioq)
				goto done;
			slot = 0;
			trace_netfs_folioq(folioq, netfs_trace_folioq_read_progress);
		}

		if (fpos + fsize >= collected_to)
			break;
	}

	rreq->buffer.tail = folioq;
done:
	rreq->buffer.first_tail_slot = slot;
}
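/*
 * Worked example for the unlock logic above (illustrative figures only, not
 * drawn from any particular trace): assume 16KiB folios covering [0, 16K),
 * [16K, 32K) and [32K, 48K), and subrequests A = [0, 20K), B = [20K, 36K)
 * and C = [36K, 48K). When A alone has completed, collected_to is 20K, which
 * covers the first folio's fend (16K) but not the second's (32K), so only
 * the first folio is unlocked and cleaned_to advances to 16K. Once B
 * completes, collected_to reaches 36K and the second folio can be unlocked;
 * the third must wait for C.
 */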
/*
 * Collect and assess the results of various read subrequests. We may need to
 * retry some of the results.
 *
 * Note that we have a sequence of subrequests, which may be drawing on
 * different sources and may or may not be the same size or starting position
 * and may not even correspond in boundary alignment.
 */
static void netfs_collect_read_results(struct netfs_io_request *rreq)
{
	struct netfs_io_subrequest *front, *remove;
	struct netfs_io_stream *stream = &rreq->io_streams[0];
	unsigned int notes;

	_enter("%llx-%llx", rreq->start, rreq->start + rreq->len);
	trace_netfs_rreq(rreq, netfs_rreq_trace_collect);
	trace_netfs_collect(rreq);

reassess:
	if (rreq->origin == NETFS_READAHEAD ||
	    rreq->origin == NETFS_READPAGE ||
	    rreq->origin == NETFS_READ_FOR_WRITE)
		notes = BUFFERED;
	else
		notes = 0;

	/* Remove completed subrequests from the front of the stream and
	 * advance the completion point. We stop when we hit something that's
	 * in progress. The issuer thread may be adding stuff to the tail
	 * whilst we're doing this.
	 */
	front = READ_ONCE(stream->front);
	while (front) {
		size_t transferred;

		trace_netfs_collect_sreq(rreq, front);
		_debug("sreq [%x] %llx %zx/%zx",
		       front->debug_index, front->start, front->transferred, front->len);

		if (stream->collected_to < front->start) {
			trace_netfs_collect_gap(rreq, stream, front->start, 'F');
			stream->collected_to = front->start;
		}

		if (netfs_check_subreq_in_progress(front))
			notes |= HIT_PENDING;
		smp_rmb(); /* Read counters after IN_PROGRESS flag. */
		transferred = READ_ONCE(front->transferred);

		/* If we can now collect the next folio, do so. We don't want
		 * to defer this as we have to decide whether we need to copy
		 * to the cache or not, and that may differ between adjacent
		 * subreqs.
		 */
		if (notes & BUFFERED) {
			size_t fsize = PAGE_SIZE << rreq->front_folio_order;

			/* Clear the tail of a short read. */
			if (!(notes & HIT_PENDING) &&
			    front->error == 0 &&
			    transferred < front->len &&
			    (test_bit(NETFS_SREQ_HIT_EOF, &front->flags) ||
			     test_bit(NETFS_SREQ_CLEAR_TAIL, &front->flags))) {
				netfs_clear_unread(front);
				transferred = front->transferred = front->len;
				trace_netfs_sreq(front, netfs_sreq_trace_clear);
			}

			stream->collected_to = front->start + transferred;
			rreq->collected_to = stream->collected_to;

			if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &front->flags))
				notes |= COPY_TO_CACHE;

			if (test_bit(NETFS_SREQ_FAILED, &front->flags)) {
				rreq->abandon_to = front->start + front->len;
				front->transferred = front->len;
				transferred = front->len;
				trace_netfs_rreq(rreq, netfs_rreq_trace_set_abandon);
			}
			if (front->start + transferred >= rreq->cleaned_to + fsize ||
			    test_bit(NETFS_SREQ_HIT_EOF, &front->flags))
				netfs_read_unlock_folios(rreq, &notes);
		} else {
			stream->collected_to = front->start + transferred;
			rreq->collected_to = stream->collected_to;
		}

		/* Stall if the front is still undergoing I/O. */
		if (notes & HIT_PENDING)
			break;

		if (test_bit(NETFS_SREQ_FAILED, &front->flags)) {
			if (!stream->failed) {
				stream->error = front->error;
				rreq->error = front->error;
				set_bit(NETFS_RREQ_FAILED, &rreq->flags);
				stream->failed = true;
			}
			notes |= MADE_PROGRESS | ABANDON_SREQ;
		} else if (test_bit(NETFS_SREQ_NEED_RETRY, &front->flags)) {
			stream->need_retry = true;
			notes |= NEED_RETRY | MADE_PROGRESS;
			break;
		} else if (test_bit(NETFS_RREQ_SHORT_TRANSFER, &rreq->flags)) {
			notes |= MADE_PROGRESS;
		} else {
			if (!stream->failed)
				stream->transferred += transferred;
			if (front->transferred < front->len)
				set_bit(NETFS_RREQ_SHORT_TRANSFER, &rreq->flags);
			notes |= MADE_PROGRESS;
		}

		/* Remove if completely consumed. */
		stream->source = front->source;
		spin_lock(&rreq->lock);

		remove = front;
		trace_netfs_sreq(front,
				 notes & ABANDON_SREQ ?
				 netfs_sreq_trace_abandoned : netfs_sreq_trace_consumed);
		list_del_init(&front->rreq_link);
		front = list_first_entry_or_null(&stream->subrequests,
						 struct netfs_io_subrequest, rreq_link);
		stream->front = front;
		spin_unlock(&rreq->lock);
		netfs_put_subrequest(remove,
				     notes & ABANDON_SREQ ?
				     netfs_sreq_trace_put_abandon :
				     netfs_sreq_trace_put_done);
	}

	trace_netfs_collect_stream(rreq, stream);
	trace_netfs_collect_state(rreq, rreq->collected_to, notes);

	if (!(notes & BUFFERED))
		rreq->cleaned_to = rreq->collected_to;

	if (notes & NEED_RETRY)
		goto need_retry;
	if (notes & MADE_PROGRESS) {
		netfs_wake_rreq_flag(rreq, NETFS_RREQ_PAUSE, netfs_rreq_trace_unpause);
		//cond_resched();
		goto reassess;
	}

out:
	_leave(" = %x", notes);
	return;

need_retry:
	/* Okay... We're going to have to retry parts of the stream. Note
	 * that any partially completed op will have had any wholly transferred
	 * folios removed from it.
	 */
	_debug("retry");
	netfs_retry_reads(rreq);
	goto out;
}

/*
 * Do page flushing and suchlike after DIO.
 */
static void netfs_rreq_assess_dio(struct netfs_io_request *rreq)
{
	unsigned int i;

	if (rreq->origin == NETFS_UNBUFFERED_READ ||
	    rreq->origin == NETFS_DIO_READ) {
		for (i = 0; i < rreq->direct_bv_count; i++) {
			flush_dcache_page(rreq->direct_bv[i].bv_page);
			// TODO: cifs marks pages in the destination buffer
			// dirty under some circumstances after a read. Do we
			// need to do that too?
			set_page_dirty(rreq->direct_bv[i].bv_page);
		}
	}

	if (rreq->iocb) {
		rreq->iocb->ki_pos += rreq->transferred;
		if (rreq->iocb->ki_complete) {
			trace_netfs_rreq(rreq, netfs_rreq_trace_ki_complete);
			rreq->iocb->ki_complete(
				rreq->iocb, rreq->error ? rreq->error : rreq->transferred);
		}
	}
	if (rreq->netfs_ops->done)
		rreq->netfs_ops->done(rreq);
	if (rreq->origin == NETFS_UNBUFFERED_READ ||
	    rreq->origin == NETFS_DIO_READ)
		inode_dio_end(rreq->inode);
}

/*
 * Do processing after reading a monolithic single object.
 */
static void netfs_rreq_assess_single(struct netfs_io_request *rreq)
{
	struct netfs_io_stream *stream = &rreq->io_streams[0];

	if (!rreq->error && stream->source == NETFS_DOWNLOAD_FROM_SERVER &&
	    fscache_resources_valid(&rreq->cache_resources)) {
		trace_netfs_rreq(rreq, netfs_rreq_trace_dirty);
		netfs_single_mark_inode_dirty(rreq->inode);
	}

	if (rreq->iocb) {
		rreq->iocb->ki_pos += rreq->transferred;
		if (rreq->iocb->ki_complete) {
			trace_netfs_rreq(rreq, netfs_rreq_trace_ki_complete);
			rreq->iocb->ki_complete(
				rreq->iocb, rreq->error ? rreq->error : rreq->transferred);
		}
	}
	if (rreq->netfs_ops->done)
		rreq->netfs_ops->done(rreq);
}

/*
 * Perform the collection of subrequests and folios.
 *
 * Note that we're in normal kernel thread context at this point, possibly
 * running on a workqueue.
 */
bool netfs_read_collection(struct netfs_io_request *rreq)
{
	struct netfs_io_stream *stream = &rreq->io_streams[0];

	netfs_collect_read_results(rreq);

	/* We're done when the app thread has finished posting subreqs and the
	 * queue is empty.
	 */
	if (!test_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags))
		return false;
	smp_rmb(); /* Read ALL_QUEUED before subreq lists. */

	if (!list_empty(&stream->subrequests))
		return false;

	/* Okay, declare that all I/O is complete. */
	rreq->transferred = stream->transferred;
	trace_netfs_rreq(rreq, netfs_rreq_trace_complete);

	//netfs_rreq_is_still_valid(rreq);

	switch (rreq->origin) {
	case NETFS_UNBUFFERED_READ:
	case NETFS_DIO_READ:
	case NETFS_READ_GAPS:
		netfs_rreq_assess_dio(rreq);
		break;
	case NETFS_READ_SINGLE:
		netfs_rreq_assess_single(rreq);
		break;
	default:
		break;
	}
	task_io_account_read(rreq->transferred);

	netfs_wake_rreq_flag(rreq, NETFS_RREQ_IN_PROGRESS, netfs_rreq_trace_wake_ip);
	/* As we cleared NETFS_RREQ_IN_PROGRESS, we acquired its ref. */

	trace_netfs_rreq(rreq, netfs_rreq_trace_done);
	netfs_clear_subrequests(rreq);
	netfs_unlock_abandoned_read_pages(rreq);
	if (unlikely(rreq->copy_to_cache))
		netfs_pgpriv2_end_copy_to_cache(rreq);
	return true;
}

void netfs_read_collection_worker(struct work_struct *work)
{
	struct netfs_io_request *rreq = container_of(work, struct netfs_io_request, work);

	netfs_see_request(rreq, netfs_rreq_trace_see_work);
	if (netfs_check_rreq_in_progress(rreq)) {
		if (netfs_read_collection(rreq))
			/* Drop the ref from the IN_PROGRESS flag. */
			netfs_put_request(rreq, netfs_rreq_trace_put_work_ip);
		else
			netfs_see_request(rreq, netfs_rreq_trace_see_work_complete);
	}
}

/**
 * netfs_read_subreq_progress - Note progress of a read operation.
 * @subreq: The read subrequest that has made progress.
 *
 * This tells the read side of netfs lib that a contributory I/O operation has
 * made some progress and that it may be possible to unlock some folios.
 *
 * Before calling, the filesystem should update subreq->transferred to track
 * the amount of data copied into the output buffer.
 */
void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq)
{
	struct netfs_io_request *rreq = subreq->rreq;
	struct netfs_io_stream *stream = &rreq->io_streams[0];
	size_t fsize = PAGE_SIZE << rreq->front_folio_order;

	trace_netfs_sreq(subreq, netfs_sreq_trace_progress);

	/* If we are at the head of the queue, wake up the collector,
	 * getting a ref to it if we were the ones to do so.
	 */
	if (subreq->start + subreq->transferred > rreq->cleaned_to + fsize &&
	    (rreq->origin == NETFS_READAHEAD ||
	     rreq->origin == NETFS_READPAGE ||
	     rreq->origin == NETFS_READ_FOR_WRITE) &&
	    list_is_first(&subreq->rreq_link, &stream->subrequests)
	    ) {
		__set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
		netfs_wake_collector(rreq);
	}
}
EXPORT_SYMBOL(netfs_read_subreq_progress);
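/*
 * Illustrative sketch (not part of the netfs API proper): a filesystem that
 * receives the data for a subrequest in several chunks might report each
 * chunk roughly as below. my_fs_rx_chunk() and bytes_copied are hypothetical
 * names used only for this example.
 *
 *	static void my_fs_rx_chunk(struct netfs_io_subrequest *subreq,
 *				   size_t bytes_copied)
 *	{
 *		// The data has already been copied into subreq->io_iter.
 *		subreq->transferred += bytes_copied;
 *		netfs_read_subreq_progress(subreq);
 *	}
 */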
/**
 * netfs_read_subreq_terminated - Note the termination of an I/O operation.
 * @subreq: The I/O request that has terminated.
 *
 * This tells the read helper that a contributory I/O operation has terminated,
 * one way or another, and that it should integrate the results.
 *
 * The caller indicates the outcome of the operation through @subreq->error,
 * supplying 0 to indicate a successful or retryable transfer (if
 * NETFS_SREQ_NEED_RETRY is set) or a negative error code. The helper will
 * look after reissuing I/O operations as appropriate and writing downloaded
 * data to the cache.
 *
 * Before calling, the filesystem should update subreq->transferred to track
 * the amount of data copied into the output buffer.
 */
void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq)
{
	struct netfs_io_request *rreq = subreq->rreq;

	switch (subreq->source) {
	case NETFS_READ_FROM_CACHE:
		netfs_stat(&netfs_n_rh_read_done);
		break;
	case NETFS_DOWNLOAD_FROM_SERVER:
		netfs_stat(&netfs_n_rh_download_done);
		break;
	default:
		break;
	}

	/* Deal with retry requests, short reads and errors. If we retry
	 * but don't make progress, we abandon the attempt.
	 */
	if (!subreq->error && subreq->transferred < subreq->len) {
		if (test_bit(NETFS_SREQ_HIT_EOF, &subreq->flags)) {
			trace_netfs_sreq(subreq, netfs_sreq_trace_hit_eof);
		} else if (test_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags)) {
			trace_netfs_sreq(subreq, netfs_sreq_trace_need_clear);
		} else if (test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) {
			trace_netfs_sreq(subreq, netfs_sreq_trace_need_retry);
		} else if (test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags)) {
			__set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
			trace_netfs_sreq(subreq, netfs_sreq_trace_partial_read);
		} else {
			__set_bit(NETFS_SREQ_FAILED, &subreq->flags);
			subreq->error = -ENODATA;
			trace_netfs_sreq(subreq, netfs_sreq_trace_short);
		}
	}

	if (unlikely(subreq->error < 0)) {
		trace_netfs_failure(rreq, subreq, subreq->error, netfs_fail_read);
		if (subreq->source == NETFS_READ_FROM_CACHE) {
			netfs_stat(&netfs_n_rh_read_failed);
			__set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
		} else {
			netfs_stat(&netfs_n_rh_download_failed);
			__set_bit(NETFS_SREQ_FAILED, &subreq->flags);
		}
		trace_netfs_rreq(rreq, netfs_rreq_trace_set_pause);
		set_bit(NETFS_RREQ_PAUSE, &rreq->flags);
	}

	trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);
	netfs_subreq_clear_in_progress(subreq);
	netfs_put_subrequest(subreq, netfs_sreq_trace_put_terminated);
}
EXPORT_SYMBOL(netfs_read_subreq_terminated);

/*
 * Handle termination of a read from the cache.
 */
void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error)
{
	struct netfs_io_subrequest *subreq = priv;

	if (transferred_or_error >= 0) {
		subreq->error = 0;
		if (transferred_or_error > 0) {
			subreq->transferred += transferred_or_error;
			__set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
		}
	} else {
		subreq->error = transferred_or_error;
	}
	netfs_read_subreq_terminated(subreq);
}
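/*
 * Illustrative sketch (not part of this file): a filesystem's completion
 * handler for a read RPC might report its result roughly as below.
 * my_fs_read_done(), struct my_fs_call and its fields (subreq,
 * bytes_received, error, hit_eof) are hypothetical names used only for this
 * example; only the netfs calls and flags are real.
 *
 *	static void my_fs_read_done(struct my_fs_call *call)
 *	{
 *		struct netfs_io_subrequest *subreq = call->subreq;
 *
 *		if (call->error) {
 *			subreq->error = call->error;
 *		} else {
 *			subreq->transferred += call->bytes_received;
 *			__set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
 *			if (call->hit_eof)
 *				__set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
 *		}
 *		netfs_read_subreq_terminated(subreq);
 *	}
 */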