1 // SPDX-License-Identifier: GPL-2.0-only 2 /* Network filesystem write subrequest result collection, assessment 3 * and retrying. 4 * 5 * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved. 6 * Written by David Howells (dhowells@redhat.com) 7 */ 8 9 #include <linux/export.h> 10 #include <linux/fs.h> 11 #include <linux/mm.h> 12 #include <linux/pagemap.h> 13 #include <linux/slab.h> 14 #include "internal.h" 15 16 /* Notes made in the collector */ 17 #define HIT_PENDING 0x01 /* A front op was still pending */ 18 #define NEED_REASSESS 0x02 /* Need to loop round and reassess */ 19 #define MADE_PROGRESS 0x04 /* Made progress cleaning up a stream or the folio set */ 20 #define NEED_UNLOCK 0x08 /* The pagecache needs unlocking */ 21 #define NEED_RETRY 0x10 /* A front op requests retrying */ 22 #define SAW_FAILURE 0x20 /* One stream or hit a permanent failure */ 23 24 static void netfs_dump_request(const struct netfs_io_request *rreq) 25 { 26 pr_err("Request R=%08x r=%d fl=%lx or=%x e=%ld\n", 27 rreq->debug_id, refcount_read(&rreq->ref), rreq->flags, 28 rreq->origin, rreq->error); 29 pr_err(" st=%llx tsl=%zx/%llx/%llx\n", 30 rreq->start, rreq->transferred, rreq->submitted, rreq->len); 31 pr_err(" cci=%llx/%llx/%llx\n", 32 rreq->cleaned_to, rreq->collected_to, atomic64_read(&rreq->issued_to)); 33 pr_err(" iw=%pSR\n", rreq->netfs_ops->issue_write); 34 for (int i = 0; i < NR_IO_STREAMS; i++) { 35 const struct netfs_io_subrequest *sreq; 36 const struct netfs_io_stream *s = &rreq->io_streams[i]; 37 38 pr_err(" str[%x] s=%x e=%d acnf=%u,%u,%u,%u\n", 39 s->stream_nr, s->source, s->error, 40 s->avail, s->active, s->need_retry, s->failed); 41 pr_err(" str[%x] ct=%llx t=%zx\n", 42 s->stream_nr, s->collected_to, s->transferred); 43 list_for_each_entry(sreq, &s->subrequests, rreq_link) { 44 pr_err(" sreq[%x:%x] sc=%u s=%llx t=%zx/%zx r=%d f=%lx\n", 45 sreq->stream_nr, sreq->debug_index, sreq->source, 46 sreq->start, sreq->transferred, sreq->len, 47 refcount_read(&sreq->ref), sreq->flags); 48 } 49 } 50 } 51 52 /* 53 * Successful completion of write of a folio to the server and/or cache. Note 54 * that we are not allowed to lock the folio here on pain of deadlocking with 55 * truncate. 56 */ 57 int netfs_folio_written_back(struct folio *folio) 58 { 59 enum netfs_folio_trace why = netfs_folio_trace_clear; 60 struct inode *inode = folio_inode(folio); 61 struct netfs_inode *ictx = netfs_inode(inode); 62 struct netfs_folio *finfo; 63 struct netfs_group *group = NULL; 64 int gcount = 0; 65 66 if ((finfo = netfs_folio_info(folio))) { 67 /* Streaming writes cannot be redirtied whilst under writeback, 68 * so discard the streaming record. 69 */ 70 unsigned long long fend; 71 72 fend = folio_pos(folio) + finfo->dirty_offset + finfo->dirty_len; 73 spin_lock(&ictx->inode.i_lock); 74 if (fend > ictx->_zero_point) 75 netfs_write_zero_point(inode, fend); 76 spin_unlock(&ictx->inode.i_lock); 77 78 folio_detach_private(folio); 79 group = finfo->netfs_group; 80 gcount++; 81 kfree(finfo); 82 why = netfs_folio_trace_clear_s; 83 goto end_wb; 84 } 85 86 if ((group = netfs_folio_group(folio))) { 87 if (group == NETFS_FOLIO_COPY_TO_CACHE) { 88 why = netfs_folio_trace_clear_cc; 89 folio_detach_private(folio); 90 goto end_wb; 91 } 92 93 /* Need to detach the group pointer if the page didn't get 94 * redirtied. If it has been redirtied, then it must be within 95 * the same group. 96 */ 97 why = netfs_folio_trace_redirtied; 98 if (!folio_test_dirty(folio)) { 99 folio_detach_private(folio); 100 gcount++; 101 why = netfs_folio_trace_clear_g; 102 } 103 } 104 105 end_wb: 106 trace_netfs_folio(folio, why); 107 folio_end_writeback(folio); 108 return gcount; 109 } 110 111 /* 112 * Unlock any folios we've finished with. 113 */ 114 static void netfs_writeback_unlock_folios(struct netfs_io_request *wreq, 115 unsigned int *notes) 116 { 117 struct folio_queue *folioq = wreq->buffer.tail; 118 unsigned long long collected_to = wreq->collected_to; 119 unsigned int slot = wreq->buffer.first_tail_slot; 120 121 if (WARN_ON_ONCE(!folioq)) { 122 pr_err("[!] Writeback unlock found empty rolling buffer!\n"); 123 netfs_dump_request(wreq); 124 return; 125 } 126 127 if (wreq->origin == NETFS_PGPRIV2_COPY_TO_CACHE) { 128 if (netfs_pgpriv2_unlock_copied_folios(wreq)) 129 *notes |= MADE_PROGRESS; 130 return; 131 } 132 133 if (slot >= folioq_nr_slots(folioq)) { 134 folioq = rolling_buffer_delete_spent(&wreq->buffer); 135 if (!folioq) 136 return; 137 slot = 0; 138 } 139 140 for (;;) { 141 struct folio *folio; 142 struct netfs_folio *finfo; 143 unsigned long long fpos, fend; 144 size_t fsize, flen; 145 146 folio = folioq_folio(folioq, slot); 147 if (WARN_ONCE(!folio_test_writeback(folio), 148 "R=%08x: folio %lx is not under writeback\n", 149 wreq->debug_id, folio->index)) 150 trace_netfs_folio(folio, netfs_folio_trace_not_under_wback); 151 152 fpos = folio_pos(folio); 153 fsize = folio_size(folio); 154 finfo = netfs_folio_info(folio); 155 flen = finfo ? finfo->dirty_offset + finfo->dirty_len : fsize; 156 157 fend = min_t(unsigned long long, fpos + flen, wreq->i_size); 158 159 trace_netfs_collect_folio(wreq, folio, fend, collected_to); 160 161 /* Unlock any folio we've transferred all of. */ 162 if (collected_to < fend) 163 break; 164 165 wreq->nr_group_rel += netfs_folio_written_back(folio); 166 wreq->cleaned_to = fpos + fsize; 167 *notes |= MADE_PROGRESS; 168 169 /* Clean up the head folioq. If we clear an entire folioq, then 170 * we can get rid of it provided it's not also the tail folioq 171 * being filled by the issuer. 172 */ 173 folioq_clear(folioq, slot); 174 slot++; 175 if (slot >= folioq_nr_slots(folioq)) { 176 folioq = rolling_buffer_delete_spent(&wreq->buffer); 177 if (!folioq) 178 goto done; 179 slot = 0; 180 } 181 182 if (fpos + fsize >= collected_to) 183 break; 184 } 185 186 wreq->buffer.tail = folioq; 187 done: 188 wreq->buffer.first_tail_slot = slot; 189 } 190 191 /* 192 * Collect and assess the results of various write subrequests. We may need to 193 * retry some of the results - or even do an RMW cycle for content crypto. 194 * 195 * Note that we have a number of parallel, overlapping lists of subrequests, 196 * one to the server and one to the local cache for example, which may not be 197 * the same size or starting position and may not even correspond in boundary 198 * alignment. 199 */ 200 static void netfs_collect_write_results(struct netfs_io_request *wreq) 201 { 202 struct netfs_io_subrequest *front, *remove; 203 struct netfs_io_stream *stream; 204 unsigned long long collected_to, issued_to; 205 unsigned int notes; 206 int s; 207 208 _enter("%llx-%llx", wreq->start, wreq->start + wreq->len); 209 trace_netfs_collect(wreq); 210 trace_netfs_rreq(wreq, netfs_rreq_trace_collect); 211 212 reassess_streams: 213 issued_to = atomic64_read(&wreq->issued_to); 214 smp_rmb(); 215 collected_to = ULLONG_MAX; 216 if (wreq->origin == NETFS_WRITEBACK || 217 wreq->origin == NETFS_WRITETHROUGH || 218 wreq->origin == NETFS_PGPRIV2_COPY_TO_CACHE) 219 notes = NEED_UNLOCK; 220 else 221 notes = 0; 222 223 /* Remove completed subrequests from the front of the streams and 224 * advance the completion point on each stream. We stop when we hit 225 * something that's in progress. The issuer thread may be adding stuff 226 * to the tail whilst we're doing this. 227 */ 228 for (s = 0; s < NR_IO_STREAMS; s++) { 229 stream = &wreq->io_streams[s]; 230 /* Read active flag before list pointers */ 231 if (!smp_load_acquire(&stream->active)) 232 continue; 233 234 front = list_first_entry_or_null_acquire(&stream->subrequests, 235 struct netfs_io_subrequest, rreq_link); 236 /* Read first subreq pointer before IN_PROGRESS flag. */ 237 238 while (front) { 239 trace_netfs_collect_sreq(wreq, front); 240 //_debug("sreq [%x] %llx %zx/%zx", 241 // front->debug_index, front->start, front->transferred, front->len); 242 243 if (stream->collected_to < front->start) { 244 trace_netfs_collect_gap(wreq, stream, issued_to, 'F'); 245 stream->collected_to = front->start; 246 } 247 248 /* Stall if the front is still undergoing I/O. */ 249 if (netfs_check_subreq_in_progress(front)) { 250 notes |= HIT_PENDING; 251 break; 252 } 253 smp_rmb(); /* Read counters after I-P flag. */ 254 255 if (stream->failed) { 256 stream->collected_to = front->start + front->len; 257 notes |= MADE_PROGRESS | SAW_FAILURE; 258 goto cancel; 259 } 260 if (front->start + front->transferred > stream->collected_to) { 261 stream->collected_to = front->start + front->transferred; 262 stream->transferred = stream->collected_to - wreq->start; 263 stream->transferred_valid = true; 264 notes |= MADE_PROGRESS; 265 } 266 if (test_bit(NETFS_SREQ_FAILED, &front->flags)) { 267 stream->failed = true; 268 stream->error = front->error; 269 if (stream->source == NETFS_UPLOAD_TO_SERVER) 270 mapping_set_error(wreq->mapping, front->error); 271 notes |= NEED_REASSESS | SAW_FAILURE; 272 break; 273 } 274 if (front->transferred < front->len) { 275 stream->need_retry = true; 276 notes |= NEED_RETRY | MADE_PROGRESS; 277 break; 278 } 279 280 cancel: 281 /* Remove if completely consumed. */ 282 spin_lock(&wreq->lock); 283 284 remove = front; 285 list_del_init(&front->rreq_link); 286 front = list_first_entry_or_null(&stream->subrequests, 287 struct netfs_io_subrequest, rreq_link); 288 spin_unlock(&wreq->lock); 289 netfs_put_subrequest(remove, 290 notes & SAW_FAILURE ? 291 netfs_sreq_trace_put_cancel : 292 netfs_sreq_trace_put_done); 293 } 294 295 /* If we have an empty stream, we need to jump it forward 296 * otherwise the collection point will never advance. 297 */ 298 if (!front && issued_to > stream->collected_to) { 299 trace_netfs_collect_gap(wreq, stream, issued_to, 'E'); 300 stream->collected_to = issued_to; 301 } 302 303 if (stream->collected_to < collected_to) 304 collected_to = stream->collected_to; 305 } 306 307 if (collected_to != ULLONG_MAX && collected_to > wreq->collected_to) 308 wreq->collected_to = collected_to; 309 310 for (s = 0; s < NR_IO_STREAMS; s++) { 311 stream = &wreq->io_streams[s]; 312 if (stream->active) 313 trace_netfs_collect_stream(wreq, stream); 314 } 315 316 trace_netfs_collect_state(wreq, wreq->collected_to, notes); 317 318 /* Unlock any folios that we have now finished with. */ 319 if (notes & NEED_UNLOCK) { 320 if (wreq->cleaned_to < wreq->collected_to) 321 netfs_writeback_unlock_folios(wreq, ¬es); 322 } else { 323 wreq->cleaned_to = wreq->collected_to; 324 } 325 326 // TODO: Discard encryption buffers 327 328 if (notes & NEED_RETRY) 329 goto need_retry; 330 331 if (notes & MADE_PROGRESS) { 332 netfs_wake_rreq_flag(wreq, NETFS_RREQ_PAUSE, netfs_rreq_trace_unpause); 333 //cond_resched(); 334 goto reassess_streams; 335 } 336 337 if (notes & NEED_REASSESS) { 338 //cond_resched(); 339 goto reassess_streams; 340 } 341 342 out: 343 netfs_put_group_many(wreq->group, wreq->nr_group_rel); 344 wreq->nr_group_rel = 0; 345 _leave(" = %x", notes); 346 return; 347 348 need_retry: 349 /* Okay... We're going to have to retry one or both streams. Note 350 * that any partially completed op will have had any wholly transferred 351 * folios removed from it. 352 */ 353 _debug("retry"); 354 netfs_retry_writes(wreq); 355 goto out; 356 } 357 358 /* 359 * Perform the collection of subrequests, folios and encryption buffers. 360 */ 361 bool netfs_write_collection(struct netfs_io_request *wreq) 362 { 363 struct netfs_inode *ictx = netfs_inode(wreq->inode); 364 size_t transferred; 365 bool transferred_valid = false; 366 int s; 367 368 _enter("R=%x", wreq->debug_id); 369 370 netfs_collect_write_results(wreq); 371 372 /* We're done when the app thread has finished posting subreqs and all 373 * the queues in all the streams are empty. 374 */ 375 if (!test_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags)) 376 return false; 377 smp_rmb(); /* Read ALL_QUEUED before lists. */ 378 379 transferred = LONG_MAX; 380 for (s = 0; s < NR_IO_STREAMS; s++) { 381 struct netfs_io_stream *stream = &wreq->io_streams[s]; 382 if (!stream->active) 383 continue; 384 if (!list_empty(&stream->subrequests)) 385 return false; 386 if (stream->transferred_valid && 387 stream->transferred < transferred) { 388 transferred = stream->transferred; 389 transferred_valid = true; 390 } 391 } 392 393 /* Okay, declare that all I/O is complete. */ 394 if (transferred_valid) 395 wreq->transferred = transferred; 396 trace_netfs_rreq(wreq, netfs_rreq_trace_write_done); 397 398 if (wreq->io_streams[1].active && 399 wreq->io_streams[1].failed && 400 ictx->ops->invalidate_cache) { 401 /* Cache write failure doesn't prevent writeback completion 402 * unless we're in disconnected mode. 403 */ 404 ictx->ops->invalidate_cache(wreq); 405 } 406 407 _debug("finished"); 408 netfs_wake_rreq_flag(wreq, NETFS_RREQ_IN_PROGRESS, netfs_rreq_trace_wake_ip); 409 /* As we cleared NETFS_RREQ_IN_PROGRESS, we acquired its ref. */ 410 411 if (wreq->iocb) { 412 size_t written = min(wreq->transferred, wreq->len); 413 wreq->iocb->ki_pos += written; 414 if (wreq->iocb->ki_complete) { 415 trace_netfs_rreq(wreq, netfs_rreq_trace_ki_complete); 416 wreq->iocb->ki_complete( 417 wreq->iocb, wreq->error ? wreq->error : written); 418 } 419 wreq->iocb = VFS_PTR_POISON; 420 } 421 422 netfs_clear_subrequests(wreq); 423 return true; 424 } 425 426 void netfs_write_collection_worker(struct work_struct *work) 427 { 428 struct netfs_io_request *rreq = container_of(work, struct netfs_io_request, work); 429 430 netfs_see_request(rreq, netfs_rreq_trace_see_work); 431 if (netfs_check_rreq_in_progress(rreq)) { 432 if (netfs_write_collection(rreq)) 433 /* Drop the ref from the IN_PROGRESS flag. */ 434 netfs_put_request(rreq, netfs_rreq_trace_put_work_ip); 435 else 436 netfs_see_request(rreq, netfs_rreq_trace_see_work_complete); 437 } 438 } 439 440 /** 441 * netfs_write_subrequest_terminated - Note the termination of a write operation. 442 * @_op: The I/O request that has terminated. 443 * @transferred_or_error: The amount of data transferred or an error code. 444 * 445 * This tells the library that a contributory write I/O operation has 446 * terminated, one way or another, and that it should collect the results. 447 * 448 * The caller indicates in @transferred_or_error the outcome of the operation, 449 * supplying a positive value to indicate the number of bytes transferred or a 450 * negative error code. The library will look after reissuing I/O operations 451 * as appropriate and writing downloaded data to the cache. 452 * 453 * When this is called, ownership of the subrequest is transferred back to the 454 * library, along with a ref. 455 * 456 * Note that %_op is a void* so that the function can be passed to 457 * kiocb::term_func without the need for a casting wrapper. 458 */ 459 void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error) 460 { 461 struct netfs_io_subrequest *subreq = _op; 462 struct netfs_io_request *wreq = subreq->rreq; 463 464 _enter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error); 465 466 switch (subreq->source) { 467 case NETFS_UPLOAD_TO_SERVER: 468 netfs_stat(&netfs_n_wh_upload_done); 469 break; 470 case NETFS_WRITE_TO_CACHE: 471 netfs_stat(&netfs_n_wh_write_done); 472 break; 473 default: 474 BUG(); 475 } 476 477 if (IS_ERR_VALUE(transferred_or_error)) { 478 subreq->error = transferred_or_error; 479 /* if need retry is set, error should not matter */ 480 if (!test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) { 481 set_bit(NETFS_SREQ_FAILED, &subreq->flags); 482 trace_netfs_failure(wreq, subreq, transferred_or_error, netfs_fail_write); 483 } 484 485 switch (subreq->source) { 486 case NETFS_WRITE_TO_CACHE: 487 netfs_stat(&netfs_n_wh_write_failed); 488 break; 489 case NETFS_UPLOAD_TO_SERVER: 490 netfs_stat(&netfs_n_wh_upload_failed); 491 break; 492 default: 493 break; 494 } 495 trace_netfs_rreq(wreq, netfs_rreq_trace_set_pause); 496 set_bit(NETFS_RREQ_PAUSE, &wreq->flags); 497 } else { 498 if (WARN(transferred_or_error > subreq->len - subreq->transferred, 499 "Subreq excess write: R=%x[%x] %zd > %zu - %zu", 500 wreq->debug_id, subreq->debug_index, 501 transferred_or_error, subreq->len, subreq->transferred)) 502 transferred_or_error = subreq->len - subreq->transferred; 503 504 subreq->error = 0; 505 subreq->transferred += transferred_or_error; 506 507 if (subreq->transferred < subreq->len) 508 set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags); 509 } 510 511 trace_netfs_sreq(subreq, netfs_sreq_trace_terminated); 512 netfs_subreq_clear_in_progress(subreq); 513 netfs_put_subrequest(subreq, netfs_sreq_trace_put_terminated); 514 } 515 EXPORT_SYMBOL(netfs_write_subrequest_terminated); 516