1 // SPDX-License-Identifier: GPL-2.0-only 2 /* Miscellaneous routines. 3 * 4 * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved. 5 * Written by David Howells (dhowells@redhat.com) 6 */ 7 8 #include <linux/swap.h> 9 #include "internal.h" 10 11 /** 12 * netfs_alloc_folioq_buffer - Allocate buffer space into a folio queue 13 * @mapping: Address space to set on the folio (or NULL). 14 * @_buffer: Pointer to the folio queue to add to (may point to a NULL; updated). 15 * @_cur_size: Current size of the buffer (updated). 16 * @size: Target size of the buffer. 17 * @gfp: The allocation constraints. 18 */ 19 int netfs_alloc_folioq_buffer(struct address_space *mapping, 20 struct folio_queue **_buffer, 21 size_t *_cur_size, ssize_t size, gfp_t gfp) 22 { 23 struct folio_queue *tail = *_buffer, *p; 24 25 size = round_up(size, PAGE_SIZE); 26 if (*_cur_size >= size) 27 return 0; 28 29 if (tail) 30 while (tail->next) 31 tail = tail->next; 32 33 do { 34 struct folio *folio; 35 int order = 0, slot; 36 37 if (!tail || folioq_full(tail)) { 38 p = netfs_folioq_alloc(0, GFP_NOFS, netfs_trace_folioq_alloc_buffer); 39 if (!p) 40 return -ENOMEM; 41 if (tail) { 42 tail->next = p; 43 p->prev = tail; 44 } else { 45 *_buffer = p; 46 } 47 tail = p; 48 } 49 50 if (size - *_cur_size > PAGE_SIZE) 51 order = umin(ilog2(size - *_cur_size) - PAGE_SHIFT, 52 MAX_PAGECACHE_ORDER); 53 54 folio = folio_alloc(gfp, order); 55 if (!folio && order > 0) 56 folio = folio_alloc(gfp, 0); 57 if (!folio) 58 return -ENOMEM; 59 60 folio->mapping = mapping; 61 folio->index = *_cur_size / PAGE_SIZE; 62 trace_netfs_folio(folio, netfs_folio_trace_alloc_buffer); 63 slot = folioq_append_mark(tail, folio); 64 *_cur_size += folioq_folio_size(tail, slot); 65 } while (*_cur_size < size); 66 67 return 0; 68 } 69 EXPORT_SYMBOL(netfs_alloc_folioq_buffer); 70 71 /** 72 * netfs_free_folioq_buffer - Free a folio queue. 
73 * @fq: The start of the folio queue to free 74 * 75 * Free up a chain of folio_queues and, if marked, the marked folios they point 76 * to. 77 */ 78 void netfs_free_folioq_buffer(struct folio_queue *fq) 79 { 80 struct folio_queue *next; 81 struct folio_batch fbatch; 82 83 folio_batch_init(&fbatch); 84 85 for (; fq; fq = next) { 86 for (int slot = 0; slot < folioq_count(fq); slot++) { 87 struct folio *folio = folioq_folio(fq, slot); 88 89 if (!folio || 90 !folioq_is_marked(fq, slot)) 91 continue; 92 93 trace_netfs_folio(folio, netfs_folio_trace_put); 94 if (folio_batch_add(&fbatch, folio)) 95 folio_batch_release(&fbatch); 96 } 97 98 netfs_stat_d(&netfs_n_folioq); 99 next = fq->next; 100 kfree(fq); 101 } 102 103 folio_batch_release(&fbatch); 104 } 105 EXPORT_SYMBOL(netfs_free_folioq_buffer); 106 107 /* 108 * Reset the subrequest iterator to refer just to the region remaining to be 109 * read. The iterator may or may not have been advanced by socket ops or 110 * extraction ops to an extent that may or may not match the amount actually 111 * read. 112 */ 113 void netfs_reset_iter(struct netfs_io_subrequest *subreq) 114 { 115 struct iov_iter *io_iter = &subreq->io_iter; 116 size_t remain = subreq->len - subreq->transferred; 117 118 if (io_iter->count > remain) 119 iov_iter_advance(io_iter, io_iter->count - remain); 120 else if (io_iter->count < remain) 121 iov_iter_revert(io_iter, remain - io_iter->count); 122 iov_iter_truncate(&subreq->io_iter, remain); 123 } 124 125 /** 126 * netfs_dirty_folio - Mark folio dirty and pin a cache object for writeback 127 * @mapping: The mapping the folio belongs to. 128 * @folio: The folio being dirtied. 129 * 130 * Set the dirty flag on a folio and pin an in-use cache object in memory so 131 * that writeback can later write to it. This is intended to be called from 132 * the filesystem's ->dirty_folio() method. 133 * 134 * Return: true if the dirty flag was set on the folio, false otherwise. 
135 */ 136 bool netfs_dirty_folio(struct address_space *mapping, struct folio *folio) 137 { 138 struct inode *inode = mapping->host; 139 struct netfs_inode *ictx = netfs_inode(inode); 140 struct fscache_cookie *cookie = netfs_i_cookie(ictx); 141 bool need_use = false; 142 143 _enter(""); 144 145 if (!filemap_dirty_folio(mapping, folio)) 146 return false; 147 if (!fscache_cookie_valid(cookie)) 148 return true; 149 150 if (!(inode_state_read_once(inode) & I_PINNING_NETFS_WB)) { 151 spin_lock(&inode->i_lock); 152 if (!(inode_state_read(inode) & I_PINNING_NETFS_WB)) { 153 inode_state_set(inode, I_PINNING_NETFS_WB); 154 need_use = true; 155 } 156 spin_unlock(&inode->i_lock); 157 158 if (need_use) 159 fscache_use_cookie(cookie, true); 160 } 161 return true; 162 } 163 EXPORT_SYMBOL(netfs_dirty_folio); 164 165 /** 166 * netfs_unpin_writeback - Unpin writeback resources 167 * @inode: The inode on which the cookie resides 168 * @wbc: The writeback control 169 * 170 * Unpin the writeback resources pinned by netfs_dirty_folio(). This is 171 * intended to be called as/by the netfs's ->write_inode() method. 172 */ 173 int netfs_unpin_writeback(struct inode *inode, struct writeback_control *wbc) 174 { 175 struct fscache_cookie *cookie = netfs_i_cookie(netfs_inode(inode)); 176 177 if (wbc->unpinned_netfs_wb) 178 fscache_unuse_cookie(cookie, NULL, NULL); 179 return 0; 180 } 181 EXPORT_SYMBOL(netfs_unpin_writeback); 182 183 /** 184 * netfs_clear_inode_writeback - Clear writeback resources pinned by an inode 185 * @inode: The inode to clean up 186 * @aux: Auxiliary data to apply to the inode 187 * 188 * Clear any writeback resources held by an inode when the inode is evicted. 189 * This must be called before clear_inode() is called. 
 */
void netfs_clear_inode_writeback(struct inode *inode, const void *aux)
{
	struct fscache_cookie *cookie = netfs_i_cookie(netfs_inode(inode));

	/* Only drop the cookie use if the pin flag is set on the inode; the
	 * current file size is handed over along with @aux.
	 */
	if (inode_state_read_once(inode) & I_PINNING_NETFS_WB) {
		loff_t i_size = i_size_read(inode);
		fscache_unuse_cookie(cookie, aux, &i_size);
	}
}
EXPORT_SYMBOL(netfs_clear_inode_writeback);

/**
 * netfs_invalidate_folio - Invalidate or partially invalidate a folio
 * @folio: Folio proposed for release
 * @offset: Offset of the invalidated region
 * @length: Length of the invalidated region
 *
 * Invalidate part or all of a folio for a network filesystem.  The folio will
 * be removed afterwards if the invalidated region covers the entire folio.
 *
 * If the folio has private data attached and the region covers the whole
 * folio, or covers all of the dirty data recorded for a streaming write, the
 * private data is detached and freed and the uptodate and dirty states are
 * cleared.  If the region only overlaps one end of the recorded dirty data,
 * the record is trimmed instead.
 */
void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length)
{
	struct netfs_folio *finfo;
	struct inode *inode = folio_inode(folio);
	struct netfs_inode *ctx = netfs_inode(inode);
	size_t flen = folio_size(folio);

	_enter("{%lx},%zx,%zx", folio->index, offset, length);

	/* When exactly the whole folio is being invalidated, see if the zero
	 * point needs moving up to the end of the folio (capped at the file
	 * size).
	 */
	if (offset == 0 && length == flen) {
		unsigned long long i_size, remote_i_size, zero_point;
		unsigned long long fpos = folio_pos(folio), end;

		/* Check against a snapshot of the sizes first so that the
		 * inode lock is only taken if an update looks to be needed.
		 */
		netfs_read_sizes(inode, &i_size, &remote_i_size, &zero_point);
		end = umin(fpos + flen, i_size);
		if (fpos < i_size && end > zero_point) {
			spin_lock(&inode->i_lock);
			/* Recompute against the values seen under the lock.
			 * NOTE(review): fpos is still compared to the i_size
			 * snapshot taken before locking whereas end uses
			 * inode->i_size - confirm that this is intended.
			 */
			end = umin(fpos + flen, inode->i_size);
			if (fpos < i_size && end > ctx->_zero_point)
				netfs_write_zero_point(inode, end);
			spin_unlock(&inode->i_lock);
		}
	}

	folio_wait_private_2(folio); /* [DEPRECATED] */

	/* No private data attached means there's no state to adjust. */
	if (!folio_test_private(folio))
		return;

	finfo = netfs_folio_info(folio);

	if (offset == 0 && length >= flen)
		goto erase_completely;

	if (finfo) {
		/* We have a partially uptodate page from a streaming write.
		 * The dirty data is [fstart, fend) and the region being
		 * invalidated is [offset, iend), all relative to the folio.
		 */
		unsigned int fstart = finfo->dirty_offset;
		unsigned int fend = fstart + finfo->dirty_len;
		unsigned int iend = offset + length;

		/* Nothing to do if the two regions don't intersect. */
		if (offset >= fend)
			return;
		if (iend <= fstart)
			return;

		/* The invalidation region overlaps the data.  If the region
		 * covers the start of the data, we either move along the start
		 * or just erase the data entirely.
		 */
		if (offset <= fstart) {
			if (iend >= fend)
				goto erase_completely;
			/* Move the start of the data. */
			finfo->dirty_len = fend - iend;
			finfo->dirty_offset = iend;
			trace_netfs_folio(folio, netfs_folio_trace_invalidate_front);
			return;
		}

		/* Reduce the length of the data if the invalidation region
		 * covers the tail part.
		 */
		if (iend >= fend) {
			finfo->dirty_len = offset - fstart;
			trace_netfs_folio(folio, netfs_folio_trace_invalidate_tail);
			return;
		}

		/* A partial write was split.  The caller has already zeroed
		 * it, so just absorb the hole.
		 */
		trace_netfs_folio(folio, netfs_folio_trace_invalidate_middle);
	}
	return;

erase_completely:
	/* Drop the group reference, detach the private data and clear the
	 * uptodate and dirty states.  finfo is only freed after it has been
	 * detached from the folio.
	 */
	netfs_put_group(netfs_folio_group(folio));
	folio_detach_private(folio);
	folio_clear_uptodate(folio);
	folio_cancel_dirty(folio);
	kfree(finfo);
	trace_netfs_folio(folio, netfs_folio_trace_invalidate_all);
}
EXPORT_SYMBOL(netfs_invalidate_folio);

/**
 * netfs_release_folio - Try to release a folio
 * @folio: Folio proposed for release
 * @gfp: Flags qualifying the release
 *
 * Request release of a folio and clean up its private state if it's not busy.
 * Returns true if the folio can now be released, false if not
 */
bool netfs_release_folio(struct folio *folio, gfp_t gfp)
{
	struct inode *inode = folio_inode(folio);
	struct netfs_inode *ctx = netfs_inode(inode);
	unsigned long long i_size, remote_i_size, zero_point, end;

	/* A dirty folio is never released. */
	if (folio_test_dirty(folio))
		return false;

	/* See if the zero point needs moving up to the end of this folio.
	 * A snapshot of the sizes is checked first so that the inode lock is
	 * only taken if an update looks to be needed; the end is then capped
	 * at ctx->_remote_i_size and rechecked under the lock.
	 */
	netfs_read_sizes(inode, &i_size, &remote_i_size, &zero_point);
	end = folio_next_pos(folio);
	if (end > zero_point) {
		spin_lock(&inode->i_lock);
		end = umin(end, ctx->_remote_i_size);
		if (end > ctx->_zero_point)
			netfs_write_zero_point(inode, end);
		spin_unlock(&inode->i_lock);
	}

	/* Refuse if private data is still attached. */
	if (folio_test_private(folio))
		return false;
	if (unlikely(folio_test_private_2(folio))) { /* [DEPRECATED] */
		/* Don't wait if called from kswapd or if the caller's gfp
		 * mask doesn't include __GFP_FS.
		 */
		if (current_is_kswapd() || !(gfp & __GFP_FS))
			return false;
		folio_wait_private_2(folio);
	}
	fscache_note_page_release(netfs_i_cookie(ctx));
	return true;
}
EXPORT_SYMBOL(netfs_release_folio);

/*
 * Wake the collection work item.
 *
 * If collection is offloaded and the request isn't being retried, the
 * request's work item is queued; otherwise whoever is sleeping on the
 * request's waitqueue is woken.
 */
void netfs_wake_collector(struct netfs_io_request *rreq)
{
	if (test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags) &&
	    !test_bit(NETFS_RREQ_RETRYING, &rreq->flags)) {
		queue_work(system_dfl_wq, &rreq->work);
	} else {
		trace_netfs_rreq(rreq, netfs_rreq_trace_wake_queue);
		wake_up(&rreq->waitq);
	}
}

/*
 * Mark a subrequest as no longer being in progress and, if need be, wake the
 * collector.
 */
void netfs_subreq_clear_in_progress(struct netfs_io_subrequest *subreq)
{
	struct netfs_io_request *rreq = subreq->rreq;
	struct netfs_io_stream *stream = &rreq->io_streams[subreq->stream_nr];

	clear_bit_unlock(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
	smp_mb__after_atomic(); /* Clear IN_PROGRESS before task state */

	/* If we are at the head of the queue, wake up the collector.  It is
	 * also woken if the request is currently being retried.
	 */
	if (list_is_first(&subreq->rreq_link, &stream->subrequests) ||
	    test_bit(NETFS_RREQ_RETRYING, &rreq->flags))
		netfs_wake_collector(rreq);
}

/*
 * Wait for all outstanding I/O in a stream to quiesce.
 *
 * Each subrequest on the stream's list is visited in turn and, if it is
 * still in progress, the caller sleeps uninterruptibly on the request's
 * waitqueue until it no longer is.
 */
void netfs_wait_for_in_progress_stream(struct netfs_io_request *rreq,
				       struct netfs_io_stream *stream)
{
	struct netfs_io_subrequest *subreq;
	DEFINE_WAIT(myself);

	list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
		smp_rmb(); /* Read ->next before IN_PROGRESS. */
		if (!netfs_check_subreq_in_progress(subreq))
			continue;

		trace_netfs_rreq(rreq, netfs_rreq_trace_wait_quiesce);
		for (;;) {
			/* Get on the waitqueue before rechecking the flag so
			 * that a wakeup issued in between isn't missed.
			 */
			prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE);

			if (!netfs_check_subreq_in_progress(subreq))
				break;

			trace_netfs_sreq(subreq, netfs_sreq_trace_wait_for);
			schedule();
		}
	}

	trace_netfs_rreq(rreq, netfs_rreq_trace_waited_quiesce);
	finish_wait(&rreq->waitq, &myself);
}

/*
 * Perform collection in app thread if not offloaded to workqueue.
399 */ 400 static int netfs_collect_in_app(struct netfs_io_request *rreq, 401 bool (*collector)(struct netfs_io_request *rreq)) 402 { 403 bool need_collect = false, inactive = true, done = true; 404 405 if (!netfs_check_rreq_in_progress(rreq)) { 406 trace_netfs_rreq(rreq, netfs_rreq_trace_recollect); 407 return 1; /* Done */ 408 } 409 410 for (int i = 0; i < NR_IO_STREAMS; i++) { 411 struct netfs_io_subrequest *subreq; 412 struct netfs_io_stream *stream = &rreq->io_streams[i]; 413 414 if (!stream->active) 415 continue; 416 inactive = false; 417 trace_netfs_collect_stream(rreq, stream); 418 subreq = list_first_entry_or_null(&stream->subrequests, 419 struct netfs_io_subrequest, 420 rreq_link); 421 if (subreq && 422 (!netfs_check_subreq_in_progress(subreq) || 423 test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags))) { 424 need_collect = true; 425 break; 426 } 427 if (subreq || !test_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags)) 428 done = false; 429 } 430 431 if (!need_collect && !inactive && !done) 432 return 0; /* Sleep */ 433 434 __set_current_state(TASK_RUNNING); 435 if (collector(rreq)) { 436 /* Drop the ref from the NETFS_RREQ_IN_PROGRESS flag. */ 437 netfs_put_request(rreq, netfs_rreq_trace_put_work_ip); 438 return 1; /* Done */ 439 } 440 441 if (inactive) { 442 WARN(true, "Failed to collect inactive req R=%08x\n", 443 rreq->debug_id); 444 cond_resched(); 445 } 446 return 2; /* Again */ 447 } 448 449 /* 450 * Wait for a request to complete, successfully or otherwise. 
 *
 * If collection isn't offloaded to a workqueue, it is driven from here by
 * calling netfs_collect_in_app() with the supplied collector each time round
 * the wait loop.
 *
 * Returns rreq->error if that is non-zero, otherwise rreq->transferred -
 * except that, for origins other than the DIO, unbuffered and single-read
 * ones, -EIO is returned if less than rreq->len was submitted.
 */
static ssize_t netfs_wait_for_in_progress(struct netfs_io_request *rreq,
					  bool (*collector)(struct netfs_io_request *rreq))
{
	DEFINE_WAIT(myself);
	ssize_t ret;

	for (;;) {
		/* Get on the waitqueue before checking state so that a
		 * wakeup issued in between isn't missed.
		 */
		prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE);

		if (!test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) {
			switch (netfs_collect_in_app(rreq, collector)) {
			case 0:
				/* Nothing to collect: fall out to the sleep. */
				break;
			case 1:
				/* The request has been fully collected. */
				goto all_collected;
			case 2:
				/* More to do: go round again without sleeping
				 * unless the request has since finished.
				 */
				if (!netfs_check_rreq_in_progress(rreq))
					break;
				cond_resched();
				continue;
			}
		}

		if (!netfs_check_rreq_in_progress(rreq))
			break;

		trace_netfs_rreq(rreq, netfs_rreq_trace_wait_ip);
		schedule();
	}

all_collected:
	trace_netfs_rreq(rreq, netfs_rreq_trace_waited_ip);
	finish_wait(&rreq->waitq, &myself);

	ret = rreq->error;
	if (ret == 0) {
		ret = rreq->transferred;
		switch (rreq->origin) {
		case NETFS_DIO_READ:
		case NETFS_DIO_WRITE:
		case NETFS_READ_SINGLE:
		case NETFS_UNBUFFERED_READ:
		case NETFS_UNBUFFERED_WRITE:
			/* The amount transferred is returned as is. */
			break;
		default:
			/* Anything else must have had the whole of its length
			 * submitted or it is reported as an I/O error.
			 */
			if (rreq->submitted < rreq->len) {
				trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read);
				ret = -EIO;
			}
			break;
		}
	}

	return ret;
}

/*
 * Wait for a read request to complete, using netfs_read_collection() as the
 * collector.  The return value is that of netfs_wait_for_in_progress().
 */
ssize_t netfs_wait_for_read(struct netfs_io_request *rreq)
{
	return netfs_wait_for_in_progress(rreq, netfs_read_collection);
}

/*
 * Wait for a write request to complete, using netfs_write_collection() as the
 * collector.  The return value is that of netfs_wait_for_in_progress().
 */
ssize_t netfs_wait_for_write(struct netfs_io_request *rreq)
{
	return netfs_wait_for_in_progress(rreq, netfs_write_collection);
}

/*
 * Wait for a paused operation to unpause or complete in some manner.
520 */ 521 static void netfs_wait_for_pause(struct netfs_io_request *rreq, 522 bool (*collector)(struct netfs_io_request *rreq)) 523 { 524 DEFINE_WAIT(myself); 525 526 for (;;) { 527 trace_netfs_rreq(rreq, netfs_rreq_trace_wait_pause); 528 prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE); 529 530 if (!test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) { 531 switch (netfs_collect_in_app(rreq, collector)) { 532 case 0: 533 break; 534 case 1: 535 goto all_collected; 536 case 2: 537 if (!netfs_check_rreq_in_progress(rreq) || 538 !test_bit(NETFS_RREQ_PAUSE, &rreq->flags)) 539 break; 540 cond_resched(); 541 continue; 542 } 543 } 544 545 if (!netfs_check_rreq_in_progress(rreq) || 546 !test_bit(NETFS_RREQ_PAUSE, &rreq->flags)) 547 break; 548 549 schedule(); 550 } 551 552 all_collected: 553 trace_netfs_rreq(rreq, netfs_rreq_trace_waited_pause); 554 finish_wait(&rreq->waitq, &myself); 555 } 556 557 void netfs_wait_for_paused_read(struct netfs_io_request *rreq) 558 { 559 return netfs_wait_for_pause(rreq, netfs_read_collection); 560 } 561 562 void netfs_wait_for_paused_write(struct netfs_io_request *rreq) 563 { 564 return netfs_wait_for_pause(rreq, netfs_write_collection); 565 } 566