1 // SPDX-License-Identifier: GPL-2.0-only 2 /* Miscellaneous routines. 3 * 4 * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved. 5 * Written by David Howells (dhowells@redhat.com) 6 */ 7 8 #include <linux/swap.h> 9 #include "internal.h" 10 11 /** 12 * netfs_alloc_folioq_buffer - Allocate buffer space into a folio queue 13 * @mapping: Address space to set on the folio (or NULL). 14 * @_buffer: Pointer to the folio queue to add to (may point to a NULL; updated). 15 * @_cur_size: Current size of the buffer (updated). 16 * @size: Target size of the buffer. 17 * @gfp: The allocation constraints. 18 */ 19 int netfs_alloc_folioq_buffer(struct address_space *mapping, 20 struct folio_queue **_buffer, 21 size_t *_cur_size, ssize_t size, gfp_t gfp) 22 { 23 struct folio_queue *tail = *_buffer, *p; 24 25 size = round_up(size, PAGE_SIZE); 26 if (*_cur_size >= size) 27 return 0; 28 29 if (tail) 30 while (tail->next) 31 tail = tail->next; 32 33 do { 34 struct folio *folio; 35 int order = 0, slot; 36 37 if (!tail || folioq_full(tail)) { 38 p = netfs_folioq_alloc(0, GFP_NOFS, netfs_trace_folioq_alloc_buffer); 39 if (!p) 40 return -ENOMEM; 41 if (tail) { 42 tail->next = p; 43 p->prev = tail; 44 } else { 45 *_buffer = p; 46 } 47 tail = p; 48 } 49 50 if (size - *_cur_size > PAGE_SIZE) 51 order = umin(ilog2(size - *_cur_size) - PAGE_SHIFT, 52 MAX_PAGECACHE_ORDER); 53 54 folio = folio_alloc(gfp, order); 55 if (!folio && order > 0) 56 folio = folio_alloc(gfp, 0); 57 if (!folio) 58 return -ENOMEM; 59 60 folio->mapping = mapping; 61 folio->index = *_cur_size / PAGE_SIZE; 62 trace_netfs_folio(folio, netfs_folio_trace_alloc_buffer); 63 slot = folioq_append_mark(tail, folio); 64 *_cur_size += folioq_folio_size(tail, slot); 65 } while (*_cur_size < size); 66 67 return 0; 68 } 69 EXPORT_SYMBOL(netfs_alloc_folioq_buffer); 70 71 /** 72 * netfs_free_folioq_buffer - Free a folio queue. 73 * @fq: The start of the folio queue to free 74 * 75 * Free up a chain of folio_queues and, if marked, the marked folios they point 76 * to. 77 */ 78 void netfs_free_folioq_buffer(struct folio_queue *fq) 79 { 80 struct folio_queue *next; 81 struct folio_batch fbatch; 82 83 folio_batch_init(&fbatch); 84 85 for (; fq; fq = next) { 86 for (int slot = 0; slot < folioq_count(fq); slot++) { 87 struct folio *folio = folioq_folio(fq, slot); 88 89 if (!folio || 90 !folioq_is_marked(fq, slot)) 91 continue; 92 93 trace_netfs_folio(folio, netfs_folio_trace_put); 94 if (folio_batch_add(&fbatch, folio)) 95 folio_batch_release(&fbatch); 96 } 97 98 netfs_stat_d(&netfs_n_folioq); 99 next = fq->next; 100 kfree(fq); 101 } 102 103 folio_batch_release(&fbatch); 104 } 105 EXPORT_SYMBOL(netfs_free_folioq_buffer); 106 107 /* 108 * Reset the subrequest iterator to refer just to the region remaining to be 109 * read. The iterator may or may not have been advanced by socket ops or 110 * extraction ops to an extent that may or may not match the amount actually 111 * read. 112 */ 113 void netfs_reset_iter(struct netfs_io_subrequest *subreq) 114 { 115 struct iov_iter *io_iter = &subreq->io_iter; 116 size_t remain = subreq->len - subreq->transferred; 117 118 if (io_iter->count > remain) 119 iov_iter_advance(io_iter, io_iter->count - remain); 120 else if (io_iter->count < remain) 121 iov_iter_revert(io_iter, remain - io_iter->count); 122 iov_iter_truncate(&subreq->io_iter, remain); 123 } 124 125 /** 126 * netfs_dirty_folio - Mark folio dirty and pin a cache object for writeback 127 * @mapping: The mapping the folio belongs to. 128 * @folio: The folio being dirtied. 129 * 130 * Set the dirty flag on a folio and pin an in-use cache object in memory so 131 * that writeback can later write to it. This is intended to be called from 132 * the filesystem's ->dirty_folio() method. 133 * 134 * Return: true if the dirty flag was set on the folio, false otherwise. 135 */ 136 bool netfs_dirty_folio(struct address_space *mapping, struct folio *folio) 137 { 138 struct inode *inode = mapping->host; 139 struct netfs_inode *ictx = netfs_inode(inode); 140 struct fscache_cookie *cookie = netfs_i_cookie(ictx); 141 bool need_use = false; 142 143 _enter(""); 144 145 if (!filemap_dirty_folio(mapping, folio)) 146 return false; 147 if (!fscache_cookie_valid(cookie)) 148 return true; 149 150 if (!(inode->i_state & I_PINNING_NETFS_WB)) { 151 spin_lock(&inode->i_lock); 152 if (!(inode->i_state & I_PINNING_NETFS_WB)) { 153 inode->i_state |= I_PINNING_NETFS_WB; 154 need_use = true; 155 } 156 spin_unlock(&inode->i_lock); 157 158 if (need_use) 159 fscache_use_cookie(cookie, true); 160 } 161 return true; 162 } 163 EXPORT_SYMBOL(netfs_dirty_folio); 164 165 /** 166 * netfs_unpin_writeback - Unpin writeback resources 167 * @inode: The inode on which the cookie resides 168 * @wbc: The writeback control 169 * 170 * Unpin the writeback resources pinned by netfs_dirty_folio(). This is 171 * intended to be called as/by the netfs's ->write_inode() method. 172 */ 173 int netfs_unpin_writeback(struct inode *inode, struct writeback_control *wbc) 174 { 175 struct fscache_cookie *cookie = netfs_i_cookie(netfs_inode(inode)); 176 177 if (wbc->unpinned_netfs_wb) 178 fscache_unuse_cookie(cookie, NULL, NULL); 179 return 0; 180 } 181 EXPORT_SYMBOL(netfs_unpin_writeback); 182 183 /** 184 * netfs_clear_inode_writeback - Clear writeback resources pinned by an inode 185 * @inode: The inode to clean up 186 * @aux: Auxiliary data to apply to the inode 187 * 188 * Clear any writeback resources held by an inode when the inode is evicted. 189 * This must be called before clear_inode() is called. 190 */ 191 void netfs_clear_inode_writeback(struct inode *inode, const void *aux) 192 { 193 struct fscache_cookie *cookie = netfs_i_cookie(netfs_inode(inode)); 194 195 if (inode->i_state & I_PINNING_NETFS_WB) { 196 loff_t i_size = i_size_read(inode); 197 fscache_unuse_cookie(cookie, aux, &i_size); 198 } 199 } 200 EXPORT_SYMBOL(netfs_clear_inode_writeback); 201 202 /** 203 * netfs_invalidate_folio - Invalidate or partially invalidate a folio 204 * @folio: Folio proposed for release 205 * @offset: Offset of the invalidated region 206 * @length: Length of the invalidated region 207 * 208 * Invalidate part or all of a folio for a network filesystem. The folio will 209 * be removed afterwards if the invalidated region covers the entire folio. 210 */ 211 void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length) 212 { 213 struct netfs_folio *finfo; 214 struct netfs_inode *ctx = netfs_inode(folio_inode(folio)); 215 size_t flen = folio_size(folio); 216 217 _enter("{%lx},%zx,%zx", folio->index, offset, length); 218 219 if (offset == 0 && length == flen) { 220 unsigned long long i_size = i_size_read(&ctx->inode); 221 unsigned long long fpos = folio_pos(folio), end; 222 223 end = umin(fpos + flen, i_size); 224 if (fpos < i_size && end > ctx->zero_point) 225 ctx->zero_point = end; 226 } 227 228 folio_wait_private_2(folio); /* [DEPRECATED] */ 229 230 if (!folio_test_private(folio)) 231 return; 232 233 finfo = netfs_folio_info(folio); 234 235 if (offset == 0 && length >= flen) 236 goto erase_completely; 237 238 if (finfo) { 239 /* We have a partially uptodate page from a streaming write. */ 240 unsigned int fstart = finfo->dirty_offset; 241 unsigned int fend = fstart + finfo->dirty_len; 242 unsigned int iend = offset + length; 243 244 if (offset >= fend) 245 return; 246 if (iend <= fstart) 247 return; 248 249 /* The invalidation region overlaps the data. If the region 250 * covers the start of the data, we either move along the start 251 * or just erase the data entirely. 252 */ 253 if (offset <= fstart) { 254 if (iend >= fend) 255 goto erase_completely; 256 /* Move the start of the data. */ 257 finfo->dirty_len = fend - iend; 258 finfo->dirty_offset = offset; 259 return; 260 } 261 262 /* Reduce the length of the data if the invalidation region 263 * covers the tail part. 264 */ 265 if (iend >= fend) { 266 finfo->dirty_len = offset - fstart; 267 return; 268 } 269 270 /* A partial write was split. The caller has already zeroed 271 * it, so just absorb the hole. 272 */ 273 } 274 return; 275 276 erase_completely: 277 netfs_put_group(netfs_folio_group(folio)); 278 folio_detach_private(folio); 279 folio_clear_uptodate(folio); 280 kfree(finfo); 281 return; 282 } 283 EXPORT_SYMBOL(netfs_invalidate_folio); 284 285 /** 286 * netfs_release_folio - Try to release a folio 287 * @folio: Folio proposed for release 288 * @gfp: Flags qualifying the release 289 * 290 * Request release of a folio and clean up its private state if it's not busy. 291 * Returns true if the folio can now be released, false if not 292 */ 293 bool netfs_release_folio(struct folio *folio, gfp_t gfp) 294 { 295 struct netfs_inode *ctx = netfs_inode(folio_inode(folio)); 296 unsigned long long end; 297 298 if (folio_test_dirty(folio)) 299 return false; 300 301 end = umin(folio_pos(folio) + folio_size(folio), i_size_read(&ctx->inode)); 302 if (end > ctx->zero_point) 303 ctx->zero_point = end; 304 305 if (folio_test_private(folio)) 306 return false; 307 if (unlikely(folio_test_private_2(folio))) { /* [DEPRECATED] */ 308 if (current_is_kswapd() || !(gfp & __GFP_FS)) 309 return false; 310 folio_wait_private_2(folio); 311 } 312 fscache_note_page_release(netfs_i_cookie(ctx)); 313 return true; 314 } 315 EXPORT_SYMBOL(netfs_release_folio); 316 317 /* 318 * Wake the collection work item. 319 */ 320 void netfs_wake_collector(struct netfs_io_request *rreq) 321 { 322 if (test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags) && 323 !test_bit(NETFS_RREQ_RETRYING, &rreq->flags)) { 324 queue_work(system_unbound_wq, &rreq->work); 325 } else { 326 trace_netfs_rreq(rreq, netfs_rreq_trace_wake_queue); 327 wake_up(&rreq->waitq); 328 } 329 } 330 331 /* 332 * Mark a subrequest as no longer being in progress and, if need be, wake the 333 * collector. 334 */ 335 void netfs_subreq_clear_in_progress(struct netfs_io_subrequest *subreq) 336 { 337 struct netfs_io_request *rreq = subreq->rreq; 338 struct netfs_io_stream *stream = &rreq->io_streams[subreq->stream_nr]; 339 340 clear_bit_unlock(NETFS_SREQ_IN_PROGRESS, &subreq->flags); 341 smp_mb__after_atomic(); /* Clear IN_PROGRESS before task state */ 342 343 /* If we are at the head of the queue, wake up the collector. */ 344 if (list_is_first(&subreq->rreq_link, &stream->subrequests) || 345 test_bit(NETFS_RREQ_RETRYING, &rreq->flags)) 346 netfs_wake_collector(rreq); 347 } 348 349 /* 350 * Wait for all outstanding I/O in a stream to quiesce. 351 */ 352 void netfs_wait_for_in_progress_stream(struct netfs_io_request *rreq, 353 struct netfs_io_stream *stream) 354 { 355 struct netfs_io_subrequest *subreq; 356 DEFINE_WAIT(myself); 357 358 list_for_each_entry(subreq, &stream->subrequests, rreq_link) { 359 if (!netfs_check_subreq_in_progress(subreq)) 360 continue; 361 362 trace_netfs_rreq(rreq, netfs_rreq_trace_wait_quiesce); 363 for (;;) { 364 prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE); 365 366 if (!netfs_check_subreq_in_progress(subreq)) 367 break; 368 369 trace_netfs_sreq(subreq, netfs_sreq_trace_wait_for); 370 schedule(); 371 } 372 } 373 374 trace_netfs_rreq(rreq, netfs_rreq_trace_waited_quiesce); 375 finish_wait(&rreq->waitq, &myself); 376 } 377 378 /* 379 * Perform collection in app thread if not offloaded to workqueue. 380 */ 381 static int netfs_collect_in_app(struct netfs_io_request *rreq, 382 bool (*collector)(struct netfs_io_request *rreq)) 383 { 384 bool need_collect = false, inactive = true, done = true; 385 386 if (!netfs_check_rreq_in_progress(rreq)) { 387 trace_netfs_rreq(rreq, netfs_rreq_trace_recollect); 388 return 1; /* Done */ 389 } 390 391 for (int i = 0; i < NR_IO_STREAMS; i++) { 392 struct netfs_io_subrequest *subreq; 393 struct netfs_io_stream *stream = &rreq->io_streams[i]; 394 395 if (!stream->active) 396 continue; 397 inactive = false; 398 trace_netfs_collect_stream(rreq, stream); 399 subreq = list_first_entry_or_null(&stream->subrequests, 400 struct netfs_io_subrequest, 401 rreq_link); 402 if (subreq && 403 (!netfs_check_subreq_in_progress(subreq) || 404 test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags))) { 405 need_collect = true; 406 break; 407 } 408 if (subreq || !test_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags)) 409 done = false; 410 } 411 412 if (!need_collect && !inactive && !done) 413 return 0; /* Sleep */ 414 415 __set_current_state(TASK_RUNNING); 416 if (collector(rreq)) { 417 /* Drop the ref from the NETFS_RREQ_IN_PROGRESS flag. */ 418 netfs_put_request(rreq, netfs_rreq_trace_put_work_ip); 419 return 1; /* Done */ 420 } 421 422 if (inactive) { 423 WARN(true, "Failed to collect inactive req R=%08x\n", 424 rreq->debug_id); 425 cond_resched(); 426 } 427 return 2; /* Again */ 428 } 429 430 /* 431 * Wait for a request to complete, successfully or otherwise. 432 */ 433 static ssize_t netfs_wait_for_in_progress(struct netfs_io_request *rreq, 434 bool (*collector)(struct netfs_io_request *rreq)) 435 { 436 DEFINE_WAIT(myself); 437 ssize_t ret; 438 439 for (;;) { 440 prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE); 441 442 if (!test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) { 443 switch (netfs_collect_in_app(rreq, collector)) { 444 case 0: 445 break; 446 case 1: 447 goto all_collected; 448 case 2: 449 if (!netfs_check_rreq_in_progress(rreq)) 450 break; 451 cond_resched(); 452 continue; 453 } 454 } 455 456 if (!netfs_check_rreq_in_progress(rreq)) 457 break; 458 459 trace_netfs_rreq(rreq, netfs_rreq_trace_wait_ip); 460 schedule(); 461 } 462 463 all_collected: 464 trace_netfs_rreq(rreq, netfs_rreq_trace_waited_ip); 465 finish_wait(&rreq->waitq, &myself); 466 467 ret = rreq->error; 468 if (ret == 0) { 469 ret = rreq->transferred; 470 switch (rreq->origin) { 471 case NETFS_DIO_READ: 472 case NETFS_DIO_WRITE: 473 case NETFS_READ_SINGLE: 474 case NETFS_UNBUFFERED_READ: 475 case NETFS_UNBUFFERED_WRITE: 476 break; 477 default: 478 if (rreq->submitted < rreq->len) { 479 trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read); 480 ret = -EIO; 481 } 482 break; 483 } 484 } 485 486 return ret; 487 } 488 489 ssize_t netfs_wait_for_read(struct netfs_io_request *rreq) 490 { 491 return netfs_wait_for_in_progress(rreq, netfs_read_collection); 492 } 493 494 ssize_t netfs_wait_for_write(struct netfs_io_request *rreq) 495 { 496 return netfs_wait_for_in_progress(rreq, netfs_write_collection); 497 } 498 499 /* 500 * Wait for a paused operation to unpause or complete in some manner. 501 */ 502 static void netfs_wait_for_pause(struct netfs_io_request *rreq, 503 bool (*collector)(struct netfs_io_request *rreq)) 504 { 505 DEFINE_WAIT(myself); 506 507 for (;;) { 508 trace_netfs_rreq(rreq, netfs_rreq_trace_wait_pause); 509 prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE); 510 511 if (!test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) { 512 switch (netfs_collect_in_app(rreq, collector)) { 513 case 0: 514 break; 515 case 1: 516 goto all_collected; 517 case 2: 518 if (!netfs_check_rreq_in_progress(rreq) || 519 !test_bit(NETFS_RREQ_PAUSE, &rreq->flags)) 520 break; 521 cond_resched(); 522 continue; 523 } 524 } 525 526 if (!netfs_check_rreq_in_progress(rreq) || 527 !test_bit(NETFS_RREQ_PAUSE, &rreq->flags)) 528 break; 529 530 schedule(); 531 } 532 533 all_collected: 534 trace_netfs_rreq(rreq, netfs_rreq_trace_waited_pause); 535 finish_wait(&rreq->waitq, &myself); 536 } 537 538 void netfs_wait_for_paused_read(struct netfs_io_request *rreq) 539 { 540 return netfs_wait_for_pause(rreq, netfs_read_collection); 541 } 542 543 void netfs_wait_for_paused_write(struct netfs_io_request *rreq) 544 { 545 return netfs_wait_for_pause(rreq, netfs_write_collection); 546 } 547