Lines Matching +full:charge +full:- +full:current +full:- +full:limit +full:- +full:mapping
1 // SPDX-License-Identifier: GPL-2.0-only
3 * mm/page-writeback.c
26 #include <linux/backing-dev.h>
54 #define DIRTY_POLL_THRESH (128 >> (PAGE_SHIFT - 10))
57 * Estimate write bandwidth or update dirty limit at 200ms intervals.
100 * The interval between `kupdate'-style writebacks
119 /* End of sysctl-exported parameters */
137 unsigned long wb_dirty; /* per-wb counterparts */
149 * reflect changes in current writeout rate.
157 .wb_completions = &(__wb)->completions
163 .wb_completions = &(__wb)->memcg_completions, \
168 return dtc->dom;
173 return dtc->dom;
178 return mdtc->gdtc;
183 return &wb->memcg_completions;
189 unsigned long this_bw = READ_ONCE(wb->avg_write_bandwidth);
190 unsigned long tot_bw = atomic_long_read(&wb->bdi->tot_write_bandwidth);
191 unsigned long long min = wb->bdi->min_ratio;
192 unsigned long long max = wb->bdi->max_ratio;
216 .wb_completions = &(__wb)->completions
243 *minp = wb->bdi->min_ratio;
244 *maxp = wb->bdi->max_ratio;
257 * user-configurable dirty ratio is the effective number of pages that
261 * Because the user is allowed to specify the dirty limit globally as
262 * absolute number of bytes, calculating the per-zone dirty limit can
263 * require translating the configured limit into a percentage of
268 * node_dirtyable_memory - number of dirtyable pages in a node
272 * page cache. This is the base value for the per-node dirty limits.
280 struct zone *zone = pgdat->node_zones + z;
293 nr_pages -= min(nr_pages, pgdat->totalreserve_pages);
316 z = &NODE_DATA(node)->node_zones[i];
322 nr_pages -= min(nr_pages, high_wmark_pages(z));
342 * global_dirtyable_memory - number of globally dirtyable pages
357 x -= min(x, totalreserve_pages);
363 x -= highmem_dirtyable_memory(x);
369 * domain_dirty_limits - calculate thresh and bg_thresh for a wb_domain
372 * Calculate @dtc->thresh and ->bg_thresh considering
374 * must ensure that @dtc->avail is set before calling this function. The
375 * dirty limits will be lifted by 1/4 for real-time tasks.
379 const unsigned long available_memory = dtc->avail;
383 /* convert ratios to per-PAGE_SIZE for higher precision */
392 unsigned long global_avail = gdtc->avail;
398 * per-PAGE_SIZE, they can be obtained by dividing bytes by
420 tsk = current;
427 * 32-bits. This gives 16TB dirty limits max which is hopefully enough.
431 /* This makes sure bg_thresh is within 32-bits as well */
434 dtc->thresh = thresh;
435 dtc->bg_thresh = bg_thresh;
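Editor's note: the domain_dirty_limits() fragments above derive thresh and bg_thresh either from the byte-based sysctls or from the ratio knobs scaled against dtc->avail, then lift the result by 1/4 for real-time tasks. A minimal stand-alone sketch of that arithmetic follows; dirty_thresh, avail_pages and the 4 KiB PAGE_SIZE are illustrative assumptions, not the kernel's helpers.

/* Toy model: threshold from an absolute byte limit (vm.dirty_bytes style)
 * or from a percentage of dirtyable memory (vm.dirty_ratio style),
 * lifted by 1/4 for real-time tasks. */
#include <stdbool.h>
#include <stdio.h>

#define PAGE_SIZE 4096UL	/* assumed page size for the example */

static unsigned long dirty_thresh(unsigned long avail_pages,
				  unsigned long dirty_bytes,	/* 0 = use ratio */
				  unsigned long ratio_percent,
				  bool rt_task)
{
	unsigned long thresh;

	if (dirty_bytes)
		thresh = dirty_bytes / PAGE_SIZE;
	else
		thresh = avail_pages * ratio_percent / 100;

	if (rt_task)
		thresh += thresh / 4;	/* lift the limit by 1/4 for RT tasks */
	return thresh;
}

int main(void)
{
	unsigned long avail = (4UL << 30) / PAGE_SIZE;	/* 4 GiB dirtyable */

	printf("thresh    = %lu pages\n", dirty_thresh(avail, 0, 20, false));
	printf("bg_thresh = %lu pages\n", dirty_thresh(avail, 0, 10, false));
	return 0;
}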
443 * global_dirty_limits - background-writeback and dirty-throttling thresholds
462 * node_dirty_limit - maximum number of dirty pages allowed in a node
471 struct task_struct *tsk = current;
485 * 32-bits. This gives 16TB dirty limits max which is hopefully enough.
491 * node_dirty_ok - tells whether a node is within its dirty limits
495 * dirty limit, %false if the limit is exceeded.
499 unsigned long limit = node_dirty_limit(pgdat);
505 return nr_pages <= limit;
531 return -ERANGE;
562 return -ERANGE;
584 __fprop_add_percpu_max(&dom->completions, completions,
587 if (unlikely(!dom->period_time)) {
594 dom->period_time = wp_next_time(jiffies);
595 mod_timer(&dom->period_timer, dom->period_time);
608 wb_domain_writeout_add(&global_wb_domain, &wb->completions,
609 wb->bdi->max_prop_frac, nr);
614 wb->bdi->max_prop_frac, nr);
634 int miss_periods = (jiffies - dom->period_time) /
637 if (fprop_new_period(&dom->completions, miss_periods + 1)) {
638 dom->period_time = wp_next_time(dom->period_time +
640 mod_timer(&dom->period_timer, dom->period_time);
646 dom->period_time = 0;
654 spin_lock_init(&dom->lock);
656 timer_setup(&dom->period_timer, writeout_period, TIMER_DEFERRABLE);
658 dom->dirty_limit_tstamp = jiffies;
660 return fprop_global_init(&dom->completions, gfp);
666 del_timer_sync(&dom->period_timer);
667 fprop_global_destroy(&dom->completions);
683 return -EINVAL;
718 return -EINVAL;
721 if (min_ratio > bdi->max_ratio) {
722 ret = -EINVAL;
724 if (min_ratio < bdi->min_ratio) {
725 delta = bdi->min_ratio - min_ratio;
726 bdi_min_ratio -= delta;
727 bdi->min_ratio = min_ratio;
729 delta = min_ratio - bdi->min_ratio;
732 bdi->min_ratio = min_ratio;
734 ret = -EINVAL;
748 return -EINVAL;
751 if (bdi->min_ratio > max_ratio) {
752 ret = -EINVAL;
754 bdi->max_ratio = max_ratio;
755 bdi->max_prop_frac = (FPROP_FRAC_BASE * max_ratio) /
786 return bdi_get_bytes(bdi->min_ratio);
805 return bdi_get_bytes(bdi->max_ratio);
825 return -EINVAL;
829 bdi->capabilities |= BDI_CAP_STRICTLIMIT;
831 bdi->capabilities &= ~BDI_CAP_STRICTLIMIT;
846 return max(thresh, dom->dirty_limit);
851 * system-wide clean memory excluding the amount being used in the domain.
857 unsigned long clean = filepages - min(filepages, mdtc->dirty);
858 unsigned long global_clean = gdtc->avail - min(gdtc->avail, gdtc->dirty);
859 unsigned long other_clean = global_clean - min(global_clean, clean);
861 mdtc->avail = filepages + min(headroom, other_clean);
877 dtc->avail = global_dirtyable_memory();
878 dtc->dirty = global_node_page_state(NR_FILE_DIRTY);
880 dtc->dirty += global_node_page_state(NR_WRITEBACK);
884 mem_cgroup_wb_stats(dtc->wb, &filepages, &headroom, &dtc->dirty,
887 dtc->dirty += writeback;
893 * __wb_calc_thresh - @wb's share of dirty threshold
898 * threshold as a hard limit when sleeping max_pause per page is not enough
906 * - starving fast devices
907 * - piling up dirty pages (that will take long time to sync) on slow devices
909 * The wb's share of dirty limit will be adapting to its throughput and
910 * bounded by the bdi->min_ratio and/or bdi->max_ratio parameters, if set.
912 * Return: @wb's dirty limit in pages. For dirty throttling limit, the term
920 struct bdi_writeback *wb = dtc->wb;
929 fprop_fraction_percpu(&dom->completions, dtc->wb_completions,
932 wb_thresh = (thresh * (100 * BDI_RATIO_SCALE - bdi_min_ratio)) / (100 * BDI_RATIO_SCALE);
945 * a hard limit in balance_dirty_pages() and wb_position_ratio().
950 if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
951 unsigned long limit = hard_dirty_limit(dom, dtc->thresh);
954 if (limit > dtc->dirty)
955 wb_scale_thresh = (limit - dtc->dirty) / 100;
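Editor's note: __wb_calc_thresh() above hands each writeback device a share of the dirty threshold proportional to its recent writeout completions, bounded by the per-bdi min/max ratios. A rough stand-alone model of that split follows; wb_share_of_thresh is an invented name, and the kernel additionally works in BDI_RATIO_SCALE units and subtracts the sum of all configured min ratios.

/* Toy model: split the global dirty threshold between devices in
 * proportion to their share of recently completed writeouts, then apply
 * per-device min/max bounds (plain percentages here). */
#include <stdint.h>
#include <stdio.h>

static uint64_t wb_share_of_thresh(uint64_t thresh,
				   uint64_t wb_completions,
				   uint64_t total_completions,
				   unsigned int min_ratio,	/* percent */
				   unsigned int max_ratio)	/* percent */
{
	uint64_t wb_thresh;

	/* the part not reserved by min ratios, split by completion share */
	wb_thresh = thresh * (100 - min_ratio) / 100;
	wb_thresh = wb_thresh * wb_completions /
		    (total_completions ? total_completions : 1);

	wb_thresh += thresh * min_ratio / 100;		/* guaranteed floor */
	if (wb_thresh > thresh * max_ratio / 100)	/* configured ceiling */
		wb_thresh = thresh * max_ratio / 100;
	return wb_thresh;
}

int main(void)
{
	/* a fast disk with 900 of 1000 recent completions vs a slow USB key */
	printf("fast disk: %llu pages\n",
	       (unsigned long long)wb_share_of_thresh(100000, 900, 1000, 0, 100));
	printf("usb key:   %llu pages\n",
	       (unsigned long long)wb_share_of_thresh(100000, 100, 1000, 0, 100));
	return 0;
}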
982 *                            setpoint - dirty 3
983 *         f(dirty) := 1.0 + (----------------)
984 *                            limit - setpoint
990 * (3) f(limit) = 0 => the hard limit
997 unsigned long limit)
1002 x = div64_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT,
1003 (limit - setpoint) | 1);
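Editor's note: the cubic above is what pos_ratio_polynom() computes in fixed point. The sketch below is a stand-alone re-implementation for experimentation, using 1.0 == 1 << 10 to match the kernel's RATELIMIT_CALC_SHIFT; like the kernel it relies on arithmetic right shift of negative values.

/* Stand-alone version of the cubic control curve: f(setpoint) == 1.0,
 * falling to 0 at the hard limit, rising toward 2.0 below the setpoint. */
#include <stdio.h>

#define RATELIMIT_CALC_SHIFT	10	/* fixed point: 1.0 == 1 << 10 */

static long long pos_ratio_polynom(unsigned long setpoint,
				   unsigned long dirty,
				   unsigned long limit)
{
	long long pos_ratio;
	long long x;

	x = ((long long)setpoint - (long long)dirty) << RATELIMIT_CALC_SHIFT;
	x /= (long long)((limit - setpoint) | 1);	/* "| 1" avoids div by zero */

	pos_ratio = x;
	pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
	pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;	/* x^3 */
	pos_ratio += 1 << RATELIMIT_CALC_SHIFT;			/* + 1.0 */

	if (pos_ratio < 0)
		pos_ratio = 0;
	if (pos_ratio > 2LL << RATELIMIT_CALC_SHIFT)
		pos_ratio = 2LL << RATELIMIT_CALC_SHIFT;
	return pos_ratio;
}

int main(void)
{
	unsigned long limit = 1000, setpoint = 600;

	for (unsigned long dirty = 200; dirty <= 1000; dirty += 200)
		printf("dirty=%4lu  f=%.3f\n", dirty,
		       (double)pos_ratio_polynom(setpoint, dirty, limit) /
		       (1 << RATELIMIT_CALC_SHIFT));
	return 0;
}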
1050 * 0 +------------.------------------.----------------------*------------->
1051 * freerun^ setpoint^ limit^ dirty pages
1078 * 0 +----------------------.-------------------------------.------------->
1083 * - start writing to a slow SD card and a fast disk at the same time. The SD
1085 * - the wb dirty thresh drops quickly due to change of JBOD workload
1089 struct bdi_writeback *wb = dtc->wb;
1090 unsigned long write_bw = READ_ONCE(wb->avg_write_bandwidth);
1091 unsigned long freerun = dirty_freerun_ceiling(dtc->thresh, dtc->bg_thresh);
1092 unsigned long limit = hard_dirty_limit(dtc_dom(dtc), dtc->thresh);
1093 unsigned long wb_thresh = dtc->wb_thresh;
1098 long long pos_ratio; /* for scaling up/down the rate limit */
1101 dtc->pos_ratio = 0;
1103 if (unlikely(dtc->dirty >= limit))
1111 setpoint = (freerun + limit) / 2;
1112 pos_ratio = pos_ratio_polynom(setpoint, dtc->dirty, limit);
1119 * This is especially important for fuse which sets bdi->max_ratio to
1126 * total amount of RAM is 16GB, bdi->max_ratio is equal to 1%, global
1139 if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
1142 if (dtc->wb_dirty < 8) {
1143 dtc->pos_ratio = min_t(long long, pos_ratio * 2,
1148 if (dtc->wb_dirty >= wb_thresh)
1152 dtc->wb_bg_thresh);
1157 wb_pos_ratio = pos_ratio_polynom(wb_setpoint, dtc->wb_dirty,
1167 * wb's) while given strictlimit wb is below limit.
1170 * but it would look too non-natural for the case of all
1172 * with bdi->max_ratio == 100%.
1181 dtc->pos_ratio = min(pos_ratio, wb_pos_ratio);
1194 *        f(wb_dirty) := 1.0 + k * (wb_dirty - wb_setpoint)
1196 *                        x_intercept - wb_dirty
1197 *                     := --------------------------
1198 *                        x_intercept - wb_setpoint
1203 * (2) k = - 1 / (8 * write_bw) (in single wb case)
1208 * [wb_setpoint - write_bw/2, wb_setpoint + write_bw/2]
1216 if (unlikely(wb_thresh > dtc->thresh))
1217 wb_thresh = dtc->thresh;
1225 wb_thresh = max(wb_thresh, (limit - dtc->dirty) / 8);
1230 x = div_u64((u64)wb_thresh << 16, dtc->thresh | 1);
1234 * (thresh - wb_thresh ~= 0) and transit to wb_thresh in JBOD case.
1236 *          wb_thresh                    thresh - wb_thresh
1237 * span = --------- * (8 * write_bw) + ------------------ * wb_thresh
1238 *           thresh                          thresh
1240 span = (dtc->thresh - wb_thresh + 8 * write_bw) * (u64)x >> 16;
1243 if (dtc->wb_dirty < x_intercept - span / 4) {
1244 pos_ratio = div64_u64(pos_ratio * (x_intercept - dtc->wb_dirty),
1245 (x_intercept - wb_setpoint) | 1);
1255 if (dtc->wb_dirty < x_intercept) {
1256 if (dtc->wb_dirty > x_intercept / 8)
1258 dtc->wb_dirty);
1263 dtc->pos_ratio = pos_ratio;
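Editor's note: the fragments above apply a second, per-device linear factor on top of the global cubic: pos_ratio shrinks as the device's own dirty count moves from wb_setpoint toward x_intercept = wb_setpoint + span. A rough stand-alone model follows; scale_by_wb_position is an invented name and span stands in for roughly 8 * write_bw in the single-device case.

/* Toy model of the second-stage factor; pos_ratio is fixed point with
 * 1.0 == 1 << 10, matching RATELIMIT_CALC_SHIFT. */
#include <stdint.h>
#include <stdio.h>

static uint64_t scale_by_wb_position(uint64_t pos_ratio,
				     uint64_t wb_dirty,
				     uint64_t wb_setpoint,
				     uint64_t span)
{
	uint64_t x_intercept = wb_setpoint + span;

	if (wb_dirty < x_intercept - span / 4)
		return pos_ratio * (x_intercept - wb_dirty) /
		       ((x_intercept - wb_setpoint) | 1);

	return pos_ratio / 4;	/* deep in the danger zone: cut hard */
}

int main(void)
{
	uint64_t one = 1 << 10;	/* a pos_ratio of exactly 1.0 */

	/* wb_setpoint 10000 pages, span 8000 pages (~8 * write_bw) */
	printf("at setpoint:     %.3f\n",
	       scale_by_wb_position(one, 10000, 10000, 8000) / 1024.0);
	printf("2000 pages over: %.3f\n",
	       scale_by_wb_position(one, 12000, 10000, 8000) / 1024.0);
	printf("near intercept:  %.3f\n",
	       scale_by_wb_position(one, 17000, 10000, 8000) / 1024.0);
	return 0;
}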
1271 unsigned long avg = wb->avg_write_bandwidth;
1272 unsigned long old = wb->write_bandwidth;
1278 *                    bw * elapsed + write_bandwidth * (period - elapsed)
1279 * write_bandwidth = ---------------------------------------------------
1280 *                                        period
1285 bw = written - min(written, wb->written_stamp);
1292 bw += (u64)wb->write_bandwidth * (period - elapsed);
1299 avg -= (avg - old) >> 3;
1302 avg += (old - avg) >> 3;
1308 long delta = avg - wb->avg_write_bandwidth;
1310 &wb->bdi->tot_write_bandwidth) <= 0);
1312 wb->write_bandwidth = bw;
1313 WRITE_ONCE(wb->avg_write_bandwidth, avg);
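Editor's note: a stand-alone model of the bandwidth estimator above: the rate observed over the last elapsed jiffies is blended into a window of period jiffies per the formula in the comment, and the long-term average then chases the blended value in 1/8 steps. The HZ value, the struct and the always-nudged average are assumptions of this sketch; the kernel only nudges avg when it lags behind the direction of change.

#include <stdio.h>

#define HZ	100	/* assumed tick rate for the example */

struct bw_state {
	unsigned long write_bandwidth;		/* blended estimate, pages/s */
	unsigned long avg_write_bandwidth;	/* further smoothed, pages/s */
};

static void update_write_bandwidth(struct bw_state *s,
				   unsigned long pages_written,
				   unsigned long elapsed,	/* jiffies */
				   unsigned long period)	/* jiffies, > 0 */
{
	unsigned long long inst, blended;

	if (elapsed > period)
		elapsed = period;
	inst = (unsigned long long)pages_written * HZ /
	       (elapsed ? elapsed : 1);		/* instantaneous rate */

	/* blended = (inst * elapsed + old * (period - elapsed)) / period */
	blended = (inst * elapsed +
		   (unsigned long long)s->write_bandwidth * (period - elapsed)) /
		  period;
	s->write_bandwidth = (unsigned long)blended;

	/* let the long-term average drift toward the blended value by 1/8 */
	if (s->avg_write_bandwidth > blended)
		s->avg_write_bandwidth -= (s->avg_write_bandwidth - blended) >> 3;
	else
		s->avg_write_bandwidth += (blended - s->avg_write_bandwidth) >> 3;
}

int main(void)
{
	struct bw_state s = { .write_bandwidth = 25000, .avg_write_bandwidth = 25000 };

	/* a burst wrote 10000 pages in 20 jiffies of a 200-jiffy period */
	update_write_bandwidth(&s, 10000, 20, 200);
	printf("blended=%lu avg=%lu pages/s\n",
	       s.write_bandwidth, s.avg_write_bandwidth);
	return 0;
}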
1319 unsigned long thresh = dtc->thresh;
1320 unsigned long limit = dom->dirty_limit;
1325 if (limit < thresh) {
1326 limit = thresh;
1333 * dom->dirty_limit which is guaranteed to lie above the dirty pages.
1335 thresh = max(thresh, dtc->dirty);
1336 if (limit > thresh) {
1337 limit -= (limit - thresh) >> 5;
1342 dom->dirty_limit = limit;
1353 if (time_before(now, dom->dirty_limit_tstamp + BANDWIDTH_INTERVAL))
1356 spin_lock(&dom->lock);
1357 if (time_after_eq(now, dom->dirty_limit_tstamp + BANDWIDTH_INTERVAL)) {
1359 dom->dirty_limit_tstamp = now;
1361 spin_unlock(&dom->lock);
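Editor's note: the update_dirty_limit() fragments show dom->dirty_limit jumping up to the threshold immediately but decaying downward by only 1/32 per 200ms update, while never dropping below the current dirty page count. A small stand-alone sketch of that tracking (track_dirty_limit is an invented name):

#include <stdio.h>

static unsigned long track_dirty_limit(unsigned long limit,
				       unsigned long thresh,
				       unsigned long dirty)
{
	if (limit < thresh)
		return thresh;			/* follow increases instantly */

	if (thresh < dirty)
		thresh = dirty;			/* stay above current dirty pages */
	if (limit > thresh)
		limit -= (limit - thresh) >> 5;	/* decay down by 1/32 per update */
	return limit;
}

int main(void)
{
	unsigned long limit = 100000;

	/* the threshold dropped to 60000 while 50000 pages are still dirty */
	for (int i = 0; i < 5; i++) {
		limit = track_dirty_limit(limit, 60000, 50000);
		printf("update %d: limit=%lu\n", i + 1, limit);
	}
	return 0;
}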
1365 * Maintain wb->dirty_ratelimit, the base dirty throttle rate.
1374 struct bdi_writeback *wb = dtc->wb;
1375 unsigned long dirty = dtc->dirty;
1376 unsigned long freerun = dirty_freerun_ceiling(dtc->thresh, dtc->bg_thresh);
1377 unsigned long limit = hard_dirty_limit(dtc_dom(dtc), dtc->thresh);
1378 unsigned long setpoint = (freerun + limit) / 2;
1379 unsigned long write_bw = wb->avg_write_bandwidth;
1380 unsigned long dirty_ratelimit = wb->dirty_ratelimit;
1390 * when dirty pages are truncated by userspace or re-dirtied by FS.
1392 dirty_rate = (dirtied - wb->dirtied_stamp) * HZ / elapsed;
1398 dtc->pos_ratio >> RATELIMIT_CALC_SHIFT;
1405 * formula will yield the balanced rate limit (write_bw / N).
1442 * wb->dirty_ratelimit = balanced_dirty_ratelimit;
1446 * limit the step size.
1450 * task_ratelimit - dirty_ratelimit
1451 * = (pos_ratio - 1) * dirty_ratelimit
1460 * - dirty_ratelimit > balanced_dirty_ratelimit
1461 * - dirty_ratelimit > task_ratelimit (dirty pages are above setpoint)
1467 * |task_ratelimit - dirty_ratelimit| is used to limit the step size
1486 if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
1487 dirty = dtc->wb_dirty;
1488 if (dtc->wb_dirty < 8)
1489 setpoint = dtc->wb_dirty + 1;
1491 setpoint = (dtc->wb_thresh + dtc->wb_bg_thresh) / 2;
1495 x = min3(wb->balanced_dirty_ratelimit,
1498 step = x - dirty_ratelimit;
1500 x = max3(wb->balanced_dirty_ratelimit,
1503 step = dirty_ratelimit - x;
1520 dirty_ratelimit -= step;
1522 WRITE_ONCE(wb->dirty_ratelimit, max(dirty_ratelimit, 1UL));
1523 wb->balanced_dirty_ratelimit = balanced_dirty_ratelimit;
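Editor's note: a rough stand-alone model of how dirty_ratelimit is steered in the fragments above. The balanced rate is the per-task rate at which the current dirtiers would exactly match the device's write bandwidth (write_bw / N for N equal dirtiers), and the stored limit is nudged toward it in damped steps. The 1/8-of-the-gap step rule here is a simplification of the kernel's min3/max3 step limiting.

#include <stdio.h>

static unsigned long update_dirty_ratelimit(unsigned long dirty_ratelimit,
					    unsigned long task_ratelimit,
					    unsigned long write_bw,	/* pages/s */
					    unsigned long dirty_rate)	/* pages/s */
{
	unsigned long balanced, step;

	/* rate at which the dirtiers would exactly match write_bw */
	balanced = (unsigned long)((unsigned long long)task_ratelimit * write_bw /
				   (dirty_rate ? dirty_rate : 1));

	/* close only part of the gap per update to avoid overshooting */
	if (dirty_ratelimit < balanced) {
		step = (balanced - dirty_ratelimit + 7) / 8;
		dirty_ratelimit += step;
	} else {
		step = (dirty_ratelimit - balanced + 7) / 8;
		dirty_ratelimit -= step;
	}
	return dirty_ratelimit ? dirty_ratelimit : 1;	/* keep it at least 1 */
}

int main(void)
{
	unsigned long rate = 1000;	/* pages/s each task is currently allowed */

	/* two dirtiers, disk sustains 8000 pages/s: balanced rate is 4000 */
	for (int i = 0; i < 8; i++) {
		rate = update_dirty_ratelimit(rate, rate, 8000, 2 * rate);
		printf("update %d: dirty_ratelimit=%lu pages/s\n", i + 1, rate);
	}
	return 0;
}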
1532 struct bdi_writeback *wb = gdtc->wb;
1538 spin_lock(&wb->list_lock);
1546 elapsed = max(now - wb->bw_time_stamp, 1UL);
1547 dirtied = percpu_counter_read(&wb->stat[WB_DIRTIED]);
1548 written = percpu_counter_read(&wb->stat[WB_WRITTEN]);
1565 wb->dirtied_stamp = dirtied;
1566 wb->written_stamp = written;
1567 WRITE_ONCE(wb->bw_time_stamp, now);
1568 spin_unlock(&wb->list_lock);
1584 unsigned long elapsed = now - READ_ONCE(wb->bw_time_stamp);
1587 !atomic_read(&wb->writeback_inodes)) {
1588 spin_lock(&wb->list_lock);
1589 wb->dirtied_stamp = wb_stat(wb, WB_DIRTIED);
1590 wb->written_stamp = wb_stat(wb, WB_WRITTEN);
1591 WRITE_ONCE(wb->bw_time_stamp, now);
1592 spin_unlock(&wb->list_lock);
1601 * global_zone_page_state() too often. So scale it near-sqrt to the safety margin
1608 return 1UL << (ilog2(thresh - dirty) >> 1);
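Editor's note: the line above sizes the per-task polling interval roughly as the square root of the gap to the dirty limit, so counters are checked rarely when far from trouble and often when close. A stand-alone sketch with a local ilog2 helper:

#include <stdio.h>

static unsigned int ilog2_ul(unsigned long v)
{
	unsigned int r = 0;

	while (v >>= 1)
		r++;
	return r;
}

static unsigned long dirty_poll_interval(unsigned long dirty, unsigned long thresh)
{
	if (thresh > dirty)
		return 1UL << (ilog2_ul(thresh - dirty) >> 1);	/* ~sqrt of the gap */
	return 1;
}

int main(void)
{
	printf("gap of 1M pages -> recheck every %lu pages\n",
	       dirty_poll_interval(0, 1UL << 20));
	printf("gap of 1K pages -> recheck every %lu pages\n",
	       dirty_poll_interval(0, 1UL << 10));
	return 0;
}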
1616 unsigned long bw = READ_ONCE(wb->avg_write_bandwidth);
1620 * Limit pause time for small memory systems. If sleeping for too long
1638 long hi = ilog2(READ_ONCE(wb->avg_write_bandwidth));
1639 long lo = ilog2(READ_ONCE(wb->dirty_ratelimit));
1644 /* target for 10ms pause on 1-dd case */
1654 t += (hi - lo) * (10 * HZ) / 1024;
1670 * 2) limit the target pause time to max_pause/2, so that the normal
1679 * case fio-mmap-randwrite-64k, which does 16*{sync read, async write}.
1709 struct bdi_writeback *wb = dtc->wb;
1715 * - in JBOD setup, wb_thresh can fluctuate a lot
1716 * - in a system with HDD and USB key, the USB key may somehow
1725 dtc->wb_thresh = __wb_calc_thresh(dtc, dtc->thresh);
1726 dtc->wb_bg_thresh = dtc->thresh ?
1727 div_u64((u64)dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0;
1735 * reported dirty, even though there are thresh-m pages
1739 if (dtc->wb_thresh < 2 * wb_stat_error()) {
1741 dtc->wb_dirty = wb_reclaimable + wb_stat_sum(wb, WB_WRITEBACK);
1744 dtc->wb_dirty = wb_reclaimable + wb_stat(wb, WB_WRITEBACK);
1754 dirty = dtc->wb_dirty;
1755 thresh = dtc->wb_thresh;
1757 dirty = dtc->dirty;
1758 thresh = dtc->thresh;
1765 * Throttle it only when the background writeback cannot catch-up. This avoids
1771 * for strictlimit-ing.
1780 dirty = dtc->wb_dirty;
1781 thresh = dtc->wb_thresh;
1782 bg_thresh = dtc->wb_bg_thresh;
1784 dirty = dtc->dirty;
1785 thresh = dtc->thresh;
1786 bg_thresh = dtc->bg_thresh;
1788 dtc->freerun = dirty <= dirty_freerun_ceiling(thresh, bg_thresh);
1802 dtc->freerun = false;
1810 * LOCAL_THROTTLE tasks must not be throttled when below the per-wb
1813 if (!(current->flags & PF_LOCAL_THROTTLE))
1816 dtc->freerun = dtc->wb_dirty <
1817 dirty_freerun_ceiling(dtc->wb_thresh, dtc->wb_bg_thresh);
1823 dtc->dirty_exceeded = (dtc->wb_dirty > dtc->wb_thresh) &&
1824 ((dtc->dirty > dtc->thresh) || strictlimit);
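Editor's note: the freerun test above lets tasks dirty pages without any throttling while the dirty count stays below a ceiling between the background and foreground thresholds. A tiny model, assuming the mainline midpoint definition of dirty_freerun_ceiling():

#include <stdbool.h>
#include <stdio.h>

static unsigned long dirty_freerun_ceiling(unsigned long thresh,
					   unsigned long bg_thresh)
{
	return (thresh + bg_thresh) / 2;	/* assumed midpoint, as in mainline */
}

static bool in_freerun(unsigned long dirty, unsigned long thresh,
		       unsigned long bg_thresh)
{
	return dirty <= dirty_freerun_ceiling(thresh, bg_thresh);
}

int main(void)
{
	unsigned long thresh = 20000, bg_thresh = 10000;

	printf("12000 dirty: freerun=%d\n", in_freerun(12000, thresh, bg_thresh));
	printf("18000 dirty: freerun=%d\n", in_freerun(18000, thresh, bg_thresh));
	return 0;
}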
1835 if (dtc->freerun)
1866 struct backing_dev_info *bdi = wb->bdi;
1867 bool strictlimit = bdi->capabilities & BDI_CAP_STRICTLIMIT;
1894 if (!laptop_mode && nr_dirty > gdtc->bg_thresh &&
1902 if (gdtc->freerun && (!mdtc || mdtc->freerun)) {
1910 current->dirty_paused_when = now;
1911 current->nr_dirtied = 0;
1914 current->nr_dirtied_pause = min(intv, m_intv);
1929 if (gdtc->freerun)
1941 if (mdtc->freerun)
1943 if (mdtc->pos_ratio < gdtc->pos_ratio)
1947 wb->dirty_exceeded = gdtc->dirty_exceeded ||
1948 (mdtc && mdtc->dirty_exceeded);
1949 if (time_is_before_jiffies(READ_ONCE(wb->bw_time_stamp) +
1954 dirty_ratelimit = READ_ONCE(wb->dirty_ratelimit);
1955 task_ratelimit = ((u64)dirty_ratelimit * sdtc->pos_ratio) >>
1957 max_pause = wb_max_pause(wb, sdtc->wb_dirty);
1969 if (current->dirty_paused_when)
1970 pause -= now - current->dirty_paused_when;
1973 * for up to 800ms from time to time on 1-HDD; so does xfs,
1980 sdtc->thresh,
1981 sdtc->bg_thresh,
1982 sdtc->dirty,
1983 sdtc->wb_thresh,
1984 sdtc->wb_dirty,
1991 if (pause < -HZ) {
1992 current->dirty_paused_when = now;
1993 current->nr_dirtied = 0;
1995 current->dirty_paused_when += period;
1996 current->nr_dirtied = 0;
1997 } else if (current->nr_dirtied_pause <= pages_dirtied)
1998 current->nr_dirtied_pause += pages_dirtied;
2003 now += min(pause - max_pause, max_pause);
2009 sdtc->thresh,
2010 sdtc->bg_thresh,
2011 sdtc->dirty,
2012 sdtc->wb_thresh,
2013 sdtc->wb_dirty,
2021 ret = -EAGAIN;
2025 bdi->last_bdp_sleep = jiffies;
2028 current->dirty_paused_when = now + pause;
2029 current->nr_dirtied = 0;
2030 current->nr_dirtied_pause = nr_dirtied_pause;
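Editor's note: the balance_dirty_pages() fragments above size each sleep as the time the task "owes" for the pages it just dirtied at its allowed rate, minus the time it already spent running since its last pause, clamped between min_pause and max_pause. A simplified stand-alone sketch; compute_pause and the HZ value are assumptions, and the kernel derives min/max pause from write bandwidth and dirty_ratelimit rather than taking them as parameters.

#include <stdio.h>

#define HZ	100	/* assumed tick rate for the example */

static long compute_pause(unsigned long pages_dirtied,
			  unsigned long task_ratelimit,	/* pages per second */
			  long jiffies_since_last_pause,
			  long min_pause, long max_pause)
{
	long period, pause;

	if (!task_ratelimit)
		return max_pause;

	period = HZ * pages_dirtied / task_ratelimit;	/* time those pages "cost" */
	pause = period - jiffies_since_last_pause;	/* credit time already spent running */

	if (pause < min_pause)
		return pause;	/* negative or tiny: the caller skips sleeping */
	if (pause > max_pause)
		pause = max_pause;
	return pause;
}

int main(void)
{
	/* dirtied 64 pages, allowed 1600 pages/s, ran 1 jiffy since last pause */
	printf("pause = %ld jiffies\n", compute_pause(64, 1600, 1, 1, 20));
	return 0;
}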
2044 * In theory 1 page is enough to keep the consumer-producer
2049 if (sdtc->wb_dirty <= wb_stat_error())
2052 if (fatal_signal_pending(current))
2063 * dirty tsk->nr_dirtied_pause pages;
2067 * (tsk->nr_dirtied_pause - 1) pages, balance_dirty_pages() will never be
2069 * throttled page dirties in dirty_throttle_leaks on task exit and charge them
2077 * balance_dirty_pages_ratelimited_flags - Balance dirty memory state.
2078 * @mapping: address_space which was dirtied.
2087 * Return: If @flags contains BDP_ASYNC, it may return -EAGAIN to
2093 int balance_dirty_pages_ratelimited_flags(struct address_space *mapping,
2096 struct inode *inode = mapping->host;
2103 if (!(bdi->capabilities & BDI_CAP_WRITEBACK))
2109 wb = &bdi->wb;
2111 ratelimit = current->nr_dirtied_pause;
2112 if (wb->dirty_exceeded)
2113 ratelimit = min(ratelimit, 32 >> (PAGE_SHIFT - 10));
2120 * time, hence all honoured too large initial task->nr_dirtied_pause.
2123 if (unlikely(current->nr_dirtied >= ratelimit))
2131 * short-lived tasks (eg. gcc invocations in a kernel build) escaping
2132 * the dirty throttling and livelock other long-run dirtiers.
2135 if (*p > 0 && current->nr_dirtied < ratelimit) {
2137 nr_pages_dirtied = min(*p, ratelimit - current->nr_dirtied);
2138 *p -= nr_pages_dirtied;
2139 current->nr_dirtied += nr_pages_dirtied;
2143 if (unlikely(current->nr_dirtied >= ratelimit))
2144 ret = balance_dirty_pages(wb, current->nr_dirtied, flags);
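Editor's note: the ratelimited entry point above keeps the common path cheap: a task only drops into the full balancing code after it has dirtied nr_dirtied_pause pages, the allowance is clamped hard once the device is over its limit (32 >> (PAGE_SHIFT - 10) == 8 for 4 KiB pages), and dirties "leaked" by exited tasks are charged to whoever dirties next. A toy model of that bookkeeping (struct task and ratelimit_hit are invented names):

#include <stdbool.h>
#include <stdio.h>

struct task {
	unsigned long nr_dirtied;
	unsigned long nr_dirtied_pause;
};

static bool ratelimit_hit(struct task *tsk, unsigned long pages,
			  unsigned long *dirty_throttle_leaks,
			  bool dirty_exceeded)
{
	unsigned long ratelimit = tsk->nr_dirtied_pause;

	if (dirty_exceeded && ratelimit > 8)
		ratelimit = 8;		/* clamp hard once over the limit (4 KiB pages) */

	tsk->nr_dirtied += pages;

	/* pick up dirties leaked by tasks that exited mid-interval */
	if (*dirty_throttle_leaks && tsk->nr_dirtied < ratelimit) {
		unsigned long leak = ratelimit - tsk->nr_dirtied;

		if (leak > *dirty_throttle_leaks)
			leak = *dirty_throttle_leaks;
		*dirty_throttle_leaks -= leak;
		tsk->nr_dirtied += leak;
	}

	if (tsk->nr_dirtied < ratelimit)
		return false;		/* cheap path: no balancing yet */
	tsk->nr_dirtied = 0;		/* balance_dirty_pages() would run here */
	return true;
}

int main(void)
{
	struct task tsk = { .nr_dirtied = 0, .nr_dirtied_pause = 32 };
	unsigned long leaks = 10;

	for (int i = 0; i < 4; i++)
		printf("write %d: throttle=%d\n", i,
		       ratelimit_hit(&tsk, 8, &leaks, false));
	return 0;
}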
2152 * balance_dirty_pages_ratelimited - balance dirty memory state.
2153 * @mapping: address_space which was dirtied.
2159 * Once we're over the dirty memory limit we decrease the ratelimiting
2160 * by a lot, to prevent individual processes from overshooting the limit
2163 void balance_dirty_pages_ratelimited(struct address_space *mapping)
2165 balance_dirty_pages_ratelimited_flags(mapping, 0);
2175 struct bdi_writeback *wb = dtc->wb;
2177 dtc->wb_bg_thresh = __wb_calc_thresh(dtc, dtc->bg_thresh);
2178 if (dtc->wb_bg_thresh < 2 * wb_stat_error())
2179 dtc->wb_dirty = wb_stat_sum(wb, WB_RECLAIMABLE);
2181 dtc->wb_dirty = wb_stat(wb, WB_RECLAIMABLE);
2188 if (dtc->dirty > dtc->bg_thresh)
2192 if (dtc->wb_dirty > dtc->wb_bg_thresh)
2199 * wb_over_bg_thresh - does @wb need to be written back?
2235 * and a different non-zero value will wakeup the writeback threads.
2259 * then push it back - the user is still using the disk.
2263 mod_timer(&info->laptop_mode_wb_timer, jiffies + laptop_mode);
2278 del_timer(&bdi->laptop_mode_wb_timer);
2284 * If ratelimit_pages is too high then we can get into dirty-data overload
2299 dom->dirty_limit = dirty_thresh;
2395 * is now applied to total non-HIGHPAGE memory, and as such we can't
2398 * non-HIGHMEM memory.
2417 * tag_pages_for_writeback - tag pages to be written by writeback
2418 * @mapping: address space structure to write
2430 void tag_pages_for_writeback(struct address_space *mapping,
2433 XA_STATE(xas, &mapping->i_pages, start);
2452 static bool folio_prepare_writeback(struct address_space *mapping,
2462 if (unlikely(folio->mapping != mapping))
2472 if (wbc->sync_mode == WB_SYNC_NONE)
2486 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
2493 if (wbc->range_cyclic)
2494 return -1;
2495 return wbc->range_end >> PAGE_SHIFT;
2498 static struct folio *writeback_get_folio(struct address_space *mapping,
2504 folio = folio_batch_next(&wbc->fbatch);
2506 folio_batch_release(&wbc->fbatch);
2508 filemap_get_folios_tag(mapping, &wbc->index, wbc_end(wbc),
2509 wbc_to_tag(wbc), &wbc->fbatch);
2510 folio = folio_batch_next(&wbc->fbatch);
2516 if (unlikely(!folio_prepare_writeback(mapping, wbc, folio))) {
2521 trace_wbc_writepage(wbc, inode_to_bdi(mapping->host));
2526 * writeback_iter - iterate folio of a mapping for writeback
2527 * @mapping: address space structure to write
2530 * @error: in-out pointer for writeback errors (see below)
2533 * @wbc on @mapping and should be called in a while loop in the ->writepages
2540 * If there was an error in the per-folio writeback inside the writeback_iter()
2551 struct folio *writeback_iter(struct address_space *mapping,
2555 folio_batch_init(&wbc->fbatch);
2556 wbc->saved_err = *error = 0;
2562 * For non-cyclic writeback we always start at the beginning of
2565 if (wbc->range_cyclic)
2566 wbc->index = mapping->writeback_index;
2568 wbc->index = wbc->range_start >> PAGE_SHIFT;
2575 * For data-integrity writeback we have to be careful so that we
2581 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
2582 tag_pages_for_writeback(mapping, wbc->index,
2585 wbc->nr_to_write -= folio_nr_pages(folio);
2592 * we run past wbc->nr_to_write or encounter errors.
2593 * We stash away the first error we encounter in wbc->saved_err
2598 * wbc->nr_to_write or encounter the first error.
2600 if (wbc->sync_mode == WB_SYNC_ALL) {
2601 if (*error && !wbc->saved_err)
2602 wbc->saved_err = *error;
2604 if (*error || wbc->nr_to_write <= 0)
2609 folio = writeback_get_folio(mapping, wbc);
2616 * writeback access order inversion - we should only ever lock
2617 * multiple pages in ascending page->index order, and looping
2621 if (wbc->range_cyclic)
2622 mapping->writeback_index = 0;
2628 *error = wbc->saved_err;
2633 if (wbc->range_cyclic)
2634 mapping->writeback_index = folio_next_index(folio);
2635 folio_batch_release(&wbc->fbatch);
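Editor's note: the kerneldoc fragments above say writeback_iter() is meant to drive a while loop inside a ->writepages implementation; write_cache_pages() and writeback_use_writepage() in the following fragments are the in-tree users. For orientation, a kernel-context sketch of such a loop is given below; my_writepages and my_write_folio are hypothetical, and the helper is assumed to write back and unlock the folio the way a ->writepage would.

/* Hypothetical ->writepages built on writeback_iter(); kernel context
 * (linux/writeback.h, linux/pagemap.h) is assumed. */
static int my_writepages(struct address_space *mapping,
			 struct writeback_control *wbc)
{
	struct folio *folio = NULL;
	int error;

	/* writeback_iter() handles tagging, range and batching, and
	 * stashes the first error for WB_SYNC_ALL writeback. */
	while ((folio = writeback_iter(mapping, wbc, folio, &error)))
		error = my_write_folio(folio, wbc);	/* assumed helper */

	return error;
}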
2641 * write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
2642 * @mapping: address space structure to write
2643 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
2651 int write_cache_pages(struct address_space *mapping,
2658 while ((folio = writeback_iter(mapping, wbc, folio, &error))) {
2670 static int writeback_use_writepage(struct address_space *mapping,
2678 while ((folio = writeback_iter(mapping, wbc, folio, &err))) {
2679 err = mapping->a_ops->writepage(&folio->page, wbc);
2684 mapping_set_error(mapping, err);
2691 int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
2696 if (wbc->nr_to_write <= 0)
2698 wb = inode_to_wb_wbc(mapping->host, wbc);
2701 if (mapping->a_ops->writepages) {
2702 ret = mapping->a_ops->writepages(mapping, wbc);
2703 } else if (mapping->a_ops->writepage) {
2704 ret = writeback_use_writepage(mapping, wbc);
2709 if (ret != -ENOMEM || wbc->sync_mode != WB_SYNC_ALL)
2726 if (time_is_before_jiffies(READ_ONCE(wb->bw_time_stamp) +
2735 bool noop_dirty_folio(struct address_space *mapping, struct folio *folio)
2749 struct address_space *mapping)
2751 struct inode *inode = mapping->host;
2753 trace_writeback_dirty_folio(folio, mapping);
2755 if (mapping_can_writeback(mapping)) {
2768 current->nr_dirtied += nr;
2783 lruvec_stat_mod_folio(folio, NR_FILE_DIRTY, -nr);
2784 zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr);
2785 wb_stat_mod(wb, WB_RECLAIMABLE, -nr);
2804 void __folio_mark_dirty(struct folio *folio, struct address_space *mapping,
2809 xa_lock_irqsave(&mapping->i_pages, flags);
2810 if (folio->mapping) { /* Race with truncate? */
2812 folio_account_dirtied(folio, mapping);
2813 __xa_set_mark(&mapping->i_pages, folio_index(folio),
2816 xa_unlock_irqrestore(&mapping->i_pages, flags);
2820 * filemap_dirty_folio - Mark a folio dirty for filesystems which do not use buffer_heads.
2821 * @mapping: Address space this folio belongs to.
2831 * that case, but not all the buffers. This is a "bottom-up" dirtying,
2832 * whereas block_dirty_folio() is a "top-down" dirtying.
2838 bool filemap_dirty_folio(struct address_space *mapping, struct folio *folio)
2843 __folio_mark_dirty(folio, mapping, !folio_test_private(folio));
2845 if (mapping->host) {
2847 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
2854 * folio_redirty_for_writepage - Decline to write a dirty folio.
2868 struct address_space *mapping = folio->mapping;
2872 wbc->pages_skipped += nr;
2873 ret = filemap_dirty_folio(mapping, folio);
2874 if (mapping && mapping_can_writeback(mapping)) {
2875 struct inode *inode = mapping->host;
2880 current->nr_dirtied -= nr;
2881 node_stat_mod_folio(folio, NR_DIRTIED, -nr);
2882 wb_stat_mod(wb, WB_DIRTIED, -nr);
2890 * folio_mark_dirty - Mark a folio as being modified.
2898 * unmaps pages before removing the folio from its mapping.
2904 struct address_space *mapping = folio_mapping(folio);
2906 if (likely(mapping)) {
2920 return mapping->a_ops->dirty_folio(mapping, folio);
2923 return noop_dirty_folio(mapping, folio);
2929 * folio->mapping->host, and if the folio is unlocked. This is because another
2930 * CPU could truncate the folio off the mapping and then free the mapping.
2932 * Usually, the folio _is_ locked, or the caller is a user-space process which
2963 struct address_space *mapping = folio_mapping(folio);
2965 if (mapping_can_writeback(mapping)) {
2966 struct inode *inode = mapping->host;
2988 * write-for-sync can discover it via a PAGECACHE_TAG_DIRTY walk.
2989 * The ->writepage implementation will run either folio_start_writeback()
2998 struct address_space *mapping = folio_mapping(folio);
3003 if (mapping && mapping_can_writeback(mapping)) {
3004 struct inode *inode = mapping->host;
3013 * (b) we tell the low-level filesystem to
3024 * has no effect on the actual dirty bit - since
3046 lruvec_stat_mod_folio(folio, NR_FILE_DIRTY, -nr);
3047 zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr);
3048 wb_stat_mod(wb, WB_RECLAIMABLE, -nr);
3060 atomic_inc(&wb->writeback_inodes);
3066 atomic_dec(&wb->writeback_inodes);
3074 spin_lock_irqsave(&wb->work_lock, flags);
3075 if (test_bit(WB_registered, &wb->state))
3076 queue_delayed_work(bdi_wq, &wb->bw_dwork, BANDWIDTH_INTERVAL);
3077 spin_unlock_irqrestore(&wb->work_lock, flags);
3083 struct address_space *mapping = folio_mapping(folio);
3086 if (mapping && mapping_use_writeback_tags(mapping)) {
3087 struct inode *inode = mapping->host;
3091 xa_lock_irqsave(&mapping->i_pages, flags);
3093 __xa_clear_mark(&mapping->i_pages, folio_index(folio),
3095 if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) {
3098 wb_stat_mod(wb, WB_WRITEBACK, -nr);
3100 if (!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK))
3104 if (mapping->host && !mapping_tagged(mapping,
3106 sb_clear_inode_writeback(mapping->host);
3108 xa_unlock_irqrestore(&mapping->i_pages, flags);
3113 lruvec_stat_mod_folio(folio, NR_WRITEBACK, -nr);
3114 zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr);
3123 struct address_space *mapping = folio_mapping(folio);
3128 if (mapping && mapping_use_writeback_tags(mapping)) {
3129 XA_STATE(xas, &mapping->i_pages, folio_index(folio));
3130 struct inode *inode = mapping->host;
3139 on_wblist = mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
3142 if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) {
3155 if (mapping->host && !on_wblist)
3156 sb_mark_inode_writeback(mapping->host);
3179 * folio_wait_writeback - Wait for a folio to finish writeback.
3200 * folio_wait_writeback_killable - Wait for a folio to finish writeback.
3210 * Return: 0 on success, -EINTR if we get a fatal signal while waiting.
3217 return -EINTR;
3225 * folio_wait_stable() - wait for writeback to finish, if necessary.