Lines Matching +full:period +full:- +full:scale
1 // SPDX-License-Identifier: GPL-2.0-only
3 * mm/page-writeback.c
26 #include <linux/backing-dev.h>
54 #define DIRTY_POLL_THRESH (128 >> (PAGE_SHIFT - 10))
100 * The interval between `kupdate'-style writebacks
119 /* End of sysctl-exported parameters */
137 unsigned long wb_dirty; /* per-wb counterparts */
147 * Length of period for aging writeout fractions of bdis. This is an
148 * arbitrarily chosen number. The longer the period, the slower fractions will
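
The aging described at lines 147-148 works like this: each elapsed period the per-wb writeout-completion counts are roughly halved (in the kernel this is done by the flex_proportions counters), so a wb's share of the dirty limit tracks its recent writeout rate with exponential decay. A standalone sketch (userspace C, not kernel code) showing how an idle device's share fades while a busy one keeps writing:

#include <stdio.h>

struct wb_model {
        const char *name;
        unsigned long completions;      /* writeout completions this window */
};

/* age all counters by one elapsed period: counts are (roughly) halved */
static void age_completions(struct wb_model *wbs, int nr)
{
        for (int i = 0; i < nr; i++)
                wbs[i].completions >>= 1;
}

int main(void)
{
        struct wb_model wbs[] = {
                { "busy-ssd", 0 },
                { "idle-usb", 4000 },   /* wrote a burst, then went idle */
        };

        for (int period = 0; period < 5; period++) {
                unsigned long total;

                wbs[0].completions += 8000;     /* keeps writing every period */
                total = wbs[0].completions + wbs[1].completions;
                printf("period %d: idle-usb share = %lu%%\n", period,
                       wbs[1].completions * 100 / total);
                age_completions(wbs, 2);
        }
        return 0;
}
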
157 .wb_completions = &(__wb)->completions
163 .wb_completions = &(__wb)->memcg_completions, \
168 return dtc->dom; in mdtc_valid()
173 return dtc->dom; in dtc_dom()
178 return mdtc->gdtc; in mdtc_gdtc()
183 return &wb->memcg_completions; in wb_memcg_completions()
189 unsigned long this_bw = READ_ONCE(wb->avg_write_bandwidth); in wb_min_max_ratio()
190 unsigned long tot_bw = atomic_long_read(&wb->bdi->tot_write_bandwidth); in wb_min_max_ratio()
191 unsigned long long min = wb->bdi->min_ratio; in wb_min_max_ratio()
192 unsigned long long max = wb->bdi->max_ratio; in wb_min_max_ratio()
216 .wb_completions = &(__wb)->completions
243 *minp = wb->bdi->min_ratio; in wb_min_max_ratio()
244 *maxp = wb->bdi->max_ratio; in wb_min_max_ratio()
257 * user-configurable dirty ratio is the effective number of pages that
262 * absolute number of bytes, calculating the per-zone dirty limit can
268 * node_dirtyable_memory - number of dirtyable pages in a node
272 * page cache. This is the base value for the per-node dirty limits.
280 struct zone *zone = pgdat->node_zones + z; in node_dirtyable_memory()
293 nr_pages -= min(nr_pages, pgdat->totalreserve_pages); in node_dirtyable_memory()
316 z = &NODE_DATA(node)->node_zones[i]; in highmem_dirtyable_memory()
322 nr_pages -= min(nr_pages, high_wmark_pages(z)); in highmem_dirtyable_memory()
342 * global_dirtyable_memory - number of globally dirtyable pages
357 x -= min(x, totalreserve_pages); in global_dirtyable_memory()
363 x -= highmem_dirtyable_memory(x); in global_dirtyable_memory()
369 * domain_dirty_limits - calculate thresh and bg_thresh for a wb_domain
372 * Calculate @dtc->thresh and ->bg_thresh considering
374 * must ensure that @dtc->avail is set before calling this function. The
375 * dirty limits will be lifted by 1/4 for real-time tasks.
379 const unsigned long available_memory = dtc->avail; in domain_dirty_limits()
383 /* convert ratios to per-PAGE_SIZE for higher precision */ in domain_dirty_limits()
392 unsigned long global_avail = gdtc->avail; in domain_dirty_limits()
398 * per-PAGE_SIZE, they can be obtained by dividing bytes by in domain_dirty_limits()
427 * 32-bits. This gives 16TB dirty limits max which is hopefully enough. in domain_dirty_limits()
431 /* This makes sure bg_thresh is within 32-bits as well */ in domain_dirty_limits()
434 dtc->thresh = thresh; in domain_dirty_limits()
435 dtc->bg_thresh = bg_thresh; in domain_dirty_limits()
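
The fragments from lines 369-435 convert the dirty ratios (or byte limits) into page counts against dtc->avail, with the ratios pre-scaled to a per-PAGE_SIZE base for precision (line 383), the results kept within 32 bits (lines 427-431), and both limits lifted for real-time tasks (line 375). A userspace sketch of that arithmetic, ratio path only (a simplified model, not the kernel function; the vm_dirty_bytes paths are omitted and the real-time boost is reduced to the plain 1/4 lift the comment mentions):

#include <stdio.h>

#define PAGE_SIZE       4096UL

static void dirty_limits_model(unsigned long available_memory, /* pages */
                               unsigned long dirty_ratio,      /* percent */
                               unsigned long bg_ratio,         /* percent */
                               int rt_task,
                               unsigned long *thresh,
                               unsigned long *bg_thresh)
{
        /* convert ratios to a per-PAGE_SIZE scale for higher precision */
        unsigned long ratio = dirty_ratio * PAGE_SIZE / 100;
        unsigned long bgr = bg_ratio * PAGE_SIZE / 100;

        *thresh = ratio * available_memory / PAGE_SIZE;
        *bg_thresh = bgr * available_memory / PAGE_SIZE;

        /* background writeback must kick in before throttling does */
        if (*bg_thresh >= *thresh)
                *bg_thresh = *thresh / 2;

        /* real-time tasks get both limits lifted by ~1/4 (line 375) */
        if (rt_task) {
                *thresh += *thresh / 4;
                *bg_thresh += *bg_thresh / 4;
        }
}

int main(void)
{
        unsigned long thresh, bg_thresh;
        unsigned long avail = 4UL << 20;        /* 4M dirtyable pages = 16 GiB */

        dirty_limits_model(avail, 20, 10, 0, &thresh, &bg_thresh);
        printf("thresh=%lu pages (~%lu MiB), bg_thresh=%lu pages\n",
               thresh, thresh * PAGE_SIZE >> 20, bg_thresh);
        return 0;
}
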
443 * global_dirty_limits - background-writeback and dirty-throttling thresholds
462 * node_dirty_limit - maximum number of dirty pages allowed in a node
485 * 32-bits. This gives 16TB dirty limits max which is hopefully enough. in node_dirty_limit()
491 * node_dirty_ok - tells whether a node is within its dirty limits
531 return -ERANGE; in dirty_background_bytes_handler()
562 return -ERANGE; in dirty_bytes_handler()
584 __fprop_add_percpu_max(&dom->completions, completions, in wb_domain_writeout_add()
586 /* First event after period switching was turned off? */ in wb_domain_writeout_add()
587 if (unlikely(!dom->period_time)) { in wb_domain_writeout_add()
594 dom->period_time = wp_next_time(jiffies); in wb_domain_writeout_add()
595 mod_timer(&dom->period_timer, dom->period_time); in wb_domain_writeout_add()
608 wb_domain_writeout_add(&global_wb_domain, &wb->completions, in __wb_writeout_add()
609 wb->bdi->max_prop_frac, nr); in __wb_writeout_add()
614 wb->bdi->max_prop_frac, nr); in __wb_writeout_add()
634 int miss_periods = (jiffies - dom->period_time) / in writeout_period()
637 if (fprop_new_period(&dom->completions, miss_periods + 1)) { in writeout_period()
638 dom->period_time = wp_next_time(dom->period_time + in writeout_period()
640 mod_timer(&dom->period_timer, dom->period_time); in writeout_period()
643 * Aging has zeroed all fractions. Stop wasting CPU on period in writeout_period()
646 dom->period_time = 0; in writeout_period()
654 spin_lock_init(&dom->lock); in wb_domain_init()
656 timer_setup(&dom->period_timer, writeout_period, TIMER_DEFERRABLE); in wb_domain_init()
658 dom->dirty_limit_tstamp = jiffies; in wb_domain_init()
660 return fprop_global_init(&dom->completions, gfp); in wb_domain_init()
666 del_timer_sync(&dom->period_timer); in wb_domain_exit()
667 fprop_global_destroy(&dom->completions); in wb_domain_exit()
683 return -EINVAL; in bdi_check_pages_limit()
696 return -EINVAL; in bdi_ratio_from_pages()
720 return -EINVAL; in __bdi_set_min_ratio()
723 if (min_ratio > bdi->max_ratio) { in __bdi_set_min_ratio()
724 ret = -EINVAL; in __bdi_set_min_ratio()
726 if (min_ratio < bdi->min_ratio) { in __bdi_set_min_ratio()
727 delta = bdi->min_ratio - min_ratio; in __bdi_set_min_ratio()
728 bdi_min_ratio -= delta; in __bdi_set_min_ratio()
729 bdi->min_ratio = min_ratio; in __bdi_set_min_ratio()
731 delta = min_ratio - bdi->min_ratio; in __bdi_set_min_ratio()
734 bdi->min_ratio = min_ratio; in __bdi_set_min_ratio()
736 ret = -EINVAL; in __bdi_set_min_ratio()
750 return -EINVAL; in __bdi_set_max_ratio()
753 if (bdi->min_ratio > max_ratio) { in __bdi_set_max_ratio()
754 ret = -EINVAL; in __bdi_set_max_ratio()
756 bdi->max_ratio = max_ratio; in __bdi_set_max_ratio()
757 bdi->max_prop_frac = (FPROP_FRAC_BASE * max_ratio) / in __bdi_set_max_ratio()
788 return bdi_get_bytes(bdi->min_ratio); in bdi_get_min_bytes()
809 return bdi_get_bytes(bdi->max_ratio); in bdi_get_max_bytes()
831 return -EINVAL; in bdi_set_strict_limit()
835 bdi->capabilities |= BDI_CAP_STRICTLIMIT; in bdi_set_strict_limit()
837 bdi->capabilities &= ~BDI_CAP_STRICTLIMIT; in bdi_set_strict_limit()
852 return max(thresh, dom->dirty_limit); in hard_dirty_limit()
857 * system-wide clean memory excluding the amount being used in the domain.
863 unsigned long clean = filepages - min(filepages, mdtc->dirty); in mdtc_calc_avail()
864 unsigned long global_clean = gdtc->avail - min(gdtc->avail, gdtc->dirty); in mdtc_calc_avail()
865 unsigned long other_clean = global_clean - min(global_clean, clean); in mdtc_calc_avail()
867 mdtc->avail = filepages + min(headroom, other_clean); in mdtc_calc_avail()
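
Worked example of the arithmetic at lines 863-867 (numbers invented): a memcg with filepages = 1000 and dirty = 200 has clean = 800; if globally avail = 10000 and dirty = 1500, then global_clean = 8500 and other_clean = 8500 - 800 = 7700; with a memcg headroom of 500 pages, mdtc->avail = 1000 + min(500, 7700) = 1500 pages, i.e. the domain may use its own page cache plus whatever part of its headroom is actually backed by clean memory elsewhere.
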
883 dtc->avail = global_dirtyable_memory(); in domain_dirty_avail()
884 dtc->dirty = global_node_page_state(NR_FILE_DIRTY); in domain_dirty_avail()
886 dtc->dirty += global_node_page_state(NR_WRITEBACK); in domain_dirty_avail()
890 mem_cgroup_wb_stats(dtc->wb, &filepages, &headroom, &dtc->dirty, in domain_dirty_avail()
893 dtc->dirty += writeback; in domain_dirty_avail()
899 * __wb_calc_thresh - @wb's share of dirty threshold
912 * - starving fast devices
913 * - piling up dirty pages (that will take long time to sync) on slow devices
916 * bounded by the bdi->min_ratio and/or bdi->max_ratio parameters, if set.
926 struct bdi_writeback *wb = dtc->wb; in __wb_calc_thresh()
935 fprop_fraction_percpu(&dom->completions, dtc->wb_completions, in __wb_calc_thresh()
938 wb_thresh = (thresh * (100 * BDI_RATIO_SCALE - bdi_min_ratio)) / (100 * BDI_RATIO_SCALE); in __wb_calc_thresh()
953 if (thresh > dtc->dirty) { in __wb_calc_thresh()
954 if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) in __wb_calc_thresh()
955 wb_thresh = max(wb_thresh, (thresh - dtc->dirty) / 100); in __wb_calc_thresh()
957 wb_thresh = max(wb_thresh, (thresh - dtc->dirty) / 8); in __wb_calc_thresh()
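
Lines 899-957 compute a wb's slice of the dirty threshold: the part of @thresh not reserved by min_ratio settings is split in proportion to each wb's recent writeout completions (lines 935-938), the wb's own min/max ratio bounds are then applied, and a floor derived from the remaining headroom keeps the slice from collapsing to zero (lines 953-957). A userspace sketch of the split (a plain numerator/denominator stands in for the kernel's completion fraction, and BDI_RATIO_SCALE = 10000 is an assumption):

#include <stdio.h>

#define BDI_RATIO_SCALE 10000ULL        /* assumed granularity of min/max ratios */

static unsigned long long wb_thresh_model(unsigned long long thresh,
                                          unsigned long long numerator,
                                          unsigned long long denominator,
                                          unsigned long long min_ratio_sum,
                                          unsigned long long wb_min_ratio,
                                          unsigned long long wb_max_ratio)
{
        unsigned long long wb_thresh;

        /* split the part not reserved by min_ratio settings (line 938) */
        wb_thresh = thresh * (100 * BDI_RATIO_SCALE - min_ratio_sum) /
                        (100 * BDI_RATIO_SCALE);
        wb_thresh = wb_thresh * numerator / (denominator ? denominator : 1);

        /* honour this wb's guaranteed minimum, cap at its maximum */
        wb_thresh += thresh * wb_min_ratio / (100 * BDI_RATIO_SCALE);
        if (wb_thresh > thresh * wb_max_ratio / (100 * BDI_RATIO_SCALE))
                wb_thresh = thresh * wb_max_ratio / (100 * BDI_RATIO_SCALE);

        return wb_thresh;
}

int main(void)
{
        /* a device doing 3/4 of recent writeback, no min ratio, 100% max */
        printf("wb_thresh = %llu of 100000 pages\n",
               wb_thresh_model(100000, 3, 4, 0, 0, 100 * BDI_RATIO_SCALE));
        return 0;
}
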
988-990 * f(dirty) := 1.0 + ((setpoint - dirty) / (limit - setpoint))^3
1008 x = div64_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT, in pos_ratio_polynom()
1009 (limit - setpoint) | 1); in pos_ratio_polynom()
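
The cubic above is what pos_ratio_polynom() evaluates in fixed point at lines 1008-1009. A standalone rendering (userspace C; RATELIMIT_CALC_SHIFT = 10 and the final clamp to [0, 2] are taken from the kernel but are not visible in the matches):

#include <stdio.h>
#include <stdint.h>

#define RATELIMIT_CALC_SHIFT    10

static int64_t pos_ratio_polynom_model(long setpoint, long dirty, long limit)
{
        int64_t pos_ratio, x;

        /* x = (setpoint - dirty) / (limit - setpoint), in fixed point */
        x = (int64_t)(setpoint - dirty) * (1 << RATELIMIT_CALC_SHIFT) /
                ((limit - setpoint) | 1);       /* "| 1" avoids div-by-zero */

        pos_ratio = x;
        pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;      /* x^2 */
        pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;      /* x^3 */
        pos_ratio += 1 << RATELIMIT_CALC_SHIFT;                 /* + 1.0 */

        if (pos_ratio < 0)
                pos_ratio = 0;
        if (pos_ratio > 2 << RATELIMIT_CALC_SHIFT)
                pos_ratio = 2 << RATELIMIT_CALC_SHIFT;
        return pos_ratio;
}

int main(void)
{
        long limit = 1000, setpoint = 800;

        for (long dirty = 600; dirty <= 1000; dirty += 100)
                printf("dirty=%4ld  pos_ratio=%.3f\n", dirty,
                       (double)pos_ratio_polynom_model(setpoint, dirty, limit) /
                                (1 << RATELIMIT_CALC_SHIFT));
        return 0;
}

Below the setpoint pos_ratio rises toward 2.0 so dirtiers may speed up; above it the ratio falls toward 0 as dirty approaches the hard limit, which is exactly the control law sketched at lines 1030-1034.
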
1030 * if (dirty < setpoint) scale up pos_ratio
1031 * if (dirty > setpoint) scale down pos_ratio
1033 * if (wb_dirty < wb_setpoint) scale up pos_ratio
1034 * if (wb_dirty > wb_setpoint) scale down pos_ratio
1056 * [x-axis line of the first pos_ratio ASCII plot in the comment above wb_position_ratio(); only the axis matched]
1084 * [x-axis line of the second pos_ratio ASCII plot; only the axis matched]
1089 * - start writing to a slow SD card and a fast disk at the same time. The SD
1091 * - the wb dirty thresh drops quickly due to change of JBOD workload
1095 struct bdi_writeback *wb = dtc->wb; in wb_position_ratio()
1096 unsigned long write_bw = READ_ONCE(wb->avg_write_bandwidth); in wb_position_ratio()
1097 unsigned long freerun = dirty_freerun_ceiling(dtc->thresh, dtc->bg_thresh); in wb_position_ratio()
1098 unsigned long limit = hard_dirty_limit(dtc_dom(dtc), dtc->thresh); in wb_position_ratio()
1099 unsigned long wb_thresh = dtc->wb_thresh; in wb_position_ratio()
1107 dtc->pos_ratio = 0; in wb_position_ratio()
1109 if (unlikely(dtc->dirty >= limit)) in wb_position_ratio()
1118 pos_ratio = pos_ratio_polynom(setpoint, dtc->dirty, limit); in wb_position_ratio()
1125 * This is especially important for fuse which sets bdi->max_ratio to in wb_position_ratio()
1132 * total amount of RAM is 16GB, bdi->max_ratio is equal to 1%, global in wb_position_ratio()
1145 if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) { in wb_position_ratio()
1148 if (dtc->wb_dirty >= wb_thresh) in wb_position_ratio()
1152 dtc->wb_bg_thresh); in wb_position_ratio()
1157 wb_pos_ratio = pos_ratio_polynom(wb_setpoint, dtc->wb_dirty, in wb_position_ratio()
1170 * but it would look too non-natural for the case of all in wb_position_ratio()
1172 * with bdi->max_ratio == 100%. in wb_position_ratio()
1181 dtc->pos_ratio = min(pos_ratio, wb_pos_ratio); in wb_position_ratio()
1187 * the wb is over/under its share of dirty pages, we want to scale in wb_position_ratio()
1194 * f(wb_dirty) := 1.0 + k * (wb_dirty - wb_setpoint) in wb_position_ratio()
1196-1198 * := (x_intercept - wb_dirty) / (x_intercept - wb_setpoint) in wb_position_ratio()
1203 * (2) k = - 1 / (8 * write_bw) (in single wb case) in wb_position_ratio()
1208 * [wb_setpoint - write_bw/2, wb_setpoint + write_bw/2] in wb_position_ratio()
1216 if (unlikely(wb_thresh > dtc->thresh)) in wb_position_ratio()
1217 wb_thresh = dtc->thresh; in wb_position_ratio()
1219 * scale global setpoint to wb's: in wb_position_ratio()
1222 x = div_u64((u64)wb_thresh << 16, dtc->thresh | 1); in wb_position_ratio()
1226 * (thresh - wb_thresh ~= 0) and transit to wb_thresh in JBOD case. in wb_position_ratio()
1228-1229 * span = (wb_thresh / thresh) * (8 * write_bw) + ((thresh - wb_thresh) / thresh) * wb_thresh in wb_position_ratio()
1232 span = (dtc->thresh - wb_thresh + 8 * write_bw) * (u64)x >> 16; in wb_position_ratio()
1235 if (dtc->wb_dirty < x_intercept - span / 4) { in wb_position_ratio()
1236 pos_ratio = div64_u64(pos_ratio * (x_intercept - dtc->wb_dirty), in wb_position_ratio()
1237 (x_intercept - wb_setpoint) | 1); in wb_position_ratio()
1247 if (dtc->wb_dirty < x_intercept) { in wb_position_ratio()
1248 if (dtc->wb_dirty > x_intercept / 8) in wb_position_ratio()
1250 dtc->wb_dirty); in wb_position_ratio()
1255 dtc->pos_ratio = pos_ratio; in wb_position_ratio()
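
Lines 1216-1250 scale the global pos_ratio down to the wb's own operating range: x = wb_thresh/thresh in 16.16 fixed point (line 1222), the span from the formula at lines 1228-1229, and a linear drop toward x_intercept (lines 1235-1237; in the full function x_intercept = wb_setpoint + span, which is not among the matches, and pos_ratio is cut to a quarter once wb_dirty gets close to x_intercept). A userspace sketch under those assumptions:

#include <stdio.h>
#include <stdint.h>

static uint64_t wb_scale_pos_ratio(uint64_t pos_ratio,
                                   unsigned long thresh,
                                   unsigned long wb_thresh,
                                   unsigned long setpoint,
                                   unsigned long wb_dirty,
                                   unsigned long write_bw)
{
        uint64_t x, wb_setpoint, span, x_intercept;

        if (wb_thresh > thresh)
                wb_thresh = thresh;

        /* x = wb_thresh / thresh in 16.16 fixed point (line 1222) */
        x = ((uint64_t)wb_thresh << 16) / (thresh | 1);
        wb_setpoint = (setpoint * x) >> 16;

        /* span formula from lines 1228-1229, computed as at line 1232 */
        span = ((uint64_t)(thresh - wb_thresh) + 8 * write_bw) * x >> 16;
        x_intercept = wb_setpoint + span;

        if (wb_dirty < x_intercept - span / 4)
                pos_ratio = pos_ratio * (x_intercept - wb_dirty) /
                            ((x_intercept - wb_setpoint) | 1);
        else
                pos_ratio /= 4; /* close to x_intercept: throttle hard */

        return pos_ratio;
}

int main(void)
{
        /* one slow wb holding ~1/10 of a 100k-page threshold, 1k pages/s */
        for (unsigned long wb_dirty = 8000; wb_dirty <= 24000; wb_dirty += 4000)
                printf("wb_dirty=%5lu -> pos_ratio=%llu/1024\n", wb_dirty,
                       (unsigned long long)wb_scale_pos_ratio(1024, 100000,
                                10000, 80000, wb_dirty, 1000));
        return 0;
}

With these numbers pos_ratio stays near 1.0 while wb_dirty sits close to the projected wb setpoint and falls off linearly as wb_dirty climbs toward x_intercept.
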
1262 const unsigned long period = roundup_pow_of_two(3 * HZ); in wb_update_write_bandwidth() local
1263 unsigned long avg = wb->avg_write_bandwidth; in wb_update_write_bandwidth()
1264 unsigned long old = wb->write_bandwidth; in wb_update_write_bandwidth()
1270-1272 * write_bandwidth = (bw * elapsed + write_bandwidth * (period - elapsed)) / period in wb_update_write_bandwidth()
1277 bw = written - min(written, wb->written_stamp); in wb_update_write_bandwidth()
1279 if (unlikely(elapsed > period)) { in wb_update_write_bandwidth()
1284 bw += (u64)wb->write_bandwidth * (period - elapsed); in wb_update_write_bandwidth()
1285 bw >>= ilog2(period); in wb_update_write_bandwidth()
1291 avg -= (avg - old) >> 3; in wb_update_write_bandwidth()
1294 avg += (old - avg) >> 3; in wb_update_write_bandwidth()
1300 long delta = avg - wb->avg_write_bandwidth; in wb_update_write_bandwidth()
1302 &wb->bdi->tot_write_bandwidth) <= 0); in wb_update_write_bandwidth()
1304 wb->write_bandwidth = bw; in wb_update_write_bandwidth()
1305 WRITE_ONCE(wb->avg_write_bandwidth, avg); in wb_update_write_bandwidth()
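
Lines 1262-1305 maintain two bandwidth figures per wb: write_bandwidth, a window-weighted estimate over a power-of-two period of roughly 3 s (lines 1262 and 1270-1272), and avg_write_bandwidth, which chases it in 1/8 steps to filter spikes (lines 1291-1294). A userspace model of both stages (HZ = 1000 and period = 4096 jiffies are assumptions):

#include <stdio.h>

#define HZ      1000UL

struct wb_model {
        unsigned long write_bandwidth;          /* latest windowed estimate */
        unsigned long avg_write_bandwidth;      /* smoothed value */
};

static void update_write_bandwidth(struct wb_model *wb,
                                   unsigned long elapsed,       /* jiffies */
                                   unsigned long written)       /* pages */
{
        const unsigned long period = 4096;      /* roundup_pow_of_two(3 * HZ) */
        unsigned long avg = wb->avg_write_bandwidth;
        unsigned long old = wb->write_bandwidth;
        unsigned long long bw = (unsigned long long)written * HZ;

        if (elapsed > period) {
                /* stale sample: fall back to a plain average */
                bw /= elapsed;
                avg = bw;
        } else {
                /* lines 1270-1272: blend the new sample into the window */
                bw += (unsigned long long)old * (period - elapsed);
                bw >>= 12;                      /* ilog2(period) */

                /*
                 * lines 1291-1294: nudge avg by 1/8 of its lag behind the
                 * previous estimate, only when the trend is consistent
                 */
                if (avg > old && old >= (unsigned long)bw)
                        avg -= (avg - old) >> 3;
                if (avg < old && old <= (unsigned long)bw)
                        avg += (old - avg) >> 3;
        }

        wb->write_bandwidth = bw;
        wb->avg_write_bandwidth = avg ? avg : 1;
}

int main(void)
{
        struct wb_model wb = { 25600, 25600 };  /* ~100 MB/s in 4K pages/s */

        /* the device suddenly sustains ~200 MB/s: 10240 pages per 200 ms */
        for (int i = 0; i < 10; i++) {
                update_write_bandwidth(&wb, HZ / 5, 10240);
                printf("sample %2d: bw=%lu avg=%lu pages/s\n",
                       i, wb.write_bandwidth, wb.avg_write_bandwidth);
        }
        return 0;
}
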
1311 unsigned long thresh = dtc->thresh; in update_dirty_limit()
1312 unsigned long limit = dom->dirty_limit; in update_dirty_limit()
1325 * dom->dirty_limit which is guaranteed to lie above the dirty pages. in update_dirty_limit()
1327 thresh = max(thresh, dtc->dirty); in update_dirty_limit()
1329 limit -= (limit - thresh) >> 5; in update_dirty_limit()
1334 dom->dirty_limit = limit; in update_dirty_limit()
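
Worked example of line 1329: with dom->dirty_limit = 120000 pages and thresh = 100000, one update lowers the limit by (120000 - 100000) >> 5 = 625 pages to 119375; each subsequent update closes about 1/32 of the remaining gap, so the domain-wide limit drifts down smoothly toward the new threshold instead of dropping below pages that are already dirty (line 1325).
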
1345 if (time_before(now, dom->dirty_limit_tstamp + BANDWIDTH_INTERVAL)) in domain_update_dirty_limit()
1348 spin_lock(&dom->lock); in domain_update_dirty_limit()
1349 if (time_after_eq(now, dom->dirty_limit_tstamp + BANDWIDTH_INTERVAL)) { in domain_update_dirty_limit()
1351 dom->dirty_limit_tstamp = now; in domain_update_dirty_limit()
1353 spin_unlock(&dom->lock); in domain_update_dirty_limit()
1357 * Maintain wb->dirty_ratelimit, the base dirty throttle rate.
1366 struct bdi_writeback *wb = dtc->wb; in wb_update_dirty_ratelimit()
1367 unsigned long dirty = dtc->dirty; in wb_update_dirty_ratelimit()
1368 unsigned long freerun = dirty_freerun_ceiling(dtc->thresh, dtc->bg_thresh); in wb_update_dirty_ratelimit()
1369 unsigned long limit = hard_dirty_limit(dtc_dom(dtc), dtc->thresh); in wb_update_dirty_ratelimit()
1371 unsigned long write_bw = wb->avg_write_bandwidth; in wb_update_dirty_ratelimit()
1372 unsigned long dirty_ratelimit = wb->dirty_ratelimit; in wb_update_dirty_ratelimit()
1382 * when dirty pages are truncated by userspace or re-dirtied by FS. in wb_update_dirty_ratelimit()
1384 dirty_rate = (dirtied - wb->dirtied_stamp) * HZ / elapsed; in wb_update_dirty_ratelimit()
1390 dtc->pos_ratio >> RATELIMIT_CALC_SHIFT; in wb_update_dirty_ratelimit()
1434 * wb->dirty_ratelimit = balanced_dirty_ratelimit; in wb_update_dirty_ratelimit()
1442 * task_ratelimit - dirty_ratelimit in wb_update_dirty_ratelimit()
1443 * = (pos_ratio - 1) * dirty_ratelimit in wb_update_dirty_ratelimit()
1452 * - dirty_ratelimit > balanced_dirty_ratelimit in wb_update_dirty_ratelimit()
1453 * - dirty_ratelimit > task_ratelimit (dirty pages are above setpoint) in wb_update_dirty_ratelimit()
1459 * |task_ratelimit - dirty_ratelimit| is used to limit the step size in wb_update_dirty_ratelimit()
1462 * due to the small 200ms estimation period of dirty_rate (we want to in wb_update_dirty_ratelimit()
1463 * keep that period small to reduce time lags). in wb_update_dirty_ratelimit()
1474 if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) { in wb_update_dirty_ratelimit()
1475 dirty = dtc->wb_dirty; in wb_update_dirty_ratelimit()
1476 setpoint = (dtc->wb_thresh + dtc->wb_bg_thresh) / 2; in wb_update_dirty_ratelimit()
1480 x = min3(wb->balanced_dirty_ratelimit, in wb_update_dirty_ratelimit()
1483 step = x - dirty_ratelimit; in wb_update_dirty_ratelimit()
1485 x = max3(wb->balanced_dirty_ratelimit, in wb_update_dirty_ratelimit()
1488 step = dirty_ratelimit - x; in wb_update_dirty_ratelimit()
1505 dirty_ratelimit -= step; in wb_update_dirty_ratelimit()
1507 WRITE_ONCE(wb->dirty_ratelimit, max(dirty_ratelimit, 1UL)); in wb_update_dirty_ratelimit()
1508 wb->balanced_dirty_ratelimit = balanced_dirty_ratelimit; in wb_update_dirty_ratelimit()
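
Lines 1357-1508 run the feedback loop for wb->dirty_ratelimit: roughly every 200 ms the observed dirty_rate (line 1384) and the task-level rate dirty_ratelimit * pos_ratio (line 1390) yield a "balanced" rate, task_ratelimit scaled by write_bw / dirty_rate (the quantity the comments around lines 1434-1453 reason about), and dirty_ratelimit is stepped toward it under the guards at lines 1474-1507. A userspace model keeping only the core estimate (the step limiting is simplified to moving 1/8 of the gap per round):

#include <stdio.h>

#define RATELIMIT_CALC_SHIFT    10      /* assumed, as in the kernel */

int main(void)
{
        unsigned long write_bw = 25600;         /* pages/s the device sustains */
        unsigned long dirty_ratelimit = 25600;  /* current base throttle rate */
        unsigned long pos_ratio = 1 << RATELIMIT_CALC_SHIFT;    /* at setpoint */
        unsigned long nr_tasks = 4;

        for (int i = 0; i < 8; i++) {
                unsigned long task_ratelimit =
                        ((unsigned long long)dirty_ratelimit * pos_ratio)
                                >> RATELIMIT_CALC_SHIFT;
                /* every task dirties at task_ratelimit => observed dirty rate */
                unsigned long dirty_rate = task_ratelimit * nr_tasks;
                unsigned long balanced =
                        (unsigned long long)task_ratelimit * write_bw /
                                (dirty_rate | 1);

                /* step a fraction of the way toward the balanced rate */
                if (dirty_ratelimit > balanced)
                        dirty_ratelimit -= (dirty_ratelimit - balanced) / 8;
                else
                        dirty_ratelimit += (balanced - dirty_ratelimit) / 8;

                printf("round %d: balanced=%lu dirty_ratelimit=%lu pages/s\n",
                       i, balanced, dirty_ratelimit);
        }
        return 0;
}

Because dirty_rate scales with the current task_ratelimit, the ratio write_bw / dirty_rate cancels the current rate out, so the balanced value lands near write_bw / nr_tasks no matter where dirty_ratelimit currently sits; the loop only has to walk there without overreacting to noise in the short estimation period.
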
1517 struct bdi_writeback *wb = gdtc->wb; in __wb_update_bandwidth()
1523 spin_lock(&wb->list_lock); in __wb_update_bandwidth()
1531 elapsed = max(now - wb->bw_time_stamp, 1UL); in __wb_update_bandwidth()
1532 dirtied = percpu_counter_read(&wb->stat[WB_DIRTIED]); in __wb_update_bandwidth()
1533 written = percpu_counter_read(&wb->stat[WB_WRITTEN]); in __wb_update_bandwidth()
1550 wb->dirtied_stamp = dirtied; in __wb_update_bandwidth()
1551 wb->written_stamp = written; in __wb_update_bandwidth()
1552 WRITE_ONCE(wb->bw_time_stamp, now); in __wb_update_bandwidth()
1553 spin_unlock(&wb->list_lock); in __wb_update_bandwidth()
1569 unsigned long elapsed = now - READ_ONCE(wb->bw_time_stamp); in wb_bandwidth_estimate_start()
1572 !atomic_read(&wb->writeback_inodes)) { in wb_bandwidth_estimate_start()
1573 spin_lock(&wb->list_lock); in wb_bandwidth_estimate_start()
1574 wb->dirtied_stamp = wb_stat(wb, WB_DIRTIED); in wb_bandwidth_estimate_start()
1575 wb->written_stamp = wb_stat(wb, WB_WRITTEN); in wb_bandwidth_estimate_start()
1576 WRITE_ONCE(wb->bw_time_stamp, now); in wb_bandwidth_estimate_start()
1577 spin_unlock(&wb->list_lock); in wb_bandwidth_estimate_start()
1586 * global_zone_page_state() too often. So scale it near-sqrt to the safety margin
1593 return 1UL << (ilog2(thresh - dirty) >> 1); in dirty_poll_interval()
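
Worked example of line 1593: with thresh - dirty = 4096 pages of margin, ilog2(4096) = 12 and the task may dirty 1 << (12 >> 1) = 64 more pages before it re-reads the global counters; a 1,000,000-page margin (ilog2 = 19) gives 1 << 9 = 512 pages. The poll interval therefore grows roughly with the square root of the remaining headroom, which is the "near-sqrt" scaling the comment at line 1586 refers to.
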
1601 unsigned long bw = READ_ONCE(wb->avg_write_bandwidth); in wb_max_pause()
1623 long hi = ilog2(READ_ONCE(wb->avg_write_bandwidth)); in wb_min_pause()
1624 long lo = ilog2(READ_ONCE(wb->dirty_ratelimit)); in wb_min_pause()
1629 /* target for 10ms pause on 1-dd case */ in wb_min_pause()
1633 * Scale up pause time for concurrent dirtiers in order to reduce CPU in wb_min_pause()
1639 t += (hi - lo) * (10 * HZ) / 1024; in wb_min_pause()
1664 * case fio-mmap-randwrite-64k, which does 16*{sync read, async write}. in wb_min_pause()
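
Worked example for lines 1623-1639, assuming HZ = 1000: the base target is a 10 ms pause (HZ/100 jiffies). If avg_write_bandwidth is 2^15 pages/s but dirty_ratelimit has been throttled down to 2^12 pages/s because roughly eight tasks share the device, then hi - lo = 3 and the target grows by 3 * 10 * HZ / 1024, about 29 extra jiffies, i.e. roughly 10 ms per doubling of concurrent dirtiers, so the per-task wakeup rate (and its CPU overhead) stays about constant as the number of dirtiers grows.
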
1694 struct bdi_writeback *wb = dtc->wb; in wb_dirty_limits()
1700 * - in JBOD setup, wb_thresh can fluctuate a lot in wb_dirty_limits()
1701 * - in a system with HDD and USB key, the USB key may somehow in wb_dirty_limits()
1710 dtc->wb_thresh = __wb_calc_thresh(dtc, dtc->thresh); in wb_dirty_limits()
1711 dtc->wb_bg_thresh = dtc->thresh ? in wb_dirty_limits()
1712 div_u64((u64)dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0; in wb_dirty_limits()
1720 * reported dirty, even though there are thresh-m pages in wb_dirty_limits()
1724 if (dtc->wb_thresh < 2 * wb_stat_error()) { in wb_dirty_limits()
1726 dtc->wb_dirty = wb_reclaimable + wb_stat_sum(wb, WB_WRITEBACK); in wb_dirty_limits()
1729 dtc->wb_dirty = wb_reclaimable + wb_stat(wb, WB_WRITEBACK); in wb_dirty_limits()
1739 dirty = dtc->wb_dirty; in domain_poll_intv()
1740 thresh = dtc->wb_thresh; in domain_poll_intv()
1742 dirty = dtc->dirty; in domain_poll_intv()
1743 thresh = dtc->thresh; in domain_poll_intv()
1750 * Throttle it only when the background writeback cannot catch-up. This avoids
1756 * for strictlimit-ing.
1765 dirty = dtc->wb_dirty; in domain_dirty_freerun()
1766 thresh = dtc->wb_thresh; in domain_dirty_freerun()
1767 bg_thresh = dtc->wb_bg_thresh; in domain_dirty_freerun()
1769 dirty = dtc->dirty; in domain_dirty_freerun()
1770 thresh = dtc->thresh; in domain_dirty_freerun()
1771 bg_thresh = dtc->bg_thresh; in domain_dirty_freerun()
1773 dtc->freerun = dirty <= dirty_freerun_ceiling(thresh, bg_thresh); in domain_dirty_freerun()
1787 dtc->freerun = false; in wb_dirty_freerun()
1795 * LOCAL_THROTTLE tasks must not be throttled when below the per-wb in wb_dirty_freerun()
1798 if (!(current->flags & PF_LOCAL_THROTTLE)) in wb_dirty_freerun()
1801 dtc->freerun = dtc->wb_dirty < in wb_dirty_freerun()
1802 dirty_freerun_ceiling(dtc->wb_thresh, dtc->wb_bg_thresh); in wb_dirty_freerun()
1808 dtc->dirty_exceeded = (dtc->wb_dirty > dtc->wb_thresh) && in wb_dirty_exceeded()
1809 ((dtc->dirty > dtc->thresh) || strictlimit); in wb_dirty_exceeded()
1820 if (dtc->freerun) in balance_wb_limits()
1844 long period; in balance_dirty_pages() local
1851 struct backing_dev_info *bdi = wb->bdi; in balance_dirty_pages()
1852 bool strictlimit = bdi->capabilities & BDI_CAP_STRICTLIMIT; in balance_dirty_pages()
1879 if (!laptop_mode && nr_dirty > gdtc->bg_thresh && in balance_dirty_pages()
1887 if (gdtc->freerun && (!mdtc || mdtc->freerun)) { in balance_dirty_pages()
1895 current->dirty_paused_when = now; in balance_dirty_pages()
1896 current->nr_dirtied = 0; in balance_dirty_pages()
1899 current->nr_dirtied_pause = min(intv, m_intv); in balance_dirty_pages()
1914 if (gdtc->freerun) in balance_dirty_pages()
1926 if (mdtc->freerun) in balance_dirty_pages()
1928 if (mdtc->pos_ratio < gdtc->pos_ratio) in balance_dirty_pages()
1932 wb->dirty_exceeded = gdtc->dirty_exceeded || in balance_dirty_pages()
1933 (mdtc && mdtc->dirty_exceeded); in balance_dirty_pages()
1934 if (time_is_before_jiffies(READ_ONCE(wb->bw_time_stamp) + in balance_dirty_pages()
1939 dirty_ratelimit = READ_ONCE(wb->dirty_ratelimit); in balance_dirty_pages()
1940 task_ratelimit = ((u64)dirty_ratelimit * sdtc->pos_ratio) >> in balance_dirty_pages()
1942 max_pause = wb_max_pause(wb, sdtc->wb_dirty); in balance_dirty_pages()
1948 period = max_pause; in balance_dirty_pages()
1952 period = HZ * pages_dirtied / task_ratelimit; in balance_dirty_pages()
1953 pause = period; in balance_dirty_pages()
1954 if (current->dirty_paused_when) in balance_dirty_pages()
1955 pause -= now - current->dirty_paused_when; in balance_dirty_pages()
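
Worked example for lines 1939-1955, assuming HZ = 1000: with dirty_ratelimit = 12800 pages/s and pos_ratio = 0.5 (512 out of 1024), task_ratelimit is 6400 pages/s; having dirtied pages_dirtied = 64 pages, the nominal period is HZ * 64 / 6400 = 10 jiffies, and if 3 jiffies have already elapsed since dirty_paused_when the task only needs to sleep for the remaining 7.
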
1958 * for up to 800ms from time to time on 1-HDD; so does xfs, in balance_dirty_pages()
1965 sdtc->thresh, in balance_dirty_pages()
1966 sdtc->bg_thresh, in balance_dirty_pages()
1967 sdtc->dirty, in balance_dirty_pages()
1968 sdtc->wb_thresh, in balance_dirty_pages()
1969 sdtc->wb_dirty, in balance_dirty_pages()
1973 period, in balance_dirty_pages()
1976 if (pause < -HZ) { in balance_dirty_pages()
1977 current->dirty_paused_when = now; in balance_dirty_pages()
1978 current->nr_dirtied = 0; in balance_dirty_pages()
1979 } else if (period) { in balance_dirty_pages()
1980 current->dirty_paused_when += period; in balance_dirty_pages()
1981 current->nr_dirtied = 0; in balance_dirty_pages()
1982 } else if (current->nr_dirtied_pause <= pages_dirtied) in balance_dirty_pages()
1983 current->nr_dirtied_pause += pages_dirtied; in balance_dirty_pages()
1988 now += min(pause - max_pause, max_pause); in balance_dirty_pages()
1994 sdtc->thresh, in balance_dirty_pages()
1995 sdtc->bg_thresh, in balance_dirty_pages()
1996 sdtc->dirty, in balance_dirty_pages()
1997 sdtc->wb_thresh, in balance_dirty_pages()
1998 sdtc->wb_dirty, in balance_dirty_pages()
2002 period, in balance_dirty_pages()
2006 ret = -EAGAIN; in balance_dirty_pages()
2010 bdi->last_bdp_sleep = jiffies; in balance_dirty_pages()
2013 current->dirty_paused_when = now + pause; in balance_dirty_pages()
2014 current->nr_dirtied = 0; in balance_dirty_pages()
2015 current->nr_dirtied_pause = nr_dirtied_pause; in balance_dirty_pages()
2029 * In theory 1 page is enough to keep the consumer-producer in balance_dirty_pages()
2034 if (sdtc->wb_dirty <= wb_stat_error()) in balance_dirty_pages()
2048 * dirty tsk->nr_dirtied_pause pages;
2052 * (tsk->nr_dirtied_pause - 1) pages, balance_dirty_pages() will never be
2062 * balance_dirty_pages_ratelimited_flags - Balance dirty memory state.
2072 * Return: If @flags contains BDP_ASYNC, it may return -EAGAIN to
2081 struct inode *inode = mapping->host; in balance_dirty_pages_ratelimited_flags()
2088 if (!(bdi->capabilities & BDI_CAP_WRITEBACK)) in balance_dirty_pages_ratelimited_flags()
2094 wb = &bdi->wb; in balance_dirty_pages_ratelimited_flags()
2096 ratelimit = current->nr_dirtied_pause; in balance_dirty_pages_ratelimited_flags()
2097 if (wb->dirty_exceeded) in balance_dirty_pages_ratelimited_flags()
2098 ratelimit = min(ratelimit, 32 >> (PAGE_SHIFT - 10)); in balance_dirty_pages_ratelimited_flags()
2105 * time, hence all honoured too large initial task->nr_dirtied_pause. in balance_dirty_pages_ratelimited_flags()
2108 if (unlikely(current->nr_dirtied >= ratelimit)) in balance_dirty_pages_ratelimited_flags()
2116 * short-lived tasks (eg. gcc invocations in a kernel build) escaping in balance_dirty_pages_ratelimited_flags()
2117 * the dirty throttling and livelock other long-run dirtiers. in balance_dirty_pages_ratelimited_flags()
2120 if (*p > 0 && current->nr_dirtied < ratelimit) { in balance_dirty_pages_ratelimited_flags()
2122 nr_pages_dirtied = min(*p, ratelimit - current->nr_dirtied); in balance_dirty_pages_ratelimited_flags()
2123 *p -= nr_pages_dirtied; in balance_dirty_pages_ratelimited_flags()
2124 current->nr_dirtied += nr_pages_dirtied; in balance_dirty_pages_ratelimited_flags()
2128 if (unlikely(current->nr_dirtied >= ratelimit)) in balance_dirty_pages_ratelimited_flags()
2129 ret = balance_dirty_pages(wb, current->nr_dirtied, flags); in balance_dirty_pages_ratelimited_flags()
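
Lines 2081-2129 show the cheap, per-task gate in front of balance_dirty_pages(): a task may dirty up to current->nr_dirtied_pause pages between trips into the full balancing path, the allowance shrinks to 32 KB worth of pages while the wb is over its limits (line 2098), and a per-CPU leak counter catches short-lived tasks (lines 2116-2124). A userspace sketch of just the per-task trigger (4 KB pages assumed, the per-CPU counter omitted):

#include <stdio.h>
#include <stdbool.h>

#define PAGE_SHIFT      12      /* 4K pages assumed */

struct task_model {
        unsigned long nr_dirtied;
        unsigned long nr_dirtied_pause;
};

static bool should_balance(struct task_model *tsk, bool dirty_exceeded,
                           unsigned long newly_dirtied)
{
        unsigned long ratelimit = tsk->nr_dirtied_pause;

        /* line 2098: tighten the allowance while over the dirty limits */
        if (dirty_exceeded && ratelimit > (32 >> (PAGE_SHIFT - 10)))
                ratelimit = 32 >> (PAGE_SHIFT - 10);

        tsk->nr_dirtied += newly_dirtied;
        return tsk->nr_dirtied >= ratelimit;
}

int main(void)
{
        struct task_model tsk = { .nr_dirtied = 0, .nr_dirtied_pause = 64 };

        for (int i = 1; i <= 12; i++) {
                if (should_balance(&tsk, false, 8)) {
                        printf("write %2d: enter balance_dirty_pages() after %lu pages\n",
                               i, tsk.nr_dirtied);
                        tsk.nr_dirtied = 0;     /* the pause path resets this */
                }
        }
        return 0;
}
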
2137 * balance_dirty_pages_ratelimited - balance dirty memory state.
2160 struct bdi_writeback *wb = dtc->wb; in wb_bg_dirty_limits()
2162 dtc->wb_bg_thresh = __wb_calc_thresh(dtc, dtc->bg_thresh); in wb_bg_dirty_limits()
2163 if (dtc->wb_bg_thresh < 2 * wb_stat_error()) in wb_bg_dirty_limits()
2164 dtc->wb_dirty = wb_stat_sum(wb, WB_RECLAIMABLE); in wb_bg_dirty_limits()
2166 dtc->wb_dirty = wb_stat(wb, WB_RECLAIMABLE); in wb_bg_dirty_limits()
2173 if (dtc->dirty > dtc->bg_thresh) in domain_over_bg_thresh()
2177 if (dtc->wb_dirty > dtc->wb_bg_thresh) in domain_over_bg_thresh()
2184 * wb_over_bg_thresh - does @wb need to be written back?
2220 * and a different non-zero value will wakeup the writeback threads. in dirty_writeback_centisecs_handler()
2244 * then push it back - the user is still using the disk.
2248 mod_timer(&info->laptop_mode_wb_timer, jiffies + laptop_mode); in laptop_io_completion()
2263 del_timer(&bdi->laptop_mode_wb_timer); in laptop_sync_completion()
2269 * If ratelimit_pages is too high then we can get into dirty-data overload
2284 dom->dirty_limit = dirty_thresh; in writeback_set_ratelimit()
2375 * We used to scale dirty pages according to how total memory
2378 * However, that was when we used "dirty_ratio" to scale with
2380 * is now applied to total non-HIGHPAGE memory, and as such we can't
2383 * non-HIGHMEM memory.
2385 * But we might still want to scale the dirty_ratio by how
2402 * tag_pages_for_writeback - tag pages to be written by writeback
2418 XA_STATE(xas, &mapping->i_pages, start); in tag_pages_for_writeback()
2447 if (unlikely(folio->mapping != mapping)) in folio_prepare_writeback()
2457 if (wbc->sync_mode == WB_SYNC_NONE) in folio_prepare_writeback()
2471 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) in wbc_to_tag()
2478 if (wbc->range_cyclic) in wbc_end()
2479 return -1; in wbc_end()
2480 return wbc->range_end >> PAGE_SHIFT; in wbc_end()
2489 folio = folio_batch_next(&wbc->fbatch); in writeback_get_folio()
2491 folio_batch_release(&wbc->fbatch); in writeback_get_folio()
2493 filemap_get_folios_tag(mapping, &wbc->index, wbc_end(wbc), in writeback_get_folio()
2494 wbc_to_tag(wbc), &wbc->fbatch); in writeback_get_folio()
2495 folio = folio_batch_next(&wbc->fbatch); in writeback_get_folio()
2506 trace_wbc_writepage(wbc, inode_to_bdi(mapping->host)); in writeback_get_folio()
2511 * writeback_iter - iterate folio of a mapping for writeback
2515 * @error: in-out pointer for writeback errors (see below)
2518 * @wbc on @mapping and should be called in a while loop in the ->writepages
2525 * If there was an error in the per-folio writeback inside the writeback_iter()
2540 folio_batch_init(&wbc->fbatch); in writeback_iter()
2541 wbc->saved_err = *error = 0; in writeback_iter()
2547 * For non-cyclic writeback we always start at the beginning of in writeback_iter()
2550 if (wbc->range_cyclic) in writeback_iter()
2551 wbc->index = mapping->writeback_index; in writeback_iter()
2553 wbc->index = wbc->range_start >> PAGE_SHIFT; in writeback_iter()
2560 * For data-integrity writeback we have to be careful so that we in writeback_iter()
2566 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) in writeback_iter()
2567 tag_pages_for_writeback(mapping, wbc->index, in writeback_iter()
2570 wbc->nr_to_write -= folio_nr_pages(folio); in writeback_iter()
2577 * we run past wbc->nr_to_write or encounter errors. in writeback_iter()
2578 * We stash away the first error we encounter in wbc->saved_err in writeback_iter()
2583 * wbc->nr_to_write or encounter the first error. in writeback_iter()
2585 if (wbc->sync_mode == WB_SYNC_ALL) { in writeback_iter()
2586 if (*error && !wbc->saved_err) in writeback_iter()
2587 wbc->saved_err = *error; in writeback_iter()
2589 if (*error || wbc->nr_to_write <= 0) in writeback_iter()
2601 * writeback access order inversion - we should only ever lock in writeback_iter()
2602 * multiple pages in ascending page->index order, and looping in writeback_iter()
2606 if (wbc->range_cyclic) in writeback_iter()
2607 mapping->writeback_index = 0; in writeback_iter()
2613 *error = wbc->saved_err; in writeback_iter()
2618 if (wbc->range_cyclic) in writeback_iter()
2619 mapping->writeback_index = folio_next_index(folio); in writeback_iter()
2620 folio_batch_release(&wbc->fbatch); in writeback_iter()
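
Lines 2511-2619 document and drive writeback_iter(): the intended use is a plain while loop in a filesystem's ->writepages, with the iterator handling tagging for integrity writeback, folio batching, cyclic ranges and error bookkeeping. A sketch of that calling pattern (kernel C; example_write_folio() is a hypothetical helper, not part of this file):

#include <linux/pagemap.h>
#include <linux/writeback.h>

/* hypothetical per-folio writer: a real fs would start I/O here */
static int example_write_folio(struct folio *folio,
                               struct writeback_control *wbc)
{
        folio_unlock(folio);    /* writepage-style: unlock when done */
        return 0;
}

static int example_writepages(struct address_space *mapping,
                              struct writeback_control *wbc)
{
        struct folio *folio = NULL;
        int error;

        /*
         * writeback_iter() hands back locked folios one at a time, ends
         * the loop once the range (or nr_to_write) is exhausted, and
         * leaves the first saved error in &error.
         */
        while ((folio = writeback_iter(mapping, wbc, folio, &error)))
                error = example_write_folio(folio, wbc);

        return error;
}
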
2626 * write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
2628 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
2664 err = mapping->a_ops->writepage(&folio->page, wbc); in writeback_use_writepage()
2681 if (wbc->nr_to_write <= 0) in do_writepages()
2683 wb = inode_to_wb_wbc(mapping->host, wbc); in do_writepages()
2686 if (mapping->a_ops->writepages) { in do_writepages()
2687 ret = mapping->a_ops->writepages(mapping, wbc); in do_writepages()
2688 } else if (mapping->a_ops->writepage) { in do_writepages()
2694 if (ret != -ENOMEM || wbc->sync_mode != WB_SYNC_ALL) in do_writepages()
2711 if (time_is_before_jiffies(READ_ONCE(wb->bw_time_stamp) + in do_writepages()
2736 struct inode *inode = mapping->host; in folio_account_dirtied()
2753 current->nr_dirtied += nr; in folio_account_dirtied()
2768 lruvec_stat_mod_folio(folio, NR_FILE_DIRTY, -nr); in folio_account_cleaned()
2769 zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr); in folio_account_cleaned()
2770 wb_stat_mod(wb, WB_RECLAIMABLE, -nr); in folio_account_cleaned()
2794 xa_lock_irqsave(&mapping->i_pages, flags); in __folio_mark_dirty()
2795 if (folio->mapping) { /* Race with truncate? */ in __folio_mark_dirty()
2798 __xa_set_mark(&mapping->i_pages, folio_index(folio), in __folio_mark_dirty()
2801 xa_unlock_irqrestore(&mapping->i_pages, flags); in __folio_mark_dirty()
2805 * filemap_dirty_folio - Mark a folio dirty for filesystems which do not use buffer_heads.
2816 * that case, but not all the buffers. This is a "bottom-up" dirtying,
2817 * whereas block_dirty_folio() is a "top-down" dirtying.
2830 if (mapping->host) { in filemap_dirty_folio()
2832 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); in filemap_dirty_folio()
2839 * folio_redirty_for_writepage - Decline to write a dirty folio.
2853 struct address_space *mapping = folio->mapping; in folio_redirty_for_writepage()
2857 wbc->pages_skipped += nr; in folio_redirty_for_writepage()
2860 struct inode *inode = mapping->host; in folio_redirty_for_writepage()
2865 current->nr_dirtied -= nr; in folio_redirty_for_writepage()
2866 node_stat_mod_folio(folio, NR_DIRTIED, -nr); in folio_redirty_for_writepage()
2867 wb_stat_mod(wb, WB_DIRTIED, -nr); in folio_redirty_for_writepage()
2875 * folio_mark_dirty - Mark a folio as being modified.
2905 return mapping->a_ops->dirty_folio(mapping, folio); in folio_mark_dirty()
2914 * folio->mapping->host, and if the folio is unlocked. This is because another
2917 * Usually, the folio _is_ locked, or the caller is a user-space process which
2951 struct inode *inode = mapping->host; in __folio_cancel_dirty()
2973 * write-for-sync can discover it via a PAGECACHE_TAG_DIRTY walk.
2974 * The ->writepage implementation will run either folio_start_writeback()
2989 struct inode *inode = mapping->host; in folio_clear_dirty_for_io()
2998 * (b) we tell the low-level filesystem to in folio_clear_dirty_for_io()
3009 * has no effect on the actual dirty bit - since in folio_clear_dirty_for_io()
3031 lruvec_stat_mod_folio(folio, NR_FILE_DIRTY, -nr); in folio_clear_dirty_for_io()
3032 zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr); in folio_clear_dirty_for_io()
3033 wb_stat_mod(wb, WB_RECLAIMABLE, -nr); in folio_clear_dirty_for_io()
3045 atomic_inc(&wb->writeback_inodes); in wb_inode_writeback_start()
3051 atomic_dec(&wb->writeback_inodes); in wb_inode_writeback_end()
3059 spin_lock_irqsave(&wb->work_lock, flags); in wb_inode_writeback_end()
3060 if (test_bit(WB_registered, &wb->state)) in wb_inode_writeback_end()
3061 queue_delayed_work(bdi_wq, &wb->bw_dwork, BANDWIDTH_INTERVAL); in wb_inode_writeback_end()
3062 spin_unlock_irqrestore(&wb->work_lock, flags); in wb_inode_writeback_end()
3072 struct inode *inode = mapping->host; in __folio_end_writeback()
3076 xa_lock_irqsave(&mapping->i_pages, flags); in __folio_end_writeback()
3078 __xa_clear_mark(&mapping->i_pages, folio_index(folio), in __folio_end_writeback()
3080 if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) { in __folio_end_writeback()
3083 wb_stat_mod(wb, WB_WRITEBACK, -nr); in __folio_end_writeback()
3089 if (mapping->host && !mapping_tagged(mapping, in __folio_end_writeback()
3091 sb_clear_inode_writeback(mapping->host); in __folio_end_writeback()
3093 xa_unlock_irqrestore(&mapping->i_pages, flags); in __folio_end_writeback()
3098 lruvec_stat_mod_folio(folio, NR_WRITEBACK, -nr); in __folio_end_writeback()
3099 zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr); in __folio_end_writeback()
3114 XA_STATE(xas, &mapping->i_pages, folio_index(folio)); in __folio_start_writeback()
3115 struct inode *inode = mapping->host; in __folio_start_writeback()
3127 if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) { in __folio_start_writeback()
3140 if (mapping->host && !on_wblist) in __folio_start_writeback()
3141 sb_mark_inode_writeback(mapping->host); in __folio_start_writeback()
3164 * folio_wait_writeback - Wait for a folio to finish writeback.
3185 * folio_wait_writeback_killable - Wait for a folio to finish writeback.
3195 * Return: 0 on success, -EINTR if we get a fatal signal while waiting.
3202 return -EINTR; in folio_wait_writeback_killable()
3210 * folio_wait_stable() - wait for writeback to finish, if necessary.