xref: /linux/fs/xfs/xfs_zone_space_resv.c (revision c148bc7535650fbfa95a1f571b9ffa2ab478ea33)
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2023-2025 Christoph Hellwig.
 * Copyright (c) 2024-2025, Western Digital Corporation or its affiliates.
 */
#include "xfs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_rtbitmap.h"
#include "xfs_zone_alloc.h"
#include "xfs_zone_priv.h"
#include "xfs_zones.h"

/*
 * Note: the zoned allocator does not support a rtextsize > 1, so this code and
 * the allocator itself use file system blocks interchangeably with realtime
 * extents without doing the otherwise required conversions.
 */

/*
 * Per-task space reservation.
 *
 * Tasks that need to wait for GC to free up space allocate one of these
 * on-stack and add it to the per-mount zi_reclaim_reservations list.
 * The GC thread will then wake the tasks in order when space becomes available.
 */
struct xfs_zone_reservation {
	struct list_head	entry;
	struct task_struct	*task;
	xfs_filblks_t		count_fsb;
};

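/*
 * Life cycle of a waiter (see xfs_zoned_reserve_available() below): the task
 * queues its reservation on zi_reclaim_reservations under zi_reservation_lock,
 * then loops over set_current_state(TASK_KILLABLE) and schedule() until its
 * xfs_dec_freecounter() call returns something other than -ENOSPC, the file
 * system is shut down, a fatal signal is pending, or GC cannot make further
 * progress.  xfs_zoned_add_available() and xfs_zoned_resv_wake_all() wake the
 * waiters.
 */
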
/*
 * Calculate the default number of blocks reserved from a free space counter.
 *
 * XC_FREE_RTEXTENTS counts the user-available capacity, up to which the file
 * system can be filled, while XC_FREE_RTAVAILABLE counts the blocks instantly
 * available for writes without waiting for GC.
 *
 * For XC_FREE_RTAVAILABLE only the smaller reservation required for GC and
 * block zeroing is excluded from the user capacity, while XC_FREE_RTEXTENTS
 * is further restricted by at least one more zone as well as the optional
 * persistently reserved blocks.  This allows the allocator to run more
 * smoothly by not always triggering GC.
 */
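/*
 * Concretely, given the code below (rtg_blocks is the size of a realtime
 * group, i.e. a zone, in file system blocks):
 *
 *   XC_FREE_RTEXTENTS:   XFS_RESERVED_ZONES * rtg_blocks + sb_rtreserved
 *   XC_FREE_RTAVAILABLE: XFS_GC_ZONES * rtg_blocks
 */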
uint64_t
xfs_zoned_default_resblks(
	struct xfs_mount	*mp,
	enum xfs_free_counter	ctr)
{
	switch (ctr) {
	case XC_FREE_RTEXTENTS:
		return (uint64_t)XFS_RESERVED_ZONES *
			mp->m_groups[XG_TYPE_RTG].blocks +
			mp->m_sb.sb_rtreserved;
	case XC_FREE_RTAVAILABLE:
		return (uint64_t)XFS_GC_ZONES *
			mp->m_groups[XG_TYPE_RTG].blocks;
	default:
		ASSERT(0);
		return 0;
	}
}

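/*
 * Wake up every task waiting for available space so that it re-checks its
 * reservation (and the shutdown / fatal signal conditions) in
 * xfs_zoned_reserve_available().
 */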
void
xfs_zoned_resv_wake_all(
	struct xfs_mount		*mp)
{
	struct xfs_zone_info		*zi = mp->m_zone_info;
	struct xfs_zone_reservation	*reservation;

	spin_lock(&zi->zi_reservation_lock);
	list_for_each_entry(reservation, &zi->zi_reclaim_reservations, entry)
		wake_up_process(reservation->task);
	spin_unlock(&zi->zi_reservation_lock);
}

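/*
 * Add freed-up blocks back to the XC_FREE_RTAVAILABLE counter.
 *
 * If tasks are waiting for space, wake them in list order as long as the
 * total space now available covers their reservations, and stop at the first
 * waiter whose reservation does not fit.
 */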
void
xfs_zoned_add_available(
	struct xfs_mount		*mp,
	xfs_filblks_t			count_fsb)
{
	struct xfs_zone_info		*zi = mp->m_zone_info;
	struct xfs_zone_reservation	*reservation;

	if (list_empty_careful(&zi->zi_reclaim_reservations)) {
		xfs_add_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb);
		return;
	}

	spin_lock(&zi->zi_reservation_lock);
	xfs_add_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb);
	count_fsb = xfs_sum_freecounter(mp, XC_FREE_RTAVAILABLE);
	list_for_each_entry(reservation, &zi->zi_reclaim_reservations, entry) {
		if (reservation->count_fsb > count_fsb)
			break;
		wake_up_process(reservation->task);
		count_fsb -= reservation->count_fsb;
	}
	spin_unlock(&zi->zi_reservation_lock);
}

static int
xfs_zoned_space_wait_error(
	struct xfs_mount		*mp)
{
	if (xfs_is_shutdown(mp))
		return -EIO;
	if (fatal_signal_pending(current))
		return -EINTR;
	return 0;
}

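/*
 * Reserve count_fsb blocks from the XC_FREE_RTAVAILABLE counter, i.e. the
 * space that can be written without waiting for GC, sleeping until GC frees
 * up enough blocks if necessary (unless XFS_ZR_NOWAIT is set, in which case
 * -EAGAIN is returned instead of waiting).
 */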
static int
xfs_zoned_reserve_available(
	struct xfs_inode		*ip,
	xfs_filblks_t			count_fsb,
	unsigned int			flags)
{
	struct xfs_mount		*mp = ip->i_mount;
	struct xfs_zone_info		*zi = mp->m_zone_info;
	struct xfs_zone_reservation	reservation = {
		.task		= current,
		.count_fsb	= count_fsb,
	};
	int				error;

	/*
	 * If there are no waiters, try to directly grab the available blocks
	 * from the percpu counter.
	 *
	 * If the caller wants to dip into the reserved pool, also bypass the
	 * wait list.  This relies on the fact that we have a very generously
	 * sized reserved pool that always has enough space.  If the reserved
	 * allocations fail we're in trouble.
	 */
	if (likely(list_empty_careful(&zi->zi_reclaim_reservations) ||
	    (flags & XFS_ZR_RESERVED))) {
		error = xfs_dec_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb,
				flags & XFS_ZR_RESERVED);
		if (error != -ENOSPC)
			return error;
	}

1480bb21930SChristoph Hellwig 
1490bb21930SChristoph Hellwig 	if (flags & XFS_ZR_NOWAIT)
1500bb21930SChristoph Hellwig 		return -EAGAIN;
1510bb21930SChristoph Hellwig 
1520bb21930SChristoph Hellwig 	spin_lock(&zi->zi_reservation_lock);
1530bb21930SChristoph Hellwig 	list_add_tail(&reservation.entry, &zi->zi_reclaim_reservations);
1540bb21930SChristoph Hellwig 	while ((error = xfs_zoned_space_wait_error(mp)) == 0) {
1550bb21930SChristoph Hellwig 		set_current_state(TASK_KILLABLE);
1560bb21930SChristoph Hellwig 
1570bb21930SChristoph Hellwig 		error = xfs_dec_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb,
1580bb21930SChristoph Hellwig 				flags & XFS_ZR_RESERVED);
1590bb21930SChristoph Hellwig 		if (error != -ENOSPC)
1600bb21930SChristoph Hellwig 			break;
1610bb21930SChristoph Hellwig 
		/*
		 * Make sure to start GC if it is not running already.  As we
		 * check the rtavailable count when filling up zones, GC is
		 * normally already running at this point, but in some setups
		 * with very few zones we may completely run out of non-
		 * reserved blocks in between filling zones.
		 */
		if (!xfs_is_zonegc_running(mp))
			wake_up_process(zi->zi_gc_thread);

		/*
		 * If there is no reclaimable group left and we aren't still
		 * processing a pending GC request, give up as we're fully out
		 * of space.
		 */
		if (!xfs_group_marked(mp, XG_TYPE_RTG, XFS_RTG_RECLAIMABLE) &&
		    !xfs_is_zonegc_running(mp))
			break;

		spin_unlock(&zi->zi_reservation_lock);
		schedule();
		spin_lock(&zi->zi_reservation_lock);
	}
	list_del(&reservation.entry);
	spin_unlock(&zi->zi_reservation_lock);

	__set_current_state(TASK_RUNNING);
	return error;
}

/*
 * Implement greedy space allocation for short writes by trying to grab all
 * that is left after locking out other threads from trying to do the same.
 *
 * This isn't exactly optimal and can hopefully be replaced by a proper
 * percpu_counter primitive one day.
 */
static int
xfs_zoned_reserve_extents_greedy(
	struct xfs_inode		*ip,
	xfs_filblks_t			*count_fsb,
	unsigned int			flags)
{
	struct xfs_mount		*mp = ip->i_mount;
	struct xfs_zone_info		*zi = mp->m_zone_info;
	s64				len = *count_fsb;
	int				error = -ENOSPC;

	spin_lock(&zi->zi_reservation_lock);
	len = min(len, xfs_sum_freecounter(mp, XC_FREE_RTEXTENTS));
	if (len > 0) {
		*count_fsb = len;
		error = xfs_dec_freecounter(mp, XC_FREE_RTEXTENTS, *count_fsb,
				flags & XFS_ZR_RESERVED);
	}
	spin_unlock(&zi->zi_reservation_lock);
	return error;
}

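/*
 * Reserve space for writing count_fsb blocks.
 *
 * This works in two steps: first take the blocks out of XC_FREE_RTEXTENTS
 * (the overall user-available capacity, possibly shrinking the request for
 * XFS_ZR_GREEDY callers), then reserve the same amount from
 * XC_FREE_RTAVAILABLE, which may involve waiting for GC.  If the second step
 * fails, the first reservation is undone.
 */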
int
xfs_zoned_space_reserve(
	struct xfs_inode		*ip,
	xfs_filblks_t			count_fsb,
	unsigned int			flags,
	struct xfs_zone_alloc_ctx	*ac)
{
	struct xfs_mount		*mp = ip->i_mount;
	int				error;

	ASSERT(ac->reserved_blocks == 0);
	ASSERT(ac->open_zone == NULL);

	error = xfs_dec_freecounter(mp, XC_FREE_RTEXTENTS, count_fsb,
			flags & XFS_ZR_RESERVED);
	if (error == -ENOSPC && (flags & XFS_ZR_GREEDY) && count_fsb > 1)
		error = xfs_zoned_reserve_extents_greedy(ip, &count_fsb, flags);
	if (error)
		return error;

	error = xfs_zoned_reserve_available(ip, count_fsb, flags);
	if (error) {
		xfs_add_freecounter(mp, XC_FREE_RTEXTENTS, count_fsb);
		return error;
	}
	ac->reserved_blocks = count_fsb;
	return 0;
}

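/*
 * Undo a space reservation: return the remaining reserved blocks to both
 * counters (waking waiters through xfs_zoned_add_available()) and drop the
 * reference on the open zone if one was held.
 */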
void
xfs_zoned_space_unreserve(
	struct xfs_inode		*ip,
	struct xfs_zone_alloc_ctx	*ac)
{
	if (ac->reserved_blocks > 0) {
		struct xfs_mount	*mp = ip->i_mount;

		xfs_zoned_add_available(mp, ac->reserved_blocks);
		xfs_add_freecounter(mp, XC_FREE_RTEXTENTS, ac->reserved_blocks);
	}
	if (ac->open_zone)
		xfs_open_zone_put(ac->open_zone);
}