1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2006-2007 Silicon Graphics, Inc. 4 * Copyright (c) 2014 Christoph Hellwig. 5 * All Rights Reserved. 6 */ 7 #include "xfs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_log_format.h" 11 #include "xfs_trans_resv.h" 12 #include "xfs_mount.h" 13 #include "xfs_inode.h" 14 #include "xfs_bmap.h" 15 #include "xfs_bmap_util.h" 16 #include "xfs_alloc.h" 17 #include "xfs_mru_cache.h" 18 #include "xfs_trace.h" 19 #include "xfs_ag.h" 20 #include "xfs_ag_resv.h" 21 #include "xfs_trans.h" 22 #include "xfs_filestream.h" 23 24 struct xfs_fstrm_item { 25 struct xfs_mru_cache_elem mru; 26 struct xfs_perag *pag; /* AG in use for this directory */ 27 }; 28 29 enum xfs_fstrm_alloc { 30 XFS_PICK_USERDATA = 1, 31 XFS_PICK_LOWSPACE = 2, 32 }; 33 34 static void 35 xfs_fstrm_free_func( 36 void *data, 37 struct xfs_mru_cache_elem *mru) 38 { 39 struct xfs_fstrm_item *item = 40 container_of(mru, struct xfs_fstrm_item, mru); 41 struct xfs_perag *pag = item->pag; 42 43 trace_xfs_filestream_free(pag, mru->key); 44 atomic_dec(&pag->pagf_fstrms); 45 xfs_perag_rele(pag); 46 47 kfree(item); 48 } 49 50 /* 51 * Scan the AGs starting at start_agno looking for an AG that isn't in use and 52 * has at least minlen blocks free. If no AG is found to match the allocation 53 * requirements, pick the AG with the most free space in it. 54 */ 55 static int 56 xfs_filestream_pick_ag( 57 struct xfs_alloc_arg *args, 58 xfs_ino_t pino, 59 xfs_agnumber_t start_agno, 60 int flags, 61 xfs_extlen_t *longest) 62 { 63 struct xfs_mount *mp = args->mp; 64 struct xfs_perag *pag; 65 struct xfs_perag *max_pag = NULL; 66 xfs_extlen_t minlen = *longest; 67 xfs_extlen_t minfree, maxfree = 0; 68 xfs_agnumber_t agno; 69 bool first_pass = true; 70 71 /* 2% of an AG's blocks must be free for it to be chosen. */ 72 minfree = mp->m_sb.sb_agblocks / 50; 73 74 restart: 75 for_each_perag_wrap(mp, start_agno, agno, pag) { 76 int err; 77 78 trace_xfs_filestream_scan(pag, pino); 79 80 *longest = 0; 81 err = xfs_bmap_longest_free_extent(pag, NULL, longest); 82 if (err) { 83 if (err == -EAGAIN) { 84 /* Couldn't lock the AGF, skip this AG. */ 85 err = 0; 86 continue; 87 } 88 xfs_perag_rele(pag); 89 if (max_pag) 90 xfs_perag_rele(max_pag); 91 return err; 92 } 93 94 /* Keep track of the AG with the most free blocks. */ 95 if (pag->pagf_freeblks > maxfree) { 96 maxfree = pag->pagf_freeblks; 97 if (max_pag) 98 xfs_perag_rele(max_pag); 99 atomic_inc(&pag_group(pag)->xg_active_ref); 100 max_pag = pag; 101 } 102 103 /* 104 * The AG reference count does two things: it enforces mutual 105 * exclusion when examining the suitability of an AG in this 106 * loop, and it guards against two filestreams being established 107 * in the same AG as each other. 108 */ 109 if (atomic_inc_return(&pag->pagf_fstrms) <= 1) { 110 if (((minlen && *longest >= minlen) || 111 (!minlen && pag->pagf_freeblks >= minfree)) && 112 (!xfs_perag_prefers_metadata(pag) || 113 !(flags & XFS_PICK_USERDATA) || 114 (flags & XFS_PICK_LOWSPACE))) { 115 /* Break out, retaining the reference on the AG. */ 116 if (max_pag) 117 xfs_perag_rele(max_pag); 118 goto done; 119 } 120 } 121 122 /* Drop the reference on this AG, it's not usable. */ 123 atomic_dec(&pag->pagf_fstrms); 124 } 125 126 /* 127 * Allow a second pass to give xfs_bmap_longest_free_extent() another 128 * attempt at locking AGFs that it might have skipped over before we 129 * fail. 130 */ 131 if (first_pass) { 132 first_pass = false; 133 goto restart; 134 } 135 136 /* 137 * We must be low on data space, so run a final lowspace optimised 138 * selection pass if we haven't already. 139 */ 140 if (!(flags & XFS_PICK_LOWSPACE)) { 141 flags |= XFS_PICK_LOWSPACE; 142 goto restart; 143 } 144 145 /* 146 * No unassociated AGs are available, so select the AG with the most 147 * free space, regardless of whether it's already in use by another 148 * filestream. It none suit, just use whatever AG we can grab. 149 */ 150 if (!max_pag) { 151 for_each_perag_wrap(args->mp, 0, start_agno, pag) { 152 max_pag = pag; 153 break; 154 } 155 156 /* Bail if there are no AGs at all to select from. */ 157 if (!max_pag) 158 return -ENOSPC; 159 } 160 161 pag = max_pag; 162 atomic_inc(&pag->pagf_fstrms); 163 done: 164 trace_xfs_filestream_pick(pag, pino); 165 args->pag = pag; 166 return 0; 167 } 168 169 static struct xfs_inode * 170 xfs_filestream_get_parent( 171 struct xfs_inode *ip) 172 { 173 struct inode *inode = VFS_I(ip), *dir = NULL; 174 struct dentry *dentry, *parent; 175 176 dentry = d_find_alias(inode); 177 if (!dentry) 178 goto out; 179 180 parent = dget_parent(dentry); 181 if (!parent) 182 goto out_dput; 183 184 dir = igrab(d_inode(parent)); 185 dput(parent); 186 187 out_dput: 188 dput(dentry); 189 out: 190 return dir ? XFS_I(dir) : NULL; 191 } 192 193 /* 194 * Lookup the mru cache for an existing association. If one exists and we can 195 * use it, return with an active perag reference indicating that the allocation 196 * will proceed with that association. 197 * 198 * If we have no association, or we cannot use the current one and have to 199 * destroy it, return with longest = 0 to tell the caller to create a new 200 * association. 201 */ 202 static int 203 xfs_filestream_lookup_association( 204 struct xfs_bmalloca *ap, 205 struct xfs_alloc_arg *args, 206 xfs_ino_t pino, 207 xfs_extlen_t *longest) 208 { 209 struct xfs_mount *mp = args->mp; 210 struct xfs_perag *pag; 211 struct xfs_mru_cache_elem *mru; 212 int error = 0; 213 214 *longest = 0; 215 mru = xfs_mru_cache_lookup(mp->m_filestream, pino); 216 if (!mru) 217 return 0; 218 /* 219 * Grab the pag and take an extra active reference for the caller whilst 220 * the mru item cannot go away. This means we'll pin the perag with 221 * the reference we get here even if the filestreams association is torn 222 * down immediately after we mark the lookup as done. 223 */ 224 pag = container_of(mru, struct xfs_fstrm_item, mru)->pag; 225 atomic_inc(&pag_group(pag)->xg_active_ref); 226 xfs_mru_cache_done(mp->m_filestream); 227 228 trace_xfs_filestream_lookup(pag, ap->ip->i_ino); 229 230 ap->blkno = xfs_agbno_to_fsb(pag, 0); 231 xfs_bmap_adjacent(ap); 232 233 /* 234 * If there is very little free space before we start a filestreams 235 * allocation, we're almost guaranteed to fail to find a large enough 236 * free space available so just use the cached AG. 237 */ 238 if (ap->tp->t_flags & XFS_TRANS_LOWMODE) { 239 *longest = 1; 240 goto out_done; 241 } 242 243 error = xfs_bmap_longest_free_extent(pag, args->tp, longest); 244 if (error == -EAGAIN) 245 error = 0; 246 if (error || *longest < args->maxlen) { 247 /* We aren't going to use this perag */ 248 *longest = 0; 249 xfs_perag_rele(pag); 250 return error; 251 } 252 253 out_done: 254 args->pag = pag; 255 return 0; 256 } 257 258 static int 259 xfs_filestream_create_association( 260 struct xfs_bmalloca *ap, 261 struct xfs_alloc_arg *args, 262 xfs_ino_t pino, 263 xfs_extlen_t *longest) 264 { 265 struct xfs_mount *mp = args->mp; 266 struct xfs_mru_cache_elem *mru; 267 struct xfs_fstrm_item *item; 268 xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, pino); 269 int flags = 0; 270 int error; 271 272 /* Changing parent AG association now, so remove the existing one. */ 273 mru = xfs_mru_cache_remove(mp->m_filestream, pino); 274 if (mru) { 275 struct xfs_fstrm_item *item = 276 container_of(mru, struct xfs_fstrm_item, mru); 277 278 agno = (pag_agno(item->pag) + 1) % mp->m_sb.sb_agcount; 279 xfs_fstrm_free_func(mp, mru); 280 } else if (xfs_is_inode32(mp)) { 281 xfs_agnumber_t rotorstep = xfs_rotorstep; 282 283 agno = (mp->m_agfrotor / rotorstep) % mp->m_sb.sb_agcount; 284 mp->m_agfrotor = (mp->m_agfrotor + 1) % 285 (mp->m_sb.sb_agcount * rotorstep); 286 } 287 288 ap->blkno = XFS_AGB_TO_FSB(args->mp, agno, 0); 289 xfs_bmap_adjacent(ap); 290 291 if (ap->datatype & XFS_ALLOC_USERDATA) 292 flags |= XFS_PICK_USERDATA; 293 if (ap->tp->t_flags & XFS_TRANS_LOWMODE) 294 flags |= XFS_PICK_LOWSPACE; 295 296 *longest = ap->length; 297 error = xfs_filestream_pick_ag(args, pino, agno, flags, longest); 298 if (error) 299 return error; 300 301 /* 302 * We are going to use this perag now, so create an assoication for it. 303 * xfs_filestream_pick_ag() has already bumped the perag fstrms counter 304 * for us, so all we need to do here is take another active reference to 305 * the perag for the cached association. 306 * 307 * If we fail to store the association, we need to drop the fstrms 308 * counter as well as drop the perag reference we take here for the 309 * item. We do not need to return an error for this failure - as long as 310 * we return a referenced AG, the allocation can still go ahead just 311 * fine. 312 */ 313 item = kmalloc(sizeof(*item), GFP_KERNEL | __GFP_RETRY_MAYFAIL); 314 if (!item) 315 goto out_put_fstrms; 316 317 atomic_inc(&pag_group(args->pag)->xg_active_ref); 318 item->pag = args->pag; 319 error = xfs_mru_cache_insert(mp->m_filestream, pino, &item->mru); 320 if (error) 321 goto out_free_item; 322 return 0; 323 324 out_free_item: 325 xfs_perag_rele(item->pag); 326 kfree(item); 327 out_put_fstrms: 328 atomic_dec(&args->pag->pagf_fstrms); 329 return 0; 330 } 331 332 /* 333 * Search for an allocation group with a single extent large enough for 334 * the request. First we look for an existing association and use that if it 335 * is found. Otherwise, we create a new association by selecting an AG that fits 336 * the allocation criteria. 337 * 338 * We return with a referenced perag in args->pag to indicate which AG we are 339 * allocating into or an error with no references held. 340 */ 341 int 342 xfs_filestream_select_ag( 343 struct xfs_bmalloca *ap, 344 struct xfs_alloc_arg *args, 345 xfs_extlen_t *longest) 346 { 347 struct xfs_inode *pip; 348 xfs_ino_t ino = 0; 349 int error = 0; 350 351 *longest = 0; 352 args->total = ap->total; 353 pip = xfs_filestream_get_parent(ap->ip); 354 if (pip) { 355 ino = pip->i_ino; 356 error = xfs_filestream_lookup_association(ap, args, ino, 357 longest); 358 xfs_irele(pip); 359 if (error) 360 return error; 361 if (*longest >= args->maxlen) 362 goto out_select; 363 if (ap->tp->t_flags & XFS_TRANS_LOWMODE) 364 goto out_select; 365 } 366 367 error = xfs_filestream_create_association(ap, args, ino, longest); 368 if (error) 369 return error; 370 371 out_select: 372 ap->blkno = xfs_agbno_to_fsb(args->pag, 0); 373 return 0; 374 } 375 376 void 377 xfs_filestream_deassociate( 378 struct xfs_inode *ip) 379 { 380 xfs_mru_cache_delete(ip->i_mount->m_filestream, ip->i_ino); 381 } 382 383 int 384 xfs_filestream_mount( 385 xfs_mount_t *mp) 386 { 387 /* 388 * The filestream timer tunable is currently fixed within the range of 389 * one second to four minutes, with five seconds being the default. The 390 * group count is somewhat arbitrary, but it'd be nice to adhere to the 391 * timer tunable to within about 10 percent. This requires at least 10 392 * groups. 393 */ 394 return xfs_mru_cache_create(&mp->m_filestream, mp, 395 xfs_fstrm_centisecs * 10, 10, xfs_fstrm_free_func); 396 } 397 398 void 399 xfs_filestream_unmount( 400 xfs_mount_t *mp) 401 { 402 xfs_mru_cache_destroy(mp->m_filestream); 403 } 404