1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2006-2007 Silicon Graphics, Inc. 4 * Copyright (c) 2014 Christoph Hellwig. 5 * All Rights Reserved. 6 */ 7 #include "xfs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_log_format.h" 11 #include "xfs_trans_resv.h" 12 #include "xfs_mount.h" 13 #include "xfs_inode.h" 14 #include "xfs_bmap.h" 15 #include "xfs_bmap_util.h" 16 #include "xfs_alloc.h" 17 #include "xfs_mru_cache.h" 18 #include "xfs_trace.h" 19 #include "xfs_ag.h" 20 #include "xfs_ag_resv.h" 21 #include "xfs_trans.h" 22 #include "xfs_filestream.h" 23 24 struct xfs_fstrm_item { 25 struct xfs_mru_cache_elem mru; 26 struct xfs_perag *pag; /* AG in use for this directory */ 27 }; 28 29 enum xfs_fstrm_alloc { 30 XFS_PICK_USERDATA = 1, 31 XFS_PICK_LOWSPACE = 2, 32 }; 33 34 static void 35 xfs_fstrm_free_func( 36 void *data, 37 struct xfs_mru_cache_elem *mru) 38 { 39 struct xfs_fstrm_item *item = 40 container_of(mru, struct xfs_fstrm_item, mru); 41 struct xfs_perag *pag = item->pag; 42 43 trace_xfs_filestream_free(pag, mru->key); 44 atomic_dec(&pag->pagf_fstrms); 45 xfs_perag_rele(pag); 46 47 kfree(item); 48 } 49 50 /* 51 * Scan the AGs starting at start_agno looking for an AG that isn't in use and 52 * has at least minlen blocks free. If no AG is found to match the allocation 53 * requirements, pick the AG with the most free space in it. 54 */ 55 static int 56 xfs_filestream_pick_ag( 57 struct xfs_alloc_arg *args, 58 xfs_ino_t pino, 59 xfs_agnumber_t start_agno, 60 int flags, 61 xfs_extlen_t *longest) 62 { 63 struct xfs_mount *mp = args->mp; 64 struct xfs_perag *pag; 65 struct xfs_perag *max_pag = NULL; 66 xfs_extlen_t minlen = *longest; 67 xfs_extlen_t free = 0, minfree, maxfree = 0; 68 xfs_agnumber_t agno; 69 bool first_pass = true; 70 int err; 71 72 /* 2% of an AG's blocks must be free for it to be chosen. */ 73 minfree = mp->m_sb.sb_agblocks / 50; 74 75 restart: 76 for_each_perag_wrap(mp, start_agno, agno, pag) { 77 trace_xfs_filestream_scan(pag, pino); 78 *longest = 0; 79 err = xfs_bmap_longest_free_extent(pag, NULL, longest); 80 if (err) { 81 if (err != -EAGAIN) 82 break; 83 /* Couldn't lock the AGF, skip this AG. */ 84 err = 0; 85 continue; 86 } 87 88 /* Keep track of the AG with the most free blocks. */ 89 if (pag->pagf_freeblks > maxfree) { 90 maxfree = pag->pagf_freeblks; 91 if (max_pag) 92 xfs_perag_rele(max_pag); 93 atomic_inc(&pag->pag_active_ref); 94 max_pag = pag; 95 } 96 97 /* 98 * The AG reference count does two things: it enforces mutual 99 * exclusion when examining the suitability of an AG in this 100 * loop, and it guards against two filestreams being established 101 * in the same AG as each other. 102 */ 103 if (atomic_inc_return(&pag->pagf_fstrms) <= 1) { 104 if (((minlen && *longest >= minlen) || 105 (!minlen && pag->pagf_freeblks >= minfree)) && 106 (!xfs_perag_prefers_metadata(pag) || 107 !(flags & XFS_PICK_USERDATA) || 108 (flags & XFS_PICK_LOWSPACE))) { 109 /* Break out, retaining the reference on the AG. */ 110 free = pag->pagf_freeblks; 111 break; 112 } 113 } 114 115 /* Drop the reference on this AG, it's not usable. */ 116 atomic_dec(&pag->pagf_fstrms); 117 } 118 119 if (err) { 120 xfs_perag_rele(pag); 121 if (max_pag) 122 xfs_perag_rele(max_pag); 123 return err; 124 } 125 126 if (!pag) { 127 /* 128 * Allow a second pass to give xfs_bmap_longest_free_extent() 129 * another attempt at locking AGFs that it might have skipped 130 * over before we fail. 131 */ 132 if (first_pass) { 133 first_pass = false; 134 goto restart; 135 } 136 137 /* 138 * We must be low on data space, so run a final lowspace 139 * optimised selection pass if we haven't already. 140 */ 141 if (!(flags & XFS_PICK_LOWSPACE)) { 142 flags |= XFS_PICK_LOWSPACE; 143 goto restart; 144 } 145 146 /* 147 * No unassociated AGs are available, so select the AG with the 148 * most free space, regardless of whether it's already in use by 149 * another filestream. It none suit, just use whatever AG we can 150 * grab. 151 */ 152 if (!max_pag) { 153 for_each_perag_wrap(args->mp, 0, start_agno, args->pag) 154 break; 155 atomic_inc(&args->pag->pagf_fstrms); 156 *longest = 0; 157 } else { 158 pag = max_pag; 159 free = maxfree; 160 atomic_inc(&pag->pagf_fstrms); 161 } 162 } else if (max_pag) { 163 xfs_perag_rele(max_pag); 164 } 165 166 trace_xfs_filestream_pick(pag, pino, free); 167 args->pag = pag; 168 return 0; 169 170 } 171 172 static struct xfs_inode * 173 xfs_filestream_get_parent( 174 struct xfs_inode *ip) 175 { 176 struct inode *inode = VFS_I(ip), *dir = NULL; 177 struct dentry *dentry, *parent; 178 179 dentry = d_find_alias(inode); 180 if (!dentry) 181 goto out; 182 183 parent = dget_parent(dentry); 184 if (!parent) 185 goto out_dput; 186 187 dir = igrab(d_inode(parent)); 188 dput(parent); 189 190 out_dput: 191 dput(dentry); 192 out: 193 return dir ? XFS_I(dir) : NULL; 194 } 195 196 /* 197 * Lookup the mru cache for an existing association. If one exists and we can 198 * use it, return with an active perag reference indicating that the allocation 199 * will proceed with that association. 200 * 201 * If we have no association, or we cannot use the current one and have to 202 * destroy it, return with longest = 0 to tell the caller to create a new 203 * association. 204 */ 205 static int 206 xfs_filestream_lookup_association( 207 struct xfs_bmalloca *ap, 208 struct xfs_alloc_arg *args, 209 xfs_ino_t pino, 210 xfs_extlen_t *longest) 211 { 212 struct xfs_mount *mp = args->mp; 213 struct xfs_perag *pag; 214 struct xfs_mru_cache_elem *mru; 215 int error = 0; 216 217 *longest = 0; 218 mru = xfs_mru_cache_lookup(mp->m_filestream, pino); 219 if (!mru) 220 return 0; 221 /* 222 * Grab the pag and take an extra active reference for the caller whilst 223 * the mru item cannot go away. This means we'll pin the perag with 224 * the reference we get here even if the filestreams association is torn 225 * down immediately after we mark the lookup as done. 226 */ 227 pag = container_of(mru, struct xfs_fstrm_item, mru)->pag; 228 atomic_inc(&pag->pag_active_ref); 229 xfs_mru_cache_done(mp->m_filestream); 230 231 trace_xfs_filestream_lookup(pag, ap->ip->i_ino); 232 233 ap->blkno = XFS_AGB_TO_FSB(args->mp, pag->pag_agno, 0); 234 xfs_bmap_adjacent(ap); 235 236 /* 237 * If there is very little free space before we start a filestreams 238 * allocation, we're almost guaranteed to fail to find a large enough 239 * free space available so just use the cached AG. 240 */ 241 if (ap->tp->t_flags & XFS_TRANS_LOWMODE) { 242 *longest = 1; 243 goto out_done; 244 } 245 246 error = xfs_bmap_longest_free_extent(pag, args->tp, longest); 247 if (error == -EAGAIN) 248 error = 0; 249 if (error || *longest < args->maxlen) { 250 /* We aren't going to use this perag */ 251 *longest = 0; 252 xfs_perag_rele(pag); 253 return error; 254 } 255 256 out_done: 257 args->pag = pag; 258 return 0; 259 } 260 261 static int 262 xfs_filestream_create_association( 263 struct xfs_bmalloca *ap, 264 struct xfs_alloc_arg *args, 265 xfs_ino_t pino, 266 xfs_extlen_t *longest) 267 { 268 struct xfs_mount *mp = args->mp; 269 struct xfs_mru_cache_elem *mru; 270 struct xfs_fstrm_item *item; 271 xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, pino); 272 int flags = 0; 273 int error; 274 275 /* Changing parent AG association now, so remove the existing one. */ 276 mru = xfs_mru_cache_remove(mp->m_filestream, pino); 277 if (mru) { 278 struct xfs_fstrm_item *item = 279 container_of(mru, struct xfs_fstrm_item, mru); 280 281 agno = (item->pag->pag_agno + 1) % mp->m_sb.sb_agcount; 282 xfs_fstrm_free_func(mp, mru); 283 } else if (xfs_is_inode32(mp)) { 284 xfs_agnumber_t rotorstep = xfs_rotorstep; 285 286 agno = (mp->m_agfrotor / rotorstep) % mp->m_sb.sb_agcount; 287 mp->m_agfrotor = (mp->m_agfrotor + 1) % 288 (mp->m_sb.sb_agcount * rotorstep); 289 } 290 291 ap->blkno = XFS_AGB_TO_FSB(args->mp, agno, 0); 292 xfs_bmap_adjacent(ap); 293 294 if (ap->datatype & XFS_ALLOC_USERDATA) 295 flags |= XFS_PICK_USERDATA; 296 if (ap->tp->t_flags & XFS_TRANS_LOWMODE) 297 flags |= XFS_PICK_LOWSPACE; 298 299 *longest = ap->length; 300 error = xfs_filestream_pick_ag(args, pino, agno, flags, longest); 301 if (error) 302 return error; 303 304 /* 305 * We are going to use this perag now, so create an assoication for it. 306 * xfs_filestream_pick_ag() has already bumped the perag fstrms counter 307 * for us, so all we need to do here is take another active reference to 308 * the perag for the cached association. 309 * 310 * If we fail to store the association, we need to drop the fstrms 311 * counter as well as drop the perag reference we take here for the 312 * item. We do not need to return an error for this failure - as long as 313 * we return a referenced AG, the allocation can still go ahead just 314 * fine. 315 */ 316 item = kmalloc(sizeof(*item), GFP_KERNEL | __GFP_RETRY_MAYFAIL); 317 if (!item) 318 goto out_put_fstrms; 319 320 atomic_inc(&args->pag->pag_active_ref); 321 item->pag = args->pag; 322 error = xfs_mru_cache_insert(mp->m_filestream, pino, &item->mru); 323 if (error) 324 goto out_free_item; 325 return 0; 326 327 out_free_item: 328 xfs_perag_rele(item->pag); 329 kfree(item); 330 out_put_fstrms: 331 atomic_dec(&args->pag->pagf_fstrms); 332 return 0; 333 } 334 335 /* 336 * Search for an allocation group with a single extent large enough for 337 * the request. First we look for an existing association and use that if it 338 * is found. Otherwise, we create a new association by selecting an AG that fits 339 * the allocation criteria. 340 * 341 * We return with a referenced perag in args->pag to indicate which AG we are 342 * allocating into or an error with no references held. 343 */ 344 int 345 xfs_filestream_select_ag( 346 struct xfs_bmalloca *ap, 347 struct xfs_alloc_arg *args, 348 xfs_extlen_t *longest) 349 { 350 struct xfs_mount *mp = args->mp; 351 struct xfs_inode *pip; 352 xfs_ino_t ino = 0; 353 int error = 0; 354 355 *longest = 0; 356 args->total = ap->total; 357 pip = xfs_filestream_get_parent(ap->ip); 358 if (pip) { 359 ino = pip->i_ino; 360 error = xfs_filestream_lookup_association(ap, args, ino, 361 longest); 362 xfs_irele(pip); 363 if (error) 364 return error; 365 if (*longest >= args->maxlen) 366 goto out_select; 367 if (ap->tp->t_flags & XFS_TRANS_LOWMODE) 368 goto out_select; 369 } 370 371 error = xfs_filestream_create_association(ap, args, ino, longest); 372 if (error) 373 return error; 374 375 out_select: 376 ap->blkno = XFS_AGB_TO_FSB(mp, args->pag->pag_agno, 0); 377 return 0; 378 } 379 380 void 381 xfs_filestream_deassociate( 382 struct xfs_inode *ip) 383 { 384 xfs_mru_cache_delete(ip->i_mount->m_filestream, ip->i_ino); 385 } 386 387 int 388 xfs_filestream_mount( 389 xfs_mount_t *mp) 390 { 391 /* 392 * The filestream timer tunable is currently fixed within the range of 393 * one second to four minutes, with five seconds being the default. The 394 * group count is somewhat arbitrary, but it'd be nice to adhere to the 395 * timer tunable to within about 10 percent. This requires at least 10 396 * groups. 397 */ 398 return xfs_mru_cache_create(&mp->m_filestream, mp, 399 xfs_fstrm_centisecs * 10, 10, xfs_fstrm_free_func); 400 } 401 402 void 403 xfs_filestream_unmount( 404 xfs_mount_t *mp) 405 { 406 xfs_mru_cache_destroy(mp->m_filestream); 407 } 408