1 /* 2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. 3 * All Rights Reserved. 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU General Public License as 7 * published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope that it would be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write the Free Software Foundation, 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18 #include "xfs.h" 19 #include "xfs_fs.h" 20 #include "xfs_types.h" 21 #include "xfs_bit.h" 22 #include "xfs_log.h" 23 #include "xfs_inum.h" 24 #include "xfs_trans.h" 25 #include "xfs_sb.h" 26 #include "xfs_ag.h" 27 #include "xfs_dir2.h" 28 #include "xfs_dmapi.h" 29 #include "xfs_mount.h" 30 #include "xfs_error.h" 31 #include "xfs_da_btree.h" 32 #include "xfs_bmap_btree.h" 33 #include "xfs_alloc_btree.h" 34 #include "xfs_ialloc_btree.h" 35 #include "xfs_dir2_sf.h" 36 #include "xfs_attr_sf.h" 37 #include "xfs_dinode.h" 38 #include "xfs_inode.h" 39 #include "xfs_btree.h" 40 #include "xfs_ialloc.h" 41 #include "xfs_alloc.h" 42 #include "xfs_bmap.h" 43 #include "xfs_quota.h" 44 #include "xfs_trans_priv.h" 45 #include "xfs_trans_space.h" 46 #include "xfs_inode_item.h" 47 #include "xfs_trace.h" 48 49 kmem_zone_t *xfs_trans_zone; 50 51 52 /* 53 * Various log reservation values. 54 * 55 * These are based on the size of the file system block because that is what 56 * most transactions manipulate. Each adds in an additional 128 bytes per 57 * item logged to try to account for the overhead of the transaction mechanism. 58 * 59 * Note: Most of the reservations underestimate the number of allocation 60 * groups into which they could free extents in the xfs_bmap_finish() call. 61 * This is because the number in the worst case is quite high and quite 62 * unusual. In order to fix this we need to change xfs_bmap_finish() to free 63 * extents in only a single AG at a time. This will require changes to the 64 * EFI code as well, however, so that the EFI for the extents not freed is 65 * logged again in each transaction. See SGI PV #261917. 66 * 67 * Reservation functions here avoid a huge stack in xfs_trans_init due to 68 * register overflow from temporaries in the calculations. 69 */ 70 71 72 /* 73 * In a write transaction we can allocate a maximum of 2 74 * extents. This gives: 75 * the inode getting the new extents: inode size 76 * the inode's bmap btree: max depth * block size 77 * the agfs of the ags from which the extents are allocated: 2 * sector 78 * the superblock free block counter: sector size 79 * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size 80 * And the bmap_finish transaction can free bmap blocks in a join: 81 * the agfs of the ags containing the blocks: 2 * sector size 82 * the agfls of the ags containing the blocks: 2 * sector size 83 * the super block free block counter: sector size 84 * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size 85 */ 86 STATIC uint 87 xfs_calc_write_reservation( 88 struct xfs_mount *mp) 89 { 90 return XFS_DQUOT_LOGRES(mp) + 91 MAX((mp->m_sb.sb_inodesize + 92 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) + 93 2 * mp->m_sb.sb_sectsize + 94 mp->m_sb.sb_sectsize + 95 XFS_ALLOCFREE_LOG_RES(mp, 2) + 96 128 * (4 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 97 XFS_ALLOCFREE_LOG_COUNT(mp, 2))), 98 (2 * mp->m_sb.sb_sectsize + 99 2 * mp->m_sb.sb_sectsize + 100 mp->m_sb.sb_sectsize + 101 XFS_ALLOCFREE_LOG_RES(mp, 2) + 102 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2)))); 103 } 104 105 /* 106 * In truncating a file we free up to two extents at once. We can modify: 107 * the inode being truncated: inode size 108 * the inode's bmap btree: (max depth + 1) * block size 109 * And the bmap_finish transaction can free the blocks and bmap blocks: 110 * the agf for each of the ags: 4 * sector size 111 * the agfl for each of the ags: 4 * sector size 112 * the super block to reflect the freed blocks: sector size 113 * worst case split in allocation btrees per extent assuming 4 extents: 114 * 4 exts * 2 trees * (2 * max depth - 1) * block size 115 * the inode btree: max depth * blocksize 116 * the allocation btrees: 2 trees * (max depth - 1) * block size 117 */ 118 STATIC uint 119 xfs_calc_itruncate_reservation( 120 struct xfs_mount *mp) 121 { 122 return XFS_DQUOT_LOGRES(mp) + 123 MAX((mp->m_sb.sb_inodesize + 124 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1) + 125 128 * (2 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK))), 126 (4 * mp->m_sb.sb_sectsize + 127 4 * mp->m_sb.sb_sectsize + 128 mp->m_sb.sb_sectsize + 129 XFS_ALLOCFREE_LOG_RES(mp, 4) + 130 128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4)) + 131 128 * 5 + 132 XFS_ALLOCFREE_LOG_RES(mp, 1) + 133 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels + 134 XFS_ALLOCFREE_LOG_COUNT(mp, 1)))); 135 } 136 137 /* 138 * In renaming a files we can modify: 139 * the four inodes involved: 4 * inode size 140 * the two directory btrees: 2 * (max depth + v2) * dir block size 141 * the two directory bmap btrees: 2 * max depth * block size 142 * And the bmap_finish transaction can free dir and bmap blocks (two sets 143 * of bmap blocks) giving: 144 * the agf for the ags in which the blocks live: 3 * sector size 145 * the agfl for the ags in which the blocks live: 3 * sector size 146 * the superblock for the free block count: sector size 147 * the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size 148 */ 149 STATIC uint 150 xfs_calc_rename_reservation( 151 struct xfs_mount *mp) 152 { 153 return XFS_DQUOT_LOGRES(mp) + 154 MAX((4 * mp->m_sb.sb_inodesize + 155 2 * XFS_DIROP_LOG_RES(mp) + 156 128 * (4 + 2 * XFS_DIROP_LOG_COUNT(mp))), 157 (3 * mp->m_sb.sb_sectsize + 158 3 * mp->m_sb.sb_sectsize + 159 mp->m_sb.sb_sectsize + 160 XFS_ALLOCFREE_LOG_RES(mp, 3) + 161 128 * (7 + XFS_ALLOCFREE_LOG_COUNT(mp, 3)))); 162 } 163 164 /* 165 * For creating a link to an inode: 166 * the parent directory inode: inode size 167 * the linked inode: inode size 168 * the directory btree could split: (max depth + v2) * dir block size 169 * the directory bmap btree could join or split: (max depth + v2) * blocksize 170 * And the bmap_finish transaction can free some bmap blocks giving: 171 * the agf for the ag in which the blocks live: sector size 172 * the agfl for the ag in which the blocks live: sector size 173 * the superblock for the free block count: sector size 174 * the allocation btrees: 2 trees * (2 * max depth - 1) * block size 175 */ 176 STATIC uint 177 xfs_calc_link_reservation( 178 struct xfs_mount *mp) 179 { 180 return XFS_DQUOT_LOGRES(mp) + 181 MAX((mp->m_sb.sb_inodesize + 182 mp->m_sb.sb_inodesize + 183 XFS_DIROP_LOG_RES(mp) + 184 128 * (2 + XFS_DIROP_LOG_COUNT(mp))), 185 (mp->m_sb.sb_sectsize + 186 mp->m_sb.sb_sectsize + 187 mp->m_sb.sb_sectsize + 188 XFS_ALLOCFREE_LOG_RES(mp, 1) + 189 128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1)))); 190 } 191 192 /* 193 * For removing a directory entry we can modify: 194 * the parent directory inode: inode size 195 * the removed inode: inode size 196 * the directory btree could join: (max depth + v2) * dir block size 197 * the directory bmap btree could join or split: (max depth + v2) * blocksize 198 * And the bmap_finish transaction can free the dir and bmap blocks giving: 199 * the agf for the ag in which the blocks live: 2 * sector size 200 * the agfl for the ag in which the blocks live: 2 * sector size 201 * the superblock for the free block count: sector size 202 * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size 203 */ 204 STATIC uint 205 xfs_calc_remove_reservation( 206 struct xfs_mount *mp) 207 { 208 return XFS_DQUOT_LOGRES(mp) + 209 MAX((mp->m_sb.sb_inodesize + 210 mp->m_sb.sb_inodesize + 211 XFS_DIROP_LOG_RES(mp) + 212 128 * (2 + XFS_DIROP_LOG_COUNT(mp))), 213 (2 * mp->m_sb.sb_sectsize + 214 2 * mp->m_sb.sb_sectsize + 215 mp->m_sb.sb_sectsize + 216 XFS_ALLOCFREE_LOG_RES(mp, 2) + 217 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2)))); 218 } 219 220 /* 221 * For symlink we can modify: 222 * the parent directory inode: inode size 223 * the new inode: inode size 224 * the inode btree entry: 1 block 225 * the directory btree: (max depth + v2) * dir block size 226 * the directory inode's bmap btree: (max depth + v2) * block size 227 * the blocks for the symlink: 1 kB 228 * Or in the first xact we allocate some inodes giving: 229 * the agi and agf of the ag getting the new inodes: 2 * sectorsize 230 * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize 231 * the inode btree: max depth * blocksize 232 * the allocation btrees: 2 trees * (2 * max depth - 1) * block size 233 */ 234 STATIC uint 235 xfs_calc_symlink_reservation( 236 struct xfs_mount *mp) 237 { 238 return XFS_DQUOT_LOGRES(mp) + 239 MAX((mp->m_sb.sb_inodesize + 240 mp->m_sb.sb_inodesize + 241 XFS_FSB_TO_B(mp, 1) + 242 XFS_DIROP_LOG_RES(mp) + 243 1024 + 244 128 * (4 + XFS_DIROP_LOG_COUNT(mp))), 245 (2 * mp->m_sb.sb_sectsize + 246 XFS_FSB_TO_B(mp, XFS_IALLOC_BLOCKS(mp)) + 247 XFS_FSB_TO_B(mp, mp->m_in_maxlevels) + 248 XFS_ALLOCFREE_LOG_RES(mp, 1) + 249 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels + 250 XFS_ALLOCFREE_LOG_COUNT(mp, 1)))); 251 } 252 253 /* 254 * For create we can modify: 255 * the parent directory inode: inode size 256 * the new inode: inode size 257 * the inode btree entry: block size 258 * the superblock for the nlink flag: sector size 259 * the directory btree: (max depth + v2) * dir block size 260 * the directory inode's bmap btree: (max depth + v2) * block size 261 * Or in the first xact we allocate some inodes giving: 262 * the agi and agf of the ag getting the new inodes: 2 * sectorsize 263 * the superblock for the nlink flag: sector size 264 * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize 265 * the inode btree: max depth * blocksize 266 * the allocation btrees: 2 trees * (max depth - 1) * block size 267 */ 268 STATIC uint 269 xfs_calc_create_reservation( 270 struct xfs_mount *mp) 271 { 272 return XFS_DQUOT_LOGRES(mp) + 273 MAX((mp->m_sb.sb_inodesize + 274 mp->m_sb.sb_inodesize + 275 mp->m_sb.sb_sectsize + 276 XFS_FSB_TO_B(mp, 1) + 277 XFS_DIROP_LOG_RES(mp) + 278 128 * (3 + XFS_DIROP_LOG_COUNT(mp))), 279 (3 * mp->m_sb.sb_sectsize + 280 XFS_FSB_TO_B(mp, XFS_IALLOC_BLOCKS(mp)) + 281 XFS_FSB_TO_B(mp, mp->m_in_maxlevels) + 282 XFS_ALLOCFREE_LOG_RES(mp, 1) + 283 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels + 284 XFS_ALLOCFREE_LOG_COUNT(mp, 1)))); 285 } 286 287 /* 288 * Making a new directory is the same as creating a new file. 289 */ 290 STATIC uint 291 xfs_calc_mkdir_reservation( 292 struct xfs_mount *mp) 293 { 294 return xfs_calc_create_reservation(mp); 295 } 296 297 /* 298 * In freeing an inode we can modify: 299 * the inode being freed: inode size 300 * the super block free inode counter: sector size 301 * the agi hash list and counters: sector size 302 * the inode btree entry: block size 303 * the on disk inode before ours in the agi hash list: inode cluster size 304 * the inode btree: max depth * blocksize 305 * the allocation btrees: 2 trees * (max depth - 1) * block size 306 */ 307 STATIC uint 308 xfs_calc_ifree_reservation( 309 struct xfs_mount *mp) 310 { 311 return XFS_DQUOT_LOGRES(mp) + 312 mp->m_sb.sb_inodesize + 313 mp->m_sb.sb_sectsize + 314 mp->m_sb.sb_sectsize + 315 XFS_FSB_TO_B(mp, 1) + 316 MAX((__uint16_t)XFS_FSB_TO_B(mp, 1), 317 XFS_INODE_CLUSTER_SIZE(mp)) + 318 128 * 5 + 319 XFS_ALLOCFREE_LOG_RES(mp, 1) + 320 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels + 321 XFS_ALLOCFREE_LOG_COUNT(mp, 1)); 322 } 323 324 /* 325 * When only changing the inode we log the inode and possibly the superblock 326 * We also add a bit of slop for the transaction stuff. 327 */ 328 STATIC uint 329 xfs_calc_ichange_reservation( 330 struct xfs_mount *mp) 331 { 332 return XFS_DQUOT_LOGRES(mp) + 333 mp->m_sb.sb_inodesize + 334 mp->m_sb.sb_sectsize + 335 512; 336 337 } 338 339 /* 340 * Growing the data section of the filesystem. 341 * superblock 342 * agi and agf 343 * allocation btrees 344 */ 345 STATIC uint 346 xfs_calc_growdata_reservation( 347 struct xfs_mount *mp) 348 { 349 return mp->m_sb.sb_sectsize * 3 + 350 XFS_ALLOCFREE_LOG_RES(mp, 1) + 351 128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1)); 352 } 353 354 /* 355 * Growing the rt section of the filesystem. 356 * In the first set of transactions (ALLOC) we allocate space to the 357 * bitmap or summary files. 358 * superblock: sector size 359 * agf of the ag from which the extent is allocated: sector size 360 * bmap btree for bitmap/summary inode: max depth * blocksize 361 * bitmap/summary inode: inode size 362 * allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize 363 */ 364 STATIC uint 365 xfs_calc_growrtalloc_reservation( 366 struct xfs_mount *mp) 367 { 368 return 2 * mp->m_sb.sb_sectsize + 369 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) + 370 mp->m_sb.sb_inodesize + 371 XFS_ALLOCFREE_LOG_RES(mp, 1) + 372 128 * (3 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 373 XFS_ALLOCFREE_LOG_COUNT(mp, 1)); 374 } 375 376 /* 377 * Growing the rt section of the filesystem. 378 * In the second set of transactions (ZERO) we zero the new metadata blocks. 379 * one bitmap/summary block: blocksize 380 */ 381 STATIC uint 382 xfs_calc_growrtzero_reservation( 383 struct xfs_mount *mp) 384 { 385 return mp->m_sb.sb_blocksize + 128; 386 } 387 388 /* 389 * Growing the rt section of the filesystem. 390 * In the third set of transactions (FREE) we update metadata without 391 * allocating any new blocks. 392 * superblock: sector size 393 * bitmap inode: inode size 394 * summary inode: inode size 395 * one bitmap block: blocksize 396 * summary blocks: new summary size 397 */ 398 STATIC uint 399 xfs_calc_growrtfree_reservation( 400 struct xfs_mount *mp) 401 { 402 return mp->m_sb.sb_sectsize + 403 2 * mp->m_sb.sb_inodesize + 404 mp->m_sb.sb_blocksize + 405 mp->m_rsumsize + 406 128 * 5; 407 } 408 409 /* 410 * Logging the inode modification timestamp on a synchronous write. 411 * inode 412 */ 413 STATIC uint 414 xfs_calc_swrite_reservation( 415 struct xfs_mount *mp) 416 { 417 return mp->m_sb.sb_inodesize + 128; 418 } 419 420 /* 421 * Logging the inode mode bits when writing a setuid/setgid file 422 * inode 423 */ 424 STATIC uint 425 xfs_calc_writeid_reservation(xfs_mount_t *mp) 426 { 427 return mp->m_sb.sb_inodesize + 128; 428 } 429 430 /* 431 * Converting the inode from non-attributed to attributed. 432 * the inode being converted: inode size 433 * agf block and superblock (for block allocation) 434 * the new block (directory sized) 435 * bmap blocks for the new directory block 436 * allocation btrees 437 */ 438 STATIC uint 439 xfs_calc_addafork_reservation( 440 struct xfs_mount *mp) 441 { 442 return XFS_DQUOT_LOGRES(mp) + 443 mp->m_sb.sb_inodesize + 444 mp->m_sb.sb_sectsize * 2 + 445 mp->m_dirblksize + 446 XFS_FSB_TO_B(mp, XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1) + 447 XFS_ALLOCFREE_LOG_RES(mp, 1) + 448 128 * (4 + XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1 + 449 XFS_ALLOCFREE_LOG_COUNT(mp, 1)); 450 } 451 452 /* 453 * Removing the attribute fork of a file 454 * the inode being truncated: inode size 455 * the inode's bmap btree: max depth * block size 456 * And the bmap_finish transaction can free the blocks and bmap blocks: 457 * the agf for each of the ags: 4 * sector size 458 * the agfl for each of the ags: 4 * sector size 459 * the super block to reflect the freed blocks: sector size 460 * worst case split in allocation btrees per extent assuming 4 extents: 461 * 4 exts * 2 trees * (2 * max depth - 1) * block size 462 */ 463 STATIC uint 464 xfs_calc_attrinval_reservation( 465 struct xfs_mount *mp) 466 { 467 return MAX((mp->m_sb.sb_inodesize + 468 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + 469 128 * (1 + XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))), 470 (4 * mp->m_sb.sb_sectsize + 471 4 * mp->m_sb.sb_sectsize + 472 mp->m_sb.sb_sectsize + 473 XFS_ALLOCFREE_LOG_RES(mp, 4) + 474 128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4)))); 475 } 476 477 /* 478 * Setting an attribute. 479 * the inode getting the attribute 480 * the superblock for allocations 481 * the agfs extents are allocated from 482 * the attribute btree * max depth 483 * the inode allocation btree 484 * Since attribute transaction space is dependent on the size of the attribute, 485 * the calculation is done partially at mount time and partially at runtime. 486 */ 487 STATIC uint 488 xfs_calc_attrset_reservation( 489 struct xfs_mount *mp) 490 { 491 return XFS_DQUOT_LOGRES(mp) + 492 mp->m_sb.sb_inodesize + 493 mp->m_sb.sb_sectsize + 494 XFS_FSB_TO_B(mp, XFS_DA_NODE_MAXDEPTH) + 495 128 * (2 + XFS_DA_NODE_MAXDEPTH); 496 } 497 498 /* 499 * Removing an attribute. 500 * the inode: inode size 501 * the attribute btree could join: max depth * block size 502 * the inode bmap btree could join or split: max depth * block size 503 * And the bmap_finish transaction can free the attr blocks freed giving: 504 * the agf for the ag in which the blocks live: 2 * sector size 505 * the agfl for the ag in which the blocks live: 2 * sector size 506 * the superblock for the free block count: sector size 507 * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size 508 */ 509 STATIC uint 510 xfs_calc_attrrm_reservation( 511 struct xfs_mount *mp) 512 { 513 return XFS_DQUOT_LOGRES(mp) + 514 MAX((mp->m_sb.sb_inodesize + 515 XFS_FSB_TO_B(mp, XFS_DA_NODE_MAXDEPTH) + 516 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + 517 128 * (1 + XFS_DA_NODE_MAXDEPTH + 518 XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK))), 519 (2 * mp->m_sb.sb_sectsize + 520 2 * mp->m_sb.sb_sectsize + 521 mp->m_sb.sb_sectsize + 522 XFS_ALLOCFREE_LOG_RES(mp, 2) + 523 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2)))); 524 } 525 526 /* 527 * Clearing a bad agino number in an agi hash bucket. 528 */ 529 STATIC uint 530 xfs_calc_clear_agi_bucket_reservation( 531 struct xfs_mount *mp) 532 { 533 return mp->m_sb.sb_sectsize + 128; 534 } 535 536 /* 537 * Initialize the precomputed transaction reservation values 538 * in the mount structure. 539 */ 540 void 541 xfs_trans_init( 542 struct xfs_mount *mp) 543 { 544 struct xfs_trans_reservations *resp = &mp->m_reservations; 545 546 resp->tr_write = xfs_calc_write_reservation(mp); 547 resp->tr_itruncate = xfs_calc_itruncate_reservation(mp); 548 resp->tr_rename = xfs_calc_rename_reservation(mp); 549 resp->tr_link = xfs_calc_link_reservation(mp); 550 resp->tr_remove = xfs_calc_remove_reservation(mp); 551 resp->tr_symlink = xfs_calc_symlink_reservation(mp); 552 resp->tr_create = xfs_calc_create_reservation(mp); 553 resp->tr_mkdir = xfs_calc_mkdir_reservation(mp); 554 resp->tr_ifree = xfs_calc_ifree_reservation(mp); 555 resp->tr_ichange = xfs_calc_ichange_reservation(mp); 556 resp->tr_growdata = xfs_calc_growdata_reservation(mp); 557 resp->tr_swrite = xfs_calc_swrite_reservation(mp); 558 resp->tr_writeid = xfs_calc_writeid_reservation(mp); 559 resp->tr_addafork = xfs_calc_addafork_reservation(mp); 560 resp->tr_attrinval = xfs_calc_attrinval_reservation(mp); 561 resp->tr_attrset = xfs_calc_attrset_reservation(mp); 562 resp->tr_attrrm = xfs_calc_attrrm_reservation(mp); 563 resp->tr_clearagi = xfs_calc_clear_agi_bucket_reservation(mp); 564 resp->tr_growrtalloc = xfs_calc_growrtalloc_reservation(mp); 565 resp->tr_growrtzero = xfs_calc_growrtzero_reservation(mp); 566 resp->tr_growrtfree = xfs_calc_growrtfree_reservation(mp); 567 } 568 569 /* 570 * This routine is called to allocate a transaction structure. 571 * The type parameter indicates the type of the transaction. These 572 * are enumerated in xfs_trans.h. 573 * 574 * Dynamically allocate the transaction structure from the transaction 575 * zone, initialize it, and return it to the caller. 576 */ 577 xfs_trans_t * 578 xfs_trans_alloc( 579 xfs_mount_t *mp, 580 uint type) 581 { 582 xfs_wait_for_freeze(mp, SB_FREEZE_TRANS); 583 return _xfs_trans_alloc(mp, type, KM_SLEEP); 584 } 585 586 xfs_trans_t * 587 _xfs_trans_alloc( 588 xfs_mount_t *mp, 589 uint type, 590 uint memflags) 591 { 592 xfs_trans_t *tp; 593 594 atomic_inc(&mp->m_active_trans); 595 596 tp = kmem_zone_zalloc(xfs_trans_zone, memflags); 597 tp->t_magic = XFS_TRANS_MAGIC; 598 tp->t_type = type; 599 tp->t_mountp = mp; 600 tp->t_items_free = XFS_LIC_NUM_SLOTS; 601 xfs_lic_init(&(tp->t_items)); 602 INIT_LIST_HEAD(&tp->t_busy); 603 return tp; 604 } 605 606 /* 607 * Free the transaction structure. If there is more clean up 608 * to do when the structure is freed, add it here. 609 */ 610 STATIC void 611 xfs_trans_free( 612 struct xfs_trans *tp) 613 { 614 struct xfs_busy_extent *busyp, *n; 615 616 list_for_each_entry_safe(busyp, n, &tp->t_busy, list) 617 xfs_alloc_busy_clear(tp->t_mountp, busyp); 618 619 atomic_dec(&tp->t_mountp->m_active_trans); 620 xfs_trans_free_dqinfo(tp); 621 kmem_zone_free(xfs_trans_zone, tp); 622 } 623 624 /* 625 * This is called to create a new transaction which will share the 626 * permanent log reservation of the given transaction. The remaining 627 * unused block and rt extent reservations are also inherited. This 628 * implies that the original transaction is no longer allowed to allocate 629 * blocks. Locks and log items, however, are no inherited. They must 630 * be added to the new transaction explicitly. 631 */ 632 xfs_trans_t * 633 xfs_trans_dup( 634 xfs_trans_t *tp) 635 { 636 xfs_trans_t *ntp; 637 638 ntp = kmem_zone_zalloc(xfs_trans_zone, KM_SLEEP); 639 640 /* 641 * Initialize the new transaction structure. 642 */ 643 ntp->t_magic = XFS_TRANS_MAGIC; 644 ntp->t_type = tp->t_type; 645 ntp->t_mountp = tp->t_mountp; 646 ntp->t_items_free = XFS_LIC_NUM_SLOTS; 647 xfs_lic_init(&(ntp->t_items)); 648 INIT_LIST_HEAD(&ntp->t_busy); 649 650 ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); 651 ASSERT(tp->t_ticket != NULL); 652 653 ntp->t_flags = XFS_TRANS_PERM_LOG_RES | (tp->t_flags & XFS_TRANS_RESERVE); 654 ntp->t_ticket = xfs_log_ticket_get(tp->t_ticket); 655 ntp->t_blk_res = tp->t_blk_res - tp->t_blk_res_used; 656 tp->t_blk_res = tp->t_blk_res_used; 657 ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used; 658 tp->t_rtx_res = tp->t_rtx_res_used; 659 ntp->t_pflags = tp->t_pflags; 660 661 xfs_trans_dup_dqinfo(tp, ntp); 662 663 atomic_inc(&tp->t_mountp->m_active_trans); 664 return ntp; 665 } 666 667 /* 668 * This is called to reserve free disk blocks and log space for the 669 * given transaction. This must be done before allocating any resources 670 * within the transaction. 671 * 672 * This will return ENOSPC if there are not enough blocks available. 673 * It will sleep waiting for available log space. 674 * The only valid value for the flags parameter is XFS_RES_LOG_PERM, which 675 * is used by long running transactions. If any one of the reservations 676 * fails then they will all be backed out. 677 * 678 * This does not do quota reservations. That typically is done by the 679 * caller afterwards. 680 */ 681 int 682 xfs_trans_reserve( 683 xfs_trans_t *tp, 684 uint blocks, 685 uint logspace, 686 uint rtextents, 687 uint flags, 688 uint logcount) 689 { 690 int log_flags; 691 int error = 0; 692 int rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; 693 694 /* Mark this thread as being in a transaction */ 695 current_set_flags_nested(&tp->t_pflags, PF_FSTRANS); 696 697 /* 698 * Attempt to reserve the needed disk blocks by decrementing 699 * the number needed from the number available. This will 700 * fail if the count would go below zero. 701 */ 702 if (blocks > 0) { 703 error = xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FDBLOCKS, 704 -((int64_t)blocks), rsvd); 705 if (error != 0) { 706 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); 707 return (XFS_ERROR(ENOSPC)); 708 } 709 tp->t_blk_res += blocks; 710 } 711 712 /* 713 * Reserve the log space needed for this transaction. 714 */ 715 if (logspace > 0) { 716 ASSERT((tp->t_log_res == 0) || (tp->t_log_res == logspace)); 717 ASSERT((tp->t_log_count == 0) || 718 (tp->t_log_count == logcount)); 719 if (flags & XFS_TRANS_PERM_LOG_RES) { 720 log_flags = XFS_LOG_PERM_RESERV; 721 tp->t_flags |= XFS_TRANS_PERM_LOG_RES; 722 } else { 723 ASSERT(tp->t_ticket == NULL); 724 ASSERT(!(tp->t_flags & XFS_TRANS_PERM_LOG_RES)); 725 log_flags = 0; 726 } 727 728 error = xfs_log_reserve(tp->t_mountp, logspace, logcount, 729 &tp->t_ticket, 730 XFS_TRANSACTION, log_flags, tp->t_type); 731 if (error) { 732 goto undo_blocks; 733 } 734 tp->t_log_res = logspace; 735 tp->t_log_count = logcount; 736 } 737 738 /* 739 * Attempt to reserve the needed realtime extents by decrementing 740 * the number needed from the number available. This will 741 * fail if the count would go below zero. 742 */ 743 if (rtextents > 0) { 744 error = xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FREXTENTS, 745 -((int64_t)rtextents), rsvd); 746 if (error) { 747 error = XFS_ERROR(ENOSPC); 748 goto undo_log; 749 } 750 tp->t_rtx_res += rtextents; 751 } 752 753 return 0; 754 755 /* 756 * Error cases jump to one of these labels to undo any 757 * reservations which have already been performed. 758 */ 759 undo_log: 760 if (logspace > 0) { 761 if (flags & XFS_TRANS_PERM_LOG_RES) { 762 log_flags = XFS_LOG_REL_PERM_RESERV; 763 } else { 764 log_flags = 0; 765 } 766 xfs_log_done(tp->t_mountp, tp->t_ticket, NULL, log_flags); 767 tp->t_ticket = NULL; 768 tp->t_log_res = 0; 769 tp->t_flags &= ~XFS_TRANS_PERM_LOG_RES; 770 } 771 772 undo_blocks: 773 if (blocks > 0) { 774 (void) xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FDBLOCKS, 775 (int64_t)blocks, rsvd); 776 tp->t_blk_res = 0; 777 } 778 779 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); 780 781 return error; 782 } 783 784 /* 785 * Record the indicated change to the given field for application 786 * to the file system's superblock when the transaction commits. 787 * For now, just store the change in the transaction structure. 788 * 789 * Mark the transaction structure to indicate that the superblock 790 * needs to be updated before committing. 791 * 792 * Because we may not be keeping track of allocated/free inodes and 793 * used filesystem blocks in the superblock, we do not mark the 794 * superblock dirty in this transaction if we modify these fields. 795 * We still need to update the transaction deltas so that they get 796 * applied to the incore superblock, but we don't want them to 797 * cause the superblock to get locked and logged if these are the 798 * only fields in the superblock that the transaction modifies. 799 */ 800 void 801 xfs_trans_mod_sb( 802 xfs_trans_t *tp, 803 uint field, 804 int64_t delta) 805 { 806 uint32_t flags = (XFS_TRANS_DIRTY|XFS_TRANS_SB_DIRTY); 807 xfs_mount_t *mp = tp->t_mountp; 808 809 switch (field) { 810 case XFS_TRANS_SB_ICOUNT: 811 tp->t_icount_delta += delta; 812 if (xfs_sb_version_haslazysbcount(&mp->m_sb)) 813 flags &= ~XFS_TRANS_SB_DIRTY; 814 break; 815 case XFS_TRANS_SB_IFREE: 816 tp->t_ifree_delta += delta; 817 if (xfs_sb_version_haslazysbcount(&mp->m_sb)) 818 flags &= ~XFS_TRANS_SB_DIRTY; 819 break; 820 case XFS_TRANS_SB_FDBLOCKS: 821 /* 822 * Track the number of blocks allocated in the 823 * transaction. Make sure it does not exceed the 824 * number reserved. 825 */ 826 if (delta < 0) { 827 tp->t_blk_res_used += (uint)-delta; 828 ASSERT(tp->t_blk_res_used <= tp->t_blk_res); 829 } 830 tp->t_fdblocks_delta += delta; 831 if (xfs_sb_version_haslazysbcount(&mp->m_sb)) 832 flags &= ~XFS_TRANS_SB_DIRTY; 833 break; 834 case XFS_TRANS_SB_RES_FDBLOCKS: 835 /* 836 * The allocation has already been applied to the 837 * in-core superblock's counter. This should only 838 * be applied to the on-disk superblock. 839 */ 840 ASSERT(delta < 0); 841 tp->t_res_fdblocks_delta += delta; 842 if (xfs_sb_version_haslazysbcount(&mp->m_sb)) 843 flags &= ~XFS_TRANS_SB_DIRTY; 844 break; 845 case XFS_TRANS_SB_FREXTENTS: 846 /* 847 * Track the number of blocks allocated in the 848 * transaction. Make sure it does not exceed the 849 * number reserved. 850 */ 851 if (delta < 0) { 852 tp->t_rtx_res_used += (uint)-delta; 853 ASSERT(tp->t_rtx_res_used <= tp->t_rtx_res); 854 } 855 tp->t_frextents_delta += delta; 856 break; 857 case XFS_TRANS_SB_RES_FREXTENTS: 858 /* 859 * The allocation has already been applied to the 860 * in-core superblock's counter. This should only 861 * be applied to the on-disk superblock. 862 */ 863 ASSERT(delta < 0); 864 tp->t_res_frextents_delta += delta; 865 break; 866 case XFS_TRANS_SB_DBLOCKS: 867 ASSERT(delta > 0); 868 tp->t_dblocks_delta += delta; 869 break; 870 case XFS_TRANS_SB_AGCOUNT: 871 ASSERT(delta > 0); 872 tp->t_agcount_delta += delta; 873 break; 874 case XFS_TRANS_SB_IMAXPCT: 875 tp->t_imaxpct_delta += delta; 876 break; 877 case XFS_TRANS_SB_REXTSIZE: 878 tp->t_rextsize_delta += delta; 879 break; 880 case XFS_TRANS_SB_RBMBLOCKS: 881 tp->t_rbmblocks_delta += delta; 882 break; 883 case XFS_TRANS_SB_RBLOCKS: 884 tp->t_rblocks_delta += delta; 885 break; 886 case XFS_TRANS_SB_REXTENTS: 887 tp->t_rextents_delta += delta; 888 break; 889 case XFS_TRANS_SB_REXTSLOG: 890 tp->t_rextslog_delta += delta; 891 break; 892 default: 893 ASSERT(0); 894 return; 895 } 896 897 tp->t_flags |= flags; 898 } 899 900 /* 901 * xfs_trans_apply_sb_deltas() is called from the commit code 902 * to bring the superblock buffer into the current transaction 903 * and modify it as requested by earlier calls to xfs_trans_mod_sb(). 904 * 905 * For now we just look at each field allowed to change and change 906 * it if necessary. 907 */ 908 STATIC void 909 xfs_trans_apply_sb_deltas( 910 xfs_trans_t *tp) 911 { 912 xfs_dsb_t *sbp; 913 xfs_buf_t *bp; 914 int whole = 0; 915 916 bp = xfs_trans_getsb(tp, tp->t_mountp, 0); 917 sbp = XFS_BUF_TO_SBP(bp); 918 919 /* 920 * Check that superblock mods match the mods made to AGF counters. 921 */ 922 ASSERT((tp->t_fdblocks_delta + tp->t_res_fdblocks_delta) == 923 (tp->t_ag_freeblks_delta + tp->t_ag_flist_delta + 924 tp->t_ag_btree_delta)); 925 926 /* 927 * Only update the superblock counters if we are logging them 928 */ 929 if (!xfs_sb_version_haslazysbcount(&(tp->t_mountp->m_sb))) { 930 if (tp->t_icount_delta) 931 be64_add_cpu(&sbp->sb_icount, tp->t_icount_delta); 932 if (tp->t_ifree_delta) 933 be64_add_cpu(&sbp->sb_ifree, tp->t_ifree_delta); 934 if (tp->t_fdblocks_delta) 935 be64_add_cpu(&sbp->sb_fdblocks, tp->t_fdblocks_delta); 936 if (tp->t_res_fdblocks_delta) 937 be64_add_cpu(&sbp->sb_fdblocks, tp->t_res_fdblocks_delta); 938 } 939 940 if (tp->t_frextents_delta) 941 be64_add_cpu(&sbp->sb_frextents, tp->t_frextents_delta); 942 if (tp->t_res_frextents_delta) 943 be64_add_cpu(&sbp->sb_frextents, tp->t_res_frextents_delta); 944 945 if (tp->t_dblocks_delta) { 946 be64_add_cpu(&sbp->sb_dblocks, tp->t_dblocks_delta); 947 whole = 1; 948 } 949 if (tp->t_agcount_delta) { 950 be32_add_cpu(&sbp->sb_agcount, tp->t_agcount_delta); 951 whole = 1; 952 } 953 if (tp->t_imaxpct_delta) { 954 sbp->sb_imax_pct += tp->t_imaxpct_delta; 955 whole = 1; 956 } 957 if (tp->t_rextsize_delta) { 958 be32_add_cpu(&sbp->sb_rextsize, tp->t_rextsize_delta); 959 whole = 1; 960 } 961 if (tp->t_rbmblocks_delta) { 962 be32_add_cpu(&sbp->sb_rbmblocks, tp->t_rbmblocks_delta); 963 whole = 1; 964 } 965 if (tp->t_rblocks_delta) { 966 be64_add_cpu(&sbp->sb_rblocks, tp->t_rblocks_delta); 967 whole = 1; 968 } 969 if (tp->t_rextents_delta) { 970 be64_add_cpu(&sbp->sb_rextents, tp->t_rextents_delta); 971 whole = 1; 972 } 973 if (tp->t_rextslog_delta) { 974 sbp->sb_rextslog += tp->t_rextslog_delta; 975 whole = 1; 976 } 977 978 if (whole) 979 /* 980 * Log the whole thing, the fields are noncontiguous. 981 */ 982 xfs_trans_log_buf(tp, bp, 0, sizeof(xfs_dsb_t) - 1); 983 else 984 /* 985 * Since all the modifiable fields are contiguous, we 986 * can get away with this. 987 */ 988 xfs_trans_log_buf(tp, bp, offsetof(xfs_dsb_t, sb_icount), 989 offsetof(xfs_dsb_t, sb_frextents) + 990 sizeof(sbp->sb_frextents) - 1); 991 } 992 993 /* 994 * xfs_trans_unreserve_and_mod_sb() is called to release unused reservations 995 * and apply superblock counter changes to the in-core superblock. The 996 * t_res_fdblocks_delta and t_res_frextents_delta fields are explicitly NOT 997 * applied to the in-core superblock. The idea is that that has already been 998 * done. 999 * 1000 * This is done efficiently with a single call to xfs_mod_incore_sb_batch(). 1001 * However, we have to ensure that we only modify each superblock field only 1002 * once because the application of the delta values may not be atomic. That can 1003 * lead to ENOSPC races occurring if we have two separate modifcations of the 1004 * free space counter to put back the entire reservation and then take away 1005 * what we used. 1006 * 1007 * If we are not logging superblock counters, then the inode allocated/free and 1008 * used block counts are not updated in the on disk superblock. In this case, 1009 * XFS_TRANS_SB_DIRTY will not be set when the transaction is updated but we 1010 * still need to update the incore superblock with the changes. 1011 */ 1012 void 1013 xfs_trans_unreserve_and_mod_sb( 1014 xfs_trans_t *tp) 1015 { 1016 xfs_mod_sb_t msb[14]; /* If you add cases, add entries */ 1017 xfs_mod_sb_t *msbp; 1018 xfs_mount_t *mp = tp->t_mountp; 1019 /* REFERENCED */ 1020 int error; 1021 int rsvd; 1022 int64_t blkdelta = 0; 1023 int64_t rtxdelta = 0; 1024 1025 msbp = msb; 1026 rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; 1027 1028 /* calculate free blocks delta */ 1029 if (tp->t_blk_res > 0) 1030 blkdelta = tp->t_blk_res; 1031 1032 if ((tp->t_fdblocks_delta != 0) && 1033 (xfs_sb_version_haslazysbcount(&mp->m_sb) || 1034 (tp->t_flags & XFS_TRANS_SB_DIRTY))) 1035 blkdelta += tp->t_fdblocks_delta; 1036 1037 if (blkdelta != 0) { 1038 msbp->msb_field = XFS_SBS_FDBLOCKS; 1039 msbp->msb_delta = blkdelta; 1040 msbp++; 1041 } 1042 1043 /* calculate free realtime extents delta */ 1044 if (tp->t_rtx_res > 0) 1045 rtxdelta = tp->t_rtx_res; 1046 1047 if ((tp->t_frextents_delta != 0) && 1048 (tp->t_flags & XFS_TRANS_SB_DIRTY)) 1049 rtxdelta += tp->t_frextents_delta; 1050 1051 if (rtxdelta != 0) { 1052 msbp->msb_field = XFS_SBS_FREXTENTS; 1053 msbp->msb_delta = rtxdelta; 1054 msbp++; 1055 } 1056 1057 /* apply remaining deltas */ 1058 1059 if (xfs_sb_version_haslazysbcount(&mp->m_sb) || 1060 (tp->t_flags & XFS_TRANS_SB_DIRTY)) { 1061 if (tp->t_icount_delta != 0) { 1062 msbp->msb_field = XFS_SBS_ICOUNT; 1063 msbp->msb_delta = tp->t_icount_delta; 1064 msbp++; 1065 } 1066 if (tp->t_ifree_delta != 0) { 1067 msbp->msb_field = XFS_SBS_IFREE; 1068 msbp->msb_delta = tp->t_ifree_delta; 1069 msbp++; 1070 } 1071 } 1072 1073 if (tp->t_flags & XFS_TRANS_SB_DIRTY) { 1074 if (tp->t_dblocks_delta != 0) { 1075 msbp->msb_field = XFS_SBS_DBLOCKS; 1076 msbp->msb_delta = tp->t_dblocks_delta; 1077 msbp++; 1078 } 1079 if (tp->t_agcount_delta != 0) { 1080 msbp->msb_field = XFS_SBS_AGCOUNT; 1081 msbp->msb_delta = tp->t_agcount_delta; 1082 msbp++; 1083 } 1084 if (tp->t_imaxpct_delta != 0) { 1085 msbp->msb_field = XFS_SBS_IMAX_PCT; 1086 msbp->msb_delta = tp->t_imaxpct_delta; 1087 msbp++; 1088 } 1089 if (tp->t_rextsize_delta != 0) { 1090 msbp->msb_field = XFS_SBS_REXTSIZE; 1091 msbp->msb_delta = tp->t_rextsize_delta; 1092 msbp++; 1093 } 1094 if (tp->t_rbmblocks_delta != 0) { 1095 msbp->msb_field = XFS_SBS_RBMBLOCKS; 1096 msbp->msb_delta = tp->t_rbmblocks_delta; 1097 msbp++; 1098 } 1099 if (tp->t_rblocks_delta != 0) { 1100 msbp->msb_field = XFS_SBS_RBLOCKS; 1101 msbp->msb_delta = tp->t_rblocks_delta; 1102 msbp++; 1103 } 1104 if (tp->t_rextents_delta != 0) { 1105 msbp->msb_field = XFS_SBS_REXTENTS; 1106 msbp->msb_delta = tp->t_rextents_delta; 1107 msbp++; 1108 } 1109 if (tp->t_rextslog_delta != 0) { 1110 msbp->msb_field = XFS_SBS_REXTSLOG; 1111 msbp->msb_delta = tp->t_rextslog_delta; 1112 msbp++; 1113 } 1114 } 1115 1116 /* 1117 * If we need to change anything, do it. 1118 */ 1119 if (msbp > msb) { 1120 error = xfs_mod_incore_sb_batch(tp->t_mountp, msb, 1121 (uint)(msbp - msb), rsvd); 1122 ASSERT(error == 0); 1123 } 1124 } 1125 1126 /* 1127 * Total up the number of log iovecs needed to commit this 1128 * transaction. The transaction itself needs one for the 1129 * transaction header. Ask each dirty item in turn how many 1130 * it needs to get the total. 1131 */ 1132 static uint 1133 xfs_trans_count_vecs( 1134 struct xfs_trans *tp) 1135 { 1136 int nvecs; 1137 xfs_log_item_desc_t *lidp; 1138 1139 nvecs = 1; 1140 lidp = xfs_trans_first_item(tp); 1141 ASSERT(lidp != NULL); 1142 1143 /* In the non-debug case we need to start bailing out if we 1144 * didn't find a log_item here, return zero and let trans_commit 1145 * deal with it. 1146 */ 1147 if (lidp == NULL) 1148 return 0; 1149 1150 while (lidp != NULL) { 1151 /* 1152 * Skip items which aren't dirty in this transaction. 1153 */ 1154 if (!(lidp->lid_flags & XFS_LID_DIRTY)) { 1155 lidp = xfs_trans_next_item(tp, lidp); 1156 continue; 1157 } 1158 lidp->lid_size = IOP_SIZE(lidp->lid_item); 1159 nvecs += lidp->lid_size; 1160 lidp = xfs_trans_next_item(tp, lidp); 1161 } 1162 1163 return nvecs; 1164 } 1165 1166 /* 1167 * Fill in the vector with pointers to data to be logged 1168 * by this transaction. The transaction header takes 1169 * the first vector, and then each dirty item takes the 1170 * number of vectors it indicated it needed in xfs_trans_count_vecs(). 1171 * 1172 * As each item fills in the entries it needs, also pin the item 1173 * so that it cannot be flushed out until the log write completes. 1174 */ 1175 static void 1176 xfs_trans_fill_vecs( 1177 struct xfs_trans *tp, 1178 struct xfs_log_iovec *log_vector) 1179 { 1180 xfs_log_item_desc_t *lidp; 1181 struct xfs_log_iovec *vecp; 1182 uint nitems; 1183 1184 /* 1185 * Skip over the entry for the transaction header, we'll 1186 * fill that in at the end. 1187 */ 1188 vecp = log_vector + 1; 1189 1190 nitems = 0; 1191 lidp = xfs_trans_first_item(tp); 1192 ASSERT(lidp); 1193 while (lidp) { 1194 /* Skip items which aren't dirty in this transaction. */ 1195 if (!(lidp->lid_flags & XFS_LID_DIRTY)) { 1196 lidp = xfs_trans_next_item(tp, lidp); 1197 continue; 1198 } 1199 1200 /* 1201 * The item may be marked dirty but not log anything. This can 1202 * be used to get called when a transaction is committed. 1203 */ 1204 if (lidp->lid_size) 1205 nitems++; 1206 IOP_FORMAT(lidp->lid_item, vecp); 1207 vecp += lidp->lid_size; 1208 IOP_PIN(lidp->lid_item); 1209 lidp = xfs_trans_next_item(tp, lidp); 1210 } 1211 1212 /* 1213 * Now that we've counted the number of items in this transaction, fill 1214 * in the transaction header. Note that the transaction header does not 1215 * have a log item. 1216 */ 1217 tp->t_header.th_magic = XFS_TRANS_HEADER_MAGIC; 1218 tp->t_header.th_type = tp->t_type; 1219 tp->t_header.th_num_items = nitems; 1220 log_vector->i_addr = (xfs_caddr_t)&tp->t_header; 1221 log_vector->i_len = sizeof(xfs_trans_header_t); 1222 log_vector->i_type = XLOG_REG_TYPE_TRANSHDR; 1223 } 1224 1225 /* 1226 * The committed item processing consists of calling the committed routine of 1227 * each logged item, updating the item's position in the AIL if necessary, and 1228 * unpinning each item. If the committed routine returns -1, then do nothing 1229 * further with the item because it may have been freed. 1230 * 1231 * Since items are unlocked when they are copied to the incore log, it is 1232 * possible for two transactions to be completing and manipulating the same 1233 * item simultaneously. The AIL lock will protect the lsn field of each item. 1234 * The value of this field can never go backwards. 1235 * 1236 * We unpin the items after repositioning them in the AIL, because otherwise 1237 * they could be immediately flushed and we'd have to race with the flusher 1238 * trying to pull the item from the AIL as we add it. 1239 */ 1240 void 1241 xfs_trans_item_committed( 1242 struct xfs_log_item *lip, 1243 xfs_lsn_t commit_lsn, 1244 int aborted) 1245 { 1246 xfs_lsn_t item_lsn; 1247 struct xfs_ail *ailp; 1248 1249 if (aborted) 1250 lip->li_flags |= XFS_LI_ABORTED; 1251 item_lsn = IOP_COMMITTED(lip, commit_lsn); 1252 1253 /* If the committed routine returns -1, item has been freed. */ 1254 if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0) 1255 return; 1256 1257 /* 1258 * If the returned lsn is greater than what it contained before, update 1259 * the location of the item in the AIL. If it is not, then do nothing. 1260 * Items can never move backwards in the AIL. 1261 * 1262 * While the new lsn should usually be greater, it is possible that a 1263 * later transaction completing simultaneously with an earlier one 1264 * using the same item could complete first with a higher lsn. This 1265 * would cause the earlier transaction to fail the test below. 1266 */ 1267 ailp = lip->li_ailp; 1268 spin_lock(&ailp->xa_lock); 1269 if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) { 1270 /* 1271 * This will set the item's lsn to item_lsn and update the 1272 * position of the item in the AIL. 1273 * 1274 * xfs_trans_ail_update() drops the AIL lock. 1275 */ 1276 xfs_trans_ail_update(ailp, lip, item_lsn); 1277 } else { 1278 spin_unlock(&ailp->xa_lock); 1279 } 1280 1281 /* 1282 * Now that we've repositioned the item in the AIL, unpin it so it can 1283 * be flushed. Pass information about buffer stale state down from the 1284 * log item flags, if anyone else stales the buffer we do not want to 1285 * pay any attention to it. 1286 */ 1287 IOP_UNPIN(lip); 1288 } 1289 1290 /* 1291 * This is typically called by the LM when a transaction has been fully 1292 * committed to disk. It needs to unpin the items which have 1293 * been logged by the transaction and update their positions 1294 * in the AIL if necessary. 1295 * 1296 * This also gets called when the transactions didn't get written out 1297 * because of an I/O error. Abortflag & XFS_LI_ABORTED is set then. 1298 */ 1299 STATIC void 1300 xfs_trans_committed( 1301 struct xfs_trans *tp, 1302 int abortflag) 1303 { 1304 xfs_log_item_desc_t *lidp; 1305 xfs_log_item_chunk_t *licp; 1306 xfs_log_item_chunk_t *next_licp; 1307 1308 /* Call the transaction's completion callback if there is one. */ 1309 if (tp->t_callback != NULL) 1310 tp->t_callback(tp, tp->t_callarg); 1311 1312 for (lidp = xfs_trans_first_item(tp); 1313 lidp != NULL; 1314 lidp = xfs_trans_next_item(tp, lidp)) { 1315 xfs_trans_item_committed(lidp->lid_item, tp->t_lsn, abortflag); 1316 } 1317 1318 /* free the item chunks, ignoring the embedded chunk */ 1319 for (licp = tp->t_items.lic_next; licp != NULL; licp = next_licp) { 1320 next_licp = licp->lic_next; 1321 kmem_free(licp); 1322 } 1323 1324 xfs_trans_free(tp); 1325 } 1326 1327 /* 1328 * Called from the trans_commit code when we notice that 1329 * the filesystem is in the middle of a forced shutdown. 1330 */ 1331 STATIC void 1332 xfs_trans_uncommit( 1333 struct xfs_trans *tp, 1334 uint flags) 1335 { 1336 xfs_log_item_desc_t *lidp; 1337 1338 for (lidp = xfs_trans_first_item(tp); 1339 lidp != NULL; 1340 lidp = xfs_trans_next_item(tp, lidp)) { 1341 /* 1342 * Unpin all but those that aren't dirty. 1343 */ 1344 if (lidp->lid_flags & XFS_LID_DIRTY) 1345 IOP_UNPIN_REMOVE(lidp->lid_item, tp); 1346 } 1347 1348 xfs_trans_unreserve_and_mod_sb(tp); 1349 xfs_trans_unreserve_and_mod_dquots(tp); 1350 1351 xfs_trans_free_items(tp, NULLCOMMITLSN, flags); 1352 xfs_trans_free(tp); 1353 } 1354 1355 /* 1356 * Format the transaction direct to the iclog. This isolates the physical 1357 * transaction commit operation from the logical operation and hence allows 1358 * other methods to be introduced without affecting the existing commit path. 1359 */ 1360 static int 1361 xfs_trans_commit_iclog( 1362 struct xfs_mount *mp, 1363 struct xfs_trans *tp, 1364 xfs_lsn_t *commit_lsn, 1365 int flags) 1366 { 1367 int shutdown; 1368 int error; 1369 int log_flags = 0; 1370 struct xlog_in_core *commit_iclog; 1371 #define XFS_TRANS_LOGVEC_COUNT 16 1372 struct xfs_log_iovec log_vector_fast[XFS_TRANS_LOGVEC_COUNT]; 1373 struct xfs_log_iovec *log_vector; 1374 uint nvec; 1375 1376 1377 /* 1378 * Ask each log item how many log_vector entries it will 1379 * need so we can figure out how many to allocate. 1380 * Try to avoid the kmem_alloc() call in the common case 1381 * by using a vector from the stack when it fits. 1382 */ 1383 nvec = xfs_trans_count_vecs(tp); 1384 if (nvec == 0) { 1385 return ENOMEM; /* triggers a shutdown! */ 1386 } else if (nvec <= XFS_TRANS_LOGVEC_COUNT) { 1387 log_vector = log_vector_fast; 1388 } else { 1389 log_vector = (xfs_log_iovec_t *)kmem_alloc(nvec * 1390 sizeof(xfs_log_iovec_t), 1391 KM_SLEEP); 1392 } 1393 1394 /* 1395 * Fill in the log_vector and pin the logged items, and 1396 * then write the transaction to the log. 1397 */ 1398 xfs_trans_fill_vecs(tp, log_vector); 1399 1400 if (flags & XFS_TRANS_RELEASE_LOG_RES) 1401 log_flags = XFS_LOG_REL_PERM_RESERV; 1402 1403 error = xfs_log_write(mp, log_vector, nvec, tp->t_ticket, &(tp->t_lsn)); 1404 1405 /* 1406 * The transaction is committed incore here, and can go out to disk 1407 * at any time after this call. However, all the items associated 1408 * with the transaction are still locked and pinned in memory. 1409 */ 1410 *commit_lsn = xfs_log_done(mp, tp->t_ticket, &commit_iclog, log_flags); 1411 1412 tp->t_commit_lsn = *commit_lsn; 1413 trace_xfs_trans_commit_lsn(tp); 1414 1415 if (nvec > XFS_TRANS_LOGVEC_COUNT) 1416 kmem_free(log_vector); 1417 1418 /* 1419 * If we got a log write error. Unpin the logitems that we 1420 * had pinned, clean up, free trans structure, and return error. 1421 */ 1422 if (error || *commit_lsn == -1) { 1423 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); 1424 xfs_trans_uncommit(tp, flags|XFS_TRANS_ABORT); 1425 return XFS_ERROR(EIO); 1426 } 1427 1428 /* 1429 * Once the transaction has committed, unused 1430 * reservations need to be released and changes to 1431 * the superblock need to be reflected in the in-core 1432 * version. Do that now. 1433 */ 1434 xfs_trans_unreserve_and_mod_sb(tp); 1435 1436 /* 1437 * Tell the LM to call the transaction completion routine 1438 * when the log write with LSN commit_lsn completes (e.g. 1439 * when the transaction commit really hits the on-disk log). 1440 * After this call we cannot reference tp, because the call 1441 * can happen at any time and the call will free the transaction 1442 * structure pointed to by tp. The only case where we call 1443 * the completion routine (xfs_trans_committed) directly is 1444 * if the log is turned off on a debug kernel or we're 1445 * running in simulation mode (the log is explicitly turned 1446 * off). 1447 */ 1448 tp->t_logcb.cb_func = (void(*)(void*, int))xfs_trans_committed; 1449 tp->t_logcb.cb_arg = tp; 1450 1451 /* 1452 * We need to pass the iclog buffer which was used for the 1453 * transaction commit record into this function, and attach 1454 * the callback to it. The callback must be attached before 1455 * the items are unlocked to avoid racing with other threads 1456 * waiting for an item to unlock. 1457 */ 1458 shutdown = xfs_log_notify(mp, commit_iclog, &(tp->t_logcb)); 1459 1460 /* 1461 * Mark this thread as no longer being in a transaction 1462 */ 1463 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); 1464 1465 /* 1466 * Once all the items of the transaction have been copied 1467 * to the in core log and the callback is attached, the 1468 * items can be unlocked. 1469 * 1470 * This will free descriptors pointing to items which were 1471 * not logged since there is nothing more to do with them. 1472 * For items which were logged, we will keep pointers to them 1473 * so they can be unpinned after the transaction commits to disk. 1474 * This will also stamp each modified meta-data item with 1475 * the commit lsn of this transaction for dependency tracking 1476 * purposes. 1477 */ 1478 xfs_trans_unlock_items(tp, *commit_lsn); 1479 1480 /* 1481 * If we detected a log error earlier, finish committing 1482 * the transaction now (unpin log items, etc). 1483 * 1484 * Order is critical here, to avoid using the transaction 1485 * pointer after its been freed (by xfs_trans_committed 1486 * either here now, or as a callback). We cannot do this 1487 * step inside xfs_log_notify as was done earlier because 1488 * of this issue. 1489 */ 1490 if (shutdown) 1491 xfs_trans_committed(tp, XFS_LI_ABORTED); 1492 1493 /* 1494 * Now that the xfs_trans_committed callback has been attached, 1495 * and the items are released we can finally allow the iclog to 1496 * go to disk. 1497 */ 1498 return xfs_log_release_iclog(mp, commit_iclog); 1499 } 1500 1501 /* 1502 * Walk the log items and allocate log vector structures for 1503 * each item large enough to fit all the vectors they require. 1504 * Note that this format differs from the old log vector format in 1505 * that there is no transaction header in these log vectors. 1506 */ 1507 STATIC struct xfs_log_vec * 1508 xfs_trans_alloc_log_vecs( 1509 xfs_trans_t *tp) 1510 { 1511 xfs_log_item_desc_t *lidp; 1512 struct xfs_log_vec *lv = NULL; 1513 struct xfs_log_vec *ret_lv = NULL; 1514 1515 lidp = xfs_trans_first_item(tp); 1516 1517 /* Bail out if we didn't find a log item. */ 1518 if (!lidp) { 1519 ASSERT(0); 1520 return NULL; 1521 } 1522 1523 while (lidp != NULL) { 1524 struct xfs_log_vec *new_lv; 1525 1526 /* Skip items which aren't dirty in this transaction. */ 1527 if (!(lidp->lid_flags & XFS_LID_DIRTY)) { 1528 lidp = xfs_trans_next_item(tp, lidp); 1529 continue; 1530 } 1531 1532 /* Skip items that do not have any vectors for writing */ 1533 lidp->lid_size = IOP_SIZE(lidp->lid_item); 1534 if (!lidp->lid_size) { 1535 lidp = xfs_trans_next_item(tp, lidp); 1536 continue; 1537 } 1538 1539 new_lv = kmem_zalloc(sizeof(*new_lv) + 1540 lidp->lid_size * sizeof(struct xfs_log_iovec), 1541 KM_SLEEP); 1542 1543 /* The allocated iovec region lies beyond the log vector. */ 1544 new_lv->lv_iovecp = (struct xfs_log_iovec *)&new_lv[1]; 1545 new_lv->lv_niovecs = lidp->lid_size; 1546 new_lv->lv_item = lidp->lid_item; 1547 if (!ret_lv) 1548 ret_lv = new_lv; 1549 else 1550 lv->lv_next = new_lv; 1551 lv = new_lv; 1552 lidp = xfs_trans_next_item(tp, lidp); 1553 } 1554 1555 return ret_lv; 1556 } 1557 1558 static int 1559 xfs_trans_commit_cil( 1560 struct xfs_mount *mp, 1561 struct xfs_trans *tp, 1562 xfs_lsn_t *commit_lsn, 1563 int flags) 1564 { 1565 struct xfs_log_vec *log_vector; 1566 int error; 1567 1568 /* 1569 * Get each log item to allocate a vector structure for 1570 * the log item to to pass to the log write code. The 1571 * CIL commit code will format the vector and save it away. 1572 */ 1573 log_vector = xfs_trans_alloc_log_vecs(tp); 1574 if (!log_vector) 1575 return ENOMEM; 1576 1577 error = xfs_log_commit_cil(mp, tp, log_vector, commit_lsn, flags); 1578 if (error) 1579 return error; 1580 1581 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); 1582 1583 /* xfs_trans_free_items() unlocks them first */ 1584 xfs_trans_free_items(tp, *commit_lsn, 0); 1585 xfs_trans_free(tp); 1586 return 0; 1587 } 1588 1589 /* 1590 * xfs_trans_commit 1591 * 1592 * Commit the given transaction to the log a/synchronously. 1593 * 1594 * XFS disk error handling mechanism is not based on a typical 1595 * transaction abort mechanism. Logically after the filesystem 1596 * gets marked 'SHUTDOWN', we can't let any new transactions 1597 * be durable - ie. committed to disk - because some metadata might 1598 * be inconsistent. In such cases, this returns an error, and the 1599 * caller may assume that all locked objects joined to the transaction 1600 * have already been unlocked as if the commit had succeeded. 1601 * Do not reference the transaction structure after this call. 1602 */ 1603 int 1604 _xfs_trans_commit( 1605 struct xfs_trans *tp, 1606 uint flags, 1607 int *log_flushed) 1608 { 1609 struct xfs_mount *mp = tp->t_mountp; 1610 xfs_lsn_t commit_lsn = -1; 1611 int error = 0; 1612 int log_flags = 0; 1613 int sync = tp->t_flags & XFS_TRANS_SYNC; 1614 1615 /* 1616 * Determine whether this commit is releasing a permanent 1617 * log reservation or not. 1618 */ 1619 if (flags & XFS_TRANS_RELEASE_LOG_RES) { 1620 ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); 1621 log_flags = XFS_LOG_REL_PERM_RESERV; 1622 } 1623 1624 /* 1625 * If there is nothing to be logged by the transaction, 1626 * then unlock all of the items associated with the 1627 * transaction and free the transaction structure. 1628 * Also make sure to return any reserved blocks to 1629 * the free pool. 1630 */ 1631 if (!(tp->t_flags & XFS_TRANS_DIRTY)) 1632 goto out_unreserve; 1633 1634 if (XFS_FORCED_SHUTDOWN(mp)) { 1635 error = XFS_ERROR(EIO); 1636 goto out_unreserve; 1637 } 1638 1639 ASSERT(tp->t_ticket != NULL); 1640 1641 /* 1642 * If we need to update the superblock, then do it now. 1643 */ 1644 if (tp->t_flags & XFS_TRANS_SB_DIRTY) 1645 xfs_trans_apply_sb_deltas(tp); 1646 xfs_trans_apply_dquot_deltas(tp); 1647 1648 if (mp->m_flags & XFS_MOUNT_DELAYLOG) 1649 error = xfs_trans_commit_cil(mp, tp, &commit_lsn, flags); 1650 else 1651 error = xfs_trans_commit_iclog(mp, tp, &commit_lsn, flags); 1652 1653 if (error == ENOMEM) { 1654 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); 1655 error = XFS_ERROR(EIO); 1656 goto out_unreserve; 1657 } 1658 1659 /* 1660 * If the transaction needs to be synchronous, then force the 1661 * log out now and wait for it. 1662 */ 1663 if (sync) { 1664 if (!error) { 1665 error = _xfs_log_force_lsn(mp, commit_lsn, 1666 XFS_LOG_SYNC, log_flushed); 1667 } 1668 XFS_STATS_INC(xs_trans_sync); 1669 } else { 1670 XFS_STATS_INC(xs_trans_async); 1671 } 1672 1673 return error; 1674 1675 out_unreserve: 1676 xfs_trans_unreserve_and_mod_sb(tp); 1677 1678 /* 1679 * It is indeed possible for the transaction to be not dirty but 1680 * the dqinfo portion to be. All that means is that we have some 1681 * (non-persistent) quota reservations that need to be unreserved. 1682 */ 1683 xfs_trans_unreserve_and_mod_dquots(tp); 1684 if (tp->t_ticket) { 1685 commit_lsn = xfs_log_done(mp, tp->t_ticket, NULL, log_flags); 1686 if (commit_lsn == -1 && !error) 1687 error = XFS_ERROR(EIO); 1688 } 1689 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); 1690 xfs_trans_free_items(tp, NULLCOMMITLSN, error ? XFS_TRANS_ABORT : 0); 1691 xfs_trans_free(tp); 1692 1693 XFS_STATS_INC(xs_trans_empty); 1694 return error; 1695 } 1696 1697 /* 1698 * Unlock all of the transaction's items and free the transaction. 1699 * The transaction must not have modified any of its items, because 1700 * there is no way to restore them to their previous state. 1701 * 1702 * If the transaction has made a log reservation, make sure to release 1703 * it as well. 1704 */ 1705 void 1706 xfs_trans_cancel( 1707 xfs_trans_t *tp, 1708 int flags) 1709 { 1710 int log_flags; 1711 #ifdef DEBUG 1712 xfs_log_item_chunk_t *licp; 1713 xfs_log_item_desc_t *lidp; 1714 xfs_log_item_t *lip; 1715 int i; 1716 #endif 1717 xfs_mount_t *mp = tp->t_mountp; 1718 1719 /* 1720 * See if the caller is being too lazy to figure out if 1721 * the transaction really needs an abort. 1722 */ 1723 if ((flags & XFS_TRANS_ABORT) && !(tp->t_flags & XFS_TRANS_DIRTY)) 1724 flags &= ~XFS_TRANS_ABORT; 1725 /* 1726 * See if the caller is relying on us to shut down the 1727 * filesystem. This happens in paths where we detect 1728 * corruption and decide to give up. 1729 */ 1730 if ((tp->t_flags & XFS_TRANS_DIRTY) && !XFS_FORCED_SHUTDOWN(mp)) { 1731 XFS_ERROR_REPORT("xfs_trans_cancel", XFS_ERRLEVEL_LOW, mp); 1732 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 1733 } 1734 #ifdef DEBUG 1735 if (!(flags & XFS_TRANS_ABORT)) { 1736 licp = &(tp->t_items); 1737 while (licp != NULL) { 1738 lidp = licp->lic_descs; 1739 for (i = 0; i < licp->lic_unused; i++, lidp++) { 1740 if (xfs_lic_isfree(licp, i)) { 1741 continue; 1742 } 1743 1744 lip = lidp->lid_item; 1745 if (!XFS_FORCED_SHUTDOWN(mp)) 1746 ASSERT(!(lip->li_type == XFS_LI_EFD)); 1747 } 1748 licp = licp->lic_next; 1749 } 1750 } 1751 #endif 1752 xfs_trans_unreserve_and_mod_sb(tp); 1753 xfs_trans_unreserve_and_mod_dquots(tp); 1754 1755 if (tp->t_ticket) { 1756 if (flags & XFS_TRANS_RELEASE_LOG_RES) { 1757 ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); 1758 log_flags = XFS_LOG_REL_PERM_RESERV; 1759 } else { 1760 log_flags = 0; 1761 } 1762 xfs_log_done(mp, tp->t_ticket, NULL, log_flags); 1763 } 1764 1765 /* mark this thread as no longer being in a transaction */ 1766 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); 1767 1768 xfs_trans_free_items(tp, NULLCOMMITLSN, flags); 1769 xfs_trans_free(tp); 1770 } 1771 1772 /* 1773 * Roll from one trans in the sequence of PERMANENT transactions to 1774 * the next: permanent transactions are only flushed out when 1775 * committed with XFS_TRANS_RELEASE_LOG_RES, but we still want as soon 1776 * as possible to let chunks of it go to the log. So we commit the 1777 * chunk we've been working on and get a new transaction to continue. 1778 */ 1779 int 1780 xfs_trans_roll( 1781 struct xfs_trans **tpp, 1782 struct xfs_inode *dp) 1783 { 1784 struct xfs_trans *trans; 1785 unsigned int logres, count; 1786 int error; 1787 1788 /* 1789 * Ensure that the inode is always logged. 1790 */ 1791 trans = *tpp; 1792 xfs_trans_log_inode(trans, dp, XFS_ILOG_CORE); 1793 1794 /* 1795 * Copy the critical parameters from one trans to the next. 1796 */ 1797 logres = trans->t_log_res; 1798 count = trans->t_log_count; 1799 *tpp = xfs_trans_dup(trans); 1800 1801 /* 1802 * Commit the current transaction. 1803 * If this commit failed, then it'd just unlock those items that 1804 * are not marked ihold. That also means that a filesystem shutdown 1805 * is in progress. The caller takes the responsibility to cancel 1806 * the duplicate transaction that gets returned. 1807 */ 1808 error = xfs_trans_commit(trans, 0); 1809 if (error) 1810 return (error); 1811 1812 trans = *tpp; 1813 1814 /* 1815 * transaction commit worked ok so we can drop the extra ticket 1816 * reference that we gained in xfs_trans_dup() 1817 */ 1818 xfs_log_ticket_put(trans->t_ticket); 1819 1820 1821 /* 1822 * Reserve space in the log for th next transaction. 1823 * This also pushes items in the "AIL", the list of logged items, 1824 * out to disk if they are taking up space at the tail of the log 1825 * that we want to use. This requires that either nothing be locked 1826 * across this call, or that anything that is locked be logged in 1827 * the prior and the next transactions. 1828 */ 1829 error = xfs_trans_reserve(trans, 0, logres, 0, 1830 XFS_TRANS_PERM_LOG_RES, count); 1831 /* 1832 * Ensure that the inode is in the new transaction and locked. 1833 */ 1834 if (error) 1835 return error; 1836 1837 xfs_trans_ijoin(trans, dp, XFS_ILOCK_EXCL); 1838 xfs_trans_ihold(trans, dp); 1839 return 0; 1840 } 1841