1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (C) International Business Machines Corp., 2000-2005 4 * Portions Copyright (C) Christoph Hellwig, 2001-2002 5 */ 6 7 /* 8 * jfs_txnmgr.c: transaction manager 9 * 10 * notes: 11 * transaction starts with txBegin() and ends with txCommit() 12 * or txAbort(). 13 * 14 * tlock is acquired at the time of update; 15 * (obviate scan at commit time for xtree and dtree) 16 * tlock and mp points to each other; 17 * (no hashlist for mp -> tlock). 18 * 19 * special cases: 20 * tlock on in-memory inode: 21 * in-place tlock in the in-memory inode itself; 22 * converted to page lock by iWrite() at commit time. 23 * 24 * tlock during write()/mmap() under anonymous transaction (tid = 0): 25 * transferred (?) to transaction at commit time. 26 * 27 * use the page itself to update allocation maps 28 * (obviate intermediate replication of allocation/deallocation data) 29 * hold on to mp+lock thru update of maps 30 */ 31 32 #include <linux/fs.h> 33 #include <linux/vmalloc.h> 34 #include <linux/completion.h> 35 #include <linux/freezer.h> 36 #include <linux/module.h> 37 #include <linux/moduleparam.h> 38 #include <linux/kthread.h> 39 #include <linux/seq_file.h> 40 #include "jfs_incore.h" 41 #include "jfs_inode.h" 42 #include "jfs_filsys.h" 43 #include "jfs_metapage.h" 44 #include "jfs_dinode.h" 45 #include "jfs_imap.h" 46 #include "jfs_dmap.h" 47 #include "jfs_superblock.h" 48 #include "jfs_debug.h" 49 50 /* 51 * transaction management structures 52 */ 53 static struct { 54 int freetid; /* index of a free tid structure */ 55 int freelock; /* index first free lock word */ 56 wait_queue_head_t freewait; /* eventlist of free tblock */ 57 wait_queue_head_t freelockwait; /* eventlist of free tlock */ 58 wait_queue_head_t lowlockwait; /* eventlist of ample tlocks */ 59 int tlocksInUse; /* Number of tlocks in use */ 60 spinlock_t LazyLock; /* synchronize sync_queue & unlock_queue */ 61 /* struct tblock *sync_queue; * Transactions waiting for data sync */ 62 struct list_head unlock_queue; /* Txns waiting to be released */ 63 struct list_head anon_list; /* inodes having anonymous txns */ 64 struct list_head anon_list2; /* inodes having anonymous txns 65 that couldn't be sync'ed */ 66 } TxAnchor; 67 68 int jfs_tlocks_low; /* Indicates low number of available tlocks */ 69 70 #ifdef CONFIG_JFS_STATISTICS 71 static struct { 72 uint txBegin; 73 uint txBegin_barrier; 74 uint txBegin_lockslow; 75 uint txBegin_freetid; 76 uint txBeginAnon; 77 uint txBeginAnon_barrier; 78 uint txBeginAnon_lockslow; 79 uint txLockAlloc; 80 uint txLockAlloc_freelock; 81 } TxStat; 82 #endif 83 84 static int nTxBlock = -1; /* number of transaction blocks */ 85 module_param(nTxBlock, int, 0); 86 MODULE_PARM_DESC(nTxBlock, 87 "Number of transaction blocks (max:65536)"); 88 89 static int nTxLock = -1; /* number of transaction locks */ 90 module_param(nTxLock, int, 0); 91 MODULE_PARM_DESC(nTxLock, 92 "Number of transaction locks (max:65536)"); 93 94 struct tblock *TxBlock; /* transaction block table */ 95 static int TxLockLWM; /* Low water mark for number of txLocks used */ 96 static int TxLockHWM; /* High water mark for number of txLocks used */ 97 static int TxLockVHWM; /* Very High water mark */ 98 struct tlock *TxLock; /* transaction lock table */ 99 100 /* 101 * transaction management lock 102 */ 103 static DEFINE_SPINLOCK(jfsTxnLock); 104 105 #define TXN_LOCK() spin_lock(&jfsTxnLock) 106 #define TXN_UNLOCK() spin_unlock(&jfsTxnLock) 107 108 #define LAZY_LOCK_INIT() spin_lock_init(&TxAnchor.LazyLock) 109 #define LAZY_LOCK(flags) spin_lock_irqsave(&TxAnchor.LazyLock, flags) 110 #define LAZY_UNLOCK(flags) spin_unlock_irqrestore(&TxAnchor.LazyLock, flags) 111 112 static DECLARE_WAIT_QUEUE_HEAD(jfs_commit_thread_wait); 113 static int jfs_commit_thread_waking; 114 115 /* 116 * Retry logic exist outside these macros to protect from spurrious wakeups. 117 */ 118 static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event) 119 { 120 DECLARE_WAITQUEUE(wait, current); 121 122 add_wait_queue(event, &wait); 123 set_current_state(TASK_UNINTERRUPTIBLE); 124 TXN_UNLOCK(); 125 io_schedule(); 126 remove_wait_queue(event, &wait); 127 } 128 129 #define TXN_SLEEP(event)\ 130 {\ 131 TXN_SLEEP_DROP_LOCK(event);\ 132 TXN_LOCK();\ 133 } 134 135 #define TXN_WAKEUP(event) wake_up_all(event) 136 137 /* 138 * statistics 139 */ 140 static struct { 141 tid_t maxtid; /* 4: biggest tid ever used */ 142 lid_t maxlid; /* 4: biggest lid ever used */ 143 int ntid; /* 4: # of transactions performed */ 144 int nlid; /* 4: # of tlocks acquired */ 145 int waitlock; /* 4: # of tlock wait */ 146 } stattx; 147 148 /* 149 * forward references 150 */ 151 static void diLog(struct jfs_log *log, struct tblock *tblk, struct lrd *lrd, 152 struct tlock *tlck, struct commit *cd); 153 static void dataLog(struct jfs_log *log, struct tblock *tblk, struct lrd *lrd, 154 struct tlock *tlck); 155 static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, 156 struct tlock * tlck); 157 static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, 158 struct tlock * tlck); 159 static void txAllocPMap(struct inode *ip, struct maplock * maplock, 160 struct tblock * tblk); 161 static void txForce(struct tblock * tblk); 162 static void txLog(struct jfs_log *log, struct tblock *tblk, 163 struct commit *cd); 164 static void txUpdateMap(struct tblock * tblk); 165 static void txRelease(struct tblock * tblk); 166 static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, 167 struct tlock * tlck); 168 static void LogSyncRelease(struct metapage * mp); 169 170 /* 171 * transaction block/lock management 172 * --------------------------------- 173 */ 174 175 /* 176 * Get a transaction lock from the free list. If the number in use is 177 * greater than the high water mark, wake up the sync daemon. This should 178 * free some anonymous transaction locks. (TXN_LOCK must be held.) 179 */ 180 static lid_t txLockAlloc(void) 181 { 182 lid_t lid; 183 184 INCREMENT(TxStat.txLockAlloc); 185 if (!TxAnchor.freelock) { 186 INCREMENT(TxStat.txLockAlloc_freelock); 187 } 188 189 while (!(lid = TxAnchor.freelock)) 190 TXN_SLEEP(&TxAnchor.freelockwait); 191 TxAnchor.freelock = TxLock[lid].next; 192 HIGHWATERMARK(stattx.maxlid, lid); 193 if ((++TxAnchor.tlocksInUse > TxLockHWM) && (jfs_tlocks_low == 0)) { 194 jfs_info("txLockAlloc tlocks low"); 195 jfs_tlocks_low = 1; 196 wake_up_process(jfsSyncThread); 197 } 198 199 return lid; 200 } 201 202 static void txLockFree(lid_t lid) 203 { 204 TxLock[lid].tid = 0; 205 TxLock[lid].next = TxAnchor.freelock; 206 TxAnchor.freelock = lid; 207 TxAnchor.tlocksInUse--; 208 if (jfs_tlocks_low && (TxAnchor.tlocksInUse < TxLockLWM)) { 209 jfs_info("txLockFree jfs_tlocks_low no more"); 210 jfs_tlocks_low = 0; 211 TXN_WAKEUP(&TxAnchor.lowlockwait); 212 } 213 TXN_WAKEUP(&TxAnchor.freelockwait); 214 } 215 216 /* 217 * NAME: txInit() 218 * 219 * FUNCTION: initialize transaction management structures 220 * 221 * RETURN: 222 * 223 * serialization: single thread at jfs_init() 224 */ 225 int txInit(void) 226 { 227 int k, size; 228 struct sysinfo si; 229 230 /* Set defaults for nTxLock and nTxBlock if unset */ 231 232 if (nTxLock == -1) { 233 if (nTxBlock == -1) { 234 /* Base default on memory size */ 235 si_meminfo(&si); 236 if (si.totalram > (256 * 1024)) /* 1 GB */ 237 nTxLock = 64 * 1024; 238 else 239 nTxLock = si.totalram >> 2; 240 } else if (nTxBlock > (8 * 1024)) 241 nTxLock = 64 * 1024; 242 else 243 nTxLock = nTxBlock << 3; 244 } 245 if (nTxBlock == -1) 246 nTxBlock = nTxLock >> 3; 247 248 /* Verify tunable parameters */ 249 if (nTxBlock < 16) 250 nTxBlock = 16; /* No one should set it this low */ 251 if (nTxBlock > 65536) 252 nTxBlock = 65536; 253 if (nTxLock < 256) 254 nTxLock = 256; /* No one should set it this low */ 255 if (nTxLock > 65536) 256 nTxLock = 65536; 257 258 printk(KERN_INFO "JFS: nTxBlock = %d, nTxLock = %d\n", 259 nTxBlock, nTxLock); 260 /* 261 * initialize transaction block (tblock) table 262 * 263 * transaction id (tid) = tblock index 264 * tid = 0 is reserved. 265 */ 266 TxLockLWM = (nTxLock * 4) / 10; 267 TxLockHWM = (nTxLock * 7) / 10; 268 TxLockVHWM = (nTxLock * 8) / 10; 269 270 size = sizeof(struct tblock) * nTxBlock; 271 TxBlock = vmalloc(size); 272 if (TxBlock == NULL) 273 return -ENOMEM; 274 275 for (k = 0; k < nTxBlock; k++) { 276 init_waitqueue_head(&TxBlock[k].gcwait); 277 init_waitqueue_head(&TxBlock[k].waitor); 278 INIT_LIST_HEAD(&TxBlock[k].synclist); 279 } 280 281 for (k = 1; k < nTxBlock - 1; k++) { 282 TxBlock[k].next = k + 1; 283 } 284 TxBlock[k].next = 0; 285 286 TxAnchor.freetid = 1; 287 init_waitqueue_head(&TxAnchor.freewait); 288 289 stattx.maxtid = 1; /* statistics */ 290 291 /* 292 * initialize transaction lock (tlock) table 293 * 294 * transaction lock id = tlock index 295 * tlock id = 0 is reserved. 296 */ 297 size = sizeof(struct tlock) * nTxLock; 298 TxLock = vmalloc(size); 299 if (TxLock == NULL) { 300 vfree(TxBlock); 301 return -ENOMEM; 302 } 303 304 /* initialize tlock table */ 305 for (k = 1; k < nTxLock - 1; k++) 306 TxLock[k].next = k + 1; 307 TxLock[k].next = 0; 308 init_waitqueue_head(&TxAnchor.freelockwait); 309 init_waitqueue_head(&TxAnchor.lowlockwait); 310 311 TxAnchor.freelock = 1; 312 TxAnchor.tlocksInUse = 0; 313 INIT_LIST_HEAD(&TxAnchor.anon_list); 314 INIT_LIST_HEAD(&TxAnchor.anon_list2); 315 316 LAZY_LOCK_INIT(); 317 INIT_LIST_HEAD(&TxAnchor.unlock_queue); 318 319 stattx.maxlid = 1; /* statistics */ 320 321 return 0; 322 } 323 324 /* 325 * NAME: txExit() 326 * 327 * FUNCTION: clean up when module is unloaded 328 */ 329 void txExit(void) 330 { 331 vfree(TxLock); 332 TxLock = NULL; 333 vfree(TxBlock); 334 TxBlock = NULL; 335 } 336 337 /* 338 * NAME: txBegin() 339 * 340 * FUNCTION: start a transaction. 341 * 342 * PARAMETER: sb - superblock 343 * flag - force for nested tx; 344 * 345 * RETURN: tid - transaction id 346 * 347 * note: flag force allows to start tx for nested tx 348 * to prevent deadlock on logsync barrier; 349 */ 350 tid_t txBegin(struct super_block *sb, int flag) 351 { 352 tid_t t; 353 struct tblock *tblk; 354 struct jfs_log *log; 355 356 jfs_info("txBegin: flag = 0x%x", flag); 357 log = JFS_SBI(sb)->log; 358 359 if (!log) { 360 jfs_error(sb, "read-only filesystem\n"); 361 return 0; 362 } 363 364 TXN_LOCK(); 365 366 INCREMENT(TxStat.txBegin); 367 368 retry: 369 if (!(flag & COMMIT_FORCE)) { 370 /* 371 * synchronize with logsync barrier 372 */ 373 if (test_bit(log_SYNCBARRIER, &log->flag) || 374 test_bit(log_QUIESCE, &log->flag)) { 375 INCREMENT(TxStat.txBegin_barrier); 376 TXN_SLEEP(&log->syncwait); 377 goto retry; 378 } 379 } 380 if (flag == 0) { 381 /* 382 * Don't begin transaction if we're getting starved for tlocks 383 * unless COMMIT_FORCE or COMMIT_INODE (which may ultimately 384 * free tlocks) 385 */ 386 if (TxAnchor.tlocksInUse > TxLockVHWM) { 387 INCREMENT(TxStat.txBegin_lockslow); 388 TXN_SLEEP(&TxAnchor.lowlockwait); 389 goto retry; 390 } 391 } 392 393 /* 394 * allocate transaction id/block 395 */ 396 if ((t = TxAnchor.freetid) == 0) { 397 jfs_info("txBegin: waiting for free tid"); 398 INCREMENT(TxStat.txBegin_freetid); 399 TXN_SLEEP(&TxAnchor.freewait); 400 goto retry; 401 } 402 403 tblk = tid_to_tblock(t); 404 405 if ((tblk->next == 0) && !(flag & COMMIT_FORCE)) { 406 /* Don't let a non-forced transaction take the last tblk */ 407 jfs_info("txBegin: waiting for free tid"); 408 INCREMENT(TxStat.txBegin_freetid); 409 TXN_SLEEP(&TxAnchor.freewait); 410 goto retry; 411 } 412 413 TxAnchor.freetid = tblk->next; 414 415 /* 416 * initialize transaction 417 */ 418 419 /* 420 * We can't zero the whole thing or we screw up another thread being 421 * awakened after sleeping on tblk->waitor 422 * 423 * memset(tblk, 0, sizeof(struct tblock)); 424 */ 425 tblk->next = tblk->last = tblk->xflag = tblk->flag = tblk->lsn = 0; 426 427 tblk->sb = sb; 428 ++log->logtid; 429 tblk->logtid = log->logtid; 430 431 ++log->active; 432 433 HIGHWATERMARK(stattx.maxtid, t); /* statistics */ 434 INCREMENT(stattx.ntid); /* statistics */ 435 436 TXN_UNLOCK(); 437 438 jfs_info("txBegin: returning tid = %d", t); 439 440 return t; 441 } 442 443 /* 444 * NAME: txBeginAnon() 445 * 446 * FUNCTION: start an anonymous transaction. 447 * Blocks if logsync or available tlocks are low to prevent 448 * anonymous tlocks from depleting supply. 449 * 450 * PARAMETER: sb - superblock 451 * 452 * RETURN: none 453 */ 454 void txBeginAnon(struct super_block *sb) 455 { 456 struct jfs_log *log; 457 458 log = JFS_SBI(sb)->log; 459 460 TXN_LOCK(); 461 INCREMENT(TxStat.txBeginAnon); 462 463 retry: 464 /* 465 * synchronize with logsync barrier 466 */ 467 if (test_bit(log_SYNCBARRIER, &log->flag) || 468 test_bit(log_QUIESCE, &log->flag)) { 469 INCREMENT(TxStat.txBeginAnon_barrier); 470 TXN_SLEEP(&log->syncwait); 471 goto retry; 472 } 473 474 /* 475 * Don't begin transaction if we're getting starved for tlocks 476 */ 477 if (TxAnchor.tlocksInUse > TxLockVHWM) { 478 INCREMENT(TxStat.txBeginAnon_lockslow); 479 TXN_SLEEP(&TxAnchor.lowlockwait); 480 goto retry; 481 } 482 TXN_UNLOCK(); 483 } 484 485 /* 486 * txEnd() 487 * 488 * function: free specified transaction block. 489 * 490 * logsync barrier processing: 491 * 492 * serialization: 493 */ 494 void txEnd(tid_t tid) 495 { 496 struct tblock *tblk = tid_to_tblock(tid); 497 struct jfs_log *log; 498 499 jfs_info("txEnd: tid = %d", tid); 500 TXN_LOCK(); 501 502 /* 503 * wakeup transactions waiting on the page locked 504 * by the current transaction 505 */ 506 TXN_WAKEUP(&tblk->waitor); 507 508 log = JFS_SBI(tblk->sb)->log; 509 510 /* 511 * Lazy commit thread can't free this guy until we mark it UNLOCKED, 512 * otherwise, we would be left with a transaction that may have been 513 * reused. 514 * 515 * Lazy commit thread will turn off tblkGC_LAZY before calling this 516 * routine. 517 */ 518 if (tblk->flag & tblkGC_LAZY) { 519 jfs_info("txEnd called w/lazy tid: %d, tblk = 0x%p", tid, tblk); 520 TXN_UNLOCK(); 521 522 spin_lock_irq(&log->gclock); // LOGGC_LOCK 523 tblk->flag |= tblkGC_UNLOCKED; 524 spin_unlock_irq(&log->gclock); // LOGGC_UNLOCK 525 return; 526 } 527 528 jfs_info("txEnd: tid: %d, tblk = 0x%p", tid, tblk); 529 530 assert(tblk->next == 0); 531 532 /* 533 * insert tblock back on freelist 534 */ 535 tblk->next = TxAnchor.freetid; 536 TxAnchor.freetid = tid; 537 538 /* 539 * mark the tblock not active 540 */ 541 if (--log->active == 0) { 542 clear_bit(log_FLUSH, &log->flag); 543 544 /* 545 * synchronize with logsync barrier 546 */ 547 if (test_bit(log_SYNCBARRIER, &log->flag)) { 548 TXN_UNLOCK(); 549 550 /* write dirty metadata & forward log syncpt */ 551 jfs_syncpt(log, 1); 552 553 jfs_info("log barrier off: 0x%x", log->lsn); 554 555 /* enable new transactions start */ 556 clear_bit(log_SYNCBARRIER, &log->flag); 557 558 /* wakeup all waitors for logsync barrier */ 559 TXN_WAKEUP(&log->syncwait); 560 561 goto wakeup; 562 } 563 } 564 565 TXN_UNLOCK(); 566 wakeup: 567 /* 568 * wakeup all waitors for a free tblock 569 */ 570 TXN_WAKEUP(&TxAnchor.freewait); 571 } 572 573 /* 574 * txLock() 575 * 576 * function: acquire a transaction lock on the specified <mp> 577 * 578 * parameter: 579 * 580 * return: transaction lock id 581 * 582 * serialization: 583 */ 584 struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp, 585 int type) 586 { 587 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 588 int dir_xtree = 0; 589 lid_t lid; 590 tid_t xtid; 591 struct tlock *tlck; 592 struct xtlock *xtlck; 593 struct linelock *linelock; 594 xtpage_t *p; 595 struct tblock *tblk; 596 597 TXN_LOCK(); 598 599 if (S_ISDIR(ip->i_mode) && (type & tlckXTREE) && 600 !(mp->xflag & COMMIT_PAGE)) { 601 /* 602 * Directory inode is special. It can have both an xtree tlock 603 * and a dtree tlock associated with it. 604 */ 605 dir_xtree = 1; 606 lid = jfs_ip->xtlid; 607 } else 608 lid = mp->lid; 609 610 /* is page not locked by a transaction ? */ 611 if (lid == 0) 612 goto allocateLock; 613 614 jfs_info("txLock: tid:%d ip:0x%p mp:0x%p lid:%d", tid, ip, mp, lid); 615 616 /* is page locked by the requester transaction ? */ 617 tlck = lid_to_tlock(lid); 618 if ((xtid = tlck->tid) == tid) { 619 TXN_UNLOCK(); 620 goto grantLock; 621 } 622 623 /* 624 * is page locked by anonymous transaction/lock ? 625 * 626 * (page update without transaction (i.e., file write) is 627 * locked under anonymous transaction tid = 0: 628 * anonymous tlocks maintained on anonymous tlock list of 629 * the inode of the page and available to all anonymous 630 * transactions until txCommit() time at which point 631 * they are transferred to the transaction tlock list of 632 * the committing transaction of the inode) 633 */ 634 if (xtid == 0) { 635 tlck->tid = tid; 636 TXN_UNLOCK(); 637 tblk = tid_to_tblock(tid); 638 /* 639 * The order of the tlocks in the transaction is important 640 * (during truncate, child xtree pages must be freed before 641 * parent's tlocks change the working map). 642 * Take tlock off anonymous list and add to tail of 643 * transaction list 644 * 645 * Note: We really need to get rid of the tid & lid and 646 * use list_head's. This code is getting UGLY! 647 */ 648 if (jfs_ip->atlhead == lid) { 649 if (jfs_ip->atltail == lid) { 650 /* only anonymous txn. 651 * Remove from anon_list 652 */ 653 TXN_LOCK(); 654 list_del_init(&jfs_ip->anon_inode_list); 655 TXN_UNLOCK(); 656 } 657 jfs_ip->atlhead = tlck->next; 658 } else { 659 lid_t last; 660 for (last = jfs_ip->atlhead; 661 lid_to_tlock(last)->next != lid; 662 last = lid_to_tlock(last)->next) { 663 assert(last); 664 } 665 lid_to_tlock(last)->next = tlck->next; 666 if (jfs_ip->atltail == lid) 667 jfs_ip->atltail = last; 668 } 669 670 /* insert the tlock at tail of transaction tlock list */ 671 672 if (tblk->next) 673 lid_to_tlock(tblk->last)->next = lid; 674 else 675 tblk->next = lid; 676 tlck->next = 0; 677 tblk->last = lid; 678 679 goto grantLock; 680 } 681 682 goto waitLock; 683 684 /* 685 * allocate a tlock 686 */ 687 allocateLock: 688 lid = txLockAlloc(); 689 tlck = lid_to_tlock(lid); 690 691 /* 692 * initialize tlock 693 */ 694 tlck->tid = tid; 695 696 TXN_UNLOCK(); 697 698 /* mark tlock for meta-data page */ 699 if (mp->xflag & COMMIT_PAGE) { 700 701 tlck->flag = tlckPAGELOCK; 702 703 /* mark the page dirty and nohomeok */ 704 metapage_nohomeok(mp); 705 706 jfs_info("locking mp = 0x%p, nohomeok = %d tid = %d tlck = 0x%p", 707 mp, mp->nohomeok, tid, tlck); 708 709 /* if anonymous transaction, and buffer is on the group 710 * commit synclist, mark inode to show this. This will 711 * prevent the buffer from being marked nohomeok for too 712 * long a time. 713 */ 714 if ((tid == 0) && mp->lsn) 715 set_cflag(COMMIT_Synclist, ip); 716 } 717 /* mark tlock for in-memory inode */ 718 else 719 tlck->flag = tlckINODELOCK; 720 721 if (S_ISDIR(ip->i_mode)) 722 tlck->flag |= tlckDIRECTORY; 723 724 tlck->type = 0; 725 726 /* bind the tlock and the page */ 727 tlck->ip = ip; 728 tlck->mp = mp; 729 if (dir_xtree) 730 jfs_ip->xtlid = lid; 731 else 732 mp->lid = lid; 733 734 /* 735 * enqueue transaction lock to transaction/inode 736 */ 737 /* insert the tlock at tail of transaction tlock list */ 738 if (tid) { 739 tblk = tid_to_tblock(tid); 740 if (tblk->next) 741 lid_to_tlock(tblk->last)->next = lid; 742 else 743 tblk->next = lid; 744 tlck->next = 0; 745 tblk->last = lid; 746 } 747 /* anonymous transaction: 748 * insert the tlock at head of inode anonymous tlock list 749 */ 750 else { 751 tlck->next = jfs_ip->atlhead; 752 jfs_ip->atlhead = lid; 753 if (tlck->next == 0) { 754 /* This inode's first anonymous transaction */ 755 jfs_ip->atltail = lid; 756 TXN_LOCK(); 757 list_add_tail(&jfs_ip->anon_inode_list, 758 &TxAnchor.anon_list); 759 TXN_UNLOCK(); 760 } 761 } 762 763 /* initialize type dependent area for linelock */ 764 linelock = (struct linelock *) & tlck->lock; 765 linelock->next = 0; 766 linelock->flag = tlckLINELOCK; 767 linelock->maxcnt = TLOCKSHORT; 768 linelock->index = 0; 769 770 switch (type & tlckTYPE) { 771 case tlckDTREE: 772 linelock->l2linesize = L2DTSLOTSIZE; 773 break; 774 775 case tlckXTREE: 776 linelock->l2linesize = L2XTSLOTSIZE; 777 778 xtlck = (struct xtlock *) linelock; 779 xtlck->header.offset = 0; 780 xtlck->header.length = 2; 781 782 if (type & tlckNEW) { 783 xtlck->lwm.offset = XTENTRYSTART; 784 } else { 785 if (mp->xflag & COMMIT_PAGE) 786 p = (xtpage_t *) mp->data; 787 else 788 p = (xtpage_t *) &jfs_ip->i_xtroot; 789 xtlck->lwm.offset = 790 le16_to_cpu(p->header.nextindex); 791 } 792 xtlck->lwm.length = 0; /* ! */ 793 xtlck->twm.offset = 0; 794 xtlck->hwm.offset = 0; 795 796 xtlck->index = 2; 797 break; 798 799 case tlckINODE: 800 linelock->l2linesize = L2INODESLOTSIZE; 801 break; 802 803 case tlckDATA: 804 linelock->l2linesize = L2DATASLOTSIZE; 805 break; 806 807 default: 808 jfs_err("UFO tlock:0x%p", tlck); 809 } 810 811 /* 812 * update tlock vector 813 */ 814 grantLock: 815 tlck->type |= type; 816 817 return tlck; 818 819 /* 820 * page is being locked by another transaction: 821 */ 822 waitLock: 823 /* Only locks on ipimap or ipaimap should reach here */ 824 /* assert(jfs_ip->fileset == AGGREGATE_I); */ 825 if (jfs_ip->fileset != AGGREGATE_I) { 826 printk(KERN_ERR "txLock: trying to lock locked page!"); 827 print_hex_dump(KERN_ERR, "ip: ", DUMP_PREFIX_ADDRESS, 16, 4, 828 ip, sizeof(*ip), 0); 829 print_hex_dump(KERN_ERR, "mp: ", DUMP_PREFIX_ADDRESS, 16, 4, 830 mp, sizeof(*mp), 0); 831 print_hex_dump(KERN_ERR, "Locker's tblock: ", 832 DUMP_PREFIX_ADDRESS, 16, 4, tid_to_tblock(tid), 833 sizeof(struct tblock), 0); 834 print_hex_dump(KERN_ERR, "Tlock: ", DUMP_PREFIX_ADDRESS, 16, 4, 835 tlck, sizeof(*tlck), 0); 836 BUG(); 837 } 838 INCREMENT(stattx.waitlock); /* statistics */ 839 TXN_UNLOCK(); 840 release_metapage(mp); 841 TXN_LOCK(); 842 xtid = tlck->tid; /* reacquire after dropping TXN_LOCK */ 843 844 jfs_info("txLock: in waitLock, tid = %d, xtid = %d, lid = %d", 845 tid, xtid, lid); 846 847 /* Recheck everything since dropping TXN_LOCK */ 848 if (xtid && (tlck->mp == mp) && (mp->lid == lid)) 849 TXN_SLEEP_DROP_LOCK(&tid_to_tblock(xtid)->waitor); 850 else 851 TXN_UNLOCK(); 852 jfs_info("txLock: awakened tid = %d, lid = %d", tid, lid); 853 854 return NULL; 855 } 856 857 /* 858 * NAME: txRelease() 859 * 860 * FUNCTION: Release buffers associated with transaction locks, but don't 861 * mark homeok yet. The allows other transactions to modify 862 * buffers, but won't let them go to disk until commit record 863 * actually gets written. 864 * 865 * PARAMETER: 866 * tblk - 867 * 868 * RETURN: Errors from subroutines. 869 */ 870 static void txRelease(struct tblock * tblk) 871 { 872 struct metapage *mp; 873 lid_t lid; 874 struct tlock *tlck; 875 876 TXN_LOCK(); 877 878 for (lid = tblk->next; lid; lid = tlck->next) { 879 tlck = lid_to_tlock(lid); 880 if ((mp = tlck->mp) != NULL && 881 (tlck->type & tlckBTROOT) == 0) { 882 assert(mp->xflag & COMMIT_PAGE); 883 mp->lid = 0; 884 } 885 } 886 887 /* 888 * wakeup transactions waiting on a page locked 889 * by the current transaction 890 */ 891 TXN_WAKEUP(&tblk->waitor); 892 893 TXN_UNLOCK(); 894 } 895 896 /* 897 * NAME: txUnlock() 898 * 899 * FUNCTION: Initiates pageout of pages modified by tid in journalled 900 * objects and frees their lockwords. 901 */ 902 static void txUnlock(struct tblock * tblk) 903 { 904 struct tlock *tlck; 905 struct linelock *linelock; 906 lid_t lid, next, llid, k; 907 struct metapage *mp; 908 struct jfs_log *log; 909 int difft, diffp; 910 unsigned long flags; 911 912 jfs_info("txUnlock: tblk = 0x%p", tblk); 913 log = JFS_SBI(tblk->sb)->log; 914 915 /* 916 * mark page under tlock homeok (its log has been written): 917 */ 918 for (lid = tblk->next; lid; lid = next) { 919 tlck = lid_to_tlock(lid); 920 next = tlck->next; 921 922 jfs_info("unlocking lid = %d, tlck = 0x%p", lid, tlck); 923 924 /* unbind page from tlock */ 925 if ((mp = tlck->mp) != NULL && 926 (tlck->type & tlckBTROOT) == 0) { 927 assert(mp->xflag & COMMIT_PAGE); 928 929 /* hold buffer 930 */ 931 hold_metapage(mp); 932 933 assert(mp->nohomeok > 0); 934 _metapage_homeok(mp); 935 936 /* inherit younger/larger clsn */ 937 LOGSYNC_LOCK(log, flags); 938 if (mp->clsn) { 939 logdiff(difft, tblk->clsn, log); 940 logdiff(diffp, mp->clsn, log); 941 if (difft > diffp) 942 mp->clsn = tblk->clsn; 943 } else 944 mp->clsn = tblk->clsn; 945 LOGSYNC_UNLOCK(log, flags); 946 947 assert(!(tlck->flag & tlckFREEPAGE)); 948 949 put_metapage(mp); 950 } 951 952 /* insert tlock, and linelock(s) of the tlock if any, 953 * at head of freelist 954 */ 955 TXN_LOCK(); 956 957 llid = ((struct linelock *) & tlck->lock)->next; 958 while (llid) { 959 linelock = (struct linelock *) lid_to_tlock(llid); 960 k = linelock->next; 961 txLockFree(llid); 962 llid = k; 963 } 964 txLockFree(lid); 965 966 TXN_UNLOCK(); 967 } 968 tblk->next = tblk->last = 0; 969 970 /* 971 * remove tblock from logsynclist 972 * (allocation map pages inherited lsn of tblk and 973 * has been inserted in logsync list at txUpdateMap()) 974 */ 975 if (tblk->lsn) { 976 LOGSYNC_LOCK(log, flags); 977 log->count--; 978 list_del_init(&tblk->synclist); 979 LOGSYNC_UNLOCK(log, flags); 980 } 981 } 982 983 /* 984 * txMaplock() 985 * 986 * function: allocate a transaction lock for freed page/entry; 987 * for freed page, maplock is used as xtlock/dtlock type; 988 */ 989 struct tlock *txMaplock(tid_t tid, struct inode *ip, int type) 990 { 991 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 992 lid_t lid; 993 struct tblock *tblk; 994 struct tlock *tlck; 995 struct maplock *maplock; 996 997 TXN_LOCK(); 998 999 /* 1000 * allocate a tlock 1001 */ 1002 lid = txLockAlloc(); 1003 tlck = lid_to_tlock(lid); 1004 1005 /* 1006 * initialize tlock 1007 */ 1008 tlck->tid = tid; 1009 1010 /* bind the tlock and the object */ 1011 tlck->flag = tlckINODELOCK; 1012 if (S_ISDIR(ip->i_mode)) 1013 tlck->flag |= tlckDIRECTORY; 1014 tlck->ip = ip; 1015 tlck->mp = NULL; 1016 1017 tlck->type = type; 1018 1019 /* 1020 * enqueue transaction lock to transaction/inode 1021 */ 1022 /* insert the tlock at tail of transaction tlock list */ 1023 if (tid) { 1024 tblk = tid_to_tblock(tid); 1025 if (tblk->next) 1026 lid_to_tlock(tblk->last)->next = lid; 1027 else 1028 tblk->next = lid; 1029 tlck->next = 0; 1030 tblk->last = lid; 1031 } 1032 /* anonymous transaction: 1033 * insert the tlock at head of inode anonymous tlock list 1034 */ 1035 else { 1036 tlck->next = jfs_ip->atlhead; 1037 jfs_ip->atlhead = lid; 1038 if (tlck->next == 0) { 1039 /* This inode's first anonymous transaction */ 1040 jfs_ip->atltail = lid; 1041 list_add_tail(&jfs_ip->anon_inode_list, 1042 &TxAnchor.anon_list); 1043 } 1044 } 1045 1046 TXN_UNLOCK(); 1047 1048 /* initialize type dependent area for maplock */ 1049 maplock = (struct maplock *) & tlck->lock; 1050 maplock->next = 0; 1051 maplock->maxcnt = 0; 1052 maplock->index = 0; 1053 1054 return tlck; 1055 } 1056 1057 /* 1058 * txLinelock() 1059 * 1060 * function: allocate a transaction lock for log vector list 1061 */ 1062 struct linelock *txLinelock(struct linelock * tlock) 1063 { 1064 lid_t lid; 1065 struct tlock *tlck; 1066 struct linelock *linelock; 1067 1068 TXN_LOCK(); 1069 1070 /* allocate a TxLock structure */ 1071 lid = txLockAlloc(); 1072 tlck = lid_to_tlock(lid); 1073 1074 TXN_UNLOCK(); 1075 1076 /* initialize linelock */ 1077 linelock = (struct linelock *) tlck; 1078 linelock->next = 0; 1079 linelock->flag = tlckLINELOCK; 1080 linelock->maxcnt = TLOCKLONG; 1081 linelock->index = 0; 1082 if (tlck->flag & tlckDIRECTORY) 1083 linelock->flag |= tlckDIRECTORY; 1084 1085 /* append linelock after tlock */ 1086 linelock->next = tlock->next; 1087 tlock->next = lid; 1088 1089 return linelock; 1090 } 1091 1092 /* 1093 * transaction commit management 1094 * ----------------------------- 1095 */ 1096 1097 /* 1098 * NAME: txCommit() 1099 * 1100 * FUNCTION: commit the changes to the objects specified in 1101 * clist. For journalled segments only the 1102 * changes of the caller are committed, ie by tid. 1103 * for non-journalled segments the data are flushed to 1104 * disk and then the change to the disk inode and indirect 1105 * blocks committed (so blocks newly allocated to the 1106 * segment will be made a part of the segment atomically). 1107 * 1108 * all of the segments specified in clist must be in 1109 * one file system. no more than 6 segments are needed 1110 * to handle all unix svcs. 1111 * 1112 * if the i_nlink field (i.e. disk inode link count) 1113 * is zero, and the type of inode is a regular file or 1114 * directory, or symbolic link , the inode is truncated 1115 * to zero length. the truncation is committed but the 1116 * VM resources are unaffected until it is closed (see 1117 * iput and iclose). 1118 * 1119 * PARAMETER: 1120 * 1121 * RETURN: 1122 * 1123 * serialization: 1124 * on entry the inode lock on each segment is assumed 1125 * to be held. 1126 * 1127 * i/o error: 1128 */ 1129 int txCommit(tid_t tid, /* transaction identifier */ 1130 int nip, /* number of inodes to commit */ 1131 struct inode **iplist, /* list of inode to commit */ 1132 int flag) 1133 { 1134 int rc = 0; 1135 struct commit cd; 1136 struct jfs_log *log; 1137 struct tblock *tblk; 1138 struct lrd *lrd; 1139 struct inode *ip; 1140 struct jfs_inode_info *jfs_ip; 1141 int k, n; 1142 ino_t top; 1143 struct super_block *sb; 1144 1145 jfs_info("txCommit, tid = %d, flag = %d", tid, flag); 1146 /* is read-only file system ? */ 1147 if (isReadOnly(iplist[0])) { 1148 rc = -EROFS; 1149 goto TheEnd; 1150 } 1151 1152 sb = cd.sb = iplist[0]->i_sb; 1153 cd.tid = tid; 1154 1155 if (tid == 0) 1156 tid = txBegin(sb, 0); 1157 tblk = tid_to_tblock(tid); 1158 1159 /* 1160 * initialize commit structure 1161 */ 1162 log = JFS_SBI(sb)->log; 1163 cd.log = log; 1164 1165 /* initialize log record descriptor in commit */ 1166 lrd = &cd.lrd; 1167 lrd->logtid = cpu_to_le32(tblk->logtid); 1168 lrd->backchain = 0; 1169 1170 tblk->xflag |= flag; 1171 1172 if ((flag & (COMMIT_FORCE | COMMIT_SYNC)) == 0) 1173 tblk->xflag |= COMMIT_LAZY; 1174 /* 1175 * prepare non-journaled objects for commit 1176 * 1177 * flush data pages of non-journaled file 1178 * to prevent the file getting non-initialized disk blocks 1179 * in case of crash. 1180 * (new blocks - ) 1181 */ 1182 cd.iplist = iplist; 1183 cd.nip = nip; 1184 1185 /* 1186 * acquire transaction lock on (on-disk) inodes 1187 * 1188 * update on-disk inode from in-memory inode 1189 * acquiring transaction locks for AFTER records 1190 * on the on-disk inode of file object 1191 * 1192 * sort the inodes array by inode number in descending order 1193 * to prevent deadlock when acquiring transaction lock 1194 * of on-disk inodes on multiple on-disk inode pages by 1195 * multiple concurrent transactions 1196 */ 1197 for (k = 0; k < cd.nip; k++) { 1198 top = (cd.iplist[k])->i_ino; 1199 for (n = k + 1; n < cd.nip; n++) { 1200 ip = cd.iplist[n]; 1201 if (ip->i_ino > top) { 1202 top = ip->i_ino; 1203 cd.iplist[n] = cd.iplist[k]; 1204 cd.iplist[k] = ip; 1205 } 1206 } 1207 1208 ip = cd.iplist[k]; 1209 jfs_ip = JFS_IP(ip); 1210 1211 /* 1212 * BUGBUG - This code has temporarily been removed. The 1213 * intent is to ensure that any file data is written before 1214 * the metadata is committed to the journal. This prevents 1215 * uninitialized data from appearing in a file after the 1216 * journal has been replayed. (The uninitialized data 1217 * could be sensitive data removed by another user.) 1218 * 1219 * The problem now is that we are holding the IWRITELOCK 1220 * on the inode, and calling filemap_fdatawrite on an 1221 * unmapped page will cause a deadlock in jfs_get_block. 1222 * 1223 * The long term solution is to pare down the use of 1224 * IWRITELOCK. We are currently holding it too long. 1225 * We could also be smarter about which data pages need 1226 * to be written before the transaction is committed and 1227 * when we don't need to worry about it at all. 1228 * 1229 * if ((!S_ISDIR(ip->i_mode)) 1230 * && (tblk->flag & COMMIT_DELETE) == 0) 1231 * filemap_write_and_wait(ip->i_mapping); 1232 */ 1233 1234 /* 1235 * Mark inode as not dirty. It will still be on the dirty 1236 * inode list, but we'll know not to commit it again unless 1237 * it gets marked dirty again 1238 */ 1239 clear_cflag(COMMIT_Dirty, ip); 1240 1241 /* inherit anonymous tlock(s) of inode */ 1242 if (jfs_ip->atlhead) { 1243 lid_to_tlock(jfs_ip->atltail)->next = tblk->next; 1244 tblk->next = jfs_ip->atlhead; 1245 if (!tblk->last) 1246 tblk->last = jfs_ip->atltail; 1247 jfs_ip->atlhead = jfs_ip->atltail = 0; 1248 TXN_LOCK(); 1249 list_del_init(&jfs_ip->anon_inode_list); 1250 TXN_UNLOCK(); 1251 } 1252 1253 /* 1254 * acquire transaction lock on on-disk inode page 1255 * (become first tlock of the tblk's tlock list) 1256 */ 1257 if (((rc = diWrite(tid, ip)))) 1258 goto out; 1259 } 1260 1261 /* 1262 * write log records from transaction locks 1263 * 1264 * txUpdateMap() resets XAD_NEW in XAD. 1265 */ 1266 txLog(log, tblk, &cd); 1267 1268 /* 1269 * Ensure that inode isn't reused before 1270 * lazy commit thread finishes processing 1271 */ 1272 if (tblk->xflag & COMMIT_DELETE) { 1273 ihold(tblk->u.ip); 1274 /* 1275 * Avoid a rare deadlock 1276 * 1277 * If the inode is locked, we may be blocked in 1278 * jfs_commit_inode. If so, we don't want the 1279 * lazy_commit thread doing the last iput() on the inode 1280 * since that may block on the locked inode. Instead, 1281 * commit the transaction synchronously, so the last iput 1282 * will be done by the calling thread (or later) 1283 */ 1284 /* 1285 * I believe this code is no longer needed. Splitting I_LOCK 1286 * into two bits, I_NEW and I_SYNC should prevent this 1287 * deadlock as well. But since I don't have a JFS testload 1288 * to verify this, only a trivial s/I_LOCK/I_SYNC/ was done. 1289 * Joern 1290 */ 1291 if (inode_state_read_once(tblk->u.ip) & I_SYNC) 1292 tblk->xflag &= ~COMMIT_LAZY; 1293 } 1294 1295 ASSERT((!(tblk->xflag & COMMIT_DELETE)) || 1296 ((tblk->u.ip->i_nlink == 0) && 1297 !test_cflag(COMMIT_Nolink, tblk->u.ip))); 1298 1299 /* 1300 * write COMMIT log record 1301 */ 1302 lrd->type = cpu_to_le16(LOG_COMMIT); 1303 lrd->length = 0; 1304 lmLog(log, tblk, lrd, NULL); 1305 1306 lmGroupCommit(log, tblk); 1307 1308 /* 1309 * - transaction is now committed - 1310 */ 1311 1312 /* 1313 * force pages in careful update 1314 * (imap addressing structure update) 1315 */ 1316 if (flag & COMMIT_FORCE) 1317 txForce(tblk); 1318 1319 /* 1320 * update allocation map. 1321 * 1322 * update inode allocation map and inode: 1323 * free pager lock on memory object of inode if any. 1324 * update block allocation map. 1325 * 1326 * txUpdateMap() resets XAD_NEW in XAD. 1327 */ 1328 if (tblk->xflag & COMMIT_FORCE) 1329 txUpdateMap(tblk); 1330 1331 /* 1332 * free transaction locks and pageout/free pages 1333 */ 1334 txRelease(tblk); 1335 1336 if ((tblk->flag & tblkGC_LAZY) == 0) 1337 txUnlock(tblk); 1338 1339 1340 /* 1341 * reset in-memory object state 1342 */ 1343 for (k = 0; k < cd.nip; k++) { 1344 ip = cd.iplist[k]; 1345 jfs_ip = JFS_IP(ip); 1346 1347 /* 1348 * reset in-memory inode state 1349 */ 1350 jfs_ip->bxflag = 0; 1351 jfs_ip->blid = 0; 1352 } 1353 1354 out: 1355 if (rc != 0) 1356 txAbort(tid, 1); 1357 1358 TheEnd: 1359 jfs_info("txCommit: tid = %d, returning %d", tid, rc); 1360 return rc; 1361 } 1362 1363 /* 1364 * NAME: txLog() 1365 * 1366 * FUNCTION: Writes AFTER log records for all lines modified 1367 * by tid for segments specified by inodes in comdata. 1368 * Code assumes only WRITELOCKS are recorded in lockwords. 1369 * 1370 * PARAMETERS: 1371 * 1372 * RETURN : 1373 */ 1374 static void txLog(struct jfs_log *log, struct tblock *tblk, struct commit *cd) 1375 { 1376 struct inode *ip; 1377 lid_t lid; 1378 struct tlock *tlck; 1379 struct lrd *lrd = &cd->lrd; 1380 1381 /* 1382 * write log record(s) for each tlock of transaction, 1383 */ 1384 for (lid = tblk->next; lid; lid = tlck->next) { 1385 tlck = lid_to_tlock(lid); 1386 1387 tlck->flag |= tlckLOG; 1388 1389 /* initialize lrd common */ 1390 ip = tlck->ip; 1391 lrd->aggregate = cpu_to_le32(JFS_SBI(ip->i_sb)->aggregate); 1392 lrd->log.redopage.fileset = cpu_to_le32(JFS_IP(ip)->fileset); 1393 lrd->log.redopage.inode = cpu_to_le32(ip->i_ino); 1394 1395 /* write log record of page from the tlock */ 1396 switch (tlck->type & tlckTYPE) { 1397 case tlckXTREE: 1398 xtLog(log, tblk, lrd, tlck); 1399 break; 1400 1401 case tlckDTREE: 1402 dtLog(log, tblk, lrd, tlck); 1403 break; 1404 1405 case tlckINODE: 1406 diLog(log, tblk, lrd, tlck, cd); 1407 break; 1408 1409 case tlckMAP: 1410 mapLog(log, tblk, lrd, tlck); 1411 break; 1412 1413 case tlckDATA: 1414 dataLog(log, tblk, lrd, tlck); 1415 break; 1416 1417 default: 1418 jfs_err("UFO tlock:0x%p", tlck); 1419 } 1420 } 1421 1422 return; 1423 } 1424 1425 /* 1426 * diLog() 1427 * 1428 * function: log inode tlock and format maplock to update bmap; 1429 */ 1430 static void diLog(struct jfs_log *log, struct tblock *tblk, struct lrd *lrd, 1431 struct tlock *tlck, struct commit *cd) 1432 { 1433 struct metapage *mp; 1434 pxd_t *pxd; 1435 struct pxd_lock *pxdlock; 1436 1437 mp = tlck->mp; 1438 1439 /* initialize as REDOPAGE record format */ 1440 lrd->log.redopage.type = cpu_to_le16(LOG_INODE); 1441 lrd->log.redopage.l2linesize = cpu_to_le16(L2INODESLOTSIZE); 1442 1443 pxd = &lrd->log.redopage.pxd; 1444 1445 /* 1446 * inode after image 1447 */ 1448 if (tlck->type & tlckENTRY) { 1449 /* log after-image for logredo(): */ 1450 lrd->type = cpu_to_le16(LOG_REDOPAGE); 1451 PXDaddress(pxd, mp->index); 1452 PXDlength(pxd, 1453 mp->logical_size >> tblk->sb->s_blocksize_bits); 1454 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); 1455 1456 /* mark page as homeward bound */ 1457 tlck->flag |= tlckWRITEPAGE; 1458 } else if (tlck->type & tlckFREE) { 1459 /* 1460 * free inode extent 1461 * 1462 * (pages of the freed inode extent have been invalidated and 1463 * a maplock for free of the extent has been formatted at 1464 * txLock() time); 1465 * 1466 * the tlock had been acquired on the inode allocation map page 1467 * (iag) that specifies the freed extent, even though the map 1468 * page is not itself logged, to prevent pageout of the map 1469 * page before the log; 1470 */ 1471 1472 /* log LOG_NOREDOINOEXT of the freed inode extent for 1473 * logredo() to start NoRedoPage filters, and to update 1474 * imap and bmap for free of the extent; 1475 */ 1476 lrd->type = cpu_to_le16(LOG_NOREDOINOEXT); 1477 /* 1478 * For the LOG_NOREDOINOEXT record, we need 1479 * to pass the IAG number and inode extent 1480 * index (within that IAG) from which the 1481 * extent is being released. These have been 1482 * passed to us in the iplist[1] and iplist[2]. 1483 */ 1484 lrd->log.noredoinoext.iagnum = 1485 cpu_to_le32((u32) (size_t) cd->iplist[1]); 1486 lrd->log.noredoinoext.inoext_idx = 1487 cpu_to_le32((u32) (size_t) cd->iplist[2]); 1488 1489 pxdlock = (struct pxd_lock *) & tlck->lock; 1490 *pxd = pxdlock->pxd; 1491 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); 1492 1493 /* update bmap */ 1494 tlck->flag |= tlckUPDATEMAP; 1495 1496 /* mark page as homeward bound */ 1497 tlck->flag |= tlckWRITEPAGE; 1498 } else 1499 jfs_err("diLog: UFO type tlck:0x%p", tlck); 1500 return; 1501 } 1502 1503 /* 1504 * dataLog() 1505 * 1506 * function: log data tlock 1507 */ 1508 static void dataLog(struct jfs_log *log, struct tblock *tblk, struct lrd *lrd, 1509 struct tlock *tlck) 1510 { 1511 struct metapage *mp; 1512 pxd_t *pxd; 1513 1514 mp = tlck->mp; 1515 1516 /* initialize as REDOPAGE record format */ 1517 lrd->log.redopage.type = cpu_to_le16(LOG_DATA); 1518 lrd->log.redopage.l2linesize = cpu_to_le16(L2DATASLOTSIZE); 1519 1520 pxd = &lrd->log.redopage.pxd; 1521 1522 /* log after-image for logredo(): */ 1523 lrd->type = cpu_to_le16(LOG_REDOPAGE); 1524 1525 if (jfs_dirtable_inline(tlck->ip)) { 1526 /* 1527 * The table has been truncated, we've must have deleted 1528 * the last entry, so don't bother logging this 1529 */ 1530 mp->lid = 0; 1531 grab_metapage(mp); 1532 metapage_homeok(mp); 1533 discard_metapage(mp); 1534 tlck->mp = NULL; 1535 return; 1536 } 1537 1538 PXDaddress(pxd, mp->index); 1539 PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits); 1540 1541 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); 1542 1543 /* mark page as homeward bound */ 1544 tlck->flag |= tlckWRITEPAGE; 1545 1546 return; 1547 } 1548 1549 /* 1550 * dtLog() 1551 * 1552 * function: log dtree tlock and format maplock to update bmap; 1553 */ 1554 static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, 1555 struct tlock * tlck) 1556 { 1557 struct metapage *mp; 1558 struct pxd_lock *pxdlock; 1559 pxd_t *pxd; 1560 1561 mp = tlck->mp; 1562 1563 /* initialize as REDOPAGE/NOREDOPAGE record format */ 1564 lrd->log.redopage.type = cpu_to_le16(LOG_DTREE); 1565 lrd->log.redopage.l2linesize = cpu_to_le16(L2DTSLOTSIZE); 1566 1567 pxd = &lrd->log.redopage.pxd; 1568 1569 if (tlck->type & tlckBTROOT) 1570 lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT); 1571 1572 /* 1573 * page extension via relocation: entry insertion; 1574 * page extension in-place: entry insertion; 1575 * new right page from page split, reinitialized in-line 1576 * root from root page split: entry insertion; 1577 */ 1578 if (tlck->type & (tlckNEW | tlckEXTEND)) { 1579 /* log after-image of the new page for logredo(): 1580 * mark log (LOG_NEW) for logredo() to initialize 1581 * freelist and update bmap for alloc of the new page; 1582 */ 1583 lrd->type = cpu_to_le16(LOG_REDOPAGE); 1584 if (tlck->type & tlckEXTEND) 1585 lrd->log.redopage.type |= cpu_to_le16(LOG_EXTEND); 1586 else 1587 lrd->log.redopage.type |= cpu_to_le16(LOG_NEW); 1588 PXDaddress(pxd, mp->index); 1589 PXDlength(pxd, 1590 mp->logical_size >> tblk->sb->s_blocksize_bits); 1591 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); 1592 1593 /* format a maplock for txUpdateMap() to update bPMAP for 1594 * alloc of the new page; 1595 */ 1596 if (tlck->type & tlckBTROOT) 1597 return; 1598 tlck->flag |= tlckUPDATEMAP; 1599 pxdlock = (struct pxd_lock *) & tlck->lock; 1600 pxdlock->flag = mlckALLOCPXD; 1601 pxdlock->pxd = *pxd; 1602 1603 pxdlock->index = 1; 1604 1605 /* mark page as homeward bound */ 1606 tlck->flag |= tlckWRITEPAGE; 1607 return; 1608 } 1609 1610 /* 1611 * entry insertion/deletion, 1612 * sibling page link update (old right page before split); 1613 */ 1614 if (tlck->type & (tlckENTRY | tlckRELINK)) { 1615 /* log after-image for logredo(): */ 1616 lrd->type = cpu_to_le16(LOG_REDOPAGE); 1617 PXDaddress(pxd, mp->index); 1618 PXDlength(pxd, 1619 mp->logical_size >> tblk->sb->s_blocksize_bits); 1620 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); 1621 1622 /* mark page as homeward bound */ 1623 tlck->flag |= tlckWRITEPAGE; 1624 return; 1625 } 1626 1627 /* 1628 * page deletion: page has been invalidated 1629 * page relocation: source extent 1630 * 1631 * a maplock for free of the page has been formatted 1632 * at txLock() time); 1633 */ 1634 if (tlck->type & (tlckFREE | tlckRELOCATE)) { 1635 /* log LOG_NOREDOPAGE of the deleted page for logredo() 1636 * to start NoRedoPage filter and to update bmap for free 1637 * of the deletd page 1638 */ 1639 lrd->type = cpu_to_le16(LOG_NOREDOPAGE); 1640 pxdlock = (struct pxd_lock *) & tlck->lock; 1641 *pxd = pxdlock->pxd; 1642 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); 1643 1644 /* a maplock for txUpdateMap() for free of the page 1645 * has been formatted at txLock() time; 1646 */ 1647 tlck->flag |= tlckUPDATEMAP; 1648 } 1649 return; 1650 } 1651 1652 /* 1653 * xtLog() 1654 * 1655 * function: log xtree tlock and format maplock to update bmap; 1656 */ 1657 static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, 1658 struct tlock * tlck) 1659 { 1660 struct inode *ip; 1661 struct metapage *mp; 1662 xtpage_t *p; 1663 struct xtlock *xtlck; 1664 struct maplock *maplock; 1665 struct xdlistlock *xadlock; 1666 struct pxd_lock *pxdlock; 1667 pxd_t *page_pxd; 1668 int next, lwm, hwm; 1669 1670 ip = tlck->ip; 1671 mp = tlck->mp; 1672 1673 /* initialize as REDOPAGE/NOREDOPAGE record format */ 1674 lrd->log.redopage.type = cpu_to_le16(LOG_XTREE); 1675 lrd->log.redopage.l2linesize = cpu_to_le16(L2XTSLOTSIZE); 1676 1677 page_pxd = &lrd->log.redopage.pxd; 1678 1679 if (tlck->type & tlckBTROOT) { 1680 lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT); 1681 p = (xtpage_t *) &JFS_IP(ip)->i_xtroot; 1682 if (S_ISDIR(ip->i_mode)) 1683 lrd->log.redopage.type |= 1684 cpu_to_le16(LOG_DIR_XTREE); 1685 } else 1686 p = (xtpage_t *) mp->data; 1687 next = le16_to_cpu(p->header.nextindex); 1688 1689 xtlck = (struct xtlock *) & tlck->lock; 1690 1691 maplock = (struct maplock *) & tlck->lock; 1692 xadlock = (struct xdlistlock *) maplock; 1693 1694 /* 1695 * entry insertion/extension; 1696 * sibling page link update (old right page before split); 1697 */ 1698 if (tlck->type & (tlckNEW | tlckGROW | tlckRELINK)) { 1699 /* log after-image for logredo(): 1700 * logredo() will update bmap for alloc of new/extended 1701 * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from 1702 * after-image of XADlist; 1703 * logredo() resets (XAD_NEW|XAD_EXTEND) flag when 1704 * applying the after-image to the meta-data page. 1705 */ 1706 lrd->type = cpu_to_le16(LOG_REDOPAGE); 1707 PXDaddress(page_pxd, mp->index); 1708 PXDlength(page_pxd, 1709 mp->logical_size >> tblk->sb->s_blocksize_bits); 1710 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); 1711 1712 /* format a maplock for txUpdateMap() to update bPMAP 1713 * for alloc of new/extended extents of XAD[lwm:next) 1714 * from the page itself; 1715 * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag. 1716 */ 1717 lwm = xtlck->lwm.offset; 1718 if (lwm == 0) 1719 lwm = XTPAGEMAXSLOT; 1720 1721 if (lwm == next) 1722 goto out; 1723 if (lwm > next) { 1724 jfs_err("xtLog: lwm > next"); 1725 goto out; 1726 } 1727 tlck->flag |= tlckUPDATEMAP; 1728 xadlock->flag = mlckALLOCXADLIST; 1729 xadlock->count = next - lwm; 1730 if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) { 1731 int i; 1732 pxd_t *pxd; 1733 /* 1734 * Lazy commit may allow xtree to be modified before 1735 * txUpdateMap runs. Copy xad into linelock to 1736 * preserve correct data. 1737 * 1738 * We can fit twice as may pxd's as xads in the lock 1739 */ 1740 xadlock->flag = mlckALLOCPXDLIST; 1741 pxd = xadlock->xdlist = &xtlck->pxdlock; 1742 for (i = 0; i < xadlock->count; i++) { 1743 PXDaddress(pxd, addressXAD(&p->xad[lwm + i])); 1744 PXDlength(pxd, lengthXAD(&p->xad[lwm + i])); 1745 p->xad[lwm + i].flag &= 1746 ~(XAD_NEW | XAD_EXTENDED); 1747 pxd++; 1748 } 1749 } else { 1750 /* 1751 * xdlist will point to into inode's xtree, ensure 1752 * that transaction is not committed lazily. 1753 */ 1754 xadlock->flag = mlckALLOCXADLIST; 1755 xadlock->xdlist = &p->xad[lwm]; 1756 tblk->xflag &= ~COMMIT_LAZY; 1757 } 1758 jfs_info("xtLog: alloc ip:0x%p mp:0x%p tlck:0x%p lwm:%d count:%d", 1759 tlck->ip, mp, tlck, lwm, xadlock->count); 1760 1761 maplock->index = 1; 1762 1763 out: 1764 /* mark page as homeward bound */ 1765 tlck->flag |= tlckWRITEPAGE; 1766 1767 return; 1768 } 1769 1770 /* 1771 * page deletion: file deletion/truncation (ref. xtTruncate()) 1772 * 1773 * (page will be invalidated after log is written and bmap 1774 * is updated from the page); 1775 */ 1776 if (tlck->type & tlckFREE) { 1777 /* LOG_NOREDOPAGE log for NoRedoPage filter: 1778 * if page free from file delete, NoRedoFile filter from 1779 * inode image of zero link count will subsume NoRedoPage 1780 * filters for each page; 1781 * if page free from file truncattion, write NoRedoPage 1782 * filter; 1783 * 1784 * upadte of block allocation map for the page itself: 1785 * if page free from deletion and truncation, LOG_UPDATEMAP 1786 * log for the page itself is generated from processing 1787 * its parent page xad entries; 1788 */ 1789 /* if page free from file truncation, log LOG_NOREDOPAGE 1790 * of the deleted page for logredo() to start NoRedoPage 1791 * filter for the page; 1792 */ 1793 if (tblk->xflag & COMMIT_TRUNCATE) { 1794 /* write NOREDOPAGE for the page */ 1795 lrd->type = cpu_to_le16(LOG_NOREDOPAGE); 1796 PXDaddress(page_pxd, mp->index); 1797 PXDlength(page_pxd, 1798 mp->logical_size >> tblk->sb-> 1799 s_blocksize_bits); 1800 lrd->backchain = 1801 cpu_to_le32(lmLog(log, tblk, lrd, NULL)); 1802 1803 if (tlck->type & tlckBTROOT) { 1804 /* Empty xtree must be logged */ 1805 lrd->type = cpu_to_le16(LOG_REDOPAGE); 1806 lrd->backchain = 1807 cpu_to_le32(lmLog(log, tblk, lrd, tlck)); 1808 } 1809 } 1810 1811 /* init LOG_UPDATEMAP of the freed extents 1812 * XAD[XTENTRYSTART:hwm) from the deleted page itself 1813 * for logredo() to update bmap; 1814 */ 1815 lrd->type = cpu_to_le16(LOG_UPDATEMAP); 1816 lrd->log.updatemap.type = cpu_to_le16(LOG_FREEXADLIST); 1817 xtlck = (struct xtlock *) & tlck->lock; 1818 hwm = xtlck->hwm.offset; 1819 lrd->log.updatemap.nxd = 1820 cpu_to_le16(hwm - XTENTRYSTART + 1); 1821 /* reformat linelock for lmLog() */ 1822 xtlck->header.offset = XTENTRYSTART; 1823 xtlck->header.length = hwm - XTENTRYSTART + 1; 1824 xtlck->index = 1; 1825 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); 1826 1827 /* format a maplock for txUpdateMap() to update bmap 1828 * to free extents of XAD[XTENTRYSTART:hwm) from the 1829 * deleted page itself; 1830 */ 1831 tlck->flag |= tlckUPDATEMAP; 1832 xadlock->count = hwm - XTENTRYSTART + 1; 1833 if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) { 1834 int i; 1835 pxd_t *pxd; 1836 /* 1837 * Lazy commit may allow xtree to be modified before 1838 * txUpdateMap runs. Copy xad into linelock to 1839 * preserve correct data. 1840 * 1841 * We can fit twice as may pxd's as xads in the lock 1842 */ 1843 xadlock->flag = mlckFREEPXDLIST; 1844 pxd = xadlock->xdlist = &xtlck->pxdlock; 1845 for (i = 0; i < xadlock->count; i++) { 1846 PXDaddress(pxd, 1847 addressXAD(&p->xad[XTENTRYSTART + i])); 1848 PXDlength(pxd, 1849 lengthXAD(&p->xad[XTENTRYSTART + i])); 1850 pxd++; 1851 } 1852 } else { 1853 /* 1854 * xdlist will point to into inode's xtree, ensure 1855 * that transaction is not committed lazily. 1856 */ 1857 xadlock->flag = mlckFREEXADLIST; 1858 xadlock->xdlist = &p->xad[XTENTRYSTART]; 1859 tblk->xflag &= ~COMMIT_LAZY; 1860 } 1861 jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d lwm:2", 1862 tlck->ip, mp, xadlock->count); 1863 1864 maplock->index = 1; 1865 1866 /* mark page as invalid */ 1867 if (((tblk->xflag & COMMIT_PWMAP) || S_ISDIR(ip->i_mode)) 1868 && !(tlck->type & tlckBTROOT)) 1869 tlck->flag |= tlckFREEPAGE; 1870 /* 1871 else (tblk->xflag & COMMIT_PMAP) 1872 ? release the page; 1873 */ 1874 return; 1875 } 1876 1877 /* 1878 * page/entry truncation: file truncation (ref. xtTruncate()) 1879 * 1880 * |----------+------+------+---------------| 1881 * | | | 1882 * | | hwm - hwm before truncation 1883 * | next - truncation point 1884 * lwm - lwm before truncation 1885 * header ? 1886 */ 1887 if (tlck->type & tlckTRUNCATE) { 1888 pxd_t pxd; /* truncated extent of xad */ 1889 int twm; 1890 1891 /* 1892 * For truncation the entire linelock may be used, so it would 1893 * be difficult to store xad list in linelock itself. 1894 * Therefore, we'll just force transaction to be committed 1895 * synchronously, so that xtree pages won't be changed before 1896 * txUpdateMap runs. 1897 */ 1898 tblk->xflag &= ~COMMIT_LAZY; 1899 lwm = xtlck->lwm.offset; 1900 if (lwm == 0) 1901 lwm = XTPAGEMAXSLOT; 1902 hwm = xtlck->hwm.offset; 1903 twm = xtlck->twm.offset; 1904 1905 /* 1906 * write log records 1907 */ 1908 /* log after-image for logredo(): 1909 * 1910 * logredo() will update bmap for alloc of new/extended 1911 * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from 1912 * after-image of XADlist; 1913 * logredo() resets (XAD_NEW|XAD_EXTEND) flag when 1914 * applying the after-image to the meta-data page. 1915 */ 1916 lrd->type = cpu_to_le16(LOG_REDOPAGE); 1917 PXDaddress(page_pxd, mp->index); 1918 PXDlength(page_pxd, 1919 mp->logical_size >> tblk->sb->s_blocksize_bits); 1920 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); 1921 1922 /* 1923 * truncate entry XAD[twm == next - 1]: 1924 */ 1925 if (twm == next - 1) { 1926 /* init LOG_UPDATEMAP for logredo() to update bmap for 1927 * free of truncated delta extent of the truncated 1928 * entry XAD[next - 1]: 1929 * (xtlck->pxdlock = truncated delta extent); 1930 */ 1931 pxdlock = (struct pxd_lock *) & xtlck->pxdlock; 1932 /* assert(pxdlock->type & tlckTRUNCATE); */ 1933 lrd->type = cpu_to_le16(LOG_UPDATEMAP); 1934 lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD); 1935 lrd->log.updatemap.nxd = cpu_to_le16(1); 1936 lrd->log.updatemap.pxd = pxdlock->pxd; 1937 pxd = pxdlock->pxd; /* save to format maplock */ 1938 lrd->backchain = 1939 cpu_to_le32(lmLog(log, tblk, lrd, NULL)); 1940 } 1941 1942 /* 1943 * free entries XAD[next:hwm]: 1944 */ 1945 if (hwm >= next) { 1946 /* init LOG_UPDATEMAP of the freed extents 1947 * XAD[next:hwm] from the deleted page itself 1948 * for logredo() to update bmap; 1949 */ 1950 lrd->type = cpu_to_le16(LOG_UPDATEMAP); 1951 lrd->log.updatemap.type = 1952 cpu_to_le16(LOG_FREEXADLIST); 1953 xtlck = (struct xtlock *) & tlck->lock; 1954 hwm = xtlck->hwm.offset; 1955 lrd->log.updatemap.nxd = 1956 cpu_to_le16(hwm - next + 1); 1957 /* reformat linelock for lmLog() */ 1958 xtlck->header.offset = next; 1959 xtlck->header.length = hwm - next + 1; 1960 xtlck->index = 1; 1961 lrd->backchain = 1962 cpu_to_le32(lmLog(log, tblk, lrd, tlck)); 1963 } 1964 1965 /* 1966 * format maplock(s) for txUpdateMap() to update bmap 1967 */ 1968 maplock->index = 0; 1969 1970 /* 1971 * allocate entries XAD[lwm:next): 1972 */ 1973 if (lwm < next) { 1974 /* format a maplock for txUpdateMap() to update bPMAP 1975 * for alloc of new/extended extents of XAD[lwm:next) 1976 * from the page itself; 1977 * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag. 1978 */ 1979 tlck->flag |= tlckUPDATEMAP; 1980 xadlock->flag = mlckALLOCXADLIST; 1981 xadlock->count = next - lwm; 1982 xadlock->xdlist = &p->xad[lwm]; 1983 1984 jfs_info("xtLog: alloc ip:0x%p mp:0x%p count:%d lwm:%d next:%d", 1985 tlck->ip, mp, xadlock->count, lwm, next); 1986 maplock->index++; 1987 xadlock++; 1988 } 1989 1990 /* 1991 * truncate entry XAD[twm == next - 1]: 1992 */ 1993 if (twm == next - 1) { 1994 /* format a maplock for txUpdateMap() to update bmap 1995 * to free truncated delta extent of the truncated 1996 * entry XAD[next - 1]; 1997 * (xtlck->pxdlock = truncated delta extent); 1998 */ 1999 tlck->flag |= tlckUPDATEMAP; 2000 pxdlock = (struct pxd_lock *) xadlock; 2001 pxdlock->flag = mlckFREEPXD; 2002 pxdlock->count = 1; 2003 pxdlock->pxd = pxd; 2004 2005 jfs_info("xtLog: truncate ip:0x%p mp:0x%p count:%d hwm:%d", 2006 ip, mp, pxdlock->count, hwm); 2007 maplock->index++; 2008 xadlock++; 2009 } 2010 2011 /* 2012 * free entries XAD[next:hwm]: 2013 */ 2014 if (hwm >= next) { 2015 /* format a maplock for txUpdateMap() to update bmap 2016 * to free extents of XAD[next:hwm] from thedeleted 2017 * page itself; 2018 */ 2019 tlck->flag |= tlckUPDATEMAP; 2020 xadlock->flag = mlckFREEXADLIST; 2021 xadlock->count = hwm - next + 1; 2022 xadlock->xdlist = &p->xad[next]; 2023 2024 jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d next:%d hwm:%d", 2025 tlck->ip, mp, xadlock->count, next, hwm); 2026 maplock->index++; 2027 } 2028 2029 /* mark page as homeward bound */ 2030 tlck->flag |= tlckWRITEPAGE; 2031 } 2032 return; 2033 } 2034 2035 /* 2036 * mapLog() 2037 * 2038 * function: log from maplock of freed data extents; 2039 */ 2040 static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, 2041 struct tlock * tlck) 2042 { 2043 struct pxd_lock *pxdlock; 2044 int i, nlock; 2045 pxd_t *pxd; 2046 2047 /* 2048 * page relocation: free the source page extent 2049 * 2050 * a maplock for txUpdateMap() for free of the page 2051 * has been formatted at txLock() time saving the src 2052 * relocated page address; 2053 */ 2054 if (tlck->type & tlckRELOCATE) { 2055 /* log LOG_NOREDOPAGE of the old relocated page 2056 * for logredo() to start NoRedoPage filter; 2057 */ 2058 lrd->type = cpu_to_le16(LOG_NOREDOPAGE); 2059 pxdlock = (struct pxd_lock *) & tlck->lock; 2060 pxd = &lrd->log.redopage.pxd; 2061 *pxd = pxdlock->pxd; 2062 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); 2063 2064 /* (N.B. currently, logredo() does NOT update bmap 2065 * for free of the page itself for (LOG_XTREE|LOG_NOREDOPAGE); 2066 * if page free from relocation, LOG_UPDATEMAP log is 2067 * specifically generated now for logredo() 2068 * to update bmap for free of src relocated page; 2069 * (new flag LOG_RELOCATE may be introduced which will 2070 * inform logredo() to start NORedoPage filter and also 2071 * update block allocation map at the same time, thus 2072 * avoiding an extra log write); 2073 */ 2074 lrd->type = cpu_to_le16(LOG_UPDATEMAP); 2075 lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD); 2076 lrd->log.updatemap.nxd = cpu_to_le16(1); 2077 lrd->log.updatemap.pxd = pxdlock->pxd; 2078 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); 2079 2080 /* a maplock for txUpdateMap() for free of the page 2081 * has been formatted at txLock() time; 2082 */ 2083 tlck->flag |= tlckUPDATEMAP; 2084 return; 2085 } 2086 /* 2087 2088 * Otherwise it's not a relocate request 2089 * 2090 */ 2091 else { 2092 /* log LOG_UPDATEMAP for logredo() to update bmap for 2093 * free of truncated/relocated delta extent of the data; 2094 * e.g.: external EA extent, relocated/truncated extent 2095 * from xtTailgate(); 2096 */ 2097 lrd->type = cpu_to_le16(LOG_UPDATEMAP); 2098 pxdlock = (struct pxd_lock *) & tlck->lock; 2099 nlock = pxdlock->index; 2100 for (i = 0; i < nlock; i++, pxdlock++) { 2101 if (pxdlock->flag & mlckALLOCPXD) 2102 lrd->log.updatemap.type = 2103 cpu_to_le16(LOG_ALLOCPXD); 2104 else 2105 lrd->log.updatemap.type = 2106 cpu_to_le16(LOG_FREEPXD); 2107 lrd->log.updatemap.nxd = cpu_to_le16(1); 2108 lrd->log.updatemap.pxd = pxdlock->pxd; 2109 lrd->backchain = 2110 cpu_to_le32(lmLog(log, tblk, lrd, NULL)); 2111 jfs_info("mapLog: xaddr:0x%lx xlen:0x%x", 2112 (ulong) addressPXD(&pxdlock->pxd), 2113 lengthPXD(&pxdlock->pxd)); 2114 } 2115 2116 /* update bmap */ 2117 tlck->flag |= tlckUPDATEMAP; 2118 } 2119 } 2120 2121 /* 2122 * txEA() 2123 * 2124 * function: acquire maplock for EA/ACL extents or 2125 * set COMMIT_INLINE flag; 2126 */ 2127 void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea) 2128 { 2129 struct tlock *tlck = NULL; 2130 struct pxd_lock *maplock = NULL, *pxdlock = NULL; 2131 2132 /* 2133 * format maplock for alloc of new EA extent 2134 */ 2135 if (newea) { 2136 /* Since the newea could be a completely zeroed entry we need to 2137 * check for the two flags which indicate we should actually 2138 * commit new EA data 2139 */ 2140 if (newea->flag & DXD_EXTENT) { 2141 tlck = txMaplock(tid, ip, tlckMAP); 2142 maplock = (struct pxd_lock *) & tlck->lock; 2143 pxdlock = (struct pxd_lock *) maplock; 2144 pxdlock->flag = mlckALLOCPXD; 2145 PXDaddress(&pxdlock->pxd, addressDXD(newea)); 2146 PXDlength(&pxdlock->pxd, lengthDXD(newea)); 2147 pxdlock++; 2148 maplock->index = 1; 2149 } else if (newea->flag & DXD_INLINE) { 2150 tlck = NULL; 2151 2152 set_cflag(COMMIT_Inlineea, ip); 2153 } 2154 } 2155 2156 /* 2157 * format maplock for free of old EA extent 2158 */ 2159 if (!test_cflag(COMMIT_Nolink, ip) && oldea->flag & DXD_EXTENT) { 2160 if (tlck == NULL) { 2161 tlck = txMaplock(tid, ip, tlckMAP); 2162 maplock = (struct pxd_lock *) & tlck->lock; 2163 pxdlock = (struct pxd_lock *) maplock; 2164 maplock->index = 0; 2165 } 2166 pxdlock->flag = mlckFREEPXD; 2167 PXDaddress(&pxdlock->pxd, addressDXD(oldea)); 2168 PXDlength(&pxdlock->pxd, lengthDXD(oldea)); 2169 maplock->index++; 2170 } 2171 } 2172 2173 /* 2174 * txForce() 2175 * 2176 * function: synchronously write pages locked by transaction 2177 * after txLog() but before txUpdateMap(); 2178 */ 2179 static void txForce(struct tblock * tblk) 2180 { 2181 struct tlock *tlck; 2182 lid_t lid, next; 2183 struct metapage *mp; 2184 2185 /* 2186 * reverse the order of transaction tlocks in 2187 * careful update order of address index pages 2188 * (right to left, bottom up) 2189 */ 2190 tlck = lid_to_tlock(tblk->next); 2191 lid = tlck->next; 2192 tlck->next = 0; 2193 while (lid) { 2194 tlck = lid_to_tlock(lid); 2195 next = tlck->next; 2196 tlck->next = tblk->next; 2197 tblk->next = lid; 2198 lid = next; 2199 } 2200 2201 /* 2202 * synchronously write the page, and 2203 * hold the page for txUpdateMap(); 2204 */ 2205 for (lid = tblk->next; lid; lid = next) { 2206 tlck = lid_to_tlock(lid); 2207 next = tlck->next; 2208 2209 if ((mp = tlck->mp) != NULL && 2210 (tlck->type & tlckBTROOT) == 0) { 2211 assert(mp->xflag & COMMIT_PAGE); 2212 2213 if (tlck->flag & tlckWRITEPAGE) { 2214 tlck->flag &= ~tlckWRITEPAGE; 2215 2216 /* do not release page to freelist */ 2217 force_metapage(mp); 2218 #if 0 2219 /* 2220 * The "right" thing to do here is to 2221 * synchronously write the metadata. 2222 * With the current implementation this 2223 * is hard since write_metapage requires 2224 * us to kunmap & remap the page. If we 2225 * have tlocks pointing into the metadata 2226 * pages, we don't want to do this. I think 2227 * we can get by with synchronously writing 2228 * the pages when they are released. 2229 */ 2230 assert(mp->nohomeok); 2231 set_bit(META_dirty, &mp->flag); 2232 set_bit(META_sync, &mp->flag); 2233 #endif 2234 } 2235 } 2236 } 2237 } 2238 2239 /* 2240 * txUpdateMap() 2241 * 2242 * function: update persistent allocation map (and working map 2243 * if appropriate); 2244 * 2245 * parameter: 2246 */ 2247 static void txUpdateMap(struct tblock * tblk) 2248 { 2249 struct inode *ip; 2250 struct inode *ipimap; 2251 lid_t lid; 2252 struct tlock *tlck; 2253 struct maplock *maplock; 2254 struct pxd_lock pxdlock; 2255 int maptype; 2256 int k, nlock; 2257 struct metapage *mp = NULL; 2258 2259 ipimap = JFS_SBI(tblk->sb)->ipimap; 2260 2261 maptype = (tblk->xflag & COMMIT_PMAP) ? COMMIT_PMAP : COMMIT_PWMAP; 2262 2263 2264 /* 2265 * update block allocation map 2266 * 2267 * update allocation state in pmap (and wmap) and 2268 * update lsn of the pmap page; 2269 */ 2270 /* 2271 * scan each tlock/page of transaction for block allocation/free: 2272 * 2273 * for each tlock/page of transaction, update map. 2274 * ? are there tlock for pmap and pwmap at the same time ? 2275 */ 2276 for (lid = tblk->next; lid; lid = tlck->next) { 2277 tlck = lid_to_tlock(lid); 2278 2279 if ((tlck->flag & tlckUPDATEMAP) == 0) 2280 continue; 2281 2282 if (tlck->flag & tlckFREEPAGE) { 2283 /* 2284 * Another thread may attempt to reuse freed space 2285 * immediately, so we want to get rid of the metapage 2286 * before anyone else has a chance to get it. 2287 * Lock metapage, update maps, then invalidate 2288 * the metapage. 2289 */ 2290 mp = tlck->mp; 2291 ASSERT(mp->xflag & COMMIT_PAGE); 2292 grab_metapage(mp); 2293 } 2294 2295 /* 2296 * extent list: 2297 * . in-line PXD list: 2298 * . out-of-line XAD list: 2299 */ 2300 maplock = (struct maplock *) & tlck->lock; 2301 nlock = maplock->index; 2302 2303 for (k = 0; k < nlock; k++, maplock++) { 2304 /* 2305 * allocate blocks in persistent map: 2306 * 2307 * blocks have been allocated from wmap at alloc time; 2308 */ 2309 if (maplock->flag & mlckALLOC) { 2310 txAllocPMap(ipimap, maplock, tblk); 2311 } 2312 /* 2313 * free blocks in persistent and working map: 2314 * blocks will be freed in pmap and then in wmap; 2315 * 2316 * ? tblock specifies the PMAP/PWMAP based upon 2317 * transaction 2318 * 2319 * free blocks in persistent map: 2320 * blocks will be freed from wmap at last reference 2321 * release of the object for regular files; 2322 * 2323 * Alway free blocks from both persistent & working 2324 * maps for directories 2325 */ 2326 else { /* (maplock->flag & mlckFREE) */ 2327 2328 if (tlck->flag & tlckDIRECTORY) 2329 txFreeMap(ipimap, maplock, 2330 tblk, COMMIT_PWMAP); 2331 else 2332 txFreeMap(ipimap, maplock, 2333 tblk, maptype); 2334 } 2335 } 2336 if (tlck->flag & tlckFREEPAGE) { 2337 if (!(tblk->flag & tblkGC_LAZY)) { 2338 /* This is equivalent to txRelease */ 2339 ASSERT(mp->lid == lid); 2340 tlck->mp->lid = 0; 2341 } 2342 assert(mp->nohomeok == 1); 2343 metapage_homeok(mp); 2344 discard_metapage(mp); 2345 tlck->mp = NULL; 2346 } 2347 } 2348 /* 2349 * update inode allocation map 2350 * 2351 * update allocation state in pmap and 2352 * update lsn of the pmap page; 2353 * update in-memory inode flag/state 2354 * 2355 * unlock mapper/write lock 2356 */ 2357 if (tblk->xflag & COMMIT_CREATE) { 2358 diUpdatePMap(ipimap, tblk->ino, false, tblk); 2359 /* update persistent block allocation map 2360 * for the allocation of inode extent; 2361 */ 2362 pxdlock.flag = mlckALLOCPXD; 2363 pxdlock.pxd = tblk->u.ixpxd; 2364 pxdlock.index = 1; 2365 txAllocPMap(ipimap, (struct maplock *) & pxdlock, tblk); 2366 } else if (tblk->xflag & COMMIT_DELETE) { 2367 ip = tblk->u.ip; 2368 diUpdatePMap(ipimap, ip->i_ino, true, tblk); 2369 iput(ip); 2370 } 2371 } 2372 2373 /* 2374 * txAllocPMap() 2375 * 2376 * function: allocate from persistent map; 2377 * 2378 * parameter: 2379 * ipbmap - 2380 * malock - 2381 * xad list: 2382 * pxd: 2383 * 2384 * maptype - 2385 * allocate from persistent map; 2386 * free from persistent map; 2387 * (e.g., tmp file - free from working map at releae 2388 * of last reference); 2389 * free from persistent and working map; 2390 * 2391 * lsn - log sequence number; 2392 */ 2393 static void txAllocPMap(struct inode *ip, struct maplock * maplock, 2394 struct tblock * tblk) 2395 { 2396 struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; 2397 struct xdlistlock *xadlistlock; 2398 xad_t *xad; 2399 s64 xaddr; 2400 int xlen; 2401 struct pxd_lock *pxdlock; 2402 struct xdlistlock *pxdlistlock; 2403 pxd_t *pxd; 2404 int n; 2405 2406 /* 2407 * allocate from persistent map; 2408 */ 2409 if (maplock->flag & mlckALLOCXADLIST) { 2410 xadlistlock = (struct xdlistlock *) maplock; 2411 xad = xadlistlock->xdlist; 2412 for (n = 0; n < xadlistlock->count; n++, xad++) { 2413 if (xad->flag & (XAD_NEW | XAD_EXTENDED)) { 2414 xaddr = addressXAD(xad); 2415 xlen = lengthXAD(xad); 2416 dbUpdatePMap(ipbmap, false, xaddr, 2417 (s64) xlen, tblk); 2418 xad->flag &= ~(XAD_NEW | XAD_EXTENDED); 2419 jfs_info("allocPMap: xaddr:0x%lx xlen:%d", 2420 (ulong) xaddr, xlen); 2421 } 2422 } 2423 } else if (maplock->flag & mlckALLOCPXD) { 2424 pxdlock = (struct pxd_lock *) maplock; 2425 xaddr = addressPXD(&pxdlock->pxd); 2426 xlen = lengthPXD(&pxdlock->pxd); 2427 dbUpdatePMap(ipbmap, false, xaddr, (s64) xlen, tblk); 2428 jfs_info("allocPMap: xaddr:0x%lx xlen:%d", (ulong) xaddr, xlen); 2429 } else { /* (maplock->flag & mlckALLOCPXDLIST) */ 2430 2431 pxdlistlock = (struct xdlistlock *) maplock; 2432 pxd = pxdlistlock->xdlist; 2433 for (n = 0; n < pxdlistlock->count; n++, pxd++) { 2434 xaddr = addressPXD(pxd); 2435 xlen = lengthPXD(pxd); 2436 dbUpdatePMap(ipbmap, false, xaddr, (s64) xlen, 2437 tblk); 2438 jfs_info("allocPMap: xaddr:0x%lx xlen:%d", 2439 (ulong) xaddr, xlen); 2440 } 2441 } 2442 } 2443 2444 /* 2445 * txFreeMap() 2446 * 2447 * function: free from persistent and/or working map; 2448 * 2449 * todo: optimization 2450 */ 2451 void txFreeMap(struct inode *ip, 2452 struct maplock * maplock, struct tblock * tblk, int maptype) 2453 { 2454 struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; 2455 struct xdlistlock *xadlistlock; 2456 xad_t *xad; 2457 s64 xaddr; 2458 int xlen; 2459 struct pxd_lock *pxdlock; 2460 struct xdlistlock *pxdlistlock; 2461 pxd_t *pxd; 2462 int n; 2463 2464 jfs_info("txFreeMap: tblk:0x%p maplock:0x%p maptype:0x%x", 2465 tblk, maplock, maptype); 2466 2467 /* 2468 * free from persistent map; 2469 */ 2470 if (maptype == COMMIT_PMAP || maptype == COMMIT_PWMAP) { 2471 if (maplock->flag & mlckFREEXADLIST) { 2472 xadlistlock = (struct xdlistlock *) maplock; 2473 xad = xadlistlock->xdlist; 2474 for (n = 0; n < xadlistlock->count; n++, xad++) { 2475 if (!(xad->flag & XAD_NEW)) { 2476 xaddr = addressXAD(xad); 2477 xlen = lengthXAD(xad); 2478 dbUpdatePMap(ipbmap, true, xaddr, 2479 (s64) xlen, tblk); 2480 jfs_info("freePMap: xaddr:0x%lx xlen:%d", 2481 (ulong) xaddr, xlen); 2482 } 2483 } 2484 } else if (maplock->flag & mlckFREEPXD) { 2485 pxdlock = (struct pxd_lock *) maplock; 2486 xaddr = addressPXD(&pxdlock->pxd); 2487 xlen = lengthPXD(&pxdlock->pxd); 2488 dbUpdatePMap(ipbmap, true, xaddr, (s64) xlen, 2489 tblk); 2490 jfs_info("freePMap: xaddr:0x%lx xlen:%d", 2491 (ulong) xaddr, xlen); 2492 } else { /* (maplock->flag & mlckALLOCPXDLIST) */ 2493 2494 pxdlistlock = (struct xdlistlock *) maplock; 2495 pxd = pxdlistlock->xdlist; 2496 for (n = 0; n < pxdlistlock->count; n++, pxd++) { 2497 xaddr = addressPXD(pxd); 2498 xlen = lengthPXD(pxd); 2499 dbUpdatePMap(ipbmap, true, xaddr, 2500 (s64) xlen, tblk); 2501 jfs_info("freePMap: xaddr:0x%lx xlen:%d", 2502 (ulong) xaddr, xlen); 2503 } 2504 } 2505 } 2506 2507 /* 2508 * free from working map; 2509 */ 2510 if (maptype == COMMIT_PWMAP || maptype == COMMIT_WMAP) { 2511 if (maplock->flag & mlckFREEXADLIST) { 2512 xadlistlock = (struct xdlistlock *) maplock; 2513 xad = xadlistlock->xdlist; 2514 for (n = 0; n < xadlistlock->count; n++, xad++) { 2515 xaddr = addressXAD(xad); 2516 xlen = lengthXAD(xad); 2517 dbFree(ip, xaddr, (s64) xlen); 2518 xad->flag = 0; 2519 jfs_info("freeWMap: xaddr:0x%lx xlen:%d", 2520 (ulong) xaddr, xlen); 2521 } 2522 } else if (maplock->flag & mlckFREEPXD) { 2523 pxdlock = (struct pxd_lock *) maplock; 2524 xaddr = addressPXD(&pxdlock->pxd); 2525 xlen = lengthPXD(&pxdlock->pxd); 2526 dbFree(ip, xaddr, (s64) xlen); 2527 jfs_info("freeWMap: xaddr:0x%lx xlen:%d", 2528 (ulong) xaddr, xlen); 2529 } else { /* (maplock->flag & mlckFREEPXDLIST) */ 2530 2531 pxdlistlock = (struct xdlistlock *) maplock; 2532 pxd = pxdlistlock->xdlist; 2533 for (n = 0; n < pxdlistlock->count; n++, pxd++) { 2534 xaddr = addressPXD(pxd); 2535 xlen = lengthPXD(pxd); 2536 dbFree(ip, xaddr, (s64) xlen); 2537 jfs_info("freeWMap: xaddr:0x%lx xlen:%d", 2538 (ulong) xaddr, xlen); 2539 } 2540 } 2541 } 2542 } 2543 2544 /* 2545 * txFreelock() 2546 * 2547 * function: remove tlock from inode anonymous locklist 2548 */ 2549 void txFreelock(struct inode *ip) 2550 { 2551 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 2552 struct tlock *xtlck, *tlck; 2553 lid_t xlid = 0, lid; 2554 2555 if (!jfs_ip->atlhead) 2556 return; 2557 2558 TXN_LOCK(); 2559 xtlck = (struct tlock *) &jfs_ip->atlhead; 2560 2561 while ((lid = xtlck->next) != 0) { 2562 tlck = lid_to_tlock(lid); 2563 if (tlck->flag & tlckFREELOCK) { 2564 xtlck->next = tlck->next; 2565 txLockFree(lid); 2566 } else { 2567 xtlck = tlck; 2568 xlid = lid; 2569 } 2570 } 2571 2572 if (jfs_ip->atlhead) 2573 jfs_ip->atltail = xlid; 2574 else { 2575 jfs_ip->atltail = 0; 2576 /* 2577 * If inode was on anon_list, remove it 2578 */ 2579 list_del_init(&jfs_ip->anon_inode_list); 2580 } 2581 TXN_UNLOCK(); 2582 } 2583 2584 /* 2585 * txAbort() 2586 * 2587 * function: abort tx before commit; 2588 * 2589 * frees line-locks and segment locks for all 2590 * segments in comdata structure. 2591 * Optionally sets state of file-system to FM_DIRTY in super-block. 2592 * log age of page-frames in memory for which caller has 2593 * are reset to 0 (to avoid logwarap). 2594 */ 2595 void txAbort(tid_t tid, int dirty) 2596 { 2597 lid_t lid, next; 2598 struct metapage *mp; 2599 struct tblock *tblk = tid_to_tblock(tid); 2600 struct tlock *tlck; 2601 2602 /* 2603 * free tlocks of the transaction 2604 */ 2605 for (lid = tblk->next; lid; lid = next) { 2606 tlck = lid_to_tlock(lid); 2607 next = tlck->next; 2608 mp = tlck->mp; 2609 JFS_IP(tlck->ip)->xtlid = 0; 2610 2611 if (mp) { 2612 mp->lid = 0; 2613 2614 /* 2615 * reset lsn of page to avoid logwarap: 2616 * 2617 * (page may have been previously committed by another 2618 * transaction(s) but has not been paged, i.e., 2619 * it may be on logsync list even though it has not 2620 * been logged for the current tx.) 2621 */ 2622 if (mp->xflag & COMMIT_PAGE && mp->lsn) 2623 LogSyncRelease(mp); 2624 } 2625 /* insert tlock at head of freelist */ 2626 TXN_LOCK(); 2627 txLockFree(lid); 2628 TXN_UNLOCK(); 2629 } 2630 2631 /* caller will free the transaction block */ 2632 2633 tblk->next = tblk->last = 0; 2634 2635 /* 2636 * mark filesystem dirty 2637 */ 2638 if (dirty) 2639 jfs_error(tblk->sb, "\n"); 2640 2641 return; 2642 } 2643 2644 /* 2645 * txLazyCommit(void) 2646 * 2647 * All transactions except those changing ipimap (COMMIT_FORCE) are 2648 * processed by this routine. This insures that the inode and block 2649 * allocation maps are updated in order. For synchronous transactions, 2650 * let the user thread finish processing after txUpdateMap() is called. 2651 */ 2652 static void txLazyCommit(struct tblock * tblk) 2653 { 2654 struct jfs_log *log; 2655 2656 while (((tblk->flag & tblkGC_READY) == 0) && 2657 ((tblk->flag & tblkGC_UNLOCKED) == 0)) { 2658 /* We must have gotten ahead of the user thread 2659 */ 2660 jfs_info("jfs_lazycommit: tblk 0x%p not unlocked", tblk); 2661 yield(); 2662 } 2663 2664 jfs_info("txLazyCommit: processing tblk 0x%p", tblk); 2665 2666 txUpdateMap(tblk); 2667 2668 log = (struct jfs_log *) JFS_SBI(tblk->sb)->log; 2669 2670 spin_lock_irq(&log->gclock); // LOGGC_LOCK 2671 2672 tblk->flag |= tblkGC_COMMITTED; 2673 2674 if (tblk->flag & tblkGC_READY) 2675 log->gcrtc--; 2676 2677 wake_up_all(&tblk->gcwait); // LOGGC_WAKEUP 2678 2679 /* 2680 * Can't release log->gclock until we've tested tblk->flag 2681 */ 2682 if (tblk->flag & tblkGC_LAZY) { 2683 spin_unlock_irq(&log->gclock); // LOGGC_UNLOCK 2684 txUnlock(tblk); 2685 tblk->flag &= ~tblkGC_LAZY; 2686 txEnd(tblk - TxBlock); /* Convert back to tid */ 2687 } else 2688 spin_unlock_irq(&log->gclock); // LOGGC_UNLOCK 2689 2690 jfs_info("txLazyCommit: done: tblk = 0x%p", tblk); 2691 } 2692 2693 /* 2694 * jfs_lazycommit(void) 2695 * 2696 * To be run as a kernel daemon. If lbmIODone is called in an interrupt 2697 * context, or where blocking is not wanted, this routine will process 2698 * committed transactions from the unlock queue. 2699 */ 2700 int jfs_lazycommit(void *arg) 2701 { 2702 int WorkDone; 2703 struct tblock *tblk; 2704 unsigned long flags; 2705 struct jfs_sb_info *sbi; 2706 2707 set_freezable(); 2708 do { 2709 LAZY_LOCK(flags); 2710 jfs_commit_thread_waking = 0; /* OK to wake another thread */ 2711 while (!list_empty(&TxAnchor.unlock_queue)) { 2712 WorkDone = 0; 2713 list_for_each_entry(tblk, &TxAnchor.unlock_queue, 2714 cqueue) { 2715 2716 sbi = JFS_SBI(tblk->sb); 2717 /* 2718 * For each volume, the transactions must be 2719 * handled in order. If another commit thread 2720 * is handling a tblk for this superblock, 2721 * skip it 2722 */ 2723 if (sbi->commit_state & IN_LAZYCOMMIT) 2724 continue; 2725 2726 sbi->commit_state |= IN_LAZYCOMMIT; 2727 WorkDone = 1; 2728 2729 /* 2730 * Remove transaction from queue 2731 */ 2732 list_del(&tblk->cqueue); 2733 2734 LAZY_UNLOCK(flags); 2735 txLazyCommit(tblk); 2736 LAZY_LOCK(flags); 2737 2738 sbi->commit_state &= ~IN_LAZYCOMMIT; 2739 /* 2740 * Don't continue in the for loop. (We can't 2741 * anyway, it's unsafe!) We want to go back to 2742 * the beginning of the list. 2743 */ 2744 break; 2745 } 2746 2747 /* If there was nothing to do, don't continue */ 2748 if (!WorkDone) 2749 break; 2750 } 2751 /* In case a wakeup came while all threads were active */ 2752 jfs_commit_thread_waking = 0; 2753 2754 if (freezing(current)) { 2755 LAZY_UNLOCK(flags); 2756 try_to_freeze(); 2757 } else { 2758 DECLARE_WAITQUEUE(wq, current); 2759 2760 add_wait_queue(&jfs_commit_thread_wait, &wq); 2761 set_current_state(TASK_INTERRUPTIBLE); 2762 LAZY_UNLOCK(flags); 2763 schedule(); 2764 remove_wait_queue(&jfs_commit_thread_wait, &wq); 2765 } 2766 } while (!kthread_should_stop()); 2767 2768 if (!list_empty(&TxAnchor.unlock_queue)) 2769 jfs_err("jfs_lazycommit being killed w/pending transactions!"); 2770 else 2771 jfs_info("jfs_lazycommit being killed"); 2772 return 0; 2773 } 2774 2775 void txLazyUnlock(struct tblock * tblk) 2776 { 2777 unsigned long flags; 2778 2779 LAZY_LOCK(flags); 2780 2781 list_add_tail(&tblk->cqueue, &TxAnchor.unlock_queue); 2782 /* 2783 * Don't wake up a commit thread if there is already one servicing 2784 * this superblock, or if the last one we woke up hasn't started yet. 2785 */ 2786 if (!(JFS_SBI(tblk->sb)->commit_state & IN_LAZYCOMMIT) && 2787 !jfs_commit_thread_waking) { 2788 jfs_commit_thread_waking = 1; 2789 wake_up(&jfs_commit_thread_wait); 2790 } 2791 LAZY_UNLOCK(flags); 2792 } 2793 2794 static void LogSyncRelease(struct metapage * mp) 2795 { 2796 struct jfs_log *log = mp->log; 2797 2798 assert(mp->nohomeok); 2799 assert(log); 2800 metapage_homeok(mp); 2801 } 2802 2803 /* 2804 * txQuiesce 2805 * 2806 * Block all new transactions and push anonymous transactions to 2807 * completion 2808 * 2809 * This does almost the same thing as jfs_sync below. We don't 2810 * worry about deadlocking when jfs_tlocks_low is set, since we would 2811 * expect jfs_sync to get us out of that jam. 2812 */ 2813 void txQuiesce(struct super_block *sb) 2814 { 2815 struct inode *ip; 2816 struct jfs_inode_info *jfs_ip; 2817 struct jfs_log *log = JFS_SBI(sb)->log; 2818 tid_t tid; 2819 2820 set_bit(log_QUIESCE, &log->flag); 2821 2822 TXN_LOCK(); 2823 restart: 2824 while (!list_empty(&TxAnchor.anon_list)) { 2825 jfs_ip = list_entry(TxAnchor.anon_list.next, 2826 struct jfs_inode_info, 2827 anon_inode_list); 2828 ip = &jfs_ip->vfs_inode; 2829 2830 /* 2831 * inode will be removed from anonymous list 2832 * when it is committed 2833 */ 2834 TXN_UNLOCK(); 2835 tid = txBegin(ip->i_sb, COMMIT_INODE | COMMIT_FORCE); 2836 mutex_lock(&jfs_ip->commit_mutex); 2837 txCommit(tid, 1, &ip, 0); 2838 txEnd(tid); 2839 mutex_unlock(&jfs_ip->commit_mutex); 2840 /* 2841 * Just to be safe. I don't know how 2842 * long we can run without blocking 2843 */ 2844 cond_resched(); 2845 TXN_LOCK(); 2846 } 2847 2848 /* 2849 * If jfs_sync is running in parallel, there could be some inodes 2850 * on anon_list2. Let's check. 2851 */ 2852 if (!list_empty(&TxAnchor.anon_list2)) { 2853 list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list); 2854 goto restart; 2855 } 2856 TXN_UNLOCK(); 2857 2858 /* 2859 * We may need to kick off the group commit 2860 */ 2861 jfs_flush_journal(log, 0); 2862 } 2863 2864 /* 2865 * txResume() 2866 * 2867 * Allows transactions to start again following txQuiesce 2868 */ 2869 void txResume(struct super_block *sb) 2870 { 2871 struct jfs_log *log = JFS_SBI(sb)->log; 2872 2873 clear_bit(log_QUIESCE, &log->flag); 2874 TXN_WAKEUP(&log->syncwait); 2875 } 2876 2877 /* 2878 * jfs_sync(void) 2879 * 2880 * To be run as a kernel daemon. This is awakened when tlocks run low. 2881 * We write any inodes that have anonymous tlocks so they will become 2882 * available. 2883 */ 2884 int jfs_sync(void *arg) 2885 { 2886 struct inode *ip; 2887 struct jfs_inode_info *jfs_ip; 2888 tid_t tid; 2889 2890 set_freezable(); 2891 do { 2892 /* 2893 * write each inode on the anonymous inode list 2894 */ 2895 TXN_LOCK(); 2896 while (jfs_tlocks_low && !list_empty(&TxAnchor.anon_list)) { 2897 jfs_ip = list_entry(TxAnchor.anon_list.next, 2898 struct jfs_inode_info, 2899 anon_inode_list); 2900 ip = &jfs_ip->vfs_inode; 2901 2902 if (! igrab(ip)) { 2903 /* 2904 * Inode is being freed 2905 */ 2906 list_del_init(&jfs_ip->anon_inode_list); 2907 } else if (mutex_trylock(&jfs_ip->commit_mutex)) { 2908 /* 2909 * inode will be removed from anonymous list 2910 * when it is committed 2911 */ 2912 TXN_UNLOCK(); 2913 tid = txBegin(ip->i_sb, COMMIT_INODE); 2914 txCommit(tid, 1, &ip, 0); 2915 txEnd(tid); 2916 mutex_unlock(&jfs_ip->commit_mutex); 2917 2918 iput(ip); 2919 /* 2920 * Just to be safe. I don't know how 2921 * long we can run without blocking 2922 */ 2923 cond_resched(); 2924 TXN_LOCK(); 2925 } else { 2926 /* We can't get the commit mutex. It may 2927 * be held by a thread waiting for tlock's 2928 * so let's not block here. Save it to 2929 * put back on the anon_list. 2930 */ 2931 2932 /* Move from anon_list to anon_list2 */ 2933 list_move(&jfs_ip->anon_inode_list, 2934 &TxAnchor.anon_list2); 2935 2936 TXN_UNLOCK(); 2937 iput(ip); 2938 TXN_LOCK(); 2939 } 2940 } 2941 /* Add anon_list2 back to anon_list */ 2942 list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list); 2943 2944 if (freezing(current)) { 2945 TXN_UNLOCK(); 2946 try_to_freeze(); 2947 } else { 2948 set_current_state(TASK_INTERRUPTIBLE); 2949 TXN_UNLOCK(); 2950 schedule(); 2951 } 2952 } while (!kthread_should_stop()); 2953 2954 jfs_info("jfs_sync being killed"); 2955 return 0; 2956 } 2957 2958 #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_DEBUG) 2959 int jfs_txanchor_proc_show(struct seq_file *m, void *v) 2960 { 2961 char *freewait; 2962 char *freelockwait; 2963 char *lowlockwait; 2964 2965 freewait = 2966 waitqueue_active(&TxAnchor.freewait) ? "active" : "empty"; 2967 freelockwait = 2968 waitqueue_active(&TxAnchor.freelockwait) ? "active" : "empty"; 2969 lowlockwait = 2970 waitqueue_active(&TxAnchor.lowlockwait) ? "active" : "empty"; 2971 2972 seq_printf(m, 2973 "JFS TxAnchor\n" 2974 "============\n" 2975 "freetid = %d\n" 2976 "freewait = %s\n" 2977 "freelock = %d\n" 2978 "freelockwait = %s\n" 2979 "lowlockwait = %s\n" 2980 "tlocksInUse = %d\n" 2981 "jfs_tlocks_low = %d\n" 2982 "unlock_queue is %sempty\n", 2983 TxAnchor.freetid, 2984 freewait, 2985 TxAnchor.freelock, 2986 freelockwait, 2987 lowlockwait, 2988 TxAnchor.tlocksInUse, 2989 jfs_tlocks_low, 2990 list_empty(&TxAnchor.unlock_queue) ? "" : "not "); 2991 return 0; 2992 } 2993 #endif 2994 2995 #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_STATISTICS) 2996 int jfs_txstats_proc_show(struct seq_file *m, void *v) 2997 { 2998 seq_printf(m, 2999 "JFS TxStats\n" 3000 "===========\n" 3001 "calls to txBegin = %d\n" 3002 "txBegin blocked by sync barrier = %d\n" 3003 "txBegin blocked by tlocks low = %d\n" 3004 "txBegin blocked by no free tid = %d\n" 3005 "calls to txBeginAnon = %d\n" 3006 "txBeginAnon blocked by sync barrier = %d\n" 3007 "txBeginAnon blocked by tlocks low = %d\n" 3008 "calls to txLockAlloc = %d\n" 3009 "tLockAlloc blocked by no free lock = %d\n", 3010 TxStat.txBegin, 3011 TxStat.txBegin_barrier, 3012 TxStat.txBegin_lockslow, 3013 TxStat.txBegin_freetid, 3014 TxStat.txBeginAnon, 3015 TxStat.txBeginAnon_barrier, 3016 TxStat.txBeginAnon_lockslow, 3017 TxStat.txLockAlloc, 3018 TxStat.txLockAlloc_freelock); 3019 return 0; 3020 } 3021 #endif 3022