1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (C) International Business Machines Corp., 2000-2005 4 * Portions Copyright (C) Christoph Hellwig, 2001-2002 5 */ 6 7 /* 8 * jfs_txnmgr.c: transaction manager 9 * 10 * notes: 11 * transaction starts with txBegin() and ends with txCommit() 12 * or txAbort(). 13 * 14 * tlock is acquired at the time of update; 15 * (obviate scan at commit time for xtree and dtree) 16 * tlock and mp points to each other; 17 * (no hashlist for mp -> tlock). 18 * 19 * special cases: 20 * tlock on in-memory inode: 21 * in-place tlock in the in-memory inode itself; 22 * converted to page lock by iWrite() at commit time. 23 * 24 * tlock during write()/mmap() under anonymous transaction (tid = 0): 25 * transferred (?) to transaction at commit time. 26 * 27 * use the page itself to update allocation maps 28 * (obviate intermediate replication of allocation/deallocation data) 29 * hold on to mp+lock thru update of maps 30 */ 31 32 #include <linux/fs.h> 33 #include <linux/vmalloc.h> 34 #include <linux/completion.h> 35 #include <linux/freezer.h> 36 #include <linux/module.h> 37 #include <linux/moduleparam.h> 38 #include <linux/kthread.h> 39 #include <linux/seq_file.h> 40 #include "jfs_incore.h" 41 #include "jfs_inode.h" 42 #include "jfs_filsys.h" 43 #include "jfs_metapage.h" 44 #include "jfs_dinode.h" 45 #include "jfs_imap.h" 46 #include "jfs_dmap.h" 47 #include "jfs_superblock.h" 48 #include "jfs_debug.h" 49 50 /* 51 * transaction management structures 52 */ 53 static struct { 54 int freetid; /* index of a free tid structure */ 55 int freelock; /* index first free lock word */ 56 wait_queue_head_t freewait; /* eventlist of free tblock */ 57 wait_queue_head_t freelockwait; /* eventlist of free tlock */ 58 wait_queue_head_t lowlockwait; /* eventlist of ample tlocks */ 59 int tlocksInUse; /* Number of tlocks in use */ 60 spinlock_t LazyLock; /* synchronize sync_queue & unlock_queue */ 61 /* struct tblock *sync_queue; * Transactions waiting for data sync */ 62 struct list_head unlock_queue; /* Txns waiting to be released */ 63 struct list_head anon_list; /* inodes having anonymous txns */ 64 struct list_head anon_list2; /* inodes having anonymous txns 65 that couldn't be sync'ed */ 66 } TxAnchor; 67 68 int jfs_tlocks_low; /* Indicates low number of available tlocks */ 69 70 #ifdef CONFIG_JFS_STATISTICS 71 static struct { 72 uint txBegin; 73 uint txBegin_barrier; 74 uint txBegin_lockslow; 75 uint txBegin_freetid; 76 uint txBeginAnon; 77 uint txBeginAnon_barrier; 78 uint txBeginAnon_lockslow; 79 uint txLockAlloc; 80 uint txLockAlloc_freelock; 81 } TxStat; 82 #endif 83 84 static int nTxBlock = -1; /* number of transaction blocks */ 85 module_param(nTxBlock, int, 0); 86 MODULE_PARM_DESC(nTxBlock, 87 "Number of transaction blocks (max:65536)"); 88 89 static int nTxLock = -1; /* number of transaction locks */ 90 module_param(nTxLock, int, 0); 91 MODULE_PARM_DESC(nTxLock, 92 "Number of transaction locks (max:65536)"); 93 94 struct tblock *TxBlock; /* transaction block table */ 95 static int TxLockLWM; /* Low water mark for number of txLocks used */ 96 static int TxLockHWM; /* High water mark for number of txLocks used */ 97 static int TxLockVHWM; /* Very High water mark */ 98 struct tlock *TxLock; /* transaction lock table */ 99 100 /* 101 * transaction management lock 102 */ 103 static DEFINE_SPINLOCK(jfsTxnLock); 104 105 #define TXN_LOCK() spin_lock(&jfsTxnLock) 106 #define TXN_UNLOCK() spin_unlock(&jfsTxnLock) 107 108 #define LAZY_LOCK_INIT() 
spin_lock_init(&TxAnchor.LazyLock) 109 #define LAZY_LOCK(flags) spin_lock_irqsave(&TxAnchor.LazyLock, flags) 110 #define LAZY_UNLOCK(flags) spin_unlock_irqrestore(&TxAnchor.LazyLock, flags) 111 112 static DECLARE_WAIT_QUEUE_HEAD(jfs_commit_thread_wait); 113 static int jfs_commit_thread_waking; 114 115 /* 116 * Retry logic exists outside these macros to protect from spurious wakeups. 117 */ 118 static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event) 119 { 120 DECLARE_WAITQUEUE(wait, current); 121 122 add_wait_queue(event, &wait); 123 set_current_state(TASK_UNINTERRUPTIBLE); 124 TXN_UNLOCK(); 125 io_schedule(); 126 remove_wait_queue(event, &wait); 127 } 128 129 #define TXN_SLEEP(event)\ 130 {\ 131 TXN_SLEEP_DROP_LOCK(event);\ 132 TXN_LOCK();\ 133 } 134 135 #define TXN_WAKEUP(event) wake_up_all(event) 136 137 /* 138 * statistics 139 */ 140 static struct { 141 tid_t maxtid; /* 4: biggest tid ever used */ 142 lid_t maxlid; /* 4: biggest lid ever used */ 143 int ntid; /* 4: # of transactions performed */ 144 int nlid; /* 4: # of tlocks acquired */ 145 int waitlock; /* 4: # of tlock wait */ 146 } stattx; 147 148 /* 149 * forward references 150 */ 151 static void diLog(struct jfs_log *log, struct tblock *tblk, struct lrd *lrd, 152 struct tlock *tlck, struct commit *cd); 153 static void dataLog(struct jfs_log *log, struct tblock *tblk, struct lrd *lrd, 154 struct tlock *tlck); 155 static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, 156 struct tlock * tlck); 157 static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, 158 struct tlock * tlck); 159 static void txAllocPMap(struct inode *ip, struct maplock * maplock, 160 struct tblock * tblk); 161 static void txForce(struct tblock * tblk); 162 static void txLog(struct jfs_log *log, struct tblock *tblk, 163 struct commit *cd); 164 static void txUpdateMap(struct tblock * tblk); 165 static void txRelease(struct tblock * tblk); 166 static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, 167 struct tlock * tlck); 168 static void LogSyncRelease(struct metapage * mp); 169 170 /* 171 * transaction block/lock management 172 * --------------------------------- 173 */ 174 175 /* 176 * Get a transaction lock from the free list. If the number in use is 177 * greater than the high water mark, wake up the sync daemon. This should 178 * free some anonymous transaction locks. (TXN_LOCK must be held.)
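 * Callers loop on TXN_SLEEP() and re-test TxAnchor.freelock themselves, since TXN_SLEEP() drops and reacquires TXN_LOCK and the wakeup may be spurious.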
179 */ 180 static lid_t txLockAlloc(void) 181 { 182 lid_t lid; 183 184 INCREMENT(TxStat.txLockAlloc); 185 if (!TxAnchor.freelock) { 186 INCREMENT(TxStat.txLockAlloc_freelock); 187 } 188 189 while (!(lid = TxAnchor.freelock)) 190 TXN_SLEEP(&TxAnchor.freelockwait); 191 TxAnchor.freelock = TxLock[lid].next; 192 HIGHWATERMARK(stattx.maxlid, lid); 193 if ((++TxAnchor.tlocksInUse > TxLockHWM) && (jfs_tlocks_low == 0)) { 194 jfs_info("txLockAlloc tlocks low"); 195 jfs_tlocks_low = 1; 196 wake_up_process(jfsSyncThread); 197 } 198 199 return lid; 200 } 201 202 static void txLockFree(lid_t lid) 203 { 204 TxLock[lid].tid = 0; 205 TxLock[lid].next = TxAnchor.freelock; 206 TxAnchor.freelock = lid; 207 TxAnchor.tlocksInUse--; 208 if (jfs_tlocks_low && (TxAnchor.tlocksInUse < TxLockLWM)) { 209 jfs_info("txLockFree jfs_tlocks_low no more"); 210 jfs_tlocks_low = 0; 211 TXN_WAKEUP(&TxAnchor.lowlockwait); 212 } 213 TXN_WAKEUP(&TxAnchor.freelockwait); 214 } 215 216 /* 217 * NAME: txInit() 218 * 219 * FUNCTION: initialize transaction management structures 220 * 221 * RETURN: 222 * 223 * serialization: single thread at jfs_init() 224 */ 225 int txInit(void) 226 { 227 int k, size; 228 struct sysinfo si; 229 230 /* Set defaults for nTxLock and nTxBlock if unset */ 231 232 if (nTxLock == -1) { 233 if (nTxBlock == -1) { 234 /* Base default on memory size */ 235 si_meminfo(&si); 236 if (si.totalram > (256 * 1024)) /* 1 GB */ 237 nTxLock = 64 * 1024; 238 else 239 nTxLock = si.totalram >> 2; 240 } else if (nTxBlock > (8 * 1024)) 241 nTxLock = 64 * 1024; 242 else 243 nTxLock = nTxBlock << 3; 244 } 245 if (nTxBlock == -1) 246 nTxBlock = nTxLock >> 3; 247 248 /* Verify tunable parameters */ 249 if (nTxBlock < 16) 250 nTxBlock = 16; /* No one should set it this low */ 251 if (nTxBlock > 65536) 252 nTxBlock = 65536; 253 if (nTxLock < 256) 254 nTxLock = 256; /* No one should set it this low */ 255 if (nTxLock > 65536) 256 nTxLock = 65536; 257 258 printk(KERN_INFO "JFS: nTxBlock = %d, nTxLock = %d\n", 259 nTxBlock, nTxLock); 260 /* 261 * initialize transaction block (tblock) table 262 * 263 * transaction id (tid) = tblock index 264 * tid = 0 is reserved. 265 */ 266 TxLockLWM = (nTxLock * 4) / 10; 267 TxLockHWM = (nTxLock * 7) / 10; 268 TxLockVHWM = (nTxLock * 8) / 10; 269 270 size = sizeof(struct tblock) * nTxBlock; 271 TxBlock = vmalloc(size); 272 if (TxBlock == NULL) 273 return -ENOMEM; 274 275 for (k = 0; k < nTxBlock; k++) { 276 init_waitqueue_head(&TxBlock[k].gcwait); 277 init_waitqueue_head(&TxBlock[k].waitor); 278 } 279 280 for (k = 1; k < nTxBlock - 1; k++) { 281 TxBlock[k].next = k + 1; 282 } 283 TxBlock[k].next = 0; 284 285 TxAnchor.freetid = 1; 286 init_waitqueue_head(&TxAnchor.freewait); 287 288 stattx.maxtid = 1; /* statistics */ 289 290 /* 291 * initialize transaction lock (tlock) table 292 * 293 * transaction lock id = tlock index 294 * tlock id = 0 is reserved. 
295 */ 296 size = sizeof(struct tlock) * nTxLock; 297 TxLock = vmalloc(size); 298 if (TxLock == NULL) { 299 vfree(TxBlock); 300 return -ENOMEM; 301 } 302 303 /* initialize tlock table */ 304 for (k = 1; k < nTxLock - 1; k++) 305 TxLock[k].next = k + 1; 306 TxLock[k].next = 0; 307 init_waitqueue_head(&TxAnchor.freelockwait); 308 init_waitqueue_head(&TxAnchor.lowlockwait); 309 310 TxAnchor.freelock = 1; 311 TxAnchor.tlocksInUse = 0; 312 INIT_LIST_HEAD(&TxAnchor.anon_list); 313 INIT_LIST_HEAD(&TxAnchor.anon_list2); 314 315 LAZY_LOCK_INIT(); 316 INIT_LIST_HEAD(&TxAnchor.unlock_queue); 317 318 stattx.maxlid = 1; /* statistics */ 319 320 return 0; 321 } 322 323 /* 324 * NAME: txExit() 325 * 326 * FUNCTION: clean up when module is unloaded 327 */ 328 void txExit(void) 329 { 330 vfree(TxLock); 331 TxLock = NULL; 332 vfree(TxBlock); 333 TxBlock = NULL; 334 } 335 336 /* 337 * NAME: txBegin() 338 * 339 * FUNCTION: start a transaction. 340 * 341 * PARAMETER: sb - superblock 342 * flag - force for nested tx; 343 * 344 * RETURN: tid - transaction id 345 * 346 * note: flag force allows to start tx for nested tx 347 * to prevent deadlock on logsync barrier; 348 */ 349 tid_t txBegin(struct super_block *sb, int flag) 350 { 351 tid_t t; 352 struct tblock *tblk; 353 struct jfs_log *log; 354 355 jfs_info("txBegin: flag = 0x%x", flag); 356 log = JFS_SBI(sb)->log; 357 358 if (!log) { 359 jfs_error(sb, "read-only filesystem\n"); 360 return 0; 361 } 362 363 TXN_LOCK(); 364 365 INCREMENT(TxStat.txBegin); 366 367 retry: 368 if (!(flag & COMMIT_FORCE)) { 369 /* 370 * synchronize with logsync barrier 371 */ 372 if (test_bit(log_SYNCBARRIER, &log->flag) || 373 test_bit(log_QUIESCE, &log->flag)) { 374 INCREMENT(TxStat.txBegin_barrier); 375 TXN_SLEEP(&log->syncwait); 376 goto retry; 377 } 378 } 379 if (flag == 0) { 380 /* 381 * Don't begin transaction if we're getting starved for tlocks 382 * unless COMMIT_FORCE or COMMIT_INODE (which may ultimately 383 * free tlocks) 384 */ 385 if (TxAnchor.tlocksInUse > TxLockVHWM) { 386 INCREMENT(TxStat.txBegin_lockslow); 387 TXN_SLEEP(&TxAnchor.lowlockwait); 388 goto retry; 389 } 390 } 391 392 /* 393 * allocate transaction id/block 394 */ 395 if ((t = TxAnchor.freetid) == 0) { 396 jfs_info("txBegin: waiting for free tid"); 397 INCREMENT(TxStat.txBegin_freetid); 398 TXN_SLEEP(&TxAnchor.freewait); 399 goto retry; 400 } 401 402 tblk = tid_to_tblock(t); 403 404 if ((tblk->next == 0) && !(flag & COMMIT_FORCE)) { 405 /* Don't let a non-forced transaction take the last tblk */ 406 jfs_info("txBegin: waiting for free tid"); 407 INCREMENT(TxStat.txBegin_freetid); 408 TXN_SLEEP(&TxAnchor.freewait); 409 goto retry; 410 } 411 412 TxAnchor.freetid = tblk->next; 413 414 /* 415 * initialize transaction 416 */ 417 418 /* 419 * We can't zero the whole thing or we screw up another thread being 420 * awakened after sleeping on tblk->waitor 421 * 422 * memset(tblk, 0, sizeof(struct tblock)); 423 */ 424 tblk->next = tblk->last = tblk->xflag = tblk->flag = tblk->lsn = 0; 425 426 tblk->sb = sb; 427 ++log->logtid; 428 tblk->logtid = log->logtid; 429 430 ++log->active; 431 432 HIGHWATERMARK(stattx.maxtid, t); /* statistics */ 433 INCREMENT(stattx.ntid); /* statistics */ 434 435 TXN_UNLOCK(); 436 437 jfs_info("txBegin: returning tid = %d", t); 438 439 return t; 440 } 441 442 /* 443 * NAME: txBeginAnon() 444 * 445 * FUNCTION: start an anonymous transaction. 446 * Blocks if logsync or available tlocks are low to prevent 447 * anonymous tlocks from depleting supply. 
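 * Unlike txBegin(), no tid/tblock is allocated here; the caller's updates are tracked as anonymous tlocks (tid = 0) and are inherited by the committing transaction at txCommit() time.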
448 * 449 * PARAMETER: sb - superblock 450 * 451 * RETURN: none 452 */ 453 void txBeginAnon(struct super_block *sb) 454 { 455 struct jfs_log *log; 456 457 log = JFS_SBI(sb)->log; 458 459 TXN_LOCK(); 460 INCREMENT(TxStat.txBeginAnon); 461 462 retry: 463 /* 464 * synchronize with logsync barrier 465 */ 466 if (test_bit(log_SYNCBARRIER, &log->flag) || 467 test_bit(log_QUIESCE, &log->flag)) { 468 INCREMENT(TxStat.txBeginAnon_barrier); 469 TXN_SLEEP(&log->syncwait); 470 goto retry; 471 } 472 473 /* 474 * Don't begin transaction if we're getting starved for tlocks 475 */ 476 if (TxAnchor.tlocksInUse > TxLockVHWM) { 477 INCREMENT(TxStat.txBeginAnon_lockslow); 478 TXN_SLEEP(&TxAnchor.lowlockwait); 479 goto retry; 480 } 481 TXN_UNLOCK(); 482 } 483 484 /* 485 * txEnd() 486 * 487 * function: free specified transaction block. 488 * 489 * logsync barrier processing: 490 * 491 * serialization: 492 */ 493 void txEnd(tid_t tid) 494 { 495 struct tblock *tblk = tid_to_tblock(tid); 496 struct jfs_log *log; 497 498 jfs_info("txEnd: tid = %d", tid); 499 TXN_LOCK(); 500 501 /* 502 * wakeup transactions waiting on the page locked 503 * by the current transaction 504 */ 505 TXN_WAKEUP(&tblk->waitor); 506 507 log = JFS_SBI(tblk->sb)->log; 508 509 /* 510 * Lazy commit thread can't free this guy until we mark it UNLOCKED, 511 * otherwise, we would be left with a transaction that may have been 512 * reused. 513 * 514 * Lazy commit thread will turn off tblkGC_LAZY before calling this 515 * routine. 516 */ 517 if (tblk->flag & tblkGC_LAZY) { 518 jfs_info("txEnd called w/lazy tid: %d, tblk = 0x%p", tid, tblk); 519 TXN_UNLOCK(); 520 521 spin_lock_irq(&log->gclock); // LOGGC_LOCK 522 tblk->flag |= tblkGC_UNLOCKED; 523 spin_unlock_irq(&log->gclock); // LOGGC_UNLOCK 524 return; 525 } 526 527 jfs_info("txEnd: tid: %d, tblk = 0x%p", tid, tblk); 528 529 assert(tblk->next == 0); 530 531 /* 532 * insert tblock back on freelist 533 */ 534 tblk->next = TxAnchor.freetid; 535 TxAnchor.freetid = tid; 536 537 /* 538 * mark the tblock not active 539 */ 540 if (--log->active == 0) { 541 clear_bit(log_FLUSH, &log->flag); 542 543 /* 544 * synchronize with logsync barrier 545 */ 546 if (test_bit(log_SYNCBARRIER, &log->flag)) { 547 TXN_UNLOCK(); 548 549 /* write dirty metadata & forward log syncpt */ 550 jfs_syncpt(log, 1); 551 552 jfs_info("log barrier off: 0x%x", log->lsn); 553 554 /* enable new transactions start */ 555 clear_bit(log_SYNCBARRIER, &log->flag); 556 557 /* wakeup all waitors for logsync barrier */ 558 TXN_WAKEUP(&log->syncwait); 559 560 goto wakeup; 561 } 562 } 563 564 TXN_UNLOCK(); 565 wakeup: 566 /* 567 * wakeup all waitors for a free tblock 568 */ 569 TXN_WAKEUP(&TxAnchor.freewait); 570 } 571 572 /* 573 * txLock() 574 * 575 * function: acquire a transaction lock on the specified <mp> 576 * 577 * parameter: 578 * 579 * return: transaction lock id 580 * 581 * serialization: 582 */ 583 struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp, 584 int type) 585 { 586 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 587 int dir_xtree = 0; 588 lid_t lid; 589 tid_t xtid; 590 struct tlock *tlck; 591 struct xtlock *xtlck; 592 struct linelock *linelock; 593 xtpage_t *p; 594 struct tblock *tblk; 595 596 TXN_LOCK(); 597 598 if (S_ISDIR(ip->i_mode) && (type & tlckXTREE) && 599 !(mp->xflag & COMMIT_PAGE)) { 600 /* 601 * Directory inode is special. It can have both an xtree tlock 602 * and a dtree tlock associated with it. 
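 * The directory's xtree tlock is anchored at jfs_ip->xtlid, while its dtree tlock (like any other metapage tlock) is anchored at mp->lid.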
603 */ 604 dir_xtree = 1; 605 lid = jfs_ip->xtlid; 606 } else 607 lid = mp->lid; 608 609 /* is page not locked by a transaction ? */ 610 if (lid == 0) 611 goto allocateLock; 612 613 jfs_info("txLock: tid:%d ip:0x%p mp:0x%p lid:%d", tid, ip, mp, lid); 614 615 /* is page locked by the requester transaction ? */ 616 tlck = lid_to_tlock(lid); 617 if ((xtid = tlck->tid) == tid) { 618 TXN_UNLOCK(); 619 goto grantLock; 620 } 621 622 /* 623 * is page locked by anonymous transaction/lock ? 624 * 625 * (page update without transaction (i.e., file write) is 626 * locked under anonymous transaction tid = 0: 627 * anonymous tlocks maintained on anonymous tlock list of 628 * the inode of the page and available to all anonymous 629 * transactions until txCommit() time at which point 630 * they are transferred to the transaction tlock list of 631 * the committing transaction of the inode) 632 */ 633 if (xtid == 0) { 634 tlck->tid = tid; 635 TXN_UNLOCK(); 636 tblk = tid_to_tblock(tid); 637 /* 638 * The order of the tlocks in the transaction is important 639 * (during truncate, child xtree pages must be freed before 640 * parent's tlocks change the working map). 641 * Take tlock off anonymous list and add to tail of 642 * transaction list 643 * 644 * Note: We really need to get rid of the tid & lid and 645 * use list_head's. This code is getting UGLY! 646 */ 647 if (jfs_ip->atlhead == lid) { 648 if (jfs_ip->atltail == lid) { 649 /* only anonymous txn. 650 * Remove from anon_list 651 */ 652 TXN_LOCK(); 653 list_del_init(&jfs_ip->anon_inode_list); 654 TXN_UNLOCK(); 655 } 656 jfs_ip->atlhead = tlck->next; 657 } else { 658 lid_t last; 659 for (last = jfs_ip->atlhead; 660 lid_to_tlock(last)->next != lid; 661 last = lid_to_tlock(last)->next) { 662 assert(last); 663 } 664 lid_to_tlock(last)->next = tlck->next; 665 if (jfs_ip->atltail == lid) 666 jfs_ip->atltail = last; 667 } 668 669 /* insert the tlock at tail of transaction tlock list */ 670 671 if (tblk->next) 672 lid_to_tlock(tblk->last)->next = lid; 673 else 674 tblk->next = lid; 675 tlck->next = 0; 676 tblk->last = lid; 677 678 goto grantLock; 679 } 680 681 goto waitLock; 682 683 /* 684 * allocate a tlock 685 */ 686 allocateLock: 687 lid = txLockAlloc(); 688 tlck = lid_to_tlock(lid); 689 690 /* 691 * initialize tlock 692 */ 693 tlck->tid = tid; 694 695 TXN_UNLOCK(); 696 697 /* mark tlock for meta-data page */ 698 if (mp->xflag & COMMIT_PAGE) { 699 700 tlck->flag = tlckPAGELOCK; 701 702 /* mark the page dirty and nohomeok */ 703 metapage_nohomeok(mp); 704 705 jfs_info("locking mp = 0x%p, nohomeok = %d tid = %d tlck = 0x%p", 706 mp, mp->nohomeok, tid, tlck); 707 708 /* if anonymous transaction, and buffer is on the group 709 * commit synclist, mark inode to show this. This will 710 * prevent the buffer from being marked nohomeok for too 711 * long a time. 
712 */ 713 if ((tid == 0) && mp->lsn) 714 set_cflag(COMMIT_Synclist, ip); 715 } 716 /* mark tlock for in-memory inode */ 717 else 718 tlck->flag = tlckINODELOCK; 719 720 if (S_ISDIR(ip->i_mode)) 721 tlck->flag |= tlckDIRECTORY; 722 723 tlck->type = 0; 724 725 /* bind the tlock and the page */ 726 tlck->ip = ip; 727 tlck->mp = mp; 728 if (dir_xtree) 729 jfs_ip->xtlid = lid; 730 else 731 mp->lid = lid; 732 733 /* 734 * enqueue transaction lock to transaction/inode 735 */ 736 /* insert the tlock at tail of transaction tlock list */ 737 if (tid) { 738 tblk = tid_to_tblock(tid); 739 if (tblk->next) 740 lid_to_tlock(tblk->last)->next = lid; 741 else 742 tblk->next = lid; 743 tlck->next = 0; 744 tblk->last = lid; 745 } 746 /* anonymous transaction: 747 * insert the tlock at head of inode anonymous tlock list 748 */ 749 else { 750 tlck->next = jfs_ip->atlhead; 751 jfs_ip->atlhead = lid; 752 if (tlck->next == 0) { 753 /* This inode's first anonymous transaction */ 754 jfs_ip->atltail = lid; 755 TXN_LOCK(); 756 list_add_tail(&jfs_ip->anon_inode_list, 757 &TxAnchor.anon_list); 758 TXN_UNLOCK(); 759 } 760 } 761 762 /* initialize type dependent area for linelock */ 763 linelock = (struct linelock *) & tlck->lock; 764 linelock->next = 0; 765 linelock->flag = tlckLINELOCK; 766 linelock->maxcnt = TLOCKSHORT; 767 linelock->index = 0; 768 769 switch (type & tlckTYPE) { 770 case tlckDTREE: 771 linelock->l2linesize = L2DTSLOTSIZE; 772 break; 773 774 case tlckXTREE: 775 linelock->l2linesize = L2XTSLOTSIZE; 776 777 xtlck = (struct xtlock *) linelock; 778 xtlck->header.offset = 0; 779 xtlck->header.length = 2; 780 781 if (type & tlckNEW) { 782 xtlck->lwm.offset = XTENTRYSTART; 783 } else { 784 if (mp->xflag & COMMIT_PAGE) 785 p = (xtpage_t *) mp->data; 786 else 787 p = (xtpage_t *) &jfs_ip->i_xtroot; 788 xtlck->lwm.offset = 789 le16_to_cpu(p->header.nextindex); 790 } 791 xtlck->lwm.length = 0; /* ! 
*/ 792 xtlck->twm.offset = 0; 793 xtlck->hwm.offset = 0; 794 795 xtlck->index = 2; 796 break; 797 798 case tlckINODE: 799 linelock->l2linesize = L2INODESLOTSIZE; 800 break; 801 802 case tlckDATA: 803 linelock->l2linesize = L2DATASLOTSIZE; 804 break; 805 806 default: 807 jfs_err("UFO tlock:0x%p", tlck); 808 } 809 810 /* 811 * update tlock vector 812 */ 813 grantLock: 814 tlck->type |= type; 815 816 return tlck; 817 818 /* 819 * page is being locked by another transaction: 820 */ 821 waitLock: 822 /* Only locks on ipimap or ipaimap should reach here */ 823 /* assert(jfs_ip->fileset == AGGREGATE_I); */ 824 if (jfs_ip->fileset != AGGREGATE_I) { 825 printk(KERN_ERR "txLock: trying to lock locked page!"); 826 print_hex_dump(KERN_ERR, "ip: ", DUMP_PREFIX_ADDRESS, 16, 4, 827 ip, sizeof(*ip), 0); 828 print_hex_dump(KERN_ERR, "mp: ", DUMP_PREFIX_ADDRESS, 16, 4, 829 mp, sizeof(*mp), 0); 830 print_hex_dump(KERN_ERR, "Locker's tblock: ", 831 DUMP_PREFIX_ADDRESS, 16, 4, tid_to_tblock(tid), 832 sizeof(struct tblock), 0); 833 print_hex_dump(KERN_ERR, "Tlock: ", DUMP_PREFIX_ADDRESS, 16, 4, 834 tlck, sizeof(*tlck), 0); 835 BUG(); 836 } 837 INCREMENT(stattx.waitlock); /* statistics */ 838 TXN_UNLOCK(); 839 release_metapage(mp); 840 TXN_LOCK(); 841 xtid = tlck->tid; /* reacquire after dropping TXN_LOCK */ 842 843 jfs_info("txLock: in waitLock, tid = %d, xtid = %d, lid = %d", 844 tid, xtid, lid); 845 846 /* Recheck everything since dropping TXN_LOCK */ 847 if (xtid && (tlck->mp == mp) && (mp->lid == lid)) 848 TXN_SLEEP_DROP_LOCK(&tid_to_tblock(xtid)->waitor); 849 else 850 TXN_UNLOCK(); 851 jfs_info("txLock: awakened tid = %d, lid = %d", tid, lid); 852 853 return NULL; 854 } 855 856 /* 857 * NAME: txRelease() 858 * 859 * FUNCTION: Release buffers associated with transaction locks, but don't 860 * mark homeok yet. The allows other transactions to modify 861 * buffers, but won't let them go to disk until commit record 862 * actually gets written. 863 * 864 * PARAMETER: 865 * tblk - 866 * 867 * RETURN: Errors from subroutines. 868 */ 869 static void txRelease(struct tblock * tblk) 870 { 871 struct metapage *mp; 872 lid_t lid; 873 struct tlock *tlck; 874 875 TXN_LOCK(); 876 877 for (lid = tblk->next; lid; lid = tlck->next) { 878 tlck = lid_to_tlock(lid); 879 if ((mp = tlck->mp) != NULL && 880 (tlck->type & tlckBTROOT) == 0) { 881 assert(mp->xflag & COMMIT_PAGE); 882 mp->lid = 0; 883 } 884 } 885 886 /* 887 * wakeup transactions waiting on a page locked 888 * by the current transaction 889 */ 890 TXN_WAKEUP(&tblk->waitor); 891 892 TXN_UNLOCK(); 893 } 894 895 /* 896 * NAME: txUnlock() 897 * 898 * FUNCTION: Initiates pageout of pages modified by tid in journalled 899 * objects and frees their lockwords. 
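 * For each tlock: the metapage is marked homeok again, its clsn is advanced to the transaction's clsn if that is younger, and the tlock plus any chained linelocks are returned to the freelist.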
900 */ 901 static void txUnlock(struct tblock * tblk) 902 { 903 struct tlock *tlck; 904 struct linelock *linelock; 905 lid_t lid, next, llid, k; 906 struct metapage *mp; 907 struct jfs_log *log; 908 int difft, diffp; 909 unsigned long flags; 910 911 jfs_info("txUnlock: tblk = 0x%p", tblk); 912 log = JFS_SBI(tblk->sb)->log; 913 914 /* 915 * mark page under tlock homeok (its log has been written): 916 */ 917 for (lid = tblk->next; lid; lid = next) { 918 tlck = lid_to_tlock(lid); 919 next = tlck->next; 920 921 jfs_info("unlocking lid = %d, tlck = 0x%p", lid, tlck); 922 923 /* unbind page from tlock */ 924 if ((mp = tlck->mp) != NULL && 925 (tlck->type & tlckBTROOT) == 0) { 926 assert(mp->xflag & COMMIT_PAGE); 927 928 /* hold buffer 929 */ 930 hold_metapage(mp); 931 932 assert(mp->nohomeok > 0); 933 _metapage_homeok(mp); 934 935 /* inherit younger/larger clsn */ 936 LOGSYNC_LOCK(log, flags); 937 if (mp->clsn) { 938 logdiff(difft, tblk->clsn, log); 939 logdiff(diffp, mp->clsn, log); 940 if (difft > diffp) 941 mp->clsn = tblk->clsn; 942 } else 943 mp->clsn = tblk->clsn; 944 LOGSYNC_UNLOCK(log, flags); 945 946 assert(!(tlck->flag & tlckFREEPAGE)); 947 948 put_metapage(mp); 949 } 950 951 /* insert tlock, and linelock(s) of the tlock if any, 952 * at head of freelist 953 */ 954 TXN_LOCK(); 955 956 llid = ((struct linelock *) & tlck->lock)->next; 957 while (llid) { 958 linelock = (struct linelock *) lid_to_tlock(llid); 959 k = linelock->next; 960 txLockFree(llid); 961 llid = k; 962 } 963 txLockFree(lid); 964 965 TXN_UNLOCK(); 966 } 967 tblk->next = tblk->last = 0; 968 969 /* 970 * remove tblock from logsynclist 971 * (allocation map pages inherited lsn of tblk and 972 * has been inserted in logsync list at txUpdateMap()) 973 */ 974 if (tblk->lsn) { 975 LOGSYNC_LOCK(log, flags); 976 log->count--; 977 list_del(&tblk->synclist); 978 LOGSYNC_UNLOCK(log, flags); 979 } 980 } 981 982 /* 983 * txMaplock() 984 * 985 * function: allocate a transaction lock for freed page/entry; 986 * for freed page, maplock is used as xtlock/dtlock type; 987 */ 988 struct tlock *txMaplock(tid_t tid, struct inode *ip, int type) 989 { 990 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 991 lid_t lid; 992 struct tblock *tblk; 993 struct tlock *tlck; 994 struct maplock *maplock; 995 996 TXN_LOCK(); 997 998 /* 999 * allocate a tlock 1000 */ 1001 lid = txLockAlloc(); 1002 tlck = lid_to_tlock(lid); 1003 1004 /* 1005 * initialize tlock 1006 */ 1007 tlck->tid = tid; 1008 1009 /* bind the tlock and the object */ 1010 tlck->flag = tlckINODELOCK; 1011 if (S_ISDIR(ip->i_mode)) 1012 tlck->flag |= tlckDIRECTORY; 1013 tlck->ip = ip; 1014 tlck->mp = NULL; 1015 1016 tlck->type = type; 1017 1018 /* 1019 * enqueue transaction lock to transaction/inode 1020 */ 1021 /* insert the tlock at tail of transaction tlock list */ 1022 if (tid) { 1023 tblk = tid_to_tblock(tid); 1024 if (tblk->next) 1025 lid_to_tlock(tblk->last)->next = lid; 1026 else 1027 tblk->next = lid; 1028 tlck->next = 0; 1029 tblk->last = lid; 1030 } 1031 /* anonymous transaction: 1032 * insert the tlock at head of inode anonymous tlock list 1033 */ 1034 else { 1035 tlck->next = jfs_ip->atlhead; 1036 jfs_ip->atlhead = lid; 1037 if (tlck->next == 0) { 1038 /* This inode's first anonymous transaction */ 1039 jfs_ip->atltail = lid; 1040 list_add_tail(&jfs_ip->anon_inode_list, 1041 &TxAnchor.anon_list); 1042 } 1043 } 1044 1045 TXN_UNLOCK(); 1046 1047 /* initialize type dependent area for maplock */ 1048 maplock = (struct maplock *) & tlck->lock; 1049 maplock->next = 0; 1050 maplock->maxcnt = 
0; 1051 maplock->index = 0; 1052 1053 return tlck; 1054 } 1055 1056 /* 1057 * txLinelock() 1058 * 1059 * function: allocate a transaction lock for log vector list 1060 */ 1061 struct linelock *txLinelock(struct linelock * tlock) 1062 { 1063 lid_t lid; 1064 struct tlock *tlck; 1065 struct linelock *linelock; 1066 1067 TXN_LOCK(); 1068 1069 /* allocate a TxLock structure */ 1070 lid = txLockAlloc(); 1071 tlck = lid_to_tlock(lid); 1072 1073 TXN_UNLOCK(); 1074 1075 /* initialize linelock */ 1076 linelock = (struct linelock *) tlck; 1077 linelock->next = 0; 1078 linelock->flag = tlckLINELOCK; 1079 linelock->maxcnt = TLOCKLONG; 1080 linelock->index = 0; 1081 if (tlck->flag & tlckDIRECTORY) 1082 linelock->flag |= tlckDIRECTORY; 1083 1084 /* append linelock after tlock */ 1085 linelock->next = tlock->next; 1086 tlock->next = lid; 1087 1088 return linelock; 1089 } 1090 1091 /* 1092 * transaction commit management 1093 * ----------------------------- 1094 */ 1095 1096 /* 1097 * NAME: txCommit() 1098 * 1099 * FUNCTION: commit the changes to the objects specified in 1100 * clist. For journalled segments only the 1101 * changes of the caller are committed, ie by tid. 1102 * for non-journalled segments the data are flushed to 1103 * disk and then the change to the disk inode and indirect 1104 * blocks committed (so blocks newly allocated to the 1105 * segment will be made a part of the segment atomically). 1106 * 1107 * all of the segments specified in clist must be in 1108 * one file system. no more than 6 segments are needed 1109 * to handle all unix svcs. 1110 * 1111 * if the i_nlink field (i.e. disk inode link count) 1112 * is zero, and the type of inode is a regular file or 1113 * directory, or symbolic link , the inode is truncated 1114 * to zero length. the truncation is committed but the 1115 * VM resources are unaffected until it is closed (see 1116 * iput and iclose). 1117 * 1118 * PARAMETER: 1119 * 1120 * RETURN: 1121 * 1122 * serialization: 1123 * on entry the inode lock on each segment is assumed 1124 * to be held. 1125 * 1126 * i/o error: 1127 */ 1128 int txCommit(tid_t tid, /* transaction identifier */ 1129 int nip, /* number of inodes to commit */ 1130 struct inode **iplist, /* list of inode to commit */ 1131 int flag) 1132 { 1133 int rc = 0; 1134 struct commit cd; 1135 struct jfs_log *log; 1136 struct tblock *tblk; 1137 struct lrd *lrd; 1138 struct inode *ip; 1139 struct jfs_inode_info *jfs_ip; 1140 int k, n; 1141 ino_t top; 1142 struct super_block *sb; 1143 1144 jfs_info("txCommit, tid = %d, flag = %d", tid, flag); 1145 /* is read-only file system ? */ 1146 if (isReadOnly(iplist[0])) { 1147 rc = -EROFS; 1148 goto TheEnd; 1149 } 1150 1151 sb = cd.sb = iplist[0]->i_sb; 1152 cd.tid = tid; 1153 1154 if (tid == 0) 1155 tid = txBegin(sb, 0); 1156 tblk = tid_to_tblock(tid); 1157 1158 /* 1159 * initialize commit structure 1160 */ 1161 log = JFS_SBI(sb)->log; 1162 cd.log = log; 1163 1164 /* initialize log record descriptor in commit */ 1165 lrd = &cd.lrd; 1166 lrd->logtid = cpu_to_le32(tblk->logtid); 1167 lrd->backchain = 0; 1168 1169 tblk->xflag |= flag; 1170 1171 if ((flag & (COMMIT_FORCE | COMMIT_SYNC)) == 0) 1172 tblk->xflag |= COMMIT_LAZY; 1173 /* 1174 * prepare non-journaled objects for commit 1175 * 1176 * flush data pages of non-journaled file 1177 * to prevent the file getting non-initialized disk blocks 1178 * in case of crash. 
1179 * (new blocks - ) 1180 */ 1181 cd.iplist = iplist; 1182 cd.nip = nip; 1183 1184 /* 1185 * acquire transaction lock on (on-disk) inodes 1186 * 1187 * update on-disk inode from in-memory inode 1188 * acquiring transaction locks for AFTER records 1189 * on the on-disk inode of file object 1190 * 1191 * sort the inodes array by inode number in descending order 1192 * to prevent deadlock when acquiring transaction lock 1193 * of on-disk inodes on multiple on-disk inode pages by 1194 * multiple concurrent transactions 1195 */ 1196 for (k = 0; k < cd.nip; k++) { 1197 top = (cd.iplist[k])->i_ino; 1198 for (n = k + 1; n < cd.nip; n++) { 1199 ip = cd.iplist[n]; 1200 if (ip->i_ino > top) { 1201 top = ip->i_ino; 1202 cd.iplist[n] = cd.iplist[k]; 1203 cd.iplist[k] = ip; 1204 } 1205 } 1206 1207 ip = cd.iplist[k]; 1208 jfs_ip = JFS_IP(ip); 1209 1210 /* 1211 * BUGBUG - This code has temporarily been removed. The 1212 * intent is to ensure that any file data is written before 1213 * the metadata is committed to the journal. This prevents 1214 * uninitialized data from appearing in a file after the 1215 * journal has been replayed. (The uninitialized data 1216 * could be sensitive data removed by another user.) 1217 * 1218 * The problem now is that we are holding the IWRITELOCK 1219 * on the inode, and calling filemap_fdatawrite on an 1220 * unmapped page will cause a deadlock in jfs_get_block. 1221 * 1222 * The long term solution is to pare down the use of 1223 * IWRITELOCK. We are currently holding it too long. 1224 * We could also be smarter about which data pages need 1225 * to be written before the transaction is committed and 1226 * when we don't need to worry about it at all. 1227 * 1228 * if ((!S_ISDIR(ip->i_mode)) 1229 * && (tblk->flag & COMMIT_DELETE) == 0) 1230 * filemap_write_and_wait(ip->i_mapping); 1231 */ 1232 1233 /* 1234 * Mark inode as not dirty. It will still be on the dirty 1235 * inode list, but we'll know not to commit it again unless 1236 * it gets marked dirty again 1237 */ 1238 clear_cflag(COMMIT_Dirty, ip); 1239 1240 /* inherit anonymous tlock(s) of inode */ 1241 if (jfs_ip->atlhead) { 1242 lid_to_tlock(jfs_ip->atltail)->next = tblk->next; 1243 tblk->next = jfs_ip->atlhead; 1244 if (!tblk->last) 1245 tblk->last = jfs_ip->atltail; 1246 jfs_ip->atlhead = jfs_ip->atltail = 0; 1247 TXN_LOCK(); 1248 list_del_init(&jfs_ip->anon_inode_list); 1249 TXN_UNLOCK(); 1250 } 1251 1252 /* 1253 * acquire transaction lock on on-disk inode page 1254 * (become first tlock of the tblk's tlock list) 1255 */ 1256 if (((rc = diWrite(tid, ip)))) 1257 goto out; 1258 } 1259 1260 /* 1261 * write log records from transaction locks 1262 * 1263 * txUpdateMap() resets XAD_NEW in XAD. 1264 */ 1265 txLog(log, tblk, &cd); 1266 1267 /* 1268 * Ensure that inode isn't reused before 1269 * lazy commit thread finishes processing 1270 */ 1271 if (tblk->xflag & COMMIT_DELETE) { 1272 ihold(tblk->u.ip); 1273 /* 1274 * Avoid a rare deadlock 1275 * 1276 * If the inode is locked, we may be blocked in 1277 * jfs_commit_inode. If so, we don't want the 1278 * lazy_commit thread doing the last iput() on the inode 1279 * since that may block on the locked inode. Instead, 1280 * commit the transaction synchronously, so the last iput 1281 * will be done by the calling thread (or later) 1282 */ 1283 /* 1284 * I believe this code is no longer needed. Splitting I_LOCK 1285 * into two bits, I_NEW and I_SYNC should prevent this 1286 * deadlock as well. 
But since I don't have a JFS testload 1287 * to verify this, only a trivial s/I_LOCK/I_SYNC/ was done. 1288 * Joern 1289 */ 1290 if (tblk->u.ip->i_state & I_SYNC) 1291 tblk->xflag &= ~COMMIT_LAZY; 1292 } 1293 1294 ASSERT((!(tblk->xflag & COMMIT_DELETE)) || 1295 ((tblk->u.ip->i_nlink == 0) && 1296 !test_cflag(COMMIT_Nolink, tblk->u.ip))); 1297 1298 /* 1299 * write COMMIT log record 1300 */ 1301 lrd->type = cpu_to_le16(LOG_COMMIT); 1302 lrd->length = 0; 1303 lmLog(log, tblk, lrd, NULL); 1304 1305 lmGroupCommit(log, tblk); 1306 1307 /* 1308 * - transaction is now committed - 1309 */ 1310 1311 /* 1312 * force pages in careful update 1313 * (imap addressing structure update) 1314 */ 1315 if (flag & COMMIT_FORCE) 1316 txForce(tblk); 1317 1318 /* 1319 * update allocation map. 1320 * 1321 * update inode allocation map and inode: 1322 * free pager lock on memory object of inode if any. 1323 * update block allocation map. 1324 * 1325 * txUpdateMap() resets XAD_NEW in XAD. 1326 */ 1327 if (tblk->xflag & COMMIT_FORCE) 1328 txUpdateMap(tblk); 1329 1330 /* 1331 * free transaction locks and pageout/free pages 1332 */ 1333 txRelease(tblk); 1334 1335 if ((tblk->flag & tblkGC_LAZY) == 0) 1336 txUnlock(tblk); 1337 1338 1339 /* 1340 * reset in-memory object state 1341 */ 1342 for (k = 0; k < cd.nip; k++) { 1343 ip = cd.iplist[k]; 1344 jfs_ip = JFS_IP(ip); 1345 1346 /* 1347 * reset in-memory inode state 1348 */ 1349 jfs_ip->bxflag = 0; 1350 jfs_ip->blid = 0; 1351 } 1352 1353 out: 1354 if (rc != 0) 1355 txAbort(tid, 1); 1356 1357 TheEnd: 1358 jfs_info("txCommit: tid = %d, returning %d", tid, rc); 1359 return rc; 1360 } 1361 1362 /* 1363 * NAME: txLog() 1364 * 1365 * FUNCTION: Writes AFTER log records for all lines modified 1366 * by tid for segments specified by inodes in comdata. 1367 * Code assumes only WRITELOCKS are recorded in lockwords. 
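 * Each tlock is dispatched on (tlck->type & tlckTYPE) to the matching record formatter: xtLog(), dtLog(), diLog(), mapLog() or dataLog().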
1368 * 1369 * PARAMETERS: 1370 * 1371 * RETURN : 1372 */ 1373 static void txLog(struct jfs_log *log, struct tblock *tblk, struct commit *cd) 1374 { 1375 struct inode *ip; 1376 lid_t lid; 1377 struct tlock *tlck; 1378 struct lrd *lrd = &cd->lrd; 1379 1380 /* 1381 * write log record(s) for each tlock of transaction, 1382 */ 1383 for (lid = tblk->next; lid; lid = tlck->next) { 1384 tlck = lid_to_tlock(lid); 1385 1386 tlck->flag |= tlckLOG; 1387 1388 /* initialize lrd common */ 1389 ip = tlck->ip; 1390 lrd->aggregate = cpu_to_le32(JFS_SBI(ip->i_sb)->aggregate); 1391 lrd->log.redopage.fileset = cpu_to_le32(JFS_IP(ip)->fileset); 1392 lrd->log.redopage.inode = cpu_to_le32(ip->i_ino); 1393 1394 /* write log record of page from the tlock */ 1395 switch (tlck->type & tlckTYPE) { 1396 case tlckXTREE: 1397 xtLog(log, tblk, lrd, tlck); 1398 break; 1399 1400 case tlckDTREE: 1401 dtLog(log, tblk, lrd, tlck); 1402 break; 1403 1404 case tlckINODE: 1405 diLog(log, tblk, lrd, tlck, cd); 1406 break; 1407 1408 case tlckMAP: 1409 mapLog(log, tblk, lrd, tlck); 1410 break; 1411 1412 case tlckDATA: 1413 dataLog(log, tblk, lrd, tlck); 1414 break; 1415 1416 default: 1417 jfs_err("UFO tlock:0x%p", tlck); 1418 } 1419 } 1420 1421 return; 1422 } 1423 1424 /* 1425 * diLog() 1426 * 1427 * function: log inode tlock and format maplock to update bmap; 1428 */ 1429 static void diLog(struct jfs_log *log, struct tblock *tblk, struct lrd *lrd, 1430 struct tlock *tlck, struct commit *cd) 1431 { 1432 struct metapage *mp; 1433 pxd_t *pxd; 1434 struct pxd_lock *pxdlock; 1435 1436 mp = tlck->mp; 1437 1438 /* initialize as REDOPAGE record format */ 1439 lrd->log.redopage.type = cpu_to_le16(LOG_INODE); 1440 lrd->log.redopage.l2linesize = cpu_to_le16(L2INODESLOTSIZE); 1441 1442 pxd = &lrd->log.redopage.pxd; 1443 1444 /* 1445 * inode after image 1446 */ 1447 if (tlck->type & tlckENTRY) { 1448 /* log after-image for logredo(): */ 1449 lrd->type = cpu_to_le16(LOG_REDOPAGE); 1450 PXDaddress(pxd, mp->index); 1451 PXDlength(pxd, 1452 mp->logical_size >> tblk->sb->s_blocksize_bits); 1453 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); 1454 1455 /* mark page as homeward bound */ 1456 tlck->flag |= tlckWRITEPAGE; 1457 } else if (tlck->type & tlckFREE) { 1458 /* 1459 * free inode extent 1460 * 1461 * (pages of the freed inode extent have been invalidated and 1462 * a maplock for free of the extent has been formatted at 1463 * txLock() time); 1464 * 1465 * the tlock had been acquired on the inode allocation map page 1466 * (iag) that specifies the freed extent, even though the map 1467 * page is not itself logged, to prevent pageout of the map 1468 * page before the log; 1469 */ 1470 1471 /* log LOG_NOREDOINOEXT of the freed inode extent for 1472 * logredo() to start NoRedoPage filters, and to update 1473 * imap and bmap for free of the extent; 1474 */ 1475 lrd->type = cpu_to_le16(LOG_NOREDOINOEXT); 1476 /* 1477 * For the LOG_NOREDOINOEXT record, we need 1478 * to pass the IAG number and inode extent 1479 * index (within that IAG) from which the 1480 * extent is being released. These have been 1481 * passed to us in the iplist[1] and iplist[2]. 
1482 */ 1483 lrd->log.noredoinoext.iagnum = 1484 cpu_to_le32((u32) (size_t) cd->iplist[1]); 1485 lrd->log.noredoinoext.inoext_idx = 1486 cpu_to_le32((u32) (size_t) cd->iplist[2]); 1487 1488 pxdlock = (struct pxd_lock *) & tlck->lock; 1489 *pxd = pxdlock->pxd; 1490 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); 1491 1492 /* update bmap */ 1493 tlck->flag |= tlckUPDATEMAP; 1494 1495 /* mark page as homeward bound */ 1496 tlck->flag |= tlckWRITEPAGE; 1497 } else 1498 jfs_err("diLog: UFO type tlck:0x%p", tlck); 1499 return; 1500 } 1501 1502 /* 1503 * dataLog() 1504 * 1505 * function: log data tlock 1506 */ 1507 static void dataLog(struct jfs_log *log, struct tblock *tblk, struct lrd *lrd, 1508 struct tlock *tlck) 1509 { 1510 struct metapage *mp; 1511 pxd_t *pxd; 1512 1513 mp = tlck->mp; 1514 1515 /* initialize as REDOPAGE record format */ 1516 lrd->log.redopage.type = cpu_to_le16(LOG_DATA); 1517 lrd->log.redopage.l2linesize = cpu_to_le16(L2DATASLOTSIZE); 1518 1519 pxd = &lrd->log.redopage.pxd; 1520 1521 /* log after-image for logredo(): */ 1522 lrd->type = cpu_to_le16(LOG_REDOPAGE); 1523 1524 if (jfs_dirtable_inline(tlck->ip)) { 1525 /* 1526 * The table has been truncated, we've must have deleted 1527 * the last entry, so don't bother logging this 1528 */ 1529 mp->lid = 0; 1530 grab_metapage(mp); 1531 metapage_homeok(mp); 1532 discard_metapage(mp); 1533 tlck->mp = NULL; 1534 return; 1535 } 1536 1537 PXDaddress(pxd, mp->index); 1538 PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits); 1539 1540 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); 1541 1542 /* mark page as homeward bound */ 1543 tlck->flag |= tlckWRITEPAGE; 1544 1545 return; 1546 } 1547 1548 /* 1549 * dtLog() 1550 * 1551 * function: log dtree tlock and format maplock to update bmap; 1552 */ 1553 static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, 1554 struct tlock * tlck) 1555 { 1556 struct metapage *mp; 1557 struct pxd_lock *pxdlock; 1558 pxd_t *pxd; 1559 1560 mp = tlck->mp; 1561 1562 /* initialize as REDOPAGE/NOREDOPAGE record format */ 1563 lrd->log.redopage.type = cpu_to_le16(LOG_DTREE); 1564 lrd->log.redopage.l2linesize = cpu_to_le16(L2DTSLOTSIZE); 1565 1566 pxd = &lrd->log.redopage.pxd; 1567 1568 if (tlck->type & tlckBTROOT) 1569 lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT); 1570 1571 /* 1572 * page extension via relocation: entry insertion; 1573 * page extension in-place: entry insertion; 1574 * new right page from page split, reinitialized in-line 1575 * root from root page split: entry insertion; 1576 */ 1577 if (tlck->type & (tlckNEW | tlckEXTEND)) { 1578 /* log after-image of the new page for logredo(): 1579 * mark log (LOG_NEW) for logredo() to initialize 1580 * freelist and update bmap for alloc of the new page; 1581 */ 1582 lrd->type = cpu_to_le16(LOG_REDOPAGE); 1583 if (tlck->type & tlckEXTEND) 1584 lrd->log.redopage.type |= cpu_to_le16(LOG_EXTEND); 1585 else 1586 lrd->log.redopage.type |= cpu_to_le16(LOG_NEW); 1587 PXDaddress(pxd, mp->index); 1588 PXDlength(pxd, 1589 mp->logical_size >> tblk->sb->s_blocksize_bits); 1590 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); 1591 1592 /* format a maplock for txUpdateMap() to update bPMAP for 1593 * alloc of the new page; 1594 */ 1595 if (tlck->type & tlckBTROOT) 1596 return; 1597 tlck->flag |= tlckUPDATEMAP; 1598 pxdlock = (struct pxd_lock *) & tlck->lock; 1599 pxdlock->flag = mlckALLOCPXD; 1600 pxdlock->pxd = *pxd; 1601 1602 pxdlock->index = 1; 1603 1604 /* mark page as homeward bound */ 1605 
tlck->flag |= tlckWRITEPAGE; 1606 return; 1607 } 1608 1609 /* 1610 * entry insertion/deletion, 1611 * sibling page link update (old right page before split); 1612 */ 1613 if (tlck->type & (tlckENTRY | tlckRELINK)) { 1614 /* log after-image for logredo(): */ 1615 lrd->type = cpu_to_le16(LOG_REDOPAGE); 1616 PXDaddress(pxd, mp->index); 1617 PXDlength(pxd, 1618 mp->logical_size >> tblk->sb->s_blocksize_bits); 1619 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); 1620 1621 /* mark page as homeward bound */ 1622 tlck->flag |= tlckWRITEPAGE; 1623 return; 1624 } 1625 1626 /* 1627 * page deletion: page has been invalidated 1628 * page relocation: source extent 1629 * 1630 * a maplock for free of the page has been formatted 1631 * at txLock() time); 1632 */ 1633 if (tlck->type & (tlckFREE | tlckRELOCATE)) { 1634 /* log LOG_NOREDOPAGE of the deleted page for logredo() 1635 * to start NoRedoPage filter and to update bmap for free 1636 * of the deletd page 1637 */ 1638 lrd->type = cpu_to_le16(LOG_NOREDOPAGE); 1639 pxdlock = (struct pxd_lock *) & tlck->lock; 1640 *pxd = pxdlock->pxd; 1641 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); 1642 1643 /* a maplock for txUpdateMap() for free of the page 1644 * has been formatted at txLock() time; 1645 */ 1646 tlck->flag |= tlckUPDATEMAP; 1647 } 1648 return; 1649 } 1650 1651 /* 1652 * xtLog() 1653 * 1654 * function: log xtree tlock and format maplock to update bmap; 1655 */ 1656 static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, 1657 struct tlock * tlck) 1658 { 1659 struct inode *ip; 1660 struct metapage *mp; 1661 xtpage_t *p; 1662 struct xtlock *xtlck; 1663 struct maplock *maplock; 1664 struct xdlistlock *xadlock; 1665 struct pxd_lock *pxdlock; 1666 pxd_t *page_pxd; 1667 int next, lwm, hwm; 1668 1669 ip = tlck->ip; 1670 mp = tlck->mp; 1671 1672 /* initialize as REDOPAGE/NOREDOPAGE record format */ 1673 lrd->log.redopage.type = cpu_to_le16(LOG_XTREE); 1674 lrd->log.redopage.l2linesize = cpu_to_le16(L2XTSLOTSIZE); 1675 1676 page_pxd = &lrd->log.redopage.pxd; 1677 1678 if (tlck->type & tlckBTROOT) { 1679 lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT); 1680 p = (xtpage_t *) &JFS_IP(ip)->i_xtroot; 1681 if (S_ISDIR(ip->i_mode)) 1682 lrd->log.redopage.type |= 1683 cpu_to_le16(LOG_DIR_XTREE); 1684 } else 1685 p = (xtpage_t *) mp->data; 1686 next = le16_to_cpu(p->header.nextindex); 1687 1688 xtlck = (struct xtlock *) & tlck->lock; 1689 1690 maplock = (struct maplock *) & tlck->lock; 1691 xadlock = (struct xdlistlock *) maplock; 1692 1693 /* 1694 * entry insertion/extension; 1695 * sibling page link update (old right page before split); 1696 */ 1697 if (tlck->type & (tlckNEW | tlckGROW | tlckRELINK)) { 1698 /* log after-image for logredo(): 1699 * logredo() will update bmap for alloc of new/extended 1700 * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from 1701 * after-image of XADlist; 1702 * logredo() resets (XAD_NEW|XAD_EXTEND) flag when 1703 * applying the after-image to the meta-data page. 1704 */ 1705 lrd->type = cpu_to_le16(LOG_REDOPAGE); 1706 PXDaddress(page_pxd, mp->index); 1707 PXDlength(page_pxd, 1708 mp->logical_size >> tblk->sb->s_blocksize_bits); 1709 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); 1710 1711 /* format a maplock for txUpdateMap() to update bPMAP 1712 * for alloc of new/extended extents of XAD[lwm:next) 1713 * from the page itself; 1714 * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag. 
1715 */ 1716 lwm = xtlck->lwm.offset; 1717 if (lwm == 0) 1718 lwm = XTPAGEMAXSLOT; 1719 1720 if (lwm == next) 1721 goto out; 1722 if (lwm > next) { 1723 jfs_err("xtLog: lwm > next"); 1724 goto out; 1725 } 1726 tlck->flag |= tlckUPDATEMAP; 1727 xadlock->flag = mlckALLOCXADLIST; 1728 xadlock->count = next - lwm; 1729 if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) { 1730 int i; 1731 pxd_t *pxd; 1732 /* 1733 * Lazy commit may allow xtree to be modified before 1734 * txUpdateMap runs. Copy xad into linelock to 1735 * preserve correct data. 1736 * 1737 * We can fit twice as many pxd's as xads in the lock 1738 */ 1739 xadlock->flag = mlckALLOCPXDLIST; 1740 pxd = xadlock->xdlist = &xtlck->pxdlock; 1741 for (i = 0; i < xadlock->count; i++) { 1742 PXDaddress(pxd, addressXAD(&p->xad[lwm + i])); 1743 PXDlength(pxd, lengthXAD(&p->xad[lwm + i])); 1744 p->xad[lwm + i].flag &= 1745 ~(XAD_NEW | XAD_EXTENDED); 1746 pxd++; 1747 } 1748 } else { 1749 /* 1750 * xdlist will point into the inode's xtree, ensure 1751 * that transaction is not committed lazily. 1752 */ 1753 xadlock->flag = mlckALLOCXADLIST; 1754 xadlock->xdlist = &p->xad[lwm]; 1755 tblk->xflag &= ~COMMIT_LAZY; 1756 } 1757 jfs_info("xtLog: alloc ip:0x%p mp:0x%p tlck:0x%p lwm:%d count:%d", 1758 tlck->ip, mp, tlck, lwm, xadlock->count); 1759 1760 maplock->index = 1; 1761 1762 out: 1763 /* mark page as homeward bound */ 1764 tlck->flag |= tlckWRITEPAGE; 1765 1766 return; 1767 } 1768 1769 /* 1770 * page deletion: file deletion/truncation (ref. xtTruncate()) 1771 * 1772 * (page will be invalidated after log is written and bmap 1773 * is updated from the page); 1774 */ 1775 if (tlck->type & tlckFREE) { 1776 /* LOG_NOREDOPAGE log for NoRedoPage filter: 1777 * if page free from file delete, NoRedoFile filter from 1778 * inode image of zero link count will subsume NoRedoPage 1779 * filters for each page; 1780 * if page free from file truncation, write NoRedoPage 1781 * filter; 1782 * 1783 * update of block allocation map for the page itself: 1784 * if page free from deletion and truncation, LOG_UPDATEMAP 1785 * log for the page itself is generated from processing 1786 * its parent page xad entries; 1787 */ 1788 /* if page free from file truncation, log LOG_NOREDOPAGE 1789 * of the deleted page for logredo() to start NoRedoPage 1790 * filter for the page; 1791 */ 1792 if (tblk->xflag & COMMIT_TRUNCATE) { 1793 /* write NOREDOPAGE for the page */ 1794 lrd->type = cpu_to_le16(LOG_NOREDOPAGE); 1795 PXDaddress(page_pxd, mp->index); 1796 PXDlength(page_pxd, 1797 mp->logical_size >> tblk->sb-> 1798 s_blocksize_bits); 1799 lrd->backchain = 1800 cpu_to_le32(lmLog(log, tblk, lrd, NULL)); 1801 1802 if (tlck->type & tlckBTROOT) { 1803 /* Empty xtree must be logged */ 1804 lrd->type = cpu_to_le16(LOG_REDOPAGE); 1805 lrd->backchain = 1806 cpu_to_le32(lmLog(log, tblk, lrd, tlck)); 1807 } 1808 } 1809 1810 /* init LOG_UPDATEMAP of the freed extents 1811 * XAD[XTENTRYSTART:hwm) from the deleted page itself 1812 * for logredo() to update bmap; 1813 */ 1814 lrd->type = cpu_to_le16(LOG_UPDATEMAP); 1815 lrd->log.updatemap.type = cpu_to_le16(LOG_FREEXADLIST); 1816 xtlck = (struct xtlock *) & tlck->lock; 1817 hwm = xtlck->hwm.offset; 1818 lrd->log.updatemap.nxd = 1819 cpu_to_le16(hwm - XTENTRYSTART + 1); 1820 /* reformat linelock for lmLog() */ 1821 xtlck->header.offset = XTENTRYSTART; 1822 xtlck->header.length = hwm - XTENTRYSTART + 1; 1823 xtlck->index = 1; 1824 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); 1825 1826 /* format a maplock for txUpdateMap() to
update bmap 1827 * to free extents of XAD[XTENTRYSTART:hwm) from the 1828 * deleted page itself; 1829 */ 1830 tlck->flag |= tlckUPDATEMAP; 1831 xadlock->count = hwm - XTENTRYSTART + 1; 1832 if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) { 1833 int i; 1834 pxd_t *pxd; 1835 /* 1836 * Lazy commit may allow xtree to be modified before 1837 * txUpdateMap runs. Copy xad into linelock to 1838 * preserve correct data. 1839 * 1840 * We can fit twice as many pxd's as xads in the lock 1841 */ 1842 xadlock->flag = mlckFREEPXDLIST; 1843 pxd = xadlock->xdlist = &xtlck->pxdlock; 1844 for (i = 0; i < xadlock->count; i++) { 1845 PXDaddress(pxd, 1846 addressXAD(&p->xad[XTENTRYSTART + i])); 1847 PXDlength(pxd, 1848 lengthXAD(&p->xad[XTENTRYSTART + i])); 1849 pxd++; 1850 } 1851 } else { 1852 /* 1853 * xdlist will point into the inode's xtree, ensure 1854 * that transaction is not committed lazily. 1855 */ 1856 xadlock->flag = mlckFREEXADLIST; 1857 xadlock->xdlist = &p->xad[XTENTRYSTART]; 1858 tblk->xflag &= ~COMMIT_LAZY; 1859 } 1860 jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d lwm:2", 1861 tlck->ip, mp, xadlock->count); 1862 1863 maplock->index = 1; 1864 1865 /* mark page as invalid */ 1866 if (((tblk->xflag & COMMIT_PWMAP) || S_ISDIR(ip->i_mode)) 1867 && !(tlck->type & tlckBTROOT)) 1868 tlck->flag |= tlckFREEPAGE; 1869 /* 1870 else (tblk->xflag & COMMIT_PMAP) 1871 ? release the page; 1872 */ 1873 return; 1874 } 1875 1876 /* 1877 * page/entry truncation: file truncation (ref. xtTruncate()) 1878 * 1879 * |----------+------+------+---------------| 1880 * | | | 1881 * | | hwm - hwm before truncation 1882 * | next - truncation point 1883 * lwm - lwm before truncation 1884 * header ? 1885 */ 1886 if (tlck->type & tlckTRUNCATE) { 1887 pxd_t pxd; /* truncated extent of xad */ 1888 int twm; 1889 1890 /* 1891 * For truncation the entire linelock may be used, so it would 1892 * be difficult to store xad list in linelock itself. 1893 * Therefore, we'll just force transaction to be committed 1894 * synchronously, so that xtree pages won't be changed before 1895 * txUpdateMap runs. 1896 */ 1897 tblk->xflag &= ~COMMIT_LAZY; 1898 lwm = xtlck->lwm.offset; 1899 if (lwm == 0) 1900 lwm = XTPAGEMAXSLOT; 1901 hwm = xtlck->hwm.offset; 1902 twm = xtlck->twm.offset; 1903 1904 /* 1905 * write log records 1906 */ 1907 /* log after-image for logredo(): 1908 * 1909 * logredo() will update bmap for alloc of new/extended 1910 * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from 1911 * after-image of XADlist; 1912 * logredo() resets (XAD_NEW|XAD_EXTEND) flag when 1913 * applying the after-image to the meta-data page.
1914 */ 1915 lrd->type = cpu_to_le16(LOG_REDOPAGE); 1916 PXDaddress(page_pxd, mp->index); 1917 PXDlength(page_pxd, 1918 mp->logical_size >> tblk->sb->s_blocksize_bits); 1919 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); 1920 1921 /* 1922 * truncate entry XAD[twm == next - 1]: 1923 */ 1924 if (twm == next - 1) { 1925 /* init LOG_UPDATEMAP for logredo() to update bmap for 1926 * free of truncated delta extent of the truncated 1927 * entry XAD[next - 1]: 1928 * (xtlck->pxdlock = truncated delta extent); 1929 */ 1930 pxdlock = (struct pxd_lock *) & xtlck->pxdlock; 1931 /* assert(pxdlock->type & tlckTRUNCATE); */ 1932 lrd->type = cpu_to_le16(LOG_UPDATEMAP); 1933 lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD); 1934 lrd->log.updatemap.nxd = cpu_to_le16(1); 1935 lrd->log.updatemap.pxd = pxdlock->pxd; 1936 pxd = pxdlock->pxd; /* save to format maplock */ 1937 lrd->backchain = 1938 cpu_to_le32(lmLog(log, tblk, lrd, NULL)); 1939 } 1940 1941 /* 1942 * free entries XAD[next:hwm]: 1943 */ 1944 if (hwm >= next) { 1945 /* init LOG_UPDATEMAP of the freed extents 1946 * XAD[next:hwm] from the deleted page itself 1947 * for logredo() to update bmap; 1948 */ 1949 lrd->type = cpu_to_le16(LOG_UPDATEMAP); 1950 lrd->log.updatemap.type = 1951 cpu_to_le16(LOG_FREEXADLIST); 1952 xtlck = (struct xtlock *) & tlck->lock; 1953 hwm = xtlck->hwm.offset; 1954 lrd->log.updatemap.nxd = 1955 cpu_to_le16(hwm - next + 1); 1956 /* reformat linelock for lmLog() */ 1957 xtlck->header.offset = next; 1958 xtlck->header.length = hwm - next + 1; 1959 xtlck->index = 1; 1960 lrd->backchain = 1961 cpu_to_le32(lmLog(log, tblk, lrd, tlck)); 1962 } 1963 1964 /* 1965 * format maplock(s) for txUpdateMap() to update bmap 1966 */ 1967 maplock->index = 0; 1968 1969 /* 1970 * allocate entries XAD[lwm:next): 1971 */ 1972 if (lwm < next) { 1973 /* format a maplock for txUpdateMap() to update bPMAP 1974 * for alloc of new/extended extents of XAD[lwm:next) 1975 * from the page itself; 1976 * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag. 
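 * Up to three maplocks may be formatted for the truncate case: an alloc list for XAD[lwm:next), a free pxd for the truncated tail of XAD[next - 1], and a free list for XAD[next:hwm]; maplock->index counts how many were filled in.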
1977 */ 1978 tlck->flag |= tlckUPDATEMAP; 1979 xadlock->flag = mlckALLOCXADLIST; 1980 xadlock->count = next - lwm; 1981 xadlock->xdlist = &p->xad[lwm]; 1982 1983 jfs_info("xtLog: alloc ip:0x%p mp:0x%p count:%d lwm:%d next:%d", 1984 tlck->ip, mp, xadlock->count, lwm, next); 1985 maplock->index++; 1986 xadlock++; 1987 } 1988 1989 /* 1990 * truncate entry XAD[twm == next - 1]: 1991 */ 1992 if (twm == next - 1) { 1993 /* format a maplock for txUpdateMap() to update bmap 1994 * to free truncated delta extent of the truncated 1995 * entry XAD[next - 1]; 1996 * (xtlck->pxdlock = truncated delta extent); 1997 */ 1998 tlck->flag |= tlckUPDATEMAP; 1999 pxdlock = (struct pxd_lock *) xadlock; 2000 pxdlock->flag = mlckFREEPXD; 2001 pxdlock->count = 1; 2002 pxdlock->pxd = pxd; 2003 2004 jfs_info("xtLog: truncate ip:0x%p mp:0x%p count:%d hwm:%d", 2005 ip, mp, pxdlock->count, hwm); 2006 maplock->index++; 2007 xadlock++; 2008 } 2009 2010 /* 2011 * free entries XAD[next:hwm]: 2012 */ 2013 if (hwm >= next) { 2014 /* format a maplock for txUpdateMap() to update bmap 2015 * to free extents of XAD[next:hwm] from the deleted 2016 * page itself; 2017 */ 2018 tlck->flag |= tlckUPDATEMAP; 2019 xadlock->flag = mlckFREEXADLIST; 2020 xadlock->count = hwm - next + 1; 2021 xadlock->xdlist = &p->xad[next]; 2022 2023 jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d next:%d hwm:%d", 2024 tlck->ip, mp, xadlock->count, next, hwm); 2025 maplock->index++; 2026 } 2027 2028 /* mark page as homeward bound */ 2029 tlck->flag |= tlckWRITEPAGE; 2030 } 2031 return; 2032 } 2033 2034 /* 2035 * mapLog() 2036 * 2037 * function: log from maplock of freed data extents; 2038 */ 2039 static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, 2040 struct tlock * tlck) 2041 { 2042 struct pxd_lock *pxdlock; 2043 int i, nlock; 2044 pxd_t *pxd; 2045 2046 /* 2047 * page relocation: free the source page extent 2048 * 2049 * a maplock for txUpdateMap() for free of the page 2050 * has been formatted at txLock() time saving the src 2051 * relocated page address; 2052 */ 2053 if (tlck->type & tlckRELOCATE) { 2054 /* log LOG_NOREDOPAGE of the old relocated page 2055 * for logredo() to start NoRedoPage filter; 2056 */ 2057 lrd->type = cpu_to_le16(LOG_NOREDOPAGE); 2058 pxdlock = (struct pxd_lock *) & tlck->lock; 2059 pxd = &lrd->log.redopage.pxd; 2060 *pxd = pxdlock->pxd; 2061 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); 2062 2063 /* (N.B.
currently, logredo() does NOT update bmap 2064 * for free of the page itself for (LOG_XTREE|LOG_NOREDOPAGE); 2065 * if page free from relocation, LOG_UPDATEMAP log is 2066 * specifically generated now for logredo() 2067 * to update bmap for free of src relocated page; 2068 * (new flag LOG_RELOCATE may be introduced which will 2069 * inform logredo() to start NORedoPage filter and also 2070 * update block allocation map at the same time, thus 2071 * avoiding an extra log write); 2072 */ 2073 lrd->type = cpu_to_le16(LOG_UPDATEMAP); 2074 lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD); 2075 lrd->log.updatemap.nxd = cpu_to_le16(1); 2076 lrd->log.updatemap.pxd = pxdlock->pxd; 2077 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); 2078 2079 /* a maplock for txUpdateMap() for free of the page 2080 * has been formatted at txLock() time; 2081 */ 2082 tlck->flag |= tlckUPDATEMAP; 2083 return; 2084 } 2085 /* 2086 2087 * Otherwise it's not a relocate request 2088 * 2089 */ 2090 else { 2091 /* log LOG_UPDATEMAP for logredo() to update bmap for 2092 * free of truncated/relocated delta extent of the data; 2093 * e.g.: external EA extent, relocated/truncated extent 2094 * from xtTailgate(); 2095 */ 2096 lrd->type = cpu_to_le16(LOG_UPDATEMAP); 2097 pxdlock = (struct pxd_lock *) & tlck->lock; 2098 nlock = pxdlock->index; 2099 for (i = 0; i < nlock; i++, pxdlock++) { 2100 if (pxdlock->flag & mlckALLOCPXD) 2101 lrd->log.updatemap.type = 2102 cpu_to_le16(LOG_ALLOCPXD); 2103 else 2104 lrd->log.updatemap.type = 2105 cpu_to_le16(LOG_FREEPXD); 2106 lrd->log.updatemap.nxd = cpu_to_le16(1); 2107 lrd->log.updatemap.pxd = pxdlock->pxd; 2108 lrd->backchain = 2109 cpu_to_le32(lmLog(log, tblk, lrd, NULL)); 2110 jfs_info("mapLog: xaddr:0x%lx xlen:0x%x", 2111 (ulong) addressPXD(&pxdlock->pxd), 2112 lengthPXD(&pxdlock->pxd)); 2113 } 2114 2115 /* update bmap */ 2116 tlck->flag |= tlckUPDATEMAP; 2117 } 2118 } 2119 2120 /* 2121 * txEA() 2122 * 2123 * function: acquire maplock for EA/ACL extents or 2124 * set COMMIT_INLINE flag; 2125 */ 2126 void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea) 2127 { 2128 struct tlock *tlck = NULL; 2129 struct pxd_lock *maplock = NULL, *pxdlock = NULL; 2130 2131 /* 2132 * format maplock for alloc of new EA extent 2133 */ 2134 if (newea) { 2135 /* Since the newea could be a completely zeroed entry we need to 2136 * check for the two flags which indicate we should actually 2137 * commit new EA data 2138 */ 2139 if (newea->flag & DXD_EXTENT) { 2140 tlck = txMaplock(tid, ip, tlckMAP); 2141 maplock = (struct pxd_lock *) & tlck->lock; 2142 pxdlock = (struct pxd_lock *) maplock; 2143 pxdlock->flag = mlckALLOCPXD; 2144 PXDaddress(&pxdlock->pxd, addressDXD(newea)); 2145 PXDlength(&pxdlock->pxd, lengthDXD(newea)); 2146 pxdlock++; 2147 maplock->index = 1; 2148 } else if (newea->flag & DXD_INLINE) { 2149 tlck = NULL; 2150 2151 set_cflag(COMMIT_Inlineea, ip); 2152 } 2153 } 2154 2155 /* 2156 * format maplock for free of old EA extent 2157 */ 2158 if (!test_cflag(COMMIT_Nolink, ip) && oldea->flag & DXD_EXTENT) { 2159 if (tlck == NULL) { 2160 tlck = txMaplock(tid, ip, tlckMAP); 2161 maplock = (struct pxd_lock *) & tlck->lock; 2162 pxdlock = (struct pxd_lock *) maplock; 2163 maplock->index = 0; 2164 } 2165 pxdlock->flag = mlckFREEPXD; 2166 PXDaddress(&pxdlock->pxd, addressDXD(oldea)); 2167 PXDlength(&pxdlock->pxd, lengthDXD(oldea)); 2168 maplock->index++; 2169 } 2170 } 2171 2172 /* 2173 * txForce() 2174 * 2175 * function: synchronously write pages locked by transaction 2176 * after 
txLog() but before txUpdateMap(); 2177 */ 2178 static void txForce(struct tblock * tblk) 2179 { 2180 struct tlock *tlck; 2181 lid_t lid, next; 2182 struct metapage *mp; 2183 2184 /* 2185 * reverse the order of transaction tlocks in 2186 * careful update order of address index pages 2187 * (right to left, bottom up) 2188 */ 2189 tlck = lid_to_tlock(tblk->next); 2190 lid = tlck->next; 2191 tlck->next = 0; 2192 while (lid) { 2193 tlck = lid_to_tlock(lid); 2194 next = tlck->next; 2195 tlck->next = tblk->next; 2196 tblk->next = lid; 2197 lid = next; 2198 } 2199 2200 /* 2201 * synchronously write the page, and 2202 * hold the page for txUpdateMap(); 2203 */ 2204 for (lid = tblk->next; lid; lid = next) { 2205 tlck = lid_to_tlock(lid); 2206 next = tlck->next; 2207 2208 if ((mp = tlck->mp) != NULL && 2209 (tlck->type & tlckBTROOT) == 0) { 2210 assert(mp->xflag & COMMIT_PAGE); 2211 2212 if (tlck->flag & tlckWRITEPAGE) { 2213 tlck->flag &= ~tlckWRITEPAGE; 2214 2215 /* do not release page to freelist */ 2216 force_metapage(mp); 2217 #if 0 2218 /* 2219 * The "right" thing to do here is to 2220 * synchronously write the metadata. 2221 * With the current implementation this 2222 * is hard since write_metapage requires 2223 * us to kunmap & remap the page. If we 2224 * have tlocks pointing into the metadata 2225 * pages, we don't want to do this. I think 2226 * we can get by with synchronously writing 2227 * the pages when they are released. 2228 */ 2229 assert(mp->nohomeok); 2230 set_bit(META_dirty, &mp->flag); 2231 set_bit(META_sync, &mp->flag); 2232 #endif 2233 } 2234 } 2235 } 2236 } 2237 2238 /* 2239 * txUpdateMap() 2240 * 2241 * function: update persistent allocation map (and working map 2242 * if appropriate); 2243 * 2244 * parameter: 2245 */ 2246 static void txUpdateMap(struct tblock * tblk) 2247 { 2248 struct inode *ip; 2249 struct inode *ipimap; 2250 lid_t lid; 2251 struct tlock *tlck; 2252 struct maplock *maplock; 2253 struct pxd_lock pxdlock; 2254 int maptype; 2255 int k, nlock; 2256 struct metapage *mp = NULL; 2257 2258 ipimap = JFS_SBI(tblk->sb)->ipimap; 2259 2260 maptype = (tblk->xflag & COMMIT_PMAP) ? COMMIT_PMAP : COMMIT_PWMAP; 2261 2262 2263 /* 2264 * update block allocation map 2265 * 2266 * update allocation state in pmap (and wmap) and 2267 * update lsn of the pmap page; 2268 */ 2269 /* 2270 * scan each tlock/page of transaction for block allocation/free: 2271 * 2272 * for each tlock/page of transaction, update map. 2273 * ? are there tlock for pmap and pwmap at the same time ? 2274 */ 2275 for (lid = tblk->next; lid; lid = tlck->next) { 2276 tlck = lid_to_tlock(lid); 2277 2278 if ((tlck->flag & tlckUPDATEMAP) == 0) 2279 continue; 2280 2281 if (tlck->flag & tlckFREEPAGE) { 2282 /* 2283 * Another thread may attempt to reuse freed space 2284 * immediately, so we want to get rid of the metapage 2285 * before anyone else has a chance to get it. 2286 * Lock metapage, update maps, then invalidate 2287 * the metapage. 2288 */ 2289 mp = tlck->mp; 2290 ASSERT(mp->xflag & COMMIT_PAGE); 2291 grab_metapage(mp); 2292 } 2293 2294 /* 2295 * extent list: 2296 * . in-line PXD list: 2297 * . 
out-of-line XAD list:
2298 */
2299 maplock = (struct maplock *) & tlck->lock;
2300 nlock = maplock->index;
2301
2302 for (k = 0; k < nlock; k++, maplock++) {
2303 /*
2304 * allocate blocks in persistent map:
2305 *
2306 * blocks have been allocated from wmap at alloc time;
2307 */
2308 if (maplock->flag & mlckALLOC) {
2309 txAllocPMap(ipimap, maplock, tblk);
2310 }
2311 /*
2312 * free blocks in persistent and working map:
2313 * blocks will be freed in pmap and then in wmap;
2314 *
2315 * ? tblock specifies the PMAP/PWMAP based upon
2316 * transaction
2317 *
2318 * free blocks in persistent map:
2319 * blocks will be freed from wmap at last reference
2320 * release of the object for regular files;
2321 *
2322 * Always free blocks from both persistent & working
2323 * maps for directories
2324 */
2325 else { /* (maplock->flag & mlckFREE) */
2326
2327 if (tlck->flag & tlckDIRECTORY)
2328 txFreeMap(ipimap, maplock,
2329 tblk, COMMIT_PWMAP);
2330 else
2331 txFreeMap(ipimap, maplock,
2332 tblk, maptype);
2333 }
2334 }
2335 if (tlck->flag & tlckFREEPAGE) {
2336 if (!(tblk->flag & tblkGC_LAZY)) {
2337 /* This is equivalent to txRelease */
2338 ASSERT(mp->lid == lid);
2339 tlck->mp->lid = 0;
2340 }
2341 assert(mp->nohomeok == 1);
2342 metapage_homeok(mp);
2343 discard_metapage(mp);
2344 tlck->mp = NULL;
2345 }
2346 }
2347 /*
2348 * update inode allocation map
2349 *
2350 * update allocation state in pmap and
2351 * update lsn of the pmap page;
2352 * update in-memory inode flag/state
2353 *
2354 * unlock mapper/write lock
2355 */
2356 if (tblk->xflag & COMMIT_CREATE) {
2357 diUpdatePMap(ipimap, tblk->ino, false, tblk);
2358 /* update persistent block allocation map
2359 * for the allocation of inode extent;
2360 */
2361 pxdlock.flag = mlckALLOCPXD;
2362 pxdlock.pxd = tblk->u.ixpxd;
2363 pxdlock.index = 1;
2364 txAllocPMap(ipimap, (struct maplock *) & pxdlock, tblk);
2365 } else if (tblk->xflag & COMMIT_DELETE) {
2366 ip = tblk->u.ip;
2367 diUpdatePMap(ipimap, ip->i_ino, true, tblk);
2368 iput(ip);
2369 }
2370 }
2371
2372 /*
2373 * txAllocPMap()
2374 *
2375 * function: allocate from persistent map;
2376 *
2377 * parameter:
2378 * ipbmap -
2379 * maplock -
2380 * xad list:
2381 * pxd:
2382 *
2383 * maptype -
2384 * allocate from persistent map;
2385 * free from persistent map;
2386 * (e.g., tmp file - free from working map at release
2387 * of last reference);
2388 * free from persistent and working map;
2389 *
2390 * lsn - log sequence number;
2391 */
2392 static void txAllocPMap(struct inode *ip, struct maplock * maplock,
2393 struct tblock * tblk)
2394 {
2395 struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
2396 struct xdlistlock *xadlistlock;
2397 xad_t *xad;
2398 s64 xaddr;
2399 int xlen;
2400 struct pxd_lock *pxdlock;
2401 struct xdlistlock *pxdlistlock;
2402 pxd_t *pxd;
2403 int n;
2404
2405 /*
2406 * allocate from persistent map;
2407 */
2408 if (maplock->flag & mlckALLOCXADLIST) {
2409 xadlistlock = (struct xdlistlock *) maplock;
2410 xad = xadlistlock->xdlist;
2411 for (n = 0; n < xadlistlock->count; n++, xad++) {
2412 if (xad->flag & (XAD_NEW | XAD_EXTENDED)) {
2413 xaddr = addressXAD(xad);
2414 xlen = lengthXAD(xad);
2415 dbUpdatePMap(ipbmap, false, xaddr,
2416 (s64) xlen, tblk);
2417 xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
2418 jfs_info("allocPMap: xaddr:0x%lx xlen:%d",
2419 (ulong) xaddr, xlen);
2420 }
2421 }
2422 } else if (maplock->flag & mlckALLOCPXD) {
2423 pxdlock = (struct pxd_lock *) maplock;
2424 xaddr = addressPXD(&pxdlock->pxd);
2425 xlen = lengthPXD(&pxdlock->pxd);
2426
dbUpdatePMap(ipbmap, false, xaddr, (s64) xlen, tblk); 2427 jfs_info("allocPMap: xaddr:0x%lx xlen:%d", (ulong) xaddr, xlen); 2428 } else { /* (maplock->flag & mlckALLOCPXDLIST) */ 2429 2430 pxdlistlock = (struct xdlistlock *) maplock; 2431 pxd = pxdlistlock->xdlist; 2432 for (n = 0; n < pxdlistlock->count; n++, pxd++) { 2433 xaddr = addressPXD(pxd); 2434 xlen = lengthPXD(pxd); 2435 dbUpdatePMap(ipbmap, false, xaddr, (s64) xlen, 2436 tblk); 2437 jfs_info("allocPMap: xaddr:0x%lx xlen:%d", 2438 (ulong) xaddr, xlen); 2439 } 2440 } 2441 } 2442 2443 /* 2444 * txFreeMap() 2445 * 2446 * function: free from persistent and/or working map; 2447 * 2448 * todo: optimization 2449 */ 2450 void txFreeMap(struct inode *ip, 2451 struct maplock * maplock, struct tblock * tblk, int maptype) 2452 { 2453 struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; 2454 struct xdlistlock *xadlistlock; 2455 xad_t *xad; 2456 s64 xaddr; 2457 int xlen; 2458 struct pxd_lock *pxdlock; 2459 struct xdlistlock *pxdlistlock; 2460 pxd_t *pxd; 2461 int n; 2462 2463 jfs_info("txFreeMap: tblk:0x%p maplock:0x%p maptype:0x%x", 2464 tblk, maplock, maptype); 2465 2466 /* 2467 * free from persistent map; 2468 */ 2469 if (maptype == COMMIT_PMAP || maptype == COMMIT_PWMAP) { 2470 if (maplock->flag & mlckFREEXADLIST) { 2471 xadlistlock = (struct xdlistlock *) maplock; 2472 xad = xadlistlock->xdlist; 2473 for (n = 0; n < xadlistlock->count; n++, xad++) { 2474 if (!(xad->flag & XAD_NEW)) { 2475 xaddr = addressXAD(xad); 2476 xlen = lengthXAD(xad); 2477 dbUpdatePMap(ipbmap, true, xaddr, 2478 (s64) xlen, tblk); 2479 jfs_info("freePMap: xaddr:0x%lx xlen:%d", 2480 (ulong) xaddr, xlen); 2481 } 2482 } 2483 } else if (maplock->flag & mlckFREEPXD) { 2484 pxdlock = (struct pxd_lock *) maplock; 2485 xaddr = addressPXD(&pxdlock->pxd); 2486 xlen = lengthPXD(&pxdlock->pxd); 2487 dbUpdatePMap(ipbmap, true, xaddr, (s64) xlen, 2488 tblk); 2489 jfs_info("freePMap: xaddr:0x%lx xlen:%d", 2490 (ulong) xaddr, xlen); 2491 } else { /* (maplock->flag & mlckALLOCPXDLIST) */ 2492 2493 pxdlistlock = (struct xdlistlock *) maplock; 2494 pxd = pxdlistlock->xdlist; 2495 for (n = 0; n < pxdlistlock->count; n++, pxd++) { 2496 xaddr = addressPXD(pxd); 2497 xlen = lengthPXD(pxd); 2498 dbUpdatePMap(ipbmap, true, xaddr, 2499 (s64) xlen, tblk); 2500 jfs_info("freePMap: xaddr:0x%lx xlen:%d", 2501 (ulong) xaddr, xlen); 2502 } 2503 } 2504 } 2505 2506 /* 2507 * free from working map; 2508 */ 2509 if (maptype == COMMIT_PWMAP || maptype == COMMIT_WMAP) { 2510 if (maplock->flag & mlckFREEXADLIST) { 2511 xadlistlock = (struct xdlistlock *) maplock; 2512 xad = xadlistlock->xdlist; 2513 for (n = 0; n < xadlistlock->count; n++, xad++) { 2514 xaddr = addressXAD(xad); 2515 xlen = lengthXAD(xad); 2516 dbFree(ip, xaddr, (s64) xlen); 2517 xad->flag = 0; 2518 jfs_info("freeWMap: xaddr:0x%lx xlen:%d", 2519 (ulong) xaddr, xlen); 2520 } 2521 } else if (maplock->flag & mlckFREEPXD) { 2522 pxdlock = (struct pxd_lock *) maplock; 2523 xaddr = addressPXD(&pxdlock->pxd); 2524 xlen = lengthPXD(&pxdlock->pxd); 2525 dbFree(ip, xaddr, (s64) xlen); 2526 jfs_info("freeWMap: xaddr:0x%lx xlen:%d", 2527 (ulong) xaddr, xlen); 2528 } else { /* (maplock->flag & mlckFREEPXDLIST) */ 2529 2530 pxdlistlock = (struct xdlistlock *) maplock; 2531 pxd = pxdlistlock->xdlist; 2532 for (n = 0; n < pxdlistlock->count; n++, pxd++) { 2533 xaddr = addressPXD(pxd); 2534 xlen = lengthPXD(pxd); 2535 dbFree(ip, xaddr, (s64) xlen); 2536 jfs_info("freeWMap: xaddr:0x%lx xlen:%d", 2537 (ulong) xaddr, xlen); 2538 } 2539 } 2540 } 2541 } 2542 2543 
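/*
 * For reference, a minimal sketch of the producer side of the maplock
 * machinery that txUpdateMap()/txAllocPMap()/txFreeMap() above consume,
 * modelled on txEA(); xaddr and xlen stand in for a hypothetical extent
 * to be freed when the transaction commits:
 *
 *	struct tlock *tlck = txMaplock(tid, ip, tlckMAP);
 *	struct pxd_lock *pxdlock = (struct pxd_lock *) &tlck->lock;
 *
 *	pxdlock->flag = mlckFREEPXD;
 *	PXDaddress(&pxdlock->pxd, xaddr);
 *	PXDlength(&pxdlock->pxd, xlen);
 *	pxdlock->index = 1;
 *
 * At commit time txUpdateMap() walks each such maplock: entries flagged
 * mlckALLOC* are passed to txAllocPMap(), the remainder to txFreeMap().
 */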
/*
2544 * txFreelock()
2545 *
2546 * function: remove tlock from inode anonymous locklist
2547 */
2548 void txFreelock(struct inode *ip)
2549 {
2550 struct jfs_inode_info *jfs_ip = JFS_IP(ip);
2551 struct tlock *xtlck, *tlck;
2552 lid_t xlid = 0, lid;
2553
2554 if (!jfs_ip->atlhead)
2555 return;
2556
2557 TXN_LOCK();
2558 xtlck = (struct tlock *) &jfs_ip->atlhead;
2559
2560 while ((lid = xtlck->next) != 0) {
2561 tlck = lid_to_tlock(lid);
2562 if (tlck->flag & tlckFREELOCK) {
2563 xtlck->next = tlck->next;
2564 txLockFree(lid);
2565 } else {
2566 xtlck = tlck;
2567 xlid = lid;
2568 }
2569 }
2570
2571 if (jfs_ip->atlhead)
2572 jfs_ip->atltail = xlid;
2573 else {
2574 jfs_ip->atltail = 0;
2575 /*
2576 * If inode was on anon_list, remove it
2577 */
2578 list_del_init(&jfs_ip->anon_inode_list);
2579 }
2580 TXN_UNLOCK();
2581 }
2582
2583 /*
2584 * txAbort()
2585 *
2586 * function: abort tx before commit;
2587 *
2588 * frees line-locks and segment locks for all
2589 * segments in the comdata structure.
2590 * Optionally sets the state of the file-system to FM_DIRTY in the super-block.
2591 * The log age of in-memory page-frames locked by the transaction is
2592 * reset to 0 (to avoid log wrap).
2593 */
2594 void txAbort(tid_t tid, int dirty)
2595 {
2596 lid_t lid, next;
2597 struct metapage *mp;
2598 struct tblock *tblk = tid_to_tblock(tid);
2599 struct tlock *tlck;
2600
2601 /*
2602 * free tlocks of the transaction
2603 */
2604 for (lid = tblk->next; lid; lid = next) {
2605 tlck = lid_to_tlock(lid);
2606 next = tlck->next;
2607 mp = tlck->mp;
2608 JFS_IP(tlck->ip)->xtlid = 0;
2609
2610 if (mp) {
2611 mp->lid = 0;
2612
2613 /*
2614 * reset lsn of page to avoid log wrap:
2615 *
2616 * (page may have been previously committed by another
2617 * transaction(s) but has not been paged, i.e.,
2618 * it may be on logsync list even though it has not
2619 * been logged for the current tx.)
2620 */
2621 if (mp->xflag & COMMIT_PAGE && mp->lsn)
2622 LogSyncRelease(mp);
2623 }
2624 /* insert tlock at head of freelist */
2625 TXN_LOCK();
2626 txLockFree(lid);
2627 TXN_UNLOCK();
2628 }
2629
2630 /* caller will free the transaction block */
2631
2632 tblk->next = tblk->last = 0;
2633
2634 /*
2635 * mark filesystem dirty
2636 */
2637 if (dirty)
2638 jfs_error(tblk->sb, "\n");
2639
2640 return;
2641 }
2642
2643 /*
2644 * txLazyCommit(void)
2645 *
2646 * All transactions except those changing ipimap (COMMIT_FORCE) are
2647 * processed by this routine. This ensures that the inode and block
2648 * allocation maps are updated in order. For synchronous transactions,
2649 * let the user thread finish processing after txUpdateMap() is called.
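 *
 * The handshake with the committing thread is done through the tblkGC_*
 * flags: txLazyCommit() spins (yielding) until the committer has marked
 * the tblock tblkGC_READY or tblkGC_UNLOCKED, performs txUpdateMap(),
 * then, under log->gclock, sets tblkGC_COMMITTED and wakes gcwait.  For
 * lazy (tblkGC_LAZY) transactions it also calls txUnlock() and txEnd()
 * itself, since no user thread is waiting to do so.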
2650 */ 2651 static void txLazyCommit(struct tblock * tblk) 2652 { 2653 struct jfs_log *log; 2654 2655 while (((tblk->flag & tblkGC_READY) == 0) && 2656 ((tblk->flag & tblkGC_UNLOCKED) == 0)) { 2657 /* We must have gotten ahead of the user thread 2658 */ 2659 jfs_info("jfs_lazycommit: tblk 0x%p not unlocked", tblk); 2660 yield(); 2661 } 2662 2663 jfs_info("txLazyCommit: processing tblk 0x%p", tblk); 2664 2665 txUpdateMap(tblk); 2666 2667 log = (struct jfs_log *) JFS_SBI(tblk->sb)->log; 2668 2669 spin_lock_irq(&log->gclock); // LOGGC_LOCK 2670 2671 tblk->flag |= tblkGC_COMMITTED; 2672 2673 if (tblk->flag & tblkGC_READY) 2674 log->gcrtc--; 2675 2676 wake_up_all(&tblk->gcwait); // LOGGC_WAKEUP 2677 2678 /* 2679 * Can't release log->gclock until we've tested tblk->flag 2680 */ 2681 if (tblk->flag & tblkGC_LAZY) { 2682 spin_unlock_irq(&log->gclock); // LOGGC_UNLOCK 2683 txUnlock(tblk); 2684 tblk->flag &= ~tblkGC_LAZY; 2685 txEnd(tblk - TxBlock); /* Convert back to tid */ 2686 } else 2687 spin_unlock_irq(&log->gclock); // LOGGC_UNLOCK 2688 2689 jfs_info("txLazyCommit: done: tblk = 0x%p", tblk); 2690 } 2691 2692 /* 2693 * jfs_lazycommit(void) 2694 * 2695 * To be run as a kernel daemon. If lbmIODone is called in an interrupt 2696 * context, or where blocking is not wanted, this routine will process 2697 * committed transactions from the unlock queue. 2698 */ 2699 int jfs_lazycommit(void *arg) 2700 { 2701 int WorkDone; 2702 struct tblock *tblk; 2703 unsigned long flags; 2704 struct jfs_sb_info *sbi; 2705 2706 set_freezable(); 2707 do { 2708 LAZY_LOCK(flags); 2709 jfs_commit_thread_waking = 0; /* OK to wake another thread */ 2710 while (!list_empty(&TxAnchor.unlock_queue)) { 2711 WorkDone = 0; 2712 list_for_each_entry(tblk, &TxAnchor.unlock_queue, 2713 cqueue) { 2714 2715 sbi = JFS_SBI(tblk->sb); 2716 /* 2717 * For each volume, the transactions must be 2718 * handled in order. If another commit thread 2719 * is handling a tblk for this superblock, 2720 * skip it 2721 */ 2722 if (sbi->commit_state & IN_LAZYCOMMIT) 2723 continue; 2724 2725 sbi->commit_state |= IN_LAZYCOMMIT; 2726 WorkDone = 1; 2727 2728 /* 2729 * Remove transaction from queue 2730 */ 2731 list_del(&tblk->cqueue); 2732 2733 LAZY_UNLOCK(flags); 2734 txLazyCommit(tblk); 2735 LAZY_LOCK(flags); 2736 2737 sbi->commit_state &= ~IN_LAZYCOMMIT; 2738 /* 2739 * Don't continue in the for loop. (We can't 2740 * anyway, it's unsafe!) We want to go back to 2741 * the beginning of the list. 
2742 */ 2743 break; 2744 } 2745 2746 /* If there was nothing to do, don't continue */ 2747 if (!WorkDone) 2748 break; 2749 } 2750 /* In case a wakeup came while all threads were active */ 2751 jfs_commit_thread_waking = 0; 2752 2753 if (freezing(current)) { 2754 LAZY_UNLOCK(flags); 2755 try_to_freeze(); 2756 } else { 2757 DECLARE_WAITQUEUE(wq, current); 2758 2759 add_wait_queue(&jfs_commit_thread_wait, &wq); 2760 set_current_state(TASK_INTERRUPTIBLE); 2761 LAZY_UNLOCK(flags); 2762 schedule(); 2763 remove_wait_queue(&jfs_commit_thread_wait, &wq); 2764 } 2765 } while (!kthread_should_stop()); 2766 2767 if (!list_empty(&TxAnchor.unlock_queue)) 2768 jfs_err("jfs_lazycommit being killed w/pending transactions!"); 2769 else 2770 jfs_info("jfs_lazycommit being killed"); 2771 return 0; 2772 } 2773 2774 void txLazyUnlock(struct tblock * tblk) 2775 { 2776 unsigned long flags; 2777 2778 LAZY_LOCK(flags); 2779 2780 list_add_tail(&tblk->cqueue, &TxAnchor.unlock_queue); 2781 /* 2782 * Don't wake up a commit thread if there is already one servicing 2783 * this superblock, or if the last one we woke up hasn't started yet. 2784 */ 2785 if (!(JFS_SBI(tblk->sb)->commit_state & IN_LAZYCOMMIT) && 2786 !jfs_commit_thread_waking) { 2787 jfs_commit_thread_waking = 1; 2788 wake_up(&jfs_commit_thread_wait); 2789 } 2790 LAZY_UNLOCK(flags); 2791 } 2792 2793 static void LogSyncRelease(struct metapage * mp) 2794 { 2795 struct jfs_log *log = mp->log; 2796 2797 assert(mp->nohomeok); 2798 assert(log); 2799 metapage_homeok(mp); 2800 } 2801 2802 /* 2803 * txQuiesce 2804 * 2805 * Block all new transactions and push anonymous transactions to 2806 * completion 2807 * 2808 * This does almost the same thing as jfs_sync below. We don't 2809 * worry about deadlocking when jfs_tlocks_low is set, since we would 2810 * expect jfs_sync to get us out of that jam. 2811 */ 2812 void txQuiesce(struct super_block *sb) 2813 { 2814 struct inode *ip; 2815 struct jfs_inode_info *jfs_ip; 2816 struct jfs_log *log = JFS_SBI(sb)->log; 2817 tid_t tid; 2818 2819 set_bit(log_QUIESCE, &log->flag); 2820 2821 TXN_LOCK(); 2822 restart: 2823 while (!list_empty(&TxAnchor.anon_list)) { 2824 jfs_ip = list_entry(TxAnchor.anon_list.next, 2825 struct jfs_inode_info, 2826 anon_inode_list); 2827 ip = &jfs_ip->vfs_inode; 2828 2829 /* 2830 * inode will be removed from anonymous list 2831 * when it is committed 2832 */ 2833 TXN_UNLOCK(); 2834 tid = txBegin(ip->i_sb, COMMIT_INODE | COMMIT_FORCE); 2835 mutex_lock(&jfs_ip->commit_mutex); 2836 txCommit(tid, 1, &ip, 0); 2837 txEnd(tid); 2838 mutex_unlock(&jfs_ip->commit_mutex); 2839 /* 2840 * Just to be safe. I don't know how 2841 * long we can run without blocking 2842 */ 2843 cond_resched(); 2844 TXN_LOCK(); 2845 } 2846 2847 /* 2848 * If jfs_sync is running in parallel, there could be some inodes 2849 * on anon_list2. Let's check. 2850 */ 2851 if (!list_empty(&TxAnchor.anon_list2)) { 2852 list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list); 2853 goto restart; 2854 } 2855 TXN_UNLOCK(); 2856 2857 /* 2858 * We may need to kick off the group commit 2859 */ 2860 jfs_flush_journal(log, 0); 2861 } 2862 2863 /* 2864 * txResume() 2865 * 2866 * Allows transactions to start again following txQuiesce 2867 */ 2868 void txResume(struct super_block *sb) 2869 { 2870 struct jfs_log *log = JFS_SBI(sb)->log; 2871 2872 clear_bit(log_QUIESCE, &log->flag); 2873 TXN_WAKEUP(&log->syncwait); 2874 } 2875 2876 /* 2877 * jfs_sync(void) 2878 * 2879 * To be run as a kernel daemon. This is awakened when tlocks run low. 
2880 * We write any inodes that have anonymous tlocks so they will become 2881 * available. 2882 */ 2883 int jfs_sync(void *arg) 2884 { 2885 struct inode *ip; 2886 struct jfs_inode_info *jfs_ip; 2887 tid_t tid; 2888 2889 set_freezable(); 2890 do { 2891 /* 2892 * write each inode on the anonymous inode list 2893 */ 2894 TXN_LOCK(); 2895 while (jfs_tlocks_low && !list_empty(&TxAnchor.anon_list)) { 2896 jfs_ip = list_entry(TxAnchor.anon_list.next, 2897 struct jfs_inode_info, 2898 anon_inode_list); 2899 ip = &jfs_ip->vfs_inode; 2900 2901 if (! igrab(ip)) { 2902 /* 2903 * Inode is being freed 2904 */ 2905 list_del_init(&jfs_ip->anon_inode_list); 2906 } else if (mutex_trylock(&jfs_ip->commit_mutex)) { 2907 /* 2908 * inode will be removed from anonymous list 2909 * when it is committed 2910 */ 2911 TXN_UNLOCK(); 2912 tid = txBegin(ip->i_sb, COMMIT_INODE); 2913 txCommit(tid, 1, &ip, 0); 2914 txEnd(tid); 2915 mutex_unlock(&jfs_ip->commit_mutex); 2916 2917 iput(ip); 2918 /* 2919 * Just to be safe. I don't know how 2920 * long we can run without blocking 2921 */ 2922 cond_resched(); 2923 TXN_LOCK(); 2924 } else { 2925 /* We can't get the commit mutex. It may 2926 * be held by a thread waiting for tlock's 2927 * so let's not block here. Save it to 2928 * put back on the anon_list. 2929 */ 2930 2931 /* Move from anon_list to anon_list2 */ 2932 list_move(&jfs_ip->anon_inode_list, 2933 &TxAnchor.anon_list2); 2934 2935 TXN_UNLOCK(); 2936 iput(ip); 2937 TXN_LOCK(); 2938 } 2939 } 2940 /* Add anon_list2 back to anon_list */ 2941 list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list); 2942 2943 if (freezing(current)) { 2944 TXN_UNLOCK(); 2945 try_to_freeze(); 2946 } else { 2947 set_current_state(TASK_INTERRUPTIBLE); 2948 TXN_UNLOCK(); 2949 schedule(); 2950 } 2951 } while (!kthread_should_stop()); 2952 2953 jfs_info("jfs_sync being killed"); 2954 return 0; 2955 } 2956 2957 #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_DEBUG) 2958 int jfs_txanchor_proc_show(struct seq_file *m, void *v) 2959 { 2960 char *freewait; 2961 char *freelockwait; 2962 char *lowlockwait; 2963 2964 freewait = 2965 waitqueue_active(&TxAnchor.freewait) ? "active" : "empty"; 2966 freelockwait = 2967 waitqueue_active(&TxAnchor.freelockwait) ? "active" : "empty"; 2968 lowlockwait = 2969 waitqueue_active(&TxAnchor.lowlockwait) ? "active" : "empty"; 2970 2971 seq_printf(m, 2972 "JFS TxAnchor\n" 2973 "============\n" 2974 "freetid = %d\n" 2975 "freewait = %s\n" 2976 "freelock = %d\n" 2977 "freelockwait = %s\n" 2978 "lowlockwait = %s\n" 2979 "tlocksInUse = %d\n" 2980 "jfs_tlocks_low = %d\n" 2981 "unlock_queue is %sempty\n", 2982 TxAnchor.freetid, 2983 freewait, 2984 TxAnchor.freelock, 2985 freelockwait, 2986 lowlockwait, 2987 TxAnchor.tlocksInUse, 2988 jfs_tlocks_low, 2989 list_empty(&TxAnchor.unlock_queue) ? 
"" : "not "); 2990 return 0; 2991 } 2992 #endif 2993 2994 #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_STATISTICS) 2995 int jfs_txstats_proc_show(struct seq_file *m, void *v) 2996 { 2997 seq_printf(m, 2998 "JFS TxStats\n" 2999 "===========\n" 3000 "calls to txBegin = %d\n" 3001 "txBegin blocked by sync barrier = %d\n" 3002 "txBegin blocked by tlocks low = %d\n" 3003 "txBegin blocked by no free tid = %d\n" 3004 "calls to txBeginAnon = %d\n" 3005 "txBeginAnon blocked by sync barrier = %d\n" 3006 "txBeginAnon blocked by tlocks low = %d\n" 3007 "calls to txLockAlloc = %d\n" 3008 "tLockAlloc blocked by no free lock = %d\n", 3009 TxStat.txBegin, 3010 TxStat.txBegin_barrier, 3011 TxStat.txBegin_lockslow, 3012 TxStat.txBegin_freetid, 3013 TxStat.txBeginAnon, 3014 TxStat.txBeginAnon_barrier, 3015 TxStat.txBeginAnon_lockslow, 3016 TxStat.txLockAlloc, 3017 TxStat.txLockAlloc_freelock); 3018 return 0; 3019 } 3020 #endif 3021