1 /* 2 * Copyright (C) International Business Machines Corp., 2000-2004 3 * Portions Copyright (C) Christoph Hellwig, 2001-2002 4 * 5 * This program is free software; you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation; either version 2 of the License, or 8 * (at your option) any later version. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 13 * the GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program; if not, write to the Free Software 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 18 */ 19 20 /* 21 * jfs_logmgr.c: log manager 22 * 23 * for related information, see transaction manager (jfs_txnmgr.c), and 24 * recovery manager (jfs_logredo.c). 25 * 26 * note: for detail, RTFS. 27 * 28 * log buffer manager: 29 * special purpose buffer manager supporting log i/o requirements. 30 * per log serial pageout of logpage 31 * queuing i/o requests and redrive i/o at iodone 32 * maintain current logpage buffer 33 * no caching since append only 34 * appropriate jfs buffer cache buffers as needed 35 * 36 * group commit: 37 * transactions which wrote COMMIT records in the same in-memory 38 * log page during the pageout of previous/current log page(s) are 39 * committed together by the pageout of the page. 40 * 41 * TBD lazy commit: 42 * transactions are committed asynchronously when the log page 43 * containing it COMMIT is paged out when it becomes full; 44 * 45 * serialization: 46 * . a per log lock serialize log write. 47 * . a per log lock serialize group commit. 48 * . a per log lock serialize log open/close; 49 * 50 * TBD log integrity: 51 * careful-write (ping-pong) of last logpage to recover from crash 52 * in overwrite. 53 * detection of split (out-of-order) write of physical sectors 54 * of last logpage via timestamp at end of each sector 55 * with its mirror data array at trailer). 56 * 57 * alternatives: 58 * lsn - 64-bit monotonically increasing integer vs 59 * 32-bit lspn and page eor. 60 */ 61 62 #include <linux/fs.h> 63 #include <linux/blkdev.h> 64 #include <linux/interrupt.h> 65 #include <linux/smp_lock.h> 66 #include <linux/completion.h> 67 #include <linux/buffer_head.h> /* for sync_blockdev() */ 68 #include <linux/bio.h> 69 #include <linux/suspend.h> 70 #include <linux/delay.h> 71 #include "jfs_incore.h" 72 #include "jfs_filsys.h" 73 #include "jfs_metapage.h" 74 #include "jfs_txnmgr.h" 75 #include "jfs_debug.h" 76 77 78 /* 79 * lbuf's ready to be redriven. Protected by log_redrive_lock (jfsIO thread) 80 */ 81 static struct lbuf *log_redrive_list; 82 static DEFINE_SPINLOCK(log_redrive_lock); 83 DECLARE_WAIT_QUEUE_HEAD(jfs_IO_thread_wait); 84 85 86 /* 87 * log read/write serialization (per log) 88 */ 89 #define LOG_LOCK_INIT(log) init_MUTEX(&(log)->loglock) 90 #define LOG_LOCK(log) down(&((log)->loglock)) 91 #define LOG_UNLOCK(log) up(&((log)->loglock)) 92 93 94 /* 95 * log group commit serialization (per log) 96 */ 97 98 #define LOGGC_LOCK_INIT(log) spin_lock_init(&(log)->gclock) 99 #define LOGGC_LOCK(log) spin_lock_irq(&(log)->gclock) 100 #define LOGGC_UNLOCK(log) spin_unlock_irq(&(log)->gclock) 101 #define LOGGC_WAKEUP(tblk) wake_up_all(&(tblk)->gcwait) 102 103 /* 104 * log sync serialization (per log) 105 */ 106 #define LOGSYNC_DELTA(logsize) min((logsize)/8, 128*LOGPSIZE) 107 #define LOGSYNC_BARRIER(logsize) ((logsize)/4) 108 /* 109 #define LOGSYNC_DELTA(logsize) min((logsize)/4, 256*LOGPSIZE) 110 #define LOGSYNC_BARRIER(logsize) ((logsize)/2) 111 */ 112 113 114 /* 115 * log buffer cache synchronization 116 */ 117 static DEFINE_SPINLOCK(jfsLCacheLock); 118 119 #define LCACHE_LOCK(flags) spin_lock_irqsave(&jfsLCacheLock, flags) 120 #define LCACHE_UNLOCK(flags) spin_unlock_irqrestore(&jfsLCacheLock, flags) 121 122 /* 123 * See __SLEEP_COND in jfs_locks.h 124 */ 125 #define LCACHE_SLEEP_COND(wq, cond, flags) \ 126 do { \ 127 if (cond) \ 128 break; \ 129 __SLEEP_COND(wq, cond, LCACHE_LOCK(flags), LCACHE_UNLOCK(flags)); \ 130 } while (0) 131 132 #define LCACHE_WAKEUP(event) wake_up(event) 133 134 135 /* 136 * lbuf buffer cache (lCache) control 137 */ 138 /* log buffer manager pageout control (cumulative, inclusive) */ 139 #define lbmREAD 0x0001 140 #define lbmWRITE 0x0002 /* enqueue at tail of write queue; 141 * init pageout if at head of queue; 142 */ 143 #define lbmRELEASE 0x0004 /* remove from write queue 144 * at completion of pageout; 145 * do not free/recycle it yet: 146 * caller will free it; 147 */ 148 #define lbmSYNC 0x0008 /* do not return to freelist 149 * when removed from write queue; 150 */ 151 #define lbmFREE 0x0010 /* return to freelist 152 * at completion of pageout; 153 * the buffer may be recycled; 154 */ 155 #define lbmDONE 0x0020 156 #define lbmERROR 0x0040 157 #define lbmGC 0x0080 /* lbmIODone to perform post-GC processing 158 * of log page 159 */ 160 #define lbmDIRECT 0x0100 161 162 /* 163 * Global list of active external journals 164 */ 165 static LIST_HEAD(jfs_external_logs); 166 static struct jfs_log *dummy_log = NULL; 167 static DECLARE_MUTEX(jfs_log_sem); 168 169 /* 170 * external references 171 */ 172 extern void txLazyUnlock(struct tblock * tblk); 173 extern int jfs_stop_threads; 174 extern struct completion jfsIOwait; 175 extern int jfs_tlocks_low; 176 177 /* 178 * forward references 179 */ 180 static int lmWriteRecord(struct jfs_log * log, struct tblock * tblk, 181 struct lrd * lrd, struct tlock * tlck); 182 183 static int lmNextPage(struct jfs_log * log); 184 static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi, 185 int activate); 186 187 static int open_inline_log(struct super_block *sb); 188 static int open_dummy_log(struct super_block *sb); 189 static int lbmLogInit(struct jfs_log * log); 190 static void lbmLogShutdown(struct jfs_log * log); 191 static struct lbuf *lbmAllocate(struct jfs_log * log, int); 192 static void lbmFree(struct lbuf * bp); 193 static void lbmfree(struct lbuf * bp); 194 static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp); 195 static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag, int cant_block); 196 static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag); 197 static int lbmIOWait(struct lbuf * bp, int flag); 198 static bio_end_io_t lbmIODone; 199 static void lbmStartIO(struct lbuf * bp); 200 static void lmGCwrite(struct jfs_log * log, int cant_block); 201 static int lmLogSync(struct jfs_log * log, int nosyncwait); 202 203 204 205 /* 206 * statistics 207 */ 208 #ifdef CONFIG_JFS_STATISTICS 209 static struct lmStat { 210 uint commit; /* # of commit */ 211 uint pagedone; /* # of page written */ 212 uint submitted; /* # of pages submitted */ 213 uint full_page; /* # of full pages submitted */ 214 uint partial_page; /* # of partial pages submitted */ 215 } lmStat; 216 #endif 217 218 219 /* 220 * NAME: lmLog() 221 * 222 * FUNCTION: write a log record; 223 * 224 * PARAMETER: 225 * 226 * RETURN: lsn - offset to the next log record to write (end-of-log); 227 * -1 - error; 228 * 229 * note: todo: log error handler 230 */ 231 int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, 232 struct tlock * tlck) 233 { 234 int lsn; 235 int diffp, difft; 236 struct metapage *mp = NULL; 237 unsigned long flags; 238 239 jfs_info("lmLog: log:0x%p tblk:0x%p, lrd:0x%p tlck:0x%p", 240 log, tblk, lrd, tlck); 241 242 LOG_LOCK(log); 243 244 /* log by (out-of-transaction) JFS ? */ 245 if (tblk == NULL) 246 goto writeRecord; 247 248 /* log from page ? */ 249 if (tlck == NULL || 250 tlck->type & tlckBTROOT || (mp = tlck->mp) == NULL) 251 goto writeRecord; 252 253 /* 254 * initialize/update page/transaction recovery lsn 255 */ 256 lsn = log->lsn; 257 258 LOGSYNC_LOCK(log, flags); 259 260 /* 261 * initialize page lsn if first log write of the page 262 */ 263 if (mp->lsn == 0) { 264 mp->log = log; 265 mp->lsn = lsn; 266 log->count++; 267 268 /* insert page at tail of logsynclist */ 269 list_add_tail(&mp->synclist, &log->synclist); 270 } 271 272 /* 273 * initialize/update lsn of tblock of the page 274 * 275 * transaction inherits oldest lsn of pages associated 276 * with allocation/deallocation of resources (their 277 * log records are used to reconstruct allocation map 278 * at recovery time: inode for inode allocation map, 279 * B+-tree index of extent descriptors for block 280 * allocation map); 281 * allocation map pages inherit transaction lsn at 282 * commit time to allow forwarding log syncpt past log 283 * records associated with allocation/deallocation of 284 * resources only after persistent map of these map pages 285 * have been updated and propagated to home. 286 */ 287 /* 288 * initialize transaction lsn: 289 */ 290 if (tblk->lsn == 0) { 291 /* inherit lsn of its first page logged */ 292 tblk->lsn = mp->lsn; 293 log->count++; 294 295 /* insert tblock after the page on logsynclist */ 296 list_add(&tblk->synclist, &mp->synclist); 297 } 298 /* 299 * update transaction lsn: 300 */ 301 else { 302 /* inherit oldest/smallest lsn of page */ 303 logdiff(diffp, mp->lsn, log); 304 logdiff(difft, tblk->lsn, log); 305 if (diffp < difft) { 306 /* update tblock lsn with page lsn */ 307 tblk->lsn = mp->lsn; 308 309 /* move tblock after page on logsynclist */ 310 list_move(&tblk->synclist, &mp->synclist); 311 } 312 } 313 314 LOGSYNC_UNLOCK(log, flags); 315 316 /* 317 * write the log record 318 */ 319 writeRecord: 320 lsn = lmWriteRecord(log, tblk, lrd, tlck); 321 322 /* 323 * forward log syncpt if log reached next syncpt trigger 324 */ 325 logdiff(diffp, lsn, log); 326 if (diffp >= log->nextsync) 327 lsn = lmLogSync(log, 0); 328 329 /* update end-of-log lsn */ 330 log->lsn = lsn; 331 332 LOG_UNLOCK(log); 333 334 /* return end-of-log address */ 335 return lsn; 336 } 337 338 /* 339 * NAME: lmWriteRecord() 340 * 341 * FUNCTION: move the log record to current log page 342 * 343 * PARAMETER: cd - commit descriptor 344 * 345 * RETURN: end-of-log address 346 * 347 * serialization: LOG_LOCK() held on entry/exit 348 */ 349 static int 350 lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, 351 struct tlock * tlck) 352 { 353 int lsn = 0; /* end-of-log address */ 354 struct lbuf *bp; /* dst log page buffer */ 355 struct logpage *lp; /* dst log page */ 356 caddr_t dst; /* destination address in log page */ 357 int dstoffset; /* end-of-log offset in log page */ 358 int freespace; /* free space in log page */ 359 caddr_t p; /* src meta-data page */ 360 caddr_t src; 361 int srclen; 362 int nbytes; /* number of bytes to move */ 363 int i; 364 int len; 365 struct linelock *linelock; 366 struct lv *lv; 367 struct lvd *lvd; 368 int l2linesize; 369 370 len = 0; 371 372 /* retrieve destination log page to write */ 373 bp = (struct lbuf *) log->bp; 374 lp = (struct logpage *) bp->l_ldata; 375 dstoffset = log->eor; 376 377 /* any log data to write ? */ 378 if (tlck == NULL) 379 goto moveLrd; 380 381 /* 382 * move log record data 383 */ 384 /* retrieve source meta-data page to log */ 385 if (tlck->flag & tlckPAGELOCK) { 386 p = (caddr_t) (tlck->mp->data); 387 linelock = (struct linelock *) & tlck->lock; 388 } 389 /* retrieve source in-memory inode to log */ 390 else if (tlck->flag & tlckINODELOCK) { 391 if (tlck->type & tlckDTREE) 392 p = (caddr_t) &JFS_IP(tlck->ip)->i_dtroot; 393 else 394 p = (caddr_t) &JFS_IP(tlck->ip)->i_xtroot; 395 linelock = (struct linelock *) & tlck->lock; 396 } 397 #ifdef _JFS_WIP 398 else if (tlck->flag & tlckINLINELOCK) { 399 400 inlinelock = (struct inlinelock *) & tlck; 401 p = (caddr_t) & inlinelock->pxd; 402 linelock = (struct linelock *) & tlck; 403 } 404 #endif /* _JFS_WIP */ 405 else { 406 jfs_err("lmWriteRecord: UFO tlck:0x%p", tlck); 407 return 0; /* Probably should trap */ 408 } 409 l2linesize = linelock->l2linesize; 410 411 moveData: 412 ASSERT(linelock->index <= linelock->maxcnt); 413 414 lv = linelock->lv; 415 for (i = 0; i < linelock->index; i++, lv++) { 416 if (lv->length == 0) 417 continue; 418 419 /* is page full ? */ 420 if (dstoffset >= LOGPSIZE - LOGPTLRSIZE) { 421 /* page become full: move on to next page */ 422 lmNextPage(log); 423 424 bp = log->bp; 425 lp = (struct logpage *) bp->l_ldata; 426 dstoffset = LOGPHDRSIZE; 427 } 428 429 /* 430 * move log vector data 431 */ 432 src = (u8 *) p + (lv->offset << l2linesize); 433 srclen = lv->length << l2linesize; 434 len += srclen; 435 while (srclen > 0) { 436 freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset; 437 nbytes = min(freespace, srclen); 438 dst = (caddr_t) lp + dstoffset; 439 memcpy(dst, src, nbytes); 440 dstoffset += nbytes; 441 442 /* is page not full ? */ 443 if (dstoffset < LOGPSIZE - LOGPTLRSIZE) 444 break; 445 446 /* page become full: move on to next page */ 447 lmNextPage(log); 448 449 bp = (struct lbuf *) log->bp; 450 lp = (struct logpage *) bp->l_ldata; 451 dstoffset = LOGPHDRSIZE; 452 453 srclen -= nbytes; 454 src += nbytes; 455 } 456 457 /* 458 * move log vector descriptor 459 */ 460 len += 4; 461 lvd = (struct lvd *) ((caddr_t) lp + dstoffset); 462 lvd->offset = cpu_to_le16(lv->offset); 463 lvd->length = cpu_to_le16(lv->length); 464 dstoffset += 4; 465 jfs_info("lmWriteRecord: lv offset:%d length:%d", 466 lv->offset, lv->length); 467 } 468 469 if ((i = linelock->next)) { 470 linelock = (struct linelock *) lid_to_tlock(i); 471 goto moveData; 472 } 473 474 /* 475 * move log record descriptor 476 */ 477 moveLrd: 478 lrd->length = cpu_to_le16(len); 479 480 src = (caddr_t) lrd; 481 srclen = LOGRDSIZE; 482 483 while (srclen > 0) { 484 freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset; 485 nbytes = min(freespace, srclen); 486 dst = (caddr_t) lp + dstoffset; 487 memcpy(dst, src, nbytes); 488 489 dstoffset += nbytes; 490 srclen -= nbytes; 491 492 /* are there more to move than freespace of page ? */ 493 if (srclen) 494 goto pageFull; 495 496 /* 497 * end of log record descriptor 498 */ 499 500 /* update last log record eor */ 501 log->eor = dstoffset; 502 bp->l_eor = dstoffset; 503 lsn = (log->page << L2LOGPSIZE) + dstoffset; 504 505 if (lrd->type & cpu_to_le16(LOG_COMMIT)) { 506 tblk->clsn = lsn; 507 jfs_info("wr: tclsn:0x%x, beor:0x%x", tblk->clsn, 508 bp->l_eor); 509 510 INCREMENT(lmStat.commit); /* # of commit */ 511 512 /* 513 * enqueue tblock for group commit: 514 * 515 * enqueue tblock of non-trivial/synchronous COMMIT 516 * at tail of group commit queue 517 * (trivial/asynchronous COMMITs are ignored by 518 * group commit.) 519 */ 520 LOGGC_LOCK(log); 521 522 /* init tblock gc state */ 523 tblk->flag = tblkGC_QUEUE; 524 tblk->bp = log->bp; 525 tblk->pn = log->page; 526 tblk->eor = log->eor; 527 528 /* enqueue transaction to commit queue */ 529 list_add_tail(&tblk->cqueue, &log->cqueue); 530 531 LOGGC_UNLOCK(log); 532 } 533 534 jfs_info("lmWriteRecord: lrd:0x%04x bp:0x%p pn:%d eor:0x%x", 535 le16_to_cpu(lrd->type), log->bp, log->page, dstoffset); 536 537 /* page not full ? */ 538 if (dstoffset < LOGPSIZE - LOGPTLRSIZE) 539 return lsn; 540 541 pageFull: 542 /* page become full: move on to next page */ 543 lmNextPage(log); 544 545 bp = (struct lbuf *) log->bp; 546 lp = (struct logpage *) bp->l_ldata; 547 dstoffset = LOGPHDRSIZE; 548 src += nbytes; 549 } 550 551 return lsn; 552 } 553 554 555 /* 556 * NAME: lmNextPage() 557 * 558 * FUNCTION: write current page and allocate next page. 559 * 560 * PARAMETER: log 561 * 562 * RETURN: 0 563 * 564 * serialization: LOG_LOCK() held on entry/exit 565 */ 566 static int lmNextPage(struct jfs_log * log) 567 { 568 struct logpage *lp; 569 int lspn; /* log sequence page number */ 570 int pn; /* current page number */ 571 struct lbuf *bp; 572 struct lbuf *nextbp; 573 struct tblock *tblk; 574 575 /* get current log page number and log sequence page number */ 576 pn = log->page; 577 bp = log->bp; 578 lp = (struct logpage *) bp->l_ldata; 579 lspn = le32_to_cpu(lp->h.page); 580 581 LOGGC_LOCK(log); 582 583 /* 584 * write or queue the full page at the tail of write queue 585 */ 586 /* get the tail tblk on commit queue */ 587 if (list_empty(&log->cqueue)) 588 tblk = NULL; 589 else 590 tblk = list_entry(log->cqueue.prev, struct tblock, cqueue); 591 592 /* every tblk who has COMMIT record on the current page, 593 * and has not been committed, must be on commit queue 594 * since tblk is queued at commit queueu at the time 595 * of writing its COMMIT record on the page before 596 * page becomes full (even though the tblk thread 597 * who wrote COMMIT record may have been suspended 598 * currently); 599 */ 600 601 /* is page bound with outstanding tail tblk ? */ 602 if (tblk && tblk->pn == pn) { 603 /* mark tblk for end-of-page */ 604 tblk->flag |= tblkGC_EOP; 605 606 if (log->cflag & logGC_PAGEOUT) { 607 /* if page is not already on write queue, 608 * just enqueue (no lbmWRITE to prevent redrive) 609 * buffer to wqueue to ensure correct serial order 610 * of the pages since log pages will be added 611 * continuously 612 */ 613 if (bp->l_wqnext == NULL) 614 lbmWrite(log, bp, 0, 0); 615 } else { 616 /* 617 * No current GC leader, initiate group commit 618 */ 619 log->cflag |= logGC_PAGEOUT; 620 lmGCwrite(log, 0); 621 } 622 } 623 /* page is not bound with outstanding tblk: 624 * init write or mark it to be redriven (lbmWRITE) 625 */ 626 else { 627 /* finalize the page */ 628 bp->l_ceor = bp->l_eor; 629 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor); 630 lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE, 0); 631 } 632 LOGGC_UNLOCK(log); 633 634 /* 635 * allocate/initialize next page 636 */ 637 /* if log wraps, the first data page of log is 2 638 * (0 never used, 1 is superblock). 639 */ 640 log->page = (pn == log->size - 1) ? 2 : pn + 1; 641 log->eor = LOGPHDRSIZE; /* ? valid page empty/full at logRedo() */ 642 643 /* allocate/initialize next log page buffer */ 644 nextbp = lbmAllocate(log, log->page); 645 nextbp->l_eor = log->eor; 646 log->bp = nextbp; 647 648 /* initialize next log page */ 649 lp = (struct logpage *) nextbp->l_ldata; 650 lp->h.page = lp->t.page = cpu_to_le32(lspn + 1); 651 lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE); 652 653 return 0; 654 } 655 656 657 /* 658 * NAME: lmGroupCommit() 659 * 660 * FUNCTION: group commit 661 * initiate pageout of the pages with COMMIT in the order of 662 * page number - redrive pageout of the page at the head of 663 * pageout queue until full page has been written. 664 * 665 * RETURN: 666 * 667 * NOTE: 668 * LOGGC_LOCK serializes log group commit queue, and 669 * transaction blocks on the commit queue. 670 * N.B. LOG_LOCK is NOT held during lmGroupCommit(). 671 */ 672 int lmGroupCommit(struct jfs_log * log, struct tblock * tblk) 673 { 674 int rc = 0; 675 676 LOGGC_LOCK(log); 677 678 /* group committed already ? */ 679 if (tblk->flag & tblkGC_COMMITTED) { 680 if (tblk->flag & tblkGC_ERROR) 681 rc = -EIO; 682 683 LOGGC_UNLOCK(log); 684 return rc; 685 } 686 jfs_info("lmGroup Commit: tblk = 0x%p, gcrtc = %d", tblk, log->gcrtc); 687 688 if (tblk->xflag & COMMIT_LAZY) 689 tblk->flag |= tblkGC_LAZY; 690 691 if ((!(log->cflag & logGC_PAGEOUT)) && (!list_empty(&log->cqueue)) && 692 (!(tblk->xflag & COMMIT_LAZY) || test_bit(log_FLUSH, &log->flag) 693 || jfs_tlocks_low)) { 694 /* 695 * No pageout in progress 696 * 697 * start group commit as its group leader. 698 */ 699 log->cflag |= logGC_PAGEOUT; 700 701 lmGCwrite(log, 0); 702 } 703 704 if (tblk->xflag & COMMIT_LAZY) { 705 /* 706 * Lazy transactions can leave now 707 */ 708 LOGGC_UNLOCK(log); 709 return 0; 710 } 711 712 /* lmGCwrite gives up LOGGC_LOCK, check again */ 713 714 if (tblk->flag & tblkGC_COMMITTED) { 715 if (tblk->flag & tblkGC_ERROR) 716 rc = -EIO; 717 718 LOGGC_UNLOCK(log); 719 return rc; 720 } 721 722 /* upcount transaction waiting for completion 723 */ 724 log->gcrtc++; 725 tblk->flag |= tblkGC_READY; 726 727 __SLEEP_COND(tblk->gcwait, (tblk->flag & tblkGC_COMMITTED), 728 LOGGC_LOCK(log), LOGGC_UNLOCK(log)); 729 730 /* removed from commit queue */ 731 if (tblk->flag & tblkGC_ERROR) 732 rc = -EIO; 733 734 LOGGC_UNLOCK(log); 735 return rc; 736 } 737 738 /* 739 * NAME: lmGCwrite() 740 * 741 * FUNCTION: group commit write 742 * initiate write of log page, building a group of all transactions 743 * with commit records on that page. 744 * 745 * RETURN: None 746 * 747 * NOTE: 748 * LOGGC_LOCK must be held by caller. 749 * N.B. LOG_LOCK is NOT held during lmGroupCommit(). 750 */ 751 static void lmGCwrite(struct jfs_log * log, int cant_write) 752 { 753 struct lbuf *bp; 754 struct logpage *lp; 755 int gcpn; /* group commit page number */ 756 struct tblock *tblk; 757 struct tblock *xtblk = NULL; 758 759 /* 760 * build the commit group of a log page 761 * 762 * scan commit queue and make a commit group of all 763 * transactions with COMMIT records on the same log page. 764 */ 765 /* get the head tblk on the commit queue */ 766 gcpn = list_entry(log->cqueue.next, struct tblock, cqueue)->pn; 767 768 list_for_each_entry(tblk, &log->cqueue, cqueue) { 769 if (tblk->pn != gcpn) 770 break; 771 772 xtblk = tblk; 773 774 /* state transition: (QUEUE, READY) -> COMMIT */ 775 tblk->flag |= tblkGC_COMMIT; 776 } 777 tblk = xtblk; /* last tblk of the page */ 778 779 /* 780 * pageout to commit transactions on the log page. 781 */ 782 bp = (struct lbuf *) tblk->bp; 783 lp = (struct logpage *) bp->l_ldata; 784 /* is page already full ? */ 785 if (tblk->flag & tblkGC_EOP) { 786 /* mark page to free at end of group commit of the page */ 787 tblk->flag &= ~tblkGC_EOP; 788 tblk->flag |= tblkGC_FREE; 789 bp->l_ceor = bp->l_eor; 790 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor); 791 lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmGC, 792 cant_write); 793 INCREMENT(lmStat.full_page); 794 } 795 /* page is not yet full */ 796 else { 797 bp->l_ceor = tblk->eor; /* ? bp->l_ceor = bp->l_eor; */ 798 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor); 799 lbmWrite(log, bp, lbmWRITE | lbmGC, cant_write); 800 INCREMENT(lmStat.partial_page); 801 } 802 } 803 804 /* 805 * NAME: lmPostGC() 806 * 807 * FUNCTION: group commit post-processing 808 * Processes transactions after their commit records have been written 809 * to disk, redriving log I/O if necessary. 810 * 811 * RETURN: None 812 * 813 * NOTE: 814 * This routine is called a interrupt time by lbmIODone 815 */ 816 static void lmPostGC(struct lbuf * bp) 817 { 818 unsigned long flags; 819 struct jfs_log *log = bp->l_log; 820 struct logpage *lp; 821 struct tblock *tblk, *temp; 822 823 //LOGGC_LOCK(log); 824 spin_lock_irqsave(&log->gclock, flags); 825 /* 826 * current pageout of group commit completed. 827 * 828 * remove/wakeup transactions from commit queue who were 829 * group committed with the current log page 830 */ 831 list_for_each_entry_safe(tblk, temp, &log->cqueue, cqueue) { 832 if (!(tblk->flag & tblkGC_COMMIT)) 833 break; 834 /* if transaction was marked GC_COMMIT then 835 * it has been shipped in the current pageout 836 * and made it to disk - it is committed. 837 */ 838 839 if (bp->l_flag & lbmERROR) 840 tblk->flag |= tblkGC_ERROR; 841 842 /* remove it from the commit queue */ 843 list_del(&tblk->cqueue); 844 tblk->flag &= ~tblkGC_QUEUE; 845 846 if (tblk == log->flush_tblk) { 847 /* we can stop flushing the log now */ 848 clear_bit(log_FLUSH, &log->flag); 849 log->flush_tblk = NULL; 850 } 851 852 jfs_info("lmPostGC: tblk = 0x%p, flag = 0x%x", tblk, 853 tblk->flag); 854 855 if (!(tblk->xflag & COMMIT_FORCE)) 856 /* 857 * Hand tblk over to lazy commit thread 858 */ 859 txLazyUnlock(tblk); 860 else { 861 /* state transition: COMMIT -> COMMITTED */ 862 tblk->flag |= tblkGC_COMMITTED; 863 864 if (tblk->flag & tblkGC_READY) 865 log->gcrtc--; 866 867 LOGGC_WAKEUP(tblk); 868 } 869 870 /* was page full before pageout ? 871 * (and this is the last tblk bound with the page) 872 */ 873 if (tblk->flag & tblkGC_FREE) 874 lbmFree(bp); 875 /* did page become full after pageout ? 876 * (and this is the last tblk bound with the page) 877 */ 878 else if (tblk->flag & tblkGC_EOP) { 879 /* finalize the page */ 880 lp = (struct logpage *) bp->l_ldata; 881 bp->l_ceor = bp->l_eor; 882 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor); 883 jfs_info("lmPostGC: calling lbmWrite"); 884 lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE, 885 1); 886 } 887 888 } 889 890 /* are there any transactions who have entered lnGroupCommit() 891 * (whose COMMITs are after that of the last log page written. 892 * They are waiting for new group commit (above at (SLEEP 1)) 893 * or lazy transactions are on a full (queued) log page, 894 * select the latest ready transaction as new group leader and 895 * wake her up to lead her group. 896 */ 897 if ((!list_empty(&log->cqueue)) && 898 ((log->gcrtc > 0) || (tblk->bp->l_wqnext != NULL) || 899 test_bit(log_FLUSH, &log->flag) || jfs_tlocks_low)) 900 /* 901 * Call lmGCwrite with new group leader 902 */ 903 lmGCwrite(log, 1); 904 905 /* no transaction are ready yet (transactions are only just 906 * queued (GC_QUEUE) and not entered for group commit yet). 907 * the first transaction entering group commit 908 * will elect herself as new group leader. 909 */ 910 else 911 log->cflag &= ~logGC_PAGEOUT; 912 913 //LOGGC_UNLOCK(log); 914 spin_unlock_irqrestore(&log->gclock, flags); 915 return; 916 } 917 918 /* 919 * NAME: lmLogSync() 920 * 921 * FUNCTION: write log SYNCPT record for specified log 922 * if new sync address is available 923 * (normally the case if sync() is executed by back-ground 924 * process). 925 * if not, explicitly run jfs_blogsync() to initiate 926 * getting of new sync address. 927 * calculate new value of i_nextsync which determines when 928 * this code is called again. 929 * 930 * PARAMETERS: log - log structure 931 * nosyncwait - 1 if called asynchronously 932 * 933 * RETURN: 0 934 * 935 * serialization: LOG_LOCK() held on entry/exit 936 */ 937 static int lmLogSync(struct jfs_log * log, int nosyncwait) 938 { 939 int logsize; 940 int written; /* written since last syncpt */ 941 int free; /* free space left available */ 942 int delta; /* additional delta to write normally */ 943 int more; /* additional write granted */ 944 struct lrd lrd; 945 int lsn; 946 struct logsyncblk *lp; 947 struct jfs_sb_info *sbi; 948 unsigned long flags; 949 950 /* push dirty metapages out to disk */ 951 list_for_each_entry(sbi, &log->sb_list, log_list) { 952 filemap_flush(sbi->ipbmap->i_mapping); 953 filemap_flush(sbi->ipimap->i_mapping); 954 filemap_flush(sbi->direct_inode->i_mapping); 955 } 956 957 /* 958 * forward syncpt 959 */ 960 /* if last sync is same as last syncpt, 961 * invoke sync point forward processing to update sync. 962 */ 963 964 if (log->sync == log->syncpt) { 965 LOGSYNC_LOCK(log, flags); 966 if (list_empty(&log->synclist)) 967 log->sync = log->lsn; 968 else { 969 lp = list_entry(log->synclist.next, 970 struct logsyncblk, synclist); 971 log->sync = lp->lsn; 972 } 973 LOGSYNC_UNLOCK(log, flags); 974 975 } 976 977 /* if sync is different from last syncpt, 978 * write a SYNCPT record with syncpt = sync. 979 * reset syncpt = sync 980 */ 981 if (log->sync != log->syncpt) { 982 lrd.logtid = 0; 983 lrd.backchain = 0; 984 lrd.type = cpu_to_le16(LOG_SYNCPT); 985 lrd.length = 0; 986 lrd.log.syncpt.sync = cpu_to_le32(log->sync); 987 lsn = lmWriteRecord(log, NULL, &lrd, NULL); 988 989 log->syncpt = log->sync; 990 } else 991 lsn = log->lsn; 992 993 /* 994 * setup next syncpt trigger (SWAG) 995 */ 996 logsize = log->logsize; 997 998 logdiff(written, lsn, log); 999 free = logsize - written; 1000 delta = LOGSYNC_DELTA(logsize); 1001 more = min(free / 2, delta); 1002 if (more < 2 * LOGPSIZE) { 1003 jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n"); 1004 /* 1005 * log wrapping 1006 * 1007 * option 1 - panic ? No.! 1008 * option 2 - shutdown file systems 1009 * associated with log ? 1010 * option 3 - extend log ? 1011 */ 1012 /* 1013 * option 4 - second chance 1014 * 1015 * mark log wrapped, and continue. 1016 * when all active transactions are completed, 1017 * mark log vaild for recovery. 1018 * if crashed during invalid state, log state 1019 * implies invald log, forcing fsck(). 1020 */ 1021 /* mark log state log wrap in log superblock */ 1022 /* log->state = LOGWRAP; */ 1023 1024 /* reset sync point computation */ 1025 log->syncpt = log->sync = lsn; 1026 log->nextsync = delta; 1027 } else 1028 /* next syncpt trigger = written + more */ 1029 log->nextsync = written + more; 1030 1031 /* return if lmLogSync() from outside of transaction, e.g., sync() */ 1032 if (nosyncwait) 1033 return lsn; 1034 1035 /* if number of bytes written from last sync point is more 1036 * than 1/4 of the log size, stop new transactions from 1037 * starting until all current transactions are completed 1038 * by setting syncbarrier flag. 1039 */ 1040 if (written > LOGSYNC_BARRIER(logsize) && logsize > 32 * LOGPSIZE) { 1041 set_bit(log_SYNCBARRIER, &log->flag); 1042 jfs_info("log barrier on: lsn=0x%x syncpt=0x%x", lsn, 1043 log->syncpt); 1044 /* 1045 * We may have to initiate group commit 1046 */ 1047 jfs_flush_journal(log, 0); 1048 } 1049 1050 return lsn; 1051 } 1052 1053 /* 1054 * NAME: jfs_syncpt 1055 * 1056 * FUNCTION: write log SYNCPT record for specified log 1057 * 1058 * PARAMETERS: log - log structure 1059 */ 1060 void jfs_syncpt(struct jfs_log *log) 1061 { LOG_LOCK(log); 1062 lmLogSync(log, 1); 1063 LOG_UNLOCK(log); 1064 } 1065 1066 /* 1067 * NAME: lmLogOpen() 1068 * 1069 * FUNCTION: open the log on first open; 1070 * insert filesystem in the active list of the log. 1071 * 1072 * PARAMETER: ipmnt - file system mount inode 1073 * iplog - log inode (out) 1074 * 1075 * RETURN: 1076 * 1077 * serialization: 1078 */ 1079 int lmLogOpen(struct super_block *sb) 1080 { 1081 int rc; 1082 struct block_device *bdev; 1083 struct jfs_log *log; 1084 struct jfs_sb_info *sbi = JFS_SBI(sb); 1085 1086 if (sbi->flag & JFS_NOINTEGRITY) 1087 return open_dummy_log(sb); 1088 1089 if (sbi->mntflag & JFS_INLINELOG) 1090 return open_inline_log(sb); 1091 1092 down(&jfs_log_sem); 1093 list_for_each_entry(log, &jfs_external_logs, journal_list) { 1094 if (log->bdev->bd_dev == sbi->logdev) { 1095 if (memcmp(log->uuid, sbi->loguuid, 1096 sizeof(log->uuid))) { 1097 jfs_warn("wrong uuid on JFS journal\n"); 1098 up(&jfs_log_sem); 1099 return -EINVAL; 1100 } 1101 /* 1102 * add file system to log active file system list 1103 */ 1104 if ((rc = lmLogFileSystem(log, sbi, 1))) { 1105 up(&jfs_log_sem); 1106 return rc; 1107 } 1108 goto journal_found; 1109 } 1110 } 1111 1112 if (!(log = kmalloc(sizeof(struct jfs_log), GFP_KERNEL))) { 1113 up(&jfs_log_sem); 1114 return -ENOMEM; 1115 } 1116 memset(log, 0, sizeof(struct jfs_log)); 1117 INIT_LIST_HEAD(&log->sb_list); 1118 init_waitqueue_head(&log->syncwait); 1119 1120 /* 1121 * external log as separate logical volume 1122 * 1123 * file systems to log may have n-to-1 relationship; 1124 */ 1125 1126 bdev = open_by_devnum(sbi->logdev, FMODE_READ|FMODE_WRITE); 1127 if (IS_ERR(bdev)) { 1128 rc = -PTR_ERR(bdev); 1129 goto free; 1130 } 1131 1132 if ((rc = bd_claim(bdev, log))) { 1133 goto close; 1134 } 1135 1136 log->bdev = bdev; 1137 memcpy(log->uuid, sbi->loguuid, sizeof(log->uuid)); 1138 1139 /* 1140 * initialize log: 1141 */ 1142 if ((rc = lmLogInit(log))) 1143 goto unclaim; 1144 1145 list_add(&log->journal_list, &jfs_external_logs); 1146 1147 /* 1148 * add file system to log active file system list 1149 */ 1150 if ((rc = lmLogFileSystem(log, sbi, 1))) 1151 goto shutdown; 1152 1153 journal_found: 1154 LOG_LOCK(log); 1155 list_add(&sbi->log_list, &log->sb_list); 1156 sbi->log = log; 1157 LOG_UNLOCK(log); 1158 1159 up(&jfs_log_sem); 1160 return 0; 1161 1162 /* 1163 * unwind on error 1164 */ 1165 shutdown: /* unwind lbmLogInit() */ 1166 list_del(&log->journal_list); 1167 lbmLogShutdown(log); 1168 1169 unclaim: 1170 bd_release(bdev); 1171 1172 close: /* close external log device */ 1173 blkdev_put(bdev); 1174 1175 free: /* free log descriptor */ 1176 up(&jfs_log_sem); 1177 kfree(log); 1178 1179 jfs_warn("lmLogOpen: exit(%d)", rc); 1180 return rc; 1181 } 1182 1183 static int open_inline_log(struct super_block *sb) 1184 { 1185 struct jfs_log *log; 1186 int rc; 1187 1188 if (!(log = kmalloc(sizeof(struct jfs_log), GFP_KERNEL))) 1189 return -ENOMEM; 1190 memset(log, 0, sizeof(struct jfs_log)); 1191 INIT_LIST_HEAD(&log->sb_list); 1192 init_waitqueue_head(&log->syncwait); 1193 1194 set_bit(log_INLINELOG, &log->flag); 1195 log->bdev = sb->s_bdev; 1196 log->base = addressPXD(&JFS_SBI(sb)->logpxd); 1197 log->size = lengthPXD(&JFS_SBI(sb)->logpxd) >> 1198 (L2LOGPSIZE - sb->s_blocksize_bits); 1199 log->l2bsize = sb->s_blocksize_bits; 1200 ASSERT(L2LOGPSIZE >= sb->s_blocksize_bits); 1201 1202 /* 1203 * initialize log. 1204 */ 1205 if ((rc = lmLogInit(log))) { 1206 kfree(log); 1207 jfs_warn("lmLogOpen: exit(%d)", rc); 1208 return rc; 1209 } 1210 1211 list_add(&JFS_SBI(sb)->log_list, &log->sb_list); 1212 JFS_SBI(sb)->log = log; 1213 1214 return rc; 1215 } 1216 1217 static int open_dummy_log(struct super_block *sb) 1218 { 1219 int rc; 1220 1221 down(&jfs_log_sem); 1222 if (!dummy_log) { 1223 dummy_log = kmalloc(sizeof(struct jfs_log), GFP_KERNEL); 1224 if (!dummy_log) { 1225 up(&jfs_log_sem); 1226 return -ENOMEM; 1227 } 1228 memset(dummy_log, 0, sizeof(struct jfs_log)); 1229 INIT_LIST_HEAD(&dummy_log->sb_list); 1230 init_waitqueue_head(&dummy_log->syncwait); 1231 dummy_log->no_integrity = 1; 1232 /* Make up some stuff */ 1233 dummy_log->base = 0; 1234 dummy_log->size = 1024; 1235 rc = lmLogInit(dummy_log); 1236 if (rc) { 1237 kfree(dummy_log); 1238 dummy_log = NULL; 1239 up(&jfs_log_sem); 1240 return rc; 1241 } 1242 } 1243 1244 LOG_LOCK(dummy_log); 1245 list_add(&JFS_SBI(sb)->log_list, &dummy_log->sb_list); 1246 JFS_SBI(sb)->log = dummy_log; 1247 LOG_UNLOCK(dummy_log); 1248 up(&jfs_log_sem); 1249 1250 return 0; 1251 } 1252 1253 /* 1254 * NAME: lmLogInit() 1255 * 1256 * FUNCTION: log initialization at first log open. 1257 * 1258 * logredo() (or logformat()) should have been run previously. 1259 * initialize the log from log superblock. 1260 * set the log state in the superblock to LOGMOUNT and 1261 * write SYNCPT log record. 1262 * 1263 * PARAMETER: log - log structure 1264 * 1265 * RETURN: 0 - if ok 1266 * -EINVAL - bad log magic number or superblock dirty 1267 * error returned from logwait() 1268 * 1269 * serialization: single first open thread 1270 */ 1271 int lmLogInit(struct jfs_log * log) 1272 { 1273 int rc = 0; 1274 struct lrd lrd; 1275 struct logsuper *logsuper; 1276 struct lbuf *bpsuper; 1277 struct lbuf *bp; 1278 struct logpage *lp; 1279 int lsn = 0; 1280 1281 jfs_info("lmLogInit: log:0x%p", log); 1282 1283 /* initialize the group commit serialization lock */ 1284 LOGGC_LOCK_INIT(log); 1285 1286 /* allocate/initialize the log write serialization lock */ 1287 LOG_LOCK_INIT(log); 1288 1289 LOGSYNC_LOCK_INIT(log); 1290 1291 INIT_LIST_HEAD(&log->synclist); 1292 1293 INIT_LIST_HEAD(&log->cqueue); 1294 log->flush_tblk = NULL; 1295 1296 log->count = 0; 1297 1298 /* 1299 * initialize log i/o 1300 */ 1301 if ((rc = lbmLogInit(log))) 1302 return rc; 1303 1304 if (!test_bit(log_INLINELOG, &log->flag)) 1305 log->l2bsize = L2LOGPSIZE; 1306 1307 /* check for disabled journaling to disk */ 1308 if (log->no_integrity) { 1309 /* 1310 * Journal pages will still be filled. When the time comes 1311 * to actually do the I/O, the write is not done, and the 1312 * endio routine is called directly. 1313 */ 1314 bp = lbmAllocate(log , 0); 1315 log->bp = bp; 1316 bp->l_pn = bp->l_eor = 0; 1317 } else { 1318 /* 1319 * validate log superblock 1320 */ 1321 if ((rc = lbmRead(log, 1, &bpsuper))) 1322 goto errout10; 1323 1324 logsuper = (struct logsuper *) bpsuper->l_ldata; 1325 1326 if (logsuper->magic != cpu_to_le32(LOGMAGIC)) { 1327 jfs_warn("*** Log Format Error ! ***"); 1328 rc = -EINVAL; 1329 goto errout20; 1330 } 1331 1332 /* logredo() should have been run successfully. */ 1333 if (logsuper->state != cpu_to_le32(LOGREDONE)) { 1334 jfs_warn("*** Log Is Dirty ! ***"); 1335 rc = -EINVAL; 1336 goto errout20; 1337 } 1338 1339 /* initialize log from log superblock */ 1340 if (test_bit(log_INLINELOG,&log->flag)) { 1341 if (log->size != le32_to_cpu(logsuper->size)) { 1342 rc = -EINVAL; 1343 goto errout20; 1344 } 1345 jfs_info("lmLogInit: inline log:0x%p base:0x%Lx " 1346 "size:0x%x", log, 1347 (unsigned long long) log->base, log->size); 1348 } else { 1349 if (memcmp(logsuper->uuid, log->uuid, 16)) { 1350 jfs_warn("wrong uuid on JFS log device"); 1351 goto errout20; 1352 } 1353 log->size = le32_to_cpu(logsuper->size); 1354 log->l2bsize = le32_to_cpu(logsuper->l2bsize); 1355 jfs_info("lmLogInit: external log:0x%p base:0x%Lx " 1356 "size:0x%x", log, 1357 (unsigned long long) log->base, log->size); 1358 } 1359 1360 log->page = le32_to_cpu(logsuper->end) / LOGPSIZE; 1361 log->eor = le32_to_cpu(logsuper->end) - (LOGPSIZE * log->page); 1362 1363 /* 1364 * initialize for log append write mode 1365 */ 1366 /* establish current/end-of-log page/buffer */ 1367 if ((rc = lbmRead(log, log->page, &bp))) 1368 goto errout20; 1369 1370 lp = (struct logpage *) bp->l_ldata; 1371 1372 jfs_info("lmLogInit: lsn:0x%x page:%d eor:%d:%d", 1373 le32_to_cpu(logsuper->end), log->page, log->eor, 1374 le16_to_cpu(lp->h.eor)); 1375 1376 log->bp = bp; 1377 bp->l_pn = log->page; 1378 bp->l_eor = log->eor; 1379 1380 /* if current page is full, move on to next page */ 1381 if (log->eor >= LOGPSIZE - LOGPTLRSIZE) 1382 lmNextPage(log); 1383 1384 /* 1385 * initialize log syncpoint 1386 */ 1387 /* 1388 * write the first SYNCPT record with syncpoint = 0 1389 * (i.e., log redo up to HERE !); 1390 * remove current page from lbm write queue at end of pageout 1391 * (to write log superblock update), but do not release to 1392 * freelist; 1393 */ 1394 lrd.logtid = 0; 1395 lrd.backchain = 0; 1396 lrd.type = cpu_to_le16(LOG_SYNCPT); 1397 lrd.length = 0; 1398 lrd.log.syncpt.sync = 0; 1399 lsn = lmWriteRecord(log, NULL, &lrd, NULL); 1400 bp = log->bp; 1401 bp->l_ceor = bp->l_eor; 1402 lp = (struct logpage *) bp->l_ldata; 1403 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor); 1404 lbmWrite(log, bp, lbmWRITE | lbmSYNC, 0); 1405 if ((rc = lbmIOWait(bp, 0))) 1406 goto errout30; 1407 1408 /* 1409 * update/write superblock 1410 */ 1411 logsuper->state = cpu_to_le32(LOGMOUNT); 1412 log->serial = le32_to_cpu(logsuper->serial) + 1; 1413 logsuper->serial = cpu_to_le32(log->serial); 1414 lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC); 1415 if ((rc = lbmIOWait(bpsuper, lbmFREE))) 1416 goto errout30; 1417 } 1418 1419 /* initialize logsync parameters */ 1420 log->logsize = (log->size - 2) << L2LOGPSIZE; 1421 log->lsn = lsn; 1422 log->syncpt = lsn; 1423 log->sync = log->syncpt; 1424 log->nextsync = LOGSYNC_DELTA(log->logsize); 1425 1426 jfs_info("lmLogInit: lsn:0x%x syncpt:0x%x sync:0x%x", 1427 log->lsn, log->syncpt, log->sync); 1428 1429 /* 1430 * initialize for lazy/group commit 1431 */ 1432 log->clsn = lsn; 1433 1434 return 0; 1435 1436 /* 1437 * unwind on error 1438 */ 1439 errout30: /* release log page */ 1440 log->wqueue = NULL; 1441 bp->l_wqnext = NULL; 1442 lbmFree(bp); 1443 1444 errout20: /* release log superblock */ 1445 lbmFree(bpsuper); 1446 1447 errout10: /* unwind lbmLogInit() */ 1448 lbmLogShutdown(log); 1449 1450 jfs_warn("lmLogInit: exit(%d)", rc); 1451 return rc; 1452 } 1453 1454 1455 /* 1456 * NAME: lmLogClose() 1457 * 1458 * FUNCTION: remove file system <ipmnt> from active list of log <iplog> 1459 * and close it on last close. 1460 * 1461 * PARAMETER: sb - superblock 1462 * 1463 * RETURN: errors from subroutines 1464 * 1465 * serialization: 1466 */ 1467 int lmLogClose(struct super_block *sb) 1468 { 1469 struct jfs_sb_info *sbi = JFS_SBI(sb); 1470 struct jfs_log *log = sbi->log; 1471 struct block_device *bdev; 1472 int rc = 0; 1473 1474 jfs_info("lmLogClose: log:0x%p", log); 1475 1476 down(&jfs_log_sem); 1477 LOG_LOCK(log); 1478 list_del(&sbi->log_list); 1479 LOG_UNLOCK(log); 1480 sbi->log = NULL; 1481 1482 /* 1483 * We need to make sure all of the "written" metapages 1484 * actually make it to disk 1485 */ 1486 sync_blockdev(sb->s_bdev); 1487 1488 if (test_bit(log_INLINELOG, &log->flag)) { 1489 /* 1490 * in-line log in host file system 1491 */ 1492 rc = lmLogShutdown(log); 1493 kfree(log); 1494 goto out; 1495 } 1496 1497 if (!log->no_integrity) 1498 lmLogFileSystem(log, sbi, 0); 1499 1500 if (!list_empty(&log->sb_list)) 1501 goto out; 1502 1503 /* 1504 * TODO: ensure that the dummy_log is in a state to allow 1505 * lbmLogShutdown to deallocate all the buffers and call 1506 * kfree against dummy_log. For now, leave dummy_log & its 1507 * buffers in memory, and resuse if another no-integrity mount 1508 * is requested. 1509 */ 1510 if (log->no_integrity) 1511 goto out; 1512 1513 /* 1514 * external log as separate logical volume 1515 */ 1516 list_del(&log->journal_list); 1517 bdev = log->bdev; 1518 rc = lmLogShutdown(log); 1519 1520 bd_release(bdev); 1521 blkdev_put(bdev); 1522 1523 kfree(log); 1524 1525 out: 1526 up(&jfs_log_sem); 1527 jfs_info("lmLogClose: exit(%d)", rc); 1528 return rc; 1529 } 1530 1531 1532 /* 1533 * NAME: jfs_flush_journal() 1534 * 1535 * FUNCTION: initiate write of any outstanding transactions to the journal 1536 * and optionally wait until they are all written to disk 1537 * 1538 * wait == 0 flush until latest txn is committed, don't wait 1539 * wait == 1 flush until latest txn is committed, wait 1540 * wait > 1 flush until all txn's are complete, wait 1541 */ 1542 void jfs_flush_journal(struct jfs_log *log, int wait) 1543 { 1544 int i; 1545 struct tblock *target = NULL; 1546 struct jfs_sb_info *sbi; 1547 1548 /* jfs_write_inode may call us during read-only mount */ 1549 if (!log) 1550 return; 1551 1552 jfs_info("jfs_flush_journal: log:0x%p wait=%d", log, wait); 1553 1554 LOGGC_LOCK(log); 1555 1556 if (!list_empty(&log->cqueue)) { 1557 /* 1558 * This ensures that we will keep writing to the journal as long 1559 * as there are unwritten commit records 1560 */ 1561 target = list_entry(log->cqueue.prev, struct tblock, cqueue); 1562 1563 if (test_bit(log_FLUSH, &log->flag)) { 1564 /* 1565 * We're already flushing. 1566 * if flush_tblk is NULL, we are flushing everything, 1567 * so leave it that way. Otherwise, update it to the 1568 * latest transaction 1569 */ 1570 if (log->flush_tblk) 1571 log->flush_tblk = target; 1572 } else { 1573 /* Only flush until latest transaction is committed */ 1574 log->flush_tblk = target; 1575 set_bit(log_FLUSH, &log->flag); 1576 1577 /* 1578 * Initiate I/O on outstanding transactions 1579 */ 1580 if (!(log->cflag & logGC_PAGEOUT)) { 1581 log->cflag |= logGC_PAGEOUT; 1582 lmGCwrite(log, 0); 1583 } 1584 } 1585 } 1586 if ((wait > 1) || test_bit(log_SYNCBARRIER, &log->flag)) { 1587 /* Flush until all activity complete */ 1588 set_bit(log_FLUSH, &log->flag); 1589 log->flush_tblk = NULL; 1590 } 1591 1592 if (wait && target && !(target->flag & tblkGC_COMMITTED)) { 1593 DECLARE_WAITQUEUE(__wait, current); 1594 1595 add_wait_queue(&target->gcwait, &__wait); 1596 set_current_state(TASK_UNINTERRUPTIBLE); 1597 LOGGC_UNLOCK(log); 1598 schedule(); 1599 current->state = TASK_RUNNING; 1600 LOGGC_LOCK(log); 1601 remove_wait_queue(&target->gcwait, &__wait); 1602 } 1603 LOGGC_UNLOCK(log); 1604 1605 if (wait < 2) 1606 return; 1607 1608 list_for_each_entry(sbi, &log->sb_list, log_list) { 1609 filemap_fdatawrite(sbi->ipbmap->i_mapping); 1610 filemap_fdatawrite(sbi->ipimap->i_mapping); 1611 filemap_fdatawrite(sbi->direct_inode->i_mapping); 1612 } 1613 1614 /* 1615 * If there was recent activity, we may need to wait 1616 * for the lazycommit thread to catch up 1617 */ 1618 if ((!list_empty(&log->cqueue)) || !list_empty(&log->synclist)) { 1619 for (i = 0; i < 200; i++) { /* Too much? */ 1620 msleep(250); 1621 if (list_empty(&log->cqueue) && 1622 list_empty(&log->synclist)) 1623 break; 1624 } 1625 } 1626 assert(list_empty(&log->cqueue)); 1627 if (!list_empty(&log->synclist)) { 1628 struct logsyncblk *lp; 1629 1630 list_for_each_entry(lp, &log->synclist, synclist) { 1631 if (lp->xflag & COMMIT_PAGE) { 1632 struct metapage *mp = (struct metapage *)lp; 1633 dump_mem("orphan metapage", lp, 1634 sizeof(struct metapage)); 1635 dump_mem("page", mp->page, sizeof(struct page)); 1636 } 1637 else 1638 dump_mem("orphan tblock", lp, 1639 sizeof(struct tblock)); 1640 } 1641 // current->state = TASK_INTERRUPTIBLE; 1642 // schedule(); 1643 } 1644 //assert(list_empty(&log->synclist)); 1645 clear_bit(log_FLUSH, &log->flag); 1646 } 1647 1648 /* 1649 * NAME: lmLogShutdown() 1650 * 1651 * FUNCTION: log shutdown at last LogClose(). 1652 * 1653 * write log syncpt record. 1654 * update super block to set redone flag to 0. 1655 * 1656 * PARAMETER: log - log inode 1657 * 1658 * RETURN: 0 - success 1659 * 1660 * serialization: single last close thread 1661 */ 1662 int lmLogShutdown(struct jfs_log * log) 1663 { 1664 int rc; 1665 struct lrd lrd; 1666 int lsn; 1667 struct logsuper *logsuper; 1668 struct lbuf *bpsuper; 1669 struct lbuf *bp; 1670 struct logpage *lp; 1671 1672 jfs_info("lmLogShutdown: log:0x%p", log); 1673 1674 jfs_flush_journal(log, 2); 1675 1676 /* 1677 * write the last SYNCPT record with syncpoint = 0 1678 * (i.e., log redo up to HERE !) 1679 */ 1680 lrd.logtid = 0; 1681 lrd.backchain = 0; 1682 lrd.type = cpu_to_le16(LOG_SYNCPT); 1683 lrd.length = 0; 1684 lrd.log.syncpt.sync = 0; 1685 1686 lsn = lmWriteRecord(log, NULL, &lrd, NULL); 1687 bp = log->bp; 1688 lp = (struct logpage *) bp->l_ldata; 1689 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor); 1690 lbmWrite(log, log->bp, lbmWRITE | lbmRELEASE | lbmSYNC, 0); 1691 lbmIOWait(log->bp, lbmFREE); 1692 log->bp = NULL; 1693 1694 /* 1695 * synchronous update log superblock 1696 * mark log state as shutdown cleanly 1697 * (i.e., Log does not need to be replayed). 1698 */ 1699 if ((rc = lbmRead(log, 1, &bpsuper))) 1700 goto out; 1701 1702 logsuper = (struct logsuper *) bpsuper->l_ldata; 1703 logsuper->state = cpu_to_le32(LOGREDONE); 1704 logsuper->end = cpu_to_le32(lsn); 1705 lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC); 1706 rc = lbmIOWait(bpsuper, lbmFREE); 1707 1708 jfs_info("lmLogShutdown: lsn:0x%x page:%d eor:%d", 1709 lsn, log->page, log->eor); 1710 1711 out: 1712 /* 1713 * shutdown per log i/o 1714 */ 1715 lbmLogShutdown(log); 1716 1717 if (rc) { 1718 jfs_warn("lmLogShutdown: exit(%d)", rc); 1719 } 1720 return rc; 1721 } 1722 1723 1724 /* 1725 * NAME: lmLogFileSystem() 1726 * 1727 * FUNCTION: insert (<activate> = true)/remove (<activate> = false) 1728 * file system into/from log active file system list. 1729 * 1730 * PARAMETE: log - pointer to logs inode. 1731 * fsdev - kdev_t of filesystem. 1732 * serial - pointer to returned log serial number 1733 * activate - insert/remove device from active list. 1734 * 1735 * RETURN: 0 - success 1736 * errors returned by vms_iowait(). 1737 */ 1738 static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi, 1739 int activate) 1740 { 1741 int rc = 0; 1742 int i; 1743 struct logsuper *logsuper; 1744 struct lbuf *bpsuper; 1745 char *uuid = sbi->uuid; 1746 1747 /* 1748 * insert/remove file system device to log active file system list. 1749 */ 1750 if ((rc = lbmRead(log, 1, &bpsuper))) 1751 return rc; 1752 1753 logsuper = (struct logsuper *) bpsuper->l_ldata; 1754 if (activate) { 1755 for (i = 0; i < MAX_ACTIVE; i++) 1756 if (!memcmp(logsuper->active[i].uuid, NULL_UUID, 16)) { 1757 memcpy(logsuper->active[i].uuid, uuid, 16); 1758 sbi->aggregate = i; 1759 break; 1760 } 1761 if (i == MAX_ACTIVE) { 1762 jfs_warn("Too many file systems sharing journal!"); 1763 lbmFree(bpsuper); 1764 return -EMFILE; /* Is there a better rc? */ 1765 } 1766 } else { 1767 for (i = 0; i < MAX_ACTIVE; i++) 1768 if (!memcmp(logsuper->active[i].uuid, uuid, 16)) { 1769 memcpy(logsuper->active[i].uuid, NULL_UUID, 16); 1770 break; 1771 } 1772 if (i == MAX_ACTIVE) { 1773 jfs_warn("Somebody stomped on the journal!"); 1774 lbmFree(bpsuper); 1775 return -EIO; 1776 } 1777 1778 } 1779 1780 /* 1781 * synchronous write log superblock: 1782 * 1783 * write sidestream bypassing write queue: 1784 * at file system mount, log super block is updated for 1785 * activation of the file system before any log record 1786 * (MOUNT record) of the file system, and at file system 1787 * unmount, all meta data for the file system has been 1788 * flushed before log super block is updated for deactivation 1789 * of the file system. 1790 */ 1791 lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC); 1792 rc = lbmIOWait(bpsuper, lbmFREE); 1793 1794 return rc; 1795 } 1796 1797 /* 1798 * log buffer manager (lbm) 1799 * ------------------------ 1800 * 1801 * special purpose buffer manager supporting log i/o requirements. 1802 * 1803 * per log write queue: 1804 * log pageout occurs in serial order by fifo write queue and 1805 * restricting to a single i/o in pregress at any one time. 1806 * a circular singly-linked list 1807 * (log->wrqueue points to the tail, and buffers are linked via 1808 * bp->wrqueue field), and 1809 * maintains log page in pageout ot waiting for pageout in serial pageout. 1810 */ 1811 1812 /* 1813 * lbmLogInit() 1814 * 1815 * initialize per log I/O setup at lmLogInit() 1816 */ 1817 static int lbmLogInit(struct jfs_log * log) 1818 { /* log inode */ 1819 int i; 1820 struct lbuf *lbuf; 1821 1822 jfs_info("lbmLogInit: log:0x%p", log); 1823 1824 /* initialize current buffer cursor */ 1825 log->bp = NULL; 1826 1827 /* initialize log device write queue */ 1828 log->wqueue = NULL; 1829 1830 /* 1831 * Each log has its own buffer pages allocated to it. These are 1832 * not managed by the page cache. This ensures that a transaction 1833 * writing to the log does not block trying to allocate a page from 1834 * the page cache (for the log). This would be bad, since page 1835 * allocation waits on the kswapd thread that may be committing inodes 1836 * which would cause log activity. Was that clear? I'm trying to 1837 * avoid deadlock here. 1838 */ 1839 init_waitqueue_head(&log->free_wait); 1840 1841 log->lbuf_free = NULL; 1842 1843 for (i = 0; i < LOGPAGES;) { 1844 char *buffer; 1845 uint offset; 1846 struct page *page; 1847 1848 buffer = (char *) get_zeroed_page(GFP_KERNEL); 1849 if (buffer == NULL) 1850 goto error; 1851 page = virt_to_page(buffer); 1852 for (offset = 0; offset < PAGE_SIZE; offset += LOGPSIZE) { 1853 lbuf = kmalloc(sizeof(struct lbuf), GFP_KERNEL); 1854 if (lbuf == NULL) { 1855 if (offset == 0) 1856 free_page((unsigned long) buffer); 1857 goto error; 1858 } 1859 if (offset) /* we already have one reference */ 1860 get_page(page); 1861 lbuf->l_offset = offset; 1862 lbuf->l_ldata = buffer + offset; 1863 lbuf->l_page = page; 1864 lbuf->l_log = log; 1865 init_waitqueue_head(&lbuf->l_ioevent); 1866 1867 lbuf->l_freelist = log->lbuf_free; 1868 log->lbuf_free = lbuf; 1869 i++; 1870 } 1871 } 1872 1873 return (0); 1874 1875 error: 1876 lbmLogShutdown(log); 1877 return -ENOMEM; 1878 } 1879 1880 1881 /* 1882 * lbmLogShutdown() 1883 * 1884 * finalize per log I/O setup at lmLogShutdown() 1885 */ 1886 static void lbmLogShutdown(struct jfs_log * log) 1887 { 1888 struct lbuf *lbuf; 1889 1890 jfs_info("lbmLogShutdown: log:0x%p", log); 1891 1892 lbuf = log->lbuf_free; 1893 while (lbuf) { 1894 struct lbuf *next = lbuf->l_freelist; 1895 __free_page(lbuf->l_page); 1896 kfree(lbuf); 1897 lbuf = next; 1898 } 1899 } 1900 1901 1902 /* 1903 * lbmAllocate() 1904 * 1905 * allocate an empty log buffer 1906 */ 1907 static struct lbuf *lbmAllocate(struct jfs_log * log, int pn) 1908 { 1909 struct lbuf *bp; 1910 unsigned long flags; 1911 1912 /* 1913 * recycle from log buffer freelist if any 1914 */ 1915 LCACHE_LOCK(flags); 1916 LCACHE_SLEEP_COND(log->free_wait, (bp = log->lbuf_free), flags); 1917 log->lbuf_free = bp->l_freelist; 1918 LCACHE_UNLOCK(flags); 1919 1920 bp->l_flag = 0; 1921 1922 bp->l_wqnext = NULL; 1923 bp->l_freelist = NULL; 1924 1925 bp->l_pn = pn; 1926 bp->l_blkno = log->base + (pn << (L2LOGPSIZE - log->l2bsize)); 1927 bp->l_ceor = 0; 1928 1929 return bp; 1930 } 1931 1932 1933 /* 1934 * lbmFree() 1935 * 1936 * release a log buffer to freelist 1937 */ 1938 static void lbmFree(struct lbuf * bp) 1939 { 1940 unsigned long flags; 1941 1942 LCACHE_LOCK(flags); 1943 1944 lbmfree(bp); 1945 1946 LCACHE_UNLOCK(flags); 1947 } 1948 1949 static void lbmfree(struct lbuf * bp) 1950 { 1951 struct jfs_log *log = bp->l_log; 1952 1953 assert(bp->l_wqnext == NULL); 1954 1955 /* 1956 * return the buffer to head of freelist 1957 */ 1958 bp->l_freelist = log->lbuf_free; 1959 log->lbuf_free = bp; 1960 1961 wake_up(&log->free_wait); 1962 return; 1963 } 1964 1965 1966 /* 1967 * NAME: lbmRedrive 1968 * 1969 * FUNCTION: add a log buffer to the the log redrive list 1970 * 1971 * PARAMETER: 1972 * bp - log buffer 1973 * 1974 * NOTES: 1975 * Takes log_redrive_lock. 1976 */ 1977 static inline void lbmRedrive(struct lbuf *bp) 1978 { 1979 unsigned long flags; 1980 1981 spin_lock_irqsave(&log_redrive_lock, flags); 1982 bp->l_redrive_next = log_redrive_list; 1983 log_redrive_list = bp; 1984 spin_unlock_irqrestore(&log_redrive_lock, flags); 1985 1986 wake_up(&jfs_IO_thread_wait); 1987 } 1988 1989 1990 /* 1991 * lbmRead() 1992 */ 1993 static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp) 1994 { 1995 struct bio *bio; 1996 struct lbuf *bp; 1997 1998 /* 1999 * allocate a log buffer 2000 */ 2001 *bpp = bp = lbmAllocate(log, pn); 2002 jfs_info("lbmRead: bp:0x%p pn:0x%x", bp, pn); 2003 2004 bp->l_flag |= lbmREAD; 2005 2006 bio = bio_alloc(GFP_NOFS, 1); 2007 2008 bio->bi_sector = bp->l_blkno << (log->l2bsize - 9); 2009 bio->bi_bdev = log->bdev; 2010 bio->bi_io_vec[0].bv_page = bp->l_page; 2011 bio->bi_io_vec[0].bv_len = LOGPSIZE; 2012 bio->bi_io_vec[0].bv_offset = bp->l_offset; 2013 2014 bio->bi_vcnt = 1; 2015 bio->bi_idx = 0; 2016 bio->bi_size = LOGPSIZE; 2017 2018 bio->bi_end_io = lbmIODone; 2019 bio->bi_private = bp; 2020 submit_bio(READ_SYNC, bio); 2021 2022 wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD)); 2023 2024 return 0; 2025 } 2026 2027 2028 /* 2029 * lbmWrite() 2030 * 2031 * buffer at head of pageout queue stays after completion of 2032 * partial-page pageout and redriven by explicit initiation of 2033 * pageout by caller until full-page pageout is completed and 2034 * released. 2035 * 2036 * device driver i/o done redrives pageout of new buffer at 2037 * head of pageout queue when current buffer at head of pageout 2038 * queue is released at the completion of its full-page pageout. 2039 * 2040 * LOGGC_LOCK() serializes lbmWrite() by lmNextPage() and lmGroupCommit(). 2041 * LCACHE_LOCK() serializes xflag between lbmWrite() and lbmIODone() 2042 */ 2043 static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag, 2044 int cant_block) 2045 { 2046 struct lbuf *tail; 2047 unsigned long flags; 2048 2049 jfs_info("lbmWrite: bp:0x%p flag:0x%x pn:0x%x", bp, flag, bp->l_pn); 2050 2051 /* map the logical block address to physical block address */ 2052 bp->l_blkno = 2053 log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize)); 2054 2055 LCACHE_LOCK(flags); /* disable+lock */ 2056 2057 /* 2058 * initialize buffer for device driver 2059 */ 2060 bp->l_flag = flag; 2061 2062 /* 2063 * insert bp at tail of write queue associated with log 2064 * 2065 * (request is either for bp already/currently at head of queue 2066 * or new bp to be inserted at tail) 2067 */ 2068 tail = log->wqueue; 2069 2070 /* is buffer not already on write queue ? */ 2071 if (bp->l_wqnext == NULL) { 2072 /* insert at tail of wqueue */ 2073 if (tail == NULL) { 2074 log->wqueue = bp; 2075 bp->l_wqnext = bp; 2076 } else { 2077 log->wqueue = bp; 2078 bp->l_wqnext = tail->l_wqnext; 2079 tail->l_wqnext = bp; 2080 } 2081 2082 tail = bp; 2083 } 2084 2085 /* is buffer at head of wqueue and for write ? */ 2086 if ((bp != tail->l_wqnext) || !(flag & lbmWRITE)) { 2087 LCACHE_UNLOCK(flags); /* unlock+enable */ 2088 return; 2089 } 2090 2091 LCACHE_UNLOCK(flags); /* unlock+enable */ 2092 2093 if (cant_block) 2094 lbmRedrive(bp); 2095 else if (flag & lbmSYNC) 2096 lbmStartIO(bp); 2097 else { 2098 LOGGC_UNLOCK(log); 2099 lbmStartIO(bp); 2100 LOGGC_LOCK(log); 2101 } 2102 } 2103 2104 2105 /* 2106 * lbmDirectWrite() 2107 * 2108 * initiate pageout bypassing write queue for sidestream 2109 * (e.g., log superblock) write; 2110 */ 2111 static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag) 2112 { 2113 jfs_info("lbmDirectWrite: bp:0x%p flag:0x%x pn:0x%x", 2114 bp, flag, bp->l_pn); 2115 2116 /* 2117 * initialize buffer for device driver 2118 */ 2119 bp->l_flag = flag | lbmDIRECT; 2120 2121 /* map the logical block address to physical block address */ 2122 bp->l_blkno = 2123 log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize)); 2124 2125 /* 2126 * initiate pageout of the page 2127 */ 2128 lbmStartIO(bp); 2129 } 2130 2131 2132 /* 2133 * NAME: lbmStartIO() 2134 * 2135 * FUNCTION: Interface to DD strategy routine 2136 * 2137 * RETURN: none 2138 * 2139 * serialization: LCACHE_LOCK() is NOT held during log i/o; 2140 */ 2141 static void lbmStartIO(struct lbuf * bp) 2142 { 2143 struct bio *bio; 2144 struct jfs_log *log = bp->l_log; 2145 2146 jfs_info("lbmStartIO\n"); 2147 2148 bio = bio_alloc(GFP_NOFS, 1); 2149 bio->bi_sector = bp->l_blkno << (log->l2bsize - 9); 2150 bio->bi_bdev = log->bdev; 2151 bio->bi_io_vec[0].bv_page = bp->l_page; 2152 bio->bi_io_vec[0].bv_len = LOGPSIZE; 2153 bio->bi_io_vec[0].bv_offset = bp->l_offset; 2154 2155 bio->bi_vcnt = 1; 2156 bio->bi_idx = 0; 2157 bio->bi_size = LOGPSIZE; 2158 2159 bio->bi_end_io = lbmIODone; 2160 bio->bi_private = bp; 2161 2162 /* check if journaling to disk has been disabled */ 2163 if (log->no_integrity) { 2164 bio->bi_size = 0; 2165 lbmIODone(bio, 0, 0); 2166 } else { 2167 submit_bio(WRITE_SYNC, bio); 2168 INCREMENT(lmStat.submitted); 2169 } 2170 } 2171 2172 2173 /* 2174 * lbmIOWait() 2175 */ 2176 static int lbmIOWait(struct lbuf * bp, int flag) 2177 { 2178 unsigned long flags; 2179 int rc = 0; 2180 2181 jfs_info("lbmIOWait1: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag); 2182 2183 LCACHE_LOCK(flags); /* disable+lock */ 2184 2185 LCACHE_SLEEP_COND(bp->l_ioevent, (bp->l_flag & lbmDONE), flags); 2186 2187 rc = (bp->l_flag & lbmERROR) ? -EIO : 0; 2188 2189 if (flag & lbmFREE) 2190 lbmfree(bp); 2191 2192 LCACHE_UNLOCK(flags); /* unlock+enable */ 2193 2194 jfs_info("lbmIOWait2: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag); 2195 return rc; 2196 } 2197 2198 /* 2199 * lbmIODone() 2200 * 2201 * executed at INTIODONE level 2202 */ 2203 static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error) 2204 { 2205 struct lbuf *bp = bio->bi_private; 2206 struct lbuf *nextbp, *tail; 2207 struct jfs_log *log; 2208 unsigned long flags; 2209 2210 if (bio->bi_size) 2211 return 1; 2212 2213 /* 2214 * get back jfs buffer bound to the i/o buffer 2215 */ 2216 jfs_info("lbmIODone: bp:0x%p flag:0x%x", bp, bp->l_flag); 2217 2218 LCACHE_LOCK(flags); /* disable+lock */ 2219 2220 bp->l_flag |= lbmDONE; 2221 2222 if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { 2223 bp->l_flag |= lbmERROR; 2224 2225 jfs_err("lbmIODone: I/O error in JFS log"); 2226 } 2227 2228 bio_put(bio); 2229 2230 /* 2231 * pagein completion 2232 */ 2233 if (bp->l_flag & lbmREAD) { 2234 bp->l_flag &= ~lbmREAD; 2235 2236 LCACHE_UNLOCK(flags); /* unlock+enable */ 2237 2238 /* wakeup I/O initiator */ 2239 LCACHE_WAKEUP(&bp->l_ioevent); 2240 2241 return 0; 2242 } 2243 2244 /* 2245 * pageout completion 2246 * 2247 * the bp at the head of write queue has completed pageout. 2248 * 2249 * if single-commit/full-page pageout, remove the current buffer 2250 * from head of pageout queue, and redrive pageout with 2251 * the new buffer at head of pageout queue; 2252 * otherwise, the partial-page pageout buffer stays at 2253 * the head of pageout queue to be redriven for pageout 2254 * by lmGroupCommit() until full-page pageout is completed. 2255 */ 2256 bp->l_flag &= ~lbmWRITE; 2257 INCREMENT(lmStat.pagedone); 2258 2259 /* update committed lsn */ 2260 log = bp->l_log; 2261 log->clsn = (bp->l_pn << L2LOGPSIZE) + bp->l_ceor; 2262 2263 if (bp->l_flag & lbmDIRECT) { 2264 LCACHE_WAKEUP(&bp->l_ioevent); 2265 LCACHE_UNLOCK(flags); 2266 return 0; 2267 } 2268 2269 tail = log->wqueue; 2270 2271 /* single element queue */ 2272 if (bp == tail) { 2273 /* remove head buffer of full-page pageout 2274 * from log device write queue 2275 */ 2276 if (bp->l_flag & lbmRELEASE) { 2277 log->wqueue = NULL; 2278 bp->l_wqnext = NULL; 2279 } 2280 } 2281 /* multi element queue */ 2282 else { 2283 /* remove head buffer of full-page pageout 2284 * from log device write queue 2285 */ 2286 if (bp->l_flag & lbmRELEASE) { 2287 nextbp = tail->l_wqnext = bp->l_wqnext; 2288 bp->l_wqnext = NULL; 2289 2290 /* 2291 * redrive pageout of next page at head of write queue: 2292 * redrive next page without any bound tblk 2293 * (i.e., page w/o any COMMIT records), or 2294 * first page of new group commit which has been 2295 * queued after current page (subsequent pageout 2296 * is performed synchronously, except page without 2297 * any COMMITs) by lmGroupCommit() as indicated 2298 * by lbmWRITE flag; 2299 */ 2300 if (nextbp->l_flag & lbmWRITE) { 2301 /* 2302 * We can't do the I/O at interrupt time. 2303 * The jfsIO thread can do it 2304 */ 2305 lbmRedrive(nextbp); 2306 } 2307 } 2308 } 2309 2310 /* 2311 * synchronous pageout: 2312 * 2313 * buffer has not necessarily been removed from write queue 2314 * (e.g., synchronous write of partial-page with COMMIT): 2315 * leave buffer for i/o initiator to dispose 2316 */ 2317 if (bp->l_flag & lbmSYNC) { 2318 LCACHE_UNLOCK(flags); /* unlock+enable */ 2319 2320 /* wakeup I/O initiator */ 2321 LCACHE_WAKEUP(&bp->l_ioevent); 2322 } 2323 2324 /* 2325 * Group Commit pageout: 2326 */ 2327 else if (bp->l_flag & lbmGC) { 2328 LCACHE_UNLOCK(flags); 2329 lmPostGC(bp); 2330 } 2331 2332 /* 2333 * asynchronous pageout: 2334 * 2335 * buffer must have been removed from write queue: 2336 * insert buffer at head of freelist where it can be recycled 2337 */ 2338 else { 2339 assert(bp->l_flag & lbmRELEASE); 2340 assert(bp->l_flag & lbmFREE); 2341 lbmfree(bp); 2342 2343 LCACHE_UNLOCK(flags); /* unlock+enable */ 2344 } 2345 2346 return 0; 2347 } 2348 2349 int jfsIOWait(void *arg) 2350 { 2351 struct lbuf *bp; 2352 2353 daemonize("jfsIO"); 2354 2355 complete(&jfsIOwait); 2356 2357 do { 2358 DECLARE_WAITQUEUE(wq, current); 2359 2360 spin_lock_irq(&log_redrive_lock); 2361 while ((bp = log_redrive_list) != 0) { 2362 log_redrive_list = bp->l_redrive_next; 2363 bp->l_redrive_next = NULL; 2364 spin_unlock_irq(&log_redrive_lock); 2365 lbmStartIO(bp); 2366 spin_lock_irq(&log_redrive_lock); 2367 } 2368 if (current->flags & PF_FREEZE) { 2369 spin_unlock_irq(&log_redrive_lock); 2370 refrigerator(PF_FREEZE); 2371 } else { 2372 add_wait_queue(&jfs_IO_thread_wait, &wq); 2373 set_current_state(TASK_INTERRUPTIBLE); 2374 spin_unlock_irq(&log_redrive_lock); 2375 schedule(); 2376 current->state = TASK_RUNNING; 2377 remove_wait_queue(&jfs_IO_thread_wait, &wq); 2378 } 2379 } while (!jfs_stop_threads); 2380 2381 jfs_info("jfsIOWait being killed!"); 2382 complete_and_exit(&jfsIOwait, 0); 2383 } 2384 2385 /* 2386 * NAME: lmLogFormat()/jfs_logform() 2387 * 2388 * FUNCTION: format file system log 2389 * 2390 * PARAMETERS: 2391 * log - volume log 2392 * logAddress - start address of log space in FS block 2393 * logSize - length of log space in FS block; 2394 * 2395 * RETURN: 0 - success 2396 * -EIO - i/o error 2397 * 2398 * XXX: We're synchronously writing one page at a time. This needs to 2399 * be improved by writing multiple pages at once. 2400 */ 2401 int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize) 2402 { 2403 int rc = -EIO; 2404 struct jfs_sb_info *sbi; 2405 struct logsuper *logsuper; 2406 struct logpage *lp; 2407 int lspn; /* log sequence page number */ 2408 struct lrd *lrd_ptr; 2409 int npages = 0; 2410 struct lbuf *bp; 2411 2412 jfs_info("lmLogFormat: logAddress:%Ld logSize:%d", 2413 (long long)logAddress, logSize); 2414 2415 sbi = list_entry(log->sb_list.next, struct jfs_sb_info, log_list); 2416 2417 /* allocate a log buffer */ 2418 bp = lbmAllocate(log, 1); 2419 2420 npages = logSize >> sbi->l2nbperpage; 2421 2422 /* 2423 * log space: 2424 * 2425 * page 0 - reserved; 2426 * page 1 - log superblock; 2427 * page 2 - log data page: A SYNC log record is written 2428 * into this page at logform time; 2429 * pages 3-N - log data page: set to empty log data pages; 2430 */ 2431 /* 2432 * init log superblock: log page 1 2433 */ 2434 logsuper = (struct logsuper *) bp->l_ldata; 2435 2436 logsuper->magic = cpu_to_le32(LOGMAGIC); 2437 logsuper->version = cpu_to_le32(LOGVERSION); 2438 logsuper->state = cpu_to_le32(LOGREDONE); 2439 logsuper->flag = cpu_to_le32(sbi->mntflag); /* ? */ 2440 logsuper->size = cpu_to_le32(npages); 2441 logsuper->bsize = cpu_to_le32(sbi->bsize); 2442 logsuper->l2bsize = cpu_to_le32(sbi->l2bsize); 2443 logsuper->end = cpu_to_le32(2 * LOGPSIZE + LOGPHDRSIZE + LOGRDSIZE); 2444 2445 bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT; 2446 bp->l_blkno = logAddress + sbi->nbperpage; 2447 lbmStartIO(bp); 2448 if ((rc = lbmIOWait(bp, 0))) 2449 goto exit; 2450 2451 /* 2452 * init pages 2 to npages-1 as log data pages: 2453 * 2454 * log page sequence number (lpsn) initialization: 2455 * 2456 * pn: 0 1 2 3 n-1 2457 * +-----+-----+=====+=====+===.....===+=====+ 2458 * lspn: N-1 0 1 N-2 2459 * <--- N page circular file ----> 2460 * 2461 * the N (= npages-2) data pages of the log is maintained as 2462 * a circular file for the log records; 2463 * lpsn grows by 1 monotonically as each log page is written 2464 * to the circular file of the log; 2465 * and setLogpage() will not reset the page number even if 2466 * the eor is equal to LOGPHDRSIZE. In order for binary search 2467 * still work in find log end process, we have to simulate the 2468 * log wrap situation at the log format time. 2469 * The 1st log page written will have the highest lpsn. Then 2470 * the succeeding log pages will have ascending order of 2471 * the lspn starting from 0, ... (N-2) 2472 */ 2473 lp = (struct logpage *) bp->l_ldata; 2474 /* 2475 * initialize 1st log page to be written: lpsn = N - 1, 2476 * write a SYNCPT log record is written to this page 2477 */ 2478 lp->h.page = lp->t.page = cpu_to_le32(npages - 3); 2479 lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE + LOGRDSIZE); 2480 2481 lrd_ptr = (struct lrd *) &lp->data; 2482 lrd_ptr->logtid = 0; 2483 lrd_ptr->backchain = 0; 2484 lrd_ptr->type = cpu_to_le16(LOG_SYNCPT); 2485 lrd_ptr->length = 0; 2486 lrd_ptr->log.syncpt.sync = 0; 2487 2488 bp->l_blkno += sbi->nbperpage; 2489 bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT; 2490 lbmStartIO(bp); 2491 if ((rc = lbmIOWait(bp, 0))) 2492 goto exit; 2493 2494 /* 2495 * initialize succeeding log pages: lpsn = 0, 1, ..., (N-2) 2496 */ 2497 for (lspn = 0; lspn < npages - 3; lspn++) { 2498 lp->h.page = lp->t.page = cpu_to_le32(lspn); 2499 lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE); 2500 2501 bp->l_blkno += sbi->nbperpage; 2502 bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT; 2503 lbmStartIO(bp); 2504 if ((rc = lbmIOWait(bp, 0))) 2505 goto exit; 2506 } 2507 2508 rc = 0; 2509 exit: 2510 /* 2511 * finalize log 2512 */ 2513 /* release the buffer */ 2514 lbmFree(bp); 2515 2516 return rc; 2517 } 2518 2519 #ifdef CONFIG_JFS_STATISTICS 2520 int jfs_lmstats_read(char *buffer, char **start, off_t offset, int length, 2521 int *eof, void *data) 2522 { 2523 int len = 0; 2524 off_t begin; 2525 2526 len += sprintf(buffer, 2527 "JFS Logmgr stats\n" 2528 "================\n" 2529 "commits = %d\n" 2530 "writes submitted = %d\n" 2531 "writes completed = %d\n" 2532 "full pages submitted = %d\n" 2533 "partial pages submitted = %d\n", 2534 lmStat.commit, 2535 lmStat.submitted, 2536 lmStat.pagedone, 2537 lmStat.full_page, 2538 lmStat.partial_page); 2539 2540 begin = offset; 2541 *start = buffer + begin; 2542 len -= begin; 2543 2544 if (len > length) 2545 len = length; 2546 else 2547 *eof = 1; 2548 2549 if (len < 0) 2550 len = 0; 2551 2552 return len; 2553 } 2554 #endif /* CONFIG_JFS_STATISTICS */ 2555