1*470decc6SDave Kleikamp /* 2*470decc6SDave Kleikamp * linux/fs/jbd/journal.c 3*470decc6SDave Kleikamp * 4*470decc6SDave Kleikamp * Written by Stephen C. Tweedie <sct@redhat.com>, 1998 5*470decc6SDave Kleikamp * 6*470decc6SDave Kleikamp * Copyright 1998 Red Hat corp --- All Rights Reserved 7*470decc6SDave Kleikamp * 8*470decc6SDave Kleikamp * This file is part of the Linux kernel and is made available under 9*470decc6SDave Kleikamp * the terms of the GNU General Public License, version 2, or at your 10*470decc6SDave Kleikamp * option, any later version, incorporated herein by reference. 11*470decc6SDave Kleikamp * 12*470decc6SDave Kleikamp * Generic filesystem journal-writing code; part of the ext2fs 13*470decc6SDave Kleikamp * journaling system. 14*470decc6SDave Kleikamp * 15*470decc6SDave Kleikamp * This file manages journals: areas of disk reserved for logging 16*470decc6SDave Kleikamp * transactional updates. This includes the kernel journaling thread 17*470decc6SDave Kleikamp * which is responsible for scheduling updates to the log. 18*470decc6SDave Kleikamp * 19*470decc6SDave Kleikamp * We do not actually manage the physical storage of the journal in this 20*470decc6SDave Kleikamp * file: that is left to a per-journal policy function, which allows us 21*470decc6SDave Kleikamp * to store the journal within a filesystem-specified area for ext2 22*470decc6SDave Kleikamp * journaling (ext2 can use a reserved inode for storing the log). 
23*470decc6SDave Kleikamp */ 24*470decc6SDave Kleikamp 25*470decc6SDave Kleikamp #include <linux/module.h> 26*470decc6SDave Kleikamp #include <linux/time.h> 27*470decc6SDave Kleikamp #include <linux/fs.h> 28*470decc6SDave Kleikamp #include <linux/jbd.h> 29*470decc6SDave Kleikamp #include <linux/errno.h> 30*470decc6SDave Kleikamp #include <linux/slab.h> 31*470decc6SDave Kleikamp #include <linux/smp_lock.h> 32*470decc6SDave Kleikamp #include <linux/init.h> 33*470decc6SDave Kleikamp #include <linux/mm.h> 34*470decc6SDave Kleikamp #include <linux/suspend.h> 35*470decc6SDave Kleikamp #include <linux/pagemap.h> 36*470decc6SDave Kleikamp #include <linux/kthread.h> 37*470decc6SDave Kleikamp #include <linux/poison.h> 38*470decc6SDave Kleikamp #include <linux/proc_fs.h> 39*470decc6SDave Kleikamp 40*470decc6SDave Kleikamp #include <asm/uaccess.h> 41*470decc6SDave Kleikamp #include <asm/page.h> 42*470decc6SDave Kleikamp 43*470decc6SDave Kleikamp EXPORT_SYMBOL(journal_start); 44*470decc6SDave Kleikamp EXPORT_SYMBOL(journal_restart); 45*470decc6SDave Kleikamp EXPORT_SYMBOL(journal_extend); 46*470decc6SDave Kleikamp EXPORT_SYMBOL(journal_stop); 47*470decc6SDave Kleikamp EXPORT_SYMBOL(journal_lock_updates); 48*470decc6SDave Kleikamp EXPORT_SYMBOL(journal_unlock_updates); 49*470decc6SDave Kleikamp EXPORT_SYMBOL(journal_get_write_access); 50*470decc6SDave Kleikamp EXPORT_SYMBOL(journal_get_create_access); 51*470decc6SDave Kleikamp EXPORT_SYMBOL(journal_get_undo_access); 52*470decc6SDave Kleikamp EXPORT_SYMBOL(journal_dirty_data); 53*470decc6SDave Kleikamp EXPORT_SYMBOL(journal_dirty_metadata); 54*470decc6SDave Kleikamp EXPORT_SYMBOL(journal_release_buffer); 55*470decc6SDave Kleikamp EXPORT_SYMBOL(journal_forget); 56*470decc6SDave Kleikamp #if 0 57*470decc6SDave Kleikamp EXPORT_SYMBOL(journal_sync_buffer); 58*470decc6SDave Kleikamp #endif 59*470decc6SDave Kleikamp EXPORT_SYMBOL(journal_flush); 60*470decc6SDave Kleikamp EXPORT_SYMBOL(journal_revoke); 61*470decc6SDave Kleikamp 
62*470decc6SDave Kleikamp EXPORT_SYMBOL(journal_init_dev); 63*470decc6SDave Kleikamp EXPORT_SYMBOL(journal_init_inode); 64*470decc6SDave Kleikamp EXPORT_SYMBOL(journal_update_format); 65*470decc6SDave Kleikamp EXPORT_SYMBOL(journal_check_used_features); 66*470decc6SDave Kleikamp EXPORT_SYMBOL(journal_check_available_features); 67*470decc6SDave Kleikamp EXPORT_SYMBOL(journal_set_features); 68*470decc6SDave Kleikamp EXPORT_SYMBOL(journal_create); 69*470decc6SDave Kleikamp EXPORT_SYMBOL(journal_load); 70*470decc6SDave Kleikamp EXPORT_SYMBOL(journal_destroy); 71*470decc6SDave Kleikamp EXPORT_SYMBOL(journal_update_superblock); 72*470decc6SDave Kleikamp EXPORT_SYMBOL(journal_abort); 73*470decc6SDave Kleikamp EXPORT_SYMBOL(journal_errno); 74*470decc6SDave Kleikamp EXPORT_SYMBOL(journal_ack_err); 75*470decc6SDave Kleikamp EXPORT_SYMBOL(journal_clear_err); 76*470decc6SDave Kleikamp EXPORT_SYMBOL(log_wait_commit); 77*470decc6SDave Kleikamp EXPORT_SYMBOL(journal_start_commit); 78*470decc6SDave Kleikamp EXPORT_SYMBOL(journal_force_commit_nested); 79*470decc6SDave Kleikamp EXPORT_SYMBOL(journal_wipe); 80*470decc6SDave Kleikamp EXPORT_SYMBOL(journal_blocks_per_page); 81*470decc6SDave Kleikamp EXPORT_SYMBOL(journal_invalidatepage); 82*470decc6SDave Kleikamp EXPORT_SYMBOL(journal_try_to_free_buffers); 83*470decc6SDave Kleikamp EXPORT_SYMBOL(journal_force_commit); 84*470decc6SDave Kleikamp 85*470decc6SDave Kleikamp static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); 86*470decc6SDave Kleikamp static void __journal_abort_soft (journal_t *journal, int errno); 87*470decc6SDave Kleikamp static int journal_create_jbd_slab(size_t slab_size); 88*470decc6SDave Kleikamp 89*470decc6SDave Kleikamp /* 90*470decc6SDave Kleikamp * Helper function used to manage commit timeouts 91*470decc6SDave Kleikamp */ 92*470decc6SDave Kleikamp 93*470decc6SDave Kleikamp static void commit_timeout(unsigned long __data) 94*470decc6SDave Kleikamp { 95*470decc6SDave Kleikamp struct 
task_struct * p = (struct task_struct *) __data; 96*470decc6SDave Kleikamp 97*470decc6SDave Kleikamp wake_up_process(p); 98*470decc6SDave Kleikamp } 99*470decc6SDave Kleikamp 100*470decc6SDave Kleikamp /* 101*470decc6SDave Kleikamp * kjournald: The main thread function used to manage a logging device 102*470decc6SDave Kleikamp * journal. 103*470decc6SDave Kleikamp * 104*470decc6SDave Kleikamp * This kernel thread is responsible for two things: 105*470decc6SDave Kleikamp * 106*470decc6SDave Kleikamp * 1) COMMIT: Every so often we need to commit the current state of the 107*470decc6SDave Kleikamp * filesystem to disk. The journal thread is responsible for writing 108*470decc6SDave Kleikamp * all of the metadata buffers to disk. 109*470decc6SDave Kleikamp * 110*470decc6SDave Kleikamp * 2) CHECKPOINT: We cannot reuse a used section of the log file until all 111*470decc6SDave Kleikamp * of the data in that part of the log has been rewritten elsewhere on 112*470decc6SDave Kleikamp * the disk. Flushing these old buffers to reclaim space in the log is 113*470decc6SDave Kleikamp * known as checkpointing, and this thread is responsible for that job. 
114*470decc6SDave Kleikamp */ 115*470decc6SDave Kleikamp 116*470decc6SDave Kleikamp static int kjournald(void *arg) 117*470decc6SDave Kleikamp { 118*470decc6SDave Kleikamp journal_t *journal = arg; 119*470decc6SDave Kleikamp transaction_t *transaction; 120*470decc6SDave Kleikamp 121*470decc6SDave Kleikamp /* 122*470decc6SDave Kleikamp * Set up an interval timer which can be used to trigger a commit wakeup 123*470decc6SDave Kleikamp * after the commit interval expires 124*470decc6SDave Kleikamp */ 125*470decc6SDave Kleikamp setup_timer(&journal->j_commit_timer, commit_timeout, 126*470decc6SDave Kleikamp (unsigned long)current); 127*470decc6SDave Kleikamp 128*470decc6SDave Kleikamp /* Record that the journal thread is running */ 129*470decc6SDave Kleikamp journal->j_task = current; 130*470decc6SDave Kleikamp wake_up(&journal->j_wait_done_commit); 131*470decc6SDave Kleikamp 132*470decc6SDave Kleikamp printk(KERN_INFO "kjournald starting. Commit interval %ld seconds\n", 133*470decc6SDave Kleikamp journal->j_commit_interval / HZ); 134*470decc6SDave Kleikamp 135*470decc6SDave Kleikamp /* 136*470decc6SDave Kleikamp * And now, wait forever for commit wakeup events. 
137*470decc6SDave Kleikamp */ 138*470decc6SDave Kleikamp spin_lock(&journal->j_state_lock); 139*470decc6SDave Kleikamp 140*470decc6SDave Kleikamp loop: 141*470decc6SDave Kleikamp if (journal->j_flags & JFS_UNMOUNT) 142*470decc6SDave Kleikamp goto end_loop; 143*470decc6SDave Kleikamp 144*470decc6SDave Kleikamp jbd_debug(1, "commit_sequence=%d, commit_request=%d\n", 145*470decc6SDave Kleikamp journal->j_commit_sequence, journal->j_commit_request); 146*470decc6SDave Kleikamp 147*470decc6SDave Kleikamp if (journal->j_commit_sequence != journal->j_commit_request) { 148*470decc6SDave Kleikamp jbd_debug(1, "OK, requests differ\n"); 149*470decc6SDave Kleikamp spin_unlock(&journal->j_state_lock); 150*470decc6SDave Kleikamp del_timer_sync(&journal->j_commit_timer); 151*470decc6SDave Kleikamp journal_commit_transaction(journal); 152*470decc6SDave Kleikamp spin_lock(&journal->j_state_lock); 153*470decc6SDave Kleikamp goto loop; 154*470decc6SDave Kleikamp } 155*470decc6SDave Kleikamp 156*470decc6SDave Kleikamp wake_up(&journal->j_wait_done_commit); 157*470decc6SDave Kleikamp if (freezing(current)) { 158*470decc6SDave Kleikamp /* 159*470decc6SDave Kleikamp * The simpler the better. Flushing journal isn't a 160*470decc6SDave Kleikamp * good idea, because that depends on threads that may 161*470decc6SDave Kleikamp * be already stopped. 
162*470decc6SDave Kleikamp */ 163*470decc6SDave Kleikamp jbd_debug(1, "Now suspending kjournald\n"); 164*470decc6SDave Kleikamp spin_unlock(&journal->j_state_lock); 165*470decc6SDave Kleikamp refrigerator(); 166*470decc6SDave Kleikamp spin_lock(&journal->j_state_lock); 167*470decc6SDave Kleikamp } else { 168*470decc6SDave Kleikamp /* 169*470decc6SDave Kleikamp * We assume on resume that commits are already there, 170*470decc6SDave Kleikamp * so we don't sleep 171*470decc6SDave Kleikamp */ 172*470decc6SDave Kleikamp DEFINE_WAIT(wait); 173*470decc6SDave Kleikamp int should_sleep = 1; 174*470decc6SDave Kleikamp 175*470decc6SDave Kleikamp prepare_to_wait(&journal->j_wait_commit, &wait, 176*470decc6SDave Kleikamp TASK_INTERRUPTIBLE); 177*470decc6SDave Kleikamp if (journal->j_commit_sequence != journal->j_commit_request) 178*470decc6SDave Kleikamp should_sleep = 0; 179*470decc6SDave Kleikamp transaction = journal->j_running_transaction; 180*470decc6SDave Kleikamp if (transaction && time_after_eq(jiffies, 181*470decc6SDave Kleikamp transaction->t_expires)) 182*470decc6SDave Kleikamp should_sleep = 0; 183*470decc6SDave Kleikamp if (journal->j_flags & JFS_UNMOUNT) 184*470decc6SDave Kleikamp should_sleep = 0; 185*470decc6SDave Kleikamp if (should_sleep) { 186*470decc6SDave Kleikamp spin_unlock(&journal->j_state_lock); 187*470decc6SDave Kleikamp schedule(); 188*470decc6SDave Kleikamp spin_lock(&journal->j_state_lock); 189*470decc6SDave Kleikamp } 190*470decc6SDave Kleikamp finish_wait(&journal->j_wait_commit, &wait); 191*470decc6SDave Kleikamp } 192*470decc6SDave Kleikamp 193*470decc6SDave Kleikamp jbd_debug(1, "kjournald wakes\n"); 194*470decc6SDave Kleikamp 195*470decc6SDave Kleikamp /* 196*470decc6SDave Kleikamp * Were we woken up by a commit wakeup event? 
197*470decc6SDave Kleikamp */ 198*470decc6SDave Kleikamp transaction = journal->j_running_transaction; 199*470decc6SDave Kleikamp if (transaction && time_after_eq(jiffies, transaction->t_expires)) { 200*470decc6SDave Kleikamp journal->j_commit_request = transaction->t_tid; 201*470decc6SDave Kleikamp jbd_debug(1, "woke because of timeout\n"); 202*470decc6SDave Kleikamp } 203*470decc6SDave Kleikamp goto loop; 204*470decc6SDave Kleikamp 205*470decc6SDave Kleikamp end_loop: 206*470decc6SDave Kleikamp spin_unlock(&journal->j_state_lock); 207*470decc6SDave Kleikamp del_timer_sync(&journal->j_commit_timer); 208*470decc6SDave Kleikamp journal->j_task = NULL; 209*470decc6SDave Kleikamp wake_up(&journal->j_wait_done_commit); 210*470decc6SDave Kleikamp jbd_debug(1, "Journal thread exiting.\n"); 211*470decc6SDave Kleikamp return 0; 212*470decc6SDave Kleikamp } 213*470decc6SDave Kleikamp 214*470decc6SDave Kleikamp static void journal_start_thread(journal_t *journal) 215*470decc6SDave Kleikamp { 216*470decc6SDave Kleikamp kthread_run(kjournald, journal, "kjournald"); 217*470decc6SDave Kleikamp wait_event(journal->j_wait_done_commit, journal->j_task != 0); 218*470decc6SDave Kleikamp } 219*470decc6SDave Kleikamp 220*470decc6SDave Kleikamp static void journal_kill_thread(journal_t *journal) 221*470decc6SDave Kleikamp { 222*470decc6SDave Kleikamp spin_lock(&journal->j_state_lock); 223*470decc6SDave Kleikamp journal->j_flags |= JFS_UNMOUNT; 224*470decc6SDave Kleikamp 225*470decc6SDave Kleikamp while (journal->j_task) { 226*470decc6SDave Kleikamp wake_up(&journal->j_wait_commit); 227*470decc6SDave Kleikamp spin_unlock(&journal->j_state_lock); 228*470decc6SDave Kleikamp wait_event(journal->j_wait_done_commit, journal->j_task == 0); 229*470decc6SDave Kleikamp spin_lock(&journal->j_state_lock); 230*470decc6SDave Kleikamp } 231*470decc6SDave Kleikamp spin_unlock(&journal->j_state_lock); 232*470decc6SDave Kleikamp } 233*470decc6SDave Kleikamp 234*470decc6SDave Kleikamp /* 235*470decc6SDave 
Kleikamp * journal_write_metadata_buffer: write a metadata buffer to the journal. 236*470decc6SDave Kleikamp * 237*470decc6SDave Kleikamp * Writes a metadata buffer to a given disk block. The actual IO is not 238*470decc6SDave Kleikamp * performed but a new buffer_head is constructed which labels the data 239*470decc6SDave Kleikamp * to be written with the correct destination disk block. 240*470decc6SDave Kleikamp * 241*470decc6SDave Kleikamp * Any magic-number escaping which needs to be done will cause a 242*470decc6SDave Kleikamp * copy-out here. If the buffer happens to start with the 243*470decc6SDave Kleikamp * JFS_MAGIC_NUMBER, then we can't write it to the log directly: the 244*470decc6SDave Kleikamp * magic number is only written to the log for descripter blocks. In 245*470decc6SDave Kleikamp * this case, we copy the data and replace the first word with 0, and we 246*470decc6SDave Kleikamp * return a result code which indicates that this buffer needs to be 247*470decc6SDave Kleikamp * marked as an escaped buffer in the corresponding log descriptor 248*470decc6SDave Kleikamp * block. The missing word can then be restored when the block is read 249*470decc6SDave Kleikamp * during recovery. 250*470decc6SDave Kleikamp * 251*470decc6SDave Kleikamp * If the source buffer has already been modified by a new transaction 252*470decc6SDave Kleikamp * since we took the last commit snapshot, we use the frozen copy of 253*470decc6SDave Kleikamp * that data for IO. If we end up using the existing buffer_head's data 254*470decc6SDave Kleikamp * for the write, then we *have* to lock the buffer to prevent anyone 255*470decc6SDave Kleikamp * else from using and possibly modifying it while the IO is in 256*470decc6SDave Kleikamp * progress. 257*470decc6SDave Kleikamp * 258*470decc6SDave Kleikamp * The function returns a pointer to the buffer_heads to be used for IO. 
259*470decc6SDave Kleikamp * 260*470decc6SDave Kleikamp * We assume that the journal has already been locked in this function. 261*470decc6SDave Kleikamp * 262*470decc6SDave Kleikamp * Return value: 263*470decc6SDave Kleikamp * <0: Error 264*470decc6SDave Kleikamp * >=0: Finished OK 265*470decc6SDave Kleikamp * 266*470decc6SDave Kleikamp * On success: 267*470decc6SDave Kleikamp * Bit 0 set == escape performed on the data 268*470decc6SDave Kleikamp * Bit 1 set == buffer copy-out performed (kfree the data after IO) 269*470decc6SDave Kleikamp */ 270*470decc6SDave Kleikamp 271*470decc6SDave Kleikamp int journal_write_metadata_buffer(transaction_t *transaction, 272*470decc6SDave Kleikamp struct journal_head *jh_in, 273*470decc6SDave Kleikamp struct journal_head **jh_out, 274*470decc6SDave Kleikamp unsigned long blocknr) 275*470decc6SDave Kleikamp { 276*470decc6SDave Kleikamp int need_copy_out = 0; 277*470decc6SDave Kleikamp int done_copy_out = 0; 278*470decc6SDave Kleikamp int do_escape = 0; 279*470decc6SDave Kleikamp char *mapped_data; 280*470decc6SDave Kleikamp struct buffer_head *new_bh; 281*470decc6SDave Kleikamp struct journal_head *new_jh; 282*470decc6SDave Kleikamp struct page *new_page; 283*470decc6SDave Kleikamp unsigned int new_offset; 284*470decc6SDave Kleikamp struct buffer_head *bh_in = jh2bh(jh_in); 285*470decc6SDave Kleikamp 286*470decc6SDave Kleikamp /* 287*470decc6SDave Kleikamp * The buffer really shouldn't be locked: only the current committing 288*470decc6SDave Kleikamp * transaction is allowed to write it, so nobody else is allowed 289*470decc6SDave Kleikamp * to do any IO. 290*470decc6SDave Kleikamp * 291*470decc6SDave Kleikamp * akpm: except if we're journalling data, and write() output is 292*470decc6SDave Kleikamp * also part of a shared mapping, and another thread has 293*470decc6SDave Kleikamp * decided to launch a writepage() against this buffer. 
294*470decc6SDave Kleikamp */ 295*470decc6SDave Kleikamp J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in)); 296*470decc6SDave Kleikamp 297*470decc6SDave Kleikamp new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL); 298*470decc6SDave Kleikamp 299*470decc6SDave Kleikamp /* 300*470decc6SDave Kleikamp * If a new transaction has already done a buffer copy-out, then 301*470decc6SDave Kleikamp * we use that version of the data for the commit. 302*470decc6SDave Kleikamp */ 303*470decc6SDave Kleikamp jbd_lock_bh_state(bh_in); 304*470decc6SDave Kleikamp repeat: 305*470decc6SDave Kleikamp if (jh_in->b_frozen_data) { 306*470decc6SDave Kleikamp done_copy_out = 1; 307*470decc6SDave Kleikamp new_page = virt_to_page(jh_in->b_frozen_data); 308*470decc6SDave Kleikamp new_offset = offset_in_page(jh_in->b_frozen_data); 309*470decc6SDave Kleikamp } else { 310*470decc6SDave Kleikamp new_page = jh2bh(jh_in)->b_page; 311*470decc6SDave Kleikamp new_offset = offset_in_page(jh2bh(jh_in)->b_data); 312*470decc6SDave Kleikamp } 313*470decc6SDave Kleikamp 314*470decc6SDave Kleikamp mapped_data = kmap_atomic(new_page, KM_USER0); 315*470decc6SDave Kleikamp /* 316*470decc6SDave Kleikamp * Check for escaping 317*470decc6SDave Kleikamp */ 318*470decc6SDave Kleikamp if (*((__be32 *)(mapped_data + new_offset)) == 319*470decc6SDave Kleikamp cpu_to_be32(JFS_MAGIC_NUMBER)) { 320*470decc6SDave Kleikamp need_copy_out = 1; 321*470decc6SDave Kleikamp do_escape = 1; 322*470decc6SDave Kleikamp } 323*470decc6SDave Kleikamp kunmap_atomic(mapped_data, KM_USER0); 324*470decc6SDave Kleikamp 325*470decc6SDave Kleikamp /* 326*470decc6SDave Kleikamp * Do we need to do a data copy? 
327*470decc6SDave Kleikamp */ 328*470decc6SDave Kleikamp if (need_copy_out && !done_copy_out) { 329*470decc6SDave Kleikamp char *tmp; 330*470decc6SDave Kleikamp 331*470decc6SDave Kleikamp jbd_unlock_bh_state(bh_in); 332*470decc6SDave Kleikamp tmp = jbd_slab_alloc(bh_in->b_size, GFP_NOFS); 333*470decc6SDave Kleikamp jbd_lock_bh_state(bh_in); 334*470decc6SDave Kleikamp if (jh_in->b_frozen_data) { 335*470decc6SDave Kleikamp jbd_slab_free(tmp, bh_in->b_size); 336*470decc6SDave Kleikamp goto repeat; 337*470decc6SDave Kleikamp } 338*470decc6SDave Kleikamp 339*470decc6SDave Kleikamp jh_in->b_frozen_data = tmp; 340*470decc6SDave Kleikamp mapped_data = kmap_atomic(new_page, KM_USER0); 341*470decc6SDave Kleikamp memcpy(tmp, mapped_data + new_offset, jh2bh(jh_in)->b_size); 342*470decc6SDave Kleikamp kunmap_atomic(mapped_data, KM_USER0); 343*470decc6SDave Kleikamp 344*470decc6SDave Kleikamp new_page = virt_to_page(tmp); 345*470decc6SDave Kleikamp new_offset = offset_in_page(tmp); 346*470decc6SDave Kleikamp done_copy_out = 1; 347*470decc6SDave Kleikamp } 348*470decc6SDave Kleikamp 349*470decc6SDave Kleikamp /* 350*470decc6SDave Kleikamp * Did we need to do an escaping? Now we've done all the 351*470decc6SDave Kleikamp * copying, we can finally do so. 
352*470decc6SDave Kleikamp */ 353*470decc6SDave Kleikamp if (do_escape) { 354*470decc6SDave Kleikamp mapped_data = kmap_atomic(new_page, KM_USER0); 355*470decc6SDave Kleikamp *((unsigned int *)(mapped_data + new_offset)) = 0; 356*470decc6SDave Kleikamp kunmap_atomic(mapped_data, KM_USER0); 357*470decc6SDave Kleikamp } 358*470decc6SDave Kleikamp 359*470decc6SDave Kleikamp /* keep subsequent assertions sane */ 360*470decc6SDave Kleikamp new_bh->b_state = 0; 361*470decc6SDave Kleikamp init_buffer(new_bh, NULL, NULL); 362*470decc6SDave Kleikamp atomic_set(&new_bh->b_count, 1); 363*470decc6SDave Kleikamp jbd_unlock_bh_state(bh_in); 364*470decc6SDave Kleikamp 365*470decc6SDave Kleikamp new_jh = journal_add_journal_head(new_bh); /* This sleeps */ 366*470decc6SDave Kleikamp 367*470decc6SDave Kleikamp set_bh_page(new_bh, new_page, new_offset); 368*470decc6SDave Kleikamp new_jh->b_transaction = NULL; 369*470decc6SDave Kleikamp new_bh->b_size = jh2bh(jh_in)->b_size; 370*470decc6SDave Kleikamp new_bh->b_bdev = transaction->t_journal->j_dev; 371*470decc6SDave Kleikamp new_bh->b_blocknr = blocknr; 372*470decc6SDave Kleikamp set_buffer_mapped(new_bh); 373*470decc6SDave Kleikamp set_buffer_dirty(new_bh); 374*470decc6SDave Kleikamp 375*470decc6SDave Kleikamp *jh_out = new_jh; 376*470decc6SDave Kleikamp 377*470decc6SDave Kleikamp /* 378*470decc6SDave Kleikamp * The to-be-written buffer needs to get moved to the io queue, 379*470decc6SDave Kleikamp * and the original buffer whose contents we are shadowing or 380*470decc6SDave Kleikamp * copying is moved to the transaction's shadow queue. 
381*470decc6SDave Kleikamp */ 382*470decc6SDave Kleikamp JBUFFER_TRACE(jh_in, "file as BJ_Shadow"); 383*470decc6SDave Kleikamp journal_file_buffer(jh_in, transaction, BJ_Shadow); 384*470decc6SDave Kleikamp JBUFFER_TRACE(new_jh, "file as BJ_IO"); 385*470decc6SDave Kleikamp journal_file_buffer(new_jh, transaction, BJ_IO); 386*470decc6SDave Kleikamp 387*470decc6SDave Kleikamp return do_escape | (done_copy_out << 1); 388*470decc6SDave Kleikamp } 389*470decc6SDave Kleikamp 390*470decc6SDave Kleikamp /* 391*470decc6SDave Kleikamp * Allocation code for the journal file. Manage the space left in the 392*470decc6SDave Kleikamp * journal, so that we can begin checkpointing when appropriate. 393*470decc6SDave Kleikamp */ 394*470decc6SDave Kleikamp 395*470decc6SDave Kleikamp /* 396*470decc6SDave Kleikamp * __log_space_left: Return the number of free blocks left in the journal. 397*470decc6SDave Kleikamp * 398*470decc6SDave Kleikamp * Called with the journal already locked. 399*470decc6SDave Kleikamp * 400*470decc6SDave Kleikamp * Called under j_state_lock 401*470decc6SDave Kleikamp */ 402*470decc6SDave Kleikamp 403*470decc6SDave Kleikamp int __log_space_left(journal_t *journal) 404*470decc6SDave Kleikamp { 405*470decc6SDave Kleikamp int left = journal->j_free; 406*470decc6SDave Kleikamp 407*470decc6SDave Kleikamp assert_spin_locked(&journal->j_state_lock); 408*470decc6SDave Kleikamp 409*470decc6SDave Kleikamp /* 410*470decc6SDave Kleikamp * Be pessimistic here about the number of those free blocks which 411*470decc6SDave Kleikamp * might be required for log descriptor control blocks. 
412*470decc6SDave Kleikamp */ 413*470decc6SDave Kleikamp 414*470decc6SDave Kleikamp #define MIN_LOG_RESERVED_BLOCKS 32 /* Allow for rounding errors */ 415*470decc6SDave Kleikamp 416*470decc6SDave Kleikamp left -= MIN_LOG_RESERVED_BLOCKS; 417*470decc6SDave Kleikamp 418*470decc6SDave Kleikamp if (left <= 0) 419*470decc6SDave Kleikamp return 0; 420*470decc6SDave Kleikamp left -= (left >> 3); 421*470decc6SDave Kleikamp return left; 422*470decc6SDave Kleikamp } 423*470decc6SDave Kleikamp 424*470decc6SDave Kleikamp /* 425*470decc6SDave Kleikamp * Called under j_state_lock. Returns true if a transaction was started. 426*470decc6SDave Kleikamp */ 427*470decc6SDave Kleikamp int __log_start_commit(journal_t *journal, tid_t target) 428*470decc6SDave Kleikamp { 429*470decc6SDave Kleikamp /* 430*470decc6SDave Kleikamp * Are we already doing a recent enough commit? 431*470decc6SDave Kleikamp */ 432*470decc6SDave Kleikamp if (!tid_geq(journal->j_commit_request, target)) { 433*470decc6SDave Kleikamp /* 434*470decc6SDave Kleikamp * We want a new commit: OK, mark the request and wakup the 435*470decc6SDave Kleikamp * commit thread. We do _not_ do the commit ourselves. 
436*470decc6SDave Kleikamp */ 437*470decc6SDave Kleikamp 438*470decc6SDave Kleikamp journal->j_commit_request = target; 439*470decc6SDave Kleikamp jbd_debug(1, "JBD: requesting commit %d/%d\n", 440*470decc6SDave Kleikamp journal->j_commit_request, 441*470decc6SDave Kleikamp journal->j_commit_sequence); 442*470decc6SDave Kleikamp wake_up(&journal->j_wait_commit); 443*470decc6SDave Kleikamp return 1; 444*470decc6SDave Kleikamp } 445*470decc6SDave Kleikamp return 0; 446*470decc6SDave Kleikamp } 447*470decc6SDave Kleikamp 448*470decc6SDave Kleikamp int log_start_commit(journal_t *journal, tid_t tid) 449*470decc6SDave Kleikamp { 450*470decc6SDave Kleikamp int ret; 451*470decc6SDave Kleikamp 452*470decc6SDave Kleikamp spin_lock(&journal->j_state_lock); 453*470decc6SDave Kleikamp ret = __log_start_commit(journal, tid); 454*470decc6SDave Kleikamp spin_unlock(&journal->j_state_lock); 455*470decc6SDave Kleikamp return ret; 456*470decc6SDave Kleikamp } 457*470decc6SDave Kleikamp 458*470decc6SDave Kleikamp /* 459*470decc6SDave Kleikamp * Force and wait upon a commit if the calling process is not within 460*470decc6SDave Kleikamp * transaction. This is used for forcing out undo-protected data which contains 461*470decc6SDave Kleikamp * bitmaps, when the fs is running out of space. 462*470decc6SDave Kleikamp * 463*470decc6SDave Kleikamp * We can only force the running transaction if we don't have an active handle; 464*470decc6SDave Kleikamp * otherwise, we will deadlock. 465*470decc6SDave Kleikamp * 466*470decc6SDave Kleikamp * Returns true if a transaction was started. 
467*470decc6SDave Kleikamp */ 468*470decc6SDave Kleikamp int journal_force_commit_nested(journal_t *journal) 469*470decc6SDave Kleikamp { 470*470decc6SDave Kleikamp transaction_t *transaction = NULL; 471*470decc6SDave Kleikamp tid_t tid; 472*470decc6SDave Kleikamp 473*470decc6SDave Kleikamp spin_lock(&journal->j_state_lock); 474*470decc6SDave Kleikamp if (journal->j_running_transaction && !current->journal_info) { 475*470decc6SDave Kleikamp transaction = journal->j_running_transaction; 476*470decc6SDave Kleikamp __log_start_commit(journal, transaction->t_tid); 477*470decc6SDave Kleikamp } else if (journal->j_committing_transaction) 478*470decc6SDave Kleikamp transaction = journal->j_committing_transaction; 479*470decc6SDave Kleikamp 480*470decc6SDave Kleikamp if (!transaction) { 481*470decc6SDave Kleikamp spin_unlock(&journal->j_state_lock); 482*470decc6SDave Kleikamp return 0; /* Nothing to retry */ 483*470decc6SDave Kleikamp } 484*470decc6SDave Kleikamp 485*470decc6SDave Kleikamp tid = transaction->t_tid; 486*470decc6SDave Kleikamp spin_unlock(&journal->j_state_lock); 487*470decc6SDave Kleikamp log_wait_commit(journal, tid); 488*470decc6SDave Kleikamp return 1; 489*470decc6SDave Kleikamp } 490*470decc6SDave Kleikamp 491*470decc6SDave Kleikamp /* 492*470decc6SDave Kleikamp * Start a commit of the current running transaction (if any). 
Returns true 493*470decc6SDave Kleikamp * if a transaction was started, and fills its tid in at *ptid 494*470decc6SDave Kleikamp */ 495*470decc6SDave Kleikamp int journal_start_commit(journal_t *journal, tid_t *ptid) 496*470decc6SDave Kleikamp { 497*470decc6SDave Kleikamp int ret = 0; 498*470decc6SDave Kleikamp 499*470decc6SDave Kleikamp spin_lock(&journal->j_state_lock); 500*470decc6SDave Kleikamp if (journal->j_running_transaction) { 501*470decc6SDave Kleikamp tid_t tid = journal->j_running_transaction->t_tid; 502*470decc6SDave Kleikamp 503*470decc6SDave Kleikamp ret = __log_start_commit(journal, tid); 504*470decc6SDave Kleikamp if (ret && ptid) 505*470decc6SDave Kleikamp *ptid = tid; 506*470decc6SDave Kleikamp } else if (journal->j_committing_transaction && ptid) { 507*470decc6SDave Kleikamp /* 508*470decc6SDave Kleikamp * If ext3_write_super() recently started a commit, then we 509*470decc6SDave Kleikamp * have to wait for completion of that transaction 510*470decc6SDave Kleikamp */ 511*470decc6SDave Kleikamp *ptid = journal->j_committing_transaction->t_tid; 512*470decc6SDave Kleikamp ret = 1; 513*470decc6SDave Kleikamp } 514*470decc6SDave Kleikamp spin_unlock(&journal->j_state_lock); 515*470decc6SDave Kleikamp return ret; 516*470decc6SDave Kleikamp } 517*470decc6SDave Kleikamp 518*470decc6SDave Kleikamp /* 519*470decc6SDave Kleikamp * Wait for a specified commit to complete. 520*470decc6SDave Kleikamp * The caller may not hold the journal lock. 
521*470decc6SDave Kleikamp */ 522*470decc6SDave Kleikamp int log_wait_commit(journal_t *journal, tid_t tid) 523*470decc6SDave Kleikamp { 524*470decc6SDave Kleikamp int err = 0; 525*470decc6SDave Kleikamp 526*470decc6SDave Kleikamp #ifdef CONFIG_JBD_DEBUG 527*470decc6SDave Kleikamp spin_lock(&journal->j_state_lock); 528*470decc6SDave Kleikamp if (!tid_geq(journal->j_commit_request, tid)) { 529*470decc6SDave Kleikamp printk(KERN_EMERG 530*470decc6SDave Kleikamp "%s: error: j_commit_request=%d, tid=%d\n", 531*470decc6SDave Kleikamp __FUNCTION__, journal->j_commit_request, tid); 532*470decc6SDave Kleikamp } 533*470decc6SDave Kleikamp spin_unlock(&journal->j_state_lock); 534*470decc6SDave Kleikamp #endif 535*470decc6SDave Kleikamp spin_lock(&journal->j_state_lock); 536*470decc6SDave Kleikamp while (tid_gt(tid, journal->j_commit_sequence)) { 537*470decc6SDave Kleikamp jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n", 538*470decc6SDave Kleikamp tid, journal->j_commit_sequence); 539*470decc6SDave Kleikamp wake_up(&journal->j_wait_commit); 540*470decc6SDave Kleikamp spin_unlock(&journal->j_state_lock); 541*470decc6SDave Kleikamp wait_event(journal->j_wait_done_commit, 542*470decc6SDave Kleikamp !tid_gt(tid, journal->j_commit_sequence)); 543*470decc6SDave Kleikamp spin_lock(&journal->j_state_lock); 544*470decc6SDave Kleikamp } 545*470decc6SDave Kleikamp spin_unlock(&journal->j_state_lock); 546*470decc6SDave Kleikamp 547*470decc6SDave Kleikamp if (unlikely(is_journal_aborted(journal))) { 548*470decc6SDave Kleikamp printk(KERN_EMERG "journal commit I/O error\n"); 549*470decc6SDave Kleikamp err = -EIO; 550*470decc6SDave Kleikamp } 551*470decc6SDave Kleikamp return err; 552*470decc6SDave Kleikamp } 553*470decc6SDave Kleikamp 554*470decc6SDave Kleikamp /* 555*470decc6SDave Kleikamp * Log buffer allocation routines: 556*470decc6SDave Kleikamp */ 557*470decc6SDave Kleikamp 558*470decc6SDave Kleikamp int journal_next_log_block(journal_t *journal, unsigned long *retp) 
559*470decc6SDave Kleikamp { 560*470decc6SDave Kleikamp unsigned long blocknr; 561*470decc6SDave Kleikamp 562*470decc6SDave Kleikamp spin_lock(&journal->j_state_lock); 563*470decc6SDave Kleikamp J_ASSERT(journal->j_free > 1); 564*470decc6SDave Kleikamp 565*470decc6SDave Kleikamp blocknr = journal->j_head; 566*470decc6SDave Kleikamp journal->j_head++; 567*470decc6SDave Kleikamp journal->j_free--; 568*470decc6SDave Kleikamp if (journal->j_head == journal->j_last) 569*470decc6SDave Kleikamp journal->j_head = journal->j_first; 570*470decc6SDave Kleikamp spin_unlock(&journal->j_state_lock); 571*470decc6SDave Kleikamp return journal_bmap(journal, blocknr, retp); 572*470decc6SDave Kleikamp } 573*470decc6SDave Kleikamp 574*470decc6SDave Kleikamp /* 575*470decc6SDave Kleikamp * Conversion of logical to physical block numbers for the journal 576*470decc6SDave Kleikamp * 577*470decc6SDave Kleikamp * On external journals the journal blocks are identity-mapped, so 578*470decc6SDave Kleikamp * this is a no-op. If needed, we can use j_blk_offset - everything is 579*470decc6SDave Kleikamp * ready. 
580*470decc6SDave Kleikamp */ 581*470decc6SDave Kleikamp int journal_bmap(journal_t *journal, unsigned long blocknr, 582*470decc6SDave Kleikamp unsigned long *retp) 583*470decc6SDave Kleikamp { 584*470decc6SDave Kleikamp int err = 0; 585*470decc6SDave Kleikamp unsigned long ret; 586*470decc6SDave Kleikamp 587*470decc6SDave Kleikamp if (journal->j_inode) { 588*470decc6SDave Kleikamp ret = bmap(journal->j_inode, blocknr); 589*470decc6SDave Kleikamp if (ret) 590*470decc6SDave Kleikamp *retp = ret; 591*470decc6SDave Kleikamp else { 592*470decc6SDave Kleikamp char b[BDEVNAME_SIZE]; 593*470decc6SDave Kleikamp 594*470decc6SDave Kleikamp printk(KERN_ALERT "%s: journal block not found " 595*470decc6SDave Kleikamp "at offset %lu on %s\n", 596*470decc6SDave Kleikamp __FUNCTION__, 597*470decc6SDave Kleikamp blocknr, 598*470decc6SDave Kleikamp bdevname(journal->j_dev, b)); 599*470decc6SDave Kleikamp err = -EIO; 600*470decc6SDave Kleikamp __journal_abort_soft(journal, err); 601*470decc6SDave Kleikamp } 602*470decc6SDave Kleikamp } else { 603*470decc6SDave Kleikamp *retp = blocknr; /* +journal->j_blk_offset */ 604*470decc6SDave Kleikamp } 605*470decc6SDave Kleikamp return err; 606*470decc6SDave Kleikamp } 607*470decc6SDave Kleikamp 608*470decc6SDave Kleikamp /* 609*470decc6SDave Kleikamp * We play buffer_head aliasing tricks to write data/metadata blocks to 610*470decc6SDave Kleikamp * the journal without copying their contents, but for journal 611*470decc6SDave Kleikamp * descriptor blocks we do need to generate bona fide buffers. 612*470decc6SDave Kleikamp * 613*470decc6SDave Kleikamp * After the caller of journal_get_descriptor_buffer() has finished modifying 614*470decc6SDave Kleikamp * the buffer's contents they really should run flush_dcache_page(bh->b_page). 615*470decc6SDave Kleikamp * But we don't bother doing that, so there will be coherency problems with 616*470decc6SDave Kleikamp * mmaps of blockdevs which hold live JBD-controlled filesystems. 
617*470decc6SDave Kleikamp */ 618*470decc6SDave Kleikamp struct journal_head *journal_get_descriptor_buffer(journal_t *journal) 619*470decc6SDave Kleikamp { 620*470decc6SDave Kleikamp struct buffer_head *bh; 621*470decc6SDave Kleikamp unsigned long blocknr; 622*470decc6SDave Kleikamp int err; 623*470decc6SDave Kleikamp 624*470decc6SDave Kleikamp err = journal_next_log_block(journal, &blocknr); 625*470decc6SDave Kleikamp 626*470decc6SDave Kleikamp if (err) 627*470decc6SDave Kleikamp return NULL; 628*470decc6SDave Kleikamp 629*470decc6SDave Kleikamp bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); 630*470decc6SDave Kleikamp lock_buffer(bh); 631*470decc6SDave Kleikamp memset(bh->b_data, 0, journal->j_blocksize); 632*470decc6SDave Kleikamp set_buffer_uptodate(bh); 633*470decc6SDave Kleikamp unlock_buffer(bh); 634*470decc6SDave Kleikamp BUFFER_TRACE(bh, "return this buffer"); 635*470decc6SDave Kleikamp return journal_add_journal_head(bh); 636*470decc6SDave Kleikamp } 637*470decc6SDave Kleikamp 638*470decc6SDave Kleikamp /* 639*470decc6SDave Kleikamp * Management for journal control blocks: functions to create and 640*470decc6SDave Kleikamp * destroy journal_t structures, and to initialise and read existing 641*470decc6SDave Kleikamp * journal blocks from disk. */ 642*470decc6SDave Kleikamp 643*470decc6SDave Kleikamp /* First: create and setup a journal_t object in memory. We initialise 644*470decc6SDave Kleikamp * very few fields yet: that has to wait until we have created the 645*470decc6SDave Kleikamp * journal structures from from scratch, or loaded them from disk. 
*/ 646*470decc6SDave Kleikamp 647*470decc6SDave Kleikamp static journal_t * journal_init_common (void) 648*470decc6SDave Kleikamp { 649*470decc6SDave Kleikamp journal_t *journal; 650*470decc6SDave Kleikamp int err; 651*470decc6SDave Kleikamp 652*470decc6SDave Kleikamp journal = jbd_kmalloc(sizeof(*journal), GFP_KERNEL); 653*470decc6SDave Kleikamp if (!journal) 654*470decc6SDave Kleikamp goto fail; 655*470decc6SDave Kleikamp memset(journal, 0, sizeof(*journal)); 656*470decc6SDave Kleikamp 657*470decc6SDave Kleikamp init_waitqueue_head(&journal->j_wait_transaction_locked); 658*470decc6SDave Kleikamp init_waitqueue_head(&journal->j_wait_logspace); 659*470decc6SDave Kleikamp init_waitqueue_head(&journal->j_wait_done_commit); 660*470decc6SDave Kleikamp init_waitqueue_head(&journal->j_wait_checkpoint); 661*470decc6SDave Kleikamp init_waitqueue_head(&journal->j_wait_commit); 662*470decc6SDave Kleikamp init_waitqueue_head(&journal->j_wait_updates); 663*470decc6SDave Kleikamp mutex_init(&journal->j_barrier); 664*470decc6SDave Kleikamp mutex_init(&journal->j_checkpoint_mutex); 665*470decc6SDave Kleikamp spin_lock_init(&journal->j_revoke_lock); 666*470decc6SDave Kleikamp spin_lock_init(&journal->j_list_lock); 667*470decc6SDave Kleikamp spin_lock_init(&journal->j_state_lock); 668*470decc6SDave Kleikamp 669*470decc6SDave Kleikamp journal->j_commit_interval = (HZ * JBD_DEFAULT_MAX_COMMIT_AGE); 670*470decc6SDave Kleikamp 671*470decc6SDave Kleikamp /* The journal is marked for error until we succeed with recovery! */ 672*470decc6SDave Kleikamp journal->j_flags = JFS_ABORT; 673*470decc6SDave Kleikamp 674*470decc6SDave Kleikamp /* Set up a default-sized revoke table for the new mount. 
*/ 675*470decc6SDave Kleikamp err = journal_init_revoke(journal, JOURNAL_REVOKE_DEFAULT_HASH); 676*470decc6SDave Kleikamp if (err) { 677*470decc6SDave Kleikamp kfree(journal); 678*470decc6SDave Kleikamp goto fail; 679*470decc6SDave Kleikamp } 680*470decc6SDave Kleikamp return journal; 681*470decc6SDave Kleikamp fail: 682*470decc6SDave Kleikamp return NULL; 683*470decc6SDave Kleikamp } 684*470decc6SDave Kleikamp 685*470decc6SDave Kleikamp /* journal_init_dev and journal_init_inode: 686*470decc6SDave Kleikamp * 687*470decc6SDave Kleikamp * Create a journal structure assigned some fixed set of disk blocks to 688*470decc6SDave Kleikamp * the journal. We don't actually touch those disk blocks yet, but we 689*470decc6SDave Kleikamp * need to set up all of the mapping information to tell the journaling 690*470decc6SDave Kleikamp * system where the journal blocks are. 691*470decc6SDave Kleikamp * 692*470decc6SDave Kleikamp */ 693*470decc6SDave Kleikamp 694*470decc6SDave Kleikamp /** 695*470decc6SDave Kleikamp * journal_t * journal_init_dev() - creates an initialises a journal structure 696*470decc6SDave Kleikamp * @bdev: Block device on which to create the journal 697*470decc6SDave Kleikamp * @fs_dev: Device which hold journalled filesystem for this journal. 698*470decc6SDave Kleikamp * @start: Block nr Start of journal. 699*470decc6SDave Kleikamp * @len: Length of the journal in blocks. 700*470decc6SDave Kleikamp * @blocksize: blocksize of journalling device 701*470decc6SDave Kleikamp * @returns: a newly created journal_t * 702*470decc6SDave Kleikamp * 703*470decc6SDave Kleikamp * journal_init_dev creates a journal which maps a fixed contiguous 704*470decc6SDave Kleikamp * range of blocks on an arbitrary block device. 
705*470decc6SDave Kleikamp * 706*470decc6SDave Kleikamp */ 707*470decc6SDave Kleikamp journal_t * journal_init_dev(struct block_device *bdev, 708*470decc6SDave Kleikamp struct block_device *fs_dev, 709*470decc6SDave Kleikamp int start, int len, int blocksize) 710*470decc6SDave Kleikamp { 711*470decc6SDave Kleikamp journal_t *journal = journal_init_common(); 712*470decc6SDave Kleikamp struct buffer_head *bh; 713*470decc6SDave Kleikamp int n; 714*470decc6SDave Kleikamp 715*470decc6SDave Kleikamp if (!journal) 716*470decc6SDave Kleikamp return NULL; 717*470decc6SDave Kleikamp 718*470decc6SDave Kleikamp /* journal descriptor can store up to n blocks -bzzz */ 719*470decc6SDave Kleikamp journal->j_blocksize = blocksize; 720*470decc6SDave Kleikamp n = journal->j_blocksize / sizeof(journal_block_tag_t); 721*470decc6SDave Kleikamp journal->j_wbufsize = n; 722*470decc6SDave Kleikamp journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); 723*470decc6SDave Kleikamp if (!journal->j_wbuf) { 724*470decc6SDave Kleikamp printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", 725*470decc6SDave Kleikamp __FUNCTION__); 726*470decc6SDave Kleikamp kfree(journal); 727*470decc6SDave Kleikamp journal = NULL; 728*470decc6SDave Kleikamp } 729*470decc6SDave Kleikamp journal->j_dev = bdev; 730*470decc6SDave Kleikamp journal->j_fs_dev = fs_dev; 731*470decc6SDave Kleikamp journal->j_blk_offset = start; 732*470decc6SDave Kleikamp journal->j_maxlen = len; 733*470decc6SDave Kleikamp 734*470decc6SDave Kleikamp bh = __getblk(journal->j_dev, start, journal->j_blocksize); 735*470decc6SDave Kleikamp J_ASSERT(bh != NULL); 736*470decc6SDave Kleikamp journal->j_sb_buffer = bh; 737*470decc6SDave Kleikamp journal->j_superblock = (journal_superblock_t *)bh->b_data; 738*470decc6SDave Kleikamp 739*470decc6SDave Kleikamp return journal; 740*470decc6SDave Kleikamp } 741*470decc6SDave Kleikamp 742*470decc6SDave Kleikamp /** 743*470decc6SDave Kleikamp * journal_t * journal_init_inode () - 
creates a journal which maps to a inode. 744*470decc6SDave Kleikamp * @inode: An inode to create the journal in 745*470decc6SDave Kleikamp * 746*470decc6SDave Kleikamp * journal_init_inode creates a journal which maps an on-disk inode as 747*470decc6SDave Kleikamp * the journal. The inode must exist already, must support bmap() and 748*470decc6SDave Kleikamp * must have all data blocks preallocated. 749*470decc6SDave Kleikamp */ 750*470decc6SDave Kleikamp journal_t * journal_init_inode (struct inode *inode) 751*470decc6SDave Kleikamp { 752*470decc6SDave Kleikamp struct buffer_head *bh; 753*470decc6SDave Kleikamp journal_t *journal = journal_init_common(); 754*470decc6SDave Kleikamp int err; 755*470decc6SDave Kleikamp int n; 756*470decc6SDave Kleikamp unsigned long blocknr; 757*470decc6SDave Kleikamp 758*470decc6SDave Kleikamp if (!journal) 759*470decc6SDave Kleikamp return NULL; 760*470decc6SDave Kleikamp 761*470decc6SDave Kleikamp journal->j_dev = journal->j_fs_dev = inode->i_sb->s_bdev; 762*470decc6SDave Kleikamp journal->j_inode = inode; 763*470decc6SDave Kleikamp jbd_debug(1, 764*470decc6SDave Kleikamp "journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n", 765*470decc6SDave Kleikamp journal, inode->i_sb->s_id, inode->i_ino, 766*470decc6SDave Kleikamp (long long) inode->i_size, 767*470decc6SDave Kleikamp inode->i_sb->s_blocksize_bits, inode->i_sb->s_blocksize); 768*470decc6SDave Kleikamp 769*470decc6SDave Kleikamp journal->j_maxlen = inode->i_size >> inode->i_sb->s_blocksize_bits; 770*470decc6SDave Kleikamp journal->j_blocksize = inode->i_sb->s_blocksize; 771*470decc6SDave Kleikamp 772*470decc6SDave Kleikamp /* journal descriptor can store up to n blocks -bzzz */ 773*470decc6SDave Kleikamp n = journal->j_blocksize / sizeof(journal_block_tag_t); 774*470decc6SDave Kleikamp journal->j_wbufsize = n; 775*470decc6SDave Kleikamp journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); 776*470decc6SDave Kleikamp if (!journal->j_wbuf) { 
777*470decc6SDave Kleikamp printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", 778*470decc6SDave Kleikamp __FUNCTION__); 779*470decc6SDave Kleikamp kfree(journal); 780*470decc6SDave Kleikamp return NULL; 781*470decc6SDave Kleikamp } 782*470decc6SDave Kleikamp 783*470decc6SDave Kleikamp err = journal_bmap(journal, 0, &blocknr); 784*470decc6SDave Kleikamp /* If that failed, give up */ 785*470decc6SDave Kleikamp if (err) { 786*470decc6SDave Kleikamp printk(KERN_ERR "%s: Cannnot locate journal superblock\n", 787*470decc6SDave Kleikamp __FUNCTION__); 788*470decc6SDave Kleikamp kfree(journal); 789*470decc6SDave Kleikamp return NULL; 790*470decc6SDave Kleikamp } 791*470decc6SDave Kleikamp 792*470decc6SDave Kleikamp bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); 793*470decc6SDave Kleikamp J_ASSERT(bh != NULL); 794*470decc6SDave Kleikamp journal->j_sb_buffer = bh; 795*470decc6SDave Kleikamp journal->j_superblock = (journal_superblock_t *)bh->b_data; 796*470decc6SDave Kleikamp 797*470decc6SDave Kleikamp return journal; 798*470decc6SDave Kleikamp } 799*470decc6SDave Kleikamp 800*470decc6SDave Kleikamp /* 801*470decc6SDave Kleikamp * If the journal init or create aborts, we need to mark the journal 802*470decc6SDave Kleikamp * superblock as being NULL to prevent the journal destroy from writing 803*470decc6SDave Kleikamp * back a bogus superblock. 804*470decc6SDave Kleikamp */ 805*470decc6SDave Kleikamp static void journal_fail_superblock (journal_t *journal) 806*470decc6SDave Kleikamp { 807*470decc6SDave Kleikamp struct buffer_head *bh = journal->j_sb_buffer; 808*470decc6SDave Kleikamp brelse(bh); 809*470decc6SDave Kleikamp journal->j_sb_buffer = NULL; 810*470decc6SDave Kleikamp } 811*470decc6SDave Kleikamp 812*470decc6SDave Kleikamp /* 813*470decc6SDave Kleikamp * Given a journal_t structure, initialise the various fields for 814*470decc6SDave Kleikamp * startup of a new journaling session. 
We use this both when creating 815*470decc6SDave Kleikamp * a journal, and after recovering an old journal to reset it for 816*470decc6SDave Kleikamp * subsequent use. 817*470decc6SDave Kleikamp */ 818*470decc6SDave Kleikamp 819*470decc6SDave Kleikamp static int journal_reset(journal_t *journal) 820*470decc6SDave Kleikamp { 821*470decc6SDave Kleikamp journal_superblock_t *sb = journal->j_superblock; 822*470decc6SDave Kleikamp unsigned long first, last; 823*470decc6SDave Kleikamp 824*470decc6SDave Kleikamp first = be32_to_cpu(sb->s_first); 825*470decc6SDave Kleikamp last = be32_to_cpu(sb->s_maxlen); 826*470decc6SDave Kleikamp 827*470decc6SDave Kleikamp journal->j_first = first; 828*470decc6SDave Kleikamp journal->j_last = last; 829*470decc6SDave Kleikamp 830*470decc6SDave Kleikamp journal->j_head = first; 831*470decc6SDave Kleikamp journal->j_tail = first; 832*470decc6SDave Kleikamp journal->j_free = last - first; 833*470decc6SDave Kleikamp 834*470decc6SDave Kleikamp journal->j_tail_sequence = journal->j_transaction_sequence; 835*470decc6SDave Kleikamp journal->j_commit_sequence = journal->j_transaction_sequence - 1; 836*470decc6SDave Kleikamp journal->j_commit_request = journal->j_commit_sequence; 837*470decc6SDave Kleikamp 838*470decc6SDave Kleikamp journal->j_max_transaction_buffers = journal->j_maxlen / 4; 839*470decc6SDave Kleikamp 840*470decc6SDave Kleikamp /* Add the dynamic fields and write it to disk. */ 841*470decc6SDave Kleikamp journal_update_superblock(journal, 1); 842*470decc6SDave Kleikamp journal_start_thread(journal); 843*470decc6SDave Kleikamp return 0; 844*470decc6SDave Kleikamp } 845*470decc6SDave Kleikamp 846*470decc6SDave Kleikamp /** 847*470decc6SDave Kleikamp * int journal_create() - Initialise the new journal file 848*470decc6SDave Kleikamp * @journal: Journal to create. 
This structure must have been initialised
 *
 * Given a journal_t structure which tells us which disk blocks we can
 * use, create a new journal superblock and initialise all of the
 * journal fields from scratch.
 *
 * Returns -EINVAL if the journal is shorter than JFS_MIN_JOURNAL_BLOCKS,
 * the error from journal_bmap() if a journal block cannot be mapped,
 * -ENOMEM if no buffer can be obtained for a journal block, and
 * otherwise the result of journal_reset().
 **/
int journal_create(journal_t *journal)
{
	unsigned long blocknr;
	struct buffer_head *bh;
	journal_superblock_t *sb;
	int i, err;

	if (journal->j_maxlen < JFS_MIN_JOURNAL_BLOCKS) {
		printk (KERN_ERR "Journal length (%d blocks) too short.\n",
			journal->j_maxlen);
		journal_fail_superblock(journal);
		return -EINVAL;
	}

	if (journal->j_inode == NULL) {
		/*
		 * We don't know what block to start at!
		 */
		printk(KERN_EMERG
		       "%s: creation of journal on external device!\n",
		       __FUNCTION__);
		BUG();
	}

	/* Zero out the entire journal on disk.  We cannot afford to
	   have any blocks on disk beginning with JFS_MAGIC_NUMBER. */
	jbd_debug(1, "JBD: Zeroing out journal blocks...\n");
	for (i = 0; i < journal->j_maxlen; i++) {
		err = journal_bmap(journal, i, &blocknr);
		if (err)
			return err;
		bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
		/* Never pass a NULL buffer on to lock_buffer(). */
		if (!bh)
			return -ENOMEM;
		lock_buffer(bh);
		memset (bh->b_data, 0, journal->j_blocksize);
		BUFFER_TRACE(bh, "marking dirty");
		mark_buffer_dirty(bh);
		BUFFER_TRACE(bh, "marking uptodate");
		set_buffer_uptodate(bh);
		unlock_buffer(bh);
		__brelse(bh);
	}

	/* Push the zeroed blocks out before the superblock is written. */
	sync_blockdev(journal->j_dev);
	jbd_debug(1, "JBD: journal cleared.\n");

	/* OK, fill in the initial static fields in the new superblock */
	sb = journal->j_superblock;

	sb->s_header.h_magic	 = cpu_to_be32(JFS_MAGIC_NUMBER);
	sb->s_header.h_blocktype = cpu_to_be32(JFS_SUPERBLOCK_V2);

	sb->s_blocksize	= cpu_to_be32(journal->j_blocksize);
	sb->s_maxlen	= cpu_to_be32(journal->j_maxlen);
	sb->s_first	= cpu_to_be32(1);

	journal->j_transaction_sequence = 1;

	journal->j_flags &= ~JFS_ABORT;
	journal->j_format_version = 2;

	return journal_reset(journal);
}

917*470decc6SDave Kleikamp /** 918*470decc6SDave Kleikamp * void journal_update_superblock() - Update journal sb on disk. 919*470decc6SDave Kleikamp * @journal: The journal to update. 920*470decc6SDave Kleikamp * @wait: Set to '0' if you don't want to wait for IO completion. 921*470decc6SDave Kleikamp * 922*470decc6SDave Kleikamp * Update a journal's dynamic superblock fields and write it to disk, 923*470decc6SDave Kleikamp * optionally waiting for the IO to complete. 924*470decc6SDave Kleikamp */ 925*470decc6SDave Kleikamp void journal_update_superblock(journal_t *journal, int wait) 926*470decc6SDave Kleikamp { 927*470decc6SDave Kleikamp journal_superblock_t *sb = journal->j_superblock; 928*470decc6SDave Kleikamp struct buffer_head *bh = journal->j_sb_buffer; 929*470decc6SDave Kleikamp 930*470decc6SDave Kleikamp /* 931*470decc6SDave Kleikamp * As a special case, if the on-disk copy is already marked as needing 932*470decc6SDave Kleikamp * no recovery (s_start == 0) and there are no outstanding transactions 933*470decc6SDave Kleikamp * in the filesystem, then we can safely defer the superblock update 934*470decc6SDave Kleikamp * until the next commit by setting JFS_FLUSHED. This avoids 935*470decc6SDave Kleikamp * attempting a write to a potential-readonly device. 
936*470decc6SDave Kleikamp */ 937*470decc6SDave Kleikamp if (sb->s_start == 0 && journal->j_tail_sequence == 938*470decc6SDave Kleikamp journal->j_transaction_sequence) { 939*470decc6SDave Kleikamp jbd_debug(1,"JBD: Skipping superblock update on recovered sb " 940*470decc6SDave Kleikamp "(start %ld, seq %d, errno %d)\n", 941*470decc6SDave Kleikamp journal->j_tail, journal->j_tail_sequence, 942*470decc6SDave Kleikamp journal->j_errno); 943*470decc6SDave Kleikamp goto out; 944*470decc6SDave Kleikamp } 945*470decc6SDave Kleikamp 946*470decc6SDave Kleikamp spin_lock(&journal->j_state_lock); 947*470decc6SDave Kleikamp jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n", 948*470decc6SDave Kleikamp journal->j_tail, journal->j_tail_sequence, journal->j_errno); 949*470decc6SDave Kleikamp 950*470decc6SDave Kleikamp sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); 951*470decc6SDave Kleikamp sb->s_start = cpu_to_be32(journal->j_tail); 952*470decc6SDave Kleikamp sb->s_errno = cpu_to_be32(journal->j_errno); 953*470decc6SDave Kleikamp spin_unlock(&journal->j_state_lock); 954*470decc6SDave Kleikamp 955*470decc6SDave Kleikamp BUFFER_TRACE(bh, "marking dirty"); 956*470decc6SDave Kleikamp mark_buffer_dirty(bh); 957*470decc6SDave Kleikamp if (wait) 958*470decc6SDave Kleikamp sync_dirty_buffer(bh); 959*470decc6SDave Kleikamp else 960*470decc6SDave Kleikamp ll_rw_block(SWRITE, 1, &bh); 961*470decc6SDave Kleikamp 962*470decc6SDave Kleikamp out: 963*470decc6SDave Kleikamp /* If we have just flushed the log (by marking s_start==0), then 964*470decc6SDave Kleikamp * any future commit will have to be careful to update the 965*470decc6SDave Kleikamp * superblock again to re-record the true start of the log. 
*/ 966*470decc6SDave Kleikamp 967*470decc6SDave Kleikamp spin_lock(&journal->j_state_lock); 968*470decc6SDave Kleikamp if (sb->s_start) 969*470decc6SDave Kleikamp journal->j_flags &= ~JFS_FLUSHED; 970*470decc6SDave Kleikamp else 971*470decc6SDave Kleikamp journal->j_flags |= JFS_FLUSHED; 972*470decc6SDave Kleikamp spin_unlock(&journal->j_state_lock); 973*470decc6SDave Kleikamp } 974*470decc6SDave Kleikamp 975*470decc6SDave Kleikamp /* 976*470decc6SDave Kleikamp * Read the superblock for a given journal, performing initial 977*470decc6SDave Kleikamp * validation of the format. 978*470decc6SDave Kleikamp */ 979*470decc6SDave Kleikamp 980*470decc6SDave Kleikamp static int journal_get_superblock(journal_t *journal) 981*470decc6SDave Kleikamp { 982*470decc6SDave Kleikamp struct buffer_head *bh; 983*470decc6SDave Kleikamp journal_superblock_t *sb; 984*470decc6SDave Kleikamp int err = -EIO; 985*470decc6SDave Kleikamp 986*470decc6SDave Kleikamp bh = journal->j_sb_buffer; 987*470decc6SDave Kleikamp 988*470decc6SDave Kleikamp J_ASSERT(bh != NULL); 989*470decc6SDave Kleikamp if (!buffer_uptodate(bh)) { 990*470decc6SDave Kleikamp ll_rw_block(READ, 1, &bh); 991*470decc6SDave Kleikamp wait_on_buffer(bh); 992*470decc6SDave Kleikamp if (!buffer_uptodate(bh)) { 993*470decc6SDave Kleikamp printk (KERN_ERR 994*470decc6SDave Kleikamp "JBD: IO error reading journal superblock\n"); 995*470decc6SDave Kleikamp goto out; 996*470decc6SDave Kleikamp } 997*470decc6SDave Kleikamp } 998*470decc6SDave Kleikamp 999*470decc6SDave Kleikamp sb = journal->j_superblock; 1000*470decc6SDave Kleikamp 1001*470decc6SDave Kleikamp err = -EINVAL; 1002*470decc6SDave Kleikamp 1003*470decc6SDave Kleikamp if (sb->s_header.h_magic != cpu_to_be32(JFS_MAGIC_NUMBER) || 1004*470decc6SDave Kleikamp sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) { 1005*470decc6SDave Kleikamp printk(KERN_WARNING "JBD: no valid journal superblock found\n"); 1006*470decc6SDave Kleikamp goto out; 1007*470decc6SDave Kleikamp } 
1008*470decc6SDave Kleikamp 1009*470decc6SDave Kleikamp switch(be32_to_cpu(sb->s_header.h_blocktype)) { 1010*470decc6SDave Kleikamp case JFS_SUPERBLOCK_V1: 1011*470decc6SDave Kleikamp journal->j_format_version = 1; 1012*470decc6SDave Kleikamp break; 1013*470decc6SDave Kleikamp case JFS_SUPERBLOCK_V2: 1014*470decc6SDave Kleikamp journal->j_format_version = 2; 1015*470decc6SDave Kleikamp break; 1016*470decc6SDave Kleikamp default: 1017*470decc6SDave Kleikamp printk(KERN_WARNING "JBD: unrecognised superblock format ID\n"); 1018*470decc6SDave Kleikamp goto out; 1019*470decc6SDave Kleikamp } 1020*470decc6SDave Kleikamp 1021*470decc6SDave Kleikamp if (be32_to_cpu(sb->s_maxlen) < journal->j_maxlen) 1022*470decc6SDave Kleikamp journal->j_maxlen = be32_to_cpu(sb->s_maxlen); 1023*470decc6SDave Kleikamp else if (be32_to_cpu(sb->s_maxlen) > journal->j_maxlen) { 1024*470decc6SDave Kleikamp printk (KERN_WARNING "JBD: journal file too short\n"); 1025*470decc6SDave Kleikamp goto out; 1026*470decc6SDave Kleikamp } 1027*470decc6SDave Kleikamp 1028*470decc6SDave Kleikamp return 0; 1029*470decc6SDave Kleikamp 1030*470decc6SDave Kleikamp out: 1031*470decc6SDave Kleikamp journal_fail_superblock(journal); 1032*470decc6SDave Kleikamp return err; 1033*470decc6SDave Kleikamp } 1034*470decc6SDave Kleikamp 1035*470decc6SDave Kleikamp /* 1036*470decc6SDave Kleikamp * Load the on-disk journal superblock and read the key fields into the 1037*470decc6SDave Kleikamp * journal_t. 
1038*470decc6SDave Kleikamp */ 1039*470decc6SDave Kleikamp 1040*470decc6SDave Kleikamp static int load_superblock(journal_t *journal) 1041*470decc6SDave Kleikamp { 1042*470decc6SDave Kleikamp int err; 1043*470decc6SDave Kleikamp journal_superblock_t *sb; 1044*470decc6SDave Kleikamp 1045*470decc6SDave Kleikamp err = journal_get_superblock(journal); 1046*470decc6SDave Kleikamp if (err) 1047*470decc6SDave Kleikamp return err; 1048*470decc6SDave Kleikamp 1049*470decc6SDave Kleikamp sb = journal->j_superblock; 1050*470decc6SDave Kleikamp 1051*470decc6SDave Kleikamp journal->j_tail_sequence = be32_to_cpu(sb->s_sequence); 1052*470decc6SDave Kleikamp journal->j_tail = be32_to_cpu(sb->s_start); 1053*470decc6SDave Kleikamp journal->j_first = be32_to_cpu(sb->s_first); 1054*470decc6SDave Kleikamp journal->j_last = be32_to_cpu(sb->s_maxlen); 1055*470decc6SDave Kleikamp journal->j_errno = be32_to_cpu(sb->s_errno); 1056*470decc6SDave Kleikamp 1057*470decc6SDave Kleikamp return 0; 1058*470decc6SDave Kleikamp } 1059*470decc6SDave Kleikamp 1060*470decc6SDave Kleikamp 1061*470decc6SDave Kleikamp /** 1062*470decc6SDave Kleikamp * int journal_load() - Read journal from disk. 1063*470decc6SDave Kleikamp * @journal: Journal to act on. 1064*470decc6SDave Kleikamp * 1065*470decc6SDave Kleikamp * Given a journal_t structure which tells us which disk blocks contain 1066*470decc6SDave Kleikamp * a journal, read the journal from disk to initialise the in-memory 1067*470decc6SDave Kleikamp * structures. 
1068*470decc6SDave Kleikamp */ 1069*470decc6SDave Kleikamp int journal_load(journal_t *journal) 1070*470decc6SDave Kleikamp { 1071*470decc6SDave Kleikamp int err; 1072*470decc6SDave Kleikamp journal_superblock_t *sb; 1073*470decc6SDave Kleikamp 1074*470decc6SDave Kleikamp err = load_superblock(journal); 1075*470decc6SDave Kleikamp if (err) 1076*470decc6SDave Kleikamp return err; 1077*470decc6SDave Kleikamp 1078*470decc6SDave Kleikamp sb = journal->j_superblock; 1079*470decc6SDave Kleikamp /* If this is a V2 superblock, then we have to check the 1080*470decc6SDave Kleikamp * features flags on it. */ 1081*470decc6SDave Kleikamp 1082*470decc6SDave Kleikamp if (journal->j_format_version >= 2) { 1083*470decc6SDave Kleikamp if ((sb->s_feature_ro_compat & 1084*470decc6SDave Kleikamp ~cpu_to_be32(JFS_KNOWN_ROCOMPAT_FEATURES)) || 1085*470decc6SDave Kleikamp (sb->s_feature_incompat & 1086*470decc6SDave Kleikamp ~cpu_to_be32(JFS_KNOWN_INCOMPAT_FEATURES))) { 1087*470decc6SDave Kleikamp printk (KERN_WARNING 1088*470decc6SDave Kleikamp "JBD: Unrecognised features on journal\n"); 1089*470decc6SDave Kleikamp return -EINVAL; 1090*470decc6SDave Kleikamp } 1091*470decc6SDave Kleikamp } 1092*470decc6SDave Kleikamp 1093*470decc6SDave Kleikamp /* 1094*470decc6SDave Kleikamp * Create a slab for this blocksize 1095*470decc6SDave Kleikamp */ 1096*470decc6SDave Kleikamp err = journal_create_jbd_slab(be32_to_cpu(sb->s_blocksize)); 1097*470decc6SDave Kleikamp if (err) 1098*470decc6SDave Kleikamp return err; 1099*470decc6SDave Kleikamp 1100*470decc6SDave Kleikamp /* Let the recovery code check whether it needs to recover any 1101*470decc6SDave Kleikamp * data from the journal. 
*/ 1102*470decc6SDave Kleikamp if (journal_recover(journal)) 1103*470decc6SDave Kleikamp goto recovery_error; 1104*470decc6SDave Kleikamp 1105*470decc6SDave Kleikamp /* OK, we've finished with the dynamic journal bits: 1106*470decc6SDave Kleikamp * reinitialise the dynamic contents of the superblock in memory 1107*470decc6SDave Kleikamp * and reset them on disk. */ 1108*470decc6SDave Kleikamp if (journal_reset(journal)) 1109*470decc6SDave Kleikamp goto recovery_error; 1110*470decc6SDave Kleikamp 1111*470decc6SDave Kleikamp journal->j_flags &= ~JFS_ABORT; 1112*470decc6SDave Kleikamp journal->j_flags |= JFS_LOADED; 1113*470decc6SDave Kleikamp return 0; 1114*470decc6SDave Kleikamp 1115*470decc6SDave Kleikamp recovery_error: 1116*470decc6SDave Kleikamp printk (KERN_WARNING "JBD: recovery failed\n"); 1117*470decc6SDave Kleikamp return -EIO; 1118*470decc6SDave Kleikamp } 1119*470decc6SDave Kleikamp 1120*470decc6SDave Kleikamp /** 1121*470decc6SDave Kleikamp * void journal_destroy() - Release a journal_t structure. 1122*470decc6SDave Kleikamp * @journal: Journal to act on. 1123*470decc6SDave Kleikamp * 1124*470decc6SDave Kleikamp * Release a journal_t structure once it is no longer in use by the 1125*470decc6SDave Kleikamp * journaled object. 1126*470decc6SDave Kleikamp */ 1127*470decc6SDave Kleikamp void journal_destroy(journal_t *journal) 1128*470decc6SDave Kleikamp { 1129*470decc6SDave Kleikamp /* Wait for the commit thread to wake up and die. */ 1130*470decc6SDave Kleikamp journal_kill_thread(journal); 1131*470decc6SDave Kleikamp 1132*470decc6SDave Kleikamp /* Force a final log commit */ 1133*470decc6SDave Kleikamp if (journal->j_running_transaction) 1134*470decc6SDave Kleikamp journal_commit_transaction(journal); 1135*470decc6SDave Kleikamp 1136*470decc6SDave Kleikamp /* Force any old transactions to disk */ 1137*470decc6SDave Kleikamp 1138*470decc6SDave Kleikamp /* Totally anal locking here... 
*/ 1139*470decc6SDave Kleikamp spin_lock(&journal->j_list_lock); 1140*470decc6SDave Kleikamp while (journal->j_checkpoint_transactions != NULL) { 1141*470decc6SDave Kleikamp spin_unlock(&journal->j_list_lock); 1142*470decc6SDave Kleikamp log_do_checkpoint(journal); 1143*470decc6SDave Kleikamp spin_lock(&journal->j_list_lock); 1144*470decc6SDave Kleikamp } 1145*470decc6SDave Kleikamp 1146*470decc6SDave Kleikamp J_ASSERT(journal->j_running_transaction == NULL); 1147*470decc6SDave Kleikamp J_ASSERT(journal->j_committing_transaction == NULL); 1148*470decc6SDave Kleikamp J_ASSERT(journal->j_checkpoint_transactions == NULL); 1149*470decc6SDave Kleikamp spin_unlock(&journal->j_list_lock); 1150*470decc6SDave Kleikamp 1151*470decc6SDave Kleikamp /* We can now mark the journal as empty. */ 1152*470decc6SDave Kleikamp journal->j_tail = 0; 1153*470decc6SDave Kleikamp journal->j_tail_sequence = ++journal->j_transaction_sequence; 1154*470decc6SDave Kleikamp if (journal->j_sb_buffer) { 1155*470decc6SDave Kleikamp journal_update_superblock(journal, 1); 1156*470decc6SDave Kleikamp brelse(journal->j_sb_buffer); 1157*470decc6SDave Kleikamp } 1158*470decc6SDave Kleikamp 1159*470decc6SDave Kleikamp if (journal->j_inode) 1160*470decc6SDave Kleikamp iput(journal->j_inode); 1161*470decc6SDave Kleikamp if (journal->j_revoke) 1162*470decc6SDave Kleikamp journal_destroy_revoke(journal); 1163*470decc6SDave Kleikamp kfree(journal->j_wbuf); 1164*470decc6SDave Kleikamp kfree(journal); 1165*470decc6SDave Kleikamp } 1166*470decc6SDave Kleikamp 1167*470decc6SDave Kleikamp 1168*470decc6SDave Kleikamp /** 1169*470decc6SDave Kleikamp *int journal_check_used_features () - Check if features specified are used. 1170*470decc6SDave Kleikamp * @journal: Journal to check. 
1171*470decc6SDave Kleikamp * @compat: bitmask of compatible features 1172*470decc6SDave Kleikamp * @ro: bitmask of features that force read-only mount 1173*470decc6SDave Kleikamp * @incompat: bitmask of incompatible features 1174*470decc6SDave Kleikamp * 1175*470decc6SDave Kleikamp * Check whether the journal uses all of a given set of 1176*470decc6SDave Kleikamp * features. Return true (non-zero) if it does. 1177*470decc6SDave Kleikamp **/ 1178*470decc6SDave Kleikamp 1179*470decc6SDave Kleikamp int journal_check_used_features (journal_t *journal, unsigned long compat, 1180*470decc6SDave Kleikamp unsigned long ro, unsigned long incompat) 1181*470decc6SDave Kleikamp { 1182*470decc6SDave Kleikamp journal_superblock_t *sb; 1183*470decc6SDave Kleikamp 1184*470decc6SDave Kleikamp if (!compat && !ro && !incompat) 1185*470decc6SDave Kleikamp return 1; 1186*470decc6SDave Kleikamp if (journal->j_format_version == 1) 1187*470decc6SDave Kleikamp return 0; 1188*470decc6SDave Kleikamp 1189*470decc6SDave Kleikamp sb = journal->j_superblock; 1190*470decc6SDave Kleikamp 1191*470decc6SDave Kleikamp if (((be32_to_cpu(sb->s_feature_compat) & compat) == compat) && 1192*470decc6SDave Kleikamp ((be32_to_cpu(sb->s_feature_ro_compat) & ro) == ro) && 1193*470decc6SDave Kleikamp ((be32_to_cpu(sb->s_feature_incompat) & incompat) == incompat)) 1194*470decc6SDave Kleikamp return 1; 1195*470decc6SDave Kleikamp 1196*470decc6SDave Kleikamp return 0; 1197*470decc6SDave Kleikamp } 1198*470decc6SDave Kleikamp 1199*470decc6SDave Kleikamp /** 1200*470decc6SDave Kleikamp * int journal_check_available_features() - Check feature set in journalling layer 1201*470decc6SDave Kleikamp * @journal: Journal to check. 
1202*470decc6SDave Kleikamp * @compat: bitmask of compatible features 1203*470decc6SDave Kleikamp * @ro: bitmask of features that force read-only mount 1204*470decc6SDave Kleikamp * @incompat: bitmask of incompatible features 1205*470decc6SDave Kleikamp * 1206*470decc6SDave Kleikamp * Check whether the journaling code supports the use of 1207*470decc6SDave Kleikamp * all of a given set of features on this journal. Return true 1208*470decc6SDave Kleikamp * (non-zero) if it can. */ 1209*470decc6SDave Kleikamp 1210*470decc6SDave Kleikamp int journal_check_available_features (journal_t *journal, unsigned long compat, 1211*470decc6SDave Kleikamp unsigned long ro, unsigned long incompat) 1212*470decc6SDave Kleikamp { 1213*470decc6SDave Kleikamp journal_superblock_t *sb; 1214*470decc6SDave Kleikamp 1215*470decc6SDave Kleikamp if (!compat && !ro && !incompat) 1216*470decc6SDave Kleikamp return 1; 1217*470decc6SDave Kleikamp 1218*470decc6SDave Kleikamp sb = journal->j_superblock; 1219*470decc6SDave Kleikamp 1220*470decc6SDave Kleikamp /* We can support any known requested features iff the 1221*470decc6SDave Kleikamp * superblock is in version 2. Otherwise we fail to support any 1222*470decc6SDave Kleikamp * extended sb features. */ 1223*470decc6SDave Kleikamp 1224*470decc6SDave Kleikamp if (journal->j_format_version != 2) 1225*470decc6SDave Kleikamp return 0; 1226*470decc6SDave Kleikamp 1227*470decc6SDave Kleikamp if ((compat & JFS_KNOWN_COMPAT_FEATURES) == compat && 1228*470decc6SDave Kleikamp (ro & JFS_KNOWN_ROCOMPAT_FEATURES) == ro && 1229*470decc6SDave Kleikamp (incompat & JFS_KNOWN_INCOMPAT_FEATURES) == incompat) 1230*470decc6SDave Kleikamp return 1; 1231*470decc6SDave Kleikamp 1232*470decc6SDave Kleikamp return 0; 1233*470decc6SDave Kleikamp } 1234*470decc6SDave Kleikamp 1235*470decc6SDave Kleikamp /** 1236*470decc6SDave Kleikamp * int journal_set_features () - Mark a given journal feature in the superblock 1237*470decc6SDave Kleikamp * @journal: Journal to act on. 
1238*470decc6SDave Kleikamp * @compat: bitmask of compatible features 1239*470decc6SDave Kleikamp * @ro: bitmask of features that force read-only mount 1240*470decc6SDave Kleikamp * @incompat: bitmask of incompatible features 1241*470decc6SDave Kleikamp * 1242*470decc6SDave Kleikamp * Mark a given journal feature as present on the 1243*470decc6SDave Kleikamp * superblock. Returns true if the requested features could be set. 1244*470decc6SDave Kleikamp * 1245*470decc6SDave Kleikamp */ 1246*470decc6SDave Kleikamp 1247*470decc6SDave Kleikamp int journal_set_features (journal_t *journal, unsigned long compat, 1248*470decc6SDave Kleikamp unsigned long ro, unsigned long incompat) 1249*470decc6SDave Kleikamp { 1250*470decc6SDave Kleikamp journal_superblock_t *sb; 1251*470decc6SDave Kleikamp 1252*470decc6SDave Kleikamp if (journal_check_used_features(journal, compat, ro, incompat)) 1253*470decc6SDave Kleikamp return 1; 1254*470decc6SDave Kleikamp 1255*470decc6SDave Kleikamp if (!journal_check_available_features(journal, compat, ro, incompat)) 1256*470decc6SDave Kleikamp return 0; 1257*470decc6SDave Kleikamp 1258*470decc6SDave Kleikamp jbd_debug(1, "Setting new features 0x%lx/0x%lx/0x%lx\n", 1259*470decc6SDave Kleikamp compat, ro, incompat); 1260*470decc6SDave Kleikamp 1261*470decc6SDave Kleikamp sb = journal->j_superblock; 1262*470decc6SDave Kleikamp 1263*470decc6SDave Kleikamp sb->s_feature_compat |= cpu_to_be32(compat); 1264*470decc6SDave Kleikamp sb->s_feature_ro_compat |= cpu_to_be32(ro); 1265*470decc6SDave Kleikamp sb->s_feature_incompat |= cpu_to_be32(incompat); 1266*470decc6SDave Kleikamp 1267*470decc6SDave Kleikamp return 1; 1268*470decc6SDave Kleikamp } 1269*470decc6SDave Kleikamp 1270*470decc6SDave Kleikamp 1271*470decc6SDave Kleikamp /** 1272*470decc6SDave Kleikamp * int journal_update_format () - Update on-disk journal structure. 1273*470decc6SDave Kleikamp * @journal: Journal to act on. 
1274*470decc6SDave Kleikamp * 1275*470decc6SDave Kleikamp * Given an initialised but unloaded journal struct, poke about in the 1276*470decc6SDave Kleikamp * on-disk structure to update it to the most recent supported version. 1277*470decc6SDave Kleikamp */ 1278*470decc6SDave Kleikamp int journal_update_format (journal_t *journal) 1279*470decc6SDave Kleikamp { 1280*470decc6SDave Kleikamp journal_superblock_t *sb; 1281*470decc6SDave Kleikamp int err; 1282*470decc6SDave Kleikamp 1283*470decc6SDave Kleikamp err = journal_get_superblock(journal); 1284*470decc6SDave Kleikamp if (err) 1285*470decc6SDave Kleikamp return err; 1286*470decc6SDave Kleikamp 1287*470decc6SDave Kleikamp sb = journal->j_superblock; 1288*470decc6SDave Kleikamp 1289*470decc6SDave Kleikamp switch (be32_to_cpu(sb->s_header.h_blocktype)) { 1290*470decc6SDave Kleikamp case JFS_SUPERBLOCK_V2: 1291*470decc6SDave Kleikamp return 0; 1292*470decc6SDave Kleikamp case JFS_SUPERBLOCK_V1: 1293*470decc6SDave Kleikamp return journal_convert_superblock_v1(journal, sb); 1294*470decc6SDave Kleikamp default: 1295*470decc6SDave Kleikamp break; 1296*470decc6SDave Kleikamp } 1297*470decc6SDave Kleikamp return -EINVAL; 1298*470decc6SDave Kleikamp } 1299*470decc6SDave Kleikamp 1300*470decc6SDave Kleikamp static int journal_convert_superblock_v1(journal_t *journal, 1301*470decc6SDave Kleikamp journal_superblock_t *sb) 1302*470decc6SDave Kleikamp { 1303*470decc6SDave Kleikamp int offset, blocksize; 1304*470decc6SDave Kleikamp struct buffer_head *bh; 1305*470decc6SDave Kleikamp 1306*470decc6SDave Kleikamp printk(KERN_WARNING 1307*470decc6SDave Kleikamp "JBD: Converting superblock from version 1 to 2.\n"); 1308*470decc6SDave Kleikamp 1309*470decc6SDave Kleikamp /* Pre-initialise new fields to zero */ 1310*470decc6SDave Kleikamp offset = ((char *) &(sb->s_feature_compat)) - ((char *) sb); 1311*470decc6SDave Kleikamp blocksize = be32_to_cpu(sb->s_blocksize); 1312*470decc6SDave Kleikamp memset(&sb->s_feature_compat, 0, 
blocksize-offset); 1313*470decc6SDave Kleikamp 1314*470decc6SDave Kleikamp sb->s_nr_users = cpu_to_be32(1); 1315*470decc6SDave Kleikamp sb->s_header.h_blocktype = cpu_to_be32(JFS_SUPERBLOCK_V2); 1316*470decc6SDave Kleikamp journal->j_format_version = 2; 1317*470decc6SDave Kleikamp 1318*470decc6SDave Kleikamp bh = journal->j_sb_buffer; 1319*470decc6SDave Kleikamp BUFFER_TRACE(bh, "marking dirty"); 1320*470decc6SDave Kleikamp mark_buffer_dirty(bh); 1321*470decc6SDave Kleikamp sync_dirty_buffer(bh); 1322*470decc6SDave Kleikamp return 0; 1323*470decc6SDave Kleikamp } 1324*470decc6SDave Kleikamp 1325*470decc6SDave Kleikamp 1326*470decc6SDave Kleikamp /** 1327*470decc6SDave Kleikamp * int journal_flush () - Flush journal 1328*470decc6SDave Kleikamp * @journal: Journal to act on. 1329*470decc6SDave Kleikamp * 1330*470decc6SDave Kleikamp * Flush all data for a given journal to disk and empty the journal. 1331*470decc6SDave Kleikamp * Filesystems can use this when remounting readonly to ensure that 1332*470decc6SDave Kleikamp * recovery does not need to happen on remount. 1333*470decc6SDave Kleikamp */ 1334*470decc6SDave Kleikamp 1335*470decc6SDave Kleikamp int journal_flush(journal_t *journal) 1336*470decc6SDave Kleikamp { 1337*470decc6SDave Kleikamp int err = 0; 1338*470decc6SDave Kleikamp transaction_t *transaction = NULL; 1339*470decc6SDave Kleikamp unsigned long old_tail; 1340*470decc6SDave Kleikamp 1341*470decc6SDave Kleikamp spin_lock(&journal->j_state_lock); 1342*470decc6SDave Kleikamp 1343*470decc6SDave Kleikamp /* Force everything buffered to the log... 
*/ 1344*470decc6SDave Kleikamp if (journal->j_running_transaction) { 1345*470decc6SDave Kleikamp transaction = journal->j_running_transaction; 1346*470decc6SDave Kleikamp __log_start_commit(journal, transaction->t_tid); 1347*470decc6SDave Kleikamp } else if (journal->j_committing_transaction) 1348*470decc6SDave Kleikamp transaction = journal->j_committing_transaction; 1349*470decc6SDave Kleikamp 1350*470decc6SDave Kleikamp /* Wait for the log commit to complete... */ 1351*470decc6SDave Kleikamp if (transaction) { 1352*470decc6SDave Kleikamp tid_t tid = transaction->t_tid; 1353*470decc6SDave Kleikamp 1354*470decc6SDave Kleikamp spin_unlock(&journal->j_state_lock); 1355*470decc6SDave Kleikamp log_wait_commit(journal, tid); 1356*470decc6SDave Kleikamp } else { 1357*470decc6SDave Kleikamp spin_unlock(&journal->j_state_lock); 1358*470decc6SDave Kleikamp } 1359*470decc6SDave Kleikamp 1360*470decc6SDave Kleikamp /* ...and flush everything in the log out to disk. */ 1361*470decc6SDave Kleikamp spin_lock(&journal->j_list_lock); 1362*470decc6SDave Kleikamp while (!err && journal->j_checkpoint_transactions != NULL) { 1363*470decc6SDave Kleikamp spin_unlock(&journal->j_list_lock); 1364*470decc6SDave Kleikamp err = log_do_checkpoint(journal); 1365*470decc6SDave Kleikamp spin_lock(&journal->j_list_lock); 1366*470decc6SDave Kleikamp } 1367*470decc6SDave Kleikamp spin_unlock(&journal->j_list_lock); 1368*470decc6SDave Kleikamp cleanup_journal_tail(journal); 1369*470decc6SDave Kleikamp 1370*470decc6SDave Kleikamp /* Finally, mark the journal as really needing no recovery. 1371*470decc6SDave Kleikamp * This sets s_start==0 in the underlying superblock, which is 1372*470decc6SDave Kleikamp * the magic code for a fully-recovered superblock. Any future 1373*470decc6SDave Kleikamp * commits of data to the journal will restore the current 1374*470decc6SDave Kleikamp * s_start value. 
*/ 1375*470decc6SDave Kleikamp spin_lock(&journal->j_state_lock); 1376*470decc6SDave Kleikamp old_tail = journal->j_tail; 1377*470decc6SDave Kleikamp journal->j_tail = 0; 1378*470decc6SDave Kleikamp spin_unlock(&journal->j_state_lock); 1379*470decc6SDave Kleikamp journal_update_superblock(journal, 1); 1380*470decc6SDave Kleikamp spin_lock(&journal->j_state_lock); 1381*470decc6SDave Kleikamp journal->j_tail = old_tail; 1382*470decc6SDave Kleikamp 1383*470decc6SDave Kleikamp J_ASSERT(!journal->j_running_transaction); 1384*470decc6SDave Kleikamp J_ASSERT(!journal->j_committing_transaction); 1385*470decc6SDave Kleikamp J_ASSERT(!journal->j_checkpoint_transactions); 1386*470decc6SDave Kleikamp J_ASSERT(journal->j_head == journal->j_tail); 1387*470decc6SDave Kleikamp J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence); 1388*470decc6SDave Kleikamp spin_unlock(&journal->j_state_lock); 1389*470decc6SDave Kleikamp return err; 1390*470decc6SDave Kleikamp } 1391*470decc6SDave Kleikamp 1392*470decc6SDave Kleikamp /** 1393*470decc6SDave Kleikamp * int journal_wipe() - Wipe journal contents 1394*470decc6SDave Kleikamp * @journal: Journal to act on. 1395*470decc6SDave Kleikamp * @write: flag (see below) 1396*470decc6SDave Kleikamp * 1397*470decc6SDave Kleikamp * Wipe out all of the contents of a journal, safely. This will produce 1398*470decc6SDave Kleikamp * a warning if the journal contains any valid recovery information. 1399*470decc6SDave Kleikamp * Must be called between journal_init_*() and journal_load(). 1400*470decc6SDave Kleikamp * 1401*470decc6SDave Kleikamp * If 'write' is non-zero, then we wipe out the journal on disk; otherwise 1402*470decc6SDave Kleikamp * we merely suppress recovery. 
1403*470decc6SDave Kleikamp */ 1404*470decc6SDave Kleikamp 1405*470decc6SDave Kleikamp int journal_wipe(journal_t *journal, int write) 1406*470decc6SDave Kleikamp { 1407*470decc6SDave Kleikamp journal_superblock_t *sb; 1408*470decc6SDave Kleikamp int err = 0; 1409*470decc6SDave Kleikamp 1410*470decc6SDave Kleikamp J_ASSERT (!(journal->j_flags & JFS_LOADED)); 1411*470decc6SDave Kleikamp 1412*470decc6SDave Kleikamp err = load_superblock(journal); 1413*470decc6SDave Kleikamp if (err) 1414*470decc6SDave Kleikamp return err; 1415*470decc6SDave Kleikamp 1416*470decc6SDave Kleikamp sb = journal->j_superblock; 1417*470decc6SDave Kleikamp 1418*470decc6SDave Kleikamp if (!journal->j_tail) 1419*470decc6SDave Kleikamp goto no_recovery; 1420*470decc6SDave Kleikamp 1421*470decc6SDave Kleikamp printk (KERN_WARNING "JBD: %s recovery information on journal\n", 1422*470decc6SDave Kleikamp write ? "Clearing" : "Ignoring"); 1423*470decc6SDave Kleikamp 1424*470decc6SDave Kleikamp err = journal_skip_recovery(journal); 1425*470decc6SDave Kleikamp if (write) 1426*470decc6SDave Kleikamp journal_update_superblock(journal, 1); 1427*470decc6SDave Kleikamp 1428*470decc6SDave Kleikamp no_recovery: 1429*470decc6SDave Kleikamp return err; 1430*470decc6SDave Kleikamp } 1431*470decc6SDave Kleikamp 1432*470decc6SDave Kleikamp /* 1433*470decc6SDave Kleikamp * journal_dev_name: format a character string to describe on what 1434*470decc6SDave Kleikamp * device this journal is present. 
1435*470decc6SDave Kleikamp */ 1436*470decc6SDave Kleikamp 1437*470decc6SDave Kleikamp static const char *journal_dev_name(journal_t *journal, char *buffer) 1438*470decc6SDave Kleikamp { 1439*470decc6SDave Kleikamp struct block_device *bdev; 1440*470decc6SDave Kleikamp 1441*470decc6SDave Kleikamp if (journal->j_inode) 1442*470decc6SDave Kleikamp bdev = journal->j_inode->i_sb->s_bdev; 1443*470decc6SDave Kleikamp else 1444*470decc6SDave Kleikamp bdev = journal->j_dev; 1445*470decc6SDave Kleikamp 1446*470decc6SDave Kleikamp return bdevname(bdev, buffer); 1447*470decc6SDave Kleikamp } 1448*470decc6SDave Kleikamp 1449*470decc6SDave Kleikamp /* 1450*470decc6SDave Kleikamp * Journal abort has very specific semantics, which we describe 1451*470decc6SDave Kleikamp * for journal abort. 1452*470decc6SDave Kleikamp * 1453*470decc6SDave Kleikamp * Two internal function, which provide abort to te jbd layer 1454*470decc6SDave Kleikamp * itself are here. 1455*470decc6SDave Kleikamp */ 1456*470decc6SDave Kleikamp 1457*470decc6SDave Kleikamp /* 1458*470decc6SDave Kleikamp * Quick version for internal journal use (doesn't lock the journal). 1459*470decc6SDave Kleikamp * Aborts hard --- we mark the abort as occurred, but do _nothing_ else, 1460*470decc6SDave Kleikamp * and don't attempt to make any other journal updates. 
1461*470decc6SDave Kleikamp */ 1462*470decc6SDave Kleikamp void __journal_abort_hard(journal_t *journal) 1463*470decc6SDave Kleikamp { 1464*470decc6SDave Kleikamp transaction_t *transaction; 1465*470decc6SDave Kleikamp char b[BDEVNAME_SIZE]; 1466*470decc6SDave Kleikamp 1467*470decc6SDave Kleikamp if (journal->j_flags & JFS_ABORT) 1468*470decc6SDave Kleikamp return; 1469*470decc6SDave Kleikamp 1470*470decc6SDave Kleikamp printk(KERN_ERR "Aborting journal on device %s.\n", 1471*470decc6SDave Kleikamp journal_dev_name(journal, b)); 1472*470decc6SDave Kleikamp 1473*470decc6SDave Kleikamp spin_lock(&journal->j_state_lock); 1474*470decc6SDave Kleikamp journal->j_flags |= JFS_ABORT; 1475*470decc6SDave Kleikamp transaction = journal->j_running_transaction; 1476*470decc6SDave Kleikamp if (transaction) 1477*470decc6SDave Kleikamp __log_start_commit(journal, transaction->t_tid); 1478*470decc6SDave Kleikamp spin_unlock(&journal->j_state_lock); 1479*470decc6SDave Kleikamp } 1480*470decc6SDave Kleikamp 1481*470decc6SDave Kleikamp /* Soft abort: record the abort error status in the journal superblock, 1482*470decc6SDave Kleikamp * but don't do any other IO. */ 1483*470decc6SDave Kleikamp static void __journal_abort_soft (journal_t *journal, int errno) 1484*470decc6SDave Kleikamp { 1485*470decc6SDave Kleikamp if (journal->j_flags & JFS_ABORT) 1486*470decc6SDave Kleikamp return; 1487*470decc6SDave Kleikamp 1488*470decc6SDave Kleikamp if (!journal->j_errno) 1489*470decc6SDave Kleikamp journal->j_errno = errno; 1490*470decc6SDave Kleikamp 1491*470decc6SDave Kleikamp __journal_abort_hard(journal); 1492*470decc6SDave Kleikamp 1493*470decc6SDave Kleikamp if (errno) 1494*470decc6SDave Kleikamp journal_update_superblock(journal, 1); 1495*470decc6SDave Kleikamp } 1496*470decc6SDave Kleikamp 1497*470decc6SDave Kleikamp /** 1498*470decc6SDave Kleikamp * void journal_abort () - Shutdown the journal immediately. 1499*470decc6SDave Kleikamp * @journal: the journal to shutdown. 
1500*470decc6SDave Kleikamp * @errno: an error number to record in the journal indicating 1501*470decc6SDave Kleikamp * the reason for the shutdown. 1502*470decc6SDave Kleikamp * 1503*470decc6SDave Kleikamp * Perform a complete, immediate shutdown of the ENTIRE 1504*470decc6SDave Kleikamp * journal (not of a single transaction). This operation cannot be 1505*470decc6SDave Kleikamp * undone without closing and reopening the journal. 1506*470decc6SDave Kleikamp * 1507*470decc6SDave Kleikamp * The journal_abort function is intended to support higher level error 1508*470decc6SDave Kleikamp * recovery mechanisms such as the ext2/ext3 remount-readonly error 1509*470decc6SDave Kleikamp * mode. 1510*470decc6SDave Kleikamp * 1511*470decc6SDave Kleikamp * Journal abort has very specific semantics. Any existing dirty, 1512*470decc6SDave Kleikamp * unjournaled buffers in the main filesystem will still be written to 1513*470decc6SDave Kleikamp * disk by bdflush, but the journaling mechanism will be suspended 1514*470decc6SDave Kleikamp * immediately and no further transaction commits will be honoured. 1515*470decc6SDave Kleikamp * 1516*470decc6SDave Kleikamp * Any dirty, journaled buffers will be written back to disk without 1517*470decc6SDave Kleikamp * hitting the journal. Atomicity cannot be guaranteed on an aborted 1518*470decc6SDave Kleikamp * filesystem, but we _do_ attempt to leave as much data as possible 1519*470decc6SDave Kleikamp * behind for fsck to use for cleanup. 1520*470decc6SDave Kleikamp * 1521*470decc6SDave Kleikamp * Any attempt to get a new transaction handle on a journal which is in 1522*470decc6SDave Kleikamp * ABORT state will just result in an -EROFS error return. A 1523*470decc6SDave Kleikamp * journal_stop on an existing handle will return -EIO if we have 1524*470decc6SDave Kleikamp * entered abort state during the update. 
1525*470decc6SDave Kleikamp * 1526*470decc6SDave Kleikamp * Recursive transactions are not disturbed by journal abort until the 1527*470decc6SDave Kleikamp * final journal_stop, which will receive the -EIO error. 1528*470decc6SDave Kleikamp * 1529*470decc6SDave Kleikamp * Finally, the journal_abort call allows the caller to supply an errno 1530*470decc6SDave Kleikamp * which will be recorded (if possible) in the journal superblock. This 1531*470decc6SDave Kleikamp * allows a client to record failure conditions in the middle of a 1532*470decc6SDave Kleikamp * transaction without having to complete the transaction to record the 1533*470decc6SDave Kleikamp * failure to disk. ext3_error, for example, now uses this 1534*470decc6SDave Kleikamp * functionality. 1535*470decc6SDave Kleikamp * 1536*470decc6SDave Kleikamp * Errors which originate from within the journaling layer will NOT 1537*470decc6SDave Kleikamp * supply an errno; a null errno implies that absolutely no further 1538*470decc6SDave Kleikamp * writes are done to the journal (unless there are any already in 1539*470decc6SDave Kleikamp * progress). 1540*470decc6SDave Kleikamp * 1541*470decc6SDave Kleikamp */ 1542*470decc6SDave Kleikamp 1543*470decc6SDave Kleikamp void journal_abort(journal_t *journal, int errno) 1544*470decc6SDave Kleikamp { 1545*470decc6SDave Kleikamp __journal_abort_soft(journal, errno); 1546*470decc6SDave Kleikamp } 1547*470decc6SDave Kleikamp 1548*470decc6SDave Kleikamp /** 1549*470decc6SDave Kleikamp * int journal_errno () - returns the journal's error state. 1550*470decc6SDave Kleikamp * @journal: journal to examine. 1551*470decc6SDave Kleikamp * 1552*470decc6SDave Kleikamp * This is the errno numbet set with journal_abort(), the last 1553*470decc6SDave Kleikamp * time the journal was mounted - if the journal was stopped 1554*470decc6SDave Kleikamp * without calling abort this will be 0. 
1555*470decc6SDave Kleikamp * 1556*470decc6SDave Kleikamp * If the journal has been aborted on this mount time -EROFS will 1557*470decc6SDave Kleikamp * be returned. 1558*470decc6SDave Kleikamp */ 1559*470decc6SDave Kleikamp int journal_errno(journal_t *journal) 1560*470decc6SDave Kleikamp { 1561*470decc6SDave Kleikamp int err; 1562*470decc6SDave Kleikamp 1563*470decc6SDave Kleikamp spin_lock(&journal->j_state_lock); 1564*470decc6SDave Kleikamp if (journal->j_flags & JFS_ABORT) 1565*470decc6SDave Kleikamp err = -EROFS; 1566*470decc6SDave Kleikamp else 1567*470decc6SDave Kleikamp err = journal->j_errno; 1568*470decc6SDave Kleikamp spin_unlock(&journal->j_state_lock); 1569*470decc6SDave Kleikamp return err; 1570*470decc6SDave Kleikamp } 1571*470decc6SDave Kleikamp 1572*470decc6SDave Kleikamp /** 1573*470decc6SDave Kleikamp * int journal_clear_err () - clears the journal's error state 1574*470decc6SDave Kleikamp * @journal: journal to act on. 1575*470decc6SDave Kleikamp * 1576*470decc6SDave Kleikamp * An error must be cleared or Acked to take a FS out of readonly 1577*470decc6SDave Kleikamp * mode. 1578*470decc6SDave Kleikamp */ 1579*470decc6SDave Kleikamp int journal_clear_err(journal_t *journal) 1580*470decc6SDave Kleikamp { 1581*470decc6SDave Kleikamp int err = 0; 1582*470decc6SDave Kleikamp 1583*470decc6SDave Kleikamp spin_lock(&journal->j_state_lock); 1584*470decc6SDave Kleikamp if (journal->j_flags & JFS_ABORT) 1585*470decc6SDave Kleikamp err = -EROFS; 1586*470decc6SDave Kleikamp else 1587*470decc6SDave Kleikamp journal->j_errno = 0; 1588*470decc6SDave Kleikamp spin_unlock(&journal->j_state_lock); 1589*470decc6SDave Kleikamp return err; 1590*470decc6SDave Kleikamp } 1591*470decc6SDave Kleikamp 1592*470decc6SDave Kleikamp /** 1593*470decc6SDave Kleikamp * void journal_ack_err() - Ack journal err. 1594*470decc6SDave Kleikamp * @journal: journal to act on. 
1595*470decc6SDave Kleikamp * 1596*470decc6SDave Kleikamp * An error must be cleared or Acked to take a FS out of readonly 1597*470decc6SDave Kleikamp * mode. 1598*470decc6SDave Kleikamp */ 1599*470decc6SDave Kleikamp void journal_ack_err(journal_t *journal) 1600*470decc6SDave Kleikamp { 1601*470decc6SDave Kleikamp spin_lock(&journal->j_state_lock); 1602*470decc6SDave Kleikamp if (journal->j_errno) 1603*470decc6SDave Kleikamp journal->j_flags |= JFS_ACK_ERR; 1604*470decc6SDave Kleikamp spin_unlock(&journal->j_state_lock); 1605*470decc6SDave Kleikamp } 1606*470decc6SDave Kleikamp 1607*470decc6SDave Kleikamp int journal_blocks_per_page(struct inode *inode) 1608*470decc6SDave Kleikamp { 1609*470decc6SDave Kleikamp return 1 << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); 1610*470decc6SDave Kleikamp } 1611*470decc6SDave Kleikamp 1612*470decc6SDave Kleikamp /* 1613*470decc6SDave Kleikamp * Simple support for retrying memory allocations. Introduced to help to 1614*470decc6SDave Kleikamp * debug different VM deadlock avoidance strategies. 1615*470decc6SDave Kleikamp */ 1616*470decc6SDave Kleikamp void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry) 1617*470decc6SDave Kleikamp { 1618*470decc6SDave Kleikamp return kmalloc(size, flags | (retry ? __GFP_NOFAIL : 0)); 1619*470decc6SDave Kleikamp } 1620*470decc6SDave Kleikamp 1621*470decc6SDave Kleikamp /* 1622*470decc6SDave Kleikamp * jbd slab management: create 1k, 2k, 4k, 8k slabs as needed 1623*470decc6SDave Kleikamp * and allocate frozen and commit buffers from these slabs. 1624*470decc6SDave Kleikamp * 1625*470decc6SDave Kleikamp * Reason for doing this is to avoid, SLAB_DEBUG - since it could 1626*470decc6SDave Kleikamp * cause bh to cross page boundary. 
1627*470decc6SDave Kleikamp */ 1628*470decc6SDave Kleikamp 1629*470decc6SDave Kleikamp #define JBD_MAX_SLABS 5 1630*470decc6SDave Kleikamp #define JBD_SLAB_INDEX(size) (size >> 11) 1631*470decc6SDave Kleikamp 1632*470decc6SDave Kleikamp static kmem_cache_t *jbd_slab[JBD_MAX_SLABS]; 1633*470decc6SDave Kleikamp static const char *jbd_slab_names[JBD_MAX_SLABS] = { 1634*470decc6SDave Kleikamp "jbd_1k", "jbd_2k", "jbd_4k", NULL, "jbd_8k" 1635*470decc6SDave Kleikamp }; 1636*470decc6SDave Kleikamp 1637*470decc6SDave Kleikamp static void journal_destroy_jbd_slabs(void) 1638*470decc6SDave Kleikamp { 1639*470decc6SDave Kleikamp int i; 1640*470decc6SDave Kleikamp 1641*470decc6SDave Kleikamp for (i = 0; i < JBD_MAX_SLABS; i++) { 1642*470decc6SDave Kleikamp if (jbd_slab[i]) 1643*470decc6SDave Kleikamp kmem_cache_destroy(jbd_slab[i]); 1644*470decc6SDave Kleikamp jbd_slab[i] = NULL; 1645*470decc6SDave Kleikamp } 1646*470decc6SDave Kleikamp } 1647*470decc6SDave Kleikamp 1648*470decc6SDave Kleikamp static int journal_create_jbd_slab(size_t slab_size) 1649*470decc6SDave Kleikamp { 1650*470decc6SDave Kleikamp int i = JBD_SLAB_INDEX(slab_size); 1651*470decc6SDave Kleikamp 1652*470decc6SDave Kleikamp BUG_ON(i >= JBD_MAX_SLABS); 1653*470decc6SDave Kleikamp 1654*470decc6SDave Kleikamp /* 1655*470decc6SDave Kleikamp * Check if we already have a slab created for this size 1656*470decc6SDave Kleikamp */ 1657*470decc6SDave Kleikamp if (jbd_slab[i]) 1658*470decc6SDave Kleikamp return 0; 1659*470decc6SDave Kleikamp 1660*470decc6SDave Kleikamp /* 1661*470decc6SDave Kleikamp * Create a slab and force alignment to be same as slabsize - 1662*470decc6SDave Kleikamp * this will make sure that allocations won't cross the page 1663*470decc6SDave Kleikamp * boundary. 
1664*470decc6SDave Kleikamp */ 1665*470decc6SDave Kleikamp jbd_slab[i] = kmem_cache_create(jbd_slab_names[i], 1666*470decc6SDave Kleikamp slab_size, slab_size, 0, NULL, NULL); 1667*470decc6SDave Kleikamp if (!jbd_slab[i]) { 1668*470decc6SDave Kleikamp printk(KERN_EMERG "JBD: no memory for jbd_slab cache\n"); 1669*470decc6SDave Kleikamp return -ENOMEM; 1670*470decc6SDave Kleikamp } 1671*470decc6SDave Kleikamp return 0; 1672*470decc6SDave Kleikamp } 1673*470decc6SDave Kleikamp 1674*470decc6SDave Kleikamp void * jbd_slab_alloc(size_t size, gfp_t flags) 1675*470decc6SDave Kleikamp { 1676*470decc6SDave Kleikamp int idx; 1677*470decc6SDave Kleikamp 1678*470decc6SDave Kleikamp idx = JBD_SLAB_INDEX(size); 1679*470decc6SDave Kleikamp BUG_ON(jbd_slab[idx] == NULL); 1680*470decc6SDave Kleikamp return kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL); 1681*470decc6SDave Kleikamp } 1682*470decc6SDave Kleikamp 1683*470decc6SDave Kleikamp void jbd_slab_free(void *ptr, size_t size) 1684*470decc6SDave Kleikamp { 1685*470decc6SDave Kleikamp int idx; 1686*470decc6SDave Kleikamp 1687*470decc6SDave Kleikamp idx = JBD_SLAB_INDEX(size); 1688*470decc6SDave Kleikamp BUG_ON(jbd_slab[idx] == NULL); 1689*470decc6SDave Kleikamp kmem_cache_free(jbd_slab[idx], ptr); 1690*470decc6SDave Kleikamp } 1691*470decc6SDave Kleikamp 1692*470decc6SDave Kleikamp /* 1693*470decc6SDave Kleikamp * Journal_head storage management 1694*470decc6SDave Kleikamp */ 1695*470decc6SDave Kleikamp static kmem_cache_t *journal_head_cache; 1696*470decc6SDave Kleikamp #ifdef CONFIG_JBD_DEBUG 1697*470decc6SDave Kleikamp static atomic_t nr_journal_heads = ATOMIC_INIT(0); 1698*470decc6SDave Kleikamp #endif 1699*470decc6SDave Kleikamp 1700*470decc6SDave Kleikamp static int journal_init_journal_head_cache(void) 1701*470decc6SDave Kleikamp { 1702*470decc6SDave Kleikamp int retval; 1703*470decc6SDave Kleikamp 1704*470decc6SDave Kleikamp J_ASSERT(journal_head_cache == 0); 1705*470decc6SDave Kleikamp journal_head_cache = 
kmem_cache_create("journal_head", 1706*470decc6SDave Kleikamp sizeof(struct journal_head), 1707*470decc6SDave Kleikamp 0, /* offset */ 1708*470decc6SDave Kleikamp 0, /* flags */ 1709*470decc6SDave Kleikamp NULL, /* ctor */ 1710*470decc6SDave Kleikamp NULL); /* dtor */ 1711*470decc6SDave Kleikamp retval = 0; 1712*470decc6SDave Kleikamp if (journal_head_cache == 0) { 1713*470decc6SDave Kleikamp retval = -ENOMEM; 1714*470decc6SDave Kleikamp printk(KERN_EMERG "JBD: no memory for journal_head cache\n"); 1715*470decc6SDave Kleikamp } 1716*470decc6SDave Kleikamp return retval; 1717*470decc6SDave Kleikamp } 1718*470decc6SDave Kleikamp 1719*470decc6SDave Kleikamp static void journal_destroy_journal_head_cache(void) 1720*470decc6SDave Kleikamp { 1721*470decc6SDave Kleikamp J_ASSERT(journal_head_cache != NULL); 1722*470decc6SDave Kleikamp kmem_cache_destroy(journal_head_cache); 1723*470decc6SDave Kleikamp journal_head_cache = NULL; 1724*470decc6SDave Kleikamp } 1725*470decc6SDave Kleikamp 1726*470decc6SDave Kleikamp /* 1727*470decc6SDave Kleikamp * journal_head splicing and dicing 1728*470decc6SDave Kleikamp */ 1729*470decc6SDave Kleikamp static struct journal_head *journal_alloc_journal_head(void) 1730*470decc6SDave Kleikamp { 1731*470decc6SDave Kleikamp struct journal_head *ret; 1732*470decc6SDave Kleikamp static unsigned long last_warning; 1733*470decc6SDave Kleikamp 1734*470decc6SDave Kleikamp #ifdef CONFIG_JBD_DEBUG 1735*470decc6SDave Kleikamp atomic_inc(&nr_journal_heads); 1736*470decc6SDave Kleikamp #endif 1737*470decc6SDave Kleikamp ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS); 1738*470decc6SDave Kleikamp if (ret == 0) { 1739*470decc6SDave Kleikamp jbd_debug(1, "out of memory for journal_head\n"); 1740*470decc6SDave Kleikamp if (time_after(jiffies, last_warning + 5*HZ)) { 1741*470decc6SDave Kleikamp printk(KERN_NOTICE "ENOMEM in %s, retrying.\n", 1742*470decc6SDave Kleikamp __FUNCTION__); 1743*470decc6SDave Kleikamp last_warning = jiffies; 1744*470decc6SDave 
Kleikamp } 1745*470decc6SDave Kleikamp while (ret == 0) { 1746*470decc6SDave Kleikamp yield(); 1747*470decc6SDave Kleikamp ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS); 1748*470decc6SDave Kleikamp } 1749*470decc6SDave Kleikamp } 1750*470decc6SDave Kleikamp return ret; 1751*470decc6SDave Kleikamp } 1752*470decc6SDave Kleikamp 1753*470decc6SDave Kleikamp static void journal_free_journal_head(struct journal_head *jh) 1754*470decc6SDave Kleikamp { 1755*470decc6SDave Kleikamp #ifdef CONFIG_JBD_DEBUG 1756*470decc6SDave Kleikamp atomic_dec(&nr_journal_heads); 1757*470decc6SDave Kleikamp memset(jh, JBD_POISON_FREE, sizeof(*jh)); 1758*470decc6SDave Kleikamp #endif 1759*470decc6SDave Kleikamp kmem_cache_free(journal_head_cache, jh); 1760*470decc6SDave Kleikamp } 1761*470decc6SDave Kleikamp 1762*470decc6SDave Kleikamp /* 1763*470decc6SDave Kleikamp * A journal_head is attached to a buffer_head whenever JBD has an 1764*470decc6SDave Kleikamp * interest in the buffer. 1765*470decc6SDave Kleikamp * 1766*470decc6SDave Kleikamp * Whenever a buffer has an attached journal_head, its ->b_state:BH_JBD bit 1767*470decc6SDave Kleikamp * is set. This bit is tested in core kernel code where we need to take 1768*470decc6SDave Kleikamp * JBD-specific actions. Testing the zeroness of ->b_private is not reliable 1769*470decc6SDave Kleikamp * there. 1770*470decc6SDave Kleikamp * 1771*470decc6SDave Kleikamp * When a buffer has its BH_JBD bit set, its ->b_count is elevated by one. 1772*470decc6SDave Kleikamp * 1773*470decc6SDave Kleikamp * When a buffer has its BH_JBD bit set it is immune from being released by 1774*470decc6SDave Kleikamp * core kernel code, mainly via ->b_count. 1775*470decc6SDave Kleikamp * 1776*470decc6SDave Kleikamp * A journal_head may be detached from its buffer_head when the journal_head's 1777*470decc6SDave Kleikamp * b_transaction, b_cp_transaction and b_next_transaction pointers are NULL. 
1778*470decc6SDave Kleikamp * Various places in JBD call journal_remove_journal_head() to indicate that the 1779*470decc6SDave Kleikamp * journal_head can be dropped if needed. 1780*470decc6SDave Kleikamp * 1781*470decc6SDave Kleikamp * Various places in the kernel want to attach a journal_head to a buffer_head 1782*470decc6SDave Kleikamp * _before_ attaching the journal_head to a transaction. To protect the 1783*470decc6SDave Kleikamp * journal_head in this situation, journal_add_journal_head elevates the 1784*470decc6SDave Kleikamp * journal_head's b_jcount refcount by one. The caller must call 1785*470decc6SDave Kleikamp * journal_put_journal_head() to undo this. 1786*470decc6SDave Kleikamp * 1787*470decc6SDave Kleikamp * So the typical usage would be: 1788*470decc6SDave Kleikamp * 1789*470decc6SDave Kleikamp * (Attach a journal_head if needed. Increments b_jcount) 1790*470decc6SDave Kleikamp * struct journal_head *jh = journal_add_journal_head(bh); 1791*470decc6SDave Kleikamp * ... 1792*470decc6SDave Kleikamp * jh->b_transaction = xxx; 1793*470decc6SDave Kleikamp * journal_put_journal_head(jh); 1794*470decc6SDave Kleikamp * 1795*470decc6SDave Kleikamp * Now, the journal_head's b_jcount is zero, but it is safe from being released 1796*470decc6SDave Kleikamp * because it has a non-zero b_transaction. 1797*470decc6SDave Kleikamp */ 1798*470decc6SDave Kleikamp 1799*470decc6SDave Kleikamp /* 1800*470decc6SDave Kleikamp * Give a buffer_head a journal_head. 1801*470decc6SDave Kleikamp * 1802*470decc6SDave Kleikamp * Doesn't need the journal lock. 1803*470decc6SDave Kleikamp * May sleep. 
1804*470decc6SDave Kleikamp */ 1805*470decc6SDave Kleikamp struct journal_head *journal_add_journal_head(struct buffer_head *bh) 1806*470decc6SDave Kleikamp { 1807*470decc6SDave Kleikamp struct journal_head *jh; 1808*470decc6SDave Kleikamp struct journal_head *new_jh = NULL; 1809*470decc6SDave Kleikamp 1810*470decc6SDave Kleikamp repeat: 1811*470decc6SDave Kleikamp if (!buffer_jbd(bh)) { 1812*470decc6SDave Kleikamp new_jh = journal_alloc_journal_head(); 1813*470decc6SDave Kleikamp memset(new_jh, 0, sizeof(*new_jh)); 1814*470decc6SDave Kleikamp } 1815*470decc6SDave Kleikamp 1816*470decc6SDave Kleikamp jbd_lock_bh_journal_head(bh); 1817*470decc6SDave Kleikamp if (buffer_jbd(bh)) { 1818*470decc6SDave Kleikamp jh = bh2jh(bh); 1819*470decc6SDave Kleikamp } else { 1820*470decc6SDave Kleikamp J_ASSERT_BH(bh, 1821*470decc6SDave Kleikamp (atomic_read(&bh->b_count) > 0) || 1822*470decc6SDave Kleikamp (bh->b_page && bh->b_page->mapping)); 1823*470decc6SDave Kleikamp 1824*470decc6SDave Kleikamp if (!new_jh) { 1825*470decc6SDave Kleikamp jbd_unlock_bh_journal_head(bh); 1826*470decc6SDave Kleikamp goto repeat; 1827*470decc6SDave Kleikamp } 1828*470decc6SDave Kleikamp 1829*470decc6SDave Kleikamp jh = new_jh; 1830*470decc6SDave Kleikamp new_jh = NULL; /* We consumed it */ 1831*470decc6SDave Kleikamp set_buffer_jbd(bh); 1832*470decc6SDave Kleikamp bh->b_private = jh; 1833*470decc6SDave Kleikamp jh->b_bh = bh; 1834*470decc6SDave Kleikamp get_bh(bh); 1835*470decc6SDave Kleikamp BUFFER_TRACE(bh, "added journal_head"); 1836*470decc6SDave Kleikamp } 1837*470decc6SDave Kleikamp jh->b_jcount++; 1838*470decc6SDave Kleikamp jbd_unlock_bh_journal_head(bh); 1839*470decc6SDave Kleikamp if (new_jh) 1840*470decc6SDave Kleikamp journal_free_journal_head(new_jh); 1841*470decc6SDave Kleikamp return bh->b_private; 1842*470decc6SDave Kleikamp } 1843*470decc6SDave Kleikamp 1844*470decc6SDave Kleikamp /* 1845*470decc6SDave Kleikamp * Grab a ref against this buffer_head's journal_head. 
If it ended up not 1846*470decc6SDave Kleikamp * having a journal_head, return NULL 1847*470decc6SDave Kleikamp */ 1848*470decc6SDave Kleikamp struct journal_head *journal_grab_journal_head(struct buffer_head *bh) 1849*470decc6SDave Kleikamp { 1850*470decc6SDave Kleikamp struct journal_head *jh = NULL; 1851*470decc6SDave Kleikamp 1852*470decc6SDave Kleikamp jbd_lock_bh_journal_head(bh); 1853*470decc6SDave Kleikamp if (buffer_jbd(bh)) { 1854*470decc6SDave Kleikamp jh = bh2jh(bh); 1855*470decc6SDave Kleikamp jh->b_jcount++; 1856*470decc6SDave Kleikamp } 1857*470decc6SDave Kleikamp jbd_unlock_bh_journal_head(bh); 1858*470decc6SDave Kleikamp return jh; 1859*470decc6SDave Kleikamp } 1860*470decc6SDave Kleikamp 1861*470decc6SDave Kleikamp static void __journal_remove_journal_head(struct buffer_head *bh) 1862*470decc6SDave Kleikamp { 1863*470decc6SDave Kleikamp struct journal_head *jh = bh2jh(bh); 1864*470decc6SDave Kleikamp 1865*470decc6SDave Kleikamp J_ASSERT_JH(jh, jh->b_jcount >= 0); 1866*470decc6SDave Kleikamp 1867*470decc6SDave Kleikamp get_bh(bh); 1868*470decc6SDave Kleikamp if (jh->b_jcount == 0) { 1869*470decc6SDave Kleikamp if (jh->b_transaction == NULL && 1870*470decc6SDave Kleikamp jh->b_next_transaction == NULL && 1871*470decc6SDave Kleikamp jh->b_cp_transaction == NULL) { 1872*470decc6SDave Kleikamp J_ASSERT_JH(jh, jh->b_jlist == BJ_None); 1873*470decc6SDave Kleikamp J_ASSERT_BH(bh, buffer_jbd(bh)); 1874*470decc6SDave Kleikamp J_ASSERT_BH(bh, jh2bh(jh) == bh); 1875*470decc6SDave Kleikamp BUFFER_TRACE(bh, "remove journal_head"); 1876*470decc6SDave Kleikamp if (jh->b_frozen_data) { 1877*470decc6SDave Kleikamp printk(KERN_WARNING "%s: freeing " 1878*470decc6SDave Kleikamp "b_frozen_data\n", 1879*470decc6SDave Kleikamp __FUNCTION__); 1880*470decc6SDave Kleikamp jbd_slab_free(jh->b_frozen_data, bh->b_size); 1881*470decc6SDave Kleikamp } 1882*470decc6SDave Kleikamp if (jh->b_committed_data) { 1883*470decc6SDave Kleikamp printk(KERN_WARNING "%s: freeing " 
1884*470decc6SDave Kleikamp "b_committed_data\n", 1885*470decc6SDave Kleikamp __FUNCTION__); 1886*470decc6SDave Kleikamp jbd_slab_free(jh->b_committed_data, bh->b_size); 1887*470decc6SDave Kleikamp } 1888*470decc6SDave Kleikamp bh->b_private = NULL; 1889*470decc6SDave Kleikamp jh->b_bh = NULL; /* debug, really */ 1890*470decc6SDave Kleikamp clear_buffer_jbd(bh); 1891*470decc6SDave Kleikamp __brelse(bh); 1892*470decc6SDave Kleikamp journal_free_journal_head(jh); 1893*470decc6SDave Kleikamp } else { 1894*470decc6SDave Kleikamp BUFFER_TRACE(bh, "journal_head was locked"); 1895*470decc6SDave Kleikamp } 1896*470decc6SDave Kleikamp } 1897*470decc6SDave Kleikamp } 1898*470decc6SDave Kleikamp 1899*470decc6SDave Kleikamp /* 1900*470decc6SDave Kleikamp * journal_remove_journal_head(): if the buffer isn't attached to a transaction 1901*470decc6SDave Kleikamp * and has a zero b_jcount then remove and release its journal_head. If we did 1902*470decc6SDave Kleikamp * see that the buffer is not used by any transaction we also "logically" 1903*470decc6SDave Kleikamp * decrement ->b_count. 1904*470decc6SDave Kleikamp * 1905*470decc6SDave Kleikamp * We in fact take an additional increment on ->b_count as a convenience, 1906*470decc6SDave Kleikamp * because the caller usually wants to do additional things with the bh 1907*470decc6SDave Kleikamp * after calling here. 1908*470decc6SDave Kleikamp * The caller of journal_remove_journal_head() *must* run __brelse(bh) at some 1909*470decc6SDave Kleikamp * time. Once the caller has run __brelse(), the buffer is eligible for 1910*470decc6SDave Kleikamp * reaping by try_to_free_buffers(). 
1911*470decc6SDave Kleikamp */ 1912*470decc6SDave Kleikamp void journal_remove_journal_head(struct buffer_head *bh) 1913*470decc6SDave Kleikamp { 1914*470decc6SDave Kleikamp jbd_lock_bh_journal_head(bh); 1915*470decc6SDave Kleikamp __journal_remove_journal_head(bh); 1916*470decc6SDave Kleikamp jbd_unlock_bh_journal_head(bh); 1917*470decc6SDave Kleikamp } 1918*470decc6SDave Kleikamp 1919*470decc6SDave Kleikamp /* 1920*470decc6SDave Kleikamp * Drop a reference on the passed journal_head. If it fell to zero then try to 1921*470decc6SDave Kleikamp * release the journal_head from the buffer_head. 1922*470decc6SDave Kleikamp */ 1923*470decc6SDave Kleikamp void journal_put_journal_head(struct journal_head *jh) 1924*470decc6SDave Kleikamp { 1925*470decc6SDave Kleikamp struct buffer_head *bh = jh2bh(jh); 1926*470decc6SDave Kleikamp 1927*470decc6SDave Kleikamp jbd_lock_bh_journal_head(bh); 1928*470decc6SDave Kleikamp J_ASSERT_JH(jh, jh->b_jcount > 0); 1929*470decc6SDave Kleikamp --jh->b_jcount; 1930*470decc6SDave Kleikamp if (!jh->b_jcount && !jh->b_transaction) { 1931*470decc6SDave Kleikamp __journal_remove_journal_head(bh); 1932*470decc6SDave Kleikamp __brelse(bh); 1933*470decc6SDave Kleikamp } 1934*470decc6SDave Kleikamp jbd_unlock_bh_journal_head(bh); 1935*470decc6SDave Kleikamp } 1936*470decc6SDave Kleikamp 1937*470decc6SDave Kleikamp /* 1938*470decc6SDave Kleikamp * /proc tunables 1939*470decc6SDave Kleikamp */ 1940*470decc6SDave Kleikamp #if defined(CONFIG_JBD_DEBUG) 1941*470decc6SDave Kleikamp int journal_enable_debug; 1942*470decc6SDave Kleikamp EXPORT_SYMBOL(journal_enable_debug); 1943*470decc6SDave Kleikamp #endif 1944*470decc6SDave Kleikamp 1945*470decc6SDave Kleikamp #if defined(CONFIG_JBD_DEBUG) && defined(CONFIG_PROC_FS) 1946*470decc6SDave Kleikamp 1947*470decc6SDave Kleikamp static struct proc_dir_entry *proc_jbd_debug; 1948*470decc6SDave Kleikamp 1949*470decc6SDave Kleikamp static int read_jbd_debug(char *page, char **start, off_t off, 1950*470decc6SDave 
Kleikamp int count, int *eof, void *data) 1951*470decc6SDave Kleikamp { 1952*470decc6SDave Kleikamp int ret; 1953*470decc6SDave Kleikamp 1954*470decc6SDave Kleikamp ret = sprintf(page + off, "%d\n", journal_enable_debug); 1955*470decc6SDave Kleikamp *eof = 1; 1956*470decc6SDave Kleikamp return ret; 1957*470decc6SDave Kleikamp } 1958*470decc6SDave Kleikamp 1959*470decc6SDave Kleikamp static int write_jbd_debug(struct file *file, const char __user *buffer, 1960*470decc6SDave Kleikamp unsigned long count, void *data) 1961*470decc6SDave Kleikamp { 1962*470decc6SDave Kleikamp char buf[32]; 1963*470decc6SDave Kleikamp 1964*470decc6SDave Kleikamp if (count > ARRAY_SIZE(buf) - 1) 1965*470decc6SDave Kleikamp count = ARRAY_SIZE(buf) - 1; 1966*470decc6SDave Kleikamp if (copy_from_user(buf, buffer, count)) 1967*470decc6SDave Kleikamp return -EFAULT; 1968*470decc6SDave Kleikamp buf[ARRAY_SIZE(buf) - 1] = '\0'; 1969*470decc6SDave Kleikamp journal_enable_debug = simple_strtoul(buf, NULL, 10); 1970*470decc6SDave Kleikamp return count; 1971*470decc6SDave Kleikamp } 1972*470decc6SDave Kleikamp 1973*470decc6SDave Kleikamp #define JBD_PROC_NAME "sys/fs/jbd-debug" 1974*470decc6SDave Kleikamp 1975*470decc6SDave Kleikamp static void __init create_jbd_proc_entry(void) 1976*470decc6SDave Kleikamp { 1977*470decc6SDave Kleikamp proc_jbd_debug = create_proc_entry(JBD_PROC_NAME, 0644, NULL); 1978*470decc6SDave Kleikamp if (proc_jbd_debug) { 1979*470decc6SDave Kleikamp /* Why is this so hard? 
*/ 1980*470decc6SDave Kleikamp proc_jbd_debug->read_proc = read_jbd_debug; 1981*470decc6SDave Kleikamp proc_jbd_debug->write_proc = write_jbd_debug; 1982*470decc6SDave Kleikamp } 1983*470decc6SDave Kleikamp } 1984*470decc6SDave Kleikamp 1985*470decc6SDave Kleikamp static void __exit remove_jbd_proc_entry(void) 1986*470decc6SDave Kleikamp { 1987*470decc6SDave Kleikamp if (proc_jbd_debug) 1988*470decc6SDave Kleikamp remove_proc_entry(JBD_PROC_NAME, NULL); 1989*470decc6SDave Kleikamp } 1990*470decc6SDave Kleikamp 1991*470decc6SDave Kleikamp #else 1992*470decc6SDave Kleikamp 1993*470decc6SDave Kleikamp #define create_jbd_proc_entry() do {} while (0) 1994*470decc6SDave Kleikamp #define remove_jbd_proc_entry() do {} while (0) 1995*470decc6SDave Kleikamp 1996*470decc6SDave Kleikamp #endif 1997*470decc6SDave Kleikamp 1998*470decc6SDave Kleikamp kmem_cache_t *jbd_handle_cache; 1999*470decc6SDave Kleikamp 2000*470decc6SDave Kleikamp static int __init journal_init_handle_cache(void) 2001*470decc6SDave Kleikamp { 2002*470decc6SDave Kleikamp jbd_handle_cache = kmem_cache_create("journal_handle", 2003*470decc6SDave Kleikamp sizeof(handle_t), 2004*470decc6SDave Kleikamp 0, /* offset */ 2005*470decc6SDave Kleikamp 0, /* flags */ 2006*470decc6SDave Kleikamp NULL, /* ctor */ 2007*470decc6SDave Kleikamp NULL); /* dtor */ 2008*470decc6SDave Kleikamp if (jbd_handle_cache == NULL) { 2009*470decc6SDave Kleikamp printk(KERN_EMERG "JBD: failed to create handle cache\n"); 2010*470decc6SDave Kleikamp return -ENOMEM; 2011*470decc6SDave Kleikamp } 2012*470decc6SDave Kleikamp return 0; 2013*470decc6SDave Kleikamp } 2014*470decc6SDave Kleikamp 2015*470decc6SDave Kleikamp static void journal_destroy_handle_cache(void) 2016*470decc6SDave Kleikamp { 2017*470decc6SDave Kleikamp if (jbd_handle_cache) 2018*470decc6SDave Kleikamp kmem_cache_destroy(jbd_handle_cache); 2019*470decc6SDave Kleikamp } 2020*470decc6SDave Kleikamp 2021*470decc6SDave Kleikamp /* 2022*470decc6SDave Kleikamp * Module startup and 
shutdown 2023*470decc6SDave Kleikamp */ 2024*470decc6SDave Kleikamp 2025*470decc6SDave Kleikamp static int __init journal_init_caches(void) 2026*470decc6SDave Kleikamp { 2027*470decc6SDave Kleikamp int ret; 2028*470decc6SDave Kleikamp 2029*470decc6SDave Kleikamp ret = journal_init_revoke_caches(); 2030*470decc6SDave Kleikamp if (ret == 0) 2031*470decc6SDave Kleikamp ret = journal_init_journal_head_cache(); 2032*470decc6SDave Kleikamp if (ret == 0) 2033*470decc6SDave Kleikamp ret = journal_init_handle_cache(); 2034*470decc6SDave Kleikamp return ret; 2035*470decc6SDave Kleikamp } 2036*470decc6SDave Kleikamp 2037*470decc6SDave Kleikamp static void journal_destroy_caches(void) 2038*470decc6SDave Kleikamp { 2039*470decc6SDave Kleikamp journal_destroy_revoke_caches(); 2040*470decc6SDave Kleikamp journal_destroy_journal_head_cache(); 2041*470decc6SDave Kleikamp journal_destroy_handle_cache(); 2042*470decc6SDave Kleikamp journal_destroy_jbd_slabs(); 2043*470decc6SDave Kleikamp } 2044*470decc6SDave Kleikamp 2045*470decc6SDave Kleikamp static int __init journal_init(void) 2046*470decc6SDave Kleikamp { 2047*470decc6SDave Kleikamp int ret; 2048*470decc6SDave Kleikamp 2049*470decc6SDave Kleikamp BUILD_BUG_ON(sizeof(struct journal_superblock_s) != 1024); 2050*470decc6SDave Kleikamp 2051*470decc6SDave Kleikamp ret = journal_init_caches(); 2052*470decc6SDave Kleikamp if (ret != 0) 2053*470decc6SDave Kleikamp journal_destroy_caches(); 2054*470decc6SDave Kleikamp create_jbd_proc_entry(); 2055*470decc6SDave Kleikamp return ret; 2056*470decc6SDave Kleikamp } 2057*470decc6SDave Kleikamp 2058*470decc6SDave Kleikamp static void __exit journal_exit(void) 2059*470decc6SDave Kleikamp { 2060*470decc6SDave Kleikamp #ifdef CONFIG_JBD_DEBUG 2061*470decc6SDave Kleikamp int n = atomic_read(&nr_journal_heads); 2062*470decc6SDave Kleikamp if (n) 2063*470decc6SDave Kleikamp printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n); 2064*470decc6SDave Kleikamp #endif 2065*470decc6SDave Kleikamp 
remove_jbd_proc_entry(); 2066*470decc6SDave Kleikamp journal_destroy_caches(); 2067*470decc6SDave Kleikamp } 2068*470decc6SDave Kleikamp 2069*470decc6SDave Kleikamp MODULE_LICENSE("GPL"); 2070*470decc6SDave Kleikamp module_init(journal_init); 2071*470decc6SDave Kleikamp module_exit(journal_exit); 2072*470decc6SDave Kleikamp 2073