#include <linux/fs.h>
#include <linux/random.h>
#include <linux/buffer_head.h>
#include <linux/utsname.h>
#include <linux/kthread.h>

#include "ext4.h"

/* Checksumming functions */
static __u32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp)
{
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        int offset = offsetof(struct mmp_struct, mmp_checksum);
        __u32 csum;

        csum = ext4_chksum(sbi, sbi->s_csum_seed, (char *)mmp, offset);

        return cpu_to_le32(csum);
}

int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp)
{
        if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
                                        EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
                return 1;

        return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp);
}

void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp)
{
        if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
                                        EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
                return;

        mmp->mmp_checksum = ext4_mmp_csum(sb, mmp);
}

/*
 * Write the MMP block using WRITE_SYNC to try to get the block on-disk
 * faster.
 */
static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
{
        struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data);

        ext4_mmp_csum_set(sb, mmp);
        mark_buffer_dirty(bh);
        lock_buffer(bh);
        bh->b_end_io = end_buffer_write_sync;
        get_bh(bh);
        submit_bh(WRITE_SYNC, bh);
        wait_on_buffer(bh);
        if (unlikely(!buffer_uptodate(bh)))
                return 1;

        return 0;
}

/*
 * Read the MMP block. It _must_ be read from disk and hence we clear the
 * uptodate flag on the buffer.
 */
static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
                          ext4_fsblk_t mmp_block)
{
        struct mmp_struct *mmp;

        if (*bh)
                clear_buffer_uptodate(*bh);

        /* This would be sb_bread(sb, mmp_block), except we need to be sure
         * that the MD RAID device cache has been bypassed, and that the read
         * is not blocked in the elevator. */
        if (!*bh)
                *bh = sb_getblk(sb, mmp_block);
        if (*bh) {
                get_bh(*bh);
                lock_buffer(*bh);
                (*bh)->b_end_io = end_buffer_read_sync;
                submit_bh(READ_SYNC, *bh);
                wait_on_buffer(*bh);
                if (!buffer_uptodate(*bh)) {
                        brelse(*bh);
                        *bh = NULL;
                }
        }
        if (!*bh) {
                ext4_warning(sb, "Error while reading MMP block %llu",
                             mmp_block);
                return -EIO;
        }

        mmp = (struct mmp_struct *)((*bh)->b_data);
        if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC ||
            !ext4_mmp_csum_verify(sb, mmp))
                return -EINVAL;

        return 0;
}

/*
 * Dump as much information as possible to help the admin.
 */
void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp,
                    const char *function, unsigned int line, const char *msg)
{
        __ext4_warning(sb, function, line, msg);
        __ext4_warning(sb, function, line,
                       "MMP failure info: last update time: %llu, last update "
                       "node: %s, last update device: %s\n",
                       (long long unsigned int) le64_to_cpu(mmp->mmp_time),
                       mmp->mmp_nodename, mmp->mmp_bdevname);
}
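
/*
 * Note on the kmmpd thread below: on every pass it advances mmp_seq
 * (wrapping back to 1 before reaching the reserved values above
 * EXT4_MMP_SEQ_MAX, which mark special states such as a clean unmount or a
 * running fsck), stamps mmp_time, rewrites the MMP block, and then sleeps
 * for the remainder of s_mmp_update_interval.  If an update was delayed by
 * more than mmp_check_interval seconds, the block is re-read and compared
 * with what was written; a changed sequence number or node name means
 * another node has mounted the filesystem, and the thread aborts.  The
 * check interval itself is rescaled according to how long the updates
 * actually take.
 */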

/*
 * kmmpd will update the MMP sequence every s_mmp_update_interval seconds
 */
static int kmmpd(void *data)
{
        struct super_block *sb = ((struct mmpd_data *) data)->sb;
        struct buffer_head *bh = ((struct mmpd_data *) data)->bh;
        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
        struct mmp_struct *mmp;
        ext4_fsblk_t mmp_block;
        u32 seq = 0;
        unsigned long failed_writes = 0;
        int mmp_update_interval = le16_to_cpu(es->s_mmp_update_interval);
        unsigned mmp_check_interval;
        unsigned long last_update_time;
        unsigned long diff;
        int retval;

        mmp_block = le64_to_cpu(es->s_mmp_block);
        mmp = (struct mmp_struct *)(bh->b_data);
        mmp->mmp_time = cpu_to_le64(get_seconds());
        /*
         * Start with the higher mmp_check_interval and reduce it if
         * the MMP block is being updated on time.
         */
        mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval,
                                 EXT4_MMP_MIN_CHECK_INTERVAL);
        mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
        bdevname(bh->b_bdev, mmp->mmp_bdevname);

        memcpy(mmp->mmp_nodename, init_utsname()->nodename,
               sizeof(mmp->mmp_nodename));

        while (!kthread_should_stop()) {
                if (++seq > EXT4_MMP_SEQ_MAX)
                        seq = 1;

                mmp->mmp_seq = cpu_to_le32(seq);
                mmp->mmp_time = cpu_to_le64(get_seconds());
                last_update_time = jiffies;

                retval = write_mmp_block(sb, bh);
                /*
                 * Don't spew too many error messages. Print one every
                 * (s_mmp_update_interval * 60) seconds.
                 */
                if (retval) {
                        if ((failed_writes % 60) == 0)
                                ext4_error(sb, "Error writing to MMP block");
                        failed_writes++;
                }

                if (!(le32_to_cpu(es->s_feature_incompat) &
                    EXT4_FEATURE_INCOMPAT_MMP)) {
                        ext4_warning(sb, "kmmpd being stopped since MMP feature"
                                     " has been disabled.");
                        EXT4_SB(sb)->s_mmp_tsk = NULL;
                        goto failed;
                }

                if (sb->s_flags & MS_RDONLY) {
                        ext4_warning(sb, "kmmpd being stopped since filesystem "
                                     "has been remounted as readonly.");
                        EXT4_SB(sb)->s_mmp_tsk = NULL;
                        goto failed;
                }

                diff = jiffies - last_update_time;
                if (diff < mmp_update_interval * HZ)
                        schedule_timeout_interruptible(mmp_update_interval *
                                                       HZ - diff);

                /*
                 * We need to make sure that more than mmp_check_interval
                 * seconds have not passed since writing. If that has happened
                 * we need to check if the MMP block is as we left it.
                 */
                diff = jiffies - last_update_time;
                if (diff > mmp_check_interval * HZ) {
                        struct buffer_head *bh_check = NULL;
                        struct mmp_struct *mmp_check;

                        retval = read_mmp_block(sb, &bh_check, mmp_block);
                        if (retval) {
                                ext4_error(sb, "error reading MMP data: %d",
                                           retval);

                                EXT4_SB(sb)->s_mmp_tsk = NULL;
                                goto failed;
                        }

                        mmp_check = (struct mmp_struct *)(bh_check->b_data);
                        if (mmp->mmp_seq != mmp_check->mmp_seq ||
                            memcmp(mmp->mmp_nodename, mmp_check->mmp_nodename,
                                   sizeof(mmp->mmp_nodename))) {
                                dump_mmp_msg(sb, mmp_check,
                                             "Error while updating MMP info. "
                                             "The filesystem seems to have been"
                                             " multiply mounted.");
                                ext4_error(sb, "abort");
                                goto failed;
                        }
                        put_bh(bh_check);
                }

                /*
                 * Adjust the mmp_check_interval depending on how much time
                 * it took for the MMP block to be written.
                 */
                mmp_check_interval = max(min(EXT4_MMP_CHECK_MULT * diff / HZ,
                                             EXT4_MMP_MAX_CHECK_INTERVAL),
                                         EXT4_MMP_MIN_CHECK_INTERVAL);
                mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
        }

        /*
         * Unmount seems to be clean.
         */
        mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN);
        mmp->mmp_time = cpu_to_le64(get_seconds());

        retval = write_mmp_block(sb, bh);

failed:
        kfree(data);
        brelse(bh);
        return retval;
}

/*
 * Get a random new sequence number but make sure it is not greater than
 * EXT4_MMP_SEQ_MAX.
 */
static unsigned int mmp_new_seq(void)
{
        u32 new_seq;

        do {
                get_random_bytes(&new_seq, sizeof(u32));
        } while (new_seq > EXT4_MMP_SEQ_MAX);

        return new_seq;
}

/*
 * Protect the filesystem from being mounted more than once.
 *
 * The handshake works roughly as follows: read the current mmp_seq.
 * EXT4_MMP_SEQ_CLEAN means the previous user unmounted cleanly, so the
 * initial wait can be skipped; EXT4_MMP_SEQ_FSCK means e2fsck owns the
 * device and the mount fails immediately.  Otherwise sleep for about one
 * check interval and re-read the block; a changed sequence number means
 * another node is actively updating it.  If it is unchanged, write a fresh
 * random sequence number, sleep for another interval and re-read once more
 * to make sure nobody overwrote it, and only then start kmmpd to keep the
 * block updated.
 */
int ext4_multi_mount_protect(struct super_block *sb,
                             ext4_fsblk_t mmp_block)
{
        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
        struct buffer_head *bh = NULL;
        struct mmp_struct *mmp = NULL;
        struct mmpd_data *mmpd_data;
        u32 seq;
        unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval);
        unsigned int wait_time = 0;
        int retval;

        if (mmp_block < le32_to_cpu(es->s_first_data_block) ||
            mmp_block >= ext4_blocks_count(es)) {
                ext4_warning(sb, "Invalid MMP block in superblock");
                goto failed;
        }

        retval = read_mmp_block(sb, &bh, mmp_block);
        if (retval)
                goto failed;

        mmp = (struct mmp_struct *)(bh->b_data);

        if (mmp_check_interval < EXT4_MMP_MIN_CHECK_INTERVAL)
                mmp_check_interval = EXT4_MMP_MIN_CHECK_INTERVAL;

        /*
         * If check_interval in MMP block is larger, use that instead of
         * update_interval from the superblock.
         */
        if (le16_to_cpu(mmp->mmp_check_interval) > mmp_check_interval)
                mmp_check_interval = le16_to_cpu(mmp->mmp_check_interval);

        seq = le32_to_cpu(mmp->mmp_seq);
        if (seq == EXT4_MMP_SEQ_CLEAN)
                goto skip;

        if (seq == EXT4_MMP_SEQ_FSCK) {
                dump_mmp_msg(sb, mmp, "fsck is running on the filesystem");
                goto failed;
        }

        wait_time = min(mmp_check_interval * 2 + 1,
                        mmp_check_interval + 60);

        /* Print MMP interval if more than 20 secs. */
        if (wait_time > EXT4_MMP_MIN_CHECK_INTERVAL * 4)
                ext4_warning(sb, "MMP interval %u higher than expected, please"
                             " wait.\n", wait_time * 2);

        if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
                ext4_warning(sb, "MMP startup interrupted, failing mount\n");
                goto failed;
        }

        retval = read_mmp_block(sb, &bh, mmp_block);
        if (retval)
                goto failed;
        mmp = (struct mmp_struct *)(bh->b_data);
        if (seq != le32_to_cpu(mmp->mmp_seq)) {
                dump_mmp_msg(sb, mmp,
                             "Device is already active on another node.");
                goto failed;
        }

skip:
        /*
         * write a new random sequence number.
         */
        seq = mmp_new_seq();
        mmp->mmp_seq = cpu_to_le32(seq);

        retval = write_mmp_block(sb, bh);
        if (retval)
                goto failed;

        /*
         * wait for MMP interval and check mmp_seq.
         */
        if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
                ext4_warning(sb, "MMP startup interrupted, failing mount\n");
                goto failed;
        }

        retval = read_mmp_block(sb, &bh, mmp_block);
        if (retval)
                goto failed;
        mmp = (struct mmp_struct *)(bh->b_data);
        if (seq != le32_to_cpu(mmp->mmp_seq)) {
                dump_mmp_msg(sb, mmp,
                             "Device is already active on another node.");
                goto failed;
        }

        mmpd_data = kmalloc(sizeof(struct mmpd_data), GFP_KERNEL);
        if (!mmpd_data) {
                ext4_warning(sb, "not enough memory for mmpd_data");
                goto failed;
        }
        mmpd_data->sb = sb;
        mmpd_data->bh = bh;

        /*
         * Start a kernel thread to update the MMP block periodically.
         */
        EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%s",
                                             bdevname(bh->b_bdev,
                                                      mmp->mmp_bdevname));
        if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) {
                EXT4_SB(sb)->s_mmp_tsk = NULL;
                kfree(mmpd_data);
                ext4_warning(sb, "Unable to create kmmpd thread for %s.",
                             sb->s_id);
                goto failed;
        }

        return 0;

failed:
        brelse(bh);
        return 1;
}
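
/*
 * Callers live outside this file: in kernels of this vintage,
 * ext4_fill_super() is expected to call ext4_multi_mount_protect() when the
 * INCOMPAT_MMP feature is enabled and the filesystem is mounted read-write,
 * and ext4_put_super() stops s_mmp_tsk with kthread_stop() so that kmmpd
 * writes EXT4_MMP_SEQ_CLEAN on its way out.
 */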