1 // SPDX-License-Identifier: GPL-2.0 2 #include <linux/fs.h> 3 #include <linux/random.h> 4 #include <linux/buffer_head.h> 5 #include <linux/utsname.h> 6 #include <linux/kthread.h> 7 8 #include "ext4.h" 9 10 /* Checksumming functions */ 11 static __le32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp) 12 { 13 struct ext4_sb_info *sbi = EXT4_SB(sb); 14 int offset = offsetof(struct mmp_struct, mmp_checksum); 15 __u32 csum; 16 17 csum = ext4_chksum(sbi->s_csum_seed, (char *)mmp, offset); 18 19 return cpu_to_le32(csum); 20 } 21 22 static int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp) 23 { 24 if (!ext4_has_feature_metadata_csum(sb)) 25 return 1; 26 27 return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp); 28 } 29 30 static void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp) 31 { 32 if (!ext4_has_feature_metadata_csum(sb)) 33 return; 34 35 mmp->mmp_checksum = ext4_mmp_csum(sb, mmp); 36 } 37 38 /* 39 * Write the MMP block using REQ_SYNC to try to get the block on-disk 40 * faster. 41 */ 42 static int write_mmp_block_thawed(struct super_block *sb, 43 struct buffer_head *bh) 44 { 45 struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data); 46 47 ext4_mmp_csum_set(sb, mmp); 48 lock_buffer(bh); 49 bh->b_end_io = end_buffer_write_sync; 50 get_bh(bh); 51 submit_bh(REQ_OP_WRITE | REQ_SYNC | REQ_META | REQ_PRIO, bh); 52 wait_on_buffer(bh); 53 if (unlikely(!buffer_uptodate(bh))) 54 return -EIO; 55 return 0; 56 } 57 58 static int write_mmp_block(struct super_block *sb, struct buffer_head *bh) 59 { 60 /* 61 * We protect against freezing so that we don't create dirty buffers 62 * on frozen filesystem. 63 */ 64 scoped_guard(super_write, sb) 65 return write_mmp_block_thawed(sb, bh); 66 } 67 68 /* 69 * Read the MMP block. It _must_ be read from disk and hence we clear the 70 * uptodate flag on the buffer. 71 */ 72 static int read_mmp_block(struct super_block *sb, struct buffer_head **bh, 73 ext4_fsblk_t mmp_block) 74 { 75 struct mmp_struct *mmp; 76 int ret; 77 78 if (*bh) 79 clear_buffer_uptodate(*bh); 80 81 /* This would be sb_bread(sb, mmp_block), except we need to be sure 82 * that the MD RAID device cache has been bypassed, and that the read 83 * is not blocked in the elevator. */ 84 if (!*bh) { 85 *bh = sb_getblk(sb, mmp_block); 86 if (!*bh) { 87 ret = -ENOMEM; 88 goto warn_exit; 89 } 90 } 91 92 lock_buffer(*bh); 93 ret = ext4_read_bh(*bh, REQ_META | REQ_PRIO, NULL, false); 94 if (ret) 95 goto warn_exit; 96 97 mmp = (struct mmp_struct *)((*bh)->b_data); 98 if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC) { 99 ret = -EFSCORRUPTED; 100 goto warn_exit; 101 } 102 if (!ext4_mmp_csum_verify(sb, mmp)) { 103 ret = -EFSBADCRC; 104 goto warn_exit; 105 } 106 return 0; 107 warn_exit: 108 brelse(*bh); 109 *bh = NULL; 110 ext4_warning(sb, "Error %d while reading MMP block %llu", 111 ret, mmp_block); 112 return ret; 113 } 114 115 /* 116 * Dump as much information as possible to help the admin. 117 */ 118 void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp, 119 const char *function, unsigned int line, const char *msg) 120 { 121 __ext4_warning(sb, function, line, "%s", msg); 122 __ext4_warning(sb, function, line, 123 "MMP failure info: last update time: %llu, last update node: %.*s, last update device: %.*s", 124 (unsigned long long)le64_to_cpu(mmp->mmp_time), 125 (int)sizeof(mmp->mmp_nodename), mmp->mmp_nodename, 126 (int)sizeof(mmp->mmp_bdevname), mmp->mmp_bdevname); 127 } 128 129 /* 130 * kmmpd will update the MMP sequence every s_mmp_update_interval seconds 131 */ 132 static int kmmpd(void *data) 133 { 134 struct super_block *sb = data; 135 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 136 struct buffer_head *bh = EXT4_SB(sb)->s_mmp_bh; 137 struct mmp_struct *mmp; 138 ext4_fsblk_t mmp_block; 139 u32 seq = 0; 140 unsigned long failed_writes = 0; 141 int mmp_update_interval = le16_to_cpu(es->s_mmp_update_interval); 142 unsigned mmp_check_interval; 143 unsigned long last_update_time; 144 unsigned long diff; 145 int retval = 0; 146 147 mmp_block = le64_to_cpu(es->s_mmp_block); 148 mmp = (struct mmp_struct *)(bh->b_data); 149 mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds()); 150 /* 151 * Start with the higher mmp_check_interval and reduce it if 152 * the MMP block is being updated on time. 153 */ 154 mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval, 155 EXT4_MMP_MIN_CHECK_INTERVAL); 156 mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval); 157 158 memcpy(mmp->mmp_nodename, init_utsname()->nodename, 159 sizeof(mmp->mmp_nodename)); 160 161 while (!kthread_should_stop() && !ext4_emergency_state(sb)) { 162 if (!ext4_has_feature_mmp(sb)) { 163 ext4_warning(sb, "kmmpd being stopped since MMP feature" 164 " has been disabled."); 165 goto wait_to_exit; 166 } 167 if (++seq > EXT4_MMP_SEQ_MAX) 168 seq = 1; 169 170 mmp->mmp_seq = cpu_to_le32(seq); 171 mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds()); 172 last_update_time = jiffies; 173 174 retval = write_mmp_block(sb, bh); 175 /* 176 * Don't spew too many error messages. Print one every 177 * (s_mmp_update_interval * 60) seconds. 178 */ 179 if (retval) { 180 if ((failed_writes % 60) == 0) { 181 ext4_error_err(sb, -retval, 182 "Error writing to MMP block"); 183 } 184 failed_writes++; 185 } 186 187 diff = jiffies - last_update_time; 188 if (diff < mmp_update_interval * HZ) 189 schedule_timeout_interruptible(mmp_update_interval * 190 HZ - diff); 191 192 /* 193 * We need to make sure that more than mmp_check_interval 194 * seconds have not passed since writing. If that has happened 195 * we need to check if the MMP block is as we left it. 196 */ 197 diff = jiffies - last_update_time; 198 if (diff > mmp_check_interval * HZ) { 199 struct buffer_head *bh_check = NULL; 200 struct mmp_struct *mmp_check; 201 202 retval = read_mmp_block(sb, &bh_check, mmp_block); 203 if (retval) { 204 ext4_error_err(sb, -retval, 205 "error reading MMP data: %d", 206 retval); 207 goto wait_to_exit; 208 } 209 210 mmp_check = (struct mmp_struct *)(bh_check->b_data); 211 if (mmp->mmp_seq != mmp_check->mmp_seq || 212 memcmp(mmp->mmp_nodename, mmp_check->mmp_nodename, 213 sizeof(mmp->mmp_nodename))) { 214 dump_mmp_msg(sb, mmp_check, 215 "Error while updating MMP info. " 216 "The filesystem seems to have been" 217 " multiply mounted."); 218 ext4_error_err(sb, EBUSY, "abort"); 219 put_bh(bh_check); 220 retval = -EBUSY; 221 goto wait_to_exit; 222 } 223 put_bh(bh_check); 224 } 225 226 /* 227 * Adjust the mmp_check_interval depending on how much time 228 * it took for the MMP block to be written. 229 */ 230 mmp_check_interval = clamp(EXT4_MMP_CHECK_MULT * diff / HZ, 231 EXT4_MMP_MIN_CHECK_INTERVAL, 232 EXT4_MMP_MAX_CHECK_INTERVAL); 233 mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval); 234 } 235 236 /* 237 * Unmount seems to be clean. 238 */ 239 mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN); 240 mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds()); 241 242 retval = write_mmp_block(sb, bh); 243 244 wait_to_exit: 245 while (!kthread_should_stop()) { 246 set_current_state(TASK_INTERRUPTIBLE); 247 if (!kthread_should_stop()) 248 schedule(); 249 } 250 set_current_state(TASK_RUNNING); 251 return retval; 252 } 253 254 void ext4_stop_mmpd(struct ext4_sb_info *sbi) 255 { 256 if (sbi->s_mmp_tsk) { 257 kthread_stop(sbi->s_mmp_tsk); 258 brelse(sbi->s_mmp_bh); 259 sbi->s_mmp_tsk = NULL; 260 } 261 } 262 263 /* 264 * Get a random new sequence number but make sure it is not greater than 265 * EXT4_MMP_SEQ_MAX. 266 */ 267 static unsigned int mmp_new_seq(void) 268 { 269 return get_random_u32_below(EXT4_MMP_SEQ_MAX + 1); 270 } 271 272 /* 273 * Protect the filesystem from being mounted more than once. 274 */ 275 int ext4_multi_mount_protect(struct super_block *sb, 276 ext4_fsblk_t mmp_block) 277 { 278 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 279 struct buffer_head *bh = NULL; 280 struct mmp_struct *mmp = NULL; 281 u32 seq; 282 unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval); 283 unsigned int wait_time = 0; 284 int retval; 285 286 if (mmp_block < le32_to_cpu(es->s_first_data_block) || 287 mmp_block >= ext4_blocks_count(es)) { 288 ext4_warning(sb, "Invalid MMP block in superblock"); 289 retval = -EINVAL; 290 goto failed; 291 } 292 293 retval = read_mmp_block(sb, &bh, mmp_block); 294 if (retval) 295 goto failed; 296 297 mmp = (struct mmp_struct *)(bh->b_data); 298 299 if (mmp_check_interval < EXT4_MMP_MIN_CHECK_INTERVAL) 300 mmp_check_interval = EXT4_MMP_MIN_CHECK_INTERVAL; 301 302 /* 303 * If check_interval in MMP block is larger, use that instead of 304 * update_interval from the superblock. 305 */ 306 if (le16_to_cpu(mmp->mmp_check_interval) > mmp_check_interval) 307 mmp_check_interval = le16_to_cpu(mmp->mmp_check_interval); 308 309 seq = le32_to_cpu(mmp->mmp_seq); 310 if (seq == EXT4_MMP_SEQ_CLEAN) 311 goto skip; 312 313 if (seq == EXT4_MMP_SEQ_FSCK) { 314 dump_mmp_msg(sb, mmp, "fsck is running on the filesystem"); 315 retval = -EBUSY; 316 goto failed; 317 } 318 319 wait_time = min(mmp_check_interval * 2 + 1, 320 mmp_check_interval + 60); 321 322 /* Print MMP interval if more than 20 secs. */ 323 if (wait_time > EXT4_MMP_MIN_CHECK_INTERVAL * 4) 324 ext4_warning(sb, "MMP interval %u higher than expected, please" 325 " wait.\n", wait_time * 2); 326 327 if (schedule_timeout_interruptible(HZ * wait_time) != 0) { 328 ext4_warning(sb, "MMP startup interrupted, failing mount\n"); 329 retval = -ETIMEDOUT; 330 goto failed; 331 } 332 333 retval = read_mmp_block(sb, &bh, mmp_block); 334 if (retval) 335 goto failed; 336 mmp = (struct mmp_struct *)(bh->b_data); 337 if (seq != le32_to_cpu(mmp->mmp_seq)) { 338 dump_mmp_msg(sb, mmp, 339 "Device is already active on another node."); 340 retval = -EBUSY; 341 goto failed; 342 } 343 344 skip: 345 /* 346 * write a new random sequence number. 347 */ 348 seq = mmp_new_seq(); 349 mmp->mmp_seq = cpu_to_le32(seq); 350 351 /* 352 * On mount / remount we are protected against fs freezing (by s_umount 353 * semaphore) and grabbing freeze protection upsets lockdep 354 */ 355 retval = write_mmp_block_thawed(sb, bh); 356 if (retval) 357 goto failed; 358 359 /* 360 * wait for MMP interval and check mmp_seq. 361 */ 362 if (schedule_timeout_interruptible(HZ * wait_time) != 0) { 363 ext4_warning(sb, "MMP startup interrupted, failing mount"); 364 retval = -ETIMEDOUT; 365 goto failed; 366 } 367 368 retval = read_mmp_block(sb, &bh, mmp_block); 369 if (retval) 370 goto failed; 371 mmp = (struct mmp_struct *)(bh->b_data); 372 if (seq != le32_to_cpu(mmp->mmp_seq)) { 373 dump_mmp_msg(sb, mmp, 374 "Device is already active on another node."); 375 retval = -EBUSY; 376 goto failed; 377 } 378 379 EXT4_SB(sb)->s_mmp_bh = bh; 380 381 BUILD_BUG_ON(sizeof(mmp->mmp_bdevname) < BDEVNAME_SIZE); 382 snprintf(mmp->mmp_bdevname, sizeof(mmp->mmp_bdevname), 383 "%pg", bh->b_bdev); 384 385 /* 386 * Start a kernel thread to update the MMP block periodically. 387 */ 388 EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, sb, "kmmpd-%.*s", 389 (int)sizeof(mmp->mmp_bdevname), 390 mmp->mmp_bdevname); 391 if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) { 392 EXT4_SB(sb)->s_mmp_tsk = NULL; 393 ext4_warning(sb, "Unable to create kmmpd thread for %s.", 394 sb->s_id); 395 retval = -ENOMEM; 396 goto failed; 397 } 398 399 return 0; 400 401 failed: 402 brelse(bh); 403 return retval; 404 } 405