1 // SPDX-License-Identifier: GPL-2.0 2 3 #include "bcachefs.h" 4 #include "btree_cache.h" 5 #include "btree_update.h" 6 #include "dirent.h" 7 #include "fsck.h" 8 #include "str_hash.h" 9 #include "subvolume.h" 10 11 static int bch2_dirent_has_target(struct btree_trans *trans, struct bkey_s_c_dirent d) 12 { 13 if (d.v->d_type == DT_SUBVOL) { 14 struct bch_subvolume subvol; 15 int ret = bch2_subvolume_get(trans, le32_to_cpu(d.v->d_child_subvol), 16 false, &subvol); 17 if (ret && !bch2_err_matches(ret, ENOENT)) 18 return ret; 19 return !ret; 20 } else { 21 struct btree_iter iter; 22 struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, 23 SPOS(0, le64_to_cpu(d.v->d_inum), d.k->p.snapshot), 0); 24 int ret = bkey_err(k); 25 if (ret) 26 return ret; 27 28 ret = bkey_is_inode(k.k); 29 bch2_trans_iter_exit(trans, &iter); 30 return ret; 31 } 32 } 33 34 static int bch2_fsck_rename_dirent(struct btree_trans *trans, 35 struct snapshots_seen *s, 36 const struct bch_hash_desc desc, 37 struct bch_hash_info *hash_info, 38 struct bkey_s_c_dirent old, 39 bool *updated_before_k_pos) 40 { 41 struct bch_fs *c = trans->c; 42 struct qstr old_name = bch2_dirent_get_name(old); 43 struct bkey_i_dirent *new = bch2_trans_kmalloc(trans, BKEY_U64s_MAX * sizeof(u64)); 44 int ret = PTR_ERR_OR_ZERO(new); 45 if (ret) 46 return ret; 47 48 bkey_dirent_init(&new->k_i); 49 dirent_copy_target(new, old); 50 new->k.p = old.k->p; 51 52 char *renamed_buf = bch2_trans_kmalloc(trans, old_name.len + 20); 53 ret = PTR_ERR_OR_ZERO(renamed_buf); 54 if (ret) 55 return ret; 56 57 for (unsigned i = 0; i < 1000; i++) { 58 new->k.u64s = BKEY_U64s_MAX; 59 60 struct qstr renamed_name = (struct qstr) QSTR_INIT(renamed_buf, 61 sprintf(renamed_buf, "%.*s.fsck_renamed-%u", 62 old_name.len, old_name.name, i)); 63 64 ret = bch2_dirent_init_name(c, new, hash_info, &renamed_name, NULL); 65 if (ret) 66 return ret; 67 68 ret = bch2_hash_set_in_snapshot(trans, bch2_dirent_hash_desc, hash_info, 69 (subvol_inum) { 0, old.k->p.inode }, 70 old.k->p.snapshot, &new->k_i, 71 BTREE_UPDATE_internal_snapshot_node| 72 STR_HASH_must_create); 73 if (ret && !bch2_err_matches(ret, EEXIST)) 74 break; 75 if (!ret) { 76 if (bpos_lt(new->k.p, old.k->p)) 77 *updated_before_k_pos = true; 78 break; 79 } 80 } 81 82 ret = ret ?: bch2_fsck_update_backpointers(trans, s, desc, hash_info, &new->k_i); 83 bch_err_fn(c, ret); 84 return ret; 85 } 86 87 static noinline int hash_pick_winner(struct btree_trans *trans, 88 const struct bch_hash_desc desc, 89 struct bch_hash_info *hash_info, 90 struct bkey_s_c k1, 91 struct bkey_s_c k2) 92 { 93 if (bkey_val_bytes(k1.k) == bkey_val_bytes(k2.k) && 94 !memcmp(k1.v, k2.v, bkey_val_bytes(k1.k))) 95 return 0; 96 97 switch (desc.btree_id) { 98 case BTREE_ID_dirents: { 99 int ret = bch2_dirent_has_target(trans, bkey_s_c_to_dirent(k1)); 100 if (ret < 0) 101 return ret; 102 if (!ret) 103 return 0; 104 105 ret = bch2_dirent_has_target(trans, bkey_s_c_to_dirent(k2)); 106 if (ret < 0) 107 return ret; 108 if (!ret) 109 return 1; 110 return 2; 111 } 112 default: 113 return 0; 114 } 115 } 116 117 /* 118 * str_hash lookups across snapshots break in wild ways if hash_info in 119 * different snapshot versions doesn't match - so if we find one mismatch, check 120 * them all 121 */ 122 int bch2_repair_inode_hash_info(struct btree_trans *trans, 123 struct bch_inode_unpacked *snapshot_root) 124 { 125 struct bch_fs *c = trans->c; 126 struct btree_iter iter; 127 struct bkey_s_c k; 128 struct printbuf buf = PRINTBUF; 129 bool need_commit = false; 130 int ret = 0; 131 132 for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, 133 POS(0, snapshot_root->bi_inum), 134 BTREE_ITER_all_snapshots, k, ret) { 135 if (bpos_ge(k.k->p, SPOS(0, snapshot_root->bi_inum, snapshot_root->bi_snapshot))) 136 break; 137 if (!bkey_is_inode(k.k)) 138 continue; 139 140 struct bch_inode_unpacked inode; 141 ret = bch2_inode_unpack(k, &inode); 142 if (ret) 143 break; 144 145 if (inode.bi_hash_seed == snapshot_root->bi_hash_seed && 146 INODE_STR_HASH(&inode) == INODE_STR_HASH(snapshot_root)) { 147 #ifdef CONFIG_BCACHEFS_DEBUG 148 struct bch_hash_info hash1 = bch2_hash_info_init(c, snapshot_root); 149 struct bch_hash_info hash2 = bch2_hash_info_init(c, &inode); 150 151 BUG_ON(hash1.type != hash2.type || 152 memcmp(&hash1.siphash_key, 153 &hash2.siphash_key, 154 sizeof(hash1.siphash_key))); 155 #endif 156 continue; 157 } 158 159 printbuf_reset(&buf); 160 prt_printf(&buf, "inode %llu hash info in snapshots %u %u don't match\n", 161 snapshot_root->bi_inum, 162 inode.bi_snapshot, 163 snapshot_root->bi_snapshot); 164 165 bch2_prt_str_hash_type(&buf, INODE_STR_HASH(&inode)); 166 prt_printf(&buf, " %llx\n", inode.bi_hash_seed); 167 168 bch2_prt_str_hash_type(&buf, INODE_STR_HASH(snapshot_root)); 169 prt_printf(&buf, " %llx", snapshot_root->bi_hash_seed); 170 171 if (fsck_err(trans, inode_snapshot_mismatch, "%s", buf.buf)) { 172 inode.bi_hash_seed = snapshot_root->bi_hash_seed; 173 SET_INODE_STR_HASH(&inode, INODE_STR_HASH(snapshot_root)); 174 175 ret = __bch2_fsck_write_inode(trans, &inode); 176 if (ret) 177 break; 178 need_commit = true; 179 } 180 } 181 182 if (ret) 183 goto err; 184 185 if (!need_commit) { 186 struct printbuf buf = PRINTBUF; 187 bch2_log_msg_start(c, &buf); 188 189 prt_printf(&buf, "inode %llu hash info mismatch with root, but mismatch not found\n", 190 snapshot_root->bi_inum); 191 192 prt_printf(&buf, "root snapshot %u ", snapshot_root->bi_snapshot); 193 bch2_prt_str_hash_type(&buf, INODE_STR_HASH(snapshot_root)); 194 prt_printf(&buf, " %llx\n", snapshot_root->bi_hash_seed); 195 #if 0 196 prt_printf(&buf, "vs snapshot %u ", hash_info->inum_snapshot); 197 bch2_prt_str_hash_type(&buf, hash_info->type); 198 prt_printf(&buf, " %llx %llx", hash_info->siphash_key.k0, hash_info->siphash_key.k1); 199 #endif 200 bch2_print_str(c, KERN_ERR, buf.buf); 201 printbuf_exit(&buf); 202 ret = bch_err_throw(c, fsck_repair_unimplemented); 203 goto err; 204 } 205 206 ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?: 207 -BCH_ERR_transaction_restart_nested; 208 err: 209 fsck_err: 210 printbuf_exit(&buf); 211 bch2_trans_iter_exit(trans, &iter); 212 return ret; 213 } 214 215 /* 216 * All versions of the same inode in different snapshots must have the same hash 217 * seed/type: verify that the hash info we're using matches the root 218 */ 219 static noinline int check_inode_hash_info_matches_root(struct btree_trans *trans, u64 inum, 220 struct bch_hash_info *hash_info) 221 { 222 struct bch_inode_unpacked snapshot_root; 223 int ret = bch2_inode_find_snapshot_root(trans, inum, &snapshot_root); 224 if (ret) 225 return ret; 226 227 struct bch_hash_info hash_root = bch2_hash_info_init(trans->c, &snapshot_root); 228 if (hash_info->type != hash_root.type || 229 memcmp(&hash_info->siphash_key, 230 &hash_root.siphash_key, 231 sizeof(hash_root.siphash_key))) 232 ret = bch2_repair_inode_hash_info(trans, &snapshot_root); 233 234 return ret; 235 } 236 237 /* Put a str_hash key in its proper location, checking for duplicates */ 238 int bch2_str_hash_repair_key(struct btree_trans *trans, 239 struct snapshots_seen *s, 240 const struct bch_hash_desc *desc, 241 struct bch_hash_info *hash_info, 242 struct btree_iter *k_iter, struct bkey_s_c k, 243 struct btree_iter *dup_iter, struct bkey_s_c dup_k, 244 bool *updated_before_k_pos) 245 { 246 struct bch_fs *c = trans->c; 247 struct printbuf buf = PRINTBUF; 248 bool free_snapshots_seen = false; 249 int ret = 0; 250 251 if (!s) { 252 s = bch2_trans_kmalloc(trans, sizeof(*s)); 253 ret = PTR_ERR_OR_ZERO(s); 254 if (ret) 255 goto out; 256 257 s->pos = k_iter->pos; 258 darray_init(&s->ids); 259 260 ret = bch2_get_snapshot_overwrites(trans, desc->btree_id, k_iter->pos, &s->ids); 261 if (ret) 262 goto out; 263 264 free_snapshots_seen = true; 265 } 266 267 if (!dup_k.k) { 268 struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k); 269 ret = PTR_ERR_OR_ZERO(new); 270 if (ret) 271 goto out; 272 273 dup_k = bch2_hash_set_or_get_in_snapshot(trans, dup_iter, *desc, hash_info, 274 (subvol_inum) { 0, new->k.p.inode }, 275 new->k.p.snapshot, new, 276 STR_HASH_must_create| 277 BTREE_ITER_with_updates| 278 BTREE_UPDATE_internal_snapshot_node); 279 ret = bkey_err(dup_k); 280 if (ret) 281 goto out; 282 if (dup_k.k) 283 goto duplicate_entries; 284 285 if (bpos_lt(new->k.p, k.k->p)) 286 *updated_before_k_pos = true; 287 288 ret = bch2_insert_snapshot_whiteouts(trans, desc->btree_id, 289 k_iter->pos, new->k.p) ?: 290 bch2_hash_delete_at(trans, *desc, hash_info, k_iter, 291 BTREE_ITER_with_updates| 292 BTREE_UPDATE_internal_snapshot_node) ?: 293 bch2_fsck_update_backpointers(trans, s, *desc, hash_info, new) ?: 294 bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?: 295 -BCH_ERR_transaction_restart_commit; 296 } else { 297 duplicate_entries: 298 ret = hash_pick_winner(trans, *desc, hash_info, k, dup_k); 299 if (ret < 0) 300 goto out; 301 302 if (!fsck_err(trans, hash_table_key_duplicate, 303 "duplicate hash table keys%s:\n%s", 304 ret != 2 ? "" : ", both point to valid inodes", 305 (printbuf_reset(&buf), 306 bch2_bkey_val_to_text(&buf, c, k), 307 prt_newline(&buf), 308 bch2_bkey_val_to_text(&buf, c, dup_k), 309 buf.buf))) 310 goto out; 311 312 switch (ret) { 313 case 0: 314 ret = bch2_hash_delete_at(trans, *desc, hash_info, k_iter, 0); 315 break; 316 case 1: 317 ret = bch2_hash_delete_at(trans, *desc, hash_info, dup_iter, 0); 318 break; 319 case 2: 320 ret = bch2_fsck_rename_dirent(trans, s, *desc, hash_info, 321 bkey_s_c_to_dirent(k), 322 updated_before_k_pos) ?: 323 bch2_hash_delete_at(trans, *desc, hash_info, k_iter, 324 BTREE_ITER_with_updates); 325 goto out; 326 } 327 328 ret = bch2_trans_commit(trans, NULL, NULL, 0) ?: 329 -BCH_ERR_transaction_restart_commit; 330 } 331 out: 332 fsck_err: 333 bch2_trans_iter_exit(trans, dup_iter); 334 printbuf_exit(&buf); 335 if (free_snapshots_seen) 336 darray_exit(&s->ids); 337 return ret; 338 } 339 340 int __bch2_str_hash_check_key(struct btree_trans *trans, 341 struct snapshots_seen *s, 342 const struct bch_hash_desc *desc, 343 struct bch_hash_info *hash_info, 344 struct btree_iter *k_iter, struct bkey_s_c hash_k, 345 bool *updated_before_k_pos) 346 { 347 struct bch_fs *c = trans->c; 348 struct btree_iter iter = {}; 349 struct printbuf buf = PRINTBUF; 350 struct bkey_s_c k; 351 int ret = 0; 352 353 u64 hash = desc->hash_bkey(hash_info, hash_k); 354 if (hash_k.k->p.offset < hash) 355 goto bad_hash; 356 357 for_each_btree_key_norestart(trans, iter, desc->btree_id, 358 SPOS(hash_k.k->p.inode, hash, hash_k.k->p.snapshot), 359 BTREE_ITER_slots| 360 BTREE_ITER_with_updates, k, ret) { 361 if (bkey_eq(k.k->p, hash_k.k->p)) 362 break; 363 364 if (k.k->type == desc->key_type && 365 !desc->cmp_bkey(k, hash_k)) { 366 ret = check_inode_hash_info_matches_root(trans, hash_k.k->p.inode, 367 hash_info) ?: 368 bch2_str_hash_repair_key(trans, s, desc, hash_info, 369 k_iter, hash_k, 370 &iter, k, updated_before_k_pos); 371 break; 372 } 373 374 if (bkey_deleted(k.k)) 375 goto bad_hash; 376 } 377 bch2_trans_iter_exit(trans, &iter); 378 out: 379 fsck_err: 380 printbuf_exit(&buf); 381 return ret; 382 bad_hash: 383 bch2_trans_iter_exit(trans, &iter); 384 /* 385 * Before doing any repair, check hash_info itself: 386 */ 387 ret = check_inode_hash_info_matches_root(trans, hash_k.k->p.inode, hash_info); 388 if (ret) 389 goto out; 390 391 if (fsck_err(trans, hash_table_key_wrong_offset, 392 "hash table key at wrong offset: should be at %llu\n%s", 393 hash, 394 (bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) 395 ret = bch2_str_hash_repair_key(trans, s, desc, hash_info, 396 k_iter, hash_k, 397 &iter, bkey_s_c_null, 398 updated_before_k_pos); 399 goto out; 400 } 401