1 // SPDX-License-Identifier: GPL-2.0
2
3 #include "bcachefs.h"
4 #include "btree_cache.h"
5 #include "btree_update.h"
6 #include "dirent.h"
7 #include "fsck.h"
8 #include "str_hash.h"
9 #include "subvolume.h"
10
bch2_dirent_has_target(struct btree_trans * trans,struct bkey_s_c_dirent d)11 static int bch2_dirent_has_target(struct btree_trans *trans, struct bkey_s_c_dirent d)
12 {
13 if (d.v->d_type == DT_SUBVOL) {
14 struct bch_subvolume subvol;
15 int ret = bch2_subvolume_get(trans, le32_to_cpu(d.v->d_child_subvol),
16 false, &subvol);
17 if (ret && !bch2_err_matches(ret, ENOENT))
18 return ret;
19 return !ret;
20 } else {
21 struct btree_iter iter;
22 struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
23 SPOS(0, le64_to_cpu(d.v->d_inum), d.k->p.snapshot), 0);
24 int ret = bkey_err(k);
25 if (ret)
26 return ret;
27
28 ret = bkey_is_inode(k.k);
29 bch2_trans_iter_exit(trans, &iter);
30 return ret;
31 }
32 }
33
bch2_fsck_rename_dirent(struct btree_trans * trans,struct snapshots_seen * s,const struct bch_hash_desc desc,struct bch_hash_info * hash_info,struct bkey_s_c_dirent old,bool * updated_before_k_pos)34 static int bch2_fsck_rename_dirent(struct btree_trans *trans,
35 struct snapshots_seen *s,
36 const struct bch_hash_desc desc,
37 struct bch_hash_info *hash_info,
38 struct bkey_s_c_dirent old,
39 bool *updated_before_k_pos)
40 {
41 struct bch_fs *c = trans->c;
42 struct qstr old_name = bch2_dirent_get_name(old);
43 struct bkey_i_dirent *new = bch2_trans_kmalloc(trans, BKEY_U64s_MAX * sizeof(u64));
44 int ret = PTR_ERR_OR_ZERO(new);
45 if (ret)
46 return ret;
47
48 bkey_dirent_init(&new->k_i);
49 dirent_copy_target(new, old);
50 new->k.p = old.k->p;
51
52 char *renamed_buf = bch2_trans_kmalloc(trans, old_name.len + 20);
53 ret = PTR_ERR_OR_ZERO(renamed_buf);
54 if (ret)
55 return ret;
56
57 for (unsigned i = 0; i < 1000; i++) {
58 new->k.u64s = BKEY_U64s_MAX;
59
60 struct qstr renamed_name = (struct qstr) QSTR_INIT(renamed_buf,
61 sprintf(renamed_buf, "%.*s.fsck_renamed-%u",
62 old_name.len, old_name.name, i));
63
64 ret = bch2_dirent_init_name(c, new, hash_info, &renamed_name, NULL);
65 if (ret)
66 return ret;
67
68 ret = bch2_hash_set_in_snapshot(trans, bch2_dirent_hash_desc, hash_info,
69 (subvol_inum) { 0, old.k->p.inode },
70 old.k->p.snapshot, &new->k_i,
71 BTREE_UPDATE_internal_snapshot_node|
72 STR_HASH_must_create);
73 if (ret && !bch2_err_matches(ret, EEXIST))
74 break;
75 if (!ret) {
76 if (bpos_lt(new->k.p, old.k->p))
77 *updated_before_k_pos = true;
78 break;
79 }
80 }
81
82 ret = ret ?: bch2_fsck_update_backpointers(trans, s, desc, hash_info, &new->k_i);
83 bch_err_fn(c, ret);
84 return ret;
85 }
86
hash_pick_winner(struct btree_trans * trans,const struct bch_hash_desc desc,struct bch_hash_info * hash_info,struct bkey_s_c k1,struct bkey_s_c k2)87 static noinline int hash_pick_winner(struct btree_trans *trans,
88 const struct bch_hash_desc desc,
89 struct bch_hash_info *hash_info,
90 struct bkey_s_c k1,
91 struct bkey_s_c k2)
92 {
93 if (bkey_val_bytes(k1.k) == bkey_val_bytes(k2.k) &&
94 !memcmp(k1.v, k2.v, bkey_val_bytes(k1.k)))
95 return 0;
96
97 switch (desc.btree_id) {
98 case BTREE_ID_dirents: {
99 int ret = bch2_dirent_has_target(trans, bkey_s_c_to_dirent(k1));
100 if (ret < 0)
101 return ret;
102 if (!ret)
103 return 0;
104
105 ret = bch2_dirent_has_target(trans, bkey_s_c_to_dirent(k2));
106 if (ret < 0)
107 return ret;
108 if (!ret)
109 return 1;
110 return 2;
111 }
112 default:
113 return 0;
114 }
115 }
116
117 /*
118 * str_hash lookups across snapshots break in wild ways if hash_info in
119 * different snapshot versions doesn't match - so if we find one mismatch, check
120 * them all
121 */
bch2_repair_inode_hash_info(struct btree_trans * trans,struct bch_inode_unpacked * snapshot_root)122 int bch2_repair_inode_hash_info(struct btree_trans *trans,
123 struct bch_inode_unpacked *snapshot_root)
124 {
125 struct bch_fs *c = trans->c;
126 struct btree_iter iter;
127 struct bkey_s_c k;
128 struct printbuf buf = PRINTBUF;
129 bool need_commit = false;
130 int ret = 0;
131
132 for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes,
133 POS(0, snapshot_root->bi_inum),
134 BTREE_ITER_all_snapshots, k, ret) {
135 if (bpos_ge(k.k->p, SPOS(0, snapshot_root->bi_inum, snapshot_root->bi_snapshot)))
136 break;
137 if (!bkey_is_inode(k.k))
138 continue;
139
140 struct bch_inode_unpacked inode;
141 ret = bch2_inode_unpack(k, &inode);
142 if (ret)
143 break;
144
145 if (inode.bi_hash_seed == snapshot_root->bi_hash_seed &&
146 INODE_STR_HASH(&inode) == INODE_STR_HASH(snapshot_root)) {
147 #ifdef CONFIG_BCACHEFS_DEBUG
148 struct bch_hash_info hash1 = bch2_hash_info_init(c, snapshot_root);
149 struct bch_hash_info hash2 = bch2_hash_info_init(c, &inode);
150
151 BUG_ON(hash1.type != hash2.type ||
152 memcmp(&hash1.siphash_key,
153 &hash2.siphash_key,
154 sizeof(hash1.siphash_key)));
155 #endif
156 continue;
157 }
158
159 printbuf_reset(&buf);
160 prt_printf(&buf, "inode %llu hash info in snapshots %u %u don't match\n",
161 snapshot_root->bi_inum,
162 inode.bi_snapshot,
163 snapshot_root->bi_snapshot);
164
165 bch2_prt_str_hash_type(&buf, INODE_STR_HASH(&inode));
166 prt_printf(&buf, " %llx\n", inode.bi_hash_seed);
167
168 bch2_prt_str_hash_type(&buf, INODE_STR_HASH(snapshot_root));
169 prt_printf(&buf, " %llx", snapshot_root->bi_hash_seed);
170
171 if (fsck_err(trans, inode_snapshot_mismatch, "%s", buf.buf)) {
172 inode.bi_hash_seed = snapshot_root->bi_hash_seed;
173 SET_INODE_STR_HASH(&inode, INODE_STR_HASH(snapshot_root));
174
175 ret = __bch2_fsck_write_inode(trans, &inode);
176 if (ret)
177 break;
178 need_commit = true;
179 }
180 }
181
182 if (ret)
183 goto err;
184
185 if (!need_commit) {
186 struct printbuf buf = PRINTBUF;
187 bch2_log_msg_start(c, &buf);
188
189 prt_printf(&buf, "inode %llu hash info mismatch with root, but mismatch not found\n",
190 snapshot_root->bi_inum);
191
192 prt_printf(&buf, "root snapshot %u ", snapshot_root->bi_snapshot);
193 bch2_prt_str_hash_type(&buf, INODE_STR_HASH(snapshot_root));
194 prt_printf(&buf, " %llx\n", snapshot_root->bi_hash_seed);
195 #if 0
196 prt_printf(&buf, "vs snapshot %u ", hash_info->inum_snapshot);
197 bch2_prt_str_hash_type(&buf, hash_info->type);
198 prt_printf(&buf, " %llx %llx", hash_info->siphash_key.k0, hash_info->siphash_key.k1);
199 #endif
200 bch2_print_str(c, KERN_ERR, buf.buf);
201 printbuf_exit(&buf);
202 ret = bch_err_throw(c, fsck_repair_unimplemented);
203 goto err;
204 }
205
206 ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?:
207 -BCH_ERR_transaction_restart_nested;
208 err:
209 fsck_err:
210 printbuf_exit(&buf);
211 bch2_trans_iter_exit(trans, &iter);
212 return ret;
213 }
214
215 /*
216 * All versions of the same inode in different snapshots must have the same hash
217 * seed/type: verify that the hash info we're using matches the root
218 */
check_inode_hash_info_matches_root(struct btree_trans * trans,u64 inum,struct bch_hash_info * hash_info)219 static noinline int check_inode_hash_info_matches_root(struct btree_trans *trans, u64 inum,
220 struct bch_hash_info *hash_info)
221 {
222 struct bch_inode_unpacked snapshot_root;
223 int ret = bch2_inode_find_snapshot_root(trans, inum, &snapshot_root);
224 if (ret)
225 return ret;
226
227 struct bch_hash_info hash_root = bch2_hash_info_init(trans->c, &snapshot_root);
228 if (hash_info->type != hash_root.type ||
229 memcmp(&hash_info->siphash_key,
230 &hash_root.siphash_key,
231 sizeof(hash_root.siphash_key)))
232 ret = bch2_repair_inode_hash_info(trans, &snapshot_root);
233
234 return ret;
235 }
236
237 /* Put a str_hash key in its proper location, checking for duplicates */
bch2_str_hash_repair_key(struct btree_trans * trans,struct snapshots_seen * s,const struct bch_hash_desc * desc,struct bch_hash_info * hash_info,struct btree_iter * k_iter,struct bkey_s_c k,struct btree_iter * dup_iter,struct bkey_s_c dup_k,bool * updated_before_k_pos)238 int bch2_str_hash_repair_key(struct btree_trans *trans,
239 struct snapshots_seen *s,
240 const struct bch_hash_desc *desc,
241 struct bch_hash_info *hash_info,
242 struct btree_iter *k_iter, struct bkey_s_c k,
243 struct btree_iter *dup_iter, struct bkey_s_c dup_k,
244 bool *updated_before_k_pos)
245 {
246 struct bch_fs *c = trans->c;
247 struct printbuf buf = PRINTBUF;
248 bool free_snapshots_seen = false;
249 int ret = 0;
250
251 if (!s) {
252 s = bch2_trans_kmalloc(trans, sizeof(*s));
253 ret = PTR_ERR_OR_ZERO(s);
254 if (ret)
255 goto out;
256
257 s->pos = k_iter->pos;
258 darray_init(&s->ids);
259
260 ret = bch2_get_snapshot_overwrites(trans, desc->btree_id, k_iter->pos, &s->ids);
261 if (ret)
262 goto out;
263
264 free_snapshots_seen = true;
265 }
266
267 if (!dup_k.k) {
268 struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k);
269 ret = PTR_ERR_OR_ZERO(new);
270 if (ret)
271 goto out;
272
273 dup_k = bch2_hash_set_or_get_in_snapshot(trans, dup_iter, *desc, hash_info,
274 (subvol_inum) { 0, new->k.p.inode },
275 new->k.p.snapshot, new,
276 STR_HASH_must_create|
277 BTREE_ITER_with_updates|
278 BTREE_UPDATE_internal_snapshot_node);
279 ret = bkey_err(dup_k);
280 if (ret)
281 goto out;
282 if (dup_k.k)
283 goto duplicate_entries;
284
285 if (bpos_lt(new->k.p, k.k->p))
286 *updated_before_k_pos = true;
287
288 ret = bch2_insert_snapshot_whiteouts(trans, desc->btree_id,
289 k_iter->pos, new->k.p) ?:
290 bch2_hash_delete_at(trans, *desc, hash_info, k_iter,
291 BTREE_ITER_with_updates|
292 BTREE_UPDATE_internal_snapshot_node) ?:
293 bch2_fsck_update_backpointers(trans, s, *desc, hash_info, new) ?:
294 bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?:
295 -BCH_ERR_transaction_restart_commit;
296 } else {
297 duplicate_entries:
298 ret = hash_pick_winner(trans, *desc, hash_info, k, dup_k);
299 if (ret < 0)
300 goto out;
301
302 if (!fsck_err(trans, hash_table_key_duplicate,
303 "duplicate hash table keys%s:\n%s",
304 ret != 2 ? "" : ", both point to valid inodes",
305 (printbuf_reset(&buf),
306 bch2_bkey_val_to_text(&buf, c, k),
307 prt_newline(&buf),
308 bch2_bkey_val_to_text(&buf, c, dup_k),
309 buf.buf)))
310 goto out;
311
312 switch (ret) {
313 case 0:
314 ret = bch2_hash_delete_at(trans, *desc, hash_info, k_iter, 0);
315 break;
316 case 1:
317 ret = bch2_hash_delete_at(trans, *desc, hash_info, dup_iter, 0);
318 break;
319 case 2:
320 ret = bch2_fsck_rename_dirent(trans, s, *desc, hash_info,
321 bkey_s_c_to_dirent(k),
322 updated_before_k_pos) ?:
323 bch2_hash_delete_at(trans, *desc, hash_info, k_iter,
324 BTREE_ITER_with_updates);
325 goto out;
326 }
327
328 ret = bch2_trans_commit(trans, NULL, NULL, 0) ?:
329 -BCH_ERR_transaction_restart_commit;
330 }
331 out:
332 fsck_err:
333 bch2_trans_iter_exit(trans, dup_iter);
334 printbuf_exit(&buf);
335 if (free_snapshots_seen)
336 darray_exit(&s->ids);
337 return ret;
338 }
339
__bch2_str_hash_check_key(struct btree_trans * trans,struct snapshots_seen * s,const struct bch_hash_desc * desc,struct bch_hash_info * hash_info,struct btree_iter * k_iter,struct bkey_s_c hash_k,bool * updated_before_k_pos)340 int __bch2_str_hash_check_key(struct btree_trans *trans,
341 struct snapshots_seen *s,
342 const struct bch_hash_desc *desc,
343 struct bch_hash_info *hash_info,
344 struct btree_iter *k_iter, struct bkey_s_c hash_k,
345 bool *updated_before_k_pos)
346 {
347 struct bch_fs *c = trans->c;
348 struct btree_iter iter = {};
349 struct printbuf buf = PRINTBUF;
350 struct bkey_s_c k;
351 int ret = 0;
352
353 u64 hash = desc->hash_bkey(hash_info, hash_k);
354 if (hash_k.k->p.offset < hash)
355 goto bad_hash;
356
357 for_each_btree_key_norestart(trans, iter, desc->btree_id,
358 SPOS(hash_k.k->p.inode, hash, hash_k.k->p.snapshot),
359 BTREE_ITER_slots|
360 BTREE_ITER_with_updates, k, ret) {
361 if (bkey_eq(k.k->p, hash_k.k->p))
362 break;
363
364 if (k.k->type == desc->key_type &&
365 !desc->cmp_bkey(k, hash_k)) {
366 ret = check_inode_hash_info_matches_root(trans, hash_k.k->p.inode,
367 hash_info) ?:
368 bch2_str_hash_repair_key(trans, s, desc, hash_info,
369 k_iter, hash_k,
370 &iter, k, updated_before_k_pos);
371 break;
372 }
373
374 if (bkey_deleted(k.k))
375 goto bad_hash;
376 }
377 bch2_trans_iter_exit(trans, &iter);
378 out:
379 fsck_err:
380 printbuf_exit(&buf);
381 return ret;
382 bad_hash:
383 bch2_trans_iter_exit(trans, &iter);
384 /*
385 * Before doing any repair, check hash_info itself:
386 */
387 ret = check_inode_hash_info_matches_root(trans, hash_k.k->p.inode, hash_info);
388 if (ret)
389 goto out;
390
391 if (fsck_err(trans, hash_table_key_wrong_offset,
392 "hash table key at wrong offset: should be at %llu\n%s",
393 hash,
394 (bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf)))
395 ret = bch2_str_hash_repair_key(trans, s, desc, hash_info,
396 k_iter, hash_k,
397 &iter, bkey_s_c_null,
398 updated_before_k_pos);
399 goto out;
400 }
401