xref: /linux/fs/bcachefs/str_hash.c (revision 482deed9dfa065cf3f68372dadac857541c7d504)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 #include "bcachefs.h"
4 #include "btree_cache.h"
5 #include "btree_update.h"
6 #include "dirent.h"
7 #include "fsck.h"
8 #include "str_hash.h"
9 #include "subvolume.h"
10 
bch2_dirent_has_target(struct btree_trans * trans,struct bkey_s_c_dirent d)11 static int bch2_dirent_has_target(struct btree_trans *trans, struct bkey_s_c_dirent d)
12 {
13 	if (d.v->d_type == DT_SUBVOL) {
14 		struct bch_subvolume subvol;
15 		int ret = bch2_subvolume_get(trans, le32_to_cpu(d.v->d_child_subvol),
16 					     false, &subvol);
17 		if (ret && !bch2_err_matches(ret, ENOENT))
18 			return ret;
19 		return !ret;
20 	} else {
21 		struct btree_iter iter;
22 		struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
23 				SPOS(0, le64_to_cpu(d.v->d_inum), d.k->p.snapshot), 0);
24 		int ret = bkey_err(k);
25 		if (ret)
26 			return ret;
27 
28 		ret = bkey_is_inode(k.k);
29 		bch2_trans_iter_exit(trans, &iter);
30 		return ret;
31 	}
32 }
33 
bch2_fsck_rename_dirent(struct btree_trans * trans,struct snapshots_seen * s,const struct bch_hash_desc desc,struct bch_hash_info * hash_info,struct bkey_s_c_dirent old,bool * updated_before_k_pos)34 static int bch2_fsck_rename_dirent(struct btree_trans *trans,
35 				   struct snapshots_seen *s,
36 				   const struct bch_hash_desc desc,
37 				   struct bch_hash_info *hash_info,
38 				   struct bkey_s_c_dirent old,
39 				   bool *updated_before_k_pos)
40 {
41 	struct bch_fs *c = trans->c;
42 	struct qstr old_name = bch2_dirent_get_name(old);
43 	struct bkey_i_dirent *new = bch2_trans_kmalloc(trans, BKEY_U64s_MAX * sizeof(u64));
44 	int ret = PTR_ERR_OR_ZERO(new);
45 	if (ret)
46 		return ret;
47 
48 	bkey_dirent_init(&new->k_i);
49 	dirent_copy_target(new, old);
50 	new->k.p = old.k->p;
51 
52 	char *renamed_buf = bch2_trans_kmalloc(trans, old_name.len + 20);
53 	ret = PTR_ERR_OR_ZERO(renamed_buf);
54 	if (ret)
55 		return ret;
56 
57 	for (unsigned i = 0; i < 1000; i++) {
58 		new->k.u64s = BKEY_U64s_MAX;
59 
60 		struct qstr renamed_name = (struct qstr) QSTR_INIT(renamed_buf,
61 					sprintf(renamed_buf, "%.*s.fsck_renamed-%u",
62 						old_name.len, old_name.name, i));
63 
64 		ret = bch2_dirent_init_name(c, new, hash_info, &renamed_name, NULL);
65 		if (ret)
66 			return ret;
67 
68 		ret = bch2_hash_set_in_snapshot(trans, bch2_dirent_hash_desc, hash_info,
69 						(subvol_inum) { 0, old.k->p.inode },
70 						old.k->p.snapshot, &new->k_i,
71 						BTREE_UPDATE_internal_snapshot_node|
72 						STR_HASH_must_create);
73 		if (ret && !bch2_err_matches(ret, EEXIST))
74 			break;
75 		if (!ret) {
76 			if (bpos_lt(new->k.p, old.k->p))
77 				*updated_before_k_pos = true;
78 			break;
79 		}
80 	}
81 
82 	ret = ret ?: bch2_fsck_update_backpointers(trans, s, desc, hash_info, &new->k_i);
83 	bch_err_fn(c, ret);
84 	return ret;
85 }
86 
hash_pick_winner(struct btree_trans * trans,const struct bch_hash_desc desc,struct bch_hash_info * hash_info,struct bkey_s_c k1,struct bkey_s_c k2)87 static noinline int hash_pick_winner(struct btree_trans *trans,
88 				     const struct bch_hash_desc desc,
89 				     struct bch_hash_info *hash_info,
90 				     struct bkey_s_c k1,
91 				     struct bkey_s_c k2)
92 {
93 	if (bkey_val_bytes(k1.k) == bkey_val_bytes(k2.k) &&
94 	    !memcmp(k1.v, k2.v, bkey_val_bytes(k1.k)))
95 		return 0;
96 
97 	switch (desc.btree_id) {
98 	case BTREE_ID_dirents: {
99 		int ret = bch2_dirent_has_target(trans, bkey_s_c_to_dirent(k1));
100 		if (ret < 0)
101 			return ret;
102 		if (!ret)
103 			return 0;
104 
105 		ret = bch2_dirent_has_target(trans, bkey_s_c_to_dirent(k2));
106 		if (ret < 0)
107 			return ret;
108 		if (!ret)
109 			return 1;
110 		return 2;
111 	}
112 	default:
113 		return 0;
114 	}
115 }
116 
117 /*
118  * str_hash lookups across snapshots break in wild ways if hash_info in
119  * different snapshot versions doesn't match - so if we find one mismatch, check
120  * them all
121  */
bch2_repair_inode_hash_info(struct btree_trans * trans,struct bch_inode_unpacked * snapshot_root)122 int bch2_repair_inode_hash_info(struct btree_trans *trans,
123 				struct bch_inode_unpacked *snapshot_root)
124 {
125 	struct bch_fs *c = trans->c;
126 	struct btree_iter iter;
127 	struct bkey_s_c k;
128 	struct printbuf buf = PRINTBUF;
129 	bool need_commit = false;
130 	int ret = 0;
131 
132 	for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes,
133 				     POS(0, snapshot_root->bi_inum),
134 				     BTREE_ITER_all_snapshots, k, ret) {
135 		if (bpos_ge(k.k->p, SPOS(0, snapshot_root->bi_inum, snapshot_root->bi_snapshot)))
136 			break;
137 		if (!bkey_is_inode(k.k))
138 			continue;
139 
140 		struct bch_inode_unpacked inode;
141 		ret = bch2_inode_unpack(k, &inode);
142 		if (ret)
143 			break;
144 
145 		if (inode.bi_hash_seed		== snapshot_root->bi_hash_seed &&
146 		    INODE_STR_HASH(&inode)	== INODE_STR_HASH(snapshot_root)) {
147 #ifdef CONFIG_BCACHEFS_DEBUG
148 			struct bch_hash_info hash1 = bch2_hash_info_init(c, snapshot_root);
149 			struct bch_hash_info hash2 = bch2_hash_info_init(c, &inode);
150 
151 			BUG_ON(hash1.type != hash2.type ||
152 			       memcmp(&hash1.siphash_key,
153 				      &hash2.siphash_key,
154 				      sizeof(hash1.siphash_key)));
155 #endif
156 			continue;
157 		}
158 
159 		printbuf_reset(&buf);
160 		prt_printf(&buf, "inode %llu hash info in snapshots %u %u don't match\n",
161 			   snapshot_root->bi_inum,
162 			   inode.bi_snapshot,
163 			   snapshot_root->bi_snapshot);
164 
165 		bch2_prt_str_hash_type(&buf, INODE_STR_HASH(&inode));
166 		prt_printf(&buf, " %llx\n", inode.bi_hash_seed);
167 
168 		bch2_prt_str_hash_type(&buf, INODE_STR_HASH(snapshot_root));
169 		prt_printf(&buf, " %llx", snapshot_root->bi_hash_seed);
170 
171 		if (fsck_err(trans, inode_snapshot_mismatch, "%s", buf.buf)) {
172 			inode.bi_hash_seed = snapshot_root->bi_hash_seed;
173 			SET_INODE_STR_HASH(&inode, INODE_STR_HASH(snapshot_root));
174 
175 			ret = __bch2_fsck_write_inode(trans, &inode);
176 			if (ret)
177 				break;
178 			need_commit = true;
179 		}
180 	}
181 
182 	if (ret)
183 		goto err;
184 
185 	if (!need_commit) {
186 		struct printbuf buf = PRINTBUF;
187 		bch2_log_msg_start(c, &buf);
188 
189 		prt_printf(&buf, "inode %llu hash info mismatch with root, but mismatch not found\n",
190 			   snapshot_root->bi_inum);
191 
192 		prt_printf(&buf, "root snapshot %u ", snapshot_root->bi_snapshot);
193 		bch2_prt_str_hash_type(&buf, INODE_STR_HASH(snapshot_root));
194 		prt_printf(&buf, " %llx\n", snapshot_root->bi_hash_seed);
195 #if 0
196 		prt_printf(&buf, "vs   snapshot %u ", hash_info->inum_snapshot);
197 		bch2_prt_str_hash_type(&buf, hash_info->type);
198 		prt_printf(&buf, " %llx %llx", hash_info->siphash_key.k0, hash_info->siphash_key.k1);
199 #endif
200 		bch2_print_str(c, KERN_ERR, buf.buf);
201 		printbuf_exit(&buf);
202 		ret = bch_err_throw(c, fsck_repair_unimplemented);
203 		goto err;
204 	}
205 
206 	ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?:
207 		-BCH_ERR_transaction_restart_nested;
208 err:
209 fsck_err:
210 	printbuf_exit(&buf);
211 	bch2_trans_iter_exit(trans, &iter);
212 	return ret;
213 }
214 
215 /*
216  * All versions of the same inode in different snapshots must have the same hash
217  * seed/type: verify that the hash info we're using matches the root
218  */
check_inode_hash_info_matches_root(struct btree_trans * trans,u64 inum,struct bch_hash_info * hash_info)219 static noinline int check_inode_hash_info_matches_root(struct btree_trans *trans, u64 inum,
220 						       struct bch_hash_info *hash_info)
221 {
222 	struct bch_inode_unpacked snapshot_root;
223 	int ret = bch2_inode_find_snapshot_root(trans, inum, &snapshot_root);
224 	if (ret)
225 		return ret;
226 
227 	struct bch_hash_info hash_root = bch2_hash_info_init(trans->c, &snapshot_root);
228 	if (hash_info->type != hash_root.type ||
229 	    memcmp(&hash_info->siphash_key,
230 		   &hash_root.siphash_key,
231 		   sizeof(hash_root.siphash_key)))
232 		ret = bch2_repair_inode_hash_info(trans, &snapshot_root);
233 
234 	return ret;
235 }
236 
237 /* Put a str_hash key in its proper location, checking for duplicates */
bch2_str_hash_repair_key(struct btree_trans * trans,struct snapshots_seen * s,const struct bch_hash_desc * desc,struct bch_hash_info * hash_info,struct btree_iter * k_iter,struct bkey_s_c k,struct btree_iter * dup_iter,struct bkey_s_c dup_k,bool * updated_before_k_pos)238 int bch2_str_hash_repair_key(struct btree_trans *trans,
239 			     struct snapshots_seen *s,
240 			     const struct bch_hash_desc *desc,
241 			     struct bch_hash_info *hash_info,
242 			     struct btree_iter *k_iter, struct bkey_s_c k,
243 			     struct btree_iter *dup_iter, struct bkey_s_c dup_k,
244 			     bool *updated_before_k_pos)
245 {
246 	struct bch_fs *c = trans->c;
247 	struct printbuf buf = PRINTBUF;
248 	bool free_snapshots_seen = false;
249 	int ret = 0;
250 
251 	if (!s) {
252 		s = bch2_trans_kmalloc(trans, sizeof(*s));
253 		ret = PTR_ERR_OR_ZERO(s);
254 		if (ret)
255 			goto out;
256 
257 		s->pos = k_iter->pos;
258 		darray_init(&s->ids);
259 
260 		ret = bch2_get_snapshot_overwrites(trans, desc->btree_id, k_iter->pos, &s->ids);
261 		if (ret)
262 			goto out;
263 
264 		free_snapshots_seen = true;
265 	}
266 
267 	if (!dup_k.k) {
268 		struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k);
269 		ret = PTR_ERR_OR_ZERO(new);
270 		if (ret)
271 			goto out;
272 
273 		dup_k = bch2_hash_set_or_get_in_snapshot(trans, dup_iter, *desc, hash_info,
274 				       (subvol_inum) { 0, new->k.p.inode },
275 				       new->k.p.snapshot, new,
276 				       STR_HASH_must_create|
277 				       BTREE_ITER_with_updates|
278 				       BTREE_UPDATE_internal_snapshot_node);
279 		ret = bkey_err(dup_k);
280 		if (ret)
281 			goto out;
282 		if (dup_k.k)
283 			goto duplicate_entries;
284 
285 		if (bpos_lt(new->k.p, k.k->p))
286 			*updated_before_k_pos = true;
287 
288 		ret =   bch2_insert_snapshot_whiteouts(trans, desc->btree_id,
289 						       k_iter->pos, new->k.p) ?:
290 			bch2_hash_delete_at(trans, *desc, hash_info, k_iter,
291 					    BTREE_ITER_with_updates|
292 					    BTREE_UPDATE_internal_snapshot_node) ?:
293 			bch2_fsck_update_backpointers(trans, s, *desc, hash_info, new) ?:
294 			bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?:
295 			-BCH_ERR_transaction_restart_commit;
296 	} else {
297 duplicate_entries:
298 		ret = hash_pick_winner(trans, *desc, hash_info, k, dup_k);
299 		if (ret < 0)
300 			goto out;
301 
302 		if (!fsck_err(trans, hash_table_key_duplicate,
303 			      "duplicate hash table keys%s:\n%s",
304 			      ret != 2 ? "" : ", both point to valid inodes",
305 			      (printbuf_reset(&buf),
306 			       bch2_bkey_val_to_text(&buf, c, k),
307 			       prt_newline(&buf),
308 			       bch2_bkey_val_to_text(&buf, c, dup_k),
309 			       buf.buf)))
310 			goto out;
311 
312 		switch (ret) {
313 		case 0:
314 			ret = bch2_hash_delete_at(trans, *desc, hash_info, k_iter, 0);
315 			break;
316 		case 1:
317 			ret = bch2_hash_delete_at(trans, *desc, hash_info, dup_iter, 0);
318 			break;
319 		case 2:
320 			ret = bch2_fsck_rename_dirent(trans, s, *desc, hash_info,
321 						      bkey_s_c_to_dirent(k),
322 						      updated_before_k_pos) ?:
323 				bch2_hash_delete_at(trans, *desc, hash_info, k_iter,
324 						    BTREE_ITER_with_updates);
325 			goto out;
326 		}
327 
328 		ret = bch2_trans_commit(trans, NULL, NULL, 0) ?:
329 			-BCH_ERR_transaction_restart_commit;
330 	}
331 out:
332 fsck_err:
333 	bch2_trans_iter_exit(trans, dup_iter);
334 	printbuf_exit(&buf);
335 	if (free_snapshots_seen)
336 		darray_exit(&s->ids);
337 	return ret;
338 }
339 
__bch2_str_hash_check_key(struct btree_trans * trans,struct snapshots_seen * s,const struct bch_hash_desc * desc,struct bch_hash_info * hash_info,struct btree_iter * k_iter,struct bkey_s_c hash_k,bool * updated_before_k_pos)340 int __bch2_str_hash_check_key(struct btree_trans *trans,
341 			      struct snapshots_seen *s,
342 			      const struct bch_hash_desc *desc,
343 			      struct bch_hash_info *hash_info,
344 			      struct btree_iter *k_iter, struct bkey_s_c hash_k,
345 			      bool *updated_before_k_pos)
346 {
347 	struct bch_fs *c = trans->c;
348 	struct btree_iter iter = {};
349 	struct printbuf buf = PRINTBUF;
350 	struct bkey_s_c k;
351 	int ret = 0;
352 
353 	u64 hash = desc->hash_bkey(hash_info, hash_k);
354 	if (hash_k.k->p.offset < hash)
355 		goto bad_hash;
356 
357 	for_each_btree_key_norestart(trans, iter, desc->btree_id,
358 				     SPOS(hash_k.k->p.inode, hash, hash_k.k->p.snapshot),
359 				     BTREE_ITER_slots|
360 				     BTREE_ITER_with_updates, k, ret) {
361 		if (bkey_eq(k.k->p, hash_k.k->p))
362 			break;
363 
364 		if (k.k->type == desc->key_type &&
365 		    !desc->cmp_bkey(k, hash_k)) {
366 			ret =	check_inode_hash_info_matches_root(trans, hash_k.k->p.inode,
367 								   hash_info) ?:
368 				bch2_str_hash_repair_key(trans, s, desc, hash_info,
369 							 k_iter, hash_k,
370 							 &iter, k, updated_before_k_pos);
371 			break;
372 		}
373 
374 		if (bkey_deleted(k.k))
375 			goto bad_hash;
376 	}
377 	bch2_trans_iter_exit(trans, &iter);
378 out:
379 fsck_err:
380 	printbuf_exit(&buf);
381 	return ret;
382 bad_hash:
383 	bch2_trans_iter_exit(trans, &iter);
384 	/*
385 	 * Before doing any repair, check hash_info itself:
386 	 */
387 	ret = check_inode_hash_info_matches_root(trans, hash_k.k->p.inode, hash_info);
388 	if (ret)
389 		goto out;
390 
391 	if (fsck_err(trans, hash_table_key_wrong_offset,
392 		     "hash table key at wrong offset: should be at %llu\n%s",
393 		     hash,
394 		     (bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf)))
395 		ret = bch2_str_hash_repair_key(trans, s, desc, hash_info,
396 					       k_iter, hash_k,
397 					       &iter, bkey_s_c_null,
398 					       updated_before_k_pos);
399 	goto out;
400 }
401