xref: /linux/fs/bcachefs/bkey.c (revision 5e2cb28dd7e182dfa641550dfa225913509ad45d)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 #include "bcachefs.h"
4 #include "bkey.h"
5 #include "bkey_cmp.h"
6 #include "bkey_methods.h"
7 #include "bset.h"
8 #include "util.h"
9 
10 const struct bkey_format bch2_bkey_format_current = BKEY_FORMAT_CURRENT;
11 
12 void bch2_bkey_packed_to_binary_text(struct printbuf *out,
13 				     const struct bkey_format *f,
14 				     const struct bkey_packed *k)
15 {
16 	const u64 *p = high_word(f, k);
17 	unsigned word_bits = 64 - high_bit_offset;
18 	unsigned nr_key_bits = bkey_format_key_bits(f) + high_bit_offset;
19 	u64 v = *p & (~0ULL >> high_bit_offset);
20 
21 	if (!nr_key_bits) {
22 		prt_str(out, "(empty)");
23 		return;
24 	}
25 
26 	while (1) {
27 		unsigned next_key_bits = nr_key_bits;
28 
29 		if (nr_key_bits < 64) {
30 			v >>= 64 - nr_key_bits;
31 			next_key_bits = 0;
32 		} else {
33 			next_key_bits -= 64;
34 		}
35 
36 		bch2_prt_u64_binary(out, v, min(word_bits, nr_key_bits));
37 
38 		if (!next_key_bits)
39 			break;
40 
41 		prt_char(out, ' ');
42 
43 		p = next_word(p);
44 		v = *p;
45 		word_bits = 64;
46 		nr_key_bits = next_key_bits;
47 	}
48 }
49 
50 #ifdef CONFIG_BCACHEFS_DEBUG
51 
52 static void bch2_bkey_pack_verify(const struct bkey_packed *packed,
53 				  const struct bkey *unpacked,
54 				  const struct bkey_format *format)
55 {
56 	struct bkey tmp;
57 
58 	BUG_ON(bkeyp_val_u64s(format, packed) !=
59 	       bkey_val_u64s(unpacked));
60 
61 	BUG_ON(packed->u64s < bkeyp_key_u64s(format, packed));
62 
63 	tmp = __bch2_bkey_unpack_key(format, packed);
64 
65 	if (memcmp(&tmp, unpacked, sizeof(struct bkey))) {
66 		struct printbuf buf = PRINTBUF;
67 
68 		prt_printf(&buf, "keys differ: format u64s %u fields %u %u %u %u %u\n",
69 		      format->key_u64s,
70 		      format->bits_per_field[0],
71 		      format->bits_per_field[1],
72 		      format->bits_per_field[2],
73 		      format->bits_per_field[3],
74 		      format->bits_per_field[4]);
75 
76 		prt_printf(&buf, "compiled unpack: ");
77 		bch2_bkey_to_text(&buf, unpacked);
78 		prt_newline(&buf);
79 
80 		prt_printf(&buf, "c unpack:        ");
81 		bch2_bkey_to_text(&buf, &tmp);
82 		prt_newline(&buf);
83 
84 		prt_printf(&buf, "compiled unpack: ");
85 		bch2_bkey_packed_to_binary_text(&buf, &bch2_bkey_format_current,
86 						(struct bkey_packed *) unpacked);
87 		prt_newline(&buf);
88 
89 		prt_printf(&buf, "c unpack:        ");
90 		bch2_bkey_packed_to_binary_text(&buf, &bch2_bkey_format_current,
91 						(struct bkey_packed *) &tmp);
92 		prt_newline(&buf);
93 
94 		panic("%s", buf.buf);
95 	}
96 }
97 
98 #else
99 static inline void bch2_bkey_pack_verify(const struct bkey_packed *packed,
100 					const struct bkey *unpacked,
101 					const struct bkey_format *format) {}
102 #endif
103 
104 struct pack_state {
105 	const struct bkey_format *format;
106 	unsigned		bits;	/* bits remaining in current word */
107 	u64			w;	/* current word */
108 	u64			*p;	/* pointer to next word */
109 };
110 
111 __always_inline
112 static struct pack_state pack_state_init(const struct bkey_format *format,
113 					 struct bkey_packed *k)
114 {
115 	u64 *p = high_word(format, k);
116 
117 	return (struct pack_state) {
118 		.format	= format,
119 		.bits	= 64 - high_bit_offset,
120 		.w	= 0,
121 		.p	= p,
122 	};
123 }
124 
125 __always_inline
126 static void pack_state_finish(struct pack_state *state,
127 			      struct bkey_packed *k)
128 {
129 	EBUG_ON(state->p <  k->_data);
130 	EBUG_ON(state->p >= (u64 *) k->_data + state->format->key_u64s);
131 
132 	*state->p = state->w;
133 }
134 
135 struct unpack_state {
136 	const struct bkey_format *format;
137 	unsigned		bits;	/* bits remaining in current word */
138 	u64			w;	/* current word */
139 	const u64		*p;	/* pointer to next word */
140 };
141 
142 __always_inline
143 static struct unpack_state unpack_state_init(const struct bkey_format *format,
144 					     const struct bkey_packed *k)
145 {
146 	const u64 *p = high_word(format, k);
147 
148 	return (struct unpack_state) {
149 		.format	= format,
150 		.bits	= 64 - high_bit_offset,
151 		.w	= *p << high_bit_offset,
152 		.p	= p,
153 	};
154 }
155 
156 __always_inline
157 static u64 get_inc_field(struct unpack_state *state, unsigned field)
158 {
159 	unsigned bits = state->format->bits_per_field[field];
160 	u64 v = 0, offset = le64_to_cpu(state->format->field_offset[field]);
161 
162 	if (bits >= state->bits) {
163 		v = state->w >> (64 - bits);
164 		bits -= state->bits;
165 
166 		state->p = next_word(state->p);
167 		state->w = *state->p;
168 		state->bits = 64;
169 	}
170 
171 	/* avoid shift by 64 if bits is 0 - bits is never 64 here: */
172 	v |= (state->w >> 1) >> (63 - bits);
173 	state->w <<= bits;
174 	state->bits -= bits;
175 
176 	return v + offset;
177 }
178 
179 __always_inline
180 static void __set_inc_field(struct pack_state *state, unsigned field, u64 v)
181 {
182 	unsigned bits = state->format->bits_per_field[field];
183 
184 	if (bits) {
185 		if (bits > state->bits) {
186 			bits -= state->bits;
187 			/* avoid shift by 64 if bits is 64 - bits is never 0 here: */
188 			state->w |= (v >> 1) >> (bits - 1);
189 
190 			*state->p = state->w;
191 			state->p = next_word(state->p);
192 			state->w = 0;
193 			state->bits = 64;
194 		}
195 
196 		state->bits -= bits;
197 		state->w |= v << state->bits;
198 	}
199 }
200 
201 __always_inline
202 static bool set_inc_field(struct pack_state *state, unsigned field, u64 v)
203 {
204 	unsigned bits = state->format->bits_per_field[field];
205 	u64 offset = le64_to_cpu(state->format->field_offset[field]);
206 
207 	if (v < offset)
208 		return false;
209 
210 	v -= offset;
211 
212 	if (fls64(v) > bits)
213 		return false;
214 
215 	__set_inc_field(state, field, v);
216 	return true;
217 }
218 
219 /*
220  * Note: does NOT set out->format (we don't know what it should be here!)
221  *
222  * Also: doesn't work on extents - it doesn't preserve the invariant that
223  * if k is packed bkey_start_pos(k) will successfully pack
224  */
225 static bool bch2_bkey_transform_key(const struct bkey_format *out_f,
226 				   struct bkey_packed *out,
227 				   const struct bkey_format *in_f,
228 				   const struct bkey_packed *in)
229 {
230 	struct pack_state out_s = pack_state_init(out_f, out);
231 	struct unpack_state in_s = unpack_state_init(in_f, in);
232 	u64 *w = out->_data;
233 	unsigned i;
234 
235 	*w = 0;
236 
237 	for (i = 0; i < BKEY_NR_FIELDS; i++)
238 		if (!set_inc_field(&out_s, i, get_inc_field(&in_s, i)))
239 			return false;
240 
241 	/* Can't happen because the val would be too big to unpack: */
242 	EBUG_ON(in->u64s - in_f->key_u64s + out_f->key_u64s > U8_MAX);
243 
244 	pack_state_finish(&out_s, out);
245 	out->u64s	= out_f->key_u64s + in->u64s - in_f->key_u64s;
246 	out->needs_whiteout = in->needs_whiteout;
247 	out->type	= in->type;
248 
249 	return true;
250 }
251 
252 bool bch2_bkey_transform(const struct bkey_format *out_f,
253 			struct bkey_packed *out,
254 			const struct bkey_format *in_f,
255 			const struct bkey_packed *in)
256 {
257 	if (!bch2_bkey_transform_key(out_f, out, in_f, in))
258 		return false;
259 
260 	memcpy_u64s((u64 *) out + out_f->key_u64s,
261 		    (u64 *) in + in_f->key_u64s,
262 		    (in->u64s - in_f->key_u64s));
263 	return true;
264 }
265 
266 struct bkey __bch2_bkey_unpack_key(const struct bkey_format *format,
267 			      const struct bkey_packed *in)
268 {
269 	struct unpack_state state = unpack_state_init(format, in);
270 	struct bkey out;
271 
272 	EBUG_ON(format->nr_fields != BKEY_NR_FIELDS);
273 	EBUG_ON(in->u64s < format->key_u64s);
274 	EBUG_ON(in->format != KEY_FORMAT_LOCAL_BTREE);
275 	EBUG_ON(in->u64s - format->key_u64s + BKEY_U64s > U8_MAX);
276 
277 	out.u64s	= BKEY_U64s + in->u64s - format->key_u64s;
278 	out.format	= KEY_FORMAT_CURRENT;
279 	out.needs_whiteout = in->needs_whiteout;
280 	out.type	= in->type;
281 	out.pad[0]	= 0;
282 
283 #define x(id, field)	out.field = get_inc_field(&state, id);
284 	bkey_fields()
285 #undef x
286 
287 	return out;
288 }
289 
290 #ifndef HAVE_BCACHEFS_COMPILED_UNPACK
291 struct bpos __bkey_unpack_pos(const struct bkey_format *format,
292 				     const struct bkey_packed *in)
293 {
294 	struct unpack_state state = unpack_state_init(format, in);
295 	struct bpos out;
296 
297 	EBUG_ON(format->nr_fields != BKEY_NR_FIELDS);
298 	EBUG_ON(in->u64s < format->key_u64s);
299 	EBUG_ON(in->format != KEY_FORMAT_LOCAL_BTREE);
300 
301 	out.inode	= get_inc_field(&state, BKEY_FIELD_INODE);
302 	out.offset	= get_inc_field(&state, BKEY_FIELD_OFFSET);
303 	out.snapshot	= get_inc_field(&state, BKEY_FIELD_SNAPSHOT);
304 
305 	return out;
306 }
307 #endif
308 
309 /**
310  * bch2_bkey_pack_key -- pack just the key, not the value
311  * @out:	packed result
312  * @in:		key to pack
313  * @format:	format of packed result
314  *
315  * Returns: true on success, false on failure
316  */
317 bool bch2_bkey_pack_key(struct bkey_packed *out, const struct bkey *in,
318 			const struct bkey_format *format)
319 {
320 	struct pack_state state = pack_state_init(format, out);
321 	u64 *w = out->_data;
322 
323 	EBUG_ON((void *) in == (void *) out);
324 	EBUG_ON(format->nr_fields != BKEY_NR_FIELDS);
325 	EBUG_ON(in->format != KEY_FORMAT_CURRENT);
326 
327 	*w = 0;
328 
329 #define x(id, field)	if (!set_inc_field(&state, id, in->field)) return false;
330 	bkey_fields()
331 #undef x
332 	pack_state_finish(&state, out);
333 	out->u64s	= format->key_u64s + in->u64s - BKEY_U64s;
334 	out->format	= KEY_FORMAT_LOCAL_BTREE;
335 	out->needs_whiteout = in->needs_whiteout;
336 	out->type	= in->type;
337 
338 	bch2_bkey_pack_verify(out, in, format);
339 	return true;
340 }
341 
342 /**
343  * bch2_bkey_unpack -- unpack the key and the value
344  * @b:		btree node of @src key (for packed format)
345  * @dst:	unpacked result
346  * @src:	packed input
347  */
348 void bch2_bkey_unpack(const struct btree *b, struct bkey_i *dst,
349 		      const struct bkey_packed *src)
350 {
351 	__bkey_unpack_key(b, &dst->k, src);
352 
353 	memcpy_u64s(&dst->v,
354 		    bkeyp_val(&b->format, src),
355 		    bkeyp_val_u64s(&b->format, src));
356 }
357 
358 /**
359  * bch2_bkey_pack -- pack the key and the value
360  * @dst:	packed result
361  * @src:	unpacked input
362  * @format:	format of packed result
363  *
364  * Returns: true on success, false on failure
365  */
366 bool bch2_bkey_pack(struct bkey_packed *dst, const struct bkey_i *src,
367 		    const struct bkey_format *format)
368 {
369 	struct bkey_packed tmp;
370 
371 	if (!bch2_bkey_pack_key(&tmp, &src->k, format))
372 		return false;
373 
374 	memmove_u64s((u64 *) dst + format->key_u64s,
375 		     &src->v,
376 		     bkey_val_u64s(&src->k));
377 	memcpy_u64s_small(dst, &tmp, format->key_u64s);
378 
379 	return true;
380 }
381 
382 __always_inline
383 static bool set_inc_field_lossy(struct pack_state *state, unsigned field, u64 v)
384 {
385 	unsigned bits = state->format->bits_per_field[field];
386 	u64 offset = le64_to_cpu(state->format->field_offset[field]);
387 	bool ret = true;
388 
389 	EBUG_ON(v < offset);
390 	v -= offset;
391 
392 	if (fls64(v) > bits) {
393 		v = ~(~0ULL << bits);
394 		ret = false;
395 	}
396 
397 	__set_inc_field(state, field, v);
398 	return ret;
399 }
400 
401 #ifdef CONFIG_BCACHEFS_DEBUG
402 static bool bkey_packed_successor(struct bkey_packed *out,
403 				  const struct btree *b,
404 				  struct bkey_packed k)
405 {
406 	const struct bkey_format *f = &b->format;
407 	unsigned nr_key_bits = b->nr_key_bits;
408 	unsigned first_bit, offset;
409 	u64 *p;
410 
411 	EBUG_ON(b->nr_key_bits != bkey_format_key_bits(f));
412 
413 	if (!nr_key_bits)
414 		return false;
415 
416 	*out = k;
417 
418 	first_bit = high_bit_offset + nr_key_bits - 1;
419 	p = nth_word(high_word(f, out), first_bit >> 6);
420 	offset = 63 - (first_bit & 63);
421 
422 	while (nr_key_bits) {
423 		unsigned bits = min(64 - offset, nr_key_bits);
424 		u64 mask = (~0ULL >> (64 - bits)) << offset;
425 
426 		if ((*p & mask) != mask) {
427 			*p += 1ULL << offset;
428 			EBUG_ON(bch2_bkey_cmp_packed(b, out, &k) <= 0);
429 			return true;
430 		}
431 
432 		*p &= ~mask;
433 		p = prev_word(p);
434 		nr_key_bits -= bits;
435 		offset = 0;
436 	}
437 
438 	return false;
439 }
440 
441 static bool bkey_format_has_too_big_fields(const struct bkey_format *f)
442 {
443 	for (unsigned i = 0; i < f->nr_fields; i++) {
444 		unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i];
445 		u64 unpacked_max = ~((~0ULL << 1) << (unpacked_bits - 1));
446 		u64 packed_max = f->bits_per_field[i]
447 			? ~((~0ULL << 1) << (f->bits_per_field[i] - 1))
448 			: 0;
449 		u64 field_offset = le64_to_cpu(f->field_offset[i]);
450 
451 		if (packed_max + field_offset < packed_max ||
452 		    packed_max + field_offset > unpacked_max)
453 			return true;
454 	}
455 
456 	return false;
457 }
458 #endif
459 
460 /*
461  * Returns a packed key that compares <= in
462  *
463  * This is used in bset_search_tree(), where we need a packed pos in order to be
464  * able to compare against the keys in the auxiliary search tree - and it's
465  * legal to use a packed pos that isn't equivalent to the original pos,
466  * _provided_ it compares <= to the original pos.
467  */
468 enum bkey_pack_pos_ret bch2_bkey_pack_pos_lossy(struct bkey_packed *out,
469 					   struct bpos in,
470 					   const struct btree *b)
471 {
472 	const struct bkey_format *f = &b->format;
473 	struct pack_state state = pack_state_init(f, out);
474 	u64 *w = out->_data;
475 #ifdef CONFIG_BCACHEFS_DEBUG
476 	struct bpos orig = in;
477 #endif
478 	bool exact = true;
479 	unsigned i;
480 
481 	/*
482 	 * bch2_bkey_pack_key() will write to all of f->key_u64s, minus the 3
483 	 * byte header, but pack_pos() won't if the len/version fields are big
484 	 * enough - we need to make sure to zero them out:
485 	 */
486 	for (i = 0; i < f->key_u64s; i++)
487 		w[i] = 0;
488 
489 	if (unlikely(in.snapshot <
490 		     le64_to_cpu(f->field_offset[BKEY_FIELD_SNAPSHOT]))) {
491 		if (!in.offset-- &&
492 		    !in.inode--)
493 			return BKEY_PACK_POS_FAIL;
494 		in.snapshot	= KEY_SNAPSHOT_MAX;
495 		exact = false;
496 	}
497 
498 	if (unlikely(in.offset <
499 		     le64_to_cpu(f->field_offset[BKEY_FIELD_OFFSET]))) {
500 		if (!in.inode--)
501 			return BKEY_PACK_POS_FAIL;
502 		in.offset	= KEY_OFFSET_MAX;
503 		in.snapshot	= KEY_SNAPSHOT_MAX;
504 		exact = false;
505 	}
506 
507 	if (unlikely(in.inode <
508 		     le64_to_cpu(f->field_offset[BKEY_FIELD_INODE])))
509 		return BKEY_PACK_POS_FAIL;
510 
511 	if (unlikely(!set_inc_field_lossy(&state, BKEY_FIELD_INODE, in.inode))) {
512 		in.offset	= KEY_OFFSET_MAX;
513 		in.snapshot	= KEY_SNAPSHOT_MAX;
514 		exact = false;
515 	}
516 
517 	if (unlikely(!set_inc_field_lossy(&state, BKEY_FIELD_OFFSET, in.offset))) {
518 		in.snapshot	= KEY_SNAPSHOT_MAX;
519 		exact = false;
520 	}
521 
522 	if (unlikely(!set_inc_field_lossy(&state, BKEY_FIELD_SNAPSHOT, in.snapshot)))
523 		exact = false;
524 
525 	pack_state_finish(&state, out);
526 	out->u64s	= f->key_u64s;
527 	out->format	= KEY_FORMAT_LOCAL_BTREE;
528 	out->type	= KEY_TYPE_deleted;
529 
530 #ifdef CONFIG_BCACHEFS_DEBUG
531 	if (exact) {
532 		BUG_ON(bkey_cmp_left_packed(b, out, &orig));
533 	} else {
534 		struct bkey_packed successor;
535 
536 		BUG_ON(bkey_cmp_left_packed(b, out, &orig) >= 0);
537 		BUG_ON(bkey_packed_successor(&successor, b, *out) &&
538 		       bkey_cmp_left_packed(b, &successor, &orig) < 0 &&
539 		       !bkey_format_has_too_big_fields(f));
540 	}
541 #endif
542 
543 	return exact ? BKEY_PACK_POS_EXACT : BKEY_PACK_POS_SMALLER;
544 }
545 
546 void bch2_bkey_format_init(struct bkey_format_state *s)
547 {
548 	unsigned i;
549 
550 	for (i = 0; i < ARRAY_SIZE(s->field_min); i++)
551 		s->field_min[i] = U64_MAX;
552 
553 	for (i = 0; i < ARRAY_SIZE(s->field_max); i++)
554 		s->field_max[i] = 0;
555 
556 	/* Make sure we can store a size of 0: */
557 	s->field_min[BKEY_FIELD_SIZE] = 0;
558 }
559 
560 void bch2_bkey_format_add_pos(struct bkey_format_state *s, struct bpos p)
561 {
562 	unsigned field = 0;
563 
564 	__bkey_format_add(s, field++, p.inode);
565 	__bkey_format_add(s, field++, p.offset);
566 	__bkey_format_add(s, field++, p.snapshot);
567 }
568 
569 /*
570  * We don't want it to be possible for the packed format to represent fields
571  * bigger than a u64... that will cause confusion and issues (like with
572  * bkey_packed_successor())
573  */
574 static void set_format_field(struct bkey_format *f, enum bch_bkey_fields i,
575 			     unsigned bits, u64 offset)
576 {
577 	unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i];
578 	u64 unpacked_max = ~((~0ULL << 1) << (unpacked_bits - 1));
579 
580 	bits = min(bits, unpacked_bits);
581 
582 	offset = bits == unpacked_bits ? 0 : min(offset, unpacked_max - ((1ULL << bits) - 1));
583 
584 	f->bits_per_field[i]	= bits;
585 	f->field_offset[i]	= cpu_to_le64(offset);
586 }
587 
588 struct bkey_format bch2_bkey_format_done(struct bkey_format_state *s)
589 {
590 	unsigned i, bits = KEY_PACKED_BITS_START;
591 	struct bkey_format ret = {
592 		.nr_fields = BKEY_NR_FIELDS,
593 	};
594 
595 	for (i = 0; i < ARRAY_SIZE(s->field_min); i++) {
596 		s->field_min[i] = min(s->field_min[i], s->field_max[i]);
597 
598 		set_format_field(&ret, i,
599 				 fls64(s->field_max[i] - s->field_min[i]),
600 				 s->field_min[i]);
601 
602 		bits += ret.bits_per_field[i];
603 	}
604 
605 	/* allow for extent merging: */
606 	if (ret.bits_per_field[BKEY_FIELD_SIZE]) {
607 		unsigned b = min(4U, 32U - ret.bits_per_field[BKEY_FIELD_SIZE]);
608 
609 		ret.bits_per_field[BKEY_FIELD_SIZE] += b;
610 		bits += b;
611 	}
612 
613 	ret.key_u64s = DIV_ROUND_UP(bits, 64);
614 
615 	/* if we have enough spare bits, round fields up to nearest byte */
616 	bits = ret.key_u64s * 64 - bits;
617 
618 	for (i = 0; i < ARRAY_SIZE(ret.bits_per_field); i++) {
619 		unsigned r = round_up(ret.bits_per_field[i], 8) -
620 			ret.bits_per_field[i];
621 
622 		if (r <= bits) {
623 			set_format_field(&ret, i,
624 					 ret.bits_per_field[i] + r,
625 					 le64_to_cpu(ret.field_offset[i]));
626 			bits -= r;
627 		}
628 	}
629 
630 #ifdef CONFIG_BCACHEFS_DEBUG
631 	{
632 		struct printbuf buf = PRINTBUF;
633 
634 		BUG_ON(bch2_bkey_format_invalid(NULL, &ret, 0, &buf));
635 		printbuf_exit(&buf);
636 	}
637 #endif
638 	return ret;
639 }
640 
641 int bch2_bkey_format_invalid(struct bch_fs *c,
642 			     struct bkey_format *f,
643 			     enum bkey_invalid_flags flags,
644 			     struct printbuf *err)
645 {
646 	unsigned i, bits = KEY_PACKED_BITS_START;
647 
648 	if (f->nr_fields != BKEY_NR_FIELDS) {
649 		prt_printf(err, "incorrect number of fields: got %u, should be %u",
650 			   f->nr_fields, BKEY_NR_FIELDS);
651 		return -BCH_ERR_invalid;
652 	}
653 
654 	/*
655 	 * Verify that the packed format can't represent fields larger than the
656 	 * unpacked format:
657 	 */
658 	for (i = 0; i < f->nr_fields; i++) {
659 		if (!c || c->sb.version_min >= bcachefs_metadata_version_snapshot) {
660 			unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i];
661 			u64 unpacked_max = ~((~0ULL << 1) << (unpacked_bits - 1));
662 			u64 packed_max = f->bits_per_field[i]
663 				? ~((~0ULL << 1) << (f->bits_per_field[i] - 1))
664 				: 0;
665 			u64 field_offset = le64_to_cpu(f->field_offset[i]);
666 
667 			if (packed_max + field_offset < packed_max ||
668 			    packed_max + field_offset > unpacked_max) {
669 				prt_printf(err, "field %u too large: %llu + %llu > %llu",
670 					   i, packed_max, field_offset, unpacked_max);
671 				return -BCH_ERR_invalid;
672 			}
673 		}
674 
675 		bits += f->bits_per_field[i];
676 	}
677 
678 	if (f->key_u64s != DIV_ROUND_UP(bits, 64)) {
679 		prt_printf(err, "incorrect key_u64s: got %u, should be %u",
680 			   f->key_u64s, DIV_ROUND_UP(bits, 64));
681 		return -BCH_ERR_invalid;
682 	}
683 
684 	return 0;
685 }
686 
687 void bch2_bkey_format_to_text(struct printbuf *out, const struct bkey_format *f)
688 {
689 	prt_printf(out, "u64s %u fields ", f->key_u64s);
690 
691 	for (unsigned i = 0; i < ARRAY_SIZE(f->bits_per_field); i++) {
692 		if (i)
693 			prt_str(out, ", ");
694 		prt_printf(out, "%u:%llu",
695 			   f->bits_per_field[i],
696 			   le64_to_cpu(f->field_offset[i]));
697 	}
698 }
699 
700 /*
701  * Most significant differing bit
702  * Bits are indexed from 0 - return is [0, nr_key_bits)
703  */
704 __pure
705 unsigned bch2_bkey_greatest_differing_bit(const struct btree *b,
706 					  const struct bkey_packed *l_k,
707 					  const struct bkey_packed *r_k)
708 {
709 	const u64 *l = high_word(&b->format, l_k);
710 	const u64 *r = high_word(&b->format, r_k);
711 	unsigned nr_key_bits = b->nr_key_bits;
712 	unsigned word_bits = 64 - high_bit_offset;
713 	u64 l_v, r_v;
714 
715 	EBUG_ON(b->nr_key_bits != bkey_format_key_bits(&b->format));
716 
717 	/* for big endian, skip past header */
718 	l_v = *l & (~0ULL >> high_bit_offset);
719 	r_v = *r & (~0ULL >> high_bit_offset);
720 
721 	while (nr_key_bits) {
722 		if (nr_key_bits < word_bits) {
723 			l_v >>= word_bits - nr_key_bits;
724 			r_v >>= word_bits - nr_key_bits;
725 			nr_key_bits = 0;
726 		} else {
727 			nr_key_bits -= word_bits;
728 		}
729 
730 		if (l_v != r_v)
731 			return fls64(l_v ^ r_v) - 1 + nr_key_bits;
732 
733 		l = next_word(l);
734 		r = next_word(r);
735 
736 		l_v = *l;
737 		r_v = *r;
738 		word_bits = 64;
739 	}
740 
741 	return 0;
742 }
743 
744 /*
745  * First set bit
746  * Bits are indexed from 0 - return is [0, nr_key_bits)
747  */
748 __pure
749 unsigned bch2_bkey_ffs(const struct btree *b, const struct bkey_packed *k)
750 {
751 	const u64 *p = high_word(&b->format, k);
752 	unsigned nr_key_bits = b->nr_key_bits;
753 	unsigned ret = 0, offset;
754 
755 	EBUG_ON(b->nr_key_bits != bkey_format_key_bits(&b->format));
756 
757 	offset = nr_key_bits;
758 	while (offset > 64) {
759 		p = next_word(p);
760 		offset -= 64;
761 	}
762 
763 	offset = 64 - offset;
764 
765 	while (nr_key_bits) {
766 		unsigned bits = nr_key_bits + offset < 64
767 			? nr_key_bits
768 			: 64 - offset;
769 
770 		u64 mask = (~0ULL >> (64 - bits)) << offset;
771 
772 		if (*p & mask)
773 			return ret + __ffs64(*p & mask) - offset;
774 
775 		p = prev_word(p);
776 		nr_key_bits -= bits;
777 		ret += bits;
778 		offset = 0;
779 	}
780 
781 	return 0;
782 }
783 
784 #ifdef HAVE_BCACHEFS_COMPILED_UNPACK
785 
786 #define I(_x)			(*(out)++ = (_x))
787 #define I1(i0)						I(i0)
788 #define I2(i0, i1)		(I1(i0),		I(i1))
789 #define I3(i0, i1, i2)		(I2(i0, i1),		I(i2))
790 #define I4(i0, i1, i2, i3)	(I3(i0, i1, i2),	I(i3))
791 #define I5(i0, i1, i2, i3, i4)	(I4(i0, i1, i2, i3),	I(i4))
792 
793 static u8 *compile_bkey_field(const struct bkey_format *format, u8 *out,
794 			      enum bch_bkey_fields field,
795 			      unsigned dst_offset, unsigned dst_size,
796 			      bool *eax_zeroed)
797 {
798 	unsigned bits = format->bits_per_field[field];
799 	u64 offset = le64_to_cpu(format->field_offset[field]);
800 	unsigned i, byte, bit_offset, align, shl, shr;
801 
802 	if (!bits && !offset) {
803 		if (!*eax_zeroed) {
804 			/* xor eax, eax */
805 			I2(0x31, 0xc0);
806 		}
807 
808 		*eax_zeroed = true;
809 		goto set_field;
810 	}
811 
812 	if (!bits) {
813 		/* just return offset: */
814 
815 		switch (dst_size) {
816 		case 8:
817 			if (offset > S32_MAX) {
818 				/* mov [rdi + dst_offset], offset */
819 				I3(0xc7, 0x47, dst_offset);
820 				memcpy(out, &offset, 4);
821 				out += 4;
822 
823 				I3(0xc7, 0x47, dst_offset + 4);
824 				memcpy(out, (void *) &offset + 4, 4);
825 				out += 4;
826 			} else {
827 				/* mov [rdi + dst_offset], offset */
828 				/* sign extended */
829 				I4(0x48, 0xc7, 0x47, dst_offset);
830 				memcpy(out, &offset, 4);
831 				out += 4;
832 			}
833 			break;
834 		case 4:
835 			/* mov [rdi + dst_offset], offset */
836 			I3(0xc7, 0x47, dst_offset);
837 			memcpy(out, &offset, 4);
838 			out += 4;
839 			break;
840 		default:
841 			BUG();
842 		}
843 
844 		return out;
845 	}
846 
847 	bit_offset = format->key_u64s * 64;
848 	for (i = 0; i <= field; i++)
849 		bit_offset -= format->bits_per_field[i];
850 
851 	byte = bit_offset / 8;
852 	bit_offset -= byte * 8;
853 
854 	*eax_zeroed = false;
855 
856 	if (bit_offset == 0 && bits == 8) {
857 		/* movzx eax, BYTE PTR [rsi + imm8] */
858 		I4(0x0f, 0xb6, 0x46, byte);
859 	} else if (bit_offset == 0 && bits == 16) {
860 		/* movzx eax, WORD PTR [rsi + imm8] */
861 		I4(0x0f, 0xb7, 0x46, byte);
862 	} else if (bit_offset + bits <= 32) {
863 		align = min(4 - DIV_ROUND_UP(bit_offset + bits, 8), byte & 3);
864 		byte -= align;
865 		bit_offset += align * 8;
866 
867 		BUG_ON(bit_offset + bits > 32);
868 
869 		/* mov eax, [rsi + imm8] */
870 		I3(0x8b, 0x46, byte);
871 
872 		if (bit_offset) {
873 			/* shr eax, imm8 */
874 			I3(0xc1, 0xe8, bit_offset);
875 		}
876 
877 		if (bit_offset + bits < 32) {
878 			unsigned mask = ~0U >> (32 - bits);
879 
880 			/* and eax, imm32 */
881 			I1(0x25);
882 			memcpy(out, &mask, 4);
883 			out += 4;
884 		}
885 	} else if (bit_offset + bits <= 64) {
886 		align = min(8 - DIV_ROUND_UP(bit_offset + bits, 8), byte & 7);
887 		byte -= align;
888 		bit_offset += align * 8;
889 
890 		BUG_ON(bit_offset + bits > 64);
891 
892 		/* mov rax, [rsi + imm8] */
893 		I4(0x48, 0x8b, 0x46, byte);
894 
895 		shl = 64 - bit_offset - bits;
896 		shr = bit_offset + shl;
897 
898 		if (shl) {
899 			/* shl rax, imm8 */
900 			I4(0x48, 0xc1, 0xe0, shl);
901 		}
902 
903 		if (shr) {
904 			/* shr rax, imm8 */
905 			I4(0x48, 0xc1, 0xe8, shr);
906 		}
907 	} else {
908 		align = min(4 - DIV_ROUND_UP(bit_offset + bits, 8), byte & 3);
909 		byte -= align;
910 		bit_offset += align * 8;
911 
912 		BUG_ON(bit_offset + bits > 96);
913 
914 		/* mov rax, [rsi + byte] */
915 		I4(0x48, 0x8b, 0x46, byte);
916 
917 		/* mov edx, [rsi + byte + 8] */
918 		I3(0x8b, 0x56, byte + 8);
919 
920 		/* bits from next word: */
921 		shr = bit_offset + bits - 64;
922 		BUG_ON(shr > bit_offset);
923 
924 		/* shr rax, bit_offset */
925 		I4(0x48, 0xc1, 0xe8, shr);
926 
927 		/* shl rdx, imm8 */
928 		I4(0x48, 0xc1, 0xe2, 64 - shr);
929 
930 		/* or rax, rdx */
931 		I3(0x48, 0x09, 0xd0);
932 
933 		shr = bit_offset - shr;
934 
935 		if (shr) {
936 			/* shr rax, imm8 */
937 			I4(0x48, 0xc1, 0xe8, shr);
938 		}
939 	}
940 
941 	/* rax += offset: */
942 	if (offset > S32_MAX) {
943 		/* mov rdx, imm64 */
944 		I2(0x48, 0xba);
945 		memcpy(out, &offset, 8);
946 		out += 8;
947 		/* add %rdx, %rax */
948 		I3(0x48, 0x01, 0xd0);
949 	} else if (offset + (~0ULL >> (64 - bits)) > U32_MAX) {
950 		/* add rax, imm32 */
951 		I2(0x48, 0x05);
952 		memcpy(out, &offset, 4);
953 		out += 4;
954 	} else if (offset) {
955 		/* add eax, imm32 */
956 		I1(0x05);
957 		memcpy(out, &offset, 4);
958 		out += 4;
959 	}
960 set_field:
961 	switch (dst_size) {
962 	case 8:
963 		/* mov [rdi + dst_offset], rax */
964 		I4(0x48, 0x89, 0x47, dst_offset);
965 		break;
966 	case 4:
967 		/* mov [rdi + dst_offset], eax */
968 		I3(0x89, 0x47, dst_offset);
969 		break;
970 	default:
971 		BUG();
972 	}
973 
974 	return out;
975 }
976 
977 int bch2_compile_bkey_format(const struct bkey_format *format, void *_out)
978 {
979 	bool eax_zeroed = false;
980 	u8 *out = _out;
981 
982 	/*
983 	 * rdi: dst - unpacked key
984 	 * rsi: src - packed key
985 	 */
986 
987 	/* k->u64s, k->format, k->type */
988 
989 	/* mov eax, [rsi] */
990 	I2(0x8b, 0x06);
991 
992 	/* add eax, BKEY_U64s - format->key_u64s */
993 	I5(0x05, BKEY_U64s - format->key_u64s, KEY_FORMAT_CURRENT, 0, 0);
994 
995 	/* and eax, imm32: mask out k->pad: */
996 	I5(0x25, 0xff, 0xff, 0xff, 0);
997 
998 	/* mov [rdi], eax */
999 	I2(0x89, 0x07);
1000 
1001 #define x(id, field)							\
1002 	out = compile_bkey_field(format, out, id,			\
1003 				 offsetof(struct bkey, field),		\
1004 				 sizeof(((struct bkey *) NULL)->field),	\
1005 				 &eax_zeroed);
1006 	bkey_fields()
1007 #undef x
1008 
1009 	/* retq */
1010 	I1(0xc3);
1011 
1012 	return (void *) out - _out;
1013 }
1014 
1015 #else
1016 #endif
1017 
1018 __pure
1019 int __bch2_bkey_cmp_packed_format_checked(const struct bkey_packed *l,
1020 					  const struct bkey_packed *r,
1021 					  const struct btree *b)
1022 {
1023 	return __bch2_bkey_cmp_packed_format_checked_inlined(l, r, b);
1024 }
1025 
1026 __pure __flatten
1027 int __bch2_bkey_cmp_left_packed_format_checked(const struct btree *b,
1028 					       const struct bkey_packed *l,
1029 					       const struct bpos *r)
1030 {
1031 	return bpos_cmp(bkey_unpack_pos_format_checked(b, l), *r);
1032 }
1033 
1034 __pure __flatten
1035 int bch2_bkey_cmp_packed(const struct btree *b,
1036 			 const struct bkey_packed *l,
1037 			 const struct bkey_packed *r)
1038 {
1039 	return bch2_bkey_cmp_packed_inlined(b, l, r);
1040 }
1041 
1042 __pure __flatten
1043 int __bch2_bkey_cmp_left_packed(const struct btree *b,
1044 				const struct bkey_packed *l,
1045 				const struct bpos *r)
1046 {
1047 	const struct bkey *l_unpacked;
1048 
1049 	return unlikely(l_unpacked = packed_to_bkey_c(l))
1050 		? bpos_cmp(l_unpacked->p, *r)
1051 		: __bch2_bkey_cmp_left_packed_format_checked(b, l, r);
1052 }
1053 
1054 void bch2_bpos_swab(struct bpos *p)
1055 {
1056 	u8 *l = (u8 *) p;
1057 	u8 *h = ((u8 *) &p[1]) - 1;
1058 
1059 	while (l < h) {
1060 		swap(*l, *h);
1061 		l++;
1062 		--h;
1063 	}
1064 }
1065 
1066 void bch2_bkey_swab_key(const struct bkey_format *_f, struct bkey_packed *k)
1067 {
1068 	const struct bkey_format *f = bkey_packed(k) ? _f : &bch2_bkey_format_current;
1069 	u8 *l = k->key_start;
1070 	u8 *h = (u8 *) (k->_data + f->key_u64s) - 1;
1071 
1072 	while (l < h) {
1073 		swap(*l, *h);
1074 		l++;
1075 		--h;
1076 	}
1077 }
1078 
1079 #ifdef CONFIG_BCACHEFS_DEBUG
1080 void bch2_bkey_pack_test(void)
1081 {
1082 	struct bkey t = KEY(4134ULL, 1250629070527416633ULL, 0);
1083 	struct bkey_packed p;
1084 
1085 	struct bkey_format test_format = {
1086 		.key_u64s	= 3,
1087 		.nr_fields	= BKEY_NR_FIELDS,
1088 		.bits_per_field = {
1089 			13,
1090 			64,
1091 			32,
1092 		},
1093 	};
1094 
1095 	struct unpack_state in_s =
1096 		unpack_state_init(&bch2_bkey_format_current, (void *) &t);
1097 	struct pack_state out_s = pack_state_init(&test_format, &p);
1098 	unsigned i;
1099 
1100 	for (i = 0; i < out_s.format->nr_fields; i++) {
1101 		u64 a, v = get_inc_field(&in_s, i);
1102 
1103 		switch (i) {
1104 #define x(id, field)	case id: a = t.field; break;
1105 	bkey_fields()
1106 #undef x
1107 		default:
1108 			BUG();
1109 		}
1110 
1111 		if (a != v)
1112 			panic("got %llu actual %llu i %u\n", v, a, i);
1113 
1114 		if (!set_inc_field(&out_s, i, v))
1115 			panic("failed at %u\n", i);
1116 	}
1117 
1118 	BUG_ON(!bch2_bkey_pack_key(&p, &t, &test_format));
1119 }
1120 #endif
1121