xref: /linux/arch/s390/net/bpf_jit_comp.c (revision d9ef13f72711f2dad64cd4445472ded98fb6c954)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * BPF Jit compiler for s390.
4  *
5  * Minimum build requirements:
6  *
7  *  - HAVE_MARCH_Z196_FEATURES: laal, laalg
8  *  - HAVE_MARCH_Z10_FEATURES: msfi, cgrj, clgrj
9  *  - HAVE_MARCH_Z9_109_FEATURES: alfi, llilf, clfi, oilf, nilf
10  *  - 64BIT
11  *
12  * Copyright IBM Corp. 2012,2015
13  *
14  * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
15  *	      Michael Holzheu <holzheu@linux.vnet.ibm.com>
16  */
17 
18 #define pr_fmt(fmt) "bpf_jit: " fmt
19 
20 #include <linux/netdevice.h>
21 #include <linux/filter.h>
22 #include <linux/init.h>
23 #include <linux/bpf.h>
24 #include <linux/mm.h>
25 #include <linux/kernel.h>
26 #include <asm/cacheflush.h>
27 #include <asm/extable.h>
28 #include <asm/dis.h>
29 #include <asm/facility.h>
30 #include <asm/nospec-branch.h>
31 #include <asm/set_memory.h>
32 #include <asm/text-patching.h>
33 #include <asm/unwind.h>
34 
/*
 * Per-program JIT context, carried across all compilation passes.
 * During sizing passes prg_buf is NULL and only the offsets below are
 * advanced; bytes are written during the final code generation pass.
 * All positions are byte offsets from the start of the program.
 */
struct bpf_jit {
	u32 seen;		/* Flags to remember seen eBPF instructions */
	u16 seen_regs;		/* Mask to remember which registers are used */
	u32 *addrs;		/* Array with relative instruction addresses */
	u8 *prg_buf;		/* Start of program */
	int size;		/* Size of program and literal pool */
	int size_prg;		/* Size of program */
	int prg;		/* Current position in program */
	int lit32_start;	/* Start of 32-bit literal pool */
	int lit32;		/* Current position in 32-bit literal pool */
	int lit64_start;	/* Start of 64-bit literal pool */
	int lit64;		/* Current position in 64-bit literal pool */
	int base_ip;		/* Base address for literal pool */
	int exit_ip;		/* Address of exit */
	int tail_call_start;	/* Tail call start offset */
	int excnt;		/* Number of exception table entries */
	int prologue_plt_ret;	/* Return address for prologue hotpatch PLT */
	int prologue_plt;	/* Start of prologue hotpatch PLT */
	int kern_arena;		/* Pool offset of kernel arena address */
	u64 user_arena;		/* User arena address */
	u32 frame_off;		/* Offset of struct bpf_prog from %r15 */
};
57 
/* Flags for bpf_jit::seen */
#define SEEN_MEM	BIT(0)		/* use mem[] for temporary storage */
#define SEEN_LITERAL	BIT(1)		/* code uses literals */
#define SEEN_FUNC	BIT(2)		/* calls C functions */
#define SEEN_STACK	(SEEN_FUNC | SEEN_MEM)	/* needs a stack frame */

#define NVREGS		0xffc0		/* %r6-%r15 */
64 
/*
 * s390 registers
 *
 * The BPF register numbers are extended with four JIT-internal entries
 * (work registers, literal pool base and %r15); reg2hex[] below maps
 * every entry to a hardware register.
 */
#define REG_W0		(MAX_BPF_JIT_REG + 0)	/* Work register 1 (even) */
#define REG_W1		(MAX_BPF_JIT_REG + 1)	/* Work register 2 (odd) */
#define REG_L		(MAX_BPF_JIT_REG + 2)	/* Literal pool register */
#define REG_15		(MAX_BPF_JIT_REG + 3)	/* Register 15 */
#define REG_0		REG_W0			/* Register 0 */
#define REG_1		REG_W1			/* Register 1 */
#define REG_2		BPF_REG_1		/* Register 2 */
#define REG_3		BPF_REG_2		/* Register 3 */
#define REG_4		BPF_REG_3		/* Register 4 */
#define REG_7		BPF_REG_6		/* Register 7 */
#define REG_8		BPF_REG_7		/* Register 8 */
#define REG_14		BPF_REG_0		/* Register 14 */
80 
/*
 * Mapping of BPF registers to s390 registers
 *
 * Indexed by BPF register number (plus the JIT-internal entries above);
 * the value is the hardware register number 0-15.
 */
static const int reg2hex[] = {
	/* Return code */
	[BPF_REG_0]	= 14,
	/* Function parameters */
	[BPF_REG_1]	= 2,
	[BPF_REG_2]	= 3,
	[BPF_REG_3]	= 4,
	[BPF_REG_4]	= 5,
	[BPF_REG_5]	= 6,
	/* Call saved registers */
	[BPF_REG_6]	= 7,
	[BPF_REG_7]	= 8,
	[BPF_REG_8]	= 9,
	[BPF_REG_9]	= 10,
	/* BPF stack pointer */
	[BPF_REG_FP]	= 13,
	/* Register for blinding */
	[BPF_REG_AX]	= 12,
	/* Work registers for s390x backend */
	[REG_W0]	= 0,
	[REG_W1]	= 1,
	[REG_L]		= 11,
	[REG_15]	= 15,
};
108 
109 static inline u32 reg(u32 dst_reg, u32 src_reg)
110 {
111 	return reg2hex[dst_reg] << 4 | reg2hex[src_reg];
112 }
113 
114 static inline u32 reg_high(u32 reg)
115 {
116 	return reg2hex[reg] << 4;
117 }
118 
119 static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
120 {
121 	u32 r1 = reg2hex[b1];
122 
123 	if (r1 >= 6 && r1 <= 15)
124 		jit->seen_regs |= (1 << r1);
125 }
126 
127 static s32 off_to_pcrel(struct bpf_jit *jit, u32 off)
128 {
129 	return off - jit->prg;
130 }
131 
132 static s64 ptr_to_pcrel(struct bpf_jit *jit, const void *ptr)
133 {
134 	if (jit->prg_buf)
135 		return (const u8 *)ptr - ((const u8 *)jit->prg_buf + jit->prg);
136 	return 0;
137 }
138 
/* Record that b1's hardware register was used (see reg_set_seen()). */
#define REG_SET_SEEN(b1)					\
({								\
	reg_set_seen(jit, b1);					\
})
143 
/*
 * EMIT macros for code generation
 *
 * All macros rely on a local "jit" variable. During sizing passes
 * jit->prg_buf is NULL and only jit->prg is advanced; the bytes are
 * actually stored during the final code generation pass.
 */

/* Emit a raw 2-byte instruction */
#define _EMIT2(op)						\
({								\
	if (jit->prg_buf)					\
		*(u16 *) (jit->prg_buf + jit->prg) = (op);	\
	jit->prg += 2;						\
})

/* 2-byte instruction with two registers */
#define EMIT2(op, b1, b2)					\
({								\
	_EMIT2((op) | reg(b1, b2));				\
	REG_SET_SEEN(b1);					\
	REG_SET_SEEN(b2);					\
})

/* Emit a raw 4-byte instruction */
#define _EMIT4(op)						\
({								\
	if (jit->prg_buf)					\
		*(u32 *) (jit->prg_buf + jit->prg) = (op);	\
	jit->prg += 4;						\
})

/* 4-byte instruction with two registers */
#define EMIT4(op, b1, b2)					\
({								\
	_EMIT4((op) | reg(b1, b2));				\
	REG_SET_SEEN(b1);					\
	REG_SET_SEEN(b2);					\
})

/* RRF-format 4-byte instruction with three registers */
#define EMIT4_RRF(op, b1, b2, b3)				\
({								\
	_EMIT4((op) | reg_high(b3) << 8 | reg(b1, b2));		\
	REG_SET_SEEN(b1);					\
	REG_SET_SEEN(b2);					\
	REG_SET_SEEN(b3);					\
})

/* 4-byte instruction with a 12-bit displacement */
#define _EMIT4_DISP(op, disp)					\
({								\
	unsigned int __disp = (disp) & 0xfff;			\
	_EMIT4((op) | __disp);					\
})

/* 4-byte instruction with two registers and a 12-bit displacement */
#define EMIT4_DISP(op, b1, b2, disp)				\
({								\
	_EMIT4_DISP((op) | reg_high(b1) << 16 |			\
		    reg_high(b2) << 8, (disp));			\
	REG_SET_SEEN(b1);					\
	REG_SET_SEEN(b2);					\
})

/* 4-byte instruction with a register and a 16-bit immediate */
#define EMIT4_IMM(op, b1, imm)					\
({								\
	unsigned int __imm = (imm) & 0xffff;			\
	_EMIT4((op) | reg_high(b1) << 16 | __imm);		\
	REG_SET_SEEN(b1);					\
})

/* 4-byte instruction with a halfword-scaled PC-relative offset */
#define EMIT4_PCREL(op, pcrel)					\
({								\
	long __pcrel = ((pcrel) >> 1) & 0xffff;			\
	_EMIT4((op) | __pcrel);					\
})

/* RI-c format branch with condition mask to a program offset */
#define EMIT4_PCREL_RIC(op, mask, target)			\
({								\
	int __rel = off_to_pcrel(jit, target) / 2;		\
	_EMIT4((op) | (mask) << 20 | (__rel & 0xffff));		\
})
216 
/* Emit a raw 6-byte instruction as a 4-byte and a 2-byte half */
#define _EMIT6(op1, op2)					\
({								\
	if (jit->prg_buf) {					\
		*(u32 *) (jit->prg_buf + jit->prg) = (op1);	\
		*(u16 *) (jit->prg_buf + jit->prg + 4) = (op2);	\
	}							\
	jit->prg += 6;						\
})

/* 6-byte instruction with a 12-bit displacement */
#define _EMIT6_DISP(op1, op2, disp)				\
({								\
	unsigned int __disp = (disp) & 0xfff;			\
	_EMIT6((op1) | __disp, op2);				\
})

/* 6-byte instruction with a 20-bit displacement split into low/high parts */
#define _EMIT6_DISP_LH(op1, op2, disp)				\
({								\
	u32 _disp = (u32) (disp);				\
	unsigned int __disp_h = _disp & 0xff000;		\
	unsigned int __disp_l = _disp & 0x00fff;		\
	_EMIT6((op1) | __disp_l, (op2) | __disp_h >> 4);	\
})

/* 6-byte instruction with three registers and a 20-bit displacement */
#define EMIT6_DISP_LH(op1, op2, b1, b2, b3, disp)		\
({								\
	_EMIT6_DISP_LH((op1) | reg(b1, b2) << 16 |		\
		       reg_high(b3) << 8, op2, disp);		\
	REG_SET_SEEN(b1);					\
	REG_SET_SEEN(b2);					\
	REG_SET_SEEN(b3);					\
})

/* RIE-b format compare-and-branch with two registers */
#define EMIT6_PCREL_RIEB(op1, op2, b1, b2, mask, target)	\
({								\
	unsigned int rel = off_to_pcrel(jit, target) / 2;	\
	_EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff),	\
	       (op2) | (mask) << 12);				\
	REG_SET_SEEN(b1);					\
	REG_SET_SEEN(b2);					\
})

/* RIE-c format compare-and-branch with a register and an 8-bit immediate */
#define EMIT6_PCREL_RIEC(op1, op2, b1, imm, mask, target)	\
({								\
	unsigned int rel = off_to_pcrel(jit, target) / 2;	\
	_EMIT6((op1) | (reg_high(b1) | (mask)) << 16 |		\
		(rel & 0xffff), (op2) | ((imm) & 0xff) << 8);	\
	REG_SET_SEEN(b1);					\
	BUILD_BUG_ON(((unsigned long) (imm)) > 0xff);		\
})

/* 6-byte branch relative to the address of BPF instruction i + off + 1 */
#define EMIT6_PCREL(op1, op2, b1, b2, i, off, mask)		\
({								\
	int rel = off_to_pcrel(jit, addrs[(i) + (off) + 1]) / 2;\
	_EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff), (op2) | (mask));\
	REG_SET_SEEN(b1);					\
	REG_SET_SEEN(b2);					\
})
274 
275 static void emit6_pcrel_ril(struct bpf_jit *jit, u32 op, s64 pcrel)
276 {
277 	u32 pc32dbl = (s32)(pcrel / 2);
278 
279 	_EMIT6(op | pc32dbl >> 16, pc32dbl & 0xffff);
280 }
281 
/* RIL-b format: one register plus a PC-relative target */
static void emit6_pcrel_rilb(struct bpf_jit *jit, u32 op, u8 b, s64 pcrel)
{
	emit6_pcrel_ril(jit, op | reg_high(b) << 16, pcrel);
	REG_SET_SEEN(b);
}

/* RIL-b instruction targeting a program offset */
#define EMIT6_PCREL_RILB(op, b, target)				\
	emit6_pcrel_rilb(jit, op, b, off_to_pcrel(jit, target))

/* RIL-b instruction targeting an absolute pointer */
#define EMIT6_PCREL_RILB_PTR(op, b, target_ptr)			\
	emit6_pcrel_rilb(jit, op, b, ptr_to_pcrel(jit, target_ptr))

/* RIL-c format: condition mask plus a PC-relative target */
static void emit6_pcrel_rilc(struct bpf_jit *jit, u32 op, u8 mask, s64 pcrel)
{
	emit6_pcrel_ril(jit, op | mask << 20, pcrel);
}

/* RIL-c instruction targeting a program offset */
#define EMIT6_PCREL_RILC(op, mask, target)			\
	emit6_pcrel_rilc(jit, op, mask, off_to_pcrel(jit, target))

/* RIL-c instruction targeting an absolute pointer */
#define EMIT6_PCREL_RILC_PTR(op, mask, target_ptr)		\
	emit6_pcrel_rilc(jit, op, mask, ptr_to_pcrel(jit, target_ptr))
304 
/* 6-byte instruction with a 32-bit immediate split into two halves */
#define _EMIT6_IMM(op, imm)					\
({								\
	unsigned int __imm = (imm);				\
	_EMIT6((op) | (__imm >> 16), __imm & 0xffff);		\
})

/* 6-byte instruction with a register and a 32-bit immediate */
#define EMIT6_IMM(op, b1, imm)					\
({								\
	_EMIT6_IMM((op) | reg_high(b1) << 16, imm);		\
	REG_SET_SEEN(b1);					\
})

/* Reserve a 32-bit literal pool slot; evaluates to its program offset */
#define _EMIT_CONST_U32(val)					\
({								\
	unsigned int ret;					\
	ret = jit->lit32;					\
	if (jit->prg_buf)					\
		*(u32 *)(jit->prg_buf + jit->lit32) = (u32)(val);\
	jit->lit32 += 4;					\
	ret;							\
})

/* Like _EMIT_CONST_U32, but evaluates to a displacement from base_ip */
#define EMIT_CONST_U32(val)					\
({								\
	jit->seen |= SEEN_LITERAL;				\
	_EMIT_CONST_U32(val) - jit->base_ip;			\
})

/* Reserve a 64-bit literal pool slot; evaluates to its program offset */
#define _EMIT_CONST_U64(val)					\
({								\
	unsigned int ret;					\
	ret = jit->lit64;					\
	if (jit->prg_buf)					\
		*(u64 *)(jit->prg_buf + jit->lit64) = (u64)(val);\
	jit->lit64 += 8;					\
	ret;							\
})

/* Like _EMIT_CONST_U64, but evaluates to a displacement from base_ip */
#define EMIT_CONST_U64(val)					\
({								\
	jit->seen |= SEEN_LITERAL;				\
	_EMIT_CONST_U64(val) - jit->base_ip;			\
})

/* Zero-extend b1 to 64 bits, unless the verifier already guarantees it */
#define EMIT_ZERO(b1)						\
({								\
	if (!fp->aux->verifier_zext) {				\
		/* llgfr %dst,%dst (zero extend to 64 bit) */	\
		EMIT4(0xb9160000, b1, b1);			\
		REG_SET_SEEN(b1);				\
	}							\
})
357 
358 /*
359  * Return whether this is the first pass. The first pass is special, since we
360  * don't know any sizes yet, and thus must be conservative.
361  */
362 static bool is_first_pass(struct bpf_jit *jit)
363 {
364 	return jit->size == 0;
365 }
366 
367 /*
368  * Return whether this is the code generation pass. The code generation pass is
369  * special, since we should change as little as possible.
370  */
371 static bool is_codegen_pass(struct bpf_jit *jit)
372 {
373 	return jit->prg_buf;
374 }
375 
/*
 * Return whether "rel" fits a short PC-relative field: a signed 16-bit
 * halfword count, i.e. an even-granularity byte range [-65536, 65534].
 */
static bool is_valid_rel(int rel)
{
	return !(rel < -65536 || rel > 65534);
}
383 
384 /*
385  * Return whether "off" can be reached using a short PC-relative offset
386  */
387 static bool can_use_rel(struct bpf_jit *jit, int off)
388 {
389 	return is_valid_rel(off - jit->prg);
390 }
391 
/*
 * Return whether "disp" fits the 20-bit signed displacement of the
 * Long-Displacement Facility: [-524288, 524287].
 */
static bool is_valid_ldisp(int disp)
{
	return !(disp < -524288 || disp > 524287);
}
400 
401 /*
402  * Return whether the next 32-bit literal pool entry can be referenced using
403  * Long-Displacement Facility
404  */
405 static bool can_use_ldisp_for_lit32(struct bpf_jit *jit)
406 {
407 	return is_valid_ldisp(jit->lit32 - jit->base_ip);
408 }
409 
410 /*
411  * Return whether the next 64-bit literal pool entry can be referenced using
412  * Long-Displacement Facility
413  */
414 static bool can_use_ldisp_for_lit64(struct bpf_jit *jit)
415 {
416 	return is_valid_ldisp(jit->lit64 - jit->base_ip);
417 }
418 
/*
 * Fill whole space with illegal instructions
 *
 * 0x0000 is not a valid instruction on s390, so zero-filling makes a
 * stray jump into the hole fault immediately.
 */
static void jit_fill_hole(void *area, unsigned int size)
{
	memset(area, 0, size);
}
426 
/*
 * Caller-allocated part of the frame.
 * Thanks to packed stack, its otherwise unused initial part can be used for
 * the BPF stack and for the next frame.
 */
struct prog_frame {
	u64 unused[8];
	/* BPF stack starts here and grows towards 0 */
	u32 tail_call_cnt;	/* zeroed in the main prog's prologue */
	u32 pad;
	u64 r6[10];  /* r6 - r15 */
	u64 backchain;
} __packed;
440 
441 /*
442  * Save registers from "rs" (register start) to "re" (register end) on stack
443  */
444 static void save_regs(struct bpf_jit *jit, u32 rs, u32 re)
445 {
446 	u32 off = offsetof(struct prog_frame, r6) + (rs - 6) * 8;
447 
448 	if (rs == re)
449 		/* stg %rs,off(%r15) */
450 		_EMIT6(0xe300f000 | rs << 20 | off, 0x0024);
451 	else
452 		/* stmg %rs,%re,off(%r15) */
453 		_EMIT6_DISP(0xeb00f000 | rs << 20 | re << 16, 0x0024, off);
454 }
455 
456 /*
457  * Restore registers from "rs" (register start) to "re" (register end) on stack
458  */
459 static void restore_regs(struct bpf_jit *jit, u32 rs, u32 re)
460 {
461 	u32 off = jit->frame_off + offsetof(struct prog_frame, r6) + (rs - 6) * 8;
462 
463 	if (rs == re)
464 		/* lg %rs,off(%r15) */
465 		_EMIT6(0xe300f000 | rs << 20 | off, 0x0004);
466 	else
467 		/* lmg %rs,%re,off(%r15) */
468 		_EMIT6_DISP(0xeb00f000 | rs << 20 | re << 16, 0x0004, off);
469 }
470 
/*
 * Return the first register >= "start" marked in seen_regs,
 * or 0 if none is marked.
 */
static int get_start(u16 seen_regs, int start)
{
	int r = start;

	while (r <= 15) {
		if (seen_regs & (1 << r))
			return r;
		r++;
	}
	return 0;
}
484 
/*
 * Return the last register of the chunk beginning before "start": advance
 * while at least one of registers r, r + 1 is still marked (a gap smaller
 * than 2 registers does not end a chunk).
 */
static int get_end(u16 seen_regs, int start)
{
	int r = start;

	while (r < 15 && (seen_regs & (3 << r)))
		r++;
	if (r < 15)
		return r - 1;
	return (seen_regs & (1 << 15)) ? 15 : 14;
}
498 
499 #define REGS_SAVE	1
500 #define REGS_RESTORE	0
501 /*
502  * Save and restore clobbered registers (6-15) on stack.
503  * We save/restore registers in chunks with gap >= 2 registers.
504  */
505 static void save_restore_regs(struct bpf_jit *jit, int op, u16 extra_regs)
506 {
507 	u16 seen_regs = jit->seen_regs | extra_regs;
508 	const int last = 15, save_restore_size = 6;
509 	int re = 6, rs;
510 
511 	if (is_first_pass(jit)) {
512 		/*
513 		 * We don't know yet which registers are used. Reserve space
514 		 * conservatively.
515 		 */
516 		jit->prg += (last - re + 1) * save_restore_size;
517 		return;
518 	}
519 
520 	do {
521 		rs = get_start(seen_regs, re);
522 		if (!rs)
523 			break;
524 		re = get_end(seen_regs, rs + 1);
525 		if (op == REGS_SAVE)
526 			save_regs(jit, rs, re);
527 		else
528 			restore_regs(jit, rs, re);
529 		re++;
530 	} while (re <= last);
531 }
532 
/*
 * Fill a gap of "size" bytes: one branch over the gap when profitable
 * (brcl when too far for a short offset, otherwise brc), then pad with
 * 2-byte no-ops. "size" is expected to be even; an odd trailing byte
 * would be left unfilled.
 */
static void bpf_skip(struct bpf_jit *jit, int size)
{
	if (size >= 6 && !is_valid_rel(size)) {
		/* brcl 0xf,size */
		EMIT6_PCREL_RILC(0xc0040000, 0xf, size);
		size -= 6;
	} else if (size >= 4 && is_valid_rel(size)) {
		/* brc 0xf,size */
		EMIT4_PCREL(0xa7f40000, size);
		size -= 4;
	}
	while (size >= 2) {
		/* bcr 0,%0 */
		_EMIT2(0x0700);
		size -= 2;
	}
}
550 
/*
 * PLT for hotpatchable calls. The calling convention is the same as for the
 * ftrace hotpatch trampolines: %r0 is return address, %r1 is clobbered.
 *
 * The asm template below is the master copy; bpf_jit_plt() memcpy's it and
 * then fills in the ret/target slots (bpf_plt_ret/bpf_plt_target).
 */
struct bpf_plt {
	char code[16];
	void *ret;
	void *target;
} __packed;
extern const struct bpf_plt bpf_plt;
asm(
	".pushsection .rodata\n"
	"	.balign 8\n"
	"bpf_plt:\n"
	"	lgrl %r0,bpf_plt_ret\n"
	"	lgrl %r1,bpf_plt_target\n"
	"	br %r1\n"
	"	.balign 8\n"
	"bpf_plt_ret: .quad 0\n"
	"bpf_plt_target: .quad 0\n"
	"	.popsection\n"
);
573 
574 static void bpf_jit_plt(struct bpf_plt *plt, void *ret, void *target)
575 {
576 	memcpy(plt, &bpf_plt, sizeof(*plt));
577 	plt->ret = ret;
578 	/*
579 	 * (target == NULL) implies that the branch to this PLT entry was
580 	 * patched and became a no-op. However, some CPU could have jumped
581 	 * to this PLT entry before patching and may be still executing it.
582 	 *
583 	 * Since the intention in this case is to make the PLT entry a no-op,
584 	 * make the target point to the return label instead of NULL.
585 	 */
586 	plt->target = target ?: ret;
587 }
588 
/*
 * Emit function prologue
 *
 * Save registers and create stack frame if necessary.
 * Stack frame layout is described by struct prog_frame.
 */
static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp)
{
	/* prog_frame must exactly fill the caller-allocated stack area */
	BUILD_BUG_ON(sizeof(struct prog_frame) != STACK_FRAME_OVERHEAD);

	/* No-op for hotpatching */
	/* brcl 0,prologue_plt */
	EMIT6_PCREL_RILC(0xc0040000, 0, jit->prologue_plt);
	jit->prologue_plt_ret = jit->prg;

	if (!bpf_is_subprog(fp)) {
		/* Initialize the tail call counter in the main program. */
		/* xc tail_call_cnt(4,%r15),tail_call_cnt(%r15) */
		_EMIT6(0xd703f000 | offsetof(struct prog_frame, tail_call_cnt),
		       0xf000 | offsetof(struct prog_frame, tail_call_cnt));
	} else {
		/*
		 * Skip the tail call counter initialization in subprograms.
		 * Insert nops in order to have tail_call_start at a
		 * predictable offset.
		 */
		bpf_skip(jit, 6);
	}
	/* Tail calls have to skip above initialization */
	jit->tail_call_start = jit->prg;
	if (fp->aux->exception_cb) {
		/*
		 * Switch stack, the new address is in the 2nd parameter.
		 *
		 * Arrange the restoration of %r6-%r15 in the epilogue.
		 * Do not restore them now, the prog does not need them.
		 */
		/* lgr %r15,%r3 */
		EMIT4(0xb9040000, REG_15, REG_3);
		jit->seen_regs |= NVREGS;
	} else {
		/* Save registers */
		save_restore_regs(jit, REGS_SAVE,
				  fp->aux->exception_boundary ? NVREGS : 0);
	}
	/* Setup literal pool */
	if (is_first_pass(jit) || (jit->seen & SEEN_LITERAL)) {
		if (!is_first_pass(jit) &&
		    is_valid_ldisp(jit->size - (jit->prg + 2))) {
			/* basr %l,0 */
			EMIT2(0x0d00, REG_L, REG_0);
			jit->base_ip = jit->prg;
		} else {
			/* larl %l,lit32_start */
			EMIT6_PCREL_RILB(0xc0000000, REG_L, jit->lit32_start);
			jit->base_ip = jit->lit32_start;
		}
	}
	/* Setup stack and backchain */
	if (is_first_pass(jit) || (jit->seen & SEEN_STACK)) {
		/* lgr %w1,%r15 (backchain) */
		EMIT4(0xb9040000, REG_W1, REG_15);
		/* la %bfp,unused_end(%r15) (BPF frame pointer) */
		EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15,
			   offsetofend(struct prog_frame, unused));
		/* aghi %r15,-frame_off */
		EMIT4_IMM(0xa70b0000, REG_15, -jit->frame_off);
		/* stg %w1,backchain(%r15) */
		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
			      REG_15,
			      offsetof(struct prog_frame, backchain));
	}
}
662 
/*
 * Jump using a register either directly or via an expoline thunk.
 * "reg" must be a literal register number: it is token-pasted into the
 * thunk symbol name (__s390_indirect_jump_rN).
 */
#define EMIT_JUMP_REG(reg) do {						\
	if (nospec_uses_trampoline())					\
		/* brcl 0xf,__s390_indirect_jump_rN */			\
		EMIT6_PCREL_RILC_PTR(0xc0040000, 0x0f,			\
				     __s390_indirect_jump_r ## reg);	\
	else								\
		/* br %rN */						\
		_EMIT2(0x07f0 | reg);					\
} while (0)
675 
/*
 * Function epilogue
 *
 * Moves the BPF return value into %r2, restores the call-saved
 * registers, returns, and appends the hotpatch PLT.
 */
static void bpf_jit_epilogue(struct bpf_jit *jit)
{
	jit->exit_ip = jit->prg;
	/* Load exit code: lgr %r2,%b0 */
	EMIT4(0xb9040000, REG_2, BPF_REG_0);
	/* Restore registers */
	save_restore_regs(jit, REGS_RESTORE, 0);
	EMIT_JUMP_REG(14);

	/* The PLT template is 8-byte aligned; match that here. */
	jit->prg = ALIGN(jit->prg, 8);
	jit->prologue_plt = jit->prg;
	if (jit->prg_buf)
		bpf_jit_plt((struct bpf_plt *)(jit->prg_buf + jit->prg),
			    jit->prg_buf + jit->prologue_plt_ret, NULL);
	jit->prg += sizeof(struct bpf_plt);
}
695 
/*
 * Fixup handler for JITed probe instructions (see bpf_jit_probe_post()):
 * resume at the recorded landing pad and zero the register recorded in
 * x->data, unless it is -1 (no register to clear).
 */
bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs)
{
	regs->psw.addr = extable_fixup(x);
	if (x->data != -1)
		regs->gprs[x->data] = 0;
	return true;
}
703 
/*
 * A single BPF probe instruction
 *
 * Initialized by bpf_jit_probe_init(); prg == -1 means "not armed".
 */
struct bpf_jit_probe {
	int prg;	/* JITed instruction offset */
	int nop_prg;	/* JITed nop offset */
	int reg;	/* Register to clear on exception */
	int arena_reg;	/* Register to use for arena addressing */
};
713 
714 static void bpf_jit_probe_init(struct bpf_jit_probe *probe)
715 {
716 	probe->prg = -1;
717 	probe->nop_prg = -1;
718 	probe->reg = -1;
719 	probe->arena_reg = REG_0;
720 }
721 
/*
 * Handlers of certain exceptions leave psw.addr pointing to the instruction
 * directly after the failing one. Therefore, create two exception table
 * entries and also add a nop in case two probing instructions come directly
 * after each other.
 */
static void bpf_jit_probe_emit_nop(struct bpf_jit *jit,
				   struct bpf_jit_probe *probe)
{
	if (probe->prg == -1 || probe->nop_prg != -1)
		/* The probe is not armed or nop is already emitted. */
		return;

	probe->nop_prg = jit->prg;
	/* bcr 0,%0 */
	_EMIT2(0x0700);
}
739 
740 static void bpf_jit_probe_load_pre(struct bpf_jit *jit, struct bpf_insn *insn,
741 				   struct bpf_jit_probe *probe)
742 {
743 	if (BPF_MODE(insn->code) != BPF_PROBE_MEM &&
744 	    BPF_MODE(insn->code) != BPF_PROBE_MEMSX &&
745 	    BPF_MODE(insn->code) != BPF_PROBE_MEM32)
746 		return;
747 
748 	if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) {
749 		/* lgrl %r1,kern_arena */
750 		EMIT6_PCREL_RILB(0xc4080000, REG_W1, jit->kern_arena);
751 		probe->arena_reg = REG_W1;
752 	}
753 	probe->prg = jit->prg;
754 	probe->reg = reg2hex[insn->dst_reg];
755 }
756 
757 static void bpf_jit_probe_store_pre(struct bpf_jit *jit, struct bpf_insn *insn,
758 				    struct bpf_jit_probe *probe)
759 {
760 	if (BPF_MODE(insn->code) != BPF_PROBE_MEM32)
761 		return;
762 
763 	/* lgrl %r1,kern_arena */
764 	EMIT6_PCREL_RILB(0xc4080000, REG_W1, jit->kern_arena);
765 	probe->arena_reg = REG_W1;
766 	probe->prg = jit->prg;
767 }
768 
769 static void bpf_jit_probe_atomic_pre(struct bpf_jit *jit,
770 				     struct bpf_insn *insn,
771 				     struct bpf_jit_probe *probe)
772 {
773 	if (BPF_MODE(insn->code) != BPF_PROBE_ATOMIC)
774 		return;
775 
776 	/* lgrl %r1,kern_arena */
777 	EMIT6_PCREL_RILB(0xc4080000, REG_W1, jit->kern_arena);
778 	/* agr %r1,%dst */
779 	EMIT4(0xb9080000, REG_W1, insn->dst_reg);
780 	probe->arena_reg = REG_W1;
781 	probe->prg = jit->prg;
782 }
783 
/*
 * Finalize an armed probe: emit the trailing nop and create two extable
 * entries (one for the probe instruction, one for the nop), both landing
 * at the current program position with probe->reg cleared by
 * ex_handler_bpf(). Returns 0 on success, -1 on a JIT/verifier
 * inconsistency.
 */
static int bpf_jit_probe_post(struct bpf_jit *jit, struct bpf_prog *fp,
			      struct bpf_jit_probe *probe)
{
	struct exception_table_entry *ex;
	int i, prg;
	s64 delta;
	u8 *insn;

	if (probe->prg == -1)
		/* The probe is not armed. */
		return 0;
	bpf_jit_probe_emit_nop(jit, probe);
	if (!fp->aux->extable)
		/* Do nothing during early JIT passes. */
		return 0;
	insn = jit->prg_buf + probe->prg;
	if (WARN_ON_ONCE(probe->prg + insn_length(*insn) != probe->nop_prg))
		/* JIT bug - gap between probe and nop instructions. */
		return -1;
	for (i = 0; i < 2; i++) {
		if (WARN_ON_ONCE(jit->excnt >= fp->aux->num_exentries))
			/* Verifier bug - not enough entries. */
			return -1;
		ex = &fp->aux->extable[jit->excnt];
		/* Add extable entries for probe and nop instructions. */
		prg = i == 0 ? probe->prg : probe->nop_prg;
		/* extable fields are self-relative 32-bit offsets. */
		delta = jit->prg_buf + prg - (u8 *)&ex->insn;
		if (WARN_ON_ONCE(delta < INT_MIN || delta > INT_MAX))
			/* JIT bug - code and extable must be close. */
			return -1;
		ex->insn = delta;
		/*
		 * Land on the current instruction. Note that the extable
		 * infrastructure ignores the fixup field; it is handled by
		 * ex_handler_bpf().
		 */
		delta = jit->prg_buf + jit->prg - (u8 *)&ex->fixup;
		if (WARN_ON_ONCE(delta < INT_MIN || delta > INT_MAX))
			/* JIT bug - landing pad and extable must be close. */
			return -1;
		ex->fixup = delta;
		ex->type = EX_TYPE_BPF;
		ex->data = probe->reg;
		jit->excnt++;
	}
	return 0;
}
831 
832 /*
833  * Sign- or zero-extend the register if necessary
834  */
835 static int sign_zero_extend(struct bpf_jit *jit, int r, u8 size, u8 flags)
836 {
837 	switch (size) {
838 	case 1:
839 		if (flags & BTF_FMODEL_SIGNED_ARG)
840 			/* lgbr %r,%r */
841 			EMIT4(0xb9060000, r, r);
842 		else
843 			/* llgcr %r,%r */
844 			EMIT4(0xb9840000, r, r);
845 		return 0;
846 	case 2:
847 		if (flags & BTF_FMODEL_SIGNED_ARG)
848 			/* lghr %r,%r */
849 			EMIT4(0xb9070000, r, r);
850 		else
851 			/* llghr %r,%r */
852 			EMIT4(0xb9850000, r, r);
853 		return 0;
854 	case 4:
855 		if (flags & BTF_FMODEL_SIGNED_ARG)
856 			/* lgfr %r,%r */
857 			EMIT4(0xb9140000, r, r);
858 		else
859 			/* llgfr %r,%r */
860 			EMIT4(0xb9160000, r, r);
861 		return 0;
862 	case 8:
863 		return 0;
864 	default:
865 		return -1;
866 	}
867 }
868 
869 /*
870  * Compile one eBPF instruction into s390x code
871  *
872  * NOTE: Use noinline because for gcov (-fprofile-arcs) gcc allocates a lot of
873  * stack space for the large switch statement.
874  */
875 static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
876 				 int i, bool extra_pass)
877 {
878 	struct bpf_insn *insn = &fp->insnsi[i];
879 	s32 branch_oc_off = insn->off;
880 	u32 dst_reg = insn->dst_reg;
881 	u32 src_reg = insn->src_reg;
882 	struct bpf_jit_probe probe;
883 	int last, insn_count = 1;
884 	u32 *addrs = jit->addrs;
885 	s32 imm = insn->imm;
886 	s16 off = insn->off;
887 	unsigned int mask;
888 	int err;
889 
890 	bpf_jit_probe_init(&probe);
891 
892 	switch (insn->code) {
893 	/*
894 	 * BPF_MOV
895 	 */
896 	case BPF_ALU | BPF_MOV | BPF_X:
897 		switch (insn->off) {
898 		case 0: /* DST = (u32) SRC */
899 			/* llgfr %dst,%src */
900 			EMIT4(0xb9160000, dst_reg, src_reg);
901 			if (insn_is_zext(&insn[1]))
902 				insn_count = 2;
903 			break;
904 		case 8: /* DST = (u32)(s8) SRC */
905 			/* lbr %dst,%src */
906 			EMIT4(0xb9260000, dst_reg, src_reg);
907 			/* llgfr %dst,%dst */
908 			EMIT4(0xb9160000, dst_reg, dst_reg);
909 			break;
910 		case 16: /* DST = (u32)(s16) SRC */
911 			/* lhr %dst,%src */
912 			EMIT4(0xb9270000, dst_reg, src_reg);
913 			/* llgfr %dst,%dst */
914 			EMIT4(0xb9160000, dst_reg, dst_reg);
915 			break;
916 		}
917 		break;
918 	case BPF_ALU64 | BPF_MOV | BPF_X:
919 		if (insn_is_cast_user(insn)) {
920 			int patch_brc;
921 
922 			/* ltgr %dst,%src */
923 			EMIT4(0xb9020000, dst_reg, src_reg);
924 			/* brc 8,0f */
925 			patch_brc = jit->prg;
926 			EMIT4_PCREL_RIC(0xa7040000, 8, 0);
927 			/* iihf %dst,user_arena>>32 */
928 			EMIT6_IMM(0xc0080000, dst_reg, jit->user_arena >> 32);
929 			/* 0: */
930 			if (jit->prg_buf)
931 				*(u16 *)(jit->prg_buf + patch_brc + 2) =
932 					(jit->prg - patch_brc) >> 1;
933 			break;
934 		}
935 		switch (insn->off) {
936 		case 0: /* DST = SRC */
937 			/* lgr %dst,%src */
938 			EMIT4(0xb9040000, dst_reg, src_reg);
939 			break;
940 		case 8: /* DST = (s8) SRC */
941 			/* lgbr %dst,%src */
942 			EMIT4(0xb9060000, dst_reg, src_reg);
943 			break;
944 		case 16: /* DST = (s16) SRC */
945 			/* lghr %dst,%src */
946 			EMIT4(0xb9070000, dst_reg, src_reg);
947 			break;
948 		case 32: /* DST = (s32) SRC */
949 			/* lgfr %dst,%src */
950 			EMIT4(0xb9140000, dst_reg, src_reg);
951 			break;
952 		}
953 		break;
954 	case BPF_ALU | BPF_MOV | BPF_K: /* dst = (u32) imm */
955 		/* llilf %dst,imm */
956 		EMIT6_IMM(0xc00f0000, dst_reg, imm);
957 		if (insn_is_zext(&insn[1]))
958 			insn_count = 2;
959 		break;
960 	case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = imm */
961 		/* lgfi %dst,imm */
962 		EMIT6_IMM(0xc0010000, dst_reg, imm);
963 		break;
964 	/*
965 	 * BPF_LD 64
966 	 */
967 	case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
968 	{
969 		/* 16 byte instruction that uses two 'struct bpf_insn' */
970 		u64 imm64;
971 
972 		imm64 = (u64)(u32) insn[0].imm | ((u64)(u32) insn[1].imm) << 32;
973 		/* lgrl %dst,imm */
974 		EMIT6_PCREL_RILB(0xc4080000, dst_reg, _EMIT_CONST_U64(imm64));
975 		insn_count = 2;
976 		break;
977 	}
978 	/*
979 	 * BPF_ADD
980 	 */
981 	case BPF_ALU | BPF_ADD | BPF_X: /* dst = (u32) dst + (u32) src */
982 		/* ar %dst,%src */
983 		EMIT2(0x1a00, dst_reg, src_reg);
984 		EMIT_ZERO(dst_reg);
985 		break;
986 	case BPF_ALU64 | BPF_ADD | BPF_X: /* dst = dst + src */
987 		/* agr %dst,%src */
988 		EMIT4(0xb9080000, dst_reg, src_reg);
989 		break;
990 	case BPF_ALU | BPF_ADD | BPF_K: /* dst = (u32) dst + (u32) imm */
991 		if (imm != 0) {
992 			/* alfi %dst,imm */
993 			EMIT6_IMM(0xc20b0000, dst_reg, imm);
994 		}
995 		EMIT_ZERO(dst_reg);
996 		break;
997 	case BPF_ALU64 | BPF_ADD | BPF_K: /* dst = dst + imm */
998 		if (!imm)
999 			break;
1000 		/* agfi %dst,imm */
1001 		EMIT6_IMM(0xc2080000, dst_reg, imm);
1002 		break;
1003 	/*
1004 	 * BPF_SUB
1005 	 */
1006 	case BPF_ALU | BPF_SUB | BPF_X: /* dst = (u32) dst - (u32) src */
1007 		/* sr %dst,%src */
1008 		EMIT2(0x1b00, dst_reg, src_reg);
1009 		EMIT_ZERO(dst_reg);
1010 		break;
1011 	case BPF_ALU64 | BPF_SUB | BPF_X: /* dst = dst - src */
1012 		/* sgr %dst,%src */
1013 		EMIT4(0xb9090000, dst_reg, src_reg);
1014 		break;
1015 	case BPF_ALU | BPF_SUB | BPF_K: /* dst = (u32) dst - (u32) imm */
1016 		if (imm != 0) {
1017 			/* alfi %dst,-imm */
1018 			EMIT6_IMM(0xc20b0000, dst_reg, -imm);
1019 		}
1020 		EMIT_ZERO(dst_reg);
1021 		break;
1022 	case BPF_ALU64 | BPF_SUB | BPF_K: /* dst = dst - imm */
1023 		if (!imm)
1024 			break;
1025 		if (imm == -0x80000000) {
1026 			/* algfi %dst,0x80000000 */
1027 			EMIT6_IMM(0xc20a0000, dst_reg, 0x80000000);
1028 		} else {
1029 			/* agfi %dst,-imm */
1030 			EMIT6_IMM(0xc2080000, dst_reg, -imm);
1031 		}
1032 		break;
1033 	/*
1034 	 * BPF_MUL
1035 	 */
1036 	case BPF_ALU | BPF_MUL | BPF_X: /* dst = (u32) dst * (u32) src */
1037 		/* msr %dst,%src */
1038 		EMIT4(0xb2520000, dst_reg, src_reg);
1039 		EMIT_ZERO(dst_reg);
1040 		break;
1041 	case BPF_ALU64 | BPF_MUL | BPF_X: /* dst = dst * src */
1042 		/* msgr %dst,%src */
1043 		EMIT4(0xb90c0000, dst_reg, src_reg);
1044 		break;
1045 	case BPF_ALU | BPF_MUL | BPF_K: /* dst = (u32) dst * (u32) imm */
1046 		if (imm != 1) {
1047 			/* msfi %r5,imm */
1048 			EMIT6_IMM(0xc2010000, dst_reg, imm);
1049 		}
1050 		EMIT_ZERO(dst_reg);
1051 		break;
1052 	case BPF_ALU64 | BPF_MUL | BPF_K: /* dst = dst * imm */
1053 		if (imm == 1)
1054 			break;
1055 		/* msgfi %dst,imm */
1056 		EMIT6_IMM(0xc2000000, dst_reg, imm);
1057 		break;
1058 	/*
1059 	 * BPF_DIV / BPF_MOD
1060 	 */
1061 	case BPF_ALU | BPF_DIV | BPF_X:
1062 	case BPF_ALU | BPF_MOD | BPF_X:
1063 	{
1064 		int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
1065 
1066 		switch (off) {
1067 		case 0: /* dst = (u32) dst {/,%} (u32) src */
1068 			/* xr %w0,%w0 */
1069 			EMIT2(0x1700, REG_W0, REG_W0);
1070 			/* lr %w1,%dst */
1071 			EMIT2(0x1800, REG_W1, dst_reg);
1072 			/* dlr %w0,%src */
1073 			EMIT4(0xb9970000, REG_W0, src_reg);
1074 			break;
1075 		case 1: /* dst = (u32) ((s32) dst {/,%} (s32) src) */
1076 			/* lgfr %r1,%dst */
1077 			EMIT4(0xb9140000, REG_W1, dst_reg);
1078 			/* dsgfr %r0,%src */
1079 			EMIT4(0xb91d0000, REG_W0, src_reg);
1080 			break;
1081 		}
1082 		/* llgfr %dst,%rc */
1083 		EMIT4(0xb9160000, dst_reg, rc_reg);
1084 		if (insn_is_zext(&insn[1]))
1085 			insn_count = 2;
1086 		break;
1087 	}
1088 	case BPF_ALU64 | BPF_DIV | BPF_X:
1089 	case BPF_ALU64 | BPF_MOD | BPF_X:
1090 	{
1091 		int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
1092 
1093 		switch (off) {
1094 		case 0: /* dst = dst {/,%} src */
1095 			/* lghi %w0,0 */
1096 			EMIT4_IMM(0xa7090000, REG_W0, 0);
1097 			/* lgr %w1,%dst */
1098 			EMIT4(0xb9040000, REG_W1, dst_reg);
1099 			/* dlgr %w0,%src */
1100 			EMIT4(0xb9870000, REG_W0, src_reg);
1101 			break;
1102 		case 1: /* dst = (s64) dst {/,%} (s64) src */
1103 			/* lgr %w1,%dst */
1104 			EMIT4(0xb9040000, REG_W1, dst_reg);
1105 			/* dsgr %w0,%src */
1106 			EMIT4(0xb90d0000, REG_W0, src_reg);
1107 			break;
1108 		}
1109 		/* lgr %dst,%rc */
1110 		EMIT4(0xb9040000, dst_reg, rc_reg);
1111 		break;
1112 	}
1113 	case BPF_ALU | BPF_DIV | BPF_K:
1114 	case BPF_ALU | BPF_MOD | BPF_K:
1115 	{
1116 		int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
1117 
1118 		if (imm == 1) {
1119 			if (BPF_OP(insn->code) == BPF_MOD)
1120 				/* lghi %dst,0 */
1121 				EMIT4_IMM(0xa7090000, dst_reg, 0);
1122 			else
1123 				EMIT_ZERO(dst_reg);
1124 			break;
1125 		}
1126 		if (!is_first_pass(jit) && can_use_ldisp_for_lit32(jit)) {
1127 			switch (off) {
1128 			case 0: /* dst = (u32) dst {/,%} (u32) imm */
1129 				/* xr %w0,%w0 */
1130 				EMIT2(0x1700, REG_W0, REG_W0);
1131 				/* lr %w1,%dst */
1132 				EMIT2(0x1800, REG_W1, dst_reg);
1133 				/* dl %w0,<d(imm)>(%l) */
1134 				EMIT6_DISP_LH(0xe3000000, 0x0097, REG_W0, REG_0,
1135 					      REG_L, EMIT_CONST_U32(imm));
1136 				break;
1137 			case 1: /* dst = (s32) dst {/,%} (s32) imm */
1138 				/* lgfr %r1,%dst */
1139 				EMIT4(0xb9140000, REG_W1, dst_reg);
1140 				/* dsgf %r0,<d(imm)>(%l) */
1141 				EMIT6_DISP_LH(0xe3000000, 0x001d, REG_W0, REG_0,
1142 					      REG_L, EMIT_CONST_U32(imm));
1143 				break;
1144 			}
1145 		} else {
1146 			switch (off) {
1147 			case 0: /* dst = (u32) dst {/,%} (u32) imm */
1148 				/* xr %w0,%w0 */
1149 				EMIT2(0x1700, REG_W0, REG_W0);
1150 				/* lr %w1,%dst */
1151 				EMIT2(0x1800, REG_W1, dst_reg);
1152 				/* lrl %dst,imm */
1153 				EMIT6_PCREL_RILB(0xc40d0000, dst_reg,
1154 						 _EMIT_CONST_U32(imm));
1155 				jit->seen |= SEEN_LITERAL;
1156 				/* dlr %w0,%dst */
1157 				EMIT4(0xb9970000, REG_W0, dst_reg);
1158 				break;
1159 			case 1: /* dst = (s32) dst {/,%} (s32) imm */
1160 				/* lgfr %w1,%dst */
1161 				EMIT4(0xb9140000, REG_W1, dst_reg);
1162 				/* lgfrl %dst,imm */
1163 				EMIT6_PCREL_RILB(0xc40c0000, dst_reg,
1164 						 _EMIT_CONST_U32(imm));
1165 				jit->seen |= SEEN_LITERAL;
1166 				/* dsgr %w0,%dst */
1167 				EMIT4(0xb90d0000, REG_W0, dst_reg);
1168 				break;
1169 			}
1170 		}
1171 		/* llgfr %dst,%rc */
1172 		EMIT4(0xb9160000, dst_reg, rc_reg);
1173 		if (insn_is_zext(&insn[1]))
1174 			insn_count = 2;
1175 		break;
1176 	}
1177 	case BPF_ALU64 | BPF_DIV | BPF_K:
1178 	case BPF_ALU64 | BPF_MOD | BPF_K:
1179 	{
1180 		int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
1181 
1182 		if (imm == 1) {
1183 			if (BPF_OP(insn->code) == BPF_MOD)
1184 				/* lghi %dst,0 */
1185 				EMIT4_IMM(0xa7090000, dst_reg, 0);
1186 			break;
1187 		}
1188 		if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
1189 			switch (off) {
1190 			case 0: /* dst = dst {/,%} imm */
1191 				/* lghi %w0,0 */
1192 				EMIT4_IMM(0xa7090000, REG_W0, 0);
1193 				/* lgr %w1,%dst */
1194 				EMIT4(0xb9040000, REG_W1, dst_reg);
1195 				/* dlg %w0,<d(imm)>(%l) */
1196 				EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0,
1197 					      REG_L, EMIT_CONST_U64(imm));
1198 				break;
1199 			case 1: /* dst = (s64) dst {/,%} (s64) imm */
1200 				/* lgr %w1,%dst */
1201 				EMIT4(0xb9040000, REG_W1, dst_reg);
1202 				/* dsg %w0,<d(imm)>(%l) */
1203 				EMIT6_DISP_LH(0xe3000000, 0x000d, REG_W0, REG_0,
1204 					      REG_L, EMIT_CONST_U64(imm));
1205 				break;
1206 			}
1207 		} else {
1208 			switch (off) {
1209 			case 0: /* dst = dst {/,%} imm */
1210 				/* lghi %w0,0 */
1211 				EMIT4_IMM(0xa7090000, REG_W0, 0);
1212 				/* lgr %w1,%dst */
1213 				EMIT4(0xb9040000, REG_W1, dst_reg);
1214 				/* lgrl %dst,imm */
1215 				EMIT6_PCREL_RILB(0xc4080000, dst_reg,
1216 						 _EMIT_CONST_U64(imm));
1217 				jit->seen |= SEEN_LITERAL;
1218 				/* dlgr %w0,%dst */
1219 				EMIT4(0xb9870000, REG_W0, dst_reg);
1220 				break;
1221 			case 1: /* dst = (s64) dst {/,%} (s64) imm */
1222 				/* lgr %w1,%dst */
1223 				EMIT4(0xb9040000, REG_W1, dst_reg);
1224 				/* lgrl %dst,imm */
1225 				EMIT6_PCREL_RILB(0xc4080000, dst_reg,
1226 						 _EMIT_CONST_U64(imm));
1227 				jit->seen |= SEEN_LITERAL;
1228 				/* dsgr %w0,%dst */
1229 				EMIT4(0xb90d0000, REG_W0, dst_reg);
1230 				break;
1231 			}
1232 		}
1233 		/* lgr %dst,%rc */
1234 		EMIT4(0xb9040000, dst_reg, rc_reg);
1235 		break;
1236 	}
1237 	/*
1238 	 * BPF_AND
1239 	 */
1240 	case BPF_ALU | BPF_AND | BPF_X: /* dst = (u32) dst & (u32) src */
1241 		/* nr %dst,%src */
1242 		EMIT2(0x1400, dst_reg, src_reg);
1243 		EMIT_ZERO(dst_reg);
1244 		break;
1245 	case BPF_ALU64 | BPF_AND | BPF_X: /* dst = dst & src */
1246 		/* ngr %dst,%src */
1247 		EMIT4(0xb9800000, dst_reg, src_reg);
1248 		break;
1249 	case BPF_ALU | BPF_AND | BPF_K: /* dst = (u32) dst & (u32) imm */
1250 		/* nilf %dst,imm */
1251 		EMIT6_IMM(0xc00b0000, dst_reg, imm);
1252 		EMIT_ZERO(dst_reg);
1253 		break;
1254 	case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */
1255 		if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
1256 			/* ng %dst,<d(imm)>(%l) */
1257 			EMIT6_DISP_LH(0xe3000000, 0x0080,
1258 				      dst_reg, REG_0, REG_L,
1259 				      EMIT_CONST_U64(imm));
1260 		} else {
1261 			/* lgrl %w0,imm */
1262 			EMIT6_PCREL_RILB(0xc4080000, REG_W0,
1263 					 _EMIT_CONST_U64(imm));
1264 			jit->seen |= SEEN_LITERAL;
1265 			/* ngr %dst,%w0 */
1266 			EMIT4(0xb9800000, dst_reg, REG_W0);
1267 		}
1268 		break;
1269 	/*
1270 	 * BPF_OR
1271 	 */
1272 	case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */
1273 		/* or %dst,%src */
1274 		EMIT2(0x1600, dst_reg, src_reg);
1275 		EMIT_ZERO(dst_reg);
1276 		break;
1277 	case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */
1278 		/* ogr %dst,%src */
1279 		EMIT4(0xb9810000, dst_reg, src_reg);
1280 		break;
1281 	case BPF_ALU | BPF_OR | BPF_K: /* dst = (u32) dst | (u32) imm */
1282 		/* oilf %dst,imm */
1283 		EMIT6_IMM(0xc00d0000, dst_reg, imm);
1284 		EMIT_ZERO(dst_reg);
1285 		break;
1286 	case BPF_ALU64 | BPF_OR | BPF_K: /* dst = dst | imm */
1287 		if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
1288 			/* og %dst,<d(imm)>(%l) */
1289 			EMIT6_DISP_LH(0xe3000000, 0x0081,
1290 				      dst_reg, REG_0, REG_L,
1291 				      EMIT_CONST_U64(imm));
1292 		} else {
1293 			/* lgrl %w0,imm */
1294 			EMIT6_PCREL_RILB(0xc4080000, REG_W0,
1295 					 _EMIT_CONST_U64(imm));
1296 			jit->seen |= SEEN_LITERAL;
1297 			/* ogr %dst,%w0 */
1298 			EMIT4(0xb9810000, dst_reg, REG_W0);
1299 		}
1300 		break;
1301 	/*
1302 	 * BPF_XOR
1303 	 */
1304 	case BPF_ALU | BPF_XOR | BPF_X: /* dst = (u32) dst ^ (u32) src */
1305 		/* xr %dst,%src */
1306 		EMIT2(0x1700, dst_reg, src_reg);
1307 		EMIT_ZERO(dst_reg);
1308 		break;
1309 	case BPF_ALU64 | BPF_XOR | BPF_X: /* dst = dst ^ src */
1310 		/* xgr %dst,%src */
1311 		EMIT4(0xb9820000, dst_reg, src_reg);
1312 		break;
1313 	case BPF_ALU | BPF_XOR | BPF_K: /* dst = (u32) dst ^ (u32) imm */
1314 		if (imm != 0) {
1315 			/* xilf %dst,imm */
1316 			EMIT6_IMM(0xc0070000, dst_reg, imm);
1317 		}
1318 		EMIT_ZERO(dst_reg);
1319 		break;
1320 	case BPF_ALU64 | BPF_XOR | BPF_K: /* dst = dst ^ imm */
1321 		if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
1322 			/* xg %dst,<d(imm)>(%l) */
1323 			EMIT6_DISP_LH(0xe3000000, 0x0082,
1324 				      dst_reg, REG_0, REG_L,
1325 				      EMIT_CONST_U64(imm));
1326 		} else {
1327 			/* lgrl %w0,imm */
1328 			EMIT6_PCREL_RILB(0xc4080000, REG_W0,
1329 					 _EMIT_CONST_U64(imm));
1330 			jit->seen |= SEEN_LITERAL;
1331 			/* xgr %dst,%w0 */
1332 			EMIT4(0xb9820000, dst_reg, REG_W0);
1333 		}
1334 		break;
1335 	/*
1336 	 * BPF_LSH
1337 	 */
1338 	case BPF_ALU | BPF_LSH | BPF_X: /* dst = (u32) dst << (u32) src */
1339 		/* sll %dst,0(%src) */
1340 		EMIT4_DISP(0x89000000, dst_reg, src_reg, 0);
1341 		EMIT_ZERO(dst_reg);
1342 		break;
1343 	case BPF_ALU64 | BPF_LSH | BPF_X: /* dst = dst << src */
1344 		/* sllg %dst,%dst,0(%src) */
1345 		EMIT6_DISP_LH(0xeb000000, 0x000d, dst_reg, dst_reg, src_reg, 0);
1346 		break;
1347 	case BPF_ALU | BPF_LSH | BPF_K: /* dst = (u32) dst << (u32) imm */
1348 		if (imm != 0) {
1349 			/* sll %dst,imm(%r0) */
1350 			EMIT4_DISP(0x89000000, dst_reg, REG_0, imm);
1351 		}
1352 		EMIT_ZERO(dst_reg);
1353 		break;
1354 	case BPF_ALU64 | BPF_LSH | BPF_K: /* dst = dst << imm */
1355 		if (imm == 0)
1356 			break;
1357 		/* sllg %dst,%dst,imm(%r0) */
1358 		EMIT6_DISP_LH(0xeb000000, 0x000d, dst_reg, dst_reg, REG_0, imm);
1359 		break;
1360 	/*
1361 	 * BPF_RSH
1362 	 */
1363 	case BPF_ALU | BPF_RSH | BPF_X: /* dst = (u32) dst >> (u32) src */
1364 		/* srl %dst,0(%src) */
1365 		EMIT4_DISP(0x88000000, dst_reg, src_reg, 0);
1366 		EMIT_ZERO(dst_reg);
1367 		break;
1368 	case BPF_ALU64 | BPF_RSH | BPF_X: /* dst = dst >> src */
1369 		/* srlg %dst,%dst,0(%src) */
1370 		EMIT6_DISP_LH(0xeb000000, 0x000c, dst_reg, dst_reg, src_reg, 0);
1371 		break;
1372 	case BPF_ALU | BPF_RSH | BPF_K: /* dst = (u32) dst >> (u32) imm */
1373 		if (imm != 0) {
1374 			/* srl %dst,imm(%r0) */
1375 			EMIT4_DISP(0x88000000, dst_reg, REG_0, imm);
1376 		}
1377 		EMIT_ZERO(dst_reg);
1378 		break;
1379 	case BPF_ALU64 | BPF_RSH | BPF_K: /* dst = dst >> imm */
1380 		if (imm == 0)
1381 			break;
1382 		/* srlg %dst,%dst,imm(%r0) */
1383 		EMIT6_DISP_LH(0xeb000000, 0x000c, dst_reg, dst_reg, REG_0, imm);
1384 		break;
1385 	/*
1386 	 * BPF_ARSH
1387 	 */
1388 	case BPF_ALU | BPF_ARSH | BPF_X: /* ((s32) dst) >>= src */
1389 		/* sra %dst,0(%src) */
1390 		EMIT4_DISP(0x8a000000, dst_reg, src_reg, 0);
1391 		EMIT_ZERO(dst_reg);
1392 		break;
1393 	case BPF_ALU64 | BPF_ARSH | BPF_X: /* ((s64) dst) >>= src */
1394 		/* srag %dst,%dst,0(%src) */
1395 		EMIT6_DISP_LH(0xeb000000, 0x000a, dst_reg, dst_reg, src_reg, 0);
1396 		break;
1397 	case BPF_ALU | BPF_ARSH | BPF_K: /* ((s32) dst) >>= imm */
1398 		if (imm != 0) {
1399 			/* sra %dst,imm(%r0) */
1400 			EMIT4_DISP(0x8a000000, dst_reg, REG_0, imm);
1401 		}
1402 		EMIT_ZERO(dst_reg);
1403 		break;
1404 	case BPF_ALU64 | BPF_ARSH | BPF_K: /* ((s64) dst) >>= imm */
1405 		if (imm == 0)
1406 			break;
1407 		/* srag %dst,%dst,imm(%r0) */
1408 		EMIT6_DISP_LH(0xeb000000, 0x000a, dst_reg, dst_reg, REG_0, imm);
1409 		break;
1410 	/*
1411 	 * BPF_NEG
1412 	 */
1413 	case BPF_ALU | BPF_NEG: /* dst = (u32) -dst */
1414 		/* lcr %dst,%dst */
1415 		EMIT2(0x1300, dst_reg, dst_reg);
1416 		EMIT_ZERO(dst_reg);
1417 		break;
1418 	case BPF_ALU64 | BPF_NEG: /* dst = -dst */
1419 		/* lcgr %dst,%dst */
1420 		EMIT4(0xb9030000, dst_reg, dst_reg);
1421 		break;
1422 	/*
1423 	 * BPF_FROM_BE/LE
1424 	 */
1425 	case BPF_ALU | BPF_END | BPF_FROM_BE:
1426 		/* s390 is big endian, therefore only clear high order bytes */
1427 		switch (imm) {
1428 		case 16: /* dst = (u16) cpu_to_be16(dst) */
1429 			/* llghr %dst,%dst */
1430 			EMIT4(0xb9850000, dst_reg, dst_reg);
1431 			if (insn_is_zext(&insn[1]))
1432 				insn_count = 2;
1433 			break;
1434 		case 32: /* dst = (u32) cpu_to_be32(dst) */
1435 			if (!fp->aux->verifier_zext)
1436 				/* llgfr %dst,%dst */
1437 				EMIT4(0xb9160000, dst_reg, dst_reg);
1438 			break;
1439 		case 64: /* dst = (u64) cpu_to_be64(dst) */
1440 			break;
1441 		}
1442 		break;
1443 	case BPF_ALU | BPF_END | BPF_FROM_LE:
1444 	case BPF_ALU64 | BPF_END | BPF_FROM_LE:
1445 		switch (imm) {
1446 		case 16: /* dst = (u16) cpu_to_le16(dst) */
1447 			/* lrvr %dst,%dst */
1448 			EMIT4(0xb91f0000, dst_reg, dst_reg);
1449 			/* srl %dst,16(%r0) */
1450 			EMIT4_DISP(0x88000000, dst_reg, REG_0, 16);
1451 			/* llghr %dst,%dst */
1452 			EMIT4(0xb9850000, dst_reg, dst_reg);
1453 			if (insn_is_zext(&insn[1]))
1454 				insn_count = 2;
1455 			break;
1456 		case 32: /* dst = (u32) cpu_to_le32(dst) */
1457 			/* lrvr %dst,%dst */
1458 			EMIT4(0xb91f0000, dst_reg, dst_reg);
1459 			if (!fp->aux->verifier_zext)
1460 				/* llgfr %dst,%dst */
1461 				EMIT4(0xb9160000, dst_reg, dst_reg);
1462 			break;
1463 		case 64: /* dst = (u64) cpu_to_le64(dst) */
1464 			/* lrvgr %dst,%dst */
1465 			EMIT4(0xb90f0000, dst_reg, dst_reg);
1466 			break;
1467 		}
1468 		break;
1469 	/*
1470 	 * BPF_NOSPEC (speculation barrier)
1471 	 */
1472 	case BPF_ST | BPF_NOSPEC:
1473 		break;
1474 	/*
1475 	 * BPF_ST(X)
1476 	 */
1477 	case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src_reg */
1478 	case BPF_STX | BPF_PROBE_MEM32 | BPF_B:
1479 		bpf_jit_probe_store_pre(jit, insn, &probe);
1480 		/* stcy %src,off(%dst,%arena) */
1481 		EMIT6_DISP_LH(0xe3000000, 0x0072, src_reg, dst_reg,
1482 			      probe.arena_reg, off);
1483 		err = bpf_jit_probe_post(jit, fp, &probe);
1484 		if (err < 0)
1485 			return err;
1486 		jit->seen |= SEEN_MEM;
1487 		break;
1488 	case BPF_STX | BPF_MEM | BPF_H: /* *(u16 *)(dst + off) = src */
1489 	case BPF_STX | BPF_PROBE_MEM32 | BPF_H:
1490 		bpf_jit_probe_store_pre(jit, insn, &probe);
1491 		/* sthy %src,off(%dst,%arena) */
1492 		EMIT6_DISP_LH(0xe3000000, 0x0070, src_reg, dst_reg,
1493 			      probe.arena_reg, off);
1494 		err = bpf_jit_probe_post(jit, fp, &probe);
1495 		if (err < 0)
1496 			return err;
1497 		jit->seen |= SEEN_MEM;
1498 		break;
1499 	case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */
1500 	case BPF_STX | BPF_PROBE_MEM32 | BPF_W:
1501 		bpf_jit_probe_store_pre(jit, insn, &probe);
1502 		/* sty %src,off(%dst,%arena) */
1503 		EMIT6_DISP_LH(0xe3000000, 0x0050, src_reg, dst_reg,
1504 			      probe.arena_reg, off);
1505 		err = bpf_jit_probe_post(jit, fp, &probe);
1506 		if (err < 0)
1507 			return err;
1508 		jit->seen |= SEEN_MEM;
1509 		break;
1510 	case BPF_STX | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = src */
1511 	case BPF_STX | BPF_PROBE_MEM32 | BPF_DW:
1512 		bpf_jit_probe_store_pre(jit, insn, &probe);
1513 		/* stg %src,off(%dst,%arena) */
1514 		EMIT6_DISP_LH(0xe3000000, 0x0024, src_reg, dst_reg,
1515 			      probe.arena_reg, off);
1516 		err = bpf_jit_probe_post(jit, fp, &probe);
1517 		if (err < 0)
1518 			return err;
1519 		jit->seen |= SEEN_MEM;
1520 		break;
1521 	case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */
1522 	case BPF_ST | BPF_PROBE_MEM32 | BPF_B:
1523 		/* lhi %w0,imm */
1524 		EMIT4_IMM(0xa7080000, REG_W0, (u8) imm);
1525 		bpf_jit_probe_store_pre(jit, insn, &probe);
1526 		/* stcy %w0,off(%dst,%arena) */
1527 		EMIT6_DISP_LH(0xe3000000, 0x0072, REG_W0, dst_reg,
1528 			      probe.arena_reg, off);
1529 		err = bpf_jit_probe_post(jit, fp, &probe);
1530 		if (err < 0)
1531 			return err;
1532 		jit->seen |= SEEN_MEM;
1533 		break;
1534 	case BPF_ST | BPF_MEM | BPF_H: /* *(u16 *)(dst + off) = imm */
1535 	case BPF_ST | BPF_PROBE_MEM32 | BPF_H:
1536 		/* lhi %w0,imm */
1537 		EMIT4_IMM(0xa7080000, REG_W0, (u16) imm);
1538 		bpf_jit_probe_store_pre(jit, insn, &probe);
1539 		/* sthy %w0,off(%dst,%arena) */
1540 		EMIT6_DISP_LH(0xe3000000, 0x0070, REG_W0, dst_reg,
1541 			      probe.arena_reg, off);
1542 		err = bpf_jit_probe_post(jit, fp, &probe);
1543 		if (err < 0)
1544 			return err;
1545 		jit->seen |= SEEN_MEM;
1546 		break;
1547 	case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */
1548 	case BPF_ST | BPF_PROBE_MEM32 | BPF_W:
1549 		/* llilf %w0,imm  */
1550 		EMIT6_IMM(0xc00f0000, REG_W0, (u32) imm);
1551 		bpf_jit_probe_store_pre(jit, insn, &probe);
1552 		/* sty %w0,off(%dst,%arena) */
1553 		EMIT6_DISP_LH(0xe3000000, 0x0050, REG_W0, dst_reg,
1554 			      probe.arena_reg, off);
1555 		err = bpf_jit_probe_post(jit, fp, &probe);
1556 		if (err < 0)
1557 			return err;
1558 		jit->seen |= SEEN_MEM;
1559 		break;
1560 	case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */
1561 	case BPF_ST | BPF_PROBE_MEM32 | BPF_DW:
1562 		/* lgfi %w0,imm */
1563 		EMIT6_IMM(0xc0010000, REG_W0, imm);
1564 		bpf_jit_probe_store_pre(jit, insn, &probe);
1565 		/* stg %w0,off(%dst,%arena) */
1566 		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W0, dst_reg,
1567 			      probe.arena_reg, off);
1568 		err = bpf_jit_probe_post(jit, fp, &probe);
1569 		if (err < 0)
1570 			return err;
1571 		jit->seen |= SEEN_MEM;
1572 		break;
1573 	/*
1574 	 * BPF_ATOMIC
1575 	 */
1576 	case BPF_STX | BPF_ATOMIC | BPF_DW:
1577 	case BPF_STX | BPF_ATOMIC | BPF_W:
1578 	case BPF_STX | BPF_PROBE_ATOMIC | BPF_DW:
1579 	case BPF_STX | BPF_PROBE_ATOMIC | BPF_W:
1580 	{
1581 		bool is32 = BPF_SIZE(insn->code) == BPF_W;
1582 
1583 		/*
1584 		 * Unlike loads and stores, atomics have only a base register,
1585 		 * but no index register. For the non-arena case, simply use
1586 		 * %dst as a base. For the arena case, use the work register
1587 		 * %r1: first, load the arena base into it, and then add %dst
1588 		 * to it.
1589 		 */
1590 		probe.arena_reg = dst_reg;
1591 
1592 		switch (insn->imm) {
1593 #define EMIT_ATOMIC(op32, op64) do {					\
1594 	bpf_jit_probe_atomic_pre(jit, insn, &probe);			\
1595 	/* {op32|op64} {%w0|%src},%src,off(%arena) */			\
1596 	EMIT6_DISP_LH(0xeb000000, is32 ? (op32) : (op64),		\
1597 		      (insn->imm & BPF_FETCH) ? src_reg : REG_W0,	\
1598 		      src_reg, probe.arena_reg, off);			\
1599 	err = bpf_jit_probe_post(jit, fp, &probe);			\
1600 	if (err < 0)							\
1601 		return err;						\
1602 	if (insn->imm & BPF_FETCH) {					\
1603 		/* bcr 14,0 - see atomic_fetch_{add,and,or,xor}() */	\
1604 		_EMIT2(0x07e0);						\
1605 		if (is32)                                               \
1606 			EMIT_ZERO(src_reg);				\
1607 	}								\
1608 } while (0)
1609 		case BPF_ADD:
1610 		case BPF_ADD | BPF_FETCH:
1611 			/* {laal|laalg} */
1612 			EMIT_ATOMIC(0x00fa, 0x00ea);
1613 			break;
1614 		case BPF_AND:
1615 		case BPF_AND | BPF_FETCH:
1616 			/* {lan|lang} */
1617 			EMIT_ATOMIC(0x00f4, 0x00e4);
1618 			break;
1619 		case BPF_OR:
1620 		case BPF_OR | BPF_FETCH:
1621 			/* {lao|laog} */
1622 			EMIT_ATOMIC(0x00f6, 0x00e6);
1623 			break;
1624 		case BPF_XOR:
1625 		case BPF_XOR | BPF_FETCH:
1626 			/* {lax|laxg} */
1627 			EMIT_ATOMIC(0x00f7, 0x00e7);
1628 			break;
1629 #undef EMIT_ATOMIC
1630 		case BPF_XCHG: {
1631 			struct bpf_jit_probe load_probe = probe;
1632 			int loop_start;
1633 
1634 			bpf_jit_probe_atomic_pre(jit, insn, &load_probe);
1635 			/* {ly|lg} %w0,off(%arena) */
1636 			EMIT6_DISP_LH(0xe3000000,
1637 				      is32 ? 0x0058 : 0x0004, REG_W0, REG_0,
1638 				      load_probe.arena_reg, off);
1639 			bpf_jit_probe_emit_nop(jit, &load_probe);
1640 			/* Reuse {ly|lg}'s arena_reg for {csy|csg}. */
1641 			if (load_probe.prg != -1) {
1642 				probe.prg = jit->prg;
1643 				probe.arena_reg = load_probe.arena_reg;
1644 			}
1645 			loop_start = jit->prg;
1646 			/* 0: {csy|csg} %w0,%src,off(%arena) */
1647 			EMIT6_DISP_LH(0xeb000000, is32 ? 0x0014 : 0x0030,
1648 				      REG_W0, src_reg, probe.arena_reg, off);
1649 			bpf_jit_probe_emit_nop(jit, &probe);
1650 			/* brc 4,0b */
1651 			EMIT4_PCREL_RIC(0xa7040000, 4, loop_start);
1652 			/* {llgfr|lgr} %src,%w0 */
1653 			EMIT4(is32 ? 0xb9160000 : 0xb9040000, src_reg, REG_W0);
1654 			/* Both probes should land here on exception. */
1655 			err = bpf_jit_probe_post(jit, fp, &load_probe);
1656 			if (err < 0)
1657 				return err;
1658 			err = bpf_jit_probe_post(jit, fp, &probe);
1659 			if (err < 0)
1660 				return err;
1661 			if (is32 && insn_is_zext(&insn[1]))
1662 				insn_count = 2;
1663 			break;
1664 		}
1665 		case BPF_CMPXCHG:
1666 			bpf_jit_probe_atomic_pre(jit, insn, &probe);
1667 			/* 0: {csy|csg} %b0,%src,off(%arena) */
1668 			EMIT6_DISP_LH(0xeb000000, is32 ? 0x0014 : 0x0030,
1669 				      BPF_REG_0, src_reg,
1670 				      probe.arena_reg, off);
1671 			err = bpf_jit_probe_post(jit, fp, &probe);
1672 			if (err < 0)
1673 				return err;
1674 			break;
1675 		default:
1676 			pr_err("Unknown atomic operation %02x\n", insn->imm);
1677 			return -1;
1678 		}
1679 
1680 		jit->seen |= SEEN_MEM;
1681 		break;
1682 	}
1683 	/*
1684 	 * BPF_LDX
1685 	 */
1686 	case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */
1687 	case BPF_LDX | BPF_PROBE_MEM | BPF_B:
1688 	case BPF_LDX | BPF_PROBE_MEM32 | BPF_B:
1689 		bpf_jit_probe_load_pre(jit, insn, &probe);
1690 		/* llgc %dst,off(%src,%arena) */
1691 		EMIT6_DISP_LH(0xe3000000, 0x0090, dst_reg, src_reg,
1692 			      probe.arena_reg, off);
1693 		err = bpf_jit_probe_post(jit, fp, &probe);
1694 		if (err < 0)
1695 			return err;
1696 		jit->seen |= SEEN_MEM;
1697 		if (insn_is_zext(&insn[1]))
1698 			insn_count = 2;
1699 		break;
1700 	case BPF_LDX | BPF_MEMSX | BPF_B: /* dst = *(s8 *)(ul) (src + off) */
1701 	case BPF_LDX | BPF_PROBE_MEMSX | BPF_B:
1702 		bpf_jit_probe_load_pre(jit, insn, &probe);
1703 		/* lgb %dst,off(%src) */
1704 		EMIT6_DISP_LH(0xe3000000, 0x0077, dst_reg, src_reg, REG_0, off);
1705 		err = bpf_jit_probe_post(jit, fp, &probe);
1706 		if (err < 0)
1707 			return err;
1708 		jit->seen |= SEEN_MEM;
1709 		break;
1710 	case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */
1711 	case BPF_LDX | BPF_PROBE_MEM | BPF_H:
1712 	case BPF_LDX | BPF_PROBE_MEM32 | BPF_H:
1713 		bpf_jit_probe_load_pre(jit, insn, &probe);
1714 		/* llgh %dst,off(%src,%arena) */
1715 		EMIT6_DISP_LH(0xe3000000, 0x0091, dst_reg, src_reg,
1716 			      probe.arena_reg, off);
1717 		err = bpf_jit_probe_post(jit, fp, &probe);
1718 		if (err < 0)
1719 			return err;
1720 		jit->seen |= SEEN_MEM;
1721 		if (insn_is_zext(&insn[1]))
1722 			insn_count = 2;
1723 		break;
1724 	case BPF_LDX | BPF_MEMSX | BPF_H: /* dst = *(s16 *)(ul) (src + off) */
1725 	case BPF_LDX | BPF_PROBE_MEMSX | BPF_H:
1726 		bpf_jit_probe_load_pre(jit, insn, &probe);
1727 		/* lgh %dst,off(%src) */
1728 		EMIT6_DISP_LH(0xe3000000, 0x0015, dst_reg, src_reg, REG_0, off);
1729 		err = bpf_jit_probe_post(jit, fp, &probe);
1730 		if (err < 0)
1731 			return err;
1732 		jit->seen |= SEEN_MEM;
1733 		break;
1734 	case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */
1735 	case BPF_LDX | BPF_PROBE_MEM | BPF_W:
1736 	case BPF_LDX | BPF_PROBE_MEM32 | BPF_W:
1737 		bpf_jit_probe_load_pre(jit, insn, &probe);
1738 		/* llgf %dst,off(%src) */
1739 		jit->seen |= SEEN_MEM;
1740 		EMIT6_DISP_LH(0xe3000000, 0x0016, dst_reg, src_reg,
1741 			      probe.arena_reg, off);
1742 		err = bpf_jit_probe_post(jit, fp, &probe);
1743 		if (err < 0)
1744 			return err;
1745 		if (insn_is_zext(&insn[1]))
1746 			insn_count = 2;
1747 		break;
1748 	case BPF_LDX | BPF_MEMSX | BPF_W: /* dst = *(s32 *)(ul) (src + off) */
1749 	case BPF_LDX | BPF_PROBE_MEMSX | BPF_W:
1750 		bpf_jit_probe_load_pre(jit, insn, &probe);
1751 		/* lgf %dst,off(%src) */
1752 		jit->seen |= SEEN_MEM;
1753 		EMIT6_DISP_LH(0xe3000000, 0x0014, dst_reg, src_reg, REG_0, off);
1754 		err = bpf_jit_probe_post(jit, fp, &probe);
1755 		if (err < 0)
1756 			return err;
1757 		break;
1758 	case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */
1759 	case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
1760 	case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW:
1761 		bpf_jit_probe_load_pre(jit, insn, &probe);
1762 		/* lg %dst,off(%src,%arena) */
1763 		jit->seen |= SEEN_MEM;
1764 		EMIT6_DISP_LH(0xe3000000, 0x0004, dst_reg, src_reg,
1765 			      probe.arena_reg, off);
1766 		err = bpf_jit_probe_post(jit, fp, &probe);
1767 		if (err < 0)
1768 			return err;
1769 		break;
1770 	/*
1771 	 * BPF_JMP / CALL
1772 	 */
1773 	case BPF_JMP | BPF_CALL:
1774 	{
1775 		const struct btf_func_model *m;
1776 		bool func_addr_fixed;
1777 		int j, ret;
1778 		u64 func;
1779 
1780 		ret = bpf_jit_get_func_addr(fp, insn, extra_pass,
1781 					    &func, &func_addr_fixed);
1782 		if (ret < 0)
1783 			return -1;
1784 
1785 		REG_SET_SEEN(BPF_REG_5);
1786 		jit->seen |= SEEN_FUNC;
1787 
1788 		/*
1789 		 * Copy the tail call counter to where the callee expects it.
1790 		 */
1791 
1792 		if (insn->src_reg == BPF_PSEUDO_CALL)
1793 			/*
1794 			 * mvc tail_call_cnt(4,%r15),
1795 			 *     frame_off+tail_call_cnt(%r15)
1796 			 */
1797 			_EMIT6(0xd203f000 | offsetof(struct prog_frame,
1798 						     tail_call_cnt),
1799 			       0xf000 | (jit->frame_off +
1800 					 offsetof(struct prog_frame,
1801 						  tail_call_cnt)));
1802 
1803 		/* Sign-extend the kfunc arguments. */
1804 		if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
1805 			m = bpf_jit_find_kfunc_model(fp, insn);
1806 			if (!m)
1807 				return -1;
1808 
1809 			for (j = 0; j < m->nr_args; j++) {
1810 				if (sign_zero_extend(jit, BPF_REG_1 + j,
1811 						     m->arg_size[j],
1812 						     m->arg_flags[j]))
1813 					return -1;
1814 			}
1815 		}
1816 
1817 		if ((void *)func == arch_bpf_timed_may_goto) {
1818 			/*
1819 			 * arch_bpf_timed_may_goto() has a special ABI: the
1820 			 * parameters are in BPF_REG_AX and BPF_REG_10; the
1821 			 * return value is in BPF_REG_AX; and all GPRs except
1822 			 * REG_W0, REG_W1, and BPF_REG_AX are callee-saved.
1823 			 */
1824 
1825 			/* brasl %r0,func */
1826 			EMIT6_PCREL_RILB_PTR(0xc0050000, REG_0, (void *)func);
1827 		} else {
1828 			/* brasl %r14,func */
1829 			EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14, (void *)func);
1830 			/* lgr %b0,%r2: load return value into %b0 */
1831 			EMIT4(0xb9040000, BPF_REG_0, REG_2);
1832 		}
1833 
1834 		/*
1835 		 * Copy the potentially updated tail call counter back.
1836 		 */
1837 
1838 		if (insn->src_reg == BPF_PSEUDO_CALL)
1839 			/*
1840 			 * mvc frame_off+tail_call_cnt(%r15),
1841 			 *     tail_call_cnt(4,%r15)
1842 			 */
1843 			_EMIT6(0xd203f000 | (jit->frame_off +
1844 					     offsetof(struct prog_frame,
1845 						      tail_call_cnt)),
1846 			       0xf000 | offsetof(struct prog_frame,
1847 						 tail_call_cnt));
1848 
1849 		break;
1850 	}
1851 	case BPF_JMP | BPF_TAIL_CALL: {
1852 		int patch_1_clrj, patch_2_clij, patch_3_brc;
1853 
1854 		/*
1855 		 * Implicit input:
1856 		 *  B1: pointer to ctx
1857 		 *  B2: pointer to bpf_array
1858 		 *  B3: index in bpf_array
1859 		 *
1860 		 * if (index >= array->map.max_entries)
1861 		 *         goto out;
1862 		 */
1863 
1864 		/* llgf %w1,map.max_entries(%b2) */
1865 		EMIT6_DISP_LH(0xe3000000, 0x0016, REG_W1, REG_0, BPF_REG_2,
1866 			      offsetof(struct bpf_array, map.max_entries));
1867 		/* if ((u32)%b3 >= (u32)%w1) goto out; */
1868 		/* clrj %b3,%w1,0xa,out */
1869 		patch_1_clrj = jit->prg;
1870 		EMIT6_PCREL_RIEB(0xec000000, 0x0077, BPF_REG_3, REG_W1, 0xa,
1871 				 jit->prg);
1872 
1873 		/*
1874 		 * if (tail_call_cnt >= MAX_TAIL_CALL_CNT)
1875 		 *         goto out;
1876 		 *
1877 		 * tail_call_cnt is read into %w0, which needs to be preserved
1878 		 * until it's incremented and flushed.
1879 		 */
1880 
1881 		off = jit->frame_off +
1882 		      offsetof(struct prog_frame, tail_call_cnt);
1883 		/* ly %w0,off(%r15) */
1884 		EMIT6_DISP_LH(0xe3000000, 0x0058, REG_W0, REG_0, REG_15, off);
1885 		/* clij %w0,MAX_TAIL_CALL_CNT,0xa,out */
1886 		patch_2_clij = jit->prg;
1887 		EMIT6_PCREL_RIEC(0xec000000, 0x007f, REG_W0, MAX_TAIL_CALL_CNT,
1888 				 0xa, jit->prg);
1889 
1890 		/*
1891 		 * prog = array->ptrs[index];
1892 		 * if (prog == NULL)
1893 		 *         goto out;
1894 		 */
1895 
1896 		/* llgfr %r1,%b3: %r1 = (u32) index */
1897 		EMIT4(0xb9160000, REG_1, BPF_REG_3);
1898 		/* sllg %r1,%r1,3: %r1 *= 8 */
1899 		EMIT6_DISP_LH(0xeb000000, 0x000d, REG_1, REG_1, REG_0, 3);
1900 		/* ltg %r1,prog(%b2,%r1) */
1901 		EMIT6_DISP_LH(0xe3000000, 0x0002, REG_1, BPF_REG_2,
1902 			      REG_1, offsetof(struct bpf_array, ptrs));
1903 		/* brc 0x8,out */
1904 		patch_3_brc = jit->prg;
1905 		EMIT4_PCREL_RIC(0xa7040000, 8, jit->prg);
1906 
1907 		/* tail_call_cnt++; */
1908 		/* ahi %w0,1 */
1909 		EMIT4_IMM(0xa70a0000, REG_W0, 1);
1910 		/* sty %w0,off(%r15) */
1911 		EMIT6_DISP_LH(0xe3000000, 0x0050, REG_W0, REG_0, REG_15, off);
1912 
1913 		/*
1914 		 * Restore registers before calling function
1915 		 */
1916 		save_restore_regs(jit, REGS_RESTORE, 0);
1917 
1918 		/*
1919 		 * goto *(prog->bpf_func + tail_call_start);
1920 		 */
1921 
1922 		/* lg %r1,bpf_func(%r1) */
1923 		EMIT6_DISP_LH(0xe3000000, 0x0004, REG_1, REG_1, REG_0,
1924 			      offsetof(struct bpf_prog, bpf_func));
1925 		if (nospec_uses_trampoline()) {
1926 			jit->seen |= SEEN_FUNC;
1927 			/* aghi %r1,tail_call_start */
1928 			EMIT4_IMM(0xa70b0000, REG_1, jit->tail_call_start);
1929 			/* brcl 0xf,__s390_indirect_jump_r1 */
1930 			EMIT6_PCREL_RILC_PTR(0xc0040000, 0xf,
1931 					     __s390_indirect_jump_r1);
1932 		} else {
1933 			/* bc 0xf,tail_call_start(%r1) */
1934 			_EMIT4(0x47f01000 + jit->tail_call_start);
1935 		}
1936 		/* out: */
1937 		if (jit->prg_buf) {
1938 			*(u16 *)(jit->prg_buf + patch_1_clrj + 2) =
1939 				(jit->prg - patch_1_clrj) >> 1;
1940 			*(u16 *)(jit->prg_buf + patch_2_clij + 2) =
1941 				(jit->prg - patch_2_clij) >> 1;
1942 			*(u16 *)(jit->prg_buf + patch_3_brc + 2) =
1943 				(jit->prg - patch_3_brc) >> 1;
1944 		}
1945 		break;
1946 	}
1947 	case BPF_JMP | BPF_EXIT: /* return b0 */
1948 		last = (i == fp->len - 1) ? 1 : 0;
1949 		if (last)
1950 			break;
1951 		if (!is_first_pass(jit) && can_use_rel(jit, jit->exit_ip))
1952 			/* brc 0xf, <exit> */
1953 			EMIT4_PCREL_RIC(0xa7040000, 0xf, jit->exit_ip);
1954 		else
1955 			/* brcl 0xf, <exit> */
1956 			EMIT6_PCREL_RILC(0xc0040000, 0xf, jit->exit_ip);
1957 		break;
1958 	/*
1959 	 * Branch relative (number of skipped instructions) to offset on
1960 	 * condition.
1961 	 *
1962 	 * Condition code to mask mapping:
1963 	 *
1964 	 * CC | Description	   | Mask
1965 	 * ------------------------------
1966 	 * 0  | Operands equal	   |	8
1967 	 * 1  | First operand low  |	4
1968 	 * 2  | First operand high |	2
1969 	 * 3  | Unused		   |	1
1970 	 *
1971 	 * For s390x relative branches: ip = ip + off_bytes
1972 	 * For BPF relative branches:	insn = insn + off_insns + 1
1973 	 *
1974 	 * For example for s390x with offset 0 we jump to the branch
1975 	 * instruction itself (loop) and for BPF with offset 0 we
1976 	 * branch to the instruction behind the branch.
1977 	 */
1978 	case BPF_JMP32 | BPF_JA: /* if (true) */
1979 		branch_oc_off = imm;
1980 		fallthrough;
1981 	case BPF_JMP | BPF_JA: /* if (true) */
1982 		mask = 0xf000; /* j */
1983 		goto branch_oc;
1984 	case BPF_JMP | BPF_JSGT | BPF_K: /* ((s64) dst > (s64) imm) */
1985 	case BPF_JMP32 | BPF_JSGT | BPF_K: /* ((s32) dst > (s32) imm) */
1986 		mask = 0x2000; /* jh */
1987 		goto branch_ks;
1988 	case BPF_JMP | BPF_JSLT | BPF_K: /* ((s64) dst < (s64) imm) */
1989 	case BPF_JMP32 | BPF_JSLT | BPF_K: /* ((s32) dst < (s32) imm) */
1990 		mask = 0x4000; /* jl */
1991 		goto branch_ks;
1992 	case BPF_JMP | BPF_JSGE | BPF_K: /* ((s64) dst >= (s64) imm) */
1993 	case BPF_JMP32 | BPF_JSGE | BPF_K: /* ((s32) dst >= (s32) imm) */
1994 		mask = 0xa000; /* jhe */
1995 		goto branch_ks;
1996 	case BPF_JMP | BPF_JSLE | BPF_K: /* ((s64) dst <= (s64) imm) */
1997 	case BPF_JMP32 | BPF_JSLE | BPF_K: /* ((s32) dst <= (s32) imm) */
1998 		mask = 0xc000; /* jle */
1999 		goto branch_ks;
2000 	case BPF_JMP | BPF_JGT | BPF_K: /* (dst_reg > imm) */
2001 	case BPF_JMP32 | BPF_JGT | BPF_K: /* ((u32) dst_reg > (u32) imm) */
2002 		mask = 0x2000; /* jh */
2003 		goto branch_ku;
2004 	case BPF_JMP | BPF_JLT | BPF_K: /* (dst_reg < imm) */
2005 	case BPF_JMP32 | BPF_JLT | BPF_K: /* ((u32) dst_reg < (u32) imm) */
2006 		mask = 0x4000; /* jl */
2007 		goto branch_ku;
2008 	case BPF_JMP | BPF_JGE | BPF_K: /* (dst_reg >= imm) */
2009 	case BPF_JMP32 | BPF_JGE | BPF_K: /* ((u32) dst_reg >= (u32) imm) */
2010 		mask = 0xa000; /* jhe */
2011 		goto branch_ku;
2012 	case BPF_JMP | BPF_JLE | BPF_K: /* (dst_reg <= imm) */
2013 	case BPF_JMP32 | BPF_JLE | BPF_K: /* ((u32) dst_reg <= (u32) imm) */
2014 		mask = 0xc000; /* jle */
2015 		goto branch_ku;
2016 	case BPF_JMP | BPF_JNE | BPF_K: /* (dst_reg != imm) */
2017 	case BPF_JMP32 | BPF_JNE | BPF_K: /* ((u32) dst_reg != (u32) imm) */
2018 		mask = 0x7000; /* jne */
2019 		goto branch_ku;
2020 	case BPF_JMP | BPF_JEQ | BPF_K: /* (dst_reg == imm) */
2021 	case BPF_JMP32 | BPF_JEQ | BPF_K: /* ((u32) dst_reg == (u32) imm) */
2022 		mask = 0x8000; /* je */
2023 		goto branch_ku;
2024 	case BPF_JMP | BPF_JSET | BPF_K: /* (dst_reg & imm) */
2025 	case BPF_JMP32 | BPF_JSET | BPF_K: /* ((u32) dst_reg & (u32) imm) */
2026 		mask = 0x7000; /* jnz */
2027 		if (BPF_CLASS(insn->code) == BPF_JMP32) {
2028 			/* llilf %w1,imm (load zero extend imm) */
2029 			EMIT6_IMM(0xc00f0000, REG_W1, imm);
2030 			/* nr %w1,%dst */
2031 			EMIT2(0x1400, REG_W1, dst_reg);
2032 		} else {
2033 			/* lgfi %w1,imm (load sign extend imm) */
2034 			EMIT6_IMM(0xc0010000, REG_W1, imm);
2035 			/* ngr %w1,%dst */
2036 			EMIT4(0xb9800000, REG_W1, dst_reg);
2037 		}
2038 		goto branch_oc;
2039 
2040 	case BPF_JMP | BPF_JSGT | BPF_X: /* ((s64) dst > (s64) src) */
2041 	case BPF_JMP32 | BPF_JSGT | BPF_X: /* ((s32) dst > (s32) src) */
2042 		mask = 0x2000; /* jh */
2043 		goto branch_xs;
2044 	case BPF_JMP | BPF_JSLT | BPF_X: /* ((s64) dst < (s64) src) */
2045 	case BPF_JMP32 | BPF_JSLT | BPF_X: /* ((s32) dst < (s32) src) */
2046 		mask = 0x4000; /* jl */
2047 		goto branch_xs;
2048 	case BPF_JMP | BPF_JSGE | BPF_X: /* ((s64) dst >= (s64) src) */
2049 	case BPF_JMP32 | BPF_JSGE | BPF_X: /* ((s32) dst >= (s32) src) */
2050 		mask = 0xa000; /* jhe */
2051 		goto branch_xs;
2052 	case BPF_JMP | BPF_JSLE | BPF_X: /* ((s64) dst <= (s64) src) */
2053 	case BPF_JMP32 | BPF_JSLE | BPF_X: /* ((s32) dst <= (s32) src) */
2054 		mask = 0xc000; /* jle */
2055 		goto branch_xs;
2056 	case BPF_JMP | BPF_JGT | BPF_X: /* (dst > src) */
2057 	case BPF_JMP32 | BPF_JGT | BPF_X: /* ((u32) dst > (u32) src) */
2058 		mask = 0x2000; /* jh */
2059 		goto branch_xu;
2060 	case BPF_JMP | BPF_JLT | BPF_X: /* (dst < src) */
2061 	case BPF_JMP32 | BPF_JLT | BPF_X: /* ((u32) dst < (u32) src) */
2062 		mask = 0x4000; /* jl */
2063 		goto branch_xu;
2064 	case BPF_JMP | BPF_JGE | BPF_X: /* (dst >= src) */
2065 	case BPF_JMP32 | BPF_JGE | BPF_X: /* ((u32) dst >= (u32) src) */
2066 		mask = 0xa000; /* jhe */
2067 		goto branch_xu;
2068 	case BPF_JMP | BPF_JLE | BPF_X: /* (dst <= src) */
2069 	case BPF_JMP32 | BPF_JLE | BPF_X: /* ((u32) dst <= (u32) src) */
2070 		mask = 0xc000; /* jle */
2071 		goto branch_xu;
2072 	case BPF_JMP | BPF_JNE | BPF_X: /* (dst != src) */
2073 	case BPF_JMP32 | BPF_JNE | BPF_X: /* ((u32) dst != (u32) src) */
2074 		mask = 0x7000; /* jne */
2075 		goto branch_xu;
2076 	case BPF_JMP | BPF_JEQ | BPF_X: /* (dst == src) */
2077 	case BPF_JMP32 | BPF_JEQ | BPF_X: /* ((u32) dst == (u32) src) */
2078 		mask = 0x8000; /* je */
2079 		goto branch_xu;
2080 	case BPF_JMP | BPF_JSET | BPF_X: /* (dst & src) */
2081 	case BPF_JMP32 | BPF_JSET | BPF_X: /* ((u32) dst & (u32) src) */
2082 	{
2083 		bool is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
2084 
2085 		mask = 0x7000; /* jnz */
2086 		/* nrk or ngrk %w1,%dst,%src */
2087 		EMIT4_RRF((is_jmp32 ? 0xb9f40000 : 0xb9e40000),
2088 			  REG_W1, dst_reg, src_reg);
2089 		goto branch_oc;
2090 branch_ks:
2091 		is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
2092 		/* cfi or cgfi %dst,imm */
2093 		EMIT6_IMM(is_jmp32 ? 0xc20d0000 : 0xc20c0000,
2094 			  dst_reg, imm);
2095 		if (!is_first_pass(jit) &&
2096 		    can_use_rel(jit, addrs[i + off + 1])) {
2097 			/* brc mask,off */
2098 			EMIT4_PCREL_RIC(0xa7040000,
2099 					mask >> 12, addrs[i + off + 1]);
2100 		} else {
2101 			/* brcl mask,off */
2102 			EMIT6_PCREL_RILC(0xc0040000,
2103 					 mask >> 12, addrs[i + off + 1]);
2104 		}
2105 		break;
2106 branch_ku:
2107 		/* lgfi %w1,imm (load sign extend imm) */
2108 		src_reg = REG_1;
2109 		EMIT6_IMM(0xc0010000, src_reg, imm);
2110 		goto branch_xu;
2111 branch_xs:
2112 		is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
2113 		if (!is_first_pass(jit) &&
2114 		    can_use_rel(jit, addrs[i + off + 1])) {
2115 			/* crj or cgrj %dst,%src,mask,off */
2116 			EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0076 : 0x0064),
2117 				    dst_reg, src_reg, i, off, mask);
2118 		} else {
2119 			/* cr or cgr %dst,%src */
2120 			if (is_jmp32)
2121 				EMIT2(0x1900, dst_reg, src_reg);
2122 			else
2123 				EMIT4(0xb9200000, dst_reg, src_reg);
2124 			/* brcl mask,off */
2125 			EMIT6_PCREL_RILC(0xc0040000,
2126 					 mask >> 12, addrs[i + off + 1]);
2127 		}
2128 		break;
2129 branch_xu:
2130 		is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
2131 		if (!is_first_pass(jit) &&
2132 		    can_use_rel(jit, addrs[i + off + 1])) {
2133 			/* clrj or clgrj %dst,%src,mask,off */
2134 			EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0077 : 0x0065),
2135 				    dst_reg, src_reg, i, off, mask);
2136 		} else {
2137 			/* clr or clgr %dst,%src */
2138 			if (is_jmp32)
2139 				EMIT2(0x1500, dst_reg, src_reg);
2140 			else
2141 				EMIT4(0xb9210000, dst_reg, src_reg);
2142 			/* brcl mask,off */
2143 			EMIT6_PCREL_RILC(0xc0040000,
2144 					 mask >> 12, addrs[i + off + 1]);
2145 		}
2146 		break;
2147 branch_oc:
2148 		if (!is_first_pass(jit) &&
2149 		    can_use_rel(jit, addrs[i + branch_oc_off + 1])) {
2150 			/* brc mask,off */
2151 			EMIT4_PCREL_RIC(0xa7040000,
2152 					mask >> 12,
2153 					addrs[i + branch_oc_off + 1]);
2154 		} else {
2155 			/* brcl mask,off */
2156 			EMIT6_PCREL_RILC(0xc0040000,
2157 					 mask >> 12,
2158 					 addrs[i + branch_oc_off + 1]);
2159 		}
2160 		break;
2161 	}
2162 	default: /* too complex, give up */
2163 		pr_err("Unknown opcode %02x\n", insn->code);
2164 		return -1;
2165 	}
2166 
2167 	return insn_count;
2168 }
2169 
2170 /*
2171  * Return whether new i-th instruction address does not violate any invariant
2172  */
2173 static bool bpf_is_new_addr_sane(struct bpf_jit *jit, int i)
2174 {
2175 	/* On the first pass anything goes */
2176 	if (is_first_pass(jit))
2177 		return true;
2178 
2179 	/* The codegen pass must not change anything */
2180 	if (is_codegen_pass(jit))
2181 		return jit->addrs[i] == jit->prg;
2182 
2183 	/* Passes in between must not increase code size */
2184 	return jit->addrs[i] >= jit->prg;
2185 }
2186 
2187 /*
2188  * Update the address of i-th instruction
2189  */
2190 static int bpf_set_addr(struct bpf_jit *jit, int i)
2191 {
2192 	int delta;
2193 
2194 	if (is_codegen_pass(jit)) {
2195 		delta = jit->prg - jit->addrs[i];
2196 		if (delta < 0)
2197 			bpf_skip(jit, -delta);
2198 	}
2199 	if (WARN_ON_ONCE(!bpf_is_new_addr_sane(jit, i)))
2200 		return -1;
2201 	jit->addrs[i] = jit->prg;
2202 	return 0;
2203 }
2204 
2205 /*
2206  * Compile eBPF program into s390x code
2207  */
2208 static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp,
2209 			bool extra_pass)
2210 {
2211 	int i, insn_count, lit32_size, lit64_size;
2212 	u64 kern_arena;
2213 
2214 	jit->lit32 = jit->lit32_start;
2215 	jit->lit64 = jit->lit64_start;
2216 	jit->prg = 0;
2217 	jit->excnt = 0;
2218 	if (is_first_pass(jit) || (jit->seen & SEEN_STACK))
2219 		jit->frame_off = sizeof(struct prog_frame) -
2220 				 offsetofend(struct prog_frame, unused) +
2221 				 round_up(fp->aux->stack_depth, 8);
2222 	else
2223 		jit->frame_off = 0;
2224 
2225 	kern_arena = bpf_arena_get_kern_vm_start(fp->aux->arena);
2226 	if (kern_arena)
2227 		jit->kern_arena = _EMIT_CONST_U64(kern_arena);
2228 	jit->user_arena = bpf_arena_get_user_vm_start(fp->aux->arena);
2229 
2230 	bpf_jit_prologue(jit, fp);
2231 	if (bpf_set_addr(jit, 0) < 0)
2232 		return -1;
2233 	for (i = 0; i < fp->len; i += insn_count) {
2234 		insn_count = bpf_jit_insn(jit, fp, i, extra_pass);
2235 		if (insn_count < 0)
2236 			return -1;
2237 		/* Next instruction address */
2238 		if (bpf_set_addr(jit, i + insn_count) < 0)
2239 			return -1;
2240 	}
2241 	bpf_jit_epilogue(jit);
2242 
2243 	lit32_size = jit->lit32 - jit->lit32_start;
2244 	lit64_size = jit->lit64 - jit->lit64_start;
2245 	jit->lit32_start = jit->prg;
2246 	if (lit32_size)
2247 		jit->lit32_start = ALIGN(jit->lit32_start, 4);
2248 	jit->lit64_start = jit->lit32_start + lit32_size;
2249 	if (lit64_size)
2250 		jit->lit64_start = ALIGN(jit->lit64_start, 8);
2251 	jit->size = jit->lit64_start + lit64_size;
2252 	jit->size_prg = jit->prg;
2253 
2254 	if (WARN_ON_ONCE(fp->aux->extable &&
2255 			 jit->excnt != fp->aux->num_exentries))
2256 		/* Verifier bug - too many entries. */
2257 		return -1;
2258 
2259 	return 0;
2260 }
2261 
/*
 * Ask the verifier to insert explicit zero-extension after 32-bit ops
 * instead of relying on the JIT to do it implicitly.
 */
bool bpf_jit_needs_zext(void)
{
	return true;
}
2266 
/* JIT state preserved across bpf_int_jit_compile() calls for subprograms. */
struct s390_jit_data {
	struct bpf_binary_header *header;	/* Allocated binary image */
	struct bpf_jit ctx;			/* Saved JIT context */
	int pass;				/* Last completed pass number */
};
2272 
2273 static struct bpf_binary_header *bpf_jit_alloc(struct bpf_jit *jit,
2274 					       struct bpf_prog *fp)
2275 {
2276 	struct bpf_binary_header *header;
2277 	struct bpf_insn *insn;
2278 	u32 extable_size;
2279 	u32 code_size;
2280 	int i;
2281 
2282 	for (i = 0; i < fp->len; i++) {
2283 		insn = &fp->insnsi[i];
2284 
2285 		if (BPF_CLASS(insn->code) == BPF_STX &&
2286 		    BPF_MODE(insn->code) == BPF_PROBE_ATOMIC &&
2287 		    (BPF_SIZE(insn->code) == BPF_DW ||
2288 		     BPF_SIZE(insn->code) == BPF_W) &&
2289 		    insn->imm == BPF_XCHG)
2290 			/*
2291 			 * bpf_jit_insn() emits a load and a compare-and-swap,
2292 			 * both of which need to be probed.
2293 			 */
2294 			fp->aux->num_exentries += 1;
2295 	}
2296 	/* We need two entries per insn. */
2297 	fp->aux->num_exentries *= 2;
2298 
2299 	code_size = roundup(jit->size,
2300 			    __alignof__(struct exception_table_entry));
2301 	extable_size = fp->aux->num_exentries *
2302 		sizeof(struct exception_table_entry);
2303 	header = bpf_jit_binary_alloc(code_size + extable_size, &jit->prg_buf,
2304 				      8, jit_fill_hole);
2305 	if (!header)
2306 		return NULL;
2307 	fp->aux->extable = (struct exception_table_entry *)
2308 		(jit->prg_buf + code_size);
2309 	return header;
2310 }
2311 
2312 /*
2313  * Compile eBPF program "fp"
2314  */
2315 struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_prog *fp)
2316 {
2317 	struct bpf_binary_header *header;
2318 	struct s390_jit_data *jit_data;
2319 	bool extra_pass = false;
2320 	struct bpf_jit jit;
2321 	int pass;
2322 
2323 	if (!fp->jit_requested)
2324 		return fp;
2325 
2326 	jit_data = fp->aux->jit_data;
2327 	if (!jit_data) {
2328 		jit_data = kzalloc_obj(*jit_data);
2329 		if (!jit_data)
2330 			return fp;
2331 		fp->aux->jit_data = jit_data;
2332 	}
2333 	if (jit_data->ctx.addrs) {
2334 		jit = jit_data->ctx;
2335 		header = jit_data->header;
2336 		extra_pass = true;
2337 		pass = jit_data->pass + 1;
2338 		goto skip_init_ctx;
2339 	}
2340 
2341 	memset(&jit, 0, sizeof(jit));
2342 	jit.addrs = kvcalloc(fp->len + 1, sizeof(*jit.addrs), GFP_KERNEL);
2343 	if (jit.addrs == NULL)
2344 		goto out_err;
2345 	/*
2346 	 * Three initial passes:
2347 	 *   - 1/2: Determine clobbered registers
2348 	 *   - 3:   Calculate program size and addrs array
2349 	 */
2350 	for (pass = 1; pass <= 3; pass++) {
2351 		if (bpf_jit_prog(&jit, fp, extra_pass))
2352 			goto out_err;
2353 	}
2354 	/*
2355 	 * Final pass: Allocate and generate program
2356 	 */
2357 	header = bpf_jit_alloc(&jit, fp);
2358 	if (!header)
2359 		goto out_err;
2360 skip_init_ctx:
2361 	if (bpf_jit_prog(&jit, fp, extra_pass)) {
2362 		bpf_jit_binary_free(header);
2363 		goto out_err;
2364 	}
2365 	if (bpf_jit_enable > 1) {
2366 		bpf_jit_dump(fp->len, jit.size, pass, jit.prg_buf);
2367 		print_fn_code(jit.prg_buf, jit.size_prg);
2368 	}
2369 	if (!fp->is_func || extra_pass) {
2370 		if (bpf_jit_binary_lock_ro(header)) {
2371 			bpf_jit_binary_free(header);
2372 			goto out_err;
2373 		}
2374 	} else {
2375 		jit_data->header = header;
2376 		jit_data->ctx = jit;
2377 		jit_data->pass = pass;
2378 	}
2379 	fp->bpf_func = (void *) jit.prg_buf;
2380 	fp->jited = 1;
2381 	fp->jited_len = jit.size;
2382 
2383 	if (!fp->is_func || extra_pass) {
2384 		bpf_prog_fill_jited_linfo(fp, jit.addrs + 1);
2385 free_addrs:
2386 		kvfree(jit.addrs);
2387 		kfree(jit_data);
2388 		fp->aux->jit_data = NULL;
2389 	}
2390 
2391 	return fp;
2392 
2393 out_err:
2394 	if (extra_pass) {
2395 		fp->bpf_func = NULL;
2396 		fp->jited = 0;
2397 		fp->jited_len = 0;
2398 	}
2399 	goto free_addrs;
2400 }
2401 
/* BPF programs may call kernel functions (kfuncs). */
bool bpf_jit_supports_kfunc_call(void)
{
	return true;
}
2406 
/* Kfunc call targets are not limited to a +-4G relative-branch range. */
bool bpf_jit_supports_far_kfunc_call(void)
{
	return true;
}
2411 
/*
 * Live-patch the hotpatchable "brcl" at ip between a 6-byte nop (mask 0)
 * and a taken branch (mask 0xf), updating the PLT target when the branch
 * goes through a PLT. Only the mask byte of the branch itself is rewritten.
 */
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
		       enum bpf_text_poke_type new_t, void *old_addr,
		       void *new_addr)
{
	struct bpf_plt expected_plt, current_plt, new_plt, *plt;
	struct {
		u16 opc;	/* "brcl" opcode including the mask nibble */
		s32 disp;	/* branch displacement in halfwords */
	} __packed insn;
	char *ret;
	int err;

	/* Verify the branch to be patched. */
	err = copy_from_kernel_nofault(&insn, ip, sizeof(insn));
	if (err < 0)
		return err;
	/* Expect "brcl 0xf,..." when armed (old_addr set), "brcl 0,..." otherwise. */
	if (insn.opc != (0xc004 | (old_addr ? 0xf0 : 0)))
		return -EINVAL;

	if ((new_t == BPF_MOD_JUMP || old_t == BPF_MOD_JUMP) &&
	    insn.disp == ((char *)new_addr - (char *)ip) >> 1) {
		/*
		 * The branch already points to the destination,
		 * there is no PLT.
		 */
	} else {
		/* Verify the PLT. */
		plt = ip + (insn.disp << 1);
		err = copy_from_kernel_nofault(&current_plt, plt,
					       sizeof(current_plt));
		if (err < 0)
			return err;
		/* The return address is the instruction after the 6-byte branch. */
		ret = (char *)ip + 6;
		bpf_jit_plt(&expected_plt, ret, old_addr);
		if (memcmp(&current_plt, &expected_plt, sizeof(current_plt)))
			return -EINVAL;
		/* Adjust the call address. */
		bpf_jit_plt(&new_plt, ret, new_addr);
		s390_kernel_write(&plt->target, &new_plt.target,
				  sizeof(void *));
	}

	/* Adjust the mask of the branch. */
	insn.opc = 0xc004 | (new_addr ? 0xf0 : 0);
	s390_kernel_write((char *)ip + 1, (char *)&insn.opc + 1, 1);

	/* Make the new code visible to the other CPUs. */
	text_poke_sync_lock();

	return 0;
}
2463 
/*
 * State of a BPF trampoline under construction, including the offsets
 * (relative to %r15 after the prologue) of all items in its stack frame.
 */
struct bpf_tramp_jit {
	struct bpf_jit common;
	int orig_stack_args_off;/* Offset of arguments placed on stack by the
				 * func_addr's original caller
				 */
	int stack_size;		/* Trampoline stack size */
	int backchain_off;	/* Offset of backchain */
	int stack_args_off;	/* Offset of stack arguments for calling
				 * func_addr, has to be at the top
				 */
	int reg_args_off;	/* Offset of register arguments for calling
				 * func_addr
				 */
	int ip_off;		/* For bpf_get_func_ip(), has to be at
				 * (ctx - 16)
				 */
	int func_meta_off;	/* For bpf_get_func_arg_cnt()/fsession, has
				 * to be at (ctx - 8)
				 */
	int bpf_args_off;	/* Offset of BPF_PROG context, which consists
				 * of BPF arguments followed by return value
				 */
	int retval_off;		/* Offset of return value (see above) */
	int r7_r8_off;		/* Offset of saved %r7 and %r8, which are used
				 * for __bpf_prog_enter() return value and
				 * func_addr respectively
				 */
	int run_ctx_off;	/* Offset of struct bpf_tramp_run_ctx */
	int tccnt_off;		/* Offset of saved tailcall counter */
	int r14_off;		/* Offset of saved %r14, has to be at the
				 * bottom */
	int do_fexit;		/* do_fexit: label */
};
2497 
/*
 * Load the 64-bit immediate val into dst_reg without using the literal
 * pool: upper half via "llihf", lower half OR-ed in via "oilf".
 */
static void load_imm64(struct bpf_jit *jit, int dst_reg, u64 val)
{
	/* llihf %dst_reg,val_hi */
	EMIT6_IMM(0xc00e0000, dst_reg, (val >> 32));
	/* oilf %dst_reg,val_lo */
	EMIT6_IMM(0xc00d0000, dst_reg, val);
}
2505 
/* Store the 64-bit immediate imm at stack_off(%r15); clobbers tmp_reg. */
static void emit_store_stack_imm64(struct bpf_jit *jit, int tmp_reg, int stack_off, u64 imm)
{
	load_imm64(jit, tmp_reg, imm);
	/* stg %tmp_reg,stack_off(%r15) */
	EMIT6_DISP_LH(0xe3000000, 0x0024, tmp_reg, REG_0, REG_15, stack_off);
}
2512 
/*
 * Emit the code that runs one BPF program attached to the trampoline:
 * __bpf_prog_enter() (which may skip the run), the program itself, and
 * __bpf_prog_exit(). If save_ret is set, the (sign/zero-extended) return
 * value is stored at retval_off. Returns 0 on success, -1 on failure.
 */
static int invoke_bpf_prog(struct bpf_tramp_jit *tjit,
			   const struct btf_func_model *m,
			   struct bpf_tramp_link *tlink, bool save_ret)
{
	struct bpf_jit *jit = &tjit->common;
	int cookie_off = tjit->run_ctx_off +
			 offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
	struct bpf_prog *p = tlink->link.prog;
	int patch;

	/*
	 * run_ctx.cookie = tlink->cookie;
	 */

	emit_store_stack_imm64(jit, REG_W0, cookie_off, tlink->cookie);

	/*
	 * if ((start = __bpf_prog_enter(p, &run_ctx)) == 0)
	 *         goto skip;
	 */

	/* %r2 = p */
	load_imm64(jit, REG_2, (u64)p);
	/* la %r3,run_ctx_off(%r15) */
	EMIT4_DISP(0x41000000, REG_3, REG_15, tjit->run_ctx_off);
	/* brasl %r14,__bpf_prog_enter */
	EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14, bpf_trampoline_enter(p));
	/* ltgr %r7,%r2 (keep start timestamp in the call-saved %r7) */
	EMIT4(0xb9020000, REG_7, REG_2);
	/* brcl 8,skip (displacement back-patched below) */
	patch = jit->prg;
	EMIT6_PCREL_RILC(0xc0040000, 8, 0);

	/*
	 * retval = bpf_func(args, p->insnsi);
	 */

	/* la %r2,bpf_args_off(%r15) */
	EMIT4_DISP(0x41000000, REG_2, REG_15, tjit->bpf_args_off);
	/* %r3 = p->insnsi (only the interpreter reads it) */
	if (!p->jited)
		load_imm64(jit, REG_3, (u64)p->insnsi);
	/* brasl %r14,p->bpf_func */
	EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14, p->bpf_func);
	if (save_ret) {
		if (sign_zero_extend(jit, REG_2, m->ret_size, m->ret_flags))
			return -1;
		/* stg %r2,retval_off(%r15) */
		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_2, REG_0, REG_15,
			      tjit->retval_off);
	}

	/* skip: back-patch the "brcl 8" displacement to land here */
	if (jit->prg_buf)
		*(u32 *)&jit->prg_buf[patch + 2] = (jit->prg - patch) >> 1;

	/*
	 * __bpf_prog_exit(p, start, &run_ctx);
	 */

	/* %r2 = p */
	load_imm64(jit, REG_2, (u64)p);
	/* lgr %r3,%r7 */
	EMIT4(0xb9040000, REG_3, REG_7);
	/* la %r4,run_ctx_off(%r15) */
	EMIT4_DISP(0x41000000, REG_4, REG_15, tjit->run_ctx_off);
	/* brasl %r14,__bpf_prog_exit */
	EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14, bpf_trampoline_exit(p));

	return 0;
}
2584 
/*
 * Emit code that runs all programs in tl. For programs that call
 * bpf_session_cookie(), the per-program cookie index is folded into the
 * func_meta word on the stack before the program runs; cookies are laid out
 * below bpf_args_off and consumed from the highest index downwards.
 * Returns 0 on success, -EINVAL on failure.
 */
static int invoke_bpf(struct bpf_tramp_jit *tjit,
		      const struct btf_func_model *m,
		      struct bpf_tramp_links *tl, bool save_ret,
		      u64 func_meta, int cookie_off)
{
	int i, cur_cookie = (tjit->bpf_args_off - cookie_off) / sizeof(u64);
	struct bpf_jit *jit = &tjit->common;

	for (i = 0; i < tl->nr_links; i++) {
		if (bpf_prog_calls_session_cookie(tl->links[i])) {
			u64 meta = func_meta | ((u64)cur_cookie << BPF_TRAMP_COOKIE_INDEX_SHIFT);

			emit_store_stack_imm64(jit, REG_0, tjit->func_meta_off, meta);
			cur_cookie--;
		}
		if (invoke_bpf_prog(tjit, m, tl->links[i], save_ret))
			return -EINVAL;
	}

	return 0;
}
2606 
2607 static int alloc_stack(struct bpf_tramp_jit *tjit, size_t size)
2608 {
2609 	int stack_offset = tjit->stack_size;
2610 
2611 	tjit->stack_size += size;
2612 	return stack_offset;
2613 }
2614 
2615 /* ABI uses %r2 - %r6 for parameter passing. */
2616 #define MAX_NR_REG_ARGS 5
2617 
2618 /* The "L" field of the "mvc" instruction is 8 bits. */
2619 #define MAX_MVC_SIZE 256
2620 #define MAX_NR_STACK_ARGS (MAX_MVC_SIZE / sizeof(u64))
2621 
2622 /* -mfentry generates a 6-byte nop on s390x. */
2623 #define S390X_PATCH_SIZE 6
2624 
/*
 * Generate the trampoline that runs the fentry/fmod_ret/fexit/fsession
 * programs attached to func_addr. Called twice by the wrappers below: once
 * with jit->prg_buf == NULL to compute the image size, and once to emit
 * the code for real. Returns 0 on success or a negative errno.
 */
static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
					 struct bpf_tramp_jit *tjit,
					 const struct btf_func_model *m,
					 u32 flags,
					 struct bpf_tramp_links *tlinks,
					 void *func_addr)
{
	struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
	struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
	struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
	int nr_bpf_args, nr_reg_args, nr_stack_args;
	int cookie_cnt, cookie_off, fsession_cnt;
	struct bpf_jit *jit = &tjit->common;
	int arg, bpf_arg_off;
	u64 func_meta;
	int i, j;

	/* Support as many stack arguments as "mvc" instruction can handle. */
	nr_reg_args = min_t(int, m->nr_args, MAX_NR_REG_ARGS);
	nr_stack_args = m->nr_args - nr_reg_args;
	if (nr_stack_args > MAX_NR_STACK_ARGS)
		return -ENOTSUPP;

	/* Return to %r14 in the struct_ops case. */
	if (flags & BPF_TRAMP_F_INDIRECT)
		flags |= BPF_TRAMP_F_SKIP_FRAME;

	/*
	 * Compute how many arguments we need to pass to BPF programs.
	 * BPF ABI mirrors that of x86_64: arguments that are 16 bytes or
	 * smaller are packed into 1 or 2 registers; larger arguments are
	 * passed via pointers.
	 * In s390x ABI, arguments that are 8 bytes or smaller are packed into
	 * a register; larger arguments are passed via pointers.
	 * We need to deal with this difference.
	 */
	nr_bpf_args = 0;
	for (i = 0; i < m->nr_args; i++) {
		if (m->arg_size[i] <= 8)
			nr_bpf_args += 1;
		else if (m->arg_size[i] <= 16)
			nr_bpf_args += 2;
		else
			return -ENOTSUPP;
	}

	cookie_cnt = bpf_fsession_cookie_cnt(tlinks);
	fsession_cnt = bpf_fsession_cnt(tlinks);

	/*
	 * Calculate the stack layout.
	 */

	/*
	 * Allocate STACK_FRAME_OVERHEAD bytes for the callees. As the s390x
	 * ABI requires, put our backchain at the end of the allocated memory.
	 */
	tjit->stack_size = STACK_FRAME_OVERHEAD;
	tjit->backchain_off = tjit->stack_size - sizeof(u64);
	tjit->stack_args_off = alloc_stack(tjit, nr_stack_args * sizeof(u64));
	tjit->reg_args_off = alloc_stack(tjit, nr_reg_args * sizeof(u64));
	cookie_off = alloc_stack(tjit, cookie_cnt * sizeof(u64));
	tjit->ip_off = alloc_stack(tjit, sizeof(u64));
	tjit->func_meta_off = alloc_stack(tjit, sizeof(u64));
	tjit->bpf_args_off = alloc_stack(tjit, nr_bpf_args * sizeof(u64));
	tjit->retval_off = alloc_stack(tjit, sizeof(u64));
	tjit->r7_r8_off = alloc_stack(tjit, 2 * sizeof(u64));
	tjit->run_ctx_off = alloc_stack(tjit,
					sizeof(struct bpf_tramp_run_ctx));
	tjit->tccnt_off = alloc_stack(tjit, sizeof(u64));
	tjit->r14_off = alloc_stack(tjit, sizeof(u64) * 2);
	/*
	 * In accordance with the s390x ABI, the caller has allocated
	 * STACK_FRAME_OVERHEAD bytes for us. 8 of them contain the caller's
	 * backchain, and the rest we can use.
	 */
	tjit->stack_size -= STACK_FRAME_OVERHEAD - sizeof(u64);
	tjit->orig_stack_args_off = tjit->stack_size + STACK_FRAME_OVERHEAD;

	/* lgr %r1,%r15 */
	EMIT4(0xb9040000, REG_1, REG_15);
	/* aghi %r15,-stack_size */
	EMIT4_IMM(0xa70b0000, REG_15, -tjit->stack_size);
	/* stg %r1,backchain_off(%r15) */
	EMIT6_DISP_LH(0xe3000000, 0x0024, REG_1, REG_0, REG_15,
		      tjit->backchain_off);
	/* mvc tccnt_off(4,%r15),stack_size+tail_call_cnt(%r15) */
	_EMIT6(0xd203f000 | tjit->tccnt_off,
	       0xf000 | (tjit->stack_size +
			 offsetof(struct prog_frame, tail_call_cnt)));
	/* stmg %r2,%rN,reg_args_off(%r15) */
	if (nr_reg_args)
		EMIT6_DISP_LH(0xeb000000, 0x0024, REG_2,
			      REG_2 + (nr_reg_args - 1), REG_15,
			      tjit->reg_args_off);
	/* Convert the s390x ABI arguments into the BPF argument array. */
	for (i = 0, j = 0; i < m->nr_args; i++) {
		if (i < MAX_NR_REG_ARGS)
			arg = REG_2 + i;
		else
			arg = tjit->orig_stack_args_off +
			      (i - MAX_NR_REG_ARGS) * sizeof(u64);
		bpf_arg_off = tjit->bpf_args_off + j * sizeof(u64);
		if (m->arg_size[i] <= 8) {
			if (i < MAX_NR_REG_ARGS)
				/* stg %arg,bpf_arg_off(%r15) */
				EMIT6_DISP_LH(0xe3000000, 0x0024, arg,
					      REG_0, REG_15, bpf_arg_off);
			else
				/* mvc bpf_arg_off(8,%r15),arg(%r15) */
				_EMIT6(0xd207f000 | bpf_arg_off,
				       0xf000 | arg);
			j += 1;
		} else {
			/* 9-16 byte arguments are passed by pointer. */
			if (i < MAX_NR_REG_ARGS) {
				/* mvc bpf_arg_off(16,%r15),0(%arg) */
				_EMIT6(0xd20ff000 | bpf_arg_off,
				       reg2hex[arg] << 12);
			} else {
				/* lg %r1,arg(%r15) */
				EMIT6_DISP_LH(0xe3000000, 0x0004, REG_1, REG_0,
					      REG_15, arg);
				/* mvc bpf_arg_off(16,%r15),0(%r1) */
				_EMIT6(0xd20ff000 | bpf_arg_off, 0x1000);
			}
			j += 2;
		}
	}
	/* stmg %r7,%r8,r7_r8_off(%r15) */
	EMIT6_DISP_LH(0xeb000000, 0x0024, REG_7, REG_8, REG_15,
		      tjit->r7_r8_off);
	/* stg %r14,r14_off(%r15) */
	EMIT6_DISP_LH(0xe3000000, 0x0024, REG_14, REG_0, REG_15, tjit->r14_off);

	if (flags & BPF_TRAMP_F_ORIG_STACK) {
		/*
		 * The ftrace trampoline puts the return address (which is the
		 * address of the original function + S390X_PATCH_SIZE) into
		 * %r0; see ftrace_shared_hotpatch_trampoline_br and
		 * ftrace_init_nop() for details.
		 */

		/* lgr %r8,%r0 */
		EMIT4(0xb9040000, REG_8, REG_0);
	}

	/*
	 * ip = func_addr;
	 * arg_cnt = m->nr_args;
	 */

	if (flags & BPF_TRAMP_F_IP_ARG)
		emit_store_stack_imm64(jit, REG_0, tjit->ip_off, (u64)func_addr);
	func_meta = nr_bpf_args;
	/* lghi %r0,func_meta */
	EMIT4_IMM(0xa7090000, REG_0, func_meta);
	/* stg %r0,func_meta_off(%r15) */
	EMIT6_DISP_LH(0xe3000000, 0x0024, REG_0, REG_0, REG_15,
		      tjit->func_meta_off);

	if (flags & BPF_TRAMP_F_CALL_ORIG) {
		/*
		 * __bpf_tramp_enter(im);
		 */

		/* %r2 = im */
		load_imm64(jit, REG_2, (u64)im);
		/* brasl %r14,__bpf_tramp_enter */
		EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14, __bpf_tramp_enter);
	}

	if (fsession_cnt) {
		/* Clear all the session cookies' value. */
		for (i = 0; i < cookie_cnt; i++)
			emit_store_stack_imm64(jit, REG_0, cookie_off + 8 * i, 0);
		/* Clear the return value to make sure fentry always gets 0. */
		emit_store_stack_imm64(jit, REG_0, tjit->retval_off, 0);
	}

	if (invoke_bpf(tjit, m, fentry, flags & BPF_TRAMP_F_RET_FENTRY_RET,
		       func_meta, cookie_off))
		return -EINVAL;

	if (fmod_ret->nr_links) {
		/*
		 * retval = 0;
		 */

		/* xc retval_off(8,%r15),retval_off(%r15) */
		_EMIT6(0xd707f000 | tjit->retval_off,
		       0xf000 | tjit->retval_off);

		for (i = 0; i < fmod_ret->nr_links; i++) {
			if (invoke_bpf_prog(tjit, m, fmod_ret->links[i], true))
				return -EINVAL;

			/*
			 * if (retval)
			 *         goto do_fexit;
			 */

			/* ltg %r0,retval_off(%r15) */
			EMIT6_DISP_LH(0xe3000000, 0x0002, REG_0, REG_0, REG_15,
				      tjit->retval_off);
			/* brcl 7,do_fexit */
			EMIT6_PCREL_RILC(0xc0040000, 7, tjit->do_fexit);
		}
	}

	if (flags & BPF_TRAMP_F_CALL_ORIG) {
		/*
		 * retval = func_addr(args);
		 */

		/* lmg %r2,%rN,reg_args_off(%r15) */
		if (nr_reg_args)
			EMIT6_DISP_LH(0xeb000000, 0x0004, REG_2,
				      REG_2 + (nr_reg_args - 1), REG_15,
				      tjit->reg_args_off);
		/* mvc stack_args_off(N,%r15),orig_stack_args_off(%r15) */
		if (nr_stack_args)
			_EMIT6(0xd200f000 |
				       (nr_stack_args * sizeof(u64) - 1) << 16 |
				       tjit->stack_args_off,
			       0xf000 | tjit->orig_stack_args_off);
		/* mvc tail_call_cnt(4,%r15),tccnt_off(%r15) */
		_EMIT6(0xd203f000 | offsetof(struct prog_frame, tail_call_cnt),
		       0xf000 | tjit->tccnt_off);
		if (flags & BPF_TRAMP_F_ORIG_STACK) {
			if (nospec_uses_trampoline())
				/* brasl %r14,__s390_indirect_jump_r8 */
				EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14,
						     __s390_indirect_jump_r8);
			else
				/* basr %r14,%r8 */
				EMIT2(0x0d00, REG_14, REG_8);
		} else {
			/* brasl %r14,func_addr+S390X_PATCH_SIZE */
			EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14,
					     func_addr + S390X_PATCH_SIZE);
		}
		/* stg %r2,retval_off(%r15) */
		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_2, REG_0, REG_15,
			      tjit->retval_off);
		/* mvc tccnt_off(4,%r15),tail_call_cnt(%r15) */
		_EMIT6(0xd203f000 | tjit->tccnt_off,
		       0xf000 | offsetof(struct prog_frame, tail_call_cnt));

		im->ip_after_call = jit->prg_buf + jit->prg;

		/*
		 * The following nop will be patched by bpf_tramp_image_put().
		 */

		/* brcl 0,im->ip_epilogue */
		EMIT6_PCREL_RILC(0xc0040000, 0, (u64)im->ip_epilogue);
	}

	/* Set the "is_return" flag for fsession. */
	func_meta |= (1ULL << BPF_TRAMP_IS_RETURN_SHIFT);
	if (fsession_cnt)
		emit_store_stack_imm64(jit, REG_W0, tjit->func_meta_off,
				       func_meta);

	/* do_fexit: */
	tjit->do_fexit = jit->prg;
	if (invoke_bpf(tjit, m, fexit, false, func_meta, cookie_off))
		return -EINVAL;

	if (flags & BPF_TRAMP_F_CALL_ORIG) {
		im->ip_epilogue = jit->prg_buf + jit->prg;

		/*
		 * __bpf_tramp_exit(im);
		 */

		/* %r2 = im */
		load_imm64(jit, REG_2, (u64)im);
		/* brasl %r14,__bpf_tramp_exit */
		EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14, __bpf_tramp_exit);
	}

	/* lmg %r2,%rN,reg_args_off(%r15) */
	if ((flags & BPF_TRAMP_F_RESTORE_REGS) && nr_reg_args)
		EMIT6_DISP_LH(0xeb000000, 0x0004, REG_2,
			      REG_2 + (nr_reg_args - 1), REG_15,
			      tjit->reg_args_off);
	/* lgr %r1,%r8 */
	if (!(flags & BPF_TRAMP_F_SKIP_FRAME) &&
	    (flags & BPF_TRAMP_F_ORIG_STACK))
		EMIT4(0xb9040000, REG_1, REG_8);
	/* lmg %r7,%r8,r7_r8_off(%r15) */
	EMIT6_DISP_LH(0xeb000000, 0x0004, REG_7, REG_8, REG_15,
		      tjit->r7_r8_off);
	/* lg %r14,r14_off(%r15) */
	EMIT6_DISP_LH(0xe3000000, 0x0004, REG_14, REG_0, REG_15, tjit->r14_off);
	/* lg %r2,retval_off(%r15) */
	if (flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET))
		EMIT6_DISP_LH(0xe3000000, 0x0004, REG_2, REG_0, REG_15,
			      tjit->retval_off);
	/* mvc stack_size+tail_call_cnt(4,%r15),tccnt_off(%r15) */
	_EMIT6(0xd203f000 | (tjit->stack_size +
			     offsetof(struct prog_frame, tail_call_cnt)),
	       0xf000 | tjit->tccnt_off);
	/* aghi %r15,stack_size */
	EMIT4_IMM(0xa70b0000, REG_15, tjit->stack_size);
	if (flags & BPF_TRAMP_F_SKIP_FRAME)
		EMIT_JUMP_REG(14);
	else if (flags & BPF_TRAMP_F_ORIG_STACK)
		EMIT_JUMP_REG(1);
	else
		/* brcl 0xf,func_addr+S390X_PATCH_SIZE */
		EMIT6_PCREL_RILC_PTR(0xc0040000, 0xf,
				     func_addr + S390X_PATCH_SIZE);
	return 0;
}
2940 
2941 int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
2942 			     struct bpf_tramp_links *tlinks, void *orig_call)
2943 {
2944 	struct bpf_tramp_image im;
2945 	struct bpf_tramp_jit tjit;
2946 	int ret;
2947 
2948 	memset(&tjit, 0, sizeof(tjit));
2949 
2950 	ret = __arch_prepare_bpf_trampoline(&im, &tjit, m, flags,
2951 					    tlinks, orig_call);
2952 
2953 	return ret < 0 ? ret : tjit.common.prg;
2954 }
2955 
/*
 * Generate the trampoline for func_addr into [image, image_end).
 * The first __arch_prepare_bpf_trampoline() call only computes the size
 * (prg_buf == NULL); the second one emits the code into the image.
 * Returns the generated size on success or a negative errno.
 */
int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
				void *image_end, const struct btf_func_model *m,
				u32 flags, struct bpf_tramp_links *tlinks,
				void *func_addr)
{
	struct bpf_tramp_jit tjit;
	int ret;

	/* Compute offsets, check whether the code fits. */
	memset(&tjit, 0, sizeof(tjit));
	ret = __arch_prepare_bpf_trampoline(im, &tjit, m, flags,
					    tlinks, func_addr);

	if (ret < 0)
		return ret;
	if (tjit.common.prg > (char *)image_end - (char *)image)
		/*
		 * Use the same error code as for exceeding
		 * BPF_MAX_TRAMP_LINKS.
		 */
		return -E2BIG;

	/* Second pass: emit the code for real. */
	tjit.common.prg = 0;
	tjit.common.prg_buf = image;
	ret = __arch_prepare_bpf_trampoline(im, &tjit, m, flags,
					    tlinks, func_addr);

	return ret < 0 ? ret : tjit.common.prg;
}
2985 
/* The s390 JIT supports tail calls from within BPF subprograms. */
bool bpf_jit_supports_subprog_tailcalls(void)
{
	return true;
}
2990 
/* The s390 JIT supports the BPF arena (see bpf_jit_supports_insn() for
 * the per-instruction exceptions that apply to arena accesses).
 */
bool bpf_jit_supports_arena(void)
{
	return true;
}
2995 
/* The s390 JIT supports fsession probes. */
bool bpf_jit_supports_fsession(void)
{
	return true;
}
3000 
3001 bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena)
3002 {
3003 	if (!in_arena)
3004 		return true;
3005 	switch (insn->code) {
3006 	case BPF_STX | BPF_ATOMIC | BPF_B:
3007 	case BPF_STX | BPF_ATOMIC | BPF_H:
3008 	case BPF_STX | BPF_ATOMIC | BPF_W:
3009 	case BPF_STX | BPF_ATOMIC | BPF_DW:
3010 		if (bpf_atomic_is_load_store(insn))
3011 			return false;
3012 		break;
3013 	case BPF_LDX | BPF_MEMSX | BPF_B:
3014 	case BPF_LDX | BPF_MEMSX | BPF_H:
3015 	case BPF_LDX | BPF_MEMSX | BPF_W:
3016 		return false;
3017 	}
3018 	return true;
3019 }
3020 
bool bpf_jit_supports_exceptions(void)
{
	/*
	 * Exceptions require unwinding support, which is always available,
	 * because the kernel is always built with backchain.
	 */
	return true;
}
3029 
3030 void arch_bpf_stack_walk(bool (*consume_fn)(void *, u64, u64, u64),
3031 			 void *cookie)
3032 {
3033 	unsigned long addr, prev_addr = 0;
3034 	struct unwind_state state;
3035 
3036 	unwind_for_each_frame(&state, NULL, NULL, 0) {
3037 		addr = unwind_get_return_address(&state);
3038 		if (!addr)
3039 			break;
3040 		/*
3041 		 * addr is a return address and state.sp is the value of %r15
3042 		 * at this address. exception_cb needs %r15 at entry to the
3043 		 * function containing addr, so take the next state.sp.
3044 		 *
3045 		 * There is no bp, and the exception_cb prog does not need one
3046 		 * to perform a quasi-longjmp. The common code requires a
3047 		 * non-zero bp, so pass sp there as well.
3048 		 */
3049 		if (prev_addr && !consume_fn(cookie, prev_addr, state.sp,
3050 					     state.sp))
3051 			break;
3052 		prev_addr = addr;
3053 	}
3054 }
3055 
/* The s390 JIT supports the timed may_goto instruction. */
bool bpf_jit_supports_timed_may_goto(void)
{
	return true;
}
3060