xref: /linux/arch/s390/net/bpf_jit_comp.c (revision 7fc2cd2e4b398c57c9cf961cfea05eadbf34c05c)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * BPF Jit compiler for s390.
4  *
5  * Minimum build requirements:
6  *
7  *  - HAVE_MARCH_Z196_FEATURES: laal, laalg
8  *  - HAVE_MARCH_Z10_FEATURES: msfi, cgrj, clgrj
9  *  - HAVE_MARCH_Z9_109_FEATURES: alfi, llilf, clfi, oilf, nilf
10  *  - 64BIT
11  *
12  * Copyright IBM Corp. 2012,2015
13  *
14  * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
15  *	      Michael Holzheu <holzheu@linux.vnet.ibm.com>
16  */
17 
18 #define pr_fmt(fmt) "bpf_jit: " fmt
19 
20 #include <linux/netdevice.h>
21 #include <linux/filter.h>
22 #include <linux/init.h>
23 #include <linux/bpf.h>
24 #include <linux/mm.h>
25 #include <linux/kernel.h>
26 #include <asm/cacheflush.h>
27 #include <asm/extable.h>
28 #include <asm/dis.h>
29 #include <asm/facility.h>
30 #include <asm/nospec-branch.h>
31 #include <asm/set_memory.h>
32 #include <asm/text-patching.h>
33 #include <asm/unwind.h>
34 
35 struct bpf_jit {
36 	u32 seen;		/* Flags to remember seen eBPF instructions */
37 	u16 seen_regs;		/* Mask to remember which registers are used */
38 	u32 *addrs;		/* Array with relative instruction addresses */
39 	u8 *prg_buf;		/* Start of program */
40 	int size;		/* Size of program and literal pool */
41 	int size_prg;		/* Size of program */
42 	int prg;		/* Current position in program */
43 	int lit32_start;	/* Start of 32-bit literal pool */
44 	int lit32;		/* Current position in 32-bit literal pool */
45 	int lit64_start;	/* Start of 64-bit literal pool */
46 	int lit64;		/* Current position in 64-bit literal pool */
47 	int base_ip;		/* Base address for literal pool */
48 	int exit_ip;		/* Address of exit */
49 	int tail_call_start;	/* Tail call start offset */
50 	int excnt;		/* Number of exception table entries */
51 	int prologue_plt_ret;	/* Return address for prologue hotpatch PLT */
52 	int prologue_plt;	/* Start of prologue hotpatch PLT */
53 	int kern_arena;		/* Pool offset of kernel arena address */
54 	u64 user_arena;		/* User arena address */
55 	u32 frame_off;		/* Offset of struct bpf_prog from %r15 */
56 };
57 
58 #define SEEN_MEM	BIT(0)		/* use mem[] for temporary storage */
59 #define SEEN_LITERAL	BIT(1)		/* code uses literals */
60 #define SEEN_FUNC	BIT(2)		/* calls C functions */
61 #define SEEN_STACK	(SEEN_FUNC | SEEN_MEM)
62 
63 #define NVREGS		0xffc0		/* %r6-%r15 */
64 
65 /*
66  * s390 registers
67  */
68 #define REG_W0		(MAX_BPF_JIT_REG + 0)	/* Work register 1 (even) */
69 #define REG_W1		(MAX_BPF_JIT_REG + 1)	/* Work register 2 (odd) */
70 #define REG_L		(MAX_BPF_JIT_REG + 2)	/* Literal pool register */
71 #define REG_15		(MAX_BPF_JIT_REG + 3)	/* Register 15 */
72 #define REG_0		REG_W0			/* Register 0 */
73 #define REG_1		REG_W1			/* Register 1 */
74 #define REG_2		BPF_REG_1		/* Register 2 */
75 #define REG_3		BPF_REG_2		/* Register 3 */
76 #define REG_4		BPF_REG_3		/* Register 4 */
77 #define REG_7		BPF_REG_6		/* Register 7 */
78 #define REG_8		BPF_REG_7		/* Register 8 */
79 #define REG_14		BPF_REG_0		/* Register 14 */
80 
81 /*
82  * Mapping of BPF registers to s390 registers
83  */
84 static const int reg2hex[] = {
85 	/* Return code */
86 	[BPF_REG_0]	= 14,
87 	/* Function parameters */
88 	[BPF_REG_1]	= 2,
89 	[BPF_REG_2]	= 3,
90 	[BPF_REG_3]	= 4,
91 	[BPF_REG_4]	= 5,
92 	[BPF_REG_5]	= 6,
93 	/* Call saved registers */
94 	[BPF_REG_6]	= 7,
95 	[BPF_REG_7]	= 8,
96 	[BPF_REG_8]	= 9,
97 	[BPF_REG_9]	= 10,
98 	/* BPF stack pointer */
99 	[BPF_REG_FP]	= 13,
100 	/* Register for blinding */
101 	[BPF_REG_AX]	= 12,
102 	/* Work registers for s390x backend */
103 	[REG_W0]	= 0,
104 	[REG_W1]	= 1,
105 	[REG_L]		= 11,
106 	[REG_15]	= 15,
107 };
108 
109 static inline u32 reg(u32 dst_reg, u32 src_reg)
110 {
111 	return reg2hex[dst_reg] << 4 | reg2hex[src_reg];
112 }
113 
114 static inline u32 reg_high(u32 reg)
115 {
116 	return reg2hex[reg] << 4;
117 }
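/*
 * Illustrative example: reg2hex[BPF_REG_1] = 2 and reg2hex[BPF_REG_2] = 3,
 * so reg(BPF_REG_1, BPF_REG_2) = 0x23 and EMIT2(0x1a00, BPF_REG_1, BPF_REG_2)
 * assembles to 0x1a23, i.e. "ar %r2,%r3".
 */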
118 
119 static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
120 {
121 	u32 r1 = reg2hex[b1];
122 
123 	if (r1 >= 6 && r1 <= 15)
124 		jit->seen_regs |= (1 << r1);
125 }
126 
127 static s32 off_to_pcrel(struct bpf_jit *jit, u32 off)
128 {
129 	return off - jit->prg;
130 }
131 
132 static s64 ptr_to_pcrel(struct bpf_jit *jit, const void *ptr)
133 {
134 	if (jit->prg_buf)
135 		return (const u8 *)ptr - ((const u8 *)jit->prg_buf + jit->prg);
136 	return 0;
137 }
138 
139 #define REG_SET_SEEN(b1)					\
140 ({								\
141 	reg_set_seen(jit, b1);					\
142 })
143 
144 /*
145  * EMIT macros for code generation
146  */
147 
148 #define _EMIT2(op)						\
149 ({								\
150 	if (jit->prg_buf)					\
151 		*(u16 *) (jit->prg_buf + jit->prg) = (op);	\
152 	jit->prg += 2;						\
153 })
154 
155 #define EMIT2(op, b1, b2)					\
156 ({								\
157 	_EMIT2((op) | reg(b1, b2));				\
158 	REG_SET_SEEN(b1);					\
159 	REG_SET_SEEN(b2);					\
160 })
161 
162 #define _EMIT4(op)						\
163 ({								\
164 	if (jit->prg_buf)					\
165 		*(u32 *) (jit->prg_buf + jit->prg) = (op);	\
166 	jit->prg += 4;						\
167 })
168 
169 #define EMIT4(op, b1, b2)					\
170 ({								\
171 	_EMIT4((op) | reg(b1, b2));				\
172 	REG_SET_SEEN(b1);					\
173 	REG_SET_SEEN(b2);					\
174 })
175 
176 #define EMIT4_RRF(op, b1, b2, b3)				\
177 ({								\
178 	_EMIT4((op) | reg_high(b3) << 8 | reg(b1, b2));		\
179 	REG_SET_SEEN(b1);					\
180 	REG_SET_SEEN(b2);					\
181 	REG_SET_SEEN(b3);					\
182 })
183 
184 #define _EMIT4_DISP(op, disp)					\
185 ({								\
186 	unsigned int __disp = (disp) & 0xfff;			\
187 	_EMIT4((op) | __disp);					\
188 })
189 
190 #define EMIT4_DISP(op, b1, b2, disp)				\
191 ({								\
192 	_EMIT4_DISP((op) | reg_high(b1) << 16 |			\
193 		    reg_high(b2) << 8, (disp));			\
194 	REG_SET_SEEN(b1);					\
195 	REG_SET_SEEN(b2);					\
196 })
197 
198 #define EMIT4_IMM(op, b1, imm)					\
199 ({								\
200 	unsigned int __imm = (imm) & 0xffff;			\
201 	_EMIT4((op) | reg_high(b1) << 16 | __imm);		\
202 	REG_SET_SEEN(b1);					\
203 })
204 
205 #define EMIT4_PCREL(op, pcrel)					\
206 ({								\
207 	long __pcrel = ((pcrel) >> 1) & 0xffff;			\
208 	_EMIT4((op) | __pcrel);					\
209 })
210 
211 #define EMIT4_PCREL_RIC(op, mask, target)			\
212 ({								\
213 	int __rel = off_to_pcrel(jit, target) / 2;		\
214 	_EMIT4((op) | (mask) << 20 | (__rel & 0xffff));		\
215 })
216 
217 #define _EMIT6(op1, op2)					\
218 ({								\
219 	if (jit->prg_buf) {					\
220 		*(u32 *) (jit->prg_buf + jit->prg) = (op1);	\
221 		*(u16 *) (jit->prg_buf + jit->prg + 4) = (op2);	\
222 	}							\
223 	jit->prg += 6;						\
224 })
225 
226 #define _EMIT6_DISP(op1, op2, disp)				\
227 ({								\
228 	unsigned int __disp = (disp) & 0xfff;			\
229 	_EMIT6((op1) | __disp, op2);				\
230 })
231 
232 #define _EMIT6_DISP_LH(op1, op2, disp)				\
233 ({								\
234 	u32 _disp = (u32) (disp);				\
235 	unsigned int __disp_h = _disp & 0xff000;		\
236 	unsigned int __disp_l = _disp & 0x00fff;		\
237 	_EMIT6((op1) | __disp_l, (op2) | __disp_h >> 4);	\
238 })
239 
240 #define EMIT6_DISP_LH(op1, op2, b1, b2, b3, disp)		\
241 ({								\
242 	_EMIT6_DISP_LH((op1) | reg(b1, b2) << 16 |		\
243 		       reg_high(b3) << 8, op2, disp);		\
244 	REG_SET_SEEN(b1);					\
245 	REG_SET_SEEN(b2);					\
246 	REG_SET_SEEN(b3);					\
247 })
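/*
 * Illustrative example: RXY/RSY instructions split a signed 20-bit
 * displacement into a low 12-bit DL field (in op1) and a high 8-bit DH field
 * (in op2). E.g. disp = 0x12345 yields DL = 0x345 and DH = 0x12 in
 * _EMIT6_DISP_LH() above.
 */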
248 
249 #define EMIT6_PCREL_RIEB(op1, op2, b1, b2, mask, target)	\
250 ({								\
251 	unsigned int rel = off_to_pcrel(jit, target) / 2;	\
252 	_EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff),	\
253 	       (op2) | (mask) << 12);				\
254 	REG_SET_SEEN(b1);					\
255 	REG_SET_SEEN(b2);					\
256 })
257 
258 #define EMIT6_PCREL_RIEC(op1, op2, b1, imm, mask, target)	\
259 ({								\
260 	unsigned int rel = off_to_pcrel(jit, target) / 2;	\
261 	_EMIT6((op1) | (reg_high(b1) | (mask)) << 16 |		\
262 		(rel & 0xffff), (op2) | ((imm) & 0xff) << 8);	\
263 	REG_SET_SEEN(b1);					\
264 	BUILD_BUG_ON(((unsigned long) (imm)) > 0xff);		\
265 })
266 
267 #define EMIT6_PCREL(op1, op2, b1, b2, i, off, mask)		\
268 ({								\
269 	int rel = off_to_pcrel(jit, addrs[(i) + (off) + 1]) / 2;\
270 	_EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff), (op2) | (mask));\
271 	REG_SET_SEEN(b1);					\
272 	REG_SET_SEEN(b2);					\
273 })
274 
275 static void emit6_pcrel_ril(struct bpf_jit *jit, u32 op, s64 pcrel)
276 {
277 	u32 pc32dbl = (s32)(pcrel / 2);
278 
279 	_EMIT6(op | pc32dbl >> 16, pc32dbl & 0xffff);
280 }
281 
282 static void emit6_pcrel_rilb(struct bpf_jit *jit, u32 op, u8 b, s64 pcrel)
283 {
284 	emit6_pcrel_ril(jit, op | reg_high(b) << 16, pcrel);
285 	REG_SET_SEEN(b);
286 }
287 
288 #define EMIT6_PCREL_RILB(op, b, target)				\
289 	emit6_pcrel_rilb(jit, op, b, off_to_pcrel(jit, target))
290 
291 #define EMIT6_PCREL_RILB_PTR(op, b, target_ptr)			\
292 	emit6_pcrel_rilb(jit, op, b, ptr_to_pcrel(jit, target_ptr))
293 
294 static void emit6_pcrel_rilc(struct bpf_jit *jit, u32 op, u8 mask, s64 pcrel)
295 {
296 	emit6_pcrel_ril(jit, op | mask << 20, pcrel);
297 }
298 
299 #define EMIT6_PCREL_RILC(op, mask, target)			\
300 	emit6_pcrel_rilc(jit, op, mask, off_to_pcrel(jit, target))
301 
302 #define EMIT6_PCREL_RILC_PTR(op, mask, target_ptr)		\
303 	emit6_pcrel_rilc(jit, op, mask, ptr_to_pcrel(jit, target_ptr))
304 
305 #define _EMIT6_IMM(op, imm)					\
306 ({								\
307 	unsigned int __imm = (imm);				\
308 	_EMIT6((op) | (__imm >> 16), __imm & 0xffff);		\
309 })
310 
311 #define EMIT6_IMM(op, b1, imm)					\
312 ({								\
313 	_EMIT6_IMM((op) | reg_high(b1) << 16, imm);		\
314 	REG_SET_SEEN(b1);					\
315 })
316 
317 #define _EMIT_CONST_U32(val)					\
318 ({								\
319 	unsigned int ret;					\
320 	ret = jit->lit32;					\
321 	if (jit->prg_buf)					\
322 		*(u32 *)(jit->prg_buf + jit->lit32) = (u32)(val);\
323 	jit->lit32 += 4;					\
324 	ret;							\
325 })
326 
327 #define EMIT_CONST_U32(val)					\
328 ({								\
329 	jit->seen |= SEEN_LITERAL;				\
330 	_EMIT_CONST_U32(val) - jit->base_ip;			\
331 })
332 
333 #define _EMIT_CONST_U64(val)					\
334 ({								\
335 	unsigned int ret;					\
336 	ret = jit->lit64;					\
337 	if (jit->prg_buf)					\
338 		*(u64 *)(jit->prg_buf + jit->lit64) = (u64)(val);\
339 	jit->lit64 += 8;					\
340 	ret;							\
341 })
342 
343 #define EMIT_CONST_U64(val)					\
344 ({								\
345 	jit->seen |= SEEN_LITERAL;				\
346 	_EMIT_CONST_U64(val) - jit->base_ip;			\
347 })
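/*
 * Typical (illustrative) use: with the literal pool base in %l (%r11, set up
 * in the prologue), EMIT6_DISP_LH(0xe3000000, 0x0004, dst_reg, REG_0, REG_L,
 * EMIT_CONST_U64(val)) emits "lg %dst,<off>(%l)", where <off> is the pool
 * offset of val relative to base_ip.
 */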
348 
349 #define EMIT_ZERO(b1)						\
350 ({								\
351 	if (!fp->aux->verifier_zext) {				\
352 		/* llgfr %dst,%dst (zero extend to 64 bit) */	\
353 		EMIT4(0xb9160000, b1, b1);			\
354 		REG_SET_SEEN(b1);				\
355 	}							\
356 })
357 
358 /*
359  * Return whether this is the first pass. The first pass is special, since we
360  * don't know any sizes yet, and thus must be conservative.
361  */
362 static bool is_first_pass(struct bpf_jit *jit)
363 {
364 	return jit->size == 0;
365 }
366 
367 /*
368  * Return whether this is the code generation pass. The code generation pass is
369  * special, since we should change as little as possible.
370  */
371 static bool is_codegen_pass(struct bpf_jit *jit)
372 {
373 	return jit->prg_buf;
374 }
375 
376 /*
377  * Return whether "rel" can be encoded as a short PC-relative offset
378  */
379 static bool is_valid_rel(int rel)
380 {
381 	return rel >= -65536 && rel <= 65534;
382 }
383 
384 /*
385  * Return whether "off" can be reached using a short PC-relative offset
386  */
387 static bool can_use_rel(struct bpf_jit *jit, int off)
388 {
389 	return is_valid_rel(off - jit->prg);
390 }
391 
392 /*
393  * Return whether given displacement can be encoded using
394  * Long-Displacement Facility
395  */
396 static bool is_valid_ldisp(int disp)
397 {
398 	return disp >= -524288 && disp <= 524287;
399 }
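/* A long displacement is a signed 20-bit value: -2^19 .. 2^19 - 1. */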
400 
401 /*
402  * Return whether the next 32-bit literal pool entry can be referenced using
403  * Long-Displacement Facility
404  */
405 static bool can_use_ldisp_for_lit32(struct bpf_jit *jit)
406 {
407 	return is_valid_ldisp(jit->lit32 - jit->base_ip);
408 }
409 
410 /*
411  * Return whether the next 64-bit literal pool entry can be referenced using
412  * Long-Displacement Facility
413  */
414 static bool can_use_ldisp_for_lit64(struct bpf_jit *jit)
415 {
416 	return is_valid_ldisp(jit->lit64 - jit->base_ip);
417 }
418 
419 /*
420  * Fill whole space with illegal instructions
421  */
422 static void jit_fill_hole(void *area, unsigned int size)
423 {
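	/* An all-zero opcode is an illegal instruction on s390, so zero-fill. */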
424 	memset(area, 0, size);
425 }
426 
427 /*
428  * Caller-allocated part of the frame.
429  * Thanks to packed stack, its otherwise unused initial part can be used for
430  * the BPF stack and for the next frame.
431  */
432 struct prog_frame {
433 	u64 unused[8];
434 	/* BPF stack starts here and grows towards 0 */
435 	u32 tail_call_cnt;
436 	u32 pad;
437 	u64 r6[10];  /* r6 - r15 */
438 	u64 backchain;
439 } __packed;
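/*
 * Resulting layout (for illustration): unused[] at 0..63, tail_call_cnt at 64,
 * pad at 68, r6[] at 72..151, backchain at 152; 160 bytes in total, which the
 * BUILD_BUG_ON() in bpf_jit_prologue() checks against STACK_FRAME_OVERHEAD.
 */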
440 
441 /*
442  * Save registers from "rs" (register start) to "re" (register end) on stack
443  */
444 static void save_regs(struct bpf_jit *jit, u32 rs, u32 re)
445 {
446 	u32 off = offsetof(struct prog_frame, r6) + (rs - 6) * 8;
447 
448 	if (rs == re)
449 		/* stg %rs,off(%r15) */
450 		_EMIT6(0xe300f000 | rs << 20 | off, 0x0024);
451 	else
452 		/* stmg %rs,%re,off(%r15) */
453 		_EMIT6_DISP(0xeb00f000 | rs << 20 | re << 16, 0x0024, off);
454 }
455 
456 /*
457  * Restore registers "rs" (register start) through "re" (register end) from the stack
458  */
459 static void restore_regs(struct bpf_jit *jit, u32 rs, u32 re)
460 {
461 	u32 off = jit->frame_off + offsetof(struct prog_frame, r6) + (rs - 6) * 8;
462 
463 	if (rs == re)
464 		/* lg %rs,off(%r15) */
465 		_EMIT6(0xe300f000 | rs << 20 | off, 0x0004);
466 	else
467 		/* lmg %rs,%re,off(%r15) */
468 		_EMIT6_DISP(0xeb00f000 | rs << 20 | re << 16, 0x0004, off);
469 }
470 
471 /*
472  * Return first seen register (from start)
473  */
474 static int get_start(u16 seen_regs, int start)
475 {
476 	int i;
477 
478 	for (i = start; i <= 15; i++) {
479 		if (seen_regs & (1 << i))
480 			return i;
481 	}
482 	return 0;
483 }
484 
485 /*
486  * Return last seen register before a gap of at least 2 (searching from start)
487  */
488 static int get_end(u16 seen_regs, int start)
489 {
490 	int i;
491 
492 	for (i = start; i < 15; i++) {
493 		if (!(seen_regs & (3 << i)))
494 			return i - 1;
495 	}
496 	return (seen_regs & (1 << 15)) ? 15 : 14;
497 }
498 
499 #define REGS_SAVE	1
500 #define REGS_RESTORE	0
501 /*
502  * Save and restore clobbered registers (6-15) on stack.
503  * We save/restore registers in chunks with gap >= 2 registers.
504  */
505 static void save_restore_regs(struct bpf_jit *jit, int op, u16 extra_regs)
506 {
507 	u16 seen_regs = jit->seen_regs | extra_regs;
508 	const int last = 15, save_restore_size = 6;
509 	int re = 6, rs;
510 
511 	if (is_first_pass(jit)) {
512 		/*
513 		 * We don't know yet which registers are used. Reserve space
514 		 * conservatively.
515 		 */
516 		jit->prg += (last - re + 1) * save_restore_size;
517 		return;
518 	}
519 
520 	do {
521 		rs = get_start(seen_regs, re);
522 		if (!rs)
523 			break;
524 		re = get_end(seen_regs, rs + 1);
525 		if (op == REGS_SAVE)
526 			save_regs(jit, rs, re);
527 		else
528 			restore_regs(jit, rs, re);
529 		re++;
530 	} while (re <= last);
531 }
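/*
 * Worked example: if %r6, %r7, %r10 and %r15 are seen, REGS_SAVE emits
 * "stmg %r6,%r7,...", "stg %r10,..." and "stg %r15,..." - the gaps at
 * %r8-%r9 and %r11-%r14 are at least two registers wide, so they end the
 * groups (see get_end()).
 */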
532 
533 static void bpf_skip(struct bpf_jit *jit, int size)
534 {
535 	if (size >= 6 && !is_valid_rel(size)) {
536 		/* brcl 0xf,size */
537 		EMIT6_PCREL_RILC(0xc0040000, 0xf, size);
538 		size -= 6;
539 	} else if (size >= 4 && is_valid_rel(size)) {
540 		/* brc 0xf,size */
541 		EMIT4_PCREL(0xa7f40000, size);
542 		size -= 4;
543 	}
544 	while (size >= 2) {
545 		/* bcr 0,%0 */
546 		_EMIT2(0x0700);
547 		size -= 2;
548 	}
549 }
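/*
 * Example: bpf_skip(jit, 8) emits "brc 0xf,8" (branching over the remaining
 * pad) followed by two "bcr 0,%0" no-ops, for 8 bytes in total.
 */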
550 
551 /*
552  * PLT for hotpatchable calls. The calling convention is the same as for the
553  * ftrace hotpatch trampolines: %r0 is return address, %r1 is clobbered.
554  */
555 struct bpf_plt {
556 	char code[16];
557 	void *ret;
558 	void *target;
559 } __packed;
560 extern const struct bpf_plt bpf_plt;
561 asm(
562 	".pushsection .rodata\n"
563 	"	.balign 8\n"
564 	"bpf_plt:\n"
565 	"	lgrl %r0,bpf_plt_ret\n"
566 	"	lgrl %r1,bpf_plt_target\n"
567 	"	br %r1\n"
568 	"	.balign 8\n"
569 	"bpf_plt_ret: .quad 0\n"
570 	"bpf_plt_target: .quad 0\n"
571 	"	.popsection\n"
572 );
573 
574 static void bpf_jit_plt(struct bpf_plt *plt, void *ret, void *target)
575 {
576 	memcpy(plt, &bpf_plt, sizeof(*plt));
577 	plt->ret = ret;
578 	/*
579 	 * (target == NULL) implies that the branch to this PLT entry was
580 	 * patched and became a no-op. However, some CPU could have jumped
581 	 * to this PLT entry before patching and may be still executing it.
582 	 *
583 	 * Since the intention in this case is to make the PLT entry a no-op,
584 	 * make the target point to the return label instead of NULL.
585 	 */
586 	plt->target = target ?: ret;
587 }
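/*
 * Layout note: the code[] template (two lgrl + br, padded to 16 bytes) is
 * immediately followed by the ret and target pointers at offsets 16 and 24.
 * Since lgrl addresses its operand PC-relatively, copying the whole struct
 * keeps the references to ret and target intact.
 */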
588 
589 /*
590  * Emit function prologue
591  *
592  * Save registers and create stack frame if necessary.
593  * Stack frame layout is described by struct prog_frame.
594  */
595 static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp)
596 {
597 	BUILD_BUG_ON(sizeof(struct prog_frame) != STACK_FRAME_OVERHEAD);
598 
599 	/* No-op for hotpatching */
600 	/* brcl 0,prologue_plt */
601 	EMIT6_PCREL_RILC(0xc0040000, 0, jit->prologue_plt);
602 	jit->prologue_plt_ret = jit->prg;
603 
604 	if (!bpf_is_subprog(fp)) {
605 		/* Initialize the tail call counter in the main program. */
606 		/* xc tail_call_cnt(4,%r15),tail_call_cnt(%r15) */
607 		_EMIT6(0xd703f000 | offsetof(struct prog_frame, tail_call_cnt),
608 		       0xf000 | offsetof(struct prog_frame, tail_call_cnt));
609 	} else {
610 		/*
611 		 * Skip the tail call counter initialization in subprograms.
612 		 * Insert nops in order to have tail_call_start at a
613 		 * predictable offset.
614 		 */
615 		bpf_skip(jit, 6);
616 	}
617 	/* Tail calls have to skip above initialization */
618 	jit->tail_call_start = jit->prg;
619 	if (fp->aux->exception_cb) {
620 		/*
621 		 * Switch stack, the new address is in the 2nd parameter.
622 		 *
623 		 * Arrange the restoration of %r6-%r15 in the epilogue.
624 		 * Do not restore them now, the prog does not need them.
625 		 */
626 		/* lgr %r15,%r3 */
627 		EMIT4(0xb9040000, REG_15, REG_3);
628 		jit->seen_regs |= NVREGS;
629 	} else {
630 		/* Save registers */
631 		save_restore_regs(jit, REGS_SAVE,
632 				  fp->aux->exception_boundary ? NVREGS : 0);
633 	}
634 	/* Setup literal pool */
635 	if (is_first_pass(jit) || (jit->seen & SEEN_LITERAL)) {
636 		if (!is_first_pass(jit) &&
637 		    is_valid_ldisp(jit->size - (jit->prg + 2))) {
638 			/* basr %l,0 */
639 			EMIT2(0x0d00, REG_L, REG_0);
640 			jit->base_ip = jit->prg;
641 		} else {
642 			/* larl %l,lit32_start */
643 			EMIT6_PCREL_RILB(0xc0000000, REG_L, jit->lit32_start);
644 			jit->base_ip = jit->lit32_start;
645 		}
646 	}
647 	/* Setup stack and backchain */
648 	if (is_first_pass(jit) || (jit->seen & SEEN_STACK)) {
649 		/* lgr %w1,%r15 (backchain) */
650 		EMIT4(0xb9040000, REG_W1, REG_15);
651 		/* la %bfp,unused_end(%r15) (BPF frame pointer) */
652 		EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15,
653 			   offsetofend(struct prog_frame, unused));
654 		/* aghi %r15,-frame_off */
655 		EMIT4_IMM(0xa70b0000, REG_15, -jit->frame_off);
656 		/* stg %w1,backchain(%r15) */
657 		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
658 			      REG_15,
659 			      offsetof(struct prog_frame, backchain));
660 	}
661 }
662 
663 /*
664  * Jump using a register either directly or via an expoline thunk
665  */
666 #define EMIT_JUMP_REG(reg) do {						\
667 	if (nospec_uses_trampoline())					\
668 		/* brcl 0xf,__s390_indirect_jump_rN */			\
669 		EMIT6_PCREL_RILC_PTR(0xc0040000, 0x0f,			\
670 				     __s390_indirect_jump_r ## reg);	\
671 	else								\
672 		/* br %rN */						\
673 		_EMIT2(0x07f0 | reg);					\
674 } while (0)
675 
676 /*
677  * Function epilogue
678  */
679 static void bpf_jit_epilogue(struct bpf_jit *jit)
680 {
681 	jit->exit_ip = jit->prg;
682 	/* Load exit code: lgr %r2,%b0 */
683 	EMIT4(0xb9040000, REG_2, BPF_REG_0);
684 	/* Restore registers */
685 	save_restore_regs(jit, REGS_RESTORE, 0);
686 	EMIT_JUMP_REG(14);
687 
688 	jit->prg = ALIGN(jit->prg, 8);
689 	jit->prologue_plt = jit->prg;
690 	if (jit->prg_buf)
691 		bpf_jit_plt((struct bpf_plt *)(jit->prg_buf + jit->prg),
692 			    jit->prg_buf + jit->prologue_plt_ret, NULL);
693 	jit->prg += sizeof(struct bpf_plt);
694 }
695 
696 bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs)
697 {
698 	regs->psw.addr = extable_fixup(x);
699 	if (x->data != -1)
700 		regs->gprs[x->data] = 0;
701 	return true;
702 }
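/*
 * For a faulting PROBE_MEM* access, the fixup above resumes execution right
 * after the probe/nop pair and clears the destination register (x->data);
 * x->data == -1 (e.g. for stores) means there is nothing to clear.
 */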
703 
704 /*
705  * A single BPF probe instruction
706  */
707 struct bpf_jit_probe {
708 	int prg;	/* JITed instruction offset */
709 	int nop_prg;	/* JITed nop offset */
710 	int reg;	/* Register to clear on exception */
711 	int arena_reg;	/* Register to use for arena addressing */
712 };
713 
714 static void bpf_jit_probe_init(struct bpf_jit_probe *probe)
715 {
716 	probe->prg = -1;
717 	probe->nop_prg = -1;
718 	probe->reg = -1;
719 	probe->arena_reg = REG_0;
720 }
721 
722 /*
723  * Handlers of certain exceptions leave psw.addr pointing to the instruction
724  * directly after the failing one. Therefore, create two exception table
725  * entries and also add a nop in case two probing instructions come directly
726  * after each other.
727  */
728 static void bpf_jit_probe_emit_nop(struct bpf_jit *jit,
729 				   struct bpf_jit_probe *probe)
730 {
731 	if (probe->prg == -1 || probe->nop_prg != -1)
732 		/* The probe is not armed or nop is already emitted. */
733 		return;
734 
735 	probe->nop_prg = jit->prg;
736 	/* bcr 0,%0 */
737 	_EMIT2(0x0700);
738 }
739 
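/*
 * For PROBE_MEM32 (arena) accesses, the kernel arena address is loaded from
 * the literal pool into %r1 and used as the base register of the access, so
 * the BPF pointer only needs to supply the 32-bit arena offset.
 */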
740 static void bpf_jit_probe_load_pre(struct bpf_jit *jit, struct bpf_insn *insn,
741 				   struct bpf_jit_probe *probe)
742 {
743 	if (BPF_MODE(insn->code) != BPF_PROBE_MEM &&
744 	    BPF_MODE(insn->code) != BPF_PROBE_MEMSX &&
745 	    BPF_MODE(insn->code) != BPF_PROBE_MEM32)
746 		return;
747 
748 	if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) {
749 		/* lgrl %r1,kern_arena */
750 		EMIT6_PCREL_RILB(0xc4080000, REG_W1, jit->kern_arena);
751 		probe->arena_reg = REG_W1;
752 	}
753 	probe->prg = jit->prg;
754 	probe->reg = reg2hex[insn->dst_reg];
755 }
756 
757 static void bpf_jit_probe_store_pre(struct bpf_jit *jit, struct bpf_insn *insn,
758 				    struct bpf_jit_probe *probe)
759 {
760 	if (BPF_MODE(insn->code) != BPF_PROBE_MEM32)
761 		return;
762 
763 	/* lgrl %r1,kern_arena */
764 	EMIT6_PCREL_RILB(0xc4080000, REG_W1, jit->kern_arena);
765 	probe->arena_reg = REG_W1;
766 	probe->prg = jit->prg;
767 }
768 
769 static void bpf_jit_probe_atomic_pre(struct bpf_jit *jit,
770 				     struct bpf_insn *insn,
771 				     struct bpf_jit_probe *probe)
772 {
773 	if (BPF_MODE(insn->code) != BPF_PROBE_ATOMIC)
774 		return;
775 
776 	/* lgrl %r1,kern_arena */
777 	EMIT6_PCREL_RILB(0xc4080000, REG_W1, jit->kern_arena);
778 	/* agr %r1,%dst */
779 	EMIT4(0xb9080000, REG_W1, insn->dst_reg);
780 	probe->arena_reg = REG_W1;
781 	probe->prg = jit->prg;
782 }
783 
784 static int bpf_jit_probe_post(struct bpf_jit *jit, struct bpf_prog *fp,
785 			      struct bpf_jit_probe *probe)
786 {
787 	struct exception_table_entry *ex;
788 	int i, prg;
789 	s64 delta;
790 	u8 *insn;
791 
792 	if (probe->prg == -1)
793 		/* The probe is not armed. */
794 		return 0;
795 	bpf_jit_probe_emit_nop(jit, probe);
796 	if (!fp->aux->extable)
797 		/* Do nothing during early JIT passes. */
798 		return 0;
799 	insn = jit->prg_buf + probe->prg;
800 	if (WARN_ON_ONCE(probe->prg + insn_length(*insn) != probe->nop_prg))
801 		/* JIT bug - gap between probe and nop instructions. */
802 		return -1;
803 	for (i = 0; i < 2; i++) {
804 		if (WARN_ON_ONCE(jit->excnt >= fp->aux->num_exentries))
805 			/* Verifier bug - not enough entries. */
806 			return -1;
807 		ex = &fp->aux->extable[jit->excnt];
808 		/* Add extable entries for probe and nop instructions. */
809 		prg = i == 0 ? probe->prg : probe->nop_prg;
810 		delta = jit->prg_buf + prg - (u8 *)&ex->insn;
811 		if (WARN_ON_ONCE(delta < INT_MIN || delta > INT_MAX))
812 			/* JIT bug - code and extable must be close. */
813 			return -1;
814 		ex->insn = delta;
815 		/*
816 		 * Land on the current instruction. Note that the extable
817 		 * infrastructure ignores the fixup field; it is handled by
818 		 * ex_handler_bpf().
819 		 */
820 		delta = jit->prg_buf + jit->prg - (u8 *)&ex->fixup;
821 		if (WARN_ON_ONCE(delta < INT_MIN || delta > INT_MAX))
822 			/* JIT bug - landing pad and extable must be close. */
823 			return -1;
824 		ex->fixup = delta;
825 		ex->type = EX_TYPE_BPF;
826 		ex->data = probe->reg;
827 		jit->excnt++;
828 	}
829 	return 0;
830 }
831 
832 /*
833  * Sign-extend the register if necessary
834  */
835 static int sign_extend(struct bpf_jit *jit, int r, u8 size, u8 flags)
836 {
837 	if (!(flags & BTF_FMODEL_SIGNED_ARG))
838 		return 0;
839 
840 	switch (size) {
841 	case 1:
842 		/* lgbr %r,%r */
843 		EMIT4(0xb9060000, r, r);
844 		return 0;
845 	case 2:
846 		/* lghr %r,%r */
847 		EMIT4(0xb9070000, r, r);
848 		return 0;
849 	case 4:
850 		/* lgfr %r,%r */
851 		EMIT4(0xb9140000, r, r);
852 		return 0;
853 	case 8:
854 		return 0;
855 	default:
856 		return -1;
857 	}
858 }
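/*
 * For illustration: a kfunc argument declared as a signed 32-bit type in
 * BPF_REG_1 (%r2) gets "lgfr %r2,%r2" emitted before the call.
 */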
859 
860 /*
861  * Compile one eBPF instruction into s390x code
862  *
863  * NOTE: Use noinline because for gcov (-fprofile-arcs) gcc allocates a lot of
864  * stack space for the large switch statement.
865  */
866 static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
867 				 int i, bool extra_pass)
868 {
869 	struct bpf_insn *insn = &fp->insnsi[i];
870 	s32 branch_oc_off = insn->off;
871 	u32 dst_reg = insn->dst_reg;
872 	u32 src_reg = insn->src_reg;
873 	struct bpf_jit_probe probe;
874 	int last, insn_count = 1;
875 	u32 *addrs = jit->addrs;
876 	s32 imm = insn->imm;
877 	s16 off = insn->off;
878 	unsigned int mask;
879 	int err;
880 
881 	bpf_jit_probe_init(&probe);
882 
883 	switch (insn->code) {
884 	/*
885 	 * BPF_MOV
886 	 */
887 	case BPF_ALU | BPF_MOV | BPF_X:
888 		switch (insn->off) {
889 		case 0: /* DST = (u32) SRC */
890 			/* llgfr %dst,%src */
891 			EMIT4(0xb9160000, dst_reg, src_reg);
892 			if (insn_is_zext(&insn[1]))
893 				insn_count = 2;
894 			break;
895 		case 8: /* DST = (u32)(s8) SRC */
896 			/* lbr %dst,%src */
897 			EMIT4(0xb9260000, dst_reg, src_reg);
898 			/* llgfr %dst,%dst */
899 			EMIT4(0xb9160000, dst_reg, dst_reg);
900 			break;
901 		case 16: /* DST = (u32)(s16) SRC */
902 			/* lhr %dst,%src */
903 			EMIT4(0xb9270000, dst_reg, src_reg);
904 			/* llgfr %dst,%dst */
905 			EMIT4(0xb9160000, dst_reg, dst_reg);
906 			break;
907 		}
908 		break;
909 	case BPF_ALU64 | BPF_MOV | BPF_X:
910 		if (insn_is_cast_user(insn)) {
911 			int patch_brc;
912 
913 			/* ltgr %dst,%src */
914 			EMIT4(0xb9020000, dst_reg, src_reg);
915 			/* brc 8,0f */
916 			patch_brc = jit->prg;
917 			EMIT4_PCREL_RIC(0xa7040000, 8, 0);
918 			/* iihf %dst,user_arena>>32 */
919 			EMIT6_IMM(0xc0080000, dst_reg, jit->user_arena >> 32);
920 			/* 0: */
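			/*
			 * Patch the I2 field (at byte offset 2, counted in
			 * halfwords) of the brc above so that it branches here.
			 */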
921 			if (jit->prg_buf)
922 				*(u16 *)(jit->prg_buf + patch_brc + 2) =
923 					(jit->prg - patch_brc) >> 1;
924 			break;
925 		}
926 		switch (insn->off) {
927 		case 0: /* DST = SRC */
928 			/* lgr %dst,%src */
929 			EMIT4(0xb9040000, dst_reg, src_reg);
930 			break;
931 		case 8: /* DST = (s8) SRC */
932 			/* lgbr %dst,%src */
933 			EMIT4(0xb9060000, dst_reg, src_reg);
934 			break;
935 		case 16: /* DST = (s16) SRC */
936 			/* lghr %dst,%src */
937 			EMIT4(0xb9070000, dst_reg, src_reg);
938 			break;
939 		case 32: /* DST = (s32) SRC */
940 			/* lgfr %dst,%src */
941 			EMIT4(0xb9140000, dst_reg, src_reg);
942 			break;
943 		}
944 		break;
945 	case BPF_ALU | BPF_MOV | BPF_K: /* dst = (u32) imm */
946 		/* llilf %dst,imm */
947 		EMIT6_IMM(0xc00f0000, dst_reg, imm);
948 		if (insn_is_zext(&insn[1]))
949 			insn_count = 2;
950 		break;
951 	case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = imm */
952 		/* lgfi %dst,imm */
953 		EMIT6_IMM(0xc0010000, dst_reg, imm);
954 		break;
955 	/*
956 	 * BPF_LD 64
957 	 */
958 	case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
959 	{
960 		/* 16 byte instruction that uses two 'struct bpf_insn' */
961 		u64 imm64;
962 
963 		imm64 = (u64)(u32) insn[0].imm | ((u64)(u32) insn[1].imm) << 32;
964 		/* lgrl %dst,imm */
965 		EMIT6_PCREL_RILB(0xc4080000, dst_reg, _EMIT_CONST_U64(imm64));
966 		insn_count = 2;
967 		break;
968 	}
969 	/*
970 	 * BPF_ADD
971 	 */
972 	case BPF_ALU | BPF_ADD | BPF_X: /* dst = (u32) dst + (u32) src */
973 		/* ar %dst,%src */
974 		EMIT2(0x1a00, dst_reg, src_reg);
975 		EMIT_ZERO(dst_reg);
976 		break;
977 	case BPF_ALU64 | BPF_ADD | BPF_X: /* dst = dst + src */
978 		/* agr %dst,%src */
979 		EMIT4(0xb9080000, dst_reg, src_reg);
980 		break;
981 	case BPF_ALU | BPF_ADD | BPF_K: /* dst = (u32) dst + (u32) imm */
982 		if (imm != 0) {
983 			/* alfi %dst,imm */
984 			EMIT6_IMM(0xc20b0000, dst_reg, imm);
985 		}
986 		EMIT_ZERO(dst_reg);
987 		break;
988 	case BPF_ALU64 | BPF_ADD | BPF_K: /* dst = dst + imm */
989 		if (!imm)
990 			break;
991 		/* agfi %dst,imm */
992 		EMIT6_IMM(0xc2080000, dst_reg, imm);
993 		break;
994 	/*
995 	 * BPF_SUB
996 	 */
997 	case BPF_ALU | BPF_SUB | BPF_X: /* dst = (u32) dst - (u32) src */
998 		/* sr %dst,%src */
999 		EMIT2(0x1b00, dst_reg, src_reg);
1000 		EMIT_ZERO(dst_reg);
1001 		break;
1002 	case BPF_ALU64 | BPF_SUB | BPF_X: /* dst = dst - src */
1003 		/* sgr %dst,%src */
1004 		EMIT4(0xb9090000, dst_reg, src_reg);
1005 		break;
1006 	case BPF_ALU | BPF_SUB | BPF_K: /* dst = (u32) dst - (u32) imm */
1007 		if (imm != 0) {
1008 			/* alfi %dst,-imm */
1009 			EMIT6_IMM(0xc20b0000, dst_reg, -imm);
1010 		}
1011 		EMIT_ZERO(dst_reg);
1012 		break;
1013 	case BPF_ALU64 | BPF_SUB | BPF_K: /* dst = dst - imm */
1014 		if (!imm)
1015 			break;
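		/*
		 * -imm would overflow for imm == -0x80000000, and agfi cannot
		 * encode +0x80000000, so use algfi (add logical) instead.
		 */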
1016 		if (imm == -0x80000000) {
1017 			/* algfi %dst,0x80000000 */
1018 			EMIT6_IMM(0xc20a0000, dst_reg, 0x80000000);
1019 		} else {
1020 			/* agfi %dst,-imm */
1021 			EMIT6_IMM(0xc2080000, dst_reg, -imm);
1022 		}
1023 		break;
1024 	/*
1025 	 * BPF_MUL
1026 	 */
1027 	case BPF_ALU | BPF_MUL | BPF_X: /* dst = (u32) dst * (u32) src */
1028 		/* msr %dst,%src */
1029 		EMIT4(0xb2520000, dst_reg, src_reg);
1030 		EMIT_ZERO(dst_reg);
1031 		break;
1032 	case BPF_ALU64 | BPF_MUL | BPF_X: /* dst = dst * src */
1033 		/* msgr %dst,%src */
1034 		EMIT4(0xb90c0000, dst_reg, src_reg);
1035 		break;
1036 	case BPF_ALU | BPF_MUL | BPF_K: /* dst = (u32) dst * (u32) imm */
1037 		if (imm != 1) {
1038 			/* msfi %dst,imm */
1039 			EMIT6_IMM(0xc2010000, dst_reg, imm);
1040 		}
1041 		EMIT_ZERO(dst_reg);
1042 		break;
1043 	case BPF_ALU64 | BPF_MUL | BPF_K: /* dst = dst * imm */
1044 		if (imm == 1)
1045 			break;
1046 		/* msgfi %dst,imm */
1047 		EMIT6_IMM(0xc2000000, dst_reg, imm);
1048 		break;
1049 	/*
1050 	 * BPF_DIV / BPF_MOD
1051 	 */
1052 	case BPF_ALU | BPF_DIV | BPF_X:
1053 	case BPF_ALU | BPF_MOD | BPF_X:
1054 	{
1055 		int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
1056 
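		/*
		 * The divide instructions leave the remainder in the even
		 * register (%w0) and the quotient in the odd one (%w1), hence
		 * the choice of rc_reg above.
		 */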
1057 		switch (off) {
1058 		case 0: /* dst = (u32) dst {/,%} (u32) src */
1059 			/* xr %w0,%w0 */
1060 			EMIT2(0x1700, REG_W0, REG_W0);
1061 			/* lr %w1,%dst */
1062 			EMIT2(0x1800, REG_W1, dst_reg);
1063 			/* dlr %w0,%src */
1064 			EMIT4(0xb9970000, REG_W0, src_reg);
1065 			break;
1066 		case 1: /* dst = (u32) ((s32) dst {/,%} (s32) src) */
1067 			/* lgfr %r1,%dst */
1068 			EMIT4(0xb9140000, REG_W1, dst_reg);
1069 			/* dsgfr %r0,%src */
1070 			EMIT4(0xb91d0000, REG_W0, src_reg);
1071 			break;
1072 		}
1073 		/* llgfr %dst,%rc */
1074 		EMIT4(0xb9160000, dst_reg, rc_reg);
1075 		if (insn_is_zext(&insn[1]))
1076 			insn_count = 2;
1077 		break;
1078 	}
1079 	case BPF_ALU64 | BPF_DIV | BPF_X:
1080 	case BPF_ALU64 | BPF_MOD | BPF_X:
1081 	{
1082 		int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
1083 
1084 		switch (off) {
1085 		case 0: /* dst = dst {/,%} src */
1086 			/* lghi %w0,0 */
1087 			EMIT4_IMM(0xa7090000, REG_W0, 0);
1088 			/* lgr %w1,%dst */
1089 			EMIT4(0xb9040000, REG_W1, dst_reg);
1090 			/* dlgr %w0,%src */
1091 			EMIT4(0xb9870000, REG_W0, src_reg);
1092 			break;
1093 		case 1: /* dst = (s64) dst {/,%} (s64) src */
1094 			/* lgr %w1,%dst */
1095 			EMIT4(0xb9040000, REG_W1, dst_reg);
1096 			/* dsgr %w0,%src */
1097 			EMIT4(0xb90d0000, REG_W0, src_reg);
1098 			break;
1099 		}
1100 		/* lgr %dst,%rc */
1101 		EMIT4(0xb9040000, dst_reg, rc_reg);
1102 		break;
1103 	}
1104 	case BPF_ALU | BPF_DIV | BPF_K:
1105 	case BPF_ALU | BPF_MOD | BPF_K:
1106 	{
1107 		int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
1108 
1109 		if (imm == 1) {
1110 			if (BPF_OP(insn->code) == BPF_MOD)
1111 				/* lghi %dst,0 */
1112 				EMIT4_IMM(0xa7090000, dst_reg, 0);
1113 			else
1114 				EMIT_ZERO(dst_reg);
1115 			break;
1116 		}
1117 		if (!is_first_pass(jit) && can_use_ldisp_for_lit32(jit)) {
1118 			switch (off) {
1119 			case 0: /* dst = (u32) dst {/,%} (u32) imm */
1120 				/* xr %w0,%w0 */
1121 				EMIT2(0x1700, REG_W0, REG_W0);
1122 				/* lr %w1,%dst */
1123 				EMIT2(0x1800, REG_W1, dst_reg);
1124 				/* dl %w0,<d(imm)>(%l) */
1125 				EMIT6_DISP_LH(0xe3000000, 0x0097, REG_W0, REG_0,
1126 					      REG_L, EMIT_CONST_U32(imm));
1127 				break;
1128 			case 1: /* dst = (s32) dst {/,%} (s32) imm */
1129 				/* lgfr %r1,%dst */
1130 				EMIT4(0xb9140000, REG_W1, dst_reg);
1131 				/* dsgf %r0,<d(imm)>(%l) */
1132 				EMIT6_DISP_LH(0xe3000000, 0x001d, REG_W0, REG_0,
1133 					      REG_L, EMIT_CONST_U32(imm));
1134 				break;
1135 			}
1136 		} else {
1137 			switch (off) {
1138 			case 0: /* dst = (u32) dst {/,%} (u32) imm */
1139 				/* xr %w0,%w0 */
1140 				EMIT2(0x1700, REG_W0, REG_W0);
1141 				/* lr %w1,%dst */
1142 				EMIT2(0x1800, REG_W1, dst_reg);
1143 				/* lrl %dst,imm */
1144 				EMIT6_PCREL_RILB(0xc40d0000, dst_reg,
1145 						 _EMIT_CONST_U32(imm));
1146 				jit->seen |= SEEN_LITERAL;
1147 				/* dlr %w0,%dst */
1148 				EMIT4(0xb9970000, REG_W0, dst_reg);
1149 				break;
1150 			case 1: /* dst = (s32) dst {/,%} (s32) imm */
1151 				/* lgfr %w1,%dst */
1152 				EMIT4(0xb9140000, REG_W1, dst_reg);
1153 				/* lgfrl %dst,imm */
1154 				EMIT6_PCREL_RILB(0xc40c0000, dst_reg,
1155 						 _EMIT_CONST_U32(imm));
1156 				jit->seen |= SEEN_LITERAL;
1157 				/* dsgr %w0,%dst */
1158 				EMIT4(0xb90d0000, REG_W0, dst_reg);
1159 				break;
1160 			}
1161 		}
1162 		/* llgfr %dst,%rc */
1163 		EMIT4(0xb9160000, dst_reg, rc_reg);
1164 		if (insn_is_zext(&insn[1]))
1165 			insn_count = 2;
1166 		break;
1167 	}
1168 	case BPF_ALU64 | BPF_DIV | BPF_K:
1169 	case BPF_ALU64 | BPF_MOD | BPF_K:
1170 	{
1171 		int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
1172 
1173 		if (imm == 1) {
1174 			if (BPF_OP(insn->code) == BPF_MOD)
1175 				/* lghi %dst,0 */
1176 				EMIT4_IMM(0xa7090000, dst_reg, 0);
1177 			break;
1178 		}
1179 		if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
1180 			switch (off) {
1181 			case 0: /* dst = dst {/,%} imm */
1182 				/* lghi %w0,0 */
1183 				EMIT4_IMM(0xa7090000, REG_W0, 0);
1184 				/* lgr %w1,%dst */
1185 				EMIT4(0xb9040000, REG_W1, dst_reg);
1186 				/* dlg %w0,<d(imm)>(%l) */
1187 				EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0,
1188 					      REG_L, EMIT_CONST_U64(imm));
1189 				break;
1190 			case 1: /* dst = (s64) dst {/,%} (s64) imm */
1191 				/* lgr %w1,%dst */
1192 				EMIT4(0xb9040000, REG_W1, dst_reg);
1193 				/* dsg %w0,<d(imm)>(%l) */
1194 				EMIT6_DISP_LH(0xe3000000, 0x000d, REG_W0, REG_0,
1195 					      REG_L, EMIT_CONST_U64(imm));
1196 				break;
1197 			}
1198 		} else {
1199 			switch (off) {
1200 			case 0: /* dst = dst {/,%} imm */
1201 				/* lghi %w0,0 */
1202 				EMIT4_IMM(0xa7090000, REG_W0, 0);
1203 				/* lgr %w1,%dst */
1204 				EMIT4(0xb9040000, REG_W1, dst_reg);
1205 				/* lgrl %dst,imm */
1206 				EMIT6_PCREL_RILB(0xc4080000, dst_reg,
1207 						 _EMIT_CONST_U64(imm));
1208 				jit->seen |= SEEN_LITERAL;
1209 				/* dlgr %w0,%dst */
1210 				EMIT4(0xb9870000, REG_W0, dst_reg);
1211 				break;
1212 			case 1: /* dst = (s64) dst {/,%} (s64) imm */
1213 				/* lgr %w1,%dst */
1214 				EMIT4(0xb9040000, REG_W1, dst_reg);
1215 				/* lgrl %dst,imm */
1216 				EMIT6_PCREL_RILB(0xc4080000, dst_reg,
1217 						 _EMIT_CONST_U64(imm));
1218 				jit->seen |= SEEN_LITERAL;
1219 				/* dsgr %w0,%dst */
1220 				EMIT4(0xb90d0000, REG_W0, dst_reg);
1221 				break;
1222 			}
1223 		}
1224 		/* lgr %dst,%rc */
1225 		EMIT4(0xb9040000, dst_reg, rc_reg);
1226 		break;
1227 	}
1228 	/*
1229 	 * BPF_AND
1230 	 */
1231 	case BPF_ALU | BPF_AND | BPF_X: /* dst = (u32) dst & (u32) src */
1232 		/* nr %dst,%src */
1233 		EMIT2(0x1400, dst_reg, src_reg);
1234 		EMIT_ZERO(dst_reg);
1235 		break;
1236 	case BPF_ALU64 | BPF_AND | BPF_X: /* dst = dst & src */
1237 		/* ngr %dst,%src */
1238 		EMIT4(0xb9800000, dst_reg, src_reg);
1239 		break;
1240 	case BPF_ALU | BPF_AND | BPF_K: /* dst = (u32) dst & (u32) imm */
1241 		/* nilf %dst,imm */
1242 		EMIT6_IMM(0xc00b0000, dst_reg, imm);
1243 		EMIT_ZERO(dst_reg);
1244 		break;
1245 	case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */
1246 		if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
1247 			/* ng %dst,<d(imm)>(%l) */
1248 			EMIT6_DISP_LH(0xe3000000, 0x0080,
1249 				      dst_reg, REG_0, REG_L,
1250 				      EMIT_CONST_U64(imm));
1251 		} else {
1252 			/* lgrl %w0,imm */
1253 			EMIT6_PCREL_RILB(0xc4080000, REG_W0,
1254 					 _EMIT_CONST_U64(imm));
1255 			jit->seen |= SEEN_LITERAL;
1256 			/* ngr %dst,%w0 */
1257 			EMIT4(0xb9800000, dst_reg, REG_W0);
1258 		}
1259 		break;
1260 	/*
1261 	 * BPF_OR
1262 	 */
1263 	case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */
1264 		/* or %dst,%src */
1265 		EMIT2(0x1600, dst_reg, src_reg);
1266 		EMIT_ZERO(dst_reg);
1267 		break;
1268 	case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */
1269 		/* ogr %dst,%src */
1270 		EMIT4(0xb9810000, dst_reg, src_reg);
1271 		break;
1272 	case BPF_ALU | BPF_OR | BPF_K: /* dst = (u32) dst | (u32) imm */
1273 		/* oilf %dst,imm */
1274 		EMIT6_IMM(0xc00d0000, dst_reg, imm);
1275 		EMIT_ZERO(dst_reg);
1276 		break;
1277 	case BPF_ALU64 | BPF_OR | BPF_K: /* dst = dst | imm */
1278 		if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
1279 			/* og %dst,<d(imm)>(%l) */
1280 			EMIT6_DISP_LH(0xe3000000, 0x0081,
1281 				      dst_reg, REG_0, REG_L,
1282 				      EMIT_CONST_U64(imm));
1283 		} else {
1284 			/* lgrl %w0,imm */
1285 			EMIT6_PCREL_RILB(0xc4080000, REG_W0,
1286 					 _EMIT_CONST_U64(imm));
1287 			jit->seen |= SEEN_LITERAL;
1288 			/* ogr %dst,%w0 */
1289 			EMIT4(0xb9810000, dst_reg, REG_W0);
1290 		}
1291 		break;
1292 	/*
1293 	 * BPF_XOR
1294 	 */
1295 	case BPF_ALU | BPF_XOR | BPF_X: /* dst = (u32) dst ^ (u32) src */
1296 		/* xr %dst,%src */
1297 		EMIT2(0x1700, dst_reg, src_reg);
1298 		EMIT_ZERO(dst_reg);
1299 		break;
1300 	case BPF_ALU64 | BPF_XOR | BPF_X: /* dst = dst ^ src */
1301 		/* xgr %dst,%src */
1302 		EMIT4(0xb9820000, dst_reg, src_reg);
1303 		break;
1304 	case BPF_ALU | BPF_XOR | BPF_K: /* dst = (u32) dst ^ (u32) imm */
1305 		if (imm != 0) {
1306 			/* xilf %dst,imm */
1307 			EMIT6_IMM(0xc0070000, dst_reg, imm);
1308 		}
1309 		EMIT_ZERO(dst_reg);
1310 		break;
1311 	case BPF_ALU64 | BPF_XOR | BPF_K: /* dst = dst ^ imm */
1312 		if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
1313 			/* xg %dst,<d(imm)>(%l) */
1314 			EMIT6_DISP_LH(0xe3000000, 0x0082,
1315 				      dst_reg, REG_0, REG_L,
1316 				      EMIT_CONST_U64(imm));
1317 		} else {
1318 			/* lgrl %w0,imm */
1319 			EMIT6_PCREL_RILB(0xc4080000, REG_W0,
1320 					 _EMIT_CONST_U64(imm));
1321 			jit->seen |= SEEN_LITERAL;
1322 			/* xgr %dst,%w0 */
1323 			EMIT4(0xb9820000, dst_reg, REG_W0);
1324 		}
1325 		break;
1326 	/*
1327 	 * BPF_LSH
1328 	 */
1329 	case BPF_ALU | BPF_LSH | BPF_X: /* dst = (u32) dst << (u32) src */
1330 		/* sll %dst,0(%src) */
1331 		EMIT4_DISP(0x89000000, dst_reg, src_reg, 0);
1332 		EMIT_ZERO(dst_reg);
1333 		break;
1334 	case BPF_ALU64 | BPF_LSH | BPF_X: /* dst = dst << src */
1335 		/* sllg %dst,%dst,0(%src) */
1336 		EMIT6_DISP_LH(0xeb000000, 0x000d, dst_reg, dst_reg, src_reg, 0);
1337 		break;
1338 	case BPF_ALU | BPF_LSH | BPF_K: /* dst = (u32) dst << (u32) imm */
1339 		if (imm != 0) {
1340 			/* sll %dst,imm(%r0) */
1341 			EMIT4_DISP(0x89000000, dst_reg, REG_0, imm);
1342 		}
1343 		EMIT_ZERO(dst_reg);
1344 		break;
1345 	case BPF_ALU64 | BPF_LSH | BPF_K: /* dst = dst << imm */
1346 		if (imm == 0)
1347 			break;
1348 		/* sllg %dst,%dst,imm(%r0) */
1349 		EMIT6_DISP_LH(0xeb000000, 0x000d, dst_reg, dst_reg, REG_0, imm);
1350 		break;
1351 	/*
1352 	 * BPF_RSH
1353 	 */
1354 	case BPF_ALU | BPF_RSH | BPF_X: /* dst = (u32) dst >> (u32) src */
1355 		/* srl %dst,0(%src) */
1356 		EMIT4_DISP(0x88000000, dst_reg, src_reg, 0);
1357 		EMIT_ZERO(dst_reg);
1358 		break;
1359 	case BPF_ALU64 | BPF_RSH | BPF_X: /* dst = dst >> src */
1360 		/* srlg %dst,%dst,0(%src) */
1361 		EMIT6_DISP_LH(0xeb000000, 0x000c, dst_reg, dst_reg, src_reg, 0);
1362 		break;
1363 	case BPF_ALU | BPF_RSH | BPF_K: /* dst = (u32) dst >> (u32) imm */
1364 		if (imm != 0) {
1365 			/* srl %dst,imm(%r0) */
1366 			EMIT4_DISP(0x88000000, dst_reg, REG_0, imm);
1367 		}
1368 		EMIT_ZERO(dst_reg);
1369 		break;
1370 	case BPF_ALU64 | BPF_RSH | BPF_K: /* dst = dst >> imm */
1371 		if (imm == 0)
1372 			break;
1373 		/* srlg %dst,%dst,imm(%r0) */
1374 		EMIT6_DISP_LH(0xeb000000, 0x000c, dst_reg, dst_reg, REG_0, imm);
1375 		break;
1376 	/*
1377 	 * BPF_ARSH
1378 	 */
1379 	case BPF_ALU | BPF_ARSH | BPF_X: /* ((s32) dst) >>= src */
1380 		/* sra %dst,0(%src) */
1381 		EMIT4_DISP(0x8a000000, dst_reg, src_reg, 0);
1382 		EMIT_ZERO(dst_reg);
1383 		break;
1384 	case BPF_ALU64 | BPF_ARSH | BPF_X: /* ((s64) dst) >>= src */
1385 		/* srag %dst,%dst,0(%src) */
1386 		EMIT6_DISP_LH(0xeb000000, 0x000a, dst_reg, dst_reg, src_reg, 0);
1387 		break;
1388 	case BPF_ALU | BPF_ARSH | BPF_K: /* ((s32) dst) >>= imm */
1389 		if (imm != 0) {
1390 			/* sra %dst,imm(%r0) */
1391 			EMIT4_DISP(0x8a000000, dst_reg, REG_0, imm);
1392 		}
1393 		EMIT_ZERO(dst_reg);
1394 		break;
1395 	case BPF_ALU64 | BPF_ARSH | BPF_K: /* ((s64) dst) >>= imm */
1396 		if (imm == 0)
1397 			break;
1398 		/* srag %dst,%dst,imm(%r0) */
1399 		EMIT6_DISP_LH(0xeb000000, 0x000a, dst_reg, dst_reg, REG_0, imm);
1400 		break;
1401 	/*
1402 	 * BPF_NEG
1403 	 */
1404 	case BPF_ALU | BPF_NEG: /* dst = (u32) -dst */
1405 		/* lcr %dst,%dst */
1406 		EMIT2(0x1300, dst_reg, dst_reg);
1407 		EMIT_ZERO(dst_reg);
1408 		break;
1409 	case BPF_ALU64 | BPF_NEG: /* dst = -dst */
1410 		/* lcgr %dst,%dst */
1411 		EMIT4(0xb9030000, dst_reg, dst_reg);
1412 		break;
1413 	/*
1414 	 * BPF_FROM_BE/LE
1415 	 */
1416 	case BPF_ALU | BPF_END | BPF_FROM_BE:
1417 		/* s390 is big endian, therefore only clear high order bytes */
1418 		switch (imm) {
1419 		case 16: /* dst = (u16) cpu_to_be16(dst) */
1420 			/* llghr %dst,%dst */
1421 			EMIT4(0xb9850000, dst_reg, dst_reg);
1422 			if (insn_is_zext(&insn[1]))
1423 				insn_count = 2;
1424 			break;
1425 		case 32: /* dst = (u32) cpu_to_be32(dst) */
1426 			if (!fp->aux->verifier_zext)
1427 				/* llgfr %dst,%dst */
1428 				EMIT4(0xb9160000, dst_reg, dst_reg);
1429 			break;
1430 		case 64: /* dst = (u64) cpu_to_be64(dst) */
1431 			break;
1432 		}
1433 		break;
1434 	case BPF_ALU | BPF_END | BPF_FROM_LE:
1435 	case BPF_ALU64 | BPF_END | BPF_FROM_LE:
1436 		switch (imm) {
1437 		case 16: /* dst = (u16) cpu_to_le16(dst) */
1438 			/* lrvr %dst,%dst */
1439 			EMIT4(0xb91f0000, dst_reg, dst_reg);
1440 			/* srl %dst,16(%r0) */
1441 			EMIT4_DISP(0x88000000, dst_reg, REG_0, 16);
1442 			/* llghr %dst,%dst */
1443 			EMIT4(0xb9850000, dst_reg, dst_reg);
1444 			if (insn_is_zext(&insn[1]))
1445 				insn_count = 2;
1446 			break;
1447 		case 32: /* dst = (u32) cpu_to_le32(dst) */
1448 			/* lrvr %dst,%dst */
1449 			EMIT4(0xb91f0000, dst_reg, dst_reg);
1450 			if (!fp->aux->verifier_zext)
1451 				/* llgfr %dst,%dst */
1452 				EMIT4(0xb9160000, dst_reg, dst_reg);
1453 			break;
1454 		case 64: /* dst = (u64) cpu_to_le64(dst) */
1455 			/* lrvgr %dst,%dst */
1456 			EMIT4(0xb90f0000, dst_reg, dst_reg);
1457 			break;
1458 		}
1459 		break;
1460 	/*
1461 	 * BPF_NOSPEC (speculation barrier)
1462 	 */
1463 	case BPF_ST | BPF_NOSPEC:
1464 		break;
1465 	/*
1466 	 * BPF_ST(X)
1467 	 */
1468 	case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */
1469 	case BPF_STX | BPF_PROBE_MEM32 | BPF_B:
1470 		bpf_jit_probe_store_pre(jit, insn, &probe);
1471 		/* stcy %src,off(%dst,%arena) */
1472 		EMIT6_DISP_LH(0xe3000000, 0x0072, src_reg, dst_reg,
1473 			      probe.arena_reg, off);
1474 		err = bpf_jit_probe_post(jit, fp, &probe);
1475 		if (err < 0)
1476 			return err;
1477 		jit->seen |= SEEN_MEM;
1478 		break;
1479 	case BPF_STX | BPF_MEM | BPF_H: /* *(u16 *)(dst + off) = src */
1480 	case BPF_STX | BPF_PROBE_MEM32 | BPF_H:
1481 		bpf_jit_probe_store_pre(jit, insn, &probe);
1482 		/* sthy %src,off(%dst,%arena) */
1483 		EMIT6_DISP_LH(0xe3000000, 0x0070, src_reg, dst_reg,
1484 			      probe.arena_reg, off);
1485 		err = bpf_jit_probe_post(jit, fp, &probe);
1486 		if (err < 0)
1487 			return err;
1488 		jit->seen |= SEEN_MEM;
1489 		break;
1490 	case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */
1491 	case BPF_STX | BPF_PROBE_MEM32 | BPF_W:
1492 		bpf_jit_probe_store_pre(jit, insn, &probe);
1493 		/* sty %src,off(%dst,%arena) */
1494 		EMIT6_DISP_LH(0xe3000000, 0x0050, src_reg, dst_reg,
1495 			      probe.arena_reg, off);
1496 		err = bpf_jit_probe_post(jit, fp, &probe);
1497 		if (err < 0)
1498 			return err;
1499 		jit->seen |= SEEN_MEM;
1500 		break;
1501 	case BPF_STX | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = src */
1502 	case BPF_STX | BPF_PROBE_MEM32 | BPF_DW:
1503 		bpf_jit_probe_store_pre(jit, insn, &probe);
1504 		/* stg %src,off(%dst,%arena) */
1505 		EMIT6_DISP_LH(0xe3000000, 0x0024, src_reg, dst_reg,
1506 			      probe.arena_reg, off);
1507 		err = bpf_jit_probe_post(jit, fp, &probe);
1508 		if (err < 0)
1509 			return err;
1510 		jit->seen |= SEEN_MEM;
1511 		break;
1512 	case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */
1513 	case BPF_ST | BPF_PROBE_MEM32 | BPF_B:
1514 		/* lhi %w0,imm */
1515 		EMIT4_IMM(0xa7080000, REG_W0, (u8) imm);
1516 		bpf_jit_probe_store_pre(jit, insn, &probe);
1517 		/* stcy %w0,off(%dst,%arena) */
1518 		EMIT6_DISP_LH(0xe3000000, 0x0072, REG_W0, dst_reg,
1519 			      probe.arena_reg, off);
1520 		err = bpf_jit_probe_post(jit, fp, &probe);
1521 		if (err < 0)
1522 			return err;
1523 		jit->seen |= SEEN_MEM;
1524 		break;
1525 	case BPF_ST | BPF_MEM | BPF_H: /* *(u16 *)(dst + off) = imm */
1526 	case BPF_ST | BPF_PROBE_MEM32 | BPF_H:
1527 		/* lhi %w0,imm */
1528 		EMIT4_IMM(0xa7080000, REG_W0, (u16) imm);
1529 		bpf_jit_probe_store_pre(jit, insn, &probe);
1530 		/* sthy %w0,off(%dst,%arena) */
1531 		EMIT6_DISP_LH(0xe3000000, 0x0070, REG_W0, dst_reg,
1532 			      probe.arena_reg, off);
1533 		err = bpf_jit_probe_post(jit, fp, &probe);
1534 		if (err < 0)
1535 			return err;
1536 		jit->seen |= SEEN_MEM;
1537 		break;
1538 	case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */
1539 	case BPF_ST | BPF_PROBE_MEM32 | BPF_W:
1540 		/* llilf %w0,imm  */
1541 		EMIT6_IMM(0xc00f0000, REG_W0, (u32) imm);
1542 		bpf_jit_probe_store_pre(jit, insn, &probe);
1543 		/* sty %w0,off(%dst,%arena) */
1544 		EMIT6_DISP_LH(0xe3000000, 0x0050, REG_W0, dst_reg,
1545 			      probe.arena_reg, off);
1546 		err = bpf_jit_probe_post(jit, fp, &probe);
1547 		if (err < 0)
1548 			return err;
1549 		jit->seen |= SEEN_MEM;
1550 		break;
1551 	case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */
1552 	case BPF_ST | BPF_PROBE_MEM32 | BPF_DW:
1553 		/* lgfi %w0,imm */
1554 		EMIT6_IMM(0xc0010000, REG_W0, imm);
1555 		bpf_jit_probe_store_pre(jit, insn, &probe);
1556 		/* stg %w0,off(%dst,%arena) */
1557 		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W0, dst_reg,
1558 			      probe.arena_reg, off);
1559 		err = bpf_jit_probe_post(jit, fp, &probe);
1560 		if (err < 0)
1561 			return err;
1562 		jit->seen |= SEEN_MEM;
1563 		break;
1564 	/*
1565 	 * BPF_ATOMIC
1566 	 */
1567 	case BPF_STX | BPF_ATOMIC | BPF_DW:
1568 	case BPF_STX | BPF_ATOMIC | BPF_W:
1569 	case BPF_STX | BPF_PROBE_ATOMIC | BPF_DW:
1570 	case BPF_STX | BPF_PROBE_ATOMIC | BPF_W:
1571 	{
1572 		bool is32 = BPF_SIZE(insn->code) == BPF_W;
1573 
1574 		/*
1575 		 * Unlike loads and stores, atomics have only a base register,
1576 		 * but no index register. For the non-arena case, simply use
1577 		 * %dst as a base. For the arena case, use the work register
1578 		 * %r1: first, load the arena base into it, and then add %dst
1579 		 * to it.
1580 		 */
1581 		probe.arena_reg = dst_reg;
1582 
1583 		switch (insn->imm) {
1584 #define EMIT_ATOMIC(op32, op64) do {					\
1585 	bpf_jit_probe_atomic_pre(jit, insn, &probe);			\
1586 	/* {op32|op64} {%w0|%src},%src,off(%arena) */			\
1587 	EMIT6_DISP_LH(0xeb000000, is32 ? (op32) : (op64),		\
1588 		      (insn->imm & BPF_FETCH) ? src_reg : REG_W0,	\
1589 		      src_reg, probe.arena_reg, off);			\
1590 	err = bpf_jit_probe_post(jit, fp, &probe);			\
1591 	if (err < 0)							\
1592 		return err;						\
1593 	if (insn->imm & BPF_FETCH) {					\
1594 		/* bcr 14,0 - see atomic_fetch_{add,and,or,xor}() */	\
1595 		_EMIT2(0x07e0);						\
1596 		if (is32)                                               \
1597 			EMIT_ZERO(src_reg);				\
1598 	}								\
1599 } while (0)
1600 		case BPF_ADD:
1601 		case BPF_ADD | BPF_FETCH:
1602 			/* {laal|laalg} */
1603 			EMIT_ATOMIC(0x00fa, 0x00ea);
1604 			break;
1605 		case BPF_AND:
1606 		case BPF_AND | BPF_FETCH:
1607 			/* {lan|lang} */
1608 			EMIT_ATOMIC(0x00f4, 0x00e4);
1609 			break;
1610 		case BPF_OR:
1611 		case BPF_OR | BPF_FETCH:
1612 			/* {lao|laog} */
1613 			EMIT_ATOMIC(0x00f6, 0x00e6);
1614 			break;
1615 		case BPF_XOR:
1616 		case BPF_XOR | BPF_FETCH:
1617 			/* {lax|laxg} */
1618 			EMIT_ATOMIC(0x00f7, 0x00e7);
1619 			break;
1620 #undef EMIT_ATOMIC
1621 		case BPF_XCHG: {
1622 			struct bpf_jit_probe load_probe = probe;
1623 			int loop_start;
1624 
1625 			bpf_jit_probe_atomic_pre(jit, insn, &load_probe);
1626 			/* {ly|lg} %w0,off(%arena) */
1627 			EMIT6_DISP_LH(0xe3000000,
1628 				      is32 ? 0x0058 : 0x0004, REG_W0, REG_0,
1629 				      load_probe.arena_reg, off);
1630 			bpf_jit_probe_emit_nop(jit, &load_probe);
1631 			/* Reuse {ly|lg}'s arena_reg for {csy|csg}. */
1632 			if (load_probe.prg != -1) {
1633 				probe.prg = jit->prg;
1634 				probe.arena_reg = load_probe.arena_reg;
1635 			}
1636 			loop_start = jit->prg;
1637 			/* 0: {csy|csg} %w0,%src,off(%arena) */
1638 			EMIT6_DISP_LH(0xeb000000, is32 ? 0x0014 : 0x0030,
1639 				      REG_W0, src_reg, probe.arena_reg, off);
1640 			bpf_jit_probe_emit_nop(jit, &probe);
1641 			/* brc 4,0b */
1642 			EMIT4_PCREL_RIC(0xa7040000, 4, loop_start);
1643 			/* {llgfr|lgr} %src,%w0 */
1644 			EMIT4(is32 ? 0xb9160000 : 0xb9040000, src_reg, REG_W0);
1645 			/* Both probes should land here on exception. */
1646 			err = bpf_jit_probe_post(jit, fp, &load_probe);
1647 			if (err < 0)
1648 				return err;
1649 			err = bpf_jit_probe_post(jit, fp, &probe);
1650 			if (err < 0)
1651 				return err;
1652 			if (is32 && insn_is_zext(&insn[1]))
1653 				insn_count = 2;
1654 			break;
1655 		}
1656 		case BPF_CMPXCHG:
1657 			bpf_jit_probe_atomic_pre(jit, insn, &probe);
1658 			/* 0: {csy|csg} %b0,%src,off(%arena) */
1659 			EMIT6_DISP_LH(0xeb000000, is32 ? 0x0014 : 0x0030,
1660 				      BPF_REG_0, src_reg,
1661 				      probe.arena_reg, off);
1662 			err = bpf_jit_probe_post(jit, fp, &probe);
1663 			if (err < 0)
1664 				return err;
1665 			break;
1666 		default:
1667 			pr_err("Unknown atomic operation %02x\n", insn->imm);
1668 			return -1;
1669 		}
1670 
1671 		jit->seen |= SEEN_MEM;
1672 		break;
1673 	}
1674 	/*
1675 	 * BPF_LDX
1676 	 */
1677 	case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */
1678 	case BPF_LDX | BPF_PROBE_MEM | BPF_B:
1679 	case BPF_LDX | BPF_PROBE_MEM32 | BPF_B:
1680 		bpf_jit_probe_load_pre(jit, insn, &probe);
1681 		/* llgc %dst,off(%src,%arena) */
1682 		EMIT6_DISP_LH(0xe3000000, 0x0090, dst_reg, src_reg,
1683 			      probe.arena_reg, off);
1684 		err = bpf_jit_probe_post(jit, fp, &probe);
1685 		if (err < 0)
1686 			return err;
1687 		jit->seen |= SEEN_MEM;
1688 		if (insn_is_zext(&insn[1]))
1689 			insn_count = 2;
1690 		break;
1691 	case BPF_LDX | BPF_MEMSX | BPF_B: /* dst = *(s8 *)(ul) (src + off) */
1692 	case BPF_LDX | BPF_PROBE_MEMSX | BPF_B:
1693 		bpf_jit_probe_load_pre(jit, insn, &probe);
1694 		/* lgb %dst,off(%src) */
1695 		EMIT6_DISP_LH(0xe3000000, 0x0077, dst_reg, src_reg, REG_0, off);
1696 		err = bpf_jit_probe_post(jit, fp, &probe);
1697 		if (err < 0)
1698 			return err;
1699 		jit->seen |= SEEN_MEM;
1700 		break;
1701 	case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */
1702 	case BPF_LDX | BPF_PROBE_MEM | BPF_H:
1703 	case BPF_LDX | BPF_PROBE_MEM32 | BPF_H:
1704 		bpf_jit_probe_load_pre(jit, insn, &probe);
1705 		/* llgh %dst,off(%src,%arena) */
1706 		EMIT6_DISP_LH(0xe3000000, 0x0091, dst_reg, src_reg,
1707 			      probe.arena_reg, off);
1708 		err = bpf_jit_probe_post(jit, fp, &probe);
1709 		if (err < 0)
1710 			return err;
1711 		jit->seen |= SEEN_MEM;
1712 		if (insn_is_zext(&insn[1]))
1713 			insn_count = 2;
1714 		break;
1715 	case BPF_LDX | BPF_MEMSX | BPF_H: /* dst = *(s16 *)(ul) (src + off) */
1716 	case BPF_LDX | BPF_PROBE_MEMSX | BPF_H:
1717 		bpf_jit_probe_load_pre(jit, insn, &probe);
1718 		/* lgh %dst,off(%src) */
1719 		EMIT6_DISP_LH(0xe3000000, 0x0015, dst_reg, src_reg, REG_0, off);
1720 		err = bpf_jit_probe_post(jit, fp, &probe);
1721 		if (err < 0)
1722 			return err;
1723 		jit->seen |= SEEN_MEM;
1724 		break;
1725 	case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */
1726 	case BPF_LDX | BPF_PROBE_MEM | BPF_W:
1727 	case BPF_LDX | BPF_PROBE_MEM32 | BPF_W:
1728 		bpf_jit_probe_load_pre(jit, insn, &probe);
1729 		/* llgf %dst,off(%src,%arena) */
1730 		jit->seen |= SEEN_MEM;
1731 		EMIT6_DISP_LH(0xe3000000, 0x0016, dst_reg, src_reg,
1732 			      probe.arena_reg, off);
1733 		err = bpf_jit_probe_post(jit, fp, &probe);
1734 		if (err < 0)
1735 			return err;
1736 		if (insn_is_zext(&insn[1]))
1737 			insn_count = 2;
1738 		break;
1739 	case BPF_LDX | BPF_MEMSX | BPF_W: /* dst = *(s32 *)(ul) (src + off) */
1740 	case BPF_LDX | BPF_PROBE_MEMSX | BPF_W:
1741 		bpf_jit_probe_load_pre(jit, insn, &probe);
1742 		/* lgf %dst,off(%src) */
1743 		jit->seen |= SEEN_MEM;
1744 		EMIT6_DISP_LH(0xe3000000, 0x0014, dst_reg, src_reg, REG_0, off);
1745 		err = bpf_jit_probe_post(jit, fp, &probe);
1746 		if (err < 0)
1747 			return err;
1748 		break;
1749 	case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */
1750 	case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
1751 	case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW:
1752 		bpf_jit_probe_load_pre(jit, insn, &probe);
1753 		/* lg %dst,off(%src,%arena) */
1754 		jit->seen |= SEEN_MEM;
1755 		EMIT6_DISP_LH(0xe3000000, 0x0004, dst_reg, src_reg,
1756 			      probe.arena_reg, off);
1757 		err = bpf_jit_probe_post(jit, fp, &probe);
1758 		if (err < 0)
1759 			return err;
1760 		break;
1761 	/*
1762 	 * BPF_JMP / CALL
1763 	 */
1764 	case BPF_JMP | BPF_CALL:
1765 	{
1766 		const struct btf_func_model *m;
1767 		bool func_addr_fixed;
1768 		int j, ret;
1769 		u64 func;
1770 
1771 		ret = bpf_jit_get_func_addr(fp, insn, extra_pass,
1772 					    &func, &func_addr_fixed);
1773 		if (ret < 0)
1774 			return -1;
1775 
1776 		REG_SET_SEEN(BPF_REG_5);
1777 		jit->seen |= SEEN_FUNC;
1778 
1779 		/*
1780 		 * Copy the tail call counter to where the callee expects it.
1781 		 */
1782 
1783 		if (insn->src_reg == BPF_PSEUDO_CALL)
1784 			/*
1785 			 * mvc tail_call_cnt(4,%r15),
1786 			 *     frame_off+tail_call_cnt(%r15)
1787 			 */
1788 			_EMIT6(0xd203f000 | offsetof(struct prog_frame,
1789 						     tail_call_cnt),
1790 			       0xf000 | (jit->frame_off +
1791 					 offsetof(struct prog_frame,
1792 						  tail_call_cnt)));
1793 
1794 		/* Sign-extend the kfunc arguments. */
1795 		if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
1796 			m = bpf_jit_find_kfunc_model(fp, insn);
1797 			if (!m)
1798 				return -1;
1799 
1800 			for (j = 0; j < m->nr_args; j++) {
1801 				if (sign_extend(jit, BPF_REG_1 + j,
1802 						m->arg_size[j],
1803 						m->arg_flags[j]))
1804 					return -1;
1805 			}
1806 		}
1807 
1808 		if ((void *)func == arch_bpf_timed_may_goto) {
1809 			/*
1810 			 * arch_bpf_timed_may_goto() has a special ABI: the
1811 			 * parameters are in BPF_REG_AX and BPF_REG_10; the
1812 			 * return value is in BPF_REG_AX; and all GPRs except
1813 			 * REG_W0, REG_W1, and BPF_REG_AX are callee-saved.
1814 			 */
1815 
1816 			/* brasl %r0,func */
1817 			EMIT6_PCREL_RILB_PTR(0xc0050000, REG_0, (void *)func);
1818 		} else {
1819 			/* brasl %r14,func */
1820 			EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14, (void *)func);
1821 			/* lgr %b0,%r2: load return value into %b0 */
1822 			EMIT4(0xb9040000, BPF_REG_0, REG_2);
1823 		}
1824 
1825 		/*
1826 		 * Copy the potentially updated tail call counter back.
1827 		 */
1828 
1829 		if (insn->src_reg == BPF_PSEUDO_CALL)
1830 			/*
1831 			 * mvc frame_off+tail_call_cnt(4,%r15),
1832 			 *     tail_call_cnt(%r15)
1833 			 */
1834 			_EMIT6(0xd203f000 | (jit->frame_off +
1835 					     offsetof(struct prog_frame,
1836 						      tail_call_cnt)),
1837 			       0xf000 | offsetof(struct prog_frame,
1838 						 tail_call_cnt));
1839 
1840 		break;
1841 	}
1842 	case BPF_JMP | BPF_TAIL_CALL: {
1843 		int patch_1_clrj, patch_2_clij, patch_3_brc;
1844 
1845 		/*
1846 		 * Implicit input:
1847 		 *  B1: pointer to ctx
1848 		 *  B2: pointer to bpf_array
1849 		 *  B3: index in bpf_array
1850 		 *
1851 		 * if (index >= array->map.max_entries)
1852 		 *         goto out;
1853 		 */
1854 
1855 		/* llgf %w1,map.max_entries(%b2) */
1856 		EMIT6_DISP_LH(0xe3000000, 0x0016, REG_W1, REG_0, BPF_REG_2,
1857 			      offsetof(struct bpf_array, map.max_entries));
1858 		/* if ((u32)%b3 >= (u32)%w1) goto out; */
1859 		/* clrj %b3,%w1,0xa,out */
1860 		patch_1_clrj = jit->prg;
1861 		EMIT6_PCREL_RIEB(0xec000000, 0x0077, BPF_REG_3, REG_W1, 0xa,
1862 				 jit->prg);
1863 
1864 		/*
1865 		 * if (tail_call_cnt++ >= MAX_TAIL_CALL_CNT)
1866 		 *         goto out;
1867 		 */
1868 
1869 		off = jit->frame_off +
1870 		      offsetof(struct prog_frame, tail_call_cnt);
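		/*
		 * laal bumps the counter in the frame and leaves its old value
		 * in %w1, which the clij below checks against the limit.
		 */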
1871 		/* lhi %w0,1 */
1872 		EMIT4_IMM(0xa7080000, REG_W0, 1);
1873 		/* laal %w1,%w0,off(%r15) */
1874 		EMIT6_DISP_LH(0xeb000000, 0x00fa, REG_W1, REG_W0, REG_15, off);
1875 		/* clij %w1,MAX_TAIL_CALL_CNT-1,0x2,out */
1876 		patch_2_clij = jit->prg;
1877 		EMIT6_PCREL_RIEC(0xec000000, 0x007f, REG_W1, MAX_TAIL_CALL_CNT - 1,
1878 				 2, jit->prg);
1879 
1880 		/*
1881 		 * prog = array->ptrs[index];
1882 		 * if (prog == NULL)
1883 		 *         goto out;
1884 		 */
1885 
1886 		/* llgfr %r1,%b3: %r1 = (u32) index */
1887 		EMIT4(0xb9160000, REG_1, BPF_REG_3);
1888 		/* sllg %r1,%r1,3: %r1 *= 8 */
1889 		EMIT6_DISP_LH(0xeb000000, 0x000d, REG_1, REG_1, REG_0, 3);
1890 		/* ltg %r1,prog(%b2,%r1) */
1891 		EMIT6_DISP_LH(0xe3000000, 0x0002, REG_1, BPF_REG_2,
1892 			      REG_1, offsetof(struct bpf_array, ptrs));
1893 		/* brc 0x8,out */
1894 		patch_3_brc = jit->prg;
1895 		EMIT4_PCREL_RIC(0xa7040000, 8, jit->prg);
1896 
1897 		/*
1898 		 * Restore registers before calling function
1899 		 */
1900 		save_restore_regs(jit, REGS_RESTORE, 0);
1901 
1902 		/*
1903 		 * goto *(prog->bpf_func + tail_call_start);
1904 		 */
1905 
1906 		/* lg %r1,bpf_func(%r1) */
1907 		EMIT6_DISP_LH(0xe3000000, 0x0004, REG_1, REG_1, REG_0,
1908 			      offsetof(struct bpf_prog, bpf_func));
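		/* With expolines enabled, indirect jumps must go via a thunk. */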
1909 		if (nospec_uses_trampoline()) {
1910 			jit->seen |= SEEN_FUNC;
1911 			/* aghi %r1,tail_call_start */
1912 			EMIT4_IMM(0xa70b0000, REG_1, jit->tail_call_start);
1913 			/* brcl 0xf,__s390_indirect_jump_r1 */
1914 			EMIT6_PCREL_RILC_PTR(0xc0040000, 0xf,
1915 					     __s390_indirect_jump_r1);
1916 		} else {
1917 			/* bc 0xf,tail_call_start(%r1) */
1918 			_EMIT4(0x47f01000 + jit->tail_call_start);
1919 		}
1920 		/* out: */
1921 		if (jit->prg_buf) {
1922 			*(u16 *)(jit->prg_buf + patch_1_clrj + 2) =
1923 				(jit->prg - patch_1_clrj) >> 1;
1924 			*(u16 *)(jit->prg_buf + patch_2_clij + 2) =
1925 				(jit->prg - patch_2_clij) >> 1;
1926 			*(u16 *)(jit->prg_buf + patch_3_brc + 2) =
1927 				(jit->prg - patch_3_brc) >> 1;
1928 		}
1929 		break;
1930 	}
1931 	case BPF_JMP | BPF_EXIT: /* return b0 */
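		/*
		 * The epilogue directly follows the last instruction, so no
		 * branch is needed in that case.
		 */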
1932 		last = (i == fp->len - 1) ? 1 : 0;
1933 		if (last)
1934 			break;
1935 		if (!is_first_pass(jit) && can_use_rel(jit, jit->exit_ip))
1936 			/* brc 0xf, <exit> */
1937 			EMIT4_PCREL_RIC(0xa7040000, 0xf, jit->exit_ip);
1938 		else
1939 			/* brcl 0xf, <exit> */
1940 			EMIT6_PCREL_RILC(0xc0040000, 0xf, jit->exit_ip);
1941 		break;
1942 	/*
1943 	 * Branch relative (number of skipped instructions) to offset on
1944 	 * condition.
1945 	 *
1946 	 * Condition code to mask mapping:
1947 	 *
1948 	 * CC | Description	   | Mask
1949 	 * ------------------------------
1950 	 * 0  | Operands equal	   |	8
1951 	 * 1  | First operand low  |	4
1952 	 * 2  | First operand high |	2
1953 	 * 3  | Unused		   |	1
1954 	 *
1955 	 * For s390x relative branches: ip = ip + off_bytes
1956 	 * For BPF relative branches:	insn = insn + off_insns + 1
1957 	 *
1958 	 * For example, on s390x an offset of 0 branches to the branch
1959 	 * instruction itself (an endless loop), whereas in BPF an offset
1960 	 * of 0 branches to the instruction following the branch.
1961 	 */
1962 	case BPF_JMP32 | BPF_JA: /* if (true) */
1963 		branch_oc_off = imm;
1964 		fallthrough;
1965 	case BPF_JMP | BPF_JA: /* if (true) */
1966 		mask = 0xf000; /* j */
1967 		goto branch_oc;
1968 	case BPF_JMP | BPF_JSGT | BPF_K: /* ((s64) dst > (s64) imm) */
1969 	case BPF_JMP32 | BPF_JSGT | BPF_K: /* ((s32) dst > (s32) imm) */
1970 		mask = 0x2000; /* jh */
1971 		goto branch_ks;
1972 	case BPF_JMP | BPF_JSLT | BPF_K: /* ((s64) dst < (s64) imm) */
1973 	case BPF_JMP32 | BPF_JSLT | BPF_K: /* ((s32) dst < (s32) imm) */
1974 		mask = 0x4000; /* jl */
1975 		goto branch_ks;
1976 	case BPF_JMP | BPF_JSGE | BPF_K: /* ((s64) dst >= (s64) imm) */
1977 	case BPF_JMP32 | BPF_JSGE | BPF_K: /* ((s32) dst >= (s32) imm) */
1978 		mask = 0xa000; /* jhe */
1979 		goto branch_ks;
1980 	case BPF_JMP | BPF_JSLE | BPF_K: /* ((s64) dst <= (s64) imm) */
1981 	case BPF_JMP32 | BPF_JSLE | BPF_K: /* ((s32) dst <= (s32) imm) */
1982 		mask = 0xc000; /* jle */
1983 		goto branch_ks;
1984 	case BPF_JMP | BPF_JGT | BPF_K: /* (dst_reg > imm) */
1985 	case BPF_JMP32 | BPF_JGT | BPF_K: /* ((u32) dst_reg > (u32) imm) */
1986 		mask = 0x2000; /* jh */
1987 		goto branch_ku;
1988 	case BPF_JMP | BPF_JLT | BPF_K: /* (dst_reg < imm) */
1989 	case BPF_JMP32 | BPF_JLT | BPF_K: /* ((u32) dst_reg < (u32) imm) */
1990 		mask = 0x4000; /* jl */
1991 		goto branch_ku;
1992 	case BPF_JMP | BPF_JGE | BPF_K: /* (dst_reg >= imm) */
1993 	case BPF_JMP32 | BPF_JGE | BPF_K: /* ((u32) dst_reg >= (u32) imm) */
1994 		mask = 0xa000; /* jhe */
1995 		goto branch_ku;
1996 	case BPF_JMP | BPF_JLE | BPF_K: /* (dst_reg <= imm) */
1997 	case BPF_JMP32 | BPF_JLE | BPF_K: /* ((u32) dst_reg <= (u32) imm) */
1998 		mask = 0xc000; /* jle */
1999 		goto branch_ku;
2000 	case BPF_JMP | BPF_JNE | BPF_K: /* (dst_reg != imm) */
2001 	case BPF_JMP32 | BPF_JNE | BPF_K: /* ((u32) dst_reg != (u32) imm) */
2002 		mask = 0x7000; /* jne */
2003 		goto branch_ku;
2004 	case BPF_JMP | BPF_JEQ | BPF_K: /* (dst_reg == imm) */
2005 	case BPF_JMP32 | BPF_JEQ | BPF_K: /* ((u32) dst_reg == (u32) imm) */
2006 		mask = 0x8000; /* je */
2007 		goto branch_ku;
2008 	case BPF_JMP | BPF_JSET | BPF_K: /* (dst_reg & imm) */
2009 	case BPF_JMP32 | BPF_JSET | BPF_K: /* ((u32) dst_reg & (u32) imm) */
2010 		mask = 0x7000; /* jnz */
2011 		if (BPF_CLASS(insn->code) == BPF_JMP32) {
2012 			/* llilf %w1,imm (load zero extend imm) */
2013 			EMIT6_IMM(0xc00f0000, REG_W1, imm);
2014 			/* nr %w1,%dst */
2015 			EMIT2(0x1400, REG_W1, dst_reg);
2016 		} else {
2017 			/* lgfi %w1,imm (load sign extend imm) */
2018 			EMIT6_IMM(0xc0010000, REG_W1, imm);
2019 			/* ngr %w1,%dst */
2020 			EMIT4(0xb9800000, REG_W1, dst_reg);
2021 		}
2022 		goto branch_oc;
2023 
2024 	case BPF_JMP | BPF_JSGT | BPF_X: /* ((s64) dst > (s64) src) */
2025 	case BPF_JMP32 | BPF_JSGT | BPF_X: /* ((s32) dst > (s32) src) */
2026 		mask = 0x2000; /* jh */
2027 		goto branch_xs;
2028 	case BPF_JMP | BPF_JSLT | BPF_X: /* ((s64) dst < (s64) src) */
2029 	case BPF_JMP32 | BPF_JSLT | BPF_X: /* ((s32) dst < (s32) src) */
2030 		mask = 0x4000; /* jl */
2031 		goto branch_xs;
2032 	case BPF_JMP | BPF_JSGE | BPF_X: /* ((s64) dst >= (s64) src) */
2033 	case BPF_JMP32 | BPF_JSGE | BPF_X: /* ((s32) dst >= (s32) src) */
2034 		mask = 0xa000; /* jhe */
2035 		goto branch_xs;
2036 	case BPF_JMP | BPF_JSLE | BPF_X: /* ((s64) dst <= (s64) src) */
2037 	case BPF_JMP32 | BPF_JSLE | BPF_X: /* ((s32) dst <= (s32) src) */
2038 		mask = 0xc000; /* jle */
2039 		goto branch_xs;
2040 	case BPF_JMP | BPF_JGT | BPF_X: /* (dst > src) */
2041 	case BPF_JMP32 | BPF_JGT | BPF_X: /* ((u32) dst > (u32) src) */
2042 		mask = 0x2000; /* jh */
2043 		goto branch_xu;
2044 	case BPF_JMP | BPF_JLT | BPF_X: /* (dst < src) */
2045 	case BPF_JMP32 | BPF_JLT | BPF_X: /* ((u32) dst < (u32) src) */
2046 		mask = 0x4000; /* jl */
2047 		goto branch_xu;
2048 	case BPF_JMP | BPF_JGE | BPF_X: /* (dst >= src) */
2049 	case BPF_JMP32 | BPF_JGE | BPF_X: /* ((u32) dst >= (u32) src) */
2050 		mask = 0xa000; /* jhe */
2051 		goto branch_xu;
2052 	case BPF_JMP | BPF_JLE | BPF_X: /* (dst <= src) */
2053 	case BPF_JMP32 | BPF_JLE | BPF_X: /* ((u32) dst <= (u32) src) */
2054 		mask = 0xc000; /* jle */
2055 		goto branch_xu;
2056 	case BPF_JMP | BPF_JNE | BPF_X: /* (dst != src) */
2057 	case BPF_JMP32 | BPF_JNE | BPF_X: /* ((u32) dst != (u32) src) */
2058 		mask = 0x7000; /* jne */
2059 		goto branch_xu;
2060 	case BPF_JMP | BPF_JEQ | BPF_X: /* (dst == src) */
2061 	case BPF_JMP32 | BPF_JEQ | BPF_X: /* ((u32) dst == (u32) src) */
2062 		mask = 0x8000; /* je */
2063 		goto branch_xu;
2064 	case BPF_JMP | BPF_JSET | BPF_X: /* (dst & src) */
2065 	case BPF_JMP32 | BPF_JSET | BPF_X: /* ((u32) dst & (u32) src) */
2066 	{
2067 		bool is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
2068 
2069 		mask = 0x7000; /* jnz */
2070 		/* nrk or ngrk %w1,%dst,%src */
2071 		EMIT4_RRF((is_jmp32 ? 0xb9f40000 : 0xb9e40000),
2072 			  REG_W1, dst_reg, src_reg);
2073 		goto branch_oc;
2074 branch_ks:
2075 		is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
2076 		/* cfi or cgfi %dst,imm */
2077 		EMIT6_IMM(is_jmp32 ? 0xc20d0000 : 0xc20c0000,
2078 			  dst_reg, imm);
2079 		if (!is_first_pass(jit) &&
2080 		    can_use_rel(jit, addrs[i + off + 1])) {
2081 			/* brc mask,off */
2082 			EMIT4_PCREL_RIC(0xa7040000,
2083 					mask >> 12, addrs[i + off + 1]);
2084 		} else {
2085 			/* brcl mask,off */
2086 			EMIT6_PCREL_RILC(0xc0040000,
2087 					 mask >> 12, addrs[i + off + 1]);
2088 		}
2089 		break;
2090 branch_ku:
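		/*
		 * BPF sign-extends the 32-bit immediate to 64 bits even for
		 * unsigned compares, hence lgfi before the unsigned
		 * register-register compare.
		 */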
2091 		/* lgfi %w1,imm (load sign extend imm) */
2092 		src_reg = REG_1;
2093 		EMIT6_IMM(0xc0010000, src_reg, imm);
2094 		goto branch_xu;
2095 branch_xs:
2096 		is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
2097 		if (!is_first_pass(jit) &&
2098 		    can_use_rel(jit, addrs[i + off + 1])) {
2099 			/* crj or cgrj %dst,%src,mask,off */
2100 			EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0076 : 0x0064),
2101 				    dst_reg, src_reg, i, off, mask);
2102 		} else {
2103 			/* cr or cgr %dst,%src */
2104 			if (is_jmp32)
2105 				EMIT2(0x1900, dst_reg, src_reg);
2106 			else
2107 				EMIT4(0xb9200000, dst_reg, src_reg);
2108 			/* brcl mask,off */
2109 			EMIT6_PCREL_RILC(0xc0040000,
2110 					 mask >> 12, addrs[i + off + 1]);
2111 		}
2112 		break;
2113 branch_xu:
2114 		is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
2115 		if (!is_first_pass(jit) &&
2116 		    can_use_rel(jit, addrs[i + off + 1])) {
2117 			/* clrj or clgrj %dst,%src,mask,off */
2118 			EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0077 : 0x0065),
2119 				    dst_reg, src_reg, i, off, mask);
2120 		} else {
2121 			/* clr or clgr %dst,%src */
2122 			if (is_jmp32)
2123 				EMIT2(0x1500, dst_reg, src_reg);
2124 			else
2125 				EMIT4(0xb9210000, dst_reg, src_reg);
2126 			/* brcl mask,off */
2127 			EMIT6_PCREL_RILC(0xc0040000,
2128 					 mask >> 12, addrs[i + off + 1]);
2129 		}
2130 		break;
2131 branch_oc:
2132 		if (!is_first_pass(jit) &&
2133 		    can_use_rel(jit, addrs[i + branch_oc_off + 1])) {
2134 			/* brc mask,off */
2135 			EMIT4_PCREL_RIC(0xa7040000,
2136 					mask >> 12,
2137 					addrs[i + branch_oc_off + 1]);
2138 		} else {
2139 			/* brcl mask,off */
2140 			EMIT6_PCREL_RILC(0xc0040000,
2141 					 mask >> 12,
2142 					 addrs[i + branch_oc_off + 1]);
2143 		}
2144 		break;
2145 	}
2146 	default: /* too complex, give up */
2147 		pr_err("Unknown opcode %02x\n", insn->code);
2148 		return -1;
2149 	}
2150 
2151 	return insn_count;
2152 }
2153 
2154 /*
2155  * Return whether the new address of the i-th instruction satisfies all invariants
2156  */
2157 static bool bpf_is_new_addr_sane(struct bpf_jit *jit, int i)
2158 {
2159 	/* On the first pass anything goes */
2160 	if (is_first_pass(jit))
2161 		return true;
2162 
2163 	/* The codegen pass must not change anything */
2164 	if (is_codegen_pass(jit))
2165 		return jit->addrs[i] == jit->prg;
2166 
2167 	/* Passes in between must not increase code size */
2168 	return jit->addrs[i] >= jit->prg;
2169 }
2170 
2171 /*
2172  * Update the address of the i-th instruction
2173  */
2174 static int bpf_set_addr(struct bpf_jit *jit, int i)
2175 {
2176 	int delta;
2177 
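	/*
	 * The layout is frozen during the codegen pass: if this pass emitted
	 * less code than planned, pad up to the previously recorded address.
	 */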
2178 	if (is_codegen_pass(jit)) {
2179 		delta = jit->prg - jit->addrs[i];
2180 		if (delta < 0)
2181 			bpf_skip(jit, -delta);
2182 	}
2183 	if (WARN_ON_ONCE(!bpf_is_new_addr_sane(jit, i)))
2184 		return -1;
2185 	jit->addrs[i] = jit->prg;
2186 	return 0;
2187 }
2188 
2189 /*
2190  * Compile eBPF program into s390x code
2191  */
2192 static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp,
2193 			bool extra_pass)
2194 {
2195 	int i, insn_count, lit32_size, lit64_size;
2196 	u64 kern_arena;
2197 
2198 	jit->lit32 = jit->lit32_start;
2199 	jit->lit64 = jit->lit64_start;
2200 	jit->prg = 0;
2201 	jit->excnt = 0;
2202 	if (is_first_pass(jit) || (jit->seen & SEEN_STACK))
2203 		jit->frame_off = sizeof(struct prog_frame) -
2204 				 offsetofend(struct prog_frame, unused) +
2205 				 round_up(fp->aux->stack_depth, 8);
2206 	else
2207 		jit->frame_off = 0;
2208 
2209 	kern_arena = bpf_arena_get_kern_vm_start(fp->aux->arena);
2210 	if (kern_arena)
2211 		jit->kern_arena = _EMIT_CONST_U64(kern_arena);
2212 	jit->user_arena = bpf_arena_get_user_vm_start(fp->aux->arena);
2213 
2214 	bpf_jit_prologue(jit, fp);
2215 	if (bpf_set_addr(jit, 0) < 0)
2216 		return -1;
2217 	for (i = 0; i < fp->len; i += insn_count) {
2218 		insn_count = bpf_jit_insn(jit, fp, i, extra_pass);
2219 		if (insn_count < 0)
2220 			return -1;
2221 		/* Next instruction address */
2222 		if (bpf_set_addr(jit, i + insn_count) < 0)
2223 			return -1;
2224 	}
2225 	bpf_jit_epilogue(jit);
2226 
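	/*
	 * Place the 32-bit and 64-bit literal pools right behind the program,
	 * aligned to 4 and 8 bytes respectively.
	 */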
2227 	lit32_size = jit->lit32 - jit->lit32_start;
2228 	lit64_size = jit->lit64 - jit->lit64_start;
2229 	jit->lit32_start = jit->prg;
2230 	if (lit32_size)
2231 		jit->lit32_start = ALIGN(jit->lit32_start, 4);
2232 	jit->lit64_start = jit->lit32_start + lit32_size;
2233 	if (lit64_size)
2234 		jit->lit64_start = ALIGN(jit->lit64_start, 8);
2235 	jit->size = jit->lit64_start + lit64_size;
2236 	jit->size_prg = jit->prg;
2237 
2238 	if (WARN_ON_ONCE(fp->aux->extable &&
2239 			 jit->excnt != fp->aux->num_exentries))
2240 		/* Verifier bug - too many entries. */
2241 		return -1;
2242 
2243 	return 0;
2244 }
2245 
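/*
 * 32-bit operations leave the upper half of a 64-bit register unchanged, so
 * let the verifier insert explicit zero-extensions where they are needed.
 */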
2246 bool bpf_jit_needs_zext(void)
2247 {
2248 	return true;
2249 }
2250 
2251 struct s390_jit_data {
2252 	struct bpf_binary_header *header;
2253 	struct bpf_jit ctx;
2254 	int pass;
2255 };
2256 
2257 static struct bpf_binary_header *bpf_jit_alloc(struct bpf_jit *jit,
2258 					       struct bpf_prog *fp)
2259 {
2260 	struct bpf_binary_header *header;
2261 	struct bpf_insn *insn;
2262 	u32 extable_size;
2263 	u32 code_size;
2264 	int i;
2265 
2266 	for (i = 0; i < fp->len; i++) {
2267 		insn = &fp->insnsi[i];
2268 
2269 		if (BPF_CLASS(insn->code) == BPF_STX &&
2270 		    BPF_MODE(insn->code) == BPF_PROBE_ATOMIC &&
2271 		    (BPF_SIZE(insn->code) == BPF_DW ||
2272 		     BPF_SIZE(insn->code) == BPF_W) &&
2273 		    insn->imm == BPF_XCHG)
2274 			/*
2275 			 * bpf_jit_insn() emits a load and a compare-and-swap,
2276 			 * both of which need to be probed.
2277 			 */
2278 			fp->aux->num_exentries += 1;
2279 	}
2280 	/* We need two entries per insn. */
2281 	fp->aux->num_exentries *= 2;
2282 
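	/*
	 * The exception table is placed right behind the code, so round the
	 * code size up to the table's alignment.
	 */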
2283 	code_size = roundup(jit->size,
2284 			    __alignof__(struct exception_table_entry));
2285 	extable_size = fp->aux->num_exentries *
2286 		sizeof(struct exception_table_entry);
2287 	header = bpf_jit_binary_alloc(code_size + extable_size, &jit->prg_buf,
2288 				      8, jit_fill_hole);
2289 	if (!header)
2290 		return NULL;
2291 	fp->aux->extable = (struct exception_table_entry *)
2292 		(jit->prg_buf + code_size);
2293 	return header;
2294 }
2295 
2296 /*
2297  * Compile eBPF program "fp"
2298  */
2299 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
2300 {
2301 	struct bpf_prog *tmp, *orig_fp = fp;
2302 	struct bpf_binary_header *header;
2303 	struct s390_jit_data *jit_data;
2304 	bool tmp_blinded = false;
2305 	bool extra_pass = false;
2306 	struct bpf_jit jit;
2307 	int pass;
2308 
2309 	if (!fp->jit_requested)
2310 		return orig_fp;
2311 
2312 	tmp = bpf_jit_blind_constants(fp);
2313 	/*
2314 	 * If blinding was requested and we failed during blinding,
2315 	 * we must fall back to the interpreter.
2316 	 */
2317 	if (IS_ERR(tmp))
2318 		return orig_fp;
2319 	if (tmp != fp) {
2320 		tmp_blinded = true;
2321 		fp = tmp;
2322 	}
2323 
2324 	jit_data = fp->aux->jit_data;
2325 	if (!jit_data) {
2326 		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
2327 		if (!jit_data) {
2328 			fp = orig_fp;
2329 			goto out;
2330 		}
2331 		fp->aux->jit_data = jit_data;
2332 	}
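	/*
	 * A previous invocation already ran the initial passes: reuse its
	 * context and image and only redo the code generation (extra pass).
	 */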
2333 	if (jit_data->ctx.addrs) {
2334 		jit = jit_data->ctx;
2335 		header = jit_data->header;
2336 		extra_pass = true;
2337 		pass = jit_data->pass + 1;
2338 		goto skip_init_ctx;
2339 	}
2340 
2341 	memset(&jit, 0, sizeof(jit));
2342 	jit.addrs = kvcalloc(fp->len + 1, sizeof(*jit.addrs), GFP_KERNEL);
2343 	if (jit.addrs == NULL) {
2344 		fp = orig_fp;
2345 		goto free_addrs;
2346 	}
2347 	/*
2348 	 * Three initial passes:
2349 	 *   - 1/2: Determine clobbered registers
2350 	 *   - 3:   Calculate program size and addrs array
2351 	 */
2352 	for (pass = 1; pass <= 3; pass++) {
2353 		if (bpf_jit_prog(&jit, fp, extra_pass)) {
2354 			fp = orig_fp;
2355 			goto free_addrs;
2356 		}
2357 	}
2358 	/*
2359 	 * Final pass: Allocate and generate program
2360 	 */
2361 	header = bpf_jit_alloc(&jit, fp);
2362 	if (!header) {
2363 		fp = orig_fp;
2364 		goto free_addrs;
2365 	}
2366 skip_init_ctx:
2367 	if (bpf_jit_prog(&jit, fp, extra_pass)) {
2368 		bpf_jit_binary_free(header);
2369 		fp = orig_fp;
2370 		goto free_addrs;
2371 	}
2372 	if (bpf_jit_enable > 1) {
2373 		bpf_jit_dump(fp->len, jit.size, pass, jit.prg_buf);
2374 		print_fn_code(jit.prg_buf, jit.size_prg);
2375 	}
2376 	if (!fp->is_func || extra_pass) {
2377 		if (bpf_jit_binary_lock_ro(header)) {
2378 			bpf_jit_binary_free(header);
2379 			fp = orig_fp;
2380 			goto free_addrs;
2381 		}
2382 	} else {
2383 		jit_data->header = header;
2384 		jit_data->ctx = jit;
2385 		jit_data->pass = pass;
2386 	}
2387 	fp->bpf_func = (void *) jit.prg_buf;
2388 	fp->jited = 1;
2389 	fp->jited_len = jit.size;
2390 
2391 	if (!fp->is_func || extra_pass) {
2392 		bpf_prog_fill_jited_linfo(fp, jit.addrs + 1);
2393 free_addrs:
2394 		kvfree(jit.addrs);
2395 		kfree(jit_data);
2396 		fp->aux->jit_data = NULL;
2397 	}
2398 out:
2399 	if (tmp_blinded)
2400 		bpf_jit_prog_release_other(fp, fp == orig_fp ?
2401 					   tmp : orig_fp);
2402 	return fp;
2403 }
2404 
2405 bool bpf_jit_supports_kfunc_call(void)
2406 {
2407 	return true;
2408 }
2409 
2410 bool bpf_jit_supports_far_kfunc_call(void)
2411 {
2412 	return true;
2413 }
2414 
2415 int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
2416 		       void *old_addr, void *new_addr)
2417 {
2418 	struct bpf_plt expected_plt, current_plt, new_plt, *plt;
2419 	struct {
2420 		u16 opc;
2421 		s32 disp;
2422 	} __packed insn;
2423 	char *ret;
2424 	int err;
2425 
2426 	/* Verify the branch to be patched. */
2427 	err = copy_from_kernel_nofault(&insn, ip, sizeof(insn));
2428 	if (err < 0)
2429 		return err;
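	/*
	 * The patch site is a 6-byte brcl: opcode 0xc004 is brcl 0 (a nop)
	 * when the branch is disarmed, 0xc0f4 is brcl 0xf when it is armed
	 * and targets old_addr.
	 */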
2430 	if (insn.opc != (0xc004 | (old_addr ? 0xf0 : 0)))
2431 		return -EINVAL;
2432 
2433 	if (t == BPF_MOD_JUMP &&
2434 	    insn.disp == ((char *)new_addr - (char *)ip) >> 1) {
2435 		/*
2436 		 * The branch already points to the destination,
2437 		 * there is no PLT.
2438 		 */
2439 	} else {
2440 		/* Verify the PLT. */
2441 		plt = ip + (insn.disp << 1);
2442 		err = copy_from_kernel_nofault(&current_plt, plt,
2443 					       sizeof(current_plt));
2444 		if (err < 0)
2445 			return err;
2446 		ret = (char *)ip + 6;
2447 		bpf_jit_plt(&expected_plt, ret, old_addr);
2448 		if (memcmp(&current_plt, &expected_plt, sizeof(current_plt)))
2449 			return -EINVAL;
2450 		/* Adjust the call address. */
2451 		bpf_jit_plt(&new_plt, ret, new_addr);
2452 		s390_kernel_write(&plt->target, &new_plt.target,
2453 				  sizeof(void *));
2454 	}
2455 
2456 	/* Adjust the mask of the branch. */
2457 	insn.opc = 0xc004 | (new_addr ? 0xf0 : 0);
2458 	s390_kernel_write((char *)ip + 1, (char *)&insn.opc + 1, 1);
2459 
2460 	/* Make the new code visible to the other CPUs. */
2461 	text_poke_sync_lock();
2462 
2463 	return 0;
2464 }
2465 
2466 struct bpf_tramp_jit {
2467 	struct bpf_jit common;
2468 	int orig_stack_args_off;/* Offset of arguments placed on stack by the
2469 				 * func_addr's original caller
2470 				 */
2471 	int stack_size;		/* Trampoline stack size */
2472 	int backchain_off;	/* Offset of backchain */
2473 	int stack_args_off;	/* Offset of stack arguments for calling
2474 				 * func_addr, has to be at the top
2475 				 */
2476 	int reg_args_off;	/* Offset of register arguments for calling
2477 				 * func_addr
2478 				 */
2479 	int ip_off;		/* For bpf_get_func_ip(), has to be at
2480 				 * (ctx - 16)
2481 				 */
2482 	int arg_cnt_off;	/* For bpf_get_func_arg_cnt(), has to be at
2483 				 * (ctx - 8)
2484 				 */
2485 	int bpf_args_off;	/* Offset of BPF_PROG context, which consists
2486 				 * of BPF arguments followed by return value
2487 				 */
2488 	int retval_off;		/* Offset of return value (see above) */
2489 	int r7_r8_off;		/* Offset of saved %r7 and %r8, which are used
2490 				 * for __bpf_prog_enter() return value and
2491 				 * func_addr respectively
2492 				 */
2493 	int run_ctx_off;	/* Offset of struct bpf_tramp_run_ctx */
2494 	int tccnt_off;		/* Offset of saved tailcall counter */
2495 	int r14_off;		/* Offset of saved %r14, has to be at the
2496 				 * bottom */
2497 	int do_fexit;		/* do_fexit: label */
2498 };
2499 
2500 static void load_imm64(struct bpf_jit *jit, int dst_reg, u64 val)
2501 {
2502 	/* llihf %dst_reg,val_hi */
2503 	EMIT6_IMM(0xc00e0000, dst_reg, (val >> 32));
2504 	/* oilf %dst_reg,val_lo */
2505 	EMIT6_IMM(0xc00d0000, dst_reg, val);
2506 }
2507 
2508 static int invoke_bpf_prog(struct bpf_tramp_jit *tjit,
2509 			   const struct btf_func_model *m,
2510 			   struct bpf_tramp_link *tlink, bool save_ret)
2511 {
2512 	struct bpf_jit *jit = &tjit->common;
2513 	int cookie_off = tjit->run_ctx_off +
2514 			 offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
2515 	struct bpf_prog *p = tlink->link.prog;
2516 	int patch;
2517 
2518 	/*
2519 	 * run_ctx.cookie = tlink->cookie;
2520 	 */
2521 
2522 	/* %r0 = tlink->cookie */
2523 	load_imm64(jit, REG_W0, tlink->cookie);
2524 	/* stg %r0,cookie_off(%r15) */
2525 	EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W0, REG_0, REG_15, cookie_off);
2526 
2527 	/*
2528 	 * if ((start = __bpf_prog_enter(p, &run_ctx)) == 0)
2529 	 *         goto skip;
2530 	 */
2531 
2532 	/* %r2 = p */
2533 	load_imm64(jit, REG_2, (u64)p);
2534 	/* la %r3,run_ctx_off(%r15) */
2535 	EMIT4_DISP(0x41000000, REG_3, REG_15, tjit->run_ctx_off);
2536 	/* brasl %r14,__bpf_prog_enter */
2537 	EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14, bpf_trampoline_enter(p));
2538 	/* ltgr %r7,%r2 */
2539 	EMIT4(0xb9020000, REG_7, REG_2);
2540 	/* brcl 8,skip */
2541 	patch = jit->prg;
2542 	EMIT6_PCREL_RILC(0xc0040000, 8, 0);
2543 
2544 	/*
2545 	 * retval = bpf_func(args, p->insnsi);
2546 	 */
2547 
2548 	/* la %r2,bpf_args_off(%r15) */
2549 	EMIT4_DISP(0x41000000, REG_2, REG_15, tjit->bpf_args_off);
2550 	/* %r3 = p->insnsi */
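	/* Only the interpreter uses insnsi; JITed programs ignore %r3. */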
2551 	if (!p->jited)
2552 		load_imm64(jit, REG_3, (u64)p->insnsi);
2553 	/* brasl %r14,p->bpf_func */
2554 	EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14, p->bpf_func);
2555 	/* stg %r2,retval_off(%r15) */
2556 	if (save_ret) {
2557 		if (sign_extend(jit, REG_2, m->ret_size, m->ret_flags))
2558 			return -1;
2559 		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_2, REG_0, REG_15,
2560 			      tjit->retval_off);
2561 	}
2562 
2563 	/* skip: */
2564 	if (jit->prg_buf)
2565 		*(u32 *)&jit->prg_buf[patch + 2] = (jit->prg - patch) >> 1;
2566 
2567 	/*
2568 	 * __bpf_prog_exit(p, start, &run_ctx);
2569 	 */
2570 
2571 	/* %r2 = p */
2572 	load_imm64(jit, REG_2, (u64)p);
2573 	/* lgr %r3,%r7 */
2574 	EMIT4(0xb9040000, REG_3, REG_7);
2575 	/* la %r4,run_ctx_off(%r15) */
2576 	EMIT4_DISP(0x41000000, REG_4, REG_15, tjit->run_ctx_off);
2577 	/* brasl %r14,__bpf_prog_exit */
2578 	EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14, bpf_trampoline_exit(p));
2579 
2580 	return 0;
2581 }
2582 
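/*
 * Reserve "size" bytes in the trampoline frame being laid out and return
 * their offset.
 */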
2583 static int alloc_stack(struct bpf_tramp_jit *tjit, size_t size)
2584 {
2585 	int stack_offset = tjit->stack_size;
2586 
2587 	tjit->stack_size += size;
2588 	return stack_offset;
2589 }
2590 
2591 /* ABI uses %r2 - %r6 for parameter passing. */
2592 #define MAX_NR_REG_ARGS 5
2593 
2594 /* The "L" field of the "mvc" instruction is 8 bits. */
2595 #define MAX_MVC_SIZE 256
2596 #define MAX_NR_STACK_ARGS (MAX_MVC_SIZE / sizeof(u64))
2597 
2598 /* -mfentry generates a 6-byte nop on s390x. */
2599 #define S390X_PATCH_SIZE 6
2600 
2601 static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
2602 					 struct bpf_tramp_jit *tjit,
2603 					 const struct btf_func_model *m,
2604 					 u32 flags,
2605 					 struct bpf_tramp_links *tlinks,
2606 					 void *func_addr)
2607 {
2608 	struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
2609 	struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
2610 	struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
2611 	int nr_bpf_args, nr_reg_args, nr_stack_args;
2612 	struct bpf_jit *jit = &tjit->common;
2613 	int arg, bpf_arg_off;
2614 	int i, j;
2615 
2616 	/* Support as many stack arguments as the "mvc" instruction can handle. */
2617 	nr_reg_args = min_t(int, m->nr_args, MAX_NR_REG_ARGS);
2618 	nr_stack_args = m->nr_args - nr_reg_args;
2619 	if (nr_stack_args > MAX_NR_STACK_ARGS)
2620 		return -ENOTSUPP;
2621 
2622 	/* Return to %r14 in the struct_ops case. */
2623 	if (flags & BPF_TRAMP_F_INDIRECT)
2624 		flags |= BPF_TRAMP_F_SKIP_FRAME;
2625 
2626 	/*
2627 	 * Compute how many arguments we need to pass to BPF programs.
2628 	 * BPF ABI mirrors that of x86_64: arguments that are 16 bytes or
2629 	 * smaller are packed into 1 or 2 registers; larger arguments are
2630 	 * passed via pointers.
2631 	 * In s390x ABI, arguments that are 8 bytes or smaller are packed into
2632 	 * a register; larger arguments are passed via pointers.
2633 	 * We need to deal with this difference.
2634 	 */
2635 	nr_bpf_args = 0;
2636 	for (i = 0; i < m->nr_args; i++) {
2637 		if (m->arg_size[i] <= 8)
2638 			nr_bpf_args += 1;
2639 		else if (m->arg_size[i] <= 16)
2640 			nr_bpf_args += 2;
2641 		else
2642 			return -ENOTSUPP;
2643 	}
2644 
2645 	/*
2646 	 * Calculate the stack layout.
2647 	 */
2648 
2649 	/*
2650 	 * Allocate STACK_FRAME_OVERHEAD bytes for the callees. As the s390x
2651 	 * ABI requires, put our backchain at the end of the allocated memory.
2652 	 */
2653 	tjit->stack_size = STACK_FRAME_OVERHEAD;
2654 	tjit->backchain_off = tjit->stack_size - sizeof(u64);
2655 	tjit->stack_args_off = alloc_stack(tjit, nr_stack_args * sizeof(u64));
2656 	tjit->reg_args_off = alloc_stack(tjit, nr_reg_args * sizeof(u64));
2657 	tjit->ip_off = alloc_stack(tjit, sizeof(u64));
2658 	tjit->arg_cnt_off = alloc_stack(tjit, sizeof(u64));
2659 	tjit->bpf_args_off = alloc_stack(tjit, nr_bpf_args * sizeof(u64));
2660 	tjit->retval_off = alloc_stack(tjit, sizeof(u64));
2661 	tjit->r7_r8_off = alloc_stack(tjit, 2 * sizeof(u64));
2662 	tjit->run_ctx_off = alloc_stack(tjit,
2663 					sizeof(struct bpf_tramp_run_ctx));
2664 	tjit->tccnt_off = alloc_stack(tjit, sizeof(u64));
2665 	tjit->r14_off = alloc_stack(tjit, sizeof(u64) * 2);
2666 	/*
2667 	 * In accordance with the s390x ABI, the caller has allocated
2668 	 * STACK_FRAME_OVERHEAD bytes for us. 8 of them contain the caller's
2669 	 * backchain, and the rest we can use.
2670 	 */
2671 	tjit->stack_size -= STACK_FRAME_OVERHEAD - sizeof(u64);
2672 	tjit->orig_stack_args_off = tjit->stack_size + STACK_FRAME_OVERHEAD;
2673 
2674 	/* lgr %r1,%r15 */
2675 	EMIT4(0xb9040000, REG_1, REG_15);
2676 	/* aghi %r15,-stack_size */
2677 	EMIT4_IMM(0xa70b0000, REG_15, -tjit->stack_size);
2678 	/* stg %r1,backchain_off(%r15) */
2679 	EMIT6_DISP_LH(0xe3000000, 0x0024, REG_1, REG_0, REG_15,
2680 		      tjit->backchain_off);
2681 	/* mvc tccnt_off(4,%r15),stack_size+tail_call_cnt(%r15) */
2682 	_EMIT6(0xd203f000 | tjit->tccnt_off,
2683 	       0xf000 | (tjit->stack_size +
2684 			 offsetof(struct prog_frame, tail_call_cnt)));
2685 	/* stmg %r2,%rN,fwd_reg_args_off(%r15) */
2686 	if (nr_reg_args)
2687 		EMIT6_DISP_LH(0xeb000000, 0x0024, REG_2,
2688 			      REG_2 + (nr_reg_args - 1), REG_15,
2689 			      tjit->reg_args_off);
2690 	for (i = 0, j = 0; i < m->nr_args; i++) {
2691 		if (i < MAX_NR_REG_ARGS)
2692 			arg = REG_2 + i;
2693 		else
2694 			arg = tjit->orig_stack_args_off +
2695 			      (i - MAX_NR_REG_ARGS) * sizeof(u64);
2696 		bpf_arg_off = tjit->bpf_args_off + j * sizeof(u64);
2697 		if (m->arg_size[i] <= 8) {
2698 			if (i < MAX_NR_REG_ARGS)
2699 				/* stg %arg,bpf_arg_off(%r15) */
2700 				EMIT6_DISP_LH(0xe3000000, 0x0024, arg,
2701 					      REG_0, REG_15, bpf_arg_off);
2702 			else
2703 				/* mvc bpf_arg_off(8,%r15),arg(%r15) */
2704 				_EMIT6(0xd207f000 | bpf_arg_off,
2705 				       0xf000 | arg);
2706 			j += 1;
2707 		} else {
2708 			if (i < MAX_NR_REG_ARGS) {
2709 				/* mvc bpf_arg_off(16,%r15),0(%arg) */
2710 				_EMIT6(0xd20ff000 | bpf_arg_off,
2711 				       reg2hex[arg] << 12);
2712 			} else {
2713 				/* lg %r1,arg(%r15) */
2714 				EMIT6_DISP_LH(0xe3000000, 0x0004, REG_1, REG_0,
2715 					      REG_15, arg);
2716 				/* mvc bpf_arg_off(16,%r15),0(%r1) */
2717 				_EMIT6(0xd20ff000 | bpf_arg_off, 0x1000);
2718 			}
2719 			j += 2;
2720 		}
2721 	}
2722 	/* stmg %r7,%r8,r7_r8_off(%r15) */
2723 	EMIT6_DISP_LH(0xeb000000, 0x0024, REG_7, REG_8, REG_15,
2724 		      tjit->r7_r8_off);
2725 	/* stg %r14,r14_off(%r15) */
2726 	EMIT6_DISP_LH(0xe3000000, 0x0024, REG_14, REG_0, REG_15, tjit->r14_off);
2727 
2728 	if (flags & BPF_TRAMP_F_ORIG_STACK) {
2729 		/*
2730 		 * The ftrace trampoline puts the return address (which is the
2731 		 * address of the original function + S390X_PATCH_SIZE) into
2732 		 * %r0; see ftrace_shared_hotpatch_trampoline_br and
2733 		 * ftrace_init_nop() for details.
2734 		 */
2735 
2736 		/* lgr %r8,%r0 */
2737 		EMIT4(0xb9040000, REG_8, REG_0);
2738 	}
2739 
2740 	/*
2741 	 * ip = func_addr;
2742 	 * arg_cnt = m->nr_args;
2743 	 */
2744 
2745 	if (flags & BPF_TRAMP_F_IP_ARG) {
2746 		/* %r0 = func_addr */
2747 		load_imm64(jit, REG_0, (u64)func_addr);
2748 		/* stg %r0,ip_off(%r15) */
2749 		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_0, REG_0, REG_15,
2750 			      tjit->ip_off);
2751 	}
2752 	/* lghi %r0,nr_bpf_args */
2753 	EMIT4_IMM(0xa7090000, REG_0, nr_bpf_args);
2754 	/* stg %r0,arg_cnt_off(%r15) */
2755 	EMIT6_DISP_LH(0xe3000000, 0x0024, REG_0, REG_0, REG_15,
2756 		      tjit->arg_cnt_off);
2757 
2758 	if (flags & BPF_TRAMP_F_CALL_ORIG) {
2759 		/*
2760 		 * __bpf_tramp_enter(im);
2761 		 */
2762 
2763 		/* %r2 = im */
2764 		load_imm64(jit, REG_2, (u64)im);
2765 		/* brasl %r14,__bpf_tramp_enter */
2766 		EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14, __bpf_tramp_enter);
2767 	}
2768 
2769 	for (i = 0; i < fentry->nr_links; i++)
2770 		if (invoke_bpf_prog(tjit, m, fentry->links[i],
2771 				    flags & BPF_TRAMP_F_RET_FENTRY_RET))
2772 			return -EINVAL;
2773 
2774 	if (fmod_ret->nr_links) {
2775 		/*
2776 		 * retval = 0;
2777 		 */
2778 
2779 		/* xc retval_off(8,%r15),retval_off(%r15) */
2780 		_EMIT6(0xd707f000 | tjit->retval_off,
2781 		       0xf000 | tjit->retval_off);
2782 
2783 		for (i = 0; i < fmod_ret->nr_links; i++) {
2784 			if (invoke_bpf_prog(tjit, m, fmod_ret->links[i], true))
2785 				return -EINVAL;
2786 
2787 			/*
2788 			 * if (retval)
2789 			 *         goto do_fexit;
2790 			 */
2791 
2792 			/* ltg %r0,retval_off(%r15) */
2793 			EMIT6_DISP_LH(0xe3000000, 0x0002, REG_0, REG_0, REG_15,
2794 				      tjit->retval_off);
2795 			/* brcl 7,do_fexit */
2796 			EMIT6_PCREL_RILC(0xc0040000, 7, tjit->do_fexit);
2797 		}
2798 	}
2799 
2800 	if (flags & BPF_TRAMP_F_CALL_ORIG) {
2801 		/*
2802 		 * retval = func_addr(args);
2803 		 */
2804 
2805 		/* lmg %r2,%rN,reg_args_off(%r15) */
2806 		if (nr_reg_args)
2807 			EMIT6_DISP_LH(0xeb000000, 0x0004, REG_2,
2808 				      REG_2 + (nr_reg_args - 1), REG_15,
2809 				      tjit->reg_args_off);
2810 		/* mvc stack_args_off(N,%r15),orig_stack_args_off(%r15) */
2811 		if (nr_stack_args)
2812 			_EMIT6(0xd200f000 |
2813 				       (nr_stack_args * sizeof(u64) - 1) << 16 |
2814 				       tjit->stack_args_off,
2815 			       0xf000 | tjit->orig_stack_args_off);
2816 		/* mvc tail_call_cnt(4,%r15),tccnt_off(%r15) */
2817 		_EMIT6(0xd203f000 | offsetof(struct prog_frame, tail_call_cnt),
2818 		       0xf000 | tjit->tccnt_off);
2819 		if (flags & BPF_TRAMP_F_ORIG_STACK) {
2820 			if (nospec_uses_trampoline())
2821 				/* brasl %r14,__s390_indirect_jump_r8 */
2822 				EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14,
2823 						     __s390_indirect_jump_r8);
2824 			else
2825 				/* basr %r14,%r8 */
2826 				EMIT2(0x0d00, REG_14, REG_8);
2827 		} else {
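			/*
			 * Call past the ftrace patch site at the start of the
			 * original function to avoid re-entering this
			 * trampoline.
			 */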
2828 			/* brasl %r14,func_addr+S390X_PATCH_SIZE */
2829 			EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14,
2830 					     func_addr + S390X_PATCH_SIZE);
2831 		}
2832 		/* stg %r2,retval_off(%r15) */
2833 		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_2, REG_0, REG_15,
2834 			      tjit->retval_off);
2835 		/* mvc tccnt_off(4,%r15),tail_call_cnt(%r15) */
2836 		_EMIT6(0xd203f000 | tjit->tccnt_off,
2837 		       0xf000 | offsetof(struct prog_frame, tail_call_cnt));
2838 
2839 		im->ip_after_call = jit->prg_buf + jit->prg;
2840 
2841 		/*
2842 		 * The following nop will be patched by bpf_tramp_image_put().
2843 		 */
2844 
2845 		/* brcl 0,im->ip_epilogue */
2846 		EMIT6_PCREL_RILC(0xc0040000, 0, (u64)im->ip_epilogue);
2847 	}
2848 
2849 	/* do_fexit: */
2850 	tjit->do_fexit = jit->prg;
2851 	for (i = 0; i < fexit->nr_links; i++)
2852 		if (invoke_bpf_prog(tjit, m, fexit->links[i], false))
2853 			return -EINVAL;
2854 
2855 	if (flags & BPF_TRAMP_F_CALL_ORIG) {
2856 		im->ip_epilogue = jit->prg_buf + jit->prg;
2857 
2858 		/*
2859 		 * __bpf_tramp_exit(im);
2860 		 */
2861 
2862 		/* %r2 = im */
2863 		load_imm64(jit, REG_2, (u64)im);
2864 		/* brasl %r14,__bpf_tramp_exit */
2865 		EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14, __bpf_tramp_exit);
2866 	}
2867 
2868 	/* lmg %r2,%rN,reg_args_off(%r15) */
2869 	if ((flags & BPF_TRAMP_F_RESTORE_REGS) && nr_reg_args)
2870 		EMIT6_DISP_LH(0xeb000000, 0x0004, REG_2,
2871 			      REG_2 + (nr_reg_args - 1), REG_15,
2872 			      tjit->reg_args_off);
2873 	/* lgr %r1,%r8 */
2874 	if (!(flags & BPF_TRAMP_F_SKIP_FRAME) &&
2875 	    (flags & BPF_TRAMP_F_ORIG_STACK))
2876 		EMIT4(0xb9040000, REG_1, REG_8);
2877 	/* lmg %r7,%r8,r7_r8_off(%r15) */
2878 	EMIT6_DISP_LH(0xeb000000, 0x0004, REG_7, REG_8, REG_15,
2879 		      tjit->r7_r8_off);
2880 	/* lg %r14,r14_off(%r15) */
2881 	EMIT6_DISP_LH(0xe3000000, 0x0004, REG_14, REG_0, REG_15, tjit->r14_off);
2882 	/* lg %r2,retval_off(%r15) */
2883 	if (flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET))
2884 		EMIT6_DISP_LH(0xe3000000, 0x0004, REG_2, REG_0, REG_15,
2885 			      tjit->retval_off);
2886 	/* mvc stack_size+tail_call_cnt(4,%r15),tccnt_off(%r15) */
2887 	_EMIT6(0xd203f000 | (tjit->stack_size +
2888 			     offsetof(struct prog_frame, tail_call_cnt)),
2889 	       0xf000 | tjit->tccnt_off);
2890 	/* aghi %r15,stack_size */
2891 	EMIT4_IMM(0xa70b0000, REG_15, tjit->stack_size);
2892 	if (flags & BPF_TRAMP_F_SKIP_FRAME)
2893 		EMIT_JUMP_REG(14);
2894 	else if (flags & BPF_TRAMP_F_ORIG_STACK)
2895 		EMIT_JUMP_REG(1);
2896 	else
2897 		/* brcl 0xf,func_addr+S390X_PATCH_SIZE */
2898 		EMIT6_PCREL_RILC_PTR(0xc0040000, 0xf,
2899 				     func_addr + S390X_PATCH_SIZE);
2900 	return 0;
2901 }
2902 
2903 int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
2904 			     struct bpf_tramp_links *tlinks, void *orig_call)
2905 {
2906 	struct bpf_tramp_image im;
2907 	struct bpf_tramp_jit tjit;
2908 	int ret;
2909 
2910 	memset(&tjit, 0, sizeof(tjit));
2911 
2912 	ret = __arch_prepare_bpf_trampoline(&im, &tjit, m, flags,
2913 					    tlinks, orig_call);
2914 
2915 	return ret < 0 ? ret : tjit.common.prg;
2916 }
2917 
2918 int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
2919 				void *image_end, const struct btf_func_model *m,
2920 				u32 flags, struct bpf_tramp_links *tlinks,
2921 				void *func_addr)
2922 {
2923 	struct bpf_tramp_jit tjit;
2924 	int ret;
2925 
2926 	/* Compute offsets, check whether the code fits. */
2927 	memset(&tjit, 0, sizeof(tjit));
2928 	ret = __arch_prepare_bpf_trampoline(im, &tjit, m, flags,
2929 					    tlinks, func_addr);
2930 
2931 	if (ret < 0)
2932 		return ret;
2933 	if (tjit.common.prg > (char *)image_end - (char *)image)
2934 		/*
2935 		 * Use the same error code as for exceeding
2936 		 * BPF_MAX_TRAMP_LINKS.
2937 		 */
2938 		return -E2BIG;
2939 
2940 	tjit.common.prg = 0;
2941 	tjit.common.prg_buf = image;
2942 	ret = __arch_prepare_bpf_trampoline(im, &tjit, m, flags,
2943 					    tlinks, func_addr);
2944 
2945 	return ret < 0 ? ret : tjit.common.prg;
2946 }
2947 
2948 bool bpf_jit_supports_subprog_tailcalls(void)
2949 {
2950 	return true;
2951 }
2952 
2953 bool bpf_jit_supports_arena(void)
2954 {
2955 	return true;
2956 }
2957 
2958 bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena)
2959 {
2960 	if (!in_arena)
2961 		return true;
2962 	switch (insn->code) {
2963 	case BPF_STX | BPF_ATOMIC | BPF_B:
2964 	case BPF_STX | BPF_ATOMIC | BPF_H:
2965 	case BPF_STX | BPF_ATOMIC | BPF_W:
2966 	case BPF_STX | BPF_ATOMIC | BPF_DW:
2967 		if (bpf_atomic_is_load_store(insn))
2968 			return false;
2969 		break;
2970 	case BPF_LDX | BPF_MEMSX | BPF_B:
2971 	case BPF_LDX | BPF_MEMSX | BPF_H:
2972 	case BPF_LDX | BPF_MEMSX | BPF_W:
2973 		return false;
2974 	}
2975 	return true;
2976 }
2977 
2978 bool bpf_jit_supports_exceptions(void)
2979 {
2980 	/*
2981 	 * Exceptions require unwinding support, which is always available,
2982 	 * because the kernel is always built with backchain.
2983 	 */
2984 	return true;
2985 }
2986 
2987 void arch_bpf_stack_walk(bool (*consume_fn)(void *, u64, u64, u64),
2988 			 void *cookie)
2989 {
2990 	unsigned long addr, prev_addr = 0;
2991 	struct unwind_state state;
2992 
2993 	unwind_for_each_frame(&state, NULL, NULL, 0) {
2994 		addr = unwind_get_return_address(&state);
2995 		if (!addr)
2996 			break;
2997 		/*
2998 		 * addr is a return address and state.sp is the value of %r15
2999 		 * at this address. exception_cb needs %r15 at entry to the
3000 		 * function containing addr, so take the next state.sp.
3001 		 *
3002 		 * There is no bp, and the exception_cb prog does not need one
3003 		 * to perform a quasi-longjmp. The common code requires a
3004 		 * non-zero bp, so pass sp there as well.
3005 		 */
3006 		if (prev_addr && !consume_fn(cookie, prev_addr, state.sp,
3007 					     state.sp))
3008 			break;
3009 		prev_addr = addr;
3010 	}
3011 }
3012 
3013 bool bpf_jit_supports_timed_may_goto(void)
3014 {
3015 	return true;
3016 }
3017