xref: /linux/arch/s390/net/bpf_jit_comp.c (revision f412eed9dfdeeb6becd7de2ffe8b5d0a8b3f81ca)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * BPF Jit compiler for s390.
4  *
5  * Minimum build requirements:
6  *
7  *  - HAVE_MARCH_Z196_FEATURES: laal, laalg
8  *  - HAVE_MARCH_Z10_FEATURES: msfi, cgrj, clgrj
9  *  - HAVE_MARCH_Z9_109_FEATURES: alfi, llilf, clfi, oilf, nilf
10  *  - PACK_STACK
11  *  - 64BIT
12  *
13  * Copyright IBM Corp. 2012,2015
14  *
15  * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
16  *	      Michael Holzheu <holzheu@linux.vnet.ibm.com>
17  */
18 
19 #define KMSG_COMPONENT "bpf_jit"
20 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
21 
22 #include <linux/netdevice.h>
23 #include <linux/filter.h>
24 #include <linux/init.h>
25 #include <linux/bpf.h>
26 #include <asm/cacheflush.h>
27 #include <asm/dis.h>
28 #include <asm/set_memory.h>
29 #include "bpf_jit.h"
30 
/*
 * State of the JIT compiler, carried across code-generation passes.
 *
 * The _EMIT* macros only store instruction bytes when prg_buf is
 * non-NULL but always advance prg/lit, so a pass with a NULL buffer
 * just computes sizes and offsets.
 */
struct bpf_jit {
	u32 seen;		/* Flags to remember seen eBPF instructions */
	u32 seen_reg[16];	/* Array to remember which registers are used */
	u32 *addrs;		/* Array with relative instruction addresses */
	u8 *prg_buf;		/* Start of program (NULL during size pass) */
	int size;		/* Size of program and literal pool */
	int size_prg;		/* Size of program */
	int prg;		/* Current position in program */
	int lit_start;		/* Start of literal pool */
	int lit;		/* Current position in literal pool */
	int base_ip;		/* Base address for literal pool */
	int ret0_ip;		/* Address of return 0 */
	int exit_ip;		/* Address of exit */
	int tail_call_start;	/* Tail call start offset */
	int labels[1];		/* Labels for local jumps (only label 0 used) */
};
47 
#define BPF_SIZE_MAX	0xffff	/* Max size for program (16 bit branches) */

/* Bits for bpf_jit::seen, recording which features the program uses */
#define SEEN_MEM	(1 << 0)	/* use mem[] for temporary storage */
#define SEEN_RET0	(1 << 1)	/* ret0_ip points to a valid return 0 */
#define SEEN_LITERAL	(1 << 2)	/* code uses literals */
#define SEEN_FUNC	(1 << 3)	/* calls C functions */
#define SEEN_TAIL_CALL	(1 << 4)	/* code uses tail calls */
#define SEEN_REG_AX	(1 << 5)	/* code uses constant blinding */
#define SEEN_STACK	(SEEN_FUNC | SEEN_MEM)	/* needs a stack frame (see prologue) */
57 
/*
 * s390 registers
 *
 * Registers not covered by the BPF register set get pseudo numbers
 * beyond MAX_BPF_JIT_REG; reg2hex[] below maps every pseudo/BPF
 * register to its hardware register number.
 */
#define REG_W0		(MAX_BPF_JIT_REG + 0)	/* Work register 1 (even) */
#define REG_W1		(MAX_BPF_JIT_REG + 1)	/* Work register 2 (odd) */
#define REG_L		(MAX_BPF_JIT_REG + 2)	/* Literal pool register */
#define REG_15		(MAX_BPF_JIT_REG + 3)	/* Register 15 */
#define REG_0		REG_W0			/* Register 0 */
#define REG_1		REG_W1			/* Register 1 */
#define REG_2		BPF_REG_1		/* Register 2 */
#define REG_14		BPF_REG_0		/* Register 14 */
69 
/*
 * Mapping of BPF registers to s390 registers
 *
 * Indexed by BPF register number (including the REG_* extras above);
 * the value is the hardware register number used in the encodings.
 */
static const int reg2hex[] = {
	/* Return code */
	[BPF_REG_0]	= 14,
	/* Function parameters */
	[BPF_REG_1]	= 2,
	[BPF_REG_2]	= 3,
	[BPF_REG_3]	= 4,
	[BPF_REG_4]	= 5,
	[BPF_REG_5]	= 6,
	/* Call saved registers */
	[BPF_REG_6]	= 7,
	[BPF_REG_7]	= 8,
	[BPF_REG_8]	= 9,
	[BPF_REG_9]	= 10,
	/* BPF stack pointer */
	[BPF_REG_FP]	= 13,
	/* Register for blinding */
	[BPF_REG_AX]	= 12,
	/* Work registers for s390x backend */
	[REG_W0]	= 0,
	[REG_W1]	= 1,
	[REG_L]		= 11,
	[REG_15]	= 15,
};
97 
98 static inline u32 reg(u32 dst_reg, u32 src_reg)
99 {
100 	return reg2hex[dst_reg] << 4 | reg2hex[src_reg];
101 }
102 
103 static inline u32 reg_high(u32 reg)
104 {
105 	return reg2hex[reg] << 4;
106 }
107 
108 static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
109 {
110 	u32 r1 = reg2hex[b1];
111 
112 	if (!jit->seen_reg[r1] && r1 >= 6 && r1 <= 15)
113 		jit->seen_reg[r1] = 1;
114 }
115 
/* Record that the s390 register backing BPF register b1 is in use */
#define REG_SET_SEEN(b1)					\
({								\
	reg_set_seen(jit, b1);					\
})

/* Non-zero if the s390 register backing b1 was marked as used */
#define REG_SEEN(b1) jit->seen_reg[reg2hex[(b1)]]
122 
/*
 * EMIT macros for code generation
 *
 * All macros expand in a context where a local "struct bpf_jit *jit"
 * is in scope. The _EMIT* forms only store bytes when jit->prg_buf is
 * set; jit->prg is always advanced, so a first pass with a NULL buffer
 * just measures the program size.
 */

/* Emit a raw 2 byte instruction */
#define _EMIT2(op)						\
({								\
	if (jit->prg_buf)					\
		*(u16 *) (jit->prg_buf + jit->prg) = op;	\
	jit->prg += 2;						\
})

/* Emit a 2 byte instruction with two register operands */
#define EMIT2(op, b1, b2)					\
({								\
	_EMIT2(op | reg(b1, b2));				\
	REG_SET_SEEN(b1);					\
	REG_SET_SEEN(b2);					\
})

/* Emit a raw 4 byte instruction */
#define _EMIT4(op)						\
({								\
	if (jit->prg_buf)					\
		*(u32 *) (jit->prg_buf + jit->prg) = op;	\
	jit->prg += 4;						\
})

/* Emit a 4 byte instruction with two register operands */
#define EMIT4(op, b1, b2)					\
({								\
	_EMIT4(op | reg(b1, b2));				\
	REG_SET_SEEN(b1);					\
	REG_SET_SEEN(b2);					\
})

/* Emit a 4 byte instruction with a third register operand */
#define EMIT4_RRF(op, b1, b2, b3)				\
({								\
	_EMIT4(op | reg_high(b3) << 8 | reg(b1, b2));		\
	REG_SET_SEEN(b1);					\
	REG_SET_SEEN(b2);					\
	REG_SET_SEEN(b3);					\
})

/* Emit a 4 byte instruction with a 12 bit displacement */
#define _EMIT4_DISP(op, disp)					\
({								\
	unsigned int __disp = (disp) & 0xfff;			\
	_EMIT4(op | __disp);					\
})

/* Emit a 4 byte instruction with two registers and a displacement */
#define EMIT4_DISP(op, b1, b2, disp)				\
({								\
	_EMIT4_DISP(op | reg_high(b1) << 16 |			\
		    reg_high(b2) << 8, disp);			\
	REG_SET_SEEN(b1);					\
	REG_SET_SEEN(b2);					\
})

/* Emit a 4 byte instruction with a 16 bit immediate */
#define EMIT4_IMM(op, b1, imm)					\
({								\
	unsigned int __imm = (imm) & 0xffff;			\
	_EMIT4(op | reg_high(b1) << 16 | __imm);		\
	REG_SET_SEEN(b1);					\
})

/* Emit a 4 byte branch with a pc-relative target (byte offset,
 * converted to the halfword count the hardware expects) */
#define EMIT4_PCREL(op, pcrel)					\
({								\
	long __pcrel = ((pcrel) >> 1) & 0xffff;			\
	_EMIT4(op | __pcrel);					\
})
189 
/* Emit a raw 6 byte instruction (4 byte op1 followed by 2 byte op2) */
#define _EMIT6(op1, op2)					\
({								\
	if (jit->prg_buf) {					\
		*(u32 *) (jit->prg_buf + jit->prg) = op1;	\
		*(u16 *) (jit->prg_buf + jit->prg + 4) = op2;	\
	}							\
	jit->prg += 6;						\
})

/* Emit a 6 byte instruction with a 12 bit displacement */
#define _EMIT6_DISP(op1, op2, disp)				\
({								\
	unsigned int __disp = (disp) & 0xfff;			\
	_EMIT6(op1 | __disp, op2);				\
})

/* Emit a 6 byte instruction with a 20 bit displacement,
 * split into low 12 bits (op1) and high 8 bits (op2) */
#define _EMIT6_DISP_LH(op1, op2, disp)				\
({								\
	u32 _disp = (u32) disp;					\
	unsigned int __disp_h = _disp & 0xff000;		\
	unsigned int __disp_l = _disp & 0x00fff;		\
	_EMIT6(op1 | __disp_l, op2 | __disp_h >> 4);		\
})

/* Emit a 6 byte instruction with three registers and a 20 bit displacement */
#define EMIT6_DISP_LH(op1, op2, b1, b2, b3, disp)		\
({								\
	_EMIT6_DISP_LH(op1 | reg(b1, b2) << 16 |		\
		       reg_high(b3) << 8, op2, disp);		\
	REG_SET_SEEN(b1);					\
	REG_SET_SEEN(b2);					\
	REG_SET_SEEN(b3);					\
})

/* Emit a 6 byte compare-and-branch targeting a local label
 * (jit->labels[label] must already hold the target offset) */
#define EMIT6_PCREL_LABEL(op1, op2, b1, b2, label, mask)	\
({								\
	int rel = (jit->labels[label] - jit->prg) >> 1;		\
	_EMIT6(op1 | reg(b1, b2) << 16 | (rel & 0xffff),	\
	       op2 | mask << 12);				\
	REG_SET_SEEN(b1);					\
	REG_SET_SEEN(b2);					\
})

/* Emit a 6 byte compare-immediate-and-branch targeting a local label;
 * the immediate must fit into 8 bits (compile-time checked) */
#define EMIT6_PCREL_IMM_LABEL(op1, op2, b1, imm, label, mask)	\
({								\
	int rel = (jit->labels[label] - jit->prg) >> 1;		\
	_EMIT6(op1 | (reg_high(b1) | mask) << 16 |		\
		(rel & 0xffff), op2 | (imm & 0xff) << 8);	\
	REG_SET_SEEN(b1);					\
	BUILD_BUG_ON(((unsigned long) imm) > 0xff);		\
})

/* Emit a 6 byte compare-and-branch between two eBPF instructions,
 * using addrs[] to translate instruction indices into byte offsets */
#define EMIT6_PCREL(op1, op2, b1, b2, i, off, mask)		\
({								\
	/* Branch instruction needs 6 bytes */			\
	int rel = (addrs[i + off + 1] - (addrs[i + 1] - 6)) / 2;\
	_EMIT6(op1 | reg(b1, b2) << 16 | (rel & 0xffff), op2 | mask);	\
	REG_SET_SEEN(b1);					\
	REG_SET_SEEN(b2);					\
})

/* Emit a 6 byte instruction with a 32 bit immediate */
#define _EMIT6_IMM(op, imm)					\
({								\
	unsigned int __imm = (imm);				\
	_EMIT6(op | (__imm >> 16), __imm & 0xffff);		\
})

/* Emit a 6 byte instruction with one register and a 32 bit immediate */
#define EMIT6_IMM(op, b1, imm)					\
({								\
	_EMIT6_IMM(op | reg_high(b1) << 16, imm);		\
	REG_SET_SEEN(b1);					\
})
260 
/*
 * Store a 32 bit constant in the literal pool and return its offset
 * relative to base_ip, for use as a displacement off %l (REG_L).
 */
#define EMIT_CONST_U32(val)					\
({								\
	unsigned int ret;					\
	ret = jit->lit - jit->base_ip;				\
	jit->seen |= SEEN_LITERAL;				\
	if (jit->prg_buf)					\
		*(u32 *) (jit->prg_buf + jit->lit) = (u32) val;	\
	jit->lit += 4;						\
	ret;							\
})

/*
 * Store a 64 bit constant in the literal pool and return its offset
 * relative to base_ip.
 */
#define EMIT_CONST_U64(val)					\
({								\
	unsigned int ret;					\
	ret = jit->lit - jit->base_ip;				\
	jit->seen |= SEEN_LITERAL;				\
	if (jit->prg_buf)					\
		*(u64 *) (jit->prg_buf + jit->lit) = (u64) val;	\
	jit->lit += 8;						\
	ret;							\
})

/* Zero extend register b1 from 32 to 64 bit */
#define EMIT_ZERO(b1)						\
({								\
	/* llgfr %dst,%dst (zero extend to 64 bit) */		\
	EMIT4(0xb9160000, b1, b1);				\
	/* harmless: EMIT4 already marked b1 as seen */		\
	REG_SET_SEEN(b1);					\
})
289 
/*
 * Fill whole space with illegal instructions
 *
 * Zero bytes are used as filler; on s390 opcode 0x0000 does not decode
 * to a valid instruction, so stray execution of a hole traps.
 */
static void jit_fill_hole(void *area, unsigned int size)
{
	memset(area, 0, size);
}
297 
298 /*
299  * Save registers from "rs" (register start) to "re" (register end) on stack
300  */
301 static void save_regs(struct bpf_jit *jit, u32 rs, u32 re)
302 {
303 	u32 off = STK_OFF_R6 + (rs - 6) * 8;
304 
305 	if (rs == re)
306 		/* stg %rs,off(%r15) */
307 		_EMIT6(0xe300f000 | rs << 20 | off, 0x0024);
308 	else
309 		/* stmg %rs,%re,off(%r15) */
310 		_EMIT6_DISP(0xeb00f000 | rs << 20 | re << 16, 0x0024, off);
311 }
312 
313 /*
314  * Restore registers from "rs" (register start) to "re" (register end) on stack
315  */
316 static void restore_regs(struct bpf_jit *jit, u32 rs, u32 re, u32 stack_depth)
317 {
318 	u32 off = STK_OFF_R6 + (rs - 6) * 8;
319 
320 	if (jit->seen & SEEN_STACK)
321 		off += STK_OFF + stack_depth;
322 
323 	if (rs == re)
324 		/* lg %rs,off(%r15) */
325 		_EMIT6(0xe300f000 | rs << 20 | off, 0x0004);
326 	else
327 		/* lmg %rs,%re,off(%r15) */
328 		_EMIT6_DISP(0xeb00f000 | rs << 20 | re << 16, 0x0004, off);
329 }
330 
331 /*
332  * Return first seen register (from start)
333  */
334 static int get_start(struct bpf_jit *jit, int start)
335 {
336 	int i;
337 
338 	for (i = start; i <= 15; i++) {
339 		if (jit->seen_reg[i])
340 			return i;
341 	}
342 	return 0;
343 }
344 
345 /*
346  * Return last seen register (from start) (gap >= 2)
347  */
348 static int get_end(struct bpf_jit *jit, int start)
349 {
350 	int i;
351 
352 	for (i = start; i < 15; i++) {
353 		if (!jit->seen_reg[i] && !jit->seen_reg[i + 1])
354 			return i - 1;
355 	}
356 	return jit->seen_reg[15] ? 15 : 14;
357 }
358 
359 #define REGS_SAVE	1
360 #define REGS_RESTORE	0
361 /*
362  * Save and restore clobbered registers (6-15) on stack.
363  * We save/restore registers in chunks with gap >= 2 registers.
364  */
365 static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth)
366 {
367 
368 	int re = 6, rs;
369 
370 	do {
371 		rs = get_start(jit, re);
372 		if (!rs)
373 			break;
374 		re = get_end(jit, rs + 1);
375 		if (op == REGS_SAVE)
376 			save_regs(jit, rs, re);
377 		else
378 			restore_regs(jit, rs, re, stack_depth);
379 		re++;
380 	} while (re <= 15);
381 }
382 
/*
 * Emit function prologue
 *
 * Save registers and create stack frame if necessary.
 * See stack frame layout description in "bpf_jit.h"!
 */
static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
{
	if (jit->seen & SEEN_TAIL_CALL) {
		/* Clear the tail call counter in the stack save area: */
		/* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */
		_EMIT6(0xd703f000 | STK_OFF_TCCNT, 0xf000 | STK_OFF_TCCNT);
	} else {
		/* j tail_call_start: NOP if no tail calls are used */
		EMIT4_PCREL(0xa7f40000, 6);
		_EMIT2(0);
	}
	/* Tail calls have to skip above initialization */
	jit->tail_call_start = jit->prg;
	/* Save registers */
	save_restore_regs(jit, REGS_SAVE, stack_depth);
	/* Setup literal pool */
	if (jit->seen & SEEN_LITERAL) {
		/* basr %r13,0: %r13 (REG_L) addresses the pool base */
		EMIT2(0x0d00, REG_L, REG_0);
		jit->base_ip = jit->prg;
	}
	/* Setup stack and backchain */
	if (jit->seen & SEEN_STACK) {
		if (jit->seen & SEEN_FUNC)
			/* lgr %w1,%r15 (backchain) */
			EMIT4(0xb9040000, REG_W1, REG_15);
		/* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */
		EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED);
		/* aghi %r15,-STK_OFF: allocate the JIT stack frame */
		EMIT4_IMM(0xa70b0000, REG_15, -(STK_OFF + stack_depth));
		if (jit->seen & SEEN_FUNC)
			/* stg %w1,152(%r15) (backchain) */
			EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
				      REG_15, 152);
	}
}
424 
/*
 * Function epilogue
 *
 * Emits the shared "return 0" target (if any branch uses it), then the
 * exit path: move the BPF return value into %r2, restore the saved
 * registers and return to the caller.
 */
static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
{
	/* Return 0 */
	if (jit->seen & SEEN_RET0) {
		jit->ret0_ip = jit->prg;
		/* lghi %b0,0 */
		EMIT4_IMM(0xa7090000, BPF_REG_0, 0);
	}
	jit->exit_ip = jit->prg;
	/* Load exit code: lgr %r2,%b0 */
	EMIT4(0xb9040000, REG_2, BPF_REG_0);
	/* Restore registers */
	save_restore_regs(jit, REGS_RESTORE, stack_depth);
	/* br %r14 */
	_EMIT2(0x07fe);
}
444 
445 /*
446  * Compile one eBPF instruction into s390x code
447  *
448  * NOTE: Use noinline because for gcov (-fprofile-arcs) gcc allocates a lot of
449  * stack space for the large switch statement.
450  */
451 static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i)
452 {
453 	struct bpf_insn *insn = &fp->insnsi[i];
454 	int jmp_off, last, insn_count = 1;
455 	u32 dst_reg = insn->dst_reg;
456 	u32 src_reg = insn->src_reg;
457 	u32 *addrs = jit->addrs;
458 	s32 imm = insn->imm;
459 	s16 off = insn->off;
460 	unsigned int mask;
461 
462 	if (dst_reg == BPF_REG_AX || src_reg == BPF_REG_AX)
463 		jit->seen |= SEEN_REG_AX;
464 	switch (insn->code) {
465 	/*
466 	 * BPF_MOV
467 	 */
468 	case BPF_ALU | BPF_MOV | BPF_X: /* dst = (u32) src */
469 		/* llgfr %dst,%src */
470 		EMIT4(0xb9160000, dst_reg, src_reg);
471 		break;
472 	case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
473 		/* lgr %dst,%src */
474 		EMIT4(0xb9040000, dst_reg, src_reg);
475 		break;
476 	case BPF_ALU | BPF_MOV | BPF_K: /* dst = (u32) imm */
477 		/* llilf %dst,imm */
478 		EMIT6_IMM(0xc00f0000, dst_reg, imm);
479 		break;
480 	case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = imm */
481 		/* lgfi %dst,imm */
482 		EMIT6_IMM(0xc0010000, dst_reg, imm);
483 		break;
484 	/*
485 	 * BPF_LD 64
486 	 */
487 	case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
488 	{
489 		/* 16 byte instruction that uses two 'struct bpf_insn' */
490 		u64 imm64;
491 
492 		imm64 = (u64)(u32) insn[0].imm | ((u64)(u32) insn[1].imm) << 32;
493 		/* lg %dst,<d(imm)>(%l) */
494 		EMIT6_DISP_LH(0xe3000000, 0x0004, dst_reg, REG_0, REG_L,
495 			      EMIT_CONST_U64(imm64));
496 		insn_count = 2;
497 		break;
498 	}
499 	/*
500 	 * BPF_ADD
501 	 */
502 	case BPF_ALU | BPF_ADD | BPF_X: /* dst = (u32) dst + (u32) src */
503 		/* ar %dst,%src */
504 		EMIT2(0x1a00, dst_reg, src_reg);
505 		EMIT_ZERO(dst_reg);
506 		break;
507 	case BPF_ALU64 | BPF_ADD | BPF_X: /* dst = dst + src */
508 		/* agr %dst,%src */
509 		EMIT4(0xb9080000, dst_reg, src_reg);
510 		break;
511 	case BPF_ALU | BPF_ADD | BPF_K: /* dst = (u32) dst + (u32) imm */
512 		if (!imm)
513 			break;
514 		/* alfi %dst,imm */
515 		EMIT6_IMM(0xc20b0000, dst_reg, imm);
516 		EMIT_ZERO(dst_reg);
517 		break;
518 	case BPF_ALU64 | BPF_ADD | BPF_K: /* dst = dst + imm */
519 		if (!imm)
520 			break;
521 		/* agfi %dst,imm */
522 		EMIT6_IMM(0xc2080000, dst_reg, imm);
523 		break;
524 	/*
525 	 * BPF_SUB
526 	 */
527 	case BPF_ALU | BPF_SUB | BPF_X: /* dst = (u32) dst - (u32) src */
528 		/* sr %dst,%src */
529 		EMIT2(0x1b00, dst_reg, src_reg);
530 		EMIT_ZERO(dst_reg);
531 		break;
532 	case BPF_ALU64 | BPF_SUB | BPF_X: /* dst = dst - src */
533 		/* sgr %dst,%src */
534 		EMIT4(0xb9090000, dst_reg, src_reg);
535 		break;
536 	case BPF_ALU | BPF_SUB | BPF_K: /* dst = (u32) dst - (u32) imm */
537 		if (!imm)
538 			break;
539 		/* alfi %dst,-imm */
540 		EMIT6_IMM(0xc20b0000, dst_reg, -imm);
541 		EMIT_ZERO(dst_reg);
542 		break;
543 	case BPF_ALU64 | BPF_SUB | BPF_K: /* dst = dst - imm */
544 		if (!imm)
545 			break;
546 		/* agfi %dst,-imm */
547 		EMIT6_IMM(0xc2080000, dst_reg, -imm);
548 		break;
549 	/*
550 	 * BPF_MUL
551 	 */
552 	case BPF_ALU | BPF_MUL | BPF_X: /* dst = (u32) dst * (u32) src */
553 		/* msr %dst,%src */
554 		EMIT4(0xb2520000, dst_reg, src_reg);
555 		EMIT_ZERO(dst_reg);
556 		break;
557 	case BPF_ALU64 | BPF_MUL | BPF_X: /* dst = dst * src */
558 		/* msgr %dst,%src */
559 		EMIT4(0xb90c0000, dst_reg, src_reg);
560 		break;
561 	case BPF_ALU | BPF_MUL | BPF_K: /* dst = (u32) dst * (u32) imm */
562 		if (imm == 1)
563 			break;
564 		/* msfi %r5,imm */
565 		EMIT6_IMM(0xc2010000, dst_reg, imm);
566 		EMIT_ZERO(dst_reg);
567 		break;
568 	case BPF_ALU64 | BPF_MUL | BPF_K: /* dst = dst * imm */
569 		if (imm == 1)
570 			break;
571 		/* msgfi %dst,imm */
572 		EMIT6_IMM(0xc2000000, dst_reg, imm);
573 		break;
574 	/*
575 	 * BPF_DIV / BPF_MOD
576 	 */
577 	case BPF_ALU | BPF_DIV | BPF_X: /* dst = (u32) dst / (u32) src */
578 	case BPF_ALU | BPF_MOD | BPF_X: /* dst = (u32) dst % (u32) src */
579 	{
580 		int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
581 
582 		/* lhi %w0,0 */
583 		EMIT4_IMM(0xa7080000, REG_W0, 0);
584 		/* lr %w1,%dst */
585 		EMIT2(0x1800, REG_W1, dst_reg);
586 		/* dlr %w0,%src */
587 		EMIT4(0xb9970000, REG_W0, src_reg);
588 		/* llgfr %dst,%rc */
589 		EMIT4(0xb9160000, dst_reg, rc_reg);
590 		break;
591 	}
592 	case BPF_ALU64 | BPF_DIV | BPF_X: /* dst = dst / src */
593 	case BPF_ALU64 | BPF_MOD | BPF_X: /* dst = dst % src */
594 	{
595 		int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
596 
597 		/* lghi %w0,0 */
598 		EMIT4_IMM(0xa7090000, REG_W0, 0);
599 		/* lgr %w1,%dst */
600 		EMIT4(0xb9040000, REG_W1, dst_reg);
601 		/* dlgr %w0,%dst */
602 		EMIT4(0xb9870000, REG_W0, src_reg);
603 		/* lgr %dst,%rc */
604 		EMIT4(0xb9040000, dst_reg, rc_reg);
605 		break;
606 	}
607 	case BPF_ALU | BPF_DIV | BPF_K: /* dst = (u32) dst / (u32) imm */
608 	case BPF_ALU | BPF_MOD | BPF_K: /* dst = (u32) dst % (u32) imm */
609 	{
610 		int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
611 
612 		if (imm == 1) {
613 			if (BPF_OP(insn->code) == BPF_MOD)
614 				/* lhgi %dst,0 */
615 				EMIT4_IMM(0xa7090000, dst_reg, 0);
616 			break;
617 		}
618 		/* lhi %w0,0 */
619 		EMIT4_IMM(0xa7080000, REG_W0, 0);
620 		/* lr %w1,%dst */
621 		EMIT2(0x1800, REG_W1, dst_reg);
622 		/* dl %w0,<d(imm)>(%l) */
623 		EMIT6_DISP_LH(0xe3000000, 0x0097, REG_W0, REG_0, REG_L,
624 			      EMIT_CONST_U32(imm));
625 		/* llgfr %dst,%rc */
626 		EMIT4(0xb9160000, dst_reg, rc_reg);
627 		break;
628 	}
629 	case BPF_ALU64 | BPF_DIV | BPF_K: /* dst = dst / imm */
630 	case BPF_ALU64 | BPF_MOD | BPF_K: /* dst = dst % imm */
631 	{
632 		int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
633 
634 		if (imm == 1) {
635 			if (BPF_OP(insn->code) == BPF_MOD)
636 				/* lhgi %dst,0 */
637 				EMIT4_IMM(0xa7090000, dst_reg, 0);
638 			break;
639 		}
640 		/* lghi %w0,0 */
641 		EMIT4_IMM(0xa7090000, REG_W0, 0);
642 		/* lgr %w1,%dst */
643 		EMIT4(0xb9040000, REG_W1, dst_reg);
644 		/* dlg %w0,<d(imm)>(%l) */
645 		EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L,
646 			      EMIT_CONST_U64(imm));
647 		/* lgr %dst,%rc */
648 		EMIT4(0xb9040000, dst_reg, rc_reg);
649 		break;
650 	}
651 	/*
652 	 * BPF_AND
653 	 */
654 	case BPF_ALU | BPF_AND | BPF_X: /* dst = (u32) dst & (u32) src */
655 		/* nr %dst,%src */
656 		EMIT2(0x1400, dst_reg, src_reg);
657 		EMIT_ZERO(dst_reg);
658 		break;
659 	case BPF_ALU64 | BPF_AND | BPF_X: /* dst = dst & src */
660 		/* ngr %dst,%src */
661 		EMIT4(0xb9800000, dst_reg, src_reg);
662 		break;
663 	case BPF_ALU | BPF_AND | BPF_K: /* dst = (u32) dst & (u32) imm */
664 		/* nilf %dst,imm */
665 		EMIT6_IMM(0xc00b0000, dst_reg, imm);
666 		EMIT_ZERO(dst_reg);
667 		break;
668 	case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */
669 		/* ng %dst,<d(imm)>(%l) */
670 		EMIT6_DISP_LH(0xe3000000, 0x0080, dst_reg, REG_0, REG_L,
671 			      EMIT_CONST_U64(imm));
672 		break;
673 	/*
674 	 * BPF_OR
675 	 */
676 	case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */
677 		/* or %dst,%src */
678 		EMIT2(0x1600, dst_reg, src_reg);
679 		EMIT_ZERO(dst_reg);
680 		break;
681 	case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */
682 		/* ogr %dst,%src */
683 		EMIT4(0xb9810000, dst_reg, src_reg);
684 		break;
685 	case BPF_ALU | BPF_OR | BPF_K: /* dst = (u32) dst | (u32) imm */
686 		/* oilf %dst,imm */
687 		EMIT6_IMM(0xc00d0000, dst_reg, imm);
688 		EMIT_ZERO(dst_reg);
689 		break;
690 	case BPF_ALU64 | BPF_OR | BPF_K: /* dst = dst | imm */
691 		/* og %dst,<d(imm)>(%l) */
692 		EMIT6_DISP_LH(0xe3000000, 0x0081, dst_reg, REG_0, REG_L,
693 			      EMIT_CONST_U64(imm));
694 		break;
695 	/*
696 	 * BPF_XOR
697 	 */
698 	case BPF_ALU | BPF_XOR | BPF_X: /* dst = (u32) dst ^ (u32) src */
699 		/* xr %dst,%src */
700 		EMIT2(0x1700, dst_reg, src_reg);
701 		EMIT_ZERO(dst_reg);
702 		break;
703 	case BPF_ALU64 | BPF_XOR | BPF_X: /* dst = dst ^ src */
704 		/* xgr %dst,%src */
705 		EMIT4(0xb9820000, dst_reg, src_reg);
706 		break;
707 	case BPF_ALU | BPF_XOR | BPF_K: /* dst = (u32) dst ^ (u32) imm */
708 		if (!imm)
709 			break;
710 		/* xilf %dst,imm */
711 		EMIT6_IMM(0xc0070000, dst_reg, imm);
712 		EMIT_ZERO(dst_reg);
713 		break;
714 	case BPF_ALU64 | BPF_XOR | BPF_K: /* dst = dst ^ imm */
715 		/* xg %dst,<d(imm)>(%l) */
716 		EMIT6_DISP_LH(0xe3000000, 0x0082, dst_reg, REG_0, REG_L,
717 			      EMIT_CONST_U64(imm));
718 		break;
719 	/*
720 	 * BPF_LSH
721 	 */
722 	case BPF_ALU | BPF_LSH | BPF_X: /* dst = (u32) dst << (u32) src */
723 		/* sll %dst,0(%src) */
724 		EMIT4_DISP(0x89000000, dst_reg, src_reg, 0);
725 		EMIT_ZERO(dst_reg);
726 		break;
727 	case BPF_ALU64 | BPF_LSH | BPF_X: /* dst = dst << src */
728 		/* sllg %dst,%dst,0(%src) */
729 		EMIT6_DISP_LH(0xeb000000, 0x000d, dst_reg, dst_reg, src_reg, 0);
730 		break;
731 	case BPF_ALU | BPF_LSH | BPF_K: /* dst = (u32) dst << (u32) imm */
732 		if (imm == 0)
733 			break;
734 		/* sll %dst,imm(%r0) */
735 		EMIT4_DISP(0x89000000, dst_reg, REG_0, imm);
736 		EMIT_ZERO(dst_reg);
737 		break;
738 	case BPF_ALU64 | BPF_LSH | BPF_K: /* dst = dst << imm */
739 		if (imm == 0)
740 			break;
741 		/* sllg %dst,%dst,imm(%r0) */
742 		EMIT6_DISP_LH(0xeb000000, 0x000d, dst_reg, dst_reg, REG_0, imm);
743 		break;
744 	/*
745 	 * BPF_RSH
746 	 */
747 	case BPF_ALU | BPF_RSH | BPF_X: /* dst = (u32) dst >> (u32) src */
748 		/* srl %dst,0(%src) */
749 		EMIT4_DISP(0x88000000, dst_reg, src_reg, 0);
750 		EMIT_ZERO(dst_reg);
751 		break;
752 	case BPF_ALU64 | BPF_RSH | BPF_X: /* dst = dst >> src */
753 		/* srlg %dst,%dst,0(%src) */
754 		EMIT6_DISP_LH(0xeb000000, 0x000c, dst_reg, dst_reg, src_reg, 0);
755 		break;
756 	case BPF_ALU | BPF_RSH | BPF_K: /* dst = (u32) dst >> (u32) imm */
757 		if (imm == 0)
758 			break;
759 		/* srl %dst,imm(%r0) */
760 		EMIT4_DISP(0x88000000, dst_reg, REG_0, imm);
761 		EMIT_ZERO(dst_reg);
762 		break;
763 	case BPF_ALU64 | BPF_RSH | BPF_K: /* dst = dst >> imm */
764 		if (imm == 0)
765 			break;
766 		/* srlg %dst,%dst,imm(%r0) */
767 		EMIT6_DISP_LH(0xeb000000, 0x000c, dst_reg, dst_reg, REG_0, imm);
768 		break;
769 	/*
770 	 * BPF_ARSH
771 	 */
772 	case BPF_ALU64 | BPF_ARSH | BPF_X: /* ((s64) dst) >>= src */
773 		/* srag %dst,%dst,0(%src) */
774 		EMIT6_DISP_LH(0xeb000000, 0x000a, dst_reg, dst_reg, src_reg, 0);
775 		break;
776 	case BPF_ALU64 | BPF_ARSH | BPF_K: /* ((s64) dst) >>= imm */
777 		if (imm == 0)
778 			break;
779 		/* srag %dst,%dst,imm(%r0) */
780 		EMIT6_DISP_LH(0xeb000000, 0x000a, dst_reg, dst_reg, REG_0, imm);
781 		break;
782 	/*
783 	 * BPF_NEG
784 	 */
785 	case BPF_ALU | BPF_NEG: /* dst = (u32) -dst */
786 		/* lcr %dst,%dst */
787 		EMIT2(0x1300, dst_reg, dst_reg);
788 		EMIT_ZERO(dst_reg);
789 		break;
790 	case BPF_ALU64 | BPF_NEG: /* dst = -dst */
791 		/* lcgr %dst,%dst */
792 		EMIT4(0xb9130000, dst_reg, dst_reg);
793 		break;
794 	/*
795 	 * BPF_FROM_BE/LE
796 	 */
797 	case BPF_ALU | BPF_END | BPF_FROM_BE:
798 		/* s390 is big endian, therefore only clear high order bytes */
799 		switch (imm) {
800 		case 16: /* dst = (u16) cpu_to_be16(dst) */
801 			/* llghr %dst,%dst */
802 			EMIT4(0xb9850000, dst_reg, dst_reg);
803 			break;
804 		case 32: /* dst = (u32) cpu_to_be32(dst) */
805 			/* llgfr %dst,%dst */
806 			EMIT4(0xb9160000, dst_reg, dst_reg);
807 			break;
808 		case 64: /* dst = (u64) cpu_to_be64(dst) */
809 			break;
810 		}
811 		break;
812 	case BPF_ALU | BPF_END | BPF_FROM_LE:
813 		switch (imm) {
814 		case 16: /* dst = (u16) cpu_to_le16(dst) */
815 			/* lrvr %dst,%dst */
816 			EMIT4(0xb91f0000, dst_reg, dst_reg);
817 			/* srl %dst,16(%r0) */
818 			EMIT4_DISP(0x88000000, dst_reg, REG_0, 16);
819 			/* llghr %dst,%dst */
820 			EMIT4(0xb9850000, dst_reg, dst_reg);
821 			break;
822 		case 32: /* dst = (u32) cpu_to_le32(dst) */
823 			/* lrvr %dst,%dst */
824 			EMIT4(0xb91f0000, dst_reg, dst_reg);
825 			/* llgfr %dst,%dst */
826 			EMIT4(0xb9160000, dst_reg, dst_reg);
827 			break;
828 		case 64: /* dst = (u64) cpu_to_le64(dst) */
829 			/* lrvgr %dst,%dst */
830 			EMIT4(0xb90f0000, dst_reg, dst_reg);
831 			break;
832 		}
833 		break;
834 	/*
835 	 * BPF_ST(X)
836 	 */
837 	case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src_reg */
838 		/* stcy %src,off(%dst) */
839 		EMIT6_DISP_LH(0xe3000000, 0x0072, src_reg, dst_reg, REG_0, off);
840 		jit->seen |= SEEN_MEM;
841 		break;
842 	case BPF_STX | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = src */
843 		/* sthy %src,off(%dst) */
844 		EMIT6_DISP_LH(0xe3000000, 0x0070, src_reg, dst_reg, REG_0, off);
845 		jit->seen |= SEEN_MEM;
846 		break;
847 	case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */
848 		/* sty %src,off(%dst) */
849 		EMIT6_DISP_LH(0xe3000000, 0x0050, src_reg, dst_reg, REG_0, off);
850 		jit->seen |= SEEN_MEM;
851 		break;
852 	case BPF_STX | BPF_MEM | BPF_DW: /* (u64 *)(dst + off) = src */
853 		/* stg %src,off(%dst) */
854 		EMIT6_DISP_LH(0xe3000000, 0x0024, src_reg, dst_reg, REG_0, off);
855 		jit->seen |= SEEN_MEM;
856 		break;
857 	case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */
858 		/* lhi %w0,imm */
859 		EMIT4_IMM(0xa7080000, REG_W0, (u8) imm);
860 		/* stcy %w0,off(dst) */
861 		EMIT6_DISP_LH(0xe3000000, 0x0072, REG_W0, dst_reg, REG_0, off);
862 		jit->seen |= SEEN_MEM;
863 		break;
864 	case BPF_ST | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = imm */
865 		/* lhi %w0,imm */
866 		EMIT4_IMM(0xa7080000, REG_W0, (u16) imm);
867 		/* sthy %w0,off(dst) */
868 		EMIT6_DISP_LH(0xe3000000, 0x0070, REG_W0, dst_reg, REG_0, off);
869 		jit->seen |= SEEN_MEM;
870 		break;
871 	case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */
872 		/* llilf %w0,imm  */
873 		EMIT6_IMM(0xc00f0000, REG_W0, (u32) imm);
874 		/* sty %w0,off(%dst) */
875 		EMIT6_DISP_LH(0xe3000000, 0x0050, REG_W0, dst_reg, REG_0, off);
876 		jit->seen |= SEEN_MEM;
877 		break;
878 	case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */
879 		/* lgfi %w0,imm */
880 		EMIT6_IMM(0xc0010000, REG_W0, imm);
881 		/* stg %w0,off(%dst) */
882 		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W0, dst_reg, REG_0, off);
883 		jit->seen |= SEEN_MEM;
884 		break;
885 	/*
886 	 * BPF_STX XADD (atomic_add)
887 	 */
888 	case BPF_STX | BPF_XADD | BPF_W: /* *(u32 *)(dst + off) += src */
889 		/* laal %w0,%src,off(%dst) */
890 		EMIT6_DISP_LH(0xeb000000, 0x00fa, REG_W0, src_reg,
891 			      dst_reg, off);
892 		jit->seen |= SEEN_MEM;
893 		break;
894 	case BPF_STX | BPF_XADD | BPF_DW: /* *(u64 *)(dst + off) += src */
895 		/* laalg %w0,%src,off(%dst) */
896 		EMIT6_DISP_LH(0xeb000000, 0x00ea, REG_W0, src_reg,
897 			      dst_reg, off);
898 		jit->seen |= SEEN_MEM;
899 		break;
900 	/*
901 	 * BPF_LDX
902 	 */
903 	case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */
904 		/* llgc %dst,0(off,%src) */
905 		EMIT6_DISP_LH(0xe3000000, 0x0090, dst_reg, src_reg, REG_0, off);
906 		jit->seen |= SEEN_MEM;
907 		break;
908 	case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */
909 		/* llgh %dst,0(off,%src) */
910 		EMIT6_DISP_LH(0xe3000000, 0x0091, dst_reg, src_reg, REG_0, off);
911 		jit->seen |= SEEN_MEM;
912 		break;
913 	case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */
914 		/* llgf %dst,off(%src) */
915 		jit->seen |= SEEN_MEM;
916 		EMIT6_DISP_LH(0xe3000000, 0x0016, dst_reg, src_reg, REG_0, off);
917 		break;
918 	case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */
919 		/* lg %dst,0(off,%src) */
920 		jit->seen |= SEEN_MEM;
921 		EMIT6_DISP_LH(0xe3000000, 0x0004, dst_reg, src_reg, REG_0, off);
922 		break;
923 	/*
924 	 * BPF_JMP / CALL
925 	 */
926 	case BPF_JMP | BPF_CALL:
927 	{
928 		/*
929 		 * b0 = (__bpf_call_base + imm)(b1, b2, b3, b4, b5)
930 		 */
931 		const u64 func = (u64)__bpf_call_base + imm;
932 
933 		REG_SET_SEEN(BPF_REG_5);
934 		jit->seen |= SEEN_FUNC;
935 		/* lg %w1,<d(imm)>(%l) */
936 		EMIT6_DISP_LH(0xe3000000, 0x0004, REG_W1, REG_0, REG_L,
937 			      EMIT_CONST_U64(func));
938 		/* basr %r14,%w1 */
939 		EMIT2(0x0d00, REG_14, REG_W1);
940 		/* lgr %b0,%r2: load return value into %b0 */
941 		EMIT4(0xb9040000, BPF_REG_0, REG_2);
942 		break;
943 	}
944 	case BPF_JMP | BPF_TAIL_CALL:
945 		/*
946 		 * Implicit input:
947 		 *  B1: pointer to ctx
948 		 *  B2: pointer to bpf_array
949 		 *  B3: index in bpf_array
950 		 */
951 		jit->seen |= SEEN_TAIL_CALL;
952 
953 		/*
954 		 * if (index >= array->map.max_entries)
955 		 *         goto out;
956 		 */
957 
958 		/* llgf %w1,map.max_entries(%b2) */
959 		EMIT6_DISP_LH(0xe3000000, 0x0016, REG_W1, REG_0, BPF_REG_2,
960 			      offsetof(struct bpf_array, map.max_entries));
961 		/* clgrj %b3,%w1,0xa,label0: if %b3 >= %w1 goto out */
962 		EMIT6_PCREL_LABEL(0xec000000, 0x0065, BPF_REG_3,
963 				  REG_W1, 0, 0xa);
964 
965 		/*
966 		 * if (tail_call_cnt++ > MAX_TAIL_CALL_CNT)
967 		 *         goto out;
968 		 */
969 
970 		if (jit->seen & SEEN_STACK)
971 			off = STK_OFF_TCCNT + STK_OFF + fp->aux->stack_depth;
972 		else
973 			off = STK_OFF_TCCNT;
974 		/* lhi %w0,1 */
975 		EMIT4_IMM(0xa7080000, REG_W0, 1);
976 		/* laal %w1,%w0,off(%r15) */
977 		EMIT6_DISP_LH(0xeb000000, 0x00fa, REG_W1, REG_W0, REG_15, off);
978 		/* clij %w1,MAX_TAIL_CALL_CNT,0x2,label0 */
979 		EMIT6_PCREL_IMM_LABEL(0xec000000, 0x007f, REG_W1,
980 				      MAX_TAIL_CALL_CNT, 0, 0x2);
981 
982 		/*
983 		 * prog = array->ptrs[index];
984 		 * if (prog == NULL)
985 		 *         goto out;
986 		 */
987 
988 		/* sllg %r1,%b3,3: %r1 = index * 8 */
989 		EMIT6_DISP_LH(0xeb000000, 0x000d, REG_1, BPF_REG_3, REG_0, 3);
990 		/* lg %r1,prog(%b2,%r1) */
991 		EMIT6_DISP_LH(0xe3000000, 0x0004, REG_1, BPF_REG_2,
992 			      REG_1, offsetof(struct bpf_array, ptrs));
993 		/* clgij %r1,0,0x8,label0 */
994 		EMIT6_PCREL_IMM_LABEL(0xec000000, 0x007d, REG_1, 0, 0, 0x8);
995 
996 		/*
997 		 * Restore registers before calling function
998 		 */
999 		save_restore_regs(jit, REGS_RESTORE, fp->aux->stack_depth);
1000 
1001 		/*
1002 		 * goto *(prog->bpf_func + tail_call_start);
1003 		 */
1004 
1005 		/* lg %r1,bpf_func(%r1) */
1006 		EMIT6_DISP_LH(0xe3000000, 0x0004, REG_1, REG_1, REG_0,
1007 			      offsetof(struct bpf_prog, bpf_func));
1008 		/* bc 0xf,tail_call_start(%r1) */
1009 		_EMIT4(0x47f01000 + jit->tail_call_start);
1010 		/* out: */
1011 		jit->labels[0] = jit->prg;
1012 		break;
1013 	case BPF_JMP | BPF_EXIT: /* return b0 */
1014 		last = (i == fp->len - 1) ? 1 : 0;
1015 		if (last && !(jit->seen & SEEN_RET0))
1016 			break;
1017 		/* j <exit> */
1018 		EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg);
1019 		break;
1020 	/*
1021 	 * Branch relative (number of skipped instructions) to offset on
1022 	 * condition.
1023 	 *
1024 	 * Condition code to mask mapping:
1025 	 *
1026 	 * CC | Description	   | Mask
1027 	 * ------------------------------
1028 	 * 0  | Operands equal	   |	8
1029 	 * 1  | First operand low  |	4
1030 	 * 2  | First operand high |	2
1031 	 * 3  | Unused		   |	1
1032 	 *
1033 	 * For s390x relative branches: ip = ip + off_bytes
1034 	 * For BPF relative branches:	insn = insn + off_insns + 1
1035 	 *
1036 	 * For example for s390x with offset 0 we jump to the branch
1037 	 * instruction itself (loop) and for BPF with offset 0 we
1038 	 * branch to the instruction behind the branch.
1039 	 */
1040 	case BPF_JMP | BPF_JA: /* if (true) */
1041 		mask = 0xf000; /* j */
1042 		goto branch_oc;
1043 	case BPF_JMP | BPF_JSGT | BPF_K: /* ((s64) dst > (s64) imm) */
1044 		mask = 0x2000; /* jh */
1045 		goto branch_ks;
1046 	case BPF_JMP | BPF_JSLT | BPF_K: /* ((s64) dst < (s64) imm) */
1047 		mask = 0x4000; /* jl */
1048 		goto branch_ks;
1049 	case BPF_JMP | BPF_JSGE | BPF_K: /* ((s64) dst >= (s64) imm) */
1050 		mask = 0xa000; /* jhe */
1051 		goto branch_ks;
1052 	case BPF_JMP | BPF_JSLE | BPF_K: /* ((s64) dst <= (s64) imm) */
1053 		mask = 0xc000; /* jle */
1054 		goto branch_ks;
1055 	case BPF_JMP | BPF_JGT | BPF_K: /* (dst_reg > imm) */
1056 		mask = 0x2000; /* jh */
1057 		goto branch_ku;
1058 	case BPF_JMP | BPF_JLT | BPF_K: /* (dst_reg < imm) */
1059 		mask = 0x4000; /* jl */
1060 		goto branch_ku;
1061 	case BPF_JMP | BPF_JGE | BPF_K: /* (dst_reg >= imm) */
1062 		mask = 0xa000; /* jhe */
1063 		goto branch_ku;
1064 	case BPF_JMP | BPF_JLE | BPF_K: /* (dst_reg <= imm) */
1065 		mask = 0xc000; /* jle */
1066 		goto branch_ku;
1067 	case BPF_JMP | BPF_JNE | BPF_K: /* (dst_reg != imm) */
1068 		mask = 0x7000; /* jne */
1069 		goto branch_ku;
1070 	case BPF_JMP | BPF_JEQ | BPF_K: /* (dst_reg == imm) */
1071 		mask = 0x8000; /* je */
1072 		goto branch_ku;
1073 	case BPF_JMP | BPF_JSET | BPF_K: /* (dst_reg & imm) */
1074 		mask = 0x7000; /* jnz */
1075 		/* lgfi %w1,imm (load sign extend imm) */
1076 		EMIT6_IMM(0xc0010000, REG_W1, imm);
1077 		/* ngr %w1,%dst */
1078 		EMIT4(0xb9800000, REG_W1, dst_reg);
1079 		goto branch_oc;
1080 
1081 	case BPF_JMP | BPF_JSGT | BPF_X: /* ((s64) dst > (s64) src) */
1082 		mask = 0x2000; /* jh */
1083 		goto branch_xs;
1084 	case BPF_JMP | BPF_JSLT | BPF_X: /* ((s64) dst < (s64) src) */
1085 		mask = 0x4000; /* jl */
1086 		goto branch_xs;
1087 	case BPF_JMP | BPF_JSGE | BPF_X: /* ((s64) dst >= (s64) src) */
1088 		mask = 0xa000; /* jhe */
1089 		goto branch_xs;
1090 	case BPF_JMP | BPF_JSLE | BPF_X: /* ((s64) dst <= (s64) src) */
1091 		mask = 0xc000; /* jle */
1092 		goto branch_xs;
1093 	case BPF_JMP | BPF_JGT | BPF_X: /* (dst > src) */
1094 		mask = 0x2000; /* jh */
1095 		goto branch_xu;
1096 	case BPF_JMP | BPF_JLT | BPF_X: /* (dst < src) */
1097 		mask = 0x4000; /* jl */
1098 		goto branch_xu;
1099 	case BPF_JMP | BPF_JGE | BPF_X: /* (dst >= src) */
1100 		mask = 0xa000; /* jhe */
1101 		goto branch_xu;
1102 	case BPF_JMP | BPF_JLE | BPF_X: /* (dst <= src) */
1103 		mask = 0xc000; /* jle */
1104 		goto branch_xu;
1105 	case BPF_JMP | BPF_JNE | BPF_X: /* (dst != src) */
1106 		mask = 0x7000; /* jne */
1107 		goto branch_xu;
1108 	case BPF_JMP | BPF_JEQ | BPF_X: /* (dst == src) */
1109 		mask = 0x8000; /* je */
1110 		goto branch_xu;
1111 	case BPF_JMP | BPF_JSET | BPF_X: /* (dst & src) */
1112 		mask = 0x7000; /* jnz */
1113 		/* ngrk %w1,%dst,%src */
1114 		EMIT4_RRF(0xb9e40000, REG_W1, dst_reg, src_reg);
1115 		goto branch_oc;
1116 branch_ks:
1117 		/* lgfi %w1,imm (load sign extend imm) */
1118 		EMIT6_IMM(0xc0010000, REG_W1, imm);
1119 		/* cgrj %dst,%w1,mask,off */
1120 		EMIT6_PCREL(0xec000000, 0x0064, dst_reg, REG_W1, i, off, mask);
1121 		break;
1122 branch_ku:
1123 		/* lgfi %w1,imm (load sign extend imm) */
1124 		EMIT6_IMM(0xc0010000, REG_W1, imm);
1125 		/* clgrj %dst,%w1,mask,off */
1126 		EMIT6_PCREL(0xec000000, 0x0065, dst_reg, REG_W1, i, off, mask);
1127 		break;
1128 branch_xs:
1129 		/* cgrj %dst,%src,mask,off */
1130 		EMIT6_PCREL(0xec000000, 0x0064, dst_reg, src_reg, i, off, mask);
1131 		break;
1132 branch_xu:
1133 		/* clgrj %dst,%src,mask,off */
1134 		EMIT6_PCREL(0xec000000, 0x0065, dst_reg, src_reg, i, off, mask);
1135 		break;
1136 branch_oc:
1137 		/* brc mask,jmp_off (branch instruction needs 4 bytes) */
1138 		jmp_off = addrs[i + off + 1] - (addrs[i + 1] - 4);
1139 		EMIT4_PCREL(0xa7040000 | mask << 8, jmp_off);
1140 		break;
1141 	default: /* too complex, give up */
1142 		pr_err("Unknown opcode %02x\n", insn->code);
1143 		return -1;
1144 	}
1145 	return insn_count;
1146 }
1147 
1148 /*
1149  * Compile eBPF program into s390x code
1150  */
1151 static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp)
1152 {
1153 	int i, insn_count;
1154 
1155 	jit->lit = jit->lit_start;
1156 	jit->prg = 0;
1157 
1158 	bpf_jit_prologue(jit, fp->aux->stack_depth);
1159 	for (i = 0; i < fp->len; i += insn_count) {
1160 		insn_count = bpf_jit_insn(jit, fp, i);
1161 		if (insn_count < 0)
1162 			return -1;
1163 		/* Next instruction address */
1164 		jit->addrs[i + insn_count] = jit->prg;
1165 	}
1166 	bpf_jit_epilogue(jit, fp->aux->stack_depth);
1167 
1168 	jit->lit_start = jit->prg;
1169 	jit->size = jit->lit;
1170 	jit->size_prg = jit->prg;
1171 	return 0;
1172 }
1173 
1174 /*
1175  * Compile eBPF program "fp"
1176  */
1177 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
1178 {
1179 	struct bpf_prog *tmp, *orig_fp = fp;
1180 	struct bpf_binary_header *header;
1181 	bool tmp_blinded = false;
1182 	struct bpf_jit jit;
1183 	int pass;
1184 
1185 	if (!fp->jit_requested)
1186 		return orig_fp;
1187 
1188 	tmp = bpf_jit_blind_constants(fp);
1189 	/*
1190 	 * If blinding was requested and we failed during blinding,
1191 	 * we must fall back to the interpreter.
1192 	 */
1193 	if (IS_ERR(tmp))
1194 		return orig_fp;
1195 	if (tmp != fp) {
1196 		tmp_blinded = true;
1197 		fp = tmp;
1198 	}
1199 
1200 	memset(&jit, 0, sizeof(jit));
1201 	jit.addrs = kcalloc(fp->len + 1, sizeof(*jit.addrs), GFP_KERNEL);
1202 	if (jit.addrs == NULL) {
1203 		fp = orig_fp;
1204 		goto out;
1205 	}
1206 	/*
1207 	 * Three initial passes:
1208 	 *   - 1/2: Determine clobbered registers
1209 	 *   - 3:   Calculate program size and addrs arrray
1210 	 */
1211 	for (pass = 1; pass <= 3; pass++) {
1212 		if (bpf_jit_prog(&jit, fp)) {
1213 			fp = orig_fp;
1214 			goto free_addrs;
1215 		}
1216 	}
1217 	/*
1218 	 * Final pass: Allocate and generate program
1219 	 */
1220 	if (jit.size >= BPF_SIZE_MAX) {
1221 		fp = orig_fp;
1222 		goto free_addrs;
1223 	}
1224 	header = bpf_jit_binary_alloc(jit.size, &jit.prg_buf, 2, jit_fill_hole);
1225 	if (!header) {
1226 		fp = orig_fp;
1227 		goto free_addrs;
1228 	}
1229 	if (bpf_jit_prog(&jit, fp)) {
1230 		fp = orig_fp;
1231 		goto free_addrs;
1232 	}
1233 	if (bpf_jit_enable > 1) {
1234 		bpf_jit_dump(fp->len, jit.size, pass, jit.prg_buf);
1235 		print_fn_code(jit.prg_buf, jit.size_prg);
1236 	}
1237 	bpf_jit_binary_lock_ro(header);
1238 	fp->bpf_func = (void *) jit.prg_buf;
1239 	fp->jited = 1;
1240 	fp->jited_len = jit.size;
1241 free_addrs:
1242 	kfree(jit.addrs);
1243 out:
1244 	if (tmp_blinded)
1245 		bpf_jit_prog_release_other(fp, fp == orig_fp ?
1246 					   tmp : orig_fp);
1247 	return fp;
1248 }
1249