xref: /linux/samples/bpf/xdp_fwd_kern.c (revision a44e4f3ab16bc808590763a543a93b6fbf3abcc4)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2017-18 David Ahern <dsahern@gmail.com>
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of version 2 of the GNU General Public
6  * License as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * General Public License for more details.
12  */
13 #define KBUILD_MODNAME "foo"
14 #include <uapi/linux/bpf.h>
15 #include <linux/in.h>
16 #include <linux/if_ether.h>
17 #include <linux/if_packet.h>
18 #include <linux/if_vlan.h>
19 #include <linux/ip.h>
20 #include <linux/ipv6.h>
21 
22 #include "bpf_helpers.h"
23 
/*
 * Big-endian mask applied to the first 32-bit word of the IPv6 header to
 * produce the flowinfo value handed to bpf_fib_lookup(): the top four bits
 * of that word are cleared and the remaining 28 bits are kept.
 */
#define IPV6_FLOWINFO_MASK	cpu_to_be32(0x0FFFFFFF)
25 
26 /* For TX-traffic redirect requires net_device ifindex to be in this devmap */
27 struct bpf_map_def SEC("maps") xdp_tx_ports = {
28 	.type = BPF_MAP_TYPE_DEVMAP,
29 	.key_size = sizeof(int),
30 	.value_size = sizeof(int),
31 	.max_entries = 64,
32 };
33 
34 /* from include/net/ip.h */
35 static __always_inline int ip_decrease_ttl(struct iphdr *iph)
36 {
37 	u32 check = (__force u32)iph->check;
38 
39 	check += (__force u32)htons(0x0100);
40 	iph->check = (__force __sum16)(check + (check >= 0xFFFF));
41 	return --iph->ttl;
42 }
43 
44 static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags)
45 {
46 	void *data_end = (void *)(long)ctx->data_end;
47 	void *data = (void *)(long)ctx->data;
48 	struct bpf_fib_lookup fib_params;
49 	struct ethhdr *eth = data;
50 	struct ipv6hdr *ip6h;
51 	struct iphdr *iph;
52 	u16 h_proto;
53 	u64 nh_off;
54 	int rc;
55 
56 	nh_off = sizeof(*eth);
57 	if (data + nh_off > data_end)
58 		return XDP_DROP;
59 
60 	__builtin_memset(&fib_params, 0, sizeof(fib_params));
61 
62 	h_proto = eth->h_proto;
63 	if (h_proto == htons(ETH_P_IP)) {
64 		iph = data + nh_off;
65 
66 		if (iph + 1 > data_end)
67 			return XDP_DROP;
68 
69 		if (iph->ttl <= 1)
70 			return XDP_PASS;
71 
72 		fib_params.family	= AF_INET;
73 		fib_params.tos		= iph->tos;
74 		fib_params.l4_protocol	= iph->protocol;
75 		fib_params.sport	= 0;
76 		fib_params.dport	= 0;
77 		fib_params.tot_len	= ntohs(iph->tot_len);
78 		fib_params.ipv4_src	= iph->saddr;
79 		fib_params.ipv4_dst	= iph->daddr;
80 	} else if (h_proto == htons(ETH_P_IPV6)) {
81 		struct in6_addr *src = (struct in6_addr *) fib_params.ipv6_src;
82 		struct in6_addr *dst = (struct in6_addr *) fib_params.ipv6_dst;
83 
84 		ip6h = data + nh_off;
85 		if (ip6h + 1 > data_end)
86 			return XDP_DROP;
87 
88 		if (ip6h->hop_limit <= 1)
89 			return XDP_PASS;
90 
91 		fib_params.family	= AF_INET6;
92 		fib_params.flowinfo	= *(__be32 *)ip6h & IPV6_FLOWINFO_MASK;
93 		fib_params.l4_protocol	= ip6h->nexthdr;
94 		fib_params.sport	= 0;
95 		fib_params.dport	= 0;
96 		fib_params.tot_len	= ntohs(ip6h->payload_len);
97 		*src			= ip6h->saddr;
98 		*dst			= ip6h->daddr;
99 	} else {
100 		return XDP_PASS;
101 	}
102 
103 	fib_params.ifindex = ctx->ingress_ifindex;
104 
105 	rc = bpf_fib_lookup(ctx, &fib_params, sizeof(fib_params), flags);
106 	/*
107 	 * Some rc (return codes) from bpf_fib_lookup() are important,
108 	 * to understand how this XDP-prog interacts with network stack.
109 	 *
110 	 * BPF_FIB_LKUP_RET_NO_NEIGH:
111 	 *  Even if route lookup was a success, then the MAC-addresses are also
112 	 *  needed.  This is obtained from arp/neighbour table, but if table is
113 	 *  (still) empty then BPF_FIB_LKUP_RET_NO_NEIGH is returned.  To avoid
114 	 *  doing ARP lookup directly from XDP, then send packet to normal
115 	 *  network stack via XDP_PASS and expect it will do ARP resolution.
116 	 *
117 	 * BPF_FIB_LKUP_RET_FWD_DISABLED:
118 	 *  The bpf_fib_lookup respect sysctl net.ipv{4,6}.conf.all.forwarding
119 	 *  setting, and will return BPF_FIB_LKUP_RET_FWD_DISABLED if not
120 	 *  enabled this on ingress device.
121 	 */
122 	if (rc == BPF_FIB_LKUP_RET_SUCCESS) {
123 		/* Verify egress index has been configured as TX-port.
124 		 * (Note: User can still have inserted an egress ifindex that
125 		 * doesn't support XDP xmit, which will result in packet drops).
126 		 *
127 		 * Note: lookup in devmap supported since 0cdbb4b09a0.
128 		 * If not supported will fail with:
129 		 *  cannot pass map_type 14 into func bpf_map_lookup_elem#1:
130 		 */
131 		if (!bpf_map_lookup_elem(&xdp_tx_ports, &fib_params.ifindex))
132 			return XDP_PASS;
133 
134 		if (h_proto == htons(ETH_P_IP))
135 			ip_decrease_ttl(iph);
136 		else if (h_proto == htons(ETH_P_IPV6))
137 			ip6h->hop_limit--;
138 
139 		memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN);
140 		memcpy(eth->h_source, fib_params.smac, ETH_ALEN);
141 		return bpf_redirect_map(&xdp_tx_ports, fib_params.ifindex, 0);
142 	}
143 
144 	return XDP_PASS;
145 }
146 
147 SEC("xdp_fwd")
148 int xdp_fwd_prog(struct xdp_md *ctx)
149 {
150 	return xdp_fwd_flags(ctx, 0);
151 }
152 
153 SEC("xdp_fwd_direct")
154 int xdp_fwd_direct_prog(struct xdp_md *ctx)
155 {
156 	return xdp_fwd_flags(ctx, BPF_FIB_LOOKUP_DIRECT);
157 }
158 
159 char _license[] SEC("license") = "GPL";
160