xref: /linux/net/netfilter/nf_flow_table_offload.c (revision a5210135489ae7bc1ef1cb4a8157361dd7b468cd)
// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/netdevice.h>
#include <linux/tc_act/tc_csum.h>
#include <net/flow_offload.h>
#include <net/ip_tunnels.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_tuple.h>

#define NF_FLOW_RULE_ACTION_MAX	24

static struct workqueue_struct *nf_flow_offload_add_wq;
static struct workqueue_struct *nf_flow_offload_del_wq;
static struct workqueue_struct *nf_flow_offload_stats_wq;

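/* One deferred hardware offload request (add, delete or stats refresh)
 * for a single flow, executed from the command-specific workqueue.
 */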
struct flow_offload_work {
	struct list_head	list;
	enum flow_cls_command	cmd;
	struct nf_flowtable	*flowtable;
	struct flow_offload	*flow;
	struct work_struct	work;
};

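/* Record where a given match field lives inside struct nf_flow_key, so
 * that the flow dissector can locate the key/mask data for that selector.
 */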
#define NF_FLOW_DISSECTOR(__match, __type, __field)	\
	(__match)->dissector.offset[__type] =		\
		offsetof(struct nf_flow_key, __field)

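/* Match on the tunnel metadata of the other direction's dst: packets in
 * this direction arrive encapsulated, so the outer source/destination
 * addresses appear swapped relative to the transmit tunnel key.
 */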
static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
				   struct ip_tunnel_info *tun_info)
{
	struct nf_flow_key *mask = &match->mask;
	struct nf_flow_key *key = &match->key;
	unsigned long long enc_keys;

	if (!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX))
		return;

	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_CONTROL, enc_control);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
	key->enc_key_id.keyid = tunnel_id_to_key32(tun_info->key.tun_id);
	mask->enc_key_id.keyid = 0xffffffff;
	enc_keys = BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID) |
		   BIT_ULL(FLOW_DISSECTOR_KEY_ENC_CONTROL);

	if (ip_tunnel_info_af(tun_info) == AF_INET) {
		NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
				  enc_ipv4);
		key->enc_ipv4.src = tun_info->key.u.ipv4.dst;
		key->enc_ipv4.dst = tun_info->key.u.ipv4.src;
		if (key->enc_ipv4.src)
			mask->enc_ipv4.src = 0xffffffff;
		if (key->enc_ipv4.dst)
			mask->enc_ipv4.dst = 0xffffffff;
		enc_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS);
		key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
	} else {
		memcpy(&key->enc_ipv6.src, &tun_info->key.u.ipv6.dst,
		       sizeof(struct in6_addr));
		memcpy(&key->enc_ipv6.dst, &tun_info->key.u.ipv6.src,
		       sizeof(struct in6_addr));
		if (memcmp(&key->enc_ipv6.src, &in6addr_any,
			   sizeof(struct in6_addr)))
			memset(&mask->enc_ipv6.src, 0xff,
			       sizeof(struct in6_addr));
		if (memcmp(&key->enc_ipv6.dst, &in6addr_any,
			   sizeof(struct in6_addr)))
			memset(&mask->enc_ipv6.dst, 0xff,
			       sizeof(struct in6_addr));
		enc_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS);
		key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
	}

	match->dissector.used_keys |= enc_keys;
}

static void nf_flow_rule_vlan_match(struct flow_dissector_key_vlan *key,
				    struct flow_dissector_key_vlan *mask,
				    u16 vlan_id, __be16 proto)
{
	key->vlan_id = vlan_id;
	mask->vlan_id = VLAN_VID_MASK;
	key->vlan_tpid = proto;
	mask->vlan_tpid = 0xffff;
}

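/* Populate the flow dissector key and mask from the flow tuple: meta
 * (ingress device), control/basic (address family and L4 protocol),
 * addresses, TCP flags and ports, plus tunnel and VLAN encapsulation
 * where present.
 */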
static int nf_flow_rule_match(struct nf_flow_match *match,
			      const struct flow_offload_tuple *tuple,
			      struct dst_entry *other_dst)
{
	struct nf_flow_key *mask = &match->mask;
	struct nf_flow_key *key = &match->key;
	struct ip_tunnel_info *tun_info;
	bool vlan_encap = false;

	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_META, meta);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_BASIC, basic);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_TCP, tcp);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_PORTS, tp);

	if (other_dst && other_dst->lwtstate) {
		tun_info = lwt_tun_info(other_dst->lwtstate);
		nf_flow_rule_lwt_match(match, tun_info);
	}

	if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_TC)
		key->meta.ingress_ifindex = tuple->tc.iifidx;
	else
		key->meta.ingress_ifindex = tuple->iifidx;

	mask->meta.ingress_ifindex = 0xffffffff;

	if (tuple->encap_num > 0 && !(tuple->in_vlan_ingress & BIT(0)) &&
	    tuple->encap[0].proto == htons(ETH_P_8021Q)) {
		NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN, vlan);
		nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
					tuple->encap[0].id,
					tuple->encap[0].proto);
		vlan_encap = true;
	}

	if (tuple->encap_num > 1 && !(tuple->in_vlan_ingress & BIT(1)) &&
	    tuple->encap[1].proto == htons(ETH_P_8021Q)) {
		if (vlan_encap) {
			NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CVLAN,
					  cvlan);
			nf_flow_rule_vlan_match(&key->cvlan, &mask->cvlan,
						tuple->encap[1].id,
						tuple->encap[1].proto);
		} else {
			NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN,
					  vlan);
			nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
						tuple->encap[1].id,
						tuple->encap[1].proto);
		}
	}

	switch (tuple->l3proto) {
	case AF_INET:
		key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
		key->basic.n_proto = htons(ETH_P_IP);
		key->ipv4.src = tuple->src_v4.s_addr;
		mask->ipv4.src = 0xffffffff;
		key->ipv4.dst = tuple->dst_v4.s_addr;
		mask->ipv4.dst = 0xffffffff;
		break;
	case AF_INET6:
		key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
		key->basic.n_proto = htons(ETH_P_IPV6);
		key->ipv6.src = tuple->src_v6;
		memset(&mask->ipv6.src, 0xff, sizeof(mask->ipv6.src));
		key->ipv6.dst = tuple->dst_v6;
		memset(&mask->ipv6.dst, 0xff, sizeof(mask->ipv6.dst));
		break;
	default:
		return -EOPNOTSUPP;
	}
	mask->control.addr_type = 0xffff;
	match->dissector.used_keys |= BIT_ULL(key->control.addr_type);
	mask->basic.n_proto = 0xffff;

	switch (tuple->l4proto) {
	case IPPROTO_TCP:
		key->tcp.flags = 0;
		mask->tcp.flags = cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16);
		match->dissector.used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_TCP);
		break;
	case IPPROTO_UDP:
	case IPPROTO_GRE:
		break;
	default:
		return -EOPNOTSUPP;
	}

	key->basic.ip_proto = tuple->l4proto;
	mask->basic.ip_proto = 0xff;

	match->dissector.used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_META) |
				      BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL) |
				      BIT_ULL(FLOW_DISSECTOR_KEY_BASIC);

	switch (tuple->l4proto) {
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		key->tp.src = tuple->src_port;
		mask->tp.src = 0xffff;
		key->tp.dst = tuple->dst_port;
		mask->tp.dst = 0xffff;

		match->dissector.used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_PORTS);
		break;
	}

	return 0;
}

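/* Emit one 32-bit masked header rewrite (FLOW_ACTION_MANGLE) action. */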
static void flow_offload_mangle(struct flow_action_entry *entry,
				enum flow_action_mangle_base htype, u32 offset,
				const __be32 *value, const __be32 *mask)
{
	entry->id = FLOW_ACTION_MANGLE;
	entry->mangle.htype = htype;
	entry->mangle.offset = offset;
	memcpy(&entry->mangle.mask, mask, sizeof(u32));
	memcpy(&entry->mangle.val, value, sizeof(u32));
}

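/* Reserve the next slot in the action array, or return NULL once the
 * NF_FLOW_RULE_ACTION_MAX budget is exhausted.
 */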
static inline struct flow_action_entry *
flow_action_entry_next(struct nf_flow_rule *flow_rule)
{
	int i;

	if (unlikely(flow_rule->rule->action.num_entries >= NF_FLOW_RULE_ACTION_MAX))
		return NULL;

	i = flow_rule->rule->action.num_entries++;

	return &flow_rule->rule->action.entries[i];
}

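/* Rewrite the Ethernet source address, taken from the cached direct
 * transmit path or from the MAC of the other direction's ingress
 * device. The 6-byte address is written as two masked 32-bit mangles.
 */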
static int flow_offload_eth_src(struct net *net,
				const struct flow_offload *flow,
				enum flow_offload_tuple_dir dir,
				struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
	struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
	const struct flow_offload_tuple *other_tuple, *this_tuple;
	struct net_device *dev = NULL;
	const unsigned char *addr;
	u32 mask, val;
	u16 val16;

	if (!entry0 || !entry1)
		return -E2BIG;

	this_tuple = &flow->tuplehash[dir].tuple;

	switch (this_tuple->xmit_type) {
	case FLOW_OFFLOAD_XMIT_DIRECT:
		addr = this_tuple->out.h_source;
		break;
	case FLOW_OFFLOAD_XMIT_NEIGH:
		other_tuple = &flow->tuplehash[!dir].tuple;
		dev = dev_get_by_index(net, other_tuple->iifidx);
		if (!dev)
			return -ENOENT;

		addr = dev->dev_addr;
		break;
	default:
		return -EOPNOTSUPP;
	}

	mask = ~0xffff0000;
	memcpy(&val16, addr, 2);
	val = val16 << 16;
	flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
			    &val, &mask);

	mask = ~0xffffffff;
	memcpy(&val, addr + 2, 4);
	flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8,
			    &val, &mask);

	dev_put(dev);

	return 0;
}

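/* Rewrite the Ethernet destination address, resolving it through the
 * neighbour cache unless the direct transmit path already cached it.
 */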
static int flow_offload_eth_dst(struct net *net,
				const struct flow_offload *flow,
				enum flow_offload_tuple_dir dir,
				struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
	struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
	const struct flow_offload_tuple *other_tuple, *this_tuple;
	const struct dst_entry *dst_cache;
	unsigned char ha[ETH_ALEN];
	struct neighbour *n;
	const void *daddr;
	u32 mask, val;
	u8 nud_state;
	u16 val16;

	if (!entry0 || !entry1)
		return -E2BIG;

	this_tuple = &flow->tuplehash[dir].tuple;

	switch (this_tuple->xmit_type) {
	case FLOW_OFFLOAD_XMIT_DIRECT:
		ether_addr_copy(ha, this_tuple->out.h_dest);
		break;
	case FLOW_OFFLOAD_XMIT_NEIGH:
		other_tuple = &flow->tuplehash[!dir].tuple;
		daddr = &other_tuple->src_v4;
		dst_cache = this_tuple->dst_cache;
		n = dst_neigh_lookup(dst_cache, daddr);
		if (!n)
			return -ENOENT;

		read_lock_bh(&n->lock);
		nud_state = n->nud_state;
		ether_addr_copy(ha, n->ha);
		read_unlock_bh(&n->lock);
		neigh_release(n);

		if (!(nud_state & NUD_VALID))
			return -ENOENT;
		break;
	default:
		return -EOPNOTSUPP;
	}

	mask = ~0xffffffff;
	memcpy(&val, ha, 4);
	flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 0,
			    &val, &mask);

	mask = ~0x0000ffff;
	memcpy(&val16, ha + 4, 2);
	val = val16;
	flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
			    &val, &mask);

	return 0;
}

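/* IPv4 NAT: replace the source (SNAT) or destination (DNAT) address
 * with the address recorded in the tuple of the opposite direction.
 */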
static int flow_offload_ipv4_snat(struct net *net,
				  const struct flow_offload *flow,
				  enum flow_offload_tuple_dir dir,
				  struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
	u32 mask = ~htonl(0xffffffff);
	__be32 addr;
	u32 offset;

	if (!entry)
		return -E2BIG;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
		offset = offsetof(struct iphdr, saddr);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
		offset = offsetof(struct iphdr, daddr);
		break;
	default:
		return -EOPNOTSUPP;
	}

	flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
			    &addr, &mask);
	return 0;
}

static int flow_offload_ipv4_dnat(struct net *net,
				  const struct flow_offload *flow,
				  enum flow_offload_tuple_dir dir,
				  struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
	u32 mask = ~htonl(0xffffffff);
	__be32 addr;
	u32 offset;

	if (!entry)
		return -E2BIG;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
		offset = offsetof(struct iphdr, daddr);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
		offset = offsetof(struct iphdr, saddr);
		break;
	default:
		return -EOPNOTSUPP;
	}

	flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
			    &addr, &mask);
	return 0;
}

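/* An IPv6 address rewrite takes four consecutive 32-bit mangle actions. */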
static int flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule,
				    unsigned int offset,
				    const __be32 *addr, const __be32 *mask)
{
	struct flow_action_entry *entry;
	int i;

	for (i = 0; i < sizeof(struct in6_addr) / sizeof(u32); i++) {
		entry = flow_action_entry_next(flow_rule);
		if (!entry)
			return -E2BIG;

		flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6,
				    offset + i * sizeof(u32), &addr[i], mask);
	}

	return 0;
}

static int flow_offload_ipv6_snat(struct net *net,
				  const struct flow_offload *flow,
				  enum flow_offload_tuple_dir dir,
				  struct nf_flow_rule *flow_rule)
{
	u32 mask = ~htonl(0xffffffff);
	const __be32 *addr;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6.s6_addr32;
		offset = offsetof(struct ipv6hdr, saddr);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6.s6_addr32;
		offset = offsetof(struct ipv6hdr, daddr);
		break;
	default:
		return -EOPNOTSUPP;
	}

	return flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
}

static int flow_offload_ipv6_dnat(struct net *net,
				  const struct flow_offload *flow,
				  enum flow_offload_tuple_dir dir,
				  struct nf_flow_rule *flow_rule)
{
	u32 mask = ~htonl(0xffffffff);
	const __be32 *addr;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6.s6_addr32;
		offset = offsetof(struct ipv6hdr, daddr);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6.s6_addr32;
		offset = offsetof(struct ipv6hdr, saddr);
		break;
	default:
		return -EOPNOTSUPP;
	}

	return flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
}

static int flow_offload_l4proto(const struct flow_offload *flow)
{
	u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
	u8 type = 0;

	switch (protonum) {
	case IPPROTO_TCP:
		type = FLOW_ACT_MANGLE_HDR_TYPE_TCP;
		break;
	case IPPROTO_UDP:
		type = FLOW_ACT_MANGLE_HDR_TYPE_UDP;
		break;
	default:
		break;
	}

	return type;
}

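/* Port NAT: ports are rewritten as one masked 32-bit mangle covering
 * both L4 port fields, hence the shifted value and 16-bit masks.
 */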
static int flow_offload_port_snat(struct net *net,
				  const struct flow_offload *flow,
				  enum flow_offload_tuple_dir dir,
				  struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
	u32 mask, port;
	u32 offset;

	if (!entry)
		return -E2BIG;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port);
		offset = 0; /* offsetof(struct tcphdr, source); */
		port = htonl(port << 16);
		mask = ~htonl(0xffff0000);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port);
		offset = 0; /* offsetof(struct tcphdr, dest); */
		port = htonl(port);
		mask = ~htonl(0xffff);
		break;
	default:
		return -EOPNOTSUPP;
	}

	flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
			    &port, &mask);
	return 0;
}

static int flow_offload_port_dnat(struct net *net,
				  const struct flow_offload *flow,
				  enum flow_offload_tuple_dir dir,
				  struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
	u32 mask, port;
	u32 offset;

	if (!entry)
		return -E2BIG;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port);
		offset = 0; /* offsetof(struct tcphdr, dest); */
		port = htonl(port);
		mask = ~htonl(0xffff);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port);
		offset = 0; /* offsetof(struct tcphdr, source); */
		port = htonl(port << 16);
		mask = ~htonl(0xffff0000);
		break;
	default:
		return -EOPNOTSUPP;
	}

	flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
			    &port, &mask);
	return 0;
}

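/* After any NAT mangle, ask the driver to recompute the IPv4 header
 * checksum and, where applicable, the TCP/UDP checksum.
 */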
static int flow_offload_ipv4_checksum(struct net *net,
				      const struct flow_offload *flow,
				      struct nf_flow_rule *flow_rule)
{
	u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);

	if (!entry)
		return -E2BIG;

	entry->id = FLOW_ACTION_CSUM;
	entry->csum_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR;

	switch (protonum) {
	case IPPROTO_TCP:
		entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_TCP;
		break;
	case IPPROTO_UDP:
		entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_UDP;
		break;
	}

	return 0;
}

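/* Final action: redirect to the output device, which is the cached
 * direct path device or the other direction's ingress device.
 */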
static int flow_offload_redirect(struct net *net,
				 const struct flow_offload *flow,
				 enum flow_offload_tuple_dir dir,
				 struct nf_flow_rule *flow_rule)
{
	const struct flow_offload_tuple *this_tuple, *other_tuple;
	struct flow_action_entry *entry;
	struct net_device *dev;
	int ifindex;

	this_tuple = &flow->tuplehash[dir].tuple;
	switch (this_tuple->xmit_type) {
	case FLOW_OFFLOAD_XMIT_DIRECT:
		this_tuple = &flow->tuplehash[dir].tuple;
		ifindex = this_tuple->out.ifidx;
		break;
	case FLOW_OFFLOAD_XMIT_NEIGH:
		other_tuple = &flow->tuplehash[!dir].tuple;
		ifindex = other_tuple->iifidx;
		break;
	default:
		return -EOPNOTSUPP;
	}

	dev = dev_get_by_index(net, ifindex);
	if (!dev)
		return -ENODEV;

	entry = flow_action_entry_next(flow_rule);
	if (!entry) {
		dev_put(dev);
		return -E2BIG;
	}

	entry->id = FLOW_ACTION_REDIRECT;
	entry->dev = dev;

	return 0;
}

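/* Add tunnel encap/decap actions when the dst carries transmit tunnel
 * metadata: encap follows this direction's dst, decap is implied by
 * the other direction's dst.
 */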
static int flow_offload_encap_tunnel(const struct flow_offload *flow,
				     enum flow_offload_tuple_dir dir,
				     struct nf_flow_rule *flow_rule)
{
	const struct flow_offload_tuple *this_tuple;
	struct flow_action_entry *entry;
	struct dst_entry *dst;

	this_tuple = &flow->tuplehash[dir].tuple;
	if (this_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
		return 0;

	dst = this_tuple->dst_cache;
	if (dst && dst->lwtstate) {
		struct ip_tunnel_info *tun_info;

		tun_info = lwt_tun_info(dst->lwtstate);
		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
			entry = flow_action_entry_next(flow_rule);
			if (!entry)
				return -E2BIG;
			entry->id = FLOW_ACTION_TUNNEL_ENCAP;
			entry->tunnel = tun_info;
		}
	}

	return 0;
}

static int flow_offload_decap_tunnel(const struct flow_offload *flow,
				     enum flow_offload_tuple_dir dir,
				     struct nf_flow_rule *flow_rule)
{
	const struct flow_offload_tuple *other_tuple;
	struct flow_action_entry *entry;
	struct dst_entry *dst;

	other_tuple = &flow->tuplehash[!dir].tuple;
	if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
		return 0;

	dst = other_tuple->dst_cache;
	if (dst && dst->lwtstate) {
		struct ip_tunnel_info *tun_info;

		tun_info = lwt_tun_info(dst->lwtstate);
		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
			entry = flow_action_entry_next(flow_rule);
			if (!entry)
				return -E2BIG;
			entry->id = FLOW_ACTION_TUNNEL_DECAP;
		}
	}

	return 0;
}

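/* Build the common forwarding actions for one direction: tunnel
 * decap/encap, MAC rewrites, VLAN pop for this direction's ingress
 * encapsulation and VLAN/PPPoE push for the other direction's.
 */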
static int
nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow,
			  enum flow_offload_tuple_dir dir,
			  struct nf_flow_rule *flow_rule)
{
	const struct flow_offload_tuple *other_tuple;
	const struct flow_offload_tuple *tuple;
	int i;

	if (flow_offload_decap_tunnel(flow, dir, flow_rule) < 0 ||
	    flow_offload_encap_tunnel(flow, dir, flow_rule) < 0)
		return -1;

	if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
	    flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
		return -1;

	tuple = &flow->tuplehash[dir].tuple;

	for (i = 0; i < tuple->encap_num; i++) {
		struct flow_action_entry *entry;

		if (tuple->in_vlan_ingress & BIT(i))
			continue;

		if (tuple->encap[i].proto == htons(ETH_P_8021Q)) {
			entry = flow_action_entry_next(flow_rule);
			if (!entry)
				return -1;
			entry->id = FLOW_ACTION_VLAN_POP;
		}
	}

	other_tuple = &flow->tuplehash[!dir].tuple;

	for (i = 0; i < other_tuple->encap_num; i++) {
		struct flow_action_entry *entry;

		if (other_tuple->in_vlan_ingress & BIT(i))
			continue;

		entry = flow_action_entry_next(flow_rule);
		if (!entry)
			return -1;

		switch (other_tuple->encap[i].proto) {
		case htons(ETH_P_PPP_SES):
			entry->id = FLOW_ACTION_PPPOE_PUSH;
			entry->pppoe.sid = other_tuple->encap[i].id;
			break;
		case htons(ETH_P_8021Q):
			entry->id = FLOW_ACTION_VLAN_PUSH;
			entry->vlan.vid = other_tuple->encap[i].id;
			entry->vlan.proto = other_tuple->encap[i].proto;
			break;
		}
	}

	return 0;
}

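/* Translate a flow into the full action list for the IPv4/IPv6 route
 * flowtable types: common forwarding actions, NAT mangles, checksum
 * fixup (IPv4 only) and the final redirect.
 */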
int nf_flow_rule_route_ipv4(struct net *net, struct flow_offload *flow,
			    enum flow_offload_tuple_dir dir,
			    struct nf_flow_rule *flow_rule)
{
	if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
		return -1;

	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
		if (flow_offload_ipv4_snat(net, flow, dir, flow_rule) < 0 ||
		    flow_offload_port_snat(net, flow, dir, flow_rule) < 0)
			return -1;
	}
	if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
		if (flow_offload_ipv4_dnat(net, flow, dir, flow_rule) < 0 ||
		    flow_offload_port_dnat(net, flow, dir, flow_rule) < 0)
			return -1;
	}
	if (test_bit(NF_FLOW_SNAT, &flow->flags) ||
	    test_bit(NF_FLOW_DNAT, &flow->flags))
		if (flow_offload_ipv4_checksum(net, flow, flow_rule) < 0)
			return -1;

	if (flow_offload_redirect(net, flow, dir, flow_rule) < 0)
		return -1;

	return 0;
}
EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv4);

int nf_flow_rule_route_ipv6(struct net *net, struct flow_offload *flow,
			    enum flow_offload_tuple_dir dir,
			    struct nf_flow_rule *flow_rule)
{
	if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
		return -1;

	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
		if (flow_offload_ipv6_snat(net, flow, dir, flow_rule) < 0 ||
		    flow_offload_port_snat(net, flow, dir, flow_rule) < 0)
			return -1;
	}
	if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
		if (flow_offload_ipv6_dnat(net, flow, dir, flow_rule) < 0 ||
		    flow_offload_port_dnat(net, flow, dir, flow_rule) < 0)
			return -1;
	}

	if (flow_offload_redirect(net, flow, dir, flow_rule) < 0)
		return -1;

	return 0;
}
EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv6);

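/* Allocate and populate the flow_rule (match plus actions) for one
 * direction of a flow.
 */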
static struct nf_flow_rule *
nf_flow_offload_rule_alloc(struct net *net,
			   const struct flow_offload_work *offload,
			   enum flow_offload_tuple_dir dir)
{
	const struct nf_flowtable *flowtable = offload->flowtable;
	const struct flow_offload_tuple *tuple, *other_tuple;
	struct flow_offload *flow = offload->flow;
	struct dst_entry *other_dst = NULL;
	struct nf_flow_rule *flow_rule;
	int err = -ENOMEM;

	flow_rule = kzalloc(sizeof(*flow_rule), GFP_ATOMIC);
	if (!flow_rule)
		goto err_flow;
	flow_rule->rule = flow_rule_alloc(NF_FLOW_RULE_ACTION_MAX);
	if (!flow_rule->rule)
		goto err_flow_rule;

	flow_rule->rule->match.dissector = &flow_rule->match.dissector;
	flow_rule->rule->match.mask = &flow_rule->match.mask;
	flow_rule->rule->match.key = &flow_rule->match.key;

	tuple = &flow->tuplehash[dir].tuple;
	other_tuple = &flow->tuplehash[!dir].tuple;
	if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)
		other_dst = other_tuple->dst_cache;

	err = nf_flow_rule_match(&flow_rule->match, tuple, other_dst);
	if (err < 0)
		goto err_flow_match;

	flow_rule->rule->action.num_entries = 0;
	if (flowtable->type->action(net, flow, dir, flow_rule) < 0)
		goto err_flow_match;

	return flow_rule;

err_flow_match:
	kfree(flow_rule->rule);
err_flow_rule:
	kfree(flow_rule);
err_flow:
	return NULL;
}

static void __nf_flow_offload_destroy(struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry;
	int i;

	for (i = 0; i < flow_rule->rule->action.num_entries; i++) {
		entry = &flow_rule->rule->action.entries[i];
		if (entry->id != FLOW_ACTION_REDIRECT)
			continue;

		dev_put(entry->dev);
	}
	kfree(flow_rule->rule);
	kfree(flow_rule);
}

static void nf_flow_offload_destroy(struct nf_flow_rule *flow_rule[])
{
	int i;

	for (i = 0; i < FLOW_OFFLOAD_DIR_MAX; i++)
		__nf_flow_offload_destroy(flow_rule[i]);
}

static int nf_flow_offload_alloc(const struct flow_offload_work *offload,
				 struct nf_flow_rule *flow_rule[])
{
	struct net *net = read_pnet(&offload->flowtable->net);

	flow_rule[0] = nf_flow_offload_rule_alloc(net, offload,
						  FLOW_OFFLOAD_DIR_ORIGINAL);
	if (!flow_rule[0])
		return -ENOMEM;

	flow_rule[1] = nf_flow_offload_rule_alloc(net, offload,
						  FLOW_OFFLOAD_DIR_REPLY);
	if (!flow_rule[1]) {
		__nf_flow_offload_destroy(flow_rule[0]);
		return -ENOMEM;
	}

	return 0;
}

static void nf_flow_offload_init(struct flow_cls_offload *cls_flow,
				 __be16 proto, int priority,
				 enum flow_cls_command cmd,
				 const struct flow_offload_tuple *tuple,
				 struct netlink_ext_ack *extack)
{
	cls_flow->common.protocol = proto;
	cls_flow->common.prio = priority;
	cls_flow->common.extack = extack;
	cls_flow->command = cmd;
	cls_flow->cookie = (unsigned long)tuple;
}

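/* Run a classifier command against every callback bound to the
 * flowtable block; returns the number of callbacks that accepted it
 * and, for FLOW_CLS_STATS, copies out the reported counters.
 */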
static int nf_flow_offload_tuple(struct nf_flowtable *flowtable,
				 struct flow_offload *flow,
				 struct nf_flow_rule *flow_rule,
				 enum flow_offload_tuple_dir dir,
				 int priority, int cmd,
				 struct flow_stats *stats,
				 struct list_head *block_cb_list)
{
	struct flow_cls_offload cls_flow = {};
	struct netlink_ext_ack extack = {};
	struct flow_block_cb *block_cb;
	__be16 proto = ETH_P_ALL;
	int err, i = 0;

	nf_flow_offload_init(&cls_flow, proto, priority, cmd,
			     &flow->tuplehash[dir].tuple, &extack);
	if (cmd == FLOW_CLS_REPLACE)
		cls_flow.rule = flow_rule->rule;

	down_read(&flowtable->flow_block_lock);
	list_for_each_entry(block_cb, block_cb_list, list) {
		err = block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow,
				   block_cb->cb_priv);
		if (err < 0)
			continue;

		i++;
	}
	up_read(&flowtable->flow_block_lock);

	if (cmd == FLOW_CLS_STATS)
		memcpy(stats, &cls_flow.stats, sizeof(*stats));

	return i;
}

static int flow_offload_tuple_add(struct flow_offload_work *offload,
				  struct nf_flow_rule *flow_rule,
				  enum flow_offload_tuple_dir dir)
{
	return nf_flow_offload_tuple(offload->flowtable, offload->flow,
				     flow_rule, dir,
				     offload->flowtable->priority,
				     FLOW_CLS_REPLACE, NULL,
				     &offload->flowtable->flow_block.cb_list);
}

static void flow_offload_tuple_del(struct flow_offload_work *offload,
				   enum flow_offload_tuple_dir dir)
{
	nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
			      offload->flowtable->priority,
			      FLOW_CLS_DESTROY, NULL,
			      &offload->flowtable->flow_block.cb_list);
}

static int flow_offload_rule_add(struct flow_offload_work *offload,
				 struct nf_flow_rule *flow_rule[])
{
	int ok_count = 0;

	ok_count += flow_offload_tuple_add(offload, flow_rule[0],
					   FLOW_OFFLOAD_DIR_ORIGINAL);
	if (test_bit(NF_FLOW_HW_BIDIRECTIONAL, &offload->flow->flags))
		ok_count += flow_offload_tuple_add(offload, flow_rule[1],
						   FLOW_OFFLOAD_DIR_REPLY);
	if (ok_count == 0)
		return -ENOENT;

	return 0;
}

static void flow_offload_work_add(struct flow_offload_work *offload)
{
	struct nf_flow_rule *flow_rule[FLOW_OFFLOAD_DIR_MAX];
	int err;

	err = nf_flow_offload_alloc(offload, flow_rule);
	if (err < 0)
		return;

	err = flow_offload_rule_add(offload, flow_rule);
	if (err < 0)
		goto out;

	set_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);

out:
	nf_flow_offload_destroy(flow_rule);
}

static void flow_offload_work_del(struct flow_offload_work *offload)
{
	clear_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
	flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL);
	if (test_bit(NF_FLOW_HW_BIDIRECTIONAL, &offload->flow->flags))
		flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
	set_bit(NF_FLOW_HW_DEAD, &offload->flow->flags);
}

static void flow_offload_tuple_stats(struct flow_offload_work *offload,
				     enum flow_offload_tuple_dir dir,
				     struct flow_stats *stats)
{
	nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
			      offload->flowtable->priority,
			      FLOW_CLS_STATS, stats,
			      &offload->flowtable->flow_block.cb_list);
}

static void flow_offload_work_stats(struct flow_offload_work *offload)
{
	struct flow_stats stats[FLOW_OFFLOAD_DIR_MAX] = {};
	u64 lastused;

	flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_ORIGINAL, &stats[0]);
	if (test_bit(NF_FLOW_HW_BIDIRECTIONAL, &offload->flow->flags))
		flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY,
					 &stats[1]);

	lastused = max_t(u64, stats[0].lastused, stats[1].lastused);
	offload->flow->timeout = max_t(u64, offload->flow->timeout,
				       lastused + flow_offload_get_timeout(offload->flow));

	if (offload->flowtable->flags & NF_FLOWTABLE_COUNTER) {
		if (stats[0].pkts)
			nf_ct_acct_add(offload->flow->ct,
				       FLOW_OFFLOAD_DIR_ORIGINAL,
				       stats[0].pkts, stats[0].bytes);
		if (stats[1].pkts)
			nf_ct_acct_add(offload->flow->ct,
				       FLOW_OFFLOAD_DIR_REPLY,
				       stats[1].pkts, stats[1].bytes);
	}
}

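/* Work handler: dispatch on the queued command and drop the per-flow
 * NF_FLOW_HW_PENDING marker once the request has been processed.
 */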
static void flow_offload_work_handler(struct work_struct *work)
{
	struct flow_offload_work *offload;
	struct net *net;

	offload = container_of(work, struct flow_offload_work, work);
	net = read_pnet(&offload->flowtable->net);
	switch (offload->cmd) {
	case FLOW_CLS_REPLACE:
		flow_offload_work_add(offload);
		NF_FLOW_TABLE_STAT_DEC_ATOMIC(net, count_wq_add);
		break;
	case FLOW_CLS_DESTROY:
		flow_offload_work_del(offload);
		NF_FLOW_TABLE_STAT_DEC_ATOMIC(net, count_wq_del);
		break;
	case FLOW_CLS_STATS:
		flow_offload_work_stats(offload);
		NF_FLOW_TABLE_STAT_DEC_ATOMIC(net, count_wq_stats);
		break;
	default:
		WARN_ON_ONCE(1);
	}

	clear_bit(NF_FLOW_HW_PENDING, &offload->flow->flags);
	kfree(offload);
}

static void flow_offload_queue_work(struct flow_offload_work *offload)
{
	struct net *net = read_pnet(&offload->flowtable->net);

	if (offload->cmd == FLOW_CLS_REPLACE) {
		NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count_wq_add);
		queue_work(nf_flow_offload_add_wq, &offload->work);
	} else if (offload->cmd == FLOW_CLS_DESTROY) {
		NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count_wq_del);
		queue_work(nf_flow_offload_del_wq, &offload->work);
	} else {
		NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count_wq_stats);
		queue_work(nf_flow_offload_stats_wq, &offload->work);
	}
}

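/* Allocate a work item, using NF_FLOW_HW_PENDING to guarantee that at
 * most one request per flow is in flight at any time.
 */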
static struct flow_offload_work *
nf_flow_offload_work_alloc(struct nf_flowtable *flowtable,
			   struct flow_offload *flow, unsigned int cmd)
{
	struct flow_offload_work *offload;

	if (test_and_set_bit(NF_FLOW_HW_PENDING, &flow->flags))
		return NULL;

	offload = kzalloc(sizeof(struct flow_offload_work), GFP_ATOMIC);
	if (!offload) {
		clear_bit(NF_FLOW_HW_PENDING, &flow->flags);
		return NULL;
	}
	offload->cmd = cmd;
	offload->flow = flow;
	offload->flowtable = flowtable;
	INIT_WORK(&offload->work, flow_offload_work_handler);

	return offload;
}

void nf_flow_offload_add(struct nf_flowtable *flowtable,
			 struct flow_offload *flow)
{
	struct flow_offload_work *offload;

	offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_REPLACE);
	if (!offload)
		return;

	flow_offload_queue_work(offload);
}

void nf_flow_offload_del(struct nf_flowtable *flowtable,
			 struct flow_offload *flow)
{
	struct flow_offload_work *offload;

	offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_DESTROY);
	if (!offload)
		return;

	set_bit(NF_FLOW_HW_DYING, &flow->flags);
	flow_offload_queue_work(offload);
}

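/* Skip the stats query while at least 90% of the flow timeout still
 * remains; the stats work handler pushes the timeout forward based on
 * reported hardware activity.
 */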
void nf_flow_offload_stats(struct nf_flowtable *flowtable,
			   struct flow_offload *flow)
{
	struct flow_offload_work *offload;
	__s32 delta;

	delta = nf_flow_timeout_delta(flow->timeout);
	if (delta >= (9 * flow_offload_get_timeout(flow)) / 10)
		return;

	offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_STATS);
	if (!offload)
		return;

	flow_offload_queue_work(offload);
}

void nf_flow_table_offload_flush_cleanup(struct nf_flowtable *flowtable)
{
	if (nf_flowtable_hw_offload(flowtable)) {
		flush_workqueue(nf_flow_offload_del_wq);
		nf_flow_table_gc_run(flowtable);
	}
}

void nf_flow_table_offload_flush(struct nf_flowtable *flowtable)
{
	if (nf_flowtable_hw_offload(flowtable)) {
		flush_workqueue(nf_flow_offload_add_wq);
		flush_workqueue(nf_flow_offload_del_wq);
		flush_workqueue(nf_flow_offload_stats_wq);
	}
}

static int nf_flow_table_block_setup(struct nf_flowtable *flowtable,
				     struct flow_block_offload *bo,
				     enum flow_block_command cmd)
{
	struct flow_block_cb *block_cb, *next;
	int err = 0;

	down_write(&flowtable->flow_block_lock);
	switch (cmd) {
	case FLOW_BLOCK_BIND:
		list_splice(&bo->cb_list, &flowtable->flow_block.cb_list);
		break;
	case FLOW_BLOCK_UNBIND:
		list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
			list_del(&block_cb->list);
			flow_block_cb_free(block_cb);
		}
		break;
	default:
		WARN_ON_ONCE(1);
		err = -EOPNOTSUPP;
	}
	up_write(&flowtable->flow_block_lock);

	return err;
}

static void nf_flow_table_block_offload_init(struct flow_block_offload *bo,
					     struct net *net,
					     enum flow_block_command cmd,
					     struct nf_flowtable *flowtable,
					     struct netlink_ext_ack *extack)
{
	memset(bo, 0, sizeof(*bo));
	bo->net		= net;
	bo->block	= &flowtable->flow_block;
	bo->command	= cmd;
	bo->binder_type	= FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
	bo->extack	= extack;
	bo->cb_list_head = &flowtable->flow_block.cb_list;
	INIT_LIST_HEAD(&bo->cb_list);
}

static void nf_flow_table_indr_cleanup(struct flow_block_cb *block_cb)
{
	struct nf_flowtable *flowtable = block_cb->indr.data;
	struct net_device *dev = block_cb->indr.dev;

	nf_flow_table_gc_cleanup(flowtable, dev);
	down_write(&flowtable->flow_block_lock);
	list_del(&block_cb->list);
	list_del(&block_cb->driver_list);
	flow_block_cb_free(block_cb);
	up_write(&flowtable->flow_block_lock);
}

static int nf_flow_table_indr_offload_cmd(struct flow_block_offload *bo,
					  struct nf_flowtable *flowtable,
					  struct net_device *dev,
					  enum flow_block_command cmd,
					  struct netlink_ext_ack *extack)
{
	nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
					 extack);

	return flow_indr_dev_setup_offload(dev, NULL, TC_SETUP_FT, flowtable, bo,
					   nf_flow_table_indr_cleanup);
}

static int nf_flow_table_offload_cmd(struct flow_block_offload *bo,
				     struct nf_flowtable *flowtable,
				     struct net_device *dev,
				     enum flow_block_command cmd,
				     struct netlink_ext_ack *extack)
{
	int err;

	nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
					 extack);
	down_write(&flowtable->flow_block_lock);
	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo);
	up_write(&flowtable->flow_block_lock);
	if (err < 0)
		return err;

	return 0;
}

int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
				struct net_device *dev,
				enum flow_block_command cmd)
{
	struct netlink_ext_ack extack = {};
	struct flow_block_offload bo;
	int err;

	if (!nf_flowtable_hw_offload(flowtable))
		return nf_flow_offload_xdp_setup(flowtable, dev, cmd);

	if (dev->netdev_ops->ndo_setup_tc)
		err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd,
						&extack);
	else
		err = nf_flow_table_indr_offload_cmd(&bo, flowtable, dev, cmd,
						     &extack);
	if (err < 0)
		return err;

	return nf_flow_table_block_setup(flowtable, &bo, cmd);
}
EXPORT_SYMBOL_GPL(nf_flow_table_offload_setup);

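/* Create the three workqueues used to process add, delete and stats
 * requests asynchronously.
 */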
int nf_flow_table_offload_init(void)
{
	nf_flow_offload_add_wq  = alloc_workqueue("nf_ft_offload_add",
						  WQ_UNBOUND | WQ_SYSFS, 0);
	if (!nf_flow_offload_add_wq)
		return -ENOMEM;

	nf_flow_offload_del_wq  = alloc_workqueue("nf_ft_offload_del",
						  WQ_UNBOUND | WQ_SYSFS, 0);
	if (!nf_flow_offload_del_wq)
		goto err_del_wq;

	nf_flow_offload_stats_wq  = alloc_workqueue("nf_ft_offload_stats",
						    WQ_UNBOUND | WQ_SYSFS, 0);
	if (!nf_flow_offload_stats_wq)
		goto err_stats_wq;

	return 0;

err_stats_wq:
	destroy_workqueue(nf_flow_offload_del_wq);
err_del_wq:
	destroy_workqueue(nf_flow_offload_add_wq);
	return -ENOMEM;
}

void nf_flow_table_offload_exit(void)
{
	destroy_workqueue(nf_flow_offload_add_wq);
	destroy_workqueue(nf_flow_offload_del_wq);
	destroy_workqueue(nf_flow_offload_stats_wq);
}