xref: /linux/net/netfilter/nf_flow_table_offload.c (revision 00afb1811fa638dacf125dd1c343b7a181624dfd)
1 #include <linux/kernel.h>
2 #include <linux/init.h>
3 #include <linux/module.h>
4 #include <linux/netfilter.h>
5 #include <linux/rhashtable.h>
6 #include <linux/netdevice.h>
7 #include <linux/tc_act/tc_csum.h>
8 #include <net/flow_offload.h>
9 #include <net/ip_tunnels.h>
10 #include <net/netfilter/nf_flow_table.h>
11 #include <net/netfilter/nf_tables.h>
12 #include <net/netfilter/nf_conntrack.h>
13 #include <net/netfilter/nf_conntrack_acct.h>
14 #include <net/netfilter/nf_conntrack_core.h>
15 #include <net/netfilter/nf_conntrack_tuple.h>
16 
17 #define NF_FLOW_RULE_ACTION_MAX	24
18 
19 static struct workqueue_struct *nf_flow_offload_add_wq;
20 static struct workqueue_struct *nf_flow_offload_del_wq;
21 static struct workqueue_struct *nf_flow_offload_stats_wq;
22 
struct flow_offload_work {
	struct list_head	list;		/* linkage on a pending-work list */
	enum flow_cls_command	cmd;		/* REPLACE/DESTROY/STATS request */
	struct nf_flowtable	*flowtable;	/* flowtable the flow belongs to */
	struct flow_offload	*flow;		/* flow entry being offloaded */
	struct work_struct	work;		/* workqueue item that runs @cmd */
};
30 
31 #define NF_FLOW_DISSECTOR(__match, __type, __field)	\
32 	(__match)->dissector.offset[__type] =		\
33 		offsetof(struct nf_flow_key, __field)
34 
nf_flow_rule_lwt_match(struct nf_flow_match * match,struct ip_tunnel_info * tun_info)35 static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
36 				   struct ip_tunnel_info *tun_info)
37 {
38 	struct nf_flow_key *mask = &match->mask;
39 	struct nf_flow_key *key = &match->key;
40 	unsigned long long enc_keys;
41 
42 	if (!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX))
43 		return;
44 
45 	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_CONTROL, enc_control);
46 	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
47 	key->enc_key_id.keyid = tunnel_id_to_key32(tun_info->key.tun_id);
48 	mask->enc_key_id.keyid = 0xffffffff;
49 	enc_keys = BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID) |
50 		   BIT_ULL(FLOW_DISSECTOR_KEY_ENC_CONTROL);
51 
52 	if (ip_tunnel_info_af(tun_info) == AF_INET) {
53 		NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
54 				  enc_ipv4);
55 		key->enc_ipv4.src = tun_info->key.u.ipv4.dst;
56 		key->enc_ipv4.dst = tun_info->key.u.ipv4.src;
57 		if (key->enc_ipv4.src)
58 			mask->enc_ipv4.src = 0xffffffff;
59 		if (key->enc_ipv4.dst)
60 			mask->enc_ipv4.dst = 0xffffffff;
61 		enc_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS);
62 		key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
63 	} else {
64 		memcpy(&key->enc_ipv6.src, &tun_info->key.u.ipv6.dst,
65 		       sizeof(struct in6_addr));
66 		memcpy(&key->enc_ipv6.dst, &tun_info->key.u.ipv6.src,
67 		       sizeof(struct in6_addr));
68 		if (memcmp(&key->enc_ipv6.src, &in6addr_any,
69 			   sizeof(struct in6_addr)))
70 			memset(&mask->enc_ipv6.src, 0xff,
71 			       sizeof(struct in6_addr));
72 		if (memcmp(&key->enc_ipv6.dst, &in6addr_any,
73 			   sizeof(struct in6_addr)))
74 			memset(&mask->enc_ipv6.dst, 0xff,
75 			       sizeof(struct in6_addr));
76 		enc_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS);
77 		key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
78 	}
79 
80 	match->dissector.used_keys |= enc_keys;
81 }
82 
/* Fill in an exact match on VLAN id and TPID. */
static void nf_flow_rule_vlan_match(struct flow_dissector_key_vlan *key,
				    struct flow_dissector_key_vlan *mask,
				    u16 vlan_id, __be16 proto)
{
	key->vlan_tpid = proto;
	mask->vlan_tpid = 0xffff;
	key->vlan_id = vlan_id;
	mask->vlan_id = VLAN_VID_MASK;
}
92 
nf_flow_rule_match(struct nf_flow_match * match,const struct flow_offload_tuple * tuple,struct dst_entry * other_dst)93 static int nf_flow_rule_match(struct nf_flow_match *match,
94 			      const struct flow_offload_tuple *tuple,
95 			      struct dst_entry *other_dst)
96 {
97 	struct nf_flow_key *mask = &match->mask;
98 	struct nf_flow_key *key = &match->key;
99 	struct ip_tunnel_info *tun_info;
100 	bool vlan_encap = false;
101 
102 	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_META, meta);
103 	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control);
104 	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_BASIC, basic);
105 	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
106 	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
107 	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_TCP, tcp);
108 	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_PORTS, tp);
109 
110 	if (other_dst && other_dst->lwtstate) {
111 		tun_info = lwt_tun_info(other_dst->lwtstate);
112 		nf_flow_rule_lwt_match(match, tun_info);
113 	}
114 
115 	if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_TC)
116 		key->meta.ingress_ifindex = tuple->tc.iifidx;
117 	else
118 		key->meta.ingress_ifindex = tuple->iifidx;
119 
120 	mask->meta.ingress_ifindex = 0xffffffff;
121 
122 	if (tuple->encap_num > 0 && !(tuple->in_vlan_ingress & BIT(0)) &&
123 	    tuple->encap[0].proto == htons(ETH_P_8021Q)) {
124 		NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN, vlan);
125 		nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
126 					tuple->encap[0].id,
127 					tuple->encap[0].proto);
128 		vlan_encap = true;
129 	}
130 
131 	if (tuple->encap_num > 1 && !(tuple->in_vlan_ingress & BIT(1)) &&
132 	    tuple->encap[1].proto == htons(ETH_P_8021Q)) {
133 		if (vlan_encap) {
134 			NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CVLAN,
135 					  cvlan);
136 			nf_flow_rule_vlan_match(&key->cvlan, &mask->cvlan,
137 						tuple->encap[1].id,
138 						tuple->encap[1].proto);
139 		} else {
140 			NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN,
141 					  vlan);
142 			nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
143 						tuple->encap[1].id,
144 						tuple->encap[1].proto);
145 		}
146 	}
147 
148 	switch (tuple->l3proto) {
149 	case AF_INET:
150 		key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
151 		key->basic.n_proto = htons(ETH_P_IP);
152 		key->ipv4.src = tuple->src_v4.s_addr;
153 		mask->ipv4.src = 0xffffffff;
154 		key->ipv4.dst = tuple->dst_v4.s_addr;
155 		mask->ipv4.dst = 0xffffffff;
156 		break;
157        case AF_INET6:
158 		key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
159 		key->basic.n_proto = htons(ETH_P_IPV6);
160 		key->ipv6.src = tuple->src_v6;
161 		memset(&mask->ipv6.src, 0xff, sizeof(mask->ipv6.src));
162 		key->ipv6.dst = tuple->dst_v6;
163 		memset(&mask->ipv6.dst, 0xff, sizeof(mask->ipv6.dst));
164 		break;
165 	default:
166 		return -EOPNOTSUPP;
167 	}
168 	mask->control.addr_type = 0xffff;
169 	match->dissector.used_keys |= BIT_ULL(key->control.addr_type);
170 	mask->basic.n_proto = 0xffff;
171 
172 	switch (tuple->l4proto) {
173 	case IPPROTO_TCP:
174 		key->tcp.flags = 0;
175 		mask->tcp.flags = cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16);
176 		match->dissector.used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_TCP);
177 		break;
178 	case IPPROTO_UDP:
179 	case IPPROTO_GRE:
180 		break;
181 	default:
182 		return -EOPNOTSUPP;
183 	}
184 
185 	key->basic.ip_proto = tuple->l4proto;
186 	mask->basic.ip_proto = 0xff;
187 
188 	match->dissector.used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_META) |
189 				      BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL) |
190 				      BIT_ULL(FLOW_DISSECTOR_KEY_BASIC);
191 
192 	switch (tuple->l4proto) {
193 	case IPPROTO_TCP:
194 	case IPPROTO_UDP:
195 		key->tp.src = tuple->src_port;
196 		mask->tp.src = 0xffff;
197 		key->tp.dst = tuple->dst_port;
198 		mask->tp.dst = 0xffff;
199 
200 		match->dissector.used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_PORTS);
201 		break;
202 	}
203 
204 	return 0;
205 }
206 
/* Initialize @entry as a 32-bit masked header rewrite (mangle) action. */
static void flow_offload_mangle(struct flow_action_entry *entry,
				enum flow_action_mangle_base htype, u32 offset,
				const __be32 *value, const __be32 *mask)
{
	entry->id = FLOW_ACTION_MANGLE;
	entry->mangle.htype = htype;
	entry->mangle.offset = offset;
	memcpy(&entry->mangle.val, value, sizeof(u32));
	memcpy(&entry->mangle.mask, mask, sizeof(u32));
}
217 
218 static inline struct flow_action_entry *
flow_action_entry_next(struct nf_flow_rule * flow_rule)219 flow_action_entry_next(struct nf_flow_rule *flow_rule)
220 {
221 	int i;
222 
223 	if (unlikely(flow_rule->rule->action.num_entries >= NF_FLOW_RULE_ACTION_MAX))
224 		return NULL;
225 
226 	i = flow_rule->rule->action.num_entries++;
227 
228 	return &flow_rule->rule->action.entries[i];
229 }
230 
/* Emit two mangle actions rewriting the Ethernet source address for
 * direction @dir.  The new MAC comes from the cached egress info (direct
 * xmit) or from the device indexed by the reverse tuple (neigh xmit).
 * Returns 0, -E2BIG if the rule is full, -ENOENT if the device is gone,
 * or -EOPNOTSUPP for other xmit types.
 */
static int flow_offload_eth_src(struct net *net,
				const struct flow_offload *flow,
				enum flow_offload_tuple_dir dir,
				struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
	struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
	const struct flow_offload_tuple *other_tuple, *this_tuple;
	struct net_device *dev = NULL;
	const unsigned char *addr;
	u32 mask, val;
	u16 val16;

	if (!entry0 || !entry1)
		return -E2BIG;

	this_tuple = &flow->tuplehash[dir].tuple;

	switch (this_tuple->xmit_type) {
	case FLOW_OFFLOAD_XMIT_DIRECT:
		addr = this_tuple->out.h_source;
		break;
	case FLOW_OFFLOAD_XMIT_NEIGH:
		/* holds a reference on dev; released below */
		other_tuple = &flow->tuplehash[!dir].tuple;
		dev = dev_get_by_index(net, other_tuple->iifidx);
		if (!dev)
			return -ENOENT;

		addr = dev->dev_addr;
		break;
	default:
		return -EOPNOTSUPP;
	}

	/* The 6-byte source MAC spans two 32-bit words of the Ethernet
	 * header (offsets 4 and 8): first 2 bytes go into the upper half of
	 * word 1, the remaining 4 bytes fill word 2.
	 */
	mask = ~0xffff0000;
	memcpy(&val16, addr, 2);
	val = val16 << 16;
	flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
			    &val, &mask);

	mask = ~0xffffffff;
	memcpy(&val, addr + 2, 4);
	flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8,
			    &val, &mask);

	/* dev is NULL on the direct-xmit path; dev_put(NULL) is a no-op */
	dev_put(dev);

	return 0;
}
280 
/* Emit two mangle actions rewriting the Ethernet destination address for
 * direction @dir.  The MAC comes from the cached egress info (direct xmit)
 * or from a neighbour lookup on this direction's route (neigh xmit).
 * Returns 0, -E2BIG if the rule is full, -ENOENT if no valid neighbour
 * exists, or -EOPNOTSUPP for other xmit types.
 */
static int flow_offload_eth_dst(struct net *net,
				const struct flow_offload *flow,
				enum flow_offload_tuple_dir dir,
				struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
	struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
	const struct flow_offload_tuple *other_tuple, *this_tuple;
	const struct dst_entry *dst_cache;
	unsigned char ha[ETH_ALEN];
	struct neighbour *n;
	const void *daddr;
	u32 mask, val;
	u8 nud_state;
	u16 val16;

	if (!entry0 || !entry1)
		return -E2BIG;

	this_tuple = &flow->tuplehash[dir].tuple;

	switch (this_tuple->xmit_type) {
	case FLOW_OFFLOAD_XMIT_DIRECT:
		ether_addr_copy(ha, this_tuple->out.h_dest);
		break;
	case FLOW_OFFLOAD_XMIT_NEIGH:
		other_tuple = &flow->tuplehash[!dir].tuple;
		daddr = &other_tuple->src_v4;
		dst_cache = this_tuple->dst_cache;
		n = dst_neigh_lookup(dst_cache, daddr);
		if (!n)
			return -ENOENT;

		/* Snapshot state and hardware address under the neighbour
		 * lock, then drop the reference before checking validity.
		 */
		read_lock_bh(&n->lock);
		nud_state = n->nud_state;
		ether_addr_copy(ha, n->ha);
		read_unlock_bh(&n->lock);
		neigh_release(n);

		if (!(nud_state & NUD_VALID))
			return -ENOENT;
		break;
	default:
		return -EOPNOTSUPP;
	}

	/* Destination MAC occupies bytes 0-5: a full 32-bit word at offset 0
	 * plus the lower half of the word at offset 4.
	 */
	mask = ~0xffffffff;
	memcpy(&val, ha, 4);
	flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 0,
			    &val, &mask);

	mask = ~0x0000ffff;
	memcpy(&val16, ha + 4, 2);
	val = val16;
	flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
			    &val, &mask);

	return 0;
}
340 
flow_offload_ipv4_snat(struct net * net,const struct flow_offload * flow,enum flow_offload_tuple_dir dir,struct nf_flow_rule * flow_rule)341 static int flow_offload_ipv4_snat(struct net *net,
342 				  const struct flow_offload *flow,
343 				  enum flow_offload_tuple_dir dir,
344 				  struct nf_flow_rule *flow_rule)
345 {
346 	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
347 	u32 mask = ~htonl(0xffffffff);
348 	__be32 addr;
349 	u32 offset;
350 
351 	if (!entry)
352 		return -E2BIG;
353 
354 	switch (dir) {
355 	case FLOW_OFFLOAD_DIR_ORIGINAL:
356 		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
357 		offset = offsetof(struct iphdr, saddr);
358 		break;
359 	case FLOW_OFFLOAD_DIR_REPLY:
360 		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
361 		offset = offsetof(struct iphdr, daddr);
362 		break;
363 	default:
364 		return -EOPNOTSUPP;
365 	}
366 
367 	flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
368 			    &addr, &mask);
369 	return 0;
370 }
371 
flow_offload_ipv4_dnat(struct net * net,const struct flow_offload * flow,enum flow_offload_tuple_dir dir,struct nf_flow_rule * flow_rule)372 static int flow_offload_ipv4_dnat(struct net *net,
373 				  const struct flow_offload *flow,
374 				  enum flow_offload_tuple_dir dir,
375 				  struct nf_flow_rule *flow_rule)
376 {
377 	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
378 	u32 mask = ~htonl(0xffffffff);
379 	__be32 addr;
380 	u32 offset;
381 
382 	if (!entry)
383 		return -E2BIG;
384 
385 	switch (dir) {
386 	case FLOW_OFFLOAD_DIR_ORIGINAL:
387 		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
388 		offset = offsetof(struct iphdr, daddr);
389 		break;
390 	case FLOW_OFFLOAD_DIR_REPLY:
391 		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
392 		offset = offsetof(struct iphdr, saddr);
393 		break;
394 	default:
395 		return -EOPNOTSUPP;
396 	}
397 
398 	flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
399 			    &addr, &mask);
400 	return 0;
401 }
402 
flow_offload_ipv6_mangle(struct nf_flow_rule * flow_rule,unsigned int offset,const __be32 * addr,const __be32 * mask)403 static int flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule,
404 				     unsigned int offset,
405 				     const __be32 *addr, const __be32 *mask)
406 {
407 	struct flow_action_entry *entry;
408 	int i;
409 
410 	for (i = 0; i < sizeof(struct in6_addr) / sizeof(u32); i++) {
411 		entry = flow_action_entry_next(flow_rule);
412 		if (!entry)
413 			return -E2BIG;
414 
415 		flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6,
416 				    offset + i * sizeof(u32), &addr[i], mask);
417 	}
418 
419 	return 0;
420 }
421 
flow_offload_ipv6_snat(struct net * net,const struct flow_offload * flow,enum flow_offload_tuple_dir dir,struct nf_flow_rule * flow_rule)422 static int flow_offload_ipv6_snat(struct net *net,
423 				  const struct flow_offload *flow,
424 				  enum flow_offload_tuple_dir dir,
425 				  struct nf_flow_rule *flow_rule)
426 {
427 	u32 mask = ~htonl(0xffffffff);
428 	const __be32 *addr;
429 	u32 offset;
430 
431 	switch (dir) {
432 	case FLOW_OFFLOAD_DIR_ORIGINAL:
433 		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6.s6_addr32;
434 		offset = offsetof(struct ipv6hdr, saddr);
435 		break;
436 	case FLOW_OFFLOAD_DIR_REPLY:
437 		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6.s6_addr32;
438 		offset = offsetof(struct ipv6hdr, daddr);
439 		break;
440 	default:
441 		return -EOPNOTSUPP;
442 	}
443 
444 	return flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
445 }
446 
flow_offload_ipv6_dnat(struct net * net,const struct flow_offload * flow,enum flow_offload_tuple_dir dir,struct nf_flow_rule * flow_rule)447 static int flow_offload_ipv6_dnat(struct net *net,
448 				  const struct flow_offload *flow,
449 				  enum flow_offload_tuple_dir dir,
450 				  struct nf_flow_rule *flow_rule)
451 {
452 	u32 mask = ~htonl(0xffffffff);
453 	const __be32 *addr;
454 	u32 offset;
455 
456 	switch (dir) {
457 	case FLOW_OFFLOAD_DIR_ORIGINAL:
458 		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6.s6_addr32;
459 		offset = offsetof(struct ipv6hdr, daddr);
460 		break;
461 	case FLOW_OFFLOAD_DIR_REPLY:
462 		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6.s6_addr32;
463 		offset = offsetof(struct ipv6hdr, saddr);
464 		break;
465 	default:
466 		return -EOPNOTSUPP;
467 	}
468 
469 	return flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
470 }
471 
flow_offload_l4proto(const struct flow_offload * flow)472 static int flow_offload_l4proto(const struct flow_offload *flow)
473 {
474 	u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
475 	u8 type = 0;
476 
477 	switch (protonum) {
478 	case IPPROTO_TCP:
479 		type = FLOW_ACT_MANGLE_HDR_TYPE_TCP;
480 		break;
481 	case IPPROTO_UDP:
482 		type = FLOW_ACT_MANGLE_HDR_TYPE_UDP;
483 		break;
484 	default:
485 		break;
486 	}
487 
488 	return type;
489 }
490 
/* Add a mangle action that applies source-port NAT for direction @dir.
 * Returns 0, -E2BIG if the rule is full, or -EOPNOTSUPP for an unknown
 * direction.
 */
static int flow_offload_port_snat(struct net *net,
				  const struct flow_offload *flow,
				  enum flow_offload_tuple_dir dir,
				  struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
	u32 mask, port;
	u32 offset;

	if (!entry)
		return -E2BIG;

	/* Both ports share the first 32-bit word of the TCP/UDP header, so
	 * the mangle always targets offset 0 and the mask selects which
	 * 16-bit half (source = upper, destination = lower) is rewritten.
	 */
	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port);
		offset = 0; /* offsetof(struct tcphdr, source); */
		port = htonl(port << 16);
		mask = ~htonl(0xffff0000);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port);
		offset = 0; /* offsetof(struct tcphdr, dest); */
		port = htonl(port);
		mask = ~htonl(0xffff);
		break;
	default:
		return -EOPNOTSUPP;
	}

	flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
			    &port, &mask);
	return 0;
}
524 
/* Add a mangle action that applies destination-port NAT for direction
 * @dir.  Returns 0, -E2BIG if the rule is full, or -EOPNOTSUPP for an
 * unknown direction.
 */
static int flow_offload_port_dnat(struct net *net,
				  const struct flow_offload *flow,
				  enum flow_offload_tuple_dir dir,
				  struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
	u32 mask, port;
	u32 offset;

	if (!entry)
		return -E2BIG;

	/* Mirror of flow_offload_port_snat(): mangle the 32-bit word at
	 * offset 0 of the transport header, masking off the half that must
	 * be preserved (destination port = lower 16 bits on ORIGINAL,
	 * source port = upper 16 bits on REPLY).
	 */
	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port);
		offset = 0; /* offsetof(struct tcphdr, dest); */
		port = htonl(port);
		mask = ~htonl(0xffff);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port);
		offset = 0; /* offsetof(struct tcphdr, source); */
		port = htonl(port << 16);
		mask = ~htonl(0xffff0000);
		break;
	default:
		return -EOPNOTSUPP;
	}

	flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
			    &port, &mask);
	return 0;
}
558 
flow_offload_ipv4_checksum(struct net * net,const struct flow_offload * flow,struct nf_flow_rule * flow_rule)559 static int flow_offload_ipv4_checksum(struct net *net,
560 				      const struct flow_offload *flow,
561 				      struct nf_flow_rule *flow_rule)
562 {
563 	u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
564 	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
565 
566 	if (!entry)
567 		return -E2BIG;
568 
569 	entry->id = FLOW_ACTION_CSUM;
570 	entry->csum_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR;
571 
572 	switch (protonum) {
573 	case IPPROTO_TCP:
574 		entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_TCP;
575 		break;
576 	case IPPROTO_UDP:
577 		entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_UDP;
578 		break;
579 	}
580 
581 	return 0;
582 }
583 
flow_offload_redirect(struct net * net,const struct flow_offload * flow,enum flow_offload_tuple_dir dir,struct nf_flow_rule * flow_rule)584 static int flow_offload_redirect(struct net *net,
585 				 const struct flow_offload *flow,
586 				 enum flow_offload_tuple_dir dir,
587 				 struct nf_flow_rule *flow_rule)
588 {
589 	const struct flow_offload_tuple *this_tuple, *other_tuple;
590 	struct flow_action_entry *entry;
591 	struct net_device *dev;
592 	int ifindex;
593 
594 	this_tuple = &flow->tuplehash[dir].tuple;
595 	switch (this_tuple->xmit_type) {
596 	case FLOW_OFFLOAD_XMIT_DIRECT:
597 		this_tuple = &flow->tuplehash[dir].tuple;
598 		ifindex = this_tuple->out.ifidx;
599 		break;
600 	case FLOW_OFFLOAD_XMIT_NEIGH:
601 		other_tuple = &flow->tuplehash[!dir].tuple;
602 		ifindex = other_tuple->iifidx;
603 		break;
604 	default:
605 		return -EOPNOTSUPP;
606 	}
607 
608 	dev = dev_get_by_index(net, ifindex);
609 	if (!dev)
610 		return -ENODEV;
611 
612 	entry = flow_action_entry_next(flow_rule);
613 	if (!entry) {
614 		dev_put(dev);
615 		return -E2BIG;
616 	}
617 
618 	entry->id = FLOW_ACTION_REDIRECT;
619 	entry->dev = dev;
620 
621 	return 0;
622 }
623 
flow_offload_encap_tunnel(const struct flow_offload * flow,enum flow_offload_tuple_dir dir,struct nf_flow_rule * flow_rule)624 static int flow_offload_encap_tunnel(const struct flow_offload *flow,
625 				     enum flow_offload_tuple_dir dir,
626 				     struct nf_flow_rule *flow_rule)
627 {
628 	const struct flow_offload_tuple *this_tuple;
629 	struct flow_action_entry *entry;
630 	struct dst_entry *dst;
631 
632 	this_tuple = &flow->tuplehash[dir].tuple;
633 	if (this_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
634 		return 0;
635 
636 	dst = this_tuple->dst_cache;
637 	if (dst && dst->lwtstate) {
638 		struct ip_tunnel_info *tun_info;
639 
640 		tun_info = lwt_tun_info(dst->lwtstate);
641 		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
642 			entry = flow_action_entry_next(flow_rule);
643 			if (!entry)
644 				return -E2BIG;
645 			entry->id = FLOW_ACTION_TUNNEL_ENCAP;
646 			entry->tunnel = tun_info;
647 		}
648 	}
649 
650 	return 0;
651 }
652 
flow_offload_decap_tunnel(const struct flow_offload * flow,enum flow_offload_tuple_dir dir,struct nf_flow_rule * flow_rule)653 static int flow_offload_decap_tunnel(const struct flow_offload *flow,
654 				     enum flow_offload_tuple_dir dir,
655 				     struct nf_flow_rule *flow_rule)
656 {
657 	const struct flow_offload_tuple *other_tuple;
658 	struct flow_action_entry *entry;
659 	struct dst_entry *dst;
660 
661 	other_tuple = &flow->tuplehash[!dir].tuple;
662 	if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
663 		return 0;
664 
665 	dst = other_tuple->dst_cache;
666 	if (dst && dst->lwtstate) {
667 		struct ip_tunnel_info *tun_info;
668 
669 		tun_info = lwt_tun_info(dst->lwtstate);
670 		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
671 			entry = flow_action_entry_next(flow_rule);
672 			if (!entry)
673 				return -E2BIG;
674 			entry->id = FLOW_ACTION_TUNNEL_DECAP;
675 		}
676 	}
677 
678 	return 0;
679 }
680 
/* Emit the actions shared by the IPv4 and IPv6 route types, in order:
 * tunnel decap/encap, Ethernet src/dst rewrite, VLAN pops for tags matched
 * on ingress, then VLAN/PPPoE pushes for the egress encapsulation.  The
 * ordering is significant: it mirrors the packet transformation sequence.
 * Returns 0 on success or -1 on any failure.
 */
static int
nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow,
			  enum flow_offload_tuple_dir dir,
			  struct nf_flow_rule *flow_rule)
{
	const struct flow_offload_tuple *other_tuple;
	const struct flow_offload_tuple *tuple;
	int i;

	if (flow_offload_decap_tunnel(flow, dir, flow_rule) < 0 ||
	    flow_offload_encap_tunnel(flow, dir, flow_rule) < 0)
		return -1;

	if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
	    flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
		return -1;

	tuple = &flow->tuplehash[dir].tuple;

	/* Pop the 802.1Q tags present on ingress for this direction, except
	 * those the ingress device already stripped (in_vlan_ingress bits).
	 */
	for (i = 0; i < tuple->encap_num; i++) {
		struct flow_action_entry *entry;

		if (tuple->in_vlan_ingress & BIT(i))
			continue;

		if (tuple->encap[i].proto == htons(ETH_P_8021Q)) {
			entry = flow_action_entry_next(flow_rule);
			if (!entry)
				return -1;
			entry->id = FLOW_ACTION_VLAN_POP;
		}
	}

	other_tuple = &flow->tuplehash[!dir].tuple;

	/* Push the encapsulation the reverse direction saw on its ingress:
	 * that is what the egress path for this direction must emit.
	 */
	for (i = 0; i < other_tuple->encap_num; i++) {
		struct flow_action_entry *entry;

		if (other_tuple->in_vlan_ingress & BIT(i))
			continue;

		entry = flow_action_entry_next(flow_rule);
		if (!entry)
			return -1;

		switch (other_tuple->encap[i].proto) {
		case htons(ETH_P_PPP_SES):
			entry->id = FLOW_ACTION_PPPOE_PUSH;
			entry->pppoe.sid = other_tuple->encap[i].id;
			break;
		case htons(ETH_P_8021Q):
			entry->id = FLOW_ACTION_VLAN_PUSH;
			entry->vlan.vid = other_tuple->encap[i].id;
			entry->vlan.proto = other_tuple->encap[i].proto;
			break;
		}
	}

	return 0;
}
741 
nf_flow_rule_route_ipv4(struct net * net,struct flow_offload * flow,enum flow_offload_tuple_dir dir,struct nf_flow_rule * flow_rule)742 int nf_flow_rule_route_ipv4(struct net *net, struct flow_offload *flow,
743 			    enum flow_offload_tuple_dir dir,
744 			    struct nf_flow_rule *flow_rule)
745 {
746 	if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
747 		return -1;
748 
749 	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
750 		if (flow_offload_ipv4_snat(net, flow, dir, flow_rule) < 0 ||
751 		    flow_offload_port_snat(net, flow, dir, flow_rule) < 0)
752 			return -1;
753 	}
754 	if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
755 		if (flow_offload_ipv4_dnat(net, flow, dir, flow_rule) < 0 ||
756 		    flow_offload_port_dnat(net, flow, dir, flow_rule) < 0)
757 			return -1;
758 	}
759 	if (test_bit(NF_FLOW_SNAT, &flow->flags) ||
760 	    test_bit(NF_FLOW_DNAT, &flow->flags))
761 		if (flow_offload_ipv4_checksum(net, flow, flow_rule) < 0)
762 			return -1;
763 
764 	if (flow_offload_redirect(net, flow, dir, flow_rule) < 0)
765 		return -1;
766 
767 	return 0;
768 }
769 EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv4);
770 
nf_flow_rule_route_ipv6(struct net * net,struct flow_offload * flow,enum flow_offload_tuple_dir dir,struct nf_flow_rule * flow_rule)771 int nf_flow_rule_route_ipv6(struct net *net, struct flow_offload *flow,
772 			    enum flow_offload_tuple_dir dir,
773 			    struct nf_flow_rule *flow_rule)
774 {
775 	if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
776 		return -1;
777 
778 	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
779 		if (flow_offload_ipv6_snat(net, flow, dir, flow_rule) < 0 ||
780 		    flow_offload_port_snat(net, flow, dir, flow_rule) < 0)
781 			return -1;
782 	}
783 	if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
784 		if (flow_offload_ipv6_dnat(net, flow, dir, flow_rule) < 0 ||
785 		    flow_offload_port_dnat(net, flow, dir, flow_rule) < 0)
786 			return -1;
787 	}
788 
789 	if (flow_offload_redirect(net, flow, dir, flow_rule) < 0)
790 		return -1;
791 
792 	return 0;
793 }
794 EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv6);
795 
/* Allocate and populate one hardware rule (match + actions) for direction
 * @dir of the flow.  Returns the rule on success, NULL on allocation or
 * translation failure; resources are unwound via the goto chain.
 */
static struct nf_flow_rule *
nf_flow_offload_rule_alloc(struct net *net,
			   const struct flow_offload_work *offload,
			   enum flow_offload_tuple_dir dir)
{
	const struct nf_flowtable *flowtable = offload->flowtable;
	const struct flow_offload_tuple *tuple, *other_tuple;
	struct flow_offload *flow = offload->flow;
	struct dst_entry *other_dst = NULL;
	struct nf_flow_rule *flow_rule;
	int err = -ENOMEM;

	flow_rule = kzalloc_obj(*flow_rule);
	if (!flow_rule)
		goto err_flow;

	flow_rule->rule = flow_rule_alloc(NF_FLOW_RULE_ACTION_MAX);
	if (!flow_rule->rule)
		goto err_flow_rule;

	/* Point the offload rule at the embedded match storage. */
	flow_rule->rule->match.dissector = &flow_rule->match.dissector;
	flow_rule->rule->match.mask = &flow_rule->match.mask;
	flow_rule->rule->match.key = &flow_rule->match.key;

	/* Tunnel match data, if any, lives on the reverse route. */
	tuple = &flow->tuplehash[dir].tuple;
	other_tuple = &flow->tuplehash[!dir].tuple;
	if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)
		other_dst = other_tuple->dst_cache;

	err = nf_flow_rule_match(&flow_rule->match, tuple, other_dst);
	if (err < 0)
		goto err_flow_match;

	/* Let the flowtable type (route/xdp/...) emit the action list. */
	flow_rule->rule->action.num_entries = 0;
	if (flowtable->type->action(net, flow, dir, flow_rule) < 0)
		goto err_flow_match;

	return flow_rule;

err_flow_match:
	kfree(flow_rule->rule);
err_flow_rule:
	kfree(flow_rule);
err_flow:
	return NULL;
}
842 
__nf_flow_offload_destroy(struct nf_flow_rule * flow_rule)843 static void __nf_flow_offload_destroy(struct nf_flow_rule *flow_rule)
844 {
845 	struct flow_action_entry *entry;
846 	int i;
847 
848 	for (i = 0; i < flow_rule->rule->action.num_entries; i++) {
849 		entry = &flow_rule->rule->action.entries[i];
850 		if (entry->id != FLOW_ACTION_REDIRECT)
851 			continue;
852 
853 		dev_put(entry->dev);
854 	}
855 	kfree(flow_rule->rule);
856 	kfree(flow_rule);
857 }
858 
nf_flow_offload_destroy(struct nf_flow_rule * flow_rule[])859 static void nf_flow_offload_destroy(struct nf_flow_rule *flow_rule[])
860 {
861 	int i;
862 
863 	for (i = 0; i < FLOW_OFFLOAD_DIR_MAX; i++)
864 		__nf_flow_offload_destroy(flow_rule[i]);
865 }
866 
nf_flow_offload_alloc(const struct flow_offload_work * offload,struct nf_flow_rule * flow_rule[])867 static int nf_flow_offload_alloc(const struct flow_offload_work *offload,
868 				 struct nf_flow_rule *flow_rule[])
869 {
870 	struct net *net = read_pnet(&offload->flowtable->net);
871 
872 	flow_rule[0] = nf_flow_offload_rule_alloc(net, offload,
873 						  FLOW_OFFLOAD_DIR_ORIGINAL);
874 	if (!flow_rule[0])
875 		return -ENOMEM;
876 
877 	flow_rule[1] = nf_flow_offload_rule_alloc(net, offload,
878 						  FLOW_OFFLOAD_DIR_REPLY);
879 	if (!flow_rule[1]) {
880 		__nf_flow_offload_destroy(flow_rule[0]);
881 		return -ENOMEM;
882 	}
883 
884 	return 0;
885 }
886 
/* Prepare a classifier offload request.  The tuple pointer doubles as the
 * cookie so add/del/stats calls for the same flow direction correlate.
 */
static void nf_flow_offload_init(struct flow_cls_offload *cls_flow,
				 __be16 proto, int priority,
				 enum flow_cls_command cmd,
				 const struct flow_offload_tuple *tuple,
				 struct netlink_ext_ack *extack)
{
	cls_flow->command = cmd;
	cls_flow->cookie = (unsigned long)tuple;
	cls_flow->common.protocol = proto;
	cls_flow->common.prio = priority;
	cls_flow->common.extack = extack;
}
899 
/* Fan a classifier command (@cmd) for one flow direction out to every
 * registered block callback.  Returns the number of callbacks that
 * accepted it; errors from individual callbacks are skipped.  For
 * FLOW_CLS_STATS the accumulated counters are copied into @stats.
 */
static int nf_flow_offload_tuple(struct nf_flowtable *flowtable,
				 struct flow_offload *flow,
				 struct nf_flow_rule *flow_rule,
				 enum flow_offload_tuple_dir dir,
				 int priority, int cmd,
				 struct flow_stats *stats,
				 struct list_head *block_cb_list)
{
	struct flow_cls_offload cls_flow = {};
	struct netlink_ext_ack extack = {};
	struct flow_block_cb *block_cb;
	__be16 proto = ETH_P_ALL;
	int err, i = 0;

	nf_flow_offload_init(&cls_flow, proto, priority, cmd,
			     &flow->tuplehash[dir].tuple, &extack);
	/* Only REPLACE carries a rule payload; DESTROY/STATS go by cookie. */
	if (cmd == FLOW_CLS_REPLACE)
		cls_flow.rule = flow_rule->rule;

	down_read(&flowtable->flow_block_lock);
	list_for_each_entry(block_cb, block_cb_list, list) {
		err = block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow,
				   block_cb->cb_priv);
		if (err < 0)
			continue;

		i++;
	}
	up_read(&flowtable->flow_block_lock);

	if (cmd == FLOW_CLS_STATS)
		memcpy(stats, &cls_flow.stats, sizeof(*stats));

	return i;
}
935 
flow_offload_tuple_add(struct flow_offload_work * offload,struct nf_flow_rule * flow_rule,enum flow_offload_tuple_dir dir)936 static int flow_offload_tuple_add(struct flow_offload_work *offload,
937 				  struct nf_flow_rule *flow_rule,
938 				  enum flow_offload_tuple_dir dir)
939 {
940 	return nf_flow_offload_tuple(offload->flowtable, offload->flow,
941 				     flow_rule, dir,
942 				     offload->flowtable->priority,
943 				     FLOW_CLS_REPLACE, NULL,
944 				     &offload->flowtable->flow_block.cb_list);
945 }
946 
flow_offload_tuple_del(struct flow_offload_work * offload,enum flow_offload_tuple_dir dir)947 static void flow_offload_tuple_del(struct flow_offload_work *offload,
948 				   enum flow_offload_tuple_dir dir)
949 {
950 	nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
951 			      offload->flowtable->priority,
952 			      FLOW_CLS_DESTROY, NULL,
953 			      &offload->flowtable->flow_block.cb_list);
954 }
955 
flow_offload_rule_add(struct flow_offload_work * offload,struct nf_flow_rule * flow_rule[])956 static int flow_offload_rule_add(struct flow_offload_work *offload,
957 				 struct nf_flow_rule *flow_rule[])
958 {
959 	int ok_count = 0;
960 
961 	ok_count += flow_offload_tuple_add(offload, flow_rule[0],
962 					   FLOW_OFFLOAD_DIR_ORIGINAL);
963 	if (test_bit(NF_FLOW_HW_BIDIRECTIONAL, &offload->flow->flags))
964 		ok_count += flow_offload_tuple_add(offload, flow_rule[1],
965 						   FLOW_OFFLOAD_DIR_REPLY);
966 	if (ok_count == 0)
967 		return -ENOENT;
968 
969 	return 0;
970 }
971 
flow_offload_work_add(struct flow_offload_work * offload)972 static void flow_offload_work_add(struct flow_offload_work *offload)
973 {
974 	struct nf_flow_rule *flow_rule[FLOW_OFFLOAD_DIR_MAX];
975 	int err;
976 
977 	err = nf_flow_offload_alloc(offload, flow_rule);
978 	if (err < 0)
979 		return;
980 
981 	err = flow_offload_rule_add(offload, flow_rule);
982 	if (err < 0)
983 		goto out;
984 
985 	set_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
986 
987 out:
988 	nf_flow_offload_destroy(flow_rule);
989 }
990 
flow_offload_work_del(struct flow_offload_work * offload)991 static void flow_offload_work_del(struct flow_offload_work *offload)
992 {
993 	clear_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
994 	flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL);
995 	if (test_bit(NF_FLOW_HW_BIDIRECTIONAL, &offload->flow->flags))
996 		flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
997 	set_bit(NF_FLOW_HW_DEAD, &offload->flow->flags);
998 }
999 
flow_offload_tuple_stats(struct flow_offload_work * offload,enum flow_offload_tuple_dir dir,struct flow_stats * stats)1000 static void flow_offload_tuple_stats(struct flow_offload_work *offload,
1001 				     enum flow_offload_tuple_dir dir,
1002 				     struct flow_stats *stats)
1003 {
1004 	nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
1005 			      offload->flowtable->priority,
1006 			      FLOW_CLS_STATS, stats,
1007 			      &offload->flowtable->flow_block.cb_list);
1008 }
1009 
flow_offload_work_stats(struct flow_offload_work * offload)1010 static void flow_offload_work_stats(struct flow_offload_work *offload)
1011 {
1012 	struct flow_stats stats[FLOW_OFFLOAD_DIR_MAX] = {};
1013 	u64 lastused;
1014 
1015 	flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_ORIGINAL, &stats[0]);
1016 	if (test_bit(NF_FLOW_HW_BIDIRECTIONAL, &offload->flow->flags))
1017 		flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY,
1018 					 &stats[1]);
1019 
1020 	lastused = max_t(u64, stats[0].lastused, stats[1].lastused);
1021 	offload->flow->timeout = max_t(u64, offload->flow->timeout,
1022 				       lastused + flow_offload_get_timeout(offload->flow));
1023 
1024 	if (offload->flowtable->flags & NF_FLOWTABLE_COUNTER) {
1025 		if (stats[0].pkts)
1026 			nf_ct_acct_add(offload->flow->ct,
1027 				       FLOW_OFFLOAD_DIR_ORIGINAL,
1028 				       stats[0].pkts, stats[0].bytes);
1029 		if (stats[1].pkts)
1030 			nf_ct_acct_add(offload->flow->ct,
1031 				       FLOW_OFFLOAD_DIR_REPLY,
1032 				       stats[1].pkts, stats[1].bytes);
1033 	}
1034 }
1035 
flow_offload_work_handler(struct work_struct * work)1036 static void flow_offload_work_handler(struct work_struct *work)
1037 {
1038 	struct flow_offload_work *offload;
1039 	struct net *net;
1040 
1041 	offload = container_of(work, struct flow_offload_work, work);
1042 	net = read_pnet(&offload->flowtable->net);
1043 	switch (offload->cmd) {
1044 		case FLOW_CLS_REPLACE:
1045 			flow_offload_work_add(offload);
1046 			NF_FLOW_TABLE_STAT_DEC_ATOMIC(net, count_wq_add);
1047 			break;
1048 		case FLOW_CLS_DESTROY:
1049 			flow_offload_work_del(offload);
1050 			NF_FLOW_TABLE_STAT_DEC_ATOMIC(net, count_wq_del);
1051 			break;
1052 		case FLOW_CLS_STATS:
1053 			flow_offload_work_stats(offload);
1054 			NF_FLOW_TABLE_STAT_DEC_ATOMIC(net, count_wq_stats);
1055 			break;
1056 		default:
1057 			WARN_ON_ONCE(1);
1058 	}
1059 
1060 	clear_bit(NF_FLOW_HW_PENDING, &offload->flow->flags);
1061 	kfree(offload);
1062 }
1063 
flow_offload_queue_work(struct flow_offload_work * offload)1064 static void flow_offload_queue_work(struct flow_offload_work *offload)
1065 {
1066 	struct net *net = read_pnet(&offload->flowtable->net);
1067 
1068 	if (offload->cmd == FLOW_CLS_REPLACE) {
1069 		NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count_wq_add);
1070 		queue_work(nf_flow_offload_add_wq, &offload->work);
1071 	} else if (offload->cmd == FLOW_CLS_DESTROY) {
1072 		NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count_wq_del);
1073 		queue_work(nf_flow_offload_del_wq, &offload->work);
1074 	} else {
1075 		NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count_wq_stats);
1076 		queue_work(nf_flow_offload_stats_wq, &offload->work);
1077 	}
1078 }
1079 
1080 static struct flow_offload_work *
nf_flow_offload_work_alloc(struct nf_flowtable * flowtable,struct flow_offload * flow,unsigned int cmd)1081 nf_flow_offload_work_alloc(struct nf_flowtable *flowtable,
1082 			   struct flow_offload *flow, unsigned int cmd)
1083 {
1084 	struct flow_offload_work *offload;
1085 
1086 	if (test_and_set_bit(NF_FLOW_HW_PENDING, &flow->flags))
1087 		return NULL;
1088 
1089 	offload = kmalloc_obj(struct flow_offload_work, GFP_ATOMIC);
1090 	if (!offload) {
1091 		clear_bit(NF_FLOW_HW_PENDING, &flow->flags);
1092 		return NULL;
1093 	}
1094 
1095 	offload->cmd = cmd;
1096 	offload->flow = flow;
1097 	offload->flowtable = flowtable;
1098 	INIT_WORK(&offload->work, flow_offload_work_handler);
1099 
1100 	return offload;
1101 }
1102 
1103 
nf_flow_offload_add(struct nf_flowtable * flowtable,struct flow_offload * flow)1104 void nf_flow_offload_add(struct nf_flowtable *flowtable,
1105 			 struct flow_offload *flow)
1106 {
1107 	struct flow_offload_work *offload;
1108 
1109 	offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_REPLACE);
1110 	if (!offload)
1111 		return;
1112 
1113 	flow_offload_queue_work(offload);
1114 }
1115 
nf_flow_offload_del(struct nf_flowtable * flowtable,struct flow_offload * flow)1116 void nf_flow_offload_del(struct nf_flowtable *flowtable,
1117 			 struct flow_offload *flow)
1118 {
1119 	struct flow_offload_work *offload;
1120 
1121 	offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_DESTROY);
1122 	if (!offload)
1123 		return;
1124 
1125 	set_bit(NF_FLOW_HW_DYING, &flow->flags);
1126 	flow_offload_queue_work(offload);
1127 }
1128 
nf_flow_offload_stats(struct nf_flowtable * flowtable,struct flow_offload * flow)1129 void nf_flow_offload_stats(struct nf_flowtable *flowtable,
1130 			   struct flow_offload *flow)
1131 {
1132 	struct flow_offload_work *offload;
1133 	__s32 delta;
1134 
1135 	delta = nf_flow_timeout_delta(flow->timeout);
1136 	if ((delta >= (9 * flow_offload_get_timeout(flow)) / 10))
1137 		return;
1138 
1139 	offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_STATS);
1140 	if (!offload)
1141 		return;
1142 
1143 	flow_offload_queue_work(offload);
1144 }
1145 
nf_flow_table_offload_flush_cleanup(struct nf_flowtable * flowtable)1146 void nf_flow_table_offload_flush_cleanup(struct nf_flowtable *flowtable)
1147 {
1148 	if (nf_flowtable_hw_offload(flowtable)) {
1149 		flush_workqueue(nf_flow_offload_del_wq);
1150 		nf_flow_table_gc_run(flowtable);
1151 	}
1152 }
1153 
nf_flow_table_offload_flush(struct nf_flowtable * flowtable)1154 void nf_flow_table_offload_flush(struct nf_flowtable *flowtable)
1155 {
1156 	if (nf_flowtable_hw_offload(flowtable)) {
1157 		flush_workqueue(nf_flow_offload_add_wq);
1158 		flush_workqueue(nf_flow_offload_del_wq);
1159 		flush_workqueue(nf_flow_offload_stats_wq);
1160 	}
1161 }
1162 
nf_flow_table_block_setup(struct nf_flowtable * flowtable,struct flow_block_offload * bo,enum flow_block_command cmd)1163 static int nf_flow_table_block_setup(struct nf_flowtable *flowtable,
1164 				     struct flow_block_offload *bo,
1165 				     enum flow_block_command cmd)
1166 {
1167 	struct flow_block_cb *block_cb, *next;
1168 	int err = 0;
1169 
1170 	down_write(&flowtable->flow_block_lock);
1171 	switch (cmd) {
1172 	case FLOW_BLOCK_BIND:
1173 		list_splice(&bo->cb_list, &flowtable->flow_block.cb_list);
1174 		break;
1175 	case FLOW_BLOCK_UNBIND:
1176 		list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
1177 			list_del(&block_cb->list);
1178 			flow_block_cb_free(block_cb);
1179 		}
1180 		break;
1181 	default:
1182 		WARN_ON_ONCE(1);
1183 		err = -EOPNOTSUPP;
1184 	}
1185 	up_write(&flowtable->flow_block_lock);
1186 
1187 	return err;
1188 }
1189 
nf_flow_table_block_offload_init(struct flow_block_offload * bo,struct net * net,enum flow_block_command cmd,struct nf_flowtable * flowtable,struct netlink_ext_ack * extack)1190 static void nf_flow_table_block_offload_init(struct flow_block_offload *bo,
1191 					     struct net *net,
1192 					     enum flow_block_command cmd,
1193 					     struct nf_flowtable *flowtable,
1194 					     struct netlink_ext_ack *extack)
1195 {
1196 	memset(bo, 0, sizeof(*bo));
1197 	bo->net		= net;
1198 	bo->block	= &flowtable->flow_block;
1199 	bo->command	= cmd;
1200 	bo->binder_type	= FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
1201 	bo->extack	= extack;
1202 	bo->cb_list_head = &flowtable->flow_block.cb_list;
1203 	INIT_LIST_HEAD(&bo->cb_list);
1204 }
1205 
nf_flow_table_indr_cleanup(struct flow_block_cb * block_cb)1206 static void nf_flow_table_indr_cleanup(struct flow_block_cb *block_cb)
1207 {
1208 	struct nf_flowtable *flowtable = block_cb->indr.data;
1209 	struct net_device *dev = block_cb->indr.dev;
1210 
1211 	nf_flow_table_gc_cleanup(flowtable, dev);
1212 	down_write(&flowtable->flow_block_lock);
1213 	list_del(&block_cb->list);
1214 	list_del(&block_cb->driver_list);
1215 	flow_block_cb_free(block_cb);
1216 	up_write(&flowtable->flow_block_lock);
1217 }
1218 
nf_flow_table_indr_offload_cmd(struct flow_block_offload * bo,struct nf_flowtable * flowtable,struct net_device * dev,enum flow_block_command cmd,struct netlink_ext_ack * extack)1219 static int nf_flow_table_indr_offload_cmd(struct flow_block_offload *bo,
1220 					  struct nf_flowtable *flowtable,
1221 					  struct net_device *dev,
1222 					  enum flow_block_command cmd,
1223 					  struct netlink_ext_ack *extack)
1224 {
1225 	nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
1226 					 extack);
1227 
1228 	return flow_indr_dev_setup_offload(dev, NULL, TC_SETUP_FT, flowtable, bo,
1229 					   nf_flow_table_indr_cleanup);
1230 }
1231 
nf_flow_table_offload_cmd(struct flow_block_offload * bo,struct nf_flowtable * flowtable,struct net_device * dev,enum flow_block_command cmd,struct netlink_ext_ack * extack)1232 static int nf_flow_table_offload_cmd(struct flow_block_offload *bo,
1233 				     struct nf_flowtable *flowtable,
1234 				     struct net_device *dev,
1235 				     enum flow_block_command cmd,
1236 				     struct netlink_ext_ack *extack)
1237 {
1238 	int err;
1239 
1240 	nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
1241 					 extack);
1242 	down_write(&flowtable->flow_block_lock);
1243 	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo);
1244 	up_write(&flowtable->flow_block_lock);
1245 	if (err < 0)
1246 		return err;
1247 
1248 	return 0;
1249 }
1250 
nf_flow_table_offload_setup(struct nf_flowtable * flowtable,struct net_device * dev,enum flow_block_command cmd)1251 int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
1252 				struct net_device *dev,
1253 				enum flow_block_command cmd)
1254 {
1255 	struct netlink_ext_ack extack = {};
1256 	struct flow_block_offload bo;
1257 	int err;
1258 
1259 	if (!nf_flowtable_hw_offload(flowtable))
1260 		return nf_flow_offload_xdp_setup(flowtable, dev, cmd);
1261 
1262 	if (dev->netdev_ops->ndo_setup_tc)
1263 		err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd,
1264 						&extack);
1265 	else
1266 		err = nf_flow_table_indr_offload_cmd(&bo, flowtable, dev, cmd,
1267 						     &extack);
1268 	if (err < 0)
1269 		return err;
1270 
1271 	return nf_flow_table_block_setup(flowtable, &bo, cmd);
1272 }
1273 EXPORT_SYMBOL_GPL(nf_flow_table_offload_setup);
1274 
nf_flow_table_offload_init(void)1275 int nf_flow_table_offload_init(void)
1276 {
1277 	nf_flow_offload_add_wq  = alloc_workqueue("nf_ft_offload_add",
1278 						  WQ_UNBOUND | WQ_SYSFS, 0);
1279 	if (!nf_flow_offload_add_wq)
1280 		return -ENOMEM;
1281 
1282 	nf_flow_offload_del_wq  = alloc_workqueue("nf_ft_offload_del",
1283 						  WQ_UNBOUND | WQ_SYSFS, 0);
1284 	if (!nf_flow_offload_del_wq)
1285 		goto err_del_wq;
1286 
1287 	nf_flow_offload_stats_wq  = alloc_workqueue("nf_ft_offload_stats",
1288 						    WQ_UNBOUND | WQ_SYSFS, 0);
1289 	if (!nf_flow_offload_stats_wq)
1290 		goto err_stats_wq;
1291 
1292 	return 0;
1293 
1294 err_stats_wq:
1295 	destroy_workqueue(nf_flow_offload_del_wq);
1296 err_del_wq:
1297 	destroy_workqueue(nf_flow_offload_add_wq);
1298 	return -ENOMEM;
1299 }
1300 
nf_flow_table_offload_exit(void)1301 void nf_flow_table_offload_exit(void)
1302 {
1303 	destroy_workqueue(nf_flow_offload_add_wq);
1304 	destroy_workqueue(nf_flow_offload_del_wq);
1305 	destroy_workqueue(nf_flow_offload_stats_wq);
1306 }
1307