// SPDX-License-Identifier: GPL-2.0-only
/*
 *	Vxlan vni filter for collect metadata mode
 *
 *	Authors: Roopa Prabhu <roopa@nvidia.com>
 *
 */
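/* Per-VNI filtering lets a single VXLAN device in collect metadata
 * (external) mode accept only an explicitly configured set of VNIs,
 * optionally with a per-VNI remote/multicast group. The filter is
 * driven from userspace over PF_BRIDGE RTM_NEWTUNNEL/RTM_DELTUNNEL/
 * RTM_GETTUNNEL messages; with iproute2 this would typically look
 * something like (illustrative only, assuming a vnifilter-capable
 * kernel and iproute2):
 *
 *   ip link add vxlan0 type vxlan dstport 4789 external vnifilter
 *   bridge vni add dev vxlan0 vni 200 group 239.1.1.1
 */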

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/etherdevice.h>
#include <linux/rhashtable.h>
#include <net/rtnetlink.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/vxlan.h>

#include "vxlan_private.h"

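/* VNIs are kept in an rhashtable keyed by the big-endian VNI value.
 * The compare callback follows the rhashtable convention: return 0 on
 * match, non-zero otherwise.
 */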
static inline int vxlan_vni_cmp(struct rhashtable_compare_arg *arg,
				const void *ptr)
{
	const struct vxlan_vni_node *vnode = ptr;
	__be32 vni = *(__be32 *)arg->key;

	return vnode->vni != vni;
}

const struct rhashtable_params vxlan_vni_rht_params = {
	.head_offset = offsetof(struct vxlan_vni_node, vnode),
	.key_offset = offsetof(struct vxlan_vni_node, vni),
	.key_len = sizeof(__be32),
	.nelem_hint = 3,
	.max_size = VXLAN_N_VID,
	.obj_cmpfn = vxlan_vni_cmp,
	.automatic_shrinking = true,
};

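/* Link (or unlink, when @del is true) one VNI node into the per-socket
 * VNI hash so receive processing can resolve a VNI to its device.
 * Caller must hold RTNL.
 */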
static void vxlan_vs_add_del_vninode(struct vxlan_dev *vxlan,
				     struct vxlan_vni_node *v,
				     bool del)
{
	struct vxlan_dev_node *node;
	struct vxlan_sock *vs;

	ASSERT_RTNL();

	if (del) {
		if (!hlist_unhashed(&v->hlist4.hlist))
			hlist_del_init_rcu(&v->hlist4.hlist);
#if IS_ENABLED(CONFIG_IPV6)
		if (!hlist_unhashed(&v->hlist6.hlist))
			hlist_del_init_rcu(&v->hlist6.hlist);
#endif
		return;
	}

#if IS_ENABLED(CONFIG_IPV6)
	vs = rtnl_dereference(vxlan->vn6_sock);
	if (vs && v) {
		node = &v->hlist6;
		hlist_add_head_rcu(&node->hlist, vni_head(vs, v->vni));
	}
#endif
	vs = rtnl_dereference(vxlan->vn4_sock);
	if (vs && v) {
		node = &v->hlist4;
		hlist_add_head_rcu(&node->hlist, vni_head(vs, v->vni));
	}
}

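/* Hook every configured VNI of this device into a newly opened vxlan
 * socket; called once per address family with RTNL held.
 */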
void vxlan_vs_add_vnigrp(struct vxlan_dev *vxlan,
			 struct vxlan_sock *vs,
			 bool ipv6)
{
	struct vxlan_vni_group *vg = rtnl_dereference(vxlan->vnigrp);
	struct vxlan_vni_node *v, *tmp;
	struct vxlan_dev_node *node;

	ASSERT_RTNL();

	if (!vg)
		return;

	list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) {
#if IS_ENABLED(CONFIG_IPV6)
		if (ipv6)
			node = &v->hlist6;
		else
#endif
			node = &v->hlist4;
		node->vxlan = vxlan;
		hlist_add_head_rcu(&node->hlist, vni_head(vs, v->vni));
	}
}

void vxlan_vs_del_vnigrp(struct vxlan_dev *vxlan)
{
	struct vxlan_vni_group *vg = rtnl_dereference(vxlan->vnigrp);
	struct vxlan_vni_node *v, *tmp;

	ASSERT_RTNL();

	if (!vg)
		return;

	list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) {
		hlist_del_init_rcu(&v->hlist4.hlist);
#if IS_ENABLED(CONFIG_IPV6)
		hlist_del_init_rcu(&v->hlist6.hlist);
#endif
	}
}

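/* Fold the per-CPU counters into @dest. The u64_stats fetch/retry loop
 * yields a consistent per-CPU snapshot on 32-bit hosts, where 64-bit
 * counter updates are not atomic.
 */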
static void vxlan_vnifilter_stats_get(const struct vxlan_vni_node *vninode,
				      struct vxlan_vni_stats *dest)
{
	int i;

	memset(dest, 0, sizeof(*dest));
	for_each_possible_cpu(i) {
		struct vxlan_vni_stats_pcpu *pstats;
		struct vxlan_vni_stats temp;
		unsigned int start;

		pstats = per_cpu_ptr(vninode->stats, i);
		do {
			start = u64_stats_fetch_begin(&pstats->syncp);
			memcpy(&temp, &pstats->stats, sizeof(temp));
		} while (u64_stats_fetch_retry(&pstats->syncp, start));

		dest->rx_packets += temp.rx_packets;
		dest->rx_bytes += temp.rx_bytes;
		dest->rx_drops += temp.rx_drops;
		dest->rx_errors += temp.rx_errors;
		dest->tx_packets += temp.tx_packets;
		dest->tx_bytes += temp.tx_bytes;
		dest->tx_drops += temp.tx_drops;
		dest->tx_errors += temp.tx_errors;
	}
}

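/* Bump one per-CPU counter for the given VXLAN_VNI_STATS_* type; @len
 * only contributes to the RX/TX byte counters.
 */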
static void vxlan_vnifilter_stats_add(struct vxlan_vni_node *vninode,
				      int type, unsigned int len)
{
	struct vxlan_vni_stats_pcpu *pstats = this_cpu_ptr(vninode->stats);

	u64_stats_update_begin(&pstats->syncp);
	switch (type) {
	case VXLAN_VNI_STATS_RX:
		pstats->stats.rx_bytes += len;
		pstats->stats.rx_packets++;
		break;
	case VXLAN_VNI_STATS_RX_DROPS:
		pstats->stats.rx_drops++;
		break;
	case VXLAN_VNI_STATS_RX_ERRORS:
		pstats->stats.rx_errors++;
		break;
	case VXLAN_VNI_STATS_TX:
		pstats->stats.tx_bytes += len;
		pstats->stats.tx_packets++;
		break;
	case VXLAN_VNI_STATS_TX_DROPS:
		pstats->stats.tx_drops++;
		break;
	case VXLAN_VNI_STATS_TX_ERRORS:
		pstats->stats.tx_errors++;
		break;
	}
	u64_stats_update_end(&pstats->syncp);
}

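/* Accounting entry point: a no-op unless the device is in VNI filter
 * mode. Fast-path callers pass @vninode directly; otherwise the node
 * is looked up by @vni.
 */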
void vxlan_vnifilter_count(struct vxlan_dev *vxlan, __be32 vni,
			   struct vxlan_vni_node *vninode,
			   int type, unsigned int len)
{
	struct vxlan_vni_node *vnode;

	if (!(vxlan->cfg.flags & VXLAN_F_VNIFILTER))
		return;

	if (vninode) {
		vnode = vninode;
	} else {
		vnode = vxlan_vnifilter_lookup(vxlan, vni);
		if (!vnode)
			return;
	}

	vxlan_vnifilter_stats_add(vnode, type, len);
}

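/* Distance between two VNIs; a result of 1 means @vend immediately
 * follows @vbegin, which is what lets the dump code coalesce adjacent
 * VNIs into a single start/end entry.
 */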
static u32 vnirange(struct vxlan_vni_node *vbegin,
		    struct vxlan_vni_node *vend)
{
	return (be32_to_cpu(vend->vni) - be32_to_cpu(vbegin->vni));
}

static size_t vxlan_vnifilter_entry_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct tunnel_msg))
		+ nla_total_size(0) /* VXLAN_VNIFILTER_ENTRY */
		+ nla_total_size(sizeof(u32)) /* VXLAN_VNIFILTER_ENTRY_START */
		+ nla_total_size(sizeof(u32)) /* VXLAN_VNIFILTER_ENTRY_END */
		+ nla_total_size(sizeof(struct in6_addr));/* VXLAN_VNIFILTER_ENTRY_GROUP{6} */
}

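/* Emit the nested VXLAN_VNIFILTER_ENTRY_STATS attribute for one VNI.
 * On any failure the whole nest is cancelled and -EMSGSIZE returned so
 * the caller can terminate the dump cleanly.
 */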
static int __vnifilter_entry_fill_stats(struct sk_buff *skb,
					const struct vxlan_vni_node *vbegin)
{
	struct vxlan_vni_stats vstats;
	struct nlattr *vstats_attr;

	vstats_attr = nla_nest_start(skb, VXLAN_VNIFILTER_ENTRY_STATS);
	if (!vstats_attr)
		goto out_stats_err;

	vxlan_vnifilter_stats_get(vbegin, &vstats);
	if (nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_BYTES,
			      vstats.rx_bytes, VNIFILTER_ENTRY_STATS_PAD) ||
	    nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_PKTS,
			      vstats.rx_packets, VNIFILTER_ENTRY_STATS_PAD) ||
	    nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_DROPS,
			      vstats.rx_drops, VNIFILTER_ENTRY_STATS_PAD) ||
	    nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_ERRORS,
			      vstats.rx_errors, VNIFILTER_ENTRY_STATS_PAD) ||
	    nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_BYTES,
			      vstats.tx_bytes, VNIFILTER_ENTRY_STATS_PAD) ||
	    nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_PKTS,
			      vstats.tx_packets, VNIFILTER_ENTRY_STATS_PAD) ||
	    nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_DROPS,
			      vstats.tx_drops, VNIFILTER_ENTRY_STATS_PAD) ||
	    nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_ERRORS,
			      vstats.tx_errors, VNIFILTER_ENTRY_STATS_PAD))
		goto out_stats_err;

	nla_nest_end(skb, vstats_attr);

	return 0;

out_stats_err:
	nla_nest_cancel(skb, vstats_attr);
	return -EMSGSIZE;
}

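/* Encode one filter entry covering [@vbegin, @vend]. A single-VNI
 * entry omits VXLAN_VNIFILTER_ENTRY_END, and the group attribute is
 * only present when a per-VNI remote is configured. Returns false if
 * the entry did not fit into @skb.
 */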
static bool vxlan_fill_vni_filter_entry(struct sk_buff *skb,
					struct vxlan_vni_node *vbegin,
					struct vxlan_vni_node *vend,
					bool fill_stats)
{
	struct nlattr *ventry;
	u32 vs = be32_to_cpu(vbegin->vni);
	u32 ve = 0;

	if (vbegin != vend)
		ve = be32_to_cpu(vend->vni);

	ventry = nla_nest_start(skb, VXLAN_VNIFILTER_ENTRY);
	if (!ventry)
		return false;

	if (nla_put_u32(skb, VXLAN_VNIFILTER_ENTRY_START, vs))
		goto out_err;

	if (ve && nla_put_u32(skb, VXLAN_VNIFILTER_ENTRY_END, ve))
		goto out_err;

	if (!vxlan_addr_any(&vbegin->remote_ip)) {
		if (vbegin->remote_ip.sa.sa_family == AF_INET) {
			if (nla_put_in_addr(skb, VXLAN_VNIFILTER_ENTRY_GROUP,
					    vbegin->remote_ip.sin.sin_addr.s_addr))
				goto out_err;
#if IS_ENABLED(CONFIG_IPV6)
		} else {
			if (nla_put_in6_addr(skb, VXLAN_VNIFILTER_ENTRY_GROUP6,
					     &vbegin->remote_ip.sin6.sin6_addr))
				goto out_err;
#endif
		}
	}

	if (fill_stats && __vnifilter_entry_fill_stats(skb, vbegin))
		goto out_err;

	nla_nest_end(skb, ventry);

	return true;

out_err:
	nla_nest_cancel(skb, ventry);

	return false;
}

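/* Send an RTM_NEWTUNNEL/RTM_DELTUNNEL notification for one VNI to
 * RTNLGRP_TUNNEL listeners, recording the error on the rtnl socket if
 * the message cannot be allocated or built.
 */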
static void vxlan_vnifilter_notify(const struct vxlan_dev *vxlan,
				   struct vxlan_vni_node *vninode, int cmd)
{
	struct tunnel_msg *tmsg;
	struct sk_buff *skb;
	struct nlmsghdr *nlh;
	struct net *net = dev_net(vxlan->dev);
	int err = -ENOBUFS;

	skb = nlmsg_new(vxlan_vnifilter_entry_nlmsg_size(), GFP_KERNEL);
	if (!skb)
		goto out_err;

	err = -EMSGSIZE;
	nlh = nlmsg_put(skb, 0, 0, cmd, sizeof(*tmsg), 0);
	if (!nlh)
		goto out_err;
	tmsg = nlmsg_data(nlh);
	memset(tmsg, 0, sizeof(*tmsg));
	tmsg->family = AF_BRIDGE;
	tmsg->ifindex = vxlan->dev->ifindex;

	if (!vxlan_fill_vni_filter_entry(skb, vninode, vninode, false))
		goto out_err;

	nlmsg_end(skb, nlh);
	rtnl_notify(skb, net, 0, RTNLGRP_TUNNEL, NULL, GFP_KERNEL);

	return;

out_err:
	rtnl_set_sk_err(net, RTNLGRP_TUNNEL, err);

	kfree_skb(skb);
}

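/* Dump all VNIs of one device. Consecutive VNIs that share a remote
 * are coalesced into range entries, except when per-VNI stats were
 * requested (stats make each entry unique). cb->args[1] carries the
 * resume index across partial dumps.
 */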
static int vxlan_vnifilter_dump_dev(const struct net_device *dev,
				    struct sk_buff *skb,
				    struct netlink_callback *cb)
{
	struct vxlan_vni_node *tmp, *v, *vbegin = NULL, *vend = NULL;
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct tunnel_msg *new_tmsg, *tmsg;
	int idx = 0, s_idx = cb->args[1];
	struct vxlan_vni_group *vg;
	struct nlmsghdr *nlh;
	bool dump_stats;
	int err = 0;

	if (!(vxlan->cfg.flags & VXLAN_F_VNIFILTER))
		return -EINVAL;

	/* RCU needed because of the vni locking rules (rcu || rtnl) */
	vg = rcu_dereference(vxlan->vnigrp);
	if (!vg || !vg->num_vnis)
		return 0;

	tmsg = nlmsg_data(cb->nlh);
	dump_stats = !!(tmsg->flags & TUNNEL_MSG_FLAG_STATS);

	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			RTM_NEWTUNNEL, sizeof(*new_tmsg), NLM_F_MULTI);
	if (!nlh)
		return -EMSGSIZE;
	new_tmsg = nlmsg_data(nlh);
	memset(new_tmsg, 0, sizeof(*new_tmsg));
	new_tmsg->family = PF_BRIDGE;
	new_tmsg->ifindex = dev->ifindex;

	list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) {
		if (idx < s_idx) {
			idx++;
			continue;
		}
		if (!vbegin) {
			vbegin = v;
			vend = v;
			continue;
		}
		if (!dump_stats && vnirange(vend, v) == 1 &&
		    vxlan_addr_equal(&v->remote_ip, &vend->remote_ip)) {
			goto update_end;
		} else {
			if (!vxlan_fill_vni_filter_entry(skb, vbegin, vend,
							 dump_stats)) {
				err = -EMSGSIZE;
				break;
			}
			idx += vnirange(vbegin, vend) + 1;
			vbegin = v;
		}
update_end:
		vend = v;
	}

	if (!err && vbegin) {
		if (!vxlan_fill_vni_filter_entry(skb, vbegin, vend, dump_stats))
			err = -EMSGSIZE;
	}

	cb->args[1] = err ? idx : 0;

	nlmsg_end(skb, nlh);

	return err;
}

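/* RTM_GETTUNNEL dump handler: dumps either a single device (when
 * tmsg->ifindex is set) or all vxlan devices in the namespace,
 * resuming from cb->args[0].
 */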
static int vxlan_vnifilter_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	int idx = 0, err = 0, s_idx = cb->args[0];
	struct net *net = sock_net(skb->sk);
	struct tunnel_msg *tmsg;
	struct net_device *dev;

	tmsg = nlmsg_payload(cb->nlh, sizeof(*tmsg));
	if (!tmsg) {
		NL_SET_ERR_MSG(cb->extack, "Invalid msg length");
		return -EINVAL;
	}

	if (tmsg->flags & ~TUNNEL_MSG_VALID_USER_FLAGS) {
		NL_SET_ERR_MSG(cb->extack, "Invalid tunnelmsg flags in ancillary header");
		return -EINVAL;
	}

	rcu_read_lock();
	if (tmsg->ifindex) {
		dev = dev_get_by_index_rcu(net, tmsg->ifindex);
		if (!dev) {
			err = -ENODEV;
			goto out_err;
		}
		if (!netif_is_vxlan(dev)) {
			NL_SET_ERR_MSG(cb->extack,
				       "The device is not a vxlan device");
			err = -EINVAL;
			goto out_err;
		}
		err = vxlan_vnifilter_dump_dev(dev, skb, cb);
		/* if the dump completed without an error we return 0 here */
		if (err != -EMSGSIZE)
			goto out_err;
	} else {
		for_each_netdev_rcu(net, dev) {
			if (!netif_is_vxlan(dev))
				continue;
			if (idx < s_idx)
				goto skip;
			err = vxlan_vnifilter_dump_dev(dev, skb, cb);
			if (err == -EMSGSIZE)
				break;
skip:
			idx++;
		}
	}
	cb->args[0] = idx;
	rcu_read_unlock();

	return skb->len;

out_err:
	rcu_read_unlock();

	return err;
}

static const struct nla_policy vni_filter_entry_policy[VXLAN_VNIFILTER_ENTRY_MAX + 1] = {
	[VXLAN_VNIFILTER_ENTRY_START] = { .type = NLA_U32 },
	[VXLAN_VNIFILTER_ENTRY_END] = { .type = NLA_U32 },
	[VXLAN_VNIFILTER_ENTRY_GROUP]	= { .type = NLA_BINARY,
					    .len = sizeof_field(struct iphdr, daddr) },
	[VXLAN_VNIFILTER_ENTRY_GROUP6]	= { .type = NLA_BINARY,
					    .len = sizeof(struct in6_addr) },
};

static const struct nla_policy vni_filter_policy[VXLAN_VNIFILTER_MAX + 1] = {
	[VXLAN_VNIFILTER_ENTRY] = { .type = NLA_NESTED },
};

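/* Swap the all-zeros-MAC default FDB entry for @vni: install the new
 * remote first (if any), then remove the old one, both under the FDB
 * hash lock.
 */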
static int vxlan_update_default_fdb_entry(struct vxlan_dev *vxlan, __be32 vni,
					  union vxlan_addr *old_remote_ip,
					  union vxlan_addr *remote_ip,
					  struct netlink_ext_ack *extack)
{
	struct vxlan_rdst *dst = &vxlan->default_dst;
	int err = 0;

	spin_lock_bh(&vxlan->hash_lock);
	if (remote_ip && !vxlan_addr_any(remote_ip)) {
		err = vxlan_fdb_update(vxlan, all_zeros_mac,
				       remote_ip,
				       NUD_REACHABLE | NUD_PERMANENT,
				       NLM_F_APPEND | NLM_F_CREATE,
				       vxlan->cfg.dst_port,
				       vni,
				       vni,
				       dst->remote_ifindex,
				       NTF_SELF, 0, true, extack);
		if (err) {
			spin_unlock_bh(&vxlan->hash_lock);
			return err;
		}
	}

	if (old_remote_ip && !vxlan_addr_any(old_remote_ip)) {
		__vxlan_fdb_delete(vxlan, all_zeros_mac,
				   *old_remote_ip,
				   vxlan->cfg.dst_port,
				   vni, vni,
				   dst->remote_ifindex,
				   true);
	}
	spin_unlock_bh(&vxlan->hash_lock);

	return err;
}

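/* Resolve the effective remote for a VNI (the per-VNI @group when
 * given, else the device default remote), update the default FDB
 * entry accordingly and, while the device is up, leave/join multicast
 * groups as the remote changes. Sets *changed if anything was
 * modified.
 */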
static int vxlan_vni_update_group(struct vxlan_dev *vxlan,
				  struct vxlan_vni_node *vninode,
				  union vxlan_addr *group,
				  bool create, bool *changed,
				  struct netlink_ext_ack *extack)
{
	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
	struct vxlan_rdst *dst = &vxlan->default_dst;
	union vxlan_addr *newrip = NULL, *oldrip = NULL;
	union vxlan_addr old_remote_ip;
	int ret = 0;

	memcpy(&old_remote_ip, &vninode->remote_ip, sizeof(old_remote_ip));

	/* if per vni remote ip is not present use vxlan dev
	 * default dst remote ip for fdb entry
	 */
	if (group && !vxlan_addr_any(group)) {
		newrip = group;
	} else {
		if (!vxlan_addr_any(&dst->remote_ip))
			newrip = &dst->remote_ip;
	}

	/* if old rip exists, and no newrip,
	 * explicitly delete old rip
	 */
	if (!newrip && !vxlan_addr_any(&old_remote_ip))
		oldrip = &old_remote_ip;

	if (!newrip && !oldrip)
		return 0;

	if (!create && oldrip && newrip && vxlan_addr_equal(oldrip, newrip))
		return 0;

	ret = vxlan_update_default_fdb_entry(vxlan, vninode->vni,
					     oldrip, newrip,
					     extack);
	if (ret)
		goto out;

	if (group)
		memcpy(&vninode->remote_ip, group, sizeof(vninode->remote_ip));

	if (vxlan->dev->flags & IFF_UP) {
		if (vxlan_addr_multicast(&old_remote_ip) &&
		    !vxlan_group_used(vn, vxlan, vninode->vni,
				      &old_remote_ip,
				      vxlan->default_dst.remote_ifindex)) {
			ret = vxlan_igmp_leave(vxlan, &old_remote_ip,
					       0);
			if (ret)
				goto out;
		}

		if (vxlan_addr_multicast(&vninode->remote_ip)) {
			ret = vxlan_igmp_join(vxlan, &vninode->remote_ip, 0);
			if (ret == -EADDRINUSE)
				ret = 0;
			if (ret)
				goto out;
		}
	}

	*changed = true;

	return 0;
out:
	return ret;
}

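/* Called when the device-level default remote changes: refresh the
 * default FDB entry of every VNI that does not override the remote
 * with a per-VNI group of its own.
 */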
int vxlan_vnilist_update_group(struct vxlan_dev *vxlan,
			       union vxlan_addr *old_remote_ip,
			       union vxlan_addr *new_remote_ip,
			       struct netlink_ext_ack *extack)
{
	struct list_head *headp, *hpos;
	struct vxlan_vni_group *vg;
	struct vxlan_vni_node *vent;
	int ret;

	vg = rtnl_dereference(vxlan->vnigrp);

	headp = &vg->vni_list;
	list_for_each_prev(hpos, headp) {
		vent = list_entry(hpos, struct vxlan_vni_node, vlist);
		if (vxlan_addr_any(&vent->remote_ip)) {
			ret = vxlan_update_default_fdb_entry(vxlan, vent->vni,
							     old_remote_ip,
							     new_remote_ip,
							     extack);
			if (ret)
				return ret;
		}
	}

	return 0;
}

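/* Undo vxlan_vni_update_group() for a VNI being removed: delete its
 * default FDB entry and leave the multicast group if this VNI was its
 * last user.
 */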
static void vxlan_vni_delete_group(struct vxlan_dev *vxlan,
				   struct vxlan_vni_node *vninode)
{
	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
	struct vxlan_rdst *dst = &vxlan->default_dst;

	/* if per vni remote_ip not present, delete the
	 * default dst remote_ip previously added for this vni
	 */
	if (!vxlan_addr_any(&vninode->remote_ip) ||
	    !vxlan_addr_any(&dst->remote_ip)) {
		spin_lock_bh(&vxlan->hash_lock);
		__vxlan_fdb_delete(vxlan, all_zeros_mac,
				   (vxlan_addr_any(&vninode->remote_ip) ?
				   dst->remote_ip : vninode->remote_ip),
				   vxlan->cfg.dst_port,
				   vninode->vni, vninode->vni,
				   dst->remote_ifindex,
				   true);
		spin_unlock_bh(&vxlan->hash_lock);
	}

	if (vxlan->dev->flags & IFF_UP) {
		if (vxlan_addr_multicast(&vninode->remote_ip) &&
		    !vxlan_group_used(vn, vxlan, vninode->vni,
				      &vninode->remote_ip,
				      dst->remote_ifindex)) {
			vxlan_igmp_leave(vxlan, &vninode->remote_ip, 0);
		}
	}
}

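/* Update an already configured VNI in place; quietly succeeds when the
 * VNI is not part of the filter. Listeners are notified only when the
 * remote actually changed.
 */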
static int vxlan_vni_update(struct vxlan_dev *vxlan,
			    struct vxlan_vni_group *vg,
			    __be32 vni, union vxlan_addr *group,
			    bool *changed,
			    struct netlink_ext_ack *extack)
{
	struct vxlan_vni_node *vninode;
	int ret;

	vninode = rhashtable_lookup_fast(&vg->vni_hash, &vni,
					 vxlan_vni_rht_params);
	if (!vninode)
		return 0;

	ret = vxlan_vni_update_group(vxlan, vninode, group, false, changed,
				     extack);
	if (ret)
		return ret;

	if (*changed)
		vxlan_vnifilter_notify(vxlan, vninode, RTM_NEWTUNNEL);

	return 0;
}

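/* Insert @v into the VNI list, which is kept sorted by ascending VNI
 * so dumps can coalesce contiguous ranges cheaply.
 */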
static void __vxlan_vni_add_list(struct vxlan_vni_group *vg,
				 struct vxlan_vni_node *v)
{
	struct list_head *headp, *hpos;
	struct vxlan_vni_node *vent;

	headp = &vg->vni_list;
	list_for_each_prev(hpos, headp) {
		vent = list_entry(hpos, struct vxlan_vni_node, vlist);
		if (be32_to_cpu(v->vni) < be32_to_cpu(vent->vni))
			continue;
		else
			break;
	}
	list_add_rcu(&v->vlist, hpos);
	vg->num_vnis++;
}

static void __vxlan_vni_del_list(struct vxlan_vni_group *vg,
				 struct vxlan_vni_node *v)
{
	list_del_rcu(&v->vlist);
	vg->num_vnis--;
}

static struct vxlan_vni_node *vxlan_vni_alloc(struct vxlan_dev *vxlan,
					      __be32 vni)
{
	struct vxlan_vni_node *vninode;

	vninode = kzalloc(sizeof(*vninode), GFP_KERNEL);
	if (!vninode)
		return NULL;
	vninode->stats = netdev_alloc_pcpu_stats(struct vxlan_vni_stats_pcpu);
	if (!vninode->stats) {
		kfree(vninode);
		return NULL;
	}
	vninode->vni = vni;
	vninode->hlist4.vxlan = vxlan;
#if IS_ENABLED(CONFIG_IPV6)
	vninode->hlist6.vxlan = vxlan;
#endif

	return vninode;
}

static void vxlan_vni_free(struct vxlan_vni_node *vninode)
{
	free_percpu(vninode->stats);
	kfree(vninode);
}

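/* Add one VNI to the filter: reject VNIs that would clash with another
 * device (vxlan_vni_in_use()), insert the node into the hash and the
 * sorted list, hook it into the socket hash if the device is up, and
 * set up its default FDB/multicast state.
 */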
static int vxlan_vni_add(struct vxlan_dev *vxlan,
			 struct vxlan_vni_group *vg,
			 u32 vni, union vxlan_addr *group,
			 struct netlink_ext_ack *extack)
{
	struct vxlan_vni_node *vninode;
	__be32 v = cpu_to_be32(vni);
	bool changed = false;
	int err = 0;

	if (vxlan_vnifilter_lookup(vxlan, v))
		return vxlan_vni_update(vxlan, vg, v, group, &changed, extack);

	err = vxlan_vni_in_use(vxlan->net, vxlan, &vxlan->cfg, v);
	if (err) {
		NL_SET_ERR_MSG(extack, "VNI in use");
		return err;
	}

	vninode = vxlan_vni_alloc(vxlan, v);
	if (!vninode)
		return -ENOMEM;

	err = rhashtable_lookup_insert_fast(&vg->vni_hash,
					    &vninode->vnode,
					    vxlan_vni_rht_params);
	if (err) {
		vxlan_vni_free(vninode);
		return err;
	}

	__vxlan_vni_add_list(vg, vninode);

	if (vxlan->dev->flags & IFF_UP)
		vxlan_vs_add_del_vninode(vxlan, vninode, false);

	err = vxlan_vni_update_group(vxlan, vninode, group, true, &changed,
				     extack);

	if (changed)
		vxlan_vnifilter_notify(vxlan, vninode, RTM_NEWTUNNEL);

	return err;
}

static void vxlan_vni_node_rcu_free(struct rcu_head *rcu)
{
	struct vxlan_vni_node *v;

	v = container_of(rcu, struct vxlan_vni_node, rcu);
	vxlan_vni_free(v);
}

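/* Remove one VNI from the filter, tearing down its FDB/multicast state
 * first. The node itself is freed only after an RCU grace period since
 * receive-side lookups may still be walking it.
 */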
static int vxlan_vni_del(struct vxlan_dev *vxlan,
			 struct vxlan_vni_group *vg,
			 u32 vni, struct netlink_ext_ack *extack)
{
	struct vxlan_vni_node *vninode;
	__be32 v = cpu_to_be32(vni);
	int err = 0;

	vg = rtnl_dereference(vxlan->vnigrp);

	vninode = rhashtable_lookup_fast(&vg->vni_hash, &v,
					 vxlan_vni_rht_params);
	if (!vninode) {
		err = -ENOENT;
		goto out;
	}

	vxlan_vni_delete_group(vxlan, vninode);

	err = rhashtable_remove_fast(&vg->vni_hash,
				     &vninode->vnode,
				     vxlan_vni_rht_params);
	if (err)
		goto out;

	__vxlan_vni_del_list(vg, vninode);

	vxlan_vnifilter_notify(vxlan, vninode, RTM_DELTUNNEL);

	if (vxlan->dev->flags & IFF_UP)
		vxlan_vs_add_del_vninode(vxlan, vninode, true);

	call_rcu(&vninode->rcu, vxlan_vni_node_rcu_free);

	return 0;
out:
	return err;
}

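/* Apply RTM_NEWTUNNEL/RTM_DELTUNNEL to every VNI in the inclusive
 * [start_vni, end_vni] range, stopping at the first failure.
 */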
static int vxlan_vni_add_del(struct vxlan_dev *vxlan, __u32 start_vni,
			     __u32 end_vni, union vxlan_addr *group,
			     int cmd, struct netlink_ext_ack *extack)
{
	struct vxlan_vni_group *vg;
	int v, err = 0;

	vg = rtnl_dereference(vxlan->vnigrp);

	for (v = start_vni; v <= end_vni; v++) {
		switch (cmd) {
		case RTM_NEWTUNNEL:
			err = vxlan_vni_add(vxlan, vg, v, group, extack);
			break;
		case RTM_DELTUNNEL:
			err = vxlan_vni_del(vxlan, vg, v, extack);
			break;
		default:
			err = -EOPNOTSUPP;
			break;
		}
		if (err)
			goto out;
	}

	return 0;
out:
	return err;
}

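/* Parse and apply one VXLAN_VNIFILTER_ENTRY attribute: an optional
 * [start, end] VNI range plus an optional IPv4/IPv6 remote group.
 */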
static int vxlan_process_vni_filter(struct vxlan_dev *vxlan,
				    struct nlattr *nlvnifilter,
				    int cmd, struct netlink_ext_ack *extack)
{
	struct nlattr *vattrs[VXLAN_VNIFILTER_ENTRY_MAX + 1];
	u32 vni_start = 0, vni_end = 0;
	union vxlan_addr group;
	int err;

	err = nla_parse_nested(vattrs,
			       VXLAN_VNIFILTER_ENTRY_MAX,
			       nlvnifilter, vni_filter_entry_policy,
			       extack);
	if (err)
		return err;

	if (vattrs[VXLAN_VNIFILTER_ENTRY_START]) {
		vni_start = nla_get_u32(vattrs[VXLAN_VNIFILTER_ENTRY_START]);
		vni_end = vni_start;
	}

	if (vattrs[VXLAN_VNIFILTER_ENTRY_END])
		vni_end = nla_get_u32(vattrs[VXLAN_VNIFILTER_ENTRY_END]);

	if (!vni_start && !vni_end) {
		NL_SET_ERR_MSG_ATTR(extack, nlvnifilter,
				    "neither vni start nor end found in vni entry");
		return -EINVAL;
	}

	if (vattrs[VXLAN_VNIFILTER_ENTRY_GROUP]) {
		group.sin.sin_addr.s_addr =
			nla_get_in_addr(vattrs[VXLAN_VNIFILTER_ENTRY_GROUP]);
		group.sa.sa_family = AF_INET;
	} else if (vattrs[VXLAN_VNIFILTER_ENTRY_GROUP6]) {
		group.sin6.sin6_addr =
			nla_get_in6_addr(vattrs[VXLAN_VNIFILTER_ENTRY_GROUP6]);
		group.sa.sa_family = AF_INET6;
	} else {
		memset(&group, 0, sizeof(group));
	}

	if (vxlan_addr_multicast(&group) && !vxlan->default_dst.remote_ifindex) {
		NL_SET_ERR_MSG(extack,
			       "Local interface required for multicast remote group");

		return -EINVAL;
	}

	err = vxlan_vni_add_del(vxlan, vni_start, vni_end, &group, cmd,
				extack);
	if (err)
		return err;

	return 0;
}

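/* Device-level teardown/setup of the VNI group. Uninit walks every
 * remaining VNI, sending an RTM_DELTUNNEL notification for each before
 * freeing it.
 */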
void vxlan_vnigroup_uninit(struct vxlan_dev *vxlan)
{
	struct vxlan_vni_node *v, *tmp;
	struct vxlan_vni_group *vg;

	vg = rtnl_dereference(vxlan->vnigrp);
	list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) {
		rhashtable_remove_fast(&vg->vni_hash, &v->vnode,
				       vxlan_vni_rht_params);
		hlist_del_init_rcu(&v->hlist4.hlist);
#if IS_ENABLED(CONFIG_IPV6)
		hlist_del_init_rcu(&v->hlist6.hlist);
#endif
		__vxlan_vni_del_list(vg, v);
		vxlan_vnifilter_notify(vxlan, v, RTM_DELTUNNEL);
		call_rcu(&v->rcu, vxlan_vni_node_rcu_free);
	}
	rhashtable_destroy(&vg->vni_hash);
	kfree(vg);
}

int vxlan_vnigroup_init(struct vxlan_dev *vxlan)
{
	struct vxlan_vni_group *vg;
	int ret;

	vg = kzalloc(sizeof(*vg), GFP_KERNEL);
	if (!vg)
		return -ENOMEM;
	ret = rhashtable_init(&vg->vni_hash, &vxlan_vni_rht_params);
	if (ret) {
		kfree(vg);
		return ret;
	}
	INIT_LIST_HEAD(&vg->vni_list);
	rcu_assign_pointer(vxlan->vnigrp, vg);

	return 0;
}

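/* RTM_NEWTUNNEL/RTM_DELTUNNEL handler: validates the tunnel_msg
 * header, resolves the target vxlan device and applies every
 * VXLAN_VNIFILTER_ENTRY attribute in the request.
 */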
static int vxlan_vnifilter_process(struct sk_buff *skb, struct nlmsghdr *nlh,
				   struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tunnel_msg *tmsg;
	struct vxlan_dev *vxlan;
	struct net_device *dev;
	struct nlattr *attr;
	int err, vnis = 0;
	int rem;

	/* this should validate the header and check for remaining bytes */
	err = nlmsg_parse(nlh, sizeof(*tmsg), NULL, VXLAN_VNIFILTER_MAX,
			  vni_filter_policy, extack);
	if (err < 0)
		return err;

	tmsg = nlmsg_data(nlh);
	dev = __dev_get_by_index(net, tmsg->ifindex);
	if (!dev)
		return -ENODEV;

	if (!netif_is_vxlan(dev)) {
		NL_SET_ERR_MSG_MOD(extack, "The device is not a vxlan device");
		return -EINVAL;
	}

	vxlan = netdev_priv(dev);

	if (!(vxlan->cfg.flags & VXLAN_F_VNIFILTER))
		return -EOPNOTSUPP;

	nlmsg_for_each_attr_type(attr, VXLAN_VNIFILTER_ENTRY, nlh,
				 sizeof(*tmsg), rem) {
		err = vxlan_process_vni_filter(vxlan, attr, nlh->nlmsg_type,
					       extack);
		vnis++;
		if (err)
			break;
	}

	if (!vnis) {
		NL_SET_ERR_MSG_MOD(extack, "No vnis found to process");
		err = -EINVAL;
	}

	return err;
}

static const struct rtnl_msg_handler vxlan_vnifilter_rtnl_msg_handlers[] = {
	{THIS_MODULE, PF_BRIDGE, RTM_GETTUNNEL, NULL, vxlan_vnifilter_dump, 0},
	{THIS_MODULE, PF_BRIDGE, RTM_NEWTUNNEL, vxlan_vnifilter_process, NULL, 0},
	{THIS_MODULE, PF_BRIDGE, RTM_DELTUNNEL, vxlan_vnifilter_process, NULL, 0},
};

int vxlan_vnifilter_init(void)
{
	return rtnl_register_many(vxlan_vnifilter_rtnl_msg_handlers);
}

void vxlan_vnifilter_uninit(void)
{
	rtnl_unregister_many(vxlan_vnifilter_rtnl_msg_handlers);
}