xref: /linux/net/switchdev/switchdev.c (revision 005438a8eef063495ac059d128eea71b58de50e5)
1 /*
2  * net/switchdev/switchdev.c - Switch device API
3  * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
4  * Copyright (c) 2014-2015 Scott Feldman <sfeldma@gmail.com>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  */
11 
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
#include <linux/if_bridge.h>
#include <linux/if_vlan.h>
#include <net/ip_fib.h>
#include <net/switchdev.h>
21 
22 /**
23  *	switchdev_port_attr_get - Get port attribute
24  *
25  *	@dev: port device
26  *	@attr: attribute to get
27  */
28 int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr)
29 {
30 	const struct switchdev_ops *ops = dev->switchdev_ops;
31 	struct net_device *lower_dev;
32 	struct list_head *iter;
33 	struct switchdev_attr first = {
34 		.id = SWITCHDEV_ATTR_UNDEFINED
35 	};
36 	int err = -EOPNOTSUPP;
37 
38 	if (ops && ops->switchdev_port_attr_get)
39 		return ops->switchdev_port_attr_get(dev, attr);
40 
41 	if (attr->flags & SWITCHDEV_F_NO_RECURSE)
42 		return err;
43 
44 	/* Switch device port(s) may be stacked under
45 	 * bond/team/vlan dev, so recurse down to get attr on
46 	 * each port.  Return -ENODATA if attr values don't
47 	 * compare across ports.
48 	 */
49 
50 	netdev_for_each_lower_dev(dev, lower_dev, iter) {
51 		err = switchdev_port_attr_get(lower_dev, attr);
52 		if (err)
53 			break;
54 		if (first.id == SWITCHDEV_ATTR_UNDEFINED)
55 			first = *attr;
56 		else if (memcmp(&first, attr, sizeof(*attr)))
57 			return -ENODATA;
58 	}
59 
60 	return err;
61 }
62 EXPORT_SYMBOL_GPL(switchdev_port_attr_get);
63 
64 static int __switchdev_port_attr_set(struct net_device *dev,
65 				     struct switchdev_attr *attr)
66 {
67 	const struct switchdev_ops *ops = dev->switchdev_ops;
68 	struct net_device *lower_dev;
69 	struct list_head *iter;
70 	int err = -EOPNOTSUPP;
71 
72 	if (ops && ops->switchdev_port_attr_set)
73 		return ops->switchdev_port_attr_set(dev, attr);
74 
75 	if (attr->flags & SWITCHDEV_F_NO_RECURSE)
76 		return err;
77 
78 	/* Switch device port(s) may be stacked under
79 	 * bond/team/vlan dev, so recurse down to set attr on
80 	 * each port.
81 	 */
82 
83 	netdev_for_each_lower_dev(dev, lower_dev, iter) {
84 		err = __switchdev_port_attr_set(lower_dev, attr);
85 		if (err)
86 			break;
87 	}
88 
89 	return err;
90 }
91 
92 struct switchdev_attr_set_work {
93 	struct work_struct work;
94 	struct net_device *dev;
95 	struct switchdev_attr attr;
96 };
97 
98 static void switchdev_port_attr_set_work(struct work_struct *work)
99 {
100 	struct switchdev_attr_set_work *asw =
101 		container_of(work, struct switchdev_attr_set_work, work);
102 	int err;
103 
104 	rtnl_lock();
105 	err = switchdev_port_attr_set(asw->dev, &asw->attr);
106 	if (err && err != -EOPNOTSUPP)
107 		netdev_err(asw->dev, "failed (err=%d) to set attribute (id=%d)\n",
108 			   err, asw->attr.id);
109 	rtnl_unlock();
110 
111 	dev_put(asw->dev);
112 	kfree(work);
113 }
114 
115 static int switchdev_port_attr_set_defer(struct net_device *dev,
116 					 struct switchdev_attr *attr)
117 {
118 	struct switchdev_attr_set_work *asw;
119 
120 	asw = kmalloc(sizeof(*asw), GFP_ATOMIC);
121 	if (!asw)
122 		return -ENOMEM;
123 
124 	INIT_WORK(&asw->work, switchdev_port_attr_set_work);
125 
126 	dev_hold(dev);
127 	asw->dev = dev;
128 	memcpy(&asw->attr, attr, sizeof(asw->attr));
129 
130 	schedule_work(&asw->work);
131 
132 	return 0;
133 }
134 
135 /**
136  *	switchdev_port_attr_set - Set port attribute
137  *
138  *	@dev: port device
139  *	@attr: attribute to set
140  *
141  *	Use a 2-phase prepare-commit transaction model to ensure
142  *	system is not left in a partially updated state due to
143  *	failure from driver/device.
144  */
145 int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr)
146 {
147 	int err;
148 
149 	if (!rtnl_is_locked()) {
150 		/* Running prepare-commit transaction across stacked
151 		 * devices requires nothing moves, so if rtnl_lock is
152 		 * not held, schedule a worker thread to hold rtnl_lock
153 		 * while setting attr.
154 		 */
155 
156 		return switchdev_port_attr_set_defer(dev, attr);
157 	}
158 
159 	/* Phase I: prepare for attr set. Driver/device should fail
160 	 * here if there are going to be issues in the commit phase,
161 	 * such as lack of resources or support.  The driver/device
162 	 * should reserve resources needed for the commit phase here,
163 	 * but should not commit the attr.
164 	 */
165 
166 	attr->trans = SWITCHDEV_TRANS_PREPARE;
167 	err = __switchdev_port_attr_set(dev, attr);
168 	if (err) {
169 		/* Prepare phase failed: abort the transaction.  Any
170 		 * resources reserved in the prepare phase are
171 		 * released.
172 		 */
173 
174 		attr->trans = SWITCHDEV_TRANS_ABORT;
175 		__switchdev_port_attr_set(dev, attr);
176 
177 		return err;
178 	}
179 
180 	/* Phase II: commit attr set.  This cannot fail as a fault
181 	 * of driver/device.  If it does, it's a bug in the driver/device
182 	 * because the driver said everythings was OK in phase I.
183 	 */
184 
185 	attr->trans = SWITCHDEV_TRANS_COMMIT;
186 	err = __switchdev_port_attr_set(dev, attr);
187 	WARN(err, "%s: Commit of attribute (id=%d) failed.\n",
188 	     dev->name, attr->id);
189 
190 	return err;
191 }
192 EXPORT_SYMBOL_GPL(switchdev_port_attr_set);
193 
194 static int __switchdev_port_obj_add(struct net_device *dev,
195 				    struct switchdev_obj *obj)
196 {
197 	const struct switchdev_ops *ops = dev->switchdev_ops;
198 	struct net_device *lower_dev;
199 	struct list_head *iter;
200 	int err = -EOPNOTSUPP;
201 
202 	if (ops && ops->switchdev_port_obj_add)
203 		return ops->switchdev_port_obj_add(dev, obj);
204 
205 	/* Switch device port(s) may be stacked under
206 	 * bond/team/vlan dev, so recurse down to add object on
207 	 * each port.
208 	 */
209 
210 	netdev_for_each_lower_dev(dev, lower_dev, iter) {
211 		err = __switchdev_port_obj_add(lower_dev, obj);
212 		if (err)
213 			break;
214 	}
215 
216 	return err;
217 }
218 
219 /**
220  *	switchdev_port_obj_add - Add port object
221  *
222  *	@dev: port device
223  *	@obj: object to add
224  *
225  *	Use a 2-phase prepare-commit transaction model to ensure
226  *	system is not left in a partially updated state due to
227  *	failure from driver/device.
228  *
229  *	rtnl_lock must be held.
230  */
231 int switchdev_port_obj_add(struct net_device *dev, struct switchdev_obj *obj)
232 {
233 	int err;
234 
235 	ASSERT_RTNL();
236 
237 	/* Phase I: prepare for obj add. Driver/device should fail
238 	 * here if there are going to be issues in the commit phase,
239 	 * such as lack of resources or support.  The driver/device
240 	 * should reserve resources needed for the commit phase here,
241 	 * but should not commit the obj.
242 	 */
243 
244 	obj->trans = SWITCHDEV_TRANS_PREPARE;
245 	err = __switchdev_port_obj_add(dev, obj);
246 	if (err) {
247 		/* Prepare phase failed: abort the transaction.  Any
248 		 * resources reserved in the prepare phase are
249 		 * released.
250 		 */
251 
252 		obj->trans = SWITCHDEV_TRANS_ABORT;
253 		__switchdev_port_obj_add(dev, obj);
254 
255 		return err;
256 	}
257 
258 	/* Phase II: commit obj add.  This cannot fail as a fault
259 	 * of driver/device.  If it does, it's a bug in the driver/device
260 	 * because the driver said everythings was OK in phase I.
261 	 */
262 
263 	obj->trans = SWITCHDEV_TRANS_COMMIT;
264 	err = __switchdev_port_obj_add(dev, obj);
265 	WARN(err, "%s: Commit of object (id=%d) failed.\n", dev->name, obj->id);
266 
267 	return err;
268 }
269 EXPORT_SYMBOL_GPL(switchdev_port_obj_add);
270 
271 /**
272  *	switchdev_port_obj_del - Delete port object
273  *
274  *	@dev: port device
275  *	@obj: object to delete
276  */
277 int switchdev_port_obj_del(struct net_device *dev, struct switchdev_obj *obj)
278 {
279 	const struct switchdev_ops *ops = dev->switchdev_ops;
280 	struct net_device *lower_dev;
281 	struct list_head *iter;
282 	int err = -EOPNOTSUPP;
283 
284 	if (ops && ops->switchdev_port_obj_del)
285 		return ops->switchdev_port_obj_del(dev, obj);
286 
287 	/* Switch device port(s) may be stacked under
288 	 * bond/team/vlan dev, so recurse down to delete object on
289 	 * each port.
290 	 */
291 
292 	netdev_for_each_lower_dev(dev, lower_dev, iter) {
293 		err = switchdev_port_obj_del(lower_dev, obj);
294 		if (err)
295 			break;
296 	}
297 
298 	return err;
299 }
300 EXPORT_SYMBOL_GPL(switchdev_port_obj_del);
301 
302 /**
303  *	switchdev_port_obj_dump - Dump port objects
304  *
305  *	@dev: port device
306  *	@obj: object to dump
307  */
308 int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj)
309 {
310 	const struct switchdev_ops *ops = dev->switchdev_ops;
311 	struct net_device *lower_dev;
312 	struct list_head *iter;
313 	int err = -EOPNOTSUPP;
314 
315 	if (ops && ops->switchdev_port_obj_dump)
316 		return ops->switchdev_port_obj_dump(dev, obj);
317 
318 	/* Switch device port(s) may be stacked under
319 	 * bond/team/vlan dev, so recurse down to dump objects on
320 	 * first port at bottom of stack.
321 	 */
322 
323 	netdev_for_each_lower_dev(dev, lower_dev, iter) {
324 		err = switchdev_port_obj_dump(lower_dev, obj);
325 		break;
326 	}
327 
328 	return err;
329 }
330 EXPORT_SYMBOL_GPL(switchdev_port_obj_dump);
331 
/* Switchdev notifier chain.  Registration, unregistration and calls
 * are all made with switchdev_mutex held.
 */
static RAW_NOTIFIER_HEAD(switchdev_notif_chain);
static DEFINE_MUTEX(switchdev_mutex);
334 
335 /**
336  *	register_switchdev_notifier - Register notifier
337  *	@nb: notifier_block
338  *
339  *	Register switch device notifier. This should be used by code
340  *	which needs to monitor events happening in particular device.
341  *	Return values are same as for atomic_notifier_chain_register().
342  */
343 int register_switchdev_notifier(struct notifier_block *nb)
344 {
345 	int err;
346 
347 	mutex_lock(&switchdev_mutex);
348 	err = raw_notifier_chain_register(&switchdev_notif_chain, nb);
349 	mutex_unlock(&switchdev_mutex);
350 	return err;
351 }
352 EXPORT_SYMBOL_GPL(register_switchdev_notifier);
353 
354 /**
355  *	unregister_switchdev_notifier - Unregister notifier
356  *	@nb: notifier_block
357  *
358  *	Unregister switch device notifier.
359  *	Return values are same as for atomic_notifier_chain_unregister().
360  */
361 int unregister_switchdev_notifier(struct notifier_block *nb)
362 {
363 	int err;
364 
365 	mutex_lock(&switchdev_mutex);
366 	err = raw_notifier_chain_unregister(&switchdev_notif_chain, nb);
367 	mutex_unlock(&switchdev_mutex);
368 	return err;
369 }
370 EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);
371 
372 /**
373  *	call_switchdev_notifiers - Call notifiers
374  *	@val: value passed unmodified to notifier function
375  *	@dev: port device
376  *	@info: notifier information data
377  *
378  *	Call all network notifier blocks. This should be called by driver
379  *	when it needs to propagate hardware event.
380  *	Return values are same as for atomic_notifier_call_chain().
381  */
382 int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
383 			     struct switchdev_notifier_info *info)
384 {
385 	int err;
386 
387 	info->dev = dev;
388 	mutex_lock(&switchdev_mutex);
389 	err = raw_notifier_call_chain(&switchdev_notif_chain, val, info);
390 	mutex_unlock(&switchdev_mutex);
391 	return err;
392 }
393 EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
394 
395 struct switchdev_vlan_dump {
396 	struct switchdev_obj obj;
397 	struct sk_buff *skb;
398 	u32 filter_mask;
399 	u16 flags;
400 	u16 begin;
401 	u16 end;
402 };
403 
404 static int switchdev_port_vlan_dump_put(struct net_device *dev,
405 					struct switchdev_vlan_dump *dump)
406 {
407 	struct bridge_vlan_info vinfo;
408 
409 	vinfo.flags = dump->flags;
410 
411 	if (dump->begin == 0 && dump->end == 0) {
412 		return 0;
413 	} else if (dump->begin == dump->end) {
414 		vinfo.vid = dump->begin;
415 		if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
416 			    sizeof(vinfo), &vinfo))
417 			return -EMSGSIZE;
418 	} else {
419 		vinfo.vid = dump->begin;
420 		vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_BEGIN;
421 		if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
422 			    sizeof(vinfo), &vinfo))
423 			return -EMSGSIZE;
424 		vinfo.vid = dump->end;
425 		vinfo.flags &= ~BRIDGE_VLAN_INFO_RANGE_BEGIN;
426 		vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_END;
427 		if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
428 			    sizeof(vinfo), &vinfo))
429 			return -EMSGSIZE;
430 	}
431 
432 	return 0;
433 }
434 
435 static int switchdev_port_vlan_dump_cb(struct net_device *dev,
436 				       struct switchdev_obj *obj)
437 {
438 	struct switchdev_vlan_dump *dump =
439 		container_of(obj, struct switchdev_vlan_dump, obj);
440 	struct switchdev_obj_vlan *vlan = &dump->obj.u.vlan;
441 	int err = 0;
442 
443 	if (vlan->vid_begin > vlan->vid_end)
444 		return -EINVAL;
445 
446 	if (dump->filter_mask & RTEXT_FILTER_BRVLAN) {
447 		dump->flags = vlan->flags;
448 		for (dump->begin = dump->end = vlan->vid_begin;
449 		     dump->begin <= vlan->vid_end;
450 		     dump->begin++, dump->end++) {
451 			err = switchdev_port_vlan_dump_put(dev, dump);
452 			if (err)
453 				return err;
454 		}
455 	} else if (dump->filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) {
456 		if (dump->begin > vlan->vid_begin &&
457 		    dump->begin >= vlan->vid_end) {
458 			if ((dump->begin - 1) == vlan->vid_end &&
459 			    dump->flags == vlan->flags) {
460 				/* prepend */
461 				dump->begin = vlan->vid_begin;
462 			} else {
463 				err = switchdev_port_vlan_dump_put(dev, dump);
464 				dump->flags = vlan->flags;
465 				dump->begin = vlan->vid_begin;
466 				dump->end = vlan->vid_end;
467 			}
468 		} else if (dump->end <= vlan->vid_begin &&
469 		           dump->end < vlan->vid_end) {
470 			if ((dump->end  + 1) == vlan->vid_begin &&
471 			    dump->flags == vlan->flags) {
472 				/* append */
473 				dump->end = vlan->vid_end;
474 			} else {
475 				err = switchdev_port_vlan_dump_put(dev, dump);
476 				dump->flags = vlan->flags;
477 				dump->begin = vlan->vid_begin;
478 				dump->end = vlan->vid_end;
479 			}
480 		} else {
481 			err = -EINVAL;
482 		}
483 	}
484 
485 	return err;
486 }
487 
488 static int switchdev_port_vlan_fill(struct sk_buff *skb, struct net_device *dev,
489 				    u32 filter_mask)
490 {
491 	struct switchdev_vlan_dump dump = {
492 		.obj = {
493 			.id = SWITCHDEV_OBJ_PORT_VLAN,
494 			.cb = switchdev_port_vlan_dump_cb,
495 		},
496 		.skb = skb,
497 		.filter_mask = filter_mask,
498 	};
499 	int err = 0;
500 
501 	if ((filter_mask & RTEXT_FILTER_BRVLAN) ||
502 	    (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) {
503 		err = switchdev_port_obj_dump(dev, &dump.obj);
504 		if (err)
505 			goto err_out;
506 		if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)
507 			/* last one */
508 			err = switchdev_port_vlan_dump_put(dev, &dump);
509 	}
510 
511 err_out:
512 	return err == -EOPNOTSUPP ? 0 : err;
513 }
514 
515 /**
516  *	switchdev_port_bridge_getlink - Get bridge port attributes
517  *
518  *	@dev: port device
519  *
520  *	Called for SELF on rtnl_bridge_getlink to get bridge port
521  *	attributes.
522  */
523 int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
524 				  struct net_device *dev, u32 filter_mask,
525 				  int nlflags)
526 {
527 	struct switchdev_attr attr = {
528 		.id = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS,
529 	};
530 	u16 mode = BRIDGE_MODE_UNDEF;
531 	u32 mask = BR_LEARNING | BR_LEARNING_SYNC;
532 	int err;
533 
534 	err = switchdev_port_attr_get(dev, &attr);
535 	if (err && err != -EOPNOTSUPP)
536 		return err;
537 
538 	return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode,
539 				       attr.u.brport_flags, mask, nlflags,
540 				       filter_mask, switchdev_port_vlan_fill);
541 }
542 EXPORT_SYMBOL_GPL(switchdev_port_bridge_getlink);
543 
544 static int switchdev_port_br_setflag(struct net_device *dev,
545 				     struct nlattr *nlattr,
546 				     unsigned long brport_flag)
547 {
548 	struct switchdev_attr attr = {
549 		.id = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS,
550 	};
551 	u8 flag = nla_get_u8(nlattr);
552 	int err;
553 
554 	err = switchdev_port_attr_get(dev, &attr);
555 	if (err)
556 		return err;
557 
558 	if (flag)
559 		attr.u.brport_flags |= brport_flag;
560 	else
561 		attr.u.brport_flags &= ~brport_flag;
562 
563 	return switchdev_port_attr_set(dev, &attr);
564 }
565 
566 static const struct nla_policy
567 switchdev_port_bridge_policy[IFLA_BRPORT_MAX + 1] = {
568 	[IFLA_BRPORT_STATE]		= { .type = NLA_U8 },
569 	[IFLA_BRPORT_COST]		= { .type = NLA_U32 },
570 	[IFLA_BRPORT_PRIORITY]		= { .type = NLA_U16 },
571 	[IFLA_BRPORT_MODE]		= { .type = NLA_U8 },
572 	[IFLA_BRPORT_GUARD]		= { .type = NLA_U8 },
573 	[IFLA_BRPORT_PROTECT]		= { .type = NLA_U8 },
574 	[IFLA_BRPORT_FAST_LEAVE]	= { .type = NLA_U8 },
575 	[IFLA_BRPORT_LEARNING]		= { .type = NLA_U8 },
576 	[IFLA_BRPORT_LEARNING_SYNC]	= { .type = NLA_U8 },
577 	[IFLA_BRPORT_UNICAST_FLOOD]	= { .type = NLA_U8 },
578 };
579 
580 static int switchdev_port_br_setlink_protinfo(struct net_device *dev,
581 					      struct nlattr *protinfo)
582 {
583 	struct nlattr *attr;
584 	int rem;
585 	int err;
586 
587 	err = nla_validate_nested(protinfo, IFLA_BRPORT_MAX,
588 				  switchdev_port_bridge_policy);
589 	if (err)
590 		return err;
591 
592 	nla_for_each_nested(attr, protinfo, rem) {
593 		switch (nla_type(attr)) {
594 		case IFLA_BRPORT_LEARNING:
595 			err = switchdev_port_br_setflag(dev, attr,
596 							BR_LEARNING);
597 			break;
598 		case IFLA_BRPORT_LEARNING_SYNC:
599 			err = switchdev_port_br_setflag(dev, attr,
600 							BR_LEARNING_SYNC);
601 			break;
602 		default:
603 			err = -EOPNOTSUPP;
604 			break;
605 		}
606 		if (err)
607 			return err;
608 	}
609 
610 	return 0;
611 }
612 
613 static int switchdev_port_br_afspec(struct net_device *dev,
614 				    struct nlattr *afspec,
615 				    int (*f)(struct net_device *dev,
616 					     struct switchdev_obj *obj))
617 {
618 	struct nlattr *attr;
619 	struct bridge_vlan_info *vinfo;
620 	struct switchdev_obj obj = {
621 		.id = SWITCHDEV_OBJ_PORT_VLAN,
622 	};
623 	struct switchdev_obj_vlan *vlan = &obj.u.vlan;
624 	int rem;
625 	int err;
626 
627 	nla_for_each_nested(attr, afspec, rem) {
628 		if (nla_type(attr) != IFLA_BRIDGE_VLAN_INFO)
629 			continue;
630 		if (nla_len(attr) != sizeof(struct bridge_vlan_info))
631 			return -EINVAL;
632 		vinfo = nla_data(attr);
633 		vlan->flags = vinfo->flags;
634 		if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) {
635 			if (vlan->vid_begin)
636 				return -EINVAL;
637 			vlan->vid_begin = vinfo->vid;
638 		} else if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END) {
639 			if (!vlan->vid_begin)
640 				return -EINVAL;
641 			vlan->vid_end = vinfo->vid;
642 			if (vlan->vid_end <= vlan->vid_begin)
643 				return -EINVAL;
644 			err = f(dev, &obj);
645 			if (err)
646 				return err;
647 			memset(vlan, 0, sizeof(*vlan));
648 		} else {
649 			if (vlan->vid_begin)
650 				return -EINVAL;
651 			vlan->vid_begin = vinfo->vid;
652 			vlan->vid_end = vinfo->vid;
653 			err = f(dev, &obj);
654 			if (err)
655 				return err;
656 			memset(vlan, 0, sizeof(*vlan));
657 		}
658 	}
659 
660 	return 0;
661 }
662 
663 /**
664  *	switchdev_port_bridge_setlink - Set bridge port attributes
665  *
666  *	@dev: port device
667  *	@nlh: netlink header
668  *	@flags: netlink flags
669  *
670  *	Called for SELF on rtnl_bridge_setlink to set bridge port
671  *	attributes.
672  */
673 int switchdev_port_bridge_setlink(struct net_device *dev,
674 				  struct nlmsghdr *nlh, u16 flags)
675 {
676 	struct nlattr *protinfo;
677 	struct nlattr *afspec;
678 	int err = 0;
679 
680 	protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
681 				   IFLA_PROTINFO);
682 	if (protinfo) {
683 		err = switchdev_port_br_setlink_protinfo(dev, protinfo);
684 		if (err)
685 			return err;
686 	}
687 
688 	afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
689 				 IFLA_AF_SPEC);
690 	if (afspec)
691 		err = switchdev_port_br_afspec(dev, afspec,
692 					       switchdev_port_obj_add);
693 
694 	return err;
695 }
696 EXPORT_SYMBOL_GPL(switchdev_port_bridge_setlink);
697 
698 /**
699  *	switchdev_port_bridge_dellink - Set bridge port attributes
700  *
701  *	@dev: port device
702  *	@nlh: netlink header
703  *	@flags: netlink flags
704  *
705  *	Called for SELF on rtnl_bridge_dellink to set bridge port
706  *	attributes.
707  */
708 int switchdev_port_bridge_dellink(struct net_device *dev,
709 				  struct nlmsghdr *nlh, u16 flags)
710 {
711 	struct nlattr *afspec;
712 
713 	afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
714 				 IFLA_AF_SPEC);
715 	if (afspec)
716 		return switchdev_port_br_afspec(dev, afspec,
717 						switchdev_port_obj_del);
718 
719 	return 0;
720 }
721 EXPORT_SYMBOL_GPL(switchdev_port_bridge_dellink);
722 
723 /**
724  *	switchdev_port_fdb_add - Add FDB (MAC/VLAN) entry to port
725  *
726  *	@ndmsg: netlink hdr
727  *	@nlattr: netlink attributes
728  *	@dev: port device
729  *	@addr: MAC address to add
730  *	@vid: VLAN to add
731  *
732  *	Add FDB entry to switch device.
733  */
734 int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
735 			   struct net_device *dev, const unsigned char *addr,
736 			   u16 vid, u16 nlm_flags)
737 {
738 	struct switchdev_obj obj = {
739 		.id = SWITCHDEV_OBJ_PORT_FDB,
740 		.u.fdb = {
741 			.addr = addr,
742 			.vid = vid,
743 		},
744 	};
745 
746 	return switchdev_port_obj_add(dev, &obj);
747 }
748 EXPORT_SYMBOL_GPL(switchdev_port_fdb_add);
749 
750 /**
751  *	switchdev_port_fdb_del - Delete FDB (MAC/VLAN) entry from port
752  *
753  *	@ndmsg: netlink hdr
754  *	@nlattr: netlink attributes
755  *	@dev: port device
756  *	@addr: MAC address to delete
757  *	@vid: VLAN to delete
758  *
759  *	Delete FDB entry from switch device.
760  */
761 int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
762 			   struct net_device *dev, const unsigned char *addr,
763 			   u16 vid)
764 {
765 	struct switchdev_obj obj = {
766 		.id = SWITCHDEV_OBJ_PORT_FDB,
767 		.u.fdb = {
768 			.addr = addr,
769 			.vid = vid,
770 		},
771 	};
772 
773 	return switchdev_port_obj_del(dev, &obj);
774 }
775 EXPORT_SYMBOL_GPL(switchdev_port_fdb_del);
776 
777 struct switchdev_fdb_dump {
778 	struct switchdev_obj obj;
779 	struct sk_buff *skb;
780 	struct netlink_callback *cb;
781 	int idx;
782 };
783 
784 static int switchdev_port_fdb_dump_cb(struct net_device *dev,
785 				      struct switchdev_obj *obj)
786 {
787 	struct switchdev_fdb_dump *dump =
788 		container_of(obj, struct switchdev_fdb_dump, obj);
789 	u32 portid = NETLINK_CB(dump->cb->skb).portid;
790 	u32 seq = dump->cb->nlh->nlmsg_seq;
791 	struct nlmsghdr *nlh;
792 	struct ndmsg *ndm;
793 
794 	if (dump->idx < dump->cb->args[0])
795 		goto skip;
796 
797 	nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH,
798 			sizeof(*ndm), NLM_F_MULTI);
799 	if (!nlh)
800 		return -EMSGSIZE;
801 
802 	ndm = nlmsg_data(nlh);
803 	ndm->ndm_family  = AF_BRIDGE;
804 	ndm->ndm_pad1    = 0;
805 	ndm->ndm_pad2    = 0;
806 	ndm->ndm_flags   = NTF_SELF;
807 	ndm->ndm_type    = 0;
808 	ndm->ndm_ifindex = dev->ifindex;
809 	ndm->ndm_state   = NUD_REACHABLE;
810 
811 	if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, obj->u.fdb.addr))
812 		goto nla_put_failure;
813 
814 	if (obj->u.fdb.vid && nla_put_u16(dump->skb, NDA_VLAN, obj->u.fdb.vid))
815 		goto nla_put_failure;
816 
817 	nlmsg_end(dump->skb, nlh);
818 
819 skip:
820 	dump->idx++;
821 	return 0;
822 
823 nla_put_failure:
824 	nlmsg_cancel(dump->skb, nlh);
825 	return -EMSGSIZE;
826 }
827 
828 /**
829  *	switchdev_port_fdb_dump - Dump port FDB (MAC/VLAN) entries
830  *
831  *	@skb: netlink skb
832  *	@cb: netlink callback
833  *	@dev: port device
834  *	@filter_dev: filter device
835  *	@idx:
836  *
837  *	Delete FDB entry from switch device.
838  */
839 int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
840 			    struct net_device *dev,
841 			    struct net_device *filter_dev, int idx)
842 {
843 	struct switchdev_fdb_dump dump = {
844 		.obj = {
845 			.id = SWITCHDEV_OBJ_PORT_FDB,
846 			.cb = switchdev_port_fdb_dump_cb,
847 		},
848 		.skb = skb,
849 		.cb = cb,
850 		.idx = idx,
851 	};
852 	int err;
853 
854 	err = switchdev_port_obj_dump(dev, &dump.obj);
855 	if (err)
856 		return err;
857 
858 	return dump.idx;
859 }
860 EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump);
861 
862 static struct net_device *switchdev_get_lowest_dev(struct net_device *dev)
863 {
864 	const struct switchdev_ops *ops = dev->switchdev_ops;
865 	struct net_device *lower_dev;
866 	struct net_device *port_dev;
867 	struct list_head *iter;
868 
869 	/* Recusively search down until we find a sw port dev.
870 	 * (A sw port dev supports switchdev_port_attr_get).
871 	 */
872 
873 	if (ops && ops->switchdev_port_attr_get)
874 		return dev;
875 
876 	netdev_for_each_lower_dev(dev, lower_dev, iter) {
877 		port_dev = switchdev_get_lowest_dev(lower_dev);
878 		if (port_dev)
879 			return port_dev;
880 	}
881 
882 	return NULL;
883 }
884 
885 static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi)
886 {
887 	struct switchdev_attr attr = {
888 		.id = SWITCHDEV_ATTR_PORT_PARENT_ID,
889 	};
890 	struct switchdev_attr prev_attr;
891 	struct net_device *dev = NULL;
892 	int nhsel;
893 
894 	/* For this route, all nexthop devs must be on the same switch. */
895 
896 	for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
897 		const struct fib_nh *nh = &fi->fib_nh[nhsel];
898 
899 		if (!nh->nh_dev)
900 			return NULL;
901 
902 		dev = switchdev_get_lowest_dev(nh->nh_dev);
903 		if (!dev)
904 			return NULL;
905 
906 		if (switchdev_port_attr_get(dev, &attr))
907 			return NULL;
908 
909 		if (nhsel > 0) {
910 			if (prev_attr.u.ppid.id_len != attr.u.ppid.id_len)
911 				return NULL;
912 			if (memcmp(prev_attr.u.ppid.id, attr.u.ppid.id,
913 				   attr.u.ppid.id_len))
914 				return NULL;
915 		}
916 
917 		prev_attr = attr;
918 	}
919 
920 	return dev;
921 }
922 
923 /**
924  *	switchdev_fib_ipv4_add - Add/modify switch IPv4 route entry
925  *
926  *	@dst: route's IPv4 destination address
927  *	@dst_len: destination address length (prefix length)
928  *	@fi: route FIB info structure
929  *	@tos: route TOS
930  *	@type: route type
931  *	@nlflags: netlink flags passed in (NLM_F_*)
932  *	@tb_id: route table ID
933  *
934  *	Add/modify switch IPv4 route entry.
935  */
936 int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
937 			   u8 tos, u8 type, u32 nlflags, u32 tb_id)
938 {
939 	struct switchdev_obj fib_obj = {
940 		.id = SWITCHDEV_OBJ_IPV4_FIB,
941 		.u.ipv4_fib = {
942 			.dst = dst,
943 			.dst_len = dst_len,
944 			.fi = fi,
945 			.tos = tos,
946 			.type = type,
947 			.nlflags = nlflags,
948 			.tb_id = tb_id,
949 		},
950 	};
951 	struct net_device *dev;
952 	int err = 0;
953 
954 	/* Don't offload route if using custom ip rules or if
955 	 * IPv4 FIB offloading has been disabled completely.
956 	 */
957 
958 #ifdef CONFIG_IP_MULTIPLE_TABLES
959 	if (fi->fib_net->ipv4.fib_has_custom_rules)
960 		return 0;
961 #endif
962 
963 	if (fi->fib_net->ipv4.fib_offload_disabled)
964 		return 0;
965 
966 	dev = switchdev_get_dev_by_nhs(fi);
967 	if (!dev)
968 		return 0;
969 
970 	err = switchdev_port_obj_add(dev, &fib_obj);
971 	if (!err)
972 		fi->fib_flags |= RTNH_F_OFFLOAD;
973 
974 	return err == -EOPNOTSUPP ? 0 : err;
975 }
976 EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_add);
977 
978 /**
979  *	switchdev_fib_ipv4_del - Delete IPv4 route entry from switch
980  *
981  *	@dst: route's IPv4 destination address
982  *	@dst_len: destination address length (prefix length)
983  *	@fi: route FIB info structure
984  *	@tos: route TOS
985  *	@type: route type
986  *	@tb_id: route table ID
987  *
988  *	Delete IPv4 route entry from switch device.
989  */
990 int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
991 			   u8 tos, u8 type, u32 tb_id)
992 {
993 	struct switchdev_obj fib_obj = {
994 		.id = SWITCHDEV_OBJ_IPV4_FIB,
995 		.u.ipv4_fib = {
996 			.dst = dst,
997 			.dst_len = dst_len,
998 			.fi = fi,
999 			.tos = tos,
1000 			.type = type,
1001 			.nlflags = 0,
1002 			.tb_id = tb_id,
1003 		},
1004 	};
1005 	struct net_device *dev;
1006 	int err = 0;
1007 
1008 	if (!(fi->fib_flags & RTNH_F_OFFLOAD))
1009 		return 0;
1010 
1011 	dev = switchdev_get_dev_by_nhs(fi);
1012 	if (!dev)
1013 		return 0;
1014 
1015 	err = switchdev_port_obj_del(dev, &fib_obj);
1016 	if (!err)
1017 		fi->fib_flags &= ~RTNH_F_OFFLOAD;
1018 
1019 	return err == -EOPNOTSUPP ? 0 : err;
1020 }
1021 EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_del);
1022 
1023 /**
1024  *	switchdev_fib_ipv4_abort - Abort an IPv4 FIB operation
1025  *
1026  *	@fi: route FIB info structure
1027  */
1028 void switchdev_fib_ipv4_abort(struct fib_info *fi)
1029 {
1030 	/* There was a problem installing this route to the offload
1031 	 * device.  For now, until we come up with more refined
1032 	 * policy handling, abruptly end IPv4 fib offloading for
1033 	 * for entire net by flushing offload device(s) of all
1034 	 * IPv4 routes, and mark IPv4 fib offloading broken from
1035 	 * this point forward.
1036 	 */
1037 
1038 	fib_flush_external(fi->fib_net);
1039 	fi->fib_net->ipv4.fib_offload_disabled = true;
1040 }
1041 EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort);
1042