xref: /linux/net/switchdev/switchdev.c (revision a41034df538968e726c6aad3e5d8b99799d2d0cd)
1  // SPDX-License-Identifier: GPL-2.0-or-later
2  /*
3   * net/switchdev/switchdev.c - Switch device API
4   * Copyright (c) 2014-2015 Jiri Pirko <jiri@resnulli.us>
5   * Copyright (c) 2014-2015 Scott Feldman <sfeldma@gmail.com>
6   */
7  
8  #include <linux/kernel.h>
9  #include <linux/types.h>
10  #include <linux/init.h>
11  #include <linux/mutex.h>
12  #include <linux/notifier.h>
13  #include <linux/netdevice.h>
14  #include <linux/etherdevice.h>
15  #include <linux/if_bridge.h>
16  #include <linux/list.h>
17  #include <linux/workqueue.h>
18  #include <linux/if_vlan.h>
19  #include <linux/rtnetlink.h>
20  #include <net/switchdev.h>
21  
22  static LIST_HEAD(deferred);
23  static DEFINE_SPINLOCK(deferred_lock);
24  
25  typedef void switchdev_deferred_func_t(struct net_device *dev,
26  				       const void *data);
27  
28  struct switchdev_deferred_item {
29  	struct list_head list;
30  	struct net_device *dev;
31  	switchdev_deferred_func_t *func;
32  	unsigned long data[];
33  };
34  
35  static struct switchdev_deferred_item *switchdev_deferred_dequeue(void)
36  {
37  	struct switchdev_deferred_item *dfitem;
38  
39  	spin_lock_bh(&deferred_lock);
40  	if (list_empty(&deferred)) {
41  		dfitem = NULL;
42  		goto unlock;
43  	}
44  	dfitem = list_first_entry(&deferred,
45  				  struct switchdev_deferred_item, list);
46  	list_del(&dfitem->list);
47  unlock:
48  	spin_unlock_bh(&deferred_lock);
49  	return dfitem;
50  }
51  
52  /**
53   *	switchdev_deferred_process - Process ops in deferred queue
54   *
55   *	Called to flush the ops currently queued in the deferred ops queue.
56   *	rtnl_lock must be held.
57   */
58  void switchdev_deferred_process(void)
59  {
60  	struct switchdev_deferred_item *dfitem;
61  
62  	ASSERT_RTNL();
63  
64  	while ((dfitem = switchdev_deferred_dequeue())) {
65  		dfitem->func(dfitem->dev, dfitem->data);
66  		dev_put(dfitem->dev);
67  		kfree(dfitem);
68  	}
69  }
70  EXPORT_SYMBOL_GPL(switchdev_deferred_process);
71  
72  static void switchdev_deferred_process_work(struct work_struct *work)
73  {
74  	rtnl_lock();
75  	switchdev_deferred_process();
76  	rtnl_unlock();
77  }
78  
79  static DECLARE_WORK(deferred_process_work, switchdev_deferred_process_work);
80  
81  static int switchdev_deferred_enqueue(struct net_device *dev,
82  				      const void *data, size_t data_len,
83  				      switchdev_deferred_func_t *func)
84  {
85  	struct switchdev_deferred_item *dfitem;
86  
87  	dfitem = kmalloc(sizeof(*dfitem) + data_len, GFP_ATOMIC);
88  	if (!dfitem)
89  		return -ENOMEM;
90  	dfitem->dev = dev;
91  	dfitem->func = func;
92  	memcpy(dfitem->data, data, data_len);
93  	dev_hold(dev);
94  	spin_lock_bh(&deferred_lock);
95  	list_add_tail(&dfitem->list, &deferred);
96  	spin_unlock_bh(&deferred_lock);
97  	schedule_work(&deferred_process_work);
98  	return 0;
99  }
100  
101  static int switchdev_port_attr_notify(enum switchdev_notifier_type nt,
102  				      struct net_device *dev,
103  				      const struct switchdev_attr *attr,
104  				      struct switchdev_trans *trans)
105  {
106  	int err;
107  	int rc;
108  
109  	struct switchdev_notifier_port_attr_info attr_info = {
110  		.attr = attr,
111  		.trans = trans,
112  		.handled = false,
113  	};
114  
115  	rc = call_switchdev_blocking_notifiers(nt, dev,
116  					       &attr_info.info, NULL);
117  	err = notifier_to_errno(rc);
118  	if (err) {
119  		WARN_ON(!attr_info.handled);
120  		return err;
121  	}
122  
123  	if (!attr_info.handled)
124  		return -EOPNOTSUPP;
125  
126  	return 0;
127  }
128  
129  static int switchdev_port_attr_set_now(struct net_device *dev,
130  				       const struct switchdev_attr *attr)
131  {
132  	struct switchdev_trans trans;
133  	int err;
134  
135  	/* Phase I: prepare for attr set. Driver/device should fail
136  	 * here if there are going to be issues in the commit phase,
137  	 * such as lack of resources or support.  The driver/device
138  	 * should reserve resources needed for the commit phase here,
139  	 * but should not commit the attr.
140  	 */
141  
142  	trans.ph_prepare = true;
143  	err = switchdev_port_attr_notify(SWITCHDEV_PORT_ATTR_SET, dev, attr,
144  					 &trans);
145  	if (err)
146  		return err;
147  
148  	/* Phase II: commit attr set.  This cannot fail as a fault
149  	 * of driver/device.  If it does, it's a bug in the driver/device
150  	 * because the driver said everything was OK in phase I.
151  	 */
152  
153  	trans.ph_prepare = false;
154  	err = switchdev_port_attr_notify(SWITCHDEV_PORT_ATTR_SET, dev, attr,
155  					 &trans);
156  	WARN(err, "%s: Commit of attribute (id=%d) failed.\n",
157  	     dev->name, attr->id);
158  
159  	return err;
160  }
161  
162  static void switchdev_port_attr_set_deferred(struct net_device *dev,
163  					     const void *data)
164  {
165  	const struct switchdev_attr *attr = data;
166  	int err;
167  
168  	err = switchdev_port_attr_set_now(dev, attr);
169  	if (err && err != -EOPNOTSUPP)
170  		netdev_err(dev, "failed (err=%d) to set attribute (id=%d)\n",
171  			   err, attr->id);
172  	if (attr->complete)
173  		attr->complete(dev, err, attr->complete_priv);
174  }
175  
176  static int switchdev_port_attr_set_defer(struct net_device *dev,
177  					 const struct switchdev_attr *attr)
178  {
179  	return switchdev_deferred_enqueue(dev, attr, sizeof(*attr),
180  					  switchdev_port_attr_set_deferred);
181  }
182  
183  /**
184   *	switchdev_port_attr_set - Set port attribute
185   *
186   *	@dev: port device
187   *	@attr: attribute to set
188   *
189   *	Use a 2-phase prepare-commit transaction model to ensure
190   *	the system is not left in a partially updated state due to
191   *	a failure from the driver/device.
192   *
193   *	rtnl_lock must be held and the caller must not be in an atomic
194   *	section if the SWITCHDEV_F_DEFER flag is not set.
195   */
196  int switchdev_port_attr_set(struct net_device *dev,
197  			    const struct switchdev_attr *attr)
198  {
199  	if (attr->flags & SWITCHDEV_F_DEFER)
200  		return switchdev_port_attr_set_defer(dev, attr);
201  	ASSERT_RTNL();
202  	return switchdev_port_attr_set_now(dev, attr);
203  }
204  EXPORT_SYMBOL_GPL(switchdev_port_attr_set);
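
/* Editor's note -- an illustrative sketch, not part of switchdev.c: a
 * typical caller (the bridge, for instance, when changing a port's STP
 * state from a context that may hold spinlocks) fills in a switchdev_attr,
 * sets SWITCHDEV_F_DEFER so the attribute is applied later under rtnl_lock
 * by the deferred work item above, and calls switchdev_port_attr_set().
 */
static int example_set_port_stp_state(struct net_device *dev, u8 state)
{
	struct switchdev_attr attr = {
		.orig_dev = dev,
		.id = SWITCHDEV_ATTR_ID_PORT_STP_STATE,
		.flags = SWITCHDEV_F_DEFER,
		.u.stp_state = state,
	};

	return switchdev_port_attr_set(dev, &attr);
}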
205  
206  static size_t switchdev_obj_size(const struct switchdev_obj *obj)
207  {
208  	switch (obj->id) {
209  	case SWITCHDEV_OBJ_ID_PORT_VLAN:
210  		return sizeof(struct switchdev_obj_port_vlan);
211  	case SWITCHDEV_OBJ_ID_PORT_MDB:
212  		return sizeof(struct switchdev_obj_port_mdb);
213  	case SWITCHDEV_OBJ_ID_HOST_MDB:
214  		return sizeof(struct switchdev_obj_port_mdb);
215  	default:
216  		BUG();
217  	}
218  	return 0;
219  }
220  
221  static int switchdev_port_obj_notify(enum switchdev_notifier_type nt,
222  				     struct net_device *dev,
223  				     const struct switchdev_obj *obj,
224  				     struct switchdev_trans *trans,
225  				     struct netlink_ext_ack *extack)
226  {
227  	int rc;
228  	int err;
229  
230  	struct switchdev_notifier_port_obj_info obj_info = {
231  		.obj = obj,
232  		.trans = trans,
233  		.handled = false,
234  	};
235  
236  	rc = call_switchdev_blocking_notifiers(nt, dev, &obj_info.info, extack);
237  	err = notifier_to_errno(rc);
238  	if (err) {
239  		WARN_ON(!obj_info.handled);
240  		return err;
241  	}
242  	if (!obj_info.handled)
243  		return -EOPNOTSUPP;
244  	return 0;
245  }
246  
247  static int switchdev_port_obj_add_now(struct net_device *dev,
248  				      const struct switchdev_obj *obj,
249  				      struct netlink_ext_ack *extack)
250  {
251  	struct switchdev_trans trans;
252  	int err;
253  
254  	ASSERT_RTNL();
255  
256  	/* Phase I: prepare for obj add. Driver/device should fail
257  	 * here if there are going to be issues in the commit phase,
258  	 * such as lack of resources or support.  The driver/device
259  	 * should reserve resources needed for the commit phase here,
260  	 * but should not commit the obj.
261  	 */
262  
263  	trans.ph_prepare = true;
264  	err = switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_ADD,
265  					dev, obj, &trans, extack);
266  	if (err)
267  		return err;
268  
269  	/* Phase II: commit obj add.  This cannot fail as a fault
270  	 * of driver/device.  If it does, it's a bug in the driver/device
271  	 * because the driver said everything was OK in phase I.
272  	 */
273  
274  	trans.ph_prepare = false;
275  	err = switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_ADD,
276  					dev, obj, &trans, extack);
277  	WARN(err, "%s: Commit of object (id=%d) failed.\n", dev->name, obj->id);
278  
279  	return err;
280  }
281  
282  static void switchdev_port_obj_add_deferred(struct net_device *dev,
283  					    const void *data)
284  {
285  	const struct switchdev_obj *obj = data;
286  	int err;
287  
288  	err = switchdev_port_obj_add_now(dev, obj, NULL);
289  	if (err && err != -EOPNOTSUPP)
290  		netdev_err(dev, "failed (err=%d) to add object (id=%d)\n",
291  			   err, obj->id);
292  	if (obj->complete)
293  		obj->complete(dev, err, obj->complete_priv);
294  }
295  
296  static int switchdev_port_obj_add_defer(struct net_device *dev,
297  					const struct switchdev_obj *obj)
298  {
299  	return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj),
300  					  switchdev_port_obj_add_deferred);
301  }
302  
303  /**
304   *	switchdev_port_obj_add - Add port object
305   *
306   *	@dev: port device
307   *	@obj: object to add
308   *	@extack: netlink extended ack
309   *
310   *	Use a 2-phase prepare-commit transaction model to ensure
311   *	the system is not left in a partially updated state due to
312   *	a failure from the driver/device.
313   *
314   *	rtnl_lock must be held and the caller must not be in an atomic
315   *	section if the SWITCHDEV_F_DEFER flag is not set.
316   */
317  int switchdev_port_obj_add(struct net_device *dev,
318  			   const struct switchdev_obj *obj,
319  			   struct netlink_ext_ack *extack)
320  {
321  	if (obj->flags & SWITCHDEV_F_DEFER)
322  		return switchdev_port_obj_add_defer(dev, obj);
323  	ASSERT_RTNL();
324  	return switchdev_port_obj_add_now(dev, obj, extack);
325  }
326  EXPORT_SYMBOL_GPL(switchdev_port_obj_add);
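
/* Editor's note -- an illustrative sketch, not part of switchdev.c: adding
 * a single VLAN to a port.  The bridge builds a similar object when a VLAN
 * is configured on a switchdev-backed bridge port; vid_begin/vid_end and
 * the BRIDGE_VLAN_INFO_* flags follow this kernel's definition of
 * struct switchdev_obj_port_vlan.
 */
static int example_port_vlan_add(struct net_device *dev, u16 vid, bool pvid)
{
	struct switchdev_obj_port_vlan vlan = {
		.obj.orig_dev = dev,
		.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
		.obj.flags = SWITCHDEV_F_DEFER,
		.flags = pvid ? BRIDGE_VLAN_INFO_PVID : 0,
		.vid_begin = vid,
		.vid_end = vid,
	};

	return switchdev_port_obj_add(dev, &vlan.obj, NULL);
}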
327  
328  static int switchdev_port_obj_del_now(struct net_device *dev,
329  				      const struct switchdev_obj *obj)
330  {
331  	return switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_DEL,
332  					 dev, obj, NULL, NULL);
333  }
334  
335  static void switchdev_port_obj_del_deferred(struct net_device *dev,
336  					    const void *data)
337  {
338  	const struct switchdev_obj *obj = data;
339  	int err;
340  
341  	err = switchdev_port_obj_del_now(dev, obj);
342  	if (err && err != -EOPNOTSUPP)
343  		netdev_err(dev, "failed (err=%d) to del object (id=%d)\n",
344  			   err, obj->id);
345  	if (obj->complete)
346  		obj->complete(dev, err, obj->complete_priv);
347  }
348  
349  static int switchdev_port_obj_del_defer(struct net_device *dev,
350  					const struct switchdev_obj *obj)
351  {
352  	return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj),
353  					  switchdev_port_obj_del_deferred);
354  }
355  
356  /**
357   *	switchdev_port_obj_del - Delete port object
358   *
359   *	@dev: port device
361   *	@obj: object to delete
362   *
363   *	rtnl_lock must be held and the caller must not be in an atomic
364   *	section if the SWITCHDEV_F_DEFER flag is not set.
365   */
366  int switchdev_port_obj_del(struct net_device *dev,
367  			   const struct switchdev_obj *obj)
368  {
369  	if (obj->flags & SWITCHDEV_F_DEFER)
370  		return switchdev_port_obj_del_defer(dev, obj);
371  	ASSERT_RTNL();
372  	return switchdev_port_obj_del_now(dev, obj);
373  }
374  EXPORT_SYMBOL_GPL(switchdev_port_obj_del);
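
/* Editor's note -- an illustrative sketch, not part of switchdev.c:
 * deletion mirrors addition with the same object description; as the code
 * above shows, the del path carries no transaction and no extack.
 */
static int example_port_vlan_del(struct net_device *dev, u16 vid)
{
	struct switchdev_obj_port_vlan vlan = {
		.obj.orig_dev = dev,
		.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
		.obj.flags = SWITCHDEV_F_DEFER,
		.vid_begin = vid,
		.vid_end = vid,
	};

	return switchdev_port_obj_del(dev, &vlan.obj);
}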
375  
376  static ATOMIC_NOTIFIER_HEAD(switchdev_notif_chain);
377  static BLOCKING_NOTIFIER_HEAD(switchdev_blocking_notif_chain);
378  
379  /**
380   *	register_switchdev_notifier - Register notifier
381   *	@nb: notifier_block
382   *
383   *	Register switch device notifier.
384   */
385  int register_switchdev_notifier(struct notifier_block *nb)
386  {
387  	return atomic_notifier_chain_register(&switchdev_notif_chain, nb);
388  }
389  EXPORT_SYMBOL_GPL(register_switchdev_notifier);
390  
391  /**
392   *	unregister_switchdev_notifier - Unregister notifier
393   *	@nb: notifier_block
394   *
395   *	Unregister switch device notifier.
396   */
397  int unregister_switchdev_notifier(struct notifier_block *nb)
398  {
399  	return atomic_notifier_chain_unregister(&switchdev_notif_chain, nb);
400  }
401  EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);
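
/* Editor's note -- an illustrative sketch, not part of switchdev.c: a
 * driver registers on the atomic chain to learn about FDB entries the
 * bridge wants offloaded.  The handler must not sleep; real drivers copy
 * the info and program the hardware from a workqueue.  A real handler also
 * first checks that @dev is one of its own ports.
 */
static int example_switchdev_event(struct notifier_block *nb,
				   unsigned long event, void *ptr)
{
	struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
	struct switchdev_notifier_fdb_info *fdb_info = ptr;

	switch (event) {
	case SWITCHDEV_FDB_ADD_TO_DEVICE:
		netdev_dbg(dev, "offload fdb %pM vid %u\n",
			   fdb_info->addr, fdb_info->vid);
		/* copy addr/vid and queue work to program the hardware */
		break;
	case SWITCHDEV_FDB_DEL_TO_DEVICE:
		/* queue work to remove the entry */
		break;
	}
	return NOTIFY_DONE;
}

static struct notifier_block example_switchdev_nb = {
	.notifier_call = example_switchdev_event,
};

/* register_switchdev_notifier(&example_switchdev_nb) at probe time,
 * unregister_switchdev_notifier(&example_switchdev_nb) on removal.
 */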
402  
403  /**
404   *	call_switchdev_notifiers - Call notifiers
405   *	@val: value passed unmodified to notifier function
406   *	@dev: port device
407   *	@info: notifier information data
408   *	@extack: netlink extended ack
409   *	Call all switchdev notifier blocks.
410   */
411  int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
412  			     struct switchdev_notifier_info *info,
413  			     struct netlink_ext_ack *extack)
414  {
415  	info->dev = dev;
416  	info->extack = extack;
417  	return atomic_notifier_call_chain(&switchdev_notif_chain, val, info);
418  }
419  EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
420  
421  int register_switchdev_blocking_notifier(struct notifier_block *nb)
422  {
423  	struct blocking_notifier_head *chain = &switchdev_blocking_notif_chain;
424  
425  	return blocking_notifier_chain_register(chain, nb);
426  }
427  EXPORT_SYMBOL_GPL(register_switchdev_blocking_notifier);
428  
429  int unregister_switchdev_blocking_notifier(struct notifier_block *nb)
430  {
431  	struct blocking_notifier_head *chain = &switchdev_blocking_notif_chain;
432  
433  	return blocking_notifier_chain_unregister(chain, nb);
434  }
435  EXPORT_SYMBOL_GPL(unregister_switchdev_blocking_notifier);
436  
437  int call_switchdev_blocking_notifiers(unsigned long val, struct net_device *dev,
438  				      struct switchdev_notifier_info *info,
439  				      struct netlink_ext_ack *extack)
440  {
441  	info->dev = dev;
442  	info->extack = extack;
443  	return blocking_notifier_call_chain(&switchdev_blocking_notif_chain,
444  					    val, info);
445  }
446  EXPORT_SYMBOL_GPL(call_switchdev_blocking_notifiers);
447  
448  static int __switchdev_handle_port_obj_add(struct net_device *dev,
449  			struct switchdev_notifier_port_obj_info *port_obj_info,
450  			bool (*check_cb)(const struct net_device *dev),
451  			int (*add_cb)(struct net_device *dev,
452  				      const struct switchdev_obj *obj,
453  				      struct switchdev_trans *trans,
454  				      struct netlink_ext_ack *extack))
455  {
456  	struct netlink_ext_ack *extack;
457  	struct net_device *lower_dev;
458  	struct list_head *iter;
459  	int err = -EOPNOTSUPP;
460  
461  	extack = switchdev_notifier_info_to_extack(&port_obj_info->info);
462  
463  	if (check_cb(dev)) {
464  		/* This flag is only checked if the return value is success. */
465  		port_obj_info->handled = true;
466  		return add_cb(dev, port_obj_info->obj, port_obj_info->trans,
467  			      extack);
468  	}
469  
470  	/* Switch ports might be stacked under e.g. a LAG. Ignore the
471  	 * unsupported devices; another driver might be able to handle them,
472  	 * but propagate any hard errors to the callers.
473  	 *
474  	 * If the driver does its own bookkeeping of stacked ports, it's not
475  	 * necessary to go through this helper.
476  	 */
477  	netdev_for_each_lower_dev(dev, lower_dev, iter) {
478  		if (netif_is_bridge_master(lower_dev))
479  			continue;
480  
481  		err = __switchdev_handle_port_obj_add(lower_dev, port_obj_info,
482  						      check_cb, add_cb);
483  		if (err && err != -EOPNOTSUPP)
484  			return err;
485  	}
486  
487  	return err;
488  }
489  
490  int switchdev_handle_port_obj_add(struct net_device *dev,
491  			struct switchdev_notifier_port_obj_info *port_obj_info,
492  			bool (*check_cb)(const struct net_device *dev),
493  			int (*add_cb)(struct net_device *dev,
494  				      const struct switchdev_obj *obj,
495  				      struct switchdev_trans *trans,
496  				      struct netlink_ext_ack *extack))
497  {
498  	int err;
499  
500  	err = __switchdev_handle_port_obj_add(dev, port_obj_info, check_cb,
501  					      add_cb);
502  	if (err == -EOPNOTSUPP)
503  		err = 0;
504  	return err;
505  }
506  EXPORT_SYMBOL_GPL(switchdev_handle_port_obj_add);
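
/* Editor's note -- an illustrative sketch, not part of switchdev.c: a
 * driver's blocking notifier usually dispatches object notifications
 * through the helper above, which walks stacked lower devices on its
 * behalf.  The check and add callbacks below are hypothetical driver code
 * matching the check_cb/add_cb signatures.
 */
static bool example_port_dev_check(const struct net_device *dev)
{
	/* the driver's own test, e.g. dev->netdev_ops == &example_port_ops */
	return false;
}

static int example_port_obj_add(struct net_device *dev,
				const struct switchdev_obj *obj,
				struct switchdev_trans *trans,
				struct netlink_ext_ack *extack)
{
	const struct switchdev_obj_port_vlan *vlan;

	switch (obj->id) {
	case SWITCHDEV_OBJ_ID_PORT_VLAN:
		vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
		if (switchdev_trans_ph_prepare(trans))
			return 0;	/* validate and reserve only */
		netdev_dbg(dev, "commit vlans %u..%u\n",
			   vlan->vid_begin, vlan->vid_end);
		return 0;
	default:
		return -EOPNOTSUPP;
	}
}

static int example_switchdev_blocking_event(struct notifier_block *nb,
					    unsigned long event, void *ptr)
{
	struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
	int err;

	switch (event) {
	case SWITCHDEV_PORT_OBJ_ADD:
		err = switchdev_handle_port_obj_add(dev, ptr,
						    example_port_dev_check,
						    example_port_obj_add);
		return notifier_from_errno(err);
	}
	return NOTIFY_DONE;
}

/* registered with register_switchdev_blocking_notifier(); the
 * SWITCHDEV_PORT_OBJ_DEL case is analogous and uses
 * switchdev_handle_port_obj_del().
 */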
507  
508  static int __switchdev_handle_port_obj_del(struct net_device *dev,
509  			struct switchdev_notifier_port_obj_info *port_obj_info,
510  			bool (*check_cb)(const struct net_device *dev),
511  			int (*del_cb)(struct net_device *dev,
512  				      const struct switchdev_obj *obj))
513  {
514  	struct net_device *lower_dev;
515  	struct list_head *iter;
516  	int err = -EOPNOTSUPP;
517  
518  	if (check_cb(dev)) {
519  		/* This flag is only checked if the return value is success. */
520  		port_obj_info->handled = true;
521  		return del_cb(dev, port_obj_info->obj);
522  	}
523  
524  	/* Switch ports might be stacked under e.g. a LAG. Ignore the
525  	 * unsupported devices; another driver might be able to handle them,
526  	 * but propagate any hard errors to the callers.
527  	 *
528  	 * If the driver does its own bookkeeping of stacked ports, it's not
529  	 * necessary to go through this helper.
530  	 */
531  	netdev_for_each_lower_dev(dev, lower_dev, iter) {
532  		if (netif_is_bridge_master(lower_dev))
533  			continue;
534  
535  		err = __switchdev_handle_port_obj_del(lower_dev, port_obj_info,
536  						      check_cb, del_cb);
537  		if (err && err != -EOPNOTSUPP)
538  			return err;
539  	}
540  
541  	return err;
542  }
543  
544  int switchdev_handle_port_obj_del(struct net_device *dev,
545  			struct switchdev_notifier_port_obj_info *port_obj_info,
546  			bool (*check_cb)(const struct net_device *dev),
547  			int (*del_cb)(struct net_device *dev,
548  				      const struct switchdev_obj *obj))
549  {
550  	int err;
551  
552  	err = __switchdev_handle_port_obj_del(dev, port_obj_info, check_cb,
553  					      del_cb);
554  	if (err == -EOPNOTSUPP)
555  		err = 0;
556  	return err;
557  }
558  EXPORT_SYMBOL_GPL(switchdev_handle_port_obj_del);
559  
560  static int __switchdev_handle_port_attr_set(struct net_device *dev,
561  			struct switchdev_notifier_port_attr_info *port_attr_info,
562  			bool (*check_cb)(const struct net_device *dev),
563  			int (*set_cb)(struct net_device *dev,
564  				      const struct switchdev_attr *attr,
565  				      struct switchdev_trans *trans))
566  {
567  	struct net_device *lower_dev;
568  	struct list_head *iter;
569  	int err = -EOPNOTSUPP;
570  
571  	if (check_cb(dev)) {
572  		port_attr_info->handled = true;
573  		return set_cb(dev, port_attr_info->attr,
574  			      port_attr_info->trans);
575  	}
576  
577  	/* Switch ports might be stacked under e.g. a LAG. Ignore the
578  	 * unsupported devices; another driver might be able to handle them,
579  	 * but propagate any hard errors to the callers.
580  	 *
581  	 * If the driver does its own bookkeeping of stacked ports, it's not
582  	 * necessary to go through this helper.
583  	 */
584  	netdev_for_each_lower_dev(dev, lower_dev, iter) {
585  		if (netif_is_bridge_master(lower_dev))
586  			continue;
587  
588  		err = __switchdev_handle_port_attr_set(lower_dev, port_attr_info,
589  						       check_cb, set_cb);
590  		if (err && err != -EOPNOTSUPP)
591  			return err;
592  	}
593  
594  	return err;
595  }
596  
597  int switchdev_handle_port_attr_set(struct net_device *dev,
598  			struct switchdev_notifier_port_attr_info *port_attr_info,
599  			bool (*check_cb)(const struct net_device *dev),
600  			int (*set_cb)(struct net_device *dev,
601  				      const struct switchdev_attr *attr,
602  				      struct switchdev_trans *trans))
603  {
604  	int err;
605  
606  	err = __switchdev_handle_port_attr_set(dev, port_attr_info, check_cb,
607  					       set_cb);
608  	if (err == -EOPNOTSUPP)
609  		err = 0;
610  	return err;
611  }
612  EXPORT_SYMBOL_GPL(switchdev_handle_port_attr_set);
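
/* Editor's note -- an illustrative sketch, not part of switchdev.c: the
 * attribute counterpart.  The set_cb sees the same two-phase transaction
 * described above: validate during prepare, apply during commit.
 * example_hw_set_stp_state() stands in for a hypothetical driver helper.
 */
static int example_port_attr_set(struct net_device *dev,
				 const struct switchdev_attr *attr,
				 struct switchdev_trans *trans)
{
	switch (attr->id) {
	case SWITCHDEV_ATTR_ID_PORT_STP_STATE:
		if (switchdev_trans_ph_prepare(trans))
			return 0;	/* nothing to reserve for this attribute */
		return example_hw_set_stp_state(dev, attr->u.stp_state);
	default:
		return -EOPNOTSUPP;
	}
}

/* Dispatched from the driver's blocking notifier:
 *
 *	case SWITCHDEV_PORT_ATTR_SET:
 *		err = switchdev_handle_port_attr_set(dev, ptr,
 *						     example_port_dev_check,
 *						     example_port_attr_set);
 *		return notifier_from_errno(err);
 */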
613