/* drivers/infiniband/core/device.c */
/*
 * Copyright (c) 2004 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/module.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <linux/security.h>
#include <linux/notifier.h>
#include <linux/hashtable.h>
#include <rdma/rdma_netlink.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>
#include <rdma/rdma_counter.h>

#include "core_priv.h"
#include "restrack.h"

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("core kernel InfiniBand API");
MODULE_LICENSE("Dual BSD/GPL");

struct workqueue_struct *ib_comp_wq;
struct workqueue_struct *ib_comp_unbound_wq;
struct workqueue_struct *ib_wq;
EXPORT_SYMBOL_GPL(ib_wq);
static struct workqueue_struct *ib_unreg_wq;

/*
 * Each of the three rwsem locks (devices, clients, client_data) protects the
 * xarray of the same name. Specifically, it allows the caller to assert that
 * the MARK will/will not be changing under the lock, and for devices and
 * clients, that the value in the xarray is still a valid pointer. Change of
 * the MARK is linked to the object state, so holding the lock and testing the
 * MARK also asserts that the contained object is in a certain state.
 *
 * This is used to build a two-stage register/unregister flow where objects
 * can continue to be in the xarray even though they are still in the middle
 * of registering/unregistering.
 *
 * The xarray itself provides additional locking, and restartable iteration,
 * which is also relied on.
 *
 * Locks should not be nested, with the exception of client_data, which is
 * allowed to nest under the read side of the other two locks.
 *
 * The devices_rwsem also protects the device name list; any change or
 * assignment of a device name must also hold the write side to guarantee
 * unique names.
 */
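
/*
 * Example (illustrative sketch only): a reader that must only see registered
 * devices takes the read side and iterates by the mark, exactly as
 * ib_policy_change_task() below does:
 *
 *	down_read(&devices_rwsem);
 *	xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) {
 *		... dev is and stays registered while the lock is held ...
 *	}
 *	up_read(&devices_rwsem);
 */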

/*
 * devices contains devices that have had their names assigned. The
 * devices may not be registered. Users that care about the registration
 * status need to call ib_device_try_get() on the device to ensure it is
 * registered, and keep it registered, for the required duration.
 *
 */
static DEFINE_XARRAY_FLAGS(devices, XA_FLAGS_ALLOC);
static DECLARE_RWSEM(devices_rwsem);
#define DEVICE_REGISTERED XA_MARK_1

static u32 highest_client_id;
#define CLIENT_REGISTERED XA_MARK_1
static DEFINE_XARRAY_FLAGS(clients, XA_FLAGS_ALLOC);
static DECLARE_RWSEM(clients_rwsem);

static void ib_client_put(struct ib_client *client)
{
	if (refcount_dec_and_test(&client->uses))
		complete(&client->uses_zero);
}

/*
 * If client_data is registered then the corresponding client must also still
 * be registered.
 */
#define CLIENT_DATA_REGISTERED XA_MARK_1

unsigned int rdma_dev_net_id;

/*
 * A list of net namespaces is maintained in an xarray. This is necessary
 * because we can't get the locking right using the existing net ns list. We
 * would require an init_net callback after the list is updated.
 */
static DEFINE_XARRAY_FLAGS(rdma_nets, XA_FLAGS_ALLOC);
/*
 * rwsem to protect accessing the rdma_nets xarray entries.
 */
static DECLARE_RWSEM(rdma_nets_rwsem);

bool ib_devices_shared_netns = true;
module_param_named(netns_mode, ib_devices_shared_netns, bool, 0444);
MODULE_PARM_DESC(netns_mode,
		 "Share device among net namespaces; default=1 (shared)");
/**
 * rdma_dev_access_netns() - Return whether an rdma device can be accessed
 *			     from a specified net namespace or not.
 * @dev:	Pointer to rdma device which needs to be checked
 * @net:	Pointer to net namespace for which access is to be checked
 *
 * When the rdma device is in shared mode, it ignores the net namespace.
 * When the rdma device is exclusive to a net namespace, the rdma device's
 * net namespace is checked against the specified one.
 */
bool rdma_dev_access_netns(const struct ib_device *dev, const struct net *net)
{
	return (ib_devices_shared_netns ||
		net_eq(read_pnet(&dev->coredev.rdma_net), net));
}
EXPORT_SYMBOL(rdma_dev_access_netns);
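
/*
 * Example (illustrative sketch only): a caller deciding whether to expose a
 * device in a given namespace; the source of the net pointer here is an
 * assumed placeholder:
 *
 *	if (!rdma_dev_access_netns(ibdev, sock_net(skb->sk)))
 *		return -EPERM;
 */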

/*
 * xarray has this behavior where it won't iterate over NULL values stored in
 * allocated arrays.  So we need our own iterator to see all values stored in
 * the array. This does the same thing as xa_for_each except that it also
 * returns NULL valued entries if the array is allocating. Simplified to only
 * work on simple xarrays.
 */
static void *xan_find_marked(struct xarray *xa, unsigned long *indexp,
			     xa_mark_t filter)
{
	XA_STATE(xas, xa, *indexp);
	void *entry;

	rcu_read_lock();
	do {
		entry = xas_find_marked(&xas, ULONG_MAX, filter);
		if (xa_is_zero(entry))
			break;
	} while (xas_retry(&xas, entry));
	rcu_read_unlock();

	if (entry) {
		*indexp = xas.xa_index;
		if (xa_is_zero(entry))
			return NULL;
		return entry;
	}
	return XA_ERROR(-ENOENT);
}
#define xan_for_each_marked(xa, index, entry, filter)                          \
	for (index = 0, entry = xan_find_marked(xa, &(index), filter);         \
	     !xa_is_err(entry);                                                \
	     (index)++, entry = xan_find_marked(xa, &(index), filter))

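/*
 * Example (illustrative sketch only): unlike xa_for_each(), this iterator
 * also visits slots whose stored value is NULL, which client_data relies on.
 * handle() is a placeholder and entry may be NULL:
 *
 *	unsigned long index;
 *	void *entry;
 *
 *	xan_for_each_marked (&device->client_data, index, entry,
 *			     CLIENT_DATA_REGISTERED)
 *		handle(index, entry);
 */
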
/* RCU hash table mapping netdevice pointers to struct ib_port_data */
static DEFINE_SPINLOCK(ndev_hash_lock);
static DECLARE_HASHTABLE(ndev_hash, 5);

static void free_netdevs(struct ib_device *ib_dev);
static void ib_unregister_work(struct work_struct *work);
static void __ib_unregister_device(struct ib_device *device);
static int ib_security_change(struct notifier_block *nb, unsigned long event,
			      void *lsm_data);
static void ib_policy_change_task(struct work_struct *work);
static DECLARE_WORK(ib_policy_change_work, ib_policy_change_task);

static void __ibdev_printk(const char *level, const struct ib_device *ibdev,
			   struct va_format *vaf)
{
	if (ibdev && ibdev->dev.parent)
		dev_printk_emit(level[1] - '0',
				ibdev->dev.parent,
				"%s %s %s: %pV",
				dev_driver_string(ibdev->dev.parent),
				dev_name(ibdev->dev.parent),
				dev_name(&ibdev->dev),
				vaf);
	else if (ibdev)
		printk("%s%s: %pV",
		       level, dev_name(&ibdev->dev), vaf);
	else
		printk("%s(NULL ib_device): %pV", level, vaf);
}

void ibdev_printk(const char *level, const struct ib_device *ibdev,
		  const char *format, ...)
{
	struct va_format vaf;
	va_list args;

	va_start(args, format);

	vaf.fmt = format;
	vaf.va = &args;

	__ibdev_printk(level, ibdev, &vaf);

	va_end(args);
}
EXPORT_SYMBOL(ibdev_printk);

#define define_ibdev_printk_level(func, level)                  \
void func(const struct ib_device *ibdev, const char *fmt, ...)  \
{                                                               \
	struct va_format vaf;                                   \
	va_list args;                                           \
								\
	va_start(args, fmt);                                    \
								\
	vaf.fmt = fmt;                                          \
	vaf.va = &args;                                         \
								\
	__ibdev_printk(level, ibdev, &vaf);                     \
								\
	va_end(args);                                           \
}                                                               \
EXPORT_SYMBOL(func);

define_ibdev_printk_level(ibdev_emerg, KERN_EMERG);
define_ibdev_printk_level(ibdev_alert, KERN_ALERT);
define_ibdev_printk_level(ibdev_crit, KERN_CRIT);
define_ibdev_printk_level(ibdev_err, KERN_ERR);
define_ibdev_printk_level(ibdev_warn, KERN_WARNING);
define_ibdev_printk_level(ibdev_notice, KERN_NOTICE);
define_ibdev_printk_level(ibdev_info, KERN_INFO);
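
/*
 * Example (illustrative sketch only): the generated helpers are used like
 * dev_err() and friends; the message text below is an arbitrary example:
 *
 *	ibdev_err(ibdev, "Couldn't register device with driver model\n");
 *	ibdev_info(ibdev, "port %u state changed\n", port);
 */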

static struct notifier_block ibdev_lsm_nb = {
	.notifier_call = ib_security_change,
};

static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net,
				 struct net *net);

/* Pointer to the RCU head at the start of the ib_port_data array */
struct ib_port_data_rcu {
	struct rcu_head rcu_head;
	struct ib_port_data pdata[];
};

static void ib_device_check_mandatory(struct ib_device *device)
{
#define IB_MANDATORY_FUNC(x) { offsetof(struct ib_device_ops, x), #x }
	static const struct {
		size_t offset;
		char  *name;
	} mandatory_table[] = {
		IB_MANDATORY_FUNC(query_device),
		IB_MANDATORY_FUNC(query_port),
		IB_MANDATORY_FUNC(alloc_pd),
		IB_MANDATORY_FUNC(dealloc_pd),
		IB_MANDATORY_FUNC(create_qp),
		IB_MANDATORY_FUNC(modify_qp),
		IB_MANDATORY_FUNC(destroy_qp),
		IB_MANDATORY_FUNC(post_send),
		IB_MANDATORY_FUNC(post_recv),
		IB_MANDATORY_FUNC(create_cq),
		IB_MANDATORY_FUNC(destroy_cq),
		IB_MANDATORY_FUNC(poll_cq),
		IB_MANDATORY_FUNC(req_notify_cq),
		IB_MANDATORY_FUNC(get_dma_mr),
		IB_MANDATORY_FUNC(reg_user_mr),
		IB_MANDATORY_FUNC(dereg_mr),
		IB_MANDATORY_FUNC(get_port_immutable)
	};
	int i;

	device->kverbs_provider = true;
	for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) {
		if (!*(void **) ((void *) &device->ops +
				 mandatory_table[i].offset)) {
			device->kverbs_provider = false;
			break;
		}
	}
}

/*
 * Caller must perform ib_device_put() to return the device reference count
 * when ib_device_get_by_index() returns a valid device pointer.
 */
struct ib_device *ib_device_get_by_index(const struct net *net, u32 index)
{
	struct ib_device *device;

	down_read(&devices_rwsem);
	device = xa_load(&devices, index);
	if (device) {
		if (!rdma_dev_access_netns(device, net)) {
			device = NULL;
			goto out;
		}

		if (!ib_device_try_get(device))
			device = NULL;
	}
out:
	up_read(&devices_rwsem);
	return device;
}

/**
 * ib_device_put - Release IB device reference
 * @device: device whose reference to be released
 *
 * ib_device_put() releases the reference to the IB device to allow it to be
 * unregistered and eventually freed.
 */
void ib_device_put(struct ib_device *device)
{
	if (refcount_dec_and_test(&device->refcount))
		complete(&device->unreg_completion);
}
EXPORT_SYMBOL(ib_device_put);
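
/*
 * Example (illustrative sketch only): the usual pairing with
 * ib_device_try_get(); do_something() is a placeholder for work that needs
 * the device to stay registered:
 *
 *	if (ib_device_try_get(device)) {
 *		do_something(device);
 *		ib_device_put(device);
 *	}
 */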

static struct ib_device *__ib_device_get_by_name(const char *name)
{
	struct ib_device *device;
	unsigned long index;

	xa_for_each (&devices, index, device)
		if (!strcmp(name, dev_name(&device->dev)))
			return device;

	return NULL;
}

/**
 * ib_device_get_by_name - Find an IB device by name
 * @name: The name to look for
 * @driver_id: The driver ID that must match (RDMA_DRIVER_UNKNOWN matches all)
 *
 * Find and hold an ib_device by its name. The caller must call
 * ib_device_put() on the returned pointer.
 */
struct ib_device *ib_device_get_by_name(const char *name,
					enum rdma_driver_id driver_id)
{
	struct ib_device *device;

	down_read(&devices_rwsem);
	device = __ib_device_get_by_name(name);
	if (device && driver_id != RDMA_DRIVER_UNKNOWN &&
	    device->ops.driver_id != driver_id)
		device = NULL;

	if (device) {
		if (!ib_device_try_get(device))
			device = NULL;
	}
	up_read(&devices_rwsem);
	return device;
}
EXPORT_SYMBOL(ib_device_get_by_name);
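
/*
 * Example (illustrative sketch only; the device name "mlx5_0" is an assumed
 * example):
 *
 *	struct ib_device *dev;
 *
 *	dev = ib_device_get_by_name("mlx5_0", RDMA_DRIVER_UNKNOWN);
 *	if (dev) {
 *		... use dev ...
 *		ib_device_put(dev);
 *	}
 */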

static int rename_compat_devs(struct ib_device *device)
{
	struct ib_core_device *cdev;
	unsigned long index;
	int ret = 0;

	mutex_lock(&device->compat_devs_mutex);
	xa_for_each (&device->compat_devs, index, cdev) {
		ret = device_rename(&cdev->dev, dev_name(&device->dev));
		if (ret) {
			dev_warn(&cdev->dev,
				 "Fail to rename compatdev to new name %s\n",
				 dev_name(&device->dev));
			break;
		}
	}
	mutex_unlock(&device->compat_devs_mutex);
	return ret;
}

int ib_device_rename(struct ib_device *ibdev, const char *name)
{
	unsigned long index;
	void *client_data;
	int ret;

	down_write(&devices_rwsem);
	if (!strcmp(name, dev_name(&ibdev->dev))) {
		up_write(&devices_rwsem);
		return 0;
	}

	if (__ib_device_get_by_name(name)) {
		up_write(&devices_rwsem);
		return -EEXIST;
	}

	ret = device_rename(&ibdev->dev, name);
	if (ret) {
		up_write(&devices_rwsem);
		return ret;
	}

	strscpy(ibdev->name, name, IB_DEVICE_NAME_MAX);
	ret = rename_compat_devs(ibdev);

	downgrade_write(&devices_rwsem);
	down_read(&ibdev->client_data_rwsem);
	xan_for_each_marked(&ibdev->client_data, index, client_data,
			    CLIENT_DATA_REGISTERED) {
		struct ib_client *client = xa_load(&clients, index);

		if (!client || !client->rename)
			continue;

		client->rename(ibdev, client_data);
	}
	up_read(&ibdev->client_data_rwsem);
	up_read(&devices_rwsem);
	return 0;
}

int ib_device_set_dim(struct ib_device *ibdev, u8 use_dim)
{
	if (use_dim > 1)
		return -EINVAL;
	ibdev->use_cq_dim = use_dim;

	return 0;
}

static int alloc_name(struct ib_device *ibdev, const char *name)
{
	struct ib_device *device;
	unsigned long index;
	struct ida inuse;
	int rc;
	int i;

	lockdep_assert_held_write(&devices_rwsem);
	ida_init(&inuse);
	xa_for_each (&devices, index, device) {
		char buf[IB_DEVICE_NAME_MAX];

		if (sscanf(dev_name(&device->dev), name, &i) != 1)
			continue;
		if (i < 0 || i >= INT_MAX)
			continue;
		snprintf(buf, sizeof buf, name, i);
		if (strcmp(buf, dev_name(&device->dev)) != 0)
			continue;

		rc = ida_alloc_range(&inuse, i, i, GFP_KERNEL);
		if (rc < 0)
			goto out;
	}

	rc = ida_alloc(&inuse, GFP_KERNEL);
	if (rc < 0)
		goto out;

	rc = dev_set_name(&ibdev->dev, name, rc);
out:
	ida_destroy(&inuse);
	return rc;
}
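
/*
 * Example (illustrative sketch only): a driver registering with a
 * printf-style name template such as "mlx5_%d" lands here; if mlx5_0 and
 * mlx5_1 are already taken, the lowest free index yields mlx5_2. The
 * template shown is an assumption for illustration.
 */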

static void ib_device_release(struct device *device)
{
	struct ib_device *dev = container_of(device, struct ib_device, dev);

	free_netdevs(dev);
	WARN_ON(refcount_read(&dev->refcount));
	if (dev->hw_stats_data)
		ib_device_release_hw_stats(dev->hw_stats_data);
	if (dev->port_data) {
		ib_cache_release_one(dev);
		ib_security_release_port_pkey_list(dev);
		rdma_counter_release(dev);
		kfree_rcu(container_of(dev->port_data, struct ib_port_data_rcu,
				       pdata[0]),
			  rcu_head);
	}

	mutex_destroy(&dev->subdev_lock);
	mutex_destroy(&dev->unregistration_lock);
	mutex_destroy(&dev->compat_devs_mutex);

	xa_destroy(&dev->compat_devs);
	xa_destroy(&dev->client_data);
	kfree_rcu(dev, rcu_head);
}

static int ib_device_uevent(const struct device *device,
			    struct kobj_uevent_env *env)
{
	if (add_uevent_var(env, "NAME=%s", dev_name(device)))
		return -ENOMEM;

	/*
	 * It would be nice to pass the node GUID with the event...
	 */

	return 0;
}

static const void *net_namespace(const struct device *d)
{
	const struct ib_core_device *coredev =
			container_of(d, struct ib_core_device, dev);

	return read_pnet(&coredev->rdma_net);
}

static struct class ib_class = {
	.name    = "infiniband",
	.dev_release = ib_device_release,
	.dev_uevent = ib_device_uevent,
	.ns_type = &net_ns_type_operations,
	.namespace = net_namespace,
};

static void rdma_init_coredev(struct ib_core_device *coredev,
			      struct ib_device *dev, struct net *net)
{
	/* This BUILD_BUG_ON is intended to catch layout change
	 * of union of ib_core_device and device.
	 * dev must be the first element as ib_core and provider
	 * drivers use it. Adding anything in ib_core_device before
	 * device will break this assumption.
	 */
	BUILD_BUG_ON(offsetof(struct ib_device, coredev.dev) !=
		     offsetof(struct ib_device, dev));

	coredev->dev.class = &ib_class;
	coredev->dev.groups = dev->groups;
	device_initialize(&coredev->dev);
	coredev->owner = dev;
	INIT_LIST_HEAD(&coredev->port_list);
	write_pnet(&coredev->rdma_net, net);
}

/**
 * _ib_alloc_device - allocate an IB device struct
 * @size:size of structure to allocate
 *
 * Low-level drivers should use ib_alloc_device() to allocate &struct
 * ib_device.  @size is the size of the structure to be allocated,
 * including any private data used by the low-level driver.
 * ib_dealloc_device() must be used to free structures allocated with
 * ib_alloc_device().
 */
struct ib_device *_ib_alloc_device(size_t size)
{
	struct ib_device *device;
	unsigned int i;

	if (WARN_ON(size < sizeof(struct ib_device)))
		return NULL;

	device = kzalloc(size, GFP_KERNEL);
	if (!device)
		return NULL;

	if (rdma_restrack_init(device)) {
		kfree(device);
		return NULL;
	}

	rdma_init_coredev(&device->coredev, device, &init_net);

	INIT_LIST_HEAD(&device->event_handler_list);
	spin_lock_init(&device->qp_open_list_lock);
	init_rwsem(&device->event_handler_rwsem);
	mutex_init(&device->unregistration_lock);
	/*
	 * client_data needs to be an allocating xarray because we don't want
	 * our mark to be destroyed if the user stores NULL in the client
	 * data.
	 */
	xa_init_flags(&device->client_data, XA_FLAGS_ALLOC);
	init_rwsem(&device->client_data_rwsem);
	xa_init_flags(&device->compat_devs, XA_FLAGS_ALLOC);
	mutex_init(&device->compat_devs_mutex);
	init_completion(&device->unreg_completion);
	INIT_WORK(&device->unregistration_work, ib_unregister_work);

	spin_lock_init(&device->cq_pools_lock);
	for (i = 0; i < ARRAY_SIZE(device->cq_pools); i++)
		INIT_LIST_HEAD(&device->cq_pools[i]);

	rwlock_init(&device->cache_lock);

	device->uverbs_cmd_mask =
		BIT_ULL(IB_USER_VERBS_CMD_ALLOC_MW) |
		BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD) |
		BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST) |
		BIT_ULL(IB_USER_VERBS_CMD_CLOSE_XRCD) |
		BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH) |
		BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
		BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ) |
		BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP) |
		BIT_ULL(IB_USER_VERBS_CMD_CREATE_SRQ) |
		BIT_ULL(IB_USER_VERBS_CMD_CREATE_XSRQ) |
		BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_MW) |
		BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD) |
		BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR) |
		BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH) |
		BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ) |
		BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP) |
		BIT_ULL(IB_USER_VERBS_CMD_DESTROY_SRQ) |
		BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST) |
		BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT) |
		BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP) |
		BIT_ULL(IB_USER_VERBS_CMD_MODIFY_SRQ) |
		BIT_ULL(IB_USER_VERBS_CMD_OPEN_QP) |
		BIT_ULL(IB_USER_VERBS_CMD_OPEN_XRCD) |
		BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE) |
		BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT) |
		BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP) |
		BIT_ULL(IB_USER_VERBS_CMD_QUERY_SRQ) |
		BIT_ULL(IB_USER_VERBS_CMD_REG_MR) |
		BIT_ULL(IB_USER_VERBS_CMD_REREG_MR) |
		BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ);

	mutex_init(&device->subdev_lock);
	INIT_LIST_HEAD(&device->subdev_list_head);
	INIT_LIST_HEAD(&device->subdev_list);

	return device;
}
EXPORT_SYMBOL(_ib_alloc_device);
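
/*
 * Example (illustrative sketch only): drivers do not call _ib_alloc_device()
 * directly but go through the ib_alloc_device() macro, embedding struct
 * ib_device in their private structure. The struct and member names below
 * are assumptions:
 *
 *	struct my_dev {
 *		struct ib_device ibdev;
 *		... driver private data ...
 *	};
 *
 *	struct my_dev *dev = ib_alloc_device(my_dev, ibdev);
 */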

/**
 * ib_dealloc_device - free an IB device struct
 * @device:structure to free
 *
 * Free a structure allocated with ib_alloc_device().
 */
void ib_dealloc_device(struct ib_device *device)
{
	if (device->ops.dealloc_driver)
		device->ops.dealloc_driver(device);

	/*
	 * ib_unregister_driver() requires all devices to remain in the xarray
	 * while their ops are callable. The last op we call is dealloc_driver
	 * above.  This is needed to create a fence on op callbacks prior to
	 * allowing the driver module to unload.
	 */
	down_write(&devices_rwsem);
	if (xa_load(&devices, device->index) == device)
		xa_erase(&devices, device->index);
	up_write(&devices_rwsem);

	/* Expedite releasing netdev references */
	free_netdevs(device);

	WARN_ON(!xa_empty(&device->compat_devs));
	WARN_ON(!xa_empty(&device->client_data));
	WARN_ON(refcount_read(&device->refcount));
	rdma_restrack_clean(device);
	/* Balances with device_initialize */
	put_device(&device->dev);
}
EXPORT_SYMBOL(ib_dealloc_device);

/*
 * add_client_context() and remove_client_context() must be safe against
 * parallel calls on the same device - registration/unregistration of both the
 * device and client can be occurring in parallel.
 *
 * The routines need to be a fence; any caller must not return until the add
 * or remove is fully completed.
 */
static int add_client_context(struct ib_device *device,
			      struct ib_client *client)
{
	int ret = 0;

	if (!device->kverbs_provider && !client->no_kverbs_req)
		return 0;

	down_write(&device->client_data_rwsem);
	/*
	 * So long as the client is registered hold both the client and device
	 * unregistration locks.
	 */
	if (!refcount_inc_not_zero(&client->uses))
		goto out_unlock;
	refcount_inc(&device->refcount);

	/*
	 * Another caller to add_client_context got here first and has already
	 * completely initialized context.
	 */
	if (xa_get_mark(&device->client_data, client->client_id,
		    CLIENT_DATA_REGISTERED))
		goto out;

	ret = xa_err(xa_store(&device->client_data, client->client_id, NULL,
			      GFP_KERNEL));
	if (ret)
		goto out;
	downgrade_write(&device->client_data_rwsem);
	if (client->add) {
		if (client->add(device)) {
			/*
			 * If a client fails to add then the error code is
			 * ignored, but we won't call any more ops on this
			 * client.
			 */
			xa_erase(&device->client_data, client->client_id);
			up_read(&device->client_data_rwsem);
			ib_device_put(device);
			ib_client_put(client);
			return 0;
		}
	}

	/* Readers shall not see a client until add has been completed */
	xa_set_mark(&device->client_data, client->client_id,
		    CLIENT_DATA_REGISTERED);
	up_read(&device->client_data_rwsem);
	return 0;

out:
	ib_device_put(device);
	ib_client_put(client);
out_unlock:
	up_write(&device->client_data_rwsem);
	return ret;
}

static void remove_client_context(struct ib_device *device,
				  unsigned int client_id)
{
	struct ib_client *client;
	void *client_data;

	down_write(&device->client_data_rwsem);
	if (!xa_get_mark(&device->client_data, client_id,
			 CLIENT_DATA_REGISTERED)) {
		up_write(&device->client_data_rwsem);
		return;
	}
	client_data = xa_load(&device->client_data, client_id);
	xa_clear_mark(&device->client_data, client_id, CLIENT_DATA_REGISTERED);
	client = xa_load(&clients, client_id);
	up_write(&device->client_data_rwsem);

	/*
	 * Notice we cannot be holding any exclusive locks when calling the
	 * remove callback as the remove callback can recurse back into any
	 * public functions in this module and thus try for any locks those
	 * functions take.
	 *
	 * For this reason clients and drivers should not call the
	 * unregistration functions while holding any locks.
	 */
	if (client->remove)
		client->remove(device, client_data);

	xa_erase(&device->client_data, client_id);
	ib_device_put(device);
	ib_client_put(client);
}

static int alloc_port_data(struct ib_device *device)
{
	struct ib_port_data_rcu *pdata_rcu;
	u32 port;

	if (device->port_data)
		return 0;

	/* This can only be called once the physical port range is defined */
	if (WARN_ON(!device->phys_port_cnt))
		return -EINVAL;

	/* Reserve U32_MAX so the logic to go over all the ports is sane */
	if (WARN_ON(device->phys_port_cnt == U32_MAX))
		return -EINVAL;

	/*
	 * device->port_data is indexed directly by the port number to make
	 * access to this data as efficient as possible.
	 *
	 * Therefore port_data is declared as a 1-based array with potential
	 * empty slots at the beginning.
	 */
	pdata_rcu = kzalloc(struct_size(pdata_rcu, pdata,
					size_add(rdma_end_port(device), 1)),
			    GFP_KERNEL);
	if (!pdata_rcu)
		return -ENOMEM;
	/*
	 * The rcu_head is put in front of the port data array and the stored
	 * pointer is adjusted since we never need to see that member until
	 * kfree_rcu.
	 */
	device->port_data = pdata_rcu->pdata;

	rdma_for_each_port (device, port) {
		struct ib_port_data *pdata = &device->port_data[port];

		pdata->ib_dev = device;
		spin_lock_init(&pdata->pkey_list_lock);
		INIT_LIST_HEAD(&pdata->pkey_list);
		spin_lock_init(&pdata->netdev_lock);
		INIT_HLIST_NODE(&pdata->ndev_hash_link);
	}
	return 0;
}

static int verify_immutable(const struct ib_device *dev, u32 port)
{
	return WARN_ON(!rdma_cap_ib_mad(dev, port) &&
			    rdma_max_mad_size(dev, port) != 0);
}

static int setup_port_data(struct ib_device *device)
{
	u32 port;
	int ret;

	ret = alloc_port_data(device);
	if (ret)
		return ret;

	rdma_for_each_port (device, port) {
		struct ib_port_data *pdata = &device->port_data[port];

		ret = device->ops.get_port_immutable(device, port,
						     &pdata->immutable);
		if (ret)
			return ret;

		if (verify_immutable(device, port))
			return -EINVAL;
	}
	return 0;
}

/**
 * ib_port_immutable_read() - Read rdma port's immutable data
 * @dev: IB device
 * @port: port number whose immutable data to read. It starts with index 1 and
 *        is valid up to and including rdma_end_port().
 */
const struct ib_port_immutable*
ib_port_immutable_read(struct ib_device *dev, unsigned int port)
{
	WARN_ON(!rdma_is_port_valid(dev, port));
	return &dev->port_data[port].immutable;
}
EXPORT_SYMBOL(ib_port_immutable_read);
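
/*
 * Example (illustrative sketch only): reading a port attribute for every
 * port; use() is a placeholder, while core_cap_flags is a real member of
 * struct ib_port_immutable:
 *
 *	unsigned int port;
 *
 *	rdma_for_each_port (dev, port) {
 *		const struct ib_port_immutable *pi =
 *			ib_port_immutable_read(dev, port);
 *		use(pi->core_cap_flags);
 *	}
 */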
8787416790eSParav Pandit 
ib_get_device_fw_str(struct ib_device * dev,char * str)8799abb0d1bSLeon Romanovsky void ib_get_device_fw_str(struct ib_device *dev, char *str)
8805fa76c20SIra Weiny {
8813023a1e9SKamal Heib 	if (dev->ops.get_dev_fw_str)
8823023a1e9SKamal Heib 		dev->ops.get_dev_fw_str(dev, str);
8835fa76c20SIra Weiny 	else
8845fa76c20SIra Weiny 		str[0] = '\0';
8855fa76c20SIra Weiny }
8865fa76c20SIra Weiny EXPORT_SYMBOL(ib_get_device_fw_str);
8875fa76c20SIra Weiny 
ib_policy_change_task(struct work_struct * work)8888f408ab6SDaniel Jurgens static void ib_policy_change_task(struct work_struct *work)
8898f408ab6SDaniel Jurgens {
8908f408ab6SDaniel Jurgens 	struct ib_device *dev;
8910df91bb6SJason Gunthorpe 	unsigned long index;
8928f408ab6SDaniel Jurgens 
893921eab11SJason Gunthorpe 	down_read(&devices_rwsem);
8940df91bb6SJason Gunthorpe 	xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) {
895ea1075edSJason Gunthorpe 		unsigned int i;
8968f408ab6SDaniel Jurgens 
897ea1075edSJason Gunthorpe 		rdma_for_each_port (dev, i) {
8988f408ab6SDaniel Jurgens 			u64 sp;
899c5f8f2c5SAnand Khoje 			ib_get_cached_subnet_prefix(dev, i, &sp);
9008f408ab6SDaniel Jurgens 			ib_security_cache_change(dev, i, sp);
9018f408ab6SDaniel Jurgens 		}
9028f408ab6SDaniel Jurgens 	}
903921eab11SJason Gunthorpe 	up_read(&devices_rwsem);
9048f408ab6SDaniel Jurgens }
9058f408ab6SDaniel Jurgens 
ib_security_change(struct notifier_block * nb,unsigned long event,void * lsm_data)9068f408ab6SDaniel Jurgens static int ib_security_change(struct notifier_block *nb, unsigned long event,
9078f408ab6SDaniel Jurgens 			      void *lsm_data)
9088f408ab6SDaniel Jurgens {
9098f408ab6SDaniel Jurgens 	if (event != LSM_POLICY_CHANGE)
9108f408ab6SDaniel Jurgens 		return NOTIFY_DONE;
9118f408ab6SDaniel Jurgens 
9128f408ab6SDaniel Jurgens 	schedule_work(&ib_policy_change_work);
913c66f6741SDaniel Jurgens 	ib_mad_agent_security_change();
9148f408ab6SDaniel Jurgens 
9158f408ab6SDaniel Jurgens 	return NOTIFY_OK;
9168f408ab6SDaniel Jurgens }
9178f408ab6SDaniel Jurgens 
compatdev_release(struct device * dev)9184e0f7b90SParav Pandit static void compatdev_release(struct device *dev)
9194e0f7b90SParav Pandit {
9204e0f7b90SParav Pandit 	struct ib_core_device *cdev =
9214e0f7b90SParav Pandit 		container_of(dev, struct ib_core_device, dev);
9224e0f7b90SParav Pandit 
9234e0f7b90SParav Pandit 	kfree(cdev);
9244e0f7b90SParav Pandit }
9254e0f7b90SParav Pandit 
add_one_compat_dev(struct ib_device * device,struct rdma_dev_net * rnet)9264e0f7b90SParav Pandit static int add_one_compat_dev(struct ib_device *device,
9274e0f7b90SParav Pandit 			      struct rdma_dev_net *rnet)
9284e0f7b90SParav Pandit {
9294e0f7b90SParav Pandit 	struct ib_core_device *cdev;
9304e0f7b90SParav Pandit 	int ret;
9314e0f7b90SParav Pandit 
9322b34c558SParav Pandit 	lockdep_assert_held(&rdma_nets_rwsem);
933a56bc45bSParav Pandit 	if (!ib_devices_shared_netns)
934a56bc45bSParav Pandit 		return 0;
935a56bc45bSParav Pandit 
9364e0f7b90SParav Pandit 	/*
9374e0f7b90SParav Pandit 	 * Create and add compat device in all namespaces other than where it
9384e0f7b90SParav Pandit 	 * is currently bound to.
9394e0f7b90SParav Pandit 	 */
9404e0f7b90SParav Pandit 	if (net_eq(read_pnet(&rnet->net),
9414e0f7b90SParav Pandit 		   read_pnet(&device->coredev.rdma_net)))
9424e0f7b90SParav Pandit 		return 0;
9434e0f7b90SParav Pandit 
9444e0f7b90SParav Pandit 	/*
9454e0f7b90SParav Pandit 	 * The first of init_net() or ib_register_device() to take the
9464e0f7b90SParav Pandit 	 * compat_devs_mutex wins and gets to add the device. Others will wait
9474e0f7b90SParav Pandit 	 * for completion here.
9484e0f7b90SParav Pandit 	 */
9494e0f7b90SParav Pandit 	mutex_lock(&device->compat_devs_mutex);
9504e0f7b90SParav Pandit 	cdev = xa_load(&device->compat_devs, rnet->id);
9514e0f7b90SParav Pandit 	if (cdev) {
9524e0f7b90SParav Pandit 		ret = 0;
9534e0f7b90SParav Pandit 		goto done;
9544e0f7b90SParav Pandit 	}
9554e0f7b90SParav Pandit 	ret = xa_reserve(&device->compat_devs, rnet->id, GFP_KERNEL);
9564e0f7b90SParav Pandit 	if (ret)
9574e0f7b90SParav Pandit 		goto done;
9584e0f7b90SParav Pandit 
9594e0f7b90SParav Pandit 	cdev = kzalloc(sizeof(*cdev), GFP_KERNEL);
9604e0f7b90SParav Pandit 	if (!cdev) {
9614e0f7b90SParav Pandit 		ret = -ENOMEM;
9624e0f7b90SParav Pandit 		goto cdev_err;
9634e0f7b90SParav Pandit 	}
9644e0f7b90SParav Pandit 
9654e0f7b90SParav Pandit 	cdev->dev.parent = device->dev.parent;
9664e0f7b90SParav Pandit 	rdma_init_coredev(cdev, device, read_pnet(&rnet->net));
9674e0f7b90SParav Pandit 	cdev->dev.release = compatdev_release;
968f2f2b3bbSJason Gunthorpe 	ret = dev_set_name(&cdev->dev, "%s", dev_name(&device->dev));
969f2f2b3bbSJason Gunthorpe 	if (ret)
970f2f2b3bbSJason Gunthorpe 		goto add_err;
9714e0f7b90SParav Pandit 
9724e0f7b90SParav Pandit 	ret = device_add(&cdev->dev);
9734e0f7b90SParav Pandit 	if (ret)
9744e0f7b90SParav Pandit 		goto add_err;
975eb15c78bSParav Pandit 	ret = ib_setup_port_attrs(cdev);
9765417783eSParav Pandit 	if (ret)
9775417783eSParav Pandit 		goto port_err;
9784e0f7b90SParav Pandit 
9794e0f7b90SParav Pandit 	ret = xa_err(xa_store(&device->compat_devs, rnet->id,
9804e0f7b90SParav Pandit 			      cdev, GFP_KERNEL));
9814e0f7b90SParav Pandit 	if (ret)
9824e0f7b90SParav Pandit 		goto insert_err;
9834e0f7b90SParav Pandit 
9844e0f7b90SParav Pandit 	mutex_unlock(&device->compat_devs_mutex);
9854e0f7b90SParav Pandit 	return 0;
9864e0f7b90SParav Pandit 
9874e0f7b90SParav Pandit insert_err:
9885417783eSParav Pandit 	ib_free_port_attrs(cdev);
9895417783eSParav Pandit port_err:
9904e0f7b90SParav Pandit 	device_del(&cdev->dev);
9914e0f7b90SParav Pandit add_err:
9924e0f7b90SParav Pandit 	put_device(&cdev->dev);
9934e0f7b90SParav Pandit cdev_err:
9944e0f7b90SParav Pandit 	xa_release(&device->compat_devs, rnet->id);
9954e0f7b90SParav Pandit done:
9964e0f7b90SParav Pandit 	mutex_unlock(&device->compat_devs_mutex);
9974e0f7b90SParav Pandit 	return ret;
9984e0f7b90SParav Pandit }
9994e0f7b90SParav Pandit 
static void remove_one_compat_dev(struct ib_device *device, u32 id)
{
	struct ib_core_device *cdev;

	mutex_lock(&device->compat_devs_mutex);
	cdev = xa_erase(&device->compat_devs, id);
	mutex_unlock(&device->compat_devs_mutex);
	if (cdev) {
		ib_free_port_attrs(cdev);
		device_del(&cdev->dev);
		put_device(&cdev->dev);
	}
}

static void remove_compat_devs(struct ib_device *device)
{
	struct ib_core_device *cdev;
	unsigned long index;

	xa_for_each (&device->compat_devs, index, cdev)
		remove_one_compat_dev(device, index);
}

static int add_compat_devs(struct ib_device *device)
{
	struct rdma_dev_net *rnet;
	unsigned long index;
	int ret = 0;

	lockdep_assert_held(&devices_rwsem);

	down_read(&rdma_nets_rwsem);
	xa_for_each (&rdma_nets, index, rnet) {
		ret = add_one_compat_dev(device, rnet);
		if (ret)
			break;
	}
	up_read(&rdma_nets_rwsem);
	return ret;
}

static void remove_all_compat_devs(void)
{
	struct ib_compat_device *cdev;
	struct ib_device *dev;
	unsigned long index;

	down_read(&devices_rwsem);
	xa_for_each (&devices, index, dev) {
		unsigned long c_index = 0;

		/* Hold rdma_nets_rwsem so that any other thread modifying
		 * this system parameter can synchronize with this thread.
		 */
		down_read(&rdma_nets_rwsem);
		xa_for_each (&dev->compat_devs, c_index, cdev)
			remove_one_compat_dev(dev, c_index);
		up_read(&rdma_nets_rwsem);
	}
	up_read(&devices_rwsem);
}

static int add_all_compat_devs(void)
{
	struct rdma_dev_net *rnet;
	struct ib_device *dev;
	unsigned long index;
	int ret = 0;

	down_read(&devices_rwsem);
	xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) {
		unsigned long net_index = 0;

		/* Hold rdma_nets_rwsem so that any other thread modifying
		 * this system parameter can synchronize with this thread.
		 */
		down_read(&rdma_nets_rwsem);
		xa_for_each (&rdma_nets, net_index, rnet) {
			ret = add_one_compat_dev(dev, rnet);
			if (ret)
				break;
		}
		up_read(&rdma_nets_rwsem);
	}
	up_read(&devices_rwsem);
	if (ret)
		remove_all_compat_devs();
	return ret;
}

int rdma_compatdev_set(u8 enable)
{
	struct rdma_dev_net *rnet;
	unsigned long index;
	int ret = 0;

	down_write(&rdma_nets_rwsem);
	if (ib_devices_shared_netns == enable) {
		up_write(&rdma_nets_rwsem);
		return 0;
	}

	/* Enabling/disabling of compat devices is not supported when
	 * namespaces other than the default init_net exist.
	 */
	xa_for_each (&rdma_nets, index, rnet) {
		ret++;
		break;
	}
	if (!ret)
		ib_devices_shared_netns = enable;
	up_write(&rdma_nets_rwsem);
	if (ret)
		return -EBUSY;

	if (enable)
		ret = add_all_compat_devs();
	else
		remove_all_compat_devs();
	return ret;
}
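
/*
 * Usage note (illustrative, not part of this file): this knob is driven from
 * userspace over the RDMA netlink interface; with the iproute2 "rdma" tool
 * the toggle looks something like:
 *
 *	rdma system set netns exclusive		(disable compat devices)
 *	rdma system set netns shared		(re-enable compat devices)
 *
 * The exact command syntax belongs to iproute2 and may differ by version;
 * the kernel-side contract is only the enable flag handled above.
 */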

static void rdma_dev_exit_net(struct net *net)
{
	struct rdma_dev_net *rnet = rdma_net_to_dev_net(net);
	struct ib_device *dev;
	unsigned long index;
	int ret;

	down_write(&rdma_nets_rwsem);
	/*
	 * Prevent the ID from being re-used and hide the ID from xa_for_each.
	 */
	ret = xa_err(xa_store(&rdma_nets, rnet->id, NULL, GFP_KERNEL));
	WARN_ON(ret);
	up_write(&rdma_nets_rwsem);

	down_read(&devices_rwsem);
	xa_for_each (&devices, index, dev) {
		get_device(&dev->dev);
		/*
		 * Release the devices_rwsem so that the potentially blocking
		 * device_del() doesn't hold the devices_rwsem for too long.
		 */
		up_read(&devices_rwsem);

		remove_one_compat_dev(dev, rnet->id);

		/*
		 * If the real device is in the NS then move it back to init.
		 */
		rdma_dev_change_netns(dev, net, &init_net);

		put_device(&dev->dev);
		down_read(&devices_rwsem);
	}
	up_read(&devices_rwsem);

	rdma_nl_net_exit(rnet);
	xa_erase(&rdma_nets, rnet->id);
}

static __net_init int rdma_dev_init_net(struct net *net)
{
	struct rdma_dev_net *rnet = rdma_net_to_dev_net(net);
	unsigned long index;
	struct ib_device *dev;
	int ret;

	write_pnet(&rnet->net, net);

	ret = rdma_nl_net_init(rnet);
	if (ret)
		return ret;

	/* No need to create any compat devices in default init_net. */
	if (net_eq(net, &init_net))
		return 0;

	ret = xa_alloc(&rdma_nets, &rnet->id, rnet, xa_limit_32b, GFP_KERNEL);
	if (ret) {
		rdma_nl_net_exit(rnet);
		return ret;
	}

	down_read(&devices_rwsem);
	xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) {
		/* Hold rdma_nets_rwsem so that a netlink command cannot
		 * change the system configuration for device sharing mode.
		 */
		down_read(&rdma_nets_rwsem);
		ret = add_one_compat_dev(dev, rnet);
		up_read(&rdma_nets_rwsem);
		if (ret)
			break;
	}
	up_read(&devices_rwsem);

	if (ret)
		rdma_dev_exit_net(net);

	return ret;
}

/*
 * Assign the unique string device name and the unique device index. This is
 * undone by ib_dealloc_device.
 */
static int assign_name(struct ib_device *device, const char *name)
{
	static u32 last_id;
	int ret;

	down_write(&devices_rwsem);
	/* Assign a unique name to the device */
	if (strchr(name, '%'))
		ret = alloc_name(device, name);
	else
		ret = dev_set_name(&device->dev, name);
	if (ret)
		goto out;

	if (__ib_device_get_by_name(dev_name(&device->dev))) {
		ret = -ENFILE;
		goto out;
	}
	strscpy(device->name, dev_name(&device->dev), IB_DEVICE_NAME_MAX);

	ret = xa_alloc_cyclic(&devices, &device->index, device, xa_limit_31b,
			&last_id, GFP_KERNEL);
	if (ret > 0)
		ret = 0;

out:
	up_write(&devices_rwsem);
	return ret;
}

/*
 * setup_device() allocates memory and sets up data that requires calling the
 * device ops, this is the only reason these actions are not done during
 * ib_alloc_device. It is undone by ib_dealloc_device().
 */
static int setup_device(struct ib_device *device)
{
	struct ib_udata uhw = {.outlen = 0, .inlen = 0};
	int ret;

	ib_device_check_mandatory(device);

	ret = setup_port_data(device);
	if (ret) {
		dev_warn(&device->dev, "Couldn't create per-port data\n");
		return ret;
	}

	memset(&device->attrs, 0, sizeof(device->attrs));
	ret = device->ops.query_device(device, &device->attrs, &uhw);
	if (ret) {
		dev_warn(&device->dev,
			 "Couldn't query the device attributes\n");
		return ret;
	}

	return 0;
}

static void disable_device(struct ib_device *device)
{
	u32 cid;

	WARN_ON(!refcount_read(&device->refcount));

	down_write(&devices_rwsem);
	xa_clear_mark(&devices, device->index, DEVICE_REGISTERED);
	up_write(&devices_rwsem);

	/*
	 * Remove clients in LIFO order, see assign_client_id. This could be
	 * more efficient if xarray learns to reverse iterate. Since no new
	 * clients can be added to this ib_device past this point we only need
	 * the maximum possible client_id value here.
	 */
	down_read(&clients_rwsem);
	cid = highest_client_id;
	up_read(&clients_rwsem);
	while (cid) {
		cid--;
		remove_client_context(device, cid);
	}

	ib_cq_pool_cleanup(device);

	/* Pairs with refcount_set in enable_device */
	ib_device_put(device);
	wait_for_completion(&device->unreg_completion);

	/*
	 * Compat devices must be removed after the device refcount drops to
	 * zero. Otherwise init_net() may add more compatdevs after removing
	 * compat devices and before the device is disabled.
	 */
	remove_compat_devs(device);
}

/*
 * An enabled device is visible to all clients and to all the public facing
 * APIs that return a device pointer. This always returns with a new get, even
 * if it fails.
 */
static int enable_device_and_get(struct ib_device *device)
{
	struct ib_client *client;
	unsigned long index;
	int ret = 0;

	/*
	 * One ref belongs to the xa and the other belongs to this
	 * thread. This is needed to guard against parallel unregistration.
	 */
	refcount_set(&device->refcount, 2);
	down_write(&devices_rwsem);
	xa_set_mark(&devices, device->index, DEVICE_REGISTERED);

	/*
	 * By using downgrade_write() we ensure that no other thread can clear
	 * DEVICE_REGISTERED while we are completing the client setup.
	 */
	downgrade_write(&devices_rwsem);

	if (device->ops.enable_driver) {
		ret = device->ops.enable_driver(device);
		if (ret)
			goto out;
	}

	down_read(&clients_rwsem);
	xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED) {
		ret = add_client_context(device, client);
		if (ret)
			break;
	}
	up_read(&clients_rwsem);
	if (!ret)
		ret = add_compat_devs(device);
out:
	up_read(&devices_rwsem);
	return ret;
}

/*
 * Sentinel for ops.dealloc_driver: ib_register_device() installs this while
 * unwinding a failed registration so that a racing unregistration does not
 * free the device out from under the caller.
 */
static void prevent_dealloc_device(struct ib_device *ib_dev)
{
}

static void ib_device_notify_register(struct ib_device *device)
{
	struct net_device *netdev;
	u32 port;
	int ret;

	ret = rdma_nl_notify_event(device, 0, RDMA_REGISTER_EVENT);
	if (ret)
		return;

	rdma_for_each_port(device, port) {
		netdev = ib_device_get_netdev(device, port);
		if (!netdev)
			continue;

		ret = rdma_nl_notify_event(device, port,
					   RDMA_NETDEV_ATTACH_EVENT);
		dev_put(netdev);
		if (ret)
			return;
	}
}

/**
 * ib_register_device - Register an IB device with IB core
 * @device: Device to register
 * @name: unique string device name. This may include a '%' which will
 *	  cause a unique index to be added to the passed device name.
 * @dma_device: pointer to a DMA-capable device. If %NULL, then the IB
 *	        device will be used. In this case the caller should fully
 *		setup the ibdev for DMA. This usually means using dma_virt_ops.
 *
 * Low-level drivers use ib_register_device() to register their
 * devices with the IB core.  All registered clients will receive a
 * callback for each device that is added. @device must be allocated
 * with ib_alloc_device().
 *
 * If the driver uses ops.dealloc_driver and calls any ib_unregister_device()
 * asynchronously then the device pointer may become freed as soon as this
 * function returns.
 */
int ib_register_device(struct ib_device *device, const char *name,
		       struct device *dma_device)
{
	int ret;

	ret = assign_name(device, name);
	if (ret)
		return ret;

	/*
	 * If the caller does not provide a DMA capable device then the IB
	 * core will set up ib_sge and scatterlist structures that stash the
	 * kernel virtual address into the address field.
	 */
	WARN_ON(dma_device && !dma_device->dma_parms);
	device->dma_device = dma_device;

	ret = setup_device(device);
	if (ret)
		return ret;

	ret = ib_cache_setup_one(device);
	if (ret) {
		dev_warn(&device->dev,
			 "Couldn't set up InfiniBand P_Key/GID cache\n");
		return ret;
	}

	device->groups[0] = &ib_dev_attr_group;
	device->groups[1] = device->ops.device_group;
	ret = ib_setup_device_attrs(device);
	if (ret)
		goto cache_cleanup;

	ib_device_register_rdmacg(device);

	rdma_counter_init(device);

	/*
	 * Ensure that the ADD uevent is not fired because it is too early
	 * and the device is not initialized yet.
	 */
	dev_set_uevent_suppress(&device->dev, true);
	ret = device_add(&device->dev);
	if (ret)
		goto cg_cleanup;

	ret = ib_setup_port_attrs(&device->coredev);
	if (ret) {
		dev_warn(&device->dev,
			 "Couldn't register device with driver model\n");
		goto dev_cleanup;
	}

	ret = enable_device_and_get(device);
	if (ret) {
		void (*dealloc_fn)(struct ib_device *);

		/*
		 * If we hit this error flow then we don't want to
		 * automatically dealloc the device since the caller is
		 * expected to call ib_dealloc_device() after
		 * ib_register_device() fails. This is tricky due to the
		 * possibility for a parallel unregistration along with this
		 * error flow. Since we have a refcount here we know any
		 * parallel flow is stopped in disable_device and will see the
		 * special dealloc_driver pointer, causing the responsibility
		 * for calling ib_dealloc_device() to revert back to this
		 * thread.
		 */
		dealloc_fn = device->ops.dealloc_driver;
		device->ops.dealloc_driver = prevent_dealloc_device;
		ib_device_put(device);
		__ib_unregister_device(device);
		device->ops.dealloc_driver = dealloc_fn;
		dev_set_uevent_suppress(&device->dev, false);
		return ret;
	}
	dev_set_uevent_suppress(&device->dev, false);
	/* Mark for userspace that device is ready */
	kobject_uevent(&device->dev.kobj, KOBJ_ADD);

	ib_device_notify_register(device);
	ib_device_put(device);

	return 0;

dev_cleanup:
	device_del(&device->dev);
cg_cleanup:
	dev_set_uevent_suppress(&device->dev, false);
	ib_device_unregister_rdmacg(device);
cache_cleanup:
	ib_cache_cleanup_one(device);
	return ret;
}
EXPORT_SYMBOL(ib_register_device);
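
/*
 * Example (sketch; the "foo" driver, its structures and its ops table are
 * hypothetical, not part of this file): a typical provider allocates its
 * device with ib_alloc_device(), fills in the ops, then registers:
 *
 *	struct foo_dev *fdev;
 *	int ret;
 *
 *	fdev = ib_alloc_device(foo_dev, ibdev);
 *	if (!fdev)
 *		return -ENOMEM;
 *	ib_set_device_ops(&fdev->ibdev, &foo_dev_ops);
 *	ret = ib_register_device(&fdev->ibdev, "foo%d", &pdev->dev);
 *	if (ret)
 *		ib_dealloc_device(&fdev->ibdev);
 *
 * Passing '%d' in the name asks assign_name() to pick a unique index, and a
 * failed ib_register_device() leaves the caller responsible for calling
 * ib_dealloc_device(), per the error-flow comment above.
 */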

/* Callers must hold a get on the device. */
static void __ib_unregister_device(struct ib_device *ib_dev)
{
	struct ib_device *sub, *tmp;

	mutex_lock(&ib_dev->subdev_lock);
	list_for_each_entry_safe_reverse(sub, tmp,
					 &ib_dev->subdev_list_head,
					 subdev_list) {
		list_del(&sub->subdev_list);
		ib_dev->ops.del_sub_dev(sub);
		ib_device_put(ib_dev);
	}
	mutex_unlock(&ib_dev->subdev_lock);

	/*
	 * We have a registration lock so that all the calls to unregister are
	 * fully fenced, once any unregister returns the device is truly
	 * unregistered even if multiple callers are unregistering it at the
	 * same time. This also interacts with the registration flow and
	 * provides sane semantics if register and unregister are racing.
	 */
	mutex_lock(&ib_dev->unregistration_lock);
	if (!refcount_read(&ib_dev->refcount))
		goto out;

	disable_device(ib_dev);
	rdma_nl_notify_event(ib_dev, 0, RDMA_UNREGISTER_EVENT);

	/* Expedite removing unregistered pointers from the hash table */
	free_netdevs(ib_dev);

	ib_free_port_attrs(&ib_dev->coredev);
	device_del(&ib_dev->dev);
	ib_device_unregister_rdmacg(ib_dev);
	ib_cache_cleanup_one(ib_dev);

	/*
	 * Drivers using the new flow may not call ib_dealloc_device except
	 * in error unwind prior to registration success.
	 */
	if (ib_dev->ops.dealloc_driver &&
	    ib_dev->ops.dealloc_driver != prevent_dealloc_device) {
		WARN_ON(kref_read(&ib_dev->dev.kobj.kref) <= 1);
		ib_dealloc_device(ib_dev);
	}
out:
	mutex_unlock(&ib_dev->unregistration_lock);
}

/**
 * ib_unregister_device - Unregister an IB device
 * @ib_dev: The device to unregister
 *
 * Unregister an IB device.  All clients will receive a remove callback.
 *
 * Callers should call this routine only once, and protect against races with
 * registration. Typically it should only be called as part of a remove
 * callback in an implementation of driver core's struct device_driver and
 * related.
 *
 * If ops.dealloc_driver is used then ib_dev will be freed upon return from
 * this function.
 */
void ib_unregister_device(struct ib_device *ib_dev)
{
	get_device(&ib_dev->dev);
	__ib_unregister_device(ib_dev);
	put_device(&ib_dev->dev);
}
EXPORT_SYMBOL(ib_unregister_device);
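
/*
 * Example (sketch, continuing the hypothetical "foo" driver above): the
 * usual call site is the bus remove callback, followed by freeing the
 * device:
 *
 *	static void foo_remove(struct pci_dev *pdev)
 *	{
 *		struct foo_dev *fdev = pci_get_drvdata(pdev);
 *
 *		ib_unregister_device(&fdev->ibdev);
 *		ib_dealloc_device(&fdev->ibdev);
 *	}
 *
 * A driver that sets ops.dealloc_driver must not call ib_dealloc_device()
 * here, since unregistration already frees the device in that flow.
 */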

/**
 * ib_unregister_device_and_put - Unregister a device while holding a 'get'
 * @ib_dev: The device to unregister
 *
 * This is the same as ib_unregister_device(), except it includes an internal
 * ib_device_put() that should match a 'get' obtained by the caller.
 *
 * It is safe to call this routine concurrently from multiple threads while
 * holding the 'get'. When the function returns the device is fully
 * unregistered.
 *
 * Drivers using this flow MUST use the driver_unregister callback to clean up
 * their resources associated with the device and dealloc it.
 */
void ib_unregister_device_and_put(struct ib_device *ib_dev)
{
	WARN_ON(!ib_dev->ops.dealloc_driver);
	get_device(&ib_dev->dev);
	ib_device_put(ib_dev);
	__ib_unregister_device(ib_dev);
	put_device(&ib_dev->dev);
}
EXPORT_SYMBOL(ib_unregister_device_and_put);

/**
 * ib_unregister_driver - Unregister all IB devices for a driver
 * @driver_id: The driver to unregister
 *
 * This implements a fence for device unregistration. It only returns once all
 * devices associated with the driver_id have fully completed their
 * unregistration and returned from ib_unregister_device*().
 *
 * If devices are not yet unregistered it goes ahead and starts unregistering
 * them.
 *
 * This does not block creation of new devices with the given driver_id, that
 * is the responsibility of the caller.
 */
void ib_unregister_driver(enum rdma_driver_id driver_id)
{
	struct ib_device *ib_dev;
	unsigned long index;

	down_read(&devices_rwsem);
	xa_for_each (&devices, index, ib_dev) {
		if (ib_dev->ops.driver_id != driver_id)
			continue;

		get_device(&ib_dev->dev);
		up_read(&devices_rwsem);

		WARN_ON(!ib_dev->ops.dealloc_driver);
		__ib_unregister_device(ib_dev);

		put_device(&ib_dev->dev);
		down_read(&devices_rwsem);
	}
	up_read(&devices_rwsem);
}
EXPORT_SYMBOL(ib_unregister_driver);

static void ib_unregister_work(struct work_struct *work)
{
	struct ib_device *ib_dev =
		container_of(work, struct ib_device, unregistration_work);

	__ib_unregister_device(ib_dev);
	put_device(&ib_dev->dev);
}

/**
 * ib_unregister_device_queued - Unregister a device using a work queue
 * @ib_dev: The device to unregister
 *
 * This schedules an asynchronous unregistration using a WQ for the device. A
 * driver should use this to avoid holding locks while doing unregistration,
 * such as holding the RTNL lock.
 *
 * Drivers using this API must use ib_unregister_driver before module unload
 * to ensure that all scheduled unregistrations have completed.
 */
void ib_unregister_device_queued(struct ib_device *ib_dev)
{
	WARN_ON(!refcount_read(&ib_dev->refcount));
	WARN_ON(!ib_dev->ops.dealloc_driver);
	get_device(&ib_dev->dev);
	if (!queue_work(ib_unreg_wq, &ib_dev->unregistration_work))
		put_device(&ib_dev->dev);
}
EXPORT_SYMBOL(ib_unregister_device_queued);
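
/*
 * Example (sketch; the "foo" names are illustrative and RDMA_DRIVER_FOO
 * stands in for the driver's real enum rdma_driver_id value): a driver
 * using the queued flow pairs it with ib_unregister_driver() at module
 * unload:
 *
 *	/- in a context that may hold the RTNL lock -/
 *	ib_unregister_device_queued(&fdev->ibdev);
 *
 *	static void __exit foo_exit(void)
 *	{
 *		/- fence: waits for all queued unregistrations to finish -/
 *		ib_unregister_driver(RDMA_DRIVER_FOO);
 *	}
 *
 * (Inner comment delimiters are shown as /- -/ to keep this block a valid C
 * comment.)
 */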

/*
 * The caller must pass in a device that has the kref held and the refcount
 * released. If the device is in cur_net and still registered then it is moved
 * into net.
 */
static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net,
				 struct net *net)
{
	int ret2 = -EINVAL;
	int ret;

	mutex_lock(&device->unregistration_lock);

	/*
	 * If a device is not held via ib_device_get() and the
	 * unregistration_lock is not held, the namespace can be changed, or
	 * the device can be unregistered. Check again under the lock.
	 */
	if (refcount_read(&device->refcount) == 0 ||
	    !net_eq(cur_net, read_pnet(&device->coredev.rdma_net))) {
		ret = -ENODEV;
		goto out;
	}

	kobject_uevent(&device->dev.kobj, KOBJ_REMOVE);
	disable_device(device);

	/*
	 * At this point no one can be using the device, so it is safe to
	 * change the namespace.
	 */
	write_pnet(&device->coredev.rdma_net, net);

	down_read(&devices_rwsem);
	/*
	 * Currently rdma devices are system wide unique. So the device name
	 * is guaranteed free in the new namespace. Publish the new namespace
	 * at the sysfs level.
	 */
	ret = device_rename(&device->dev, dev_name(&device->dev));
	up_read(&devices_rwsem);
	if (ret) {
		dev_warn(&device->dev,
			 "%s: Couldn't rename device after namespace change\n",
			 __func__);
		/* Try and put things back and re-enable the device */
		write_pnet(&device->coredev.rdma_net, cur_net);
	}

	ret2 = enable_device_and_get(device);
	if (ret2) {
		/*
		 * This shouldn't really happen, but if it does, let the user
		 * retry at a later point. So don't disable the device.
		 */
		dev_warn(&device->dev,
			 "%s: Couldn't re-enable device after namespace change\n",
			 __func__);
	}
	kobject_uevent(&device->dev.kobj, KOBJ_ADD);

	ib_device_put(device);
out:
	mutex_unlock(&device->unregistration_lock);
	if (ret)
		return ret;
	return ret2;
}
int ib_device_set_netns_put(struct sk_buff *skb,
			    struct ib_device *dev, u32 ns_fd)
{
	struct net *net;
	int ret;

	net = get_net_ns_by_fd(ns_fd);
	if (IS_ERR(net)) {
		ret = PTR_ERR(net);
		goto net_err;
	}

	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) {
		ret = -EPERM;
		goto ns_err;
	}

	/*
	 * All the ib_clients, including uverbs, are reset when the namespace
	 * is changed and this cannot be blocked waiting for userspace to do
	 * something, so disassociation is mandatory.
	 */
	if (!dev->ops.disassociate_ucontext || ib_devices_shared_netns) {
		ret = -EOPNOTSUPP;
		goto ns_err;
	}

	get_device(&dev->dev);
	ib_device_put(dev);
	ret = rdma_dev_change_netns(dev, current->nsproxy->net_ns, net);
	put_device(&dev->dev);

	put_net(net);
	return ret;

ns_err:
	put_net(net);
net_err:
	ib_device_put(dev);
	return ret;
}

static struct pernet_operations rdma_dev_net_ops = {
	.init = rdma_dev_init_net,
	.exit = rdma_dev_exit_net,
	.id = &rdma_dev_net_id,
	.size = sizeof(struct rdma_dev_net),
};

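/*
 * These pernet operations are hooked up during core initialization elsewhere
 * in this file; a minimal sketch of that wiring (following the usual pernet
 * pattern, see ib_core_init()) is:
 *
 *	ret = register_pernet_device(&rdma_dev_net_ops);
 *	if (ret)
 *		goto err;
 *
 * After this, rdma_dev_init_net()/rdma_dev_exit_net() run for every network
 * namespace as it is created or torn down.
 */
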
static int assign_client_id(struct ib_client *client)
{
	int ret;

	lockdep_assert_held(&clients_rwsem);
	/*
	 * The add/remove callbacks must be called in FIFO/LIFO order. To
	 * achieve this we assign client_ids so they are sorted in
	 * registration order.
	 */
	client->client_id = highest_client_id;
	ret = xa_insert(&clients, client->client_id, client, GFP_KERNEL);
	if (ret)
		return ret;

	highest_client_id++;
	xa_set_mark(&clients, client->client_id, CLIENT_REGISTERED);
	return 0;
}

static void remove_client_id(struct ib_client *client)
{
	down_write(&clients_rwsem);
	xa_erase(&clients, client->client_id);
	for (; highest_client_id; highest_client_id--)
		if (xa_load(&clients, highest_client_id - 1))
			break;
	up_write(&clients_rwsem);
}

/**
 * ib_register_client - Register an IB client
 * @client: Client to register
 *
 * Upper level users of the IB drivers can use ib_register_client() to
 * register callbacks for IB device addition and removal.  When an IB
 * device is added, each registered client's add method will be called
 * (in the order the clients were registered), and when a device is
 * removed, each client's remove method will be called (in the reverse
 * order that clients were registered).  In addition, when
 * ib_register_client() is called, the client will receive an add
 * callback for all devices already registered.
 */
int ib_register_client(struct ib_client *client)
{
	struct ib_device *device;
	unsigned long index;
	bool need_unreg = false;
	int ret;

	refcount_set(&client->uses, 1);
	init_completion(&client->uses_zero);

	/*
	 * The devices_rwsem is held in write mode to ensure that a racing
	 * ib_register_device() sees a consistent view of clients and devices.
	 */
	down_write(&devices_rwsem);
	down_write(&clients_rwsem);
	ret = assign_client_id(client);
	if (ret)
		goto out;

	need_unreg = true;
	xa_for_each_marked (&devices, index, device, DEVICE_REGISTERED) {
		ret = add_client_context(device, client);
		if (ret)
			goto out;
	}
	ret = 0;
out:
	up_write(&clients_rwsem);
	up_write(&devices_rwsem);
	if (need_unreg && ret)
		ib_unregister_client(client);
	return ret;
}
EXPORT_SYMBOL(ib_register_client);
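
/*
 * Example (sketch; the "foo" client is hypothetical): a typical ULP defines
 * a static struct ib_client and registers it at module init:
 *
 *	static int foo_add_one(struct ib_device *device);
 *	static void foo_remove_one(struct ib_device *device, void *client_data);
 *
 *	static struct ib_client foo_client = {
 *		.name	= "foo",
 *		.add	= foo_add_one,
 *		.remove	= foo_remove_one,
 *	};
 *
 *	static int __init foo_init(void)
 *	{
 *		return ib_register_client(&foo_client);
 *	}
 *
 * Module exit then calls ib_unregister_client(&foo_client), which fences all
 * callbacks as described below.
 */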

/**
 * ib_unregister_client - Unregister an IB client
 * @client: Client to unregister
 *
 * Upper level users use ib_unregister_client() to remove their client
 * registration.  When ib_unregister_client() is called, the client
 * will receive a remove callback for each IB device still registered.
 *
 * This is a full fence, once it returns no client callbacks will be called,
 * or are running in another thread.
 */
void ib_unregister_client(struct ib_client *client)
{
	struct ib_device *device;
	unsigned long index;

	down_write(&clients_rwsem);
	ib_client_put(client);
	xa_clear_mark(&clients, client->client_id, CLIENT_REGISTERED);
	up_write(&clients_rwsem);

	/* We do not want to have locks while calling client->remove() */
	rcu_read_lock();
	xa_for_each (&devices, index, device) {
		if (!ib_device_try_get(device))
			continue;
		rcu_read_unlock();

		remove_client_context(device, client->client_id);

		ib_device_put(device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	/*
	 * remove_client_context() is not a fence, it can return even though a
	 * removal is ongoing. Wait until all removals are completed.
	 */
	wait_for_completion(&client->uses_zero);
	remove_client_id(client);
}
EXPORT_SYMBOL(ib_unregister_client);

static int __ib_get_global_client_nl_info(const char *client_name,
					  struct ib_client_nl_info *res)
{
	struct ib_client *client;
	unsigned long index;
	int ret = -ENOENT;

	down_read(&clients_rwsem);
	xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED) {
		if (strcmp(client->name, client_name) != 0)
			continue;
		if (!client->get_global_nl_info) {
			ret = -EOPNOTSUPP;
			break;
		}
		ret = client->get_global_nl_info(res);
		if (WARN_ON(ret == -ENOENT))
			ret = -EINVAL;
		if (!ret && res->cdev)
			get_device(res->cdev);
		break;
	}
	up_read(&clients_rwsem);
	return ret;
}

static int __ib_get_client_nl_info(struct ib_device *ibdev,
				   const char *client_name,
				   struct ib_client_nl_info *res)
{
	unsigned long index;
	void *client_data;
	int ret = -ENOENT;

	down_read(&ibdev->client_data_rwsem);
	xan_for_each_marked (&ibdev->client_data, index, client_data,
			     CLIENT_DATA_REGISTERED) {
		struct ib_client *client = xa_load(&clients, index);

		if (!client || strcmp(client->name, client_name) != 0)
			continue;
		if (!client->get_nl_info) {
			ret = -EOPNOTSUPP;
			break;
		}
		ret = client->get_nl_info(ibdev, client_data, res);
		if (WARN_ON(ret == -ENOENT))
			ret = -EINVAL;

		/*
		 * The cdev is guaranteed valid as long as we are inside the
		 * client_data_rwsem as remove_one can't be called. Keep it
		 * valid for the caller.
		 */
		if (!ret && res->cdev)
			get_device(res->cdev);
		break;
	}
	up_read(&ibdev->client_data_rwsem);

	return ret;
}

/**
 * ib_get_client_nl_info - Fetch the nl_info from a client
 * @ibdev: IB device
 * @client_name: Name of the client
 * @res: Result of the query
 */
int ib_get_client_nl_info(struct ib_device *ibdev, const char *client_name,
			  struct ib_client_nl_info *res)
{
	int ret;

	if (ibdev)
		ret = __ib_get_client_nl_info(ibdev, client_name, res);
	else
		ret = __ib_get_global_client_nl_info(client_name, res);
#ifdef CONFIG_MODULES
	if (ret == -ENOENT) {
		request_module("rdma-client-%s", client_name);
		if (ibdev)
			ret = __ib_get_client_nl_info(ibdev, client_name, res);
		else
			ret = __ib_get_global_client_nl_info(client_name, res);
	}
#endif
	if (ret) {
		if (ret == -ENOENT)
			return -EOPNOTSUPP;
		return ret;
	}

	if (WARN_ON(!res->cdev))
		return -EINVAL;
	return 0;
}

/**
 * ib_set_client_data - Set IB client context
 * @device: Device to set context for
 * @client: Client to set context for
 * @data: Context to set
 *
 * ib_set_client_data() sets client context data that can be retrieved with
 * ib_get_client_data(). This can only be called while the client is
 * registered to the device, once the ib_client remove() callback returns this
 * cannot be called.
 */
void ib_set_client_data(struct ib_device *device, struct ib_client *client,
			void *data)
{
	void *rc;

	if (WARN_ON(IS_ERR(data)))
		data = NULL;

	rc = xa_store(&device->client_data, client->client_id, data,
		      GFP_KERNEL);
	WARN_ON(xa_is_err(rc));
}
EXPORT_SYMBOL(ib_set_client_data);
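
/*
 * Example (sketch, continuing the hypothetical "foo" client; struct foo_ctx
 * is illustrative): the add() callback usually allocates per-device state
 * and publishes it here so later callbacks can fetch it with
 * ib_get_client_data():
 *
 *	static int foo_add_one(struct ib_device *device)
 *	{
 *		struct foo_ctx *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 *
 *		if (!ctx)
 *			return -ENOMEM;
 *		ib_set_client_data(device, &foo_client, ctx);
 *		return 0;
 *	}
 */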

/**
 * ib_register_event_handler - Register an IB event handler
 * @event_handler: Handler to register
 *
 * ib_register_event_handler() registers an event handler that will be
 * called back when asynchronous IB events occur (as defined in
 * chapter 11 of the InfiniBand Architecture Specification). This
 * callback occurs in workqueue context.
 */
void ib_register_event_handler(struct ib_event_handler *event_handler)
{
	down_write(&event_handler->device->event_handler_rwsem);
	list_add_tail(&event_handler->list,
		      &event_handler->device->event_handler_list);
	up_write(&event_handler->device->event_handler_rwsem);
}
EXPORT_SYMBOL(ib_register_event_handler);
20361da177e4SLinus Torvalds 
20371da177e4SLinus Torvalds /**
20381da177e4SLinus Torvalds  * ib_unregister_event_handler - Unregister an event handler
20391da177e4SLinus Torvalds  * @event_handler:Handler to unregister
20401da177e4SLinus Torvalds  *
20411da177e4SLinus Torvalds  * Unregister an event handler registered with
20421da177e4SLinus Torvalds  * ib_register_event_handler().
20431da177e4SLinus Torvalds  */
2044dcc9881eSLeon Romanovsky void ib_unregister_event_handler(struct ib_event_handler *event_handler)
20451da177e4SLinus Torvalds {
20466b57cea9SParav Pandit 	down_write(&event_handler->device->event_handler_rwsem);
20471da177e4SLinus Torvalds 	list_del(&event_handler->list);
20486b57cea9SParav Pandit 	up_write(&event_handler->device->event_handler_rwsem);
20491da177e4SLinus Torvalds }
20501da177e4SLinus Torvalds EXPORT_SYMBOL(ib_unregister_event_handler);
20511da177e4SLinus Torvalds 
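/*
 * Editor's illustrative sketch, not part of device.c: registering an
 * asynchronous event handler with the INIT_IB_EVENT_HANDLER() helper from
 * <rdma/ib_verbs.h>. demo_event_cb, demo_handler and the ibdev argument
 * are hypothetical.
 */
static void demo_event_cb(struct ib_event_handler *handler,
			  struct ib_event *event)
{
	/* Runs in workqueue context, as documented above. */
	pr_info("async event %d on %s\n", event->event,
		dev_name(&event->device->dev));
}

static struct ib_event_handler demo_handler;

static void demo_watch_events(struct ib_device *ibdev)
{
	INIT_IB_EVENT_HANDLER(&demo_handler, ibdev, demo_event_cb);
	ib_register_event_handler(&demo_handler);
	/* ... later: ib_unregister_event_handler(&demo_handler); */
}
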
20526b57cea9SParav Pandit void ib_dispatch_event_clients(struct ib_event *event)
20531da177e4SLinus Torvalds {
20541da177e4SLinus Torvalds 	struct ib_event_handler *handler;
20551da177e4SLinus Torvalds 
20566b57cea9SParav Pandit 	down_read(&event->device->event_handler_rwsem);
20571da177e4SLinus Torvalds 
20581da177e4SLinus Torvalds 	list_for_each_entry(handler, &event->device->event_handler_list, list)
20591da177e4SLinus Torvalds 		handler->handler(handler, event);
20601da177e4SLinus Torvalds 
20616b57cea9SParav Pandit 	up_read(&event->device->event_handler_rwsem);
20621da177e4SLinus Torvalds }
20631da177e4SLinus Torvalds 
20644929116bSKamal Heib static int iw_query_port(struct ib_device *device,
20651fb7f897SMark Bloch 			   u32 port_num,
20664929116bSKamal Heib 			   struct ib_port_attr *port_attr)
20674929116bSKamal Heib {
20684929116bSKamal Heib 	struct in_device *inetdev;
20694929116bSKamal Heib 	struct net_device *netdev;
20704929116bSKamal Heib 
20714929116bSKamal Heib 	memset(port_attr, 0, sizeof(*port_attr));
20724929116bSKamal Heib 
20734929116bSKamal Heib 	netdev = ib_device_get_netdev(device, port_num);
20744929116bSKamal Heib 	if (!netdev)
20754929116bSKamal Heib 		return -ENODEV;
20764929116bSKamal Heib 
20774929116bSKamal Heib 	port_attr->max_mtu = IB_MTU_4096;
20784929116bSKamal Heib 	port_attr->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
20794929116bSKamal Heib 
20804929116bSKamal Heib 	if (!netif_carrier_ok(netdev)) {
20814929116bSKamal Heib 		port_attr->state = IB_PORT_DOWN;
20824929116bSKamal Heib 		port_attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
20834929116bSKamal Heib 	} else {
2084390d3fdcSMichal Kalderon 		rcu_read_lock();
2085390d3fdcSMichal Kalderon 		inetdev = __in_dev_get_rcu(netdev);
20864929116bSKamal Heib 
20874929116bSKamal Heib 		if (inetdev && inetdev->ifa_list) {
20884929116bSKamal Heib 			port_attr->state = IB_PORT_ACTIVE;
20894929116bSKamal Heib 			port_attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
20904929116bSKamal Heib 		} else {
20914929116bSKamal Heib 			port_attr->state = IB_PORT_INIT;
20924929116bSKamal Heib 			port_attr->phys_state =
20934929116bSKamal Heib 				IB_PORT_PHYS_STATE_PORT_CONFIGURATION_TRAINING;
20944929116bSKamal Heib 		}
2095390d3fdcSMichal Kalderon 
2096390d3fdcSMichal Kalderon 		rcu_read_unlock();
20974929116bSKamal Heib 	}
20984929116bSKamal Heib 
2099390d3fdcSMichal Kalderon 	dev_put(netdev);
21001e123d96SGuoqing Jiang 	return device->ops.query_port(device, port_num, port_attr);
21014929116bSKamal Heib }
21024929116bSKamal Heib 
21034929116bSKamal Heib static int __ib_query_port(struct ib_device *device,
21041fb7f897SMark Bloch 			   u32 port_num,
21054929116bSKamal Heib 			   struct ib_port_attr *port_attr)
21064929116bSKamal Heib {
21074929116bSKamal Heib 	int err;
21084929116bSKamal Heib 
21094929116bSKamal Heib 	memset(port_attr, 0, sizeof(*port_attr));
21104929116bSKamal Heib 
21114929116bSKamal Heib 	err = device->ops.query_port(device, port_num, port_attr);
21124929116bSKamal Heib 	if (err || port_attr->subnet_prefix)
21134929116bSKamal Heib 		return err;
21144929116bSKamal Heib 
21154929116bSKamal Heib 	if (rdma_port_get_link_layer(device, port_num) !=
21164929116bSKamal Heib 	    IB_LINK_LAYER_INFINIBAND)
21174929116bSKamal Heib 		return 0;
21184929116bSKamal Heib 
211921bfee9cSAnand Khoje 	ib_get_cached_subnet_prefix(device, port_num,
212021bfee9cSAnand Khoje 				    &port_attr->subnet_prefix);
21214929116bSKamal Heib 	return 0;
21224929116bSKamal Heib }
21234929116bSKamal Heib 
21241da177e4SLinus Torvalds /**
21251da177e4SLinus Torvalds  * ib_query_port - Query IB port attributes
21261da177e4SLinus Torvalds  * @device:Device to query
21271da177e4SLinus Torvalds  * @port_num:Port number to query
21281da177e4SLinus Torvalds  * @port_attr:Port attributes
21291da177e4SLinus Torvalds  *
21301da177e4SLinus Torvalds  * ib_query_port() returns the attributes of a port through the
21311da177e4SLinus Torvalds  * @port_attr pointer.
21321da177e4SLinus Torvalds  */
21331da177e4SLinus Torvalds int ib_query_port(struct ib_device *device,
21341fb7f897SMark Bloch 		  u32 port_num,
21351da177e4SLinus Torvalds 		  struct ib_port_attr *port_attr)
21361da177e4SLinus Torvalds {
213724dc831bSYuval Shaia 	if (!rdma_is_port_valid(device, port_num))
2138116c0074SRoland Dreier 		return -EINVAL;
2139116c0074SRoland Dreier 
21404929116bSKamal Heib 	if (rdma_protocol_iwarp(device, port_num))
21414929116bSKamal Heib 		return iw_query_port(device, port_num, port_attr);
21424929116bSKamal Heib 	else
21434929116bSKamal Heib 		return __ib_query_port(device, port_num, port_attr);
21441da177e4SLinus Torvalds }
21451da177e4SLinus Torvalds EXPORT_SYMBOL(ib_query_port);
21461da177e4SLinus Torvalds 
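/*
 * Editor's illustrative sketch, not part of device.c: querying port
 * attributes. For iWARP ports the state is derived from the associated
 * netdev, as iw_query_port() above shows. demo_port_is_active and its
 * arguments are hypothetical.
 */
static bool demo_port_is_active(struct ib_device *ibdev, u32 port)
{
	struct ib_port_attr attr;

	if (ib_query_port(ibdev, port, &attr))
		return false;
	return attr.state == IB_PORT_ACTIVE;
}
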
2147324e227eSJason Gunthorpe static void add_ndev_hash(struct ib_port_data *pdata)
2148324e227eSJason Gunthorpe {
2149324e227eSJason Gunthorpe 	unsigned long flags;
2150324e227eSJason Gunthorpe 
2151324e227eSJason Gunthorpe 	might_sleep();
2152324e227eSJason Gunthorpe 
2153324e227eSJason Gunthorpe 	spin_lock_irqsave(&ndev_hash_lock, flags);
2154324e227eSJason Gunthorpe 	if (hash_hashed(&pdata->ndev_hash_link)) {
2155324e227eSJason Gunthorpe 		hash_del_rcu(&pdata->ndev_hash_link);
2156324e227eSJason Gunthorpe 		spin_unlock_irqrestore(&ndev_hash_lock, flags);
2157324e227eSJason Gunthorpe 		/*
2158324e227eSJason Gunthorpe 		 * We cannot do hash_add_rcu after a hash_del_rcu until the
2159324e227eSJason Gunthorpe 		 * grace period has elapsed
2160324e227eSJason Gunthorpe 		 */
2161324e227eSJason Gunthorpe 		synchronize_rcu();
2162324e227eSJason Gunthorpe 		spin_lock_irqsave(&ndev_hash_lock, flags);
2163324e227eSJason Gunthorpe 	}
2164324e227eSJason Gunthorpe 	if (pdata->netdev)
2165324e227eSJason Gunthorpe 		hash_add_rcu(ndev_hash, &pdata->ndev_hash_link,
2166324e227eSJason Gunthorpe 			     (uintptr_t)pdata->netdev);
2167324e227eSJason Gunthorpe 	spin_unlock_irqrestore(&ndev_hash_lock, flags);
2168324e227eSJason Gunthorpe }
2169324e227eSJason Gunthorpe 
21701da177e4SLinus Torvalds /**
2171c2261dd7SJason Gunthorpe  * ib_device_set_netdev - Associate the ib_dev with an underlying net_device
2172c2261dd7SJason Gunthorpe  * @ib_dev: Device to modify
2173c2261dd7SJason Gunthorpe  * @ndev: net_device to affiliate, may be NULL
2174c2261dd7SJason Gunthorpe  * @port: IB port the net_device is connected to
2175c2261dd7SJason Gunthorpe  *
2176c2261dd7SJason Gunthorpe  * Drivers should use this to link the ib_device to a netdev so the netdev
2177c2261dd7SJason Gunthorpe  * shows up in interfaces like ib_enum_roce_netdev. Only one netdev may be
2178c2261dd7SJason Gunthorpe  * affiliated with any port.
2179c2261dd7SJason Gunthorpe  *
2180c2261dd7SJason Gunthorpe  * The caller must ensure that the given ndev is not unregistered or
2181c2261dd7SJason Gunthorpe  * unregistering, and that either the ib_device is unregistered or
2182c2261dd7SJason Gunthorpe  * ib_device_set_netdev() is called with NULL when the ndev sends a
2183c2261dd7SJason Gunthorpe  * NETDEV_UNREGISTER event.
2184c2261dd7SJason Gunthorpe  */
2185c2261dd7SJason Gunthorpe int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
21861fb7f897SMark Bloch 			 u32 port)
2187c2261dd7SJason Gunthorpe {
2188*9cbed5aaSChiara Meiohas 	enum rdma_nl_notify_event_type etype;
2189c2261dd7SJason Gunthorpe 	struct net_device *old_ndev;
2190c2261dd7SJason Gunthorpe 	struct ib_port_data *pdata;
2191c2261dd7SJason Gunthorpe 	unsigned long flags;
2192c2261dd7SJason Gunthorpe 	int ret;
2193c2261dd7SJason Gunthorpe 
2194917918f5SLeon Romanovsky 	if (!rdma_is_port_valid(ib_dev, port))
2195917918f5SLeon Romanovsky 		return -EINVAL;
2196917918f5SLeon Romanovsky 
2197c2261dd7SJason Gunthorpe 	/*
2198c2261dd7SJason Gunthorpe 	 * Drivers wish to call this before ib_register_driver, so we have to
2199c2261dd7SJason Gunthorpe 	 * set up the port data early.
2200c2261dd7SJason Gunthorpe 	 */
2201c2261dd7SJason Gunthorpe 	ret = alloc_port_data(ib_dev);
2202c2261dd7SJason Gunthorpe 	if (ret)
2203c2261dd7SJason Gunthorpe 		return ret;
2204c2261dd7SJason Gunthorpe 
2205c2261dd7SJason Gunthorpe 	pdata = &ib_dev->port_data[port];
2206c2261dd7SJason Gunthorpe 	spin_lock_irqsave(&pdata->netdev_lock, flags);
2207324e227eSJason Gunthorpe 	old_ndev = rcu_dereference_protected(
2208324e227eSJason Gunthorpe 		pdata->netdev, lockdep_is_held(&pdata->netdev_lock));
2209324e227eSJason Gunthorpe 	if (old_ndev == ndev) {
2210c2261dd7SJason Gunthorpe 		spin_unlock_irqrestore(&pdata->netdev_lock, flags);
2211c2261dd7SJason Gunthorpe 		return 0;
2212c2261dd7SJason Gunthorpe 	}
2213c2261dd7SJason Gunthorpe 
2214324e227eSJason Gunthorpe 	rcu_assign_pointer(pdata->netdev, ndev);
22152043a14fSDavid Ahern 	netdev_put(old_ndev, &pdata->netdev_tracker);
22162043a14fSDavid Ahern 	netdev_hold(ndev, &pdata->netdev_tracker, GFP_ATOMIC);
2217c2261dd7SJason Gunthorpe 	spin_unlock_irqrestore(&pdata->netdev_lock, flags);
2218c2261dd7SJason Gunthorpe 
2219324e227eSJason Gunthorpe 	add_ndev_hash(pdata);
2220*9cbed5aaSChiara Meiohas 
2221*9cbed5aaSChiara Meiohas 	/* Make sure that the device is registered before we send events */
2222*9cbed5aaSChiara Meiohas 	if (xa_load(&devices, ib_dev->index) != ib_dev)
2223*9cbed5aaSChiara Meiohas 		return 0;
2224*9cbed5aaSChiara Meiohas 
2225*9cbed5aaSChiara Meiohas 	etype = ndev ? RDMA_NETDEV_ATTACH_EVENT : RDMA_NETDEV_DETACH_EVENT;
2226*9cbed5aaSChiara Meiohas 	rdma_nl_notify_event(ib_dev, port, etype);
2227*9cbed5aaSChiara Meiohas 
2228c2261dd7SJason Gunthorpe 	return 0;
2229c2261dd7SJason Gunthorpe }
2230c2261dd7SJason Gunthorpe EXPORT_SYMBOL(ib_device_set_netdev);
2231c2261dd7SJason Gunthorpe 
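/*
 * Editor's illustrative sketch, not part of device.c: a driver pairing its
 * ib_device with a net_device during probe and clearing the affiliation on
 * NETDEV_UNREGISTER, as the kernel-doc above requires. demo_* names and
 * the use of port 1 are hypothetical.
 */
static int demo_probe_port(struct ib_device *ibdev, struct net_device *ndev)
{
	return ib_device_set_netdev(ibdev, ndev, 1);
}

static void demo_netdev_unregister(struct ib_device *ibdev)
{
	/* Drop the affiliation before the netdev goes away. */
	ib_device_set_netdev(ibdev, NULL, 1);
}
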
2232c2261dd7SJason Gunthorpe static void free_netdevs(struct ib_device *ib_dev)
2233c2261dd7SJason Gunthorpe {
2234c2261dd7SJason Gunthorpe 	unsigned long flags;
22351fb7f897SMark Bloch 	u32 port;
2236c2261dd7SJason Gunthorpe 
223746bdf370SKamal Heib 	if (!ib_dev->port_data)
223846bdf370SKamal Heib 		return;
223946bdf370SKamal Heib 
2240c2261dd7SJason Gunthorpe 	rdma_for_each_port (ib_dev, port) {
2241c2261dd7SJason Gunthorpe 		struct ib_port_data *pdata = &ib_dev->port_data[port];
2242324e227eSJason Gunthorpe 		struct net_device *ndev;
2243c2261dd7SJason Gunthorpe 
2244c2261dd7SJason Gunthorpe 		spin_lock_irqsave(&pdata->netdev_lock, flags);
2245324e227eSJason Gunthorpe 		ndev = rcu_dereference_protected(
2246324e227eSJason Gunthorpe 			pdata->netdev, lockdep_is_held(&pdata->netdev_lock));
2247324e227eSJason Gunthorpe 		if (ndev) {
2248324e227eSJason Gunthorpe 			spin_lock(&ndev_hash_lock);
2249324e227eSJason Gunthorpe 			hash_del_rcu(&pdata->ndev_hash_link);
2250324e227eSJason Gunthorpe 			spin_unlock(&ndev_hash_lock);
2251324e227eSJason Gunthorpe 
2252324e227eSJason Gunthorpe 			/*
2253324e227eSJason Gunthorpe 			 * If this is the last dev_put there is still a
2254324e227eSJason Gunthorpe 			 * synchronize_rcu before the netdev is kfreed, so we
2255324e227eSJason Gunthorpe 			 * can continue to rely on unlocked pointer
2256324e227eSJason Gunthorpe 			 * comparisons after the put
2257324e227eSJason Gunthorpe 			 */
2258324e227eSJason Gunthorpe 			rcu_assign_pointer(pdata->netdev, NULL);
2259e42f9c2eSJason Gunthorpe 			netdev_put(ndev, &pdata->netdev_tracker);
2260c2261dd7SJason Gunthorpe 		}
2261c2261dd7SJason Gunthorpe 		spin_unlock_irqrestore(&pdata->netdev_lock, flags);
2262c2261dd7SJason Gunthorpe 	}
2263c2261dd7SJason Gunthorpe }
2264c2261dd7SJason Gunthorpe 
2265c2261dd7SJason Gunthorpe struct net_device *ib_device_get_netdev(struct ib_device *ib_dev,
22661fb7f897SMark Bloch 					u32 port)
2267c2261dd7SJason Gunthorpe {
2268c2261dd7SJason Gunthorpe 	struct ib_port_data *pdata;
2269c2261dd7SJason Gunthorpe 	struct net_device *res;
2270c2261dd7SJason Gunthorpe 
2271c2261dd7SJason Gunthorpe 	if (!rdma_is_port_valid(ib_dev, port))
2272c2261dd7SJason Gunthorpe 		return NULL;
2273c2261dd7SJason Gunthorpe 
22748d159eb2SChiara Meiohas 	if (!ib_dev->port_data)
22758d159eb2SChiara Meiohas 		return NULL;
22768d159eb2SChiara Meiohas 
2277c2261dd7SJason Gunthorpe 	pdata = &ib_dev->port_data[port];
2278c2261dd7SJason Gunthorpe 
2279c2261dd7SJason Gunthorpe 	/*
2280c2261dd7SJason Gunthorpe 	 * New drivers should use ib_device_set_netdev() not the legacy
2281c2261dd7SJason Gunthorpe 	 * get_netdev().
2282c2261dd7SJason Gunthorpe 	 */
2283c2261dd7SJason Gunthorpe 	if (ib_dev->ops.get_netdev)
2284c2261dd7SJason Gunthorpe 		res = ib_dev->ops.get_netdev(ib_dev, port);
2285c2261dd7SJason Gunthorpe 	else {
2286c2261dd7SJason Gunthorpe 		spin_lock(&pdata->netdev_lock);
2287324e227eSJason Gunthorpe 		res = rcu_dereference_protected(
2288324e227eSJason Gunthorpe 			pdata->netdev, lockdep_is_held(&pdata->netdev_lock));
2289c2261dd7SJason Gunthorpe 		dev_hold(res);
2290c2261dd7SJason Gunthorpe 		spin_unlock(&pdata->netdev_lock);
2291c2261dd7SJason Gunthorpe 	}
2292c2261dd7SJason Gunthorpe 
2293c2261dd7SJason Gunthorpe 	return res;
2294c2261dd7SJason Gunthorpe }
22958d159eb2SChiara Meiohas EXPORT_SYMBOL(ib_device_get_netdev);
2296c2261dd7SJason Gunthorpe 
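/*
 * Editor's illustrative sketch, not part of device.c: the netdev returned
 * by ib_device_get_netdev() is held, so the caller must dev_put() it.
 * demo_log_mtu is hypothetical.
 */
static void demo_log_mtu(struct ib_device *ibdev, u32 port)
{
	struct net_device *ndev = ib_device_get_netdev(ibdev, port);

	if (!ndev)
		return;
	pr_info("%s port %u mtu %u\n", dev_name(&ibdev->dev), port, ndev->mtu);
	dev_put(ndev);
}
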
2297c2261dd7SJason Gunthorpe /**
2298324e227eSJason Gunthorpe  * ib_device_get_by_netdev - Find an IB device associated with a netdev
2299324e227eSJason Gunthorpe  * @ndev: netdev to locate
2300324e227eSJason Gunthorpe  * @driver_id: The driver ID that must match (RDMA_DRIVER_UNKNOWN matches all)
2301324e227eSJason Gunthorpe  *
2302324e227eSJason Gunthorpe  * Find and hold an ib_device that is associated with a netdev via
2303324e227eSJason Gunthorpe  * ib_device_set_netdev(). The caller must call ib_device_put() on the
2304324e227eSJason Gunthorpe  * returned pointer.
2305324e227eSJason Gunthorpe  */
2306324e227eSJason Gunthorpe struct ib_device *ib_device_get_by_netdev(struct net_device *ndev,
2307324e227eSJason Gunthorpe 					  enum rdma_driver_id driver_id)
2308324e227eSJason Gunthorpe {
2309324e227eSJason Gunthorpe 	struct ib_device *res = NULL;
2310324e227eSJason Gunthorpe 	struct ib_port_data *cur;
2311324e227eSJason Gunthorpe 
2312324e227eSJason Gunthorpe 	rcu_read_lock();
2313324e227eSJason Gunthorpe 	hash_for_each_possible_rcu (ndev_hash, cur, ndev_hash_link,
2314324e227eSJason Gunthorpe 				    (uintptr_t)ndev) {
2315324e227eSJason Gunthorpe 		if (rcu_access_pointer(cur->netdev) == ndev &&
2316324e227eSJason Gunthorpe 		    (driver_id == RDMA_DRIVER_UNKNOWN ||
2317b9560a41SJason Gunthorpe 		     cur->ib_dev->ops.driver_id == driver_id) &&
2318324e227eSJason Gunthorpe 		    ib_device_try_get(cur->ib_dev)) {
2319324e227eSJason Gunthorpe 			res = cur->ib_dev;
2320324e227eSJason Gunthorpe 			break;
2321324e227eSJason Gunthorpe 		}
2322324e227eSJason Gunthorpe 	}
2323324e227eSJason Gunthorpe 	rcu_read_unlock();
2324324e227eSJason Gunthorpe 
2325324e227eSJason Gunthorpe 	return res;
2326324e227eSJason Gunthorpe }
2327324e227eSJason Gunthorpe EXPORT_SYMBOL(ib_device_get_by_netdev);
2328324e227eSJason Gunthorpe 
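/*
 * Editor's illustrative sketch, not part of device.c: mapping a netdev back
 * to the ib_device it is affiliated with; the reference must be released
 * with ib_device_put(). demo_find_ibdev is hypothetical.
 */
static void demo_find_ibdev(struct net_device *ndev)
{
	struct ib_device *ibdev;

	ibdev = ib_device_get_by_netdev(ndev, RDMA_DRIVER_UNKNOWN);
	if (!ibdev)
		return;
	pr_info("%s is backed by %s\n", ndev->name, dev_name(&ibdev->dev));
	ib_device_put(ibdev);
}
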
2329324e227eSJason Gunthorpe /**
233003db3a2dSMatan Barak  * ib_enum_roce_netdev - enumerate all RoCE ports
233103db3a2dSMatan Barak  * @ib_dev : IB device we want to query
233203db3a2dSMatan Barak  * @ib_dev: IB device we want to query
233303db3a2dSMatan Barak  * @filter_cookie: Cookie passed to filter
233403db3a2dSMatan Barak  * @cb: Callback to call for each found RoCE ports
233403db3a2dSMatan Barak  * @cb: Callback to call for each found RoCE port
233603db3a2dSMatan Barak  *
233703db3a2dSMatan Barak  * Enumerates all of the physical RoCE ports of ib_dev
233803db3a2dSMatan Barak  * which are related to netdevice and calls callback() on each
233903db3a2dSMatan Barak  * which are related to a netdevice and calls callback() on each
234003db3a2dSMatan Barak  * port for which the filter() function returns non-zero.
234103db3a2dSMatan Barak void ib_enum_roce_netdev(struct ib_device *ib_dev,
234203db3a2dSMatan Barak 			 roce_netdev_filter filter,
234303db3a2dSMatan Barak 			 void *filter_cookie,
234403db3a2dSMatan Barak 			 roce_netdev_callback cb,
234503db3a2dSMatan Barak 			 void *cookie)
234603db3a2dSMatan Barak {
23471fb7f897SMark Bloch 	u32 port;
234803db3a2dSMatan Barak 
2349ea1075edSJason Gunthorpe 	rdma_for_each_port (ib_dev, port)
235003db3a2dSMatan Barak 		if (rdma_protocol_roce(ib_dev, port)) {
2351c2261dd7SJason Gunthorpe 			struct net_device *idev =
2352c2261dd7SJason Gunthorpe 				ib_device_get_netdev(ib_dev, port);
235303db3a2dSMatan Barak 
235403db3a2dSMatan Barak 			if (filter(ib_dev, port, idev, filter_cookie))
235503db3a2dSMatan Barak 				cb(ib_dev, port, idev, cookie);
235603db3a2dSMatan Barak 			dev_put(idev);
235703db3a2dSMatan Barak 		}
235803db3a2dSMatan Barak }
235903db3a2dSMatan Barak 
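/*
 * Editor's illustrative sketch, not part of device.c: a filter/callback
 * pair suitable for ib_enum_roce_netdev(). The filter matches ports whose
 * affiliated netdev is running; the callback just logs them. demo_* names
 * are hypothetical.
 */
static int demo_filter(struct ib_device *ib_dev, u32 port,
		       struct net_device *idev, void *cookie)
{
	return idev && netif_running(idev);
}

static void demo_cb(struct ib_device *ib_dev, u32 port,
		    struct net_device *idev, void *cookie)
{
	pr_info("RoCE port %u of %s is running\n", port,
		dev_name(&ib_dev->dev));
}
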
236003db3a2dSMatan Barak /**
236103db3a2dSMatan Barak  * ib_enum_all_roce_netdevs - enumerate all RoCE devices
236203db3a2dSMatan Barak  * @filter: Should we call the callback?
236303db3a2dSMatan Barak  * @filter_cookie: Cookie passed to filter
236403db3a2dSMatan Barak  * @cb: Callback to call for each found RoCE port
236503db3a2dSMatan Barak  * @cookie: Cookie passed back to the callback
236603db3a2dSMatan Barak  *
236703db3a2dSMatan Barak  * Enumerates the physical ports of all RoCE devices which are
236803db3a2dSMatan Barak  * related to netdevices and calls callback() on each port for
236903db3a2dSMatan Barak  * which the filter() function returns non-zero.
237003db3a2dSMatan Barak  */
237103db3a2dSMatan Barak void ib_enum_all_roce_netdevs(roce_netdev_filter filter,
237203db3a2dSMatan Barak 			      void *filter_cookie,
237303db3a2dSMatan Barak 			      roce_netdev_callback cb,
237403db3a2dSMatan Barak 			      void *cookie)
237503db3a2dSMatan Barak {
237603db3a2dSMatan Barak 	struct ib_device *dev;
23770df91bb6SJason Gunthorpe 	unsigned long index;
237803db3a2dSMatan Barak 
2379921eab11SJason Gunthorpe 	down_read(&devices_rwsem);
23800df91bb6SJason Gunthorpe 	xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED)
238103db3a2dSMatan Barak 		ib_enum_roce_netdev(dev, filter, filter_cookie, cb, cookie);
2382921eab11SJason Gunthorpe 	up_read(&devices_rwsem);
238303db3a2dSMatan Barak }
238403db3a2dSMatan Barak 
23854c3b53e1SLee Jones /*
23868030c835SLeon Romanovsky  * ib_enum_all_devs - enumerate all ib_devices
23878030c835SLeon Romanovsky  * @cb: Callback to call for each found ib_device
23888030c835SLeon Romanovsky  *
23898030c835SLeon Romanovsky  * Enumerates all ib_devices and calls callback() on each device.
23908030c835SLeon Romanovsky  */
23918030c835SLeon Romanovsky int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb,
23928030c835SLeon Romanovsky 		     struct netlink_callback *cb)
23938030c835SLeon Romanovsky {
23940df91bb6SJason Gunthorpe 	unsigned long index;
23958030c835SLeon Romanovsky 	struct ib_device *dev;
23968030c835SLeon Romanovsky 	unsigned int idx = 0;
23978030c835SLeon Romanovsky 	int ret = 0;
23988030c835SLeon Romanovsky 
2399921eab11SJason Gunthorpe 	down_read(&devices_rwsem);
24000df91bb6SJason Gunthorpe 	xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) {
240137eeab55SParav Pandit 		if (!rdma_dev_access_netns(dev, sock_net(skb->sk)))
240237eeab55SParav Pandit 			continue;
240337eeab55SParav Pandit 
24048030c835SLeon Romanovsky 		ret = nldev_cb(dev, skb, cb, idx);
24058030c835SLeon Romanovsky 		if (ret)
24068030c835SLeon Romanovsky 			break;
24078030c835SLeon Romanovsky 		idx++;
24088030c835SLeon Romanovsky 	}
2409921eab11SJason Gunthorpe 	up_read(&devices_rwsem);
24108030c835SLeon Romanovsky 	return ret;
24118030c835SLeon Romanovsky }
24128030c835SLeon Romanovsky 
24138030c835SLeon Romanovsky /**
24141da177e4SLinus Torvalds  * ib_query_pkey - Get P_Key table entry
24151da177e4SLinus Torvalds  * @device:Device to query
24161da177e4SLinus Torvalds  * @port_num:Port number to query
24171da177e4SLinus Torvalds  * @index:P_Key table index to query
24181da177e4SLinus Torvalds  * @pkey:Returned P_Key
24191da177e4SLinus Torvalds  *
24201da177e4SLinus Torvalds  * ib_query_pkey() fetches the specified P_Key table entry.
24211da177e4SLinus Torvalds  */
24221da177e4SLinus Torvalds int ib_query_pkey(struct ib_device *device,
24231fb7f897SMark Bloch 		  u32 port_num, u16 index, u16 *pkey)
24241da177e4SLinus Torvalds {
24259af3f5cfSYuval Shaia 	if (!rdma_is_port_valid(device, port_num))
24269af3f5cfSYuval Shaia 		return -EINVAL;
24279af3f5cfSYuval Shaia 
2428ab75a6cbSKamal Heib 	if (!device->ops.query_pkey)
2429ab75a6cbSKamal Heib 		return -EOPNOTSUPP;
2430ab75a6cbSKamal Heib 
24313023a1e9SKamal Heib 	return device->ops.query_pkey(device, port_num, index, pkey);
24321da177e4SLinus Torvalds }
24331da177e4SLinus Torvalds EXPORT_SYMBOL(ib_query_pkey);
24341da177e4SLinus Torvalds 
24351da177e4SLinus Torvalds /**
24361da177e4SLinus Torvalds  * ib_modify_device - Change IB device attributes
24371da177e4SLinus Torvalds  * @device:Device to modify
24381da177e4SLinus Torvalds  * @device_modify_mask:Mask of attributes to change
24391da177e4SLinus Torvalds  * @device_modify:New attribute values
24401da177e4SLinus Torvalds  *
24411da177e4SLinus Torvalds  * ib_modify_device() changes a device's attributes as specified by
24421da177e4SLinus Torvalds  * the @device_modify_mask and @device_modify structure.
24431da177e4SLinus Torvalds  */
24441da177e4SLinus Torvalds int ib_modify_device(struct ib_device *device,
24451da177e4SLinus Torvalds 		     int device_modify_mask,
24461da177e4SLinus Torvalds 		     struct ib_device_modify *device_modify)
24471da177e4SLinus Torvalds {
24483023a1e9SKamal Heib 	if (!device->ops.modify_device)
2449d0f3ef36SKamal Heib 		return -EOPNOTSUPP;
245010e1b54bSBart Van Assche 
24513023a1e9SKamal Heib 	return device->ops.modify_device(device, device_modify_mask,
24521da177e4SLinus Torvalds 					 device_modify);
24531da177e4SLinus Torvalds }
24541da177e4SLinus Torvalds EXPORT_SYMBOL(ib_modify_device);
24551da177e4SLinus Torvalds 
24561da177e4SLinus Torvalds /**
24571da177e4SLinus Torvalds  * ib_modify_port - Modifies the attributes for the specified port.
24581da177e4SLinus Torvalds  * @device: The device to modify.
24591da177e4SLinus Torvalds  * @port_num: The number of the port to modify.
24601da177e4SLinus Torvalds  * @port_modify_mask: Mask used to specify which attributes of the port
24611da177e4SLinus Torvalds  *   to change.
24621da177e4SLinus Torvalds  * @port_modify: New attribute values for the port.
24631da177e4SLinus Torvalds  *
24641da177e4SLinus Torvalds  * ib_modify_port() changes a port's attributes as specified by the
24651da177e4SLinus Torvalds  * @port_modify_mask and @port_modify structure.
24661da177e4SLinus Torvalds  */
24671da177e4SLinus Torvalds int ib_modify_port(struct ib_device *device,
24681fb7f897SMark Bloch 		   u32 port_num, int port_modify_mask,
24691da177e4SLinus Torvalds 		   struct ib_port_modify *port_modify)
24701da177e4SLinus Torvalds {
247161e0962dSSelvin Xavier 	int rc;
247210e1b54bSBart Van Assche 
247324dc831bSYuval Shaia 	if (!rdma_is_port_valid(device, port_num))
2474116c0074SRoland Dreier 		return -EINVAL;
2475116c0074SRoland Dreier 
24763023a1e9SKamal Heib 	if (device->ops.modify_port)
24773023a1e9SKamal Heib 		rc = device->ops.modify_port(device, port_num,
24783023a1e9SKamal Heib 					     port_modify_mask,
24791da177e4SLinus Torvalds 					     port_modify);
248055bfe905SKamal Heib 	else if (rdma_protocol_roce(device, port_num) &&
248155bfe905SKamal Heib 		 ((port_modify->set_port_cap_mask & ~IB_PORT_CM_SUP) == 0 ||
248255bfe905SKamal Heib 		  (port_modify->clr_port_cap_mask & ~IB_PORT_CM_SUP) == 0))
248355bfe905SKamal Heib 		rc = 0;
248461e0962dSSelvin Xavier 	else
248555bfe905SKamal Heib 		rc = -EOPNOTSUPP;
248661e0962dSSelvin Xavier 	return rc;
24871da177e4SLinus Torvalds }
24881da177e4SLinus Torvalds EXPORT_SYMBOL(ib_modify_port);
24891da177e4SLinus Torvalds 
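/*
 * Editor's illustrative sketch, not part of device.c: advertising CM
 * support via the port capability mask. As the function above shows, RoCE
 * ports accept this particular mask even when the driver provides no
 * modify_port() callback. demo_set_cm_sup is hypothetical.
 */
static int demo_set_cm_sup(struct ib_device *ibdev, u32 port)
{
	struct ib_port_modify pm = {
		.set_port_cap_mask = IB_PORT_CM_SUP,
	};

	return ib_modify_port(ibdev, port, 0, &pm);
}
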
24905eb620c8SYosef Etigin /**
24915eb620c8SYosef Etigin  * ib_find_gid - Returns the port number and GID table index where
2492dbb12562SParav Pandit  *   a specified GID value occurs. It searches only the IB link layer.
24935eb620c8SYosef Etigin  * @device: The device to query.
24945eb620c8SYosef Etigin  * @gid: The GID value to search for.
24955eb620c8SYosef Etigin  * @port_num: The port number of the device where the GID value was found.
24965eb620c8SYosef Etigin  * @index: The index into the GID table where the GID was found.  This
24975eb620c8SYosef Etigin  *   parameter may be NULL.
24985eb620c8SYosef Etigin  */
24995eb620c8SYosef Etigin int ib_find_gid(struct ib_device *device, union ib_gid *gid,
25001fb7f897SMark Bloch 		u32 *port_num, u16 *index)
25015eb620c8SYosef Etigin {
25025eb620c8SYosef Etigin 	union ib_gid tmp_gid;
25031fb7f897SMark Bloch 	u32 port;
2504ea1075edSJason Gunthorpe 	int ret, i;
25055eb620c8SYosef Etigin 
2506ea1075edSJason Gunthorpe 	rdma_for_each_port (device, port) {
250722d24f75SParav Pandit 		if (!rdma_protocol_ib(device, port))
2508b39ffa1dSMatan Barak 			continue;
2509b39ffa1dSMatan Barak 
25108ceb1357SJason Gunthorpe 		for (i = 0; i < device->port_data[port].immutable.gid_tbl_len;
25118ceb1357SJason Gunthorpe 		     ++i) {
25121dfce294SParav Pandit 			ret = rdma_query_gid(device, port, i, &tmp_gid);
25135eb620c8SYosef Etigin 			if (ret)
2514483d8051SAvihai Horon 				continue;
2515483d8051SAvihai Horon 
25165eb620c8SYosef Etigin 			if (!memcmp(&tmp_gid, gid, sizeof *gid)) {
25175eb620c8SYosef Etigin 				*port_num = port;
25185eb620c8SYosef Etigin 				if (index)
25195eb620c8SYosef Etigin 					*index = i;
25205eb620c8SYosef Etigin 				return 0;
25215eb620c8SYosef Etigin 			}
25225eb620c8SYosef Etigin 		}
25235eb620c8SYosef Etigin 	}
25245eb620c8SYosef Etigin 
25255eb620c8SYosef Etigin 	return -ENOENT;
25265eb620c8SYosef Etigin }
25275eb620c8SYosef Etigin EXPORT_SYMBOL(ib_find_gid);
25285eb620c8SYosef Etigin 
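/*
 * Editor's illustrative sketch, not part of device.c: locating the port
 * and GID table index of a known GID. Note the search above only covers
 * IB link-layer ports. demo_locate_gid is hypothetical.
 */
static void demo_locate_gid(struct ib_device *ibdev, union ib_gid *gid)
{
	u32 port;
	u16 index;

	if (!ib_find_gid(ibdev, gid, &port, &index))
		pr_info("GID found at port %u index %u\n", port, index);
}
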
25295eb620c8SYosef Etigin /**
25305eb620c8SYosef Etigin  * ib_find_pkey - Returns the PKey table index where a specified
25315eb620c8SYosef Etigin  *   PKey value occurs.
25325eb620c8SYosef Etigin  * @device: The device to query.
25335eb620c8SYosef Etigin  * @port_num: The port number of the device to search for the PKey.
25345eb620c8SYosef Etigin  * @pkey: The PKey value to search for.
25355eb620c8SYosef Etigin  * @index: The index into the PKey table where the PKey was found.
25365eb620c8SYosef Etigin  */
25375eb620c8SYosef Etigin int ib_find_pkey(struct ib_device *device,
25381fb7f897SMark Bloch 		 u32 port_num, u16 pkey, u16 *index)
25395eb620c8SYosef Etigin {
25405eb620c8SYosef Etigin 	int ret, i;
25415eb620c8SYosef Etigin 	u16 tmp_pkey;
2542ff7166c4SJack Morgenstein 	int partial_ix = -1;
25435eb620c8SYosef Etigin 
25448ceb1357SJason Gunthorpe 	for (i = 0; i < device->port_data[port_num].immutable.pkey_tbl_len;
25458ceb1357SJason Gunthorpe 	     ++i) {
25465eb620c8SYosef Etigin 		ret = ib_query_pkey(device, port_num, i, &tmp_pkey);
25475eb620c8SYosef Etigin 		if (ret)
25485eb620c8SYosef Etigin 			return ret;
254936026eccSMoni Shoua 		if ((pkey & 0x7fff) == (tmp_pkey & 0x7fff)) {
2550ff7166c4SJack Morgenstein 			/* if there is full-member pkey take it.*/
2550ff7166c4SJack Morgenstein 			/* if there is a full-member pkey, take it. */
25525eb620c8SYosef Etigin 				*index = i;
25535eb620c8SYosef Etigin 				return 0;
25545eb620c8SYosef Etigin 			}
2555ff7166c4SJack Morgenstein 			if (partial_ix < 0)
2556ff7166c4SJack Morgenstein 				partial_ix = i;
2557ff7166c4SJack Morgenstein 		}
25585eb620c8SYosef Etigin 	}
25595eb620c8SYosef Etigin 
2560ff7166c4SJack Morgenstein 	/* no full-member pkey; if a limited one exists, take it */
2561ff7166c4SJack Morgenstein 	if (partial_ix >= 0) {
2562ff7166c4SJack Morgenstein 		*index = partial_ix;
2563ff7166c4SJack Morgenstein 		return 0;
2564ff7166c4SJack Morgenstein 	}
25655eb620c8SYosef Etigin 	return -ENOENT;
25665eb620c8SYosef Etigin }
25675eb620c8SYosef Etigin EXPORT_SYMBOL(ib_find_pkey);
25685eb620c8SYosef Etigin 
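/*
 * Editor's illustrative sketch, not part of device.c: finding the table
 * index of the default P_Key (0xffff). Per the logic above, a full-member
 * entry (bit 15 set) is preferred over a limited-member match.
 * demo_default_pkey_index is hypothetical.
 */
static int demo_default_pkey_index(struct ib_device *ibdev, u32 port,
				   u16 *index)
{
	return ib_find_pkey(ibdev, port, 0xffff, index);
}
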
25699268f72dSYotam Kenneth /**
25709268f72dSYotam Kenneth  * ib_get_net_dev_by_params() - Return the appropriate net_dev
25719268f72dSYotam Kenneth  * for a received CM request
25729268f72dSYotam Kenneth  * @dev:	An RDMA device on which the request has been received.
25739268f72dSYotam Kenneth  * @port:	Port number on the RDMA device.
25749268f72dSYotam Kenneth  * @pkey:	The Pkey the request came on.
25759268f72dSYotam Kenneth  * @gid:	A GID that the net_dev uses to communicate.
25769268f72dSYotam Kenneth  * @addr:	Contains the IP address that the request specified as its
25779268f72dSYotam Kenneth  *		destination.
2578921eab11SJason Gunthorpe  *
25799268f72dSYotam Kenneth  */
25809268f72dSYotam Kenneth struct net_device *ib_get_net_dev_by_params(struct ib_device *dev,
25811fb7f897SMark Bloch 					    u32 port,
25829268f72dSYotam Kenneth 					    u16 pkey,
25839268f72dSYotam Kenneth 					    const union ib_gid *gid,
25849268f72dSYotam Kenneth 					    const struct sockaddr *addr)
25859268f72dSYotam Kenneth {
25869268f72dSYotam Kenneth 	struct net_device *net_dev = NULL;
25870df91bb6SJason Gunthorpe 	unsigned long index;
25880df91bb6SJason Gunthorpe 	void *client_data;
25899268f72dSYotam Kenneth 
25909268f72dSYotam Kenneth 	if (!rdma_protocol_ib(dev, port))
25919268f72dSYotam Kenneth 		return NULL;
25929268f72dSYotam Kenneth 
2593921eab11SJason Gunthorpe 	/*
2594921eab11SJason Gunthorpe 	 * Holding the read side guarantees that the client will not become
2595921eab11SJason Gunthorpe 	 * unregistered while we are calling get_net_dev_by_params()
2596921eab11SJason Gunthorpe 	 */
2597921eab11SJason Gunthorpe 	down_read(&dev->client_data_rwsem);
25980df91bb6SJason Gunthorpe 	xan_for_each_marked (&dev->client_data, index, client_data,
25990df91bb6SJason Gunthorpe 			     CLIENT_DATA_REGISTERED) {
26000df91bb6SJason Gunthorpe 		struct ib_client *client = xa_load(&clients, index);
26019268f72dSYotam Kenneth 
26020df91bb6SJason Gunthorpe 		if (!client || !client->get_net_dev_by_params)
26039268f72dSYotam Kenneth 			continue;
26049268f72dSYotam Kenneth 
26050df91bb6SJason Gunthorpe 		net_dev = client->get_net_dev_by_params(dev, port, pkey, gid,
26060df91bb6SJason Gunthorpe 							addr, client_data);
26079268f72dSYotam Kenneth 		if (net_dev)
26089268f72dSYotam Kenneth 			break;
26099268f72dSYotam Kenneth 	}
2610921eab11SJason Gunthorpe 	up_read(&dev->client_data_rwsem);
26119268f72dSYotam Kenneth 
26129268f72dSYotam Kenneth 	return net_dev;
26139268f72dSYotam Kenneth }
26149268f72dSYotam Kenneth EXPORT_SYMBOL(ib_get_net_dev_by_params);
26159268f72dSYotam Kenneth 
2616521ed0d9SKamal Heib void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
2617521ed0d9SKamal Heib {
26183023a1e9SKamal Heib 	struct ib_device_ops *dev_ops = &dev->ops;
2619521ed0d9SKamal Heib #define SET_DEVICE_OP(ptr, name)                                               \
2620521ed0d9SKamal Heib 	do {                                                                   \
2621521ed0d9SKamal Heib 		if (ops->name)                                                 \
2622521ed0d9SKamal Heib 			if (!((ptr)->name))				       \
2623521ed0d9SKamal Heib 				(ptr)->name = ops->name;                       \
2624521ed0d9SKamal Heib 	} while (0)
2625521ed0d9SKamal Heib 
262630471d4bSLeon Romanovsky #define SET_OBJ_SIZE(ptr, name) SET_DEVICE_OP(ptr, size_##name)
262730471d4bSLeon Romanovsky 
2628b9560a41SJason Gunthorpe 	if (ops->driver_id != RDMA_DRIVER_UNKNOWN) {
2629b9560a41SJason Gunthorpe 		WARN_ON(dev_ops->driver_id != RDMA_DRIVER_UNKNOWN &&
2630b9560a41SJason Gunthorpe 			dev_ops->driver_id != ops->driver_id);
2631b9560a41SJason Gunthorpe 		dev_ops->driver_id = ops->driver_id;
2632b9560a41SJason Gunthorpe 	}
26337a154142SJason Gunthorpe 	if (ops->owner) {
26347a154142SJason Gunthorpe 		WARN_ON(dev_ops->owner && dev_ops->owner != ops->owner);
26357a154142SJason Gunthorpe 		dev_ops->owner = ops->owner;
26367a154142SJason Gunthorpe 	}
263772c6ec18SJason Gunthorpe 	if (ops->uverbs_abi_ver)
263872c6ec18SJason Gunthorpe 		dev_ops->uverbs_abi_ver = ops->uverbs_abi_ver;
2639b9560a41SJason Gunthorpe 
26408f71bb00SJason Gunthorpe 	dev_ops->uverbs_no_driver_id_binding |=
26418f71bb00SJason Gunthorpe 		ops->uverbs_no_driver_id_binding;
26428f71bb00SJason Gunthorpe 
26433023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, add_gid);
2644bca51197SMark Zhang 	SET_DEVICE_OP(dev_ops, add_sub_dev);
26452f1927b0SMoni Shoua 	SET_DEVICE_OP(dev_ops, advise_mr);
26463023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, alloc_dm);
26474b5f4d3fSJason Gunthorpe 	SET_DEVICE_OP(dev_ops, alloc_hw_device_stats);
26484b5f4d3fSJason Gunthorpe 	SET_DEVICE_OP(dev_ops, alloc_hw_port_stats);
26493023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, alloc_mr);
265026bc7eaeSIsrael Rukshin 	SET_DEVICE_OP(dev_ops, alloc_mr_integrity);
26513023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, alloc_mw);
26523023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, alloc_pd);
26533023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, alloc_rdma_netdev);
26543023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, alloc_ucontext);
26553023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, alloc_xrcd);
26563023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, attach_mcast);
26573023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, check_mr_status);
2658c4ffee7cSMark Zhang 	SET_DEVICE_OP(dev_ops, counter_alloc_stats);
265999fa331dSMark Zhang 	SET_DEVICE_OP(dev_ops, counter_bind_qp);
266099fa331dSMark Zhang 	SET_DEVICE_OP(dev_ops, counter_dealloc);
266199fa331dSMark Zhang 	SET_DEVICE_OP(dev_ops, counter_unbind_qp);
2662c4ffee7cSMark Zhang 	SET_DEVICE_OP(dev_ops, counter_update_stats);
26633023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, create_ah);
26643023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, create_counters);
26653023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, create_cq);
26663023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, create_flow);
26673023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, create_qp);
26683023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, create_rwq_ind_table);
26693023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, create_srq);
2670676a80adSJason Gunthorpe 	SET_DEVICE_OP(dev_ops, create_user_ah);
26713023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, create_wq);
26723023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, dealloc_dm);
2673d0899892SJason Gunthorpe 	SET_DEVICE_OP(dev_ops, dealloc_driver);
26743023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, dealloc_mw);
26753023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, dealloc_pd);
26763023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, dealloc_ucontext);
26773023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, dealloc_xrcd);
26783023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, del_gid);
2679bca51197SMark Zhang 	SET_DEVICE_OP(dev_ops, del_sub_dev);
26803023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, dereg_mr);
26813023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, destroy_ah);
26823023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, destroy_counters);
26833023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, destroy_cq);
26843023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, destroy_flow);
26853023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, destroy_flow_action);
26863023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, destroy_qp);
26873023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, destroy_rwq_ind_table);
26883023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, destroy_srq);
26893023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, destroy_wq);
2690915e4af5SJason Gunthorpe 	SET_DEVICE_OP(dev_ops, device_group);
26913023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, detach_mcast);
26923023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, disassociate_ucontext);
26933023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, drain_rq);
26943023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, drain_sq);
2695ca22354bSJason Gunthorpe 	SET_DEVICE_OP(dev_ops, enable_driver);
2696211cd945SMaor Gottlieb 	SET_DEVICE_OP(dev_ops, fill_res_cm_id_entry);
26979e2a187aSMaor Gottlieb 	SET_DEVICE_OP(dev_ops, fill_res_cq_entry);
269865959522SMaor Gottlieb 	SET_DEVICE_OP(dev_ops, fill_res_cq_entry_raw);
2699f4434529SMaor Gottlieb 	SET_DEVICE_OP(dev_ops, fill_res_mr_entry);
270065959522SMaor Gottlieb 	SET_DEVICE_OP(dev_ops, fill_res_mr_entry_raw);
27015cc34116SMaor Gottlieb 	SET_DEVICE_OP(dev_ops, fill_res_qp_entry);
270265959522SMaor Gottlieb 	SET_DEVICE_OP(dev_ops, fill_res_qp_entry_raw);
27030e32d7d4Swenglianfa 	SET_DEVICE_OP(dev_ops, fill_res_srq_entry);
2704aebf8145Swenglianfa 	SET_DEVICE_OP(dev_ops, fill_res_srq_entry_raw);
2705f4434529SMaor Gottlieb 	SET_DEVICE_OP(dev_ops, fill_stat_mr_entry);
27063023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, get_dev_fw_str);
27073023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, get_dma_mr);
27083023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, get_hw_stats);
27093023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, get_link_layer);
27103023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, get_netdev);
2711514aee66SLeon Romanovsky 	SET_DEVICE_OP(dev_ops, get_numa_node);
27123023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, get_port_immutable);
27133023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, get_vector_affinity);
27143023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, get_vf_config);
2715bfcb3c5dSDanit Goldberg 	SET_DEVICE_OP(dev_ops, get_vf_guid);
27163023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, get_vf_stats);
2717dd05cb82SKamal Heib 	SET_DEVICE_OP(dev_ops, iw_accept);
2718dd05cb82SKamal Heib 	SET_DEVICE_OP(dev_ops, iw_add_ref);
2719dd05cb82SKamal Heib 	SET_DEVICE_OP(dev_ops, iw_connect);
2720dd05cb82SKamal Heib 	SET_DEVICE_OP(dev_ops, iw_create_listen);
2721dd05cb82SKamal Heib 	SET_DEVICE_OP(dev_ops, iw_destroy_listen);
2722dd05cb82SKamal Heib 	SET_DEVICE_OP(dev_ops, iw_get_qp);
2723dd05cb82SKamal Heib 	SET_DEVICE_OP(dev_ops, iw_reject);
2724dd05cb82SKamal Heib 	SET_DEVICE_OP(dev_ops, iw_rem_ref);
27253023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, map_mr_sg);
27262cdfcdd8SMax Gurtovoy 	SET_DEVICE_OP(dev_ops, map_mr_sg_pi);
27273023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, mmap);
27283411f9f0SMichal Kalderon 	SET_DEVICE_OP(dev_ops, mmap_free);
27293023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, modify_ah);
27303023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, modify_cq);
27313023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, modify_device);
27325e2ddd1eSAharon Landau 	SET_DEVICE_OP(dev_ops, modify_hw_stat);
27333023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, modify_port);
27343023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, modify_qp);
27353023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, modify_srq);
27363023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, modify_wq);
27373023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, peek_cq);
27383023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, poll_cq);
2739d7407d16SJason Gunthorpe 	SET_DEVICE_OP(dev_ops, port_groups);
27403023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, post_recv);
27413023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, post_send);
27423023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, post_srq_recv);
27433023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, process_mad);
27443023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, query_ah);
27453023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, query_device);
27463023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, query_gid);
27473023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, query_pkey);
27483023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, query_port);
27493023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, query_qp);
27503023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, query_srq);
27511c8fb1eaSYishai Hadas 	SET_DEVICE_OP(dev_ops, query_ucontext);
27523023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, rdma_netdev_get_params);
27533023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, read_counters);
27543023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, reg_dm_mr);
27553023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, reg_user_mr);
27563bc489e8SJianxin Xiong 	SET_DEVICE_OP(dev_ops, reg_user_mr_dmabuf);
27573023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, req_notify_cq);
27583023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, rereg_user_mr);
27593023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, resize_cq);
27603023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, set_vf_guid);
27613023a1e9SKamal Heib 	SET_DEVICE_OP(dev_ops, set_vf_link_state);
276221a428a0SLeon Romanovsky 
2763d3456914SLeon Romanovsky 	SET_OBJ_SIZE(dev_ops, ib_ah);
27643b023e1bSLeon Romanovsky 	SET_OBJ_SIZE(dev_ops, ib_counters);
2765e39afe3dSLeon Romanovsky 	SET_OBJ_SIZE(dev_ops, ib_cq);
2766d18bb3e1SLeon Romanovsky 	SET_OBJ_SIZE(dev_ops, ib_mw);
276721a428a0SLeon Romanovsky 	SET_OBJ_SIZE(dev_ops, ib_pd);
2768514aee66SLeon Romanovsky 	SET_OBJ_SIZE(dev_ops, ib_qp);
2769c0a6b5ecSLeon Romanovsky 	SET_OBJ_SIZE(dev_ops, ib_rwq_ind_table);
277068e326deSLeon Romanovsky 	SET_OBJ_SIZE(dev_ops, ib_srq);
2771a2a074efSLeon Romanovsky 	SET_OBJ_SIZE(dev_ops, ib_ucontext);
277228ad5f65SLeon Romanovsky 	SET_OBJ_SIZE(dev_ops, ib_xrcd);
2773521ed0d9SKamal Heib }
2774521ed0d9SKamal Heib EXPORT_SYMBOL(ib_set_device_ops);
2775521ed0d9SKamal Heib 
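/*
 * Editor's illustrative sketch, not part of device.c: a driver filling in
 * its ops table once and handing it to ib_set_device_ops(). Because
 * SET_DEVICE_OP() never overwrites an existing pointer, layered callers
 * can merge several tables. All demo_* callbacks and struct demo_pd are
 * hypothetical; INIT_RDMA_OBJ_SIZE() comes from <rdma/ib_verbs.h>.
 */
struct demo_pd {
	struct ib_pd ibpd;	/* must be first for INIT_RDMA_OBJ_SIZE() */
};

static int demo_alloc_pd(struct ib_pd *pd, struct ib_udata *udata);
static int demo_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
static int demo_query_port(struct ib_device *ibdev, u32 port,
			   struct ib_port_attr *attr);

static const struct ib_device_ops demo_dev_ops = {
	.owner = THIS_MODULE,
	.uverbs_abi_ver = 1,
	.alloc_pd = demo_alloc_pd,
	.dealloc_pd = demo_dealloc_pd,
	.query_port = demo_query_port,
	INIT_RDMA_OBJ_SIZE(ib_pd, demo_pd, ibpd),
};

static void demo_init_device(struct ib_device *ibdev)
{
	ib_set_device_ops(ibdev, &demo_dev_ops);
}
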
2776bca51197SMark Zhang int ib_add_sub_device(struct ib_device *parent,
2777bca51197SMark Zhang 		      enum rdma_nl_dev_type type,
2778bca51197SMark Zhang 		      const char *name)
2779bca51197SMark Zhang {
2780bca51197SMark Zhang 	struct ib_device *sub;
2781bca51197SMark Zhang 	int ret = 0;
2782bca51197SMark Zhang 
2783bca51197SMark Zhang 	if (!parent->ops.add_sub_dev || !parent->ops.del_sub_dev)
2784bca51197SMark Zhang 		return -EOPNOTSUPP;
2785bca51197SMark Zhang 
2786bca51197SMark Zhang 	if (!ib_device_try_get(parent))
2787bca51197SMark Zhang 		return -EINVAL;
2788bca51197SMark Zhang 
2789bca51197SMark Zhang 	sub = parent->ops.add_sub_dev(parent, type, name);
2790bca51197SMark Zhang 	if (IS_ERR(sub)) {
2791bca51197SMark Zhang 		ib_device_put(parent);
2792bca51197SMark Zhang 		return PTR_ERR(sub);
2793bca51197SMark Zhang 	}
2794bca51197SMark Zhang 
2795bca51197SMark Zhang 	sub->type = type;
2796bca51197SMark Zhang 	sub->parent = parent;
2797bca51197SMark Zhang 
2798bca51197SMark Zhang 	mutex_lock(&parent->subdev_lock);
2799bca51197SMark Zhang 	list_add_tail(&sub->subdev_list, &parent->subdev_list_head);
2800bca51197SMark Zhang 	mutex_unlock(&parent->subdev_lock);
2801bca51197SMark Zhang 
2802bca51197SMark Zhang 	return ret;
2803bca51197SMark Zhang }
2804bca51197SMark Zhang EXPORT_SYMBOL(ib_add_sub_device);
2805bca51197SMark Zhang 
2806bca51197SMark Zhang int ib_del_sub_device_and_put(struct ib_device *sub)
2807bca51197SMark Zhang {
2808bca51197SMark Zhang 	struct ib_device *parent = sub->parent;
2809bca51197SMark Zhang 
2810bca51197SMark Zhang 	if (!parent)
2811bca51197SMark Zhang 		return -EOPNOTSUPP;
2812bca51197SMark Zhang 
2813bca51197SMark Zhang 	mutex_lock(&parent->subdev_lock);
2814bca51197SMark Zhang 	list_del(&sub->subdev_list);
2815bca51197SMark Zhang 	mutex_unlock(&parent->subdev_lock);
2816bca51197SMark Zhang 
2817bca51197SMark Zhang 	ib_device_put(sub);
2818bca51197SMark Zhang 	parent->ops.del_sub_dev(sub);
2819bca51197SMark Zhang 	ib_device_put(parent);
2820bca51197SMark Zhang 
2821bca51197SMark Zhang 	return 0;
2822bca51197SMark Zhang }
2823bca51197SMark Zhang EXPORT_SYMBOL(ib_del_sub_device_and_put);
2824bca51197SMark Zhang 
28255a7a9e03SChristoph Hellwig #ifdef CONFIG_INFINIBAND_VIRT_DMA
28265a7a9e03SChristoph Hellwig int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents)
28275a7a9e03SChristoph Hellwig {
28285a7a9e03SChristoph Hellwig 	struct scatterlist *s;
28295a7a9e03SChristoph Hellwig 	int i;
28305a7a9e03SChristoph Hellwig 
28315a7a9e03SChristoph Hellwig 	for_each_sg(sg, s, nents, i) {
28325a7a9e03SChristoph Hellwig 		sg_dma_address(s) = (uintptr_t)sg_virt(s);
28335a7a9e03SChristoph Hellwig 		sg_dma_len(s) = s->length;
28345a7a9e03SChristoph Hellwig 	}
28355a7a9e03SChristoph Hellwig 	return nents;
28365a7a9e03SChristoph Hellwig }
28375a7a9e03SChristoph Hellwig EXPORT_SYMBOL(ib_dma_virt_map_sg);
28385a7a9e03SChristoph Hellwig #endif /* CONFIG_INFINIBAND_VIRT_DMA */
28395a7a9e03SChristoph Hellwig 
2840d0e312feSLeon Romanovsky static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = {
2841735c631aSMark Bloch 	[RDMA_NL_LS_OP_RESOLVE] = {
2842647c75acSLeon Romanovsky 		.doit = ib_nl_handle_resolve_resp,
2843e3a2b93dSLeon Romanovsky 		.flags = RDMA_NL_ADMIN_PERM,
2844e3a2b93dSLeon Romanovsky 	},
2845735c631aSMark Bloch 	[RDMA_NL_LS_OP_SET_TIMEOUT] = {
2846647c75acSLeon Romanovsky 		.doit = ib_nl_handle_set_timeout,
2847e3a2b93dSLeon Romanovsky 		.flags = RDMA_NL_ADMIN_PERM,
2848e3a2b93dSLeon Romanovsky 	},
2849ae43f828SMark Bloch 	[RDMA_NL_LS_OP_IP_RESOLVE] = {
2850647c75acSLeon Romanovsky 		.doit = ib_nl_handle_ip_res_resp,
2851e3a2b93dSLeon Romanovsky 		.flags = RDMA_NL_ADMIN_PERM,
2852e3a2b93dSLeon Romanovsky 	},
2853735c631aSMark Bloch };
2854735c631aSMark Bloch 
28551da177e4SLinus Torvalds static int __init ib_core_init(void)
28561da177e4SLinus Torvalds {
2857ff815a89STetsuo Handa 	int ret = -ENOMEM;
28581da177e4SLinus Torvalds 
2859f0626710STejun Heo 	ib_wq = alloc_workqueue("infiniband", 0, 0);
2860f0626710STejun Heo 	if (!ib_wq)
2861f0626710STejun Heo 		return -ENOMEM;
2862f0626710STejun Heo 
2863ff815a89STetsuo Handa 	ib_unreg_wq = alloc_workqueue("ib-unreg-wq", WQ_UNBOUND,
2864ff815a89STetsuo Handa 				      WQ_UNBOUND_MAX_ACTIVE);
2865ff815a89STetsuo Handa 	if (!ib_unreg_wq)
2866ff815a89STetsuo Handa 		goto err;
2867ff815a89STetsuo Handa 
286814d3a3b2SChristoph Hellwig 	ib_comp_wq = alloc_workqueue("ib-comp-wq",
2869b7363e67SSagi Grimberg 			WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
2870ff815a89STetsuo Handa 	if (!ib_comp_wq)
2871ff815a89STetsuo Handa 		goto err_unbound;
287214d3a3b2SChristoph Hellwig 
2873f794809aSJack Morgenstein 	ib_comp_unbound_wq =
2874f794809aSJack Morgenstein 		alloc_workqueue("ib-comp-unb-wq",
2875f794809aSJack Morgenstein 				WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM |
2876f794809aSJack Morgenstein 				WQ_SYSFS, WQ_UNBOUND_MAX_ACTIVE);
2877ff815a89STetsuo Handa 	if (!ib_comp_unbound_wq)
2878f794809aSJack Morgenstein 		goto err_comp;
2879f794809aSJack Morgenstein 
288055aeed06SJason Gunthorpe 	ret = class_register(&ib_class);
2881fd75c789SNir Muchtar 	if (ret) {
2882aba25a3eSParav Pandit 		pr_warn("Couldn't create InfiniBand device class\n");
2883f794809aSJack Morgenstein 		goto err_comp_unbound;
2884fd75c789SNir Muchtar 	}
28851da177e4SLinus Torvalds 
2886549af008SParav Pandit 	rdma_nl_init();
2887549af008SParav Pandit 
2888e3f20f02SLeon Romanovsky 	ret = addr_init();
2889e3f20f02SLeon Romanovsky 	if (ret) {
28904469add9SColin Ian King 		pr_warn("Couldn't init IB address resolution\n");
2891e3f20f02SLeon Romanovsky 		goto err_ibnl;
2892e3f20f02SLeon Romanovsky 	}
2893e3f20f02SLeon Romanovsky 
28944c2cb422SMark Bloch 	ret = ib_mad_init();
28954c2cb422SMark Bloch 	if (ret) {
28964c2cb422SMark Bloch 		pr_warn("Couldn't init IB MAD\n");
28974c2cb422SMark Bloch 		goto err_addr;
28984c2cb422SMark Bloch 	}
28994c2cb422SMark Bloch 
2900c2e49c92SMark Bloch 	ret = ib_sa_init();
2901c2e49c92SMark Bloch 	if (ret) {
2902c2e49c92SMark Bloch 		pr_warn("Couldn't init SA\n");
2903c2e49c92SMark Bloch 		goto err_mad;
2904c2e49c92SMark Bloch 	}
2905c2e49c92SMark Bloch 
290642df744cSJanne Karhunen 	ret = register_blocking_lsm_notifier(&ibdev_lsm_nb);
29078f408ab6SDaniel Jurgens 	if (ret) {
29088f408ab6SDaniel Jurgens 		pr_warn("Couldn't register LSM notifier. ret %d\n", ret);
2909c9901724SLeon Romanovsky 		goto err_sa;
29108f408ab6SDaniel Jurgens 	}
29118f408ab6SDaniel Jurgens 
29124e0f7b90SParav Pandit 	ret = register_pernet_device(&rdma_dev_net_ops);
29134e0f7b90SParav Pandit 	if (ret) {
29144e0f7b90SParav Pandit 		pr_warn("Couldn't init compat dev. ret %d\n", ret);
29154e0f7b90SParav Pandit 		goto err_compat;
29164e0f7b90SParav Pandit 	}
29174e0f7b90SParav Pandit 
29186c80b41aSLeon Romanovsky 	nldev_init();
2919c9901724SLeon Romanovsky 	rdma_nl_register(RDMA_NL_LS, ibnl_ls_cb_table);
292007c0d131SChen Zhongjin 	ret = roce_gid_mgmt_init();
292107c0d131SChen Zhongjin 	if (ret) {
292207c0d131SChen Zhongjin 		pr_warn("Couldn't init RoCE GID management\n");
292307c0d131SChen Zhongjin 		goto err_parent;
292407c0d131SChen Zhongjin 	}
2925b2cbae2cSRoland Dreier 
2926fd75c789SNir Muchtar 	return 0;
2927fd75c789SNir Muchtar 
292807c0d131SChen Zhongjin err_parent:
292907c0d131SChen Zhongjin 	rdma_nl_unregister(RDMA_NL_LS);
293007c0d131SChen Zhongjin 	nldev_exit();
293107c0d131SChen Zhongjin 	unregister_pernet_device(&rdma_dev_net_ops);
29324e0f7b90SParav Pandit err_compat:
293342df744cSJanne Karhunen 	unregister_blocking_lsm_notifier(&ibdev_lsm_nb);
2934735c631aSMark Bloch err_sa:
2935735c631aSMark Bloch 	ib_sa_cleanup();
2936c2e49c92SMark Bloch err_mad:
2937c2e49c92SMark Bloch 	ib_mad_cleanup();
29384c2cb422SMark Bloch err_addr:
29394c2cb422SMark Bloch 	addr_cleanup();
2940e3f20f02SLeon Romanovsky err_ibnl:
294155aeed06SJason Gunthorpe 	class_unregister(&ib_class);
2942f794809aSJack Morgenstein err_comp_unbound:
2943f794809aSJack Morgenstein 	destroy_workqueue(ib_comp_unbound_wq);
294414d3a3b2SChristoph Hellwig err_comp:
294514d3a3b2SChristoph Hellwig 	destroy_workqueue(ib_comp_wq);
2946ff815a89STetsuo Handa err_unbound:
2947ff815a89STetsuo Handa 	destroy_workqueue(ib_unreg_wq);
2948fd75c789SNir Muchtar err:
2949fd75c789SNir Muchtar 	destroy_workqueue(ib_wq);
29501da177e4SLinus Torvalds 	return ret;
29511da177e4SLinus Torvalds }
29521da177e4SLinus Torvalds 
29531da177e4SLinus Torvalds static void __exit ib_core_cleanup(void)
29541da177e4SLinus Torvalds {
29555ef8c0c1SJason Gunthorpe 	roce_gid_mgmt_cleanup();
2956c9901724SLeon Romanovsky 	rdma_nl_unregister(RDMA_NL_LS);
29574508d32cSLeon Romanovsky 	nldev_exit();
29584e0f7b90SParav Pandit 	unregister_pernet_device(&rdma_dev_net_ops);
295942df744cSJanne Karhunen 	unregister_blocking_lsm_notifier(&ibdev_lsm_nb);
2960c2e49c92SMark Bloch 	ib_sa_cleanup();
29614c2cb422SMark Bloch 	ib_mad_cleanup();
2962e3f20f02SLeon Romanovsky 	addr_cleanup();
2963c9901724SLeon Romanovsky 	rdma_nl_exit();
296455aeed06SJason Gunthorpe 	class_unregister(&ib_class);
2965f794809aSJack Morgenstein 	destroy_workqueue(ib_comp_unbound_wq);
296614d3a3b2SChristoph Hellwig 	destroy_workqueue(ib_comp_wq);
2967f7c6a7b5SRoland Dreier 	/* Make sure that any pending umem accounting work is done. */
2968f0626710STejun Heo 	destroy_workqueue(ib_wq);
2969ff815a89STetsuo Handa 	destroy_workqueue(ib_unreg_wq);
2970e59178d8SJason Gunthorpe 	WARN_ON(!xa_empty(&clients));
29710df91bb6SJason Gunthorpe 	WARN_ON(!xa_empty(&devices));
29721da177e4SLinus Torvalds }
29731da177e4SLinus Torvalds 
2974e3bf14bdSJason Gunthorpe MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_LS, 4);
2975e3bf14bdSJason Gunthorpe 
297662dfa795SParav Pandit /* ib core relies on netdev stack to first register net_ns_type_operations
297762dfa795SParav Pandit  * ns kobject type before ib_core initialization.
297862dfa795SParav Pandit  */
297962dfa795SParav Pandit fs_initcall(ib_core_init);
29801da177e4SLinus Torvalds module_exit(ib_core_cleanup);
2981