/*
 * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the names of the copyright holders nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * Alternatively, this software may be distributed under the terms of the
 * GNU General Public License ("GPL") version 2 as published by the Free
 * Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <linux/module.h>
#include <linux/pid.h>
#include <linux/pid_namespace.h>
#include <linux/mutex.h>
#include <net/netlink.h>
#include <rdma/rdma_cm.h>
#include <rdma/rdma_netlink.h>

#include "core_priv.h"
#include "cma_priv.h"
#include "restrack.h"

/*
 * Keep the array elements sorted by netlink attribute name.
 */
static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
	[RDMA_NLDEV_ATTR_CHARDEV]		= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_CHARDEV_ABI]		= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_CHARDEV_NAME]		= { .type = NLA_NUL_STRING,
					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
	[RDMA_NLDEV_ATTR_CHARDEV_TYPE]		= { .type = NLA_NUL_STRING,
					.len = RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE },
	[RDMA_NLDEV_ATTR_DEV_DIM]               = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_DEV_INDEX]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_DEV_NAME]		= { .type = NLA_NUL_STRING,
					.len = IB_DEVICE_NAME_MAX },
	[RDMA_NLDEV_ATTR_DEV_NODE_TYPE]		= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_DEV_PROTOCOL]		= { .type = NLA_NUL_STRING,
					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
	[RDMA_NLDEV_ATTR_DRIVER]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_DRIVER_ENTRY]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE]	= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_DRIVER_STRING]		= { .type = NLA_NUL_STRING,
					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
	[RDMA_NLDEV_ATTR_DRIVER_S32]		= { .type = NLA_S32 },
	[RDMA_NLDEV_ATTR_DRIVER_S64]		= { .type = NLA_S64 },
	[RDMA_NLDEV_ATTR_DRIVER_U32]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_DRIVER_U64]		= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_FW_VERSION]		= { .type = NLA_NUL_STRING,
					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
	[RDMA_NLDEV_ATTR_LID]			= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_LINK_TYPE]		= { .type = NLA_NUL_STRING,
					.len = IFNAMSIZ },
	[RDMA_NLDEV_ATTR_LMC]			= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_NDEV_INDEX]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_NDEV_NAME]		= { .type = NLA_NUL_STRING,
					.len = IFNAMSIZ },
	[RDMA_NLDEV_ATTR_NODE_GUID]		= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_PORT_INDEX]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_PORT_PHYS_STATE]	= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_PORT_STATE]		= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_CM_ID]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_CM_IDN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY]	= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_CQ]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_CQE]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_CQN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_CQ_ENTRY]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_CTXN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_DST_ADDR]		= {
			.len = sizeof(struct __kernel_sockaddr_storage) },
	[RDMA_NLDEV_ATTR_RES_IOVA]		= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_RES_KERN_NAME]		= { .type = NLA_NUL_STRING,
					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
	[RDMA_NLDEV_ATTR_RES_LKEY]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY]	= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_LQPN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_MR]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_MRLEN]		= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_RES_MRN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_MR_ENTRY]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE]	= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_PD]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_PDN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_PD_ENTRY]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_PID]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_POLL_CTX]		= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_PS]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_QP]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_QP_ENTRY]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_RKEY]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_RQPN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_RQ_PSN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_SQ_PSN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_SRC_ADDR]		= {
			.len = sizeof(struct __kernel_sockaddr_storage) },
	[RDMA_NLDEV_ATTR_RES_STATE]		= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_SUMMARY]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY]	= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR]= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME]= { .type = NLA_NUL_STRING,
					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
	[RDMA_NLDEV_ATTR_RES_TYPE]		= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY]= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_USECNT]		= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_SM_LID]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_SUBNET_PREFIX]		= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]	= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_STAT_MODE]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_STAT_RES]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_STAT_COUNTER]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY]	= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]       = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]       = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY]  = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME] = { .type = NLA_NUL_STRING },
	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE] = { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_SYS_IMAGE_GUID]	= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID]	= { .type = NLA_U32 },
	[RDMA_NLDEV_NET_NS_FD]			= { .type = NLA_U32 },
	[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]	= { .type = NLA_U8 },
};

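/*
 * Emit the driver attribute's name string and, unless it is UNSPEC, the
 * requested print type (e.g. hex) ahead of the value attribute.
 */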
static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
				      enum rdma_nldev_print_type print_type)
{
	if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, name))
		return -EMSGSIZE;
	if (print_type != RDMA_NLDEV_PRINT_TYPE_UNSPEC &&
	    nla_put_u8(msg, RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE, print_type))
		return -EMSGSIZE;

	return 0;
}

static int _rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name,
				   enum rdma_nldev_print_type print_type,
				   u32 value)
{
	if (put_driver_name_print_type(msg, name, print_type))
		return -EMSGSIZE;
	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DRIVER_U32, value))
		return -EMSGSIZE;

	return 0;
}

static int _rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name,
				   enum rdma_nldev_print_type print_type,
				   u64 value)
{
	if (put_driver_name_print_type(msg, name, print_type))
		return -EMSGSIZE;
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_DRIVER_U64, value,
			      RDMA_NLDEV_ATTR_PAD))
		return -EMSGSIZE;

	return 0;
}

int rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name, u32 value)
{
	return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
				       value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u32);

int rdma_nl_put_driver_u32_hex(struct sk_buff *msg, const char *name,
			       u32 value)
{
	return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX,
				       value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u32_hex);

int rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, u64 value)
{
	return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
				       value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u64);

int rdma_nl_put_driver_u64_hex(struct sk_buff *msg, const char *name, u64 value)
{
	return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX,
				       value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u64_hex);

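/*
 * Usage sketch for the exported helpers above (hypothetical driver code;
 * "sq_depth" and the qp field are invented for illustration):
 *
 *	rdma_nl_put_driver_u32(msg, "sq_depth", qp->sq_depth);
 *
 * This emits the name string and the value as one attribute pair, typically
 * placed under the RDMA_NLDEV_ATTR_DRIVER nest by the driver's
 * fill_res_entry callback.
 */

/* Identify a device to userspace by its index and current name. */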
static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
{
	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
		return -EMSGSIZE;
	if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME,
			   dev_name(&device->dev)))
		return -EMSGSIZE;

	return 0;
}

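/*
 * Fill the device-wide attributes: handle, highest port number, capability
 * flags, FW version (if any), GUIDs, node type, DIM setting and the
 * protocol string derived from the first port.
 */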
static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
{
	char fw[IB_FW_VERSION_NAME_MAX];
	int ret = 0;
	u8 port;

	if (fill_nldev_handle(msg, device))
		return -EMSGSIZE;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, rdma_end_port(device)))
		return -EMSGSIZE;

	BUILD_BUG_ON(sizeof(device->attrs.device_cap_flags) != sizeof(u64));
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
			      device->attrs.device_cap_flags,
			      RDMA_NLDEV_ATTR_PAD))
		return -EMSGSIZE;

	ib_get_device_fw_str(device, fw);
	/* A device without FW has strlen(fw) == 0 */
	if (strlen(fw) && nla_put_string(msg, RDMA_NLDEV_ATTR_FW_VERSION, fw))
		return -EMSGSIZE;

	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_NODE_GUID,
			      be64_to_cpu(device->node_guid),
			      RDMA_NLDEV_ATTR_PAD))
		return -EMSGSIZE;
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SYS_IMAGE_GUID,
			      be64_to_cpu(device->attrs.sys_image_guid),
			      RDMA_NLDEV_ATTR_PAD))
		return -EMSGSIZE;
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_NODE_TYPE, device->node_type))
		return -EMSGSIZE;
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, device->use_cq_dim))
		return -EMSGSIZE;

	/*
	 * The link type is determined from the first port. Devices such as
	 * mlx4, which can have two different link types on the same IB
	 * device, are considered something better avoided in the future.
	 */
	port = rdma_start_port(device);
	if (rdma_cap_opa_mad(device, port))
		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "opa");
	else if (rdma_protocol_ib(device, port))
		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "ib");
	else if (rdma_protocol_iwarp(device, port))
		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "iw");
	else if (rdma_protocol_roce(device, port))
		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "roce");
	else if (rdma_protocol_usnic(device, port))
		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL,
				     "usnic");
	return ret;
}

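/*
 * Fill the per-port attributes. The IB-specific fields (capability flags,
 * subnet prefix, LIDs, LMC) are emitted only for IB ports, and the
 * associated netdev only when it belongs to the requesting net namespace.
 */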
static int fill_port_info(struct sk_buff *msg,
			  struct ib_device *device, u32 port,
			  const struct net *net)
{
	struct net_device *netdev = NULL;
	struct ib_port_attr attr;
	int ret;
	u64 cap_flags = 0;

	if (fill_nldev_handle(msg, device))
		return -EMSGSIZE;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
		return -EMSGSIZE;

	ret = ib_query_port(device, port, &attr);
	if (ret)
		return ret;

	if (rdma_protocol_ib(device, port)) {
		BUILD_BUG_ON((sizeof(attr.port_cap_flags) +
				sizeof(attr.port_cap_flags2)) > sizeof(u64));
		cap_flags = attr.port_cap_flags |
			((u64)attr.port_cap_flags2 << 32);
		if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
				      cap_flags, RDMA_NLDEV_ATTR_PAD))
			return -EMSGSIZE;
		if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX,
				      attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD))
			return -EMSGSIZE;
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid))
			return -EMSGSIZE;
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, attr.sm_lid))
			return -EMSGSIZE;
		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_LMC, attr.lmc))
			return -EMSGSIZE;
	}
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_STATE, attr.state))
		return -EMSGSIZE;
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state))
		return -EMSGSIZE;

	netdev = ib_device_get_netdev(device, port);
	if (netdev && net_eq(dev_net(netdev), net)) {
		ret = nla_put_u32(msg,
				  RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
		if (ret)
			goto out;
		ret = nla_put_string(msg,
				     RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);
	}

out:
	if (netdev)
		dev_put(netdev);
	return ret;
}

static int fill_res_info_entry(struct sk_buff *msg,
			       const char *name, u64 curr)
{
	struct nlattr *entry_attr;

	entry_attr = nla_nest_start_noflag(msg,
					   RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY);
	if (!entry_attr)
		return -EMSGSIZE;

	if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME, name))
		goto err;
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR, curr,
			      RDMA_NLDEV_ATTR_PAD))
		goto err;

	nla_nest_end(msg, entry_attr);
	return 0;

err:
	nla_nest_cancel(msg, entry_attr);
	return -EMSGSIZE;
}

static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
{
	static const char * const names[RDMA_RESTRACK_MAX] = {
		[RDMA_RESTRACK_PD] = "pd",
		[RDMA_RESTRACK_CQ] = "cq",
		[RDMA_RESTRACK_QP] = "qp",
		[RDMA_RESTRACK_CM_ID] = "cm_id",
		[RDMA_RESTRACK_MR] = "mr",
		[RDMA_RESTRACK_CTX] = "ctx",
	};

	struct nlattr *table_attr;
	int ret, i, curr;

	if (fill_nldev_handle(msg, device))
		return -EMSGSIZE;

	table_attr = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_RES_SUMMARY);
	if (!table_attr)
		return -EMSGSIZE;

	for (i = 0; i < RDMA_RESTRACK_MAX; i++) {
		if (!names[i])
			continue;
		curr = rdma_restrack_count(device, i,
					   task_active_pid_ns(current));
		ret = fill_res_info_entry(msg, names[i], curr);
		if (ret)
			goto err;
	}

	nla_nest_end(msg, table_attr);
	return 0;

err:
	nla_nest_cancel(msg, table_attr);
	return ret;
}

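/* Report the resource owner: kernel task name or user process PID. */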
static int fill_res_name_pid(struct sk_buff *msg,
			     struct rdma_restrack_entry *res)
{
	/*
	 * For user resources, userspace should read /proc/PID/comm to get
	 * the name of the task.
	 */
	if (rdma_is_kernel_res(res)) {
		if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
		    res->kern_name))
			return -EMSGSIZE;
	} else {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID,
		    task_pid_vnr(res->task)))
			return -EMSGSIZE;
	}
	return 0;
}

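/* Let the device driver append vendor-specific attributes, if any. */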
static bool fill_res_entry(struct ib_device *dev, struct sk_buff *msg,
			   struct rdma_restrack_entry *res)
{
	if (!dev->ops.fill_res_entry)
		return false;
	return dev->ops.fill_res_entry(msg, res);
}

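/*
 * Fill one QP entry. Returns -EAGAIN when the QP is bound to a different
 * port than the one requested, so the dump loop skips it and moves on.
 */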
static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
			     struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_qp *qp = container_of(res, struct ib_qp, res);
	struct ib_device *dev = qp->device;
	struct ib_qp_init_attr qp_init_attr;
	struct ib_qp_attr qp_attr;
	int ret;

	ret = ib_query_qp(qp, &qp_attr, 0, &qp_init_attr);
	if (ret)
		return ret;

	if (port && port != qp_attr.port_num)
		return -EAGAIN;

	/* In create_qp() port is not set yet */
	if (qp_attr.port_num &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp_attr.port_num))
		goto err;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num))
		goto err;
	if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN,
				qp_attr.dest_qp_num))
			goto err;
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQ_PSN,
				qp_attr.rq_psn))
			goto err;
	}

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SQ_PSN, qp_attr.sq_psn))
		goto err;

	if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC ||
	    qp->qp_type == IB_QPT_XRC_INI || qp->qp_type == IB_QPT_XRC_TGT) {
		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE,
			       qp_attr.path_mig_state))
			goto err;
	}
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, qp->qp_type))
		goto err;
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
		goto err;

	if (!rdma_is_kernel_res(res) &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, qp->pd->res.id))
		goto err;

	if (fill_res_name_pid(msg, res))
		goto err;

	if (fill_res_entry(dev, msg, res))
		goto err;

	return 0;

err:	return -EMSGSIZE;
}

static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin,
				struct rdma_restrack_entry *res, uint32_t port)
{
	struct rdma_id_private *id_priv =
				container_of(res, struct rdma_id_private, res);
	struct ib_device *dev = id_priv->id.device;
	struct rdma_cm_id *cm_id = &id_priv->id;

	if (port && port != cm_id->port_num)
		return 0;

	if (cm_id->port_num &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num))
		goto err;

	if (id_priv->qp_num) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, id_priv->qp_num))
			goto err;
		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, cm_id->qp_type))
			goto err;
	}

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PS, cm_id->ps))
		goto err;

	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, id_priv->state))
		goto err;

	if (cm_id->route.addr.src_addr.ss_family &&
	    nla_put(msg, RDMA_NLDEV_ATTR_RES_SRC_ADDR,
		    sizeof(cm_id->route.addr.src_addr),
		    &cm_id->route.addr.src_addr))
		goto err;
	if (cm_id->route.addr.dst_addr.ss_family &&
	    nla_put(msg, RDMA_NLDEV_ATTR_RES_DST_ADDR,
		    sizeof(cm_id->route.addr.dst_addr),
		    &cm_id->route.addr.dst_addr))
		goto err;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CM_IDN, res->id))
		goto err;

	if (fill_res_name_pid(msg, res))
		goto err;

	if (fill_res_entry(dev, msg, res))
		goto err;

	return 0;

err: return -EMSGSIZE;
}

static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin,
			     struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_cq *cq = container_of(res, struct ib_cq, res);
	struct ib_device *dev = cq->device;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe))
		goto err;
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
			      atomic_read(&cq->usecnt), RDMA_NLDEV_ATTR_PAD))
		goto err;

	/* Poll context is only valid for kernel CQs */
	if (rdma_is_kernel_res(res) &&
	    nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx))
		goto err;

	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, (cq->dim != NULL)))
		goto err;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, res->id))
		goto err;
	if (!rdma_is_kernel_res(res) &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
			cq->uobject->context->res.id))
		goto err;

	if (fill_res_name_pid(msg, res))
		goto err;

	if (fill_res_entry(dev, msg, res))
		goto err;

	return 0;

err:	return -EMSGSIZE;
}

static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
			     struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_mr *mr = container_of(res, struct ib_mr, res);
	struct ib_device *dev = mr->pd->device;

	if (has_cap_net_admin) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey))
			goto err;
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey))
			goto err;
	}

	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length,
			      RDMA_NLDEV_ATTR_PAD))
		goto err;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
		goto err;

	if (!rdma_is_kernel_res(res) &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, mr->pd->res.id))
		goto err;

	if (fill_res_name_pid(msg, res))
		goto err;

	if (fill_res_entry(dev, msg, res))
		goto err;

	return 0;

err:	return -EMSGSIZE;
}

static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin,
			     struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_pd *pd = container_of(res, struct ib_pd, res);
	struct ib_device *dev = pd->device;

	if (has_cap_net_admin) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY,
				pd->local_dma_lkey))
			goto err;
		if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) &&
		    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY,
				pd->unsafe_global_rkey))
			goto err;
	}
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
			      atomic_read(&pd->usecnt), RDMA_NLDEV_ATTR_PAD))
		goto err;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, res->id))
		goto err;

	if (!rdma_is_kernel_res(res) &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
			pd->uobject->context->res.id))
		goto err;

	if (fill_res_name_pid(msg, res))
		goto err;

	if (fill_res_entry(dev, msg, res))
		goto err;

	return 0;

err:	return -EMSGSIZE;
}

static int fill_stat_counter_mode(struct sk_buff *msg,
				  struct rdma_counter *counter)
{
	struct rdma_counter_mode *m = &counter->mode;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, m->mode))
		return -EMSGSIZE;

	if (m->mode == RDMA_COUNTER_MODE_AUTO)
		if ((m->mask & RDMA_COUNTER_MASK_QP_TYPE) &&
		    nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, m->param.qp_type))
			return -EMSGSIZE;

	return 0;
}

static int fill_stat_counter_qp_entry(struct sk_buff *msg, u32 qpn)
{
	struct nlattr *entry_attr;

	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
	if (!entry_attr)
		return -EMSGSIZE;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn))
		goto err;

	nla_nest_end(msg, entry_attr);
	return 0;

err:
	nla_nest_cancel(msg, entry_attr);
	return -EMSGSIZE;
}

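/*
 * List the QPs currently bound to this counter by walking the device's
 * QP restrack table under its xa_lock.
 */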
static int fill_stat_counter_qps(struct sk_buff *msg,
				 struct rdma_counter *counter)
{
	struct rdma_restrack_entry *res;
	struct rdma_restrack_root *rt;
	struct nlattr *table_attr;
	struct ib_qp *qp = NULL;
	unsigned long id = 0;
	int ret = 0;

	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP);

	rt = &counter->device->res[RDMA_RESTRACK_QP];
	xa_lock(&rt->xa);
	xa_for_each(&rt->xa, id, res) {
		if (!rdma_is_visible_in_pid_ns(res))
			continue;

		qp = container_of(res, struct ib_qp, res);
		if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
			continue;

		if (!qp->counter || (qp->counter->id != counter->id))
			continue;

		ret = fill_stat_counter_qp_entry(msg, qp->qp_num);
		if (ret)
			goto err;
	}

	xa_unlock(&rt->xa);
	nla_nest_end(msg, table_attr);
	return 0;

err:
	xa_unlock(&rt->xa);
	nla_nest_cancel(msg, table_attr);
	return ret;
}

static int fill_stat_hwcounter_entry(struct sk_buff *msg,
				     const char *name, u64 value)
{
	struct nlattr *entry_attr;

	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY);
	if (!entry_attr)
		return -EMSGSIZE;

	if (nla_put_string(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME,
			   name))
		goto err;
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE,
			      value, RDMA_NLDEV_ATTR_PAD))
		goto err;

	nla_nest_end(msg, entry_attr);
	return 0;

err:
	nla_nest_cancel(msg, entry_attr);
	return -EMSGSIZE;
}

static int fill_stat_counter_hwcounters(struct sk_buff *msg,
					struct rdma_counter *counter)
{
	struct rdma_hw_stats *st = counter->stats;
	struct nlattr *table_attr;
	int i;

	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
	if (!table_attr)
		return -EMSGSIZE;

	for (i = 0; i < st->num_counters; i++)
		if (fill_stat_hwcounter_entry(msg, st->names[i], st->value[i]))
			goto err;

	nla_nest_end(msg, table_attr);
	return 0;

err:
	nla_nest_cancel(msg, table_attr);
	return -EMSGSIZE;
}

static int fill_res_counter_entry(struct sk_buff *msg, bool has_cap_net_admin,
				  struct rdma_restrack_entry *res,
				  uint32_t port)
{
	struct rdma_counter *counter =
		container_of(res, struct rdma_counter, res);

	if (port && port != counter->port)
		return 0;

	/* Dump it even if the query failed */
	rdma_counter_query_stats(counter);

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, counter->port) ||
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, counter->id) ||
	    fill_res_name_pid(msg, &counter->res) ||
	    fill_stat_counter_mode(msg, counter) ||
	    fill_stat_counter_qps(msg, counter) ||
	    fill_stat_counter_hwcounters(msg, counter))
		return -EMSGSIZE;

	return 0;
}

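/* Return the device-wide attributes of a single device by its index. */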
static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
			  struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct ib_device *device;
	struct sk_buff *msg;
	u32 index;
	int err;

	err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
				     nldev_policy, extack);
	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);

	device = ib_device_get_by_index(sock_net(skb->sk), index);
	if (!device)
		return -EINVAL;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg) {
		err = -ENOMEM;
		goto err;
	}

	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
			0, 0);

	err = fill_dev_info(msg, device);
	if (err)
		goto err_free;

	nlmsg_end(msg, nlh);

	ib_device_put(device);
	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);

err_free:
	nlmsg_free(msg);
err:
	ib_device_put(device);
	return err;
}

static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
			  struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct ib_device *device;
	u32 index;
	int err;

	err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
				     nldev_policy, extack);
	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(sock_net(skb->sk), index);
	if (!device)
		return -EINVAL;

	if (tb[RDMA_NLDEV_ATTR_DEV_NAME]) {
		char name[IB_DEVICE_NAME_MAX] = {};

		nla_strlcpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
			    IB_DEVICE_NAME_MAX);
		err = ib_device_rename(device, name);
		goto done;
	}

	if (tb[RDMA_NLDEV_NET_NS_FD]) {
		u32 ns_fd;

		ns_fd = nla_get_u32(tb[RDMA_NLDEV_NET_NS_FD]);
		err = ib_device_set_netns_put(skb, device, ns_fd);
		goto put_done;
	}

	if (tb[RDMA_NLDEV_ATTR_DEV_DIM]) {
		u8 use_dim;

		use_dim = nla_get_u8(tb[RDMA_NLDEV_ATTR_DEV_DIM]);
		err = ib_device_set_dim(device, use_dim);
		goto done;
	}

done:
	ib_device_put(device);
put_done:
	return err;
}

static int _nldev_get_dumpit(struct ib_device *device,
			     struct sk_buff *skb,
			     struct netlink_callback *cb,
			     unsigned int idx)
{
	int start = cb->args[0];
	struct nlmsghdr *nlh;

	if (idx < start)
		return 0;

	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
			0, NLM_F_MULTI);

	if (fill_dev_info(skb, device)) {
		nlmsg_cancel(skb, nlh);
		goto out;
	}

	nlmsg_end(skb, nlh);

	idx++;

out:	cb->args[0] = idx;
	return skb->len;
}

static int nldev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
	/*
	 * There is no need to take a lock here, because
	 * we are relying on ib_core's locking.
	 */
	return ib_enum_all_devs(_nldev_get_dumpit, skb, cb);
}

static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
			       struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct ib_device *device;
	struct sk_buff *msg;
	u32 index;
	u32 port;
	int err;

	err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
				     nldev_policy, extack);
	if (err ||
	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
	    !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(sock_net(skb->sk), index);
	if (!device)
		return -EINVAL;

	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
	if (!rdma_is_port_valid(device, port)) {
		err = -EINVAL;
		goto err;
	}

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg) {
		err = -ENOMEM;
		goto err;
	}

	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
			0, 0);

	err = fill_port_info(msg, device, port, sock_net(skb->sk));
	if (err)
		goto err_free;

	nlmsg_end(msg, nlh);
	ib_device_put(device);

	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);

err_free:
	nlmsg_free(msg);
err:
	ib_device_put(device);
	return err;
}

static int nldev_port_get_dumpit(struct sk_buff *skb,
				 struct netlink_callback *cb)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct ib_device *device;
	int start = cb->args[0];
	struct nlmsghdr *nlh;
	u32 idx = 0;
	u32 ifindex;
	int err;
	unsigned int p;

	err = nlmsg_parse_deprecated(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
				     nldev_policy, NULL);
	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
		return -EINVAL;

	ifindex = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(sock_net(skb->sk), ifindex);
	if (!device)
		return -EINVAL;

	rdma_for_each_port (device, p) {
		/*
		 * The dumpit function returns all information from a
		 * specific index. This index is taken from the netlink
		 * request sent by the user and is available in cb->args[0].
		 *
		 * Usually, the user doesn't fill this field, which causes
		 * everything to be returned.
		 */
		if (idx < start) {
			idx++;
			continue;
		}

		nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
				cb->nlh->nlmsg_seq,
				RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
						 RDMA_NLDEV_CMD_PORT_GET),
				0, NLM_F_MULTI);

		if (fill_port_info(skb, device, p, sock_net(skb->sk))) {
			nlmsg_cancel(skb, nlh);
			goto out;
		}
		idx++;
		nlmsg_end(skb, nlh);
	}

out:
	ib_device_put(device);
	cb->args[0] = idx;
	return skb->len;
}

static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
			      struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct ib_device *device;
	struct sk_buff *msg;
	u32 index;
	int ret;

	ret = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
				     nldev_policy, extack);
	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(sock_net(skb->sk), index);
	if (!device)
		return -EINVAL;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg) {
		ret = -ENOMEM;
		goto err;
	}

	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
			0, 0);

	ret = fill_res_info(msg, device);
	if (ret)
		goto err_free;

	nlmsg_end(msg, nlh);
	ib_device_put(device);
	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);

err_free:
	nlmsg_free(msg);
err:
	ib_device_put(device);
	return ret;
}

static int _nldev_res_get_dumpit(struct ib_device *device,
				 struct sk_buff *skb,
				 struct netlink_callback *cb,
				 unsigned int idx)
{
	int start = cb->args[0];
	struct nlmsghdr *nlh;

	if (idx < start)
		return 0;

	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
			0, NLM_F_MULTI);

	if (fill_res_info(skb, device)) {
		nlmsg_cancel(skb, nlh);
		goto out;
	}
	nlmsg_end(skb, nlh);

	idx++;

out:
	cb->args[0] = idx;
	return skb->len;
}

static int nldev_res_get_dumpit(struct sk_buff *skb,
				struct netlink_callback *cb)
{
	return ib_enum_all_devs(_nldev_res_get_dumpit, skb, cb);
}

struct nldev_fill_res_entry {
	int (*fill_res_func)(struct sk_buff *msg, bool has_cap_net_admin,
			     struct rdma_restrack_entry *res, u32 port);
	enum rdma_nldev_attr nldev_attr;
	enum rdma_nldev_command nldev_cmd;
	u8 flags;
	u32 entry;
	u32 id;
};

enum nldev_res_flags {
	NLDEV_PER_DEV = 1 << 0,
};

static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
	[RDMA_RESTRACK_QP] = {
		.fill_res_func = fill_res_qp_entry,
		.nldev_cmd = RDMA_NLDEV_CMD_RES_QP_GET,
		.nldev_attr = RDMA_NLDEV_ATTR_RES_QP,
		.entry = RDMA_NLDEV_ATTR_RES_QP_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_LQPN,
	},
	[RDMA_RESTRACK_CM_ID] = {
		.fill_res_func = fill_res_cm_id_entry,
		.nldev_cmd = RDMA_NLDEV_CMD_RES_CM_ID_GET,
		.nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID,
		.entry = RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_CM_IDN,
	},
	[RDMA_RESTRACK_CQ] = {
		.fill_res_func = fill_res_cq_entry,
		.nldev_cmd = RDMA_NLDEV_CMD_RES_CQ_GET,
		.nldev_attr = RDMA_NLDEV_ATTR_RES_CQ,
		.flags = NLDEV_PER_DEV,
		.entry = RDMA_NLDEV_ATTR_RES_CQ_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_CQN,
	},
	[RDMA_RESTRACK_MR] = {
		.fill_res_func = fill_res_mr_entry,
		.nldev_cmd = RDMA_NLDEV_CMD_RES_MR_GET,
		.nldev_attr = RDMA_NLDEV_ATTR_RES_MR,
		.flags = NLDEV_PER_DEV,
		.entry = RDMA_NLDEV_ATTR_RES_MR_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_MRN,
	},
	[RDMA_RESTRACK_PD] = {
		.fill_res_func = fill_res_pd_entry,
		.nldev_cmd = RDMA_NLDEV_CMD_RES_PD_GET,
		.nldev_attr = RDMA_NLDEV_ATTR_RES_PD,
		.flags = NLDEV_PER_DEV,
		.entry = RDMA_NLDEV_ATTR_RES_PD_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_PDN,
	},
	[RDMA_RESTRACK_COUNTER] = {
		.fill_res_func = fill_res_counter_entry,
		.nldev_cmd = RDMA_NLDEV_CMD_STAT_GET,
		.nldev_attr = RDMA_NLDEV_ATTR_STAT_COUNTER,
		.entry = RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY,
		.id = RDMA_NLDEV_ATTR_STAT_COUNTER_ID,
	},
};

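/*
 * Return a single resource object, looked up on the given device by the
 * per-type ID attribute (e.g. RDMA_NLDEV_ATTR_RES_LQPN for QPs).
 */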
static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
			       struct netlink_ext_ack *extack,
			       enum rdma_restrack_type res_type)
{
	const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct rdma_restrack_entry *res;
	struct ib_device *device;
	u32 index, id, port = 0;
	bool has_cap_net_admin;
	struct sk_buff *msg;
	int ret;

	ret = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
				     nldev_policy, extack);
	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !fe->id || !tb[fe->id])
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(sock_net(skb->sk), index);
	if (!device)
		return -EINVAL;

	if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
		port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
		if (!rdma_is_port_valid(device, port)) {
			ret = -EINVAL;
			goto err;
		}
	}

	if ((port && fe->flags & NLDEV_PER_DEV) ||
	    (!port && ~fe->flags & NLDEV_PER_DEV)) {
		ret = -EINVAL;
		goto err;
	}

	id = nla_get_u32(tb[fe->id]);
	res = rdma_restrack_get_byid(device, res_type, id);
	if (IS_ERR(res)) {
		ret = PTR_ERR(res);
		goto err;
	}

	if (!rdma_is_visible_in_pid_ns(res)) {
		ret = -ENOENT;
		goto err_get;
	}

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg) {
		ret = -ENOMEM;
		goto err;
	}

	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd),
			0, 0);

	if (fill_nldev_handle(msg, device)) {
		ret = -EMSGSIZE;
		goto err_free;
	}

	has_cap_net_admin = netlink_capable(skb, CAP_NET_ADMIN);
	ret = fe->fill_res_func(msg, has_cap_net_admin, res, port);
	rdma_restrack_put(res);
	if (ret)
		goto err_free;

	nlmsg_end(msg, nlh);
	ib_device_put(device);
	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);

err_free:
	nlmsg_free(msg);
err_get:
	rdma_restrack_put(res);
err:
	ib_device_put(device);
	return ret;
}

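/*
 * Dump all visible resources of one type on a device; cb->args[0] carries
 * the index at which to resume across successive dump callbacks.
 */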
static int res_get_common_dumpit(struct sk_buff *skb,
				 struct netlink_callback *cb,
				 enum rdma_restrack_type res_type)
{
	const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct rdma_restrack_entry *res;
	struct rdma_restrack_root *rt;
	int err, ret = 0, idx = 0;
	struct nlattr *table_attr;
	struct nlattr *entry_attr;
	struct ib_device *device;
	int start = cb->args[0];
	bool has_cap_net_admin;
	struct nlmsghdr *nlh;
	unsigned long id;
	u32 index, port = 0;
	bool filled = false;

	err = nlmsg_parse_deprecated(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
				     nldev_policy, NULL);
	/*
	 * Right now, we require the device index to get resource information,
	 * but it is possible to extend this code to return all devices in
	 * one shot by checking for the existence of RDMA_NLDEV_ATTR_DEV_INDEX:
	 * if it doesn't exist, iterate over all devices.
	 *
	 * That is not needed for now.
	 */
	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(sock_net(skb->sk), index);
	if (!device)
		return -EINVAL;

	/*
	 * If no PORT_INDEX is supplied, return all resources of this type
	 * from the device.
	 */
	if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
		port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
		if (!rdma_is_port_valid(device, port)) {
			ret = -EINVAL;
			goto err_index;
		}
	}

	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd),
			0, NLM_F_MULTI);

	if (fill_nldev_handle(skb, device)) {
		ret = -EMSGSIZE;
		goto err;
	}

	table_attr = nla_nest_start_noflag(skb, fe->nldev_attr);
	if (!table_attr) {
		ret = -EMSGSIZE;
		goto err;
	}

	has_cap_net_admin = netlink_capable(cb->skb, CAP_NET_ADMIN);

	rt = &device->res[res_type];
	xa_lock(&rt->xa);
	/*
	 * FIXME: if the skip ahead is something common this loop should
	 * use xas_for_each & xas_pause to optimize, we can have a lot of
	 * objects.
	 */
	xa_for_each(&rt->xa, id, res) {
		if (!rdma_is_visible_in_pid_ns(res))
			continue;

		if (idx < start || !rdma_restrack_get(res))
			goto next;

		xa_unlock(&rt->xa);

		filled = true;

		entry_attr = nla_nest_start_noflag(skb, fe->entry);
		if (!entry_attr) {
			ret = -EMSGSIZE;
			rdma_restrack_put(res);
			goto msg_full;
		}

		ret = fe->fill_res_func(skb, has_cap_net_admin, res, port);
		rdma_restrack_put(res);

		if (ret) {
			nla_nest_cancel(skb, entry_attr);
			if (ret == -EMSGSIZE)
				goto msg_full;
			if (ret == -EAGAIN)
				goto again;
			goto res_err;
		}
		nla_nest_end(skb, entry_attr);
again:		xa_lock(&rt->xa);
next:		idx++;
	}
	xa_unlock(&rt->xa);

msg_full:
	nla_nest_end(skb, table_attr);
	nlmsg_end(skb, nlh);
	cb->args[0] = idx;

	/*
	 * No more entries to fill, cancel the message and
	 * return 0 to mark end of dumpit.
	 */
	if (!filled)
		goto err;

	ib_device_put(device);
	return skb->len;

res_err:
	nla_nest_cancel(skb, table_attr);

err:
	nlmsg_cancel(skb, nlh);

err_index:
	ib_device_put(device);
	return ret;
}

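/*
 * RES_GET_FUNCS(qp, RDMA_RESTRACK_QP), for example, expands to
 * nldev_res_get_qp_doit() and nldev_res_get_qp_dumpit(), thin wrappers
 * around the common helpers above.
 */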
#define RES_GET_FUNCS(name, type)                                              \
	static int nldev_res_get_##name##_dumpit(struct sk_buff *skb,          \
						 struct netlink_callback *cb)  \
	{                                                                      \
		return res_get_common_dumpit(skb, cb, type);                   \
	}                                                                      \
	static int nldev_res_get_##name##_doit(struct sk_buff *skb,            \
					       struct nlmsghdr *nlh,           \
					       struct netlink_ext_ack *extack) \
	{                                                                      \
		return res_get_common_doit(skb, nlh, extack, type);            \
	}

RES_GET_FUNCS(qp, RDMA_RESTRACK_QP);
RES_GET_FUNCS(cm_id, RDMA_RESTRACK_CM_ID);
RES_GET_FUNCS(cq, RDMA_RESTRACK_CQ);
RES_GET_FUNCS(pd, RDMA_RESTRACK_PD);
RES_GET_FUNCS(mr, RDMA_RESTRACK_MR);
RES_GET_FUNCS(counter, RDMA_RESTRACK_COUNTER);

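/* Registry of "rdma link add" backends, keyed by the link type string. */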
static LIST_HEAD(link_ops);
static DECLARE_RWSEM(link_ops_rwsem);

static const struct rdma_link_ops *link_ops_get(const char *type)
{
	const struct rdma_link_ops *ops;

	list_for_each_entry(ops, &link_ops, list) {
		if (!strcmp(ops->type, type))
			goto out;
	}
	ops = NULL;
out:
	return ops;
}

void rdma_link_register(struct rdma_link_ops *ops)
{
	down_write(&link_ops_rwsem);
	if (WARN_ON_ONCE(link_ops_get(ops->type)))
		goto out;
	list_add(&ops->list, &link_ops);
out:
	up_write(&link_ops_rwsem);
}
EXPORT_SYMBOL(rdma_link_register);

void rdma_link_unregister(struct rdma_link_ops *ops)
{
	down_write(&link_ops_rwsem);
	list_del(&ops->list);
	up_write(&link_ops_rwsem);
}
EXPORT_SYMBOL(rdma_link_unregister);

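/*
 * Create an IB device of the given type on top of a netdev, loading the
 * rdma-link-<type> module first if the type is not yet registered.
 */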
static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
			  struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	char ibdev_name[IB_DEVICE_NAME_MAX];
	const struct rdma_link_ops *ops;
	char ndev_name[IFNAMSIZ];
	struct net_device *ndev;
	char type[IFNAMSIZ];
	int err;

	err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
				     nldev_policy, extack);
	if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] ||
	    !tb[RDMA_NLDEV_ATTR_LINK_TYPE] || !tb[RDMA_NLDEV_ATTR_NDEV_NAME])
		return -EINVAL;

	nla_strlcpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
		    sizeof(ibdev_name));
	if (strchr(ibdev_name, '%'))
		return -EINVAL;

	nla_strlcpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type));
	nla_strlcpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME],
		    sizeof(ndev_name));

	ndev = dev_get_by_name(sock_net(skb->sk), ndev_name);
	if (!ndev)
		return -ENODEV;

	down_read(&link_ops_rwsem);
	ops = link_ops_get(type);
#ifdef CONFIG_MODULES
	if (!ops) {
		up_read(&link_ops_rwsem);
		request_module("rdma-link-%s", type);
		down_read(&link_ops_rwsem);
		ops = link_ops_get(type);
	}
#endif
	err = ops ? ops->newlink(ibdev_name, ndev) : -EINVAL;
	up_read(&link_ops_rwsem);
	dev_put(ndev);

	return err;
}

static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
			  struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct ib_device *device;
	u32 index;
	int err;

	err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
				     nldev_policy, extack);
	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(sock_net(skb->sk), index);
	if (!device)
		return -EINVAL;

	if (!(device->attrs.device_cap_flags & IB_DEVICE_ALLOW_USER_UNREG)) {
		ib_device_put(device);
		return -EINVAL;
	}

	ib_unregister_device_and_put(device);
	return 0;
}

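/*
 * Resolve the char device that a client (e.g. "uverbs") exposes for a
 * device/port and report its dev_t, ABI version and name.
 */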
static int nldev_get_chardev(struct sk_buff *skb, struct nlmsghdr *nlh,
			     struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	char client_name[RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE];
	struct ib_client_nl_info data = {};
	struct ib_device *ibdev = NULL;
	struct sk_buff *msg;
	u32 index;
	int err;

	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
			  extack);
	if (err || !tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE])
		return -EINVAL;

	nla_strlcpy(client_name, tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE],
		    sizeof(client_name));

	if (tb[RDMA_NLDEV_ATTR_DEV_INDEX]) {
		index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
		ibdev = ib_device_get_by_index(sock_net(skb->sk), index);
		if (!ibdev)
			return -EINVAL;

		if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
			data.port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
			if (!rdma_is_port_valid(ibdev, data.port)) {
				err = -EINVAL;
				goto out_put;
			}
		} else {
			data.port = -1;
		}
	} else if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
		return -EINVAL;
	}

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg) {
		err = -ENOMEM;
		goto out_put;
	}
	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
					 RDMA_NLDEV_CMD_GET_CHARDEV),
			0, 0);

	data.nl_msg = msg;
	err = ib_get_client_nl_info(ibdev, client_name, &data);
	if (err)
		goto out_nlmsg;

	err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV,
				huge_encode_dev(data.cdev->devt),
				RDMA_NLDEV_ATTR_PAD);
	if (err)
		goto out_data;
	err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV_ABI, data.abi,
				RDMA_NLDEV_ATTR_PAD);
	if (err)
		goto out_data;
	if (nla_put_string(msg, RDMA_NLDEV_ATTR_CHARDEV_NAME,
			   dev_name(data.cdev))) {
		err = -EMSGSIZE;
		goto out_data;
	}

	nlmsg_end(msg, nlh);
	put_device(data.cdev);
	if (ibdev)
		ib_device_put(ibdev);
	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);

out_data:
	put_device(data.cdev);
out_nlmsg:
	nlmsg_free(msg);
out_put:
	if (ibdev)
		ib_device_put(ibdev);
	return err;
}

static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
			      struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct sk_buff *msg;
	int err;

	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
			  nldev_policy, extack);
	if (err)
		return err;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
					 RDMA_NLDEV_CMD_SYS_GET),
			0, 0);

	err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_NETNS_MODE,
			 (u8)ib_devices_shared_netns);
	if (err) {
		nlmsg_free(msg);
		return err;
	}
	nlmsg_end(msg, nlh);
	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
}

static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
				  struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	u8 enable;
	int err;

	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
			  nldev_policy, extack);
	if (err || !tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE])
		return -EINVAL;

	enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]);
	/* Only 0 and 1 are supported */
	if (enable > 1)
		return -EINVAL;

	err = rdma_compatdev_set(enable);
	return err;
}

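/*
 * Bind QPs to a statistics counter: either switch the port to (or out of)
 * auto mode, or bind one QP manually to an existing or newly allocated
 * counter.
 */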
static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
			       struct netlink_ext_ack *extack)
{
	u32 index, port, mode, mask = 0, qpn, cntn = 0;
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct ib_device *device;
	struct sk_buff *msg;
	int ret;

	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
			  nldev_policy, extack);
	/* Currently only QP counters are supported */
	if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES] ||
	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
	    !tb[RDMA_NLDEV_ATTR_PORT_INDEX] || !tb[RDMA_NLDEV_ATTR_STAT_MODE])
		return -EINVAL;

	if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(sock_net(skb->sk), index);
	if (!device)
		return -EINVAL;

	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
	if (!rdma_is_port_valid(device, port)) {
		ret = -EINVAL;
		goto err;
	}

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg) {
		ret = -ENOMEM;
		goto err;
	}
	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
					 RDMA_NLDEV_CMD_STAT_SET),
			0, 0);

	mode = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_MODE]);
	if (mode == RDMA_COUNTER_MODE_AUTO) {
		if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK])
			mask = nla_get_u32(
				tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]);

		ret = rdma_counter_set_auto_mode(device, port,
						 mask ? true : false, mask);
		if (ret)
			goto err_msg;
	} else {
		/* The QP number is mandatory for a manual bind */
		if (!tb[RDMA_NLDEV_ATTR_RES_LQPN]) {
			ret = -EINVAL;
			goto err_msg;
		}
		qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
		if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) {
			cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
			ret = rdma_counter_bind_qpn(device, port, qpn, cntn);
		} else {
			ret = rdma_counter_bind_qpn_alloc(device, port,
							  qpn, &cntn);
		}
		if (ret)
			goto err_msg;

		if (fill_nldev_handle(msg, device) ||
		    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
		    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
		    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
			ret = -EMSGSIZE;
			goto err_fill;
		}
	}

	nlmsg_end(msg, nlh);
	ib_device_put(device);
	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);

err_fill:
	rdma_counter_unbind_qpn(device, port, qpn, cntn);
err_msg:
	nlmsg_free(msg);
err:
	ib_device_put(device);
	return ret;
}

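/* Manually unbind a QP from the statistics counter it was bound to. */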
1748 static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1749 			       struct netlink_ext_ack *extack)
1750 {
1751 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1752 	struct ib_device *device;
1753 	struct sk_buff *msg;
1754 	u32 index, port, qpn, cntn;
1755 	int ret;
1756 
1757 	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1758 			  nldev_policy, extack);
1759 	if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES] ||
1760 	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX] ||
1761 	    !tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID] ||
1762 	    !tb[RDMA_NLDEV_ATTR_RES_LQPN])
1763 		return -EINVAL;
1764 
1765 	if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
1766 		return -EINVAL;
1767 
1768 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1769 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1770 	if (!device)
1771 		return -EINVAL;
1772 
1773 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1774 	if (!rdma_is_port_valid(device, port)) {
1775 		ret = -EINVAL;
1776 		goto err;
1777 	}
1778 
1779 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1780 	if (!msg) {
1781 		ret = -ENOMEM;
1782 		goto err;
1783 	}
1784 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1785 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1786 					 RDMA_NLDEV_CMD_STAT_SET),
1787 			0, 0);
1788 
1789 	cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
1790 	qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
1791 	ret = rdma_counter_unbind_qpn(device, port, qpn, cntn);
1792 	if (ret)
1793 		goto err_unbind;
1794 
1795 	if (fill_nldev_handle(msg, device) ||
1796 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
1797 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
1798 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
1799 		ret = -EMSGSIZE;
1800 		goto err_fill;
1801 	}
1802 
1803 	nlmsg_end(msg, nlh);
1804 	ib_device_put(device);
1805 	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
1806 
1807 err_fill:
1808 	rdma_counter_bind_qpn(device, port, qpn, cntn);
1809 err_unbind:
1810 	nlmsg_free(msg);
1811 err:
1812 	ib_device_put(device);
1813 	return ret;
1814 }
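
/*
 * Hedged usage note: the unbind above takes the same attribute set that
 * STAT_SET consumes plus the counter ID; with iproute2 this is roughly
 * "rdma statistic qp unbind link <dev>/<port> cntn <id> lqpn <qpn>".
 * If composing the reply fails, err_fill re-binds the QP so kernel
 * state keeps matching what the (unanswered) caller last observed.
 */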
1815 
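/*
 * Dump the port-wide hw_stats ("default") counters.  Each value is the
 * driver-reported total plus rdma_counter_get_hwstat_value(), which is
 * assumed (it lives in the counter code, not this file) to fold back in
 * the amounts accounted to per-QP bound counters on this port.
 */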
1816 static int stat_get_doit_default_counter(struct sk_buff *skb,
1817 					 struct nlmsghdr *nlh,
1818 					 struct netlink_ext_ack *extack,
1819 					 struct nlattr *tb[])
1820 {
1821 	struct rdma_hw_stats *stats;
1822 	struct nlattr *table_attr;
1823 	struct ib_device *device;
1824 	int ret, num_cnts, i;
1825 	struct sk_buff *msg;
1826 	u32 index, port;
1827 	u64 v;
1828 
1829 	if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
1830 		return -EINVAL;
1831 
1832 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1833 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1834 	if (!device)
1835 		return -EINVAL;
1836 
1837 	if (!device->ops.alloc_hw_stats || !device->ops.get_hw_stats) {
1838 		ret = -EINVAL;
1839 		goto err;
1840 	}
1841 
1842 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1843 	if (!rdma_is_port_valid(device, port)) {
1844 		ret = -EINVAL;
1845 		goto err;
1846 	}
1847 
1848 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1849 	if (!msg) {
1850 		ret = -ENOMEM;
1851 		goto err;
1852 	}
1853 
	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
					 RDMA_NLDEV_CMD_STAT_GET),
			0, 0);
	if (!nlh) {
		ret = -EMSGSIZE;
		goto err_msg;
	}
1858 
1859 	if (fill_nldev_handle(msg, device) ||
1860 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) {
1861 		ret = -EMSGSIZE;
1862 		goto err_msg;
1863 	}
1864 
	stats = device->port_data ? device->port_data[port].hw_stats : NULL;
	if (!stats) {
1867 		ret = -EINVAL;
1868 		goto err_msg;
1869 	}
1870 	mutex_lock(&stats->lock);
1871 
1872 	num_cnts = device->ops.get_hw_stats(device, stats, port, 0);
1873 	if (num_cnts < 0) {
1874 		ret = -EINVAL;
1875 		goto err_stats;
1876 	}
1877 
1878 	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
1879 	if (!table_attr) {
1880 		ret = -EMSGSIZE;
1881 		goto err_stats;
1882 	}
1883 	for (i = 0; i < num_cnts; i++) {
1884 		v = stats->value[i] +
1885 			rdma_counter_get_hwstat_value(device, port, i);
1886 		if (fill_stat_hwcounter_entry(msg, stats->names[i], v)) {
1887 			ret = -EMSGSIZE;
1888 			goto err_table;
1889 		}
1890 	}
1891 	nla_nest_end(msg, table_attr);
1892 
1893 	mutex_unlock(&stats->lock);
1894 	nlmsg_end(msg, nlh);
1895 	ib_device_put(device);
1896 	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
1897 
1898 err_table:
1899 	nla_nest_cancel(msg, table_attr);
1900 err_stats:
1901 	mutex_unlock(&stats->lock);
1902 err_msg:
1903 	nlmsg_free(msg);
1904 err:
1905 	ib_device_put(device);
1906 	return ret;
1907 }
1908 
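/*
 * Reply layout produced by stat_get_doit_default_counter() above
 * (a sketch; fill_nldev_handle() and fill_stat_hwcounter_entry() are
 * defined earlier in this file and are assumed to emit the handle and
 * one name/value pair per entry, respectively):
 *
 *	RDMA_NLDEV_ATTR_DEV_INDEX			u32
 *	RDMA_NLDEV_ATTR_DEV_NAME			string
 *	RDMA_NLDEV_ATTR_PORT_INDEX			u32
 *	RDMA_NLDEV_ATTR_STAT_HWCOUNTERS			nested
 *	  RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY		nested
 *	    RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME	string
 *	    RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE	u64
 *	  (one entry per hw counter)
 */
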
static int stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh,
			    struct netlink_ext_ack *extack, struct nlattr *tb[])
{
	/* Per-request outputs of rdma_counter_get_mode() */
	enum rdma_nl_counter_mode mode;
	enum rdma_nl_counter_mask mask;
1915 	struct ib_device *device;
1916 	struct sk_buff *msg;
1917 	u32 index, port;
1918 	int ret;
1919 
1920 	if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID])
1921 		return nldev_res_get_counter_doit(skb, nlh, extack);
1922 
1923 	if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] ||
1924 	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
1925 		return -EINVAL;
1926 
1927 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1928 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1929 	if (!device)
1930 		return -EINVAL;
1931 
1932 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1933 	if (!rdma_is_port_valid(device, port)) {
1934 		ret = -EINVAL;
1935 		goto err;
1936 	}
1937 
1938 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1939 	if (!msg) {
1940 		ret = -ENOMEM;
1941 		goto err;
1942 	}
1943 
	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
					 RDMA_NLDEV_CMD_STAT_GET),
			0, 0);
	if (!nlh) {
		ret = -EMSGSIZE;
		goto err_msg;
	}
1948 
1949 	ret = rdma_counter_get_mode(device, port, &mode, &mask);
1950 	if (ret)
1951 		goto err_msg;
1952 
	if (fill_nldev_handle(msg, device) ||
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, mode)) {
		ret = -EMSGSIZE;
		goto err_msg;
	}

	if ((mode == RDMA_COUNTER_MODE_AUTO) &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK, mask)) {
		ret = -EMSGSIZE;
		goto err_msg;
	}
1961 
1962 	nlmsg_end(msg, nlh);
1963 	ib_device_put(device);
1964 	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
1965 
1966 err_msg:
1967 	nlmsg_free(msg);
1968 err:
1969 	ib_device_put(device);
1970 	return ret;
1971 }
1972 
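/*
 * STAT_GET dispatch: without RDMA_NLDEV_ATTR_STAT_RES the request falls
 * back to the port-wide default counters; with STAT_RES == RES_QP it
 * reports either one bound counter (when STAT_COUNTER_ID is given) or
 * the port's current counter mode and auto-mode criteria mask.
 */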
1973 static int nldev_stat_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1974 			       struct netlink_ext_ack *extack)
1975 {
1976 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1977 	int ret;
1978 
1979 	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1980 			  nldev_policy, extack);
1981 	if (ret)
1982 		return -EINVAL;
1983 
1984 	if (!tb[RDMA_NLDEV_ATTR_STAT_RES])
1985 		return stat_get_doit_default_counter(skb, nlh, extack, tb);
1986 
1987 	switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) {
1988 	case RDMA_NLDEV_ATTR_RES_QP:
1989 		ret = stat_get_doit_qp(skb, nlh, extack, tb);
1990 		break;
1991 
1992 	default:
1993 		ret = -EINVAL;
1994 		break;
1995 	}
1996 
1997 	return ret;
1998 }
1999 
2000 static int nldev_stat_get_dumpit(struct sk_buff *skb,
2001 				 struct netlink_callback *cb)
2002 {
2003 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2004 	int ret;
2005 
2006 	ret = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2007 			  nldev_policy, NULL);
2008 	if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES])
2009 		return -EINVAL;
2010 
2011 	switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) {
2012 	case RDMA_NLDEV_ATTR_RES_QP:
2013 		ret = nldev_res_get_counter_dumpit(skb, cb);
2014 		break;
2015 
2016 	default:
2017 		ret = -EINVAL;
2018 		break;
2019 	}
2020 
2021 	return ret;
2022 }
2023 
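/*
 * doit/dumpit callbacks indexed by RDMA_NLDEV_CMD_*.  rdma_nl_register()
 * below hooks the table into the shared RDMA netlink dispatcher, which
 * enforces RDMA_NL_ADMIN_PERM (CAP_NET_ADMIN) for the state-changing
 * commands.
 */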
2024 static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
2025 	[RDMA_NLDEV_CMD_GET] = {
2026 		.doit = nldev_get_doit,
2027 		.dump = nldev_get_dumpit,
2028 	},
2029 	[RDMA_NLDEV_CMD_GET_CHARDEV] = {
2030 		.doit = nldev_get_chardev,
2031 	},
2032 	[RDMA_NLDEV_CMD_SET] = {
2033 		.doit = nldev_set_doit,
2034 		.flags = RDMA_NL_ADMIN_PERM,
2035 	},
2036 	[RDMA_NLDEV_CMD_NEWLINK] = {
2037 		.doit = nldev_newlink,
2038 		.flags = RDMA_NL_ADMIN_PERM,
2039 	},
2040 	[RDMA_NLDEV_CMD_DELLINK] = {
2041 		.doit = nldev_dellink,
2042 		.flags = RDMA_NL_ADMIN_PERM,
2043 	},
2044 	[RDMA_NLDEV_CMD_PORT_GET] = {
2045 		.doit = nldev_port_get_doit,
2046 		.dump = nldev_port_get_dumpit,
2047 	},
2048 	[RDMA_NLDEV_CMD_RES_GET] = {
2049 		.doit = nldev_res_get_doit,
2050 		.dump = nldev_res_get_dumpit,
2051 	},
2052 	[RDMA_NLDEV_CMD_RES_QP_GET] = {
2053 		.doit = nldev_res_get_qp_doit,
2054 		.dump = nldev_res_get_qp_dumpit,
2055 	},
2056 	[RDMA_NLDEV_CMD_RES_CM_ID_GET] = {
2057 		.doit = nldev_res_get_cm_id_doit,
2058 		.dump = nldev_res_get_cm_id_dumpit,
2059 	},
2060 	[RDMA_NLDEV_CMD_RES_CQ_GET] = {
2061 		.doit = nldev_res_get_cq_doit,
2062 		.dump = nldev_res_get_cq_dumpit,
2063 	},
2064 	[RDMA_NLDEV_CMD_RES_MR_GET] = {
2065 		.doit = nldev_res_get_mr_doit,
2066 		.dump = nldev_res_get_mr_dumpit,
2067 	},
2068 	[RDMA_NLDEV_CMD_RES_PD_GET] = {
2069 		.doit = nldev_res_get_pd_doit,
2070 		.dump = nldev_res_get_pd_dumpit,
2071 	},
2072 	[RDMA_NLDEV_CMD_SYS_GET] = {
2073 		.doit = nldev_sys_get_doit,
2074 	},
2075 	[RDMA_NLDEV_CMD_SYS_SET] = {
2076 		.doit = nldev_set_sys_set_doit,
2077 	},
2078 	[RDMA_NLDEV_CMD_STAT_SET] = {
2079 		.doit = nldev_stat_set_doit,
2080 		.flags = RDMA_NL_ADMIN_PERM,
2081 	},
2082 	[RDMA_NLDEV_CMD_STAT_GET] = {
2083 		.doit = nldev_stat_get_doit,
2084 		.dump = nldev_stat_get_dumpit,
2085 	},
2086 	[RDMA_NLDEV_CMD_STAT_DEL] = {
2087 		.doit = nldev_stat_del_doit,
2088 		.flags = RDMA_NL_ADMIN_PERM,
2089 	},
2090 };
2091 
2092 void __init nldev_init(void)
2093 {
2094 	rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table);
2095 }
2096 
2097 void __exit nldev_exit(void)
2098 {
2099 	rdma_nl_unregister(RDMA_NL_NLDEV);
2100 }
2101 
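/*
 * Autoload hint: a netlink request for client index 5 (RDMA_NL_NLDEV)
 * makes the core request a module with the alias generated below, so
 * nldev can be pulled in on first use.
 */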
2102 MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_NLDEV, 5);