xref: /linux/drivers/infiniband/core/nldev.c (revision a3a400da206bd0cf426571633da51547d44f4f42)
1 /*
2  * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  * 3. Neither the names of the copyright holders nor the names of its
13  *    contributors may be used to endorse or promote products derived from
14  *    this software without specific prior written permission.
15  *
16  * Alternatively, this software may be distributed under the terms of the
17  * GNU General Public License ("GPL") version 2 as published by the Free
18  * Software Foundation.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include <linux/module.h>
34 #include <linux/pid.h>
35 #include <linux/pid_namespace.h>
36 #include <linux/mutex.h>
37 #include <net/netlink.h>
38 #include <rdma/rdma_cm.h>
39 #include <rdma/rdma_netlink.h>
40 
41 #include "core_priv.h"
42 #include "cma_priv.h"
43 #include "restrack.h"
44 
/*
 * Netlink attribute policy for all RDMA_NL_NLDEV requests.
 *
 * Indexed by attribute id; array elements are kept sorted by the netlink
 * attribute name so new entries are easy to place.  String attributes
 * carry an explicit maximum length so nlmsg_parse() rejects oversized
 * payloads before any handler runs.
 */
static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
	[RDMA_NLDEV_ATTR_CHARDEV]		= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_CHARDEV_ABI]		= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_CHARDEV_NAME]		= { .type = NLA_NUL_STRING,
					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
	[RDMA_NLDEV_ATTR_CHARDEV_TYPE]		= { .type = NLA_NUL_STRING,
					.len = RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE },
	[RDMA_NLDEV_ATTR_DEV_INDEX]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_DEV_NAME]		= { .type = NLA_NUL_STRING,
					.len = IB_DEVICE_NAME_MAX },
	[RDMA_NLDEV_ATTR_DEV_NODE_TYPE]		= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_DEV_PROTOCOL]		= { .type = NLA_NUL_STRING,
					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
	[RDMA_NLDEV_ATTR_DRIVER]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_DRIVER_ENTRY]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE]	= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_DRIVER_STRING]		= { .type = NLA_NUL_STRING,
					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
	[RDMA_NLDEV_ATTR_DRIVER_S32]		= { .type = NLA_S32 },
	[RDMA_NLDEV_ATTR_DRIVER_S64]		= { .type = NLA_S64 },
	[RDMA_NLDEV_ATTR_DRIVER_U32]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_DRIVER_U64]		= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_FW_VERSION]		= { .type = NLA_NUL_STRING,
					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
	[RDMA_NLDEV_ATTR_LID]			= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_LINK_TYPE]		= { .type = NLA_NUL_STRING,
					.len = IFNAMSIZ },
	[RDMA_NLDEV_ATTR_LMC]			= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_NDEV_INDEX]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_NDEV_NAME]		= { .type = NLA_NUL_STRING,
					.len = IFNAMSIZ },
	[RDMA_NLDEV_ATTR_NODE_GUID]		= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_PORT_INDEX]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_PORT_PHYS_STATE]	= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_PORT_STATE]		= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_CM_ID]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_CM_IDN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY]	= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_CQ]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_CQE]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_CQN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_CQ_ENTRY]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_CTXN]		= { .type = NLA_U32 },
	/* Addresses are raw sockaddr_storage blobs, bounded by length only */
	[RDMA_NLDEV_ATTR_RES_DST_ADDR]		= {
			.len = sizeof(struct __kernel_sockaddr_storage) },
	[RDMA_NLDEV_ATTR_RES_IOVA]		= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_RES_KERN_NAME]		= { .type = NLA_NUL_STRING,
					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
	[RDMA_NLDEV_ATTR_RES_LKEY]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY]	= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_LQPN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_MR]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_MRLEN]		= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_RES_MRN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_MR_ENTRY]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE]	= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_PD]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_PDN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_PD_ENTRY]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_PID]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_POLL_CTX]		= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_PS]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_QP]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_QP_ENTRY]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_RKEY]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_RQPN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_RQ_PSN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_SQ_PSN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_SRC_ADDR]		= {
			.len = sizeof(struct __kernel_sockaddr_storage) },
	[RDMA_NLDEV_ATTR_RES_STATE]		= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_SUMMARY]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY]	= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR]= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME]= { .type = NLA_NUL_STRING,
					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
	[RDMA_NLDEV_ATTR_RES_TYPE]		= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY]= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_USECNT]		= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_SM_LID]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_SUBNET_PREFIX]		= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]	= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_STAT_MODE]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_STAT_RES]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_STAT_COUNTER]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY]	= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]       = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]       = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY]  = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME] = { .type = NLA_NUL_STRING },
	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE] = { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_SYS_IMAGE_GUID]	= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID]	= { .type = NLA_U32 },
	[RDMA_NLDEV_NET_NS_FD]			= { .type = NLA_U32 },
	[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]	= { .type = NLA_U8 },
};
144 
145 static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
146 				      enum rdma_nldev_print_type print_type)
147 {
148 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, name))
149 		return -EMSGSIZE;
150 	if (print_type != RDMA_NLDEV_PRINT_TYPE_UNSPEC &&
151 	    nla_put_u8(msg, RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE, print_type))
152 		return -EMSGSIZE;
153 
154 	return 0;
155 }
156 
157 static int _rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name,
158 				   enum rdma_nldev_print_type print_type,
159 				   u32 value)
160 {
161 	if (put_driver_name_print_type(msg, name, print_type))
162 		return -EMSGSIZE;
163 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DRIVER_U32, value))
164 		return -EMSGSIZE;
165 
166 	return 0;
167 }
168 
169 static int _rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name,
170 				   enum rdma_nldev_print_type print_type,
171 				   u64 value)
172 {
173 	if (put_driver_name_print_type(msg, name, print_type))
174 		return -EMSGSIZE;
175 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_DRIVER_U64, value,
176 			      RDMA_NLDEV_ATTR_PAD))
177 		return -EMSGSIZE;
178 
179 	return 0;
180 }
181 
182 int rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name, u32 value)
183 {
184 	return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
185 				       value);
186 }
187 EXPORT_SYMBOL(rdma_nl_put_driver_u32);
188 
189 int rdma_nl_put_driver_u32_hex(struct sk_buff *msg, const char *name,
190 			       u32 value)
191 {
192 	return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX,
193 				       value);
194 }
195 EXPORT_SYMBOL(rdma_nl_put_driver_u32_hex);
196 
197 int rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, u64 value)
198 {
199 	return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
200 				       value);
201 }
202 EXPORT_SYMBOL(rdma_nl_put_driver_u64);
203 
204 int rdma_nl_put_driver_u64_hex(struct sk_buff *msg, const char *name, u64 value)
205 {
206 	return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX,
207 				       value);
208 }
209 EXPORT_SYMBOL(rdma_nl_put_driver_u64_hex);
210 
211 static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
212 {
213 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
214 		return -EMSGSIZE;
215 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME,
216 			   dev_name(&device->dev)))
217 		return -EMSGSIZE;
218 
219 	return 0;
220 }
221 
/*
 * Fill @msg with the device-wide attributes: handle, highest port index,
 * capability flags, firmware version (if any), node/sys-image GUIDs,
 * node type, and a protocol string derived from the first port.
 * Returns 0 on success or a negative errno (-EMSGSIZE when @msg is full).
 */
static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
{
	char fw[IB_FW_VERSION_NAME_MAX];
	int ret = 0;
	u8 port;

	if (fill_nldev_handle(msg, device))
		return -EMSGSIZE;

	/* PORT_INDEX here reports the number of ports (the last valid one) */
	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, rdma_end_port(device)))
		return -EMSGSIZE;

	BUILD_BUG_ON(sizeof(device->attrs.device_cap_flags) != sizeof(u64));
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
			      device->attrs.device_cap_flags,
			      RDMA_NLDEV_ATTR_PAD))
		return -EMSGSIZE;

	ib_get_device_fw_str(device, fw);
	/* Device without FW has strlen(fw) = 0; skip the attribute then */
	if (strlen(fw) && nla_put_string(msg, RDMA_NLDEV_ATTR_FW_VERSION, fw))
		return -EMSGSIZE;

	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_NODE_GUID,
			      be64_to_cpu(device->node_guid),
			      RDMA_NLDEV_ATTR_PAD))
		return -EMSGSIZE;
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SYS_IMAGE_GUID,
			      be64_to_cpu(device->attrs.sys_image_guid),
			      RDMA_NLDEV_ATTR_PAD))
		return -EMSGSIZE;
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_NODE_TYPE, device->node_type))
		return -EMSGSIZE;

	/*
	 * The link type is determined from the first port only.  An mlx4
	 * device can potentially expose two different link types on the same
	 * IB device; that situation is considered better avoided in the
	 * future.
	 */
	port = rdma_start_port(device);
	if (rdma_cap_opa_mad(device, port))
		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "opa");
	else if (rdma_protocol_ib(device, port))
		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "ib");
	else if (rdma_protocol_iwarp(device, port))
		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "iw");
	else if (rdma_protocol_roce(device, port))
		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "roce");
	else if (rdma_protocol_usnic(device, port))
		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL,
				     "usnic");
	return ret;
}
275 
/*
 * Fill @msg with per-port attributes: device handle, port index, the
 * IB-specific attributes (cap flags, subnet prefix, LIDs, LMC) when the
 * port speaks IB, port/phys state, and the bound netdev's index and name
 * when that netdev lives in the caller's net namespace @net.
 * Returns 0 on success or a negative errno.
 */
static int fill_port_info(struct sk_buff *msg,
			  struct ib_device *device, u32 port,
			  const struct net *net)
{
	struct net_device *netdev = NULL;
	struct ib_port_attr attr;
	int ret;
	u64 cap_flags = 0;

	if (fill_nldev_handle(msg, device))
		return -EMSGSIZE;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
		return -EMSGSIZE;

	ret = ib_query_port(device, port, &attr);
	if (ret)
		return ret;

	if (rdma_protocol_ib(device, port)) {
		/* Both 32-bit flag words are packed into one u64 attribute */
		BUILD_BUG_ON((sizeof(attr.port_cap_flags) +
				sizeof(attr.port_cap_flags2)) > sizeof(u64));
		cap_flags = attr.port_cap_flags |
			((u64)attr.port_cap_flags2 << 32);
		if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
				      cap_flags, RDMA_NLDEV_ATTR_PAD))
			return -EMSGSIZE;
		if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX,
				      attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD))
			return -EMSGSIZE;
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid))
			return -EMSGSIZE;
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, attr.sm_lid))
			return -EMSGSIZE;
		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_LMC, attr.lmc))
			return -EMSGSIZE;
	}
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_STATE, attr.state))
		return -EMSGSIZE;
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state))
		return -EMSGSIZE;

	/* Only expose the netdev if it belongs to the requester's netns */
	netdev = ib_device_get_netdev(device, port);
	if (netdev && net_eq(dev_net(netdev), net)) {
		ret = nla_put_u32(msg,
				  RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
		if (ret)
			goto out;
		ret = nla_put_string(msg,
				     RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);
	}

out:
	/* ret is still ib_query_port()'s 0 when the netdev block was skipped */
	if (netdev)
		dev_put(netdev);
	return ret;
}
333 
334 static int fill_res_info_entry(struct sk_buff *msg,
335 			       const char *name, u64 curr)
336 {
337 	struct nlattr *entry_attr;
338 
339 	entry_attr = nla_nest_start_noflag(msg,
340 					   RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY);
341 	if (!entry_attr)
342 		return -EMSGSIZE;
343 
344 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME, name))
345 		goto err;
346 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR, curr,
347 			      RDMA_NLDEV_ATTR_PAD))
348 		goto err;
349 
350 	nla_nest_end(msg, entry_attr);
351 	return 0;
352 
353 err:
354 	nla_nest_cancel(msg, entry_attr);
355 	return -EMSGSIZE;
356 }
357 
/*
 * Fill @msg with the per-device resource summary: a nested table with
 * one entry per tracked resource type (pd/cq/qp/cm_id/mr/ctx) holding
 * the current count as seen from the caller's pid namespace.
 * Returns 0 on success or a negative errno.
 */
static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
{
	static const char * const names[RDMA_RESTRACK_MAX] = {
		[RDMA_RESTRACK_PD] = "pd",
		[RDMA_RESTRACK_CQ] = "cq",
		[RDMA_RESTRACK_QP] = "qp",
		[RDMA_RESTRACK_CM_ID] = "cm_id",
		[RDMA_RESTRACK_MR] = "mr",
		[RDMA_RESTRACK_CTX] = "ctx",
	};

	struct nlattr *table_attr;
	int ret, i, curr;

	if (fill_nldev_handle(msg, device))
		return -EMSGSIZE;

	table_attr = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_RES_SUMMARY);
	if (!table_attr)
		return -EMSGSIZE;

	for (i = 0; i < RDMA_RESTRACK_MAX; i++) {
		/* Restrack types without a display name are not reported */
		if (!names[i])
			continue;
		curr = rdma_restrack_count(device, i,
					   task_active_pid_ns(current));
		ret = fill_res_info_entry(msg, names[i], curr);
		if (ret)
			goto err;
	}

	nla_nest_end(msg, table_attr);
	return 0;

err:
	nla_nest_cancel(msg, table_attr);
	return ret;
}
396 
397 static int fill_res_name_pid(struct sk_buff *msg,
398 			     struct rdma_restrack_entry *res)
399 {
400 	/*
401 	 * For user resources, user is should read /proc/PID/comm to get the
402 	 * name of the task file.
403 	 */
404 	if (rdma_is_kernel_res(res)) {
405 		if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
406 		    res->kern_name))
407 			return -EMSGSIZE;
408 	} else {
409 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID,
410 		    task_pid_vnr(res->task)))
411 			return -EMSGSIZE;
412 	}
413 	return 0;
414 }
415 
416 static bool fill_res_entry(struct ib_device *dev, struct sk_buff *msg,
417 			   struct rdma_restrack_entry *res)
418 {
419 	if (!dev->ops.fill_res_entry)
420 		return false;
421 	return dev->ops.fill_res_entry(msg, res);
422 }
423 
/*
 * Fill one QP restrack entry.  When @port is non-zero only QPs bound to
 * that port are reported; a mismatch returns -EAGAIN, which the dump
 * machinery appears to treat as "skip this entry" (NOTE(review): confirm
 * in res_get_common_dumpit).  Returns 0 on success, -EMSGSIZE when the
 * message is full, or the ib_query_qp() error.
 */
static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
			     struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_qp *qp = container_of(res, struct ib_qp, res);
	struct ib_device *dev = qp->device;
	struct ib_qp_init_attr qp_init_attr;
	struct ib_qp_attr qp_attr;
	int ret;

	ret = ib_query_qp(qp, &qp_attr, 0, &qp_init_attr);
	if (ret)
		return ret;

	if (port && port != qp_attr.port_num)
		return -EAGAIN;

	/* In create_qp() port is not set yet */
	if (qp_attr.port_num &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp_attr.port_num))
		goto err;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num))
		goto err;
	/* Remote QPN and RQ PSN only make sense for connected QP types */
	if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN,
				qp_attr.dest_qp_num))
			goto err;
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQ_PSN,
				qp_attr.rq_psn))
			goto err;
	}

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SQ_PSN, qp_attr.sq_psn))
		goto err;

	/* Path migration state is reported for RC/UC/XRC QPs only */
	if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC ||
	    qp->qp_type == IB_QPT_XRC_INI || qp->qp_type == IB_QPT_XRC_TGT) {
		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE,
			       qp_attr.path_mig_state))
			goto err;
	}
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, qp->qp_type))
		goto err;
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
		goto err;

	/* PD number is exposed for user-owned QPs only */
	if (!rdma_is_kernel_res(res) &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, qp->pd->res.id))
		goto err;

	if (fill_res_name_pid(msg, res))
		goto err;

	if (fill_res_entry(dev, msg, res))
		goto err;

	return 0;

err:	return -EMSGSIZE;
}
484 
/*
 * Fill one CM ID restrack entry.  When @port is non-zero only CM IDs
 * bound to that port are reported; a mismatch silently returns 0 (the
 * entry is simply omitted, unlike the QP filler's -EAGAIN).  Returns 0
 * on success or -EMSGSIZE when the message is full.
 */
static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin,
				struct rdma_restrack_entry *res, uint32_t port)
{
	struct rdma_id_private *id_priv =
				container_of(res, struct rdma_id_private, res);
	struct ib_device *dev = id_priv->id.device;
	struct rdma_cm_id *cm_id = &id_priv->id;

	if (port && port != cm_id->port_num)
		return 0;

	/* port_num of 0 means the CM ID is not yet bound to a port */
	if (cm_id->port_num &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num))
		goto err;

	/* QP attributes only exist once a QP was associated with the ID */
	if (id_priv->qp_num) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, id_priv->qp_num))
			goto err;
		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, cm_id->qp_type))
			goto err;
	}

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PS, cm_id->ps))
		goto err;

	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, id_priv->state))
		goto err;

	/* Addresses are dumped raw; ss_family == 0 means "not set" */
	if (cm_id->route.addr.src_addr.ss_family &&
	    nla_put(msg, RDMA_NLDEV_ATTR_RES_SRC_ADDR,
		    sizeof(cm_id->route.addr.src_addr),
		    &cm_id->route.addr.src_addr))
		goto err;
	if (cm_id->route.addr.dst_addr.ss_family &&
	    nla_put(msg, RDMA_NLDEV_ATTR_RES_DST_ADDR,
		    sizeof(cm_id->route.addr.dst_addr),
		    &cm_id->route.addr.dst_addr))
		goto err;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CM_IDN, res->id))
		goto err;

	if (fill_res_name_pid(msg, res))
		goto err;

	if (fill_res_entry(dev, msg, res))
		goto err;

	return 0;

err: return -EMSGSIZE;
}
537 
/*
 * Fill one CQ restrack entry: CQE count, reference count, poll context
 * (kernel CQs only), CQ number, and the owning user context for
 * user-owned CQs.  Returns 0 on success or -EMSGSIZE.
 */
static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin,
			     struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_cq *cq = container_of(res, struct ib_cq, res);
	struct ib_device *dev = cq->device;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe))
		goto err;
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
			      atomic_read(&cq->usecnt), RDMA_NLDEV_ATTR_PAD))
		goto err;

	/* Poll context is only valid for kernel CQs */
	if (rdma_is_kernel_res(res) &&
	    nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx))
		goto err;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, res->id))
		goto err;
	/* cq->uobject only exists for user-owned CQs */
	if (!rdma_is_kernel_res(res) &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
			cq->uobject->context->res.id))
		goto err;

	if (fill_res_name_pid(msg, res))
		goto err;

	if (fill_res_entry(dev, msg, res))
		goto err;

	return 0;

err:	return -EMSGSIZE;
}
572 
/*
 * Fill one MR restrack entry.  The rkey/lkey are security-sensitive and
 * are only dumped for callers with CAP_NET_ADMIN; length, MR number and
 * (for user MRs) the owning PD number are always reported.
 * Returns 0 on success or -EMSGSIZE.
 */
static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
			     struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_mr *mr = container_of(res, struct ib_mr, res);
	struct ib_device *dev = mr->pd->device;

	if (has_cap_net_admin) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey))
			goto err;
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey))
			goto err;
	}

	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length,
			      RDMA_NLDEV_ATTR_PAD))
		goto err;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
		goto err;

	if (!rdma_is_kernel_res(res) &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, mr->pd->res.id))
		goto err;

	if (fill_res_name_pid(msg, res))
		goto err;

	if (fill_res_entry(dev, msg, res))
		goto err;

	return 0;

err:	return -EMSGSIZE;
}
607 
/*
 * Fill one PD restrack entry.  The local DMA lkey and (when enabled)
 * unsafe global rkey are security-sensitive and restricted to callers
 * with CAP_NET_ADMIN; usecnt, PD number and (for user PDs) the owning
 * context number are always reported.  Returns 0 or -EMSGSIZE.
 */
static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin,
			     struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_pd *pd = container_of(res, struct ib_pd, res);
	struct ib_device *dev = pd->device;

	if (has_cap_net_admin) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY,
				pd->local_dma_lkey))
			goto err;
		/* The global rkey only exists when the PD opted in to it */
		if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) &&
		    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY,
				pd->unsafe_global_rkey))
			goto err;
	}
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
			      atomic_read(&pd->usecnt), RDMA_NLDEV_ATTR_PAD))
		goto err;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, res->id))
		goto err;

	/* pd->uobject only exists for user-owned PDs */
	if (!rdma_is_kernel_res(res) &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
			pd->uobject->context->res.id))
		goto err;

	if (fill_res_name_pid(msg, res))
		goto err;

	if (fill_res_entry(dev, msg, res))
		goto err;

	return 0;

err:	return -EMSGSIZE;
}
645 
646 static int fill_stat_counter_mode(struct sk_buff *msg,
647 				  struct rdma_counter *counter)
648 {
649 	struct rdma_counter_mode *m = &counter->mode;
650 
651 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, m->mode))
652 		return -EMSGSIZE;
653 
654 	if (m->mode == RDMA_COUNTER_MODE_AUTO)
655 		if ((m->mask & RDMA_COUNTER_MASK_QP_TYPE) &&
656 		    nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, m->param.qp_type))
657 			return -EMSGSIZE;
658 
659 	return 0;
660 }
661 
662 static int fill_stat_counter_qp_entry(struct sk_buff *msg, u32 qpn)
663 {
664 	struct nlattr *entry_attr;
665 
666 	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
667 	if (!entry_attr)
668 		return -EMSGSIZE;
669 
670 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn))
671 		goto err;
672 
673 	nla_nest_end(msg, entry_attr);
674 	return 0;
675 
676 err:
677 	nla_nest_cancel(msg, entry_attr);
678 	return -EMSGSIZE;
679 }
680 
681 static int fill_stat_counter_qps(struct sk_buff *msg,
682 				 struct rdma_counter *counter)
683 {
684 	struct rdma_restrack_entry *res;
685 	struct rdma_restrack_root *rt;
686 	struct nlattr *table_attr;
687 	struct ib_qp *qp = NULL;
688 	unsigned long id = 0;
689 	int ret = 0;
690 
691 	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP);
692 
693 	rt = &counter->device->res[RDMA_RESTRACK_QP];
694 	xa_lock(&rt->xa);
695 	xa_for_each(&rt->xa, id, res) {
696 		if (!rdma_is_visible_in_pid_ns(res))
697 			continue;
698 
699 		qp = container_of(res, struct ib_qp, res);
700 		if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
701 			continue;
702 
703 		if (!qp->counter || (qp->counter->id != counter->id))
704 			continue;
705 
706 		ret = fill_stat_counter_qp_entry(msg, qp->qp_num);
707 		if (ret)
708 			goto err;
709 	}
710 
711 	xa_unlock(&rt->xa);
712 	nla_nest_end(msg, table_attr);
713 	return 0;
714 
715 err:
716 	xa_unlock(&rt->xa);
717 	nla_nest_cancel(msg, table_attr);
718 	return ret;
719 }
720 
721 static int fill_stat_hwcounter_entry(struct sk_buff *msg,
722 				     const char *name, u64 value)
723 {
724 	struct nlattr *entry_attr;
725 
726 	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY);
727 	if (!entry_attr)
728 		return -EMSGSIZE;
729 
730 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME,
731 			   name))
732 		goto err;
733 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE,
734 			      value, RDMA_NLDEV_ATTR_PAD))
735 		goto err;
736 
737 	nla_nest_end(msg, entry_attr);
738 	return 0;
739 
740 err:
741 	nla_nest_cancel(msg, entry_attr);
742 	return -EMSGSIZE;
743 }
744 
745 static int fill_stat_counter_hwcounters(struct sk_buff *msg,
746 					struct rdma_counter *counter)
747 {
748 	struct rdma_hw_stats *st = counter->stats;
749 	struct nlattr *table_attr;
750 	int i;
751 
752 	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
753 	if (!table_attr)
754 		return -EMSGSIZE;
755 
756 	for (i = 0; i < st->num_counters; i++)
757 		if (fill_stat_hwcounter_entry(msg, st->names[i], st->value[i]))
758 			goto err;
759 
760 	nla_nest_end(msg, table_attr);
761 	return 0;
762 
763 err:
764 	nla_nest_cancel(msg, table_attr);
765 	return -EMSGSIZE;
766 }
767 
768 static int fill_res_counter_entry(struct sk_buff *msg, bool has_cap_net_admin,
769 				  struct rdma_restrack_entry *res,
770 				  uint32_t port)
771 {
772 	struct rdma_counter *counter =
773 		container_of(res, struct rdma_counter, res);
774 
775 	if (port && port != counter->port)
776 		return 0;
777 
778 	/* Dump it even query failed */
779 	rdma_counter_query_stats(counter);
780 
781 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, counter->port) ||
782 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, counter->id) ||
783 	    fill_res_name_pid(msg, &counter->res) ||
784 	    fill_stat_counter_mode(msg, counter) ||
785 	    fill_stat_counter_qps(msg, counter) ||
786 	    fill_stat_counter_hwcounters(msg, counter))
787 		return -EMSGSIZE;
788 
789 	return 0;
790 }
791 
/*
 * Handle RDMA_NLDEV_CMD_GET for a single device: look it up by the
 * mandatory DEV_INDEX attribute and unicast a message with its info
 * back to the requester.  Returns 0 on success or a negative errno.
 */
static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
			  struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct ib_device *device;
	struct sk_buff *msg;
	u32 index;
	int err;

	err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
				     nldev_policy, extack);
	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);

	/* Takes a reference on the device; released on every exit path */
	device = ib_device_get_by_index(sock_net(skb->sk), index);
	if (!device)
		return -EINVAL;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg) {
		err = -ENOMEM;
		goto err;
	}

	/*
	 * NOTE(review): nlmsg_put() return value is unchecked; on a fresh
	 * NLMSG_DEFAULT_SIZE skb the header should always fit -- confirm.
	 */
	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
			0, 0);

	err = fill_dev_info(msg, device);
	if (err)
		goto err_free;

	nlmsg_end(msg, nlh);

	ib_device_put(device);
	/* rdma_nl_unicast() consumes msg regardless of the outcome */
	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);

err_free:
	nlmsg_free(msg);
err:
	ib_device_put(device);
	return err;
}
837 
/*
 * Handle RDMA_NLDEV_CMD_SET: rename the device or move it to another
 * net namespace, depending on which attribute is present.  Returns 0 on
 * success or a negative errno.
 */
static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
			  struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct ib_device *device;
	u32 index;
	int err;

	err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
				     nldev_policy, extack);
	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(sock_net(skb->sk), index);
	if (!device)
		return -EINVAL;

	if (tb[RDMA_NLDEV_ATTR_DEV_NAME]) {
		char name[IB_DEVICE_NAME_MAX] = {};

		nla_strlcpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
			    IB_DEVICE_NAME_MAX);
		err = ib_device_rename(device, name);
		goto done;
	}

	if (tb[RDMA_NLDEV_NET_NS_FD]) {
		u32 ns_fd;

		ns_fd = nla_get_u32(tb[RDMA_NLDEV_NET_NS_FD]);
		/*
		 * NOTE(review): ib_device_set_netns_put() appears to drop
		 * the device reference itself (hence the jump past
		 * ib_device_put below) -- confirm against its definition.
		 */
		err = ib_device_set_netns_put(skb, device, ns_fd);
		goto put_done;
	}

	/* No recognized attribute: fall through, err is 0 from the parse */
done:
	ib_device_put(device);
put_done:
	return err;
}
878 
/*
 * Emit one device's info into an ongoing GET dump.  @idx is this
 * device's position in the enumeration; positions below the resume
 * point saved in cb->args[0] were already sent and are skipped.
 * Returns skb->len so the netlink core keeps the dump going.
 */
static int _nldev_get_dumpit(struct ib_device *device,
			     struct sk_buff *skb,
			     struct netlink_callback *cb,
			     unsigned int idx)
{
	int start = cb->args[0];
	struct nlmsghdr *nlh;

	if (idx < start)
		return 0;

	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
			0, NLM_F_MULTI);

	if (fill_dev_info(skb, device)) {
		/* Out of room: drop the partial message, resume here later */
		nlmsg_cancel(skb, nlh);
		goto out;
	}

	nlmsg_end(skb, nlh);

	idx++;

out:	cb->args[0] = idx;
	return skb->len;
}
906 
907 static int nldev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
908 {
909 	/*
910 	 * There is no need to take lock, because
911 	 * we are relying on ib_core's locking.
912 	 */
913 	return ib_enum_all_devs(_nldev_get_dumpit, skb, cb);
914 }
915 
/*
 * Handle RDMA_NLDEV_CMD_PORT_GET for a single port: both DEV_INDEX and
 * PORT_INDEX attributes are mandatory.  Unicasts the port info back to
 * the requester.  Returns 0 on success or a negative errno.
 */
static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
			       struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct ib_device *device;
	struct sk_buff *msg;
	u32 index;
	u32 port;
	int err;

	err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
				     nldev_policy, extack);
	if (err ||
	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
	    !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	/* Takes a reference on the device; released on every exit path */
	device = ib_device_get_by_index(sock_net(skb->sk), index);
	if (!device)
		return -EINVAL;

	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
	if (!rdma_is_port_valid(device, port)) {
		err = -EINVAL;
		goto err;
	}

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg) {
		err = -ENOMEM;
		goto err;
	}

	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
			0, 0);

	err = fill_port_info(msg, device, port, sock_net(skb->sk));
	if (err)
		goto err_free;

	nlmsg_end(msg, nlh);
	ib_device_put(device);

	/* rdma_nl_unicast() consumes msg regardless of the outcome */
	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);

err_free:
	nlmsg_free(msg);
err:
	ib_device_put(device);
	return err;
}
969 
/*
 * Dump all ports of the device selected by the mandatory DEV_INDEX
 * attribute, one PORT_GET message per port, resuming from the port
 * position saved in cb->args[0].  Returns skb->len (dump convention)
 * or a negative errno on bad input.
 */
static int nldev_port_get_dumpit(struct sk_buff *skb,
				 struct netlink_callback *cb)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct ib_device *device;
	int start = cb->args[0];
	struct nlmsghdr *nlh;
	u32 idx = 0;
	u32 ifindex;
	int err;
	unsigned int p;

	err = nlmsg_parse_deprecated(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
				     nldev_policy, NULL);
	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
		return -EINVAL;

	ifindex = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(sock_net(skb->sk), ifindex);
	if (!device)
		return -EINVAL;

	rdma_for_each_port (device, p) {
		/*
		 * The dumpit function returns all information from specific
		 * index. This specific index is taken from the netlink
		 * messages request sent by user and it is available
		 * in cb->args[0].
		 *
		 * Usually, the user doesn't fill this field and it causes
		 * to return everything.
		 *
		 */
		if (idx < start) {
			idx++;
			continue;
		}

		nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
				cb->nlh->nlmsg_seq,
				RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
						 RDMA_NLDEV_CMD_PORT_GET),
				0, NLM_F_MULTI);

		if (fill_port_info(skb, device, p, sock_net(skb->sk))) {
			/* Message full: cancel this entry, resume here later */
			nlmsg_cancel(skb, nlh);
			goto out;
		}
		idx++;
		nlmsg_end(skb, nlh);
	}

out:
	ib_device_put(device);
	cb->args[0] = idx;
	return skb->len;
}
1027 
1028 static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1029 			      struct netlink_ext_ack *extack)
1030 {
1031 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1032 	struct ib_device *device;
1033 	struct sk_buff *msg;
1034 	u32 index;
1035 	int ret;
1036 
1037 	ret = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1038 				     nldev_policy, extack);
1039 	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1040 		return -EINVAL;
1041 
1042 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1043 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1044 	if (!device)
1045 		return -EINVAL;
1046 
1047 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1048 	if (!msg) {
1049 		ret = -ENOMEM;
1050 		goto err;
1051 	}
1052 
1053 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1054 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
1055 			0, 0);
1056 
1057 	ret = fill_res_info(msg, device);
1058 	if (ret)
1059 		goto err_free;
1060 
1061 	nlmsg_end(msg, nlh);
1062 	ib_device_put(device);
1063 	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
1064 
1065 err_free:
1066 	nlmsg_free(msg);
1067 err:
1068 	ib_device_put(device);
1069 	return ret;
1070 }
1071 
1072 static int _nldev_res_get_dumpit(struct ib_device *device,
1073 				 struct sk_buff *skb,
1074 				 struct netlink_callback *cb,
1075 				 unsigned int idx)
1076 {
1077 	int start = cb->args[0];
1078 	struct nlmsghdr *nlh;
1079 
1080 	if (idx < start)
1081 		return 0;
1082 
1083 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
1084 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
1085 			0, NLM_F_MULTI);
1086 
1087 	if (fill_res_info(skb, device)) {
1088 		nlmsg_cancel(skb, nlh);
1089 		goto out;
1090 	}
1091 	nlmsg_end(skb, nlh);
1092 
1093 	idx++;
1094 
1095 out:
1096 	cb->args[0] = idx;
1097 	return skb->len;
1098 }
1099 
/* Dumpit handler for RDMA_NLDEV_CMD_RES_GET: run _nldev_res_get_dumpit()
 * over every registered ib_device.
 */
static int nldev_res_get_dumpit(struct sk_buff *skb,
				struct netlink_callback *cb)
{
	return ib_enum_all_devs(_nldev_res_get_dumpit, skb, cb);
}
1105 
/*
 * Per-restrack-type dispatch data used by res_get_common_doit() and
 * res_get_common_dumpit() to build the netlink reply for one resource type.
 */
struct nldev_fill_res_entry {
	/* Serializes one tracked object into the reply message. */
	int (*fill_res_func)(struct sk_buff *msg, bool has_cap_net_admin,
			     struct rdma_restrack_entry *res, u32 port);
	enum rdma_nldev_attr nldev_attr; /* nest attribute wrapping the table */
	enum rdma_nldev_command nldev_cmd; /* netlink command of the reply */
	u8 flags; /* NLDEV_PER_DEV or 0 */
	u32 entry; /* nest attribute wrapping a single entry */
	u32 id; /* request attribute carrying the object's id */
};

enum nldev_res_flags {
	/* Object is tracked per device; requests must not carry a port index. */
	NLDEV_PER_DEV = 1 << 0,
};

static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
	[RDMA_RESTRACK_QP] = {
		.fill_res_func = fill_res_qp_entry,
		.nldev_cmd = RDMA_NLDEV_CMD_RES_QP_GET,
		.nldev_attr = RDMA_NLDEV_ATTR_RES_QP,
		.entry = RDMA_NLDEV_ATTR_RES_QP_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_LQPN,
	},
	[RDMA_RESTRACK_CM_ID] = {
		.fill_res_func = fill_res_cm_id_entry,
		.nldev_cmd = RDMA_NLDEV_CMD_RES_CM_ID_GET,
		.nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID,
		.entry = RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_CM_IDN,
	},
	[RDMA_RESTRACK_CQ] = {
		.fill_res_func = fill_res_cq_entry,
		.nldev_cmd = RDMA_NLDEV_CMD_RES_CQ_GET,
		.nldev_attr = RDMA_NLDEV_ATTR_RES_CQ,
		.flags = NLDEV_PER_DEV,
		.entry = RDMA_NLDEV_ATTR_RES_CQ_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_CQN,
	},
	[RDMA_RESTRACK_MR] = {
		.fill_res_func = fill_res_mr_entry,
		.nldev_cmd = RDMA_NLDEV_CMD_RES_MR_GET,
		.nldev_attr = RDMA_NLDEV_ATTR_RES_MR,
		.flags = NLDEV_PER_DEV,
		.entry = RDMA_NLDEV_ATTR_RES_MR_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_MRN,
	},
	[RDMA_RESTRACK_PD] = {
		.fill_res_func = fill_res_pd_entry,
		.nldev_cmd = RDMA_NLDEV_CMD_RES_PD_GET,
		.nldev_attr = RDMA_NLDEV_ATTR_RES_PD,
		.flags = NLDEV_PER_DEV,
		.entry = RDMA_NLDEV_ATTR_RES_PD_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_PDN,
	},
	[RDMA_RESTRACK_COUNTER] = {
		.fill_res_func = fill_res_counter_entry,
		.nldev_cmd = RDMA_NLDEV_CMD_STAT_GET,
		.nldev_attr = RDMA_NLDEV_ATTR_STAT_COUNTER,
		.entry = RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY,
		.id = RDMA_NLDEV_ATTR_STAT_COUNTER_ID,
	},
};
1167 
1168 static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1169 			       struct netlink_ext_ack *extack,
1170 			       enum rdma_restrack_type res_type)
1171 {
1172 	const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
1173 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1174 	struct rdma_restrack_entry *res;
1175 	struct ib_device *device;
1176 	u32 index, id, port = 0;
1177 	bool has_cap_net_admin;
1178 	struct sk_buff *msg;
1179 	int ret;
1180 
1181 	ret = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1182 				     nldev_policy, extack);
1183 	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !fe->id || !tb[fe->id])
1184 		return -EINVAL;
1185 
1186 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1187 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1188 	if (!device)
1189 		return -EINVAL;
1190 
1191 	if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1192 		port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1193 		if (!rdma_is_port_valid(device, port)) {
1194 			ret = -EINVAL;
1195 			goto err;
1196 		}
1197 	}
1198 
1199 	if ((port && fe->flags & NLDEV_PER_DEV) ||
1200 	    (!port && ~fe->flags & NLDEV_PER_DEV)) {
1201 		ret = -EINVAL;
1202 		goto err;
1203 	}
1204 
1205 	id = nla_get_u32(tb[fe->id]);
1206 	res = rdma_restrack_get_byid(device, res_type, id);
1207 	if (IS_ERR(res)) {
1208 		ret = PTR_ERR(res);
1209 		goto err;
1210 	}
1211 
1212 	if (!rdma_is_visible_in_pid_ns(res)) {
1213 		ret = -ENOENT;
1214 		goto err_get;
1215 	}
1216 
1217 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1218 	if (!msg) {
1219 		ret = -ENOMEM;
1220 		goto err;
1221 	}
1222 
1223 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1224 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd),
1225 			0, 0);
1226 
1227 	if (fill_nldev_handle(msg, device)) {
1228 		ret = -EMSGSIZE;
1229 		goto err_free;
1230 	}
1231 
1232 	has_cap_net_admin = netlink_capable(skb, CAP_NET_ADMIN);
1233 	ret = fe->fill_res_func(msg, has_cap_net_admin, res, port);
1234 	rdma_restrack_put(res);
1235 	if (ret)
1236 		goto err_free;
1237 
1238 	nlmsg_end(msg, nlh);
1239 	ib_device_put(device);
1240 	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
1241 
1242 err_free:
1243 	nlmsg_free(msg);
1244 err_get:
1245 	rdma_restrack_put(res);
1246 err:
1247 	ib_device_put(device);
1248 	return ret;
1249 }
1250 
1251 static int res_get_common_dumpit(struct sk_buff *skb,
1252 				 struct netlink_callback *cb,
1253 				 enum rdma_restrack_type res_type)
1254 {
1255 	const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
1256 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1257 	struct rdma_restrack_entry *res;
1258 	struct rdma_restrack_root *rt;
1259 	int err, ret = 0, idx = 0;
1260 	struct nlattr *table_attr;
1261 	struct nlattr *entry_attr;
1262 	struct ib_device *device;
1263 	int start = cb->args[0];
1264 	bool has_cap_net_admin;
1265 	struct nlmsghdr *nlh;
1266 	unsigned long id;
1267 	u32 index, port = 0;
1268 	bool filled = false;
1269 
1270 	err = nlmsg_parse_deprecated(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1271 				     nldev_policy, NULL);
1272 	/*
1273 	 * Right now, we are expecting the device index to get res information,
1274 	 * but it is possible to extend this code to return all devices in
1275 	 * one shot by checking the existence of RDMA_NLDEV_ATTR_DEV_INDEX.
1276 	 * if it doesn't exist, we will iterate over all devices.
1277 	 *
1278 	 * But it is not needed for now.
1279 	 */
1280 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1281 		return -EINVAL;
1282 
1283 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1284 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1285 	if (!device)
1286 		return -EINVAL;
1287 
1288 	/*
1289 	 * If no PORT_INDEX is supplied, we will return all QPs from that device
1290 	 */
1291 	if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1292 		port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1293 		if (!rdma_is_port_valid(device, port)) {
1294 			ret = -EINVAL;
1295 			goto err_index;
1296 		}
1297 	}
1298 
1299 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
1300 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd),
1301 			0, NLM_F_MULTI);
1302 
1303 	if (fill_nldev_handle(skb, device)) {
1304 		ret = -EMSGSIZE;
1305 		goto err;
1306 	}
1307 
1308 	table_attr = nla_nest_start_noflag(skb, fe->nldev_attr);
1309 	if (!table_attr) {
1310 		ret = -EMSGSIZE;
1311 		goto err;
1312 	}
1313 
1314 	has_cap_net_admin = netlink_capable(cb->skb, CAP_NET_ADMIN);
1315 
1316 	rt = &device->res[res_type];
1317 	xa_lock(&rt->xa);
1318 	/*
1319 	 * FIXME: if the skip ahead is something common this loop should
1320 	 * use xas_for_each & xas_pause to optimize, we can have a lot of
1321 	 * objects.
1322 	 */
1323 	xa_for_each(&rt->xa, id, res) {
1324 		if (!rdma_is_visible_in_pid_ns(res))
1325 			continue;
1326 
1327 		if (idx < start || !rdma_restrack_get(res))
1328 			goto next;
1329 
1330 		xa_unlock(&rt->xa);
1331 
1332 		filled = true;
1333 
1334 		entry_attr = nla_nest_start_noflag(skb, fe->entry);
1335 		if (!entry_attr) {
1336 			ret = -EMSGSIZE;
1337 			rdma_restrack_put(res);
1338 			goto msg_full;
1339 		}
1340 
1341 		ret = fe->fill_res_func(skb, has_cap_net_admin, res, port);
1342 		rdma_restrack_put(res);
1343 
1344 		if (ret) {
1345 			nla_nest_cancel(skb, entry_attr);
1346 			if (ret == -EMSGSIZE)
1347 				goto msg_full;
1348 			if (ret == -EAGAIN)
1349 				goto again;
1350 			goto res_err;
1351 		}
1352 		nla_nest_end(skb, entry_attr);
1353 again:		xa_lock(&rt->xa);
1354 next:		idx++;
1355 	}
1356 	xa_unlock(&rt->xa);
1357 
1358 msg_full:
1359 	nla_nest_end(skb, table_attr);
1360 	nlmsg_end(skb, nlh);
1361 	cb->args[0] = idx;
1362 
1363 	/*
1364 	 * No more entries to fill, cancel the message and
1365 	 * return 0 to mark end of dumpit.
1366 	 */
1367 	if (!filled)
1368 		goto err;
1369 
1370 	ib_device_put(device);
1371 	return skb->len;
1372 
1373 res_err:
1374 	nla_nest_cancel(skb, table_attr);
1375 
1376 err:
1377 	nlmsg_cancel(skb, nlh);
1378 
1379 err_index:
1380 	ib_device_put(device);
1381 	return ret;
1382 }
1383 
/*
 * Instantiate nldev_res_get_<name>_doit()/nldev_res_get_<name>_dumpit()
 * wrappers that forward to the common restrack handlers with the matching
 * resource type. The generated functions are referenced by nldev_cb_table.
 */
#define RES_GET_FUNCS(name, type)                                              \
	static int nldev_res_get_##name##_dumpit(struct sk_buff *skb,          \
						 struct netlink_callback *cb)  \
	{                                                                      \
		return res_get_common_dumpit(skb, cb, type);                   \
	}                                                                      \
	static int nldev_res_get_##name##_doit(struct sk_buff *skb,            \
					       struct nlmsghdr *nlh,           \
					       struct netlink_ext_ack *extack) \
	{                                                                      \
		return res_get_common_doit(skb, nlh, extack, type);            \
	}

RES_GET_FUNCS(qp, RDMA_RESTRACK_QP);
RES_GET_FUNCS(cm_id, RDMA_RESTRACK_CM_ID);
RES_GET_FUNCS(cq, RDMA_RESTRACK_CQ);
RES_GET_FUNCS(pd, RDMA_RESTRACK_PD);
RES_GET_FUNCS(mr, RDMA_RESTRACK_MR);
RES_GET_FUNCS(counter, RDMA_RESTRACK_COUNTER);
1403 
/* Registry of rdma_link_ops implementations, protected by link_ops_rwsem. */
static LIST_HEAD(link_ops);
static DECLARE_RWSEM(link_ops_rwsem);
1406 
1407 static const struct rdma_link_ops *link_ops_get(const char *type)
1408 {
1409 	const struct rdma_link_ops *ops;
1410 
1411 	list_for_each_entry(ops, &link_ops, list) {
1412 		if (!strcmp(ops->type, type))
1413 			goto out;
1414 	}
1415 	ops = NULL;
1416 out:
1417 	return ops;
1418 }
1419 
1420 void rdma_link_register(struct rdma_link_ops *ops)
1421 {
1422 	down_write(&link_ops_rwsem);
1423 	if (WARN_ON_ONCE(link_ops_get(ops->type)))
1424 		goto out;
1425 	list_add(&ops->list, &link_ops);
1426 out:
1427 	up_write(&link_ops_rwsem);
1428 }
1429 EXPORT_SYMBOL(rdma_link_register);
1430 
/* Remove previously registered link operations from the global list. */
void rdma_link_unregister(struct rdma_link_ops *ops)
{
	down_write(&link_ops_rwsem);
	list_del(&ops->list);
	up_write(&link_ops_rwsem);
}
EXPORT_SYMBOL(rdma_link_unregister);
1438 
/*
 * RDMA_NLDEV_CMD_NEWLINK handler: create a new soft RDMA device of the
 * requested link type on top of the named net_device.
 *
 * Returns 0 on success, -EINVAL on bad/missing attributes or unknown link
 * type, -ENODEV when the net_device does not exist, or the newlink()
 * callback's error.
 */
static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
			  struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	char ibdev_name[IB_DEVICE_NAME_MAX];
	const struct rdma_link_ops *ops;
	char ndev_name[IFNAMSIZ];
	struct net_device *ndev;
	char type[IFNAMSIZ];
	int err;

	err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
				     nldev_policy, extack);
	if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] ||
	    !tb[RDMA_NLDEV_ATTR_LINK_TYPE] || !tb[RDMA_NLDEV_ATTR_NDEV_NAME])
		return -EINVAL;

	nla_strlcpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
		    sizeof(ibdev_name));
	/* reject '%' name templates from userspace */
	if (strchr(ibdev_name, '%'))
		return -EINVAL;

	nla_strlcpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type));
	nla_strlcpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME],
		    sizeof(ndev_name));

	ndev = dev_get_by_name(&init_net, ndev_name);
	if (!ndev)
		return -ENODEV;

	down_read(&link_ops_rwsem);
	ops = link_ops_get(type);
#ifdef CONFIG_MODULES
	if (!ops) {
		/*
		 * Drop the lock while loading the module so that the
		 * module's init can call rdma_link_register(), then
		 * retry the lookup.
		 */
		up_read(&link_ops_rwsem);
		request_module("rdma-link-%s", type);
		down_read(&link_ops_rwsem);
		ops = link_ops_get(type);
	}
#endif
	err = ops ? ops->newlink(ibdev_name, ndev) : -EINVAL;
	up_read(&link_ops_rwsem);
	dev_put(ndev);

	return err;
}
1485 
1486 static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
1487 			  struct netlink_ext_ack *extack)
1488 {
1489 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1490 	struct ib_device *device;
1491 	u32 index;
1492 	int err;
1493 
1494 	err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1495 				     nldev_policy, extack);
1496 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1497 		return -EINVAL;
1498 
1499 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1500 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1501 	if (!device)
1502 		return -EINVAL;
1503 
1504 	if (!(device->attrs.device_cap_flags & IB_DEVICE_ALLOW_USER_UNREG)) {
1505 		ib_device_put(device);
1506 		return -EINVAL;
1507 	}
1508 
1509 	ib_unregister_device_and_put(device);
1510 	return 0;
1511 }
1512 
1513 static int nldev_get_chardev(struct sk_buff *skb, struct nlmsghdr *nlh,
1514 			     struct netlink_ext_ack *extack)
1515 {
1516 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1517 	char client_name[RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE];
1518 	struct ib_client_nl_info data = {};
1519 	struct ib_device *ibdev = NULL;
1520 	struct sk_buff *msg;
1521 	u32 index;
1522 	int err;
1523 
1524 	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
1525 			  extack);
1526 	if (err || !tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE])
1527 		return -EINVAL;
1528 
1529 	nla_strlcpy(client_name, tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE],
1530 		    sizeof(client_name));
1531 
1532 	if (tb[RDMA_NLDEV_ATTR_DEV_INDEX]) {
1533 		index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1534 		ibdev = ib_device_get_by_index(sock_net(skb->sk), index);
1535 		if (!ibdev)
1536 			return -EINVAL;
1537 
1538 		if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1539 			data.port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1540 			if (!rdma_is_port_valid(ibdev, data.port)) {
1541 				err = -EINVAL;
1542 				goto out_put;
1543 			}
1544 		} else {
1545 			data.port = -1;
1546 		}
1547 	} else if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1548 		return -EINVAL;
1549 	}
1550 
1551 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1552 	if (!msg) {
1553 		err = -ENOMEM;
1554 		goto out_put;
1555 	}
1556 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1557 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1558 					 RDMA_NLDEV_CMD_GET_CHARDEV),
1559 			0, 0);
1560 
1561 	data.nl_msg = msg;
1562 	err = ib_get_client_nl_info(ibdev, client_name, &data);
1563 	if (err)
1564 		goto out_nlmsg;
1565 
1566 	err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV,
1567 				huge_encode_dev(data.cdev->devt),
1568 				RDMA_NLDEV_ATTR_PAD);
1569 	if (err)
1570 		goto out_data;
1571 	err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV_ABI, data.abi,
1572 				RDMA_NLDEV_ATTR_PAD);
1573 	if (err)
1574 		goto out_data;
1575 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_CHARDEV_NAME,
1576 			   dev_name(data.cdev))) {
1577 		err = -EMSGSIZE;
1578 		goto out_data;
1579 	}
1580 
1581 	nlmsg_end(msg, nlh);
1582 	put_device(data.cdev);
1583 	if (ibdev)
1584 		ib_device_put(ibdev);
1585 	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
1586 
1587 out_data:
1588 	put_device(data.cdev);
1589 out_nlmsg:
1590 	nlmsg_free(msg);
1591 out_put:
1592 	if (ibdev)
1593 		ib_device_put(ibdev);
1594 	return err;
1595 }
1596 
1597 static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1598 			      struct netlink_ext_ack *extack)
1599 {
1600 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1601 	struct sk_buff *msg;
1602 	int err;
1603 
1604 	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1605 			  nldev_policy, extack);
1606 	if (err)
1607 		return err;
1608 
1609 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1610 	if (!msg)
1611 		return -ENOMEM;
1612 
1613 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1614 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1615 					 RDMA_NLDEV_CMD_SYS_GET),
1616 			0, 0);
1617 
1618 	err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_NETNS_MODE,
1619 			 (u8)ib_devices_shared_netns);
1620 	if (err) {
1621 		nlmsg_free(msg);
1622 		return err;
1623 	}
1624 	nlmsg_end(msg, nlh);
1625 	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
1626 }
1627 
1628 static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1629 				  struct netlink_ext_ack *extack)
1630 {
1631 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1632 	u8 enable;
1633 	int err;
1634 
1635 	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1636 			  nldev_policy, extack);
1637 	if (err || !tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE])
1638 		return -EINVAL;
1639 
1640 	enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]);
1641 	/* Only 0 and 1 are supported */
1642 	if (enable > 1)
1643 		return -EINVAL;
1644 
1645 	err = rdma_compatdev_set(enable);
1646 	return err;
1647 }
1648 
1649 static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1650 			       struct netlink_ext_ack *extack)
1651 {
1652 	u32 index, port, mode, mask = 0, qpn, cntn = 0;
1653 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1654 	struct ib_device *device;
1655 	struct sk_buff *msg;
1656 	int ret;
1657 
1658 	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1659 			  nldev_policy, extack);
1660 	/* Currently only counter for QP is supported */
1661 	if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES] ||
1662 	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
1663 	    !tb[RDMA_NLDEV_ATTR_PORT_INDEX] || !tb[RDMA_NLDEV_ATTR_STAT_MODE])
1664 		return -EINVAL;
1665 
1666 	if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
1667 		return -EINVAL;
1668 
1669 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1670 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1671 	if (!device)
1672 		return -EINVAL;
1673 
1674 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1675 	if (!rdma_is_port_valid(device, port)) {
1676 		ret = -EINVAL;
1677 		goto err;
1678 	}
1679 
1680 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1681 	if (!msg) {
1682 		ret = -ENOMEM;
1683 		goto err;
1684 	}
1685 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1686 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1687 					 RDMA_NLDEV_CMD_STAT_SET),
1688 			0, 0);
1689 
1690 	mode = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_MODE]);
1691 	if (mode == RDMA_COUNTER_MODE_AUTO) {
1692 		if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK])
1693 			mask = nla_get_u32(
1694 				tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]);
1695 
1696 		ret = rdma_counter_set_auto_mode(device, port,
1697 						 mask ? true : false, mask);
1698 		if (ret)
1699 			goto err_msg;
1700 	} else {
1701 		qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
1702 		if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) {
1703 			cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
1704 			ret = rdma_counter_bind_qpn(device, port, qpn, cntn);
1705 		} else {
1706 			ret = rdma_counter_bind_qpn_alloc(device, port,
1707 							  qpn, &cntn);
1708 		}
1709 		if (ret)
1710 			goto err_msg;
1711 
1712 		if (fill_nldev_handle(msg, device) ||
1713 		    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
1714 		    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
1715 		    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
1716 			ret = -EMSGSIZE;
1717 			goto err_fill;
1718 		}
1719 	}
1720 
1721 	nlmsg_end(msg, nlh);
1722 	ib_device_put(device);
1723 	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
1724 
1725 err_fill:
1726 	rdma_counter_unbind_qpn(device, port, qpn, cntn);
1727 err_msg:
1728 	nlmsg_free(msg);
1729 err:
1730 	ib_device_put(device);
1731 	return ret;
1732 }
1733 
1734 static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1735 			       struct netlink_ext_ack *extack)
1736 {
1737 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1738 	struct ib_device *device;
1739 	struct sk_buff *msg;
1740 	u32 index, port, qpn, cntn;
1741 	int ret;
1742 
1743 	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1744 			  nldev_policy, extack);
1745 	if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES] ||
1746 	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX] ||
1747 	    !tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID] ||
1748 	    !tb[RDMA_NLDEV_ATTR_RES_LQPN])
1749 		return -EINVAL;
1750 
1751 	if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
1752 		return -EINVAL;
1753 
1754 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1755 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1756 	if (!device)
1757 		return -EINVAL;
1758 
1759 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1760 	if (!rdma_is_port_valid(device, port)) {
1761 		ret = -EINVAL;
1762 		goto err;
1763 	}
1764 
1765 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1766 	if (!msg) {
1767 		ret = -ENOMEM;
1768 		goto err;
1769 	}
1770 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1771 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1772 					 RDMA_NLDEV_CMD_STAT_SET),
1773 			0, 0);
1774 
1775 	cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
1776 	qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
1777 	ret = rdma_counter_unbind_qpn(device, port, qpn, cntn);
1778 	if (ret)
1779 		goto err_unbind;
1780 
1781 	if (fill_nldev_handle(msg, device) ||
1782 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
1783 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
1784 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
1785 		ret = -EMSGSIZE;
1786 		goto err_fill;
1787 	}
1788 
1789 	nlmsg_end(msg, nlh);
1790 	ib_device_put(device);
1791 	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
1792 
1793 err_fill:
1794 	rdma_counter_bind_qpn(device, port, qpn, cntn);
1795 err_unbind:
1796 	nlmsg_free(msg);
1797 err:
1798 	ib_device_put(device);
1799 	return ret;
1800 }
1801 
1802 static int stat_get_doit_default_counter(struct sk_buff *skb,
1803 					 struct nlmsghdr *nlh,
1804 					 struct netlink_ext_ack *extack,
1805 					 struct nlattr *tb[])
1806 {
1807 	struct rdma_hw_stats *stats;
1808 	struct nlattr *table_attr;
1809 	struct ib_device *device;
1810 	int ret, num_cnts, i;
1811 	struct sk_buff *msg;
1812 	u32 index, port;
1813 	u64 v;
1814 
1815 	if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
1816 		return -EINVAL;
1817 
1818 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1819 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1820 	if (!device)
1821 		return -EINVAL;
1822 
1823 	if (!device->ops.alloc_hw_stats || !device->ops.get_hw_stats) {
1824 		ret = -EINVAL;
1825 		goto err;
1826 	}
1827 
1828 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1829 	if (!rdma_is_port_valid(device, port)) {
1830 		ret = -EINVAL;
1831 		goto err;
1832 	}
1833 
1834 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1835 	if (!msg) {
1836 		ret = -ENOMEM;
1837 		goto err;
1838 	}
1839 
1840 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1841 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1842 					 RDMA_NLDEV_CMD_STAT_GET),
1843 			0, 0);
1844 
1845 	if (fill_nldev_handle(msg, device) ||
1846 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) {
1847 		ret = -EMSGSIZE;
1848 		goto err_msg;
1849 	}
1850 
1851 	stats = device->port_data ? device->port_data[port].hw_stats : NULL;
1852 	if (stats == NULL) {
1853 		ret = -EINVAL;
1854 		goto err_msg;
1855 	}
1856 	mutex_lock(&stats->lock);
1857 
1858 	num_cnts = device->ops.get_hw_stats(device, stats, port, 0);
1859 	if (num_cnts < 0) {
1860 		ret = -EINVAL;
1861 		goto err_stats;
1862 	}
1863 
1864 	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
1865 	if (!table_attr) {
1866 		ret = -EMSGSIZE;
1867 		goto err_stats;
1868 	}
1869 	for (i = 0; i < num_cnts; i++) {
1870 		v = stats->value[i] +
1871 			rdma_counter_get_hwstat_value(device, port, i);
1872 		if (fill_stat_hwcounter_entry(msg, stats->names[i], v)) {
1873 			ret = -EMSGSIZE;
1874 			goto err_table;
1875 		}
1876 	}
1877 	nla_nest_end(msg, table_attr);
1878 
1879 	mutex_unlock(&stats->lock);
1880 	nlmsg_end(msg, nlh);
1881 	ib_device_put(device);
1882 	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
1883 
1884 err_table:
1885 	nla_nest_cancel(msg, table_attr);
1886 err_stats:
1887 	mutex_unlock(&stats->lock);
1888 err_msg:
1889 	nlmsg_free(msg);
1890 err:
1891 	ib_device_put(device);
1892 	return ret;
1893 }
1894 
1895 static int stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh,
1896 			    struct netlink_ext_ack *extack, struct nlattr *tb[])
1897 
1898 {
1899 	static enum rdma_nl_counter_mode mode;
1900 	static enum rdma_nl_counter_mask mask;
1901 	struct ib_device *device;
1902 	struct sk_buff *msg;
1903 	u32 index, port;
1904 	int ret;
1905 
1906 	if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID])
1907 		return nldev_res_get_counter_doit(skb, nlh, extack);
1908 
1909 	if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] ||
1910 	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
1911 		return -EINVAL;
1912 
1913 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1914 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1915 	if (!device)
1916 		return -EINVAL;
1917 
1918 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1919 	if (!rdma_is_port_valid(device, port)) {
1920 		ret = -EINVAL;
1921 		goto err;
1922 	}
1923 
1924 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1925 	if (!msg) {
1926 		ret = -ENOMEM;
1927 		goto err;
1928 	}
1929 
1930 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1931 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1932 					 RDMA_NLDEV_CMD_STAT_GET),
1933 			0, 0);
1934 
1935 	ret = rdma_counter_get_mode(device, port, &mode, &mask);
1936 	if (ret)
1937 		goto err_msg;
1938 
1939 	if (fill_nldev_handle(msg, device) ||
1940 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
1941 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, mode))
1942 		goto err_msg;
1943 
1944 	if ((mode == RDMA_COUNTER_MODE_AUTO) &&
1945 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK, mask))
1946 		goto err_msg;
1947 
1948 	nlmsg_end(msg, nlh);
1949 	ib_device_put(device);
1950 	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
1951 
1952 err_msg:
1953 	nlmsg_free(msg);
1954 err:
1955 	ib_device_put(device);
1956 	return ret;
1957 }
1958 
1959 static int nldev_stat_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1960 			       struct netlink_ext_ack *extack)
1961 {
1962 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1963 	int ret;
1964 
1965 	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1966 			  nldev_policy, extack);
1967 	if (ret)
1968 		return -EINVAL;
1969 
1970 	if (!tb[RDMA_NLDEV_ATTR_STAT_RES])
1971 		return stat_get_doit_default_counter(skb, nlh, extack, tb);
1972 
1973 	switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) {
1974 	case RDMA_NLDEV_ATTR_RES_QP:
1975 		ret = stat_get_doit_qp(skb, nlh, extack, tb);
1976 		break;
1977 
1978 	default:
1979 		ret = -EINVAL;
1980 		break;
1981 	}
1982 
1983 	return ret;
1984 }
1985 
1986 static int nldev_stat_get_dumpit(struct sk_buff *skb,
1987 				 struct netlink_callback *cb)
1988 {
1989 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1990 	int ret;
1991 
1992 	ret = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1993 			  nldev_policy, NULL);
1994 	if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES])
1995 		return -EINVAL;
1996 
1997 	switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) {
1998 	case RDMA_NLDEV_ATTR_RES_QP:
1999 		ret = nldev_res_get_counter_dumpit(skb, cb);
2000 		break;
2001 
2002 	default:
2003 		ret = -EINVAL;
2004 		break;
2005 	}
2006 
2007 	return ret;
2008 }
2009 
/*
 * Dispatch table registered with the RDMA netlink core: maps each
 * RDMA_NLDEV_CMD_* to its doit/dumpit handler. Entries marked
 * RDMA_NL_ADMIN_PERM require CAP_NET_ADMIN.
 */
static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
	[RDMA_NLDEV_CMD_GET] = {
		.doit = nldev_get_doit,
		.dump = nldev_get_dumpit,
	},
	[RDMA_NLDEV_CMD_GET_CHARDEV] = {
		.doit = nldev_get_chardev,
	},
	[RDMA_NLDEV_CMD_SET] = {
		.doit = nldev_set_doit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_NEWLINK] = {
		.doit = nldev_newlink,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_DELLINK] = {
		.doit = nldev_dellink,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_PORT_GET] = {
		.doit = nldev_port_get_doit,
		.dump = nldev_port_get_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_GET] = {
		.doit = nldev_res_get_doit,
		.dump = nldev_res_get_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_QP_GET] = {
		.doit = nldev_res_get_qp_doit,
		.dump = nldev_res_get_qp_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_CM_ID_GET] = {
		.doit = nldev_res_get_cm_id_doit,
		.dump = nldev_res_get_cm_id_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_CQ_GET] = {
		.doit = nldev_res_get_cq_doit,
		.dump = nldev_res_get_cq_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_MR_GET] = {
		.doit = nldev_res_get_mr_doit,
		.dump = nldev_res_get_mr_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_PD_GET] = {
		.doit = nldev_res_get_pd_doit,
		.dump = nldev_res_get_pd_dumpit,
	},
	[RDMA_NLDEV_CMD_SYS_GET] = {
		.doit = nldev_sys_get_doit,
	},
	[RDMA_NLDEV_CMD_SYS_SET] = {
		.doit = nldev_set_sys_set_doit,
	},
	[RDMA_NLDEV_CMD_STAT_SET] = {
		.doit = nldev_stat_set_doit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_STAT_GET] = {
		.doit = nldev_stat_get_doit,
		.dump = nldev_stat_get_dumpit,
	},
	[RDMA_NLDEV_CMD_STAT_DEL] = {
		.doit = nldev_stat_del_doit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
};
2077 
/* Register the nldev handler table with the RDMA netlink core. */
void __init nldev_init(void)
{
	rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table);
}
2082 
/* Unregister the nldev handler table from the RDMA netlink core. */
void __exit nldev_exit(void)
{
	rdma_nl_unregister(RDMA_NL_NLDEV);
}
2087 
/* Auto-load this module when RDMA netlink client index 5 (NLDEV) is used. */
MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_NLDEV, 5);
2089