xref: /linux/drivers/infiniband/core/nldev.c (revision e04e2b760ddbe3d7b283a05898c3a029085cd8cd)
1 /*
2  * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  * 3. Neither the names of the copyright holders nor the names of its
13  *    contributors may be used to endorse or promote products derived from
14  *    this software without specific prior written permission.
15  *
16  * Alternatively, this software may be distributed under the terms of the
17  * GNU General Public License ("GPL") version 2 as published by the Free
18  * Software Foundation.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include <linux/module.h>
34 #include <linux/pid.h>
35 #include <linux/pid_namespace.h>
36 #include <linux/mutex.h>
37 #include <net/netlink.h>
38 #include <rdma/rdma_cm.h>
39 #include <rdma/rdma_netlink.h>
40 
41 #include "core_priv.h"
42 #include "cma_priv.h"
43 #include "restrack.h"
44 #include "uverbs.h"
45 
46 /*
47  * This determines whether a non-privileged user is allowed to specify a
48  * controlled QKEY. When set to true, non-privileged users are allowed to
49  * specify a controlled QKEY.
50  */
51 static bool privileged_qkey;
52 
53 typedef int (*res_fill_func_t)(struct sk_buff*, bool,
54 			       struct rdma_restrack_entry*, uint32_t);
55 
56 /*
57  * Keep the array elements sorted by netlink attribute name.
58  */
59 static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
60 	[RDMA_NLDEV_ATTR_CHARDEV]		= { .type = NLA_U64 },
61 	[RDMA_NLDEV_ATTR_CHARDEV_ABI]		= { .type = NLA_U64 },
62 	[RDMA_NLDEV_ATTR_CHARDEV_NAME]		= { .type = NLA_NUL_STRING,
63 					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
64 	[RDMA_NLDEV_ATTR_CHARDEV_TYPE]		= { .type = NLA_NUL_STRING,
65 					.len = RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE },
66 	[RDMA_NLDEV_ATTR_DEV_DIM]               = { .type = NLA_U8 },
67 	[RDMA_NLDEV_ATTR_DEV_INDEX]		= { .type = NLA_U32 },
68 	[RDMA_NLDEV_ATTR_DEV_NAME]		= { .type = NLA_NUL_STRING,
69 					.len = IB_DEVICE_NAME_MAX },
70 	[RDMA_NLDEV_ATTR_DEV_NODE_TYPE]		= { .type = NLA_U8 },
71 	[RDMA_NLDEV_ATTR_DEV_PROTOCOL]		= { .type = NLA_NUL_STRING,
72 					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
73 	[RDMA_NLDEV_ATTR_DRIVER]		= { .type = NLA_NESTED },
74 	[RDMA_NLDEV_ATTR_DRIVER_ENTRY]		= { .type = NLA_NESTED },
75 	[RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE]	= { .type = NLA_U8 },
76 	[RDMA_NLDEV_ATTR_DRIVER_STRING]		= { .type = NLA_NUL_STRING,
77 					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
78 	[RDMA_NLDEV_ATTR_DRIVER_S32]		= { .type = NLA_S32 },
79 	[RDMA_NLDEV_ATTR_DRIVER_S64]		= { .type = NLA_S64 },
80 	[RDMA_NLDEV_ATTR_DRIVER_U32]		= { .type = NLA_U32 },
81 	[RDMA_NLDEV_ATTR_DRIVER_U64]		= { .type = NLA_U64 },
82 	[RDMA_NLDEV_ATTR_FW_VERSION]		= { .type = NLA_NUL_STRING,
83 					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
84 	[RDMA_NLDEV_ATTR_LID]			= { .type = NLA_U32 },
85 	[RDMA_NLDEV_ATTR_LINK_TYPE]		= { .type = NLA_NUL_STRING,
86 					.len = IFNAMSIZ },
87 	[RDMA_NLDEV_ATTR_LMC]			= { .type = NLA_U8 },
88 	[RDMA_NLDEV_ATTR_NDEV_INDEX]		= { .type = NLA_U32 },
89 	[RDMA_NLDEV_ATTR_NDEV_NAME]		= { .type = NLA_NUL_STRING,
90 					.len = IFNAMSIZ },
91 	[RDMA_NLDEV_ATTR_NODE_GUID]		= { .type = NLA_U64 },
92 	[RDMA_NLDEV_ATTR_PORT_INDEX]		= { .type = NLA_U32 },
93 	[RDMA_NLDEV_ATTR_PORT_PHYS_STATE]	= { .type = NLA_U8 },
94 	[RDMA_NLDEV_ATTR_PORT_STATE]		= { .type = NLA_U8 },
95 	[RDMA_NLDEV_ATTR_RES_CM_ID]		= { .type = NLA_NESTED },
96 	[RDMA_NLDEV_ATTR_RES_CM_IDN]		= { .type = NLA_U32 },
97 	[RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY]	= { .type = NLA_NESTED },
98 	[RDMA_NLDEV_ATTR_RES_CQ]		= { .type = NLA_NESTED },
99 	[RDMA_NLDEV_ATTR_RES_CQE]		= { .type = NLA_U32 },
100 	[RDMA_NLDEV_ATTR_RES_CQN]		= { .type = NLA_U32 },
101 	[RDMA_NLDEV_ATTR_RES_CQ_ENTRY]		= { .type = NLA_NESTED },
102 	[RDMA_NLDEV_ATTR_RES_CTX]		= { .type = NLA_NESTED },
103 	[RDMA_NLDEV_ATTR_RES_CTXN]		= { .type = NLA_U32 },
104 	[RDMA_NLDEV_ATTR_RES_CTX_ENTRY]		= { .type = NLA_NESTED },
105 	[RDMA_NLDEV_ATTR_RES_DST_ADDR]		= {
106 			.len = sizeof(struct __kernel_sockaddr_storage) },
107 	[RDMA_NLDEV_ATTR_RES_IOVA]		= { .type = NLA_U64 },
108 	[RDMA_NLDEV_ATTR_RES_KERN_NAME]		= { .type = NLA_NUL_STRING,
109 					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
110 	[RDMA_NLDEV_ATTR_RES_LKEY]		= { .type = NLA_U32 },
111 	[RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY]	= { .type = NLA_U32 },
112 	[RDMA_NLDEV_ATTR_RES_LQPN]		= { .type = NLA_U32 },
113 	[RDMA_NLDEV_ATTR_RES_MR]		= { .type = NLA_NESTED },
114 	[RDMA_NLDEV_ATTR_RES_MRLEN]		= { .type = NLA_U64 },
115 	[RDMA_NLDEV_ATTR_RES_MRN]		= { .type = NLA_U32 },
116 	[RDMA_NLDEV_ATTR_RES_MR_ENTRY]		= { .type = NLA_NESTED },
117 	[RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE]	= { .type = NLA_U8 },
118 	[RDMA_NLDEV_ATTR_RES_PD]		= { .type = NLA_NESTED },
119 	[RDMA_NLDEV_ATTR_RES_PDN]		= { .type = NLA_U32 },
120 	[RDMA_NLDEV_ATTR_RES_PD_ENTRY]		= { .type = NLA_NESTED },
121 	[RDMA_NLDEV_ATTR_RES_PID]		= { .type = NLA_U32 },
122 	[RDMA_NLDEV_ATTR_RES_POLL_CTX]		= { .type = NLA_U8 },
123 	[RDMA_NLDEV_ATTR_RES_PS]		= { .type = NLA_U32 },
124 	[RDMA_NLDEV_ATTR_RES_QP]		= { .type = NLA_NESTED },
125 	[RDMA_NLDEV_ATTR_RES_QP_ENTRY]		= { .type = NLA_NESTED },
126 	[RDMA_NLDEV_ATTR_RES_RAW]		= { .type = NLA_BINARY },
127 	[RDMA_NLDEV_ATTR_RES_RKEY]		= { .type = NLA_U32 },
128 	[RDMA_NLDEV_ATTR_RES_RQPN]		= { .type = NLA_U32 },
129 	[RDMA_NLDEV_ATTR_RES_RQ_PSN]		= { .type = NLA_U32 },
130 	[RDMA_NLDEV_ATTR_RES_SQ_PSN]		= { .type = NLA_U32 },
131 	[RDMA_NLDEV_ATTR_RES_SRC_ADDR]		= {
132 			.len = sizeof(struct __kernel_sockaddr_storage) },
133 	[RDMA_NLDEV_ATTR_RES_STATE]		= { .type = NLA_U8 },
134 	[RDMA_NLDEV_ATTR_RES_SUMMARY]		= { .type = NLA_NESTED },
135 	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY]	= { .type = NLA_NESTED },
136 	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR]= { .type = NLA_U64 },
137 	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME]= { .type = NLA_NUL_STRING,
138 					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
139 	[RDMA_NLDEV_ATTR_RES_TYPE]		= { .type = NLA_U8 },
140 	[RDMA_NLDEV_ATTR_RES_SUBTYPE]		= { .type = NLA_NUL_STRING,
141 					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
142 	[RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY]= { .type = NLA_U32 },
143 	[RDMA_NLDEV_ATTR_RES_USECNT]		= { .type = NLA_U64 },
144 	[RDMA_NLDEV_ATTR_RES_SRQ]		= { .type = NLA_NESTED },
145 	[RDMA_NLDEV_ATTR_RES_SRQN]		= { .type = NLA_U32 },
146 	[RDMA_NLDEV_ATTR_RES_SRQ_ENTRY]		= { .type = NLA_NESTED },
147 	[RDMA_NLDEV_ATTR_MIN_RANGE]		= { .type = NLA_U32 },
148 	[RDMA_NLDEV_ATTR_MAX_RANGE]		= { .type = NLA_U32 },
149 	[RDMA_NLDEV_ATTR_SM_LID]		= { .type = NLA_U32 },
150 	[RDMA_NLDEV_ATTR_SUBNET_PREFIX]		= { .type = NLA_U64 },
151 	[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]	= { .type = NLA_U32 },
152 	[RDMA_NLDEV_ATTR_STAT_MODE]		= { .type = NLA_U32 },
153 	[RDMA_NLDEV_ATTR_STAT_RES]		= { .type = NLA_U32 },
154 	[RDMA_NLDEV_ATTR_STAT_COUNTER]		= { .type = NLA_NESTED },
155 	[RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY]	= { .type = NLA_NESTED },
156 	[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]       = { .type = NLA_U32 },
157 	[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]       = { .type = NLA_NESTED },
158 	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY]  = { .type = NLA_NESTED },
159 	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME] = { .type = NLA_NUL_STRING },
160 	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE] = { .type = NLA_U64 },
161 	[RDMA_NLDEV_ATTR_SYS_IMAGE_GUID]	= { .type = NLA_U64 },
162 	[RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID]	= { .type = NLA_U32 },
163 	[RDMA_NLDEV_NET_NS_FD]			= { .type = NLA_U32 },
164 	[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]	= { .type = NLA_U8 },
165 	[RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK]	= { .type = NLA_U8 },
166 	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX]	= { .type = NLA_U32 },
167 	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC] = { .type = NLA_U8 },
168 	[RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE] = { .type = NLA_U8 },
169 	[RDMA_NLDEV_ATTR_DRIVER_DETAILS]	= { .type = NLA_U8 },
170 	[RDMA_NLDEV_ATTR_DEV_TYPE]		= { .type = NLA_U8 },
171 	[RDMA_NLDEV_ATTR_PARENT_NAME]		= { .type = NLA_NUL_STRING },
172 	[RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE]	= { .type = NLA_U8 },
173 };
174 
175 static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
176 				      enum rdma_nldev_print_type print_type)
177 {
178 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, name))
179 		return -EMSGSIZE;
180 	if (print_type != RDMA_NLDEV_PRINT_TYPE_UNSPEC &&
181 	    nla_put_u8(msg, RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE, print_type))
182 		return -EMSGSIZE;
183 
184 	return 0;
185 }
186 
187 static int _rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name,
188 				   enum rdma_nldev_print_type print_type,
189 				   u32 value)
190 {
191 	if (put_driver_name_print_type(msg, name, print_type))
192 		return -EMSGSIZE;
193 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DRIVER_U32, value))
194 		return -EMSGSIZE;
195 
196 	return 0;
197 }
198 
199 static int _rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name,
200 				   enum rdma_nldev_print_type print_type,
201 				   u64 value)
202 {
203 	if (put_driver_name_print_type(msg, name, print_type))
204 		return -EMSGSIZE;
205 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_DRIVER_U64, value,
206 			      RDMA_NLDEV_ATTR_PAD))
207 		return -EMSGSIZE;
208 
209 	return 0;
210 }
211 
212 int rdma_nl_put_driver_string(struct sk_buff *msg, const char *name,
213 			      const char *str)
214 {
215 	if (put_driver_name_print_type(msg, name,
216 				       RDMA_NLDEV_PRINT_TYPE_UNSPEC))
217 		return -EMSGSIZE;
218 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, str))
219 		return -EMSGSIZE;
220 
221 	return 0;
222 }
223 EXPORT_SYMBOL(rdma_nl_put_driver_string);
224 
225 int rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name, u32 value)
226 {
227 	return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
228 				       value);
229 }
230 EXPORT_SYMBOL(rdma_nl_put_driver_u32);
231 
232 int rdma_nl_put_driver_u32_hex(struct sk_buff *msg, const char *name,
233 			       u32 value)
234 {
235 	return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX,
236 				       value);
237 }
238 EXPORT_SYMBOL(rdma_nl_put_driver_u32_hex);
239 
240 int rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, u64 value)
241 {
242 	return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
243 				       value);
244 }
245 EXPORT_SYMBOL(rdma_nl_put_driver_u64);
246 
247 int rdma_nl_put_driver_u64_hex(struct sk_buff *msg, const char *name, u64 value)
248 {
249 	return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX,
250 				       value);
251 }
252 EXPORT_SYMBOL(rdma_nl_put_driver_u64_hex);
253 
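/*
 * Illustrative sketch (not part of this file): how a provider driver's
 * restrack callback might use the rdma_nl_put_driver_*() helpers above to
 * attach driver-specific attributes to a resource dump. The names
 * foo_fill_res_qp_entry, struct foo_qp, sq_wqe_cnt and hw_state are
 * hypothetical.
 */
struct foo_qp {
	struct ib_qp ibqp;
	u32 sq_wqe_cnt;
	u32 hw_state;
};

static int foo_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ibqp)
{
	struct foo_qp *qp = container_of(ibqp, struct foo_qp, ibqp);
	struct nlattr *table_attr;

	/* Driver attributes are nested under RDMA_NLDEV_ATTR_DRIVER */
	table_attr = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_DRIVER);
	if (!table_attr)
		return -EMSGSIZE;

	if (rdma_nl_put_driver_u32(msg, "sq_wqe_cnt", qp->sq_wqe_cnt))
		goto err;
	if (rdma_nl_put_driver_u32_hex(msg, "hw_state", qp->hw_state))
		goto err;

	nla_nest_end(msg, table_attr);
	return 0;

err:
	nla_nest_cancel(msg, table_attr);
	return -EMSGSIZE;
}
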
254 bool rdma_nl_get_privileged_qkey(void)
255 {
256 	return privileged_qkey || capable(CAP_NET_RAW);
257 }
258 EXPORT_SYMBOL(rdma_nl_get_privileged_qkey);
259 
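/*
 * Illustrative sketch (not part of this file): how a verbs path could use
 * rdma_nl_get_privileged_qkey() to decide whether the caller may set a
 * controlled QKEY (one with the most significant bit set). The helper name
 * foo_validate_qkey() and the MSB test are assumptions for the example.
 */
static int foo_validate_qkey(u32 qkey)
{
	/* Uncontrolled QKEYs (MSB clear) are always allowed */
	if (!(qkey & BIT(31)))
		return 0;

	/* Controlled QKEYs need the global knob or CAP_NET_RAW */
	return rdma_nl_get_privileged_qkey() ? 0 : -EPERM;
}
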
260 static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
261 {
262 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
263 		return -EMSGSIZE;
264 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME,
265 			   dev_name(&device->dev)))
266 		return -EMSGSIZE;
267 
268 	return 0;
269 }
270 
271 static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
272 {
273 	char fw[IB_FW_VERSION_NAME_MAX];
274 	int ret = 0;
275 	u32 port;
276 
277 	if (fill_nldev_handle(msg, device))
278 		return -EMSGSIZE;
279 
280 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, rdma_end_port(device)))
281 		return -EMSGSIZE;
282 
283 	BUILD_BUG_ON(sizeof(device->attrs.device_cap_flags) != sizeof(u64));
284 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
285 			      device->attrs.device_cap_flags,
286 			      RDMA_NLDEV_ATTR_PAD))
287 		return -EMSGSIZE;
288 
289 	ib_get_device_fw_str(device, fw);
290 	/* Device without FW has strlen(fw) = 0 */
291 	if (strlen(fw) && nla_put_string(msg, RDMA_NLDEV_ATTR_FW_VERSION, fw))
292 		return -EMSGSIZE;
293 
294 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_NODE_GUID,
295 			      be64_to_cpu(device->node_guid),
296 			      RDMA_NLDEV_ATTR_PAD))
297 		return -EMSGSIZE;
298 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SYS_IMAGE_GUID,
299 			      be64_to_cpu(device->attrs.sys_image_guid),
300 			      RDMA_NLDEV_ATTR_PAD))
301 		return -EMSGSIZE;
302 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_NODE_TYPE, device->node_type))
303 		return -EMSGSIZE;
304 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, device->use_cq_dim))
305 		return -EMSGSIZE;
306 
307 	if (device->type &&
308 	    nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_TYPE, device->type))
309 		return -EMSGSIZE;
310 
311 	if (device->parent &&
312 	    nla_put_string(msg, RDMA_NLDEV_ATTR_PARENT_NAME,
313 			   dev_name(&device->parent->dev)))
314 		return -EMSGSIZE;
315 
316 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE,
317 		       device->name_assign_type))
318 		return -EMSGSIZE;
319 
320 	/*
321 	 * The link type is determined from the first port. mlx4 devices,
322 	 * which can potentially have two different link types on the same
323 	 * IB device, are considered better avoided in the future.
324 	 */
325 	port = rdma_start_port(device);
326 	if (rdma_cap_opa_mad(device, port))
327 		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "opa");
328 	else if (rdma_protocol_ib(device, port))
329 		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "ib");
330 	else if (rdma_protocol_iwarp(device, port))
331 		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "iw");
332 	else if (rdma_protocol_roce(device, port))
333 		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "roce");
334 	else if (rdma_protocol_usnic(device, port))
335 		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL,
336 				     "usnic");
337 	return ret;
338 }
339 
340 static int fill_port_info(struct sk_buff *msg,
341 			  struct ib_device *device, u32 port,
342 			  const struct net *net)
343 {
344 	struct net_device *netdev = NULL;
345 	struct ib_port_attr attr;
346 	int ret;
347 	u64 cap_flags = 0;
348 
349 	if (fill_nldev_handle(msg, device))
350 		return -EMSGSIZE;
351 
352 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
353 		return -EMSGSIZE;
354 
355 	ret = ib_query_port(device, port, &attr);
356 	if (ret)
357 		return ret;
358 
359 	if (rdma_protocol_ib(device, port)) {
360 		BUILD_BUG_ON((sizeof(attr.port_cap_flags) +
361 				sizeof(attr.port_cap_flags2)) > sizeof(u64));
362 		cap_flags = attr.port_cap_flags |
363 			((u64)attr.port_cap_flags2 << 32);
364 		if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
365 				      cap_flags, RDMA_NLDEV_ATTR_PAD))
366 			return -EMSGSIZE;
367 		if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX,
368 				      attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD))
369 			return -EMSGSIZE;
370 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid))
371 			return -EMSGSIZE;
372 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, attr.sm_lid))
373 			return -EMSGSIZE;
374 		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_LMC, attr.lmc))
375 			return -EMSGSIZE;
376 	}
377 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_STATE, attr.state))
378 		return -EMSGSIZE;
379 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state))
380 		return -EMSGSIZE;
381 
382 	netdev = ib_device_get_netdev(device, port);
383 	if (netdev && net_eq(dev_net(netdev), net)) {
384 		ret = nla_put_u32(msg,
385 				  RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
386 		if (ret)
387 			goto out;
388 		ret = nla_put_string(msg,
389 				     RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);
390 	}
391 
392 out:
393 	dev_put(netdev);
394 	return ret;
395 }
396 
397 static int fill_res_info_entry(struct sk_buff *msg,
398 			       const char *name, u64 curr)
399 {
400 	struct nlattr *entry_attr;
401 
402 	entry_attr = nla_nest_start_noflag(msg,
403 					   RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY);
404 	if (!entry_attr)
405 		return -EMSGSIZE;
406 
407 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME, name))
408 		goto err;
409 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR, curr,
410 			      RDMA_NLDEV_ATTR_PAD))
411 		goto err;
412 
413 	nla_nest_end(msg, entry_attr);
414 	return 0;
415 
416 err:
417 	nla_nest_cancel(msg, entry_attr);
418 	return -EMSGSIZE;
419 }
420 
421 static int fill_res_info(struct sk_buff *msg, struct ib_device *device,
422 			 bool show_details)
423 {
424 	static const char * const names[RDMA_RESTRACK_MAX] = {
425 		[RDMA_RESTRACK_PD] = "pd",
426 		[RDMA_RESTRACK_CQ] = "cq",
427 		[RDMA_RESTRACK_QP] = "qp",
428 		[RDMA_RESTRACK_CM_ID] = "cm_id",
429 		[RDMA_RESTRACK_MR] = "mr",
430 		[RDMA_RESTRACK_CTX] = "ctx",
431 		[RDMA_RESTRACK_SRQ] = "srq",
432 	};
433 
434 	struct nlattr *table_attr;
435 	int ret, i, curr;
436 
437 	if (fill_nldev_handle(msg, device))
438 		return -EMSGSIZE;
439 
440 	table_attr = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_RES_SUMMARY);
441 	if (!table_attr)
442 		return -EMSGSIZE;
443 
444 	for (i = 0; i < RDMA_RESTRACK_MAX; i++) {
445 		if (!names[i])
446 			continue;
447 		curr = rdma_restrack_count(device, i, show_details);
448 		ret = fill_res_info_entry(msg, names[i], curr);
449 		if (ret)
450 			goto err;
451 	}
452 
453 	nla_nest_end(msg, table_attr);
454 	return 0;
455 
456 err:
457 	nla_nest_cancel(msg, table_attr);
458 	return ret;
459 }
460 
461 static int fill_res_name_pid(struct sk_buff *msg,
462 			     struct rdma_restrack_entry *res)
463 {
464 	int err = 0;
465 
466 	/*
467 	 * For user resources, the user should read /proc/PID/comm to get
468 	 * the name of the task.
469 	 */
470 	if (rdma_is_kernel_res(res)) {
471 		err = nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
472 				     res->kern_name);
473 	} else {
474 		pid_t pid;
475 
476 		pid = task_pid_vnr(res->task);
477 		/*
478 		 * A zero PID means the task is dead and in zombie state,
479 		 * so there is no need to print a PID for it.
480 		 */
481 		if (pid)
482 			/*
483 			 * This part is racy: the task can be killed and the
484 			 * PID will be zero right here, but that is OK; the
485 			 * next query won't return a PID. We don't promise a
486 			 * real-time reflection of SW objects.
487 			 */
488 			err = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, pid);
489 	}
490 
491 	return err ? -EMSGSIZE : 0;
492 }
493 
494 static int fill_res_qp_entry_query(struct sk_buff *msg,
495 				   struct rdma_restrack_entry *res,
496 				   struct ib_device *dev,
497 				   struct ib_qp *qp)
498 {
499 	struct ib_qp_init_attr qp_init_attr;
500 	struct ib_qp_attr qp_attr;
501 	int ret;
502 
503 	ret = ib_query_qp(qp, &qp_attr, 0, &qp_init_attr);
504 	if (ret)
505 		return ret;
506 
507 	if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) {
508 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN,
509 				qp_attr.dest_qp_num))
510 			goto err;
511 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQ_PSN,
512 				qp_attr.rq_psn))
513 			goto err;
514 	}
515 
516 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SQ_PSN, qp_attr.sq_psn))
517 		goto err;
518 
519 	if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC ||
520 	    qp->qp_type == IB_QPT_XRC_INI || qp->qp_type == IB_QPT_XRC_TGT) {
521 		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE,
522 			       qp_attr.path_mig_state))
523 			goto err;
524 	}
525 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, qp->qp_type))
526 		goto err;
527 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
528 		goto err;
529 
530 	if (dev->ops.fill_res_qp_entry)
531 		return dev->ops.fill_res_qp_entry(msg, qp);
532 	return 0;
533 
534 err:	return -EMSGSIZE;
535 }
536 
537 static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
538 			     struct rdma_restrack_entry *res, uint32_t port)
539 {
540 	struct ib_qp *qp = container_of(res, struct ib_qp, res);
541 	struct ib_device *dev = qp->device;
542 	int ret;
543 
544 	if (port && port != qp->port)
545 		return -EAGAIN;
546 
547 	/* In create_qp() the port is not set yet */
548 	if (qp->port && nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp->port))
549 		return -EMSGSIZE;
550 
551 	ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num);
552 	if (ret)
553 		return -EMSGSIZE;
554 
555 	if (!rdma_is_kernel_res(res) &&
556 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, qp->pd->res.id))
557 		return -EMSGSIZE;
558 
559 	ret = fill_res_name_pid(msg, res);
560 	if (ret)
561 		return -EMSGSIZE;
562 
563 	return fill_res_qp_entry_query(msg, res, dev, qp);
564 }
565 
566 static int fill_res_qp_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
567 				 struct rdma_restrack_entry *res, uint32_t port)
568 {
569 	struct ib_qp *qp = container_of(res, struct ib_qp, res);
570 	struct ib_device *dev = qp->device;
571 
572 	if (port && port != qp->port)
573 		return -EAGAIN;
574 	if (!dev->ops.fill_res_qp_entry_raw)
575 		return -EINVAL;
576 	return dev->ops.fill_res_qp_entry_raw(msg, qp);
577 }
578 
579 static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin,
580 				struct rdma_restrack_entry *res, uint32_t port)
581 {
582 	struct rdma_id_private *id_priv =
583 				container_of(res, struct rdma_id_private, res);
584 	struct ib_device *dev = id_priv->id.device;
585 	struct rdma_cm_id *cm_id = &id_priv->id;
586 
587 	if (port && port != cm_id->port_num)
588 		return -EAGAIN;
589 
590 	if (cm_id->port_num &&
591 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num))
592 		goto err;
593 
594 	if (id_priv->qp_num) {
595 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, id_priv->qp_num))
596 			goto err;
597 		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, cm_id->qp_type))
598 			goto err;
599 	}
600 
601 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PS, cm_id->ps))
602 		goto err;
603 
604 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, id_priv->state))
605 		goto err;
606 
607 	if (cm_id->route.addr.src_addr.ss_family &&
608 	    nla_put(msg, RDMA_NLDEV_ATTR_RES_SRC_ADDR,
609 		    sizeof(cm_id->route.addr.src_addr),
610 		    &cm_id->route.addr.src_addr))
611 		goto err;
612 	if (cm_id->route.addr.dst_addr.ss_family &&
613 	    nla_put(msg, RDMA_NLDEV_ATTR_RES_DST_ADDR,
614 		    sizeof(cm_id->route.addr.dst_addr),
615 		    &cm_id->route.addr.dst_addr))
616 		goto err;
617 
618 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CM_IDN, res->id))
619 		goto err;
620 
621 	if (fill_res_name_pid(msg, res))
622 		goto err;
623 
624 	if (dev->ops.fill_res_cm_id_entry)
625 		return dev->ops.fill_res_cm_id_entry(msg, cm_id);
626 	return 0;
627 
628 err: return -EMSGSIZE;
629 }
630 
631 static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin,
632 			     struct rdma_restrack_entry *res, uint32_t port)
633 {
634 	struct ib_cq *cq = container_of(res, struct ib_cq, res);
635 	struct ib_device *dev = cq->device;
636 
637 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe))
638 		return -EMSGSIZE;
639 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
640 			      atomic_read(&cq->usecnt), RDMA_NLDEV_ATTR_PAD))
641 		return -EMSGSIZE;
642 
643 	/* Poll context is only valid for kernel CQs */
644 	if (rdma_is_kernel_res(res) &&
645 	    nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx))
646 		return -EMSGSIZE;
647 
648 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, (cq->dim != NULL)))
649 		return -EMSGSIZE;
650 
651 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, res->id))
652 		return -EMSGSIZE;
653 	if (!rdma_is_kernel_res(res) &&
654 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
655 			cq->uobject->uevent.uobject.context->res.id))
656 		return -EMSGSIZE;
657 
658 	if (fill_res_name_pid(msg, res))
659 		return -EMSGSIZE;
660 
661 	return (dev->ops.fill_res_cq_entry) ?
662 		dev->ops.fill_res_cq_entry(msg, cq) : 0;
663 }
664 
665 static int fill_res_cq_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
666 				 struct rdma_restrack_entry *res, uint32_t port)
667 {
668 	struct ib_cq *cq = container_of(res, struct ib_cq, res);
669 	struct ib_device *dev = cq->device;
670 
671 	if (!dev->ops.fill_res_cq_entry_raw)
672 		return -EINVAL;
673 	return dev->ops.fill_res_cq_entry_raw(msg, cq);
674 }
675 
676 static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
677 			     struct rdma_restrack_entry *res, uint32_t port)
678 {
679 	struct ib_mr *mr = container_of(res, struct ib_mr, res);
680 	struct ib_device *dev = mr->pd->device;
681 
682 	if (has_cap_net_admin) {
683 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey))
684 			return -EMSGSIZE;
685 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey))
686 			return -EMSGSIZE;
687 	}
688 
689 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length,
690 			      RDMA_NLDEV_ATTR_PAD))
691 		return -EMSGSIZE;
692 
693 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
694 		return -EMSGSIZE;
695 
696 	if (!rdma_is_kernel_res(res) &&
697 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, mr->pd->res.id))
698 		return -EMSGSIZE;
699 
700 	if (fill_res_name_pid(msg, res))
701 		return -EMSGSIZE;
702 
703 	return (dev->ops.fill_res_mr_entry) ?
704 		       dev->ops.fill_res_mr_entry(msg, mr) :
705 		       0;
706 }
707 
708 static int fill_res_mr_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
709 				 struct rdma_restrack_entry *res, uint32_t port)
710 {
711 	struct ib_mr *mr = container_of(res, struct ib_mr, res);
712 	struct ib_device *dev = mr->pd->device;
713 
714 	if (!dev->ops.fill_res_mr_entry_raw)
715 		return -EINVAL;
716 	return dev->ops.fill_res_mr_entry_raw(msg, mr);
717 }
718 
719 static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin,
720 			     struct rdma_restrack_entry *res, uint32_t port)
721 {
722 	struct ib_pd *pd = container_of(res, struct ib_pd, res);
723 
724 	if (has_cap_net_admin) {
725 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY,
726 				pd->local_dma_lkey))
727 			goto err;
728 		if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) &&
729 		    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY,
730 				pd->unsafe_global_rkey))
731 			goto err;
732 	}
733 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
734 			      atomic_read(&pd->usecnt), RDMA_NLDEV_ATTR_PAD))
735 		goto err;
736 
737 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, res->id))
738 		goto err;
739 
740 	if (!rdma_is_kernel_res(res) &&
741 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
742 			pd->uobject->context->res.id))
743 		goto err;
744 
745 	return fill_res_name_pid(msg, res);
746 
747 err:	return -EMSGSIZE;
748 }
749 
750 static int fill_res_ctx_entry(struct sk_buff *msg, bool has_cap_net_admin,
751 			      struct rdma_restrack_entry *res, uint32_t port)
752 {
753 	struct ib_ucontext *ctx = container_of(res, struct ib_ucontext, res);
754 
755 	if (rdma_is_kernel_res(res))
756 		return 0;
757 
758 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN, ctx->res.id))
759 		return -EMSGSIZE;
760 
761 	return fill_res_name_pid(msg, res);
762 }
763 
764 static int fill_res_range_qp_entry(struct sk_buff *msg, uint32_t min_range,
765 				   uint32_t max_range)
766 {
767 	struct nlattr *entry_attr;
768 
769 	if (!min_range)
770 		return 0;
771 
772 	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
773 	if (!entry_attr)
774 		return -EMSGSIZE;
775 
776 	if (min_range == max_range) {
777 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, min_range))
778 			goto err;
779 	} else {
780 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_MIN_RANGE, min_range))
781 			goto err;
782 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_MAX_RANGE, max_range))
783 			goto err;
784 	}
785 	nla_nest_end(msg, entry_attr);
786 	return 0;
787 
788 err:
789 	nla_nest_cancel(msg, entry_attr);
790 	return -EMSGSIZE;
791 }
792 
793 static int fill_res_srq_qps(struct sk_buff *msg, struct ib_srq *srq)
794 {
795 	uint32_t min_range = 0, prev = 0;
796 	struct rdma_restrack_entry *res;
797 	struct rdma_restrack_root *rt;
798 	struct nlattr *table_attr;
799 	struct ib_qp *qp = NULL;
800 	unsigned long id = 0;
801 
802 	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP);
803 	if (!table_attr)
804 		return -EMSGSIZE;
805 
806 	rt = &srq->device->res[RDMA_RESTRACK_QP];
807 	xa_lock(&rt->xa);
808 	xa_for_each(&rt->xa, id, res) {
809 		if (!rdma_restrack_get(res))
810 			continue;
811 
812 		qp = container_of(res, struct ib_qp, res);
813 		if (!qp->srq || (qp->srq->res.id != srq->res.id)) {
814 			rdma_restrack_put(res);
815 			continue;
816 		}
817 
818 		if (qp->qp_num < prev)
819 			/* qp_num should be ascending */
820 			goto err_loop;
821 
822 		if (min_range == 0) {
823 			min_range = qp->qp_num;
824 		} else if (qp->qp_num > (prev + 1)) {
825 			if (fill_res_range_qp_entry(msg, min_range, prev))
826 				goto err_loop;
827 
828 			min_range = qp->qp_num;
829 		}
830 		prev = qp->qp_num;
831 		rdma_restrack_put(res);
832 	}
833 
834 	xa_unlock(&rt->xa);
835 
836 	if (fill_res_range_qp_entry(msg, min_range, prev))
837 		goto err;
838 
839 	nla_nest_end(msg, table_attr);
840 	return 0;
841 
842 err_loop:
843 	rdma_restrack_put(res);
844 	xa_unlock(&rt->xa);
845 err:
846 	nla_nest_cancel(msg, table_attr);
847 	return -EMSGSIZE;
848 }
849 
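/*
 * Worked example of the range compression above: if the SRQ is used by QPs
 * with qp_num 0x10, 0x11, 0x12 and 0x20, the loop emits one QP_ENTRY with
 * MIN_RANGE=0x10/MAX_RANGE=0x12 and, after the loop, a second entry with a
 * single LQPN=0x20, since equal min/max collapse into one LQPN attribute.
 */
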
850 static int fill_res_srq_entry(struct sk_buff *msg, bool has_cap_net_admin,
851 			      struct rdma_restrack_entry *res, uint32_t port)
852 {
853 	struct ib_srq *srq = container_of(res, struct ib_srq, res);
854 	struct ib_device *dev = srq->device;
855 
856 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SRQN, srq->res.id))
857 		goto err;
858 
859 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, srq->srq_type))
860 		goto err;
861 
862 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, srq->pd->res.id))
863 		goto err;
864 
865 	if (ib_srq_has_cq(srq->srq_type)) {
866 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN,
867 				srq->ext.cq->res.id))
868 			goto err;
869 	}
870 
871 	if (fill_res_srq_qps(msg, srq))
872 		goto err;
873 
874 	if (fill_res_name_pid(msg, res))
875 		goto err;
876 
877 	if (dev->ops.fill_res_srq_entry)
878 		return dev->ops.fill_res_srq_entry(msg, srq);
879 
880 	return 0;
881 
882 err:
883 	return -EMSGSIZE;
884 }
885 
886 static int fill_res_srq_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
887 				 struct rdma_restrack_entry *res, uint32_t port)
888 {
889 	struct ib_srq *srq = container_of(res, struct ib_srq, res);
890 	struct ib_device *dev = srq->device;
891 
892 	if (!dev->ops.fill_res_srq_entry_raw)
893 		return -EINVAL;
894 	return dev->ops.fill_res_srq_entry_raw(msg, srq);
895 }
896 
897 static int fill_stat_counter_mode(struct sk_buff *msg,
898 				  struct rdma_counter *counter)
899 {
900 	struct rdma_counter_mode *m = &counter->mode;
901 
902 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, m->mode))
903 		return -EMSGSIZE;
904 
905 	if (m->mode == RDMA_COUNTER_MODE_AUTO) {
906 		if ((m->mask & RDMA_COUNTER_MASK_QP_TYPE) &&
907 		    nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, m->param.qp_type))
908 			return -EMSGSIZE;
909 
910 		if ((m->mask & RDMA_COUNTER_MASK_PID) &&
911 		    fill_res_name_pid(msg, &counter->res))
912 			return -EMSGSIZE;
913 	}
914 
915 	return 0;
916 }
917 
918 static int fill_stat_counter_qp_entry(struct sk_buff *msg, u32 qpn)
919 {
920 	struct nlattr *entry_attr;
921 
922 	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
923 	if (!entry_attr)
924 		return -EMSGSIZE;
925 
926 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn))
927 		goto err;
928 
929 	nla_nest_end(msg, entry_attr);
930 	return 0;
931 
932 err:
933 	nla_nest_cancel(msg, entry_attr);
934 	return -EMSGSIZE;
935 }
936 
937 static int fill_stat_counter_qps(struct sk_buff *msg,
938 				 struct rdma_counter *counter)
939 {
940 	struct rdma_restrack_entry *res;
941 	struct rdma_restrack_root *rt;
942 	struct nlattr *table_attr;
943 	struct ib_qp *qp = NULL;
944 	unsigned long id = 0;
945 	int ret = 0;
946 
947 	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP);
948 	if (!table_attr)
949 		return -EMSGSIZE;
950 
951 	rt = &counter->device->res[RDMA_RESTRACK_QP];
952 	xa_lock(&rt->xa);
953 	xa_for_each(&rt->xa, id, res) {
954 		qp = container_of(res, struct ib_qp, res);
955 		if (!qp->counter || (qp->counter->id != counter->id))
956 			continue;
957 
958 		ret = fill_stat_counter_qp_entry(msg, qp->qp_num);
959 		if (ret)
960 			goto err;
961 	}
962 
963 	xa_unlock(&rt->xa);
964 	nla_nest_end(msg, table_attr);
965 	return 0;
966 
967 err:
968 	xa_unlock(&rt->xa);
969 	nla_nest_cancel(msg, table_attr);
970 	return ret;
971 }
972 
973 int rdma_nl_stat_hwcounter_entry(struct sk_buff *msg, const char *name,
974 				 u64 value)
975 {
976 	struct nlattr *entry_attr;
977 
978 	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY);
979 	if (!entry_attr)
980 		return -EMSGSIZE;
981 
982 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME,
983 			   name))
984 		goto err;
985 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE,
986 			      value, RDMA_NLDEV_ATTR_PAD))
987 		goto err;
988 
989 	nla_nest_end(msg, entry_attr);
990 	return 0;
991 
992 err:
993 	nla_nest_cancel(msg, entry_attr);
994 	return -EMSGSIZE;
995 }
996 EXPORT_SYMBOL(rdma_nl_stat_hwcounter_entry);
997 
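/*
 * Illustrative sketch (not part of this file): a provider's
 * fill_stat_mr_entry callback could use rdma_nl_stat_hwcounter_entry() to
 * expose per-MR hardware counters. foo_fill_stat_mr_entry, struct foo_mr
 * and the counter field names are hypothetical.
 */
struct foo_mr {
	struct ib_mr ibmr;
	u64 page_faults;
	u64 invalidations;
};

static int foo_fill_stat_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr)
{
	struct foo_mr *mr = container_of(ibmr, struct foo_mr, ibmr);
	struct nlattr *table;

	table = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
	if (!table)
		return -EMSGSIZE;

	/* Each call adds one name/value HWCOUNTER_ENTRY to the table */
	if (rdma_nl_stat_hwcounter_entry(msg, "page_faults", mr->page_faults))
		goto err;
	if (rdma_nl_stat_hwcounter_entry(msg, "invalidations",
					 mr->invalidations))
		goto err;

	nla_nest_end(msg, table);
	return 0;

err:
	nla_nest_cancel(msg, table);
	return -EMSGSIZE;
}
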
998 static int fill_stat_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
999 			      struct rdma_restrack_entry *res, uint32_t port)
1000 {
1001 	struct ib_mr *mr = container_of(res, struct ib_mr, res);
1002 	struct ib_device *dev = mr->pd->device;
1003 
1004 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
1005 		goto err;
1006 
1007 	if (dev->ops.fill_stat_mr_entry)
1008 		return dev->ops.fill_stat_mr_entry(msg, mr);
1009 	return 0;
1010 
1011 err:
1012 	return -EMSGSIZE;
1013 }
1014 
1015 static int fill_stat_counter_hwcounters(struct sk_buff *msg,
1016 					struct rdma_counter *counter)
1017 {
1018 	struct rdma_hw_stats *st = counter->stats;
1019 	struct nlattr *table_attr;
1020 	int i;
1021 
1022 	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
1023 	if (!table_attr)
1024 		return -EMSGSIZE;
1025 
1026 	mutex_lock(&st->lock);
1027 	for (i = 0; i < st->num_counters; i++) {
1028 		if (test_bit(i, st->is_disabled))
1029 			continue;
1030 		if (rdma_nl_stat_hwcounter_entry(msg, st->descs[i].name,
1031 						 st->value[i]))
1032 			goto err;
1033 	}
1034 	mutex_unlock(&st->lock);
1035 
1036 	nla_nest_end(msg, table_attr);
1037 	return 0;
1038 
1039 err:
1040 	mutex_unlock(&st->lock);
1041 	nla_nest_cancel(msg, table_attr);
1042 	return -EMSGSIZE;
1043 }
1044 
1045 static int fill_res_counter_entry(struct sk_buff *msg, bool has_cap_net_admin,
1046 				  struct rdma_restrack_entry *res,
1047 				  uint32_t port)
1048 {
1049 	struct rdma_counter *counter =
1050 		container_of(res, struct rdma_counter, res);
1051 
1052 	if (port && port != counter->port)
1053 		return -EAGAIN;
1054 
1055 	/* Dump it even if the query failed */
1056 	rdma_counter_query_stats(counter);
1057 
1058 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, counter->port) ||
1059 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, counter->id) ||
1060 	    fill_stat_counter_mode(msg, counter) ||
1061 	    fill_stat_counter_qps(msg, counter) ||
1062 	    fill_stat_counter_hwcounters(msg, counter))
1063 		return -EMSGSIZE;
1064 
1065 	return 0;
1066 }
1067 
1068 static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1069 			  struct netlink_ext_ack *extack)
1070 {
1071 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1072 	struct ib_device *device;
1073 	struct sk_buff *msg;
1074 	u32 index;
1075 	int err;
1076 
1077 	err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1078 				     nldev_policy, extack);
1079 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1080 		return -EINVAL;
1081 
1082 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1083 
1084 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1085 	if (!device)
1086 		return -EINVAL;
1087 
1088 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1089 	if (!msg) {
1090 		err = -ENOMEM;
1091 		goto err;
1092 	}
1093 
1094 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1095 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
1096 			0, 0);
1097 	if (!nlh) {
1098 		err = -EMSGSIZE;
1099 		goto err_free;
1100 	}
1101 
1102 	err = fill_dev_info(msg, device);
1103 	if (err)
1104 		goto err_free;
1105 
1106 	nlmsg_end(msg, nlh);
1107 
1108 	ib_device_put(device);
1109 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1110 
1111 err_free:
1112 	nlmsg_free(msg);
1113 err:
1114 	ib_device_put(device);
1115 	return err;
1116 }
1117 
1118 static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1119 			  struct netlink_ext_ack *extack)
1120 {
1121 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1122 	struct ib_device *device;
1123 	u32 index;
1124 	int err;
1125 
1126 	err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1127 				     nldev_policy, extack);
1128 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1129 		return -EINVAL;
1130 
1131 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1132 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1133 	if (!device)
1134 		return -EINVAL;
1135 
1136 	if (tb[RDMA_NLDEV_ATTR_DEV_NAME]) {
1137 		char name[IB_DEVICE_NAME_MAX] = {};
1138 
1139 		nla_strscpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
1140 			    IB_DEVICE_NAME_MAX);
1141 		if (strlen(name) == 0) {
1142 			err = -EINVAL;
1143 			goto done;
1144 		}
1145 		err = ib_device_rename(device, name);
1146 		goto done;
1147 	}
1148 
1149 	if (tb[RDMA_NLDEV_NET_NS_FD]) {
1150 		u32 ns_fd;
1151 
1152 		ns_fd = nla_get_u32(tb[RDMA_NLDEV_NET_NS_FD]);
1153 		err = ib_device_set_netns_put(skb, device, ns_fd);
1154 		goto put_done;
1155 	}
1156 
1157 	if (tb[RDMA_NLDEV_ATTR_DEV_DIM]) {
1158 		u8 use_dim;
1159 
1160 		use_dim = nla_get_u8(tb[RDMA_NLDEV_ATTR_DEV_DIM]);
1161 		err = ib_device_set_dim(device,  use_dim);
1162 		goto done;
1163 	}
1164 
1165 done:
1166 	ib_device_put(device);
1167 put_done:
1168 	return err;
1169 }
1170 
1171 static int _nldev_get_dumpit(struct ib_device *device,
1172 			     struct sk_buff *skb,
1173 			     struct netlink_callback *cb,
1174 			     unsigned int idx)
1175 {
1176 	int start = cb->args[0];
1177 	struct nlmsghdr *nlh;
1178 
1179 	if (idx < start)
1180 		return 0;
1181 
1182 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
1183 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
1184 			0, NLM_F_MULTI);
1185 
1186 	if (!nlh || fill_dev_info(skb, device)) {
1187 		nlmsg_cancel(skb, nlh);
1188 		goto out;
1189 	}
1190 
1191 	nlmsg_end(skb, nlh);
1192 
1193 	idx++;
1194 
1195 out:	cb->args[0] = idx;
1196 	return skb->len;
1197 }
1198 
1199 static int nldev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
1200 {
1201 	/*
1202 	 * There is no need to take a lock here, because
1203 	 * we are relying on ib_core's locking.
1204 	 */
1205 	return ib_enum_all_devs(_nldev_get_dumpit, skb, cb);
1206 }
1207 
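/*
 * Userspace reaches the two handlers above via RDMA_NLDEV_CMD_GET: a doit
 * request carrying RDMA_NLDEV_ATTR_DEV_INDEX returns a single device, while
 * a dump request walks all registered devices (this is what the iproute2
 * "rdma dev" command relies on).
 */
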
1208 static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1209 			       struct netlink_ext_ack *extack)
1210 {
1211 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1212 	struct ib_device *device;
1213 	struct sk_buff *msg;
1214 	u32 index;
1215 	u32 port;
1216 	int err;
1217 
1218 	err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1219 				     nldev_policy, extack);
1220 	if (err ||
1221 	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
1222 	    !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
1223 		return -EINVAL;
1224 
1225 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1226 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1227 	if (!device)
1228 		return -EINVAL;
1229 
1230 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1231 	if (!rdma_is_port_valid(device, port)) {
1232 		err = -EINVAL;
1233 		goto err;
1234 	}
1235 
1236 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1237 	if (!msg) {
1238 		err = -ENOMEM;
1239 		goto err;
1240 	}
1241 
1242 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1243 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
1244 			0, 0);
1245 	if (!nlh) {
1246 		err = -EMSGSIZE;
1247 		goto err_free;
1248 	}
1249 
1250 	err = fill_port_info(msg, device, port, sock_net(skb->sk));
1251 	if (err)
1252 		goto err_free;
1253 
1254 	nlmsg_end(msg, nlh);
1255 	ib_device_put(device);
1256 
1257 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1258 
1259 err_free:
1260 	nlmsg_free(msg);
1261 err:
1262 	ib_device_put(device);
1263 	return err;
1264 }
1265 
1266 static int nldev_port_get_dumpit(struct sk_buff *skb,
1267 				 struct netlink_callback *cb)
1268 {
1269 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1270 	struct ib_device *device;
1271 	int start = cb->args[0];
1272 	struct nlmsghdr *nlh;
1273 	u32 idx = 0;
1274 	u32 ifindex;
1275 	int err;
1276 	unsigned int p;
1277 
1278 	err = nlmsg_parse_deprecated(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1279 				     nldev_policy, NULL);
1280 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1281 		return -EINVAL;
1282 
1283 	ifindex = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1284 	device = ib_device_get_by_index(sock_net(skb->sk), ifindex);
1285 	if (!device)
1286 		return -EINVAL;
1287 
1288 	rdma_for_each_port (device, p) {
1289 		/*
1290 		 * The dumpit function returns all information starting from
1291 		 * a specific index. That index is taken from the netlink
1292 		 * request sent by the user and it is available
1293 		 * in cb->args[0].
1294 		 *
1295 		 * Usually, the user doesn't fill this field, which causes
1296 		 * everything to be returned.
1297 		 *
1298 		 */
1299 		if (idx < start) {
1300 			idx++;
1301 			continue;
1302 		}
1303 
1304 		nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
1305 				cb->nlh->nlmsg_seq,
1306 				RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1307 						 RDMA_NLDEV_CMD_PORT_GET),
1308 				0, NLM_F_MULTI);
1309 
1310 		if (!nlh || fill_port_info(skb, device, p, sock_net(skb->sk))) {
1311 			nlmsg_cancel(skb, nlh);
1312 			goto out;
1313 		}
1314 		idx++;
1315 		nlmsg_end(skb, nlh);
1316 	}
1317 
1318 out:
1319 	ib_device_put(device);
1320 	cb->args[0] = idx;
1321 	return skb->len;
1322 }
1323 
1324 static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1325 			      struct netlink_ext_ack *extack)
1326 {
1327 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1328 	bool show_details = false;
1329 	struct ib_device *device;
1330 	struct sk_buff *msg;
1331 	u32 index;
1332 	int ret;
1333 
1334 	ret = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1335 				     nldev_policy, extack);
1336 	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1337 		return -EINVAL;
1338 
1339 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1340 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1341 	if (!device)
1342 		return -EINVAL;
1343 
1344 	if (tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS])
1345 		show_details = nla_get_u8(tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]);
1346 
1347 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1348 	if (!msg) {
1349 		ret = -ENOMEM;
1350 		goto err;
1351 	}
1352 
1353 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1354 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
1355 			0, 0);
1356 	if (!nlh) {
1357 		ret = -EMSGSIZE;
1358 		goto err_free;
1359 	}
1360 
1361 	ret = fill_res_info(msg, device, show_details);
1362 	if (ret)
1363 		goto err_free;
1364 
1365 	nlmsg_end(msg, nlh);
1366 	ib_device_put(device);
1367 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1368 
1369 err_free:
1370 	nlmsg_free(msg);
1371 err:
1372 	ib_device_put(device);
1373 	return ret;
1374 }
1375 
1376 static int _nldev_res_get_dumpit(struct ib_device *device,
1377 				 struct sk_buff *skb,
1378 				 struct netlink_callback *cb,
1379 				 unsigned int idx)
1380 {
1381 	int start = cb->args[0];
1382 	struct nlmsghdr *nlh;
1383 
1384 	if (idx < start)
1385 		return 0;
1386 
1387 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
1388 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
1389 			0, NLM_F_MULTI);
1390 
1391 	if (!nlh || fill_res_info(skb, device, false)) {
1392 		nlmsg_cancel(skb, nlh);
1393 		goto out;
1394 	}
1395 	nlmsg_end(skb, nlh);
1396 
1397 	idx++;
1398 
1399 out:
1400 	cb->args[0] = idx;
1401 	return skb->len;
1402 }
1403 
1404 static int nldev_res_get_dumpit(struct sk_buff *skb,
1405 				struct netlink_callback *cb)
1406 {
1407 	return ib_enum_all_devs(_nldev_res_get_dumpit, skb, cb);
1408 }
1409 
1410 struct nldev_fill_res_entry {
1411 	enum rdma_nldev_attr nldev_attr;
1412 	u8 flags;
1413 	u32 entry;
1414 	u32 id;
1415 };
1416 
1417 enum nldev_res_flags {
1418 	NLDEV_PER_DEV = 1 << 0,
1419 };
1420 
1421 static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
1422 	[RDMA_RESTRACK_QP] = {
1423 		.nldev_attr = RDMA_NLDEV_ATTR_RES_QP,
1424 		.entry = RDMA_NLDEV_ATTR_RES_QP_ENTRY,
1425 		.id = RDMA_NLDEV_ATTR_RES_LQPN,
1426 	},
1427 	[RDMA_RESTRACK_CM_ID] = {
1428 		.nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID,
1429 		.entry = RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY,
1430 		.id = RDMA_NLDEV_ATTR_RES_CM_IDN,
1431 	},
1432 	[RDMA_RESTRACK_CQ] = {
1433 		.nldev_attr = RDMA_NLDEV_ATTR_RES_CQ,
1434 		.flags = NLDEV_PER_DEV,
1435 		.entry = RDMA_NLDEV_ATTR_RES_CQ_ENTRY,
1436 		.id = RDMA_NLDEV_ATTR_RES_CQN,
1437 	},
1438 	[RDMA_RESTRACK_MR] = {
1439 		.nldev_attr = RDMA_NLDEV_ATTR_RES_MR,
1440 		.flags = NLDEV_PER_DEV,
1441 		.entry = RDMA_NLDEV_ATTR_RES_MR_ENTRY,
1442 		.id = RDMA_NLDEV_ATTR_RES_MRN,
1443 	},
1444 	[RDMA_RESTRACK_PD] = {
1445 		.nldev_attr = RDMA_NLDEV_ATTR_RES_PD,
1446 		.flags = NLDEV_PER_DEV,
1447 		.entry = RDMA_NLDEV_ATTR_RES_PD_ENTRY,
1448 		.id = RDMA_NLDEV_ATTR_RES_PDN,
1449 	},
1450 	[RDMA_RESTRACK_COUNTER] = {
1451 		.nldev_attr = RDMA_NLDEV_ATTR_STAT_COUNTER,
1452 		.entry = RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY,
1453 		.id = RDMA_NLDEV_ATTR_STAT_COUNTER_ID,
1454 	},
1455 	[RDMA_RESTRACK_CTX] = {
1456 		.nldev_attr = RDMA_NLDEV_ATTR_RES_CTX,
1457 		.flags = NLDEV_PER_DEV,
1458 		.entry = RDMA_NLDEV_ATTR_RES_CTX_ENTRY,
1459 		.id = RDMA_NLDEV_ATTR_RES_CTXN,
1460 	},
1461 	[RDMA_RESTRACK_SRQ] = {
1462 		.nldev_attr = RDMA_NLDEV_ATTR_RES_SRQ,
1463 		.flags = NLDEV_PER_DEV,
1464 		.entry = RDMA_NLDEV_ATTR_RES_SRQ_ENTRY,
1465 		.id = RDMA_NLDEV_ATTR_RES_SRQN,
1466 	},
1467 
1468 };
1469 
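/*
 * Example of how this table drives the common handlers below: for a
 * RDMA_RESTRACK_QP doit request, res_get_common_doit() looks up the object
 * by the attribute in fe->id (RDMA_NLDEV_ATTR_RES_LQPN), while the dumpit
 * path nests all results inside fe->nldev_attr (RDMA_NLDEV_ATTR_RES_QP)
 * with one fe->entry (RDMA_NLDEV_ATTR_RES_QP_ENTRY) per object.
 */
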
1470 static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1471 			       struct netlink_ext_ack *extack,
1472 			       enum rdma_restrack_type res_type,
1473 			       res_fill_func_t fill_func)
1474 {
1475 	const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
1476 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1477 	struct rdma_restrack_entry *res;
1478 	struct ib_device *device;
1479 	u32 index, id, port = 0;
1480 	bool has_cap_net_admin;
1481 	struct sk_buff *msg;
1482 	int ret;
1483 
1484 	ret = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1485 				     nldev_policy, extack);
1486 	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !fe->id || !tb[fe->id])
1487 		return -EINVAL;
1488 
1489 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1490 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1491 	if (!device)
1492 		return -EINVAL;
1493 
1494 	if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1495 		port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1496 		if (!rdma_is_port_valid(device, port)) {
1497 			ret = -EINVAL;
1498 			goto err;
1499 		}
1500 	}
1501 
1502 	if ((port && fe->flags & NLDEV_PER_DEV) ||
1503 	    (!port && ~fe->flags & NLDEV_PER_DEV)) {
1504 		ret = -EINVAL;
1505 		goto err;
1506 	}
1507 
1508 	id = nla_get_u32(tb[fe->id]);
1509 	res = rdma_restrack_get_byid(device, res_type, id);
1510 	if (IS_ERR(res)) {
1511 		ret = PTR_ERR(res);
1512 		goto err;
1513 	}
1514 
1515 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1516 	if (!msg) {
1517 		ret = -ENOMEM;
1518 		goto err_get;
1519 	}
1520 
1521 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1522 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1523 					 RDMA_NL_GET_OP(nlh->nlmsg_type)),
1524 			0, 0);
1525 
1526 	if (!nlh || fill_nldev_handle(msg, device)) {
1527 		ret = -EMSGSIZE;
1528 		goto err_free;
1529 	}
1530 
1531 	has_cap_net_admin = netlink_capable(skb, CAP_NET_ADMIN);
1532 
1533 	ret = fill_func(msg, has_cap_net_admin, res, port);
1534 	if (ret)
1535 		goto err_free;
1536 
1537 	rdma_restrack_put(res);
1538 	nlmsg_end(msg, nlh);
1539 	ib_device_put(device);
1540 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1541 
1542 err_free:
1543 	nlmsg_free(msg);
1544 err_get:
1545 	rdma_restrack_put(res);
1546 err:
1547 	ib_device_put(device);
1548 	return ret;
1549 }
1550 
1551 static int res_get_common_dumpit(struct sk_buff *skb,
1552 				 struct netlink_callback *cb,
1553 				 enum rdma_restrack_type res_type,
1554 				 res_fill_func_t fill_func)
1555 {
1556 	const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
1557 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1558 	struct rdma_restrack_entry *res;
1559 	struct rdma_restrack_root *rt;
1560 	int err, ret = 0, idx = 0;
1561 	bool show_details = false;
1562 	struct nlattr *table_attr;
1563 	struct nlattr *entry_attr;
1564 	struct ib_device *device;
1565 	int start = cb->args[0];
1566 	bool has_cap_net_admin;
1567 	struct nlmsghdr *nlh;
1568 	unsigned long id;
1569 	u32 index, port = 0;
1570 	bool filled = false;
1571 
1572 	err = nlmsg_parse_deprecated(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1573 				     nldev_policy, NULL);
1574 	/*
1575 	 * Right now we require the device index to get resource information,
1576 	 * but this code could be extended to return all devices in one shot
1577 	 * by checking for the existence of RDMA_NLDEV_ATTR_DEV_INDEX: if it
1578 	 * doesn't exist, iterate over all devices.
1579 	 *
1580 	 * That is not needed for now.
1581 	 */
1582 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1583 		return -EINVAL;
1584 
1585 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1586 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1587 	if (!device)
1588 		return -EINVAL;
1589 
1590 	if (tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS])
1591 		show_details = nla_get_u8(tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]);
1592 
1593 	/*
1594 	 * If no PORT_INDEX is supplied, we return all resources of the device
1595 	 */
1596 	if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1597 		port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1598 		if (!rdma_is_port_valid(device, port)) {
1599 			ret = -EINVAL;
1600 			goto err_index;
1601 		}
1602 	}
1603 
1604 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
1605 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1606 					 RDMA_NL_GET_OP(cb->nlh->nlmsg_type)),
1607 			0, NLM_F_MULTI);
1608 
1609 	if (!nlh || fill_nldev_handle(skb, device)) {
1610 		ret = -EMSGSIZE;
1611 		goto err;
1612 	}
1613 
1614 	table_attr = nla_nest_start_noflag(skb, fe->nldev_attr);
1615 	if (!table_attr) {
1616 		ret = -EMSGSIZE;
1617 		goto err;
1618 	}
1619 
1620 	has_cap_net_admin = netlink_capable(cb->skb, CAP_NET_ADMIN);
1621 
1622 	rt = &device->res[res_type];
1623 	xa_lock(&rt->xa);
1624 	/*
1625 	 * FIXME: if skipping ahead is common, this loop should use
1626 	 * xas_for_each & xas_pause to optimize, since we can have a lot of
1627 	 * objects.
1628 	 */
1629 	xa_for_each(&rt->xa, id, res) {
1630 		if (xa_get_mark(&rt->xa, res->id, RESTRACK_DD) && !show_details)
1631 			goto next;
1632 
1633 		if (idx < start || !rdma_restrack_get(res))
1634 			goto next;
1635 
1636 		xa_unlock(&rt->xa);
1637 
1638 		filled = true;
1639 
1640 		entry_attr = nla_nest_start_noflag(skb, fe->entry);
1641 		if (!entry_attr) {
1642 			ret = -EMSGSIZE;
1643 			rdma_restrack_put(res);
1644 			goto msg_full;
1645 		}
1646 
1647 		ret = fill_func(skb, has_cap_net_admin, res, port);
1648 
1649 		rdma_restrack_put(res);
1650 
1651 		if (ret) {
1652 			nla_nest_cancel(skb, entry_attr);
1653 			if (ret == -EMSGSIZE)
1654 				goto msg_full;
1655 			if (ret == -EAGAIN)
1656 				goto again;
1657 			goto res_err;
1658 		}
1659 		nla_nest_end(skb, entry_attr);
1660 again:		xa_lock(&rt->xa);
1661 next:		idx++;
1662 	}
1663 	xa_unlock(&rt->xa);
1664 
1665 msg_full:
1666 	nla_nest_end(skb, table_attr);
1667 	nlmsg_end(skb, nlh);
1668 	cb->args[0] = idx;
1669 
1670 	/*
1671 	 * No more entries to fill, cancel the message and
1672 	 * return 0 to mark end of dumpit.
1673 	 */
1674 	if (!filled)
1675 		goto err;
1676 
1677 	ib_device_put(device);
1678 	return skb->len;
1679 
1680 res_err:
1681 	nla_nest_cancel(skb, table_attr);
1682 
1683 err:
1684 	nlmsg_cancel(skb, nlh);
1685 
1686 err_index:
1687 	ib_device_put(device);
1688 	return ret;
1689 }
1690 
1691 #define RES_GET_FUNCS(name, type)                                              \
1692 	static int nldev_res_get_##name##_dumpit(struct sk_buff *skb,          \
1693 						 struct netlink_callback *cb)  \
1694 	{                                                                      \
1695 		return res_get_common_dumpit(skb, cb, type,                    \
1696 					     fill_res_##name##_entry);         \
1697 	}                                                                      \
1698 	static int nldev_res_get_##name##_doit(struct sk_buff *skb,            \
1699 					       struct nlmsghdr *nlh,           \
1700 					       struct netlink_ext_ack *extack) \
1701 	{                                                                      \
1702 		return res_get_common_doit(skb, nlh, extack, type,             \
1703 					   fill_res_##name##_entry);           \
1704 	}
1705 
1706 RES_GET_FUNCS(qp, RDMA_RESTRACK_QP);
1707 RES_GET_FUNCS(qp_raw, RDMA_RESTRACK_QP);
1708 RES_GET_FUNCS(cm_id, RDMA_RESTRACK_CM_ID);
1709 RES_GET_FUNCS(cq, RDMA_RESTRACK_CQ);
1710 RES_GET_FUNCS(cq_raw, RDMA_RESTRACK_CQ);
1711 RES_GET_FUNCS(pd, RDMA_RESTRACK_PD);
1712 RES_GET_FUNCS(mr, RDMA_RESTRACK_MR);
1713 RES_GET_FUNCS(mr_raw, RDMA_RESTRACK_MR);
1714 RES_GET_FUNCS(counter, RDMA_RESTRACK_COUNTER);
1715 RES_GET_FUNCS(ctx, RDMA_RESTRACK_CTX);
1716 RES_GET_FUNCS(srq, RDMA_RESTRACK_SRQ);
1717 RES_GET_FUNCS(srq_raw, RDMA_RESTRACK_SRQ);
1718 
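/*
 * For reference, RES_GET_FUNCS(qp, RDMA_RESTRACK_QP) above expands to
 * roughly this pair of handlers:
 *
 *	static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
 *					   struct netlink_callback *cb)
 *	{
 *		return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_QP,
 *					     fill_res_qp_entry);
 *	}
 *
 *	static int nldev_res_get_qp_doit(struct sk_buff *skb,
 *					 struct nlmsghdr *nlh,
 *					 struct netlink_ext_ack *extack)
 *	{
 *		return res_get_common_doit(skb, nlh, extack, RDMA_RESTRACK_QP,
 *					   fill_res_qp_entry);
 *	}
 */
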
1719 static LIST_HEAD(link_ops);
1720 static DECLARE_RWSEM(link_ops_rwsem);
1721 
1722 static const struct rdma_link_ops *link_ops_get(const char *type)
1723 {
1724 	const struct rdma_link_ops *ops;
1725 
1726 	list_for_each_entry(ops, &link_ops, list) {
1727 		if (!strcmp(ops->type, type))
1728 			goto out;
1729 	}
1730 	ops = NULL;
1731 out:
1732 	return ops;
1733 }
1734 
1735 void rdma_link_register(struct rdma_link_ops *ops)
1736 {
1737 	down_write(&link_ops_rwsem);
1738 	if (WARN_ON_ONCE(link_ops_get(ops->type)))
1739 		goto out;
1740 	list_add(&ops->list, &link_ops);
1741 out:
1742 	up_write(&link_ops_rwsem);
1743 }
1744 EXPORT_SYMBOL(rdma_link_register);
1745 
1746 void rdma_link_unregister(struct rdma_link_ops *ops)
1747 {
1748 	down_write(&link_ops_rwsem);
1749 	list_del(&ops->list);
1750 	up_write(&link_ops_rwsem);
1751 }
1752 EXPORT_SYMBOL(rdma_link_unregister);
1753 
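/*
 * Illustrative sketch (not part of this file): how a soft-RDMA style driver
 * might plug into the link_ops list above so that "rdma link add ... type foo
 * netdev ..." can create its devices. The "foo" type name, foo_newlink()
 * body and init/exit function names are assumptions for the example.
 */
static int foo_newlink(const char *ibdev_name, struct net_device *ndev)
{
	/* Allocate and register an ib_device bound to ndev here */
	return 0;
}

static struct rdma_link_ops foo_link_ops = {
	.type = "foo",
	.newlink = foo_newlink,
};

static int __init foo_init(void)
{
	rdma_link_register(&foo_link_ops);
	return 0;
}

static void __exit foo_exit(void)
{
	rdma_link_unregister(&foo_link_ops);
}

module_init(foo_init);
module_exit(foo_exit);
/* Lets nldev_newlink() autoload the module via request_module("rdma-link-foo") */
MODULE_ALIAS_RDMA_LINK("foo");
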
1754 static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
1755 			  struct netlink_ext_ack *extack)
1756 {
1757 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1758 	char ibdev_name[IB_DEVICE_NAME_MAX];
1759 	const struct rdma_link_ops *ops;
1760 	char ndev_name[IFNAMSIZ];
1761 	struct net_device *ndev;
1762 	char type[IFNAMSIZ];
1763 	int err;
1764 
1765 	err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1766 				     nldev_policy, extack);
1767 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] ||
1768 	    !tb[RDMA_NLDEV_ATTR_LINK_TYPE] || !tb[RDMA_NLDEV_ATTR_NDEV_NAME])
1769 		return -EINVAL;
1770 
1771 	nla_strscpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
1772 		    sizeof(ibdev_name));
1773 	if (strchr(ibdev_name, '%') || strlen(ibdev_name) == 0)
1774 		return -EINVAL;
1775 
1776 	nla_strscpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type));
1777 	nla_strscpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME],
1778 		    sizeof(ndev_name));
1779 
1780 	ndev = dev_get_by_name(sock_net(skb->sk), ndev_name);
1781 	if (!ndev)
1782 		return -ENODEV;
1783 
1784 	down_read(&link_ops_rwsem);
1785 	ops = link_ops_get(type);
1786 #ifdef CONFIG_MODULES
1787 	if (!ops) {
1788 		up_read(&link_ops_rwsem);
1789 		request_module("rdma-link-%s", type);
1790 		down_read(&link_ops_rwsem);
1791 		ops = link_ops_get(type);
1792 	}
1793 #endif
1794 	err = ops ? ops->newlink(ibdev_name, ndev) : -EINVAL;
1795 	up_read(&link_ops_rwsem);
1796 	dev_put(ndev);
1797 
1798 	return err;
1799 }
1800 
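/*
 * RDMA_NLDEV_CMD_DELLINK: unregister the device identified by
 * RDMA_NLDEV_ATTR_DEV_INDEX, but only if the driver opted in with
 * IBK_ALLOW_USER_UNREG (typically devices that were created from user
 * space via NEWLINK).
 */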
1801 static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
1802 			  struct netlink_ext_ack *extack)
1803 {
1804 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1805 	struct ib_device *device;
1806 	u32 index;
1807 	int err;
1808 
1809 	err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1810 				     nldev_policy, extack);
1811 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1812 		return -EINVAL;
1813 
1814 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1815 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1816 	if (!device)
1817 		return -EINVAL;
1818 
1819 	if (!(device->attrs.kernel_cap_flags & IBK_ALLOW_USER_UNREG)) {
1820 		ib_device_put(device);
1821 		return -EINVAL;
1822 	}
1823 
1824 	ib_unregister_device_and_put(device);
1825 	return 0;
1826 }
1827 
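/*
 * RDMA_NLDEV_CMD_GET_CHARDEV: ask a named kernel client (the
 * RDMA_NLDEV_ATTR_CHARDEV_TYPE string, e.g. "uverbs") for the character
 * device that belongs to an optional device/port, and report its dev_t,
 * ABI version and name back to the caller.
 */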
1828 static int nldev_get_chardev(struct sk_buff *skb, struct nlmsghdr *nlh,
1829 			     struct netlink_ext_ack *extack)
1830 {
1831 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1832 	char client_name[RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE];
1833 	struct ib_client_nl_info data = {};
1834 	struct ib_device *ibdev = NULL;
1835 	struct sk_buff *msg;
1836 	u32 index;
1837 	int err;
1838 
1839 	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
1840 			  extack);
1841 	if (err || !tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE])
1842 		return -EINVAL;
1843 
1844 	nla_strscpy(client_name, tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE],
1845 		    sizeof(client_name));
1846 
1847 	if (tb[RDMA_NLDEV_ATTR_DEV_INDEX]) {
1848 		index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1849 		ibdev = ib_device_get_by_index(sock_net(skb->sk), index);
1850 		if (!ibdev)
1851 			return -EINVAL;
1852 
1853 		if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1854 			data.port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1855 			if (!rdma_is_port_valid(ibdev, data.port)) {
1856 				err = -EINVAL;
1857 				goto out_put;
1858 			}
1859 		} else {
1860 			data.port = -1;
1861 		}
1862 	} else if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1863 		return -EINVAL;
1864 	}
1865 
1866 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1867 	if (!msg) {
1868 		err = -ENOMEM;
1869 		goto out_put;
1870 	}
1871 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1872 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1873 					 RDMA_NLDEV_CMD_GET_CHARDEV),
1874 			0, 0);
1875 	if (!nlh) {
1876 		err = -EMSGSIZE;
1877 		goto out_nlmsg;
1878 	}
1879 
1880 	data.nl_msg = msg;
1881 	err = ib_get_client_nl_info(ibdev, client_name, &data);
1882 	if (err)
1883 		goto out_nlmsg;
1884 
1885 	err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV,
1886 				huge_encode_dev(data.cdev->devt),
1887 				RDMA_NLDEV_ATTR_PAD);
1888 	if (err)
1889 		goto out_data;
1890 	err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV_ABI, data.abi,
1891 				RDMA_NLDEV_ATTR_PAD);
1892 	if (err)
1893 		goto out_data;
1894 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_CHARDEV_NAME,
1895 			   dev_name(data.cdev))) {
1896 		err = -EMSGSIZE;
1897 		goto out_data;
1898 	}
1899 
1900 	nlmsg_end(msg, nlh);
1901 	put_device(data.cdev);
1902 	if (ibdev)
1903 		ib_device_put(ibdev);
1904 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1905 
1906 out_data:
1907 	put_device(data.cdev);
1908 out_nlmsg:
1909 	nlmsg_free(msg);
1910 out_put:
1911 	if (ibdev)
1912 		ib_device_put(ibdev);
1913 	return err;
1914 }
1915 
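/*
 * RDMA_NLDEV_CMD_SYS_GET: report the global nldev knobs: the network
 * namespace sharing mode, the privileged-QKEY mode, and whether the kernel
 * supports copy-on-fork for pinned memory.
 */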
1916 static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1917 			      struct netlink_ext_ack *extack)
1918 {
1919 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1920 	struct sk_buff *msg;
1921 	int err;
1922 
1923 	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1924 			  nldev_policy, extack);
1925 	if (err)
1926 		return err;
1927 
1928 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1929 	if (!msg)
1930 		return -ENOMEM;
1931 
1932 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1933 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1934 					 RDMA_NLDEV_CMD_SYS_GET),
1935 			0, 0);
1936 	if (!nlh) {
1937 		nlmsg_free(msg);
1938 		return -EMSGSIZE;
1939 	}
1940 
1941 	err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_NETNS_MODE,
1942 			 (u8)ib_devices_shared_netns);
1943 	if (err) {
1944 		nlmsg_free(msg);
1945 		return err;
1946 	}
1947 
1948 	err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE,
1949 			 (u8)privileged_qkey);
1950 	if (err) {
1951 		nlmsg_free(msg);
1952 		return err;
1953 	}
1954 	/*
1955 	 * Copy-on-fork is supported.
1956 	 * See commits:
1957 	 * 70e806e4e645 ("mm: Do early cow for pinned pages during fork() for ptes")
1958 	 * 4eae4efa2c29 ("hugetlb: do early cow when page pinned on src mm")
1959 	 * for more details. Don't backport this without them.
1960 	 *
1961 	 * Return value ignored on purpose, assume copy-on-fork is not
1962 	 * supported in case of failure.
1963 	 */
1964 	nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK, 1);
1965 
1966 	nlmsg_end(msg, nlh);
1967 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1968 }
1969 
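/*
 * The two helpers below implement RDMA_NLDEV_CMD_SYS_SET: one switches the
 * shared/exclusive network namespace mode via rdma_compatdev_set(), the
 * other toggles the privileged-QKEY mode (the privileged_qkey flag).  Both
 * accept only 0 or 1.
 */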
1970 static int nldev_set_sys_set_netns_doit(struct nlattr *tb[])
1971 {
1972 	u8 enable;
1973 	int err;
1974 
1975 	enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]);
1976 	/* Only 0 and 1 are supported */
1977 	if (enable > 1)
1978 		return -EINVAL;
1979 
1980 	err = rdma_compatdev_set(enable);
1981 	return err;
1982 }
1983 
1984 static int nldev_set_sys_set_pqkey_doit(struct nlattr *tb[])
1985 {
1986 	u8 enable;
1987 
1988 	enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE]);
1989 	/* Only 0 and 1 are supported */
1990 	if (enable > 1)
1991 		return -EINVAL;
1992 
1993 	privileged_qkey = enable;
1994 	return 0;
1995 }
1996 
1997 static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1998 				  struct netlink_ext_ack *extack)
1999 {
2000 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2001 	int err;
2002 
2003 	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2004 			  nldev_policy, extack);
2005 	if (err)
2006 		return -EINVAL;
2007 
2008 	if (tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE])
2009 		return nldev_set_sys_set_netns_doit(tb);
2010 
2011 	if (tb[RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE])
2012 		return nldev_set_sys_set_pqkey_doit(tb);
2013 
2014 	return -EINVAL;
2015 }
2016 
2017 
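/*
 * Set the statistics mode for a port as part of RDMA_NLDEV_CMD_STAT_SET.
 * In auto mode the (optional) mask selects the auto-binding criteria;
 * otherwise a specific QP is bound either to an existing counter
 * (RDMA_NLDEV_ATTR_STAT_COUNTER_ID) or to a newly allocated one, and the
 * resulting counter id and LQPN are echoed back in the reply.
 */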
2018 static int nldev_stat_set_mode_doit(struct sk_buff *msg,
2019 				    struct netlink_ext_ack *extack,
2020 				    struct nlattr *tb[],
2021 				    struct ib_device *device, u32 port)
2022 {
2023 	u32 mode, mask = 0, qpn, cntn = 0;
2024 	int ret;
2025 
2026 	/* Currently, only QP counters are supported */
2027 	if (!tb[RDMA_NLDEV_ATTR_STAT_RES] ||
2028 	    nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
2029 		return -EINVAL;
2030 
2031 	mode = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_MODE]);
2032 	if (mode == RDMA_COUNTER_MODE_AUTO) {
2033 		if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK])
2034 			mask = nla_get_u32(
2035 				tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]);
2036 		return rdma_counter_set_auto_mode(device, port, mask, extack);
2037 	}
2038 
2039 	if (!tb[RDMA_NLDEV_ATTR_RES_LQPN])
2040 		return -EINVAL;
2041 
2042 	qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
2043 	if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) {
2044 		cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
2045 		ret = rdma_counter_bind_qpn(device, port, qpn, cntn);
2046 		if (ret)
2047 			return ret;
2048 	} else {
2049 		ret = rdma_counter_bind_qpn_alloc(device, port, qpn, &cntn);
2050 		if (ret)
2051 			return ret;
2052 	}
2053 
2054 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
2055 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
2056 		ret = -EMSGSIZE;
2057 		goto err_fill;
2058 	}
2059 
2060 	return 0;
2061 
2062 err_fill:
2063 	rdma_counter_unbind_qpn(device, port, qpn, cntn);
2064 	return ret;
2065 }
2066 
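/*
 * Enable/disable optional ("dynamic") hardware counters on a port.  The
 * nested RDMA_NLDEV_ATTR_STAT_HWCOUNTERS list names the indices that should
 * be enabled; every optional counter not in the list is disabled.
 */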
2067 static int nldev_stat_set_counter_dynamic_doit(struct nlattr *tb[],
2068 					       struct ib_device *device,
2069 					       u32 port)
2070 {
2071 	struct rdma_hw_stats *stats;
2072 	struct nlattr *entry_attr;
2073 	unsigned long *target;
2074 	int rem, i, ret = 0;
2075 	u32 index;
2076 
2077 	stats = ib_get_hw_stats_port(device, port);
2078 	if (!stats)
2079 		return -EINVAL;
2080 
2081 	target = kcalloc(BITS_TO_LONGS(stats->num_counters),
2082 			 sizeof(*stats->is_disabled), GFP_KERNEL);
2083 	if (!target)
2084 		return -ENOMEM;
2085 
2086 	nla_for_each_nested(entry_attr, tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS],
2087 			    rem) {
2088 		index = nla_get_u32(entry_attr);
2089 		if ((index >= stats->num_counters) ||
2090 		    !(stats->descs[index].flags & IB_STAT_FLAG_OPTIONAL)) {
2091 			ret = -EINVAL;
2092 			goto out;
2093 		}
2094 
2095 		set_bit(index, target);
2096 	}
2097 
2098 	for (i = 0; i < stats->num_counters; i++) {
2099 		if (!(stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL))
2100 			continue;
2101 
2102 		ret = rdma_counter_modify(device, port, i, test_bit(i, target));
2103 		if (ret)
2104 			goto out;
2105 	}
2106 
2107 out:
2108 	kfree(target);
2109 	return ret;
2110 }
2111 
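/*
 * RDMA_NLDEV_CMD_STAT_SET entry point: validates the device/port, requires
 * at least one of STAT_MODE or STAT_HWCOUNTERS, and dispatches to the mode
 * and dynamic-counter helpers above.
 */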
2112 static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
2113 			       struct netlink_ext_ack *extack)
2114 {
2115 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2116 	struct ib_device *device;
2117 	struct sk_buff *msg;
2118 	u32 index, port;
2119 	int ret;
2120 
2121 	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
2122 			  extack);
2123 	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
2124 	    !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
2125 		return -EINVAL;
2126 
2127 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2128 	device = ib_device_get_by_index(sock_net(skb->sk), index);
2129 	if (!device)
2130 		return -EINVAL;
2131 
2132 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
2133 	if (!rdma_is_port_valid(device, port)) {
2134 		ret = -EINVAL;
2135 		goto err_put_device;
2136 	}
2137 
2138 	if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] &&
2139 	    !tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) {
2140 		ret = -EINVAL;
2141 		goto err_put_device;
2142 	}
2143 
2144 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2145 	if (!msg) {
2146 		ret = -ENOMEM;
2147 		goto err_put_device;
2148 	}
2149 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
2150 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
2151 					 RDMA_NLDEV_CMD_STAT_SET),
2152 			0, 0);
2153 	if (!nlh || fill_nldev_handle(msg, device) ||
2154 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) {
2155 		ret = -EMSGSIZE;
2156 		goto err_free_msg;
2157 	}
2158 
2159 	if (tb[RDMA_NLDEV_ATTR_STAT_MODE]) {
2160 		ret = nldev_stat_set_mode_doit(msg, extack, tb, device, port);
2161 		if (ret)
2162 			goto err_free_msg;
2163 	}
2164 
2165 	if (tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) {
2166 		ret = nldev_stat_set_counter_dynamic_doit(tb, device, port);
2167 		if (ret)
2168 			goto err_free_msg;
2169 	}
2170 
2171 	nlmsg_end(msg, nlh);
2172 	ib_device_put(device);
2173 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
2174 
2175 err_free_msg:
2176 	nlmsg_free(msg);
2177 err_put_device:
2178 	ib_device_put(device);
2179 	return ret;
2180 }
2181 
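/*
 * RDMA_NLDEV_CMD_STAT_DEL: unbind the given QP (RES_LQPN) from the given
 * counter (STAT_COUNTER_ID) on a port and confirm the unbinding in the
 * reply message.
 */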
2182 static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
2183 			       struct netlink_ext_ack *extack)
2184 {
2185 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2186 	struct ib_device *device;
2187 	struct sk_buff *msg;
2188 	u32 index, port, qpn, cntn;
2189 	int ret;
2190 
2191 	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2192 			  nldev_policy, extack);
2193 	if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES] ||
2194 	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX] ||
2195 	    !tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID] ||
2196 	    !tb[RDMA_NLDEV_ATTR_RES_LQPN])
2197 		return -EINVAL;
2198 
2199 	if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
2200 		return -EINVAL;
2201 
2202 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2203 	device = ib_device_get_by_index(sock_net(skb->sk), index);
2204 	if (!device)
2205 		return -EINVAL;
2206 
2207 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
2208 	if (!rdma_is_port_valid(device, port)) {
2209 		ret = -EINVAL;
2210 		goto err;
2211 	}
2212 
2213 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2214 	if (!msg) {
2215 		ret = -ENOMEM;
2216 		goto err;
2217 	}
2218 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
2219 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
2220 					 RDMA_NLDEV_CMD_STAT_SET),
2221 			0, 0);
2222 	if (!nlh) {
2223 		ret = -EMSGSIZE;
2224 		goto err_fill;
2225 	}
2226 
2227 	cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
2228 	qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
2229 	if (fill_nldev_handle(msg, device) ||
2230 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
2231 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
2232 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
2233 		ret = -EMSGSIZE;
2234 		goto err_fill;
2235 	}
2236 
2237 	ret = rdma_counter_unbind_qpn(device, port, qpn, cntn);
2238 	if (ret)
2239 		goto err_fill;
2240 
2241 	nlmsg_end(msg, nlh);
2242 	ib_device_put(device);
2243 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
2244 
2245 err_fill:
2246 	nlmsg_free(msg);
2247 err:
2248 	ib_device_put(device);
2249 	return ret;
2250 }
2251 
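/*
 * Dump the port-wide ("default") hardware counters: refresh the driver
 * stats with ->get_hw_stats(), then report, for every counter that is not
 * disabled, the sum of the per-port value and the values accumulated in
 * bound rdma counters (rdma_counter_get_hwstat_value()).
 */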
2252 static int stat_get_doit_default_counter(struct sk_buff *skb,
2253 					 struct nlmsghdr *nlh,
2254 					 struct netlink_ext_ack *extack,
2255 					 struct nlattr *tb[])
2256 {
2257 	struct rdma_hw_stats *stats;
2258 	struct nlattr *table_attr;
2259 	struct ib_device *device;
2260 	int ret, num_cnts, i;
2261 	struct sk_buff *msg;
2262 	u32 index, port;
2263 	u64 v;
2264 
2265 	if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
2266 		return -EINVAL;
2267 
2268 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2269 	device = ib_device_get_by_index(sock_net(skb->sk), index);
2270 	if (!device)
2271 		return -EINVAL;
2272 
2273 	if (!device->ops.alloc_hw_port_stats || !device->ops.get_hw_stats) {
2274 		ret = -EINVAL;
2275 		goto err;
2276 	}
2277 
2278 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
2279 	stats = ib_get_hw_stats_port(device, port);
2280 	if (!stats) {
2281 		ret = -EINVAL;
2282 		goto err;
2283 	}
2284 
2285 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2286 	if (!msg) {
2287 		ret = -ENOMEM;
2288 		goto err;
2289 	}
2290 
2291 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
2292 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
2293 					 RDMA_NLDEV_CMD_STAT_GET),
2294 			0, 0);
2295 
2296 	if (!nlh || fill_nldev_handle(msg, device) ||
2297 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) {
2298 		ret = -EMSGSIZE;
2299 		goto err_msg;
2300 	}
2301 
2302 	mutex_lock(&stats->lock);
2303 
2304 	num_cnts = device->ops.get_hw_stats(device, stats, port, 0);
2305 	if (num_cnts < 0) {
2306 		ret = -EINVAL;
2307 		goto err_stats;
2308 	}
2309 
2310 	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
2311 	if (!table_attr) {
2312 		ret = -EMSGSIZE;
2313 		goto err_stats;
2314 	}
2315 	for (i = 0; i < num_cnts; i++) {
2316 		if (test_bit(i, stats->is_disabled))
2317 			continue;
2318 
2319 		v = stats->value[i] +
2320 			rdma_counter_get_hwstat_value(device, port, i);
2321 		if (rdma_nl_stat_hwcounter_entry(msg,
2322 						 stats->descs[i].name, v)) {
2323 			ret = -EMSGSIZE;
2324 			goto err_table;
2325 		}
2326 	}
2327 	nla_nest_end(msg, table_attr);
2328 
2329 	mutex_unlock(&stats->lock);
2330 	nlmsg_end(msg, nlh);
2331 	ib_device_put(device);
2332 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
2333 
2334 err_table:
2335 	nla_nest_cancel(msg, table_attr);
2336 err_stats:
2337 	mutex_unlock(&stats->lock);
2338 err_msg:
2339 	nlmsg_free(msg);
2340 err:
2341 	ib_device_put(device);
2342 	return ret;
2343 }
2344 
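/*
 * QP statistics query: with a STAT_COUNTER_ID this is just a per-counter
 * resource query; otherwise report the current counter mode for the port
 * and, in auto mode, the auto-binding mask.
 */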
2345 static int stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh,
2346 			    struct netlink_ext_ack *extack, struct nlattr *tb[])
2347 
2348 {
2349 	static enum rdma_nl_counter_mode mode;
2350 	static enum rdma_nl_counter_mask mask;
2351 	struct ib_device *device;
2352 	struct sk_buff *msg;
2353 	u32 index, port;
2354 	int ret;
2355 
2356 	if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID])
2357 		return nldev_res_get_counter_doit(skb, nlh, extack);
2358 
2359 	if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] ||
2360 	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
2361 		return -EINVAL;
2362 
2363 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2364 	device = ib_device_get_by_index(sock_net(skb->sk), index);
2365 	if (!device)
2366 		return -EINVAL;
2367 
2368 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
2369 	if (!rdma_is_port_valid(device, port)) {
2370 		ret = -EINVAL;
2371 		goto err;
2372 	}
2373 
2374 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2375 	if (!msg) {
2376 		ret = -ENOMEM;
2377 		goto err;
2378 	}
2379 
2380 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
2381 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
2382 					 RDMA_NLDEV_CMD_STAT_GET),
2383 			0, 0);
2384 	if (!nlh) {
2385 		ret = -EMSGSIZE;
2386 		goto err_msg;
2387 	}
2388 
2389 	ret = rdma_counter_get_mode(device, port, &mode, &mask);
2390 	if (ret)
2391 		goto err_msg;
2392 
2393 	if (fill_nldev_handle(msg, device) ||
2394 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
2395 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, mode)) {
2396 		ret = -EMSGSIZE;
2397 		goto err_msg;
2398 	}
2399 
2400 	if ((mode == RDMA_COUNTER_MODE_AUTO) &&
2401 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK, mask)) {
2402 		ret = -EMSGSIZE;
2403 		goto err_msg;
2404 	}
2405 
2406 	nlmsg_end(msg, nlh);
2407 	ib_device_put(device);
2408 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
2409 
2410 err_msg:
2411 	nlmsg_free(msg);
2412 err:
2413 	ib_device_put(device);
2414 	return ret;
2415 }
2416 
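/*
 * RDMA_NLDEV_CMD_STAT_GET (doit): without STAT_RES fall back to the default
 * per-port counters, otherwise dispatch on the resource type (QP or MR).
 */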
2417 static int nldev_stat_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
2418 			       struct netlink_ext_ack *extack)
2419 {
2420 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2421 	int ret;
2422 
2423 	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2424 			  nldev_policy, extack);
2425 	if (ret)
2426 		return -EINVAL;
2427 
2428 	if (!tb[RDMA_NLDEV_ATTR_STAT_RES])
2429 		return stat_get_doit_default_counter(skb, nlh, extack, tb);
2430 
2431 	switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) {
2432 	case RDMA_NLDEV_ATTR_RES_QP:
2433 		ret = stat_get_doit_qp(skb, nlh, extack, tb);
2434 		break;
2435 	case RDMA_NLDEV_ATTR_RES_MR:
2436 		ret = res_get_common_doit(skb, nlh, extack, RDMA_RESTRACK_MR,
2437 					  fill_stat_mr_entry);
2438 		break;
2439 	default:
2440 		ret = -EINVAL;
2441 		break;
2442 	}
2443 
2444 	return ret;
2445 }
2446 
2447 static int nldev_stat_get_dumpit(struct sk_buff *skb,
2448 				 struct netlink_callback *cb)
2449 {
2450 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2451 	int ret;
2452 
2453 	ret = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2454 			  nldev_policy, NULL);
2455 	if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES])
2456 		return -EINVAL;
2457 
2458 	switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) {
2459 	case RDMA_NLDEV_ATTR_RES_QP:
2460 		ret = nldev_res_get_counter_dumpit(skb, cb);
2461 		break;
2462 	case RDMA_NLDEV_ATTR_RES_MR:
2463 		ret = res_get_common_dumpit(skb, cb, RDMA_RESTRACK_MR,
2464 					    fill_stat_mr_entry);
2465 		break;
2466 	default:
2467 		ret = -EINVAL;
2468 		break;
2469 	}
2470 
2471 	return ret;
2472 }
2473 
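/*
 * RDMA_NLDEV_CMD_STAT_GET_STATUS: list every hardware counter of a port by
 * name and index and, for optional counters, whether it is currently
 * enabled (the RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC flag).
 */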
2474 static int nldev_stat_get_counter_status_doit(struct sk_buff *skb,
2475 					      struct nlmsghdr *nlh,
2476 					      struct netlink_ext_ack *extack)
2477 {
2478 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX], *table, *entry;
2479 	struct rdma_hw_stats *stats;
2480 	struct ib_device *device;
2481 	struct sk_buff *msg;
2482 	u32 devid, port;
2483 	int ret, i;
2484 
2485 	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2486 			  nldev_policy, extack);
2487 	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
2488 	    !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
2489 		return -EINVAL;
2490 
2491 	devid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2492 	device = ib_device_get_by_index(sock_net(skb->sk), devid);
2493 	if (!device)
2494 		return -EINVAL;
2495 
2496 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
2497 	if (!rdma_is_port_valid(device, port)) {
2498 		ret = -EINVAL;
2499 		goto err;
2500 	}
2501 
2502 	stats = ib_get_hw_stats_port(device, port);
2503 	if (!stats) {
2504 		ret = -EINVAL;
2505 		goto err;
2506 	}
2507 
2508 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2509 	if (!msg) {
2510 		ret = -ENOMEM;
2511 		goto err;
2512 	}
2513 
2514 	nlh = nlmsg_put(
2515 		msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
2516 		RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_STAT_GET_STATUS),
2517 		0, 0);
2518 
2519 	ret = -EMSGSIZE;
2520 	if (!nlh || fill_nldev_handle(msg, device) ||
2521 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
2522 		goto err_msg;
2523 
2524 	table = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
2525 	if (!table)
2526 		goto err_msg;
2527 
2528 	mutex_lock(&stats->lock);
2529 	for (i = 0; i < stats->num_counters; i++) {
2530 		entry = nla_nest_start(msg,
2531 				       RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY);
2532 		if (!entry)
2533 			goto err_msg_table;
2534 
2535 		if (nla_put_string(msg,
2536 				   RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME,
2537 				   stats->descs[i].name) ||
2538 		    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX, i))
2539 			goto err_msg_entry;
2540 
2541 		if ((stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) &&
2542 		    (nla_put_u8(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC,
2543 				!test_bit(i, stats->is_disabled))))
2544 			goto err_msg_entry;
2545 
2546 		nla_nest_end(msg, entry);
2547 	}
2548 	mutex_unlock(&stats->lock);
2549 
2550 	nla_nest_end(msg, table);
2551 	nlmsg_end(msg, nlh);
2552 	ib_device_put(device);
2553 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
2554 
2555 err_msg_entry:
2556 	nla_nest_cancel(msg, entry);
2557 err_msg_table:
2558 	mutex_unlock(&stats->lock);
2559 	nla_nest_cancel(msg, table);
2560 err_msg:
2561 	nlmsg_free(msg);
2562 err:
2563 	ib_device_put(device);
2564 	return ret;
2565 }
2566 
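/*
 * RDMA_NLDEV_CMD_NEWDEV/DELDEV: create or remove a sub-device (e.g. an SMI
 * device) on top of a parent IB device via ib_add_sub_device() /
 * ib_del_sub_device_and_put().
 */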
2567 static int nldev_newdev(struct sk_buff *skb, struct nlmsghdr *nlh,
2568 			struct netlink_ext_ack *extack)
2569 {
2570 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2571 	enum rdma_nl_dev_type type;
2572 	struct ib_device *parent;
2573 	char name[IFNAMSIZ] = {};
2574 	u32 parentid;
2575 	int ret;
2576 
2577 	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2578 			  nldev_policy, extack);
2579 	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
2580 		!tb[RDMA_NLDEV_ATTR_DEV_NAME] || !tb[RDMA_NLDEV_ATTR_DEV_TYPE])
2581 		return -EINVAL;
2582 
2583 	nla_strscpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME], sizeof(name));
2584 	type = nla_get_u8(tb[RDMA_NLDEV_ATTR_DEV_TYPE]);
2585 	parentid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2586 	parent = ib_device_get_by_index(sock_net(skb->sk), parentid);
2587 	if (!parent)
2588 		return -EINVAL;
2589 
2590 	ret = ib_add_sub_device(parent, type, name);
2591 	ib_device_put(parent);
2592 
2593 	return ret;
2594 }
2595 
2596 static int nldev_deldev(struct sk_buff *skb, struct nlmsghdr *nlh,
2597 			struct netlink_ext_ack *extack)
2598 {
2599 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2600 	struct ib_device *device;
2601 	u32 devid;
2602 	int ret;
2603 
2604 	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2605 			  nldev_policy, extack);
2606 	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
2607 		return -EINVAL;
2608 
2609 	devid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2610 	device = ib_device_get_by_index(sock_net(skb->sk), devid);
2611 	if (!device)
2612 		return -EINVAL;
2613 
2614 	return ib_del_sub_device_and_put(device);
2615 }
2616 
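/*
 * Dispatch table for the RDMA_NL_NLDEV netlink family: one entry per
 * RDMA_NLDEV_CMD_*, each with a .doit and/or .dump handler.  Entries marked
 * RDMA_NL_ADMIN_PERM are restricted by the rdma netlink core to callers
 * with CAP_NET_ADMIN.
 */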
2617 static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
2618 	[RDMA_NLDEV_CMD_GET] = {
2619 		.doit = nldev_get_doit,
2620 		.dump = nldev_get_dumpit,
2621 	},
2622 	[RDMA_NLDEV_CMD_GET_CHARDEV] = {
2623 		.doit = nldev_get_chardev,
2624 	},
2625 	[RDMA_NLDEV_CMD_SET] = {
2626 		.doit = nldev_set_doit,
2627 		.flags = RDMA_NL_ADMIN_PERM,
2628 	},
2629 	[RDMA_NLDEV_CMD_NEWLINK] = {
2630 		.doit = nldev_newlink,
2631 		.flags = RDMA_NL_ADMIN_PERM,
2632 	},
2633 	[RDMA_NLDEV_CMD_DELLINK] = {
2634 		.doit = nldev_dellink,
2635 		.flags = RDMA_NL_ADMIN_PERM,
2636 	},
2637 	[RDMA_NLDEV_CMD_PORT_GET] = {
2638 		.doit = nldev_port_get_doit,
2639 		.dump = nldev_port_get_dumpit,
2640 	},
2641 	[RDMA_NLDEV_CMD_RES_GET] = {
2642 		.doit = nldev_res_get_doit,
2643 		.dump = nldev_res_get_dumpit,
2644 	},
2645 	[RDMA_NLDEV_CMD_RES_QP_GET] = {
2646 		.doit = nldev_res_get_qp_doit,
2647 		.dump = nldev_res_get_qp_dumpit,
2648 	},
2649 	[RDMA_NLDEV_CMD_RES_CM_ID_GET] = {
2650 		.doit = nldev_res_get_cm_id_doit,
2651 		.dump = nldev_res_get_cm_id_dumpit,
2652 	},
2653 	[RDMA_NLDEV_CMD_RES_CQ_GET] = {
2654 		.doit = nldev_res_get_cq_doit,
2655 		.dump = nldev_res_get_cq_dumpit,
2656 	},
2657 	[RDMA_NLDEV_CMD_RES_MR_GET] = {
2658 		.doit = nldev_res_get_mr_doit,
2659 		.dump = nldev_res_get_mr_dumpit,
2660 	},
2661 	[RDMA_NLDEV_CMD_RES_PD_GET] = {
2662 		.doit = nldev_res_get_pd_doit,
2663 		.dump = nldev_res_get_pd_dumpit,
2664 	},
2665 	[RDMA_NLDEV_CMD_RES_CTX_GET] = {
2666 		.doit = nldev_res_get_ctx_doit,
2667 		.dump = nldev_res_get_ctx_dumpit,
2668 	},
2669 	[RDMA_NLDEV_CMD_RES_SRQ_GET] = {
2670 		.doit = nldev_res_get_srq_doit,
2671 		.dump = nldev_res_get_srq_dumpit,
2672 	},
2673 	[RDMA_NLDEV_CMD_SYS_GET] = {
2674 		.doit = nldev_sys_get_doit,
2675 	},
2676 	[RDMA_NLDEV_CMD_SYS_SET] = {
2677 		.doit = nldev_set_sys_set_doit,
2678 		.flags = RDMA_NL_ADMIN_PERM,
2679 	},
2680 	[RDMA_NLDEV_CMD_STAT_SET] = {
2681 		.doit = nldev_stat_set_doit,
2682 		.flags = RDMA_NL_ADMIN_PERM,
2683 	},
2684 	[RDMA_NLDEV_CMD_STAT_GET] = {
2685 		.doit = nldev_stat_get_doit,
2686 		.dump = nldev_stat_get_dumpit,
2687 	},
2688 	[RDMA_NLDEV_CMD_STAT_DEL] = {
2689 		.doit = nldev_stat_del_doit,
2690 		.flags = RDMA_NL_ADMIN_PERM,
2691 	},
2692 	[RDMA_NLDEV_CMD_RES_QP_GET_RAW] = {
2693 		.doit = nldev_res_get_qp_raw_doit,
2694 		.dump = nldev_res_get_qp_raw_dumpit,
2695 		.flags = RDMA_NL_ADMIN_PERM,
2696 	},
2697 	[RDMA_NLDEV_CMD_RES_CQ_GET_RAW] = {
2698 		.doit = nldev_res_get_cq_raw_doit,
2699 		.dump = nldev_res_get_cq_raw_dumpit,
2700 		.flags = RDMA_NL_ADMIN_PERM,
2701 	},
2702 	[RDMA_NLDEV_CMD_RES_MR_GET_RAW] = {
2703 		.doit = nldev_res_get_mr_raw_doit,
2704 		.dump = nldev_res_get_mr_raw_dumpit,
2705 		.flags = RDMA_NL_ADMIN_PERM,
2706 	},
2707 	[RDMA_NLDEV_CMD_RES_SRQ_GET_RAW] = {
2708 		.doit = nldev_res_get_srq_raw_doit,
2709 		.dump = nldev_res_get_srq_raw_dumpit,
2710 		.flags = RDMA_NL_ADMIN_PERM,
2711 	},
2712 	[RDMA_NLDEV_CMD_STAT_GET_STATUS] = {
2713 		.doit = nldev_stat_get_counter_status_doit,
2714 	},
2715 	[RDMA_NLDEV_CMD_NEWDEV] = {
2716 		.doit = nldev_newdev,
2717 		.flags = RDMA_NL_ADMIN_PERM,
2718 	},
2719 	[RDMA_NLDEV_CMD_DELDEV] = {
2720 		.doit = nldev_deldev,
2721 		.flags = RDMA_NL_ADMIN_PERM,
2722 	},
2723 };
2724 
2725 void __init nldev_init(void)
2726 {
2727 	rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table);
2728 }
2729 
2730 void nldev_exit(void)
2731 {
2732 	rdma_nl_unregister(RDMA_NL_NLDEV);
2733 }
2734 
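/*
 * 5 is the numeric value of RDMA_NL_NLDEV; the alias lets the rdma netlink
 * core autoload the module providing this family on demand.
 */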
2735 MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_NLDEV, 5);
2736