xref: /linux/drivers/infiniband/core/nldev.c (revision 22c55fb9eb92395d999b8404d73e58540d11bdd8)
1 /*
2  * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  * 3. Neither the names of the copyright holders nor the names of its
13  *    contributors may be used to endorse or promote products derived from
14  *    this software without specific prior written permission.
15  *
16  * Alternatively, this software may be distributed under the terms of the
17  * GNU General Public License ("GPL") version 2 as published by the Free
18  * Software Foundation.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include <linux/module.h>
34 #include <linux/pid.h>
35 #include <linux/pid_namespace.h>
36 #include <linux/mutex.h>
37 #include <net/netlink.h>
38 #include <rdma/rdma_cm.h>
39 #include <rdma/rdma_netlink.h>
40 
41 #include "core_priv.h"
42 #include "cma_priv.h"
43 #include "restrack.h"
44 #include "uverbs.h"
45 
46 /*
47  * This determines whether a non-privileged user is allowed to specify a
48  * controlled QKEY or not. When true, a non-privileged user is allowed to
49  * specify a controlled QKEY.
50  */
51 static bool privileged_qkey;
52 
53 typedef int (*res_fill_func_t)(struct sk_buff*, bool,
54 			       struct rdma_restrack_entry*, uint32_t);
55 
56 /*
57  * Keep the array elements sorted by netlink attribute name.
58  */
59 static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
60 	[RDMA_NLDEV_ATTR_CHARDEV]		= { .type = NLA_U64 },
61 	[RDMA_NLDEV_ATTR_CHARDEV_ABI]		= { .type = NLA_U64 },
62 	[RDMA_NLDEV_ATTR_CHARDEV_NAME]		= { .type = NLA_NUL_STRING,
63 					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
64 	[RDMA_NLDEV_ATTR_CHARDEV_TYPE]		= { .type = NLA_NUL_STRING,
65 					.len = RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE },
66 	[RDMA_NLDEV_ATTR_DEV_DIM]               = { .type = NLA_U8 },
67 	[RDMA_NLDEV_ATTR_DEV_INDEX]		= { .type = NLA_U32 },
68 	[RDMA_NLDEV_ATTR_DEV_NAME]		= { .type = NLA_NUL_STRING,
69 					.len = IB_DEVICE_NAME_MAX },
70 	[RDMA_NLDEV_ATTR_DEV_NODE_TYPE]		= { .type = NLA_U8 },
71 	[RDMA_NLDEV_ATTR_DEV_PROTOCOL]		= { .type = NLA_NUL_STRING,
72 					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
73 	[RDMA_NLDEV_ATTR_DRIVER]		= { .type = NLA_NESTED },
74 	[RDMA_NLDEV_ATTR_DRIVER_ENTRY]		= { .type = NLA_NESTED },
75 	[RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE]	= { .type = NLA_U8 },
76 	[RDMA_NLDEV_ATTR_DRIVER_STRING]		= { .type = NLA_NUL_STRING,
77 					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
78 	[RDMA_NLDEV_ATTR_DRIVER_S32]		= { .type = NLA_S32 },
79 	[RDMA_NLDEV_ATTR_DRIVER_S64]		= { .type = NLA_S64 },
80 	[RDMA_NLDEV_ATTR_DRIVER_U32]		= { .type = NLA_U32 },
81 	[RDMA_NLDEV_ATTR_DRIVER_U64]		= { .type = NLA_U64 },
82 	[RDMA_NLDEV_ATTR_FW_VERSION]		= { .type = NLA_NUL_STRING,
83 					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
84 	[RDMA_NLDEV_ATTR_LID]			= { .type = NLA_U32 },
85 	[RDMA_NLDEV_ATTR_LINK_TYPE]		= { .type = NLA_NUL_STRING,
86 					.len = IFNAMSIZ },
87 	[RDMA_NLDEV_ATTR_LMC]			= { .type = NLA_U8 },
88 	[RDMA_NLDEV_ATTR_NDEV_INDEX]		= { .type = NLA_U32 },
89 	[RDMA_NLDEV_ATTR_NDEV_NAME]		= { .type = NLA_NUL_STRING,
90 					.len = IFNAMSIZ },
91 	[RDMA_NLDEV_ATTR_NODE_GUID]		= { .type = NLA_U64 },
92 	[RDMA_NLDEV_ATTR_PORT_INDEX]		= { .type = NLA_U32 },
93 	[RDMA_NLDEV_ATTR_PORT_PHYS_STATE]	= { .type = NLA_U8 },
94 	[RDMA_NLDEV_ATTR_PORT_STATE]		= { .type = NLA_U8 },
95 	[RDMA_NLDEV_ATTR_RES_CM_ID]		= { .type = NLA_NESTED },
96 	[RDMA_NLDEV_ATTR_RES_CM_IDN]		= { .type = NLA_U32 },
97 	[RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY]	= { .type = NLA_NESTED },
98 	[RDMA_NLDEV_ATTR_RES_CQ]		= { .type = NLA_NESTED },
99 	[RDMA_NLDEV_ATTR_RES_CQE]		= { .type = NLA_U32 },
100 	[RDMA_NLDEV_ATTR_RES_CQN]		= { .type = NLA_U32 },
101 	[RDMA_NLDEV_ATTR_RES_CQ_ENTRY]		= { .type = NLA_NESTED },
102 	[RDMA_NLDEV_ATTR_RES_CTX]		= { .type = NLA_NESTED },
103 	[RDMA_NLDEV_ATTR_RES_CTXN]		= { .type = NLA_U32 },
104 	[RDMA_NLDEV_ATTR_RES_CTX_ENTRY]		= { .type = NLA_NESTED },
105 	[RDMA_NLDEV_ATTR_RES_DST_ADDR]		= {
106 			.len = sizeof(struct __kernel_sockaddr_storage) },
107 	[RDMA_NLDEV_ATTR_RES_IOVA]		= { .type = NLA_U64 },
108 	[RDMA_NLDEV_ATTR_RES_KERN_NAME]		= { .type = NLA_NUL_STRING,
109 					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
110 	[RDMA_NLDEV_ATTR_RES_LKEY]		= { .type = NLA_U32 },
111 	[RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY]	= { .type = NLA_U32 },
112 	[RDMA_NLDEV_ATTR_RES_LQPN]		= { .type = NLA_U32 },
113 	[RDMA_NLDEV_ATTR_RES_MR]		= { .type = NLA_NESTED },
114 	[RDMA_NLDEV_ATTR_RES_MRLEN]		= { .type = NLA_U64 },
115 	[RDMA_NLDEV_ATTR_RES_MRN]		= { .type = NLA_U32 },
116 	[RDMA_NLDEV_ATTR_RES_MR_ENTRY]		= { .type = NLA_NESTED },
117 	[RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE]	= { .type = NLA_U8 },
118 	[RDMA_NLDEV_ATTR_RES_PD]		= { .type = NLA_NESTED },
119 	[RDMA_NLDEV_ATTR_RES_PDN]		= { .type = NLA_U32 },
120 	[RDMA_NLDEV_ATTR_RES_PD_ENTRY]		= { .type = NLA_NESTED },
121 	[RDMA_NLDEV_ATTR_RES_PID]		= { .type = NLA_U32 },
122 	[RDMA_NLDEV_ATTR_RES_POLL_CTX]		= { .type = NLA_U8 },
123 	[RDMA_NLDEV_ATTR_RES_PS]		= { .type = NLA_U32 },
124 	[RDMA_NLDEV_ATTR_RES_QP]		= { .type = NLA_NESTED },
125 	[RDMA_NLDEV_ATTR_RES_QP_ENTRY]		= { .type = NLA_NESTED },
126 	[RDMA_NLDEV_ATTR_RES_RAW]		= { .type = NLA_BINARY },
127 	[RDMA_NLDEV_ATTR_RES_RKEY]		= { .type = NLA_U32 },
128 	[RDMA_NLDEV_ATTR_RES_RQPN]		= { .type = NLA_U32 },
129 	[RDMA_NLDEV_ATTR_RES_RQ_PSN]		= { .type = NLA_U32 },
130 	[RDMA_NLDEV_ATTR_RES_SQ_PSN]		= { .type = NLA_U32 },
131 	[RDMA_NLDEV_ATTR_RES_SRC_ADDR]		= {
132 			.len = sizeof(struct __kernel_sockaddr_storage) },
133 	[RDMA_NLDEV_ATTR_RES_STATE]		= { .type = NLA_U8 },
134 	[RDMA_NLDEV_ATTR_RES_SUMMARY]		= { .type = NLA_NESTED },
135 	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY]	= { .type = NLA_NESTED },
136 	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR]= { .type = NLA_U64 },
137 	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME]= { .type = NLA_NUL_STRING,
138 					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
139 	[RDMA_NLDEV_ATTR_RES_TYPE]		= { .type = NLA_U8 },
140 	[RDMA_NLDEV_ATTR_RES_SUBTYPE]		= { .type = NLA_NUL_STRING,
141 					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
142 	[RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY]= { .type = NLA_U32 },
143 	[RDMA_NLDEV_ATTR_RES_USECNT]		= { .type = NLA_U64 },
144 	[RDMA_NLDEV_ATTR_RES_SRQ]		= { .type = NLA_NESTED },
145 	[RDMA_NLDEV_ATTR_RES_SRQN]		= { .type = NLA_U32 },
146 	[RDMA_NLDEV_ATTR_RES_SRQ_ENTRY]		= { .type = NLA_NESTED },
147 	[RDMA_NLDEV_ATTR_MIN_RANGE]		= { .type = NLA_U32 },
148 	[RDMA_NLDEV_ATTR_MAX_RANGE]		= { .type = NLA_U32 },
149 	[RDMA_NLDEV_ATTR_SM_LID]		= { .type = NLA_U32 },
150 	[RDMA_NLDEV_ATTR_SUBNET_PREFIX]		= { .type = NLA_U64 },
151 	[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]	= { .type = NLA_U32 },
152 	[RDMA_NLDEV_ATTR_STAT_MODE]		= { .type = NLA_U32 },
153 	[RDMA_NLDEV_ATTR_STAT_RES]		= { .type = NLA_U32 },
154 	[RDMA_NLDEV_ATTR_STAT_COUNTER]		= { .type = NLA_NESTED },
155 	[RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY]	= { .type = NLA_NESTED },
156 	[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]       = { .type = NLA_U32 },
157 	[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]       = { .type = NLA_NESTED },
158 	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY]  = { .type = NLA_NESTED },
159 	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME] = { .type = NLA_NUL_STRING },
160 	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE] = { .type = NLA_U64 },
161 	[RDMA_NLDEV_ATTR_SYS_IMAGE_GUID]	= { .type = NLA_U64 },
162 	[RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID]	= { .type = NLA_U32 },
163 	[RDMA_NLDEV_NET_NS_FD]			= { .type = NLA_U32 },
164 	[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]	= { .type = NLA_U8 },
165 	[RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK]	= { .type = NLA_U8 },
166 	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX]	= { .type = NLA_U32 },
167 	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC] = { .type = NLA_U8 },
168 	[RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE] = { .type = NLA_U8 },
169 	[RDMA_NLDEV_ATTR_DRIVER_DETAILS]	= { .type = NLA_U8 },
170 	[RDMA_NLDEV_ATTR_DEV_TYPE]		= { .type = NLA_U8 },
171 	[RDMA_NLDEV_ATTR_PARENT_NAME]		= { .type = NLA_NUL_STRING },
172 	[RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE]	= { .type = NLA_U8 },
173 	[RDMA_NLDEV_ATTR_EVENT_TYPE]		= { .type = NLA_U8 },
174 	[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED] = { .type = NLA_U8 },
175 };
176 
177 static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
178 				      enum rdma_nldev_print_type print_type)
179 {
180 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, name))
181 		return -EMSGSIZE;
182 	if (print_type != RDMA_NLDEV_PRINT_TYPE_UNSPEC &&
183 	    nla_put_u8(msg, RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE, print_type))
184 		return -EMSGSIZE;
185 
186 	return 0;
187 }
188 
189 static int _rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name,
190 				   enum rdma_nldev_print_type print_type,
191 				   u32 value)
192 {
193 	if (put_driver_name_print_type(msg, name, print_type))
194 		return -EMSGSIZE;
195 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DRIVER_U32, value))
196 		return -EMSGSIZE;
197 
198 	return 0;
199 }
200 
201 static int _rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name,
202 				   enum rdma_nldev_print_type print_type,
203 				   u64 value)
204 {
205 	if (put_driver_name_print_type(msg, name, print_type))
206 		return -EMSGSIZE;
207 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_DRIVER_U64, value,
208 			      RDMA_NLDEV_ATTR_PAD))
209 		return -EMSGSIZE;
210 
211 	return 0;
212 }
213 
214 int rdma_nl_put_driver_string(struct sk_buff *msg, const char *name,
215 			      const char *str)
216 {
217 	if (put_driver_name_print_type(msg, name,
218 				       RDMA_NLDEV_PRINT_TYPE_UNSPEC))
219 		return -EMSGSIZE;
220 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, str))
221 		return -EMSGSIZE;
222 
223 	return 0;
224 }
225 EXPORT_SYMBOL(rdma_nl_put_driver_string);
226 
227 int rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name, u32 value)
228 {
229 	return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
230 				       value);
231 }
232 EXPORT_SYMBOL(rdma_nl_put_driver_u32);
233 
234 int rdma_nl_put_driver_u32_hex(struct sk_buff *msg, const char *name,
235 			       u32 value)
236 {
237 	return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX,
238 				       value);
239 }
240 EXPORT_SYMBOL(rdma_nl_put_driver_u32_hex);
241 
242 int rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, u64 value)
243 {
244 	return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
245 				       value);
246 }
247 EXPORT_SYMBOL(rdma_nl_put_driver_u64);
248 
249 int rdma_nl_put_driver_u64_hex(struct sk_buff *msg, const char *name, u64 value)
250 {
251 	return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX,
252 				       value);
253 }
254 EXPORT_SYMBOL(rdma_nl_put_driver_u64_hex);
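
/*
 * Example (not part of this file): a minimal sketch of how a provider
 * driver's .fill_res_qp_entry callback, which fill_res_qp_entry_query()
 * below invokes, might use the helpers above to add driver-specific
 * attributes. The mydrv_* names, to_my_qp() and the qp fields are
 * hypothetical; only the nesting under RDMA_NLDEV_ATTR_DRIVER and the
 * rdma_nl_put_driver_*() calls reflect the exported API.
 *
 *	static int mydrv_fill_res_qp_entry(struct sk_buff *msg,
 *					   struct ib_qp *ibqp)
 *	{
 *		struct my_qp *qp = to_my_qp(ibqp);
 *		struct nlattr *table_attr;
 *
 *		table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
 *		if (!table_attr)
 *			return -EMSGSIZE;
 *
 *		if (rdma_nl_put_driver_u32(msg, "sq_depth", qp->sq_depth) ||
 *		    rdma_nl_put_driver_u32_hex(msg, "flags", qp->flags) ||
 *		    rdma_nl_put_driver_string(msg, "state", "ready"))
 *			goto err;
 *
 *		nla_nest_end(msg, table_attr);
 *		return 0;
 *	err:
 *		nla_nest_cancel(msg, table_attr);
 *		return -EMSGSIZE;
 *	}
 */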
255 
256 bool rdma_nl_get_privileged_qkey(void)
257 {
258 	return privileged_qkey;
259 }
260 EXPORT_SYMBOL(rdma_nl_get_privileged_qkey);
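
/*
 * Example (not part of this file): a hedged sketch of how a Q_Key
 * validation path might consult this knob. is_controlled_qkey() is a
 * hypothetical predicate for Q_Keys that normally require privilege;
 * only rdma_nl_get_privileged_qkey() is the real exported interface,
 * and callers may additionally honour their own capability checks.
 *
 *	if (is_controlled_qkey(qkey) && !rdma_nl_get_privileged_qkey())
 *		return -EPERM;
 */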
261 
262 static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
263 {
264 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
265 		return -EMSGSIZE;
266 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME,
267 			   dev_name(&device->dev)))
268 		return -EMSGSIZE;
269 
270 	return 0;
271 }
272 
273 static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
274 {
275 	char fw[IB_FW_VERSION_NAME_MAX];
276 	int ret = 0;
277 	u32 port;
278 
279 	if (fill_nldev_handle(msg, device))
280 		return -EMSGSIZE;
281 
282 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, rdma_end_port(device)))
283 		return -EMSGSIZE;
284 
285 	BUILD_BUG_ON(sizeof(device->attrs.device_cap_flags) != sizeof(u64));
286 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
287 			      device->attrs.device_cap_flags,
288 			      RDMA_NLDEV_ATTR_PAD))
289 		return -EMSGSIZE;
290 
291 	ib_get_device_fw_str(device, fw);
292 	/* A device without FW has strlen(fw) == 0 */
293 	if (strlen(fw) && nla_put_string(msg, RDMA_NLDEV_ATTR_FW_VERSION, fw))
294 		return -EMSGSIZE;
295 
296 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_NODE_GUID,
297 			      be64_to_cpu(device->node_guid),
298 			      RDMA_NLDEV_ATTR_PAD))
299 		return -EMSGSIZE;
300 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SYS_IMAGE_GUID,
301 			      be64_to_cpu(device->attrs.sys_image_guid),
302 			      RDMA_NLDEV_ATTR_PAD))
303 		return -EMSGSIZE;
304 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_NODE_TYPE, device->node_type))
305 		return -EMSGSIZE;
306 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, device->use_cq_dim))
307 		return -EMSGSIZE;
308 
309 	if (device->type &&
310 	    nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_TYPE, device->type))
311 		return -EMSGSIZE;
312 
313 	if (device->parent &&
314 	    nla_put_string(msg, RDMA_NLDEV_ATTR_PARENT_NAME,
315 			   dev_name(&device->parent->dev)))
316 		return -EMSGSIZE;
317 
318 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE,
319 		       device->name_assign_type))
320 		return -EMSGSIZE;
321 
322 	/*
323 	 * The link type is determined from the first port. An mlx4 device,
324 	 * which can potentially have two different link types for the same
325 	 * IB device, is considered something better avoided in the future.
326 	 */
327 	port = rdma_start_port(device);
328 	if (rdma_cap_opa_mad(device, port))
329 		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "opa");
330 	else if (rdma_protocol_ib(device, port))
331 		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "ib");
332 	else if (rdma_protocol_iwarp(device, port))
333 		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "iw");
334 	else if (rdma_protocol_roce(device, port))
335 		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "roce");
336 	else if (rdma_protocol_usnic(device, port))
337 		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL,
338 				     "usnic");
339 	return ret;
340 }
341 
342 static int fill_port_info(struct sk_buff *msg,
343 			  struct ib_device *device, u32 port,
344 			  const struct net *net)
345 {
346 	struct net_device *netdev = NULL;
347 	struct ib_port_attr attr;
348 	int ret;
349 	u64 cap_flags = 0;
350 
351 	if (fill_nldev_handle(msg, device))
352 		return -EMSGSIZE;
353 
354 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
355 		return -EMSGSIZE;
356 
357 	ret = ib_query_port(device, port, &attr);
358 	if (ret)
359 		return ret;
360 
361 	if (rdma_protocol_ib(device, port)) {
362 		BUILD_BUG_ON((sizeof(attr.port_cap_flags) +
363 				sizeof(attr.port_cap_flags2)) > sizeof(u64));
364 		cap_flags = attr.port_cap_flags |
365 			((u64)attr.port_cap_flags2 << 32);
366 		if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
367 				      cap_flags, RDMA_NLDEV_ATTR_PAD))
368 			return -EMSGSIZE;
369 		if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX,
370 				      attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD))
371 			return -EMSGSIZE;
372 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid))
373 			return -EMSGSIZE;
374 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, attr.sm_lid))
375 			return -EMSGSIZE;
376 		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_LMC, attr.lmc))
377 			return -EMSGSIZE;
378 	}
379 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_STATE, attr.state))
380 		return -EMSGSIZE;
381 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state))
382 		return -EMSGSIZE;
383 
384 	netdev = ib_device_get_netdev(device, port);
385 	if (netdev && net_eq(dev_net(netdev), net)) {
386 		ret = nla_put_u32(msg,
387 				  RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
388 		if (ret)
389 			goto out;
390 		ret = nla_put_string(msg,
391 				     RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);
392 	}
393 
394 out:
395 	dev_put(netdev);
396 	return ret;
397 }
398 
399 static int fill_res_info_entry(struct sk_buff *msg,
400 			       const char *name, u64 curr)
401 {
402 	struct nlattr *entry_attr;
403 
404 	entry_attr = nla_nest_start_noflag(msg,
405 					   RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY);
406 	if (!entry_attr)
407 		return -EMSGSIZE;
408 
409 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME, name))
410 		goto err;
411 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR, curr,
412 			      RDMA_NLDEV_ATTR_PAD))
413 		goto err;
414 
415 	nla_nest_end(msg, entry_attr);
416 	return 0;
417 
418 err:
419 	nla_nest_cancel(msg, entry_attr);
420 	return -EMSGSIZE;
421 }
422 
423 static int fill_res_info(struct sk_buff *msg, struct ib_device *device,
424 			 bool show_details)
425 {
426 	static const char * const names[RDMA_RESTRACK_MAX] = {
427 		[RDMA_RESTRACK_PD] = "pd",
428 		[RDMA_RESTRACK_CQ] = "cq",
429 		[RDMA_RESTRACK_QP] = "qp",
430 		[RDMA_RESTRACK_CM_ID] = "cm_id",
431 		[RDMA_RESTRACK_MR] = "mr",
432 		[RDMA_RESTRACK_CTX] = "ctx",
433 		[RDMA_RESTRACK_SRQ] = "srq",
434 	};
435 
436 	struct nlattr *table_attr;
437 	int ret, i, curr;
438 
439 	if (fill_nldev_handle(msg, device))
440 		return -EMSGSIZE;
441 
442 	table_attr = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_RES_SUMMARY);
443 	if (!table_attr)
444 		return -EMSGSIZE;
445 
446 	for (i = 0; i < RDMA_RESTRACK_MAX; i++) {
447 		if (!names[i])
448 			continue;
449 		curr = rdma_restrack_count(device, i, show_details);
450 		ret = fill_res_info_entry(msg, names[i], curr);
451 		if (ret)
452 			goto err;
453 	}
454 
455 	nla_nest_end(msg, table_attr);
456 	return 0;
457 
458 err:
459 	nla_nest_cancel(msg, table_attr);
460 	return ret;
461 }
462 
463 static int fill_res_name_pid(struct sk_buff *msg,
464 			     struct rdma_restrack_entry *res)
465 {
466 	int err = 0;
467 
468 	/*
469 	 * For user resources, the user should read /proc/PID/comm to get the
470 	 * name of the task.
471 	 */
472 	if (rdma_is_kernel_res(res)) {
473 		err = nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
474 				     res->kern_name);
475 	} else {
476 		pid_t pid;
477 
478 		pid = task_pid_vnr(res->task);
479 		/*
480 		 * If the task is dead and in a zombie state, there
481 		 * is no need to print the PID anymore.
482 		 */
483 		if (pid)
484 			/*
485 			 * This part is racy: the task can be killed and the
486 			 * PID will become zero right here, but that is OK.
487 			 * The next query won't return a PID. We don't promise
488 			 * real-time reflection of SW objects.
489 			 */
490 			err = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, pid);
491 	}
492 
493 	return err ? -EMSGSIZE : 0;
494 }
495 
496 static int fill_res_qp_entry_query(struct sk_buff *msg,
497 				   struct rdma_restrack_entry *res,
498 				   struct ib_device *dev,
499 				   struct ib_qp *qp)
500 {
501 	struct ib_qp_init_attr qp_init_attr;
502 	struct ib_qp_attr qp_attr;
503 	int ret;
504 
505 	ret = ib_query_qp(qp, &qp_attr, 0, &qp_init_attr);
506 	if (ret)
507 		return ret;
508 
509 	if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) {
510 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN,
511 				qp_attr.dest_qp_num))
512 			goto err;
513 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQ_PSN,
514 				qp_attr.rq_psn))
515 			goto err;
516 	}
517 
518 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SQ_PSN, qp_attr.sq_psn))
519 		goto err;
520 
521 	if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC ||
522 	    qp->qp_type == IB_QPT_XRC_INI || qp->qp_type == IB_QPT_XRC_TGT) {
523 		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE,
524 			       qp_attr.path_mig_state))
525 			goto err;
526 	}
527 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, qp->qp_type))
528 		goto err;
529 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
530 		goto err;
531 
532 	if (dev->ops.fill_res_qp_entry)
533 		return dev->ops.fill_res_qp_entry(msg, qp);
534 	return 0;
535 
536 err:	return -EMSGSIZE;
537 }
538 
539 static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
540 			     struct rdma_restrack_entry *res, uint32_t port)
541 {
542 	struct ib_qp *qp = container_of(res, struct ib_qp, res);
543 	struct ib_device *dev = qp->device;
544 	int ret;
545 
546 	if (port && port != qp->port)
547 		return -EAGAIN;
548 
549 	/* In create_qp() port is not set yet */
550 	if (qp->port && nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp->port))
551 		return -EMSGSIZE;
552 
553 	ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num);
554 	if (ret)
555 		return -EMSGSIZE;
556 
557 	if (!rdma_is_kernel_res(res) &&
558 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, qp->pd->res.id))
559 		return -EMSGSIZE;
560 
561 	ret = fill_res_name_pid(msg, res);
562 	if (ret)
563 		return -EMSGSIZE;
564 
565 	return fill_res_qp_entry_query(msg, res, dev, qp);
566 }
567 
568 static int fill_res_qp_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
569 				 struct rdma_restrack_entry *res, uint32_t port)
570 {
571 	struct ib_qp *qp = container_of(res, struct ib_qp, res);
572 	struct ib_device *dev = qp->device;
573 
574 	if (port && port != qp->port)
575 		return -EAGAIN;
576 	if (!dev->ops.fill_res_qp_entry_raw)
577 		return -EINVAL;
578 	return dev->ops.fill_res_qp_entry_raw(msg, qp);
579 }
580 
581 static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin,
582 				struct rdma_restrack_entry *res, uint32_t port)
583 {
584 	struct rdma_id_private *id_priv =
585 				container_of(res, struct rdma_id_private, res);
586 	struct ib_device *dev = id_priv->id.device;
587 	struct rdma_cm_id *cm_id = &id_priv->id;
588 
589 	if (port && port != cm_id->port_num)
590 		return -EAGAIN;
591 
592 	if (cm_id->port_num &&
593 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num))
594 		goto err;
595 
596 	if (id_priv->qp_num) {
597 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, id_priv->qp_num))
598 			goto err;
599 		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, cm_id->qp_type))
600 			goto err;
601 	}
602 
603 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PS, cm_id->ps))
604 		goto err;
605 
606 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, id_priv->state))
607 		goto err;
608 
609 	if (cm_id->route.addr.src_addr.ss_family &&
610 	    nla_put(msg, RDMA_NLDEV_ATTR_RES_SRC_ADDR,
611 		    sizeof(cm_id->route.addr.src_addr),
612 		    &cm_id->route.addr.src_addr))
613 		goto err;
614 	if (cm_id->route.addr.dst_addr.ss_family &&
615 	    nla_put(msg, RDMA_NLDEV_ATTR_RES_DST_ADDR,
616 		    sizeof(cm_id->route.addr.dst_addr),
617 		    &cm_id->route.addr.dst_addr))
618 		goto err;
619 
620 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CM_IDN, res->id))
621 		goto err;
622 
623 	if (fill_res_name_pid(msg, res))
624 		goto err;
625 
626 	if (dev->ops.fill_res_cm_id_entry)
627 		return dev->ops.fill_res_cm_id_entry(msg, cm_id);
628 	return 0;
629 
630 err: return -EMSGSIZE;
631 }
632 
633 static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin,
634 			     struct rdma_restrack_entry *res, uint32_t port)
635 {
636 	struct ib_cq *cq = container_of(res, struct ib_cq, res);
637 	struct ib_device *dev = cq->device;
638 
639 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe))
640 		return -EMSGSIZE;
641 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
642 			      atomic_read(&cq->usecnt), RDMA_NLDEV_ATTR_PAD))
643 		return -EMSGSIZE;
644 
645 	/* Poll context is only valid for kernel CQs */
646 	if (rdma_is_kernel_res(res) &&
647 	    nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx))
648 		return -EMSGSIZE;
649 
650 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, (cq->dim != NULL)))
651 		return -EMSGSIZE;
652 
653 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, res->id))
654 		return -EMSGSIZE;
655 	if (!rdma_is_kernel_res(res) &&
656 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
657 			cq->uobject->uevent.uobject.context->res.id))
658 		return -EMSGSIZE;
659 
660 	if (fill_res_name_pid(msg, res))
661 		return -EMSGSIZE;
662 
663 	return (dev->ops.fill_res_cq_entry) ?
664 		dev->ops.fill_res_cq_entry(msg, cq) : 0;
665 }
666 
667 static int fill_res_cq_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
668 				 struct rdma_restrack_entry *res, uint32_t port)
669 {
670 	struct ib_cq *cq = container_of(res, struct ib_cq, res);
671 	struct ib_device *dev = cq->device;
672 
673 	if (!dev->ops.fill_res_cq_entry_raw)
674 		return -EINVAL;
675 	return dev->ops.fill_res_cq_entry_raw(msg, cq);
676 }
677 
678 static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
679 			     struct rdma_restrack_entry *res, uint32_t port)
680 {
681 	struct ib_mr *mr = container_of(res, struct ib_mr, res);
682 	struct ib_device *dev = mr->pd->device;
683 
684 	if (has_cap_net_admin) {
685 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey))
686 			return -EMSGSIZE;
687 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey))
688 			return -EMSGSIZE;
689 	}
690 
691 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length,
692 			      RDMA_NLDEV_ATTR_PAD))
693 		return -EMSGSIZE;
694 
695 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
696 		return -EMSGSIZE;
697 
698 	if (!rdma_is_kernel_res(res) &&
699 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, mr->pd->res.id))
700 		return -EMSGSIZE;
701 
702 	if (fill_res_name_pid(msg, res))
703 		return -EMSGSIZE;
704 
705 	return (dev->ops.fill_res_mr_entry) ?
706 		       dev->ops.fill_res_mr_entry(msg, mr) :
707 		       0;
708 }
709 
710 static int fill_res_mr_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
711 				 struct rdma_restrack_entry *res, uint32_t port)
712 {
713 	struct ib_mr *mr = container_of(res, struct ib_mr, res);
714 	struct ib_device *dev = mr->pd->device;
715 
716 	if (!dev->ops.fill_res_mr_entry_raw)
717 		return -EINVAL;
718 	return dev->ops.fill_res_mr_entry_raw(msg, mr);
719 }
720 
721 static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin,
722 			     struct rdma_restrack_entry *res, uint32_t port)
723 {
724 	struct ib_pd *pd = container_of(res, struct ib_pd, res);
725 
726 	if (has_cap_net_admin) {
727 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY,
728 				pd->local_dma_lkey))
729 			goto err;
730 		if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) &&
731 		    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY,
732 				pd->unsafe_global_rkey))
733 			goto err;
734 	}
735 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
736 			      atomic_read(&pd->usecnt), RDMA_NLDEV_ATTR_PAD))
737 		goto err;
738 
739 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, res->id))
740 		goto err;
741 
742 	if (!rdma_is_kernel_res(res) &&
743 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
744 			pd->uobject->context->res.id))
745 		goto err;
746 
747 	return fill_res_name_pid(msg, res);
748 
749 err:	return -EMSGSIZE;
750 }
751 
752 static int fill_res_ctx_entry(struct sk_buff *msg, bool has_cap_net_admin,
753 			      struct rdma_restrack_entry *res, uint32_t port)
754 {
755 	struct ib_ucontext *ctx = container_of(res, struct ib_ucontext, res);
756 
757 	if (rdma_is_kernel_res(res))
758 		return 0;
759 
760 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN, ctx->res.id))
761 		return -EMSGSIZE;
762 
763 	return fill_res_name_pid(msg, res);
764 }
765 
766 static int fill_res_range_qp_entry(struct sk_buff *msg, uint32_t min_range,
767 				   uint32_t max_range)
768 {
769 	struct nlattr *entry_attr;
770 
771 	if (!min_range)
772 		return 0;
773 
774 	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
775 	if (!entry_attr)
776 		return -EMSGSIZE;
777 
778 	if (min_range == max_range) {
779 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, min_range))
780 			goto err;
781 	} else {
782 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_MIN_RANGE, min_range))
783 			goto err;
784 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_MAX_RANGE, max_range))
785 			goto err;
786 	}
787 	nla_nest_end(msg, entry_attr);
788 	return 0;
789 
790 err:
791 	nla_nest_cancel(msg, entry_attr);
792 	return -EMSGSIZE;
793 }
794 
795 static int fill_res_srq_qps(struct sk_buff *msg, struct ib_srq *srq)
796 {
797 	uint32_t min_range = 0, prev = 0;
798 	struct rdma_restrack_entry *res;
799 	struct rdma_restrack_root *rt;
800 	struct nlattr *table_attr;
801 	struct ib_qp *qp = NULL;
802 	unsigned long id = 0;
803 
804 	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP);
805 	if (!table_attr)
806 		return -EMSGSIZE;
807 
808 	rt = &srq->device->res[RDMA_RESTRACK_QP];
809 	xa_lock(&rt->xa);
810 	xa_for_each(&rt->xa, id, res) {
811 		if (!rdma_restrack_get(res))
812 			continue;
813 
814 		qp = container_of(res, struct ib_qp, res);
815 		if (!qp->srq || (qp->srq->res.id != srq->res.id)) {
816 			rdma_restrack_put(res);
817 			continue;
818 		}
819 
820 		if (qp->qp_num < prev)
821 			/* qp_num should be ascending */
822 			goto err_loop;
823 
824 		if (min_range == 0) {
825 			min_range = qp->qp_num;
826 		} else if (qp->qp_num > (prev + 1)) {
827 			if (fill_res_range_qp_entry(msg, min_range, prev))
828 				goto err_loop;
829 
830 			min_range = qp->qp_num;
831 		}
832 		prev = qp->qp_num;
833 		rdma_restrack_put(res);
834 	}
835 
836 	xa_unlock(&rt->xa);
837 
838 	if (fill_res_range_qp_entry(msg, min_range, prev))
839 		goto err;
840 
841 	nla_nest_end(msg, table_attr);
842 	return 0;
843 
844 err_loop:
845 	rdma_restrack_put(res);
846 	xa_unlock(&rt->xa);
847 err:
848 	nla_nest_cancel(msg, table_attr);
849 	return -EMSGSIZE;
850 }
851 
852 static int fill_res_srq_entry(struct sk_buff *msg, bool has_cap_net_admin,
853 			      struct rdma_restrack_entry *res, uint32_t port)
854 {
855 	struct ib_srq *srq = container_of(res, struct ib_srq, res);
856 	struct ib_device *dev = srq->device;
857 
858 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SRQN, srq->res.id))
859 		goto err;
860 
861 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, srq->srq_type))
862 		goto err;
863 
864 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, srq->pd->res.id))
865 		goto err;
866 
867 	if (ib_srq_has_cq(srq->srq_type)) {
868 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN,
869 				srq->ext.cq->res.id))
870 			goto err;
871 	}
872 
873 	if (fill_res_srq_qps(msg, srq))
874 		goto err;
875 
876 	if (fill_res_name_pid(msg, res))
877 		goto err;
878 
879 	if (dev->ops.fill_res_srq_entry)
880 		return dev->ops.fill_res_srq_entry(msg, srq);
881 
882 	return 0;
883 
884 err:
885 	return -EMSGSIZE;
886 }
887 
888 static int fill_res_srq_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
889 				 struct rdma_restrack_entry *res, uint32_t port)
890 {
891 	struct ib_srq *srq = container_of(res, struct ib_srq, res);
892 	struct ib_device *dev = srq->device;
893 
894 	if (!dev->ops.fill_res_srq_entry_raw)
895 		return -EINVAL;
896 	return dev->ops.fill_res_srq_entry_raw(msg, srq);
897 }
898 
899 static int fill_stat_counter_mode(struct sk_buff *msg,
900 				  struct rdma_counter *counter)
901 {
902 	struct rdma_counter_mode *m = &counter->mode;
903 
904 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, m->mode))
905 		return -EMSGSIZE;
906 
907 	if (m->mode == RDMA_COUNTER_MODE_AUTO) {
908 		if ((m->mask & RDMA_COUNTER_MASK_QP_TYPE) &&
909 		    nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, m->param.qp_type))
910 			return -EMSGSIZE;
911 
912 		if ((m->mask & RDMA_COUNTER_MASK_PID) &&
913 		    fill_res_name_pid(msg, &counter->res))
914 			return -EMSGSIZE;
915 	}
916 
917 	return 0;
918 }
919 
920 static int fill_stat_counter_qp_entry(struct sk_buff *msg, u32 qpn)
921 {
922 	struct nlattr *entry_attr;
923 
924 	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
925 	if (!entry_attr)
926 		return -EMSGSIZE;
927 
928 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn))
929 		goto err;
930 
931 	nla_nest_end(msg, entry_attr);
932 	return 0;
933 
934 err:
935 	nla_nest_cancel(msg, entry_attr);
936 	return -EMSGSIZE;
937 }
938 
939 static int fill_stat_counter_qps(struct sk_buff *msg,
940 				 struct rdma_counter *counter)
941 {
942 	struct rdma_restrack_entry *res;
943 	struct rdma_restrack_root *rt;
944 	struct nlattr *table_attr;
945 	struct ib_qp *qp = NULL;
946 	unsigned long id = 0;
947 	int ret = 0;
948 
949 	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP);
950 	if (!table_attr)
951 		return -EMSGSIZE;
952 
953 	rt = &counter->device->res[RDMA_RESTRACK_QP];
954 	xa_lock(&rt->xa);
955 	xa_for_each(&rt->xa, id, res) {
956 		qp = container_of(res, struct ib_qp, res);
957 		if (!qp->counter || (qp->counter->id != counter->id))
958 			continue;
959 
960 		ret = fill_stat_counter_qp_entry(msg, qp->qp_num);
961 		if (ret)
962 			goto err;
963 	}
964 
965 	xa_unlock(&rt->xa);
966 	nla_nest_end(msg, table_attr);
967 	return 0;
968 
969 err:
970 	xa_unlock(&rt->xa);
971 	nla_nest_cancel(msg, table_attr);
972 	return ret;
973 }
974 
975 int rdma_nl_stat_hwcounter_entry(struct sk_buff *msg, const char *name,
976 				 u64 value)
977 {
978 	struct nlattr *entry_attr;
979 
980 	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY);
981 	if (!entry_attr)
982 		return -EMSGSIZE;
983 
984 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME,
985 			   name))
986 		goto err;
987 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE,
988 			      value, RDMA_NLDEV_ATTR_PAD))
989 		goto err;
990 
991 	nla_nest_end(msg, entry_attr);
992 	return 0;
993 
994 err:
995 	nla_nest_cancel(msg, entry_attr);
996 	return -EMSGSIZE;
997 }
998 EXPORT_SYMBOL(rdma_nl_stat_hwcounter_entry);
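
/*
 * Example (not part of this file): a brief sketch of how a driver's
 * .fill_stat_mr_entry callback, used by fill_stat_mr_entry() below,
 * might report per-MR hardware counters with this helper. The counter
 * name and the mr_stats() accessor are hypothetical.
 *
 *	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
 *	if (!table_attr)
 *		return -EMSGSIZE;
 *	if (rdma_nl_stat_hwcounter_entry(msg, "page_faults",
 *					 mr_stats(mr)->page_faults)) {
 *		nla_nest_cancel(msg, table_attr);
 *		return -EMSGSIZE;
 *	}
 *	nla_nest_end(msg, table_attr);
 *	return 0;
 */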
999 
1000 static int fill_stat_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
1001 			      struct rdma_restrack_entry *res, uint32_t port)
1002 {
1003 	struct ib_mr *mr = container_of(res, struct ib_mr, res);
1004 	struct ib_device *dev = mr->pd->device;
1005 
1006 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
1007 		goto err;
1008 
1009 	if (dev->ops.fill_stat_mr_entry)
1010 		return dev->ops.fill_stat_mr_entry(msg, mr);
1011 	return 0;
1012 
1013 err:
1014 	return -EMSGSIZE;
1015 }
1016 
1017 static int fill_stat_counter_hwcounters(struct sk_buff *msg,
1018 					struct rdma_counter *counter)
1019 {
1020 	struct rdma_hw_stats *st = counter->stats;
1021 	struct nlattr *table_attr;
1022 	int i;
1023 
1024 	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
1025 	if (!table_attr)
1026 		return -EMSGSIZE;
1027 
1028 	mutex_lock(&st->lock);
1029 	for (i = 0; i < st->num_counters; i++) {
1030 		if (test_bit(i, st->is_disabled))
1031 			continue;
1032 		if (rdma_nl_stat_hwcounter_entry(msg, st->descs[i].name,
1033 						 st->value[i]))
1034 			goto err;
1035 	}
1036 	mutex_unlock(&st->lock);
1037 
1038 	nla_nest_end(msg, table_attr);
1039 	return 0;
1040 
1041 err:
1042 	mutex_unlock(&st->lock);
1043 	nla_nest_cancel(msg, table_attr);
1044 	return -EMSGSIZE;
1045 }
1046 
1047 static int fill_res_counter_entry(struct sk_buff *msg, bool has_cap_net_admin,
1048 				  struct rdma_restrack_entry *res,
1049 				  uint32_t port)
1050 {
1051 	struct rdma_counter *counter =
1052 		container_of(res, struct rdma_counter, res);
1053 
1054 	if (port && port != counter->port)
1055 		return -EAGAIN;
1056 
1057 	/* Dump it even if the query failed */
1058 	rdma_counter_query_stats(counter);
1059 
1060 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, counter->port) ||
1061 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, counter->id) ||
1062 	    fill_stat_counter_mode(msg, counter) ||
1063 	    fill_stat_counter_qps(msg, counter) ||
1064 	    fill_stat_counter_hwcounters(msg, counter))
1065 		return -EMSGSIZE;
1066 
1067 	return 0;
1068 }
1069 
1070 static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1071 			  struct netlink_ext_ack *extack)
1072 {
1073 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1074 	struct ib_device *device;
1075 	struct sk_buff *msg;
1076 	u32 index;
1077 	int err;
1078 
1079 	err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1080 			    nldev_policy, NL_VALIDATE_LIBERAL, extack);
1081 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1082 		return -EINVAL;
1083 
1084 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1085 
1086 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1087 	if (!device)
1088 		return -EINVAL;
1089 
1090 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1091 	if (!msg) {
1092 		err = -ENOMEM;
1093 		goto err;
1094 	}
1095 
1096 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1097 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
1098 			0, 0);
1099 	if (!nlh) {
1100 		err = -EMSGSIZE;
1101 		goto err_free;
1102 	}
1103 
1104 	err = fill_dev_info(msg, device);
1105 	if (err)
1106 		goto err_free;
1107 
1108 	nlmsg_end(msg, nlh);
1109 
1110 	ib_device_put(device);
1111 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1112 
1113 err_free:
1114 	nlmsg_free(msg);
1115 err:
1116 	ib_device_put(device);
1117 	return err;
1118 }
1119 
1120 static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1121 			  struct netlink_ext_ack *extack)
1122 {
1123 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1124 	struct ib_device *device;
1125 	u32 index;
1126 	int err;
1127 
1128 	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1129 			    nldev_policy, extack);
1130 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1131 		return -EINVAL;
1132 
1133 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1134 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1135 	if (!device)
1136 		return -EINVAL;
1137 
1138 	if (tb[RDMA_NLDEV_ATTR_DEV_NAME]) {
1139 		char name[IB_DEVICE_NAME_MAX] = {};
1140 
1141 		nla_strscpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
1142 			    IB_DEVICE_NAME_MAX);
1143 		if (strlen(name) == 0) {
1144 			err = -EINVAL;
1145 			goto done;
1146 		}
1147 		err = ib_device_rename(device, name);
1148 		goto done;
1149 	}
1150 
1151 	if (tb[RDMA_NLDEV_NET_NS_FD]) {
1152 		u32 ns_fd;
1153 
1154 		ns_fd = nla_get_u32(tb[RDMA_NLDEV_NET_NS_FD]);
1155 		err = ib_device_set_netns_put(skb, device, ns_fd);
1156 		goto put_done;
1157 	}
1158 
1159 	if (tb[RDMA_NLDEV_ATTR_DEV_DIM]) {
1160 		u8 use_dim;
1161 
1162 		use_dim = nla_get_u8(tb[RDMA_NLDEV_ATTR_DEV_DIM]);
1163 		err = ib_device_set_dim(device,  use_dim);
1164 		goto done;
1165 	}
1166 
1167 done:
1168 	ib_device_put(device);
1169 put_done:
1170 	return err;
1171 }
1172 
1173 static int _nldev_get_dumpit(struct ib_device *device,
1174 			     struct sk_buff *skb,
1175 			     struct netlink_callback *cb,
1176 			     unsigned int idx)
1177 {
1178 	int start = cb->args[0];
1179 	struct nlmsghdr *nlh;
1180 
1181 	if (idx < start)
1182 		return 0;
1183 
1184 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
1185 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
1186 			0, NLM_F_MULTI);
1187 
1188 	if (!nlh || fill_dev_info(skb, device)) {
1189 		nlmsg_cancel(skb, nlh);
1190 		goto out;
1191 	}
1192 
1193 	nlmsg_end(skb, nlh);
1194 
1195 	idx++;
1196 
1197 out:	cb->args[0] = idx;
1198 	return skb->len;
1199 }
1200 
1201 static int nldev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
1202 {
1203 	/*
1204 	 * There is no need to take a lock, because
1205 	 * we are relying on ib_core's locking.
1206 	 */
1207 	return ib_enum_all_devs(_nldev_get_dumpit, skb, cb);
1208 }
1209 
1210 static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1211 			       struct netlink_ext_ack *extack)
1212 {
1213 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1214 	struct ib_device *device;
1215 	struct sk_buff *msg;
1216 	u32 index;
1217 	u32 port;
1218 	int err;
1219 
1220 	err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1221 			    nldev_policy, NL_VALIDATE_LIBERAL, extack);
1222 	if (err ||
1223 	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
1224 	    !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
1225 		return -EINVAL;
1226 
1227 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1228 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1229 	if (!device)
1230 		return -EINVAL;
1231 
1232 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1233 	if (!rdma_is_port_valid(device, port)) {
1234 		err = -EINVAL;
1235 		goto err;
1236 	}
1237 
1238 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1239 	if (!msg) {
1240 		err = -ENOMEM;
1241 		goto err;
1242 	}
1243 
1244 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1245 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
1246 			0, 0);
1247 	if (!nlh) {
1248 		err = -EMSGSIZE;
1249 		goto err_free;
1250 	}
1251 
1252 	err = fill_port_info(msg, device, port, sock_net(skb->sk));
1253 	if (err)
1254 		goto err_free;
1255 
1256 	nlmsg_end(msg, nlh);
1257 	ib_device_put(device);
1258 
1259 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1260 
1261 err_free:
1262 	nlmsg_free(msg);
1263 err:
1264 	ib_device_put(device);
1265 	return err;
1266 }
1267 
1268 static int nldev_port_get_dumpit(struct sk_buff *skb,
1269 				 struct netlink_callback *cb)
1270 {
1271 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1272 	struct ib_device *device;
1273 	int start = cb->args[0];
1274 	struct nlmsghdr *nlh;
1275 	u32 idx = 0;
1276 	u32 ifindex;
1277 	int err;
1278 	unsigned int p;
1279 
1280 	err = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1281 			    nldev_policy, NL_VALIDATE_LIBERAL, NULL);
1282 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1283 		return -EINVAL;
1284 
1285 	ifindex = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1286 	device = ib_device_get_by_index(sock_net(skb->sk), ifindex);
1287 	if (!device)
1288 		return -EINVAL;
1289 
1290 	rdma_for_each_port (device, p) {
1291 		/*
1292 		 * The dumpit function returns all information starting from a
1293 		 * specific index. This index is taken from the netlink
1294 		 * request sent by the user and is available
1295 		 * in cb->args[0].
1296 		 *
1297 		 * Usually, the user doesn't fill this field, which causes
1298 		 * everything to be returned.
1299 		 *
1300 		 */
1301 		if (idx < start) {
1302 			idx++;
1303 			continue;
1304 		}
1305 
1306 		nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
1307 				cb->nlh->nlmsg_seq,
1308 				RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1309 						 RDMA_NLDEV_CMD_PORT_GET),
1310 				0, NLM_F_MULTI);
1311 
1312 		if (!nlh || fill_port_info(skb, device, p, sock_net(skb->sk))) {
1313 			nlmsg_cancel(skb, nlh);
1314 			goto out;
1315 		}
1316 		idx++;
1317 		nlmsg_end(skb, nlh);
1318 	}
1319 
1320 out:
1321 	ib_device_put(device);
1322 	cb->args[0] = idx;
1323 	return skb->len;
1324 }
1325 
1326 static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1327 			      struct netlink_ext_ack *extack)
1328 {
1329 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1330 	bool show_details = false;
1331 	struct ib_device *device;
1332 	struct sk_buff *msg;
1333 	u32 index;
1334 	int ret;
1335 
1336 	ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1337 			    nldev_policy, NL_VALIDATE_LIBERAL, extack);
1338 	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1339 		return -EINVAL;
1340 
1341 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1342 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1343 	if (!device)
1344 		return -EINVAL;
1345 
1346 	if (tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS])
1347 		show_details = nla_get_u8(tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]);
1348 
1349 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1350 	if (!msg) {
1351 		ret = -ENOMEM;
1352 		goto err;
1353 	}
1354 
1355 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1356 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
1357 			0, 0);
1358 	if (!nlh) {
1359 		ret = -EMSGSIZE;
1360 		goto err_free;
1361 	}
1362 
1363 	ret = fill_res_info(msg, device, show_details);
1364 	if (ret)
1365 		goto err_free;
1366 
1367 	nlmsg_end(msg, nlh);
1368 	ib_device_put(device);
1369 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1370 
1371 err_free:
1372 	nlmsg_free(msg);
1373 err:
1374 	ib_device_put(device);
1375 	return ret;
1376 }
1377 
1378 static int _nldev_res_get_dumpit(struct ib_device *device,
1379 				 struct sk_buff *skb,
1380 				 struct netlink_callback *cb,
1381 				 unsigned int idx)
1382 {
1383 	int start = cb->args[0];
1384 	struct nlmsghdr *nlh;
1385 
1386 	if (idx < start)
1387 		return 0;
1388 
1389 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
1390 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
1391 			0, NLM_F_MULTI);
1392 
1393 	if (!nlh || fill_res_info(skb, device, false)) {
1394 		nlmsg_cancel(skb, nlh);
1395 		goto out;
1396 	}
1397 	nlmsg_end(skb, nlh);
1398 
1399 	idx++;
1400 
1401 out:
1402 	cb->args[0] = idx;
1403 	return skb->len;
1404 }
1405 
1406 static int nldev_res_get_dumpit(struct sk_buff *skb,
1407 				struct netlink_callback *cb)
1408 {
1409 	return ib_enum_all_devs(_nldev_res_get_dumpit, skb, cb);
1410 }
1411 
1412 struct nldev_fill_res_entry {
1413 	enum rdma_nldev_attr nldev_attr;
1414 	u8 flags;
1415 	u32 entry;
1416 	u32 id;
1417 };
1418 
1419 enum nldev_res_flags {
1420 	NLDEV_PER_DEV = 1 << 0,
1421 };
1422 
1423 static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
1424 	[RDMA_RESTRACK_QP] = {
1425 		.nldev_attr = RDMA_NLDEV_ATTR_RES_QP,
1426 		.entry = RDMA_NLDEV_ATTR_RES_QP_ENTRY,
1427 		.id = RDMA_NLDEV_ATTR_RES_LQPN,
1428 	},
1429 	[RDMA_RESTRACK_CM_ID] = {
1430 		.nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID,
1431 		.entry = RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY,
1432 		.id = RDMA_NLDEV_ATTR_RES_CM_IDN,
1433 	},
1434 	[RDMA_RESTRACK_CQ] = {
1435 		.nldev_attr = RDMA_NLDEV_ATTR_RES_CQ,
1436 		.flags = NLDEV_PER_DEV,
1437 		.entry = RDMA_NLDEV_ATTR_RES_CQ_ENTRY,
1438 		.id = RDMA_NLDEV_ATTR_RES_CQN,
1439 	},
1440 	[RDMA_RESTRACK_MR] = {
1441 		.nldev_attr = RDMA_NLDEV_ATTR_RES_MR,
1442 		.flags = NLDEV_PER_DEV,
1443 		.entry = RDMA_NLDEV_ATTR_RES_MR_ENTRY,
1444 		.id = RDMA_NLDEV_ATTR_RES_MRN,
1445 	},
1446 	[RDMA_RESTRACK_PD] = {
1447 		.nldev_attr = RDMA_NLDEV_ATTR_RES_PD,
1448 		.flags = NLDEV_PER_DEV,
1449 		.entry = RDMA_NLDEV_ATTR_RES_PD_ENTRY,
1450 		.id = RDMA_NLDEV_ATTR_RES_PDN,
1451 	},
1452 	[RDMA_RESTRACK_COUNTER] = {
1453 		.nldev_attr = RDMA_NLDEV_ATTR_STAT_COUNTER,
1454 		.entry = RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY,
1455 		.id = RDMA_NLDEV_ATTR_STAT_COUNTER_ID,
1456 	},
1457 	[RDMA_RESTRACK_CTX] = {
1458 		.nldev_attr = RDMA_NLDEV_ATTR_RES_CTX,
1459 		.flags = NLDEV_PER_DEV,
1460 		.entry = RDMA_NLDEV_ATTR_RES_CTX_ENTRY,
1461 		.id = RDMA_NLDEV_ATTR_RES_CTXN,
1462 	},
1463 	[RDMA_RESTRACK_SRQ] = {
1464 		.nldev_attr = RDMA_NLDEV_ATTR_RES_SRQ,
1465 		.flags = NLDEV_PER_DEV,
1466 		.entry = RDMA_NLDEV_ATTR_RES_SRQ_ENTRY,
1467 		.id = RDMA_NLDEV_ATTR_RES_SRQN,
1468 	},
1469 
1470 };
1471 
1472 static noinline_for_stack int
1473 res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1474 		    struct netlink_ext_ack *extack,
1475 		    enum rdma_restrack_type res_type,
1476 		    res_fill_func_t fill_func)
1477 {
1478 	const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
1479 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1480 	struct rdma_restrack_entry *res;
1481 	struct ib_device *device;
1482 	u32 index, id, port = 0;
1483 	bool has_cap_net_admin;
1484 	struct sk_buff *msg;
1485 	int ret;
1486 
1487 	ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1488 			    nldev_policy, NL_VALIDATE_LIBERAL, extack);
1489 	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !fe->id || !tb[fe->id])
1490 		return -EINVAL;
1491 
1492 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1493 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1494 	if (!device)
1495 		return -EINVAL;
1496 
1497 	if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1498 		port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1499 		if (!rdma_is_port_valid(device, port)) {
1500 			ret = -EINVAL;
1501 			goto err;
1502 		}
1503 	}
1504 
1505 	if ((port && fe->flags & NLDEV_PER_DEV) ||
1506 	    (!port && ~fe->flags & NLDEV_PER_DEV)) {
1507 		ret = -EINVAL;
1508 		goto err;
1509 	}
1510 
1511 	id = nla_get_u32(tb[fe->id]);
1512 	res = rdma_restrack_get_byid(device, res_type, id);
1513 	if (IS_ERR(res)) {
1514 		ret = PTR_ERR(res);
1515 		goto err;
1516 	}
1517 
1518 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1519 	if (!msg) {
1520 		ret = -ENOMEM;
1521 		goto err_get;
1522 	}
1523 
1524 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1525 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1526 					 RDMA_NL_GET_OP(nlh->nlmsg_type)),
1527 			0, 0);
1528 
1529 	if (!nlh || fill_nldev_handle(msg, device)) {
1530 		ret = -EMSGSIZE;
1531 		goto err_free;
1532 	}
1533 
1534 	has_cap_net_admin = netlink_capable(skb, CAP_NET_ADMIN);
1535 
1536 	ret = fill_func(msg, has_cap_net_admin, res, port);
1537 	if (ret)
1538 		goto err_free;
1539 
1540 	rdma_restrack_put(res);
1541 	nlmsg_end(msg, nlh);
1542 	ib_device_put(device);
1543 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1544 
1545 err_free:
1546 	nlmsg_free(msg);
1547 err_get:
1548 	rdma_restrack_put(res);
1549 err:
1550 	ib_device_put(device);
1551 	return ret;
1552 }
1553 
1554 static int res_get_common_dumpit(struct sk_buff *skb,
1555 				 struct netlink_callback *cb,
1556 				 enum rdma_restrack_type res_type,
1557 				 res_fill_func_t fill_func)
1558 {
1559 	const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
1560 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1561 	struct rdma_restrack_entry *res;
1562 	struct rdma_restrack_root *rt;
1563 	int err, ret = 0, idx = 0;
1564 	bool show_details = false;
1565 	struct nlattr *table_attr;
1566 	struct nlattr *entry_attr;
1567 	struct ib_device *device;
1568 	int start = cb->args[0];
1569 	bool has_cap_net_admin;
1570 	struct nlmsghdr *nlh;
1571 	unsigned long id;
1572 	u32 index, port = 0;
1573 	bool filled = false;
1574 
1575 	err = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1576 			    nldev_policy, NL_VALIDATE_LIBERAL, NULL);
1577 	/*
1578 	 * Right now, we expect the device index in order to get res information,
1579 	 * but it is possible to extend this code to return all devices in
1580 	 * one shot by checking the existence of RDMA_NLDEV_ATTR_DEV_INDEX;
1581 	 * if it doesn't exist, we would iterate over all devices.
1582 	 *
1583 	 * But it is not needed for now.
1584 	 */
1585 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1586 		return -EINVAL;
1587 
1588 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1589 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1590 	if (!device)
1591 		return -EINVAL;
1592 
1593 	if (tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS])
1594 		show_details = nla_get_u8(tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]);
1595 
1596 	/*
1597 	 * If no PORT_INDEX is supplied, we return all resources from that device.
1598 	 */
1599 	if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1600 		port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1601 		if (!rdma_is_port_valid(device, port)) {
1602 			ret = -EINVAL;
1603 			goto err_index;
1604 		}
1605 	}
1606 
1607 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
1608 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1609 					 RDMA_NL_GET_OP(cb->nlh->nlmsg_type)),
1610 			0, NLM_F_MULTI);
1611 
1612 	if (!nlh || fill_nldev_handle(skb, device)) {
1613 		ret = -EMSGSIZE;
1614 		goto err;
1615 	}
1616 
1617 	table_attr = nla_nest_start_noflag(skb, fe->nldev_attr);
1618 	if (!table_attr) {
1619 		ret = -EMSGSIZE;
1620 		goto err;
1621 	}
1622 
1623 	has_cap_net_admin = netlink_capable(cb->skb, CAP_NET_ADMIN);
1624 
1625 	rt = &device->res[res_type];
1626 	xa_lock(&rt->xa);
1627 	/*
1628 	 * FIXME: if skipping ahead is something common, this loop should
1629 	 * use xas_for_each & xas_pause to optimize; we can have a lot of
1630 	 * objects.
1631 	 */
1632 	xa_for_each(&rt->xa, id, res) {
1633 		if (xa_get_mark(&rt->xa, res->id, RESTRACK_DD) && !show_details)
1634 			goto next;
1635 
1636 		if (idx < start || !rdma_restrack_get(res))
1637 			goto next;
1638 
1639 		xa_unlock(&rt->xa);
1640 
1641 		filled = true;
1642 
1643 		entry_attr = nla_nest_start_noflag(skb, fe->entry);
1644 		if (!entry_attr) {
1645 			ret = -EMSGSIZE;
1646 			rdma_restrack_put(res);
1647 			goto msg_full;
1648 		}
1649 
1650 		ret = fill_func(skb, has_cap_net_admin, res, port);
1651 
1652 		rdma_restrack_put(res);
1653 
1654 		if (ret) {
1655 			nla_nest_cancel(skb, entry_attr);
1656 			if (ret == -EMSGSIZE)
1657 				goto msg_full;
1658 			if (ret == -EAGAIN)
1659 				goto again;
1660 			goto res_err;
1661 		}
1662 		nla_nest_end(skb, entry_attr);
1663 again:		xa_lock(&rt->xa);
1664 next:		idx++;
1665 	}
1666 	xa_unlock(&rt->xa);
1667 
1668 msg_full:
1669 	nla_nest_end(skb, table_attr);
1670 	nlmsg_end(skb, nlh);
1671 	cb->args[0] = idx;
1672 
1673 	/*
1674 	 * No more entries to fill: cancel the message and
1675 	 * return 0 to mark the end of the dumpit.
1676 	 */
1677 	if (!filled)
1678 		goto err;
1679 
1680 	ib_device_put(device);
1681 	return skb->len;
1682 
1683 res_err:
1684 	nla_nest_cancel(skb, table_attr);
1685 
1686 err:
1687 	nlmsg_cancel(skb, nlh);
1688 
1689 err_index:
1690 	ib_device_put(device);
1691 	return ret;
1692 }
1693 
1694 #define RES_GET_FUNCS(name, type)                                              \
1695 	static int nldev_res_get_##name##_dumpit(struct sk_buff *skb,          \
1696 						 struct netlink_callback *cb)  \
1697 	{                                                                      \
1698 		return res_get_common_dumpit(skb, cb, type,                    \
1699 					     fill_res_##name##_entry);         \
1700 	}                                                                      \
1701 	static int nldev_res_get_##name##_doit(struct sk_buff *skb,            \
1702 					       struct nlmsghdr *nlh,           \
1703 					       struct netlink_ext_ack *extack) \
1704 	{                                                                      \
1705 		return res_get_common_doit(skb, nlh, extack, type,             \
1706 					   fill_res_##name##_entry);           \
1707 	}
1708 
1709 RES_GET_FUNCS(qp, RDMA_RESTRACK_QP);
1710 RES_GET_FUNCS(qp_raw, RDMA_RESTRACK_QP);
1711 RES_GET_FUNCS(cm_id, RDMA_RESTRACK_CM_ID);
1712 RES_GET_FUNCS(cq, RDMA_RESTRACK_CQ);
1713 RES_GET_FUNCS(cq_raw, RDMA_RESTRACK_CQ);
1714 RES_GET_FUNCS(pd, RDMA_RESTRACK_PD);
1715 RES_GET_FUNCS(mr, RDMA_RESTRACK_MR);
1716 RES_GET_FUNCS(mr_raw, RDMA_RESTRACK_MR);
1717 RES_GET_FUNCS(counter, RDMA_RESTRACK_COUNTER);
1718 RES_GET_FUNCS(ctx, RDMA_RESTRACK_CTX);
1719 RES_GET_FUNCS(srq, RDMA_RESTRACK_SRQ);
1720 RES_GET_FUNCS(srq_raw, RDMA_RESTRACK_SRQ);
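
/*
 * For reference, each RES_GET_FUNCS() invocation above expands to a pair
 * of thin wrappers; e.g. RES_GET_FUNCS(qp, RDMA_RESTRACK_QP) defines
 * nldev_res_get_qp_dumpit() and nldev_res_get_qp_doit(), which forward
 * to res_get_common_dumpit()/res_get_common_doit() with RDMA_RESTRACK_QP
 * and fill_res_qp_entry.
 */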
1721 
1722 static LIST_HEAD(link_ops);
1723 static DECLARE_RWSEM(link_ops_rwsem);
1724 
1725 static const struct rdma_link_ops *link_ops_get(const char *type)
1726 {
1727 	const struct rdma_link_ops *ops;
1728 
1729 	list_for_each_entry(ops, &link_ops, list) {
1730 		if (!strcmp(ops->type, type))
1731 			goto out;
1732 	}
1733 	ops = NULL;
1734 out:
1735 	return ops;
1736 }
1737 
1738 void rdma_link_register(struct rdma_link_ops *ops)
1739 {
1740 	down_write(&link_ops_rwsem);
1741 	if (WARN_ON_ONCE(link_ops_get(ops->type)))
1742 		goto out;
1743 	list_add(&ops->list, &link_ops);
1744 out:
1745 	up_write(&link_ops_rwsem);
1746 }
1747 EXPORT_SYMBOL(rdma_link_register);
1748 
1749 void rdma_link_unregister(struct rdma_link_ops *ops)
1750 {
1751 	down_write(&link_ops_rwsem);
1752 	list_del(&ops->list);
1753 	up_write(&link_ops_rwsem);
1754 }
1755 EXPORT_SYMBOL(rdma_link_unregister);
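
/*
 * Example (not part of this file): a hedged sketch of how a link provider
 * might plug into the registry above. "mydrv" and mydrv_newlink() are
 * hypothetical; the .newlink signature matches the call in nldev_newlink()
 * below, and the module alias matches its request_module("rdma-link-%s", ...)
 * lookup.
 *
 *	static int mydrv_newlink(const char *ibdev_name, struct net_device *ndev)
 *	{
 *		... create and register the ib_device bound to ndev ...
 *	}
 *
 *	static struct rdma_link_ops mydrv_link_ops = {
 *		.type = "mydrv",
 *		.newlink = mydrv_newlink,
 *	};
 *
 *	In the provider's module_init():
 *		rdma_link_register(&mydrv_link_ops);
 *
 *	MODULE_ALIAS("rdma-link-mydrv");
 */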
1756 
1757 static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
1758 			  struct netlink_ext_ack *extack)
1759 {
1760 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1761 	char ibdev_name[IB_DEVICE_NAME_MAX];
1762 	const struct rdma_link_ops *ops;
1763 	char ndev_name[IFNAMSIZ];
1764 	struct net_device *ndev;
1765 	char type[IFNAMSIZ];
1766 	int err;
1767 
1768 	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1769 			    nldev_policy, extack);
1770 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] ||
1771 	    !tb[RDMA_NLDEV_ATTR_LINK_TYPE] || !tb[RDMA_NLDEV_ATTR_NDEV_NAME])
1772 		return -EINVAL;
1773 
1774 	nla_strscpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
1775 		    sizeof(ibdev_name));
1776 	if (strchr(ibdev_name, '%') || strlen(ibdev_name) == 0)
1777 		return -EINVAL;
1778 
1779 	nla_strscpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type));
1780 	nla_strscpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME],
1781 		    sizeof(ndev_name));
1782 
1783 	ndev = dev_get_by_name(sock_net(skb->sk), ndev_name);
1784 	if (!ndev)
1785 		return -ENODEV;
1786 
1787 	down_read(&link_ops_rwsem);
1788 	ops = link_ops_get(type);
1789 #ifdef CONFIG_MODULES
1790 	if (!ops) {
1791 		up_read(&link_ops_rwsem);
1792 		request_module("rdma-link-%s", type);
1793 		down_read(&link_ops_rwsem);
1794 		ops = link_ops_get(type);
1795 	}
1796 #endif
1797 	err = ops ? ops->newlink(ibdev_name, ndev) : -EINVAL;
1798 	up_read(&link_ops_rwsem);
1799 	dev_put(ndev);
1800 
1801 	return err;
1802 }
1803 
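/*
 * RDMA_NLDEV_CMD_DELLINK handler: unregister the device with the given
 * index, but only when the driver allows it (IBK_ALLOW_USER_UNREG).
 */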
1804 static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
1805 			  struct netlink_ext_ack *extack)
1806 {
1807 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1808 	struct ib_device *device;
1809 	u32 index;
1810 	int err;
1811 
1812 	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1813 			    nldev_policy, extack);
1814 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1815 		return -EINVAL;
1816 
1817 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1818 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1819 	if (!device)
1820 		return -EINVAL;
1821 
1822 	if (!(device->attrs.kernel_cap_flags & IBK_ALLOW_USER_UNREG)) {
1823 		ib_device_put(device);
1824 		return -EINVAL;
1825 	}
1826 
1827 	ib_unregister_device_and_put(device);
1828 	return 0;
1829 }
1830 
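/*
 * RDMA_NLDEV_CMD_GET_CHARDEV handler: look up the char device a kernel
 * client (named by RDMA_NLDEV_ATTR_CHARDEV_TYPE, e.g. "uverbs") exposes
 * for an optional device/port, and reply with its dev_t, ABI version and
 * name.
 */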
1831 static int nldev_get_chardev(struct sk_buff *skb, struct nlmsghdr *nlh,
1832 			     struct netlink_ext_ack *extack)
1833 {
1834 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1835 	char client_name[RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE];
1836 	struct ib_client_nl_info data = {};
1837 	struct ib_device *ibdev = NULL;
1838 	struct sk_buff *msg;
1839 	u32 index;
1840 	int err;
1841 
1842 	err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
1843 			    NL_VALIDATE_LIBERAL, extack);
1844 	if (err || !tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE])
1845 		return -EINVAL;
1846 
1847 	nla_strscpy(client_name, tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE],
1848 		    sizeof(client_name));
1849 
1850 	if (tb[RDMA_NLDEV_ATTR_DEV_INDEX]) {
1851 		index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1852 		ibdev = ib_device_get_by_index(sock_net(skb->sk), index);
1853 		if (!ibdev)
1854 			return -EINVAL;
1855 
1856 		if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1857 			data.port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1858 			if (!rdma_is_port_valid(ibdev, data.port)) {
1859 				err = -EINVAL;
1860 				goto out_put;
1861 			}
1862 		} else {
1863 			data.port = -1;
1864 		}
1865 	} else if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1866 		return -EINVAL;
1867 	}
1868 
1869 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1870 	if (!msg) {
1871 		err = -ENOMEM;
1872 		goto out_put;
1873 	}
1874 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1875 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1876 					 RDMA_NLDEV_CMD_GET_CHARDEV),
1877 			0, 0);
1878 	if (!nlh) {
1879 		err = -EMSGSIZE;
1880 		goto out_nlmsg;
1881 	}
1882 
1883 	data.nl_msg = msg;
1884 	err = ib_get_client_nl_info(ibdev, client_name, &data);
1885 	if (err)
1886 		goto out_nlmsg;
1887 
1888 	err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV,
1889 				huge_encode_dev(data.cdev->devt),
1890 				RDMA_NLDEV_ATTR_PAD);
1891 	if (err)
1892 		goto out_data;
1893 	err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV_ABI, data.abi,
1894 				RDMA_NLDEV_ATTR_PAD);
1895 	if (err)
1896 		goto out_data;
1897 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_CHARDEV_NAME,
1898 			   dev_name(data.cdev))) {
1899 		err = -EMSGSIZE;
1900 		goto out_data;
1901 	}
1902 
1903 	nlmsg_end(msg, nlh);
1904 	put_device(data.cdev);
1905 	if (ibdev)
1906 		ib_device_put(ibdev);
1907 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1908 
1909 out_data:
1910 	put_device(data.cdev);
1911 out_nlmsg:
1912 	nlmsg_free(msg);
1913 out_put:
1914 	if (ibdev)
1915 		ib_device_put(ibdev);
1916 	return err;
1917 }
1918 
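/*
 * RDMA_NLDEV_CMD_SYS_GET handler: report the global rdma settings
 * (netns mode, privileged qkey mode, monitor support and copy-on-fork)
 * in a unicast reply.
 */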
1919 static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1920 			      struct netlink_ext_ack *extack)
1921 {
1922 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1923 	struct sk_buff *msg;
1924 	int err;
1925 
1926 	err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1927 			    nldev_policy, NL_VALIDATE_LIBERAL, extack);
1928 	if (err)
1929 		return err;
1930 
1931 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1932 	if (!msg)
1933 		return -ENOMEM;
1934 
1935 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1936 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1937 					 RDMA_NLDEV_CMD_SYS_GET),
1938 			0, 0);
1939 	if (!nlh) {
1940 		nlmsg_free(msg);
1941 		return -EMSGSIZE;
1942 	}
1943 
1944 	err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_NETNS_MODE,
1945 			 (u8)ib_devices_shared_netns);
1946 	if (err) {
1947 		nlmsg_free(msg);
1948 		return err;
1949 	}
1950 
1951 	err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE,
1952 			 (u8)privileged_qkey);
1953 	if (err) {
1954 		nlmsg_free(msg);
1955 		return err;
1956 	}
1957 
1958 	err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_MONITOR_MODE, 1);
1959 	if (err) {
1960 		nlmsg_free(msg);
1961 		return err;
1962 	}
1963 	/*
1964 	 * Copy-on-fork is supported.
1965 	 * See commits:
1966 	 * 70e806e4e645 ("mm: Do early cow for pinned pages during fork() for ptes")
1967 	 * 4eae4efa2c29 ("hugetlb: do early cow when page pinned on src mm")
1968 	 * for more details. Don't backport this without them.
1969 	 *
1970 	 * Return value ignored on purpose, assume copy-on-fork is not
1971 	 * supported in case of failure.
1972 	 */
1973 	nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK, 1);
1974 
1975 	nlmsg_end(msg, nlh);
1976 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1977 }
1978 
1979 static int nldev_set_sys_set_netns_doit(struct nlattr *tb[])
1980 {
1981 	u8 enable;
1982 	int err;
1983 
1984 	enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]);
1985 	/* Only 0 and 1 are supported */
1986 	if (enable > 1)
1987 		return -EINVAL;
1988 
1989 	err = rdma_compatdev_set(enable);
1990 	return err;
1991 }
1992 
1993 static int nldev_set_sys_set_pqkey_doit(struct nlattr *tb[])
1994 {
1995 	u8 enable;
1996 
1997 	enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE]);
1998 	/* Only 0 and 1 are supported */
1999 	if (enable > 1)
2000 		return -EINVAL;
2001 
2002 	privileged_qkey = enable;
2003 	return 0;
2004 }
2005 
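/*
 * RDMA_NLDEV_CMD_SYS_SET handler: dispatch to the netns-mode or
 * privileged-qkey-mode setter depending on which attribute was sent.
 */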
2006 static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
2007 				  struct netlink_ext_ack *extack)
2008 {
2009 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2010 	int err;
2011 
2012 	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2013 			  nldev_policy, extack);
2014 	if (err)
2015 		return -EINVAL;
2016 
2017 	if (tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE])
2018 		return nldev_set_sys_set_netns_doit(tb);
2019 
2020 	if (tb[RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE])
2021 		return nldev_set_sys_set_pqkey_doit(tb);
2022 
2023 	return -EINVAL;
2024 }
2025 
2026 
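/*
 * Change the counter mode of a port. Auto mode binds QPs to counters
 * according to the given mask; otherwise the LQPN is bound either to the
 * counter named by STAT_COUNTER_ID or to a newly allocated one, and the
 * resulting binding is echoed into the reply message.
 */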
2027 static int nldev_stat_set_mode_doit(struct sk_buff *msg,
2028 				    struct netlink_ext_ack *extack,
2029 				    struct nlattr *tb[],
2030 				    struct ib_device *device, u32 port)
2031 {
2032 	u32 mode, mask = 0, qpn, cntn = 0;
2033 	bool opcnt = false;
2034 	int ret;
2035 
2036 	/* Currently only counters for QPs are supported */
2037 	if (!tb[RDMA_NLDEV_ATTR_STAT_RES] ||
2038 	    nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
2039 		return -EINVAL;
2040 
2041 	if (tb[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED])
2042 		opcnt = !!nla_get_u8(
2043 			tb[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED]);
2044 
2045 	mode = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_MODE]);
2046 	if (mode == RDMA_COUNTER_MODE_AUTO) {
2047 		if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK])
2048 			mask = nla_get_u32(
2049 				tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]);
2050 		return rdma_counter_set_auto_mode(device, port, mask, opcnt,
2051 						  extack);
2052 	}
2053 
2054 	if (!tb[RDMA_NLDEV_ATTR_RES_LQPN])
2055 		return -EINVAL;
2056 
2057 	qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
2058 	if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) {
2059 		cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
2060 		ret = rdma_counter_bind_qpn(device, port, qpn, cntn);
2061 		if (ret)
2062 			return ret;
2063 	} else {
2064 		ret = rdma_counter_bind_qpn_alloc(device, port, qpn, &cntn);
2065 		if (ret)
2066 			return ret;
2067 	}
2068 
2069 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
2070 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
2071 		ret = -EMSGSIZE;
2072 		goto err_fill;
2073 	}
2074 
2075 	return 0;
2076 
2077 err_fill:
2078 	rdma_counter_unbind_qpn(device, port, qpn, cntn);
2079 	return ret;
2080 }
2081 
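/*
 * Enable/disable optional (dynamic) hw counters: the nested HWCOUNTERS
 * attribute lists the counter indices to enable; every other optional
 * counter on the port is disabled.
 */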
2082 static int nldev_stat_set_counter_dynamic_doit(struct nlattr *tb[],
2083 					       struct ib_device *device,
2084 					       u32 port)
2085 {
2086 	struct rdma_hw_stats *stats;
2087 	struct nlattr *entry_attr;
2088 	unsigned long *target;
2089 	int rem, i, ret = 0;
2090 	u32 index;
2091 
2092 	stats = ib_get_hw_stats_port(device, port);
2093 	if (!stats)
2094 		return -EINVAL;
2095 
2096 	target = kcalloc(BITS_TO_LONGS(stats->num_counters),
2097 			 sizeof(*stats->is_disabled), GFP_KERNEL);
2098 	if (!target)
2099 		return -ENOMEM;
2100 
2101 	nla_for_each_nested(entry_attr, tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS],
2102 			    rem) {
2103 		index = nla_get_u32(entry_attr);
2104 		if ((index >= stats->num_counters) ||
2105 		    !(stats->descs[index].flags & IB_STAT_FLAG_OPTIONAL)) {
2106 			ret = -EINVAL;
2107 			goto out;
2108 		}
2109 
2110 		set_bit(index, target);
2111 	}
2112 
2113 	for (i = 0; i < stats->num_counters; i++) {
2114 		if (!(stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL))
2115 			continue;
2116 
2117 		ret = rdma_counter_modify(device, port, i, test_bit(i, target));
2118 		if (ret)
2119 			goto out;
2120 	}
2121 
2122 out:
2123 	kfree(target);
2124 	return ret;
2125 }
2126 
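/*
 * RDMA_NLDEV_CMD_STAT_SET handler: validate device/port and apply the
 * requested counter-mode and/or dynamic-counter changes, replying with
 * the device, port and any new QP/counter binding.
 */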
2127 static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
2128 			       struct netlink_ext_ack *extack)
2129 {
2130 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2131 	struct ib_device *device;
2132 	struct sk_buff *msg;
2133 	u32 index, port;
2134 	int ret;
2135 
2136 	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
2137 			  extack);
2138 	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
2139 	    !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
2140 		return -EINVAL;
2141 
2142 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2143 	device = ib_device_get_by_index(sock_net(skb->sk), index);
2144 	if (!device)
2145 		return -EINVAL;
2146 
2147 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
2148 	if (!rdma_is_port_valid(device, port)) {
2149 		ret = -EINVAL;
2150 		goto err_put_device;
2151 	}
2152 
2153 	if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] &&
2154 	    !tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) {
2155 		ret = -EINVAL;
2156 		goto err_put_device;
2157 	}
2158 
2159 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2160 	if (!msg) {
2161 		ret = -ENOMEM;
2162 		goto err_put_device;
2163 	}
2164 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
2165 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
2166 					 RDMA_NLDEV_CMD_STAT_SET),
2167 			0, 0);
2168 	if (!nlh || fill_nldev_handle(msg, device) ||
2169 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) {
2170 		ret = -EMSGSIZE;
2171 		goto err_free_msg;
2172 	}
2173 
2174 	if (tb[RDMA_NLDEV_ATTR_STAT_MODE]) {
2175 		ret = nldev_stat_set_mode_doit(msg, extack, tb, device, port);
2176 		if (ret)
2177 			goto err_free_msg;
2178 	}
2179 
2180 	if (tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) {
2181 		ret = nldev_stat_set_counter_dynamic_doit(tb, device, port);
2182 		if (ret)
2183 			goto err_free_msg;
2184 	}
2185 
2186 	nlmsg_end(msg, nlh);
2187 	ib_device_put(device);
2188 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
2189 
2190 err_free_msg:
2191 	nlmsg_free(msg);
2192 err_put_device:
2193 	ib_device_put(device);
2194 	return ret;
2195 }
2196 
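/*
 * RDMA_NLDEV_CMD_STAT_DEL handler: unbind a QP (RES_LQPN) from the counter
 * given by STAT_COUNTER_ID and acknowledge the unbind in a unicast reply.
 */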
2197 static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
2198 			       struct netlink_ext_ack *extack)
2199 {
2200 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2201 	struct ib_device *device;
2202 	struct sk_buff *msg;
2203 	u32 index, port, qpn, cntn;
2204 	int ret;
2205 
2206 	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2207 			  nldev_policy, extack);
2208 	if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES] ||
2209 	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX] ||
2210 	    !tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID] ||
2211 	    !tb[RDMA_NLDEV_ATTR_RES_LQPN])
2212 		return -EINVAL;
2213 
2214 	if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
2215 		return -EINVAL;
2216 
2217 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2218 	device = ib_device_get_by_index(sock_net(skb->sk), index);
2219 	if (!device)
2220 		return -EINVAL;
2221 
2222 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
2223 	if (!rdma_is_port_valid(device, port)) {
2224 		ret = -EINVAL;
2225 		goto err;
2226 	}
2227 
2228 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2229 	if (!msg) {
2230 		ret = -ENOMEM;
2231 		goto err;
2232 	}
2233 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
2234 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
2235 					 RDMA_NLDEV_CMD_STAT_SET),
2236 			0, 0);
2237 	if (!nlh) {
2238 		ret = -EMSGSIZE;
2239 		goto err_fill;
2240 	}
2241 
2242 	cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
2243 	qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
2244 	if (fill_nldev_handle(msg, device) ||
2245 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
2246 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
2247 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
2248 		ret = -EMSGSIZE;
2249 		goto err_fill;
2250 	}
2251 
2252 	ret = rdma_counter_unbind_qpn(device, port, qpn, cntn);
2253 	if (ret)
2254 		goto err_fill;
2255 
2256 	nlmsg_end(msg, nlh);
2257 	ib_device_put(device);
2258 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
2259 
2260 err_fill:
2261 	nlmsg_free(msg);
2262 err:
2263 	ib_device_put(device);
2264 	return ret;
2265 }
2266 
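/*
 * Dump the default (per-port) hw counters: refresh them via get_hw_stats()
 * and emit one name/value entry per enabled counter, folding in the values
 * accumulated by bound counters.
 */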
2267 static noinline_for_stack int
2268 stat_get_doit_default_counter(struct sk_buff *skb, struct nlmsghdr *nlh,
2269 			      struct netlink_ext_ack *extack,
2270 			      struct nlattr *tb[])
2271 {
2272 	struct rdma_hw_stats *stats;
2273 	struct nlattr *table_attr;
2274 	struct ib_device *device;
2275 	int ret, num_cnts, i;
2276 	struct sk_buff *msg;
2277 	u32 index, port;
2278 	u64 v;
2279 
2280 	if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
2281 		return -EINVAL;
2282 
2283 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2284 	device = ib_device_get_by_index(sock_net(skb->sk), index);
2285 	if (!device)
2286 		return -EINVAL;
2287 
2288 	if (!device->ops.alloc_hw_port_stats || !device->ops.get_hw_stats) {
2289 		ret = -EINVAL;
2290 		goto err;
2291 	}
2292 
2293 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
2294 	stats = ib_get_hw_stats_port(device, port);
2295 	if (!stats) {
2296 		ret = -EINVAL;
2297 		goto err;
2298 	}
2299 
2300 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2301 	if (!msg) {
2302 		ret = -ENOMEM;
2303 		goto err;
2304 	}
2305 
2306 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
2307 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
2308 					 RDMA_NLDEV_CMD_STAT_GET),
2309 			0, 0);
2310 
2311 	if (!nlh || fill_nldev_handle(msg, device) ||
2312 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) {
2313 		ret = -EMSGSIZE;
2314 		goto err_msg;
2315 	}
2316 
2317 	mutex_lock(&stats->lock);
2318 
2319 	num_cnts = device->ops.get_hw_stats(device, stats, port, 0);
2320 	if (num_cnts < 0) {
2321 		ret = -EINVAL;
2322 		goto err_stats;
2323 	}
2324 
2325 	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
2326 	if (!table_attr) {
2327 		ret = -EMSGSIZE;
2328 		goto err_stats;
2329 	}
2330 	for (i = 0; i < num_cnts; i++) {
2331 		if (test_bit(i, stats->is_disabled))
2332 			continue;
2333 
2334 		v = stats->value[i] +
2335 			rdma_counter_get_hwstat_value(device, port, i);
2336 		if (rdma_nl_stat_hwcounter_entry(msg,
2337 						 stats->descs[i].name, v)) {
2338 			ret = -EMSGSIZE;
2339 			goto err_table;
2340 		}
2341 	}
2342 	nla_nest_end(msg, table_attr);
2343 
2344 	mutex_unlock(&stats->lock);
2345 	nlmsg_end(msg, nlh);
2346 	ib_device_put(device);
2347 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
2348 
2349 err_table:
2350 	nla_nest_cancel(msg, table_attr);
2351 err_stats:
2352 	mutex_unlock(&stats->lock);
2353 err_msg:
2354 	nlmsg_free(msg);
2355 err:
2356 	ib_device_put(device);
2357 	return ret;
2358 }
2359 
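/*
 * Report the QP counter mode of a port (plus auto-mode mask and
 * optional-counter state), or defer to the per-counter query when a
 * specific STAT_COUNTER_ID was given.
 */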
2360 static noinline_for_stack int
2361 stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh,
2362 		 struct netlink_ext_ack *extack, struct nlattr *tb[])
2363 
2364 {
2365 	static enum rdma_nl_counter_mode mode;
2366 	static enum rdma_nl_counter_mask mask;
2367 	struct ib_device *device;
2368 	struct sk_buff *msg;
2369 	u32 index, port;
2370 	bool opcnt;
2371 	int ret;
2372 
2373 	if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID])
2374 		return nldev_res_get_counter_doit(skb, nlh, extack);
2375 
2376 	if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] ||
2377 	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
2378 		return -EINVAL;
2379 
2380 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2381 	device = ib_device_get_by_index(sock_net(skb->sk), index);
2382 	if (!device)
2383 		return -EINVAL;
2384 
2385 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
2386 	if (!rdma_is_port_valid(device, port)) {
2387 		ret = -EINVAL;
2388 		goto err;
2389 	}
2390 
2391 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2392 	if (!msg) {
2393 		ret = -ENOMEM;
2394 		goto err;
2395 	}
2396 
2397 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
2398 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
2399 					 RDMA_NLDEV_CMD_STAT_GET),
2400 			0, 0);
2401 	if (!nlh) {
2402 		ret = -EMSGSIZE;
2403 		goto err_msg;
2404 	}
2405 
2406 	ret = rdma_counter_get_mode(device, port, &mode, &mask, &opcnt);
2407 	if (ret)
2408 		goto err_msg;
2409 
2410 	if (fill_nldev_handle(msg, device) ||
2411 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
2412 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, mode)) {
2413 		ret = -EMSGSIZE;
2414 		goto err_msg;
2415 	}
2416 
2417 	if ((mode == RDMA_COUNTER_MODE_AUTO) &&
2418 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK, mask)) {
2419 		ret = -EMSGSIZE;
2420 		goto err_msg;
2421 	}
2422 
2423 	if ((mode == RDMA_COUNTER_MODE_AUTO) &&
2424 	    nla_put_u8(msg, RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED, opcnt)) {
2425 		ret = -EMSGSIZE;
2426 		goto err_msg;
2427 	}
2428 
2429 	nlmsg_end(msg, nlh);
2430 	ib_device_put(device);
2431 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
2432 
2433 err_msg:
2434 	nlmsg_free(msg);
2435 err:
2436 	ib_device_put(device);
2437 	return ret;
2438 }
2439 
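/*
 * RDMA_NLDEV_CMD_STAT_GET handler: without STAT_RES return the default hw
 * counters, otherwise dispatch by resource type (QP counters or MR stats).
 */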
2440 static int nldev_stat_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
2441 			       struct netlink_ext_ack *extack)
2442 {
2443 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2444 	int ret;
2445 
2446 	ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2447 			    nldev_policy, NL_VALIDATE_LIBERAL, extack);
2448 	if (ret)
2449 		return -EINVAL;
2450 
2451 	if (!tb[RDMA_NLDEV_ATTR_STAT_RES])
2452 		return stat_get_doit_default_counter(skb, nlh, extack, tb);
2453 
2454 	switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) {
2455 	case RDMA_NLDEV_ATTR_RES_QP:
2456 		ret = stat_get_doit_qp(skb, nlh, extack, tb);
2457 		break;
2458 	case RDMA_NLDEV_ATTR_RES_MR:
2459 		ret = res_get_common_doit(skb, nlh, extack, RDMA_RESTRACK_MR,
2460 					  fill_stat_mr_entry);
2461 		break;
2462 	default:
2463 		ret = -EINVAL;
2464 		break;
2465 	}
2466 
2467 	return ret;
2468 }
2469 
2470 static int nldev_stat_get_dumpit(struct sk_buff *skb,
2471 				 struct netlink_callback *cb)
2472 {
2473 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2474 	int ret;
2475 
2476 	ret = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2477 			    nldev_policy, NL_VALIDATE_LIBERAL, NULL);
2478 	if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES])
2479 		return -EINVAL;
2480 
2481 	switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) {
2482 	case RDMA_NLDEV_ATTR_RES_QP:
2483 		ret = nldev_res_get_counter_dumpit(skb, cb);
2484 		break;
2485 	case RDMA_NLDEV_ATTR_RES_MR:
2486 		ret = res_get_common_dumpit(skb, cb, RDMA_RESTRACK_MR,
2487 					    fill_stat_mr_entry);
2488 		break;
2489 	default:
2490 		ret = -EINVAL;
2491 		break;
2492 	}
2493 
2494 	return ret;
2495 }
2496 
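/*
 * RDMA_NLDEV_CMD_STAT_GET_STATUS handler: list every hw counter of a port
 * with its index and, for optional counters, whether it is currently
 * enabled.
 */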
2497 static int nldev_stat_get_counter_status_doit(struct sk_buff *skb,
2498 					      struct nlmsghdr *nlh,
2499 					      struct netlink_ext_ack *extack)
2500 {
2501 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX], *table, *entry;
2502 	struct rdma_hw_stats *stats;
2503 	struct ib_device *device;
2504 	struct sk_buff *msg;
2505 	u32 devid, port;
2506 	int ret, i;
2507 
2508 	ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2509 			    nldev_policy, NL_VALIDATE_LIBERAL, extack);
2510 	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
2511 	    !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
2512 		return -EINVAL;
2513 
2514 	devid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2515 	device = ib_device_get_by_index(sock_net(skb->sk), devid);
2516 	if (!device)
2517 		return -EINVAL;
2518 
2519 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
2520 	if (!rdma_is_port_valid(device, port)) {
2521 		ret = -EINVAL;
2522 		goto err;
2523 	}
2524 
2525 	stats = ib_get_hw_stats_port(device, port);
2526 	if (!stats) {
2527 		ret = -EINVAL;
2528 		goto err;
2529 	}
2530 
2531 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2532 	if (!msg) {
2533 		ret = -ENOMEM;
2534 		goto err;
2535 	}
2536 
2537 	nlh = nlmsg_put(
2538 		msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
2539 		RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_STAT_GET_STATUS),
2540 		0, 0);
2541 
2542 	ret = -EMSGSIZE;
2543 	if (!nlh || fill_nldev_handle(msg, device) ||
2544 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
2545 		goto err_msg;
2546 
2547 	table = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
2548 	if (!table)
2549 		goto err_msg;
2550 
2551 	mutex_lock(&stats->lock);
2552 	for (i = 0; i < stats->num_counters; i++) {
2553 		entry = nla_nest_start(msg,
2554 				       RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY);
2555 		if (!entry)
2556 			goto err_msg_table;
2557 
2558 		if (nla_put_string(msg,
2559 				   RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME,
2560 				   stats->descs[i].name) ||
2561 		    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX, i))
2562 			goto err_msg_entry;
2563 
2564 		if ((stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) &&
2565 		    (nla_put_u8(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC,
2566 				!test_bit(i, stats->is_disabled))))
2567 			goto err_msg_entry;
2568 
2569 		nla_nest_end(msg, entry);
2570 	}
2571 	mutex_unlock(&stats->lock);
2572 
2573 	nla_nest_end(msg, table);
2574 	nlmsg_end(msg, nlh);
2575 	ib_device_put(device);
2576 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
2577 
2578 err_msg_entry:
2579 	nla_nest_cancel(msg, entry);
2580 err_msg_table:
2581 	mutex_unlock(&stats->lock);
2582 	nla_nest_cancel(msg, table);
2583 err_msg:
2584 	nlmsg_free(msg);
2585 err:
2586 	ib_device_put(device);
2587 	return ret;
2588 }
2589 
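/*
 * RDMA_NLDEV_CMD_NEWDEV handler: create a named sub device of the given
 * type (e.g. an SMI device) under the parent device.
 */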
2590 static int nldev_newdev(struct sk_buff *skb, struct nlmsghdr *nlh,
2591 			struct netlink_ext_ack *extack)
2592 {
2593 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2594 	enum rdma_nl_dev_type type;
2595 	struct ib_device *parent;
2596 	char name[IFNAMSIZ] = {};
2597 	u32 parentid;
2598 	int ret;
2599 
2600 	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2601 			  nldev_policy, extack);
2602 	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
2603 		!tb[RDMA_NLDEV_ATTR_DEV_NAME] || !tb[RDMA_NLDEV_ATTR_DEV_TYPE])
2604 		return -EINVAL;
2605 
2606 	nla_strscpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME], sizeof(name));
2607 	type = nla_get_u8(tb[RDMA_NLDEV_ATTR_DEV_TYPE]);
2608 	parentid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2609 	parent = ib_device_get_by_index(sock_net(skb->sk), parentid);
2610 	if (!parent)
2611 		return -EINVAL;
2612 
2613 	ret = ib_add_sub_device(parent, type, name);
2614 	ib_device_put(parent);
2615 
2616 	return ret;
2617 }
2618 
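/* RDMA_NLDEV_CMD_DELDEV handler: remove a sub device created via NEWDEV. */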
2619 static int nldev_deldev(struct sk_buff *skb, struct nlmsghdr *nlh,
2620 			struct netlink_ext_ack *extack)
2621 {
2622 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2623 	struct ib_device *device;
2624 	u32 devid;
2625 	int ret;
2626 
2627 	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2628 			  nldev_policy, extack);
2629 	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
2630 		return -EINVAL;
2631 
2632 	devid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2633 	device = ib_device_get_by_index(sock_net(skb->sk), devid);
2634 	if (!device)
2635 		return -EINVAL;
2636 
2637 	return ib_del_sub_device_and_put(device);
2638 }
2639 
2640 static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
2641 	[RDMA_NLDEV_CMD_GET] = {
2642 		.doit = nldev_get_doit,
2643 		.dump = nldev_get_dumpit,
2644 	},
2645 	[RDMA_NLDEV_CMD_GET_CHARDEV] = {
2646 		.doit = nldev_get_chardev,
2647 	},
2648 	[RDMA_NLDEV_CMD_SET] = {
2649 		.doit = nldev_set_doit,
2650 		.flags = RDMA_NL_ADMIN_PERM,
2651 	},
2652 	[RDMA_NLDEV_CMD_NEWLINK] = {
2653 		.doit = nldev_newlink,
2654 		.flags = RDMA_NL_ADMIN_PERM,
2655 	},
2656 	[RDMA_NLDEV_CMD_DELLINK] = {
2657 		.doit = nldev_dellink,
2658 		.flags = RDMA_NL_ADMIN_PERM,
2659 	},
2660 	[RDMA_NLDEV_CMD_PORT_GET] = {
2661 		.doit = nldev_port_get_doit,
2662 		.dump = nldev_port_get_dumpit,
2663 	},
2664 	[RDMA_NLDEV_CMD_RES_GET] = {
2665 		.doit = nldev_res_get_doit,
2666 		.dump = nldev_res_get_dumpit,
2667 	},
2668 	[RDMA_NLDEV_CMD_RES_QP_GET] = {
2669 		.doit = nldev_res_get_qp_doit,
2670 		.dump = nldev_res_get_qp_dumpit,
2671 	},
2672 	[RDMA_NLDEV_CMD_RES_CM_ID_GET] = {
2673 		.doit = nldev_res_get_cm_id_doit,
2674 		.dump = nldev_res_get_cm_id_dumpit,
2675 	},
2676 	[RDMA_NLDEV_CMD_RES_CQ_GET] = {
2677 		.doit = nldev_res_get_cq_doit,
2678 		.dump = nldev_res_get_cq_dumpit,
2679 	},
2680 	[RDMA_NLDEV_CMD_RES_MR_GET] = {
2681 		.doit = nldev_res_get_mr_doit,
2682 		.dump = nldev_res_get_mr_dumpit,
2683 	},
2684 	[RDMA_NLDEV_CMD_RES_PD_GET] = {
2685 		.doit = nldev_res_get_pd_doit,
2686 		.dump = nldev_res_get_pd_dumpit,
2687 	},
2688 	[RDMA_NLDEV_CMD_RES_CTX_GET] = {
2689 		.doit = nldev_res_get_ctx_doit,
2690 		.dump = nldev_res_get_ctx_dumpit,
2691 	},
2692 	[RDMA_NLDEV_CMD_RES_SRQ_GET] = {
2693 		.doit = nldev_res_get_srq_doit,
2694 		.dump = nldev_res_get_srq_dumpit,
2695 	},
2696 	[RDMA_NLDEV_CMD_SYS_GET] = {
2697 		.doit = nldev_sys_get_doit,
2698 	},
2699 	[RDMA_NLDEV_CMD_SYS_SET] = {
2700 		.doit = nldev_set_sys_set_doit,
2701 		.flags = RDMA_NL_ADMIN_PERM,
2702 	},
2703 	[RDMA_NLDEV_CMD_STAT_SET] = {
2704 		.doit = nldev_stat_set_doit,
2705 		.flags = RDMA_NL_ADMIN_PERM,
2706 	},
2707 	[RDMA_NLDEV_CMD_STAT_GET] = {
2708 		.doit = nldev_stat_get_doit,
2709 		.dump = nldev_stat_get_dumpit,
2710 	},
2711 	[RDMA_NLDEV_CMD_STAT_DEL] = {
2712 		.doit = nldev_stat_del_doit,
2713 		.flags = RDMA_NL_ADMIN_PERM,
2714 	},
2715 	[RDMA_NLDEV_CMD_RES_QP_GET_RAW] = {
2716 		.doit = nldev_res_get_qp_raw_doit,
2717 		.dump = nldev_res_get_qp_raw_dumpit,
2718 		.flags = RDMA_NL_ADMIN_PERM,
2719 	},
2720 	[RDMA_NLDEV_CMD_RES_CQ_GET_RAW] = {
2721 		.doit = nldev_res_get_cq_raw_doit,
2722 		.dump = nldev_res_get_cq_raw_dumpit,
2723 		.flags = RDMA_NL_ADMIN_PERM,
2724 	},
2725 	[RDMA_NLDEV_CMD_RES_MR_GET_RAW] = {
2726 		.doit = nldev_res_get_mr_raw_doit,
2727 		.dump = nldev_res_get_mr_raw_dumpit,
2728 		.flags = RDMA_NL_ADMIN_PERM,
2729 	},
2730 	[RDMA_NLDEV_CMD_RES_SRQ_GET_RAW] = {
2731 		.doit = nldev_res_get_srq_raw_doit,
2732 		.dump = nldev_res_get_srq_raw_dumpit,
2733 		.flags = RDMA_NL_ADMIN_PERM,
2734 	},
2735 	[RDMA_NLDEV_CMD_STAT_GET_STATUS] = {
2736 		.doit = nldev_stat_get_counter_status_doit,
2737 	},
2738 	[RDMA_NLDEV_CMD_NEWDEV] = {
2739 		.doit = nldev_newdev,
2740 		.flags = RDMA_NL_ADMIN_PERM,
2741 	},
2742 	[RDMA_NLDEV_CMD_DELDEV] = {
2743 		.doit = nldev_deldev,
2744 		.flags = RDMA_NL_ADMIN_PERM,
2745 	},
2746 };
2747 
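/*
 * Fill the netdev index/name for a netdev-rename monitor event; nothing is
 * added when the port has no netdev or it lives in a different netns.
 */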
2748 static int fill_mon_netdev_rename(struct sk_buff *msg,
2749 				  struct ib_device *device, u32 port,
2750 				  const struct net *net)
2751 {
2752 	struct net_device *netdev = ib_device_get_netdev(device, port);
2753 	int ret = 0;
2754 
2755 	if (!netdev || !net_eq(dev_net(netdev), net))
2756 		goto out;
2757 
2758 	ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
2759 	if (ret)
2760 		goto out;
2761 	ret = nla_put_string(msg, RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);
2762 out:
2763 	dev_put(netdev);
2764 	return ret;
2765 }
2766 
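/*
 * Fill the device/port (and, when attached, netdev) identity carried by
 * attach/detach monitor events; skipped entirely when the attached netdev
 * belongs to a different netns than the notification target.
 */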
2767 static int fill_mon_netdev_association(struct sk_buff *msg,
2768 				       struct ib_device *device, u32 port,
2769 				       const struct net *net)
2770 {
2771 	struct net_device *netdev = ib_device_get_netdev(device, port);
2772 	int ret = 0;
2773 
2774 	if (netdev && !net_eq(dev_net(netdev), net))
2775 		goto out;
2776 
2777 	ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index);
2778 	if (ret)
2779 		goto out;
2780 
2781 	ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME,
2782 			     dev_name(&device->dev));
2783 	if (ret)
2784 		goto out;
2785 
2786 	ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port);
2787 	if (ret)
2788 		goto out;
2789 
2790 	if (netdev) {
2791 		ret = nla_put_u32(msg,
2792 				  RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
2793 		if (ret)
2794 			goto out;
2795 
2796 		ret = nla_put_string(msg,
2797 				     RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);
2798 	}
2799 
2800 out:
2801 	dev_put(netdev);
2802 	return ret;
2803 }
2804 
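/* Ratelimited warnings for monitor events that could not be sent. */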
2805 static void rdma_nl_notify_err_msg(struct ib_device *device, u32 port_num,
2806 				    enum rdma_nl_notify_event_type type)
2807 {
2808 	struct net_device *netdev;
2809 
2810 	switch (type) {
2811 	case RDMA_REGISTER_EVENT:
2812 		dev_warn_ratelimited(&device->dev,
2813 				     "Failed to send RDMA monitor register device event\n");
2814 		break;
2815 	case RDMA_UNREGISTER_EVENT:
2816 		dev_warn_ratelimited(&device->dev,
2817 				     "Failed to send RDMA monitor unregister device event\n");
2818 		break;
2819 	case RDMA_NETDEV_ATTACH_EVENT:
2820 		netdev = ib_device_get_netdev(device, port_num);
2821 		dev_warn_ratelimited(&device->dev,
2822 				     "Failed to send RDMA monitor netdev attach event: port %d netdev %d\n",
2823 				     port_num, netdev ? netdev->ifindex : -1);
2824 		dev_put(netdev);
2825 		break;
2826 	case RDMA_NETDEV_DETACH_EVENT:
2827 		dev_warn_ratelimited(&device->dev,
2828 				     "Failed to send RDMA monitor netdev detach event: port %d\n",
2829 				     port_num);
2830 		break;
2831 	case RDMA_RENAME_EVENT:
2832 		dev_warn_ratelimited(&device->dev,
2833 				     "Failed to send RDMA monitor rename device event\n");
2834 		break;
2835 
2836 	case RDMA_NETDEV_RENAME_EVENT:
2837 		netdev = ib_device_get_netdev(device, port_num);
2838 		dev_warn_ratelimited(&device->dev,
2839 				     "Failed to send RDMA monitor netdev rename event: port %d netdev %d\n",
2840 				     port_num, netdev ? netdev->ifindex : -1);
2841 		dev_put(netdev);
2842 		break;
2843 	default:
2844 		break;
2845 	}
2846 }
2847 
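/*
 * Build an RDMA_NLDEV_CMD_MONITOR message for the event and multicast it
 * to the RDMA_NL_GROUP_NOTIFY group; -ESRCH (no listeners) is ignored.
 */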
2848 int rdma_nl_notify_event(struct ib_device *device, u32 port_num,
2849 			  enum rdma_nl_notify_event_type type)
2850 {
2851 	struct sk_buff *skb;
2852 	int ret = -EMSGSIZE;
2853 	struct net *net;
2854 	void *nlh;
2855 
2856 	net = read_pnet(&device->coredev.rdma_net);
2857 	if (!net)
2858 		return -EINVAL;
2859 
2860 	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2861 	if (!skb)
2862 		return -ENOMEM;
2863 	nlh = nlmsg_put(skb, 0, 0,
2864 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_MONITOR),
2865 			0, 0);
2866 	if (!nlh)
2867 		goto err_free;
2868 
2869 	switch (type) {
2870 	case RDMA_REGISTER_EVENT:
2871 	case RDMA_UNREGISTER_EVENT:
2872 	case RDMA_RENAME_EVENT:
2873 		ret = fill_nldev_handle(skb, device);
2874 		if (ret)
2875 			goto err_free;
2876 		break;
2877 	case RDMA_NETDEV_ATTACH_EVENT:
2878 	case RDMA_NETDEV_DETACH_EVENT:
2879 		ret = fill_mon_netdev_association(skb, device, port_num, net);
2880 		if (ret)
2881 			goto err_free;
2882 		break;
2883 	case RDMA_NETDEV_RENAME_EVENT:
2884 		ret = fill_mon_netdev_rename(skb, device, port_num, net);
2885 		if (ret)
2886 			goto err_free;
2887 		break;
2888 	default:
2889 		break;
2890 	}
2891 
2892 	ret = nla_put_u8(skb, RDMA_NLDEV_ATTR_EVENT_TYPE, type);
2893 	if (ret)
2894 		goto err_free;
2895 
2896 	nlmsg_end(skb, nlh);
2897 	ret = rdma_nl_multicast(net, skb, RDMA_NL_GROUP_NOTIFY, GFP_KERNEL);
2898 	if (ret && ret != -ESRCH) {
2899 		skb = NULL; /* skb is freed in the netlink send-op handling */
2900 		goto err_free;
2901 	}
2902 	return 0;
2903 
2904 err_free:
2905 	rdma_nl_notify_err_msg(device, port_num, type);
2906 	nlmsg_free(skb);
2907 	return ret;
2908 }
2909 
2910 void __init nldev_init(void)
2911 {
2912 	rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table);
2913 }
2914 
2915 void nldev_exit(void)
2916 {
2917 	rdma_nl_unregister(RDMA_NL_NLDEV);
2918 }
2919 
2920 MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_NLDEV, 5);
2921