xref: /linux/drivers/infiniband/core/nldev.c (revision e01027cab38a1a52828eecff447ca5e015b20f92)
1 /*
2  * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  * 3. Neither the names of the copyright holders nor the names of its
13  *    contributors may be used to endorse or promote products derived from
14  *    this software without specific prior written permission.
15  *
16  * Alternatively, this software may be distributed under the terms of the
17  * GNU General Public License ("GPL") version 2 as published by the Free
18  * Software Foundation.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include <linux/module.h>
34 #include <linux/pid.h>
35 #include <linux/pid_namespace.h>
36 #include <linux/mutex.h>
37 #include <net/netlink.h>
38 #include <rdma/rdma_cm.h>
39 #include <rdma/rdma_netlink.h>
40 #include <rdma/frmr_pools.h>
41 
42 #include "core_priv.h"
43 #include "cma_priv.h"
44 #include "restrack.h"
45 #include "uverbs.h"
46 #include "frmr_pools.h"
47 
48 /*
49  * This determines whether a non-privileged user is allowed to specify a
50  * controlled QKEY or not, when true non-privileged user is allowed to specify
51  * a controlled QKEY.
52  */
53 static bool privileged_qkey;
54 
55 typedef int (*res_fill_func_t)(struct sk_buff*, bool,
56 			       struct rdma_restrack_entry*, uint32_t);
57 
58 /*
59  * Sort array elements by the netlink attribute name
60  */
61 static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
62 	[RDMA_NLDEV_ATTR_CHARDEV]		= { .type = NLA_U64 },
63 	[RDMA_NLDEV_ATTR_CHARDEV_ABI]		= { .type = NLA_U64 },
64 	[RDMA_NLDEV_ATTR_CHARDEV_NAME]		= { .type = NLA_NUL_STRING,
65 					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
66 	[RDMA_NLDEV_ATTR_CHARDEV_TYPE]		= { .type = NLA_NUL_STRING,
67 					.len = RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE },
68 	[RDMA_NLDEV_ATTR_DEV_DIM]               = { .type = NLA_U8 },
69 	[RDMA_NLDEV_ATTR_DEV_INDEX]		= { .type = NLA_U32 },
70 	[RDMA_NLDEV_ATTR_DEV_NAME]		= { .type = NLA_NUL_STRING,
71 					.len = IB_DEVICE_NAME_MAX },
72 	[RDMA_NLDEV_ATTR_DEV_NODE_TYPE]		= { .type = NLA_U8 },
73 	[RDMA_NLDEV_ATTR_DEV_PROTOCOL]		= { .type = NLA_NUL_STRING,
74 					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
75 	[RDMA_NLDEV_ATTR_DRIVER]		= { .type = NLA_NESTED },
76 	[RDMA_NLDEV_ATTR_DRIVER_ENTRY]		= { .type = NLA_NESTED },
77 	[RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE]	= { .type = NLA_U8 },
78 	[RDMA_NLDEV_ATTR_DRIVER_STRING]		= { .type = NLA_NUL_STRING,
79 					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
80 	[RDMA_NLDEV_ATTR_DRIVER_S32]		= { .type = NLA_S32 },
81 	[RDMA_NLDEV_ATTR_DRIVER_S64]		= { .type = NLA_S64 },
82 	[RDMA_NLDEV_ATTR_DRIVER_U32]		= { .type = NLA_U32 },
83 	[RDMA_NLDEV_ATTR_DRIVER_U64]		= { .type = NLA_U64 },
84 	[RDMA_NLDEV_ATTR_FW_VERSION]		= { .type = NLA_NUL_STRING,
85 					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
86 	[RDMA_NLDEV_ATTR_LID]			= { .type = NLA_U32 },
87 	[RDMA_NLDEV_ATTR_LINK_TYPE]		= { .type = NLA_NUL_STRING,
88 					.len = IFNAMSIZ },
89 	[RDMA_NLDEV_ATTR_LMC]			= { .type = NLA_U8 },
90 	[RDMA_NLDEV_ATTR_NDEV_INDEX]		= { .type = NLA_U32 },
91 	[RDMA_NLDEV_ATTR_NDEV_NAME]		= { .type = NLA_NUL_STRING,
92 					.len = IFNAMSIZ },
93 	[RDMA_NLDEV_ATTR_NODE_GUID]		= { .type = NLA_U64 },
94 	[RDMA_NLDEV_ATTR_PORT_INDEX]		= { .type = NLA_U32 },
95 	[RDMA_NLDEV_ATTR_PORT_PHYS_STATE]	= { .type = NLA_U8 },
96 	[RDMA_NLDEV_ATTR_PORT_STATE]		= { .type = NLA_U8 },
97 	[RDMA_NLDEV_ATTR_RES_CM_ID]		= { .type = NLA_NESTED },
98 	[RDMA_NLDEV_ATTR_RES_CM_IDN]		= { .type = NLA_U32 },
99 	[RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY]	= { .type = NLA_NESTED },
100 	[RDMA_NLDEV_ATTR_RES_CQ]		= { .type = NLA_NESTED },
101 	[RDMA_NLDEV_ATTR_RES_CQE]		= { .type = NLA_U32 },
102 	[RDMA_NLDEV_ATTR_RES_CQN]		= { .type = NLA_U32 },
103 	[RDMA_NLDEV_ATTR_RES_CQ_ENTRY]		= { .type = NLA_NESTED },
104 	[RDMA_NLDEV_ATTR_RES_CTX]		= { .type = NLA_NESTED },
105 	[RDMA_NLDEV_ATTR_RES_CTXN]		= { .type = NLA_U32 },
106 	[RDMA_NLDEV_ATTR_RES_CTX_ENTRY]		= { .type = NLA_NESTED },
107 	[RDMA_NLDEV_ATTR_RES_DST_ADDR]		= {
108 			.len = sizeof(struct __kernel_sockaddr_storage) },
109 	[RDMA_NLDEV_ATTR_RES_IOVA]		= { .type = NLA_U64 },
110 	[RDMA_NLDEV_ATTR_RES_KERN_NAME]		= { .type = NLA_NUL_STRING,
111 					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
112 	[RDMA_NLDEV_ATTR_RES_LKEY]		= { .type = NLA_U32 },
113 	[RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY]	= { .type = NLA_U32 },
114 	[RDMA_NLDEV_ATTR_RES_LQPN]		= { .type = NLA_U32 },
115 	[RDMA_NLDEV_ATTR_RES_MR]		= { .type = NLA_NESTED },
116 	[RDMA_NLDEV_ATTR_RES_MRLEN]		= { .type = NLA_U64 },
117 	[RDMA_NLDEV_ATTR_RES_MRN]		= { .type = NLA_U32 },
118 	[RDMA_NLDEV_ATTR_RES_MR_ENTRY]		= { .type = NLA_NESTED },
119 	[RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE]	= { .type = NLA_U8 },
120 	[RDMA_NLDEV_ATTR_RES_PD]		= { .type = NLA_NESTED },
121 	[RDMA_NLDEV_ATTR_RES_PDN]		= { .type = NLA_U32 },
122 	[RDMA_NLDEV_ATTR_RES_PD_ENTRY]		= { .type = NLA_NESTED },
123 	[RDMA_NLDEV_ATTR_RES_PID]		= { .type = NLA_U32 },
124 	[RDMA_NLDEV_ATTR_RES_POLL_CTX]		= { .type = NLA_U8 },
125 	[RDMA_NLDEV_ATTR_RES_PS]		= { .type = NLA_U32 },
126 	[RDMA_NLDEV_ATTR_RES_QP]		= { .type = NLA_NESTED },
127 	[RDMA_NLDEV_ATTR_RES_QP_ENTRY]		= { .type = NLA_NESTED },
128 	[RDMA_NLDEV_ATTR_RES_RAW]		= { .type = NLA_BINARY },
129 	[RDMA_NLDEV_ATTR_RES_RKEY]		= { .type = NLA_U32 },
130 	[RDMA_NLDEV_ATTR_RES_RQPN]		= { .type = NLA_U32 },
131 	[RDMA_NLDEV_ATTR_RES_RQ_PSN]		= { .type = NLA_U32 },
132 	[RDMA_NLDEV_ATTR_RES_SQ_PSN]		= { .type = NLA_U32 },
133 	[RDMA_NLDEV_ATTR_RES_SRC_ADDR]		= {
134 			.len = sizeof(struct __kernel_sockaddr_storage) },
135 	[RDMA_NLDEV_ATTR_RES_STATE]		= { .type = NLA_U8 },
136 	[RDMA_NLDEV_ATTR_RES_SUMMARY]		= { .type = NLA_NESTED },
137 	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY]	= { .type = NLA_NESTED },
138 	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR]= { .type = NLA_U64 },
139 	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME]= { .type = NLA_NUL_STRING,
140 					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
141 	[RDMA_NLDEV_ATTR_RES_TYPE]		= { .type = NLA_U8 },
142 	[RDMA_NLDEV_ATTR_RES_SUBTYPE]		= { .type = NLA_NUL_STRING,
143 					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
144 	[RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY]= { .type = NLA_U32 },
145 	[RDMA_NLDEV_ATTR_RES_USECNT]		= { .type = NLA_U64 },
146 	[RDMA_NLDEV_ATTR_RES_SRQ]		= { .type = NLA_NESTED },
147 	[RDMA_NLDEV_ATTR_RES_SRQN]		= { .type = NLA_U32 },
148 	[RDMA_NLDEV_ATTR_RES_SRQ_ENTRY]		= { .type = NLA_NESTED },
149 	[RDMA_NLDEV_ATTR_MIN_RANGE]		= { .type = NLA_U32 },
150 	[RDMA_NLDEV_ATTR_MAX_RANGE]		= { .type = NLA_U32 },
151 	[RDMA_NLDEV_ATTR_SM_LID]		= { .type = NLA_U32 },
152 	[RDMA_NLDEV_ATTR_SUBNET_PREFIX]		= { .type = NLA_U64 },
153 	[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]	= { .type = NLA_U32 },
154 	[RDMA_NLDEV_ATTR_STAT_MODE]		= { .type = NLA_U32 },
155 	[RDMA_NLDEV_ATTR_STAT_RES]		= { .type = NLA_U32 },
156 	[RDMA_NLDEV_ATTR_STAT_COUNTER]		= { .type = NLA_NESTED },
157 	[RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY]	= { .type = NLA_NESTED },
158 	[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]       = { .type = NLA_U32 },
159 	[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]       = { .type = NLA_NESTED },
160 	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY]  = { .type = NLA_NESTED },
161 	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME] = { .type = NLA_NUL_STRING },
162 	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE] = { .type = NLA_U64 },
163 	[RDMA_NLDEV_ATTR_SYS_IMAGE_GUID]	= { .type = NLA_U64 },
164 	[RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID]	= { .type = NLA_U32 },
165 	[RDMA_NLDEV_NET_NS_FD]			= { .type = NLA_U32 },
166 	[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]	= { .type = NLA_U8 },
167 	[RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK]	= { .type = NLA_U8 },
168 	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX]	= { .type = NLA_U32 },
169 	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC] = { .type = NLA_U8 },
170 	[RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE] = { .type = NLA_U8 },
171 	[RDMA_NLDEV_ATTR_DRIVER_DETAILS]	= { .type = NLA_U8 },
172 	[RDMA_NLDEV_ATTR_DEV_TYPE]		= { .type = NLA_U8 },
173 	[RDMA_NLDEV_ATTR_PARENT_NAME]		= { .type = NLA_NUL_STRING },
174 	[RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE]	= { .type = NLA_U8 },
175 	[RDMA_NLDEV_ATTR_EVENT_TYPE]		= { .type = NLA_U8 },
176 	[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED] = { .type = NLA_U8 },
177 	[RDMA_NLDEV_ATTR_FRMR_POOLS]		= { .type = NLA_NESTED },
178 	[RDMA_NLDEV_ATTR_FRMR_POOL_ENTRY]	= { .type = NLA_NESTED },
179 	[RDMA_NLDEV_ATTR_FRMR_POOL_KEY]		= { .type = NLA_NESTED },
180 	[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ATS]	= { .type = NLA_U8 },
181 	[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ACCESS_FLAGS] = { .type = NLA_U32 },
182 	[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_VENDOR_KEY] = { .type = NLA_U64 },
183 	[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_NUM_DMA_BLOCKS] = { .type = NLA_U64 },
184 	[RDMA_NLDEV_ATTR_FRMR_POOL_QUEUE_HANDLES] = { .type = NLA_U32 },
185 	[RDMA_NLDEV_ATTR_FRMR_POOL_MAX_IN_USE]	= { .type = NLA_U64 },
186 	[RDMA_NLDEV_ATTR_FRMR_POOL_IN_USE]	= { .type = NLA_U64 },
187 	[RDMA_NLDEV_ATTR_FRMR_POOLS_AGING_PERIOD] = { .type = NLA_U32 },
188 	[RDMA_NLDEV_ATTR_FRMR_POOL_PINNED_HANDLES] = { .type = NLA_U32 },
189 	[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_KERNEL_VENDOR_KEY] = { .type = NLA_U64 },
190 };
191 
192 static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
193 				      enum rdma_nldev_print_type print_type)
194 {
195 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, name))
196 		return -EMSGSIZE;
197 	if (print_type != RDMA_NLDEV_PRINT_TYPE_UNSPEC &&
198 	    nla_put_u8(msg, RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE, print_type))
199 		return -EMSGSIZE;
200 
201 	return 0;
202 }
203 
204 static int _rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name,
205 				   enum rdma_nldev_print_type print_type,
206 				   u32 value)
207 {
208 	if (put_driver_name_print_type(msg, name, print_type))
209 		return -EMSGSIZE;
210 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DRIVER_U32, value))
211 		return -EMSGSIZE;
212 
213 	return 0;
214 }
215 
216 static int _rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name,
217 				   enum rdma_nldev_print_type print_type,
218 				   u64 value)
219 {
220 	if (put_driver_name_print_type(msg, name, print_type))
221 		return -EMSGSIZE;
222 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_DRIVER_U64, value,
223 			      RDMA_NLDEV_ATTR_PAD))
224 		return -EMSGSIZE;
225 
226 	return 0;
227 }
228 
229 int rdma_nl_put_driver_string(struct sk_buff *msg, const char *name,
230 			      const char *str)
231 {
232 	if (put_driver_name_print_type(msg, name,
233 				       RDMA_NLDEV_PRINT_TYPE_UNSPEC))
234 		return -EMSGSIZE;
235 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, str))
236 		return -EMSGSIZE;
237 
238 	return 0;
239 }
240 EXPORT_SYMBOL(rdma_nl_put_driver_string);
241 
/* Emit a named u32 driver attribute, printed in decimal. */
int rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name, u32 value)
{
	return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
				       value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u32);

/* Emit a named u32 driver attribute, hinted for hexadecimal printing. */
int rdma_nl_put_driver_u32_hex(struct sk_buff *msg, const char *name,
			       u32 value)
{
	return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX,
				       value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u32_hex);

/* Emit a named u64 driver attribute, printed in decimal. */
int rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, u64 value)
{
	return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
				       value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u64);

/* Emit a named u64 driver attribute, hinted for hexadecimal printing. */
int rdma_nl_put_driver_u64_hex(struct sk_buff *msg, const char *name, u64 value)
{
	return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX,
				       value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u64_hex);

/* Report whether non-privileged users may specify a controlled QKEY. */
bool rdma_nl_get_privileged_qkey(void)
{
	return privileged_qkey;
}
EXPORT_SYMBOL(rdma_nl_get_privileged_qkey);
276 
277 static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
278 {
279 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
280 		return -EMSGSIZE;
281 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME,
282 			   dev_name(&device->dev)))
283 		return -EMSGSIZE;
284 
285 	return 0;
286 }
287 
/*
 * Dump device-wide attributes into @msg: handle, port count, capability
 * flags, firmware version (if any), node/sys-image GUIDs, node type,
 * CQ-DIM setting, optional device type/parent name, name-assign type and
 * the transport protocol string.  Returns 0 or -EMSGSIZE.
 */
static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
{
	char fw[IB_FW_VERSION_NAME_MAX];
	int ret = 0;
	u32 port;

	if (fill_nldev_handle(msg, device))
		return -EMSGSIZE;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, rdma_end_port(device)))
		return -EMSGSIZE;

	/* device_cap_flags must fit in the u64 netlink attribute. */
	BUILD_BUG_ON(sizeof(device->attrs.device_cap_flags) != sizeof(u64));
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
			      device->attrs.device_cap_flags,
			      RDMA_NLDEV_ATTR_PAD))
		return -EMSGSIZE;

	ib_get_device_fw_str(device, fw);
	/* Device without FW has strlen(fw) = 0 */
	if (strlen(fw) && nla_put_string(msg, RDMA_NLDEV_ATTR_FW_VERSION, fw))
		return -EMSGSIZE;

	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_NODE_GUID,
			      be64_to_cpu(device->node_guid),
			      RDMA_NLDEV_ATTR_PAD))
		return -EMSGSIZE;
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SYS_IMAGE_GUID,
			      be64_to_cpu(device->attrs.sys_image_guid),
			      RDMA_NLDEV_ATTR_PAD))
		return -EMSGSIZE;
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_NODE_TYPE, device->node_type))
		return -EMSGSIZE;
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, device->use_cq_dim))
		return -EMSGSIZE;

	/* Type and parent are optional: emitted only when set. */
	if (device->type &&
	    nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_TYPE, device->type))
		return -EMSGSIZE;

	if (device->parent &&
	    nla_put_string(msg, RDMA_NLDEV_ATTR_PARENT_NAME,
			   dev_name(&device->parent->dev)))
		return -EMSGSIZE;

	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE,
		       device->name_assign_type))
		return -EMSGSIZE;

	/*
	 * The link type is taken from the first port.  An mlx4 device can
	 * in principle have two different link types on the same IB device;
	 * that situation is considered better avoided in the future.
	 */
	port = rdma_start_port(device);
	if (rdma_cap_opa_mad(device, port))
		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "opa");
	else if (rdma_protocol_ib(device, port))
		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "ib");
	else if (rdma_protocol_iwarp(device, port))
		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "iw");
	else if (rdma_protocol_roce(device, port))
		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "roce");
	else if (rdma_protocol_usnic(device, port))
		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL,
				     "usnic");
	return ret;
}
356 
/*
 * Dump per-port attributes into @msg: device handle, port index, the
 * queried port attributes (IB only: capability flags, subnet prefix,
 * LID, SM LID, LMC), state/phys_state and - when the port's netdev is
 * visible in @net - the netdev index and name.
 * Returns 0, -EMSGSIZE, or an ib_query_port() error code.
 */
static int fill_port_info(struct sk_buff *msg,
			  struct ib_device *device, u32 port,
			  const struct net *net)
{
	struct net_device *netdev = NULL;
	struct ib_port_attr attr;
	int ret;
	u64 cap_flags = 0;

	if (fill_nldev_handle(msg, device))
		return -EMSGSIZE;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
		return -EMSGSIZE;

	ret = ib_query_port(device, port, &attr);
	if (ret)
		return ret;

	if (rdma_protocol_ib(device, port)) {
		/* Both 32-bit cap-flag words are packed into one u64 attr. */
		BUILD_BUG_ON((sizeof(attr.port_cap_flags) +
				sizeof(attr.port_cap_flags2)) > sizeof(u64));
		cap_flags = attr.port_cap_flags |
			((u64)attr.port_cap_flags2 << 32);
		if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
				      cap_flags, RDMA_NLDEV_ATTR_PAD))
			return -EMSGSIZE;
		if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX,
				      attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD))
			return -EMSGSIZE;
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid))
			return -EMSGSIZE;
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, attr.sm_lid))
			return -EMSGSIZE;
		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_LMC, attr.lmc))
			return -EMSGSIZE;
	}
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_STATE, attr.state))
		return -EMSGSIZE;
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state))
		return -EMSGSIZE;

	/* Netdev info is only reported within the requester's netns. */
	netdev = ib_device_get_netdev(device, port);
	if (netdev && net_eq(dev_net(netdev), net)) {
		ret = nla_put_u32(msg,
				  RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
		if (ret)
			goto out;
		ret = nla_put_string(msg,
				     RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);
	}

out:
	/* Also reached with netdev == NULL; dev_put(NULL) is a no-op. */
	dev_put(netdev);
	return ret;
}
413 
414 static int fill_res_info_entry(struct sk_buff *msg,
415 			       const char *name, u64 curr)
416 {
417 	struct nlattr *entry_attr;
418 
419 	entry_attr = nla_nest_start_noflag(msg,
420 					   RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY);
421 	if (!entry_attr)
422 		return -EMSGSIZE;
423 
424 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME, name))
425 		goto err;
426 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR, curr,
427 			      RDMA_NLDEV_ATTR_PAD))
428 		goto err;
429 
430 	nla_nest_end(msg, entry_attr);
431 	return 0;
432 
433 err:
434 	nla_nest_cancel(msg, entry_attr);
435 	return -EMSGSIZE;
436 }
437 
438 static int fill_res_info(struct sk_buff *msg, struct ib_device *device,
439 			 bool show_details)
440 {
441 	static const char * const names[RDMA_RESTRACK_MAX] = {
442 		[RDMA_RESTRACK_PD] = "pd",
443 		[RDMA_RESTRACK_CQ] = "cq",
444 		[RDMA_RESTRACK_QP] = "qp",
445 		[RDMA_RESTRACK_CM_ID] = "cm_id",
446 		[RDMA_RESTRACK_MR] = "mr",
447 		[RDMA_RESTRACK_CTX] = "ctx",
448 		[RDMA_RESTRACK_SRQ] = "srq",
449 	};
450 
451 	struct nlattr *table_attr;
452 	int ret, i, curr;
453 
454 	if (fill_nldev_handle(msg, device))
455 		return -EMSGSIZE;
456 
457 	table_attr = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_RES_SUMMARY);
458 	if (!table_attr)
459 		return -EMSGSIZE;
460 
461 	for (i = 0; i < RDMA_RESTRACK_MAX; i++) {
462 		if (!names[i])
463 			continue;
464 		curr = rdma_restrack_count(device, i, show_details);
465 		ret = fill_res_info_entry(msg, names[i], curr);
466 		if (ret)
467 			goto err;
468 	}
469 
470 	nla_nest_end(msg, table_attr);
471 	return 0;
472 
473 err:
474 	nla_nest_cancel(msg, table_attr);
475 	return ret;
476 }
477 
/*
 * Attach ownership info to a restrack entry dump: the kernel task name
 * for kernel-owned resources, or the namespace-relative PID for
 * user-owned ones.  Returns 0 or -EMSGSIZE.
 */
static int fill_res_name_pid(struct sk_buff *msg,
			     struct rdma_restrack_entry *res)
{
	int err = 0;

	/*
	 * For user resources, userspace should read /proc/PID/comm to get
	 * the name of the owning task.
	 */
	if (rdma_is_kernel_res(res)) {
		err = nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
				     res->kern_name);
	} else {
		pid_t pid;

		pid = task_pid_vnr(res->task);
		/*
		 * Task is dead and in zombie state.
		 * There is no need to print PID anymore.
		 */
		if (pid)
			/*
			 * This part is racy, task can be killed and PID will
			 * be zero right here but it is ok, next query won't
			 * return PID. We don't promise real-time reflection
			 * of SW objects.
			 */
			err = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, pid);
	}

	return err ? -EMSGSIZE : 0;
}
510 
/*
 * Query @qp and dump its volatile state: remote QPN and RQ PSN (for
 * connected types), SQ PSN, path-migration state (where applicable),
 * QP type and state, followed by any driver-specific fields.
 */
static int fill_res_qp_entry_query(struct sk_buff *msg,
				   struct rdma_restrack_entry *res,
				   struct ib_device *dev,
				   struct ib_qp *qp)
{
	struct ib_qp_init_attr qp_init_attr;
	struct ib_qp_attr qp_attr;
	int ret;

	ret = ib_query_qp(qp, &qp_attr, 0, &qp_init_attr);
	if (ret)
		return ret;

	/* Remote-side info is only reported for connected (RC/UC) QPs. */
	if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN,
				qp_attr.dest_qp_num))
			goto err;
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQ_PSN,
				qp_attr.rq_psn))
			goto err;
	}

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SQ_PSN, qp_attr.sq_psn))
		goto err;

	/* Path migration state is reported for RC/UC/XRC QPs only. */
	if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC ||
	    qp->qp_type == IB_QPT_XRC_INI || qp->qp_type == IB_QPT_XRC_TGT) {
		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE,
			       qp_attr.path_mig_state))
			goto err;
	}
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, qp->qp_type))
		goto err;
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
		goto err;

	if (dev->ops.fill_res_qp_entry)
		return dev->ops.fill_res_qp_entry(msg, qp);
	return 0;

err:	return -EMSGSIZE;
}
553 
554 static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
555 			     struct rdma_restrack_entry *res, uint32_t port)
556 {
557 	struct ib_qp *qp = container_of(res, struct ib_qp, res);
558 	struct ib_device *dev = qp->device;
559 	int ret;
560 
561 	if (port && port != qp->port)
562 		return -EAGAIN;
563 
564 	/* In create_qp() port is not set yet */
565 	if (qp->port && nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp->port))
566 		return -EMSGSIZE;
567 
568 	ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num);
569 	if (ret)
570 		return -EMSGSIZE;
571 
572 	if (!rdma_is_kernel_res(res) &&
573 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, qp->pd->res.id))
574 		return -EMSGSIZE;
575 
576 	ret = fill_res_name_pid(msg, res);
577 	if (ret)
578 		return -EMSGSIZE;
579 
580 	return fill_res_qp_entry_query(msg, res, dev, qp);
581 }
582 
583 static int fill_res_qp_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
584 				 struct rdma_restrack_entry *res, uint32_t port)
585 {
586 	struct ib_qp *qp = container_of(res, struct ib_qp, res);
587 	struct ib_device *dev = qp->device;
588 
589 	if (port && port != qp->port)
590 		return -EAGAIN;
591 	if (!dev->ops.fill_res_qp_entry_raw)
592 		return -EINVAL;
593 	return dev->ops.fill_res_qp_entry_raw(msg, qp);
594 }
595 
/*
 * Dump one RDMA CM ID restrack entry: bound port, QP binding (if any),
 * port space, CM state, source/destination addresses, object id and
 * owner, plus driver-specific details.  Returns -EAGAIN when the entry
 * does not match the @port filter, -EMSGSIZE when @msg is full.
 */
static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin,
				struct rdma_restrack_entry *res, uint32_t port)
{
	struct rdma_id_private *id_priv =
				container_of(res, struct rdma_id_private, res);
	struct ib_device *dev = id_priv->id.device;
	struct rdma_cm_id *cm_id = &id_priv->id;

	if (port && port != cm_id->port_num)
		return -EAGAIN;

	/* port_num may still be zero for an unbound CM ID. */
	if (cm_id->port_num &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num))
		goto err;

	/* QP attributes exist only once a QP has been associated. */
	if (id_priv->qp_num) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, id_priv->qp_num))
			goto err;
		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, cm_id->qp_type))
			goto err;
	}

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PS, cm_id->ps))
		goto err;

	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, id_priv->state))
		goto err;

	/* Addresses are emitted only when the family has been set. */
	if (cm_id->route.addr.src_addr.ss_family &&
	    nla_put(msg, RDMA_NLDEV_ATTR_RES_SRC_ADDR,
		    sizeof(cm_id->route.addr.src_addr),
		    &cm_id->route.addr.src_addr))
		goto err;
	if (cm_id->route.addr.dst_addr.ss_family &&
	    nla_put(msg, RDMA_NLDEV_ATTR_RES_DST_ADDR,
		    sizeof(cm_id->route.addr.dst_addr),
		    &cm_id->route.addr.dst_addr))
		goto err;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CM_IDN, res->id))
		goto err;

	if (fill_res_name_pid(msg, res))
		goto err;

	if (dev->ops.fill_res_cm_id_entry)
		return dev->ops.fill_res_cm_id_entry(msg, cm_id);
	return 0;

err: return -EMSGSIZE;
}
647 
648 static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin,
649 			     struct rdma_restrack_entry *res, uint32_t port)
650 {
651 	struct ib_cq *cq = container_of(res, struct ib_cq, res);
652 	struct ib_device *dev = cq->device;
653 
654 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe))
655 		return -EMSGSIZE;
656 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
657 			      atomic_read(&cq->usecnt), RDMA_NLDEV_ATTR_PAD))
658 		return -EMSGSIZE;
659 
660 	/* Poll context is only valid for kernel CQs */
661 	if (rdma_is_kernel_res(res) &&
662 	    nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx))
663 		return -EMSGSIZE;
664 
665 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, (cq->dim != NULL)))
666 		return -EMSGSIZE;
667 
668 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, res->id))
669 		return -EMSGSIZE;
670 	if (!rdma_is_kernel_res(res) &&
671 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
672 			cq->uobject->uevent.uobject.context->res.id))
673 		return -EMSGSIZE;
674 
675 	if (fill_res_name_pid(msg, res))
676 		return -EMSGSIZE;
677 
678 	return (dev->ops.fill_res_cq_entry) ?
679 		dev->ops.fill_res_cq_entry(msg, cq) : 0;
680 }
681 
682 static int fill_res_cq_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
683 				 struct rdma_restrack_entry *res, uint32_t port)
684 {
685 	struct ib_cq *cq = container_of(res, struct ib_cq, res);
686 	struct ib_device *dev = cq->device;
687 
688 	if (!dev->ops.fill_res_cq_entry_raw)
689 		return -EINVAL;
690 	return dev->ops.fill_res_cq_entry_raw(msg, cq);
691 }
692 
693 static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
694 			     struct rdma_restrack_entry *res, uint32_t port)
695 {
696 	struct ib_mr *mr = container_of(res, struct ib_mr, res);
697 	struct ib_device *dev = mr->pd->device;
698 
699 	if (has_cap_net_admin) {
700 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey))
701 			return -EMSGSIZE;
702 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey))
703 			return -EMSGSIZE;
704 	}
705 
706 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length,
707 			      RDMA_NLDEV_ATTR_PAD))
708 		return -EMSGSIZE;
709 
710 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
711 		return -EMSGSIZE;
712 
713 	if (!rdma_is_kernel_res(res) &&
714 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, mr->pd->res.id))
715 		return -EMSGSIZE;
716 
717 	if (fill_res_name_pid(msg, res))
718 		return -EMSGSIZE;
719 
720 	return (dev->ops.fill_res_mr_entry) ?
721 		       dev->ops.fill_res_mr_entry(msg, mr) :
722 		       0;
723 }
724 
725 static int fill_res_mr_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
726 				 struct rdma_restrack_entry *res, uint32_t port)
727 {
728 	struct ib_mr *mr = container_of(res, struct ib_mr, res);
729 	struct ib_device *dev = mr->pd->device;
730 
731 	if (!dev->ops.fill_res_mr_entry_raw)
732 		return -EINVAL;
733 	return dev->ops.fill_res_mr_entry_raw(msg, mr);
734 }
735 
736 static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin,
737 			     struct rdma_restrack_entry *res, uint32_t port)
738 {
739 	struct ib_pd *pd = container_of(res, struct ib_pd, res);
740 
741 	if (has_cap_net_admin) {
742 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY,
743 				pd->local_dma_lkey))
744 			goto err;
745 		if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) &&
746 		    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY,
747 				pd->unsafe_global_rkey))
748 			goto err;
749 	}
750 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
751 			      atomic_read(&pd->usecnt), RDMA_NLDEV_ATTR_PAD))
752 		goto err;
753 
754 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, res->id))
755 		goto err;
756 
757 	if (!rdma_is_kernel_res(res) &&
758 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
759 			pd->uobject->context->res.id))
760 		goto err;
761 
762 	return fill_res_name_pid(msg, res);
763 
764 err:	return -EMSGSIZE;
765 }
766 
767 static int fill_res_ctx_entry(struct sk_buff *msg, bool has_cap_net_admin,
768 			      struct rdma_restrack_entry *res, uint32_t port)
769 {
770 	struct ib_ucontext *ctx = container_of(res, struct ib_ucontext, res);
771 
772 	if (rdma_is_kernel_res(res))
773 		return 0;
774 
775 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN, ctx->res.id))
776 		return -EMSGSIZE;
777 
778 	return fill_res_name_pid(msg, res);
779 }
780 
781 static int fill_res_range_qp_entry(struct sk_buff *msg, uint32_t min_range,
782 				   uint32_t max_range)
783 {
784 	struct nlattr *entry_attr;
785 
786 	if (!min_range)
787 		return 0;
788 
789 	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
790 	if (!entry_attr)
791 		return -EMSGSIZE;
792 
793 	if (min_range == max_range) {
794 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, min_range))
795 			goto err;
796 	} else {
797 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_MIN_RANGE, min_range))
798 			goto err;
799 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_MAX_RANGE, max_range))
800 			goto err;
801 	}
802 	nla_nest_end(msg, entry_attr);
803 	return 0;
804 
805 err:
806 	nla_nest_cancel(msg, entry_attr);
807 	return -EMSGSIZE;
808 }
809 
/*
 * Emit, as compressed [min, max] ranges, the QP numbers of every QP
 * attached to @srq, by walking the device's QP restrack table under the
 * xarray lock.  The range compression relies on the iteration visiting
 * QPs in ascending qp_num order and bails out if that is violated.
 */
static int fill_res_srq_qps(struct sk_buff *msg, struct ib_srq *srq)
{
	uint32_t min_range = 0, prev = 0;
	struct rdma_restrack_entry *res;
	struct rdma_restrack_root *rt;
	struct nlattr *table_attr;
	struct ib_qp *qp = NULL;
	unsigned long id = 0;

	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP);
	if (!table_attr)
		return -EMSGSIZE;

	rt = &srq->device->res[RDMA_RESTRACK_QP];
	xa_lock(&rt->xa);
	xa_for_each(&rt->xa, id, res) {
		/* Hold a reference while the entry is examined. */
		if (!rdma_restrack_get(res))
			continue;

		qp = container_of(res, struct ib_qp, res);
		if (!qp->srq || (qp->srq->res.id != srq->res.id)) {
			rdma_restrack_put(res);
			continue;
		}

		if (qp->qp_num < prev)
			/* qp_num should be ascending */
			goto err_loop;

		if (min_range == 0) {
			/* First matching QP opens the initial range. */
			min_range = qp->qp_num;
		} else if (qp->qp_num > (prev + 1)) {
			/* Gap found: flush the completed range. */
			if (fill_res_range_qp_entry(msg, min_range, prev))
				goto err_loop;

			min_range = qp->qp_num;
		}
		prev = qp->qp_num;
		rdma_restrack_put(res);
	}

	xa_unlock(&rt->xa);

	/* Flush the final (possibly empty) range. */
	if (fill_res_range_qp_entry(msg, min_range, prev))
		goto err;

	nla_nest_end(msg, table_attr);
	return 0;

err_loop:
	rdma_restrack_put(res);
	xa_unlock(&rt->xa);
err:
	nla_nest_cancel(msg, table_attr);
	return -EMSGSIZE;
}
866 
867 static int fill_res_srq_entry(struct sk_buff *msg, bool has_cap_net_admin,
868 			      struct rdma_restrack_entry *res, uint32_t port)
869 {
870 	struct ib_srq *srq = container_of(res, struct ib_srq, res);
871 	struct ib_device *dev = srq->device;
872 
873 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SRQN, srq->res.id))
874 		goto err;
875 
876 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, srq->srq_type))
877 		goto err;
878 
879 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, srq->pd->res.id))
880 		goto err;
881 
882 	if (ib_srq_has_cq(srq->srq_type)) {
883 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN,
884 				srq->ext.cq->res.id))
885 			goto err;
886 	}
887 
888 	if (fill_res_srq_qps(msg, srq))
889 		goto err;
890 
891 	if (fill_res_name_pid(msg, res))
892 		goto err;
893 
894 	if (dev->ops.fill_res_srq_entry)
895 		return dev->ops.fill_res_srq_entry(msg, srq);
896 
897 	return 0;
898 
899 err:
900 	return -EMSGSIZE;
901 }
902 
903 static int fill_res_srq_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
904 				 struct rdma_restrack_entry *res, uint32_t port)
905 {
906 	struct ib_srq *srq = container_of(res, struct ib_srq, res);
907 	struct ib_device *dev = srq->device;
908 
909 	if (!dev->ops.fill_res_srq_entry_raw)
910 		return -EINVAL;
911 	return dev->ops.fill_res_srq_entry_raw(msg, srq);
912 }
913 
914 static int fill_stat_counter_mode(struct sk_buff *msg,
915 				  struct rdma_counter *counter)
916 {
917 	struct rdma_counter_mode *m = &counter->mode;
918 
919 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, m->mode))
920 		return -EMSGSIZE;
921 
922 	if (m->mode == RDMA_COUNTER_MODE_AUTO) {
923 		if ((m->mask & RDMA_COUNTER_MASK_QP_TYPE) &&
924 		    nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, m->param.qp_type))
925 			return -EMSGSIZE;
926 
927 		if ((m->mask & RDMA_COUNTER_MASK_PID) &&
928 		    fill_res_name_pid(msg, &counter->res))
929 			return -EMSGSIZE;
930 	}
931 
932 	return 0;
933 }
934 
935 static int fill_stat_counter_qp_entry(struct sk_buff *msg, u32 qpn)
936 {
937 	struct nlattr *entry_attr;
938 
939 	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
940 	if (!entry_attr)
941 		return -EMSGSIZE;
942 
943 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn))
944 		goto err;
945 
946 	nla_nest_end(msg, entry_attr);
947 	return 0;
948 
949 err:
950 	nla_nest_cancel(msg, entry_attr);
951 	return -EMSGSIZE;
952 }
953 
/*
 * Dump the QP numbers of all QPs currently bound to @counter as a nested
 * RDMA_NLDEV_ATTR_RES_QP table.  Returns 0 on success or a negative errno
 * (typically -EMSGSIZE) on failure.
 */
static int fill_stat_counter_qps(struct sk_buff *msg,
				 struct rdma_counter *counter)
{
	struct rdma_restrack_entry *res;
	struct rdma_restrack_root *rt;
	struct nlattr *table_attr;
	struct ib_qp *qp = NULL;
	unsigned long id = 0;
	int ret = 0;

	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP);
	if (!table_attr)
		return -EMSGSIZE;

	/* Walk the device's QP restrack table under its xarray lock. */
	rt = &counter->device->res[RDMA_RESTRACK_QP];
	xa_lock(&rt->xa);
	xa_for_each(&rt->xa, id, res) {
		qp = container_of(res, struct ib_qp, res);
		/* Skip QPs bound to a different counter (or to none). */
		if (!qp->counter || (qp->counter->id != counter->id))
			continue;

		ret = fill_stat_counter_qp_entry(msg, qp->qp_num);
		if (ret)
			goto err;
	}

	xa_unlock(&rt->xa);
	nla_nest_end(msg, table_attr);
	return 0;

err:
	/* Drop the partially built table on failure. */
	xa_unlock(&rt->xa);
	nla_nest_cancel(msg, table_attr);
	return ret;
}
989 
990 int rdma_nl_stat_hwcounter_entry(struct sk_buff *msg, const char *name,
991 				 u64 value)
992 {
993 	struct nlattr *entry_attr;
994 
995 	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY);
996 	if (!entry_attr)
997 		return -EMSGSIZE;
998 
999 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME,
1000 			   name))
1001 		goto err;
1002 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE,
1003 			      value, RDMA_NLDEV_ATTR_PAD))
1004 		goto err;
1005 
1006 	nla_nest_end(msg, entry_attr);
1007 	return 0;
1008 
1009 err:
1010 	nla_nest_cancel(msg, entry_attr);
1011 	return -EMSGSIZE;
1012 }
1013 EXPORT_SYMBOL(rdma_nl_stat_hwcounter_entry);
1014 
1015 static int fill_stat_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
1016 			      struct rdma_restrack_entry *res, uint32_t port)
1017 {
1018 	struct ib_mr *mr = container_of(res, struct ib_mr, res);
1019 	struct ib_device *dev = mr->pd->device;
1020 
1021 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
1022 		goto err;
1023 
1024 	if (dev->ops.fill_stat_mr_entry)
1025 		return dev->ops.fill_stat_mr_entry(msg, mr);
1026 	return 0;
1027 
1028 err:
1029 	return -EMSGSIZE;
1030 }
1031 
/*
 * Dump all enabled HW counters of @counter as a nested
 * RDMA_NLDEV_ATTR_STAT_HWCOUNTERS table of name/value entries.
 */
static int fill_stat_counter_hwcounters(struct sk_buff *msg,
					struct rdma_counter *counter)
{
	struct rdma_hw_stats *st = counter->stats;
	struct nlattr *table_attr;
	int i;

	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
	if (!table_attr)
		return -EMSGSIZE;

	/* st->lock protects both the values and the is_disabled bitmap. */
	mutex_lock(&st->lock);
	for (i = 0; i < st->num_counters; i++) {
		/* Counters disabled by the driver are not reported. */
		if (test_bit(i, st->is_disabled))
			continue;
		if (rdma_nl_stat_hwcounter_entry(msg, st->descs[i].name,
						 st->value[i]))
			goto err;
	}
	mutex_unlock(&st->lock);

	nla_nest_end(msg, table_attr);
	return 0;

err:
	mutex_unlock(&st->lock);
	nla_nest_cancel(msg, table_attr);
	return -EMSGSIZE;
}
1061 
1062 static int fill_res_counter_entry(struct sk_buff *msg, bool has_cap_net_admin,
1063 				  struct rdma_restrack_entry *res,
1064 				  uint32_t port)
1065 {
1066 	struct rdma_counter *counter =
1067 		container_of(res, struct rdma_counter, res);
1068 
1069 	if (port && port != counter->port)
1070 		return -EAGAIN;
1071 
1072 	/* Dump it even query failed */
1073 	rdma_counter_query_stats(counter);
1074 
1075 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, counter->port) ||
1076 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, counter->id) ||
1077 	    fill_stat_counter_mode(msg, counter) ||
1078 	    fill_stat_counter_qps(msg, counter) ||
1079 	    fill_stat_counter_hwcounters(msg, counter))
1080 		return -EMSGSIZE;
1081 
1082 	return 0;
1083 }
1084 
1085 static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1086 			  struct netlink_ext_ack *extack)
1087 {
1088 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1089 	struct ib_device *device;
1090 	struct sk_buff *msg;
1091 	u32 index;
1092 	int err;
1093 
1094 	err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1095 			    nldev_policy, NL_VALIDATE_LIBERAL, extack);
1096 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1097 		return -EINVAL;
1098 
1099 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1100 
1101 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1102 	if (!device)
1103 		return -EINVAL;
1104 
1105 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1106 	if (!msg) {
1107 		err = -ENOMEM;
1108 		goto err;
1109 	}
1110 
1111 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1112 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
1113 			0, 0);
1114 	if (!nlh) {
1115 		err = -EMSGSIZE;
1116 		goto err_free;
1117 	}
1118 
1119 	err = fill_dev_info(msg, device);
1120 	if (err)
1121 		goto err_free;
1122 
1123 	nlmsg_end(msg, nlh);
1124 
1125 	ib_device_put(device);
1126 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1127 
1128 err_free:
1129 	nlmsg_free(msg);
1130 err:
1131 	ib_device_put(device);
1132 	return err;
1133 }
1134 
/*
 * Handle RDMA_NLDEV_CMD_SET: rename a device, move it to another net
 * namespace, or toggle dynamic interrupt moderation.  Exactly one
 * attribute is processed per request.
 */
static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
			  struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct ib_device *device;
	u32 index;
	int err;

	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
			    nldev_policy, extack);
	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(sock_net(skb->sk), index);
	if (!device)
		return -EINVAL;

	if (tb[RDMA_NLDEV_ATTR_DEV_NAME]) {
		char name[IB_DEVICE_NAME_MAX] = {};

		nla_strscpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
			    IB_DEVICE_NAME_MAX);
		/* An empty (or truncated-to-empty) name is rejected. */
		if (strlen(name) == 0) {
			err = -EINVAL;
			goto done;
		}
		err = ib_device_rename(device, name);
		goto done;
	}

	if (tb[RDMA_NLDEV_NET_NS_FD]) {
		u32 ns_fd;

		ns_fd = nla_get_u32(tb[RDMA_NLDEV_NET_NS_FD]);
		/*
		 * ib_device_set_netns_put() consumes the device reference
		 * taken above, so jump past the ib_device_put() below.
		 */
		err = ib_device_set_netns_put(skb, device, ns_fd);
		goto put_done;
	}

	if (tb[RDMA_NLDEV_ATTR_DEV_DIM]) {
		u8 use_dim;

		use_dim = nla_get_u8(tb[RDMA_NLDEV_ATTR_DEV_DIM]);
		err = ib_device_set_dim(device,  use_dim);
		goto done;
	}

done:
	ib_device_put(device);
put_done:
	/* If no settable attribute was present, err is still 0 here. */
	return err;
}
1187 
1188 static int _nldev_get_dumpit(struct ib_device *device,
1189 			     struct sk_buff *skb,
1190 			     struct netlink_callback *cb,
1191 			     unsigned int idx)
1192 {
1193 	int start = cb->args[0];
1194 	struct nlmsghdr *nlh;
1195 
1196 	if (idx < start)
1197 		return 0;
1198 
1199 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
1200 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
1201 			0, NLM_F_MULTI);
1202 
1203 	if (!nlh || fill_dev_info(skb, device)) {
1204 		nlmsg_cancel(skb, nlh);
1205 		goto out;
1206 	}
1207 
1208 	nlmsg_end(skb, nlh);
1209 
1210 	idx++;
1211 
1212 out:	cb->args[0] = idx;
1213 	return skb->len;
1214 }
1215 
/* Handle RDMA_NLDEV_CMD_GET dump: one message per registered device. */
static int nldev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
	/*
	 * There is no need to take lock, because
	 * we are relying on ib_core's locking.
	 */
	return ib_enum_all_devs(_nldev_get_dumpit, skb, cb);
}
1224 
/*
 * Handle RDMA_NLDEV_CMD_PORT_GET doit: reply with one message describing
 * the given port of the given device.  Both DEV_INDEX and PORT_INDEX
 * attributes are mandatory.
 */
static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
			       struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct ib_device *device;
	struct sk_buff *msg;
	u32 index;
	u32 port;
	int err;

	err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
			    nldev_policy, NL_VALIDATE_LIBERAL, extack);
	if (err ||
	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
	    !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(sock_net(skb->sk), index);
	if (!device)
		return -EINVAL;

	/* Reject out-of-range port numbers before allocating the reply. */
	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
	if (!rdma_is_port_valid(device, port)) {
		err = -EINVAL;
		goto err;
	}

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg) {
		err = -ENOMEM;
		goto err;
	}

	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
			0, 0);
	if (!nlh) {
		err = -EMSGSIZE;
		goto err_free;
	}

	err = fill_port_info(msg, device, port, sock_net(skb->sk));
	if (err)
		goto err_free;

	nlmsg_end(msg, nlh);
	ib_device_put(device);

	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);

err_free:
	nlmsg_free(msg);
err:
	ib_device_put(device);
	return err;
}
1282 
/*
 * Handle RDMA_NLDEV_CMD_PORT_GET dump: emit one message per port of the
 * device named by the mandatory DEV_INDEX attribute, resuming from
 * cb->args[0] on continuation.
 */
static int nldev_port_get_dumpit(struct sk_buff *skb,
				 struct netlink_callback *cb)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct ib_device *device;
	int start = cb->args[0];
	struct nlmsghdr *nlh;
	u32 idx = 0;
	u32 ifindex;
	int err;
	unsigned int p;

	err = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
			    nldev_policy, NL_VALIDATE_LIBERAL, NULL);
	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
		return -EINVAL;

	ifindex = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(sock_net(skb->sk), ifindex);
	if (!device)
		return -EINVAL;

	rdma_for_each_port (device, p) {
		/*
		 * The dumpit function returns all information from specific
		 * index. This specific index is taken from the netlink
		 * messages request sent by user and it is available
		 * in cb->args[0].
		 *
		 * Usually, the user doesn't fill this field and it causes
		 * to return everything.
		 *
		 */
		if (idx < start) {
			idx++;
			continue;
		}

		nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
				cb->nlh->nlmsg_seq,
				RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
						 RDMA_NLDEV_CMD_PORT_GET),
				0, NLM_F_MULTI);

		/* On overflow, cancel the partial message and stop here. */
		if (!nlh || fill_port_info(skb, device, p, sock_net(skb->sk))) {
			nlmsg_cancel(skb, nlh);
			goto out;
		}
		idx++;
		nlmsg_end(skb, nlh);
	}

out:
	ib_device_put(device);
	cb->args[0] = idx;
	return skb->len;
}
1340 
/*
 * Handle RDMA_NLDEV_CMD_RES_GET doit: reply with the per-device resource
 * summary (counts per restrack type).  DRIVER_DETAILS optionally includes
 * driver-internal objects in the counts.
 */
static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
			      struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	bool show_details = false;
	struct ib_device *device;
	struct sk_buff *msg;
	u32 index;
	int ret;

	ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
			    nldev_policy, NL_VALIDATE_LIBERAL, extack);
	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(sock_net(skb->sk), index);
	if (!device)
		return -EINVAL;

	if (tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS])
		show_details = nla_get_u8(tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]);

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg) {
		ret = -ENOMEM;
		goto err;
	}

	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
			0, 0);
	if (!nlh) {
		ret = -EMSGSIZE;
		goto err_free;
	}

	ret = fill_res_info(msg, device, show_details);
	if (ret)
		goto err_free;

	nlmsg_end(msg, nlh);
	ib_device_put(device);
	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);

err_free:
	nlmsg_free(msg);
err:
	ib_device_put(device);
	return ret;
}
1392 
1393 static int _nldev_res_get_dumpit(struct ib_device *device,
1394 				 struct sk_buff *skb,
1395 				 struct netlink_callback *cb,
1396 				 unsigned int idx)
1397 {
1398 	int start = cb->args[0];
1399 	struct nlmsghdr *nlh;
1400 
1401 	if (idx < start)
1402 		return 0;
1403 
1404 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
1405 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
1406 			0, NLM_F_MULTI);
1407 
1408 	if (!nlh || fill_res_info(skb, device, false)) {
1409 		nlmsg_cancel(skb, nlh);
1410 		goto out;
1411 	}
1412 	nlmsg_end(skb, nlh);
1413 
1414 	idx++;
1415 
1416 out:
1417 	cb->args[0] = idx;
1418 	return skb->len;
1419 }
1420 
/* Handle RDMA_NLDEV_CMD_RES_GET dump: resource summary per device. */
static int nldev_res_get_dumpit(struct sk_buff *skb,
				struct netlink_callback *cb)
{
	return ib_enum_all_devs(_nldev_res_get_dumpit, skb, cb);
}
1426 
/*
 * Per-restrack-type description of how a resource is encoded in a nldev
 * netlink message; indexes fill_entries[] below.
 */
struct nldev_fill_res_entry {
	enum rdma_nldev_attr nldev_attr;	/* table attribute for dumps */
	u8 flags;				/* enum nldev_res_flags */
	u32 entry;				/* per-object nest attribute */
	u32 id;					/* attribute carrying object id */
};
1433 
enum nldev_res_flags {
	NLDEV_PER_DEV = 1 << 0,	/* resource is per-device; rejects a port filter */
};
1437 
/*
 * Encoding table for res_get_common_doit()/res_get_common_dumpit(),
 * indexed by restrack type.  Entries with NLDEV_PER_DEV are dumped per
 * device; the others are dumped per port.
 */
static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
	[RDMA_RESTRACK_QP] = {
		.nldev_attr = RDMA_NLDEV_ATTR_RES_QP,
		.entry = RDMA_NLDEV_ATTR_RES_QP_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_LQPN,
	},
	[RDMA_RESTRACK_CM_ID] = {
		.nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID,
		.entry = RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_CM_IDN,
	},
	[RDMA_RESTRACK_CQ] = {
		.nldev_attr = RDMA_NLDEV_ATTR_RES_CQ,
		.flags = NLDEV_PER_DEV,
		.entry = RDMA_NLDEV_ATTR_RES_CQ_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_CQN,
	},
	[RDMA_RESTRACK_MR] = {
		.nldev_attr = RDMA_NLDEV_ATTR_RES_MR,
		.flags = NLDEV_PER_DEV,
		.entry = RDMA_NLDEV_ATTR_RES_MR_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_MRN,
	},
	[RDMA_RESTRACK_PD] = {
		.nldev_attr = RDMA_NLDEV_ATTR_RES_PD,
		.flags = NLDEV_PER_DEV,
		.entry = RDMA_NLDEV_ATTR_RES_PD_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_PDN,
	},
	[RDMA_RESTRACK_COUNTER] = {
		.nldev_attr = RDMA_NLDEV_ATTR_STAT_COUNTER,
		.entry = RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY,
		.id = RDMA_NLDEV_ATTR_STAT_COUNTER_ID,
	},
	[RDMA_RESTRACK_CTX] = {
		.nldev_attr = RDMA_NLDEV_ATTR_RES_CTX,
		.flags = NLDEV_PER_DEV,
		.entry = RDMA_NLDEV_ATTR_RES_CTX_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_CTXN,
	},
	[RDMA_RESTRACK_SRQ] = {
		.nldev_attr = RDMA_NLDEV_ATTR_RES_SRQ,
		.flags = NLDEV_PER_DEV,
		.entry = RDMA_NLDEV_ATTR_RES_SRQ_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_SRQN,
	},

};
1486 
/*
 * Common doit handler for all per-resource GET commands: look up a single
 * restrack object of @res_type by its id attribute and reply with one
 * message filled by @fill_func.
 */
static noinline_for_stack int
res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
		    struct netlink_ext_ack *extack,
		    enum rdma_restrack_type res_type,
		    res_fill_func_t fill_func)
{
	const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct rdma_restrack_entry *res;
	struct ib_device *device;
	u32 index, id, port = 0;
	bool has_cap_net_admin;
	struct sk_buff *msg;
	int ret;

	ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
			    nldev_policy, NL_VALIDATE_LIBERAL, extack);
	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !fe->id || !tb[fe->id])
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(sock_net(skb->sk), index);
	if (!device)
		return -EINVAL;

	if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
		port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
		if (!rdma_is_port_valid(device, port)) {
			ret = -EINVAL;
			goto err;
		}
	}

	/*
	 * Per-device resources must not carry a port; per-port resources
	 * must carry one.
	 */
	if ((port && fe->flags & NLDEV_PER_DEV) ||
	    (!port && ~fe->flags & NLDEV_PER_DEV)) {
		ret = -EINVAL;
		goto err;
	}

	/* Takes a restrack reference on success; dropped below. */
	id = nla_get_u32(tb[fe->id]);
	res = rdma_restrack_get_byid(device, res_type, id);
	if (IS_ERR(res)) {
		ret = PTR_ERR(res);
		goto err;
	}

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg) {
		ret = -ENOMEM;
		goto err_get;
	}

	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
					 RDMA_NL_GET_OP(nlh->nlmsg_type)),
			0, 0);

	if (!nlh || fill_nldev_handle(msg, device)) {
		ret = -EMSGSIZE;
		goto err_free;
	}

	/* Some fillers hide privileged details from unprivileged callers. */
	has_cap_net_admin = netlink_capable(skb, CAP_NET_ADMIN);

	ret = fill_func(msg, has_cap_net_admin, res, port);
	if (ret)
		goto err_free;

	rdma_restrack_put(res);
	nlmsg_end(msg, nlh);
	ib_device_put(device);
	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);

err_free:
	nlmsg_free(msg);
err_get:
	rdma_restrack_put(res);
err:
	ib_device_put(device);
	return ret;
}
1568 
/*
 * Common dumpit handler for all per-resource GET commands: walk the
 * device's restrack table for @res_type and emit one nested entry per
 * object via @fill_func, resuming from cb->args[0].
 *
 * The xarray lock is dropped around each fill_func() call (fillers may
 * sleep); a restrack reference pins the object across that window.
 */
static int res_get_common_dumpit(struct sk_buff *skb,
				 struct netlink_callback *cb,
				 enum rdma_restrack_type res_type,
				 res_fill_func_t fill_func)
{
	const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct rdma_restrack_entry *res;
	struct rdma_restrack_root *rt;
	int err, ret = 0, idx = 0;
	bool show_details = false;
	struct nlattr *table_attr;
	struct nlattr *entry_attr;
	struct ib_device *device;
	int start = cb->args[0];
	bool has_cap_net_admin;
	struct nlmsghdr *nlh;
	unsigned long id;
	u32 index, port = 0;
	bool filled = false;

	err = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
			    nldev_policy, NL_VALIDATE_LIBERAL, NULL);
	/*
	 * Right now, we are expecting the device index to get res information,
	 * but it is possible to extend this code to return all devices in
	 * one shot by checking the existence of RDMA_NLDEV_ATTR_DEV_INDEX.
	 * if it doesn't exist, we will iterate over all devices.
	 *
	 * But it is not needed for now.
	 */
	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(sock_net(skb->sk), index);
	if (!device)
		return -EINVAL;

	if (tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS])
		show_details = nla_get_u8(tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]);

	/*
	 * If no PORT_INDEX is supplied, we will return all QPs from that device
	 */
	if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
		port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
		if (!rdma_is_port_valid(device, port)) {
			ret = -EINVAL;
			goto err_index;
		}
	}

	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
					 RDMA_NL_GET_OP(cb->nlh->nlmsg_type)),
			0, NLM_F_MULTI);

	if (!nlh || fill_nldev_handle(skb, device)) {
		ret = -EMSGSIZE;
		goto err;
	}

	table_attr = nla_nest_start_noflag(skb, fe->nldev_attr);
	if (!table_attr) {
		ret = -EMSGSIZE;
		goto err;
	}

	has_cap_net_admin = netlink_capable(cb->skb, CAP_NET_ADMIN);

	rt = &device->res[res_type];
	xa_lock(&rt->xa);
	/*
	 * FIXME: if the skip ahead is something common this loop should
	 * use xas_for_each & xas_pause to optimize, we can have a lot of
	 * objects.
	 */
	xa_for_each(&rt->xa, id, res) {
		/* Driver-internal objects are hidden unless requested. */
		if (xa_get_mark(&rt->xa, res->id, RESTRACK_DD) && !show_details)
			goto next;

		/* Skip already-dumped entries and objects being destroyed. */
		if (idx < start || !rdma_restrack_get(res))
			goto next;

		xa_unlock(&rt->xa);

		filled = true;

		entry_attr = nla_nest_start_noflag(skb, fe->entry);
		if (!entry_attr) {
			ret = -EMSGSIZE;
			rdma_restrack_put(res);
			goto msg_full;
		}

		ret = fill_func(skb, has_cap_net_admin, res, port);

		rdma_restrack_put(res);

		if (ret) {
			nla_nest_cancel(skb, entry_attr);
			if (ret == -EMSGSIZE)
				goto msg_full;
			/* -EAGAIN: filler skipped this object; keep going. */
			if (ret == -EAGAIN)
				goto again;
			goto res_err;
		}
		nla_nest_end(skb, entry_attr);
again:		xa_lock(&rt->xa);
next:		idx++;
	}
	xa_unlock(&rt->xa);

msg_full:
	nla_nest_end(skb, table_attr);
	nlmsg_end(skb, nlh);
	cb->args[0] = idx;

	/*
	 * No more entries to fill, cancel the message and
	 * return 0 to mark end of dumpit.
	 */
	if (!filled)
		goto err;

	ib_device_put(device);
	return skb->len;

res_err:
	nla_nest_cancel(skb, table_attr);

err:
	nlmsg_cancel(skb, nlh);

err_index:
	ib_device_put(device);
	return ret;
}
1708 
/*
 * Generate the thin doit/dumpit wrappers that bind a restrack type to its
 * fill_res_<name>_entry() filler via the common handlers above.
 */
#define RES_GET_FUNCS(name, type)                                              \
	static int nldev_res_get_##name##_dumpit(struct sk_buff *skb,          \
						 struct netlink_callback *cb)  \
	{                                                                      \
		return res_get_common_dumpit(skb, cb, type,                    \
					     fill_res_##name##_entry);         \
	}                                                                      \
	static int nldev_res_get_##name##_doit(struct sk_buff *skb,            \
					       struct nlmsghdr *nlh,           \
					       struct netlink_ext_ack *extack) \
	{                                                                      \
		return res_get_common_doit(skb, nlh, extack, type,             \
					   fill_res_##name##_entry);           \
	}

/* One wrapper pair per dumpable resource type ("raw" = driver-formatted). */
RES_GET_FUNCS(qp, RDMA_RESTRACK_QP);
RES_GET_FUNCS(qp_raw, RDMA_RESTRACK_QP);
RES_GET_FUNCS(cm_id, RDMA_RESTRACK_CM_ID);
RES_GET_FUNCS(cq, RDMA_RESTRACK_CQ);
RES_GET_FUNCS(cq_raw, RDMA_RESTRACK_CQ);
RES_GET_FUNCS(pd, RDMA_RESTRACK_PD);
RES_GET_FUNCS(mr, RDMA_RESTRACK_MR);
RES_GET_FUNCS(mr_raw, RDMA_RESTRACK_MR);
RES_GET_FUNCS(counter, RDMA_RESTRACK_COUNTER);
RES_GET_FUNCS(ctx, RDMA_RESTRACK_CTX);
RES_GET_FUNCS(srq, RDMA_RESTRACK_SRQ);
RES_GET_FUNCS(srq_raw, RDMA_RESTRACK_SRQ);
1736 
/* Registered 'rdma link add' type handlers, guarded by link_ops_rwsem. */
static LIST_HEAD(link_ops);
static DECLARE_RWSEM(link_ops_rwsem);
1739 
1740 static const struct rdma_link_ops *link_ops_get(const char *type)
1741 {
1742 	const struct rdma_link_ops *ops;
1743 
1744 	list_for_each_entry(ops, &link_ops, list) {
1745 		if (!strcmp(ops->type, type))
1746 			goto out;
1747 	}
1748 	ops = NULL;
1749 out:
1750 	return ops;
1751 }
1752 
1753 void rdma_link_register(struct rdma_link_ops *ops)
1754 {
1755 	down_write(&link_ops_rwsem);
1756 	if (WARN_ON_ONCE(link_ops_get(ops->type)))
1757 		goto out;
1758 	list_add(&ops->list, &link_ops);
1759 out:
1760 	up_write(&link_ops_rwsem);
1761 }
1762 EXPORT_SYMBOL(rdma_link_register);
1763 
/* Remove a link type handler; pairs with rdma_link_register(). */
void rdma_link_unregister(struct rdma_link_ops *ops)
{
	down_write(&link_ops_rwsem);
	list_del(&ops->list);
	up_write(&link_ops_rwsem);
}
EXPORT_SYMBOL(rdma_link_unregister);
1771 
/*
 * Handle RDMA_NLDEV_CMD_NEWLINK ('rdma link add'): create a soft RDMA
 * device of the given link type on top of a netdev.  Autoloads the
 * "rdma-link-<type>" module if the handler is not yet registered.
 */
static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
			  struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	char ibdev_name[IB_DEVICE_NAME_MAX];
	const struct rdma_link_ops *ops;
	char ndev_name[IFNAMSIZ];
	struct net_device *ndev;
	char type[IFNAMSIZ];
	int err;

	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
			    nldev_policy, extack);
	if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] ||
	    !tb[RDMA_NLDEV_ATTR_LINK_TYPE] || !tb[RDMA_NLDEV_ATTR_NDEV_NAME])
		return -EINVAL;

	/* Reject empty names and '%' (printf-style name templates). */
	nla_strscpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
		    sizeof(ibdev_name));
	if (strchr(ibdev_name, '%') || strlen(ibdev_name) == 0)
		return -EINVAL;

	nla_strscpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type));
	nla_strscpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME],
		    sizeof(ndev_name));

	ndev = dev_get_by_name(sock_net(skb->sk), ndev_name);
	if (!ndev)
		return -ENODEV;

	down_read(&link_ops_rwsem);
	ops = link_ops_get(type);
#ifdef CONFIG_MODULES
	/* Drop the lock while loading the module, then re-check. */
	if (!ops) {
		up_read(&link_ops_rwsem);
		request_module("rdma-link-%s", type);
		down_read(&link_ops_rwsem);
		ops = link_ops_get(type);
	}
#endif
	err = ops ? ops->newlink(ibdev_name, ndev) : -EINVAL;
	up_read(&link_ops_rwsem);
	dev_put(ndev);

	return err;
}
1818 
1819 static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
1820 			  struct netlink_ext_ack *extack)
1821 {
1822 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1823 	struct ib_device *device;
1824 	u32 index;
1825 	int err;
1826 
1827 	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1828 			    nldev_policy, extack);
1829 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1830 		return -EINVAL;
1831 
1832 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1833 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1834 	if (!device)
1835 		return -EINVAL;
1836 
1837 	if (!(device->attrs.kernel_cap_flags & IBK_ALLOW_USER_UNREG)) {
1838 		ib_device_put(device);
1839 		return -EINVAL;
1840 	}
1841 
1842 	/*
1843 	 * This path is triggered by the 'rdma link delete' administrative command.
1844 	 * For Soft-RoCE (RXE), we ensure that transport sockets are closed here.
1845 	 * Note: iWARP driver does not implement .dellink, so this logic is
1846 	 * implicitly scoped to the driver supporting dynamic link deletion like RXE.
1847 	 */
1848 	if (device->link_ops && device->link_ops->dellink) {
1849 		err = device->link_ops->dellink(device);
1850 		if (err)
1851 			return err;
1852 	}
1853 
1854 	ib_unregister_device_and_put(device);
1855 	return 0;
1856 }
1857 
/*
 * Handle RDMA_NLDEV_CMD_GET_CHARDEV: resolve the char device (dev_t, ABI
 * version and name) that a kernel client (e.g. uverbs, umad) exposes for
 * the given device/port.  DEV_INDEX is optional; PORT_INDEX requires it.
 */
static int nldev_get_chardev(struct sk_buff *skb, struct nlmsghdr *nlh,
			     struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	char client_name[RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE];
	struct ib_client_nl_info data = {};
	struct ib_device *ibdev = NULL;
	struct sk_buff *msg;
	u32 index;
	int err;

	err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
			    NL_VALIDATE_LIBERAL, extack);
	if (err || !tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE])
		return -EINVAL;

	nla_strscpy(client_name, tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE],
		    sizeof(client_name));

	if (tb[RDMA_NLDEV_ATTR_DEV_INDEX]) {
		index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
		ibdev = ib_device_get_by_index(sock_net(skb->sk), index);
		if (!ibdev)
			return -EINVAL;

		if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
			data.port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
			if (!rdma_is_port_valid(ibdev, data.port)) {
				err = -EINVAL;
				goto out_put;
			}
		} else {
			/* -1 means "no specific port". */
			data.port = -1;
		}
	} else if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
		/* A port without a device makes no sense. */
		return -EINVAL;
	}

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg) {
		err = -ENOMEM;
		goto out_put;
	}
	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
					 RDMA_NLDEV_CMD_GET_CHARDEV),
			0, 0);
	if (!nlh) {
		err = -EMSGSIZE;
		goto out_nlmsg;
	}

	/* On success, data.cdev holds a device reference we must put. */
	data.nl_msg = msg;
	err = ib_get_client_nl_info(ibdev, client_name, &data);
	if (err)
		goto out_nlmsg;

	err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV,
				huge_encode_dev(data.cdev->devt),
				RDMA_NLDEV_ATTR_PAD);
	if (err)
		goto out_data;
	err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV_ABI, data.abi,
				RDMA_NLDEV_ATTR_PAD);
	if (err)
		goto out_data;
	if (nla_put_string(msg, RDMA_NLDEV_ATTR_CHARDEV_NAME,
			   dev_name(data.cdev))) {
		err = -EMSGSIZE;
		goto out_data;
	}

	nlmsg_end(msg, nlh);
	put_device(data.cdev);
	if (ibdev)
		ib_device_put(ibdev);
	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);

out_data:
	put_device(data.cdev);
out_nlmsg:
	nlmsg_free(msg);
out_put:
	if (ibdev)
		ib_device_put(ibdev);
	return err;
}
1945 
1946 static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1947 			      struct netlink_ext_ack *extack)
1948 {
1949 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1950 	struct sk_buff *msg;
1951 	int err;
1952 
1953 	err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1954 			    nldev_policy, NL_VALIDATE_LIBERAL, extack);
1955 	if (err)
1956 		return err;
1957 
1958 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1959 	if (!msg)
1960 		return -ENOMEM;
1961 
1962 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1963 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1964 					 RDMA_NLDEV_CMD_SYS_GET),
1965 			0, 0);
1966 	if (!nlh) {
1967 		nlmsg_free(msg);
1968 		return -EMSGSIZE;
1969 	}
1970 
1971 	err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_NETNS_MODE,
1972 			 (u8)ib_devices_shared_netns);
1973 	if (err) {
1974 		nlmsg_free(msg);
1975 		return err;
1976 	}
1977 
1978 	err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE,
1979 			 (u8)privileged_qkey);
1980 	if (err) {
1981 		nlmsg_free(msg);
1982 		return err;
1983 	}
1984 
1985 	err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_MONITOR_MODE, 1);
1986 	if (err) {
1987 		nlmsg_free(msg);
1988 		return err;
1989 	}
1990 	/*
1991 	 * Copy-on-fork is supported.
1992 	 * See commits:
1993 	 * 70e806e4e645 ("mm: Do early cow for pinned pages during fork() for ptes")
1994 	 * 4eae4efa2c29 ("hugetlb: do early cow when page pinned on src mm")
1995 	 * for more details. Don't backport this without them.
1996 	 *
1997 	 * Return value ignored on purpose, assume copy-on-fork is not
1998 	 * supported in case of failure.
1999 	 */
2000 	nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK, 1);
2001 
2002 	nlmsg_end(msg, nlh);
2003 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
2004 }
2005 
2006 static int nldev_set_sys_set_netns_doit(struct nlattr *tb[])
2007 {
2008 	u8 enable;
2009 	int err;
2010 
2011 	enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]);
2012 	/* Only 0 and 1 are supported */
2013 	if (enable > 1)
2014 		return -EINVAL;
2015 
2016 	err = rdma_compatdev_set(enable);
2017 	return err;
2018 }
2019 
2020 static int nldev_set_sys_set_pqkey_doit(struct nlattr *tb[])
2021 {
2022 	u8 enable;
2023 
2024 	enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE]);
2025 	/* Only 0 and 1 are supported */
2026 	if (enable > 1)
2027 		return -EINVAL;
2028 
2029 	privileged_qkey = enable;
2030 	return 0;
2031 }
2032 
2033 static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
2034 				  struct netlink_ext_ack *extack)
2035 {
2036 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2037 	int err;
2038 
2039 	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2040 			  nldev_policy, extack);
2041 	if (err)
2042 		return -EINVAL;
2043 
2044 	if (tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE])
2045 		return nldev_set_sys_set_netns_doit(tb);
2046 
2047 	if (tb[RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE])
2048 		return nldev_set_sys_set_pqkey_doit(tb);
2049 
2050 	return -EINVAL;
2051 }
2052 
2053 
/*
 * Configure the counter mode for one device port: either switch the port
 * to auto-binding mode, or manually bind a QP to a (possibly newly
 * allocated) counter. Caller (nldev_stat_set_doit) has validated device
 * and port and started the reply message.
 *
 * Returns 0 on success or a negative errno; on a reply-fill failure the
 * QP binding performed here is rolled back.
 */
static int nldev_stat_set_mode_doit(struct sk_buff *msg,
				    struct netlink_ext_ack *extack,
				    struct nlattr *tb[],
				    struct ib_device *device, u32 port)
{
	u32 mode, mask = 0, qpn, cntn = 0;
	bool opcnt = false;
	int ret;

	/* Currently only counter for QP is supported */
	if (!tb[RDMA_NLDEV_ATTR_STAT_RES] ||
	    nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
		return -EINVAL;

	/* Optional per-op counter enablement; defaults to disabled. */
	if (tb[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED])
		opcnt = !!nla_get_u8(
			tb[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED]);

	mode = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_MODE]);
	if (mode == RDMA_COUNTER_MODE_AUTO) {
		/* Missing mask means "no auto criteria" (mask stays 0). */
		if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK])
			mask = nla_get_u32(
				tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]);
		return rdma_counter_set_auto_mode(device, port, mask, opcnt,
						  extack);
	}

	/* Manual mode requires the QP to bind. */
	if (!tb[RDMA_NLDEV_ATTR_RES_LQPN])
		return -EINVAL;

	qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
	if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) {
		/* Bind to an existing counter chosen by the user. */
		cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
		ret = rdma_counter_bind_qpn(device, port, qpn, cntn);
		if (ret)
			return ret;
	} else {
		/* No counter given: allocate one and bind to it. */
		ret = rdma_counter_bind_qpn_alloc(device, port, qpn, &cntn);
		if (ret)
			return ret;
	}

	/* Echo the binding back to userspace in the reply. */
	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
		ret = -EMSGSIZE;
		goto err_fill;
	}

	return 0;

err_fill:
	/* Undo the bind done above so failure leaves no side effects. */
	rdma_counter_unbind_qpn(device, port, qpn, cntn);
	return ret;
}
2108 
2109 static int nldev_stat_set_counter_dynamic_doit(struct nlattr *tb[],
2110 					       struct ib_device *device,
2111 					       u32 port)
2112 {
2113 	struct rdma_hw_stats *stats;
2114 	struct nlattr *entry_attr;
2115 	unsigned long *target;
2116 	int rem, i, ret = 0;
2117 	u32 index;
2118 
2119 	stats = ib_get_hw_stats_port(device, port);
2120 	if (!stats)
2121 		return -EINVAL;
2122 
2123 	target = kcalloc(BITS_TO_LONGS(stats->num_counters),
2124 			 sizeof(*stats->is_disabled), GFP_KERNEL);
2125 	if (!target)
2126 		return -ENOMEM;
2127 
2128 	nla_for_each_nested(entry_attr, tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS],
2129 			    rem) {
2130 		index = nla_get_u32(entry_attr);
2131 		if ((index >= stats->num_counters) ||
2132 		    !(stats->descs[index].flags & IB_STAT_FLAG_OPTIONAL)) {
2133 			ret = -EINVAL;
2134 			goto out;
2135 		}
2136 
2137 		set_bit(index, target);
2138 	}
2139 
2140 	for (i = 0; i < stats->num_counters; i++) {
2141 		if (!(stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL))
2142 			continue;
2143 
2144 		ret = rdma_counter_modify(device, port, i, test_bit(i, target));
2145 		if (ret)
2146 			goto out;
2147 	}
2148 
2149 out:
2150 	kfree(target);
2151 	return ret;
2152 }
2153 
/*
 * Entry point for RDMA_NLDEV_CMD_STAT_SET: validate device/port, then
 * apply a counter-mode change and/or a dynamic-hwcounter change, and
 * unicast a confirmation message back to the requester.
 *
 * Returns 0 on success or a negative errno. The reply skb is consumed
 * by rdma_nl_unicast() on success and freed on the error paths.
 */
static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
			       struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct ib_device *device;
	struct sk_buff *msg;
	u32 index, port;
	int ret;

	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
			  extack);
	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
	    !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
		return -EINVAL;

	/* Takes a reference on the device; dropped on every exit path. */
	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(sock_net(skb->sk), index);
	if (!device)
		return -EINVAL;

	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
	if (!rdma_is_port_valid(device, port)) {
		ret = -EINVAL;
		goto err_put_device;
	}

	/* At least one of the two supported operations must be requested. */
	if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] &&
	    !tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) {
		ret = -EINVAL;
		goto err_put_device;
	}

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg) {
		ret = -ENOMEM;
		goto err_put_device;
	}
	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
					 RDMA_NLDEV_CMD_STAT_SET),
			0, 0);
	if (!nlh || fill_nldev_handle(msg, device) ||
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) {
		ret = -EMSGSIZE;
		goto err_free_msg;
	}

	/* Mode change first; it fills the counter binding into the reply. */
	if (tb[RDMA_NLDEV_ATTR_STAT_MODE]) {
		ret = nldev_stat_set_mode_doit(msg, extack, tb, device, port);
		if (ret)
			goto err_free_msg;
	}

	if (tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) {
		ret = nldev_stat_set_counter_dynamic_doit(tb, device, port);
		if (ret)
			goto err_free_msg;
	}

	nlmsg_end(msg, nlh);
	ib_device_put(device);
	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);

err_free_msg:
	nlmsg_free(msg);
err_put_device:
	ib_device_put(device);
	return ret;
}
2223 
/*
 * Entry point for RDMA_NLDEV_CMD_STAT_DEL: unbind a QP from a counter
 * and confirm the unbinding to the requester. Only the QP resource type
 * is supported; device, port, counter id and QPN are all mandatory.
 *
 * Returns 0 on success or a negative errno.
 */
static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
			       struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct ib_device *device;
	struct sk_buff *msg;
	u32 index, port, qpn, cntn;
	int ret;

	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
			  nldev_policy, extack);
	if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES] ||
	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX] ||
	    !tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID] ||
	    !tb[RDMA_NLDEV_ATTR_RES_LQPN])
		return -EINVAL;

	if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
		return -EINVAL;

	/* Takes a reference on the device; dropped on every exit path. */
	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(sock_net(skb->sk), index);
	if (!device)
		return -EINVAL;

	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
	if (!rdma_is_port_valid(device, port)) {
		ret = -EINVAL;
		goto err;
	}

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg) {
		ret = -ENOMEM;
		goto err;
	}
	/* Reply reuses the STAT_SET command type, mirroring the bind path. */
	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
					 RDMA_NLDEV_CMD_STAT_SET),
			0, 0);
	if (!nlh) {
		ret = -EMSGSIZE;
		goto err_fill;
	}

	cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
	qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
	if (fill_nldev_handle(msg, device) ||
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
		ret = -EMSGSIZE;
		goto err_fill;
	}

	/* Reply is built before the unbind so failure leaves state intact. */
	ret = rdma_counter_unbind_qpn(device, port, qpn, cntn);
	if (ret)
		goto err_fill;

	nlmsg_end(msg, nlh);
	ib_device_put(device);
	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);

err_fill:
	nlmsg_free(msg);
err:
	ib_device_put(device);
	return ret;
}
2293 
/*
 * Report the port-wide default hwcounters of a device: snapshot the
 * driver stats under stats->lock and emit one name/value entry per
 * enabled counter, adding in the per-counter values tracked by the
 * counter framework.
 *
 * Returns 0 on success or a negative errno.
 */
static noinline_for_stack int
stat_get_doit_default_counter(struct sk_buff *skb, struct nlmsghdr *nlh,
			      struct netlink_ext_ack *extack,
			      struct nlattr *tb[])
{
	struct rdma_hw_stats *stats;
	struct nlattr *table_attr;
	struct ib_device *device;
	int ret, num_cnts, i;
	struct sk_buff *msg;
	u32 index, port;
	u64 v;

	if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
		return -EINVAL;

	/* Takes a reference on the device; dropped on every exit path. */
	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(sock_net(skb->sk), index);
	if (!device)
		return -EINVAL;

	/* Driver must implement hw stats for this query to make sense. */
	if (!device->ops.alloc_hw_port_stats || !device->ops.get_hw_stats) {
		ret = -EINVAL;
		goto err;
	}

	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
	stats = ib_get_hw_stats_port(device, port);
	if (!stats) {
		ret = -EINVAL;
		goto err;
	}

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg) {
		ret = -ENOMEM;
		goto err;
	}

	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
					 RDMA_NLDEV_CMD_STAT_GET),
			0, 0);

	if (!nlh || fill_nldev_handle(msg, device) ||
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) {
		ret = -EMSGSIZE;
		goto err_msg;
	}

	/* Serialize against concurrent refresh/modify of the stats. */
	mutex_lock(&stats->lock);

	num_cnts = device->ops.get_hw_stats(device, stats, port, 0);
	if (num_cnts < 0) {
		ret = -EINVAL;
		goto err_stats;
	}

	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
	if (!table_attr) {
		ret = -EMSGSIZE;
		goto err_stats;
	}
	for (i = 0; i < num_cnts; i++) {
		/* Disabled (optional) counters are omitted from the dump. */
		if (test_bit(i, stats->is_disabled))
			continue;

		v = stats->value[i] +
			rdma_counter_get_hwstat_value(device, port, i);
		if (rdma_nl_stat_hwcounter_entry(msg,
						 stats->descs[i].name, v)) {
			ret = -EMSGSIZE;
			goto err_table;
		}
	}
	nla_nest_end(msg, table_attr);

	mutex_unlock(&stats->lock);
	nlmsg_end(msg, nlh);
	ib_device_put(device);
	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);

err_table:
	nla_nest_cancel(msg, table_attr);
err_stats:
	mutex_unlock(&stats->lock);
err_msg:
	nlmsg_free(msg);
err:
	ib_device_put(device);
	return ret;
}
2386 
2387 static noinline_for_stack int
2388 stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh,
2389 		 struct netlink_ext_ack *extack, struct nlattr *tb[])
2390 
2391 {
2392 	static enum rdma_nl_counter_mode mode;
2393 	static enum rdma_nl_counter_mask mask;
2394 	struct ib_device *device;
2395 	struct sk_buff *msg;
2396 	u32 index, port;
2397 	bool opcnt;
2398 	int ret;
2399 
2400 	if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID])
2401 		return nldev_res_get_counter_doit(skb, nlh, extack);
2402 
2403 	if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] ||
2404 	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
2405 		return -EINVAL;
2406 
2407 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2408 	device = ib_device_get_by_index(sock_net(skb->sk), index);
2409 	if (!device)
2410 		return -EINVAL;
2411 
2412 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
2413 	if (!rdma_is_port_valid(device, port)) {
2414 		ret = -EINVAL;
2415 		goto err;
2416 	}
2417 
2418 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2419 	if (!msg) {
2420 		ret = -ENOMEM;
2421 		goto err;
2422 	}
2423 
2424 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
2425 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
2426 					 RDMA_NLDEV_CMD_STAT_GET),
2427 			0, 0);
2428 	if (!nlh) {
2429 		ret = -EMSGSIZE;
2430 		goto err_msg;
2431 	}
2432 
2433 	ret = rdma_counter_get_mode(device, port, &mode, &mask, &opcnt);
2434 	if (ret)
2435 		goto err_msg;
2436 
2437 	if (fill_nldev_handle(msg, device) ||
2438 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
2439 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, mode)) {
2440 		ret = -EMSGSIZE;
2441 		goto err_msg;
2442 	}
2443 
2444 	if ((mode == RDMA_COUNTER_MODE_AUTO) &&
2445 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK, mask)) {
2446 		ret = -EMSGSIZE;
2447 		goto err_msg;
2448 	}
2449 
2450 	if ((mode == RDMA_COUNTER_MODE_AUTO) &&
2451 	    nla_put_u8(msg, RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED, opcnt)) {
2452 		ret = -EMSGSIZE;
2453 		goto err_msg;
2454 	}
2455 
2456 	nlmsg_end(msg, nlh);
2457 	ib_device_put(device);
2458 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
2459 
2460 err_msg:
2461 	nlmsg_free(msg);
2462 err:
2463 	ib_device_put(device);
2464 	return ret;
2465 }
2466 
2467 static int nldev_stat_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
2468 			       struct netlink_ext_ack *extack)
2469 {
2470 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2471 	int ret;
2472 
2473 	ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2474 			    nldev_policy, NL_VALIDATE_LIBERAL, extack);
2475 	if (ret)
2476 		return -EINVAL;
2477 
2478 	if (!tb[RDMA_NLDEV_ATTR_STAT_RES])
2479 		return stat_get_doit_default_counter(skb, nlh, extack, tb);
2480 
2481 	switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) {
2482 	case RDMA_NLDEV_ATTR_RES_QP:
2483 		ret = stat_get_doit_qp(skb, nlh, extack, tb);
2484 		break;
2485 	case RDMA_NLDEV_ATTR_RES_MR:
2486 		ret = res_get_common_doit(skb, nlh, extack, RDMA_RESTRACK_MR,
2487 					  fill_stat_mr_entry);
2488 		break;
2489 	default:
2490 		ret = -EINVAL;
2491 		break;
2492 	}
2493 
2494 	return ret;
2495 }
2496 
2497 static int nldev_stat_get_dumpit(struct sk_buff *skb,
2498 				 struct netlink_callback *cb)
2499 {
2500 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2501 	int ret;
2502 
2503 	ret = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2504 			    nldev_policy, NL_VALIDATE_LIBERAL, NULL);
2505 	if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES])
2506 		return -EINVAL;
2507 
2508 	switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) {
2509 	case RDMA_NLDEV_ATTR_RES_QP:
2510 		ret = nldev_res_get_counter_dumpit(skb, cb);
2511 		break;
2512 	case RDMA_NLDEV_ATTR_RES_MR:
2513 		ret = res_get_common_dumpit(skb, cb, RDMA_RESTRACK_MR,
2514 					    fill_stat_mr_entry);
2515 		break;
2516 	default:
2517 		ret = -EINVAL;
2518 		break;
2519 	}
2520 
2521 	return ret;
2522 }
2523 
/*
 * Report the status of every hwcounter of a port: name, index and — for
 * optional counters — whether they are currently enabled (dynamic).
 *
 * Returns 0 on success or a negative errno.
 */
static int nldev_stat_get_counter_status_doit(struct sk_buff *skb,
					      struct nlmsghdr *nlh,
					      struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX], *table, *entry;
	struct rdma_hw_stats *stats;
	struct ib_device *device;
	struct sk_buff *msg;
	u32 devid, port;
	int ret, i;

	ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
			    nldev_policy, NL_VALIDATE_LIBERAL, extack);
	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
	    !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
		return -EINVAL;

	/* Takes a reference on the device; dropped on every exit path. */
	devid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(sock_net(skb->sk), devid);
	if (!device)
		return -EINVAL;

	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
	if (!rdma_is_port_valid(device, port)) {
		ret = -EINVAL;
		goto err;
	}

	stats = ib_get_hw_stats_port(device, port);
	if (!stats) {
		ret = -EINVAL;
		goto err;
	}

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg) {
		ret = -ENOMEM;
		goto err;
	}

	nlh = nlmsg_put(
		msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
		RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_STAT_GET_STATUS),
		0, 0);

	/* Every failure below is a message-size problem. */
	ret = -EMSGSIZE;
	if (!nlh || fill_nldev_handle(msg, device) ||
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
		goto err_msg;

	table = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
	if (!table)
		goto err_msg;

	/* Lock keeps is_disabled stable while the table is built. */
	mutex_lock(&stats->lock);
	for (i = 0; i < stats->num_counters; i++) {
		entry = nla_nest_start(msg,
				       RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY);
		if (!entry)
			goto err_msg_table;

		if (nla_put_string(msg,
				   RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME,
				   stats->descs[i].name) ||
		    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX, i))
			goto err_msg_entry;

		/* DYNAMIC flag is only reported for optional counters. */
		if ((stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) &&
		    (nla_put_u8(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC,
				!test_bit(i, stats->is_disabled))))
			goto err_msg_entry;

		nla_nest_end(msg, entry);
	}
	mutex_unlock(&stats->lock);

	nla_nest_end(msg, table);
	nlmsg_end(msg, nlh);
	ib_device_put(device);
	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);

err_msg_entry:
	nla_nest_cancel(msg, entry);
err_msg_table:
	mutex_unlock(&stats->lock);
	nla_nest_cancel(msg, table);
err_msg:
	nlmsg_free(msg);
err:
	ib_device_put(device);
	return ret;
}
2616 
2617 static int nldev_newdev(struct sk_buff *skb, struct nlmsghdr *nlh,
2618 			struct netlink_ext_ack *extack)
2619 {
2620 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2621 	enum rdma_nl_dev_type type;
2622 	struct ib_device *parent;
2623 	char name[IFNAMSIZ] = {};
2624 	u32 parentid;
2625 	int ret;
2626 
2627 	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2628 			  nldev_policy, extack);
2629 	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
2630 		!tb[RDMA_NLDEV_ATTR_DEV_NAME] || !tb[RDMA_NLDEV_ATTR_DEV_TYPE])
2631 		return -EINVAL;
2632 
2633 	nla_strscpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME], sizeof(name));
2634 	type = nla_get_u8(tb[RDMA_NLDEV_ATTR_DEV_TYPE]);
2635 	parentid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2636 	parent = ib_device_get_by_index(sock_net(skb->sk), parentid);
2637 	if (!parent)
2638 		return -EINVAL;
2639 
2640 	ret = ib_add_sub_device(parent, type, name);
2641 	ib_device_put(parent);
2642 
2643 	return ret;
2644 }
2645 
2646 static int nldev_deldev(struct sk_buff *skb, struct nlmsghdr *nlh,
2647 			struct netlink_ext_ack *extack)
2648 {
2649 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2650 	struct ib_device *device;
2651 	u32 devid;
2652 	int ret;
2653 
2654 	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2655 			  nldev_policy, extack);
2656 	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
2657 		return -EINVAL;
2658 
2659 	devid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2660 	device = ib_device_get_by_index(sock_net(skb->sk), devid);
2661 	if (!device)
2662 		return -EINVAL;
2663 
2664 	return ib_del_sub_device_and_put(device);
2665 }
2666 
2667 static int fill_frmr_pool_key(struct sk_buff *msg, struct ib_frmr_key *key)
2668 {
2669 	struct nlattr *key_attr;
2670 
2671 	key_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_FRMR_POOL_KEY);
2672 	if (!key_attr)
2673 		return -EMSGSIZE;
2674 
2675 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ATS, key->ats))
2676 		goto err;
2677 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ACCESS_FLAGS,
2678 			key->access_flags))
2679 		goto err;
2680 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_FRMR_POOL_KEY_VENDOR_KEY,
2681 			      key->vendor_key, RDMA_NLDEV_ATTR_PAD))
2682 		goto err;
2683 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_FRMR_POOL_KEY_NUM_DMA_BLOCKS,
2684 			      key->num_dma_blocks, RDMA_NLDEV_ATTR_PAD))
2685 		goto err;
2686 
2687 	if (key->kernel_vendor_key &&
2688 	    nla_put_u64_64bit(msg,
2689 			      RDMA_NLDEV_ATTR_FRMR_POOL_KEY_KERNEL_VENDOR_KEY,
2690 			      key->kernel_vendor_key, RDMA_NLDEV_ATTR_PAD))
2691 		goto err;
2692 
2693 	nla_nest_end(msg, key_attr);
2694 	return 0;
2695 
2696 err:
2697 	return -EMSGSIZE;
2698 }
2699 
/*
 * Emit the attributes of one FRMR pool: its key plus usage statistics.
 * The statistics are sampled atomically under the pool's spinlock.
 *
 * Returns 0 on success or -EMSGSIZE. On failure the caller is expected
 * to cancel the enclosing nest (see nldev_frmr_pools_get_dumpit()).
 */
static int fill_frmr_pool_entry(struct sk_buff *msg, struct ib_frmr_pool *pool)
{
	if (fill_frmr_pool_key(msg, &pool->key))
		return -EMSGSIZE;

	/* Lock makes the counters below one consistent snapshot. */
	spin_lock(&pool->lock);
	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_FRMR_POOL_QUEUE_HANDLES,
			pool->queue.ci + pool->inactive_queue.ci))
		goto err_unlock;
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_FRMR_POOL_MAX_IN_USE,
			      pool->max_in_use, RDMA_NLDEV_ATTR_PAD))
		goto err_unlock;
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_FRMR_POOL_IN_USE,
			      pool->in_use, RDMA_NLDEV_ATTR_PAD))
		goto err_unlock;
	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_FRMR_POOL_PINNED_HANDLES,
			pool->pinned_handles))
		goto err_unlock;
	spin_unlock(&pool->lock);

	return 0;

err_unlock:
	spin_unlock(&pool->lock);
	return -EMSGSIZE;
}
2726 
2727 static int nldev_frmr_pools_parse_key(struct nlattr *tb[],
2728 				      struct ib_frmr_key *key,
2729 				      struct netlink_ext_ack *extack)
2730 {
2731 	if (tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ATS])
2732 		key->ats = nla_get_u8(tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ATS]);
2733 
2734 	if (tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ACCESS_FLAGS])
2735 		key->access_flags = nla_get_u32(
2736 			tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ACCESS_FLAGS]);
2737 
2738 	if (tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_VENDOR_KEY])
2739 		key->vendor_key = nla_get_u64(
2740 			tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_VENDOR_KEY]);
2741 
2742 	if (tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_NUM_DMA_BLOCKS])
2743 		key->num_dma_blocks = nla_get_u64(
2744 			tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_NUM_DMA_BLOCKS]);
2745 
2746 	if (tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_KERNEL_VENDOR_KEY])
2747 		return -EINVAL;
2748 
2749 	return 0;
2750 }
2751 
2752 static int nldev_frmr_pools_set_pinned(struct ib_device *device,
2753 				       struct nlattr *tb[],
2754 				       struct netlink_ext_ack *extack)
2755 {
2756 	struct nlattr *key_tb[RDMA_NLDEV_ATTR_MAX];
2757 	struct ib_frmr_key key = { 0 };
2758 	u32 pinned_handles = 0;
2759 	int err = 0;
2760 
2761 	pinned_handles =
2762 		nla_get_u32(tb[RDMA_NLDEV_ATTR_FRMR_POOL_PINNED_HANDLES]);
2763 
2764 	if (!tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY])
2765 		return -EINVAL;
2766 
2767 	err = nla_parse_nested(key_tb, RDMA_NLDEV_ATTR_MAX - 1,
2768 			       tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY], nldev_policy,
2769 			       extack);
2770 	if (err)
2771 		return err;
2772 
2773 	err = nldev_frmr_pools_parse_key(key_tb, &key, extack);
2774 	if (err)
2775 		return err;
2776 
2777 	err = ib_frmr_pools_set_pinned(device, &key, pinned_handles);
2778 
2779 	return err;
2780 }
2781 
/*
 * Dump the FRMR pools of one device. Standard netlink dumpit: cb->args[0]
 * holds the index of the first pool to emit on this pass; the dump ends
 * when a pass adds no entries ('filled' stays false) and the in-progress
 * message is cancelled so userspace sees a clean end-of-dump.
 *
 * Kernel-internal pools (kernel_vendor_key set) are skipped unless the
 * caller asked for driver details.
 */
static int nldev_frmr_pools_get_dumpit(struct sk_buff *skb,
				       struct netlink_callback *cb)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct ib_frmr_pools *pools;
	int err, ret = 0, idx = 0;
	struct ib_frmr_pool *pool;
	struct nlattr *table_attr;
	struct nlattr *entry_attr;
	bool show_details = false;
	struct ib_device *device;
	int start = cb->args[0];
	struct rb_node *node;
	struct nlmsghdr *nlh;
	bool filled = false;

	err = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
			    nldev_policy, NL_VALIDATE_LIBERAL, NULL);
	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
		return -EINVAL;

	/* Takes a reference on the device; dropped on every exit path. */
	device = ib_device_get_by_index(
		sock_net(skb->sk), nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]));
	if (!device)
		return -EINVAL;

	if (tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS])
		show_details = nla_get_u8(tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]);

	/* A device without pools simply produces an empty dump. */
	pools = device->frmr_pools;
	if (!pools) {
		ib_device_put(device);
		return 0;
	}

	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
					 RDMA_NLDEV_CMD_FRMR_POOLS_GET),
			0, NLM_F_MULTI);

	if (!nlh || fill_nldev_handle(skb, device)) {
		ret = -EMSGSIZE;
		goto err;
	}

	table_attr = nla_nest_start_noflag(skb, RDMA_NLDEV_ATTR_FRMR_POOLS);
	if (!table_attr) {
		ret = -EMSGSIZE;
		goto err;
	}

	/* Walk the pool tree in key order under the read lock. */
	read_lock(&pools->rb_lock);
	for (node = rb_first(&pools->rb_root); node; node = rb_next(node)) {
		pool = rb_entry(node, struct ib_frmr_pool, node);
		if (pool->key.kernel_vendor_key && !show_details)
			continue;

		/* Skip entries already emitted on a previous pass. */
		if (idx < start) {
			idx++;
			continue;
		}

		filled = true;

		entry_attr = nla_nest_start_noflag(
			skb, RDMA_NLDEV_ATTR_FRMR_POOL_ENTRY);
		if (!entry_attr) {
			ret = -EMSGSIZE;
			goto end_msg;
		}

		if (fill_frmr_pool_entry(skb, pool)) {
			nla_nest_cancel(skb, entry_attr);
			ret = -EMSGSIZE;
			goto end_msg;
		}

		nla_nest_end(skb, entry_attr);
		idx++;
	}
end_msg:
	read_unlock(&pools->rb_lock);

	/* -EMSGSIZE mid-dump still ends the message; dump resumes at idx. */
	nla_nest_end(skb, table_attr);
	nlmsg_end(skb, nlh);
	cb->args[0] = idx;

	/*
	 * No more entries to fill, cancel the message and
	 * return 0 to mark end of dumpit.
	 */
	if (!filled)
		goto err;

	ib_device_put(device);
	return skb->len;

err:
	nlmsg_cancel(skb, nlh);
	ib_device_put(device);
	return ret;
}
2884 
2885 static int nldev_frmr_pools_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
2886 				     struct netlink_ext_ack *extack)
2887 {
2888 	struct ib_device *device;
2889 	struct nlattr **tb;
2890 	u32 aging_period;
2891 	int err;
2892 
2893 	tb = kzalloc_objs(*tb, RDMA_NLDEV_ATTR_MAX, GFP_KERNEL);
2894 	if (!tb)
2895 		return -ENOMEM;
2896 
2897 	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
2898 			  extack);
2899 	if (err)
2900 		goto free_tb;
2901 
2902 	if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX]) {
2903 		err = -EINVAL;
2904 		goto free_tb;
2905 	}
2906 
2907 	device = ib_device_get_by_index(
2908 		sock_net(skb->sk), nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]));
2909 	if (!device) {
2910 		err = -EINVAL;
2911 		goto free_tb;
2912 	}
2913 
2914 	if (tb[RDMA_NLDEV_ATTR_FRMR_POOLS_AGING_PERIOD]) {
2915 		aging_period = nla_get_u32(
2916 			tb[RDMA_NLDEV_ATTR_FRMR_POOLS_AGING_PERIOD]);
2917 		err = ib_frmr_pools_set_aging_period(device, aging_period);
2918 		goto done;
2919 	}
2920 
2921 	if (tb[RDMA_NLDEV_ATTR_FRMR_POOL_PINNED_HANDLES])
2922 		err = nldev_frmr_pools_set_pinned(device, tb, extack);
2923 
2924 done:
2925 	ib_device_put(device);
2926 free_tb:
2927 	kfree(tb);
2928 	return err;
2929 }
2930 
/*
 * Dispatch table for the NLDEV netlink client, indexed by command.
 * Entries provide a .doit (single-request) and/or .dump (multi-part)
 * handler; commands flagged RDMA_NL_ADMIN_PERM mutate state and require
 * CAP_NET_ADMIN, the rest are read-only queries.
 */
static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
	[RDMA_NLDEV_CMD_GET] = {
		.doit = nldev_get_doit,
		.dump = nldev_get_dumpit,
	},
	[RDMA_NLDEV_CMD_GET_CHARDEV] = {
		.doit = nldev_get_chardev,
	},
	[RDMA_NLDEV_CMD_SET] = {
		.doit = nldev_set_doit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_NEWLINK] = {
		.doit = nldev_newlink,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_DELLINK] = {
		.doit = nldev_dellink,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_PORT_GET] = {
		.doit = nldev_port_get_doit,
		.dump = nldev_port_get_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_GET] = {
		.doit = nldev_res_get_doit,
		.dump = nldev_res_get_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_QP_GET] = {
		.doit = nldev_res_get_qp_doit,
		.dump = nldev_res_get_qp_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_CM_ID_GET] = {
		.doit = nldev_res_get_cm_id_doit,
		.dump = nldev_res_get_cm_id_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_CQ_GET] = {
		.doit = nldev_res_get_cq_doit,
		.dump = nldev_res_get_cq_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_MR_GET] = {
		.doit = nldev_res_get_mr_doit,
		.dump = nldev_res_get_mr_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_PD_GET] = {
		.doit = nldev_res_get_pd_doit,
		.dump = nldev_res_get_pd_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_CTX_GET] = {
		.doit = nldev_res_get_ctx_doit,
		.dump = nldev_res_get_ctx_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_SRQ_GET] = {
		.doit = nldev_res_get_srq_doit,
		.dump = nldev_res_get_srq_dumpit,
	},
	[RDMA_NLDEV_CMD_SYS_GET] = {
		.doit = nldev_sys_get_doit,
	},
	[RDMA_NLDEV_CMD_SYS_SET] = {
		.doit = nldev_set_sys_set_doit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_STAT_SET] = {
		.doit = nldev_stat_set_doit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_STAT_GET] = {
		.doit = nldev_stat_get_doit,
		.dump = nldev_stat_get_dumpit,
	},
	[RDMA_NLDEV_CMD_STAT_DEL] = {
		.doit = nldev_stat_del_doit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	/* Raw (driver-specific) resource dumps are admin-only reads. */
	[RDMA_NLDEV_CMD_RES_QP_GET_RAW] = {
		.doit = nldev_res_get_qp_raw_doit,
		.dump = nldev_res_get_qp_raw_dumpit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_RES_CQ_GET_RAW] = {
		.doit = nldev_res_get_cq_raw_doit,
		.dump = nldev_res_get_cq_raw_dumpit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_RES_MR_GET_RAW] = {
		.doit = nldev_res_get_mr_raw_doit,
		.dump = nldev_res_get_mr_raw_dumpit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_RES_SRQ_GET_RAW] = {
		.doit = nldev_res_get_srq_raw_doit,
		.dump = nldev_res_get_srq_raw_dumpit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_STAT_GET_STATUS] = {
		.doit = nldev_stat_get_counter_status_doit,
	},
	[RDMA_NLDEV_CMD_NEWDEV] = {
		.doit = nldev_newdev,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_DELDEV] = {
		.doit = nldev_deldev,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_FRMR_POOLS_GET] = {
		.dump = nldev_frmr_pools_get_dumpit,
	},
	[RDMA_NLDEV_CMD_FRMR_POOLS_SET] = {
		.doit = nldev_frmr_pools_set_doit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
};
3045 
3046 static int fill_mon_netdev_rename(struct sk_buff *msg,
3047 				  struct ib_device *device, u32 port,
3048 				  const struct net *net)
3049 {
3050 	struct net_device *netdev = ib_device_get_netdev(device, port);
3051 	int ret = 0;
3052 
3053 	if (!netdev || !net_eq(dev_net(netdev), net))
3054 		goto out;
3055 
3056 	ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
3057 	if (ret)
3058 		goto out;
3059 	ret = nla_put_string(msg, RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);
3060 out:
3061 	dev_put(netdev);
3062 	return ret;
3063 }
3064 
3065 static int fill_mon_netdev_association(struct sk_buff *msg,
3066 				       struct ib_device *device, u32 port,
3067 				       const struct net *net)
3068 {
3069 	struct net_device *netdev = ib_device_get_netdev(device, port);
3070 	int ret = 0;
3071 
3072 	if (netdev && !net_eq(dev_net(netdev), net))
3073 		goto out;
3074 
3075 	ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index);
3076 	if (ret)
3077 		goto out;
3078 
3079 	ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME,
3080 			     dev_name(&device->dev));
3081 	if (ret)
3082 		goto out;
3083 
3084 	ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port);
3085 	if (ret)
3086 		goto out;
3087 
3088 	if (netdev) {
3089 		ret = nla_put_u32(msg,
3090 				  RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
3091 		if (ret)
3092 			goto out;
3093 
3094 		ret = nla_put_string(msg,
3095 				     RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);
3096 	}
3097 
3098 out:
3099 	dev_put(netdev);
3100 	return ret;
3101 }
3102 
3103 static void rdma_nl_notify_err_msg(struct ib_device *device, u32 port_num,
3104 				    enum rdma_nl_notify_event_type type)
3105 {
3106 	struct net_device *netdev;
3107 
3108 	switch (type) {
3109 	case RDMA_REGISTER_EVENT:
3110 		dev_warn_ratelimited(&device->dev,
3111 				     "Failed to send RDMA monitor register device event\n");
3112 		break;
3113 	case RDMA_UNREGISTER_EVENT:
3114 		dev_warn_ratelimited(&device->dev,
3115 				     "Failed to send RDMA monitor unregister device event\n");
3116 		break;
3117 	case RDMA_NETDEV_ATTACH_EVENT:
3118 		netdev = ib_device_get_netdev(device, port_num);
3119 		dev_warn_ratelimited(&device->dev,
3120 				     "Failed to send RDMA monitor netdev attach event: port %d netdev %d\n",
3121 				     port_num, netdev->ifindex);
3122 		dev_put(netdev);
3123 		break;
3124 	case RDMA_NETDEV_DETACH_EVENT:
3125 		dev_warn_ratelimited(&device->dev,
3126 				     "Failed to send RDMA monitor netdev detach event: port %d\n",
3127 				     port_num);
3128 		break;
3129 	case RDMA_RENAME_EVENT:
3130 		dev_warn_ratelimited(&device->dev,
3131 				     "Failed to send RDMA monitor rename device event\n");
3132 		break;
3133 
3134 	case RDMA_NETDEV_RENAME_EVENT:
3135 		netdev = ib_device_get_netdev(device, port_num);
3136 		dev_warn_ratelimited(&device->dev,
3137 				     "Failed to send RDMA monitor netdev rename event: port %d netdev %d\n",
3138 				     port_num, netdev->ifindex);
3139 		dev_put(netdev);
3140 		break;
3141 	default:
3142 		break;
3143 	}
3144 }
3145 
/*
 * Build and multicast an RDMA_NLDEV_CMD_MONITOR netlink message for a
 * device lifecycle/netdev event to the RDMA_NL_GROUP_NOTIFY group.
 *
 * device:   device the event concerns.
 * port_num: port the event concerns (used only by the netdev event types).
 * type:     which monitor event to emit; selects which attributes are
 *           filled into the message.
 *
 * Returns 0 on success (or when no listener is subscribed, -ESRCH from
 * the multicast is treated as success); a negative errno otherwise.
 */
int rdma_nl_notify_event(struct ib_device *device, u32 port_num,
			  enum rdma_nl_notify_event_type type)
{
	struct sk_buff *skb;
	/* Pre-seed with -EMSGSIZE: it is the error reported if nlmsg_put()
	 * below fails before any fill helper has set ret.
	 */
	int ret = -EMSGSIZE;
	struct net *net;
	void *nlh;

	net = read_pnet(&device->coredev.rdma_net);
	if (!net)
		return -EINVAL;

	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!skb)
		return -ENOMEM;
	nlh = nlmsg_put(skb, 0, 0,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_MONITOR),
			0, 0);
	if (!nlh)
		goto err_free;

	/* Attribute payload depends on the event type; unknown types still
	 * get the EVENT_TYPE attribute below.
	 */
	switch (type) {
	case RDMA_REGISTER_EVENT:
	case RDMA_UNREGISTER_EVENT:
	case RDMA_RENAME_EVENT:
		ret = fill_nldev_handle(skb, device);
		if (ret)
			goto err_free;
		break;
	case RDMA_NETDEV_ATTACH_EVENT:
	case RDMA_NETDEV_DETACH_EVENT:
		ret = fill_mon_netdev_association(skb, device, port_num, net);
		if (ret)
			goto err_free;
		break;
	case RDMA_NETDEV_RENAME_EVENT:
		ret = fill_mon_netdev_rename(skb, device, port_num, net);
		if (ret)
			goto err_free;
		break;
	default:
		break;
	}

	ret = nla_put_u8(skb, RDMA_NLDEV_ATTR_EVENT_TYPE, type);
	if (ret)
		goto err_free;

	nlmsg_end(skb, nlh);
	ret = rdma_nl_multicast(net, skb, RDMA_NL_GROUP_NOTIFY, GFP_KERNEL);
	if (ret && ret != -ESRCH) {
		skb = NULL; /* skb is freed in the netlink send-op handling */
		goto err_free;
	}
	return 0;

err_free:
	rdma_nl_notify_err_msg(device, port_num, type);
	nlmsg_free(skb);
	return ret;
}
3207 
/* Register the nldev command table with the RDMA netlink core. */
void __init nldev_init(void)
{
	rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table);
}
3212 
/* Unregister the nldev client; pairs with nldev_init(). */
void nldev_exit(void)
{
	rdma_nl_unregister(RDMA_NL_NLDEV);
}
3217 
3218 MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_NLDEV, 5);
3219