xref: /linux/drivers/infiniband/core/nldev.c (revision 50d5c02ab8e62325548bd3a6e6b758a9dcd6e7c3)
1 /*
2  * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  * 3. Neither the names of the copyright holders nor the names of its
13  *    contributors may be used to endorse or promote products derived from
14  *    this software without specific prior written permission.
15  *
16  * Alternatively, this software may be distributed under the terms of the
17  * GNU General Public License ("GPL") version 2 as published by the Free
18  * Software Foundation.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include <linux/module.h>
34 #include <linux/pid.h>
35 #include <linux/pid_namespace.h>
36 #include <linux/mutex.h>
37 #include <net/netlink.h>
38 #include <rdma/rdma_cm.h>
39 #include <rdma/rdma_netlink.h>
40 #include <rdma/frmr_pools.h>
41 
42 #include "core_priv.h"
43 #include "cma_priv.h"
44 #include "restrack.h"
45 #include "uverbs.h"
46 #include "frmr_pools.h"
47 
48 /*
49  * This determines whether a non-privileged user is allowed to specify a
50  * controlled QKEY or not, when true non-privileged user is allowed to specify
51  * a controlled QKEY.
52  */
53 static bool privileged_qkey;
54 static DEFINE_MUTEX(nldev_dellink_mutex);
55 
56 typedef int (*res_fill_func_t)(struct sk_buff*, bool,
57 			       struct rdma_restrack_entry*, uint32_t);
58 
59 /*
60  * Sort array elements by the netlink attribute name
61  */
62 static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
63 	[RDMA_NLDEV_ATTR_CHARDEV]		= { .type = NLA_U64 },
64 	[RDMA_NLDEV_ATTR_CHARDEV_ABI]		= { .type = NLA_U64 },
65 	[RDMA_NLDEV_ATTR_CHARDEV_NAME]		= { .type = NLA_NUL_STRING,
66 					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
67 	[RDMA_NLDEV_ATTR_CHARDEV_TYPE]		= { .type = NLA_NUL_STRING,
68 					.len = RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE },
69 	[RDMA_NLDEV_ATTR_DEV_DIM]               = { .type = NLA_U8 },
70 	[RDMA_NLDEV_ATTR_DEV_INDEX]		= { .type = NLA_U32 },
71 	[RDMA_NLDEV_ATTR_DEV_NAME]		= { .type = NLA_NUL_STRING,
72 					.len = IB_DEVICE_NAME_MAX },
73 	[RDMA_NLDEV_ATTR_DEV_NODE_TYPE]		= { .type = NLA_U8 },
74 	[RDMA_NLDEV_ATTR_DEV_PROTOCOL]		= { .type = NLA_NUL_STRING,
75 					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
76 	[RDMA_NLDEV_ATTR_DRIVER]		= { .type = NLA_NESTED },
77 	[RDMA_NLDEV_ATTR_DRIVER_ENTRY]		= { .type = NLA_NESTED },
78 	[RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE]	= { .type = NLA_U8 },
79 	[RDMA_NLDEV_ATTR_DRIVER_STRING]		= { .type = NLA_NUL_STRING,
80 					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
81 	[RDMA_NLDEV_ATTR_DRIVER_S32]		= { .type = NLA_S32 },
82 	[RDMA_NLDEV_ATTR_DRIVER_S64]		= { .type = NLA_S64 },
83 	[RDMA_NLDEV_ATTR_DRIVER_U32]		= { .type = NLA_U32 },
84 	[RDMA_NLDEV_ATTR_DRIVER_U64]		= { .type = NLA_U64 },
85 	[RDMA_NLDEV_ATTR_FW_VERSION]		= { .type = NLA_NUL_STRING,
86 					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
87 	[RDMA_NLDEV_ATTR_LID]			= { .type = NLA_U32 },
88 	[RDMA_NLDEV_ATTR_LINK_TYPE]		= { .type = NLA_NUL_STRING,
89 					.len = IFNAMSIZ },
90 	[RDMA_NLDEV_ATTR_LMC]			= { .type = NLA_U8 },
91 	[RDMA_NLDEV_ATTR_NDEV_INDEX]		= { .type = NLA_U32 },
92 	[RDMA_NLDEV_ATTR_NDEV_NAME]		= { .type = NLA_NUL_STRING,
93 					.len = IFNAMSIZ },
94 	[RDMA_NLDEV_ATTR_NODE_GUID]		= { .type = NLA_U64 },
95 	[RDMA_NLDEV_ATTR_PORT_INDEX]		= { .type = NLA_U32 },
96 	[RDMA_NLDEV_ATTR_PORT_PHYS_STATE]	= { .type = NLA_U8 },
97 	[RDMA_NLDEV_ATTR_PORT_STATE]		= { .type = NLA_U8 },
98 	[RDMA_NLDEV_ATTR_RES_CM_ID]		= { .type = NLA_NESTED },
99 	[RDMA_NLDEV_ATTR_RES_CM_IDN]		= { .type = NLA_U32 },
100 	[RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY]	= { .type = NLA_NESTED },
101 	[RDMA_NLDEV_ATTR_RES_CQ]		= { .type = NLA_NESTED },
102 	[RDMA_NLDEV_ATTR_RES_CQE]		= { .type = NLA_U32 },
103 	[RDMA_NLDEV_ATTR_RES_CQN]		= { .type = NLA_U32 },
104 	[RDMA_NLDEV_ATTR_RES_CQ_ENTRY]		= { .type = NLA_NESTED },
105 	[RDMA_NLDEV_ATTR_RES_CTX]		= { .type = NLA_NESTED },
106 	[RDMA_NLDEV_ATTR_RES_CTXN]		= { .type = NLA_U32 },
107 	[RDMA_NLDEV_ATTR_RES_CTX_ENTRY]		= { .type = NLA_NESTED },
108 	[RDMA_NLDEV_ATTR_RES_DST_ADDR]		= {
109 			.len = sizeof(struct __kernel_sockaddr_storage) },
110 	[RDMA_NLDEV_ATTR_RES_IOVA]		= { .type = NLA_U64 },
111 	[RDMA_NLDEV_ATTR_RES_KERN_NAME]		= { .type = NLA_NUL_STRING,
112 					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
113 	[RDMA_NLDEV_ATTR_RES_LKEY]		= { .type = NLA_U32 },
114 	[RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY]	= { .type = NLA_U32 },
115 	[RDMA_NLDEV_ATTR_RES_LQPN]		= { .type = NLA_U32 },
116 	[RDMA_NLDEV_ATTR_RES_MR]		= { .type = NLA_NESTED },
117 	[RDMA_NLDEV_ATTR_RES_MRLEN]		= { .type = NLA_U64 },
118 	[RDMA_NLDEV_ATTR_RES_MRN]		= { .type = NLA_U32 },
119 	[RDMA_NLDEV_ATTR_RES_MR_ENTRY]		= { .type = NLA_NESTED },
120 	[RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE]	= { .type = NLA_U8 },
121 	[RDMA_NLDEV_ATTR_RES_PD]		= { .type = NLA_NESTED },
122 	[RDMA_NLDEV_ATTR_RES_PDN]		= { .type = NLA_U32 },
123 	[RDMA_NLDEV_ATTR_RES_PD_ENTRY]		= { .type = NLA_NESTED },
124 	[RDMA_NLDEV_ATTR_RES_PID]		= { .type = NLA_U32 },
125 	[RDMA_NLDEV_ATTR_RES_POLL_CTX]		= { .type = NLA_U8 },
126 	[RDMA_NLDEV_ATTR_RES_PS]		= { .type = NLA_U32 },
127 	[RDMA_NLDEV_ATTR_RES_QP]		= { .type = NLA_NESTED },
128 	[RDMA_NLDEV_ATTR_RES_QP_ENTRY]		= { .type = NLA_NESTED },
129 	[RDMA_NLDEV_ATTR_RES_RAW]		= { .type = NLA_BINARY },
130 	[RDMA_NLDEV_ATTR_RES_RKEY]		= { .type = NLA_U32 },
131 	[RDMA_NLDEV_ATTR_RES_RQPN]		= { .type = NLA_U32 },
132 	[RDMA_NLDEV_ATTR_RES_RQ_PSN]		= { .type = NLA_U32 },
133 	[RDMA_NLDEV_ATTR_RES_SQ_PSN]		= { .type = NLA_U32 },
134 	[RDMA_NLDEV_ATTR_RES_SRC_ADDR]		= {
135 			.len = sizeof(struct __kernel_sockaddr_storage) },
136 	[RDMA_NLDEV_ATTR_RES_STATE]		= { .type = NLA_U8 },
137 	[RDMA_NLDEV_ATTR_RES_SUMMARY]		= { .type = NLA_NESTED },
138 	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY]	= { .type = NLA_NESTED },
139 	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR]= { .type = NLA_U64 },
140 	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME]= { .type = NLA_NUL_STRING,
141 					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
142 	[RDMA_NLDEV_ATTR_RES_TYPE]		= { .type = NLA_U8 },
143 	[RDMA_NLDEV_ATTR_RES_SUBTYPE]		= { .type = NLA_NUL_STRING,
144 					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
145 	[RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY]= { .type = NLA_U32 },
146 	[RDMA_NLDEV_ATTR_RES_USECNT]		= { .type = NLA_U64 },
147 	[RDMA_NLDEV_ATTR_RES_SRQ]		= { .type = NLA_NESTED },
148 	[RDMA_NLDEV_ATTR_RES_SRQN]		= { .type = NLA_U32 },
149 	[RDMA_NLDEV_ATTR_RES_SRQ_ENTRY]		= { .type = NLA_NESTED },
150 	[RDMA_NLDEV_ATTR_MIN_RANGE]		= { .type = NLA_U32 },
151 	[RDMA_NLDEV_ATTR_MAX_RANGE]		= { .type = NLA_U32 },
152 	[RDMA_NLDEV_ATTR_SM_LID]		= { .type = NLA_U32 },
153 	[RDMA_NLDEV_ATTR_SUBNET_PREFIX]		= { .type = NLA_U64 },
154 	[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]	= { .type = NLA_U32 },
155 	[RDMA_NLDEV_ATTR_STAT_MODE]		= { .type = NLA_U32 },
156 	[RDMA_NLDEV_ATTR_STAT_RES]		= { .type = NLA_U32 },
157 	[RDMA_NLDEV_ATTR_STAT_COUNTER]		= { .type = NLA_NESTED },
158 	[RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY]	= { .type = NLA_NESTED },
159 	[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]       = { .type = NLA_U32 },
160 	[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]       = { .type = NLA_NESTED },
161 	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY]  = { .type = NLA_NESTED },
162 	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME] = { .type = NLA_NUL_STRING },
163 	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE] = { .type = NLA_U64 },
164 	[RDMA_NLDEV_ATTR_SYS_IMAGE_GUID]	= { .type = NLA_U64 },
165 	[RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID]	= { .type = NLA_U32 },
166 	[RDMA_NLDEV_NET_NS_FD]			= { .type = NLA_U32 },
167 	[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]	= { .type = NLA_U8 },
168 	[RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK]	= { .type = NLA_U8 },
169 	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX]	= { .type = NLA_U32 },
170 	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC] = { .type = NLA_U8 },
171 	[RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE] = { .type = NLA_U8 },
172 	[RDMA_NLDEV_ATTR_DRIVER_DETAILS]	= { .type = NLA_U8 },
173 	[RDMA_NLDEV_ATTR_DEV_TYPE]		= { .type = NLA_U8 },
174 	[RDMA_NLDEV_ATTR_PARENT_NAME]		= { .type = NLA_NUL_STRING },
175 	[RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE]	= { .type = NLA_U8 },
176 	[RDMA_NLDEV_ATTR_EVENT_TYPE]		= { .type = NLA_U8 },
177 	[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED] = { .type = NLA_U8 },
178 	[RDMA_NLDEV_ATTR_FRMR_POOLS]		= { .type = NLA_NESTED },
179 	[RDMA_NLDEV_ATTR_FRMR_POOL_ENTRY]	= { .type = NLA_NESTED },
180 	[RDMA_NLDEV_ATTR_FRMR_POOL_KEY]		= { .type = NLA_NESTED },
181 	[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ATS]	= { .type = NLA_U8 },
182 	[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ACCESS_FLAGS] = { .type = NLA_U32 },
183 	[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_VENDOR_KEY] = { .type = NLA_U64 },
184 	[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_NUM_DMA_BLOCKS] = { .type = NLA_U64 },
185 	[RDMA_NLDEV_ATTR_FRMR_POOL_QUEUE_HANDLES] = { .type = NLA_U32 },
186 	[RDMA_NLDEV_ATTR_FRMR_POOL_MAX_IN_USE]	= { .type = NLA_U64 },
187 	[RDMA_NLDEV_ATTR_FRMR_POOL_IN_USE]	= { .type = NLA_U64 },
188 	[RDMA_NLDEV_ATTR_FRMR_POOLS_AGING_PERIOD] = { .type = NLA_U32 },
189 	[RDMA_NLDEV_ATTR_FRMR_POOL_PINNED_HANDLES] = { .type = NLA_U32 },
190 	[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_KERNEL_VENDOR_KEY] = { .type = NLA_U64 },
191 };
192 
193 static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
194 				      enum rdma_nldev_print_type print_type)
195 {
196 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, name))
197 		return -EMSGSIZE;
198 	if (print_type != RDMA_NLDEV_PRINT_TYPE_UNSPEC &&
199 	    nla_put_u8(msg, RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE, print_type))
200 		return -EMSGSIZE;
201 
202 	return 0;
203 }
204 
205 static int _rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name,
206 				   enum rdma_nldev_print_type print_type,
207 				   u32 value)
208 {
209 	if (put_driver_name_print_type(msg, name, print_type))
210 		return -EMSGSIZE;
211 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DRIVER_U32, value))
212 		return -EMSGSIZE;
213 
214 	return 0;
215 }
216 
217 static int _rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name,
218 				   enum rdma_nldev_print_type print_type,
219 				   u64 value)
220 {
221 	if (put_driver_name_print_type(msg, name, print_type))
222 		return -EMSGSIZE;
223 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_DRIVER_U64, value,
224 			      RDMA_NLDEV_ATTR_PAD))
225 		return -EMSGSIZE;
226 
227 	return 0;
228 }
229 
230 int rdma_nl_put_driver_string(struct sk_buff *msg, const char *name,
231 			      const char *str)
232 {
233 	if (put_driver_name_print_type(msg, name,
234 				       RDMA_NLDEV_PRINT_TYPE_UNSPEC))
235 		return -EMSGSIZE;
236 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, str))
237 		return -EMSGSIZE;
238 
239 	return 0;
240 }
241 EXPORT_SYMBOL(rdma_nl_put_driver_string);
242 
243 int rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name, u32 value)
244 {
245 	return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
246 				       value);
247 }
248 EXPORT_SYMBOL(rdma_nl_put_driver_u32);
249 
250 int rdma_nl_put_driver_u32_hex(struct sk_buff *msg, const char *name,
251 			       u32 value)
252 {
253 	return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX,
254 				       value);
255 }
256 EXPORT_SYMBOL(rdma_nl_put_driver_u32_hex);
257 
258 int rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, u64 value)
259 {
260 	return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
261 				       value);
262 }
263 EXPORT_SYMBOL(rdma_nl_put_driver_u64);
264 
265 int rdma_nl_put_driver_u64_hex(struct sk_buff *msg, const char *name, u64 value)
266 {
267 	return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX,
268 				       value);
269 }
270 EXPORT_SYMBOL(rdma_nl_put_driver_u64_hex);
271 
272 bool rdma_nl_get_privileged_qkey(void)
273 {
274 	return privileged_qkey;
275 }
276 EXPORT_SYMBOL(rdma_nl_get_privileged_qkey);
277 
278 static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
279 {
280 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
281 		return -EMSGSIZE;
282 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME,
283 			   dev_name(&device->dev)))
284 		return -EMSGSIZE;
285 
286 	return 0;
287 }
288 
289 static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
290 {
291 	char fw[IB_FW_VERSION_NAME_MAX];
292 	int ret = 0;
293 	u32 port;
294 
295 	if (fill_nldev_handle(msg, device))
296 		return -EMSGSIZE;
297 
298 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, rdma_end_port(device)))
299 		return -EMSGSIZE;
300 
301 	BUILD_BUG_ON(sizeof(device->attrs.device_cap_flags) != sizeof(u64));
302 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
303 			      device->attrs.device_cap_flags,
304 			      RDMA_NLDEV_ATTR_PAD))
305 		return -EMSGSIZE;
306 
307 	ib_get_device_fw_str(device, fw);
308 	/* Device without FW has strlen(fw) = 0 */
309 	if (strlen(fw) && nla_put_string(msg, RDMA_NLDEV_ATTR_FW_VERSION, fw))
310 		return -EMSGSIZE;
311 
312 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_NODE_GUID,
313 			      be64_to_cpu(device->node_guid),
314 			      RDMA_NLDEV_ATTR_PAD))
315 		return -EMSGSIZE;
316 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SYS_IMAGE_GUID,
317 			      be64_to_cpu(device->attrs.sys_image_guid),
318 			      RDMA_NLDEV_ATTR_PAD))
319 		return -EMSGSIZE;
320 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_NODE_TYPE, device->node_type))
321 		return -EMSGSIZE;
322 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, device->use_cq_dim))
323 		return -EMSGSIZE;
324 
325 	if (device->type &&
326 	    nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_TYPE, device->type))
327 		return -EMSGSIZE;
328 
329 	if (device->parent &&
330 	    nla_put_string(msg, RDMA_NLDEV_ATTR_PARENT_NAME,
331 			   dev_name(&device->parent->dev)))
332 		return -EMSGSIZE;
333 
334 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE,
335 		       device->name_assign_type))
336 		return -EMSGSIZE;
337 
338 	/*
339 	 * Link type is determined on first port and mlx4 device
340 	 * which can potentially have two different link type for the same
341 	 * IB device is considered as better to be avoided in the future,
342 	 */
343 	port = rdma_start_port(device);
344 	if (rdma_cap_opa_mad(device, port))
345 		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "opa");
346 	else if (rdma_protocol_ib(device, port))
347 		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "ib");
348 	else if (rdma_protocol_iwarp(device, port))
349 		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "iw");
350 	else if (rdma_protocol_roce(device, port))
351 		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "roce");
352 	else if (rdma_protocol_usnic(device, port))
353 		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL,
354 				     "usnic");
355 	return ret;
356 }
357 
358 static int fill_port_info(struct sk_buff *msg,
359 			  struct ib_device *device, u32 port,
360 			  const struct net *net)
361 {
362 	struct net_device *netdev = NULL;
363 	struct ib_port_attr attr;
364 	int ret;
365 	u64 cap_flags = 0;
366 
367 	if (fill_nldev_handle(msg, device))
368 		return -EMSGSIZE;
369 
370 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
371 		return -EMSGSIZE;
372 
373 	ret = ib_query_port(device, port, &attr);
374 	if (ret)
375 		return ret;
376 
377 	if (rdma_protocol_ib(device, port)) {
378 		BUILD_BUG_ON((sizeof(attr.port_cap_flags) +
379 				sizeof(attr.port_cap_flags2)) > sizeof(u64));
380 		cap_flags = attr.port_cap_flags |
381 			((u64)attr.port_cap_flags2 << 32);
382 		if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
383 				      cap_flags, RDMA_NLDEV_ATTR_PAD))
384 			return -EMSGSIZE;
385 		if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX,
386 				      attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD))
387 			return -EMSGSIZE;
388 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid))
389 			return -EMSGSIZE;
390 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, attr.sm_lid))
391 			return -EMSGSIZE;
392 		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_LMC, attr.lmc))
393 			return -EMSGSIZE;
394 	}
395 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_STATE, attr.state))
396 		return -EMSGSIZE;
397 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state))
398 		return -EMSGSIZE;
399 
400 	netdev = ib_device_get_netdev(device, port);
401 	if (netdev && net_eq(dev_net(netdev), net)) {
402 		ret = nla_put_u32(msg,
403 				  RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
404 		if (ret)
405 			goto out;
406 		ret = nla_put_string(msg,
407 				     RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);
408 	}
409 
410 out:
411 	dev_put(netdev);
412 	return ret;
413 }
414 
415 static int fill_res_info_entry(struct sk_buff *msg,
416 			       const char *name, u64 curr)
417 {
418 	struct nlattr *entry_attr;
419 
420 	entry_attr = nla_nest_start_noflag(msg,
421 					   RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY);
422 	if (!entry_attr)
423 		return -EMSGSIZE;
424 
425 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME, name))
426 		goto err;
427 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR, curr,
428 			      RDMA_NLDEV_ATTR_PAD))
429 		goto err;
430 
431 	nla_nest_end(msg, entry_attr);
432 	return 0;
433 
434 err:
435 	nla_nest_cancel(msg, entry_attr);
436 	return -EMSGSIZE;
437 }
438 
439 static int fill_res_info(struct sk_buff *msg, struct ib_device *device,
440 			 bool show_details)
441 {
442 	static const char * const names[RDMA_RESTRACK_MAX] = {
443 		[RDMA_RESTRACK_PD] = "pd",
444 		[RDMA_RESTRACK_CQ] = "cq",
445 		[RDMA_RESTRACK_QP] = "qp",
446 		[RDMA_RESTRACK_CM_ID] = "cm_id",
447 		[RDMA_RESTRACK_MR] = "mr",
448 		[RDMA_RESTRACK_CTX] = "ctx",
449 		[RDMA_RESTRACK_SRQ] = "srq",
450 	};
451 
452 	struct nlattr *table_attr;
453 	int ret, i, curr;
454 
455 	if (fill_nldev_handle(msg, device))
456 		return -EMSGSIZE;
457 
458 	table_attr = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_RES_SUMMARY);
459 	if (!table_attr)
460 		return -EMSGSIZE;
461 
462 	for (i = 0; i < RDMA_RESTRACK_MAX; i++) {
463 		if (!names[i])
464 			continue;
465 		curr = rdma_restrack_count(device, i, show_details);
466 		ret = fill_res_info_entry(msg, names[i], curr);
467 		if (ret)
468 			goto err;
469 	}
470 
471 	nla_nest_end(msg, table_attr);
472 	return 0;
473 
474 err:
475 	nla_nest_cancel(msg, table_attr);
476 	return ret;
477 }
478 
479 static int fill_res_name_pid(struct sk_buff *msg,
480 			     struct rdma_restrack_entry *res)
481 {
482 	int err = 0;
483 
484 	/*
485 	 * For user resources, user is should read /proc/PID/comm to get the
486 	 * name of the task file.
487 	 */
488 	if (rdma_is_kernel_res(res)) {
489 		err = nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
490 				     res->kern_name);
491 	} else {
492 		pid_t pid;
493 
494 		pid = task_pid_vnr(res->task);
495 		/*
496 		 * Task is dead and in zombie state.
497 		 * There is no need to print PID anymore.
498 		 */
499 		if (pid)
500 			/*
501 			 * This part is racy, task can be killed and PID will
502 			 * be zero right here but it is ok, next query won't
503 			 * return PID. We don't promise real-time reflection
504 			 * of SW objects.
505 			 */
506 			err = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, pid);
507 	}
508 
509 	return err ? -EMSGSIZE : 0;
510 }
511 
512 static int fill_res_qp_entry_query(struct sk_buff *msg,
513 				   struct rdma_restrack_entry *res,
514 				   struct ib_device *dev,
515 				   struct ib_qp *qp)
516 {
517 	struct ib_qp_init_attr qp_init_attr;
518 	struct ib_qp_attr qp_attr;
519 	int ret;
520 
521 	ret = ib_query_qp(qp, &qp_attr, 0, &qp_init_attr);
522 	if (ret)
523 		return ret;
524 
525 	if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) {
526 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN,
527 				qp_attr.dest_qp_num))
528 			goto err;
529 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQ_PSN,
530 				qp_attr.rq_psn))
531 			goto err;
532 	}
533 
534 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SQ_PSN, qp_attr.sq_psn))
535 		goto err;
536 
537 	if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC ||
538 	    qp->qp_type == IB_QPT_XRC_INI || qp->qp_type == IB_QPT_XRC_TGT) {
539 		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE,
540 			       qp_attr.path_mig_state))
541 			goto err;
542 	}
543 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, qp->qp_type))
544 		goto err;
545 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
546 		goto err;
547 
548 	if (dev->ops.fill_res_qp_entry)
549 		return dev->ops.fill_res_qp_entry(msg, qp);
550 	return 0;
551 
552 err:	return -EMSGSIZE;
553 }
554 
555 static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
556 			     struct rdma_restrack_entry *res, uint32_t port)
557 {
558 	struct ib_qp *qp = container_of(res, struct ib_qp, res);
559 	struct ib_device *dev = qp->device;
560 	int ret;
561 
562 	if (port && port != qp->port)
563 		return -EAGAIN;
564 
565 	/* In create_qp() port is not set yet */
566 	if (qp->port && nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp->port))
567 		return -EMSGSIZE;
568 
569 	ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num);
570 	if (ret)
571 		return -EMSGSIZE;
572 
573 	if (!rdma_is_kernel_res(res) &&
574 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, qp->pd->res.id))
575 		return -EMSGSIZE;
576 
577 	ret = fill_res_name_pid(msg, res);
578 	if (ret)
579 		return -EMSGSIZE;
580 
581 	return fill_res_qp_entry_query(msg, res, dev, qp);
582 }
583 
584 static int fill_res_qp_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
585 				 struct rdma_restrack_entry *res, uint32_t port)
586 {
587 	struct ib_qp *qp = container_of(res, struct ib_qp, res);
588 	struct ib_device *dev = qp->device;
589 
590 	if (port && port != qp->port)
591 		return -EAGAIN;
592 	if (!dev->ops.fill_res_qp_entry_raw)
593 		return -EINVAL;
594 	return dev->ops.fill_res_qp_entry_raw(msg, qp);
595 }
596 
597 static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin,
598 				struct rdma_restrack_entry *res, uint32_t port)
599 {
600 	struct rdma_id_private *id_priv =
601 				container_of(res, struct rdma_id_private, res);
602 	struct ib_device *dev = id_priv->id.device;
603 	struct rdma_cm_id *cm_id = &id_priv->id;
604 
605 	if (port && port != cm_id->port_num)
606 		return -EAGAIN;
607 
608 	if (cm_id->port_num &&
609 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num))
610 		goto err;
611 
612 	if (id_priv->qp_num) {
613 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, id_priv->qp_num))
614 			goto err;
615 		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, cm_id->qp_type))
616 			goto err;
617 	}
618 
619 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PS, cm_id->ps))
620 		goto err;
621 
622 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, id_priv->state))
623 		goto err;
624 
625 	if (cm_id->route.addr.src_addr.ss_family &&
626 	    nla_put(msg, RDMA_NLDEV_ATTR_RES_SRC_ADDR,
627 		    sizeof(cm_id->route.addr.src_addr),
628 		    &cm_id->route.addr.src_addr))
629 		goto err;
630 	if (cm_id->route.addr.dst_addr.ss_family &&
631 	    nla_put(msg, RDMA_NLDEV_ATTR_RES_DST_ADDR,
632 		    sizeof(cm_id->route.addr.dst_addr),
633 		    &cm_id->route.addr.dst_addr))
634 		goto err;
635 
636 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CM_IDN, res->id))
637 		goto err;
638 
639 	if (fill_res_name_pid(msg, res))
640 		goto err;
641 
642 	if (dev->ops.fill_res_cm_id_entry)
643 		return dev->ops.fill_res_cm_id_entry(msg, cm_id);
644 	return 0;
645 
646 err: return -EMSGSIZE;
647 }
648 
649 static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin,
650 			     struct rdma_restrack_entry *res, uint32_t port)
651 {
652 	struct ib_cq *cq = container_of(res, struct ib_cq, res);
653 	struct ib_device *dev = cq->device;
654 
655 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe))
656 		return -EMSGSIZE;
657 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
658 			      atomic_read(&cq->usecnt), RDMA_NLDEV_ATTR_PAD))
659 		return -EMSGSIZE;
660 
661 	/* Poll context is only valid for kernel CQs */
662 	if (rdma_is_kernel_res(res) &&
663 	    nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx))
664 		return -EMSGSIZE;
665 
666 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, (cq->dim != NULL)))
667 		return -EMSGSIZE;
668 
669 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, res->id))
670 		return -EMSGSIZE;
671 	if (!rdma_is_kernel_res(res) &&
672 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
673 			cq->uobject->uevent.uobject.context->res.id))
674 		return -EMSGSIZE;
675 
676 	if (fill_res_name_pid(msg, res))
677 		return -EMSGSIZE;
678 
679 	return (dev->ops.fill_res_cq_entry) ?
680 		dev->ops.fill_res_cq_entry(msg, cq) : 0;
681 }
682 
683 static int fill_res_cq_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
684 				 struct rdma_restrack_entry *res, uint32_t port)
685 {
686 	struct ib_cq *cq = container_of(res, struct ib_cq, res);
687 	struct ib_device *dev = cq->device;
688 
689 	if (!dev->ops.fill_res_cq_entry_raw)
690 		return -EINVAL;
691 	return dev->ops.fill_res_cq_entry_raw(msg, cq);
692 }
693 
694 static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
695 			     struct rdma_restrack_entry *res, uint32_t port)
696 {
697 	struct ib_mr *mr = container_of(res, struct ib_mr, res);
698 	struct ib_device *dev = mr->device;
699 
700 	if (has_cap_net_admin) {
701 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey))
702 			return -EMSGSIZE;
703 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey))
704 			return -EMSGSIZE;
705 	}
706 
707 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length,
708 			      RDMA_NLDEV_ATTR_PAD))
709 		return -EMSGSIZE;
710 
711 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
712 		return -EMSGSIZE;
713 
714 	if (!rdma_is_kernel_res(res)) {
715 		struct ib_pd *pd = READ_ONCE(mr->pd);
716 
717 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, pd->res.id))
718 			return -EMSGSIZE;
719 	}
720 
721 	if (fill_res_name_pid(msg, res))
722 		return -EMSGSIZE;
723 
724 	return (dev->ops.fill_res_mr_entry) ?
725 		       dev->ops.fill_res_mr_entry(msg, mr) :
726 		       0;
727 }
728 
729 static int fill_res_mr_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
730 				 struct rdma_restrack_entry *res, uint32_t port)
731 {
732 	struct ib_mr *mr = container_of(res, struct ib_mr, res);
733 	struct ib_device *dev = mr->device;
734 
735 	if (!dev->ops.fill_res_mr_entry_raw)
736 		return -EINVAL;
737 	return dev->ops.fill_res_mr_entry_raw(msg, mr);
738 }
739 
740 static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin,
741 			     struct rdma_restrack_entry *res, uint32_t port)
742 {
743 	struct ib_pd *pd = container_of(res, struct ib_pd, res);
744 
745 	if (has_cap_net_admin) {
746 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY,
747 				pd->local_dma_lkey))
748 			goto err;
749 		if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) &&
750 		    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY,
751 				pd->unsafe_global_rkey))
752 			goto err;
753 	}
754 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
755 			      atomic_read(&pd->usecnt), RDMA_NLDEV_ATTR_PAD))
756 		goto err;
757 
758 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, res->id))
759 		goto err;
760 
761 	if (!rdma_is_kernel_res(res) &&
762 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
763 			pd->uobject->context->res.id))
764 		goto err;
765 
766 	return fill_res_name_pid(msg, res);
767 
768 err:	return -EMSGSIZE;
769 }
770 
771 static int fill_res_ctx_entry(struct sk_buff *msg, bool has_cap_net_admin,
772 			      struct rdma_restrack_entry *res, uint32_t port)
773 {
774 	struct ib_ucontext *ctx = container_of(res, struct ib_ucontext, res);
775 
776 	if (rdma_is_kernel_res(res))
777 		return 0;
778 
779 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN, ctx->res.id))
780 		return -EMSGSIZE;
781 
782 	return fill_res_name_pid(msg, res);
783 }
784 
785 static int fill_res_range_qp_entry(struct sk_buff *msg, uint32_t min_range,
786 				   uint32_t max_range)
787 {
788 	struct nlattr *entry_attr;
789 
790 	if (!min_range)
791 		return 0;
792 
793 	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
794 	if (!entry_attr)
795 		return -EMSGSIZE;
796 
797 	if (min_range == max_range) {
798 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, min_range))
799 			goto err;
800 	} else {
801 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_MIN_RANGE, min_range))
802 			goto err;
803 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_MAX_RANGE, max_range))
804 			goto err;
805 	}
806 	nla_nest_end(msg, entry_attr);
807 	return 0;
808 
809 err:
810 	nla_nest_cancel(msg, entry_attr);
811 	return -EMSGSIZE;
812 }
813 
814 static int fill_res_srq_qps(struct sk_buff *msg, struct ib_srq *srq)
815 {
816 	uint32_t min_range = 0, prev = 0;
817 	struct rdma_restrack_entry *res;
818 	struct rdma_restrack_root *rt;
819 	struct nlattr *table_attr;
820 	struct ib_qp *qp = NULL;
821 	unsigned long id = 0;
822 
823 	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP);
824 	if (!table_attr)
825 		return -EMSGSIZE;
826 
827 	rt = &srq->device->res[RDMA_RESTRACK_QP];
828 	xa_lock(&rt->xa);
829 	xa_for_each(&rt->xa, id, res) {
830 		if (!rdma_restrack_get(res))
831 			continue;
832 
833 		qp = container_of(res, struct ib_qp, res);
834 		if (!qp->srq || (qp->srq->res.id != srq->res.id)) {
835 			rdma_restrack_put(res);
836 			continue;
837 		}
838 
839 		if (qp->qp_num < prev)
840 			/* qp_num should be ascending */
841 			goto err_loop;
842 
843 		if (min_range == 0) {
844 			min_range = qp->qp_num;
845 		} else if (qp->qp_num > (prev + 1)) {
846 			if (fill_res_range_qp_entry(msg, min_range, prev))
847 				goto err_loop;
848 
849 			min_range = qp->qp_num;
850 		}
851 		prev = qp->qp_num;
852 		rdma_restrack_put(res);
853 	}
854 
855 	xa_unlock(&rt->xa);
856 
857 	if (fill_res_range_qp_entry(msg, min_range, prev))
858 		goto err;
859 
860 	nla_nest_end(msg, table_attr);
861 	return 0;
862 
863 err_loop:
864 	rdma_restrack_put(res);
865 	xa_unlock(&rt->xa);
866 err:
867 	nla_nest_cancel(msg, table_attr);
868 	return -EMSGSIZE;
869 }
870 
871 static int fill_res_srq_entry(struct sk_buff *msg, bool has_cap_net_admin,
872 			      struct rdma_restrack_entry *res, uint32_t port)
873 {
874 	struct ib_srq *srq = container_of(res, struct ib_srq, res);
875 	struct ib_device *dev = srq->device;
876 
877 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SRQN, srq->res.id))
878 		goto err;
879 
880 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, srq->srq_type))
881 		goto err;
882 
883 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, srq->pd->res.id))
884 		goto err;
885 
886 	if (ib_srq_has_cq(srq->srq_type)) {
887 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN,
888 				srq->ext.cq->res.id))
889 			goto err;
890 	}
891 
892 	if (fill_res_srq_qps(msg, srq))
893 		goto err;
894 
895 	if (fill_res_name_pid(msg, res))
896 		goto err;
897 
898 	if (dev->ops.fill_res_srq_entry)
899 		return dev->ops.fill_res_srq_entry(msg, srq);
900 
901 	return 0;
902 
903 err:
904 	return -EMSGSIZE;
905 }
906 
907 static int fill_res_srq_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
908 				 struct rdma_restrack_entry *res, uint32_t port)
909 {
910 	struct ib_srq *srq = container_of(res, struct ib_srq, res);
911 	struct ib_device *dev = srq->device;
912 
913 	if (!dev->ops.fill_res_srq_entry_raw)
914 		return -EINVAL;
915 	return dev->ops.fill_res_srq_entry_raw(msg, srq);
916 }
917 
918 static int fill_stat_counter_mode(struct sk_buff *msg,
919 				  struct rdma_counter *counter)
920 {
921 	struct rdma_counter_mode *m = &counter->mode;
922 
923 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, m->mode))
924 		return -EMSGSIZE;
925 
926 	if (m->mode == RDMA_COUNTER_MODE_AUTO) {
927 		if ((m->mask & RDMA_COUNTER_MASK_QP_TYPE) &&
928 		    nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, m->param.qp_type))
929 			return -EMSGSIZE;
930 
931 		if ((m->mask & RDMA_COUNTER_MASK_PID) &&
932 		    fill_res_name_pid(msg, &counter->res))
933 			return -EMSGSIZE;
934 	}
935 
936 	return 0;
937 }
938 
939 static int fill_stat_counter_qp_entry(struct sk_buff *msg, u32 qpn)
940 {
941 	struct nlattr *entry_attr;
942 
943 	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
944 	if (!entry_attr)
945 		return -EMSGSIZE;
946 
947 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn))
948 		goto err;
949 
950 	nla_nest_end(msg, entry_attr);
951 	return 0;
952 
953 err:
954 	nla_nest_cancel(msg, entry_attr);
955 	return -EMSGSIZE;
956 }
957 
958 static int fill_stat_counter_qps(struct sk_buff *msg,
959 				 struct rdma_counter *counter)
960 {
961 	struct rdma_restrack_entry *res;
962 	struct rdma_restrack_root *rt;
963 	struct nlattr *table_attr;
964 	struct ib_qp *qp = NULL;
965 	unsigned long id = 0;
966 	int ret = 0;
967 
968 	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP);
969 	if (!table_attr)
970 		return -EMSGSIZE;
971 
972 	rt = &counter->device->res[RDMA_RESTRACK_QP];
973 	xa_lock(&rt->xa);
974 	xa_for_each(&rt->xa, id, res) {
975 		qp = container_of(res, struct ib_qp, res);
976 		if (!qp->counter || (qp->counter->id != counter->id))
977 			continue;
978 
979 		ret = fill_stat_counter_qp_entry(msg, qp->qp_num);
980 		if (ret)
981 			goto err;
982 	}
983 
984 	xa_unlock(&rt->xa);
985 	nla_nest_end(msg, table_attr);
986 	return 0;
987 
988 err:
989 	xa_unlock(&rt->xa);
990 	nla_nest_cancel(msg, table_attr);
991 	return ret;
992 }
993 
994 int rdma_nl_stat_hwcounter_entry(struct sk_buff *msg, const char *name,
995 				 u64 value)
996 {
997 	struct nlattr *entry_attr;
998 
999 	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY);
1000 	if (!entry_attr)
1001 		return -EMSGSIZE;
1002 
1003 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME,
1004 			   name))
1005 		goto err;
1006 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE,
1007 			      value, RDMA_NLDEV_ATTR_PAD))
1008 		goto err;
1009 
1010 	nla_nest_end(msg, entry_attr);
1011 	return 0;
1012 
1013 err:
1014 	nla_nest_cancel(msg, entry_attr);
1015 	return -EMSGSIZE;
1016 }
1017 EXPORT_SYMBOL(rdma_nl_stat_hwcounter_entry);
1018 
1019 static int fill_stat_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
1020 			      struct rdma_restrack_entry *res, uint32_t port)
1021 {
1022 	struct ib_mr *mr = container_of(res, struct ib_mr, res);
1023 	struct ib_device *dev = mr->device;
1024 
1025 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
1026 		goto err;
1027 
1028 	if (dev->ops.fill_stat_mr_entry)
1029 		return dev->ops.fill_stat_mr_entry(msg, mr);
1030 	return 0;
1031 
1032 err:
1033 	return -EMSGSIZE;
1034 }
1035 
1036 static int fill_stat_counter_hwcounters(struct sk_buff *msg,
1037 					struct rdma_counter *counter)
1038 {
1039 	struct rdma_hw_stats *st = counter->stats;
1040 	struct nlattr *table_attr;
1041 	int i;
1042 
1043 	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
1044 	if (!table_attr)
1045 		return -EMSGSIZE;
1046 
1047 	mutex_lock(&st->lock);
1048 	for (i = 0; i < st->num_counters; i++) {
1049 		if (test_bit(i, st->is_disabled))
1050 			continue;
1051 		if (rdma_nl_stat_hwcounter_entry(msg, st->descs[i].name,
1052 						 st->value[i]))
1053 			goto err;
1054 	}
1055 	mutex_unlock(&st->lock);
1056 
1057 	nla_nest_end(msg, table_attr);
1058 	return 0;
1059 
1060 err:
1061 	mutex_unlock(&st->lock);
1062 	nla_nest_cancel(msg, table_attr);
1063 	return -EMSGSIZE;
1064 }
1065 
1066 static int fill_res_counter_entry(struct sk_buff *msg, bool has_cap_net_admin,
1067 				  struct rdma_restrack_entry *res,
1068 				  uint32_t port)
1069 {
1070 	struct rdma_counter *counter =
1071 		container_of(res, struct rdma_counter, res);
1072 
1073 	if (port && port != counter->port)
1074 		return -EAGAIN;
1075 
1076 	/* Dump it even query failed */
1077 	rdma_counter_query_stats(counter);
1078 
1079 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, counter->port) ||
1080 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, counter->id) ||
1081 	    fill_stat_counter_mode(msg, counter) ||
1082 	    fill_stat_counter_qps(msg, counter) ||
1083 	    fill_stat_counter_hwcounters(msg, counter))
1084 		return -EMSGSIZE;
1085 
1086 	return 0;
1087 }
1088 
1089 static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1090 			  struct netlink_ext_ack *extack)
1091 {
1092 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1093 	struct ib_device *device;
1094 	struct sk_buff *msg;
1095 	u32 index;
1096 	int err;
1097 
1098 	err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1099 			    nldev_policy, NL_VALIDATE_LIBERAL, extack);
1100 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1101 		return -EINVAL;
1102 
1103 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1104 
1105 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1106 	if (!device)
1107 		return -EINVAL;
1108 
1109 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1110 	if (!msg) {
1111 		err = -ENOMEM;
1112 		goto err;
1113 	}
1114 
1115 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1116 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
1117 			0, 0);
1118 	if (!nlh) {
1119 		err = -EMSGSIZE;
1120 		goto err_free;
1121 	}
1122 
1123 	err = fill_dev_info(msg, device);
1124 	if (err)
1125 		goto err_free;
1126 
1127 	nlmsg_end(msg, nlh);
1128 
1129 	ib_device_put(device);
1130 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1131 
1132 err_free:
1133 	nlmsg_free(msg);
1134 err:
1135 	ib_device_put(device);
1136 	return err;
1137 }
1138 
1139 static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1140 			  struct netlink_ext_ack *extack)
1141 {
1142 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1143 	struct ib_device *device;
1144 	u32 index;
1145 	int err;
1146 
1147 	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1148 			    nldev_policy, extack);
1149 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1150 		return -EINVAL;
1151 
1152 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1153 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1154 	if (!device)
1155 		return -EINVAL;
1156 
1157 	if (tb[RDMA_NLDEV_ATTR_DEV_NAME]) {
1158 		char name[IB_DEVICE_NAME_MAX] = {};
1159 
1160 		nla_strscpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
1161 			    IB_DEVICE_NAME_MAX);
1162 		if (strlen(name) == 0) {
1163 			err = -EINVAL;
1164 			goto done;
1165 		}
1166 		err = ib_device_rename(device, name);
1167 		goto done;
1168 	}
1169 
1170 	if (tb[RDMA_NLDEV_NET_NS_FD]) {
1171 		u32 ns_fd;
1172 
1173 		ns_fd = nla_get_u32(tb[RDMA_NLDEV_NET_NS_FD]);
1174 		err = ib_device_set_netns_put(skb, device, ns_fd);
1175 		goto put_done;
1176 	}
1177 
1178 	if (tb[RDMA_NLDEV_ATTR_DEV_DIM]) {
1179 		u8 use_dim;
1180 
1181 		use_dim = nla_get_u8(tb[RDMA_NLDEV_ATTR_DEV_DIM]);
1182 		err = ib_device_set_dim(device,  use_dim);
1183 		goto done;
1184 	}
1185 
1186 done:
1187 	ib_device_put(device);
1188 put_done:
1189 	return err;
1190 }
1191 
1192 static int _nldev_get_dumpit(struct ib_device *device,
1193 			     struct sk_buff *skb,
1194 			     struct netlink_callback *cb,
1195 			     unsigned int idx)
1196 {
1197 	int start = cb->args[0];
1198 	struct nlmsghdr *nlh;
1199 
1200 	if (idx < start)
1201 		return 0;
1202 
1203 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
1204 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
1205 			0, NLM_F_MULTI);
1206 
1207 	if (!nlh || fill_dev_info(skb, device)) {
1208 		nlmsg_cancel(skb, nlh);
1209 		goto out;
1210 	}
1211 
1212 	nlmsg_end(skb, nlh);
1213 
1214 	idx++;
1215 
1216 out:	cb->args[0] = idx;
1217 	return skb->len;
1218 }
1219 
/*
 * Netlink dumpit handler: dump information about all devices by running
 * _nldev_get_dumpit() for each device via ib_enum_all_devs(). Returns
 * whatever ib_enum_all_devs() returns.
 */
static int nldev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
	/*
	 * There is no need to take lock, because
	 * we are relying on ib_core's locking.
	 */
	return ib_enum_all_devs(_nldev_get_dumpit, skb, cb);
}
1228 
1229 static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1230 			       struct netlink_ext_ack *extack)
1231 {
1232 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1233 	struct ib_device *device;
1234 	struct sk_buff *msg;
1235 	u32 index;
1236 	u32 port;
1237 	int err;
1238 
1239 	err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1240 			    nldev_policy, NL_VALIDATE_LIBERAL, extack);
1241 	if (err ||
1242 	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
1243 	    !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
1244 		return -EINVAL;
1245 
1246 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1247 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1248 	if (!device)
1249 		return -EINVAL;
1250 
1251 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1252 	if (!rdma_is_port_valid(device, port)) {
1253 		err = -EINVAL;
1254 		goto err;
1255 	}
1256 
1257 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1258 	if (!msg) {
1259 		err = -ENOMEM;
1260 		goto err;
1261 	}
1262 
1263 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1264 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
1265 			0, 0);
1266 	if (!nlh) {
1267 		err = -EMSGSIZE;
1268 		goto err_free;
1269 	}
1270 
1271 	err = fill_port_info(msg, device, port, sock_net(skb->sk));
1272 	if (err)
1273 		goto err_free;
1274 
1275 	nlmsg_end(msg, nlh);
1276 	ib_device_put(device);
1277 
1278 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1279 
1280 err_free:
1281 	nlmsg_free(msg);
1282 err:
1283 	ib_device_put(device);
1284 	return err;
1285 }
1286 
1287 static int nldev_port_get_dumpit(struct sk_buff *skb,
1288 				 struct netlink_callback *cb)
1289 {
1290 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1291 	struct ib_device *device;
1292 	int start = cb->args[0];
1293 	struct nlmsghdr *nlh;
1294 	u32 idx = 0;
1295 	u32 ifindex;
1296 	int err;
1297 	unsigned int p;
1298 
1299 	err = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1300 			    nldev_policy, NL_VALIDATE_LIBERAL, NULL);
1301 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1302 		return -EINVAL;
1303 
1304 	ifindex = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1305 	device = ib_device_get_by_index(sock_net(skb->sk), ifindex);
1306 	if (!device)
1307 		return -EINVAL;
1308 
1309 	rdma_for_each_port (device, p) {
1310 		/*
1311 		 * The dumpit function returns all information from specific
1312 		 * index. This specific index is taken from the netlink
1313 		 * messages request sent by user and it is available
1314 		 * in cb->args[0].
1315 		 *
1316 		 * Usually, the user doesn't fill this field and it causes
1317 		 * to return everything.
1318 		 *
1319 		 */
1320 		if (idx < start) {
1321 			idx++;
1322 			continue;
1323 		}
1324 
1325 		nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
1326 				cb->nlh->nlmsg_seq,
1327 				RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1328 						 RDMA_NLDEV_CMD_PORT_GET),
1329 				0, NLM_F_MULTI);
1330 
1331 		if (!nlh || fill_port_info(skb, device, p, sock_net(skb->sk))) {
1332 			nlmsg_cancel(skb, nlh);
1333 			goto out;
1334 		}
1335 		idx++;
1336 		nlmsg_end(skb, nlh);
1337 	}
1338 
1339 out:
1340 	ib_device_put(device);
1341 	cb->args[0] = idx;
1342 	return skb->len;
1343 }
1344 
1345 static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1346 			      struct netlink_ext_ack *extack)
1347 {
1348 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1349 	bool show_details = false;
1350 	struct ib_device *device;
1351 	struct sk_buff *msg;
1352 	u32 index;
1353 	int ret;
1354 
1355 	ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1356 			    nldev_policy, NL_VALIDATE_LIBERAL, extack);
1357 	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1358 		return -EINVAL;
1359 
1360 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1361 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1362 	if (!device)
1363 		return -EINVAL;
1364 
1365 	if (tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS])
1366 		show_details = nla_get_u8(tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]);
1367 
1368 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1369 	if (!msg) {
1370 		ret = -ENOMEM;
1371 		goto err;
1372 	}
1373 
1374 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1375 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
1376 			0, 0);
1377 	if (!nlh) {
1378 		ret = -EMSGSIZE;
1379 		goto err_free;
1380 	}
1381 
1382 	ret = fill_res_info(msg, device, show_details);
1383 	if (ret)
1384 		goto err_free;
1385 
1386 	nlmsg_end(msg, nlh);
1387 	ib_device_put(device);
1388 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1389 
1390 err_free:
1391 	nlmsg_free(msg);
1392 err:
1393 	ib_device_put(device);
1394 	return ret;
1395 }
1396 
1397 static int _nldev_res_get_dumpit(struct ib_device *device,
1398 				 struct sk_buff *skb,
1399 				 struct netlink_callback *cb,
1400 				 unsigned int idx)
1401 {
1402 	int start = cb->args[0];
1403 	struct nlmsghdr *nlh;
1404 
1405 	if (idx < start)
1406 		return 0;
1407 
1408 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
1409 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
1410 			0, NLM_F_MULTI);
1411 
1412 	if (!nlh || fill_res_info(skb, device, false)) {
1413 		nlmsg_cancel(skb, nlh);
1414 		goto out;
1415 	}
1416 	nlmsg_end(skb, nlh);
1417 
1418 	idx++;
1419 
1420 out:
1421 	cb->args[0] = idx;
1422 	return skb->len;
1423 }
1424 
/*
 * Netlink dumpit handler: dump the resource summary of all devices by
 * running _nldev_res_get_dumpit() for each device via ib_enum_all_devs().
 * Returns whatever ib_enum_all_devs() returns.
 */
static int nldev_res_get_dumpit(struct sk_buff *skb,
				struct netlink_callback *cb)
{
	return ib_enum_all_devs(_nldev_res_get_dumpit, skb, cb);
}
1430 
/*
 * Netlink attribute IDs associated with one restrack resource type, as
 * consumed by res_get_common_doit() and res_get_common_dumpit().
 */
struct nldev_fill_res_entry {
	/* Attribute of the nested table that wraps all entries in a dump */
	enum rdma_nldev_attr nldev_attr;
	/* Bitmask of enum nldev_res_flags */
	u8 flags;
	/* Attribute of the nest that wraps a single object in a dump */
	u32 entry;
	/* Attribute carrying the object id in a doit request; 0 means unset */
	u32 id;
};
1437 
/* Flags stored in nldev_fill_res_entry.flags */
enum nldev_res_flags {
	/*
	 * res_get_common_doit() rejects a request that supplies a port for a
	 * type with this flag, and one that omits the port for a type
	 * without it.
	 */
	NLDEV_PER_DEV = 1 << 0,
};
1441 
/*
 * Attribute IDs for each restrack type, indexed by enum rdma_restrack_type.
 * Types without an initializer here are zero-filled, so their .id is 0 and
 * res_get_common_doit() rejects them with -EINVAL.
 */
static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
	[RDMA_RESTRACK_QP] = {
		.nldev_attr = RDMA_NLDEV_ATTR_RES_QP,
		.entry = RDMA_NLDEV_ATTR_RES_QP_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_LQPN,
	},
	[RDMA_RESTRACK_CM_ID] = {
		.nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID,
		.entry = RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_CM_IDN,
	},
	[RDMA_RESTRACK_CQ] = {
		.nldev_attr = RDMA_NLDEV_ATTR_RES_CQ,
		.flags = NLDEV_PER_DEV,
		.entry = RDMA_NLDEV_ATTR_RES_CQ_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_CQN,
	},
	[RDMA_RESTRACK_MR] = {
		.nldev_attr = RDMA_NLDEV_ATTR_RES_MR,
		.flags = NLDEV_PER_DEV,
		.entry = RDMA_NLDEV_ATTR_RES_MR_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_MRN,
	},
	[RDMA_RESTRACK_PD] = {
		.nldev_attr = RDMA_NLDEV_ATTR_RES_PD,
		.flags = NLDEV_PER_DEV,
		.entry = RDMA_NLDEV_ATTR_RES_PD_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_PDN,
	},
	[RDMA_RESTRACK_COUNTER] = {
		.nldev_attr = RDMA_NLDEV_ATTR_STAT_COUNTER,
		.entry = RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY,
		.id = RDMA_NLDEV_ATTR_STAT_COUNTER_ID,
	},
	[RDMA_RESTRACK_CTX] = {
		.nldev_attr = RDMA_NLDEV_ATTR_RES_CTX,
		.flags = NLDEV_PER_DEV,
		.entry = RDMA_NLDEV_ATTR_RES_CTX_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_CTXN,
	},
	[RDMA_RESTRACK_SRQ] = {
		.nldev_attr = RDMA_NLDEV_ATTR_RES_SRQ,
		.flags = NLDEV_PER_DEV,
		.entry = RDMA_NLDEV_ATTR_RES_SRQ_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_SRQN,
	},

};
1490 
1491 static noinline_for_stack int
1492 res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1493 		    struct netlink_ext_ack *extack,
1494 		    enum rdma_restrack_type res_type,
1495 		    res_fill_func_t fill_func)
1496 {
1497 	const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
1498 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1499 	struct rdma_restrack_entry *res;
1500 	struct ib_device *device;
1501 	u32 index, id, port = 0;
1502 	bool has_cap_net_admin;
1503 	struct sk_buff *msg;
1504 	int ret;
1505 
1506 	ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1507 			    nldev_policy, NL_VALIDATE_LIBERAL, extack);
1508 	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !fe->id || !tb[fe->id])
1509 		return -EINVAL;
1510 
1511 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1512 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1513 	if (!device)
1514 		return -EINVAL;
1515 
1516 	if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1517 		port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1518 		if (!rdma_is_port_valid(device, port)) {
1519 			ret = -EINVAL;
1520 			goto err;
1521 		}
1522 	}
1523 
1524 	if ((port && fe->flags & NLDEV_PER_DEV) ||
1525 	    (!port && ~fe->flags & NLDEV_PER_DEV)) {
1526 		ret = -EINVAL;
1527 		goto err;
1528 	}
1529 
1530 	id = nla_get_u32(tb[fe->id]);
1531 	res = rdma_restrack_get_byid(device, res_type, id);
1532 	if (IS_ERR(res)) {
1533 		ret = PTR_ERR(res);
1534 		goto err;
1535 	}
1536 
1537 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1538 	if (!msg) {
1539 		ret = -ENOMEM;
1540 		goto err_get;
1541 	}
1542 
1543 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1544 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1545 					 RDMA_NL_GET_OP(nlh->nlmsg_type)),
1546 			0, 0);
1547 
1548 	if (!nlh || fill_nldev_handle(msg, device)) {
1549 		ret = -EMSGSIZE;
1550 		goto err_free;
1551 	}
1552 
1553 	has_cap_net_admin = netlink_capable(skb, CAP_NET_ADMIN);
1554 
1555 	ret = fill_func(msg, has_cap_net_admin, res, port);
1556 	if (ret)
1557 		goto err_free;
1558 
1559 	rdma_restrack_put(res);
1560 	nlmsg_end(msg, nlh);
1561 	ib_device_put(device);
1562 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1563 
1564 err_free:
1565 	nlmsg_free(msg);
1566 err_get:
1567 	rdma_restrack_put(res);
1568 err:
1569 	ib_device_put(device);
1570 	return ret;
1571 }
1572 
/*
 * Common dumpit implementation for the per-resource-type GET commands: dump
 * every tracked object of @res_type on one device into @skb.
 *
 * @fill_func writes one object's attributes; it is called with the xarray
 * lock dropped and a reference held on the object. A @fill_func result of
 * -EMSGSIZE ends this pass with what has been written so far, -EAGAIN skips
 * the object, and any other error aborts the dump.
 *
 * cb->args[0] holds the iteration index at which to resume and is updated
 * before returning from a pass that wrote (or tried to write) entries.
 *
 * Returns skb->len when at least one object was attempted in this pass, 0
 * when there was nothing left to dump (the message is cancelled), -EINVAL
 * for a malformed request, -EMSGSIZE if the message header/table could not
 * be started, or the aborting @fill_func error.
 */
static int res_get_common_dumpit(struct sk_buff *skb,
				 struct netlink_callback *cb,
				 enum rdma_restrack_type res_type,
				 res_fill_func_t fill_func)
{
	const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct rdma_restrack_entry *res;
	struct rdma_restrack_root *rt;
	int err, ret = 0, idx = 0;
	bool show_details = false;
	struct nlattr *table_attr;
	struct nlattr *entry_attr;
	struct ib_device *device;
	int start = cb->args[0];
	bool has_cap_net_admin;
	struct nlmsghdr *nlh;
	unsigned long id;
	u32 index, port = 0;
	/* Set once any object at or past @start has been reached */
	bool filled = false;

	err = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
			    nldev_policy, NL_VALIDATE_LIBERAL, NULL);
	/*
	 * Right now, we are expecting the device index to get res information,
	 * but it is possible to extend this code to return all devices in
	 * one shot by checking the existence of RDMA_NLDEV_ATTR_DEV_INDEX.
	 * if it doesn't exist, we will iterate over all devices.
	 *
	 * But it is not needed for now.
	 */
	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(sock_net(skb->sk), index);
	if (!device)
		return -EINVAL;

	if (tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS])
		show_details = nla_get_u8(tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]);

	/*
	 * If no PORT_INDEX is supplied, we will return all QPs from that device
	 */
	if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
		port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
		if (!rdma_is_port_valid(device, port)) {
			ret = -EINVAL;
			goto err_index;
		}
	}

	/* The reply reuses the operation code of the request */
	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
					 RDMA_NL_GET_OP(cb->nlh->nlmsg_type)),
			0, NLM_F_MULTI);

	if (!nlh || fill_nldev_handle(skb, device)) {
		ret = -EMSGSIZE;
		goto err;
	}

	table_attr = nla_nest_start_noflag(skb, fe->nldev_attr);
	if (!table_attr) {
		ret = -EMSGSIZE;
		goto err;
	}

	has_cap_net_admin = netlink_capable(cb->skb, CAP_NET_ADMIN);

	rt = &device->res[res_type];
	xa_lock(&rt->xa);
	/*
	 * FIXME: if the skip ahead is something common this loop should
	 * use xas_for_each & xas_pause to optimize, we can have a lot of
	 * objects.
	 */
	xa_for_each(&rt->xa, id, res) {
		/* Entries marked RESTRACK_DD are shown only on request */
		if (xa_get_mark(&rt->xa, res->id, RESTRACK_DD) && !show_details)
			goto next;

		/* Skip what earlier passes covered, and entries we cannot pin */
		if (idx < start || !rdma_restrack_get(res))
			goto next;

		/* Holding a reference on @res; fill it without the lock held */
		xa_unlock(&rt->xa);

		filled = true;

		entry_attr = nla_nest_start_noflag(skb, fe->entry);
		if (!entry_attr) {
			ret = -EMSGSIZE;
			rdma_restrack_put(res);
			goto msg_full;
		}

		ret = fill_func(skb, has_cap_net_admin, res, port);

		rdma_restrack_put(res);

		if (ret) {
			nla_nest_cancel(skb, entry_attr);
			/* Out of room: finish this pass, resume at idx later */
			if (ret == -EMSGSIZE)
				goto msg_full;
			/* Filler asked to skip this object: move on */
			if (ret == -EAGAIN)
				goto again;
			goto res_err;
		}
		nla_nest_end(skb, entry_attr);
again:		xa_lock(&rt->xa);
next:		idx++;
	}
	xa_unlock(&rt->xa);

msg_full:
	/* Reached with the xarray lock already released on every path */
	nla_nest_end(skb, table_attr);
	nlmsg_end(skb, nlh);
	cb->args[0] = idx;

	/*
	 * No more entries to fill, cancel the message and
	 * return 0 to mark end of dumpit.
	 */
	if (!filled)
		goto err;

	ib_device_put(device);
	return skb->len;

res_err:
	nla_nest_cancel(skb, table_attr);

err:
	nlmsg_cancel(skb, nlh);

err_index:
	ib_device_put(device);
	return ret;
}
1712 
/*
 * Generate the nldev_res_get_<name>_dumpit() and nldev_res_get_<name>_doit()
 * netlink handlers for one resource flavour. Both forward to the common
 * implementations with restrack type @type and fill_res_<name>_entry() as
 * the filler.
 */
#define RES_GET_FUNCS(name, type)                                              \
	static int nldev_res_get_##name##_dumpit(struct sk_buff *skb,          \
						 struct netlink_callback *cb)  \
	{                                                                      \
		return res_get_common_dumpit(skb, cb, type,                    \
					     fill_res_##name##_entry);         \
	}                                                                      \
	static int nldev_res_get_##name##_doit(struct sk_buff *skb,            \
					       struct nlmsghdr *nlh,           \
					       struct netlink_ext_ack *extack) \
	{                                                                      \
		return res_get_common_doit(skb, nlh, extack, type,             \
					   fill_res_##name##_entry);           \
	}

/* The *_raw variants share the restrack type of their non-raw counterpart */
RES_GET_FUNCS(qp, RDMA_RESTRACK_QP);
RES_GET_FUNCS(qp_raw, RDMA_RESTRACK_QP);
RES_GET_FUNCS(cm_id, RDMA_RESTRACK_CM_ID);
RES_GET_FUNCS(cq, RDMA_RESTRACK_CQ);
RES_GET_FUNCS(cq_raw, RDMA_RESTRACK_CQ);
RES_GET_FUNCS(pd, RDMA_RESTRACK_PD);
RES_GET_FUNCS(mr, RDMA_RESTRACK_MR);
RES_GET_FUNCS(mr_raw, RDMA_RESTRACK_MR);
RES_GET_FUNCS(counter, RDMA_RESTRACK_COUNTER);
RES_GET_FUNCS(ctx, RDMA_RESTRACK_CTX);
RES_GET_FUNCS(srq, RDMA_RESTRACK_SRQ);
RES_GET_FUNCS(srq_raw, RDMA_RESTRACK_SRQ);
1740 
/*
 * Registered rdma_link_ops, searched by type name in link_ops_get().
 * link_ops_rwsem is taken for write when the list is modified and for read
 * while nldev_newlink() looks up and calls an entry.
 */
static LIST_HEAD(link_ops);
static DECLARE_RWSEM(link_ops_rwsem);
1743 
1744 static const struct rdma_link_ops *link_ops_get(const char *type)
1745 {
1746 	const struct rdma_link_ops *ops;
1747 
1748 	list_for_each_entry(ops, &link_ops, list) {
1749 		if (!strcmp(ops->type, type))
1750 			goto out;
1751 	}
1752 	ops = NULL;
1753 out:
1754 	return ops;
1755 }
1756 
1757 void rdma_link_register(struct rdma_link_ops *ops)
1758 {
1759 	down_write(&link_ops_rwsem);
1760 	if (WARN_ON_ONCE(link_ops_get(ops->type)))
1761 		goto out;
1762 	list_add(&ops->list, &link_ops);
1763 out:
1764 	up_write(&link_ops_rwsem);
1765 }
1766 EXPORT_SYMBOL(rdma_link_register);
1767 
/*
 * Remove @ops from the list of registered link ops, under the write side
 * of link_ops_rwsem.
 */
void rdma_link_unregister(struct rdma_link_ops *ops)
{
	down_write(&link_ops_rwsem);
	list_del(&ops->list);
	up_write(&link_ops_rwsem);
}
EXPORT_SYMBOL(rdma_link_unregister);
1775 
1776 static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
1777 			  struct netlink_ext_ack *extack)
1778 {
1779 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1780 	char ibdev_name[IB_DEVICE_NAME_MAX];
1781 	const struct rdma_link_ops *ops;
1782 	char ndev_name[IFNAMSIZ];
1783 	struct net_device *ndev;
1784 	char type[IFNAMSIZ];
1785 	int err;
1786 
1787 	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1788 			    nldev_policy, extack);
1789 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] ||
1790 	    !tb[RDMA_NLDEV_ATTR_LINK_TYPE] || !tb[RDMA_NLDEV_ATTR_NDEV_NAME])
1791 		return -EINVAL;
1792 
1793 	nla_strscpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
1794 		    sizeof(ibdev_name));
1795 	if (strchr(ibdev_name, '%') || strlen(ibdev_name) == 0)
1796 		return -EINVAL;
1797 
1798 	nla_strscpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type));
1799 	nla_strscpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME],
1800 		    sizeof(ndev_name));
1801 
1802 	ndev = dev_get_by_name(sock_net(skb->sk), ndev_name);
1803 	if (!ndev)
1804 		return -ENODEV;
1805 
1806 	down_read(&link_ops_rwsem);
1807 	ops = link_ops_get(type);
1808 #ifdef CONFIG_MODULES
1809 	if (!ops) {
1810 		up_read(&link_ops_rwsem);
1811 		request_module("rdma-link-%s", type);
1812 		down_read(&link_ops_rwsem);
1813 		ops = link_ops_get(type);
1814 	}
1815 #endif
1816 	err = ops ? ops->newlink(ibdev_name, ndev) : -EINVAL;
1817 	up_read(&link_ops_rwsem);
1818 	dev_put(ndev);
1819 
1820 	return err;
1821 }
1822 
1823 static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
1824 			  struct netlink_ext_ack *extack)
1825 {
1826 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1827 	struct ib_device *device;
1828 	u32 index;
1829 	int err;
1830 
1831 	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1832 			    nldev_policy, extack);
1833 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1834 		return -EINVAL;
1835 
1836 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1837 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1838 	if (!device)
1839 		return -EINVAL;
1840 
1841 	if (!(device->attrs.kernel_cap_flags & IBK_ALLOW_USER_UNREG)) {
1842 		ib_device_put(device);
1843 		return -EINVAL;
1844 	}
1845 
1846 	/*
1847 	 * This path is triggered by the 'rdma link delete' administrative command.
1848 	 * For Soft-RoCE (RXE), we ensure that transport sockets are closed here.
1849 	 * Note: iWARP driver does not implement .dellink, so this logic is
1850 	 * implicitly scoped to the driver supporting dynamic link deletion like RXE.
1851 	 */
1852 	if (device->link_ops && device->link_ops->dellink) {
1853 		mutex_lock(&nldev_dellink_mutex);
1854 		err = device->link_ops->dellink(device);
1855 		mutex_unlock(&nldev_dellink_mutex);
1856 		if (err)
1857 			return err;
1858 	}
1859 
1860 	ib_unregister_device_and_put(device);
1861 	return 0;
1862 }
1863 
1864 static int nldev_get_chardev(struct sk_buff *skb, struct nlmsghdr *nlh,
1865 			     struct netlink_ext_ack *extack)
1866 {
1867 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1868 	char client_name[RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE];
1869 	struct ib_client_nl_info data = {};
1870 	struct ib_device *ibdev = NULL;
1871 	struct sk_buff *msg;
1872 	u32 index;
1873 	int err;
1874 
1875 	err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
1876 			    NL_VALIDATE_LIBERAL, extack);
1877 	if (err || !tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE])
1878 		return -EINVAL;
1879 
1880 	nla_strscpy(client_name, tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE],
1881 		    sizeof(client_name));
1882 
1883 	if (tb[RDMA_NLDEV_ATTR_DEV_INDEX]) {
1884 		index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1885 		ibdev = ib_device_get_by_index(sock_net(skb->sk), index);
1886 		if (!ibdev)
1887 			return -EINVAL;
1888 
1889 		if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1890 			data.port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1891 			if (!rdma_is_port_valid(ibdev, data.port)) {
1892 				err = -EINVAL;
1893 				goto out_put;
1894 			}
1895 		} else {
1896 			data.port = -1;
1897 		}
1898 	} else if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1899 		return -EINVAL;
1900 	}
1901 
1902 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1903 	if (!msg) {
1904 		err = -ENOMEM;
1905 		goto out_put;
1906 	}
1907 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1908 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1909 					 RDMA_NLDEV_CMD_GET_CHARDEV),
1910 			0, 0);
1911 	if (!nlh) {
1912 		err = -EMSGSIZE;
1913 		goto out_nlmsg;
1914 	}
1915 
1916 	data.nl_msg = msg;
1917 	err = ib_get_client_nl_info(ibdev, client_name, &data);
1918 	if (err)
1919 		goto out_nlmsg;
1920 
1921 	err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV,
1922 				huge_encode_dev(data.cdev->devt),
1923 				RDMA_NLDEV_ATTR_PAD);
1924 	if (err)
1925 		goto out_data;
1926 	err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV_ABI, data.abi,
1927 				RDMA_NLDEV_ATTR_PAD);
1928 	if (err)
1929 		goto out_data;
1930 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_CHARDEV_NAME,
1931 			   dev_name(data.cdev))) {
1932 		err = -EMSGSIZE;
1933 		goto out_data;
1934 	}
1935 
1936 	nlmsg_end(msg, nlh);
1937 	put_device(data.cdev);
1938 	if (ibdev)
1939 		ib_device_put(ibdev);
1940 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1941 
1942 out_data:
1943 	put_device(data.cdev);
1944 out_nlmsg:
1945 	nlmsg_free(msg);
1946 out_put:
1947 	if (ibdev)
1948 		ib_device_put(ibdev);
1949 	return err;
1950 }
1951 
1952 static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1953 			      struct netlink_ext_ack *extack)
1954 {
1955 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1956 	struct sk_buff *msg;
1957 	int err;
1958 
1959 	err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1960 			    nldev_policy, NL_VALIDATE_LIBERAL, extack);
1961 	if (err)
1962 		return err;
1963 
1964 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1965 	if (!msg)
1966 		return -ENOMEM;
1967 
1968 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1969 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1970 					 RDMA_NLDEV_CMD_SYS_GET),
1971 			0, 0);
1972 	if (!nlh) {
1973 		nlmsg_free(msg);
1974 		return -EMSGSIZE;
1975 	}
1976 
1977 	err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_NETNS_MODE,
1978 			 (u8)ib_devices_shared_netns);
1979 	if (err) {
1980 		nlmsg_free(msg);
1981 		return err;
1982 	}
1983 
1984 	err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE,
1985 			 (u8)privileged_qkey);
1986 	if (err) {
1987 		nlmsg_free(msg);
1988 		return err;
1989 	}
1990 
1991 	err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_MONITOR_MODE, 1);
1992 	if (err) {
1993 		nlmsg_free(msg);
1994 		return err;
1995 	}
1996 	/*
1997 	 * Copy-on-fork is supported.
1998 	 * See commits:
1999 	 * 70e806e4e645 ("mm: Do early cow for pinned pages during fork() for ptes")
2000 	 * 4eae4efa2c29 ("hugetlb: do early cow when page pinned on src mm")
2001 	 * for more details. Don't backport this without them.
2002 	 *
2003 	 * Return value ignored on purpose, assume copy-on-fork is not
2004 	 * supported in case of failure.
2005 	 */
2006 	nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK, 1);
2007 
2008 	nlmsg_end(msg, nlh);
2009 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
2010 }
2011 
2012 static int nldev_set_sys_set_netns_doit(struct nlattr *tb[])
2013 {
2014 	u8 enable;
2015 	int err;
2016 
2017 	enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]);
2018 	/* Only 0 and 1 are supported */
2019 	if (enable > 1)
2020 		return -EINVAL;
2021 
2022 	err = rdma_compatdev_set(enable);
2023 	return err;
2024 }
2025 
2026 static int nldev_set_sys_set_pqkey_doit(struct nlattr *tb[])
2027 {
2028 	u8 enable;
2029 
2030 	enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE]);
2031 	/* Only 0 and 1 are supported */
2032 	if (enable > 1)
2033 		return -EINVAL;
2034 
2035 	privileged_qkey = enable;
2036 	return 0;
2037 }
2038 
2039 static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
2040 				  struct netlink_ext_ack *extack)
2041 {
2042 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2043 	int err;
2044 
2045 	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2046 			  nldev_policy, extack);
2047 	if (err)
2048 		return -EINVAL;
2049 
2050 	if (tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE])
2051 		return nldev_set_sys_set_netns_doit(tb);
2052 
2053 	if (tb[RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE])
2054 		return nldev_set_sys_set_pqkey_doit(tb);
2055 
2056 	return -EINVAL;
2057 }
2058 
2059 
2060 static int nldev_stat_set_mode_doit(struct sk_buff *msg,
2061 				    struct netlink_ext_ack *extack,
2062 				    struct nlattr *tb[],
2063 				    struct ib_device *device, u32 port)
2064 {
2065 	u32 mode, mask = 0, qpn, cntn = 0;
2066 	bool opcnt = false;
2067 	int ret;
2068 
2069 	/* Currently only counter for QP is supported */
2070 	if (!tb[RDMA_NLDEV_ATTR_STAT_RES] ||
2071 	    nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
2072 		return -EINVAL;
2073 
2074 	if (tb[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED])
2075 		opcnt = !!nla_get_u8(
2076 			tb[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED]);
2077 
2078 	mode = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_MODE]);
2079 	if (mode == RDMA_COUNTER_MODE_AUTO) {
2080 		if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK])
2081 			mask = nla_get_u32(
2082 				tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]);
2083 		return rdma_counter_set_auto_mode(device, port, mask, opcnt,
2084 						  extack);
2085 	}
2086 
2087 	if (!tb[RDMA_NLDEV_ATTR_RES_LQPN])
2088 		return -EINVAL;
2089 
2090 	qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
2091 	if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) {
2092 		cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
2093 		ret = rdma_counter_bind_qpn(device, port, qpn, cntn);
2094 		if (ret)
2095 			return ret;
2096 	} else {
2097 		ret = rdma_counter_bind_qpn_alloc(device, port, qpn, &cntn);
2098 		if (ret)
2099 			return ret;
2100 	}
2101 
2102 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
2103 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
2104 		ret = -EMSGSIZE;
2105 		goto err_fill;
2106 	}
2107 
2108 	return 0;
2109 
2110 err_fill:
2111 	rdma_counter_unbind_qpn(device, port, qpn, cntn);
2112 	return ret;
2113 }
2114 
2115 static int nldev_stat_set_counter_dynamic_doit(struct nlattr *tb[],
2116 					       struct ib_device *device,
2117 					       u32 port)
2118 {
2119 	struct rdma_hw_stats *stats;
2120 	struct nlattr *entry_attr;
2121 	unsigned long *target;
2122 	int rem, i, ret = 0;
2123 	u32 index;
2124 
2125 	stats = ib_get_hw_stats_port(device, port);
2126 	if (!stats)
2127 		return -EINVAL;
2128 
2129 	target = kcalloc(BITS_TO_LONGS(stats->num_counters),
2130 			 sizeof(*stats->is_disabled), GFP_KERNEL);
2131 	if (!target)
2132 		return -ENOMEM;
2133 
2134 	nla_for_each_nested(entry_attr, tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS],
2135 			    rem) {
2136 		index = nla_get_u32(entry_attr);
2137 		if ((index >= stats->num_counters) ||
2138 		    !(stats->descs[index].flags & IB_STAT_FLAG_OPTIONAL)) {
2139 			ret = -EINVAL;
2140 			goto out;
2141 		}
2142 
2143 		set_bit(index, target);
2144 	}
2145 
2146 	for (i = 0; i < stats->num_counters; i++) {
2147 		if (!(stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL))
2148 			continue;
2149 
2150 		ret = rdma_counter_modify(device, port, i, test_bit(i, target));
2151 		if (ret)
2152 			goto out;
2153 	}
2154 
2155 out:
2156 	kfree(target);
2157 	return ret;
2158 }
2159 
2160 static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
2161 			       struct netlink_ext_ack *extack)
2162 {
2163 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2164 	struct ib_device *device;
2165 	struct sk_buff *msg;
2166 	u32 index, port;
2167 	int ret;
2168 
2169 	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
2170 			  extack);
2171 	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
2172 	    !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
2173 		return -EINVAL;
2174 
2175 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2176 	device = ib_device_get_by_index(sock_net(skb->sk), index);
2177 	if (!device)
2178 		return -EINVAL;
2179 
2180 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
2181 	if (!rdma_is_port_valid(device, port)) {
2182 		ret = -EINVAL;
2183 		goto err_put_device;
2184 	}
2185 
2186 	if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] &&
2187 	    !tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) {
2188 		ret = -EINVAL;
2189 		goto err_put_device;
2190 	}
2191 
2192 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2193 	if (!msg) {
2194 		ret = -ENOMEM;
2195 		goto err_put_device;
2196 	}
2197 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
2198 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
2199 					 RDMA_NLDEV_CMD_STAT_SET),
2200 			0, 0);
2201 	if (!nlh || fill_nldev_handle(msg, device) ||
2202 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) {
2203 		ret = -EMSGSIZE;
2204 		goto err_free_msg;
2205 	}
2206 
2207 	if (tb[RDMA_NLDEV_ATTR_STAT_MODE]) {
2208 		ret = nldev_stat_set_mode_doit(msg, extack, tb, device, port);
2209 		if (ret)
2210 			goto err_free_msg;
2211 	}
2212 
2213 	if (tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) {
2214 		ret = nldev_stat_set_counter_dynamic_doit(tb, device, port);
2215 		if (ret)
2216 			goto err_free_msg;
2217 	}
2218 
2219 	nlmsg_end(msg, nlh);
2220 	ib_device_put(device);
2221 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
2222 
2223 err_free_msg:
2224 	nlmsg_free(msg);
2225 err_put_device:
2226 	ib_device_put(device);
2227 	return ret;
2228 }
2229 
2230 static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
2231 			       struct netlink_ext_ack *extack)
2232 {
2233 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2234 	struct ib_device *device;
2235 	struct sk_buff *msg;
2236 	u32 index, port, qpn, cntn;
2237 	int ret;
2238 
2239 	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2240 			  nldev_policy, extack);
2241 	if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES] ||
2242 	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX] ||
2243 	    !tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID] ||
2244 	    !tb[RDMA_NLDEV_ATTR_RES_LQPN])
2245 		return -EINVAL;
2246 
2247 	if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
2248 		return -EINVAL;
2249 
2250 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2251 	device = ib_device_get_by_index(sock_net(skb->sk), index);
2252 	if (!device)
2253 		return -EINVAL;
2254 
2255 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
2256 	if (!rdma_is_port_valid(device, port)) {
2257 		ret = -EINVAL;
2258 		goto err;
2259 	}
2260 
2261 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2262 	if (!msg) {
2263 		ret = -ENOMEM;
2264 		goto err;
2265 	}
2266 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
2267 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
2268 					 RDMA_NLDEV_CMD_STAT_SET),
2269 			0, 0);
2270 	if (!nlh) {
2271 		ret = -EMSGSIZE;
2272 		goto err_fill;
2273 	}
2274 
2275 	cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
2276 	qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
2277 	if (fill_nldev_handle(msg, device) ||
2278 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
2279 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
2280 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
2281 		ret = -EMSGSIZE;
2282 		goto err_fill;
2283 	}
2284 
2285 	ret = rdma_counter_unbind_qpn(device, port, qpn, cntn);
2286 	if (ret)
2287 		goto err_fill;
2288 
2289 	nlmsg_end(msg, nlh);
2290 	ib_device_put(device);
2291 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
2292 
2293 err_fill:
2294 	nlmsg_free(msg);
2295 err:
2296 	ib_device_put(device);
2297 	return ret;
2298 }
2299 
2300 static noinline_for_stack int
2301 stat_get_doit_default_counter(struct sk_buff *skb, struct nlmsghdr *nlh,
2302 			      struct netlink_ext_ack *extack,
2303 			      struct nlattr *tb[])
2304 {
2305 	struct rdma_hw_stats *stats;
2306 	struct nlattr *table_attr;
2307 	struct ib_device *device;
2308 	int ret, num_cnts, i;
2309 	struct sk_buff *msg;
2310 	u32 index, port;
2311 	u64 v;
2312 
2313 	if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
2314 		return -EINVAL;
2315 
2316 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2317 	device = ib_device_get_by_index(sock_net(skb->sk), index);
2318 	if (!device)
2319 		return -EINVAL;
2320 
2321 	if (!device->ops.alloc_hw_port_stats || !device->ops.get_hw_stats) {
2322 		ret = -EINVAL;
2323 		goto err;
2324 	}
2325 
2326 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
2327 	stats = ib_get_hw_stats_port(device, port);
2328 	if (!stats) {
2329 		ret = -EINVAL;
2330 		goto err;
2331 	}
2332 
2333 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2334 	if (!msg) {
2335 		ret = -ENOMEM;
2336 		goto err;
2337 	}
2338 
2339 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
2340 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
2341 					 RDMA_NLDEV_CMD_STAT_GET),
2342 			0, 0);
2343 
2344 	if (!nlh || fill_nldev_handle(msg, device) ||
2345 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) {
2346 		ret = -EMSGSIZE;
2347 		goto err_msg;
2348 	}
2349 
2350 	mutex_lock(&stats->lock);
2351 
2352 	num_cnts = device->ops.get_hw_stats(device, stats, port, 0);
2353 	if (num_cnts < 0) {
2354 		ret = -EINVAL;
2355 		goto err_stats;
2356 	}
2357 
2358 	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
2359 	if (!table_attr) {
2360 		ret = -EMSGSIZE;
2361 		goto err_stats;
2362 	}
2363 	for (i = 0; i < num_cnts; i++) {
2364 		if (test_bit(i, stats->is_disabled))
2365 			continue;
2366 
2367 		v = stats->value[i] +
2368 			rdma_counter_get_hwstat_value(device, port, i);
2369 		if (rdma_nl_stat_hwcounter_entry(msg,
2370 						 stats->descs[i].name, v)) {
2371 			ret = -EMSGSIZE;
2372 			goto err_table;
2373 		}
2374 	}
2375 	nla_nest_end(msg, table_attr);
2376 
2377 	mutex_unlock(&stats->lock);
2378 	nlmsg_end(msg, nlh);
2379 	ib_device_put(device);
2380 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
2381 
2382 err_table:
2383 	nla_nest_cancel(msg, table_attr);
2384 err_stats:
2385 	mutex_unlock(&stats->lock);
2386 err_msg:
2387 	nlmsg_free(msg);
2388 err:
2389 	ib_device_put(device);
2390 	return ret;
2391 }
2392 
2393 static noinline_for_stack int
2394 stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh,
2395 		 struct netlink_ext_ack *extack, struct nlattr *tb[])
2396 
2397 {
2398 	static enum rdma_nl_counter_mode mode;
2399 	static enum rdma_nl_counter_mask mask;
2400 	struct ib_device *device;
2401 	struct sk_buff *msg;
2402 	u32 index, port;
2403 	bool opcnt;
2404 	int ret;
2405 
2406 	if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID])
2407 		return nldev_res_get_counter_doit(skb, nlh, extack);
2408 
2409 	if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] ||
2410 	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
2411 		return -EINVAL;
2412 
2413 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2414 	device = ib_device_get_by_index(sock_net(skb->sk), index);
2415 	if (!device)
2416 		return -EINVAL;
2417 
2418 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
2419 	if (!rdma_is_port_valid(device, port)) {
2420 		ret = -EINVAL;
2421 		goto err;
2422 	}
2423 
2424 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2425 	if (!msg) {
2426 		ret = -ENOMEM;
2427 		goto err;
2428 	}
2429 
2430 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
2431 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
2432 					 RDMA_NLDEV_CMD_STAT_GET),
2433 			0, 0);
2434 	if (!nlh) {
2435 		ret = -EMSGSIZE;
2436 		goto err_msg;
2437 	}
2438 
2439 	ret = rdma_counter_get_mode(device, port, &mode, &mask, &opcnt);
2440 	if (ret)
2441 		goto err_msg;
2442 
2443 	if (fill_nldev_handle(msg, device) ||
2444 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
2445 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, mode)) {
2446 		ret = -EMSGSIZE;
2447 		goto err_msg;
2448 	}
2449 
2450 	if ((mode == RDMA_COUNTER_MODE_AUTO) &&
2451 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK, mask)) {
2452 		ret = -EMSGSIZE;
2453 		goto err_msg;
2454 	}
2455 
2456 	if ((mode == RDMA_COUNTER_MODE_AUTO) &&
2457 	    nla_put_u8(msg, RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED, opcnt)) {
2458 		ret = -EMSGSIZE;
2459 		goto err_msg;
2460 	}
2461 
2462 	nlmsg_end(msg, nlh);
2463 	ib_device_put(device);
2464 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
2465 
2466 err_msg:
2467 	nlmsg_free(msg);
2468 err:
2469 	ib_device_put(device);
2470 	return ret;
2471 }
2472 
2473 static int nldev_stat_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
2474 			       struct netlink_ext_ack *extack)
2475 {
2476 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2477 	int ret;
2478 
2479 	ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2480 			    nldev_policy, NL_VALIDATE_LIBERAL, extack);
2481 	if (ret)
2482 		return -EINVAL;
2483 
2484 	if (!tb[RDMA_NLDEV_ATTR_STAT_RES])
2485 		return stat_get_doit_default_counter(skb, nlh, extack, tb);
2486 
2487 	switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) {
2488 	case RDMA_NLDEV_ATTR_RES_QP:
2489 		ret = stat_get_doit_qp(skb, nlh, extack, tb);
2490 		break;
2491 	case RDMA_NLDEV_ATTR_RES_MR:
2492 		ret = res_get_common_doit(skb, nlh, extack, RDMA_RESTRACK_MR,
2493 					  fill_stat_mr_entry);
2494 		break;
2495 	default:
2496 		ret = -EINVAL;
2497 		break;
2498 	}
2499 
2500 	return ret;
2501 }
2502 
2503 static int nldev_stat_get_dumpit(struct sk_buff *skb,
2504 				 struct netlink_callback *cb)
2505 {
2506 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2507 	int ret;
2508 
2509 	ret = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2510 			    nldev_policy, NL_VALIDATE_LIBERAL, NULL);
2511 	if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES])
2512 		return -EINVAL;
2513 
2514 	switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) {
2515 	case RDMA_NLDEV_ATTR_RES_QP:
2516 		ret = nldev_res_get_counter_dumpit(skb, cb);
2517 		break;
2518 	case RDMA_NLDEV_ATTR_RES_MR:
2519 		ret = res_get_common_dumpit(skb, cb, RDMA_RESTRACK_MR,
2520 					    fill_stat_mr_entry);
2521 		break;
2522 	default:
2523 		ret = -EINVAL;
2524 		break;
2525 	}
2526 
2527 	return ret;
2528 }
2529 
2530 static int nldev_stat_get_counter_status_doit(struct sk_buff *skb,
2531 					      struct nlmsghdr *nlh,
2532 					      struct netlink_ext_ack *extack)
2533 {
2534 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX], *table, *entry;
2535 	struct rdma_hw_stats *stats;
2536 	struct ib_device *device;
2537 	struct sk_buff *msg;
2538 	u32 devid, port;
2539 	int ret, i;
2540 
2541 	ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2542 			    nldev_policy, NL_VALIDATE_LIBERAL, extack);
2543 	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
2544 	    !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
2545 		return -EINVAL;
2546 
2547 	devid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2548 	device = ib_device_get_by_index(sock_net(skb->sk), devid);
2549 	if (!device)
2550 		return -EINVAL;
2551 
2552 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
2553 	if (!rdma_is_port_valid(device, port)) {
2554 		ret = -EINVAL;
2555 		goto err;
2556 	}
2557 
2558 	stats = ib_get_hw_stats_port(device, port);
2559 	if (!stats) {
2560 		ret = -EINVAL;
2561 		goto err;
2562 	}
2563 
2564 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2565 	if (!msg) {
2566 		ret = -ENOMEM;
2567 		goto err;
2568 	}
2569 
2570 	nlh = nlmsg_put(
2571 		msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
2572 		RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_STAT_GET_STATUS),
2573 		0, 0);
2574 
2575 	ret = -EMSGSIZE;
2576 	if (!nlh || fill_nldev_handle(msg, device) ||
2577 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
2578 		goto err_msg;
2579 
2580 	table = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
2581 	if (!table)
2582 		goto err_msg;
2583 
2584 	mutex_lock(&stats->lock);
2585 	for (i = 0; i < stats->num_counters; i++) {
2586 		entry = nla_nest_start(msg,
2587 				       RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY);
2588 		if (!entry)
2589 			goto err_msg_table;
2590 
2591 		if (nla_put_string(msg,
2592 				   RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME,
2593 				   stats->descs[i].name) ||
2594 		    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX, i))
2595 			goto err_msg_entry;
2596 
2597 		if ((stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) &&
2598 		    (nla_put_u8(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC,
2599 				!test_bit(i, stats->is_disabled))))
2600 			goto err_msg_entry;
2601 
2602 		nla_nest_end(msg, entry);
2603 	}
2604 	mutex_unlock(&stats->lock);
2605 
2606 	nla_nest_end(msg, table);
2607 	nlmsg_end(msg, nlh);
2608 	ib_device_put(device);
2609 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
2610 
2611 err_msg_entry:
2612 	nla_nest_cancel(msg, entry);
2613 err_msg_table:
2614 	mutex_unlock(&stats->lock);
2615 	nla_nest_cancel(msg, table);
2616 err_msg:
2617 	nlmsg_free(msg);
2618 err:
2619 	ib_device_put(device);
2620 	return ret;
2621 }
2622 
2623 static int nldev_newdev(struct sk_buff *skb, struct nlmsghdr *nlh,
2624 			struct netlink_ext_ack *extack)
2625 {
2626 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2627 	enum rdma_nl_dev_type type;
2628 	struct ib_device *parent;
2629 	char name[IFNAMSIZ] = {};
2630 	u32 parentid;
2631 	int ret;
2632 
2633 	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2634 			  nldev_policy, extack);
2635 	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
2636 		!tb[RDMA_NLDEV_ATTR_DEV_NAME] || !tb[RDMA_NLDEV_ATTR_DEV_TYPE])
2637 		return -EINVAL;
2638 
2639 	nla_strscpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME], sizeof(name));
2640 	type = nla_get_u8(tb[RDMA_NLDEV_ATTR_DEV_TYPE]);
2641 	parentid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2642 	parent = ib_device_get_by_index(sock_net(skb->sk), parentid);
2643 	if (!parent)
2644 		return -EINVAL;
2645 
2646 	ret = ib_add_sub_device(parent, type, name);
2647 	ib_device_put(parent);
2648 
2649 	return ret;
2650 }
2651 
2652 static int nldev_deldev(struct sk_buff *skb, struct nlmsghdr *nlh,
2653 			struct netlink_ext_ack *extack)
2654 {
2655 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2656 	struct ib_device *device;
2657 	u32 devid;
2658 	int ret;
2659 
2660 	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2661 			  nldev_policy, extack);
2662 	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
2663 		return -EINVAL;
2664 
2665 	devid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2666 	device = ib_device_get_by_index(sock_net(skb->sk), devid);
2667 	if (!device)
2668 		return -EINVAL;
2669 
2670 	return ib_del_sub_device_and_put(device);
2671 }
2672 
2673 static int fill_frmr_pool_key(struct sk_buff *msg, struct ib_frmr_key *key)
2674 {
2675 	struct nlattr *key_attr;
2676 
2677 	key_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_FRMR_POOL_KEY);
2678 	if (!key_attr)
2679 		return -EMSGSIZE;
2680 
2681 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ATS, key->ats))
2682 		goto err;
2683 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ACCESS_FLAGS,
2684 			key->access_flags))
2685 		goto err;
2686 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_FRMR_POOL_KEY_VENDOR_KEY,
2687 			      key->vendor_key, RDMA_NLDEV_ATTR_PAD))
2688 		goto err;
2689 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_FRMR_POOL_KEY_NUM_DMA_BLOCKS,
2690 			      key->num_dma_blocks, RDMA_NLDEV_ATTR_PAD))
2691 		goto err;
2692 
2693 	if (key->kernel_vendor_key &&
2694 	    nla_put_u64_64bit(msg,
2695 			      RDMA_NLDEV_ATTR_FRMR_POOL_KEY_KERNEL_VENDOR_KEY,
2696 			      key->kernel_vendor_key, RDMA_NLDEV_ATTR_PAD))
2697 		goto err;
2698 
2699 	nla_nest_end(msg, key_attr);
2700 	return 0;
2701 
2702 err:
2703 	return -EMSGSIZE;
2704 }
2705 
2706 static int fill_frmr_pool_entry(struct sk_buff *msg, struct ib_frmr_pool *pool)
2707 {
2708 	if (fill_frmr_pool_key(msg, &pool->key))
2709 		return -EMSGSIZE;
2710 
2711 	spin_lock(&pool->lock);
2712 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_FRMR_POOL_QUEUE_HANDLES,
2713 			pool->queue.ci + pool->inactive_queue.ci))
2714 		goto err_unlock;
2715 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_FRMR_POOL_MAX_IN_USE,
2716 			      pool->max_in_use, RDMA_NLDEV_ATTR_PAD))
2717 		goto err_unlock;
2718 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_FRMR_POOL_IN_USE,
2719 			      pool->in_use, RDMA_NLDEV_ATTR_PAD))
2720 		goto err_unlock;
2721 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_FRMR_POOL_PINNED_HANDLES,
2722 			pool->pinned_handles))
2723 		goto err_unlock;
2724 	spin_unlock(&pool->lock);
2725 
2726 	return 0;
2727 
2728 err_unlock:
2729 	spin_unlock(&pool->lock);
2730 	return -EMSGSIZE;
2731 }
2732 
2733 static int nldev_frmr_pools_parse_key(struct nlattr *tb[],
2734 				      struct ib_frmr_key *key,
2735 				      struct netlink_ext_ack *extack)
2736 {
2737 	if (tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ATS])
2738 		key->ats = nla_get_u8(tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ATS]);
2739 
2740 	if (tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ACCESS_FLAGS])
2741 		key->access_flags = nla_get_u32(
2742 			tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ACCESS_FLAGS]);
2743 
2744 	if (tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_VENDOR_KEY])
2745 		key->vendor_key = nla_get_u64(
2746 			tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_VENDOR_KEY]);
2747 
2748 	if (tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_NUM_DMA_BLOCKS])
2749 		key->num_dma_blocks = nla_get_u64(
2750 			tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_NUM_DMA_BLOCKS]);
2751 
2752 	if (tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_KERNEL_VENDOR_KEY])
2753 		return -EINVAL;
2754 
2755 	return 0;
2756 }
2757 
2758 static int nldev_frmr_pools_set_pinned(struct ib_device *device,
2759 				       struct nlattr *tb[],
2760 				       struct netlink_ext_ack *extack)
2761 {
2762 	struct nlattr *key_tb[RDMA_NLDEV_ATTR_MAX];
2763 	struct ib_frmr_key key = { 0 };
2764 	u32 pinned_handles = 0;
2765 	int err = 0;
2766 
2767 	pinned_handles =
2768 		nla_get_u32(tb[RDMA_NLDEV_ATTR_FRMR_POOL_PINNED_HANDLES]);
2769 
2770 	if (!tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY])
2771 		return -EINVAL;
2772 
2773 	err = nla_parse_nested(key_tb, RDMA_NLDEV_ATTR_MAX - 1,
2774 			       tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY], nldev_policy,
2775 			       extack);
2776 	if (err)
2777 		return err;
2778 
2779 	err = nldev_frmr_pools_parse_key(key_tb, &key, extack);
2780 	if (err)
2781 		return err;
2782 
2783 	err = ib_frmr_pools_set_pinned(device, &key, pinned_handles);
2784 
2785 	return err;
2786 }
2787 
2788 static int nldev_frmr_pools_get_dumpit(struct sk_buff *skb,
2789 				       struct netlink_callback *cb)
2790 {
2791 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2792 	struct ib_frmr_pools *pools;
2793 	int err, ret = 0, idx = 0;
2794 	struct ib_frmr_pool *pool;
2795 	struct nlattr *table_attr;
2796 	struct nlattr *entry_attr;
2797 	bool show_details = false;
2798 	struct ib_device *device;
2799 	int start = cb->args[0];
2800 	struct rb_node *node;
2801 	struct nlmsghdr *nlh;
2802 	bool filled = false;
2803 
2804 	err = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2805 			    nldev_policy, NL_VALIDATE_LIBERAL, NULL);
2806 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
2807 		return -EINVAL;
2808 
2809 	device = ib_device_get_by_index(
2810 		sock_net(skb->sk), nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]));
2811 	if (!device)
2812 		return -EINVAL;
2813 
2814 	if (tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS])
2815 		show_details = nla_get_u8(tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]);
2816 
2817 	pools = device->frmr_pools;
2818 	if (!pools) {
2819 		ib_device_put(device);
2820 		return 0;
2821 	}
2822 
2823 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
2824 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
2825 					 RDMA_NLDEV_CMD_FRMR_POOLS_GET),
2826 			0, NLM_F_MULTI);
2827 
2828 	if (!nlh || fill_nldev_handle(skb, device)) {
2829 		ret = -EMSGSIZE;
2830 		goto err;
2831 	}
2832 
2833 	table_attr = nla_nest_start_noflag(skb, RDMA_NLDEV_ATTR_FRMR_POOLS);
2834 	if (!table_attr) {
2835 		ret = -EMSGSIZE;
2836 		goto err;
2837 	}
2838 
2839 	read_lock(&pools->rb_lock);
2840 	for (node = rb_first(&pools->rb_root); node; node = rb_next(node)) {
2841 		pool = rb_entry(node, struct ib_frmr_pool, node);
2842 		if (pool->key.kernel_vendor_key && !show_details)
2843 			continue;
2844 
2845 		if (idx < start) {
2846 			idx++;
2847 			continue;
2848 		}
2849 
2850 		filled = true;
2851 
2852 		entry_attr = nla_nest_start_noflag(
2853 			skb, RDMA_NLDEV_ATTR_FRMR_POOL_ENTRY);
2854 		if (!entry_attr) {
2855 			ret = -EMSGSIZE;
2856 			goto end_msg;
2857 		}
2858 
2859 		if (fill_frmr_pool_entry(skb, pool)) {
2860 			nla_nest_cancel(skb, entry_attr);
2861 			ret = -EMSGSIZE;
2862 			goto end_msg;
2863 		}
2864 
2865 		nla_nest_end(skb, entry_attr);
2866 		idx++;
2867 	}
2868 end_msg:
2869 	read_unlock(&pools->rb_lock);
2870 
2871 	nla_nest_end(skb, table_attr);
2872 	nlmsg_end(skb, nlh);
2873 	cb->args[0] = idx;
2874 
2875 	/*
2876 	 * No more entries to fill, cancel the message and
2877 	 * return 0 to mark end of dumpit.
2878 	 */
2879 	if (!filled)
2880 		goto err;
2881 
2882 	ib_device_put(device);
2883 	return skb->len;
2884 
2885 err:
2886 	nlmsg_cancel(skb, nlh);
2887 	ib_device_put(device);
2888 	return ret;
2889 }
2890 
2891 static int nldev_frmr_pools_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
2892 				     struct netlink_ext_ack *extack)
2893 {
2894 	struct ib_device *device;
2895 	struct nlattr **tb;
2896 	u32 aging_period;
2897 	int err;
2898 
2899 	tb = kzalloc_objs(*tb, RDMA_NLDEV_ATTR_MAX, GFP_KERNEL);
2900 	if (!tb)
2901 		return -ENOMEM;
2902 
2903 	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
2904 			  extack);
2905 	if (err)
2906 		goto free_tb;
2907 
2908 	if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX]) {
2909 		err = -EINVAL;
2910 		goto free_tb;
2911 	}
2912 
2913 	device = ib_device_get_by_index(
2914 		sock_net(skb->sk), nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]));
2915 	if (!device) {
2916 		err = -EINVAL;
2917 		goto free_tb;
2918 	}
2919 
2920 	if (tb[RDMA_NLDEV_ATTR_FRMR_POOLS_AGING_PERIOD]) {
2921 		aging_period = nla_get_u32(
2922 			tb[RDMA_NLDEV_ATTR_FRMR_POOLS_AGING_PERIOD]);
2923 		err = ib_frmr_pools_set_aging_period(device, aging_period);
2924 		goto done;
2925 	}
2926 
2927 	if (tb[RDMA_NLDEV_ATTR_FRMR_POOL_PINNED_HANDLES])
2928 		err = nldev_frmr_pools_set_pinned(device, tb, extack);
2929 
2930 done:
2931 	ib_device_put(device);
2932 free_tb:
2933 	kfree(tb);
2934 	return err;
2935 }
2936 
2937 static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
2938 	[RDMA_NLDEV_CMD_GET] = {
2939 		.doit = nldev_get_doit,
2940 		.dump = nldev_get_dumpit,
2941 	},
2942 	[RDMA_NLDEV_CMD_GET_CHARDEV] = {
2943 		.doit = nldev_get_chardev,
2944 	},
2945 	[RDMA_NLDEV_CMD_SET] = {
2946 		.doit = nldev_set_doit,
2947 		.flags = RDMA_NL_ADMIN_PERM,
2948 	},
2949 	[RDMA_NLDEV_CMD_NEWLINK] = {
2950 		.doit = nldev_newlink,
2951 		.flags = RDMA_NL_ADMIN_PERM,
2952 	},
2953 	[RDMA_NLDEV_CMD_DELLINK] = {
2954 		.doit = nldev_dellink,
2955 		.flags = RDMA_NL_ADMIN_PERM,
2956 	},
2957 	[RDMA_NLDEV_CMD_PORT_GET] = {
2958 		.doit = nldev_port_get_doit,
2959 		.dump = nldev_port_get_dumpit,
2960 	},
2961 	[RDMA_NLDEV_CMD_RES_GET] = {
2962 		.doit = nldev_res_get_doit,
2963 		.dump = nldev_res_get_dumpit,
2964 	},
2965 	[RDMA_NLDEV_CMD_RES_QP_GET] = {
2966 		.doit = nldev_res_get_qp_doit,
2967 		.dump = nldev_res_get_qp_dumpit,
2968 	},
2969 	[RDMA_NLDEV_CMD_RES_CM_ID_GET] = {
2970 		.doit = nldev_res_get_cm_id_doit,
2971 		.dump = nldev_res_get_cm_id_dumpit,
2972 	},
2973 	[RDMA_NLDEV_CMD_RES_CQ_GET] = {
2974 		.doit = nldev_res_get_cq_doit,
2975 		.dump = nldev_res_get_cq_dumpit,
2976 	},
2977 	[RDMA_NLDEV_CMD_RES_MR_GET] = {
2978 		.doit = nldev_res_get_mr_doit,
2979 		.dump = nldev_res_get_mr_dumpit,
2980 	},
2981 	[RDMA_NLDEV_CMD_RES_PD_GET] = {
2982 		.doit = nldev_res_get_pd_doit,
2983 		.dump = nldev_res_get_pd_dumpit,
2984 	},
2985 	[RDMA_NLDEV_CMD_RES_CTX_GET] = {
2986 		.doit = nldev_res_get_ctx_doit,
2987 		.dump = nldev_res_get_ctx_dumpit,
2988 	},
2989 	[RDMA_NLDEV_CMD_RES_SRQ_GET] = {
2990 		.doit = nldev_res_get_srq_doit,
2991 		.dump = nldev_res_get_srq_dumpit,
2992 	},
2993 	[RDMA_NLDEV_CMD_SYS_GET] = {
2994 		.doit = nldev_sys_get_doit,
2995 	},
2996 	[RDMA_NLDEV_CMD_SYS_SET] = {
2997 		.doit = nldev_set_sys_set_doit,
2998 		.flags = RDMA_NL_ADMIN_PERM,
2999 	},
3000 	[RDMA_NLDEV_CMD_STAT_SET] = {
3001 		.doit = nldev_stat_set_doit,
3002 		.flags = RDMA_NL_ADMIN_PERM,
3003 	},
3004 	[RDMA_NLDEV_CMD_STAT_GET] = {
3005 		.doit = nldev_stat_get_doit,
3006 		.dump = nldev_stat_get_dumpit,
3007 	},
3008 	[RDMA_NLDEV_CMD_STAT_DEL] = {
3009 		.doit = nldev_stat_del_doit,
3010 		.flags = RDMA_NL_ADMIN_PERM,
3011 	},
3012 	[RDMA_NLDEV_CMD_RES_QP_GET_RAW] = {
3013 		.doit = nldev_res_get_qp_raw_doit,
3014 		.dump = nldev_res_get_qp_raw_dumpit,
3015 		.flags = RDMA_NL_ADMIN_PERM,
3016 	},
3017 	[RDMA_NLDEV_CMD_RES_CQ_GET_RAW] = {
3018 		.doit = nldev_res_get_cq_raw_doit,
3019 		.dump = nldev_res_get_cq_raw_dumpit,
3020 		.flags = RDMA_NL_ADMIN_PERM,
3021 	},
3022 	[RDMA_NLDEV_CMD_RES_MR_GET_RAW] = {
3023 		.doit = nldev_res_get_mr_raw_doit,
3024 		.dump = nldev_res_get_mr_raw_dumpit,
3025 		.flags = RDMA_NL_ADMIN_PERM,
3026 	},
3027 	[RDMA_NLDEV_CMD_RES_SRQ_GET_RAW] = {
3028 		.doit = nldev_res_get_srq_raw_doit,
3029 		.dump = nldev_res_get_srq_raw_dumpit,
3030 		.flags = RDMA_NL_ADMIN_PERM,
3031 	},
3032 	[RDMA_NLDEV_CMD_STAT_GET_STATUS] = {
3033 		.doit = nldev_stat_get_counter_status_doit,
3034 	},
3035 	[RDMA_NLDEV_CMD_NEWDEV] = {
3036 		.doit = nldev_newdev,
3037 		.flags = RDMA_NL_ADMIN_PERM,
3038 	},
3039 	[RDMA_NLDEV_CMD_DELDEV] = {
3040 		.doit = nldev_deldev,
3041 		.flags = RDMA_NL_ADMIN_PERM,
3042 	},
3043 	[RDMA_NLDEV_CMD_FRMR_POOLS_GET] = {
3044 		.dump = nldev_frmr_pools_get_dumpit,
3045 	},
3046 	[RDMA_NLDEV_CMD_FRMR_POOLS_SET] = {
3047 		.doit = nldev_frmr_pools_set_doit,
3048 		.flags = RDMA_NL_ADMIN_PERM,
3049 	},
3050 };
3051 
3052 static int fill_mon_netdev_rename(struct sk_buff *msg,
3053 				  struct ib_device *device, u32 port,
3054 				  const struct net *net)
3055 {
3056 	struct net_device *netdev = ib_device_get_netdev(device, port);
3057 	int ret = 0;
3058 
3059 	if (!netdev || !net_eq(dev_net(netdev), net))
3060 		goto out;
3061 
3062 	ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
3063 	if (ret)
3064 		goto out;
3065 	ret = nla_put_string(msg, RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);
3066 out:
3067 	dev_put(netdev);
3068 	return ret;
3069 }
3070 
3071 static int fill_mon_netdev_association(struct sk_buff *msg,
3072 				       struct ib_device *device, u32 port,
3073 				       const struct net *net)
3074 {
3075 	struct net_device *netdev = ib_device_get_netdev(device, port);
3076 	int ret = 0;
3077 
3078 	if (netdev && !net_eq(dev_net(netdev), net))
3079 		goto out;
3080 
3081 	ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index);
3082 	if (ret)
3083 		goto out;
3084 
3085 	ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME,
3086 			     dev_name(&device->dev));
3087 	if (ret)
3088 		goto out;
3089 
3090 	ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port);
3091 	if (ret)
3092 		goto out;
3093 
3094 	if (netdev) {
3095 		ret = nla_put_u32(msg,
3096 				  RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
3097 		if (ret)
3098 			goto out;
3099 
3100 		ret = nla_put_string(msg,
3101 				     RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);
3102 	}
3103 
3104 out:
3105 	dev_put(netdev);
3106 	return ret;
3107 }
3108 
3109 static void rdma_nl_notify_err_msg(struct ib_device *device, u32 port_num,
3110 				    enum rdma_nl_notify_event_type type)
3111 {
3112 	struct net_device *netdev;
3113 
3114 	switch (type) {
3115 	case RDMA_REGISTER_EVENT:
3116 		dev_warn_ratelimited(&device->dev,
3117 				     "Failed to send RDMA monitor register device event\n");
3118 		break;
3119 	case RDMA_UNREGISTER_EVENT:
3120 		dev_warn_ratelimited(&device->dev,
3121 				     "Failed to send RDMA monitor unregister device event\n");
3122 		break;
3123 	case RDMA_NETDEV_ATTACH_EVENT:
3124 		netdev = ib_device_get_netdev(device, port_num);
3125 		dev_warn_ratelimited(&device->dev,
3126 				     "Failed to send RDMA monitor netdev attach event: port %d netdev %d\n",
3127 				     port_num, netdev->ifindex);
3128 		dev_put(netdev);
3129 		break;
3130 	case RDMA_NETDEV_DETACH_EVENT:
3131 		dev_warn_ratelimited(&device->dev,
3132 				     "Failed to send RDMA monitor netdev detach event: port %d\n",
3133 				     port_num);
3134 		break;
3135 	case RDMA_RENAME_EVENT:
3136 		dev_warn_ratelimited(&device->dev,
3137 				     "Failed to send RDMA monitor rename device event\n");
3138 		break;
3139 
3140 	case RDMA_NETDEV_RENAME_EVENT:
3141 		netdev = ib_device_get_netdev(device, port_num);
3142 		dev_warn_ratelimited(&device->dev,
3143 				     "Failed to send RDMA monitor netdev rename event: port %d netdev %d\n",
3144 				     port_num, netdev->ifindex);
3145 		dev_put(netdev);
3146 		break;
3147 	default:
3148 		break;
3149 	}
3150 }
3151 
3152 int rdma_nl_notify_event(struct ib_device *device, u32 port_num,
3153 			  enum rdma_nl_notify_event_type type)
3154 {
3155 	struct sk_buff *skb;
3156 	int ret = -EMSGSIZE;
3157 	struct net *net;
3158 	void *nlh;
3159 
3160 	net = read_pnet(&device->coredev.rdma_net);
3161 	if (!net)
3162 		return -EINVAL;
3163 
3164 	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3165 	if (!skb)
3166 		return -ENOMEM;
3167 	nlh = nlmsg_put(skb, 0, 0,
3168 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_MONITOR),
3169 			0, 0);
3170 	if (!nlh)
3171 		goto err_free;
3172 
3173 	switch (type) {
3174 	case RDMA_REGISTER_EVENT:
3175 	case RDMA_UNREGISTER_EVENT:
3176 	case RDMA_RENAME_EVENT:
3177 		ret = fill_nldev_handle(skb, device);
3178 		if (ret)
3179 			goto err_free;
3180 		break;
3181 	case RDMA_NETDEV_ATTACH_EVENT:
3182 	case RDMA_NETDEV_DETACH_EVENT:
3183 		ret = fill_mon_netdev_association(skb, device, port_num, net);
3184 		if (ret)
3185 			goto err_free;
3186 		break;
3187 	case RDMA_NETDEV_RENAME_EVENT:
3188 		ret = fill_mon_netdev_rename(skb, device, port_num, net);
3189 		if (ret)
3190 			goto err_free;
3191 		break;
3192 	default:
3193 		break;
3194 	}
3195 
3196 	ret = nla_put_u8(skb, RDMA_NLDEV_ATTR_EVENT_TYPE, type);
3197 	if (ret)
3198 		goto err_free;
3199 
3200 	nlmsg_end(skb, nlh);
3201 	ret = rdma_nl_multicast(net, skb, RDMA_NL_GROUP_NOTIFY, GFP_KERNEL);
3202 	if (ret && ret != -ESRCH) {
3203 		skb = NULL; /* skb is freed in the netlink send-op handling */
3204 		goto err_free;
3205 	}
3206 	return 0;
3207 
3208 err_free:
3209 	rdma_nl_notify_err_msg(device, port_num, type);
3210 	nlmsg_free(skb);
3211 	return ret;
3212 }
3213 
/*
 * Init-time hook (marked __init): registers nldev_cb_table as the callback
 * table for the RDMA_NL_NLDEV netlink client via rdma_nl_register().
 */
void __init nldev_init(void)
{
	rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table);
}
3218 
/*
 * Counterpart of nldev_init(): unregisters the RDMA_NL_NLDEV netlink client
 * via rdma_nl_unregister().
 */
void nldev_exit(void)
{
	rdma_nl_unregister(RDMA_NL_NLDEV);
}
3223 
/*
 * NOTE(review): the literal 5 presumably has to match the numeric value of
 * RDMA_NL_NLDEV - confirm against the enum definition before changing either.
 */
MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_NLDEV, 5);
3225