xref: /linux/drivers/infiniband/core/nldev.c (revision ab868c10971c5d2cd27b3709d11225941eabe78e)
1 /*
2  * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  * 3. Neither the names of the copyright holders nor the names of its
13  *    contributors may be used to endorse or promote products derived from
14  *    this software without specific prior written permission.
15  *
16  * Alternatively, this software may be distributed under the terms of the
17  * GNU General Public License ("GPL") version 2 as published by the Free
18  * Software Foundation.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include <linux/module.h>
34 #include <linux/pid.h>
35 #include <linux/pid_namespace.h>
36 #include <linux/mutex.h>
37 #include <net/netlink.h>
38 #include <rdma/rdma_cm.h>
39 #include <rdma/rdma_netlink.h>
40 #include <rdma/frmr_pools.h>
41 
42 #include "core_priv.h"
43 #include "cma_priv.h"
44 #include "restrack.h"
45 #include "uverbs.h"
46 #include "frmr_pools.h"
47 
48 /*
49  * This determines whether a non-privileged user is allowed to specify a
50  * controlled QKEY or not, when true non-privileged user is allowed to specify
51  * a controlled QKEY.
52  */
53 static bool privileged_qkey;
54 static DEFINE_MUTEX(nldev_dellink_mutex);
55 
56 typedef int (*res_fill_func_t)(struct sk_buff*, bool,
57 			       struct rdma_restrack_entry*, uint32_t);
58 
59 /*
60  * Sort array elements by the netlink attribute name
61  */
62 static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
63 	[RDMA_NLDEV_ATTR_CHARDEV]		= { .type = NLA_U64 },
64 	[RDMA_NLDEV_ATTR_CHARDEV_ABI]		= { .type = NLA_U64 },
65 	[RDMA_NLDEV_ATTR_CHARDEV_NAME]		= { .type = NLA_NUL_STRING,
66 					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
67 	[RDMA_NLDEV_ATTR_CHARDEV_TYPE]		= { .type = NLA_NUL_STRING,
68 					.len = RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE },
69 	[RDMA_NLDEV_ATTR_DEV_DIM]               = { .type = NLA_U8 },
70 	[RDMA_NLDEV_ATTR_DEV_INDEX]		= { .type = NLA_U32 },
71 	[RDMA_NLDEV_ATTR_DEV_NAME]		= { .type = NLA_NUL_STRING,
72 					.len = IB_DEVICE_NAME_MAX },
73 	[RDMA_NLDEV_ATTR_DEV_NODE_TYPE]		= { .type = NLA_U8 },
74 	[RDMA_NLDEV_ATTR_DEV_PROTOCOL]		= { .type = NLA_NUL_STRING,
75 					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
76 	[RDMA_NLDEV_ATTR_DRIVER]		= { .type = NLA_NESTED },
77 	[RDMA_NLDEV_ATTR_DRIVER_ENTRY]		= { .type = NLA_NESTED },
78 	[RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE]	= { .type = NLA_U8 },
79 	[RDMA_NLDEV_ATTR_DRIVER_STRING]		= { .type = NLA_NUL_STRING,
80 					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
81 	[RDMA_NLDEV_ATTR_DRIVER_S32]		= { .type = NLA_S32 },
82 	[RDMA_NLDEV_ATTR_DRIVER_S64]		= { .type = NLA_S64 },
83 	[RDMA_NLDEV_ATTR_DRIVER_U32]		= { .type = NLA_U32 },
84 	[RDMA_NLDEV_ATTR_DRIVER_U64]		= { .type = NLA_U64 },
85 	[RDMA_NLDEV_ATTR_FW_VERSION]		= { .type = NLA_NUL_STRING,
86 					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
87 	[RDMA_NLDEV_ATTR_LID]			= { .type = NLA_U32 },
88 	[RDMA_NLDEV_ATTR_LINK_TYPE]		= { .type = NLA_NUL_STRING,
89 					.len = IFNAMSIZ },
90 	[RDMA_NLDEV_ATTR_LMC]			= { .type = NLA_U8 },
91 	[RDMA_NLDEV_ATTR_NDEV_INDEX]		= { .type = NLA_U32 },
92 	[RDMA_NLDEV_ATTR_NDEV_NAME]		= { .type = NLA_NUL_STRING,
93 					.len = IFNAMSIZ },
94 	[RDMA_NLDEV_ATTR_NODE_GUID]		= { .type = NLA_U64 },
95 	[RDMA_NLDEV_ATTR_PORT_INDEX]		= { .type = NLA_U32 },
96 	[RDMA_NLDEV_ATTR_PORT_PHYS_STATE]	= { .type = NLA_U8 },
97 	[RDMA_NLDEV_ATTR_PORT_STATE]		= { .type = NLA_U8 },
98 	[RDMA_NLDEV_ATTR_RES_CM_ID]		= { .type = NLA_NESTED },
99 	[RDMA_NLDEV_ATTR_RES_CM_IDN]		= { .type = NLA_U32 },
100 	[RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY]	= { .type = NLA_NESTED },
101 	[RDMA_NLDEV_ATTR_RES_CQ]		= { .type = NLA_NESTED },
102 	[RDMA_NLDEV_ATTR_RES_CQE]		= { .type = NLA_U32 },
103 	[RDMA_NLDEV_ATTR_RES_CQN]		= { .type = NLA_U32 },
104 	[RDMA_NLDEV_ATTR_RES_CQ_ENTRY]		= { .type = NLA_NESTED },
105 	[RDMA_NLDEV_ATTR_RES_CTX]		= { .type = NLA_NESTED },
106 	[RDMA_NLDEV_ATTR_RES_CTXN]		= { .type = NLA_U32 },
107 	[RDMA_NLDEV_ATTR_RES_CTX_ENTRY]		= { .type = NLA_NESTED },
108 	[RDMA_NLDEV_ATTR_RES_DST_ADDR]		= {
109 			.len = sizeof(struct __kernel_sockaddr_storage) },
110 	[RDMA_NLDEV_ATTR_RES_IOVA]		= { .type = NLA_U64 },
111 	[RDMA_NLDEV_ATTR_RES_KERN_NAME]		= { .type = NLA_NUL_STRING,
112 					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
113 	[RDMA_NLDEV_ATTR_RES_LKEY]		= { .type = NLA_U32 },
114 	[RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY]	= { .type = NLA_U32 },
115 	[RDMA_NLDEV_ATTR_RES_LQPN]		= { .type = NLA_U32 },
116 	[RDMA_NLDEV_ATTR_RES_MR]		= { .type = NLA_NESTED },
117 	[RDMA_NLDEV_ATTR_RES_MRLEN]		= { .type = NLA_U64 },
118 	[RDMA_NLDEV_ATTR_RES_MRN]		= { .type = NLA_U32 },
119 	[RDMA_NLDEV_ATTR_RES_MR_ENTRY]		= { .type = NLA_NESTED },
120 	[RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE]	= { .type = NLA_U8 },
121 	[RDMA_NLDEV_ATTR_RES_PD]		= { .type = NLA_NESTED },
122 	[RDMA_NLDEV_ATTR_RES_PDN]		= { .type = NLA_U32 },
123 	[RDMA_NLDEV_ATTR_RES_PD_ENTRY]		= { .type = NLA_NESTED },
124 	[RDMA_NLDEV_ATTR_RES_PID]		= { .type = NLA_U32 },
125 	[RDMA_NLDEV_ATTR_RES_POLL_CTX]		= { .type = NLA_U8 },
126 	[RDMA_NLDEV_ATTR_RES_PS]		= { .type = NLA_U32 },
127 	[RDMA_NLDEV_ATTR_RES_QP]		= { .type = NLA_NESTED },
128 	[RDMA_NLDEV_ATTR_RES_QP_ENTRY]		= { .type = NLA_NESTED },
129 	[RDMA_NLDEV_ATTR_RES_RAW]		= { .type = NLA_BINARY },
130 	[RDMA_NLDEV_ATTR_RES_RKEY]		= { .type = NLA_U32 },
131 	[RDMA_NLDEV_ATTR_RES_RQPN]		= { .type = NLA_U32 },
132 	[RDMA_NLDEV_ATTR_RES_RQ_PSN]		= { .type = NLA_U32 },
133 	[RDMA_NLDEV_ATTR_RES_SQ_PSN]		= { .type = NLA_U32 },
134 	[RDMA_NLDEV_ATTR_RES_SRC_ADDR]		= {
135 			.len = sizeof(struct __kernel_sockaddr_storage) },
136 	[RDMA_NLDEV_ATTR_RES_STATE]		= { .type = NLA_U8 },
137 	[RDMA_NLDEV_ATTR_RES_SUMMARY]		= { .type = NLA_NESTED },
138 	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY]	= { .type = NLA_NESTED },
139 	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR]= { .type = NLA_U64 },
140 	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME]= { .type = NLA_NUL_STRING,
141 					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
142 	[RDMA_NLDEV_ATTR_RES_TYPE]		= { .type = NLA_U8 },
143 	[RDMA_NLDEV_ATTR_RES_SUBTYPE]		= { .type = NLA_NUL_STRING,
144 					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
145 	[RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY]= { .type = NLA_U32 },
146 	[RDMA_NLDEV_ATTR_RES_USECNT]		= { .type = NLA_U64 },
147 	[RDMA_NLDEV_ATTR_RES_SRQ]		= { .type = NLA_NESTED },
148 	[RDMA_NLDEV_ATTR_RES_SRQN]		= { .type = NLA_U32 },
149 	[RDMA_NLDEV_ATTR_RES_SRQ_ENTRY]		= { .type = NLA_NESTED },
150 	[RDMA_NLDEV_ATTR_MIN_RANGE]		= { .type = NLA_U32 },
151 	[RDMA_NLDEV_ATTR_MAX_RANGE]		= { .type = NLA_U32 },
152 	[RDMA_NLDEV_ATTR_SM_LID]		= { .type = NLA_U32 },
153 	[RDMA_NLDEV_ATTR_SUBNET_PREFIX]		= { .type = NLA_U64 },
154 	[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]	= { .type = NLA_U32 },
155 	[RDMA_NLDEV_ATTR_STAT_MODE]		= { .type = NLA_U32 },
156 	[RDMA_NLDEV_ATTR_STAT_RES]		= { .type = NLA_U32 },
157 	[RDMA_NLDEV_ATTR_STAT_COUNTER]		= { .type = NLA_NESTED },
158 	[RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY]	= { .type = NLA_NESTED },
159 	[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]       = { .type = NLA_U32 },
160 	[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]       = { .type = NLA_NESTED },
161 	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY]  = { .type = NLA_NESTED },
162 	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME] = { .type = NLA_NUL_STRING },
163 	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE] = { .type = NLA_U64 },
164 	[RDMA_NLDEV_ATTR_SYS_IMAGE_GUID]	= { .type = NLA_U64 },
165 	[RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID]	= { .type = NLA_U32 },
166 	[RDMA_NLDEV_NET_NS_FD]			= { .type = NLA_U32 },
167 	[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]	= { .type = NLA_U8 },
168 	[RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK]	= { .type = NLA_U8 },
169 	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX]	= { .type = NLA_U32 },
170 	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC] = { .type = NLA_U8 },
171 	[RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE] = { .type = NLA_U8 },
172 	[RDMA_NLDEV_ATTR_DRIVER_DETAILS]	= { .type = NLA_U8 },
173 	[RDMA_NLDEV_ATTR_DEV_TYPE]		= { .type = NLA_U8 },
174 	[RDMA_NLDEV_ATTR_PARENT_NAME]		= { .type = NLA_NUL_STRING },
175 	[RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE]	= { .type = NLA_U8 },
176 	[RDMA_NLDEV_ATTR_EVENT_TYPE]		= { .type = NLA_U8 },
177 	[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED] = { .type = NLA_U8 },
178 	[RDMA_NLDEV_ATTR_FRMR_POOLS]		= { .type = NLA_NESTED },
179 	[RDMA_NLDEV_ATTR_FRMR_POOL_ENTRY]	= { .type = NLA_NESTED },
180 	[RDMA_NLDEV_ATTR_FRMR_POOL_KEY]		= { .type = NLA_NESTED },
181 	[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ATS]	= { .type = NLA_U8 },
182 	[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ACCESS_FLAGS] = { .type = NLA_U32 },
183 	[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_VENDOR_KEY] = { .type = NLA_U64 },
184 	[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_NUM_DMA_BLOCKS] = { .type = NLA_U64 },
185 	[RDMA_NLDEV_ATTR_FRMR_POOL_QUEUE_HANDLES] = { .type = NLA_U32 },
186 	[RDMA_NLDEV_ATTR_FRMR_POOL_MAX_IN_USE]	= { .type = NLA_U64 },
187 	[RDMA_NLDEV_ATTR_FRMR_POOL_IN_USE]	= { .type = NLA_U64 },
188 	[RDMA_NLDEV_ATTR_FRMR_POOLS_AGING_PERIOD] = { .type = NLA_U32 },
189 	[RDMA_NLDEV_ATTR_FRMR_POOL_PINNED_HANDLES] = { .type = NLA_U32 },
190 	[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_KERNEL_VENDOR_KEY] = { .type = NLA_U64 },
191 };
192 
put_driver_name_print_type(struct sk_buff * msg,const char * name,enum rdma_nldev_print_type print_type)193 static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
194 				      enum rdma_nldev_print_type print_type)
195 {
196 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, name))
197 		return -EMSGSIZE;
198 	if (print_type != RDMA_NLDEV_PRINT_TYPE_UNSPEC &&
199 	    nla_put_u8(msg, RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE, print_type))
200 		return -EMSGSIZE;
201 
202 	return 0;
203 }
204 
/*
 * Emit a named driver-specific u32: the name (and optional print type)
 * followed by @value as RDMA_NLDEV_ATTR_DRIVER_U32.
 *
 * Returns 0 on success, -EMSGSIZE if an attribute could not be added.
 */
static int _rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name,
				   enum rdma_nldev_print_type print_type,
				   u32 value)
{
	if (put_driver_name_print_type(msg, name, print_type) ||
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_DRIVER_U32, value))
		return -EMSGSIZE;

	return 0;
}
216 
/*
 * Emit a named driver-specific u64: the name (and optional print type)
 * followed by @value as RDMA_NLDEV_ATTR_DRIVER_U64, padded with
 * RDMA_NLDEV_ATTR_PAD.
 *
 * Returns 0 on success, -EMSGSIZE if an attribute could not be added.
 */
static int _rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name,
				   enum rdma_nldev_print_type print_type,
				   u64 value)
{
	if (put_driver_name_print_type(msg, name, print_type) ||
	    nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_DRIVER_U64, value,
			      RDMA_NLDEV_ATTR_PAD))
		return -EMSGSIZE;

	return 0;
}
229 
rdma_nl_put_driver_string(struct sk_buff * msg,const char * name,const char * str)230 int rdma_nl_put_driver_string(struct sk_buff *msg, const char *name,
231 			      const char *str)
232 {
233 	if (put_driver_name_print_type(msg, name,
234 				       RDMA_NLDEV_PRINT_TYPE_UNSPEC))
235 		return -EMSGSIZE;
236 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, str))
237 		return -EMSGSIZE;
238 
239 	return 0;
240 }
241 EXPORT_SYMBOL(rdma_nl_put_driver_string);
242 
/*
 * Emit a named driver-specific u32 with no print type attached.
 * Returns 0 on success, -EMSGSIZE otherwise.
 */
int rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name, u32 value)
{
	const enum rdma_nldev_print_type hint = RDMA_NLDEV_PRINT_TYPE_UNSPEC;

	return _rdma_nl_put_driver_u32(msg, name, hint, value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u32);
249 
/*
 * Emit a named driver-specific u32 tagged with RDMA_NLDEV_PRINT_TYPE_HEX.
 * Returns 0 on success, -EMSGSIZE otherwise.
 */
int rdma_nl_put_driver_u32_hex(struct sk_buff *msg, const char *name,
			       u32 value)
{
	const enum rdma_nldev_print_type hint = RDMA_NLDEV_PRINT_TYPE_HEX;

	return _rdma_nl_put_driver_u32(msg, name, hint, value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u32_hex);
257 
/*
 * Emit a named driver-specific u64 with no print type attached.
 * Returns 0 on success, -EMSGSIZE otherwise.
 */
int rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, u64 value)
{
	const enum rdma_nldev_print_type hint = RDMA_NLDEV_PRINT_TYPE_UNSPEC;

	return _rdma_nl_put_driver_u64(msg, name, hint, value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u64);
264 
/*
 * Emit a named driver-specific u64 tagged with RDMA_NLDEV_PRINT_TYPE_HEX.
 * Returns 0 on success, -EMSGSIZE otherwise.
 */
int rdma_nl_put_driver_u64_hex(struct sk_buff *msg, const char *name, u64 value)
{
	const enum rdma_nldev_print_type hint = RDMA_NLDEV_PRINT_TYPE_HEX;

	return _rdma_nl_put_driver_u64(msg, name, hint, value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u64_hex);
271 
rdma_nl_get_privileged_qkey(void)272 bool rdma_nl_get_privileged_qkey(void)
273 {
274 	return privileged_qkey;
275 }
276 EXPORT_SYMBOL(rdma_nl_get_privileged_qkey);
277 
fill_nldev_handle(struct sk_buff * msg,struct ib_device * device)278 static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
279 {
280 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
281 		return -EMSGSIZE;
282 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME,
283 			   dev_name(&device->dev)))
284 		return -EMSGSIZE;
285 
286 	return 0;
287 }
288 
fill_dev_info(struct sk_buff * msg,struct ib_device * device)289 static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
290 {
291 	char fw[IB_FW_VERSION_NAME_MAX];
292 	int ret = 0;
293 	u32 port;
294 
295 	if (fill_nldev_handle(msg, device))
296 		return -EMSGSIZE;
297 
298 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, rdma_end_port(device)))
299 		return -EMSGSIZE;
300 
301 	BUILD_BUG_ON(sizeof(device->attrs.device_cap_flags) != sizeof(u64));
302 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
303 			      device->attrs.device_cap_flags,
304 			      RDMA_NLDEV_ATTR_PAD))
305 		return -EMSGSIZE;
306 
307 	ib_get_device_fw_str(device, fw);
308 	/* Device without FW has strlen(fw) = 0 */
309 	if (strlen(fw) && nla_put_string(msg, RDMA_NLDEV_ATTR_FW_VERSION, fw))
310 		return -EMSGSIZE;
311 
312 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_NODE_GUID,
313 			      be64_to_cpu(device->node_guid),
314 			      RDMA_NLDEV_ATTR_PAD))
315 		return -EMSGSIZE;
316 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SYS_IMAGE_GUID,
317 			      be64_to_cpu(device->attrs.sys_image_guid),
318 			      RDMA_NLDEV_ATTR_PAD))
319 		return -EMSGSIZE;
320 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_NODE_TYPE, device->node_type))
321 		return -EMSGSIZE;
322 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, device->use_cq_dim))
323 		return -EMSGSIZE;
324 
325 	if (device->type &&
326 	    nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_TYPE, device->type))
327 		return -EMSGSIZE;
328 
329 	if (device->parent &&
330 	    nla_put_string(msg, RDMA_NLDEV_ATTR_PARENT_NAME,
331 			   dev_name(&device->parent->dev)))
332 		return -EMSGSIZE;
333 
334 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE,
335 		       device->name_assign_type))
336 		return -EMSGSIZE;
337 
338 	/*
339 	 * Link type is determined on first port and mlx4 device
340 	 * which can potentially have two different link type for the same
341 	 * IB device is considered as better to be avoided in the future,
342 	 */
343 	port = rdma_start_port(device);
344 	if (rdma_cap_opa_mad(device, port))
345 		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "opa");
346 	else if (rdma_protocol_ib(device, port))
347 		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "ib");
348 	else if (rdma_protocol_iwarp(device, port))
349 		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "iw");
350 	else if (rdma_protocol_roce(device, port))
351 		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "roce");
352 	else if (rdma_protocol_usnic(device, port))
353 		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL,
354 				     "usnic");
355 	return ret;
356 }
357 
/*
 * Describe @port of @device in @msg: device handle, port index, the
 * IB-only attributes (capability flags, subnet prefix, LID, SM LID, LMC)
 * when the port speaks IB, the port and physical states, and the index and
 * name of the associated netdev when it lives in @net.
 *
 * Returns 0 on success, -EMSGSIZE if an attribute could not be added, or
 * the error reported by ib_query_port().
 */
static int fill_port_info(struct sk_buff *msg,
			  struct ib_device *device, u32 port,
			  const struct net *net)
{
	struct net_device *ndev = NULL;
	struct ib_port_attr attr;
	u64 cap_flags = 0;
	int rc;

	if (fill_nldev_handle(msg, device) ||
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
		return -EMSGSIZE;

	rc = ib_query_port(device, port, &attr);
	if (rc)
		return rc;

	if (rdma_protocol_ib(device, port)) {
		/* Both capability words are packed into a single u64 */
		BUILD_BUG_ON((sizeof(attr.port_cap_flags) +
				sizeof(attr.port_cap_flags2)) > sizeof(u64));
		cap_flags = attr.port_cap_flags |
			((u64)attr.port_cap_flags2 << 32);

		if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
				      cap_flags, RDMA_NLDEV_ATTR_PAD) ||
		    nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX,
				      attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD) ||
		    nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid) ||
		    nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, attr.sm_lid) ||
		    nla_put_u8(msg, RDMA_NLDEV_ATTR_LMC, attr.lmc))
			return -EMSGSIZE;
	}

	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_STATE, attr.state) ||
	    nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state))
		return -EMSGSIZE;

	/*
	 * rc is still 0 here; it only changes if one of the netdev
	 * attributes below cannot be added.
	 */
	ndev = ib_device_get_netdev(device, port);
	if (ndev && net_eq(dev_net(ndev), net)) {
		rc = nla_put_u32(msg, RDMA_NLDEV_ATTR_NDEV_INDEX,
				 ndev->ifindex);
		if (!rc)
			rc = nla_put_string(msg, RDMA_NLDEV_ATTR_NDEV_NAME,
					    ndev->name);
	}

	dev_put(ndev);
	return rc;
}
414 
/*
 * Add one RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY nest to @msg holding the
 * resource @name and its current count @curr. The nest is cancelled if
 * either attribute does not fit.
 *
 * Returns 0 on success, -EMSGSIZE otherwise.
 */
static int fill_res_info_entry(struct sk_buff *msg,
			       const char *name, u64 curr)
{
	struct nlattr *nest;

	nest = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY);
	if (!nest)
		return -EMSGSIZE;

	if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME, name) ||
	    nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR, curr,
			      RDMA_NLDEV_ATTR_PAD)) {
		nla_nest_cancel(msg, nest);
		return -EMSGSIZE;
	}

	nla_nest_end(msg, nest);
	return 0;
}
438 
/*
 * Add the resource summary of @device to @msg: the device handle followed
 * by an RDMA_NLDEV_ATTR_RES_SUMMARY nest with one entry per named restrack
 * type, each carrying the count from rdma_restrack_count(). The nest is
 * cancelled if any entry cannot be added.
 *
 * Returns 0 on success, -EMSGSIZE if the handle or nest does not fit, or
 * the error from fill_res_info_entry().
 */
static int fill_res_info(struct sk_buff *msg, struct ib_device *device,
			 bool show_details)
{
	static const char * const summary_names[RDMA_RESTRACK_MAX] = {
		[RDMA_RESTRACK_PD] = "pd",
		[RDMA_RESTRACK_CQ] = "cq",
		[RDMA_RESTRACK_QP] = "qp",
		[RDMA_RESTRACK_CM_ID] = "cm_id",
		[RDMA_RESTRACK_MR] = "mr",
		[RDMA_RESTRACK_CTX] = "ctx",
		[RDMA_RESTRACK_SRQ] = "srq",
	};
	struct nlattr *summary;
	int type, count, rc;

	if (fill_nldev_handle(msg, device))
		return -EMSGSIZE;

	summary = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_RES_SUMMARY);
	if (!summary)
		return -EMSGSIZE;

	for (type = 0; type < RDMA_RESTRACK_MAX; type++) {
		const char *name = summary_names[type];

		/* Types without a name in the table are not reported */
		if (!name)
			continue;

		count = rdma_restrack_count(device, type, show_details);
		rc = fill_res_info_entry(msg, name, count);
		if (rc) {
			nla_nest_cancel(msg, summary);
			return rc;
		}
	}

	nla_nest_end(msg, summary);
	return 0;
}
478 
fill_res_name_pid(struct sk_buff * msg,struct rdma_restrack_entry * res)479 static int fill_res_name_pid(struct sk_buff *msg,
480 			     struct rdma_restrack_entry *res)
481 {
482 	int err = 0;
483 
484 	/*
485 	 * For user resources, user is should read /proc/PID/comm to get the
486 	 * name of the task file.
487 	 */
488 	if (rdma_is_kernel_res(res)) {
489 		err = nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
490 				     res->kern_name);
491 	} else {
492 		pid_t pid;
493 
494 		pid = task_pid_vnr(res->task);
495 		/*
496 		 * Task is dead and in zombie state.
497 		 * There is no need to print PID anymore.
498 		 */
499 		if (pid)
500 			/*
501 			 * This part is racy, task can be killed and PID will
502 			 * be zero right here but it is ok, next query won't
503 			 * return PID. We don't promise real-time reflection
504 			 * of SW objects.
505 			 */
506 			err = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, pid);
507 	}
508 
509 	return err ? -EMSGSIZE : 0;
510 }
511 
fill_res_qp_entry_query(struct sk_buff * msg,struct rdma_restrack_entry * res,struct ib_device * dev,struct ib_qp * qp)512 static int fill_res_qp_entry_query(struct sk_buff *msg,
513 				   struct rdma_restrack_entry *res,
514 				   struct ib_device *dev,
515 				   struct ib_qp *qp)
516 {
517 	struct ib_qp_init_attr qp_init_attr;
518 	struct ib_qp_attr qp_attr;
519 	int ret;
520 
521 	ret = ib_query_qp(qp, &qp_attr, 0, &qp_init_attr);
522 	if (ret)
523 		return ret;
524 
525 	if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) {
526 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN,
527 				qp_attr.dest_qp_num))
528 			goto err;
529 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQ_PSN,
530 				qp_attr.rq_psn))
531 			goto err;
532 	}
533 
534 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SQ_PSN, qp_attr.sq_psn))
535 		goto err;
536 
537 	if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC ||
538 	    qp->qp_type == IB_QPT_XRC_INI || qp->qp_type == IB_QPT_XRC_TGT) {
539 		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE,
540 			       qp_attr.path_mig_state))
541 			goto err;
542 	}
543 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, qp->qp_type))
544 		goto err;
545 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
546 		goto err;
547 
548 	if (dev->ops.fill_res_qp_entry)
549 		return dev->ops.fill_res_qp_entry(msg, qp);
550 	return 0;
551 
552 err:	return -EMSGSIZE;
553 }
554 
/*
 * Describe the QP behind @res in @msg: port index (when set), local QPN,
 * PD number (user QPs only), owner name/PID, then everything added by
 * fill_res_qp_entry_query().
 *
 * Returns -EAGAIN when a non-zero @port filter does not match the QP's
 * port, -EMSGSIZE if an attribute could not be added, otherwise the result
 * of fill_res_qp_entry_query().
 */
static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
			     struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_qp *qp = container_of(res, struct ib_qp, res);
	struct ib_device *dev = qp->device;

	if (port && qp->port != port)
		return -EAGAIN;

	/* In create_qp() port is not set yet */
	if (qp->port && nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp->port))
		return -EMSGSIZE;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num))
		return -EMSGSIZE;

	/* The PD number is only reported for user-owned QPs */
	if (!rdma_is_kernel_res(res) &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, qp->pd->res.id))
		return -EMSGSIZE;

	if (fill_res_name_pid(msg, res))
		return -EMSGSIZE;

	return fill_res_qp_entry_query(msg, res, dev, qp);
}
583 
/*
 * Hand the QP behind @res to the driver's ops.fill_res_qp_entry_raw.
 *
 * Returns -EAGAIN when a non-zero @port filter does not match the QP's
 * port, -EINVAL when the driver has no raw callback, otherwise the
 * callback's return value.
 */
static int fill_res_qp_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
				 struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_qp *qp = container_of(res, struct ib_qp, res);
	struct ib_device *dev = qp->device;

	if (port && qp->port != port)
		return -EAGAIN;

	if (dev->ops.fill_res_qp_entry_raw)
		return dev->ops.fill_res_qp_entry_raw(msg, qp);

	return -EINVAL;
}
596 
/*
 * Describe the CM ID behind @res in @msg: port index (when set), local QPN
 * and QP type (when a QP number is set), port space, state, source and
 * destination addresses (when their family is set), the CM ID number and
 * the owner name/PID. When the driver provides ops.fill_res_cm_id_entry,
 * it is called last and its result is returned.
 *
 * Returns -EAGAIN when a non-zero @port filter does not match, -EMSGSIZE
 * if an attribute could not be added, 0 or the driver callback's return
 * value otherwise.
 */
static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin,
				struct rdma_restrack_entry *res, uint32_t port)
{
	struct rdma_id_private *id_priv =
				container_of(res, struct rdma_id_private, res);
	struct rdma_cm_id *cm_id = &id_priv->id;
	struct ib_device *dev = id_priv->id.device;

	if (port && cm_id->port_num != port)
		return -EAGAIN;

	if (cm_id->port_num &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num))
		return -EMSGSIZE;

	/* QPN and QP type are only reported once a QP number is set */
	if (id_priv->qp_num &&
	    (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, id_priv->qp_num) ||
	     nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, cm_id->qp_type)))
		return -EMSGSIZE;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PS, cm_id->ps) ||
	    nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, id_priv->state))
		return -EMSGSIZE;

	/* Addresses with an unset (zero) family are left out */
	if (cm_id->route.addr.src_addr.ss_family &&
	    nla_put(msg, RDMA_NLDEV_ATTR_RES_SRC_ADDR,
		    sizeof(cm_id->route.addr.src_addr),
		    &cm_id->route.addr.src_addr))
		return -EMSGSIZE;

	if (cm_id->route.addr.dst_addr.ss_family &&
	    nla_put(msg, RDMA_NLDEV_ATTR_RES_DST_ADDR,
		    sizeof(cm_id->route.addr.dst_addr),
		    &cm_id->route.addr.dst_addr))
		return -EMSGSIZE;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CM_IDN, res->id))
		return -EMSGSIZE;

	if (fill_res_name_pid(msg, res))
		return -EMSGSIZE;

	if (!dev->ops.fill_res_cm_id_entry)
		return 0;

	return dev->ops.fill_res_cm_id_entry(msg, cm_id);
}
648 
/*
 * Describe the CQ behind @res in @msg: CQE count, use count, poll context
 * (kernel CQs only), whether DIM is attached, CQ number, owning context
 * number (user CQs only) and the owner name/PID. When the driver provides
 * ops.fill_res_cq_entry, it is called last and its result is returned.
 *
 * Returns 0 or the driver callback's value on success, -EMSGSIZE if an
 * attribute could not be added.
 */
static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin,
			     struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_cq *cq = container_of(res, struct ib_cq, res);
	struct ib_device *dev = cq->device;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe) ||
	    nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
			      atomic_read(&cq->usecnt), RDMA_NLDEV_ATTR_PAD))
		return -EMSGSIZE;

	/* Poll context is only valid for kernel CQs */
	if (rdma_is_kernel_res(res) &&
	    nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx))
		return -EMSGSIZE;

	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, (cq->dim != NULL)) ||
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, res->id))
		return -EMSGSIZE;

	/* Only user CQs are reported with their owning context number */
	if (!rdma_is_kernel_res(res) &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
			cq->uobject->uevent.uobject.context->res.id))
		return -EMSGSIZE;

	if (fill_res_name_pid(msg, res))
		return -EMSGSIZE;

	if (!dev->ops.fill_res_cq_entry)
		return 0;

	return dev->ops.fill_res_cq_entry(msg, cq);
}
682 
/*
 * Hand the CQ behind @res to the driver's ops.fill_res_cq_entry_raw.
 *
 * Returns -EINVAL when the driver has no raw callback, otherwise the
 * callback's return value.
 */
static int fill_res_cq_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
				 struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_cq *cq = container_of(res, struct ib_cq, res);
	struct ib_device *dev = cq->device;

	if (dev->ops.fill_res_cq_entry_raw)
		return dev->ops.fill_res_cq_entry_raw(msg, cq);

	return -EINVAL;
}
693 
/*
 * Describe the MR behind @res in @msg: rkey and lkey (only when the
 * requester has CAP_NET_ADMIN), length, MR number, PD number (user MRs
 * only) and the owner name/PID. When the driver provides
 * ops.fill_res_mr_entry, it is called last and its result is returned.
 *
 * Returns 0 or the driver callback's value on success, -EMSGSIZE if an
 * attribute could not be added.
 */
static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
			     struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_mr *mr = container_of(res, struct ib_mr, res);
	struct ib_device *dev = mr->pd->device;

	/* The keys are only exposed to requesters with CAP_NET_ADMIN */
	if (has_cap_net_admin &&
	    (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey) ||
	     nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey)))
		return -EMSGSIZE;

	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length,
			      RDMA_NLDEV_ATTR_PAD) ||
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
		return -EMSGSIZE;

	if (!rdma_is_kernel_res(res) &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, mr->pd->res.id))
		return -EMSGSIZE;

	if (fill_res_name_pid(msg, res))
		return -EMSGSIZE;

	if (!dev->ops.fill_res_mr_entry)
		return 0;

	return dev->ops.fill_res_mr_entry(msg, mr);
}
725 
/*
 * Hand the MR behind @res to the driver's ops.fill_res_mr_entry_raw.
 *
 * Returns -EINVAL when the driver has no raw callback, otherwise the
 * callback's return value.
 */
static int fill_res_mr_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
				 struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_mr *mr = container_of(res, struct ib_mr, res);
	struct ib_device *dev = mr->pd->device;

	if (dev->ops.fill_res_mr_entry_raw)
		return dev->ops.fill_res_mr_entry_raw(msg, mr);

	return -EINVAL;
}
736 
/*
 * Describe the PD behind @res in @msg: local DMA lkey and, when the PD has
 * IB_PD_UNSAFE_GLOBAL_RKEY set, the unsafe global rkey (both only when the
 * requester has CAP_NET_ADMIN), use count, PD number, owning context
 * number (user PDs only) and the owner name/PID.
 *
 * Returns 0 on success, -EMSGSIZE if an attribute could not be added.
 */
static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin,
			     struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_pd *pd = container_of(res, struct ib_pd, res);

	/* The keys are only exposed to requesters with CAP_NET_ADMIN */
	if (has_cap_net_admin) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY,
				pd->local_dma_lkey))
			return -EMSGSIZE;

		if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) &&
		    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY,
				pd->unsafe_global_rkey))
			return -EMSGSIZE;
	}

	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
			      atomic_read(&pd->usecnt), RDMA_NLDEV_ATTR_PAD) ||
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, res->id))
		return -EMSGSIZE;

	/* Only user PDs are reported with their owning context number */
	if (!rdma_is_kernel_res(res) &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
			pd->uobject->context->res.id))
		return -EMSGSIZE;

	return fill_res_name_pid(msg, res);
}
767 
/*
 * Describe the user context behind @res in @msg: its context number
 * followed by the owner name/PID. Kernel resources add nothing.
 *
 * Returns 0 on success, -EMSGSIZE if an attribute could not be added.
 */
static int fill_res_ctx_entry(struct sk_buff *msg, bool has_cap_net_admin,
			      struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_ucontext *ctx = container_of(res, struct ib_ucontext, res);

	if (!rdma_is_kernel_res(res)) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN, ctx->res.id))
			return -EMSGSIZE;

		return fill_res_name_pid(msg, res);
	}

	return 0;
}
781 
/*
 * Add one RDMA_NLDEV_ATTR_RES_QP_ENTRY nest to @msg describing the QP
 * numbers @min_range..@max_range. A single-QP range is written as one
 * RDMA_NLDEV_ATTR_RES_LQPN; a wider range as MIN_RANGE plus MAX_RANGE.
 * A zero @min_range means there is nothing to report.
 *
 * Returns 0 on success (or when nothing was added), -EMSGSIZE otherwise;
 * on failure the nest is cancelled.
 */
static int fill_res_range_qp_entry(struct sk_buff *msg, uint32_t min_range,
				   uint32_t max_range)
{
	struct nlattr *nest;
	bool failed;

	if (min_range == 0)
		return 0;

	nest = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
	if (!nest)
		return -EMSGSIZE;

	if (min_range == max_range)
		failed = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, min_range);
	else
		failed = nla_put_u32(msg, RDMA_NLDEV_ATTR_MIN_RANGE,
				     min_range) ||
			 nla_put_u32(msg, RDMA_NLDEV_ATTR_MAX_RANGE,
				     max_range);

	if (failed) {
		nla_nest_cancel(msg, nest);
		return -EMSGSIZE;
	}

	nla_nest_end(msg, nest);
	return 0;
}
810 
/*
 * Emit an RDMA_NLDEV_ATTR_RES_QP nest listing the numbers of the QPs that
 * use @srq.
 *
 * The device's QP restrack xarray is walked under xa_lock(); every QP whose
 * ->srq has the same restrack id as @srq contributes its qp_num. Runs of
 * consecutive QP numbers are coalesced and written through
 * fill_res_range_qp_entry() as a single number or a min/max pair.
 *
 * Returns 0 on success. On any failure (message full, or QP numbers seen in
 * descending order) the whole nest is cancelled and -EMSGSIZE is returned.
 */
static int fill_res_srq_qps(struct sk_buff *msg, struct ib_srq *srq)
{
	/* min_range == 0 doubles as "no range currently open". */
	uint32_t min_range = 0, prev = 0;
	struct rdma_restrack_entry *res;
	struct rdma_restrack_root *rt;
	struct nlattr *table_attr;
	struct ib_qp *qp = NULL;
	unsigned long id = 0;

	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP);
	if (!table_attr)
		return -EMSGSIZE;

	rt = &srq->device->res[RDMA_RESTRACK_QP];
	xa_lock(&rt->xa);
	xa_for_each(&rt->xa, id, res) {
		/* Skip entries on which a reference cannot be taken. */
		if (!rdma_restrack_get(res))
			continue;

		qp = container_of(res, struct ib_qp, res);
		if (!qp->srq || (qp->srq->res.id != srq->res.id)) {
			rdma_restrack_put(res);
			continue;
		}

		if (qp->qp_num < prev)
			/* qp_num should be ascending */
			goto err_loop;

		if (min_range == 0) {
			/* First matching QP opens the first range. */
			min_range = qp->qp_num;
		} else if (qp->qp_num > (prev + 1)) {
			/* Gap found: flush [min_range, prev], open a new range. */
			if (fill_res_range_qp_entry(msg, min_range, prev))
				goto err_loop;

			min_range = qp->qp_num;
		}
		prev = qp->qp_num;
		rdma_restrack_put(res);
	}

	xa_unlock(&rt->xa);

	/* Flush the last open range; a no-op when no QP matched. */
	if (fill_res_range_qp_entry(msg, min_range, prev))
		goto err;

	nla_nest_end(msg, table_attr);
	return 0;

err_loop:
	/* Reached from inside the loop: drop the entry reference and the lock. */
	rdma_restrack_put(res);
	xa_unlock(&rt->xa);
err:
	nla_nest_cancel(msg, table_attr);
	return -EMSGSIZE;
}
867 
/*
 * Emit the attributes for one shared receive queue: its id, type, owning PD
 * id, the CQ id when the SRQ type has one, the list of attached QPs and the
 * name/pid information. When the driver provides ->fill_res_srq_entry() it
 * is called last and its result is returned.
 *
 * @has_cap_net_admin and @port are unused by this filler.
 *
 * Returns -EMSGSIZE if any core attribute cannot be added, the driver
 * callback's result if there is one, and 0 otherwise.
 */
static int fill_res_srq_entry(struct sk_buff *msg, bool has_cap_net_admin,
			      struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_srq *srq = container_of(res, struct ib_srq, res);
	struct ib_device *ibdev = srq->device;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SRQN, srq->res.id) ||
	    nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, srq->srq_type) ||
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, srq->pd->res.id))
		return -EMSGSIZE;

	if (ib_srq_has_cq(srq->srq_type) &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, srq->ext.cq->res.id))
		return -EMSGSIZE;

	if (fill_res_srq_qps(msg, srq) || fill_res_name_pid(msg, res))
		return -EMSGSIZE;

	if (!ibdev->ops.fill_res_srq_entry)
		return 0;

	return ibdev->ops.fill_res_srq_entry(msg, srq);
}
903 
/*
 * Raw SRQ filler: hands the whole job to the driver's
 * ->fill_res_srq_entry_raw() callback.
 *
 * Returns -EINVAL when the driver has no such callback, otherwise the
 * callback's result. @has_cap_net_admin and @port are unused.
 */
static int fill_res_srq_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
				 struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_srq *srq = container_of(res, struct ib_srq, res);
	struct ib_device *ibdev = srq->device;

	if (ibdev->ops.fill_res_srq_entry_raw)
		return ibdev->ops.fill_res_srq_entry_raw(msg, srq);

	return -EINVAL;
}
914 
fill_stat_counter_mode(struct sk_buff * msg,struct rdma_counter * counter)915 static int fill_stat_counter_mode(struct sk_buff *msg,
916 				  struct rdma_counter *counter)
917 {
918 	struct rdma_counter_mode *m = &counter->mode;
919 
920 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, m->mode))
921 		return -EMSGSIZE;
922 
923 	if (m->mode == RDMA_COUNTER_MODE_AUTO) {
924 		if ((m->mask & RDMA_COUNTER_MASK_QP_TYPE) &&
925 		    nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, m->param.qp_type))
926 			return -EMSGSIZE;
927 
928 		if ((m->mask & RDMA_COUNTER_MASK_PID) &&
929 		    fill_res_name_pid(msg, &counter->res))
930 			return -EMSGSIZE;
931 	}
932 
933 	return 0;
934 }
935 
/*
 * Emit one RDMA_NLDEV_ATTR_RES_QP_ENTRY nest holding the QP number @qpn.
 *
 * Returns 0 on success. If the nest cannot be opened, or the number cannot
 * be added (in which case the nest is cancelled), returns -EMSGSIZE.
 */
static int fill_stat_counter_qp_entry(struct sk_buff *msg, u32 qpn)
{
	struct nlattr *nest;

	nest = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
	if (!nest)
		return -EMSGSIZE;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
		nla_nest_cancel(msg, nest);
		return -EMSGSIZE;
	}

	nla_nest_end(msg, nest);
	return 0;
}
954 
fill_stat_counter_qps(struct sk_buff * msg,struct rdma_counter * counter)955 static int fill_stat_counter_qps(struct sk_buff *msg,
956 				 struct rdma_counter *counter)
957 {
958 	struct rdma_restrack_entry *res;
959 	struct rdma_restrack_root *rt;
960 	struct nlattr *table_attr;
961 	struct ib_qp *qp = NULL;
962 	unsigned long id = 0;
963 	int ret = 0;
964 
965 	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP);
966 	if (!table_attr)
967 		return -EMSGSIZE;
968 
969 	rt = &counter->device->res[RDMA_RESTRACK_QP];
970 	xa_lock(&rt->xa);
971 	xa_for_each(&rt->xa, id, res) {
972 		qp = container_of(res, struct ib_qp, res);
973 		if (!qp->counter || (qp->counter->id != counter->id))
974 			continue;
975 
976 		ret = fill_stat_counter_qp_entry(msg, qp->qp_num);
977 		if (ret)
978 			goto err;
979 	}
980 
981 	xa_unlock(&rt->xa);
982 	nla_nest_end(msg, table_attr);
983 	return 0;
984 
985 err:
986 	xa_unlock(&rt->xa);
987 	nla_nest_cancel(msg, table_attr);
988 	return ret;
989 }
990 
/*
 * Emit one RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY nest made of a counter
 * @name string and its 64-bit @value.
 *
 * Returns 0 on success. If the nest cannot be opened or either attribute
 * does not fit, any partial nest is cancelled and -EMSGSIZE is returned.
 */
int rdma_nl_stat_hwcounter_entry(struct sk_buff *msg, const char *name,
				 u64 value)
{
	struct nlattr *nest;

	nest = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY);
	if (!nest)
		return -EMSGSIZE;

	if (nla_put_string(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME,
			   name) ||
	    nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE,
			      value, RDMA_NLDEV_ATTR_PAD)) {
		nla_nest_cancel(msg, nest);
		return -EMSGSIZE;
	}

	nla_nest_end(msg, nest);
	return 0;
}
EXPORT_SYMBOL(rdma_nl_stat_hwcounter_entry);
1015 
/*
 * Emit the statistics attributes for one memory region: the MR id, followed
 * by whatever the driver's ->fill_stat_mr_entry() adds when it exists.
 *
 * @has_cap_net_admin and @port are unused by this filler.
 *
 * Returns -EMSGSIZE if the id cannot be added, the driver callback's result
 * if there is one, and 0 otherwise.
 */
static int fill_stat_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
			      struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_mr *mr = container_of(res, struct ib_mr, res);
	struct ib_device *ibdev = mr->pd->device;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
		return -EMSGSIZE;

	if (!ibdev->ops.fill_stat_mr_entry)
		return 0;

	return ibdev->ops.fill_stat_mr_entry(msg, mr);
}
1032 
fill_stat_counter_hwcounters(struct sk_buff * msg,struct rdma_counter * counter)1033 static int fill_stat_counter_hwcounters(struct sk_buff *msg,
1034 					struct rdma_counter *counter)
1035 {
1036 	struct rdma_hw_stats *st = counter->stats;
1037 	struct nlattr *table_attr;
1038 	int i;
1039 
1040 	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
1041 	if (!table_attr)
1042 		return -EMSGSIZE;
1043 
1044 	mutex_lock(&st->lock);
1045 	for (i = 0; i < st->num_counters; i++) {
1046 		if (test_bit(i, st->is_disabled))
1047 			continue;
1048 		if (rdma_nl_stat_hwcounter_entry(msg, st->descs[i].name,
1049 						 st->value[i]))
1050 			goto err;
1051 	}
1052 	mutex_unlock(&st->lock);
1053 
1054 	nla_nest_end(msg, table_attr);
1055 	return 0;
1056 
1057 err:
1058 	mutex_unlock(&st->lock);
1059 	nla_nest_cancel(msg, table_attr);
1060 	return -EMSGSIZE;
1061 }
1062 
/*
 * Emit the attributes for one statistics counter: port index, counter id,
 * mode, bound QPs and hardware counter values.
 *
 * @port: when non-zero, only counters on that port are reported; a counter
 *        on a different port yields -EAGAIN.
 *
 * Returns 0 on success, -EAGAIN on a port mismatch, and -EMSGSIZE when any
 * of the pieces fails to be added. @has_cap_net_admin is unused.
 */
static int fill_res_counter_entry(struct sk_buff *msg, bool has_cap_net_admin,
				  struct rdma_restrack_entry *res,
				  uint32_t port)
{
	struct rdma_counter *counter;

	counter = container_of(res, struct rdma_counter, res);

	if (port && counter->port != port)
		return -EAGAIN;

	/*
	 * The query result is deliberately ignored: the counter is dumped
	 * even if the query failed.
	 */
	rdma_counter_query_stats(counter);

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, counter->port))
		return -EMSGSIZE;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, counter->id))
		return -EMSGSIZE;

	if (fill_stat_counter_mode(msg, counter))
		return -EMSGSIZE;

	if (fill_stat_counter_qps(msg, counter))
		return -EMSGSIZE;

	if (fill_stat_counter_hwcounters(msg, counter))
		return -EMSGSIZE;

	return 0;
}
1085 
nldev_get_doit(struct sk_buff * skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)1086 static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1087 			  struct netlink_ext_ack *extack)
1088 {
1089 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1090 	struct ib_device *device;
1091 	struct sk_buff *msg;
1092 	u32 index;
1093 	int err;
1094 
1095 	err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1096 			    nldev_policy, NL_VALIDATE_LIBERAL, extack);
1097 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1098 		return -EINVAL;
1099 
1100 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1101 
1102 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1103 	if (!device)
1104 		return -EINVAL;
1105 
1106 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1107 	if (!msg) {
1108 		err = -ENOMEM;
1109 		goto err;
1110 	}
1111 
1112 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1113 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
1114 			0, 0);
1115 	if (!nlh) {
1116 		err = -EMSGSIZE;
1117 		goto err_free;
1118 	}
1119 
1120 	err = fill_dev_info(msg, device);
1121 	if (err)
1122 		goto err_free;
1123 
1124 	nlmsg_end(msg, nlh);
1125 
1126 	ib_device_put(device);
1127 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1128 
1129 err_free:
1130 	nlmsg_free(msg);
1131 err:
1132 	ib_device_put(device);
1133 	return err;
1134 }
1135 
nldev_set_doit(struct sk_buff * skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)1136 static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1137 			  struct netlink_ext_ack *extack)
1138 {
1139 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1140 	struct ib_device *device;
1141 	u32 index;
1142 	int err;
1143 
1144 	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1145 			    nldev_policy, extack);
1146 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1147 		return -EINVAL;
1148 
1149 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1150 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1151 	if (!device)
1152 		return -EINVAL;
1153 
1154 	if (tb[RDMA_NLDEV_ATTR_DEV_NAME]) {
1155 		char name[IB_DEVICE_NAME_MAX] = {};
1156 
1157 		nla_strscpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
1158 			    IB_DEVICE_NAME_MAX);
1159 		if (strlen(name) == 0) {
1160 			err = -EINVAL;
1161 			goto done;
1162 		}
1163 		err = ib_device_rename(device, name);
1164 		goto done;
1165 	}
1166 
1167 	if (tb[RDMA_NLDEV_NET_NS_FD]) {
1168 		u32 ns_fd;
1169 
1170 		ns_fd = nla_get_u32(tb[RDMA_NLDEV_NET_NS_FD]);
1171 		err = ib_device_set_netns_put(skb, device, ns_fd);
1172 		goto put_done;
1173 	}
1174 
1175 	if (tb[RDMA_NLDEV_ATTR_DEV_DIM]) {
1176 		u8 use_dim;
1177 
1178 		use_dim = nla_get_u8(tb[RDMA_NLDEV_ATTR_DEV_DIM]);
1179 		err = ib_device_set_dim(device,  use_dim);
1180 		goto done;
1181 	}
1182 
1183 done:
1184 	ib_device_put(device);
1185 put_done:
1186 	return err;
1187 }
1188 
_nldev_get_dumpit(struct ib_device * device,struct sk_buff * skb,struct netlink_callback * cb,unsigned int idx)1189 static int _nldev_get_dumpit(struct ib_device *device,
1190 			     struct sk_buff *skb,
1191 			     struct netlink_callback *cb,
1192 			     unsigned int idx)
1193 {
1194 	int start = cb->args[0];
1195 	struct nlmsghdr *nlh;
1196 
1197 	if (idx < start)
1198 		return 0;
1199 
1200 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
1201 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
1202 			0, NLM_F_MULTI);
1203 
1204 	if (!nlh || fill_dev_info(skb, device)) {
1205 		nlmsg_cancel(skb, nlh);
1206 		goto out;
1207 	}
1208 
1209 	nlmsg_end(skb, nlh);
1210 
1211 	idx++;
1212 
1213 out:	cb->args[0] = idx;
1214 	return skb->len;
1215 }
1216 
nldev_get_dumpit(struct sk_buff * skb,struct netlink_callback * cb)1217 static int nldev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
1218 {
1219 	/*
1220 	 * There is no need to take lock, because
1221 	 * we are relying on ib_core's locking.
1222 	 */
1223 	return ib_enum_all_devs(_nldev_get_dumpit, skb, cb);
1224 }
1225 
nldev_port_get_doit(struct sk_buff * skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)1226 static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1227 			       struct netlink_ext_ack *extack)
1228 {
1229 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1230 	struct ib_device *device;
1231 	struct sk_buff *msg;
1232 	u32 index;
1233 	u32 port;
1234 	int err;
1235 
1236 	err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1237 			    nldev_policy, NL_VALIDATE_LIBERAL, extack);
1238 	if (err ||
1239 	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
1240 	    !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
1241 		return -EINVAL;
1242 
1243 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1244 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1245 	if (!device)
1246 		return -EINVAL;
1247 
1248 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1249 	if (!rdma_is_port_valid(device, port)) {
1250 		err = -EINVAL;
1251 		goto err;
1252 	}
1253 
1254 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1255 	if (!msg) {
1256 		err = -ENOMEM;
1257 		goto err;
1258 	}
1259 
1260 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1261 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
1262 			0, 0);
1263 	if (!nlh) {
1264 		err = -EMSGSIZE;
1265 		goto err_free;
1266 	}
1267 
1268 	err = fill_port_info(msg, device, port, sock_net(skb->sk));
1269 	if (err)
1270 		goto err_free;
1271 
1272 	nlmsg_end(msg, nlh);
1273 	ib_device_put(device);
1274 
1275 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1276 
1277 err_free:
1278 	nlmsg_free(msg);
1279 err:
1280 	ib_device_put(device);
1281 	return err;
1282 }
1283 
nldev_port_get_dumpit(struct sk_buff * skb,struct netlink_callback * cb)1284 static int nldev_port_get_dumpit(struct sk_buff *skb,
1285 				 struct netlink_callback *cb)
1286 {
1287 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1288 	struct ib_device *device;
1289 	int start = cb->args[0];
1290 	struct nlmsghdr *nlh;
1291 	u32 idx = 0;
1292 	u32 ifindex;
1293 	int err;
1294 	unsigned int p;
1295 
1296 	err = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1297 			    nldev_policy, NL_VALIDATE_LIBERAL, NULL);
1298 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1299 		return -EINVAL;
1300 
1301 	ifindex = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1302 	device = ib_device_get_by_index(sock_net(skb->sk), ifindex);
1303 	if (!device)
1304 		return -EINVAL;
1305 
1306 	rdma_for_each_port (device, p) {
1307 		/*
1308 		 * The dumpit function returns all information from specific
1309 		 * index. This specific index is taken from the netlink
1310 		 * messages request sent by user and it is available
1311 		 * in cb->args[0].
1312 		 *
1313 		 * Usually, the user doesn't fill this field and it causes
1314 		 * to return everything.
1315 		 *
1316 		 */
1317 		if (idx < start) {
1318 			idx++;
1319 			continue;
1320 		}
1321 
1322 		nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
1323 				cb->nlh->nlmsg_seq,
1324 				RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1325 						 RDMA_NLDEV_CMD_PORT_GET),
1326 				0, NLM_F_MULTI);
1327 
1328 		if (!nlh || fill_port_info(skb, device, p, sock_net(skb->sk))) {
1329 			nlmsg_cancel(skb, nlh);
1330 			goto out;
1331 		}
1332 		idx++;
1333 		nlmsg_end(skb, nlh);
1334 	}
1335 
1336 out:
1337 	ib_device_put(device);
1338 	cb->args[0] = idx;
1339 	return skb->len;
1340 }
1341 
nldev_res_get_doit(struct sk_buff * skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)1342 static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1343 			      struct netlink_ext_ack *extack)
1344 {
1345 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1346 	bool show_details = false;
1347 	struct ib_device *device;
1348 	struct sk_buff *msg;
1349 	u32 index;
1350 	int ret;
1351 
1352 	ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1353 			    nldev_policy, NL_VALIDATE_LIBERAL, extack);
1354 	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1355 		return -EINVAL;
1356 
1357 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1358 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1359 	if (!device)
1360 		return -EINVAL;
1361 
1362 	if (tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS])
1363 		show_details = nla_get_u8(tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]);
1364 
1365 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1366 	if (!msg) {
1367 		ret = -ENOMEM;
1368 		goto err;
1369 	}
1370 
1371 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1372 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
1373 			0, 0);
1374 	if (!nlh) {
1375 		ret = -EMSGSIZE;
1376 		goto err_free;
1377 	}
1378 
1379 	ret = fill_res_info(msg, device, show_details);
1380 	if (ret)
1381 		goto err_free;
1382 
1383 	nlmsg_end(msg, nlh);
1384 	ib_device_put(device);
1385 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1386 
1387 err_free:
1388 	nlmsg_free(msg);
1389 err:
1390 	ib_device_put(device);
1391 	return ret;
1392 }
1393 
_nldev_res_get_dumpit(struct ib_device * device,struct sk_buff * skb,struct netlink_callback * cb,unsigned int idx)1394 static int _nldev_res_get_dumpit(struct ib_device *device,
1395 				 struct sk_buff *skb,
1396 				 struct netlink_callback *cb,
1397 				 unsigned int idx)
1398 {
1399 	int start = cb->args[0];
1400 	struct nlmsghdr *nlh;
1401 
1402 	if (idx < start)
1403 		return 0;
1404 
1405 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
1406 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
1407 			0, NLM_F_MULTI);
1408 
1409 	if (!nlh || fill_res_info(skb, device, false)) {
1410 		nlmsg_cancel(skb, nlh);
1411 		goto out;
1412 	}
1413 	nlmsg_end(skb, nlh);
1414 
1415 	idx++;
1416 
1417 out:
1418 	cb->args[0] = idx;
1419 	return skb->len;
1420 }
1421 
nldev_res_get_dumpit(struct sk_buff * skb,struct netlink_callback * cb)1422 static int nldev_res_get_dumpit(struct sk_buff *skb,
1423 				struct netlink_callback *cb)
1424 {
1425 	return ib_enum_all_devs(_nldev_res_get_dumpit, skb, cb);
1426 }
1427 
/*
 * Netlink attribute ids used by res_get_common_doit() and
 * res_get_common_dumpit() for one restrack resource type.
 */
struct nldev_fill_res_entry {
	/* Attribute id of the table nest that wraps all dumped objects. */
	enum rdma_nldev_attr nldev_attr;
	/* Bitmask of enum nldev_res_flags values. */
	u8 flags;
	/* Attribute id of the nest that wraps a single dumped object. */
	u32 entry;
	/* Attribute id carrying the object id in a doit request. */
	u32 id;
};
1434 
/*
 * Flags for nldev_fill_res_entry.flags.
 *
 * NLDEV_PER_DEV: checked by res_get_common_doit(), which returns -EINVAL
 * when a non-zero port is given for a type that has this flag, and when no
 * port is given for a type that lacks it.
 */
enum nldev_res_flags {
	NLDEV_PER_DEV = 1 << 0,
};
1438 
/*
 * Attribute ids for each restrack type, indexed by enum rdma_restrack_type.
 * Types without an initializer here are left zeroed, so their .id is 0 and
 * res_get_common_doit() rejects them with -EINVAL.
 */
static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
	[RDMA_RESTRACK_QP] = {
		.nldev_attr = RDMA_NLDEV_ATTR_RES_QP,
		.entry = RDMA_NLDEV_ATTR_RES_QP_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_LQPN,
	},
	[RDMA_RESTRACK_CM_ID] = {
		.nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID,
		.entry = RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_CM_IDN,
	},
	[RDMA_RESTRACK_CQ] = {
		.nldev_attr = RDMA_NLDEV_ATTR_RES_CQ,
		.flags = NLDEV_PER_DEV,
		.entry = RDMA_NLDEV_ATTR_RES_CQ_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_CQN,
	},
	[RDMA_RESTRACK_MR] = {
		.nldev_attr = RDMA_NLDEV_ATTR_RES_MR,
		.flags = NLDEV_PER_DEV,
		.entry = RDMA_NLDEV_ATTR_RES_MR_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_MRN,
	},
	[RDMA_RESTRACK_PD] = {
		.nldev_attr = RDMA_NLDEV_ATTR_RES_PD,
		.flags = NLDEV_PER_DEV,
		.entry = RDMA_NLDEV_ATTR_RES_PD_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_PDN,
	},
	[RDMA_RESTRACK_COUNTER] = {
		.nldev_attr = RDMA_NLDEV_ATTR_STAT_COUNTER,
		.entry = RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY,
		.id = RDMA_NLDEV_ATTR_STAT_COUNTER_ID,
	},
	[RDMA_RESTRACK_CTX] = {
		.nldev_attr = RDMA_NLDEV_ATTR_RES_CTX,
		.flags = NLDEV_PER_DEV,
		.entry = RDMA_NLDEV_ATTR_RES_CTX_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_CTXN,
	},
	[RDMA_RESTRACK_SRQ] = {
		.nldev_attr = RDMA_NLDEV_ATTR_RES_SRQ,
		.flags = NLDEV_PER_DEV,
		.entry = RDMA_NLDEV_ATTR_RES_SRQ_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_SRQN,
	},

};
1487 
/*
 * Common handler for "get one resource by id" requests.
 *
 * @skb, @nlh, @extack: the request
 * @res_type:           restrack type to look the object up in
 * @fill_func:          filler that writes the object's attributes
 *
 * The request must carry the device index and the id attribute configured
 * for @res_type in fill_entries[]. A port index is optional at parse time,
 * but its presence must match the type's NLDEV_PER_DEV flag (see below).
 * The reply reuses the operation code of the request.
 *
 * Returns -EINVAL on a malformed or inconsistent request, the error from
 * rdma_restrack_get_byid() if the object is not found, -ENOMEM or -EMSGSIZE
 * if the reply cannot be built, the error from @fill_func, or the result of
 * rdma_nl_unicast().
 */
static noinline_for_stack int
res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
		    struct netlink_ext_ack *extack,
		    enum rdma_restrack_type res_type,
		    res_fill_func_t fill_func)
{
	const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct rdma_restrack_entry *res;
	struct ib_device *device;
	u32 index, id, port = 0;
	bool has_cap_net_admin;
	struct sk_buff *msg;
	int ret;

	ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
			    nldev_policy, NL_VALIDATE_LIBERAL, extack);
	/* fe->id == 0 means the type has no id attribute configured. */
	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !fe->id || !tb[fe->id])
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(sock_net(skb->sk), index);
	if (!device)
		return -EINVAL;

	if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
		port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
		if (!rdma_is_port_valid(device, port)) {
			ret = -EINVAL;
			goto err;
		}
	}

	/*
	 * Types flagged NLDEV_PER_DEV must be queried without a port; all
	 * other types must be queried with one.
	 */
	if ((port && fe->flags & NLDEV_PER_DEV) ||
	    (!port && ~fe->flags & NLDEV_PER_DEV)) {
		ret = -EINVAL;
		goto err;
	}

	id = nla_get_u32(tb[fe->id]);
	res = rdma_restrack_get_byid(device, res_type, id);
	if (IS_ERR(res)) {
		ret = PTR_ERR(res);
		goto err;
	}

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg) {
		ret = -ENOMEM;
		goto err_get;
	}

	/* From here on nlh refers to the header of the reply. */
	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
					 RDMA_NL_GET_OP(nlh->nlmsg_type)),
			0, 0);

	if (!nlh || fill_nldev_handle(msg, device)) {
		ret = -EMSGSIZE;
		goto err_free;
	}

	has_cap_net_admin = netlink_capable(skb, CAP_NET_ADMIN);

	ret = fill_func(msg, has_cap_net_admin, res, port);
	if (ret)
		goto err_free;

	rdma_restrack_put(res);
	nlmsg_end(msg, nlh);
	ib_device_put(device);
	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);

	/* Unwind in reverse order of acquisition. */
err_free:
	nlmsg_free(msg);
err_get:
	rdma_restrack_put(res);
err:
	ib_device_put(device);
	return ret;
}
1569 
/*
 * Common handler for "dump all resources of one type" requests.
 *
 * @skb:       buffer the dump is written to
 * @cb:        netlink dump state; cb->args[0] holds the position in the
 *             restrack xarray walk from which this call resumes
 * @res_type:  restrack type to dump
 * @fill_func: filler that writes one object's attributes
 *
 * One multi-part message is written per call, containing a table nest with
 * one entry nest per object. Objects marked RESTRACK_DD are skipped unless
 * the request asks for driver details. A @fill_func result of -EAGAIN skips
 * the object; -EMSGSIZE ends this part of the dump so it can resume later;
 * any other error aborts the dump.
 *
 * Returns skb->len when at least one object was attempted, 0 when there was
 * nothing left to fill (end of dump), or a negative errno.
 */
static int res_get_common_dumpit(struct sk_buff *skb,
				 struct netlink_callback *cb,
				 enum rdma_restrack_type res_type,
				 res_fill_func_t fill_func)
{
	const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct rdma_restrack_entry *res;
	struct rdma_restrack_root *rt;
	int err, ret = 0, idx = 0;
	bool show_details = false;
	struct nlattr *table_attr;
	struct nlattr *entry_attr;
	struct ib_device *device;
	int start = cb->args[0];
	bool has_cap_net_admin;
	struct nlmsghdr *nlh;
	unsigned long id;
	u32 index, port = 0;
	bool filled = false;

	err = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
			    nldev_policy, NL_VALIDATE_LIBERAL, NULL);
	/*
	 * Right now, we are expecting the device index to get res information,
	 * but it is possible to extend this code to return all devices in
	 * one shot by checking the existence of RDMA_NLDEV_ATTR_DEV_INDEX.
	 * if it doesn't exist, we will iterate over all devices.
	 *
	 * But it is not needed for now.
	 */
	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(sock_net(skb->sk), index);
	if (!device)
		return -EINVAL;

	if (tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS])
		show_details = nla_get_u8(tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]);

	/*
	 * If no PORT_INDEX is supplied, we will return all QPs from that device
	 */
	if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
		port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
		if (!rdma_is_port_valid(device, port)) {
			ret = -EINVAL;
			goto err_index;
		}
	}

	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
					 RDMA_NL_GET_OP(cb->nlh->nlmsg_type)),
			0, NLM_F_MULTI);

	if (!nlh || fill_nldev_handle(skb, device)) {
		ret = -EMSGSIZE;
		goto err;
	}

	table_attr = nla_nest_start_noflag(skb, fe->nldev_attr);
	if (!table_attr) {
		ret = -EMSGSIZE;
		goto err;
	}

	has_cap_net_admin = netlink_capable(cb->skb, CAP_NET_ADMIN);

	rt = &device->res[res_type];
	xa_lock(&rt->xa);
	/*
	 * FIXME: if the skip ahead is something common this loop should
	 * use xas_for_each & xas_pause to optimize, we can have a lot of
	 * objects.
	 */
	xa_for_each(&rt->xa, id, res) {
		if (xa_get_mark(&rt->xa, res->id, RESTRACK_DD) && !show_details)
			goto next;

		/* Skip what earlier calls covered and entries we cannot pin. */
		if (idx < start || !rdma_restrack_get(res))
			goto next;

		/*
		 * The xarray lock is dropped while the entry is filled; the
		 * reference taken above is held until after fill_func().
		 */
		xa_unlock(&rt->xa);

		filled = true;

		entry_attr = nla_nest_start_noflag(skb, fe->entry);
		if (!entry_attr) {
			ret = -EMSGSIZE;
			rdma_restrack_put(res);
			goto msg_full;
		}

		ret = fill_func(skb, has_cap_net_admin, res, port);

		rdma_restrack_put(res);

		if (ret) {
			nla_nest_cancel(skb, entry_attr);
			if (ret == -EMSGSIZE)
				goto msg_full;
			if (ret == -EAGAIN)
				goto again;
			goto res_err;
		}
		nla_nest_end(skb, entry_attr);
		/* Both labels rejoin the walk; "again" must retake the lock. */
again:		xa_lock(&rt->xa);
next:		idx++;
	}
	xa_unlock(&rt->xa);

	/* Reached with the xarray lock NOT held, from the loop or by goto. */
msg_full:
	nla_nest_end(skb, table_attr);
	nlmsg_end(skb, nlh);
	cb->args[0] = idx;

	/*
	 * No more entries to fill, cancel the message and
	 * return 0 to mark end of dumpit.
	 */
	if (!filled)
		goto err;

	ib_device_put(device);
	return skb->len;

res_err:
	nla_nest_cancel(skb, table_attr);

err:
	nlmsg_cancel(skb, nlh);

err_index:
	ib_device_put(device);
	return ret;
}
1709 
/*
 * RES_GET_FUNCS(name, type) defines the pair of netlink handlers
 * nldev_res_get_<name>_dumpit() and nldev_res_get_<name>_doit(). Both are
 * thin wrappers that call the common dump/doit handler with restrack type
 * @type and the filler fill_res_<name>_entry().
 *
 * The "_raw" instantiations below reuse the restrack type of their non-raw
 * counterpart and differ only in the filler they bind.
 */
#define RES_GET_FUNCS(name, type)                                              \
	static int nldev_res_get_##name##_dumpit(struct sk_buff *skb,          \
						 struct netlink_callback *cb)  \
	{                                                                      \
		return res_get_common_dumpit(skb, cb, type,                    \
					     fill_res_##name##_entry);         \
	}                                                                      \
	static int nldev_res_get_##name##_doit(struct sk_buff *skb,            \
					       struct nlmsghdr *nlh,           \
					       struct netlink_ext_ack *extack) \
	{                                                                      \
		return res_get_common_doit(skb, nlh, extack, type,             \
					   fill_res_##name##_entry);           \
	}

RES_GET_FUNCS(qp, RDMA_RESTRACK_QP);
RES_GET_FUNCS(qp_raw, RDMA_RESTRACK_QP);
RES_GET_FUNCS(cm_id, RDMA_RESTRACK_CM_ID);
RES_GET_FUNCS(cq, RDMA_RESTRACK_CQ);
RES_GET_FUNCS(cq_raw, RDMA_RESTRACK_CQ);
RES_GET_FUNCS(pd, RDMA_RESTRACK_PD);
RES_GET_FUNCS(mr, RDMA_RESTRACK_MR);
RES_GET_FUNCS(mr_raw, RDMA_RESTRACK_MR);
RES_GET_FUNCS(counter, RDMA_RESTRACK_COUNTER);
RES_GET_FUNCS(ctx, RDMA_RESTRACK_CTX);
RES_GET_FUNCS(srq, RDMA_RESTRACK_SRQ);
RES_GET_FUNCS(srq_raw, RDMA_RESTRACK_SRQ);
1737 
/*
 * Registered rdma_link_ops, searched by type name in link_ops_get().
 * link_ops_rwsem is taken for writing by rdma_link_register() and
 * rdma_link_unregister(), and for reading by nldev_newlink().
 */
static LIST_HEAD(link_ops);
static DECLARE_RWSEM(link_ops_rwsem);
1740 
link_ops_get(const char * type)1741 static const struct rdma_link_ops *link_ops_get(const char *type)
1742 {
1743 	const struct rdma_link_ops *ops;
1744 
1745 	list_for_each_entry(ops, &link_ops, list) {
1746 		if (!strcmp(ops->type, type))
1747 			goto out;
1748 	}
1749 	ops = NULL;
1750 out:
1751 	return ops;
1752 }
1753 
rdma_link_register(struct rdma_link_ops * ops)1754 void rdma_link_register(struct rdma_link_ops *ops)
1755 {
1756 	down_write(&link_ops_rwsem);
1757 	if (WARN_ON_ONCE(link_ops_get(ops->type)))
1758 		goto out;
1759 	list_add(&ops->list, &link_ops);
1760 out:
1761 	up_write(&link_ops_rwsem);
1762 }
1763 EXPORT_SYMBOL(rdma_link_register);
1764 
rdma_link_unregister(struct rdma_link_ops * ops)1765 void rdma_link_unregister(struct rdma_link_ops *ops)
1766 {
1767 	down_write(&link_ops_rwsem);
1768 	list_del(&ops->list);
1769 	up_write(&link_ops_rwsem);
1770 }
1771 EXPORT_SYMBOL(rdma_link_unregister);
1772 
nldev_newlink(struct sk_buff * skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)1773 static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
1774 			  struct netlink_ext_ack *extack)
1775 {
1776 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1777 	char ibdev_name[IB_DEVICE_NAME_MAX];
1778 	const struct rdma_link_ops *ops;
1779 	char ndev_name[IFNAMSIZ];
1780 	struct net_device *ndev;
1781 	char type[IFNAMSIZ];
1782 	int err;
1783 
1784 	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1785 			    nldev_policy, extack);
1786 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] ||
1787 	    !tb[RDMA_NLDEV_ATTR_LINK_TYPE] || !tb[RDMA_NLDEV_ATTR_NDEV_NAME])
1788 		return -EINVAL;
1789 
1790 	nla_strscpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
1791 		    sizeof(ibdev_name));
1792 	if (strchr(ibdev_name, '%') || strlen(ibdev_name) == 0)
1793 		return -EINVAL;
1794 
1795 	nla_strscpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type));
1796 	nla_strscpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME],
1797 		    sizeof(ndev_name));
1798 
1799 	ndev = dev_get_by_name(sock_net(skb->sk), ndev_name);
1800 	if (!ndev)
1801 		return -ENODEV;
1802 
1803 	down_read(&link_ops_rwsem);
1804 	ops = link_ops_get(type);
1805 #ifdef CONFIG_MODULES
1806 	if (!ops) {
1807 		up_read(&link_ops_rwsem);
1808 		request_module("rdma-link-%s", type);
1809 		down_read(&link_ops_rwsem);
1810 		ops = link_ops_get(type);
1811 	}
1812 #endif
1813 	err = ops ? ops->newlink(ibdev_name, ndev) : -EINVAL;
1814 	up_read(&link_ops_rwsem);
1815 	dev_put(ndev);
1816 
1817 	return err;
1818 }
1819 
nldev_dellink(struct sk_buff * skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)1820 static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
1821 			  struct netlink_ext_ack *extack)
1822 {
1823 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1824 	struct ib_device *device;
1825 	u32 index;
1826 	int err;
1827 
1828 	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1829 			    nldev_policy, extack);
1830 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1831 		return -EINVAL;
1832 
1833 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1834 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1835 	if (!device)
1836 		return -EINVAL;
1837 
1838 	if (!(device->attrs.kernel_cap_flags & IBK_ALLOW_USER_UNREG)) {
1839 		ib_device_put(device);
1840 		return -EINVAL;
1841 	}
1842 
1843 	/*
1844 	 * This path is triggered by the 'rdma link delete' administrative command.
1845 	 * For Soft-RoCE (RXE), we ensure that transport sockets are closed here.
1846 	 * Note: iWARP driver does not implement .dellink, so this logic is
1847 	 * implicitly scoped to the driver supporting dynamic link deletion like RXE.
1848 	 */
1849 	if (device->link_ops && device->link_ops->dellink) {
1850 		mutex_lock(&nldev_dellink_mutex);
1851 		err = device->link_ops->dellink(device);
1852 		mutex_unlock(&nldev_dellink_mutex);
1853 		if (err)
1854 			return err;
1855 	}
1856 
1857 	ib_unregister_device_and_put(device);
1858 	return 0;
1859 }
1860 
nldev_get_chardev(struct sk_buff * skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)1861 static int nldev_get_chardev(struct sk_buff *skb, struct nlmsghdr *nlh,
1862 			     struct netlink_ext_ack *extack)
1863 {
1864 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1865 	char client_name[RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE];
1866 	struct ib_client_nl_info data = {};
1867 	struct ib_device *ibdev = NULL;
1868 	struct sk_buff *msg;
1869 	u32 index;
1870 	int err;
1871 
1872 	err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
1873 			    NL_VALIDATE_LIBERAL, extack);
1874 	if (err || !tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE])
1875 		return -EINVAL;
1876 
1877 	nla_strscpy(client_name, tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE],
1878 		    sizeof(client_name));
1879 
1880 	if (tb[RDMA_NLDEV_ATTR_DEV_INDEX]) {
1881 		index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1882 		ibdev = ib_device_get_by_index(sock_net(skb->sk), index);
1883 		if (!ibdev)
1884 			return -EINVAL;
1885 
1886 		if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1887 			data.port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1888 			if (!rdma_is_port_valid(ibdev, data.port)) {
1889 				err = -EINVAL;
1890 				goto out_put;
1891 			}
1892 		} else {
1893 			data.port = -1;
1894 		}
1895 	} else if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1896 		return -EINVAL;
1897 	}
1898 
1899 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1900 	if (!msg) {
1901 		err = -ENOMEM;
1902 		goto out_put;
1903 	}
1904 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1905 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1906 					 RDMA_NLDEV_CMD_GET_CHARDEV),
1907 			0, 0);
1908 	if (!nlh) {
1909 		err = -EMSGSIZE;
1910 		goto out_nlmsg;
1911 	}
1912 
1913 	data.nl_msg = msg;
1914 	err = ib_get_client_nl_info(ibdev, client_name, &data);
1915 	if (err)
1916 		goto out_nlmsg;
1917 
1918 	err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV,
1919 				huge_encode_dev(data.cdev->devt),
1920 				RDMA_NLDEV_ATTR_PAD);
1921 	if (err)
1922 		goto out_data;
1923 	err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV_ABI, data.abi,
1924 				RDMA_NLDEV_ATTR_PAD);
1925 	if (err)
1926 		goto out_data;
1927 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_CHARDEV_NAME,
1928 			   dev_name(data.cdev))) {
1929 		err = -EMSGSIZE;
1930 		goto out_data;
1931 	}
1932 
1933 	nlmsg_end(msg, nlh);
1934 	put_device(data.cdev);
1935 	if (ibdev)
1936 		ib_device_put(ibdev);
1937 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1938 
1939 out_data:
1940 	put_device(data.cdev);
1941 out_nlmsg:
1942 	nlmsg_free(msg);
1943 out_put:
1944 	if (ibdev)
1945 		ib_device_put(ibdev);
1946 	return err;
1947 }
1948 
nldev_sys_get_doit(struct sk_buff * skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)1949 static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1950 			      struct netlink_ext_ack *extack)
1951 {
1952 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1953 	struct sk_buff *msg;
1954 	int err;
1955 
1956 	err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1957 			    nldev_policy, NL_VALIDATE_LIBERAL, extack);
1958 	if (err)
1959 		return err;
1960 
1961 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1962 	if (!msg)
1963 		return -ENOMEM;
1964 
1965 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1966 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1967 					 RDMA_NLDEV_CMD_SYS_GET),
1968 			0, 0);
1969 	if (!nlh) {
1970 		nlmsg_free(msg);
1971 		return -EMSGSIZE;
1972 	}
1973 
1974 	err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_NETNS_MODE,
1975 			 (u8)ib_devices_shared_netns);
1976 	if (err) {
1977 		nlmsg_free(msg);
1978 		return err;
1979 	}
1980 
1981 	err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE,
1982 			 (u8)privileged_qkey);
1983 	if (err) {
1984 		nlmsg_free(msg);
1985 		return err;
1986 	}
1987 
1988 	err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_MONITOR_MODE, 1);
1989 	if (err) {
1990 		nlmsg_free(msg);
1991 		return err;
1992 	}
1993 	/*
1994 	 * Copy-on-fork is supported.
1995 	 * See commits:
1996 	 * 70e806e4e645 ("mm: Do early cow for pinned pages during fork() for ptes")
1997 	 * 4eae4efa2c29 ("hugetlb: do early cow when page pinned on src mm")
1998 	 * for more details. Don't backport this without them.
1999 	 *
2000 	 * Return value ignored on purpose, assume copy-on-fork is not
2001 	 * supported in case of failure.
2002 	 */
2003 	nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK, 1);
2004 
2005 	nlmsg_end(msg, nlh);
2006 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
2007 }
2008 
nldev_set_sys_set_netns_doit(struct nlattr * tb[])2009 static int nldev_set_sys_set_netns_doit(struct nlattr *tb[])
2010 {
2011 	u8 enable;
2012 	int err;
2013 
2014 	enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]);
2015 	/* Only 0 and 1 are supported */
2016 	if (enable > 1)
2017 		return -EINVAL;
2018 
2019 	err = rdma_compatdev_set(enable);
2020 	return err;
2021 }
2022 
nldev_set_sys_set_pqkey_doit(struct nlattr * tb[])2023 static int nldev_set_sys_set_pqkey_doit(struct nlattr *tb[])
2024 {
2025 	u8 enable;
2026 
2027 	enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE]);
2028 	/* Only 0 and 1 are supported */
2029 	if (enable > 1)
2030 		return -EINVAL;
2031 
2032 	privileged_qkey = enable;
2033 	return 0;
2034 }
2035 
nldev_set_sys_set_doit(struct sk_buff * skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)2036 static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
2037 				  struct netlink_ext_ack *extack)
2038 {
2039 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2040 	int err;
2041 
2042 	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2043 			  nldev_policy, extack);
2044 	if (err)
2045 		return -EINVAL;
2046 
2047 	if (tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE])
2048 		return nldev_set_sys_set_netns_doit(tb);
2049 
2050 	if (tb[RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE])
2051 		return nldev_set_sys_set_pqkey_doit(tb);
2052 
2053 	return -EINVAL;
2054 }
2055 
2056 
/*
 * Apply the counter mode requested through RDMA_NLDEV_ATTR_STAT_MODE.
 * The caller must have checked that the attribute is present.
 *
 * In auto mode the optional mask and op-counter flag are handed to
 * rdma_counter_set_auto_mode(). Otherwise the QP given by
 * RDMA_NLDEV_ATTR_RES_LQPN is bound either to the counter named by
 * RDMA_NLDEV_ATTR_STAT_COUNTER_ID or to a newly allocated one, and the
 * resulting counter id and QPN are appended to @msg.
 *
 * Returns 0 on success or a negative errno. If the reply attributes do not
 * fit, the binding is undone and -EMSGSIZE is returned.
 */
static int nldev_stat_set_mode_doit(struct sk_buff *msg,
				    struct netlink_ext_ack *extack,
				    struct nlattr *tb[],
				    struct ib_device *device, u32 port)
{
	bool bind_opcnt = false;
	u32 counter_id = 0;
	u32 auto_mask = 0;
	u32 qpn;
	int ret;

	/* Only QP counters are supported for now */
	if (!tb[RDMA_NLDEV_ATTR_STAT_RES])
		return -EINVAL;
	if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
		return -EINVAL;

	if (tb[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED])
		bind_opcnt = nla_get_u8(
			tb[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED]) != 0;

	if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_MODE]) ==
	    RDMA_COUNTER_MODE_AUTO) {
		if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK])
			auto_mask = nla_get_u32(
				tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]);
		return rdma_counter_set_auto_mode(device, port, auto_mask,
						  bind_opcnt, extack);
	}

	/* Every other mode needs a QP to bind. */
	if (!tb[RDMA_NLDEV_ATTR_RES_LQPN])
		return -EINVAL;

	qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
	if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) {
		counter_id = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
		ret = rdma_counter_bind_qpn(device, port, qpn, counter_id);
	} else {
		ret = rdma_counter_bind_qpn_alloc(device, port, qpn,
						  &counter_id);
	}
	if (ret)
		return ret;

	if (!nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, counter_id) &&
	    !nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn))
		return 0;

	/* The reply could not be built: roll the binding back. */
	rdma_counter_unbind_qpn(device, port, qpn, counter_id);
	return -EMSGSIZE;
}
2111 
/*
 * Enable exactly the optional hardware counters listed in
 * RDMA_NLDEV_ATTR_STAT_HWCOUNTERS and disable every other optional counter
 * of @port. The caller must have checked that the attribute is present.
 *
 * Each nested entry is read as a u32 counter index; an index that is out of
 * range or names a counter without IB_STAT_FLAG_OPTIONAL rejects the whole
 * request before anything is modified.
 *
 * Returns 0 on success, -EINVAL for a bad request or a port without
 * hardware stats, -ENOMEM on allocation failure, or the error from
 * rdma_counter_modify().
 */
static int nldev_stat_set_counter_dynamic_doit(struct nlattr *tb[],
					       struct ib_device *device,
					       u32 port)
{
	struct rdma_hw_stats *stats;
	struct nlattr *entry_attr;
	unsigned long *target;
	int rem, i, ret = 0;
	u32 index;

	stats = ib_get_hw_stats_port(device, port);
	if (!stats)
		return -EINVAL;

	/* Bitmap of the counters that should end up enabled. */
	target = kcalloc(BITS_TO_LONGS(stats->num_counters),
			 sizeof(*stats->is_disabled), GFP_KERNEL);
	if (!target)
		return -ENOMEM;

	nla_for_each_nested(entry_attr, tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS],
			    rem) {
		/*
		 * The iterator only guarantees a well-formed attribute
		 * header; make sure the payload really holds a u32 before
		 * reading it.
		 */
		if (nla_len(entry_attr) < sizeof(index)) {
			ret = -EINVAL;
			goto out;
		}

		index = nla_get_u32(entry_attr);
		if ((index >= stats->num_counters) ||
		    !(stats->descs[index].flags & IB_STAT_FLAG_OPTIONAL)) {
			ret = -EINVAL;
			goto out;
		}

		set_bit(index, target);
	}

	/* Apply the new state to every optional counter. */
	for (i = 0; i < stats->num_counters; i++) {
		if (!(stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL))
			continue;

		ret = rdma_counter_modify(device, port, i, test_bit(i, target));
		if (ret)
			goto out;
	}

out:
	kfree(target);
	return ret;
}
2156 
nldev_stat_set_doit(struct sk_buff * skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)2157 static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
2158 			       struct netlink_ext_ack *extack)
2159 {
2160 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2161 	struct ib_device *device;
2162 	struct sk_buff *msg;
2163 	u32 index, port;
2164 	int ret;
2165 
2166 	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
2167 			  extack);
2168 	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
2169 	    !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
2170 		return -EINVAL;
2171 
2172 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2173 	device = ib_device_get_by_index(sock_net(skb->sk), index);
2174 	if (!device)
2175 		return -EINVAL;
2176 
2177 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
2178 	if (!rdma_is_port_valid(device, port)) {
2179 		ret = -EINVAL;
2180 		goto err_put_device;
2181 	}
2182 
2183 	if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] &&
2184 	    !tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) {
2185 		ret = -EINVAL;
2186 		goto err_put_device;
2187 	}
2188 
2189 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2190 	if (!msg) {
2191 		ret = -ENOMEM;
2192 		goto err_put_device;
2193 	}
2194 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
2195 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
2196 					 RDMA_NLDEV_CMD_STAT_SET),
2197 			0, 0);
2198 	if (!nlh || fill_nldev_handle(msg, device) ||
2199 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) {
2200 		ret = -EMSGSIZE;
2201 		goto err_free_msg;
2202 	}
2203 
2204 	if (tb[RDMA_NLDEV_ATTR_STAT_MODE]) {
2205 		ret = nldev_stat_set_mode_doit(msg, extack, tb, device, port);
2206 		if (ret)
2207 			goto err_free_msg;
2208 	}
2209 
2210 	if (tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) {
2211 		ret = nldev_stat_set_counter_dynamic_doit(tb, device, port);
2212 		if (ret)
2213 			goto err_free_msg;
2214 	}
2215 
2216 	nlmsg_end(msg, nlh);
2217 	ib_device_put(device);
2218 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
2219 
2220 err_free_msg:
2221 	nlmsg_free(msg);
2222 err_put_device:
2223 	ib_device_put(device);
2224 	return ret;
2225 }
2226 
nldev_stat_del_doit(struct sk_buff * skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)2227 static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
2228 			       struct netlink_ext_ack *extack)
2229 {
2230 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2231 	struct ib_device *device;
2232 	struct sk_buff *msg;
2233 	u32 index, port, qpn, cntn;
2234 	int ret;
2235 
2236 	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2237 			  nldev_policy, extack);
2238 	if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES] ||
2239 	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX] ||
2240 	    !tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID] ||
2241 	    !tb[RDMA_NLDEV_ATTR_RES_LQPN])
2242 		return -EINVAL;
2243 
2244 	if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
2245 		return -EINVAL;
2246 
2247 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2248 	device = ib_device_get_by_index(sock_net(skb->sk), index);
2249 	if (!device)
2250 		return -EINVAL;
2251 
2252 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
2253 	if (!rdma_is_port_valid(device, port)) {
2254 		ret = -EINVAL;
2255 		goto err;
2256 	}
2257 
2258 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2259 	if (!msg) {
2260 		ret = -ENOMEM;
2261 		goto err;
2262 	}
2263 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
2264 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
2265 					 RDMA_NLDEV_CMD_STAT_SET),
2266 			0, 0);
2267 	if (!nlh) {
2268 		ret = -EMSGSIZE;
2269 		goto err_fill;
2270 	}
2271 
2272 	cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
2273 	qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
2274 	if (fill_nldev_handle(msg, device) ||
2275 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
2276 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
2277 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
2278 		ret = -EMSGSIZE;
2279 		goto err_fill;
2280 	}
2281 
2282 	ret = rdma_counter_unbind_qpn(device, port, qpn, cntn);
2283 	if (ret)
2284 		goto err_fill;
2285 
2286 	nlmsg_end(msg, nlh);
2287 	ib_device_put(device);
2288 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
2289 
2290 err_fill:
2291 	nlmsg_free(msg);
2292 err:
2293 	ib_device_put(device);
2294 	return ret;
2295 }
2296 
2297 static noinline_for_stack int
stat_get_doit_default_counter(struct sk_buff * skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack,struct nlattr * tb[])2298 stat_get_doit_default_counter(struct sk_buff *skb, struct nlmsghdr *nlh,
2299 			      struct netlink_ext_ack *extack,
2300 			      struct nlattr *tb[])
2301 {
2302 	struct rdma_hw_stats *stats;
2303 	struct nlattr *table_attr;
2304 	struct ib_device *device;
2305 	int ret, num_cnts, i;
2306 	struct sk_buff *msg;
2307 	u32 index, port;
2308 	u64 v;
2309 
2310 	if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
2311 		return -EINVAL;
2312 
2313 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2314 	device = ib_device_get_by_index(sock_net(skb->sk), index);
2315 	if (!device)
2316 		return -EINVAL;
2317 
2318 	if (!device->ops.alloc_hw_port_stats || !device->ops.get_hw_stats) {
2319 		ret = -EINVAL;
2320 		goto err;
2321 	}
2322 
2323 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
2324 	stats = ib_get_hw_stats_port(device, port);
2325 	if (!stats) {
2326 		ret = -EINVAL;
2327 		goto err;
2328 	}
2329 
2330 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2331 	if (!msg) {
2332 		ret = -ENOMEM;
2333 		goto err;
2334 	}
2335 
2336 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
2337 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
2338 					 RDMA_NLDEV_CMD_STAT_GET),
2339 			0, 0);
2340 
2341 	if (!nlh || fill_nldev_handle(msg, device) ||
2342 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) {
2343 		ret = -EMSGSIZE;
2344 		goto err_msg;
2345 	}
2346 
2347 	mutex_lock(&stats->lock);
2348 
2349 	num_cnts = device->ops.get_hw_stats(device, stats, port, 0);
2350 	if (num_cnts < 0) {
2351 		ret = -EINVAL;
2352 		goto err_stats;
2353 	}
2354 
2355 	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
2356 	if (!table_attr) {
2357 		ret = -EMSGSIZE;
2358 		goto err_stats;
2359 	}
2360 	for (i = 0; i < num_cnts; i++) {
2361 		if (test_bit(i, stats->is_disabled))
2362 			continue;
2363 
2364 		v = stats->value[i] +
2365 			rdma_counter_get_hwstat_value(device, port, i);
2366 		if (rdma_nl_stat_hwcounter_entry(msg,
2367 						 stats->descs[i].name, v)) {
2368 			ret = -EMSGSIZE;
2369 			goto err_table;
2370 		}
2371 	}
2372 	nla_nest_end(msg, table_attr);
2373 
2374 	mutex_unlock(&stats->lock);
2375 	nlmsg_end(msg, nlh);
2376 	ib_device_put(device);
2377 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
2378 
2379 err_table:
2380 	nla_nest_cancel(msg, table_attr);
2381 err_stats:
2382 	mutex_unlock(&stats->lock);
2383 err_msg:
2384 	nlmsg_free(msg);
2385 err:
2386 	ib_device_put(device);
2387 	return ret;
2388 }
2389 
2390 static noinline_for_stack int
stat_get_doit_qp(struct sk_buff * skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack,struct nlattr * tb[])2391 stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh,
2392 		 struct netlink_ext_ack *extack, struct nlattr *tb[])
2393 
2394 {
2395 	static enum rdma_nl_counter_mode mode;
2396 	static enum rdma_nl_counter_mask mask;
2397 	struct ib_device *device;
2398 	struct sk_buff *msg;
2399 	u32 index, port;
2400 	bool opcnt;
2401 	int ret;
2402 
2403 	if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID])
2404 		return nldev_res_get_counter_doit(skb, nlh, extack);
2405 
2406 	if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] ||
2407 	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
2408 		return -EINVAL;
2409 
2410 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2411 	device = ib_device_get_by_index(sock_net(skb->sk), index);
2412 	if (!device)
2413 		return -EINVAL;
2414 
2415 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
2416 	if (!rdma_is_port_valid(device, port)) {
2417 		ret = -EINVAL;
2418 		goto err;
2419 	}
2420 
2421 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2422 	if (!msg) {
2423 		ret = -ENOMEM;
2424 		goto err;
2425 	}
2426 
2427 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
2428 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
2429 					 RDMA_NLDEV_CMD_STAT_GET),
2430 			0, 0);
2431 	if (!nlh) {
2432 		ret = -EMSGSIZE;
2433 		goto err_msg;
2434 	}
2435 
2436 	ret = rdma_counter_get_mode(device, port, &mode, &mask, &opcnt);
2437 	if (ret)
2438 		goto err_msg;
2439 
2440 	if (fill_nldev_handle(msg, device) ||
2441 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
2442 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, mode)) {
2443 		ret = -EMSGSIZE;
2444 		goto err_msg;
2445 	}
2446 
2447 	if ((mode == RDMA_COUNTER_MODE_AUTO) &&
2448 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK, mask)) {
2449 		ret = -EMSGSIZE;
2450 		goto err_msg;
2451 	}
2452 
2453 	if ((mode == RDMA_COUNTER_MODE_AUTO) &&
2454 	    nla_put_u8(msg, RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED, opcnt)) {
2455 		ret = -EMSGSIZE;
2456 		goto err_msg;
2457 	}
2458 
2459 	nlmsg_end(msg, nlh);
2460 	ib_device_put(device);
2461 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
2462 
2463 err_msg:
2464 	nlmsg_free(msg);
2465 err:
2466 	ib_device_put(device);
2467 	return ret;
2468 }
2469 
nldev_stat_get_doit(struct sk_buff * skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)2470 static int nldev_stat_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
2471 			       struct netlink_ext_ack *extack)
2472 {
2473 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2474 	int ret;
2475 
2476 	ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2477 			    nldev_policy, NL_VALIDATE_LIBERAL, extack);
2478 	if (ret)
2479 		return -EINVAL;
2480 
2481 	if (!tb[RDMA_NLDEV_ATTR_STAT_RES])
2482 		return stat_get_doit_default_counter(skb, nlh, extack, tb);
2483 
2484 	switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) {
2485 	case RDMA_NLDEV_ATTR_RES_QP:
2486 		ret = stat_get_doit_qp(skb, nlh, extack, tb);
2487 		break;
2488 	case RDMA_NLDEV_ATTR_RES_MR:
2489 		ret = res_get_common_doit(skb, nlh, extack, RDMA_RESTRACK_MR,
2490 					  fill_stat_mr_entry);
2491 		break;
2492 	default:
2493 		ret = -EINVAL;
2494 		break;
2495 	}
2496 
2497 	return ret;
2498 }
2499 
nldev_stat_get_dumpit(struct sk_buff * skb,struct netlink_callback * cb)2500 static int nldev_stat_get_dumpit(struct sk_buff *skb,
2501 				 struct netlink_callback *cb)
2502 {
2503 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2504 	int ret;
2505 
2506 	ret = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2507 			    nldev_policy, NL_VALIDATE_LIBERAL, NULL);
2508 	if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES])
2509 		return -EINVAL;
2510 
2511 	switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) {
2512 	case RDMA_NLDEV_ATTR_RES_QP:
2513 		ret = nldev_res_get_counter_dumpit(skb, cb);
2514 		break;
2515 	case RDMA_NLDEV_ATTR_RES_MR:
2516 		ret = res_get_common_dumpit(skb, cb, RDMA_RESTRACK_MR,
2517 					    fill_stat_mr_entry);
2518 		break;
2519 	default:
2520 		ret = -EINVAL;
2521 		break;
2522 	}
2523 
2524 	return ret;
2525 }
2526 
nldev_stat_get_counter_status_doit(struct sk_buff * skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)2527 static int nldev_stat_get_counter_status_doit(struct sk_buff *skb,
2528 					      struct nlmsghdr *nlh,
2529 					      struct netlink_ext_ack *extack)
2530 {
2531 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX], *table, *entry;
2532 	struct rdma_hw_stats *stats;
2533 	struct ib_device *device;
2534 	struct sk_buff *msg;
2535 	u32 devid, port;
2536 	int ret, i;
2537 
2538 	ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2539 			    nldev_policy, NL_VALIDATE_LIBERAL, extack);
2540 	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
2541 	    !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
2542 		return -EINVAL;
2543 
2544 	devid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2545 	device = ib_device_get_by_index(sock_net(skb->sk), devid);
2546 	if (!device)
2547 		return -EINVAL;
2548 
2549 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
2550 	if (!rdma_is_port_valid(device, port)) {
2551 		ret = -EINVAL;
2552 		goto err;
2553 	}
2554 
2555 	stats = ib_get_hw_stats_port(device, port);
2556 	if (!stats) {
2557 		ret = -EINVAL;
2558 		goto err;
2559 	}
2560 
2561 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2562 	if (!msg) {
2563 		ret = -ENOMEM;
2564 		goto err;
2565 	}
2566 
2567 	nlh = nlmsg_put(
2568 		msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
2569 		RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_STAT_GET_STATUS),
2570 		0, 0);
2571 
2572 	ret = -EMSGSIZE;
2573 	if (!nlh || fill_nldev_handle(msg, device) ||
2574 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
2575 		goto err_msg;
2576 
2577 	table = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
2578 	if (!table)
2579 		goto err_msg;
2580 
2581 	mutex_lock(&stats->lock);
2582 	for (i = 0; i < stats->num_counters; i++) {
2583 		entry = nla_nest_start(msg,
2584 				       RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY);
2585 		if (!entry)
2586 			goto err_msg_table;
2587 
2588 		if (nla_put_string(msg,
2589 				   RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME,
2590 				   stats->descs[i].name) ||
2591 		    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX, i))
2592 			goto err_msg_entry;
2593 
2594 		if ((stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) &&
2595 		    (nla_put_u8(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC,
2596 				!test_bit(i, stats->is_disabled))))
2597 			goto err_msg_entry;
2598 
2599 		nla_nest_end(msg, entry);
2600 	}
2601 	mutex_unlock(&stats->lock);
2602 
2603 	nla_nest_end(msg, table);
2604 	nlmsg_end(msg, nlh);
2605 	ib_device_put(device);
2606 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
2607 
2608 err_msg_entry:
2609 	nla_nest_cancel(msg, entry);
2610 err_msg_table:
2611 	mutex_unlock(&stats->lock);
2612 	nla_nest_cancel(msg, table);
2613 err_msg:
2614 	nlmsg_free(msg);
2615 err:
2616 	ib_device_put(device);
2617 	return ret;
2618 }
2619 
nldev_newdev(struct sk_buff * skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)2620 static int nldev_newdev(struct sk_buff *skb, struct nlmsghdr *nlh,
2621 			struct netlink_ext_ack *extack)
2622 {
2623 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2624 	enum rdma_nl_dev_type type;
2625 	struct ib_device *parent;
2626 	char name[IFNAMSIZ] = {};
2627 	u32 parentid;
2628 	int ret;
2629 
2630 	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2631 			  nldev_policy, extack);
2632 	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
2633 		!tb[RDMA_NLDEV_ATTR_DEV_NAME] || !tb[RDMA_NLDEV_ATTR_DEV_TYPE])
2634 		return -EINVAL;
2635 
2636 	nla_strscpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME], sizeof(name));
2637 	type = nla_get_u8(tb[RDMA_NLDEV_ATTR_DEV_TYPE]);
2638 	parentid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2639 	parent = ib_device_get_by_index(sock_net(skb->sk), parentid);
2640 	if (!parent)
2641 		return -EINVAL;
2642 
2643 	ret = ib_add_sub_device(parent, type, name);
2644 	ib_device_put(parent);
2645 
2646 	return ret;
2647 }
2648 
nldev_deldev(struct sk_buff * skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)2649 static int nldev_deldev(struct sk_buff *skb, struct nlmsghdr *nlh,
2650 			struct netlink_ext_ack *extack)
2651 {
2652 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2653 	struct ib_device *device;
2654 	u32 devid;
2655 	int ret;
2656 
2657 	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2658 			  nldev_policy, extack);
2659 	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
2660 		return -EINVAL;
2661 
2662 	devid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2663 	device = ib_device_get_by_index(sock_net(skb->sk), devid);
2664 	if (!device)
2665 		return -EINVAL;
2666 
2667 	return ib_del_sub_device_and_put(device);
2668 }
2669 
fill_frmr_pool_key(struct sk_buff * msg,struct ib_frmr_key * key)2670 static int fill_frmr_pool_key(struct sk_buff *msg, struct ib_frmr_key *key)
2671 {
2672 	struct nlattr *key_attr;
2673 
2674 	key_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_FRMR_POOL_KEY);
2675 	if (!key_attr)
2676 		return -EMSGSIZE;
2677 
2678 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ATS, key->ats))
2679 		goto err;
2680 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ACCESS_FLAGS,
2681 			key->access_flags))
2682 		goto err;
2683 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_FRMR_POOL_KEY_VENDOR_KEY,
2684 			      key->vendor_key, RDMA_NLDEV_ATTR_PAD))
2685 		goto err;
2686 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_FRMR_POOL_KEY_NUM_DMA_BLOCKS,
2687 			      key->num_dma_blocks, RDMA_NLDEV_ATTR_PAD))
2688 		goto err;
2689 
2690 	if (key->kernel_vendor_key &&
2691 	    nla_put_u64_64bit(msg,
2692 			      RDMA_NLDEV_ATTR_FRMR_POOL_KEY_KERNEL_VENDOR_KEY,
2693 			      key->kernel_vendor_key, RDMA_NLDEV_ATTR_PAD))
2694 		goto err;
2695 
2696 	nla_nest_end(msg, key_attr);
2697 	return 0;
2698 
2699 err:
2700 	return -EMSGSIZE;
2701 }
2702 
fill_frmr_pool_entry(struct sk_buff * msg,struct ib_frmr_pool * pool)2703 static int fill_frmr_pool_entry(struct sk_buff *msg, struct ib_frmr_pool *pool)
2704 {
2705 	if (fill_frmr_pool_key(msg, &pool->key))
2706 		return -EMSGSIZE;
2707 
2708 	spin_lock(&pool->lock);
2709 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_FRMR_POOL_QUEUE_HANDLES,
2710 			pool->queue.ci + pool->inactive_queue.ci))
2711 		goto err_unlock;
2712 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_FRMR_POOL_MAX_IN_USE,
2713 			      pool->max_in_use, RDMA_NLDEV_ATTR_PAD))
2714 		goto err_unlock;
2715 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_FRMR_POOL_IN_USE,
2716 			      pool->in_use, RDMA_NLDEV_ATTR_PAD))
2717 		goto err_unlock;
2718 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_FRMR_POOL_PINNED_HANDLES,
2719 			pool->pinned_handles))
2720 		goto err_unlock;
2721 	spin_unlock(&pool->lock);
2722 
2723 	return 0;
2724 
2725 err_unlock:
2726 	spin_unlock(&pool->lock);
2727 	return -EMSGSIZE;
2728 }
2729 
nldev_frmr_pools_parse_key(struct nlattr * tb[],struct ib_frmr_key * key,struct netlink_ext_ack * extack)2730 static int nldev_frmr_pools_parse_key(struct nlattr *tb[],
2731 				      struct ib_frmr_key *key,
2732 				      struct netlink_ext_ack *extack)
2733 {
2734 	if (tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ATS])
2735 		key->ats = nla_get_u8(tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ATS]);
2736 
2737 	if (tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ACCESS_FLAGS])
2738 		key->access_flags = nla_get_u32(
2739 			tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ACCESS_FLAGS]);
2740 
2741 	if (tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_VENDOR_KEY])
2742 		key->vendor_key = nla_get_u64(
2743 			tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_VENDOR_KEY]);
2744 
2745 	if (tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_NUM_DMA_BLOCKS])
2746 		key->num_dma_blocks = nla_get_u64(
2747 			tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_NUM_DMA_BLOCKS]);
2748 
2749 	if (tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_KERNEL_VENDOR_KEY])
2750 		return -EINVAL;
2751 
2752 	return 0;
2753 }
2754 
nldev_frmr_pools_set_pinned(struct ib_device * device,struct nlattr * tb[],struct netlink_ext_ack * extack)2755 static int nldev_frmr_pools_set_pinned(struct ib_device *device,
2756 				       struct nlattr *tb[],
2757 				       struct netlink_ext_ack *extack)
2758 {
2759 	struct nlattr *key_tb[RDMA_NLDEV_ATTR_MAX];
2760 	struct ib_frmr_key key = { 0 };
2761 	u32 pinned_handles = 0;
2762 	int err = 0;
2763 
2764 	pinned_handles =
2765 		nla_get_u32(tb[RDMA_NLDEV_ATTR_FRMR_POOL_PINNED_HANDLES]);
2766 
2767 	if (!tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY])
2768 		return -EINVAL;
2769 
2770 	err = nla_parse_nested(key_tb, RDMA_NLDEV_ATTR_MAX - 1,
2771 			       tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY], nldev_policy,
2772 			       extack);
2773 	if (err)
2774 		return err;
2775 
2776 	err = nldev_frmr_pools_parse_key(key_tb, &key, extack);
2777 	if (err)
2778 		return err;
2779 
2780 	err = ib_frmr_pools_set_pinned(device, &key, pinned_handles);
2781 
2782 	return err;
2783 }
2784 
nldev_frmr_pools_get_dumpit(struct sk_buff * skb,struct netlink_callback * cb)2785 static int nldev_frmr_pools_get_dumpit(struct sk_buff *skb,
2786 				       struct netlink_callback *cb)
2787 {
2788 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2789 	struct ib_frmr_pools *pools;
2790 	int err, ret = 0, idx = 0;
2791 	struct ib_frmr_pool *pool;
2792 	struct nlattr *table_attr;
2793 	struct nlattr *entry_attr;
2794 	bool show_details = false;
2795 	struct ib_device *device;
2796 	int start = cb->args[0];
2797 	struct rb_node *node;
2798 	struct nlmsghdr *nlh;
2799 	bool filled = false;
2800 
2801 	err = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2802 			    nldev_policy, NL_VALIDATE_LIBERAL, NULL);
2803 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
2804 		return -EINVAL;
2805 
2806 	device = ib_device_get_by_index(
2807 		sock_net(skb->sk), nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]));
2808 	if (!device)
2809 		return -EINVAL;
2810 
2811 	if (tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS])
2812 		show_details = nla_get_u8(tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]);
2813 
2814 	pools = device->frmr_pools;
2815 	if (!pools) {
2816 		ib_device_put(device);
2817 		return 0;
2818 	}
2819 
2820 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
2821 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
2822 					 RDMA_NLDEV_CMD_FRMR_POOLS_GET),
2823 			0, NLM_F_MULTI);
2824 
2825 	if (!nlh || fill_nldev_handle(skb, device)) {
2826 		ret = -EMSGSIZE;
2827 		goto err;
2828 	}
2829 
2830 	table_attr = nla_nest_start_noflag(skb, RDMA_NLDEV_ATTR_FRMR_POOLS);
2831 	if (!table_attr) {
2832 		ret = -EMSGSIZE;
2833 		goto err;
2834 	}
2835 
2836 	read_lock(&pools->rb_lock);
2837 	for (node = rb_first(&pools->rb_root); node; node = rb_next(node)) {
2838 		pool = rb_entry(node, struct ib_frmr_pool, node);
2839 		if (pool->key.kernel_vendor_key && !show_details)
2840 			continue;
2841 
2842 		if (idx < start) {
2843 			idx++;
2844 			continue;
2845 		}
2846 
2847 		filled = true;
2848 
2849 		entry_attr = nla_nest_start_noflag(
2850 			skb, RDMA_NLDEV_ATTR_FRMR_POOL_ENTRY);
2851 		if (!entry_attr) {
2852 			ret = -EMSGSIZE;
2853 			goto end_msg;
2854 		}
2855 
2856 		if (fill_frmr_pool_entry(skb, pool)) {
2857 			nla_nest_cancel(skb, entry_attr);
2858 			ret = -EMSGSIZE;
2859 			goto end_msg;
2860 		}
2861 
2862 		nla_nest_end(skb, entry_attr);
2863 		idx++;
2864 	}
2865 end_msg:
2866 	read_unlock(&pools->rb_lock);
2867 
2868 	nla_nest_end(skb, table_attr);
2869 	nlmsg_end(skb, nlh);
2870 	cb->args[0] = idx;
2871 
2872 	/*
2873 	 * No more entries to fill, cancel the message and
2874 	 * return 0 to mark end of dumpit.
2875 	 */
2876 	if (!filled)
2877 		goto err;
2878 
2879 	ib_device_put(device);
2880 	return skb->len;
2881 
2882 err:
2883 	nlmsg_cancel(skb, nlh);
2884 	ib_device_put(device);
2885 	return ret;
2886 }
2887 
nldev_frmr_pools_set_doit(struct sk_buff * skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)2888 static int nldev_frmr_pools_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
2889 				     struct netlink_ext_ack *extack)
2890 {
2891 	struct ib_device *device;
2892 	struct nlattr **tb;
2893 	u32 aging_period;
2894 	int err;
2895 
2896 	tb = kzalloc_objs(*tb, RDMA_NLDEV_ATTR_MAX, GFP_KERNEL);
2897 	if (!tb)
2898 		return -ENOMEM;
2899 
2900 	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
2901 			  extack);
2902 	if (err)
2903 		goto free_tb;
2904 
2905 	if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX]) {
2906 		err = -EINVAL;
2907 		goto free_tb;
2908 	}
2909 
2910 	device = ib_device_get_by_index(
2911 		sock_net(skb->sk), nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]));
2912 	if (!device) {
2913 		err = -EINVAL;
2914 		goto free_tb;
2915 	}
2916 
2917 	if (tb[RDMA_NLDEV_ATTR_FRMR_POOLS_AGING_PERIOD]) {
2918 		aging_period = nla_get_u32(
2919 			tb[RDMA_NLDEV_ATTR_FRMR_POOLS_AGING_PERIOD]);
2920 		err = ib_frmr_pools_set_aging_period(device, aging_period);
2921 		goto done;
2922 	}
2923 
2924 	if (tb[RDMA_NLDEV_ATTR_FRMR_POOL_PINNED_HANDLES])
2925 		err = nldev_frmr_pools_set_pinned(device, tb, extack);
2926 
2927 done:
2928 	ib_device_put(device);
2929 free_tb:
2930 	kfree(tb);
2931 	return err;
2932 }
2933 
2934 static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
2935 	[RDMA_NLDEV_CMD_GET] = {
2936 		.doit = nldev_get_doit,
2937 		.dump = nldev_get_dumpit,
2938 	},
2939 	[RDMA_NLDEV_CMD_GET_CHARDEV] = {
2940 		.doit = nldev_get_chardev,
2941 	},
2942 	[RDMA_NLDEV_CMD_SET] = {
2943 		.doit = nldev_set_doit,
2944 		.flags = RDMA_NL_ADMIN_PERM,
2945 	},
2946 	[RDMA_NLDEV_CMD_NEWLINK] = {
2947 		.doit = nldev_newlink,
2948 		.flags = RDMA_NL_ADMIN_PERM,
2949 	},
2950 	[RDMA_NLDEV_CMD_DELLINK] = {
2951 		.doit = nldev_dellink,
2952 		.flags = RDMA_NL_ADMIN_PERM,
2953 	},
2954 	[RDMA_NLDEV_CMD_PORT_GET] = {
2955 		.doit = nldev_port_get_doit,
2956 		.dump = nldev_port_get_dumpit,
2957 	},
2958 	[RDMA_NLDEV_CMD_RES_GET] = {
2959 		.doit = nldev_res_get_doit,
2960 		.dump = nldev_res_get_dumpit,
2961 	},
2962 	[RDMA_NLDEV_CMD_RES_QP_GET] = {
2963 		.doit = nldev_res_get_qp_doit,
2964 		.dump = nldev_res_get_qp_dumpit,
2965 	},
2966 	[RDMA_NLDEV_CMD_RES_CM_ID_GET] = {
2967 		.doit = nldev_res_get_cm_id_doit,
2968 		.dump = nldev_res_get_cm_id_dumpit,
2969 	},
2970 	[RDMA_NLDEV_CMD_RES_CQ_GET] = {
2971 		.doit = nldev_res_get_cq_doit,
2972 		.dump = nldev_res_get_cq_dumpit,
2973 	},
2974 	[RDMA_NLDEV_CMD_RES_MR_GET] = {
2975 		.doit = nldev_res_get_mr_doit,
2976 		.dump = nldev_res_get_mr_dumpit,
2977 	},
2978 	[RDMA_NLDEV_CMD_RES_PD_GET] = {
2979 		.doit = nldev_res_get_pd_doit,
2980 		.dump = nldev_res_get_pd_dumpit,
2981 	},
2982 	[RDMA_NLDEV_CMD_RES_CTX_GET] = {
2983 		.doit = nldev_res_get_ctx_doit,
2984 		.dump = nldev_res_get_ctx_dumpit,
2985 	},
2986 	[RDMA_NLDEV_CMD_RES_SRQ_GET] = {
2987 		.doit = nldev_res_get_srq_doit,
2988 		.dump = nldev_res_get_srq_dumpit,
2989 	},
2990 	[RDMA_NLDEV_CMD_SYS_GET] = {
2991 		.doit = nldev_sys_get_doit,
2992 	},
2993 	[RDMA_NLDEV_CMD_SYS_SET] = {
2994 		.doit = nldev_set_sys_set_doit,
2995 		.flags = RDMA_NL_ADMIN_PERM,
2996 	},
2997 	[RDMA_NLDEV_CMD_STAT_SET] = {
2998 		.doit = nldev_stat_set_doit,
2999 		.flags = RDMA_NL_ADMIN_PERM,
3000 	},
3001 	[RDMA_NLDEV_CMD_STAT_GET] = {
3002 		.doit = nldev_stat_get_doit,
3003 		.dump = nldev_stat_get_dumpit,
3004 	},
3005 	[RDMA_NLDEV_CMD_STAT_DEL] = {
3006 		.doit = nldev_stat_del_doit,
3007 		.flags = RDMA_NL_ADMIN_PERM,
3008 	},
3009 	[RDMA_NLDEV_CMD_RES_QP_GET_RAW] = {
3010 		.doit = nldev_res_get_qp_raw_doit,
3011 		.dump = nldev_res_get_qp_raw_dumpit,
3012 		.flags = RDMA_NL_ADMIN_PERM,
3013 	},
3014 	[RDMA_NLDEV_CMD_RES_CQ_GET_RAW] = {
3015 		.doit = nldev_res_get_cq_raw_doit,
3016 		.dump = nldev_res_get_cq_raw_dumpit,
3017 		.flags = RDMA_NL_ADMIN_PERM,
3018 	},
3019 	[RDMA_NLDEV_CMD_RES_MR_GET_RAW] = {
3020 		.doit = nldev_res_get_mr_raw_doit,
3021 		.dump = nldev_res_get_mr_raw_dumpit,
3022 		.flags = RDMA_NL_ADMIN_PERM,
3023 	},
3024 	[RDMA_NLDEV_CMD_RES_SRQ_GET_RAW] = {
3025 		.doit = nldev_res_get_srq_raw_doit,
3026 		.dump = nldev_res_get_srq_raw_dumpit,
3027 		.flags = RDMA_NL_ADMIN_PERM,
3028 	},
3029 	[RDMA_NLDEV_CMD_STAT_GET_STATUS] = {
3030 		.doit = nldev_stat_get_counter_status_doit,
3031 	},
3032 	[RDMA_NLDEV_CMD_NEWDEV] = {
3033 		.doit = nldev_newdev,
3034 		.flags = RDMA_NL_ADMIN_PERM,
3035 	},
3036 	[RDMA_NLDEV_CMD_DELDEV] = {
3037 		.doit = nldev_deldev,
3038 		.flags = RDMA_NL_ADMIN_PERM,
3039 	},
3040 	[RDMA_NLDEV_CMD_FRMR_POOLS_GET] = {
3041 		.dump = nldev_frmr_pools_get_dumpit,
3042 	},
3043 	[RDMA_NLDEV_CMD_FRMR_POOLS_SET] = {
3044 		.doit = nldev_frmr_pools_set_doit,
3045 		.flags = RDMA_NL_ADMIN_PERM,
3046 	},
3047 };
3048 
/*
 * Add the netdev index and name for @port of @device to a monitor message.
 *
 * Nothing is added, and 0 is returned, when the port has no netdev or the
 * netdev is not in @net.
 *
 * Return: 0 on success (or nothing to add), otherwise the nla_put error.
 */
static int fill_mon_netdev_rename(struct sk_buff *msg,
				  struct ib_device *device, u32 port,
				  const struct net *net)
{
	struct net_device *ndev;
	int err = 0;

	ndev = ib_device_get_netdev(device, port);
	if (ndev && net_eq(dev_net(ndev), net)) {
		err = nla_put_u32(msg, RDMA_NLDEV_ATTR_NDEV_INDEX,
				  ndev->ifindex);
		if (!err)
			err = nla_put_string(msg, RDMA_NLDEV_ATTR_NDEV_NAME,
					     ndev->name);
	}

	/* Called unconditionally, including when ndev is NULL. */
	dev_put(ndev);
	return err;
}
3067 
/*
 * Add the device index, device name and port index to a monitor message,
 * followed by the netdev index and name when @port has a netdev.
 *
 * If the port's netdev exists but is not in @net, nothing is added and 0 is
 * returned.
 *
 * Return: 0 on success (or nothing to add), otherwise the nla_put error.
 */
static int fill_mon_netdev_association(struct sk_buff *msg,
				       struct ib_device *device, u32 port,
				       const struct net *net)
{
	struct net_device *ndev = ib_device_get_netdev(device, port);
	int err = 0;

	if (ndev && !net_eq(dev_net(ndev), net))
		goto put_ndev;

	err = nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index);
	if (!err)
		err = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME,
				     dev_name(&device->dev));
	if (!err)
		err = nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port);

	/* The netdev attributes are only present when a netdev is bound. */
	if (err || !ndev)
		goto put_ndev;

	err = nla_put_u32(msg, RDMA_NLDEV_ATTR_NDEV_INDEX, ndev->ifindex);
	if (!err)
		err = nla_put_string(msg, RDMA_NLDEV_ATTR_NDEV_NAME,
				     ndev->name);

put_ndev:
	dev_put(ndev);
	return err;
}
3105 
/*
 * Emit a rate-limited warning that a monitor notification of @type for
 * @device could not be sent.  Unknown event types are silently ignored.
 *
 * For the netdev attach and netdev rename events the message also reports
 * the ifindex of the netdev currently bound to @port_num.  The lookup can
 * return NULL (for example when the netdev has gone away by the time the
 * failure is reported), so the ifindex is printed as 0 in that case instead
 * of dereferencing a NULL pointer.
 */
static void rdma_nl_notify_err_msg(struct ib_device *device, u32 port_num,
				    enum rdma_nl_notify_event_type type)
{
	struct net_device *netdev;

	switch (type) {
	case RDMA_REGISTER_EVENT:
		dev_warn_ratelimited(&device->dev,
				     "Failed to send RDMA monitor register device event\n");
		break;
	case RDMA_UNREGISTER_EVENT:
		dev_warn_ratelimited(&device->dev,
				     "Failed to send RDMA monitor unregister device event\n");
		break;
	case RDMA_NETDEV_ATTACH_EVENT:
		netdev = ib_device_get_netdev(device, port_num);
		dev_warn_ratelimited(&device->dev,
				     "Failed to send RDMA monitor netdev attach event: port %d netdev %d\n",
				     port_num, netdev ? netdev->ifindex : 0);
		dev_put(netdev);
		break;
	case RDMA_NETDEV_DETACH_EVENT:
		dev_warn_ratelimited(&device->dev,
				     "Failed to send RDMA monitor netdev detach event: port %d\n",
				     port_num);
		break;
	case RDMA_RENAME_EVENT:
		dev_warn_ratelimited(&device->dev,
				     "Failed to send RDMA monitor rename device event\n");
		break;

	case RDMA_NETDEV_RENAME_EVENT:
		netdev = ib_device_get_netdev(device, port_num);
		dev_warn_ratelimited(&device->dev,
				     "Failed to send RDMA monitor netdev rename event: port %d netdev %d\n",
				     port_num, netdev ? netdev->ifindex : 0);
		dev_put(netdev);
		break;
	default:
		break;
	}
}
3148 
/*
 * Build an RDMA_NLDEV_CMD_MONITOR message describing event @type on
 * @device / @port_num and multicast it to RDMA_NL_GROUP_NOTIFY in the
 * device's net namespace.
 *
 * The payload depends on @type: register/unregister/rename events carry the
 * device handle, netdev attach/detach events carry the device/port/netdev
 * association, and netdev rename events carry the netdev index and name.
 * Every message ends with RDMA_NLDEV_ATTR_EVENT_TYPE.
 *
 * Return: 0 on success, and also when the multicast returns -ESRCH (which
 * is deliberately not treated as a failure); -EINVAL if the device has no
 * net namespace, -ENOMEM if the skb cannot be allocated, or the error from
 * message construction or multicast.  All failures after the skb has been
 * allocated also log a rate-limited warning.
 */
int rdma_nl_notify_event(struct ib_device *device, u32 port_num,
			  enum rdma_nl_notify_event_type type)
{
	struct net *net = read_pnet(&device->coredev.rdma_net);
	struct sk_buff *skb;
	void *hdr;
	int ret;

	if (!net)
		return -EINVAL;

	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!skb)
		return -ENOMEM;

	hdr = nlmsg_put(skb, 0, 0,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_MONITOR),
			0, 0);
	if (!hdr) {
		ret = -EMSGSIZE;
		goto fail;
	}

	switch (type) {
	case RDMA_REGISTER_EVENT:
	case RDMA_UNREGISTER_EVENT:
	case RDMA_RENAME_EVENT:
		ret = fill_nldev_handle(skb, device);
		break;
	case RDMA_NETDEV_ATTACH_EVENT:
	case RDMA_NETDEV_DETACH_EVENT:
		ret = fill_mon_netdev_association(skb, device, port_num, net);
		break;
	case RDMA_NETDEV_RENAME_EVENT:
		ret = fill_mon_netdev_rename(skb, device, port_num, net);
		break;
	default:
		/* Other types carry only the event type attribute. */
		ret = 0;
		break;
	}
	if (ret)
		goto fail;

	ret = nla_put_u8(skb, RDMA_NLDEV_ATTR_EVENT_TYPE, type);
	if (ret)
		goto fail;

	nlmsg_end(skb, hdr);
	ret = rdma_nl_multicast(net, skb, RDMA_NL_GROUP_NOTIFY, GFP_KERNEL);
	if (!ret || ret == -ESRCH)
		return 0;

	/* The netlink send path already freed the skb; do not free it again. */
	skb = NULL;
fail:
	rdma_nl_notify_err_msg(device, port_num, type);
	nlmsg_free(skb);
	return ret;
}
3210 
/* Register nldev_cb_table as the netlink callback set for RDMA_NL_NLDEV. */
void __init nldev_init(void)
{
	rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table);
}
3215 
/* Unregister the RDMA_NL_NLDEV netlink callbacks. */
void nldev_exit(void)
{
	rdma_nl_unregister(RDMA_NL_NLDEV);
}
3220 
/*
 * NOTE(review): the literal 5 presumably has to equal the numeric value of
 * RDMA_NL_NLDEV -- confirm against the enum definition.
 */
MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_NLDEV, 5);
3222