xref: /linux/drivers/infiniband/core/nldev.c (revision 55f3538c4923e9dfca132e99ebec370e8094afda)
1 /*
2  * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  * 3. Neither the names of the copyright holders nor the names of its
13  *    contributors may be used to endorse or promote products derived from
14  *    this software without specific prior written permission.
15  *
16  * Alternatively, this software may be distributed under the terms of the
17  * GNU General Public License ("GPL") version 2 as published by the Free
18  * Software Foundation.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include <linux/module.h>
34 #include <linux/pid.h>
35 #include <linux/pid_namespace.h>
36 #include <net/netlink.h>
37 #include <rdma/rdma_netlink.h>
38 
39 #include "core_priv.h"
40 
/*
 * Netlink attribute validation policy for all RDMA_NL_NLDEV requests.
 * String attributes carry an explicit maximum length so nlmsg_parse()
 * rejects oversized input before the handlers run.
 */
static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
	[RDMA_NLDEV_ATTR_DEV_INDEX]     = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_DEV_NAME]	= { .type = NLA_NUL_STRING,
					    .len = IB_DEVICE_NAME_MAX - 1},
	[RDMA_NLDEV_ATTR_PORT_INDEX]	= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_FW_VERSION]	= { .type = NLA_NUL_STRING,
					    .len = IB_FW_VERSION_NAME_MAX - 1},
	[RDMA_NLDEV_ATTR_NODE_GUID]	= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_SYS_IMAGE_GUID] = { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_SUBNET_PREFIX]	= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_LID]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_SM_LID]	= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_LMC]		= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_PORT_STATE]	= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_PORT_PHYS_STATE] = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_DEV_NODE_TYPE] = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_SUMMARY]	= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY]	= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME] = { .type = NLA_NUL_STRING,
					     .len = 16 },
	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR] = { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_RES_QP]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_QP_ENTRY]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_LQPN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_RQPN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_RQ_PSN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_SQ_PSN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE] = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_TYPE]		= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_STATE]		= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_PID]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_KERN_NAME]		= { .type = NLA_NUL_STRING,
						    .len = TASK_COMM_LEN },
};
75 
76 static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
77 {
78 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
79 		return -EMSGSIZE;
80 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME, device->name))
81 		return -EMSGSIZE;
82 
83 	return 0;
84 }
85 
/*
 * Fill @msg with device-wide attributes: handle, highest port number,
 * capability flags, firmware version (when present), GUIDs and node
 * type.  Returns 0 on success or -EMSGSIZE when the skb runs out of
 * room.
 */
static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
{
	char fw[IB_FW_VERSION_NAME_MAX];

	if (fill_nldev_handle(msg, device))
		return -EMSGSIZE;

	/* PORT_INDEX here carries rdma_end_port(), the highest valid port. */
	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, rdma_end_port(device)))
		return -EMSGSIZE;

	BUILD_BUG_ON(sizeof(device->attrs.device_cap_flags) != sizeof(u64));
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
			      device->attrs.device_cap_flags, 0))
		return -EMSGSIZE;

	ib_get_device_fw_str(device, fw);
	/* Devices without firmware report an empty string; skip the attr. */
	if (strlen(fw) && nla_put_string(msg, RDMA_NLDEV_ATTR_FW_VERSION, fw))
		return -EMSGSIZE;

	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_NODE_GUID,
			      be64_to_cpu(device->node_guid), 0))
		return -EMSGSIZE;
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SYS_IMAGE_GUID,
			      be64_to_cpu(device->attrs.sys_image_guid), 0))
		return -EMSGSIZE;
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_NODE_TYPE, device->node_type))
		return -EMSGSIZE;
	return 0;
}
116 
117 static int fill_port_info(struct sk_buff *msg,
118 			  struct ib_device *device, u32 port)
119 {
120 	struct ib_port_attr attr;
121 	int ret;
122 
123 	if (fill_nldev_handle(msg, device))
124 		return -EMSGSIZE;
125 
126 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
127 		return -EMSGSIZE;
128 
129 	ret = ib_query_port(device, port, &attr);
130 	if (ret)
131 		return ret;
132 
133 	BUILD_BUG_ON(sizeof(attr.port_cap_flags) > sizeof(u64));
134 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
135 			      (u64)attr.port_cap_flags, 0))
136 		return -EMSGSIZE;
137 	if (rdma_protocol_ib(device, port) &&
138 	    nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX,
139 			      attr.subnet_prefix, 0))
140 		return -EMSGSIZE;
141 	if (rdma_protocol_ib(device, port)) {
142 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid))
143 			return -EMSGSIZE;
144 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, attr.sm_lid))
145 			return -EMSGSIZE;
146 		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_LMC, attr.lmc))
147 			return -EMSGSIZE;
148 	}
149 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_STATE, attr.state))
150 		return -EMSGSIZE;
151 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state))
152 		return -EMSGSIZE;
153 	return 0;
154 }
155 
156 static int fill_res_info_entry(struct sk_buff *msg,
157 			       const char *name, u64 curr)
158 {
159 	struct nlattr *entry_attr;
160 
161 	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY);
162 	if (!entry_attr)
163 		return -EMSGSIZE;
164 
165 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME, name))
166 		goto err;
167 	if (nla_put_u64_64bit(msg,
168 			      RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR, curr, 0))
169 		goto err;
170 
171 	nla_nest_end(msg, entry_attr);
172 	return 0;
173 
174 err:
175 	nla_nest_cancel(msg, entry_attr);
176 	return -EMSGSIZE;
177 }
178 
179 static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
180 {
181 	static const char * const names[RDMA_RESTRACK_MAX] = {
182 		[RDMA_RESTRACK_PD] = "pd",
183 		[RDMA_RESTRACK_CQ] = "cq",
184 		[RDMA_RESTRACK_QP] = "qp",
185 	};
186 
187 	struct rdma_restrack_root *res = &device->res;
188 	struct nlattr *table_attr;
189 	int ret, i, curr;
190 
191 	if (fill_nldev_handle(msg, device))
192 		return -EMSGSIZE;
193 
194 	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_SUMMARY);
195 	if (!table_attr)
196 		return -EMSGSIZE;
197 
198 	for (i = 0; i < RDMA_RESTRACK_MAX; i++) {
199 		if (!names[i])
200 			continue;
201 		curr = rdma_restrack_count(res, i, task_active_pid_ns(current));
202 		ret = fill_res_info_entry(msg, names[i], curr);
203 		if (ret)
204 			goto err;
205 	}
206 
207 	nla_nest_end(msg, table_attr);
208 	return 0;
209 
210 err:
211 	nla_nest_cancel(msg, table_attr);
212 	return ret;
213 }
214 
/*
 * Serialize one tracked QP into @msg as a nested
 * RDMA_NLDEV_ATTR_RES_QP_ENTRY attribute.
 *
 * @port: when non-zero, QPs bound to a different port are silently
 *	  skipped (function returns 0 without emitting anything).
 *
 * Returns 0 on success or skip, -EMSGSIZE when the skb has no room,
 * or the error code from ib_query_qp().
 */
static int fill_res_qp_entry(struct sk_buff *msg,
			     struct ib_qp *qp, uint32_t port)
{
	struct rdma_restrack_entry *res = &qp->res;
	struct ib_qp_init_attr qp_init_attr;
	struct nlattr *entry_attr;
	struct ib_qp_attr qp_attr;
	int ret;

	ret = ib_query_qp(qp, &qp_attr, 0, &qp_init_attr);
	if (ret)
		return ret;

	if (port && port != qp_attr.port_num)
		return 0;

	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
	if (!entry_attr)
		goto out;

	/* In create_qp() port is not set yet */
	if (qp_attr.port_num &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp_attr.port_num))
		goto err;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num))
		goto err;
	/* Remote QPN and RQ PSN are meaningful for connected QP types only. */
	if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN,
				qp_attr.dest_qp_num))
			goto err;
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQ_PSN,
				qp_attr.rq_psn))
			goto err;
	}

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SQ_PSN, qp_attr.sq_psn))
		goto err;

	if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC ||
	    qp->qp_type == IB_QPT_XRC_INI || qp->qp_type == IB_QPT_XRC_TGT) {
		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE,
			       qp_attr.path_mig_state))
			goto err;
	}
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, qp->qp_type))
		goto err;
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
		goto err;

	/*
	 * Kernel-owned QPs report their kernel task name directly;
	 * user-owned QPs report only the PID, and the netlink consumer
	 * is expected to read /proc/PID/comm for the task name
	 * (res->kern_name is NULL in that case).
	 */
	if (rdma_is_kernel_res(res)) {
		if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME, res->kern_name))
			goto err;
	} else {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, task_pid_vnr(res->task)))
			goto err;
	}

	nla_nest_end(msg, entry_attr);
	return 0;

err:
	nla_nest_cancel(msg, entry_attr);
out:
	return -EMSGSIZE;
}
286 
287 static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
288 			  struct netlink_ext_ack *extack)
289 {
290 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
291 	struct ib_device *device;
292 	struct sk_buff *msg;
293 	u32 index;
294 	int err;
295 
296 	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
297 			  nldev_policy, extack);
298 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
299 		return -EINVAL;
300 
301 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
302 
303 	device = ib_device_get_by_index(index);
304 	if (!device)
305 		return -EINVAL;
306 
307 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
308 	if (!msg) {
309 		err = -ENOMEM;
310 		goto err;
311 	}
312 
313 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
314 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
315 			0, 0);
316 
317 	err = fill_dev_info(msg, device);
318 	if (err)
319 		goto err_free;
320 
321 	nlmsg_end(msg, nlh);
322 
323 	put_device(&device->dev);
324 	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
325 
326 err_free:
327 	nlmsg_free(msg);
328 err:
329 	put_device(&device->dev);
330 	return err;
331 }
332 
333 static int _nldev_get_dumpit(struct ib_device *device,
334 			     struct sk_buff *skb,
335 			     struct netlink_callback *cb,
336 			     unsigned int idx)
337 {
338 	int start = cb->args[0];
339 	struct nlmsghdr *nlh;
340 
341 	if (idx < start)
342 		return 0;
343 
344 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
345 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
346 			0, NLM_F_MULTI);
347 
348 	if (fill_dev_info(skb, device)) {
349 		nlmsg_cancel(skb, nlh);
350 		goto out;
351 	}
352 
353 	nlmsg_end(skb, nlh);
354 
355 	idx++;
356 
357 out:	cb->args[0] = idx;
358 	return skb->len;
359 }
360 
/* Dump the device attributes of every registered ib_device. */
static int nldev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
	/*
	 * There is no need to take lock, because
	 * we are relying on ib_core's lists_rwsem
	 */
	return ib_enum_all_devs(_nldev_get_dumpit, skb, cb);
}
369 
370 static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
371 			       struct netlink_ext_ack *extack)
372 {
373 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
374 	struct ib_device *device;
375 	struct sk_buff *msg;
376 	u32 index;
377 	u32 port;
378 	int err;
379 
380 	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
381 			  nldev_policy, extack);
382 	if (err ||
383 	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
384 	    !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
385 		return -EINVAL;
386 
387 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
388 	device = ib_device_get_by_index(index);
389 	if (!device)
390 		return -EINVAL;
391 
392 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
393 	if (!rdma_is_port_valid(device, port)) {
394 		err = -EINVAL;
395 		goto err;
396 	}
397 
398 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
399 	if (!msg) {
400 		err = -ENOMEM;
401 		goto err;
402 	}
403 
404 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
405 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
406 			0, 0);
407 
408 	err = fill_port_info(msg, device, port);
409 	if (err)
410 		goto err_free;
411 
412 	nlmsg_end(msg, nlh);
413 	put_device(&device->dev);
414 
415 	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
416 
417 err_free:
418 	nlmsg_free(msg);
419 err:
420 	put_device(&device->dev);
421 	return err;
422 }
423 
/*
 * RDMA_NLDEV_CMD_PORT_GET dumpit handler: emit one NLM_F_MULTI message
 * per port of the device selected by RDMA_NLDEV_ATTR_DEV_INDEX,
 * restartable via the port counter kept in cb->args[0].
 */
static int nldev_port_get_dumpit(struct sk_buff *skb,
				 struct netlink_callback *cb)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct ib_device *device;
	int start = cb->args[0];
	struct nlmsghdr *nlh;
	u32 idx = 0;
	u32 ifindex;
	int err;
	u32 p;

	err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
			  nldev_policy, NULL);
	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
		return -EINVAL;

	ifindex = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	/* Holds a device reference; dropped via put_device() at out. */
	device = ib_device_get_by_index(ifindex);
	if (!device)
		return -EINVAL;

	for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
		/*
		 * The dumpit function returns all information from specific
		 * index. This specific index is taken from the netlink
		 * messages request sent by user and it is available
		 * in cb->args[0].
		 *
		 * Usually, the user doesn't fill this field and it causes
		 * to return everything.
		 *
		 */
		if (idx < start) {
			idx++;
			continue;
		}

		/* NOTE(review): nlmsg_put() result is not checked here;
		 * fill_port_info() would then write into a full skb —
		 * verify this cannot NULL-deref in nlmsg_cancel(). */
		nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
				cb->nlh->nlmsg_seq,
				RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
						 RDMA_NLDEV_CMD_PORT_GET),
				0, NLM_F_MULTI);

		if (fill_port_info(skb, device, p)) {
			/* Out of room; resume from this port next pass. */
			nlmsg_cancel(skb, nlh);
			goto out;
		}
		idx++;
		nlmsg_end(skb, nlh);
	}

out:
	put_device(&device->dev);
	cb->args[0] = idx;
	return skb->len;
}
481 
482 static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
483 			      struct netlink_ext_ack *extack)
484 {
485 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
486 	struct ib_device *device;
487 	struct sk_buff *msg;
488 	u32 index;
489 	int ret;
490 
491 	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
492 			  nldev_policy, extack);
493 	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
494 		return -EINVAL;
495 
496 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
497 	device = ib_device_get_by_index(index);
498 	if (!device)
499 		return -EINVAL;
500 
501 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
502 	if (!msg)
503 		goto err;
504 
505 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
506 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
507 			0, 0);
508 
509 	ret = fill_res_info(msg, device);
510 	if (ret)
511 		goto err_free;
512 
513 	nlmsg_end(msg, nlh);
514 	put_device(&device->dev);
515 	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
516 
517 err_free:
518 	nlmsg_free(msg);
519 err:
520 	put_device(&device->dev);
521 	return ret;
522 }
523 
524 static int _nldev_res_get_dumpit(struct ib_device *device,
525 				 struct sk_buff *skb,
526 				 struct netlink_callback *cb,
527 				 unsigned int idx)
528 {
529 	int start = cb->args[0];
530 	struct nlmsghdr *nlh;
531 
532 	if (idx < start)
533 		return 0;
534 
535 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
536 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
537 			0, NLM_F_MULTI);
538 
539 	if (fill_res_info(skb, device)) {
540 		nlmsg_cancel(skb, nlh);
541 		goto out;
542 	}
543 
544 	nlmsg_end(skb, nlh);
545 
546 	idx++;
547 
548 out:
549 	cb->args[0] = idx;
550 	return skb->len;
551 }
552 
/* Dump the resource summary of every registered ib_device. */
static int nldev_res_get_dumpit(struct sk_buff *skb,
				struct netlink_callback *cb)
{
	return ib_enum_all_devs(_nldev_res_get_dumpit, skb, cb);
}
558 
/*
 * RDMA_NLDEV_CMD_RES_QP_GET dumpit handler: dump all tracked QPs of the
 * device selected by RDMA_NLDEV_ATTR_DEV_INDEX, optionally filtered by
 * RDMA_NLDEV_ATTR_PORT_INDEX, into a nested RDMA_NLDEV_ATTR_RES_QP
 * table.  Restartable via the entry counter in cb->args[0].
 */
static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
				   struct netlink_callback *cb)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct rdma_restrack_entry *res;
	int err, ret = 0, idx = 0;
	struct nlattr *table_attr;
	struct ib_device *device;
	int start = cb->args[0];
	struct ib_qp *qp = NULL;
	struct nlmsghdr *nlh;
	u32 index, port = 0;

	err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
			  nldev_policy, NULL);
	/*
	 * Right now, we are expecting the device index to get QP information,
	 * but it is possible to extend this code to return all devices in
	 * one shot by checking the existence of RDMA_NLDEV_ATTR_DEV_INDEX;
	 * if it doesn't exist, we will iterate over all devices.
	 *
	 * But it is not needed for now.
	 */
	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	/* Holds a device reference; dropped on every exit path. */
	device = ib_device_get_by_index(index);
	if (!device)
		return -EINVAL;

	/*
	 * If no PORT_INDEX is supplied, we will return all QPs from that device
	 */
	if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
		port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
		if (!rdma_is_port_valid(device, port)) {
			ret = -EINVAL;
			goto err_index;
		}
	}

	/* NOTE(review): nlmsg_put() result is not checked — verify a
	 * full skb cannot reach nlmsg_cancel()/nlmsg_end() with NULL. */
	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_QP_GET),
			0, NLM_F_MULTI);

	if (fill_nldev_handle(skb, device)) {
		ret = -EMSGSIZE;
		goto err;
	}

	table_attr = nla_nest_start(skb, RDMA_NLDEV_ATTR_RES_QP);
	if (!table_attr) {
		ret = -EMSGSIZE;
		goto err;
	}

	down_read(&device->res.rwsem);
	hash_for_each_possible(device->res.hash, res, node, RDMA_RESTRACK_QP) {
		if (idx < start)
			goto next;

		if ((rdma_is_kernel_res(res) &&
		     task_active_pid_ns(current) != &init_pid_ns) ||
		    (!rdma_is_kernel_res(res) &&
		     task_active_pid_ns(current) != task_active_pid_ns(res->task)))
			/*
			 * 1. Kernel QPs should be visible in init namespace only
			 * 2. Present only QPs visible in the current namespace
			 */
			goto next;

		if (!rdma_restrack_get(res))
			/*
			 * Resource is under release now, but we are not
			 * releasing the lock now, so it will be released in
			 * our next pass, once we will get ->next pointer.
			 */
			goto next;

		qp = container_of(res, struct ib_qp, res);

		/* Drop the lock around the fill: it may query hardware. */
		up_read(&device->res.rwsem);
		ret = fill_res_qp_entry(skb, qp, port);
		down_read(&device->res.rwsem);
		/*
		 * Return resource back, but it won't be released till
		 * the &device->res.rwsem will be released for write.
		 */
		rdma_restrack_put(res);

		if (ret == -EMSGSIZE)
			/*
			 * There is a chance to optimize here.
			 * It can be done by using list_prepare_entry
			 * and list_for_each_entry_continue afterwards.
			 */
			break;
		if (ret)
			goto res_err;
next:		idx++;
	}
	up_read(&device->res.rwsem);

	nla_nest_end(skb, table_attr);
	nlmsg_end(skb, nlh);
	cb->args[0] = idx;

	/*
	 * qp is still NULL when no QP was filled in this pass: cancel
	 * the (empty) message and return 0 to mark end of dumpit.
	 */
	if (!qp)
		goto err;

	put_device(&device->dev);
	return skb->len;

res_err:
	nla_nest_cancel(skb, table_attr);
	up_read(&device->res.rwsem);

err:
	nlmsg_cancel(skb, nlh);

err_index:
	put_device(&device->dev);
	return ret;
}
688 
/*
 * Dispatch table mapping RDMA_NLDEV_CMD_* command numbers to their
 * doit (request/reply) and dump (multi-part) handlers.
 */
static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
	[RDMA_NLDEV_CMD_GET] = {
		.doit = nldev_get_doit,
		.dump = nldev_get_dumpit,
	},
	[RDMA_NLDEV_CMD_PORT_GET] = {
		.doit = nldev_port_get_doit,
		.dump = nldev_port_get_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_GET] = {
		.doit = nldev_res_get_doit,
		.dump = nldev_res_get_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_QP_GET] = {
		.dump = nldev_res_get_qp_dumpit,
		/*
		 * .doit is not implemented yet for two reasons:
		 * 1. It is not needed yet.
		 * 2. There is a need to provide identifier, while it is easy
		 * for the QPs (device index + port index + LQPN), it is not
		 * the case for the rest of resources (PD and CQ). Because it
		 * is better to provide similar interface for all resources,
		 * let's wait till we will have other resources implemented
		 * too.
		 */
	},
};
716 
/* Register the nldev command table with the RDMA netlink core. */
void __init nldev_init(void)
{
	rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table);
}
721 
/* Unregister the nldev client from the RDMA netlink core. */
void __exit nldev_exit(void)
{
	rdma_nl_unregister(RDMA_NL_NLDEV);
}
726 
727 MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_NLDEV, 5);
728