/*
 * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *	copyright notice, this list of conditions and the following
 *	disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *	copyright notice, this list of conditions and the following
 *	disclaimer in the documentation and/or other materials
 *	provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/completion.h>
#include <linux/file.h>
#include <linux/mutex.h>
#include <linux/poll.h>
#include <linux/sched.h>
#include <linux/idr.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/miscdevice.h>
#include <linux/slab.h>

#include <rdma/rdma_user_cm.h>
#include <rdma/ib_marshall.h>
#include <rdma/rdma_cm.h>
#include <rdma/rdma_cm_ib.h>

MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access");
MODULE_LICENSE("Dual BSD/GPL");

enum {
	UCMA_MAX_BACKLOG	= 128
};

struct ucma_file {
	struct mutex		mut;
	struct file		*filp;
	struct list_head	ctx_list;
	struct list_head	event_list;
	wait_queue_head_t	poll_wait;
};

struct ucma_context {
	int			id;
	struct completion	comp;
	atomic_t		ref;
	int			events_reported;
	int			backlog;

	struct ucma_file	*file;
	struct rdma_cm_id	*cm_id;
	u64			uid;

	struct list_head	list;
	struct list_head	mc_list;
};

struct ucma_multicast {
	struct ucma_context	*ctx;
	int			id;
	int			events_reported;

	u64			uid;
	struct list_head	list;
	struct sockaddr_storage	addr;
};

struct ucma_event {
	struct ucma_context	*ctx;
	struct ucma_multicast	*mc;
	struct list_head	list;
	struct rdma_cm_id	*cm_id;
	struct rdma_ucm_event_resp resp;
};

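/*
 * Locking: the global 'mut' protects ctx_idr and multicast_idr; each
 * ucma_file's 'mut' protects that file's ctx_list and event_list.
 * Context lifetime is managed with a reference count plus a completion:
 * ucma_destroy_id() drops the initial reference and then waits on
 * 'comp' until every ucma_get_ctx() user has called ucma_put_ctx().
 */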
static DEFINE_MUTEX(mut);
static DEFINE_IDR(ctx_idr);
static DEFINE_IDR(multicast_idr);

static inline struct ucma_context *_ucma_find_context(int id,
						      struct ucma_file *file)
{
	struct ucma_context *ctx;

	ctx = idr_find(&ctx_idr, id);
	if (!ctx)
		ctx = ERR_PTR(-ENOENT);
	else if (ctx->file != file)
		ctx = ERR_PTR(-EINVAL);
	return ctx;
}

static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id)
{
	struct ucma_context *ctx;

	mutex_lock(&mut);
	ctx = _ucma_find_context(id, file);
	if (!IS_ERR(ctx))
		atomic_inc(&ctx->ref);
	mutex_unlock(&mut);
	return ctx;
}

static void ucma_put_ctx(struct ucma_context *ctx)
{
	if (atomic_dec_and_test(&ctx->ref))
		complete(&ctx->comp);
}

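/*
 * Two-step IDR allocation: idr_pre_get() preallocates memory, then
 * idr_get_new() assigns the id.  idr_get_new() can still return -EAGAIN
 * if a concurrent caller consumed the preallocation, hence the retry
 * loops below.
 */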
static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
{
	struct ucma_context *ctx;
	int ret;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return NULL;

	atomic_set(&ctx->ref, 1);
	init_completion(&ctx->comp);
	INIT_LIST_HEAD(&ctx->mc_list);
	ctx->file = file;

	do {
		ret = idr_pre_get(&ctx_idr, GFP_KERNEL);
		if (!ret)
			goto error;

		mutex_lock(&mut);
		ret = idr_get_new(&ctx_idr, ctx, &ctx->id);
		mutex_unlock(&mut);
	} while (ret == -EAGAIN);

	if (ret)
		goto error;

	list_add_tail(&ctx->list, &file->ctx_list);
	return ctx;

error:
	kfree(ctx);
	return NULL;
}

static struct ucma_multicast *ucma_alloc_multicast(struct ucma_context *ctx)
{
	struct ucma_multicast *mc;
	int ret;

	mc = kzalloc(sizeof(*mc), GFP_KERNEL);
	if (!mc)
		return NULL;

	do {
		ret = idr_pre_get(&multicast_idr, GFP_KERNEL);
		if (!ret)
			goto error;

		mutex_lock(&mut);
		ret = idr_get_new(&multicast_idr, mc, &mc->id);
		mutex_unlock(&mut);
	} while (ret == -EAGAIN);

	if (ret)
		goto error;

	mc->ctx = ctx;
	list_add_tail(&mc->list, &ctx->mc_list);
	return mc;

error:
	kfree(mc);
	return NULL;
}

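/*
 * The rdma_ucm_* structures below are the userspace-visible ABI.  Their
 * private_data field is a fixed-size array; the rdma_cm is assumed to
 * bound private_data_len accordingly before events reach these copies.
 */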
static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst,
				 struct rdma_conn_param *src)
{
	if (src->private_data_len)
		memcpy(dst->private_data, src->private_data,
		       src->private_data_len);
	dst->private_data_len = src->private_data_len;
	dst->responder_resources = src->responder_resources;
	dst->initiator_depth = src->initiator_depth;
	dst->flow_control = src->flow_control;
	dst->retry_count = src->retry_count;
	dst->rnr_retry_count = src->rnr_retry_count;
	dst->srq = src->srq;
	dst->qp_num = src->qp_num;
}

static void ucma_copy_ud_event(struct rdma_ucm_ud_param *dst,
			       struct rdma_ud_param *src)
{
	if (src->private_data_len)
		memcpy(dst->private_data, src->private_data,
		       src->private_data_len);
	dst->private_data_len = src->private_data_len;
	ib_copy_ah_attr_to_user(&dst->ah_attr, &src->ah_attr);
	dst->qp_num = src->qp_num;
	dst->qkey = src->qkey;
}

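/*
 * For multicast events, param.ud.private_data carries the context
 * pointer that was passed to rdma_join_multicast(), which in this
 * driver is the struct ucma_multicast itself.
 */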
static void ucma_set_event_context(struct ucma_context *ctx,
				   struct rdma_cm_event *event,
				   struct ucma_event *uevent)
{
	uevent->ctx = ctx;
	switch (event->event) {
	case RDMA_CM_EVENT_MULTICAST_JOIN:
	case RDMA_CM_EVENT_MULTICAST_ERROR:
		uevent->mc = (struct ucma_multicast *)
			     event->param.ud.private_data;
		uevent->resp.uid = uevent->mc->uid;
		uevent->resp.id = uevent->mc->id;
		break;
	default:
		uevent->resp.uid = ctx->uid;
		uevent->resp.id = ctx->id;
		break;
	}
}

static int ucma_event_handler(struct rdma_cm_id *cm_id,
			      struct rdma_cm_event *event)
{
	struct ucma_event *uevent;
	struct ucma_context *ctx = cm_id->context;
	int ret = 0;

	/*
	 * A non-zero return tells the rdma_cm to destroy the id, which
	 * drops an incoming connect request we have no memory to queue.
	 */
	uevent = kzalloc(sizeof(*uevent), GFP_KERNEL);
	if (!uevent)
		return event->event == RDMA_CM_EVENT_CONNECT_REQUEST;

	uevent->cm_id = cm_id;
	ucma_set_event_context(ctx, event, uevent);
	uevent->resp.event = event->event;
	uevent->resp.status = event->status;
	if (cm_id->ps == RDMA_PS_UDP || cm_id->ps == RDMA_PS_IPOIB)
		ucma_copy_ud_event(&uevent->resp.param.ud, &event->param.ud);
	else
		ucma_copy_conn_event(&uevent->resp.param.conn,
				     &event->param.conn);

	mutex_lock(&ctx->file->mut);
	if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) {
		if (!ctx->backlog) {
			ret = -ENOMEM;
			kfree(uevent);
			goto out;
		}
		ctx->backlog--;
	} else if (!ctx->uid) {
		/*
		 * We ignore events for new connections until userspace has set
		 * their context.  This can only happen if an error occurs on a
		 * new connection before the user accepts it.  This is okay,
		 * since the accept will just fail later.
		 */
		kfree(uevent);
		goto out;
	}

	list_add_tail(&uevent->list, &ctx->file->event_list);
	wake_up_interruptible(&ctx->file->poll_wait);
out:
	mutex_unlock(&ctx->file->mut);
	return ret;
}

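/*
 * Reporting a RDMA_CM_EVENT_CONNECT_REQUEST event allocates a new
 * context for the incoming connection, so userspace receives an id it
 * can accept or reject; the listening context's backlog slot is
 * released at the same time.
 */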
static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
			      int in_len, int out_len)
{
	struct ucma_context *ctx;
	struct rdma_ucm_get_event cmd;
	struct ucma_event *uevent;
	int ret = 0;
	DEFINE_WAIT(wait);

	if (out_len < sizeof uevent->resp)
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	mutex_lock(&file->mut);
	while (list_empty(&file->event_list)) {
		mutex_unlock(&file->mut);

		if (file->filp->f_flags & O_NONBLOCK)
			return -EAGAIN;

		if (wait_event_interruptible(file->poll_wait,
					     !list_empty(&file->event_list)))
			return -ERESTARTSYS;

		mutex_lock(&file->mut);
	}

	uevent = list_entry(file->event_list.next, struct ucma_event, list);

	if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) {
		ctx = ucma_alloc_ctx(file);
		if (!ctx) {
			ret = -ENOMEM;
			goto done;
		}
		uevent->ctx->backlog++;
		ctx->cm_id = uevent->cm_id;
		ctx->cm_id->context = ctx;
		uevent->resp.id = ctx->id;
	}

	if (copy_to_user((void __user *)(unsigned long)cmd.response,
			 &uevent->resp, sizeof uevent->resp)) {
		ret = -EFAULT;
		goto done;
	}

	list_del(&uevent->list);
	uevent->ctx->events_reported++;
	if (uevent->mc)
		uevent->mc->events_reported++;
	kfree(uevent);
done:
	mutex_unlock(&file->mut);
	return ret;
}

static ssize_t ucma_create_id(struct ucma_file *file,
				const char __user *inbuf,
				int in_len, int out_len)
{
	struct rdma_ucm_create_id cmd;
	struct rdma_ucm_create_id_resp resp;
	struct ucma_context *ctx;
	int ret;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	mutex_lock(&file->mut);
	ctx = ucma_alloc_ctx(file);
	mutex_unlock(&file->mut);
	if (!ctx)
		return -ENOMEM;

	ctx->uid = cmd.uid;
	ctx->cm_id = rdma_create_id(ucma_event_handler, ctx, cmd.ps);
	if (IS_ERR(ctx->cm_id)) {
		ret = PTR_ERR(ctx->cm_id);
		goto err1;
	}

	resp.id = ctx->id;
	if (copy_to_user((void __user *)(unsigned long)cmd.response,
			 &resp, sizeof(resp))) {
		ret = -EFAULT;
		goto err2;
	}
	return 0;

err2:
	rdma_destroy_id(ctx->cm_id);
err1:
	mutex_lock(&mut);
	idr_remove(&ctx_idr, ctx->id);
	mutex_unlock(&mut);
	kfree(ctx);
	return ret;
}

static void ucma_cleanup_multicast(struct ucma_context *ctx)
{
	struct ucma_multicast *mc, *tmp;

	mutex_lock(&mut);
	list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) {
		list_del(&mc->list);
		idr_remove(&multicast_idr, mc->id);
		kfree(mc);
	}
	mutex_unlock(&mut);
}

static void ucma_cleanup_events(struct ucma_context *ctx)
{
	struct ucma_event *uevent, *tmp;

	list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) {
		if (uevent->ctx != ctx)
			continue;

		list_del(&uevent->list);

		/* clear incoming connections. */
		if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST)
			rdma_destroy_id(uevent->cm_id);

		kfree(uevent);
	}
}

static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
{
	struct ucma_event *uevent, *tmp;

	list_for_each_entry_safe(uevent, tmp, &mc->ctx->file->event_list, list) {
		if (uevent->mc != mc)
			continue;

		list_del(&uevent->list);
		kfree(uevent);
	}
}

static int ucma_free_ctx(struct ucma_context *ctx)
{
	int events_reported;

	/* No new events will be generated after destroying the id. */
	rdma_destroy_id(ctx->cm_id);

	ucma_cleanup_multicast(ctx);

	/* Cleanup events not yet reported to the user. */
	mutex_lock(&ctx->file->mut);
	ucma_cleanup_events(ctx);
	list_del(&ctx->list);
	mutex_unlock(&ctx->file->mut);

	events_reported = ctx->events_reported;
	kfree(ctx);
	return events_reported;
}

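/*
 * Teardown order: remove the id from the idr so no new references can
 * be taken, drop the initial reference, wait for outstanding references
 * to be released, then free the context and everything hanging off it.
 */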
static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
			       int in_len, int out_len)
{
	struct rdma_ucm_destroy_id cmd;
	struct rdma_ucm_destroy_id_resp resp;
	struct ucma_context *ctx;
	int ret = 0;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	mutex_lock(&mut);
	ctx = _ucma_find_context(cmd.id, file);
	if (!IS_ERR(ctx))
		idr_remove(&ctx_idr, ctx->id);
	mutex_unlock(&mut);

	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	ucma_put_ctx(ctx);
	wait_for_completion(&ctx->comp);
	resp.events_reported = ucma_free_ctx(ctx);

	if (copy_to_user((void __user *)(unsigned long)cmd.response,
			 &resp, sizeof(resp)))
		ret = -EFAULT;

	return ret;
}

static ssize_t ucma_bind_addr(struct ucma_file *file, const char __user *inbuf,
			      int in_len, int out_len)
{
	struct rdma_ucm_bind_addr cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr);
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_resolve_addr(struct ucma_file *file,
				 const char __user *inbuf,
				 int in_len, int out_len)
{
	struct rdma_ucm_resolve_addr cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
				(struct sockaddr *) &cmd.dst_addr,
				cmd.timeout_ms);
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_resolve_route(struct ucma_file *file,
				  const char __user *inbuf,
				  int in_len, int out_len)
{
	struct rdma_ucm_resolve_route cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	ret = rdma_resolve_route(ctx->cm_id, cmd.timeout_ms);
	ucma_put_ctx(ctx);
	return ret;
}

static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp,
			       struct rdma_route *route)
{
	struct rdma_dev_addr *dev_addr;

	resp->num_paths = route->num_paths;
	switch (route->num_paths) {
	case 0:
		/*
		 * No path records resolved yet; report the GIDs and pkey
		 * from the device address instead.
		 */
		dev_addr = &route->addr.dev_addr;
		rdma_addr_get_dgid(dev_addr,
				   (union ib_gid *) &resp->ib_route[0].dgid);
		rdma_addr_get_sgid(dev_addr,
				   (union ib_gid *) &resp->ib_route[0].sgid);
		resp->ib_route[0].pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
		break;
	case 2:
		ib_copy_path_rec_to_user(&resp->ib_route[1],
					 &route->path_rec[1]);
		/* fall through */
	case 1:
		ib_copy_path_rec_to_user(&resp->ib_route[0],
					 &route->path_rec[0]);
		break;
	default:
		break;
	}
}

static ssize_t ucma_query_route(struct ucma_file *file,
				const char __user *inbuf,
				int in_len, int out_len)
{
	struct rdma_ucm_query_route cmd;
	struct rdma_ucm_query_route_resp resp;
	struct ucma_context *ctx;
	struct sockaddr *addr;
	int ret = 0;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	memset(&resp, 0, sizeof resp);
	addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr;
	memcpy(&resp.src_addr, addr, addr->sa_family == AF_INET ?
				     sizeof(struct sockaddr_in) :
				     sizeof(struct sockaddr_in6));
	addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr;
	memcpy(&resp.dst_addr, addr, addr->sa_family == AF_INET ?
				     sizeof(struct sockaddr_in) :
				     sizeof(struct sockaddr_in6));
	if (!ctx->cm_id->device)
		goto out;

	resp.node_guid = (__force __u64) ctx->cm_id->device->node_guid;
	resp.port_num = ctx->cm_id->port_num;
	switch (rdma_node_get_transport(ctx->cm_id->device->node_type)) {
	case RDMA_TRANSPORT_IB:
		ucma_copy_ib_route(&resp, &ctx->cm_id->route);
		break;
	default:
		break;
	}

out:
	if (copy_to_user((void __user *)(unsigned long)cmd.response,
			 &resp, sizeof(resp)))
		ret = -EFAULT;

	ucma_put_ctx(ctx);
	return ret;
}

static void ucma_copy_conn_param(struct rdma_conn_param *dst,
				 struct rdma_ucm_conn_param *src)
{
	dst->private_data = src->private_data;
	dst->private_data_len = src->private_data_len;
	dst->responder_resources = src->responder_resources;
	dst->initiator_depth = src->initiator_depth;
	dst->flow_control = src->flow_control;
	dst->retry_count = src->retry_count;
	dst->rnr_retry_count = src->rnr_retry_count;
	dst->srq = src->srq;
	dst->qp_num = src->qp_num;
}

static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf,
			    int in_len, int out_len)
{
	struct rdma_ucm_connect cmd;
	struct rdma_conn_param conn_param;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (!cmd.conn_param.valid)
		return -EINVAL;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	ucma_copy_conn_param(&conn_param, &cmd.conn_param);
	ret = rdma_connect(ctx->cm_id, &conn_param);
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_listen(struct ucma_file *file, const char __user *inbuf,
			   int in_len, int out_len)
{
	struct rdma_ucm_listen cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	ctx->backlog = cmd.backlog > 0 && cmd.backlog < UCMA_MAX_BACKLOG ?
		       cmd.backlog : UCMA_MAX_BACKLOG;
	ret = rdma_listen(ctx->cm_id, ctx->backlog);
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf,
			   int in_len, int out_len)
{
	struct rdma_ucm_accept cmd;
	struct rdma_conn_param conn_param;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	if (cmd.conn_param.valid) {
		ctx->uid = cmd.uid;
		ucma_copy_conn_param(&conn_param, &cmd.conn_param);
		ret = rdma_accept(ctx->cm_id, &conn_param);
	} else {
		ret = rdma_accept(ctx->cm_id, NULL);
	}

	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_reject(struct ucma_file *file, const char __user *inbuf,
			   int in_len, int out_len)
{
	struct rdma_ucm_reject cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	ret = rdma_reject(ctx->cm_id, cmd.private_data, cmd.private_data_len);
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_disconnect(struct ucma_file *file, const char __user *inbuf,
			       int in_len, int out_len)
{
	struct rdma_ucm_disconnect cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	ret = rdma_disconnect(ctx->cm_id);
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_init_qp_attr(struct ucma_file *file,
				 const char __user *inbuf,
				 int in_len, int out_len)
{
	struct rdma_ucm_init_qp_attr cmd;
	struct ib_uverbs_qp_attr resp;
	struct ucma_context *ctx;
	struct ib_qp_attr qp_attr;
	int ret;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	resp.qp_attr_mask = 0;
	memset(&qp_attr, 0, sizeof qp_attr);
	qp_attr.qp_state = cmd.qp_state;
	ret = rdma_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask);
	if (ret)
		goto out;

	ib_copy_qp_attr_to_user(&resp, &qp_attr);
	if (copy_to_user((void __user *)(unsigned long)cmd.response,
			 &resp, sizeof(resp)))
		ret = -EFAULT;

out:
	ucma_put_ctx(ctx);
	return ret;
}

static int ucma_set_option_id(struct ucma_context *ctx, int optname,
			      void *optval, size_t optlen)
{
	int ret = 0;

	switch (optname) {
	case RDMA_OPTION_ID_TOS:
		if (optlen != sizeof(u8)) {
			ret = -EINVAL;
			break;
		}
		rdma_set_service_type(ctx->cm_id, *((u8 *) optval));
		break;
	default:
		ret = -ENOSYS;
	}

	return ret;
}

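/*
 * Userspace supplies an array of packed ib_path_rec_data entries; only
 * a path flagged as the primary, bidirectional GMP path is accepted
 * and handed to the rdma_cm.
 */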
static int ucma_set_ib_path(struct ucma_context *ctx,
			    struct ib_path_rec_data *path_data, size_t optlen)
{
	struct ib_sa_path_rec sa_path;
	struct rdma_cm_event event;
	int ret;

	if (optlen % sizeof(*path_data))
		return -EINVAL;

	for (; optlen; optlen -= sizeof(*path_data), path_data++) {
		if (path_data->flags == (IB_PATH_GMP | IB_PATH_PRIMARY |
					 IB_PATH_BIDIRECTIONAL))
			break;
	}

	if (!optlen)
		return -EINVAL;

	ib_sa_unpack_path(path_data->path_rec, &sa_path);
	ret = rdma_set_ib_paths(ctx->cm_id, &sa_path, 1);
	if (ret)
		return ret;

	memset(&event, 0, sizeof event);
	event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
	return ucma_event_handler(ctx->cm_id, &event);
}

static int ucma_set_option_ib(struct ucma_context *ctx, int optname,
			      void *optval, size_t optlen)
{
	int ret;

	switch (optname) {
	case RDMA_OPTION_IB_PATH:
		ret = ucma_set_ib_path(ctx, optval, optlen);
		break;
	default:
		ret = -ENOSYS;
	}

	return ret;
}

static int ucma_set_option_level(struct ucma_context *ctx, int level,
				 int optname, void *optval, size_t optlen)
{
	int ret;

	switch (level) {
	case RDMA_OPTION_ID:
		ret = ucma_set_option_id(ctx, optname, optval, optlen);
		break;
	case RDMA_OPTION_IB:
		ret = ucma_set_option_ib(ctx, optname, optval, optlen);
		break;
	default:
		ret = -ENOSYS;
	}

	return ret;
}

static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf,
			       int in_len, int out_len)
{
	struct rdma_ucm_set_option cmd;
	struct ucma_context *ctx;
	void *optval;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	optval = kmalloc(cmd.optlen, GFP_KERNEL);
	if (!optval) {
		ret = -ENOMEM;
		goto out1;
	}

	if (copy_from_user(optval, (void __user *) (unsigned long) cmd.optval,
			   cmd.optlen)) {
		ret = -EFAULT;
		goto out2;
	}

	ret = ucma_set_option_level(ctx, cmd.level, cmd.optname, optval,
				    cmd.optlen);
out2:
	kfree(optval);
out1:
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf,
			   int in_len, int out_len)
{
	struct rdma_ucm_notify cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	ret = rdma_notify(ctx->cm_id, (enum ib_event_type) cmd.event);
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_join_multicast(struct ucma_file *file,
				   const char __user *inbuf,
				   int in_len, int out_len)
{
	struct rdma_ucm_join_mcast cmd;
	struct rdma_ucm_create_id_resp resp;
	struct ucma_context *ctx;
	struct ucma_multicast *mc;
	int ret;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&file->mut);
	mc = ucma_alloc_multicast(ctx);
	if (!mc) {
		ret = -ENOMEM;
		goto err1;
	}

	mc->uid = cmd.uid;
	memcpy(&mc->addr, &cmd.addr, sizeof cmd.addr);
	ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr, mc);
	if (ret)
		goto err2;

	resp.id = mc->id;
	if (copy_to_user((void __user *)(unsigned long)cmd.response,
			 &resp, sizeof(resp))) {
		ret = -EFAULT;
		goto err3;
	}

	mutex_unlock(&file->mut);
	ucma_put_ctx(ctx);
	return 0;

err3:
	rdma_leave_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr);
	ucma_cleanup_mc_events(mc);
err2:
	mutex_lock(&mut);
	idr_remove(&multicast_idr, mc->id);
	mutex_unlock(&mut);
	list_del(&mc->list);
	kfree(mc);
err1:
	mutex_unlock(&file->mut);
	ucma_put_ctx(ctx);
	return ret;
}

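/*
 * On leave, the multicast entry is unlinked from the idr and a context
 * reference is taken under 'mut', so the owning context cannot be freed
 * while the membership and its unreported events are torn down.
 */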
static ssize_t ucma_leave_multicast(struct ucma_file *file,
				    const char __user *inbuf,
				    int in_len, int out_len)
{
	struct rdma_ucm_destroy_id cmd;
	struct rdma_ucm_destroy_id_resp resp;
	struct ucma_multicast *mc;
	int ret = 0;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	mutex_lock(&mut);
	mc = idr_find(&multicast_idr, cmd.id);
	if (!mc)
		mc = ERR_PTR(-ENOENT);
	else if (mc->ctx->file != file)
		mc = ERR_PTR(-EINVAL);
	else {
		idr_remove(&multicast_idr, mc->id);
		atomic_inc(&mc->ctx->ref);
	}
	mutex_unlock(&mut);

	if (IS_ERR(mc)) {
		ret = PTR_ERR(mc);
		goto out;
	}

	rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr);
	mutex_lock(&mc->ctx->file->mut);
	ucma_cleanup_mc_events(mc);
	list_del(&mc->list);
	mutex_unlock(&mc->ctx->file->mut);

	ucma_put_ctx(mc->ctx);
	resp.events_reported = mc->events_reported;
	kfree(mc);

	if (copy_to_user((void __user *)(unsigned long)cmd.response,
			 &resp, sizeof(resp)))
		ret = -EFAULT;
out:
	return ret;
}

static void ucma_lock_files(struct ucma_file *file1, struct ucma_file *file2)
{
	/* Acquire mutexes in address order to prevent deadlock. */
	if (file1 < file2) {
		mutex_lock(&file1->mut);
		mutex_lock(&file2->mut);
	} else {
		mutex_lock(&file2->mut);
		mutex_lock(&file1->mut);
	}
}

static void ucma_unlock_files(struct ucma_file *file1, struct ucma_file *file2)
{
	if (file1 < file2) {
		mutex_unlock(&file2->mut);
		mutex_unlock(&file1->mut);
	} else {
		mutex_unlock(&file1->mut);
		mutex_unlock(&file2->mut);
	}
}

static void ucma_move_events(struct ucma_context *ctx, struct ucma_file *file)
{
	struct ucma_event *uevent, *tmp;

	list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list)
		if (uevent->ctx == ctx)
			list_move_tail(&uevent->list, &file->event_list);
}

static ssize_t ucma_migrate_id(struct ucma_file *new_file,
			       const char __user *inbuf,
			       int in_len, int out_len)
{
	struct rdma_ucm_migrate_id cmd;
	struct rdma_ucm_migrate_resp resp;
	struct ucma_context *ctx;
	struct file *filp;
	struct ucma_file *cur_file;
	int ret = 0;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	/* Get current fd to protect against it being closed */
	filp = fget(cmd.fd);
	if (!filp)
		return -ENOENT;

	/* Validate current fd and prevent destruction of id. */
	ctx = ucma_get_ctx(filp->private_data, cmd.id);
	if (IS_ERR(ctx)) {
		ret = PTR_ERR(ctx);
		goto file_put;
	}

	cur_file = ctx->file;
	if (cur_file == new_file) {
		resp.events_reported = ctx->events_reported;
		goto response;
	}

	/*
	 * Migrate events between fds, maintaining order, and avoiding new
	 * events being added before existing events.
	 */
	ucma_lock_files(cur_file, new_file);
	mutex_lock(&mut);

	list_move_tail(&ctx->list, &new_file->ctx_list);
	ucma_move_events(ctx, new_file);
	ctx->file = new_file;
	resp.events_reported = ctx->events_reported;

	mutex_unlock(&mut);
	ucma_unlock_files(cur_file, new_file);

response:
	if (copy_to_user((void __user *)(unsigned long)cmd.response,
			 &resp, sizeof(resp)))
		ret = -EFAULT;

	ucma_put_ctx(ctx);
file_put:
	fput(filp);
	return ret;
}

static ssize_t (*ucma_cmd_table[])(struct ucma_file *file,
				   const char __user *inbuf,
				   int in_len, int out_len) = {
	[RDMA_USER_CM_CMD_CREATE_ID]	= ucma_create_id,
	[RDMA_USER_CM_CMD_DESTROY_ID]	= ucma_destroy_id,
	[RDMA_USER_CM_CMD_BIND_ADDR]	= ucma_bind_addr,
	[RDMA_USER_CM_CMD_RESOLVE_ADDR]	= ucma_resolve_addr,
	[RDMA_USER_CM_CMD_RESOLVE_ROUTE]= ucma_resolve_route,
	[RDMA_USER_CM_CMD_QUERY_ROUTE]	= ucma_query_route,
	[RDMA_USER_CM_CMD_CONNECT]	= ucma_connect,
	[RDMA_USER_CM_CMD_LISTEN]	= ucma_listen,
	[RDMA_USER_CM_CMD_ACCEPT]	= ucma_accept,
	[RDMA_USER_CM_CMD_REJECT]	= ucma_reject,
	[RDMA_USER_CM_CMD_DISCONNECT]	= ucma_disconnect,
	[RDMA_USER_CM_CMD_INIT_QP_ATTR]	= ucma_init_qp_attr,
	[RDMA_USER_CM_CMD_GET_EVENT]	= ucma_get_event,
	[RDMA_USER_CM_CMD_GET_OPTION]	= NULL,
	[RDMA_USER_CM_CMD_SET_OPTION]	= ucma_set_option,
	[RDMA_USER_CM_CMD_NOTIFY]	= ucma_notify,
	[RDMA_USER_CM_CMD_JOIN_MCAST]	= ucma_join_multicast,
	[RDMA_USER_CM_CMD_LEAVE_MCAST]	= ucma_leave_multicast,
	[RDMA_USER_CM_CMD_MIGRATE_ID]	= ucma_migrate_id
};

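/*
 * Commands arrive as a write() of a struct rdma_ucm_cmd_hdr followed by
 * the command-specific payload; hdr.in gives the payload length and
 * hdr.out the size of the buffer the command's 'response' field points
 * at.  A minimal userspace sketch (illustrative only; error handling
 * omitted and cmd.response must point at a response buffer):
 *
 *	struct {
 *		struct rdma_ucm_cmd_hdr hdr;
 *		struct rdma_ucm_create_id cmd;
 *	} msg = {
 *		.hdr = { .cmd = RDMA_USER_CM_CMD_CREATE_ID,
 *			 .in  = sizeof(msg.cmd),
 *			 .out = sizeof(struct rdma_ucm_create_id_resp) },
 *	};
 *	write(fd, &msg, sizeof(msg));
 *
 * where fd refers to the rdma_cm misc device (typically
 * /dev/infiniband/rdma_cm).
 */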
static ssize_t ucma_write(struct file *filp, const char __user *buf,
			  size_t len, loff_t *pos)
{
	struct ucma_file *file = filp->private_data;
	struct rdma_ucm_cmd_hdr hdr;
	ssize_t ret;

	if (len < sizeof(hdr))
		return -EINVAL;

	if (copy_from_user(&hdr, buf, sizeof(hdr)))
		return -EFAULT;

	/* hdr.cmd is a __u32, so only the upper bound needs checking. */
	if (hdr.cmd >= ARRAY_SIZE(ucma_cmd_table))
		return -EINVAL;

	if (hdr.in + sizeof(hdr) > len)
		return -EINVAL;

	if (!ucma_cmd_table[hdr.cmd])
		return -ENOSYS;

	ret = ucma_cmd_table[hdr.cmd](file, buf + sizeof(hdr), hdr.in, hdr.out);
	if (!ret)
		ret = len;

	return ret;
}

static unsigned int ucma_poll(struct file *filp, struct poll_table_struct *wait)
{
	struct ucma_file *file = filp->private_data;
	unsigned int mask = 0;

	poll_wait(filp, &file->poll_wait, wait);

	if (!list_empty(&file->event_list))
		mask = POLLIN | POLLRDNORM;

	return mask;
}

/*
 * ucma_open() does not need the BKL:
 *
 *  - no global state is referred to;
 *  - there is no ioctl method to race against;
 *  - no further module initialization is required for open to work
 *    after the device is registered.
 */
static int ucma_open(struct inode *inode, struct file *filp)
{
	struct ucma_file *file;

	file = kmalloc(sizeof *file, GFP_KERNEL);
	if (!file)
		return -ENOMEM;

	INIT_LIST_HEAD(&file->event_list);
	INIT_LIST_HEAD(&file->ctx_list);
	init_waitqueue_head(&file->poll_wait);
	mutex_init(&file->mut);

	filp->private_data = file;
	file->filp = filp;

	return nonseekable_open(inode, filp);
}

static int ucma_close(struct inode *inode, struct file *filp)
{
	struct ucma_file *file = filp->private_data;
	struct ucma_context *ctx, *tmp;

	mutex_lock(&file->mut);
	list_for_each_entry_safe(ctx, tmp, &file->ctx_list, list) {
		/* ucma_free_ctx() takes file->mut itself, so drop it here. */
		mutex_unlock(&file->mut);

		mutex_lock(&mut);
		idr_remove(&ctx_idr, ctx->id);
		mutex_unlock(&mut);

		ucma_free_ctx(ctx);
		mutex_lock(&file->mut);
	}
	mutex_unlock(&file->mut);
	kfree(file);
	return 0;
}

static const struct file_operations ucma_fops = {
	.owner 	 = THIS_MODULE,
	.open 	 = ucma_open,
	.release = ucma_close,
	.write	 = ucma_write,
	.poll    = ucma_poll,
	.llseek	 = no_llseek,
};

static struct miscdevice ucma_misc = {
	.minor	= MISC_DYNAMIC_MINOR,
	.name	= "rdma_cm",
	.fops	= &ucma_fops,
};

static ssize_t show_abi_version(struct device *dev,
				struct device_attribute *attr,
				char *buf)
{
	return sprintf(buf, "%d\n", RDMA_USER_CM_ABI_VERSION);
}
static DEVICE_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);

static int __init ucma_init(void)
{
	int ret;

	ret = misc_register(&ucma_misc);
	if (ret)
		return ret;

	ret = device_create_file(ucma_misc.this_device, &dev_attr_abi_version);
	if (ret) {
		printk(KERN_ERR "rdma_ucm: couldn't create abi_version attr\n");
		goto err;
	}
	return 0;
err:
	misc_deregister(&ucma_misc);
	return ret;
}

static void __exit ucma_cleanup(void)
{
	device_remove_file(ucma_misc.this_device, &dev_attr_abi_version);
	misc_deregister(&ucma_misc);
	idr_destroy(&ctx_idr);
}

module_init(ucma_init);
module_exit(ucma_cleanup);