xref: /linux/drivers/infiniband/core/cma.c (revision 5e8d780d745c1619aba81fe7166c5a4b5cad2b84)
1 /*
2  * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
3  * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
4  * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
5  * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
6  *
7  * This Software is licensed under one of the following licenses:
8  *
9  * 1) under the terms of the "Common Public License 1.0" a copy of which is
10  *    available from the Open Source Initiative, see
11  *    http://www.opensource.org/licenses/cpl.php.
12  *
13  * 2) under the terms of the "The BSD License" a copy of which is
14  *    available from the Open Source Initiative, see
15  *    http://www.opensource.org/licenses/bsd-license.php.
16  *
17  * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
18  *    copy of which is available from the Open Source Initiative, see
19  *    http://www.opensource.org/licenses/gpl-license.php.
20  *
21  * Licensee has the right to choose one of the above licenses.
22  *
23  * Redistributions of source code must retain the above copyright
24  * notice and one of the license notices.
25  *
26  * Redistributions in binary form must reproduce both the above copyright
27  * notice, one of the license notices in the documentation
28  * and/or other materials provided with the distribution.
29  *
30  */
31 
32 #include <linux/completion.h>
33 #include <linux/in.h>
34 #include <linux/in6.h>
35 #include <linux/mutex.h>
36 #include <linux/random.h>
37 #include <linux/idr.h>
38 
39 #include <net/tcp.h>
40 
41 #include <rdma/rdma_cm.h>
42 #include <rdma/rdma_cm_ib.h>
43 #include <rdma/ib_cache.h>
44 #include <rdma/ib_cm.h>
45 #include <rdma/ib_sa.h>
46 
47 MODULE_AUTHOR("Sean Hefty");
48 MODULE_DESCRIPTION("Generic RDMA CM Agent");
49 MODULE_LICENSE("Dual BSD/GPL");
50 
/*
 * Connection-manager tunables: retry count and response timeout.
 * NOTE(review): the timeout's unit is not stated here -- confirm against
 * the IB CM calls that consume these values.
 */
#define CMA_MAX_CM_RETRIES	3
#define CMA_CM_RESPONSE_TIMEOUT	20
53 
54 static void cma_add_one(struct ib_device *device);
55 static void cma_remove_one(struct ib_device *device);
56 
57 static struct ib_client cma_client = {
58 	.name   = "cma",
59 	.add    = cma_add_one,
60 	.remove = cma_remove_one
61 };
62 
/*
 * File-scope state.  'lock' is held while walking dev_list, while editing
 * listen_any_list and while editing port bind lists.
 */
static DEFINE_MUTEX(lock);
static LIST_HEAD(dev_list);		/* struct cma_device, linked via ->list */
static LIST_HEAD(listen_any_list);	/* ids listening on every device */
static struct workqueue_struct *cma_wq;	/* executes queued struct cma_work */
/* Port-space IDRs; presumably what rdma_bind_list.ps points at -- confirm. */
static DEFINE_IDR(tcp_ps);
static DEFINE_IDR(sdp_ps);
69 
70 struct cma_device {
71 	struct list_head	list;
72 	struct ib_device	*device;
73 	__be64			node_guid;
74 	struct completion	comp;
75 	atomic_t		refcount;
76 	struct list_head	id_list;
77 };
78 
/*
 * Lifecycle states of an rdma_id_private.  Transitions are made under
 * rdma_id_private.lock through cma_comp_exch()/cma_exch().
 */
enum cma_state {
	CMA_IDLE,		/* initial state set by rdma_create_id() */
	CMA_ADDR_QUERY,		/* address resolution outstanding */
	CMA_ADDR_RESOLVED,	/* also re-entered when a route query fails */
	CMA_ROUTE_QUERY,	/* SA path query outstanding */
	CMA_ROUTE_RESOLVED,
	CMA_CONNECT,		/* required for IB CM events to be processed */
	CMA_DISCONNECT,		/* entered from CMA_CONNECT on DREQ/DREP */
	CMA_ADDR_BOUND,		/* precondition for rdma_listen() */
	CMA_LISTEN,
	CMA_DEVICE_REMOVAL,	/* presumably set on device hot-unplug -- confirm */
	CMA_DESTROYING		/* teardown in progress */
};
92 
93 struct rdma_bind_list {
94 	struct idr		*ps;
95 	struct hlist_head	owners;
96 	unsigned short		port;
97 };
98 
99 /*
100  * Device removal can occur at anytime, so we need extra handling to
101  * serialize notifying the user of device removal with other callbacks.
102  * We do this by disabling removal notification while a callback is in process,
103  * and reporting it after the callback completes.
104  */
105 struct rdma_id_private {
106 	struct rdma_cm_id	id;
107 
108 	struct rdma_bind_list	*bind_list;
109 	struct hlist_node	node;
110 	struct list_head	list;
111 	struct list_head	listen_list;
112 	struct cma_device	*cma_dev;
113 
114 	enum cma_state		state;
115 	spinlock_t		lock;
116 	struct completion	comp;
117 	atomic_t		refcount;
118 	wait_queue_head_t	wait_remove;
119 	atomic_t		dev_remove;
120 
121 	int			backlog;
122 	int			timeout_ms;
123 	struct ib_sa_query	*query;
124 	int			query_id;
125 	union {
126 		struct ib_cm_id	*ib;
127 	} cm_id;
128 
129 	u32			seq_num;
130 	u32			qp_num;
131 	enum ib_qp_type		qp_type;
132 	u8			srq;
133 };
134 
135 struct cma_work {
136 	struct work_struct	work;
137 	struct rdma_id_private	*id;
138 	enum cma_state		old_state;
139 	enum cma_state		new_state;
140 	struct rdma_cm_event	event;
141 };
142 
143 union cma_ip_addr {
144 	struct in6_addr ip6;
145 	struct {
146 		__u32 pad[3];
147 		__u32 addr;
148 	} ip4;
149 };
150 
151 struct cma_hdr {
152 	u8 cma_version;
153 	u8 ip_version;	/* IP version: 7:4 */
154 	__u16 port;
155 	union cma_ip_addr src_addr;
156 	union cma_ip_addr dst_addr;
157 };
158 
159 struct sdp_hh {
160 	u8 bsdh[16];
161 	u8 sdp_version; /* Major version: 7:4 */
162 	u8 ip_version;	/* IP version: 7:4 */
163 	u8 sdp_specific1[10];
164 	__u16 port;
165 	__u16 sdp_specific2;
166 	union cma_ip_addr src_addr;
167 	union cma_ip_addr dst_addr;
168 };
169 
170 struct sdp_hah {
171 	u8 bsdh[16];
172 	u8 sdp_version;
173 };
174 
/* Protocol versions accepted when parsing CM private data. */
#define CMA_VERSION	0x00	/* expected cma_hdr.cma_version */
#define SDP_MAJ_VERSION	0x2	/* expected major nibble of sdp_version */
177 
178 static int cma_comp(struct rdma_id_private *id_priv, enum cma_state comp)
179 {
180 	unsigned long flags;
181 	int ret;
182 
183 	spin_lock_irqsave(&id_priv->lock, flags);
184 	ret = (id_priv->state == comp);
185 	spin_unlock_irqrestore(&id_priv->lock, flags);
186 	return ret;
187 }
188 
189 static int cma_comp_exch(struct rdma_id_private *id_priv,
190 			 enum cma_state comp, enum cma_state exch)
191 {
192 	unsigned long flags;
193 	int ret;
194 
195 	spin_lock_irqsave(&id_priv->lock, flags);
196 	if ((ret = (id_priv->state == comp)))
197 		id_priv->state = exch;
198 	spin_unlock_irqrestore(&id_priv->lock, flags);
199 	return ret;
200 }
201 
202 static enum cma_state cma_exch(struct rdma_id_private *id_priv,
203 			       enum cma_state exch)
204 {
205 	unsigned long flags;
206 	enum cma_state old;
207 
208 	spin_lock_irqsave(&id_priv->lock, flags);
209 	old = id_priv->state;
210 	id_priv->state = exch;
211 	spin_unlock_irqrestore(&id_priv->lock, flags);
212 	return old;
213 }
214 
215 static inline u8 cma_get_ip_ver(struct cma_hdr *hdr)
216 {
217 	return hdr->ip_version >> 4;
218 }
219 
220 static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
221 {
222 	hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
223 }
224 
225 static inline u8 sdp_get_majv(u8 sdp_version)
226 {
227 	return sdp_version >> 4;
228 }
229 
230 static inline u8 sdp_get_ip_ver(struct sdp_hh *hh)
231 {
232 	return hh->ip_version >> 4;
233 }
234 
235 static inline void sdp_set_ip_ver(struct sdp_hh *hh, u8 ip_ver)
236 {
237 	hh->ip_version = (ip_ver << 4) | (hh->ip_version & 0xF);
238 }
239 
240 static void cma_attach_to_dev(struct rdma_id_private *id_priv,
241 			      struct cma_device *cma_dev)
242 {
243 	atomic_inc(&cma_dev->refcount);
244 	id_priv->cma_dev = cma_dev;
245 	id_priv->id.device = cma_dev->device;
246 	list_add_tail(&id_priv->list, &cma_dev->id_list);
247 }
248 
249 static inline void cma_deref_dev(struct cma_device *cma_dev)
250 {
251 	if (atomic_dec_and_test(&cma_dev->refcount))
252 		complete(&cma_dev->comp);
253 }
254 
255 static void cma_detach_from_dev(struct rdma_id_private *id_priv)
256 {
257 	list_del(&id_priv->list);
258 	cma_deref_dev(id_priv->cma_dev);
259 	id_priv->cma_dev = NULL;
260 }
261 
262 static int cma_acquire_ib_dev(struct rdma_id_private *id_priv)
263 {
264 	struct cma_device *cma_dev;
265 	union ib_gid *gid;
266 	int ret = -ENODEV;
267 
268 	gid = ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr);
269 
270 	mutex_lock(&lock);
271 	list_for_each_entry(cma_dev, &dev_list, list) {
272 		ret = ib_find_cached_gid(cma_dev->device, gid,
273 					 &id_priv->id.port_num, NULL);
274 		if (!ret) {
275 			cma_attach_to_dev(id_priv, cma_dev);
276 			break;
277 		}
278 	}
279 	mutex_unlock(&lock);
280 	return ret;
281 }
282 
283 static int cma_acquire_dev(struct rdma_id_private *id_priv)
284 {
285 	switch (id_priv->id.route.addr.dev_addr.dev_type) {
286 	case IB_NODE_CA:
287 		return cma_acquire_ib_dev(id_priv);
288 	default:
289 		return -ENODEV;
290 	}
291 }
292 
293 static void cma_deref_id(struct rdma_id_private *id_priv)
294 {
295 	if (atomic_dec_and_test(&id_priv->refcount))
296 		complete(&id_priv->comp);
297 }
298 
299 static void cma_release_remove(struct rdma_id_private *id_priv)
300 {
301 	if (atomic_dec_and_test(&id_priv->dev_remove))
302 		wake_up(&id_priv->wait_remove);
303 }
304 
305 struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
306 				  void *context, enum rdma_port_space ps)
307 {
308 	struct rdma_id_private *id_priv;
309 
310 	id_priv = kzalloc(sizeof *id_priv, GFP_KERNEL);
311 	if (!id_priv)
312 		return ERR_PTR(-ENOMEM);
313 
314 	id_priv->state = CMA_IDLE;
315 	id_priv->id.context = context;
316 	id_priv->id.event_handler = event_handler;
317 	id_priv->id.ps = ps;
318 	spin_lock_init(&id_priv->lock);
319 	init_completion(&id_priv->comp);
320 	atomic_set(&id_priv->refcount, 1);
321 	init_waitqueue_head(&id_priv->wait_remove);
322 	atomic_set(&id_priv->dev_remove, 0);
323 	INIT_LIST_HEAD(&id_priv->listen_list);
324 	get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
325 
326 	return &id_priv->id;
327 }
328 EXPORT_SYMBOL(rdma_create_id);
329 
330 static int cma_init_ib_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
331 {
332 	struct ib_qp_attr qp_attr;
333 	struct rdma_dev_addr *dev_addr;
334 	int ret;
335 
336 	dev_addr = &id_priv->id.route.addr.dev_addr;
337 	ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num,
338 				  ib_addr_get_pkey(dev_addr),
339 				  &qp_attr.pkey_index);
340 	if (ret)
341 		return ret;
342 
343 	qp_attr.qp_state = IB_QPS_INIT;
344 	qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE;
345 	qp_attr.port_num = id_priv->id.port_num;
346 	return ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_ACCESS_FLAGS |
347 					  IB_QP_PKEY_INDEX | IB_QP_PORT);
348 }
349 
350 int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
351 		   struct ib_qp_init_attr *qp_init_attr)
352 {
353 	struct rdma_id_private *id_priv;
354 	struct ib_qp *qp;
355 	int ret;
356 
357 	id_priv = container_of(id, struct rdma_id_private, id);
358 	if (id->device != pd->device)
359 		return -EINVAL;
360 
361 	qp = ib_create_qp(pd, qp_init_attr);
362 	if (IS_ERR(qp))
363 		return PTR_ERR(qp);
364 
365 	switch (id->device->node_type) {
366 	case IB_NODE_CA:
367 		ret = cma_init_ib_qp(id_priv, qp);
368 		break;
369 	default:
370 		ret = -ENOSYS;
371 		break;
372 	}
373 
374 	if (ret)
375 		goto err;
376 
377 	id->qp = qp;
378 	id_priv->qp_num = qp->qp_num;
379 	id_priv->qp_type = qp->qp_type;
380 	id_priv->srq = (qp->srq != NULL);
381 	return 0;
382 err:
383 	ib_destroy_qp(qp);
384 	return ret;
385 }
386 EXPORT_SYMBOL(rdma_create_qp);
387 
388 void rdma_destroy_qp(struct rdma_cm_id *id)
389 {
390 	ib_destroy_qp(id->qp);
391 }
392 EXPORT_SYMBOL(rdma_destroy_qp);
393 
394 static int cma_modify_qp_rtr(struct rdma_cm_id *id)
395 {
396 	struct ib_qp_attr qp_attr;
397 	int qp_attr_mask, ret;
398 
399 	if (!id->qp)
400 		return 0;
401 
402 	/* Need to update QP attributes from default values. */
403 	qp_attr.qp_state = IB_QPS_INIT;
404 	ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
405 	if (ret)
406 		return ret;
407 
408 	ret = ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
409 	if (ret)
410 		return ret;
411 
412 	qp_attr.qp_state = IB_QPS_RTR;
413 	ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
414 	if (ret)
415 		return ret;
416 
417 	return ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
418 }
419 
420 static int cma_modify_qp_rts(struct rdma_cm_id *id)
421 {
422 	struct ib_qp_attr qp_attr;
423 	int qp_attr_mask, ret;
424 
425 	if (!id->qp)
426 		return 0;
427 
428 	qp_attr.qp_state = IB_QPS_RTS;
429 	ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
430 	if (ret)
431 		return ret;
432 
433 	return ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
434 }
435 
436 static int cma_modify_qp_err(struct rdma_cm_id *id)
437 {
438 	struct ib_qp_attr qp_attr;
439 
440 	if (!id->qp)
441 		return 0;
442 
443 	qp_attr.qp_state = IB_QPS_ERR;
444 	return ib_modify_qp(id->qp, &qp_attr, IB_QP_STATE);
445 }
446 
447 int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
448 		       int *qp_attr_mask)
449 {
450 	struct rdma_id_private *id_priv;
451 	int ret;
452 
453 	id_priv = container_of(id, struct rdma_id_private, id);
454 	switch (id_priv->id.device->node_type) {
455 	case IB_NODE_CA:
456 		ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
457 					 qp_attr_mask);
458 		if (qp_attr->qp_state == IB_QPS_RTR)
459 			qp_attr->rq_psn = id_priv->seq_num;
460 		break;
461 	default:
462 		ret = -ENOSYS;
463 		break;
464 	}
465 
466 	return ret;
467 }
468 EXPORT_SYMBOL(rdma_init_qp_attr);
469 
470 static inline int cma_zero_addr(struct sockaddr *addr)
471 {
472 	struct in6_addr *ip6;
473 
474 	if (addr->sa_family == AF_INET)
475 		return ZERONET(((struct sockaddr_in *) addr)->sin_addr.s_addr);
476 	else {
477 		ip6 = &((struct sockaddr_in6 *) addr)->sin6_addr;
478 		return (ip6->s6_addr32[0] | ip6->s6_addr32[1] |
479 			ip6->s6_addr32[2] | ip6->s6_addr32[3]) == 0;
480 	}
481 }
482 
483 static inline int cma_loopback_addr(struct sockaddr *addr)
484 {
485 	return LOOPBACK(((struct sockaddr_in *) addr)->sin_addr.s_addr);
486 }
487 
/* Return 1 if @addr is a zero or loopback address, 0 otherwise. */
static inline int cma_any_addr(struct sockaddr *addr)
{
	if (cma_zero_addr(addr))
		return 1;

	return cma_loopback_addr(addr) != 0;
}
492 
493 static inline int cma_any_port(struct sockaddr *addr)
494 {
495 	return !((struct sockaddr_in *) addr)->sin_port;
496 }
497 
498 static int cma_get_net_info(void *hdr, enum rdma_port_space ps,
499 			    u8 *ip_ver, __u16 *port,
500 			    union cma_ip_addr **src, union cma_ip_addr **dst)
501 {
502 	switch (ps) {
503 	case RDMA_PS_SDP:
504 		if (sdp_get_majv(((struct sdp_hh *) hdr)->sdp_version) !=
505 		    SDP_MAJ_VERSION)
506 			return -EINVAL;
507 
508 		*ip_ver	= sdp_get_ip_ver(hdr);
509 		*port	= ((struct sdp_hh *) hdr)->port;
510 		*src	= &((struct sdp_hh *) hdr)->src_addr;
511 		*dst	= &((struct sdp_hh *) hdr)->dst_addr;
512 		break;
513 	default:
514 		if (((struct cma_hdr *) hdr)->cma_version != CMA_VERSION)
515 			return -EINVAL;
516 
517 		*ip_ver	= cma_get_ip_ver(hdr);
518 		*port	= ((struct cma_hdr *) hdr)->port;
519 		*src	= &((struct cma_hdr *) hdr)->src_addr;
520 		*dst	= &((struct cma_hdr *) hdr)->dst_addr;
521 		break;
522 	}
523 
524 	if (*ip_ver != 4 && *ip_ver != 6)
525 		return -EINVAL;
526 	return 0;
527 }
528 
529 static void cma_save_net_info(struct rdma_addr *addr,
530 			      struct rdma_addr *listen_addr,
531 			      u8 ip_ver, __u16 port,
532 			      union cma_ip_addr *src, union cma_ip_addr *dst)
533 {
534 	struct sockaddr_in *listen4, *ip4;
535 	struct sockaddr_in6 *listen6, *ip6;
536 
537 	switch (ip_ver) {
538 	case 4:
539 		listen4 = (struct sockaddr_in *) &listen_addr->src_addr;
540 		ip4 = (struct sockaddr_in *) &addr->src_addr;
541 		ip4->sin_family = listen4->sin_family;
542 		ip4->sin_addr.s_addr = dst->ip4.addr;
543 		ip4->sin_port = listen4->sin_port;
544 
545 		ip4 = (struct sockaddr_in *) &addr->dst_addr;
546 		ip4->sin_family = listen4->sin_family;
547 		ip4->sin_addr.s_addr = src->ip4.addr;
548 		ip4->sin_port = port;
549 		break;
550 	case 6:
551 		listen6 = (struct sockaddr_in6 *) &listen_addr->src_addr;
552 		ip6 = (struct sockaddr_in6 *) &addr->src_addr;
553 		ip6->sin6_family = listen6->sin6_family;
554 		ip6->sin6_addr = dst->ip6;
555 		ip6->sin6_port = listen6->sin6_port;
556 
557 		ip6 = (struct sockaddr_in6 *) &addr->dst_addr;
558 		ip6->sin6_family = listen6->sin6_family;
559 		ip6->sin6_addr = src->ip6;
560 		ip6->sin6_port = port;
561 		break;
562 	default:
563 		break;
564 	}
565 }
566 
567 static inline int cma_user_data_offset(enum rdma_port_space ps)
568 {
569 	switch (ps) {
570 	case RDMA_PS_SDP:
571 		return 0;
572 	default:
573 		return sizeof(struct cma_hdr);
574 	}
575 }
576 
577 static int cma_notify_user(struct rdma_id_private *id_priv,
578 			   enum rdma_cm_event_type type, int status,
579 			   void *data, u8 data_len)
580 {
581 	struct rdma_cm_event event;
582 
583 	event.event = type;
584 	event.status = status;
585 	event.private_data = data;
586 	event.private_data_len = data_len;
587 
588 	return id_priv->id.event_handler(&id_priv->id, &event);
589 }
590 
591 static void cma_cancel_route(struct rdma_id_private *id_priv)
592 {
593 	switch (id_priv->id.device->node_type) {
594 	case IB_NODE_CA:
595 		if (id_priv->query)
596 			ib_sa_cancel_query(id_priv->query_id, id_priv->query);
597 		break;
598 	default:
599 		break;
600 	}
601 }
602 
603 static inline int cma_internal_listen(struct rdma_id_private *id_priv)
604 {
605 	return (id_priv->state == CMA_LISTEN) && id_priv->cma_dev &&
606 	       cma_any_addr(&id_priv->id.route.addr.src_addr);
607 }
608 
609 static void cma_destroy_listen(struct rdma_id_private *id_priv)
610 {
611 	cma_exch(id_priv, CMA_DESTROYING);
612 
613 	if (id_priv->cma_dev) {
614 		switch (id_priv->id.device->node_type) {
615 		case IB_NODE_CA:
616 	 		if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
617 				ib_destroy_cm_id(id_priv->cm_id.ib);
618 			break;
619 		default:
620 			break;
621 		}
622 		cma_detach_from_dev(id_priv);
623 	}
624 	list_del(&id_priv->listen_list);
625 
626 	cma_deref_id(id_priv);
627 	wait_for_completion(&id_priv->comp);
628 
629 	kfree(id_priv);
630 }
631 
632 static void cma_cancel_listens(struct rdma_id_private *id_priv)
633 {
634 	struct rdma_id_private *dev_id_priv;
635 
636 	mutex_lock(&lock);
637 	list_del(&id_priv->list);
638 
639 	while (!list_empty(&id_priv->listen_list)) {
640 		dev_id_priv = list_entry(id_priv->listen_list.next,
641 					 struct rdma_id_private, listen_list);
642 		cma_destroy_listen(dev_id_priv);
643 	}
644 	mutex_unlock(&lock);
645 }
646 
647 static void cma_cancel_operation(struct rdma_id_private *id_priv,
648 				 enum cma_state state)
649 {
650 	switch (state) {
651 	case CMA_ADDR_QUERY:
652 		rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
653 		break;
654 	case CMA_ROUTE_QUERY:
655 		cma_cancel_route(id_priv);
656 		break;
657 	case CMA_LISTEN:
658 		if (cma_any_addr(&id_priv->id.route.addr.src_addr) &&
659 		    !id_priv->cma_dev)
660 			cma_cancel_listens(id_priv);
661 		break;
662 	default:
663 		break;
664 	}
665 }
666 
667 static void cma_release_port(struct rdma_id_private *id_priv)
668 {
669 	struct rdma_bind_list *bind_list = id_priv->bind_list;
670 
671 	if (!bind_list)
672 		return;
673 
674 	mutex_lock(&lock);
675 	hlist_del(&id_priv->node);
676 	if (hlist_empty(&bind_list->owners)) {
677 		idr_remove(bind_list->ps, bind_list->port);
678 		kfree(bind_list);
679 	}
680 	mutex_unlock(&lock);
681 }
682 
683 void rdma_destroy_id(struct rdma_cm_id *id)
684 {
685 	struct rdma_id_private *id_priv;
686 	enum cma_state state;
687 
688 	id_priv = container_of(id, struct rdma_id_private, id);
689 	state = cma_exch(id_priv, CMA_DESTROYING);
690 	cma_cancel_operation(id_priv, state);
691 
692 	if (id_priv->cma_dev) {
693 		switch (id->device->node_type) {
694 		case IB_NODE_CA:
695 	 		if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
696 				ib_destroy_cm_id(id_priv->cm_id.ib);
697 			break;
698 		default:
699 			break;
700 		}
701 	  	mutex_lock(&lock);
702 		cma_detach_from_dev(id_priv);
703 		mutex_unlock(&lock);
704 	}
705 
706 	cma_release_port(id_priv);
707 	cma_deref_id(id_priv);
708 	wait_for_completion(&id_priv->comp);
709 
710 	kfree(id_priv->id.route.path_rec);
711 	kfree(id_priv);
712 }
713 EXPORT_SYMBOL(rdma_destroy_id);
714 
715 static int cma_rep_recv(struct rdma_id_private *id_priv)
716 {
717 	int ret;
718 
719 	ret = cma_modify_qp_rtr(&id_priv->id);
720 	if (ret)
721 		goto reject;
722 
723 	ret = cma_modify_qp_rts(&id_priv->id);
724 	if (ret)
725 		goto reject;
726 
727 	ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0);
728 	if (ret)
729 		goto reject;
730 
731 	return 0;
732 reject:
733 	cma_modify_qp_err(&id_priv->id);
734 	ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
735 		       NULL, 0, NULL, 0);
736 	return ret;
737 }
738 
739 static int cma_verify_rep(struct rdma_id_private *id_priv, void *data)
740 {
741 	if (id_priv->id.ps == RDMA_PS_SDP &&
742 	    sdp_get_majv(((struct sdp_hah *) data)->sdp_version) !=
743 	    SDP_MAJ_VERSION)
744 		return -EINVAL;
745 
746 	return 0;
747 }
748 
749 static int cma_rtu_recv(struct rdma_id_private *id_priv)
750 {
751 	int ret;
752 
753 	ret = cma_modify_qp_rts(&id_priv->id);
754 	if (ret)
755 		goto reject;
756 
757 	return 0;
758 reject:
759 	cma_modify_qp_err(&id_priv->id);
760 	ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
761 		       NULL, 0, NULL, 0);
762 	return ret;
763 }
764 
765 static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
766 {
767 	struct rdma_id_private *id_priv = cm_id->context;
768 	enum rdma_cm_event_type event;
769 	u8 private_data_len = 0;
770 	int ret = 0, status = 0;
771 
772 	atomic_inc(&id_priv->dev_remove);
773 	if (!cma_comp(id_priv, CMA_CONNECT))
774 		goto out;
775 
776 	switch (ib_event->event) {
777 	case IB_CM_REQ_ERROR:
778 	case IB_CM_REP_ERROR:
779 		event = RDMA_CM_EVENT_UNREACHABLE;
780 		status = -ETIMEDOUT;
781 		break;
782 	case IB_CM_REP_RECEIVED:
783 		status = cma_verify_rep(id_priv, ib_event->private_data);
784 		if (status)
785 			event = RDMA_CM_EVENT_CONNECT_ERROR;
786 		else if (id_priv->id.qp && id_priv->id.ps != RDMA_PS_SDP) {
787 			status = cma_rep_recv(id_priv);
788 			event = status ? RDMA_CM_EVENT_CONNECT_ERROR :
789 					 RDMA_CM_EVENT_ESTABLISHED;
790 		} else
791 			event = RDMA_CM_EVENT_CONNECT_RESPONSE;
792 		private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE;
793 		break;
794 	case IB_CM_RTU_RECEIVED:
795 		status = cma_rtu_recv(id_priv);
796 		event = status ? RDMA_CM_EVENT_CONNECT_ERROR :
797 				 RDMA_CM_EVENT_ESTABLISHED;
798 		break;
799 	case IB_CM_DREQ_ERROR:
800 		status = -ETIMEDOUT; /* fall through */
801 	case IB_CM_DREQ_RECEIVED:
802 	case IB_CM_DREP_RECEIVED:
803 		if (!cma_comp_exch(id_priv, CMA_CONNECT, CMA_DISCONNECT))
804 			goto out;
805 		event = RDMA_CM_EVENT_DISCONNECTED;
806 		break;
807 	case IB_CM_TIMEWAIT_EXIT:
808 	case IB_CM_MRA_RECEIVED:
809 		/* ignore event */
810 		goto out;
811 	case IB_CM_REJ_RECEIVED:
812 		cma_modify_qp_err(&id_priv->id);
813 		status = ib_event->param.rej_rcvd.reason;
814 		event = RDMA_CM_EVENT_REJECTED;
815 		break;
816 	default:
817 		printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d",
818 		       ib_event->event);
819 		goto out;
820 	}
821 
822 	ret = cma_notify_user(id_priv, event, status, ib_event->private_data,
823 			      private_data_len);
824 	if (ret) {
825 		/* Destroy the CM ID by returning a non-zero value. */
826 		id_priv->cm_id.ib = NULL;
827 		cma_exch(id_priv, CMA_DESTROYING);
828 		cma_release_remove(id_priv);
829 		rdma_destroy_id(&id_priv->id);
830 		return ret;
831 	}
832 out:
833 	cma_release_remove(id_priv);
834 	return ret;
835 }
836 
837 static struct rdma_id_private *cma_new_id(struct rdma_cm_id *listen_id,
838 					  struct ib_cm_event *ib_event)
839 {
840 	struct rdma_id_private *id_priv;
841 	struct rdma_cm_id *id;
842 	struct rdma_route *rt;
843 	union cma_ip_addr *src, *dst;
844 	__u16 port;
845 	u8 ip_ver;
846 
847 	id = rdma_create_id(listen_id->event_handler, listen_id->context,
848 			    listen_id->ps);
849 	if (IS_ERR(id))
850 		return NULL;
851 
852 	rt = &id->route;
853 	rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
854 	rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths, GFP_KERNEL);
855 	if (!rt->path_rec)
856 		goto err;
857 
858 	if (cma_get_net_info(ib_event->private_data, listen_id->ps,
859 			     &ip_ver, &port, &src, &dst))
860 		goto err;
861 
862 	cma_save_net_info(&id->route.addr, &listen_id->route.addr,
863 			  ip_ver, port, src, dst);
864 	rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path;
865 	if (rt->num_paths == 2)
866 		rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
867 
868 	ib_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
869 	ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
870 	ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey));
871 	rt->addr.dev_addr.dev_type = IB_NODE_CA;
872 
873 	id_priv = container_of(id, struct rdma_id_private, id);
874 	id_priv->state = CMA_CONNECT;
875 	return id_priv;
876 err:
877 	rdma_destroy_id(id);
878 	return NULL;
879 }
880 
881 static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
882 {
883 	struct rdma_id_private *listen_id, *conn_id;
884 	int offset, ret;
885 
886 	listen_id = cm_id->context;
887 	atomic_inc(&listen_id->dev_remove);
888 	if (!cma_comp(listen_id, CMA_LISTEN)) {
889 		ret = -ECONNABORTED;
890 		goto out;
891 	}
892 
893 	conn_id = cma_new_id(&listen_id->id, ib_event);
894 	if (!conn_id) {
895 		ret = -ENOMEM;
896 		goto out;
897 	}
898 
899 	atomic_inc(&conn_id->dev_remove);
900 	ret = cma_acquire_ib_dev(conn_id);
901 	if (ret) {
902 		ret = -ENODEV;
903 		cma_release_remove(conn_id);
904 		rdma_destroy_id(&conn_id->id);
905 		goto out;
906 	}
907 
908 	conn_id->cm_id.ib = cm_id;
909 	cm_id->context = conn_id;
910 	cm_id->cm_handler = cma_ib_handler;
911 
912 	offset = cma_user_data_offset(listen_id->id.ps);
913 	ret = cma_notify_user(conn_id, RDMA_CM_EVENT_CONNECT_REQUEST, 0,
914 			      ib_event->private_data + offset,
915 			      IB_CM_REQ_PRIVATE_DATA_SIZE - offset);
916 	if (ret) {
917 		/* Destroy the CM ID by returning a non-zero value. */
918 		conn_id->cm_id.ib = NULL;
919 		cma_exch(conn_id, CMA_DESTROYING);
920 		cma_release_remove(conn_id);
921 		rdma_destroy_id(&conn_id->id);
922 	}
923 out:
924 	cma_release_remove(listen_id);
925 	return ret;
926 }
927 
928 static __be64 cma_get_service_id(enum rdma_port_space ps, struct sockaddr *addr)
929 {
930 	return cpu_to_be64(((u64)ps << 16) +
931 	       be16_to_cpu(((struct sockaddr_in *) addr)->sin_port));
932 }
933 
934 static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr,
935 				 struct ib_cm_compare_data *compare)
936 {
937 	struct cma_hdr *cma_data, *cma_mask;
938 	struct sdp_hh *sdp_data, *sdp_mask;
939 	__u32 ip4_addr;
940 	struct in6_addr ip6_addr;
941 
942 	memset(compare, 0, sizeof *compare);
943 	cma_data = (void *) compare->data;
944 	cma_mask = (void *) compare->mask;
945 	sdp_data = (void *) compare->data;
946 	sdp_mask = (void *) compare->mask;
947 
948 	switch (addr->sa_family) {
949 	case AF_INET:
950 		ip4_addr = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
951 		if (ps == RDMA_PS_SDP) {
952 			sdp_set_ip_ver(sdp_data, 4);
953 			sdp_set_ip_ver(sdp_mask, 0xF);
954 			sdp_data->dst_addr.ip4.addr = ip4_addr;
955 			sdp_mask->dst_addr.ip4.addr = ~0;
956 		} else {
957 			cma_set_ip_ver(cma_data, 4);
958 			cma_set_ip_ver(cma_mask, 0xF);
959 			cma_data->dst_addr.ip4.addr = ip4_addr;
960 			cma_mask->dst_addr.ip4.addr = ~0;
961 		}
962 		break;
963 	case AF_INET6:
964 		ip6_addr = ((struct sockaddr_in6 *) addr)->sin6_addr;
965 		if (ps == RDMA_PS_SDP) {
966 			sdp_set_ip_ver(sdp_data, 6);
967 			sdp_set_ip_ver(sdp_mask, 0xF);
968 			sdp_data->dst_addr.ip6 = ip6_addr;
969 			memset(&sdp_mask->dst_addr.ip6, 0xFF,
970 			       sizeof sdp_mask->dst_addr.ip6);
971 		} else {
972 			cma_set_ip_ver(cma_data, 6);
973 			cma_set_ip_ver(cma_mask, 0xF);
974 			cma_data->dst_addr.ip6 = ip6_addr;
975 			memset(&cma_mask->dst_addr.ip6, 0xFF,
976 			       sizeof cma_mask->dst_addr.ip6);
977 		}
978 		break;
979 	default:
980 		break;
981 	}
982 }
983 
984 static int cma_ib_listen(struct rdma_id_private *id_priv)
985 {
986 	struct ib_cm_compare_data compare_data;
987 	struct sockaddr *addr;
988 	__be64 svc_id;
989 	int ret;
990 
991 	id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_req_handler,
992 					    id_priv);
993 	if (IS_ERR(id_priv->cm_id.ib))
994 		return PTR_ERR(id_priv->cm_id.ib);
995 
996 	addr = &id_priv->id.route.addr.src_addr;
997 	svc_id = cma_get_service_id(id_priv->id.ps, addr);
998 	if (cma_any_addr(addr))
999 		ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL);
1000 	else {
1001 		cma_set_compare_data(id_priv->id.ps, addr, &compare_data);
1002 		ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, &compare_data);
1003 	}
1004 
1005 	if (ret) {
1006 		ib_destroy_cm_id(id_priv->cm_id.ib);
1007 		id_priv->cm_id.ib = NULL;
1008 	}
1009 
1010 	return ret;
1011 }
1012 
1013 static int cma_listen_handler(struct rdma_cm_id *id,
1014 			      struct rdma_cm_event *event)
1015 {
1016 	struct rdma_id_private *id_priv = id->context;
1017 
1018 	id->context = id_priv->id.context;
1019 	id->event_handler = id_priv->id.event_handler;
1020 	return id_priv->id.event_handler(id, event);
1021 }
1022 
1023 static void cma_listen_on_dev(struct rdma_id_private *id_priv,
1024 			      struct cma_device *cma_dev)
1025 {
1026 	struct rdma_id_private *dev_id_priv;
1027 	struct rdma_cm_id *id;
1028 	int ret;
1029 
1030 	id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps);
1031 	if (IS_ERR(id))
1032 		return;
1033 
1034 	dev_id_priv = container_of(id, struct rdma_id_private, id);
1035 
1036 	dev_id_priv->state = CMA_ADDR_BOUND;
1037 	memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr,
1038 	       ip_addr_size(&id_priv->id.route.addr.src_addr));
1039 
1040 	cma_attach_to_dev(dev_id_priv, cma_dev);
1041 	list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
1042 
1043 	ret = rdma_listen(id, id_priv->backlog);
1044 	if (ret)
1045 		goto err;
1046 
1047 	return;
1048 err:
1049 	cma_destroy_listen(dev_id_priv);
1050 }
1051 
1052 static void cma_listen_on_all(struct rdma_id_private *id_priv)
1053 {
1054 	struct cma_device *cma_dev;
1055 
1056 	mutex_lock(&lock);
1057 	list_add_tail(&id_priv->list, &listen_any_list);
1058 	list_for_each_entry(cma_dev, &dev_list, list)
1059 		cma_listen_on_dev(id_priv, cma_dev);
1060 	mutex_unlock(&lock);
1061 }
1062 
1063 static int cma_bind_any(struct rdma_cm_id *id, sa_family_t af)
1064 {
1065 	struct sockaddr_in addr_in;
1066 
1067 	memset(&addr_in, 0, sizeof addr_in);
1068 	addr_in.sin_family = af;
1069 	return rdma_bind_addr(id, (struct sockaddr *) &addr_in);
1070 }
1071 
1072 int rdma_listen(struct rdma_cm_id *id, int backlog)
1073 {
1074 	struct rdma_id_private *id_priv;
1075 	int ret;
1076 
1077 	id_priv = container_of(id, struct rdma_id_private, id);
1078 	if (id_priv->state == CMA_IDLE) {
1079 		ret = cma_bind_any(id, AF_INET);
1080 		if (ret)
1081 			return ret;
1082 	}
1083 
1084 	if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_LISTEN))
1085 		return -EINVAL;
1086 
1087 	id_priv->backlog = backlog;
1088 	if (id->device) {
1089 		switch (id->device->node_type) {
1090 		case IB_NODE_CA:
1091 			ret = cma_ib_listen(id_priv);
1092 			if (ret)
1093 				goto err;
1094 			break;
1095 		default:
1096 			ret = -ENOSYS;
1097 			goto err;
1098 		}
1099 	} else
1100 		cma_listen_on_all(id_priv);
1101 
1102 	return 0;
1103 err:
1104 	id_priv->backlog = 0;
1105 	cma_comp_exch(id_priv, CMA_LISTEN, CMA_ADDR_BOUND);
1106 	return ret;
1107 }
1108 EXPORT_SYMBOL(rdma_listen);
1109 
1110 static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
1111 			      void *context)
1112 {
1113 	struct cma_work *work = context;
1114 	struct rdma_route *route;
1115 
1116 	route = &work->id->id.route;
1117 
1118 	if (!status) {
1119 		route->num_paths = 1;
1120 		*route->path_rec = *path_rec;
1121 	} else {
1122 		work->old_state = CMA_ROUTE_QUERY;
1123 		work->new_state = CMA_ADDR_RESOLVED;
1124 		work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
1125 	}
1126 
1127 	queue_work(cma_wq, &work->work);
1128 }
1129 
1130 static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
1131 			      struct cma_work *work)
1132 {
1133 	struct rdma_dev_addr *addr = &id_priv->id.route.addr.dev_addr;
1134 	struct ib_sa_path_rec path_rec;
1135 
1136 	memset(&path_rec, 0, sizeof path_rec);
1137 	path_rec.sgid = *ib_addr_get_sgid(addr);
1138 	path_rec.dgid = *ib_addr_get_dgid(addr);
1139 	path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(addr));
1140 	path_rec.numb_path = 1;
1141 
1142 	id_priv->query_id = ib_sa_path_rec_get(id_priv->id.device,
1143 				id_priv->id.port_num, &path_rec,
1144 				IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
1145 				IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH,
1146 				timeout_ms, GFP_KERNEL,
1147 				cma_query_handler, work, &id_priv->query);
1148 
1149 	return (id_priv->query_id < 0) ? id_priv->query_id : 0;
1150 }
1151 
1152 static void cma_work_handler(void *data)
1153 {
1154 	struct cma_work *work = data;
1155 	struct rdma_id_private *id_priv = work->id;
1156 	int destroy = 0;
1157 
1158 	atomic_inc(&id_priv->dev_remove);
1159 	if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
1160 		goto out;
1161 
1162 	if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
1163 		cma_exch(id_priv, CMA_DESTROYING);
1164 		destroy = 1;
1165 	}
1166 out:
1167 	cma_release_remove(id_priv);
1168 	cma_deref_id(id_priv);
1169 	if (destroy)
1170 		rdma_destroy_id(&id_priv->id);
1171 	kfree(work);
1172 }
1173 
1174 static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
1175 {
1176 	struct rdma_route *route = &id_priv->id.route;
1177 	struct cma_work *work;
1178 	int ret;
1179 
1180 	work = kzalloc(sizeof *work, GFP_KERNEL);
1181 	if (!work)
1182 		return -ENOMEM;
1183 
1184 	work->id = id_priv;
1185 	INIT_WORK(&work->work, cma_work_handler, work);
1186 	work->old_state = CMA_ROUTE_QUERY;
1187 	work->new_state = CMA_ROUTE_RESOLVED;
1188 	work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
1189 
1190 	route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL);
1191 	if (!route->path_rec) {
1192 		ret = -ENOMEM;
1193 		goto err1;
1194 	}
1195 
1196 	ret = cma_query_ib_route(id_priv, timeout_ms, work);
1197 	if (ret)
1198 		goto err2;
1199 
1200 	return 0;
1201 err2:
1202 	kfree(route->path_rec);
1203 	route->path_rec = NULL;
1204 err1:
1205 	kfree(work);
1206 	return ret;
1207 }
1208 
1209 int rdma_set_ib_paths(struct rdma_cm_id *id,
1210 		      struct ib_sa_path_rec *path_rec, int num_paths)
1211 {
1212 	struct rdma_id_private *id_priv;
1213 	int ret;
1214 
1215 	id_priv = container_of(id, struct rdma_id_private, id);
1216 	if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_RESOLVED))
1217 		return -EINVAL;
1218 
1219 	id->route.path_rec = kmalloc(sizeof *path_rec * num_paths, GFP_KERNEL);
1220 	if (!id->route.path_rec) {
1221 		ret = -ENOMEM;
1222 		goto err;
1223 	}
1224 
1225 	memcpy(id->route.path_rec, path_rec, sizeof *path_rec * num_paths);
1226 	return 0;
1227 err:
1228 	cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_ADDR_RESOLVED);
1229 	return ret;
1230 }
1231 EXPORT_SYMBOL(rdma_set_ib_paths);
1232 
1233 int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
1234 {
1235 	struct rdma_id_private *id_priv;
1236 	int ret;
1237 
1238 	id_priv = container_of(id, struct rdma_id_private, id);
1239 	if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_QUERY))
1240 		return -EINVAL;
1241 
1242 	atomic_inc(&id_priv->refcount);
1243 	switch (id->device->node_type) {
1244 	case IB_NODE_CA:
1245 		ret = cma_resolve_ib_route(id_priv, timeout_ms);
1246 		break;
1247 	default:
1248 		ret = -ENOSYS;
1249 		break;
1250 	}
1251 	if (ret)
1252 		goto err;
1253 
1254 	return 0;
1255 err:
1256 	cma_comp_exch(id_priv, CMA_ROUTE_QUERY, CMA_ADDR_RESOLVED);
1257 	cma_deref_id(id_priv);
1258 	return ret;
1259 }
1260 EXPORT_SYMBOL(rdma_resolve_route);
1261 
1262 static int cma_bind_loopback(struct rdma_id_private *id_priv)
1263 {
1264 	struct cma_device *cma_dev;
1265 	struct ib_port_attr port_attr;
1266 	union ib_gid *gid;
1267 	u16 pkey;
1268 	int ret;
1269 	u8 p;
1270 
1271 	mutex_lock(&lock);
1272 	list_for_each_entry(cma_dev, &dev_list, list)
1273 		for (p = 1; p <= cma_dev->device->phys_port_cnt; ++p)
1274 			if (!ib_query_port (cma_dev->device, p, &port_attr) &&
1275 			    port_attr.state == IB_PORT_ACTIVE)
1276 				goto port_found;
1277 
1278 	if (!list_empty(&dev_list)) {
1279 		p = 1;
1280 		cma_dev = list_entry(dev_list.next, struct cma_device, list);
1281 	} else {
1282 		ret = -ENODEV;
1283 		goto out;
1284 	}
1285 
1286 port_found:
1287 	gid = ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr);
1288 	ret = ib_get_cached_gid(cma_dev->device, p, 0, gid);
1289 	if (ret)
1290 		goto out;
1291 
1292 	ret = ib_get_cached_pkey(cma_dev->device, p, 0, &pkey);
1293 	if (ret)
1294 		goto out;
1295 
1296 	ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey);
1297 	id_priv->id.port_num = p;
1298 	cma_attach_to_dev(id_priv, cma_dev);
1299 out:
1300 	mutex_unlock(&lock);
1301 	return ret;
1302 }
1303 
1304 static void addr_handler(int status, struct sockaddr *src_addr,
1305 			 struct rdma_dev_addr *dev_addr, void *context)
1306 {
1307 	struct rdma_id_private *id_priv = context;
1308 	enum rdma_cm_event_type event;
1309 
1310 	atomic_inc(&id_priv->dev_remove);
1311 	if (!id_priv->cma_dev && !status)
1312 		status = cma_acquire_dev(id_priv);
1313 
1314 	if (status) {
1315 		if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_BOUND))
1316 			goto out;
1317 		event = RDMA_CM_EVENT_ADDR_ERROR;
1318 	} else {
1319 		if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED))
1320 			goto out;
1321 		memcpy(&id_priv->id.route.addr.src_addr, src_addr,
1322 		       ip_addr_size(src_addr));
1323 		event = RDMA_CM_EVENT_ADDR_RESOLVED;
1324 	}
1325 
1326 	if (cma_notify_user(id_priv, event, status, NULL, 0)) {
1327 		cma_exch(id_priv, CMA_DESTROYING);
1328 		cma_release_remove(id_priv);
1329 		cma_deref_id(id_priv);
1330 		rdma_destroy_id(&id_priv->id);
1331 		return;
1332 	}
1333 out:
1334 	cma_release_remove(id_priv);
1335 	cma_deref_id(id_priv);
1336 }
1337 
1338 static int cma_resolve_loopback(struct rdma_id_private *id_priv)
1339 {
1340 	struct cma_work *work;
1341 	struct sockaddr_in *src_in, *dst_in;
1342 	int ret;
1343 
1344 	work = kzalloc(sizeof *work, GFP_KERNEL);
1345 	if (!work)
1346 		return -ENOMEM;
1347 
1348 	if (!id_priv->cma_dev) {
1349 		ret = cma_bind_loopback(id_priv);
1350 		if (ret)
1351 			goto err;
1352 	}
1353 
1354 	ib_addr_set_dgid(&id_priv->id.route.addr.dev_addr,
1355 			 ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr));
1356 
1357 	if (cma_zero_addr(&id_priv->id.route.addr.src_addr)) {
1358 		src_in = (struct sockaddr_in *)&id_priv->id.route.addr.src_addr;
1359 		dst_in = (struct sockaddr_in *)&id_priv->id.route.addr.dst_addr;
1360 		src_in->sin_family = dst_in->sin_family;
1361 		src_in->sin_addr.s_addr = dst_in->sin_addr.s_addr;
1362 	}
1363 
1364 	work->id = id_priv;
1365 	INIT_WORK(&work->work, cma_work_handler, work);
1366 	work->old_state = CMA_ADDR_QUERY;
1367 	work->new_state = CMA_ADDR_RESOLVED;
1368 	work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
1369 	queue_work(cma_wq, &work->work);
1370 	return 0;
1371 err:
1372 	kfree(work);
1373 	return ret;
1374 }
1375 
1376 static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
1377 			 struct sockaddr *dst_addr)
1378 {
1379 	if (src_addr && src_addr->sa_family)
1380 		return rdma_bind_addr(id, src_addr);
1381 	else
1382 		return cma_bind_any(id, dst_addr->sa_family);
1383 }
1384 
1385 int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
1386 		      struct sockaddr *dst_addr, int timeout_ms)
1387 {
1388 	struct rdma_id_private *id_priv;
1389 	int ret;
1390 
1391 	id_priv = container_of(id, struct rdma_id_private, id);
1392 	if (id_priv->state == CMA_IDLE) {
1393 		ret = cma_bind_addr(id, src_addr, dst_addr);
1394 		if (ret)
1395 			return ret;
1396 	}
1397 
1398 	if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_ADDR_QUERY))
1399 		return -EINVAL;
1400 
1401 	atomic_inc(&id_priv->refcount);
1402 	memcpy(&id->route.addr.dst_addr, dst_addr, ip_addr_size(dst_addr));
1403 	if (cma_any_addr(dst_addr))
1404 		ret = cma_resolve_loopback(id_priv);
1405 	else
1406 		ret = rdma_resolve_ip(&id->route.addr.src_addr, dst_addr,
1407 				      &id->route.addr.dev_addr,
1408 				      timeout_ms, addr_handler, id_priv);
1409 	if (ret)
1410 		goto err;
1411 
1412 	return 0;
1413 err:
1414 	cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_BOUND);
1415 	cma_deref_id(id_priv);
1416 	return ret;
1417 }
1418 EXPORT_SYMBOL(rdma_resolve_addr);
1419 
1420 static void cma_bind_port(struct rdma_bind_list *bind_list,
1421 			  struct rdma_id_private *id_priv)
1422 {
1423 	struct sockaddr_in *sin;
1424 
1425 	sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
1426 	sin->sin_port = htons(bind_list->port);
1427 	id_priv->bind_list = bind_list;
1428 	hlist_add_head(&id_priv->node, &bind_list->owners);
1429 }
1430 
1431 static int cma_alloc_port(struct idr *ps, struct rdma_id_private *id_priv,
1432 			  unsigned short snum)
1433 {
1434 	struct rdma_bind_list *bind_list;
1435 	int port, start, ret;
1436 
1437 	bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
1438 	if (!bind_list)
1439 		return -ENOMEM;
1440 
1441 	start = snum ? snum : sysctl_local_port_range[0];
1442 
1443 	do {
1444 		ret = idr_get_new_above(ps, bind_list, start, &port);
1445 	} while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL));
1446 
1447 	if (ret)
1448 		goto err;
1449 
1450 	if ((snum && port != snum) ||
1451 	    (!snum && port > sysctl_local_port_range[1])) {
1452 		idr_remove(ps, port);
1453 		ret = -EADDRNOTAVAIL;
1454 		goto err;
1455 	}
1456 
1457 	bind_list->ps = ps;
1458 	bind_list->port = (unsigned short) port;
1459 	cma_bind_port(bind_list, id_priv);
1460 	return 0;
1461 err:
1462 	kfree(bind_list);
1463 	return ret;
1464 }
1465 
1466 static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv)
1467 {
1468 	struct rdma_id_private *cur_id;
1469 	struct sockaddr_in *sin, *cur_sin;
1470 	struct rdma_bind_list *bind_list;
1471 	struct hlist_node *node;
1472 	unsigned short snum;
1473 
1474 	sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
1475 	snum = ntohs(sin->sin_port);
1476 	if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
1477 		return -EACCES;
1478 
1479 	bind_list = idr_find(ps, snum);
1480 	if (!bind_list)
1481 		return cma_alloc_port(ps, id_priv, snum);
1482 
1483 	/*
1484 	 * We don't support binding to any address if anyone is bound to
1485 	 * a specific address on the same port.
1486 	 */
1487 	if (cma_any_addr(&id_priv->id.route.addr.src_addr))
1488 		return -EADDRNOTAVAIL;
1489 
1490 	hlist_for_each_entry(cur_id, node, &bind_list->owners, node) {
1491 		if (cma_any_addr(&cur_id->id.route.addr.src_addr))
1492 			return -EADDRNOTAVAIL;
1493 
1494 		cur_sin = (struct sockaddr_in *) &cur_id->id.route.addr.src_addr;
1495 		if (sin->sin_addr.s_addr == cur_sin->sin_addr.s_addr)
1496 			return -EADDRINUSE;
1497 	}
1498 
1499 	cma_bind_port(bind_list, id_priv);
1500 	return 0;
1501 }
1502 
1503 static int cma_get_port(struct rdma_id_private *id_priv)
1504 {
1505 	struct idr *ps;
1506 	int ret;
1507 
1508 	switch (id_priv->id.ps) {
1509 	case RDMA_PS_SDP:
1510 		ps = &sdp_ps;
1511 		break;
1512 	case RDMA_PS_TCP:
1513 		ps = &tcp_ps;
1514 		break;
1515 	default:
1516 		return -EPROTONOSUPPORT;
1517 	}
1518 
1519 	mutex_lock(&lock);
1520 	if (cma_any_port(&id_priv->id.route.addr.src_addr))
1521 		ret = cma_alloc_port(ps, id_priv, 0);
1522 	else
1523 		ret = cma_use_port(ps, id_priv);
1524 	mutex_unlock(&lock);
1525 
1526 	return ret;
1527 }
1528 
1529 int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
1530 {
1531 	struct rdma_id_private *id_priv;
1532 	int ret;
1533 
1534 	if (addr->sa_family != AF_INET)
1535 		return -EAFNOSUPPORT;
1536 
1537 	id_priv = container_of(id, struct rdma_id_private, id);
1538 	if (!cma_comp_exch(id_priv, CMA_IDLE, CMA_ADDR_BOUND))
1539 		return -EINVAL;
1540 
1541 	if (!cma_any_addr(addr)) {
1542 		ret = rdma_translate_ip(addr, &id->route.addr.dev_addr);
1543 		if (!ret)
1544 			ret = cma_acquire_dev(id_priv);
1545 		if (ret)
1546 			goto err;
1547 	}
1548 
1549 	memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr));
1550 	ret = cma_get_port(id_priv);
1551 	if (ret)
1552 		goto err;
1553 
1554 	return 0;
1555 err:
1556 	cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_IDLE);
1557 	return ret;
1558 }
1559 EXPORT_SYMBOL(rdma_bind_addr);
1560 
1561 static int cma_format_hdr(void *hdr, enum rdma_port_space ps,
1562 			  struct rdma_route *route)
1563 {
1564 	struct sockaddr_in *src4, *dst4;
1565 	struct cma_hdr *cma_hdr;
1566 	struct sdp_hh *sdp_hdr;
1567 
1568 	src4 = (struct sockaddr_in *) &route->addr.src_addr;
1569 	dst4 = (struct sockaddr_in *) &route->addr.dst_addr;
1570 
1571 	switch (ps) {
1572 	case RDMA_PS_SDP:
1573 		sdp_hdr = hdr;
1574 		if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
1575 			return -EINVAL;
1576 		sdp_set_ip_ver(sdp_hdr, 4);
1577 		sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
1578 		sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
1579 		sdp_hdr->port = src4->sin_port;
1580 		break;
1581 	default:
1582 		cma_hdr = hdr;
1583 		cma_hdr->cma_version = CMA_VERSION;
1584 		cma_set_ip_ver(cma_hdr, 4);
1585 		cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
1586 		cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
1587 		cma_hdr->port = src4->sin_port;
1588 		break;
1589 	}
1590 	return 0;
1591 }
1592 
1593 static int cma_connect_ib(struct rdma_id_private *id_priv,
1594 			  struct rdma_conn_param *conn_param)
1595 {
1596 	struct ib_cm_req_param req;
1597 	struct rdma_route *route;
1598 	void *private_data;
1599 	int offset, ret;
1600 
1601 	memset(&req, 0, sizeof req);
1602 	offset = cma_user_data_offset(id_priv->id.ps);
1603 	req.private_data_len = offset + conn_param->private_data_len;
1604 	private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
1605 	if (!private_data)
1606 		return -ENOMEM;
1607 
1608 	if (conn_param->private_data && conn_param->private_data_len)
1609 		memcpy(private_data + offset, conn_param->private_data,
1610 		       conn_param->private_data_len);
1611 
1612 	id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_ib_handler,
1613 					    id_priv);
1614 	if (IS_ERR(id_priv->cm_id.ib)) {
1615 		ret = PTR_ERR(id_priv->cm_id.ib);
1616 		goto out;
1617 	}
1618 
1619 	route = &id_priv->id.route;
1620 	ret = cma_format_hdr(private_data, id_priv->id.ps, route);
1621 	if (ret)
1622 		goto out;
1623 	req.private_data = private_data;
1624 
1625 	req.primary_path = &route->path_rec[0];
1626 	if (route->num_paths == 2)
1627 		req.alternate_path = &route->path_rec[1];
1628 
1629 	req.service_id = cma_get_service_id(id_priv->id.ps,
1630 					    &route->addr.dst_addr);
1631 	req.qp_num = id_priv->qp_num;
1632 	req.qp_type = id_priv->qp_type;
1633 	req.starting_psn = id_priv->seq_num;
1634 	req.responder_resources = conn_param->responder_resources;
1635 	req.initiator_depth = conn_param->initiator_depth;
1636 	req.flow_control = conn_param->flow_control;
1637 	req.retry_count = conn_param->retry_count;
1638 	req.rnr_retry_count = conn_param->rnr_retry_count;
1639 	req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
1640 	req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
1641 	req.max_cm_retries = CMA_MAX_CM_RETRIES;
1642 	req.srq = id_priv->srq ? 1 : 0;
1643 
1644 	ret = ib_send_cm_req(id_priv->cm_id.ib, &req);
1645 out:
1646 	kfree(private_data);
1647 	return ret;
1648 }
1649 
1650 int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
1651 {
1652 	struct rdma_id_private *id_priv;
1653 	int ret;
1654 
1655 	id_priv = container_of(id, struct rdma_id_private, id);
1656 	if (!cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_CONNECT))
1657 		return -EINVAL;
1658 
1659 	if (!id->qp) {
1660 		id_priv->qp_num = conn_param->qp_num;
1661 		id_priv->qp_type = conn_param->qp_type;
1662 		id_priv->srq = conn_param->srq;
1663 	}
1664 
1665 	switch (id->device->node_type) {
1666 	case IB_NODE_CA:
1667 		ret = cma_connect_ib(id_priv, conn_param);
1668 		break;
1669 	default:
1670 		ret = -ENOSYS;
1671 		break;
1672 	}
1673 	if (ret)
1674 		goto err;
1675 
1676 	return 0;
1677 err:
1678 	cma_comp_exch(id_priv, CMA_CONNECT, CMA_ROUTE_RESOLVED);
1679 	return ret;
1680 }
1681 EXPORT_SYMBOL(rdma_connect);
1682 
1683 static int cma_accept_ib(struct rdma_id_private *id_priv,
1684 			 struct rdma_conn_param *conn_param)
1685 {
1686 	struct ib_cm_rep_param rep;
1687 	int ret;
1688 
1689 	ret = cma_modify_qp_rtr(&id_priv->id);
1690 	if (ret)
1691 		return ret;
1692 
1693 	memset(&rep, 0, sizeof rep);
1694 	rep.qp_num = id_priv->qp_num;
1695 	rep.starting_psn = id_priv->seq_num;
1696 	rep.private_data = conn_param->private_data;
1697 	rep.private_data_len = conn_param->private_data_len;
1698 	rep.responder_resources = conn_param->responder_resources;
1699 	rep.initiator_depth = conn_param->initiator_depth;
1700 	rep.target_ack_delay = CMA_CM_RESPONSE_TIMEOUT;
1701 	rep.failover_accepted = 0;
1702 	rep.flow_control = conn_param->flow_control;
1703 	rep.rnr_retry_count = conn_param->rnr_retry_count;
1704 	rep.srq = id_priv->srq ? 1 : 0;
1705 
1706 	return ib_send_cm_rep(id_priv->cm_id.ib, &rep);
1707 }
1708 
1709 int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
1710 {
1711 	struct rdma_id_private *id_priv;
1712 	int ret;
1713 
1714 	id_priv = container_of(id, struct rdma_id_private, id);
1715 	if (!cma_comp(id_priv, CMA_CONNECT))
1716 		return -EINVAL;
1717 
1718 	if (!id->qp && conn_param) {
1719 		id_priv->qp_num = conn_param->qp_num;
1720 		id_priv->qp_type = conn_param->qp_type;
1721 		id_priv->srq = conn_param->srq;
1722 	}
1723 
1724 	switch (id->device->node_type) {
1725 	case IB_NODE_CA:
1726 		if (conn_param)
1727 			ret = cma_accept_ib(id_priv, conn_param);
1728 		else
1729 			ret = cma_rep_recv(id_priv);
1730 		break;
1731 	default:
1732 		ret = -ENOSYS;
1733 		break;
1734 	}
1735 
1736 	if (ret)
1737 		goto reject;
1738 
1739 	return 0;
1740 reject:
1741 	cma_modify_qp_err(id);
1742 	rdma_reject(id, NULL, 0);
1743 	return ret;
1744 }
1745 EXPORT_SYMBOL(rdma_accept);
1746 
1747 int rdma_reject(struct rdma_cm_id *id, const void *private_data,
1748 		u8 private_data_len)
1749 {
1750 	struct rdma_id_private *id_priv;
1751 	int ret;
1752 
1753 	id_priv = container_of(id, struct rdma_id_private, id);
1754 	if (!cma_comp(id_priv, CMA_CONNECT))
1755 		return -EINVAL;
1756 
1757 	switch (id->device->node_type) {
1758 	case IB_NODE_CA:
1759 		ret = ib_send_cm_rej(id_priv->cm_id.ib,
1760 				     IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
1761 				     private_data, private_data_len);
1762 		break;
1763 	default:
1764 		ret = -ENOSYS;
1765 		break;
1766 	}
1767 	return ret;
1768 }
1769 EXPORT_SYMBOL(rdma_reject);
1770 
1771 int rdma_disconnect(struct rdma_cm_id *id)
1772 {
1773 	struct rdma_id_private *id_priv;
1774 	int ret;
1775 
1776 	id_priv = container_of(id, struct rdma_id_private, id);
1777 	if (!cma_comp(id_priv, CMA_CONNECT) &&
1778 	    !cma_comp(id_priv, CMA_DISCONNECT))
1779 		return -EINVAL;
1780 
1781 	ret = cma_modify_qp_err(id);
1782 	if (ret)
1783 		goto out;
1784 
1785 	switch (id->device->node_type) {
1786 	case IB_NODE_CA:
1787 		/* Initiate or respond to a disconnect. */
1788 		if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0))
1789 			ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0);
1790 		break;
1791 	default:
1792 		break;
1793 	}
1794 out:
1795 	return ret;
1796 }
1797 EXPORT_SYMBOL(rdma_disconnect);
1798 
1799 static void cma_add_one(struct ib_device *device)
1800 {
1801 	struct cma_device *cma_dev;
1802 	struct rdma_id_private *id_priv;
1803 
1804 	cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
1805 	if (!cma_dev)
1806 		return;
1807 
1808 	cma_dev->device = device;
1809 	cma_dev->node_guid = device->node_guid;
1810 	if (!cma_dev->node_guid)
1811 		goto err;
1812 
1813 	init_completion(&cma_dev->comp);
1814 	atomic_set(&cma_dev->refcount, 1);
1815 	INIT_LIST_HEAD(&cma_dev->id_list);
1816 	ib_set_client_data(device, &cma_client, cma_dev);
1817 
1818 	mutex_lock(&lock);
1819 	list_add_tail(&cma_dev->list, &dev_list);
1820 	list_for_each_entry(id_priv, &listen_any_list, list)
1821 		cma_listen_on_dev(id_priv, cma_dev);
1822 	mutex_unlock(&lock);
1823 	return;
1824 err:
1825 	kfree(cma_dev);
1826 }
1827 
1828 static int cma_remove_id_dev(struct rdma_id_private *id_priv)
1829 {
1830 	enum cma_state state;
1831 
1832 	/* Record that we want to remove the device */
1833 	state = cma_exch(id_priv, CMA_DEVICE_REMOVAL);
1834 	if (state == CMA_DESTROYING)
1835 		return 0;
1836 
1837 	cma_cancel_operation(id_priv, state);
1838 	wait_event(id_priv->wait_remove, !atomic_read(&id_priv->dev_remove));
1839 
1840 	/* Check for destruction from another callback. */
1841 	if (!cma_comp(id_priv, CMA_DEVICE_REMOVAL))
1842 		return 0;
1843 
1844 	return cma_notify_user(id_priv, RDMA_CM_EVENT_DEVICE_REMOVAL,
1845 			       0, NULL, 0);
1846 }
1847 
1848 static void cma_process_remove(struct cma_device *cma_dev)
1849 {
1850 	struct list_head remove_list;
1851 	struct rdma_id_private *id_priv;
1852 	int ret;
1853 
1854 	INIT_LIST_HEAD(&remove_list);
1855 
1856 	mutex_lock(&lock);
1857 	while (!list_empty(&cma_dev->id_list)) {
1858 		id_priv = list_entry(cma_dev->id_list.next,
1859 				     struct rdma_id_private, list);
1860 
1861 		if (cma_internal_listen(id_priv)) {
1862 			cma_destroy_listen(id_priv);
1863 			continue;
1864 		}
1865 
1866 		list_del(&id_priv->list);
1867 		list_add_tail(&id_priv->list, &remove_list);
1868 		atomic_inc(&id_priv->refcount);
1869 		mutex_unlock(&lock);
1870 
1871 		ret = cma_remove_id_dev(id_priv);
1872 		cma_deref_id(id_priv);
1873 		if (ret)
1874 			rdma_destroy_id(&id_priv->id);
1875 
1876 		mutex_lock(&lock);
1877 	}
1878 	mutex_unlock(&lock);
1879 
1880 	cma_deref_dev(cma_dev);
1881 	wait_for_completion(&cma_dev->comp);
1882 }
1883 
1884 static void cma_remove_one(struct ib_device *device)
1885 {
1886 	struct cma_device *cma_dev;
1887 
1888 	cma_dev = ib_get_client_data(device, &cma_client);
1889 	if (!cma_dev)
1890 		return;
1891 
1892 	mutex_lock(&lock);
1893 	list_del(&cma_dev->list);
1894 	mutex_unlock(&lock);
1895 
1896 	cma_process_remove(cma_dev);
1897 	kfree(cma_dev);
1898 }
1899 
1900 static int cma_init(void)
1901 {
1902 	int ret;
1903 
1904 	cma_wq = create_singlethread_workqueue("rdma_cm_wq");
1905 	if (!cma_wq)
1906 		return -ENOMEM;
1907 
1908 	ret = ib_register_client(&cma_client);
1909 	if (ret)
1910 		goto err;
1911 	return 0;
1912 
1913 err:
1914 	destroy_workqueue(cma_wq);
1915 	return ret;
1916 }
1917 
1918 static void cma_cleanup(void)
1919 {
1920 	ib_unregister_client(&cma_client);
1921 	destroy_workqueue(cma_wq);
1922 	idr_destroy(&sdp_ps);
1923 	idr_destroy(&tcp_ps);
1924 }
1925 
/* Register cma_init() and cma_cleanup() as the module's init and exit routines. */
module_init(cma_init);
module_exit(cma_cleanup);
1928