xref: /linux/drivers/infiniband/core/cma.c (revision b454cc6636d254fbf6049b73e9560aee76fb04a3)
1 /*
2  * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
3  * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
4  * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
5  * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
6  *
7  * This Software is licensed under one of the following licenses:
8  *
9  * 1) under the terms of the "Common Public License 1.0" a copy of which is
10  *    available from the Open Source Initiative, see
11  *    http://www.opensource.org/licenses/cpl.php.
12  *
13  * 2) under the terms of the "The BSD License" a copy of which is
14  *    available from the Open Source Initiative, see
15  *    http://www.opensource.org/licenses/bsd-license.php.
16  *
17  * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
18  *    copy of which is available from the Open Source Initiative, see
19  *    http://www.opensource.org/licenses/gpl-license.php.
20  *
21  * Licensee has the right to choose one of the above licenses.
22  *
23  * Redistributions of source code must retain the above copyright
24  * notice and one of the license notices.
25  *
26  * Redistributions in binary form must reproduce both the above copyright
27  * notice, one of the license notices in the documentation
28  * and/or other materials provided with the distribution.
29  *
30  */
31 
32 #include <linux/completion.h>
33 #include <linux/in.h>
34 #include <linux/in6.h>
35 #include <linux/mutex.h>
36 #include <linux/random.h>
37 #include <linux/idr.h>
38 #include <linux/inetdevice.h>
39 
40 #include <net/tcp.h>
41 
42 #include <rdma/rdma_cm.h>
43 #include <rdma/rdma_cm_ib.h>
44 #include <rdma/ib_cache.h>
45 #include <rdma/ib_cm.h>
46 #include <rdma/ib_sa.h>
47 #include <rdma/iw_cm.h>
48 
/* Module metadata for the generic RDMA connection manager agent. */
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("Generic RDMA CM Agent");
MODULE_LICENSE("Dual BSD/GPL");

/*
 * Connection manager tunables.
 * NOTE(review): the users of these values are not in this part of the file;
 * presumably a CM response timeout and a cap on CM retries -- confirm at the
 * use sites.
 */
#define CMA_CM_RESPONSE_TIMEOUT 20
#define CMA_MAX_CM_RETRIES 15

/* Forward declarations for the device add/remove hooks used by cma_client. */
static void cma_add_one(struct ib_device *device);
static void cma_remove_one(struct ib_device *device);
58 
/*
 * IB client descriptor named "cma".  Its add/remove hooks are cma_add_one()
 * and cma_remove_one().
 * NOTE(review): where this client is registered is not visible here.
 */
static struct ib_client cma_client = {
	.name   = "cma",
	.add    = cma_add_one,
	.remove = cma_remove_one
};
64 
static struct ib_sa_client sa_client;
static struct rdma_addr_client addr_client;
/* All known cma_device entries, linked through cma_device.list. */
static LIST_HEAD(dev_list);
static LIST_HEAD(listen_any_list);
/* Taken around dev_list walks, device attach/detach and port release. */
static DEFINE_MUTEX(lock);
static struct workqueue_struct *cma_wq;
/*
 * NOTE(review): presumably one IDR per port space (SDP/TCP/UDP), holding the
 * rdma_bind_list for each bound port -- confirm against the bind code.
 */
static DEFINE_IDR(sdp_ps);
static DEFINE_IDR(tcp_ps);
static DEFINE_IDR(udp_ps);
74 
/* Per-device state for one ib_device tracked on dev_list. */
struct cma_device {
	struct list_head	list;		/* entry in dev_list */
	struct ib_device	*device;	/* underlying device */
	__be64			node_guid;
	struct completion	comp;		/* completed when refcount hits zero */
	atomic_t		refcount;	/* raised for each attached id */
	struct list_head	id_list;	/* attached rdma_id_private.list entries */
};
83 
/*
 * States of an rdma_id_private.  The current state is stored in
 * rdma_id_private.state and is read/changed under rdma_id_private.lock by
 * cma_comp(), cma_comp_exch() and cma_exch().
 */
enum cma_state {
	CMA_IDLE,		/* initial state set by rdma_create_id() */
	CMA_ADDR_QUERY,		/* cancelled via rdma_addr_cancel() on destroy */
	CMA_ADDR_RESOLVED,
	CMA_ROUTE_QUERY,	/* cancelled via cma_cancel_route() on destroy */
	CMA_ROUTE_RESOLVED,
	CMA_CONNECT,		/* required for cma_ib_handler() to report events */
	CMA_DISCONNECT,
	CMA_ADDR_BOUND,
	CMA_LISTEN,		/* required for the request handlers to accept */
	CMA_DEVICE_REMOVAL,
	CMA_DESTROYING		/* set when teardown of the id begins */
};
97 
/* A bound port and the ids that share it; freed when the last owner leaves. */
struct rdma_bind_list {
	struct idr		*ps;		/* IDR the port is removed from on release */
	struct hlist_head	owners;		/* rdma_id_private.node entries */
	unsigned short		port;		/* key used with idr_remove() */
};
103 
104 /*
105  * Device removal can occur at anytime, so we need extra handling to
106  * serialize notifying the user of device removal with other callbacks.
107  * We do this by disabling removal notification while a callback is in process,
108  * and reporting it after the callback completes.
109  */
struct rdma_id_private {
	struct rdma_cm_id	id;		/* public handle; container_of() maps back */

	struct rdma_bind_list	*bind_list;	/* port binding, NULL when unbound */
	struct hlist_node	node;		/* entry in bind_list->owners */
	struct list_head	list;		/* entry in cma_dev->id_list */
	struct list_head	listen_list;	/* per-device listens: head on the parent, entry on each child */
	struct cma_device	*cma_dev;	/* attached device, NULL when detached */

	enum cma_state		state;		/* accessed under lock */
	spinlock_t		lock;
	struct completion	comp;		/* completed when refcount hits zero */
	atomic_t		refcount;	/* starts at 1 in rdma_create_id() */
	wait_queue_head_t	wait_remove;	/* woken when dev_remove drops to zero */
	atomic_t		dev_remove;	/* raised while an event callback runs */

	int			backlog;
	int			timeout_ms;
	struct ib_sa_query	*query;		/* outstanding SA query, if any */
	int			query_id;	/* id handed to ib_sa_cancel_query() */
	union {
		struct ib_cm_id	*ib;
		struct iw_cm_id	*iw;
	} cm_id;				/* transport CM id; member chosen by transport */

	u32			seq_num;	/* random at creation; used as rq_psn for IB RTR */
	u32			qp_num;		/* recorded by rdma_create_qp() */
	u8			srq;		/* non-zero if the QP uses an SRQ */
};
139 
/*
 * Deferred state change plus event for an id.
 * NOTE(review): the code that queues and runs this work item is not visible
 * here; presumably it moves the id from old_state to new_state and then
 * reports 'event' -- confirm against the work handler.
 */
struct cma_work {
	struct work_struct	work;
	struct rdma_id_private	*id;
	enum cma_state		old_state;
	enum cma_state		new_state;
	struct rdma_cm_event	event;
};
147 
/*
 * IP address as carried in the CM private-data headers.  The IPv4 form is
 * padded with three 32-bit words so that 'addr' overlays the last 32 bits
 * of the IPv6 form.
 */
union cma_ip_addr {
	struct in6_addr ip6;
	struct {
		__u32 pad[3];
		__u32 addr;
	} ip4;
};
155 
/*
 * Header parsed from CM private data for every port space other than SDP
 * (see cma_get_net_info()).  cma_user_data_offset() reports its size as the
 * offset of the user's private data.
 */
struct cma_hdr {
	u8 cma_version;	/* must equal CMA_VERSION */
	u8 ip_version;	/* IP version: 7:4 */
	__u16 port;
	union cma_ip_addr src_addr;
	union cma_ip_addr dst_addr;
};
163 
/*
 * Header parsed from CM request private data when the port space is
 * RDMA_PS_SDP (see cma_get_net_info()).
 * NOTE(review): presumably the SDP "hello" header -- confirm against the SDP
 * specification.
 */
struct sdp_hh {
	u8 bsdh[16];
	u8 sdp_version; /* Major version: 7:4 */
	u8 ip_version;	/* IP version: 7:4 */
	u8 sdp_specific1[10];
	__u16 port;
	__u16 sdp_specific2;
	union cma_ip_addr src_addr;
	union cma_ip_addr dst_addr;
};
174 
/*
 * Leading part of the reply private data checked by cma_verify_rep() for
 * RDMA_PS_SDP ids; only the version byte is examined.
 * NOTE(review): presumably the SDP "hello ack" header -- confirm.
 */
struct sdp_hah {
	u8 bsdh[16];
	u8 sdp_version;	/* major version in the upper nibble, see sdp_get_majv() */
};
179 
/* Expected cma_hdr.cma_version value. */
#define CMA_VERSION 0x00
/* Expected SDP major version (upper nibble of sdp_version). */
#define SDP_MAJ_VERSION 0x2
182 
183 static int cma_comp(struct rdma_id_private *id_priv, enum cma_state comp)
184 {
185 	unsigned long flags;
186 	int ret;
187 
188 	spin_lock_irqsave(&id_priv->lock, flags);
189 	ret = (id_priv->state == comp);
190 	spin_unlock_irqrestore(&id_priv->lock, flags);
191 	return ret;
192 }
193 
194 static int cma_comp_exch(struct rdma_id_private *id_priv,
195 			 enum cma_state comp, enum cma_state exch)
196 {
197 	unsigned long flags;
198 	int ret;
199 
200 	spin_lock_irqsave(&id_priv->lock, flags);
201 	if ((ret = (id_priv->state == comp)))
202 		id_priv->state = exch;
203 	spin_unlock_irqrestore(&id_priv->lock, flags);
204 	return ret;
205 }
206 
207 static enum cma_state cma_exch(struct rdma_id_private *id_priv,
208 			       enum cma_state exch)
209 {
210 	unsigned long flags;
211 	enum cma_state old;
212 
213 	spin_lock_irqsave(&id_priv->lock, flags);
214 	old = id_priv->state;
215 	id_priv->state = exch;
216 	spin_unlock_irqrestore(&id_priv->lock, flags);
217 	return old;
218 }
219 
220 static inline u8 cma_get_ip_ver(struct cma_hdr *hdr)
221 {
222 	return hdr->ip_version >> 4;
223 }
224 
225 static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
226 {
227 	hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
228 }
229 
230 static inline u8 sdp_get_majv(u8 sdp_version)
231 {
232 	return sdp_version >> 4;
233 }
234 
235 static inline u8 sdp_get_ip_ver(struct sdp_hh *hh)
236 {
237 	return hh->ip_version >> 4;
238 }
239 
240 static inline void sdp_set_ip_ver(struct sdp_hh *hh, u8 ip_ver)
241 {
242 	hh->ip_version = (ip_ver << 4) | (hh->ip_version & 0xF);
243 }
244 
245 static void cma_attach_to_dev(struct rdma_id_private *id_priv,
246 			      struct cma_device *cma_dev)
247 {
248 	atomic_inc(&cma_dev->refcount);
249 	id_priv->cma_dev = cma_dev;
250 	id_priv->id.device = cma_dev->device;
251 	list_add_tail(&id_priv->list, &cma_dev->id_list);
252 }
253 
254 static inline void cma_deref_dev(struct cma_device *cma_dev)
255 {
256 	if (atomic_dec_and_test(&cma_dev->refcount))
257 		complete(&cma_dev->comp);
258 }
259 
260 static void cma_detach_from_dev(struct rdma_id_private *id_priv)
261 {
262 	list_del(&id_priv->list);
263 	cma_deref_dev(id_priv->cma_dev);
264 	id_priv->cma_dev = NULL;
265 }
266 
267 static int cma_acquire_dev(struct rdma_id_private *id_priv)
268 {
269 	enum rdma_node_type dev_type = id_priv->id.route.addr.dev_addr.dev_type;
270 	struct cma_device *cma_dev;
271 	union ib_gid gid;
272 	int ret = -ENODEV;
273 
274 	switch (rdma_node_get_transport(dev_type)) {
275 	case RDMA_TRANSPORT_IB:
276 		ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
277 		break;
278 	case RDMA_TRANSPORT_IWARP:
279 		iw_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
280 		break;
281 	default:
282 		return -ENODEV;
283 	}
284 
285 	list_for_each_entry(cma_dev, &dev_list, list) {
286 		ret = ib_find_cached_gid(cma_dev->device, &gid,
287 					 &id_priv->id.port_num, NULL);
288 		if (!ret) {
289 			cma_attach_to_dev(id_priv, cma_dev);
290 			break;
291 		}
292 	}
293 	return ret;
294 }
295 
296 static void cma_deref_id(struct rdma_id_private *id_priv)
297 {
298 	if (atomic_dec_and_test(&id_priv->refcount))
299 		complete(&id_priv->comp);
300 }
301 
302 static void cma_release_remove(struct rdma_id_private *id_priv)
303 {
304 	if (atomic_dec_and_test(&id_priv->dev_remove))
305 		wake_up(&id_priv->wait_remove);
306 }
307 
308 struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
309 				  void *context, enum rdma_port_space ps)
310 {
311 	struct rdma_id_private *id_priv;
312 
313 	id_priv = kzalloc(sizeof *id_priv, GFP_KERNEL);
314 	if (!id_priv)
315 		return ERR_PTR(-ENOMEM);
316 
317 	id_priv->state = CMA_IDLE;
318 	id_priv->id.context = context;
319 	id_priv->id.event_handler = event_handler;
320 	id_priv->id.ps = ps;
321 	spin_lock_init(&id_priv->lock);
322 	init_completion(&id_priv->comp);
323 	atomic_set(&id_priv->refcount, 1);
324 	init_waitqueue_head(&id_priv->wait_remove);
325 	atomic_set(&id_priv->dev_remove, 0);
326 	INIT_LIST_HEAD(&id_priv->listen_list);
327 	get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
328 
329 	return &id_priv->id;
330 }
331 EXPORT_SYMBOL(rdma_create_id);
332 
333 static int cma_init_ib_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
334 {
335 	struct ib_qp_attr qp_attr;
336 	struct rdma_dev_addr *dev_addr;
337 	int ret;
338 
339 	dev_addr = &id_priv->id.route.addr.dev_addr;
340 	ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num,
341 				  ib_addr_get_pkey(dev_addr),
342 				  &qp_attr.pkey_index);
343 	if (ret)
344 		return ret;
345 
346 	qp_attr.qp_state = IB_QPS_INIT;
347 	qp_attr.qp_access_flags = 0;
348 	qp_attr.port_num = id_priv->id.port_num;
349 	return ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_ACCESS_FLAGS |
350 					  IB_QP_PKEY_INDEX | IB_QP_PORT);
351 }
352 
353 static int cma_init_iw_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
354 {
355 	struct ib_qp_attr qp_attr;
356 
357 	qp_attr.qp_state = IB_QPS_INIT;
358 	qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE;
359 
360 	return ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_ACCESS_FLAGS);
361 }
362 
363 int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
364 		   struct ib_qp_init_attr *qp_init_attr)
365 {
366 	struct rdma_id_private *id_priv;
367 	struct ib_qp *qp;
368 	int ret;
369 
370 	id_priv = container_of(id, struct rdma_id_private, id);
371 	if (id->device != pd->device)
372 		return -EINVAL;
373 
374 	qp = ib_create_qp(pd, qp_init_attr);
375 	if (IS_ERR(qp))
376 		return PTR_ERR(qp);
377 
378 	switch (rdma_node_get_transport(id->device->node_type)) {
379 	case RDMA_TRANSPORT_IB:
380 		ret = cma_init_ib_qp(id_priv, qp);
381 		break;
382 	case RDMA_TRANSPORT_IWARP:
383 		ret = cma_init_iw_qp(id_priv, qp);
384 		break;
385 	default:
386 		ret = -ENOSYS;
387 		break;
388 	}
389 
390 	if (ret)
391 		goto err;
392 
393 	id->qp = qp;
394 	id_priv->qp_num = qp->qp_num;
395 	id_priv->srq = (qp->srq != NULL);
396 	return 0;
397 err:
398 	ib_destroy_qp(qp);
399 	return ret;
400 }
401 EXPORT_SYMBOL(rdma_create_qp);
402 
403 void rdma_destroy_qp(struct rdma_cm_id *id)
404 {
405 	ib_destroy_qp(id->qp);
406 }
407 EXPORT_SYMBOL(rdma_destroy_qp);
408 
409 static int cma_modify_qp_rtr(struct rdma_cm_id *id)
410 {
411 	struct ib_qp_attr qp_attr;
412 	int qp_attr_mask, ret;
413 
414 	if (!id->qp)
415 		return 0;
416 
417 	/* Need to update QP attributes from default values. */
418 	qp_attr.qp_state = IB_QPS_INIT;
419 	ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
420 	if (ret)
421 		return ret;
422 
423 	ret = ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
424 	if (ret)
425 		return ret;
426 
427 	qp_attr.qp_state = IB_QPS_RTR;
428 	ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
429 	if (ret)
430 		return ret;
431 
432 	return ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
433 }
434 
435 static int cma_modify_qp_rts(struct rdma_cm_id *id)
436 {
437 	struct ib_qp_attr qp_attr;
438 	int qp_attr_mask, ret;
439 
440 	if (!id->qp)
441 		return 0;
442 
443 	qp_attr.qp_state = IB_QPS_RTS;
444 	ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
445 	if (ret)
446 		return ret;
447 
448 	return ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
449 }
450 
451 static int cma_modify_qp_err(struct rdma_cm_id *id)
452 {
453 	struct ib_qp_attr qp_attr;
454 
455 	if (!id->qp)
456 		return 0;
457 
458 	qp_attr.qp_state = IB_QPS_ERR;
459 	return ib_modify_qp(id->qp, &qp_attr, IB_QP_STATE);
460 }
461 
462 int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
463 		       int *qp_attr_mask)
464 {
465 	struct rdma_id_private *id_priv;
466 	int ret;
467 
468 	id_priv = container_of(id, struct rdma_id_private, id);
469 	switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
470 	case RDMA_TRANSPORT_IB:
471 		ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
472 					 qp_attr_mask);
473 		if (qp_attr->qp_state == IB_QPS_RTR)
474 			qp_attr->rq_psn = id_priv->seq_num;
475 		break;
476 	case RDMA_TRANSPORT_IWARP:
477 		ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
478 					qp_attr_mask);
479 		break;
480 	default:
481 		ret = -ENOSYS;
482 		break;
483 	}
484 
485 	return ret;
486 }
487 EXPORT_SYMBOL(rdma_init_qp_attr);
488 
489 static inline int cma_zero_addr(struct sockaddr *addr)
490 {
491 	struct in6_addr *ip6;
492 
493 	if (addr->sa_family == AF_INET)
494 		return ZERONET(((struct sockaddr_in *) addr)->sin_addr.s_addr);
495 	else {
496 		ip6 = &((struct sockaddr_in6 *) addr)->sin6_addr;
497 		return (ip6->s6_addr32[0] | ip6->s6_addr32[1] |
498 			ip6->s6_addr32[2] | ip6->s6_addr32[3]) == 0;
499 	}
500 }
501 
502 static inline int cma_loopback_addr(struct sockaddr *addr)
503 {
504 	return LOOPBACK(((struct sockaddr_in *) addr)->sin_addr.s_addr);
505 }
506 
/*
 * Return 1 if @addr is either a zero address or a loopback address,
 * 0 otherwise.  The loopback test is only made when the zero test fails.
 */
static inline int cma_any_addr(struct sockaddr *addr)
{
	if (cma_zero_addr(addr))
		return 1;

	return cma_loopback_addr(addr) != 0;
}
511 
512 static inline __be16 cma_port(struct sockaddr *addr)
513 {
514 	if (addr->sa_family == AF_INET)
515 		return ((struct sockaddr_in *) addr)->sin_port;
516 	else
517 		return ((struct sockaddr_in6 *) addr)->sin6_port;
518 }
519 
/* Return 1 if @addr carries port 0 (no specific port), 0 otherwise. */
static inline int cma_any_port(struct sockaddr *addr)
{
	return cma_port(addr) == 0;
}
524 
525 static int cma_get_net_info(void *hdr, enum rdma_port_space ps,
526 			    u8 *ip_ver, __u16 *port,
527 			    union cma_ip_addr **src, union cma_ip_addr **dst)
528 {
529 	switch (ps) {
530 	case RDMA_PS_SDP:
531 		if (sdp_get_majv(((struct sdp_hh *) hdr)->sdp_version) !=
532 		    SDP_MAJ_VERSION)
533 			return -EINVAL;
534 
535 		*ip_ver	= sdp_get_ip_ver(hdr);
536 		*port	= ((struct sdp_hh *) hdr)->port;
537 		*src	= &((struct sdp_hh *) hdr)->src_addr;
538 		*dst	= &((struct sdp_hh *) hdr)->dst_addr;
539 		break;
540 	default:
541 		if (((struct cma_hdr *) hdr)->cma_version != CMA_VERSION)
542 			return -EINVAL;
543 
544 		*ip_ver	= cma_get_ip_ver(hdr);
545 		*port	= ((struct cma_hdr *) hdr)->port;
546 		*src	= &((struct cma_hdr *) hdr)->src_addr;
547 		*dst	= &((struct cma_hdr *) hdr)->dst_addr;
548 		break;
549 	}
550 
551 	if (*ip_ver != 4 && *ip_ver != 6)
552 		return -EINVAL;
553 	return 0;
554 }
555 
556 static void cma_save_net_info(struct rdma_addr *addr,
557 			      struct rdma_addr *listen_addr,
558 			      u8 ip_ver, __u16 port,
559 			      union cma_ip_addr *src, union cma_ip_addr *dst)
560 {
561 	struct sockaddr_in *listen4, *ip4;
562 	struct sockaddr_in6 *listen6, *ip6;
563 
564 	switch (ip_ver) {
565 	case 4:
566 		listen4 = (struct sockaddr_in *) &listen_addr->src_addr;
567 		ip4 = (struct sockaddr_in *) &addr->src_addr;
568 		ip4->sin_family = listen4->sin_family;
569 		ip4->sin_addr.s_addr = dst->ip4.addr;
570 		ip4->sin_port = listen4->sin_port;
571 
572 		ip4 = (struct sockaddr_in *) &addr->dst_addr;
573 		ip4->sin_family = listen4->sin_family;
574 		ip4->sin_addr.s_addr = src->ip4.addr;
575 		ip4->sin_port = port;
576 		break;
577 	case 6:
578 		listen6 = (struct sockaddr_in6 *) &listen_addr->src_addr;
579 		ip6 = (struct sockaddr_in6 *) &addr->src_addr;
580 		ip6->sin6_family = listen6->sin6_family;
581 		ip6->sin6_addr = dst->ip6;
582 		ip6->sin6_port = listen6->sin6_port;
583 
584 		ip6 = (struct sockaddr_in6 *) &addr->dst_addr;
585 		ip6->sin6_family = listen6->sin6_family;
586 		ip6->sin6_addr = src->ip6;
587 		ip6->sin6_port = port;
588 		break;
589 	default:
590 		break;
591 	}
592 }
593 
594 static inline int cma_user_data_offset(enum rdma_port_space ps)
595 {
596 	switch (ps) {
597 	case RDMA_PS_SDP:
598 		return 0;
599 	default:
600 		return sizeof(struct cma_hdr);
601 	}
602 }
603 
604 static void cma_cancel_route(struct rdma_id_private *id_priv)
605 {
606 	switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
607 	case RDMA_TRANSPORT_IB:
608 		if (id_priv->query)
609 			ib_sa_cancel_query(id_priv->query_id, id_priv->query);
610 		break;
611 	default:
612 		break;
613 	}
614 }
615 
616 static inline int cma_internal_listen(struct rdma_id_private *id_priv)
617 {
618 	return (id_priv->state == CMA_LISTEN) && id_priv->cma_dev &&
619 	       cma_any_addr(&id_priv->id.route.addr.src_addr);
620 }
621 
622 static void cma_destroy_listen(struct rdma_id_private *id_priv)
623 {
624 	cma_exch(id_priv, CMA_DESTROYING);
625 
626 	if (id_priv->cma_dev) {
627 		switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
628 		case RDMA_TRANSPORT_IB:
629 			if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
630 				ib_destroy_cm_id(id_priv->cm_id.ib);
631 			break;
632 		case RDMA_TRANSPORT_IWARP:
633 			if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw))
634 				iw_destroy_cm_id(id_priv->cm_id.iw);
635 			break;
636 		default:
637 			break;
638 		}
639 		cma_detach_from_dev(id_priv);
640 	}
641 	list_del(&id_priv->listen_list);
642 
643 	cma_deref_id(id_priv);
644 	wait_for_completion(&id_priv->comp);
645 
646 	kfree(id_priv);
647 }
648 
649 static void cma_cancel_listens(struct rdma_id_private *id_priv)
650 {
651 	struct rdma_id_private *dev_id_priv;
652 
653 	mutex_lock(&lock);
654 	list_del(&id_priv->list);
655 
656 	while (!list_empty(&id_priv->listen_list)) {
657 		dev_id_priv = list_entry(id_priv->listen_list.next,
658 					 struct rdma_id_private, listen_list);
659 		cma_destroy_listen(dev_id_priv);
660 	}
661 	mutex_unlock(&lock);
662 }
663 
664 static void cma_cancel_operation(struct rdma_id_private *id_priv,
665 				 enum cma_state state)
666 {
667 	switch (state) {
668 	case CMA_ADDR_QUERY:
669 		rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
670 		break;
671 	case CMA_ROUTE_QUERY:
672 		cma_cancel_route(id_priv);
673 		break;
674 	case CMA_LISTEN:
675 		if (cma_any_addr(&id_priv->id.route.addr.src_addr) &&
676 		    !id_priv->cma_dev)
677 			cma_cancel_listens(id_priv);
678 		break;
679 	default:
680 		break;
681 	}
682 }
683 
684 static void cma_release_port(struct rdma_id_private *id_priv)
685 {
686 	struct rdma_bind_list *bind_list = id_priv->bind_list;
687 
688 	if (!bind_list)
689 		return;
690 
691 	mutex_lock(&lock);
692 	hlist_del(&id_priv->node);
693 	if (hlist_empty(&bind_list->owners)) {
694 		idr_remove(bind_list->ps, bind_list->port);
695 		kfree(bind_list);
696 	}
697 	mutex_unlock(&lock);
698 }
699 
/*
 * Destroy an RDMA CM id and free its memory.
 *
 * The id is moved to CMA_DESTROYING and whatever operation its previous
 * state implies is cancelled.  If the id is attached to a device, the
 * transport CM id is destroyed and the id is detached.  The port binding is
 * then released, the creation reference dropped, and the function blocks
 * until all remaining references are gone before freeing the path records
 * and the id itself.
 */
void rdma_destroy_id(struct rdma_cm_id *id)
{
	struct rdma_id_private *id_priv;
	enum cma_state state;

	id_priv = container_of(id, struct rdma_id_private, id);
	state = cma_exch(id_priv, CMA_DESTROYING);
	cma_cancel_operation(id_priv, state);

	mutex_lock(&lock);
	if (id_priv->cma_dev) {
		/* The lock is dropped while the transport CM id is destroyed. */
		mutex_unlock(&lock);
		switch (rdma_node_get_transport(id->device->node_type)) {
		case RDMA_TRANSPORT_IB:
			/* Skip ids whose cm_id is unset or holds an ERR_PTR. */
			if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
				ib_destroy_cm_id(id_priv->cm_id.ib);
			break;
		case RDMA_TRANSPORT_IWARP:
			if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw))
				iw_destroy_cm_id(id_priv->cm_id.iw);
			break;
		default:
			break;
		}
		/* Retake the lock to unlink from the device's id_list. */
		mutex_lock(&lock);
		cma_detach_from_dev(id_priv);
	}
	mutex_unlock(&lock);

	cma_release_port(id_priv);
	/* Drop the reference taken in rdma_create_id() and wait for the rest. */
	cma_deref_id(id_priv);
	wait_for_completion(&id_priv->comp);

	kfree(id_priv->id.route.path_rec);
	kfree(id_priv);
}
EXPORT_SYMBOL(rdma_destroy_id);
737 
738 static int cma_rep_recv(struct rdma_id_private *id_priv)
739 {
740 	int ret;
741 
742 	ret = cma_modify_qp_rtr(&id_priv->id);
743 	if (ret)
744 		goto reject;
745 
746 	ret = cma_modify_qp_rts(&id_priv->id);
747 	if (ret)
748 		goto reject;
749 
750 	ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0);
751 	if (ret)
752 		goto reject;
753 
754 	return 0;
755 reject:
756 	cma_modify_qp_err(&id_priv->id);
757 	ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
758 		       NULL, 0, NULL, 0);
759 	return ret;
760 }
761 
762 static int cma_verify_rep(struct rdma_id_private *id_priv, void *data)
763 {
764 	if (id_priv->id.ps == RDMA_PS_SDP &&
765 	    sdp_get_majv(((struct sdp_hah *) data)->sdp_version) !=
766 	    SDP_MAJ_VERSION)
767 		return -EINVAL;
768 
769 	return 0;
770 }
771 
772 static void cma_set_rep_event_data(struct rdma_cm_event *event,
773 				   struct ib_cm_rep_event_param *rep_data,
774 				   void *private_data)
775 {
776 	event->param.conn.private_data = private_data;
777 	event->param.conn.private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE;
778 	event->param.conn.responder_resources = rep_data->responder_resources;
779 	event->param.conn.initiator_depth = rep_data->initiator_depth;
780 	event->param.conn.flow_control = rep_data->flow_control;
781 	event->param.conn.rnr_retry_count = rep_data->rnr_retry_count;
782 	event->param.conn.srq = rep_data->srq;
783 	event->param.conn.qp_num = rep_data->remote_qpn;
784 }
785 
786 static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
787 {
788 	struct rdma_id_private *id_priv = cm_id->context;
789 	struct rdma_cm_event event;
790 	int ret = 0;
791 
792 	atomic_inc(&id_priv->dev_remove);
793 	if (!cma_comp(id_priv, CMA_CONNECT))
794 		goto out;
795 
796 	memset(&event, 0, sizeof event);
797 	switch (ib_event->event) {
798 	case IB_CM_REQ_ERROR:
799 	case IB_CM_REP_ERROR:
800 		event.event = RDMA_CM_EVENT_UNREACHABLE;
801 		event.status = -ETIMEDOUT;
802 		break;
803 	case IB_CM_REP_RECEIVED:
804 		event.status = cma_verify_rep(id_priv, ib_event->private_data);
805 		if (event.status)
806 			event.event = RDMA_CM_EVENT_CONNECT_ERROR;
807 		else if (id_priv->id.qp && id_priv->id.ps != RDMA_PS_SDP) {
808 			event.status = cma_rep_recv(id_priv);
809 			event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR :
810 						     RDMA_CM_EVENT_ESTABLISHED;
811 		} else
812 			event.event = RDMA_CM_EVENT_CONNECT_RESPONSE;
813 		cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd,
814 				       ib_event->private_data);
815 		break;
816 	case IB_CM_RTU_RECEIVED:
817 	case IB_CM_USER_ESTABLISHED:
818 		event.event = RDMA_CM_EVENT_ESTABLISHED;
819 		break;
820 	case IB_CM_DREQ_ERROR:
821 		event.status = -ETIMEDOUT; /* fall through */
822 	case IB_CM_DREQ_RECEIVED:
823 	case IB_CM_DREP_RECEIVED:
824 		if (!cma_comp_exch(id_priv, CMA_CONNECT, CMA_DISCONNECT))
825 			goto out;
826 		event.event = RDMA_CM_EVENT_DISCONNECTED;
827 		break;
828 	case IB_CM_TIMEWAIT_EXIT:
829 	case IB_CM_MRA_RECEIVED:
830 		/* ignore event */
831 		goto out;
832 	case IB_CM_REJ_RECEIVED:
833 		cma_modify_qp_err(&id_priv->id);
834 		event.status = ib_event->param.rej_rcvd.reason;
835 		event.event = RDMA_CM_EVENT_REJECTED;
836 		event.param.conn.private_data = ib_event->private_data;
837 		event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE;
838 		break;
839 	default:
840 		printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d",
841 		       ib_event->event);
842 		goto out;
843 	}
844 
845 	ret = id_priv->id.event_handler(&id_priv->id, &event);
846 	if (ret) {
847 		/* Destroy the CM ID by returning a non-zero value. */
848 		id_priv->cm_id.ib = NULL;
849 		cma_exch(id_priv, CMA_DESTROYING);
850 		cma_release_remove(id_priv);
851 		rdma_destroy_id(&id_priv->id);
852 		return ret;
853 	}
854 out:
855 	cma_release_remove(id_priv);
856 	return ret;
857 }
858 
859 static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
860 					       struct ib_cm_event *ib_event)
861 {
862 	struct rdma_id_private *id_priv;
863 	struct rdma_cm_id *id;
864 	struct rdma_route *rt;
865 	union cma_ip_addr *src, *dst;
866 	__u16 port;
867 	u8 ip_ver;
868 
869 	if (cma_get_net_info(ib_event->private_data, listen_id->ps,
870 			     &ip_ver, &port, &src, &dst))
871 		goto err;
872 
873 	id = rdma_create_id(listen_id->event_handler, listen_id->context,
874 			    listen_id->ps);
875 	if (IS_ERR(id))
876 		goto err;
877 
878 	cma_save_net_info(&id->route.addr, &listen_id->route.addr,
879 			  ip_ver, port, src, dst);
880 
881 	rt = &id->route;
882 	rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
883 	rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths,
884 			       GFP_KERNEL);
885 	if (!rt->path_rec)
886 		goto destroy_id;
887 
888 	rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path;
889 	if (rt->num_paths == 2)
890 		rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
891 
892 	ib_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
893 	ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
894 	ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey));
895 	rt->addr.dev_addr.dev_type = RDMA_NODE_IB_CA;
896 
897 	id_priv = container_of(id, struct rdma_id_private, id);
898 	id_priv->state = CMA_CONNECT;
899 	return id_priv;
900 
901 destroy_id:
902 	rdma_destroy_id(id);
903 err:
904 	return NULL;
905 }
906 
907 static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
908 					      struct ib_cm_event *ib_event)
909 {
910 	struct rdma_id_private *id_priv;
911 	struct rdma_cm_id *id;
912 	union cma_ip_addr *src, *dst;
913 	__u16 port;
914 	u8 ip_ver;
915 	int ret;
916 
917 	id = rdma_create_id(listen_id->event_handler, listen_id->context,
918 			    listen_id->ps);
919 	if (IS_ERR(id))
920 		return NULL;
921 
922 
923 	if (cma_get_net_info(ib_event->private_data, listen_id->ps,
924 			     &ip_ver, &port, &src, &dst))
925 		goto err;
926 
927 	cma_save_net_info(&id->route.addr, &listen_id->route.addr,
928 			  ip_ver, port, src, dst);
929 
930 	ret = rdma_translate_ip(&id->route.addr.src_addr,
931 				&id->route.addr.dev_addr);
932 	if (ret)
933 		goto err;
934 
935 	id_priv = container_of(id, struct rdma_id_private, id);
936 	id_priv->state = CMA_CONNECT;
937 	return id_priv;
938 err:
939 	rdma_destroy_id(id);
940 	return NULL;
941 }
942 
943 static void cma_set_req_event_data(struct rdma_cm_event *event,
944 				   struct ib_cm_req_event_param *req_data,
945 				   void *private_data, int offset)
946 {
947 	event->param.conn.private_data = private_data + offset;
948 	event->param.conn.private_data_len = IB_CM_REQ_PRIVATE_DATA_SIZE - offset;
949 	event->param.conn.responder_resources = req_data->responder_resources;
950 	event->param.conn.initiator_depth = req_data->initiator_depth;
951 	event->param.conn.flow_control = req_data->flow_control;
952 	event->param.conn.retry_count = req_data->retry_count;
953 	event->param.conn.rnr_retry_count = req_data->rnr_retry_count;
954 	event->param.conn.srq = req_data->srq;
955 	event->param.conn.qp_num = req_data->remote_qpn;
956 }
957 
/*
 * IB CM callback for listening ids: handle an incoming connection (or, for
 * RDMA_PS_UDP, service resolution) request.
 *
 * A new RDMA CM id is created for the request, attached to the device that
 * owns its source GID, bound to the incoming IB CM id, and reported to the
 * user as RDMA_CM_EVENT_CONNECT_REQUEST.
 *
 * Returns 0 if the request was delivered and accepted by the user's
 * handler; -ECONNABORTED if the listener is no longer in CMA_LISTEN;
 * -ENOMEM if no id could be built; otherwise the error from device lookup
 * or the non-zero value returned by the user's handler.
 */
static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
{
	struct rdma_id_private *listen_id, *conn_id;
	struct rdma_cm_event event;
	int offset, ret;

	listen_id = cm_id->context;
	/* Hold off device removal on the listener while we run. */
	atomic_inc(&listen_id->dev_remove);
	if (!cma_comp(listen_id, CMA_LISTEN)) {
		ret = -ECONNABORTED;
		goto out;
	}

	memset(&event, 0, sizeof event);
	offset = cma_user_data_offset(listen_id->id.ps);
	event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
	if (listen_id->id.ps == RDMA_PS_UDP) {
		conn_id = cma_new_udp_id(&listen_id->id, ib_event);
		event.param.ud.private_data = ib_event->private_data + offset;
		event.param.ud.private_data_len =
				IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset;
	} else {
		conn_id = cma_new_conn_id(&listen_id->id, ib_event);
		cma_set_req_event_data(&event, &ib_event->param.req_rcvd,
				       ib_event->private_data, offset);
	}
	if (!conn_id) {
		ret = -ENOMEM;
		goto out;
	}

	/* Same hold on the new id; dropped on the failure path below. */
	atomic_inc(&conn_id->dev_remove);
	mutex_lock(&lock);
	ret = cma_acquire_dev(conn_id);
	mutex_unlock(&lock);
	if (ret)
		goto release_conn_id;

	/* From here on, events for this IB CM id go to the new id. */
	conn_id->cm_id.ib = cm_id;
	cm_id->context = conn_id;
	cm_id->cm_handler = cma_ib_handler;

	ret = conn_id->id.event_handler(&conn_id->id, &event);
	if (!ret)
		goto out;

	/* Destroy the CM ID by returning a non-zero value. */
	conn_id->cm_id.ib = NULL;

release_conn_id:
	cma_exch(conn_id, CMA_DESTROYING);
	cma_release_remove(conn_id);
	rdma_destroy_id(&conn_id->id);

out:
	cma_release_remove(listen_id);
	return ret;
}
1016 
1017 static __be64 cma_get_service_id(enum rdma_port_space ps, struct sockaddr *addr)
1018 {
1019 	return cpu_to_be64(((u64)ps << 16) + be16_to_cpu(cma_port(addr)));
1020 }
1021 
1022 static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr,
1023 				 struct ib_cm_compare_data *compare)
1024 {
1025 	struct cma_hdr *cma_data, *cma_mask;
1026 	struct sdp_hh *sdp_data, *sdp_mask;
1027 	__u32 ip4_addr;
1028 	struct in6_addr ip6_addr;
1029 
1030 	memset(compare, 0, sizeof *compare);
1031 	cma_data = (void *) compare->data;
1032 	cma_mask = (void *) compare->mask;
1033 	sdp_data = (void *) compare->data;
1034 	sdp_mask = (void *) compare->mask;
1035 
1036 	switch (addr->sa_family) {
1037 	case AF_INET:
1038 		ip4_addr = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
1039 		if (ps == RDMA_PS_SDP) {
1040 			sdp_set_ip_ver(sdp_data, 4);
1041 			sdp_set_ip_ver(sdp_mask, 0xF);
1042 			sdp_data->dst_addr.ip4.addr = ip4_addr;
1043 			sdp_mask->dst_addr.ip4.addr = ~0;
1044 		} else {
1045 			cma_set_ip_ver(cma_data, 4);
1046 			cma_set_ip_ver(cma_mask, 0xF);
1047 			cma_data->dst_addr.ip4.addr = ip4_addr;
1048 			cma_mask->dst_addr.ip4.addr = ~0;
1049 		}
1050 		break;
1051 	case AF_INET6:
1052 		ip6_addr = ((struct sockaddr_in6 *) addr)->sin6_addr;
1053 		if (ps == RDMA_PS_SDP) {
1054 			sdp_set_ip_ver(sdp_data, 6);
1055 			sdp_set_ip_ver(sdp_mask, 0xF);
1056 			sdp_data->dst_addr.ip6 = ip6_addr;
1057 			memset(&sdp_mask->dst_addr.ip6, 0xFF,
1058 			       sizeof sdp_mask->dst_addr.ip6);
1059 		} else {
1060 			cma_set_ip_ver(cma_data, 6);
1061 			cma_set_ip_ver(cma_mask, 0xF);
1062 			cma_data->dst_addr.ip6 = ip6_addr;
1063 			memset(&cma_mask->dst_addr.ip6, 0xFF,
1064 			       sizeof cma_mask->dst_addr.ip6);
1065 		}
1066 		break;
1067 	default:
1068 		break;
1069 	}
1070 }
1071 
/*
 * iWARP CM callback for connected ids: translate an iWARP CM event into an
 * RDMA CM event and deliver it to the user's event handler.
 *
 * A dev_remove hold is taken for the duration of the callback.  The event
 * status and private data are passed through from the iWARP event.
 *
 * Returns 0 normally.  If the user's handler returns non-zero, the RDMA CM
 * id is destroyed here and that value is returned.
 */
static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
{
	struct rdma_id_private *id_priv = iw_id->context;
	struct rdma_cm_event event;
	struct sockaddr_in *sin;
	int ret = 0;

	memset(&event, 0, sizeof event);
	atomic_inc(&id_priv->dev_remove);

	switch (iw_event->event) {
	case IW_CM_EVENT_CLOSE:
		event.event = RDMA_CM_EVENT_DISCONNECTED;
		break;
	case IW_CM_EVENT_CONNECT_REPLY:
		/* Record the addresses reported for the connection. */
		sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
		*sin = iw_event->local_addr;
		sin = (struct sockaddr_in *) &id_priv->id.route.addr.dst_addr;
		*sin = iw_event->remote_addr;
		/* Map the reply status onto an RDMA CM event type. */
		switch (iw_event->status) {
		case 0:
			event.event = RDMA_CM_EVENT_ESTABLISHED;
			break;
		case -ECONNRESET:
		case -ECONNREFUSED:
			event.event = RDMA_CM_EVENT_REJECTED;
			break;
		case -ETIMEDOUT:
			event.event = RDMA_CM_EVENT_UNREACHABLE;
			break;
		default:
			event.event = RDMA_CM_EVENT_CONNECT_ERROR;
			break;
		}
		break;
	case IW_CM_EVENT_ESTABLISHED:
		event.event = RDMA_CM_EVENT_ESTABLISHED;
		break;
	default:
		/* Any other event type is treated as a fatal bug. */
		BUG_ON(1);
	}

	event.status = iw_event->status;
	event.param.conn.private_data = iw_event->private_data;
	event.param.conn.private_data_len = iw_event->private_data_len;
	ret = id_priv->id.event_handler(&id_priv->id, &event);
	if (ret) {
		/* Destroy the CM ID by returning a non-zero value. */
		id_priv->cm_id.iw = NULL;
		cma_exch(id_priv, CMA_DESTROYING);
		cma_release_remove(id_priv);
		rdma_destroy_id(&id_priv->id);
		return ret;
	}

	cma_release_remove(id_priv);
	return ret;
}
1130 
1131 static int iw_conn_req_handler(struct iw_cm_id *cm_id,
1132 			       struct iw_cm_event *iw_event)
1133 {
1134 	struct rdma_cm_id *new_cm_id;
1135 	struct rdma_id_private *listen_id, *conn_id;
1136 	struct sockaddr_in *sin;
1137 	struct net_device *dev = NULL;
1138 	struct rdma_cm_event event;
1139 	int ret;
1140 
1141 	listen_id = cm_id->context;
1142 	atomic_inc(&listen_id->dev_remove);
1143 	if (!cma_comp(listen_id, CMA_LISTEN)) {
1144 		ret = -ECONNABORTED;
1145 		goto out;
1146 	}
1147 
1148 	/* Create a new RDMA id for the new IW CM ID */
1149 	new_cm_id = rdma_create_id(listen_id->id.event_handler,
1150 				   listen_id->id.context,
1151 				   RDMA_PS_TCP);
1152 	if (!new_cm_id) {
1153 		ret = -ENOMEM;
1154 		goto out;
1155 	}
1156 	conn_id = container_of(new_cm_id, struct rdma_id_private, id);
1157 	atomic_inc(&conn_id->dev_remove);
1158 	conn_id->state = CMA_CONNECT;
1159 
1160 	dev = ip_dev_find(iw_event->local_addr.sin_addr.s_addr);
1161 	if (!dev) {
1162 		ret = -EADDRNOTAVAIL;
1163 		cma_release_remove(conn_id);
1164 		rdma_destroy_id(new_cm_id);
1165 		goto out;
1166 	}
1167 	ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL);
1168 	if (ret) {
1169 		cma_release_remove(conn_id);
1170 		rdma_destroy_id(new_cm_id);
1171 		goto out;
1172 	}
1173 
1174 	mutex_lock(&lock);
1175 	ret = cma_acquire_dev(conn_id);
1176 	mutex_unlock(&lock);
1177 	if (ret) {
1178 		cma_release_remove(conn_id);
1179 		rdma_destroy_id(new_cm_id);
1180 		goto out;
1181 	}
1182 
1183 	conn_id->cm_id.iw = cm_id;
1184 	cm_id->context = conn_id;
1185 	cm_id->cm_handler = cma_iw_handler;
1186 
1187 	sin = (struct sockaddr_in *) &new_cm_id->route.addr.src_addr;
1188 	*sin = iw_event->local_addr;
1189 	sin = (struct sockaddr_in *) &new_cm_id->route.addr.dst_addr;
1190 	*sin = iw_event->remote_addr;
1191 
1192 	memset(&event, 0, sizeof event);
1193 	event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
1194 	event.param.conn.private_data = iw_event->private_data;
1195 	event.param.conn.private_data_len = iw_event->private_data_len;
1196 	ret = conn_id->id.event_handler(&conn_id->id, &event);
1197 	if (ret) {
1198 		/* User wants to destroy the CM ID */
1199 		conn_id->cm_id.iw = NULL;
1200 		cma_exch(conn_id, CMA_DESTROYING);
1201 		cma_release_remove(conn_id);
1202 		rdma_destroy_id(&conn_id->id);
1203 	}
1204 
1205 out:
1206 	if (dev)
1207 		dev_put(dev);
1208 	cma_release_remove(listen_id);
1209 	return ret;
1210 }
1211 
1212 static int cma_ib_listen(struct rdma_id_private *id_priv)
1213 {
1214 	struct ib_cm_compare_data compare_data;
1215 	struct sockaddr *addr;
1216 	__be64 svc_id;
1217 	int ret;
1218 
1219 	id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_req_handler,
1220 					    id_priv);
1221 	if (IS_ERR(id_priv->cm_id.ib))
1222 		return PTR_ERR(id_priv->cm_id.ib);
1223 
1224 	addr = &id_priv->id.route.addr.src_addr;
1225 	svc_id = cma_get_service_id(id_priv->id.ps, addr);
1226 	if (cma_any_addr(addr))
1227 		ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL);
1228 	else {
1229 		cma_set_compare_data(id_priv->id.ps, addr, &compare_data);
1230 		ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, &compare_data);
1231 	}
1232 
1233 	if (ret) {
1234 		ib_destroy_cm_id(id_priv->cm_id.ib);
1235 		id_priv->cm_id.ib = NULL;
1236 	}
1237 
1238 	return ret;
1239 }
1240 
1241 static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
1242 {
1243 	int ret;
1244 	struct sockaddr_in *sin;
1245 
1246 	id_priv->cm_id.iw = iw_create_cm_id(id_priv->id.device,
1247 					    iw_conn_req_handler,
1248 					    id_priv);
1249 	if (IS_ERR(id_priv->cm_id.iw))
1250 		return PTR_ERR(id_priv->cm_id.iw);
1251 
1252 	sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
1253 	id_priv->cm_id.iw->local_addr = *sin;
1254 
1255 	ret = iw_cm_listen(id_priv->cm_id.iw, backlog);
1256 
1257 	if (ret) {
1258 		iw_destroy_cm_id(id_priv->cm_id.iw);
1259 		id_priv->cm_id.iw = NULL;
1260 	}
1261 
1262 	return ret;
1263 }
1264 
1265 static int cma_listen_handler(struct rdma_cm_id *id,
1266 			      struct rdma_cm_event *event)
1267 {
1268 	struct rdma_id_private *id_priv = id->context;
1269 
1270 	id->context = id_priv->id.context;
1271 	id->event_handler = id_priv->id.event_handler;
1272 	return id_priv->id.event_handler(id, event);
1273 }
1274 
1275 static void cma_listen_on_dev(struct rdma_id_private *id_priv,
1276 			      struct cma_device *cma_dev)
1277 {
1278 	struct rdma_id_private *dev_id_priv;
1279 	struct rdma_cm_id *id;
1280 	int ret;
1281 
1282 	id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps);
1283 	if (IS_ERR(id))
1284 		return;
1285 
1286 	dev_id_priv = container_of(id, struct rdma_id_private, id);
1287 
1288 	dev_id_priv->state = CMA_ADDR_BOUND;
1289 	memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr,
1290 	       ip_addr_size(&id_priv->id.route.addr.src_addr));
1291 
1292 	cma_attach_to_dev(dev_id_priv, cma_dev);
1293 	list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
1294 
1295 	ret = rdma_listen(id, id_priv->backlog);
1296 	if (ret)
1297 		goto err;
1298 
1299 	return;
1300 err:
1301 	cma_destroy_listen(dev_id_priv);
1302 }
1303 
1304 static void cma_listen_on_all(struct rdma_id_private *id_priv)
1305 {
1306 	struct cma_device *cma_dev;
1307 
1308 	mutex_lock(&lock);
1309 	list_add_tail(&id_priv->list, &listen_any_list);
1310 	list_for_each_entry(cma_dev, &dev_list, list)
1311 		cma_listen_on_dev(id_priv, cma_dev);
1312 	mutex_unlock(&lock);
1313 }
1314 
1315 static int cma_bind_any(struct rdma_cm_id *id, sa_family_t af)
1316 {
1317 	struct sockaddr_in addr_in;
1318 
1319 	memset(&addr_in, 0, sizeof addr_in);
1320 	addr_in.sin_family = af;
1321 	return rdma_bind_addr(id, (struct sockaddr *) &addr_in);
1322 }
1323 
1324 int rdma_listen(struct rdma_cm_id *id, int backlog)
1325 {
1326 	struct rdma_id_private *id_priv;
1327 	int ret;
1328 
1329 	id_priv = container_of(id, struct rdma_id_private, id);
1330 	if (id_priv->state == CMA_IDLE) {
1331 		ret = cma_bind_any(id, AF_INET);
1332 		if (ret)
1333 			return ret;
1334 	}
1335 
1336 	if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_LISTEN))
1337 		return -EINVAL;
1338 
1339 	id_priv->backlog = backlog;
1340 	if (id->device) {
1341 		switch (rdma_node_get_transport(id->device->node_type)) {
1342 		case RDMA_TRANSPORT_IB:
1343 			ret = cma_ib_listen(id_priv);
1344 			if (ret)
1345 				goto err;
1346 			break;
1347 		case RDMA_TRANSPORT_IWARP:
1348 			ret = cma_iw_listen(id_priv, backlog);
1349 			if (ret)
1350 				goto err;
1351 			break;
1352 		default:
1353 			ret = -ENOSYS;
1354 			goto err;
1355 		}
1356 	} else
1357 		cma_listen_on_all(id_priv);
1358 
1359 	return 0;
1360 err:
1361 	id_priv->backlog = 0;
1362 	cma_comp_exch(id_priv, CMA_LISTEN, CMA_ADDR_BOUND);
1363 	return ret;
1364 }
1365 EXPORT_SYMBOL(rdma_listen);
1366 
1367 static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
1368 			      void *context)
1369 {
1370 	struct cma_work *work = context;
1371 	struct rdma_route *route;
1372 
1373 	route = &work->id->id.route;
1374 
1375 	if (!status) {
1376 		route->num_paths = 1;
1377 		*route->path_rec = *path_rec;
1378 	} else {
1379 		work->old_state = CMA_ROUTE_QUERY;
1380 		work->new_state = CMA_ADDR_RESOLVED;
1381 		work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
1382 		work->event.status = status;
1383 	}
1384 
1385 	queue_work(cma_wq, &work->work);
1386 }
1387 
1388 static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
1389 			      struct cma_work *work)
1390 {
1391 	struct rdma_dev_addr *addr = &id_priv->id.route.addr.dev_addr;
1392 	struct ib_sa_path_rec path_rec;
1393 
1394 	memset(&path_rec, 0, sizeof path_rec);
1395 	ib_addr_get_sgid(addr, &path_rec.sgid);
1396 	ib_addr_get_dgid(addr, &path_rec.dgid);
1397 	path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(addr));
1398 	path_rec.numb_path = 1;
1399 
1400 	id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device,
1401 				id_priv->id.port_num, &path_rec,
1402 				IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
1403 				IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH,
1404 				timeout_ms, GFP_KERNEL,
1405 				cma_query_handler, work, &id_priv->query);
1406 
1407 	return (id_priv->query_id < 0) ? id_priv->query_id : 0;
1408 }
1409 
1410 static void cma_work_handler(struct work_struct *_work)
1411 {
1412 	struct cma_work *work = container_of(_work, struct cma_work, work);
1413 	struct rdma_id_private *id_priv = work->id;
1414 	int destroy = 0;
1415 
1416 	atomic_inc(&id_priv->dev_remove);
1417 	if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
1418 		goto out;
1419 
1420 	if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
1421 		cma_exch(id_priv, CMA_DESTROYING);
1422 		destroy = 1;
1423 	}
1424 out:
1425 	cma_release_remove(id_priv);
1426 	cma_deref_id(id_priv);
1427 	if (destroy)
1428 		rdma_destroy_id(&id_priv->id);
1429 	kfree(work);
1430 }
1431 
1432 static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
1433 {
1434 	struct rdma_route *route = &id_priv->id.route;
1435 	struct cma_work *work;
1436 	int ret;
1437 
1438 	work = kzalloc(sizeof *work, GFP_KERNEL);
1439 	if (!work)
1440 		return -ENOMEM;
1441 
1442 	work->id = id_priv;
1443 	INIT_WORK(&work->work, cma_work_handler);
1444 	work->old_state = CMA_ROUTE_QUERY;
1445 	work->new_state = CMA_ROUTE_RESOLVED;
1446 	work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
1447 
1448 	route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL);
1449 	if (!route->path_rec) {
1450 		ret = -ENOMEM;
1451 		goto err1;
1452 	}
1453 
1454 	ret = cma_query_ib_route(id_priv, timeout_ms, work);
1455 	if (ret)
1456 		goto err2;
1457 
1458 	return 0;
1459 err2:
1460 	kfree(route->path_rec);
1461 	route->path_rec = NULL;
1462 err1:
1463 	kfree(work);
1464 	return ret;
1465 }
1466 
1467 int rdma_set_ib_paths(struct rdma_cm_id *id,
1468 		      struct ib_sa_path_rec *path_rec, int num_paths)
1469 {
1470 	struct rdma_id_private *id_priv;
1471 	int ret;
1472 
1473 	id_priv = container_of(id, struct rdma_id_private, id);
1474 	if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_RESOLVED))
1475 		return -EINVAL;
1476 
1477 	id->route.path_rec = kmalloc(sizeof *path_rec * num_paths, GFP_KERNEL);
1478 	if (!id->route.path_rec) {
1479 		ret = -ENOMEM;
1480 		goto err;
1481 	}
1482 
1483 	memcpy(id->route.path_rec, path_rec, sizeof *path_rec * num_paths);
1484 	return 0;
1485 err:
1486 	cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_ADDR_RESOLVED);
1487 	return ret;
1488 }
1489 EXPORT_SYMBOL(rdma_set_ib_paths);
1490 
1491 static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
1492 {
1493 	struct cma_work *work;
1494 
1495 	work = kzalloc(sizeof *work, GFP_KERNEL);
1496 	if (!work)
1497 		return -ENOMEM;
1498 
1499 	work->id = id_priv;
1500 	INIT_WORK(&work->work, cma_work_handler);
1501 	work->old_state = CMA_ROUTE_QUERY;
1502 	work->new_state = CMA_ROUTE_RESOLVED;
1503 	work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
1504 	queue_work(cma_wq, &work->work);
1505 	return 0;
1506 }
1507 
1508 int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
1509 {
1510 	struct rdma_id_private *id_priv;
1511 	int ret;
1512 
1513 	id_priv = container_of(id, struct rdma_id_private, id);
1514 	if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_QUERY))
1515 		return -EINVAL;
1516 
1517 	atomic_inc(&id_priv->refcount);
1518 	switch (rdma_node_get_transport(id->device->node_type)) {
1519 	case RDMA_TRANSPORT_IB:
1520 		ret = cma_resolve_ib_route(id_priv, timeout_ms);
1521 		break;
1522 	case RDMA_TRANSPORT_IWARP:
1523 		ret = cma_resolve_iw_route(id_priv, timeout_ms);
1524 		break;
1525 	default:
1526 		ret = -ENOSYS;
1527 		break;
1528 	}
1529 	if (ret)
1530 		goto err;
1531 
1532 	return 0;
1533 err:
1534 	cma_comp_exch(id_priv, CMA_ROUTE_QUERY, CMA_ADDR_RESOLVED);
1535 	cma_deref_id(id_priv);
1536 	return ret;
1537 }
1538 EXPORT_SYMBOL(rdma_resolve_route);
1539 
1540 static int cma_bind_loopback(struct rdma_id_private *id_priv)
1541 {
1542 	struct cma_device *cma_dev;
1543 	struct ib_port_attr port_attr;
1544 	union ib_gid gid;
1545 	u16 pkey;
1546 	int ret;
1547 	u8 p;
1548 
1549 	mutex_lock(&lock);
1550 	if (list_empty(&dev_list)) {
1551 		ret = -ENODEV;
1552 		goto out;
1553 	}
1554 	list_for_each_entry(cma_dev, &dev_list, list)
1555 		for (p = 1; p <= cma_dev->device->phys_port_cnt; ++p)
1556 			if (!ib_query_port(cma_dev->device, p, &port_attr) &&
1557 			    port_attr.state == IB_PORT_ACTIVE)
1558 				goto port_found;
1559 
1560 	p = 1;
1561 	cma_dev = list_entry(dev_list.next, struct cma_device, list);
1562 
1563 port_found:
1564 	ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid);
1565 	if (ret)
1566 		goto out;
1567 
1568 	ret = ib_get_cached_pkey(cma_dev->device, p, 0, &pkey);
1569 	if (ret)
1570 		goto out;
1571 
1572 	ib_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid);
1573 	ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey);
1574 	id_priv->id.port_num = p;
1575 	cma_attach_to_dev(id_priv, cma_dev);
1576 out:
1577 	mutex_unlock(&lock);
1578 	return ret;
1579 }
1580 
1581 static void addr_handler(int status, struct sockaddr *src_addr,
1582 			 struct rdma_dev_addr *dev_addr, void *context)
1583 {
1584 	struct rdma_id_private *id_priv = context;
1585 	struct rdma_cm_event event;
1586 
1587 	memset(&event, 0, sizeof event);
1588 	atomic_inc(&id_priv->dev_remove);
1589 
1590 	/*
1591 	 * Grab mutex to block rdma_destroy_id() from removing the device while
1592 	 * we're trying to acquire it.
1593 	 */
1594 	mutex_lock(&lock);
1595 	if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED)) {
1596 		mutex_unlock(&lock);
1597 		goto out;
1598 	}
1599 
1600 	if (!status && !id_priv->cma_dev)
1601 		status = cma_acquire_dev(id_priv);
1602 	mutex_unlock(&lock);
1603 
1604 	if (status) {
1605 		if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ADDR_BOUND))
1606 			goto out;
1607 		event.event = RDMA_CM_EVENT_ADDR_ERROR;
1608 		event.status = status;
1609 	} else {
1610 		memcpy(&id_priv->id.route.addr.src_addr, src_addr,
1611 		       ip_addr_size(src_addr));
1612 		event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
1613 	}
1614 
1615 	if (id_priv->id.event_handler(&id_priv->id, &event)) {
1616 		cma_exch(id_priv, CMA_DESTROYING);
1617 		cma_release_remove(id_priv);
1618 		cma_deref_id(id_priv);
1619 		rdma_destroy_id(&id_priv->id);
1620 		return;
1621 	}
1622 out:
1623 	cma_release_remove(id_priv);
1624 	cma_deref_id(id_priv);
1625 }
1626 
1627 static int cma_resolve_loopback(struct rdma_id_private *id_priv)
1628 {
1629 	struct cma_work *work;
1630 	struct sockaddr_in *src_in, *dst_in;
1631 	union ib_gid gid;
1632 	int ret;
1633 
1634 	work = kzalloc(sizeof *work, GFP_KERNEL);
1635 	if (!work)
1636 		return -ENOMEM;
1637 
1638 	if (!id_priv->cma_dev) {
1639 		ret = cma_bind_loopback(id_priv);
1640 		if (ret)
1641 			goto err;
1642 	}
1643 
1644 	ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
1645 	ib_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
1646 
1647 	if (cma_zero_addr(&id_priv->id.route.addr.src_addr)) {
1648 		src_in = (struct sockaddr_in *)&id_priv->id.route.addr.src_addr;
1649 		dst_in = (struct sockaddr_in *)&id_priv->id.route.addr.dst_addr;
1650 		src_in->sin_family = dst_in->sin_family;
1651 		src_in->sin_addr.s_addr = dst_in->sin_addr.s_addr;
1652 	}
1653 
1654 	work->id = id_priv;
1655 	INIT_WORK(&work->work, cma_work_handler);
1656 	work->old_state = CMA_ADDR_QUERY;
1657 	work->new_state = CMA_ADDR_RESOLVED;
1658 	work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
1659 	queue_work(cma_wq, &work->work);
1660 	return 0;
1661 err:
1662 	kfree(work);
1663 	return ret;
1664 }
1665 
1666 static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
1667 			 struct sockaddr *dst_addr)
1668 {
1669 	if (src_addr && src_addr->sa_family)
1670 		return rdma_bind_addr(id, src_addr);
1671 	else
1672 		return cma_bind_any(id, dst_addr->sa_family);
1673 }
1674 
1675 int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
1676 		      struct sockaddr *dst_addr, int timeout_ms)
1677 {
1678 	struct rdma_id_private *id_priv;
1679 	int ret;
1680 
1681 	id_priv = container_of(id, struct rdma_id_private, id);
1682 	if (id_priv->state == CMA_IDLE) {
1683 		ret = cma_bind_addr(id, src_addr, dst_addr);
1684 		if (ret)
1685 			return ret;
1686 	}
1687 
1688 	if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_ADDR_QUERY))
1689 		return -EINVAL;
1690 
1691 	atomic_inc(&id_priv->refcount);
1692 	memcpy(&id->route.addr.dst_addr, dst_addr, ip_addr_size(dst_addr));
1693 	if (cma_any_addr(dst_addr))
1694 		ret = cma_resolve_loopback(id_priv);
1695 	else
1696 		ret = rdma_resolve_ip(&addr_client, &id->route.addr.src_addr,
1697 				      dst_addr, &id->route.addr.dev_addr,
1698 				      timeout_ms, addr_handler, id_priv);
1699 	if (ret)
1700 		goto err;
1701 
1702 	return 0;
1703 err:
1704 	cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_BOUND);
1705 	cma_deref_id(id_priv);
1706 	return ret;
1707 }
1708 EXPORT_SYMBOL(rdma_resolve_addr);
1709 
1710 static void cma_bind_port(struct rdma_bind_list *bind_list,
1711 			  struct rdma_id_private *id_priv)
1712 {
1713 	struct sockaddr_in *sin;
1714 
1715 	sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
1716 	sin->sin_port = htons(bind_list->port);
1717 	id_priv->bind_list = bind_list;
1718 	hlist_add_head(&id_priv->node, &bind_list->owners);
1719 }
1720 
1721 static int cma_alloc_port(struct idr *ps, struct rdma_id_private *id_priv,
1722 			  unsigned short snum)
1723 {
1724 	struct rdma_bind_list *bind_list;
1725 	int port, start, ret;
1726 
1727 	bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
1728 	if (!bind_list)
1729 		return -ENOMEM;
1730 
1731 	start = snum ? snum : sysctl_local_port_range[0];
1732 
1733 	do {
1734 		ret = idr_get_new_above(ps, bind_list, start, &port);
1735 	} while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL));
1736 
1737 	if (ret)
1738 		goto err;
1739 
1740 	if ((snum && port != snum) ||
1741 	    (!snum && port > sysctl_local_port_range[1])) {
1742 		idr_remove(ps, port);
1743 		ret = -EADDRNOTAVAIL;
1744 		goto err;
1745 	}
1746 
1747 	bind_list->ps = ps;
1748 	bind_list->port = (unsigned short) port;
1749 	cma_bind_port(bind_list, id_priv);
1750 	return 0;
1751 err:
1752 	kfree(bind_list);
1753 	return ret;
1754 }
1755 
1756 static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv)
1757 {
1758 	struct rdma_id_private *cur_id;
1759 	struct sockaddr_in *sin, *cur_sin;
1760 	struct rdma_bind_list *bind_list;
1761 	struct hlist_node *node;
1762 	unsigned short snum;
1763 
1764 	sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
1765 	snum = ntohs(sin->sin_port);
1766 	if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
1767 		return -EACCES;
1768 
1769 	bind_list = idr_find(ps, snum);
1770 	if (!bind_list)
1771 		return cma_alloc_port(ps, id_priv, snum);
1772 
1773 	/*
1774 	 * We don't support binding to any address if anyone is bound to
1775 	 * a specific address on the same port.
1776 	 */
1777 	if (cma_any_addr(&id_priv->id.route.addr.src_addr))
1778 		return -EADDRNOTAVAIL;
1779 
1780 	hlist_for_each_entry(cur_id, node, &bind_list->owners, node) {
1781 		if (cma_any_addr(&cur_id->id.route.addr.src_addr))
1782 			return -EADDRNOTAVAIL;
1783 
1784 		cur_sin = (struct sockaddr_in *) &cur_id->id.route.addr.src_addr;
1785 		if (sin->sin_addr.s_addr == cur_sin->sin_addr.s_addr)
1786 			return -EADDRINUSE;
1787 	}
1788 
1789 	cma_bind_port(bind_list, id_priv);
1790 	return 0;
1791 }
1792 
1793 static int cma_get_port(struct rdma_id_private *id_priv)
1794 {
1795 	struct idr *ps;
1796 	int ret;
1797 
1798 	switch (id_priv->id.ps) {
1799 	case RDMA_PS_SDP:
1800 		ps = &sdp_ps;
1801 		break;
1802 	case RDMA_PS_TCP:
1803 		ps = &tcp_ps;
1804 		break;
1805 	case RDMA_PS_UDP:
1806 		ps = &udp_ps;
1807 		break;
1808 	default:
1809 		return -EPROTONOSUPPORT;
1810 	}
1811 
1812 	mutex_lock(&lock);
1813 	if (cma_any_port(&id_priv->id.route.addr.src_addr))
1814 		ret = cma_alloc_port(ps, id_priv, 0);
1815 	else
1816 		ret = cma_use_port(ps, id_priv);
1817 	mutex_unlock(&lock);
1818 
1819 	return ret;
1820 }
1821 
1822 int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
1823 {
1824 	struct rdma_id_private *id_priv;
1825 	int ret;
1826 
1827 	if (addr->sa_family != AF_INET)
1828 		return -EAFNOSUPPORT;
1829 
1830 	id_priv = container_of(id, struct rdma_id_private, id);
1831 	if (!cma_comp_exch(id_priv, CMA_IDLE, CMA_ADDR_BOUND))
1832 		return -EINVAL;
1833 
1834 	if (!cma_any_addr(addr)) {
1835 		ret = rdma_translate_ip(addr, &id->route.addr.dev_addr);
1836 		if (ret)
1837 			goto err1;
1838 
1839 		mutex_lock(&lock);
1840 		ret = cma_acquire_dev(id_priv);
1841 		mutex_unlock(&lock);
1842 		if (ret)
1843 			goto err1;
1844 	}
1845 
1846 	memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr));
1847 	ret = cma_get_port(id_priv);
1848 	if (ret)
1849 		goto err2;
1850 
1851 	return 0;
1852 err2:
1853 	if (!cma_any_addr(addr)) {
1854 		mutex_lock(&lock);
1855 		cma_detach_from_dev(id_priv);
1856 		mutex_unlock(&lock);
1857 	}
1858 err1:
1859 	cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_IDLE);
1860 	return ret;
1861 }
1862 EXPORT_SYMBOL(rdma_bind_addr);
1863 
1864 static int cma_format_hdr(void *hdr, enum rdma_port_space ps,
1865 			  struct rdma_route *route)
1866 {
1867 	struct sockaddr_in *src4, *dst4;
1868 	struct cma_hdr *cma_hdr;
1869 	struct sdp_hh *sdp_hdr;
1870 
1871 	src4 = (struct sockaddr_in *) &route->addr.src_addr;
1872 	dst4 = (struct sockaddr_in *) &route->addr.dst_addr;
1873 
1874 	switch (ps) {
1875 	case RDMA_PS_SDP:
1876 		sdp_hdr = hdr;
1877 		if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
1878 			return -EINVAL;
1879 		sdp_set_ip_ver(sdp_hdr, 4);
1880 		sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
1881 		sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
1882 		sdp_hdr->port = src4->sin_port;
1883 		break;
1884 	default:
1885 		cma_hdr = hdr;
1886 		cma_hdr->cma_version = CMA_VERSION;
1887 		cma_set_ip_ver(cma_hdr, 4);
1888 		cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
1889 		cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
1890 		cma_hdr->port = src4->sin_port;
1891 		break;
1892 	}
1893 	return 0;
1894 }
1895 
1896 static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
1897 				struct ib_cm_event *ib_event)
1898 {
1899 	struct rdma_id_private *id_priv = cm_id->context;
1900 	struct rdma_cm_event event;
1901 	struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd;
1902 	int ret = 0;
1903 
1904 	memset(&event, 0, sizeof event);
1905 	atomic_inc(&id_priv->dev_remove);
1906 	if (!cma_comp(id_priv, CMA_CONNECT))
1907 		goto out;
1908 
1909 	switch (ib_event->event) {
1910 	case IB_CM_SIDR_REQ_ERROR:
1911 		event.event = RDMA_CM_EVENT_UNREACHABLE;
1912 		event.status = -ETIMEDOUT;
1913 		break;
1914 	case IB_CM_SIDR_REP_RECEIVED:
1915 		event.param.ud.private_data = ib_event->private_data;
1916 		event.param.ud.private_data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE;
1917 		if (rep->status != IB_SIDR_SUCCESS) {
1918 			event.event = RDMA_CM_EVENT_UNREACHABLE;
1919 			event.status = ib_event->param.sidr_rep_rcvd.status;
1920 			break;
1921 		}
1922 		if (rep->qkey != RDMA_UD_QKEY) {
1923 			event.event = RDMA_CM_EVENT_UNREACHABLE;
1924 			event.status = -EINVAL;
1925 			break;
1926 		}
1927 		ib_init_ah_from_path(id_priv->id.device, id_priv->id.port_num,
1928 				     id_priv->id.route.path_rec,
1929 				     &event.param.ud.ah_attr);
1930 		event.param.ud.qp_num = rep->qpn;
1931 		event.param.ud.qkey = rep->qkey;
1932 		event.event = RDMA_CM_EVENT_ESTABLISHED;
1933 		event.status = 0;
1934 		break;
1935 	default:
1936 		printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d",
1937 		       ib_event->event);
1938 		goto out;
1939 	}
1940 
1941 	ret = id_priv->id.event_handler(&id_priv->id, &event);
1942 	if (ret) {
1943 		/* Destroy the CM ID by returning a non-zero value. */
1944 		id_priv->cm_id.ib = NULL;
1945 		cma_exch(id_priv, CMA_DESTROYING);
1946 		cma_release_remove(id_priv);
1947 		rdma_destroy_id(&id_priv->id);
1948 		return ret;
1949 	}
1950 out:
1951 	cma_release_remove(id_priv);
1952 	return ret;
1953 }
1954 
1955 static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
1956 			      struct rdma_conn_param *conn_param)
1957 {
1958 	struct ib_cm_sidr_req_param req;
1959 	struct rdma_route *route;
1960 	int ret;
1961 
1962 	req.private_data_len = sizeof(struct cma_hdr) +
1963 			       conn_param->private_data_len;
1964 	req.private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
1965 	if (!req.private_data)
1966 		return -ENOMEM;
1967 
1968 	if (conn_param->private_data && conn_param->private_data_len)
1969 		memcpy((void *) req.private_data + sizeof(struct cma_hdr),
1970 		       conn_param->private_data, conn_param->private_data_len);
1971 
1972 	route = &id_priv->id.route;
1973 	ret = cma_format_hdr((void *) req.private_data, id_priv->id.ps, route);
1974 	if (ret)
1975 		goto out;
1976 
1977 	id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device,
1978 					    cma_sidr_rep_handler, id_priv);
1979 	if (IS_ERR(id_priv->cm_id.ib)) {
1980 		ret = PTR_ERR(id_priv->cm_id.ib);
1981 		goto out;
1982 	}
1983 
1984 	req.path = route->path_rec;
1985 	req.service_id = cma_get_service_id(id_priv->id.ps,
1986 					    &route->addr.dst_addr);
1987 	req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8);
1988 	req.max_cm_retries = CMA_MAX_CM_RETRIES;
1989 
1990 	ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req);
1991 	if (ret) {
1992 		ib_destroy_cm_id(id_priv->cm_id.ib);
1993 		id_priv->cm_id.ib = NULL;
1994 	}
1995 out:
1996 	kfree(req.private_data);
1997 	return ret;
1998 }
1999 
2000 static int cma_connect_ib(struct rdma_id_private *id_priv,
2001 			  struct rdma_conn_param *conn_param)
2002 {
2003 	struct ib_cm_req_param req;
2004 	struct rdma_route *route;
2005 	void *private_data;
2006 	int offset, ret;
2007 
2008 	memset(&req, 0, sizeof req);
2009 	offset = cma_user_data_offset(id_priv->id.ps);
2010 	req.private_data_len = offset + conn_param->private_data_len;
2011 	private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
2012 	if (!private_data)
2013 		return -ENOMEM;
2014 
2015 	if (conn_param->private_data && conn_param->private_data_len)
2016 		memcpy(private_data + offset, conn_param->private_data,
2017 		       conn_param->private_data_len);
2018 
2019 	id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_ib_handler,
2020 					    id_priv);
2021 	if (IS_ERR(id_priv->cm_id.ib)) {
2022 		ret = PTR_ERR(id_priv->cm_id.ib);
2023 		goto out;
2024 	}
2025 
2026 	route = &id_priv->id.route;
2027 	ret = cma_format_hdr(private_data, id_priv->id.ps, route);
2028 	if (ret)
2029 		goto out;
2030 	req.private_data = private_data;
2031 
2032 	req.primary_path = &route->path_rec[0];
2033 	if (route->num_paths == 2)
2034 		req.alternate_path = &route->path_rec[1];
2035 
2036 	req.service_id = cma_get_service_id(id_priv->id.ps,
2037 					    &route->addr.dst_addr);
2038 	req.qp_num = id_priv->qp_num;
2039 	req.qp_type = IB_QPT_RC;
2040 	req.starting_psn = id_priv->seq_num;
2041 	req.responder_resources = conn_param->responder_resources;
2042 	req.initiator_depth = conn_param->initiator_depth;
2043 	req.flow_control = conn_param->flow_control;
2044 	req.retry_count = conn_param->retry_count;
2045 	req.rnr_retry_count = conn_param->rnr_retry_count;
2046 	req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
2047 	req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
2048 	req.max_cm_retries = CMA_MAX_CM_RETRIES;
2049 	req.srq = id_priv->srq ? 1 : 0;
2050 
2051 	ret = ib_send_cm_req(id_priv->cm_id.ib, &req);
2052 out:
2053 	if (ret && !IS_ERR(id_priv->cm_id.ib)) {
2054 		ib_destroy_cm_id(id_priv->cm_id.ib);
2055 		id_priv->cm_id.ib = NULL;
2056 	}
2057 
2058 	kfree(private_data);
2059 	return ret;
2060 }
2061 
2062 static int cma_connect_iw(struct rdma_id_private *id_priv,
2063 			  struct rdma_conn_param *conn_param)
2064 {
2065 	struct iw_cm_id *cm_id;
2066 	struct sockaddr_in* sin;
2067 	int ret;
2068 	struct iw_cm_conn_param iw_param;
2069 
2070 	cm_id = iw_create_cm_id(id_priv->id.device, cma_iw_handler, id_priv);
2071 	if (IS_ERR(cm_id)) {
2072 		ret = PTR_ERR(cm_id);
2073 		goto out;
2074 	}
2075 
2076 	id_priv->cm_id.iw = cm_id;
2077 
2078 	sin = (struct sockaddr_in*) &id_priv->id.route.addr.src_addr;
2079 	cm_id->local_addr = *sin;
2080 
2081 	sin = (struct sockaddr_in*) &id_priv->id.route.addr.dst_addr;
2082 	cm_id->remote_addr = *sin;
2083 
2084 	ret = cma_modify_qp_rtr(&id_priv->id);
2085 	if (ret)
2086 		goto out;
2087 
2088 	iw_param.ord = conn_param->initiator_depth;
2089 	iw_param.ird = conn_param->responder_resources;
2090 	iw_param.private_data = conn_param->private_data;
2091 	iw_param.private_data_len = conn_param->private_data_len;
2092 	if (id_priv->id.qp)
2093 		iw_param.qpn = id_priv->qp_num;
2094 	else
2095 		iw_param.qpn = conn_param->qp_num;
2096 	ret = iw_cm_connect(cm_id, &iw_param);
2097 out:
2098 	if (ret && !IS_ERR(cm_id)) {
2099 		iw_destroy_cm_id(cm_id);
2100 		id_priv->cm_id.iw = NULL;
2101 	}
2102 	return ret;
2103 }
2104 
2105 int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
2106 {
2107 	struct rdma_id_private *id_priv;
2108 	int ret;
2109 
2110 	id_priv = container_of(id, struct rdma_id_private, id);
2111 	if (!cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_CONNECT))
2112 		return -EINVAL;
2113 
2114 	if (!id->qp) {
2115 		id_priv->qp_num = conn_param->qp_num;
2116 		id_priv->srq = conn_param->srq;
2117 	}
2118 
2119 	switch (rdma_node_get_transport(id->device->node_type)) {
2120 	case RDMA_TRANSPORT_IB:
2121 		if (id->ps == RDMA_PS_UDP)
2122 			ret = cma_resolve_ib_udp(id_priv, conn_param);
2123 		else
2124 			ret = cma_connect_ib(id_priv, conn_param);
2125 		break;
2126 	case RDMA_TRANSPORT_IWARP:
2127 		ret = cma_connect_iw(id_priv, conn_param);
2128 		break;
2129 	default:
2130 		ret = -ENOSYS;
2131 		break;
2132 	}
2133 	if (ret)
2134 		goto err;
2135 
2136 	return 0;
2137 err:
2138 	cma_comp_exch(id_priv, CMA_CONNECT, CMA_ROUTE_RESOLVED);
2139 	return ret;
2140 }
2141 EXPORT_SYMBOL(rdma_connect);
2142 
2143 static int cma_accept_ib(struct rdma_id_private *id_priv,
2144 			 struct rdma_conn_param *conn_param)
2145 {
2146 	struct ib_cm_rep_param rep;
2147 	struct ib_qp_attr qp_attr;
2148 	int qp_attr_mask, ret;
2149 
2150 	if (id_priv->id.qp) {
2151 		ret = cma_modify_qp_rtr(&id_priv->id);
2152 		if (ret)
2153 			goto out;
2154 
2155 		qp_attr.qp_state = IB_QPS_RTS;
2156 		ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, &qp_attr,
2157 					 &qp_attr_mask);
2158 		if (ret)
2159 			goto out;
2160 
2161 		qp_attr.max_rd_atomic = conn_param->initiator_depth;
2162 		ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
2163 		if (ret)
2164 			goto out;
2165 	}
2166 
2167 	memset(&rep, 0, sizeof rep);
2168 	rep.qp_num = id_priv->qp_num;
2169 	rep.starting_psn = id_priv->seq_num;
2170 	rep.private_data = conn_param->private_data;
2171 	rep.private_data_len = conn_param->private_data_len;
2172 	rep.responder_resources = conn_param->responder_resources;
2173 	rep.initiator_depth = conn_param->initiator_depth;
2174 	rep.target_ack_delay = CMA_CM_RESPONSE_TIMEOUT;
2175 	rep.failover_accepted = 0;
2176 	rep.flow_control = conn_param->flow_control;
2177 	rep.rnr_retry_count = conn_param->rnr_retry_count;
2178 	rep.srq = id_priv->srq ? 1 : 0;
2179 
2180 	ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep);
2181 out:
2182 	return ret;
2183 }
2184 
2185 static int cma_accept_iw(struct rdma_id_private *id_priv,
2186 		  struct rdma_conn_param *conn_param)
2187 {
2188 	struct iw_cm_conn_param iw_param;
2189 	int ret;
2190 
2191 	ret = cma_modify_qp_rtr(&id_priv->id);
2192 	if (ret)
2193 		return ret;
2194 
2195 	iw_param.ord = conn_param->initiator_depth;
2196 	iw_param.ird = conn_param->responder_resources;
2197 	iw_param.private_data = conn_param->private_data;
2198 	iw_param.private_data_len = conn_param->private_data_len;
2199 	if (id_priv->id.qp) {
2200 		iw_param.qpn = id_priv->qp_num;
2201 	} else
2202 		iw_param.qpn = conn_param->qp_num;
2203 
2204 	return iw_cm_accept(id_priv->cm_id.iw, &iw_param);
2205 }
2206 
2207 static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
2208 			     enum ib_cm_sidr_status status,
2209 			     const void *private_data, int private_data_len)
2210 {
2211 	struct ib_cm_sidr_rep_param rep;
2212 
2213 	memset(&rep, 0, sizeof rep);
2214 	rep.status = status;
2215 	if (status == IB_SIDR_SUCCESS) {
2216 		rep.qp_num = id_priv->qp_num;
2217 		rep.qkey = RDMA_UD_QKEY;
2218 	}
2219 	rep.private_data = private_data;
2220 	rep.private_data_len = private_data_len;
2221 
2222 	return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
2223 }
2224 
2225 int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
2226 {
2227 	struct rdma_id_private *id_priv;
2228 	int ret;
2229 
2230 	id_priv = container_of(id, struct rdma_id_private, id);
2231 	if (!cma_comp(id_priv, CMA_CONNECT))
2232 		return -EINVAL;
2233 
2234 	if (!id->qp && conn_param) {
2235 		id_priv->qp_num = conn_param->qp_num;
2236 		id_priv->srq = conn_param->srq;
2237 	}
2238 
2239 	switch (rdma_node_get_transport(id->device->node_type)) {
2240 	case RDMA_TRANSPORT_IB:
2241 		if (id->ps == RDMA_PS_UDP)
2242 			ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
2243 						conn_param->private_data,
2244 						conn_param->private_data_len);
2245 		else if (conn_param)
2246 			ret = cma_accept_ib(id_priv, conn_param);
2247 		else
2248 			ret = cma_rep_recv(id_priv);
2249 		break;
2250 	case RDMA_TRANSPORT_IWARP:
2251 		ret = cma_accept_iw(id_priv, conn_param);
2252 		break;
2253 	default:
2254 		ret = -ENOSYS;
2255 		break;
2256 	}
2257 
2258 	if (ret)
2259 		goto reject;
2260 
2261 	return 0;
2262 reject:
2263 	cma_modify_qp_err(id);
2264 	rdma_reject(id, NULL, 0);
2265 	return ret;
2266 }
2267 EXPORT_SYMBOL(rdma_accept);
2268 
2269 int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
2270 {
2271 	struct rdma_id_private *id_priv;
2272 	int ret;
2273 
2274 	id_priv = container_of(id, struct rdma_id_private, id);
2275 	if (!cma_comp(id_priv, CMA_CONNECT))
2276 		return -EINVAL;
2277 
2278 	switch (id->device->node_type) {
2279 	case RDMA_NODE_IB_CA:
2280 		ret = ib_cm_notify(id_priv->cm_id.ib, event);
2281 		break;
2282 	default:
2283 		ret = 0;
2284 		break;
2285 	}
2286 	return ret;
2287 }
2288 EXPORT_SYMBOL(rdma_notify);
2289 
2290 int rdma_reject(struct rdma_cm_id *id, const void *private_data,
2291 		u8 private_data_len)
2292 {
2293 	struct rdma_id_private *id_priv;
2294 	int ret;
2295 
2296 	id_priv = container_of(id, struct rdma_id_private, id);
2297 	if (!cma_comp(id_priv, CMA_CONNECT))
2298 		return -EINVAL;
2299 
2300 	switch (rdma_node_get_transport(id->device->node_type)) {
2301 	case RDMA_TRANSPORT_IB:
2302 		if (id->ps == RDMA_PS_UDP)
2303 			ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT,
2304 						private_data, private_data_len);
2305 		else
2306 			ret = ib_send_cm_rej(id_priv->cm_id.ib,
2307 					     IB_CM_REJ_CONSUMER_DEFINED, NULL,
2308 					     0, private_data, private_data_len);
2309 		break;
2310 	case RDMA_TRANSPORT_IWARP:
2311 		ret = iw_cm_reject(id_priv->cm_id.iw,
2312 				   private_data, private_data_len);
2313 		break;
2314 	default:
2315 		ret = -ENOSYS;
2316 		break;
2317 	}
2318 	return ret;
2319 }
2320 EXPORT_SYMBOL(rdma_reject);
2321 
2322 int rdma_disconnect(struct rdma_cm_id *id)
2323 {
2324 	struct rdma_id_private *id_priv;
2325 	int ret;
2326 
2327 	id_priv = container_of(id, struct rdma_id_private, id);
2328 	if (!cma_comp(id_priv, CMA_CONNECT) &&
2329 	    !cma_comp(id_priv, CMA_DISCONNECT))
2330 		return -EINVAL;
2331 
2332 	switch (rdma_node_get_transport(id->device->node_type)) {
2333 	case RDMA_TRANSPORT_IB:
2334 		ret = cma_modify_qp_err(id);
2335 		if (ret)
2336 			goto out;
2337 		/* Initiate or respond to a disconnect. */
2338 		if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0))
2339 			ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0);
2340 		break;
2341 	case RDMA_TRANSPORT_IWARP:
2342 		ret = iw_cm_disconnect(id_priv->cm_id.iw, 0);
2343 		break;
2344 	default:
2345 		ret = -EINVAL;
2346 		break;
2347 	}
2348 out:
2349 	return ret;
2350 }
2351 EXPORT_SYMBOL(rdma_disconnect);
2352 
2353 static void cma_add_one(struct ib_device *device)
2354 {
2355 	struct cma_device *cma_dev;
2356 	struct rdma_id_private *id_priv;
2357 
2358 	cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
2359 	if (!cma_dev)
2360 		return;
2361 
2362 	cma_dev->device = device;
2363 	cma_dev->node_guid = device->node_guid;
2364 
2365 	init_completion(&cma_dev->comp);
2366 	atomic_set(&cma_dev->refcount, 1);
2367 	INIT_LIST_HEAD(&cma_dev->id_list);
2368 	ib_set_client_data(device, &cma_client, cma_dev);
2369 
2370 	mutex_lock(&lock);
2371 	list_add_tail(&cma_dev->list, &dev_list);
2372 	list_for_each_entry(id_priv, &listen_any_list, list)
2373 		cma_listen_on_dev(id_priv, cma_dev);
2374 	mutex_unlock(&lock);
2375 }
2376 
2377 static int cma_remove_id_dev(struct rdma_id_private *id_priv)
2378 {
2379 	struct rdma_cm_event event;
2380 	enum cma_state state;
2381 
2382 	/* Record that we want to remove the device */
2383 	state = cma_exch(id_priv, CMA_DEVICE_REMOVAL);
2384 	if (state == CMA_DESTROYING)
2385 		return 0;
2386 
2387 	cma_cancel_operation(id_priv, state);
2388 	wait_event(id_priv->wait_remove, !atomic_read(&id_priv->dev_remove));
2389 
2390 	/* Check for destruction from another callback. */
2391 	if (!cma_comp(id_priv, CMA_DEVICE_REMOVAL))
2392 		return 0;
2393 
2394 	memset(&event, 0, sizeof event);
2395 	event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
2396 	return id_priv->id.event_handler(&id_priv->id, &event);
2397 }
2398 
2399 static void cma_process_remove(struct cma_device *cma_dev)
2400 {
2401 	struct rdma_id_private *id_priv;
2402 	int ret;
2403 
2404 	mutex_lock(&lock);
2405 	while (!list_empty(&cma_dev->id_list)) {
2406 		id_priv = list_entry(cma_dev->id_list.next,
2407 				     struct rdma_id_private, list);
2408 
2409 		if (cma_internal_listen(id_priv)) {
2410 			cma_destroy_listen(id_priv);
2411 			continue;
2412 		}
2413 
2414 		list_del_init(&id_priv->list);
2415 		atomic_inc(&id_priv->refcount);
2416 		mutex_unlock(&lock);
2417 
2418 		ret = cma_remove_id_dev(id_priv);
2419 		cma_deref_id(id_priv);
2420 		if (ret)
2421 			rdma_destroy_id(&id_priv->id);
2422 
2423 		mutex_lock(&lock);
2424 	}
2425 	mutex_unlock(&lock);
2426 
2427 	cma_deref_dev(cma_dev);
2428 	wait_for_completion(&cma_dev->comp);
2429 }
2430 
2431 static void cma_remove_one(struct ib_device *device)
2432 {
2433 	struct cma_device *cma_dev;
2434 
2435 	cma_dev = ib_get_client_data(device, &cma_client);
2436 	if (!cma_dev)
2437 		return;
2438 
2439 	mutex_lock(&lock);
2440 	list_del(&cma_dev->list);
2441 	mutex_unlock(&lock);
2442 
2443 	cma_process_remove(cma_dev);
2444 	kfree(cma_dev);
2445 }
2446 
2447 static int cma_init(void)
2448 {
2449 	int ret;
2450 
2451 	cma_wq = create_singlethread_workqueue("rdma_cm_wq");
2452 	if (!cma_wq)
2453 		return -ENOMEM;
2454 
2455 	ib_sa_register_client(&sa_client);
2456 	rdma_addr_register_client(&addr_client);
2457 
2458 	ret = ib_register_client(&cma_client);
2459 	if (ret)
2460 		goto err;
2461 	return 0;
2462 
2463 err:
2464 	rdma_addr_unregister_client(&addr_client);
2465 	ib_sa_unregister_client(&sa_client);
2466 	destroy_workqueue(cma_wq);
2467 	return ret;
2468 }
2469 
2470 static void cma_cleanup(void)
2471 {
2472 	ib_unregister_client(&cma_client);
2473 	rdma_addr_unregister_client(&addr_client);
2474 	ib_sa_unregister_client(&sa_client);
2475 	destroy_workqueue(cma_wq);
2476 	idr_destroy(&sdp_ps);
2477 	idr_destroy(&tcp_ps);
2478 	idr_destroy(&udp_ps);
2479 }
2480 
2481 module_init(cma_init);
2482 module_exit(cma_cleanup);
2483