1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
3 *
4 * Copyright (c) 2005 Voltaire Inc. All rights reserved.
5 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
6 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
7 * Copyright (c) 2005-2006 Intel Corporation. All rights reserved.
8 *
9 * This software is available to you under a choice of one of two
10 * licenses. You may choose to be licensed under the terms of the GNU
11 * General Public License (GPL) Version 2, available from the file
12 * COPYING in the main directory of this source tree, or the
13 * OpenIB.org BSD license below:
14 *
15 * Redistribution and use in source and binary forms, with or
16 * without modification, are permitted provided that the following
17 * conditions are met:
18 *
19 * - Redistributions of source code must retain the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer.
22 *
23 * - Redistributions in binary form must reproduce the above
24 * copyright notice, this list of conditions and the following
25 * disclaimer in the documentation and/or other materials
26 * provided with the distribution.
27 *
28 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
29 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
30 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
31 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
32 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
33 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
34 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
35 * SOFTWARE.
36 */
37
38 #include <sys/cdefs.h>
39 #define LINUXKPI_PARAM_PREFIX ibcore_
40
41 #include "opt_inet.h"
42 #include "opt_inet6.h"
43
44 #include <linux/completion.h>
45 #include <linux/in.h>
46 #include <linux/in6.h>
47 #include <linux/mutex.h>
48 #include <linux/random.h>
49 #include <linux/idr.h>
50 #include <linux/slab.h>
51 #include <linux/module.h>
52 #include <net/route.h>
53 #include <net/route/nhop.h>
54
55 #include <net/tcp.h>
56 #include <net/ipv6.h>
57
58 #include <netinet/in_fib.h>
59
60 #include <netinet6/in6_fib.h>
61 #include <netinet6/scope6_var.h>
62 #include <netinet6/ip6_var.h>
63
64 #include <rdma/rdma_cm.h>
65 #include <rdma/rdma_cm_ib.h>
66 #include <rdma/rdma_sdp.h>
67 #include <rdma/ib.h>
68 #include <rdma/ib_addr.h>
69 #include <rdma/ib_cache.h>
70 #include <rdma/ib_cm.h>
71 #include <rdma/ib_sa.h>
72 #include <rdma/iw_cm.h>
73
74 #include <sys/priv.h>
75
76 #include "core_priv.h"
77 #include "cma_priv.h"
78
79 MODULE_AUTHOR("Sean Hefty");
80 MODULE_DESCRIPTION("Generic RDMA CM Agent");
81 MODULE_LICENSE("Dual BSD/GPL");
82
83 #define CMA_CM_RESPONSE_TIMEOUT 20
84 #define CMA_QUERY_CLASSPORT_INFO_TIMEOUT 3000
85 #define CMA_MAX_CM_RETRIES 15
86 #define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
87 #define CMA_IBOE_PACKET_LIFETIME 18
88
89 static const char * const cma_events[] = {
90 [RDMA_CM_EVENT_ADDR_RESOLVED] = "address resolved",
91 [RDMA_CM_EVENT_ADDR_ERROR] = "address error",
92 [RDMA_CM_EVENT_ROUTE_RESOLVED] = "route resolved ",
93 [RDMA_CM_EVENT_ROUTE_ERROR] = "route error",
94 [RDMA_CM_EVENT_CONNECT_REQUEST] = "connect request",
95 [RDMA_CM_EVENT_CONNECT_RESPONSE] = "connect response",
96 [RDMA_CM_EVENT_CONNECT_ERROR] = "connect error",
97 [RDMA_CM_EVENT_UNREACHABLE] = "unreachable",
98 [RDMA_CM_EVENT_REJECTED] = "rejected",
99 [RDMA_CM_EVENT_ESTABLISHED] = "established",
100 [RDMA_CM_EVENT_DISCONNECTED] = "disconnected",
101 [RDMA_CM_EVENT_DEVICE_REMOVAL] = "device removal",
102 [RDMA_CM_EVENT_MULTICAST_JOIN] = "multicast join",
103 [RDMA_CM_EVENT_MULTICAST_ERROR] = "multicast error",
104 [RDMA_CM_EVENT_ADDR_CHANGE] = "address change",
105 [RDMA_CM_EVENT_TIMEWAIT_EXIT] = "timewait exit",
106 };
107
rdma_event_msg(enum rdma_cm_event_type event)108 const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event)
109 {
110 size_t index = event;
111
112 return (index < ARRAY_SIZE(cma_events) && cma_events[index]) ?
113 cma_events[index] : "unrecognized event";
114 }
115 EXPORT_SYMBOL(rdma_event_msg);
116
rdma_reject_msg(struct rdma_cm_id * id,int reason)117 const char *__attribute_const__ rdma_reject_msg(struct rdma_cm_id *id,
118 int reason)
119 {
120 if (rdma_ib_or_roce(id->device, id->port_num))
121 return ibcm_reject_msg(reason);
122
123 if (rdma_protocol_iwarp(id->device, id->port_num))
124 return iwcm_reject_msg(reason);
125
126 WARN_ON_ONCE(1);
127 return "unrecognized transport";
128 }
129 EXPORT_SYMBOL(rdma_reject_msg);
130
rdma_is_consumer_reject(struct rdma_cm_id * id,int reason)131 bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason)
132 {
133 if (rdma_ib_or_roce(id->device, id->port_num))
134 return reason == IB_CM_REJ_CONSUMER_DEFINED;
135
136 if (rdma_protocol_iwarp(id->device, id->port_num))
137 return reason == -ECONNREFUSED;
138
139 WARN_ON_ONCE(1);
140 return false;
141 }
142 EXPORT_SYMBOL(rdma_is_consumer_reject);
143
/*
 * Fetch the private data carried by a consumer-defined reject.
 *
 * When ev->status is a consumer reject for @id's transport, *data_len
 * receives the private data length and the private data pointer is
 * returned.  Otherwise *data_len is set to 0 and NULL is returned.
 */
const void *rdma_consumer_reject_data(struct rdma_cm_id *id,
				      struct rdma_cm_event *ev, u8 *data_len)
{
	if (!rdma_is_consumer_reject(id, ev->status)) {
		*data_len = 0;
		return NULL;
	}

	*data_len = ev->param.conn.private_data_len;
	return ev->param.conn.private_data;
}
EXPORT_SYMBOL(rdma_consumer_reject_data);
159
160 static int cma_check_linklocal(struct rdma_dev_addr *, const struct sockaddr *);
161 static void cma_add_one(struct ib_device *device);
162 static void cma_remove_one(struct ib_device *device, void *client_data);
163 static enum rdma_port_space rdma_ps_from_service_id(__be64 service_id);
164
165 static struct ib_client cma_client = {
166 .name = "cma",
167 .add = cma_add_one,
168 .remove = cma_remove_one
169 };
170
171 static struct ib_sa_client sa_client;
172 static LIST_HEAD(dev_list);
173 static LIST_HEAD(listen_any_list);
174 static DEFINE_MUTEX(lock);
175 static struct workqueue_struct *cma_wq;
176
177 struct cma_pernet {
178 struct idr tcp_ps;
179 struct idr udp_ps;
180 struct idr ipoib_ps;
181 struct idr ib_ps;
182 struct idr sdp_ps;
183 };
184
185 VNET_DEFINE(struct cma_pernet, cma_pernet);
186
cma_pernet_ptr(struct vnet * vnet)187 static struct cma_pernet *cma_pernet_ptr(struct vnet *vnet)
188 {
189 struct cma_pernet *retval;
190
191 CURVNET_SET_QUIET(vnet);
192 retval = &VNET(cma_pernet);
193 CURVNET_RESTORE();
194
195 return (retval);
196 }
197
cma_pernet_idr(struct vnet * net,enum rdma_port_space ps)198 static struct idr *cma_pernet_idr(struct vnet *net, enum rdma_port_space ps)
199 {
200 struct cma_pernet *pernet = cma_pernet_ptr(net);
201
202 switch (ps) {
203 case RDMA_PS_TCP:
204 return &pernet->tcp_ps;
205 case RDMA_PS_UDP:
206 return &pernet->udp_ps;
207 case RDMA_PS_IPOIB:
208 return &pernet->ipoib_ps;
209 case RDMA_PS_IB:
210 return &pernet->ib_ps;
211 case RDMA_PS_SDP:
212 return &pernet->sdp_ps;
213 default:
214 return NULL;
215 }
216 }
217
218 struct cma_device {
219 struct list_head list;
220 struct ib_device *device;
221 struct completion comp;
222 atomic_t refcount;
223 struct list_head id_list;
224 struct sysctl_ctx_list sysctl_ctx;
225 enum ib_gid_type *default_gid_type;
226 u8 *default_roce_tos;
227 };
228
229 struct rdma_bind_list {
230 enum rdma_port_space ps;
231 struct hlist_head owners;
232 unsigned short port;
233 };
234
235 struct class_port_info_context {
236 struct ib_class_port_info *class_port_info;
237 struct ib_device *device;
238 struct completion done;
239 struct ib_sa_query *sa_query;
240 u8 port_num;
241 };
242
cma_ps_alloc(struct vnet * vnet,enum rdma_port_space ps,struct rdma_bind_list * bind_list,int snum)243 static int cma_ps_alloc(struct vnet *vnet, enum rdma_port_space ps,
244 struct rdma_bind_list *bind_list, int snum)
245 {
246 struct idr *idr = cma_pernet_idr(vnet, ps);
247
248 return idr_alloc(idr, bind_list, snum, snum + 1, GFP_KERNEL);
249 }
250
/*
 * Look up the bind list registered for port @snum in port space @ps of
 * @net.  Returns the result of idr_find().
 */
static struct rdma_bind_list *cma_ps_find(struct vnet *net,
					  enum rdma_port_space ps, int snum)
{
	return idr_find(cma_pernet_idr(net, ps), snum);
}
258
/* Drop the entry for port @snum from port space @ps of @net. */
static void cma_ps_remove(struct vnet *net, enum rdma_port_space ps, int snum)
{
	idr_remove(cma_pernet_idr(net, ps), snum);
}
265
/* Option identifiers for an rdma_cm_id. */
enum {
	CMA_OPTION_AFONLY,
};
269
cma_ref_dev(struct cma_device * cma_dev)270 void cma_ref_dev(struct cma_device *cma_dev)
271 {
272 atomic_inc(&cma_dev->refcount);
273 }
274
/*
 * Find the first registered device accepted by @filter.
 *
 * dev_list is walked under the global lock; @filter is called with each
 * device's ib_device and @cookie.  On a match the device is returned with
 * a reference taken (release it with cma_deref_dev()); otherwise NULL.
 */
struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
					     void *cookie)
{
	struct cma_device *cur;
	struct cma_device *match = NULL;

	mutex_lock(&lock);
	list_for_each_entry(cur, &dev_list, list) {
		if (!filter(cur->device, cookie))
			continue;

		match = cur;
		cma_ref_dev(match);
		break;
	}
	mutex_unlock(&lock);

	return match;
}
294
cma_get_default_gid_type(struct cma_device * cma_dev,unsigned int port)295 int cma_get_default_gid_type(struct cma_device *cma_dev,
296 unsigned int port)
297 {
298 if (!rdma_is_port_valid(cma_dev->device, port))
299 return -EINVAL;
300
301 return cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)];
302 }
303
cma_set_default_gid_type(struct cma_device * cma_dev,unsigned int port,enum ib_gid_type default_gid_type)304 int cma_set_default_gid_type(struct cma_device *cma_dev,
305 unsigned int port,
306 enum ib_gid_type default_gid_type)
307 {
308 unsigned long supported_gids;
309
310 if (!rdma_is_port_valid(cma_dev->device, port))
311 return -EINVAL;
312
313 supported_gids = roce_gid_type_mask_support(cma_dev->device, port);
314
315 if (!(supported_gids & 1 << default_gid_type))
316 return -EINVAL;
317
318 cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)] =
319 default_gid_type;
320
321 return 0;
322 }
323
cma_get_default_roce_tos(struct cma_device * cma_dev,unsigned int port)324 int cma_get_default_roce_tos(struct cma_device *cma_dev, unsigned int port)
325 {
326 if (!rdma_is_port_valid(cma_dev->device, port))
327 return -EINVAL;
328
329 return cma_dev->default_roce_tos[port - rdma_start_port(cma_dev->device)];
330 }
331
/*
 * Store @default_roce_tos as the default RoCE ToS for @port of @cma_dev.
 * Returns 0 on success or -EINVAL when @port is not valid for the device.
 */
int cma_set_default_roce_tos(struct cma_device *cma_dev, unsigned int port,
			     u8 default_roce_tos)
{
	unsigned int slot;

	if (!rdma_is_port_valid(cma_dev->device, port))
		return -EINVAL;

	slot = port - rdma_start_port(cma_dev->device);
	cma_dev->default_roce_tos[slot] = default_roce_tos;

	return 0;
}
cma_get_ib_dev(struct cma_device * cma_dev)343 struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev)
344 {
345 return cma_dev->device;
346 }
347
/*
 * Device removal can occur at any time, so we need extra handling to
 * serialize notifying the user of device removal with other callbacks.
 * We do this by disabling removal notification while a callback is in
 * progress, and reporting it after the callback completes.
 */
354 struct cma_multicast {
355 struct rdma_id_private *id_priv;
356 union {
357 struct ib_sa_multicast *ib;
358 } multicast;
359 struct list_head list;
360 void *context;
361 struct sockaddr_storage addr;
362 struct kref mcref;
363 u8 join_state;
364 };
365
366 struct cma_work {
367 struct work_struct work;
368 struct rdma_id_private *id;
369 enum rdma_cm_state old_state;
370 enum rdma_cm_state new_state;
371 struct rdma_cm_event event;
372 };
373
374 struct cma_ndev_work {
375 struct work_struct work;
376 struct rdma_id_private *id;
377 struct rdma_cm_event event;
378 };
379
380 struct iboe_mcast_work {
381 struct work_struct work;
382 struct rdma_id_private *id;
383 struct cma_multicast *mc;
384 };
385
386 struct cma_hdr {
387 u8 cma_version;
388 u8 ip_version; /* IP version: 7:4 */
389 __be16 port;
390 union cma_ip_addr src_addr;
391 union cma_ip_addr dst_addr;
392 };
393
394 #define CMA_VERSION 0x00
395 #define SDP_MAJ_VERSION 0x2
396
397 struct cma_req_info {
398 struct ib_device *device;
399 union ib_gid local_gid;
400 __be64 service_id;
401 int port;
402 bool has_gid;
403 u16 pkey;
404 };
405
cma_comp(struct rdma_id_private * id_priv,enum rdma_cm_state comp)406 static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp)
407 {
408 unsigned long flags;
409 int ret;
410
411 spin_lock_irqsave(&id_priv->lock, flags);
412 ret = (id_priv->state == comp);
413 spin_unlock_irqrestore(&id_priv->lock, flags);
414 return ret;
415 }
416
cma_comp_exch(struct rdma_id_private * id_priv,enum rdma_cm_state comp,enum rdma_cm_state exch)417 static int cma_comp_exch(struct rdma_id_private *id_priv,
418 enum rdma_cm_state comp, enum rdma_cm_state exch)
419 {
420 unsigned long flags;
421 int ret;
422
423 spin_lock_irqsave(&id_priv->lock, flags);
424 if ((ret = (id_priv->state == comp)))
425 id_priv->state = exch;
426 spin_unlock_irqrestore(&id_priv->lock, flags);
427 return ret;
428 }
429
cma_exch(struct rdma_id_private * id_priv,enum rdma_cm_state exch)430 static enum rdma_cm_state cma_exch(struct rdma_id_private *id_priv,
431 enum rdma_cm_state exch)
432 {
433 unsigned long flags;
434 enum rdma_cm_state old;
435
436 spin_lock_irqsave(&id_priv->lock, flags);
437 old = id_priv->state;
438 id_priv->state = exch;
439 spin_unlock_irqrestore(&id_priv->lock, flags);
440 return old;
441 }
442
cma_get_ip_ver(const struct cma_hdr * hdr)443 static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr)
444 {
445 return hdr->ip_version >> 4;
446 }
447
/*
 * Store @ip_ver in the upper nibble of hdr->ip_version, leaving the lower
 * nibble untouched.
 */
static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
{
	const u8 low_nibble = hdr->ip_version & 0xF;

	hdr->ip_version = (ip_ver << 4) | low_nibble;
}
452
/* Return the upper nibble of an SDP version byte (the major version). */
static inline u8 sdp_get_majv(u8 sdp_version)
{
	return sdp_version >> 4;
}
457
sdp_get_ip_ver(const struct sdp_hh * hh)458 static inline u8 sdp_get_ip_ver(const struct sdp_hh *hh)
459 {
460 return hh->ipv_cap >> 4;
461 }
462
/*
 * Store @ip_ver in the upper nibble of hh->ipv_cap, leaving the lower
 * nibble untouched.
 */
static inline void sdp_set_ip_ver(struct sdp_hh *hh, u8 ip_ver)
{
	const u8 low_nibble = hh->ipv_cap & 0xF;

	hh->ipv_cap = (ip_ver << 4) | low_nibble;
}
467
/*
 * Join (@join true) or leave (@join false) the multicast group derived from
 * @mgid on interface @ndev, in the interface's own vnet.
 *
 * Returns the negated result of if_addmulti()/if_delmulti(), or -ENODEV
 * when @ndev is NULL.
 */
static int cma_igmp_send(if_t ndev, const union ib_gid *mgid, bool join)
{
	union rdma_sockaddr addr;
	int retval;

	if (!ndev)
		return -ENODEV;

	rdma_gid2ip(&addr._sockaddr, mgid);

	CURVNET_SET_QUIET(if_getvnet(ndev));
	retval = join ? -if_addmulti(ndev, &addr._sockaddr, NULL) :
			-if_delmulti(ndev, &addr._sockaddr);
	CURVNET_RESTORE();

	return retval;
}
488
_cma_attach_to_dev(struct rdma_id_private * id_priv,struct cma_device * cma_dev)489 static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
490 struct cma_device *cma_dev)
491 {
492 cma_ref_dev(cma_dev);
493 id_priv->cma_dev = cma_dev;
494 id_priv->id.device = cma_dev->device;
495 id_priv->id.route.addr.dev_addr.transport =
496 rdma_node_get_transport(cma_dev->device->node_type);
497 list_add_tail(&id_priv->list, &cma_dev->id_list);
498 }
499
cma_attach_to_dev(struct rdma_id_private * id_priv,struct cma_device * cma_dev)500 static void cma_attach_to_dev(struct rdma_id_private *id_priv,
501 struct cma_device *cma_dev)
502 {
503 _cma_attach_to_dev(id_priv, cma_dev);
504 id_priv->gid_type =
505 cma_dev->default_gid_type[id_priv->id.port_num -
506 rdma_start_port(cma_dev->device)];
507 }
508
cma_deref_dev(struct cma_device * cma_dev)509 void cma_deref_dev(struct cma_device *cma_dev)
510 {
511 if (atomic_dec_and_test(&cma_dev->refcount))
512 complete(&cma_dev->comp);
513 }
514
release_mc(struct kref * kref)515 static inline void release_mc(struct kref *kref)
516 {
517 struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref);
518
519 kfree(mc->multicast.ib);
520 kfree(mc);
521 }
522
cma_release_dev(struct rdma_id_private * id_priv)523 static void cma_release_dev(struct rdma_id_private *id_priv)
524 {
525 mutex_lock(&lock);
526 list_del(&id_priv->list);
527 cma_deref_dev(id_priv->cma_dev);
528 id_priv->cma_dev = NULL;
529 mutex_unlock(&lock);
530 }
531
cma_src_addr(struct rdma_id_private * id_priv)532 static inline struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv)
533 {
534 return (struct sockaddr *) &id_priv->id.route.addr.src_addr;
535 }
536
cma_dst_addr(struct rdma_id_private * id_priv)537 static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv)
538 {
539 return (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
540 }
541
cma_family(struct rdma_id_private * id_priv)542 static inline unsigned short cma_family(struct rdma_id_private *id_priv)
543 {
544 return id_priv->id.route.addr.src_addr.ss_family;
545 }
546
/*
 * Settle the QKey of @id_priv.
 *
 *  - If a QKey is already set, a non-zero @qkey that differs from it gives
 *    -EINVAL; anything else returns 0 without changes.
 *  - Otherwise a non-zero @qkey is adopted as is.
 *  - Otherwise a default is chosen by port space: RDMA_UDP_QKEY for
 *    RDMA_PS_UDP and RDMA_PS_IB; for RDMA_PS_IPOIB the QKey comes from the
 *    multicast member record found with ib_sa_get_mcmember_rec(), whose
 *    error (if any) is returned.  Other port spaces leave the QKey unset
 *    and return 0.
 */
static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey)
{
	struct ib_sa_mcmember_rec rec;
	int ret;

	if (id_priv->qkey)
		return (qkey && id_priv->qkey != qkey) ? -EINVAL : 0;

	if (qkey) {
		id_priv->qkey = qkey;
		return 0;
	}

	if (id_priv->id.ps == RDMA_PS_UDP || id_priv->id.ps == RDMA_PS_IB) {
		id_priv->qkey = RDMA_UDP_QKEY;
		return 0;
	}

	if (id_priv->id.ps != RDMA_PS_IPOIB)
		return 0;

	ib_addr_get_mgid(&id_priv->id.route.addr.dev_addr, &rec.mgid);
	ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num,
				     &rec.mgid, &rec);
	if (!ret)
		id_priv->qkey = be32_to_cpu(rec.qkey);

	return ret;
}
581
cma_translate_ib(struct sockaddr_ib * sib,struct rdma_dev_addr * dev_addr)582 static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr)
583 {
584 dev_addr->dev_type = ARPHRD_INFINIBAND;
585 rdma_addr_set_sgid(dev_addr, (union ib_gid *) &sib->sib_addr);
586 ib_addr_set_pkey(dev_addr, ntohs(sib->sib_pkey));
587 }
588
cma_translate_addr(struct sockaddr * addr,struct rdma_dev_addr * dev_addr)589 static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
590 {
591 int ret;
592
593 if (addr->sa_family != AF_IB) {
594 ret = rdma_translate_ip(addr, dev_addr);
595 } else {
596 cma_translate_ib((struct sockaddr_ib *) addr, dev_addr);
597 ret = 0;
598 }
599
600 return ret;
601 }
602
603 static const struct ib_gid_attr *
cma_validate_port(struct ib_device * device,u8 port,enum ib_gid_type gid_type,union ib_gid * gid,struct rdma_id_private * id_priv)604 cma_validate_port(struct ib_device *device, u8 port,
605 enum ib_gid_type gid_type,
606 union ib_gid *gid,
607 struct rdma_id_private *id_priv)
608 {
609 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
610 int bound_if_index = dev_addr->bound_dev_if;
611 const struct ib_gid_attr *sgid_attr;
612 int dev_type = dev_addr->dev_type;
613 if_t ndev = NULL;
614
615 if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port))
616 return ERR_PTR(-ENODEV);
617
618 if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port))
619 return ERR_PTR(-ENODEV);
620
621 if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
622 ndev = dev_get_by_index(dev_addr->net, bound_if_index);
623 if (!ndev)
624 return ERR_PTR(-ENODEV);
625 } else {
626 gid_type = IB_GID_TYPE_IB;
627 }
628
629 sgid_attr = rdma_find_gid_by_port(device, gid, gid_type, port, ndev);
630 if (ndev)
631 dev_put(ndev);
632 return sgid_attr;
633 }
634
cma_bind_sgid_attr(struct rdma_id_private * id_priv,const struct ib_gid_attr * sgid_attr)635 static void cma_bind_sgid_attr(struct rdma_id_private *id_priv,
636 const struct ib_gid_attr *sgid_attr)
637 {
638 WARN_ON(id_priv->id.route.addr.dev_addr.sgid_attr);
639 id_priv->id.route.addr.dev_addr.sgid_attr = sgid_attr;
640 }
641
cma_acquire_dev(struct rdma_id_private * id_priv,const struct rdma_id_private * listen_id_priv)642 static int cma_acquire_dev(struct rdma_id_private *id_priv,
643 const struct rdma_id_private *listen_id_priv)
644 {
645 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
646 const struct ib_gid_attr *sgid_attr;
647 struct cma_device *cma_dev;
648 union ib_gid gid, iboe_gid, *gidp;
649 enum ib_gid_type gid_type;
650 int ret = -ENODEV;
651 u8 port;
652
653 if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
654 id_priv->id.ps == RDMA_PS_IPOIB)
655 return -EINVAL;
656
657 mutex_lock(&lock);
658 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
659 &iboe_gid);
660
661 memcpy(&gid, dev_addr->src_dev_addr +
662 rdma_addr_gid_offset(dev_addr), sizeof gid);
663
664 if (listen_id_priv) {
665 cma_dev = listen_id_priv->cma_dev;
666 port = listen_id_priv->id.port_num;
667
668 if (!rdma_is_port_valid(cma_dev->device, port))
669 goto skip_listen_id;
670
671 gidp = rdma_protocol_roce(cma_dev->device, port) ?
672 &iboe_gid : &gid;
673 gid_type = listen_id_priv->gid_type;
674 sgid_attr = cma_validate_port(cma_dev->device, port,
675 gid_type, gidp, id_priv);
676 if (!IS_ERR(sgid_attr)) {
677 id_priv->id.port_num = port;
678 cma_bind_sgid_attr(id_priv, sgid_attr);
679 ret = 0;
680 goto out;
681 }
682 }
683
684 skip_listen_id:
685 list_for_each_entry(cma_dev, &dev_list, list) {
686 for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) {
687 if (listen_id_priv &&
688 listen_id_priv->cma_dev == cma_dev &&
689 listen_id_priv->id.port_num == port)
690 continue;
691
692 gidp = rdma_protocol_roce(cma_dev->device, port) ?
693 &iboe_gid : &gid;
694 gid_type = cma_dev->default_gid_type[port - 1];
695 sgid_attr = cma_validate_port(cma_dev->device, port,
696 gid_type, gidp, id_priv);
697 if (!IS_ERR(sgid_attr)) {
698 id_priv->id.port_num = port;
699 cma_bind_sgid_attr(id_priv, sgid_attr);
700 ret = 0;
701 goto out;
702 }
703 }
704 }
705
706 out:
707 if (!ret)
708 cma_attach_to_dev(id_priv, cma_dev);
709
710 mutex_unlock(&lock);
711 return ret;
712 }
713
714 /*
715 * Select the source IB device and address to reach the destination IB address.
716 */
cma_resolve_ib_dev(struct rdma_id_private * id_priv)717 static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
718 {
719 struct cma_device *cma_dev, *cur_dev;
720 struct sockaddr_ib *addr;
721 union ib_gid gid, sgid, *dgid;
722 u16 pkey, index;
723 u8 p;
724 enum ib_port_state port_state;
725 int i;
726
727 cma_dev = NULL;
728 addr = (struct sockaddr_ib *) cma_dst_addr(id_priv);
729 dgid = (union ib_gid *) &addr->sib_addr;
730 pkey = ntohs(addr->sib_pkey);
731
732 mutex_lock(&lock);
733 list_for_each_entry(cur_dev, &dev_list, list) {
734 for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
735 if (!rdma_cap_af_ib(cur_dev->device, p))
736 continue;
737
738 if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index))
739 continue;
740
741 if (ib_get_cached_port_state(cur_dev->device, p, &port_state))
742 continue;
743 for (i = 0; !rdma_query_gid(cur_dev->device,
744 p, i, &gid);
745 i++) {
746 if (!memcmp(&gid, dgid, sizeof(gid))) {
747 cma_dev = cur_dev;
748 sgid = gid;
749 id_priv->id.port_num = p;
750 goto found;
751 }
752
753 if (!cma_dev && (gid.global.subnet_prefix ==
754 dgid->global.subnet_prefix) &&
755 port_state == IB_PORT_ACTIVE) {
756 cma_dev = cur_dev;
757 sgid = gid;
758 id_priv->id.port_num = p;
759 goto found;
760 }
761 }
762 }
763 }
764 mutex_unlock(&lock);
765 return -ENODEV;
766
767 found:
768 cma_attach_to_dev(id_priv, cma_dev);
769 mutex_unlock(&lock);
770 addr = (struct sockaddr_ib *)cma_src_addr(id_priv);
771 memcpy(&addr->sib_addr, &sgid, sizeof(sgid));
772 cma_translate_ib(addr, &id_priv->id.route.addr.dev_addr);
773 return 0;
774 }
775
cma_deref_id(struct rdma_id_private * id_priv)776 static void cma_deref_id(struct rdma_id_private *id_priv)
777 {
778 if (atomic_dec_and_test(&id_priv->refcount))
779 complete(&id_priv->comp);
780 }
781
/*
 * rdma_create_id - allocate and initialise a new RDMA CM identifier.
 * @net:           vnet the id belongs to (must be non-NULL under VIMAGE)
 * @event_handler: callback stored in the id
 * @context:       opaque caller context stored in the id
 * @ps:            RDMA port space
 * @qp_type:       QP type associated with the id
 *
 * The id starts in RDMA_CM_IDLE with a reference count of one and a random
 * 24-bit sequence number.
 *
 * Returns the new id, ERR_PTR(-EINVAL) for a NULL @net under VIMAGE, or
 * ERR_PTR(-ENOMEM) when allocation fails.
 */
struct rdma_cm_id *rdma_create_id(struct vnet *net,
				  rdma_cm_event_handler event_handler,
				  void *context, enum rdma_port_space ps,
				  enum ib_qp_type qp_type)
{
	struct rdma_id_private *id_priv;

#ifdef VIMAGE
	if (net == NULL)
		return ERR_PTR(-EINVAL);
#endif
	id_priv = kzalloc(sizeof *id_priv, GFP_KERNEL);
	if (!id_priv)
		return ERR_PTR(-ENOMEM);

	/* Caller-visible identity. */
	id_priv->owner = task_pid_nr(current);
	id_priv->state = RDMA_CM_IDLE;
	id_priv->id.context = context;
	id_priv->id.event_handler = event_handler;
	id_priv->id.ps = ps;
	id_priv->id.qp_type = qp_type;
	id_priv->tos_set = false;
	id_priv->timeout_set = false;

	/* Locks, lifetime tracking and lists. */
	spin_lock_init(&id_priv->lock);
	mutex_init(&id_priv->qp_mutex);
	init_completion(&id_priv->comp);
	atomic_set(&id_priv->refcount, 1);
	mutex_init(&id_priv->handler_mutex);
	INIT_LIST_HEAD(&id_priv->listen_list);
	INIT_LIST_HEAD(&id_priv->mc_list);

	/* Random sequence number, limited to 24 bits. */
	get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
	id_priv->seq_num &= 0x00ffffff;

	id_priv->id.route.addr.dev_addr.net = net;

	return &id_priv->id;
}
EXPORT_SYMBOL(rdma_create_id);
819
cma_init_ud_qp(struct rdma_id_private * id_priv,struct ib_qp * qp)820 static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
821 {
822 struct ib_qp_attr qp_attr;
823 int qp_attr_mask, ret;
824
825 qp_attr.qp_state = IB_QPS_INIT;
826 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
827 if (ret)
828 return ret;
829
830 ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
831 if (ret)
832 return ret;
833
834 qp_attr.qp_state = IB_QPS_RTR;
835 ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
836 if (ret)
837 return ret;
838
839 qp_attr.qp_state = IB_QPS_RTS;
840 qp_attr.sq_psn = 0;
841 ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN);
842
843 return ret;
844 }
845
cma_init_conn_qp(struct rdma_id_private * id_priv,struct ib_qp * qp)846 static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
847 {
848 struct ib_qp_attr qp_attr;
849 int qp_attr_mask, ret;
850
851 qp_attr.qp_state = IB_QPS_INIT;
852 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
853 if (ret)
854 return ret;
855
856 return ib_modify_qp(qp, &qp_attr, qp_attr_mask);
857 }
858
rdma_create_qp(struct rdma_cm_id * id,struct ib_pd * pd,struct ib_qp_init_attr * qp_init_attr)859 int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
860 struct ib_qp_init_attr *qp_init_attr)
861 {
862 struct rdma_id_private *id_priv;
863 struct ib_qp *qp;
864 int ret;
865
866 id_priv = container_of(id, struct rdma_id_private, id);
867 if (id->device != pd->device)
868 return -EINVAL;
869
870 qp_init_attr->port_num = id->port_num;
871 qp = ib_create_qp(pd, qp_init_attr);
872 if (IS_ERR(qp))
873 return PTR_ERR(qp);
874
875 if (id->qp_type == IB_QPT_UD)
876 ret = cma_init_ud_qp(id_priv, qp);
877 else
878 ret = cma_init_conn_qp(id_priv, qp);
879 if (ret)
880 goto err;
881
882 id->qp = qp;
883 id_priv->qp_num = qp->qp_num;
884 id_priv->srq = (qp->srq != NULL);
885 return 0;
886 err:
887 ib_destroy_qp(qp);
888 return ret;
889 }
890 EXPORT_SYMBOL(rdma_create_qp);
891
rdma_destroy_qp(struct rdma_cm_id * id)892 void rdma_destroy_qp(struct rdma_cm_id *id)
893 {
894 struct rdma_id_private *id_priv;
895
896 id_priv = container_of(id, struct rdma_id_private, id);
897 mutex_lock(&id_priv->qp_mutex);
898 ib_destroy_qp(id_priv->id.qp);
899 id_priv->id.qp = NULL;
900 mutex_unlock(&id_priv->qp_mutex);
901 }
902 EXPORT_SYMBOL(rdma_destroy_qp);
903
cma_modify_qp_rtr(struct rdma_id_private * id_priv,struct rdma_conn_param * conn_param)904 static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
905 struct rdma_conn_param *conn_param)
906 {
907 struct ib_qp_attr qp_attr;
908 int qp_attr_mask, ret;
909
910 mutex_lock(&id_priv->qp_mutex);
911 if (!id_priv->id.qp) {
912 ret = 0;
913 goto out;
914 }
915
916 /* Need to update QP attributes from default values. */
917 qp_attr.qp_state = IB_QPS_INIT;
918 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
919 if (ret)
920 goto out;
921
922 ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
923 if (ret)
924 goto out;
925
926 qp_attr.qp_state = IB_QPS_RTR;
927 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
928 if (ret)
929 goto out;
930
931 BUG_ON(id_priv->cma_dev->device != id_priv->id.device);
932
933 if (conn_param)
934 qp_attr.max_dest_rd_atomic = conn_param->responder_resources;
935 ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
936 out:
937 mutex_unlock(&id_priv->qp_mutex);
938 return ret;
939 }
940
cma_modify_qp_rts(struct rdma_id_private * id_priv,struct rdma_conn_param * conn_param)941 static int cma_modify_qp_rts(struct rdma_id_private *id_priv,
942 struct rdma_conn_param *conn_param)
943 {
944 struct ib_qp_attr qp_attr;
945 int qp_attr_mask, ret;
946
947 mutex_lock(&id_priv->qp_mutex);
948 if (!id_priv->id.qp) {
949 ret = 0;
950 goto out;
951 }
952
953 qp_attr.qp_state = IB_QPS_RTS;
954 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
955 if (ret)
956 goto out;
957
958 if (conn_param)
959 qp_attr.max_rd_atomic = conn_param->initiator_depth;
960 ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
961 out:
962 mutex_unlock(&id_priv->qp_mutex);
963 return ret;
964 }
965
cma_modify_qp_err(struct rdma_id_private * id_priv)966 static int cma_modify_qp_err(struct rdma_id_private *id_priv)
967 {
968 struct ib_qp_attr qp_attr;
969 int ret;
970
971 mutex_lock(&id_priv->qp_mutex);
972 if (!id_priv->id.qp) {
973 ret = 0;
974 goto out;
975 }
976
977 qp_attr.qp_state = IB_QPS_ERR;
978 ret = ib_modify_qp(id_priv->id.qp, &qp_attr, IB_QP_STATE);
979 out:
980 mutex_unlock(&id_priv->qp_mutex);
981 return ret;
982 }
983
cma_ib_init_qp_attr(struct rdma_id_private * id_priv,struct ib_qp_attr * qp_attr,int * qp_attr_mask)984 static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
985 struct ib_qp_attr *qp_attr, int *qp_attr_mask)
986 {
987 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
988 int ret;
989 u16 pkey;
990
991 if (rdma_cap_eth_ah(id_priv->id.device, id_priv->id.port_num))
992 pkey = 0xffff;
993 else
994 pkey = ib_addr_get_pkey(dev_addr);
995
996 ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num,
997 pkey, &qp_attr->pkey_index);
998 if (ret)
999 return ret;
1000
1001 qp_attr->port_num = id_priv->id.port_num;
1002 *qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT;
1003
1004 if (id_priv->id.qp_type == IB_QPT_UD) {
1005 ret = cma_set_qkey(id_priv, 0);
1006 if (ret)
1007 return ret;
1008
1009 qp_attr->qkey = id_priv->qkey;
1010 *qp_attr_mask |= IB_QP_QKEY;
1011 } else {
1012 qp_attr->qp_access_flags = 0;
1013 *qp_attr_mask |= IB_QP_ACCESS_FLAGS;
1014 }
1015 return 0;
1016 }
1017
rdma_init_qp_attr(struct rdma_cm_id * id,struct ib_qp_attr * qp_attr,int * qp_attr_mask)1018 int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
1019 int *qp_attr_mask)
1020 {
1021 struct rdma_id_private *id_priv;
1022 int ret = 0;
1023
1024 id_priv = container_of(id, struct rdma_id_private, id);
1025 if (rdma_cap_ib_cm(id->device, id->port_num)) {
1026 if (!id_priv->cm_id.ib || (id_priv->id.qp_type == IB_QPT_UD))
1027 ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask);
1028 else
1029 ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
1030 qp_attr_mask);
1031
1032 if (qp_attr->qp_state == IB_QPS_RTR)
1033 qp_attr->rq_psn = id_priv->seq_num;
1034 } else if (rdma_cap_iw_cm(id->device, id->port_num)) {
1035 if (!id_priv->cm_id.iw) {
1036 qp_attr->qp_access_flags = 0;
1037 *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
1038 } else
1039 ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
1040 qp_attr_mask);
1041 qp_attr->port_num = id_priv->id.port_num;
1042 *qp_attr_mask |= IB_QP_PORT;
1043 } else
1044 ret = -ENOSYS;
1045
1046 if ((*qp_attr_mask & IB_QP_TIMEOUT) && id_priv->timeout_set)
1047 qp_attr->timeout = id_priv->timeout;
1048
1049 return ret;
1050 }
1051 EXPORT_SYMBOL(rdma_init_qp_attr);
1052
cma_zero_addr(const struct sockaddr * addr)1053 static inline bool cma_zero_addr(const struct sockaddr *addr)
1054 {
1055 switch (addr->sa_family) {
1056 case AF_INET:
1057 return ipv4_is_zeronet(((struct sockaddr_in *)addr)->sin_addr.s_addr);
1058 case AF_INET6:
1059 return ipv6_addr_any(&((struct sockaddr_in6 *)addr)->sin6_addr);
1060 case AF_IB:
1061 return ib_addr_any(&((struct sockaddr_ib *)addr)->sib_addr);
1062 default:
1063 return false;
1064 }
1065 }
1066
cma_loopback_addr(const struct sockaddr * addr)1067 static inline bool cma_loopback_addr(const struct sockaddr *addr)
1068 {
1069 switch (addr->sa_family) {
1070 #ifdef INET
1071 /*
1072 * ipv4_is_loopback() requires an inet variable via vnet,
1073 * not present if INET is not included.
1074 */
1075 case AF_INET:
1076 return ipv4_is_loopback(
1077 ((struct sockaddr_in *)addr)->sin_addr.s_addr);
1078 #endif
1079 #ifdef INET6
1080 case AF_INET6:
1081 return ipv6_addr_loopback(
1082 &((struct sockaddr_in6 *)addr)->sin6_addr);
1083 #endif
1084 case AF_IB:
1085 return ib_addr_loopback(
1086 &((struct sockaddr_ib *)addr)->sib_addr);
1087 default:
1088 return false;
1089 }
1090 }
1091
/*
 * Report whether @addr is either a zero address or a loopback address.
 * Both checks run with @vnet installed as the current vnet, which is
 * restored before returning.
 */
static inline bool cma_any_addr(struct vnet *vnet, const struct sockaddr *addr)
{
	bool is_any = true;

	CURVNET_SET_QUIET(vnet);
	/* The loopback test is only needed when the address is not zero. */
	if (!cma_zero_addr(addr))
		is_any = cma_loopback_addr(addr);
	CURVNET_RESTORE();

	return (is_any);
}
1102
cma_addr_cmp(struct sockaddr * src,struct sockaddr * dst)1103 static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst)
1104 {
1105 if (src->sa_family != dst->sa_family)
1106 return -1;
1107
1108 switch (src->sa_family) {
1109 case AF_INET:
1110 return ((struct sockaddr_in *) src)->sin_addr.s_addr !=
1111 ((struct sockaddr_in *) dst)->sin_addr.s_addr;
1112 case AF_INET6:
1113 return ipv6_addr_cmp(&((struct sockaddr_in6 *) src)->sin6_addr,
1114 &((struct sockaddr_in6 *) dst)->sin6_addr);
1115 default:
1116 return ib_addr_cmp(&((struct sockaddr_ib *) src)->sib_addr,
1117 &((struct sockaddr_ib *) dst)->sib_addr);
1118 }
1119 }
1120
cma_port(const struct sockaddr * addr)1121 static __be16 cma_port(const struct sockaddr *addr)
1122 {
1123 struct sockaddr_ib *sib;
1124
1125 switch (addr->sa_family) {
1126 case AF_INET:
1127 return ((struct sockaddr_in *) addr)->sin_port;
1128 case AF_INET6:
1129 return ((struct sockaddr_in6 *) addr)->sin6_port;
1130 case AF_IB:
1131 sib = (struct sockaddr_ib *) addr;
1132 return htons((u16) (be64_to_cpu(sib->sib_sid) &
1133 be64_to_cpu(sib->sib_sid_mask)));
1134 default:
1135 return 0;
1136 }
1137 }
1138
/* Returns 1 when @addr carries no port (cma_port() is zero), 0 otherwise. */
static inline int cma_any_port(const struct sockaddr *addr)
{
	return cma_port(addr) == 0;
}
1143
cma_save_ib_info(struct sockaddr * src_addr,struct sockaddr * dst_addr,const struct rdma_cm_id * listen_id,const struct sa_path_rec * path)1144 static void cma_save_ib_info(struct sockaddr *src_addr,
1145 struct sockaddr *dst_addr,
1146 const struct rdma_cm_id *listen_id,
1147 const struct sa_path_rec *path)
1148 {
1149 struct sockaddr_ib *listen_ib, *ib;
1150
1151 listen_ib = (struct sockaddr_ib *) &listen_id->route.addr.src_addr;
1152 if (src_addr) {
1153 ib = (struct sockaddr_ib *)src_addr;
1154 ib->sib_family = AF_IB;
1155 if (path) {
1156 ib->sib_pkey = path->pkey;
1157 ib->sib_flowinfo = path->flow_label;
1158 memcpy(&ib->sib_addr, &path->sgid, 16);
1159 ib->sib_sid = path->service_id;
1160 ib->sib_scope_id = 0;
1161 } else {
1162 ib->sib_pkey = listen_ib->sib_pkey;
1163 ib->sib_flowinfo = listen_ib->sib_flowinfo;
1164 ib->sib_addr = listen_ib->sib_addr;
1165 ib->sib_sid = listen_ib->sib_sid;
1166 ib->sib_scope_id = listen_ib->sib_scope_id;
1167 }
1168 ib->sib_sid_mask = cpu_to_be64(0xffffffffffffffffULL);
1169 }
1170 if (dst_addr) {
1171 ib = (struct sockaddr_ib *)dst_addr;
1172 ib->sib_family = AF_IB;
1173 if (path) {
1174 ib->sib_pkey = path->pkey;
1175 ib->sib_flowinfo = path->flow_label;
1176 memcpy(&ib->sib_addr, &path->dgid, 16);
1177 }
1178 }
1179 }
1180
/*
 * Build IPv4 socket addresses from a CMA header.  The local (source) side
 * uses the header's destination address with @local_port; the remote
 * (destination) side uses the header's source address and port.  Either
 * output pointer may be NULL to skip that side.  All fields not listed are
 * zeroed.
 */
static void cma_save_ip4_info(struct sockaddr_in *src_addr,
			      struct sockaddr_in *dst_addr,
			      struct cma_hdr *hdr,
			      __be16 local_port)
{
	if (src_addr != NULL) {
		struct sockaddr_in local = {
			.sin_len = sizeof(struct sockaddr_in),
			.sin_family = AF_INET,
			.sin_addr.s_addr = hdr->dst_addr.ip4.addr,
			.sin_port = local_port,
		};

		*src_addr = local;
	}

	if (dst_addr != NULL) {
		struct sockaddr_in remote = {
			.sin_len = sizeof(struct sockaddr_in),
			.sin_family = AF_INET,
			.sin_addr.s_addr = hdr->src_addr.ip4.addr,
			.sin_port = hdr->port,
		};

		*dst_addr = remote;
	}
}
1204
cma_ip6_clear_scope_id(struct in6_addr * addr)1205 static void cma_ip6_clear_scope_id(struct in6_addr *addr)
1206 {
1207 /* make sure link local scope ID gets zeroed */
1208 if (IN6_IS_SCOPE_LINKLOCAL(addr) ||
1209 IN6_IS_ADDR_MC_INTFACELOCAL(addr)) {
1210 /* use byte-access to be alignment safe */
1211 addr->s6_addr[2] = 0;
1212 addr->s6_addr[3] = 0;
1213 }
1214 }
1215
/*
 * Build IPv6 socket addresses from a CMA header.  The local (source) side
 * uses the header's destination address with @local_port; the remote
 * (destination) side uses the header's source address and port.  Either
 * output pointer may be NULL to skip that side.  Any embedded link-local
 * scope ID is cleared from the stored addresses.
 */
static void cma_save_ip6_info(struct sockaddr_in6 *src_addr,
			      struct sockaddr_in6 *dst_addr,
			      struct cma_hdr *hdr,
			      __be16 local_port)
{
	if (src_addr != NULL) {
		struct sockaddr_in6 local = {
			.sin6_len = sizeof(struct sockaddr_in6),
			.sin6_family = AF_INET6,
			.sin6_addr = hdr->dst_addr.ip6,
			.sin6_port = local_port,
		};

		*src_addr = local;
		cma_ip6_clear_scope_id(&src_addr->sin6_addr);
	}

	if (dst_addr != NULL) {
		struct sockaddr_in6 remote = {
			.sin6_len = sizeof(struct sockaddr_in6),
			.sin6_family = AF_INET6,
			.sin6_addr = hdr->src_addr.ip6,
			.sin6_port = hdr->port,
		};

		*dst_addr = remote;
		cma_ip6_clear_scope_id(&dst_addr->sin6_addr);
	}
}
1241
cma_port_from_service_id(__be64 service_id)1242 static u16 cma_port_from_service_id(__be64 service_id)
1243 {
1244 return (u16)be64_to_cpu(service_id);
1245 }
1246
/*
 * Build the local (@src_addr) and remote (@dst_addr) socket addresses from
 * an SDP hello header.  Both output pointers are mandatory.  The local port
 * comes from @service_id, the remote port from the header.
 *
 * Returns 0 on success, -EINVAL when the header's major version is not
 * SDP_MAJ_VERSION, or -EAFNOSUPPORT when the header's IP version is
 * neither 4 nor 6.
 */
static int sdp_save_ip_info(struct sockaddr *src_addr,
			    struct sockaddr *dst_addr,
			    const struct sdp_hh *hdr,
			    __be64 service_id)
{
	__be16 local_port;
	int ret = 0;

	BUG_ON(src_addr == NULL || dst_addr == NULL);

	if (sdp_get_majv(hdr->majv_minv) != SDP_MAJ_VERSION)
		return -EINVAL;

	local_port = htons(cma_port_from_service_id(service_id));

	switch (sdp_get_ip_ver(hdr)) {
	case 4: {
		struct sockaddr_in *local4 = (void *)src_addr;
		struct sockaddr_in *remote4 = (void *)dst_addr;
		struct sockaddr_in tmp4;

		tmp4 = (struct sockaddr_in) {
			.sin_len = sizeof(*local4),
			.sin_family = AF_INET,
			.sin_addr.s_addr = hdr->dst_addr.ip4.addr,
			.sin_port = local_port,
		};
		*local4 = tmp4;

		tmp4 = (struct sockaddr_in) {
			.sin_len = sizeof(*remote4),
			.sin_family = AF_INET,
			.sin_addr.s_addr = hdr->src_addr.ip4.addr,
			.sin_port = hdr->port,
		};
		*remote4 = tmp4;
		break;
	}
	case 6: {
		struct sockaddr_in6 *local6 = (void *)src_addr;
		struct sockaddr_in6 *remote6 = (void *)dst_addr;
		struct sockaddr_in6 tmp6;

		tmp6 = (struct sockaddr_in6) {
			.sin6_len = sizeof(*local6),
			.sin6_family = AF_INET6,
			.sin6_addr = hdr->dst_addr.ip6,
			.sin6_port = local_port,
		};
		*local6 = tmp6;

		tmp6 = (struct sockaddr_in6) {
			.sin6_len = sizeof(*remote6),
			.sin6_family = AF_INET6,
			.sin6_addr = hdr->src_addr.ip6,
			.sin6_port = hdr->port,
		};
		*remote6 = tmp6;

		/* Strip any embedded link-local scope ID from both sides. */
		cma_ip6_clear_scope_id(&local6->sin6_addr);
		cma_ip6_clear_scope_id(&remote6->sin6_addr);
		break;
	}
	default:
		ret = -EAFNOSUPPORT;
		break;
	}

	return ret;
}
1310
/*
 * Derive IP source/destination addresses from the private data of an IB CM
 * event.  Service IDs in the SDP port space are handed to
 * sdp_save_ip_info(); everything else is parsed as a CMA header.
 *
 * Returns 0 on success, -EINVAL when the CMA version does not match
 * CMA_VERSION, -EAFNOSUPPORT when the header's IP version is neither 4 nor
 * 6, or whatever sdp_save_ip_info() returns for SDP.
 */
static int cma_save_ip_info(struct sockaddr *src_addr,
			    struct sockaddr *dst_addr,
			    const struct ib_cm_event *ib_event,
			    __be64 service_id)
{
	struct cma_hdr *hdr;
	__be16 port;
	int ret = 0;

	if (rdma_ps_from_service_id(service_id) == RDMA_PS_SDP)
		return sdp_save_ip_info(src_addr, dst_addr,
					ib_event->private_data, service_id);

	hdr = ib_event->private_data;
	if (hdr->cma_version != CMA_VERSION)
		return -EINVAL;

	port = htons(cma_port_from_service_id(service_id));

	switch (cma_get_ip_ver(hdr)) {
	case 4:
		cma_save_ip4_info((struct sockaddr_in *)src_addr,
				  (struct sockaddr_in *)dst_addr, hdr, port);
		break;
	case 6:
		cma_save_ip6_info((struct sockaddr_in6 *)src_addr,
				  (struct sockaddr_in6 *)dst_addr, hdr, port);
		break;
	default:
		ret = -EAFNOSUPPORT;
		break;
	}

	return ret;
}
1344
/*
 * Record the source and destination addresses of an incoming request.
 *
 * For AF_IB listeners the addresses come from the primary path (connection
 * requests) or from the listener itself (SIDR requests); other events leave
 * the addresses untouched and the call always returns 0.  For every other
 * family the work is delegated to cma_save_ip_info() and its result is
 * returned.
 */
static int cma_save_net_info(struct sockaddr *src_addr,
			     struct sockaddr *dst_addr,
			     const struct rdma_cm_id *listen_id,
			     const struct ib_cm_event *ib_event,
			     sa_family_t sa_family, __be64 service_id)
{
	if (sa_family != AF_IB)
		return cma_save_ip_info(src_addr, dst_addr, ib_event,
					service_id);

	switch (ib_event->event) {
	case IB_CM_REQ_RECEIVED:
		cma_save_ib_info(src_addr, dst_addr, listen_id,
				 ib_event->param.req_rcvd.primary_path);
		break;
	case IB_CM_SIDR_REQ_RECEIVED:
		cma_save_ib_info(src_addr, dst_addr, listen_id, NULL);
		break;
	default:
		break;
	}

	return 0;
}
1362
cma_save_req_info(const struct ib_cm_event * ib_event,struct cma_req_info * req)1363 static int cma_save_req_info(const struct ib_cm_event *ib_event,
1364 struct cma_req_info *req)
1365 {
1366 const struct ib_cm_req_event_param *req_param =
1367 &ib_event->param.req_rcvd;
1368 const struct ib_cm_sidr_req_event_param *sidr_param =
1369 &ib_event->param.sidr_req_rcvd;
1370
1371 switch (ib_event->event) {
1372 case IB_CM_REQ_RECEIVED:
1373 req->device = req_param->listen_id->device;
1374 req->port = req_param->port;
1375 memcpy(&req->local_gid, &req_param->primary_path->sgid,
1376 sizeof(req->local_gid));
1377 req->has_gid = true;
1378 req->service_id = req_param->primary_path->service_id;
1379 req->pkey = be16_to_cpu(req_param->primary_path->pkey);
1380 if (req->pkey != req_param->bth_pkey)
1381 pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and primary path P_Key (0x%x)\n"
1382 "RDMA CMA: in the future this may cause the request to be dropped\n",
1383 req_param->bth_pkey, req->pkey);
1384 break;
1385 case IB_CM_SIDR_REQ_RECEIVED:
1386 req->device = sidr_param->listen_id->device;
1387 req->port = sidr_param->port;
1388 req->has_gid = false;
1389 req->service_id = sidr_param->service_id;
1390 req->pkey = sidr_param->pkey;
1391 if (req->pkey != sidr_param->bth_pkey)
1392 pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and SIDR request payload P_Key (0x%x)\n"
1393 "RDMA CMA: in the future this may cause the request to be dropped\n",
1394 sidr_param->bth_pkey, req->pkey);
1395 break;
1396 default:
1397 return -EINVAL;
1398 }
1399
1400 return 0;
1401 }
1402
#ifdef INET
/*
 * Despite the name, this returns true when the address pair must be
 * REJECTED: the source is multicast, or either address is limited
 * broadcast, zeronet or loopback.  The checks run with @vnet installed as
 * the current vnet.
 */
static bool validate_ipv4_net_dev_addr(struct vnet *vnet,
    const __be32 saddr, const __be32 daddr)
{
	bool reject;

	CURVNET_SET(vnet);
	reject = ipv4_is_multicast(saddr);
	reject = reject || ipv4_is_lbcast(saddr) || ipv4_is_lbcast(daddr);
	reject = reject || ipv4_is_zeronet(saddr) || ipv4_is_zeronet(daddr);
	reject = reject || ipv4_is_loopback(daddr) || ipv4_is_loopback(saddr);
	CURVNET_RESTORE();

	return (reject);
}
#endif
1417
/*
 * Check that an IPv4 request with the given destination and source could
 * legitimately have arrived on @net_dev:
 *
 *  - neither address may be of a kind rejected by
 *    validate_ipv4_net_dev_addr();
 *  - the destination address must resolve, via ip_ifp_find(), to @net_dev;
 *  - unless source and destination are identical (loopback), the route
 *    towards the source must leave through @net_dev.
 *
 * Always returns false when the kernel is built without INET.
 */
static bool validate_ipv4_net_dev(if_t net_dev,
				  const struct sockaddr_in *dst_addr,
				  const struct sockaddr_in *src_addr)
{
#ifdef INET
	const __be32 saddr = src_addr->sin_addr.s_addr;
	const __be32 daddr = dst_addr->sin_addr.s_addr;
	struct nhop_object *nh;
	if_t dst_dev;
	bool via_net_dev;

	if (validate_ipv4_net_dev_addr(if_getvnet(net_dev), saddr, daddr))
		return false;

	/* The reference obtained from ip_ifp_find() is dropped right away. */
	dst_dev = ip_ifp_find(if_getvnet(net_dev), daddr);
	if (dst_dev != net_dev) {
		if (dst_dev != NULL)
			dev_put(dst_dev);
		return false;
	}
	dev_put(dst_dev);

	/* Loopback: both ends are the same address, no route lookup needed. */
	if (saddr == daddr)
		return true;

	CURVNET_SET(if_getvnet(net_dev));
	nh = fib4_lookup(RT_DEFAULT_FIB, src_addr->sin_addr, 0, NHR_NONE, 0);
	via_net_dev = (nh != NULL && nh->nh_ifp == net_dev);
	CURVNET_RESTORE();

	return via_net_dev;
#else
	return false;
#endif
}
1458
/*
 * Check that an IPv6 request with the given destination and source could
 * legitimately have arrived on @net_dev:
 *
 *  - the destination address must resolve, via ip6_ifp_find(), to @net_dev;
 *  - after embedding @net_dev's index as scope ID into both addresses,
 *    either they are identical (loopback) or the route towards the source
 *    must leave through @net_dev.
 *
 * Always returns false when the kernel is built without INET6.
 */
static bool validate_ipv6_net_dev(if_t net_dev,
				  const struct sockaddr_in6 *dst_addr,
				  const struct sockaddr_in6 *src_addr)
{
#ifdef INET6
	struct sockaddr_in6 scoped_src = *src_addr;
	struct sockaddr_in6 scoped_dst = *dst_addr;
	struct nhop_object *nh;
	if_t dst_dev;
	bool via_net_dev;

	/* The reference obtained from ip6_ifp_find() is dropped right away. */
	dst_dev = ip6_ifp_find(if_getvnet(net_dev), scoped_dst.sin6_addr,
			       if_getindex(net_dev));
	if (dst_dev != net_dev) {
		if (dst_dev != NULL)
			dev_put(dst_dev);
		return false;
	}
	dev_put(dst_dev);

	CURVNET_SET(if_getvnet(net_dev));

	/* Make sure the scope ID gets embedded in both working copies. */
	scoped_src.sin6_scope_id = if_getindex(net_dev);
	sa6_embedscope(&scoped_src, 0);

	scoped_dst.sin6_scope_id = if_getindex(net_dev);
	sa6_embedscope(&scoped_dst, 0);

	/* The loopback check must happen after the scope ID is embedded. */
	if (memcmp(&scoped_src.sin6_addr, &scoped_dst.sin6_addr,
		   sizeof(scoped_dst.sin6_addr)) == 0) {
		via_net_dev = true;
	} else {
		/* Non-loopback: look up the route back to the source. */
		nh = fib6_lookup(RT_DEFAULT_FIB, &src_addr->sin6_addr,
				 if_getindex(net_dev), NHR_NONE, 0);
		via_net_dev = (nh != NULL && nh->nh_ifp == net_dev);
	}

	CURVNET_RESTORE();
	return via_net_dev;
#else
	return false;
#endif
}
1512
/*
 * Dispatch to the IPv4 or IPv6 validation routine.  Source and destination
 * must share the same family, and that family must be AF_INET or AF_INET6;
 * anything else fails validation.
 */
static bool validate_net_dev(if_t net_dev,
			     const struct sockaddr *daddr,
			     const struct sockaddr *saddr)
{
	if (saddr->sa_family != daddr->sa_family)
		return false;

	switch (daddr->sa_family) {
	case AF_INET:
		return validate_ipv4_net_dev(net_dev,
		    (const struct sockaddr_in *)daddr,
		    (const struct sockaddr_in *)saddr);
	case AF_INET6:
		return validate_ipv6_net_dev(net_dev,
		    (const struct sockaddr_in6 *)daddr,
		    (const struct sockaddr_in6 *)saddr);
	default:
		return false;
	}
}
1535
1536 static if_t
roce_get_net_dev_by_cm_event(const struct ib_cm_event * ib_event)1537 roce_get_net_dev_by_cm_event(const struct ib_cm_event *ib_event)
1538 {
1539 const struct ib_gid_attr *sgid_attr = NULL;
1540 if_t ndev;
1541
1542 if (ib_event->event == IB_CM_REQ_RECEIVED)
1543 sgid_attr = ib_event->param.req_rcvd.ppath_sgid_attr;
1544 else if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED)
1545 sgid_attr = ib_event->param.sidr_req_rcvd.sgid_attr;
1546
1547 if (!sgid_attr)
1548 return NULL;
1549
1550 rcu_read_lock();
1551 ndev = rdma_read_gid_attr_ndev_rcu(sgid_attr);
1552 if (IS_ERR(ndev))
1553 ndev = NULL;
1554 else
1555 dev_hold(ndev);
1556 rcu_read_unlock();
1557 return ndev;
1558 }
1559
cma_get_net_dev(const struct ib_cm_event * ib_event,const struct cma_req_info * req)1560 static if_t cma_get_net_dev(const struct ib_cm_event *ib_event,
1561 const struct cma_req_info *req)
1562 {
1563 struct sockaddr_storage listen_addr_storage, src_addr_storage;
1564 struct sockaddr *listen_addr = (struct sockaddr *)&listen_addr_storage,
1565 *src_addr = (struct sockaddr *)&src_addr_storage;
1566 if_t net_dev;
1567 const union ib_gid *gid = req->has_gid ? &req->local_gid : NULL;
1568 struct epoch_tracker et;
1569 int err;
1570
1571 err = cma_save_ip_info(listen_addr, src_addr, ib_event,
1572 req->service_id);
1573 if (err)
1574 return ERR_PTR(err);
1575
1576 if (rdma_protocol_roce(req->device, req->port))
1577 net_dev = roce_get_net_dev_by_cm_event(ib_event);
1578 else
1579 net_dev = ib_get_net_dev_by_params(req->device, req->port,
1580 req->pkey,
1581 gid, listen_addr);
1582
1583 if (!net_dev)
1584 return ERR_PTR(-ENODEV);
1585
1586 NET_EPOCH_ENTER(et);
1587 if (!validate_net_dev(net_dev, listen_addr, src_addr)) {
1588 NET_EPOCH_EXIT(et);
1589 dev_put(net_dev);
1590 return ERR_PTR(-EHOSTUNREACH);
1591 }
1592 NET_EPOCH_EXIT(et);
1593
1594 return net_dev;
1595 }
1596
/* The port space is bits 16..31 of the service ID (host byte order). */
static enum rdma_port_space rdma_ps_from_service_id(__be64 service_id)
{
	enum rdma_port_space ps = (be64_to_cpu(service_id) >> 16) & 0xffff;

	return ps;
}
1601
sdp_match_private_data(struct rdma_id_private * id_priv,const struct sdp_hh * hdr,struct sockaddr * addr)1602 static bool sdp_match_private_data(struct rdma_id_private *id_priv,
1603 const struct sdp_hh *hdr,
1604 struct sockaddr *addr)
1605 {
1606 __be32 ip4_addr;
1607 struct in6_addr ip6_addr;
1608 struct vnet *vnet = id_priv->id.route.addr.dev_addr.net;
1609
1610 switch (addr->sa_family) {
1611 case AF_INET:
1612 ip4_addr = ((struct sockaddr_in *)addr)->sin_addr.s_addr;
1613 if (sdp_get_ip_ver(hdr) != 4)
1614 return false;
1615 if (!cma_any_addr(vnet, addr) &&
1616 hdr->dst_addr.ip4.addr != ip4_addr)
1617 return false;
1618 break;
1619 case AF_INET6:
1620 ip6_addr = ((struct sockaddr_in6 *)addr)->sin6_addr;
1621 if (sdp_get_ip_ver(hdr) != 6)
1622 return false;
1623 cma_ip6_clear_scope_id(&ip6_addr);
1624 if (!cma_any_addr(vnet, addr) &&
1625 memcmp(&hdr->dst_addr.ip6, &ip6_addr, sizeof(ip6_addr)))
1626 return false;
1627 break;
1628 case AF_IB:
1629 return true;
1630 default:
1631 return false;
1632 }
1633
1634 return true;
1635 }
1636
cma_match_private_data(struct rdma_id_private * id_priv,const void * vhdr)1637 static bool cma_match_private_data(struct rdma_id_private *id_priv,
1638 const void *vhdr)
1639 {
1640 const struct cma_hdr *hdr = vhdr;
1641 struct sockaddr *addr = cma_src_addr(id_priv);
1642 struct vnet *vnet = id_priv->id.route.addr.dev_addr.net;
1643 __be32 ip4_addr;
1644 struct in6_addr ip6_addr;
1645
1646 if (cma_any_addr(vnet, addr) && !id_priv->afonly)
1647 return true;
1648
1649 if (id_priv->id.ps == RDMA_PS_SDP)
1650 return sdp_match_private_data(id_priv, vhdr, addr);
1651
1652 switch (addr->sa_family) {
1653 case AF_INET:
1654 ip4_addr = ((struct sockaddr_in *)addr)->sin_addr.s_addr;
1655 if (cma_get_ip_ver(hdr) != 4)
1656 return false;
1657 if (!cma_any_addr(vnet, addr) &&
1658 hdr->dst_addr.ip4.addr != ip4_addr)
1659 return false;
1660 break;
1661 case AF_INET6:
1662 ip6_addr = ((struct sockaddr_in6 *)addr)->sin6_addr;
1663 if (cma_get_ip_ver(hdr) != 6)
1664 return false;
1665 cma_ip6_clear_scope_id(&ip6_addr);
1666 if (!cma_any_addr(vnet, addr) &&
1667 memcmp(&hdr->dst_addr.ip6, &ip6_addr, sizeof(ip6_addr)))
1668 return false;
1669 break;
1670 case AF_IB:
1671 return true;
1672 default:
1673 return false;
1674 }
1675
1676 return true;
1677 }
1678
cma_protocol_roce(const struct rdma_cm_id * id)1679 static bool cma_protocol_roce(const struct rdma_cm_id *id)
1680 {
1681 struct ib_device *device = id->device;
1682 const int port_num = id->port_num ?: rdma_start_port(device);
1683
1684 return rdma_protocol_roce(device, port_num);
1685 }
1686
/*
 * Decide whether listener @id may accept a request that arrived on
 * @port_num and was resolved to @net_dev (NULL when the request has no
 * associated network interface).
 */
static bool cma_match_net_dev(const struct rdma_cm_id *id,
			      const if_t net_dev,
			      u8 port_num)
{
	const struct rdma_addr *addr = &id->route.addr;

	if (net_dev == NULL) {
		/* A listener tied to a specific port only serves that port. */
		if (id->port_num != 0 && id->port_num != port_num)
			return false;

		/* SDP listeners without an interface must be IP listeners. */
		if (id->ps == RDMA_PS_SDP)
			return (addr->src_addr.ss_family == AF_INET ||
				addr->src_addr.ss_family == AF_INET6);

		/* This request is an AF_IB request */
		return (addr->src_addr.ss_family == AF_IB);
	}

	/*
	 * Net namespaces must match, and if the listener is listening
	 * on a specific netdevice then the netdevice must match as well.
	 */
	if (!net_eq(dev_net(net_dev), addr->dev_addr.net))
		return false;

	return (addr->dev_addr.bound_dev_if != 0) ==
	    (addr->dev_addr.bound_dev_if == if_getindex(net_dev));
}
1718
cma_find_listener(const struct rdma_bind_list * bind_list,const struct ib_cm_id * cm_id,const struct ib_cm_event * ib_event,const struct cma_req_info * req,const if_t net_dev)1719 static struct rdma_id_private *cma_find_listener(
1720 const struct rdma_bind_list *bind_list,
1721 const struct ib_cm_id *cm_id,
1722 const struct ib_cm_event *ib_event,
1723 const struct cma_req_info *req,
1724 const if_t net_dev)
1725 {
1726 struct rdma_id_private *id_priv, *id_priv_dev;
1727
1728 if (!bind_list)
1729 return ERR_PTR(-EINVAL);
1730
1731 hlist_for_each_entry(id_priv, &bind_list->owners, node) {
1732 if (cma_match_private_data(id_priv, ib_event->private_data)) {
1733 if (id_priv->id.device == cm_id->device &&
1734 cma_match_net_dev(&id_priv->id, net_dev, req->port))
1735 return id_priv;
1736 list_for_each_entry(id_priv_dev,
1737 &id_priv->listen_list,
1738 listen_list) {
1739 if (id_priv_dev->id.device == cm_id->device &&
1740 cma_match_net_dev(&id_priv_dev->id, net_dev, req->port))
1741 return id_priv_dev;
1742 }
1743 }
1744 }
1745
1746 return ERR_PTR(-EINVAL);
1747 }
1748
1749 static struct rdma_id_private *
cma_ib_id_from_event(struct ib_cm_id * cm_id,const struct ib_cm_event * ib_event,if_t * net_dev)1750 cma_ib_id_from_event(struct ib_cm_id *cm_id,
1751 const struct ib_cm_event *ib_event,
1752 if_t *net_dev)
1753 {
1754 struct cma_req_info req;
1755 struct rdma_bind_list *bind_list;
1756 struct rdma_id_private *id_priv;
1757 int err;
1758
1759 err = cma_save_req_info(ib_event, &req);
1760 if (err)
1761 return ERR_PTR(err);
1762
1763 if (rdma_ps_from_service_id(cm_id->service_id) == RDMA_PS_SDP) {
1764 *net_dev = NULL;
1765 goto there_is_no_net_dev;
1766 }
1767
1768 *net_dev = cma_get_net_dev(ib_event, &req);
1769 if (IS_ERR(*net_dev)) {
1770 if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) {
1771 /* Assuming the protocol is AF_IB */
1772 *net_dev = NULL;
1773 } else {
1774 return ERR_CAST(*net_dev);
1775 }
1776 }
1777
1778 there_is_no_net_dev:
1779 bind_list = cma_ps_find(*net_dev ? dev_net(*net_dev) : &init_net,
1780 rdma_ps_from_service_id(req.service_id),
1781 cma_port_from_service_id(req.service_id));
1782 id_priv = cma_find_listener(bind_list, cm_id, ib_event, &req, *net_dev);
1783 if (IS_ERR(id_priv) && *net_dev) {
1784 dev_put(*net_dev);
1785 *net_dev = NULL;
1786 }
1787
1788 return id_priv;
1789 }
1790
cma_user_data_offset(struct rdma_id_private * id_priv)1791 static inline u8 cma_user_data_offset(struct rdma_id_private *id_priv)
1792 {
1793 if (cma_family(id_priv) == AF_IB)
1794 return 0;
1795 if (id_priv->id.ps == RDMA_PS_SDP)
1796 return 0;
1797 return sizeof(struct cma_hdr);
1798 }
1799
cma_cancel_route(struct rdma_id_private * id_priv)1800 static void cma_cancel_route(struct rdma_id_private *id_priv)
1801 {
1802 if (rdma_cap_ib_sa(id_priv->id.device, id_priv->id.port_num)) {
1803 if (id_priv->query)
1804 ib_sa_cancel_query(id_priv->query_id, id_priv->query);
1805 }
1806 }
1807
cma_cancel_listens(struct rdma_id_private * id_priv)1808 static void cma_cancel_listens(struct rdma_id_private *id_priv)
1809 {
1810 struct rdma_id_private *dev_id_priv;
1811
1812 /*
1813 * Remove from listen_any_list to prevent added devices from spawning
1814 * additional listen requests.
1815 */
1816 mutex_lock(&lock);
1817 list_del(&id_priv->list);
1818
1819 while (!list_empty(&id_priv->listen_list)) {
1820 dev_id_priv = list_entry(id_priv->listen_list.next,
1821 struct rdma_id_private, listen_list);
1822 /* sync with device removal to avoid duplicate destruction */
1823 list_del_init(&dev_id_priv->list);
1824 list_del(&dev_id_priv->listen_list);
1825 mutex_unlock(&lock);
1826
1827 rdma_destroy_id(&dev_id_priv->id);
1828 mutex_lock(&lock);
1829 }
1830 mutex_unlock(&lock);
1831 }
1832
cma_cancel_operation(struct rdma_id_private * id_priv,enum rdma_cm_state state)1833 static void cma_cancel_operation(struct rdma_id_private *id_priv,
1834 enum rdma_cm_state state)
1835 {
1836 struct vnet *vnet = id_priv->id.route.addr.dev_addr.net;
1837
1838 switch (state) {
1839 case RDMA_CM_ADDR_QUERY:
1840 rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
1841 break;
1842 case RDMA_CM_ROUTE_QUERY:
1843 cma_cancel_route(id_priv);
1844 break;
1845 case RDMA_CM_LISTEN:
1846 if (cma_any_addr(vnet, cma_src_addr(id_priv)) && !id_priv->cma_dev)
1847 cma_cancel_listens(id_priv);
1848 break;
1849 default:
1850 break;
1851 }
1852 }
1853
cma_release_port(struct rdma_id_private * id_priv)1854 static void cma_release_port(struct rdma_id_private *id_priv)
1855 {
1856 struct rdma_bind_list *bind_list = id_priv->bind_list;
1857 struct vnet *net = id_priv->id.route.addr.dev_addr.net;
1858
1859 if (!bind_list)
1860 return;
1861
1862 mutex_lock(&lock);
1863 hlist_del(&id_priv->node);
1864 if (hlist_empty(&bind_list->owners)) {
1865 cma_ps_remove(net, bind_list->ps, bind_list->port);
1866 kfree(bind_list);
1867 }
1868 mutex_unlock(&lock);
1869 }
1870
cma_leave_roce_mc_group(struct rdma_id_private * id_priv,struct cma_multicast * mc)1871 static void cma_leave_roce_mc_group(struct rdma_id_private *id_priv,
1872 struct cma_multicast *mc)
1873 {
1874 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
1875 if_t ndev = NULL;
1876
1877 if (dev_addr->bound_dev_if)
1878 ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
1879 if (ndev) {
1880 cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, false);
1881 dev_put(ndev);
1882 }
1883 kref_put(&mc->mcref, release_mc);
1884 }
1885
cma_leave_mc_groups(struct rdma_id_private * id_priv)1886 static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
1887 {
1888 struct cma_multicast *mc;
1889
1890 while (!list_empty(&id_priv->mc_list)) {
1891 mc = container_of(id_priv->mc_list.next,
1892 struct cma_multicast, list);
1893 list_del(&mc->list);
1894 if (rdma_cap_ib_mcast(id_priv->cma_dev->device,
1895 id_priv->id.port_num)) {
1896 ib_sa_free_multicast(mc->multicast.ib);
1897 kfree(mc);
1898 } else {
1899 cma_leave_roce_mc_group(id_priv, mc);
1900 }
1901 }
1902 }
1903
/*
 * Destroy an rdma_cm_id and free its private state.
 *
 * The id is switched to RDMA_CM_DESTROYING and whatever operation belonged
 * to its previous state is cancelled.  After running callbacks have
 * drained, the CM id is destroyed, multicast groups are left, and the
 * device and port bindings are released.  The function then drops a
 * reference and blocks until the id's completion fires before freeing the
 * memory.
 */
void rdma_destroy_id(struct rdma_cm_id *id)
{
	struct rdma_id_private *id_priv;
	enum rdma_cm_state state;

	id_priv = container_of(id, struct rdma_id_private, id);
	/* Cancellation is keyed on the state the id was in before the switch. */
	state = cma_exch(id_priv, RDMA_CM_DESTROYING);
	cma_cancel_operation(id_priv, state);

	/*
	 * Wait for any active callback to finish.  New callbacks will find
	 * the id_priv state set to destroying and abort.
	 */
	mutex_lock(&id_priv->handler_mutex);
	mutex_unlock(&id_priv->handler_mutex);

	if (id_priv->cma_dev) {
		/*
		 * NOTE(review): the CM capability is tested on port 1 rather
		 * than on the id's own port_num - presumably because the CM
		 * type is the same for all ports of a device; confirm.
		 */
		if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
			if (id_priv->cm_id.ib)
				ib_destroy_cm_id(id_priv->cm_id.ib);
		} else if (rdma_cap_iw_cm(id_priv->id.device, 1)) {
			if (id_priv->cm_id.iw)
				iw_destroy_cm_id(id_priv->cm_id.iw);
		}
		cma_leave_mc_groups(id_priv);
		cma_release_dev(id_priv);
	}

	cma_release_port(id_priv);
	/* Drop a reference, then block until the id's completion is signalled. */
	cma_deref_id(id_priv);
	wait_for_completion(&id_priv->comp);

	/*
	 * For internal ids the context is passed to cma_deref_id(), i.e. it is
	 * treated as another rdma_id_private - presumably the parent listener;
	 * verify against where internal_id is set.
	 */
	if (id_priv->internal_id)
		cma_deref_id(id_priv->id.context);

	kfree(id_priv->id.route.path_rec);

	if (id_priv->id.route.addr.dev_addr.sgid_attr)
		rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr);

	kfree(id_priv);
}
EXPORT_SYMBOL(rdma_destroy_id);
1947
cma_rep_recv(struct rdma_id_private * id_priv)1948 static int cma_rep_recv(struct rdma_id_private *id_priv)
1949 {
1950 int ret;
1951
1952 ret = cma_modify_qp_rtr(id_priv, NULL);
1953 if (ret)
1954 goto reject;
1955
1956 ret = cma_modify_qp_rts(id_priv, NULL);
1957 if (ret)
1958 goto reject;
1959
1960 ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0);
1961 if (ret)
1962 goto reject;
1963
1964 return 0;
1965 reject:
1966 cma_modify_qp_err(id_priv);
1967 ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
1968 NULL, 0, NULL, 0);
1969 return ret;
1970 }
1971
sdp_verify_rep(const struct sdp_hah * data)1972 static int sdp_verify_rep(const struct sdp_hah *data)
1973 {
1974 if (sdp_get_majv(data->majv_minv) != SDP_MAJ_VERSION)
1975 return -EINVAL;
1976 return 0;
1977 }
1978
cma_set_rep_event_data(struct rdma_cm_event * event,const struct ib_cm_rep_event_param * rep_data,void * private_data)1979 static void cma_set_rep_event_data(struct rdma_cm_event *event,
1980 const struct ib_cm_rep_event_param *rep_data,
1981 void *private_data)
1982 {
1983 event->param.conn.private_data = private_data;
1984 event->param.conn.private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE;
1985 event->param.conn.responder_resources = rep_data->responder_resources;
1986 event->param.conn.initiator_depth = rep_data->initiator_depth;
1987 event->param.conn.flow_control = rep_data->flow_control;
1988 event->param.conn.rnr_retry_count = rep_data->rnr_retry_count;
1989 event->param.conn.srq = rep_data->srq;
1990 event->param.conn.qp_num = rep_data->remote_qpn;
1991 }
1992
/*
 * IB CM event callback for ids whose cm_id context is an rdma_id_private.
 *
 * Translates @ib_event into an rdma_cm_event and hands it to the id's
 * event_handler, all under the id's handler_mutex.  Events are dropped
 * (returning 0) when the id is not in the state they require, for MRA, and
 * for unknown event types.
 *
 * If the event handler returns non-zero, the id is destroyed here and that
 * value is returned; the existing comment below notes that returning
 * non-zero is what destroys the CM ID.
 */
static int cma_ib_handler(struct ib_cm_id *cm_id,
			  const struct ib_cm_event *ib_event)
{
	struct rdma_id_private *id_priv = cm_id->context;
	struct rdma_cm_event event = {};
	int ret = 0;

	mutex_lock(&id_priv->handler_mutex);
	/*
	 * TIMEWAIT_EXIT is only meaningful in the DISCONNECT state; every
	 * other event is only processed while in the CONNECT state.
	 */
	if ((ib_event->event != IB_CM_TIMEWAIT_EXIT &&
	     id_priv->state != RDMA_CM_CONNECT) ||
	    (ib_event->event == IB_CM_TIMEWAIT_EXIT &&
	     id_priv->state != RDMA_CM_DISCONNECT))
		goto out;

	switch (ib_event->event) {
	case IB_CM_REQ_ERROR:
	case IB_CM_REP_ERROR:
		event.event = RDMA_CM_EVENT_UNREACHABLE;
		event.status = -ETIMEDOUT;
		break;
	case IB_CM_REP_RECEIVED:
		if (id_priv->id.ps == RDMA_PS_SDP) {
			/* SDP: only the hello-ack version is verified here. */
			event.status = sdp_verify_rep(ib_event->private_data);
			if (event.status)
				event.event = RDMA_CM_EVENT_CONNECT_ERROR;
			else
				event.event = RDMA_CM_EVENT_CONNECT_RESPONSE;
		} else {
			if (id_priv->id.qp) {
				/*
				 * A QP is attached to the id, so the
				 * handshake is completed here.
				 */
				event.status = cma_rep_recv(id_priv);
				event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR :
							     RDMA_CM_EVENT_ESTABLISHED;
			} else {
				/* No QP on the id: the consumer gets the response. */
				event.event = RDMA_CM_EVENT_CONNECT_RESPONSE;
			}
		}
		cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd,
				       ib_event->private_data);
		break;
	case IB_CM_RTU_RECEIVED:
	case IB_CM_USER_ESTABLISHED:
		event.event = RDMA_CM_EVENT_ESTABLISHED;
		break;
	case IB_CM_DREQ_ERROR:
		event.status = -ETIMEDOUT; /* fall through */
	case IB_CM_DREQ_RECEIVED:
	case IB_CM_DREP_RECEIVED:
		/* Report the disconnect only if this call made the transition. */
		if (!cma_comp_exch(id_priv, RDMA_CM_CONNECT,
				   RDMA_CM_DISCONNECT))
			goto out;
		event.event = RDMA_CM_EVENT_DISCONNECTED;
		break;
	case IB_CM_TIMEWAIT_EXIT:
		event.event = RDMA_CM_EVENT_TIMEWAIT_EXIT;
		break;
	case IB_CM_MRA_RECEIVED:
		/* ignore event */
		goto out;
	case IB_CM_REJ_RECEIVED:
		cma_modify_qp_err(id_priv);
		event.status = ib_event->param.rej_rcvd.reason;
		event.event = RDMA_CM_EVENT_REJECTED;
		event.param.conn.private_data = ib_event->private_data;
		event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE;
		break;
	default:
		pr_err("RDMA CMA: unexpected IB CM event: %d\n",
		       ib_event->event);
		goto out;
	}

	ret = id_priv->id.event_handler(&id_priv->id, &event);
	if (ret) {
		/* Destroy the CM ID by returning a non-zero value. */
		/*
		 * With cm_id.ib cleared, rdma_destroy_id() below will skip
		 * its own ib_destroy_cm_id() call.
		 */
		id_priv->cm_id.ib = NULL;
		cma_exch(id_priv, RDMA_CM_DESTROYING);
		mutex_unlock(&id_priv->handler_mutex);
		rdma_destroy_id(&id_priv->id);
		return ret;
	}
out:
	mutex_unlock(&id_priv->handler_mutex);
	return ret;
}
2077
2078 static struct rdma_id_private *
cma_ib_new_conn_id(const struct rdma_cm_id * listen_id,const struct ib_cm_event * ib_event,if_t net_dev)2079 cma_ib_new_conn_id(const struct rdma_cm_id *listen_id,
2080 const struct ib_cm_event *ib_event,
2081 if_t net_dev)
2082 {
2083 struct rdma_id_private *id_priv;
2084 struct rdma_cm_id *id;
2085 struct rdma_route *rt;
2086 const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family;
2087 struct sa_path_rec *path = ib_event->param.req_rcvd.primary_path;
2088 const __be64 service_id =
2089 ib_event->param.req_rcvd.primary_path->service_id;
2090 struct vnet *vnet = listen_id->route.addr.dev_addr.net;
2091 int ret;
2092
2093 id = rdma_create_id(vnet,
2094 listen_id->event_handler, listen_id->context,
2095 listen_id->ps, ib_event->param.req_rcvd.qp_type);
2096 if (IS_ERR(id))
2097 return NULL;
2098
2099 id_priv = container_of(id, struct rdma_id_private, id);
2100 if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr,
2101 (struct sockaddr *)&id->route.addr.dst_addr,
2102 listen_id, ib_event, ss_family, service_id))
2103 goto err;
2104
2105 rt = &id->route;
2106 rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
2107 rt->path_rec = kmalloc_array(rt->num_paths, sizeof(*rt->path_rec),
2108 GFP_KERNEL);
2109 if (!rt->path_rec)
2110 goto err;
2111
2112 rt->path_rec[0] = *path;
2113 if (rt->num_paths == 2)
2114 rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
2115
2116 if (net_dev) {
2117 rdma_copy_src_l2_addr(&rt->addr.dev_addr, net_dev);
2118 } else {
2119 if (!cma_protocol_roce(listen_id) &&
2120 cma_any_addr(vnet, cma_src_addr(id_priv))) {
2121 rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND;
2122 rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
2123 ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey));
2124 } else if (!cma_any_addr(vnet, cma_src_addr(id_priv))) {
2125 ret = cma_translate_addr(cma_src_addr(id_priv), &rt->addr.dev_addr);
2126 if (ret)
2127 goto err;
2128 }
2129 }
2130 rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
2131
2132 id_priv->state = RDMA_CM_CONNECT;
2133 return id_priv;
2134
2135 err:
2136 rdma_destroy_id(id);
2137 return NULL;
2138 }
2139
2140 static struct rdma_id_private *
cma_ib_new_udp_id(const struct rdma_cm_id * listen_id,const struct ib_cm_event * ib_event,if_t net_dev)2141 cma_ib_new_udp_id(const struct rdma_cm_id *listen_id,
2142 const struct ib_cm_event *ib_event,
2143 if_t net_dev)
2144 {
2145 struct rdma_id_private *id_priv;
2146 struct rdma_cm_id *id;
2147 const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family;
2148 struct vnet *vnet = listen_id->route.addr.dev_addr.net;
2149 int ret;
2150
2151 id = rdma_create_id(vnet, listen_id->event_handler, listen_id->context,
2152 listen_id->ps, IB_QPT_UD);
2153 if (IS_ERR(id))
2154 return NULL;
2155
2156 id_priv = container_of(id, struct rdma_id_private, id);
2157 if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr,
2158 (struct sockaddr *)&id->route.addr.dst_addr,
2159 listen_id, ib_event, ss_family,
2160 ib_event->param.sidr_req_rcvd.service_id))
2161 goto err;
2162
2163 if (net_dev) {
2164 rdma_copy_src_l2_addr(&id->route.addr.dev_addr, net_dev);
2165 } else {
2166 if (!cma_any_addr(vnet, cma_src_addr(id_priv))) {
2167 ret = cma_translate_addr(cma_src_addr(id_priv),
2168 &id->route.addr.dev_addr);
2169 if (ret)
2170 goto err;
2171 }
2172 }
2173
2174 id_priv->state = RDMA_CM_CONNECT;
2175 return id_priv;
2176 err:
2177 rdma_destroy_id(id);
2178 return NULL;
2179 }
2180
cma_set_req_event_data(struct rdma_cm_event * event,const struct ib_cm_req_event_param * req_data,void * private_data,int offset)2181 static void cma_set_req_event_data(struct rdma_cm_event *event,
2182 const struct ib_cm_req_event_param *req_data,
2183 void *private_data, int offset)
2184 {
2185 event->param.conn.private_data = (char *)private_data + offset;
2186 event->param.conn.private_data_len = IB_CM_REQ_PRIVATE_DATA_SIZE - offset;
2187 event->param.conn.responder_resources = req_data->responder_resources;
2188 event->param.conn.initiator_depth = req_data->initiator_depth;
2189 event->param.conn.flow_control = req_data->flow_control;
2190 event->param.conn.retry_count = req_data->retry_count;
2191 event->param.conn.rnr_retry_count = req_data->rnr_retry_count;
2192 event->param.conn.srq = req_data->srq;
2193 event->param.conn.qp_num = req_data->remote_qpn;
2194 }
2195
cma_ib_check_req_qp_type(const struct rdma_cm_id * id,const struct ib_cm_event * ib_event)2196 static int cma_ib_check_req_qp_type(const struct rdma_cm_id *id,
2197 const struct ib_cm_event *ib_event)
2198 {
2199 return (((ib_event->event == IB_CM_REQ_RECEIVED) &&
2200 (ib_event->param.req_rcvd.qp_type == id->qp_type)) ||
2201 ((ib_event->event == IB_CM_SIDR_REQ_RECEIVED) &&
2202 (id->qp_type == IB_QPT_UD)) ||
2203 (!id->qp_type));
2204 }
2205
cma_ib_req_handler(struct ib_cm_id * cm_id,const struct ib_cm_event * ib_event)2206 static int cma_ib_req_handler(struct ib_cm_id *cm_id,
2207 const struct ib_cm_event *ib_event)
2208 {
2209 struct rdma_id_private *listen_id, *conn_id = NULL;
2210 struct rdma_cm_event event = {};
2211 if_t net_dev;
2212 u8 offset;
2213 int ret;
2214
2215 listen_id = cma_ib_id_from_event(cm_id, ib_event, &net_dev);
2216 if (IS_ERR(listen_id))
2217 return PTR_ERR(listen_id);
2218
2219 if (!cma_ib_check_req_qp_type(&listen_id->id, ib_event)) {
2220 ret = -EINVAL;
2221 goto net_dev_put;
2222 }
2223
2224 mutex_lock(&listen_id->handler_mutex);
2225 if (listen_id->state != RDMA_CM_LISTEN) {
2226 ret = -ECONNABORTED;
2227 goto err1;
2228 }
2229
2230 offset = cma_user_data_offset(listen_id);
2231 event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
2232 if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) {
2233 conn_id = cma_ib_new_udp_id(&listen_id->id, ib_event, net_dev);
2234 event.param.ud.private_data = (char *)ib_event->private_data + offset;
2235 event.param.ud.private_data_len =
2236 IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset;
2237 } else {
2238 conn_id = cma_ib_new_conn_id(&listen_id->id, ib_event, net_dev);
2239 cma_set_req_event_data(&event, &ib_event->param.req_rcvd,
2240 ib_event->private_data, offset);
2241 }
2242 if (!conn_id) {
2243 ret = -ENOMEM;
2244 goto err1;
2245 }
2246
2247 mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
2248 ret = cma_acquire_dev(conn_id, listen_id);
2249 if (ret)
2250 goto err2;
2251
2252 conn_id->cm_id.ib = cm_id;
2253 cm_id->context = conn_id;
2254 cm_id->cm_handler = cma_ib_handler;
2255
2256 /*
2257 * Protect against the user destroying conn_id from another thread
2258 * until we're done accessing it.
2259 */
2260 atomic_inc(&conn_id->refcount);
2261 ret = conn_id->id.event_handler(&conn_id->id, &event);
2262 if (ret)
2263 goto err3;
2264 /*
2265 * Acquire mutex to prevent user executing rdma_destroy_id()
2266 * while we're accessing the cm_id.
2267 */
2268 mutex_lock(&lock);
2269 if (cma_comp(conn_id, RDMA_CM_CONNECT) &&
2270 (conn_id->id.qp_type != IB_QPT_UD))
2271 ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
2272 mutex_unlock(&lock);
2273 mutex_unlock(&conn_id->handler_mutex);
2274 mutex_unlock(&listen_id->handler_mutex);
2275 cma_deref_id(conn_id);
2276 if (net_dev)
2277 dev_put(net_dev);
2278 return 0;
2279
2280 err3:
2281 cma_deref_id(conn_id);
2282 /* Destroy the CM ID by returning a non-zero value. */
2283 conn_id->cm_id.ib = NULL;
2284 err2:
2285 cma_exch(conn_id, RDMA_CM_DESTROYING);
2286 mutex_unlock(&conn_id->handler_mutex);
2287 err1:
2288 mutex_unlock(&listen_id->handler_mutex);
2289 if (conn_id)
2290 rdma_destroy_id(&conn_id->id);
2291
2292 net_dev_put:
2293 if (net_dev)
2294 dev_put(net_dev);
2295
2296 return ret;
2297 }
2298
rdma_get_service_id(struct rdma_cm_id * id,struct sockaddr * addr)2299 __be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr)
2300 {
2301 if (addr->sa_family == AF_IB)
2302 return ((struct sockaddr_ib *) addr)->sib_sid;
2303
2304 return cpu_to_be64(((u64)id->ps << 16) + be16_to_cpu(cma_port(addr)));
2305 }
2306 EXPORT_SYMBOL(rdma_get_service_id);
2307
rdma_read_gids(struct rdma_cm_id * cm_id,union ib_gid * sgid,union ib_gid * dgid)2308 void rdma_read_gids(struct rdma_cm_id *cm_id, union ib_gid *sgid,
2309 union ib_gid *dgid)
2310 {
2311 struct rdma_addr *addr = &cm_id->route.addr;
2312
2313 if (!cm_id->device) {
2314 if (sgid)
2315 memset(sgid, 0, sizeof(*sgid));
2316 if (dgid)
2317 memset(dgid, 0, sizeof(*dgid));
2318 return;
2319 }
2320
2321 if (rdma_protocol_roce(cm_id->device, cm_id->port_num)) {
2322 if (sgid)
2323 rdma_ip2gid((struct sockaddr *)&addr->src_addr, sgid);
2324 if (dgid)
2325 rdma_ip2gid((struct sockaddr *)&addr->dst_addr, dgid);
2326 } else {
2327 if (sgid)
2328 rdma_addr_get_sgid(&addr->dev_addr, sgid);
2329 if (dgid)
2330 rdma_addr_get_dgid(&addr->dev_addr, dgid);
2331 }
2332 }
2333 EXPORT_SYMBOL(rdma_read_gids);
2334
/*
 * iWARP CM callback for a connection ID (installed as cm_handler by
 * iw_conn_req_handler()).  Maps the iWARP CM event onto an RDMA CM event
 * and delivers it to the user's event handler.  Events are dropped unless
 * the ID is in RDMA_CM_CONNECT.
 *
 * Returns 0, or the user handler's non-zero return value; in the latter
 * case the RDMA CM ID is destroyed before returning.
 */
static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
{
	struct rdma_id_private *id_priv = iw_id->context;
	struct sockaddr *local = (struct sockaddr *)&iw_event->local_addr;
	struct sockaddr *remote = (struct sockaddr *)&iw_event->remote_addr;
	struct rdma_cm_event event = {};
	int ret = 0;

	mutex_lock(&id_priv->handler_mutex);
	if (id_priv->state != RDMA_CM_CONNECT)
		goto out;

	switch (iw_event->event) {
	case IW_CM_EVENT_CLOSE:
		event.event = RDMA_CM_EVENT_DISCONNECTED;
		break;
	case IW_CM_EVENT_CONNECT_REPLY:
		/* Record the addresses reported with the reply. */
		memcpy(cma_src_addr(id_priv), local, rdma_addr_size(local));
		memcpy(cma_dst_addr(id_priv), remote, rdma_addr_size(remote));

		if (iw_event->status == 0) {
			event.event = RDMA_CM_EVENT_ESTABLISHED;
			event.param.conn.initiator_depth = iw_event->ird;
			event.param.conn.responder_resources = iw_event->ord;
		} else if (iw_event->status == -ECONNRESET ||
			   iw_event->status == -ECONNREFUSED) {
			event.event = RDMA_CM_EVENT_REJECTED;
		} else if (iw_event->status == -ETIMEDOUT) {
			event.event = RDMA_CM_EVENT_UNREACHABLE;
		} else {
			event.event = RDMA_CM_EVENT_CONNECT_ERROR;
		}
		break;
	case IW_CM_EVENT_ESTABLISHED:
		event.event = RDMA_CM_EVENT_ESTABLISHED;
		event.param.conn.initiator_depth = iw_event->ird;
		event.param.conn.responder_resources = iw_event->ord;
		break;
	default:
		BUG_ON(1);
	}

	event.status = iw_event->status;
	event.param.conn.private_data = iw_event->private_data;
	event.param.conn.private_data_len = iw_event->private_data_len;

	ret = id_priv->id.event_handler(&id_priv->id, &event);
	if (!ret)
		goto out;

	/* Destroy the CM ID by returning a non-zero value. */
	id_priv->cm_id.iw = NULL;
	cma_exch(id_priv, RDMA_CM_DESTROYING);
	mutex_unlock(&id_priv->handler_mutex);
	rdma_destroy_id(&id_priv->id);
	return ret;

out:
	mutex_unlock(&id_priv->handler_mutex);
	return ret;
}
2400
iw_conn_req_handler(struct iw_cm_id * cm_id,struct iw_cm_event * iw_event)2401 static int iw_conn_req_handler(struct iw_cm_id *cm_id,
2402 struct iw_cm_event *iw_event)
2403 {
2404 struct rdma_cm_id *new_cm_id;
2405 struct rdma_id_private *listen_id, *conn_id;
2406 struct rdma_cm_event event = {};
2407 int ret = -ECONNABORTED;
2408 struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr;
2409 struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
2410
2411 event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
2412 event.param.conn.private_data = iw_event->private_data;
2413 event.param.conn.private_data_len = iw_event->private_data_len;
2414 event.param.conn.initiator_depth = iw_event->ird;
2415 event.param.conn.responder_resources = iw_event->ord;
2416
2417 listen_id = cm_id->context;
2418
2419 mutex_lock(&listen_id->handler_mutex);
2420 if (listen_id->state != RDMA_CM_LISTEN)
2421 goto out;
2422
2423 /* Create a new RDMA id for the new IW CM ID */
2424 new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net,
2425 listen_id->id.event_handler,
2426 listen_id->id.context,
2427 RDMA_PS_TCP, IB_QPT_RC);
2428 if (IS_ERR(new_cm_id)) {
2429 ret = -ENOMEM;
2430 goto out;
2431 }
2432 conn_id = container_of(new_cm_id, struct rdma_id_private, id);
2433 mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
2434 conn_id->state = RDMA_CM_CONNECT;
2435
2436 ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr);
2437 if (ret) {
2438 mutex_unlock(&conn_id->handler_mutex);
2439 rdma_destroy_id(new_cm_id);
2440 goto out;
2441 }
2442
2443 ret = cma_acquire_dev(conn_id, listen_id);
2444 if (ret) {
2445 mutex_unlock(&conn_id->handler_mutex);
2446 rdma_destroy_id(new_cm_id);
2447 goto out;
2448 }
2449
2450 conn_id->cm_id.iw = cm_id;
2451 cm_id->context = conn_id;
2452 cm_id->cm_handler = cma_iw_handler;
2453
2454 memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr));
2455 memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr));
2456
2457 /*
2458 * Protect against the user destroying conn_id from another thread
2459 * until we're done accessing it.
2460 */
2461 atomic_inc(&conn_id->refcount);
2462 ret = conn_id->id.event_handler(&conn_id->id, &event);
2463 if (ret) {
2464 /* User wants to destroy the CM ID */
2465 conn_id->cm_id.iw = NULL;
2466 cma_exch(conn_id, RDMA_CM_DESTROYING);
2467 mutex_unlock(&conn_id->handler_mutex);
2468 cma_deref_id(conn_id);
2469 rdma_destroy_id(&conn_id->id);
2470 goto out;
2471 }
2472
2473 mutex_unlock(&conn_id->handler_mutex);
2474 cma_deref_id(conn_id);
2475
2476 out:
2477 mutex_unlock(&listen_id->handler_mutex);
2478 return ret;
2479 }
2480
cma_ib_listen(struct rdma_id_private * id_priv)2481 static int cma_ib_listen(struct rdma_id_private *id_priv)
2482 {
2483 struct sockaddr *addr;
2484 struct ib_cm_id *id;
2485 __be64 svc_id;
2486
2487 addr = cma_src_addr(id_priv);
2488 svc_id = rdma_get_service_id(&id_priv->id, addr);
2489 id = ib_cm_insert_listen(id_priv->id.device,
2490 cma_ib_req_handler, svc_id);
2491 if (IS_ERR(id))
2492 return PTR_ERR(id);
2493 id_priv->cm_id.ib = id;
2494
2495 return 0;
2496 }
2497
cma_iw_listen(struct rdma_id_private * id_priv,int backlog)2498 static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
2499 {
2500 int ret;
2501 struct iw_cm_id *id;
2502
2503 id = iw_create_cm_id(id_priv->id.device,
2504 iw_conn_req_handler,
2505 id_priv);
2506 if (IS_ERR(id))
2507 return PTR_ERR(id);
2508
2509 id->tos = id_priv->tos;
2510 id_priv->cm_id.iw = id;
2511
2512 memcpy(&id_priv->cm_id.iw->local_addr, cma_src_addr(id_priv),
2513 rdma_addr_size(cma_src_addr(id_priv)));
2514
2515 ret = iw_cm_listen(id_priv->cm_id.iw, backlog);
2516
2517 if (ret) {
2518 iw_destroy_cm_id(id_priv->cm_id.iw);
2519 id_priv->cm_id.iw = NULL;
2520 }
2521
2522 return ret;
2523 }
2524
cma_listen_handler(struct rdma_cm_id * id,struct rdma_cm_event * event)2525 static int cma_listen_handler(struct rdma_cm_id *id,
2526 struct rdma_cm_event *event)
2527 {
2528 struct rdma_id_private *id_priv = id->context;
2529
2530 id->context = id_priv->id.context;
2531 id->event_handler = id_priv->id.event_handler;
2532 return id_priv->id.event_handler(id, event);
2533 }
2534
cma_listen_on_dev(struct rdma_id_private * id_priv,struct cma_device * cma_dev)2535 static void cma_listen_on_dev(struct rdma_id_private *id_priv,
2536 struct cma_device *cma_dev)
2537 {
2538 struct rdma_id_private *dev_id_priv;
2539 struct rdma_cm_id *id;
2540 struct vnet *net = id_priv->id.route.addr.dev_addr.net;
2541 int ret;
2542
2543 if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1))
2544 return;
2545
2546 id = rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps,
2547 id_priv->id.qp_type);
2548 if (IS_ERR(id))
2549 return;
2550
2551 dev_id_priv = container_of(id, struct rdma_id_private, id);
2552
2553 dev_id_priv->state = RDMA_CM_ADDR_BOUND;
2554 memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv),
2555 rdma_addr_size(cma_src_addr(id_priv)));
2556
2557 _cma_attach_to_dev(dev_id_priv, cma_dev);
2558 list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
2559 atomic_inc(&id_priv->refcount);
2560 dev_id_priv->internal_id = 1;
2561 dev_id_priv->afonly = id_priv->afonly;
2562
2563 ret = rdma_listen(id, id_priv->backlog);
2564 if (ret)
2565 pr_warn("RDMA CMA: cma_listen_on_dev, error %d, listening on device %s\n",
2566 ret, cma_dev->device->name);
2567 }
2568
cma_listen_on_all(struct rdma_id_private * id_priv)2569 static void cma_listen_on_all(struct rdma_id_private *id_priv)
2570 {
2571 struct cma_device *cma_dev;
2572
2573 mutex_lock(&lock);
2574 list_add_tail(&id_priv->list, &listen_any_list);
2575 list_for_each_entry(cma_dev, &dev_list, list)
2576 cma_listen_on_dev(id_priv, cma_dev);
2577 mutex_unlock(&lock);
2578 }
2579
rdma_set_service_type(struct rdma_cm_id * id,int tos)2580 void rdma_set_service_type(struct rdma_cm_id *id, int tos)
2581 {
2582 struct rdma_id_private *id_priv;
2583
2584 id_priv = container_of(id, struct rdma_id_private, id);
2585 id_priv->tos = (u8) tos;
2586 id_priv->tos_set = true;
2587 }
2588 EXPORT_SYMBOL(rdma_set_service_type);
2589
2590 /**
2591 * rdma_set_ack_timeout() - Set the ack timeout of QP associated
2592 * with a connection identifier.
2593 * @id: Communication identifier to associated with service type.
2594 * @timeout: Ack timeout to set a QP, expressed as 4.096 * 2^(timeout) usec.
2595 *
2596 * This function should be called before rdma_connect() on active side,
2597 * and on passive side before rdma_accept(). It is applicable to primary
2598 * path only. The timeout will affect the local side of the QP, it is not
2599 * negotiated with remote side and zero disables the timer.
2600 *
2601 * Return: 0 for success
2602 */
rdma_set_ack_timeout(struct rdma_cm_id * id,u8 timeout)2603 int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout)
2604 {
2605 struct rdma_id_private *id_priv;
2606
2607 if (id->qp_type != IB_QPT_RC)
2608 return -EINVAL;
2609
2610 id_priv = container_of(id, struct rdma_id_private, id);
2611 id_priv->timeout = timeout;
2612 id_priv->timeout_set = true;
2613
2614 return 0;
2615 }
2616 EXPORT_SYMBOL(rdma_set_ack_timeout);
2617
cma_query_handler(int status,struct sa_path_rec * path_rec,void * context)2618 static void cma_query_handler(int status, struct sa_path_rec *path_rec,
2619 void *context)
2620 {
2621 struct cma_work *work = context;
2622 struct rdma_route *route;
2623
2624 route = &work->id->id.route;
2625
2626 if (!status) {
2627 route->num_paths = 1;
2628 *route->path_rec = *path_rec;
2629 } else {
2630 work->old_state = RDMA_CM_ROUTE_QUERY;
2631 work->new_state = RDMA_CM_ADDR_RESOLVED;
2632 work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
2633 work->event.status = status;
2634 }
2635
2636 queue_work(cma_wq, &work->work);
2637 }
2638
cma_query_ib_route(struct rdma_id_private * id_priv,int timeout_ms,struct cma_work * work)2639 static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
2640 struct cma_work *work)
2641 {
2642 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
2643 struct sa_path_rec path_rec;
2644 ib_sa_comp_mask comp_mask;
2645 struct sockaddr_in6 *sin6;
2646 struct sockaddr_ib *sib;
2647
2648 memset(&path_rec, 0, sizeof path_rec);
2649
2650 if (rdma_cap_opa_ah(id_priv->id.device, id_priv->id.port_num))
2651 path_rec.rec_type = SA_PATH_REC_TYPE_OPA;
2652 else
2653 path_rec.rec_type = SA_PATH_REC_TYPE_IB;
2654 rdma_addr_get_sgid(dev_addr, &path_rec.sgid);
2655 rdma_addr_get_dgid(dev_addr, &path_rec.dgid);
2656 path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
2657 path_rec.numb_path = 1;
2658 path_rec.reversible = 1;
2659 path_rec.service_id = rdma_get_service_id(&id_priv->id,
2660 cma_dst_addr(id_priv));
2661
2662 comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
2663 IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH |
2664 IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID;
2665
2666 switch (cma_family(id_priv)) {
2667 case AF_INET:
2668 path_rec.qos_class = cpu_to_be16((u16) id_priv->tos);
2669 comp_mask |= IB_SA_PATH_REC_QOS_CLASS;
2670 break;
2671 case AF_INET6:
2672 sin6 = (struct sockaddr_in6 *) cma_src_addr(id_priv);
2673 path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20);
2674 comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS;
2675 break;
2676 case AF_IB:
2677 sib = (struct sockaddr_ib *) cma_src_addr(id_priv);
2678 path_rec.traffic_class = (u8) (be32_to_cpu(sib->sib_flowinfo) >> 20);
2679 comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS;
2680 break;
2681 }
2682
2683 id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device,
2684 id_priv->id.port_num, &path_rec,
2685 comp_mask, timeout_ms,
2686 GFP_KERNEL, cma_query_handler,
2687 work, &id_priv->query);
2688
2689 return (id_priv->query_id < 0) ? id_priv->query_id : 0;
2690 }
2691
cma_work_handler(struct work_struct * _work)2692 static void cma_work_handler(struct work_struct *_work)
2693 {
2694 struct cma_work *work = container_of(_work, struct cma_work, work);
2695 struct rdma_id_private *id_priv = work->id;
2696 int destroy = 0;
2697
2698 mutex_lock(&id_priv->handler_mutex);
2699 if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
2700 goto out;
2701
2702 if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
2703 cma_exch(id_priv, RDMA_CM_DESTROYING);
2704 destroy = 1;
2705 }
2706 out:
2707 mutex_unlock(&id_priv->handler_mutex);
2708 cma_deref_id(id_priv);
2709 if (destroy)
2710 rdma_destroy_id(&id_priv->id);
2711 kfree(work);
2712 }
2713
cma_init_resolve_route_work(struct cma_work * work,struct rdma_id_private * id_priv)2714 static void cma_init_resolve_route_work(struct cma_work *work,
2715 struct rdma_id_private *id_priv)
2716 {
2717 work->id = id_priv;
2718 INIT_WORK(&work->work, cma_work_handler);
2719 work->old_state = RDMA_CM_ROUTE_QUERY;
2720 work->new_state = RDMA_CM_ROUTE_RESOLVED;
2721 work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
2722 }
2723
cma_init_resolve_addr_work(struct cma_work * work,struct rdma_id_private * id_priv)2724 static void cma_init_resolve_addr_work(struct cma_work *work,
2725 struct rdma_id_private *id_priv)
2726 {
2727 work->id = id_priv;
2728 INIT_WORK(&work->work, cma_work_handler);
2729 work->old_state = RDMA_CM_ADDR_QUERY;
2730 work->new_state = RDMA_CM_ADDR_RESOLVED;
2731 work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
2732 }
2733
cma_resolve_ib_route(struct rdma_id_private * id_priv,int timeout_ms)2734 static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
2735 {
2736 struct rdma_route *route = &id_priv->id.route;
2737 struct cma_work *work;
2738 int ret;
2739
2740 work = kzalloc(sizeof *work, GFP_KERNEL);
2741 if (!work)
2742 return -ENOMEM;
2743
2744 cma_init_resolve_route_work(work, id_priv);
2745
2746 route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL);
2747 if (!route->path_rec) {
2748 ret = -ENOMEM;
2749 goto err1;
2750 }
2751
2752 ret = cma_query_ib_route(id_priv, timeout_ms, work);
2753 if (ret)
2754 goto err2;
2755
2756 return 0;
2757 err2:
2758 kfree(route->path_rec);
2759 route->path_rec = NULL;
2760 err1:
2761 kfree(work);
2762 return ret;
2763 }
2764
cma_route_gid_type(enum rdma_network_type network_type,unsigned long supported_gids,enum ib_gid_type default_gid)2765 static enum ib_gid_type cma_route_gid_type(enum rdma_network_type network_type,
2766 unsigned long supported_gids,
2767 enum ib_gid_type default_gid)
2768 {
2769 if ((network_type == RDMA_NETWORK_IPV4 ||
2770 network_type == RDMA_NETWORK_IPV6) &&
2771 test_bit(IB_GID_TYPE_ROCE_UDP_ENCAP, &supported_gids))
2772 return IB_GID_TYPE_ROCE_UDP_ENCAP;
2773
2774 return default_gid;
2775 }
2776
2777 /*
2778 * cma_iboe_set_path_rec_l2_fields() is helper function which sets
2779 * path record type based on GID type.
2780 * It also sets up other L2 fields which includes destination mac address
2781 * netdev ifindex, of the path record.
2782 * It returns the netdev of the bound interface for this path record entry.
2783 */
2784 static if_t
cma_iboe_set_path_rec_l2_fields(struct rdma_id_private * id_priv)2785 cma_iboe_set_path_rec_l2_fields(struct rdma_id_private *id_priv)
2786 {
2787 struct rdma_route *route = &id_priv->id.route;
2788 enum ib_gid_type gid_type = IB_GID_TYPE_ROCE;
2789 struct rdma_addr *addr = &route->addr;
2790 unsigned long supported_gids;
2791 if_t ndev;
2792
2793 if (!addr->dev_addr.bound_dev_if)
2794 return NULL;
2795
2796 ndev = dev_get_by_index(addr->dev_addr.net,
2797 addr->dev_addr.bound_dev_if);
2798 if (!ndev)
2799 return NULL;
2800
2801 supported_gids = roce_gid_type_mask_support(id_priv->id.device,
2802 id_priv->id.port_num);
2803 gid_type = cma_route_gid_type(addr->dev_addr.network,
2804 supported_gids,
2805 id_priv->gid_type);
2806 /* Use the hint from IP Stack to select GID Type */
2807 if (gid_type < ib_network_to_gid_type(addr->dev_addr.network))
2808 gid_type = ib_network_to_gid_type(addr->dev_addr.network);
2809 route->path_rec->rec_type = sa_conv_gid_to_pathrec_type(gid_type);
2810
2811 route->path_rec->roce.route_resolved = true;
2812 sa_path_set_dmac(route->path_rec, addr->dev_addr.dst_dev_addr);
2813 return ndev;
2814 }
2815
rdma_set_ib_path(struct rdma_cm_id * id,struct sa_path_rec * path_rec)2816 int rdma_set_ib_path(struct rdma_cm_id *id,
2817 struct sa_path_rec *path_rec)
2818 {
2819 struct rdma_id_private *id_priv;
2820 if_t ndev;
2821 int ret;
2822
2823 id_priv = container_of(id, struct rdma_id_private, id);
2824 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED,
2825 RDMA_CM_ROUTE_RESOLVED))
2826 return -EINVAL;
2827
2828 id->route.path_rec = kmemdup(path_rec, sizeof(*path_rec),
2829 GFP_KERNEL);
2830 if (!id->route.path_rec) {
2831 ret = -ENOMEM;
2832 goto err;
2833 }
2834
2835 if (rdma_protocol_roce(id->device, id->port_num)) {
2836 ndev = cma_iboe_set_path_rec_l2_fields(id_priv);
2837 if (!ndev) {
2838 ret = -ENODEV;
2839 goto err_free;
2840 }
2841 dev_put(ndev);
2842 }
2843
2844 id->route.num_paths = 1;
2845 return 0;
2846
2847 err_free:
2848 kfree(id->route.path_rec);
2849 id->route.path_rec = NULL;
2850 err:
2851 cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_ADDR_RESOLVED);
2852 return ret;
2853 }
2854 EXPORT_SYMBOL(rdma_set_ib_path);
2855
cma_resolve_iw_route(struct rdma_id_private * id_priv,int timeout_ms)2856 static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
2857 {
2858 struct cma_work *work;
2859
2860 work = kzalloc(sizeof *work, GFP_KERNEL);
2861 if (!work)
2862 return -ENOMEM;
2863
2864 cma_init_resolve_route_work(work, id_priv);
2865 queue_work(cma_wq, &work->work);
2866 return 0;
2867 }
2868
/*
 * Derive the service level (SL) from an IPv4 type of service value by
 * taking bits 5..7 of @tos.  @ndev is not consulted; final mappings are
 * done by the vendor specific drivers.
 */
static int iboe_tos_to_sl(if_t ndev, int tos)
{
	return (tos >> 5) & 0x7;
}
2877
cma_resolve_iboe_route(struct rdma_id_private * id_priv)2878 static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
2879 {
2880 struct rdma_route *route = &id_priv->id.route;
2881 struct rdma_addr *addr = &route->addr;
2882 struct cma_work *work;
2883 int ret;
2884 if_t ndev;
2885
2886 u8 default_roce_tos = id_priv->cma_dev->default_roce_tos[id_priv->id.port_num -
2887 rdma_start_port(id_priv->cma_dev->device)];
2888 u8 tos = id_priv->tos_set ? id_priv->tos : default_roce_tos;
2889
2890
2891 work = kzalloc(sizeof *work, GFP_KERNEL);
2892 if (!work)
2893 return -ENOMEM;
2894
2895 route->path_rec = kzalloc(sizeof *route->path_rec, GFP_KERNEL);
2896 if (!route->path_rec) {
2897 ret = -ENOMEM;
2898 goto err1;
2899 }
2900
2901 route->num_paths = 1;
2902
2903 ndev = cma_iboe_set_path_rec_l2_fields(id_priv);
2904 if (!ndev) {
2905 ret = -ENODEV;
2906 goto err2;
2907 }
2908
2909 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
2910 &route->path_rec->sgid);
2911 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr,
2912 &route->path_rec->dgid);
2913
2914 if (((struct sockaddr *)&id_priv->id.route.addr.dst_addr)->sa_family != AF_IB)
2915 /* TODO: get the hoplimit from the inet/inet6 device */
2916 route->path_rec->hop_limit = addr->dev_addr.hoplimit;
2917 else
2918 route->path_rec->hop_limit = 1;
2919 route->path_rec->reversible = 1;
2920 route->path_rec->pkey = cpu_to_be16(0xffff);
2921 route->path_rec->mtu_selector = IB_SA_EQ;
2922 route->path_rec->sl = iboe_tos_to_sl(ndev, tos);
2923 route->path_rec->traffic_class = tos;
2924 route->path_rec->mtu = iboe_get_mtu(if_getmtu(ndev));
2925 route->path_rec->rate_selector = IB_SA_EQ;
2926 route->path_rec->rate = iboe_get_rate(ndev);
2927 dev_put(ndev);
2928 route->path_rec->packet_life_time_selector = IB_SA_EQ;
2929 route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME;
2930 if (!route->path_rec->mtu) {
2931 ret = -EINVAL;
2932 goto err2;
2933 }
2934
2935 cma_init_resolve_route_work(work, id_priv);
2936 queue_work(cma_wq, &work->work);
2937
2938 return 0;
2939
2940 err2:
2941 kfree(route->path_rec);
2942 route->path_rec = NULL;
2943 err1:
2944 kfree(work);
2945 return ret;
2946 }
2947
rdma_resolve_route(struct rdma_cm_id * id,int timeout_ms)2948 int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
2949 {
2950 struct rdma_id_private *id_priv;
2951 int ret;
2952
2953 id_priv = container_of(id, struct rdma_id_private, id);
2954 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY))
2955 return -EINVAL;
2956
2957 atomic_inc(&id_priv->refcount);
2958 if (rdma_cap_ib_sa(id->device, id->port_num))
2959 ret = cma_resolve_ib_route(id_priv, timeout_ms);
2960 else if (rdma_protocol_roce(id->device, id->port_num))
2961 ret = cma_resolve_iboe_route(id_priv);
2962 else if (rdma_protocol_iwarp(id->device, id->port_num))
2963 ret = cma_resolve_iw_route(id_priv, timeout_ms);
2964 else
2965 ret = -ENOSYS;
2966
2967 if (ret)
2968 goto err;
2969
2970 return 0;
2971 err:
2972 cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, RDMA_CM_ADDR_RESOLVED);
2973 cma_deref_id(id_priv);
2974 return ret;
2975 }
2976 EXPORT_SYMBOL(rdma_resolve_route);
2977
cma_set_loopback(struct sockaddr * addr)2978 static void cma_set_loopback(struct sockaddr *addr)
2979 {
2980 switch (addr->sa_family) {
2981 case AF_INET:
2982 ((struct sockaddr_in *) addr)->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
2983 break;
2984 case AF_INET6:
2985 ipv6_addr_set(&((struct sockaddr_in6 *) addr)->sin6_addr,
2986 0, 0, 0, htonl(1));
2987 break;
2988 default:
2989 ib_addr_set(&((struct sockaddr_ib *) addr)->sib_addr,
2990 0, 0, 0, htonl(1));
2991 break;
2992 }
2993 }
2994
cma_bind_loopback(struct rdma_id_private * id_priv)2995 static int cma_bind_loopback(struct rdma_id_private *id_priv)
2996 {
2997 struct cma_device *cma_dev, *cur_dev;
2998 struct ib_port_attr port_attr;
2999 union ib_gid gid;
3000 u16 pkey;
3001 int ret;
3002 u8 p;
3003
3004 cma_dev = NULL;
3005 mutex_lock(&lock);
3006 list_for_each_entry(cur_dev, &dev_list, list) {
3007 if (cma_family(id_priv) == AF_IB &&
3008 !rdma_cap_ib_cm(cur_dev->device, 1))
3009 continue;
3010
3011 if (!cma_dev)
3012 cma_dev = cur_dev;
3013
3014 for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
3015 if (!ib_query_port(cur_dev->device, p, &port_attr) &&
3016 port_attr.state == IB_PORT_ACTIVE) {
3017 cma_dev = cur_dev;
3018 goto port_found;
3019 }
3020 }
3021 }
3022
3023 if (!cma_dev) {
3024 ret = -ENODEV;
3025 goto out;
3026 }
3027
3028 p = 1;
3029
3030 port_found:
3031 ret = rdma_query_gid(cma_dev->device, p, 0, &gid);
3032 if (ret)
3033 goto out;
3034
3035 ret = ib_get_cached_pkey(cma_dev->device, p, 0, &pkey);
3036 if (ret)
3037 goto out;
3038
3039 id_priv->id.route.addr.dev_addr.dev_type =
3040 (rdma_protocol_ib(cma_dev->device, p)) ?
3041 ARPHRD_INFINIBAND : ARPHRD_ETHER;
3042
3043 rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid);
3044 ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey);
3045 id_priv->id.port_num = p;
3046 cma_attach_to_dev(id_priv, cma_dev);
3047 cma_set_loopback(cma_src_addr(id_priv));
3048 out:
3049 mutex_unlock(&lock);
3050 return ret;
3051 }
3052
addr_handler(int status,struct sockaddr * src_addr,struct rdma_dev_addr * dev_addr,void * context)3053 static void addr_handler(int status, struct sockaddr *src_addr,
3054 struct rdma_dev_addr *dev_addr, void *context)
3055 {
3056 struct rdma_id_private *id_priv = context;
3057 struct rdma_cm_event event = {};
3058
3059 mutex_lock(&id_priv->handler_mutex);
3060 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY,
3061 RDMA_CM_ADDR_RESOLVED))
3062 goto out;
3063
3064 memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr));
3065 if (!status && !id_priv->cma_dev)
3066 status = cma_acquire_dev(id_priv, NULL);
3067
3068 if (status) {
3069 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED,
3070 RDMA_CM_ADDR_BOUND))
3071 goto out;
3072 event.event = RDMA_CM_EVENT_ADDR_ERROR;
3073 event.status = status;
3074 } else
3075 event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
3076
3077 if (id_priv->id.event_handler(&id_priv->id, &event)) {
3078 cma_exch(id_priv, RDMA_CM_DESTROYING);
3079 mutex_unlock(&id_priv->handler_mutex);
3080 rdma_destroy_id(&id_priv->id);
3081 return;
3082 }
3083 out:
3084 mutex_unlock(&id_priv->handler_mutex);
3085 }
3086
cma_resolve_loopback(struct rdma_id_private * id_priv)3087 static int cma_resolve_loopback(struct rdma_id_private *id_priv)
3088 {
3089 struct cma_work *work;
3090 union ib_gid gid;
3091 int ret;
3092
3093 work = kzalloc(sizeof *work, GFP_KERNEL);
3094 if (!work)
3095 return -ENOMEM;
3096
3097 if (!id_priv->cma_dev) {
3098 ret = cma_bind_loopback(id_priv);
3099 if (ret)
3100 goto err;
3101 }
3102
3103 rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
3104 rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
3105
3106 atomic_inc(&id_priv->refcount);
3107 cma_init_resolve_addr_work(work, id_priv);
3108 queue_work(cma_wq, &work->work);
3109 return 0;
3110 err:
3111 kfree(work);
3112 return ret;
3113 }
3114
cma_resolve_ib_addr(struct rdma_id_private * id_priv)3115 static int cma_resolve_ib_addr(struct rdma_id_private *id_priv)
3116 {
3117 struct cma_work *work;
3118 int ret;
3119
3120 work = kzalloc(sizeof *work, GFP_KERNEL);
3121 if (!work)
3122 return -ENOMEM;
3123
3124 if (!id_priv->cma_dev) {
3125 ret = cma_resolve_ib_dev(id_priv);
3126 if (ret)
3127 goto err;
3128 }
3129
3130 rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *)
3131 &(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr));
3132
3133 atomic_inc(&id_priv->refcount);
3134 cma_init_resolve_addr_work(work, id_priv);
3135 queue_work(cma_wq, &work->work);
3136 return 0;
3137 err:
3138 kfree(work);
3139 return ret;
3140 }
3141
cma_bind_addr(struct rdma_cm_id * id,struct sockaddr * src_addr,const struct sockaddr * dst_addr)3142 static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
3143 const struct sockaddr *dst_addr)
3144 {
3145 if (!src_addr || !src_addr->sa_family) {
3146 src_addr = (struct sockaddr *) &id->route.addr.src_addr;
3147 src_addr->sa_family = dst_addr->sa_family;
3148 if (dst_addr->sa_family == AF_INET6) {
3149 struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr;
3150 struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *) dst_addr;
3151 src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id;
3152 if (IN6_IS_SCOPE_LINKLOCAL(&dst_addr6->sin6_addr) ||
3153 IN6_IS_ADDR_MC_INTFACELOCAL(&dst_addr6->sin6_addr))
3154 id->route.addr.dev_addr.bound_dev_if = dst_addr6->sin6_scope_id;
3155 } else if (dst_addr->sa_family == AF_IB) {
3156 ((struct sockaddr_ib *) src_addr)->sib_pkey =
3157 ((struct sockaddr_ib *) dst_addr)->sib_pkey;
3158 }
3159 }
3160 return rdma_bind_addr(id, src_addr);
3161 }
3162
rdma_resolve_addr(struct rdma_cm_id * id,struct sockaddr * src_addr,const struct sockaddr * dst_addr,int timeout_ms)3163 int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
3164 const struct sockaddr *dst_addr, int timeout_ms)
3165 {
3166 struct rdma_id_private *id_priv;
3167 struct vnet *vnet = id->route.addr.dev_addr.net;
3168 int ret;
3169
3170 id_priv = container_of(id, struct rdma_id_private, id);
3171 if (id_priv->state == RDMA_CM_IDLE) {
3172 ret = cma_bind_addr(id, src_addr, dst_addr);
3173 if (ret)
3174 return ret;
3175 }
3176
3177 if (cma_family(id_priv) != dst_addr->sa_family)
3178 return -EINVAL;
3179
3180 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY))
3181 return -EINVAL;
3182
3183 memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
3184 if (cma_any_addr(vnet, dst_addr)) {
3185 ret = cma_resolve_loopback(id_priv);
3186 } else {
3187 if (dst_addr->sa_family == AF_IB) {
3188 ret = cma_resolve_ib_addr(id_priv);
3189 } else {
3190 ret = cma_check_linklocal(&id->route.addr.dev_addr, dst_addr);
3191 if (ret)
3192 goto err;
3193
3194 ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr,
3195 &id->route.addr.dev_addr,
3196 timeout_ms, addr_handler,
3197 false, id_priv);
3198 }
3199 }
3200 if (ret)
3201 goto err;
3202
3203 return 0;
3204 err:
3205 cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);
3206 return ret;
3207 }
3208 EXPORT_SYMBOL(rdma_resolve_addr);
3209
rdma_set_reuseaddr(struct rdma_cm_id * id,int reuse)3210 int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse)
3211 {
3212 struct rdma_id_private *id_priv;
3213 unsigned long flags;
3214 int ret;
3215
3216 id_priv = container_of(id, struct rdma_id_private, id);
3217 spin_lock_irqsave(&id_priv->lock, flags);
3218 if (reuse || id_priv->state == RDMA_CM_IDLE) {
3219 id_priv->reuseaddr = reuse;
3220 ret = 0;
3221 } else {
3222 ret = -EINVAL;
3223 }
3224 spin_unlock_irqrestore(&id_priv->lock, flags);
3225 return ret;
3226 }
3227 EXPORT_SYMBOL(rdma_set_reuseaddr);
3228
rdma_set_afonly(struct rdma_cm_id * id,int afonly)3229 int rdma_set_afonly(struct rdma_cm_id *id, int afonly)
3230 {
3231 struct rdma_id_private *id_priv;
3232 unsigned long flags;
3233 int ret;
3234
3235 id_priv = container_of(id, struct rdma_id_private, id);
3236 spin_lock_irqsave(&id_priv->lock, flags);
3237 if (id_priv->state == RDMA_CM_IDLE || id_priv->state == RDMA_CM_ADDR_BOUND) {
3238 id_priv->options |= (1 << CMA_OPTION_AFONLY);
3239 id_priv->afonly = afonly;
3240 ret = 0;
3241 } else {
3242 ret = -EINVAL;
3243 }
3244 spin_unlock_irqrestore(&id_priv->lock, flags);
3245 return ret;
3246 }
3247 EXPORT_SYMBOL(rdma_set_afonly);
3248
cma_bind_port(struct rdma_bind_list * bind_list,struct rdma_id_private * id_priv)3249 static void cma_bind_port(struct rdma_bind_list *bind_list,
3250 struct rdma_id_private *id_priv)
3251 {
3252 struct sockaddr *addr;
3253 struct sockaddr_ib *sib;
3254 u64 sid, mask;
3255 __be16 port;
3256
3257 addr = cma_src_addr(id_priv);
3258 port = htons(bind_list->port);
3259
3260 switch (addr->sa_family) {
3261 case AF_INET:
3262 ((struct sockaddr_in *) addr)->sin_port = port;
3263 break;
3264 case AF_INET6:
3265 ((struct sockaddr_in6 *) addr)->sin6_port = port;
3266 break;
3267 case AF_IB:
3268 sib = (struct sockaddr_ib *) addr;
3269 sid = be64_to_cpu(sib->sib_sid);
3270 mask = be64_to_cpu(sib->sib_sid_mask);
3271 sib->sib_sid = cpu_to_be64((sid & mask) | (u64) ntohs(port));
3272 sib->sib_sid_mask = cpu_to_be64(~0ULL);
3273 break;
3274 }
3275 id_priv->bind_list = bind_list;
3276 hlist_add_head(&id_priv->node, &bind_list->owners);
3277 }
3278
cma_alloc_port(enum rdma_port_space ps,struct rdma_id_private * id_priv,unsigned short snum)3279 static int cma_alloc_port(enum rdma_port_space ps,
3280 struct rdma_id_private *id_priv, unsigned short snum)
3281 {
3282 struct rdma_bind_list *bind_list;
3283 int ret;
3284
3285 bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
3286 if (!bind_list)
3287 return -ENOMEM;
3288
3289 ret = cma_ps_alloc(id_priv->id.route.addr.dev_addr.net, ps, bind_list,
3290 snum);
3291 if (ret < 0)
3292 goto err;
3293
3294 bind_list->ps = ps;
3295 bind_list->port = (unsigned short)ret;
3296 cma_bind_port(bind_list, id_priv);
3297 return 0;
3298 err:
3299 kfree(bind_list);
3300 return ret == -ENOSPC ? -EADDRNOTAVAIL : ret;
3301 }
3302
cma_port_is_unique(struct rdma_bind_list * bind_list,struct rdma_id_private * id_priv)3303 static int cma_port_is_unique(struct rdma_bind_list *bind_list,
3304 struct rdma_id_private *id_priv)
3305 {
3306 struct rdma_id_private *cur_id;
3307 struct vnet *vnet = id_priv->id.route.addr.dev_addr.net;
3308 struct sockaddr *daddr = cma_dst_addr(id_priv);
3309 struct sockaddr *saddr = cma_src_addr(id_priv);
3310 __be16 dport = cma_port(daddr);
3311
3312 hlist_for_each_entry(cur_id, &bind_list->owners, node) {
3313 struct vnet *cur_vnet = cur_id->id.route.addr.dev_addr.net;
3314 struct sockaddr *cur_daddr = cma_dst_addr(cur_id);
3315 struct sockaddr *cur_saddr = cma_src_addr(cur_id);
3316 __be16 cur_dport = cma_port(cur_daddr);
3317
3318 if (id_priv == cur_id)
3319 continue;
3320
3321 /* different dest port -> unique */
3322 if (!cma_any_port(cur_daddr) &&
3323 (dport != cur_dport))
3324 continue;
3325
3326 /* different src address -> unique */
3327 if (!cma_any_addr(vnet, saddr) &&
3328 !cma_any_addr(cur_vnet, cur_saddr) &&
3329 cma_addr_cmp(saddr, cur_saddr))
3330 continue;
3331
3332 /* different dst address -> unique */
3333 if (!cma_any_addr(cur_vnet, cur_daddr) &&
3334 cma_addr_cmp(daddr, cur_daddr))
3335 continue;
3336
3337 return -EADDRNOTAVAIL;
3338 }
3339 return 0;
3340 }
3341
cma_alloc_any_port(enum rdma_port_space ps,struct rdma_id_private * id_priv)3342 static int cma_alloc_any_port(enum rdma_port_space ps,
3343 struct rdma_id_private *id_priv)
3344 {
3345 static unsigned int last_used_port;
3346 int low, high, remaining;
3347 unsigned int rover;
3348 struct vnet *net = id_priv->id.route.addr.dev_addr.net;
3349
3350 inet_get_local_port_range(net, &low, &high);
3351 remaining = (high - low) + 1;
3352 rover = prandom_u32() % remaining + low;
3353 retry:
3354 if (last_used_port != rover) {
3355 struct rdma_bind_list *bind_list;
3356 int ret;
3357
3358 bind_list = cma_ps_find(net, ps, (unsigned short)rover);
3359
3360 if (!bind_list) {
3361 ret = cma_alloc_port(ps, id_priv, rover);
3362 } else {
3363 ret = cma_port_is_unique(bind_list, id_priv);
3364 if (!ret)
3365 cma_bind_port(bind_list, id_priv);
3366 }
3367 /*
3368 * Remember previously used port number in order to avoid
3369 * re-using same port immediately after it is closed.
3370 */
3371 if (!ret)
3372 last_used_port = rover;
3373 if (ret != -EADDRNOTAVAIL)
3374 return ret;
3375 }
3376 if (--remaining) {
3377 rover++;
3378 if ((rover < low) || (rover > high))
3379 rover = low;
3380 goto retry;
3381 }
3382 return -EADDRNOTAVAIL;
3383 }
3384
3385 /*
3386 * Check that the requested port is available. This is called when trying to
3387 * bind to a specific port, or when trying to listen on a bound port. In
3388 * the latter case, the provided id_priv may already be on the bind_list, but
3389 * we still need to check that it's okay to start listening.
3390 */
cma_check_port(struct rdma_bind_list * bind_list,struct rdma_id_private * id_priv,uint8_t reuseaddr)3391 static int cma_check_port(struct rdma_bind_list *bind_list,
3392 struct rdma_id_private *id_priv, uint8_t reuseaddr)
3393 {
3394 struct rdma_id_private *cur_id;
3395 struct sockaddr *addr, *cur_addr;
3396 struct vnet *vnet;
3397
3398 addr = cma_src_addr(id_priv);
3399 hlist_for_each_entry(cur_id, &bind_list->owners, node) {
3400 if (id_priv == cur_id)
3401 continue;
3402
3403 if ((cur_id->state != RDMA_CM_LISTEN) && reuseaddr &&
3404 cur_id->reuseaddr)
3405 continue;
3406
3407 cur_addr = cma_src_addr(cur_id);
3408 if (id_priv->afonly && cur_id->afonly &&
3409 (addr->sa_family != cur_addr->sa_family))
3410 continue;
3411
3412 vnet = cur_id->id.route.addr.dev_addr.net;
3413 if (cma_any_addr(vnet, addr) || cma_any_addr(vnet, cur_addr))
3414 return -EADDRNOTAVAIL;
3415
3416 if (!cma_addr_cmp(addr, cur_addr))
3417 return -EADDRINUSE;
3418 }
3419 return 0;
3420 }
3421
cma_use_port(enum rdma_port_space ps,struct rdma_id_private * id_priv)3422 static int cma_use_port(enum rdma_port_space ps,
3423 struct rdma_id_private *id_priv)
3424 {
3425 struct rdma_bind_list *bind_list;
3426 unsigned short snum;
3427 int ret;
3428
3429 snum = ntohs(cma_port(cma_src_addr(id_priv)));
3430 if (snum < IPPORT_RESERVED &&
3431 priv_check(curthread, PRIV_NETINET_BINDANY) != 0)
3432 return -EACCES;
3433
3434 bind_list = cma_ps_find(id_priv->id.route.addr.dev_addr.net, ps, snum);
3435 if (!bind_list) {
3436 ret = cma_alloc_port(ps, id_priv, snum);
3437 } else {
3438 ret = cma_check_port(bind_list, id_priv, id_priv->reuseaddr);
3439 if (!ret)
3440 cma_bind_port(bind_list, id_priv);
3441 }
3442 return ret;
3443 }
3444
cma_bind_listen(struct rdma_id_private * id_priv)3445 static int cma_bind_listen(struct rdma_id_private *id_priv)
3446 {
3447 struct rdma_bind_list *bind_list = id_priv->bind_list;
3448 int ret = 0;
3449
3450 mutex_lock(&lock);
3451 if (bind_list->owners.first->next)
3452 ret = cma_check_port(bind_list, id_priv, 0);
3453 mutex_unlock(&lock);
3454 return ret;
3455 }
3456
cma_select_inet_ps(struct rdma_id_private * id_priv)3457 static enum rdma_port_space cma_select_inet_ps(
3458 struct rdma_id_private *id_priv)
3459 {
3460 switch (id_priv->id.ps) {
3461 case RDMA_PS_TCP:
3462 case RDMA_PS_UDP:
3463 case RDMA_PS_IPOIB:
3464 case RDMA_PS_IB:
3465 case RDMA_PS_SDP:
3466 return id_priv->id.ps;
3467 default:
3468
3469 return 0;
3470 }
3471 }
3472
cma_select_ib_ps(struct rdma_id_private * id_priv)3473 static enum rdma_port_space cma_select_ib_ps(struct rdma_id_private *id_priv)
3474 {
3475 enum rdma_port_space ps = 0;
3476 struct sockaddr_ib *sib;
3477 u64 sid_ps, mask, sid;
3478
3479 sib = (struct sockaddr_ib *) cma_src_addr(id_priv);
3480 mask = be64_to_cpu(sib->sib_sid_mask) & RDMA_IB_IP_PS_MASK;
3481 sid = be64_to_cpu(sib->sib_sid) & mask;
3482
3483 if ((id_priv->id.ps == RDMA_PS_IB) && (sid == (RDMA_IB_IP_PS_IB & mask))) {
3484 sid_ps = RDMA_IB_IP_PS_IB;
3485 ps = RDMA_PS_IB;
3486 } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_TCP)) &&
3487 (sid == (RDMA_IB_IP_PS_TCP & mask))) {
3488 sid_ps = RDMA_IB_IP_PS_TCP;
3489 ps = RDMA_PS_TCP;
3490 } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_UDP)) &&
3491 (sid == (RDMA_IB_IP_PS_UDP & mask))) {
3492 sid_ps = RDMA_IB_IP_PS_UDP;
3493 ps = RDMA_PS_UDP;
3494 }
3495
3496 if (ps) {
3497 sib->sib_sid = cpu_to_be64(sid_ps | ntohs(cma_port((struct sockaddr *) sib)));
3498 sib->sib_sid_mask = cpu_to_be64(RDMA_IB_IP_PS_MASK |
3499 be64_to_cpu(sib->sib_sid_mask));
3500 }
3501 return ps;
3502 }
3503
cma_get_port(struct rdma_id_private * id_priv)3504 static int cma_get_port(struct rdma_id_private *id_priv)
3505 {
3506 enum rdma_port_space ps;
3507 int ret;
3508
3509 if (cma_family(id_priv) != AF_IB)
3510 ps = cma_select_inet_ps(id_priv);
3511 else
3512 ps = cma_select_ib_ps(id_priv);
3513 if (!ps)
3514 return -EPROTONOSUPPORT;
3515
3516 mutex_lock(&lock);
3517 if (cma_any_port(cma_src_addr(id_priv)))
3518 ret = cma_alloc_any_port(ps, id_priv);
3519 else
3520 ret = cma_use_port(ps, id_priv);
3521 mutex_unlock(&lock);
3522
3523 return ret;
3524 }
3525
/*
 * For IPv6 link-local (or interface-local multicast) addresses, make sure
 * a scope id can be determined and record it as the bound interface in
 * @dev_addr.
 *
 * The check operates on a local copy of @addr, with the current vnet
 * switched to dev_addr->net around the sa6_recoverscope() call.  Without
 * INET6, or for any other kind of address, this is a no-op.
 *
 * Returns 0 on success or when nothing had to be done, -EINVAL if
 * sa6_recoverscope() fails or the resulting scope id is 0.
 */
static int cma_check_linklocal(struct rdma_dev_addr *dev_addr,
			       const struct sockaddr *addr)
{
#ifdef INET6
	struct sockaddr_in6 sin6;

	if (addr->sa_family != AF_INET6)
		return 0;

	memcpy(&sin6, addr, sizeof(sin6));

	if (IN6_IS_SCOPE_LINKLOCAL(&sin6.sin6_addr) ||
	    IN6_IS_ADDR_MC_INTFACELOCAL(&sin6.sin6_addr)) {
		bool scope_missing;

		CURVNET_SET_QUIET(dev_addr->net);
		scope_missing = sa6_recoverscope(&sin6) ||
		    sin6.sin6_scope_id == 0;
		CURVNET_RESTORE();

		/* A link-local address without a scope id is unusable. */
		if (scope_missing)
			return -EINVAL;
		dev_addr->bound_dev_if = sin6.sin6_scope_id;
	}
#endif
	return 0;
}
3553
rdma_listen(struct rdma_cm_id * id,int backlog)3554 int rdma_listen(struct rdma_cm_id *id, int backlog)
3555 {
3556 struct rdma_id_private *id_priv;
3557 int ret;
3558
3559 id_priv = container_of(id, struct rdma_id_private, id);
3560 if (id_priv->state == RDMA_CM_IDLE) {
3561 id->route.addr.src_addr.ss_family = AF_INET;
3562 ret = rdma_bind_addr(id, cma_src_addr(id_priv));
3563 if (ret)
3564 return ret;
3565 }
3566
3567 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN))
3568 return -EINVAL;
3569
3570 if (id_priv->reuseaddr) {
3571 ret = cma_bind_listen(id_priv);
3572 if (ret)
3573 goto err;
3574 }
3575
3576 id_priv->backlog = backlog;
3577 if (id->device) {
3578 if (rdma_cap_ib_cm(id->device, 1)) {
3579 ret = cma_ib_listen(id_priv);
3580 if (ret)
3581 goto err;
3582 } else if (rdma_cap_iw_cm(id->device, 1)) {
3583 ret = cma_iw_listen(id_priv, backlog);
3584 if (ret)
3585 goto err;
3586 } else {
3587 ret = -ENOSYS;
3588 goto err;
3589 }
3590 } else
3591 cma_listen_on_all(id_priv);
3592
3593 return 0;
3594 err:
3595 id_priv->backlog = 0;
3596 cma_comp_exch(id_priv, RDMA_CM_LISTEN, RDMA_CM_ADDR_BOUND);
3597 return ret;
3598 }
3599 EXPORT_SYMBOL(rdma_listen);
3600
rdma_bind_addr(struct rdma_cm_id * id,struct sockaddr * addr)3601 int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
3602 {
3603 struct rdma_id_private *id_priv;
3604 struct vnet *vnet = id->route.addr.dev_addr.net;
3605 int ret;
3606 struct sockaddr *daddr;
3607
3608 if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 &&
3609 addr->sa_family != AF_IB)
3610 return -EAFNOSUPPORT;
3611
3612 id_priv = container_of(id, struct rdma_id_private, id);
3613 if (!cma_comp_exch(id_priv, RDMA_CM_IDLE, RDMA_CM_ADDR_BOUND))
3614 return -EINVAL;
3615
3616 ret = cma_check_linklocal(&id->route.addr.dev_addr, addr);
3617 if (ret)
3618 goto err1;
3619
3620 memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr));
3621 if (!cma_any_addr(vnet, addr)) {
3622 ret = cma_translate_addr(addr, &id->route.addr.dev_addr);
3623 if (ret)
3624 goto err1;
3625
3626 ret = cma_acquire_dev(id_priv, NULL);
3627 if (ret)
3628 goto err1;
3629 }
3630
3631 if (!(id_priv->options & (1 << CMA_OPTION_AFONLY))) {
3632 if (addr->sa_family == AF_INET)
3633 id_priv->afonly = 1;
3634 #ifdef INET6
3635 else if (addr->sa_family == AF_INET6) {
3636 CURVNET_SET_QUIET(vnet);
3637 id_priv->afonly = V_ip6_v6only;
3638 CURVNET_RESTORE();
3639 }
3640 #endif
3641 }
3642 ret = cma_get_port(id_priv);
3643 if (ret)
3644 goto err2;
3645
3646 daddr = cma_dst_addr(id_priv);
3647 daddr->sa_family = addr->sa_family;
3648
3649 return 0;
3650 err2:
3651 if (id_priv->cma_dev)
3652 cma_release_dev(id_priv);
3653 err1:
3654 cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE);
3655 return ret;
3656 }
3657 EXPORT_SYMBOL(rdma_bind_addr);
3658
sdp_format_hdr(struct sdp_hh * sdp_hdr,struct rdma_id_private * id_priv)3659 static int sdp_format_hdr(struct sdp_hh *sdp_hdr, struct rdma_id_private *id_priv)
3660 {
3661 /*
3662 * XXXCEM: CMA just sets the version itself rather than relying on
3663 * passed in packet to have the major version set. Should we?
3664 */
3665 if (sdp_get_majv(sdp_hdr->majv_minv) != SDP_MAJ_VERSION)
3666 return -EINVAL;
3667
3668 if (cma_family(id_priv) == AF_INET) {
3669 struct sockaddr_in *src4, *dst4;
3670
3671 src4 = (struct sockaddr_in *) cma_src_addr(id_priv);
3672 dst4 = (struct sockaddr_in *) cma_dst_addr(id_priv);
3673
3674 sdp_set_ip_ver(sdp_hdr, 4);
3675 sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
3676 sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
3677 sdp_hdr->port = src4->sin_port;
3678 } else if (cma_family(id_priv) == AF_INET6) {
3679 struct sockaddr_in6 *src6, *dst6;
3680
3681 src6 = (struct sockaddr_in6 *) cma_src_addr(id_priv);
3682 dst6 = (struct sockaddr_in6 *) cma_dst_addr(id_priv);
3683
3684 sdp_set_ip_ver(sdp_hdr, 6);
3685 sdp_hdr->src_addr.ip6 = src6->sin6_addr;
3686 sdp_hdr->dst_addr.ip6 = dst6->sin6_addr;
3687 sdp_hdr->port = src6->sin6_port;
3688 cma_ip6_clear_scope_id(&sdp_hdr->src_addr.ip6);
3689 cma_ip6_clear_scope_id(&sdp_hdr->dst_addr.ip6);
3690 } else
3691 return -EAFNOSUPPORT;
3692 return 0;
3693 }
3694
cma_format_hdr(void * hdr,struct rdma_id_private * id_priv)3695 static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv)
3696 {
3697 struct cma_hdr *cma_hdr;
3698
3699 if (id_priv->id.ps == RDMA_PS_SDP)
3700 return sdp_format_hdr(hdr, id_priv);
3701
3702 cma_hdr = hdr;
3703 cma_hdr->cma_version = CMA_VERSION;
3704 if (cma_family(id_priv) == AF_INET) {
3705 struct sockaddr_in *src4, *dst4;
3706
3707 src4 = (struct sockaddr_in *) cma_src_addr(id_priv);
3708 dst4 = (struct sockaddr_in *) cma_dst_addr(id_priv);
3709
3710 cma_set_ip_ver(cma_hdr, 4);
3711 cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
3712 cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
3713 cma_hdr->port = src4->sin_port;
3714 } else if (cma_family(id_priv) == AF_INET6) {
3715 struct sockaddr_in6 *src6, *dst6;
3716
3717 src6 = (struct sockaddr_in6 *) cma_src_addr(id_priv);
3718 dst6 = (struct sockaddr_in6 *) cma_dst_addr(id_priv);
3719
3720 cma_set_ip_ver(cma_hdr, 6);
3721 cma_hdr->src_addr.ip6 = src6->sin6_addr;
3722 cma_hdr->dst_addr.ip6 = dst6->sin6_addr;
3723 cma_hdr->port = src6->sin6_port;
3724 cma_ip6_clear_scope_id(&cma_hdr->src_addr.ip6);
3725 cma_ip6_clear_scope_id(&cma_hdr->dst_addr.ip6);
3726 }
3727 return 0;
3728 }
3729
/*
 * IB CM event handler for ids that sent a SIDR request (installed by
 * cma_resolve_ib_udp()).
 *
 * Translates the IB CM event into an RDMA CM event and delivers it to the
 * id's event handler, all under the id's handler_mutex:
 *   - IB_CM_SIDR_REQ_ERROR      -> RDMA_CM_EVENT_UNREACHABLE, -ETIMEDOUT
 *   - IB_CM_SIDR_REP_RECEIVED   -> UNREACHABLE if the reply status is not
 *     IB_SIDR_SUCCESS; ADDR_ERROR if the qkey cannot be set or the AH
 *     attributes cannot be built; otherwise ESTABLISHED with the remote
 *     QPN/qkey and AH attributes filled in.
 *   - anything else is logged and ignored.
 * Events are dropped silently if the id is not in RDMA_CM_CONNECT.
 *
 * Returns 0 normally.  If the user's event handler returns non-zero, the
 * id is destroyed here and that value is returned.
 */
static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
				const struct ib_cm_event *ib_event)
{
	struct rdma_id_private *id_priv = cm_id->context;
	struct rdma_cm_event event = {};
	const struct ib_cm_sidr_rep_event_param *rep =
				&ib_event->param.sidr_rep_rcvd;
	int ret = 0;

	mutex_lock(&id_priv->handler_mutex);
	if (id_priv->state != RDMA_CM_CONNECT)
		goto out;

	switch (ib_event->event) {
	case IB_CM_SIDR_REQ_ERROR:
		event.event = RDMA_CM_EVENT_UNREACHABLE;
		event.status = -ETIMEDOUT;
		break;
	case IB_CM_SIDR_REP_RECEIVED:
		/* Private data is passed up regardless of the reply status. */
		event.param.ud.private_data = ib_event->private_data;
		event.param.ud.private_data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE;
		if (rep->status != IB_SIDR_SUCCESS) {
			event.event = RDMA_CM_EVENT_UNREACHABLE;
			event.status = ib_event->param.sidr_rep_rcvd.status;
			break;
		}
		ret = cma_set_qkey(id_priv, rep->qkey);
		if (ret) {
			event.event = RDMA_CM_EVENT_ADDR_ERROR;
			event.status = ret;
			break;
		}
		ret = ib_init_ah_attr_from_path(id_priv->id.device,
						id_priv->id.port_num,
						id_priv->id.route.path_rec,
						&event.param.ud.ah_attr,
						rep->sgid_attr);
		if (ret) {
			event.event = RDMA_CM_EVENT_ADDR_ERROR;
			event.status = ret;
			break;
		}
		event.param.ud.qp_num = rep->qpn;
		event.param.ud.qkey = rep->qkey;
		event.event = RDMA_CM_EVENT_ESTABLISHED;
		event.status = 0;
		break;
	default:
		pr_err("RDMA CMA: unexpected IB CM event: %d\n",
		       ib_event->event);
		goto out;
	}

	/*
	 * From here on ret carries only the event handler's verdict; any
	 * earlier error has already been copied into event.status.
	 */
	ret = id_priv->id.event_handler(&id_priv->id, &event);

	/* Called on every path, including those that never built ah_attr. */
	rdma_destroy_ah_attr(&event.param.ud.ah_attr);
	if (ret) {
		/* Destroy the CM ID by returning a non-zero value. */
		id_priv->cm_id.ib = NULL;
		cma_exch(id_priv, RDMA_CM_DESTROYING);
		/* The mutex is dropped before the id is destroyed. */
		mutex_unlock(&id_priv->handler_mutex);
		rdma_destroy_id(&id_priv->id);
		return ret;
	}
out:
	mutex_unlock(&id_priv->handler_mutex);
	return ret;
}
3798
cma_resolve_ib_udp(struct rdma_id_private * id_priv,struct rdma_conn_param * conn_param)3799 static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
3800 struct rdma_conn_param *conn_param)
3801 {
3802 struct ib_cm_sidr_req_param req;
3803 struct ib_cm_id *id;
3804 void *private_data;
3805 u8 offset;
3806 int ret;
3807
3808 memset(&req, 0, sizeof req);
3809 offset = cma_user_data_offset(id_priv);
3810 req.private_data_len = offset + conn_param->private_data_len;
3811 if (req.private_data_len < conn_param->private_data_len)
3812 return -EINVAL;
3813
3814 if (req.private_data_len) {
3815 private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
3816 if (!private_data)
3817 return -ENOMEM;
3818 } else {
3819 private_data = NULL;
3820 }
3821
3822 if (conn_param->private_data && conn_param->private_data_len)
3823 memcpy((char *)private_data + offset, conn_param->private_data,
3824 conn_param->private_data_len);
3825
3826 if (private_data) {
3827 ret = cma_format_hdr(private_data, id_priv);
3828 if (ret)
3829 goto out;
3830 req.private_data = private_data;
3831 }
3832
3833 id = ib_create_cm_id(id_priv->id.device, cma_sidr_rep_handler,
3834 id_priv);
3835 if (IS_ERR(id)) {
3836 ret = PTR_ERR(id);
3837 goto out;
3838 }
3839 id_priv->cm_id.ib = id;
3840
3841 req.path = id_priv->id.route.path_rec;
3842 req.sgid_attr = id_priv->id.route.addr.dev_addr.sgid_attr;
3843 req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
3844 req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8);
3845 req.max_cm_retries = CMA_MAX_CM_RETRIES;
3846
3847 ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req);
3848 if (ret) {
3849 ib_destroy_cm_id(id_priv->cm_id.ib);
3850 id_priv->cm_id.ib = NULL;
3851 }
3852 out:
3853 kfree(private_data);
3854 return ret;
3855 }
3856
/*
 * Send an IB CM connection request (REQ) for a connected-mode id.
 *
 * Builds the private data as "CMA header + user private data" (the header
 * length comes from cma_user_data_offset()), creates an IB CM id with
 * cma_ib_handler() as its callback, fills the REQ from the id's route and
 * @conn_param, and sends it.
 *
 * Returns 0 on success; -EINVAL if the combined private data length
 * overflows the request's length field; -ENOMEM on allocation failure; or
 * the error from ib_create_cm_id(), cma_format_hdr() or ib_send_cm_req().
 * On any failure after the CM id was created, the CM id is destroyed and
 * id_priv->cm_id.ib is cleared.  The temporary private data buffer is
 * always freed before returning.
 */
static int cma_connect_ib(struct rdma_id_private *id_priv,
			  struct rdma_conn_param *conn_param)
{
	struct ib_cm_req_param req;
	struct rdma_route *route;
	void *private_data;
	struct ib_cm_id *id;
	u8 offset;
	int ret;

	memset(&req, 0, sizeof req);
	offset = cma_user_data_offset(id_priv);
	req.private_data_len = offset + conn_param->private_data_len;
	/* A wrapped sum is smaller than either operand. */
	if (req.private_data_len < conn_param->private_data_len)
		return -EINVAL;

	if (req.private_data_len) {
		private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
		if (!private_data)
			return -ENOMEM;
	} else {
		private_data = NULL;
	}

	/* User data goes after the space reserved for the CMA header. */
	if (conn_param->private_data && conn_param->private_data_len)
		memcpy((char *)private_data + offset, conn_param->private_data,
		       conn_param->private_data_len);

	id = ib_create_cm_id(id_priv->id.device, cma_ib_handler, id_priv);
	if (IS_ERR(id)) {
		ret = PTR_ERR(id);
		goto out;
	}
	id_priv->cm_id.ib = id;

	route = &id_priv->id.route;
	if (private_data) {
		ret = cma_format_hdr(private_data, id_priv);
		if (ret)
			goto out;
		req.private_data = private_data;
	}

	req.primary_path = &route->path_rec[0];
	if (route->num_paths == 2)
		req.alternate_path = &route->path_rec[1];

	req.ppath_sgid_attr = id_priv->id.route.addr.dev_addr.sgid_attr;
	/* Alternate path SGID attribute currently unsupported */
	req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
	req.qp_num = id_priv->qp_num;
	req.qp_type = id_priv->id.qp_type;
	req.starting_psn = id_priv->seq_num;
	req.responder_resources = conn_param->responder_resources;
	req.initiator_depth = conn_param->initiator_depth;
	req.flow_control = conn_param->flow_control;
	/* Both retry counts are capped at 7. */
	req.retry_count = min_t(u8, 7, conn_param->retry_count);
	req.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count);
	req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
	req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
	req.max_cm_retries = CMA_MAX_CM_RETRIES;
	req.srq = id_priv->srq ? 1 : 0;

	ret = ib_send_cm_req(id_priv->cm_id.ib, &req);
out:
	/*
	 * Shared exit: "id" is an error pointer if creation failed, so only
	 * destroy it when it is a real CM id and something went wrong.
	 */
	if (ret && !IS_ERR(id)) {
		ib_destroy_cm_id(id);
		id_priv->cm_id.ib = NULL;
	}

	kfree(private_data);
	return ret;
}
3930
cma_connect_iw(struct rdma_id_private * id_priv,struct rdma_conn_param * conn_param)3931 static int cma_connect_iw(struct rdma_id_private *id_priv,
3932 struct rdma_conn_param *conn_param)
3933 {
3934 struct iw_cm_id *cm_id;
3935 int ret;
3936 struct iw_cm_conn_param iw_param;
3937
3938 cm_id = iw_create_cm_id(id_priv->id.device, cma_iw_handler, id_priv);
3939 if (IS_ERR(cm_id))
3940 return PTR_ERR(cm_id);
3941
3942 cm_id->tos = id_priv->tos;
3943 id_priv->cm_id.iw = cm_id;
3944
3945 memcpy(&cm_id->local_addr, cma_src_addr(id_priv),
3946 rdma_addr_size(cma_src_addr(id_priv)));
3947 memcpy(&cm_id->remote_addr, cma_dst_addr(id_priv),
3948 rdma_addr_size(cma_dst_addr(id_priv)));
3949
3950 ret = cma_modify_qp_rtr(id_priv, conn_param);
3951 if (ret)
3952 goto out;
3953
3954 if (conn_param) {
3955 iw_param.ord = conn_param->initiator_depth;
3956 iw_param.ird = conn_param->responder_resources;
3957 iw_param.private_data = conn_param->private_data;
3958 iw_param.private_data_len = conn_param->private_data_len;
3959 iw_param.qpn = id_priv->id.qp ? id_priv->qp_num : conn_param->qp_num;
3960 } else {
3961 memset(&iw_param, 0, sizeof iw_param);
3962 iw_param.qpn = id_priv->qp_num;
3963 }
3964 ret = iw_cm_connect(cm_id, &iw_param);
3965 out:
3966 if (ret) {
3967 iw_destroy_cm_id(cm_id);
3968 id_priv->cm_id.iw = NULL;
3969 }
3970 return ret;
3971 }
3972
rdma_connect(struct rdma_cm_id * id,struct rdma_conn_param * conn_param)3973 int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
3974 {
3975 struct rdma_id_private *id_priv;
3976 int ret;
3977
3978 id_priv = container_of(id, struct rdma_id_private, id);
3979 if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT))
3980 return -EINVAL;
3981
3982 if (!id->qp) {
3983 id_priv->qp_num = conn_param->qp_num;
3984 id_priv->srq = conn_param->srq;
3985 }
3986
3987 if (rdma_cap_ib_cm(id->device, id->port_num)) {
3988 if (id->qp_type == IB_QPT_UD)
3989 ret = cma_resolve_ib_udp(id_priv, conn_param);
3990 else
3991 ret = cma_connect_ib(id_priv, conn_param);
3992 } else if (rdma_cap_iw_cm(id->device, id->port_num))
3993 ret = cma_connect_iw(id_priv, conn_param);
3994 else
3995 ret = -ENOSYS;
3996 if (ret)
3997 goto err;
3998
3999 return 0;
4000 err:
4001 cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_ROUTE_RESOLVED);
4002 return ret;
4003 }
4004 EXPORT_SYMBOL(rdma_connect);
4005
cma_accept_ib(struct rdma_id_private * id_priv,struct rdma_conn_param * conn_param)4006 static int cma_accept_ib(struct rdma_id_private *id_priv,
4007 struct rdma_conn_param *conn_param)
4008 {
4009 struct ib_cm_rep_param rep;
4010 int ret;
4011
4012 ret = cma_modify_qp_rtr(id_priv, conn_param);
4013 if (ret)
4014 goto out;
4015
4016 ret = cma_modify_qp_rts(id_priv, conn_param);
4017 if (ret)
4018 goto out;
4019
4020 memset(&rep, 0, sizeof rep);
4021 rep.qp_num = id_priv->qp_num;
4022 rep.starting_psn = id_priv->seq_num;
4023 rep.private_data = conn_param->private_data;
4024 rep.private_data_len = conn_param->private_data_len;
4025 rep.responder_resources = conn_param->responder_resources;
4026 rep.initiator_depth = conn_param->initiator_depth;
4027 rep.failover_accepted = 0;
4028 rep.flow_control = conn_param->flow_control;
4029 rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count);
4030 rep.srq = id_priv->srq ? 1 : 0;
4031
4032 ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep);
4033 out:
4034 return ret;
4035 }
4036
cma_accept_iw(struct rdma_id_private * id_priv,struct rdma_conn_param * conn_param)4037 static int cma_accept_iw(struct rdma_id_private *id_priv,
4038 struct rdma_conn_param *conn_param)
4039 {
4040 struct iw_cm_conn_param iw_param;
4041 int ret;
4042
4043 ret = cma_modify_qp_rtr(id_priv, conn_param);
4044 if (ret)
4045 return ret;
4046
4047 iw_param.ord = conn_param->initiator_depth;
4048 iw_param.ird = conn_param->responder_resources;
4049 iw_param.private_data = conn_param->private_data;
4050 iw_param.private_data_len = conn_param->private_data_len;
4051 if (id_priv->id.qp) {
4052 iw_param.qpn = id_priv->qp_num;
4053 } else
4054 iw_param.qpn = conn_param->qp_num;
4055
4056 return iw_cm_accept(id_priv->cm_id.iw, &iw_param);
4057 }
4058
/*
 * Build and send a SIDR reply carrying @status and the optional private data.
 *
 * For IB_SIDR_SUCCESS the qkey is first installed with cma_set_qkey(); if
 * that fails its error is returned and nothing is sent.  On success the
 * reply also carries the id's QP number and qkey.  For any other status
 * those two fields stay zero.
 *
 * Returns the result of ib_send_cm_sidr_rep(), or the cma_set_qkey() error.
 */
static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
			     enum ib_cm_sidr_status status, u32 qkey,
			     const void *private_data, int private_data_len)
{
	struct ib_cm_sidr_rep_param reply;

	memset(&reply, 0, sizeof(reply));
	reply.status = status;
	reply.private_data = private_data;
	reply.private_data_len = private_data_len;

	if (status == IB_SIDR_SUCCESS) {
		int err = cma_set_qkey(id_priv, qkey);

		if (err != 0)
			return err;
		reply.qp_num = id_priv->qp_num;
		reply.qkey = id_priv->qkey;
	}

	return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &reply);
}
4080
rdma_accept(struct rdma_cm_id * id,struct rdma_conn_param * conn_param)4081 int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
4082 {
4083 struct rdma_id_private *id_priv;
4084 int ret;
4085
4086 id_priv = container_of(id, struct rdma_id_private, id);
4087
4088 id_priv->owner = task_pid_nr(current);
4089
4090 if (!cma_comp(id_priv, RDMA_CM_CONNECT))
4091 return -EINVAL;
4092
4093 if (!id->qp && conn_param) {
4094 id_priv->qp_num = conn_param->qp_num;
4095 id_priv->srq = conn_param->srq;
4096 }
4097
4098 if (rdma_cap_ib_cm(id->device, id->port_num)) {
4099 if (id->qp_type == IB_QPT_UD) {
4100 if (conn_param)
4101 ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
4102 conn_param->qkey,
4103 conn_param->private_data,
4104 conn_param->private_data_len);
4105 else
4106 ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
4107 0, NULL, 0);
4108 } else {
4109 if (conn_param)
4110 ret = cma_accept_ib(id_priv, conn_param);
4111 else
4112 ret = cma_rep_recv(id_priv);
4113 }
4114 } else if (rdma_cap_iw_cm(id->device, id->port_num))
4115 ret = cma_accept_iw(id_priv, conn_param);
4116 else
4117 ret = -ENOSYS;
4118
4119 if (ret)
4120 goto reject;
4121
4122 return 0;
4123 reject:
4124 cma_modify_qp_err(id_priv);
4125 rdma_reject(id, NULL, 0);
4126 return ret;
4127 }
4128 EXPORT_SYMBOL(rdma_accept);
4129
rdma_notify(struct rdma_cm_id * id,enum ib_event_type event)4130 int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
4131 {
4132 struct rdma_id_private *id_priv;
4133 int ret;
4134
4135 id_priv = container_of(id, struct rdma_id_private, id);
4136 if (!id_priv->cm_id.ib)
4137 return -EINVAL;
4138
4139 switch (id->device->node_type) {
4140 case RDMA_NODE_IB_CA:
4141 ret = ib_cm_notify(id_priv->cm_id.ib, event);
4142 break;
4143 default:
4144 ret = 0;
4145 break;
4146 }
4147 return ret;
4148 }
4149 EXPORT_SYMBOL(rdma_notify);
4150
/*
 * rdma_reject - reject a pending request on @id, optionally with private data.
 *
 * Returns -EINVAL when the id has no CM id attached.  On IB CM ports a UD id
 * answers with a SIDR reply of status IB_SIDR_REJECT, any other QP type
 * sends a CM REJ with reason IB_CM_REJ_CONSUMER_DEFINED.  On iWARP CM ports
 * iw_cm_reject() is used.  Ports with neither capability yield -ENOSYS.
 */
int rdma_reject(struct rdma_cm_id *id, const void *private_data,
		u8 private_data_len)
{
	struct rdma_id_private *priv =
	    container_of(id, struct rdma_id_private, id);

	if (priv->cm_id.ib == NULL)
		return -EINVAL;

	if (rdma_cap_ib_cm(id->device, id->port_num)) {
		if (id->qp_type == IB_QPT_UD)
			return cma_send_sidr_rep(priv, IB_SIDR_REJECT, 0,
			    private_data, private_data_len);

		return ib_send_cm_rej(priv->cm_id.ib,
		    IB_CM_REJ_CONSUMER_DEFINED, NULL,
		    0, private_data, private_data_len);
	}

	if (rdma_cap_iw_cm(id->device, id->port_num))
		return iw_cm_reject(priv->cm_id.iw,
		    private_data, private_data_len);

	return -ENOSYS;
}
EXPORT_SYMBOL(rdma_reject);
4178
rdma_disconnect(struct rdma_cm_id * id)4179 int rdma_disconnect(struct rdma_cm_id *id)
4180 {
4181 struct rdma_id_private *id_priv;
4182 int ret;
4183
4184 id_priv = container_of(id, struct rdma_id_private, id);
4185 if (!id_priv->cm_id.ib)
4186 return -EINVAL;
4187
4188 if (rdma_cap_ib_cm(id->device, id->port_num)) {
4189 ret = cma_modify_qp_err(id_priv);
4190 if (ret)
4191 goto out;
4192 /* Initiate or respond to a disconnect. */
4193 if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0))
4194 ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0);
4195 } else if (rdma_cap_iw_cm(id->device, id->port_num)) {
4196 ret = iw_cm_disconnect(id_priv->cm_id.iw, 0);
4197 } else
4198 ret = -EINVAL;
4199
4200 out:
4201 return ret;
4202 }
4203 EXPORT_SYMBOL(rdma_disconnect);
4204
/*
 * Multicast join completion handler.  It is passed as the callback to
 * ib_sa_join_multicast() and is also called directly, with status 0, from
 * iboe_mcast_work_handler().
 *
 * @status:    result of the join; 0 means success
 * @multicast: join record; ->context points at the owning cma_multicast
 *
 * With id_priv->handler_mutex held, the handler delivers either
 * RDMA_CM_EVENT_MULTICAST_JOIN or RDMA_CM_EVENT_MULTICAST_ERROR to the id's
 * event handler.  If the event handler returns non-zero, the id is destroyed
 * here.  Always returns 0.
 */
static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
{
	struct rdma_id_private *id_priv;
	struct cma_multicast *mc = multicast->context;
	struct rdma_cm_event event = {};
	int ret = 0;

	id_priv = mc->id_priv;
	mutex_lock(&id_priv->handler_mutex);
	/* Only ids that are still bound or resolved get an event. */
	if (id_priv->state != RDMA_CM_ADDR_BOUND &&
	    id_priv->state != RDMA_CM_ADDR_RESOLVED)
		goto out;

	/* A qkey or attach failure turns a successful join into an error. */
	if (!status)
		status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey));
	mutex_lock(&id_priv->qp_mutex);
	if (!status && id_priv->id.qp)
		status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
					 be16_to_cpu(multicast->rec.mlid));
	mutex_unlock(&id_priv->qp_mutex);

	event.status = status;
	event.param.ud.private_data = mc->context;
	if (!status) {
		struct rdma_dev_addr *dev_addr =
			&id_priv->id.route.addr.dev_addr;
		if_t ndev =
			dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
		enum ib_gid_type gid_type =
			id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
			rdma_start_port(id_priv->cma_dev->device)];

		event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
		ret = ib_init_ah_from_mcmember(id_priv->id.device,
					       id_priv->id.port_num,
					       &multicast->rec,
					       ndev, gid_type,
					       &event.param.ud.ah_attr);
		/*
		 * ret only selects the event type here; it is overwritten by
		 * the event handler's return value below.
		 */
		if (ret)
			event.event = RDMA_CM_EVENT_MULTICAST_ERROR;

		event.param.ud.qp_num = 0xFFFFFF;
		event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
		if (ndev)
			dev_put(ndev);
	} else
		event.event = RDMA_CM_EVENT_MULTICAST_ERROR;

	ret = id_priv->id.event_handler(&id_priv->id, &event);

	rdma_destroy_ah_attr(&event.param.ud.ah_attr);
	if (ret) {
		/*
		 * Non-zero return from the event handler: mark the id as
		 * being destroyed, drop the handler mutex and destroy it.
		 */
		cma_exch(id_priv, RDMA_CM_DESTROYING);
		mutex_unlock(&id_priv->handler_mutex);
		rdma_destroy_id(&id_priv->id);
		return 0;
	}

out:
	mutex_unlock(&id_priv->handler_mutex);
	return 0;
}
4267
cma_set_mgid(struct rdma_id_private * id_priv,struct sockaddr * addr,union ib_gid * mgid)4268 static void cma_set_mgid(struct rdma_id_private *id_priv,
4269 struct sockaddr *addr, union ib_gid *mgid)
4270 {
4271 unsigned char mc_map[MAX_ADDR_LEN];
4272 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
4273 struct sockaddr_in *sin = (struct sockaddr_in *) addr;
4274 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr;
4275
4276 if (cma_any_addr(dev_addr->net, addr)) {
4277 memset(mgid, 0, sizeof *mgid);
4278 } else if ((addr->sa_family == AF_INET6) &&
4279 ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFFF0FFFF) ==
4280 0xFF10A01B)) {
4281 /* IPv6 address is an SA assigned MGID. */
4282 memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
4283 } else if (addr->sa_family == AF_IB) {
4284 memcpy(mgid, &((struct sockaddr_ib *) addr)->sib_addr, sizeof *mgid);
4285 } else if (addr->sa_family == AF_INET6) {
4286 ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map);
4287 if (id_priv->id.ps == RDMA_PS_UDP)
4288 mc_map[7] = 0x01; /* Use RDMA CM signature */
4289 *mgid = *(union ib_gid *) (mc_map + 4);
4290 } else {
4291 ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map);
4292 if (id_priv->id.ps == RDMA_PS_UDP)
4293 mc_map[7] = 0x01; /* Use RDMA CM signature */
4294 *mgid = *(union ib_gid *) (mc_map + 4);
4295 }
4296 }
4297
cma_join_ib_multicast(struct rdma_id_private * id_priv,struct cma_multicast * mc)4298 static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
4299 struct cma_multicast *mc)
4300 {
4301 struct ib_sa_mcmember_rec rec;
4302 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
4303 ib_sa_comp_mask comp_mask;
4304 int ret;
4305
4306 ib_addr_get_mgid(dev_addr, &rec.mgid);
4307 ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num,
4308 &rec.mgid, &rec);
4309 if (ret)
4310 return ret;
4311
4312 ret = cma_set_qkey(id_priv, 0);
4313 if (ret)
4314 return ret;
4315
4316 cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid);
4317 rec.qkey = cpu_to_be32(id_priv->qkey);
4318 rdma_addr_get_sgid(dev_addr, &rec.port_gid);
4319 rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
4320 rec.join_state = mc->join_state;
4321
4322 if ((rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) &&
4323 (!ib_sa_sendonly_fullmem_support(&sa_client,
4324 id_priv->id.device,
4325 id_priv->id.port_num))) {
4326 pr_warn("RDMA CM: %s port %u Unable to multicast join\n"
4327 "RDMA CM: SM doesn't support Send Only Full Member option\n",
4328 id_priv->id.device->name, id_priv->id.port_num);
4329 return -EOPNOTSUPP;
4330 }
4331
4332 comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
4333 IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE |
4334 IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL |
4335 IB_SA_MCMEMBER_REC_FLOW_LABEL |
4336 IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;
4337
4338 if (id_priv->id.ps == RDMA_PS_IPOIB)
4339 comp_mask |= IB_SA_MCMEMBER_REC_RATE |
4340 IB_SA_MCMEMBER_REC_RATE_SELECTOR |
4341 IB_SA_MCMEMBER_REC_MTU_SELECTOR |
4342 IB_SA_MCMEMBER_REC_MTU |
4343 IB_SA_MCMEMBER_REC_HOP_LIMIT;
4344
4345 mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
4346 id_priv->id.port_num, &rec,
4347 comp_mask, GFP_KERNEL,
4348 cma_ib_mc_handler, mc);
4349 return PTR_ERR_OR_ZERO(mc->multicast.ib);
4350 }
4351
iboe_mcast_work_handler(struct work_struct * work)4352 static void iboe_mcast_work_handler(struct work_struct *work)
4353 {
4354 struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work);
4355 struct cma_multicast *mc = mw->mc;
4356 struct ib_sa_multicast *m = mc->multicast.ib;
4357
4358 mc->multicast.ib->context = mc;
4359 cma_ib_mc_handler(0, m);
4360 kref_put(&mc->mcref, release_mc);
4361 kfree(mw);
4362 }
4363
cma_iboe_set_mgid(struct vnet * vnet,struct sockaddr * addr,union ib_gid * mgid,enum ib_gid_type gid_type)4364 static void cma_iboe_set_mgid(struct vnet *vnet, struct sockaddr *addr,
4365 union ib_gid *mgid, enum ib_gid_type gid_type)
4366 {
4367 struct sockaddr_in *sin = (struct sockaddr_in *)addr;
4368 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;
4369
4370 if (cma_any_addr(vnet, addr)) {
4371 memset(mgid, 0, sizeof *mgid);
4372 } else if (addr->sa_family == AF_INET6) {
4373 memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
4374 } else {
4375 mgid->raw[0] =
4376 (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ? 0 : 0xff;
4377 mgid->raw[1] =
4378 (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ? 0 : 0x0e;
4379 mgid->raw[2] = 0;
4380 mgid->raw[3] = 0;
4381 mgid->raw[4] = 0;
4382 mgid->raw[5] = 0;
4383 mgid->raw[6] = 0;
4384 mgid->raw[7] = 0;
4385 mgid->raw[8] = 0;
4386 mgid->raw[9] = 0;
4387 mgid->raw[10] = 0xff;
4388 mgid->raw[11] = 0xff;
4389 *(__be32 *)(&mgid->raw[12]) = sin->sin_addr.s_addr;
4390 }
4391 }
4392
/*
 * Join a multicast group on a RoCE port.
 *
 * No SA is queried here: the member record is built locally in a freshly
 * allocated ib_sa_multicast, and a work item is queued on cma_wq so that
 * iboe_mcast_work_handler() reports the join result.
 *
 * Returns 0 once the work item is queued.  Errors returned directly:
 * -EINVAL for a zero address or when the computed MTU is 0, -ENOMEM on
 * allocation failure, -ENODEV when no bound network device is found,
 * -ENOTSUPP for a non-IP address with a RoCE v2 (UDP encap) GID type, or
 * the error from cma_igmp_send().  On error both allocations made here are
 * freed; mc itself is left for the caller.
 */
static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
				   struct cma_multicast *mc)
{
	struct iboe_mcast_work *work;
	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
	int err = 0;
	struct sockaddr *addr = (struct sockaddr *)&mc->addr;
	if_t ndev = NULL;
	enum ib_gid_type gid_type;
	bool send_only;

	send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN);

	if (cma_zero_addr((struct sockaddr *)&mc->addr))
		return -EINVAL;

	work = kzalloc(sizeof *work, GFP_KERNEL);
	if (!work)
		return -ENOMEM;

	mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL);
	if (!mc->multicast.ib) {
		err = -ENOMEM;
		goto out1;
	}

	/* default_gid_type[] is indexed relative to the first port number. */
	gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
		   rdma_start_port(id_priv->cma_dev->device)];
	cma_iboe_set_mgid(dev_addr->net, addr, &mc->multicast.ib->rec.mgid, gid_type);

	mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff);
	if (id_priv->id.ps == RDMA_PS_UDP)
		mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);

	/* ndev stays NULL (-> -ENODEV) when the id is not bound to a device. */
	if (dev_addr->bound_dev_if)
		ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
	if (!ndev) {
		err = -ENODEV;
		goto out2;
	}
	mc->multicast.ib->rec.rate = iboe_get_rate(ndev);
	mc->multicast.ib->rec.hop_limit = 1;
	mc->multicast.ib->rec.mtu = iboe_get_mtu(if_getmtu(ndev));

	if (addr->sa_family == AF_INET || addr->sa_family == AF_INET6) {
		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
			mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
			/* cma_igmp_send() is skipped for send-only joins. */
			if (!send_only) {
				err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid,
						    true);
			}
		}
	} else {
		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
			err = -ENOTSUPP;
	}
	/* The device reference is dropped before err is examined. */
	dev_put(ndev);
	if (err || !mc->multicast.ib->rec.mtu) {
		if (!err)
			err = -EINVAL;
		goto out2;
	}
	rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
		    &mc->multicast.ib->rec.port_gid);
	work->id = id_priv;
	work->mc = mc;
	INIT_WORK(&work->work, iboe_mcast_work_handler);
	/* Reference for the queued work; dropped in iboe_mcast_work_handler(). */
	kref_get(&mc->mcref);
	queue_work(cma_wq, &work->work);

	return 0;

out2:
	kfree(mc->multicast.ib);
out1:
	kfree(work);
	return err;
}
4471
/*
 * rdma_join_multicast - join the multicast group @addr on @id.
 *
 * @id:         id to join on; must already have a device
 * @addr:       multicast address (rdma_addr_size() bytes are copied)
 * @join_state: join state stored in the tracking entry
 * @context:    caller cookie stored in the tracking entry
 *
 * The id must be in RDMA_CM_ADDR_BOUND or RDMA_CM_ADDR_RESOLVED (checked via
 * cma_comp()).  A cma_multicast entry is allocated and the join is started
 * through cma_iboe_join_multicast() on RoCE ports or
 * cma_join_ib_multicast() on ports with IB multicast support.  Only after
 * the join was started successfully is the entry linked into the id's
 * mc_list.
 *
 * Returns 0 on success, -EINVAL for a missing device or wrong state,
 * -ENOMEM on allocation failure, -ENOSYS for unsupported ports, or the
 * error from the join helper.  On failure the entry is freed.
 */
int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
			u8 join_state, void *context)
{
	struct rdma_id_private *priv;
	struct cma_multicast *entry;
	int rc;

	if (id->device == NULL)
		return -EINVAL;

	priv = container_of(id, struct rdma_id_private, id);
	if (!(cma_comp(priv, RDMA_CM_ADDR_BOUND) ||
	    cma_comp(priv, RDMA_CM_ADDR_RESOLVED)))
		return -EINVAL;

	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
	if (entry == NULL)
		return -ENOMEM;

	memcpy(&entry->addr, addr, rdma_addr_size(addr));
	entry->context = context;
	entry->id_priv = priv;
	entry->join_state = join_state;

	if (rdma_protocol_roce(id->device, id->port_num)) {
		kref_init(&entry->mcref);
		rc = cma_iboe_join_multicast(priv, entry);
	} else if (rdma_cap_ib_mcast(id->device, id->port_num)) {
		rc = cma_join_ib_multicast(priv, entry);
	} else {
		rc = -ENOSYS;
	}

	if (rc != 0) {
		kfree(entry);
		return rc;
	}

	spin_lock(&priv->lock);
	list_add(&entry->list, &priv->mc_list);
	spin_unlock(&priv->lock);

	return 0;
}
EXPORT_SYMBOL(rdma_join_multicast);
4520
rdma_leave_multicast(struct rdma_cm_id * id,struct sockaddr * addr)4521 void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
4522 {
4523 struct rdma_id_private *id_priv;
4524 struct cma_multicast *mc;
4525
4526 id_priv = container_of(id, struct rdma_id_private, id);
4527 spin_lock_irq(&id_priv->lock);
4528 list_for_each_entry(mc, &id_priv->mc_list, list) {
4529 if (!memcmp(&mc->addr, addr, rdma_addr_size(addr))) {
4530 list_del(&mc->list);
4531 spin_unlock_irq(&id_priv->lock);
4532
4533 if (id->qp)
4534 ib_detach_mcast(id->qp,
4535 &mc->multicast.ib->rec.mgid,
4536 be16_to_cpu(mc->multicast.ib->rec.mlid));
4537
4538 BUG_ON(id_priv->cma_dev->device != id->device);
4539
4540 if (rdma_cap_ib_mcast(id->device, id->port_num)) {
4541 ib_sa_free_multicast(mc->multicast.ib);
4542 kfree(mc);
4543 } else if (rdma_protocol_roce(id->device, id->port_num)) {
4544 cma_leave_roce_mc_group(id_priv, mc);
4545 }
4546 return;
4547 }
4548 }
4549 spin_unlock_irq(&id_priv->lock);
4550 }
4551 EXPORT_SYMBOL(rdma_leave_multicast);
4552
4553 static int
sysctl_cma_default_roce_mode(SYSCTL_HANDLER_ARGS)4554 sysctl_cma_default_roce_mode(SYSCTL_HANDLER_ARGS)
4555 {
4556 struct cma_device *cma_dev = arg1;
4557 const int port = arg2;
4558 char buf[64];
4559 int error;
4560
4561 strlcpy(buf, ib_cache_gid_type_str(
4562 cma_get_default_gid_type(cma_dev, port)), sizeof(buf));
4563
4564 error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
4565 if (error != 0 || req->newptr == NULL)
4566 goto done;
4567
4568 error = ib_cache_gid_parse_type_str(buf);
4569 if (error < 0) {
4570 error = EINVAL;
4571 goto done;
4572 }
4573
4574 cma_set_default_gid_type(cma_dev, port, error);
4575 error = 0;
4576 done:
4577 return (error);
4578 }
4579
4580 static int
sysctl_cma_default_roce_tos(SYSCTL_HANDLER_ARGS)4581 sysctl_cma_default_roce_tos(SYSCTL_HANDLER_ARGS)
4582 {
4583 struct cma_device *cma_dev = arg1;
4584 const int port = arg2;
4585 int error;
4586 u8 tos;
4587
4588 tos = cma_get_default_roce_tos(cma_dev, port);
4589
4590 error = sysctl_handle_8(oidp, &tos, sizeof(tos), req);
4591 if (error != 0 || req->newptr == NULL)
4592 goto done;
4593
4594 error = -cma_set_default_roce_tos(cma_dev, port, tos);
4595 done:
4596 return (error);
4597 }
4598
/*
 * Set up CMA state for a newly seen IB device (presumably the "add" hook of
 * cma_client -- the registration is not visible in this part of the file).
 *
 * Allocates a cma_device with per-port default GID type and default RoCE TOS
 * arrays, stores it as cma_client's client data for the device, links it
 * into dev_list, starts listening on it for every id on listen_any_list,
 * and finally creates two sysctl nodes per port.  Allocation failures make
 * the function return silently after freeing what was allocated.
 */
static void cma_add_one(struct ib_device *device)
{
	struct cma_device *cma_dev;
	struct rdma_id_private *id_priv;
	unsigned int i;

	cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
	if (!cma_dev)
		return;

	sysctl_ctx_init(&cma_dev->sysctl_ctx);

	cma_dev->device = device;
	cma_dev->default_gid_type = kcalloc(device->phys_port_cnt,
					    sizeof(*cma_dev->default_gid_type),
					    GFP_KERNEL);
	if (!cma_dev->default_gid_type)
		goto free_cma_dev;

	cma_dev->default_roce_tos = kcalloc(device->phys_port_cnt,
					    sizeof(*cma_dev->default_roce_tos),
					    GFP_KERNEL);
	if (!cma_dev->default_roce_tos)
		goto free_gid_type;

	/* Both per-port arrays are indexed by (port - rdma_start_port()). */
	for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
		unsigned long supported_gids;
		unsigned int default_gid_type;

		supported_gids = roce_gid_type_mask_support(device, i);

		if (WARN_ON(!supported_gids)) {
			/* set something valid */
			default_gid_type = 0;
		} else if (test_bit(IB_GID_TYPE_ROCE_UDP_ENCAP, &supported_gids)) {
			/* prefer RoCEv2, if supported */
			default_gid_type = IB_GID_TYPE_ROCE_UDP_ENCAP;
		} else {
			/* Otherwise take the lowest supported GID type. */
			default_gid_type = find_first_bit(&supported_gids,
							  BITS_PER_LONG);
		}
		cma_dev->default_gid_type[i - rdma_start_port(device)] =
			default_gid_type;
		cma_dev->default_roce_tos[i - rdma_start_port(device)] = 0;
	}

	init_completion(&cma_dev->comp);
	atomic_set(&cma_dev->refcount, 1);
	INIT_LIST_HEAD(&cma_dev->id_list);
	ib_set_client_data(device, &cma_client, cma_dev);

	/* Publish the device and attach all wildcard listeners to it. */
	mutex_lock(&lock);
	list_add_tail(&cma_dev->list, &dev_list);
	list_for_each_entry(id_priv, &listen_any_list, list)
		cma_listen_on_dev(id_priv, cma_dev);
	mutex_unlock(&lock);

	/*
	 * Per-port sysctl nodes; they are recorded in cma_dev->sysctl_ctx,
	 * which cma_remove_one() releases with sysctl_ctx_free().
	 */
	for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
		char buf[64];

		snprintf(buf, sizeof(buf), "default_roce_mode_port%d", i);

		(void) SYSCTL_ADD_PROC(&cma_dev->sysctl_ctx,
		    SYSCTL_CHILDREN(device->ports_parent->parent->oidp),
		    OID_AUTO, buf, CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
		    cma_dev, i, &sysctl_cma_default_roce_mode, "A",
		    "Default RoCE mode. Valid values: IB/RoCE v1 and RoCE v2");

		snprintf(buf, sizeof(buf), "default_roce_tos_port%d", i);

		(void) SYSCTL_ADD_PROC(&cma_dev->sysctl_ctx,
		    SYSCTL_CHILDREN(device->ports_parent->parent->oidp),
		    OID_AUTO, buf, CTLTYPE_U8 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
		    cma_dev, i, &sysctl_cma_default_roce_tos, "CU",
		    "Default RoCE TOS. Valid values: 0-255");
	}

	return;

free_gid_type:
	kfree(cma_dev->default_gid_type);

free_cma_dev:
	kfree(cma_dev);

	return;
}
4686
cma_remove_id_dev(struct rdma_id_private * id_priv)4687 static int cma_remove_id_dev(struct rdma_id_private *id_priv)
4688 {
4689 struct rdma_cm_event event = {};
4690 enum rdma_cm_state state;
4691 int ret = 0;
4692
4693 /* Record that we want to remove the device */
4694 state = cma_exch(id_priv, RDMA_CM_DEVICE_REMOVAL);
4695 if (state == RDMA_CM_DESTROYING)
4696 return 0;
4697
4698 cma_cancel_operation(id_priv, state);
4699 mutex_lock(&id_priv->handler_mutex);
4700
4701 /* Check for destruction from another callback. */
4702 if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL))
4703 goto out;
4704
4705 event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
4706 ret = id_priv->id.event_handler(&id_priv->id, &event);
4707 out:
4708 mutex_unlock(&id_priv->handler_mutex);
4709 return ret;
4710 }
4711
/*
 * Detach every id from a device that is being removed, then wait until the
 * device is no longer referenced.
 *
 * Ids are taken off cma_dev->id_list one at a time.  The global lock is
 * dropped while each id is processed and re-taken before the list is
 * examined again.
 */
static void cma_process_remove(struct cma_device *cma_dev)
{
	struct rdma_id_private *id_priv;
	int ret;

	mutex_lock(&lock);
	while (!list_empty(&cma_dev->id_list)) {
		id_priv = list_entry(cma_dev->id_list.next,
				     struct rdma_id_private, list);

		list_del(&id_priv->listen_list);
		list_del_init(&id_priv->list);
		/* Hold a reference across the unlocked section below. */
		atomic_inc(&id_priv->refcount);
		mutex_unlock(&lock);

		/*
		 * Internal ids are always destroyed here; other ids are
		 * destroyed only when cma_remove_id_dev() returns non-zero.
		 */
		ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv);
		cma_deref_id(id_priv);
		if (ret)
			rdma_destroy_id(&id_priv->id);

		mutex_lock(&lock);
	}
	mutex_unlock(&lock);

	/* Drop one device reference, then wait on the device's completion. */
	cma_deref_dev(cma_dev);
	wait_for_completion(&cma_dev->comp);
}
4739
/*
 * Tear down the CMA state of a device (presumably the "remove" hook of
 * cma_client -- the registration is not visible in this part of the file).
 *
 * @client_data is the cma_device stored by cma_add_one(); it may be NULL, in
 * which case there is nothing to undo.
 */
static void cma_remove_one(struct ib_device *device, void *client_data)
{
	struct cma_device *cma_dev = client_data;

	if (!cma_dev)
		return;

	/* Unlink from dev_list before the ids are processed. */
	mutex_lock(&lock);
	list_del(&cma_dev->list);
	mutex_unlock(&lock);

	cma_process_remove(cma_dev);
	/* Release the sysctl nodes and arrays created in cma_add_one(). */
	sysctl_ctx_free(&cma_dev->sysctl_ctx);
	kfree(cma_dev->default_roce_tos);
	kfree(cma_dev->default_gid_type);
	kfree(cma_dev);
}
4757
cma_init_vnet(void * arg)4758 static void cma_init_vnet(void *arg)
4759 {
4760 struct cma_pernet *pernet = &VNET(cma_pernet);
4761
4762 idr_init(&pernet->tcp_ps);
4763 idr_init(&pernet->udp_ps);
4764 idr_init(&pernet->ipoib_ps);
4765 idr_init(&pernet->ib_ps);
4766 idr_init(&pernet->sdp_ps);
4767 }
4768 VNET_SYSINIT(cma_init_vnet, SI_SUB_OFED_MODINIT - 1, SI_ORDER_FIRST, cma_init_vnet, NULL);
4769
cma_destroy_vnet(void * arg)4770 static void cma_destroy_vnet(void *arg)
4771 {
4772 struct cma_pernet *pernet = &VNET(cma_pernet);
4773
4774 idr_destroy(&pernet->tcp_ps);
4775 idr_destroy(&pernet->udp_ps);
4776 idr_destroy(&pernet->ipoib_ps);
4777 idr_destroy(&pernet->ib_ps);
4778 idr_destroy(&pernet->sdp_ps);
4779 }
4780 VNET_SYSUNINIT(cma_destroy_vnet, SI_SUB_OFED_MODINIT - 1, SI_ORDER_SECOND, cma_destroy_vnet, NULL);
4781
cma_init(void)4782 static int __init cma_init(void)
4783 {
4784 int ret;
4785
4786 cma_wq = alloc_ordered_workqueue("rdma_cm", WQ_MEM_RECLAIM);
4787 if (!cma_wq)
4788 return -ENOMEM;
4789
4790 ib_sa_register_client(&sa_client);
4791
4792 ret = ib_register_client(&cma_client);
4793 if (ret)
4794 goto err;
4795
4796 cma_configfs_init();
4797
4798 return 0;
4799
4800 err:
4801 ib_sa_unregister_client(&sa_client);
4802 destroy_workqueue(cma_wq);
4803 return ret;
4804 }
4805
/*
 * Module teardown: undoes cma_init() in reverse order -- configfs exit,
 * IB client, SA client, and finally the "rdma_cm" workqueue.
 */
static void __exit cma_cleanup(void)
{
	cma_configfs_exit();
	ib_unregister_client(&cma_client);
	ib_sa_unregister_client(&sa_client);
	destroy_workqueue(cma_wq);
}
4813
/*
 * Register cma_init() and cma_cleanup() as the module's init and exit
 * routines, both at SI_ORDER_FOURTH.
 */
module_init_order(cma_init, SI_ORDER_FOURTH);
module_exit_order(cma_cleanup, SI_ORDER_FOURTH);
4816