/* xref: /freebsd/contrib/ofed/librdmacm/acm.c (revision 966e279052f33b1665480e0327c177013cb8205a) */
/*
 * Copyright (c) 2010-2012 Intel Corporation.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
32 
#include <config.h>

#include <errno.h>
#include <inttypes.h>
#include <netdb.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <unistd.h>

#include "cma.h"
#include <rdma/rdma_cma.h>
#include <infiniband/ib.h>
#include <infiniband/sa.h>
46 
/* Protocol version placed in acm_hdr.version of every request. */
#define ACM_VERSION             1

/* Values for acm_hdr.opcode. */
#define ACM_OP_RESOLVE          0x01
#define ACM_OP_ACK              0x80

/*
 * Values for acm_hdr.status.  Only "zero vs. non-zero" is examined in this
 * file; any non-zero status makes the response be ignored.
 */
#define ACM_STATUS_SUCCESS      0
#define ACM_STATUS_ENOMEM       1
#define ACM_STATUS_EINVAL       2
#define ACM_STATUS_ENODATA      3
#define ACM_STATUS_ENOTCONN     5
#define ACM_STATUS_ETIMEDOUT    6
#define ACM_STATUS_ESRCADDR     7
#define ACM_STATUS_ESRCTYPE     8
#define ACM_STATUS_EDESTADDR    9
#define ACM_STATUS_EDESTTYPE    10

/* Extra bit OR-ed into acm_ep_addr_data.flags of the destination entry. */
#define ACM_FLAGS_NODELAY	(1<<30)

/* Sizes, in bytes, used to build and parse messages. */
#define ACM_MSG_HDR_LENGTH      16	/* length of the message header */
#define ACM_MAX_ADDRESS         64	/* capacity of an address/name field */
#define ACM_MSG_EP_LENGTH       72	/* length of one endpoint entry */
#define ACM_MSG_DATA_LENGTH     (ACM_MSG_EP_LENGTH * 8)	/* payload capacity */
69 
/*
 * Header found at the start of every message sent to or received from the
 * ACM service.
 */
struct acm_hdr {
	uint8_t                 version;	/* set to ACM_VERSION on requests */
	uint8_t                 opcode;		/* ACM_OP_* */
	uint8_t                 status;		/* ACM_STATUS_*; non-zero means failure */
	uint8_t		        data[3];	/* not used in this file */
	uint16_t                length;		/* total message length, header included */
	uint64_t                tid;		/* not used in this file */
};
78 
79 #define ACM_EP_INFO_NAME        0x0001
80 #define ACM_EP_INFO_ADDRESS_IP  0x0002
81 #define ACM_EP_INFO_ADDRESS_IP6 0x0003
82 #define ACM_EP_INFO_PATH        0x0010
83 
84 union acm_ep_info {
85 	uint8_t                 addr[ACM_MAX_ADDRESS];
86 	uint8_t                 name[ACM_MAX_ADDRESS];
87 	struct ibv_path_record  path;
88 };
89 
90 #define ACM_EP_FLAG_SOURCE      (1<<0)
91 #define ACM_EP_FLAG_DEST        (1<<1)
92 
93 struct acm_ep_addr_data {
94 	uint32_t                flags;
95 	uint16_t                type;
96 	uint16_t                reserved;
97 	union acm_ep_info       info;
98 };
99 
100 struct acm_resolve_msg {
101 	struct acm_hdr          hdr;
102 	struct acm_ep_addr_data data[0];
103 };
104 
105 struct acm_msg {
106 	struct acm_hdr                  hdr;
107 	union{
108 		uint8_t                 data[ACM_MSG_DATA_LENGTH];
109 		struct acm_ep_addr_data resolve_data[0];
110 	};
111 };
112 
/* Serializes one-time initialization and each request/response exchange. */
static pthread_mutex_t acm_lock = PTHREAD_MUTEX_INITIALIZER;
/* Connected socket to the ACM service, or -1 when there is no connection. */
static int sock = -1;
/* TCP port of the ACM service as read from the port file; 0 if unknown. */
static uint16_t server_port;
116 
117 static int ucma_set_server_port(void)
118 {
119 	FILE *f;
120 
121 	if ((f = fopen(IBACM_PORT_FILE, "r" STREAM_CLOEXEC))) {
122 		if (fscanf(f, "%" SCNu16, &server_port) != 1)
123 			server_port = 0;
124 		fclose(f);
125 	}
126 	return server_port;
127 }
128 
129 void ucma_ib_init(void)
130 {
131 	struct sockaddr_in addr;
132 	static int init;
133 	int ret;
134 
135 	if (init)
136 		return;
137 
138 	pthread_mutex_lock(&acm_lock);
139 	if (init)
140 		goto unlock;
141 
142 	if (!ucma_set_server_port())
143 		goto out;
144 
145 	sock = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, IPPROTO_TCP);
146 	if (sock < 0)
147 		goto out;
148 
149 	memset(&addr, 0, sizeof addr);
150 	addr.sin_family = AF_INET;
151 	addr.sin_addr.s_addr = htobe32(INADDR_LOOPBACK);
152 	addr.sin_port = htobe16(server_port);
153 	ret = connect(sock, (struct sockaddr *) &addr, sizeof(addr));
154 	if (ret) {
155 		close(sock);
156 		sock = -1;
157 	}
158 out:
159 	init = 1;
160 unlock:
161 	pthread_mutex_unlock(&acm_lock);
162 }
163 
164 void ucma_ib_cleanup(void)
165 {
166 	if (sock >= 0) {
167 		shutdown(sock, SHUT_RDWR);
168 		close(sock);
169 	}
170 }
171 
172 static int ucma_ib_set_addr(struct rdma_addrinfo *ib_rai,
173 			    struct rdma_addrinfo *rai)
174 {
175 	struct sockaddr_ib *src, *dst;
176 	struct ibv_path_record *path;
177 
178 	src = calloc(1, sizeof(*src));
179 	if (!src)
180 		return ERR(ENOMEM);
181 
182 	dst = calloc(1, sizeof(*dst));
183 	if (!dst) {
184 		free(src);
185 		return ERR(ENOMEM);
186 	}
187 
188 	path = &((struct ibv_path_data *) ib_rai->ai_route)->path;
189 
190 	src->sib_family = AF_IB;
191 	src->sib_pkey = path->pkey;
192 	src->sib_flowinfo = htobe32(be32toh(path->flowlabel_hoplimit) >> 8);
193 	memcpy(&src->sib_addr, &path->sgid, 16);
194 	ucma_set_sid(ib_rai->ai_port_space, rai->ai_src_addr, src);
195 
196 	dst->sib_family = AF_IB;
197 	dst->sib_pkey = path->pkey;
198 	dst->sib_flowinfo = htobe32(be32toh(path->flowlabel_hoplimit) >> 8);
199 	memcpy(&dst->sib_addr, &path->dgid, 16);
200 	ucma_set_sid(ib_rai->ai_port_space, rai->ai_dst_addr, dst);
201 
202 	ib_rai->ai_src_addr = (struct sockaddr *) src;
203 	ib_rai->ai_src_len = sizeof(*src);
204 
205 	ib_rai->ai_dst_addr = (struct sockaddr *) dst;
206 	ib_rai->ai_dst_len = sizeof(*dst);
207 
208 	return 0;
209 }
210 
211 static int ucma_ib_set_connect(struct rdma_addrinfo *ib_rai,
212 			       struct rdma_addrinfo *rai)
213 {
214 	struct ib_connect_hdr *hdr;
215 
216 	if (rai->ai_family == AF_IB)
217 		return 0;
218 
219 	hdr = calloc(1, sizeof(*hdr));
220 	if (!hdr)
221 		return ERR(ENOMEM);
222 
223 	if (rai->ai_family == AF_INET) {
224 		hdr->ip_version = 4 << 4;
225 		memcpy(&hdr->cma_src_ip4,
226 		       &((struct sockaddr_in *) rai->ai_src_addr)->sin_addr, 4);
227 		memcpy(&hdr->cma_dst_ip4,
228 		       &((struct sockaddr_in *) rai->ai_dst_addr)->sin_addr, 4);
229 	} else {
230 		hdr->ip_version = 6 << 4;
231 		memcpy(&hdr->cma_src_ip6,
232 		       &((struct sockaddr_in6 *) rai->ai_src_addr)->sin6_addr, 16);
233 		memcpy(&hdr->cma_dst_ip6,
234 		       &((struct sockaddr_in6 *) rai->ai_dst_addr)->sin6_addr, 16);
235 	}
236 
237 	ib_rai->ai_connect = hdr;
238 	ib_rai->ai_connect_len = sizeof(*hdr);
239 	return 0;
240 }
241 
242 static void ucma_resolve_af_ib(struct rdma_addrinfo **rai)
243 {
244 	struct rdma_addrinfo *ib_rai;
245 
246 	ib_rai = calloc(1, sizeof(*ib_rai));
247 	if (!ib_rai)
248 		return;
249 
250 	ib_rai->ai_flags = (*rai)->ai_flags;
251 	ib_rai->ai_family = AF_IB;
252 	ib_rai->ai_qp_type = (*rai)->ai_qp_type;
253 	ib_rai->ai_port_space = (*rai)->ai_port_space;
254 
255 	ib_rai->ai_route = calloc(1, (*rai)->ai_route_len);
256 	if (!ib_rai->ai_route)
257 		goto err;
258 
259 	memcpy(ib_rai->ai_route, (*rai)->ai_route, (*rai)->ai_route_len);
260 	ib_rai->ai_route_len = (*rai)->ai_route_len;
261 
262 	if ((*rai)->ai_src_canonname) {
263 		ib_rai->ai_src_canonname = strdup((*rai)->ai_src_canonname);
264 		if (!ib_rai->ai_src_canonname)
265 			goto err;
266 	}
267 
268 	if ((*rai)->ai_dst_canonname) {
269 		ib_rai->ai_dst_canonname = strdup((*rai)->ai_dst_canonname);
270 		if (!ib_rai->ai_dst_canonname)
271 			goto err;
272 	}
273 
274 	if (ucma_ib_set_connect(ib_rai, *rai))
275 		goto err;
276 
277 	if (ucma_ib_set_addr(ib_rai, *rai))
278 		goto err;
279 
280 	ib_rai->ai_next = *rai;
281 	*rai = ib_rai;
282 	return;
283 
284 err:
285 	rdma_freeaddrinfo(ib_rai);
286 }
287 
288 static void ucma_ib_save_resp(struct rdma_addrinfo *rai, struct acm_msg *msg)
289 {
290 	struct acm_ep_addr_data *ep_data;
291 	struct ibv_path_data *path_data = NULL;
292 	struct sockaddr_in *sin;
293 	struct sockaddr_in6 *sin6;
294 	int i, cnt, path_cnt = 0;
295 
296 	cnt = (msg->hdr.length - ACM_MSG_HDR_LENGTH) / ACM_MSG_EP_LENGTH;
297 	for (i = 0; i < cnt; i++) {
298 		ep_data = &msg->resolve_data[i];
299 		switch (ep_data->type) {
300 		case ACM_EP_INFO_PATH:
301 			ep_data->type = 0;
302 			if (!path_data)
303 				path_data = (struct ibv_path_data *) ep_data;
304 			path_cnt++;
305 			break;
306 		case ACM_EP_INFO_ADDRESS_IP:
307 			if (!(ep_data->flags & ACM_EP_FLAG_SOURCE) || rai->ai_src_len)
308 				break;
309 
310 			sin = calloc(1, sizeof(*sin));
311 			if (!sin)
312 				break;
313 
314 			sin->sin_family = AF_INET;
315 			memcpy(&sin->sin_addr, &ep_data->info.addr, 4);
316 			rai->ai_src_len = sizeof(*sin);
317 			rai->ai_src_addr = (struct sockaddr *) sin;
318 			break;
319 		case ACM_EP_INFO_ADDRESS_IP6:
320 			if (!(ep_data->flags & ACM_EP_FLAG_SOURCE) || rai->ai_src_len)
321 				break;
322 
323 			sin6 = calloc(1, sizeof(*sin6));
324 			if (!sin6)
325 				break;
326 
327 			sin6->sin6_family = AF_INET6;
328 			memcpy(&sin6->sin6_addr, &ep_data->info.addr, 16);
329 			rai->ai_src_len = sizeof(*sin6);
330 			rai->ai_src_addr = (struct sockaddr *) sin6;
331 			break;
332 		default:
333 			break;
334 		}
335 	}
336 
337 	rai->ai_route = calloc(path_cnt, sizeof(*path_data));
338 	if (rai->ai_route) {
339 		memcpy(rai->ai_route, path_data, path_cnt * sizeof(*path_data));
340 		rai->ai_route_len = path_cnt * sizeof(*path_data);
341 	}
342 }
343 
344 static void ucma_set_ep_addr(struct acm_ep_addr_data *data, struct sockaddr *addr)
345 {
346 	if (addr->sa_family == AF_INET) {
347 		data->type = ACM_EP_INFO_ADDRESS_IP;
348 		memcpy(data->info.addr, &((struct sockaddr_in *) addr)->sin_addr, 4);
349 	} else {
350 		data->type = ACM_EP_INFO_ADDRESS_IP6;
351 		memcpy(data->info.addr, &((struct sockaddr_in6 *) addr)->sin6_addr, 16);
352 	}
353 }
354 
/*
 * Report whether (addr, len) describes an IPv4 or IPv6 socket address.
 *
 * Returns 1 when len is non-zero, addr is non-NULL and the address family is
 * AF_INET or AF_INET6; returns 0 otherwise.
 */
static int ucma_inet_addr(struct sockaddr *addr, socklen_t len)
{
	return len && addr && (addr->sa_family == AF_INET ||
			       addr->sa_family == AF_INET6);
}
360 
/*
 * Report whether (addr, len) describes an AF_IB socket address.
 *
 * Returns 1 when len is non-zero, addr is non-NULL and the address family is
 * AF_IB; returns 0 otherwise.
 */
static int ucma_ib_addr(struct sockaddr *addr, socklen_t len)
{
	return len && addr && (addr->sa_family == AF_IB);
}
365 
366 void ucma_ib_resolve(struct rdma_addrinfo **rai,
367 		     const struct rdma_addrinfo *hints)
368 {
369 	struct acm_msg msg;
370 	struct acm_ep_addr_data *data;
371 	int ret;
372 
373 	ucma_ib_init();
374 	if (sock < 0)
375 		return;
376 
377 	memset(&msg, 0, sizeof msg);
378 	msg.hdr.version = ACM_VERSION;
379 	msg.hdr.opcode = ACM_OP_RESOLVE;
380 	msg.hdr.length = ACM_MSG_HDR_LENGTH;
381 
382 	data = &msg.resolve_data[0];
383 	if (ucma_inet_addr((*rai)->ai_src_addr, (*rai)->ai_src_len)) {
384 		data->flags = ACM_EP_FLAG_SOURCE;
385 		ucma_set_ep_addr(data, (*rai)->ai_src_addr);
386 		data++;
387 		msg.hdr.length += ACM_MSG_EP_LENGTH;
388 	}
389 
390 	if (ucma_inet_addr((*rai)->ai_dst_addr, (*rai)->ai_dst_len)) {
391 		data->flags = ACM_EP_FLAG_DEST;
392 		if (hints->ai_flags & (RAI_NUMERICHOST | RAI_NOROUTE))
393 			data->flags |= ACM_FLAGS_NODELAY;
394 		ucma_set_ep_addr(data, (*rai)->ai_dst_addr);
395 		data++;
396 		msg.hdr.length += ACM_MSG_EP_LENGTH;
397 	}
398 
399 	if (hints->ai_route_len ||
400 	    ucma_ib_addr((*rai)->ai_src_addr, (*rai)->ai_src_len) ||
401 	    ucma_ib_addr((*rai)->ai_dst_addr, (*rai)->ai_dst_len)) {
402 		struct ibv_path_record *path;
403 
404 		if (hints->ai_route_len == sizeof(struct ibv_path_record))
405 			path = (struct ibv_path_record *) hints->ai_route;
406 		else if (hints->ai_route_len == sizeof(struct ibv_path_data))
407 			path = &((struct ibv_path_data *) hints->ai_route)->path;
408 		else
409 			path = NULL;
410 
411 		if (path)
412 			memcpy(&data->info.path, path, sizeof(*path));
413 
414 		if (ucma_ib_addr((*rai)->ai_src_addr, (*rai)->ai_src_len)) {
415 			memcpy(&data->info.path.sgid,
416 			       &((struct sockaddr_ib *) (*rai)->ai_src_addr)->sib_addr, 16);
417 		}
418 		if (ucma_ib_addr((*rai)->ai_dst_addr, (*rai)->ai_dst_len)) {
419 			memcpy(&data->info.path.dgid,
420 			       &((struct sockaddr_ib *) (*rai)->ai_dst_addr)->sib_addr, 16);
421 		}
422 		data->type = ACM_EP_INFO_PATH;
423 		data++;
424 		msg.hdr.length += ACM_MSG_EP_LENGTH;
425 	}
426 
427 	pthread_mutex_lock(&acm_lock);
428 	ret = send(sock, (char *) &msg, msg.hdr.length, 0);
429 	if (ret != msg.hdr.length) {
430 		pthread_mutex_unlock(&acm_lock);
431 		return;
432 	}
433 
434 	ret = recv(sock, (char *) &msg, sizeof msg, 0);
435 	pthread_mutex_unlock(&acm_lock);
436 	if (ret < ACM_MSG_HDR_LENGTH || ret != msg.hdr.length || msg.hdr.status)
437 		return;
438 
439 	ucma_ib_save_resp(*rai, &msg);
440 
441 	if (af_ib_support && !(hints->ai_flags & RAI_ROUTEONLY) && (*rai)->ai_route_len)
442 		ucma_resolve_af_ib(rai);
443 }
444