xref: /freebsd/contrib/ofed/librdmacm/acm.c (revision 87181516ef48be852d5e5fee53c6e0dbfc62f21e)
1  /*
2   * Copyright (c) 2010-2012 Intel Corporation.  All rights reserved.
3   *
4   * This software is available to you under a choice of one of two
5   * licenses.  You may choose to be licensed under the terms of the GNU
6   * General Public License (GPL) Version 2, available from the file
7   * COPYING in the main directory of this source tree, or the
8   * OpenIB.org BSD license below:
9   *
10   *     Redistribution and use in source and binary forms, with or
11   *     without modification, are permitted provided that the following
12   *     conditions are met:
13   *
14   *      - Redistributions of source code must retain the above
15   *        copyright notice, this list of conditions and the following
16   *        disclaimer.
17   *
18   *      - Redistributions in binary form must reproduce the above
19   *        copyright notice, this list of conditions and the following
20   *        disclaimer in the documentation and/or other materials
21   *        provided with the distribution.
22   *
23   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24   * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25   * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26   * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27   * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28   * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29   * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30   * SOFTWARE.
31   */
32  
33  #include <config.h>
34  
35  #include <stdio.h>
36  #include <inttypes.h>
37  #include <sys/types.h>
38  #include <sys/socket.h>
39  #include <netdb.h>
40  #include <unistd.h>
41  
42  #include "cma.h"
43  #include <rdma/rdma_cma.h>
44  #include <infiniband/ib.h>
45  #include <infiniband/sa.h>
46  
/* Protocol version written into acm_hdr.version of every request. */
#define ACM_VERSION             1

/*
 * Values for acm_hdr.opcode.  Only ACM_OP_RESOLVE is used in this file;
 * ACM_OP_ACK is not referenced here.
 */
#define ACM_OP_RESOLVE          0x01
#define ACM_OP_ACK              0x80

/*
 * Values for acm_hdr.status.  This file only distinguishes zero (success)
 * from non-zero (response is discarded); the individual codes are not
 * interpreted here.
 */
#define ACM_STATUS_SUCCESS      0
#define ACM_STATUS_ENOMEM       1
#define ACM_STATUS_EINVAL       2
#define ACM_STATUS_ENODATA      3
#define ACM_STATUS_ENOTCONN     5
#define ACM_STATUS_ETIMEDOUT    6
#define ACM_STATUS_ESRCADDR     7
#define ACM_STATUS_ESRCTYPE     8
#define ACM_STATUS_EDESTADDR    9
#define ACM_STATUS_EDESTTYPE    10

/*
 * Extra bit OR-ed into the flags of the destination endpoint entry when the
 * caller's hints carry RAI_NUMERICHOST or RAI_NOROUTE.
 */
#define ACM_FLAGS_NODELAY	(1<<30)

/*
 * Message geometry in bytes: header size, size of the address/name buffers
 * in union acm_ep_info, size accounted for each endpoint entry in
 * acm_hdr.length, and the payload capacity of struct acm_msg (8 entries).
 */
#define ACM_MSG_HDR_LENGTH      16
#define ACM_MAX_ADDRESS         64
#define ACM_MSG_EP_LENGTH       72
#define ACM_MSG_DATA_LENGTH     (ACM_MSG_EP_LENGTH * 8)
69  
/*
 * Header that starts every ACM message.  Requests built in this file are
 * zero-filled first, so fields not listed as "set" below go out as zero.
 */
struct acm_hdr {
	uint8_t                 version;	/* set to ACM_VERSION on requests */
	uint8_t                 opcode;		/* set to ACM_OP_RESOLVE on requests */
	uint8_t                 status;		/* non-zero in a response => response ignored */
	uint8_t		        data[3];	/* not used in this file */
	uint16_t                length;		/* header plus all endpoint entries, in bytes */
	uint64_t                tid;		/* not set or checked in this file */
};
78  
/*
 * Values for acm_ep_addr_data.type; they select which member of
 * union acm_ep_info is meaningful.  ACM_EP_INFO_NAME is not referenced in
 * this file.
 */
#define ACM_EP_INFO_NAME        0x0001
#define ACM_EP_INFO_ADDRESS_IP  0x0002	/* first 4 bytes of addr: IPv4 address */
#define ACM_EP_INFO_ADDRESS_IP6 0x0003	/* first 16 bytes of addr: IPv6 address */
#define ACM_EP_INFO_PATH        0x0010	/* path: an ibv_path_record */

/* Payload of one endpoint entry; interpretation depends on the entry type. */
union acm_ep_info {
	uint8_t                 addr[ACM_MAX_ADDRESS];
	uint8_t                 name[ACM_MAX_ADDRESS];
	struct ibv_path_record  path;
};
89  
/* Bits for acm_ep_addr_data.flags marking which side an address describes. */
#define ACM_EP_FLAG_SOURCE      (1<<0)
#define ACM_EP_FLAG_DEST        (1<<1)

/*
 * One endpoint entry of a resolve message.  Each entry is accounted as
 * ACM_MSG_EP_LENGTH bytes in acm_hdr.length.
 */
struct acm_ep_addr_data {
	uint32_t                flags;		/* ACM_EP_FLAG_* (plus ACM_FLAGS_NODELAY) */
	uint16_t                type;		/* ACM_EP_INFO_* */
	uint16_t                reserved;
	union acm_ep_info       info;
};
99  
100  struct acm_resolve_msg {
101  	struct acm_hdr          hdr;
102  	struct acm_ep_addr_data data[0];
103  };
104  
/*
 * Fixed-size message buffer used for both the request and the response.
 * The payload can be addressed either as raw bytes or as an array of
 * endpoint entries; both views share the same storage.
 */
struct acm_msg {
	struct acm_hdr                  hdr;
	union{
		uint8_t                 data[ACM_MSG_DATA_LENGTH];	/* raw payload bytes */
		struct acm_ep_addr_data resolve_data[0];		/* same bytes as entries */
	};
};
112  
/* Guards one-time setup in ucma_ib_init() and the send/recv exchange in ucma_ib_resolve(). */
static pthread_mutex_t acm_lock = PTHREAD_MUTEX_INITIALIZER;
/* TCP socket connected to the ACM service on loopback; -1 when no connection is available. */
static int sock = -1;
/* Service port parsed from IBACM_PORT_FILE, host byte order; 0 when unknown. */
static uint16_t server_port;
116  
ucma_set_server_port(void)117  static int ucma_set_server_port(void)
118  {
119  	FILE *f;
120  
121  	if ((f = fopen(IBACM_PORT_FILE, "r" STREAM_CLOEXEC))) {
122  		if (fscanf(f, "%" SCNu16, &server_port) != 1)
123  			server_port = 0;
124  		fclose(f);
125  	}
126  	return server_port;
127  }
128  
ucma_ib_init(void)129  void ucma_ib_init(void)
130  {
131  	struct sockaddr_in addr;
132  	static int init;
133  	int ret;
134  
135  	if (init)
136  		return;
137  
138  	pthread_mutex_lock(&acm_lock);
139  	if (init)
140  		goto unlock;
141  
142  	if (!ucma_set_server_port())
143  		goto out;
144  
145  	sock = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, IPPROTO_TCP);
146  	if (sock < 0)
147  		goto out;
148  
149  	memset(&addr, 0, sizeof addr);
150  	addr.sin_family = AF_INET;
151  	addr.sin_addr.s_addr = htobe32(INADDR_LOOPBACK);
152  	addr.sin_port = htobe16(server_port);
153  	ret = connect(sock, (struct sockaddr *) &addr, sizeof(addr));
154  	if (ret) {
155  		close(sock);
156  		sock = -1;
157  	}
158  out:
159  	init = 1;
160  unlock:
161  	pthread_mutex_unlock(&acm_lock);
162  }
163  
ucma_ib_cleanup(void)164  void ucma_ib_cleanup(void)
165  {
166  	if (sock >= 0) {
167  		shutdown(sock, SHUT_RDWR);
168  		close(sock);
169  	}
170  }
171  
ucma_ib_set_addr(struct rdma_addrinfo * ib_rai,struct rdma_addrinfo * rai)172  static int ucma_ib_set_addr(struct rdma_addrinfo *ib_rai,
173  			    struct rdma_addrinfo *rai)
174  {
175  	struct sockaddr_ib *src, *dst;
176  	struct ibv_path_record *path;
177  
178  	src = calloc(1, sizeof(*src));
179  	if (!src)
180  		return ERR(ENOMEM);
181  
182  	dst = calloc(1, sizeof(*dst));
183  	if (!dst) {
184  		free(src);
185  		return ERR(ENOMEM);
186  	}
187  
188  	path = &((struct ibv_path_data *) ib_rai->ai_route)->path;
189  
190  	src->sib_family = AF_IB;
191  	src->sib_pkey = path->pkey;
192  	src->sib_flowinfo = htobe32(be32toh(path->flowlabel_hoplimit) >> 8);
193  	memcpy(&src->sib_addr, &path->sgid, 16);
194  	ucma_set_sid(ib_rai->ai_port_space, rai->ai_src_addr, src);
195  
196  	dst->sib_family = AF_IB;
197  	dst->sib_pkey = path->pkey;
198  	dst->sib_flowinfo = htobe32(be32toh(path->flowlabel_hoplimit) >> 8);
199  	memcpy(&dst->sib_addr, &path->dgid, 16);
200  	ucma_set_sid(ib_rai->ai_port_space, rai->ai_dst_addr, dst);
201  
202  	ib_rai->ai_src_addr = (struct sockaddr *) src;
203  	ib_rai->ai_src_len = sizeof(*src);
204  
205  	ib_rai->ai_dst_addr = (struct sockaddr *) dst;
206  	ib_rai->ai_dst_len = sizeof(*dst);
207  
208  	return 0;
209  }
210  
ucma_ib_set_connect(struct rdma_addrinfo * ib_rai,struct rdma_addrinfo * rai)211  static int ucma_ib_set_connect(struct rdma_addrinfo *ib_rai,
212  			       struct rdma_addrinfo *rai)
213  {
214  	struct ib_connect_hdr *hdr;
215  
216  	if (rai->ai_family == AF_IB)
217  		return 0;
218  
219  	hdr = calloc(1, sizeof(*hdr));
220  	if (!hdr)
221  		return ERR(ENOMEM);
222  
223  	if (rai->ai_family == AF_INET) {
224  		hdr->ip_version = 4 << 4;
225  		memcpy(&hdr->cma_src_ip4,
226  		       &((struct sockaddr_in *) rai->ai_src_addr)->sin_addr, 4);
227  		memcpy(&hdr->cma_dst_ip4,
228  		       &((struct sockaddr_in *) rai->ai_dst_addr)->sin_addr, 4);
229  	} else {
230  		hdr->ip_version = 6 << 4;
231  		memcpy(&hdr->cma_src_ip6,
232  		       &((struct sockaddr_in6 *) rai->ai_src_addr)->sin6_addr, 16);
233  		memcpy(&hdr->cma_dst_ip6,
234  		       &((struct sockaddr_in6 *) rai->ai_dst_addr)->sin6_addr, 16);
235  	}
236  
237  	ib_rai->ai_connect = hdr;
238  	ib_rai->ai_connect_len = sizeof(*hdr);
239  	return 0;
240  }
241  
ucma_resolve_af_ib(struct rdma_addrinfo ** rai)242  static void ucma_resolve_af_ib(struct rdma_addrinfo **rai)
243  {
244  	struct rdma_addrinfo *ib_rai;
245  
246  	ib_rai = calloc(1, sizeof(*ib_rai));
247  	if (!ib_rai)
248  		return;
249  
250  	ib_rai->ai_flags = (*rai)->ai_flags;
251  	ib_rai->ai_family = AF_IB;
252  	ib_rai->ai_qp_type = (*rai)->ai_qp_type;
253  	ib_rai->ai_port_space = (*rai)->ai_port_space;
254  
255  	ib_rai->ai_route = calloc(1, (*rai)->ai_route_len);
256  	if (!ib_rai->ai_route)
257  		goto err;
258  
259  	memcpy(ib_rai->ai_route, (*rai)->ai_route, (*rai)->ai_route_len);
260  	ib_rai->ai_route_len = (*rai)->ai_route_len;
261  
262  	if ((*rai)->ai_src_canonname) {
263  		ib_rai->ai_src_canonname = strdup((*rai)->ai_src_canonname);
264  		if (!ib_rai->ai_src_canonname)
265  			goto err;
266  	}
267  
268  	if ((*rai)->ai_dst_canonname) {
269  		ib_rai->ai_dst_canonname = strdup((*rai)->ai_dst_canonname);
270  		if (!ib_rai->ai_dst_canonname)
271  			goto err;
272  	}
273  
274  	if (ucma_ib_set_connect(ib_rai, *rai))
275  		goto err;
276  
277  	if (ucma_ib_set_addr(ib_rai, *rai))
278  		goto err;
279  
280  	ib_rai->ai_next = *rai;
281  	*rai = ib_rai;
282  	return;
283  
284  err:
285  	rdma_freeaddrinfo(ib_rai);
286  }
287  
ucma_ib_save_resp(struct rdma_addrinfo * rai,struct acm_msg * msg)288  static void ucma_ib_save_resp(struct rdma_addrinfo *rai, struct acm_msg *msg)
289  {
290  	struct acm_ep_addr_data *ep_data;
291  	struct ibv_path_data *path_data = NULL;
292  	struct sockaddr_in *sin;
293  	struct sockaddr_in6 *sin6;
294  	int i, cnt, path_cnt = 0;
295  
296  	cnt = (msg->hdr.length - ACM_MSG_HDR_LENGTH) / ACM_MSG_EP_LENGTH;
297  	for (i = 0; i < cnt; i++) {
298  		ep_data = &msg->resolve_data[i];
299  		switch (ep_data->type) {
300  		case ACM_EP_INFO_PATH:
301  			ep_data->type = 0;
302  			if (!path_data)
303  				path_data = (struct ibv_path_data *) ep_data;
304  			path_cnt++;
305  			break;
306  		case ACM_EP_INFO_ADDRESS_IP:
307  			if (!(ep_data->flags & ACM_EP_FLAG_SOURCE) || rai->ai_src_len)
308  				break;
309  
310  			sin = calloc(1, sizeof(*sin));
311  			if (!sin)
312  				break;
313  
314  			sin->sin_family = AF_INET;
315  			memcpy(&sin->sin_addr, &ep_data->info.addr, 4);
316  			rai->ai_src_len = sizeof(*sin);
317  			rai->ai_src_addr = (struct sockaddr *) sin;
318  			break;
319  		case ACM_EP_INFO_ADDRESS_IP6:
320  			if (!(ep_data->flags & ACM_EP_FLAG_SOURCE) || rai->ai_src_len)
321  				break;
322  
323  			sin6 = calloc(1, sizeof(*sin6));
324  			if (!sin6)
325  				break;
326  
327  			sin6->sin6_family = AF_INET6;
328  			memcpy(&sin6->sin6_addr, &ep_data->info.addr, 16);
329  			rai->ai_src_len = sizeof(*sin6);
330  			rai->ai_src_addr = (struct sockaddr *) sin6;
331  			break;
332  		default:
333  			break;
334  		}
335  	}
336  
337  	rai->ai_route = calloc(path_cnt, sizeof(*path_data));
338  	if (rai->ai_route) {
339  		memcpy(rai->ai_route, path_data, path_cnt * sizeof(*path_data));
340  		rai->ai_route_len = path_cnt * sizeof(*path_data);
341  	}
342  }
343  
ucma_set_ep_addr(struct acm_ep_addr_data * data,struct sockaddr * addr)344  static void ucma_set_ep_addr(struct acm_ep_addr_data *data, struct sockaddr *addr)
345  {
346  	if (addr->sa_family == AF_INET) {
347  		data->type = ACM_EP_INFO_ADDRESS_IP;
348  		memcpy(data->info.addr, &((struct sockaddr_in *) addr)->sin_addr, 4);
349  	} else {
350  		data->type = ACM_EP_INFO_ADDRESS_IP6;
351  		memcpy(data->info.addr, &((struct sockaddr_in6 *) addr)->sin6_addr, 16);
352  	}
353  }
354  
/*
 * Tell whether (addr, len) describes an IPv4 or IPv6 socket address.
 *
 * Returns 1 when len is non-zero, addr is non-NULL and the address family is
 * AF_INET or AF_INET6; returns 0 otherwise.
 */
static int ucma_inet_addr(struct sockaddr *addr, socklen_t len)
{
	if (!len || !addr)
		return 0;

	switch (addr->sa_family) {
	case AF_INET:
	case AF_INET6:
		return 1;
	default:
		return 0;
	}
}
360  
/*
 * Tell whether (addr, len) describes an AF_IB socket address.
 *
 * Returns 1 when len is non-zero, addr is non-NULL and the address family is
 * AF_IB; returns 0 otherwise.
 */
static int ucma_ib_addr(struct sockaddr *addr, socklen_t len)
{
	if (!len || !addr)
		return 0;

	return addr->sa_family == AF_IB ? 1 : 0;
}
365  
ucma_ib_resolve(struct rdma_addrinfo ** rai,const struct rdma_addrinfo * hints)366  void ucma_ib_resolve(struct rdma_addrinfo **rai,
367  		     const struct rdma_addrinfo *hints)
368  {
369  	struct acm_msg msg;
370  	struct acm_ep_addr_data *data;
371  	int ret;
372  
373  	ucma_ib_init();
374  	if (sock < 0)
375  		return;
376  
377  	memset(&msg, 0, sizeof msg);
378  	msg.hdr.version = ACM_VERSION;
379  	msg.hdr.opcode = ACM_OP_RESOLVE;
380  	msg.hdr.length = ACM_MSG_HDR_LENGTH;
381  
382  	data = &msg.resolve_data[0];
383  	if (ucma_inet_addr((*rai)->ai_src_addr, (*rai)->ai_src_len)) {
384  		data->flags = ACM_EP_FLAG_SOURCE;
385  		ucma_set_ep_addr(data, (*rai)->ai_src_addr);
386  		data++;
387  		msg.hdr.length += ACM_MSG_EP_LENGTH;
388  	}
389  
390  	if (ucma_inet_addr((*rai)->ai_dst_addr, (*rai)->ai_dst_len)) {
391  		data->flags = ACM_EP_FLAG_DEST;
392  		if (hints->ai_flags & (RAI_NUMERICHOST | RAI_NOROUTE))
393  			data->flags |= ACM_FLAGS_NODELAY;
394  		ucma_set_ep_addr(data, (*rai)->ai_dst_addr);
395  		data++;
396  		msg.hdr.length += ACM_MSG_EP_LENGTH;
397  	}
398  
399  	if (hints->ai_route_len ||
400  	    ucma_ib_addr((*rai)->ai_src_addr, (*rai)->ai_src_len) ||
401  	    ucma_ib_addr((*rai)->ai_dst_addr, (*rai)->ai_dst_len)) {
402  		struct ibv_path_record *path;
403  
404  		if (hints->ai_route_len == sizeof(struct ibv_path_record))
405  			path = (struct ibv_path_record *) hints->ai_route;
406  		else if (hints->ai_route_len == sizeof(struct ibv_path_data))
407  			path = &((struct ibv_path_data *) hints->ai_route)->path;
408  		else
409  			path = NULL;
410  
411  		if (path)
412  			memcpy(&data->info.path, path, sizeof(*path));
413  
414  		if (ucma_ib_addr((*rai)->ai_src_addr, (*rai)->ai_src_len)) {
415  			memcpy(&data->info.path.sgid,
416  			       &((struct sockaddr_ib *) (*rai)->ai_src_addr)->sib_addr, 16);
417  		}
418  		if (ucma_ib_addr((*rai)->ai_dst_addr, (*rai)->ai_dst_len)) {
419  			memcpy(&data->info.path.dgid,
420  			       &((struct sockaddr_ib *) (*rai)->ai_dst_addr)->sib_addr, 16);
421  		}
422  		data->type = ACM_EP_INFO_PATH;
423  		data++;
424  		msg.hdr.length += ACM_MSG_EP_LENGTH;
425  	}
426  
427  	pthread_mutex_lock(&acm_lock);
428  	ret = send(sock, (char *) &msg, msg.hdr.length, 0);
429  	if (ret != msg.hdr.length) {
430  		pthread_mutex_unlock(&acm_lock);
431  		return;
432  	}
433  
434  	ret = recv(sock, (char *) &msg, sizeof msg, 0);
435  	pthread_mutex_unlock(&acm_lock);
436  	if (ret < ACM_MSG_HDR_LENGTH || ret != msg.hdr.length || msg.hdr.status)
437  		return;
438  
439  	ucma_ib_save_resp(*rai, &msg);
440  
441  	if (af_ib_support && !(hints->ai_flags & RAI_ROUTEONLY) && (*rai)->ai_route_len)
442  		ucma_resolve_af_ib(rai);
443  }
444