xref: /titanic_50/usr/src/lib/udapl/udapl_tavor/common/dapl_ia_open.c (revision 9e39c5ba00a55fa05777cc94b148296af305e135)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2002-2003, Network Appliance, Inc. All rights reserved.
24  */
25 
26 /*
27  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
28  * Use is subject to license terms.
29  */
30 
31 
32 /*
33  *
34  * MODULE: dapl_ia_open.c
35  *
36  * PURPOSE: Interface Adapter management
37  * Description: Interfaces in this file are completely described in
38  *		the DAPL 1.1 API, Chapter 6, section 2
39  *
40  * $Id: dapl_ia_open.c,v 1.30 2003/07/31 14:04:17 jlentini Exp $
41  */
42 
43 #include "dapl.h"
44 #include "dapl_provider.h"
45 #include "dapl_evd_util.h"
46 #include "dapl_hca_util.h"
47 #include "dapl_ia_util.h"
48 #include "dapl_adapter_util.h"
49 #include <sys/systeminfo.h>
50 #include <libdevinfo.h>
51 
52 
53 /*
54  * LOCAL PROTOTYPES
55  */
56 #if defined(IBHOSTS_NAMING)
57 void dapli_assign_hca_ip_address(
58 	DAPL_HCA		*hca_ptr,
59 	char 			*device_name);
60 #endif /* IBHOSTS_NAMING */
61 
62 static void dapli_hca_cleanup(DAPL_HCA *hca_ptr, DAT_BOOLEAN dec_ref);
63 
64 /*
65  * Determine whether the platform supports RO (Relaxed ordering)
66  * Return B_TRUE if it does support RO and B_FALSE if it does not support RO
67  *
68  * udapl_ro_disallowed is an out paramter returning whether or not
69  * relaxed ordering should be disabled (regardless of whether the platform
70  * is capable of supporting relaxed ordering)
71  *
72  */
73 static boolean_t
dapl_ro_disallowed(void)74 dapl_ro_disallowed(void)
75 {
76 	static const char * const non_ro_capable_platforms[] = {
77 		"i86pc",
78 		"i86xpv",
79 		"SUNW,Sun-Fire-V215",
80 		"SUNW,Sun-Fire-V245",
81 		"SUNW,Sun-Fire-V445",
82 		"SUNW,Sun-Fire-T1000",
83 		"SUNW,Sun-Fire-T200",
84 		"SUNW,Sun-Blade-T6300",
85 		"SUNW,Sun-Blade-T6320",
86 		"SUNW,SPARC-Enterprise-T1000",
87 		"SUNW,SPARC-Enterprise-T2000",
88 		"SUNW,SPARC-Enterprise-T5120",
89 		"SUNW,SPARC-Enterprise-T5220",
90 		NULL
91 	};
92 	char platform[256 + 1];
93 	register int i;
94 	register const char *cp;
95 	int ret;
96 	di_node_t root_node, node;
97 	boolean_t ro_disallowed;
98 	static const char *ro_disallowed_property =
99 	    "pci-relaxed-ordering-disallowed";
100 	int bool;
101 	int *boolp = &bool;
102 
103 	ret = sysinfo(SI_PLATFORM, platform, sizeof (platform));
104 	if ((ret != -1) && (ret <= sizeof (platform))) {
105 		for (i = 0; (cp = non_ro_capable_platforms[i]) != NULL; ++i) {
106 			if (strcmp(platform, cp) == 0)
107 				return (B_TRUE);
108 		}
109 	}
110 
111 	/*
112 	 * This function only finds and looks at the FIRST udapl node.
113 	 * It is assumed that there can only be one such node.
114 	 */
115 	if ((root_node = di_init("/", DINFOSUBTREE | DINFOPROP)) == DI_NODE_NIL)
116 		return (B_FALSE);
117 
118 	node = di_drv_first_node("daplt", root_node);
119 	if (node != DI_NODE_NIL) {
120 		ret = di_prop_lookup_ints(DDI_DEV_T_ANY, node,
121 		    ro_disallowed_property, &boolp);
122 		switch (ret) {
123 		case 0:
124 		case 1:
125 			ro_disallowed = B_TRUE;
126 			break;
127 		default:
128 			ro_disallowed = B_FALSE;
129 			break;
130 		}
131 
132 	}
133 	else
134 		ro_disallowed = B_FALSE;
135 
136 	di_fini(root_node);
137 
138 	return (ro_disallowed);
139 }
140 
141 /*
142  * dapl_ia_open
143  *
144  * DAPL Requirements Version xxx, 6.2.1.1
145  *
146  * Open a provider and return a handle. The handle enables the user
147  * to invoke operations on this provider.
148  *
149  * The dat_ia_open  call is actually part of the DAT registration module.
150  * That function maps the DAT_NAME parameter of dat_ia_open to a DAT_PROVIDER,
151  * and calls this function.
152  *
153  * Input:
154  *	provider
155  *	async_evd_qlen
156  *	async_evd_handle_ptr
157  *
158  * Output:
159  *	async_evd_handle
160  *	ia_handle
161  *
162  * Return Values:
163  * 	DAT_SUCCESS
164  * 	DAT_INSUFFICIENT_RESOURCES
165  * 	DAT_INVALID_PARAMETER
166  * 	DAT_INVALID_HANDLE
167  * 	DAT_NAME_NOT_FOUND	(returned by dat registry if necessary)
168  */
169 DAT_RETURN
dapl_ia_open(IN const DAT_NAME_PTR name,IN DAT_COUNT async_evd_qlen,INOUT DAT_EVD_HANDLE * async_evd_handle_ptr,OUT DAT_IA_HANDLE * ia_handle_ptr,IN boolean_t ro_aware_client)170 dapl_ia_open(
171 	IN	const DAT_NAME_PTR	name,
172 	IN	DAT_COUNT		async_evd_qlen,
173 	INOUT	DAT_EVD_HANDLE		*async_evd_handle_ptr,
174 	OUT	DAT_IA_HANDLE		*ia_handle_ptr,
175 	IN	boolean_t		ro_aware_client)
176 {
177 	DAT_RETURN	dat_status;
178 	DAT_PROVIDER	*provider;
179 	DAPL_HCA	*hca_ptr;
180 	DAPL_IA		*ia_ptr;
181 	DAPL_EVD	*evd_ptr;
182 	boolean_t	ro_disallowed;
183 
184 	dat_status = DAT_SUCCESS;
185 	hca_ptr = NULL;
186 	ia_ptr = NULL;
187 
188 	dapl_dbg_log(DAPL_DBG_TYPE_API,
189 	    "dapl_ia_open(%s, %d, %p, %p, %d)\n",
190 	    name,
191 	    async_evd_qlen,
192 	    async_evd_handle_ptr,
193 	    ia_handle_ptr,
194 	    ro_aware_client);
195 
196 	dat_status = dapl_provider_list_search(name, &provider);
197 	if (DAT_SUCCESS != dat_status) {
198 		dapl_dbg_log(DAPL_DBG_TYPE_API,
199 		    "dapl_ia_open: dapl_provider_list_search(\"%s\") returned "
200 		    "%d\n",
201 		    name,
202 		    dat_status);
203 
204 		dat_status = DAT_ERROR(DAT_INVALID_PARAMETER, DAT_INVALID_ARG1);
205 		goto bail;
206 	}
207 
208 	/* ia_handle_ptr and async_evd_handle_ptr cannot be NULL */
209 	if (ia_handle_ptr == NULL) {
210 		dat_status = DAT_ERROR(DAT_INVALID_PARAMETER, DAT_INVALID_ARG4);
211 		goto bail;
212 	}
213 	if (async_evd_handle_ptr == NULL) {
214 		dat_status = DAT_ERROR(DAT_INVALID_PARAMETER, DAT_INVALID_ARG3);
215 		goto bail;
216 	}
217 
218 	/* initialize the caller's OUT param */
219 	*ia_handle_ptr = DAT_HANDLE_NULL;
220 
221 	/* get the hca_ptr */
222 	hca_ptr = (DAPL_HCA *)provider->extension;
223 
224 	/*
225 	 * Open the HCA if it has not been done before.
226 	 */
227 	dapl_os_lock(&hca_ptr->lock);
228 	if (hca_ptr->ib_hca_handle == IB_INVALID_HANDLE) {
229 		/* register with the HW */
230 		dat_status = dapls_ib_open_hca(hca_ptr,
231 		    &hca_ptr->ib_hca_handle);
232 
233 		if (dat_status != DAT_SUCCESS) {
234 			dapl_dbg_log(DAPL_DBG_TYPE_ERR,
235 			    "dapls_ib_open_hca failed %d\n", dat_status);
236 			dapl_os_unlock(&hca_ptr->lock);
237 			goto bail;
238 		}
239 
240 		/* create a cq domain for this HCA */
241 		dat_status = dapls_ib_cqd_create(hca_ptr);
242 
243 		if (dat_status != DAT_SUCCESS) {
244 			dapl_dbg_log(DAPL_DBG_TYPE_ERR,
245 			    "ERR: Cannot allocate CQD: err %x\n", dat_status);
246 			dapli_hca_cleanup(hca_ptr, DAT_FALSE);
247 			dapl_os_unlock(&hca_ptr->lock);
248 			goto bail;
249 		}
250 		/*
251 		 * Obtain the IP address associated with this name and HCA.
252 		 */
253 
254 #ifdef IBHOSTS_NAMING
255 		dapli_assign_hca_ip_address(hca_ptr, name);
256 #endif /* IBHOSTS_NAMING */
257 
258 		/*
259 		 * Obtain IA attributes from the HCA to limit certain
260 		 * operations.
261 		 * If using DAPL_ATS naming, ib_query_hca will also set the ip
262 		 * address.
263 		 */
264 		dat_status = dapls_ib_query_hca(hca_ptr,
265 		    &hca_ptr->ia_attr,
266 		    NULL,
267 		    &hca_ptr->hca_address, NULL);
268 		if (dat_status != DAT_SUCCESS) {
269 			dapli_hca_cleanup(hca_ptr, DAT_FALSE);
270 			dapl_os_unlock(&hca_ptr->lock);
271 			goto bail;
272 		}
273 	}
274 
275 	/* is the IA going to use the ConnectX? */
276 	if (hca_ptr->hermon_resize_cq != 0) {
277 		/*
278 		 * We are running with a ConnectX.
279 		 * Determine whether platform is RO capable.
280 		 * If platform support RO and client does not
281 		 * support RO and we are not disabling RO, reject the open.
282 		 */
283 		ro_disallowed = dapl_ro_disallowed();
284 
285 		if (! ro_aware_client && ! ro_disallowed) {
286 			dapl_dbg_log(DAPL_DBG_TYPE_API,
287 			    "dapl_ia_open: failing ro_disallowed %d "
288 			    "ro_aware_client %d \n",
289 			    ro_disallowed, ro_aware_client);
290 
291 			dat_status = DAT_ERROR(DAT_INVALID_PARAMETER,
292 			    DAT_INVALID_RO_COOKIE);
293 			dapli_hca_cleanup(hca_ptr, DAT_FALSE);
294 			dapl_os_unlock(&hca_ptr->lock);
295 			goto bail;
296 		}
297 	} else {
298 		/* We are not running with a Connect X */
299 		ro_disallowed = B_TRUE;
300 	}
301 
302 
303 	/* Take a reference on the hca_handle */
304 	dapl_os_atomic_inc(&hca_ptr->handle_ref_count);
305 	dapl_os_unlock(&hca_ptr->lock);
306 
307 	/* Allocate and initialize ia structure */
308 	ia_ptr = dapl_ia_alloc(provider, hca_ptr);
309 	if (!ia_ptr) {
310 		dapl_os_lock(&hca_ptr->lock);
311 		dapli_hca_cleanup(hca_ptr, DAT_TRUE);
312 		dapl_os_unlock(&hca_ptr->lock);
313 		dat_status = DAT_ERROR(DAT_INSUFFICIENT_RESOURCES,
314 		    DAT_RESOURCE_MEMORY);
315 		goto bail;
316 	}
317 
318 	/*
319 	 * Note when we should be disabling relaxed ordering.
320 	 * If the property indicates that we should not use relaxed ordering
321 	 * we remember that fact.  If the platform is supposed to be
322 	 * non relaxed ordering capable, we disable relaxed ordering as
323 	 * well, just in case the property or the list indicating that
324 	 * this platform is not relaxed ordering capable is mistaken.
325 	 */
326 	if (ro_disallowed)
327 		ia_ptr->dapl_flags |= DAPL_DISABLE_RO;
328 
329 	/*
330 	 * we need an async EVD for this IA
331 	 * use the one passed in (if non-NULL) or create one
332 	 */
333 
334 	evd_ptr = (DAPL_EVD *) *async_evd_handle_ptr;
335 	if (evd_ptr) {
336 		if (DAPL_BAD_HANDLE(evd_ptr, DAPL_MAGIC_EVD) ||
337 		    ! (evd_ptr->evd_flags & DAT_EVD_ASYNC_FLAG)) {
338 			dat_status = DAT_ERROR(DAT_INVALID_HANDLE,
339 			    DAT_INVALID_HANDLE_EVD_ASYNC);
340 			goto bail;
341 		}
342 		/*
343 		 * InfiniBand allows only 1 asychronous event handler per HCA
344 		 * (see InfiniBand Spec, release 1.1, vol I, section 11.5.2,
345 		 *  page 559).
346 		 *
347 		 * We only need to make sure that this EVD's CQ belongs to
348 		 * the same HCA as is being opened.
349 		 */
350 
351 		if (evd_ptr->header.owner_ia->hca_ptr->ib_hca_handle !=
352 		    hca_ptr->ib_hca_handle) {
353 			dat_status = DAT_ERROR(DAT_INVALID_HANDLE,
354 			    DAT_INVALID_HANDLE_EVD_ASYNC);
355 			goto bail;
356 		}
357 
358 		ia_ptr->cleanup_async_error_evd = DAT_FALSE;
359 		ia_ptr->async_error_evd = evd_ptr;
360 	} else {
361 		/*
362 		 * Verify we have >0 length, and let the provider check the
363 		 * size
364 		 */
365 		if (async_evd_qlen <= 0) {
366 			dat_status = DAT_ERROR(DAT_INVALID_PARAMETER,
367 			    DAT_INVALID_ARG2);
368 			goto bail;
369 		}
370 		dat_status = dapls_evd_internal_create(ia_ptr,
371 		    NULL,	/* CNO ptr */
372 		    async_evd_qlen,
373 		    DAT_EVD_ASYNC_FLAG,
374 		    &evd_ptr);
375 		if (dat_status != DAT_SUCCESS) {
376 			goto bail;
377 		}
378 
379 		dapl_os_atomic_inc(&evd_ptr->evd_ref_count);
380 
381 		dapl_os_lock(&hca_ptr->lock);
382 		if (hca_ptr->async_evd != (DAPL_EVD *) 0) {
383 #if 0
384 			/*
385 			 * The async EVD for this HCA has already been assigned.
386 			 * It's an error to try and assign another one.
387 			 *
388 			 * However, we need to somehow allow multiple IAs
389 			 * off of the same HCA.  The right way to do this
390 			 * is by dispatching events off the HCA to the
391 			 * appropriate IA, but we aren't there yet.  So for
392 			 * now we create the EVD but don't connect it to
393 			 * anything.
394 			 */
395 			dapl_os_atomic_dec(&evd_ptr->evd_ref_count);
396 			dapl_evd_free(evd_ptr);
397 			dat_status = DAT_ERROR(DAT_INVALID_PARAMETER,
398 			    DAT_INVALID_ARG4);
399 			goto bail;
400 #endif
401 			dapl_os_unlock(&hca_ptr->lock);
402 		} else {
403 			hca_ptr->async_evd = evd_ptr;
404 			dapl_os_unlock(&hca_ptr->lock);
405 
406 			/*
407 			 * Register the handlers associated with the async EVD.
408 			 */
409 			dat_status = dapls_ia_setup_callbacks(ia_ptr, evd_ptr);
410 			if (dat_status != DAT_SUCCESS) {
411 				/* Assign the EVD so it gets cleaned up */
412 				ia_ptr->cleanup_async_error_evd = DAT_TRUE;
413 				ia_ptr->async_error_evd = evd_ptr;
414 				goto bail;
415 			}
416 		}
417 
418 		ia_ptr->cleanup_async_error_evd = DAT_TRUE;
419 		ia_ptr->async_error_evd = evd_ptr;
420 	}
421 
422 	dat_status = DAT_SUCCESS;
423 	*ia_handle_ptr = ia_ptr;
424 	*async_evd_handle_ptr = evd_ptr;
425 
426 bail:
427 	if (dat_status != DAT_SUCCESS) {
428 		if (ia_ptr) {
429 			/* This will release the async EVD if needed.  */
430 			(void) dapl_ia_close(ia_ptr, DAT_CLOSE_ABRUPT_FLAG);
431 		}
432 	}
433 
434 	dapl_dbg_log(DAPL_DBG_TYPE_RTN,
435 	    "dapl_ia_open () returns 0x%x\n",
436 	    dat_status);
437 
438 	return (dat_status);
439 }
440 
441 /*
442  * dapli_hca_cleanup
443  *
444  * Clean up partially allocated HCA stuff. Strictly to make cleanup
445  * simple.
446  */
447 void
dapli_hca_cleanup(DAPL_HCA * hca_ptr,DAT_BOOLEAN dec_ref)448 dapli_hca_cleanup(
449 	DAPL_HCA	*hca_ptr,
450 	DAT_BOOLEAN	dec_ref)
451 {
452 	(void) dapls_ib_close_hca(hca_ptr->ib_hca_handle);
453 	hca_ptr->ib_hca_handle = IB_INVALID_HANDLE;
454 	if (dec_ref == DAT_TRUE) {
455 		dapl_os_atomic_dec(&hca_ptr->handle_ref_count);
456 	}
457 }
458 
459 #if defined(IBHOSTS_NAMING)
460 
461 char *dapli_get_adapter_num(
462 	char 			*device_name);
463 
464 void dapli_setup_dummy_addr(
465 	IN  DAPL_HCA		*hca_ptr,
466 	IN  char		*hca_name);
467 /*
468  * dapli_assign_hca_ip_address
469  *
470  * Obtain the IP address of the passed in name, which represents a
471  * port on the hca. There are three methods here to obtain the
472  * appropriate IP address, each with their own shortcoming:
473  * 1) IPOIB_NAMING. Requires the implementation of the IPoIB
474  *    interface defined in include/dapl/ipoib_names.h. This is
475  *    not the recommended interface as IPoIB is limited at
476  *    the point we need to obtain an IP address on the
477  *    passive side of a connection. The code supporting this
478  *    implementation has been removed.
479  *
480  * 2) IBHOSTS. An entry exists in DNS and in the /etc/dapl/ibhosts
481  *    file. The immediate drawback here is that we must dictate
482  *    how to name the interface, which is a stated DAPL non-goal.
483  *    In the broader perspective, this method requires us to xmit
484  *    the IP address in the private data of a connection, which has
485  *    other fun problems. This is the default method and is known to
486  *    work, but it has problems.
487  *
488  * 3) Obtain the IP address from the driver, which has registered
489  *    the address with the SA for retrieval.
490  *
491  *
492  * Input:
493  *	hca_ptr			Pointer to HCA structure
494  *	device_name		Name of device as reported by the provider
495  *
496  * Output:
497  * 	none
498  *
499  * Returns:
500  * 	char * to string number
501  */
502 void
dapli_assign_hca_ip_address(DAPL_HCA * hca_ptr,char * device_name)503 dapli_assign_hca_ip_address(
504 	DAPL_HCA		*hca_ptr,
505 	char 			*device_name)
506 {
507 	char		*adapter_num;
508 #define	NAMELEN	128
509 	struct addrinfo	*addr;
510 	char 		hostname[NAMELEN];
511 	char		*str;
512 	int		rc;
513 
514 	/*
515 	 * Obtain the IP address of the adapter. This is a simple
516 	 * scheme that creates a name that must appear available to
517 	 * DNS, e.g. it must be in the local site DNS or in the local
518 	 * /etc/hosts file, etc.
519 	 *
520 	 *	<hostname>-ib<index>
521 	 *
522 	 * This scheme obviously doesn't work with adapters from
523 	 * multiple vendors, but will suffice in common installations.
524 	 */
525 
526 	rc = gethostname(hostname, NAMELEN);
527 	/*
528 	 * Strip off domain info if it exists (e.g. mynode.mydomain.com)
529 	 */
530 	for (str = hostname; *str && *str != '.'; ) {
531 		str++;
532 	}
533 	if (*str == '.') {
534 		*str = '\0';
535 	}
536 	dapl_os_strcat(hostname, "-ib");
537 	adapter_num = dapli_get_adapter_num(device_name);
538 	dapl_os_strcat(hostname, adapter_num);
539 
540 	rc = dapls_osd_getaddrinfo(hostname, &addr);
541 
542 	if (rc != 0) {
543 		/* Not registered in DNS, provide a dummy value */
544 		dapli_setup_dummy_addr(hca_ptr, hostname);
545 	} else {
546 		/*
547 		 * hca_address is defined as a DAT_SOCK_ADDR6 whereas ai_addr
548 		 * is a sockaddr
549 		 */
550 		(void) dapl_os_memcpy((void *)&hca_ptr->hca_address,
551 		    (void *)(addr->ai_addr), sizeof (DAT_SOCK_ADDR6));
552 	}
553 }
554 
555 
556 /*
557  * dapli_stup_dummy_addr
558  *
559  * Set up a dummy local address for the HCA. Things are not going
560  * to work too well if this happens.
561  * We call this routine if:
562  *  - remote host adapter name is not in DNS
563  *  - IPoIB implementation is not correctly set up
564  *  - Similar nonsense.
565  *
566  * Input:
567  *      hca_ptr
568  *	rhost_name		Name of remote adapter
569  *
570  * Output:
571  * 	none
572  *
573  * Returns:
574  * 	none
575  */
576 void
dapli_setup_dummy_addr(IN DAPL_HCA * hca_ptr,IN char * rhost_name)577 dapli_setup_dummy_addr(
578 	IN  DAPL_HCA		*hca_ptr,
579 	IN  char		*rhost_name)
580 {
581 	struct sockaddr_in	*si;
582 
583 	/* Not registered in DNS, provide a dummy value */
584 	dapl_dbg_log(DAPL_DBG_TYPE_ERR, "WARNING: <%s> not registered in DNS,"
585 	    " using dummy IP value\n", rhost_name);
586 	si = (struct sockaddr_in *)&hca_ptr->hca_address;
587 	si->sin_family = AF_INET;
588 	si->sin_addr.s_addr = 0x01020304;
589 }
590 
591 
/*
 * dapli_get_adapter_num
 *
 * Given a device name, return a string of the device number
 *
 * Input:
 *	device_name		Name of device as reported by the provider;
 *				may be NULL
 *
 * Output:
 * 	none
 *
 * Returns:
 * 	char * to string number: a pointer into device_name at its first
 *	decimal digit (so everything from that digit to the end of the
 *	name is returned), or a pointer to the static string "0" when
 *	device_name is NULL or contains no digit.  The caller must not
 *	modify or free the returned string.
 */
char *
dapli_get_adapter_num(
	char 		*device_name)
{
	static char	*zero = "0";
	char		*p;

	if (device_name == NULL) {
		return (zero);
	}

	/*
	 * Optimistically simple algorithm: the device number is taken to
	 * start at the first digit of the device name string. Devices
	 * whose name holds no digit are by default "0".
	 */

	for (p = device_name; *p; p++) {
		/*
		 * isdigit() is only defined for values representable as
		 * unsigned char (or EOF), so a plain char must be converted.
		 */
		if (isdigit((unsigned char)*p)) {
			return (p);
		}
	}

	return (zero);
}
627 #endif /* IBHOSTS_NAMING */
628 
629 
630 /*
631  * Local variables:
632  *  c-indent-level: 4
633  *  c-basic-offset: 4
634  *  tab-width: 8
635  * End:
636  */
637