1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2002-2003, Network Appliance, Inc. All rights reserved.
24 */
25
26 /*
27 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
28 * Use is subject to license terms.
29 */
30
31
32 /*
33 *
34 * MODULE: dapl_ia_open.c
35 *
36 * PURPOSE: Interface Adapter management
37 * Description: Interfaces in this file are completely described in
38 * the DAPL 1.1 API, Chapter 6, section 2
39 *
40 * $Id: dapl_ia_open.c,v 1.30 2003/07/31 14:04:17 jlentini Exp $
41 */
42
43 #include "dapl.h"
44 #include "dapl_provider.h"
45 #include "dapl_evd_util.h"
46 #include "dapl_hca_util.h"
47 #include "dapl_ia_util.h"
48 #include "dapl_adapter_util.h"
49 #include <sys/systeminfo.h>
50 #include <libdevinfo.h>
51
52
53 /*
54 * LOCAL PROTOTYPES
55 */
56 #if defined(IBHOSTS_NAMING)
57 void dapli_assign_hca_ip_address(
58 DAPL_HCA *hca_ptr,
59 char *device_name);
60 #endif /* IBHOSTS_NAMING */
61
62 static void dapli_hca_cleanup(DAPL_HCA *hca_ptr, DAT_BOOLEAN dec_ref);
63
64 /*
65 * Determine whether the platform supports RO (Relaxed ordering)
66 * Return B_TRUE if it does support RO and B_FALSE if it does not support RO
67 *
68 * udapl_ro_disallowed is an out paramter returning whether or not
69 * relaxed ordering should be disabled (regardless of whether the platform
70 * is capable of supporting relaxed ordering)
71 *
72 */
73 static boolean_t
dapl_ro_disallowed(void)74 dapl_ro_disallowed(void)
75 {
76 static const char * const non_ro_capable_platforms[] = {
77 "i86pc",
78 "i86xpv",
79 "SUNW,Sun-Fire-V215",
80 "SUNW,Sun-Fire-V245",
81 "SUNW,Sun-Fire-V445",
82 "SUNW,Sun-Fire-T1000",
83 "SUNW,Sun-Fire-T200",
84 "SUNW,Sun-Blade-T6300",
85 "SUNW,Sun-Blade-T6320",
86 "SUNW,SPARC-Enterprise-T1000",
87 "SUNW,SPARC-Enterprise-T2000",
88 "SUNW,SPARC-Enterprise-T5120",
89 "SUNW,SPARC-Enterprise-T5220",
90 NULL
91 };
92 char platform[256 + 1];
93 register int i;
94 register const char *cp;
95 int ret;
96 di_node_t root_node, node;
97 boolean_t ro_disallowed;
98 static const char *ro_disallowed_property =
99 "pci-relaxed-ordering-disallowed";
100 int bool;
101 int *boolp = &bool;
102
103 ret = sysinfo(SI_PLATFORM, platform, sizeof (platform));
104 if ((ret != -1) && (ret <= sizeof (platform))) {
105 for (i = 0; (cp = non_ro_capable_platforms[i]) != NULL; ++i) {
106 if (strcmp(platform, cp) == 0)
107 return (B_TRUE);
108 }
109 }
110
111 /*
112 * This function only finds and looks at the FIRST udapl node.
113 * It is assumed that there can only be one such node.
114 */
115 if ((root_node = di_init("/", DINFOSUBTREE | DINFOPROP)) == DI_NODE_NIL)
116 return (B_FALSE);
117
118 node = di_drv_first_node("daplt", root_node);
119 if (node != DI_NODE_NIL) {
120 ret = di_prop_lookup_ints(DDI_DEV_T_ANY, node,
121 ro_disallowed_property, &boolp);
122 switch (ret) {
123 case 0:
124 case 1:
125 ro_disallowed = B_TRUE;
126 break;
127 default:
128 ro_disallowed = B_FALSE;
129 break;
130 }
131
132 }
133 else
134 ro_disallowed = B_FALSE;
135
136 di_fini(root_node);
137
138 return (ro_disallowed);
139 }
140
141 /*
142 * dapl_ia_open
143 *
144 * DAPL Requirements Version xxx, 6.2.1.1
145 *
146 * Open a provider and return a handle. The handle enables the user
147 * to invoke operations on this provider.
148 *
149 * The dat_ia_open call is actually part of the DAT registration module.
150 * That function maps the DAT_NAME parameter of dat_ia_open to a DAT_PROVIDER,
151 * and calls this function.
152 *
153 * Input:
154 * provider
155 * async_evd_qlen
156 * async_evd_handle_ptr
157 *
158 * Output:
159 * async_evd_handle
160 * ia_handle
161 *
162 * Return Values:
163 * DAT_SUCCESS
164 * DAT_INSUFFICIENT_RESOURCES
165 * DAT_INVALID_PARAMETER
166 * DAT_INVALID_HANDLE
167 * DAT_NAME_NOT_FOUND (returned by dat registry if necessary)
168 */
169 DAT_RETURN
dapl_ia_open(IN const DAT_NAME_PTR name,IN DAT_COUNT async_evd_qlen,INOUT DAT_EVD_HANDLE * async_evd_handle_ptr,OUT DAT_IA_HANDLE * ia_handle_ptr,IN boolean_t ro_aware_client)170 dapl_ia_open(
171 IN const DAT_NAME_PTR name,
172 IN DAT_COUNT async_evd_qlen,
173 INOUT DAT_EVD_HANDLE *async_evd_handle_ptr,
174 OUT DAT_IA_HANDLE *ia_handle_ptr,
175 IN boolean_t ro_aware_client)
176 {
177 DAT_RETURN dat_status;
178 DAT_PROVIDER *provider;
179 DAPL_HCA *hca_ptr;
180 DAPL_IA *ia_ptr;
181 DAPL_EVD *evd_ptr;
182 boolean_t ro_disallowed;
183
184 dat_status = DAT_SUCCESS;
185 hca_ptr = NULL;
186 ia_ptr = NULL;
187
188 dapl_dbg_log(DAPL_DBG_TYPE_API,
189 "dapl_ia_open(%s, %d, %p, %p, %d)\n",
190 name,
191 async_evd_qlen,
192 async_evd_handle_ptr,
193 ia_handle_ptr,
194 ro_aware_client);
195
196 dat_status = dapl_provider_list_search(name, &provider);
197 if (DAT_SUCCESS != dat_status) {
198 dapl_dbg_log(DAPL_DBG_TYPE_API,
199 "dapl_ia_open: dapl_provider_list_search(\"%s\") returned "
200 "%d\n",
201 name,
202 dat_status);
203
204 dat_status = DAT_ERROR(DAT_INVALID_PARAMETER, DAT_INVALID_ARG1);
205 goto bail;
206 }
207
208 /* ia_handle_ptr and async_evd_handle_ptr cannot be NULL */
209 if (ia_handle_ptr == NULL) {
210 dat_status = DAT_ERROR(DAT_INVALID_PARAMETER, DAT_INVALID_ARG4);
211 goto bail;
212 }
213 if (async_evd_handle_ptr == NULL) {
214 dat_status = DAT_ERROR(DAT_INVALID_PARAMETER, DAT_INVALID_ARG3);
215 goto bail;
216 }
217
218 /* initialize the caller's OUT param */
219 *ia_handle_ptr = DAT_HANDLE_NULL;
220
221 /* get the hca_ptr */
222 hca_ptr = (DAPL_HCA *)provider->extension;
223
224 /*
225 * Open the HCA if it has not been done before.
226 */
227 dapl_os_lock(&hca_ptr->lock);
228 if (hca_ptr->ib_hca_handle == IB_INVALID_HANDLE) {
229 /* register with the HW */
230 dat_status = dapls_ib_open_hca(hca_ptr,
231 &hca_ptr->ib_hca_handle);
232
233 if (dat_status != DAT_SUCCESS) {
234 dapl_dbg_log(DAPL_DBG_TYPE_ERR,
235 "dapls_ib_open_hca failed %d\n", dat_status);
236 dapl_os_unlock(&hca_ptr->lock);
237 goto bail;
238 }
239
240 /* create a cq domain for this HCA */
241 dat_status = dapls_ib_cqd_create(hca_ptr);
242
243 if (dat_status != DAT_SUCCESS) {
244 dapl_dbg_log(DAPL_DBG_TYPE_ERR,
245 "ERR: Cannot allocate CQD: err %x\n", dat_status);
246 dapli_hca_cleanup(hca_ptr, DAT_FALSE);
247 dapl_os_unlock(&hca_ptr->lock);
248 goto bail;
249 }
250 /*
251 * Obtain the IP address associated with this name and HCA.
252 */
253
254 #ifdef IBHOSTS_NAMING
255 dapli_assign_hca_ip_address(hca_ptr, name);
256 #endif /* IBHOSTS_NAMING */
257
258 /*
259 * Obtain IA attributes from the HCA to limit certain
260 * operations.
261 * If using DAPL_ATS naming, ib_query_hca will also set the ip
262 * address.
263 */
264 dat_status = dapls_ib_query_hca(hca_ptr,
265 &hca_ptr->ia_attr,
266 NULL,
267 &hca_ptr->hca_address, NULL);
268 if (dat_status != DAT_SUCCESS) {
269 dapli_hca_cleanup(hca_ptr, DAT_FALSE);
270 dapl_os_unlock(&hca_ptr->lock);
271 goto bail;
272 }
273 }
274
275 /* is the IA going to use the ConnectX? */
276 if (hca_ptr->hermon_resize_cq != 0) {
277 /*
278 * We are running with a ConnectX.
279 * Determine whether platform is RO capable.
280 * If platform support RO and client does not
281 * support RO and we are not disabling RO, reject the open.
282 */
283 ro_disallowed = dapl_ro_disallowed();
284
285 if (! ro_aware_client && ! ro_disallowed) {
286 dapl_dbg_log(DAPL_DBG_TYPE_API,
287 "dapl_ia_open: failing ro_disallowed %d "
288 "ro_aware_client %d \n",
289 ro_disallowed, ro_aware_client);
290
291 dat_status = DAT_ERROR(DAT_INVALID_PARAMETER,
292 DAT_INVALID_RO_COOKIE);
293 dapli_hca_cleanup(hca_ptr, DAT_FALSE);
294 dapl_os_unlock(&hca_ptr->lock);
295 goto bail;
296 }
297 } else {
298 /* We are not running with a Connect X */
299 ro_disallowed = B_TRUE;
300 }
301
302
303 /* Take a reference on the hca_handle */
304 dapl_os_atomic_inc(&hca_ptr->handle_ref_count);
305 dapl_os_unlock(&hca_ptr->lock);
306
307 /* Allocate and initialize ia structure */
308 ia_ptr = dapl_ia_alloc(provider, hca_ptr);
309 if (!ia_ptr) {
310 dapl_os_lock(&hca_ptr->lock);
311 dapli_hca_cleanup(hca_ptr, DAT_TRUE);
312 dapl_os_unlock(&hca_ptr->lock);
313 dat_status = DAT_ERROR(DAT_INSUFFICIENT_RESOURCES,
314 DAT_RESOURCE_MEMORY);
315 goto bail;
316 }
317
318 /*
319 * Note when we should be disabling relaxed ordering.
320 * If the property indicates that we should not use relaxed ordering
321 * we remember that fact. If the platform is supposed to be
322 * non relaxed ordering capable, we disable relaxed ordering as
323 * well, just in case the property or the list indicating that
324 * this platform is not relaxed ordering capable is mistaken.
325 */
326 if (ro_disallowed)
327 ia_ptr->dapl_flags |= DAPL_DISABLE_RO;
328
329 /*
330 * we need an async EVD for this IA
331 * use the one passed in (if non-NULL) or create one
332 */
333
334 evd_ptr = (DAPL_EVD *) *async_evd_handle_ptr;
335 if (evd_ptr) {
336 if (DAPL_BAD_HANDLE(evd_ptr, DAPL_MAGIC_EVD) ||
337 ! (evd_ptr->evd_flags & DAT_EVD_ASYNC_FLAG)) {
338 dat_status = DAT_ERROR(DAT_INVALID_HANDLE,
339 DAT_INVALID_HANDLE_EVD_ASYNC);
340 goto bail;
341 }
342 /*
343 * InfiniBand allows only 1 asychronous event handler per HCA
344 * (see InfiniBand Spec, release 1.1, vol I, section 11.5.2,
345 * page 559).
346 *
347 * We only need to make sure that this EVD's CQ belongs to
348 * the same HCA as is being opened.
349 */
350
351 if (evd_ptr->header.owner_ia->hca_ptr->ib_hca_handle !=
352 hca_ptr->ib_hca_handle) {
353 dat_status = DAT_ERROR(DAT_INVALID_HANDLE,
354 DAT_INVALID_HANDLE_EVD_ASYNC);
355 goto bail;
356 }
357
358 ia_ptr->cleanup_async_error_evd = DAT_FALSE;
359 ia_ptr->async_error_evd = evd_ptr;
360 } else {
361 /*
362 * Verify we have >0 length, and let the provider check the
363 * size
364 */
365 if (async_evd_qlen <= 0) {
366 dat_status = DAT_ERROR(DAT_INVALID_PARAMETER,
367 DAT_INVALID_ARG2);
368 goto bail;
369 }
370 dat_status = dapls_evd_internal_create(ia_ptr,
371 NULL, /* CNO ptr */
372 async_evd_qlen,
373 DAT_EVD_ASYNC_FLAG,
374 &evd_ptr);
375 if (dat_status != DAT_SUCCESS) {
376 goto bail;
377 }
378
379 dapl_os_atomic_inc(&evd_ptr->evd_ref_count);
380
381 dapl_os_lock(&hca_ptr->lock);
382 if (hca_ptr->async_evd != (DAPL_EVD *) 0) {
383 #if 0
384 /*
385 * The async EVD for this HCA has already been assigned.
386 * It's an error to try and assign another one.
387 *
388 * However, we need to somehow allow multiple IAs
389 * off of the same HCA. The right way to do this
390 * is by dispatching events off the HCA to the
391 * appropriate IA, but we aren't there yet. So for
392 * now we create the EVD but don't connect it to
393 * anything.
394 */
395 dapl_os_atomic_dec(&evd_ptr->evd_ref_count);
396 dapl_evd_free(evd_ptr);
397 dat_status = DAT_ERROR(DAT_INVALID_PARAMETER,
398 DAT_INVALID_ARG4);
399 goto bail;
400 #endif
401 dapl_os_unlock(&hca_ptr->lock);
402 } else {
403 hca_ptr->async_evd = evd_ptr;
404 dapl_os_unlock(&hca_ptr->lock);
405
406 /*
407 * Register the handlers associated with the async EVD.
408 */
409 dat_status = dapls_ia_setup_callbacks(ia_ptr, evd_ptr);
410 if (dat_status != DAT_SUCCESS) {
411 /* Assign the EVD so it gets cleaned up */
412 ia_ptr->cleanup_async_error_evd = DAT_TRUE;
413 ia_ptr->async_error_evd = evd_ptr;
414 goto bail;
415 }
416 }
417
418 ia_ptr->cleanup_async_error_evd = DAT_TRUE;
419 ia_ptr->async_error_evd = evd_ptr;
420 }
421
422 dat_status = DAT_SUCCESS;
423 *ia_handle_ptr = ia_ptr;
424 *async_evd_handle_ptr = evd_ptr;
425
426 bail:
427 if (dat_status != DAT_SUCCESS) {
428 if (ia_ptr) {
429 /* This will release the async EVD if needed. */
430 (void) dapl_ia_close(ia_ptr, DAT_CLOSE_ABRUPT_FLAG);
431 }
432 }
433
434 dapl_dbg_log(DAPL_DBG_TYPE_RTN,
435 "dapl_ia_open () returns 0x%x\n",
436 dat_status);
437
438 return (dat_status);
439 }
440
441 /*
442 * dapli_hca_cleanup
443 *
444 * Clean up partially allocated HCA stuff. Strictly to make cleanup
445 * simple.
446 */
447 void
dapli_hca_cleanup(DAPL_HCA * hca_ptr,DAT_BOOLEAN dec_ref)448 dapli_hca_cleanup(
449 DAPL_HCA *hca_ptr,
450 DAT_BOOLEAN dec_ref)
451 {
452 (void) dapls_ib_close_hca(hca_ptr->ib_hca_handle);
453 hca_ptr->ib_hca_handle = IB_INVALID_HANDLE;
454 if (dec_ref == DAT_TRUE) {
455 dapl_os_atomic_dec(&hca_ptr->handle_ref_count);
456 }
457 }
458
459 #if defined(IBHOSTS_NAMING)
460
461 char *dapli_get_adapter_num(
462 char *device_name);
463
464 void dapli_setup_dummy_addr(
465 IN DAPL_HCA *hca_ptr,
466 IN char *hca_name);
467 /*
468 * dapli_assign_hca_ip_address
469 *
470 * Obtain the IP address of the passed in name, which represents a
471 * port on the hca. There are three methods here to obtain the
472 * appropriate IP address, each with their own shortcoming:
473 * 1) IPOIB_NAMING. Requires the implementation of the IPoIB
474 * interface defined in include/dapl/ipoib_names.h. This is
475 * not the recommended interface as IPoIB is limited at
476 * the point we need to obtain an IP address on the
477 * passive side of a connection. The code supporting this
478 * implementation has been removed.
479 *
480 * 2) IBHOSTS. An entry exists in DNS and in the /etc/dapl/ibhosts
481 * file. The immediate drawback here is that we must dictate
482 * how to name the interface, which is a stated DAPL non-goal.
483 * In the broader perspective, this method requires us to xmit
484 * the IP address in the private data of a connection, which has
485 * other fun problems. This is the default method and is known to
486 * work, but it has problems.
487 *
488 * 3) Obtain the IP address from the driver, which has registered
489 * the address with the SA for retrieval.
490 *
491 *
492 * Input:
493 * hca_ptr Pointer to HCA structure
494 * device_name Name of device as reported by the provider
495 *
496 * Output:
497 * none
498 *
499 * Returns:
500 * char * to string number
501 */
502 void
dapli_assign_hca_ip_address(DAPL_HCA * hca_ptr,char * device_name)503 dapli_assign_hca_ip_address(
504 DAPL_HCA *hca_ptr,
505 char *device_name)
506 {
507 char *adapter_num;
508 #define NAMELEN 128
509 struct addrinfo *addr;
510 char hostname[NAMELEN];
511 char *str;
512 int rc;
513
514 /*
515 * Obtain the IP address of the adapter. This is a simple
516 * scheme that creates a name that must appear available to
517 * DNS, e.g. it must be in the local site DNS or in the local
518 * /etc/hosts file, etc.
519 *
520 * <hostname>-ib<index>
521 *
522 * This scheme obviously doesn't work with adapters from
523 * multiple vendors, but will suffice in common installations.
524 */
525
526 rc = gethostname(hostname, NAMELEN);
527 /*
528 * Strip off domain info if it exists (e.g. mynode.mydomain.com)
529 */
530 for (str = hostname; *str && *str != '.'; ) {
531 str++;
532 }
533 if (*str == '.') {
534 *str = '\0';
535 }
536 dapl_os_strcat(hostname, "-ib");
537 adapter_num = dapli_get_adapter_num(device_name);
538 dapl_os_strcat(hostname, adapter_num);
539
540 rc = dapls_osd_getaddrinfo(hostname, &addr);
541
542 if (rc != 0) {
543 /* Not registered in DNS, provide a dummy value */
544 dapli_setup_dummy_addr(hca_ptr, hostname);
545 } else {
546 /*
547 * hca_address is defined as a DAT_SOCK_ADDR6 whereas ai_addr
548 * is a sockaddr
549 */
550 (void) dapl_os_memcpy((void *)&hca_ptr->hca_address,
551 (void *)(addr->ai_addr), sizeof (DAT_SOCK_ADDR6));
552 }
553 }
554
555
556 /*
557 * dapli_stup_dummy_addr
558 *
559 * Set up a dummy local address for the HCA. Things are not going
560 * to work too well if this happens.
561 * We call this routine if:
562 * - remote host adapter name is not in DNS
563 * - IPoIB implementation is not correctly set up
564 * - Similar nonsense.
565 *
566 * Input:
567 * hca_ptr
568 * rhost_name Name of remote adapter
569 *
570 * Output:
571 * none
572 *
573 * Returns:
574 * none
575 */
576 void
dapli_setup_dummy_addr(IN DAPL_HCA * hca_ptr,IN char * rhost_name)577 dapli_setup_dummy_addr(
578 IN DAPL_HCA *hca_ptr,
579 IN char *rhost_name)
580 {
581 struct sockaddr_in *si;
582
583 /* Not registered in DNS, provide a dummy value */
584 dapl_dbg_log(DAPL_DBG_TYPE_ERR, "WARNING: <%s> not registered in DNS,"
585 " using dummy IP value\n", rhost_name);
586 si = (struct sockaddr_in *)&hca_ptr->hca_address;
587 si->sin_family = AF_INET;
588 si->sin_addr.s_addr = 0x01020304;
589 }
590
591
/*
 * dapli_get_adapter_num
 *
 * Given a device name, return a string of the device number.
 *
 * Optimistically simple algorithm: the device number is the run of
 * decimal digits at the END of the device name string.  Devices whose
 * name does not end in a digit (including an empty or NULL name) are by
 * default "0".
 *
 * Input:
 *	device_name	Name of device as reported by the provider
 *
 * Output:
 *	none
 *
 * Returns:
 *	char * to string number.  This points either into device_name
 *	(at the first character of the trailing digit run) or at a static
 *	"0" string; the caller must not modify or free it.
 */
char *
dapli_get_adapter_num(
	char *device_name)
{
	static char	*zero = "0";
	char		*end;
	char		*p;

	if (device_name == NULL) {
		return (zero);
	}

	/* find the terminating NUL */
	for (end = device_name; *end != '\0'; end++)
		;

	/*
	 * Walk backwards over the trailing digits.  The cast keeps
	 * isdigit() defined for bytes with the high bit set.
	 */
	p = end;
	while (p > device_name && isdigit((unsigned char)p[-1])) {
		p--;
	}

	return ((p == end) ? zero : p);
}
627 #endif /* IBHOSTS_NAMING */
628
629
630 /*
631 * Local variables:
632 * c-indent-level: 4
633 * c-basic-offset: 4
634 * tab-width: 8
635 * End:
636 */
637