/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * I/O Controller functions for the Solaris COMSTAR SCSI RDMA Protocol
 * Target (SRPT) port provider.
 */

#include <sys/types.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/atomic.h>
#include <sys/sysmacros.h>
#include <sys/ib/ibtl/ibti.h>
#include <sys/sdt.h>

#include "srp.h"
#include "srpt_impl.h"
#include "srpt_ioc.h"
#include "srpt_stp.h"
#include "srpt_ch.h"

/*
 * srpt_ioc_srq_size - Tunable parameter that specifies the number
 * of receive WQ entries that can be posted to the IOC shared
 * receive queue.
 */
uint32_t	srpt_ioc_srq_size = SRPT_DEFAULT_IOC_SRQ_SIZE;
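
/*
 * The default can be overridden at boot time via /etc/system; e.g.
 * (a sketch, assuming the standard module:variable tunable syntax
 * applies to this driver):
 *
 *	set srpt:srpt_ioc_srq_size = 1024
 */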
extern uint16_t srpt_send_msg_depth;

/* IOC profile capabilities mask must be big-endian */
typedef struct srpt_ioc_opcap_bits_s {
#if	defined(_BIT_FIELDS_LTOH)
	uint8_t		af:1,
			at:1,
			wf:1,
			wt:1,
			rf:1,
			rt:1,
			sf:1,
			st:1;
#elif	defined(_BIT_FIELDS_HTOL)
	uint8_t		st:1,
			sf:1,
			rt:1,
			rf:1,
			wt:1,
			wf:1,
			at:1,
			af:1;
#else
#error	One of _BIT_FIELDS_LTOH or _BIT_FIELDS_HTOL must be defined
#endif
} srpt_ioc_opcap_bits_t;

typedef union {
	srpt_ioc_opcap_bits_t	bits;
	uint8_t			mask;
} srpt_ioc_opcap_mask_t;
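
/*
 * Worked example: with st, sf, rf and wf set (as done in
 * srpt_ioc_init_profile() below), the mask byte is
 * 0x80 | 0x40 | 0x10 | 0x04 = 0xd4 under either bit-field order,
 * since the two struct definitions above place each flag in the
 * same bit position.
 */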

/*
 * vmem arena variables - values derived from iSER
 */
#define	SRPT_MR_QUANTSIZE	0x400			/* 1K */
#define	SRPT_MIN_CHUNKSIZE	0x100000		/* 1MB */

/* use less memory on 32-bit kernels as it's much more constrained */
#ifdef _LP64
#define	SRPT_BUF_MR_CHUNKSIZE	0x1000000		/* 16MB */
#define	SRPT_BUF_POOL_MAX	0x40000000		/* 1GB */
#else
#define	SRPT_BUF_MR_CHUNKSIZE	0x400000		/* 4MB */
#define	SRPT_BUF_POOL_MAX	0x4000000		/* 64MB */
#endif

static ibt_mr_flags_t	srpt_dbuf_mr_flags =
    IBT_MR_ENABLE_LOCAL_WRITE | IBT_MR_ENABLE_REMOTE_WRITE |
    IBT_MR_ENABLE_REMOTE_READ;

void srpt_ioc_ib_async_hdlr(void *clnt, ibt_hca_hdl_t hdl,
	ibt_async_code_t code, ibt_async_event_t *event);

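/*
 * IBTF client registration info: IBTI interface version, storage
 * device class, our asynchronous event handler, no memory event
 * handler, and the client name used to register with the IBTF.
 */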
static struct ibt_clnt_modinfo_s srpt_ibt_modinfo = {
	IBTI_V_CURR,
	IBT_STORAGE_DEV,
	srpt_ioc_ib_async_hdlr,
	NULL,
	"srpt"
};

static srpt_ioc_t *srpt_ioc_init(ib_guid_t guid);
static void srpt_ioc_fini(srpt_ioc_t *ioc);

static srpt_vmem_pool_t *srpt_vmem_create(const char *name, srpt_ioc_t *ioc,
    ib_memlen_t chunksize, uint64_t maxsize, ibt_mr_flags_t flags);
static void *srpt_vmem_alloc(srpt_vmem_pool_t *vm_pool, size_t size);
static int srpt_vmem_mr_compare(const void *a, const void *b);
static srpt_mr_t *srpt_vmem_chunk_alloc(srpt_vmem_pool_t *vm_pool,
    ib_memlen_t chunksize);
static void srpt_vmem_destroy(srpt_vmem_pool_t *vm_pool);
static void srpt_vmem_free(srpt_vmem_pool_t *vm_pool, void *vaddr, size_t size);
static srpt_mr_t *srpt_reg_mem(srpt_vmem_pool_t *vm_pool, ib_vaddr_t vaddr,
    ib_memlen_t len);
static void srpt_vmem_chunk_free(srpt_vmem_pool_t *vm_pool, srpt_mr_t *mr);
static void srpt_dereg_mem(srpt_ioc_t *ioc, srpt_mr_t *mr);
static int srpt_vmem_mr(srpt_vmem_pool_t *vm_pool, void *vaddr, size_t size,
    srpt_mr_t *mr);

/*
 * srpt_ioc_attach() - I/O Controller attach
 *
 * Attach to IBTF and initialize I/O controllers. The srpt_ctxt->sc_rwlock
 * should be held outside of this call.
 */
int
srpt_ioc_attach()
{
	int		status;
	int		hca_cnt;
	int		hca_ndx;
	ib_guid_t	*guid;
	srpt_ioc_t	*ioc;

	ASSERT(srpt_ctxt != NULL);

	/*
	 * Attach to IBTF and initialize a list of IB devices.  Each
	 * HCA will be represented by an I/O Controller.
	 */
	status = ibt_attach(&srpt_ibt_modinfo, srpt_ctxt->sc_dip,
	    srpt_ctxt, &srpt_ctxt->sc_ibt_hdl);
	if (status != DDI_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_attach, ibt_attach failed (0x%x)",
		    status);
		return (DDI_FAILURE);
	}

	hca_cnt = ibt_get_hca_list(&guid);
	if (hca_cnt < 1) {
		/*
		 * Not a fatal error.  The service will be up and
		 * waiting for ATTACH events.
		 */
		SRPT_DPRINTF_L2("ioc_attach, no HCA found");
		return (DDI_SUCCESS);
	}

	for (hca_ndx = 0; hca_ndx < hca_cnt; hca_ndx++) {
		SRPT_DPRINTF_L2("ioc_attach, adding I/O"
		    " Controller (%016llx)", (u_longlong_t)guid[hca_ndx]);

		ioc = srpt_ioc_init(guid[hca_ndx]);
		if (ioc == NULL) {
			SRPT_DPRINTF_L1("ioc_attach, ioc_init GUID(%016llx)"
			    " failed", (u_longlong_t)guid[hca_ndx]);
			continue;
		}
		list_insert_tail(&srpt_ctxt->sc_ioc_list, ioc);
		SRPT_DPRINTF_L2("ioc_attach, I/O Controller ibt HCA hdl (%p)",
		    (void *)ioc->ioc_ibt_hdl);
		srpt_ctxt->sc_num_iocs++;
	}

	ibt_free_hca_list(guid, hca_cnt);
	SRPT_DPRINTF_L3("ioc_attach, added %d I/O Controller(s)",
	    srpt_ctxt->sc_num_iocs);
	return (DDI_SUCCESS);
}

/*
 * srpt_ioc_detach() - I/O Controller detach
 *
 * srpt_ctxt->sc_rwlock should be held outside of this call.
 */
void
srpt_ioc_detach()
{
	srpt_ioc_t	*ioc;

	ASSERT(srpt_ctxt != NULL);

	/* ioc is guaranteed non-NULL by the loop condition */
	while ((ioc = list_head(&srpt_ctxt->sc_ioc_list)) != NULL) {
		list_remove(&srpt_ctxt->sc_ioc_list, ioc);
		SRPT_DPRINTF_L2("ioc_detach, removing I/O Controller(%p)"
		    " (%016llx), ibt_hdl(%p)",
		    (void *)ioc,
		    (u_longlong_t)ioc->ioc_guid,
		    (void *)ioc->ioc_ibt_hdl);
		srpt_ioc_fini(ioc);
	}

	(void) ibt_detach(srpt_ctxt->sc_ibt_hdl);
	srpt_ctxt->sc_ibt_hdl = NULL;
}

/*
 * srpt_ioc_init() - I/O Controller initialization
 *
 * Requires srpt_ctxt->sc_rwlock be held outside of call.
 */
static srpt_ioc_t *
srpt_ioc_init(ib_guid_t guid)
{
	ibt_status_t		status;
	srpt_ioc_t		*ioc;
	ibt_hca_attr_t		hca_attr;
	uint_t			iu_ndx;
	uint_t			err_ndx;
	ibt_mr_attr_t		mr_attr;
	ibt_mr_desc_t		mr_desc;
	srpt_iu_t		*iu;
	ibt_srq_sizes_t		srq_attr;
	char			namebuf[32];
	size_t			iu_offset;

	status = ibt_query_hca_byguid(guid, &hca_attr);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_init, HCA query error (%d)",
		    status);
		return (NULL);
	}

	ioc = srpt_ioc_get_locked(guid);
	if (ioc != NULL) {
		SRPT_DPRINTF_L1("ioc_init, HCA already exists");
		return (NULL);
	}

	ioc = kmem_zalloc(sizeof (srpt_ioc_t), KM_SLEEP);

	rw_init(&ioc->ioc_rwlock, NULL, RW_DRIVER, NULL);
	rw_enter(&ioc->ioc_rwlock, RW_WRITER);

	bcopy(&hca_attr, &ioc->ioc_attr, sizeof (ibt_hca_attr_t));

	SRPT_DPRINTF_L2("ioc_init, HCA max mr=%d, mrlen=%lld",
	    hca_attr.hca_max_memr, (u_longlong_t)hca_attr.hca_max_memr_len);
	ioc->ioc_guid   = guid;

	status = ibt_open_hca(srpt_ctxt->sc_ibt_hdl, guid, &ioc->ioc_ibt_hdl);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_init, IBT open failed (%d)", status);
		goto hca_open_err;
	}

	status = ibt_alloc_pd(ioc->ioc_ibt_hdl, IBT_PD_NO_FLAGS,
	    &ioc->ioc_pd_hdl);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_init, IBT create PD failed (%d)", status);
		goto pd_alloc_err;
	}

	/*
	 * We require hardware support for SRQs.  We use a common SRQ to
	 * reduce channel memory consumption.
	 */
	if ((ioc->ioc_attr.hca_flags & IBT_HCA_SRQ) == 0) {
		SRPT_DPRINTF_L0("ioc_init, no SRQ capability, not supported");
		goto srq_alloc_err;
	}

	SRPT_DPRINTF_L3("ioc_init, Using shared receive queues, max srq work"
	    " queue size(%d), def size = %d", ioc->ioc_attr.hca_max_srqs_sz,
	    srpt_ioc_srq_size);
	srq_attr.srq_wr_sz = min(srpt_ioc_srq_size,
	    ioc->ioc_attr.hca_max_srqs_sz);
	srq_attr.srq_sgl_sz = 1;

	status = ibt_alloc_srq(ioc->ioc_ibt_hdl, IBT_SRQ_NO_FLAGS,
	    ioc->ioc_pd_hdl, &srq_attr, &ioc->ioc_srq_hdl,
	    &ioc->ioc_srq_attr);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_init, IBT create SRQ failed(%d)", status);
		goto srq_alloc_err;
	}

	SRPT_DPRINTF_L2("ioc_init, SRQ WR size(%d), SG size(%d)",
	    ioc->ioc_srq_attr.srq_wr_sz, ioc->ioc_srq_attr.srq_sgl_sz);

	ibt_set_srq_private(ioc->ioc_srq_hdl, ioc);

	/*
	 * Allocate a pool of SRP IU message buffers and post them to
	 * the I/O Controller SRQ.  We let the SRQ manage the free IU
	 * messages.
	 */
	ioc->ioc_num_iu_entries =
	    min(srq_attr.srq_wr_sz, srpt_ioc_srq_size) - 1;

	ioc->ioc_iu_pool = kmem_zalloc(sizeof (srpt_iu_t) *
	    ioc->ioc_num_iu_entries, KM_SLEEP);

	ioc->ioc_iu_bufs = kmem_alloc(SRPT_DEFAULT_SEND_MSG_SIZE *
	    ioc->ioc_num_iu_entries, KM_SLEEP);

	if ((ioc->ioc_iu_pool == NULL) || (ioc->ioc_iu_bufs == NULL)) {
		SRPT_DPRINTF_L1("ioc_init, failed to allocate SRQ IUs");
		goto srq_iu_alloc_err;
	}

	mr_attr.mr_vaddr = (ib_vaddr_t)(uintptr_t)ioc->ioc_iu_bufs;
	mr_attr.mr_len   = SRPT_DEFAULT_SEND_MSG_SIZE * ioc->ioc_num_iu_entries;
	mr_attr.mr_as    = NULL;
	mr_attr.mr_flags = IBT_MR_ENABLE_LOCAL_WRITE;

	status = ibt_register_mr(ioc->ioc_ibt_hdl, ioc->ioc_pd_hdl,
	    &mr_attr, &ioc->ioc_iu_mr_hdl, &mr_desc);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_init, IU buffer pool MR err(%d)",
		    status);
		goto srq_iu_alloc_err;
	}

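	/*
	 * Carve the registered buffer into fixed-size IUs.  Each IU's
	 * single scatter/gather entry references its slice of the one
	 * memory region registered above, and each IU is posted to the
	 * SRQ so it is immediately ready to receive an SRP request.
	 */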
	for (iu_ndx = 0, iu = ioc->ioc_iu_pool; iu_ndx <
	    ioc->ioc_num_iu_entries; iu_ndx++, iu++) {

		iu_offset = (iu_ndx * SRPT_DEFAULT_SEND_MSG_SIZE);
		iu->iu_buf = (void *)((uintptr_t)ioc->ioc_iu_bufs + iu_offset);

		mutex_init(&iu->iu_lock, NULL, MUTEX_DRIVER, NULL);

		iu->iu_sge.ds_va  = mr_desc.md_vaddr + iu_offset;
		iu->iu_sge.ds_key = mr_desc.md_lkey;
		iu->iu_sge.ds_len = SRPT_DEFAULT_SEND_MSG_SIZE;
		iu->iu_ioc	  = ioc;
		iu->iu_pool_ndx   = iu_ndx;

		status = srpt_ioc_post_recv_iu(ioc, &ioc->ioc_iu_pool[iu_ndx]);
		if (status != IBT_SUCCESS) {
			SRPT_DPRINTF_L1("ioc_init, SRQ IU post err(%d)",
			    status);
			/*
			 * Destroy this IU's lock now; the error path
			 * below only cleans up indices 0..iu_ndx-1.
			 */
			mutex_destroy(&iu->iu_lock);
			goto srq_iu_post_err;
		}
	}

	/*
	 * Initialize the dbuf vmem arena
	 */
	(void) snprintf(namebuf, sizeof (namebuf),
	    "srpt_buf_pool_%16llX", (u_longlong_t)guid);
	ioc->ioc_dbuf_pool = srpt_vmem_create(namebuf, ioc,
	    SRPT_BUF_MR_CHUNKSIZE, SRPT_BUF_POOL_MAX, srpt_dbuf_mr_flags);

	if (ioc->ioc_dbuf_pool == NULL) {
		goto stmf_db_alloc_err;
	}

	/*
	 * Allocate the I/O Controller STMF data buffer allocator.  The
	 * data store will span all targets associated with this IOC.
	 */
	ioc->ioc_stmf_ds = stmf_alloc(STMF_STRUCT_DBUF_STORE, 0, 0);
	if (ioc->ioc_stmf_ds == NULL) {
		SRPT_DPRINTF_L1("ioc_attach, STMF DBUF alloc failure for IOC");
		goto stmf_db_alloc_err;
	}
	ioc->ioc_stmf_ds->ds_alloc_data_buf = &srpt_ioc_ds_alloc_dbuf;
	ioc->ioc_stmf_ds->ds_free_data_buf  = &srpt_ioc_ds_free_dbuf;
	ioc->ioc_stmf_ds->ds_port_private   = ioc;

	rw_exit(&ioc->ioc_rwlock);
	return (ioc);

stmf_db_alloc_err:
	if (ioc->ioc_dbuf_pool != NULL) {
		srpt_vmem_destroy(ioc->ioc_dbuf_pool);
	}

srq_iu_post_err:
	if (ioc->ioc_iu_mr_hdl != NULL) {
		status = ibt_deregister_mr(ioc->ioc_ibt_hdl,
		    ioc->ioc_iu_mr_hdl);
		if (status != IBT_SUCCESS) {
			SRPT_DPRINTF_L1("ioc_init, error deregistering"
			    " memory region (%d)", status);
		}
	}
	for (err_ndx = 0, iu = ioc->ioc_iu_pool; err_ndx < iu_ndx;
	    err_ndx++, iu++) {
		mutex_destroy(&iu->iu_lock);
	}

srq_iu_alloc_err:
	if (ioc->ioc_iu_bufs != NULL) {
		kmem_free(ioc->ioc_iu_bufs, SRPT_DEFAULT_SEND_MSG_SIZE *
		    ioc->ioc_num_iu_entries);
	}
	if (ioc->ioc_iu_pool != NULL) {
		kmem_free(ioc->ioc_iu_pool,
		    sizeof (srpt_iu_t) * ioc->ioc_num_iu_entries);
	}
	if (ioc->ioc_srq_hdl != NULL) {
		status = ibt_free_srq(ioc->ioc_srq_hdl);
		if (status != IBT_SUCCESS) {
			SRPT_DPRINTF_L1("ioc_init, error freeing SRQ (%d)",
			    status);
		}
	}

srq_alloc_err:
	status = ibt_free_pd(ioc->ioc_ibt_hdl, ioc->ioc_pd_hdl);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_init, free PD error (%d)", status);
	}

pd_alloc_err:
	status = ibt_close_hca(ioc->ioc_ibt_hdl);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_init, close ioc error (%d)", status);
	}

hca_open_err:
	rw_exit(&ioc->ioc_rwlock);
	rw_destroy(&ioc->ioc_rwlock);
	kmem_free(ioc, sizeof (*ioc));
	return (NULL);
}

/*
 * srpt_ioc_fini() - I/O Controller Cleanup
 *
 * Requires srpt_ctxt->sc_rwlock be held outside of call.
 */
static void
srpt_ioc_fini(srpt_ioc_t *ioc)
{
	int		status;
	int		ndx;

	/*
	 * Note driver flows will have already taken all SRP
	 * services running on the I/O Controller off-line.
	 */
	rw_enter(&ioc->ioc_rwlock, RW_WRITER);
	if (ioc->ioc_ibt_hdl != NULL) {
		if (ioc->ioc_stmf_ds != NULL) {
			stmf_free(ioc->ioc_stmf_ds);
		}

		if (ioc->ioc_srq_hdl != NULL) {
			SRPT_DPRINTF_L4("ioc_fini, freeing SRQ");
			status = ibt_free_srq(ioc->ioc_srq_hdl);
			if (status != IBT_SUCCESS) {
				SRPT_DPRINTF_L1("ioc_fini, free SRQ"
				    " error (%d)", status);
			}
		}

		if (ioc->ioc_iu_mr_hdl != NULL) {
			status = ibt_deregister_mr(
			    ioc->ioc_ibt_hdl, ioc->ioc_iu_mr_hdl);
			if (status != IBT_SUCCESS) {
				SRPT_DPRINTF_L1("ioc_fini, error deregistering"
				    " memory region (%d)", status);
			}
		}

		if (ioc->ioc_iu_bufs != NULL) {
			kmem_free(ioc->ioc_iu_bufs, SRPT_DEFAULT_SEND_MSG_SIZE *
			    ioc->ioc_num_iu_entries);
		}

		if (ioc->ioc_iu_pool != NULL) {
			SRPT_DPRINTF_L4("ioc_fini, freeing IU entries");
			for (ndx = 0; ndx < ioc->ioc_num_iu_entries; ndx++) {
				mutex_destroy(&ioc->ioc_iu_pool[ndx].iu_lock);
			}

			SRPT_DPRINTF_L4("ioc_fini, free IU pool struct");
			kmem_free(ioc->ioc_iu_pool,
			    sizeof (srpt_iu_t) * (ioc->ioc_num_iu_entries));
			ioc->ioc_iu_pool = NULL;
			ioc->ioc_num_iu_entries = 0;
		}

		if (ioc->ioc_dbuf_pool != NULL) {
			srpt_vmem_destroy(ioc->ioc_dbuf_pool);
		}

		if (ioc->ioc_pd_hdl != NULL) {
			status = ibt_free_pd(ioc->ioc_ibt_hdl,
			    ioc->ioc_pd_hdl);
			if (status != IBT_SUCCESS) {
				SRPT_DPRINTF_L1("ioc_fini, free PD"
				    " error (%d)", status);
			}
		}

		status = ibt_close_hca(ioc->ioc_ibt_hdl);
		if (status != IBT_SUCCESS) {
			SRPT_DPRINTF_L1(
			    "ioc_fini, close ioc error (%d)", status);
		}
	}
	rw_exit(&ioc->ioc_rwlock);
	rw_destroy(&ioc->ioc_rwlock);
	kmem_free(ioc, sizeof (srpt_ioc_t));
}

/*
 * srpt_ioc_port_active() - I/O Controller port active
 */
static void
srpt_ioc_port_active(ibt_async_event_t *event)
{
	ibt_status_t		status;
	srpt_ioc_t		*ioc;
	srpt_target_port_t	*tgt = NULL;
	boolean_t		online_target = B_FALSE;
	stmf_change_status_t	cstatus;

	ASSERT(event != NULL);

	SRPT_DPRINTF_L3("ioc_port_active event handler, invoked");

	/*
	 * Find the HCA in question and, if the HCA has completed
	 * initialization and the SRP Target service for the
	 * I/O Controller exists, then bind this port.
	 */
	ioc = srpt_ioc_get(event->ev_hca_guid);

	if (ioc == NULL) {
		SRPT_DPRINTF_L2("ioc_port_active, I/O Controller not"
		    " active");
		return;
	}

	tgt = ioc->ioc_tgt_port;
	if (tgt == NULL) {
		SRPT_DPRINTF_L2("ioc_port_active, I/O Controller target"
		    " undefined");
		return;
	}

	/*
	 * We take the target lock here to serialize this operation
	 * with any STMF initiated target state transitions.  If
	 * SRP is off-line then the service handle is NULL.
	 */
	mutex_enter(&tgt->tp_lock);

	if (tgt->tp_ibt_svc_hdl != NULL) {
		status = srpt_ioc_svc_bind(tgt, event->ev_port);
		if ((status != IBT_SUCCESS) &&
		    (status != IBT_HCA_PORT_NOT_ACTIVE)) {
			SRPT_DPRINTF_L1("ioc_port_active, bind failed (%d)",
			    status);
		}
	} else {
		/* if we were offline because of no ports, try onlining now */
		if ((tgt->tp_num_active_ports == 0) &&
		    (tgt->tp_requested_state != tgt->tp_state) &&
		    (tgt->tp_requested_state == SRPT_TGT_STATE_ONLINE)) {
			online_target = B_TRUE;
			cstatus.st_completion_status = STMF_SUCCESS;
			cstatus.st_additional_info = "port active";
		}
	}

	mutex_exit(&tgt->tp_lock);

	if (online_target) {
		stmf_status_t	ret;

		ret = stmf_ctl(STMF_CMD_LPORT_ONLINE, tgt->tp_lport, &cstatus);

		if (ret == STMF_SUCCESS) {
			SRPT_DPRINTF_L1("ioc_port_active, port %d active, "
			    "target %016llx online requested", event->ev_port,
			    (u_longlong_t)ioc->ioc_guid);
		} else if (ret != STMF_ALREADY) {
			SRPT_DPRINTF_L1("ioc_port_active, port %d active, "
			    "target %016llx failed online request: %d",
			    event->ev_port, (u_longlong_t)ioc->ioc_guid,
			    (int)ret);
		}
	}
}

/*
 * srpt_ioc_port_down()
 */
static void
srpt_ioc_port_down(ibt_async_event_t *event)
{
	srpt_ioc_t		*ioc;
	srpt_target_port_t	*tgt;
	srpt_channel_t		*ch;
	srpt_channel_t		*next_ch;
	boolean_t		offline_target = B_FALSE;
	stmf_change_status_t	cstatus;

	SRPT_DPRINTF_L3("ioc_port_down event handler, invoked");

	/*
	 * Find the HCA in question and, if the HCA has completed
	 * initialization and the SRP Target service for the
	 * I/O Controller exists, then log out initiators
	 * through this port.
	 */
	ioc = srpt_ioc_get(event->ev_hca_guid);

	if (ioc == NULL) {
		SRPT_DPRINTF_L2("ioc_port_down, I/O Controller not"
		    " active");
		return;
	}

	/*
	 * We only have one target now, but we could go through all
	 * SCSI target ports if more are added.
	 */
	tgt = ioc->ioc_tgt_port;
	if (tgt == NULL) {
		SRPT_DPRINTF_L2("ioc_port_down, I/O Controller target"
		    " undefined");
		return;
	}
	mutex_enter(&tgt->tp_lock);

	/*
	 * For all channels logged in through this port, initiate a
	 * disconnect.
	 */
	mutex_enter(&tgt->tp_ch_list_lock);
	ch = list_head(&tgt->tp_ch_list);
	while (ch != NULL) {
		next_ch = list_next(&tgt->tp_ch_list, ch);
		if (ch->ch_session && (ch->ch_session->ss_hw_port ==
		    event->ev_port)) {
			srpt_ch_disconnect(ch);
		}
		ch = next_ch;
	}
	mutex_exit(&tgt->tp_ch_list_lock);

	tgt->tp_num_active_ports--;

	/* if we have no active ports, take the target offline */
	if ((tgt->tp_num_active_ports == 0) &&
	    (tgt->tp_state == SRPT_TGT_STATE_ONLINE)) {
		cstatus.st_completion_status = STMF_SUCCESS;
		cstatus.st_additional_info = "no ports active";
		offline_target = B_TRUE;
	}

	mutex_exit(&tgt->tp_lock);

	if (offline_target) {
		stmf_status_t	ret;

		ret = stmf_ctl(STMF_CMD_LPORT_OFFLINE, tgt->tp_lport, &cstatus);

		if (ret == STMF_SUCCESS) {
			SRPT_DPRINTF_L1("ioc_port_down, port %d down, target "
			    "%016llx offline requested", event->ev_port,
			    (u_longlong_t)ioc->ioc_guid);
		} else if (ret != STMF_ALREADY) {
			SRPT_DPRINTF_L1("ioc_port_down, port %d down, target "
			    "%016llx failed offline request: %d",
			    event->ev_port,
			    (u_longlong_t)ioc->ioc_guid, (int)ret);
		}
	}
}

/*
 * srpt_ioc_ib_async_hdlr - I/O Controller IB asynchronous events
 */
/* ARGSUSED */
void
srpt_ioc_ib_async_hdlr(void *clnt, ibt_hca_hdl_t hdl,
	ibt_async_code_t code, ibt_async_event_t *event)
{
	srpt_ioc_t		*ioc;
	srpt_channel_t		*ch;

	switch (code) {
	case IBT_EVENT_PORT_UP:
		srpt_ioc_port_active(event);
		break;

	case IBT_ERROR_PORT_DOWN:
		srpt_ioc_port_down(event);
		break;

	case IBT_HCA_ATTACH_EVENT:
		rw_enter(&srpt_ctxt->sc_rwlock, RW_WRITER);
		ioc = srpt_ioc_init(event->ev_hca_guid);

		if (ioc == NULL) {
			rw_exit(&srpt_ctxt->sc_rwlock);
			SRPT_DPRINTF_L1("ib_async_hdlr, HCA_ATTACH"
			    " event failed to initialize HCA (0x%016llx)",
			    (u_longlong_t)event->ev_hca_guid);
			return;
		}
		SRPT_DPRINTF_L2("HCA_ATTACH_EVENT: I/O Controller"
		    " ibt hdl (%p)",
		    (void *)ioc->ioc_ibt_hdl);

		rw_enter(&ioc->ioc_rwlock, RW_WRITER);
		ioc->ioc_tgt_port = srpt_stp_alloc_port(ioc, ioc->ioc_guid);
		if (ioc->ioc_tgt_port == NULL) {
			SRPT_DPRINTF_L1("ioc_ib_async_hdlr, alloc SCSI "
			    "target port error for HCA (0x%016llx)",
			    (u_longlong_t)event->ev_hca_guid);
			rw_exit(&ioc->ioc_rwlock);
			srpt_ioc_fini(ioc);
			rw_exit(&srpt_ctxt->sc_rwlock);
			return;
		}

		/*
		 * New HCA added with default SCSI Target Port, SRP service
		 * will be started when SCSI Target Port is brought
		 * on-line by STMF.
		 */
		srpt_ctxt->sc_num_iocs++;
		list_insert_tail(&srpt_ctxt->sc_ioc_list, ioc);

		rw_exit(&ioc->ioc_rwlock);
		rw_exit(&srpt_ctxt->sc_rwlock);
		break;

	case IBT_HCA_DETACH_EVENT:
		SRPT_DPRINTF_L1(
		    "ioc_ib_async_hdlr, HCA_DETACH_EVENT received.");
		break;

	case IBT_EVENT_EMPTY_CHAN:
		/* Channel in ERROR state is now empty */
		ch = (srpt_channel_t *)ibt_get_chan_private(event->ev_chan_hdl);
		SRPT_DPRINTF_L3(
		    "ioc_ib_async_hdlr, received empty channel error on %p",
		    (void *)ch);
		break;

	default:
		SRPT_DPRINTF_L2("ioc_ib_async_hdlr, event not "
		    "handled (%d)", code);
		break;
	}
}

/*
 * srpt_ioc_svc_bind()
 */
ibt_status_t
srpt_ioc_svc_bind(srpt_target_port_t *tgt, uint_t portnum)
{
	ibt_status_t		status;
	srpt_hw_port_t		*port;
	ibt_hca_portinfo_t	*portinfo;
	uint_t			qportinfo_sz;
	uint_t			qportnum;
	ib_gid_t		new_gid;
	srpt_ioc_t		*ioc;
	srpt_session_t		sess;

	ASSERT(tgt != NULL);
	ASSERT(tgt->tp_ioc != NULL);
	ioc = tgt->tp_ioc;

	if (tgt->tp_ibt_svc_hdl == NULL) {
		SRPT_DPRINTF_L2("ioc_svc_bind, NULL SCSI target port"
		    " service");
		return (IBT_INVALID_PARAM);
	}

	if (portnum == 0 || portnum > tgt->tp_nports) {
		SRPT_DPRINTF_L2("ioc_svc_bind, bad port (%d)", portnum);
		return (IBT_INVALID_PARAM);
	}
	status = ibt_query_hca_ports(ioc->ioc_ibt_hdl, portnum,
	    &portinfo, &qportnum, &qportinfo_sz);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_svc_bind, query port %d error (%d)",
		    portnum, status);
		return (IBT_INVALID_PARAM);
	}

	ASSERT(portinfo != NULL);

	/*
	 * If the port is not active do nothing; the caller should attempt
	 * to bind after the port goes active.
	 */
	if (portinfo->p_linkstate != IBT_PORT_ACTIVE) {
		SRPT_DPRINTF_L2("ioc_svc_bind, port %d not in active state",
		    portnum);
		ibt_free_portinfo(portinfo, qportinfo_sz);
		return (IBT_HCA_PORT_NOT_ACTIVE);
	}

	port    = &tgt->tp_hw_port[portnum-1];
	new_gid = portinfo->p_sgid_tbl[0];
	ibt_free_portinfo(portinfo, qportinfo_sz);

	/*
	 * If previously bound and the port GID has changed,
	 * rebind to the new GID.
	 */
	if (port->hwp_bind_hdl != NULL) {
		if (new_gid.gid_guid != port->hwp_gid.gid_guid ||
		    new_gid.gid_prefix != port->hwp_gid.gid_prefix) {
			SRPT_DPRINTF_L2("ioc_svc_bind, unregister current"
			    " bind");
			(void) ibt_unbind_service(tgt->tp_ibt_svc_hdl,
			    port->hwp_bind_hdl);
			port->hwp_bind_hdl = NULL;
		}
	}
	SRPT_DPRINTF_L2("ioc_svc_bind, bind service, %016llx:%016llx",
	    (u_longlong_t)new_gid.gid_prefix,
	    (u_longlong_t)new_gid.gid_guid);

	/*
	 * Pass the SCSI Target Port as CM private data; the target will
	 * always exist while this service is bound.
	 */
	status = ibt_bind_service(tgt->tp_ibt_svc_hdl, new_gid, NULL, tgt,
	    &port->hwp_bind_hdl);
	if (status != IBT_SUCCESS && status != IBT_CM_SERVICE_EXISTS) {
		SRPT_DPRINTF_L1("ioc_svc_bind, bind error (%d)", status);
		return (status);
	}
	tgt->tp_num_active_ports++;
	port->hwp_gid.gid_prefix = new_gid.gid_prefix;
	port->hwp_gid.gid_guid = new_gid.gid_guid;

	/* setting up a transient structure for the dtrace probe. */
	bzero(&sess, sizeof (srpt_session_t));
	ALIAS_STR(sess.ss_t_gid, new_gid.gid_prefix, new_gid.gid_guid);
	EUI_STR(sess.ss_t_name, tgt->tp_ibt_svc_id);

	DTRACE_SRP_1(service__up, srpt_session_t, &sess);

	return (IBT_SUCCESS);
}

/*
 * srpt_ioc_svc_unbind()
 */
void
srpt_ioc_svc_unbind(srpt_target_port_t *tgt, uint_t portnum)
{
	srpt_hw_port_t		*port;
	srpt_session_t		sess;
	ibt_status_t		ret;

	if (tgt == NULL) {
		SRPT_DPRINTF_L2("ioc_svc_unbind, SCSI target does not exist");
		return;
	}

	if (portnum == 0 || portnum > tgt->tp_nports) {
		SRPT_DPRINTF_L2("ioc_svc_unbind, bad port (%d)", portnum);
		return;
	}
	port = &tgt->tp_hw_port[portnum-1];

	/* setting up a transient structure for the dtrace probe. */
	bzero(&sess, sizeof (srpt_session_t));
	ALIAS_STR(sess.ss_t_gid, port->hwp_gid.gid_prefix,
	    port->hwp_gid.gid_guid);
	EUI_STR(sess.ss_t_name, tgt->tp_ibt_svc_id);

	DTRACE_SRP_1(service__down, srpt_session_t, &sess);

	if (tgt->tp_ibt_svc_hdl != NULL && port->hwp_bind_hdl != NULL) {
		SRPT_DPRINTF_L2("ioc_svc_unbind, unregister current bind");
		ret = ibt_unbind_service(tgt->tp_ibt_svc_hdl,
		    port->hwp_bind_hdl);
		if (ret != IBT_SUCCESS) {
			SRPT_DPRINTF_L1(
			    "ioc_svc_unbind, unregister port %d failed: %d",
			    portnum, ret);
		} else {
			port->hwp_bind_hdl = NULL;
			port->hwp_gid.gid_prefix = 0;
			port->hwp_gid.gid_guid = 0;
		}
	}
}

/*
 * srpt_ioc_svc_unbind_all()
 */
void
srpt_ioc_svc_unbind_all(srpt_target_port_t *tgt)
{
	uint_t		portnum;

	if (tgt == NULL) {
		SRPT_DPRINTF_L2("ioc_svc_unbind_all, NULL SCSI target port"
		    " specified");
		return;
	}
	for (portnum = 1; portnum <= tgt->tp_nports; portnum++) {
		srpt_ioc_svc_unbind(tgt, portnum);
	}
}

/*
 * srpt_ioc_get_locked()
 *
 * Requires srpt_ctxt->sc_rwlock be held outside of call.
 */
srpt_ioc_t *
srpt_ioc_get_locked(ib_guid_t guid)
{
	srpt_ioc_t	*ioc;

	ioc = list_head(&srpt_ctxt->sc_ioc_list);
	while (ioc != NULL) {
		if (ioc->ioc_guid == guid) {
			break;
		}
		ioc = list_next(&srpt_ctxt->sc_ioc_list, ioc);
	}
	return (ioc);
}

/*
 * srpt_ioc_get()
 */
srpt_ioc_t *
srpt_ioc_get(ib_guid_t guid)
{
	srpt_ioc_t	*ioc;

	rw_enter(&srpt_ctxt->sc_rwlock, RW_READER);
	ioc = srpt_ioc_get_locked(guid);
	rw_exit(&srpt_ctxt->sc_rwlock);
	return (ioc);
}

/*
 * srpt_ioc_post_recv_iu()
 */
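/*
 * The IU pointer itself is carried as the work request ID so the
 * receive completion handler can map a completion directly back to
 * its IU without a separate lookup.
 */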
ibt_status_t
srpt_ioc_post_recv_iu(srpt_ioc_t *ioc, srpt_iu_t *iu)
{
	ibt_status_t		status;
	ibt_recv_wr_t		wr;
	uint_t			posted;

	ASSERT(ioc != NULL);
	ASSERT(iu != NULL);

	wr.wr_id  = (ibt_wrid_t)(uintptr_t)iu;
	wr.wr_nds = 1;
	wr.wr_sgl = &iu->iu_sge;
	posted    = 0;

	status = ibt_post_srq(ioc->ioc_srq_hdl, &wr, 1, &posted);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L2("ioc_post_recv_iu, post error (%d)",
		    status);
	}
	return (status);
}

/*
 * srpt_ioc_repost_recv_iu()
 */
void
srpt_ioc_repost_recv_iu(srpt_ioc_t *ioc, srpt_iu_t *iu)
{
	srpt_channel_t		*ch;
	ibt_status_t		status;

	ASSERT(iu != NULL);
	ASSERT(mutex_owned(&iu->iu_lock));

	/*
	 * Some additional sanity checks while in debug state; all STMF
	 * related task activities should be complete prior to returning
	 * this IU to the available pool.
	 */
	ASSERT(iu->iu_stmf_task == NULL);
	ASSERT(iu->iu_sq_posted_cnt == 0);

	ch = iu->iu_ch;
	iu->iu_ch = NULL;
	iu->iu_num_rdescs = 0;
	iu->iu_rdescs = NULL;
	iu->iu_tot_xfer_len = 0;
	iu->iu_tag = 0;
	iu->iu_flags = 0;
	iu->iu_sq_posted_cnt = 0;

	status = srpt_ioc_post_recv_iu(ioc, iu);

	if (status != IBT_SUCCESS) {
		/*
		 * Very bad, we should initiate a shutdown of the I/O
		 * Controller here, off-lining any targets associated
		 * with this I/O Controller (and therefore disconnecting
		 * any logins that remain).
		 *
		 * In practice this should never happen so we put
		 * the code near the bottom of the implementation list.
		 */
		SRPT_DPRINTF_L0("ioc_repost_recv_iu, error RX IU (%d)",
		    status);
		ASSERT(0);
	} else if (ch != NULL) {
		atomic_inc_32(&ch->ch_req_lim_delta);
	}
}

/*
 * srpt_ioc_init_profile()
 *
 * SRP I/O Controller serialization lock must be held when this
 * routine is invoked.
 */
void
srpt_ioc_init_profile(srpt_ioc_t *ioc)
{
	srpt_ioc_opcap_mask_t		capmask = {0};

	ASSERT(ioc != NULL);

	ioc->ioc_profile.ioc_guid = h2b64(ioc->ioc_guid);
	(void) memcpy(ioc->ioc_profile.ioc_id_string,
	    "Solaris SRP Target 0.9a", 23);

	/*
	 * Note that the vendor ID and subsystem ID are 24-bit values.
	 * The low-order 8 bits of the vendor ID field hold the slot
	 * number and are initialized to zero.  The low-order 8 bits of
	 * the subsystem ID are reserved and also initialized to zero.
	 */
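	/*
	 * For example, a hypothetical 24-bit HCA vendor ID of 0x0002c9
	 * would be stored as h2b32(0x0002c900).
	 */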
	ioc->ioc_profile.ioc_vendorid =
	    h2b32((uint32_t)(ioc->ioc_attr.hca_vendor_id << 8));
	ioc->ioc_profile.ioc_deviceid =
	    h2b32((uint32_t)ioc->ioc_attr.hca_device_id);
	ioc->ioc_profile.ioc_device_ver =
	    h2b16((uint16_t)ioc->ioc_attr.hca_version_id);
	ioc->ioc_profile.ioc_subsys_vendorid =
	    h2b32((uint32_t)(ioc->ioc_attr.hca_vendor_id << 8));
	ioc->ioc_profile.ioc_subsys_id = h2b32(0);
	ioc->ioc_profile.ioc_io_class = h2b16(SRP_REV_16A_IO_CLASS);
	ioc->ioc_profile.ioc_io_subclass = h2b16(SRP_IO_SUBCLASS);
	ioc->ioc_profile.ioc_protocol = h2b16(SRP_PROTOCOL);
	ioc->ioc_profile.ioc_protocol_ver = h2b16(SRP_PROTOCOL_VERSION);
	ioc->ioc_profile.ioc_send_msg_qdepth = h2b16(srpt_send_msg_depth);
	ioc->ioc_profile.ioc_rdma_read_qdepth =
	    ioc->ioc_attr.hca_max_rdma_out_chan;
	ioc->ioc_profile.ioc_send_msg_sz = h2b32(SRPT_DEFAULT_SEND_MSG_SIZE);
	ioc->ioc_profile.ioc_rdma_xfer_sz = h2b32(SRPT_DEFAULT_MAX_RDMA_SIZE);

	capmask.bits.st = 1;	/* Messages can be sent to IOC */
	capmask.bits.sf = 1;	/* Messages can be sent from IOC */
	capmask.bits.rf = 1;	/* RDMA Reads can be sent from IOC */
	capmask.bits.wf = 1;	/* RDMA Writes can be sent from IOC */
	ioc->ioc_profile.ioc_ctrl_opcap_mask = capmask.mask;

	/*
	 * We currently only have one target, but if we had a list we would
	 * go through that list and only count those that are ONLINE when
	 * setting the services count and entries.
	 */
	if (ioc->ioc_tgt_port->tp_srp_enabled) {
		ioc->ioc_profile.ioc_service_entries = 1;
		ioc->ioc_svc.srv_id = h2b64(ioc->ioc_guid);
		(void) snprintf((char *)ioc->ioc_svc.srv_name,
		    IB_DM_MAX_SVC_NAME_LEN, "SRP.T10:%016llx",
		    (u_longlong_t)ioc->ioc_guid);
	} else {
		ioc->ioc_profile.ioc_service_entries = 0;
		ioc->ioc_svc.srv_id = 0;
	}
}

/*
 * srpt_ioc_ds_alloc_dbuf()
 */
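/*
 * Allocate a data buffer on behalf of STMF: carve the buffer from the
 * IOC's registered vmem pool, look up the memory region covering it so
 * the SGE carries the matching lkey, then wrap it in an STMF dbuf.
 */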
/* ARGSUSED */
stmf_data_buf_t *
srpt_ioc_ds_alloc_dbuf(struct scsi_task *task, uint32_t size,
	uint32_t *pminsize, uint32_t flags)
{
	srpt_iu_t		*iu;
	srpt_ioc_t		*ioc;
	srpt_ds_dbuf_t		*dbuf;
	stmf_data_buf_t		*stmf_dbuf;
	void			*buf;
	srpt_mr_t		mr;

	ASSERT(task != NULL);
	iu  = task->task_port_private;
	ioc = iu->iu_ioc;

	SRPT_DPRINTF_L4("ioc_ds_alloc_dbuf, invoked ioc(%p)"
	    " size(%d), flags(%x)",
	    (void *)ioc, size, flags);

	buf = srpt_vmem_alloc(ioc->ioc_dbuf_pool, size);
	if (buf == NULL) {
		return (NULL);
	}

	if (srpt_vmem_mr(ioc->ioc_dbuf_pool, buf, size, &mr) != 0) {
		goto stmf_alloc_err;
	}

	stmf_dbuf = stmf_alloc(STMF_STRUCT_DATA_BUF, sizeof (srpt_ds_dbuf_t),
	    0);
	if (stmf_dbuf == NULL) {
		SRPT_DPRINTF_L2("ioc_ds_alloc_dbuf, stmf_alloc failed");
		goto stmf_alloc_err;
	}

	dbuf = stmf_dbuf->db_port_private;
	dbuf->db_stmf_buf = stmf_dbuf;
	dbuf->db_mr_hdl = mr.mr_hdl;
	dbuf->db_ioc = ioc;
	dbuf->db_sge.ds_va = mr.mr_va;
	dbuf->db_sge.ds_key = mr.mr_lkey;
	dbuf->db_sge.ds_len = size;

	stmf_dbuf->db_buf_size = size;
	stmf_dbuf->db_data_size = size;
	stmf_dbuf->db_relative_offset = 0;
	stmf_dbuf->db_flags = 0;
	stmf_dbuf->db_xfer_status = 0;
	stmf_dbuf->db_sglist_length = 1;
	stmf_dbuf->db_sglist[0].seg_addr = buf;
	stmf_dbuf->db_sglist[0].seg_length = size;

	return (stmf_dbuf);

stmf_alloc_err:
	srpt_vmem_free(ioc->ioc_dbuf_pool, buf, size);

	return (NULL);
}

void
srpt_ioc_ds_free_dbuf(struct stmf_dbuf_store *ds,
	stmf_data_buf_t *dbuf)
{
	srpt_ioc_t	*ioc;

	SRPT_DPRINTF_L4("ioc_ds_free_dbuf, invoked buf (%p)",
	    (void *)dbuf);
	ioc = ds->ds_port_private;

	srpt_vmem_free(ioc->ioc_dbuf_pool, dbuf->db_sglist[0].seg_addr,
	    dbuf->db_buf_size);
	stmf_free(dbuf);
}

/* Memory arena routines */
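
/*
 * The data buffer pool is a vmem arena backed by large kmem chunks,
 * each registered with the HCA as a single memory region.  The arena
 * grows on demand in SRPT_BUF_MR_CHUNKSIZE increments up to
 * SRPT_BUF_POOL_MAX, and the backing chunks are tracked in an AVL
 * tree keyed by virtual address so that an arbitrary allocation can
 * be mapped back to the MR (and thus the lkey/rkey) that covers it.
 */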

static srpt_vmem_pool_t *
srpt_vmem_create(const char *name, srpt_ioc_t *ioc, ib_memlen_t chunksize,
    uint64_t maxsize, ibt_mr_flags_t flags)
{
	srpt_mr_t		*chunk;
	srpt_vmem_pool_t	*result;

	ASSERT(chunksize <= maxsize);

	result = kmem_zalloc(sizeof (srpt_vmem_pool_t), KM_SLEEP);

	result->svp_ioc = ioc;
	result->svp_chunksize = chunksize;
	result->svp_max_size = maxsize;
	result->svp_flags = flags;

	rw_init(&result->svp_lock, NULL, RW_DRIVER, NULL);
	avl_create(&result->svp_mr_list, srpt_vmem_mr_compare,
	    sizeof (srpt_mr_t), offsetof(srpt_mr_t, mr_avl));

	chunk = srpt_vmem_chunk_alloc(result, chunksize);

	avl_add(&result->svp_mr_list, chunk);
	result->svp_total_size = chunksize;

	result->svp_vmem = vmem_create(name,
	    (void *)(uintptr_t)chunk->mr_va,
	    (size_t)chunk->mr_len, SRPT_MR_QUANTSIZE,
	    NULL, NULL, NULL, 0, VM_SLEEP);

	return (result);
}

static void
srpt_vmem_destroy(srpt_vmem_pool_t *vm_pool)
{
	srpt_mr_t		*chunk;
	srpt_mr_t		*next;

	rw_enter(&vm_pool->svp_lock, RW_WRITER);
	vmem_destroy(vm_pool->svp_vmem);

	chunk = avl_first(&vm_pool->svp_mr_list);

	while (chunk != NULL) {
		next = AVL_NEXT(&vm_pool->svp_mr_list, chunk);
		avl_remove(&vm_pool->svp_mr_list, chunk);
		srpt_vmem_chunk_free(vm_pool, chunk);
		chunk = next;
	}

	avl_destroy(&vm_pool->svp_mr_list);

	rw_exit(&vm_pool->svp_lock);
	rw_destroy(&vm_pool->svp_lock);

	kmem_free(vm_pool, sizeof (srpt_vmem_pool_t));
}

static void *
srpt_vmem_alloc(srpt_vmem_pool_t *vm_pool, size_t size)
{
	void		*result;
	srpt_mr_t	*next;
	ib_memlen_t	chunklen;

	ASSERT(vm_pool != NULL);

	result = vmem_alloc(vm_pool->svp_vmem, size,
	    VM_NOSLEEP | VM_FIRSTFIT);

	if (result != NULL) {
		/* memory successfully allocated */
		return (result);
	}

	/* need more vmem */
	rw_enter(&vm_pool->svp_lock, RW_WRITER);
	chunklen = vm_pool->svp_chunksize;

	if (vm_pool->svp_total_size >= vm_pool->svp_max_size) {
		/* no more room to alloc */
		rw_exit(&vm_pool->svp_lock);
		return (NULL);
	}

	if ((vm_pool->svp_total_size + chunklen) > vm_pool->svp_max_size) {
		chunklen = vm_pool->svp_max_size - vm_pool->svp_total_size;
	}

	next = srpt_vmem_chunk_alloc(vm_pool, chunklen);
	if (next != NULL) {
		/*
		 * Note that the size of the chunk we got
		 * may not be the size we requested.  Use the
		 * length returned in the chunk itself.
		 */
		if (vmem_add(vm_pool->svp_vmem, (void *)(uintptr_t)next->mr_va,
		    next->mr_len, VM_NOSLEEP) == NULL) {
			srpt_vmem_chunk_free(vm_pool, next);
			SRPT_DPRINTF_L2("vmem_add failed");
		} else {
			vm_pool->svp_total_size += next->mr_len;
			avl_add(&vm_pool->svp_mr_list, next);
		}
	}

	rw_exit(&vm_pool->svp_lock);

	result = vmem_alloc(vm_pool->svp_vmem, size, VM_NOSLEEP | VM_FIRSTFIT);

	return (result);
}

static void
srpt_vmem_free(srpt_vmem_pool_t *vm_pool, void *vaddr, size_t size)
{
	vmem_free(vm_pool->svp_vmem, vaddr, size);
}

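/*
 * srpt_vmem_mr() - Find the registered chunk covering the range
 * [vaddr, vaddr + size) and return its MR attributes in 'mr'.
 * Returns DDI_SUCCESS only if a single chunk covers the whole range.
 */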
static int
srpt_vmem_mr(srpt_vmem_pool_t *vm_pool, void *vaddr, size_t size,
    srpt_mr_t *mr)
{
	avl_index_t		where;
	ib_vaddr_t		mrva = (ib_vaddr_t)(uintptr_t)vaddr;
	srpt_mr_t		chunk;
	srpt_mr_t		*nearest;
	ib_vaddr_t		chunk_end;
	int			status = DDI_FAILURE;

	rw_enter(&vm_pool->svp_lock, RW_READER);

	chunk.mr_va = mrva;
	nearest = avl_find(&vm_pool->svp_mr_list, &chunk, &where);

	if (nearest == NULL) {
		nearest = avl_nearest(&vm_pool->svp_mr_list, where,
		    AVL_BEFORE);
	}

	if (nearest != NULL) {
		/* Verify this chunk contains the specified address range */
		ASSERT(nearest->mr_va <= mrva);

		chunk_end = nearest->mr_va + nearest->mr_len;
		if (chunk_end >= mrva + size) {
			mr->mr_hdl = nearest->mr_hdl;
			mr->mr_va = mrva;
			mr->mr_len = size;
			mr->mr_lkey = nearest->mr_lkey;
			mr->mr_rkey = nearest->mr_rkey;
			status = DDI_SUCCESS;
		}
	}

	rw_exit(&vm_pool->svp_lock);
	return (status);
}

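/*
 * srpt_vmem_chunk_alloc() - Allocate and register a new backing chunk.
 * If the kmem allocation fails, halve the request (down to
 * SRPT_MIN_CHUNKSIZE) until it succeeds, so the pool can still grow
 * under memory pressure.
 */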
static srpt_mr_t *
srpt_vmem_chunk_alloc(srpt_vmem_pool_t *vm_pool, ib_memlen_t chunksize)
{
	void			*chunk = NULL;
	srpt_mr_t		*result = NULL;

	while ((chunk == NULL) && (chunksize >= SRPT_MIN_CHUNKSIZE)) {
		chunk = kmem_alloc(chunksize, KM_NOSLEEP);
		if (chunk == NULL) {
			SRPT_DPRINTF_L2("srpt_vmem_chunk_alloc: "
			    "failed to alloc chunk of %d, trying %d",
			    (int)chunksize, (int)chunksize/2);
			chunksize /= 2;
		}
	}

	if (chunk != NULL) {
		result = srpt_reg_mem(vm_pool, (ib_vaddr_t)(uintptr_t)chunk,
		    chunksize);
		if (result == NULL) {
			SRPT_DPRINTF_L2("srpt_vmem_chunk_alloc: "
			    "chunk registration failed");
			kmem_free(chunk, chunksize);
		}
	}

	return (result);
}

static void
srpt_vmem_chunk_free(srpt_vmem_pool_t *vm_pool, srpt_mr_t *mr)
{
	void			*chunk = (void *)(uintptr_t)mr->mr_va;
	ib_memlen_t		chunksize = mr->mr_len;

	srpt_dereg_mem(vm_pool->svp_ioc, mr);
	kmem_free(chunk, chunksize);
}

static srpt_mr_t *
srpt_reg_mem(srpt_vmem_pool_t *vm_pool, ib_vaddr_t vaddr, ib_memlen_t len)
{
	srpt_mr_t		*result = NULL;
	ibt_mr_attr_t		mr_attr;
	ibt_mr_desc_t		mr_desc;
	ibt_status_t		status;
	srpt_ioc_t		*ioc = vm_pool->svp_ioc;

	result = kmem_zalloc(sizeof (srpt_mr_t), KM_NOSLEEP);
	if (result == NULL) {
		SRPT_DPRINTF_L2("srpt_reg_mem: failed to allocate");
		return (NULL);
	}

	bzero(&mr_attr, sizeof (ibt_mr_attr_t));
	bzero(&mr_desc, sizeof (ibt_mr_desc_t));

	mr_attr.mr_vaddr = vaddr;
	mr_attr.mr_len = len;
	mr_attr.mr_as = NULL;
	mr_attr.mr_flags = vm_pool->svp_flags;

	status = ibt_register_mr(ioc->ioc_ibt_hdl, ioc->ioc_pd_hdl,
	    &mr_attr, &result->mr_hdl, &mr_desc);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L2("srpt_reg_mem: ibt_register_mr "
		    "failed %d", status);
		kmem_free(result, sizeof (srpt_mr_t));
		return (NULL);
	}

	result->mr_va = mr_attr.mr_vaddr;
	result->mr_len = mr_attr.mr_len;
	result->mr_lkey = mr_desc.md_lkey;
	result->mr_rkey = mr_desc.md_rkey;

	return (result);
}

static void
srpt_dereg_mem(srpt_ioc_t *ioc, srpt_mr_t *mr)
{
	ibt_status_t		status;

	status = ibt_deregister_mr(ioc->ioc_ibt_hdl, mr->mr_hdl);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("srpt_dereg_mem, error deregistering MR (%d)",
		    status);
	}
	kmem_free(mr, sizeof (srpt_mr_t));
}

static int
srpt_vmem_mr_compare(const void *a, const void *b)
{
	srpt_mr_t		*mr1 = (srpt_mr_t *)a;
	srpt_mr_t		*mr2 = (srpt_mr_t *)b;

	/* sort and match by virtual address */
	if (mr1->mr_va < mr2->mr_va) {
		return (-1);
	} else if (mr1->mr_va > mr2->mr_va) {
		return (1);
	}

	return (0);
}
1457