xref: /illumos-gate/usr/src/uts/common/io/ib/clients/iser/iser_resource.c (revision 30e7468f8f41aa30ada067b2c1d5d284046514da)
1*30e7468fSPeter Dunlap /*
2*30e7468fSPeter Dunlap  * CDDL HEADER START
3*30e7468fSPeter Dunlap  *
4*30e7468fSPeter Dunlap  * The contents of this file are subject to the terms of the
5*30e7468fSPeter Dunlap  * Common Development and Distribution License (the "License").
6*30e7468fSPeter Dunlap  * You may not use this file except in compliance with the License.
7*30e7468fSPeter Dunlap  *
8*30e7468fSPeter Dunlap  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*30e7468fSPeter Dunlap  * or http://www.opensolaris.org/os/licensing.
10*30e7468fSPeter Dunlap  * See the License for the specific language governing permissions
11*30e7468fSPeter Dunlap  * and limitations under the License.
12*30e7468fSPeter Dunlap  *
13*30e7468fSPeter Dunlap  * When distributing Covered Code, include this CDDL HEADER in each
14*30e7468fSPeter Dunlap  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*30e7468fSPeter Dunlap  * If applicable, add the following below this CDDL HEADER, with the
16*30e7468fSPeter Dunlap  * fields enclosed by brackets "[]" replaced with your own identifying
17*30e7468fSPeter Dunlap  * information: Portions Copyright [yyyy] [name of copyright owner]
18*30e7468fSPeter Dunlap  *
19*30e7468fSPeter Dunlap  * CDDL HEADER END
20*30e7468fSPeter Dunlap  */
21*30e7468fSPeter Dunlap /*
22*30e7468fSPeter Dunlap  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23*30e7468fSPeter Dunlap  * Use is subject to license terms.
24*30e7468fSPeter Dunlap  */
25*30e7468fSPeter Dunlap 
26*30e7468fSPeter Dunlap #include <sys/types.h>
27*30e7468fSPeter Dunlap #include <sys/ddi.h>
28*30e7468fSPeter Dunlap #include <sys/types.h>
29*30e7468fSPeter Dunlap #include <sys/socket.h>
30*30e7468fSPeter Dunlap #include <netinet/in.h>
31*30e7468fSPeter Dunlap #include <sys/sunddi.h>
32*30e7468fSPeter Dunlap #include <sys/sysmacros.h>
33*30e7468fSPeter Dunlap #include <sys/ib/ibtl/ibti.h>
34*30e7468fSPeter Dunlap #include <sys/ib/ibtl/ibtl_types.h>
35*30e7468fSPeter Dunlap 
36*30e7468fSPeter Dunlap #include <sys/ib/clients/iser/iser.h>
37*30e7468fSPeter Dunlap 
38*30e7468fSPeter Dunlap /*
39*30e7468fSPeter Dunlap  * iser_resource.c
40*30e7468fSPeter Dunlap  *    Routines for allocating resources for iSER
41*30e7468fSPeter Dunlap  */
42*30e7468fSPeter Dunlap 
/*
 * Forward declarations for helpers that are defined further down in
 * this file.
 */

/* Allocate a chunk of kernel memory and register it with the HCA */
static iser_mr_t *iser_vmem_chunk_alloc(iser_hca_t *hca, ib_memlen_t chunksize,
    ibt_mr_flags_t mr_flags);

/* Deregister a chunk and release the memory backing it */
static void iser_vmem_chunk_free(iser_hca_t *hca, iser_mr_t *iser_mr);

/* Register [vaddr, vaddr + len) with the HCA and return a new handle */
static iser_mr_t *iser_reg_mem(iser_hca_t *hca, ib_vaddr_t vaddr,
    ib_memlen_t len, ibt_mr_flags_t mr_flags);

/* Deregister a memory region and free its handle */
static void iser_dereg_mem(iser_hca_t *hca, iser_mr_t *mr);

/* AVL comparator: orders iser_mr_t nodes by is_mrva */
static int iser_vmem_mr_compare(const void *void_mr1, const void *void_mr2);
54*30e7468fSPeter Dunlap 
55*30e7468fSPeter Dunlap /*
56*30e7468fSPeter Dunlap  * iser_init_hca_caches()
57*30e7468fSPeter Dunlap  * Invoked per HCA instance initialization, to establish HCA-wide
58*30e7468fSPeter Dunlap  * message and buffer kmem caches. Note we'll uniquify cache names
59*30e7468fSPeter Dunlap  * with the lower 32-bits of the HCA GUID.
60*30e7468fSPeter Dunlap  */
61*30e7468fSPeter Dunlap void
62*30e7468fSPeter Dunlap iser_init_hca_caches(iser_hca_t *hca)
63*30e7468fSPeter Dunlap {
64*30e7468fSPeter Dunlap 	char name[ISER_CACHE_NAMELEN];
65*30e7468fSPeter Dunlap 
66*30e7468fSPeter Dunlap 	(void) snprintf(name, ISER_CACHE_NAMELEN, "iser_msg_pool_%08x",
67*30e7468fSPeter Dunlap 	    (uint32_t)(hca->hca_guid & 0xFFFFFFFF));
68*30e7468fSPeter Dunlap 	hca->hca_msg_pool = iser_vmem_create(name, hca, ISER_MSG_MR_CHUNKSIZE,
69*30e7468fSPeter Dunlap 	    ISER_MSG_POOL_MAX, ISER_MSG_MR_FLAGS);
70*30e7468fSPeter Dunlap 	(void) snprintf(name, ISER_CACHE_NAMELEN, "iser_msg_cache_%08x",
71*30e7468fSPeter Dunlap 	    (uint32_t)(hca->hca_guid & 0xFFFFFFFF));
72*30e7468fSPeter Dunlap 	hca->iser_msg_cache = kmem_cache_create(name, sizeof (iser_msg_t),
73*30e7468fSPeter Dunlap 	    0, &iser_msg_cache_constructor, &iser_msg_cache_destructor,
74*30e7468fSPeter Dunlap 	    NULL, hca, NULL, KM_SLEEP);
75*30e7468fSPeter Dunlap 
76*30e7468fSPeter Dunlap 	(void) snprintf(name, ISER_CACHE_NAMELEN, "iser_buf_pool_%08x",
77*30e7468fSPeter Dunlap 	    (uint32_t)(hca->hca_guid & 0xFFFFFFFF));
78*30e7468fSPeter Dunlap 	hca->hca_buf_pool = iser_vmem_create(name, hca, ISER_BUF_MR_CHUNKSIZE,
79*30e7468fSPeter Dunlap 	    ISER_BUF_POOL_MAX, ISER_BUF_MR_FLAGS);
80*30e7468fSPeter Dunlap 	(void) snprintf(name, ISER_CACHE_NAMELEN, "iser_buf_cache_%08x",
81*30e7468fSPeter Dunlap 	    (uint32_t)(hca->hca_guid & 0xFFFFFFFF));
82*30e7468fSPeter Dunlap 	hca->iser_buf_cache = kmem_cache_create(name, sizeof (iser_buf_t),
83*30e7468fSPeter Dunlap 	    0, &iser_buf_cache_constructor, &iser_buf_cache_destructor,
84*30e7468fSPeter Dunlap 	    NULL, hca, NULL, KM_SLEEP);
85*30e7468fSPeter Dunlap }
86*30e7468fSPeter Dunlap 
/*
 * iser_fini_hca_caches()
 * Invoked per HCA instance teardown. Destroys the buffer cache and its
 * backing pool, then the message cache and its backing pool.
 *
 * Each kmem cache is destroyed before the pool behind it: the cache
 * destructors in this file (iser_buf_cache_destructor() and
 * iser_msg_cache_destructor()) hand memory back to that pool via
 * iser_vmem_free(), so the pool must still exist at that point.
 */
void
iser_fini_hca_caches(iser_hca_t *hca)
{
	kmem_cache_destroy(hca->iser_buf_cache);
	iser_vmem_destroy(hca->hca_buf_pool);
	kmem_cache_destroy(hca->iser_msg_cache);
	iser_vmem_destroy(hca->hca_msg_pool);
}
100*30e7468fSPeter Dunlap 
101*30e7468fSPeter Dunlap /*
102*30e7468fSPeter Dunlap  * Allocate and initialize an iSER WR handle
103*30e7468fSPeter Dunlap  */
104*30e7468fSPeter Dunlap iser_wr_t *
105*30e7468fSPeter Dunlap iser_wr_get()
106*30e7468fSPeter Dunlap {
107*30e7468fSPeter Dunlap 	iser_wr_t	*iser_wr;
108*30e7468fSPeter Dunlap 
109*30e7468fSPeter Dunlap 	iser_wr = kmem_cache_alloc(iser_state->iser_wr_cache, KM_NOSLEEP);
110*30e7468fSPeter Dunlap 	if (iser_wr != NULL) {
111*30e7468fSPeter Dunlap 		iser_wr->iw_type = ISER_WR_UNDEFINED;
112*30e7468fSPeter Dunlap 		iser_wr->iw_msg  = NULL;
113*30e7468fSPeter Dunlap 		iser_wr->iw_buf  = NULL;
114*30e7468fSPeter Dunlap 		iser_wr->iw_pdu  = NULL;
115*30e7468fSPeter Dunlap 	}
116*30e7468fSPeter Dunlap 
117*30e7468fSPeter Dunlap 	return (iser_wr);
118*30e7468fSPeter Dunlap }
119*30e7468fSPeter Dunlap 
/*
 * iser_wr_free()
 * Return an iSER WR handle to the global WR cache
 * (iser_state->iser_wr_cache). The objects referenced by the handle's
 * fields are not touched here.
 */
void
iser_wr_free(iser_wr_t *iser_wr)
{
	kmem_cache_free(iser_state->iser_wr_cache, iser_wr);
}
128*30e7468fSPeter Dunlap 
129*30e7468fSPeter Dunlap /*
130*30e7468fSPeter Dunlap  * iser_msg_cache_constructor()
131*30e7468fSPeter Dunlap  * Allocate and register memory for an iSER Control-type PDU message.
132*30e7468fSPeter Dunlap  * The cached objects will retain this memory registration in the HCA,
133*30e7468fSPeter Dunlap  * and thus provide a cache of pre-allocated and registered messages
134*30e7468fSPeter Dunlap  * for use in iSER.
135*30e7468fSPeter Dunlap  */
136*30e7468fSPeter Dunlap /* ARGSUSED */
137*30e7468fSPeter Dunlap int
138*30e7468fSPeter Dunlap iser_msg_cache_constructor(void *msg_void, void *arg, int flags)
139*30e7468fSPeter Dunlap {
140*30e7468fSPeter Dunlap 	void		*memp = NULL;
141*30e7468fSPeter Dunlap 	int		status;
142*30e7468fSPeter Dunlap 	iser_msg_t	*msg = (iser_msg_t *)msg_void;
143*30e7468fSPeter Dunlap 	iser_hca_t	*hca = (iser_hca_t *)arg;
144*30e7468fSPeter Dunlap 	iser_mr_t	mr;
145*30e7468fSPeter Dunlap 
146*30e7468fSPeter Dunlap 	memp = iser_vmem_alloc(hca->hca_msg_pool, ISER_MAX_CTRLPDU_LEN);
147*30e7468fSPeter Dunlap 	if (memp == NULL) {
148*30e7468fSPeter Dunlap 		ISER_LOG(CE_NOTE, "iser_msg_cache_constructor: "
149*30e7468fSPeter Dunlap 		    "failed to allocate backing memory");
150*30e7468fSPeter Dunlap 		return (DDI_FAILURE);
151*30e7468fSPeter Dunlap 	}
152*30e7468fSPeter Dunlap 
153*30e7468fSPeter Dunlap 	/* Fill in iser_mr for the memory we just allocated */
154*30e7468fSPeter Dunlap 	status = iser_vmem_mr(hca->hca_msg_pool, memp,
155*30e7468fSPeter Dunlap 	    ISER_MAX_CTRLPDU_LEN, &mr);
156*30e7468fSPeter Dunlap 	if (status != IDM_STATUS_SUCCESS) {
157*30e7468fSPeter Dunlap 		ISER_LOG(CE_NOTE, "iser_msg_cache_constructor: "
158*30e7468fSPeter Dunlap 		    "couldn't find mr for %p", memp);
159*30e7468fSPeter Dunlap 		iser_vmem_free(hca->hca_msg_pool, memp, ISER_MAX_CTRLPDU_LEN);
160*30e7468fSPeter Dunlap 		return (DDI_FAILURE);
161*30e7468fSPeter Dunlap 	}
162*30e7468fSPeter Dunlap 
163*30e7468fSPeter Dunlap 	msg->msg_ds.ds_va	= (ib_vaddr_t)(uintptr_t)memp;
164*30e7468fSPeter Dunlap 	msg->msg_ds.ds_key	= mr.is_mrlkey;
165*30e7468fSPeter Dunlap 
166*30e7468fSPeter Dunlap 	/* Set a backpointer to this cache to save a lookup on free */
167*30e7468fSPeter Dunlap 	msg->cache = hca->iser_msg_cache;
168*30e7468fSPeter Dunlap 
169*30e7468fSPeter Dunlap 	return (DDI_SUCCESS);
170*30e7468fSPeter Dunlap }
171*30e7468fSPeter Dunlap 
172*30e7468fSPeter Dunlap /*
173*30e7468fSPeter Dunlap  * Deregister and free registered memory from an iser_msg_t handle.
174*30e7468fSPeter Dunlap  */
175*30e7468fSPeter Dunlap void
176*30e7468fSPeter Dunlap iser_msg_cache_destructor(void *mr, void *arg)
177*30e7468fSPeter Dunlap {
178*30e7468fSPeter Dunlap 	iser_msg_t	*msg = (iser_msg_t *)mr;
179*30e7468fSPeter Dunlap 	iser_hca_t	*hca = (iser_hca_t *)arg;
180*30e7468fSPeter Dunlap 	uint8_t		*memp;
181*30e7468fSPeter Dunlap 
182*30e7468fSPeter Dunlap 	memp = (uint8_t *)(uintptr_t)(ib_vaddr_t)msg->msg_ds.ds_va;
183*30e7468fSPeter Dunlap 	iser_vmem_free(hca->hca_msg_pool, memp, ISER_MAX_CTRLPDU_LEN);
184*30e7468fSPeter Dunlap }
185*30e7468fSPeter Dunlap 
186*30e7468fSPeter Dunlap /*
187*30e7468fSPeter Dunlap  * Pull a msg handle off of hca's msg cache. If no object is available
188*30e7468fSPeter Dunlap  * on the cache, a new message buffer will be allocated and registered
189*30e7468fSPeter Dunlap  * with the HCA. Once freed, this message will not be unregistered, thus
190*30e7468fSPeter Dunlap  * building up a cache of pre-allocated and registered message buffers
191*30e7468fSPeter Dunlap  * over time.
192*30e7468fSPeter Dunlap  */
193*30e7468fSPeter Dunlap iser_msg_t *
194*30e7468fSPeter Dunlap iser_msg_get(iser_hca_t *hca, int num, int *ret)
195*30e7468fSPeter Dunlap {
196*30e7468fSPeter Dunlap 	iser_msg_t	*tmp, *msg = NULL;
197*30e7468fSPeter Dunlap 	int i;
198*30e7468fSPeter Dunlap 
199*30e7468fSPeter Dunlap 	ASSERT(hca != NULL);
200*30e7468fSPeter Dunlap 
201*30e7468fSPeter Dunlap 	/*
202*30e7468fSPeter Dunlap 	 * Pull num number of message handles off the cache, linking
203*30e7468fSPeter Dunlap 	 * them if more than one have been requested.
204*30e7468fSPeter Dunlap 	 */
205*30e7468fSPeter Dunlap 	for (i = 0; i < num; i++) {
206*30e7468fSPeter Dunlap 		tmp = kmem_cache_alloc(hca->iser_msg_cache, KM_NOSLEEP);
207*30e7468fSPeter Dunlap 		if (tmp == NULL) {
208*30e7468fSPeter Dunlap 			ISER_LOG(CE_NOTE, "iser_msg_get: alloc failed, "
209*30e7468fSPeter Dunlap 			    "requested (%d) allocated (%d)", num, i);
210*30e7468fSPeter Dunlap 			break;
211*30e7468fSPeter Dunlap 		}
212*30e7468fSPeter Dunlap 		tmp->msg_ds.ds_len	= ISER_MAX_CTRLPDU_LEN;
213*30e7468fSPeter Dunlap 		tmp->nextp = msg;
214*30e7468fSPeter Dunlap 		msg = tmp;
215*30e7468fSPeter Dunlap 	}
216*30e7468fSPeter Dunlap 
217*30e7468fSPeter Dunlap 	if (ret != NULL) {
218*30e7468fSPeter Dunlap 		*ret = i;
219*30e7468fSPeter Dunlap 	}
220*30e7468fSPeter Dunlap 
221*30e7468fSPeter Dunlap 	return (msg);
222*30e7468fSPeter Dunlap }
223*30e7468fSPeter Dunlap 
/*
 * iser_msg_free()
 * Free this msg back to the kmem cache recorded in msg->cache (set by
 * iser_msg_cache_constructor()). The backing memory referenced by
 * msg_ds is not released here, so it remains available for re-use.
 * Only the single handle passed in is freed; nextp is not followed.
 */
void
iser_msg_free(iser_msg_t *msg)
{
	kmem_cache_free(msg->cache, msg);
}
233*30e7468fSPeter Dunlap 
234*30e7468fSPeter Dunlap /*
235*30e7468fSPeter Dunlap  * iser_buf_cache_constructor()
236*30e7468fSPeter Dunlap  * Allocate and register memory for an iSER RDMA operation. The cached
237*30e7468fSPeter Dunlap  * objects will retain this memory registration in the HCA, and thus
238*30e7468fSPeter Dunlap  * provide a cache of pre-allocated and registered messages for use in
239*30e7468fSPeter Dunlap  * iSER.
240*30e7468fSPeter Dunlap  */
241*30e7468fSPeter Dunlap /* ARGSUSED */
242*30e7468fSPeter Dunlap int
243*30e7468fSPeter Dunlap iser_buf_cache_constructor(void *mr, void *arg, int flags)
244*30e7468fSPeter Dunlap {
245*30e7468fSPeter Dunlap 	uint8_t		*memp;
246*30e7468fSPeter Dunlap 	idm_status_t	status;
247*30e7468fSPeter Dunlap 	iser_buf_t	*iser_buf = (iser_buf_t *)mr;
248*30e7468fSPeter Dunlap 	iser_hca_t	*hca = (iser_hca_t *)arg;
249*30e7468fSPeter Dunlap 
250*30e7468fSPeter Dunlap 	/* Allocate an iser_mr handle for this buffer */
251*30e7468fSPeter Dunlap 	iser_buf->iser_mr = kmem_zalloc(sizeof (iser_mr_t), KM_NOSLEEP);
252*30e7468fSPeter Dunlap 	if (iser_buf->iser_mr == NULL) {
253*30e7468fSPeter Dunlap 		ISER_LOG(CE_NOTE, "iser_buf_cache_constructor: "
254*30e7468fSPeter Dunlap 		    "failed to allocate memory for iser_mr handle");
255*30e7468fSPeter Dunlap 		return (DDI_FAILURE);
256*30e7468fSPeter Dunlap 	}
257*30e7468fSPeter Dunlap 
258*30e7468fSPeter Dunlap 	memp = iser_vmem_alloc(hca->hca_buf_pool, ISER_DEFAULT_BUFLEN);
259*30e7468fSPeter Dunlap 	if (memp == NULL) {
260*30e7468fSPeter Dunlap 		kmem_free(iser_buf->iser_mr, sizeof (iser_mr_t));
261*30e7468fSPeter Dunlap 		return (DDI_FAILURE);
262*30e7468fSPeter Dunlap 	}
263*30e7468fSPeter Dunlap 
264*30e7468fSPeter Dunlap 	/* Fill in iser_mr for the memory we just allocated */
265*30e7468fSPeter Dunlap 	status = iser_vmem_mr(hca->hca_buf_pool, memp, ISER_DEFAULT_BUFLEN,
266*30e7468fSPeter Dunlap 	    iser_buf->iser_mr);
267*30e7468fSPeter Dunlap 
268*30e7468fSPeter Dunlap 	if (status != IDM_STATUS_SUCCESS) {
269*30e7468fSPeter Dunlap 		return (DDI_FAILURE);
270*30e7468fSPeter Dunlap 	}
271*30e7468fSPeter Dunlap 
272*30e7468fSPeter Dunlap 	/* Set buf pointer and len for later manipulation (if necessary) */
273*30e7468fSPeter Dunlap 	iser_buf->buf		= (uint64_t *)(uintptr_t)memp;
274*30e7468fSPeter Dunlap 	iser_buf->buflen	= ISER_DEFAULT_BUFLEN;
275*30e7468fSPeter Dunlap 
276*30e7468fSPeter Dunlap 	/* Populate the SGE Vaddr and L_key for the xfer operation later */
277*30e7468fSPeter Dunlap 	iser_buf->buf_ds.ds_va	= iser_buf->iser_mr->is_mrva;
278*30e7468fSPeter Dunlap 	iser_buf->buf_ds.ds_key	= iser_buf->iser_mr->is_mrlkey;
279*30e7468fSPeter Dunlap 
280*30e7468fSPeter Dunlap 	/* Set a backpointer to this cache to save a lookup on free */
281*30e7468fSPeter Dunlap 	iser_buf->cache = hca->iser_buf_cache;
282*30e7468fSPeter Dunlap 
283*30e7468fSPeter Dunlap 	gethrestime(&iser_buf->buf_constructed);
284*30e7468fSPeter Dunlap 
285*30e7468fSPeter Dunlap 	return (DDI_SUCCESS);
286*30e7468fSPeter Dunlap }
287*30e7468fSPeter Dunlap 
288*30e7468fSPeter Dunlap /*
289*30e7468fSPeter Dunlap  * Deregister and free registered memory from an iser_buf_t handle.
290*30e7468fSPeter Dunlap  */
291*30e7468fSPeter Dunlap void
292*30e7468fSPeter Dunlap iser_buf_cache_destructor(void *mr, void *arg)
293*30e7468fSPeter Dunlap {
294*30e7468fSPeter Dunlap 	iser_buf_t	*iser_buf = (iser_buf_t *)mr;
295*30e7468fSPeter Dunlap 	iser_hca_t	*hca = (iser_hca_t *)arg;
296*30e7468fSPeter Dunlap 
297*30e7468fSPeter Dunlap 	gethrestime(&iser_buf->buf_destructed);
298*30e7468fSPeter Dunlap 
299*30e7468fSPeter Dunlap 	iser_vmem_free(hca->hca_buf_pool, iser_buf->buf, iser_buf->buflen);
300*30e7468fSPeter Dunlap 
301*30e7468fSPeter Dunlap 	kmem_free(iser_buf->iser_mr, sizeof (iser_mr_t));
302*30e7468fSPeter Dunlap }
303*30e7468fSPeter Dunlap 
304*30e7468fSPeter Dunlap /*
305*30e7468fSPeter Dunlap  * Registration for initiator buffers
306*30e7468fSPeter Dunlap  */
307*30e7468fSPeter Dunlap int
308*30e7468fSPeter Dunlap iser_reg_rdma_mem(iser_hca_t *hca, idm_buf_t *idb)
309*30e7468fSPeter Dunlap {
310*30e7468fSPeter Dunlap 	iser_mr_t	*iser_mr = NULL;
311*30e7468fSPeter Dunlap 
312*30e7468fSPeter Dunlap 	ASSERT(idb != NULL);
313*30e7468fSPeter Dunlap 	ASSERT(idb->idb_buflen > 0);
314*30e7468fSPeter Dunlap 
315*30e7468fSPeter Dunlap 	iser_mr = iser_reg_mem(hca, (ib_vaddr_t)(uintptr_t)idb->idb_buf,
316*30e7468fSPeter Dunlap 	    idb->idb_buflen, ISER_BUF_MR_FLAGS | IBT_MR_NOSLEEP);
317*30e7468fSPeter Dunlap 	if (iser_mr == NULL) {
318*30e7468fSPeter Dunlap 		ISER_LOG(CE_NOTE, "iser_reg_rdma_mem: failed to register "
319*30e7468fSPeter Dunlap 		    "memory for idm_buf_t");
320*30e7468fSPeter Dunlap 		return (DDI_FAILURE);
321*30e7468fSPeter Dunlap 	}
322*30e7468fSPeter Dunlap 
323*30e7468fSPeter Dunlap 	idb->idb_reg_private	= (void *)iser_mr;
324*30e7468fSPeter Dunlap 
325*30e7468fSPeter Dunlap 	return (DDI_SUCCESS);
326*30e7468fSPeter Dunlap }
327*30e7468fSPeter Dunlap 
328*30e7468fSPeter Dunlap void
329*30e7468fSPeter Dunlap iser_dereg_rdma_mem(iser_hca_t *hca, idm_buf_t *idb)
330*30e7468fSPeter Dunlap {
331*30e7468fSPeter Dunlap 	iser_mr_t	*mr;
332*30e7468fSPeter Dunlap 
333*30e7468fSPeter Dunlap 	ASSERT(idb != NULL);
334*30e7468fSPeter Dunlap 	mr = (iser_mr_t *)idb->idb_reg_private;
335*30e7468fSPeter Dunlap 
336*30e7468fSPeter Dunlap 	iser_dereg_mem(hca, mr);
337*30e7468fSPeter Dunlap }
338*30e7468fSPeter Dunlap 
339*30e7468fSPeter Dunlap iser_vmem_mr_pool_t *
340*30e7468fSPeter Dunlap iser_vmem_create(const char *name, iser_hca_t *hca, ib_memlen_t chunksize,
341*30e7468fSPeter Dunlap     uint64_t max_total_size, ibt_mr_flags_t arena_mr_flags)
342*30e7468fSPeter Dunlap {
343*30e7468fSPeter Dunlap 	iser_mr_t		*first_chunk;
344*30e7468fSPeter Dunlap 	iser_vmem_mr_pool_t	*result;
345*30e7468fSPeter Dunlap 
346*30e7468fSPeter Dunlap 	ASSERT(chunksize <= max_total_size);
347*30e7468fSPeter Dunlap 	result = kmem_zalloc(sizeof (*result), KM_SLEEP);
348*30e7468fSPeter Dunlap 	result->ivmp_hca = hca;
349*30e7468fSPeter Dunlap 	result->ivmp_mr_flags = arena_mr_flags;
350*30e7468fSPeter Dunlap 	result->ivmp_chunksize = chunksize;
351*30e7468fSPeter Dunlap 	result->ivmp_max_total_size = max_total_size;
352*30e7468fSPeter Dunlap 	mutex_init(&result->ivmp_mutex, NULL, MUTEX_DRIVER, NULL);
353*30e7468fSPeter Dunlap 	avl_create(&result->ivmp_mr_list, iser_vmem_mr_compare,
354*30e7468fSPeter Dunlap 	    sizeof (iser_mr_t), offsetof(iser_mr_t, is_avl_ln));
355*30e7468fSPeter Dunlap 
356*30e7468fSPeter Dunlap 	first_chunk = iser_vmem_chunk_alloc(hca, chunksize,
357*30e7468fSPeter Dunlap 	    arena_mr_flags | IBT_MR_SLEEP);
358*30e7468fSPeter Dunlap 
359*30e7468fSPeter Dunlap 	avl_add(&result->ivmp_mr_list, first_chunk);
360*30e7468fSPeter Dunlap 	result->ivmp_total_size += chunksize;
361*30e7468fSPeter Dunlap 
362*30e7468fSPeter Dunlap 	result->ivmp_vmem = vmem_create(name,
363*30e7468fSPeter Dunlap 	    (void *)(uintptr_t)first_chunk->is_mrva,
364*30e7468fSPeter Dunlap 	    (size_t)first_chunk->is_mrlen, ISER_MR_QUANTSIZE,
365*30e7468fSPeter Dunlap 	    NULL, NULL, NULL, 0, VM_SLEEP);
366*30e7468fSPeter Dunlap 
367*30e7468fSPeter Dunlap 	return (result);
368*30e7468fSPeter Dunlap }
369*30e7468fSPeter Dunlap 
370*30e7468fSPeter Dunlap void
371*30e7468fSPeter Dunlap iser_vmem_destroy(iser_vmem_mr_pool_t *vmr_pool)
372*30e7468fSPeter Dunlap {
373*30e7468fSPeter Dunlap 	iser_mr_t	*chunk, *next_chunk;
374*30e7468fSPeter Dunlap 
375*30e7468fSPeter Dunlap 	mutex_enter(&vmr_pool->ivmp_mutex);
376*30e7468fSPeter Dunlap 	vmem_destroy(vmr_pool->ivmp_vmem);
377*30e7468fSPeter Dunlap 
378*30e7468fSPeter Dunlap 	for (chunk = avl_first(&vmr_pool->ivmp_mr_list); chunk != NULL;
379*30e7468fSPeter Dunlap 	    chunk = next_chunk) {
380*30e7468fSPeter Dunlap 		next_chunk = AVL_NEXT(&vmr_pool->ivmp_mr_list, chunk);
381*30e7468fSPeter Dunlap 		avl_remove(&vmr_pool->ivmp_mr_list, chunk);
382*30e7468fSPeter Dunlap 		iser_vmem_chunk_free(vmr_pool->ivmp_hca, chunk);
383*30e7468fSPeter Dunlap 	}
384*30e7468fSPeter Dunlap 	mutex_exit(&vmr_pool->ivmp_mutex);
385*30e7468fSPeter Dunlap 
386*30e7468fSPeter Dunlap 	avl_destroy(&vmr_pool->ivmp_mr_list);
387*30e7468fSPeter Dunlap 	mutex_destroy(&vmr_pool->ivmp_mutex);
388*30e7468fSPeter Dunlap 
389*30e7468fSPeter Dunlap 	kmem_free(vmr_pool, sizeof (*vmr_pool));
390*30e7468fSPeter Dunlap }
391*30e7468fSPeter Dunlap 
392*30e7468fSPeter Dunlap void *
393*30e7468fSPeter Dunlap iser_vmem_alloc(iser_vmem_mr_pool_t *vmr_pool, size_t size)
394*30e7468fSPeter Dunlap {
395*30e7468fSPeter Dunlap 	void		*result;
396*30e7468fSPeter Dunlap 	iser_mr_t	*next_chunk;
397*30e7468fSPeter Dunlap 	ib_memlen_t	chunk_len;
398*30e7468fSPeter Dunlap 	result = vmem_alloc(vmr_pool->ivmp_vmem, size,
399*30e7468fSPeter Dunlap 	    VM_NOSLEEP | VM_FIRSTFIT);
400*30e7468fSPeter Dunlap 	if (result == NULL) {
401*30e7468fSPeter Dunlap 		mutex_enter(&vmr_pool->ivmp_mutex);
402*30e7468fSPeter Dunlap 		chunk_len = vmr_pool->ivmp_chunksize;
403*30e7468fSPeter Dunlap 		if ((vmr_pool->ivmp_total_size + chunk_len) >
404*30e7468fSPeter Dunlap 		    vmr_pool->ivmp_max_total_size) {
405*30e7468fSPeter Dunlap 			/*
406*30e7468fSPeter Dunlap 			 * Don't go over the pool size limit.  We can allocate
407*30e7468fSPeter Dunlap 			 * partial chunks so it's not always the case that
408*30e7468fSPeter Dunlap 			 * current_size + chunk_size == max_total_size
409*30e7468fSPeter Dunlap 			 */
410*30e7468fSPeter Dunlap 			if (vmr_pool->ivmp_total_size >=
411*30e7468fSPeter Dunlap 			    vmr_pool->ivmp_max_total_size) {
412*30e7468fSPeter Dunlap 				mutex_exit(&vmr_pool->ivmp_mutex);
413*30e7468fSPeter Dunlap 				return (NULL);
414*30e7468fSPeter Dunlap 			} else {
415*30e7468fSPeter Dunlap 				chunk_len = vmr_pool->ivmp_max_total_size -
416*30e7468fSPeter Dunlap 				    vmr_pool->ivmp_total_size;
417*30e7468fSPeter Dunlap 			}
418*30e7468fSPeter Dunlap 		}
419*30e7468fSPeter Dunlap 		next_chunk = iser_vmem_chunk_alloc(vmr_pool->ivmp_hca,
420*30e7468fSPeter Dunlap 		    chunk_len, vmr_pool->ivmp_mr_flags | IBT_MR_NOSLEEP);
421*30e7468fSPeter Dunlap 		if (next_chunk != NULL) {
422*30e7468fSPeter Dunlap 			if (vmem_add(vmr_pool->ivmp_vmem,
423*30e7468fSPeter Dunlap 			    (void *)(uintptr_t)next_chunk->is_mrva,
424*30e7468fSPeter Dunlap 			    next_chunk->is_mrlen, VM_NOSLEEP) == NULL) {
425*30e7468fSPeter Dunlap 				/* Free the chunk we just allocated */
426*30e7468fSPeter Dunlap 				iser_vmem_chunk_free(vmr_pool->ivmp_hca,
427*30e7468fSPeter Dunlap 				    next_chunk);
428*30e7468fSPeter Dunlap 			} else {
429*30e7468fSPeter Dunlap 				vmr_pool->ivmp_total_size +=
430*30e7468fSPeter Dunlap 				    next_chunk->is_mrlen;
431*30e7468fSPeter Dunlap 				avl_add(&vmr_pool->ivmp_mr_list, next_chunk);
432*30e7468fSPeter Dunlap 			}
433*30e7468fSPeter Dunlap 
434*30e7468fSPeter Dunlap 			result = vmem_alloc(vmr_pool->ivmp_vmem, size,
435*30e7468fSPeter Dunlap 			    VM_NOSLEEP | VM_FIRSTFIT);
436*30e7468fSPeter Dunlap 		}
437*30e7468fSPeter Dunlap 
438*30e7468fSPeter Dunlap 		mutex_exit(&vmr_pool->ivmp_mutex);
439*30e7468fSPeter Dunlap 	}
440*30e7468fSPeter Dunlap 
441*30e7468fSPeter Dunlap 	return (result);
442*30e7468fSPeter Dunlap }
443*30e7468fSPeter Dunlap 
444*30e7468fSPeter Dunlap 
/*
 * iser_vmem_free()
 * Return size bytes at vaddr to the pool's vmem arena. The chunk that
 * contains the range is neither deregistered nor freed here.
 */
void
iser_vmem_free(iser_vmem_mr_pool_t *vmr_pool, void *vaddr, size_t size)
{
	vmem_free(vmr_pool->ivmp_vmem, vaddr, size);
}
450*30e7468fSPeter Dunlap 
451*30e7468fSPeter Dunlap idm_status_t
452*30e7468fSPeter Dunlap iser_vmem_mr(iser_vmem_mr_pool_t *vmr_pool, void *vaddr, size_t size,
453*30e7468fSPeter Dunlap     iser_mr_t *mr)
454*30e7468fSPeter Dunlap {
455*30e7468fSPeter Dunlap 	avl_index_t	where;
456*30e7468fSPeter Dunlap 	ib_vaddr_t	mrva = (ib_vaddr_t)(uintptr_t)vaddr;
457*30e7468fSPeter Dunlap 	iser_mr_t	search_chunk;
458*30e7468fSPeter Dunlap 	iser_mr_t	*nearest_chunk;
459*30e7468fSPeter Dunlap 	ib_vaddr_t	chunk_end;
460*30e7468fSPeter Dunlap 
461*30e7468fSPeter Dunlap 	mutex_enter(&vmr_pool->ivmp_mutex);
462*30e7468fSPeter Dunlap 	search_chunk.is_mrva = mrva;
463*30e7468fSPeter Dunlap 	nearest_chunk = avl_find(&vmr_pool->ivmp_mr_list, &search_chunk,
464*30e7468fSPeter Dunlap 	    &where);
465*30e7468fSPeter Dunlap 	if (nearest_chunk == NULL) {
466*30e7468fSPeter Dunlap 		nearest_chunk = avl_nearest(&vmr_pool->ivmp_mr_list, where,
467*30e7468fSPeter Dunlap 		    AVL_BEFORE);
468*30e7468fSPeter Dunlap 		if (nearest_chunk == NULL) {
469*30e7468fSPeter Dunlap 			mutex_exit(&vmr_pool->ivmp_mutex);
470*30e7468fSPeter Dunlap 			return (IDM_STATUS_FAIL);
471*30e7468fSPeter Dunlap 		}
472*30e7468fSPeter Dunlap 	}
473*30e7468fSPeter Dunlap 
474*30e7468fSPeter Dunlap 	/* See if this chunk contains the specified address range */
475*30e7468fSPeter Dunlap 	ASSERT(nearest_chunk->is_mrva <= mrva);
476*30e7468fSPeter Dunlap 	chunk_end = nearest_chunk->is_mrva + nearest_chunk->is_mrlen;
477*30e7468fSPeter Dunlap 	if (chunk_end >= mrva + size) {
478*30e7468fSPeter Dunlap 		/* Yes, this chunk contains the address range */
479*30e7468fSPeter Dunlap 		mr->is_mrhdl = nearest_chunk->is_mrhdl;
480*30e7468fSPeter Dunlap 		mr->is_mrva = mrva;
481*30e7468fSPeter Dunlap 		mr->is_mrlen = size;
482*30e7468fSPeter Dunlap 		mr->is_mrlkey = nearest_chunk->is_mrlkey;
483*30e7468fSPeter Dunlap 		mr->is_mrrkey = nearest_chunk->is_mrrkey;
484*30e7468fSPeter Dunlap 		mutex_exit(&vmr_pool->ivmp_mutex);
485*30e7468fSPeter Dunlap 		return (IDM_STATUS_SUCCESS);
486*30e7468fSPeter Dunlap 	}
487*30e7468fSPeter Dunlap 	mutex_exit(&vmr_pool->ivmp_mutex);
488*30e7468fSPeter Dunlap 
489*30e7468fSPeter Dunlap 	return (IDM_STATUS_FAIL);
490*30e7468fSPeter Dunlap }
491*30e7468fSPeter Dunlap 
492*30e7468fSPeter Dunlap static iser_mr_t *
493*30e7468fSPeter Dunlap iser_vmem_chunk_alloc(iser_hca_t *hca, ib_memlen_t chunksize,
494*30e7468fSPeter Dunlap     ibt_mr_flags_t mr_flags)
495*30e7468fSPeter Dunlap {
496*30e7468fSPeter Dunlap 	void		*chunk = NULL;
497*30e7468fSPeter Dunlap 	iser_mr_t	*result = NULL;
498*30e7468fSPeter Dunlap 	int		km_flags = 0;
499*30e7468fSPeter Dunlap 
500*30e7468fSPeter Dunlap 	if (mr_flags & IBT_MR_NOSLEEP)
501*30e7468fSPeter Dunlap 		km_flags |= KM_NOSLEEP;
502*30e7468fSPeter Dunlap 
503*30e7468fSPeter Dunlap 	while ((chunk == NULL) && (chunksize >= ISER_MIN_CHUNKSIZE)) {
504*30e7468fSPeter Dunlap 		chunk = kmem_alloc(chunksize, km_flags);
505*30e7468fSPeter Dunlap 		if (chunk == NULL) {
506*30e7468fSPeter Dunlap 			ISER_LOG(CE_NOTE, "iser_vmem_chunk_alloc: "
507*30e7468fSPeter Dunlap 			    "chunk alloc of %d failed, trying %d",
508*30e7468fSPeter Dunlap 			    (int)chunksize, (int)(chunksize / 2));
509*30e7468fSPeter Dunlap 			chunksize /= 2;
510*30e7468fSPeter Dunlap 		} else {
511*30e7468fSPeter Dunlap 			ISER_LOG(CE_NOTE, "iser_vmem_chunk_alloc: "
512*30e7468fSPeter Dunlap 			    "New chunk %p size %d", chunk, (int)chunksize);
513*30e7468fSPeter Dunlap 		}
514*30e7468fSPeter Dunlap 	}
515*30e7468fSPeter Dunlap 
516*30e7468fSPeter Dunlap 	if (chunk != NULL) {
517*30e7468fSPeter Dunlap 		result = iser_reg_mem(hca, (ib_vaddr_t)(uintptr_t)chunk,
518*30e7468fSPeter Dunlap 		    chunksize, mr_flags);
519*30e7468fSPeter Dunlap 		if (result == NULL) {
520*30e7468fSPeter Dunlap 			ISER_LOG(CE_NOTE, "iser_vmem_chunk_alloc: "
521*30e7468fSPeter Dunlap 			    "Chunk registration failed");
522*30e7468fSPeter Dunlap 			kmem_free(chunk, chunksize);
523*30e7468fSPeter Dunlap 		}
524*30e7468fSPeter Dunlap 	}
525*30e7468fSPeter Dunlap 
526*30e7468fSPeter Dunlap 	return (result);
527*30e7468fSPeter Dunlap }
528*30e7468fSPeter Dunlap 
529*30e7468fSPeter Dunlap static void
530*30e7468fSPeter Dunlap iser_vmem_chunk_free(iser_hca_t *hca, iser_mr_t *iser_mr)
531*30e7468fSPeter Dunlap {
532*30e7468fSPeter Dunlap 	void		*chunk		= (void *)(uintptr_t)iser_mr->is_mrva;
533*30e7468fSPeter Dunlap 	ib_memlen_t	chunksize	= iser_mr->is_mrlen;
534*30e7468fSPeter Dunlap 
535*30e7468fSPeter Dunlap 	iser_dereg_mem(hca, iser_mr);
536*30e7468fSPeter Dunlap 
537*30e7468fSPeter Dunlap 	kmem_free(chunk, chunksize);
538*30e7468fSPeter Dunlap }
539*30e7468fSPeter Dunlap 
540*30e7468fSPeter Dunlap iser_mr_t *
541*30e7468fSPeter Dunlap iser_reg_mem(iser_hca_t *hca, ib_vaddr_t vaddr, ib_memlen_t len,
542*30e7468fSPeter Dunlap     ibt_mr_flags_t mr_flags)
543*30e7468fSPeter Dunlap {
544*30e7468fSPeter Dunlap 	iser_mr_t	*result = NULL;
545*30e7468fSPeter Dunlap 	ibt_mr_attr_t   mr_attr;
546*30e7468fSPeter Dunlap 	ibt_mr_desc_t	mr_desc;
547*30e7468fSPeter Dunlap 	ibt_status_t	status;
548*30e7468fSPeter Dunlap 	int		km_flags = 0;
549*30e7468fSPeter Dunlap 
550*30e7468fSPeter Dunlap 	if (mr_flags & IBT_MR_NOSLEEP)
551*30e7468fSPeter Dunlap 		mr_flags |= KM_NOSLEEP;
552*30e7468fSPeter Dunlap 
553*30e7468fSPeter Dunlap 	result = (iser_mr_t *)kmem_zalloc(sizeof (iser_mr_t), km_flags);
554*30e7468fSPeter Dunlap 	if (result == NULL) {
555*30e7468fSPeter Dunlap 		ISER_LOG(CE_NOTE, "iser_reg_mem: failed to allocate "
556*30e7468fSPeter Dunlap 		    "memory for iser_mr handle");
557*30e7468fSPeter Dunlap 		return (NULL);
558*30e7468fSPeter Dunlap 	}
559*30e7468fSPeter Dunlap 
560*30e7468fSPeter Dunlap 	bzero(&mr_attr, sizeof (ibt_mr_attr_t));
561*30e7468fSPeter Dunlap 	bzero(&mr_desc, sizeof (ibt_mr_desc_t));
562*30e7468fSPeter Dunlap 
563*30e7468fSPeter Dunlap 	mr_attr.mr_vaddr	= vaddr;
564*30e7468fSPeter Dunlap 	mr_attr.mr_len		= len;
565*30e7468fSPeter Dunlap 	mr_attr.mr_as		= NULL;
566*30e7468fSPeter Dunlap 	mr_attr.mr_flags	= mr_flags;
567*30e7468fSPeter Dunlap 
568*30e7468fSPeter Dunlap 	status = ibt_register_mr(hca->hca_hdl, hca->hca_pdhdl, &mr_attr,
569*30e7468fSPeter Dunlap 	    &result->is_mrhdl, &mr_desc);
570*30e7468fSPeter Dunlap 	if (status != IBT_SUCCESS) {
571*30e7468fSPeter Dunlap 		ISER_LOG(CE_NOTE, "iser_reg_mem: ibt_register_mr "
572*30e7468fSPeter Dunlap 		    "failure (%d)", status);
573*30e7468fSPeter Dunlap 		kmem_free(result, sizeof (iser_mr_t));
574*30e7468fSPeter Dunlap 		return (NULL);
575*30e7468fSPeter Dunlap 	}
576*30e7468fSPeter Dunlap 
577*30e7468fSPeter Dunlap 	result->is_mrva		= mr_attr.mr_vaddr;
578*30e7468fSPeter Dunlap 	result->is_mrlen	= mr_attr.mr_len;
579*30e7468fSPeter Dunlap 	result->is_mrlkey	= mr_desc.md_lkey;
580*30e7468fSPeter Dunlap 	result->is_mrrkey	= mr_desc.md_rkey;
581*30e7468fSPeter Dunlap 
582*30e7468fSPeter Dunlap 	return (result);
583*30e7468fSPeter Dunlap }
584*30e7468fSPeter Dunlap 
585*30e7468fSPeter Dunlap void
586*30e7468fSPeter Dunlap iser_dereg_mem(iser_hca_t *hca, iser_mr_t *mr)
587*30e7468fSPeter Dunlap {
588*30e7468fSPeter Dunlap 	ibt_deregister_mr(hca->hca_hdl, mr->is_mrhdl);
589*30e7468fSPeter Dunlap 	kmem_free(mr, sizeof (iser_mr_t));
590*30e7468fSPeter Dunlap }
591*30e7468fSPeter Dunlap 
592*30e7468fSPeter Dunlap static int
593*30e7468fSPeter Dunlap iser_vmem_mr_compare(const void *void_mr1, const void *void_mr2)
594*30e7468fSPeter Dunlap {
595*30e7468fSPeter Dunlap 	iser_mr_t *mr1 = (iser_mr_t *)void_mr1;
596*30e7468fSPeter Dunlap 	iser_mr_t *mr2 = (iser_mr_t *)void_mr2;
597*30e7468fSPeter Dunlap 
598*30e7468fSPeter Dunlap 	/* Sort memory chunks by their virtual address */
599*30e7468fSPeter Dunlap 	if (mr1->is_mrva < mr2->is_mrva)
600*30e7468fSPeter Dunlap 		return (-1);
601*30e7468fSPeter Dunlap 	else if (mr1->is_mrva > mr2->is_mrva)
602*30e7468fSPeter Dunlap 		return (1);
603*30e7468fSPeter Dunlap 
604*30e7468fSPeter Dunlap 	return (0);
605*30e7468fSPeter Dunlap }
606