xref: /illumos-gate/usr/src/uts/common/io/ib/clients/iser/iser_idm.c (revision 6a634c9dca3093f3922e4b7ab826d7bdf17bf78e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 #include <sys/ddi.h>
26 #include <sys/sunddi.h>
27 
28 #include <sys/socket.h>		/* networking stuff */
29 #include <sys/sysmacros.h>	/* offsetof */
30 
31 #include <sys/ib/clients/iser/iser.h>
32 #include <sys/ib/clients/iser/iser_idm.h>
33 
34 /*
35  * iSER transport routines
36  *
 * All transport functions except iser_tgt_svc_create() are called through
 * the ops vector; iser_tgt_svc_create() is called from the async handler
 * in addition to being called by the ULP.
40  */
41 
42 static void iser_pdu_tx(idm_conn_t *ic, idm_pdu_t *pdu);
43 
44 static idm_status_t iser_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb);
45 static idm_status_t iser_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb);
46 static idm_status_t iser_tgt_enable_datamover(idm_conn_t *ic);
47 static idm_status_t iser_ini_enable_datamover(idm_conn_t *ic);
48 static void iser_notice_key_values(struct idm_conn_s *ic,
49     nvlist_t *negotiated_nvl);
50 static kv_status_t iser_declare_key_values(struct idm_conn_s *ic,
51     nvlist_t *config_nvl, nvlist_t *outgoing_nvl);
52 static idm_status_t iser_free_task_rsrcs(idm_task_t *idt);
53 static kv_status_t iser_negotiate_key_values(idm_conn_t *ic,
54     nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl);
55 static kv_status_t iser_handle_numerical(nvpair_t *nvp, uint64_t value,
56     const idm_kv_xlate_t *ikvx, uint64_t min_value, uint64_t max_value,
57     uint64_t iser_max_value, nvlist_t *request_nvl, nvlist_t *response_nvl,
58     nvlist_t *negotiated_nvl);
59 static kv_status_t iser_handle_boolean(nvpair_t *nvp, boolean_t value,
60     const idm_kv_xlate_t *ikvx, boolean_t iser_value, nvlist_t *request_nvl,
61     nvlist_t *response_nvl, nvlist_t *negotiated_nvl);
62 static kv_status_t iser_handle_key(nvpair_t *nvp, const idm_kv_xlate_t *ikvx,
63     nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl);
64 static kv_status_t iser_process_request_nvlist(nvlist_t *request_nvl,
65     nvlist_t *response_nvl, nvlist_t *negotiated_nvl);
66 static boolean_t iser_conn_is_capable(idm_conn_req_t *ic,
67     idm_transport_caps_t *caps);
68 static idm_status_t iser_buf_alloc(idm_buf_t *idb, uint64_t buflen);
69 static idm_status_t iser_buf_setup(idm_buf_t *idb);
70 static void iser_buf_teardown(idm_buf_t *idb);
71 static void iser_buf_free(idm_buf_t *idb);
72 static void iser_tgt_svc_destroy(struct idm_svc_s *is);
73 static idm_status_t iser_tgt_svc_online(struct idm_svc_s *is);
74 static void iser_tgt_svc_offline(struct idm_svc_s *is);
75 static idm_status_t iser_tgt_conn_connect(struct idm_conn_s *ic);
76 static idm_status_t iser_ini_conn_create(idm_conn_req_t *cr,
77     struct idm_conn_s *ic);
78 static void iser_conn_destroy(struct idm_conn_s *ic);
79 static idm_status_t iser_ini_conn_connect(struct idm_conn_s *ic);
80 static void iser_conn_disconnect(struct idm_conn_s *ic);
81 
82 /*
83  * iSER IDM transport operations
84  */
85 idm_transport_ops_t iser_transport_ops = {
86 	&iser_pdu_tx,			/* it_tx_pdu */
87 	&iser_buf_tx_to_ini,		/* it_buf_tx_to_ini */
88 	&iser_buf_rx_from_ini,		/* it_buf_rx_from_ini */
89 	NULL,				/* it_rx_datain */
90 	NULL,				/* it_rx_rtt */
91 	NULL,				/* it_rx_dataout */
92 	NULL,				/* it_alloc_conn_rsrc */
93 	NULL,				/* it_free_conn_rsrc */
94 	&iser_tgt_enable_datamover,	/* it_tgt_enable_datamover */
95 	&iser_ini_enable_datamover,	/* it_ini_enable_datamover */
96 	NULL,				/* it_conn_terminate */
97 	&iser_free_task_rsrcs,		/* it_free_task_rsrc */
98 	&iser_negotiate_key_values,	/* it_negotiate_key_values */
99 	&iser_notice_key_values,	/* it_notice_key_values */
100 	&iser_conn_is_capable,		/* it_conn_is_capable */
101 	&iser_buf_alloc,		/* it_buf_alloc */
102 	&iser_buf_free,			/* it_buf_free */
103 	&iser_buf_setup,		/* it_buf_setup */
104 	&iser_buf_teardown,		/* it_buf_teardown */
105 	&iser_tgt_svc_create,		/* it_tgt_svc_create */
106 	&iser_tgt_svc_destroy,		/* it_tgt_svc_destroy */
107 	&iser_tgt_svc_online,		/* it_tgt_svc_online */
108 	&iser_tgt_svc_offline,		/* it_tgt_svc_offline */
109 	&iser_conn_destroy,		/* it_tgt_conn_destroy */
110 	&iser_tgt_conn_connect,		/* it_tgt_conn_connect */
111 	&iser_conn_disconnect,		/* it_tgt_conn_disconnect */
112 	&iser_ini_conn_create,		/* it_ini_conn_create */
113 	&iser_conn_destroy,		/* it_ini_conn_destroy */
114 	&iser_ini_conn_connect,		/* it_ini_conn_connect */
115 	&iser_conn_disconnect,		/* it_ini_conn_disconnect */
116 	&iser_declare_key_values	/* it_declare_key_values */
117 };
118 
119 /*
120  * iSER IDM transport capabilities
121  */
122 idm_transport_caps_t iser_transport_caps = {
123 	0		/* flags */
124 };
125 
126 int
iser_idm_register()127 iser_idm_register()
128 {
129 	idm_transport_attr_t	attr;
130 	idm_status_t		status;
131 
132 	attr.type	= IDM_TRANSPORT_TYPE_ISER;
133 	attr.it_ops	= &iser_transport_ops;
134 	attr.it_caps	= &iser_transport_caps;
135 
136 	status = idm_transport_register(&attr);
137 	if (status != IDM_STATUS_SUCCESS) {
138 		ISER_LOG(CE_WARN, "Failed to register iSER transport with IDM");
139 		return (DDI_FAILURE);
140 	}
141 
142 	ISER_LOG(CE_NOTE, "Registered iSER transport with IDM");
143 
144 	return (DDI_SUCCESS);
145 }
146 
147 /*
148  * iser_ini_conn_create()
149  * Allocate an iSER initiator connection context
150  */
151 static idm_status_t
iser_ini_conn_create(idm_conn_req_t * cr,idm_conn_t * ic)152 iser_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic)
153 {
154 	iser_chan_t	*iser_chan = NULL;
155 	iser_conn_t	*iser_conn;
156 
157 	/* Allocate and set up a connection handle */
158 	iser_conn = kmem_zalloc(sizeof (iser_conn_t), KM_SLEEP);
159 	mutex_init(&iser_conn->ic_lock, NULL, MUTEX_DRIVER, NULL);
160 
161 	/* Allocate and open a channel to the target node */
162 	iser_chan = iser_channel_alloc(NULL, &cr->cr_ini_dst_addr);
163 	if (iser_chan == NULL) {
164 		ISER_LOG(CE_WARN, "iser: failed to allocate channel");
165 		mutex_destroy(&iser_conn->ic_lock);
166 		kmem_free(iser_conn, sizeof (iser_conn_t));
167 		return (IDM_STATUS_FAIL);
168 	}
169 
170 	/*
171 	 * The local IP and remote IP are filled in iser_channel_alloc. The
172 	 * remote port needs to be filled in from idm_conn_req_t. The local
173 	 * port is irrelevant. Internal representation of the port in the
174 	 * IDM sockaddr structure is in network byte order. IBT expects the
175 	 * port in host byte order.
176 	 */
177 	switch (cr->cr_ini_dst_addr.sin.sa_family) {
178 	case AF_INET:
179 		iser_chan->ic_rport = ntohs(cr->cr_ini_dst_addr.sin4.sin_port);
180 		break;
181 	case AF_INET6:
182 		iser_chan->ic_rport = ntohs(cr->cr_ini_dst_addr.sin6.sin6_port);
183 		break;
184 	default:
185 		iser_chan->ic_rport = ISCSI_LISTEN_PORT;
186 	}
187 	iser_chan->ic_lport = 0;
188 
189 	cv_init(&iser_conn->ic_stage_cv, NULL, CV_DEFAULT, NULL);
190 	iser_conn->ic_type = ISER_CONN_TYPE_INI;
191 	iser_conn->ic_stage = ISER_CONN_STAGE_ALLOCATED;
192 	iser_conn->ic_chan = iser_chan;
193 	iser_conn->ic_idmc = ic;
194 
195 	/*
196 	 * Set a pointer to the iser_conn in the iser_chan for easy
197 	 * access during CM event handling
198 	 */
199 	iser_chan->ic_conn = iser_conn;
200 
201 	/* Set the iSER conn handle in the IDM conn private handle */
202 	ic->ic_transport_private = (void *)iser_conn;
203 
204 	/* Set the transport header length */
205 	ic->ic_transport_hdrlen = ISER_HEADER_LENGTH;
206 
207 	return (IDM_STATUS_SUCCESS);
208 }
209 
210 /*
211  * iser_internal_conn_destroy()
212  * Tear down iSER-specific connection resources. This is used below
213  * in iser_conn_destroy(), but also from the CM code when we may have
214  * some of the connection established, but not fully connected.
215  */
216 void
iser_internal_conn_destroy(iser_conn_t * ic)217 iser_internal_conn_destroy(iser_conn_t *ic)
218 {
219 	mutex_enter(&ic->ic_lock);
220 	iser_channel_free(ic->ic_chan);
221 	if ((ic->ic_type == ISER_CONN_TYPE_TGT) &&
222 	    (ic->ic_stage == ISER_CONN_STAGE_ALLOCATED)) {
223 		/*
224 		 * This is a target connection that has yet to be
225 		 * established. Free our reference on the target
226 		 * service handle.
227 		 */
228 		iser_tgt_svc_rele(ic->ic_idms->is_iser_svc);
229 	}
230 	cv_destroy(&ic->ic_stage_cv);
231 	mutex_exit(&ic->ic_lock);
232 	mutex_destroy(&ic->ic_lock);
233 	kmem_free(ic, sizeof (iser_conn_t));
234 }
235 
236 /*
237  * iser_conn_destroy()
238  * Tear down an initiator or target connection.
239  */
240 static void
iser_conn_destroy(idm_conn_t * ic)241 iser_conn_destroy(idm_conn_t *ic)
242 {
243 	iser_conn_t	*iser_conn;
244 	iser_conn = (iser_conn_t *)ic->ic_transport_private;
245 
246 	iser_internal_conn_destroy(iser_conn);
247 	ic->ic_transport_private = NULL;
248 }
249 
250 /*
251  * iser_ini_conn_connect()
252  * Establish the connection referred to by the handle previously allocated via
253  * iser_ini_conn_create().
254  */
255 static idm_status_t
iser_ini_conn_connect(idm_conn_t * ic)256 iser_ini_conn_connect(idm_conn_t *ic)
257 {
258 	iser_conn_t		*iser_conn;
259 	iser_status_t		status;
260 
261 	iser_conn = (iser_conn_t *)ic->ic_transport_private;
262 
263 	status = iser_channel_open(iser_conn->ic_chan);
264 	if (status != ISER_STATUS_SUCCESS) {
265 		ISER_LOG(CE_WARN, "iser: failed to open channel");
266 		return (IDM_STATUS_FAIL);
267 	}
268 
269 	/*
270 	 * Set the local and remote addresses in the idm conn handle.
271 	 */
272 	iser_ib_conv_ibtaddr2sockaddr(&ic->ic_laddr,
273 	    &iser_conn->ic_chan->ic_localip, iser_conn->ic_chan->ic_lport);
274 	iser_ib_conv_ibtaddr2sockaddr(&ic->ic_raddr,
275 	    &iser_conn->ic_chan->ic_remoteip, iser_conn->ic_chan->ic_rport);
276 
277 	mutex_enter(&iser_conn->ic_lock);
278 	/* Hold a reference on the IDM connection handle */
279 	idm_conn_hold(ic);
280 	iser_conn->ic_stage = ISER_CONN_STAGE_IC_CONNECTED;
281 	mutex_exit(&iser_conn->ic_lock);
282 
283 	return (IDM_STATUS_SUCCESS);
284 }
285 
286 /*
287  * iser_conn_disconnect()
288  * Shutdown this iSER connection
289  */
290 static void
iser_conn_disconnect(idm_conn_t * ic)291 iser_conn_disconnect(idm_conn_t *ic)
292 {
293 	iser_conn_t	*iser_conn;
294 
295 	iser_conn = (iser_conn_t *)ic->ic_transport_private;
296 
297 	mutex_enter(&iser_conn->ic_lock);
298 	iser_conn->ic_stage = ISER_CONN_STAGE_CLOSING;
299 	mutex_exit(&iser_conn->ic_lock);
300 
301 	/* Close the channel */
302 	iser_channel_close(iser_conn->ic_chan);
303 
304 	/* Free our reference held on the IDM conn handle, and set CLOSED */
305 	mutex_enter(&iser_conn->ic_lock);
306 	idm_conn_rele(iser_conn->ic_idmc);
307 	iser_conn->ic_stage = ISER_CONN_STAGE_CLOSED;
308 	mutex_exit(&iser_conn->ic_lock);
309 }
310 
311 /*
312  * iser_tgt_svc_create()
313  * Establish the CM service for inbound iSER service requests on the port
314  * indicated by sr->sr_port.
315  * idm_svc_req_t contains the service parameters.
316  */
317 idm_status_t
iser_tgt_svc_create(idm_svc_req_t * sr,idm_svc_t * is)318 iser_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is)
319 {
320 	iser_svc_t		*iser_svc;
321 
322 	int			rc;
323 
324 	iser_svc = kmem_zalloc(sizeof (iser_svc_t), KM_SLEEP);
325 	is->is_iser_svc = (void *)iser_svc;
326 
327 	idm_refcnt_init(&iser_svc->is_refcnt, iser_svc);
328 
329 	list_create(&iser_svc->is_sbindlist, sizeof (iser_sbind_t),
330 	    offsetof(iser_sbind_t, is_list_node));
331 	iser_svc->is_svcid = ibt_get_ip_sid(IPPROTO_TCP, sr->sr_port);
332 
333 	/*
334 	 * Register an iSER target service for the requested port
335 	 * and set the iser_svc structure in the idm_svc handle.
336 	 */
337 	rc = iser_register_service(is);
338 	if (rc != DDI_SUCCESS) {
339 		ISER_LOG(CE_NOTE, "iser_tgt_svc_create: iser_register_service "
340 		    "failed on port (%d): rc (0x%x)", sr->sr_port, rc);
341 		(void) ibt_release_ip_sid(iser_svc->is_svcid);
342 		list_destroy(&iser_svc->is_sbindlist);
343 		idm_refcnt_destroy(&iser_svc->is_refcnt);
344 		kmem_free(iser_svc, sizeof (iser_svc_t));
345 		return (IDM_STATUS_FAIL);
346 	}
347 
348 	return (IDM_STATUS_SUCCESS);
349 }
350 
351 /* IDM refcnt utilities for the iSER service handle */
352 void
iser_tgt_svc_hold(iser_svc_t * is)353 iser_tgt_svc_hold(iser_svc_t *is)
354 {
355 	idm_refcnt_hold(&is->is_refcnt);
356 }
357 
358 void
iser_tgt_svc_rele(iser_svc_t * is)359 iser_tgt_svc_rele(iser_svc_t *is)
360 {
361 	idm_refcnt_rele(&is->is_refcnt);
362 }
363 
364 /*
365  * iser_tgt_svc_destroy()
366  * Teardown resources allocated in iser_tgt_svc_create()
367  */
368 static void
iser_tgt_svc_destroy(idm_svc_t * is)369 iser_tgt_svc_destroy(idm_svc_t *is)
370 {
371 	iser_svc_t	*iser_svc;
372 
373 	iser_svc = (iser_svc_t *)is->is_iser_svc;
374 
375 	/*
376 	 * Deregister the iSER target service on this port and free
377 	 * the iser_svc structure from the idm_svc handle.
378 	 */
379 	iser_deregister_service(is);
380 
381 	/* Wait for the iSER service handle's refcnt to zero */
382 	idm_refcnt_wait_ref(&iser_svc->is_refcnt);
383 
384 	list_destroy(&iser_svc->is_sbindlist);
385 
386 	idm_refcnt_destroy(&iser_svc->is_refcnt);
387 
388 	kmem_free(iser_svc, sizeof (iser_svc_t));
389 }
390 
391 /*
392  * iser_tgt_svc_online()
393  * Bind the CM service allocated via iser_tgt_svc_create().
394  */
395 static idm_status_t
iser_tgt_svc_online(idm_svc_t * is)396 iser_tgt_svc_online(idm_svc_t *is)
397 {
398 	iser_status_t	status;
399 
400 	mutex_enter(&is->is_mutex);
401 
402 	/*
403 	 * Pass the IDM service handle as the client private data for
404 	 * later use.
405 	 */
406 	status = iser_bind_service(is);
407 	if (status != ISER_STATUS_SUCCESS) {
408 		ISER_LOG(CE_NOTE, "iser_tgt_svc_online: failed bind service");
409 		mutex_exit(&is->is_mutex);
410 		return (IDM_STATUS_FAIL);
411 	}
412 
413 	mutex_exit(&is->is_mutex);
414 	return (IDM_STATUS_SUCCESS);
415 }
416 
417 /*
418  * iser_tgt_svc_offline
419  * Unbind the service on all available HCA ports.
420  */
421 static void
iser_tgt_svc_offline(idm_svc_t * is)422 iser_tgt_svc_offline(idm_svc_t *is)
423 {
424 	mutex_enter(&is->is_mutex);
425 
426 	iser_unbind_service(is);
427 	mutex_exit(&is->is_mutex);
428 
429 }
430 
431 /*
432  * iser_tgt_conn_connect()
433  * Establish the connection in ic, passed from idm_tgt_conn_finish(), which
434  * is invoked from the SM as a result of an inbound connection request.
435  */
436 /* ARGSUSED */
437 static idm_status_t
iser_tgt_conn_connect(idm_conn_t * ic)438 iser_tgt_conn_connect(idm_conn_t *ic)
439 {
440 	/* No action required */
441 	return (IDM_STATUS_SUCCESS);
442 }
443 
444 /*
445  * iser_tgt_enable_datamover() sets the transport private data on the
446  * idm_conn_t and move the conn stage to indicate logged in.
447  */
448 static idm_status_t
iser_tgt_enable_datamover(idm_conn_t * ic)449 iser_tgt_enable_datamover(idm_conn_t *ic)
450 {
451 	iser_conn_t	*iser_conn;
452 
453 	iser_conn = (iser_conn_t *)ic->ic_transport_private;
454 	mutex_enter(&iser_conn->ic_lock);
455 
456 	iser_conn->ic_stage = ISER_CONN_STAGE_LOGGED_IN;
457 	mutex_exit(&iser_conn->ic_lock);
458 
459 	return (IDM_STATUS_SUCCESS);
460 }
461 
462 /*
463  * iser_ini_enable_datamover() is used by the iSCSI initator to request that a
464  * specified iSCSI connection be transitioned to iSER-assisted mode.
465  * In the case of iSER, the RDMA resources for a reliable connection have
466  * already been allocated at this time, and the 'RDMAExtensions' is set to 'Yes'
467  * so no further negotiations are required at this time.
468  * The initiator now sends the first iSER Message - 'Hello' to the target
469  * and waits for  the 'HelloReply' Message from the target before directing
470  * the initiator to go into the Full Feature Phase.
471  *
472  * No transport op is required on the target side.
473  */
474 static idm_status_t
iser_ini_enable_datamover(idm_conn_t * ic)475 iser_ini_enable_datamover(idm_conn_t *ic)
476 {
477 
478 	iser_conn_t	*iser_conn;
479 	clock_t		delay;
480 	int		status;
481 
482 	iser_conn = (iser_conn_t *)ic->ic_transport_private;
483 
484 	mutex_enter(&iser_conn->ic_lock);
485 	iser_conn->ic_stage = ISER_CONN_STAGE_HELLO_SENT;
486 	mutex_exit(&iser_conn->ic_lock);
487 
488 	/* Send the iSER Hello Message to the target */
489 	status = iser_xfer_hello_msg(iser_conn->ic_chan);
490 	if (status != ISER_STATUS_SUCCESS) {
491 
492 		mutex_enter(&iser_conn->ic_lock);
493 		iser_conn->ic_stage = ISER_CONN_STAGE_HELLO_SENT_FAIL;
494 		mutex_exit(&iser_conn->ic_lock);
495 
496 		return (IDM_STATUS_FAIL);
497 	}
498 
499 	/*
500 	 * Acquire the iser_conn->ic_lock and wait for the iSER HelloReply
501 	 * Message from the target, i.e. iser_conn_stage_t to be set to
502 	 * ISER_CONN_STAGE_HELLOREPLY_RCV. If the handshake does not
503 	 * complete within a specified time period (.5s), then return failure.
504 	 *
505 	 */
506 	delay = ddi_get_lbolt() + drv_usectohz(500000);
507 
508 	mutex_enter(&iser_conn->ic_lock);
509 	while ((iser_conn->ic_stage != ISER_CONN_STAGE_HELLOREPLY_RCV) &&
510 	    (ddi_get_lbolt() < delay)) {
511 
512 		(void) cv_timedwait(&iser_conn->ic_stage_cv,
513 		    &iser_conn->ic_lock, delay);
514 	}
515 
516 	switch (iser_conn->ic_stage) {
517 	case ISER_CONN_STAGE_HELLOREPLY_RCV:
518 		iser_conn->ic_stage = ISER_CONN_STAGE_LOGGED_IN;
519 		mutex_exit(&iser_conn->ic_lock);
520 		/*
521 		 * Return suceess to indicate that the initiator connection can
522 		 * go to the next phase - FFP
523 		 */
524 		return (IDM_STATUS_SUCCESS);
525 	default:
526 		iser_conn->ic_stage = ISER_CONN_STAGE_HELLOREPLY_RCV_FAIL;
527 		mutex_exit(&iser_conn->ic_lock);
528 		return (IDM_STATUS_FAIL);
529 
530 	}
531 
532 	/* STATEMENT_NEVER_REACHED */
533 }
534 
535 /*
536  * iser_free_task_rsrcs()
537  * This routine does not currently need to do anything. It is used in
538  * the sockets transport to explicitly complete any buffers on the task,
539  * but we can rely on our RCaP layer to finish up it's work without any
540  * intervention.
541  */
542 /* ARGSUSED */
543 idm_status_t
iser_free_task_rsrcs(idm_task_t * idt)544 iser_free_task_rsrcs(idm_task_t *idt)
545 {
546 	return (IDM_STATUS_SUCCESS);
547 }
548 
549 /*
550  * iser_negotiate_key_values() validates the key values for this connection
551  */
552 /* ARGSUSED */
553 static kv_status_t
iser_negotiate_key_values(idm_conn_t * ic,nvlist_t * request_nvl,nvlist_t * response_nvl,nvlist_t * negotiated_nvl)554 iser_negotiate_key_values(idm_conn_t *ic, nvlist_t *request_nvl,
555     nvlist_t *response_nvl, nvlist_t *negotiated_nvl)
556 {
557 	kv_status_t		kvrc = KV_HANDLED;
558 
559 	/* Process the request nvlist */
560 	kvrc = iser_process_request_nvlist(request_nvl, response_nvl,
561 	    negotiated_nvl);
562 
563 	/* We must be using RDMA, so set the flag on the ic handle */
564 	ic->ic_rdma_extensions = B_TRUE;
565 
566 	return (kvrc);
567 }
568 
569 /* Process a list of key=value pairs from a login request */
570 static kv_status_t
iser_process_request_nvlist(nvlist_t * request_nvl,nvlist_t * response_nvl,nvlist_t * negotiated_nvl)571 iser_process_request_nvlist(nvlist_t *request_nvl, nvlist_t *response_nvl,
572     nvlist_t *negotiated_nvl)
573 {
574 	const idm_kv_xlate_t	*ikvx;
575 	char			*nvp_name;
576 	nvpair_t		*nvp;
577 	nvpair_t		*next_nvp;
578 	kv_status_t		kvrc = KV_HANDLED;
579 	boolean_t		transit = B_TRUE;
580 
581 	/* Process the list */
582 	nvp = nvlist_next_nvpair(request_nvl, NULL);
583 	while (nvp != NULL) {
584 		next_nvp = nvlist_next_nvpair(request_nvl, nvp);
585 
586 		nvp_name = nvpair_name(nvp);
587 		ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name));
588 
589 		kvrc = iser_handle_key(nvp, ikvx, request_nvl, response_nvl,
590 		    negotiated_nvl);
591 		if (kvrc != KV_HANDLED) {
592 			if (kvrc == KV_HANDLED_NO_TRANSIT) {
593 				/* we countered, clear the transit flag */
594 				transit = B_FALSE;
595 			} else {
596 				/* error, bail out */
597 				break;
598 			}
599 		}
600 
601 		nvp = next_nvp;
602 	}
603 	/*
604 	 * If the current kv_status_t indicates success, we've handled
605 	 * the entire list. Explicitly set kvrc to NO_TRANSIT if we've
606 	 * cleared the transit flag along the way.
607 	 */
608 	if ((kvrc == KV_HANDLED) && (transit == B_FALSE)) {
609 		kvrc = KV_HANDLED_NO_TRANSIT;
610 	}
611 
612 	return (kvrc);
613 }
614 
615 /* Handle a given list, boolean or numerical key=value pair */
616 static kv_status_t
iser_handle_key(nvpair_t * nvp,const idm_kv_xlate_t * ikvx,nvlist_t * request_nvl,nvlist_t * response_nvl,nvlist_t * negotiated_nvl)617 iser_handle_key(nvpair_t *nvp, const idm_kv_xlate_t *ikvx,
618     nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl)
619 {
620 	kv_status_t		kvrc = KV_UNHANDLED;
621 	boolean_t		bool_val;
622 	uint64_t		num_val;
623 	int			nvrc;
624 
625 	/* Retrieve values for booleans and numericals */
626 	switch (ikvx->ik_key_id) {
627 		/* Booleans */
628 	case KI_RDMA_EXTENSIONS:
629 	case KI_IMMEDIATE_DATA:
630 		nvrc = nvpair_value_boolean_value(nvp, &bool_val);
631 		ASSERT(nvrc == 0);
632 		break;
633 		/* Numericals */
634 	case KI_INITIATOR_RECV_DATA_SEGMENT_LENGTH:
635 	case KI_TARGET_RECV_DATA_SEGMENT_LENGTH:
636 	case KI_MAX_OUTSTANDING_UNEXPECTED_PDUS:
637 		nvrc = nvpair_value_uint64(nvp, &num_val);
638 		ASSERT(nvrc == 0);
639 		break;
640 	default:
641 		break;
642 	}
643 
644 	/*
645 	 * Now handle the values according to the key name. Keys not
646 	 * specifically handled here will be negotiated by the iscsi
647 	 * target. Negotiated values take effect when
648 	 * iser_notice_key_values gets called.
649 	 */
650 	switch (ikvx->ik_key_id) {
651 	case KI_RDMA_EXTENSIONS:
652 		/* Ensure "Yes" */
653 		kvrc = iser_handle_boolean(nvp, bool_val, ikvx, B_TRUE,
654 		    request_nvl, response_nvl, negotiated_nvl);
655 		break;
656 	case KI_TARGET_RECV_DATA_SEGMENT_LENGTH:
657 		/* Validate the proposed value */
658 		kvrc = iser_handle_numerical(nvp, num_val, ikvx,
659 		    ISER_TARGET_RECV_DATA_SEGMENT_LENGTH_MIN,
660 		    ISER_TARGET_RECV_DATA_SEGMENT_LENGTH_MAX,
661 		    ISER_TARGET_RECV_DATA_SEGMENT_LENGTH_IMPL_MAX,
662 		    request_nvl, response_nvl, negotiated_nvl);
663 		break;
664 	case KI_INITIATOR_RECV_DATA_SEGMENT_LENGTH:
665 		/* Validate the proposed value */
666 		kvrc = iser_handle_numerical(nvp, num_val, ikvx,
667 		    ISER_INITIATOR_RECV_DATA_SEGMENT_LENGTH_MIN,
668 		    ISER_INITIATOR_RECV_DATA_SEGMENT_LENGTH_MAX,
669 		    ISER_INITIATOR_RECV_DATA_SEGMENT_LENGTH_IMPL_MAX,
670 		    request_nvl, response_nvl, negotiated_nvl);
671 		break;
672 	case KI_IMMEDIATE_DATA:
673 		/* Ensure "No" */
674 		kvrc = iser_handle_boolean(nvp, bool_val, ikvx, B_FALSE,
675 		    request_nvl, response_nvl, negotiated_nvl);
676 		break;
677 	case KI_MAX_OUTSTANDING_UNEXPECTED_PDUS:
678 		/* Validate the proposed value */
679 		kvrc = iser_handle_numerical(nvp, num_val, ikvx,
680 		    ISER_MAX_OUTSTANDING_UNEXPECTED_PDUS_MIN,
681 		    ISER_MAX_OUTSTANDING_UNEXPECTED_PDUS_MAX,
682 		    ISER_MAX_OUTSTANDING_UNEXPECTED_PDUS_IMPL_MAX,
683 		    request_nvl, response_nvl, negotiated_nvl);
684 		break;
685 	default:
686 		/*
687 		 * All other keys, including invalid keys, will be
688 		 * handled at the client layer.
689 		 */
690 		kvrc = KV_HANDLED;
691 		break;
692 	}
693 
694 	return (kvrc);
695 }
696 
697 
698 /* Validate a proposed boolean value, and set the alternate if necessary */
699 static kv_status_t
iser_handle_boolean(nvpair_t * nvp,boolean_t value,const idm_kv_xlate_t * ikvx,boolean_t iser_value,nvlist_t * request_nvl,nvlist_t * response_nvl,nvlist_t * negotiated_nvl)700 iser_handle_boolean(nvpair_t *nvp, boolean_t value, const idm_kv_xlate_t *ikvx,
701     boolean_t iser_value, nvlist_t *request_nvl, nvlist_t *response_nvl,
702     nvlist_t *negotiated_nvl)
703 {
704 	kv_status_t		kvrc = KV_UNHANDLED;
705 	int			nvrc;
706 	boolean_t		respond = B_FALSE;
707 
708 	if (value != iser_value) {
709 		/*
710 		 * Respond back to initiator with our value, and
711 		 * set the return value to unset the transit bit.
712 		 */
713 		value = iser_value;
714 		nvrc = nvlist_add_boolean_value(negotiated_nvl,
715 		    ikvx->ik_key_name, value);
716 		if (nvrc == 0) {
717 			kvrc = KV_HANDLED_NO_TRANSIT;
718 			respond = B_TRUE;
719 		}
720 
721 	} else {
722 		/* Add this to our negotiated values */
723 		nvrc = nvlist_add_nvpair(negotiated_nvl, nvp);
724 		/* Respond if this is not a declarative */
725 		respond = (ikvx->ik_declarative == B_FALSE);
726 	}
727 
728 	/* Response of Simple-value Negotiation */
729 	if (nvrc == 0 && respond) {
730 		nvrc = nvlist_add_boolean_value(response_nvl,
731 		    ikvx->ik_key_name, value);
732 		/* Remove from the request (we've handled it) */
733 		(void) nvlist_remove_all(request_nvl, ikvx->ik_key_name);
734 	}
735 
736 	if (kvrc == KV_HANDLED_NO_TRANSIT) {
737 		return (kvrc);
738 	}
739 
740 	return (idm_nvstat_to_kvstat(nvrc));
741 }
742 
743 /*
744  * Validate a proposed value against the iSER and/or iSCSI RFC's minimum and
745  * maximum values, and set an alternate, if necessary.  Note that the value
746  * 'iser_max_value" represents our implementation maximum (typically the max).
747  */
748 static kv_status_t
iser_handle_numerical(nvpair_t * nvp,uint64_t value,const idm_kv_xlate_t * ikvx,uint64_t min_value,uint64_t max_value,uint64_t iser_max_value,nvlist_t * request_nvl,nvlist_t * response_nvl,nvlist_t * negotiated_nvl)749 iser_handle_numerical(nvpair_t *nvp, uint64_t value, const idm_kv_xlate_t *ikvx,
750     uint64_t min_value, uint64_t max_value, uint64_t iser_max_value,
751     nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl)
752 {
753 	kv_status_t		kvrc = KV_UNHANDLED;
754 	int			nvrc;
755 	boolean_t		respond = B_FALSE;
756 
757 	/* Validate against standard */
758 	if ((value < min_value) || (value > max_value)) {
759 		kvrc = KV_VALUE_ERROR;
760 	} else {
761 		if (value > iser_max_value) {
762 			/*
763 			 * Respond back to initiator with our value, and
764 			 * set the return value to unset the transit bit.
765 			 */
766 			value = iser_max_value;
767 			nvrc = nvlist_add_uint64(negotiated_nvl,
768 			    ikvx->ik_key_name, value);
769 			if (nvrc == 0) {
770 				kvrc = KV_HANDLED_NO_TRANSIT;
771 				respond = B_TRUE;
772 			}
773 		} else {
774 			/* Add this to our negotiated values */
775 			nvrc = nvlist_add_nvpair(negotiated_nvl, nvp);
776 			/* Respond if this is not a declarative */
777 			respond = (ikvx->ik_declarative == B_FALSE);
778 		}
779 
780 		/* Response of Simple-value Negotiation */
781 		if (nvrc == 0 && respond) {
782 			nvrc = nvlist_add_uint64(response_nvl,
783 			    ikvx->ik_key_name, value);
784 			/* Remove from the request (we've handled it) */
785 			(void) nvlist_remove_all(request_nvl,
786 			    ikvx->ik_key_name);
787 		}
788 	}
789 
790 	if (kvrc == KV_HANDLED_NO_TRANSIT) {
791 		return (kvrc);
792 	}
793 
794 	return (idm_nvstat_to_kvstat(nvrc));
795 }
796 
797 /*
798  * iser_declare_key_values() declares the declarative key values for
799  * this connection.
800  */
801 /* ARGSUSED */
802 static kv_status_t
iser_declare_key_values(idm_conn_t * ic,nvlist_t * config_nvl,nvlist_t * outgoing_nvl)803 iser_declare_key_values(idm_conn_t *ic, nvlist_t *config_nvl,
804     nvlist_t *outgoing_nvl)
805 {
806 	kv_status_t		kvrc;
807 	int			nvrc = 0;
808 	int			rc;
809 	uint64_t		uint64_val;
810 
811 	if ((rc = nvlist_lookup_uint64(config_nvl,
812 	    ISER_KV_KEY_NAME_MAX_OUTSTANDING_PDU, &uint64_val)) != ENOENT) {
813 		ASSERT(rc == 0);
814 		if (outgoing_nvl) {
815 			nvrc = nvlist_add_uint64(outgoing_nvl,
816 			    ISER_KV_KEY_NAME_MAX_OUTSTANDING_PDU, uint64_val);
817 		}
818 	}
819 	kvrc = idm_nvstat_to_kvstat(nvrc);
820 	return (kvrc);
821 }
822 
823 /*
824  * iser_notice_key_values() activates the negotiated key values for
825  * this connection.
826  */
827 static void
iser_notice_key_values(idm_conn_t * ic,nvlist_t * negotiated_nvl)828 iser_notice_key_values(idm_conn_t *ic, nvlist_t *negotiated_nvl)
829 {
830 	iser_conn_t		*iser_conn;
831 	boolean_t		boolean_val;
832 	uint64_t		uint64_val;
833 	int			nvrc;
834 	char			*digest_choice_string;
835 
836 	iser_conn = (iser_conn_t *)ic->ic_transport_private;
837 
838 	/*
839 	 * Validate the final negotiated operational parameters,
840 	 * and save a copy.
841 	 */
842 	if ((nvrc = nvlist_lookup_string(negotiated_nvl,
843 	    "HeaderDigest", &digest_choice_string)) != ENOENT) {
844 		ASSERT(nvrc == 0);
845 
846 		/*
847 		 * Per the iSER RFC, override the negotiated value with "None"
848 		 */
849 		iser_conn->ic_op_params.op_header_digest = B_FALSE;
850 	}
851 
852 	if ((nvrc = nvlist_lookup_string(negotiated_nvl,
853 	    "DataDigest", &digest_choice_string)) != ENOENT) {
854 		ASSERT(nvrc == 0);
855 
856 		/*
857 		 * Per the iSER RFC, override the negotiated value with "None"
858 		 */
859 		iser_conn->ic_op_params.op_data_digest = B_FALSE;
860 	}
861 
862 	if ((nvrc = nvlist_lookup_boolean_value(negotiated_nvl,
863 	    "RDMAExtensions", &boolean_val)) != ENOENT) {
864 		ASSERT(nvrc == 0);
865 		iser_conn->ic_op_params.op_rdma_extensions = boolean_val;
866 	}
867 
868 	if ((nvrc = nvlist_lookup_boolean_value(negotiated_nvl,
869 	    "OFMarker", &boolean_val)) != ENOENT) {
870 		ASSERT(nvrc == 0);
871 		/*
872 		 * Per the iSER RFC, override the negotiated value with "No"
873 		 */
874 		iser_conn->ic_op_params.op_ofmarker = B_FALSE;
875 	}
876 
877 	if ((nvrc = nvlist_lookup_boolean_value(negotiated_nvl,
878 	    "IFMarker", &boolean_val)) != ENOENT) {
879 		ASSERT(nvrc == 0);
880 		/*
881 		 * Per the iSER RFC, override the negotiated value with "No"
882 		 */
883 		iser_conn->ic_op_params.op_ifmarker = B_FALSE;
884 	}
885 
886 	if ((nvrc = nvlist_lookup_uint64(negotiated_nvl,
887 	    "TargetRecvDataSegmentLength", &uint64_val)) != ENOENT) {
888 		ASSERT(nvrc == 0);
889 		iser_conn->ic_op_params.op_target_recv_data_segment_length =
890 		    uint64_val;
891 	}
892 
893 	if ((nvrc = nvlist_lookup_uint64(negotiated_nvl,
894 	    "InitiatorRecvDataSegmentLength", &uint64_val)) != ENOENT) {
895 		ASSERT(nvrc == 0);
896 		iser_conn->ic_op_params.op_initiator_recv_data_segment_length =
897 		    uint64_val;
898 	}
899 
900 	if ((nvrc = nvlist_lookup_uint64(negotiated_nvl,
901 	    "MaxOutstandingUnexpectedPDUs", &uint64_val)) != ENOENT) {
902 		ASSERT(nvrc == 0);
903 		iser_conn->ic_op_params.op_max_outstanding_unexpected_pdus =
904 		    uint64_val;
905 	}
906 
907 	/* Test boolean values which are required by RFC 5046 */
908 #ifdef ISER_DEBUG
909 	ASSERT(iser_conn->ic_op_params.op_rdma_extensions == B_TRUE);
910 	ASSERT(iser_conn->ic_op_params.op_header_digest == B_FALSE);
911 	ASSERT(iser_conn->ic_op_params.op_data_digest == B_FALSE);
912 	ASSERT(iser_conn->ic_op_params.op_ofmarker == B_FALSE);
913 	ASSERT(iser_conn->ic_op_params.op_ifmarker == B_FALSE);
914 #endif
915 }
916 
917 
918 /*
919  * iser_conn_is_capable() verifies that the passed connection is provided
920  * for by an iSER-capable link.
921  * NOTE: When utilizing InfiniBand RC as an RCaP, this routine will check
922  * if the link is on IPoIB. This only indicates a chance that the link is
923  * on an RCaP, and thus iSER-capable, since we may be running on an IB-Eth
924  * gateway, or other IB but non-RCaP link. Rather than fully establishing the
925  * link to verify RCaP here, we instead will return B_TRUE
926  * indicating the link is iSER-capable, if the link is IPoIB. If then in
927  * iser_ini_conn_create() the link proves not be RCaP, IDM will fall back
928  * to using the IDM Sockets transport.
929  */
930 /* ARGSUSED */
931 static boolean_t
iser_conn_is_capable(idm_conn_req_t * cr,idm_transport_caps_t * caps)932 iser_conn_is_capable(idm_conn_req_t *cr, idm_transport_caps_t *caps)
933 {
934 	/* A NULL value for laddr indicates implicit source */
935 	return (iser_path_exists(NULL, &cr->cr_ini_dst_addr));
936 }
937 
938 /*
939  * iser_pdu_tx() transmits a Control PDU via the iSER channel. We pull the
940  * channel out of the idm_conn_t passed in, and pass it and the pdu to the
941  * iser_xfer routine.
942  */
943 static void
iser_pdu_tx(idm_conn_t * ic,idm_pdu_t * pdu)944 iser_pdu_tx(idm_conn_t *ic, idm_pdu_t *pdu)
945 {
946 	iser_conn_t	*iser_conn;
947 	iser_status_t	iser_status;
948 
949 	iser_conn = (iser_conn_t *)ic->ic_transport_private;
950 
951 	iser_status = iser_xfer_ctrlpdu(iser_conn->ic_chan, pdu);
952 	if (iser_status != ISER_STATUS_SUCCESS) {
953 		ISER_LOG(CE_WARN, "iser_pdu_tx: failed iser_xfer_ctrlpdu: "
954 		    "ic (0x%p) pdu (0x%p)", (void *) ic, (void *) pdu);
955 		/* Fail this PDU transmission */
956 		idm_pdu_complete(pdu, IDM_STATUS_FAIL);
957 	}
958 
959 	/*
960 	 * We successfully posted this PDU for transmission.
961 	 * The completion handler will invoke idm_pdu_complete()
962 	 * with the completion status. See iser_cq.c for more
963 	 * information.
964 	 */
965 }
966 
967 /*
968  * iser_buf_tx_to_ini() transmits the data buffer encoded in idb to the
969  * initiator to fulfill SCSI Read commands. An iser_xfer routine is invoked
970  * to implement the RDMA operations.
971  *
972  * Caller holds idt->idt_mutex.
973  */
974 static idm_status_t
iser_buf_tx_to_ini(idm_task_t * idt,idm_buf_t * idb)975 iser_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb)
976 {
977 	iser_status_t	iser_status;
978 	idm_status_t	idm_status = IDM_STATUS_SUCCESS;
979 
980 	ASSERT(mutex_owned(&idt->idt_mutex));
981 
982 	iser_status = iser_xfer_buf_to_ini(idt, idb);
983 
984 	if (iser_status != ISER_STATUS_SUCCESS) {
985 		ISER_LOG(CE_WARN, "iser_buf_tx_to_ini: failed "
986 		    "iser_xfer_buf_to_ini: idt (0x%p) idb (0x%p)",
987 		    (void *) idt, (void *) idb);
988 		idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
989 		return (IDM_STATUS_FAIL);
990 	}
991 
992 	/*
993 	 * iSCSIt's Data Completion Notify callback is invoked from
994 	 * the Work Request Send completion Handler
995 	 */
996 
997 	mutex_exit(&idt->idt_mutex);
998 	return (idm_status);
999 }
1000 
1001 /*
1002  * iser_buf_tx_from_ini() transmits data from the initiator into the buffer
1003  * in idb to fulfill SCSI Write commands. An iser_xfer routine is invoked
1004  * to implement the RDMA operations.
1005  *
1006  * Caller holds idt->idt_mutex.
1007  */
1008 static idm_status_t
iser_buf_rx_from_ini(idm_task_t * idt,idm_buf_t * idb)1009 iser_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb)
1010 {
1011 	iser_status_t	iser_status;
1012 	idm_status_t	idm_status = IDM_STATUS_SUCCESS;
1013 
1014 	ASSERT(mutex_owned(&idt->idt_mutex));
1015 
1016 	iser_status = iser_xfer_buf_from_ini(idt, idb);
1017 
1018 	if (iser_status != ISER_STATUS_SUCCESS) {
1019 		ISER_LOG(CE_WARN, "iser_buf_rx_from_ini: failed "
1020 		    "iser_xfer_buf_from_ini: idt (0x%p) idb (0x%p)",
1021 		    (void *) idt, (void *) idb);
1022 		idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_ABORTED);
1023 		return (IDM_STATUS_FAIL);
1024 	}
1025 
1026 	/*
1027 	 * iSCSIt's Data Completion Notify callback is invoked from
1028 	 * the Work Request Send completion Handler
1029 	 */
1030 
1031 	mutex_exit(&idt->idt_mutex);
1032 	return (idm_status);
1033 }
1034 
1035 /*
1036  * iser_buf_alloc() allocates a buffer and registers it with the IBTF for
1037  * use with iSER. Each HCA has it's own kmem cache for establishing a pool
1038  * of registered buffers, when once initially allocated, will remain
1039  * registered with the HCA. This routine is invoked only on the target,
1040  * where we have the requirement to pre-allocate buffers for the upper layers.
1041  * Note: buflen is compared to ISER_DEFAULT_BUFLEN, and allocation is failed
1042  * if the requested buflen is larger than our default.
1043  */
1044 /* ARGSUSED */
1045 static idm_status_t
iser_buf_alloc(idm_buf_t * idb,uint64_t buflen)1046 iser_buf_alloc(idm_buf_t *idb, uint64_t buflen)
1047 {
1048 	iser_conn_t	*iser_conn;
1049 	iser_hca_t	*iser_hca;
1050 	iser_buf_t	*iser_buf;
1051 
1052 	if (buflen > ISER_DEFAULT_BUFLEN) {
1053 		return (IDM_STATUS_FAIL);
1054 	}
1055 
1056 	iser_conn = (iser_conn_t *)idb->idb_ic->ic_transport_private;
1057 	iser_hca = iser_conn->ic_chan->ic_hca;
1058 
1059 	/*
1060 	 * Allocate a buffer from this HCA's cache. Once initialized, these
1061 	 * will remain allocated and registered (see above).
1062 	 */
1063 	iser_buf = kmem_cache_alloc(iser_hca->iser_buf_cache, KM_NOSLEEP);
1064 	if (iser_buf == NULL) {
1065 		ISER_LOG(CE_NOTE, "iser_buf_alloc: alloc failed");
1066 		return (IDM_STATUS_FAIL);
1067 	}
1068 
1069 	/* Set the allocated data buffer pointer in the IDM buf handle */
1070 	idb->idb_buf = iser_buf->buf;
1071 
1072 	/* Set the private buf and reg handles in the IDM buf handle */
1073 	idb->idb_buf_private = (void *)iser_buf;
1074 	idb->idb_reg_private = (void *)iser_buf->iser_mr;
1075 
1076 	return (IDM_STATUS_SUCCESS);
1077 }
1078 
1079 /*
1080  * iser_buf_free() frees the buffer handle passed in. Note that the cached
1081  * kmem object has an HCA-registered buffer in it which will not be freed.
1082  * This allows us to build up a cache of pre-allocated and registered
1083  * buffers for use on the target.
1084  */
1085 static void
iser_buf_free(idm_buf_t * buf)1086 iser_buf_free(idm_buf_t *buf)
1087 {
1088 	iser_buf_t	*iser_buf;
1089 
1090 	iser_buf = buf->idb_buf_private;
1091 	kmem_cache_free(iser_buf->cache, iser_buf);
1092 }
1093 
1094 /*
1095  * iser_buf_setup() is invoked on the initiator in order to register memory
1096  * on demand for use with the iSER layer.
1097  */
1098 static idm_status_t
iser_buf_setup(idm_buf_t * idb)1099 iser_buf_setup(idm_buf_t *idb)
1100 {
1101 	iser_conn_t	*iser_conn;
1102 	iser_chan_t	*iser_chan;
1103 	iser_hca_t	*iser_hca;
1104 	iser_buf_t	*iser_buf;
1105 	int		status;
1106 
1107 	ASSERT(idb->idb_buf != NULL);
1108 
1109 	iser_conn = (iser_conn_t *)idb->idb_ic->ic_transport_private;
1110 	ASSERT(iser_conn != NULL);
1111 
1112 	iser_hca = iser_conn->ic_chan->ic_hca;
1113 
1114 	iser_chan = iser_conn->ic_chan;
1115 	ASSERT(iser_chan != NULL);
1116 
1117 	/*
1118 	 * Memory registration is known to be slow, so for small
1119 	 * transfers, use pre-registered memory buffers and just
1120 	 * copy the data into/from them at the appropriate time
1121 	 */
1122 	if (idb->idb_buflen < ISER_BCOPY_THRESHOLD) {
1123 		iser_buf =
1124 		    kmem_cache_alloc(iser_hca->iser_buf_cache, KM_NOSLEEP);
1125 
1126 		if (iser_buf == NULL) {
1127 
1128 			/* Fail over to dynamic registration */
1129 			status = iser_reg_rdma_mem(iser_chan->ic_hca, idb);
1130 			idb->idb_bufalloc = B_FALSE;
1131 			return (status);
1132 		}
1133 
1134 		/*
1135 		 * Set the allocated data buffer pointer in the IDM buf handle
1136 		 * Data is to be copied from/to this buffer using bcopy
1137 		 */
1138 		idb->idb_bufptr = idb->idb_buf;
1139 		idb->idb_bufbcopy = B_TRUE;
1140 
1141 		idb->idb_buf = iser_buf->buf;
1142 
1143 		/* Set the private buf and reg handles in the IDM buf handle */
1144 		idb->idb_buf_private = (void *)iser_buf;
1145 		idb->idb_reg_private = (void *)iser_buf->iser_mr;
1146 
1147 		/* Ensure bufalloc'd flag is set */
1148 		idb->idb_bufalloc = B_TRUE;
1149 
1150 		return (IDM_STATUS_SUCCESS);
1151 
1152 	} else {
1153 
1154 		/* Dynamically register the memory passed in on the idb */
1155 		status = iser_reg_rdma_mem(iser_chan->ic_hca, idb);
1156 
1157 		/* Ensure bufalloc'd flag is unset */
1158 		idb->idb_bufalloc = B_FALSE;
1159 
1160 		return (status);
1161 	}
1162 }
1163 
1164 /*
1165  * iser_buf_teardown() is invoked on the initiator in order to register memory
1166  * on demand for use with the iSER layer.
1167  */
1168 static void
iser_buf_teardown(idm_buf_t * idb)1169 iser_buf_teardown(idm_buf_t *idb)
1170 {
1171 	iser_conn_t	*iser_conn;
1172 
1173 	iser_conn = (iser_conn_t *)idb->idb_ic->ic_transport_private;
1174 
1175 	/* Deregister the memory passed in on the idb */
1176 	iser_dereg_rdma_mem(iser_conn->ic_chan->ic_hca, idb);
1177 }
1178