xref: /titanic_51/usr/src/uts/common/io/idm/idm.c (revision d7e7cb9c207e40874f6a4b61ca8ea1526b5555bd)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 #include <sys/cpuvar.h>
26 #include <sys/conf.h>
27 #include <sys/file.h>
28 #include <sys/ddi.h>
29 #include <sys/sunddi.h>
30 #include <sys/modctl.h>
31 
32 #include <sys/socket.h>
33 #include <sys/strsubr.h>
34 #include <sys/sysmacros.h>
35 
36 #include <sys/socketvar.h>
37 #include <netinet/in.h>
38 
39 #include <sys/idm/idm.h>
40 #include <sys/idm/idm_so.h>
41 
42 #define	IDM_NAME_VERSION	"iSCSI Data Mover"
43 
44 extern struct mod_ops mod_miscops;
45 extern struct mod_ops mod_miscops;
46 
47 static struct modlmisc modlmisc = {
48 	&mod_miscops,	/* Type of module */
49 	IDM_NAME_VERSION
50 };
51 
52 static struct modlinkage modlinkage = {
53 	MODREV_1, (void *)&modlmisc, NULL
54 };
55 
56 extern void idm_wd_thread(void *arg);
57 
58 static int _idm_init(void);
59 static int _idm_fini(void);
60 static void idm_buf_bind_in_locked(idm_task_t *idt, idm_buf_t *buf);
61 static void idm_buf_bind_out_locked(idm_task_t *idt, idm_buf_t *buf);
62 static void idm_buf_unbind_in_locked(idm_task_t *idt, idm_buf_t *buf);
63 static void idm_buf_unbind_out_locked(idm_task_t *idt, idm_buf_t *buf);
64 static void idm_task_abort_one(idm_conn_t *ic, idm_task_t *idt,
65     idm_abort_type_t abort_type);
66 static void idm_task_aborted(idm_task_t *idt, idm_status_t status);
67 static idm_pdu_t *idm_pdu_alloc_common(uint_t hdrlen, uint_t datalen,
68     int sleepflag);
69 
70 boolean_t idm_conn_logging = 0;
71 boolean_t idm_svc_logging = 0;
72 #ifdef DEBUG
73 boolean_t idm_pattern_checking = 1;
74 #else
75 boolean_t idm_pattern_checking = 0;
76 #endif
77 
78 /*
79  * Potential tuneable for the maximum number of tasks.  Default to
80  * IDM_TASKIDS_MAX
81  */
82 
83 uint32_t	idm_max_taskids = IDM_TASKIDS_MAX;
84 
85 /*
86  * Global list of transport handles
87  *   These are listed in preferential order, so we can simply take the
88  *   first "it_conn_is_capable" hit. Note also that the order maps to
89  *   the order of the idm_transport_type_t list.
90  */
91 idm_transport_t idm_transport_list[] = {
92 
93 	/* iSER on InfiniBand transport handle */
94 	{IDM_TRANSPORT_TYPE_ISER,	/* type */
95 	"/devices/ib/iser@0:iser",	/* device path */
96 	NULL,				/* LDI handle */
97 	NULL,				/* transport ops */
98 	NULL},				/* transport caps */
99 
100 	/* IDM native sockets transport handle */
101 	{IDM_TRANSPORT_TYPE_SOCKETS,	/* type */
102 	NULL,				/* device path */
103 	NULL,				/* LDI handle */
104 	NULL,				/* transport ops */
105 	NULL}				/* transport caps */
106 
107 };
108 
109 int
110 _init(void)
111 {
112 	int rc;
113 
114 	if ((rc = _idm_init()) != 0) {
115 		return (rc);
116 	}
117 
118 	return (mod_install(&modlinkage));
119 }
120 
121 int
122 _fini(void)
123 {
124 	int rc;
125 
126 	if ((rc = _idm_fini()) != 0) {
127 		return (rc);
128 	}
129 
130 	if ((rc = mod_remove(&modlinkage)) != 0) {
131 		return (rc);
132 	}
133 
134 	return (rc);
135 }
136 
137 int
138 _info(struct modinfo *modinfop)
139 {
140 	return (mod_info(&modlinkage, modinfop));
141 }
142 
143 /*
144  * idm_transport_register()
145  *
146  * Provides a mechanism for an IDM transport driver to register its
147  * transport ops and caps with the IDM kernel module. Invoked during
148  * a transport driver's attach routine.
149  */
150 idm_status_t
151 idm_transport_register(idm_transport_attr_t *attr)
152 {
153 	ASSERT(attr->it_ops != NULL);
154 	ASSERT(attr->it_caps != NULL);
155 
156 	switch (attr->type) {
157 	/* All known non-native transports here; for now, iSER */
158 	case IDM_TRANSPORT_TYPE_ISER:
159 		idm_transport_list[attr->type].it_ops	= attr->it_ops;
160 		idm_transport_list[attr->type].it_caps	= attr->it_caps;
161 		return (IDM_STATUS_SUCCESS);
162 
163 	default:
164 		cmn_err(CE_NOTE, "idm: unknown transport type (0x%x) in "
165 		    "idm_transport_register", attr->type);
166 		return (IDM_STATUS_SUCCESS);
167 	}
168 }
169 
170 /*
171  * idm_ini_conn_create
172  *
173  * This function is invoked by the iSCSI layer to create a connection context.
174  * This does not actually establish the socket connection.
175  *
176  * cr - Connection request parameters
177  * new_con - Output parameter that contains the new request if successful
178  *
179  */
180 idm_status_t
181 idm_ini_conn_create(idm_conn_req_t *cr, idm_conn_t **new_con)
182 {
183 	idm_transport_t		*it;
184 	idm_conn_t		*ic;
185 	int			rc;
186 
187 	it = idm_transport_lookup(cr);
188 
189 retry:
190 	ic = idm_conn_create_common(CONN_TYPE_INI, it->it_type,
191 	    &cr->icr_conn_ops);
192 
193 	bcopy(&cr->cr_ini_dst_addr, &ic->ic_ini_dst_addr,
194 	    sizeof (cr->cr_ini_dst_addr));
195 
196 	/* create the transport-specific connection components */
197 	rc = it->it_ops->it_ini_conn_create(cr, ic);
198 	if (rc != IDM_STATUS_SUCCESS) {
199 		/* cleanup the failed connection */
200 		idm_conn_destroy_common(ic);
201 
202 		/*
203 		 * It is possible for an IB client to connect to
204 		 * an ethernet-only client via an IB-eth gateway.
205 		 * Therefore, if we are attempting to use iSER and
206 		 * fail, retry with sockets before ultimately
207 		 * failing the connection.
208 		 */
209 		if (it->it_type == IDM_TRANSPORT_TYPE_ISER) {
210 			it = &idm_transport_list[IDM_TRANSPORT_TYPE_SOCKETS];
211 			goto retry;
212 		}
213 
214 		return (IDM_STATUS_FAIL);
215 	}
216 
217 	*new_con = ic;
218 
219 	mutex_enter(&idm.idm_global_mutex);
220 	list_insert_tail(&idm.idm_ini_conn_list, ic);
221 	mutex_exit(&idm.idm_global_mutex);
222 
223 	return (IDM_STATUS_SUCCESS);
224 }
225 
226 /*
227  * idm_ini_conn_destroy
228  *
229  * Releases any resources associated with the connection.  This is the
230  * complement to idm_ini_conn_create.
231  * ic - idm_conn_t structure representing the relevant connection
232  *
233  */
234 void
235 idm_ini_conn_destroy_task(void *ic_void)
236 {
237 	idm_conn_t *ic = ic_void;
238 
239 	ic->ic_transport_ops->it_ini_conn_destroy(ic);
240 	idm_conn_destroy_common(ic);
241 }
242 
243 void
244 idm_ini_conn_destroy(idm_conn_t *ic)
245 {
246 	/*
247 	 * It's reasonable for the initiator to call idm_ini_conn_destroy
248 	 * from within the context of the CN_CONNECT_DESTROY notification.
249 	 * That's a problem since we want to destroy the taskq for the
250 	 * state machine associated with the connection.  Remove the
251 	 * connection from the list right away then handle the remaining
252 	 * work via the idm_global_taskq.
253 	 */
254 	mutex_enter(&idm.idm_global_mutex);
255 	list_remove(&idm.idm_ini_conn_list, ic);
256 	mutex_exit(&idm.idm_global_mutex);
257 
258 	if (taskq_dispatch(idm.idm_global_taskq,
259 	    &idm_ini_conn_destroy_task, ic, TQ_SLEEP) == NULL) {
260 		cmn_err(CE_WARN,
261 		    "idm_ini_conn_destroy: Couldn't dispatch task");
262 	}
263 }
264 
265 /*
266  * idm_ini_conn_connect
267  *
268  * Establish connection to the remote system identified in idm_conn_t.
269  * The connection parameters including the remote IP address were established
270  * in the call to idm_ini_conn_create.  The IDM state machine will
271  * perform client notifications as necessary to prompt the initiator through
272  * the login process.  IDM also keeps a timer running so that if the login
273  * process doesn't complete in a timely manner it will fail.
274  *
275  * ic - idm_conn_t structure representing the relevant connection
276  *
277  * Returns success if the connection was established, otherwise some kind
278  * of meaningful error code.
279  *
280  * Upon return the login has either failed or is loggin in (ffp)
281  */
282 idm_status_t
283 idm_ini_conn_connect(idm_conn_t *ic)
284 {
285 	idm_status_t	rc;
286 
287 	rc = idm_conn_sm_init(ic);
288 	if (rc != IDM_STATUS_SUCCESS) {
289 		return (ic->ic_conn_sm_status);
290 	}
291 
292 	/* Hold connection until we return */
293 	idm_conn_hold(ic);
294 
295 	/* Kick state machine */
296 	idm_conn_event(ic, CE_CONNECT_REQ, NULL);
297 
298 	/* Wait for login flag */
299 	mutex_enter(&ic->ic_state_mutex);
300 	while (!(ic->ic_state_flags & CF_LOGIN_READY) &&
301 	    !(ic->ic_state_flags & CF_ERROR)) {
302 		cv_wait(&ic->ic_state_cv, &ic->ic_state_mutex);
303 	}
304 
305 	/*
306 	 * The CN_READY_TO_LOGIN and/or the CN_CONNECT_FAIL call to
307 	 * idm_notify_client has already been generated by the idm conn
308 	 * state machine.  If connection fails any time after this
309 	 * check, we will detect it in iscsi_login.
310 	 */
311 	if (ic->ic_state_flags & CF_ERROR) {
312 		rc = ic->ic_conn_sm_status;
313 	}
314 	mutex_exit(&ic->ic_state_mutex);
315 	idm_conn_rele(ic);
316 
317 	return (rc);
318 }
319 
320 /*
321  * idm_ini_conn_disconnect
322  *
323  * Forces a connection (previously established using idm_ini_conn_connect)
324  * to perform a controlled shutdown, cleaning up any outstanding requests.
325  *
326  * ic - idm_conn_t structure representing the relevant connection
327  *
328  * This is asynchronous and will return before the connection is properly
329  * shutdown
330  */
331 /* ARGSUSED */
332 void
333 idm_ini_conn_disconnect(idm_conn_t *ic)
334 {
335 	idm_conn_event(ic, CE_TRANSPORT_FAIL, NULL);
336 }
337 
338 /*
339  * idm_ini_conn_disconnect_wait
340  *
341  * Forces a connection (previously established using idm_ini_conn_connect)
342  * to perform a controlled shutdown.  Blocks until the connection is
343  * disconnected.
344  *
345  * ic - idm_conn_t structure representing the relevant connection
346  */
347 /* ARGSUSED */
348 void
349 idm_ini_conn_disconnect_sync(idm_conn_t *ic)
350 {
351 	mutex_enter(&ic->ic_state_mutex);
352 	if ((ic->ic_state != CS_S9_INIT_ERROR) &&
353 	    (ic->ic_state != CS_S11_COMPLETE)) {
354 		idm_conn_event_locked(ic, CE_TRANSPORT_FAIL, NULL, CT_NONE);
355 		while ((ic->ic_state != CS_S9_INIT_ERROR) &&
356 		    (ic->ic_state != CS_S11_COMPLETE))
357 			cv_wait(&ic->ic_state_cv, &ic->ic_state_mutex);
358 	}
359 	mutex_exit(&ic->ic_state_mutex);
360 }
361 
362 /*
363  * idm_tgt_svc_create
364  *
365  * The target calls this service to obtain a service context for each available
366  * transport, starting a service of each type related to the IP address and port
367  * passed. The idm_svc_req_t contains the service parameters.
368  */
369 idm_status_t
370 idm_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t **new_svc)
371 {
372 	idm_transport_type_t	type;
373 	idm_transport_t		*it;
374 	idm_svc_t		*is;
375 	int			rc;
376 
377 	*new_svc = NULL;
378 	is = kmem_zalloc(sizeof (idm_svc_t), KM_SLEEP);
379 
380 	/* Initialize transport-agnostic components of the service handle */
381 	is->is_svc_req = *sr;
382 	mutex_init(&is->is_mutex, NULL, MUTEX_DEFAULT, NULL);
383 	cv_init(&is->is_cv, NULL, CV_DEFAULT, NULL);
384 	mutex_init(&is->is_count_mutex, NULL, MUTEX_DEFAULT, NULL);
385 	cv_init(&is->is_count_cv, NULL, CV_DEFAULT, NULL);
386 	idm_refcnt_init(&is->is_refcnt, is);
387 
388 	/*
389 	 * Make sure all available transports are setup.  We call this now
390 	 * instead of at initialization time in case IB has become available
391 	 * since we started (hotplug, etc).
392 	 */
393 	idm_transport_setup(sr->sr_li, B_FALSE);
394 
395 	/*
396 	 * Loop through the transports, configuring the transport-specific
397 	 * components of each one.
398 	 */
399 	for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {
400 
401 		it = &idm_transport_list[type];
402 		/*
403 		 * If it_ops is NULL then the transport is unconfigured
404 		 * and we shouldn't try to start the service.
405 		 */
406 		if (it->it_ops == NULL) {
407 			continue;
408 		}
409 
410 		rc = it->it_ops->it_tgt_svc_create(sr, is);
411 		if (rc != IDM_STATUS_SUCCESS) {
412 			/* Teardown any configured services */
413 			while (type--) {
414 				it = &idm_transport_list[type];
415 				if (it->it_ops == NULL) {
416 					continue;
417 				}
418 				it->it_ops->it_tgt_svc_destroy(is);
419 			}
420 			/* Free the svc context and return */
421 			kmem_free(is, sizeof (idm_svc_t));
422 			return (rc);
423 		}
424 	}
425 
426 	*new_svc = is;
427 
428 	mutex_enter(&idm.idm_global_mutex);
429 	list_insert_tail(&idm.idm_tgt_svc_list, is);
430 	mutex_exit(&idm.idm_global_mutex);
431 
432 	return (IDM_STATUS_SUCCESS);
433 }
434 
435 /*
436  * idm_tgt_svc_destroy
437  *
438  * is - idm_svc_t returned by the call to idm_tgt_svc_create
439  *
440  * Cleanup any resources associated with the idm_svc_t.
441  */
442 void
443 idm_tgt_svc_destroy(idm_svc_t *is)
444 {
445 	idm_transport_type_t	type;
446 	idm_transport_t		*it;
447 
448 	mutex_enter(&idm.idm_global_mutex);
449 	/* remove this service from the global list */
450 	list_remove(&idm.idm_tgt_svc_list, is);
451 	/* wakeup any waiters for service change */
452 	cv_broadcast(&idm.idm_tgt_svc_cv);
453 	mutex_exit(&idm.idm_global_mutex);
454 
455 	/* teardown each transport-specific service */
456 	for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {
457 		it = &idm_transport_list[type];
458 		if (it->it_ops == NULL) {
459 			continue;
460 		}
461 
462 		it->it_ops->it_tgt_svc_destroy(is);
463 	}
464 
465 	/* tear down the svc resources */
466 	idm_refcnt_destroy(&is->is_refcnt);
467 	cv_destroy(&is->is_count_cv);
468 	mutex_destroy(&is->is_count_mutex);
469 	cv_destroy(&is->is_cv);
470 	mutex_destroy(&is->is_mutex);
471 
472 	/* free the svc handle */
473 	kmem_free(is, sizeof (idm_svc_t));
474 }
475 
476 void
477 idm_tgt_svc_hold(idm_svc_t *is)
478 {
479 	idm_refcnt_hold(&is->is_refcnt);
480 }
481 
482 void
483 idm_tgt_svc_rele_and_destroy(idm_svc_t *is)
484 {
485 	idm_refcnt_rele_and_destroy(&is->is_refcnt,
486 	    (idm_refcnt_cb_t *)&idm_tgt_svc_destroy);
487 }
488 
489 /*
490  * idm_tgt_svc_online
491  *
492  * is - idm_svc_t returned by the call to idm_tgt_svc_create
493  *
494  * Online each transport service, as we want this target to be accessible
495  * via any configured transport.
496  *
497  * When the initiator establishes a new connection to the target, IDM will
498  * call the "new connect" callback defined in the idm_svc_req_t structure
499  * and it will pass an idm_conn_t structure representing that new connection.
500  */
501 idm_status_t
502 idm_tgt_svc_online(idm_svc_t *is)
503 {
504 
505 	idm_transport_type_t	type, last_type;
506 	idm_transport_t		*it;
507 	int			rc = IDM_STATUS_SUCCESS;
508 
509 	mutex_enter(&is->is_mutex);
510 	if (is->is_online == 0) {
511 		/* Walk through each of the transports and online them */
512 		for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {
513 			it = &idm_transport_list[type];
514 			if (it->it_ops == NULL) {
515 				/* transport is not registered */
516 				continue;
517 			}
518 
519 			mutex_exit(&is->is_mutex);
520 			rc = it->it_ops->it_tgt_svc_online(is);
521 			mutex_enter(&is->is_mutex);
522 			if (rc != IDM_STATUS_SUCCESS) {
523 				last_type = type;
524 				break;
525 			}
526 		}
527 		if (rc != IDM_STATUS_SUCCESS) {
528 			/*
529 			 * The last transport failed to online.
530 			 * Offline any transport onlined above and
531 			 * do not online the target.
532 			 */
533 			for (type = 0; type < last_type; type++) {
534 				it = &idm_transport_list[type];
535 				if (it->it_ops == NULL) {
536 					/* transport is not registered */
537 					continue;
538 				}
539 
540 				mutex_exit(&is->is_mutex);
541 				it->it_ops->it_tgt_svc_offline(is);
542 				mutex_enter(&is->is_mutex);
543 			}
544 		} else {
545 			/* Target service now online */
546 			is->is_online = 1;
547 		}
548 	} else {
549 		/* Target service already online, just bump the count */
550 		is->is_online++;
551 	}
552 	mutex_exit(&is->is_mutex);
553 
554 	return (rc);
555 }
556 
557 /*
558  * idm_tgt_svc_offline
559  *
560  * is - idm_svc_t returned by the call to idm_tgt_svc_create
561  *
562  * Shutdown any online target services.
563  */
564 void
565 idm_tgt_svc_offline(idm_svc_t *is)
566 {
567 	idm_transport_type_t	type;
568 	idm_transport_t		*it;
569 
570 	mutex_enter(&is->is_mutex);
571 	is->is_online--;
572 	if (is->is_online == 0) {
573 		/* Walk through each of the transports and offline them */
574 		for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {
575 			it = &idm_transport_list[type];
576 			if (it->it_ops == NULL) {
577 				/* transport is not registered */
578 				continue;
579 			}
580 
581 			mutex_exit(&is->is_mutex);
582 			it->it_ops->it_tgt_svc_offline(is);
583 			mutex_enter(&is->is_mutex);
584 		}
585 	}
586 	mutex_exit(&is->is_mutex);
587 }
588 
589 /*
590  * idm_tgt_svc_lookup
591  *
592  * Lookup a service instance listening on the specified port
593  */
594 
595 idm_svc_t *
596 idm_tgt_svc_lookup(uint16_t port)
597 {
598 	idm_svc_t *result;
599 
600 retry:
601 	mutex_enter(&idm.idm_global_mutex);
602 	for (result = list_head(&idm.idm_tgt_svc_list);
603 	    result != NULL;
604 	    result = list_next(&idm.idm_tgt_svc_list, result)) {
605 		if (result->is_svc_req.sr_port == port) {
606 			if (result->is_online == 0) {
607 				/*
608 				 * A service exists on this port, but it
609 				 * is going away, wait for it to cleanup.
610 				 */
611 				cv_wait(&idm.idm_tgt_svc_cv,
612 				    &idm.idm_global_mutex);
613 				mutex_exit(&idm.idm_global_mutex);
614 				goto retry;
615 			}
616 			idm_tgt_svc_hold(result);
617 			mutex_exit(&idm.idm_global_mutex);
618 			return (result);
619 		}
620 	}
621 	mutex_exit(&idm.idm_global_mutex);
622 
623 	return (NULL);
624 }
625 
626 /*
627  * idm_negotiate_key_values()
628  * Give IDM level a chance to negotiate any login parameters it should own.
629  *  -- leave unhandled parameters alone on request_nvl
630  *  -- move all handled parameters to response_nvl with an appropriate response
631  *  -- also add an entry to negotiated_nvl for any accepted parameters
632  */
633 kv_status_t
634 idm_negotiate_key_values(idm_conn_t *ic, nvlist_t *request_nvl,
635     nvlist_t *response_nvl, nvlist_t *negotiated_nvl)
636 {
637 	ASSERT(ic->ic_transport_ops != NULL);
638 	return (ic->ic_transport_ops->it_negotiate_key_values(ic,
639 	    request_nvl, response_nvl, negotiated_nvl));
640 }
641 
642 /*
643  * idm_notice_key_values()
644  * Activate at the IDM level any parameters that have been negotiated.
645  * Passes the set of key value pairs to the transport for activation.
646  * This will be invoked as the connection is entering full-feature mode.
647  */
648 void
649 idm_notice_key_values(idm_conn_t *ic, nvlist_t *negotiated_nvl)
650 {
651 	ASSERT(ic->ic_transport_ops != NULL);
652 	ic->ic_transport_ops->it_notice_key_values(ic, negotiated_nvl);
653 }
654 
655 /*
656  * idm_declare_key_values()
657  * Activate an operational set of declarative parameters from the config_nvl,
658  * and return the selected values in the outgoing_nvl.
659  */
660 kv_status_t
661 idm_declare_key_values(idm_conn_t *ic, nvlist_t *config_nvl,
662     nvlist_t *outgoing_nvl)
663 {
664 	ASSERT(ic->ic_transport_ops != NULL);
665 	return (ic->ic_transport_ops->it_declare_key_values(ic, config_nvl,
666 	    outgoing_nvl));
667 }
668 
669 /*
670  * idm_buf_tx_to_ini
671  *
672  * This is IDM's implementation of the 'Put_Data' operational primitive.
673  *
674  * This function is invoked by a target iSCSI layer to request its local
675  * Datamover layer to transmit the Data-In PDU to the peer iSCSI layer
676  * on the remote iSCSI node. The I/O buffer represented by 'idb' is
677  * transferred to the initiator associated with task 'idt'. The connection
678  * info, contents of the Data-In PDU header, the DataDescriptorIn, BHS,
679  * and the callback (idb->idb_buf_cb) at transfer completion are
680  * provided as input.
681  *
682  * This data transfer takes place transparently to the remote iSCSI layer,
683  * i.e. without its participation.
684  *
685  * Using sockets, IDM implements the data transfer by segmenting the data
686  * buffer into appropriately sized iSCSI PDUs and transmitting them to the
687  * initiator. iSER performs the transfer using RDMA write.
688  *
689  */
690 idm_status_t
691 idm_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb,
692     uint32_t offset, uint32_t xfer_len,
693     idm_buf_cb_t idb_buf_cb, void *cb_arg)
694 {
695 	idm_status_t rc;
696 
697 	idb->idb_bufoffset = offset;
698 	idb->idb_xfer_len = xfer_len;
699 	idb->idb_buf_cb = idb_buf_cb;
700 	idb->idb_cb_arg = cb_arg;
701 	gethrestime(&idb->idb_xfer_start);
702 
703 	/*
704 	 * Buffer should not contain the pattern.  If the pattern is
705 	 * present then we've been asked to transmit initialized data
706 	 */
707 	IDM_BUFPAT_CHECK(idb, xfer_len, BP_CHECK_ASSERT);
708 
709 	mutex_enter(&idt->idt_mutex);
710 	switch (idt->idt_state) {
711 	case TASK_ACTIVE:
712 		idt->idt_tx_to_ini_start++;
713 		idm_task_hold(idt);
714 		idm_buf_bind_in_locked(idt, idb);
715 		idb->idb_in_transport = B_TRUE;
716 		rc = (*idt->idt_ic->ic_transport_ops->it_buf_tx_to_ini)
717 		    (idt, idb);
718 		return (rc);
719 
720 	case TASK_SUSPENDING:
721 	case TASK_SUSPENDED:
722 		/*
723 		 * Bind buffer but don't start a transfer since the task
724 		 * is suspended
725 		 */
726 		idm_buf_bind_in_locked(idt, idb);
727 		mutex_exit(&idt->idt_mutex);
728 		return (IDM_STATUS_SUCCESS);
729 
730 	case TASK_ABORTING:
731 	case TASK_ABORTED:
732 		/*
733 		 * Once the task is aborted, any buffers added to the
734 		 * idt_inbufv will never get cleaned up, so just return
735 		 * SUCCESS.  The buffer should get cleaned up by the
736 		 * client or framework once task_aborted has completed.
737 		 */
738 		mutex_exit(&idt->idt_mutex);
739 		return (IDM_STATUS_SUCCESS);
740 
741 	default:
742 		ASSERT(0);
743 		break;
744 	}
745 	mutex_exit(&idt->idt_mutex);
746 
747 	return (IDM_STATUS_FAIL);
748 }
749 
750 /*
751  * idm_buf_rx_from_ini
752  *
753  * This is IDM's implementation of the 'Get_Data' operational primitive.
754  *
755  * This function is invoked by a target iSCSI layer to request its local
756  * Datamover layer to retrieve certain data identified by the R2T PDU from the
757  * peer iSCSI layer on the remote node. The retrieved Data-Out PDU will be
758  * mapped to the respective buffer by the task tags (ITT & TTT).
759  * The connection information, contents of an R2T PDU, DataDescriptor, BHS, and
760  * the callback (idb->idb_buf_cb) notification for data transfer completion are
761  * are provided as input.
762  *
763  * When an iSCSI node sends an R2T PDU to its local Datamover layer, the local
764  * Datamover layer, the local and remote Datamover layers transparently bring
765  * about the data transfer requested by the R2T PDU, without the participation
766  * of the iSCSI layers.
767  *
768  * Using sockets, IDM transmits an R2T PDU for each buffer and the rx_data_out()
769  * assembles the Data-Out PDUs into the buffer. iSER uses RDMA read.
770  *
771  */
772 idm_status_t
773 idm_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb,
774     uint32_t offset, uint32_t xfer_len,
775     idm_buf_cb_t idb_buf_cb, void *cb_arg)
776 {
777 	idm_status_t rc;
778 
779 	idb->idb_bufoffset = offset;
780 	idb->idb_xfer_len = xfer_len;
781 	idb->idb_buf_cb = idb_buf_cb;
782 	idb->idb_cb_arg = cb_arg;
783 	gethrestime(&idb->idb_xfer_start);
784 
785 	/*
786 	 * "In" buf list is for "Data In" PDU's, "Out" buf list is for
787 	 * "Data Out" PDU's
788 	 */
789 	mutex_enter(&idt->idt_mutex);
790 	switch (idt->idt_state) {
791 	case TASK_ACTIVE:
792 		idt->idt_rx_from_ini_start++;
793 		idm_task_hold(idt);
794 		idm_buf_bind_out_locked(idt, idb);
795 		idb->idb_in_transport = B_TRUE;
796 		rc = (*idt->idt_ic->ic_transport_ops->it_buf_rx_from_ini)
797 		    (idt, idb);
798 		return (rc);
799 	case TASK_SUSPENDING:
800 	case TASK_SUSPENDED:
801 	case TASK_ABORTING:
802 	case TASK_ABORTED:
803 		/*
804 		 * Bind buffer but don't start a transfer since the task
805 		 * is suspended
806 		 */
807 		idm_buf_bind_out_locked(idt, idb);
808 		mutex_exit(&idt->idt_mutex);
809 		return (IDM_STATUS_SUCCESS);
810 	default:
811 		ASSERT(0);
812 		break;
813 	}
814 	mutex_exit(&idt->idt_mutex);
815 
816 	return (IDM_STATUS_FAIL);
817 }
818 
819 /*
820  * idm_buf_tx_to_ini_done
821  *
822  * The transport calls this after it has completed a transfer requested by
823  * a call to transport_buf_tx_to_ini
824  *
825  * Caller holds idt->idt_mutex, idt->idt_mutex is released before returning.
826  * idt may be freed after the call to idb->idb_buf_cb.
827  */
828 void
829 idm_buf_tx_to_ini_done(idm_task_t *idt, idm_buf_t *idb, idm_status_t status)
830 {
831 	ASSERT(mutex_owned(&idt->idt_mutex));
832 	idb->idb_in_transport = B_FALSE;
833 	idb->idb_tx_thread = B_FALSE;
834 	idt->idt_tx_to_ini_done++;
835 	gethrestime(&idb->idb_xfer_done);
836 
837 	/*
838 	 * idm_refcnt_rele may cause TASK_SUSPENDING --> TASK_SUSPENDED or
839 	 * TASK_ABORTING --> TASK_ABORTED transistion if the refcount goes
840 	 * to 0.
841 	 */
842 	idm_task_rele(idt);
843 	idb->idb_status = status;
844 
845 	switch (idt->idt_state) {
846 	case TASK_ACTIVE:
847 		idt->idt_ic->ic_timestamp = ddi_get_lbolt();
848 		idm_buf_unbind_in_locked(idt, idb);
849 		mutex_exit(&idt->idt_mutex);
850 		(*idb->idb_buf_cb)(idb, status);
851 		return;
852 	case TASK_SUSPENDING:
853 	case TASK_SUSPENDED:
854 	case TASK_ABORTING:
855 	case TASK_ABORTED:
856 		/*
857 		 * To keep things simple we will ignore the case where the
858 		 * transfer was successful and leave all buffers bound to the
859 		 * task.  This allows us to also ignore the case where we've
860 		 * been asked to abort a task but the last transfer of the
861 		 * task has completed.  IDM has no idea whether this was, in
862 		 * fact, the last transfer of the task so it would be difficult
863 		 * to handle this case.  Everything should get sorted out again
864 		 * after task reassignment is complete.
865 		 *
866 		 * In the case of TASK_ABORTING we could conceivably call the
867 		 * buffer callback here but the timing of when the client's
868 		 * client_task_aborted callback is invoked vs. when the client's
869 		 * buffer callback gets invoked gets sticky.  We don't want
870 		 * the client to here from us again after the call to
871 		 * client_task_aborted() but we don't want to give it a bunch
872 		 * of failed buffer transfers until we've called
873 		 * client_task_aborted().  Instead we'll just leave all the
874 		 * buffers bound and allow the client to cleanup.
875 		 */
876 		break;
877 	default:
878 		ASSERT(0);
879 	}
880 	mutex_exit(&idt->idt_mutex);
881 }
882 
883 /*
884  * idm_buf_rx_from_ini_done
885  *
886  * The transport calls this after it has completed a transfer requested by
887  * a call totransport_buf_tx_to_ini
888  *
889  * Caller holds idt->idt_mutex, idt->idt_mutex is released before returning.
890  * idt may be freed after the call to idb->idb_buf_cb.
891  */
892 void
893 idm_buf_rx_from_ini_done(idm_task_t *idt, idm_buf_t *idb, idm_status_t status)
894 {
895 	ASSERT(mutex_owned(&idt->idt_mutex));
896 	idb->idb_in_transport = B_FALSE;
897 	idt->idt_rx_from_ini_done++;
898 	gethrestime(&idb->idb_xfer_done);
899 
900 	/*
901 	 * idm_refcnt_rele may cause TASK_SUSPENDING --> TASK_SUSPENDED or
902 	 * TASK_ABORTING --> TASK_ABORTED transistion if the refcount goes
903 	 * to 0.
904 	 */
905 	idm_task_rele(idt);
906 	idb->idb_status = status;
907 
908 	if (status == IDM_STATUS_SUCCESS) {
909 		/*
910 		 * Buffer should not contain the pattern.  If it does then
911 		 * we did not get the data from the remote host.
912 		 */
913 		IDM_BUFPAT_CHECK(idb, idb->idb_xfer_len, BP_CHECK_ASSERT);
914 	}
915 
916 	switch (idt->idt_state) {
917 	case TASK_ACTIVE:
918 		idt->idt_ic->ic_timestamp = ddi_get_lbolt();
919 		idm_buf_unbind_out_locked(idt, idb);
920 		mutex_exit(&idt->idt_mutex);
921 		(*idb->idb_buf_cb)(idb, status);
922 		return;
923 	case TASK_SUSPENDING:
924 	case TASK_SUSPENDED:
925 	case TASK_ABORTING:
926 	case TASK_ABORTED:
927 		/*
928 		 * To keep things simple we will ignore the case where the
929 		 * transfer was successful and leave all buffers bound to the
930 		 * task.  This allows us to also ignore the case where we've
931 		 * been asked to abort a task but the last transfer of the
932 		 * task has completed.  IDM has no idea whether this was, in
933 		 * fact, the last transfer of the task so it would be difficult
934 		 * to handle this case.  Everything should get sorted out again
935 		 * after task reassignment is complete.
936 		 *
937 		 * In the case of TASK_ABORTING we could conceivably call the
938 		 * buffer callback here but the timing of when the client's
939 		 * client_task_aborted callback is invoked vs. when the client's
940 		 * buffer callback gets invoked gets sticky.  We don't want
941 		 * the client to here from us again after the call to
942 		 * client_task_aborted() but we don't want to give it a bunch
943 		 * of failed buffer transfers until we've called
944 		 * client_task_aborted().  Instead we'll just leave all the
945 		 * buffers bound and allow the client to cleanup.
946 		 */
947 		break;
948 	default:
949 		ASSERT(0);
950 	}
951 	mutex_exit(&idt->idt_mutex);
952 }
953 
954 /*
955  * idm_buf_alloc
956  *
957  * Allocates a buffer handle and registers it for use with the transport
958  * layer. If a buffer is not passed on bufptr, the buffer will be allocated
959  * as well as the handle.
960  *
961  * ic		- connection on which the buffer will be transferred
962  * bufptr	- allocate memory for buffer if NULL, else assign to buffer
963  * buflen	- length of buffer
964  *
965  * Returns idm_buf_t handle if successful, otherwise NULL
966  */
967 idm_buf_t *
968 idm_buf_alloc(idm_conn_t *ic, void *bufptr, uint64_t buflen)
969 {
970 	idm_buf_t	*buf = NULL;
971 	int		rc;
972 
973 	ASSERT(ic != NULL);
974 	ASSERT(idm.idm_buf_cache != NULL);
975 	ASSERT(buflen > 0);
976 
977 	/* Don't allocate new buffers if we are not in FFP */
978 	mutex_enter(&ic->ic_state_mutex);
979 	if (!ic->ic_ffp) {
980 		mutex_exit(&ic->ic_state_mutex);
981 		return (NULL);
982 	}
983 
984 
985 	idm_conn_hold(ic);
986 	mutex_exit(&ic->ic_state_mutex);
987 
988 	buf = kmem_cache_alloc(idm.idm_buf_cache, KM_NOSLEEP);
989 	if (buf == NULL) {
990 		idm_conn_rele(ic);
991 		return (NULL);
992 	}
993 
994 	buf->idb_ic		= ic;
995 	buf->idb_buflen		= buflen;
996 	buf->idb_exp_offset	= 0;
997 	buf->idb_bufoffset	= 0;
998 	buf->idb_xfer_len 	= 0;
999 	buf->idb_magic		= IDM_BUF_MAGIC;
1000 	buf->idb_in_transport	= B_FALSE;
1001 	buf->idb_bufbcopy	= B_FALSE;
1002 
1003 	/*
1004 	 * If bufptr is NULL, we have an implicit request to allocate
1005 	 * memory for this IDM buffer handle and register it for use
1006 	 * with the transport. To simplify this, and to give more freedom
1007 	 * to the transport layer for it's own buffer management, both of
1008 	 * these actions will take place in the transport layer.
1009 	 * If bufptr is set, then the caller has allocated memory (or more
1010 	 * likely it's been passed from an upper layer), and we need only
1011 	 * register the buffer for use with the transport layer.
1012 	 */
1013 	if (bufptr == NULL) {
1014 		/*
1015 		 * Allocate a buffer from the transport layer (which
1016 		 * will also register the buffer for use).
1017 		 */
1018 		rc = ic->ic_transport_ops->it_buf_alloc(buf, buflen);
1019 		if (rc != 0) {
1020 			idm_conn_rele(ic);
1021 			kmem_cache_free(idm.idm_buf_cache, buf);
1022 			return (NULL);
1023 		}
1024 		/* Set the bufalloc'd flag */
1025 		buf->idb_bufalloc = B_TRUE;
1026 	} else {
1027 		/*
1028 		 * For large transfers, Set the passed bufptr into
1029 		 * the buf handle, and register the handle with the
1030 		 * transport layer. As memory registration with the
1031 		 * transport layer is a time/cpu intensive operation,
1032 		 * for small transfers (up to a pre-defined bcopy
1033 		 * threshold), use pre-registered memory buffers
1034 		 * and bcopy data at the appropriate time.
1035 		 */
1036 		buf->idb_buf = bufptr;
1037 
1038 		rc = ic->ic_transport_ops->it_buf_setup(buf);
1039 		if (rc != 0) {
1040 			idm_conn_rele(ic);
1041 			kmem_cache_free(idm.idm_buf_cache, buf);
1042 			return (NULL);
1043 		}
1044 		/*
1045 		 * The transport layer is now expected to set the idb_bufalloc
1046 		 * correctly to indicate if resources have been allocated.
1047 		 */
1048 	}
1049 
1050 	IDM_BUFPAT_SET(buf);
1051 
1052 	return (buf);
1053 }
1054 
1055 /*
1056  * idm_buf_free
1057  *
1058  * Release a buffer handle along with the associated buffer that was allocated
1059  * or assigned with idm_buf_alloc
1060  */
1061 void
1062 idm_buf_free(idm_buf_t *buf)
1063 {
1064 	idm_conn_t *ic = buf->idb_ic;
1065 
1066 
1067 	buf->idb_task_binding	= NULL;
1068 
1069 	if (buf->idb_bufalloc) {
1070 		ic->ic_transport_ops->it_buf_free(buf);
1071 	} else {
1072 		ic->ic_transport_ops->it_buf_teardown(buf);
1073 	}
1074 	kmem_cache_free(idm.idm_buf_cache, buf);
1075 	idm_conn_rele(ic);
1076 }
1077 
1078 /*
1079  * idm_buf_bind_in
1080  *
1081  * This function associates a buffer with a task. This is only for use by the
1082  * iSCSI initiator that will have only one buffer per transfer direction
1083  *
1084  */
1085 void
1086 idm_buf_bind_in(idm_task_t *idt, idm_buf_t *buf)
1087 {
1088 	mutex_enter(&idt->idt_mutex);
1089 	idm_buf_bind_in_locked(idt, buf);
1090 	mutex_exit(&idt->idt_mutex);
1091 }
1092 
1093 static void
1094 idm_buf_bind_in_locked(idm_task_t *idt, idm_buf_t *buf)
1095 {
1096 	buf->idb_task_binding = idt;
1097 	buf->idb_ic = idt->idt_ic;
1098 	idm_listbuf_insert(&idt->idt_inbufv, buf);
1099 }
1100 
1101 void
1102 idm_buf_bind_out(idm_task_t *idt, idm_buf_t *buf)
1103 {
1104 	/*
1105 	 * For small transfers, the iSER transport delegates the IDM
1106 	 * layer to bcopy the SCSI Write data for faster IOPS.
1107 	 */
1108 	if (buf->idb_bufbcopy == B_TRUE) {
1109 
1110 		bcopy(buf->idb_bufptr, buf->idb_buf, buf->idb_buflen);
1111 	}
1112 	mutex_enter(&idt->idt_mutex);
1113 	idm_buf_bind_out_locked(idt, buf);
1114 	mutex_exit(&idt->idt_mutex);
1115 }
1116 
1117 static void
1118 idm_buf_bind_out_locked(idm_task_t *idt, idm_buf_t *buf)
1119 {
1120 	buf->idb_task_binding = idt;
1121 	buf->idb_ic = idt->idt_ic;
1122 	idm_listbuf_insert(&idt->idt_outbufv, buf);
1123 }
1124 
1125 void
1126 idm_buf_unbind_in(idm_task_t *idt, idm_buf_t *buf)
1127 {
1128 	/*
1129 	 * For small transfers, the iSER transport delegates the IDM
1130 	 * layer to bcopy the SCSI Read data into the read buufer
1131 	 * for faster IOPS.
1132 	 */
1133 	if (buf->idb_bufbcopy == B_TRUE) {
1134 		bcopy(buf->idb_buf, buf->idb_bufptr, buf->idb_buflen);
1135 	}
1136 	mutex_enter(&idt->idt_mutex);
1137 	idm_buf_unbind_in_locked(idt, buf);
1138 	mutex_exit(&idt->idt_mutex);
1139 }
1140 
1141 static void
1142 idm_buf_unbind_in_locked(idm_task_t *idt, idm_buf_t *buf)
1143 {
1144 	list_remove(&idt->idt_inbufv, buf);
1145 }
1146 
1147 void
1148 idm_buf_unbind_out(idm_task_t *idt, idm_buf_t *buf)
1149 {
1150 	mutex_enter(&idt->idt_mutex);
1151 	idm_buf_unbind_out_locked(idt, buf);
1152 	mutex_exit(&idt->idt_mutex);
1153 }
1154 
1155 static void
1156 idm_buf_unbind_out_locked(idm_task_t *idt, idm_buf_t *buf)
1157 {
1158 	list_remove(&idt->idt_outbufv, buf);
1159 }
1160 
1161 /*
1162  * idm_buf_find() will lookup the idm_buf_t based on the relative offset in the
1163  * iSCSI PDU
1164  */
1165 idm_buf_t *
1166 idm_buf_find(void *lbuf, size_t data_offset)
1167 {
1168 	idm_buf_t	*idb;
1169 	list_t		*lst = (list_t *)lbuf;
1170 
1171 	/* iterate through the list to find the buffer */
1172 	for (idb = list_head(lst); idb != NULL; idb = list_next(lst, idb)) {
1173 
1174 		ASSERT((idb->idb_ic->ic_conn_type == CONN_TYPE_TGT) ||
1175 		    (idb->idb_bufoffset == 0));
1176 
1177 		if ((data_offset >= idb->idb_bufoffset) &&
1178 		    (data_offset < (idb->idb_bufoffset + idb->idb_buflen))) {
1179 
1180 			return (idb);
1181 		}
1182 	}
1183 
1184 	return (NULL);
1185 }
1186 
1187 void
1188 idm_bufpat_set(idm_buf_t *idb)
1189 {
1190 	idm_bufpat_t	*bufpat;
1191 	int		len, i;
1192 
1193 	len = idb->idb_buflen;
1194 	len = (len / sizeof (idm_bufpat_t)) * sizeof (idm_bufpat_t);
1195 
1196 	bufpat = idb->idb_buf;
1197 	for (i = 0; i < len; i += sizeof (idm_bufpat_t)) {
1198 		bufpat->bufpat_idb = idb;
1199 		bufpat->bufpat_bufmagic = IDM_BUF_MAGIC;
1200 		bufpat->bufpat_offset = i;
1201 		bufpat++;
1202 	}
1203 }
1204 
1205 boolean_t
1206 idm_bufpat_check(idm_buf_t *idb, int check_len, idm_bufpat_check_type_t type)
1207 {
1208 	idm_bufpat_t	*bufpat;
1209 	int		len, i;
1210 
1211 	len = (type == BP_CHECK_QUICK) ? sizeof (idm_bufpat_t) : check_len;
1212 	len = (len / sizeof (idm_bufpat_t)) * sizeof (idm_bufpat_t);
1213 	ASSERT(len <= idb->idb_buflen);
1214 	bufpat = idb->idb_buf;
1215 
1216 	/*
1217 	 * Don't check the pattern in buffers that came from outside IDM
1218 	 * (these will be buffers from the initiator that we opted not
1219 	 * to double-buffer)
1220 	 */
1221 	if (!idb->idb_bufalloc)
1222 		return (B_FALSE);
1223 
1224 	/*
1225 	 * Return true if we find the pattern anywhere in the buffer
1226 	 */
1227 	for (i = 0; i < len; i += sizeof (idm_bufpat_t)) {
1228 		if (BUFPAT_MATCH(bufpat, idb)) {
1229 			IDM_CONN_LOG(CE_WARN, "idm_bufpat_check found: "
1230 			    "idb %p bufpat %p "
1231 			    "bufpat_idb=%p bufmagic=%08x offset=%08x",
1232 			    (void *)idb, (void *)bufpat, bufpat->bufpat_idb,
1233 			    bufpat->bufpat_bufmagic, bufpat->bufpat_offset);
1234 			DTRACE_PROBE2(bufpat__pattern__found,
1235 			    idm_buf_t *, idb, idm_bufpat_t *, bufpat);
1236 			if (type == BP_CHECK_ASSERT) {
1237 				ASSERT(0);
1238 			}
1239 			return (B_TRUE);
1240 		}
1241 		bufpat++;
1242 	}
1243 
1244 	return (B_FALSE);
1245 }
1246 
1247 /*
1248  * idm_task_alloc
1249  *
1250  * This function will allocate a idm_task_t structure. A task tag is also
1251  * generated and saved in idt_tt. The task is not active.
1252  */
1253 idm_task_t *
1254 idm_task_alloc(idm_conn_t *ic)
1255 {
1256 	idm_task_t	*idt;
1257 
1258 	ASSERT(ic != NULL);
1259 
1260 	/* Don't allocate new tasks if we are not in FFP */
1261 	if (!ic->ic_ffp) {
1262 		return (NULL);
1263 	}
1264 	idt = kmem_cache_alloc(idm.idm_task_cache, KM_NOSLEEP);
1265 	if (idt == NULL) {
1266 		return (NULL);
1267 	}
1268 
1269 	ASSERT(list_is_empty(&idt->idt_inbufv));
1270 	ASSERT(list_is_empty(&idt->idt_outbufv));
1271 
1272 	mutex_enter(&ic->ic_state_mutex);
1273 	if (!ic->ic_ffp) {
1274 		mutex_exit(&ic->ic_state_mutex);
1275 		kmem_cache_free(idm.idm_task_cache, idt);
1276 		return (NULL);
1277 	}
1278 	idm_conn_hold(ic);
1279 	mutex_exit(&ic->ic_state_mutex);
1280 
1281 	idt->idt_state		= TASK_IDLE;
1282 	idt->idt_ic		= ic;
1283 	idt->idt_private 	= NULL;
1284 	idt->idt_exp_datasn	= 0;
1285 	idt->idt_exp_rttsn	= 0;
1286 	idt->idt_flags		= 0;
1287 	return (idt);
1288 }
1289 
1290 /*
1291  * idm_task_start
1292  *
1293  * Mark the task active and initialize some stats. The caller
1294  * sets up the idm_task_t structure with a prior call to idm_task_alloc().
1295  * The task service does not function as a task/work engine, it is the
1296  * responsibility of the initiator to start the data transfer and free the
1297  * resources.
1298  */
1299 void
1300 idm_task_start(idm_task_t *idt, uintptr_t handle)
1301 {
1302 	ASSERT(idt != NULL);
1303 
1304 	/* mark the task as ACTIVE */
1305 	idt->idt_state = TASK_ACTIVE;
1306 	idt->idt_client_handle = handle;
1307 	idt->idt_tx_to_ini_start = idt->idt_tx_to_ini_done =
1308 	    idt->idt_rx_from_ini_start = idt->idt_rx_from_ini_done =
1309 	    idt->idt_tx_bytes = idt->idt_rx_bytes = 0;
1310 }
1311 
1312 /*
1313  * idm_task_done
1314  *
1315  * This function sets the state to indicate that the task is no longer active.
1316  */
1317 void
1318 idm_task_done(idm_task_t *idt)
1319 {
1320 	ASSERT(idt != NULL);
1321 
1322 	mutex_enter(&idt->idt_mutex);
1323 	idt->idt_state = TASK_IDLE;
1324 	mutex_exit(&idt->idt_mutex);
1325 
1326 	/*
1327 	 * Although unlikely it is possible for a reference to come in after
1328 	 * the client has decided the task is over but before we've marked
1329 	 * the task idle.  One specific unavoidable scenario is the case where
1330 	 * received PDU with the matching ITT/TTT results in a successful
1331 	 * lookup of this task.  We are at the mercy of the remote node in
1332 	 * that case so we need to handle it.  Now that the task state
1333 	 * has changed no more references will occur so a simple call to
1334 	 * idm_refcnt_wait_ref should deal with the situation.
1335 	 */
1336 	idm_refcnt_wait_ref(&idt->idt_refcnt);
1337 	idm_refcnt_reset(&idt->idt_refcnt);
1338 }
1339 
1340 /*
1341  * idm_task_free
1342  *
1343  * This function will free the Task Tag and the memory allocated for the task
1344  * idm_task_done should be called prior to this call
1345  */
1346 void
1347 idm_task_free(idm_task_t *idt)
1348 {
1349 	idm_conn_t *ic;
1350 
1351 	ASSERT(idt != NULL);
1352 	ASSERT(idt->idt_refcnt.ir_refcnt == 0);
1353 	ASSERT(idt->idt_state == TASK_IDLE);
1354 
1355 	ic = idt->idt_ic;
1356 
1357 	/*
1358 	 * It's possible for items to still be in the idt_inbufv list if
1359 	 * they were added after idm_free_task_rsrc was called.  We rely on
1360 	 * STMF to free all buffers associated with the task however STMF
1361 	 * doesn't know that we have this reference to the buffers.
1362 	 * Use list_create so that we don't end up with stale references
1363 	 * to these buffers.
1364 	 */
1365 	list_create(&idt->idt_inbufv, sizeof (idm_buf_t),
1366 	    offsetof(idm_buf_t, idb_buflink));
1367 	list_create(&idt->idt_outbufv, sizeof (idm_buf_t),
1368 	    offsetof(idm_buf_t, idb_buflink));
1369 
1370 	kmem_cache_free(idm.idm_task_cache, idt);
1371 
1372 	idm_conn_rele(ic);
1373 }
1374 
1375 /*
1376  * idm_task_find_common
1377  *	common code for idm_task_find() and idm_task_find_and_complete()
1378  */
1379 /*ARGSUSED*/
1380 static idm_task_t *
1381 idm_task_find_common(idm_conn_t *ic, uint32_t itt, uint32_t ttt,
1382     boolean_t complete)
1383 {
1384 	uint32_t	tt, client_handle;
1385 	idm_task_t	*idt;
1386 
1387 	/*
1388 	 * Must match both itt and ttt.  The table is indexed by itt
1389 	 * for initiator connections and ttt for target connections.
1390 	 */
1391 	if (IDM_CONN_ISTGT(ic)) {
1392 		tt = ttt;
1393 		client_handle = itt;
1394 	} else {
1395 		tt = itt;
1396 		client_handle = ttt;
1397 	}
1398 
1399 	rw_enter(&idm.idm_taskid_table_lock, RW_READER);
1400 	if (tt >= idm.idm_taskid_max) {
1401 		rw_exit(&idm.idm_taskid_table_lock);
1402 		return (NULL);
1403 	}
1404 
1405 	idt = idm.idm_taskid_table[tt];
1406 
1407 	if (idt != NULL) {
1408 		mutex_enter(&idt->idt_mutex);
1409 		if ((idt->idt_state != TASK_ACTIVE) ||
1410 		    (idt->idt_ic != ic) ||
1411 		    (IDM_CONN_ISTGT(ic) &&
1412 		    (idt->idt_client_handle != client_handle))) {
1413 			/*
1414 			 * Task doesn't match or task is aborting and
1415 			 * we don't want any more references.
1416 			 */
1417 			if ((idt->idt_ic != ic) &&
1418 			    (idt->idt_state == TASK_ACTIVE) &&
1419 			    (IDM_CONN_ISINI(ic) || idt->idt_client_handle ==
1420 			    client_handle)) {
1421 				IDM_CONN_LOG(CE_WARN,
1422 				"idm_task_find: wrong connection %p != %p",
1423 				    (void *)ic, (void *)idt->idt_ic);
1424 			}
1425 			mutex_exit(&idt->idt_mutex);
1426 			rw_exit(&idm.idm_taskid_table_lock);
1427 			return (NULL);
1428 		}
1429 		idm_task_hold(idt);
1430 		/*
1431 		 * Set the task state to TASK_COMPLETE so it can no longer
1432 		 * be found or aborted.
1433 		 */
1434 		if (B_TRUE == complete)
1435 			idt->idt_state = TASK_COMPLETE;
1436 		mutex_exit(&idt->idt_mutex);
1437 	}
1438 	rw_exit(&idm.idm_taskid_table_lock);
1439 
1440 	return (idt);
1441 }
1442 
1443 /*
1444  * This function looks up a task by task tag.
1445  */
1446 idm_task_t *
1447 idm_task_find(idm_conn_t *ic, uint32_t itt, uint32_t ttt)
1448 {
1449 	return (idm_task_find_common(ic, itt, ttt, B_FALSE));
1450 }
1451 
1452 /*
1453  * This function looks up a task by task tag. If found, the task state
1454  * is atomically set to TASK_COMPLETE so it can longer be found or aborted.
1455  */
1456 idm_task_t *
1457 idm_task_find_and_complete(idm_conn_t *ic, uint32_t itt, uint32_t ttt)
1458 {
1459 	return (idm_task_find_common(ic, itt, ttt, B_TRUE));
1460 }
1461 
1462 /*
1463  * idm_task_find_by_handle
1464  *
1465  * This function looks up a task by the client-private idt_client_handle.
1466  *
1467  * This function should NEVER be called in the performance path.  It is
1468  * intended strictly for error recovery/task management.
1469  */
1470 /*ARGSUSED*/
1471 void *
1472 idm_task_find_by_handle(idm_conn_t *ic, uintptr_t handle)
1473 {
1474 	idm_task_t	*idt = NULL;
1475 	int		idx = 0;
1476 
1477 	rw_enter(&idm.idm_taskid_table_lock, RW_READER);
1478 
1479 	for (idx = 0; idx < idm.idm_taskid_max; idx++) {
1480 		idt = idm.idm_taskid_table[idx];
1481 
1482 		if (idt == NULL)
1483 			continue;
1484 
1485 		mutex_enter(&idt->idt_mutex);
1486 
1487 		if (idt->idt_state != TASK_ACTIVE) {
1488 			/*
1489 			 * Task is either in suspend, abort, or already
1490 			 * complete.
1491 			 */
1492 			mutex_exit(&idt->idt_mutex);
1493 			continue;
1494 		}
1495 
1496 		if (idt->idt_client_handle == handle) {
1497 			idm_task_hold(idt);
1498 			mutex_exit(&idt->idt_mutex);
1499 			break;
1500 		}
1501 
1502 		mutex_exit(&idt->idt_mutex);
1503 	}
1504 
1505 	rw_exit(&idm.idm_taskid_table_lock);
1506 
1507 	if ((idt == NULL) || (idx == idm.idm_taskid_max))
1508 		return (NULL);
1509 
1510 	return (idt->idt_private);
1511 }
1512 
1513 void
1514 idm_task_hold(idm_task_t *idt)
1515 {
1516 	idm_refcnt_hold(&idt->idt_refcnt);
1517 }
1518 
1519 void
1520 idm_task_rele(idm_task_t *idt)
1521 {
1522 	idm_refcnt_rele(&idt->idt_refcnt);
1523 }
1524 
1525 void
1526 idm_task_abort(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type)
1527 {
1528 	idm_task_t	*task;
1529 	int		idx;
1530 
1531 	/*
1532 	 * Passing NULL as the task indicates that all tasks
1533 	 * for this connection should be aborted.
1534 	 */
1535 	if (idt == NULL) {
1536 		/*
1537 		 * Only the connection state machine should ask for
1538 		 * all tasks to abort and this should never happen in FFP.
1539 		 */
1540 		ASSERT(!ic->ic_ffp);
1541 		rw_enter(&idm.idm_taskid_table_lock, RW_READER);
1542 		for (idx = 0; idx < idm.idm_taskid_max; idx++) {
1543 			task = idm.idm_taskid_table[idx];
1544 			if (task == NULL)
1545 				continue;
1546 			mutex_enter(&task->idt_mutex);
1547 			if ((task->idt_state != TASK_IDLE) &&
1548 			    (task->idt_state != TASK_COMPLETE) &&
1549 			    (task->idt_ic == ic)) {
1550 				rw_exit(&idm.idm_taskid_table_lock);
1551 				idm_task_abort_one(ic, task, abort_type);
1552 				rw_enter(&idm.idm_taskid_table_lock, RW_READER);
1553 			} else
1554 				mutex_exit(&task->idt_mutex);
1555 		}
1556 		rw_exit(&idm.idm_taskid_table_lock);
1557 	} else {
1558 		mutex_enter(&idt->idt_mutex);
1559 		idm_task_abort_one(ic, idt, abort_type);
1560 	}
1561 }
1562 
1563 static void
1564 idm_task_abort_unref_cb(void *ref)
1565 {
1566 	idm_task_t *idt = ref;
1567 
1568 	mutex_enter(&idt->idt_mutex);
1569 	switch (idt->idt_state) {
1570 	case TASK_SUSPENDING:
1571 		idt->idt_state = TASK_SUSPENDED;
1572 		mutex_exit(&idt->idt_mutex);
1573 		idm_task_aborted(idt, IDM_STATUS_SUSPENDED);
1574 		return;
1575 	case TASK_ABORTING:
1576 		idt->idt_state = TASK_ABORTED;
1577 		mutex_exit(&idt->idt_mutex);
1578 		idm_task_aborted(idt, IDM_STATUS_ABORTED);
1579 		return;
1580 	default:
1581 		mutex_exit(&idt->idt_mutex);
1582 		ASSERT(0);
1583 		break;
1584 	}
1585 }
1586 
1587 /*
1588  * Abort the idm task.
1589  *    Caller must hold the task mutex, which will be released before return
1590  */
1591 static void
1592 idm_task_abort_one(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type)
1593 {
1594 	/* Caller must hold connection mutex */
1595 	ASSERT(mutex_owned(&idt->idt_mutex));
1596 	switch (idt->idt_state) {
1597 	case TASK_ACTIVE:
1598 		switch (abort_type) {
1599 		case AT_INTERNAL_SUSPEND:
1600 			/* Call transport to release any resources */
1601 			idt->idt_state = TASK_SUSPENDING;
1602 			mutex_exit(&idt->idt_mutex);
1603 			ic->ic_transport_ops->it_free_task_rsrc(idt);
1604 
1605 			/*
1606 			 * Wait for outstanding references.  When all
1607 			 * references are released the callback will call
1608 			 * idm_task_aborted().
1609 			 */
1610 			idm_refcnt_async_wait_ref(&idt->idt_refcnt,
1611 			    &idm_task_abort_unref_cb);
1612 			return;
1613 		case AT_INTERNAL_ABORT:
1614 		case AT_TASK_MGMT_ABORT:
1615 			idt->idt_state = TASK_ABORTING;
1616 			mutex_exit(&idt->idt_mutex);
1617 			ic->ic_transport_ops->it_free_task_rsrc(idt);
1618 
1619 			/*
1620 			 * Wait for outstanding references.  When all
1621 			 * references are released the callback will call
1622 			 * idm_task_aborted().
1623 			 */
1624 			idm_refcnt_async_wait_ref(&idt->idt_refcnt,
1625 			    &idm_task_abort_unref_cb);
1626 			return;
1627 		default:
1628 			ASSERT(0);
1629 		}
1630 		break;
1631 	case TASK_SUSPENDING:
1632 		/* Already called transport_free_task_rsrc(); */
1633 		switch (abort_type) {
1634 		case AT_INTERNAL_SUSPEND:
1635 			/* Already doing it */
1636 			break;
1637 		case AT_INTERNAL_ABORT:
1638 		case AT_TASK_MGMT_ABORT:
1639 			idt->idt_state = TASK_ABORTING;
1640 			break;
1641 		default:
1642 			ASSERT(0);
1643 		}
1644 		break;
1645 	case TASK_SUSPENDED:
1646 		/* Already called transport_free_task_rsrc(); */
1647 		switch (abort_type) {
1648 		case AT_INTERNAL_SUSPEND:
1649 			/* Already doing it */
1650 			break;
1651 		case AT_INTERNAL_ABORT:
1652 		case AT_TASK_MGMT_ABORT:
1653 			idt->idt_state = TASK_ABORTING;
1654 			mutex_exit(&idt->idt_mutex);
1655 
1656 			/*
1657 			 * We could probably call idm_task_aborted directly
1658 			 * here but we may be holding the conn lock. It's
1659 			 * easier to just switch contexts.  Even though
1660 			 * we shouldn't really have any references we'll
1661 			 * set the state to TASK_ABORTING instead of
1662 			 * TASK_ABORTED so we can use the same code path.
1663 			 */
1664 			idm_refcnt_async_wait_ref(&idt->idt_refcnt,
1665 			    &idm_task_abort_unref_cb);
1666 			return;
1667 		default:
1668 			ASSERT(0);
1669 		}
1670 		break;
1671 	case TASK_ABORTING:
1672 	case TASK_ABORTED:
1673 		switch (abort_type) {
1674 		case AT_INTERNAL_SUSPEND:
1675 			/* We're already past this point... */
1676 		case AT_INTERNAL_ABORT:
1677 		case AT_TASK_MGMT_ABORT:
1678 			/* Already doing it */
1679 			break;
1680 		default:
1681 			ASSERT(0);
1682 		}
1683 		break;
1684 	case TASK_COMPLETE:
1685 		/*
1686 		 * In this case, let it go.  The status has already been
1687 		 * sent (which may or may not get successfully transmitted)
1688 		 * and we don't want to end up in a race between completing
1689 		 * the status PDU and marking the task suspended.
1690 		 */
1691 		break;
1692 	default:
1693 		ASSERT(0);
1694 	}
1695 	mutex_exit(&idt->idt_mutex);
1696 }
1697 
1698 static void
1699 idm_task_aborted(idm_task_t *idt, idm_status_t status)
1700 {
1701 	(*idt->idt_ic->ic_conn_ops.icb_task_aborted)(idt, status);
1702 }
1703 
1704 /*
1705  * idm_pdu_tx
1706  *
1707  * This is IDM's implementation of the 'Send_Control' operational primitive.
1708  * This function is invoked by an initiator iSCSI layer requesting the transfer
1709  * of a iSCSI command PDU or a target iSCSI layer requesting the transfer of a
1710  * iSCSI response PDU. The PDU will be transmitted as-is by the local Datamover
1711  * layer to the peer iSCSI layer in the remote iSCSI node. The connection info
1712  * and iSCSI PDU-specific qualifiers namely BHS, AHS, DataDescriptor and Size
1713  * are provided as input.
1714  *
1715  */
1716 void
1717 idm_pdu_tx(idm_pdu_t *pdu)
1718 {
1719 	idm_conn_t		*ic = pdu->isp_ic;
1720 	iscsi_async_evt_hdr_t	*async_evt;
1721 
1722 	/*
1723 	 * If we are in full-featured mode then route SCSI-related
1724 	 * commands to the appropriate function vector without checking
1725 	 * the connection state.  We will only be in full-feature mode
1726 	 * when we are in an acceptable state for SCSI PDU's.
1727 	 *
1728 	 * We also need to ensure that there are no PDU events outstanding
1729 	 * on the state machine.  Any non-SCSI PDU's received in full-feature
1730 	 * mode will result in PDU events and until these have been handled
1731 	 * we need to route all PDU's through the state machine as PDU
1732 	 * events to maintain ordering.
1733 	 *
1734 	 * Note that IDM cannot enter FFP mode until it processes in
1735 	 * its state machine the last xmit of the login process.
1736 	 * Hence, checking the IDM_PDU_LOGIN_TX flag here would be
1737 	 * superfluous.
1738 	 */
1739 	mutex_enter(&ic->ic_state_mutex);
1740 	if (ic->ic_ffp && (ic->ic_pdu_events == 0)) {
1741 		mutex_exit(&ic->ic_state_mutex);
1742 		switch (IDM_PDU_OPCODE(pdu)) {
1743 		case ISCSI_OP_SCSI_RSP:
1744 			/* Target only */
1745 			DTRACE_ISCSI_2(scsi__response, idm_conn_t *, ic,
1746 			    iscsi_scsi_rsp_hdr_t *,
1747 			    (iscsi_scsi_rsp_hdr_t *)pdu->isp_hdr);
1748 			idm_pdu_tx_forward(ic, pdu);
1749 			return;
1750 		case ISCSI_OP_SCSI_TASK_MGT_RSP:
1751 			/* Target only */
1752 			DTRACE_ISCSI_2(task__response, idm_conn_t *, ic,
1753 			    iscsi_text_rsp_hdr_t *,
1754 			    (iscsi_text_rsp_hdr_t *)pdu->isp_hdr);
1755 			idm_pdu_tx_forward(ic, pdu);
1756 			return;
1757 		case ISCSI_OP_SCSI_DATA_RSP:
1758 			/* Target only */
1759 			DTRACE_ISCSI_2(data__send, idm_conn_t *, ic,
1760 			    iscsi_data_rsp_hdr_t *,
1761 			    (iscsi_data_rsp_hdr_t *)pdu->isp_hdr);
1762 			idm_pdu_tx_forward(ic, pdu);
1763 			return;
1764 		case ISCSI_OP_RTT_RSP:
1765 			/* Target only */
1766 			DTRACE_ISCSI_2(data__request, idm_conn_t *, ic,
1767 			    iscsi_rtt_hdr_t *,
1768 			    (iscsi_rtt_hdr_t *)pdu->isp_hdr);
1769 			idm_pdu_tx_forward(ic, pdu);
1770 			return;
1771 		case ISCSI_OP_NOOP_IN:
1772 			/* Target only */
1773 			DTRACE_ISCSI_2(nop__send, idm_conn_t *, ic,
1774 			    iscsi_nop_in_hdr_t *,
1775 			    (iscsi_nop_in_hdr_t *)pdu->isp_hdr);
1776 			idm_pdu_tx_forward(ic, pdu);
1777 			return;
1778 		case ISCSI_OP_TEXT_RSP:
1779 			/* Target only */
1780 			DTRACE_ISCSI_2(text__response, idm_conn_t *, ic,
1781 			    iscsi_text_rsp_hdr_t *,
1782 			    (iscsi_text_rsp_hdr_t *)pdu->isp_hdr);
1783 			idm_pdu_tx_forward(ic, pdu);
1784 			return;
1785 		case ISCSI_OP_TEXT_CMD:
1786 		case ISCSI_OP_NOOP_OUT:
1787 		case ISCSI_OP_SCSI_CMD:
1788 		case ISCSI_OP_SCSI_DATA:
1789 		case ISCSI_OP_SCSI_TASK_MGT_MSG:
1790 			/* Initiator only */
1791 			idm_pdu_tx_forward(ic, pdu);
1792 			return;
1793 		default:
1794 			break;
1795 		}
1796 
1797 		mutex_enter(&ic->ic_state_mutex);
1798 	}
1799 
1800 	/*
1801 	 * Any PDU's processed outside of full-feature mode and non-SCSI
1802 	 * PDU's in full-feature mode are handled by generating an
1803 	 * event to the connection state machine.  The state machine
1804 	 * will validate the PDU against the current state and either
1805 	 * transmit the PDU if the opcode is allowed or handle an
1806 	 * error if the PDU is not allowed.
1807 	 *
1808 	 * This code-path will also generate any events that are implied
1809 	 * by the PDU opcode.  For example a "login response" with success
1810 	 * status generates a CE_LOGOUT_SUCCESS_SND event.
1811 	 */
1812 	switch (IDM_PDU_OPCODE(pdu)) {
1813 	case ISCSI_OP_LOGIN_CMD:
1814 		idm_conn_tx_pdu_event(ic, CE_LOGIN_SND, (uintptr_t)pdu);
1815 		break;
1816 	case ISCSI_OP_LOGIN_RSP:
1817 		DTRACE_ISCSI_2(login__response, idm_conn_t *, ic,
1818 		    iscsi_login_rsp_hdr_t *,
1819 		    (iscsi_login_rsp_hdr_t *)pdu->isp_hdr);
1820 		idm_parse_login_rsp(ic, pdu, /* Is RX */ B_FALSE);
1821 		break;
1822 	case ISCSI_OP_LOGOUT_CMD:
1823 		idm_parse_logout_req(ic, pdu, /* Is RX */ B_FALSE);
1824 		break;
1825 	case ISCSI_OP_LOGOUT_RSP:
1826 		DTRACE_ISCSI_2(logout__response, idm_conn_t *, ic,
1827 		    iscsi_logout_rsp_hdr_t *,
1828 		    (iscsi_logout_rsp_hdr_t *)pdu->isp_hdr);
1829 		idm_parse_logout_rsp(ic, pdu, /* Is RX */ B_FALSE);
1830 		break;
1831 	case ISCSI_OP_ASYNC_EVENT:
1832 		DTRACE_ISCSI_2(async__send, idm_conn_t *, ic,
1833 		    iscsi_async_evt_hdr_t *,
1834 		    (iscsi_async_evt_hdr_t *)pdu->isp_hdr);
1835 		async_evt = (iscsi_async_evt_hdr_t *)pdu->isp_hdr;
1836 		switch (async_evt->async_event) {
1837 		case ISCSI_ASYNC_EVENT_REQUEST_LOGOUT:
1838 			idm_conn_tx_pdu_event(ic, CE_ASYNC_LOGOUT_SND,
1839 			    (uintptr_t)pdu);
1840 			break;
1841 		case ISCSI_ASYNC_EVENT_DROPPING_CONNECTION:
1842 			idm_conn_tx_pdu_event(ic, CE_ASYNC_DROP_CONN_SND,
1843 			    (uintptr_t)pdu);
1844 			break;
1845 		case ISCSI_ASYNC_EVENT_DROPPING_ALL_CONNECTIONS:
1846 			idm_conn_tx_pdu_event(ic, CE_ASYNC_DROP_ALL_CONN_SND,
1847 			    (uintptr_t)pdu);
1848 			break;
1849 		case ISCSI_ASYNC_EVENT_SCSI_EVENT:
1850 		case ISCSI_ASYNC_EVENT_PARAM_NEGOTIATION:
1851 		default:
1852 			idm_conn_tx_pdu_event(ic, CE_MISC_TX,
1853 			    (uintptr_t)pdu);
1854 			break;
1855 		}
1856 		break;
1857 	case ISCSI_OP_SCSI_RSP:
1858 		/* Target only */
1859 		DTRACE_ISCSI_2(scsi__response, idm_conn_t *, ic,
1860 		    iscsi_scsi_rsp_hdr_t *,
1861 		    (iscsi_scsi_rsp_hdr_t *)pdu->isp_hdr);
1862 		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1863 		break;
1864 	case ISCSI_OP_SCSI_TASK_MGT_RSP:
1865 		/* Target only */
1866 		DTRACE_ISCSI_2(task__response, idm_conn_t *, ic,
1867 		    iscsi_scsi_task_mgt_rsp_hdr_t *,
1868 		    (iscsi_scsi_task_mgt_rsp_hdr_t *)pdu->isp_hdr);
1869 		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1870 		break;
1871 	case ISCSI_OP_SCSI_DATA_RSP:
1872 		/* Target only */
1873 		DTRACE_ISCSI_2(data__send, idm_conn_t *, ic,
1874 		    iscsi_data_rsp_hdr_t *,
1875 		    (iscsi_data_rsp_hdr_t *)pdu->isp_hdr);
1876 		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1877 		break;
1878 	case ISCSI_OP_RTT_RSP:
1879 		/* Target only */
1880 		DTRACE_ISCSI_2(data__request, idm_conn_t *, ic,
1881 		    iscsi_rtt_hdr_t *,
1882 		    (iscsi_rtt_hdr_t *)pdu->isp_hdr);
1883 		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1884 		break;
1885 	case ISCSI_OP_NOOP_IN:
1886 		/* Target only */
1887 		DTRACE_ISCSI_2(nop__send, idm_conn_t *, ic,
1888 		    iscsi_nop_in_hdr_t *,
1889 		    (iscsi_nop_in_hdr_t *)pdu->isp_hdr);
1890 		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1891 		break;
1892 	case ISCSI_OP_TEXT_RSP:
1893 		/* Target only */
1894 		DTRACE_ISCSI_2(text__response, idm_conn_t *, ic,
1895 		    iscsi_text_rsp_hdr_t *,
1896 		    (iscsi_text_rsp_hdr_t *)pdu->isp_hdr);
1897 		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1898 		break;
1899 		/* Initiator only */
1900 	case ISCSI_OP_SCSI_CMD:
1901 	case ISCSI_OP_SCSI_TASK_MGT_MSG:
1902 	case ISCSI_OP_SCSI_DATA:
1903 	case ISCSI_OP_NOOP_OUT:
1904 	case ISCSI_OP_TEXT_CMD:
1905 	case ISCSI_OP_SNACK_CMD:
1906 	case ISCSI_OP_REJECT_MSG:
1907 	default:
1908 		/*
1909 		 * Connection state machine will validate these PDU's against
1910 		 * the current state.  A PDU not allowed in the current
1911 		 * state will cause a protocol error.
1912 		 */
1913 		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1914 		break;
1915 	}
1916 	mutex_exit(&ic->ic_state_mutex);
1917 }
1918 
1919 /*
1920  * Common allocation of a PDU along with memory for header and data.
1921  */
1922 static idm_pdu_t *
1923 idm_pdu_alloc_common(uint_t hdrlen, uint_t datalen, int sleepflag)
1924 {
1925 	idm_pdu_t *result;
1926 
1927 	/*
1928 	 * IDM clients should cache these structures for performance
1929 	 * critical paths.  We can't cache effectively in IDM because we
1930 	 * don't know the correct header and data size.
1931 	 *
1932 	 * Valid header length is assumed to be hdrlen and valid data
1933 	 * length is assumed to be datalen.  isp_hdrlen and isp_datalen
1934 	 * can be adjusted after the PDU is returned if necessary.
1935 	 */
1936 	result = kmem_zalloc(sizeof (idm_pdu_t) + hdrlen + datalen, sleepflag);
1937 	if (result != NULL) {
1938 		/* For idm_pdu_free sanity check */
1939 		result->isp_flags |= IDM_PDU_ALLOC;
1940 		/* pointer arithmetic */
1941 		result->isp_hdr = (iscsi_hdr_t *)(result + 1);
1942 		result->isp_hdrlen = hdrlen;
1943 		result->isp_hdrbuflen = hdrlen;
1944 		result->isp_transport_hdrlen = 0;
1945 		if (datalen != 0)
1946 			result->isp_data = (uint8_t *)result->isp_hdr + hdrlen;
1947 		result->isp_datalen = datalen;
1948 		result->isp_databuflen = datalen;
1949 		result->isp_magic = IDM_PDU_MAGIC;
1950 	}
1951 
1952 	return (result);
1953 }
1954 
1955 /*
1956  * Typical idm_pdu_alloc invocation, will block for resources.
1957  */
1958 idm_pdu_t *
1959 idm_pdu_alloc(uint_t hdrlen, uint_t datalen)
1960 {
1961 	return (idm_pdu_alloc_common(hdrlen, datalen, KM_SLEEP));
1962 }
1963 
1964 /*
1965  * Non-blocking idm_pdu_alloc implementation, returns NULL if resources
1966  * are not available.  Needed for transport-layer allocations which may
1967  * be invoking in interrupt context.
1968  */
1969 idm_pdu_t *
1970 idm_pdu_alloc_nosleep(uint_t hdrlen, uint_t datalen)
1971 {
1972 	return (idm_pdu_alloc_common(hdrlen, datalen, KM_NOSLEEP));
1973 }
1974 
1975 /*
1976  * Free a PDU previously allocated with idm_pdu_alloc() including any
1977  * header and data space allocated as part of the original request.
1978  * Additional memory regions referenced by subsequent modification of
1979  * the isp_hdr and/or isp_data fields will not be freed.
1980  */
1981 void
1982 idm_pdu_free(idm_pdu_t *pdu)
1983 {
1984 	/* Make sure the structure was allocated using idm_pdu_alloc() */
1985 	ASSERT(pdu->isp_flags & IDM_PDU_ALLOC);
1986 	kmem_free(pdu,
1987 	    sizeof (idm_pdu_t) + pdu->isp_hdrbuflen + pdu->isp_databuflen);
1988 }
1989 
1990 /*
1991  * Initialize the connection, private and callback fields in a PDU.
1992  */
1993 void
1994 idm_pdu_init(idm_pdu_t *pdu, idm_conn_t *ic, void *private, idm_pdu_cb_t *cb)
1995 {
1996 	/*
1997 	 * idm_pdu_complete() will call idm_pdu_free if the callback is
1998 	 * NULL.  This will only work if the PDU was originally allocated
1999 	 * with idm_pdu_alloc().
2000 	 */
2001 	ASSERT((pdu->isp_flags & IDM_PDU_ALLOC) ||
2002 	    (cb != NULL));
2003 	pdu->isp_magic = IDM_PDU_MAGIC;
2004 	pdu->isp_ic = ic;
2005 	pdu->isp_private = private;
2006 	pdu->isp_callback = cb;
2007 }
2008 
2009 /*
2010  * Initialize the header and header length field.  This function should
2011  * not be used to adjust the header length in a buffer allocated via
2012  * pdu_pdu_alloc since it overwrites the existing header pointer.
2013  */
2014 void
2015 idm_pdu_init_hdr(idm_pdu_t *pdu, uint8_t *hdr, uint_t hdrlen)
2016 {
2017 	pdu->isp_hdr = (iscsi_hdr_t *)((void *)hdr);
2018 	pdu->isp_hdrlen = hdrlen;
2019 }
2020 
2021 /*
2022  * Initialize the data and data length fields.  This function should
2023  * not be used to adjust the data length of a buffer allocated via
2024  * idm_pdu_alloc since it overwrites the existing data pointer.
2025  */
2026 void
2027 idm_pdu_init_data(idm_pdu_t *pdu, uint8_t *data, uint_t datalen)
2028 {
2029 	pdu->isp_data = data;
2030 	pdu->isp_datalen = datalen;
2031 }
2032 
2033 void
2034 idm_pdu_complete(idm_pdu_t *pdu, idm_status_t status)
2035 {
2036 	if (pdu->isp_callback) {
2037 		pdu->isp_status = status;
2038 		(*pdu->isp_callback)(pdu, status);
2039 	} else {
2040 		idm_pdu_free(pdu);
2041 	}
2042 }
2043 
2044 /*
2045  * State machine auditing
2046  */
2047 
2048 void
2049 idm_sm_audit_init(sm_audit_buf_t *audit_buf)
2050 {
2051 	bzero(audit_buf, sizeof (sm_audit_buf_t));
2052 	audit_buf->sab_max_index = SM_AUDIT_BUF_MAX_REC - 1;
2053 }
2054 
2055 static
2056 sm_audit_record_t *
2057 idm_sm_audit_common(sm_audit_buf_t *audit_buf, sm_audit_record_type_t r_type,
2058     sm_audit_sm_type_t sm_type,
2059     int current_state)
2060 {
2061 	sm_audit_record_t *sar;
2062 
2063 	sar = audit_buf->sab_records;
2064 	sar += audit_buf->sab_index;
2065 	audit_buf->sab_index++;
2066 	audit_buf->sab_index &= audit_buf->sab_max_index;
2067 
2068 	sar->sar_type = r_type;
2069 	gethrestime(&sar->sar_timestamp);
2070 	sar->sar_sm_type = sm_type;
2071 	sar->sar_state = current_state;
2072 
2073 	return (sar);
2074 }
2075 
2076 void
2077 idm_sm_audit_event(sm_audit_buf_t *audit_buf,
2078     sm_audit_sm_type_t sm_type, int current_state,
2079     int event, uintptr_t event_info)
2080 {
2081 	sm_audit_record_t *sar;
2082 
2083 	sar = idm_sm_audit_common(audit_buf, SAR_STATE_EVENT,
2084 	    sm_type, current_state);
2085 	sar->sar_event = event;
2086 	sar->sar_event_info = event_info;
2087 }
2088 
2089 void
2090 idm_sm_audit_state_change(sm_audit_buf_t *audit_buf,
2091     sm_audit_sm_type_t sm_type, int current_state, int new_state)
2092 {
2093 	sm_audit_record_t *sar;
2094 
2095 	sar = idm_sm_audit_common(audit_buf, SAR_STATE_CHANGE,
2096 	    sm_type, current_state);
2097 	sar->sar_new_state = new_state;
2098 }
2099 
2100 
2101 /*
2102  * Object reference tracking
2103  */
2104 
2105 void
2106 idm_refcnt_init(idm_refcnt_t *refcnt, void *referenced_obj)
2107 {
2108 	bzero(refcnt, sizeof (*refcnt));
2109 	idm_refcnt_reset(refcnt);
2110 	refcnt->ir_referenced_obj = referenced_obj;
2111 	bzero(&refcnt->ir_audit_buf, sizeof (refcnt_audit_buf_t));
2112 	refcnt->ir_audit_buf.anb_max_index = REFCNT_AUDIT_BUF_MAX_REC - 1;
2113 	mutex_init(&refcnt->ir_mutex, NULL, MUTEX_DEFAULT, NULL);
2114 	cv_init(&refcnt->ir_cv, NULL, CV_DEFAULT, NULL);
2115 }
2116 
2117 void
2118 idm_refcnt_destroy(idm_refcnt_t *refcnt)
2119 {
2120 	/*
2121 	 * Grab the mutex to there are no other lingering threads holding
2122 	 * the mutex before we destroy it (e.g. idm_refcnt_rele just after
2123 	 * the refcnt goes to zero if ir_waiting == REF_WAIT_ASYNC)
2124 	 */
2125 	mutex_enter(&refcnt->ir_mutex);
2126 	ASSERT(refcnt->ir_refcnt == 0);
2127 	cv_destroy(&refcnt->ir_cv);
2128 	mutex_destroy(&refcnt->ir_mutex);
2129 }
2130 
2131 void
2132 idm_refcnt_reset(idm_refcnt_t *refcnt)
2133 {
2134 	refcnt->ir_waiting = REF_NOWAIT;
2135 	refcnt->ir_refcnt = 0;
2136 }
2137 
2138 void
2139 idm_refcnt_hold(idm_refcnt_t *refcnt)
2140 {
2141 	/*
2142 	 * Nothing should take a hold on an object after a call to
2143 	 * idm_refcnt_wait_ref or idm_refcnd_async_wait_ref
2144 	 */
2145 	ASSERT(refcnt->ir_waiting == REF_NOWAIT);
2146 
2147 	mutex_enter(&refcnt->ir_mutex);
2148 	refcnt->ir_refcnt++;
2149 	REFCNT_AUDIT(refcnt);
2150 	mutex_exit(&refcnt->ir_mutex);
2151 }
2152 
2153 static void
2154 idm_refcnt_unref_task(void *refcnt_void)
2155 {
2156 	idm_refcnt_t *refcnt = refcnt_void;
2157 
2158 	REFCNT_AUDIT(refcnt);
2159 	(*refcnt->ir_cb)(refcnt->ir_referenced_obj);
2160 }
2161 
2162 void
2163 idm_refcnt_rele(idm_refcnt_t *refcnt)
2164 {
2165 	mutex_enter(&refcnt->ir_mutex);
2166 	ASSERT(refcnt->ir_refcnt > 0);
2167 	refcnt->ir_refcnt--;
2168 	REFCNT_AUDIT(refcnt);
2169 	if (refcnt->ir_waiting == REF_NOWAIT) {
2170 		/* No one is waiting on this object */
2171 		mutex_exit(&refcnt->ir_mutex);
2172 		return;
2173 	}
2174 
2175 	/*
2176 	 * Someone is waiting for this object to go idle so check if
2177 	 * refcnt is 0.  Waiting on an object then later grabbing another
2178 	 * reference is not allowed so we don't need to handle that case.
2179 	 */
2180 	if (refcnt->ir_refcnt == 0) {
2181 		if (refcnt->ir_waiting == REF_WAIT_ASYNC) {
2182 			if (taskq_dispatch(idm.idm_global_taskq,
2183 			    &idm_refcnt_unref_task, refcnt, TQ_SLEEP) == NULL) {
2184 				cmn_err(CE_WARN,
2185 				    "idm_refcnt_rele: Couldn't dispatch task");
2186 			}
2187 		} else if (refcnt->ir_waiting == REF_WAIT_SYNC) {
2188 			cv_signal(&refcnt->ir_cv);
2189 		}
2190 	}
2191 	mutex_exit(&refcnt->ir_mutex);
2192 }
2193 
2194 void
2195 idm_refcnt_rele_and_destroy(idm_refcnt_t *refcnt, idm_refcnt_cb_t *cb_func)
2196 {
2197 	mutex_enter(&refcnt->ir_mutex);
2198 	ASSERT(refcnt->ir_refcnt > 0);
2199 	refcnt->ir_refcnt--;
2200 	REFCNT_AUDIT(refcnt);
2201 
2202 	/*
2203 	 * Someone is waiting for this object to go idle so check if
2204 	 * refcnt is 0.  Waiting on an object then later grabbing another
2205 	 * reference is not allowed so we don't need to handle that case.
2206 	 */
2207 	if (refcnt->ir_refcnt == 0) {
2208 		refcnt->ir_cb = cb_func;
2209 		refcnt->ir_waiting = REF_WAIT_ASYNC;
2210 		if (taskq_dispatch(idm.idm_global_taskq,
2211 		    &idm_refcnt_unref_task, refcnt, TQ_SLEEP) == NULL) {
2212 			cmn_err(CE_WARN,
2213 			    "idm_refcnt_rele: Couldn't dispatch task");
2214 		}
2215 	}
2216 	mutex_exit(&refcnt->ir_mutex);
2217 }
2218 
2219 void
2220 idm_refcnt_wait_ref(idm_refcnt_t *refcnt)
2221 {
2222 	mutex_enter(&refcnt->ir_mutex);
2223 	refcnt->ir_waiting = REF_WAIT_SYNC;
2224 	REFCNT_AUDIT(refcnt);
2225 	while (refcnt->ir_refcnt != 0)
2226 		cv_wait(&refcnt->ir_cv, &refcnt->ir_mutex);
2227 	mutex_exit(&refcnt->ir_mutex);
2228 }
2229 
2230 void
2231 idm_refcnt_async_wait_ref(idm_refcnt_t *refcnt, idm_refcnt_cb_t *cb_func)
2232 {
2233 	mutex_enter(&refcnt->ir_mutex);
2234 	refcnt->ir_waiting = REF_WAIT_ASYNC;
2235 	refcnt->ir_cb = cb_func;
2236 	REFCNT_AUDIT(refcnt);
2237 	/*
2238 	 * It's possible we don't have any references.  To make things easier
2239 	 * on the caller use a taskq to call the callback instead of
2240 	 * calling it synchronously
2241 	 */
2242 	if (refcnt->ir_refcnt == 0) {
2243 		if (taskq_dispatch(idm.idm_global_taskq,
2244 		    &idm_refcnt_unref_task, refcnt, TQ_SLEEP) == NULL) {
2245 			cmn_err(CE_WARN,
2246 			    "idm_refcnt_async_wait_ref: "
2247 			    "Couldn't dispatch task");
2248 		}
2249 	}
2250 	mutex_exit(&refcnt->ir_mutex);
2251 }
2252 
2253 void
2254 idm_refcnt_destroy_unref_obj(idm_refcnt_t *refcnt,
2255     idm_refcnt_cb_t *cb_func)
2256 {
2257 	mutex_enter(&refcnt->ir_mutex);
2258 	if (refcnt->ir_refcnt == 0) {
2259 		mutex_exit(&refcnt->ir_mutex);
2260 		(*cb_func)(refcnt->ir_referenced_obj);
2261 		return;
2262 	}
2263 	mutex_exit(&refcnt->ir_mutex);
2264 }
2265 
2266 void
2267 idm_conn_hold(idm_conn_t *ic)
2268 {
2269 	idm_refcnt_hold(&ic->ic_refcnt);
2270 }
2271 
2272 void
2273 idm_conn_rele(idm_conn_t *ic)
2274 {
2275 	idm_refcnt_rele(&ic->ic_refcnt);
2276 }
2277 
2278 void
2279 idm_conn_set_target_name(idm_conn_t *ic, char *target_name)
2280 {
2281 	(void) strlcpy(ic->ic_target_name, target_name, ISCSI_MAX_NAME_LEN + 1);
2282 }
2283 
2284 void
2285 idm_conn_set_initiator_name(idm_conn_t *ic, char *initiator_name)
2286 {
2287 	(void) strlcpy(ic->ic_initiator_name, initiator_name,
2288 	    ISCSI_MAX_NAME_LEN + 1);
2289 }
2290 
2291 void
2292 idm_conn_set_isid(idm_conn_t *ic, uint8_t isid[ISCSI_ISID_LEN])
2293 {
2294 	(void) snprintf(ic->ic_isid, ISCSI_MAX_ISID_LEN + 1,
2295 	    "%02x%02x%02x%02x%02x%02x",
2296 	    isid[0], isid[1], isid[2], isid[3], isid[4], isid[5]);
2297 }
2298 
2299 static int
2300 _idm_init(void)
2301 {
2302 	/* Initialize the rwlock for the taskid table */
2303 	rw_init(&idm.idm_taskid_table_lock, NULL, RW_DRIVER, NULL);
2304 
2305 	/* Initialize the global mutex and taskq */
2306 	mutex_init(&idm.idm_global_mutex, NULL, MUTEX_DEFAULT, NULL);
2307 
2308 	cv_init(&idm.idm_tgt_svc_cv, NULL, CV_DEFAULT, NULL);
2309 	cv_init(&idm.idm_wd_cv, NULL, CV_DEFAULT, NULL);
2310 
2311 	/*
2312 	 * The maximum allocation needs to be high here since there can be
2313 	 * many concurrent tasks using the global taskq.
2314 	 */
2315 	idm.idm_global_taskq = taskq_create("idm_global_taskq", 1, minclsyspri,
2316 	    128, 16384, TASKQ_PREPOPULATE);
2317 	if (idm.idm_global_taskq == NULL) {
2318 		cv_destroy(&idm.idm_wd_cv);
2319 		cv_destroy(&idm.idm_tgt_svc_cv);
2320 		mutex_destroy(&idm.idm_global_mutex);
2321 		rw_destroy(&idm.idm_taskid_table_lock);
2322 		return (ENOMEM);
2323 	}
2324 
2325 	/* Start watchdog thread */
2326 	idm.idm_wd_thread = thread_create(NULL, 0,
2327 	    idm_wd_thread, NULL, 0, &p0, TS_RUN, minclsyspri);
2328 	if (idm.idm_wd_thread == NULL) {
2329 		/* Couldn't create the watchdog thread */
2330 		taskq_destroy(idm.idm_global_taskq);
2331 		cv_destroy(&idm.idm_wd_cv);
2332 		cv_destroy(&idm.idm_tgt_svc_cv);
2333 		mutex_destroy(&idm.idm_global_mutex);
2334 		rw_destroy(&idm.idm_taskid_table_lock);
2335 		return (ENOMEM);
2336 	}
2337 
2338 	/* Pause until the watchdog thread is running */
2339 	mutex_enter(&idm.idm_global_mutex);
2340 	while (!idm.idm_wd_thread_running)
2341 		cv_wait(&idm.idm_wd_cv, &idm.idm_global_mutex);
2342 	mutex_exit(&idm.idm_global_mutex);
2343 
2344 	/*
2345 	 * Allocate the task ID table and set "next" to 0.
2346 	 */
2347 
2348 	idm.idm_taskid_max = idm_max_taskids;
2349 	idm.idm_taskid_table = (idm_task_t **)
2350 	    kmem_zalloc(idm.idm_taskid_max * sizeof (idm_task_t *), KM_SLEEP);
2351 	idm.idm_taskid_next = 0;
2352 
2353 	/* Create the global buffer and task kmem caches */
2354 	idm.idm_buf_cache = kmem_cache_create("idm_buf_cache",
2355 	    sizeof (idm_buf_t), 8, NULL, NULL, NULL, NULL, NULL, KM_SLEEP);
2356 
2357 	/*
2358 	 * Note, we're explicitly allocating an additional iSER header-
2359 	 * sized chunk for each of these elements. See idm_task_constructor().
2360 	 */
2361 	idm.idm_task_cache = kmem_cache_create("idm_task_cache",
2362 	    sizeof (idm_task_t) + IDM_TRANSPORT_HEADER_LENGTH, 8,
2363 	    &idm_task_constructor, &idm_task_destructor,
2364 	    NULL, NULL, NULL, KM_SLEEP);
2365 
2366 	/* Create the service and connection context lists */
2367 	list_create(&idm.idm_tgt_svc_list, sizeof (idm_svc_t),
2368 	    offsetof(idm_svc_t, is_list_node));
2369 	list_create(&idm.idm_tgt_conn_list, sizeof (idm_conn_t),
2370 	    offsetof(idm_conn_t, ic_list_node));
2371 	list_create(&idm.idm_ini_conn_list, sizeof (idm_conn_t),
2372 	    offsetof(idm_conn_t, ic_list_node));
2373 
2374 	/* Initialize the native sockets transport */
2375 	idm_so_init(&idm_transport_list[IDM_TRANSPORT_TYPE_SOCKETS]);
2376 
2377 	/* Create connection ID pool */
2378 	(void) idm_idpool_create(&idm.idm_conn_id_pool);
2379 
2380 	return (DDI_SUCCESS);
2381 }
2382 
2383 static int
2384 _idm_fini(void)
2385 {
2386 	if (!list_is_empty(&idm.idm_ini_conn_list) ||
2387 	    !list_is_empty(&idm.idm_tgt_conn_list) ||
2388 	    !list_is_empty(&idm.idm_tgt_svc_list)) {
2389 		return (EBUSY);
2390 	}
2391 
2392 	mutex_enter(&idm.idm_global_mutex);
2393 	idm.idm_wd_thread_running = B_FALSE;
2394 	cv_signal(&idm.idm_wd_cv);
2395 	mutex_exit(&idm.idm_global_mutex);
2396 
2397 	thread_join(idm.idm_wd_thread_did);
2398 
2399 	idm_idpool_destroy(&idm.idm_conn_id_pool);
2400 
2401 	/* Close any LDI handles we have open on transport drivers */
2402 	mutex_enter(&idm.idm_global_mutex);
2403 	idm_transport_teardown();
2404 	mutex_exit(&idm.idm_global_mutex);
2405 
2406 	/* Teardown the native sockets transport */
2407 	idm_so_fini();
2408 
2409 	list_destroy(&idm.idm_ini_conn_list);
2410 	list_destroy(&idm.idm_tgt_conn_list);
2411 	list_destroy(&idm.idm_tgt_svc_list);
2412 	kmem_cache_destroy(idm.idm_task_cache);
2413 	kmem_cache_destroy(idm.idm_buf_cache);
2414 	kmem_free(idm.idm_taskid_table,
2415 	    idm.idm_taskid_max * sizeof (idm_task_t *));
2416 	mutex_destroy(&idm.idm_global_mutex);
2417 	cv_destroy(&idm.idm_wd_cv);
2418 	cv_destroy(&idm.idm_tgt_svc_cv);
2419 	rw_destroy(&idm.idm_taskid_table_lock);
2420 
2421 	return (0);
2422 }
2423