xref: /illumos-gate/usr/src/uts/common/io/idm/idm.c (revision c9eab9d4e096bb9b983e9b007577edfa73c32eff)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/cpuvar.h>
27 #include <sys/conf.h>
28 #include <sys/file.h>
29 #include <sys/ddi.h>
30 #include <sys/sunddi.h>
31 #include <sys/modctl.h>
32 
33 #include <sys/socket.h>
34 #include <sys/strsubr.h>
35 #include <sys/sysmacros.h>
36 
37 #include <sys/socketvar.h>
38 #include <netinet/in.h>
39 
40 #include <sys/idm/idm.h>
41 #include <sys/idm/idm_so.h>
42 
43 #define	IDM_NAME_VERSION	"iSCSI Data Mover"
44 
45 extern struct mod_ops mod_miscops;
46 extern struct mod_ops mod_miscops;
47 
48 static struct modlmisc modlmisc = {
49 	&mod_miscops,	/* Type of module */
50 	IDM_NAME_VERSION
51 };
52 
53 static struct modlinkage modlinkage = {
54 	MODREV_1, (void *)&modlmisc, NULL
55 };
56 
/* Routines referenced by this file but defined elsewhere. */
extern int idm_task_compare(const void *t1, const void *t2);
extern void idm_wd_thread(void *arg);

/* Module-private setup/teardown, invoked from _init() and _fini(). */
static int _idm_init(void);
static int _idm_fini(void);

/*
 * Buffer bind/unbind helpers.  The public (non-"_locked") wrappers take
 * idt->idt_mutex around each of these.
 */
static void idm_buf_bind_in_locked(idm_task_t *idt, idm_buf_t *buf);
static void idm_buf_bind_out_locked(idm_task_t *idt, idm_buf_t *buf);
static void idm_buf_unbind_in_locked(idm_task_t *idt, idm_buf_t *buf);
static void idm_buf_unbind_out_locked(idm_task_t *idt, idm_buf_t *buf);

/* Task abort helpers. */
static void idm_task_abort_one(idm_conn_t *ic, idm_task_t *idt,
    idm_abort_type_t abort_type);
static void idm_task_aborted(idm_task_t *idt, idm_status_t status);
69 
70 boolean_t idm_conn_logging = 0;
71 boolean_t idm_svc_logging = 0;
72 
73 /*
74  * Potential tuneable for the maximum number of tasks.  Default to
75  * IDM_TASKIDS_MAX
76  */
77 
78 uint32_t	idm_max_taskids = IDM_TASKIDS_MAX;
79 
/*
 * Global list of transport handles
 *   These are listed in preferential order, so we can simply take the
 *   first "it_conn_is_capable" hit. Note also that the order maps to
 *   the order of the idm_transport_type_t list.
 *
 *   Code in this file indexes the array directly by transport type
 *   (e.g. idm_transport_list[IDM_TRANSPORT_TYPE_SOCKETS]), so an entry's
 *   position must match its type value.  The initializers are positional;
 *   a transport whose ops pointer is still NULL is treated as unconfigured
 *   by the service routines below.
 */
idm_transport_t idm_transport_list[] = {

	/* iSER on InfiniBand transport handle */
	{IDM_TRANSPORT_TYPE_ISER,	/* type */
	"/devices/ib/iser@0:iser",	/* device path */
	NULL,				/* LDI handle */
	NULL,				/* transport ops */
	NULL},				/* transport caps */

	/* IDM native sockets transport handle */
	{IDM_TRANSPORT_TYPE_SOCKETS,	/* type */
	NULL,				/* device path */
	NULL,				/* LDI handle */
	NULL,				/* transport ops */
	NULL}				/* transport caps */

};
103 
104 int
105 _init(void)
106 {
107 	int rc;
108 
109 	if ((rc = _idm_init()) != 0) {
110 		return (rc);
111 	}
112 
113 	return (mod_install(&modlinkage));
114 }
115 
116 int
117 _fini(void)
118 {
119 	int rc;
120 
121 	if ((rc = _idm_fini()) != 0) {
122 		return (rc);
123 	}
124 
125 	if ((rc = mod_remove(&modlinkage)) != 0) {
126 		return (rc);
127 	}
128 
129 	return (rc);
130 }
131 
132 int
133 _info(struct modinfo *modinfop)
134 {
135 	return (mod_info(&modlinkage, modinfop));
136 }
137 
138 /*
139  * idm_transport_register()
140  *
141  * Provides a mechanism for an IDM transport driver to register its
142  * transport ops and caps with the IDM kernel module. Invoked during
143  * a transport driver's attach routine.
144  */
145 idm_status_t
146 idm_transport_register(idm_transport_attr_t *attr)
147 {
148 	ASSERT(attr->it_ops != NULL);
149 	ASSERT(attr->it_caps != NULL);
150 
151 	switch (attr->type) {
152 	/* All known non-native transports here; for now, iSER */
153 	case IDM_TRANSPORT_TYPE_ISER:
154 		idm_transport_list[attr->type].it_ops	= attr->it_ops;
155 		idm_transport_list[attr->type].it_caps	= attr->it_caps;
156 		return (IDM_STATUS_SUCCESS);
157 
158 	default:
159 		cmn_err(CE_NOTE, "idm: unknown transport type (0x%x) in "
160 		    "idm_transport_register", attr->type);
161 		return (IDM_STATUS_SUCCESS);
162 	}
163 }
164 
165 /*
166  * idm_ini_conn_create
167  *
168  * This function is invoked by the iSCSI layer to create a connection context.
169  * This does not actually establish the socket connection.
170  *
171  * cr - Connection request parameters
172  * new_con - Output parameter that contains the new request if successful
173  *
174  */
175 idm_status_t
176 idm_ini_conn_create(idm_conn_req_t *cr, idm_conn_t **new_con)
177 {
178 	idm_transport_t		*it;
179 	idm_conn_t		*ic;
180 	int			rc;
181 
182 	it = idm_transport_lookup(cr);
183 
184 retry:
185 	ic = idm_conn_create_common(CONN_TYPE_INI, it->it_type,
186 	    &cr->icr_conn_ops);
187 
188 	bcopy(&cr->cr_ini_dst_addr, &ic->ic_ini_dst_addr,
189 	    sizeof (cr->cr_ini_dst_addr));
190 
191 	/* create the transport-specific connection components */
192 	rc = it->it_ops->it_ini_conn_create(cr, ic);
193 	if (rc != IDM_STATUS_SUCCESS) {
194 		/* cleanup the failed connection */
195 		idm_conn_destroy_common(ic);
196 		kmem_free(ic, sizeof (idm_conn_t));
197 
198 		/*
199 		 * It is possible for an IB client to connect to
200 		 * an ethernet-only client via an IB-eth gateway.
201 		 * Therefore, if we are attempting to use iSER and
202 		 * fail, retry with sockets before ultimately
203 		 * failing the connection.
204 		 */
205 		if (it->it_type == IDM_TRANSPORT_TYPE_ISER) {
206 			it = &idm_transport_list[IDM_TRANSPORT_TYPE_SOCKETS];
207 			goto retry;
208 		}
209 
210 		return (IDM_STATUS_FAIL);
211 	}
212 
213 	*new_con = ic;
214 
215 	mutex_enter(&idm.idm_global_mutex);
216 	list_insert_tail(&idm.idm_ini_conn_list, ic);
217 	mutex_exit(&idm.idm_global_mutex);
218 
219 	return (IDM_STATUS_SUCCESS);
220 }
221 
222 /*
223  * idm_ini_conn_destroy
224  *
225  * Releases any resources associated with the connection.  This is the
226  * complement to idm_ini_conn_create.
227  * ic - idm_conn_t structure representing the relevant connection
228  *
229  */
230 void
231 idm_ini_conn_destroy(idm_conn_t *ic)
232 {
233 	mutex_enter(&idm.idm_global_mutex);
234 	list_remove(&idm.idm_ini_conn_list, ic);
235 	mutex_exit(&idm.idm_global_mutex);
236 
237 	ic->ic_transport_ops->it_ini_conn_destroy(ic);
238 	idm_conn_destroy_common(ic);
239 }
240 
241 /*
242  * idm_ini_conn_connect
243  *
244  * Establish connection to the remote system identified in idm_conn_t.
245  * The connection parameters including the remote IP address were established
246  * in the call to idm_ini_conn_create.
247  *
248  * ic - idm_conn_t structure representing the relevant connection
249  *
250  * Returns success if the connection was established, otherwise some kind
251  * of meaningful error code.
252  *
253  * Upon return the initiator can send a "login" request when it is ready.
254  */
255 idm_status_t
256 idm_ini_conn_connect(idm_conn_t *ic)
257 {
258 	idm_status_t	rc;
259 
260 	rc = idm_conn_sm_init(ic);
261 	if (rc != IDM_STATUS_SUCCESS) {
262 		return (ic->ic_conn_sm_status);
263 	}
264 	/* Kick state machine */
265 	idm_conn_event(ic, CE_CONNECT_REQ, NULL);
266 
267 	/* Wait for login flag */
268 	mutex_enter(&ic->ic_state_mutex);
269 	while (!(ic->ic_state_flags & CF_LOGIN_READY) &&
270 	    !(ic->ic_state_flags & CF_ERROR)) {
271 		cv_wait(&ic->ic_state_cv, &ic->ic_state_mutex);
272 	}
273 	mutex_exit(&ic->ic_state_mutex);
274 
275 	if (ic->ic_state_flags & CF_ERROR) {
276 		/* ic->ic_conn_sm_status will contains failure status */
277 		return (ic->ic_conn_sm_status);
278 	}
279 
280 	/* Ready to login */
281 	ASSERT(ic->ic_state_flags & CF_LOGIN_READY);
282 	(void) idm_notify_client(ic, CN_READY_FOR_LOGIN, NULL);
283 
284 	return (IDM_STATUS_SUCCESS);
285 }
286 
287 /*
288  * idm_ini_conn_sm_fini_task()
289  *
290  * Dispatch a thread on the global taskq to tear down an initiator connection's
291  * state machine. Note: We cannot do this from the disconnect thread as we will
292  * end up in a situation wherein the thread is running on a taskq that it then
293  * attempts to destroy.
294  */
295 static void
296 idm_ini_conn_sm_fini_task(void *ic_void)
297 {
298 	idm_conn_sm_fini((idm_conn_t *)ic_void);
299 }
300 
301 /*
302  * idm_ini_conn_disconnect
303  *
304  * Forces a connection (previously established using idm_ini_conn_connect)
305  * to perform a controlled shutdown, cleaning up any outstanding requests.
306  *
307  * ic - idm_conn_t structure representing the relevant connection
308  *
309  * This is synchronous and it will return when the connection has been
310  * properly shutdown.
311  */
312 /* ARGSUSED */
313 void
314 idm_ini_conn_disconnect(idm_conn_t *ic)
315 {
316 	mutex_enter(&ic->ic_state_mutex);
317 
318 	if (ic->ic_state_flags == 0) {
319 		/* already disconnected */
320 		mutex_exit(&ic->ic_state_mutex);
321 		return;
322 	}
323 	ic->ic_state_flags = 0;
324 	ic->ic_conn_sm_status = 0;
325 	mutex_exit(&ic->ic_state_mutex);
326 
327 	/* invoke the transport-specific conn_destroy */
328 	(void) ic->ic_transport_ops->it_ini_conn_disconnect(ic);
329 
330 	/* teardown the connection sm */
331 	(void) taskq_dispatch(idm.idm_global_taskq, &idm_ini_conn_sm_fini_task,
332 	    (void *)ic, TQ_SLEEP);
333 }
334 
335 /*
336  * idm_tgt_svc_create
337  *
338  * The target calls this service to obtain a service context for each available
339  * transport, starting a service of each type related to the IP address and port
340  * passed. The idm_svc_req_t contains the service parameters.
341  */
342 idm_status_t
343 idm_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t **new_svc)
344 {
345 	idm_transport_type_t	type;
346 	idm_transport_t		*it;
347 	idm_svc_t		*is;
348 	int			rc;
349 
350 	*new_svc = NULL;
351 	is = kmem_zalloc(sizeof (idm_svc_t), KM_SLEEP);
352 
353 	/* Initialize transport-agnostic components of the service handle */
354 	is->is_svc_req = *sr;
355 	mutex_init(&is->is_mutex, NULL, MUTEX_DEFAULT, NULL);
356 	cv_init(&is->is_cv, NULL, CV_DEFAULT, NULL);
357 	mutex_init(&is->is_count_mutex, NULL, MUTEX_DEFAULT, NULL);
358 	cv_init(&is->is_count_cv, NULL, CV_DEFAULT, NULL);
359 	idm_refcnt_init(&is->is_refcnt, is);
360 
361 	/*
362 	 * Make sure all available transports are setup.  We call this now
363 	 * instead of at initialization time in case IB has become available
364 	 * since we started (hotplug, etc).
365 	 */
366 	idm_transport_setup(sr->sr_li);
367 
368 	/*
369 	 * Loop through the transports, configuring the transport-specific
370 	 * components of each one.
371 	 */
372 	for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {
373 
374 		it = &idm_transport_list[type];
375 		/*
376 		 * If it_ops is NULL then the transport is unconfigured
377 		 * and we shouldn't try to start the service.
378 		 */
379 		if (it->it_ops == NULL) {
380 			continue;
381 		}
382 
383 		rc = it->it_ops->it_tgt_svc_create(sr, is);
384 		if (rc != IDM_STATUS_SUCCESS) {
385 			/* Teardown any configured services */
386 			while (type--) {
387 				it = &idm_transport_list[type];
388 				if (it->it_ops == NULL) {
389 					continue;
390 				}
391 				it->it_ops->it_tgt_svc_destroy(is);
392 			}
393 			/* Free the svc context and return */
394 			kmem_free(is, sizeof (idm_svc_t));
395 			return (rc);
396 		}
397 	}
398 
399 	*new_svc = is;
400 
401 	mutex_enter(&idm.idm_global_mutex);
402 	list_insert_tail(&idm.idm_tgt_svc_list, is);
403 	mutex_exit(&idm.idm_global_mutex);
404 
405 	return (IDM_STATUS_SUCCESS);
406 }
407 
408 /*
409  * idm_tgt_svc_destroy
410  *
411  * is - idm_svc_t returned by the call to idm_tgt_svc_create
412  *
413  * Cleanup any resources associated with the idm_svc_t.
414  */
415 void
416 idm_tgt_svc_destroy(idm_svc_t *is)
417 {
418 	idm_transport_type_t	type;
419 	idm_transport_t		*it;
420 
421 	mutex_enter(&idm.idm_global_mutex);
422 	/* remove this service from the global list */
423 	list_remove(&idm.idm_tgt_svc_list, is);
424 	/* wakeup any waiters for service change */
425 	cv_broadcast(&idm.idm_tgt_svc_cv);
426 	mutex_exit(&idm.idm_global_mutex);
427 
428 	/* tear down the svc resources */
429 	idm_refcnt_destroy(&is->is_refcnt);
430 	cv_destroy(&is->is_count_cv);
431 	mutex_destroy(&is->is_count_mutex);
432 	cv_destroy(&is->is_cv);
433 	mutex_destroy(&is->is_mutex);
434 
435 	/* teardown each transport-specific service */
436 	for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {
437 		it = &idm_transport_list[type];
438 		if (it->it_ops == NULL) {
439 			continue;
440 		}
441 
442 		it->it_ops->it_tgt_svc_destroy(is);
443 	}
444 
445 	/* free the svc handle */
446 	kmem_free(is, sizeof (idm_svc_t));
447 }
448 
449 void
450 idm_tgt_svc_hold(idm_svc_t *is)
451 {
452 	idm_refcnt_hold(&is->is_refcnt);
453 }
454 
455 void
456 idm_tgt_svc_rele_and_destroy(idm_svc_t *is)
457 {
458 	idm_refcnt_rele_and_destroy(&is->is_refcnt,
459 	    (idm_refcnt_cb_t *)&idm_tgt_svc_destroy);
460 }
461 
462 /*
463  * idm_tgt_svc_online
464  *
465  * is - idm_svc_t returned by the call to idm_tgt_svc_create
466  *
467  * Online each transport service, as we want this target to be accessible
468  * via any configured transport.
469  *
470  * When the initiator establishes a new connection to the target, IDM will
471  * call the "new connect" callback defined in the idm_svc_req_t structure
472  * and it will pass an idm_conn_t structure representing that new connection.
473  */
474 idm_status_t
475 idm_tgt_svc_online(idm_svc_t *is)
476 {
477 
478 	idm_transport_type_t	type;
479 	idm_transport_t		*it;
480 	int			rc;
481 	int			svc_found;
482 
483 	mutex_enter(&is->is_mutex);
484 	/* Walk through each of the transports and online them */
485 	if (is->is_online == 0) {
486 		svc_found = 0;
487 		for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {
488 			it = &idm_transport_list[type];
489 			if (it->it_ops == NULL) {
490 				/* transport is not registered */
491 				continue;
492 			}
493 
494 			mutex_exit(&is->is_mutex);
495 			rc = it->it_ops->it_tgt_svc_online(is);
496 			mutex_enter(&is->is_mutex);
497 			if (rc == IDM_STATUS_SUCCESS) {
498 				/* We have at least one service running. */
499 				svc_found = 1;
500 			}
501 		}
502 	} else {
503 		svc_found = 1;
504 	}
505 	if (svc_found)
506 		is->is_online++;
507 	mutex_exit(&is->is_mutex);
508 
509 	return (svc_found ? IDM_STATUS_SUCCESS : IDM_STATUS_FAIL);
510 }
511 
512 /*
513  * idm_tgt_svc_offline
514  *
515  * is - idm_svc_t returned by the call to idm_tgt_svc_create
516  *
517  * Shutdown any online target services.
518  */
519 void
520 idm_tgt_svc_offline(idm_svc_t *is)
521 {
522 	idm_transport_type_t	type;
523 	idm_transport_t		*it;
524 
525 	mutex_enter(&is->is_mutex);
526 	is->is_online--;
527 	if (is->is_online == 0) {
528 		/* Walk through each of the transports and offline them */
529 		for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {
530 			it = &idm_transport_list[type];
531 			if (it->it_ops == NULL) {
532 				/* transport is not registered */
533 				continue;
534 			}
535 
536 			mutex_exit(&is->is_mutex);
537 			it->it_ops->it_tgt_svc_offline(is);
538 			mutex_enter(&is->is_mutex);
539 		}
540 	}
541 	mutex_exit(&is->is_mutex);
542 }
543 
544 /*
545  * idm_tgt_svc_lookup
546  *
547  * Lookup a service instance listening on the specified port
548  */
549 
550 idm_svc_t *
551 idm_tgt_svc_lookup(uint16_t port)
552 {
553 	idm_svc_t *result;
554 
555 retry:
556 	mutex_enter(&idm.idm_global_mutex);
557 	for (result = list_head(&idm.idm_tgt_svc_list);
558 	    result != NULL;
559 	    result = list_next(&idm.idm_tgt_svc_list, result)) {
560 		if (result->is_svc_req.sr_port == port) {
561 			if (result->is_online == 0) {
562 				/*
563 				 * A service exists on this port, but it
564 				 * is going away, wait for it to cleanup.
565 				 */
566 				cv_wait(&idm.idm_tgt_svc_cv,
567 				    &idm.idm_global_mutex);
568 				mutex_exit(&idm.idm_global_mutex);
569 				goto retry;
570 			}
571 			idm_tgt_svc_hold(result);
572 			mutex_exit(&idm.idm_global_mutex);
573 			return (result);
574 		}
575 	}
576 	mutex_exit(&idm.idm_global_mutex);
577 
578 	return (NULL);
579 }
580 
581 /*
582  * idm_negotiate_key_values()
583  * Give IDM level a chance to negotiate any login parameters it should own.
584  *  -- leave unhandled parameters alone on request_nvl
585  *  -- move all handled parameters to response_nvl with an appropriate response
586  *  -- also add an entry to negotiated_nvl for any accepted parameters
587  */
588 kv_status_t
589 idm_negotiate_key_values(idm_conn_t *ic, nvlist_t *request_nvl,
590     nvlist_t *response_nvl, nvlist_t *negotiated_nvl)
591 {
592 	ASSERT(ic->ic_transport_ops != NULL);
593 	return (ic->ic_transport_ops->it_negotiate_key_values(ic,
594 	    request_nvl, response_nvl, negotiated_nvl));
595 }
596 
597 /*
598  * idm_notice_key_values()
599  * Activate at the IDM level any parameters that have been negotiated.
600  * Passes the set of key value pairs to the transport for activation.
601  * This will be invoked as the connection is entering full-feature mode.
602  */
603 idm_status_t
604 idm_notice_key_values(idm_conn_t *ic, nvlist_t *negotiated_nvl)
605 {
606 	ASSERT(ic->ic_transport_ops != NULL);
607 	return (ic->ic_transport_ops->it_notice_key_values(ic,
608 	    negotiated_nvl));
609 }
610 
611 /*
612  * idm_buf_tx_to_ini
613  *
614  * This is IDM's implementation of the 'Put_Data' operational primitive.
615  *
616  * This function is invoked by a target iSCSI layer to request its local
617  * Datamover layer to transmit the Data-In PDU to the peer iSCSI layer
618  * on the remote iSCSI node. The I/O buffer represented by 'idb' is
619  * transferred to the initiator associated with task 'idt'. The connection
620  * info, contents of the Data-In PDU header, the DataDescriptorIn, BHS,
621  * and the callback (idb->idb_buf_cb) at transfer completion are
622  * provided as input.
623  *
624  * This data transfer takes place transparently to the remote iSCSI layer,
625  * i.e. without its participation.
626  *
627  * Using sockets, IDM implements the data transfer by segmenting the data
628  * buffer into appropriately sized iSCSI PDUs and transmitting them to the
629  * initiator. iSER performs the transfer using RDMA write.
630  *
631  */
632 idm_status_t
633 idm_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb,
634     uint32_t offset, uint32_t xfer_len,
635     idm_buf_cb_t idb_buf_cb, void *cb_arg)
636 {
637 	idm_status_t rc;
638 
639 	idb->idb_bufoffset = offset;
640 	idb->idb_xfer_len = xfer_len;
641 	idb->idb_buf_cb = idb_buf_cb;
642 	idb->idb_cb_arg = cb_arg;
643 
644 	mutex_enter(&idt->idt_mutex);
645 	switch (idt->idt_state) {
646 	case TASK_ACTIVE:
647 		idt->idt_tx_to_ini_start++;
648 		idm_task_hold(idt);
649 		idm_buf_bind_in_locked(idt, idb);
650 		idb->idb_in_transport = B_TRUE;
651 		rc = (*idt->idt_ic->ic_transport_ops->it_buf_tx_to_ini)
652 		    (idt, idb);
653 		return (rc);
654 
655 	case TASK_SUSPENDING:
656 	case TASK_SUSPENDED:
657 		/*
658 		 * Bind buffer but don't start a transfer since the task
659 		 * is suspended
660 		 */
661 		idm_buf_bind_in_locked(idt, idb);
662 		mutex_exit(&idt->idt_mutex);
663 		return (IDM_STATUS_SUCCESS);
664 
665 	case TASK_ABORTING:
666 	case TASK_ABORTED:
667 		/*
668 		 * Once the task is aborted, any buffers added to the
669 		 * idt_inbufv will never get cleaned up, so just return
670 		 * SUCCESS.  The buffer should get cleaned up by the
671 		 * client or framework once task_aborted has completed.
672 		 */
673 		mutex_exit(&idt->idt_mutex);
674 		return (IDM_STATUS_SUCCESS);
675 
676 	default:
677 		ASSERT(0);
678 		break;
679 	}
680 	mutex_exit(&idt->idt_mutex);
681 
682 	return (IDM_STATUS_FAIL);
683 }
684 
685 /*
686  * idm_buf_rx_from_ini
687  *
688  * This is IDM's implementation of the 'Get_Data' operational primitive.
689  *
690  * This function is invoked by a target iSCSI layer to request its local
691  * Datamover layer to retrieve certain data identified by the R2T PDU from the
692  * peer iSCSI layer on the remote node. The retrieved Data-Out PDU will be
693  * mapped to the respective buffer by the task tags (ITT & TTT).
694  * The connection information, contents of an R2T PDU, DataDescriptor, BHS, and
695  * the callback (idb->idb_buf_cb) notification for data transfer completion are
696  * are provided as input.
697  *
698  * When an iSCSI node sends an R2T PDU to its local Datamover layer, the local
699  * Datamover layer, the local and remote Datamover layers transparently bring
700  * about the data transfer requested by the R2T PDU, without the participation
701  * of the iSCSI layers.
702  *
703  * Using sockets, IDM transmits an R2T PDU for each buffer and the rx_data_out()
704  * assembles the Data-Out PDUs into the buffer. iSER uses RDMA read.
705  *
706  */
707 idm_status_t
708 idm_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb,
709     uint32_t offset, uint32_t xfer_len,
710     idm_buf_cb_t idb_buf_cb, void *cb_arg)
711 {
712 	idm_status_t rc;
713 
714 	idb->idb_bufoffset = offset;
715 	idb->idb_xfer_len = xfer_len;
716 	idb->idb_buf_cb = idb_buf_cb;
717 	idb->idb_cb_arg = cb_arg;
718 
719 	/*
720 	 * "In" buf list is for "Data In" PDU's, "Out" buf list is for
721 	 * "Data Out" PDU's
722 	 */
723 	mutex_enter(&idt->idt_mutex);
724 	switch (idt->idt_state) {
725 	case TASK_ACTIVE:
726 		idt->idt_rx_from_ini_start++;
727 		idm_task_hold(idt);
728 		idm_buf_bind_out_locked(idt, idb);
729 		idb->idb_in_transport = B_TRUE;
730 		rc = (*idt->idt_ic->ic_transport_ops->it_buf_rx_from_ini)
731 		    (idt, idb);
732 		return (rc);
733 	case TASK_SUSPENDING:
734 	case TASK_SUSPENDED:
735 	case TASK_ABORTING:
736 	case TASK_ABORTED:
737 		/*
738 		 * Bind buffer but don't start a transfer since the task
739 		 * is suspended
740 		 */
741 		idm_buf_bind_out_locked(idt, idb);
742 		mutex_exit(&idt->idt_mutex);
743 		return (IDM_STATUS_SUCCESS);
744 	default:
745 		ASSERT(0);
746 		break;
747 	}
748 	mutex_exit(&idt->idt_mutex);
749 
750 	return (IDM_STATUS_FAIL);
751 }
752 
753 /*
754  * idm_buf_tx_to_ini_done
755  *
756  * The transport calls this after it has completed a transfer requested by
757  * a call to transport_buf_tx_to_ini
758  *
759  * Caller holds idt->idt_mutex, idt->idt_mutex is released before returning.
760  * idt may be freed after the call to idb->idb_buf_cb.
761  */
762 void
763 idm_buf_tx_to_ini_done(idm_task_t *idt, idm_buf_t *idb, idm_status_t status)
764 {
765 	ASSERT(mutex_owned(&idt->idt_mutex));
766 	idb->idb_in_transport = B_FALSE;
767 	idb->idb_tx_thread = B_FALSE;
768 	idt->idt_tx_to_ini_done++;
769 
770 	/*
771 	 * idm_refcnt_rele may cause TASK_SUSPENDING --> TASK_SUSPENDED or
772 	 * TASK_ABORTING --> TASK_ABORTED transistion if the refcount goes
773 	 * to 0.
774 	 */
775 	idm_task_rele(idt);
776 	idb->idb_status = status;
777 
778 	switch (idt->idt_state) {
779 	case TASK_ACTIVE:
780 		idm_buf_unbind_in_locked(idt, idb);
781 		mutex_exit(&idt->idt_mutex);
782 		(*idb->idb_buf_cb)(idb, status);
783 		return;
784 	case TASK_SUSPENDING:
785 	case TASK_SUSPENDED:
786 	case TASK_ABORTING:
787 	case TASK_ABORTED:
788 		/*
789 		 * To keep things simple we will ignore the case where the
790 		 * transfer was successful and leave all buffers bound to the
791 		 * task.  This allows us to also ignore the case where we've
792 		 * been asked to abort a task but the last transfer of the
793 		 * task has completed.  IDM has no idea whether this was, in
794 		 * fact, the last transfer of the task so it would be difficult
795 		 * to handle this case.  Everything should get sorted out again
796 		 * after task reassignment is complete.
797 		 *
798 		 * In the case of TASK_ABORTING we could conceivably call the
799 		 * buffer callback here but the timing of when the client's
800 		 * client_task_aborted callback is invoked vs. when the client's
801 		 * buffer callback gets invoked gets sticky.  We don't want
802 		 * the client to here from us again after the call to
803 		 * client_task_aborted() but we don't want to give it a bunch
804 		 * of failed buffer transfers until we've called
805 		 * client_task_aborted().  Instead we'll just leave all the
806 		 * buffers bound and allow the client to cleanup.
807 		 */
808 		break;
809 	default:
810 		ASSERT(0);
811 	}
812 	mutex_exit(&idt->idt_mutex);
813 }
814 
815 /*
816  * idm_buf_rx_from_ini_done
817  *
818  * The transport calls this after it has completed a transfer requested by
819  * a call totransport_buf_tx_to_ini
820  *
821  * Caller holds idt->idt_mutex, idt->idt_mutex is released before returning.
822  * idt may be freed after the call to idb->idb_buf_cb.
823  */
824 void
825 idm_buf_rx_from_ini_done(idm_task_t *idt, idm_buf_t *idb, idm_status_t status)
826 {
827 	ASSERT(mutex_owned(&idt->idt_mutex));
828 	idb->idb_in_transport = B_FALSE;
829 	idt->idt_rx_from_ini_done++;
830 
831 	/*
832 	 * idm_refcnt_rele may cause TASK_SUSPENDING --> TASK_SUSPENDED or
833 	 * TASK_ABORTING --> TASK_ABORTED transistion if the refcount goes
834 	 * to 0.
835 	 */
836 	idm_task_rele(idt);
837 	idb->idb_status = status;
838 
839 	switch (idt->idt_state) {
840 	case TASK_ACTIVE:
841 		idm_buf_unbind_out_locked(idt, idb);
842 		mutex_exit(&idt->idt_mutex);
843 		(*idb->idb_buf_cb)(idb, status);
844 		return;
845 	case TASK_SUSPENDING:
846 	case TASK_SUSPENDED:
847 	case TASK_ABORTING:
848 	case TASK_ABORTED:
849 		/*
850 		 * To keep things simple we will ignore the case where the
851 		 * transfer was successful and leave all buffers bound to the
852 		 * task.  This allows us to also ignore the case where we've
853 		 * been asked to abort a task but the last transfer of the
854 		 * task has completed.  IDM has no idea whether this was, in
855 		 * fact, the last transfer of the task so it would be difficult
856 		 * to handle this case.  Everything should get sorted out again
857 		 * after task reassignment is complete.
858 		 *
859 		 * In the case of TASK_ABORTING we could conceivably call the
860 		 * buffer callback here but the timing of when the client's
861 		 * client_task_aborted callback is invoked vs. when the client's
862 		 * buffer callback gets invoked gets sticky.  We don't want
863 		 * the client to here from us again after the call to
864 		 * client_task_aborted() but we don't want to give it a bunch
865 		 * of failed buffer transfers until we've called
866 		 * client_task_aborted().  Instead we'll just leave all the
867 		 * buffers bound and allow the client to cleanup.
868 		 */
869 		break;
870 	default:
871 		ASSERT(0);
872 	}
873 	mutex_exit(&idt->idt_mutex);
874 }
875 
876 /*
877  * idm_buf_alloc
878  *
879  * Allocates a buffer handle and registers it for use with the transport
880  * layer. If a buffer is not passed on bufptr, the buffer will be allocated
881  * as well as the handle.
882  *
883  * ic		- connection on which the buffer will be transferred
884  * bufptr	- allocate memory for buffer if NULL, else assign to buffer
885  * buflen	- length of buffer
886  *
887  * Returns idm_buf_t handle if successful, otherwise NULL
888  */
889 idm_buf_t *
890 idm_buf_alloc(idm_conn_t *ic, void *bufptr, uint64_t buflen)
891 {
892 	idm_buf_t	*buf = NULL;
893 	int		rc;
894 
895 	ASSERT(ic != NULL);
896 	ASSERT(idm.idm_buf_cache != NULL);
897 	ASSERT(buflen > 0);
898 
899 	/* Don't allocate new buffers if we are not in FFP */
900 	mutex_enter(&ic->ic_state_mutex);
901 	if (!ic->ic_ffp) {
902 		mutex_exit(&ic->ic_state_mutex);
903 		return (NULL);
904 	}
905 
906 
907 	idm_conn_hold(ic);
908 	mutex_exit(&ic->ic_state_mutex);
909 
910 	buf = kmem_cache_alloc(idm.idm_buf_cache, KM_NOSLEEP);
911 	if (buf == NULL) {
912 		idm_conn_rele(ic);
913 		return (NULL);
914 	}
915 
916 	buf->idb_ic		= ic;
917 	buf->idb_buflen		= buflen;
918 	buf->idb_exp_offset	= 0;
919 	buf->idb_bufoffset	= 0;
920 	buf->idb_xfer_len 	= 0;
921 	buf->idb_magic		= IDM_BUF_MAGIC;
922 
923 	/*
924 	 * If bufptr is NULL, we have an implicit request to allocate
925 	 * memory for this IDM buffer handle and register it for use
926 	 * with the transport. To simplify this, and to give more freedom
927 	 * to the transport layer for it's own buffer management, both of
928 	 * these actions will take place in the transport layer.
929 	 * If bufptr is set, then the caller has allocated memory (or more
930 	 * likely it's been passed from an upper layer), and we need only
931 	 * register the buffer for use with the transport layer.
932 	 */
933 	if (bufptr == NULL) {
934 		/*
935 		 * Allocate a buffer from the transport layer (which
936 		 * will also register the buffer for use).
937 		 */
938 		rc = ic->ic_transport_ops->it_buf_alloc(buf, buflen);
939 		if (rc != 0) {
940 			idm_conn_rele(ic);
941 			kmem_cache_free(idm.idm_buf_cache, buf);
942 			return (NULL);
943 		}
944 		/* Set the bufalloc'd flag */
945 		buf->idb_bufalloc = B_TRUE;
946 	} else {
947 		/*
948 		 * Set the passed bufptr into the buf handle, and
949 		 * register the handle with the transport layer.
950 		 */
951 		buf->idb_buf = bufptr;
952 
953 		rc = ic->ic_transport_ops->it_buf_setup(buf);
954 		if (rc != 0) {
955 			idm_conn_rele(ic);
956 			kmem_cache_free(idm.idm_buf_cache, buf);
957 			return (NULL);
958 		}
959 		/* Ensure bufalloc'd flag is unset */
960 		buf->idb_bufalloc = B_FALSE;
961 	}
962 
963 	return (buf);
964 
965 }
966 
967 /*
968  * idm_buf_free
969  *
970  * Release a buffer handle along with the associated buffer that was allocated
971  * or assigned with idm_buf_alloc
972  */
973 void
974 idm_buf_free(idm_buf_t *buf)
975 {
976 	idm_conn_t *ic = buf->idb_ic;
977 
978 
979 	buf->idb_task_binding	= NULL;
980 
981 	if (buf->idb_bufalloc) {
982 		ic->ic_transport_ops->it_buf_free(buf);
983 	} else {
984 		ic->ic_transport_ops->it_buf_teardown(buf);
985 	}
986 	kmem_cache_free(idm.idm_buf_cache, buf);
987 	idm_conn_rele(ic);
988 }
989 
990 /*
991  * idm_buf_bind_in
992  *
993  * This function associates a buffer with a task. This is only for use by the
994  * iSCSI initiator that will have only one buffer per transfer direction
995  *
996  */
997 void
998 idm_buf_bind_in(idm_task_t *idt, idm_buf_t *buf)
999 {
1000 	mutex_enter(&idt->idt_mutex);
1001 	idm_buf_bind_in_locked(idt, buf);
1002 	mutex_exit(&idt->idt_mutex);
1003 }
1004 
1005 static void
1006 idm_buf_bind_in_locked(idm_task_t *idt, idm_buf_t *buf)
1007 {
1008 	buf->idb_task_binding = idt;
1009 	buf->idb_ic = idt->idt_ic;
1010 	idm_listbuf_insert(&idt->idt_inbufv, buf);
1011 }
1012 
1013 void
1014 idm_buf_bind_out(idm_task_t *idt, idm_buf_t *buf)
1015 {
1016 	mutex_enter(&idt->idt_mutex);
1017 	idm_buf_bind_out_locked(idt, buf);
1018 	mutex_exit(&idt->idt_mutex);
1019 }
1020 
1021 static void
1022 idm_buf_bind_out_locked(idm_task_t *idt, idm_buf_t *buf)
1023 {
1024 	buf->idb_task_binding = idt;
1025 	buf->idb_ic = idt->idt_ic;
1026 	idm_listbuf_insert(&idt->idt_outbufv, buf);
1027 }
1028 
1029 void
1030 idm_buf_unbind_in(idm_task_t *idt, idm_buf_t *buf)
1031 {
1032 	mutex_enter(&idt->idt_mutex);
1033 	idm_buf_unbind_in_locked(idt, buf);
1034 	mutex_exit(&idt->idt_mutex);
1035 }
1036 
1037 static void
1038 idm_buf_unbind_in_locked(idm_task_t *idt, idm_buf_t *buf)
1039 {
1040 	list_remove(&idt->idt_inbufv, buf);
1041 }
1042 
1043 void
1044 idm_buf_unbind_out(idm_task_t *idt, idm_buf_t *buf)
1045 {
1046 	mutex_enter(&idt->idt_mutex);
1047 	idm_buf_unbind_out_locked(idt, buf);
1048 	mutex_exit(&idt->idt_mutex);
1049 }
1050 
1051 static void
1052 idm_buf_unbind_out_locked(idm_task_t *idt, idm_buf_t *buf)
1053 {
1054 	list_remove(&idt->idt_outbufv, buf);
1055 }
1056 
1057 /*
1058  * idm_buf_find() will lookup the idm_buf_t based on the relative offset in the
1059  * iSCSI PDU
1060  */
1061 idm_buf_t *
1062 idm_buf_find(void *lbuf, size_t data_offset)
1063 {
1064 	idm_buf_t	*idb;
1065 	list_t		*lst = (list_t *)lbuf;
1066 
1067 	/* iterate through the list to find the buffer */
1068 	for (idb = list_head(lst); idb != NULL; idb = list_next(lst, idb)) {
1069 
1070 		ASSERT((idb->idb_ic->ic_conn_type == CONN_TYPE_TGT) ||
1071 		    (idb->idb_bufoffset == 0));
1072 
1073 		if ((data_offset >= idb->idb_bufoffset) &&
1074 		    (data_offset < (idb->idb_bufoffset + idb->idb_buflen))) {
1075 
1076 			return (idb);
1077 		}
1078 	}
1079 
1080 	return (NULL);
1081 }
1082 
1083 /*
1084  * idm_task_alloc
1085  *
1086  * This function will allocate a idm_task_t structure. A task tag is also
1087  * generated and saved in idt_tt. The task is not active.
1088  */
1089 idm_task_t *
1090 idm_task_alloc(idm_conn_t *ic)
1091 {
1092 	idm_task_t	*idt;
1093 
1094 	ASSERT(ic != NULL);
1095 
1096 	/* Don't allocate new tasks if we are not in FFP */
1097 	mutex_enter(&ic->ic_state_mutex);
1098 	if (!ic->ic_ffp) {
1099 		mutex_exit(&ic->ic_state_mutex);
1100 		return (NULL);
1101 	}
1102 	idt = kmem_cache_alloc(idm.idm_task_cache, KM_NOSLEEP);
1103 	if (idt == NULL) {
1104 		mutex_exit(&ic->ic_state_mutex);
1105 		return (NULL);
1106 	}
1107 
1108 	ASSERT(list_is_empty(&idt->idt_inbufv));
1109 	ASSERT(list_is_empty(&idt->idt_outbufv));
1110 
1111 	idm_conn_hold(ic);
1112 	mutex_exit(&ic->ic_state_mutex);
1113 
1114 	idt->idt_state		= TASK_IDLE;
1115 	idt->idt_ic		= ic;
1116 	idt->idt_private 	= NULL;
1117 	idt->idt_exp_datasn	= 0;
1118 	idt->idt_exp_rttsn	= 0;
1119 
1120 	return (idt);
1121 }
1122 
1123 /*
1124  * idm_task_start
1125  *
1126  * Add the task to an AVL tree to notify IDM about a new task. The caller
1127  * sets up the idm_task_t structure with a prior call to idm_task_alloc().
1128  * The task service does not function as a task/work engine, it is the
1129  * responsibility of the initiator to start the data transfer and free the
1130  * resources.
1131  */
1132 void
1133 idm_task_start(idm_task_t *idt, uintptr_t handle)
1134 {
1135 	ASSERT(idt != NULL);
1136 
1137 	/* mark the task as ACTIVE */
1138 	idt->idt_state = TASK_ACTIVE;
1139 	idt->idt_client_handle = handle;
1140 	idt->idt_tx_to_ini_start = idt->idt_tx_to_ini_done =
1141 	    idt->idt_rx_from_ini_start = idt->idt_rx_from_ini_done = 0;
1142 }
1143 
1144 /*
1145  * idm_task_done
1146  *
1147  * This function will remove the task from the AVL tree indicating that the
1148  * task is no longer active.
1149  */
1150 void
1151 idm_task_done(idm_task_t *idt)
1152 {
1153 	ASSERT(idt != NULL);
1154 	ASSERT(idt->idt_refcnt.ir_refcnt == 0);
1155 
1156 	idt->idt_state = TASK_IDLE;
1157 	idm_refcnt_reset(&idt->idt_refcnt);
1158 }
1159 
1160 /*
1161  * idm_task_free
1162  *
1163  * This function will free the Task Tag and the memory allocated for the task
1164  * idm_task_done should be called prior to this call
1165  */
1166 void
1167 idm_task_free(idm_task_t *idt)
1168 {
1169 	idm_conn_t *ic = idt->idt_ic;
1170 
1171 	ASSERT(idt != NULL);
1172 	ASSERT(idt->idt_state == TASK_IDLE);
1173 
1174 	/*
1175 	 * It's possible for items to still be in the idt_inbufv list if
1176 	 * they were added after idm_task_cleanup was called.  We rely on
1177 	 * STMF to free all buffers associated with the task however STMF
1178 	 * doesn't know that we have this reference to the buffers.
1179 	 * Use list_create so that we don't end up with stale references
1180 	 * to these buffers.
1181 	 */
1182 	list_create(&idt->idt_inbufv, sizeof (idm_buf_t),
1183 	    offsetof(idm_buf_t, idb_buflink));
1184 	list_create(&idt->idt_outbufv, sizeof (idm_buf_t),
1185 	    offsetof(idm_buf_t, idb_buflink));
1186 
1187 	kmem_cache_free(idm.idm_task_cache, idt);
1188 
1189 	idm_conn_rele(ic);
1190 }
1191 
1192 /*
1193  * idm_task_find
1194  *
1195  * This function looks up a task by task tag
1196  */
1197 /*ARGSUSED*/
1198 idm_task_t *
1199 idm_task_find(idm_conn_t *ic, uint32_t itt, uint32_t ttt)
1200 {
1201 	uint32_t	tt, client_handle;
1202 	idm_task_t	*idt;
1203 
1204 	/*
1205 	 * Must match both itt and ttt.  The table is indexed by itt
1206 	 * for initiator connections and ttt for target connections.
1207 	 */
1208 	if (IDM_CONN_ISTGT(ic)) {
1209 		tt = ttt;
1210 		client_handle = itt;
1211 	} else {
1212 		tt = itt;
1213 		client_handle = ttt;
1214 	}
1215 
1216 	rw_enter(&idm.idm_taskid_table_lock, RW_READER);
1217 	if (tt >= idm.idm_taskid_max) {
1218 		rw_exit(&idm.idm_taskid_table_lock);
1219 		return (NULL);
1220 	}
1221 
1222 	idt = idm.idm_taskid_table[tt];
1223 
1224 	if (idt != NULL) {
1225 		mutex_enter(&idt->idt_mutex);
1226 		if ((idt->idt_state != TASK_ACTIVE) ||
1227 		    (IDM_CONN_ISTGT(ic) &&
1228 		    (idt->idt_client_handle != client_handle))) {
1229 			/*
1230 			 * Task is aborting, we don't want any more references.
1231 			 */
1232 			mutex_exit(&idt->idt_mutex);
1233 			rw_exit(&idm.idm_taskid_table_lock);
1234 			return (NULL);
1235 		}
1236 		idm_task_hold(idt);
1237 		mutex_exit(&idt->idt_mutex);
1238 	}
1239 	rw_exit(&idm.idm_taskid_table_lock);
1240 
1241 	return (idt);
1242 }
1243 
1244 /*
1245  * idm_task_find_by_handle
1246  *
1247  * This function looks up a task by the client-private idt_client_handle.
1248  *
1249  * This function should NEVER be called in the performance path.  It is
1250  * intended strictly for error recovery/task management.
1251  */
1252 /*ARGSUSED*/
1253 void *
1254 idm_task_find_by_handle(idm_conn_t *ic, uintptr_t handle)
1255 {
1256 	idm_task_t	*idt = NULL;
1257 	int		idx = 0;
1258 
1259 	rw_enter(&idm.idm_taskid_table_lock, RW_READER);
1260 
1261 	for (idx = 0; idx < idm.idm_taskid_max; idx++) {
1262 		idt = idm.idm_taskid_table[idx];
1263 
1264 		if (idt == NULL)
1265 			continue;
1266 
1267 		mutex_enter(&idt->idt_mutex);
1268 
1269 		if (idt->idt_state != TASK_ACTIVE) {
1270 			/*
1271 			 * Task is either in suspend, abort, or already
1272 			 * complete.
1273 			 */
1274 			mutex_exit(&idt->idt_mutex);
1275 			continue;
1276 		}
1277 
1278 		if (idt->idt_client_handle == handle) {
1279 			idm_task_hold(idt);
1280 			mutex_exit(&idt->idt_mutex);
1281 			break;
1282 		}
1283 
1284 		mutex_exit(&idt->idt_mutex);
1285 	}
1286 
1287 	rw_exit(&idm.idm_taskid_table_lock);
1288 
1289 	if ((idt == NULL) || (idx == idm.idm_taskid_max))
1290 		return (NULL);
1291 
1292 	return (idt->idt_private);
1293 }
1294 
1295 void
1296 idm_task_hold(idm_task_t *idt)
1297 {
1298 	idm_refcnt_hold(&idt->idt_refcnt);
1299 }
1300 
1301 void
1302 idm_task_rele(idm_task_t *idt)
1303 {
1304 	idm_refcnt_rele(&idt->idt_refcnt);
1305 }
1306 
1307 void
1308 idm_task_abort(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type)
1309 {
1310 	idm_task_t	*task;
1311 	int		idx;
1312 
1313 	/*
1314 	 * Passing NULL as the task indicates that all tasks
1315 	 * for this connection should be aborted.
1316 	 */
1317 	if (idt == NULL) {
1318 		/*
1319 		 * Only the connection state machine should ask for
1320 		 * all tasks to abort and this should never happen in FFP.
1321 		 */
1322 		ASSERT(!ic->ic_ffp);
1323 		rw_enter(&idm.idm_taskid_table_lock, RW_READER);
1324 		for (idx = 0; idx < idm.idm_taskid_max; idx++) {
1325 			task = idm.idm_taskid_table[idx];
1326 			if (task && (task->idt_state != TASK_IDLE) &&
1327 			    (task->idt_ic == ic)) {
1328 				rw_exit(&idm.idm_taskid_table_lock);
1329 				idm_task_abort_one(ic, task, abort_type);
1330 				rw_enter(&idm.idm_taskid_table_lock, RW_READER);
1331 			}
1332 		}
1333 		rw_exit(&idm.idm_taskid_table_lock);
1334 	} else {
1335 		idm_task_abort_one(ic, idt, abort_type);
1336 	}
1337 }
1338 
1339 static void
1340 idm_task_abort_unref_cb(void *ref)
1341 {
1342 	idm_task_t *idt = ref;
1343 
1344 	mutex_enter(&idt->idt_mutex);
1345 	switch (idt->idt_state) {
1346 	case TASK_SUSPENDING:
1347 		idt->idt_state = TASK_SUSPENDED;
1348 		mutex_exit(&idt->idt_mutex);
1349 		idm_task_aborted(idt, IDM_STATUS_SUSPENDED);
1350 		return;
1351 	case TASK_ABORTING:
1352 		idt->idt_state = TASK_ABORTED;
1353 		mutex_exit(&idt->idt_mutex);
1354 		idm_task_aborted(idt, IDM_STATUS_ABORTED);
1355 		return;
1356 	default:
1357 		mutex_exit(&idt->idt_mutex);
1358 		ASSERT(0);
1359 		break;
1360 	}
1361 }
1362 
1363 static void
1364 idm_task_abort_one(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type)
1365 {
1366 	/* Caller must hold connection mutex */
1367 	mutex_enter(&idt->idt_mutex);
1368 	switch (idt->idt_state) {
1369 	case TASK_ACTIVE:
1370 		switch (abort_type) {
1371 		case AT_INTERNAL_SUSPEND:
1372 			/* Call transport to release any resources */
1373 			idt->idt_state = TASK_SUSPENDING;
1374 			mutex_exit(&idt->idt_mutex);
1375 			ic->ic_transport_ops->it_free_task_rsrc(idt);
1376 
1377 			/*
1378 			 * Wait for outstanding references.  When all
1379 			 * references are released the callback will call
1380 			 * idm_task_aborted().
1381 			 */
1382 			idm_refcnt_async_wait_ref(&idt->idt_refcnt,
1383 			    &idm_task_abort_unref_cb);
1384 			return;
1385 		case AT_INTERNAL_ABORT:
1386 		case AT_TASK_MGMT_ABORT:
1387 			idt->idt_state = TASK_ABORTING;
1388 			mutex_exit(&idt->idt_mutex);
1389 			ic->ic_transport_ops->it_free_task_rsrc(idt);
1390 
1391 			/*
1392 			 * Wait for outstanding references.  When all
1393 			 * references are released the callback will call
1394 			 * idm_task_aborted().
1395 			 */
1396 			idm_refcnt_async_wait_ref(&idt->idt_refcnt,
1397 			    &idm_task_abort_unref_cb);
1398 			return;
1399 		default:
1400 			ASSERT(0);
1401 		}
1402 		break;
1403 	case TASK_SUSPENDING:
1404 		/* Already called transport_free_task_rsrc(); */
1405 		switch (abort_type) {
1406 		case AT_INTERNAL_SUSPEND:
1407 			/* Already doing it */
1408 			break;
1409 		case AT_INTERNAL_ABORT:
1410 		case AT_TASK_MGMT_ABORT:
1411 			idt->idt_state = TASK_ABORTING;
1412 			break;
1413 		default:
1414 			ASSERT(0);
1415 		}
1416 		break;
1417 	case TASK_SUSPENDED:
1418 		/* Already called transport_free_task_rsrc(); */
1419 		switch (abort_type) {
1420 		case AT_INTERNAL_SUSPEND:
1421 			/* Already doing it */
1422 			break;
1423 		case AT_INTERNAL_ABORT:
1424 		case AT_TASK_MGMT_ABORT:
1425 			idt->idt_state = TASK_ABORTING;
1426 			mutex_exit(&idt->idt_mutex);
1427 
1428 			/*
1429 			 * We could probably call idm_task_aborted directly
1430 			 * here but we may be holding the conn lock. It's
1431 			 * easier to just switch contexts.  Even though
1432 			 * we shouldn't really have any references we'll
1433 			 * set the state to TASK_ABORTING instead of
1434 			 * TASK_ABORTED so we can use the same code path.
1435 			 */
1436 			idm_refcnt_async_wait_ref(&idt->idt_refcnt,
1437 			    &idm_task_abort_unref_cb);
1438 			return;
1439 		default:
1440 			ASSERT(0);
1441 		}
1442 		break;
1443 	case TASK_ABORTING:
1444 	case TASK_ABORTED:
1445 		switch (abort_type) {
1446 		case AT_INTERNAL_SUSPEND:
1447 			/* We're already past this point... */
1448 		case AT_INTERNAL_ABORT:
1449 		case AT_TASK_MGMT_ABORT:
1450 			/* Already doing it */
1451 			break;
1452 		default:
1453 			ASSERT(0);
1454 		}
1455 		break;
1456 	case TASK_COMPLETE:
1457 		/*
1458 		 * In this case, let it go.  The status has already been
1459 		 * sent (which may or may not get successfully transmitted)
1460 		 * and we don't want to end up in a race between completing
1461 		 * the status PDU and marking the task suspended.
1462 		 */
1463 		break;
1464 	default:
1465 		ASSERT(0);
1466 	}
1467 	mutex_exit(&idt->idt_mutex);
1468 }
1469 
1470 static void
1471 idm_task_aborted(idm_task_t *idt, idm_status_t status)
1472 {
1473 	(*idt->idt_ic->ic_conn_ops.icb_task_aborted)(idt, status);
1474 }
1475 
1476 void
1477 idm_task_cleanup(idm_task_t *idt)
1478 {
1479 	idm_buf_t *idb, *next_idb;
1480 	list_t		tmp_buflist;
1481 	ASSERT((idt->idt_state == TASK_SUSPENDED) ||
1482 	    (idt->idt_state == TASK_ABORTED));
1483 
1484 	list_create(&tmp_buflist, sizeof (idm_buf_t),
1485 	    offsetof(idm_buf_t, idb_buflink));
1486 
1487 	/*
1488 	 * Remove all the buffers from the task and add them to a
1489 	 * temporary local list -- we do this so that we can hold
1490 	 * the task lock and prevent the task from going away if
1491 	 * the client decides to call idm_task_done/idm_task_free.
1492 	 * This could happen during abort in iscsit.
1493 	 */
1494 	mutex_enter(&idt->idt_mutex);
1495 	for (idb = list_head(&idt->idt_inbufv);
1496 	    idb != NULL;
1497 	    idb = next_idb) {
1498 		next_idb = list_next(&idt->idt_inbufv, idb);
1499 		idm_buf_unbind_in_locked(idt, idb);
1500 		list_insert_tail(&tmp_buflist, idb);
1501 	}
1502 
1503 	for (idb = list_head(&idt->idt_outbufv);
1504 	    idb != NULL;
1505 	    idb = next_idb) {
1506 		next_idb = list_next(&idt->idt_outbufv, idb);
1507 		idm_buf_unbind_out_locked(idt, idb);
1508 		list_insert_tail(&tmp_buflist, idb);
1509 	}
1510 	mutex_exit(&idt->idt_mutex);
1511 
1512 	for (idb = list_head(&tmp_buflist); idb != NULL; idb = next_idb) {
1513 		next_idb = list_next(&tmp_buflist, idb);
1514 		list_remove(&tmp_buflist, idb);
1515 		(*idb->idb_buf_cb)(idb, IDM_STATUS_ABORTED);
1516 	}
1517 	list_destroy(&tmp_buflist);
1518 }
1519 
1520 
1521 /*
1522  * idm_pdu_tx
1523  *
1524  * This is IDM's implementation of the 'Send_Control' operational primitive.
1525  * This function is invoked by an initiator iSCSI layer requesting the transfer
1526  * of a iSCSI command PDU or a target iSCSI layer requesting the transfer of a
1527  * iSCSI response PDU. The PDU will be transmitted as-is by the local Datamover
1528  * layer to the peer iSCSI layer in the remote iSCSI node. The connection info
1529  * and iSCSI PDU-specific qualifiers namely BHS, AHS, DataDescriptor and Size
1530  * are provided as input.
1531  *
1532  */
1533 void
1534 idm_pdu_tx(idm_pdu_t *pdu)
1535 {
1536 	idm_conn_t		*ic = pdu->isp_ic;
1537 	iscsi_async_evt_hdr_t	*async_evt;
1538 
1539 	/*
1540 	 * If we are in full-featured mode then route SCSI-related
1541 	 * commands to the appropriate function vector without checking
1542 	 * the connection state.  We will only be in full-feature mode
1543 	 * when we are in an acceptable state for SCSI PDU's.
1544 	 *
1545 	 * We also need to ensure that there are no PDU events outstanding
1546 	 * on the state machine.  Any non-SCSI PDU's received in full-feature
1547 	 * mode will result in PDU events and until these have been handled
1548 	 * we need to route all PDU's through the state machine as PDU
1549 	 * events to maintain ordering.
1550 	 *
1551 	 * Note that IDM cannot enter FFP mode until it processes in
1552 	 * its state machine the last xmit of the login process.
1553 	 * Hence, checking the IDM_PDU_LOGIN_TX flag here would be
1554 	 * superfluous.
1555 	 */
1556 	mutex_enter(&ic->ic_state_mutex);
1557 	if (ic->ic_ffp && (ic->ic_pdu_events == 0)) {
1558 		mutex_exit(&ic->ic_state_mutex);
1559 		switch (IDM_PDU_OPCODE(pdu)) {
1560 		case ISCSI_OP_SCSI_RSP:
1561 			/* Target only */
1562 			idm_pdu_tx_forward(ic, pdu);
1563 			return;
1564 		case ISCSI_OP_SCSI_TASK_MGT_RSP:
1565 			/* Target only */
1566 			idm_pdu_tx_forward(ic, pdu);
1567 			return;
1568 		case ISCSI_OP_SCSI_DATA_RSP:
1569 			/* Target only */
1570 			idm_pdu_tx_forward(ic, pdu);
1571 			return;
1572 		case ISCSI_OP_RTT_RSP:
1573 			/* Target only */
1574 			idm_pdu_tx_forward(ic, pdu);
1575 			return;
1576 		case ISCSI_OP_NOOP_IN:
1577 			/* Target only */
1578 			idm_pdu_tx_forward(ic, pdu);
1579 			return;
1580 		case ISCSI_OP_TEXT_RSP:
1581 			/* Target only */
1582 			idm_pdu_tx_forward(ic, pdu);
1583 			return;
1584 		case ISCSI_OP_TEXT_CMD:
1585 		case ISCSI_OP_NOOP_OUT:
1586 		case ISCSI_OP_SCSI_CMD:
1587 		case ISCSI_OP_SCSI_DATA:
1588 		case ISCSI_OP_SCSI_TASK_MGT_MSG:
1589 			/* Initiator only */
1590 			idm_pdu_tx_forward(ic, pdu);
1591 			return;
1592 		default:
1593 			break;
1594 		}
1595 
1596 		mutex_enter(&ic->ic_state_mutex);
1597 	}
1598 
1599 	/*
1600 	 * Any PDU's processed outside of full-feature mode and non-SCSI
1601 	 * PDU's in full-feature mode are handled by generating an
1602 	 * event to the connection state machine.  The state machine
1603 	 * will validate the PDU against the current state and either
1604 	 * transmit the PDU if the opcode is allowed or handle an
1605 	 * error if the PDU is not allowed.
1606 	 *
1607 	 * This code-path will also generate any events that are implied
1608 	 * by the PDU opcode.  For example a "login response" with success
1609 	 * status generates a CE_LOGOUT_SUCCESS_SND event.
1610 	 */
1611 	switch (IDM_PDU_OPCODE(pdu)) {
1612 	case ISCSI_OP_LOGIN_CMD:
1613 		idm_conn_tx_pdu_event(ic, CE_LOGIN_SND, (uintptr_t)pdu);
1614 		break;
1615 	case ISCSI_OP_LOGIN_RSP:
1616 		idm_parse_login_rsp(ic, pdu, /* Is RX */ B_FALSE);
1617 		break;
1618 	case ISCSI_OP_LOGOUT_CMD:
1619 		idm_parse_logout_req(ic, pdu, /* Is RX */ B_FALSE);
1620 		break;
1621 	case ISCSI_OP_LOGOUT_RSP:
1622 		idm_parse_logout_rsp(ic, pdu, /* Is RX */ B_FALSE);
1623 		break;
1624 	case ISCSI_OP_ASYNC_EVENT:
1625 		async_evt = (iscsi_async_evt_hdr_t *)pdu->isp_hdr;
1626 		switch (async_evt->async_event) {
1627 		case ISCSI_ASYNC_EVENT_REQUEST_LOGOUT:
1628 			idm_conn_tx_pdu_event(ic, CE_ASYNC_LOGOUT_SND,
1629 			    (uintptr_t)pdu);
1630 			break;
1631 		case ISCSI_ASYNC_EVENT_DROPPING_CONNECTION:
1632 			idm_conn_tx_pdu_event(ic, CE_ASYNC_DROP_CONN_SND,
1633 			    (uintptr_t)pdu);
1634 			break;
1635 		case ISCSI_ASYNC_EVENT_DROPPING_ALL_CONNECTIONS:
1636 			idm_conn_tx_pdu_event(ic, CE_ASYNC_DROP_ALL_CONN_SND,
1637 			    (uintptr_t)pdu);
1638 			break;
1639 		case ISCSI_ASYNC_EVENT_SCSI_EVENT:
1640 		case ISCSI_ASYNC_EVENT_PARAM_NEGOTIATION:
1641 		default:
1642 			idm_conn_tx_pdu_event(ic, CE_MISC_TX,
1643 			    (uintptr_t)pdu);
1644 			break;
1645 		}
1646 		break;
1647 	case ISCSI_OP_SCSI_RSP:
1648 		/* Target only */
1649 		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1650 		break;
1651 	case ISCSI_OP_SCSI_TASK_MGT_RSP:
1652 		/* Target only */
1653 		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1654 		break;
1655 	case ISCSI_OP_SCSI_DATA_RSP:
1656 		/* Target only */
1657 		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1658 		break;
1659 	case ISCSI_OP_RTT_RSP:
1660 		/* Target only */
1661 		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1662 		break;
1663 	case ISCSI_OP_NOOP_IN:
1664 		/* Target only */
1665 		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1666 		break;
1667 	case ISCSI_OP_TEXT_RSP:
1668 		/* Target only */
1669 		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1670 		break;
1671 		/* Initiator only */
1672 	case ISCSI_OP_SCSI_CMD:
1673 	case ISCSI_OP_SCSI_TASK_MGT_MSG:
1674 	case ISCSI_OP_SCSI_DATA:
1675 	case ISCSI_OP_NOOP_OUT:
1676 	case ISCSI_OP_TEXT_CMD:
1677 	case ISCSI_OP_SNACK_CMD:
1678 	case ISCSI_OP_REJECT_MSG:
1679 	default:
1680 		/*
1681 		 * Connection state machine will validate these PDU's against
1682 		 * the current state.  A PDU not allowed in the current
1683 		 * state will cause a protocol error.
1684 		 */
1685 		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1686 		break;
1687 	}
1688 	mutex_exit(&ic->ic_state_mutex);
1689 }
1690 
1691 /*
1692  * Allocates a PDU along with memory for header and data.
1693  */
1694 
1695 idm_pdu_t *
1696 idm_pdu_alloc(uint_t hdrlen, uint_t datalen)
1697 {
1698 	idm_pdu_t *result;
1699 
1700 	/*
1701 	 * IDM clients should cache these structures for performance
1702 	 * critical paths.  We can't cache effectively in IDM because we
1703 	 * don't know the correct header and data size.
1704 	 *
1705 	 * Valid header length is assumed to be hdrlen and valid data
1706 	 * length is assumed to be datalen.  isp_hdrlen and isp_datalen
1707 	 * can be adjusted after the PDU is returned if necessary.
1708 	 */
1709 	result = kmem_zalloc(sizeof (idm_pdu_t) + hdrlen + datalen, KM_SLEEP);
1710 	result->isp_flags |= IDM_PDU_ALLOC; /* For idm_pdu_free sanity check */
1711 	result->isp_hdr = (iscsi_hdr_t *)(result + 1); /* Ptr. Arithmetic */
1712 	result->isp_hdrlen = hdrlen;
1713 	result->isp_hdrbuflen = hdrlen;
1714 	result->isp_transport_hdrlen = 0;
1715 	result->isp_data = (uint8_t *)result->isp_hdr + hdrlen;
1716 	result->isp_datalen = datalen;
1717 	result->isp_databuflen = datalen;
1718 	result->isp_magic = IDM_PDU_MAGIC;
1719 
1720 	return (result);
1721 }
1722 
1723 /*
1724  * Free a PDU previously allocated with idm_pdu_alloc() including any
1725  * header and data space allocated as part of the original request.
1726  * Additional memory regions referenced by subsequent modification of
1727  * the isp_hdr and/or isp_data fields will not be freed.
1728  */
1729 void
1730 idm_pdu_free(idm_pdu_t *pdu)
1731 {
1732 	/* Make sure the structure was allocated using idm_pdu_alloc() */
1733 	ASSERT(pdu->isp_flags & IDM_PDU_ALLOC);
1734 	kmem_free(pdu,
1735 	    sizeof (idm_pdu_t) + pdu->isp_hdrbuflen + pdu->isp_databuflen);
1736 }
1737 
1738 /*
1739  * Initialize the connection, private and callback fields in a PDU.
1740  */
1741 void
1742 idm_pdu_init(idm_pdu_t *pdu, idm_conn_t *ic, void *private, idm_pdu_cb_t *cb)
1743 {
1744 	/*
1745 	 * idm_pdu_complete() will call idm_pdu_free if the callback is
1746 	 * NULL.  This will only work if the PDU was originally allocated
1747 	 * with idm_pdu_alloc().
1748 	 */
1749 	ASSERT((pdu->isp_flags & IDM_PDU_ALLOC) ||
1750 	    (cb != NULL));
1751 	pdu->isp_magic = IDM_PDU_MAGIC;
1752 	pdu->isp_ic = ic;
1753 	pdu->isp_private = private;
1754 	pdu->isp_callback = cb;
1755 }
1756 
1757 /*
1758  * Initialize the header and header length field.  This function should
1759  * not be used to adjust the header length in a buffer allocated via
1760  * pdu_pdu_alloc since it overwrites the existing header pointer.
1761  */
1762 void
1763 idm_pdu_init_hdr(idm_pdu_t *pdu, uint8_t *hdr, uint_t hdrlen)
1764 {
1765 	pdu->isp_hdr = (iscsi_hdr_t *)((void *)hdr);
1766 	pdu->isp_hdrlen = hdrlen;
1767 }
1768 
1769 /*
1770  * Initialize the data and data length fields.  This function should
1771  * not be used to adjust the data length of a buffer allocated via
1772  * idm_pdu_alloc since it overwrites the existing data pointer.
1773  */
1774 void
1775 idm_pdu_init_data(idm_pdu_t *pdu, uint8_t *data, uint_t datalen)
1776 {
1777 	pdu->isp_data = data;
1778 	pdu->isp_datalen = datalen;
1779 }
1780 
1781 void
1782 idm_pdu_complete(idm_pdu_t *pdu, idm_status_t status)
1783 {
1784 	if (pdu->isp_callback) {
1785 		pdu->isp_status = status;
1786 		(*pdu->isp_callback)(pdu, status);
1787 	} else {
1788 		idm_pdu_free(pdu);
1789 	}
1790 }
1791 
1792 /*
1793  * State machine auditing
1794  */
1795 
1796 void
1797 idm_sm_audit_init(sm_audit_buf_t *audit_buf)
1798 {
1799 	bzero(audit_buf, sizeof (sm_audit_buf_t));
1800 	audit_buf->sab_max_index = SM_AUDIT_BUF_MAX_REC - 1;
1801 }
1802 
1803 static
1804 sm_audit_record_t *
1805 idm_sm_audit_common(sm_audit_buf_t *audit_buf, sm_audit_record_type_t r_type,
1806     sm_audit_sm_type_t sm_type,
1807     int current_state)
1808 {
1809 	sm_audit_record_t *sar;
1810 
1811 	sar = audit_buf->sab_records;
1812 	sar += audit_buf->sab_index;
1813 	audit_buf->sab_index++;
1814 	audit_buf->sab_index &= audit_buf->sab_max_index;
1815 
1816 	sar->sar_type = r_type;
1817 	gethrestime(&sar->sar_timestamp);
1818 	sar->sar_sm_type = sm_type;
1819 	sar->sar_state = current_state;
1820 
1821 	return (sar);
1822 }
1823 
1824 void
1825 idm_sm_audit_event(sm_audit_buf_t *audit_buf,
1826     sm_audit_sm_type_t sm_type, int current_state,
1827     int event, uintptr_t event_info)
1828 {
1829 	sm_audit_record_t *sar;
1830 
1831 	sar = idm_sm_audit_common(audit_buf, SAR_STATE_EVENT,
1832 	    sm_type, current_state);
1833 	sar->sar_event = event;
1834 	sar->sar_event_info = event_info;
1835 }
1836 
1837 void
1838 idm_sm_audit_state_change(sm_audit_buf_t *audit_buf,
1839     sm_audit_sm_type_t sm_type, int current_state, int new_state)
1840 {
1841 	sm_audit_record_t *sar;
1842 
1843 	sar = idm_sm_audit_common(audit_buf, SAR_STATE_CHANGE,
1844 	    sm_type, current_state);
1845 	sar->sar_new_state = new_state;
1846 }
1847 
1848 
1849 /*
1850  * Object reference tracking
1851  */
1852 
1853 void
1854 idm_refcnt_init(idm_refcnt_t *refcnt, void *referenced_obj)
1855 {
1856 	bzero(refcnt, sizeof (*refcnt));
1857 	idm_refcnt_reset(refcnt);
1858 	refcnt->ir_referenced_obj = referenced_obj;
1859 	bzero(&refcnt->ir_audit_buf, sizeof (refcnt_audit_buf_t));
1860 	refcnt->ir_audit_buf.anb_max_index = REFCNT_AUDIT_BUF_MAX_REC - 1;
1861 	mutex_init(&refcnt->ir_mutex, NULL, MUTEX_DEFAULT, NULL);
1862 	cv_init(&refcnt->ir_cv, NULL, CV_DEFAULT, NULL);
1863 }
1864 
1865 void
1866 idm_refcnt_destroy(idm_refcnt_t *refcnt)
1867 {
1868 	ASSERT(refcnt->ir_refcnt == 0);
1869 	cv_destroy(&refcnt->ir_cv);
1870 	mutex_destroy(&refcnt->ir_mutex);
1871 }
1872 
1873 void
1874 idm_refcnt_reset(idm_refcnt_t *refcnt)
1875 {
1876 	refcnt->ir_waiting = REF_NOWAIT;
1877 	refcnt->ir_refcnt = 0;
1878 }
1879 
1880 void
1881 idm_refcnt_hold(idm_refcnt_t *refcnt)
1882 {
1883 	/*
1884 	 * Nothing should take a hold on an object after a call to
1885 	 * idm_refcnt_wait_ref or idm_refcnd_async_wait_ref
1886 	 */
1887 	ASSERT(refcnt->ir_waiting == REF_NOWAIT);
1888 
1889 	mutex_enter(&refcnt->ir_mutex);
1890 	refcnt->ir_refcnt++;
1891 	REFCNT_AUDIT(refcnt);
1892 	mutex_exit(&refcnt->ir_mutex);
1893 }
1894 
1895 static void
1896 idm_refcnt_unref_task(void *refcnt_void)
1897 {
1898 	idm_refcnt_t *refcnt = refcnt_void;
1899 
1900 	REFCNT_AUDIT(refcnt);
1901 	(*refcnt->ir_cb)(refcnt->ir_referenced_obj);
1902 }
1903 
1904 void
1905 idm_refcnt_rele(idm_refcnt_t *refcnt)
1906 {
1907 	mutex_enter(&refcnt->ir_mutex);
1908 	ASSERT(refcnt->ir_refcnt > 0);
1909 	refcnt->ir_refcnt--;
1910 	REFCNT_AUDIT(refcnt);
1911 	if (refcnt->ir_waiting == REF_NOWAIT) {
1912 		/* No one is waiting on this object */
1913 		mutex_exit(&refcnt->ir_mutex);
1914 		return;
1915 	}
1916 
1917 	/*
1918 	 * Someone is waiting for this object to go idle so check if
1919 	 * refcnt is 0.  Waiting on an object then later grabbing another
1920 	 * reference is not allowed so we don't need to handle that case.
1921 	 */
1922 	if (refcnt->ir_refcnt == 0) {
1923 		if (refcnt->ir_waiting == REF_WAIT_ASYNC) {
1924 			if (taskq_dispatch(idm.idm_global_taskq,
1925 			    &idm_refcnt_unref_task, refcnt, TQ_SLEEP) == NULL) {
1926 				cmn_err(CE_WARN,
1927 				    "idm_refcnt_rele: Couldn't dispatch task");
1928 			}
1929 		} else if (refcnt->ir_waiting == REF_WAIT_SYNC) {
1930 			cv_signal(&refcnt->ir_cv);
1931 		}
1932 	}
1933 	mutex_exit(&refcnt->ir_mutex);
1934 }
1935 
1936 void
1937 idm_refcnt_rele_and_destroy(idm_refcnt_t *refcnt, idm_refcnt_cb_t *cb_func)
1938 {
1939 	mutex_enter(&refcnt->ir_mutex);
1940 	ASSERT(refcnt->ir_refcnt > 0);
1941 	refcnt->ir_refcnt--;
1942 	REFCNT_AUDIT(refcnt);
1943 
1944 	/*
1945 	 * Someone is waiting for this object to go idle so check if
1946 	 * refcnt is 0.  Waiting on an object then later grabbing another
1947 	 * reference is not allowed so we don't need to handle that case.
1948 	 */
1949 	if (refcnt->ir_refcnt == 0) {
1950 		refcnt->ir_cb = cb_func;
1951 		refcnt->ir_waiting = REF_WAIT_ASYNC;
1952 		if (taskq_dispatch(idm.idm_global_taskq,
1953 		    &idm_refcnt_unref_task, refcnt, TQ_SLEEP) == NULL) {
1954 			cmn_err(CE_WARN,
1955 			    "idm_refcnt_rele: Couldn't dispatch task");
1956 		}
1957 	}
1958 	mutex_exit(&refcnt->ir_mutex);
1959 }
1960 
1961 void
1962 idm_refcnt_wait_ref(idm_refcnt_t *refcnt)
1963 {
1964 	mutex_enter(&refcnt->ir_mutex);
1965 	refcnt->ir_waiting = REF_WAIT_SYNC;
1966 	REFCNT_AUDIT(refcnt);
1967 	while (refcnt->ir_refcnt != 0)
1968 		cv_wait(&refcnt->ir_cv, &refcnt->ir_mutex);
1969 	mutex_exit(&refcnt->ir_mutex);
1970 }
1971 
1972 void
1973 idm_refcnt_async_wait_ref(idm_refcnt_t *refcnt, idm_refcnt_cb_t *cb_func)
1974 {
1975 	mutex_enter(&refcnt->ir_mutex);
1976 	refcnt->ir_waiting = REF_WAIT_ASYNC;
1977 	refcnt->ir_cb = cb_func;
1978 	REFCNT_AUDIT(refcnt);
1979 	/*
1980 	 * It's possible we don't have any references.  To make things easier
1981 	 * on the caller use a taskq to call the callback instead of
1982 	 * calling it synchronously
1983 	 */
1984 	if (refcnt->ir_refcnt == 0) {
1985 		if (taskq_dispatch(idm.idm_global_taskq,
1986 		    &idm_refcnt_unref_task, refcnt, TQ_SLEEP) == NULL) {
1987 			cmn_err(CE_WARN,
1988 			    "idm_refcnt_async_wait_ref: "
1989 			    "Couldn't dispatch task");
1990 		}
1991 	}
1992 	mutex_exit(&refcnt->ir_mutex);
1993 }
1994 
1995 void
1996 idm_refcnt_destroy_unref_obj(idm_refcnt_t *refcnt,
1997     idm_refcnt_cb_t *cb_func)
1998 {
1999 	mutex_enter(&refcnt->ir_mutex);
2000 	if (refcnt->ir_refcnt == 0) {
2001 		mutex_exit(&refcnt->ir_mutex);
2002 		(*cb_func)(refcnt->ir_referenced_obj);
2003 		return;
2004 	}
2005 	mutex_exit(&refcnt->ir_mutex);
2006 }
2007 
2008 void
2009 idm_conn_hold(idm_conn_t *ic)
2010 {
2011 	idm_refcnt_hold(&ic->ic_refcnt);
2012 }
2013 
2014 void
2015 idm_conn_rele(idm_conn_t *ic)
2016 {
2017 	idm_refcnt_rele(&ic->ic_refcnt);
2018 }
2019 
2020 
2021 static int
2022 _idm_init(void)
2023 {
2024 	/* Initialize the rwlock for the taskid table */
2025 	rw_init(&idm.idm_taskid_table_lock, NULL, RW_DRIVER, NULL);
2026 
2027 	/* Initialize the global mutex and taskq */
2028 	mutex_init(&idm.idm_global_mutex, NULL, MUTEX_DEFAULT, NULL);
2029 
2030 	cv_init(&idm.idm_tgt_svc_cv, NULL, CV_DEFAULT, NULL);
2031 	cv_init(&idm.idm_wd_cv, NULL, CV_DEFAULT, NULL);
2032 
2033 	idm.idm_global_taskq = taskq_create("idm_global_taskq", 1, minclsyspri,
2034 	    4, 4, TASKQ_PREPOPULATE);
2035 	if (idm.idm_global_taskq == NULL) {
2036 		cv_destroy(&idm.idm_wd_cv);
2037 		cv_destroy(&idm.idm_tgt_svc_cv);
2038 		mutex_destroy(&idm.idm_global_mutex);
2039 		rw_destroy(&idm.idm_taskid_table_lock);
2040 		return (ENOMEM);
2041 	}
2042 
2043 	/* Start watchdog thread */
2044 	idm.idm_wd_thread = thread_create(NULL, 0,
2045 	    idm_wd_thread, NULL, 0, &p0, TS_RUN, minclsyspri);
2046 	if (idm.idm_wd_thread == NULL) {
2047 		/* Couldn't create the watchdog thread */
2048 		taskq_destroy(idm.idm_global_taskq);
2049 		cv_destroy(&idm.idm_wd_cv);
2050 		cv_destroy(&idm.idm_tgt_svc_cv);
2051 		mutex_destroy(&idm.idm_global_mutex);
2052 		rw_destroy(&idm.idm_taskid_table_lock);
2053 		return (ENOMEM);
2054 	}
2055 
2056 	/* Pause until the watchdog thread is running */
2057 	mutex_enter(&idm.idm_global_mutex);
2058 	while (!idm.idm_wd_thread_running)
2059 		cv_wait(&idm.idm_wd_cv, &idm.idm_global_mutex);
2060 	mutex_exit(&idm.idm_global_mutex);
2061 
2062 	/*
2063 	 * Allocate the task ID table and set "next" to 0.
2064 	 */
2065 
2066 	idm.idm_taskid_max = idm_max_taskids;
2067 	idm.idm_taskid_table = (idm_task_t **)
2068 	    kmem_zalloc(idm.idm_taskid_max * sizeof (idm_task_t *), KM_SLEEP);
2069 	idm.idm_taskid_next = 0;
2070 
2071 	/* Create the global buffer and task kmem caches */
2072 	idm.idm_buf_cache = kmem_cache_create("idm_buf_cache",
2073 	    sizeof (idm_buf_t), 8, NULL, NULL, NULL, NULL, NULL, KM_SLEEP);
2074 
2075 	/*
2076 	 * Note, we're explicitly allocating an additional iSER header-
2077 	 * sized chunk for each of these elements. See idm_task_constructor().
2078 	 */
2079 	idm.idm_task_cache = kmem_cache_create("idm_task_cache",
2080 	    sizeof (idm_task_t) + IDM_TRANSPORT_HEADER_LENGTH, 8,
2081 	    &idm_task_constructor, &idm_task_destructor,
2082 	    NULL, NULL, NULL, KM_SLEEP);
2083 
2084 	/* Create the service and connection context lists */
2085 	list_create(&idm.idm_tgt_svc_list, sizeof (idm_svc_t),
2086 	    offsetof(idm_svc_t, is_list_node));
2087 	list_create(&idm.idm_tgt_conn_list, sizeof (idm_conn_t),
2088 	    offsetof(idm_conn_t, ic_list_node));
2089 	list_create(&idm.idm_ini_conn_list, sizeof (idm_conn_t),
2090 	    offsetof(idm_conn_t, ic_list_node));
2091 
2092 	/* Initialize the native sockets transport */
2093 	idm_so_init(&idm_transport_list[IDM_TRANSPORT_TYPE_SOCKETS]);
2094 
2095 	/* Create connection ID pool */
2096 	(void) idm_idpool_create(&idm.idm_conn_id_pool);
2097 
2098 	return (DDI_SUCCESS);
2099 }
2100 
2101 static int
2102 _idm_fini(void)
2103 {
2104 	if (!list_is_empty(&idm.idm_ini_conn_list) ||
2105 	    !list_is_empty(&idm.idm_tgt_conn_list) ||
2106 	    !list_is_empty(&idm.idm_tgt_svc_list)) {
2107 		return (EBUSY);
2108 	}
2109 
2110 	mutex_enter(&idm.idm_global_mutex);
2111 	idm.idm_wd_thread_running = B_FALSE;
2112 	cv_signal(&idm.idm_wd_cv);
2113 	mutex_exit(&idm.idm_global_mutex);
2114 
2115 	thread_join(idm.idm_wd_thread_did);
2116 
2117 	idm_idpool_destroy(&idm.idm_conn_id_pool);
2118 	idm_so_fini();
2119 	list_destroy(&idm.idm_ini_conn_list);
2120 	list_destroy(&idm.idm_tgt_conn_list);
2121 	list_destroy(&idm.idm_tgt_svc_list);
2122 	kmem_cache_destroy(idm.idm_task_cache);
2123 	kmem_cache_destroy(idm.idm_buf_cache);
2124 	kmem_free(idm.idm_taskid_table,
2125 	    idm.idm_taskid_max * sizeof (idm_task_t *));
2126 	mutex_destroy(&idm.idm_global_mutex);
2127 	cv_destroy(&idm.idm_wd_cv);
2128 	cv_destroy(&idm.idm_tgt_svc_cv);
2129 	rw_destroy(&idm.idm_taskid_table_lock);
2130 
2131 	return (0);
2132 }
2133