xref: /illumos-gate/usr/src/uts/common/io/idm/idm.c (revision 8e0c82482ca0deab087522e43df87919b37037f3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 #include <sys/cpuvar.h>
26 #include <sys/conf.h>
27 #include <sys/file.h>
28 #include <sys/ddi.h>
29 #include <sys/sunddi.h>
30 #include <sys/modctl.h>
31 
32 #include <sys/socket.h>
33 #include <sys/strsubr.h>
34 #include <sys/sysmacros.h>
35 
36 #include <sys/socketvar.h>
37 #include <netinet/in.h>
38 
39 #include <sys/idm/idm.h>
40 #include <sys/idm/idm_so.h>
41 
42 #define	IDM_NAME_VERSION	"iSCSI Data Mover"
43 
44 extern struct mod_ops mod_miscops;
45 extern struct mod_ops mod_miscops;
46 
47 static struct modlmisc modlmisc = {
48 	&mod_miscops,	/* Type of module */
49 	IDM_NAME_VERSION
50 };
51 
52 static struct modlinkage modlinkage = {
53 	MODREV_1, (void *)&modlmisc, NULL
54 };
55 
56 extern void idm_wd_thread(void *arg);
57 
58 static int _idm_init(void);
59 static int _idm_fini(void);
60 static void idm_buf_bind_in_locked(idm_task_t *idt, idm_buf_t *buf);
61 static void idm_buf_bind_out_locked(idm_task_t *idt, idm_buf_t *buf);
62 static void idm_buf_unbind_in_locked(idm_task_t *idt, idm_buf_t *buf);
63 static void idm_buf_unbind_out_locked(idm_task_t *idt, idm_buf_t *buf);
64 static void idm_task_abort_one(idm_conn_t *ic, idm_task_t *idt,
65     idm_abort_type_t abort_type);
66 static void idm_task_aborted(idm_task_t *idt, idm_status_t status);
67 static idm_pdu_t *idm_pdu_alloc_common(uint_t hdrlen, uint_t datalen,
68     int sleepflag);
69 
70 boolean_t idm_conn_logging = 0;
71 boolean_t idm_svc_logging = 0;
72 #ifdef DEBUG
73 boolean_t idm_pattern_checking = 1;
74 #else
75 boolean_t idm_pattern_checking = 0;
76 #endif
77 
78 /*
79  * Potential tuneable for the maximum number of tasks.  Default to
80  * IDM_TASKIDS_MAX
81  */
82 
83 uint32_t	idm_max_taskids = IDM_TASKIDS_MAX;
84 
85 /*
86  * Global list of transport handles
87  *   These are listed in preferential order, so we can simply take the
88  *   first "it_conn_is_capable" hit. Note also that the order maps to
89  *   the order of the idm_transport_type_t list.
90  */
91 idm_transport_t idm_transport_list[] = {
92 
93 	/* iSER on InfiniBand transport handle */
94 	{IDM_TRANSPORT_TYPE_ISER,	/* type */
95 	"/devices/ib/iser@0:iser",	/* device path */
96 	NULL,				/* LDI handle */
97 	NULL,				/* transport ops */
98 	NULL},				/* transport caps */
99 
100 	/* IDM native sockets transport handle */
101 	{IDM_TRANSPORT_TYPE_SOCKETS,	/* type */
102 	NULL,				/* device path */
103 	NULL,				/* LDI handle */
104 	NULL,				/* transport ops */
105 	NULL}				/* transport caps */
106 
107 };
108 
109 int
110 _init(void)
111 {
112 	int rc;
113 
114 	if ((rc = _idm_init()) != 0) {
115 		return (rc);
116 	}
117 
118 	return (mod_install(&modlinkage));
119 }
120 
121 int
122 _fini(void)
123 {
124 	int rc;
125 
126 	if ((rc = _idm_fini()) != 0) {
127 		return (rc);
128 	}
129 
130 	if ((rc = mod_remove(&modlinkage)) != 0) {
131 		return (rc);
132 	}
133 
134 	return (rc);
135 }
136 
137 int
138 _info(struct modinfo *modinfop)
139 {
140 	return (mod_info(&modlinkage, modinfop));
141 }
142 
143 /*
144  * idm_transport_register()
145  *
146  * Provides a mechanism for an IDM transport driver to register its
147  * transport ops and caps with the IDM kernel module. Invoked during
148  * a transport driver's attach routine.
149  */
150 idm_status_t
151 idm_transport_register(idm_transport_attr_t *attr)
152 {
153 	ASSERT(attr->it_ops != NULL);
154 	ASSERT(attr->it_caps != NULL);
155 
156 	switch (attr->type) {
157 	/* All known non-native transports here; for now, iSER */
158 	case IDM_TRANSPORT_TYPE_ISER:
159 		idm_transport_list[attr->type].it_ops	= attr->it_ops;
160 		idm_transport_list[attr->type].it_caps	= attr->it_caps;
161 		return (IDM_STATUS_SUCCESS);
162 
163 	default:
164 		cmn_err(CE_NOTE, "idm: unknown transport type (0x%x) in "
165 		    "idm_transport_register", attr->type);
166 		return (IDM_STATUS_SUCCESS);
167 	}
168 }
169 
170 /*
171  * idm_ini_conn_create
172  *
173  * This function is invoked by the iSCSI layer to create a connection context.
174  * This does not actually establish the socket connection.
175  *
176  * cr - Connection request parameters
177  * new_con - Output parameter that contains the new request if successful
178  *
179  */
180 idm_status_t
181 idm_ini_conn_create(idm_conn_req_t *cr, idm_conn_t **new_con)
182 {
183 	idm_transport_t		*it;
184 	idm_conn_t		*ic;
185 	int			rc;
186 
187 	it = idm_transport_lookup(cr);
188 
189 retry:
190 	ic = idm_conn_create_common(CONN_TYPE_INI, it->it_type,
191 	    &cr->icr_conn_ops);
192 
193 	bcopy(&cr->cr_ini_dst_addr, &ic->ic_ini_dst_addr,
194 	    sizeof (cr->cr_ini_dst_addr));
195 
196 	/* create the transport-specific connection components */
197 	rc = it->it_ops->it_ini_conn_create(cr, ic);
198 	if (rc != IDM_STATUS_SUCCESS) {
199 		/* cleanup the failed connection */
200 		idm_conn_destroy_common(ic);
201 
202 		/*
203 		 * It is possible for an IB client to connect to
204 		 * an ethernet-only client via an IB-eth gateway.
205 		 * Therefore, if we are attempting to use iSER and
206 		 * fail, retry with sockets before ultimately
207 		 * failing the connection.
208 		 */
209 		if (it->it_type == IDM_TRANSPORT_TYPE_ISER) {
210 			it = &idm_transport_list[IDM_TRANSPORT_TYPE_SOCKETS];
211 			goto retry;
212 		}
213 
214 		return (IDM_STATUS_FAIL);
215 	}
216 
217 	*new_con = ic;
218 
219 	mutex_enter(&idm.idm_global_mutex);
220 	list_insert_tail(&idm.idm_ini_conn_list, ic);
221 	mutex_exit(&idm.idm_global_mutex);
222 
223 	return (IDM_STATUS_SUCCESS);
224 }
225 
226 /*
227  * idm_ini_conn_destroy
228  *
229  * Releases any resources associated with the connection.  This is the
230  * complement to idm_ini_conn_create.
231  * ic - idm_conn_t structure representing the relevant connection
232  *
233  */
234 void
235 idm_ini_conn_destroy_task(void *ic_void)
236 {
237 	idm_conn_t *ic = ic_void;
238 
239 	ic->ic_transport_ops->it_ini_conn_destroy(ic);
240 	idm_conn_destroy_common(ic);
241 }
242 
243 void
244 idm_ini_conn_destroy(idm_conn_t *ic)
245 {
246 	/*
247 	 * It's reasonable for the initiator to call idm_ini_conn_destroy
248 	 * from within the context of the CN_CONNECT_DESTROY notification.
249 	 * That's a problem since we want to destroy the taskq for the
250 	 * state machine associated with the connection.  Remove the
251 	 * connection from the list right away then handle the remaining
252 	 * work via the idm_global_taskq.
253 	 */
254 	mutex_enter(&idm.idm_global_mutex);
255 	list_remove(&idm.idm_ini_conn_list, ic);
256 	mutex_exit(&idm.idm_global_mutex);
257 
258 	if (taskq_dispatch(idm.idm_global_taskq,
259 	    &idm_ini_conn_destroy_task, ic, TQ_SLEEP) == TASKQID_INVALID) {
260 		cmn_err(CE_WARN,
261 		    "idm_ini_conn_destroy: Couldn't dispatch task");
262 	}
263 }
264 
265 /*
266  * idm_ini_conn_connect
267  *
268  * Establish connection to the remote system identified in idm_conn_t.
269  * The connection parameters including the remote IP address were established
270  * in the call to idm_ini_conn_create.  The IDM state machine will
271  * perform client notifications as necessary to prompt the initiator through
272  * the login process.  IDM also keeps a timer running so that if the login
273  * process doesn't complete in a timely manner it will fail.
274  *
275  * ic - idm_conn_t structure representing the relevant connection
276  *
277  * Returns success if the connection was established, otherwise some kind
278  * of meaningful error code.
279  *
 * Upon return the login has either failed or is logging in (ffp)
281  */
282 idm_status_t
283 idm_ini_conn_connect(idm_conn_t *ic)
284 {
285 	idm_status_t	rc;
286 
287 	rc = idm_conn_sm_init(ic);
288 	if (rc != IDM_STATUS_SUCCESS) {
289 		return (ic->ic_conn_sm_status);
290 	}
291 
292 	/* Hold connection until we return */
293 	idm_conn_hold(ic);
294 
295 	/* Kick state machine */
296 	idm_conn_event(ic, CE_CONNECT_REQ, (uintptr_t)NULL);
297 
298 	/* Wait for login flag */
299 	mutex_enter(&ic->ic_state_mutex);
300 	while (!(ic->ic_state_flags & CF_LOGIN_READY) &&
301 	    !(ic->ic_state_flags & CF_ERROR)) {
302 		cv_wait(&ic->ic_state_cv, &ic->ic_state_mutex);
303 	}
304 
305 	/*
306 	 * The CN_READY_TO_LOGIN and/or the CN_CONNECT_FAIL call to
307 	 * idm_notify_client has already been generated by the idm conn
308 	 * state machine.  If connection fails any time after this
309 	 * check, we will detect it in iscsi_login.
310 	 */
311 	if (ic->ic_state_flags & CF_ERROR) {
312 		rc = ic->ic_conn_sm_status;
313 	}
314 	mutex_exit(&ic->ic_state_mutex);
315 	idm_conn_rele(ic);
316 
317 	return (rc);
318 }
319 
320 /*
321  * idm_ini_conn_disconnect
322  *
323  * Forces a connection (previously established using idm_ini_conn_connect)
324  * to perform a controlled shutdown, cleaning up any outstanding requests.
325  *
326  * ic - idm_conn_t structure representing the relevant connection
327  *
328  * This is asynchronous and will return before the connection is properly
329  * shutdown
330  */
331 /* ARGSUSED */
332 void
333 idm_ini_conn_disconnect(idm_conn_t *ic)
334 {
335 	idm_conn_event(ic, CE_TRANSPORT_FAIL, (uintptr_t)NULL);
336 }
337 
338 /*
 * idm_ini_conn_disconnect_sync
340  *
341  * Forces a connection (previously established using idm_ini_conn_connect)
342  * to perform a controlled shutdown.  Blocks until the connection is
343  * disconnected.
344  *
345  * ic - idm_conn_t structure representing the relevant connection
346  */
347 /* ARGSUSED */
348 void
349 idm_ini_conn_disconnect_sync(idm_conn_t *ic)
350 {
351 	mutex_enter(&ic->ic_state_mutex);
352 	if ((ic->ic_state != CS_S9_INIT_ERROR) &&
353 	    (ic->ic_state != CS_S11_COMPLETE)) {
354 		idm_conn_event_locked(ic, CE_TRANSPORT_FAIL, (uintptr_t)NULL,
355 		    CT_NONE);
356 		while ((ic->ic_state != CS_S9_INIT_ERROR) &&
357 		    (ic->ic_state != CS_S11_COMPLETE))
358 			cv_wait(&ic->ic_state_cv, &ic->ic_state_mutex);
359 	}
360 	mutex_exit(&ic->ic_state_mutex);
361 }
362 
363 /*
364  * idm_tgt_svc_create
365  *
366  * The target calls this service to obtain a service context for each available
367  * transport, starting a service of each type related to the IP address and port
368  * passed. The idm_svc_req_t contains the service parameters.
369  */
370 idm_status_t
371 idm_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t **new_svc)
372 {
373 	idm_transport_type_t	type;
374 	idm_transport_t		*it;
375 	idm_svc_t		*is;
376 	int			rc;
377 
378 	*new_svc = NULL;
379 	is = kmem_zalloc(sizeof (idm_svc_t), KM_SLEEP);
380 
381 	/* Initialize transport-agnostic components of the service handle */
382 	is->is_svc_req = *sr;
383 	mutex_init(&is->is_mutex, NULL, MUTEX_DEFAULT, NULL);
384 	cv_init(&is->is_cv, NULL, CV_DEFAULT, NULL);
385 	mutex_init(&is->is_count_mutex, NULL, MUTEX_DEFAULT, NULL);
386 	cv_init(&is->is_count_cv, NULL, CV_DEFAULT, NULL);
387 	idm_refcnt_init(&is->is_refcnt, is);
388 
389 	/*
390 	 * Make sure all available transports are setup.  We call this now
391 	 * instead of at initialization time in case IB has become available
392 	 * since we started (hotplug, etc).
393 	 */
394 	idm_transport_setup(sr->sr_li, B_FALSE);
395 
396 	/*
397 	 * Loop through the transports, configuring the transport-specific
398 	 * components of each one.
399 	 */
400 	for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {
401 
402 		it = &idm_transport_list[type];
403 		/*
404 		 * If it_ops is NULL then the transport is unconfigured
405 		 * and we shouldn't try to start the service.
406 		 */
407 		if (it->it_ops == NULL) {
408 			continue;
409 		}
410 
411 		rc = it->it_ops->it_tgt_svc_create(sr, is);
412 		if (rc != IDM_STATUS_SUCCESS) {
413 			/* Teardown any configured services */
414 			while (type--) {
415 				it = &idm_transport_list[type];
416 				if (it->it_ops == NULL) {
417 					continue;
418 				}
419 				it->it_ops->it_tgt_svc_destroy(is);
420 			}
421 			/* Free the svc context and return */
422 			kmem_free(is, sizeof (idm_svc_t));
423 			return (rc);
424 		}
425 	}
426 
427 	*new_svc = is;
428 
429 	mutex_enter(&idm.idm_global_mutex);
430 	list_insert_tail(&idm.idm_tgt_svc_list, is);
431 	mutex_exit(&idm.idm_global_mutex);
432 
433 	return (IDM_STATUS_SUCCESS);
434 }
435 
436 /*
437  * idm_tgt_svc_destroy
438  *
439  * is - idm_svc_t returned by the call to idm_tgt_svc_create
440  *
441  * Cleanup any resources associated with the idm_svc_t.
442  */
443 void
444 idm_tgt_svc_destroy(idm_svc_t *is)
445 {
446 	idm_transport_type_t	type;
447 	idm_transport_t		*it;
448 
449 	mutex_enter(&idm.idm_global_mutex);
450 	/* remove this service from the global list */
451 	list_remove(&idm.idm_tgt_svc_list, is);
452 	/* wakeup any waiters for service change */
453 	cv_broadcast(&idm.idm_tgt_svc_cv);
454 	mutex_exit(&idm.idm_global_mutex);
455 
456 	/* teardown each transport-specific service */
457 	for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {
458 		it = &idm_transport_list[type];
459 		if (it->it_ops == NULL) {
460 			continue;
461 		}
462 
463 		it->it_ops->it_tgt_svc_destroy(is);
464 	}
465 
466 	/* tear down the svc resources */
467 	idm_refcnt_destroy(&is->is_refcnt);
468 	cv_destroy(&is->is_count_cv);
469 	mutex_destroy(&is->is_count_mutex);
470 	cv_destroy(&is->is_cv);
471 	mutex_destroy(&is->is_mutex);
472 
473 	/* free the svc handle */
474 	kmem_free(is, sizeof (idm_svc_t));
475 }
476 
477 void
478 idm_tgt_svc_hold(idm_svc_t *is)
479 {
480 	idm_refcnt_hold(&is->is_refcnt);
481 }
482 
483 void
484 idm_tgt_svc_rele_and_destroy(idm_svc_t *is)
485 {
486 	idm_refcnt_rele_and_destroy(&is->is_refcnt,
487 	    (idm_refcnt_cb_t *)&idm_tgt_svc_destroy);
488 }
489 
490 /*
491  * idm_tgt_svc_online
492  *
493  * is - idm_svc_t returned by the call to idm_tgt_svc_create
494  *
495  * Online each transport service, as we want this target to be accessible
496  * via any configured transport.
497  *
498  * When the initiator establishes a new connection to the target, IDM will
499  * call the "new connect" callback defined in the idm_svc_req_t structure
500  * and it will pass an idm_conn_t structure representing that new connection.
501  */
502 idm_status_t
503 idm_tgt_svc_online(idm_svc_t *is)
504 {
505 
506 	idm_transport_type_t	type, last_type;
507 	idm_transport_t		*it;
508 	int			rc = IDM_STATUS_SUCCESS;
509 
510 	mutex_enter(&is->is_mutex);
511 	if (is->is_online == 0) {
512 		/* Walk through each of the transports and online them */
513 		for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {
514 			it = &idm_transport_list[type];
515 			if (it->it_ops == NULL) {
516 				/* transport is not registered */
517 				continue;
518 			}
519 
520 			mutex_exit(&is->is_mutex);
521 			rc = it->it_ops->it_tgt_svc_online(is);
522 			mutex_enter(&is->is_mutex);
523 			if (rc != IDM_STATUS_SUCCESS) {
524 				last_type = type;
525 				break;
526 			}
527 		}
528 		if (rc != IDM_STATUS_SUCCESS) {
529 			/*
530 			 * The last transport failed to online.
531 			 * Offline any transport onlined above and
532 			 * do not online the target.
533 			 */
534 			for (type = 0; type < last_type; type++) {
535 				it = &idm_transport_list[type];
536 				if (it->it_ops == NULL) {
537 					/* transport is not registered */
538 					continue;
539 				}
540 
541 				mutex_exit(&is->is_mutex);
542 				it->it_ops->it_tgt_svc_offline(is);
543 				mutex_enter(&is->is_mutex);
544 			}
545 		} else {
546 			/* Target service now online */
547 			is->is_online = 1;
548 		}
549 	} else {
550 		/* Target service already online, just bump the count */
551 		is->is_online++;
552 	}
553 	mutex_exit(&is->is_mutex);
554 
555 	return (rc);
556 }
557 
558 /*
559  * idm_tgt_svc_offline
560  *
561  * is - idm_svc_t returned by the call to idm_tgt_svc_create
562  *
563  * Shutdown any online target services.
564  */
565 void
566 idm_tgt_svc_offline(idm_svc_t *is)
567 {
568 	idm_transport_type_t	type;
569 	idm_transport_t		*it;
570 
571 	mutex_enter(&is->is_mutex);
572 	is->is_online--;
573 	if (is->is_online == 0) {
574 		/* Walk through each of the transports and offline them */
575 		for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {
576 			it = &idm_transport_list[type];
577 			if (it->it_ops == NULL) {
578 				/* transport is not registered */
579 				continue;
580 			}
581 
582 			mutex_exit(&is->is_mutex);
583 			it->it_ops->it_tgt_svc_offline(is);
584 			mutex_enter(&is->is_mutex);
585 		}
586 	}
587 	mutex_exit(&is->is_mutex);
588 }
589 
590 /*
591  * idm_tgt_svc_lookup
592  *
593  * Lookup a service instance listening on the specified port
594  */
595 
596 idm_svc_t *
597 idm_tgt_svc_lookup(uint16_t port)
598 {
599 	idm_svc_t *result;
600 
601 retry:
602 	mutex_enter(&idm.idm_global_mutex);
603 	for (result = list_head(&idm.idm_tgt_svc_list);
604 	    result != NULL;
605 	    result = list_next(&idm.idm_tgt_svc_list, result)) {
606 		if (result->is_svc_req.sr_port == port) {
607 			if (result->is_online == 0) {
608 				/*
609 				 * A service exists on this port, but it
610 				 * is going away, wait for it to cleanup.
611 				 */
612 				cv_wait(&idm.idm_tgt_svc_cv,
613 				    &idm.idm_global_mutex);
614 				mutex_exit(&idm.idm_global_mutex);
615 				goto retry;
616 			}
617 			idm_tgt_svc_hold(result);
618 			mutex_exit(&idm.idm_global_mutex);
619 			return (result);
620 		}
621 	}
622 	mutex_exit(&idm.idm_global_mutex);
623 
624 	return (NULL);
625 }
626 
627 /*
628  * idm_negotiate_key_values()
629  * Give IDM level a chance to negotiate any login parameters it should own.
630  *  -- leave unhandled parameters alone on request_nvl
631  *  -- move all handled parameters to response_nvl with an appropriate response
632  *  -- also add an entry to negotiated_nvl for any accepted parameters
633  */
634 kv_status_t
635 idm_negotiate_key_values(idm_conn_t *ic, nvlist_t *request_nvl,
636     nvlist_t *response_nvl, nvlist_t *negotiated_nvl)
637 {
638 	ASSERT(ic->ic_transport_ops != NULL);
639 	return (ic->ic_transport_ops->it_negotiate_key_values(ic,
640 	    request_nvl, response_nvl, negotiated_nvl));
641 }
642 
643 /*
644  * idm_notice_key_values()
645  * Activate at the IDM level any parameters that have been negotiated.
646  * Passes the set of key value pairs to the transport for activation.
647  * This will be invoked as the connection is entering full-feature mode.
648  */
649 void
650 idm_notice_key_values(idm_conn_t *ic, nvlist_t *negotiated_nvl)
651 {
652 	ASSERT(ic->ic_transport_ops != NULL);
653 	ic->ic_transport_ops->it_notice_key_values(ic, negotiated_nvl);
654 }
655 
656 /*
657  * idm_declare_key_values()
658  * Activate an operational set of declarative parameters from the config_nvl,
659  * and return the selected values in the outgoing_nvl.
660  */
661 kv_status_t
662 idm_declare_key_values(idm_conn_t *ic, nvlist_t *config_nvl,
663     nvlist_t *outgoing_nvl)
664 {
665 	ASSERT(ic->ic_transport_ops != NULL);
666 	return (ic->ic_transport_ops->it_declare_key_values(ic, config_nvl,
667 	    outgoing_nvl));
668 }
669 
670 /*
671  * idm_buf_tx_to_ini
672  *
673  * This is IDM's implementation of the 'Put_Data' operational primitive.
674  *
675  * This function is invoked by a target iSCSI layer to request its local
676  * Datamover layer to transmit the Data-In PDU to the peer iSCSI layer
677  * on the remote iSCSI node. The I/O buffer represented by 'idb' is
678  * transferred to the initiator associated with task 'idt'. The connection
679  * info, contents of the Data-In PDU header, the DataDescriptorIn, BHS,
680  * and the callback (idb->idb_buf_cb) at transfer completion are
681  * provided as input.
682  *
683  * This data transfer takes place transparently to the remote iSCSI layer,
684  * i.e. without its participation.
685  *
686  * Using sockets, IDM implements the data transfer by segmenting the data
687  * buffer into appropriately sized iSCSI PDUs and transmitting them to the
688  * initiator. iSER performs the transfer using RDMA write.
689  *
690  */
691 idm_status_t
692 idm_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb,
693     uint32_t offset, uint32_t xfer_len,
694     idm_buf_cb_t idb_buf_cb, void *cb_arg)
695 {
696 	idm_status_t rc;
697 
698 	idb->idb_bufoffset = offset;
699 	idb->idb_xfer_len = xfer_len;
700 	idb->idb_buf_cb = idb_buf_cb;
701 	idb->idb_cb_arg = cb_arg;
702 	gethrestime(&idb->idb_xfer_start);
703 
704 	/*
705 	 * Buffer should not contain the pattern.  If the pattern is
706 	 * present then we've been asked to transmit initialized data
707 	 */
708 	IDM_BUFPAT_CHECK(idb, xfer_len, BP_CHECK_ASSERT);
709 
710 	mutex_enter(&idt->idt_mutex);
711 	switch (idt->idt_state) {
712 	case TASK_ACTIVE:
713 		idt->idt_tx_to_ini_start++;
714 		idm_task_hold(idt);
715 		idm_buf_bind_in_locked(idt, idb);
716 		idb->idb_in_transport = B_TRUE;
717 		rc = (*idt->idt_ic->ic_transport_ops->it_buf_tx_to_ini)
718 		    (idt, idb);
719 		return (rc);
720 
721 	case TASK_SUSPENDING:
722 	case TASK_SUSPENDED:
723 		/*
724 		 * Bind buffer but don't start a transfer since the task
725 		 * is suspended
726 		 */
727 		idm_buf_bind_in_locked(idt, idb);
728 		mutex_exit(&idt->idt_mutex);
729 		return (IDM_STATUS_SUCCESS);
730 
731 	case TASK_ABORTING:
732 	case TASK_ABORTED:
733 		/*
734 		 * Once the task is aborted, any buffers added to the
735 		 * idt_inbufv will never get cleaned up, so just return
736 		 * SUCCESS.  The buffer should get cleaned up by the
737 		 * client or framework once task_aborted has completed.
738 		 */
739 		mutex_exit(&idt->idt_mutex);
740 		return (IDM_STATUS_SUCCESS);
741 
742 	default:
743 		ASSERT(0);
744 		break;
745 	}
746 	mutex_exit(&idt->idt_mutex);
747 
748 	return (IDM_STATUS_FAIL);
749 }
750 
751 /*
752  * idm_buf_rx_from_ini
753  *
754  * This is IDM's implementation of the 'Get_Data' operational primitive.
755  *
756  * This function is invoked by a target iSCSI layer to request its local
757  * Datamover layer to retrieve certain data identified by the R2T PDU from the
758  * peer iSCSI layer on the remote node. The retrieved Data-Out PDU will be
759  * mapped to the respective buffer by the task tags (ITT & TTT).
760  * The connection information, contents of an R2T PDU, DataDescriptor, BHS, and
 * the callback (idb->idb_buf_cb) notification for data transfer completion
 * are provided as input.
 *
 * When an iSCSI node sends an R2T PDU to its local Datamover layer, the
 * local and remote Datamover layers transparently bring about the data
 * transfer requested by the R2T PDU, without the participation of the
 * iSCSI layers.
768  *
769  * Using sockets, IDM transmits an R2T PDU for each buffer and the rx_data_out()
770  * assembles the Data-Out PDUs into the buffer. iSER uses RDMA read.
771  *
772  */
773 idm_status_t
774 idm_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb,
775     uint32_t offset, uint32_t xfer_len,
776     idm_buf_cb_t idb_buf_cb, void *cb_arg)
777 {
778 	idm_status_t rc;
779 
780 	idb->idb_bufoffset = offset;
781 	idb->idb_xfer_len = xfer_len;
782 	idb->idb_buf_cb = idb_buf_cb;
783 	idb->idb_cb_arg = cb_arg;
784 	gethrestime(&idb->idb_xfer_start);
785 
786 	/*
787 	 * "In" buf list is for "Data In" PDU's, "Out" buf list is for
788 	 * "Data Out" PDU's
789 	 */
790 	mutex_enter(&idt->idt_mutex);
791 	switch (idt->idt_state) {
792 	case TASK_ACTIVE:
793 		idt->idt_rx_from_ini_start++;
794 		idm_task_hold(idt);
795 		idm_buf_bind_out_locked(idt, idb);
796 		idb->idb_in_transport = B_TRUE;
797 		rc = (*idt->idt_ic->ic_transport_ops->it_buf_rx_from_ini)
798 		    (idt, idb);
799 		return (rc);
800 	case TASK_SUSPENDING:
801 	case TASK_SUSPENDED:
802 	case TASK_ABORTING:
803 	case TASK_ABORTED:
804 		/*
805 		 * Bind buffer but don't start a transfer since the task
806 		 * is suspended
807 		 */
808 		idm_buf_bind_out_locked(idt, idb);
809 		mutex_exit(&idt->idt_mutex);
810 		return (IDM_STATUS_SUCCESS);
811 	default:
812 		ASSERT(0);
813 		break;
814 	}
815 	mutex_exit(&idt->idt_mutex);
816 
817 	return (IDM_STATUS_FAIL);
818 }
819 
820 /*
821  * idm_buf_tx_to_ini_done
822  *
823  * The transport calls this after it has completed a transfer requested by
824  * a call to transport_buf_tx_to_ini
825  *
826  * Caller holds idt->idt_mutex, idt->idt_mutex is released before returning.
827  * idt may be freed after the call to idb->idb_buf_cb.
828  */
829 void
830 idm_buf_tx_to_ini_done(idm_task_t *idt, idm_buf_t *idb, idm_status_t status)
831 {
832 	ASSERT(mutex_owned(&idt->idt_mutex));
833 	idb->idb_in_transport = B_FALSE;
834 	idb->idb_tx_thread = B_FALSE;
835 	idt->idt_tx_to_ini_done++;
836 	gethrestime(&idb->idb_xfer_done);
837 
838 	/*
839 	 * idm_refcnt_rele may cause TASK_SUSPENDING --> TASK_SUSPENDED or
840 	 * TASK_ABORTING --> TASK_ABORTED transistion if the refcount goes
841 	 * to 0.
842 	 */
843 	idm_task_rele(idt);
844 	idb->idb_status = status;
845 
846 	switch (idt->idt_state) {
847 	case TASK_ACTIVE:
848 		idt->idt_ic->ic_timestamp = ddi_get_lbolt();
849 		idm_buf_unbind_in_locked(idt, idb);
850 		mutex_exit(&idt->idt_mutex);
851 		(*idb->idb_buf_cb)(idb, status);
852 		return;
853 	case TASK_SUSPENDING:
854 	case TASK_SUSPENDED:
855 	case TASK_ABORTING:
856 	case TASK_ABORTED:
857 		/*
858 		 * To keep things simple we will ignore the case where the
859 		 * transfer was successful and leave all buffers bound to the
860 		 * task.  This allows us to also ignore the case where we've
861 		 * been asked to abort a task but the last transfer of the
862 		 * task has completed.  IDM has no idea whether this was, in
863 		 * fact, the last transfer of the task so it would be difficult
864 		 * to handle this case.  Everything should get sorted out again
865 		 * after task reassignment is complete.
866 		 *
867 		 * In the case of TASK_ABORTING we could conceivably call the
868 		 * buffer callback here but the timing of when the client's
869 		 * client_task_aborted callback is invoked vs. when the client's
870 		 * buffer callback gets invoked gets sticky.  We don't want
871 		 * the client to here from us again after the call to
872 		 * client_task_aborted() but we don't want to give it a bunch
873 		 * of failed buffer transfers until we've called
874 		 * client_task_aborted().  Instead we'll just leave all the
875 		 * buffers bound and allow the client to cleanup.
876 		 */
877 		break;
878 	default:
879 		ASSERT(0);
880 	}
881 	mutex_exit(&idt->idt_mutex);
882 }
883 
884 /*
885  * idm_buf_rx_from_ini_done
886  *
887  * The transport calls this after it has completed a transfer requested by
 * a call to transport_buf_rx_from_ini
889  *
890  * Caller holds idt->idt_mutex, idt->idt_mutex is released before returning.
891  * idt may be freed after the call to idb->idb_buf_cb.
892  */
893 void
894 idm_buf_rx_from_ini_done(idm_task_t *idt, idm_buf_t *idb, idm_status_t status)
895 {
896 	ASSERT(mutex_owned(&idt->idt_mutex));
897 	idb->idb_in_transport = B_FALSE;
898 	idt->idt_rx_from_ini_done++;
899 	gethrestime(&idb->idb_xfer_done);
900 
901 	/*
902 	 * idm_refcnt_rele may cause TASK_SUSPENDING --> TASK_SUSPENDED or
903 	 * TASK_ABORTING --> TASK_ABORTED transistion if the refcount goes
904 	 * to 0.
905 	 */
906 	idm_task_rele(idt);
907 	idb->idb_status = status;
908 
909 	if (status == IDM_STATUS_SUCCESS) {
910 		/*
911 		 * Buffer should not contain the pattern.  If it does then
912 		 * we did not get the data from the remote host.
913 		 */
914 		IDM_BUFPAT_CHECK(idb, idb->idb_xfer_len, BP_CHECK_ASSERT);
915 	}
916 
917 	switch (idt->idt_state) {
918 	case TASK_ACTIVE:
919 		idt->idt_ic->ic_timestamp = ddi_get_lbolt();
920 		idm_buf_unbind_out_locked(idt, idb);
921 		mutex_exit(&idt->idt_mutex);
922 		(*idb->idb_buf_cb)(idb, status);
923 		return;
924 	case TASK_SUSPENDING:
925 	case TASK_SUSPENDED:
926 	case TASK_ABORTING:
927 	case TASK_ABORTED:
928 		/*
929 		 * To keep things simple we will ignore the case where the
930 		 * transfer was successful and leave all buffers bound to the
931 		 * task.  This allows us to also ignore the case where we've
932 		 * been asked to abort a task but the last transfer of the
933 		 * task has completed.  IDM has no idea whether this was, in
934 		 * fact, the last transfer of the task so it would be difficult
935 		 * to handle this case.  Everything should get sorted out again
936 		 * after task reassignment is complete.
937 		 *
938 		 * In the case of TASK_ABORTING we could conceivably call the
939 		 * buffer callback here but the timing of when the client's
940 		 * client_task_aborted callback is invoked vs. when the client's
941 		 * buffer callback gets invoked gets sticky.  We don't want
942 		 * the client to here from us again after the call to
943 		 * client_task_aborted() but we don't want to give it a bunch
944 		 * of failed buffer transfers until we've called
945 		 * client_task_aborted().  Instead we'll just leave all the
946 		 * buffers bound and allow the client to cleanup.
947 		 */
948 		break;
949 	default:
950 		ASSERT(0);
951 	}
952 	mutex_exit(&idt->idt_mutex);
953 }
954 
955 /*
956  * idm_buf_alloc
957  *
958  * Allocates a buffer handle and registers it for use with the transport
959  * layer. If a buffer is not passed on bufptr, the buffer will be allocated
960  * as well as the handle.
961  *
962  * ic		- connection on which the buffer will be transferred
963  * bufptr	- allocate memory for buffer if NULL, else assign to buffer
964  * buflen	- length of buffer
965  *
966  * Returns idm_buf_t handle if successful, otherwise NULL
967  */
968 idm_buf_t *
969 idm_buf_alloc(idm_conn_t *ic, void *bufptr, uint64_t buflen)
970 {
971 	idm_buf_t	*buf = NULL;
972 	int		rc;
973 
974 	ASSERT(ic != NULL);
975 	ASSERT(idm.idm_buf_cache != NULL);
976 	ASSERT(buflen > 0);
977 
978 	/* Don't allocate new buffers if we are not in FFP */
979 	mutex_enter(&ic->ic_state_mutex);
980 	if (!ic->ic_ffp) {
981 		mutex_exit(&ic->ic_state_mutex);
982 		return (NULL);
983 	}
984 
985 
986 	idm_conn_hold(ic);
987 	mutex_exit(&ic->ic_state_mutex);
988 
989 	buf = kmem_cache_alloc(idm.idm_buf_cache, KM_NOSLEEP);
990 	if (buf == NULL) {
991 		idm_conn_rele(ic);
992 		return (NULL);
993 	}
994 
995 	buf->idb_ic		= ic;
996 	buf->idb_buflen		= buflen;
997 	buf->idb_exp_offset	= 0;
998 	buf->idb_bufoffset	= 0;
999 	buf->idb_xfer_len	= 0;
1000 	buf->idb_magic		= IDM_BUF_MAGIC;
1001 	buf->idb_in_transport	= B_FALSE;
1002 	buf->idb_bufbcopy	= B_FALSE;
1003 
1004 	/*
1005 	 * If bufptr is NULL, we have an implicit request to allocate
1006 	 * memory for this IDM buffer handle and register it for use
1007 	 * with the transport. To simplify this, and to give more freedom
1008 	 * to the transport layer for it's own buffer management, both of
1009 	 * these actions will take place in the transport layer.
1010 	 * If bufptr is set, then the caller has allocated memory (or more
1011 	 * likely it's been passed from an upper layer), and we need only
1012 	 * register the buffer for use with the transport layer.
1013 	 */
1014 	if (bufptr == NULL) {
1015 		/*
1016 		 * Allocate a buffer from the transport layer (which
1017 		 * will also register the buffer for use).
1018 		 */
1019 		rc = ic->ic_transport_ops->it_buf_alloc(buf, buflen);
1020 		if (rc != 0) {
1021 			idm_conn_rele(ic);
1022 			kmem_cache_free(idm.idm_buf_cache, buf);
1023 			return (NULL);
1024 		}
1025 		/* Set the bufalloc'd flag */
1026 		buf->idb_bufalloc = B_TRUE;
1027 	} else {
1028 		/*
1029 		 * For large transfers, Set the passed bufptr into
1030 		 * the buf handle, and register the handle with the
1031 		 * transport layer. As memory registration with the
1032 		 * transport layer is a time/cpu intensive operation,
1033 		 * for small transfers (up to a pre-defined bcopy
1034 		 * threshold), use pre-registered memory buffers
1035 		 * and bcopy data at the appropriate time.
1036 		 */
1037 		buf->idb_buf = bufptr;
1038 
1039 		rc = ic->ic_transport_ops->it_buf_setup(buf);
1040 		if (rc != 0) {
1041 			idm_conn_rele(ic);
1042 			kmem_cache_free(idm.idm_buf_cache, buf);
1043 			return (NULL);
1044 		}
1045 		/*
1046 		 * The transport layer is now expected to set the idb_bufalloc
1047 		 * correctly to indicate if resources have been allocated.
1048 		 */
1049 	}
1050 
1051 	IDM_BUFPAT_SET(buf);
1052 
1053 	return (buf);
1054 }
1055 
1056 /*
1057  * idm_buf_free
1058  *
1059  * Release a buffer handle along with the associated buffer that was allocated
1060  * or assigned with idm_buf_alloc
1061  */
1062 void
1063 idm_buf_free(idm_buf_t *buf)
1064 {
1065 	idm_conn_t *ic = buf->idb_ic;
1066 
1067 
1068 	buf->idb_task_binding	= NULL;
1069 
1070 	if (buf->idb_bufalloc) {
1071 		ic->ic_transport_ops->it_buf_free(buf);
1072 	} else {
1073 		ic->ic_transport_ops->it_buf_teardown(buf);
1074 	}
1075 	kmem_cache_free(idm.idm_buf_cache, buf);
1076 	idm_conn_rele(ic);
1077 }
1078 
1079 /*
1080  * idm_buf_bind_in
1081  *
1082  * This function associates a buffer with a task. This is only for use by the
1083  * iSCSI initiator that will have only one buffer per transfer direction
1084  *
1085  */
1086 void
1087 idm_buf_bind_in(idm_task_t *idt, idm_buf_t *buf)
1088 {
1089 	mutex_enter(&idt->idt_mutex);
1090 	idm_buf_bind_in_locked(idt, buf);
1091 	mutex_exit(&idt->idt_mutex);
1092 }
1093 
1094 static void
1095 idm_buf_bind_in_locked(idm_task_t *idt, idm_buf_t *buf)
1096 {
1097 	buf->idb_task_binding = idt;
1098 	buf->idb_ic = idt->idt_ic;
1099 	idm_listbuf_insert(&idt->idt_inbufv, buf);
1100 }
1101 
1102 void
1103 idm_buf_bind_out(idm_task_t *idt, idm_buf_t *buf)
1104 {
1105 	/*
1106 	 * For small transfers, the iSER transport delegates the IDM
1107 	 * layer to bcopy the SCSI Write data for faster IOPS.
1108 	 */
1109 	if (buf->idb_bufbcopy == B_TRUE) {
1110 
1111 		bcopy(buf->idb_bufptr, buf->idb_buf, buf->idb_buflen);
1112 	}
1113 	mutex_enter(&idt->idt_mutex);
1114 	idm_buf_bind_out_locked(idt, buf);
1115 	mutex_exit(&idt->idt_mutex);
1116 }
1117 
1118 static void
1119 idm_buf_bind_out_locked(idm_task_t *idt, idm_buf_t *buf)
1120 {
1121 	buf->idb_task_binding = idt;
1122 	buf->idb_ic = idt->idt_ic;
1123 	idm_listbuf_insert(&idt->idt_outbufv, buf);
1124 }
1125 
1126 void
1127 idm_buf_unbind_in(idm_task_t *idt, idm_buf_t *buf)
1128 {
1129 	/*
1130 	 * For small transfers, the iSER transport delegates the IDM
1131 	 * layer to bcopy the SCSI Read data into the read buufer
1132 	 * for faster IOPS.
1133 	 */
1134 	if (buf->idb_bufbcopy == B_TRUE) {
1135 		bcopy(buf->idb_buf, buf->idb_bufptr, buf->idb_buflen);
1136 	}
1137 	mutex_enter(&idt->idt_mutex);
1138 	idm_buf_unbind_in_locked(idt, buf);
1139 	mutex_exit(&idt->idt_mutex);
1140 }
1141 
1142 static void
1143 idm_buf_unbind_in_locked(idm_task_t *idt, idm_buf_t *buf)
1144 {
1145 	list_remove(&idt->idt_inbufv, buf);
1146 }
1147 
1148 void
1149 idm_buf_unbind_out(idm_task_t *idt, idm_buf_t *buf)
1150 {
1151 	mutex_enter(&idt->idt_mutex);
1152 	idm_buf_unbind_out_locked(idt, buf);
1153 	mutex_exit(&idt->idt_mutex);
1154 }
1155 
1156 static void
1157 idm_buf_unbind_out_locked(idm_task_t *idt, idm_buf_t *buf)
1158 {
1159 	list_remove(&idt->idt_outbufv, buf);
1160 }
1161 
1162 /*
1163  * idm_buf_find() will lookup the idm_buf_t based on the relative offset in the
1164  * iSCSI PDU
1165  */
1166 idm_buf_t *
1167 idm_buf_find(void *lbuf, size_t data_offset)
1168 {
1169 	idm_buf_t	*idb;
1170 	list_t		*lst = (list_t *)lbuf;
1171 
1172 	/* iterate through the list to find the buffer */
1173 	for (idb = list_head(lst); idb != NULL; idb = list_next(lst, idb)) {
1174 
1175 		ASSERT((idb->idb_ic->ic_conn_type == CONN_TYPE_TGT) ||
1176 		    (idb->idb_bufoffset == 0));
1177 
1178 		if ((data_offset >= idb->idb_bufoffset) &&
1179 		    (data_offset < (idb->idb_bufoffset + idb->idb_buflen))) {
1180 
1181 			return (idb);
1182 		}
1183 	}
1184 
1185 	return (NULL);
1186 }
1187 
1188 void
1189 idm_bufpat_set(idm_buf_t *idb)
1190 {
1191 	idm_bufpat_t	*bufpat;
1192 	int		len, i;
1193 
1194 	len = idb->idb_buflen;
1195 	len = (len / sizeof (idm_bufpat_t)) * sizeof (idm_bufpat_t);
1196 
1197 	bufpat = idb->idb_buf;
1198 	for (i = 0; i < len; i += sizeof (idm_bufpat_t)) {
1199 		bufpat->bufpat_idb = idb;
1200 		bufpat->bufpat_bufmagic = IDM_BUF_MAGIC;
1201 		bufpat->bufpat_offset = i;
1202 		bufpat++;
1203 	}
1204 }
1205 
1206 boolean_t
1207 idm_bufpat_check(idm_buf_t *idb, int check_len, idm_bufpat_check_type_t type)
1208 {
1209 	idm_bufpat_t	*bufpat;
1210 	int		len, i;
1211 
1212 	len = (type == BP_CHECK_QUICK) ? sizeof (idm_bufpat_t) : check_len;
1213 	len = (len / sizeof (idm_bufpat_t)) * sizeof (idm_bufpat_t);
1214 	ASSERT(len <= idb->idb_buflen);
1215 	bufpat = idb->idb_buf;
1216 
1217 	/*
1218 	 * Don't check the pattern in buffers that came from outside IDM
1219 	 * (these will be buffers from the initiator that we opted not
1220 	 * to double-buffer)
1221 	 */
1222 	if (!idb->idb_bufalloc)
1223 		return (B_FALSE);
1224 
1225 	/*
1226 	 * Return true if we find the pattern anywhere in the buffer
1227 	 */
1228 	for (i = 0; i < len; i += sizeof (idm_bufpat_t)) {
1229 		if (BUFPAT_MATCH(bufpat, idb)) {
1230 			IDM_CONN_LOG(CE_WARN, "idm_bufpat_check found: "
1231 			    "idb %p bufpat %p "
1232 			    "bufpat_idb=%p bufmagic=%08x offset=%08x",
1233 			    (void *)idb, (void *)bufpat, bufpat->bufpat_idb,
1234 			    bufpat->bufpat_bufmagic, bufpat->bufpat_offset);
1235 			DTRACE_PROBE2(bufpat__pattern__found,
1236 			    idm_buf_t *, idb, idm_bufpat_t *, bufpat);
1237 			if (type == BP_CHECK_ASSERT) {
1238 				ASSERT(0);
1239 			}
1240 			return (B_TRUE);
1241 		}
1242 		bufpat++;
1243 	}
1244 
1245 	return (B_FALSE);
1246 }
1247 
1248 /*
1249  * idm_task_alloc
1250  *
1251  * This function will allocate a idm_task_t structure. A task tag is also
1252  * generated and saved in idt_tt. The task is not active.
1253  */
1254 idm_task_t *
1255 idm_task_alloc(idm_conn_t *ic)
1256 {
1257 	idm_task_t	*idt;
1258 
1259 	ASSERT(ic != NULL);
1260 
1261 	/* Don't allocate new tasks if we are not in FFP */
1262 	if (!ic->ic_ffp) {
1263 		return (NULL);
1264 	}
1265 	idt = kmem_cache_alloc(idm.idm_task_cache, KM_NOSLEEP);
1266 	if (idt == NULL) {
1267 		return (NULL);
1268 	}
1269 
1270 	ASSERT(list_is_empty(&idt->idt_inbufv));
1271 	ASSERT(list_is_empty(&idt->idt_outbufv));
1272 
1273 	mutex_enter(&ic->ic_state_mutex);
1274 	if (!ic->ic_ffp) {
1275 		mutex_exit(&ic->ic_state_mutex);
1276 		kmem_cache_free(idm.idm_task_cache, idt);
1277 		return (NULL);
1278 	}
1279 	idm_conn_hold(ic);
1280 	mutex_exit(&ic->ic_state_mutex);
1281 
1282 	idt->idt_state		= TASK_IDLE;
1283 	idt->idt_ic		= ic;
1284 	idt->idt_private	= NULL;
1285 	idt->idt_exp_datasn	= 0;
1286 	idt->idt_exp_rttsn	= 0;
1287 	idt->idt_flags		= 0;
1288 	return (idt);
1289 }
1290 
1291 /*
1292  * idm_task_start
1293  *
1294  * Mark the task active and initialize some stats. The caller
1295  * sets up the idm_task_t structure with a prior call to idm_task_alloc().
1296  * The task service does not function as a task/work engine, it is the
1297  * responsibility of the initiator to start the data transfer and free the
1298  * resources.
1299  */
1300 void
1301 idm_task_start(idm_task_t *idt, uintptr_t handle)
1302 {
1303 	ASSERT(idt != NULL);
1304 
1305 	/* mark the task as ACTIVE */
1306 	idt->idt_state = TASK_ACTIVE;
1307 	idt->idt_client_handle = handle;
1308 	idt->idt_tx_to_ini_start = idt->idt_tx_to_ini_done =
1309 	    idt->idt_rx_from_ini_start = idt->idt_rx_from_ini_done =
1310 	    idt->idt_tx_bytes = idt->idt_rx_bytes = 0;
1311 }
1312 
1313 /*
1314  * idm_task_done
1315  *
1316  * This function sets the state to indicate that the task is no longer active.
1317  */
1318 void
1319 idm_task_done(idm_task_t *idt)
1320 {
1321 	ASSERT(idt != NULL);
1322 
1323 	mutex_enter(&idt->idt_mutex);
1324 	idt->idt_state = TASK_IDLE;
1325 	mutex_exit(&idt->idt_mutex);
1326 
1327 	/*
1328 	 * Although unlikely it is possible for a reference to come in after
1329 	 * the client has decided the task is over but before we've marked
1330 	 * the task idle.  One specific unavoidable scenario is the case where
1331 	 * received PDU with the matching ITT/TTT results in a successful
1332 	 * lookup of this task.  We are at the mercy of the remote node in
1333 	 * that case so we need to handle it.  Now that the task state
1334 	 * has changed no more references will occur so a simple call to
1335 	 * idm_refcnt_wait_ref should deal with the situation.
1336 	 */
1337 	idm_refcnt_wait_ref(&idt->idt_refcnt);
1338 	idm_refcnt_reset(&idt->idt_refcnt);
1339 }
1340 
1341 /*
1342  * idm_task_free
1343  *
1344  * This function will free the Task Tag and the memory allocated for the task
1345  * idm_task_done should be called prior to this call
1346  */
1347 void
1348 idm_task_free(idm_task_t *idt)
1349 {
1350 	idm_conn_t *ic;
1351 
1352 	ASSERT(idt != NULL);
1353 	ASSERT(idt->idt_refcnt.ir_refcnt == 0);
1354 	ASSERT(idt->idt_state == TASK_IDLE);
1355 
1356 	ic = idt->idt_ic;
1357 
1358 	/*
1359 	 * It's possible for items to still be in the idt_inbufv list if
1360 	 * they were added after idm_free_task_rsrc was called.  We rely on
1361 	 * STMF to free all buffers associated with the task however STMF
1362 	 * doesn't know that we have this reference to the buffers.
1363 	 * Use list_create so that we don't end up with stale references
1364 	 * to these buffers.
1365 	 */
1366 	list_create(&idt->idt_inbufv, sizeof (idm_buf_t),
1367 	    offsetof(idm_buf_t, idb_buflink));
1368 	list_create(&idt->idt_outbufv, sizeof (idm_buf_t),
1369 	    offsetof(idm_buf_t, idb_buflink));
1370 
1371 	kmem_cache_free(idm.idm_task_cache, idt);
1372 
1373 	idm_conn_rele(ic);
1374 }
1375 
1376 /*
1377  * idm_task_find_common
1378  *	common code for idm_task_find() and idm_task_find_and_complete()
1379  */
1380 /*ARGSUSED*/
1381 static idm_task_t *
1382 idm_task_find_common(idm_conn_t *ic, uint32_t itt, uint32_t ttt,
1383     boolean_t complete)
1384 {
1385 	uint32_t	tt, client_handle;
1386 	idm_task_t	*idt;
1387 
1388 	/*
1389 	 * Must match both itt and ttt.  The table is indexed by itt
1390 	 * for initiator connections and ttt for target connections.
1391 	 */
1392 	if (IDM_CONN_ISTGT(ic)) {
1393 		tt = ttt;
1394 		client_handle = itt;
1395 	} else {
1396 		tt = itt;
1397 		client_handle = ttt;
1398 	}
1399 
1400 	rw_enter(&idm.idm_taskid_table_lock, RW_READER);
1401 	if (tt >= idm.idm_taskid_max) {
1402 		rw_exit(&idm.idm_taskid_table_lock);
1403 		return (NULL);
1404 	}
1405 
1406 	idt = idm.idm_taskid_table[tt];
1407 
1408 	if (idt != NULL) {
1409 		mutex_enter(&idt->idt_mutex);
1410 		if ((idt->idt_state != TASK_ACTIVE) ||
1411 		    (idt->idt_ic != ic) ||
1412 		    (IDM_CONN_ISTGT(ic) &&
1413 		    (idt->idt_client_handle != client_handle))) {
1414 			/*
1415 			 * Task doesn't match or task is aborting and
1416 			 * we don't want any more references.
1417 			 */
1418 			if ((idt->idt_ic != ic) &&
1419 			    (idt->idt_state == TASK_ACTIVE) &&
1420 			    (IDM_CONN_ISINI(ic) || idt->idt_client_handle ==
1421 			    client_handle)) {
1422 				IDM_CONN_LOG(CE_WARN,
1423 				"idm_task_find: wrong connection %p != %p",
1424 				    (void *)ic, (void *)idt->idt_ic);
1425 			}
1426 			mutex_exit(&idt->idt_mutex);
1427 			rw_exit(&idm.idm_taskid_table_lock);
1428 			return (NULL);
1429 		}
1430 		idm_task_hold(idt);
1431 		/*
1432 		 * Set the task state to TASK_COMPLETE so it can no longer
1433 		 * be found or aborted.
1434 		 */
1435 		if (B_TRUE == complete)
1436 			idt->idt_state = TASK_COMPLETE;
1437 		mutex_exit(&idt->idt_mutex);
1438 	}
1439 	rw_exit(&idm.idm_taskid_table_lock);
1440 
1441 	return (idt);
1442 }
1443 
1444 /*
1445  * This function looks up a task by task tag.
1446  */
1447 idm_task_t *
1448 idm_task_find(idm_conn_t *ic, uint32_t itt, uint32_t ttt)
1449 {
1450 	return (idm_task_find_common(ic, itt, ttt, B_FALSE));
1451 }
1452 
1453 /*
1454  * This function looks up a task by task tag. If found, the task state
1455  * is atomically set to TASK_COMPLETE so it can longer be found or aborted.
1456  */
1457 idm_task_t *
1458 idm_task_find_and_complete(idm_conn_t *ic, uint32_t itt, uint32_t ttt)
1459 {
1460 	return (idm_task_find_common(ic, itt, ttt, B_TRUE));
1461 }
1462 
1463 /*
1464  * idm_task_find_by_handle
1465  *
1466  * This function looks up a task by the client-private idt_client_handle.
1467  *
1468  * This function should NEVER be called in the performance path.  It is
1469  * intended strictly for error recovery/task management.
1470  */
1471 /*ARGSUSED*/
1472 void *
1473 idm_task_find_by_handle(idm_conn_t *ic, uintptr_t handle)
1474 {
1475 	idm_task_t	*idt = NULL;
1476 	int		idx = 0;
1477 
1478 	rw_enter(&idm.idm_taskid_table_lock, RW_READER);
1479 
1480 	for (idx = 0; idx < idm.idm_taskid_max; idx++) {
1481 		idt = idm.idm_taskid_table[idx];
1482 
1483 		if (idt == NULL)
1484 			continue;
1485 
1486 		mutex_enter(&idt->idt_mutex);
1487 
1488 		if (idt->idt_state != TASK_ACTIVE) {
1489 			/*
1490 			 * Task is either in suspend, abort, or already
1491 			 * complete.
1492 			 */
1493 			mutex_exit(&idt->idt_mutex);
1494 			continue;
1495 		}
1496 
1497 		if (idt->idt_client_handle == handle) {
1498 			idm_task_hold(idt);
1499 			mutex_exit(&idt->idt_mutex);
1500 			break;
1501 		}
1502 
1503 		mutex_exit(&idt->idt_mutex);
1504 	}
1505 
1506 	rw_exit(&idm.idm_taskid_table_lock);
1507 
1508 	if ((idt == NULL) || (idx == idm.idm_taskid_max))
1509 		return (NULL);
1510 
1511 	return (idt->idt_private);
1512 }
1513 
1514 void
1515 idm_task_hold(idm_task_t *idt)
1516 {
1517 	idm_refcnt_hold(&idt->idt_refcnt);
1518 }
1519 
1520 void
1521 idm_task_rele(idm_task_t *idt)
1522 {
1523 	idm_refcnt_rele(&idt->idt_refcnt);
1524 }
1525 
1526 void
1527 idm_task_abort(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type)
1528 {
1529 	idm_task_t	*task;
1530 	int		idx;
1531 
1532 	/*
1533 	 * Passing NULL as the task indicates that all tasks
1534 	 * for this connection should be aborted.
1535 	 */
1536 	if (idt == NULL) {
1537 		/*
1538 		 * Only the connection state machine should ask for
1539 		 * all tasks to abort and this should never happen in FFP.
1540 		 */
1541 		ASSERT(!ic->ic_ffp);
1542 		rw_enter(&idm.idm_taskid_table_lock, RW_READER);
1543 		for (idx = 0; idx < idm.idm_taskid_max; idx++) {
1544 			task = idm.idm_taskid_table[idx];
1545 			if (task == NULL)
1546 				continue;
1547 			mutex_enter(&task->idt_mutex);
1548 			if ((task->idt_state != TASK_IDLE) &&
1549 			    (task->idt_state != TASK_COMPLETE) &&
1550 			    (task->idt_ic == ic)) {
1551 				rw_exit(&idm.idm_taskid_table_lock);
1552 				idm_task_abort_one(ic, task, abort_type);
1553 				rw_enter(&idm.idm_taskid_table_lock, RW_READER);
1554 			} else
1555 				mutex_exit(&task->idt_mutex);
1556 		}
1557 		rw_exit(&idm.idm_taskid_table_lock);
1558 	} else {
1559 		mutex_enter(&idt->idt_mutex);
1560 		idm_task_abort_one(ic, idt, abort_type);
1561 	}
1562 }
1563 
1564 static void
1565 idm_task_abort_unref_cb(void *ref)
1566 {
1567 	idm_task_t *idt = ref;
1568 
1569 	mutex_enter(&idt->idt_mutex);
1570 	switch (idt->idt_state) {
1571 	case TASK_SUSPENDING:
1572 		idt->idt_state = TASK_SUSPENDED;
1573 		mutex_exit(&idt->idt_mutex);
1574 		idm_task_aborted(idt, IDM_STATUS_SUSPENDED);
1575 		return;
1576 	case TASK_ABORTING:
1577 		idt->idt_state = TASK_ABORTED;
1578 		mutex_exit(&idt->idt_mutex);
1579 		idm_task_aborted(idt, IDM_STATUS_ABORTED);
1580 		return;
1581 	default:
1582 		mutex_exit(&idt->idt_mutex);
1583 		ASSERT(0);
1584 		break;
1585 	}
1586 }
1587 
/*
 * Abort the idm task.
 *    Caller must hold the task mutex, which will be released before return
 *
 * ic		- connection whose transport is asked to release the task's
 *		  resources
 * idt		- task to abort or suspend
 * abort_type	- AT_INTERNAL_SUSPEND, AT_INTERNAL_ABORT or AT_TASK_MGMT_ABORT
 *
 * What happens depends on the current task state:
 *   TASK_ACTIVE	- move to SUSPENDING/ABORTING, have the transport free
 *			  the task resources and complete asynchronously via
 *			  idm_task_abort_unref_cb() once references drain.
 *   TASK_SUSPENDING	- an abort request upgrades the state to ABORTING;
 *			  the callback already pending finishes the job.
 *   TASK_SUSPENDED	- an abort request moves to ABORTING and schedules
 *			  the callback again.
 *   TASK_ABORTING,
 *   TASK_ABORTED,
 *   TASK_COMPLETE	- nothing to do.
 */
static void
idm_task_abort_one(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type)
{
	/* Caller must hold the task mutex (see the ASSERT below) */
	ASSERT(mutex_owned(&idt->idt_mutex));
	switch (idt->idt_state) {
	case TASK_ACTIVE:
		switch (abort_type) {
		case AT_INTERNAL_SUSPEND:
			/* Call transport to release any resources */
			idt->idt_state = TASK_SUSPENDING;
			mutex_exit(&idt->idt_mutex);
			ic->ic_transport_ops->it_free_task_rsrc(idt);

			/*
			 * Wait for outstanding references.  When all
			 * references are released the callback will call
			 * idm_task_aborted().
			 */
			idm_refcnt_async_wait_ref(&idt->idt_refcnt,
			    &idm_task_abort_unref_cb);
			return;
		case AT_INTERNAL_ABORT:
		case AT_TASK_MGMT_ABORT:
			idt->idt_state = TASK_ABORTING;
			mutex_exit(&idt->idt_mutex);
			ic->ic_transport_ops->it_free_task_rsrc(idt);

			/*
			 * Wait for outstanding references.  When all
			 * references are released the callback will call
			 * idm_task_aborted().
			 */
			idm_refcnt_async_wait_ref(&idt->idt_refcnt,
			    &idm_task_abort_unref_cb);
			return;
		default:
			ASSERT(0);
		}
		break;
	case TASK_SUSPENDING:
		/* Already called transport_free_task_rsrc(); */
		switch (abort_type) {
		case AT_INTERNAL_SUSPEND:
			/* Already doing it */
			break;
		case AT_INTERNAL_ABORT:
		case AT_TASK_MGMT_ABORT:
			/*
			 * Only the state changes here; no new callback is
			 * scheduled on this path.
			 */
			idt->idt_state = TASK_ABORTING;
			break;
		default:
			ASSERT(0);
		}
		break;
	case TASK_SUSPENDED:
		/* Already called transport_free_task_rsrc(); */
		switch (abort_type) {
		case AT_INTERNAL_SUSPEND:
			/* Already doing it */
			break;
		case AT_INTERNAL_ABORT:
		case AT_TASK_MGMT_ABORT:
			idt->idt_state = TASK_ABORTING;
			mutex_exit(&idt->idt_mutex);

			/*
			 * We could probably call idm_task_aborted directly
			 * here but we may be holding the conn lock. It's
			 * easier to just switch contexts.  Even though
			 * we shouldn't really have any references we'll
			 * set the state to TASK_ABORTING instead of
			 * TASK_ABORTED so we can use the same code path.
			 */
			idm_refcnt_async_wait_ref(&idt->idt_refcnt,
			    &idm_task_abort_unref_cb);
			return;
		default:
			ASSERT(0);
		}
		break;
	case TASK_ABORTING:
	case TASK_ABORTED:
		switch (abort_type) {
		case AT_INTERNAL_SUSPEND:
			/* We're already past this point... */
			/* FALLTHROUGH */
		case AT_INTERNAL_ABORT:
		case AT_TASK_MGMT_ABORT:
			/* Already doing it */
			break;
		default:
			ASSERT(0);
		}
		break;
	case TASK_COMPLETE:
		/*
		 * In this case, let it go.  The status has already been
		 * sent (which may or may not get successfully transmitted)
		 * and we don't want to end up in a race between completing
		 * the status PDU and marking the task suspended.
		 */
		break;
	default:
		ASSERT(0);
	}
	/* Every path that did not return above still owns the task mutex */
	mutex_exit(&idt->idt_mutex);
}
1698 
1699 static void
1700 idm_task_aborted(idm_task_t *idt, idm_status_t status)
1701 {
1702 	(*idt->idt_ic->ic_conn_ops.icb_task_aborted)(idt, status);
1703 }
1704 
1705 /*
1706  * idm_pdu_tx
1707  *
1708  * This is IDM's implementation of the 'Send_Control' operational primitive.
1709  * This function is invoked by an initiator iSCSI layer requesting the transfer
1710  * of a iSCSI command PDU or a target iSCSI layer requesting the transfer of a
1711  * iSCSI response PDU. The PDU will be transmitted as-is by the local Datamover
1712  * layer to the peer iSCSI layer in the remote iSCSI node. The connection info
1713  * and iSCSI PDU-specific qualifiers namely BHS, AHS, DataDescriptor and Size
1714  * are provided as input.
1715  *
1716  */
1717 void
1718 idm_pdu_tx(idm_pdu_t *pdu)
1719 {
1720 	idm_conn_t		*ic = pdu->isp_ic;
1721 	iscsi_async_evt_hdr_t	*async_evt;
1722 
1723 	/*
1724 	 * If we are in full-featured mode then route SCSI-related
1725 	 * commands to the appropriate function vector without checking
1726 	 * the connection state.  We will only be in full-feature mode
1727 	 * when we are in an acceptable state for SCSI PDU's.
1728 	 *
1729 	 * We also need to ensure that there are no PDU events outstanding
1730 	 * on the state machine.  Any non-SCSI PDU's received in full-feature
1731 	 * mode will result in PDU events and until these have been handled
1732 	 * we need to route all PDU's through the state machine as PDU
1733 	 * events to maintain ordering.
1734 	 *
1735 	 * Note that IDM cannot enter FFP mode until it processes in
1736 	 * its state machine the last xmit of the login process.
1737 	 * Hence, checking the IDM_PDU_LOGIN_TX flag here would be
1738 	 * superfluous.
1739 	 */
1740 	mutex_enter(&ic->ic_state_mutex);
1741 	if (ic->ic_ffp && (ic->ic_pdu_events == 0)) {
1742 		mutex_exit(&ic->ic_state_mutex);
1743 		switch (IDM_PDU_OPCODE(pdu)) {
1744 		case ISCSI_OP_SCSI_RSP:
1745 			/* Target only */
1746 			DTRACE_ISCSI_2(scsi__response, idm_conn_t *, ic,
1747 			    iscsi_scsi_rsp_hdr_t *,
1748 			    (iscsi_scsi_rsp_hdr_t *)pdu->isp_hdr);
1749 			idm_pdu_tx_forward(ic, pdu);
1750 			return;
1751 		case ISCSI_OP_SCSI_TASK_MGT_RSP:
1752 			/* Target only */
1753 			DTRACE_ISCSI_2(task__response, idm_conn_t *, ic,
1754 			    iscsi_text_rsp_hdr_t *,
1755 			    (iscsi_text_rsp_hdr_t *)pdu->isp_hdr);
1756 			idm_pdu_tx_forward(ic, pdu);
1757 			return;
1758 		case ISCSI_OP_SCSI_DATA_RSP:
1759 			/* Target only */
1760 			DTRACE_ISCSI_2(data__send, idm_conn_t *, ic,
1761 			    iscsi_data_rsp_hdr_t *,
1762 			    (iscsi_data_rsp_hdr_t *)pdu->isp_hdr);
1763 			idm_pdu_tx_forward(ic, pdu);
1764 			return;
1765 		case ISCSI_OP_RTT_RSP:
1766 			/* Target only */
1767 			DTRACE_ISCSI_2(data__request, idm_conn_t *, ic,
1768 			    iscsi_rtt_hdr_t *,
1769 			    (iscsi_rtt_hdr_t *)pdu->isp_hdr);
1770 			idm_pdu_tx_forward(ic, pdu);
1771 			return;
1772 		case ISCSI_OP_NOOP_IN:
1773 			/* Target only */
1774 			DTRACE_ISCSI_2(nop__send, idm_conn_t *, ic,
1775 			    iscsi_nop_in_hdr_t *,
1776 			    (iscsi_nop_in_hdr_t *)pdu->isp_hdr);
1777 			idm_pdu_tx_forward(ic, pdu);
1778 			return;
1779 		case ISCSI_OP_TEXT_RSP:
1780 			/* Target only */
1781 			DTRACE_ISCSI_2(text__response, idm_conn_t *, ic,
1782 			    iscsi_text_rsp_hdr_t *,
1783 			    (iscsi_text_rsp_hdr_t *)pdu->isp_hdr);
1784 			idm_pdu_tx_forward(ic, pdu);
1785 			return;
1786 		case ISCSI_OP_TEXT_CMD:
1787 		case ISCSI_OP_NOOP_OUT:
1788 		case ISCSI_OP_SCSI_CMD:
1789 		case ISCSI_OP_SCSI_DATA:
1790 		case ISCSI_OP_SCSI_TASK_MGT_MSG:
1791 			/* Initiator only */
1792 			idm_pdu_tx_forward(ic, pdu);
1793 			return;
1794 		default:
1795 			break;
1796 		}
1797 
1798 		mutex_enter(&ic->ic_state_mutex);
1799 	}
1800 
1801 	/*
1802 	 * Any PDU's processed outside of full-feature mode and non-SCSI
1803 	 * PDU's in full-feature mode are handled by generating an
1804 	 * event to the connection state machine.  The state machine
1805 	 * will validate the PDU against the current state and either
1806 	 * transmit the PDU if the opcode is allowed or handle an
1807 	 * error if the PDU is not allowed.
1808 	 *
1809 	 * This code-path will also generate any events that are implied
1810 	 * by the PDU opcode.  For example a "login response" with success
1811 	 * status generates a CE_LOGOUT_SUCCESS_SND event.
1812 	 */
1813 	switch (IDM_PDU_OPCODE(pdu)) {
1814 	case ISCSI_OP_LOGIN_CMD:
1815 		idm_conn_tx_pdu_event(ic, CE_LOGIN_SND, (uintptr_t)pdu);
1816 		break;
1817 	case ISCSI_OP_LOGIN_RSP:
1818 		DTRACE_ISCSI_2(login__response, idm_conn_t *, ic,
1819 		    iscsi_login_rsp_hdr_t *,
1820 		    (iscsi_login_rsp_hdr_t *)pdu->isp_hdr);
1821 		idm_parse_login_rsp(ic, pdu, /* Is RX */ B_FALSE);
1822 		break;
1823 	case ISCSI_OP_LOGOUT_CMD:
1824 		idm_parse_logout_req(ic, pdu, /* Is RX */ B_FALSE);
1825 		break;
1826 	case ISCSI_OP_LOGOUT_RSP:
1827 		DTRACE_ISCSI_2(logout__response, idm_conn_t *, ic,
1828 		    iscsi_logout_rsp_hdr_t *,
1829 		    (iscsi_logout_rsp_hdr_t *)pdu->isp_hdr);
1830 		idm_parse_logout_rsp(ic, pdu, /* Is RX */ B_FALSE);
1831 		break;
1832 	case ISCSI_OP_ASYNC_EVENT:
1833 		DTRACE_ISCSI_2(async__send, idm_conn_t *, ic,
1834 		    iscsi_async_evt_hdr_t *,
1835 		    (iscsi_async_evt_hdr_t *)pdu->isp_hdr);
1836 		async_evt = (iscsi_async_evt_hdr_t *)pdu->isp_hdr;
1837 		switch (async_evt->async_event) {
1838 		case ISCSI_ASYNC_EVENT_REQUEST_LOGOUT:
1839 			idm_conn_tx_pdu_event(ic, CE_ASYNC_LOGOUT_SND,
1840 			    (uintptr_t)pdu);
1841 			break;
1842 		case ISCSI_ASYNC_EVENT_DROPPING_CONNECTION:
1843 			idm_conn_tx_pdu_event(ic, CE_ASYNC_DROP_CONN_SND,
1844 			    (uintptr_t)pdu);
1845 			break;
1846 		case ISCSI_ASYNC_EVENT_DROPPING_ALL_CONNECTIONS:
1847 			idm_conn_tx_pdu_event(ic, CE_ASYNC_DROP_ALL_CONN_SND,
1848 			    (uintptr_t)pdu);
1849 			break;
1850 		case ISCSI_ASYNC_EVENT_SCSI_EVENT:
1851 		case ISCSI_ASYNC_EVENT_PARAM_NEGOTIATION:
1852 		default:
1853 			idm_conn_tx_pdu_event(ic, CE_MISC_TX,
1854 			    (uintptr_t)pdu);
1855 			break;
1856 		}
1857 		break;
1858 	case ISCSI_OP_SCSI_RSP:
1859 		/* Target only */
1860 		DTRACE_ISCSI_2(scsi__response, idm_conn_t *, ic,
1861 		    iscsi_scsi_rsp_hdr_t *,
1862 		    (iscsi_scsi_rsp_hdr_t *)pdu->isp_hdr);
1863 		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1864 		break;
1865 	case ISCSI_OP_SCSI_TASK_MGT_RSP:
1866 		/* Target only */
1867 		DTRACE_ISCSI_2(task__response, idm_conn_t *, ic,
1868 		    iscsi_scsi_task_mgt_rsp_hdr_t *,
1869 		    (iscsi_scsi_task_mgt_rsp_hdr_t *)pdu->isp_hdr);
1870 		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1871 		break;
1872 	case ISCSI_OP_SCSI_DATA_RSP:
1873 		/* Target only */
1874 		DTRACE_ISCSI_2(data__send, idm_conn_t *, ic,
1875 		    iscsi_data_rsp_hdr_t *,
1876 		    (iscsi_data_rsp_hdr_t *)pdu->isp_hdr);
1877 		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1878 		break;
1879 	case ISCSI_OP_RTT_RSP:
1880 		/* Target only */
1881 		DTRACE_ISCSI_2(data__request, idm_conn_t *, ic,
1882 		    iscsi_rtt_hdr_t *,
1883 		    (iscsi_rtt_hdr_t *)pdu->isp_hdr);
1884 		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1885 		break;
1886 	case ISCSI_OP_NOOP_IN:
1887 		/* Target only */
1888 		DTRACE_ISCSI_2(nop__send, idm_conn_t *, ic,
1889 		    iscsi_nop_in_hdr_t *,
1890 		    (iscsi_nop_in_hdr_t *)pdu->isp_hdr);
1891 		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1892 		break;
1893 	case ISCSI_OP_TEXT_RSP:
1894 		/* Target only */
1895 		DTRACE_ISCSI_2(text__response, idm_conn_t *, ic,
1896 		    iscsi_text_rsp_hdr_t *,
1897 		    (iscsi_text_rsp_hdr_t *)pdu->isp_hdr);
1898 		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1899 		break;
1900 		/* Initiator only */
1901 	case ISCSI_OP_SCSI_CMD:
1902 	case ISCSI_OP_SCSI_TASK_MGT_MSG:
1903 	case ISCSI_OP_SCSI_DATA:
1904 	case ISCSI_OP_NOOP_OUT:
1905 	case ISCSI_OP_TEXT_CMD:
1906 	case ISCSI_OP_SNACK_CMD:
1907 	case ISCSI_OP_REJECT_MSG:
1908 	default:
1909 		/*
1910 		 * Connection state machine will validate these PDU's against
1911 		 * the current state.  A PDU not allowed in the current
1912 		 * state will cause a protocol error.
1913 		 */
1914 		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1915 		break;
1916 	}
1917 	mutex_exit(&ic->ic_state_mutex);
1918 }
1919 
1920 /*
1921  * Common allocation of a PDU along with memory for header and data.
1922  */
1923 static idm_pdu_t *
1924 idm_pdu_alloc_common(uint_t hdrlen, uint_t datalen, int sleepflag)
1925 {
1926 	idm_pdu_t *result;
1927 
1928 	/*
1929 	 * IDM clients should cache these structures for performance
1930 	 * critical paths.  We can't cache effectively in IDM because we
1931 	 * don't know the correct header and data size.
1932 	 *
1933 	 * Valid header length is assumed to be hdrlen and valid data
1934 	 * length is assumed to be datalen.  isp_hdrlen and isp_datalen
1935 	 * can be adjusted after the PDU is returned if necessary.
1936 	 */
1937 	result = kmem_zalloc(sizeof (idm_pdu_t) + hdrlen + datalen, sleepflag);
1938 	if (result != NULL) {
1939 		/* For idm_pdu_free sanity check */
1940 		result->isp_flags |= IDM_PDU_ALLOC;
1941 		/* pointer arithmetic */
1942 		result->isp_hdr = (iscsi_hdr_t *)(result + 1);
1943 		result->isp_hdrlen = hdrlen;
1944 		result->isp_hdrbuflen = hdrlen;
1945 		result->isp_transport_hdrlen = 0;
1946 		if (datalen != 0)
1947 			result->isp_data = (uint8_t *)result->isp_hdr + hdrlen;
1948 		result->isp_datalen = datalen;
1949 		result->isp_databuflen = datalen;
1950 		result->isp_magic = IDM_PDU_MAGIC;
1951 	}
1952 
1953 	return (result);
1954 }
1955 
1956 /*
1957  * Typical idm_pdu_alloc invocation, will block for resources.
1958  */
1959 idm_pdu_t *
1960 idm_pdu_alloc(uint_t hdrlen, uint_t datalen)
1961 {
1962 	return (idm_pdu_alloc_common(hdrlen, datalen, KM_SLEEP));
1963 }
1964 
1965 /*
1966  * Non-blocking idm_pdu_alloc implementation, returns NULL if resources
1967  * are not available.  Needed for transport-layer allocations which may
1968  * be invoking in interrupt context.
1969  */
1970 idm_pdu_t *
1971 idm_pdu_alloc_nosleep(uint_t hdrlen, uint_t datalen)
1972 {
1973 	return (idm_pdu_alloc_common(hdrlen, datalen, KM_NOSLEEP));
1974 }
1975 
1976 /*
1977  * Free a PDU previously allocated with idm_pdu_alloc() including any
1978  * header and data space allocated as part of the original request.
1979  * Additional memory regions referenced by subsequent modification of
1980  * the isp_hdr and/or isp_data fields will not be freed.
1981  */
1982 void
1983 idm_pdu_free(idm_pdu_t *pdu)
1984 {
1985 	/* Make sure the structure was allocated using idm_pdu_alloc() */
1986 	ASSERT(pdu->isp_flags & IDM_PDU_ALLOC);
1987 	kmem_free(pdu,
1988 	    sizeof (idm_pdu_t) + pdu->isp_hdrbuflen + pdu->isp_databuflen);
1989 }
1990 
1991 /*
1992  * Initialize the connection, private and callback fields in a PDU.
1993  */
1994 void
1995 idm_pdu_init(idm_pdu_t *pdu, idm_conn_t *ic, void *private, idm_pdu_cb_t *cb)
1996 {
1997 	/*
1998 	 * idm_pdu_complete() will call idm_pdu_free if the callback is
1999 	 * NULL.  This will only work if the PDU was originally allocated
2000 	 * with idm_pdu_alloc().
2001 	 */
2002 	ASSERT((pdu->isp_flags & IDM_PDU_ALLOC) ||
2003 	    (cb != NULL));
2004 	pdu->isp_magic = IDM_PDU_MAGIC;
2005 	pdu->isp_ic = ic;
2006 	pdu->isp_private = private;
2007 	pdu->isp_callback = cb;
2008 }
2009 
2010 /*
2011  * Initialize the header and header length field.  This function should
2012  * not be used to adjust the header length in a buffer allocated via
2013  * pdu_pdu_alloc since it overwrites the existing header pointer.
2014  */
2015 void
2016 idm_pdu_init_hdr(idm_pdu_t *pdu, uint8_t *hdr, uint_t hdrlen)
2017 {
2018 	pdu->isp_hdr = (iscsi_hdr_t *)((void *)hdr);
2019 	pdu->isp_hdrlen = hdrlen;
2020 }
2021 
2022 /*
2023  * Initialize the data and data length fields.  This function should
2024  * not be used to adjust the data length of a buffer allocated via
2025  * idm_pdu_alloc since it overwrites the existing data pointer.
2026  */
2027 void
2028 idm_pdu_init_data(idm_pdu_t *pdu, uint8_t *data, uint_t datalen)
2029 {
2030 	pdu->isp_data = data;
2031 	pdu->isp_datalen = datalen;
2032 }
2033 
2034 void
2035 idm_pdu_complete(idm_pdu_t *pdu, idm_status_t status)
2036 {
2037 	if (pdu->isp_callback) {
2038 		pdu->isp_status = status;
2039 		(*pdu->isp_callback)(pdu, status);
2040 	} else {
2041 		idm_pdu_free(pdu);
2042 	}
2043 }
2044 
2045 /*
2046  * State machine auditing
2047  */
2048 
2049 void
2050 idm_sm_audit_init(sm_audit_buf_t *audit_buf)
2051 {
2052 	bzero(audit_buf, sizeof (sm_audit_buf_t));
2053 	audit_buf->sab_max_index = SM_AUDIT_BUF_MAX_REC - 1;
2054 }
2055 
2056 static
2057 sm_audit_record_t *
2058 idm_sm_audit_common(sm_audit_buf_t *audit_buf, sm_audit_record_type_t r_type,
2059     sm_audit_sm_type_t sm_type,
2060     int current_state)
2061 {
2062 	sm_audit_record_t *sar;
2063 
2064 	sar = audit_buf->sab_records;
2065 	sar += audit_buf->sab_index;
2066 	audit_buf->sab_index++;
2067 	audit_buf->sab_index &= audit_buf->sab_max_index;
2068 
2069 	sar->sar_type = r_type;
2070 	gethrestime(&sar->sar_timestamp);
2071 	sar->sar_sm_type = sm_type;
2072 	sar->sar_state = current_state;
2073 
2074 	return (sar);
2075 }
2076 
2077 void
2078 idm_sm_audit_event(sm_audit_buf_t *audit_buf,
2079     sm_audit_sm_type_t sm_type, int current_state,
2080     int event, uintptr_t event_info)
2081 {
2082 	sm_audit_record_t *sar;
2083 
2084 	sar = idm_sm_audit_common(audit_buf, SAR_STATE_EVENT,
2085 	    sm_type, current_state);
2086 	sar->sar_event = event;
2087 	sar->sar_event_info = event_info;
2088 }
2089 
2090 void
2091 idm_sm_audit_state_change(sm_audit_buf_t *audit_buf,
2092     sm_audit_sm_type_t sm_type, int current_state, int new_state)
2093 {
2094 	sm_audit_record_t *sar;
2095 
2096 	sar = idm_sm_audit_common(audit_buf, SAR_STATE_CHANGE,
2097 	    sm_type, current_state);
2098 	sar->sar_new_state = new_state;
2099 }
2100 
2101 
2102 /*
2103  * Object reference tracking
2104  */
2105 
2106 void
2107 idm_refcnt_init(idm_refcnt_t *refcnt, void *referenced_obj)
2108 {
2109 	bzero(refcnt, sizeof (*refcnt));
2110 	idm_refcnt_reset(refcnt);
2111 	refcnt->ir_referenced_obj = referenced_obj;
2112 	bzero(&refcnt->ir_audit_buf, sizeof (refcnt_audit_buf_t));
2113 	refcnt->ir_audit_buf.anb_max_index = REFCNT_AUDIT_BUF_MAX_REC - 1;
2114 	mutex_init(&refcnt->ir_mutex, NULL, MUTEX_DEFAULT, NULL);
2115 	cv_init(&refcnt->ir_cv, NULL, CV_DEFAULT, NULL);
2116 }
2117 
/*
 * Tear down the condition variable and mutex of a reference count
 * structure.  The reference count is expected to be zero (ASSERT).
 */
void
idm_refcnt_destroy(idm_refcnt_t *refcnt)
{
	/*
	 * Grab the mutex to ensure there are no other lingering threads
	 * holding the mutex before we destroy it (e.g. idm_refcnt_rele just
	 * after the refcnt goes to zero if ir_waiting == REF_WAIT_ASYNC).
	 * The mutex is not released again; it is destroyed while held by
	 * this thread.
	 */
	mutex_enter(&refcnt->ir_mutex);
	ASSERT(refcnt->ir_refcnt == 0);
	cv_destroy(&refcnt->ir_cv);
	mutex_destroy(&refcnt->ir_mutex);
}
2131 
2132 void
2133 idm_refcnt_reset(idm_refcnt_t *refcnt)
2134 {
2135 	refcnt->ir_waiting = REF_NOWAIT;
2136 	refcnt->ir_refcnt = 0;
2137 }
2138 
/*
 * Take one reference: increment ir_refcnt and record an audit entry,
 * both under ir_mutex.
 */
void
idm_refcnt_hold(idm_refcnt_t *refcnt)
{
	/*
	 * Nothing should take a hold on an object after a call to
	 * idm_refcnt_wait_ref or idm_refcnt_async_wait_ref.  Note that
	 * this check reads ir_waiting before ir_mutex is acquired.
	 */
	ASSERT(refcnt->ir_waiting == REF_NOWAIT);

	mutex_enter(&refcnt->ir_mutex);
	refcnt->ir_refcnt++;
	REFCNT_AUDIT(refcnt);
	mutex_exit(&refcnt->ir_mutex);
}
2153 
2154 static void
2155 idm_refcnt_unref_task(void *refcnt_void)
2156 {
2157 	idm_refcnt_t *refcnt = refcnt_void;
2158 
2159 	REFCNT_AUDIT(refcnt);
2160 	(*refcnt->ir_cb)(refcnt->ir_referenced_obj);
2161 }
2162 
/*
 * Drop one reference.  If a waiter has been registered (ir_waiting is
 * not REF_NOWAIT) and this was the last reference, notify the waiter:
 * an asynchronous waiter gets idm_refcnt_unref_task dispatched on the
 * global taskq, a synchronous waiter is signalled through ir_cv.
 */
void
idm_refcnt_rele(idm_refcnt_t *refcnt)
{
	mutex_enter(&refcnt->ir_mutex);
	ASSERT(refcnt->ir_refcnt > 0);
	refcnt->ir_refcnt--;
	REFCNT_AUDIT(refcnt);
	if (refcnt->ir_waiting == REF_NOWAIT) {
		/* No one is waiting on this object */
		mutex_exit(&refcnt->ir_mutex);
		return;
	}

	/*
	 * Someone is waiting for this object to go idle so check if
	 * refcnt is 0.  Waiting on an object then later grabbing another
	 * reference is not allowed so we don't need to handle that case.
	 */
	if (refcnt->ir_refcnt == 0) {
		if (refcnt->ir_waiting == REF_WAIT_ASYNC) {
			/*
			 * The dispatch happens with ir_mutex still held;
			 * a failed dispatch is only logged.
			 */
			if (taskq_dispatch(idm.idm_global_taskq,
			    &idm_refcnt_unref_task, refcnt, TQ_SLEEP) ==
			    TASKQID_INVALID) {
				cmn_err(CE_WARN,
				    "idm_refcnt_rele: Couldn't dispatch task");
			}
		} else if (refcnt->ir_waiting == REF_WAIT_SYNC) {
			/* Wake the thread blocked in idm_refcnt_wait_ref */
			cv_signal(&refcnt->ir_cv);
		}
	}
	mutex_exit(&refcnt->ir_mutex);
}
2195 
2196 void
2197 idm_refcnt_rele_and_destroy(idm_refcnt_t *refcnt, idm_refcnt_cb_t *cb_func)
2198 {
2199 	mutex_enter(&refcnt->ir_mutex);
2200 	ASSERT(refcnt->ir_refcnt > 0);
2201 	refcnt->ir_refcnt--;
2202 	REFCNT_AUDIT(refcnt);
2203 
2204 	/*
2205 	 * Someone is waiting for this object to go idle so check if
2206 	 * refcnt is 0.  Waiting on an object then later grabbing another
2207 	 * reference is not allowed so we don't need to handle that case.
2208 	 */
2209 	if (refcnt->ir_refcnt == 0) {
2210 		refcnt->ir_cb = cb_func;
2211 		refcnt->ir_waiting = REF_WAIT_ASYNC;
2212 		if (taskq_dispatch(idm.idm_global_taskq,
2213 		    &idm_refcnt_unref_task, refcnt, TQ_SLEEP) ==
2214 		    TASKQID_INVALID) {
2215 			cmn_err(CE_WARN,
2216 			    "idm_refcnt_rele: Couldn't dispatch task");
2217 		}
2218 	}
2219 	mutex_exit(&refcnt->ir_mutex);
2220 }
2221 
/*
 * Block the calling thread until the reference count reaches zero.
 * The refcnt is marked REF_WAIT_SYNC so that idm_refcnt_rele() signals
 * ir_cv when the last reference is dropped.
 */
void
idm_refcnt_wait_ref(idm_refcnt_t *refcnt)
{
	mutex_enter(&refcnt->ir_mutex);
	refcnt->ir_waiting = REF_WAIT_SYNC;
	REFCNT_AUDIT(refcnt);
	/* Re-check the count after every wakeup */
	while (refcnt->ir_refcnt != 0)
		cv_wait(&refcnt->ir_cv, &refcnt->ir_mutex);
	mutex_exit(&refcnt->ir_mutex);
}
2232 
/*
 * Register cb_func to be called (via idm_refcnt_unref_task on the global
 * taskq) once the object is unreferenced, without blocking the caller.
 * The refcnt is marked REF_WAIT_ASYNC; if the count is already zero the
 * task is dispatched immediately from here.
 */
void
idm_refcnt_async_wait_ref(idm_refcnt_t *refcnt, idm_refcnt_cb_t *cb_func)
{
	mutex_enter(&refcnt->ir_mutex);
	refcnt->ir_waiting = REF_WAIT_ASYNC;
	refcnt->ir_cb = cb_func;
	REFCNT_AUDIT(refcnt);
	/*
	 * It's possible we don't have any references.  To make things easier
	 * on the caller use a taskq to call the callback instead of
	 * calling it synchronously
	 */
	if (refcnt->ir_refcnt == 0) {
		/* A failed dispatch is only logged */
		if (taskq_dispatch(idm.idm_global_taskq,
		    &idm_refcnt_unref_task, refcnt, TQ_SLEEP) ==
		    TASKQID_INVALID) {
			cmn_err(CE_WARN,
			    "idm_refcnt_async_wait_ref: "
			    "Couldn't dispatch task");
		}
	}
	mutex_exit(&refcnt->ir_mutex);
}
2256 
2257 void
2258 idm_refcnt_destroy_unref_obj(idm_refcnt_t *refcnt,
2259     idm_refcnt_cb_t *cb_func)
2260 {
2261 	mutex_enter(&refcnt->ir_mutex);
2262 	if (refcnt->ir_refcnt == 0) {
2263 		mutex_exit(&refcnt->ir_mutex);
2264 		(*cb_func)(refcnt->ir_referenced_obj);
2265 		return;
2266 	}
2267 	mutex_exit(&refcnt->ir_mutex);
2268 }
2269 
2270 void
2271 idm_conn_hold(idm_conn_t *ic)
2272 {
2273 	idm_refcnt_hold(&ic->ic_refcnt);
2274 }
2275 
2276 void
2277 idm_conn_rele(idm_conn_t *ic)
2278 {
2279 	idm_refcnt_rele(&ic->ic_refcnt);
2280 }
2281 
2282 void
2283 idm_conn_set_target_name(idm_conn_t *ic, char *target_name)
2284 {
2285 	(void) strlcpy(ic->ic_target_name, target_name, ISCSI_MAX_NAME_LEN + 1);
2286 }
2287 
2288 void
2289 idm_conn_set_initiator_name(idm_conn_t *ic, char *initiator_name)
2290 {
2291 	(void) strlcpy(ic->ic_initiator_name, initiator_name,
2292 	    ISCSI_MAX_NAME_LEN + 1);
2293 }
2294 
2295 void
2296 idm_conn_set_isid(idm_conn_t *ic, uint8_t isid[ISCSI_ISID_LEN])
2297 {
2298 	(void) snprintf(ic->ic_isid, ISCSI_MAX_ISID_LEN + 1,
2299 	    "%02x%02x%02x%02x%02x%02x",
2300 	    isid[0], isid[1], isid[2], isid[3], isid[4], isid[5]);
2301 }
2302 
2303 static int
2304 _idm_init(void)
2305 {
2306 	/* Initialize the rwlock for the taskid table */
2307 	rw_init(&idm.idm_taskid_table_lock, NULL, RW_DRIVER, NULL);
2308 
2309 	/* Initialize the global mutex and taskq */
2310 	mutex_init(&idm.idm_global_mutex, NULL, MUTEX_DEFAULT, NULL);
2311 
2312 	cv_init(&idm.idm_tgt_svc_cv, NULL, CV_DEFAULT, NULL);
2313 	cv_init(&idm.idm_wd_cv, NULL, CV_DEFAULT, NULL);
2314 
2315 	/*
2316 	 * The maximum allocation needs to be high here since there can be
2317 	 * many concurrent tasks using the global taskq.
2318 	 */
2319 	idm.idm_global_taskq = taskq_create("idm_global_taskq", 1, minclsyspri,
2320 	    128, 16384, TASKQ_PREPOPULATE);
2321 	if (idm.idm_global_taskq == NULL) {
2322 		cv_destroy(&idm.idm_wd_cv);
2323 		cv_destroy(&idm.idm_tgt_svc_cv);
2324 		mutex_destroy(&idm.idm_global_mutex);
2325 		rw_destroy(&idm.idm_taskid_table_lock);
2326 		return (ENOMEM);
2327 	}
2328 
2329 	/* Start watchdog thread */
2330 	idm.idm_wd_thread = thread_create(NULL, 0,
2331 	    idm_wd_thread, NULL, 0, &p0, TS_RUN, minclsyspri);
2332 	if (idm.idm_wd_thread == NULL) {
2333 		/* Couldn't create the watchdog thread */
2334 		taskq_destroy(idm.idm_global_taskq);
2335 		cv_destroy(&idm.idm_wd_cv);
2336 		cv_destroy(&idm.idm_tgt_svc_cv);
2337 		mutex_destroy(&idm.idm_global_mutex);
2338 		rw_destroy(&idm.idm_taskid_table_lock);
2339 		return (ENOMEM);
2340 	}
2341 
2342 	/* Pause until the watchdog thread is running */
2343 	mutex_enter(&idm.idm_global_mutex);
2344 	while (!idm.idm_wd_thread_running)
2345 		cv_wait(&idm.idm_wd_cv, &idm.idm_global_mutex);
2346 	mutex_exit(&idm.idm_global_mutex);
2347 
2348 	/*
2349 	 * Allocate the task ID table and set "next" to 0.
2350 	 */
2351 
2352 	idm.idm_taskid_max = idm_max_taskids;
2353 	idm.idm_taskid_table = (idm_task_t **)
2354 	    kmem_zalloc(idm.idm_taskid_max * sizeof (idm_task_t *), KM_SLEEP);
2355 	idm.idm_taskid_next = 0;
2356 
2357 	/* Create the global buffer and task kmem caches */
2358 	idm.idm_buf_cache = kmem_cache_create("idm_buf_cache",
2359 	    sizeof (idm_buf_t), 8, NULL, NULL, NULL, NULL, NULL, KM_SLEEP);
2360 
2361 	/*
2362 	 * Note, we're explicitly allocating an additional iSER header-
2363 	 * sized chunk for each of these elements. See idm_task_constructor().
2364 	 */
2365 	idm.idm_task_cache = kmem_cache_create("idm_task_cache",
2366 	    sizeof (idm_task_t) + IDM_TRANSPORT_HEADER_LENGTH, 8,
2367 	    &idm_task_constructor, &idm_task_destructor,
2368 	    NULL, NULL, NULL, KM_SLEEP);
2369 
2370 	/* Create the service and connection context lists */
2371 	list_create(&idm.idm_tgt_svc_list, sizeof (idm_svc_t),
2372 	    offsetof(idm_svc_t, is_list_node));
2373 	list_create(&idm.idm_tgt_conn_list, sizeof (idm_conn_t),
2374 	    offsetof(idm_conn_t, ic_list_node));
2375 	list_create(&idm.idm_ini_conn_list, sizeof (idm_conn_t),
2376 	    offsetof(idm_conn_t, ic_list_node));
2377 
2378 	/* Initialize the native sockets transport */
2379 	idm_so_init(&idm_transport_list[IDM_TRANSPORT_TYPE_SOCKETS]);
2380 
2381 	/* Create connection ID pool */
2382 	(void) idm_idpool_create(&idm.idm_conn_id_pool);
2383 
2384 	return (DDI_SUCCESS);
2385 }
2386 
2387 static int
2388 _idm_fini(void)
2389 {
2390 	if (!list_is_empty(&idm.idm_ini_conn_list) ||
2391 	    !list_is_empty(&idm.idm_tgt_conn_list) ||
2392 	    !list_is_empty(&idm.idm_tgt_svc_list)) {
2393 		return (EBUSY);
2394 	}
2395 
2396 	mutex_enter(&idm.idm_global_mutex);
2397 	idm.idm_wd_thread_running = B_FALSE;
2398 	cv_signal(&idm.idm_wd_cv);
2399 	mutex_exit(&idm.idm_global_mutex);
2400 
2401 	thread_join(idm.idm_wd_thread_did);
2402 
2403 	idm_idpool_destroy(&idm.idm_conn_id_pool);
2404 
2405 	/* Close any LDI handles we have open on transport drivers */
2406 	mutex_enter(&idm.idm_global_mutex);
2407 	idm_transport_teardown();
2408 	mutex_exit(&idm.idm_global_mutex);
2409 
2410 	/* Teardown the native sockets transport */
2411 	idm_so_fini();
2412 
2413 	list_destroy(&idm.idm_ini_conn_list);
2414 	list_destroy(&idm.idm_tgt_conn_list);
2415 	list_destroy(&idm.idm_tgt_svc_list);
2416 	kmem_cache_destroy(idm.idm_task_cache);
2417 	kmem_cache_destroy(idm.idm_buf_cache);
2418 	kmem_free(idm.idm_taskid_table,
2419 	    idm.idm_taskid_max * sizeof (idm_task_t *));
2420 	mutex_destroy(&idm.idm_global_mutex);
2421 	cv_destroy(&idm.idm_wd_cv);
2422 	cv_destroy(&idm.idm_tgt_svc_cv);
2423 	rw_destroy(&idm.idm_taskid_table_lock);
2424 
2425 	return (0);
2426 }
2427