xref: /titanic_52/usr/src/uts/common/io/idm/idm.c (revision 55f5292c612446ce6f93ddd248c0019b5974618b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/cpuvar.h>
27 #include <sys/conf.h>
28 #include <sys/file.h>
29 #include <sys/ddi.h>
30 #include <sys/sunddi.h>
31 #include <sys/modctl.h>
32 
33 #include <sys/socket.h>
34 #include <sys/strsubr.h>
35 #include <sys/sysmacros.h>
36 
37 #include <sys/socketvar.h>
38 #include <netinet/in.h>
39 
40 #include <sys/idm/idm.h>
41 #include <sys/idm/idm_so.h>
42 
43 #define	IDM_NAME_VERSION	"iSCSI Data Mover"
44 
45 extern struct mod_ops mod_miscops;
46 extern struct mod_ops mod_miscops;
47 
48 static struct modlmisc modlmisc = {
49 	&mod_miscops,	/* Type of module */
50 	IDM_NAME_VERSION
51 };
52 
53 static struct modlinkage modlinkage = {
54 	MODREV_1, (void *)&modlmisc, NULL
55 };
56 
57 extern int idm_task_compare(const void *t1, const void *t2);
58 extern void idm_wd_thread(void *arg);
59 
60 static int _idm_init(void);
61 static int _idm_fini(void);
62 static void idm_buf_bind_in_locked(idm_task_t *idt, idm_buf_t *buf);
63 static void idm_buf_bind_out_locked(idm_task_t *idt, idm_buf_t *buf);
64 static void idm_buf_unbind_in_locked(idm_task_t *idt, idm_buf_t *buf);
65 static void idm_buf_unbind_out_locked(idm_task_t *idt, idm_buf_t *buf);
66 static void idm_task_abort_one(idm_conn_t *ic, idm_task_t *idt,
67     idm_abort_type_t abort_type);
68 static void idm_task_aborted(idm_task_t *idt, idm_status_t status);
69 static idm_pdu_t *idm_pdu_alloc_common(uint_t hdrlen, uint_t datalen,
70     int sleepflag);
71 
72 boolean_t idm_conn_logging = 0;
73 boolean_t idm_svc_logging = 0;
74 #ifdef DEBUG
75 boolean_t idm_pattern_checking = 1;
76 #else
77 boolean_t idm_pattern_checking = 0;
78 #endif
79 
80 /*
81  * Potential tuneable for the maximum number of tasks.  Default to
82  * IDM_TASKIDS_MAX
83  */
84 
85 uint32_t	idm_max_taskids = IDM_TASKIDS_MAX;
86 
87 /*
88  * Global list of transport handles
89  *   These are listed in preferential order, so we can simply take the
90  *   first "it_conn_is_capable" hit. Note also that the order maps to
91  *   the order of the idm_transport_type_t list.
92  */
93 idm_transport_t idm_transport_list[] = {
94 
95 	/* iSER on InfiniBand transport handle */
96 	{IDM_TRANSPORT_TYPE_ISER,	/* type */
97 	"/devices/ib/iser@0:iser",	/* device path */
98 	NULL,				/* LDI handle */
99 	NULL,				/* transport ops */
100 	NULL},				/* transport caps */
101 
102 	/* IDM native sockets transport handle */
103 	{IDM_TRANSPORT_TYPE_SOCKETS,	/* type */
104 	NULL,				/* device path */
105 	NULL,				/* LDI handle */
106 	NULL,				/* transport ops */
107 	NULL}				/* transport caps */
108 
109 };
110 
111 int
112 _init(void)
113 {
114 	int rc;
115 
116 	if ((rc = _idm_init()) != 0) {
117 		return (rc);
118 	}
119 
120 	return (mod_install(&modlinkage));
121 }
122 
123 int
124 _fini(void)
125 {
126 	int rc;
127 
128 	if ((rc = _idm_fini()) != 0) {
129 		return (rc);
130 	}
131 
132 	if ((rc = mod_remove(&modlinkage)) != 0) {
133 		return (rc);
134 	}
135 
136 	return (rc);
137 }
138 
139 int
140 _info(struct modinfo *modinfop)
141 {
142 	return (mod_info(&modlinkage, modinfop));
143 }
144 
145 /*
146  * idm_transport_register()
147  *
148  * Provides a mechanism for an IDM transport driver to register its
149  * transport ops and caps with the IDM kernel module. Invoked during
150  * a transport driver's attach routine.
151  */
152 idm_status_t
153 idm_transport_register(idm_transport_attr_t *attr)
154 {
155 	ASSERT(attr->it_ops != NULL);
156 	ASSERT(attr->it_caps != NULL);
157 
158 	switch (attr->type) {
159 	/* All known non-native transports here; for now, iSER */
160 	case IDM_TRANSPORT_TYPE_ISER:
161 		idm_transport_list[attr->type].it_ops	= attr->it_ops;
162 		idm_transport_list[attr->type].it_caps	= attr->it_caps;
163 		return (IDM_STATUS_SUCCESS);
164 
165 	default:
166 		cmn_err(CE_NOTE, "idm: unknown transport type (0x%x) in "
167 		    "idm_transport_register", attr->type);
168 		return (IDM_STATUS_SUCCESS);
169 	}
170 }
171 
172 /*
173  * idm_ini_conn_create
174  *
175  * This function is invoked by the iSCSI layer to create a connection context.
176  * This does not actually establish the socket connection.
177  *
178  * cr - Connection request parameters
179  * new_con - Output parameter that contains the new request if successful
180  *
181  */
182 idm_status_t
183 idm_ini_conn_create(idm_conn_req_t *cr, idm_conn_t **new_con)
184 {
185 	idm_transport_t		*it;
186 	idm_conn_t		*ic;
187 	int			rc;
188 
189 	it = idm_transport_lookup(cr);
190 
191 retry:
192 	ic = idm_conn_create_common(CONN_TYPE_INI, it->it_type,
193 	    &cr->icr_conn_ops);
194 
195 	bcopy(&cr->cr_ini_dst_addr, &ic->ic_ini_dst_addr,
196 	    sizeof (cr->cr_ini_dst_addr));
197 
198 	/* create the transport-specific connection components */
199 	rc = it->it_ops->it_ini_conn_create(cr, ic);
200 	if (rc != IDM_STATUS_SUCCESS) {
201 		/* cleanup the failed connection */
202 		idm_conn_destroy_common(ic);
203 
204 		/*
205 		 * It is possible for an IB client to connect to
206 		 * an ethernet-only client via an IB-eth gateway.
207 		 * Therefore, if we are attempting to use iSER and
208 		 * fail, retry with sockets before ultimately
209 		 * failing the connection.
210 		 */
211 		if (it->it_type == IDM_TRANSPORT_TYPE_ISER) {
212 			it = &idm_transport_list[IDM_TRANSPORT_TYPE_SOCKETS];
213 			goto retry;
214 		}
215 
216 		return (IDM_STATUS_FAIL);
217 	}
218 
219 	*new_con = ic;
220 
221 	mutex_enter(&idm.idm_global_mutex);
222 	list_insert_tail(&idm.idm_ini_conn_list, ic);
223 	mutex_exit(&idm.idm_global_mutex);
224 
225 	return (IDM_STATUS_SUCCESS);
226 }
227 
228 /*
229  * idm_ini_conn_destroy
230  *
231  * Releases any resources associated with the connection.  This is the
232  * complement to idm_ini_conn_create.
233  * ic - idm_conn_t structure representing the relevant connection
234  *
235  */
236 void
237 idm_ini_conn_destroy_task(void *ic_void)
238 {
239 	idm_conn_t *ic = ic_void;
240 
241 	ic->ic_transport_ops->it_ini_conn_destroy(ic);
242 	idm_conn_destroy_common(ic);
243 }
244 
245 void
246 idm_ini_conn_destroy(idm_conn_t *ic)
247 {
248 	/*
249 	 * It's reasonable for the initiator to call idm_ini_conn_destroy
250 	 * from within the context of the CN_CONNECT_DESTROY notification.
251 	 * That's a problem since we want to destroy the taskq for the
252 	 * state machine associated with the connection.  Remove the
253 	 * connection from the list right away then handle the remaining
254 	 * work via the idm_global_taskq.
255 	 */
256 	mutex_enter(&idm.idm_global_mutex);
257 	list_remove(&idm.idm_ini_conn_list, ic);
258 	mutex_exit(&idm.idm_global_mutex);
259 
260 	if (taskq_dispatch(idm.idm_global_taskq,
261 	    &idm_ini_conn_destroy_task, ic, TQ_SLEEP) == NULL) {
262 		cmn_err(CE_WARN,
263 		    "idm_ini_conn_destroy: Couldn't dispatch task");
264 	}
265 }
266 
267 /*
268  * idm_ini_conn_connect
269  *
270  * Establish connection to the remote system identified in idm_conn_t.
271  * The connection parameters including the remote IP address were established
272  * in the call to idm_ini_conn_create.  The IDM state machine will
273  * perform client notifications as necessary to prompt the initiator through
274  * the login process.  IDM also keeps a timer running so that if the login
275  * process doesn't complete in a timely manner it will fail.
276  *
277  * ic - idm_conn_t structure representing the relevant connection
278  *
279  * Returns success if the connection was established, otherwise some kind
280  * of meaningful error code.
281  *
282  * Upon return the login has either failed or is logging in (ffp)
283  */
284 idm_status_t
285 idm_ini_conn_connect(idm_conn_t *ic)
286 {
287 	idm_status_t	rc;
288 
289 	rc = idm_conn_sm_init(ic);
290 	if (rc != IDM_STATUS_SUCCESS) {
291 		return (ic->ic_conn_sm_status);
292 	}
293 
294 	/* Hold connection until we return */
295 	idm_conn_hold(ic);
296 
297 	/* Kick state machine */
298 	idm_conn_event(ic, CE_CONNECT_REQ, NULL);
299 
300 	/* Wait for login flag */
301 	mutex_enter(&ic->ic_state_mutex);
302 	while (!(ic->ic_state_flags & CF_LOGIN_READY) &&
303 	    !(ic->ic_state_flags & CF_ERROR)) {
304 		cv_wait(&ic->ic_state_cv, &ic->ic_state_mutex);
305 	}
306 
307 	/*
308 	 * The CN_READY_TO_LOGIN and/or the CN_CONNECT_FAIL call to
309 	 * idm_notify_client has already been generated by the idm conn
310 	 * state machine.  If connection fails any time after this
311 	 * check, we will detect it in iscsi_login.
312 	 */
313 	if (ic->ic_state_flags & CF_ERROR) {
314 		rc = ic->ic_conn_sm_status;
315 	}
316 	mutex_exit(&ic->ic_state_mutex);
317 	idm_conn_rele(ic);
318 
319 	return (rc);
320 }
321 
322 /*
323  * idm_ini_conn_disconnect
324  *
325  * Forces a connection (previously established using idm_ini_conn_connect)
326  * to perform a controlled shutdown, cleaning up any outstanding requests.
327  *
328  * ic - idm_conn_t structure representing the relevant connection
329  *
330  * This is asynchronous and will return before the connection is properly
331  * shutdown
332  */
333 /* ARGSUSED */
334 void
335 idm_ini_conn_disconnect(idm_conn_t *ic)
336 {
337 	idm_conn_event(ic, CE_TRANSPORT_FAIL, NULL);
338 }
339 
340 /*
341  * idm_ini_conn_disconnect_sync
342  *
343  * Forces a connection (previously established using idm_ini_conn_connect)
344  * to perform a controlled shutdown.  Blocks until the connection is
345  * disconnected.
346  *
347  * ic - idm_conn_t structure representing the relevant connection
348  */
349 /* ARGSUSED */
350 void
351 idm_ini_conn_disconnect_sync(idm_conn_t *ic)
352 {
353 	mutex_enter(&ic->ic_state_mutex);
354 	if ((ic->ic_state != CS_S9_INIT_ERROR) &&
355 	    (ic->ic_state != CS_S11_COMPLETE)) {
356 		idm_conn_event_locked(ic, CE_TRANSPORT_FAIL, NULL, CT_NONE);
357 		while ((ic->ic_state != CS_S9_INIT_ERROR) &&
358 		    (ic->ic_state != CS_S11_COMPLETE))
359 			cv_wait(&ic->ic_state_cv, &ic->ic_state_mutex);
360 	}
361 	mutex_exit(&ic->ic_state_mutex);
362 }
363 
364 /*
365  * idm_tgt_svc_create
366  *
367  * The target calls this service to obtain a service context for each available
368  * transport, starting a service of each type related to the IP address and port
369  * passed. The idm_svc_req_t contains the service parameters.
370  */
371 idm_status_t
372 idm_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t **new_svc)
373 {
374 	idm_transport_type_t	type;
375 	idm_transport_t		*it;
376 	idm_svc_t		*is;
377 	int			rc;
378 
379 	*new_svc = NULL;
380 	is = kmem_zalloc(sizeof (idm_svc_t), KM_SLEEP);
381 
382 	/* Initialize transport-agnostic components of the service handle */
383 	is->is_svc_req = *sr;
384 	mutex_init(&is->is_mutex, NULL, MUTEX_DEFAULT, NULL);
385 	cv_init(&is->is_cv, NULL, CV_DEFAULT, NULL);
386 	mutex_init(&is->is_count_mutex, NULL, MUTEX_DEFAULT, NULL);
387 	cv_init(&is->is_count_cv, NULL, CV_DEFAULT, NULL);
388 	idm_refcnt_init(&is->is_refcnt, is);
389 
390 	/*
391 	 * Make sure all available transports are setup.  We call this now
392 	 * instead of at initialization time in case IB has become available
393 	 * since we started (hotplug, etc).
394 	 */
395 	idm_transport_setup(sr->sr_li);
396 
397 	/*
398 	 * Loop through the transports, configuring the transport-specific
399 	 * components of each one.
400 	 */
401 	for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {
402 
403 		it = &idm_transport_list[type];
404 		/*
405 		 * If it_ops is NULL then the transport is unconfigured
406 		 * and we shouldn't try to start the service.
407 		 */
408 		if (it->it_ops == NULL) {
409 			continue;
410 		}
411 
412 		rc = it->it_ops->it_tgt_svc_create(sr, is);
413 		if (rc != IDM_STATUS_SUCCESS) {
414 			/* Teardown any configured services */
415 			while (type--) {
416 				it = &idm_transport_list[type];
417 				if (it->it_ops == NULL) {
418 					continue;
419 				}
420 				it->it_ops->it_tgt_svc_destroy(is);
421 			}
422 			/* Free the svc context and return */
423 			kmem_free(is, sizeof (idm_svc_t));
424 			return (rc);
425 		}
426 	}
427 
428 	*new_svc = is;
429 
430 	mutex_enter(&idm.idm_global_mutex);
431 	list_insert_tail(&idm.idm_tgt_svc_list, is);
432 	mutex_exit(&idm.idm_global_mutex);
433 
434 	return (IDM_STATUS_SUCCESS);
435 }
436 
437 /*
438  * idm_tgt_svc_destroy
439  *
440  * is - idm_svc_t returned by the call to idm_tgt_svc_create
441  *
442  * Cleanup any resources associated with the idm_svc_t.
443  */
444 void
445 idm_tgt_svc_destroy(idm_svc_t *is)
446 {
447 	idm_transport_type_t	type;
448 	idm_transport_t		*it;
449 
450 	mutex_enter(&idm.idm_global_mutex);
451 	/* remove this service from the global list */
452 	list_remove(&idm.idm_tgt_svc_list, is);
453 	/* wakeup any waiters for service change */
454 	cv_broadcast(&idm.idm_tgt_svc_cv);
455 	mutex_exit(&idm.idm_global_mutex);
456 
457 	/* teardown each transport-specific service */
458 	for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {
459 		it = &idm_transport_list[type];
460 		if (it->it_ops == NULL) {
461 			continue;
462 		}
463 
464 		it->it_ops->it_tgt_svc_destroy(is);
465 	}
466 
467 	/* tear down the svc resources */
468 	idm_refcnt_destroy(&is->is_refcnt);
469 	cv_destroy(&is->is_count_cv);
470 	mutex_destroy(&is->is_count_mutex);
471 	cv_destroy(&is->is_cv);
472 	mutex_destroy(&is->is_mutex);
473 
474 	/* free the svc handle */
475 	kmem_free(is, sizeof (idm_svc_t));
476 }
477 
478 void
479 idm_tgt_svc_hold(idm_svc_t *is)
480 {
481 	idm_refcnt_hold(&is->is_refcnt);
482 }
483 
484 void
485 idm_tgt_svc_rele_and_destroy(idm_svc_t *is)
486 {
487 	idm_refcnt_rele_and_destroy(&is->is_refcnt,
488 	    (idm_refcnt_cb_t *)&idm_tgt_svc_destroy);
489 }
490 
491 /*
492  * idm_tgt_svc_online
493  *
494  * is - idm_svc_t returned by the call to idm_tgt_svc_create
495  *
496  * Online each transport service, as we want this target to be accessible
497  * via any configured transport.
498  *
499  * When the initiator establishes a new connection to the target, IDM will
500  * call the "new connect" callback defined in the idm_svc_req_t structure
501  * and it will pass an idm_conn_t structure representing that new connection.
502  */
503 idm_status_t
504 idm_tgt_svc_online(idm_svc_t *is)
505 {
506 
507 	idm_transport_type_t	type, last_type;
508 	idm_transport_t		*it;
509 	int			rc = IDM_STATUS_SUCCESS;
510 
511 	mutex_enter(&is->is_mutex);
512 	if (is->is_online == 0) {
513 		/* Walk through each of the transports and online them */
514 		for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {
515 			it = &idm_transport_list[type];
516 			if (it->it_ops == NULL) {
517 				/* transport is not registered */
518 				continue;
519 			}
520 
521 			mutex_exit(&is->is_mutex);
522 			rc = it->it_ops->it_tgt_svc_online(is);
523 			mutex_enter(&is->is_mutex);
524 			if (rc != IDM_STATUS_SUCCESS) {
525 				last_type = type;
526 				break;
527 			}
528 		}
529 		if (rc != IDM_STATUS_SUCCESS) {
530 			/*
531 			 * The last transport failed to online.
532 			 * Offline any transport onlined above and
533 			 * do not online the target.
534 			 */
535 			for (type = 0; type < last_type; type++) {
536 				it = &idm_transport_list[type];
537 				if (it->it_ops == NULL) {
538 					/* transport is not registered */
539 					continue;
540 				}
541 
542 				mutex_exit(&is->is_mutex);
543 				it->it_ops->it_tgt_svc_offline(is);
544 				mutex_enter(&is->is_mutex);
545 			}
546 		} else {
547 			/* Target service now online */
548 			is->is_online = 1;
549 		}
550 	} else {
551 		/* Target service already online, just bump the count */
552 		is->is_online++;
553 	}
554 	mutex_exit(&is->is_mutex);
555 
556 	return (rc);
557 }
558 
559 /*
560  * idm_tgt_svc_offline
561  *
562  * is - idm_svc_t returned by the call to idm_tgt_svc_create
563  *
564  * Shutdown any online target services.
565  */
566 void
567 idm_tgt_svc_offline(idm_svc_t *is)
568 {
569 	idm_transport_type_t	type;
570 	idm_transport_t		*it;
571 
572 	mutex_enter(&is->is_mutex);
573 	is->is_online--;
574 	if (is->is_online == 0) {
575 		/* Walk through each of the transports and offline them */
576 		for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {
577 			it = &idm_transport_list[type];
578 			if (it->it_ops == NULL) {
579 				/* transport is not registered */
580 				continue;
581 			}
582 
583 			mutex_exit(&is->is_mutex);
584 			it->it_ops->it_tgt_svc_offline(is);
585 			mutex_enter(&is->is_mutex);
586 		}
587 	}
588 	mutex_exit(&is->is_mutex);
589 }
590 
591 /*
592  * idm_tgt_svc_lookup
593  *
594  * Lookup a service instance listening on the specified port
595  */
596 
597 idm_svc_t *
598 idm_tgt_svc_lookup(uint16_t port)
599 {
600 	idm_svc_t *result;
601 
602 retry:
603 	mutex_enter(&idm.idm_global_mutex);
604 	for (result = list_head(&idm.idm_tgt_svc_list);
605 	    result != NULL;
606 	    result = list_next(&idm.idm_tgt_svc_list, result)) {
607 		if (result->is_svc_req.sr_port == port) {
608 			if (result->is_online == 0) {
609 				/*
610 				 * A service exists on this port, but it
611 				 * is going away, wait for it to cleanup.
612 				 */
613 				cv_wait(&idm.idm_tgt_svc_cv,
614 				    &idm.idm_global_mutex);
615 				mutex_exit(&idm.idm_global_mutex);
616 				goto retry;
617 			}
618 			idm_tgt_svc_hold(result);
619 			mutex_exit(&idm.idm_global_mutex);
620 			return (result);
621 		}
622 	}
623 	mutex_exit(&idm.idm_global_mutex);
624 
625 	return (NULL);
626 }
627 
628 /*
629  * idm_negotiate_key_values()
630  * Give IDM level a chance to negotiate any login parameters it should own.
631  *  -- leave unhandled parameters alone on request_nvl
632  *  -- move all handled parameters to response_nvl with an appropriate response
633  *  -- also add an entry to negotiated_nvl for any accepted parameters
634  */
635 kv_status_t
636 idm_negotiate_key_values(idm_conn_t *ic, nvlist_t *request_nvl,
637     nvlist_t *response_nvl, nvlist_t *negotiated_nvl)
638 {
639 	ASSERT(ic->ic_transport_ops != NULL);
640 	return (ic->ic_transport_ops->it_negotiate_key_values(ic,
641 	    request_nvl, response_nvl, negotiated_nvl));
642 }
643 
644 /*
645  * idm_notice_key_values()
646  * Activate at the IDM level any parameters that have been negotiated.
647  * Passes the set of key value pairs to the transport for activation.
648  * This will be invoked as the connection is entering full-feature mode.
649  */
650 void
651 idm_notice_key_values(idm_conn_t *ic, nvlist_t *negotiated_nvl)
652 {
653 	ASSERT(ic->ic_transport_ops != NULL);
654 	ic->ic_transport_ops->it_notice_key_values(ic, negotiated_nvl);
655 }
656 
657 /*
658  * idm_declare_key_values()
659  * Activate an operational set of declarative parameters from the config_nvl,
660  * and return the selected values in the outgoing_nvl.
661  */
662 kv_status_t
663 idm_declare_key_values(idm_conn_t *ic, nvlist_t *config_nvl,
664     nvlist_t *outgoing_nvl)
665 {
666 	ASSERT(ic->ic_transport_ops != NULL);
667 	return (ic->ic_transport_ops->it_declare_key_values(ic, config_nvl,
668 	    outgoing_nvl));
669 }
670 
671 /*
672  * idm_buf_tx_to_ini
673  *
674  * This is IDM's implementation of the 'Put_Data' operational primitive.
675  *
676  * This function is invoked by a target iSCSI layer to request its local
677  * Datamover layer to transmit the Data-In PDU to the peer iSCSI layer
678  * on the remote iSCSI node. The I/O buffer represented by 'idb' is
679  * transferred to the initiator associated with task 'idt'. The connection
680  * info, contents of the Data-In PDU header, the DataDescriptorIn, BHS,
681  * and the callback (idb->idb_buf_cb) at transfer completion are
682  * provided as input.
683  *
684  * This data transfer takes place transparently to the remote iSCSI layer,
685  * i.e. without its participation.
686  *
687  * Using sockets, IDM implements the data transfer by segmenting the data
688  * buffer into appropriately sized iSCSI PDUs and transmitting them to the
689  * initiator. iSER performs the transfer using RDMA write.
690  *
691  */
692 idm_status_t
693 idm_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb,
694     uint32_t offset, uint32_t xfer_len,
695     idm_buf_cb_t idb_buf_cb, void *cb_arg)
696 {
697 	idm_status_t rc;
698 
699 	idb->idb_bufoffset = offset;
700 	idb->idb_xfer_len = xfer_len;
701 	idb->idb_buf_cb = idb_buf_cb;
702 	idb->idb_cb_arg = cb_arg;
703 	gethrestime(&idb->idb_xfer_start);
704 
705 	/*
706 	 * Buffer should not contain the pattern.  If the pattern is
707 	 * present then we've been asked to transmit initialized data
708 	 */
709 	IDM_BUFPAT_CHECK(idb, xfer_len, BP_CHECK_ASSERT);
710 
711 	mutex_enter(&idt->idt_mutex);
712 	switch (idt->idt_state) {
713 	case TASK_ACTIVE:
714 		idt->idt_tx_to_ini_start++;
715 		idm_task_hold(idt);
716 		idm_buf_bind_in_locked(idt, idb);
717 		idb->idb_in_transport = B_TRUE;
718 		rc = (*idt->idt_ic->ic_transport_ops->it_buf_tx_to_ini)
719 		    (idt, idb);
720 		return (rc);
721 
722 	case TASK_SUSPENDING:
723 	case TASK_SUSPENDED:
724 		/*
725 		 * Bind buffer but don't start a transfer since the task
726 		 * is suspended
727 		 */
728 		idm_buf_bind_in_locked(idt, idb);
729 		mutex_exit(&idt->idt_mutex);
730 		return (IDM_STATUS_SUCCESS);
731 
732 	case TASK_ABORTING:
733 	case TASK_ABORTED:
734 		/*
735 		 * Once the task is aborted, any buffers added to the
736 		 * idt_inbufv will never get cleaned up, so just return
737 		 * SUCCESS.  The buffer should get cleaned up by the
738 		 * client or framework once task_aborted has completed.
739 		 */
740 		mutex_exit(&idt->idt_mutex);
741 		return (IDM_STATUS_SUCCESS);
742 
743 	default:
744 		ASSERT(0);
745 		break;
746 	}
747 	mutex_exit(&idt->idt_mutex);
748 
749 	return (IDM_STATUS_FAIL);
750 }
751 
752 /*
753  * idm_buf_rx_from_ini
754  *
755  * This is IDM's implementation of the 'Get_Data' operational primitive.
756  *
757  * This function is invoked by a target iSCSI layer to request its local
758  * Datamover layer to retrieve certain data identified by the R2T PDU from the
759  * peer iSCSI layer on the remote node. The retrieved Data-Out PDU will be
760  * mapped to the respective buffer by the task tags (ITT & TTT).
761  * The connection information, contents of an R2T PDU, DataDescriptor, BHS, and
762  * the callback (idb->idb_buf_cb) notification for data transfer completion
763  * are provided as input.
764  *
765  * When an iSCSI node sends an R2T PDU to its local Datamover layer, the
766  * local and remote Datamover layers transparently bring about the data
767  * transfer requested by the R2T PDU, without the participation of the
768  * iSCSI layers.
769  *
770  * Using sockets, IDM transmits an R2T PDU for each buffer and the rx_data_out()
771  * assembles the Data-Out PDUs into the buffer. iSER uses RDMA read.
772  *
773  */
774 idm_status_t
775 idm_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb,
776     uint32_t offset, uint32_t xfer_len,
777     idm_buf_cb_t idb_buf_cb, void *cb_arg)
778 {
779 	idm_status_t rc;
780 
781 	idb->idb_bufoffset = offset;
782 	idb->idb_xfer_len = xfer_len;
783 	idb->idb_buf_cb = idb_buf_cb;
784 	idb->idb_cb_arg = cb_arg;
785 	gethrestime(&idb->idb_xfer_start);
786 
787 	/*
788 	 * "In" buf list is for "Data In" PDU's, "Out" buf list is for
789 	 * "Data Out" PDU's
790 	 */
791 	mutex_enter(&idt->idt_mutex);
792 	switch (idt->idt_state) {
793 	case TASK_ACTIVE:
794 		idt->idt_rx_from_ini_start++;
795 		idm_task_hold(idt);
796 		idm_buf_bind_out_locked(idt, idb);
797 		idb->idb_in_transport = B_TRUE;
798 		rc = (*idt->idt_ic->ic_transport_ops->it_buf_rx_from_ini)
799 		    (idt, idb);
800 		return (rc);
801 	case TASK_SUSPENDING:
802 	case TASK_SUSPENDED:
803 	case TASK_ABORTING:
804 	case TASK_ABORTED:
805 		/*
806 		 * Bind buffer but don't start a transfer since the task
807 		 * is suspended
808 		 */
809 		idm_buf_bind_out_locked(idt, idb);
810 		mutex_exit(&idt->idt_mutex);
811 		return (IDM_STATUS_SUCCESS);
812 	default:
813 		ASSERT(0);
814 		break;
815 	}
816 	mutex_exit(&idt->idt_mutex);
817 
818 	return (IDM_STATUS_FAIL);
819 }
820 
821 /*
822  * idm_buf_tx_to_ini_done
823  *
824  * The transport calls this after it has completed a transfer requested by
825  * a call to transport_buf_tx_to_ini
826  *
827  * Caller holds idt->idt_mutex, idt->idt_mutex is released before returning.
828  * idt may be freed after the call to idb->idb_buf_cb.
829  */
830 void
831 idm_buf_tx_to_ini_done(idm_task_t *idt, idm_buf_t *idb, idm_status_t status)
832 {
833 	ASSERT(mutex_owned(&idt->idt_mutex));
834 	idb->idb_in_transport = B_FALSE;
835 	idb->idb_tx_thread = B_FALSE;
836 	idt->idt_tx_to_ini_done++;
837 	gethrestime(&idb->idb_xfer_done);
838 
839 	/*
840 	 * idm_refcnt_rele may cause TASK_SUSPENDING --> TASK_SUSPENDED or
841 	 * TASK_ABORTING --> TASK_ABORTED transistion if the refcount goes
842 	 * to 0.
843 	 */
844 	idm_task_rele(idt);
845 	idb->idb_status = status;
846 
847 	switch (idt->idt_state) {
848 	case TASK_ACTIVE:
849 		idt->idt_ic->ic_timestamp = ddi_get_lbolt();
850 		idm_buf_unbind_in_locked(idt, idb);
851 		mutex_exit(&idt->idt_mutex);
852 		(*idb->idb_buf_cb)(idb, status);
853 		return;
854 	case TASK_SUSPENDING:
855 	case TASK_SUSPENDED:
856 	case TASK_ABORTING:
857 	case TASK_ABORTED:
858 		/*
859 		 * To keep things simple we will ignore the case where the
860 		 * transfer was successful and leave all buffers bound to the
861 		 * task.  This allows us to also ignore the case where we've
862 		 * been asked to abort a task but the last transfer of the
863 		 * task has completed.  IDM has no idea whether this was, in
864 		 * fact, the last transfer of the task so it would be difficult
865 		 * to handle this case.  Everything should get sorted out again
866 		 * after task reassignment is complete.
867 		 *
868 		 * In the case of TASK_ABORTING we could conceivably call the
869 		 * buffer callback here but the timing of when the client's
870 		 * client_task_aborted callback is invoked vs. when the client's
871 		 * buffer callback gets invoked gets sticky.  We don't want
872 		 * the client to here from us again after the call to
873 		 * client_task_aborted() but we don't want to give it a bunch
874 		 * of failed buffer transfers until we've called
875 		 * client_task_aborted().  Instead we'll just leave all the
876 		 * buffers bound and allow the client to cleanup.
877 		 */
878 		break;
879 	default:
880 		ASSERT(0);
881 	}
882 	mutex_exit(&idt->idt_mutex);
883 }
884 
885 /*
886  * idm_buf_rx_from_ini_done
887  *
888  * The transport calls this after it has completed a transfer requested by
889  * a call to transport_buf_rx_from_ini
890  *
891  * Caller holds idt->idt_mutex, idt->idt_mutex is released before returning.
892  * idt may be freed after the call to idb->idb_buf_cb.
893  */
894 void
895 idm_buf_rx_from_ini_done(idm_task_t *idt, idm_buf_t *idb, idm_status_t status)
896 {
897 	ASSERT(mutex_owned(&idt->idt_mutex));
898 	idb->idb_in_transport = B_FALSE;
899 	idt->idt_rx_from_ini_done++;
900 	gethrestime(&idb->idb_xfer_done);
901 
902 	/*
903 	 * idm_refcnt_rele may cause TASK_SUSPENDING --> TASK_SUSPENDED or
904 	 * TASK_ABORTING --> TASK_ABORTED transistion if the refcount goes
905 	 * to 0.
906 	 */
907 	idm_task_rele(idt);
908 	idb->idb_status = status;
909 
910 	if (status == IDM_STATUS_SUCCESS) {
911 		/*
912 		 * Buffer should not contain the pattern.  If it does then
913 		 * we did not get the data from the remote host.
914 		 */
915 		IDM_BUFPAT_CHECK(idb, idb->idb_xfer_len, BP_CHECK_ASSERT);
916 	}
917 
918 	switch (idt->idt_state) {
919 	case TASK_ACTIVE:
920 		idt->idt_ic->ic_timestamp = ddi_get_lbolt();
921 		idm_buf_unbind_out_locked(idt, idb);
922 		mutex_exit(&idt->idt_mutex);
923 		(*idb->idb_buf_cb)(idb, status);
924 		return;
925 	case TASK_SUSPENDING:
926 	case TASK_SUSPENDED:
927 	case TASK_ABORTING:
928 	case TASK_ABORTED:
929 		/*
930 		 * To keep things simple we will ignore the case where the
931 		 * transfer was successful and leave all buffers bound to the
932 		 * task.  This allows us to also ignore the case where we've
933 		 * been asked to abort a task but the last transfer of the
934 		 * task has completed.  IDM has no idea whether this was, in
935 		 * fact, the last transfer of the task so it would be difficult
936 		 * to handle this case.  Everything should get sorted out again
937 		 * after task reassignment is complete.
938 		 *
939 		 * In the case of TASK_ABORTING we could conceivably call the
940 		 * buffer callback here but the timing of when the client's
941 		 * client_task_aborted callback is invoked vs. when the client's
942 		 * buffer callback gets invoked gets sticky.  We don't want
943 		 * the client to here from us again after the call to
944 		 * client_task_aborted() but we don't want to give it a bunch
945 		 * of failed buffer transfers until we've called
946 		 * client_task_aborted().  Instead we'll just leave all the
947 		 * buffers bound and allow the client to cleanup.
948 		 */
949 		break;
950 	default:
951 		ASSERT(0);
952 	}
953 	mutex_exit(&idt->idt_mutex);
954 }
955 
956 /*
957  * idm_buf_alloc
958  *
959  * Allocates a buffer handle and registers it for use with the transport
960  * layer. If a buffer is not passed on bufptr, the buffer will be allocated
961  * as well as the handle.
962  *
963  * ic		- connection on which the buffer will be transferred
964  * bufptr	- allocate memory for buffer if NULL, else assign to buffer
965  * buflen	- length of buffer
966  *
967  * Returns idm_buf_t handle if successful, otherwise NULL
968  */
969 idm_buf_t *
970 idm_buf_alloc(idm_conn_t *ic, void *bufptr, uint64_t buflen)
971 {
972 	idm_buf_t	*buf = NULL;
973 	int		rc;
974 
975 	ASSERT(ic != NULL);
976 	ASSERT(idm.idm_buf_cache != NULL);
977 	ASSERT(buflen > 0);
978 
979 	/* Don't allocate new buffers if we are not in FFP */
980 	mutex_enter(&ic->ic_state_mutex);
981 	if (!ic->ic_ffp) {
982 		mutex_exit(&ic->ic_state_mutex);
983 		return (NULL);
984 	}
985 
986 
987 	idm_conn_hold(ic);
988 	mutex_exit(&ic->ic_state_mutex);
989 
990 	buf = kmem_cache_alloc(idm.idm_buf_cache, KM_NOSLEEP);
991 	if (buf == NULL) {
992 		idm_conn_rele(ic);
993 		return (NULL);
994 	}
995 
996 	buf->idb_ic		= ic;
997 	buf->idb_buflen		= buflen;
998 	buf->idb_exp_offset	= 0;
999 	buf->idb_bufoffset	= 0;
1000 	buf->idb_xfer_len 	= 0;
1001 	buf->idb_magic		= IDM_BUF_MAGIC;
1002 	buf->idb_in_transport	= B_FALSE;
1003 	buf->idb_bufbcopy	= B_FALSE;
1004 
1005 	/*
1006 	 * If bufptr is NULL, we have an implicit request to allocate
1007 	 * memory for this IDM buffer handle and register it for use
1008 	 * with the transport. To simplify this, and to give more freedom
1009 	 * to the transport layer for it's own buffer management, both of
1010 	 * these actions will take place in the transport layer.
1011 	 * If bufptr is set, then the caller has allocated memory (or more
1012 	 * likely it's been passed from an upper layer), and we need only
1013 	 * register the buffer for use with the transport layer.
1014 	 */
1015 	if (bufptr == NULL) {
1016 		/*
1017 		 * Allocate a buffer from the transport layer (which
1018 		 * will also register the buffer for use).
1019 		 */
1020 		rc = ic->ic_transport_ops->it_buf_alloc(buf, buflen);
1021 		if (rc != 0) {
1022 			idm_conn_rele(ic);
1023 			kmem_cache_free(idm.idm_buf_cache, buf);
1024 			return (NULL);
1025 		}
1026 		/* Set the bufalloc'd flag */
1027 		buf->idb_bufalloc = B_TRUE;
1028 	} else {
1029 		/*
1030 		 * For large transfers, Set the passed bufptr into
1031 		 * the buf handle, and register the handle with the
1032 		 * transport layer. As memory registration with the
1033 		 * transport layer is a time/cpu intensive operation,
1034 		 * for small transfers (up to a pre-defined bcopy
1035 		 * threshold), use pre-registered memory buffers
1036 		 * and bcopy data at the appropriate time.
1037 		 */
1038 		buf->idb_buf = bufptr;
1039 
1040 		rc = ic->ic_transport_ops->it_buf_setup(buf);
1041 		if (rc != 0) {
1042 			idm_conn_rele(ic);
1043 			kmem_cache_free(idm.idm_buf_cache, buf);
1044 			return (NULL);
1045 		}
1046 		/*
1047 		 * The transport layer is now expected to set the idb_bufalloc
1048 		 * correctly to indicate if resources have been allocated.
1049 		 */
1050 	}
1051 
1052 	IDM_BUFPAT_SET(buf);
1053 
1054 	return (buf);
1055 }
1056 
1057 /*
1058  * idm_buf_free
1059  *
1060  * Release a buffer handle along with the associated buffer that was allocated
1061  * or assigned with idm_buf_alloc
1062  */
1063 void
1064 idm_buf_free(idm_buf_t *buf)
1065 {
1066 	idm_conn_t *ic = buf->idb_ic;
1067 
1068 
1069 	buf->idb_task_binding	= NULL;
1070 
1071 	if (buf->idb_bufalloc) {
1072 		ic->ic_transport_ops->it_buf_free(buf);
1073 	} else {
1074 		ic->ic_transport_ops->it_buf_teardown(buf);
1075 	}
1076 	kmem_cache_free(idm.idm_buf_cache, buf);
1077 	idm_conn_rele(ic);
1078 }
1079 
1080 /*
1081  * idm_buf_bind_in
1082  *
1083  * This function associates a buffer with a task. This is only for use by the
1084  * iSCSI initiator that will have only one buffer per transfer direction
1085  *
1086  */
1087 void
1088 idm_buf_bind_in(idm_task_t *idt, idm_buf_t *buf)
1089 {
1090 	mutex_enter(&idt->idt_mutex);
1091 	idm_buf_bind_in_locked(idt, buf);
1092 	mutex_exit(&idt->idt_mutex);
1093 }
1094 
1095 static void
1096 idm_buf_bind_in_locked(idm_task_t *idt, idm_buf_t *buf)
1097 {
1098 	buf->idb_task_binding = idt;
1099 	buf->idb_ic = idt->idt_ic;
1100 	idm_listbuf_insert(&idt->idt_inbufv, buf);
1101 }
1102 
1103 void
1104 idm_buf_bind_out(idm_task_t *idt, idm_buf_t *buf)
1105 {
1106 	/*
1107 	 * For small transfers, the iSER transport delegates the IDM
1108 	 * layer to bcopy the SCSI Write data for faster IOPS.
1109 	 */
1110 	if (buf->idb_bufbcopy == B_TRUE) {
1111 
1112 		bcopy(buf->idb_bufptr, buf->idb_buf, buf->idb_buflen);
1113 	}
1114 	mutex_enter(&idt->idt_mutex);
1115 	idm_buf_bind_out_locked(idt, buf);
1116 	mutex_exit(&idt->idt_mutex);
1117 }
1118 
1119 static void
1120 idm_buf_bind_out_locked(idm_task_t *idt, idm_buf_t *buf)
1121 {
1122 	buf->idb_task_binding = idt;
1123 	buf->idb_ic = idt->idt_ic;
1124 	idm_listbuf_insert(&idt->idt_outbufv, buf);
1125 }
1126 
1127 void
1128 idm_buf_unbind_in(idm_task_t *idt, idm_buf_t *buf)
1129 {
1130 	/*
1131 	 * For small transfers, the iSER transport delegates the IDM
1132 	 * layer to bcopy the SCSI Read data into the read buufer
1133 	 * for faster IOPS.
1134 	 */
1135 	if (buf->idb_bufbcopy == B_TRUE) {
1136 		bcopy(buf->idb_buf, buf->idb_bufptr, buf->idb_buflen);
1137 	}
1138 	mutex_enter(&idt->idt_mutex);
1139 	idm_buf_unbind_in_locked(idt, buf);
1140 	mutex_exit(&idt->idt_mutex);
1141 }
1142 
1143 static void
1144 idm_buf_unbind_in_locked(idm_task_t *idt, idm_buf_t *buf)
1145 {
1146 	list_remove(&idt->idt_inbufv, buf);
1147 }
1148 
1149 void
1150 idm_buf_unbind_out(idm_task_t *idt, idm_buf_t *buf)
1151 {
1152 	mutex_enter(&idt->idt_mutex);
1153 	idm_buf_unbind_out_locked(idt, buf);
1154 	mutex_exit(&idt->idt_mutex);
1155 }
1156 
1157 static void
1158 idm_buf_unbind_out_locked(idm_task_t *idt, idm_buf_t *buf)
1159 {
1160 	list_remove(&idt->idt_outbufv, buf);
1161 }
1162 
1163 /*
1164  * idm_buf_find() will lookup the idm_buf_t based on the relative offset in the
1165  * iSCSI PDU
1166  */
1167 idm_buf_t *
1168 idm_buf_find(void *lbuf, size_t data_offset)
1169 {
1170 	idm_buf_t	*idb;
1171 	list_t		*lst = (list_t *)lbuf;
1172 
1173 	/* iterate through the list to find the buffer */
1174 	for (idb = list_head(lst); idb != NULL; idb = list_next(lst, idb)) {
1175 
1176 		ASSERT((idb->idb_ic->ic_conn_type == CONN_TYPE_TGT) ||
1177 		    (idb->idb_bufoffset == 0));
1178 
1179 		if ((data_offset >= idb->idb_bufoffset) &&
1180 		    (data_offset < (idb->idb_bufoffset + idb->idb_buflen))) {
1181 
1182 			return (idb);
1183 		}
1184 	}
1185 
1186 	return (NULL);
1187 }
1188 
1189 void
1190 idm_bufpat_set(idm_buf_t *idb)
1191 {
1192 	idm_bufpat_t	*bufpat;
1193 	int		len, i;
1194 
1195 	len = idb->idb_buflen;
1196 	len = (len / sizeof (idm_bufpat_t)) * sizeof (idm_bufpat_t);
1197 
1198 	bufpat = idb->idb_buf;
1199 	for (i = 0; i < len; i += sizeof (idm_bufpat_t)) {
1200 		bufpat->bufpat_idb = idb;
1201 		bufpat->bufpat_bufmagic = IDM_BUF_MAGIC;
1202 		bufpat->bufpat_offset = i;
1203 		bufpat++;
1204 	}
1205 }
1206 
1207 boolean_t
1208 idm_bufpat_check(idm_buf_t *idb, int check_len, idm_bufpat_check_type_t type)
1209 {
1210 	idm_bufpat_t	*bufpat;
1211 	int		len, i;
1212 
1213 	len = (type == BP_CHECK_QUICK) ? sizeof (idm_bufpat_t) : check_len;
1214 	len = (len / sizeof (idm_bufpat_t)) * sizeof (idm_bufpat_t);
1215 	ASSERT(len <= idb->idb_buflen);
1216 	bufpat = idb->idb_buf;
1217 
1218 	/*
1219 	 * Don't check the pattern in buffers that came from outside IDM
1220 	 * (these will be buffers from the initiator that we opted not
1221 	 * to double-buffer)
1222 	 */
1223 	if (!idb->idb_bufalloc)
1224 		return (B_FALSE);
1225 
1226 	/*
1227 	 * Return true if we find the pattern anywhere in the buffer
1228 	 */
1229 	for (i = 0; i < len; i += sizeof (idm_bufpat_t)) {
1230 		if (BUFPAT_MATCH(bufpat, idb)) {
1231 			IDM_CONN_LOG(CE_WARN, "idm_bufpat_check found: "
1232 			    "idb %p bufpat %p "
1233 			    "bufpat_idb=%p bufmagic=%08x offset=%08x",
1234 			    (void *)idb, (void *)bufpat, bufpat->bufpat_idb,
1235 			    bufpat->bufpat_bufmagic, bufpat->bufpat_offset);
1236 			DTRACE_PROBE2(bufpat__pattern__found,
1237 			    idm_buf_t *, idb, idm_bufpat_t *, bufpat);
1238 			if (type == BP_CHECK_ASSERT) {
1239 				ASSERT(0);
1240 			}
1241 			return (B_TRUE);
1242 		}
1243 		bufpat++;
1244 	}
1245 
1246 	return (B_FALSE);
1247 }
1248 
1249 /*
1250  * idm_task_alloc
1251  *
1252  * This function will allocate a idm_task_t structure. A task tag is also
1253  * generated and saved in idt_tt. The task is not active.
1254  */
1255 idm_task_t *
1256 idm_task_alloc(idm_conn_t *ic)
1257 {
1258 	idm_task_t	*idt;
1259 
1260 	ASSERT(ic != NULL);
1261 
1262 	/* Don't allocate new tasks if we are not in FFP */
1263 	if (!ic->ic_ffp) {
1264 		return (NULL);
1265 	}
1266 	idt = kmem_cache_alloc(idm.idm_task_cache, KM_NOSLEEP);
1267 	if (idt == NULL) {
1268 		return (NULL);
1269 	}
1270 
1271 	ASSERT(list_is_empty(&idt->idt_inbufv));
1272 	ASSERT(list_is_empty(&idt->idt_outbufv));
1273 
1274 	mutex_enter(&ic->ic_state_mutex);
1275 	if (!ic->ic_ffp) {
1276 		mutex_exit(&ic->ic_state_mutex);
1277 		kmem_cache_free(idm.idm_task_cache, idt);
1278 		return (NULL);
1279 	}
1280 	idm_conn_hold(ic);
1281 	mutex_exit(&ic->ic_state_mutex);
1282 
1283 	idt->idt_state		= TASK_IDLE;
1284 	idt->idt_ic		= ic;
1285 	idt->idt_private 	= NULL;
1286 	idt->idt_exp_datasn	= 0;
1287 	idt->idt_exp_rttsn	= 0;
1288 	idt->idt_flags		= 0;
1289 	return (idt);
1290 }
1291 
1292 /*
1293  * idm_task_start
1294  *
1295  * Mark the task active and initialize some stats. The caller
1296  * sets up the idm_task_t structure with a prior call to idm_task_alloc().
1297  * The task service does not function as a task/work engine, it is the
1298  * responsibility of the initiator to start the data transfer and free the
1299  * resources.
1300  */
1301 void
1302 idm_task_start(idm_task_t *idt, uintptr_t handle)
1303 {
1304 	ASSERT(idt != NULL);
1305 
1306 	/* mark the task as ACTIVE */
1307 	idt->idt_state = TASK_ACTIVE;
1308 	idt->idt_client_handle = handle;
1309 	idt->idt_tx_to_ini_start = idt->idt_tx_to_ini_done =
1310 	    idt->idt_rx_from_ini_start = idt->idt_rx_from_ini_done =
1311 	    idt->idt_tx_bytes = idt->idt_rx_bytes = 0;
1312 }
1313 
1314 /*
1315  * idm_task_done
1316  *
1317  * This function sets the state to indicate that the task is no longer active.
1318  */
1319 void
1320 idm_task_done(idm_task_t *idt)
1321 {
1322 	ASSERT(idt != NULL);
1323 
1324 	mutex_enter(&idt->idt_mutex);
1325 	idt->idt_state = TASK_IDLE;
1326 	mutex_exit(&idt->idt_mutex);
1327 
1328 	/*
1329 	 * Although unlikely it is possible for a reference to come in after
1330 	 * the client has decided the task is over but before we've marked
1331 	 * the task idle.  One specific unavoidable scenario is the case where
1332 	 * received PDU with the matching ITT/TTT results in a successful
1333 	 * lookup of this task.  We are at the mercy of the remote node in
1334 	 * that case so we need to handle it.  Now that the task state
1335 	 * has changed no more references will occur so a simple call to
1336 	 * idm_refcnt_wait_ref should deal with the situation.
1337 	 */
1338 	idm_refcnt_wait_ref(&idt->idt_refcnt);
1339 	idm_refcnt_reset(&idt->idt_refcnt);
1340 }
1341 
1342 /*
1343  * idm_task_free
1344  *
1345  * This function will free the Task Tag and the memory allocated for the task
1346  * idm_task_done should be called prior to this call
1347  */
1348 void
1349 idm_task_free(idm_task_t *idt)
1350 {
1351 	idm_conn_t *ic;
1352 
1353 	ASSERT(idt != NULL);
1354 	ASSERT(idt->idt_refcnt.ir_refcnt == 0);
1355 	ASSERT(idt->idt_state == TASK_IDLE);
1356 
1357 	ic = idt->idt_ic;
1358 
1359 	/*
1360 	 * It's possible for items to still be in the idt_inbufv list if
1361 	 * they were added after idm_free_task_rsrc was called.  We rely on
1362 	 * STMF to free all buffers associated with the task however STMF
1363 	 * doesn't know that we have this reference to the buffers.
1364 	 * Use list_create so that we don't end up with stale references
1365 	 * to these buffers.
1366 	 */
1367 	list_create(&idt->idt_inbufv, sizeof (idm_buf_t),
1368 	    offsetof(idm_buf_t, idb_buflink));
1369 	list_create(&idt->idt_outbufv, sizeof (idm_buf_t),
1370 	    offsetof(idm_buf_t, idb_buflink));
1371 
1372 	kmem_cache_free(idm.idm_task_cache, idt);
1373 
1374 	idm_conn_rele(ic);
1375 }
1376 
1377 /*
1378  * idm_task_find_common
1379  *	common code for idm_task_find() and idm_task_find_and_complete()
1380  */
1381 /*ARGSUSED*/
1382 static idm_task_t *
1383 idm_task_find_common(idm_conn_t *ic, uint32_t itt, uint32_t ttt,
1384     boolean_t complete)
1385 {
1386 	uint32_t	tt, client_handle;
1387 	idm_task_t	*idt;
1388 
1389 	/*
1390 	 * Must match both itt and ttt.  The table is indexed by itt
1391 	 * for initiator connections and ttt for target connections.
1392 	 */
1393 	if (IDM_CONN_ISTGT(ic)) {
1394 		tt = ttt;
1395 		client_handle = itt;
1396 	} else {
1397 		tt = itt;
1398 		client_handle = ttt;
1399 	}
1400 
1401 	rw_enter(&idm.idm_taskid_table_lock, RW_READER);
1402 	if (tt >= idm.idm_taskid_max) {
1403 		rw_exit(&idm.idm_taskid_table_lock);
1404 		return (NULL);
1405 	}
1406 
1407 	idt = idm.idm_taskid_table[tt];
1408 
1409 	if (idt != NULL) {
1410 		mutex_enter(&idt->idt_mutex);
1411 		if ((idt->idt_state != TASK_ACTIVE) ||
1412 		    (idt->idt_ic != ic) ||
1413 		    (IDM_CONN_ISTGT(ic) &&
1414 		    (idt->idt_client_handle != client_handle))) {
1415 			/*
1416 			 * Task doesn't match or task is aborting and
1417 			 * we don't want any more references.
1418 			 */
1419 			if ((idt->idt_ic != ic) &&
1420 			    (idt->idt_state == TASK_ACTIVE) &&
1421 			    (IDM_CONN_ISINI(ic) || idt->idt_client_handle ==
1422 			    client_handle)) {
1423 				IDM_CONN_LOG(CE_WARN,
1424 				"idm_task_find: wrong connection %p != %p",
1425 				    (void *)ic, (void *)idt->idt_ic);
1426 			}
1427 			mutex_exit(&idt->idt_mutex);
1428 			rw_exit(&idm.idm_taskid_table_lock);
1429 			return (NULL);
1430 		}
1431 		idm_task_hold(idt);
1432 		/*
1433 		 * Set the task state to TASK_COMPLETE so it can no longer
1434 		 * be found or aborted.
1435 		 */
1436 		if (B_TRUE == complete)
1437 			idt->idt_state = TASK_COMPLETE;
1438 		mutex_exit(&idt->idt_mutex);
1439 	}
1440 	rw_exit(&idm.idm_taskid_table_lock);
1441 
1442 	return (idt);
1443 }
1444 
1445 /*
1446  * This function looks up a task by task tag.
1447  */
1448 idm_task_t *
1449 idm_task_find(idm_conn_t *ic, uint32_t itt, uint32_t ttt)
1450 {
1451 	return (idm_task_find_common(ic, itt, ttt, B_FALSE));
1452 }
1453 
1454 /*
1455  * This function looks up a task by task tag. If found, the task state
1456  * is atomically set to TASK_COMPLETE so it can longer be found or aborted.
1457  */
1458 idm_task_t *
1459 idm_task_find_and_complete(idm_conn_t *ic, uint32_t itt, uint32_t ttt)
1460 {
1461 	return (idm_task_find_common(ic, itt, ttt, B_TRUE));
1462 }
1463 
1464 /*
1465  * idm_task_find_by_handle
1466  *
1467  * This function looks up a task by the client-private idt_client_handle.
1468  *
1469  * This function should NEVER be called in the performance path.  It is
1470  * intended strictly for error recovery/task management.
1471  */
1472 /*ARGSUSED*/
1473 void *
1474 idm_task_find_by_handle(idm_conn_t *ic, uintptr_t handle)
1475 {
1476 	idm_task_t	*idt = NULL;
1477 	int		idx = 0;
1478 
1479 	rw_enter(&idm.idm_taskid_table_lock, RW_READER);
1480 
1481 	for (idx = 0; idx < idm.idm_taskid_max; idx++) {
1482 		idt = idm.idm_taskid_table[idx];
1483 
1484 		if (idt == NULL)
1485 			continue;
1486 
1487 		mutex_enter(&idt->idt_mutex);
1488 
1489 		if (idt->idt_state != TASK_ACTIVE) {
1490 			/*
1491 			 * Task is either in suspend, abort, or already
1492 			 * complete.
1493 			 */
1494 			mutex_exit(&idt->idt_mutex);
1495 			continue;
1496 		}
1497 
1498 		if (idt->idt_client_handle == handle) {
1499 			idm_task_hold(idt);
1500 			mutex_exit(&idt->idt_mutex);
1501 			break;
1502 		}
1503 
1504 		mutex_exit(&idt->idt_mutex);
1505 	}
1506 
1507 	rw_exit(&idm.idm_taskid_table_lock);
1508 
1509 	if ((idt == NULL) || (idx == idm.idm_taskid_max))
1510 		return (NULL);
1511 
1512 	return (idt->idt_private);
1513 }
1514 
1515 void
1516 idm_task_hold(idm_task_t *idt)
1517 {
1518 	idm_refcnt_hold(&idt->idt_refcnt);
1519 }
1520 
1521 void
1522 idm_task_rele(idm_task_t *idt)
1523 {
1524 	idm_refcnt_rele(&idt->idt_refcnt);
1525 }
1526 
1527 void
1528 idm_task_abort(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type)
1529 {
1530 	idm_task_t	*task;
1531 	int		idx;
1532 
1533 	/*
1534 	 * Passing NULL as the task indicates that all tasks
1535 	 * for this connection should be aborted.
1536 	 */
1537 	if (idt == NULL) {
1538 		/*
1539 		 * Only the connection state machine should ask for
1540 		 * all tasks to abort and this should never happen in FFP.
1541 		 */
1542 		ASSERT(!ic->ic_ffp);
1543 		rw_enter(&idm.idm_taskid_table_lock, RW_READER);
1544 		for (idx = 0; idx < idm.idm_taskid_max; idx++) {
1545 			task = idm.idm_taskid_table[idx];
1546 			if (task == NULL)
1547 				continue;
1548 			mutex_enter(&task->idt_mutex);
1549 			if ((task->idt_state != TASK_IDLE) &&
1550 			    (task->idt_state != TASK_COMPLETE) &&
1551 			    (task->idt_ic == ic)) {
1552 				rw_exit(&idm.idm_taskid_table_lock);
1553 				idm_task_abort_one(ic, task, abort_type);
1554 				rw_enter(&idm.idm_taskid_table_lock, RW_READER);
1555 			} else
1556 				mutex_exit(&task->idt_mutex);
1557 		}
1558 		rw_exit(&idm.idm_taskid_table_lock);
1559 	} else {
1560 		mutex_enter(&idt->idt_mutex);
1561 		idm_task_abort_one(ic, idt, abort_type);
1562 	}
1563 }
1564 
1565 static void
1566 idm_task_abort_unref_cb(void *ref)
1567 {
1568 	idm_task_t *idt = ref;
1569 
1570 	mutex_enter(&idt->idt_mutex);
1571 	switch (idt->idt_state) {
1572 	case TASK_SUSPENDING:
1573 		idt->idt_state = TASK_SUSPENDED;
1574 		mutex_exit(&idt->idt_mutex);
1575 		idm_task_aborted(idt, IDM_STATUS_SUSPENDED);
1576 		return;
1577 	case TASK_ABORTING:
1578 		idt->idt_state = TASK_ABORTED;
1579 		mutex_exit(&idt->idt_mutex);
1580 		idm_task_aborted(idt, IDM_STATUS_ABORTED);
1581 		return;
1582 	default:
1583 		mutex_exit(&idt->idt_mutex);
1584 		ASSERT(0);
1585 		break;
1586 	}
1587 }
1588 
1589 /*
1590  * Abort the idm task.
1591  *    Caller must hold the task mutex, which will be released before return
1592  */
1593 static void
1594 idm_task_abort_one(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type)
1595 {
1596 	/* Caller must hold connection mutex */
1597 	ASSERT(mutex_owned(&idt->idt_mutex));
1598 	switch (idt->idt_state) {
1599 	case TASK_ACTIVE:
1600 		switch (abort_type) {
1601 		case AT_INTERNAL_SUSPEND:
1602 			/* Call transport to release any resources */
1603 			idt->idt_state = TASK_SUSPENDING;
1604 			mutex_exit(&idt->idt_mutex);
1605 			ic->ic_transport_ops->it_free_task_rsrc(idt);
1606 
1607 			/*
1608 			 * Wait for outstanding references.  When all
1609 			 * references are released the callback will call
1610 			 * idm_task_aborted().
1611 			 */
1612 			idm_refcnt_async_wait_ref(&idt->idt_refcnt,
1613 			    &idm_task_abort_unref_cb);
1614 			return;
1615 		case AT_INTERNAL_ABORT:
1616 		case AT_TASK_MGMT_ABORT:
1617 			idt->idt_state = TASK_ABORTING;
1618 			mutex_exit(&idt->idt_mutex);
1619 			ic->ic_transport_ops->it_free_task_rsrc(idt);
1620 
1621 			/*
1622 			 * Wait for outstanding references.  When all
1623 			 * references are released the callback will call
1624 			 * idm_task_aborted().
1625 			 */
1626 			idm_refcnt_async_wait_ref(&idt->idt_refcnt,
1627 			    &idm_task_abort_unref_cb);
1628 			return;
1629 		default:
1630 			ASSERT(0);
1631 		}
1632 		break;
1633 	case TASK_SUSPENDING:
1634 		/* Already called transport_free_task_rsrc(); */
1635 		switch (abort_type) {
1636 		case AT_INTERNAL_SUSPEND:
1637 			/* Already doing it */
1638 			break;
1639 		case AT_INTERNAL_ABORT:
1640 		case AT_TASK_MGMT_ABORT:
1641 			idt->idt_state = TASK_ABORTING;
1642 			break;
1643 		default:
1644 			ASSERT(0);
1645 		}
1646 		break;
1647 	case TASK_SUSPENDED:
1648 		/* Already called transport_free_task_rsrc(); */
1649 		switch (abort_type) {
1650 		case AT_INTERNAL_SUSPEND:
1651 			/* Already doing it */
1652 			break;
1653 		case AT_INTERNAL_ABORT:
1654 		case AT_TASK_MGMT_ABORT:
1655 			idt->idt_state = TASK_ABORTING;
1656 			mutex_exit(&idt->idt_mutex);
1657 
1658 			/*
1659 			 * We could probably call idm_task_aborted directly
1660 			 * here but we may be holding the conn lock. It's
1661 			 * easier to just switch contexts.  Even though
1662 			 * we shouldn't really have any references we'll
1663 			 * set the state to TASK_ABORTING instead of
1664 			 * TASK_ABORTED so we can use the same code path.
1665 			 */
1666 			idm_refcnt_async_wait_ref(&idt->idt_refcnt,
1667 			    &idm_task_abort_unref_cb);
1668 			return;
1669 		default:
1670 			ASSERT(0);
1671 		}
1672 		break;
1673 	case TASK_ABORTING:
1674 	case TASK_ABORTED:
1675 		switch (abort_type) {
1676 		case AT_INTERNAL_SUSPEND:
1677 			/* We're already past this point... */
1678 		case AT_INTERNAL_ABORT:
1679 		case AT_TASK_MGMT_ABORT:
1680 			/* Already doing it */
1681 			break;
1682 		default:
1683 			ASSERT(0);
1684 		}
1685 		break;
1686 	case TASK_COMPLETE:
1687 		/*
1688 		 * In this case, let it go.  The status has already been
1689 		 * sent (which may or may not get successfully transmitted)
1690 		 * and we don't want to end up in a race between completing
1691 		 * the status PDU and marking the task suspended.
1692 		 */
1693 		break;
1694 	default:
1695 		ASSERT(0);
1696 	}
1697 	mutex_exit(&idt->idt_mutex);
1698 }
1699 
1700 static void
1701 idm_task_aborted(idm_task_t *idt, idm_status_t status)
1702 {
1703 	(*idt->idt_ic->ic_conn_ops.icb_task_aborted)(idt, status);
1704 }
1705 
1706 /*
1707  * idm_pdu_tx
1708  *
1709  * This is IDM's implementation of the 'Send_Control' operational primitive.
1710  * This function is invoked by an initiator iSCSI layer requesting the transfer
1711  * of a iSCSI command PDU or a target iSCSI layer requesting the transfer of a
1712  * iSCSI response PDU. The PDU will be transmitted as-is by the local Datamover
1713  * layer to the peer iSCSI layer in the remote iSCSI node. The connection info
1714  * and iSCSI PDU-specific qualifiers namely BHS, AHS, DataDescriptor and Size
1715  * are provided as input.
1716  *
1717  */
1718 void
1719 idm_pdu_tx(idm_pdu_t *pdu)
1720 {
1721 	idm_conn_t		*ic = pdu->isp_ic;
1722 	iscsi_async_evt_hdr_t	*async_evt;
1723 
1724 	/*
1725 	 * If we are in full-featured mode then route SCSI-related
1726 	 * commands to the appropriate function vector without checking
1727 	 * the connection state.  We will only be in full-feature mode
1728 	 * when we are in an acceptable state for SCSI PDU's.
1729 	 *
1730 	 * We also need to ensure that there are no PDU events outstanding
1731 	 * on the state machine.  Any non-SCSI PDU's received in full-feature
1732 	 * mode will result in PDU events and until these have been handled
1733 	 * we need to route all PDU's through the state machine as PDU
1734 	 * events to maintain ordering.
1735 	 *
1736 	 * Note that IDM cannot enter FFP mode until it processes in
1737 	 * its state machine the last xmit of the login process.
1738 	 * Hence, checking the IDM_PDU_LOGIN_TX flag here would be
1739 	 * superfluous.
1740 	 */
1741 	mutex_enter(&ic->ic_state_mutex);
1742 	if (ic->ic_ffp && (ic->ic_pdu_events == 0)) {
1743 		mutex_exit(&ic->ic_state_mutex);
1744 		switch (IDM_PDU_OPCODE(pdu)) {
1745 		case ISCSI_OP_SCSI_RSP:
1746 			/* Target only */
1747 			DTRACE_ISCSI_2(scsi__response, idm_conn_t *, ic,
1748 			    iscsi_scsi_rsp_hdr_t *,
1749 			    (iscsi_scsi_rsp_hdr_t *)pdu->isp_hdr);
1750 			idm_pdu_tx_forward(ic, pdu);
1751 			return;
1752 		case ISCSI_OP_SCSI_TASK_MGT_RSP:
1753 			/* Target only */
1754 			DTRACE_ISCSI_2(task__response, idm_conn_t *, ic,
1755 			    iscsi_text_rsp_hdr_t *,
1756 			    (iscsi_text_rsp_hdr_t *)pdu->isp_hdr);
1757 			idm_pdu_tx_forward(ic, pdu);
1758 			return;
1759 		case ISCSI_OP_SCSI_DATA_RSP:
1760 			/* Target only */
1761 			DTRACE_ISCSI_2(data__send, idm_conn_t *, ic,
1762 			    iscsi_data_rsp_hdr_t *,
1763 			    (iscsi_data_rsp_hdr_t *)pdu->isp_hdr);
1764 			idm_pdu_tx_forward(ic, pdu);
1765 			return;
1766 		case ISCSI_OP_RTT_RSP:
1767 			/* Target only */
1768 			DTRACE_ISCSI_2(data__request, idm_conn_t *, ic,
1769 			    iscsi_rtt_hdr_t *,
1770 			    (iscsi_rtt_hdr_t *)pdu->isp_hdr);
1771 			idm_pdu_tx_forward(ic, pdu);
1772 			return;
1773 		case ISCSI_OP_NOOP_IN:
1774 			/* Target only */
1775 			DTRACE_ISCSI_2(nop__send, idm_conn_t *, ic,
1776 			    iscsi_nop_in_hdr_t *,
1777 			    (iscsi_nop_in_hdr_t *)pdu->isp_hdr);
1778 			idm_pdu_tx_forward(ic, pdu);
1779 			return;
1780 		case ISCSI_OP_TEXT_RSP:
1781 			/* Target only */
1782 			DTRACE_ISCSI_2(text__response, idm_conn_t *, ic,
1783 			    iscsi_text_rsp_hdr_t *,
1784 			    (iscsi_text_rsp_hdr_t *)pdu->isp_hdr);
1785 			idm_pdu_tx_forward(ic, pdu);
1786 			return;
1787 		case ISCSI_OP_TEXT_CMD:
1788 		case ISCSI_OP_NOOP_OUT:
1789 		case ISCSI_OP_SCSI_CMD:
1790 		case ISCSI_OP_SCSI_DATA:
1791 		case ISCSI_OP_SCSI_TASK_MGT_MSG:
1792 			/* Initiator only */
1793 			idm_pdu_tx_forward(ic, pdu);
1794 			return;
1795 		default:
1796 			break;
1797 		}
1798 
1799 		mutex_enter(&ic->ic_state_mutex);
1800 	}
1801 
1802 	/*
1803 	 * Any PDU's processed outside of full-feature mode and non-SCSI
1804 	 * PDU's in full-feature mode are handled by generating an
1805 	 * event to the connection state machine.  The state machine
1806 	 * will validate the PDU against the current state and either
1807 	 * transmit the PDU if the opcode is allowed or handle an
1808 	 * error if the PDU is not allowed.
1809 	 *
1810 	 * This code-path will also generate any events that are implied
1811 	 * by the PDU opcode.  For example a "login response" with success
1812 	 * status generates a CE_LOGOUT_SUCCESS_SND event.
1813 	 */
1814 	switch (IDM_PDU_OPCODE(pdu)) {
1815 	case ISCSI_OP_LOGIN_CMD:
1816 		idm_conn_tx_pdu_event(ic, CE_LOGIN_SND, (uintptr_t)pdu);
1817 		break;
1818 	case ISCSI_OP_LOGIN_RSP:
1819 		DTRACE_ISCSI_2(login__response, idm_conn_t *, ic,
1820 		    iscsi_login_rsp_hdr_t *,
1821 		    (iscsi_login_rsp_hdr_t *)pdu->isp_hdr);
1822 		idm_parse_login_rsp(ic, pdu, /* Is RX */ B_FALSE);
1823 		break;
1824 	case ISCSI_OP_LOGOUT_CMD:
1825 		idm_parse_logout_req(ic, pdu, /* Is RX */ B_FALSE);
1826 		break;
1827 	case ISCSI_OP_LOGOUT_RSP:
1828 		DTRACE_ISCSI_2(logout__response, idm_conn_t *, ic,
1829 		    iscsi_logout_rsp_hdr_t *,
1830 		    (iscsi_logout_rsp_hdr_t *)pdu->isp_hdr);
1831 		idm_parse_logout_rsp(ic, pdu, /* Is RX */ B_FALSE);
1832 		break;
1833 	case ISCSI_OP_ASYNC_EVENT:
1834 		DTRACE_ISCSI_2(async__send, idm_conn_t *, ic,
1835 		    iscsi_async_evt_hdr_t *,
1836 		    (iscsi_async_evt_hdr_t *)pdu->isp_hdr);
1837 		async_evt = (iscsi_async_evt_hdr_t *)pdu->isp_hdr;
1838 		switch (async_evt->async_event) {
1839 		case ISCSI_ASYNC_EVENT_REQUEST_LOGOUT:
1840 			idm_conn_tx_pdu_event(ic, CE_ASYNC_LOGOUT_SND,
1841 			    (uintptr_t)pdu);
1842 			break;
1843 		case ISCSI_ASYNC_EVENT_DROPPING_CONNECTION:
1844 			idm_conn_tx_pdu_event(ic, CE_ASYNC_DROP_CONN_SND,
1845 			    (uintptr_t)pdu);
1846 			break;
1847 		case ISCSI_ASYNC_EVENT_DROPPING_ALL_CONNECTIONS:
1848 			idm_conn_tx_pdu_event(ic, CE_ASYNC_DROP_ALL_CONN_SND,
1849 			    (uintptr_t)pdu);
1850 			break;
1851 		case ISCSI_ASYNC_EVENT_SCSI_EVENT:
1852 		case ISCSI_ASYNC_EVENT_PARAM_NEGOTIATION:
1853 		default:
1854 			idm_conn_tx_pdu_event(ic, CE_MISC_TX,
1855 			    (uintptr_t)pdu);
1856 			break;
1857 		}
1858 		break;
1859 	case ISCSI_OP_SCSI_RSP:
1860 		/* Target only */
1861 		DTRACE_ISCSI_2(scsi__response, idm_conn_t *, ic,
1862 		    iscsi_scsi_rsp_hdr_t *,
1863 		    (iscsi_scsi_rsp_hdr_t *)pdu->isp_hdr);
1864 		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1865 		break;
1866 	case ISCSI_OP_SCSI_TASK_MGT_RSP:
1867 		/* Target only */
1868 		DTRACE_ISCSI_2(task__response, idm_conn_t *, ic,
1869 		    iscsi_scsi_task_mgt_rsp_hdr_t *,
1870 		    (iscsi_scsi_task_mgt_rsp_hdr_t *)pdu->isp_hdr);
1871 		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1872 		break;
1873 	case ISCSI_OP_SCSI_DATA_RSP:
1874 		/* Target only */
1875 		DTRACE_ISCSI_2(data__send, idm_conn_t *, ic,
1876 		    iscsi_data_rsp_hdr_t *,
1877 		    (iscsi_data_rsp_hdr_t *)pdu->isp_hdr);
1878 		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1879 		break;
1880 	case ISCSI_OP_RTT_RSP:
1881 		/* Target only */
1882 		DTRACE_ISCSI_2(data__request, idm_conn_t *, ic,
1883 		    iscsi_rtt_hdr_t *,
1884 		    (iscsi_rtt_hdr_t *)pdu->isp_hdr);
1885 		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1886 		break;
1887 	case ISCSI_OP_NOOP_IN:
1888 		/* Target only */
1889 		DTRACE_ISCSI_2(nop__send, idm_conn_t *, ic,
1890 		    iscsi_nop_in_hdr_t *,
1891 		    (iscsi_nop_in_hdr_t *)pdu->isp_hdr);
1892 		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1893 		break;
1894 	case ISCSI_OP_TEXT_RSP:
1895 		/* Target only */
1896 		DTRACE_ISCSI_2(text__response, idm_conn_t *, ic,
1897 		    iscsi_text_rsp_hdr_t *,
1898 		    (iscsi_text_rsp_hdr_t *)pdu->isp_hdr);
1899 		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1900 		break;
1901 		/* Initiator only */
1902 	case ISCSI_OP_SCSI_CMD:
1903 	case ISCSI_OP_SCSI_TASK_MGT_MSG:
1904 	case ISCSI_OP_SCSI_DATA:
1905 	case ISCSI_OP_NOOP_OUT:
1906 	case ISCSI_OP_TEXT_CMD:
1907 	case ISCSI_OP_SNACK_CMD:
1908 	case ISCSI_OP_REJECT_MSG:
1909 	default:
1910 		/*
1911 		 * Connection state machine will validate these PDU's against
1912 		 * the current state.  A PDU not allowed in the current
1913 		 * state will cause a protocol error.
1914 		 */
1915 		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1916 		break;
1917 	}
1918 	mutex_exit(&ic->ic_state_mutex);
1919 }
1920 
1921 /*
1922  * Common allocation of a PDU along with memory for header and data.
1923  */
1924 static idm_pdu_t *
1925 idm_pdu_alloc_common(uint_t hdrlen, uint_t datalen, int sleepflag)
1926 {
1927 	idm_pdu_t *result;
1928 
1929 	/*
1930 	 * IDM clients should cache these structures for performance
1931 	 * critical paths.  We can't cache effectively in IDM because we
1932 	 * don't know the correct header and data size.
1933 	 *
1934 	 * Valid header length is assumed to be hdrlen and valid data
1935 	 * length is assumed to be datalen.  isp_hdrlen and isp_datalen
1936 	 * can be adjusted after the PDU is returned if necessary.
1937 	 */
1938 	result = kmem_zalloc(sizeof (idm_pdu_t) + hdrlen + datalen, sleepflag);
1939 	if (result != NULL) {
1940 		/* For idm_pdu_free sanity check */
1941 		result->isp_flags |= IDM_PDU_ALLOC;
1942 		/* pointer arithmetic */
1943 		result->isp_hdr = (iscsi_hdr_t *)(result + 1);
1944 		result->isp_hdrlen = hdrlen;
1945 		result->isp_hdrbuflen = hdrlen;
1946 		result->isp_transport_hdrlen = 0;
1947 		if (datalen != 0)
1948 			result->isp_data = (uint8_t *)result->isp_hdr + hdrlen;
1949 		result->isp_datalen = datalen;
1950 		result->isp_databuflen = datalen;
1951 		result->isp_magic = IDM_PDU_MAGIC;
1952 	}
1953 
1954 	return (result);
1955 }
1956 
1957 /*
1958  * Typical idm_pdu_alloc invocation, will block for resources.
1959  */
1960 idm_pdu_t *
1961 idm_pdu_alloc(uint_t hdrlen, uint_t datalen)
1962 {
1963 	return (idm_pdu_alloc_common(hdrlen, datalen, KM_SLEEP));
1964 }
1965 
1966 /*
1967  * Non-blocking idm_pdu_alloc implementation, returns NULL if resources
1968  * are not available.  Needed for transport-layer allocations which may
1969  * be invoking in interrupt context.
1970  */
1971 idm_pdu_t *
1972 idm_pdu_alloc_nosleep(uint_t hdrlen, uint_t datalen)
1973 {
1974 	return (idm_pdu_alloc_common(hdrlen, datalen, KM_NOSLEEP));
1975 }
1976 
1977 /*
1978  * Free a PDU previously allocated with idm_pdu_alloc() including any
1979  * header and data space allocated as part of the original request.
1980  * Additional memory regions referenced by subsequent modification of
1981  * the isp_hdr and/or isp_data fields will not be freed.
1982  */
1983 void
1984 idm_pdu_free(idm_pdu_t *pdu)
1985 {
1986 	/* Make sure the structure was allocated using idm_pdu_alloc() */
1987 	ASSERT(pdu->isp_flags & IDM_PDU_ALLOC);
1988 	kmem_free(pdu,
1989 	    sizeof (idm_pdu_t) + pdu->isp_hdrbuflen + pdu->isp_databuflen);
1990 }
1991 
1992 /*
1993  * Initialize the connection, private and callback fields in a PDU.
1994  */
1995 void
1996 idm_pdu_init(idm_pdu_t *pdu, idm_conn_t *ic, void *private, idm_pdu_cb_t *cb)
1997 {
1998 	/*
1999 	 * idm_pdu_complete() will call idm_pdu_free if the callback is
2000 	 * NULL.  This will only work if the PDU was originally allocated
2001 	 * with idm_pdu_alloc().
2002 	 */
2003 	ASSERT((pdu->isp_flags & IDM_PDU_ALLOC) ||
2004 	    (cb != NULL));
2005 	pdu->isp_magic = IDM_PDU_MAGIC;
2006 	pdu->isp_ic = ic;
2007 	pdu->isp_private = private;
2008 	pdu->isp_callback = cb;
2009 }
2010 
2011 /*
2012  * Initialize the header and header length field.  This function should
2013  * not be used to adjust the header length in a buffer allocated via
2014  * pdu_pdu_alloc since it overwrites the existing header pointer.
2015  */
2016 void
2017 idm_pdu_init_hdr(idm_pdu_t *pdu, uint8_t *hdr, uint_t hdrlen)
2018 {
2019 	pdu->isp_hdr = (iscsi_hdr_t *)((void *)hdr);
2020 	pdu->isp_hdrlen = hdrlen;
2021 }
2022 
2023 /*
2024  * Initialize the data and data length fields.  This function should
2025  * not be used to adjust the data length of a buffer allocated via
2026  * idm_pdu_alloc since it overwrites the existing data pointer.
2027  */
2028 void
2029 idm_pdu_init_data(idm_pdu_t *pdu, uint8_t *data, uint_t datalen)
2030 {
2031 	pdu->isp_data = data;
2032 	pdu->isp_datalen = datalen;
2033 }
2034 
2035 void
2036 idm_pdu_complete(idm_pdu_t *pdu, idm_status_t status)
2037 {
2038 	if (pdu->isp_callback) {
2039 		pdu->isp_status = status;
2040 		(*pdu->isp_callback)(pdu, status);
2041 	} else {
2042 		idm_pdu_free(pdu);
2043 	}
2044 }
2045 
2046 /*
2047  * State machine auditing
2048  */
2049 
2050 void
2051 idm_sm_audit_init(sm_audit_buf_t *audit_buf)
2052 {
2053 	bzero(audit_buf, sizeof (sm_audit_buf_t));
2054 	audit_buf->sab_max_index = SM_AUDIT_BUF_MAX_REC - 1;
2055 }
2056 
2057 static
2058 sm_audit_record_t *
2059 idm_sm_audit_common(sm_audit_buf_t *audit_buf, sm_audit_record_type_t r_type,
2060     sm_audit_sm_type_t sm_type,
2061     int current_state)
2062 {
2063 	sm_audit_record_t *sar;
2064 
2065 	sar = audit_buf->sab_records;
2066 	sar += audit_buf->sab_index;
2067 	audit_buf->sab_index++;
2068 	audit_buf->sab_index &= audit_buf->sab_max_index;
2069 
2070 	sar->sar_type = r_type;
2071 	gethrestime(&sar->sar_timestamp);
2072 	sar->sar_sm_type = sm_type;
2073 	sar->sar_state = current_state;
2074 
2075 	return (sar);
2076 }
2077 
2078 void
2079 idm_sm_audit_event(sm_audit_buf_t *audit_buf,
2080     sm_audit_sm_type_t sm_type, int current_state,
2081     int event, uintptr_t event_info)
2082 {
2083 	sm_audit_record_t *sar;
2084 
2085 	sar = idm_sm_audit_common(audit_buf, SAR_STATE_EVENT,
2086 	    sm_type, current_state);
2087 	sar->sar_event = event;
2088 	sar->sar_event_info = event_info;
2089 }
2090 
2091 void
2092 idm_sm_audit_state_change(sm_audit_buf_t *audit_buf,
2093     sm_audit_sm_type_t sm_type, int current_state, int new_state)
2094 {
2095 	sm_audit_record_t *sar;
2096 
2097 	sar = idm_sm_audit_common(audit_buf, SAR_STATE_CHANGE,
2098 	    sm_type, current_state);
2099 	sar->sar_new_state = new_state;
2100 }
2101 
2102 
2103 /*
2104  * Object reference tracking
2105  */
2106 
2107 void
2108 idm_refcnt_init(idm_refcnt_t *refcnt, void *referenced_obj)
2109 {
2110 	bzero(refcnt, sizeof (*refcnt));
2111 	idm_refcnt_reset(refcnt);
2112 	refcnt->ir_referenced_obj = referenced_obj;
2113 	bzero(&refcnt->ir_audit_buf, sizeof (refcnt_audit_buf_t));
2114 	refcnt->ir_audit_buf.anb_max_index = REFCNT_AUDIT_BUF_MAX_REC - 1;
2115 	mutex_init(&refcnt->ir_mutex, NULL, MUTEX_DEFAULT, NULL);
2116 	cv_init(&refcnt->ir_cv, NULL, CV_DEFAULT, NULL);
2117 }
2118 
2119 void
2120 idm_refcnt_destroy(idm_refcnt_t *refcnt)
2121 {
2122 	/*
2123 	 * Grab the mutex to there are no other lingering threads holding
2124 	 * the mutex before we destroy it (e.g. idm_refcnt_rele just after
2125 	 * the refcnt goes to zero if ir_waiting == REF_WAIT_ASYNC)
2126 	 */
2127 	mutex_enter(&refcnt->ir_mutex);
2128 	ASSERT(refcnt->ir_refcnt == 0);
2129 	cv_destroy(&refcnt->ir_cv);
2130 	mutex_destroy(&refcnt->ir_mutex);
2131 }
2132 
2133 void
2134 idm_refcnt_reset(idm_refcnt_t *refcnt)
2135 {
2136 	refcnt->ir_waiting = REF_NOWAIT;
2137 	refcnt->ir_refcnt = 0;
2138 }
2139 
2140 void
2141 idm_refcnt_hold(idm_refcnt_t *refcnt)
2142 {
2143 	/*
2144 	 * Nothing should take a hold on an object after a call to
2145 	 * idm_refcnt_wait_ref or idm_refcnd_async_wait_ref
2146 	 */
2147 	ASSERT(refcnt->ir_waiting == REF_NOWAIT);
2148 
2149 	mutex_enter(&refcnt->ir_mutex);
2150 	refcnt->ir_refcnt++;
2151 	REFCNT_AUDIT(refcnt);
2152 	mutex_exit(&refcnt->ir_mutex);
2153 }
2154 
2155 static void
2156 idm_refcnt_unref_task(void *refcnt_void)
2157 {
2158 	idm_refcnt_t *refcnt = refcnt_void;
2159 
2160 	REFCNT_AUDIT(refcnt);
2161 	(*refcnt->ir_cb)(refcnt->ir_referenced_obj);
2162 }
2163 
2164 void
2165 idm_refcnt_rele(idm_refcnt_t *refcnt)
2166 {
2167 	mutex_enter(&refcnt->ir_mutex);
2168 	ASSERT(refcnt->ir_refcnt > 0);
2169 	refcnt->ir_refcnt--;
2170 	REFCNT_AUDIT(refcnt);
2171 	if (refcnt->ir_waiting == REF_NOWAIT) {
2172 		/* No one is waiting on this object */
2173 		mutex_exit(&refcnt->ir_mutex);
2174 		return;
2175 	}
2176 
2177 	/*
2178 	 * Someone is waiting for this object to go idle so check if
2179 	 * refcnt is 0.  Waiting on an object then later grabbing another
2180 	 * reference is not allowed so we don't need to handle that case.
2181 	 */
2182 	if (refcnt->ir_refcnt == 0) {
2183 		if (refcnt->ir_waiting == REF_WAIT_ASYNC) {
2184 			if (taskq_dispatch(idm.idm_global_taskq,
2185 			    &idm_refcnt_unref_task, refcnt, TQ_SLEEP) == NULL) {
2186 				cmn_err(CE_WARN,
2187 				    "idm_refcnt_rele: Couldn't dispatch task");
2188 			}
2189 		} else if (refcnt->ir_waiting == REF_WAIT_SYNC) {
2190 			cv_signal(&refcnt->ir_cv);
2191 		}
2192 	}
2193 	mutex_exit(&refcnt->ir_mutex);
2194 }
2195 
2196 void
2197 idm_refcnt_rele_and_destroy(idm_refcnt_t *refcnt, idm_refcnt_cb_t *cb_func)
2198 {
2199 	mutex_enter(&refcnt->ir_mutex);
2200 	ASSERT(refcnt->ir_refcnt > 0);
2201 	refcnt->ir_refcnt--;
2202 	REFCNT_AUDIT(refcnt);
2203 
2204 	/*
2205 	 * Someone is waiting for this object to go idle so check if
2206 	 * refcnt is 0.  Waiting on an object then later grabbing another
2207 	 * reference is not allowed so we don't need to handle that case.
2208 	 */
2209 	if (refcnt->ir_refcnt == 0) {
2210 		refcnt->ir_cb = cb_func;
2211 		refcnt->ir_waiting = REF_WAIT_ASYNC;
2212 		if (taskq_dispatch(idm.idm_global_taskq,
2213 		    &idm_refcnt_unref_task, refcnt, TQ_SLEEP) == NULL) {
2214 			cmn_err(CE_WARN,
2215 			    "idm_refcnt_rele: Couldn't dispatch task");
2216 		}
2217 	}
2218 	mutex_exit(&refcnt->ir_mutex);
2219 }
2220 
2221 void
2222 idm_refcnt_wait_ref(idm_refcnt_t *refcnt)
2223 {
2224 	mutex_enter(&refcnt->ir_mutex);
2225 	refcnt->ir_waiting = REF_WAIT_SYNC;
2226 	REFCNT_AUDIT(refcnt);
2227 	while (refcnt->ir_refcnt != 0)
2228 		cv_wait(&refcnt->ir_cv, &refcnt->ir_mutex);
2229 	mutex_exit(&refcnt->ir_mutex);
2230 }
2231 
2232 void
2233 idm_refcnt_async_wait_ref(idm_refcnt_t *refcnt, idm_refcnt_cb_t *cb_func)
2234 {
2235 	mutex_enter(&refcnt->ir_mutex);
2236 	refcnt->ir_waiting = REF_WAIT_ASYNC;
2237 	refcnt->ir_cb = cb_func;
2238 	REFCNT_AUDIT(refcnt);
2239 	/*
2240 	 * It's possible we don't have any references.  To make things easier
2241 	 * on the caller use a taskq to call the callback instead of
2242 	 * calling it synchronously
2243 	 */
2244 	if (refcnt->ir_refcnt == 0) {
2245 		if (taskq_dispatch(idm.idm_global_taskq,
2246 		    &idm_refcnt_unref_task, refcnt, TQ_SLEEP) == NULL) {
2247 			cmn_err(CE_WARN,
2248 			    "idm_refcnt_async_wait_ref: "
2249 			    "Couldn't dispatch task");
2250 		}
2251 	}
2252 	mutex_exit(&refcnt->ir_mutex);
2253 }
2254 
2255 void
2256 idm_refcnt_destroy_unref_obj(idm_refcnt_t *refcnt,
2257     idm_refcnt_cb_t *cb_func)
2258 {
2259 	mutex_enter(&refcnt->ir_mutex);
2260 	if (refcnt->ir_refcnt == 0) {
2261 		mutex_exit(&refcnt->ir_mutex);
2262 		(*cb_func)(refcnt->ir_referenced_obj);
2263 		return;
2264 	}
2265 	mutex_exit(&refcnt->ir_mutex);
2266 }
2267 
2268 void
2269 idm_conn_hold(idm_conn_t *ic)
2270 {
2271 	idm_refcnt_hold(&ic->ic_refcnt);
2272 }
2273 
2274 void
2275 idm_conn_rele(idm_conn_t *ic)
2276 {
2277 	idm_refcnt_rele(&ic->ic_refcnt);
2278 }
2279 
2280 void
2281 idm_conn_set_target_name(idm_conn_t *ic, char *target_name)
2282 {
2283 	(void) strlcpy(ic->ic_target_name, target_name, ISCSI_MAX_NAME_LEN + 1);
2284 }
2285 
2286 void
2287 idm_conn_set_initiator_name(idm_conn_t *ic, char *initiator_name)
2288 {
2289 	(void) strlcpy(ic->ic_initiator_name, initiator_name,
2290 	    ISCSI_MAX_NAME_LEN + 1);
2291 }
2292 
2293 void
2294 idm_conn_set_isid(idm_conn_t *ic, uint8_t isid[ISCSI_ISID_LEN])
2295 {
2296 	(void) snprintf(ic->ic_isid, ISCSI_MAX_ISID_LEN + 1,
2297 	    "%02x%02x%02x%02x%02x%02x",
2298 	    isid[0], isid[1], isid[2], isid[3], isid[4], isid[5]);
2299 }
2300 
2301 static int
2302 _idm_init(void)
2303 {
2304 	/* Initialize the rwlock for the taskid table */
2305 	rw_init(&idm.idm_taskid_table_lock, NULL, RW_DRIVER, NULL);
2306 
2307 	/* Initialize the global mutex and taskq */
2308 	mutex_init(&idm.idm_global_mutex, NULL, MUTEX_DEFAULT, NULL);
2309 
2310 	cv_init(&idm.idm_tgt_svc_cv, NULL, CV_DEFAULT, NULL);
2311 	cv_init(&idm.idm_wd_cv, NULL, CV_DEFAULT, NULL);
2312 
2313 	/*
2314 	 * The maximum allocation needs to be high here since there can be
2315 	 * many concurrent tasks using the global taskq.
2316 	 */
2317 	idm.idm_global_taskq = taskq_create("idm_global_taskq", 1, minclsyspri,
2318 	    128, 16384, TASKQ_PREPOPULATE);
2319 	if (idm.idm_global_taskq == NULL) {
2320 		cv_destroy(&idm.idm_wd_cv);
2321 		cv_destroy(&idm.idm_tgt_svc_cv);
2322 		mutex_destroy(&idm.idm_global_mutex);
2323 		rw_destroy(&idm.idm_taskid_table_lock);
2324 		return (ENOMEM);
2325 	}
2326 
2327 	/* Start watchdog thread */
2328 	idm.idm_wd_thread = thread_create(NULL, 0,
2329 	    idm_wd_thread, NULL, 0, &p0, TS_RUN, minclsyspri);
2330 	if (idm.idm_wd_thread == NULL) {
2331 		/* Couldn't create the watchdog thread */
2332 		taskq_destroy(idm.idm_global_taskq);
2333 		cv_destroy(&idm.idm_wd_cv);
2334 		cv_destroy(&idm.idm_tgt_svc_cv);
2335 		mutex_destroy(&idm.idm_global_mutex);
2336 		rw_destroy(&idm.idm_taskid_table_lock);
2337 		return (ENOMEM);
2338 	}
2339 
2340 	/* Pause until the watchdog thread is running */
2341 	mutex_enter(&idm.idm_global_mutex);
2342 	while (!idm.idm_wd_thread_running)
2343 		cv_wait(&idm.idm_wd_cv, &idm.idm_global_mutex);
2344 	mutex_exit(&idm.idm_global_mutex);
2345 
2346 	/*
2347 	 * Allocate the task ID table and set "next" to 0.
2348 	 */
2349 
2350 	idm.idm_taskid_max = idm_max_taskids;
2351 	idm.idm_taskid_table = (idm_task_t **)
2352 	    kmem_zalloc(idm.idm_taskid_max * sizeof (idm_task_t *), KM_SLEEP);
2353 	idm.idm_taskid_next = 0;
2354 
2355 	/* Create the global buffer and task kmem caches */
2356 	idm.idm_buf_cache = kmem_cache_create("idm_buf_cache",
2357 	    sizeof (idm_buf_t), 8, NULL, NULL, NULL, NULL, NULL, KM_SLEEP);
2358 
2359 	/*
2360 	 * Note, we're explicitly allocating an additional iSER header-
2361 	 * sized chunk for each of these elements. See idm_task_constructor().
2362 	 */
2363 	idm.idm_task_cache = kmem_cache_create("idm_task_cache",
2364 	    sizeof (idm_task_t) + IDM_TRANSPORT_HEADER_LENGTH, 8,
2365 	    &idm_task_constructor, &idm_task_destructor,
2366 	    NULL, NULL, NULL, KM_SLEEP);
2367 
2368 	/* Create the service and connection context lists */
2369 	list_create(&idm.idm_tgt_svc_list, sizeof (idm_svc_t),
2370 	    offsetof(idm_svc_t, is_list_node));
2371 	list_create(&idm.idm_tgt_conn_list, sizeof (idm_conn_t),
2372 	    offsetof(idm_conn_t, ic_list_node));
2373 	list_create(&idm.idm_ini_conn_list, sizeof (idm_conn_t),
2374 	    offsetof(idm_conn_t, ic_list_node));
2375 
2376 	/* Initialize the native sockets transport */
2377 	idm_so_init(&idm_transport_list[IDM_TRANSPORT_TYPE_SOCKETS]);
2378 
2379 	/* Create connection ID pool */
2380 	(void) idm_idpool_create(&idm.idm_conn_id_pool);
2381 
2382 	return (DDI_SUCCESS);
2383 }
2384 
2385 static int
2386 _idm_fini(void)
2387 {
2388 	if (!list_is_empty(&idm.idm_ini_conn_list) ||
2389 	    !list_is_empty(&idm.idm_tgt_conn_list) ||
2390 	    !list_is_empty(&idm.idm_tgt_svc_list)) {
2391 		return (EBUSY);
2392 	}
2393 
2394 	mutex_enter(&idm.idm_global_mutex);
2395 	idm.idm_wd_thread_running = B_FALSE;
2396 	cv_signal(&idm.idm_wd_cv);
2397 	mutex_exit(&idm.idm_global_mutex);
2398 
2399 	thread_join(idm.idm_wd_thread_did);
2400 
2401 	idm_idpool_destroy(&idm.idm_conn_id_pool);
2402 
2403 	/* Close any LDI handles we have open on transport drivers */
2404 	mutex_enter(&idm.idm_global_mutex);
2405 	idm_transport_teardown();
2406 	mutex_exit(&idm.idm_global_mutex);
2407 
2408 	/* Teardown the native sockets transport */
2409 	idm_so_fini();
2410 
2411 	list_destroy(&idm.idm_ini_conn_list);
2412 	list_destroy(&idm.idm_tgt_conn_list);
2413 	list_destroy(&idm.idm_tgt_svc_list);
2414 	kmem_cache_destroy(idm.idm_task_cache);
2415 	kmem_cache_destroy(idm.idm_buf_cache);
2416 	kmem_free(idm.idm_taskid_table,
2417 	    idm.idm_taskid_max * sizeof (idm_task_t *));
2418 	mutex_destroy(&idm.idm_global_mutex);
2419 	cv_destroy(&idm.idm_wd_cv);
2420 	cv_destroy(&idm.idm_tgt_svc_cv);
2421 	rw_destroy(&idm.idm_taskid_table_lock);
2422 
2423 	return (0);
2424 }
2425