xref: /titanic_41/usr/src/uts/common/io/idm/idm_so.c (revision 0f1b305ee9e700c825d9e9ad1ea1e4311d212eb2)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/conf.h>
27 #include <sys/stat.h>
28 #include <sys/file.h>
29 #include <sys/ddi.h>
30 #include <sys/sunddi.h>
31 #include <sys/modctl.h>
32 #include <sys/priv.h>
33 #include <sys/cpuvar.h>
34 #include <sys/socket.h>
35 #include <sys/strsubr.h>
36 #include <sys/sysmacros.h>
37 #include <sys/sdt.h>
38 #include <netinet/tcp.h>
39 #include <inet/tcp.h>
40 #include <sys/socketvar.h>
41 #include <sys/pathname.h>
42 #include <sys/fs/snode.h>
43 #include <sys/fs/dv_node.h>
44 #include <sys/vnode.h>
45 #include <netinet/in.h>
46 #include <net/if.h>
47 #include <sys/sockio.h>
48 #include <sys/ksocket.h>
49 #include <sys/idm/idm.h>
50 #include <sys/idm/idm_so.h>
51 #include <sys/idm/idm_text.h>
52 
53 /*
54  * in6addr_any is currently all zeroes, but use the macro in case this
55  * ever changes.
56  */
57 const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
58 
59 static void idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
60 static void idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
61 static void idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
62 
63 static idm_status_t idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so);
64 static void idm_so_conn_destroy_common(idm_conn_t *ic);
65 static void idm_so_conn_connect_common(idm_conn_t *ic);
66 
67 static void idm_set_ini_preconnect_options(idm_so_conn_t *sc);
68 static void idm_set_ini_postconnect_options(idm_so_conn_t *sc);
69 static void idm_set_tgt_connect_options(ksocket_t so);
70 static idm_status_t idm_i_so_tx(idm_pdu_t *pdu);
71 
72 static idm_status_t idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu);
73 static idm_status_t idm_so_send_buf_region(idm_task_t *idt, uint8_t opcode,
74     idm_buf_t *idb, uint32_t buf_region_offset, uint32_t buf_region_length);
75 
76 static uint32_t idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb,
77     uint32_t ro, uint32_t dlength);
78 
79 static idm_status_t idm_so_handle_digest(idm_conn_t *it,
80     nvpair_t *digest_choice, const idm_kv_xlate_t *ikvx);
81 
82 /*
83  * Transport ops prototypes
84  */
85 static void idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu);
86 static idm_status_t idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb);
87 static idm_status_t idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb);
88 static void idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu);
89 static void idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu);
90 static void idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu);
91 static idm_status_t idm_so_free_task_rsrc(idm_task_t *idt);
92 static kv_status_t idm_so_negotiate_key_values(idm_conn_t *it,
93     nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl);
94 static idm_status_t idm_so_notice_key_values(idm_conn_t *it,
95     nvlist_t *negotiated_nvl);
96 static boolean_t idm_so_conn_is_capable(idm_conn_req_t *ic,
97     idm_transport_caps_t *caps);
98 static idm_status_t idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen);
99 static void idm_so_buf_free(idm_buf_t *idb);
100 static idm_status_t idm_so_buf_setup(idm_buf_t *idb);
101 static void idm_so_buf_teardown(idm_buf_t *idb);
102 static idm_status_t idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is);
103 static void idm_so_tgt_svc_destroy(idm_svc_t *is);
104 static idm_status_t idm_so_tgt_svc_online(idm_svc_t *is);
105 static void idm_so_tgt_svc_offline(idm_svc_t *is);
106 static void idm_so_tgt_conn_destroy(idm_conn_t *ic);
107 static idm_status_t idm_so_tgt_conn_connect(idm_conn_t *ic);
108 static void idm_so_conn_disconnect(idm_conn_t *ic);
109 static idm_status_t idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic);
110 static void idm_so_ini_conn_destroy(idm_conn_t *ic);
111 static idm_status_t idm_so_ini_conn_connect(idm_conn_t *ic);
112 
113 /*
114  * IDM Native Sockets transport operations
115  */
116 static
117 idm_transport_ops_t idm_so_transport_ops = {
118 	idm_so_tx,			/* it_tx_pdu */
119 	idm_so_buf_tx_to_ini,		/* it_buf_tx_to_ini */
120 	idm_so_buf_rx_from_ini,		/* it_buf_rx_from_ini */
121 	idm_so_rx_datain,		/* it_rx_datain */
122 	idm_so_rx_rtt,			/* it_rx_rtt */
123 	idm_so_rx_dataout,		/* it_rx_dataout */
124 	NULL,				/* it_alloc_conn_rsrc */
125 	NULL,				/* it_free_conn_rsrc */
126 	NULL,				/* it_tgt_enable_datamover */
127 	NULL,				/* it_ini_enable_datamover */
128 	NULL,				/* it_conn_terminate */
129 	idm_so_free_task_rsrc,		/* it_free_task_rsrc */
130 	idm_so_negotiate_key_values,	/* it_negotiate_key_values */
131 	idm_so_notice_key_values,	/* it_notice_key_values */
132 	idm_so_conn_is_capable,		/* it_conn_is_capable */
133 	idm_so_buf_alloc,		/* it_buf_alloc */
134 	idm_so_buf_free,		/* it_buf_free */
135 	idm_so_buf_setup,		/* it_buf_setup */
136 	idm_so_buf_teardown,		/* it_buf_teardown */
137 	idm_so_tgt_svc_create,		/* it_tgt_svc_create */
138 	idm_so_tgt_svc_destroy,		/* it_tgt_svc_destroy */
139 	idm_so_tgt_svc_online,		/* it_tgt_svc_online */
140 	idm_so_tgt_svc_offline,		/* it_tgt_svc_offline */
141 	idm_so_tgt_conn_destroy,	/* it_tgt_conn_destroy */
142 	idm_so_tgt_conn_connect,	/* it_tgt_conn_connect */
143 	idm_so_conn_disconnect,		/* it_tgt_conn_disconnect */
144 	idm_so_ini_conn_create,		/* it_ini_conn_create */
145 	idm_so_ini_conn_destroy,	/* it_ini_conn_destroy */
146 	idm_so_ini_conn_connect,	/* it_ini_conn_connect */
147 	idm_so_conn_disconnect		/* it_ini_conn_disconnect */
148 };
149 
150 /*
151  * idm_so_init()
152  * Sockets transport initialization
153  */
154 void
155 idm_so_init(idm_transport_t *it)
156 {
157 	/* Cache for IDM Data and R2T Transmit PDU's */
158 	idm.idm_sotx_pdu_cache = kmem_cache_create("idm_tx_pdu_cache",
159 	    sizeof (idm_pdu_t) + sizeof (iscsi_hdr_t), 8,
160 	    &idm_sotx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP);
161 
162 	/* Cache for IDM Receive PDU's */
163 	idm.idm_sorx_pdu_cache = kmem_cache_create("idm_rx_pdu_cache",
164 	    sizeof (idm_pdu_t) + IDM_SORX_CACHE_HDRLEN, 8,
165 	    &idm_sorx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP);
166 
167 	/* Set the sockets transport ops */
168 	it->it_ops = &idm_so_transport_ops;
169 }
170 
171 /*
172  * idm_so_fini()
173  * Sockets transport teardown
174  */
175 void
176 idm_so_fini(void)
177 {
178 	kmem_cache_destroy(idm.idm_sotx_pdu_cache);
179 	kmem_cache_destroy(idm.idm_sorx_pdu_cache);
180 }
181 
182 ksocket_t
183 idm_socreate(int domain, int type, int protocol)
184 {
185 	ksocket_t ks;
186 
187 	if (!ksocket_socket(&ks, domain, type, protocol, KSOCKET_NOSLEEP,
188 	    CRED())) {
189 		return (ks);
190 	} else {
191 		return (NULL);
192 	}
193 }
194 
195 /*
196  * idm_soshutdown will disconnect the socket and prevent subsequent PDU
197  * reception and transmission.  The sonode still exists but its state
198  * gets modified to indicate it is no longer connected.  Calls to
199  * idm_sorecv/idm_iov_sorecv will return so idm_soshutdown can be used
200  * regain control of a thread stuck in idm_sorecv.
201  */
202 void
203 idm_soshutdown(ksocket_t so)
204 {
205 	(void) ksocket_shutdown(so, SHUT_RDWR, CRED());
206 }
207 
208 /*
209  * idm_sodestroy releases all resources associated with a socket previously
210  * created with idm_socreate.  The socket must be shutdown using
211  * idm_soshutdown before the socket is destroyed with idm_sodestroy,
212  * otherwise undefined behavior will result.
213  */
214 void
215 idm_sodestroy(ksocket_t ks)
216 {
217 	(void) ksocket_close(ks, CRED());
218 }
219 
220 /*
221  * IP address filter functions to flag addresses that should not
222  * go out to initiators through discovery.
223  */
224 static boolean_t
225 idm_v4_addr_okay(struct in_addr *in_addr)
226 {
227 	in_addr_t addr = ntohl(in_addr->s_addr);
228 
229 	if ((INADDR_NONE == addr) ||
230 	    (IN_MULTICAST(addr)) ||
231 	    ((addr >> IN_CLASSA_NSHIFT) == 0) ||
232 	    ((addr >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) {
233 		return (B_FALSE);
234 	}
235 	return (B_TRUE);
236 }
237 
238 static boolean_t
239 idm_v6_addr_okay(struct in6_addr *addr6)
240 {
241 
242 	if ((IN6_IS_ADDR_UNSPECIFIED(addr6)) ||
243 	    (IN6_IS_ADDR_LOOPBACK(addr6)) ||
244 	    (IN6_IS_ADDR_MULTICAST(addr6)) ||
245 	    (IN6_IS_ADDR_V4MAPPED(addr6)) ||
246 	    (IN6_IS_ADDR_V4COMPAT(addr6)) ||
247 	    (IN6_IS_ADDR_LINKLOCAL(addr6))) {
248 		return (B_FALSE);
249 	}
250 	return (B_TRUE);
251 }
252 
253 /*
254  * idm_get_ipaddr will retrieve a list of IP Addresses which the host is
255  * configured with by sending down a sequence of kernel ioctl to IP STREAMS.
256  */
257 int
258 idm_get_ipaddr(idm_addr_list_t **ipaddr_p)
259 {
260 	ksocket_t 		so4, so6;
261 	struct lifnum		lifn;
262 	struct lifconf		lifc;
263 	struct lifreq		*lp;
264 	int			rval;
265 	int			numifs;
266 	int			bufsize;
267 	void			*buf;
268 	int			i, j, n, rc;
269 	struct sockaddr_storage	ss;
270 	struct sockaddr_in	*sin;
271 	struct sockaddr_in6	*sin6;
272 	idm_addr_t		*ip;
273 	idm_addr_list_t		*ipaddr;
274 	int			size_ipaddr;
275 
276 	*ipaddr_p = NULL;
277 	size_ipaddr = 0;
278 	buf = NULL;
279 
280 	/* create an ipv4 and ipv6 UDP socket */
281 	if ((so6 = idm_socreate(PF_INET6, SOCK_DGRAM, 0)) == NULL)
282 		return (0);
283 	if ((so4 = idm_socreate(PF_INET, SOCK_DGRAM, 0)) == NULL) {
284 		idm_sodestroy(so6);
285 		return (0);
286 	}
287 
288 
289 retry_count:
290 	/* snapshot the current number of interfaces */
291 	lifn.lifn_family = PF_UNSPEC;
292 	lifn.lifn_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
293 	lifn.lifn_count = 0;
294 	/* use vp6 for ioctls with unspecified families by default */
295 	if (ksocket_ioctl(so6, SIOCGLIFNUM, (intptr_t)&lifn, &rval, CRED())
296 	    != 0) {
297 		goto cleanup;
298 	}
299 
300 	numifs = lifn.lifn_count;
301 	if (numifs <= 0) {
302 		goto cleanup;
303 	}
304 
305 	/* allocate extra room in case more interfaces appear */
306 	numifs += 10;
307 
308 	/* get the interface names and ip addresses */
309 	bufsize = numifs * sizeof (struct lifreq);
310 	buf = kmem_alloc(bufsize, KM_SLEEP);
311 
312 	lifc.lifc_family = AF_UNSPEC;
313 	lifc.lifc_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
314 	lifc.lifc_len = bufsize;
315 	lifc.lifc_buf = buf;
316 	rc = ksocket_ioctl(so6, SIOCGLIFCONF, (intptr_t)&lifc, &rval, CRED());
317 	if (rc != 0) {
318 		goto cleanup;
319 	}
320 	/* if our extra room is used up, try again */
321 	if (bufsize <= lifc.lifc_len) {
322 		kmem_free(buf, bufsize);
323 		buf = NULL;
324 		goto retry_count;
325 	}
326 	/* calc actual number of ifconfs */
327 	n = lifc.lifc_len / sizeof (struct lifreq);
328 
329 	/* get ip address */
330 	if (n > 0) {
331 		size_ipaddr = sizeof (idm_addr_list_t) +
332 		    (n - 1) * sizeof (idm_addr_t);
333 		ipaddr = kmem_zalloc(size_ipaddr, KM_SLEEP);
334 	} else {
335 		goto cleanup;
336 	}
337 
338 	/*
339 	 * Examine the array of interfaces and filter uninteresting ones
340 	 */
341 	for (i = 0, j = 0, lp = lifc.lifc_req; i < n; i++, lp++) {
342 
343 		/*
344 		 * Copy the address as the SIOCGLIFFLAGS ioctl is destructive
345 		 */
346 		ss = lp->lifr_addr;
347 		/*
348 		 * fetch the flags using the socket of the correct family
349 		 */
350 		switch (ss.ss_family) {
351 		case AF_INET:
352 			rc = ksocket_ioctl(so4, SIOCGLIFFLAGS, (intptr_t)lp,
353 			    &rval, CRED());
354 			break;
355 		case AF_INET6:
356 			rc = ksocket_ioctl(so6, SIOCGLIFFLAGS, (intptr_t)lp,
357 			    &rval, CRED());
358 			break;
359 		default:
360 			continue;
361 		}
362 		if (rc == 0) {
363 			/*
364 			 * If we got the flags, skip uninteresting
365 			 * interfaces based on flags
366 			 */
367 			if ((lp->lifr_flags & IFF_UP) != IFF_UP)
368 				continue;
369 			if (lp->lifr_flags &
370 			    (IFF_ANYCAST|IFF_NOLOCAL|IFF_DEPRECATED))
371 				continue;
372 		}
373 
374 		/* save ip address */
375 		ip = &ipaddr->al_addrs[j];
376 		switch (ss.ss_family) {
377 		case AF_INET:
378 			sin = (struct sockaddr_in *)&ss;
379 			if (!idm_v4_addr_okay(&sin->sin_addr))
380 				continue;
381 			ip->a_addr.i_addr.in4 = sin->sin_addr;
382 			ip->a_addr.i_insize = sizeof (struct in_addr);
383 			break;
384 		case AF_INET6:
385 			sin6 = (struct sockaddr_in6 *)&ss;
386 			if (!idm_v6_addr_okay(&sin6->sin6_addr))
387 				continue;
388 			ip->a_addr.i_addr.in6 = sin6->sin6_addr;
389 			ip->a_addr.i_insize = sizeof (struct in6_addr);
390 			break;
391 		default:
392 			continue;
393 		}
394 		j++;
395 	}
396 
397 	if (j == 0) {
398 		/* no valid ifaddr */
399 		kmem_free(ipaddr, size_ipaddr);
400 		size_ipaddr = 0;
401 		ipaddr = NULL;
402 	} else {
403 		ipaddr->al_out_cnt = j;
404 	}
405 
406 
407 cleanup:
408 	idm_sodestroy(so6);
409 	idm_sodestroy(so4);
410 
411 	if (buf != NULL)
412 		kmem_free(buf, bufsize);
413 
414 	*ipaddr_p = ipaddr;
415 	return (size_ipaddr);
416 }
417 
418 int
419 idm_sorecv(ksocket_t so, void *msg, size_t len)
420 {
421 	iovec_t iov;
422 
423 	ASSERT(so != NULL);
424 	ASSERT(len != 0);
425 
426 	/*
427 	 * Fill in iovec and receive data
428 	 */
429 	iov.iov_base = msg;
430 	iov.iov_len = len;
431 
432 	return (idm_iov_sorecv(so, &iov, 1, len));
433 }
434 
435 /*
436  * idm_sosendto - Sends a buffered data on a non-connected socket.
437  *
438  * This function puts the data provided on the wire by calling sosendmsg.
439  * It will return only when all the data has been sent or if an error
440  * occurs.
441  *
442  * Returns 0 for success, the socket errno value if sosendmsg fails, and
443  * -1 if sosendmsg returns success but uio_resid != 0
444  */
445 int
446 idm_sosendto(ksocket_t so, void *buff, size_t len,
447     struct sockaddr *name, socklen_t namelen)
448 {
449 	struct msghdr		msg;
450 	struct iovec		iov[1];
451 	int			error;
452 	size_t			sent = 0;
453 
454 	iov[0].iov_base	= buff;
455 	iov[0].iov_len	= len;
456 
457 	/* Initialization of the message header. */
458 	bzero(&msg, sizeof (msg));
459 	msg.msg_iov	= iov;
460 	msg.msg_iovlen	= 1;
461 	msg.msg_name	= name;
462 	msg.msg_namelen	= namelen;
463 
464 	if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED())) == 0) {
465 		/* Data sent */
466 		if (sent == len) {
467 			/* All data sent.  Success. */
468 			return (0);
469 		} else {
470 			/* Not all data was sent.  Failure */
471 			return (-1);
472 		}
473 	}
474 
475 	/* Send failed */
476 	return (error);
477 }
478 
479 /*
480  * idm_iov_sosend - Sends an iovec on a connection.
481  *
482  * This function puts the data provided on the wire by calling sosendmsg.
483  * It will return only when all the data has been sent or if an error
484  * occurs.
485  *
486  * Returns 0 for success, the socket errno value if sosendmsg fails, and
487  * -1 if sosendmsg returns success but uio_resid != 0
488  */
489 int
490 idm_iov_sosend(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len)
491 {
492 	struct msghdr		msg;
493 	int			error;
494 	size_t 			sent = 0;
495 
496 	ASSERT(iop != NULL);
497 
498 	/* Initialization of the message header. */
499 	bzero(&msg, sizeof (msg));
500 	msg.msg_iov	= iop;
501 	msg.msg_iovlen	= iovlen;
502 
503 	if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED()))
504 	    == 0) {
505 		/* Data sent */
506 		if (sent == total_len) {
507 			/* All data sent.  Success. */
508 			return (0);
509 		} else {
510 			/* Not all data was sent.  Failure */
511 			return (-1);
512 		}
513 	}
514 
515 	/* Send failed */
516 	return (error);
517 }
518 
519 /*
520  * idm_iov_sorecv - Receives an iovec from a connection
521  *
522  * This function gets the data asked for from the socket.  It will return
523  * only when all the requested data has been retrieved or if an error
524  * occurs.
525  *
526  * Returns 0 for success, the socket errno value if sorecvmsg fails, and
527  * -1 if sorecvmsg returns success but uio_resid != 0
528  */
529 int
530 idm_iov_sorecv(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len)
531 {
532 	struct msghdr		msg;
533 	int			error;
534 	size_t			recv;
535 	int 			flags;
536 
537 	ASSERT(iop != NULL);
538 
539 	/* Initialization of the message header. */
540 	bzero(&msg, sizeof (msg));
541 	msg.msg_iov	= iop;
542 	msg.msg_iovlen	= iovlen;
543 	flags		= MSG_WAITALL;
544 
545 	if ((error = ksocket_recvmsg(so, &msg, flags, &recv, CRED()))
546 	    == 0) {
547 		/* Received data */
548 		if (recv == total_len) {
549 			/* All requested data received.  Success */
550 			return (0);
551 		} else {
552 			/*
553 			 * Not all data was received.  The connection has
554 			 * probably failed.
555 			 */
556 			return (-1);
557 		}
558 	}
559 
560 	/* Receive failed */
561 	return (error);
562 }
563 
564 static void
565 idm_set_ini_preconnect_options(idm_so_conn_t *sc)
566 {
567 	int	conn_abort = 10000;
568 	int	conn_notify = 2000;
569 	int	abort = 30000;
570 
571 	/* Pre-connect socket options */
572 	(void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
573 	    TCP_CONN_NOTIFY_THRESHOLD, (char *)&conn_notify, sizeof (int),
574 	    CRED());
575 	(void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
576 	    TCP_CONN_ABORT_THRESHOLD, (char *)&conn_abort, sizeof (int),
577 	    CRED());
578 	(void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP, TCP_ABORT_THRESHOLD,
579 	    (char *)&abort, sizeof (int), CRED());
580 }
581 
582 static void
583 idm_set_ini_postconnect_options(idm_so_conn_t *sc)
584 {
585 	int32_t		rcvbuf = IDM_RCVBUF_SIZE;
586 	int32_t		sndbuf = IDM_SNDBUF_SIZE;
587 	const int	on = 1;
588 
589 	/* Set postconnect options */
590 	(void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP, TCP_NODELAY,
591 	    (char *)&on, sizeof (int), CRED());
592 	(void) ksocket_setsockopt(sc->ic_so, SOL_SOCKET, SO_RCVBUF,
593 	    (char *)&rcvbuf, sizeof (int), CRED());
594 	(void) ksocket_setsockopt(sc->ic_so, SOL_SOCKET, SO_SNDBUF,
595 	    (char *)&sndbuf, sizeof (int), CRED());
596 }
597 
598 static void
599 idm_set_tgt_connect_options(ksocket_t ks)
600 {
601 	int32_t		rcvbuf = IDM_RCVBUF_SIZE;
602 	int32_t		sndbuf = IDM_SNDBUF_SIZE;
603 	const int	on = 1;
604 
605 	/* Set connect options */
606 	(void) ksocket_setsockopt(ks, SOL_SOCKET, SO_RCVBUF,
607 	    (char *)&rcvbuf, sizeof (int), CRED());
608 	(void) ksocket_setsockopt(ks, SOL_SOCKET, SO_SNDBUF,
609 	    (char *)&sndbuf, sizeof (int), CRED());
610 	(void) ksocket_setsockopt(ks, IPPROTO_TCP, TCP_NODELAY,
611 	    (char *)&on, sizeof (on), CRED());
612 }
613 
614 static uint32_t
615 n2h24(const uchar_t *ptr)
616 {
617 	return ((ptr[0] << 16) | (ptr[1] << 8) | ptr[2]);
618 }
619 
620 
621 static idm_status_t
622 idm_sorecvhdr(idm_conn_t *ic, idm_pdu_t *pdu)
623 {
624 	iscsi_hdr_t	*bhs;
625 	uint32_t	hdr_digest_crc;
626 	uint32_t	crc_calculated;
627 	void		*new_hdr;
628 	int		ahslen = 0;
629 	int		total_len = 0;
630 	int		iovlen = 0;
631 	struct iovec	iov[2];
632 	idm_so_conn_t	*so_conn;
633 	int		rc;
634 
635 	so_conn = ic->ic_transport_private;
636 
637 	/*
638 	 * Read BHS
639 	 */
640 	bhs = pdu->isp_hdr;
641 	rc = idm_sorecv(so_conn->ic_so, pdu->isp_hdr, sizeof (iscsi_hdr_t));
642 	if (rc != IDM_STATUS_SUCCESS) {
643 		return (IDM_STATUS_FAIL);
644 	}
645 
646 	/*
647 	 * Check actual AHS length against the amount available in the buffer
648 	 */
649 	pdu->isp_hdrlen = sizeof (iscsi_hdr_t) +
650 	    (bhs->hlength * sizeof (uint32_t));
651 	pdu->isp_datalen = n2h24(bhs->dlength);
652 	if (bhs->hlength > IDM_SORX_CACHE_AHSLEN) {
653 		/* Allocate a new header segment and change the callback */
654 		new_hdr = kmem_alloc(pdu->isp_hdrlen, KM_SLEEP);
655 		bcopy(pdu->isp_hdr, new_hdr, sizeof (iscsi_hdr_t));
656 		pdu->isp_hdr = new_hdr;
657 		pdu->isp_flags |= IDM_PDU_ADDL_HDR;
658 
659 		/*
660 		 * This callback will restore the expected values after
661 		 * the RX PDU has been processed.
662 		 */
663 		pdu->isp_callback = idm_sorx_addl_pdu_cb;
664 	}
665 
666 	/*
667 	 * Setup receipt of additional header and header digest (if enabled).
668 	 */
669 	if (bhs->hlength > 0) {
670 		iov[iovlen].iov_base = (caddr_t)(pdu->isp_hdr + 1);
671 		ahslen = pdu->isp_hdrlen - sizeof (iscsi_hdr_t);
672 		iov[iovlen].iov_len = ahslen;
673 		total_len += iov[iovlen].iov_len;
674 		iovlen++;
675 	}
676 
677 	if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) {
678 		iov[iovlen].iov_base = (caddr_t)&hdr_digest_crc;
679 		iov[iovlen].iov_len = sizeof (hdr_digest_crc);
680 		total_len += iov[iovlen].iov_len;
681 		iovlen++;
682 	}
683 
684 	if ((iovlen != 0) &&
685 	    (idm_iov_sorecv(so_conn->ic_so, &iov[0], iovlen,
686 	    total_len) != 0)) {
687 		return (IDM_STATUS_FAIL);
688 	}
689 
690 	/*
691 	 * Validate header digest if enabled
692 	 */
693 	if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) {
694 		crc_calculated = idm_crc32c(pdu->isp_hdr,
695 		    sizeof (iscsi_hdr_t) + ahslen);
696 		if (crc_calculated != hdr_digest_crc) {
697 			/* Invalid Header Digest */
698 			return (IDM_STATUS_HEADER_DIGEST);
699 		}
700 	}
701 
702 	return (0);
703 }
704 
705 /*
706  * idm_so_ini_conn_create()
707  * Allocate the sockets transport connection resources.
708  */
709 static idm_status_t
710 idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic)
711 {
712 	ksocket_t	so;
713 	idm_so_conn_t	*so_conn;
714 	idm_status_t	idmrc;
715 
716 	so = idm_socreate(cr->cr_domain, cr->cr_type,
717 	    cr->cr_protocol);
718 	if (so == NULL) {
719 		return (IDM_STATUS_FAIL);
720 	}
721 
722 	/* Bind the socket if configured to do so */
723 	if (cr->cr_bound) {
724 		if (ksocket_bind(so, &cr->cr_bound_addr.sin,
725 		    SIZEOF_SOCKADDR(&cr->cr_bound_addr.sin), CRED()) != 0) {
726 			idm_sodestroy(so);
727 			return (IDM_STATUS_FAIL);
728 		}
729 	}
730 
731 	idmrc = idm_so_conn_create_common(ic, so);
732 	if (idmrc != IDM_STATUS_SUCCESS) {
733 		idm_soshutdown(so);
734 		idm_sodestroy(so);
735 		return (IDM_STATUS_FAIL);
736 	}
737 
738 	so_conn = ic->ic_transport_private;
739 	/* Set up socket options */
740 	idm_set_ini_preconnect_options(so_conn);
741 
742 	return (IDM_STATUS_SUCCESS);
743 }
744 
745 /*
746  * idm_so_ini_conn_destroy()
747  * Tear down the sockets transport connection resources.
748  */
749 static void
750 idm_so_ini_conn_destroy(idm_conn_t *ic)
751 {
752 	idm_so_conn_destroy_common(ic);
753 }
754 
755 /*
756  * idm_so_ini_conn_connect()
757  * Establish the connection referred to by the handle previously allocated via
758  * idm_so_ini_conn_create().
759  */
760 static idm_status_t
761 idm_so_ini_conn_connect(idm_conn_t *ic)
762 {
763 	idm_so_conn_t	*so_conn;
764 
765 	so_conn = ic->ic_transport_private;
766 
767 	if (ksocket_connect(so_conn->ic_so, &ic->ic_ini_dst_addr.sin,
768 	    (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)), CRED()) != 0) {
769 		idm_soshutdown(so_conn->ic_so);
770 		return (IDM_STATUS_FAIL);
771 	}
772 
773 	idm_so_conn_connect_common(ic);
774 
775 	idm_set_ini_postconnect_options(so_conn);
776 
777 	return (IDM_STATUS_SUCCESS);
778 }
779 
780 idm_status_t
781 idm_so_tgt_conn_create(idm_conn_t *ic, ksocket_t new_so)
782 {
783 	idm_status_t	idmrc;
784 
785 	idmrc = idm_so_conn_create_common(ic, new_so);
786 
787 	return (idmrc);
788 }
789 
790 static void
791 idm_so_tgt_conn_destroy(idm_conn_t *ic)
792 {
793 	idm_so_conn_destroy_common(ic);
794 }
795 
796 /*
797  * idm_so_tgt_conn_connect()
798  * Establish the connection in ic, passed from idm_tgt_conn_finish(), which
799  * is invoked from the SM as a result of an inbound connection request.
800  */
801 static idm_status_t
802 idm_so_tgt_conn_connect(idm_conn_t *ic)
803 {
804 	idm_so_conn_connect_common(ic);
805 
806 	return (IDM_STATUS_SUCCESS);
807 }
808 
809 static idm_status_t
810 idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so)
811 {
812 	idm_so_conn_t	*so_conn;
813 
814 	so_conn = kmem_zalloc(sizeof (idm_so_conn_t), KM_SLEEP);
815 	so_conn->ic_so = new_so;
816 
817 	ic->ic_transport_private = so_conn;
818 	ic->ic_transport_hdrlen = 0;
819 
820 	/* Set the scoreboarding flag on this connection */
821 	ic->ic_conn_flags |= IDM_CONN_USE_SCOREBOARD;
822 
823 	/*
824 	 * Initialize tx thread mutex and list
825 	 */
826 	mutex_init(&so_conn->ic_tx_mutex, NULL, MUTEX_DEFAULT, NULL);
827 	cv_init(&so_conn->ic_tx_cv, NULL, CV_DEFAULT, NULL);
828 	list_create(&so_conn->ic_tx_list, sizeof (idm_pdu_t),
829 	    offsetof(idm_pdu_t, idm_tx_link));
830 
831 	return (IDM_STATUS_SUCCESS);
832 }
833 
834 static void
835 idm_so_conn_destroy_common(idm_conn_t *ic)
836 {
837 	idm_so_conn_t	*so_conn = ic->ic_transport_private;
838 
839 	ic->ic_transport_private = NULL;
840 	idm_sodestroy(so_conn->ic_so);
841 	list_destroy(&so_conn->ic_tx_list);
842 	mutex_destroy(&so_conn->ic_tx_mutex);
843 	cv_destroy(&so_conn->ic_tx_cv);
844 
845 	kmem_free(so_conn, sizeof (idm_so_conn_t));
846 }
847 
848 static void
849 idm_so_conn_connect_common(idm_conn_t *ic)
850 {
851 	idm_so_conn_t	*so_conn;
852 	struct sockaddr_in6	t_addr;
853 	socklen_t	t_addrlen = 0;
854 
855 	so_conn = ic->ic_transport_private;
856 	bzero(&t_addr, sizeof (struct sockaddr_in6));
857 	t_addrlen = sizeof (struct sockaddr_in6);
858 
859 	/* Set the local and remote addresses in the idm conn handle */
860 	ksocket_getsockname(so_conn->ic_so, (struct sockaddr *)&t_addr,
861 	    &t_addrlen, CRED());
862 	bcopy(&t_addr, &ic->ic_laddr, t_addrlen);
863 	ksocket_getpeername(so_conn->ic_so, (struct sockaddr *)&t_addr,
864 	    &t_addrlen, CRED());
865 	bcopy(&t_addr, &ic->ic_raddr, t_addrlen);
866 
867 	mutex_enter(&ic->ic_mutex);
868 	so_conn->ic_tx_thread = thread_create(NULL, 0, idm_sotx_thread, ic, 0,
869 	    &p0, TS_RUN, minclsyspri);
870 	so_conn->ic_rx_thread = thread_create(NULL, 0, idm_sorx_thread, ic, 0,
871 	    &p0, TS_RUN, minclsyspri);
872 
873 	while (!so_conn->ic_rx_thread_running || !so_conn->ic_tx_thread_running)
874 		cv_wait(&ic->ic_cv, &ic->ic_mutex);
875 	mutex_exit(&ic->ic_mutex);
876 }
877 
878 /*
879  * idm_so_conn_disconnect()
880  * Shutdown the socket connection and stop the thread
881  */
882 static void
883 idm_so_conn_disconnect(idm_conn_t *ic)
884 {
885 	idm_so_conn_t	*so_conn;
886 
887 	so_conn = ic->ic_transport_private;
888 
889 	mutex_enter(&ic->ic_mutex);
890 	so_conn->ic_rx_thread_running = B_FALSE;
891 	so_conn->ic_tx_thread_running = B_FALSE;
892 	/* We need to wakeup the TX thread */
893 	mutex_enter(&so_conn->ic_tx_mutex);
894 	cv_signal(&so_conn->ic_tx_cv);
895 	mutex_exit(&so_conn->ic_tx_mutex);
896 	mutex_exit(&ic->ic_mutex);
897 
898 	/* This should wakeup the RX thread if it is sleeping */
899 	idm_soshutdown(so_conn->ic_so);
900 
901 	thread_join(so_conn->ic_tx_thread_did);
902 	thread_join(so_conn->ic_rx_thread_did);
903 }
904 
905 /*
906  * idm_so_tgt_svc_create()
907  * Establish a service on an IP address and port.  idm_svc_req_t contains
908  * the service parameters.
909  */
910 /*ARGSUSED*/
911 static idm_status_t
912 idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is)
913 {
914 	idm_so_svc_t		*so_svc;
915 
916 	so_svc = kmem_zalloc(sizeof (idm_so_svc_t), KM_SLEEP);
917 
918 	/* Set the new sockets service in svc handle */
919 	is->is_so_svc = (void *)so_svc;
920 
921 	return (IDM_STATUS_SUCCESS);
922 }
923 
924 /*
925  * idm_so_tgt_svc_destroy()
926  * Teardown sockets resources allocated in idm_so_tgt_svc_create()
927  */
928 static void
929 idm_so_tgt_svc_destroy(idm_svc_t *is)
930 {
931 	/* the socket will have been torn down; free the service */
932 	kmem_free(is->is_so_svc, sizeof (idm_so_svc_t));
933 }
934 
935 /*
936  * idm_so_tgt_svc_online()
937  * Launch a watch thread on the svc allocated in idm_so_tgt_svc_create()
938  */
939 
940 static idm_status_t
941 idm_so_tgt_svc_online(idm_svc_t *is)
942 {
943 	idm_so_svc_t		*so_svc;
944 	idm_svc_req_t		*sr = &is->is_svc_req;
945 	struct sockaddr_in6	sin6_ip;
946 	const uint32_t		on = 1;
947 	const uint32_t		off = 0;
948 
949 	mutex_enter(&is->is_mutex);
950 	so_svc = (idm_so_svc_t *)is->is_so_svc;
951 
952 	/*
953 	 * Try creating an IPv6 socket first
954 	 */
955 	if ((so_svc->is_so = idm_socreate(PF_INET6, SOCK_STREAM, 0)) == NULL) {
956 		mutex_exit(&is->is_mutex);
957 		return (IDM_STATUS_FAIL);
958 	} else {
959 		bzero(&sin6_ip, sizeof (sin6_ip));
960 		sin6_ip.sin6_family = AF_INET6;
961 		sin6_ip.sin6_port = htons(sr->sr_port);
962 		sin6_ip.sin6_addr = in6addr_any;
963 
964 		(void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET,
965 		    SO_REUSEADDR, (char *)&on, sizeof (on), CRED());
966 		/*
967 		 * Turn off SO_MAC_EXEMPT so future sobinds succeed
968 		 */
969 		(void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET,
970 		    SO_MAC_EXEMPT, (char *)&off, sizeof (off), CRED());
971 
972 		if (ksocket_bind(so_svc->is_so, (struct sockaddr *)&sin6_ip,
973 		    sizeof (sin6_ip), CRED()) != 0) {
974 			mutex_exit(&is->is_mutex);
975 			idm_sodestroy(so_svc->is_so);
976 			return (IDM_STATUS_FAIL);
977 		}
978 	}
979 
980 	idm_set_tgt_connect_options(so_svc->is_so);
981 
982 	if (ksocket_listen(so_svc->is_so, 5, CRED()) != 0) {
983 		mutex_exit(&is->is_mutex);
984 		idm_soshutdown(so_svc->is_so);
985 		idm_sodestroy(so_svc->is_so);
986 		return (IDM_STATUS_FAIL);
987 	}
988 
989 	/* Launch a watch thread */
990 	so_svc->is_thread = thread_create(NULL, 0, idm_so_svc_port_watcher,
991 	    is, 0, &p0, TS_RUN, minclsyspri);
992 
993 	if (so_svc->is_thread == NULL) {
994 		/* Failure to launch; teardown the socket */
995 		mutex_exit(&is->is_mutex);
996 		idm_soshutdown(so_svc->is_so);
997 		idm_sodestroy(so_svc->is_so);
998 		return (IDM_STATUS_FAIL);
999 	}
1000 	ksocket_hold(so_svc->is_so);
1001 	/* Wait for the port watcher thread to start */
1002 	while (!so_svc->is_thread_running)
1003 		cv_wait(&is->is_cv, &is->is_mutex);
1004 	mutex_exit(&is->is_mutex);
1005 
1006 	return (IDM_STATUS_SUCCESS);
1007 }
1008 
1009 /*
1010  * idm_so_tgt_svc_offline
1011  *
1012  * Stop listening on the IP address and port identified by idm_svc_t.
1013  */
1014 static void
1015 idm_so_tgt_svc_offline(idm_svc_t *is)
1016 {
1017 	idm_so_svc_t		*so_svc;
1018 	mutex_enter(&is->is_mutex);
1019 	so_svc = (idm_so_svc_t *)is->is_so_svc;
1020 	so_svc->is_thread_running = B_FALSE;
1021 	mutex_exit(&is->is_mutex);
1022 
1023 	/*
1024 	 * Teardown socket
1025 	 */
1026 	idm_sodestroy(so_svc->is_so);
1027 
1028 	/*
1029 	 * Now we expect the port watcher thread to terminate
1030 	 */
1031 	thread_join(so_svc->is_thread_did);
1032 }
1033 
1034 /*
1035  * Watch thread for target service connection establishment.
1036  */
1037 void
1038 idm_so_svc_port_watcher(void *arg)
1039 {
1040 	idm_svc_t		*svc = arg;
1041 	ksocket_t		new_so;
1042 	idm_conn_t		*ic;
1043 	idm_status_t		idmrc;
1044 	idm_so_svc_t		*so_svc;
1045 	int			rc;
1046 	const uint32_t		off = 0;
1047 	struct sockaddr_in6 	t_addr;
1048 	socklen_t		t_addrlen;
1049 
1050 	bzero(&t_addr, sizeof (struct sockaddr_in6));
1051 	t_addrlen = sizeof (struct sockaddr_in6);
1052 	mutex_enter(&svc->is_mutex);
1053 
1054 	so_svc = svc->is_so_svc;
1055 	so_svc->is_thread_running = B_TRUE;
1056 	so_svc->is_thread_did = so_svc->is_thread->t_did;
1057 
1058 	cv_signal(&svc->is_cv);
1059 
1060 	IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) online", (void *)svc,
1061 	    svc->is_svc_req.sr_port);
1062 
1063 	while (so_svc->is_thread_running) {
1064 		mutex_exit(&svc->is_mutex);
1065 
1066 		if ((rc = ksocket_accept(so_svc->is_so,
1067 		    (struct sockaddr *)&t_addr, &t_addrlen,
1068 		    &new_so, CRED())) != 0) {
1069 			mutex_enter(&svc->is_mutex);
1070 			if (rc == ECONNABORTED)
1071 				continue;
1072 			/* Connection problem */
1073 			break;
1074 		}
1075 		/*
1076 		 * Turn off SO_MAC_EXEMPT so future sobinds succeed
1077 		 */
1078 		(void) ksocket_setsockopt(new_so, SOL_SOCKET, SO_MAC_EXEMPT,
1079 		    (char *)&off, sizeof (off), CRED());
1080 
1081 		idmrc = idm_svc_conn_create(svc, IDM_TRANSPORT_TYPE_SOCKETS,
1082 		    &ic);
1083 		if (idmrc != IDM_STATUS_SUCCESS) {
1084 			/* Drop connection */
1085 			idm_soshutdown(new_so);
1086 			idm_sodestroy(new_so);
1087 			mutex_enter(&svc->is_mutex);
1088 			continue;
1089 		}
1090 
1091 		idmrc = idm_so_tgt_conn_create(ic, new_so);
1092 		if (idmrc != IDM_STATUS_SUCCESS) {
1093 			idm_svc_conn_destroy(ic);
1094 			idm_soshutdown(new_so);
1095 			idm_sodestroy(new_so);
1096 			mutex_enter(&svc->is_mutex);
1097 			continue;
1098 		}
1099 
1100 		/*
1101 		 * Kick the state machine.  At CS_S3_XPT_UP the state machine
1102 		 * will notify the client (target) about the new connection.
1103 		 */
1104 		idm_conn_event(ic, CE_CONNECT_ACCEPT, NULL);
1105 
1106 		mutex_enter(&svc->is_mutex);
1107 	}
1108 	ksocket_rele(so_svc->is_so);
1109 	so_svc->is_thread_running = B_FALSE;
1110 	mutex_exit(&svc->is_mutex);
1111 
1112 	IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) offline", (void *)svc,
1113 	    svc->is_svc_req.sr_port);
1114 
1115 	thread_exit();
1116 }
1117 
1118 /*
1119  * idm_so_free_task_rsrc() stops any ongoing processing of the task and
1120  * frees resources associated with the task.
1121  *
1122  * It's not clear that this should return idm_status_t.  What do we do
1123  * if it fails?
1124  */
1125 static idm_status_t
1126 idm_so_free_task_rsrc(idm_task_t *idt)
1127 {
1128 	idm_buf_t	*idb;
1129 
1130 	/*
1131 	 * If this is a target connection, call idm_buf_rx_from_ini_done for
1132 	 * any buffer on the "outbufv" list with idb->idb_in_transport==B_TRUE.
1133 	 *
1134 	 * In addition, remove any buffers associated with this task from
1135 	 * the ic_tx_list.  We'll do this by walking the idt_inbufv list, but
1136 	 * items don't actually get removed from that list (and completion
1137 	 * routines called) until idm_task_cleanup.
1138 	 */
1139 	mutex_enter(&idt->idt_mutex);
1140 
1141 	for (idb = list_head(&idt->idt_outbufv); idb != NULL;
1142 	    idb = list_next(&idt->idt_outbufv, idb)) {
1143 		if (idb->idb_in_transport) {
1144 			/*
1145 			 * idm_buf_rx_from_ini_done releases idt->idt_mutex
1146 			 */
1147 			idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_ABORTED);
1148 			mutex_enter(&idt->idt_mutex);
1149 		}
1150 	}
1151 
1152 	for (idb = list_head(&idt->idt_inbufv); idb != NULL;
1153 	    idb = list_next(&idt->idt_inbufv, idb)) {
1154 		/*
1155 		 * We want to remove these items from the tx_list as well,
1156 		 * but knowing it's in the idt_inbufv list is not a guarantee
1157 		 * that it's in the tx_list.  If it's on the tx list then
1158 		 * let idm_sotx_thread() clean it up.
1159 		 */
1160 		if (idb->idb_in_transport && !idb->idb_tx_thread) {
1161 			/*
1162 			 * idm_buf_tx_to_ini_done releases idt->idt_mutex
1163 			 */
1164 			idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
1165 			mutex_enter(&idt->idt_mutex);
1166 		}
1167 	}
1168 
1169 	mutex_exit(&idt->idt_mutex);
1170 
1171 	return (IDM_STATUS_SUCCESS);
1172 }
1173 
1174 /*
1175  * idm_so_negotiate_key_values() validates the key values for this connection
1176  */
1177 /* ARGSUSED */
1178 static kv_status_t
1179 idm_so_negotiate_key_values(idm_conn_t *it, nvlist_t *request_nvl,
1180     nvlist_t *response_nvl, nvlist_t *negotiated_nvl)
1181 {
1182 	/* All parameters are negotiated at the iscsit level */
1183 	return (KV_HANDLED);
1184 }
1185 
1186 /*
1187  * idm_so_notice_key_values() activates the negotiated key values for
1188  * this connection.
1189  */
1190 static idm_status_t
1191 idm_so_notice_key_values(idm_conn_t *it, nvlist_t *negotiated_nvl)
1192 {
1193 	char			*nvp_name;
1194 	nvpair_t		*nvp;
1195 	nvpair_t		*next_nvp;
1196 	int			nvrc;
1197 	idm_status_t		idm_status;
1198 	const idm_kv_xlate_t	*ikvx;
1199 
1200 	for (nvp = nvlist_next_nvpair(negotiated_nvl, NULL);
1201 	    nvp != NULL; nvp = next_nvp) {
1202 		next_nvp = nvlist_next_nvpair(negotiated_nvl, nvp);
1203 		nvp_name = nvpair_name(nvp);
1204 
1205 		ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name));
1206 		switch (ikvx->ik_key_id) {
1207 		case KI_HEADER_DIGEST:
1208 		case KI_DATA_DIGEST:
1209 			idm_status = idm_so_handle_digest(it, nvp, ikvx);
1210 			ASSERT(idm_status == 0);
1211 
1212 			/* Remove processed item from negotiated_nvl list */
1213 			nvrc = nvlist_remove_all(
1214 			    negotiated_nvl, ikvx->ik_key_name);
1215 			ASSERT(nvrc == 0);
1216 			break;
1217 		default:
1218 			break;
1219 		}
1220 	}
1221 	return (IDM_STATUS_SUCCESS);
1222 }
1223 
1224 
1225 static idm_status_t
1226 idm_so_handle_digest(idm_conn_t *it, nvpair_t *digest_choice,
1227     const idm_kv_xlate_t *ikvx)
1228 {
1229 	int			nvrc;
1230 	char			*digest_choice_string;
1231 
1232 	nvrc = nvpair_value_string(digest_choice,
1233 	    &digest_choice_string);
1234 	ASSERT(nvrc == 0);
1235 	if (strcasecmp(digest_choice_string, "crc32c") == 0) {
1236 		switch (ikvx->ik_key_id) {
1237 		case KI_HEADER_DIGEST:
1238 			it->ic_conn_flags |= IDM_CONN_HEADER_DIGEST;
1239 			break;
1240 		case KI_DATA_DIGEST:
1241 			it->ic_conn_flags |= IDM_CONN_DATA_DIGEST;
1242 			break;
1243 		default:
1244 			ASSERT(0);
1245 			break;
1246 		}
1247 	} else if (strcasecmp(digest_choice_string, "none") == 0) {
1248 		switch (ikvx->ik_key_id) {
1249 		case KI_HEADER_DIGEST:
1250 			it->ic_conn_flags &= ~IDM_CONN_HEADER_DIGEST;
1251 			break;
1252 		case KI_DATA_DIGEST:
1253 			it->ic_conn_flags &= ~IDM_CONN_DATA_DIGEST;
1254 			break;
1255 		default:
1256 			ASSERT(0);
1257 			break;
1258 		}
1259 	} else {
1260 		ASSERT(0);
1261 	}
1262 
1263 	return (IDM_STATUS_SUCCESS);
1264 }
1265 
1266 
1267 /*
1268  * idm_so_conn_is_capable() verifies that the passed connection is provided
1269  * for by the sockets interface.
1270  */
1271 /* ARGSUSED */
1272 static boolean_t
1273 idm_so_conn_is_capable(idm_conn_req_t *ic, idm_transport_caps_t *caps)
1274 {
1275 	return (B_TRUE);
1276 }
1277 
1278 /*
1279  * idm_so_rx_datain() validates the Data Sequence number of the PDU. The
1280  * idm_sorecv_scsidata() function invoked earlier actually reads the data
1281  * off the socket into the appropriate buffers.
1282  */
1283 static void
1284 idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu)
1285 {
1286 	iscsi_data_hdr_t	*bhs;
1287 	idm_task_t		*idt;
1288 	idm_buf_t		*idb;
1289 	uint32_t		datasn;
1290 	size_t			offset;
1291 	iscsi_hdr_t		*ihp = (iscsi_hdr_t *)pdu->isp_hdr;
1292 	iscsi_data_rsp_hdr_t    *idrhp = (iscsi_data_rsp_hdr_t *)ihp;
1293 
1294 	ASSERT(ic != NULL);
1295 	ASSERT(pdu != NULL);
1296 
1297 	bhs	= (iscsi_data_hdr_t *)pdu->isp_hdr;
1298 	datasn	= ntohl(bhs->datasn);
1299 	offset	= ntohl(bhs->offset);
1300 
1301 	ASSERT(bhs->opcode == ISCSI_OP_SCSI_DATA_RSP);
1302 
1303 	/*
1304 	 * Look up the task corresponding to the initiator task tag
1305 	 * to get the buffers affiliated with the task.
1306 	 */
1307 	idt = idm_task_find(ic, bhs->itt, bhs->ttt);
1308 	if (idt == NULL) {
1309 		IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: failed to find task");
1310 		idm_pdu_rx_protocol_error(ic, pdu);
1311 		return;
1312 	}
1313 
1314 	idb = pdu->isp_sorx_buf;
1315 	if (idb == NULL) {
1316 		IDM_CONN_LOG(CE_WARN,
1317 		    "idm_so_rx_datain: failed to find buffer");
1318 		idm_task_rele(idt);
1319 		idm_pdu_rx_protocol_error(ic, pdu);
1320 		return;
1321 	}
1322 
1323 	/*
1324 	 * DataSN values should be sequential and should not have any gaps or
1325 	 * repetitions. Check the DataSN with the one stored in the task.
1326 	 */
1327 	if (datasn == idt->idt_exp_datasn) {
1328 		idt->idt_exp_datasn++; /* keep track of DataSN received */
1329 	} else {
1330 		IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: datasn out of order");
1331 		idm_task_rele(idt);
1332 		idm_pdu_rx_protocol_error(ic, pdu);
1333 		return;
1334 	}
1335 	idm_task_rele(idt);
1336 
1337 	/*
1338 	 * PDUs in a sequence should be in continuously increasing
1339 	 * address offset
1340 	 */
1341 	if (offset != idb->idb_exp_offset) {
1342 		IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: unexpected offset");
1343 		idm_pdu_rx_protocol_error(ic, pdu);
1344 		return;
1345 	}
1346 	/* Expected next relative buffer offset */
1347 	idb->idb_exp_offset += n2h24(bhs->dlength);
1348 
1349 	/*
1350 	 * For now call scsi_rsp which will process the data rsp
1351 	 * Revisit, need to provide an explicit client entry point for
1352 	 * phase collapse completions.
1353 	 */
1354 	if (((ihp->opcode & ISCSI_OPCODE_MASK) == ISCSI_OP_SCSI_DATA_RSP) &&
1355 	    (idrhp->flags & ISCSI_FLAG_DATA_STATUS)) {
1356 		(*ic->ic_conn_ops.icb_rx_scsi_rsp)(ic, pdu);
1357 	}
1358 
1359 	idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1360 }
1361 
1362 /*
1363  * The idm_so_rx_dataout() function is used by the iSCSI target to read
1364  * data from the Data-Out PDU sent by the iSCSI initiator.
1365  *
1366  * This function gets the Initiator Task Tag from the PDU BHS and looks up the
1367  * task to get the buffers associated with the PDU. A PDU might span buffers.
1368  * The data is then read into the respective buffer.
1369  */
1370 static void
1371 idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu)
1372 {
1373 
1374 	iscsi_data_hdr_t	*bhs;
1375 	idm_task_t		*idt;
1376 	idm_buf_t		*idb;
1377 	size_t			offset;
1378 
1379 	ASSERT(ic != NULL);
1380 	ASSERT(pdu != NULL);
1381 
1382 	bhs = (iscsi_data_hdr_t *)pdu->isp_hdr;
1383 	offset = ntohl(bhs->offset);
1384 	ASSERT(bhs->opcode == ISCSI_OP_SCSI_DATA);
1385 
1386 	/*
1387 	 * Look up the task corresponding to the initiator task tag
1388 	 * to get the buffers affiliated with the task.
1389 	 */
1390 	idt = idm_task_find(ic, bhs->itt, bhs->ttt);
1391 	if (idt == NULL) {
1392 		IDM_CONN_LOG(CE_WARN,
1393 		    "idm_so_rx_dataout: failed to find task");
1394 		idm_pdu_rx_protocol_error(ic, pdu);
1395 		return;
1396 	}
1397 
1398 	idb = pdu->isp_sorx_buf;
1399 	if (idb == NULL) {
1400 		IDM_CONN_LOG(CE_WARN,
1401 		    "idm_so_rx_dataout: failed to find buffer");
1402 		idm_task_rele(idt);
1403 		idm_pdu_rx_protocol_error(ic, pdu);
1404 		return;
1405 	}
1406 
1407 	/* Keep track of data transferred - check data offsets */
1408 	if (offset != idb->idb_exp_offset) {
1409 		IDM_CONN_LOG(CE_NOTE, "idm_so_rx_dataout: offset out of seq: "
1410 		    "%ld, %d", offset, idb->idb_exp_offset);
1411 		idm_task_rele(idt);
1412 		idm_pdu_rx_protocol_error(ic, pdu);
1413 		return;
1414 	}
1415 	/* Expected next relative offset */
1416 	idb->idb_exp_offset += ntoh24(bhs->dlength);
1417 
1418 	/*
1419 	 * Call the buffer callback when the transfer is complete
1420 	 *
1421 	 * The connection state machine should only abort tasks after
1422 	 * shutting down the connection so we are assured that there
1423 	 * won't be a simultaneous attempt to abort this task at the
1424 	 * same time as we are processing this PDU (due to a connection
1425 	 * state change).
1426 	 */
1427 	if (bhs->flags & ISCSI_FLAG_FINAL) {
1428 		/*
1429 		 * We only want to call idm_buf_rx_from_ini_done once
1430 		 * per transfer.  It's possible that this task has
1431 		 * already been aborted in which case
1432 		 * idm_so_free_task_rsrc will call idm_buf_rx_from_ini_done
1433 		 * for each buffer with idb_in_transport==B_TRUE.  To
1434 		 * close this window and ensure that this doesn't happen,
1435 		 * we'll clear idb->idb_in_transport now while holding
1436 		 * the task mutex.   This is only really an issue for
1437 		 * SCSI task abort -- if tasks were being aborted because
1438 		 * of a connection state change the state machine would
1439 		 * have already stopped the receive thread.
1440 		 */
1441 		mutex_enter(&idt->idt_mutex);
1442 
1443 		/*
1444 		 * Release the task hold here (obtained in idm_task_find)
1445 		 * because the task may complete synchronously during
1446 		 * idm_buf_rx_from_ini_done.  Since we still have an active
1447 		 * buffer we know there is at least one additional hold on idt.
1448 		 */
1449 		idm_task_rele(idt);
1450 
1451 		/*
1452 		 * idm_buf_rx_from_ini_done releases idt->idt_mutex
1453 		 */
1454 		idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_SUCCESS);
1455 		idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1456 		return;
1457 	}
1458 
1459 	idm_task_rele(idt);
1460 	idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1461 }
1462 
1463 /*
1464  * The idm_so_rx_rtt() function is used by the iSCSI initiator to handle
1465  * the R2T PDU sent by the iSCSI target indicating that it is ready to
1466  * accept data. This gets the Initiator Task Tag (itt) from the PDU BHS
1467  * and looks up the task in the task tree using the itt to get the output
1468  * buffers associated the task. The R2T PDU contains the offset of the
1469  * requested data and the data length. This function then constructs a
1470  * sequence of iSCSI PDUs and outputs the requested data. Each Data-Out
1471  * PDU is associated with the R2T by the Target Transfer Tag  (ttt).
1472  */
1473 static void
1474 idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu)
1475 {
1476 	idm_task_t		*idt;
1477 	idm_buf_t		*idb;
1478 	iscsi_rtt_hdr_t		*rtt_hdr;
1479 	uint32_t		data_offset;
1480 
1481 	ASSERT(ic != NULL);
1482 	ASSERT(pdu != NULL);
1483 
1484 	rtt_hdr	= (iscsi_rtt_hdr_t *)pdu->isp_hdr;
1485 	data_offset = ntohl(rtt_hdr->data_offset);
1486 
1487 	idt	= idm_task_find(ic, rtt_hdr->itt, rtt_hdr->ttt);
1488 
1489 	if (idt == NULL) {
1490 		IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find task");
1491 		idm_pdu_rx_protocol_error(ic, pdu);
1492 		return;
1493 	}
1494 
1495 	/* Find the buffer bound to the task by the iSCSI initiator */
1496 	mutex_enter(&idt->idt_mutex);
1497 	idb = idm_buf_find(&idt->idt_outbufv, data_offset);
1498 	idt->idt_r2t_ttt = rtt_hdr->ttt;
1499 	/* reset to zero */
1500 	idt->idt_exp_datasn = 0;
1501 	if (idb == NULL) {
1502 		mutex_exit(&idt->idt_mutex);
1503 		idm_task_rele(idt);
1504 		IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find buffer");
1505 		idm_pdu_rx_protocol_error(ic, pdu);
1506 		return;
1507 	}
1508 
1509 	(void) idm_so_send_buf_region(idt, ISCSI_OP_SCSI_DATA, idb,
1510 	    data_offset, ntohl(rtt_hdr->data_length));
1511 	mutex_exit(&idt->idt_mutex);
1512 
1513 	idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1514 	idm_task_rele(idt);
1515 
1516 }
1517 
1518 idm_status_t
1519 idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu)
1520 {
1521 	uint8_t		pad[ISCSI_PAD_WORD_LEN];
1522 	int		pad_len;
1523 	uint32_t	data_digest_crc;
1524 	uint32_t	crc_calculated;
1525 	int		total_len;
1526 	idm_so_conn_t	*so_conn;
1527 
1528 	so_conn = ic->ic_transport_private;
1529 
1530 	pad_len = ((ISCSI_PAD_WORD_LEN -
1531 	    (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) &
1532 	    (ISCSI_PAD_WORD_LEN - 1));
1533 
1534 	ASSERT(pdu->isp_iovlen < (PDU_MAX_IOVLEN - 2)); /* pad + data digest */
1535 
1536 	total_len = pdu->isp_datalen;
1537 
1538 	if (pad_len) {
1539 		pdu->isp_iov[pdu->isp_iovlen].iov_base	= (char *)&pad;
1540 		pdu->isp_iov[pdu->isp_iovlen].iov_len	= pad_len;
1541 		total_len		+= pad_len;
1542 		pdu->isp_iovlen++;
1543 	}
1544 
1545 	/* setup data digest */
1546 	if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) {
1547 		pdu->isp_iov[pdu->isp_iovlen].iov_base =
1548 		    (char *)&data_digest_crc;
1549 		pdu->isp_iov[pdu->isp_iovlen].iov_len =
1550 		    sizeof (data_digest_crc);
1551 		total_len		+= sizeof (data_digest_crc);
1552 		pdu->isp_iovlen++;
1553 	}
1554 
1555 	if (idm_iov_sorecv(so_conn->ic_so, &pdu->isp_iov[0],
1556 	    pdu->isp_iovlen, total_len) != 0) {
1557 		return (IDM_STATUS_IO);
1558 	}
1559 
1560 	if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) {
1561 		crc_calculated = idm_crc32c(pdu->isp_data,
1562 		    pdu->isp_datalen);
1563 		if (pad_len) {
1564 			crc_calculated = idm_crc32c_continued((char *)&pad,
1565 			    pad_len, crc_calculated);
1566 		}
1567 		if (crc_calculated != data_digest_crc) {
1568 			IDM_CONN_LOG(CE_WARN,
1569 			    "idm_sorecvdata: "
1570 			    "CRC error: actual 0x%x, calc 0x%x",
1571 			    data_digest_crc, crc_calculated);
1572 
1573 			/* Invalid Data Digest */
1574 			return (IDM_STATUS_DATA_DIGEST);
1575 		}
1576 	}
1577 
1578 	return (IDM_STATUS_SUCCESS);
1579 }
1580 
1581 /*
1582  * idm_sorecv_scsidata() is used to receive scsi data from the socket. The
1583  * Data-type PDU header must be read into the idm_pdu_t structure prior to
1584  * calling this function.
1585  */
1586 idm_status_t
1587 idm_sorecv_scsidata(idm_conn_t *ic, idm_pdu_t *pdu)
1588 {
1589 	iscsi_data_hdr_t	*bhs;
1590 	idm_task_t		*task;
1591 	uint32_t		offset;
1592 	uint8_t			opcode;
1593 	uint32_t		dlength;
1594 	list_t			*buflst;
1595 	uint32_t		xfer_bytes;
1596 	idm_status_t		status;
1597 
1598 	ASSERT(ic != NULL);
1599 	ASSERT(pdu != NULL);
1600 
1601 	bhs	= (iscsi_data_hdr_t *)pdu->isp_hdr;
1602 
1603 	offset	= ntohl(bhs->offset);
1604 	opcode	= bhs->opcode;
1605 	dlength = n2h24(bhs->dlength);
1606 
1607 	ASSERT((opcode == ISCSI_OP_SCSI_DATA_RSP) ||
1608 	    (opcode == ISCSI_OP_SCSI_DATA));
1609 
1610 	/*
1611 	 * Successful lookup implicitly gets a "hold" on the task.  This
1612 	 * hold must be released before leaving this function.  At one
1613 	 * point we were caching this task context and retaining the hold
1614 	 * but it turned out to be very difficult to release the hold properly.
1615 	 * The task can be aborted and the connection shutdown between this
1616 	 * call and the subsequent expected call to idm_so_rx_datain/
1617 	 * idm_so_rx_dataout (in which case those functions are not called).
1618 	 * Releasing the hold in the PDU callback doesn't work well either
1619 	 * because the whole task may be completed by then at which point
1620 	 * it is too late to release the hold -- for better or worse this
1621 	 * code doesn't wait on the refcnts during normal operation.
1622 	 * idm_task_find() is very fast and it is not a huge burden if we
1623 	 * have to do it twice.
1624 	 */
1625 	task = idm_task_find(ic, bhs->itt, bhs->ttt);
1626 	if (task == NULL) {
1627 		IDM_CONN_LOG(CE_WARN,
1628 		    "idm_sorecv_scsidata: could not find task");
1629 		return (IDM_STATUS_FAIL);
1630 	}
1631 
1632 	mutex_enter(&task->idt_mutex);
1633 	buflst	= (opcode == ISCSI_OP_SCSI_DATA_RSP) ?
1634 	    &task->idt_inbufv : &task->idt_outbufv;
1635 	pdu->isp_sorx_buf = idm_buf_find(buflst, offset);
1636 	mutex_exit(&task->idt_mutex);
1637 
1638 	if (pdu->isp_sorx_buf == NULL) {
1639 		idm_task_rele(task);
1640 		IDM_CONN_LOG(CE_WARN, "idm_sorecv_scsidata: could not find "
1641 		    "buffer for offset %x opcode=%x",
1642 		    offset, opcode);
1643 		return (IDM_STATUS_FAIL);
1644 	}
1645 
1646 	xfer_bytes = idm_fill_iov(pdu, pdu->isp_sorx_buf, offset, dlength);
1647 	ASSERT(xfer_bytes != 0);
1648 	if (xfer_bytes != dlength) {
1649 		idm_task_rele(task);
1650 		/*
1651 		 * Buffer overflow, connection error.  The PDU data is still
1652 		 * sitting in the socket so we can't use the connection
1653 		 * again until that data is drained.
1654 		 */
1655 		return (IDM_STATUS_FAIL);
1656 	}
1657 
1658 	status = idm_sorecvdata(ic, pdu);
1659 
1660 	idm_task_rele(task);
1661 
1662 	return (status);
1663 }
1664 
1665 static uint32_t
1666 idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb, uint32_t ro, uint32_t dlength)
1667 {
1668 	uint32_t	buf_ro = ro - idb->idb_bufoffset;
1669 	uint32_t	xfer_len = min(dlength, idb->idb_buflen - buf_ro);
1670 
1671 	ASSERT(ro >= idb->idb_bufoffset);
1672 
1673 	pdu->isp_iov[pdu->isp_iovlen].iov_base	=
1674 	    (caddr_t)idb->idb_buf + buf_ro;
1675 	pdu->isp_iov[pdu->isp_iovlen].iov_len	= xfer_len;
1676 	pdu->isp_iovlen++;
1677 
1678 	return (xfer_len);
1679 }
1680 
1681 int
1682 idm_sorecv_nonscsidata(idm_conn_t *ic, idm_pdu_t *pdu)
1683 {
1684 	pdu->isp_data = kmem_alloc(pdu->isp_datalen, KM_SLEEP);
1685 	ASSERT(pdu->isp_data != NULL);
1686 
1687 	pdu->isp_databuflen = pdu->isp_datalen;
1688 	pdu->isp_iov[0].iov_base = (caddr_t)pdu->isp_data;
1689 	pdu->isp_iov[0].iov_len = pdu->isp_datalen;
1690 	pdu->isp_iovlen = 1;
1691 	/*
1692 	 * Since we are associating a new data buffer with this received
1693 	 * PDU we need to set a specific callback to free the data
1694 	 * after the PDU is processed.
1695 	 */
1696 	pdu->isp_flags |= IDM_PDU_ADDL_DATA;
1697 	pdu->isp_callback = idm_sorx_addl_pdu_cb;
1698 
1699 	return (idm_sorecvdata(ic, pdu));
1700 }
1701 
1702 void
1703 idm_sorx_thread(void *arg)
1704 {
1705 	boolean_t	conn_failure = B_FALSE;
1706 	idm_conn_t	*ic = (idm_conn_t *)arg;
1707 	idm_so_conn_t	*so_conn;
1708 	idm_pdu_t	*pdu;
1709 	idm_status_t	rc;
1710 
1711 	idm_conn_hold(ic);
1712 
1713 	mutex_enter(&ic->ic_mutex);
1714 
1715 	so_conn = ic->ic_transport_private;
1716 	so_conn->ic_rx_thread_running = B_TRUE;
1717 	so_conn->ic_rx_thread_did = so_conn->ic_rx_thread->t_did;
1718 	cv_signal(&ic->ic_cv);
1719 
1720 	while (so_conn->ic_rx_thread_running) {
1721 		mutex_exit(&ic->ic_mutex);
1722 
1723 		/*
1724 		 * Get PDU with default header size (large enough for
1725 		 * BHS plus any anticipated AHS).  PDU from
1726 		 * the cache will have all values set correctly
1727 		 * for sockets RX including callback.
1728 		 */
1729 		pdu = kmem_cache_alloc(idm.idm_sorx_pdu_cache, KM_SLEEP);
1730 		pdu->isp_ic = ic;
1731 		pdu->isp_flags = 0;
1732 		pdu->isp_transport_hdrlen = 0;
1733 
1734 		if ((rc = idm_sorecvhdr(ic, pdu)) != 0) {
1735 			/*
1736 			 * Call idm_pdu_complete so that we call the callback
1737 			 * and ensure any memory allocated in idm_sorecvhdr
1738 			 * gets freed up.
1739 			 */
1740 			idm_pdu_complete(pdu, IDM_STATUS_FAIL);
1741 
1742 			/*
1743 			 * If ic_rx_thread_running is still set then
1744 			 * this is some kind of connection problem
1745 			 * on the socket.  In this case we want to
1746 			 * generate an event.  Otherwise some other
1747 			 * thread closed the socket due to another
1748 			 * issue in which case we don't need to
1749 			 * generate an event.
1750 			 */
1751 			mutex_enter(&ic->ic_mutex);
1752 			if (so_conn->ic_rx_thread_running) {
1753 				conn_failure = B_TRUE;
1754 				so_conn->ic_rx_thread_running = B_FALSE;
1755 			}
1756 
1757 			continue;
1758 		}
1759 
1760 		/*
1761 		 * Header has been read and validated.  Now we need
1762 		 * to read the PDU data payload (if present).  SCSI data
1763 		 * need to be transferred from the socket directly into
1764 		 * the associated transfer buffer for the SCSI task.
1765 		 */
1766 		if (pdu->isp_datalen != 0) {
1767 			if ((IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA) ||
1768 			    (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP)) {
1769 				rc = idm_sorecv_scsidata(ic, pdu);
1770 				/*
1771 				 * All SCSI errors are fatal to the
1772 				 * connection right now since we have no
1773 				 * place to put the data.  What we need
1774 				 * is some kind of sink to dispose of unwanted
1775 				 * SCSI data.  For example an invalid task tag
1776 				 * should not kill the connection (although
1777 				 * we may want to drop the connection).
1778 				 */
1779 			} else {
1780 				/*
1781 				 * Not data PDUs so allocate a buffer for the
1782 				 * data segment and read the remaining data.
1783 				 */
1784 				rc = idm_sorecv_nonscsidata(ic, pdu);
1785 			}
1786 			if (rc != 0) {
1787 				/*
1788 				 * Call idm_pdu_complete so that we call the
1789 				 * callback and ensure any memory allocated
1790 				 * in idm_sorecvhdr gets freed up.
1791 				 */
1792 				idm_pdu_complete(pdu, IDM_STATUS_FAIL);
1793 
1794 				/*
1795 				 * If ic_rx_thread_running is still set then
1796 				 * this is some kind of connection problem
1797 				 * on the socket.  In this case we want to
1798 				 * generate an event.  Otherwise some other
1799 				 * thread closed the socket due to another
1800 				 * issue in which case we don't need to
1801 				 * generate an event.
1802 				 */
1803 				mutex_enter(&ic->ic_mutex);
1804 				if (so_conn->ic_rx_thread_running) {
1805 					conn_failure = B_TRUE;
1806 					so_conn->ic_rx_thread_running = B_FALSE;
1807 				}
1808 				continue;
1809 			}
1810 		}
1811 
1812 		/*
1813 		 * Process RX PDU
1814 		 */
1815 		idm_pdu_rx(ic, pdu);
1816 
1817 		mutex_enter(&ic->ic_mutex);
1818 	}
1819 
1820 	mutex_exit(&ic->ic_mutex);
1821 
1822 	/*
1823 	 * If we dropped out of the RX processing loop because of
1824 	 * a socket problem or other connection failure (including
1825 	 * digest errors) then we need to generate a state machine
1826 	 * event to shut the connection down.
1827 	 * If the state machine is already in, for example, INIT_ERROR, this
1828 	 * event will get dropped, and the TX thread will never be notified
1829 	 * to shut down.  To be safe, we'll just notify it here.
1830 	 */
1831 	if (conn_failure) {
1832 		if (so_conn->ic_tx_thread_running) {
1833 			so_conn->ic_tx_thread_running = B_FALSE;
1834 			mutex_enter(&so_conn->ic_tx_mutex);
1835 			cv_signal(&so_conn->ic_tx_cv);
1836 			mutex_exit(&so_conn->ic_tx_mutex);
1837 		}
1838 
1839 		idm_conn_event(ic, CE_TRANSPORT_FAIL, rc);
1840 	}
1841 
1842 	idm_conn_rele(ic);
1843 
1844 	thread_exit();
1845 }
1846 
1847 /*
1848  * idm_so_tx
1849  *
1850  * This is the implementation of idm_transport_ops_t's it_tx_pdu entry
1851  * point.  By definition, it is supposed to be fast.  So, simply queue
1852  * the entry and return.  The real work is done by idm_i_so_tx() via
1853  * idm_sotx_thread().
1854  */
1855 
1856 static void
1857 idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu)
1858 {
1859 	idm_so_conn_t *so_conn = ic->ic_transport_private;
1860 
1861 	ASSERT(pdu->isp_ic == ic);
1862 	mutex_enter(&so_conn->ic_tx_mutex);
1863 
1864 	if (!so_conn->ic_tx_thread_running) {
1865 		mutex_exit(&so_conn->ic_tx_mutex);
1866 		idm_pdu_complete(pdu, IDM_STATUS_ABORTED);
1867 		return;
1868 	}
1869 
1870 	list_insert_tail(&so_conn->ic_tx_list, (void *)pdu);
1871 	cv_signal(&so_conn->ic_tx_cv);
1872 	mutex_exit(&so_conn->ic_tx_mutex);
1873 }
1874 
1875 static idm_status_t
1876 idm_i_so_tx(idm_pdu_t *pdu)
1877 {
1878 	idm_conn_t	*ic = pdu->isp_ic;
1879 	idm_status_t	status = IDM_STATUS_SUCCESS;
1880 	uint8_t		pad[ISCSI_PAD_WORD_LEN];
1881 	int		pad_len;
1882 	uint32_t	hdr_digest_crc;
1883 	uint32_t	data_digest_crc = 0;
1884 	int		total_len = 0;
1885 	int		iovlen = 0;
1886 	struct iovec	iov[6];
1887 	idm_so_conn_t	*so_conn;
1888 
1889 	so_conn = ic->ic_transport_private;
1890 
1891 	/* Setup BHS */
1892 	iov[iovlen].iov_base	= (caddr_t)pdu->isp_hdr;
1893 	iov[iovlen].iov_len	= pdu->isp_hdrlen;
1894 	total_len		+= iov[iovlen].iov_len;
1895 	iovlen++;
1896 
1897 	/* Setup header digest */
1898 	if (((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) &&
1899 	    (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST)) {
1900 		hdr_digest_crc = idm_crc32c(pdu->isp_hdr, pdu->isp_hdrlen);
1901 
1902 		iov[iovlen].iov_base	= (caddr_t)&hdr_digest_crc;
1903 		iov[iovlen].iov_len	= sizeof (hdr_digest_crc);
1904 		total_len		+= iov[iovlen].iov_len;
1905 		iovlen++;
1906 	}
1907 
1908 	/* Setup the data */
1909 	if (pdu->isp_datalen) {
1910 		idm_task_t		*idt;
1911 		idm_buf_t		*idb;
1912 		iscsi_data_hdr_t	*ihp;
1913 		ihp = (iscsi_data_hdr_t *)pdu->isp_hdr;
1914 		/* Write of immediate data */
1915 		if (ic->ic_ffp &&
1916 		    (ihp->opcode == ISCSI_OP_SCSI_CMD ||
1917 		    ihp->opcode == ISCSI_OP_SCSI_DATA)) {
1918 			idt = idm_task_find(ic, ihp->itt, ihp->ttt);
1919 			if (idt) {
1920 				mutex_enter(&idt->idt_mutex);
1921 				idb = idm_buf_find(&idt->idt_outbufv, 0);
1922 				mutex_exit(&idt->idt_mutex);
1923 				idb->idb_xfer_len += pdu->isp_datalen;
1924 			}
1925 		}
1926 
1927 		iov[iovlen].iov_base = (caddr_t)pdu->isp_data;
1928 		iov[iovlen].iov_len  = pdu->isp_datalen;
1929 		total_len += iov[iovlen].iov_len;
1930 		iovlen++;
1931 	}
1932 
1933 	/* Setup the data pad if necessary */
1934 	pad_len = ((ISCSI_PAD_WORD_LEN -
1935 	    (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) &
1936 	    (ISCSI_PAD_WORD_LEN - 1));
1937 
1938 	if (pad_len) {
1939 		bzero(pad, sizeof (pad));
1940 		iov[iovlen].iov_base = (void *)&pad;
1941 		iov[iovlen].iov_len  = pad_len;
1942 		total_len		+= iov[iovlen].iov_len;
1943 		iovlen++;
1944 	}
1945 
1946 	/*
1947 	 * Setup the data digest if enabled.  Data-digest is not sent
1948 	 * for login-phase PDUs.
1949 	 */
1950 	if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) &&
1951 	    ((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) &&
1952 	    (pdu->isp_datalen || pad_len)) {
1953 		/*
1954 		 * RFC3720/10.2.3: A zero-length Data Segment also
1955 		 * implies a zero-length data digest.
1956 		 */
1957 		if (pdu->isp_datalen) {
1958 			data_digest_crc = idm_crc32c(pdu->isp_data,
1959 			    pdu->isp_datalen);
1960 		}
1961 		if (pad_len) {
1962 			data_digest_crc = idm_crc32c_continued(&pad,
1963 			    pad_len, data_digest_crc);
1964 		}
1965 
1966 		iov[iovlen].iov_base	= (caddr_t)&data_digest_crc;
1967 		iov[iovlen].iov_len	= sizeof (data_digest_crc);
1968 		total_len		+= iov[iovlen].iov_len;
1969 		iovlen++;
1970 	}
1971 
1972 	/* Transmit the PDU */
1973 	if (idm_iov_sosend(so_conn->ic_so, &iov[0], iovlen,
1974 	    total_len) != 0) {
1975 		/* Set error status */
1976 		IDM_CONN_LOG(CE_WARN,
1977 		    "idm_so_tx: failed to transmit the PDU, so: %p ic: %p "
1978 		    "data: %p", (void *) so_conn->ic_so, (void *) ic,
1979 		    (void *) pdu->isp_data);
1980 		status = IDM_STATUS_IO;
1981 	}
1982 
1983 	/*
1984 	 * Success does not mean that the PDU actually reached the
1985 	 * remote node since it could get dropped along the way.
1986 	 */
1987 	idm_pdu_complete(pdu, status);
1988 
1989 	return (status);
1990 }
1991 
1992 /*
1993  * The idm_so_buf_tx_to_ini() is used by the target iSCSI layer to transmit the
1994  * Data-In PDUs using sockets. Based on the negotiated MaxRecvDataSegmentLength,
1995  * the buffer is segmented into a sequence of Data-In PDUs, ordered by DataSN.
1996  * A target can invoke this function multiple times for a single read command
1997  * (identified by the same ITT) to split the input into several sequences.
1998  *
1999  * DataSN starts with 0 for the first data PDU of an input command and advances
2000  * by 1 for each subsequent data PDU. Each sequence will have its own F bit,
2001  * which is set to 1 for the last data PDU of a sequence.
2002  *
2003  * Scope for Prototype build:
2004  * The data PDUs within a sequence will be sent in order with the buffer offset
2005  * in increasing order. i.e. initiator and target must have negotiated the
2006  * "DataPDUInOrder" to "Yes". The order between sequences is not enforced.
2007  *
2008  * Caller holds idt->idt_mutex
2009  */
2010 static idm_status_t
2011 idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb)
2012 {
2013 	idm_so_conn_t	*so_conn = idb->idb_ic->ic_transport_private;
2014 	idm_pdu_t	tmppdu;
2015 
2016 	ASSERT(mutex_owned(&idt->idt_mutex));
2017 
2018 	/*
2019 	 * Put the idm_buf_t on the tx queue.  It will be transmitted by
2020 	 * idm_sotx_thread.
2021 	 */
2022 	mutex_enter(&so_conn->ic_tx_mutex);
2023 
2024 	if (!so_conn->ic_tx_thread_running) {
2025 		mutex_exit(&so_conn->ic_tx_mutex);
2026 		/*
2027 		 * Don't release idt->idt_mutex since we're supposed to hold
2028 		 * in when calling idm_buf_tx_to_ini_done
2029 		 */
2030 		idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
2031 		return (IDM_STATUS_FAIL);
2032 	}
2033 
2034 	/*
2035 	 * Build a template for the data PDU headers we will use so that
2036 	 * the SN values will stay consistent with other PDU's we are
2037 	 * transmitting like R2T and SCSI status.
2038 	 */
2039 	bzero(&idb->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t));
2040 	tmppdu.isp_hdr = &idb->idb_data_hdr_tmpl;
2041 	(*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu,
2042 	    ISCSI_OP_SCSI_DATA_RSP);
2043 	idb->idb_tx_thread = B_TRUE;
2044 	list_insert_tail(&so_conn->ic_tx_list, (void *)idb);
2045 	cv_signal(&so_conn->ic_tx_cv);
2046 	mutex_exit(&so_conn->ic_tx_mutex);
2047 	mutex_exit(&idt->idt_mutex);
2048 
2049 	/*
2050 	 * Returning success here indicates the transfer was successfully
2051 	 * dispatched -- it does not mean that the transfer completed
2052 	 * successfully.
2053 	 */
2054 	return (IDM_STATUS_SUCCESS);
2055 }
2056 
2057 /*
2058  * The idm_so_buf_rx_from_ini() is used by the target iSCSI layer to specify the
2059  * data blocks it is ready to receive from the initiator in response to a WRITE
2060  * SCSI command. The target iSCSI layer passes the information about the desired
2061  * data blocks to the initiator in one R2T PDU. The receiving buffer, the buffer
2062  * offset and datalen are passed via the 'idb' argument.
2063  *
2064  * Scope for Prototype build:
2065  * R2Ts are required for any Data-Out PDU, i.e. initiator and target must have
2066  * negotiated the "InitialR2T" to "Yes".
2067  *
2068  * Caller holds idt->idt_mutex
2069  */
2070 static idm_status_t
2071 idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb)
2072 {
2073 	idm_pdu_t		*pdu;
2074 	iscsi_rtt_hdr_t		*rtt;
2075 
2076 	ASSERT(mutex_owned(&idt->idt_mutex));
2077 
2078 	pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP);
2079 	pdu->isp_ic = idt->idt_ic;
2080 	bzero(pdu->isp_hdr, sizeof (iscsi_rtt_hdr_t));
2081 
2082 	/* iSCSI layer fills the TTT, ITT, StatSN, ExpCmdSN, MaxCmdSN */
2083 	(*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, pdu, ISCSI_OP_RTT_RSP);
2084 
2085 	/* set the rttsn, rtt.flags, rtt.data_offset and rtt.data_length */
2086 	rtt = (iscsi_rtt_hdr_t *)(pdu->isp_hdr);
2087 
2088 	rtt->opcode		= ISCSI_OP_RTT_RSP;
2089 	rtt->flags		= ISCSI_FLAG_FINAL;
2090 	rtt->data_offset	= htonl(idb->idb_bufoffset);
2091 	rtt->data_length	= htonl(idb->idb_xfer_len);
2092 	rtt->rttsn		= htonl(idt->idt_exp_rttsn++);
2093 
2094 	/* Keep track of buffer offsets */
2095 	idb->idb_exp_offset	= idb->idb_bufoffset;
2096 	mutex_exit(&idt->idt_mutex);
2097 
2098 	/*
2099 	 * Transmit the PDU.  Call the internal routine directly as there
2100 	 * is already implicit ordering of the PDU.
2101 	 */
2102 	(void) idm_i_so_tx(pdu);
2103 
2104 	return (IDM_STATUS_SUCCESS);
2105 }
2106 
2107 static idm_status_t
2108 idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen)
2109 {
2110 	idb->idb_buf = kmem_alloc(buflen, KM_NOSLEEP);
2111 	if (idb->idb_buf == NULL) {
2112 		IDM_CONN_LOG(CE_NOTE,
2113 		    "idm_so_buf_alloc: failed buffer allocation");
2114 		return (IDM_STATUS_FAIL);
2115 	}
2116 	return (IDM_STATUS_SUCCESS);
2117 }
2118 
2119 /* ARGSUSED */
2120 static idm_status_t
2121 idm_so_buf_setup(idm_buf_t *idb)
2122 {
2123 	/* nothing to do here */
2124 	return (IDM_STATUS_SUCCESS);
2125 }
2126 
2127 /* ARGSUSED */
2128 static void
2129 idm_so_buf_teardown(idm_buf_t *idb)
2130 {
2131 	/* nothing to do here */
2132 }
2133 
2134 static void
2135 idm_so_buf_free(idm_buf_t *idb)
2136 {
2137 	kmem_free(idb->idb_buf, idb->idb_buflen);
2138 }
2139 
2140 idm_status_t
2141 idm_so_send_buf_region(idm_task_t *idt, uint8_t opcode, idm_buf_t *idb,
2142     uint32_t buf_region_offset, uint32_t buf_region_length)
2143 {
2144 	idm_conn_t		*ic;
2145 	uint32_t		max_dataseglen;
2146 	size_t			remainder, chunk;
2147 	uint32_t		data_offset = buf_region_offset;
2148 	iscsi_data_hdr_t	*bhs;
2149 	idm_pdu_t		*pdu;
2150 
2151 	ASSERT(mutex_owned(&idt->idt_mutex));
2152 
2153 	ic = idt->idt_ic;
2154 
2155 	max_dataseglen = 8192; /* Need value from login negotiation */
2156 	remainder = buf_region_length;
2157 
2158 	while (remainder) {
2159 		if (idt->idt_state != TASK_ACTIVE) {
2160 			ASSERT((idt->idt_state != TASK_IDLE) &&
2161 			    (idt->idt_state != TASK_COMPLETE));
2162 			return (IDM_STATUS_ABORTED);
2163 		}
2164 
2165 		/* check to see if we need to chunk the data */
2166 		if (remainder > max_dataseglen) {
2167 			chunk = max_dataseglen;
2168 		} else {
2169 			chunk = remainder;
2170 		}
2171 
2172 		/* Data PDU headers will always be sizeof (iscsi_hdr_t) */
2173 		pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP);
2174 		pdu->isp_ic = ic;
2175 
2176 		/*
2177 		 * For target we've already built a build a header template
2178 		 * to use during the transfer.  Use this template so that
2179 		 * the SN values stay consistent with any unrelated PDU's
2180 		 * being transmitted.
2181 		 */
2182 		if (opcode == ISCSI_OP_SCSI_DATA_RSP) {
2183 			bcopy(&idb->idb_data_hdr_tmpl, pdu->isp_hdr,
2184 			    sizeof (iscsi_hdr_t));
2185 		} else {
2186 			/*
2187 			 * OK for now, but we should remove this bzero and
2188 			 * make sure the build_hdr function is initializing the
2189 			 * header properly
2190 			 */
2191 			bzero(pdu->isp_hdr, sizeof (iscsi_hdr_t));
2192 
2193 			/*
2194 			 * setup iscsi data hdr
2195 			 * callback to the iSCSI layer to fill in the BHS
2196 			 * CmdSN, StatSN, ExpCmdSN, MaxCmdSN, TTT, ITT and
2197 			 * opcode
2198 			 */
2199 			(*ic->ic_conn_ops.icb_build_hdr)(idt, pdu, opcode);
2200 		}
2201 
2202 		/*
2203 		 * Set DataSN, data offset, and flags in BHS
2204 		 * For the prototype build, A = 0, S = 0, U = 0
2205 		 */
2206 		bhs = (iscsi_data_hdr_t *)(pdu->isp_hdr);
2207 
2208 		bhs->datasn		= htonl(idt->idt_exp_datasn++);
2209 
2210 		hton24(bhs->dlength, chunk);
2211 		bhs->offset = htonl(idb->idb_bufoffset + data_offset);
2212 
2213 		if (chunk == remainder) {
2214 			bhs->flags = ISCSI_FLAG_FINAL; /* F bit set to 1 */
2215 		}
2216 
2217 		/* setup data */
2218 		pdu->isp_data	=  (uint8_t *)idb->idb_buf + data_offset;
2219 		pdu->isp_datalen = (uint_t)chunk;
2220 		remainder	-= chunk;
2221 		data_offset	+= chunk;
2222 
2223 		/*
2224 		 * Now that we're done working with idt_exp_datasn,
2225 		 * idt->idt_state and idb->idb_bufoffset we can release
2226 		 * the task lock -- don't want to hold it across the
2227 		 * call to idm_i_so_tx since we could block.
2228 		 */
2229 		mutex_exit(&idt->idt_mutex);
2230 
2231 		/*
2232 		 * Transmit the PDU.  Call the internal routine directly
2233 		 * as there is already implicit ordering.
2234 		 */
2235 		(void) idm_i_so_tx(pdu);
2236 
2237 		mutex_enter(&idt->idt_mutex);
2238 	}
2239 
2240 	return (IDM_STATUS_SUCCESS);
2241 }
2242 
2243 /*
2244  * TX PDU cache
2245  */
2246 /* ARGSUSED */
2247 int
2248 idm_sotx_pdu_constructor(void *hdl, void *arg, int flags)
2249 {
2250 	idm_pdu_t	*pdu = hdl;
2251 
2252 	bzero(pdu, sizeof (idm_pdu_t));
2253 	pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */
2254 	pdu->isp_hdrlen = sizeof (iscsi_hdr_t);
2255 	pdu->isp_callback = idm_sotx_cache_pdu_cb;
2256 	pdu->isp_magic = IDM_PDU_MAGIC;
2257 	bzero(pdu->isp_hdr, sizeof (iscsi_hdr_t));
2258 
2259 	return (0);
2260 }
2261 
2262 /* ARGSUSED */
2263 void
2264 idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2265 {
2266 	/* reset values between use */
2267 	pdu->isp_datalen = 0;
2268 
2269 	kmem_cache_free(idm.idm_sotx_pdu_cache, pdu);
2270 }
2271 
2272 /*
2273  * RX PDU cache
2274  */
2275 /* ARGSUSED */
2276 int
2277 idm_sorx_pdu_constructor(void *hdl, void *arg, int flags)
2278 {
2279 	idm_pdu_t	*pdu = hdl;
2280 
2281 	bzero(pdu, sizeof (idm_pdu_t));
2282 	pdu->isp_magic = IDM_PDU_MAGIC;
2283 	pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */
2284 	pdu->isp_callback = idm_sorx_cache_pdu_cb;
2285 
2286 	return (0);
2287 }
2288 
2289 /* ARGSUSED */
2290 static void
2291 idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2292 {
2293 	pdu->isp_iovlen = 0;
2294 	pdu->isp_sorx_buf = 0;
2295 	kmem_cache_free(idm.idm_sorx_pdu_cache, pdu);
2296 }
2297 
2298 static void
2299 idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2300 {
2301 	/*
2302 	 * We had to modify our cached RX PDU with a longer header buffer
2303 	 * and/or a longer data buffer.  Release the new buffers and fix
2304 	 * the fields back to what we would expect for a cached RX PDU.
2305 	 */
2306 	if (pdu->isp_flags & IDM_PDU_ADDL_HDR) {
2307 		kmem_free(pdu->isp_hdr, pdu->isp_hdrlen);
2308 	}
2309 	if (pdu->isp_flags & IDM_PDU_ADDL_DATA) {
2310 		kmem_free(pdu->isp_data, pdu->isp_datalen);
2311 	}
2312 	pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1);
2313 	pdu->isp_hdrlen = sizeof (iscsi_hdr_t);
2314 	pdu->isp_data = NULL;
2315 	pdu->isp_datalen = 0;
2316 	pdu->isp_sorx_buf = 0;
2317 	pdu->isp_callback = idm_sorx_cache_pdu_cb;
2318 	idm_sorx_cache_pdu_cb(pdu, status);
2319 }
2320 
2321 /*
2322  * This thread is only active when I/O is queued for transmit
2323  * because the socket is busy.
2324  */
2325 void
2326 idm_sotx_thread(void *arg)
2327 {
2328 	idm_conn_t	*ic = arg;
2329 	idm_tx_obj_t	*object, *next;
2330 	idm_so_conn_t	*so_conn;
2331 	idm_status_t	status = IDM_STATUS_SUCCESS;
2332 
2333 	idm_conn_hold(ic);
2334 
2335 	mutex_enter(&ic->ic_mutex);
2336 	so_conn = ic->ic_transport_private;
2337 	so_conn->ic_tx_thread_running = B_TRUE;
2338 	so_conn->ic_tx_thread_did = so_conn->ic_tx_thread->t_did;
2339 	cv_signal(&ic->ic_cv);
2340 	mutex_exit(&ic->ic_mutex);
2341 
2342 	mutex_enter(&so_conn->ic_tx_mutex);
2343 
2344 	while (so_conn->ic_tx_thread_running) {
2345 		while (list_is_empty(&so_conn->ic_tx_list)) {
2346 			DTRACE_PROBE1(soconn__tx__sleep, idm_conn_t *, ic);
2347 			cv_wait(&so_conn->ic_tx_cv, &so_conn->ic_tx_mutex);
2348 			DTRACE_PROBE1(soconn__tx__wakeup, idm_conn_t *, ic);
2349 
2350 			if (!so_conn->ic_tx_thread_running) {
2351 				goto tx_bail;
2352 			}
2353 		}
2354 
2355 		object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list);
2356 		list_remove(&so_conn->ic_tx_list, object);
2357 		mutex_exit(&so_conn->ic_tx_mutex);
2358 
2359 		switch (object->idm_tx_obj_magic) {
2360 		case IDM_PDU_MAGIC:
2361 			DTRACE_PROBE2(soconn__tx__pdu, idm_conn_t *, ic,
2362 			    idm_pdu_t *, (idm_pdu_t *)object);
2363 
2364 			status = idm_i_so_tx((idm_pdu_t *)object);
2365 			break;
2366 
2367 		case IDM_BUF_MAGIC: {
2368 			idm_buf_t *idb = (idm_buf_t *)object;
2369 			idm_task_t *idt = idb->idb_task_binding;
2370 
2371 			DTRACE_PROBE2(soconn__tx__buf, idm_conn_t *, ic,
2372 			    idm_buf_t *, idb);
2373 
2374 			mutex_enter(&idt->idt_mutex);
2375 			status = idm_so_send_buf_region(idt,
2376 			    ISCSI_OP_SCSI_DATA_RSP, idb, 0, idb->idb_xfer_len);
2377 
2378 			/*
2379 			 * TX thread owns the buffer so we expect it to
2380 			 * be "in transport"
2381 			 */
2382 			ASSERT(idb->idb_in_transport);
2383 			/*
2384 			 * idm_buf_tx_to_ini_done releases idt->idt_mutex
2385 			 */
2386 			idm_buf_tx_to_ini_done(idt, idb, status);
2387 			break;
2388 		}
2389 
2390 		default:
2391 			IDM_CONN_LOG(CE_WARN, "idm_sotx_thread: Unknown magic "
2392 			    "(0x%08x)", object->idm_tx_obj_magic);
2393 			status = IDM_STATUS_FAIL;
2394 		}
2395 
2396 		mutex_enter(&so_conn->ic_tx_mutex);
2397 
2398 		if (status != IDM_STATUS_SUCCESS) {
2399 			so_conn->ic_tx_thread_running = B_FALSE;
2400 			idm_conn_event(ic, CE_TRANSPORT_FAIL, status);
2401 		}
2402 	}
2403 
2404 	/*
2405 	 * Before we leave, we need to abort every item remaining in the
2406 	 * TX list.
2407 	 */
2408 
2409 tx_bail:
2410 	object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list);
2411 
2412 	while (object != NULL) {
2413 		next = list_next(&so_conn->ic_tx_list, object);
2414 
2415 		list_remove(&so_conn->ic_tx_list, object);
2416 		switch (object->idm_tx_obj_magic) {
2417 		case IDM_PDU_MAGIC:
2418 			idm_pdu_complete((idm_pdu_t *)object,
2419 			    IDM_STATUS_ABORTED);
2420 			break;
2421 
2422 		case IDM_BUF_MAGIC: {
2423 			idm_buf_t *idb = (idm_buf_t *)object;
2424 			idm_task_t *idt = idb->idb_task_binding;
2425 			mutex_exit(&so_conn->ic_tx_mutex);
2426 			mutex_enter(&idt->idt_mutex);
2427 			/*
2428 			 * TX thread owns the buffer so we expect it to
2429 			 * be "in transport"
2430 			 */
2431 			ASSERT(idb->idb_in_transport);
2432 			/*
2433 			 * idm_buf_tx_to_ini_done releases idt->idt_mutex
2434 			 */
2435 			idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
2436 			mutex_enter(&so_conn->ic_tx_mutex);
2437 			break;
2438 		}
2439 		default:
2440 			IDM_CONN_LOG(CE_WARN,
2441 			    "idm_sotx_thread: Unexpected magic "
2442 			    "(0x%08x)", object->idm_tx_obj_magic);
2443 		}
2444 
2445 		object = next;
2446 	}
2447 
2448 	mutex_exit(&so_conn->ic_tx_mutex);
2449 	idm_conn_rele(ic);
2450 	thread_exit();
2451 	/*NOTREACHED*/
2452 }
2453