xref: /titanic_51/usr/src/uts/common/io/idm/idm_so.c (revision c3f47e92daa4c56e82fafc8f05b909d30e908996)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/conf.h>
27 #include <sys/stat.h>
28 #include <sys/file.h>
29 #include <sys/ddi.h>
30 #include <sys/sunddi.h>
31 #include <sys/modctl.h>
32 #include <sys/priv.h>
33 #include <sys/cpuvar.h>
34 #include <sys/socket.h>
35 #include <sys/strsubr.h>
36 #include <sys/sysmacros.h>
37 #include <sys/sdt.h>
38 #include <netinet/tcp.h>
39 #include <inet/tcp.h>
40 #include <sys/socketvar.h>
41 #include <sys/pathname.h>
42 #include <sys/fs/snode.h>
43 #include <sys/fs/dv_node.h>
44 #include <sys/vnode.h>
45 #include <netinet/in.h>
46 #include <net/if.h>
47 #include <sys/sockio.h>
48 
49 #include <sys/idm/idm.h>
50 #include <sys/idm/idm_so.h>
51 #include <sys/idm/idm_text.h>
52 
/*
 * Definition of the IPv6 wildcard address.  in6addr_any is currently all
 * zeroes, but use the IN6ADDR_ANY_INIT macro in case this ever changes.
 * idm_so_tgt_svc_online() uses it as the bind address of its listening
 * socket.
 */
const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
58 
59 static void idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
60 static void idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
61 static void idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
62 
63 static idm_status_t idm_so_conn_create_common(idm_conn_t *ic,
64     struct sonode *new_so);
65 static void idm_so_conn_destroy_common(idm_conn_t *ic);
66 static void idm_so_conn_connect_common(idm_conn_t *ic);
67 
68 static void idm_set_ini_preconnect_options(idm_so_conn_t *sc);
69 static void idm_set_ini_postconnect_options(idm_so_conn_t *sc);
70 static void idm_set_tgt_connect_options(struct sonode *sonode);
71 static idm_status_t idm_i_so_tx(idm_pdu_t *pdu);
72 
73 static idm_status_t idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu);
74 static idm_status_t idm_so_send_buf_region(idm_task_t *idt, uint8_t opcode,
75     idm_buf_t *idb, uint32_t buf_region_offset, uint32_t buf_region_length);
76 
77 static uint32_t idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb,
78     uint32_t ro, uint32_t dlength);
79 
80 static idm_status_t idm_so_handle_digest(idm_conn_t *it,
81     nvpair_t *digest_choice, const idm_kv_xlate_t *ikvx);
82 
83 /*
84  * Transport ops prototypes
85  */
86 static void idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu);
87 static idm_status_t idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb);
88 static idm_status_t idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb);
89 static void idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu);
90 static void idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu);
91 static void idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu);
92 static idm_status_t idm_so_free_task_rsrc(idm_task_t *idt);
93 static kv_status_t idm_so_negotiate_key_values(idm_conn_t *it,
94     nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl);
95 static idm_status_t idm_so_notice_key_values(idm_conn_t *it,
96     nvlist_t *negotiated_nvl);
97 static boolean_t idm_so_conn_is_capable(idm_conn_req_t *ic,
98     idm_transport_caps_t *caps);
99 static idm_status_t idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen);
100 static void idm_so_buf_free(idm_buf_t *idb);
101 static idm_status_t idm_so_buf_setup(idm_buf_t *idb);
102 static void idm_so_buf_teardown(idm_buf_t *idb);
103 static idm_status_t idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is);
104 static void idm_so_tgt_svc_destroy(idm_svc_t *is);
105 static idm_status_t idm_so_tgt_svc_online(idm_svc_t *is);
106 static void idm_so_tgt_svc_offline(idm_svc_t *is);
107 static void idm_so_tgt_conn_destroy(idm_conn_t *ic);
108 static idm_status_t idm_so_tgt_conn_connect(idm_conn_t *ic);
109 static void idm_so_conn_disconnect(idm_conn_t *ic);
110 static idm_status_t idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic);
111 static void idm_so_ini_conn_destroy(idm_conn_t *ic);
112 static idm_status_t idm_so_ini_conn_connect(idm_conn_t *ic);
113 
/*
 * IDM Native Sockets transport operations
 *
 * This is a positional initializer, so the entries must stay in the same
 * order as the members of idm_transport_ops_t; the trailing comment on each
 * entry names the slot it is meant to fill.  Slots initialized to NULL have
 * no sockets-specific handler in this file.  The same disconnect routine,
 * idm_so_conn_disconnect(), is installed in both the target and initiator
 * disconnect slots.  idm_so_init() publishes this table through it_ops.
 */
static
idm_transport_ops_t idm_so_transport_ops = {
	idm_so_tx,			/* it_tx_pdu */
	idm_so_buf_tx_to_ini,		/* it_buf_tx_to_ini */
	idm_so_buf_rx_from_ini,		/* it_buf_rx_from_ini */
	idm_so_rx_datain,		/* it_rx_datain */
	idm_so_rx_rtt,			/* it_rx_rtt */
	idm_so_rx_dataout,		/* it_rx_dataout */
	NULL,				/* it_alloc_conn_rsrc */
	NULL,				/* it_free_conn_rsrc */
	NULL,				/* it_tgt_enable_datamover */
	NULL,				/* it_ini_enable_datamover */
	NULL,				/* it_conn_terminate */
	idm_so_free_task_rsrc,		/* it_free_task_rsrc */
	idm_so_negotiate_key_values,	/* it_negotiate_key_values */
	idm_so_notice_key_values,	/* it_notice_key_values */
	idm_so_conn_is_capable,		/* it_conn_is_capable */
	idm_so_buf_alloc,		/* it_buf_alloc */
	idm_so_buf_free,		/* it_buf_free */
	idm_so_buf_setup,		/* it_buf_setup */
	idm_so_buf_teardown,		/* it_buf_teardown */
	idm_so_tgt_svc_create,		/* it_tgt_svc_create */
	idm_so_tgt_svc_destroy,		/* it_tgt_svc_destroy */
	idm_so_tgt_svc_online,		/* it_tgt_svc_online */
	idm_so_tgt_svc_offline,		/* it_tgt_svc_offline */
	idm_so_tgt_conn_destroy,	/* it_tgt_conn_destroy */
	idm_so_tgt_conn_connect,	/* it_tgt_conn_connect */
	idm_so_conn_disconnect,		/* it_tgt_conn_disconnect */
	idm_so_ini_conn_create,		/* it_ini_conn_create */
	idm_so_ini_conn_destroy,	/* it_ini_conn_destroy */
	idm_so_ini_conn_connect,	/* it_ini_conn_connect */
	idm_so_conn_disconnect		/* it_ini_conn_disconnect */
};
150 
151 /*
152  * idm_so_init()
153  * Sockets transport initialization
154  */
155 void
156 idm_so_init(idm_transport_t *it)
157 {
158 	/* Cache for IDM Data and R2T Transmit PDU's */
159 	idm.idm_sotx_pdu_cache = kmem_cache_create("idm_tx_pdu_cache",
160 	    sizeof (idm_pdu_t) + sizeof (iscsi_hdr_t), 8,
161 	    &idm_sotx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP);
162 
163 	/* Cache for IDM Receive PDU's */
164 	idm.idm_sorx_pdu_cache = kmem_cache_create("idm_rx_pdu_cache",
165 	    sizeof (idm_pdu_t) + IDM_SORX_CACHE_HDRLEN, 8,
166 	    &idm_sorx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP);
167 
168 	/* Set the sockets transport ops */
169 	it->it_ops = &idm_so_transport_ops;
170 }
171 
172 /*
173  * idm_so_fini()
174  * Sockets transport teardown
175  */
176 void
177 idm_so_fini(void)
178 {
179 	kmem_cache_destroy(idm.idm_sotx_pdu_cache);
180 	kmem_cache_destroy(idm.idm_sorx_pdu_cache);
181 }
182 
183 struct sonode *
184 idm_socreate(int domain, int type, int protocol)
185 {
186 	vnode_t		*dvp;
187 	vnode_t		*vp;
188 	struct snode	*csp;
189 	int		err;
190 	major_t		maj;
191 
192 	if ((vp = solookup(domain, type, protocol, NULL, &err)) == NULL) {
193 
194 		/*
195 		 * solookup calls sogetvp if the vp is not found in the cache.
196 		 * Since the call to sogetvp is hardwired to use USERSPACE
197 		 * and declared static we'll do the work here instead.
198 		 */
199 		err = lookupname(type == SOCK_STREAM ? "/dev/tcp" : "/dev/udp",
200 		    UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
201 		if (err != 0)
202 			return (NULL);
203 
204 		/* Check that it is the correct vnode */
205 		if (vp->v_type != VCHR) {
206 			VN_RELE(vp);
207 			return (NULL);
208 		}
209 
210 		csp = VTOS(VTOS(vp)->s_commonvp);
211 		if (!(csp->s_flag & SDIPSET)) {
212 			char    *pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
213 
214 			err = ddi_dev_pathname(vp->v_rdev, S_IFCHR,
215 			    pathname);
216 			if (err == 0) {
217 				err = devfs_lookupname(pathname, NULLVPP,
218 				    &dvp);
219 			}
220 			VN_RELE(vp);
221 			kmem_free(pathname, MAXPATHLEN);
222 			if (err != 0) {
223 				return (NULL);
224 			}
225 			vp = dvp;
226 		}
227 
228 		maj = getmajor(vp->v_rdev);
229 		if (!STREAMSTAB(maj)) {
230 			VN_RELE(vp);
231 			return (NULL);
232 		}
233 	}
234 	return (socreate(vp, domain, type, protocol, SOV_DEFAULT, NULL, &err));
235 }
236 
237 /*
238  * idm_soshutdown will disconnect the socket and prevent subsequent PDU
239  * reception and transmission.  The sonode still exists but its state
240  * gets modified to indicate it is no longer connected.  Calls to
241  * idm_sorecv/idm_iov_sorecv will return so idm_soshutdown can be used
242  * regain control of a thread stuck in idm_sorecv.
243  */
244 void
245 idm_soshutdown(struct sonode *so)
246 {
247 	(void) soshutdown(so, SHUT_RDWR);
248 }
249 
250 /*
251  * idm_sodestroy releases all resources associated with a socket previously
252  * created with idm_socreate.  The socket must be shutdown using
253  * idm_soshutdown before the socket is destroyed with idm_sodestroy,
254  * otherwise undefined behavior will result.
255  */
256 void
257 idm_sodestroy(struct sonode *so)
258 {
259 	vnode_t *vp = SOTOV(so);
260 
261 	(void) VOP_CLOSE(vp, 0, 1, 0, kcred, NULL);
262 
263 	VN_RELE(vp);
264 }
265 
266 /*
267  * IP address filter functions to flag addresses that should not
268  * go out to initiators through discovery.
269  */
270 static boolean_t
271 idm_v4_addr_okay(struct in_addr *in_addr)
272 {
273 	in_addr_t addr = ntohl(in_addr->s_addr);
274 
275 	if ((INADDR_NONE == addr) ||
276 	    (IN_MULTICAST(addr)) ||
277 	    ((addr >> IN_CLASSA_NSHIFT) == 0) ||
278 	    ((addr >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) {
279 		return (B_FALSE);
280 	}
281 	return (B_TRUE);
282 }
283 
284 static boolean_t
285 idm_v6_addr_okay(struct in6_addr *addr6)
286 {
287 
288 	if ((IN6_IS_ADDR_UNSPECIFIED(addr6)) ||
289 	    (IN6_IS_ADDR_LOOPBACK(addr6)) ||
290 	    (IN6_IS_ADDR_MULTICAST(addr6)) ||
291 	    (IN6_IS_ADDR_V4MAPPED(addr6)) ||
292 	    (IN6_IS_ADDR_V4COMPAT(addr6)) ||
293 	    (IN6_IS_ADDR_LINKLOCAL(addr6))) {
294 		return (B_FALSE);
295 	}
296 	return (B_TRUE);
297 }
298 
299 /*
300  * idm_get_ipaddr will retrieve a list of IP Addresses which the host is
301  * configured with by sending down a sequence of kernel ioctl to IP STREAMS.
302  */
303 int
304 idm_get_ipaddr(idm_addr_list_t **ipaddr_p)
305 {
306 	struct sonode 		*so4, *so6;
307 	vnode_t 		*vp, *vp4, *vp6;
308 	struct lifnum		lifn;
309 	struct lifconf		lifc;
310 	struct lifreq		*lp;
311 	int			rval;
312 	int			numifs;
313 	int			bufsize;
314 	void			*buf;
315 	int			i, j, n, rc;
316 	struct sockaddr_storage	ss;
317 	struct sockaddr_in	*sin;
318 	struct sockaddr_in6	*sin6;
319 	idm_addr_t		*ip;
320 	idm_addr_list_t		*ipaddr;
321 	int			size_ipaddr;
322 
323 	*ipaddr_p = NULL;
324 	size_ipaddr = 0;
325 	buf = NULL;
326 
327 	/* create an ipv4 and ipv6 UDP socket */
328 	if ((so6 = idm_socreate(PF_INET6, SOCK_DGRAM, 0)) == NULL)
329 		return (0);
330 	if ((so4 = idm_socreate(PF_INET, SOCK_DGRAM, 0)) == NULL) {
331 		idm_sodestroy(so6);
332 		return (0);
333 	}
334 
335 	/* setup the vp's for each socket type */
336 	vp6 = SOTOV(so6);
337 	vp4 = SOTOV(so4);
338 	/* use vp6 for ioctls with unspecified families by default */
339 	vp = vp6;
340 
341 retry_count:
342 	/* snapshot the current number of interfaces */
343 	lifn.lifn_family = PF_UNSPEC;
344 	lifn.lifn_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
345 	lifn.lifn_count = 0;
346 	if (VOP_IOCTL(vp, SIOCGLIFNUM, (intptr_t)&lifn, FKIOCTL, kcred,
347 	    &rval, NULL) != 0) {
348 		goto cleanup;
349 	}
350 
351 	numifs = lifn.lifn_count;
352 	if (numifs <= 0) {
353 		goto cleanup;
354 	}
355 
356 	/* allocate extra room in case more interfaces appear */
357 	numifs += 10;
358 
359 	/* get the interface names and ip addresses */
360 	bufsize = numifs * sizeof (struct lifreq);
361 	buf = kmem_alloc(bufsize, KM_SLEEP);
362 
363 	lifc.lifc_family = AF_UNSPEC;
364 	lifc.lifc_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
365 	lifc.lifc_len = bufsize;
366 	lifc.lifc_buf = buf;
367 	rc = VOP_IOCTL(vp, SIOCGLIFCONF, (intptr_t)&lifc, FKIOCTL, kcred,
368 	    &rval, NULL);
369 	if (rc != 0) {
370 		goto cleanup;
371 	}
372 	/* if our extra room is used up, try again */
373 	if (bufsize <= lifc.lifc_len) {
374 		kmem_free(buf, bufsize);
375 		buf = NULL;
376 		goto retry_count;
377 	}
378 	/* calc actual number of ifconfs */
379 	n = lifc.lifc_len / sizeof (struct lifreq);
380 
381 	/* get ip address */
382 	if (n > 0) {
383 		size_ipaddr = sizeof (idm_addr_list_t) +
384 		    (n - 1) * sizeof (idm_addr_t);
385 		ipaddr = kmem_zalloc(size_ipaddr, KM_SLEEP);
386 	} else {
387 		goto cleanup;
388 	}
389 
390 	/*
391 	 * Examine the array of interfaces and filter uninteresting ones
392 	 */
393 	for (i = 0, j = 0, lp = lifc.lifc_req; i < n; i++, lp++) {
394 
395 		/*
396 		 * Copy the address as the SIOCGLIFFLAGS ioctl is destructive
397 		 */
398 		ss = lp->lifr_addr;
399 		/*
400 		 * fetch the flags using the socket of the correct family
401 		 */
402 		switch (ss.ss_family) {
403 		case AF_INET:
404 			vp = vp4;
405 			break;
406 		case AF_INET6:
407 			vp = vp6;
408 			break;
409 		default:
410 			continue;
411 		}
412 		rc =  VOP_IOCTL(vp, SIOCGLIFFLAGS, (intptr_t)lp, FKIOCTL, kcred,
413 		    &rval, NULL);
414 		if (rc == 0) {
415 			/*
416 			 * If we got the flags, skip uninteresting
417 			 * interfaces based on flags
418 			 */
419 			if ((lp->lifr_flags & IFF_UP) != IFF_UP)
420 				continue;
421 			if (lp->lifr_flags &
422 			    (IFF_ANYCAST|IFF_NOLOCAL|IFF_DEPRECATED))
423 				continue;
424 		}
425 
426 		/* save ip address */
427 		ip = &ipaddr->al_addrs[j];
428 		switch (ss.ss_family) {
429 		case AF_INET:
430 			sin = (struct sockaddr_in *)&ss;
431 			if (!idm_v4_addr_okay(&sin->sin_addr))
432 				continue;
433 			ip->a_addr.i_addr.in4 = sin->sin_addr;
434 			ip->a_addr.i_insize = sizeof (struct in_addr);
435 			break;
436 		case AF_INET6:
437 			sin6 = (struct sockaddr_in6 *)&ss;
438 			if (!idm_v6_addr_okay(&sin6->sin6_addr))
439 				continue;
440 			ip->a_addr.i_addr.in6 = sin6->sin6_addr;
441 			ip->a_addr.i_insize = sizeof (struct in6_addr);
442 			break;
443 		default:
444 			continue;
445 		}
446 		j++;
447 	}
448 
449 	if (j == 0) {
450 		/* no valid ifaddr */
451 		kmem_free(ipaddr, size_ipaddr);
452 		size_ipaddr = 0;
453 		ipaddr = NULL;
454 	} else {
455 		ipaddr->al_out_cnt = j;
456 	}
457 
458 
459 cleanup:
460 	idm_sodestroy(so6);
461 	idm_sodestroy(so4);
462 
463 	if (buf != NULL)
464 		kmem_free(buf, bufsize);
465 
466 	*ipaddr_p = ipaddr;
467 	return (size_ipaddr);
468 }
469 
470 int
471 idm_sorecv(struct sonode *so, void *msg, size_t len)
472 {
473 	iovec_t iov;
474 
475 	ASSERT(so != NULL);
476 	ASSERT(len != 0);
477 
478 	/*
479 	 * Fill in iovec and receive data
480 	 */
481 	iov.iov_base = msg;
482 	iov.iov_len = len;
483 
484 	return (idm_iov_sorecv(so, &iov, 1, len));
485 }
486 
487 /*
488  * idm_sosendto - Sends a buffered data on a non-connected socket.
489  *
490  * This function puts the data provided on the wire by calling sosendmsg.
491  * It will return only when all the data has been sent or if an error
492  * occurs.
493  *
494  * Returns 0 for success, the socket errno value if sosendmsg fails, and
495  * -1 if sosendmsg returns success but uio_resid != 0
496  */
497 int
498 idm_sosendto(struct sonode *so, void *buff, size_t len,
499     struct sockaddr *name, socklen_t namelen)
500 {
501 	struct msghdr		msg;
502 	struct uio		uio;
503 	struct iovec		iov[1];
504 	int			error;
505 
506 	iov[0].iov_base	= buff;
507 	iov[0].iov_len	= len;
508 
509 	/* Initialization of the message header. */
510 	bzero(&msg, sizeof (msg));
511 	msg.msg_iov	= iov;
512 	msg.msg_iovlen	= 1;
513 
514 	/* Initialization of the uio structure. */
515 	uio.uio_iov	= iov;
516 	uio.uio_iovcnt	= 1;
517 	uio.uio_segflg	= UIO_SYSSPACE;
518 	uio.uio_resid	= len;
519 
520 	msg.msg_name	= name;
521 	msg.msg_namelen	= namelen;
522 
523 	if ((error = sosendmsg(so, &msg, &uio)) == 0) {
524 		/* Data sent */
525 		if (uio.uio_resid == 0) {
526 			/* All data sent.  Success. */
527 			return (0);
528 		} else {
529 			/* Not all data was sent.  Failure */
530 			return (-1);
531 		}
532 	}
533 
534 	/* Send failed */
535 	return (error);
536 }
537 
538 /*
539  * idm_iov_sosend - Sends an iovec on a connection.
540  *
541  * This function puts the data provided on the wire by calling sosendmsg.
542  * It will return only when all the data has been sent or if an error
543  * occurs.
544  *
545  * Returns 0 for success, the socket errno value if sosendmsg fails, and
546  * -1 if sosendmsg returns success but uio_resid != 0
547  */
548 int
549 idm_iov_sosend(struct sonode *so, iovec_t *iop, int iovlen, size_t total_len)
550 {
551 	struct msghdr		msg;
552 	struct uio		uio;
553 	int			error;
554 
555 	ASSERT(iop != NULL);
556 
557 	/* Initialization of the message header. */
558 	bzero(&msg, sizeof (msg));
559 	msg.msg_iov	= iop;
560 	msg.msg_iovlen	= iovlen;
561 
562 	/* Initialization of the uio structure. */
563 	bzero(&uio, sizeof (uio));
564 	uio.uio_iov	= iop;
565 	uio.uio_iovcnt	= iovlen;
566 	uio.uio_segflg	= UIO_SYSSPACE;
567 	uio.uio_resid	= total_len;
568 
569 	if ((error = sosendmsg(so, &msg, &uio)) == 0) {
570 		/* Data sent */
571 		if (uio.uio_resid == 0) {
572 			/* All data sent.  Success. */
573 			return (0);
574 		} else {
575 			/* Not all data was sent.  Failure */
576 			return (-1);
577 		}
578 	}
579 
580 	/* Send failed */
581 	return (error);
582 }
583 
584 /*
585  * idm_iov_sorecv - Receives an iovec from a connection
586  *
587  * This function gets the data asked for from the socket.  It will return
588  * only when all the requested data has been retrieved or if an error
589  * occurs.
590  *
591  * Returns 0 for success, the socket errno value if sorecvmsg fails, and
592  * -1 if sorecvmsg returns success but uio_resid != 0
593  */
594 int
595 idm_iov_sorecv(struct sonode *so, iovec_t *iop, int iovlen, size_t total_len)
596 {
597 	struct msghdr		msg;
598 	struct uio		uio;
599 	int			error;
600 
601 	ASSERT(iop != NULL);
602 
603 	/* Initialization of the message header. */
604 	bzero(&msg, sizeof (msg));
605 	msg.msg_iov	= iop;
606 	msg.msg_flags	= MSG_WAITALL;
607 	msg.msg_iovlen	= iovlen;
608 
609 	/* Initialization of the uio structure. */
610 	bzero(&uio, sizeof (uio));
611 	uio.uio_iov	= iop;
612 	uio.uio_iovcnt	= iovlen;
613 	uio.uio_segflg	= UIO_SYSSPACE;
614 	uio.uio_resid	= total_len;
615 
616 	if ((error = sorecvmsg(so, &msg, &uio)) == 0) {
617 		/* Received data */
618 		if (uio.uio_resid == 0) {
619 			/* All requested data received.  Success */
620 			return (0);
621 		} else {
622 			/*
623 			 * Not all data was received.  The connection has
624 			 * probably failed.
625 			 */
626 			return (-1);
627 		}
628 	}
629 
630 	/* Receive failed */
631 	return (error);
632 }
633 
634 static void
635 idm_set_ini_preconnect_options(idm_so_conn_t *sc)
636 {
637 	int	conn_abort = 10000;
638 	int	conn_notify = 2000;
639 	int	abort = 30000;
640 
641 	/* Pre-connect socket options */
642 	(void) sosetsockopt(sc->ic_so, IPPROTO_TCP, TCP_CONN_NOTIFY_THRESHOLD,
643 	    (char *)&conn_notify, sizeof (int));
644 	(void) sosetsockopt(sc->ic_so, IPPROTO_TCP, TCP_CONN_ABORT_THRESHOLD,
645 	    (char *)&conn_abort, sizeof (int));
646 	(void) sosetsockopt(sc->ic_so, IPPROTO_TCP, TCP_ABORT_THRESHOLD,
647 	    (char *)&abort, sizeof (int));
648 }
649 
650 static void
651 idm_set_ini_postconnect_options(idm_so_conn_t *sc)
652 {
653 	int32_t		rcvbuf = IDM_RCVBUF_SIZE;
654 	int32_t		sndbuf = IDM_SNDBUF_SIZE;
655 	const int	on = 1;
656 
657 	/* Set postconnect options */
658 	(void) sosetsockopt(sc->ic_so, IPPROTO_TCP, TCP_NODELAY,
659 	    (char *)&on, sizeof (int));
660 	(void) sosetsockopt(sc->ic_so, SOL_SOCKET, SO_RCVBUF,
661 	    (char *)&rcvbuf, sizeof (int));
662 	(void) sosetsockopt(sc->ic_so, SOL_SOCKET, SO_SNDBUF,
663 	    (char *)&sndbuf, sizeof (int));
664 }
665 
666 static void
667 idm_set_tgt_connect_options(struct sonode *sonode)
668 {
669 	int32_t		rcvbuf = IDM_RCVBUF_SIZE;
670 	int32_t		sndbuf = IDM_SNDBUF_SIZE;
671 	const int	on = 1;
672 
673 	/* Set connect options */
674 	(void) sosetsockopt(sonode, SOL_SOCKET, SO_RCVBUF,
675 	    (char *)&rcvbuf, sizeof (int));
676 	(void) sosetsockopt(sonode, SOL_SOCKET, SO_SNDBUF,
677 	    (char *)&sndbuf, sizeof (int));
678 	(void) sosetsockopt(sonode, IPPROTO_TCP, TCP_NODELAY,
679 	    (char *)&on, sizeof (on));
680 }
681 
682 static uint32_t
683 n2h24(const uchar_t *ptr)
684 {
685 	return ((ptr[0] << 16) | (ptr[1] << 8) | ptr[2]);
686 }
687 
688 
689 static idm_status_t
690 idm_sorecvhdr(idm_conn_t *ic, idm_pdu_t *pdu)
691 {
692 	iscsi_hdr_t	*bhs;
693 	uint32_t	hdr_digest_crc;
694 	uint32_t	crc_calculated;
695 	void		*new_hdr;
696 	int		ahslen = 0;
697 	int		total_len = 0;
698 	int		iovlen = 0;
699 	struct iovec	iov[2];
700 	idm_so_conn_t	*so_conn;
701 	int		rc;
702 
703 	so_conn = ic->ic_transport_private;
704 
705 	/*
706 	 * Read BHS
707 	 */
708 	bhs = pdu->isp_hdr;
709 	rc = idm_sorecv(so_conn->ic_so, pdu->isp_hdr, sizeof (iscsi_hdr_t));
710 	if (rc != IDM_STATUS_SUCCESS) {
711 		return (IDM_STATUS_FAIL);
712 	}
713 
714 	/*
715 	 * Check actual AHS length against the amount available in the buffer
716 	 */
717 	pdu->isp_hdrlen = sizeof (iscsi_hdr_t) +
718 	    (bhs->hlength * sizeof (uint32_t));
719 	pdu->isp_datalen = n2h24(bhs->dlength);
720 	if (bhs->hlength > IDM_SORX_CACHE_AHSLEN) {
721 		/* Allocate a new header segment and change the callback */
722 		new_hdr = kmem_alloc(pdu->isp_hdrlen, KM_SLEEP);
723 		bcopy(pdu->isp_hdr, new_hdr, sizeof (iscsi_hdr_t));
724 		pdu->isp_hdr = new_hdr;
725 		pdu->isp_flags |= IDM_PDU_ADDL_HDR;
726 
727 		/*
728 		 * This callback will restore the expected values after
729 		 * the RX PDU has been processed.
730 		 */
731 		pdu->isp_callback = idm_sorx_addl_pdu_cb;
732 	}
733 
734 	/*
735 	 * Setup receipt of additional header and header digest (if enabled).
736 	 */
737 	if (bhs->hlength > 0) {
738 		iov[iovlen].iov_base = (caddr_t)(pdu->isp_hdr + 1);
739 		ahslen = pdu->isp_hdrlen - sizeof (iscsi_hdr_t);
740 		iov[iovlen].iov_len = ahslen;
741 		total_len += iov[iovlen].iov_len;
742 		iovlen++;
743 	}
744 
745 	if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) {
746 		iov[iovlen].iov_base = (caddr_t)&hdr_digest_crc;
747 		iov[iovlen].iov_len = sizeof (hdr_digest_crc);
748 		total_len += iov[iovlen].iov_len;
749 		iovlen++;
750 	}
751 
752 	if ((iovlen != 0) &&
753 	    (idm_iov_sorecv(so_conn->ic_so, &iov[0], iovlen,
754 	    total_len) != 0)) {
755 		return (IDM_STATUS_FAIL);
756 	}
757 
758 	/*
759 	 * Validate header digest if enabled
760 	 */
761 	if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) {
762 		crc_calculated = idm_crc32c(pdu->isp_hdr,
763 		    sizeof (iscsi_hdr_t) + ahslen);
764 		if (crc_calculated != hdr_digest_crc) {
765 			/* Invalid Header Digest */
766 			return (IDM_STATUS_HEADER_DIGEST);
767 		}
768 	}
769 
770 	return (0);
771 }
772 
773 /*
774  * idm_so_ini_conn_create()
775  * Allocate the sockets transport connection resources.
776  */
777 static idm_status_t
778 idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic)
779 {
780 	struct sonode	*so;
781 	idm_so_conn_t	*so_conn;
782 	idm_status_t	idmrc;
783 
784 	so = idm_socreate(cr->cr_domain, cr->cr_type,
785 	    cr->cr_protocol);
786 	if (so == NULL) {
787 		return (IDM_STATUS_FAIL);
788 	}
789 
790 	/* Bind the socket if configured to do so */
791 	if (cr->cr_bound) {
792 		if (sobind(so, &cr->cr_bound_addr.sin,
793 		    SIZEOF_SOCKADDR(&cr->cr_bound_addr.sin), 0, 0) != 0) {
794 			idm_sodestroy(so);
795 			return (IDM_STATUS_FAIL);
796 		}
797 	}
798 
799 	idmrc = idm_so_conn_create_common(ic, so);
800 	if (idmrc != IDM_STATUS_SUCCESS) {
801 		idm_soshutdown(so);
802 		idm_sodestroy(so);
803 		return (IDM_STATUS_FAIL);
804 	}
805 
806 	so_conn = ic->ic_transport_private;
807 	/* Set up socket options */
808 	idm_set_ini_preconnect_options(so_conn);
809 
810 	return (IDM_STATUS_SUCCESS);
811 }
812 
813 /*
814  * idm_so_ini_conn_destroy()
815  * Tear down the sockets transport connection resources.
816  */
817 static void
818 idm_so_ini_conn_destroy(idm_conn_t *ic)
819 {
820 	idm_so_conn_destroy_common(ic);
821 }
822 
823 /*
824  * idm_so_ini_conn_connect()
825  * Establish the connection referred to by the handle previously allocated via
826  * idm_so_ini_conn_create().
827  */
828 static idm_status_t
829 idm_so_ini_conn_connect(idm_conn_t *ic)
830 {
831 	idm_so_conn_t	*so_conn;
832 
833 	so_conn = ic->ic_transport_private;
834 
835 	if (soconnect(so_conn->ic_so, &ic->ic_ini_dst_addr.sin,
836 	    (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)), 0, 0) != 0) {
837 		idm_soshutdown(so_conn->ic_so);
838 		return (IDM_STATUS_FAIL);
839 	}
840 
841 	idm_so_conn_connect_common(ic);
842 
843 	idm_set_ini_postconnect_options(so_conn);
844 
845 	return (IDM_STATUS_SUCCESS);
846 }
847 
848 idm_status_t
849 idm_so_tgt_conn_create(idm_conn_t *ic, struct sonode *new_so)
850 {
851 	idm_status_t	idmrc;
852 
853 	idmrc = idm_so_conn_create_common(ic, new_so);
854 
855 	return (idmrc);
856 }
857 
858 static void
859 idm_so_tgt_conn_destroy(idm_conn_t *ic)
860 {
861 	idm_so_conn_destroy_common(ic);
862 }
863 
864 /*
865  * idm_so_tgt_conn_connect()
866  * Establish the connection in ic, passed from idm_tgt_conn_finish(), which
867  * is invoked from the SM as a result of an inbound connection request.
868  */
869 static idm_status_t
870 idm_so_tgt_conn_connect(idm_conn_t *ic)
871 {
872 	idm_so_conn_connect_common(ic);
873 
874 	return (IDM_STATUS_SUCCESS);
875 }
876 
877 static idm_status_t
878 idm_so_conn_create_common(idm_conn_t *ic, struct sonode *new_so)
879 {
880 	idm_so_conn_t	*so_conn;
881 
882 	so_conn = kmem_zalloc(sizeof (idm_so_conn_t), KM_SLEEP);
883 	so_conn->ic_so = new_so;
884 
885 	ic->ic_transport_private = so_conn;
886 	ic->ic_transport_hdrlen = 0;
887 
888 	/* Set the scoreboarding flag on this connection */
889 	ic->ic_conn_flags |= IDM_CONN_USE_SCOREBOARD;
890 
891 	/*
892 	 * Initialize tx thread mutex and list
893 	 */
894 	mutex_init(&so_conn->ic_tx_mutex, NULL, MUTEX_DEFAULT, NULL);
895 	cv_init(&so_conn->ic_tx_cv, NULL, CV_DEFAULT, NULL);
896 	list_create(&so_conn->ic_tx_list, sizeof (idm_pdu_t),
897 	    offsetof(idm_pdu_t, idm_tx_link));
898 
899 	return (IDM_STATUS_SUCCESS);
900 }
901 
902 static void
903 idm_so_conn_destroy_common(idm_conn_t *ic)
904 {
905 	idm_so_conn_t	*so_conn = ic->ic_transport_private;
906 
907 	ic->ic_transport_private = NULL;
908 	idm_sodestroy(so_conn->ic_so);
909 	list_destroy(&so_conn->ic_tx_list);
910 	mutex_destroy(&so_conn->ic_tx_mutex);
911 	cv_destroy(&so_conn->ic_tx_cv);
912 
913 	kmem_free(so_conn, sizeof (idm_so_conn_t));
914 }
915 
916 static void
917 idm_so_conn_connect_common(idm_conn_t *ic)
918 {
919 	idm_so_conn_t	*so_conn;
920 
921 	so_conn = ic->ic_transport_private;
922 
923 	SOP_GETSOCKNAME(so_conn->ic_so);
924 
925 	/* Set the local and remote addresses in the idm conn handle */
926 	mutex_enter(&so_conn->ic_so->so_lock);
927 	bcopy(so_conn->ic_so->so_laddr_sa, &ic->ic_laddr,
928 	    so_conn->ic_so->so_laddr_len);
929 	bcopy(so_conn->ic_so->so_faddr_sa, &ic->ic_raddr,
930 	    so_conn->ic_so->so_faddr_len);
931 	mutex_exit(&so_conn->ic_so->so_lock);
932 
933 	mutex_enter(&ic->ic_mutex);
934 	so_conn->ic_tx_thread = thread_create(NULL, 0, idm_sotx_thread, ic, 0,
935 	    &p0, TS_RUN, minclsyspri);
936 	so_conn->ic_rx_thread = thread_create(NULL, 0, idm_sorx_thread, ic, 0,
937 	    &p0, TS_RUN, minclsyspri);
938 
939 	while (!so_conn->ic_rx_thread_running || !so_conn->ic_tx_thread_running)
940 		cv_wait(&ic->ic_cv, &ic->ic_mutex);
941 	mutex_exit(&ic->ic_mutex);
942 }
943 
/*
 * idm_so_conn_disconnect()
 * Shutdown the socket connection and stop the thread
 *
 * Clears the rx/tx "running" flags under ic_mutex, signals the transmit
 * condition variable, shuts the socket down and then joins the transmit
 * and receive threads.  Installed in both the target and the initiator
 * disconnect slots of the transport ops table.
 */
static void
idm_so_conn_disconnect(idm_conn_t *ic)
{
	idm_so_conn_t	*so_conn;

	so_conn = ic->ic_transport_private;

	/* Tell both service threads to stop; flags are guarded by ic_mutex */
	mutex_enter(&ic->ic_mutex);
	so_conn->ic_rx_thread_running = B_FALSE;
	so_conn->ic_tx_thread_running = B_FALSE;
	/* We need to wakeup the TX thread */
	mutex_enter(&so_conn->ic_tx_mutex);
	cv_signal(&so_conn->ic_tx_cv);
	mutex_exit(&so_conn->ic_tx_mutex);
	mutex_exit(&ic->ic_mutex);

	/* This should wakeup the RX thread if it is sleeping */
	idm_soshutdown(so_conn->ic_so);

	/* Wait for both threads to exit before returning to the caller */
	thread_join(so_conn->ic_tx_thread_did);
	thread_join(so_conn->ic_rx_thread_did);
}
970 
971 /*
972  * idm_so_tgt_svc_create()
973  * Establish a service on an IP address and port.  idm_svc_req_t contains
974  * the service parameters.
975  */
976 /*ARGSUSED*/
977 static idm_status_t
978 idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is)
979 {
980 	idm_so_svc_t		*so_svc;
981 
982 	so_svc = kmem_zalloc(sizeof (idm_so_svc_t), KM_SLEEP);
983 
984 	/* Set the new sockets service in svc handle */
985 	is->is_so_svc = (void *)so_svc;
986 
987 	return (IDM_STATUS_SUCCESS);
988 }
989 
990 /*
991  * idm_so_tgt_svc_destroy()
992  * Teardown sockets resources allocated in idm_so_tgt_svc_create()
993  */
994 static void
995 idm_so_tgt_svc_destroy(idm_svc_t *is)
996 {
997 	/* the socket will have been torn down; free the service */
998 	kmem_free(is->is_so_svc, sizeof (idm_so_svc_t));
999 }
1000 
1001 /*
1002  * idm_so_tgt_svc_online()
1003  * Launch a watch thread on the svc allocated in idm_so_tgt_svc_create()
1004  */
1005 
1006 static idm_status_t
1007 idm_so_tgt_svc_online(idm_svc_t *is)
1008 {
1009 	idm_so_svc_t		*so_svc;
1010 	idm_svc_req_t		*sr = &is->is_svc_req;
1011 	struct sockaddr_in6	sin6_ip;
1012 	const uint32_t		on = 1;
1013 	const uint32_t		off = 0;
1014 
1015 	mutex_enter(&is->is_mutex);
1016 	so_svc = (idm_so_svc_t *)is->is_so_svc;
1017 
1018 	/*
1019 	 * Try creating an IPv6 socket first
1020 	 */
1021 	if ((so_svc->is_so = idm_socreate(PF_INET6, SOCK_STREAM, 0)) == NULL) {
1022 		mutex_exit(&is->is_mutex);
1023 		return (IDM_STATUS_FAIL);
1024 	} else {
1025 		bzero(&sin6_ip, sizeof (sin6_ip));
1026 		sin6_ip.sin6_family = AF_INET6;
1027 		sin6_ip.sin6_port = htons(sr->sr_port);
1028 		sin6_ip.sin6_addr = in6addr_any;
1029 
1030 		(void) sosetsockopt(so_svc->is_so, SOL_SOCKET, SO_REUSEADDR,
1031 		    (char *)&on, sizeof (on));
1032 		/*
1033 		 * Turn off SO_MAC_EXEMPT so future sobinds succeed
1034 		 */
1035 		(void) sosetsockopt(so_svc->is_so, SOL_SOCKET, SO_MAC_EXEMPT,
1036 		    (char *)&off, sizeof (off));
1037 
1038 		if (sobind(so_svc->is_so, (struct sockaddr *)&sin6_ip,
1039 		    sizeof (sin6_ip), 0, 0) != 0) {
1040 			mutex_exit(&is->is_mutex);
1041 			idm_sodestroy(so_svc->is_so);
1042 			return (IDM_STATUS_FAIL);
1043 		}
1044 	}
1045 
1046 	idm_set_tgt_connect_options(so_svc->is_so);
1047 
1048 	if (solisten(so_svc->is_so, 5) != 0) {
1049 		mutex_exit(&is->is_mutex);
1050 		idm_soshutdown(so_svc->is_so);
1051 		idm_sodestroy(so_svc->is_so);
1052 		return (IDM_STATUS_FAIL);
1053 	}
1054 
1055 	/* Launch a watch thread */
1056 	so_svc->is_thread = thread_create(NULL, 0, idm_so_svc_port_watcher,
1057 	    is, 0, &p0, TS_RUN, minclsyspri);
1058 
1059 	if (so_svc->is_thread == NULL) {
1060 		/* Failure to launch; teardown the socket */
1061 		mutex_exit(&is->is_mutex);
1062 		idm_soshutdown(so_svc->is_so);
1063 		idm_sodestroy(so_svc->is_so);
1064 		return (IDM_STATUS_FAIL);
1065 	}
1066 
1067 	/* Wait for the port watcher thread to start */
1068 	while (!so_svc->is_thread_running)
1069 		cv_wait(&is->is_cv, &is->is_mutex);
1070 	mutex_exit(&is->is_mutex);
1071 
1072 	return (IDM_STATUS_SUCCESS);
1073 }
1074 
1075 /*
1076  * idm_so_tgt_svc_offline
1077  *
1078  * Stop listening on the IP address and port identified by idm_svc_t.
1079  */
1080 static void
1081 idm_so_tgt_svc_offline(idm_svc_t *is)
1082 {
1083 	idm_so_svc_t		*so_svc;
1084 
1085 	mutex_enter(&is->is_mutex);
1086 	so_svc = (idm_so_svc_t *)is->is_so_svc;
1087 	so_svc->is_thread_running = B_FALSE;
1088 	mutex_exit(&is->is_mutex);
1089 
1090 	/*
1091 	 * When called from the kernel, soaccept blocks and cannot be woken
1092 	 * up via the sockfs API.  soclose does not work like you would
1093 	 * hope.  When the Volo project is available we can switch to that
1094 	 * API which should address this issue.  For now, we will poke at
1095 	 * the socket to wake it up.
1096 	 */
1097 	mutex_enter(&so_svc->is_so->so_lock);
1098 	so_svc->is_so->so_error = EINTR;
1099 	cv_signal(&so_svc->is_so->so_connind_cv);
1100 	mutex_exit(&so_svc->is_so->so_lock);
1101 
1102 	/*
1103 	 * Now we expect the port watcher thread to terminate
1104 	 */
1105 	thread_join(so_svc->is_thread_did);
1106 
1107 	/*
1108 	 * Teardown socket
1109 	 */
1110 	idm_sodestroy(so_svc->is_so);
1111 }
1112 
1113 /*
1114  * Watch thread for target service connection establishment.
1115  */
1116 void
1117 idm_so_svc_port_watcher(void *arg)
1118 {
1119 	idm_svc_t		*svc = arg;
1120 	struct sonode		*new_so;
1121 	idm_conn_t		*ic;
1122 	idm_status_t		idmrc;
1123 	idm_so_svc_t		*so_svc;
1124 	int			rc;
1125 	const uint32_t		off = 0;
1126 
1127 	mutex_enter(&svc->is_mutex);
1128 
1129 	so_svc = svc->is_so_svc;
1130 	so_svc->is_thread_running = B_TRUE;
1131 	so_svc->is_thread_did = so_svc->is_thread->t_did;
1132 
1133 	cv_signal(&svc->is_cv);
1134 
1135 	IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) online", (void *)svc,
1136 	    svc->is_svc_req.sr_port);
1137 
1138 	while (so_svc->is_thread_running) {
1139 		mutex_exit(&svc->is_mutex);
1140 
1141 		if ((rc = soaccept(so_svc->is_so, 0, &new_so)) != 0) {
1142 			mutex_enter(&svc->is_mutex);
1143 			if (rc == ECONNABORTED)
1144 				continue;
1145 			/* Connection problem */
1146 			break;
1147 		}
1148 		/*
1149 		 * Turn off SO_MAC_EXEMPT so future sobinds succeed
1150 		 */
1151 		(void) sosetsockopt(new_so, SOL_SOCKET, SO_MAC_EXEMPT,
1152 		    (char *)&off, sizeof (off));
1153 
1154 		idmrc = idm_svc_conn_create(svc, IDM_TRANSPORT_TYPE_SOCKETS,
1155 		    &ic);
1156 		if (idmrc != IDM_STATUS_SUCCESS) {
1157 			/* Drop connection */
1158 			idm_soshutdown(new_so);
1159 			idm_sodestroy(new_so);
1160 			mutex_enter(&svc->is_mutex);
1161 			continue;
1162 		}
1163 
1164 		idmrc = idm_so_tgt_conn_create(ic, new_so);
1165 		if (idmrc != IDM_STATUS_SUCCESS) {
1166 			idm_svc_conn_destroy(ic);
1167 			idm_soshutdown(new_so);
1168 			idm_sodestroy(new_so);
1169 			mutex_enter(&svc->is_mutex);
1170 			continue;
1171 		}
1172 
1173 		/*
1174 		 * Kick the state machine.  At CS_S3_XPT_UP the state machine
1175 		 * will notify the client (target) about the new connection.
1176 		 */
1177 		idm_conn_event(ic, CE_CONNECT_ACCEPT, NULL);
1178 
1179 		mutex_enter(&svc->is_mutex);
1180 	}
1181 
1182 	so_svc->is_thread_running = B_FALSE;
1183 	mutex_exit(&svc->is_mutex);
1184 
1185 	IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) offline", (void *)svc,
1186 	    svc->is_svc_req.sr_port);
1187 
1188 	thread_exit();
1189 }
1190 
1191 /*
1192  * idm_so_free_task_rsrc() stops any ongoing processing of the task and
1193  * frees resources associated with the task.
1194  *
1195  * It's not clear that this should return idm_status_t.  What do we do
1196  * if it fails?
1197  */
1198 static idm_status_t
1199 idm_so_free_task_rsrc(idm_task_t *idt)
1200 {
1201 	idm_buf_t	*idb;
1202 
1203 	/*
1204 	 * If this is a target connection, call idm_buf_rx_from_ini_done for
1205 	 * any buffer on the "outbufv" list with idb->idb_in_transport==B_TRUE.
1206 	 *
1207 	 * In addition, remove any buffers associated with this task from
1208 	 * the ic_tx_list.  We'll do this by walking the idt_inbufv list, but
1209 	 * items don't actually get removed from that list (and completion
1210 	 * routines called) until idm_task_cleanup.
1211 	 */
1212 	mutex_enter(&idt->idt_mutex);
1213 
1214 	for (idb = list_head(&idt->idt_outbufv); idb != NULL;
1215 	    idb = list_next(&idt->idt_outbufv, idb)) {
1216 		if (idb->idb_in_transport) {
1217 			/*
1218 			 * idm_buf_rx_from_ini_done releases idt->idt_mutex
1219 			 */
1220 			idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_ABORTED);
1221 			mutex_enter(&idt->idt_mutex);
1222 		}
1223 	}
1224 
1225 	for (idb = list_head(&idt->idt_inbufv); idb != NULL;
1226 	    idb = list_next(&idt->idt_inbufv, idb)) {
1227 		/*
1228 		 * We want to remove these items from the tx_list as well,
1229 		 * but knowing it's in the idt_inbufv list is not a guarantee
1230 		 * that it's in the tx_list.  If it's on the tx list then
1231 		 * let idm_sotx_thread() clean it up.
1232 		 */
1233 		if (idb->idb_in_transport && !idb->idb_tx_thread) {
1234 			/*
1235 			 * idm_buf_tx_to_ini_done releases idt->idt_mutex
1236 			 */
1237 			idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
1238 			mutex_enter(&idt->idt_mutex);
1239 		}
1240 	}
1241 
1242 	mutex_exit(&idt->idt_mutex);
1243 
1244 	return (IDM_STATUS_SUCCESS);
1245 }
1246 
1247 /*
1248  * idm_so_negotiate_key_values() validates the key values for this connection
1249  */
1250 /* ARGSUSED */
1251 static kv_status_t
1252 idm_so_negotiate_key_values(idm_conn_t *it, nvlist_t *request_nvl,
1253     nvlist_t *response_nvl, nvlist_t *negotiated_nvl)
1254 {
1255 	/* All parameters are negotiated at the iscsit level */
1256 	return (KV_HANDLED);
1257 }
1258 
1259 /*
1260  * idm_so_notice_key_values() activates the negotiated key values for
1261  * this connection.
1262  */
1263 static idm_status_t
1264 idm_so_notice_key_values(idm_conn_t *it, nvlist_t *negotiated_nvl)
1265 {
1266 	char			*nvp_name;
1267 	nvpair_t		*nvp;
1268 	nvpair_t		*next_nvp;
1269 	int			nvrc;
1270 	idm_status_t		idm_status;
1271 	const idm_kv_xlate_t	*ikvx;
1272 
1273 	for (nvp = nvlist_next_nvpair(negotiated_nvl, NULL);
1274 	    nvp != NULL; nvp = next_nvp) {
1275 		next_nvp = nvlist_next_nvpair(negotiated_nvl, nvp);
1276 		nvp_name = nvpair_name(nvp);
1277 
1278 		ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name));
1279 		switch (ikvx->ik_key_id) {
1280 		case KI_HEADER_DIGEST:
1281 		case KI_DATA_DIGEST:
1282 			idm_status = idm_so_handle_digest(it, nvp, ikvx);
1283 			ASSERT(idm_status == 0);
1284 
1285 			/* Remove processed item from negotiated_nvl list */
1286 			nvrc = nvlist_remove_all(
1287 			    negotiated_nvl, ikvx->ik_key_name);
1288 			ASSERT(nvrc == 0);
1289 			break;
1290 		default:
1291 			break;
1292 		}
1293 	}
1294 	return (IDM_STATUS_SUCCESS);
1295 }
1296 
1297 
1298 static idm_status_t
1299 idm_so_handle_digest(idm_conn_t *it, nvpair_t *digest_choice,
1300     const idm_kv_xlate_t *ikvx)
1301 {
1302 	int			nvrc;
1303 	char			*digest_choice_string;
1304 
1305 	nvrc = nvpair_value_string(digest_choice,
1306 	    &digest_choice_string);
1307 	ASSERT(nvrc == 0);
1308 	if (strcasecmp(digest_choice_string, "crc32c") == 0) {
1309 		switch (ikvx->ik_key_id) {
1310 		case KI_HEADER_DIGEST:
1311 			it->ic_conn_flags |= IDM_CONN_HEADER_DIGEST;
1312 			break;
1313 		case KI_DATA_DIGEST:
1314 			it->ic_conn_flags |= IDM_CONN_DATA_DIGEST;
1315 			break;
1316 		default:
1317 			ASSERT(0);
1318 			break;
1319 		}
1320 	} else if (strcasecmp(digest_choice_string, "none") == 0) {
1321 		switch (ikvx->ik_key_id) {
1322 		case KI_HEADER_DIGEST:
1323 			it->ic_conn_flags &= ~IDM_CONN_HEADER_DIGEST;
1324 			break;
1325 		case KI_DATA_DIGEST:
1326 			it->ic_conn_flags &= ~IDM_CONN_DATA_DIGEST;
1327 			break;
1328 		default:
1329 			ASSERT(0);
1330 			break;
1331 		}
1332 	} else {
1333 		ASSERT(0);
1334 	}
1335 
1336 	return (IDM_STATUS_SUCCESS);
1337 }
1338 
1339 
1340 /*
1341  * idm_so_conn_is_capable() verifies that the passed connection is provided
1342  * for by the sockets interface.
1343  */
1344 /* ARGSUSED */
1345 static boolean_t
1346 idm_so_conn_is_capable(idm_conn_req_t *ic, idm_transport_caps_t *caps)
1347 {
1348 	return (B_TRUE);
1349 }
1350 
1351 /*
1352  * idm_so_rx_datain() validates the Data Sequence number of the PDU. The
1353  * idm_sorecv_scsidata() function invoked earlier actually reads the data
1354  * off the socket into the appropriate buffers.
1355  */
1356 static void
1357 idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu)
1358 {
1359 	iscsi_data_hdr_t	*bhs;
1360 	idm_task_t		*idt;
1361 	idm_buf_t		*idb;
1362 	uint32_t		datasn;
1363 	size_t			offset;
1364 	iscsi_hdr_t		*ihp = (iscsi_hdr_t *)pdu->isp_hdr;
1365 	iscsi_data_rsp_hdr_t    *idrhp = (iscsi_data_rsp_hdr_t *)ihp;
1366 
1367 	ASSERT(ic != NULL);
1368 	ASSERT(pdu != NULL);
1369 
1370 	bhs	= (iscsi_data_hdr_t *)pdu->isp_hdr;
1371 	datasn	= ntohl(bhs->datasn);
1372 	offset	= ntohl(bhs->offset);
1373 
1374 	ASSERT(bhs->opcode == ISCSI_OP_SCSI_DATA_RSP);
1375 
1376 	/*
1377 	 * Look up the task corresponding to the initiator task tag
1378 	 * to get the buffers affiliated with the task.
1379 	 */
1380 	idt = idm_task_find(ic, bhs->itt, bhs->ttt);
1381 	if (idt == NULL) {
1382 		IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: failed to find task");
1383 		idm_pdu_rx_protocol_error(ic, pdu);
1384 		return;
1385 	}
1386 
1387 	idb = pdu->isp_sorx_buf;
1388 	if (idb == NULL) {
1389 		IDM_CONN_LOG(CE_WARN,
1390 		    "idm_so_rx_datain: failed to find buffer");
1391 		idm_task_rele(idt);
1392 		idm_pdu_rx_protocol_error(ic, pdu);
1393 		return;
1394 	}
1395 
1396 	/*
1397 	 * DataSN values should be sequential and should not have any gaps or
1398 	 * repetitions. Check the DataSN with the one stored in the task.
1399 	 */
1400 	if (datasn == idt->idt_exp_datasn) {
1401 		idt->idt_exp_datasn++; /* keep track of DataSN received */
1402 	} else {
1403 		IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: datasn out of order");
1404 		idm_task_rele(idt);
1405 		idm_pdu_rx_protocol_error(ic, pdu);
1406 		return;
1407 	}
1408 	idm_task_rele(idt);
1409 
1410 	/*
1411 	 * PDUs in a sequence should be in continuously increasing
1412 	 * address offset
1413 	 */
1414 	if (offset != idb->idb_exp_offset) {
1415 		IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: unexpected offset");
1416 		idm_pdu_rx_protocol_error(ic, pdu);
1417 		return;
1418 	}
1419 	/* Expected next relative buffer offset */
1420 	idb->idb_exp_offset += n2h24(bhs->dlength);
1421 
1422 	/*
1423 	 * For now call scsi_rsp which will process the data rsp
1424 	 * Revisit, need to provide an explicit client entry point for
1425 	 * phase collapse completions.
1426 	 */
1427 	if (((ihp->opcode & ISCSI_OPCODE_MASK) == ISCSI_OP_SCSI_DATA_RSP) &&
1428 	    (idrhp->flags & ISCSI_FLAG_DATA_STATUS)) {
1429 		(*ic->ic_conn_ops.icb_rx_scsi_rsp)(ic, pdu);
1430 	}
1431 
1432 	idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1433 }
1434 
1435 /*
1436  * The idm_so_rx_dataout() function is used by the iSCSI target to read
1437  * data from the Data-Out PDU sent by the iSCSI initiator.
1438  *
1439  * This function gets the Initiator Task Tag from the PDU BHS and looks up the
1440  * task to get the buffers associated with the PDU. A PDU might span buffers.
1441  * The data is then read into the respective buffer.
1442  */
1443 static void
1444 idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu)
1445 {
1446 
1447 	iscsi_data_hdr_t	*bhs;
1448 	idm_task_t		*idt;
1449 	idm_buf_t		*idb;
1450 	size_t			offset;
1451 
1452 	ASSERT(ic != NULL);
1453 	ASSERT(pdu != NULL);
1454 
1455 	bhs = (iscsi_data_hdr_t *)pdu->isp_hdr;
1456 	offset = ntohl(bhs->offset);
1457 	ASSERT(bhs->opcode == ISCSI_OP_SCSI_DATA);
1458 
1459 	/*
1460 	 * Look up the task corresponding to the initiator task tag
1461 	 * to get the buffers affiliated with the task.
1462 	 */
1463 	idt = idm_task_find(ic, bhs->itt, bhs->ttt);
1464 	if (idt == NULL) {
1465 		IDM_CONN_LOG(CE_WARN,
1466 		    "idm_so_rx_dataout: failed to find task");
1467 		idm_pdu_rx_protocol_error(ic, pdu);
1468 		return;
1469 	}
1470 
1471 	idb = pdu->isp_sorx_buf;
1472 	if (idb == NULL) {
1473 		IDM_CONN_LOG(CE_WARN,
1474 		    "idm_so_rx_dataout: failed to find buffer");
1475 		idm_task_rele(idt);
1476 		idm_pdu_rx_protocol_error(ic, pdu);
1477 		return;
1478 	}
1479 
1480 	/* Keep track of data transferred - check data offsets */
1481 	if (offset != idb->idb_exp_offset) {
1482 		IDM_CONN_LOG(CE_NOTE, "idm_so_rx_dataout: offset out of seq: "
1483 		    "%ld, %d", offset, idb->idb_exp_offset);
1484 		idm_task_rele(idt);
1485 		idm_pdu_rx_protocol_error(ic, pdu);
1486 		return;
1487 	}
1488 	/* Expected next relative offset */
1489 	idb->idb_exp_offset += ntoh24(bhs->dlength);
1490 
1491 	/*
1492 	 * Call the buffer callback when the transfer is complete
1493 	 *
1494 	 * The connection state machine should only abort tasks after
1495 	 * shutting down the connection so we are assured that there
1496 	 * won't be a simultaneous attempt to abort this task at the
1497 	 * same time as we are processing this PDU (due to a connection
1498 	 * state change).
1499 	 */
1500 	if (bhs->flags & ISCSI_FLAG_FINAL) {
1501 		/*
1502 		 * We only want to call idm_buf_rx_from_ini_done once
1503 		 * per transfer.  It's possible that this task has
1504 		 * already been aborted in which case
1505 		 * idm_so_free_task_rsrc will call idm_buf_rx_from_ini_done
1506 		 * for each buffer with idb_in_transport==B_TRUE.  To
1507 		 * close this window and ensure that this doesn't happen,
1508 		 * we'll clear idb->idb_in_transport now while holding
1509 		 * the task mutex.   This is only really an issue for
1510 		 * SCSI task abort -- if tasks were being aborted because
1511 		 * of a connection state change the state machine would
1512 		 * have already stopped the receive thread.
1513 		 */
1514 		mutex_enter(&idt->idt_mutex);
1515 
1516 		/*
1517 		 * Release the task hold here (obtained in idm_task_find)
1518 		 * because the task may complete synchronously during
1519 		 * idm_buf_rx_from_ini_done.  Since we still have an active
1520 		 * buffer we know there is at least one additional hold on idt.
1521 		 */
1522 		idm_task_rele(idt);
1523 
1524 		/*
1525 		 * idm_buf_rx_from_ini_done releases idt->idt_mutex
1526 		 */
1527 		idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_SUCCESS);
1528 		idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1529 		return;
1530 	}
1531 
1532 	idm_task_rele(idt);
1533 	idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1534 }
1535 
1536 /*
1537  * The idm_so_rx_rtt() function is used by the iSCSI initiator to handle
1538  * the R2T PDU sent by the iSCSI target indicating that it is ready to
1539  * accept data. This gets the Initiator Task Tag (itt) from the PDU BHS
1540  * and looks up the task in the task tree using the itt to get the output
1541  * buffers associated the task. The R2T PDU contains the offset of the
1542  * requested data and the data length. This function then constructs a
1543  * sequence of iSCSI PDUs and outputs the requested data. Each Data-Out
1544  * PDU is associated with the R2T by the Target Transfer Tag  (ttt).
1545  */
1546 static void
1547 idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu)
1548 {
1549 	idm_task_t		*idt;
1550 	idm_buf_t		*idb;
1551 	iscsi_rtt_hdr_t		*rtt_hdr;
1552 	uint32_t		data_offset;
1553 
1554 	ASSERT(ic != NULL);
1555 	ASSERT(pdu != NULL);
1556 
1557 	rtt_hdr	= (iscsi_rtt_hdr_t *)pdu->isp_hdr;
1558 	data_offset = ntohl(rtt_hdr->data_offset);
1559 
1560 	idt	= idm_task_find(ic, rtt_hdr->itt, rtt_hdr->ttt);
1561 
1562 	if (idt == NULL) {
1563 		IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find task");
1564 		idm_pdu_rx_protocol_error(ic, pdu);
1565 		return;
1566 	}
1567 
1568 	/* Find the buffer bound to the task by the iSCSI initiator */
1569 	mutex_enter(&idt->idt_mutex);
1570 	idb = idm_buf_find(&idt->idt_outbufv, data_offset);
1571 	idt->idt_r2t_ttt = rtt_hdr->ttt;
1572 	/* reset to zero */
1573 	idt->idt_exp_datasn = 0;
1574 	if (idb == NULL) {
1575 		mutex_exit(&idt->idt_mutex);
1576 		idm_task_rele(idt);
1577 		IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find buffer");
1578 		idm_pdu_rx_protocol_error(ic, pdu);
1579 		return;
1580 	}
1581 
1582 	(void) idm_so_send_buf_region(idt, ISCSI_OP_SCSI_DATA, idb,
1583 	    data_offset, ntohl(rtt_hdr->data_length));
1584 	mutex_exit(&idt->idt_mutex);
1585 
1586 	idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1587 	idm_task_rele(idt);
1588 
1589 }
1590 
1591 idm_status_t
1592 idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu)
1593 {
1594 	uint8_t		pad[ISCSI_PAD_WORD_LEN];
1595 	int		pad_len;
1596 	uint32_t	data_digest_crc;
1597 	uint32_t	crc_calculated;
1598 	int		total_len;
1599 	idm_so_conn_t	*so_conn;
1600 
1601 	so_conn = ic->ic_transport_private;
1602 
1603 	pad_len = ((ISCSI_PAD_WORD_LEN -
1604 	    (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) &
1605 	    (ISCSI_PAD_WORD_LEN - 1));
1606 
1607 	ASSERT(pdu->isp_iovlen < (PDU_MAX_IOVLEN - 2)); /* pad + data digest */
1608 
1609 	total_len = pdu->isp_datalen;
1610 
1611 	if (pad_len) {
1612 		pdu->isp_iov[pdu->isp_iovlen].iov_base	= (char *)&pad;
1613 		pdu->isp_iov[pdu->isp_iovlen].iov_len	= pad_len;
1614 		total_len		+= pad_len;
1615 		pdu->isp_iovlen++;
1616 	}
1617 
1618 	/* setup data digest */
1619 	if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) {
1620 		pdu->isp_iov[pdu->isp_iovlen].iov_base =
1621 		    (char *)&data_digest_crc;
1622 		pdu->isp_iov[pdu->isp_iovlen].iov_len =
1623 		    sizeof (data_digest_crc);
1624 		total_len		+= sizeof (data_digest_crc);
1625 		pdu->isp_iovlen++;
1626 	}
1627 
1628 	if (idm_iov_sorecv(so_conn->ic_so, &pdu->isp_iov[0],
1629 	    pdu->isp_iovlen, total_len) != 0) {
1630 		return (IDM_STATUS_IO);
1631 	}
1632 
1633 	if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) {
1634 		crc_calculated = idm_crc32c(pdu->isp_data,
1635 		    pdu->isp_datalen);
1636 		if (pad_len) {
1637 			crc_calculated = idm_crc32c_continued((char *)&pad,
1638 			    pad_len, crc_calculated);
1639 		}
1640 		if (crc_calculated != data_digest_crc) {
1641 			IDM_CONN_LOG(CE_WARN,
1642 			    "idm_sorecvdata: "
1643 			    "CRC error: actual 0x%x, calc 0x%x",
1644 			    data_digest_crc, crc_calculated);
1645 
1646 			/* Invalid Data Digest */
1647 			return (IDM_STATUS_DATA_DIGEST);
1648 		}
1649 	}
1650 
1651 	return (IDM_STATUS_SUCCESS);
1652 }
1653 
1654 /*
1655  * idm_sorecv_scsidata() is used to receive scsi data from the socket. The
1656  * Data-type PDU header must be read into the idm_pdu_t structure prior to
1657  * calling this function.
1658  */
1659 idm_status_t
1660 idm_sorecv_scsidata(idm_conn_t *ic, idm_pdu_t *pdu)
1661 {
1662 	iscsi_data_hdr_t	*bhs;
1663 	idm_task_t		*task;
1664 	uint32_t		offset;
1665 	uint8_t			opcode;
1666 	uint32_t		dlength;
1667 	list_t			*buflst;
1668 	uint32_t		xfer_bytes;
1669 	idm_status_t		status;
1670 
1671 	ASSERT(ic != NULL);
1672 	ASSERT(pdu != NULL);
1673 
1674 	bhs	= (iscsi_data_hdr_t *)pdu->isp_hdr;
1675 
1676 	offset	= ntohl(bhs->offset);
1677 	opcode	= bhs->opcode;
1678 	dlength = n2h24(bhs->dlength);
1679 
1680 	ASSERT((opcode == ISCSI_OP_SCSI_DATA_RSP) ||
1681 	    (opcode == ISCSI_OP_SCSI_DATA));
1682 
1683 	/*
1684 	 * Successful lookup implicitly gets a "hold" on the task.  This
1685 	 * hold must be released before leaving this function.  At one
1686 	 * point we were caching this task context and retaining the hold
1687 	 * but it turned out to be very difficult to release the hold properly.
1688 	 * The task can be aborted and the connection shutdown between this
1689 	 * call and the subsequent expected call to idm_so_rx_datain/
1690 	 * idm_so_rx_dataout (in which case those functions are not called).
1691 	 * Releasing the hold in the PDU callback doesn't work well either
1692 	 * because the whole task may be completed by then at which point
1693 	 * it is too late to release the hold -- for better or worse this
1694 	 * code doesn't wait on the refcnts during normal operation.
1695 	 * idm_task_find() is very fast and it is not a huge burden if we
1696 	 * have to do it twice.
1697 	 */
1698 	task = idm_task_find(ic, bhs->itt, bhs->ttt);
1699 	if (task == NULL) {
1700 		IDM_CONN_LOG(CE_WARN,
1701 		    "idm_sorecv_scsidata: could not find task");
1702 		return (IDM_STATUS_FAIL);
1703 	}
1704 
1705 	mutex_enter(&task->idt_mutex);
1706 	buflst	= (opcode == ISCSI_OP_SCSI_DATA_RSP) ?
1707 	    &task->idt_inbufv : &task->idt_outbufv;
1708 	pdu->isp_sorx_buf = idm_buf_find(buflst, offset);
1709 	mutex_exit(&task->idt_mutex);
1710 
1711 	if (pdu->isp_sorx_buf == NULL) {
1712 		idm_task_rele(task);
1713 		IDM_CONN_LOG(CE_WARN, "idm_sorecv_scsidata: could not find "
1714 		    "buffer for offset %x opcode=%x",
1715 		    offset, opcode);
1716 		return (IDM_STATUS_FAIL);
1717 	}
1718 
1719 	xfer_bytes = idm_fill_iov(pdu, pdu->isp_sorx_buf, offset, dlength);
1720 	ASSERT(xfer_bytes != 0);
1721 	if (xfer_bytes != dlength) {
1722 		idm_task_rele(task);
1723 		/*
1724 		 * Buffer overflow, connection error.  The PDU data is still
1725 		 * sitting in the socket so we can't use the connection
1726 		 * again until that data is drained.
1727 		 */
1728 		return (IDM_STATUS_FAIL);
1729 	}
1730 
1731 	status = idm_sorecvdata(ic, pdu);
1732 
1733 	idm_task_rele(task);
1734 
1735 	return (status);
1736 }
1737 
1738 static uint32_t
1739 idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb, uint32_t ro, uint32_t dlength)
1740 {
1741 	uint32_t	buf_ro = ro - idb->idb_bufoffset;
1742 	uint32_t	xfer_len = min(dlength, idb->idb_buflen - buf_ro);
1743 
1744 	ASSERT(ro >= idb->idb_bufoffset);
1745 
1746 	pdu->isp_iov[pdu->isp_iovlen].iov_base	=
1747 	    (caddr_t)idb->idb_buf + buf_ro;
1748 	pdu->isp_iov[pdu->isp_iovlen].iov_len	= xfer_len;
1749 	pdu->isp_iovlen++;
1750 
1751 	return (xfer_len);
1752 }
1753 
1754 int
1755 idm_sorecv_nonscsidata(idm_conn_t *ic, idm_pdu_t *pdu)
1756 {
1757 	pdu->isp_data = kmem_alloc(pdu->isp_datalen, KM_SLEEP);
1758 	ASSERT(pdu->isp_data != NULL);
1759 
1760 	pdu->isp_databuflen = pdu->isp_datalen;
1761 	pdu->isp_iov[0].iov_base = (caddr_t)pdu->isp_data;
1762 	pdu->isp_iov[0].iov_len = pdu->isp_datalen;
1763 	pdu->isp_iovlen = 1;
1764 	/*
1765 	 * Since we are associating a new data buffer with this received
1766 	 * PDU we need to set a specific callback to free the data
1767 	 * after the PDU is processed.
1768 	 */
1769 	pdu->isp_flags |= IDM_PDU_ADDL_DATA;
1770 	pdu->isp_callback = idm_sorx_addl_pdu_cb;
1771 
1772 	return (idm_sorecvdata(ic, pdu));
1773 }
1774 
1775 void
1776 idm_sorx_thread(void *arg)
1777 {
1778 	boolean_t	conn_failure = B_FALSE;
1779 	idm_conn_t	*ic = (idm_conn_t *)arg;
1780 	idm_so_conn_t	*so_conn;
1781 	idm_pdu_t	*pdu;
1782 	idm_status_t	rc;
1783 
1784 	idm_conn_hold(ic);
1785 
1786 	mutex_enter(&ic->ic_mutex);
1787 
1788 	so_conn = ic->ic_transport_private;
1789 	so_conn->ic_rx_thread_running = B_TRUE;
1790 	so_conn->ic_rx_thread_did = so_conn->ic_rx_thread->t_did;
1791 	cv_signal(&ic->ic_cv);
1792 
1793 	while (so_conn->ic_rx_thread_running) {
1794 		mutex_exit(&ic->ic_mutex);
1795 
1796 		/*
1797 		 * Get PDU with default header size (large enough for
1798 		 * BHS plus any anticipated AHS).  PDU from
1799 		 * the cache will have all values set correctly
1800 		 * for sockets RX including callback.
1801 		 */
1802 		pdu = kmem_cache_alloc(idm.idm_sorx_pdu_cache, KM_SLEEP);
1803 		pdu->isp_ic = ic;
1804 		pdu->isp_flags = 0;
1805 		pdu->isp_transport_hdrlen = 0;
1806 
1807 		if ((rc = idm_sorecvhdr(ic, pdu)) != 0) {
1808 			/*
1809 			 * Call idm_pdu_complete so that we call the callback
1810 			 * and ensure any memory allocated in idm_sorecvhdr
1811 			 * gets freed up.
1812 			 */
1813 			idm_pdu_complete(pdu, IDM_STATUS_FAIL);
1814 
1815 			/*
1816 			 * If ic_rx_thread_running is still set then
1817 			 * this is some kind of connection problem
1818 			 * on the socket.  In this case we want to
1819 			 * generate an event.  Otherwise some other
1820 			 * thread closed the socket due to another
1821 			 * issue in which case we don't need to
1822 			 * generate an event.
1823 			 */
1824 			mutex_enter(&ic->ic_mutex);
1825 			if (so_conn->ic_rx_thread_running) {
1826 				conn_failure = B_TRUE;
1827 				so_conn->ic_rx_thread_running = B_FALSE;
1828 			}
1829 
1830 			continue;
1831 		}
1832 
1833 		/*
1834 		 * Header has been read and validated.  Now we need
1835 		 * to read the PDU data payload (if present).  SCSI data
1836 		 * need to be transferred from the socket directly into
1837 		 * the associated transfer buffer for the SCSI task.
1838 		 */
1839 		if (pdu->isp_datalen != 0) {
1840 			if ((IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA) ||
1841 			    (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP)) {
1842 				rc = idm_sorecv_scsidata(ic, pdu);
1843 				/*
1844 				 * All SCSI errors are fatal to the
1845 				 * connection right now since we have no
1846 				 * place to put the data.  What we need
1847 				 * is some kind of sink to dispose of unwanted
1848 				 * SCSI data.  For example an invalid task tag
1849 				 * should not kill the connection (although
1850 				 * we may want to drop the connection).
1851 				 */
1852 			} else {
1853 				/*
1854 				 * Not data PDUs so allocate a buffer for the
1855 				 * data segment and read the remaining data.
1856 				 */
1857 				rc = idm_sorecv_nonscsidata(ic, pdu);
1858 			}
1859 			if (rc != 0) {
1860 				/*
1861 				 * Call idm_pdu_complete so that we call the
1862 				 * callback and ensure any memory allocated
1863 				 * in idm_sorecvhdr gets freed up.
1864 				 */
1865 				idm_pdu_complete(pdu, IDM_STATUS_FAIL);
1866 
1867 				/*
1868 				 * If ic_rx_thread_running is still set then
1869 				 * this is some kind of connection problem
1870 				 * on the socket.  In this case we want to
1871 				 * generate an event.  Otherwise some other
1872 				 * thread closed the socket due to another
1873 				 * issue in which case we don't need to
1874 				 * generate an event.
1875 				 */
1876 				mutex_enter(&ic->ic_mutex);
1877 				if (so_conn->ic_rx_thread_running) {
1878 					conn_failure = B_TRUE;
1879 					so_conn->ic_rx_thread_running = B_FALSE;
1880 				}
1881 				continue;
1882 			}
1883 		}
1884 
1885 		/*
1886 		 * Process RX PDU
1887 		 */
1888 		idm_pdu_rx(ic, pdu);
1889 
1890 		mutex_enter(&ic->ic_mutex);
1891 	}
1892 
1893 	mutex_exit(&ic->ic_mutex);
1894 
1895 	/*
1896 	 * If we dropped out of the RX processing loop because of
1897 	 * a socket problem or other connection failure (including
1898 	 * digest errors) then we need to generate a state machine
1899 	 * event to shut the connection down.
1900 	 * If the state machine is already in, for example, INIT_ERROR, this
1901 	 * event will get dropped, and the TX thread will never be notified
1902 	 * to shut down.  To be safe, we'll just notify it here.
1903 	 */
1904 	if (conn_failure) {
1905 		if (so_conn->ic_tx_thread_running) {
1906 			so_conn->ic_tx_thread_running = B_FALSE;
1907 			mutex_enter(&so_conn->ic_tx_mutex);
1908 			cv_signal(&so_conn->ic_tx_cv);
1909 			mutex_exit(&so_conn->ic_tx_mutex);
1910 		}
1911 
1912 		idm_conn_event(ic, CE_TRANSPORT_FAIL, rc);
1913 	}
1914 
1915 	idm_conn_rele(ic);
1916 
1917 	thread_exit();
1918 }
1919 
1920 /*
1921  * idm_so_tx
1922  *
1923  * This is the implementation of idm_transport_ops_t's it_tx_pdu entry
1924  * point.  By definition, it is supposed to be fast.  So, simply queue
1925  * the entry and return.  The real work is done by idm_i_so_tx() via
1926  * idm_sotx_thread().
1927  */
1928 
1929 static void
1930 idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu)
1931 {
1932 	idm_so_conn_t *so_conn = ic->ic_transport_private;
1933 
1934 	ASSERT(pdu->isp_ic == ic);
1935 	mutex_enter(&so_conn->ic_tx_mutex);
1936 
1937 	if (!so_conn->ic_tx_thread_running) {
1938 		mutex_exit(&so_conn->ic_tx_mutex);
1939 		idm_pdu_complete(pdu, IDM_STATUS_ABORTED);
1940 		return;
1941 	}
1942 
1943 	list_insert_tail(&so_conn->ic_tx_list, (void *)pdu);
1944 	cv_signal(&so_conn->ic_tx_cv);
1945 	mutex_exit(&so_conn->ic_tx_mutex);
1946 }
1947 
1948 static idm_status_t
1949 idm_i_so_tx(idm_pdu_t *pdu)
1950 {
1951 	idm_conn_t	*ic = pdu->isp_ic;
1952 	idm_status_t	status = IDM_STATUS_SUCCESS;
1953 	uint8_t		pad[ISCSI_PAD_WORD_LEN];
1954 	int		pad_len;
1955 	uint32_t	hdr_digest_crc;
1956 	uint32_t	data_digest_crc = 0;
1957 	int		total_len = 0;
1958 	int		iovlen = 0;
1959 	struct iovec	iov[6];
1960 	idm_so_conn_t	*so_conn;
1961 
1962 	so_conn = ic->ic_transport_private;
1963 
1964 	/* Setup BHS */
1965 	iov[iovlen].iov_base	= (caddr_t)pdu->isp_hdr;
1966 	iov[iovlen].iov_len	= pdu->isp_hdrlen;
1967 	total_len		+= iov[iovlen].iov_len;
1968 	iovlen++;
1969 
1970 	/* Setup header digest */
1971 	if (((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) &&
1972 	    (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST)) {
1973 		hdr_digest_crc = idm_crc32c(pdu->isp_hdr, pdu->isp_hdrlen);
1974 
1975 		iov[iovlen].iov_base	= (caddr_t)&hdr_digest_crc;
1976 		iov[iovlen].iov_len	= sizeof (hdr_digest_crc);
1977 		total_len		+= iov[iovlen].iov_len;
1978 		iovlen++;
1979 	}
1980 
1981 	/* Setup the data */
1982 	if (pdu->isp_datalen) {
1983 		idm_task_t		*idt;
1984 		idm_buf_t		*idb;
1985 		iscsi_data_hdr_t	*ihp;
1986 		ihp = (iscsi_data_hdr_t *)pdu->isp_hdr;
1987 		/* Write of immediate data */
1988 		if (ic->ic_ffp &&
1989 		    (ihp->opcode == ISCSI_OP_SCSI_CMD ||
1990 		    ihp->opcode == ISCSI_OP_SCSI_DATA)) {
1991 			idt = idm_task_find(ic, ihp->itt, ihp->ttt);
1992 			if (idt) {
1993 				mutex_enter(&idt->idt_mutex);
1994 				idb = idm_buf_find(&idt->idt_outbufv, 0);
1995 				mutex_exit(&idt->idt_mutex);
1996 				idb->idb_xfer_len += pdu->isp_datalen;
1997 			}
1998 		}
1999 
2000 		iov[iovlen].iov_base = (caddr_t)pdu->isp_data;
2001 		iov[iovlen].iov_len  = pdu->isp_datalen;
2002 		total_len += iov[iovlen].iov_len;
2003 		iovlen++;
2004 	}
2005 
2006 	/* Setup the data pad if necessary */
2007 	pad_len = ((ISCSI_PAD_WORD_LEN -
2008 	    (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) &
2009 	    (ISCSI_PAD_WORD_LEN - 1));
2010 
2011 	if (pad_len) {
2012 		bzero(pad, sizeof (pad));
2013 		iov[iovlen].iov_base = (void *)&pad;
2014 		iov[iovlen].iov_len  = pad_len;
2015 		total_len		+= iov[iovlen].iov_len;
2016 		iovlen++;
2017 	}
2018 
2019 	/*
2020 	 * Setup the data digest if enabled.  Data-digest is not sent
2021 	 * for login-phase PDUs.
2022 	 */
2023 	if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) &&
2024 	    ((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) &&
2025 	    (pdu->isp_datalen || pad_len)) {
2026 		/*
2027 		 * RFC3720/10.2.3: A zero-length Data Segment also
2028 		 * implies a zero-length data digest.
2029 		 */
2030 		if (pdu->isp_datalen) {
2031 			data_digest_crc = idm_crc32c(pdu->isp_data,
2032 			    pdu->isp_datalen);
2033 		}
2034 		if (pad_len) {
2035 			data_digest_crc = idm_crc32c_continued(&pad,
2036 			    pad_len, data_digest_crc);
2037 		}
2038 
2039 		iov[iovlen].iov_base	= (caddr_t)&data_digest_crc;
2040 		iov[iovlen].iov_len	= sizeof (data_digest_crc);
2041 		total_len		+= iov[iovlen].iov_len;
2042 		iovlen++;
2043 	}
2044 
2045 	/* Transmit the PDU */
2046 	if (idm_iov_sosend(so_conn->ic_so, &iov[0], iovlen,
2047 	    total_len) != 0) {
2048 		/* Set error status */
2049 		IDM_CONN_LOG(CE_WARN,
2050 		    "idm_so_tx: failed to transmit the PDU, so: %p ic: %p "
2051 		    "data: %p", (void *) so_conn->ic_so, (void *) ic,
2052 		    (void *) pdu->isp_data);
2053 		status = IDM_STATUS_IO;
2054 	}
2055 
2056 	/*
2057 	 * Success does not mean that the PDU actually reached the
2058 	 * remote node since it could get dropped along the way.
2059 	 */
2060 	idm_pdu_complete(pdu, status);
2061 
2062 	return (status);
2063 }
2064 
2065 /*
2066  * The idm_so_buf_tx_to_ini() is used by the target iSCSI layer to transmit the
2067  * Data-In PDUs using sockets. Based on the negotiated MaxRecvDataSegmentLength,
2068  * the buffer is segmented into a sequence of Data-In PDUs, ordered by DataSN.
2069  * A target can invoke this function multiple times for a single read command
2070  * (identified by the same ITT) to split the input into several sequences.
2071  *
2072  * DataSN starts with 0 for the first data PDU of an input command and advances
2073  * by 1 for each subsequent data PDU. Each sequence will have its own F bit,
2074  * which is set to 1 for the last data PDU of a sequence.
2075  *
2076  * Scope for Prototype build:
2077  * The data PDUs within a sequence will be sent in order with the buffer offset
2078  * in increasing order. i.e. initiator and target must have negotiated the
2079  * "DataPDUInOrder" to "Yes". The order between sequences is not enforced.
2080  *
2081  * Caller holds idt->idt_mutex
2082  */
2083 static idm_status_t
2084 idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb)
2085 {
2086 	idm_so_conn_t	*so_conn = idb->idb_ic->ic_transport_private;
2087 	idm_pdu_t	tmppdu;
2088 
2089 	ASSERT(mutex_owned(&idt->idt_mutex));
2090 
2091 	/*
2092 	 * Put the idm_buf_t on the tx queue.  It will be transmitted by
2093 	 * idm_sotx_thread.
2094 	 */
2095 	mutex_enter(&so_conn->ic_tx_mutex);
2096 
2097 	if (!so_conn->ic_tx_thread_running) {
2098 		mutex_exit(&so_conn->ic_tx_mutex);
2099 		/*
2100 		 * Don't release idt->idt_mutex since we're supposed to hold
2101 		 * in when calling idm_buf_tx_to_ini_done
2102 		 */
2103 		idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
2104 		return (IDM_STATUS_FAIL);
2105 	}
2106 
2107 	/*
2108 	 * Build a template for the data PDU headers we will use so that
2109 	 * the SN values will stay consistent with other PDU's we are
2110 	 * transmitting like R2T and SCSI status.
2111 	 */
2112 	bzero(&idb->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t));
2113 	tmppdu.isp_hdr = &idb->idb_data_hdr_tmpl;
2114 	(*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu,
2115 	    ISCSI_OP_SCSI_DATA_RSP);
2116 	idb->idb_tx_thread = B_TRUE;
2117 	list_insert_tail(&so_conn->ic_tx_list, (void *)idb);
2118 	cv_signal(&so_conn->ic_tx_cv);
2119 	mutex_exit(&so_conn->ic_tx_mutex);
2120 	mutex_exit(&idt->idt_mutex);
2121 
2122 	/*
2123 	 * Returning success here indicates the transfer was successfully
2124 	 * dispatched -- it does not mean that the transfer completed
2125 	 * successfully.
2126 	 */
2127 	return (IDM_STATUS_SUCCESS);
2128 }
2129 
2130 /*
2131  * The idm_so_buf_rx_from_ini() is used by the target iSCSI layer to specify the
2132  * data blocks it is ready to receive from the initiator in response to a WRITE
2133  * SCSI command. The target iSCSI layer passes the information about the desired
2134  * data blocks to the initiator in one R2T PDU. The receiving buffer, the buffer
2135  * offset and datalen are passed via the 'idb' argument.
2136  *
2137  * Scope for Prototype build:
2138  * R2Ts are required for any Data-Out PDU, i.e. initiator and target must have
2139  * negotiated the "InitialR2T" to "Yes".
2140  *
2141  * Caller holds idt->idt_mutex
2142  */
2143 static idm_status_t
2144 idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb)
2145 {
2146 	idm_pdu_t		*pdu;
2147 	iscsi_rtt_hdr_t		*rtt;
2148 
2149 	ASSERT(mutex_owned(&idt->idt_mutex));
2150 
2151 	pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP);
2152 	pdu->isp_ic = idt->idt_ic;
2153 	bzero(pdu->isp_hdr, sizeof (iscsi_rtt_hdr_t));
2154 
2155 	/* iSCSI layer fills the TTT, ITT, StatSN, ExpCmdSN, MaxCmdSN */
2156 	(*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, pdu, ISCSI_OP_RTT_RSP);
2157 
2158 	/* set the rttsn, rtt.flags, rtt.data_offset and rtt.data_length */
2159 	rtt = (iscsi_rtt_hdr_t *)(pdu->isp_hdr);
2160 
2161 	rtt->opcode		= ISCSI_OP_RTT_RSP;
2162 	rtt->flags		= ISCSI_FLAG_FINAL;
2163 	rtt->data_offset	= htonl(idb->idb_bufoffset);
2164 	rtt->data_length	= htonl(idb->idb_xfer_len);
2165 	rtt->rttsn		= htonl(idt->idt_exp_rttsn++);
2166 
2167 	/* Keep track of buffer offsets */
2168 	idb->idb_exp_offset	= idb->idb_bufoffset;
2169 	mutex_exit(&idt->idt_mutex);
2170 
2171 	/*
2172 	 * Transmit the PDU.  Call the internal routine directly as there
2173 	 * is already implicit ordering of the PDU.
2174 	 */
2175 	(void) idm_i_so_tx(pdu);
2176 
2177 	return (IDM_STATUS_SUCCESS);
2178 }
2179 
2180 static idm_status_t
2181 idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen)
2182 {
2183 	idb->idb_buf = kmem_alloc(buflen, KM_NOSLEEP);
2184 	if (idb->idb_buf == NULL) {
2185 		IDM_CONN_LOG(CE_NOTE,
2186 		    "idm_so_buf_alloc: failed buffer allocation");
2187 		return (IDM_STATUS_FAIL);
2188 	}
2189 	return (IDM_STATUS_SUCCESS);
2190 }
2191 
2192 /* ARGSUSED */
2193 static idm_status_t
2194 idm_so_buf_setup(idm_buf_t *idb)
2195 {
2196 	/* nothing to do here */
2197 	return (IDM_STATUS_SUCCESS);
2198 }
2199 
2200 /* ARGSUSED */
2201 static void
2202 idm_so_buf_teardown(idm_buf_t *idb)
2203 {
2204 	/* nothing to do here */
2205 }
2206 
2207 static void
2208 idm_so_buf_free(idm_buf_t *idb)
2209 {
2210 	kmem_free(idb->idb_buf, idb->idb_buflen);
2211 }
2212 
2213 idm_status_t
2214 idm_so_send_buf_region(idm_task_t *idt, uint8_t opcode, idm_buf_t *idb,
2215     uint32_t buf_region_offset, uint32_t buf_region_length)
2216 {
2217 	idm_conn_t		*ic;
2218 	uint32_t		max_dataseglen;
2219 	size_t			remainder, chunk;
2220 	uint32_t		data_offset = buf_region_offset;
2221 	iscsi_data_hdr_t	*bhs;
2222 	idm_pdu_t		*pdu;
2223 
2224 	ASSERT(mutex_owned(&idt->idt_mutex));
2225 
2226 	ic = idt->idt_ic;
2227 
2228 	max_dataseglen = 8192; /* Need value from login negotiation */
2229 	remainder = buf_region_length;
2230 
2231 	while (remainder) {
2232 		if (idt->idt_state != TASK_ACTIVE) {
2233 			ASSERT((idt->idt_state != TASK_IDLE) &&
2234 			    (idt->idt_state != TASK_COMPLETE));
2235 			return (IDM_STATUS_ABORTED);
2236 		}
2237 
2238 		/* check to see if we need to chunk the data */
2239 		if (remainder > max_dataseglen) {
2240 			chunk = max_dataseglen;
2241 		} else {
2242 			chunk = remainder;
2243 		}
2244 
2245 		/* Data PDU headers will always be sizeof (iscsi_hdr_t) */
2246 		pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP);
2247 		pdu->isp_ic = ic;
2248 
2249 		/*
2250 		 * For target we've already built a build a header template
2251 		 * to use during the transfer.  Use this template so that
2252 		 * the SN values stay consistent with any unrelated PDU's
2253 		 * being transmitted.
2254 		 */
2255 		if (opcode == ISCSI_OP_SCSI_DATA_RSP) {
2256 			bcopy(&idb->idb_data_hdr_tmpl, pdu->isp_hdr,
2257 			    sizeof (iscsi_hdr_t));
2258 		} else {
2259 			/*
2260 			 * OK for now, but we should remove this bzero and
2261 			 * make sure the build_hdr function is initializing the
2262 			 * header properly
2263 			 */
2264 			bzero(pdu->isp_hdr, sizeof (iscsi_hdr_t));
2265 
2266 			/*
2267 			 * setup iscsi data hdr
2268 			 * callback to the iSCSI layer to fill in the BHS
2269 			 * CmdSN, StatSN, ExpCmdSN, MaxCmdSN, TTT, ITT and
2270 			 * opcode
2271 			 */
2272 			(*ic->ic_conn_ops.icb_build_hdr)(idt, pdu, opcode);
2273 		}
2274 
2275 		/*
2276 		 * Set DataSN, data offset, and flags in BHS
2277 		 * For the prototype build, A = 0, S = 0, U = 0
2278 		 */
2279 		bhs = (iscsi_data_hdr_t *)(pdu->isp_hdr);
2280 
2281 		bhs->datasn		= htonl(idt->idt_exp_datasn++);
2282 
2283 		hton24(bhs->dlength, chunk);
2284 		bhs->offset = htonl(idb->idb_bufoffset + data_offset);
2285 
2286 		if (chunk == remainder) {
2287 			bhs->flags = ISCSI_FLAG_FINAL; /* F bit set to 1 */
2288 		}
2289 
2290 		/* setup data */
2291 		pdu->isp_data	=  (uint8_t *)idb->idb_buf + data_offset;
2292 		pdu->isp_datalen = (uint_t)chunk;
2293 		remainder	-= chunk;
2294 		data_offset	+= chunk;
2295 
2296 		/*
2297 		 * Now that we're done working with idt_exp_datasn,
2298 		 * idt->idt_state and idb->idb_bufoffset we can release
2299 		 * the task lock -- don't want to hold it across the
2300 		 * call to idm_i_so_tx since we could block.
2301 		 */
2302 		mutex_exit(&idt->idt_mutex);
2303 
2304 		/*
2305 		 * Transmit the PDU.  Call the internal routine directly
2306 		 * as there is already implicit ordering.
2307 		 */
2308 		(void) idm_i_so_tx(pdu);
2309 
2310 		mutex_enter(&idt->idt_mutex);
2311 	}
2312 
2313 	return (IDM_STATUS_SUCCESS);
2314 }
2315 
2316 /*
2317  * TX PDU cache
2318  */
2319 /* ARGSUSED */
2320 int
2321 idm_sotx_pdu_constructor(void *hdl, void *arg, int flags)
2322 {
2323 	idm_pdu_t	*pdu = hdl;
2324 
2325 	bzero(pdu, sizeof (idm_pdu_t));
2326 	pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */
2327 	pdu->isp_hdrlen = sizeof (iscsi_hdr_t);
2328 	pdu->isp_callback = idm_sotx_cache_pdu_cb;
2329 	pdu->isp_magic = IDM_PDU_MAGIC;
2330 	bzero(pdu->isp_hdr, sizeof (iscsi_hdr_t));
2331 
2332 	return (0);
2333 }
2334 
2335 /* ARGSUSED */
2336 void
2337 idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2338 {
2339 	/* reset values between use */
2340 	pdu->isp_datalen = 0;
2341 
2342 	kmem_cache_free(idm.idm_sotx_pdu_cache, pdu);
2343 }
2344 
2345 /*
2346  * RX PDU cache
2347  */
2348 /* ARGSUSED */
2349 int
2350 idm_sorx_pdu_constructor(void *hdl, void *arg, int flags)
2351 {
2352 	idm_pdu_t	*pdu = hdl;
2353 
2354 	bzero(pdu, sizeof (idm_pdu_t));
2355 	pdu->isp_magic = IDM_PDU_MAGIC;
2356 	pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */
2357 	pdu->isp_callback = idm_sorx_cache_pdu_cb;
2358 
2359 	return (0);
2360 }
2361 
2362 /* ARGSUSED */
2363 static void
2364 idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2365 {
2366 	pdu->isp_iovlen = 0;
2367 	pdu->isp_sorx_buf = 0;
2368 	kmem_cache_free(idm.idm_sorx_pdu_cache, pdu);
2369 }
2370 
2371 static void
2372 idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2373 {
2374 	/*
2375 	 * We had to modify our cached RX PDU with a longer header buffer
2376 	 * and/or a longer data buffer.  Release the new buffers and fix
2377 	 * the fields back to what we would expect for a cached RX PDU.
2378 	 */
2379 	if (pdu->isp_flags & IDM_PDU_ADDL_HDR) {
2380 		kmem_free(pdu->isp_hdr, pdu->isp_hdrlen);
2381 	}
2382 	if (pdu->isp_flags & IDM_PDU_ADDL_DATA) {
2383 		kmem_free(pdu->isp_data, pdu->isp_datalen);
2384 	}
2385 	pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1);
2386 	pdu->isp_hdrlen = sizeof (iscsi_hdr_t);
2387 	pdu->isp_data = NULL;
2388 	pdu->isp_datalen = 0;
2389 	pdu->isp_sorx_buf = 0;
2390 	pdu->isp_callback = idm_sorx_cache_pdu_cb;
2391 	idm_sorx_cache_pdu_cb(pdu, status);
2392 }
2393 
2394 /*
2395  * This thread is only active when I/O is queued for transmit
2396  * because the socket is busy.
2397  */
2398 void
2399 idm_sotx_thread(void *arg)
2400 {
2401 	idm_conn_t	*ic = arg;
2402 	idm_tx_obj_t	*object, *next;
2403 	idm_so_conn_t	*so_conn;
2404 	idm_status_t	status = IDM_STATUS_SUCCESS;
2405 
2406 	idm_conn_hold(ic);
2407 
2408 	mutex_enter(&ic->ic_mutex);
2409 	so_conn = ic->ic_transport_private;
2410 	so_conn->ic_tx_thread_running = B_TRUE;
2411 	so_conn->ic_tx_thread_did = so_conn->ic_tx_thread->t_did;
2412 	cv_signal(&ic->ic_cv);
2413 	mutex_exit(&ic->ic_mutex);
2414 
2415 	mutex_enter(&so_conn->ic_tx_mutex);
2416 
2417 	while (so_conn->ic_tx_thread_running) {
2418 		while (list_is_empty(&so_conn->ic_tx_list)) {
2419 			DTRACE_PROBE1(soconn__tx__sleep, idm_conn_t *, ic);
2420 			cv_wait(&so_conn->ic_tx_cv, &so_conn->ic_tx_mutex);
2421 			DTRACE_PROBE1(soconn__tx__wakeup, idm_conn_t *, ic);
2422 
2423 			if (!so_conn->ic_tx_thread_running) {
2424 				goto tx_bail;
2425 			}
2426 		}
2427 
2428 		object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list);
2429 		list_remove(&so_conn->ic_tx_list, object);
2430 		mutex_exit(&so_conn->ic_tx_mutex);
2431 
2432 		switch (object->idm_tx_obj_magic) {
2433 		case IDM_PDU_MAGIC:
2434 			DTRACE_PROBE2(soconn__tx__pdu, idm_conn_t *, ic,
2435 			    idm_pdu_t *, (idm_pdu_t *)object);
2436 
2437 			status = idm_i_so_tx((idm_pdu_t *)object);
2438 			break;
2439 
2440 		case IDM_BUF_MAGIC: {
2441 			idm_buf_t *idb = (idm_buf_t *)object;
2442 			idm_task_t *idt = idb->idb_task_binding;
2443 
2444 			DTRACE_PROBE2(soconn__tx__buf, idm_conn_t *, ic,
2445 			    idm_buf_t *, idb);
2446 
2447 			mutex_enter(&idt->idt_mutex);
2448 			status = idm_so_send_buf_region(idt,
2449 			    ISCSI_OP_SCSI_DATA_RSP, idb, 0, idb->idb_xfer_len);
2450 
2451 			/*
2452 			 * TX thread owns the buffer so we expect it to
2453 			 * be "in transport"
2454 			 */
2455 			ASSERT(idb->idb_in_transport);
2456 			/*
2457 			 * idm_buf_tx_to_ini_done releases idt->idt_mutex
2458 			 */
2459 			idm_buf_tx_to_ini_done(idt, idb, status);
2460 			break;
2461 		}
2462 
2463 		default:
2464 			IDM_CONN_LOG(CE_WARN, "idm_sotx_thread: Unknown magic "
2465 			    "(0x%08x)", object->idm_tx_obj_magic);
2466 			status = IDM_STATUS_FAIL;
2467 		}
2468 
2469 		mutex_enter(&so_conn->ic_tx_mutex);
2470 
2471 		if (status != IDM_STATUS_SUCCESS) {
2472 			so_conn->ic_tx_thread_running = B_FALSE;
2473 			idm_conn_event(ic, CE_TRANSPORT_FAIL, status);
2474 		}
2475 	}
2476 
2477 	/*
2478 	 * Before we leave, we need to abort every item remaining in the
2479 	 * TX list.
2480 	 */
2481 
2482 tx_bail:
2483 	object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list);
2484 
2485 	while (object != NULL) {
2486 		next = list_next(&so_conn->ic_tx_list, object);
2487 
2488 		list_remove(&so_conn->ic_tx_list, object);
2489 		switch (object->idm_tx_obj_magic) {
2490 		case IDM_PDU_MAGIC:
2491 			idm_pdu_complete((idm_pdu_t *)object,
2492 			    IDM_STATUS_ABORTED);
2493 			break;
2494 
2495 		case IDM_BUF_MAGIC: {
2496 			idm_buf_t *idb = (idm_buf_t *)object;
2497 			idm_task_t *idt = idb->idb_task_binding;
2498 			mutex_exit(&so_conn->ic_tx_mutex);
2499 			mutex_enter(&idt->idt_mutex);
2500 			/*
2501 			 * TX thread owns the buffer so we expect it to
2502 			 * be "in transport"
2503 			 */
2504 			ASSERT(idb->idb_in_transport);
2505 			/*
2506 			 * idm_buf_tx_to_ini_done releases idt->idt_mutex
2507 			 */
2508 			idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
2509 			mutex_enter(&so_conn->ic_tx_mutex);
2510 			break;
2511 		}
2512 		default:
2513 			IDM_CONN_LOG(CE_WARN,
2514 			    "idm_sotx_thread: Unexpected magic "
2515 			    "(0x%08x)", object->idm_tx_obj_magic);
2516 		}
2517 
2518 		object = next;
2519 	}
2520 
2521 	mutex_exit(&so_conn->ic_tx_mutex);
2522 	idm_conn_rele(ic);
2523 	thread_exit();
2524 	/*NOTREACHED*/
2525 }
2526