xref: /titanic_51/usr/src/uts/common/io/idm/idm_so.c (revision b533f56bf95137d3de6666bd923e15ec373ea611)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/conf.h>
27 #include <sys/stat.h>
28 #include <sys/file.h>
29 #include <sys/ddi.h>
30 #include <sys/sunddi.h>
31 #include <sys/modctl.h>
32 #include <sys/priv.h>
33 #include <sys/cpuvar.h>
34 #include <sys/socket.h>
35 #include <sys/strsubr.h>
36 #include <sys/sysmacros.h>
37 #include <sys/sdt.h>
38 #include <netinet/tcp.h>
39 #include <inet/tcp.h>
40 #include <sys/socketvar.h>
41 #include <sys/pathname.h>
42 #include <sys/fs/snode.h>
43 #include <sys/fs/dv_node.h>
44 #include <sys/vnode.h>
45 #include <netinet/in.h>
46 #include <net/if.h>
47 #include <sys/sockio.h>
48 #include <sys/ksocket.h>
49 #include <sys/filio.h>		/* FIONBIO */
50 #include <sys/iscsi_protocol.h>
51 #include <sys/idm/idm.h>
52 #include <sys/idm/idm_so.h>
53 #include <sys/idm/idm_text.h>
54 
/*
 * Number of seconds idm_so_ini_conn_connect() sleeps between attempts
 * while a non-blocking connect reports EINPROGRESS or EALREADY.
 */
55 #define	IN_PROGRESS_DELAY	1
56 
57 /*
58  * in6addr_any is currently all zeroes, but use the macro in case this
59  * ever changes.
60  */
61 static const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
62 
63 static void idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
64 static void idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
65 static void idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
66 
67 static idm_status_t idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so);
68 static void idm_so_conn_destroy_common(idm_conn_t *ic);
69 static void idm_so_conn_connect_common(idm_conn_t *ic);
70 
71 static void idm_set_ini_preconnect_options(idm_so_conn_t *sc,
72     boolean_t boot_conn);
73 static void idm_set_ini_postconnect_options(idm_so_conn_t *sc);
74 static void idm_set_tgt_connect_options(ksocket_t so);
75 static idm_status_t idm_i_so_tx(idm_pdu_t *pdu);
76 
77 static idm_status_t idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu);
78 static void idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt,
79     idm_buf_t *idb, uint32_t offset, uint32_t length);
80 static void idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb);
81 static idm_status_t idm_so_send_buf_region(idm_task_t *idt,
82     idm_buf_t *idb, uint32_t buf_region_offset, uint32_t buf_region_length);
83 
84 static uint32_t idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb,
85     uint32_t ro, uint32_t dlength);
86 
87 static idm_status_t idm_so_handle_digest(idm_conn_t *it,
88     nvpair_t *digest_choice, const idm_kv_xlate_t *ikvx);
89 
90 static void idm_so_socket_set_nonblock(struct sonode *node);
91 static void idm_so_socket_set_block(struct sonode *node);
92 
93 /*
94  * Transport ops prototypes
95  */
96 static void idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu);
97 static idm_status_t idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb);
98 static idm_status_t idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb);
99 static void idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu);
100 static void idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu);
101 static void idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu);
102 static idm_status_t idm_so_free_task_rsrc(idm_task_t *idt);
103 static kv_status_t idm_so_negotiate_key_values(idm_conn_t *it,
104     nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl);
105 static void idm_so_notice_key_values(idm_conn_t *it,
106     nvlist_t *negotiated_nvl);
107 static kv_status_t idm_so_declare_key_values(idm_conn_t *it,
108     nvlist_t *config_nvl, nvlist_t *outgoing_nvl);
109 static boolean_t idm_so_conn_is_capable(idm_conn_req_t *ic,
110     idm_transport_caps_t *caps);
111 static idm_status_t idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen);
112 static void idm_so_buf_free(idm_buf_t *idb);
113 static idm_status_t idm_so_buf_setup(idm_buf_t *idb);
114 static void idm_so_buf_teardown(idm_buf_t *idb);
115 static idm_status_t idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is);
116 static void idm_so_tgt_svc_destroy(idm_svc_t *is);
117 static idm_status_t idm_so_tgt_svc_online(idm_svc_t *is);
118 static void idm_so_tgt_svc_offline(idm_svc_t *is);
119 static void idm_so_tgt_conn_destroy(idm_conn_t *ic);
120 static idm_status_t idm_so_tgt_conn_connect(idm_conn_t *ic);
121 static void idm_so_conn_disconnect(idm_conn_t *ic);
122 static idm_status_t idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic);
123 static void idm_so_ini_conn_destroy(idm_conn_t *ic);
124 static idm_status_t idm_so_ini_conn_connect(idm_conn_t *ic);
125 
126 /*
127  * IDM Native Sockets transport operations
 *
 * This is a positional initializer: the comment at the end of each entry
 * names the idm_transport_ops_t slot that entry fills, so entries must not
 * be reordered.  Slots initialized to NULL have no handler supplied by the
 * sockets transport.  The target and initiator disconnect slots both use
 * idm_so_conn_disconnect().  idm_so_init() installs this table as it_ops.
128  */
129 static
130 idm_transport_ops_t idm_so_transport_ops = {
131 	idm_so_tx,			/* it_tx_pdu */
132 	idm_so_buf_tx_to_ini,		/* it_buf_tx_to_ini */
133 	idm_so_buf_rx_from_ini,		/* it_buf_rx_from_ini */
134 	idm_so_rx_datain,		/* it_rx_datain */
135 	idm_so_rx_rtt,			/* it_rx_rtt */
136 	idm_so_rx_dataout,		/* it_rx_dataout */
137 	NULL,				/* it_alloc_conn_rsrc */
138 	NULL,				/* it_free_conn_rsrc */
139 	NULL,				/* it_tgt_enable_datamover */
140 	NULL,				/* it_ini_enable_datamover */
141 	NULL,				/* it_conn_terminate */
142 	idm_so_free_task_rsrc,		/* it_free_task_rsrc */
143 	idm_so_negotiate_key_values,	/* it_negotiate_key_values */
144 	idm_so_notice_key_values,	/* it_notice_key_values */
145 	idm_so_conn_is_capable,		/* it_conn_is_capable */
146 	idm_so_buf_alloc,		/* it_buf_alloc */
147 	idm_so_buf_free,		/* it_buf_free */
148 	idm_so_buf_setup,		/* it_buf_setup */
149 	idm_so_buf_teardown,		/* it_buf_teardown */
150 	idm_so_tgt_svc_create,		/* it_tgt_svc_create */
151 	idm_so_tgt_svc_destroy,		/* it_tgt_svc_destroy */
152 	idm_so_tgt_svc_online,		/* it_tgt_svc_online */
153 	idm_so_tgt_svc_offline,		/* it_tgt_svc_offline */
154 	idm_so_tgt_conn_destroy,	/* it_tgt_conn_destroy */
155 	idm_so_tgt_conn_connect,	/* it_tgt_conn_connect */
156 	idm_so_conn_disconnect,		/* it_tgt_conn_disconnect */
157 	idm_so_ini_conn_create,		/* it_ini_conn_create */
158 	idm_so_ini_conn_destroy,	/* it_ini_conn_destroy */
159 	idm_so_ini_conn_connect,	/* it_ini_conn_connect */
160 	idm_so_conn_disconnect,		/* it_ini_conn_disconnect */
161 	idm_so_declare_key_values	/* it_declare_key_values */
162 };
163 
/*
 * Global mutex initialized by idm_so_init() and destroyed by idm_so_fini().
 * NOTE(review): the data it protects is not visible in this part of the
 * file; the name suggests timed socket operations -- confirm with its users.
 */
164 kmutex_t	idm_so_timed_socket_mutex;
165 /*
166  * idm_so_init()
167  * Sockets transport initialization
168  */
169 void
170 idm_so_init(idm_transport_t *it)
171 {
172 	/* Cache for IDM Data and R2T Transmit PDU's */
173 	idm.idm_sotx_pdu_cache = kmem_cache_create("idm_tx_pdu_cache",
174 	    sizeof (idm_pdu_t) + sizeof (iscsi_hdr_t), 8,
175 	    &idm_sotx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP);
176 
177 	/* Cache for IDM Receive PDU's */
178 	idm.idm_sorx_pdu_cache = kmem_cache_create("idm_rx_pdu_cache",
179 	    sizeof (idm_pdu_t) + IDM_SORX_CACHE_HDRLEN, 8,
180 	    &idm_sorx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP);
181 
182 	/* 128k buffer cache */
183 	idm.idm_so_128k_buf_cache = kmem_cache_create("idm_128k_buf_cache",
184 	    IDM_SO_BUF_CACHE_UB, 8, NULL, NULL, NULL, NULL, NULL, KM_SLEEP);
185 
186 	/* Set the sockets transport ops */
187 	it->it_ops = &idm_so_transport_ops;
188 
189 	mutex_init(&idm_so_timed_socket_mutex, NULL, MUTEX_DEFAULT, NULL);
190 
191 }
192 
193 /*
194  * idm_so_fini()
195  * Sockets transport teardown
196  */
197 void
198 idm_so_fini(void)
199 {
200 	kmem_cache_destroy(idm.idm_so_128k_buf_cache);
201 	kmem_cache_destroy(idm.idm_sotx_pdu_cache);
202 	kmem_cache_destroy(idm.idm_sorx_pdu_cache);
203 	mutex_destroy(&idm_so_timed_socket_mutex);
204 }
205 
206 ksocket_t
207 idm_socreate(int domain, int type, int protocol)
208 {
209 	ksocket_t ks;
210 
211 	if (!ksocket_socket(&ks, domain, type, protocol, KSOCKET_NOSLEEP,
212 	    CRED())) {
213 		return (ks);
214 	} else {
215 		return (NULL);
216 	}
217 }
218 
219 /*
220  * idm_soshutdown will disconnect the socket and prevent subsequent PDU
221  * reception and transmission.  The sonode still exists but its state
222  * gets modified to indicate it is no longer connected.  Calls to
223  * idm_sorecv/idm_iov_sorecv will return so idm_soshutdown can be used
224  * to regain control of a thread stuck in idm_sorecv.
 *
 * Both directions are shut down (SHUT_RDWR); the result of
 * ksocket_shutdown() is deliberately ignored.
225  */
226 void
227 idm_soshutdown(ksocket_t so)
228 {
229 	(void) ksocket_shutdown(so, SHUT_RDWR, CRED());
230 }
231 
232 /*
233  * idm_sodestroy releases all resources associated with a socket previously
234  * created with idm_socreate.  The socket must be shutdown using
235  * idm_soshutdown before the socket is destroyed with idm_sodestroy,
236  * otherwise undefined behavior will result.
 *
 * NOTE(review): callers in this file close sockets that were never
 * connected (for example the ioctl-only sockets in idm_get_ipaddr())
 * without calling idm_soshutdown first, so the requirement above
 * presumably applies to connected sockets only -- confirm.
 *
 * The result of ksocket_close() is ignored.
237  */
238 void
239 idm_sodestroy(ksocket_t ks)
240 {
241 	(void) ksocket_close(ks, CRED());
242 }
243 
244 /*
245  * Function to compare two addresses in sockaddr_storage format
246  */
247 
248 int
249 idm_ss_compare(const struct sockaddr_storage *cmp_ss1,
250     const struct sockaddr_storage *cmp_ss2,
251     boolean_t v4_mapped_as_v4,
252     boolean_t compare_ports)
253 {
254 	struct sockaddr_storage			mapped_v4_ss1, mapped_v4_ss2;
255 	const struct sockaddr_storage		*ss1, *ss2;
256 	struct in_addr				*in1, *in2;
257 	struct in6_addr				*in61, *in62;
258 	int i;
259 
260 	/*
261 	 * Normalize V4-mapped IPv6 addresses into V4 format if
262 	 * v4_mapped_as_v4 is B_TRUE.
263 	 */
264 	ss1 = cmp_ss1;
265 	ss2 = cmp_ss2;
266 	if (v4_mapped_as_v4 && (ss1->ss_family == AF_INET6)) {
267 		in61 = &((struct sockaddr_in6 *)ss1)->sin6_addr;
268 		if (IN6_IS_ADDR_V4MAPPED(in61)) {
269 			bzero(&mapped_v4_ss1, sizeof (mapped_v4_ss1));
270 			mapped_v4_ss1.ss_family = AF_INET;
271 			((struct sockaddr_in *)&mapped_v4_ss1)->sin_port =
272 			    ((struct sockaddr_in *)ss1)->sin_port;
273 			IN6_V4MAPPED_TO_INADDR(in61,
274 			    &((struct sockaddr_in *)&mapped_v4_ss1)->sin_addr);
275 			ss1 = &mapped_v4_ss1;
276 		}
277 	}
278 	ss2 = cmp_ss2;
279 	if (v4_mapped_as_v4 && (ss2->ss_family == AF_INET6)) {
280 		in62 = &((struct sockaddr_in6 *)ss2)->sin6_addr;
281 		if (IN6_IS_ADDR_V4MAPPED(in62)) {
282 			bzero(&mapped_v4_ss2, sizeof (mapped_v4_ss2));
283 			mapped_v4_ss2.ss_family = AF_INET;
284 			((struct sockaddr_in *)&mapped_v4_ss2)->sin_port =
285 			    ((struct sockaddr_in *)ss2)->sin_port;
286 			IN6_V4MAPPED_TO_INADDR(in62,
287 			    &((struct sockaddr_in *)&mapped_v4_ss2)->sin_addr);
288 			ss2 = &mapped_v4_ss2;
289 		}
290 	}
291 
292 	/*
293 	 * Compare ports, then address family, then ip address
294 	 */
295 	if (compare_ports &&
296 	    (((struct sockaddr_in *)ss1)->sin_port !=
297 	    ((struct sockaddr_in *)ss2)->sin_port)) {
298 		if (((struct sockaddr_in *)ss1)->sin_port >
299 		    ((struct sockaddr_in *)ss2)->sin_port)
300 			return (1);
301 		else
302 			return (-1);
303 	}
304 
305 	/*
306 	 * ports are the same
307 	 */
308 	if (ss1->ss_family != ss2->ss_family) {
309 		if (ss1->ss_family == AF_INET)
310 			return (1);
311 		else
312 			return (-1);
313 	}
314 
315 	/*
316 	 * address families are the same
317 	 */
318 	if (ss1->ss_family == AF_INET) {
319 		in1 = &((struct sockaddr_in *)ss1)->sin_addr;
320 		in2 = &((struct sockaddr_in *)ss2)->sin_addr;
321 
322 		if (in1->s_addr > in2->s_addr)
323 			return (1);
324 		else if (in1->s_addr < in2->s_addr)
325 			return (-1);
326 		else
327 			return (0);
328 	} else if (ss1->ss_family == AF_INET6) {
329 		in61 = &((struct sockaddr_in6 *)ss1)->sin6_addr;
330 		in62 = &((struct sockaddr_in6 *)ss2)->sin6_addr;
331 
332 		for (i = 0; i < 4; i++) {
333 			if (in61->s6_addr32[i] > in62->s6_addr32[i])
334 				return (1);
335 			else if (in61->s6_addr32[i] < in62->s6_addr32[i])
336 				return (-1);
337 		}
338 		return (0);
339 	}
340 
341 	return (1);
342 }
343 
344 /*
345  * IP address filter functions to flag addresses that should not
346  * go out to initiators through discovery.
347  */
348 static boolean_t
349 idm_v4_addr_okay(struct in_addr *in_addr)
350 {
351 	in_addr_t addr = ntohl(in_addr->s_addr);
352 
353 	if ((INADDR_NONE == addr) ||
354 	    (IN_MULTICAST(addr)) ||
355 	    ((addr >> IN_CLASSA_NSHIFT) == 0) ||
356 	    ((addr >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) {
357 		return (B_FALSE);
358 	}
359 	return (B_TRUE);
360 }
361 
362 static boolean_t
363 idm_v6_addr_okay(struct in6_addr *addr6)
364 {
365 
366 	if ((IN6_IS_ADDR_UNSPECIFIED(addr6)) ||
367 	    (IN6_IS_ADDR_LOOPBACK(addr6)) ||
368 	    (IN6_IS_ADDR_MULTICAST(addr6)) ||
369 	    (IN6_IS_ADDR_V4MAPPED(addr6)) ||
370 	    (IN6_IS_ADDR_V4COMPAT(addr6)) ||
371 	    (IN6_IS_ADDR_LINKLOCAL(addr6))) {
372 		return (B_FALSE);
373 	}
374 	return (B_TRUE);
375 }
376 
377 /*
378  * idm_get_ipaddr will retrieve a list of IP Addresses which the host is
379  * configured with by sending down a sequence of kernel ioctl to IP STREAMS.
 *
 * On return *ipaddr_p points to a kmem-allocated idm_addr_list_t holding
 * the addresses that passed the interface-flag and address filters, with
 * the count in al_out_cnt, and the return value is the size in bytes of
 * that allocation.  If a socket cannot be created, an ioctl fails, or no
 * address qualifies, *ipaddr_p is NULL and 0 is returned.
380  */
381 int
382 idm_get_ipaddr(idm_addr_list_t **ipaddr_p)
383 {
384 	ksocket_t 		so4, so6;
385 	struct lifnum		lifn;
386 	struct lifconf		lifc;
387 	struct lifreq		*lp;
388 	int			rval;
389 	int			numifs;
390 	int			bufsize;
391 	void			*buf;
392 	int			i, j, n, rc;
393 	struct sockaddr_storage	ss;
394 	struct sockaddr_in	*sin;
395 	struct sockaddr_in6	*sin6;
396 	idm_addr_t		*ip;
397 	idm_addr_list_t		*ipaddr = NULL;
398 	int			size_ipaddr;
399 
400 	*ipaddr_p = NULL;
401 	size_ipaddr = 0;
402 	buf = NULL;
403 
404 	/* create an ipv4 and ipv6 UDP socket */
405 	if ((so6 = idm_socreate(PF_INET6, SOCK_DGRAM, 0)) == NULL)
406 		return (0);
407 	if ((so4 = idm_socreate(PF_INET, SOCK_DGRAM, 0)) == NULL) {
408 		idm_sodestroy(so6);
409 		return (0);
410 	}
411 
412 
413 retry_count:
414 	/* snapshot the current number of interfaces */
415 	lifn.lifn_family = PF_UNSPEC;
416 	lifn.lifn_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
417 	lifn.lifn_count = 0;
418 	/* use so6 for ioctls with unspecified families by default */
419 	if (ksocket_ioctl(so6, SIOCGLIFNUM, (intptr_t)&lifn, &rval, CRED())
420 	    != 0) {
421 		goto cleanup;
422 	}
423 
424 	numifs = lifn.lifn_count;
425 	if (numifs <= 0) {
426 		goto cleanup;
427 	}
428 
429 	/* allocate extra room in case more interfaces appear */
430 	numifs += 10;
431 
432 	/* get the interface names and ip addresses */
433 	bufsize = numifs * sizeof (struct lifreq);
434 	buf = kmem_alloc(bufsize, KM_SLEEP);
435 
436 	lifc.lifc_family = AF_UNSPEC;
437 	lifc.lifc_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
438 	lifc.lifc_len = bufsize;
439 	lifc.lifc_buf = buf;
440 	rc = ksocket_ioctl(so6, SIOCGLIFCONF, (intptr_t)&lifc, &rval, CRED());
441 	if (rc != 0) {
442 		goto cleanup;
443 	}
444 	/* if our extra room is used up, try again */
445 	if (bufsize <= lifc.lifc_len) {
		/* buf is reset to NULL so cleanup cannot free it twice */
446 		kmem_free(buf, bufsize);
447 		buf = NULL;
448 		goto retry_count;
449 	}
450 	/* calc actual number of ifconfs */
451 	n = lifc.lifc_len / sizeof (struct lifreq);
452 
453 	/* get ip address */
454 	if (n > 0) {
		/*
		 * Sized for the worst case of every interface qualifying.
		 * NOTE(review): the (n - 1) presumably accounts for one
		 * idm_addr_t already embedded in idm_addr_list_t -- confirm
		 * against the type definition.
		 */
455 		size_ipaddr = sizeof (idm_addr_list_t) +
456 		    (n - 1) * sizeof (idm_addr_t);
457 		ipaddr = kmem_zalloc(size_ipaddr, KM_SLEEP);
458 	} else {
459 		goto cleanup;
460 	}
461 
462 	/*
463 	 * Examine the array of interfaces and filter uninteresting ones
	 * (i walks the ioctl results, j counts the addresses kept)
464 	 */
465 	for (i = 0, j = 0, lp = lifc.lifc_req; i < n; i++, lp++) {
466 
467 		/*
468 		 * Copy the address as the SIOCGLIFFLAGS ioctl is destructive
469 		 */
470 		ss = lp->lifr_addr;
471 		/*
472 		 * fetch the flags using the socket of the correct family
473 		 */
474 		switch (ss.ss_family) {
475 		case AF_INET:
476 			rc = ksocket_ioctl(so4, SIOCGLIFFLAGS, (intptr_t)lp,
477 			    &rval, CRED());
478 			break;
479 		case AF_INET6:
480 			rc = ksocket_ioctl(so6, SIOCGLIFFLAGS, (intptr_t)lp,
481 			    &rval, CRED());
482 			break;
483 		default:
484 			continue;
485 		}
486 		if (rc == 0) {
487 			/*
488 			 * If we got the flags, skip uninteresting
489 			 * interfaces based on flags
			 * (if the ioctl failed, the address is still
			 * subject to the address filters below)
490 			 */
491 			if ((lp->lifr_flags & IFF_UP) != IFF_UP)
492 				continue;
493 			if (lp->lifr_flags &
494 			    (IFF_ANYCAST|IFF_NOLOCAL|IFF_DEPRECATED))
495 				continue;
496 		}
497 
498 		/* save ip address */
		/* slot j is only consumed by the j++ at the bottom */
499 		ip = &ipaddr->al_addrs[j];
500 		switch (ss.ss_family) {
501 		case AF_INET:
502 			sin = (struct sockaddr_in *)&ss;
503 			if (!idm_v4_addr_okay(&sin->sin_addr))
504 				continue;
505 			ip->a_addr.i_addr.in4 = sin->sin_addr;
506 			ip->a_addr.i_insize = sizeof (struct in_addr);
507 			break;
508 		case AF_INET6:
509 			sin6 = (struct sockaddr_in6 *)&ss;
510 			if (!idm_v6_addr_okay(&sin6->sin6_addr))
511 				continue;
512 			ip->a_addr.i_addr.in6 = sin6->sin6_addr;
513 			ip->a_addr.i_insize = sizeof (struct in6_addr);
514 			break;
515 		default:
516 			continue;
517 		}
518 		j++;
519 	}
520 
521 	if (j == 0) {
522 		/* no valid ifaddr */
523 		kmem_free(ipaddr, size_ipaddr);
524 		size_ipaddr = 0;
525 		ipaddr = NULL;
526 	} else {
527 		ipaddr->al_out_cnt = j;
528 	}
529 
530 
531 cleanup:
	/* common exit: close both sockets and release the ioctl buffer */
532 	idm_sodestroy(so6);
533 	idm_sodestroy(so4);
534 
535 	if (buf != NULL)
536 		kmem_free(buf, bufsize);
537 
538 	*ipaddr_p = ipaddr;
539 	return (size_ipaddr);
540 }
541 
542 int
543 idm_sorecv(ksocket_t so, void *msg, size_t len)
544 {
545 	iovec_t iov;
546 
547 	ASSERT(so != NULL);
548 	ASSERT(len != 0);
549 
550 	/*
551 	 * Fill in iovec and receive data
552 	 */
553 	iov.iov_base = msg;
554 	iov.iov_len = len;
555 
556 	return (idm_iov_sorecv(so, &iov, 1, len));
557 }
558 
559 /*
560  * idm_sosendto - Sends a buffered data on a non-connected socket.
561  *
562  * This function puts the data provided on the wire by calling sosendmsg.
563  * It will return only when all the data has been sent or if an error
564  * occurs.
565  *
566  * Returns 0 for success, the socket errno value if sosendmsg fails, and
567  * -1 if sosendmsg returns success but uio_resid != 0
568  */
569 int
570 idm_sosendto(ksocket_t so, void *buff, size_t len,
571     struct sockaddr *name, socklen_t namelen)
572 {
573 	struct msghdr		msg;
574 	struct iovec		iov[1];
575 	int			error;
576 	size_t			sent = 0;
577 
578 	iov[0].iov_base	= buff;
579 	iov[0].iov_len	= len;
580 
581 	/* Initialization of the message header. */
582 	bzero(&msg, sizeof (msg));
583 	msg.msg_iov	= iov;
584 	msg.msg_iovlen	= 1;
585 	msg.msg_name	= name;
586 	msg.msg_namelen	= namelen;
587 
588 	if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED())) == 0) {
589 		/* Data sent */
590 		if (sent == len) {
591 			/* All data sent.  Success. */
592 			return (0);
593 		} else {
594 			/* Not all data was sent.  Failure */
595 			return (-1);
596 		}
597 	}
598 
599 	/* Send failed */
600 	return (error);
601 }
602 
603 /*
604  * idm_iov_sosend - Sends an iovec on a connection.
605  *
606  * This function puts the data provided on the wire by calling sosendmsg.
607  * It will return only when all the data has been sent or if an error
608  * occurs.
609  *
610  * Returns 0 for success, the socket errno value if sosendmsg fails, and
611  * -1 if sosendmsg returns success but uio_resid != 0
612  */
613 int
614 idm_iov_sosend(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len)
615 {
616 	struct msghdr		msg;
617 	int			error;
618 	size_t 			sent = 0;
619 
620 	ASSERT(iop != NULL);
621 
622 	/* Initialization of the message header. */
623 	bzero(&msg, sizeof (msg));
624 	msg.msg_iov	= iop;
625 	msg.msg_iovlen	= iovlen;
626 
627 	if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED()))
628 	    == 0) {
629 		/* Data sent */
630 		if (sent == total_len) {
631 			/* All data sent.  Success. */
632 			return (0);
633 		} else {
634 			/* Not all data was sent.  Failure */
635 			return (-1);
636 		}
637 	}
638 
639 	/* Send failed */
640 	return (error);
641 }
642 
643 /*
644  * idm_iov_sorecv - Receives an iovec from a connection
645  *
646  * This function gets the data asked for from the socket.  It will return
647  * only when all the requested data has been retrieved or if an error
648  * occurs.
649  *
650  * Returns 0 for success, the socket errno value if sorecvmsg fails, and
651  * -1 if sorecvmsg returns success but uio_resid != 0
652  */
653 int
654 idm_iov_sorecv(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len)
655 {
656 	struct msghdr		msg;
657 	int			error;
658 	size_t			recv;
659 	int 			flags;
660 
661 	ASSERT(iop != NULL);
662 
663 	/* Initialization of the message header. */
664 	bzero(&msg, sizeof (msg));
665 	msg.msg_iov	= iop;
666 	msg.msg_iovlen	= iovlen;
667 	flags		= MSG_WAITALL;
668 
669 	if ((error = ksocket_recvmsg(so, &msg, flags, &recv, CRED()))
670 	    == 0) {
671 		/* Received data */
672 		if (recv == total_len) {
673 			/* All requested data received.  Success */
674 			return (0);
675 		} else {
676 			/*
677 			 * Not all data was received.  The connection has
678 			 * probably failed.
679 			 */
680 			return (-1);
681 		}
682 	}
683 
684 	/* Receive failed */
685 	return (error);
686 }
687 
688 static void
689 idm_set_ini_preconnect_options(idm_so_conn_t *sc, boolean_t boot_conn)
690 {
691 	int	conn_abort = 10000;
692 	int	conn_notify = 2000;
693 	int	abort = 30000;
694 
695 	/* Pre-connect socket options */
696 	(void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
697 	    TCP_CONN_NOTIFY_THRESHOLD, (char *)&conn_notify, sizeof (int),
698 	    CRED());
699 	if (boot_conn == B_FALSE) {
700 		(void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
701 		    TCP_CONN_ABORT_THRESHOLD, (char *)&conn_abort, sizeof (int),
702 		    CRED());
703 		(void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
704 		    TCP_ABORT_THRESHOLD,
705 		    (char *)&abort, sizeof (int), CRED());
706 	}
707 }
708 
709 static void
710 idm_set_ini_postconnect_options(idm_so_conn_t *sc)
711 {
712 	int32_t		rcvbuf = IDM_RCVBUF_SIZE;
713 	int32_t		sndbuf = IDM_SNDBUF_SIZE;
714 	const int	on = 1;
715 
716 	/* Set postconnect options */
717 	(void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP, TCP_NODELAY,
718 	    (char *)&on, sizeof (int), CRED());
719 	(void) ksocket_setsockopt(sc->ic_so, SOL_SOCKET, SO_RCVBUF,
720 	    (char *)&rcvbuf, sizeof (int), CRED());
721 	(void) ksocket_setsockopt(sc->ic_so, SOL_SOCKET, SO_SNDBUF,
722 	    (char *)&sndbuf, sizeof (int), CRED());
723 }
724 
725 static void
726 idm_set_tgt_connect_options(ksocket_t ks)
727 {
728 	int32_t		rcvbuf = IDM_RCVBUF_SIZE;
729 	int32_t		sndbuf = IDM_SNDBUF_SIZE;
730 	const int	on = 1;
731 
732 	/* Set connect options */
733 	(void) ksocket_setsockopt(ks, SOL_SOCKET, SO_RCVBUF,
734 	    (char *)&rcvbuf, sizeof (int), CRED());
735 	(void) ksocket_setsockopt(ks, SOL_SOCKET, SO_SNDBUF,
736 	    (char *)&sndbuf, sizeof (int), CRED());
737 	(void) ksocket_setsockopt(ks, IPPROTO_TCP, TCP_NODELAY,
738 	    (char *)&on, sizeof (on), CRED());
739 }
740 
741 static uint32_t
742 n2h24(const uchar_t *ptr)
743 {
744 	return ((ptr[0] << 16) | (ptr[1] << 8) | ptr[2]);
745 }
746 
747 
/*
 * idm_sorecvhdr()
 * Read the header portion of one PDU from the connection's socket into
 * pdu->isp_hdr: first the BHS, then any additional header segments and,
 * when IDM_CONN_HEADER_DIGEST is set on the connection, the header digest.
 * pdu->isp_hdrlen and pdu->isp_datalen are filled in from the BHS.
 *
 * Returns 0 on success, IDM_STATUS_FAIL if a socket read fails or a
 * target connection is offered a data segment longer than
 * max_recv_dataseglen, and IDM_STATUS_HEADER_DIGEST if the received
 * digest does not match the one calculated over the header.
 */
748 static idm_status_t
749 idm_sorecvhdr(idm_conn_t *ic, idm_pdu_t *pdu)
750 {
751 	iscsi_hdr_t	*bhs;
752 	uint32_t	hdr_digest_crc;
753 	uint32_t	crc_calculated;
754 	void		*new_hdr;
755 	int		ahslen = 0;
756 	int		total_len = 0;
757 	int		iovlen = 0;
758 	struct iovec	iov[2];
759 	idm_so_conn_t	*so_conn;
760 	int		rc;
761 
762 	so_conn = ic->ic_transport_private;
763 
764 	/*
765 	 * Read BHS
766 	 */
767 	bhs = pdu->isp_hdr;
768 	rc = idm_sorecv(so_conn->ic_so, pdu->isp_hdr, sizeof (iscsi_hdr_t));
769 	if (rc != IDM_STATUS_SUCCESS) {
770 		return (IDM_STATUS_FAIL);
771 	}
772 
773 	/*
774 	 * Check actual AHS length against the amount available in the buffer
	 * (hlength is scaled by sizeof (uint32_t) to get bytes)
775 	 */
776 	pdu->isp_hdrlen = sizeof (iscsi_hdr_t) +
777 	    (bhs->hlength * sizeof (uint32_t));
778 	pdu->isp_datalen = n2h24(bhs->dlength);
779 	if (ic->ic_conn_type == CONN_TYPE_TGT &&
780 	    pdu->isp_datalen > ic->ic_conn_params.max_recv_dataseglen) {
781 		IDM_CONN_LOG(CE_WARN,
782 		    "idm_sorecvhdr: exceeded the max data segment length");
783 		return (IDM_STATUS_FAIL);
784 	}
	/*
	 * NOTE(review): hlength is used as a count of 4-byte words above but
	 * is compared directly with IDM_SORX_CACHE_AHSLEN here.  Confirm that
	 * constant is also expressed in words; if it is in bytes, an AHS of
	 * up to four times the cached space would be read into the cached
	 * header below.
	 */
785 	if (bhs->hlength > IDM_SORX_CACHE_AHSLEN) {
786 		/* Allocate a new header segment and change the callback */
787 		new_hdr = kmem_alloc(pdu->isp_hdrlen, KM_SLEEP);
788 		bcopy(pdu->isp_hdr, new_hdr, sizeof (iscsi_hdr_t));
		/*
		 * bhs is not updated and keeps pointing at the original
		 * header; only its hlength field is read from here on.
		 */
789 		pdu->isp_hdr = new_hdr;
790 		pdu->isp_flags |= IDM_PDU_ADDL_HDR;
791 
792 		/*
793 		 * This callback will restore the expected values after
794 		 * the RX PDU has been processed.
795 		 */
796 		pdu->isp_callback = idm_sorx_addl_pdu_cb;
797 	}
798 
799 	/*
800 	 * Setup receipt of additional header and header digest (if enabled).
	 * Both are gathered into iov[] and fetched with a single receive.
801 	 */
802 	if (bhs->hlength > 0) {
		/* the AHS lands immediately after the BHS in isp_hdr */
803 		iov[iovlen].iov_base = (caddr_t)(pdu->isp_hdr + 1);
804 		ahslen = pdu->isp_hdrlen - sizeof (iscsi_hdr_t);
805 		iov[iovlen].iov_len = ahslen;
806 		total_len += iov[iovlen].iov_len;
807 		iovlen++;
808 	}
809 
810 	if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) {
811 		iov[iovlen].iov_base = (caddr_t)&hdr_digest_crc;
812 		iov[iovlen].iov_len = sizeof (hdr_digest_crc);
813 		total_len += iov[iovlen].iov_len;
814 		iovlen++;
815 	}
816 
	/* nothing more to read when there is neither an AHS nor a digest */
817 	if ((iovlen != 0) &&
818 	    (idm_iov_sorecv(so_conn->ic_so, &iov[0], iovlen,
819 	    total_len) != 0)) {
820 		return (IDM_STATUS_FAIL);
821 	}
822 
823 	/*
824 	 * Validate header digest if enabled
	 * (the CRC covers the BHS plus any AHS; ahslen is 0 without one)
825 	 */
826 	if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) {
827 		crc_calculated = idm_crc32c(pdu->isp_hdr,
828 		    sizeof (iscsi_hdr_t) + ahslen);
829 		if (crc_calculated != hdr_digest_crc) {
830 			/* Invalid Header Digest */
831 			return (IDM_STATUS_HEADER_DIGEST);
832 		}
833 	}
834 
835 	return (0);
836 }
837 
838 /*
839  * idm_so_ini_conn_create()
840  * Allocate the sockets transport connection resources.
841  */
842 static idm_status_t
843 idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic)
844 {
845 	ksocket_t	so;
846 	idm_so_conn_t	*so_conn;
847 	idm_status_t	idmrc;
848 
849 	so = idm_socreate(cr->cr_domain, cr->cr_type,
850 	    cr->cr_protocol);
851 	if (so == NULL) {
852 		return (IDM_STATUS_FAIL);
853 	}
854 
855 	/* Bind the socket if configured to do so */
856 	if (cr->cr_bound) {
857 		if (ksocket_bind(so, &cr->cr_bound_addr.sin,
858 		    SIZEOF_SOCKADDR(&cr->cr_bound_addr.sin), CRED()) != 0) {
859 			idm_sodestroy(so);
860 			return (IDM_STATUS_FAIL);
861 		}
862 	}
863 
864 	idmrc = idm_so_conn_create_common(ic, so);
865 	if (idmrc != IDM_STATUS_SUCCESS) {
866 		idm_soshutdown(so);
867 		idm_sodestroy(so);
868 		return (IDM_STATUS_FAIL);
869 	}
870 
871 	so_conn = ic->ic_transport_private;
872 	/* Set up socket options */
873 	idm_set_ini_preconnect_options(so_conn, cr->cr_boot_conn);
874 
875 	return (IDM_STATUS_SUCCESS);
876 }
877 
878 /*
879  * idm_so_ini_conn_destroy()
880  * Tear down the sockets transport connection resources.
 * All of the work is done by idm_so_conn_destroy_common(), which the
 * target-side destroy routine uses as well.
881  */
882 static void
883 idm_so_ini_conn_destroy(idm_conn_t *ic)
884 {
885 	idm_so_conn_destroy_common(ic);
886 }
887 
888 /*
889  * idm_so_ini_conn_connect()
890  * Establish the connection referred to by the handle previously allocated via
891  * idm_so_ini_conn_create().
 *
 * When ic_conn_params.nonblock_socket is set, the socket is put in
 * non-blocking mode and ksocket_connect() is retried until it succeeds,
 * fails with ETIMEDOUT, ECONNREFUSED or ECONNRESET, or the lbolt clock
 * passes conn_login_max.  Otherwise one blocking ksocket_connect() is made.
 *
 * On success the connection threads are started through
 * idm_so_conn_connect_common(), the post-connect socket options are applied
 * and IDM_STATUS_SUCCESS is returned.  On failure the socket is shut down
 * and IDM_STATUS_FAIL is returned.
892  */
893 static idm_status_t
894 idm_so_ini_conn_connect(idm_conn_t *ic)
895 {
896 	idm_so_conn_t	*so_conn;
897 	struct sonode	*node = NULL;
898 	int 		rc;
899 	clock_t		lbolt, conn_login_max, conn_login_interval;
900 	boolean_t	nonblock;
901 
902 	so_conn = ic->ic_transport_private;
903 	nonblock = ic->ic_conn_params.nonblock_socket;
	/*
	 * NOTE(review): conn_login_max is compared directly with
	 * ddi_get_lbolt() below, so it is presumably an absolute tick
	 * value rather than a duration -- confirm with the caller.
	 */
904 	conn_login_max = ic->ic_conn_params.conn_login_max;
	/* tick value at which the current retry interval ends */
905 	conn_login_interval = ddi_get_lbolt() +
906 	    SEC_TO_TICK(ic->ic_conn_params.conn_login_interval);
907 
908 	if (nonblock == B_TRUE) {
909 		node = ((struct sonode *)(so_conn->ic_so));
910 		/* Switch the socket to non-blocking mode */
911 		idm_so_socket_set_nonblock(node);
912 		do {
913 			rc = ksocket_connect(so_conn->ic_so,
914 			    &ic->ic_ini_dst_addr.sin,
915 			    (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)),
916 			    CRED());
917 			if (rc == 0 || rc == EISCONN) {
918 				/* socket success or already success */
919 				rc = IDM_STATUS_SUCCESS;
920 				break;
921 			}
922 			if ((rc == ETIMEDOUT) || (rc == ECONNREFUSED) ||
923 			    (rc == ECONNRESET)) {
924 				/* socket connection timeout or refuse */
925 				break;
926 			}
927 			lbolt = ddi_get_lbolt();
928 			if (lbolt > conn_login_max) {
929 				/*
930 				 * Connection retry timeout,
931 				 * failed connect to target.
932 				 */
933 				break;
934 			}
935 			if (lbolt < conn_login_interval) {
936 				if ((rc == EINPROGRESS) || (rc == EALREADY)) {
937 					/* TCP connect still in progress */
938 					delay(SEC_TO_TICK(IN_PROGRESS_DELAY));
					/* poll again; interval is not reset */
939 					continue;
940 				} else {
					/* wait out the rest of the interval */
941 					delay(conn_login_interval - lbolt);
942 				}
943 			}
			/* start a new retry interval */
944 			conn_login_interval = ddi_get_lbolt() +
945 			    SEC_TO_TICK(ic->ic_conn_params.conn_login_interval);
946 		} while (rc != 0);
947 		/* restore blocking mode once connected */
948 		if (rc == IDM_STATUS_SUCCESS) {
949 			idm_so_socket_set_block(node);
950 		}
951 	} else {
952 		rc = ksocket_connect(so_conn->ic_so, &ic->ic_ini_dst_addr.sin,
953 		    (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)), CRED());
954 	}
955 
	/* any non-zero rc left over from either path is a failed connect */
956 	if (rc != 0) {
957 		idm_soshutdown(so_conn->ic_so);
958 		return (IDM_STATUS_FAIL);
959 	}
960 
961 	idm_so_conn_connect_common(ic);
962 
963 	idm_set_ini_postconnect_options(so_conn);
964 
965 	return (IDM_STATUS_SUCCESS);
966 }
967 
968 idm_status_t
969 idm_so_tgt_conn_create(idm_conn_t *ic, ksocket_t new_so)
970 {
971 	idm_status_t	idmrc;
972 
973 	idmrc = idm_so_conn_create_common(ic, new_so);
974 
975 	return (idmrc);
976 }
977 
/*
 * idm_so_tgt_conn_destroy()
 * Tear down the sockets transport connection resources of a target
 * connection.  All of the work is done by idm_so_conn_destroy_common().
 */
978 static void
979 idm_so_tgt_conn_destroy(idm_conn_t *ic)
980 {
981 	idm_so_conn_destroy_common(ic);
982 }
983 
984 /*
985  * idm_so_tgt_conn_connect()
986  * Establish the connection in ic, passed from idm_tgt_conn_finish(), which
987  * is invoked from the SM as a result of an inbound connection request.
 *
 * No connect call is made here; the function only runs
 * idm_so_conn_connect_common() and always returns IDM_STATUS_SUCCESS.
988  */
989 static idm_status_t
990 idm_so_tgt_conn_connect(idm_conn_t *ic)
991 {
992 	idm_so_conn_connect_common(ic);
993 
994 	return (IDM_STATUS_SUCCESS);
995 }
996 
997 static idm_status_t
998 idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so)
999 {
1000 	idm_so_conn_t	*so_conn;
1001 
1002 	so_conn = kmem_zalloc(sizeof (idm_so_conn_t), KM_SLEEP);
1003 	so_conn->ic_so = new_so;
1004 
1005 	ic->ic_transport_private = so_conn;
1006 	ic->ic_transport_hdrlen = 0;
1007 
1008 	/* Set the scoreboarding flag on this connection */
1009 	ic->ic_conn_flags |= IDM_CONN_USE_SCOREBOARD;
1010 	ic->ic_conn_params.max_recv_dataseglen =
1011 	    ISCSI_DEFAULT_MAX_RECV_SEG_LEN;
1012 	ic->ic_conn_params.max_xmit_dataseglen =
1013 	    ISCSI_DEFAULT_MAX_XMIT_SEG_LEN;
1014 
1015 	/*
1016 	 * Initialize tx thread mutex and list
1017 	 */
1018 	mutex_init(&so_conn->ic_tx_mutex, NULL, MUTEX_DEFAULT, NULL);
1019 	cv_init(&so_conn->ic_tx_cv, NULL, CV_DEFAULT, NULL);
1020 	list_create(&so_conn->ic_tx_list, sizeof (idm_pdu_t),
1021 	    offsetof(idm_pdu_t, idm_tx_link));
1022 
1023 	return (IDM_STATUS_SUCCESS);
1024 }
1025 
1026 static void
1027 idm_so_conn_destroy_common(idm_conn_t *ic)
1028 {
1029 	idm_so_conn_t	*so_conn = ic->ic_transport_private;
1030 
1031 	ic->ic_transport_private = NULL;
1032 	idm_sodestroy(so_conn->ic_so);
1033 	list_destroy(&so_conn->ic_tx_list);
1034 	mutex_destroy(&so_conn->ic_tx_mutex);
1035 	cv_destroy(&so_conn->ic_tx_cv);
1036 
1037 	kmem_free(so_conn, sizeof (idm_so_conn_t));
1038 }
1039 
1040 static void
1041 idm_so_conn_connect_common(idm_conn_t *ic)
1042 {
1043 	idm_so_conn_t	*so_conn;
1044 	struct sockaddr_in6	t_addr;
1045 	socklen_t	t_addrlen = 0;
1046 
1047 	so_conn = ic->ic_transport_private;
1048 	bzero(&t_addr, sizeof (struct sockaddr_in6));
1049 	t_addrlen = sizeof (struct sockaddr_in6);
1050 
1051 	/* Set the local and remote addresses in the idm conn handle */
1052 	(void) ksocket_getsockname(so_conn->ic_so, (struct sockaddr *)&t_addr,
1053 	    &t_addrlen, CRED());
1054 	bcopy(&t_addr, &ic->ic_laddr, t_addrlen);
1055 	(void) ksocket_getpeername(so_conn->ic_so, (struct sockaddr *)&t_addr,
1056 	    &t_addrlen, CRED());
1057 	bcopy(&t_addr, &ic->ic_raddr, t_addrlen);
1058 
1059 	mutex_enter(&ic->ic_mutex);
1060 	so_conn->ic_tx_thread = thread_create(NULL, 0, idm_sotx_thread, ic, 0,
1061 	    &p0, TS_RUN, minclsyspri);
1062 	so_conn->ic_rx_thread = thread_create(NULL, 0, idm_sorx_thread, ic, 0,
1063 	    &p0, TS_RUN, minclsyspri);
1064 
1065 	while (so_conn->ic_rx_thread_did == 0 ||
1066 	    so_conn->ic_tx_thread_did == 0)
1067 		cv_wait(&ic->ic_cv, &ic->ic_mutex);
1068 	mutex_exit(&ic->ic_mutex);
1069 }
1070 
1071 /*
1072  * idm_so_conn_disconnect()
1073  * Shutdown the socket connection and stop the thread
1074  */
1075 static void
1076 idm_so_conn_disconnect(idm_conn_t *ic)
1077 {
1078 	idm_so_conn_t	*so_conn;
1079 
1080 	so_conn = ic->ic_transport_private;
1081 
1082 	mutex_enter(&ic->ic_mutex);
1083 	so_conn->ic_rx_thread_running = B_FALSE;
1084 	so_conn->ic_tx_thread_running = B_FALSE;
1085 	/* We need to wakeup the TX thread */
1086 	mutex_enter(&so_conn->ic_tx_mutex);
1087 	cv_signal(&so_conn->ic_tx_cv);
1088 	mutex_exit(&so_conn->ic_tx_mutex);
1089 	mutex_exit(&ic->ic_mutex);
1090 
1091 	/* This should wakeup the RX thread if it is sleeping */
1092 	idm_soshutdown(so_conn->ic_so);
1093 
1094 	thread_join(so_conn->ic_tx_thread_did);
1095 	thread_join(so_conn->ic_rx_thread_did);
1096 }
1097 
1098 /*
1099  * idm_so_tgt_svc_create()
1100  * Establish a service on an IP address and port.  idm_svc_req_t contains
1101  * the service parameters.
1102  */
1103 /*ARGSUSED*/
1104 static idm_status_t
1105 idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is)
1106 {
1107 	idm_so_svc_t		*so_svc;
1108 
1109 	so_svc = kmem_zalloc(sizeof (idm_so_svc_t), KM_SLEEP);
1110 
1111 	/* Set the new sockets service in svc handle */
1112 	is->is_so_svc = (void *)so_svc;
1113 
1114 	return (IDM_STATUS_SUCCESS);
1115 }
1116 
1117 /*
1118  * idm_so_tgt_svc_destroy()
1119  * Teardown sockets resources allocated in idm_so_tgt_svc_create()
1120  */
1121 static void
1122 idm_so_tgt_svc_destroy(idm_svc_t *is)
1123 {
1124 	/* the socket will have been torn down; free the service */
1125 	kmem_free(is->is_so_svc, sizeof (idm_so_svc_t));
1126 }
1127 
1128 /*
1129  * idm_so_tgt_svc_online()
1130  * Launch a watch thread on the svc allocated in idm_so_tgt_svc_create()
1131  */
1132 
1133 static idm_status_t
1134 idm_so_tgt_svc_online(idm_svc_t *is)
1135 {
1136 	idm_so_svc_t		*so_svc;
1137 	idm_svc_req_t		*sr = &is->is_svc_req;
1138 	struct sockaddr_in6	sin6_ip;
1139 	const uint32_t		on = 1;
1140 	const uint32_t		off = 0;
1141 
1142 	mutex_enter(&is->is_mutex);
1143 	so_svc = (idm_so_svc_t *)is->is_so_svc;
1144 
1145 	/*
1146 	 * Try creating an IPv6 socket first
1147 	 */
1148 	if ((so_svc->is_so = idm_socreate(PF_INET6, SOCK_STREAM, 0)) == NULL) {
1149 		mutex_exit(&is->is_mutex);
1150 		return (IDM_STATUS_FAIL);
1151 	} else {
1152 		bzero(&sin6_ip, sizeof (sin6_ip));
1153 		sin6_ip.sin6_family = AF_INET6;
1154 		sin6_ip.sin6_port = htons(sr->sr_port);
1155 		sin6_ip.sin6_addr = in6addr_any;
1156 
1157 		(void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET,
1158 		    SO_REUSEADDR, (char *)&on, sizeof (on), CRED());
1159 		/*
1160 		 * Turn off SO_MAC_EXEMPT so future sobinds succeed
1161 		 */
1162 		(void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET,
1163 		    SO_MAC_EXEMPT, (char *)&off, sizeof (off), CRED());
1164 
1165 		if (ksocket_bind(so_svc->is_so, (struct sockaddr *)&sin6_ip,
1166 		    sizeof (sin6_ip), CRED()) != 0) {
1167 			mutex_exit(&is->is_mutex);
1168 			idm_sodestroy(so_svc->is_so);
1169 			return (IDM_STATUS_FAIL);
1170 		}
1171 	}
1172 
1173 	idm_set_tgt_connect_options(so_svc->is_so);
1174 
1175 	if (ksocket_listen(so_svc->is_so, 5, CRED()) != 0) {
1176 		mutex_exit(&is->is_mutex);
1177 		idm_soshutdown(so_svc->is_so);
1178 		idm_sodestroy(so_svc->is_so);
1179 		return (IDM_STATUS_FAIL);
1180 	}
1181 
1182 	/* Launch a watch thread */
1183 	so_svc->is_thread = thread_create(NULL, 0, idm_so_svc_port_watcher,
1184 	    is, 0, &p0, TS_RUN, minclsyspri);
1185 
1186 	if (so_svc->is_thread == NULL) {
1187 		/* Failure to launch; teardown the socket */
1188 		mutex_exit(&is->is_mutex);
1189 		idm_soshutdown(so_svc->is_so);
1190 		idm_sodestroy(so_svc->is_so);
1191 		return (IDM_STATUS_FAIL);
1192 	}
1193 	ksocket_hold(so_svc->is_so);
1194 	/* Wait for the port watcher thread to start */
1195 	while (!so_svc->is_thread_running)
1196 		cv_wait(&is->is_cv, &is->is_mutex);
1197 	mutex_exit(&is->is_mutex);
1198 
1199 	return (IDM_STATUS_SUCCESS);
1200 }
1201 
1202 /*
1203  * idm_so_tgt_svc_offline
1204  *
1205  * Stop listening on the IP address and port identified by idm_svc_t.
1206  */
1207 static void
1208 idm_so_tgt_svc_offline(idm_svc_t *is)
1209 {
1210 	idm_so_svc_t		*so_svc;
1211 	mutex_enter(&is->is_mutex);
1212 	so_svc = (idm_so_svc_t *)is->is_so_svc;
1213 	so_svc->is_thread_running = B_FALSE;
1214 	mutex_exit(&is->is_mutex);
1215 
1216 	/*
1217 	 * Teardown socket
1218 	 */
1219 	idm_sodestroy(so_svc->is_so);
1220 
1221 	/*
1222 	 * Now we expect the port watcher thread to terminate
1223 	 */
1224 	thread_join(so_svc->is_thread_did);
1225 }
1226 
1227 /*
1228  * Watch thread for target service connection establishment.
1229  */
1230 void
1231 idm_so_svc_port_watcher(void *arg)
1232 {
1233 	idm_svc_t		*svc = arg;
1234 	ksocket_t		new_so;
1235 	idm_conn_t		*ic;
1236 	idm_status_t		idmrc;
1237 	idm_so_svc_t		*so_svc;
1238 	int			rc;
1239 	const uint32_t		off = 0;
1240 	struct sockaddr_in6 	t_addr;
1241 	socklen_t		t_addrlen;
1242 
1243 	bzero(&t_addr, sizeof (struct sockaddr_in6));
1244 	t_addrlen = sizeof (struct sockaddr_in6);
1245 	mutex_enter(&svc->is_mutex);
1246 
1247 	so_svc = svc->is_so_svc;
1248 	so_svc->is_thread_running = B_TRUE;
1249 	so_svc->is_thread_did = so_svc->is_thread->t_did;
1250 
1251 	cv_signal(&svc->is_cv);
1252 
1253 	IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) online", (void *)svc,
1254 	    svc->is_svc_req.sr_port);
1255 
1256 	while (so_svc->is_thread_running) {
1257 		mutex_exit(&svc->is_mutex);
1258 
1259 		if ((rc = ksocket_accept(so_svc->is_so,
1260 		    (struct sockaddr *)&t_addr, &t_addrlen,
1261 		    &new_so, CRED())) != 0) {
1262 			mutex_enter(&svc->is_mutex);
1263 			if (rc == ECONNABORTED)
1264 				continue;
1265 			/* Connection problem */
1266 			break;
1267 		}
1268 		/*
1269 		 * Turn off SO_MAC_EXEMPT so future sobinds succeed
1270 		 */
1271 		(void) ksocket_setsockopt(new_so, SOL_SOCKET, SO_MAC_EXEMPT,
1272 		    (char *)&off, sizeof (off), CRED());
1273 
1274 		idmrc = idm_svc_conn_create(svc, IDM_TRANSPORT_TYPE_SOCKETS,
1275 		    &ic);
1276 		if (idmrc != IDM_STATUS_SUCCESS) {
1277 			/* Drop connection */
1278 			idm_soshutdown(new_so);
1279 			idm_sodestroy(new_so);
1280 			mutex_enter(&svc->is_mutex);
1281 			continue;
1282 		}
1283 
1284 		idmrc = idm_so_tgt_conn_create(ic, new_so);
1285 		if (idmrc != IDM_STATUS_SUCCESS) {
1286 			idm_svc_conn_destroy(ic);
1287 			idm_soshutdown(new_so);
1288 			idm_sodestroy(new_so);
1289 			mutex_enter(&svc->is_mutex);
1290 			continue;
1291 		}
1292 
1293 		/*
1294 		 * Kick the state machine.  At CS_S3_XPT_UP the state machine
1295 		 * will notify the client (target) about the new connection.
1296 		 */
1297 		idm_conn_event(ic, CE_CONNECT_ACCEPT, NULL);
1298 
1299 		mutex_enter(&svc->is_mutex);
1300 	}
1301 	ksocket_rele(so_svc->is_so);
1302 	so_svc->is_thread_running = B_FALSE;
1303 	mutex_exit(&svc->is_mutex);
1304 
1305 	IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) offline", (void *)svc,
1306 	    svc->is_svc_req.sr_port);
1307 
1308 	thread_exit();
1309 }
1310 
1311 /*
1312  * idm_so_free_task_rsrc() stops any ongoing processing of the task and
1313  * frees resources associated with the task.
1314  *
1315  * It's not clear that this should return idm_status_t.  What do we do
1316  * if it fails?
1317  */
1318 static idm_status_t
1319 idm_so_free_task_rsrc(idm_task_t *idt)
1320 {
1321 	idm_buf_t	*idb, *next_idb;
1322 
1323 	/*
1324 	 * There is nothing to cleanup on initiator connections
1325 	 */
1326 	if (IDM_CONN_ISINI(idt->idt_ic))
1327 		return (IDM_STATUS_SUCCESS);
1328 
1329 	/*
1330 	 * If this is a target connection, call idm_buf_rx_from_ini_done for
1331 	 * any buffer on the "outbufv" list with idb->idb_in_transport==B_TRUE.
1332 	 *
1333 	 * In addition, remove any buffers associated with this task from
1334 	 * the ic_tx_list.  We'll do this by walking the idt_inbufv list, but
1335 	 * items don't actually get removed from that list (and completion
1336 	 * routines called) until idm_task_cleanup.
1337 	 */
1338 	mutex_enter(&idt->idt_mutex);
1339 
1340 	for (idb = list_head(&idt->idt_outbufv); idb != NULL; idb = next_idb) {
1341 		next_idb = list_next(&idt->idt_outbufv, idb);
1342 		if (idb->idb_in_transport) {
1343 			/*
1344 			 * idm_buf_rx_from_ini_done releases idt->idt_mutex
1345 			 */
1346 			DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
1347 			    uintptr_t, idb->idb_buf,
1348 			    uint32_t, idb->idb_bufoffset,
1349 			    uint64_t, 0, uint32_t, 0, uint32_t, 0,
1350 			    uint32_t, idb->idb_xfer_len,
1351 			    int, XFER_BUF_RX_FROM_INI);
1352 			idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_ABORTED);
1353 			mutex_enter(&idt->idt_mutex);
1354 		}
1355 	}
1356 
1357 	for (idb = list_head(&idt->idt_inbufv); idb != NULL; idb = next_idb) {
1358 		next_idb = list_next(&idt->idt_inbufv, idb);
1359 		/*
1360 		 * We want to remove these items from the tx_list as well,
1361 		 * but knowing it's in the idt_inbufv list is not a guarantee
1362 		 * that it's in the tx_list.  If it's on the tx list then
1363 		 * let idm_sotx_thread() clean it up.
1364 		 */
1365 		if (idb->idb_in_transport && !idb->idb_tx_thread) {
1366 			/*
1367 			 * idm_buf_tx_to_ini_done releases idt->idt_mutex
1368 			 */
1369 			DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
1370 			    uintptr_t, idb->idb_buf,
1371 			    uint32_t, idb->idb_bufoffset,
1372 			    uint64_t, 0, uint32_t, 0, uint32_t, 0,
1373 			    uint32_t, idb->idb_xfer_len,
1374 			    int, XFER_BUF_TX_TO_INI);
1375 			idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
1376 			mutex_enter(&idt->idt_mutex);
1377 		}
1378 	}
1379 
1380 	mutex_exit(&idt->idt_mutex);
1381 
1382 	return (IDM_STATUS_SUCCESS);
1383 }
1384 
1385 /*
1386  * idm_so_negotiate_key_values() validates the key values for this connection
1387  */
1388 /* ARGSUSED */
1389 static kv_status_t
1390 idm_so_negotiate_key_values(idm_conn_t *it, nvlist_t *request_nvl,
1391     nvlist_t *response_nvl, nvlist_t *negotiated_nvl)
1392 {
1393 	/* All parameters are negotiated at the iscsit level */
1394 	return (KV_HANDLED);
1395 }
1396 
1397 /*
1398  * idm_so_notice_key_values() activates the negotiated key values for
1399  * this connection.
1400  */
1401 static void
1402 idm_so_notice_key_values(idm_conn_t *it, nvlist_t *negotiated_nvl)
1403 {
1404 	char			*nvp_name;
1405 	nvpair_t		*nvp;
1406 	nvpair_t		*next_nvp;
1407 	int			nvrc;
1408 	idm_status_t		idm_status;
1409 	const idm_kv_xlate_t	*ikvx;
1410 	uint64_t		num_val;
1411 
1412 	for (nvp = nvlist_next_nvpair(negotiated_nvl, NULL);
1413 	    nvp != NULL; nvp = next_nvp) {
1414 		next_nvp = nvlist_next_nvpair(negotiated_nvl, nvp);
1415 		nvp_name = nvpair_name(nvp);
1416 
1417 		ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name));
1418 		switch (ikvx->ik_key_id) {
1419 		case KI_HEADER_DIGEST:
1420 		case KI_DATA_DIGEST:
1421 			idm_status = idm_so_handle_digest(it, nvp, ikvx);
1422 			ASSERT(idm_status == 0);
1423 
1424 			/* Remove processed item from negotiated_nvl list */
1425 			nvrc = nvlist_remove_all(
1426 			    negotiated_nvl, ikvx->ik_key_name);
1427 			ASSERT(nvrc == 0);
1428 			break;
1429 		case KI_MAX_RECV_DATA_SEGMENT_LENGTH:
1430 			/*
1431 			 * Just pass the value down to idm layer.
1432 			 * No need to remove it from negotiated_nvl list here.
1433 			 */
1434 			nvrc = nvpair_value_uint64(nvp, &num_val);
1435 			ASSERT(nvrc == 0);
1436 			it->ic_conn_params.max_xmit_dataseglen =
1437 			    (uint32_t)num_val;
1438 			break;
1439 		default:
1440 			break;
1441 		}
1442 	}
1443 }
1444 
1445 /*
1446  * idm_so_declare_key_values() declares the key values for this connection
1447  */
1448 /* ARGSUSED */
1449 static kv_status_t
1450 idm_so_declare_key_values(idm_conn_t *it, nvlist_t *config_nvl,
1451     nvlist_t *outgoing_nvl)
1452 {
1453 	char			*nvp_name;
1454 	nvpair_t		*nvp;
1455 	nvpair_t		*next_nvp;
1456 	kv_status_t		kvrc;
1457 	int			nvrc = 0;
1458 	const idm_kv_xlate_t	*ikvx;
1459 	uint64_t		num_val;
1460 
1461 	for (nvp = nvlist_next_nvpair(config_nvl, NULL);
1462 	    nvp != NULL && nvrc == 0; nvp = next_nvp) {
1463 		next_nvp = nvlist_next_nvpair(config_nvl, nvp);
1464 		nvp_name = nvpair_name(nvp);
1465 
1466 		ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name));
1467 		switch (ikvx->ik_key_id) {
1468 		case KI_MAX_RECV_DATA_SEGMENT_LENGTH:
1469 			if ((nvrc = nvpair_value_uint64(nvp, &num_val)) != 0) {
1470 				break;
1471 			}
1472 			if (outgoing_nvl &&
1473 			    (nvrc = nvlist_add_uint64(outgoing_nvl,
1474 			    nvp_name, num_val)) != 0) {
1475 				break;
1476 			}
1477 			it->ic_conn_params.max_recv_dataseglen =
1478 			    (uint32_t)num_val;
1479 			break;
1480 		default:
1481 			break;
1482 		}
1483 	}
1484 	kvrc = idm_nvstat_to_kvstat(nvrc);
1485 	return (kvrc);
1486 }
1487 
1488 static idm_status_t
1489 idm_so_handle_digest(idm_conn_t *it, nvpair_t *digest_choice,
1490     const idm_kv_xlate_t *ikvx)
1491 {
1492 	int			nvrc;
1493 	char			*digest_choice_string;
1494 
1495 	nvrc = nvpair_value_string(digest_choice,
1496 	    &digest_choice_string);
1497 	ASSERT(nvrc == 0);
1498 	if (strcasecmp(digest_choice_string, "crc32c") == 0) {
1499 		switch (ikvx->ik_key_id) {
1500 		case KI_HEADER_DIGEST:
1501 			it->ic_conn_flags |= IDM_CONN_HEADER_DIGEST;
1502 			break;
1503 		case KI_DATA_DIGEST:
1504 			it->ic_conn_flags |= IDM_CONN_DATA_DIGEST;
1505 			break;
1506 		default:
1507 			ASSERT(0);
1508 			break;
1509 		}
1510 	} else if (strcasecmp(digest_choice_string, "none") == 0) {
1511 		switch (ikvx->ik_key_id) {
1512 		case KI_HEADER_DIGEST:
1513 			it->ic_conn_flags &= ~IDM_CONN_HEADER_DIGEST;
1514 			break;
1515 		case KI_DATA_DIGEST:
1516 			it->ic_conn_flags &= ~IDM_CONN_DATA_DIGEST;
1517 			break;
1518 		default:
1519 			ASSERT(0);
1520 			break;
1521 		}
1522 	} else {
1523 		ASSERT(0);
1524 	}
1525 
1526 	return (IDM_STATUS_SUCCESS);
1527 }
1528 
1529 
1530 /*
1531  * idm_so_conn_is_capable() verifies that the passed connection is provided
1532  * for by the sockets interface.
1533  */
1534 /* ARGSUSED */
1535 static boolean_t
1536 idm_so_conn_is_capable(idm_conn_req_t *ic, idm_transport_caps_t *caps)
1537 {
1538 	return (B_TRUE);
1539 }
1540 
1541 /*
1542  * idm_so_rx_datain() validates the Data Sequence number of the PDU. The
1543  * idm_sorecv_scsidata() function invoked earlier actually reads the data
1544  * off the socket into the appropriate buffers.
1545  */
1546 static void
1547 idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu)
1548 {
1549 	iscsi_data_hdr_t	*bhs;
1550 	idm_task_t		*idt;
1551 	idm_buf_t		*idb;
1552 	uint32_t		datasn;
1553 	size_t			offset;
1554 	iscsi_hdr_t		*ihp = (iscsi_hdr_t *)pdu->isp_hdr;
1555 	iscsi_data_rsp_hdr_t    *idrhp = (iscsi_data_rsp_hdr_t *)ihp;
1556 
1557 	ASSERT(ic != NULL);
1558 	ASSERT(pdu != NULL);
1559 
1560 	bhs	= (iscsi_data_hdr_t *)pdu->isp_hdr;
1561 	datasn	= ntohl(bhs->datasn);
1562 	offset	= ntohl(bhs->offset);
1563 
1564 	ASSERT(bhs->opcode == ISCSI_OP_SCSI_DATA_RSP);
1565 
1566 	/*
1567 	 * Look up the task corresponding to the initiator task tag
1568 	 * to get the buffers affiliated with the task.
1569 	 */
1570 	idt = idm_task_find(ic, bhs->itt, bhs->ttt);
1571 	if (idt == NULL) {
1572 		IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: failed to find task");
1573 		idm_pdu_rx_protocol_error(ic, pdu);
1574 		return;
1575 	}
1576 
1577 	idb = pdu->isp_sorx_buf;
1578 	if (idb == NULL) {
1579 		IDM_CONN_LOG(CE_WARN,
1580 		    "idm_so_rx_datain: failed to find buffer");
1581 		idm_task_rele(idt);
1582 		idm_pdu_rx_protocol_error(ic, pdu);
1583 		return;
1584 	}
1585 
1586 	/*
1587 	 * DataSN values should be sequential and should not have any gaps or
1588 	 * repetitions. Check the DataSN with the one stored in the task.
1589 	 */
1590 	if (datasn == idt->idt_exp_datasn) {
1591 		idt->idt_exp_datasn++; /* keep track of DataSN received */
1592 	} else {
1593 		IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: datasn out of order");
1594 		idm_task_rele(idt);
1595 		idm_pdu_rx_protocol_error(ic, pdu);
1596 		return;
1597 	}
1598 
1599 	/*
1600 	 * PDUs in a sequence should be in continuously increasing
1601 	 * address offset
1602 	 */
1603 	if (offset != idb->idb_exp_offset) {
1604 		IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: unexpected offset");
1605 		idm_task_rele(idt);
1606 		idm_pdu_rx_protocol_error(ic, pdu);
1607 		return;
1608 	}
1609 	/* Expected next relative buffer offset */
1610 	idb->idb_exp_offset += n2h24(bhs->dlength);
1611 	idt->idt_rx_bytes += n2h24(bhs->dlength);
1612 
1613 	idm_task_rele(idt);
1614 
1615 	/*
1616 	 * For now call scsi_rsp which will process the data rsp
1617 	 * Revisit, need to provide an explicit client entry point for
1618 	 * phase collapse completions.
1619 	 */
1620 	if (((ihp->opcode & ISCSI_OPCODE_MASK) == ISCSI_OP_SCSI_DATA_RSP) &&
1621 	    (idrhp->flags & ISCSI_FLAG_DATA_STATUS)) {
1622 		(*ic->ic_conn_ops.icb_rx_scsi_rsp)(ic, pdu);
1623 	}
1624 
1625 	idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1626 }
1627 
1628 /*
1629  * The idm_so_rx_dataout() function is used by the iSCSI target to read
1630  * data from the Data-Out PDU sent by the iSCSI initiator.
1631  *
1632  * This function gets the Initiator Task Tag from the PDU BHS and looks up the
1633  * task to get the buffers associated with the PDU. A PDU might span buffers.
1634  * The data is then read into the respective buffer.
1635  */
1636 static void
1637 idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu)
1638 {
1639 
1640 	iscsi_data_hdr_t	*bhs;
1641 	idm_task_t		*idt;
1642 	idm_buf_t		*idb;
1643 	size_t			offset;
1644 
1645 	ASSERT(ic != NULL);
1646 	ASSERT(pdu != NULL);
1647 
1648 	bhs = (iscsi_data_hdr_t *)pdu->isp_hdr;
1649 	offset = ntohl(bhs->offset);
1650 	ASSERT(bhs->opcode == ISCSI_OP_SCSI_DATA);
1651 
1652 	/*
1653 	 * Look up the task corresponding to the initiator task tag
1654 	 * to get the buffers affiliated with the task.
1655 	 */
1656 	idt = idm_task_find(ic, bhs->itt, bhs->ttt);
1657 	if (idt == NULL) {
1658 		IDM_CONN_LOG(CE_WARN,
1659 		    "idm_so_rx_dataout: failed to find task");
1660 		idm_pdu_rx_protocol_error(ic, pdu);
1661 		return;
1662 	}
1663 
1664 	idb = pdu->isp_sorx_buf;
1665 	if (idb == NULL) {
1666 		IDM_CONN_LOG(CE_WARN,
1667 		    "idm_so_rx_dataout: failed to find buffer");
1668 		idm_task_rele(idt);
1669 		idm_pdu_rx_protocol_error(ic, pdu);
1670 		return;
1671 	}
1672 
1673 	/* Keep track of data transferred - check data offsets */
1674 	if (offset != idb->idb_exp_offset) {
1675 		IDM_CONN_LOG(CE_NOTE, "idm_so_rx_dataout: offset out of seq: "
1676 		    "%ld, %d", offset, idb->idb_exp_offset);
1677 		idm_task_rele(idt);
1678 		idm_pdu_rx_protocol_error(ic, pdu);
1679 		return;
1680 	}
1681 	/* Expected next relative offset */
1682 	idb->idb_exp_offset += ntoh24(bhs->dlength);
1683 	idt->idt_rx_bytes += n2h24(bhs->dlength);
1684 
1685 	/*
1686 	 * Call the buffer callback when the transfer is complete
1687 	 *
1688 	 * The connection state machine should only abort tasks after
1689 	 * shutting down the connection so we are assured that there
1690 	 * won't be a simultaneous attempt to abort this task at the
1691 	 * same time as we are processing this PDU (due to a connection
1692 	 * state change).
1693 	 */
1694 	if (bhs->flags & ISCSI_FLAG_FINAL) {
1695 		/*
1696 		 * We only want to call idm_buf_rx_from_ini_done once
1697 		 * per transfer.  It's possible that this task has
1698 		 * already been aborted in which case
1699 		 * idm_so_free_task_rsrc will call idm_buf_rx_from_ini_done
1700 		 * for each buffer with idb_in_transport==B_TRUE.  To
1701 		 * close this window and ensure that this doesn't happen,
1702 		 * we'll clear idb->idb_in_transport now while holding
1703 		 * the task mutex.   This is only really an issue for
1704 		 * SCSI task abort -- if tasks were being aborted because
1705 		 * of a connection state change the state machine would
1706 		 * have already stopped the receive thread.
1707 		 */
1708 		mutex_enter(&idt->idt_mutex);
1709 
1710 		/*
1711 		 * Release the task hold here (obtained in idm_task_find)
1712 		 * because the task may complete synchronously during
1713 		 * idm_buf_rx_from_ini_done.  Since we still have an active
1714 		 * buffer we know there is at least one additional hold on idt.
1715 		 */
1716 		idm_task_rele(idt);
1717 
1718 		/*
1719 		 * idm_buf_rx_from_ini_done releases idt->idt_mutex
1720 		 */
1721 		DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
1722 		    uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
1723 		    uint64_t, 0, uint32_t, 0, uint32_t, 0,
1724 		    uint32_t, idb->idb_xfer_len,
1725 		    int, XFER_BUF_RX_FROM_INI);
1726 		idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_SUCCESS);
1727 		idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1728 		return;
1729 	}
1730 
1731 	idm_task_rele(idt);
1732 	idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1733 }
1734 
1735 /*
1736  * The idm_so_rx_rtt() function is used by the iSCSI initiator to handle
1737  * the R2T PDU sent by the iSCSI target indicating that it is ready to
1738  * accept data. This gets the Initiator Task Tag (itt) from the PDU BHS
1739  * and looks up the task in the task tree using the itt to get the output
1740  * buffers associated the task. The R2T PDU contains the offset of the
1741  * requested data and the data length. This function then constructs a
1742  * sequence of iSCSI PDUs and outputs the requested data. Each Data-Out
1743  * PDU is associated with the R2T by the Target Transfer Tag  (ttt).
1744  */
1745 
1746 static void
1747 idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu)
1748 {
1749 	idm_task_t		*idt;
1750 	idm_buf_t		*idb;
1751 	iscsi_rtt_hdr_t		*rtt_hdr;
1752 	uint32_t		data_offset;
1753 	uint32_t		data_length;
1754 
1755 	ASSERT(ic != NULL);
1756 	ASSERT(pdu != NULL);
1757 
1758 	rtt_hdr	= (iscsi_rtt_hdr_t *)pdu->isp_hdr;
1759 	data_offset = ntohl(rtt_hdr->data_offset);
1760 	data_length = ntohl(rtt_hdr->data_length);
1761 	idt	= idm_task_find(ic, rtt_hdr->itt, rtt_hdr->ttt);
1762 
1763 	if (idt == NULL) {
1764 		IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find task");
1765 		idm_pdu_rx_protocol_error(ic, pdu);
1766 		return;
1767 	}
1768 
1769 	/* Find the buffer bound to the task by the iSCSI initiator */
1770 	mutex_enter(&idt->idt_mutex);
1771 	idb = idm_buf_find(&idt->idt_outbufv, data_offset);
1772 	if (idb == NULL) {
1773 		mutex_exit(&idt->idt_mutex);
1774 		idm_task_rele(idt);
1775 		IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find buffer");
1776 		idm_pdu_rx_protocol_error(ic, pdu);
1777 		return;
1778 	}
1779 
1780 	/* return buffer contains this data */
1781 	if (data_offset + data_length > idb->idb_buflen) {
1782 		/* Overflow */
1783 		mutex_exit(&idt->idt_mutex);
1784 		idm_task_rele(idt);
1785 		IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: read from outside "
1786 		    "buffer");
1787 		idm_pdu_rx_protocol_error(ic, pdu);
1788 		return;
1789 	}
1790 
1791 	idt->idt_r2t_ttt = rtt_hdr->ttt;
1792 	idt->idt_exp_datasn = 0;
1793 
1794 	idm_so_send_rtt_data(ic, idt, idb, data_offset,
1795 	    ntohl(rtt_hdr->data_length));
1796 	/*
1797 	 * the idt_mutex is released in idm_so_send_rtt_data
1798 	 */
1799 
1800 	idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1801 	idm_task_rele(idt);
1802 
1803 }
1804 
1805 idm_status_t
1806 idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu)
1807 {
1808 	uint8_t		pad[ISCSI_PAD_WORD_LEN];
1809 	int		pad_len;
1810 	uint32_t	data_digest_crc;
1811 	uint32_t	crc_calculated;
1812 	int		total_len;
1813 	idm_so_conn_t	*so_conn;
1814 
1815 	so_conn = ic->ic_transport_private;
1816 
1817 	pad_len = ((ISCSI_PAD_WORD_LEN -
1818 	    (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) &
1819 	    (ISCSI_PAD_WORD_LEN - 1));
1820 
1821 	ASSERT(pdu->isp_iovlen < (PDU_MAX_IOVLEN - 2)); /* pad + data digest */
1822 
1823 	total_len = pdu->isp_datalen;
1824 
1825 	if (pad_len) {
1826 		pdu->isp_iov[pdu->isp_iovlen].iov_base	= (char *)&pad;
1827 		pdu->isp_iov[pdu->isp_iovlen].iov_len	= pad_len;
1828 		total_len		+= pad_len;
1829 		pdu->isp_iovlen++;
1830 	}
1831 
1832 	/* setup data digest */
1833 	if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) {
1834 		pdu->isp_iov[pdu->isp_iovlen].iov_base =
1835 		    (char *)&data_digest_crc;
1836 		pdu->isp_iov[pdu->isp_iovlen].iov_len =
1837 		    sizeof (data_digest_crc);
1838 		total_len		+= sizeof (data_digest_crc);
1839 		pdu->isp_iovlen++;
1840 	}
1841 
1842 	pdu->isp_data = (uint8_t *)(uintptr_t)pdu->isp_iov[0].iov_base;
1843 
1844 	if (idm_iov_sorecv(so_conn->ic_so, &pdu->isp_iov[0],
1845 	    pdu->isp_iovlen, total_len) != 0) {
1846 		return (IDM_STATUS_IO);
1847 	}
1848 
1849 	if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) {
1850 		crc_calculated = idm_crc32c(pdu->isp_data,
1851 		    pdu->isp_datalen);
1852 		if (pad_len) {
1853 			crc_calculated = idm_crc32c_continued((char *)&pad,
1854 			    pad_len, crc_calculated);
1855 		}
1856 		if (crc_calculated != data_digest_crc) {
1857 			IDM_CONN_LOG(CE_WARN,
1858 			    "idm_sorecvdata: "
1859 			    "CRC error: actual 0x%x, calc 0x%x",
1860 			    data_digest_crc, crc_calculated);
1861 
1862 			/* Invalid Data Digest */
1863 			return (IDM_STATUS_DATA_DIGEST);
1864 		}
1865 	}
1866 
1867 	return (IDM_STATUS_SUCCESS);
1868 }
1869 
1870 /*
1871  * idm_sorecv_scsidata() is used to receive scsi data from the socket. The
1872  * Data-type PDU header must be read into the idm_pdu_t structure prior to
1873  * calling this function.
1874  */
1875 idm_status_t
1876 idm_sorecv_scsidata(idm_conn_t *ic, idm_pdu_t *pdu)
1877 {
1878 	iscsi_data_hdr_t	*bhs;
1879 	idm_task_t		*task;
1880 	uint32_t		offset;
1881 	uint8_t			opcode;
1882 	uint32_t		dlength;
1883 	list_t			*buflst;
1884 	uint32_t		xfer_bytes;
1885 	idm_status_t		status;
1886 
1887 	ASSERT(ic != NULL);
1888 	ASSERT(pdu != NULL);
1889 
1890 	bhs	= (iscsi_data_hdr_t *)pdu->isp_hdr;
1891 
1892 	offset	= ntohl(bhs->offset);
1893 	opcode	= bhs->opcode;
1894 	dlength = n2h24(bhs->dlength);
1895 
1896 	ASSERT((opcode == ISCSI_OP_SCSI_DATA_RSP) ||
1897 	    (opcode == ISCSI_OP_SCSI_DATA));
1898 
1899 	/*
1900 	 * Successful lookup implicitly gets a "hold" on the task.  This
1901 	 * hold must be released before leaving this function.  At one
1902 	 * point we were caching this task context and retaining the hold
1903 	 * but it turned out to be very difficult to release the hold properly.
1904 	 * The task can be aborted and the connection shutdown between this
1905 	 * call and the subsequent expected call to idm_so_rx_datain/
1906 	 * idm_so_rx_dataout (in which case those functions are not called).
1907 	 * Releasing the hold in the PDU callback doesn't work well either
1908 	 * because the whole task may be completed by then at which point
1909 	 * it is too late to release the hold -- for better or worse this
1910 	 * code doesn't wait on the refcnts during normal operation.
1911 	 * idm_task_find() is very fast and it is not a huge burden if we
1912 	 * have to do it twice.
1913 	 */
1914 	task = idm_task_find(ic, bhs->itt, bhs->ttt);
1915 	if (task == NULL) {
1916 		IDM_CONN_LOG(CE_WARN,
1917 		    "idm_sorecv_scsidata: could not find task");
1918 		return (IDM_STATUS_FAIL);
1919 	}
1920 
1921 	mutex_enter(&task->idt_mutex);
1922 	buflst	= (opcode == ISCSI_OP_SCSI_DATA_RSP) ?
1923 	    &task->idt_inbufv : &task->idt_outbufv;
1924 	pdu->isp_sorx_buf = idm_buf_find(buflst, offset);
1925 	mutex_exit(&task->idt_mutex);
1926 
1927 	if (pdu->isp_sorx_buf == NULL) {
1928 		idm_task_rele(task);
1929 		IDM_CONN_LOG(CE_WARN, "idm_sorecv_scsidata: could not find "
1930 		    "buffer for offset %x opcode=%x",
1931 		    offset, opcode);
1932 		return (IDM_STATUS_FAIL);
1933 	}
1934 
1935 	xfer_bytes = idm_fill_iov(pdu, pdu->isp_sorx_buf, offset, dlength);
1936 	ASSERT(xfer_bytes != 0);
1937 	if (xfer_bytes != dlength) {
1938 		idm_task_rele(task);
1939 		/*
1940 		 * Buffer overflow, connection error.  The PDU data is still
1941 		 * sitting in the socket so we can't use the connection
1942 		 * again until that data is drained.
1943 		 */
1944 		return (IDM_STATUS_FAIL);
1945 	}
1946 
1947 	status = idm_sorecvdata(ic, pdu);
1948 
1949 	idm_task_rele(task);
1950 
1951 	return (status);
1952 }
1953 
1954 static uint32_t
1955 idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb, uint32_t ro, uint32_t dlength)
1956 {
1957 	uint32_t	buf_ro = ro - idb->idb_bufoffset;
1958 	uint32_t	xfer_len = min(dlength, idb->idb_buflen - buf_ro);
1959 
1960 	ASSERT(ro >= idb->idb_bufoffset);
1961 
1962 	pdu->isp_iov[pdu->isp_iovlen].iov_base	=
1963 	    (caddr_t)idb->idb_buf + buf_ro;
1964 	pdu->isp_iov[pdu->isp_iovlen].iov_len	= xfer_len;
1965 	pdu->isp_iovlen++;
1966 
1967 	return (xfer_len);
1968 }
1969 
1970 int
1971 idm_sorecv_nonscsidata(idm_conn_t *ic, idm_pdu_t *pdu)
1972 {
1973 	pdu->isp_data = kmem_alloc(pdu->isp_datalen, KM_SLEEP);
1974 	ASSERT(pdu->isp_data != NULL);
1975 
1976 	pdu->isp_databuflen = pdu->isp_datalen;
1977 	pdu->isp_iov[0].iov_base = (caddr_t)pdu->isp_data;
1978 	pdu->isp_iov[0].iov_len = pdu->isp_datalen;
1979 	pdu->isp_iovlen = 1;
1980 	/*
1981 	 * Since we are associating a new data buffer with this received
1982 	 * PDU we need to set a specific callback to free the data
1983 	 * after the PDU is processed.
1984 	 */
1985 	pdu->isp_flags |= IDM_PDU_ADDL_DATA;
1986 	pdu->isp_callback = idm_sorx_addl_pdu_cb;
1987 
1988 	return (idm_sorecvdata(ic, pdu));
1989 }
1990 
1991 void
1992 idm_sorx_thread(void *arg)
1993 {
1994 	boolean_t	conn_failure = B_FALSE;
1995 	idm_conn_t	*ic = (idm_conn_t *)arg;
1996 	idm_so_conn_t	*so_conn;
1997 	idm_pdu_t	*pdu;
1998 	idm_status_t	rc;
1999 
2000 	idm_conn_hold(ic);
2001 
2002 	mutex_enter(&ic->ic_mutex);
2003 
2004 	so_conn = ic->ic_transport_private;
2005 	so_conn->ic_rx_thread_running = B_TRUE;
2006 	so_conn->ic_rx_thread_did = so_conn->ic_rx_thread->t_did;
2007 	cv_signal(&ic->ic_cv);
2008 
2009 	while (so_conn->ic_rx_thread_running) {
2010 		mutex_exit(&ic->ic_mutex);
2011 
2012 		/*
2013 		 * Get PDU with default header size (large enough for
2014 		 * BHS plus any anticipated AHS).  PDU from
2015 		 * the cache will have all values set correctly
2016 		 * for sockets RX including callback.
2017 		 */
2018 		pdu = kmem_cache_alloc(idm.idm_sorx_pdu_cache, KM_SLEEP);
2019 		pdu->isp_ic = ic;
2020 		pdu->isp_flags = 0;
2021 		pdu->isp_transport_hdrlen = 0;
2022 
2023 		if ((rc = idm_sorecvhdr(ic, pdu)) != 0) {
2024 			/*
2025 			 * Call idm_pdu_complete so that we call the callback
2026 			 * and ensure any memory allocated in idm_sorecvhdr
2027 			 * gets freed up.
2028 			 */
2029 			idm_pdu_complete(pdu, IDM_STATUS_FAIL);
2030 
2031 			/*
2032 			 * If ic_rx_thread_running is still set then
2033 			 * this is some kind of connection problem
2034 			 * on the socket.  In this case we want to
2035 			 * generate an event.  Otherwise some other
2036 			 * thread closed the socket due to another
2037 			 * issue in which case we don't need to
2038 			 * generate an event.
2039 			 */
2040 			mutex_enter(&ic->ic_mutex);
2041 			if (so_conn->ic_rx_thread_running) {
2042 				conn_failure = B_TRUE;
2043 				so_conn->ic_rx_thread_running = B_FALSE;
2044 			}
2045 
2046 			continue;
2047 		}
2048 
2049 		/*
2050 		 * Header has been read and validated.  Now we need
2051 		 * to read the PDU data payload (if present).  SCSI data
2052 		 * need to be transferred from the socket directly into
2053 		 * the associated transfer buffer for the SCSI task.
2054 		 */
2055 		if (pdu->isp_datalen != 0) {
2056 			if ((IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA) ||
2057 			    (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP)) {
2058 				rc = idm_sorecv_scsidata(ic, pdu);
2059 				/*
2060 				 * All SCSI errors are fatal to the
2061 				 * connection right now since we have no
2062 				 * place to put the data.  What we need
2063 				 * is some kind of sink to dispose of unwanted
2064 				 * SCSI data.  For example an invalid task tag
2065 				 * should not kill the connection (although
2066 				 * we may want to drop the connection).
2067 				 */
2068 			} else {
2069 				/*
2070 				 * Not data PDUs so allocate a buffer for the
2071 				 * data segment and read the remaining data.
2072 				 */
2073 				rc = idm_sorecv_nonscsidata(ic, pdu);
2074 			}
2075 			if (rc != 0) {
2076 				/*
2077 				 * Call idm_pdu_complete so that we call the
2078 				 * callback and ensure any memory allocated
2079 				 * in idm_sorecvhdr gets freed up.
2080 				 */
2081 				idm_pdu_complete(pdu, IDM_STATUS_FAIL);
2082 
2083 				/*
2084 				 * If ic_rx_thread_running is still set then
2085 				 * this is some kind of connection problem
2086 				 * on the socket.  In this case we want to
2087 				 * generate an event.  Otherwise some other
2088 				 * thread closed the socket due to another
2089 				 * issue in which case we don't need to
2090 				 * generate an event.
2091 				 */
2092 				mutex_enter(&ic->ic_mutex);
2093 				if (so_conn->ic_rx_thread_running) {
2094 					conn_failure = B_TRUE;
2095 					so_conn->ic_rx_thread_running = B_FALSE;
2096 				}
2097 				continue;
2098 			}
2099 		}
2100 
2101 		/*
2102 		 * Process RX PDU
2103 		 */
2104 		idm_pdu_rx(ic, pdu);
2105 
2106 		mutex_enter(&ic->ic_mutex);
2107 	}
2108 
2109 	mutex_exit(&ic->ic_mutex);
2110 
2111 	/*
2112 	 * If we dropped out of the RX processing loop because of
2113 	 * a socket problem or other connection failure (including
2114 	 * digest errors) then we need to generate a state machine
2115 	 * event to shut the connection down.
2116 	 * If the state machine is already in, for example, INIT_ERROR, this
2117 	 * event will get dropped, and the TX thread will never be notified
2118 	 * to shut down.  To be safe, we'll just notify it here.
2119 	 */
2120 	if (conn_failure) {
2121 		if (so_conn->ic_tx_thread_running) {
2122 			so_conn->ic_tx_thread_running = B_FALSE;
2123 			mutex_enter(&so_conn->ic_tx_mutex);
2124 			cv_signal(&so_conn->ic_tx_cv);
2125 			mutex_exit(&so_conn->ic_tx_mutex);
2126 		}
2127 
2128 		idm_conn_event(ic, CE_TRANSPORT_FAIL, rc);
2129 	}
2130 
2131 	idm_conn_rele(ic);
2132 
2133 	thread_exit();
2134 }
2135 
2136 /*
2137  * idm_so_tx
2138  *
2139  * This is the implementation of idm_transport_ops_t's it_tx_pdu entry
2140  * point.  By definition, it is supposed to be fast.  So, simply queue
2141  * the entry and return.  The real work is done by idm_i_so_tx() via
2142  * idm_sotx_thread().
2143  */
2144 
2145 static void
2146 idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu)
2147 {
2148 	idm_so_conn_t *so_conn = ic->ic_transport_private;
2149 
2150 	ASSERT(pdu->isp_ic == ic);
2151 	mutex_enter(&so_conn->ic_tx_mutex);
2152 
2153 	if (!so_conn->ic_tx_thread_running) {
2154 		mutex_exit(&so_conn->ic_tx_mutex);
2155 		idm_pdu_complete(pdu, IDM_STATUS_ABORTED);
2156 		return;
2157 	}
2158 
2159 	list_insert_tail(&so_conn->ic_tx_list, (void *)pdu);
2160 	cv_signal(&so_conn->ic_tx_cv);
2161 	mutex_exit(&so_conn->ic_tx_mutex);
2162 }
2163 
2164 static idm_status_t
2165 idm_i_so_tx(idm_pdu_t *pdu)
2166 {
2167 	idm_conn_t	*ic = pdu->isp_ic;
2168 	idm_status_t	status = IDM_STATUS_SUCCESS;
2169 	uint8_t		pad[ISCSI_PAD_WORD_LEN];
2170 	int		pad_len;
2171 	uint32_t	hdr_digest_crc;
2172 	uint32_t	data_digest_crc = 0;
2173 	int		total_len = 0;
2174 	int		iovlen = 0;
2175 	struct iovec	iov[6];
2176 	idm_so_conn_t	*so_conn;
2177 
2178 	so_conn = ic->ic_transport_private;
2179 
2180 	/* Setup BHS */
2181 	iov[iovlen].iov_base	= (caddr_t)pdu->isp_hdr;
2182 	iov[iovlen].iov_len	= pdu->isp_hdrlen;
2183 	total_len		+= iov[iovlen].iov_len;
2184 	iovlen++;
2185 
2186 	/* Setup header digest */
2187 	if (((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) &&
2188 	    (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST)) {
2189 		hdr_digest_crc = idm_crc32c(pdu->isp_hdr, pdu->isp_hdrlen);
2190 
2191 		iov[iovlen].iov_base	= (caddr_t)&hdr_digest_crc;
2192 		iov[iovlen].iov_len	= sizeof (hdr_digest_crc);
2193 		total_len		+= iov[iovlen].iov_len;
2194 		iovlen++;
2195 	}
2196 
2197 	/* Setup the data */
2198 	if (pdu->isp_datalen) {
2199 		idm_task_t		*idt;
2200 		idm_buf_t		*idb;
2201 		iscsi_data_hdr_t	*ihp;
2202 		ihp = (iscsi_data_hdr_t *)pdu->isp_hdr;
2203 		/* Write of immediate data */
2204 		if (ic->ic_ffp &&
2205 		    (ihp->opcode == ISCSI_OP_SCSI_CMD ||
2206 		    ihp->opcode == ISCSI_OP_SCSI_DATA)) {
2207 			idt = idm_task_find(ic, ihp->itt, ihp->ttt);
2208 			if (idt) {
2209 				mutex_enter(&idt->idt_mutex);
2210 				idb = idm_buf_find(&idt->idt_outbufv, 0);
2211 				mutex_exit(&idt->idt_mutex);
2212 				/*
2213 				 * If the initiator call to idm_buf_alloc
2214 				 * failed then we can get to this point
2215 				 * without a bound buffer.  The associated
2216 				 * connection failure will clean things up
2217 				 * later.  It would be nice to come up with
2218 				 * a cleaner way to handle this.  In
2219 				 * particular it seems absurd to look up
2220 				 * the task and the buffer just to update
2221 				 * this counter.
2222 				 */
2223 				if (idb)
2224 					idb->idb_xfer_len += pdu->isp_datalen;
2225 				idm_task_rele(idt);
2226 			}
2227 		}
2228 
2229 		iov[iovlen].iov_base = (caddr_t)pdu->isp_data;
2230 		iov[iovlen].iov_len  = pdu->isp_datalen;
2231 		total_len += iov[iovlen].iov_len;
2232 		iovlen++;
2233 	}
2234 
2235 	/* Setup the data pad if necessary */
2236 	pad_len = ((ISCSI_PAD_WORD_LEN -
2237 	    (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) &
2238 	    (ISCSI_PAD_WORD_LEN - 1));
2239 
2240 	if (pad_len) {
2241 		bzero(pad, sizeof (pad));
2242 		iov[iovlen].iov_base = (void *)&pad;
2243 		iov[iovlen].iov_len  = pad_len;
2244 		total_len		+= iov[iovlen].iov_len;
2245 		iovlen++;
2246 	}
2247 
2248 	/*
2249 	 * Setup the data digest if enabled.  Data-digest is not sent
2250 	 * for login-phase PDUs.
2251 	 */
2252 	if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) &&
2253 	    ((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) &&
2254 	    (pdu->isp_datalen || pad_len)) {
2255 		/*
2256 		 * RFC3720/10.2.3: A zero-length Data Segment also
2257 		 * implies a zero-length data digest.
2258 		 */
2259 		if (pdu->isp_datalen) {
2260 			data_digest_crc = idm_crc32c(pdu->isp_data,
2261 			    pdu->isp_datalen);
2262 		}
2263 		if (pad_len) {
2264 			data_digest_crc = idm_crc32c_continued(&pad,
2265 			    pad_len, data_digest_crc);
2266 		}
2267 
2268 		iov[iovlen].iov_base	= (caddr_t)&data_digest_crc;
2269 		iov[iovlen].iov_len	= sizeof (data_digest_crc);
2270 		total_len		+= iov[iovlen].iov_len;
2271 		iovlen++;
2272 	}
2273 
2274 	/* Transmit the PDU */
2275 	if (idm_iov_sosend(so_conn->ic_so, &iov[0], iovlen,
2276 	    total_len) != 0) {
2277 		/* Set error status */
2278 		IDM_CONN_LOG(CE_WARN,
2279 		    "idm_so_tx: failed to transmit the PDU, so: %p ic: %p "
2280 		    "data: %p", (void *) so_conn->ic_so, (void *) ic,
2281 		    (void *) pdu->isp_data);
2282 		status = IDM_STATUS_IO;
2283 	}
2284 
2285 	/*
2286 	 * Success does not mean that the PDU actually reached the
2287 	 * remote node since it could get dropped along the way.
2288 	 */
2289 	idm_pdu_complete(pdu, status);
2290 
2291 	return (status);
2292 }
2293 
2294 /*
2295  * The idm_so_buf_tx_to_ini() is used by the target iSCSI layer to transmit the
2296  * Data-In PDUs using sockets. Based on the negotiated MaxRecvDataSegmentLength,
2297  * the buffer is segmented into a sequence of Data-In PDUs, ordered by DataSN.
2298  * A target can invoke this function multiple times for a single read command
2299  * (identified by the same ITT) to split the input into several sequences.
2300  *
2301  * DataSN starts with 0 for the first data PDU of an input command and advances
2302  * by 1 for each subsequent data PDU. Each sequence will have its own F bit,
2303  * which is set to 1 for the last data PDU of a sequence.
2304  * If the initiator supports phase collapse, the status bit must be set along
2305  * with the F bit to indicate that the status is shipped together with the last
2306  * Data-In PDU.
2307  *
2308  * The data PDUs within a sequence will be sent in order with the buffer offset
2309  * in increasing order. i.e. initiator and target must have negotiated the
2310  * "DataPDUInOrder" to "Yes". The order between sequences is not enforced.
2311  *
2312  * Caller holds idt->idt_mutex
2313  */
2314 static idm_status_t
2315 idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb)
2316 {
2317 	idm_so_conn_t	*so_conn = idb->idb_ic->ic_transport_private;
2318 	idm_pdu_t	tmppdu;
2319 
2320 	ASSERT(mutex_owned(&idt->idt_mutex));
2321 
2322 	/*
2323 	 * Put the idm_buf_t on the tx queue.  It will be transmitted by
2324 	 * idm_sotx_thread.
2325 	 */
2326 	mutex_enter(&so_conn->ic_tx_mutex);
2327 
2328 	DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic,
2329 	    uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2330 	    uint64_t, 0, uint32_t, 0, uint32_t, 0,
2331 	    uint32_t, idb->idb_xfer_len, int, XFER_BUF_TX_TO_INI);
2332 
2333 	if (!so_conn->ic_tx_thread_running) {
2334 		mutex_exit(&so_conn->ic_tx_mutex);
2335 		/*
2336 		 * Don't release idt->idt_mutex since we're supposed to hold
2337 		 * in when calling idm_buf_tx_to_ini_done
2338 		 */
2339 		DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
2340 		    uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2341 		    uint64_t, 0, uint32_t, 0, uint32_t, 0,
2342 		    uint32_t, idb->idb_xfer_len,
2343 		    int, XFER_BUF_TX_TO_INI);
2344 		idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
2345 		return (IDM_STATUS_FAIL);
2346 	}
2347 
2348 	/*
2349 	 * Build a template for the data PDU headers we will use so that
2350 	 * the SN values will stay consistent with other PDU's we are
2351 	 * transmitting like R2T and SCSI status.
2352 	 */
2353 	bzero(&idb->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t));
2354 	tmppdu.isp_hdr = &idb->idb_data_hdr_tmpl;
2355 	(*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu,
2356 	    ISCSI_OP_SCSI_DATA_RSP);
2357 	idb->idb_tx_thread = B_TRUE;
2358 	list_insert_tail(&so_conn->ic_tx_list, (void *)idb);
2359 	cv_signal(&so_conn->ic_tx_cv);
2360 	mutex_exit(&so_conn->ic_tx_mutex);
2361 	mutex_exit(&idt->idt_mutex);
2362 
2363 	/*
2364 	 * Returning success here indicates the transfer was successfully
2365 	 * dispatched -- it does not mean that the transfer completed
2366 	 * successfully.
2367 	 */
2368 	return (IDM_STATUS_SUCCESS);
2369 }
2370 
2371 /*
2372  * The idm_so_buf_rx_from_ini() is used by the target iSCSI layer to specify the
2373  * data blocks it is ready to receive from the initiator in response to a WRITE
2374  * SCSI command. The target iSCSI layer passes the information about the desired
2375  * data blocks to the initiator in one R2T PDU. The receiving buffer, the buffer
2376  * offset and datalen are passed via the 'idb' argument.
2377  *
2378  * Scope for Prototype build:
2379  * R2Ts are required for any Data-Out PDU, i.e. initiator and target must have
2380  * negotiated the "InitialR2T" to "Yes".
2381  *
2382  * Caller holds idt->idt_mutex
2383  */
2384 static idm_status_t
2385 idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb)
2386 {
2387 	idm_pdu_t		*pdu;
2388 	iscsi_rtt_hdr_t		*rtt;
2389 
2390 	ASSERT(mutex_owned(&idt->idt_mutex));
2391 
2392 	DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic,
2393 	    uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2394 	    uint64_t, 0, uint32_t, 0, uint32_t, 0,
2395 	    uint32_t, idb->idb_xfer_len, int, XFER_BUF_RX_FROM_INI);
2396 
2397 	pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP);
2398 	pdu->isp_ic = idt->idt_ic;
2399 	pdu->isp_flags = IDM_PDU_SET_STATSN;
2400 	bzero(pdu->isp_hdr, sizeof (iscsi_rtt_hdr_t));
2401 
2402 	/* iSCSI layer fills the TTT, ITT, ExpCmdSN, MaxCmdSN */
2403 	(*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, pdu, ISCSI_OP_RTT_RSP);
2404 
2405 	/* set the rttsn, rtt.flags, rtt.data_offset and rtt.data_length */
2406 	rtt = (iscsi_rtt_hdr_t *)(pdu->isp_hdr);
2407 
2408 	rtt->opcode		= ISCSI_OP_RTT_RSP;
2409 	rtt->flags		= ISCSI_FLAG_FINAL;
2410 	rtt->data_offset	= htonl(idb->idb_bufoffset);
2411 	rtt->data_length	= htonl(idb->idb_xfer_len);
2412 	rtt->rttsn		= htonl(idt->idt_exp_rttsn++);
2413 
2414 	/* Keep track of buffer offsets */
2415 	idb->idb_exp_offset	= idb->idb_bufoffset;
2416 	mutex_exit(&idt->idt_mutex);
2417 
2418 	/*
2419 	 * Transmit the PDU.
2420 	 */
2421 	idm_pdu_tx(pdu);
2422 
2423 	return (IDM_STATUS_SUCCESS);
2424 }
2425 
2426 static idm_status_t
2427 idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen)
2428 {
2429 	if ((buflen > IDM_SO_BUF_CACHE_LB) && (buflen <= IDM_SO_BUF_CACHE_UB)) {
2430 		idb->idb_buf = kmem_cache_alloc(idm.idm_so_128k_buf_cache,
2431 		    KM_NOSLEEP);
2432 		idb->idb_buf_private = idm.idm_so_128k_buf_cache;
2433 	} else {
2434 		idb->idb_buf = kmem_alloc(buflen, KM_NOSLEEP);
2435 		idb->idb_buf_private = NULL;
2436 	}
2437 
2438 	if (idb->idb_buf == NULL) {
2439 		IDM_CONN_LOG(CE_NOTE,
2440 		    "idm_so_buf_alloc: failed buffer allocation");
2441 		return (IDM_STATUS_FAIL);
2442 	}
2443 
2444 	return (IDM_STATUS_SUCCESS);
2445 }
2446 
2447 /* ARGSUSED */
2448 static idm_status_t
2449 idm_so_buf_setup(idm_buf_t *idb)
2450 {
2451 	/* Ensure bufalloc'd flag is unset */
2452 	idb->idb_bufalloc = B_FALSE;
2453 
2454 	return (IDM_STATUS_SUCCESS);
2455 }
2456 
2457 /* ARGSUSED */
2458 static void
2459 idm_so_buf_teardown(idm_buf_t *idb)
2460 {
2461 	/* nothing to do here */
2462 }
2463 
2464 static void
2465 idm_so_buf_free(idm_buf_t *idb)
2466 {
2467 	if (idb->idb_buf_private == NULL) {
2468 		kmem_free(idb->idb_buf, idb->idb_buflen);
2469 	} else {
2470 		kmem_cache_free(idb->idb_buf_private, idb->idb_buf);
2471 	}
2472 }
2473 
2474 static void
2475 idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt, idm_buf_t *idb,
2476     uint32_t offset, uint32_t length)
2477 {
2478 	idm_so_conn_t	*so_conn = ic->ic_transport_private;
2479 	idm_pdu_t	tmppdu;
2480 	idm_buf_t	*rtt_buf;
2481 
2482 	ASSERT(mutex_owned(&idt->idt_mutex));
2483 
2484 	/*
2485 	 * Allocate a buffer to represent the RTT transfer.  We could further
2486 	 * optimize this by allocating the buffers internally from an rtt
2487 	 * specific buffer cache since this is socket-specific code but for
2488 	 * now we will keep it simple.
2489 	 */
2490 	rtt_buf = idm_buf_alloc(ic, (uint8_t *)idb->idb_buf + offset, length);
2491 	if (rtt_buf == NULL) {
2492 		/*
2493 		 * If we're in FFP then the failure was likely a resource
2494 		 * allocation issue and we should close the connection by
2495 		 * sending a CE_TRANSPORT_FAIL event.
2496 		 *
2497 		 * If we're not in FFP then idm_buf_alloc will always
2498 		 * fail and the state is transitioning to "complete" anyway
2499 		 * so we won't bother to send an event.
2500 		 */
2501 		mutex_enter(&ic->ic_state_mutex);
2502 		if (ic->ic_ffp)
2503 			idm_conn_event_locked(ic, CE_TRANSPORT_FAIL,
2504 			    NULL, CT_NONE);
2505 		mutex_exit(&ic->ic_state_mutex);
2506 		mutex_exit(&idt->idt_mutex);
2507 		return;
2508 	}
2509 
2510 	rtt_buf->idb_buf_cb = NULL;
2511 	rtt_buf->idb_cb_arg = NULL;
2512 	rtt_buf->idb_bufoffset = offset;
2513 	rtt_buf->idb_xfer_len = length;
2514 	rtt_buf->idb_ic = idt->idt_ic;
2515 	rtt_buf->idb_task_binding = idt;
2516 
2517 	/*
2518 	 * The new buffer (if any) represents an additional
2519 	 * reference on the task
2520 	 */
2521 	idm_task_hold(idt);
2522 	mutex_exit(&idt->idt_mutex);
2523 
2524 	/*
2525 	 * Put the idm_buf_t on the tx queue.  It will be transmitted by
2526 	 * idm_sotx_thread.
2527 	 */
2528 	mutex_enter(&so_conn->ic_tx_mutex);
2529 
2530 	if (!so_conn->ic_tx_thread_running) {
2531 		idm_buf_free(rtt_buf);
2532 		mutex_exit(&so_conn->ic_tx_mutex);
2533 		idm_task_rele(idt);
2534 		return;
2535 	}
2536 
2537 	/*
2538 	 * Build a template for the data PDU headers we will use so that
2539 	 * the SN values will stay consistent with other PDU's we are
2540 	 * transmitting like R2T and SCSI status.
2541 	 */
2542 	bzero(&rtt_buf->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t));
2543 	tmppdu.isp_hdr = &rtt_buf->idb_data_hdr_tmpl;
2544 	(*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu,
2545 	    ISCSI_OP_SCSI_DATA);
2546 	rtt_buf->idb_tx_thread = B_TRUE;
2547 	rtt_buf->idb_in_transport = B_TRUE;
2548 	list_insert_tail(&so_conn->ic_tx_list, (void *)rtt_buf);
2549 	cv_signal(&so_conn->ic_tx_cv);
2550 	mutex_exit(&so_conn->ic_tx_mutex);
2551 }
2552 
2553 static void
2554 idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb)
2555 {
2556 	/*
2557 	 * Don't worry about status -- we assume any error handling
2558 	 * is performed by the caller (idm_sotx_thread).
2559 	 */
2560 	idb->idb_in_transport = B_FALSE;
2561 	idm_task_rele(idt);
2562 	idm_buf_free(idb);
2563 }
2564 
2565 static idm_status_t
2566 idm_so_send_buf_region(idm_task_t *idt, idm_buf_t *idb,
2567     uint32_t buf_region_offset, uint32_t buf_region_length)
2568 {
2569 	idm_conn_t		*ic;
2570 	uint32_t		max_dataseglen;
2571 	size_t			remainder, chunk;
2572 	uint32_t		data_offset = buf_region_offset;
2573 	iscsi_data_hdr_t	*bhs;
2574 	idm_pdu_t		*pdu;
2575 	idm_status_t		tx_status;
2576 
2577 	ASSERT(mutex_owned(&idt->idt_mutex));
2578 
2579 	ic = idt->idt_ic;
2580 
2581 	max_dataseglen = ic->ic_conn_params.max_xmit_dataseglen;
2582 	remainder = buf_region_length;
2583 
2584 	while (remainder) {
2585 		if (idt->idt_state != TASK_ACTIVE) {
2586 			ASSERT((idt->idt_state != TASK_IDLE) &&
2587 			    (idt->idt_state != TASK_COMPLETE));
2588 			return (IDM_STATUS_ABORTED);
2589 		}
2590 
2591 		/* check to see if we need to chunk the data */
2592 		if (remainder > max_dataseglen) {
2593 			chunk = max_dataseglen;
2594 		} else {
2595 			chunk = remainder;
2596 		}
2597 
2598 		/* Data PDU headers will always be sizeof (iscsi_hdr_t) */
2599 		pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP);
2600 		pdu->isp_ic = ic;
2601 		pdu->isp_flags = 0;	/* initialize isp_flags */
2602 
2603 		/*
2604 		 * We've already built a build a header template
2605 		 * to use during the transfer.  Use this template so that
2606 		 * the SN values stay consistent with any unrelated PDU's
2607 		 * being transmitted.
2608 		 */
2609 		bcopy(&idb->idb_data_hdr_tmpl, pdu->isp_hdr,
2610 		    sizeof (iscsi_hdr_t));
2611 
2612 		/*
2613 		 * Set DataSN, data offset, and flags in BHS
2614 		 * For the prototype build, A = 0, S = 0, U = 0
2615 		 */
2616 		bhs = (iscsi_data_hdr_t *)(pdu->isp_hdr);
2617 
2618 		bhs->datasn		= htonl(idt->idt_exp_datasn++);
2619 
2620 		hton24(bhs->dlength, chunk);
2621 		bhs->offset = htonl(idb->idb_bufoffset + data_offset);
2622 
2623 		/* setup data */
2624 		pdu->isp_data	=  (uint8_t *)idb->idb_buf + data_offset;
2625 		pdu->isp_datalen = (uint_t)chunk;
2626 
2627 		if (chunk == remainder) {
2628 			bhs->flags = ISCSI_FLAG_FINAL; /* F bit set to 1 */
2629 			/* Piggyback the status with the last data PDU */
2630 			if (idt->idt_flags & IDM_TASK_PHASECOLLAPSE_REQ) {
2631 				pdu->isp_flags |= IDM_PDU_SET_STATSN |
2632 				    IDM_PDU_ADVANCE_STATSN;
2633 				(*idt->idt_ic->ic_conn_ops.icb_update_statsn)
2634 				    (idt, pdu);
2635 				idt->idt_flags |=
2636 				    IDM_TASK_PHASECOLLAPSE_SUCCESS;
2637 
2638 			}
2639 		}
2640 
2641 		remainder	-= chunk;
2642 		data_offset	+= chunk;
2643 
2644 		/* Instrument the data-send DTrace probe. */
2645 		if (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP) {
2646 			DTRACE_ISCSI_2(data__send,
2647 			    idm_conn_t *, idt->idt_ic,
2648 			    iscsi_data_rsp_hdr_t *,
2649 			    (iscsi_data_rsp_hdr_t *)pdu->isp_hdr);
2650 		}
2651 
2652 		/*
2653 		 * Now that we're done working with idt_exp_datasn,
2654 		 * idt->idt_state and idb->idb_bufoffset we can release
2655 		 * the task lock -- don't want to hold it across the
2656 		 * call to idm_i_so_tx since we could block.
2657 		 */
2658 		mutex_exit(&idt->idt_mutex);
2659 
2660 		/*
2661 		 * Transmit the PDU.  Call the internal routine directly
2662 		 * as there is already implicit ordering.
2663 		 */
2664 		if ((tx_status = idm_i_so_tx(pdu)) != IDM_STATUS_SUCCESS) {
2665 			mutex_enter(&idt->idt_mutex);
2666 			return (tx_status);
2667 		}
2668 
2669 		mutex_enter(&idt->idt_mutex);
2670 		idt->idt_tx_bytes += chunk;
2671 	}
2672 
2673 	return (IDM_STATUS_SUCCESS);
2674 }
2675 
2676 /*
2677  * TX PDU cache
2678  */
2679 /* ARGSUSED */
2680 int
2681 idm_sotx_pdu_constructor(void *hdl, void *arg, int flags)
2682 {
2683 	idm_pdu_t	*pdu = hdl;
2684 
2685 	bzero(pdu, sizeof (idm_pdu_t));
2686 	pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */
2687 	pdu->isp_hdrlen = sizeof (iscsi_hdr_t);
2688 	pdu->isp_callback = idm_sotx_cache_pdu_cb;
2689 	pdu->isp_magic = IDM_PDU_MAGIC;
2690 	bzero(pdu->isp_hdr, sizeof (iscsi_hdr_t));
2691 
2692 	return (0);
2693 }
2694 
2695 /* ARGSUSED */
2696 void
2697 idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2698 {
2699 	/* reset values between use */
2700 	pdu->isp_datalen = 0;
2701 
2702 	kmem_cache_free(idm.idm_sotx_pdu_cache, pdu);
2703 }
2704 
2705 /*
2706  * RX PDU cache
2707  */
2708 /* ARGSUSED */
2709 int
2710 idm_sorx_pdu_constructor(void *hdl, void *arg, int flags)
2711 {
2712 	idm_pdu_t	*pdu = hdl;
2713 
2714 	bzero(pdu, sizeof (idm_pdu_t));
2715 	pdu->isp_magic = IDM_PDU_MAGIC;
2716 	pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */
2717 	pdu->isp_callback = idm_sorx_cache_pdu_cb;
2718 
2719 	return (0);
2720 }
2721 
2722 /* ARGSUSED */
2723 static void
2724 idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2725 {
2726 	pdu->isp_iovlen = 0;
2727 	pdu->isp_sorx_buf = 0;
2728 	kmem_cache_free(idm.idm_sorx_pdu_cache, pdu);
2729 }
2730 
2731 static void
2732 idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2733 {
2734 	/*
2735 	 * We had to modify our cached RX PDU with a longer header buffer
2736 	 * and/or a longer data buffer.  Release the new buffers and fix
2737 	 * the fields back to what we would expect for a cached RX PDU.
2738 	 */
2739 	if (pdu->isp_flags & IDM_PDU_ADDL_HDR) {
2740 		kmem_free(pdu->isp_hdr, pdu->isp_hdrlen);
2741 	}
2742 	if (pdu->isp_flags & IDM_PDU_ADDL_DATA) {
2743 		kmem_free(pdu->isp_data, pdu->isp_datalen);
2744 	}
2745 	pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1);
2746 	pdu->isp_hdrlen = sizeof (iscsi_hdr_t);
2747 	pdu->isp_data = NULL;
2748 	pdu->isp_datalen = 0;
2749 	pdu->isp_sorx_buf = 0;
2750 	pdu->isp_callback = idm_sorx_cache_pdu_cb;
2751 	idm_sorx_cache_pdu_cb(pdu, status);
2752 }
2753 
2754 /*
2755  * This thread is only active when I/O is queued for transmit
2756  * because the socket is busy.
2757  */
2758 void
2759 idm_sotx_thread(void *arg)
2760 {
2761 	idm_conn_t	*ic = arg;
2762 	idm_tx_obj_t	*object, *next;
2763 	idm_so_conn_t	*so_conn;
2764 	idm_status_t	status = IDM_STATUS_SUCCESS;
2765 
2766 	idm_conn_hold(ic);
2767 
2768 	mutex_enter(&ic->ic_mutex);
2769 	so_conn = ic->ic_transport_private;
2770 	so_conn->ic_tx_thread_running = B_TRUE;
2771 	so_conn->ic_tx_thread_did = so_conn->ic_tx_thread->t_did;
2772 	cv_signal(&ic->ic_cv);
2773 	mutex_exit(&ic->ic_mutex);
2774 
2775 	mutex_enter(&so_conn->ic_tx_mutex);
2776 
2777 	while (so_conn->ic_tx_thread_running) {
2778 		while (list_is_empty(&so_conn->ic_tx_list)) {
2779 			DTRACE_PROBE1(soconn__tx__sleep, idm_conn_t *, ic);
2780 			cv_wait(&so_conn->ic_tx_cv, &so_conn->ic_tx_mutex);
2781 			DTRACE_PROBE1(soconn__tx__wakeup, idm_conn_t *, ic);
2782 
2783 			if (!so_conn->ic_tx_thread_running) {
2784 				goto tx_bail;
2785 			}
2786 		}
2787 
2788 		object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list);
2789 		list_remove(&so_conn->ic_tx_list, object);
2790 		mutex_exit(&so_conn->ic_tx_mutex);
2791 
2792 		switch (object->idm_tx_obj_magic) {
2793 		case IDM_PDU_MAGIC: {
2794 			idm_pdu_t *pdu = (idm_pdu_t *)object;
2795 			DTRACE_PROBE2(soconn__tx__pdu, idm_conn_t *, ic,
2796 			    idm_pdu_t *, (idm_pdu_t *)object);
2797 
2798 			if (pdu->isp_flags & IDM_PDU_SET_STATSN) {
2799 				/* No IDM task */
2800 				(ic->ic_conn_ops.icb_update_statsn)(NULL, pdu);
2801 			}
2802 			status = idm_i_so_tx((idm_pdu_t *)object);
2803 			break;
2804 		}
2805 		case IDM_BUF_MAGIC: {
2806 			idm_buf_t *idb = (idm_buf_t *)object;
2807 			idm_task_t *idt = idb->idb_task_binding;
2808 
2809 			DTRACE_PROBE2(soconn__tx__buf, idm_conn_t *, ic,
2810 			    idm_buf_t *, idb);
2811 
2812 			mutex_enter(&idt->idt_mutex);
2813 			status = idm_so_send_buf_region(idt,
2814 			    idb, 0, idb->idb_xfer_len);
2815 
2816 			/*
2817 			 * TX thread owns the buffer so we expect it to
2818 			 * be "in transport"
2819 			 */
2820 			ASSERT(idb->idb_in_transport);
2821 			if (IDM_CONN_ISTGT(ic)) {
2822 				/*
2823 				 * idm_buf_tx_to_ini_done releases
2824 				 * idt->idt_mutex
2825 				 */
2826 				DTRACE_ISCSI_8(xfer__done,
2827 				    idm_conn_t *, idt->idt_ic,
2828 				    uintptr_t, idb->idb_buf,
2829 				    uint32_t, idb->idb_bufoffset,
2830 				    uint64_t, 0, uint32_t, 0, uint32_t, 0,
2831 				    uint32_t, idb->idb_xfer_len,
2832 				    int, XFER_BUF_TX_TO_INI);
2833 				idm_buf_tx_to_ini_done(idt, idb, status);
2834 			} else {
2835 				idm_so_send_rtt_data_done(idt, idb);
2836 				mutex_exit(&idt->idt_mutex);
2837 			}
2838 			break;
2839 		}
2840 
2841 		default:
2842 			IDM_CONN_LOG(CE_WARN, "idm_sotx_thread: Unknown magic "
2843 			    "(0x%08x)", object->idm_tx_obj_magic);
2844 			status = IDM_STATUS_FAIL;
2845 		}
2846 
2847 		mutex_enter(&so_conn->ic_tx_mutex);
2848 
2849 		if (status != IDM_STATUS_SUCCESS) {
2850 			so_conn->ic_tx_thread_running = B_FALSE;
2851 			idm_conn_event(ic, CE_TRANSPORT_FAIL, status);
2852 		}
2853 	}
2854 
2855 	/*
2856 	 * Before we leave, we need to abort every item remaining in the
2857 	 * TX list.
2858 	 */
2859 
2860 tx_bail:
2861 	object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list);
2862 
2863 	while (object != NULL) {
2864 		next = list_next(&so_conn->ic_tx_list, object);
2865 
2866 		list_remove(&so_conn->ic_tx_list, object);
2867 		switch (object->idm_tx_obj_magic) {
2868 		case IDM_PDU_MAGIC:
2869 			idm_pdu_complete((idm_pdu_t *)object,
2870 			    IDM_STATUS_ABORTED);
2871 			break;
2872 
2873 		case IDM_BUF_MAGIC: {
2874 			idm_buf_t *idb = (idm_buf_t *)object;
2875 			idm_task_t *idt = idb->idb_task_binding;
2876 			mutex_exit(&so_conn->ic_tx_mutex);
2877 			mutex_enter(&idt->idt_mutex);
2878 			/*
2879 			 * TX thread owns the buffer so we expect it to
2880 			 * be "in transport"
2881 			 */
2882 			ASSERT(idb->idb_in_transport);
2883 			if (IDM_CONN_ISTGT(ic)) {
2884 				/*
2885 				 * idm_buf_tx_to_ini_done releases
2886 				 * idt->idt_mutex
2887 				 */
2888 				DTRACE_ISCSI_8(xfer__done,
2889 				    idm_conn_t *, idt->idt_ic,
2890 				    uintptr_t, idb->idb_buf,
2891 				    uint32_t, idb->idb_bufoffset,
2892 				    uint64_t, 0, uint32_t, 0, uint32_t, 0,
2893 				    uint32_t, idb->idb_xfer_len,
2894 				    int, XFER_BUF_TX_TO_INI);
2895 				idm_buf_tx_to_ini_done(idt, idb,
2896 				    IDM_STATUS_ABORTED);
2897 			} else {
2898 				idm_so_send_rtt_data_done(idt, idb);
2899 				mutex_exit(&idt->idt_mutex);
2900 			}
2901 			mutex_enter(&so_conn->ic_tx_mutex);
2902 			break;
2903 		}
2904 		default:
2905 			IDM_CONN_LOG(CE_WARN,
2906 			    "idm_sotx_thread: Unexpected magic "
2907 			    "(0x%08x)", object->idm_tx_obj_magic);
2908 		}
2909 
2910 		object = next;
2911 	}
2912 
2913 	mutex_exit(&so_conn->ic_tx_mutex);
2914 	idm_conn_rele(ic);
2915 	thread_exit();
2916 	/*NOTREACHED*/
2917 }
2918 
2919 static void
2920 idm_so_socket_set_nonblock(struct sonode *node)
2921 {
2922 	(void) VOP_SETFL(node->so_vnode, node->so_flag,
2923 	    (node->so_state | FNONBLOCK), CRED(), NULL);
2924 }
2925 
2926 static void
2927 idm_so_socket_set_block(struct sonode *node)
2928 {
2929 	(void) VOP_SETFL(node->so_vnode, node->so_flag,
2930 	    (node->so_state & (~FNONBLOCK)), CRED(), NULL);
2931 }
2932 
2933 
2934 /*
2935  * Called by kernel sockets when the connection has been accepted or
2936  * rejected. In early volo, a "disconnect" callback was sent instead of
2937  * "connectfailed", so we check for both.
2938  */
2939 /* ARGSUSED */
2940 void
2941 idm_so_timed_socket_connect_cb(ksocket_t ks,
2942     ksocket_callback_event_t ev, void *arg, uintptr_t info)
2943 {
2944 	idm_so_timed_socket_t	*itp = arg;
2945 	ASSERT(itp != NULL);
2946 	ASSERT(ev == KSOCKET_EV_CONNECTED ||
2947 	    ev == KSOCKET_EV_CONNECTFAILED ||
2948 	    ev == KSOCKET_EV_DISCONNECTED);
2949 
2950 	mutex_enter(&idm_so_timed_socket_mutex);
2951 	itp->it_callback_called = B_TRUE;
2952 	if (ev == KSOCKET_EV_CONNECTED) {
2953 		itp->it_socket_error_code = 0;
2954 	} else {
2955 		/* Make sure the error code is non-zero on error */
2956 		if (info == 0)
2957 			info = ECONNRESET;
2958 		itp->it_socket_error_code = (int)info;
2959 	}
2960 	cv_signal(&itp->it_cv);
2961 	mutex_exit(&idm_so_timed_socket_mutex);
2962 }
2963 
2964 int
2965 idm_so_timed_socket_connect(ksocket_t ks,
2966     struct sockaddr_storage *sa, int sa_sz, int login_max_usec)
2967 {
2968 	clock_t			conn_login_max;
2969 	int			rc, nonblocking, rval;
2970 	idm_so_timed_socket_t	it;
2971 	ksocket_callbacks_t	ks_cb;
2972 
2973 	conn_login_max = ddi_get_lbolt() + drv_usectohz(login_max_usec);
2974 
2975 	/*
2976 	 * Set to non-block socket mode, with callback on connect
2977 	 * Early volo used "disconnected" instead of "connectfailed",
2978 	 * so set callback to look for both.
2979 	 */
2980 	bzero(&it, sizeof (it));
2981 	ks_cb.ksock_cb_flags = KSOCKET_CB_CONNECTED |
2982 	    KSOCKET_CB_CONNECTFAILED | KSOCKET_CB_DISCONNECTED;
2983 	ks_cb.ksock_cb_connected = idm_so_timed_socket_connect_cb;
2984 	ks_cb.ksock_cb_connectfailed = idm_so_timed_socket_connect_cb;
2985 	ks_cb.ksock_cb_disconnected = idm_so_timed_socket_connect_cb;
2986 	cv_init(&it.it_cv, NULL, CV_DEFAULT, NULL);
2987 	rc = ksocket_setcallbacks(ks, &ks_cb, &it, CRED());
2988 	if (rc != 0)
2989 		return (rc);
2990 
2991 	/* Set to non-blocking mode */
2992 	nonblocking = 1;
2993 	rc = ksocket_ioctl(ks, FIONBIO, (intptr_t)&nonblocking, &rval,
2994 	    CRED());
2995 	if (rc != 0)
2996 		goto cleanup;
2997 
2998 	bzero(&it, sizeof (it));
2999 	for (;;) {
3000 		/*
3001 		 * Warning -- in a loopback scenario, the call to
3002 		 * the connect_cb can occur inside the call to
3003 		 * ksocket_connect. Do not hold the mutex around the
3004 		 * call to ksocket_connect.
3005 		 */
3006 		rc = ksocket_connect(ks, (struct sockaddr *)sa, sa_sz, CRED());
3007 		if (rc == 0 || rc == EISCONN) {
3008 			/* socket success or already success */
3009 			rc = 0;
3010 			break;
3011 		}
3012 		if ((rc != EINPROGRESS) && (rc != EALREADY)) {
3013 			break;
3014 		}
3015 
3016 		/* TCP connect still in progress. See if out of time. */
3017 		if (ddi_get_lbolt() > conn_login_max) {
3018 			/*
3019 			 * Connection retry timeout,
3020 			 * failed connect to target.
3021 			 */
3022 			rc = ETIMEDOUT;
3023 			break;
3024 		}
3025 
3026 		/*
3027 		 * TCP connect still in progress.  Sleep until callback.
3028 		 * Do NOT go to sleep if the callback already occurred!
3029 		 */
3030 		mutex_enter(&idm_so_timed_socket_mutex);
3031 		if (!it.it_callback_called) {
3032 			(void) cv_timedwait(&it.it_cv,
3033 			    &idm_so_timed_socket_mutex, conn_login_max);
3034 		}
3035 		if (it.it_callback_called) {
3036 			rc = it.it_socket_error_code;
3037 			mutex_exit(&idm_so_timed_socket_mutex);
3038 			break;
3039 		}
3040 		/* If timer expires, go call ksocket_connect one last time. */
3041 		mutex_exit(&idm_so_timed_socket_mutex);
3042 	}
3043 
3044 	/* resume blocking mode */
3045 	nonblocking = 0;
3046 	(void) ksocket_ioctl(ks, FIONBIO, (intptr_t)&nonblocking, &rval,
3047 	    CRED());
3048 cleanup:
3049 	(void) ksocket_setcallbacks(ks, NULL, NULL, CRED());
3050 	cv_destroy(&it.it_cv);
3051 	if (rc != 0) {
3052 		idm_soshutdown(ks);
3053 	}
3054 	return (rc);
3055 }
3056 
3057 
3058 void
3059 idm_addr_to_sa(idm_addr_t *dportal, struct sockaddr_storage *sa)
3060 {
3061 	int			dp_addr_size;
3062 	struct sockaddr_in	*sin;
3063 	struct sockaddr_in6	*sin6;
3064 
3065 	/* Build sockaddr_storage for this portal (idm_addr_t) */
3066 	bzero(sa, sizeof (*sa));
3067 	dp_addr_size = dportal->a_addr.i_insize;
3068 	if (dp_addr_size == sizeof (struct in_addr)) {
3069 		/* IPv4 */
3070 		sa->ss_family = AF_INET;
3071 		sin = (struct sockaddr_in *)sa;
3072 		sin->sin_port = htons(dportal->a_port);
3073 		bcopy(&dportal->a_addr.i_addr.in4,
3074 		    &sin->sin_addr, sizeof (struct in_addr));
3075 	} else if (dp_addr_size == sizeof (struct in6_addr)) {
3076 		/* IPv6 */
3077 		sa->ss_family = AF_INET6;
3078 		sin6 = (struct sockaddr_in6 *)sa;
3079 		sin6->sin6_port = htons(dportal->a_port);
3080 		bcopy(&dportal->a_addr.i_addr.in6,
3081 		    &sin6->sin6_addr, sizeof (struct in6_addr));
3082 	} else {
3083 		ASSERT(0);
3084 	}
3085 }
3086 
3087 
/*
 * Return a human-readable form of a sockaddr_storage, in the form
 * [ip-address].port.  This is used in calls to logging functions.
 * If several calls to idm_sa_ntop are made within the same invocation
 * of a logging function, then each one needs its own buf.
 *
 *	sa	socket address to format (AF_INET and AF_INET6 are handled)
 *	buf	caller-supplied output buffer
 *	size	size of buf, in bytes
 *
 * Always returns buf.  If the address family is not one of the two
 * handled, if inet_ntop() fails, or if buf cannot hold the address text
 * plus the brackets, the dot, a five digit port and the terminating NUL,
 * the placeholder "[0].-1" is written to buf instead.
 */
const char *
idm_sa_ntop(const struct sockaddr_storage *sa,
    char *buf, size_t size)
{
	static const char bogus_ip[] = "[0].-1";
	char tmp[INET6_ADDRSTRLEN];
	const void *addr;
	uint16_t port;

	/* Pick out the address and port for the families we understand. */
	switch (sa->ss_family) {
	case AF_INET6:
		{
			const struct sockaddr_in6 *in6 =
			    (const struct sockaddr_in6 *)sa;

			addr = &in6->sin6_addr;
			port = in6->sin6_port;
			break;
		}
	case AF_INET:
		{
			const struct sockaddr_in *in =
			    (const struct sockaddr_in *)sa;

			addr = &in->sin_addr;
			port = in->sin_port;
			break;
		}
	default:
		goto err;
	}

	/* From here on both families are formatted the same way. */
	if (inet_ntop(sa->ss_family, addr, tmp, sizeof (tmp)) == NULL) {
		goto err;
	}

	/*
	 * sizeof counts the NUL of the literal, so this is the worst-case
	 * length of the finished string including its terminator.
	 */
	if (strlen(tmp) + sizeof ("[].65535") > size) {
		goto err;
	}

	/* ntohs() promotes to int; cast so the argument matches %u. */
	(void) snprintf(buf, size, "[%s].%u", tmp,
	    (unsigned int)ntohs(port));
	return (buf);

err:
	(void) snprintf(buf, size, "%s", bogus_ip);
	return (buf);
}
3142