1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25 /*
26 * Copyright (c) 2013 by Delphix. All rights reserved.
27 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
28 * Copyright (c) 2017, Joyent, Inc. All rights reserved.
29 */
30
31 #include <sys/conf.h>
32 #include <sys/stat.h>
33 #include <sys/file.h>
34 #include <sys/ddi.h>
35 #include <sys/sunddi.h>
36 #include <sys/modctl.h>
37 #include <sys/priv.h>
38 #include <sys/cpuvar.h>
39 #include <sys/socket.h>
40 #include <sys/strsubr.h>
41 #include <sys/sysmacros.h>
42 #include <sys/sdt.h>
43 #include <netinet/tcp.h>
44 #include <inet/tcp.h>
45 #include <sys/socketvar.h>
46 #include <sys/pathname.h>
47 #include <sys/fs/snode.h>
48 #include <sys/fs/dv_node.h>
49 #include <sys/vnode.h>
50 #include <netinet/in.h>
51 #include <net/if.h>
52 #include <sys/sockio.h>
53 #include <sys/ksocket.h>
54 #include <sys/filio.h> /* FIONBIO */
55 #include <sys/iscsi_protocol.h>
56 #include <sys/idm/idm.h>
57 #include <sys/idm/idm_so.h>
58 #include <sys/idm/idm_text.h>
59
60 #define IN_PROGRESS_DELAY 1
61
62 /*
63 * in6addr_any is currently all zeroes, but use the macro in case this
64 * ever changes.
65 */
66 static const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
67
68 static void idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
69 static void idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
70 static void idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
71
72 static idm_status_t idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so);
73 static void idm_so_conn_destroy_common(idm_conn_t *ic);
74 static void idm_so_conn_connect_common(idm_conn_t *ic);
75
76 static void idm_set_ini_preconnect_options(idm_so_conn_t *sc,
77 boolean_t boot_conn);
78 static void idm_set_postconnect_options(ksocket_t so);
79 static idm_status_t idm_i_so_tx(idm_pdu_t *pdu);
80
81 static idm_status_t idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu);
82 static void idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt,
83 idm_buf_t *idb, uint32_t offset, uint32_t length);
84 static void idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb);
85 static idm_status_t idm_so_send_buf_region(idm_task_t *idt,
86 idm_buf_t *idb, uint32_t buf_region_offset, uint32_t buf_region_length);
87
88 static uint32_t idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb,
89 uint32_t ro, uint32_t dlength);
90
91 static idm_status_t idm_so_handle_digest(idm_conn_t *it,
92 nvpair_t *digest_choice, const idm_kv_xlate_t *ikvx);
93
94 static void idm_so_socket_set_nonblock(struct sonode *node);
95 static void idm_so_socket_set_block(struct sonode *node);
96
97 /*
98 * Transport ops prototypes
99 */
100 static void idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu);
101 static idm_status_t idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb);
102 static idm_status_t idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb);
103 static void idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu);
104 static void idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu);
105 static void idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu);
106 static idm_status_t idm_so_free_task_rsrc(idm_task_t *idt);
107 static kv_status_t idm_so_negotiate_key_values(idm_conn_t *it,
108 nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl);
109 static void idm_so_notice_key_values(idm_conn_t *it,
110 nvlist_t *negotiated_nvl);
111 static kv_status_t idm_so_declare_key_values(idm_conn_t *it,
112 nvlist_t *config_nvl, nvlist_t *outgoing_nvl);
113 static boolean_t idm_so_conn_is_capable(idm_conn_req_t *ic,
114 idm_transport_caps_t *caps);
115 static idm_status_t idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen);
116 static void idm_so_buf_free(idm_buf_t *idb);
117 static idm_status_t idm_so_buf_setup(idm_buf_t *idb);
118 static void idm_so_buf_teardown(idm_buf_t *idb);
119 static idm_status_t idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is);
120 static void idm_so_tgt_svc_destroy(idm_svc_t *is);
121 static idm_status_t idm_so_tgt_svc_online(idm_svc_t *is);
122 static void idm_so_tgt_svc_offline(idm_svc_t *is);
123 static void idm_so_tgt_conn_destroy(idm_conn_t *ic);
124 static idm_status_t idm_so_tgt_conn_connect(idm_conn_t *ic);
125 static void idm_so_conn_disconnect(idm_conn_t *ic);
126 static idm_status_t idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic);
127 static void idm_so_ini_conn_destroy(idm_conn_t *ic);
128 static idm_status_t idm_so_ini_conn_connect(idm_conn_t *ic);
129
130 /*
131 * IDM Native Sockets transport operations
132 */
133 static
134 idm_transport_ops_t idm_so_transport_ops = {
135 idm_so_tx, /* it_tx_pdu */
136 idm_so_buf_tx_to_ini, /* it_buf_tx_to_ini */
137 idm_so_buf_rx_from_ini, /* it_buf_rx_from_ini */
138 idm_so_rx_datain, /* it_rx_datain */
139 idm_so_rx_rtt, /* it_rx_rtt */
140 idm_so_rx_dataout, /* it_rx_dataout */
141 NULL, /* it_alloc_conn_rsrc */
142 NULL, /* it_free_conn_rsrc */
143 NULL, /* it_tgt_enable_datamover */
144 NULL, /* it_ini_enable_datamover */
145 NULL, /* it_conn_terminate */
146 idm_so_free_task_rsrc, /* it_free_task_rsrc */
147 idm_so_negotiate_key_values, /* it_negotiate_key_values */
148 idm_so_notice_key_values, /* it_notice_key_values */
149 idm_so_conn_is_capable, /* it_conn_is_capable */
150 idm_so_buf_alloc, /* it_buf_alloc */
151 idm_so_buf_free, /* it_buf_free */
152 idm_so_buf_setup, /* it_buf_setup */
153 idm_so_buf_teardown, /* it_buf_teardown */
154 idm_so_tgt_svc_create, /* it_tgt_svc_create */
155 idm_so_tgt_svc_destroy, /* it_tgt_svc_destroy */
156 idm_so_tgt_svc_online, /* it_tgt_svc_online */
157 idm_so_tgt_svc_offline, /* it_tgt_svc_offline */
158 idm_so_tgt_conn_destroy, /* it_tgt_conn_destroy */
159 idm_so_tgt_conn_connect, /* it_tgt_conn_connect */
160 idm_so_conn_disconnect, /* it_tgt_conn_disconnect */
161 idm_so_ini_conn_create, /* it_ini_conn_create */
162 idm_so_ini_conn_destroy, /* it_ini_conn_destroy */
163 idm_so_ini_conn_connect, /* it_ini_conn_connect */
164 idm_so_conn_disconnect, /* it_ini_conn_disconnect */
165 idm_so_declare_key_values /* it_declare_key_values */
166 };
167
168 kmutex_t idm_so_timed_socket_mutex;
169
170 int32_t idm_so_sndbuf = IDM_SNDBUF_SIZE;
171 int32_t idm_so_rcvbuf = IDM_RCVBUF_SIZE;
172
173 /*
174 * idm_so_init()
175 * Sockets transport initialization
176 */
177 void
idm_so_init(idm_transport_t * it)178 idm_so_init(idm_transport_t *it)
179 {
180 /* Cache for IDM Data and R2T Transmit PDU's */
181 idm.idm_sotx_pdu_cache = kmem_cache_create("idm_tx_pdu_cache",
182 sizeof (idm_pdu_t) + sizeof (iscsi_hdr_t), 8,
183 &idm_sotx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP);
184
185 /* Cache for IDM Receive PDU's */
186 idm.idm_sorx_pdu_cache = kmem_cache_create("idm_rx_pdu_cache",
187 sizeof (idm_pdu_t) + IDM_SORX_CACHE_HDRLEN, 8,
188 &idm_sorx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP);
189
190 /* 128k buffer cache */
191 idm.idm_so_128k_buf_cache = kmem_cache_create("idm_128k_buf_cache",
192 IDM_SO_BUF_CACHE_UB, 8, NULL, NULL, NULL, NULL, NULL, KM_SLEEP);
193
194 /* Set the sockets transport ops */
195 it->it_ops = &idm_so_transport_ops;
196
197 mutex_init(&idm_so_timed_socket_mutex, NULL, MUTEX_DEFAULT, NULL);
198
199 }
200
201 /*
202 * idm_so_fini()
203 * Sockets transport teardown
204 */
205 void
idm_so_fini(void)206 idm_so_fini(void)
207 {
208 kmem_cache_destroy(idm.idm_so_128k_buf_cache);
209 kmem_cache_destroy(idm.idm_sotx_pdu_cache);
210 kmem_cache_destroy(idm.idm_sorx_pdu_cache);
211 mutex_destroy(&idm_so_timed_socket_mutex);
212 }
213
214 ksocket_t
idm_socreate(int domain,int type,int protocol)215 idm_socreate(int domain, int type, int protocol)
216 {
217 ksocket_t ks;
218
219 if (!ksocket_socket(&ks, domain, type, protocol, KSOCKET_NOSLEEP,
220 CRED())) {
221 return (ks);
222 } else {
223 return (NULL);
224 }
225 }
226
227 /*
228 * idm_soshutdown will disconnect the socket and prevent subsequent PDU
229 * reception and transmission. The sonode still exists but its state
230 * gets modified to indicate it is no longer connected. Calls to
231 * idm_sorecv/idm_iov_sorecv will return so idm_soshutdown can be used
232 * regain control of a thread stuck in idm_sorecv.
233 */
234 void
idm_soshutdown(ksocket_t so)235 idm_soshutdown(ksocket_t so)
236 {
237 (void) ksocket_shutdown(so, SHUT_RDWR, CRED());
238 }
239
240 /*
241 * idm_sodestroy releases all resources associated with a socket previously
242 * created with idm_socreate. The socket must be shutdown using
243 * idm_soshutdown before the socket is destroyed with idm_sodestroy,
244 * otherwise undefined behavior will result.
245 */
246 void
idm_sodestroy(ksocket_t ks)247 idm_sodestroy(ksocket_t ks)
248 {
249 (void) ksocket_close(ks, CRED());
250 }
251
252 /*
253 * Function to compare two addresses in sockaddr_storage format
254 */
255
256 int
idm_ss_compare(const struct sockaddr_storage * cmp_ss1,const struct sockaddr_storage * cmp_ss2,boolean_t v4_mapped_as_v4,boolean_t compare_ports)257 idm_ss_compare(const struct sockaddr_storage *cmp_ss1,
258 const struct sockaddr_storage *cmp_ss2,
259 boolean_t v4_mapped_as_v4,
260 boolean_t compare_ports)
261 {
262 struct sockaddr_storage mapped_v4_ss1, mapped_v4_ss2;
263 const struct sockaddr_storage *ss1, *ss2;
264 struct in_addr *in1, *in2;
265 struct in6_addr *in61, *in62;
266 int i;
267
268 /*
269 * Normalize V4-mapped IPv6 addresses into V4 format if
270 * v4_mapped_as_v4 is B_TRUE.
271 */
272 ss1 = cmp_ss1;
273 ss2 = cmp_ss2;
274 if (v4_mapped_as_v4 && (ss1->ss_family == AF_INET6)) {
275 in61 = &((struct sockaddr_in6 *)ss1)->sin6_addr;
276 if (IN6_IS_ADDR_V4MAPPED(in61)) {
277 bzero(&mapped_v4_ss1, sizeof (mapped_v4_ss1));
278 mapped_v4_ss1.ss_family = AF_INET;
279 ((struct sockaddr_in *)&mapped_v4_ss1)->sin_port =
280 ((struct sockaddr_in *)ss1)->sin_port;
281 IN6_V4MAPPED_TO_INADDR(in61,
282 &((struct sockaddr_in *)&mapped_v4_ss1)->sin_addr);
283 ss1 = &mapped_v4_ss1;
284 }
285 }
286 ss2 = cmp_ss2;
287 if (v4_mapped_as_v4 && (ss2->ss_family == AF_INET6)) {
288 in62 = &((struct sockaddr_in6 *)ss2)->sin6_addr;
289 if (IN6_IS_ADDR_V4MAPPED(in62)) {
290 bzero(&mapped_v4_ss2, sizeof (mapped_v4_ss2));
291 mapped_v4_ss2.ss_family = AF_INET;
292 ((struct sockaddr_in *)&mapped_v4_ss2)->sin_port =
293 ((struct sockaddr_in *)ss2)->sin_port;
294 IN6_V4MAPPED_TO_INADDR(in62,
295 &((struct sockaddr_in *)&mapped_v4_ss2)->sin_addr);
296 ss2 = &mapped_v4_ss2;
297 }
298 }
299
300 /*
301 * Compare ports, then address family, then ip address
302 */
303 if (compare_ports &&
304 (((struct sockaddr_in *)ss1)->sin_port !=
305 ((struct sockaddr_in *)ss2)->sin_port)) {
306 if (((struct sockaddr_in *)ss1)->sin_port >
307 ((struct sockaddr_in *)ss2)->sin_port)
308 return (1);
309 else
310 return (-1);
311 }
312
313 /*
314 * ports are the same
315 */
316 if (ss1->ss_family != ss2->ss_family) {
317 if (ss1->ss_family == AF_INET)
318 return (1);
319 else
320 return (-1);
321 }
322
323 /*
324 * address families are the same
325 */
326 if (ss1->ss_family == AF_INET) {
327 in1 = &((struct sockaddr_in *)ss1)->sin_addr;
328 in2 = &((struct sockaddr_in *)ss2)->sin_addr;
329
330 if (in1->s_addr > in2->s_addr)
331 return (1);
332 else if (in1->s_addr < in2->s_addr)
333 return (-1);
334 else
335 return (0);
336 } else if (ss1->ss_family == AF_INET6) {
337 in61 = &((struct sockaddr_in6 *)ss1)->sin6_addr;
338 in62 = &((struct sockaddr_in6 *)ss2)->sin6_addr;
339
340 for (i = 0; i < 4; i++) {
341 if (in61->s6_addr32[i] > in62->s6_addr32[i])
342 return (1);
343 else if (in61->s6_addr32[i] < in62->s6_addr32[i])
344 return (-1);
345 }
346 return (0);
347 }
348
349 return (1);
350 }
351
352 /*
353 * IP address filter functions to flag addresses that should not
354 * go out to initiators through discovery.
355 */
356 static boolean_t
idm_v4_addr_okay(struct in_addr * in_addr)357 idm_v4_addr_okay(struct in_addr *in_addr)
358 {
359 in_addr_t addr = ntohl(in_addr->s_addr);
360
361 if ((INADDR_NONE == addr) ||
362 (IN_MULTICAST(addr)) ||
363 ((addr >> IN_CLASSA_NSHIFT) == 0) ||
364 ((addr >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) {
365 return (B_FALSE);
366 }
367 return (B_TRUE);
368 }
369
370 static boolean_t
idm_v6_addr_okay(struct in6_addr * addr6)371 idm_v6_addr_okay(struct in6_addr *addr6)
372 {
373
374 if ((IN6_IS_ADDR_UNSPECIFIED(addr6)) ||
375 (IN6_IS_ADDR_LOOPBACK(addr6)) ||
376 (IN6_IS_ADDR_MULTICAST(addr6)) ||
377 (IN6_IS_ADDR_V4MAPPED(addr6)) ||
378 (IN6_IS_ADDR_V4COMPAT(addr6)) ||
379 (IN6_IS_ADDR_LINKLOCAL(addr6))) {
380 return (B_FALSE);
381 }
382 return (B_TRUE);
383 }
384
385 /*
386 * idm_get_ipaddr will retrieve a list of IP Addresses which the host is
387 * configured with by sending down a sequence of kernel ioctl to IP STREAMS.
388 */
389 int
idm_get_ipaddr(idm_addr_list_t ** ipaddr_p)390 idm_get_ipaddr(idm_addr_list_t **ipaddr_p)
391 {
392 ksocket_t so4, so6;
393 struct lifnum lifn;
394 struct lifconf lifc;
395 struct lifreq *lp;
396 int rval;
397 int numifs;
398 int bufsize;
399 void *buf;
400 int i, j, n, rc;
401 struct sockaddr_storage ss;
402 struct sockaddr_in *sin;
403 struct sockaddr_in6 *sin6;
404 idm_addr_t *ip;
405 idm_addr_list_t *ipaddr = NULL;
406 int size_ipaddr;
407
408 *ipaddr_p = NULL;
409 size_ipaddr = 0;
410 buf = NULL;
411
412 /* create an ipv4 and ipv6 UDP socket */
413 if ((so6 = idm_socreate(PF_INET6, SOCK_DGRAM, 0)) == NULL)
414 return (0);
415 if ((so4 = idm_socreate(PF_INET, SOCK_DGRAM, 0)) == NULL) {
416 idm_sodestroy(so6);
417 return (0);
418 }
419
420
421 retry_count:
422 /* snapshot the current number of interfaces */
423 lifn.lifn_family = PF_UNSPEC;
424 lifn.lifn_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
425 lifn.lifn_count = 0;
426 /* use vp6 for ioctls with unspecified families by default */
427 if (ksocket_ioctl(so6, SIOCGLIFNUM, (intptr_t)&lifn, &rval, CRED())
428 != 0) {
429 goto cleanup;
430 }
431
432 numifs = lifn.lifn_count;
433 if (numifs <= 0) {
434 goto cleanup;
435 }
436
437 /* allocate extra room in case more interfaces appear */
438 numifs += 10;
439
440 /* get the interface names and ip addresses */
441 bufsize = numifs * sizeof (struct lifreq);
442 buf = kmem_alloc(bufsize, KM_SLEEP);
443
444 lifc.lifc_family = AF_UNSPEC;
445 lifc.lifc_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
446 lifc.lifc_len = bufsize;
447 lifc.lifc_buf = buf;
448 rc = ksocket_ioctl(so6, SIOCGLIFCONF, (intptr_t)&lifc, &rval, CRED());
449 if (rc != 0) {
450 goto cleanup;
451 }
452 /* if our extra room is used up, try again */
453 if (bufsize <= lifc.lifc_len) {
454 kmem_free(buf, bufsize);
455 buf = NULL;
456 goto retry_count;
457 }
458 /* calc actual number of ifconfs */
459 n = lifc.lifc_len / sizeof (struct lifreq);
460
461 /* get ip address */
462 if (n > 0) {
463 size_ipaddr = sizeof (idm_addr_list_t) +
464 (n - 1) * sizeof (idm_addr_t);
465 ipaddr = kmem_zalloc(size_ipaddr, KM_SLEEP);
466 } else {
467 goto cleanup;
468 }
469
470 /*
471 * Examine the array of interfaces and filter uninteresting ones
472 */
473 for (i = 0, j = 0, lp = lifc.lifc_req; i < n; i++, lp++) {
474
475 /*
476 * Copy the address as the SIOCGLIFFLAGS ioctl is destructive
477 */
478 ss = lp->lifr_addr;
479 /*
480 * fetch the flags using the socket of the correct family
481 */
482 switch (ss.ss_family) {
483 case AF_INET:
484 rc = ksocket_ioctl(so4, SIOCGLIFFLAGS, (intptr_t)lp,
485 &rval, CRED());
486 break;
487 case AF_INET6:
488 rc = ksocket_ioctl(so6, SIOCGLIFFLAGS, (intptr_t)lp,
489 &rval, CRED());
490 break;
491 default:
492 continue;
493 }
494 if (rc == 0) {
495 /*
496 * If we got the flags, skip uninteresting
497 * interfaces based on flags
498 */
499 if ((lp->lifr_flags & IFF_UP) != IFF_UP)
500 continue;
501 if (lp->lifr_flags &
502 (IFF_ANYCAST|IFF_NOLOCAL|IFF_DEPRECATED))
503 continue;
504 }
505
506 /* save ip address */
507 ip = &ipaddr->al_addrs[j];
508 switch (ss.ss_family) {
509 case AF_INET:
510 sin = (struct sockaddr_in *)&ss;
511 if (!idm_v4_addr_okay(&sin->sin_addr))
512 continue;
513 ip->a_addr.i_addr.in4 = sin->sin_addr;
514 ip->a_addr.i_insize = sizeof (struct in_addr);
515 break;
516 case AF_INET6:
517 sin6 = (struct sockaddr_in6 *)&ss;
518 if (!idm_v6_addr_okay(&sin6->sin6_addr))
519 continue;
520 ip->a_addr.i_addr.in6 = sin6->sin6_addr;
521 ip->a_addr.i_insize = sizeof (struct in6_addr);
522 break;
523 default:
524 continue;
525 }
526 j++;
527 }
528
529 if (j == 0) {
530 /* no valid ifaddr */
531 kmem_free(ipaddr, size_ipaddr);
532 size_ipaddr = 0;
533 ipaddr = NULL;
534 } else {
535 ipaddr->al_out_cnt = j;
536 }
537
538
539 cleanup:
540 idm_sodestroy(so6);
541 idm_sodestroy(so4);
542
543 if (buf != NULL)
544 kmem_free(buf, bufsize);
545
546 *ipaddr_p = ipaddr;
547 return (size_ipaddr);
548 }
549
550 int
idm_sorecv(ksocket_t so,void * msg,size_t len)551 idm_sorecv(ksocket_t so, void *msg, size_t len)
552 {
553 iovec_t iov;
554
555 ASSERT(so != NULL);
556 ASSERT(len != 0);
557
558 /*
559 * Fill in iovec and receive data
560 */
561 iov.iov_base = msg;
562 iov.iov_len = len;
563
564 return (idm_iov_sorecv(so, &iov, 1, len));
565 }
566
567 /*
568 * idm_sosendto - Sends a buffered data on a non-connected socket.
569 *
570 * This function puts the data provided on the wire by calling sosendmsg.
571 * It will return only when all the data has been sent or if an error
572 * occurs.
573 *
574 * Returns 0 for success, the socket errno value if sosendmsg fails, and
575 * -1 if sosendmsg returns success but uio_resid != 0
576 */
577 int
idm_sosendto(ksocket_t so,void * buff,size_t len,struct sockaddr * name,socklen_t namelen)578 idm_sosendto(ksocket_t so, void *buff, size_t len,
579 struct sockaddr *name, socklen_t namelen)
580 {
581 struct msghdr msg;
582 struct iovec iov[1];
583 int error;
584 size_t sent = 0;
585
586 iov[0].iov_base = buff;
587 iov[0].iov_len = len;
588
589 /* Initialization of the message header. */
590 bzero(&msg, sizeof (msg));
591 msg.msg_iov = iov;
592 msg.msg_iovlen = 1;
593 msg.msg_name = name;
594 msg.msg_namelen = namelen;
595
596 if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED())) == 0) {
597 /* Data sent */
598 if (sent == len) {
599 /* All data sent. Success. */
600 return (0);
601 } else {
602 /* Not all data was sent. Failure */
603 return (-1);
604 }
605 }
606
607 /* Send failed */
608 return (error);
609 }
610
611 /*
612 * idm_iov_sosend - Sends an iovec on a connection.
613 *
614 * This function puts the data provided on the wire by calling sosendmsg.
615 * It will return only when all the data has been sent or if an error
616 * occurs.
617 *
618 * Returns 0 for success, the socket errno value if sosendmsg fails, and
619 * -1 if sosendmsg returns success but uio_resid != 0
620 */
621 int
idm_iov_sosend(ksocket_t so,iovec_t * iop,int iovlen,size_t total_len)622 idm_iov_sosend(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len)
623 {
624 struct msghdr msg;
625 int error;
626 size_t sent = 0;
627
628 ASSERT(iop != NULL);
629
630 /* Initialization of the message header. */
631 bzero(&msg, sizeof (msg));
632 msg.msg_iov = iop;
633 msg.msg_iovlen = iovlen;
634
635 if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED()))
636 == 0) {
637 /* Data sent */
638 if (sent == total_len) {
639 /* All data sent. Success. */
640 return (0);
641 } else {
642 /* Not all data was sent. Failure */
643 return (-1);
644 }
645 }
646
647 /* Send failed */
648 return (error);
649 }
650
651 /*
652 * idm_iov_sorecv - Receives an iovec from a connection
653 *
654 * This function gets the data asked for from the socket. It will return
655 * only when all the requested data has been retrieved or if an error
656 * occurs.
657 *
658 * Returns 0 for success, the socket errno value if sorecvmsg fails, and
659 * -1 if sorecvmsg returns success but uio_resid != 0
660 */
661 int
idm_iov_sorecv(ksocket_t so,iovec_t * iop,int iovlen,size_t total_len)662 idm_iov_sorecv(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len)
663 {
664 struct msghdr msg;
665 int error;
666 size_t recv;
667 int flags;
668
669 ASSERT(iop != NULL);
670
671 /* Initialization of the message header. */
672 bzero(&msg, sizeof (msg));
673 msg.msg_iov = iop;
674 msg.msg_iovlen = iovlen;
675 flags = MSG_WAITALL;
676
677 if ((error = ksocket_recvmsg(so, &msg, flags, &recv, CRED()))
678 == 0) {
679 /* Received data */
680 if (recv == total_len) {
681 /* All requested data received. Success */
682 return (0);
683 } else {
684 /*
685 * Not all data was received. The connection has
686 * probably failed.
687 */
688 return (-1);
689 }
690 }
691
692 /* Receive failed */
693 return (error);
694 }
695
696 static void
idm_set_ini_preconnect_options(idm_so_conn_t * sc,boolean_t boot_conn)697 idm_set_ini_preconnect_options(idm_so_conn_t *sc, boolean_t boot_conn)
698 {
699 int conn_abort = 10000;
700 int conn_notify = 2000;
701 int abort = 30000;
702
703 /* Pre-connect socket options */
704 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
705 TCP_CONN_NOTIFY_THRESHOLD, (char *)&conn_notify, sizeof (int),
706 CRED());
707 if (boot_conn == B_FALSE) {
708 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
709 TCP_CONN_ABORT_THRESHOLD, (char *)&conn_abort, sizeof (int),
710 CRED());
711 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
712 TCP_ABORT_THRESHOLD,
713 (char *)&abort, sizeof (int), CRED());
714 }
715 }
716
717 static void
idm_set_postconnect_options(ksocket_t ks)718 idm_set_postconnect_options(ksocket_t ks)
719 {
720 const int on = 1;
721
722 /* Set connect options */
723 (void) ksocket_setsockopt(ks, SOL_SOCKET, SO_RCVBUF,
724 (char *)&idm_so_rcvbuf, sizeof (idm_so_rcvbuf), CRED());
725 (void) ksocket_setsockopt(ks, SOL_SOCKET, SO_SNDBUF,
726 (char *)&idm_so_sndbuf, sizeof (idm_so_sndbuf), CRED());
727 (void) ksocket_setsockopt(ks, IPPROTO_TCP, TCP_NODELAY,
728 (char *)&on, sizeof (on), CRED());
729 }
730
/*
 * Assemble a 24-bit value from three bytes, most significant byte first.
 */
static uint32_t
n2h24(const uchar_t *ptr)
{
	uint32_t val;

	val = ptr[0];
	val = (val << 8) | ptr[1];
	val = (val << 8) | ptr[2];

	return (val);
}
736
737 static boolean_t
idm_dataseglenokay(idm_conn_t * ic,idm_pdu_t * pdu)738 idm_dataseglenokay(idm_conn_t *ic, idm_pdu_t *pdu)
739 {
740 iscsi_hdr_t *bhs;
741
742 if (ic->ic_conn_type == CONN_TYPE_TGT &&
743 pdu->isp_datalen > ic->ic_conn_params.max_recv_dataseglen) {
744 IDM_CONN_LOG(CE_WARN,
745 "idm_dataseglenokay: exceeded the max data segment length");
746 return (B_FALSE);
747 }
748
749 bhs = pdu->isp_hdr;
750 /*
751 * Filter out any RFC3720 data-size violations.
752 */
753 switch (IDM_PDU_OPCODE(pdu)) {
754 case ISCSI_OP_SCSI_TASK_MGT_MSG:
755 case ISCSI_OP_SCSI_TASK_MGT_RSP:
756 case ISCSI_OP_RTT_RSP:
757 case ISCSI_OP_LOGOUT_CMD:
758 /*
759 * Data-segment not allowed and additional headers not allowed.
760 * (both must be zero according to the RFC3720.)
761 */
762 if (bhs->hlength != 0 || pdu->isp_datalen != 0)
763 return (B_FALSE);
764 break;
765 case ISCSI_OP_NOOP_OUT:
766 case ISCSI_OP_LOGIN_CMD:
767 case ISCSI_OP_TEXT_CMD:
768 case ISCSI_OP_SNACK_CMD:
769 case ISCSI_OP_NOOP_IN:
770 case ISCSI_OP_SCSI_RSP:
771 case ISCSI_OP_LOGIN_RSP:
772 case ISCSI_OP_TEXT_RSP:
773 case ISCSI_OP_SCSI_DATA_RSP:
774 case ISCSI_OP_LOGOUT_RSP:
775 case ISCSI_OP_ASYNC_EVENT:
776 case ISCSI_OP_REJECT_MSG:
777 /*
778 * Additional headers not allowed.
779 * (must be zero according to RFC3720.)
780 */
781 if (bhs->hlength)
782 return (B_FALSE);
783 break;
784 case ISCSI_OP_SCSI_CMD:
785 /*
786 * See RFC3720, section 10.3
787 *
788 * For pure read cmds, data-segment-length must be zero.
789 * For non-final transfers, data-size must be even number of
790 * 4-byte words.
791 * For any transfer, an expected byte count must be provided.
792 * For bidirectional transfers, an additional-header must be
793 * provided (for the read byte-count.)
794 */
795 if (pdu->isp_datalen) {
796 if ((bhs->flags & (ISCSI_FLAG_CMD_READ |
797 ISCSI_FLAG_CMD_WRITE)) == ISCSI_FLAG_CMD_READ)
798 return (B_FALSE);
799 if ((bhs->flags & ISCSI_FLAG_FINAL) == 0 &&
800 ((pdu->isp_datalen & 0x3) != 0))
801 return (B_FALSE);
802 }
803 if (bhs->flags & (ISCSI_FLAG_CMD_READ |
804 ISCSI_FLAG_CMD_WRITE)) {
805 iscsi_scsi_cmd_hdr_t *cmdhdr =
806 (iscsi_scsi_cmd_hdr_t *)bhs;
807 /*
808 * we're transfering some data, we must have a
809 * byte count
810 */
811 if (cmdhdr->data_length == 0)
812 return (B_FALSE);
813 }
814 break;
815 case ISCSI_OP_SCSI_DATA:
816 /*
817 * See RFC3720, section 10.7
818 *
819 * Additional headers aren't allowed, and the data-size must
820 * be an even number of 4-byte words (unless the final bit
821 * is set.)
822 */
823 if (bhs->hlength)
824 return (B_FALSE);
825 if ((bhs->flags & ISCSI_FLAG_FINAL) == 0 &&
826 ((pdu->isp_datalen & 0x3) != 0))
827 return (B_FALSE);
828 break;
829 default:
830 break;
831 }
832 return (B_TRUE);
833 }
834
835 static idm_status_t
idm_sorecvhdr(idm_conn_t * ic,idm_pdu_t * pdu)836 idm_sorecvhdr(idm_conn_t *ic, idm_pdu_t *pdu)
837 {
838 iscsi_hdr_t *bhs;
839 uint32_t hdr_digest_crc;
840 uint32_t crc_calculated;
841 void *new_hdr;
842 int ahslen = 0;
843 int total_len = 0;
844 int iovlen = 0;
845 struct iovec iov[2];
846 idm_so_conn_t *so_conn;
847 int rc;
848
849 so_conn = ic->ic_transport_private;
850
851 /*
852 * Read BHS
853 */
854 bhs = pdu->isp_hdr;
855 rc = idm_sorecv(so_conn->ic_so, pdu->isp_hdr, sizeof (iscsi_hdr_t));
856 if (rc != IDM_STATUS_SUCCESS) {
857 return (IDM_STATUS_FAIL);
858 }
859
860 /*
861 * Check actual AHS length against the amount available in the buffer
862 */
863 if ((IDM_PDU_OPCODE(pdu) != ISCSI_OP_SCSI_CMD) &&
864 (bhs->hlength != 0)) {
865 /* ---- hlength is only only valid for SCSI Request ---- */
866 return (IDM_STATUS_FAIL);
867 }
868 pdu->isp_hdrlen = sizeof (iscsi_hdr_t) +
869 (bhs->hlength * sizeof (uint32_t));
870 pdu->isp_datalen = n2h24(bhs->dlength);
871
872 if (!idm_dataseglenokay(ic, pdu)) {
873 IDM_CONN_LOG(CE_WARN,
874 "idm_sorecvhdr: invalid data segment length");
875 return (IDM_STATUS_FAIL);
876 }
877 if (bhs->hlength > IDM_SORX_WIRE_AHSLEN) {
878 /* Allocate a new header segment and change the callback */
879 new_hdr = kmem_alloc(pdu->isp_hdrlen, KM_SLEEP);
880 bcopy(pdu->isp_hdr, new_hdr, sizeof (iscsi_hdr_t));
881 pdu->isp_hdr = new_hdr;
882 pdu->isp_flags |= IDM_PDU_ADDL_HDR;
883
884 /*
885 * This callback will restore the expected values after
886 * the RX PDU has been processed.
887 */
888 pdu->isp_callback = idm_sorx_addl_pdu_cb;
889 }
890
891 /*
892 * Setup receipt of additional header and header digest (if enabled).
893 */
894 if (bhs->hlength > 0) {
895 iov[iovlen].iov_base = (caddr_t)(pdu->isp_hdr + 1);
896 ahslen = pdu->isp_hdrlen - sizeof (iscsi_hdr_t);
897 iov[iovlen].iov_len = ahslen;
898 total_len += iov[iovlen].iov_len;
899 iovlen++;
900 }
901
902 if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) {
903 iov[iovlen].iov_base = (caddr_t)&hdr_digest_crc;
904 iov[iovlen].iov_len = sizeof (hdr_digest_crc);
905 total_len += iov[iovlen].iov_len;
906 iovlen++;
907 }
908
909 if ((iovlen != 0) &&
910 (idm_iov_sorecv(so_conn->ic_so, &iov[0], iovlen,
911 total_len) != 0)) {
912 return (IDM_STATUS_FAIL);
913 }
914
915 /*
916 * Validate header digest if enabled
917 */
918 if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) {
919 crc_calculated = idm_crc32c(pdu->isp_hdr,
920 sizeof (iscsi_hdr_t) + ahslen);
921 if (crc_calculated != hdr_digest_crc) {
922 /* Invalid Header Digest */
923 return (IDM_STATUS_HEADER_DIGEST);
924 }
925 }
926
927 return (0);
928 }
929
930 /*
931 * idm_so_ini_conn_create()
932 * Allocate the sockets transport connection resources.
933 */
934 static idm_status_t
idm_so_ini_conn_create(idm_conn_req_t * cr,idm_conn_t * ic)935 idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic)
936 {
937 ksocket_t so;
938 idm_so_conn_t *so_conn;
939 idm_status_t idmrc;
940
941 so = idm_socreate(cr->cr_domain, cr->cr_type,
942 cr->cr_protocol);
943 if (so == NULL) {
944 return (IDM_STATUS_FAIL);
945 }
946
947 /* Bind the socket if configured to do so */
948 if (cr->cr_bound) {
949 if (ksocket_bind(so, &cr->cr_bound_addr.sin,
950 SIZEOF_SOCKADDR(&cr->cr_bound_addr.sin), CRED()) != 0) {
951 idm_sodestroy(so);
952 return (IDM_STATUS_FAIL);
953 }
954 }
955
956 idmrc = idm_so_conn_create_common(ic, so);
957 if (idmrc != IDM_STATUS_SUCCESS) {
958 idm_soshutdown(so);
959 idm_sodestroy(so);
960 return (IDM_STATUS_FAIL);
961 }
962
963 so_conn = ic->ic_transport_private;
964 /* Set up socket options */
965 idm_set_ini_preconnect_options(so_conn, cr->cr_boot_conn);
966
967 return (IDM_STATUS_SUCCESS);
968 }
969
970 /*
971 * idm_so_ini_conn_destroy()
972 * Tear down the sockets transport connection resources.
973 */
974 static void
idm_so_ini_conn_destroy(idm_conn_t * ic)975 idm_so_ini_conn_destroy(idm_conn_t *ic)
976 {
977 idm_so_conn_destroy_common(ic);
978 }
979
980 /*
981 * idm_so_ini_conn_connect()
982 * Establish the connection referred to by the handle previously allocated via
983 * idm_so_ini_conn_create().
984 */
985 static idm_status_t
idm_so_ini_conn_connect(idm_conn_t * ic)986 idm_so_ini_conn_connect(idm_conn_t *ic)
987 {
988 idm_so_conn_t *so_conn;
989 struct sonode *node = NULL;
990 int rc;
991 clock_t lbolt, conn_login_max, conn_login_interval;
992 boolean_t nonblock;
993
994 so_conn = ic->ic_transport_private;
995 nonblock = ic->ic_conn_params.nonblock_socket;
996 conn_login_max = ic->ic_conn_params.conn_login_max;
997 conn_login_interval = ddi_get_lbolt() +
998 SEC_TO_TICK(ic->ic_conn_params.conn_login_interval);
999
1000 if (nonblock == B_TRUE) {
1001 node = ((struct sonode *)(so_conn->ic_so));
1002 /* Set to none block socket mode */
1003 idm_so_socket_set_nonblock(node);
1004 do {
1005 rc = ksocket_connect(so_conn->ic_so,
1006 &ic->ic_ini_dst_addr.sin,
1007 (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)),
1008 CRED());
1009 if (rc == 0 || rc == EISCONN) {
1010 /* socket success or already success */
1011 rc = IDM_STATUS_SUCCESS;
1012 break;
1013 }
1014 if ((rc == ETIMEDOUT) || (rc == ECONNREFUSED) ||
1015 (rc == ECONNRESET)) {
1016 /* socket connection timeout or refuse */
1017 break;
1018 }
1019 lbolt = ddi_get_lbolt();
1020 if (lbolt > conn_login_max) {
1021 /*
1022 * Connection retry timeout,
1023 * failed connect to target.
1024 */
1025 break;
1026 }
1027 if (lbolt < conn_login_interval) {
1028 if ((rc == EINPROGRESS) || (rc == EALREADY)) {
1029 /* TCP connect still in progress */
1030 delay(SEC_TO_TICK(IN_PROGRESS_DELAY));
1031 continue;
1032 } else {
1033 delay(conn_login_interval - lbolt);
1034 }
1035 }
1036 conn_login_interval = ddi_get_lbolt() +
1037 SEC_TO_TICK(ic->ic_conn_params.conn_login_interval);
1038 } while (rc != 0);
1039 /* resume to nonblock mode */
1040 if (rc == IDM_STATUS_SUCCESS) {
1041 idm_so_socket_set_block(node);
1042 }
1043 } else {
1044 rc = ksocket_connect(so_conn->ic_so, &ic->ic_ini_dst_addr.sin,
1045 (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)), CRED());
1046 }
1047
1048 if (rc != 0) {
1049 idm_soshutdown(so_conn->ic_so);
1050 return (IDM_STATUS_FAIL);
1051 }
1052
1053 idm_so_conn_connect_common(ic);
1054
1055 idm_set_postconnect_options(so_conn->ic_so);
1056
1057 return (IDM_STATUS_SUCCESS);
1058 }
1059
1060 idm_status_t
idm_so_tgt_conn_create(idm_conn_t * ic,ksocket_t new_so)1061 idm_so_tgt_conn_create(idm_conn_t *ic, ksocket_t new_so)
1062 {
1063 idm_status_t idmrc;
1064
1065 idm_set_postconnect_options(new_so);
1066 idmrc = idm_so_conn_create_common(ic, new_so);
1067
1068 return (idmrc);
1069 }
1070
/*
 * idm_so_tgt_conn_destroy()
 * Target-side connection teardown.  All of the work is delegated to
 * idm_so_conn_destroy_common().
 */
static void
idm_so_tgt_conn_destroy(idm_conn_t *ic)
{
	idm_so_conn_destroy_common(ic);
}
1076
/*
 * idm_so_tgt_conn_connect()
 * Establish the connection in ic, passed from idm_tgt_conn_finish(), which
 * is invoked from the SM as a result of an inbound connection request.
 *
 * The work is delegated to idm_so_conn_connect_common(); this routine
 * unconditionally returns IDM_STATUS_SUCCESS.
 */
static idm_status_t
idm_so_tgt_conn_connect(idm_conn_t *ic)
{
	idm_so_conn_connect_common(ic);

	return (IDM_STATUS_SUCCESS);
}
1089
1090 static idm_status_t
idm_so_conn_create_common(idm_conn_t * ic,ksocket_t new_so)1091 idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so)
1092 {
1093 idm_so_conn_t *so_conn;
1094
1095 so_conn = kmem_zalloc(sizeof (idm_so_conn_t), KM_SLEEP);
1096 so_conn->ic_so = new_so;
1097
1098 ic->ic_transport_private = so_conn;
1099 ic->ic_transport_hdrlen = 0;
1100
1101 /* Set the scoreboarding flag on this connection */
1102 ic->ic_conn_flags |= IDM_CONN_USE_SCOREBOARD;
1103 ic->ic_conn_params.max_recv_dataseglen =
1104 ISCSI_DEFAULT_MAX_RECV_SEG_LEN;
1105 ic->ic_conn_params.max_xmit_dataseglen =
1106 ISCSI_DEFAULT_MAX_XMIT_SEG_LEN;
1107
1108 /*
1109 * Initialize tx thread mutex and list
1110 */
1111 mutex_init(&so_conn->ic_tx_mutex, NULL, MUTEX_DEFAULT, NULL);
1112 cv_init(&so_conn->ic_tx_cv, NULL, CV_DEFAULT, NULL);
1113 list_create(&so_conn->ic_tx_list, sizeof (idm_pdu_t),
1114 offsetof(idm_pdu_t, idm_tx_link));
1115
1116 return (IDM_STATUS_SUCCESS);
1117 }
1118
1119 static void
idm_so_conn_destroy_common(idm_conn_t * ic)1120 idm_so_conn_destroy_common(idm_conn_t *ic)
1121 {
1122 idm_so_conn_t *so_conn = ic->ic_transport_private;
1123
1124 ic->ic_transport_private = NULL;
1125 idm_sodestroy(so_conn->ic_so);
1126 list_destroy(&so_conn->ic_tx_list);
1127 mutex_destroy(&so_conn->ic_tx_mutex);
1128 cv_destroy(&so_conn->ic_tx_cv);
1129
1130 kmem_free(so_conn, sizeof (idm_so_conn_t));
1131 }
1132
/*
 * idm_so_conn_connect_common()
 * Record the socket's local and remote addresses in the idm connection
 * handle, then create the transmit and receive threads for the connection
 * and wait until both of them have published their thread IDs.
 */
static void
idm_so_conn_connect_common(idm_conn_t *ic)
{
	idm_so_conn_t	*so_conn;
	struct sockaddr_in6	t_addr;
	socklen_t	t_addrlen = 0;

	so_conn = ic->ic_transport_private;
	bzero(&t_addr, sizeof (struct sockaddr_in6));
	t_addrlen = sizeof (struct sockaddr_in6);

	/*
	 * Set the local and remote addresses in the idm conn handle.
	 *
	 * NOTE(review): both return values are deliberately discarded, and
	 * t_addrlen is not reset between the two calls, so the second call
	 * is given whatever length the first one left behind -- confirm this
	 * is acceptable for every address family used here.
	 */
	(void) ksocket_getsockname(so_conn->ic_so, (struct sockaddr *)&t_addr,
	    &t_addrlen, CRED());
	bcopy(&t_addr, &ic->ic_laddr, t_addrlen);
	(void) ksocket_getpeername(so_conn->ic_so, (struct sockaddr *)&t_addr,
	    &t_addrlen, CRED());
	bcopy(&t_addr, &ic->ic_raddr, t_addrlen);

	/*
	 * ic_mutex is held across thread creation so that neither thread can
	 * publish its ID and signal ic_cv before we start waiting.  The rx
	 * thread stores ic_rx_thread_did and signals ic_cv under ic_mutex;
	 * the tx thread is presumably symmetric -- its body is not visible
	 * in this part of the file.
	 */
	mutex_enter(&ic->ic_mutex);
	so_conn->ic_tx_thread = thread_create(NULL, 0, idm_sotx_thread, ic, 0,
	    &p0, TS_RUN, minclsyspri);
	so_conn->ic_rx_thread = thread_create(NULL, 0, idm_sorx_thread, ic, 0,
	    &p0, TS_RUN, minclsyspri);

	/* cv_wait drops ic_mutex while sleeping; loop until both IDs are set */
	while (so_conn->ic_rx_thread_did == 0 ||
	    so_conn->ic_tx_thread_did == 0)
		cv_wait(&ic->ic_cv, &ic->ic_mutex);
	mutex_exit(&ic->ic_mutex);
}
1163
1164 /*
1165 * idm_so_conn_disconnect()
1166 * Shutdown the socket connection and stop the thread
1167 */
1168 static void
idm_so_conn_disconnect(idm_conn_t * ic)1169 idm_so_conn_disconnect(idm_conn_t *ic)
1170 {
1171 idm_so_conn_t *so_conn;
1172
1173 so_conn = ic->ic_transport_private;
1174
1175 mutex_enter(&ic->ic_mutex);
1176 so_conn->ic_rx_thread_running = B_FALSE;
1177 so_conn->ic_tx_thread_running = B_FALSE;
1178 /* We need to wakeup the TX thread */
1179 mutex_enter(&so_conn->ic_tx_mutex);
1180 cv_signal(&so_conn->ic_tx_cv);
1181 mutex_exit(&so_conn->ic_tx_mutex);
1182 mutex_exit(&ic->ic_mutex);
1183
1184 /* This should wakeup the RX thread if it is sleeping */
1185 idm_soshutdown(so_conn->ic_so);
1186
1187 thread_join(so_conn->ic_tx_thread_did);
1188 thread_join(so_conn->ic_rx_thread_did);
1189 }
1190
1191 /*
1192 * idm_so_tgt_svc_create()
1193 * Establish a service on an IP address and port. idm_svc_req_t contains
1194 * the service parameters.
1195 */
1196 /*ARGSUSED*/
1197 static idm_status_t
idm_so_tgt_svc_create(idm_svc_req_t * sr,idm_svc_t * is)1198 idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is)
1199 {
1200 idm_so_svc_t *so_svc;
1201
1202 so_svc = kmem_zalloc(sizeof (idm_so_svc_t), KM_SLEEP);
1203
1204 /* Set the new sockets service in svc handle */
1205 is->is_so_svc = (void *)so_svc;
1206
1207 return (IDM_STATUS_SUCCESS);
1208 }
1209
/*
 * idm_so_tgt_svc_destroy()
 * Teardown sockets resources allocated in idm_so_tgt_svc_create()
 *
 * Only the idm_so_svc_t itself is freed here; the listening socket is not
 * touched by this routine.
 */
static void
idm_so_tgt_svc_destroy(idm_svc_t *is)
{
	/* the socket will have been torn down; free the service */
	kmem_free(is->is_so_svc, sizeof (idm_so_svc_t));
}
1220
/*
 * idm_so_tgt_svc_online()
 * Launch a watch thread on the svc allocated in idm_so_tgt_svc_create()
 *
 * Creates an IPv6 stream socket bound to in6addr_any on the port from the
 * service request, puts it into the listening state and starts
 * idm_so_svc_port_watcher() on it.  Returns IDM_STATUS_SUCCESS once the
 * watcher has reported that it is running.  Returns IDM_STATUS_FAIL if the
 * socket cannot be created, bound or listened on, or if the thread cannot
 * be created; on the bind, listen and thread failure paths the socket is
 * destroyed before returning.
 */

static idm_status_t
idm_so_tgt_svc_online(idm_svc_t *is)
{
	idm_so_svc_t		*so_svc;
	idm_svc_req_t		*sr = &is->is_svc_req;
	struct sockaddr_in6	sin6_ip;
	const uint32_t		on = 1;
	const uint32_t		off = 0;

	/* is_mutex is held until the watcher thread has checked in */
	mutex_enter(&is->is_mutex);
	so_svc = (idm_so_svc_t *)is->is_so_svc;

	/*
	 * Create an IPv6 socket.  There is no fallback to another address
	 * family in this routine: failure to create the socket fails the
	 * whole operation.
	 */
	if ((so_svc->is_so = idm_socreate(PF_INET6, SOCK_STREAM, 0)) == NULL) {
		mutex_exit(&is->is_mutex);
		return (IDM_STATUS_FAIL);
	} else {
		/* Wildcard address, caller-supplied port in network order */
		bzero(&sin6_ip, sizeof (sin6_ip));
		sin6_ip.sin6_family = AF_INET6;
		sin6_ip.sin6_port = htons(sr->sr_port);
		sin6_ip.sin6_addr = in6addr_any;

		/* Best effort: a failure to set either option is ignored */
		(void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET,
		    SO_REUSEADDR, (char *)&on, sizeof (on), CRED());
		/*
		 * Turn off SO_MAC_EXEMPT so future sobinds succeed
		 */
		(void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET,
		    SO_MAC_EXEMPT, (char *)&off, sizeof (off), CRED());

		if (ksocket_bind(so_svc->is_so, (struct sockaddr *)&sin6_ip,
		    sizeof (sin6_ip), CRED()) != 0) {
			mutex_exit(&is->is_mutex);
			idm_sodestroy(so_svc->is_so);
			return (IDM_STATUS_FAIL);
		}
	}

	idm_set_postconnect_options(so_svc->is_so);

	/* Listen with a backlog of 5 pending connections */
	if (ksocket_listen(so_svc->is_so, 5, CRED()) != 0) {
		mutex_exit(&is->is_mutex);
		idm_soshutdown(so_svc->is_so);
		idm_sodestroy(so_svc->is_so);
		return (IDM_STATUS_FAIL);
	}

	/* Launch a watch thread */
	so_svc->is_thread = thread_create(NULL, 0, idm_so_svc_port_watcher,
	    is, 0, &p0, TS_RUN, minclsyspri);

	if (so_svc->is_thread == NULL) {
		/* Failure to launch; teardown the socket */
		mutex_exit(&is->is_mutex);
		idm_soshutdown(so_svc->is_so);
		idm_sodestroy(so_svc->is_so);
		return (IDM_STATUS_FAIL);
	}
	/*
	 * Take a hold on the socket on behalf of the watcher thread; the
	 * watcher drops it with ksocket_rele() when its accept loop ends.
	 */
	ksocket_hold(so_svc->is_so);
	/* Wait for the port watcher thread to start */
	while (!so_svc->is_thread_running)
		cv_wait(&is->is_cv, &is->is_mutex);
	mutex_exit(&is->is_mutex);

	return (IDM_STATUS_SUCCESS);
}
1294
1295 /*
1296 * idm_so_tgt_svc_offline
1297 *
1298 * Stop listening on the IP address and port identified by idm_svc_t.
1299 */
1300 static void
idm_so_tgt_svc_offline(idm_svc_t * is)1301 idm_so_tgt_svc_offline(idm_svc_t *is)
1302 {
1303 idm_so_svc_t *so_svc;
1304 mutex_enter(&is->is_mutex);
1305 so_svc = (idm_so_svc_t *)is->is_so_svc;
1306 so_svc->is_thread_running = B_FALSE;
1307 mutex_exit(&is->is_mutex);
1308
1309 /*
1310 * Teardown socket
1311 */
1312 idm_sodestroy(so_svc->is_so);
1313
1314 /*
1315 * Now we expect the port watcher thread to terminate
1316 */
1317 thread_join(so_svc->is_thread_did);
1318 }
1319
/*
 * Watch thread for target service connection establishment.
 *
 * arg is the idm_svc_t being served.  The thread announces itself (running
 * flag, thread ID, cv_signal on is_cv), then loops accepting connections
 * on the service socket until is_thread_running is cleared.  Each accepted
 * socket is wrapped in a new idm connection and handed to the connection
 * state machine with CE_CONNECT_ACCEPT.  On exit the thread releases its
 * hold on the listening socket.
 */
void
idm_so_svc_port_watcher(void *arg)
{
	idm_svc_t		*svc = arg;
	ksocket_t		new_so;
	idm_conn_t		*ic;
	idm_status_t		idmrc;
	idm_so_svc_t		*so_svc;
	int			rc;
	const uint32_t		off = 0;
	struct sockaddr_in6	t_addr;
	socklen_t		t_addrlen;

	bzero(&t_addr, sizeof (struct sockaddr_in6));
	t_addrlen = sizeof (struct sockaddr_in6);
	mutex_enter(&svc->is_mutex);

	/* Publish our state and wake whoever is waiting on is_cv */
	so_svc = svc->is_so_svc;
	so_svc->is_thread_running = B_TRUE;
	so_svc->is_thread_did = so_svc->is_thread->t_did;

	cv_signal(&svc->is_cv);

	IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) online", (void *)svc,
	    svc->is_svc_req.sr_port);

	/*
	 * Loop invariant: is_mutex is held whenever the loop condition is
	 * evaluated, and dropped for the duration of each iteration's body.
	 * Every "continue" below therefore re-takes the mutex first.
	 */
	while (so_svc->is_thread_running) {
		mutex_exit(&svc->is_mutex);

		/*
		 * NOTE(review): t_addrlen is passed by reference and is not
		 * reset between iterations; the peer address in t_addr is
		 * not used afterwards in this function.
		 */
		if ((rc = ksocket_accept(so_svc->is_so,
		    (struct sockaddr *)&t_addr, &t_addrlen,
		    &new_so, CRED())) != 0) {
			mutex_enter(&svc->is_mutex);
			/* ECONNABORTED and EINTR are not worth logging */
			if (rc != ECONNABORTED && rc != EINTR) {
				IDM_SVC_LOG(CE_NOTE, "idm_so_svc_port_watcher:"
				    " ksocket_accept failed %d", rc);
			}
			/*
			 * Unclean shutdown of this thread is not handled
			 * wait for !is_thread_running.
			 */
			continue;
		}
		/*
		 * Turn off SO_MAC_EXEMPT so future sobinds succeed
		 */
		(void) ksocket_setsockopt(new_so, SOL_SOCKET, SO_MAC_EXEMPT,
		    (char *)&off, sizeof (off), CRED());

		idmrc = idm_svc_conn_create(svc, IDM_TRANSPORT_TYPE_SOCKETS,
		    &ic);
		if (idmrc != IDM_STATUS_SUCCESS) {
			/* Drop connection */
			idm_soshutdown(new_so);
			idm_sodestroy(new_so);
			mutex_enter(&svc->is_mutex);
			continue;
		}

		/* Bind the accepted socket to the new connection */
		idmrc = idm_so_tgt_conn_create(ic, new_so);
		if (idmrc != IDM_STATUS_SUCCESS) {
			idm_svc_conn_destroy(ic);
			idm_soshutdown(new_so);
			idm_sodestroy(new_so);
			mutex_enter(&svc->is_mutex);
			continue;
		}

		/*
		 * Kick the state machine.  At CS_S3_XPT_UP the state machine
		 * will notify the client (target) about the new connection.
		 */
		idm_conn_event(ic, CE_CONNECT_ACCEPT, (uintptr_t)NULL);

		mutex_enter(&svc->is_mutex);
	}
	/* Drop the hold taken on our behalf in idm_so_tgt_svc_online() */
	ksocket_rele(so_svc->is_so);
	so_svc->is_thread_running = B_FALSE;
	mutex_exit(&svc->is_mutex);

	IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) offline", (void *)svc,
	    svc->is_svc_req.sr_port);

	thread_exit();
}
1408
/*
 * idm_so_free_task_rsrc() stops any ongoing processing of the task and
 * frees resources associated with the task.
 *
 * It's not clear that this should return idm_status_t.  What do we do
 * if it fails?
 *
 * As written, every path returns IDM_STATUS_SUCCESS.
 */
static idm_status_t
idm_so_free_task_rsrc(idm_task_t *idt)
{
	idm_buf_t	*idb, *next_idb;

	/*
	 * There is nothing to cleanup on initiator connections
	 */
	if (IDM_CONN_ISINI(idt->idt_ic))
		return (IDM_STATUS_SUCCESS);

	/*
	 * If this is a target connection, call idm_buf_rx_from_ini_done for
	 * any buffer on the "outbufv" list with idb->idb_in_transport==B_TRUE.
	 *
	 * In addition, remove any buffers associated with this task from
	 * the ic_tx_list.  We'll do this by walking the idt_inbufv list, but
	 * items don't actually get removed from that list (and completion
	 * routines called) until idm_task_cleanup.
	 */
	mutex_enter(&idt->idt_mutex);

	/*
	 * Both loops fetch the successor before touching the current
	 * buffer, because the completion call drops idt_mutex and the
	 * current element must not be relied upon afterwards.
	 */
	for (idb = list_head(&idt->idt_outbufv); idb != NULL; idb = next_idb) {
		next_idb = list_next(&idt->idt_outbufv, idb);
		if (idb->idb_in_transport) {
			/*
			 * idm_buf_rx_from_ini_done releases idt->idt_mutex
			 */
			DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
			    uintptr_t, idb->idb_buf,
			    uint32_t, idb->idb_bufoffset,
			    uint64_t, 0, uint32_t, 0, uint32_t, 0,
			    uint32_t, idb->idb_xfer_len,
			    int, XFER_BUF_RX_FROM_INI);
			idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_ABORTED);
			/* Re-take the mutex before continuing the walk */
			mutex_enter(&idt->idt_mutex);
		}
	}

	for (idb = list_head(&idt->idt_inbufv); idb != NULL; idb = next_idb) {
		next_idb = list_next(&idt->idt_inbufv, idb);
		/*
		 * We want to remove these items from the tx_list as well,
		 * but knowing it's in the idt_inbufv list is not a guarantee
		 * that it's in the tx_list.  If it's on the tx list then
		 * let idm_sotx_thread() clean it up.
		 */
		if (idb->idb_in_transport && !idb->idb_tx_thread) {
			/*
			 * idm_buf_tx_to_ini_done releases idt->idt_mutex
			 */
			DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
			    uintptr_t, idb->idb_buf,
			    uint32_t, idb->idb_bufoffset,
			    uint64_t, 0, uint32_t, 0, uint32_t, 0,
			    uint32_t, idb->idb_xfer_len,
			    int, XFER_BUF_TX_TO_INI);
			idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
			/* Re-take the mutex before continuing the walk */
			mutex_enter(&idt->idt_mutex);
		}
	}

	mutex_exit(&idt->idt_mutex);

	return (IDM_STATUS_SUCCESS);
}
1482
/*
 * idm_so_negotiate_key_values() validates the key values for this connection
 *
 * The sockets transport negotiates nothing itself: none of the arguments
 * are examined and KV_HANDLED is always returned.
 */
/* ARGSUSED */
static kv_status_t
idm_so_negotiate_key_values(idm_conn_t *it, nvlist_t *request_nvl,
    nvlist_t *response_nvl, nvlist_t *negotiated_nvl)
{
	/* All parameters are negotiated at the iscsit level */
	return (KV_HANDLED);
}
1494
1495 /*
1496 * idm_so_notice_key_values() activates the negotiated key values for
1497 * this connection.
1498 */
1499 static void
idm_so_notice_key_values(idm_conn_t * it,nvlist_t * negotiated_nvl)1500 idm_so_notice_key_values(idm_conn_t *it, nvlist_t *negotiated_nvl)
1501 {
1502 char *nvp_name;
1503 nvpair_t *nvp;
1504 nvpair_t *next_nvp;
1505 int nvrc;
1506 idm_status_t idm_status;
1507 const idm_kv_xlate_t *ikvx;
1508 uint64_t num_val;
1509
1510 for (nvp = nvlist_next_nvpair(negotiated_nvl, NULL);
1511 nvp != NULL; nvp = next_nvp) {
1512 next_nvp = nvlist_next_nvpair(negotiated_nvl, nvp);
1513 nvp_name = nvpair_name(nvp);
1514
1515 ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name));
1516 switch (ikvx->ik_key_id) {
1517 case KI_HEADER_DIGEST:
1518 case KI_DATA_DIGEST:
1519 idm_status = idm_so_handle_digest(it, nvp, ikvx);
1520 ASSERT(idm_status == 0);
1521
1522 /* Remove processed item from negotiated_nvl list */
1523 nvrc = nvlist_remove_all(
1524 negotiated_nvl, ikvx->ik_key_name);
1525 ASSERT(nvrc == 0);
1526 break;
1527 case KI_MAX_RECV_DATA_SEGMENT_LENGTH:
1528 /*
1529 * Just pass the value down to idm layer.
1530 * No need to remove it from negotiated_nvl list here.
1531 */
1532 nvrc = nvpair_value_uint64(nvp, &num_val);
1533 ASSERT(nvrc == 0);
1534 it->ic_conn_params.max_xmit_dataseglen =
1535 (uint32_t)num_val;
1536 break;
1537 default:
1538 break;
1539 }
1540 }
1541 }
1542
1543 /*
1544 * idm_so_declare_key_values() declares the key values for this connection
1545 */
1546 /* ARGSUSED */
1547 static kv_status_t
idm_so_declare_key_values(idm_conn_t * it,nvlist_t * config_nvl,nvlist_t * outgoing_nvl)1548 idm_so_declare_key_values(idm_conn_t *it, nvlist_t *config_nvl,
1549 nvlist_t *outgoing_nvl)
1550 {
1551 char *nvp_name;
1552 nvpair_t *nvp;
1553 nvpair_t *next_nvp;
1554 kv_status_t kvrc;
1555 int nvrc = 0;
1556 const idm_kv_xlate_t *ikvx;
1557 uint64_t num_val;
1558
1559 for (nvp = nvlist_next_nvpair(config_nvl, NULL);
1560 nvp != NULL && nvrc == 0; nvp = next_nvp) {
1561 next_nvp = nvlist_next_nvpair(config_nvl, nvp);
1562 nvp_name = nvpair_name(nvp);
1563
1564 ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name));
1565 switch (ikvx->ik_key_id) {
1566 case KI_MAX_RECV_DATA_SEGMENT_LENGTH:
1567 if ((nvrc = nvpair_value_uint64(nvp, &num_val)) != 0) {
1568 break;
1569 }
1570 if (outgoing_nvl &&
1571 (nvrc = nvlist_add_uint64(outgoing_nvl,
1572 nvp_name, num_val)) != 0) {
1573 break;
1574 }
1575 it->ic_conn_params.max_recv_dataseglen =
1576 (uint32_t)num_val;
1577 break;
1578 default:
1579 break;
1580 }
1581 }
1582 kvrc = idm_nvstat_to_kvstat(nvrc);
1583 return (kvrc);
1584 }
1585
1586 static idm_status_t
idm_so_handle_digest(idm_conn_t * it,nvpair_t * digest_choice,const idm_kv_xlate_t * ikvx)1587 idm_so_handle_digest(idm_conn_t *it, nvpair_t *digest_choice,
1588 const idm_kv_xlate_t *ikvx)
1589 {
1590 int nvrc;
1591 char *digest_choice_string;
1592
1593 nvrc = nvpair_value_string(digest_choice,
1594 &digest_choice_string);
1595 ASSERT(nvrc == 0);
1596 if (strcasecmp(digest_choice_string, "crc32c") == 0) {
1597 switch (ikvx->ik_key_id) {
1598 case KI_HEADER_DIGEST:
1599 it->ic_conn_flags |= IDM_CONN_HEADER_DIGEST;
1600 break;
1601 case KI_DATA_DIGEST:
1602 it->ic_conn_flags |= IDM_CONN_DATA_DIGEST;
1603 break;
1604 default:
1605 ASSERT(0);
1606 break;
1607 }
1608 } else if (strcasecmp(digest_choice_string, "none") == 0) {
1609 switch (ikvx->ik_key_id) {
1610 case KI_HEADER_DIGEST:
1611 it->ic_conn_flags &= ~IDM_CONN_HEADER_DIGEST;
1612 break;
1613 case KI_DATA_DIGEST:
1614 it->ic_conn_flags &= ~IDM_CONN_DATA_DIGEST;
1615 break;
1616 default:
1617 ASSERT(0);
1618 break;
1619 }
1620 } else {
1621 ASSERT(0);
1622 }
1623
1624 return (IDM_STATUS_SUCCESS);
1625 }
1626
1627
/*
 * idm_so_conn_is_capable() verifies that the passed connection is provided
 * for by the sockets interface.
 *
 * Neither argument is examined: the function always returns B_TRUE.
 */
/* ARGSUSED */
static boolean_t
idm_so_conn_is_capable(idm_conn_req_t *ic, idm_transport_caps_t *caps)
{
	return (B_TRUE);
}
1638
1639 /*
1640 * idm_so_rx_datain() validates the Data Sequence number of the PDU. The
1641 * idm_sorecv_scsidata() function invoked earlier actually reads the data
1642 * off the socket into the appropriate buffers.
1643 */
1644 static void
idm_so_rx_datain(idm_conn_t * ic,idm_pdu_t * pdu)1645 idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu)
1646 {
1647 iscsi_data_hdr_t *bhs;
1648 idm_task_t *idt;
1649 idm_buf_t *idb;
1650 uint32_t datasn;
1651 size_t offset;
1652 iscsi_hdr_t *ihp = (iscsi_hdr_t *)pdu->isp_hdr;
1653 iscsi_data_rsp_hdr_t *idrhp = (iscsi_data_rsp_hdr_t *)ihp;
1654
1655 ASSERT(ic != NULL);
1656 ASSERT(pdu != NULL);
1657 ASSERT(IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP);
1658
1659 bhs = (iscsi_data_hdr_t *)pdu->isp_hdr;
1660 datasn = ntohl(bhs->datasn);
1661 offset = ntohl(bhs->offset);
1662
1663 /*
1664 * Look up the task corresponding to the initiator task tag
1665 * to get the buffers affiliated with the task.
1666 */
1667 idt = idm_task_find(ic, bhs->itt, bhs->ttt);
1668 if (idt == NULL) {
1669 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: failed to find task");
1670 idm_pdu_rx_protocol_error(ic, pdu);
1671 return;
1672 }
1673
1674 idb = pdu->isp_sorx_buf;
1675 if (idb == NULL) {
1676 IDM_CONN_LOG(CE_WARN,
1677 "idm_so_rx_datain: failed to find buffer");
1678 idm_task_rele(idt);
1679 idm_pdu_rx_protocol_error(ic, pdu);
1680 return;
1681 }
1682
1683 /*
1684 * DataSN values should be sequential and should not have any gaps or
1685 * repetitions. Check the DataSN with the one stored in the task.
1686 */
1687 if (datasn == idt->idt_exp_datasn) {
1688 idt->idt_exp_datasn++; /* keep track of DataSN received */
1689 } else {
1690 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: datasn out of order");
1691 idm_task_rele(idt);
1692 idm_pdu_rx_protocol_error(ic, pdu);
1693 return;
1694 }
1695
1696 /*
1697 * PDUs in a sequence should be in continuously increasing
1698 * address offset
1699 */
1700 if (offset != idb->idb_exp_offset) {
1701 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: unexpected offset");
1702 idm_task_rele(idt);
1703 idm_pdu_rx_protocol_error(ic, pdu);
1704 return;
1705 }
1706 /* Expected next relative buffer offset */
1707 idb->idb_exp_offset += n2h24(bhs->dlength);
1708 idt->idt_rx_bytes += n2h24(bhs->dlength);
1709
1710 idm_task_rele(idt);
1711
1712 /*
1713 * For now call scsi_rsp which will process the data rsp
1714 * Revisit, need to provide an explicit client entry point for
1715 * phase collapse completions.
1716 */
1717 if ((IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP) &&
1718 (idrhp->flags & ISCSI_FLAG_DATA_STATUS)) {
1719 (*ic->ic_conn_ops.icb_rx_scsi_rsp)(ic, pdu);
1720 }
1721
1722 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1723 }
1724
/*
 * The idm_so_rx_dataout() function is used by the iSCSI target to read
 * data from the Data-Out PDU sent by the iSCSI initiator.
 *
 * This function gets the Initiator Task Tag from the PDU BHS and looks up the
 * task to get the buffers associated with the PDU.  A PDU might span buffers.
 * The data is then read into the respective buffer.
 *
 * In this routine itself the PDU is only validated and accounted for: the
 * receive buffer is taken from pdu->isp_sorx_buf, the offset is checked
 * against the buffer's expected offset, and on the final PDU of a transfer
 * the buffer completion routine is invoked.  Any validation failure is
 * reported through idm_pdu_rx_protocol_error().
 */
static void
idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu)
{

	iscsi_data_hdr_t	*bhs;
	idm_task_t		*idt;
	idm_buf_t		*idb;
	size_t			offset;

	ASSERT(ic != NULL);
	ASSERT(pdu != NULL);
	ASSERT(IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA);

	bhs = (iscsi_data_hdr_t *)pdu->isp_hdr;
	offset = ntohl(bhs->offset);

	/*
	 * Look up the task corresponding to the initiator task tag
	 * to get the buffers affiliated with the task.
	 */
	idt = idm_task_find(ic, bhs->itt, bhs->ttt);
	if (idt == NULL) {
		IDM_CONN_LOG(CE_WARN,
		    "idm_so_rx_dataout: failed to find task");
		idm_pdu_rx_protocol_error(ic, pdu);
		return;
	}

	/* From here on the task hold must be released on every path */
	idb = pdu->isp_sorx_buf;
	if (idb == NULL) {
		IDM_CONN_LOG(CE_WARN,
		    "idm_so_rx_dataout: failed to find buffer");
		idm_task_rele(idt);
		idm_pdu_rx_protocol_error(ic, pdu);
		return;
	}

	/* Keep track of data transferred - check data offsets */
	if (offset != idb->idb_exp_offset) {
		IDM_CONN_LOG(CE_NOTE, "idm_so_rx_dataout: offset out of seq: "
		    "%ld, %d", offset, idb->idb_exp_offset);
		idm_task_rele(idt);
		idm_pdu_rx_protocol_error(ic, pdu);
		return;
	}
	/*
	 * Expected next relative offset.
	 * NOTE(review): the two updates below decode the same 24-bit length
	 * with two different helpers (ntoh24 and n2h24) -- presumably
	 * equivalent; confirm.
	 */
	idb->idb_exp_offset += ntoh24(bhs->dlength);
	idt->idt_rx_bytes += n2h24(bhs->dlength);

	/*
	 * Call the buffer callback when the transfer is complete
	 *
	 * The connection state machine should only abort tasks after
	 * shutting down the connection so we are assured that there
	 * won't be a simultaneous attempt to abort this task at the
	 * same time as we are processing this PDU (due to a connection
	 * state change).
	 */
	if (bhs->flags & ISCSI_FLAG_FINAL) {
		/*
		 * We have gotten the last data-message for the current
		 * transfer.  idb_xfer_len represents the data that the
		 * command intended to transfer, it does not represent the
		 * actual number of bytes transferred. If we have not
		 * transferred the expected number of bytes something is
		 * wrong.
		 *
		 * We have two options, when there is a mismatch, we can
		 * regard the transfer as invalid -- or we can modify our
		 * notion of "xfer_len."  In order to be as stringent as
		 * possible, here we regard this transfer as in error; and
		 * bail out.
		 */
		if (idb->idb_buflen == idb->idb_xfer_len &&
		    idb->idb_buflen !=
		    (idb->idb_exp_offset - idb->idb_bufoffset)) {
			/*
			 * NOTE(review): the condition is reported twice,
			 * once with printf and once through IDM_CONN_LOG.
			 */
			printf("idm_so_rx_dataout: incomplete transfer, "
			    "protocol err");
			IDM_CONN_LOG(CE_NOTE,
			    "idm_so_rx_dataout: incomplete transfer: %ld, %d",
			    offset, (int)(idb->idb_exp_offset - offset));
			idm_task_rele(idt);
			idm_pdu_rx_protocol_error(ic, pdu);
			return;
		}
		/*
		 * We only want to call idm_buf_rx_from_ini_done once
		 * per transfer.  It's possible that this task has
		 * already been aborted in which case
		 * idm_so_free_task_rsrc will call idm_buf_rx_from_ini_done
		 * for each buffer with idb_in_transport==B_TRUE.  To
		 * close this window and ensure that this doesn't happen,
		 * we'll clear idb->idb_in_transport now while holding
		 * the task mutex.   This is only really an issue for
		 * SCSI task abort -- if tasks were being aborted because
		 * of a connection state change the state machine would
		 * have already stopped the receive thread.
		 *
		 * NOTE(review): idb_in_transport is not cleared by any
		 * statement in this function; presumably that happens
		 * inside idm_buf_rx_from_ini_done() -- confirm.
		 */
		mutex_enter(&idt->idt_mutex);

		/*
		 * Release the task hold here (obtained in idm_task_find)
		 * because the task may complete synchronously during
		 * idm_buf_rx_from_ini_done.  Since we still have an active
		 * buffer we know there is at least one additional hold on idt.
		 */
		idm_task_rele(idt);

		/*
		 * idm_buf_rx_from_ini_done releases idt->idt_mutex
		 */
		DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
		    uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
		    uint64_t, 0, uint32_t, 0, uint32_t, 0,
		    uint32_t, idb->idb_xfer_len,
		    int, XFER_BUF_RX_FROM_INI);
		idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_SUCCESS);
		idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
		return;
	}

	/* Not the final PDU of the transfer: just drop the hold and finish */
	idm_task_rele(idt);
	idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
}
1857
1858 /*
1859 * The idm_so_rx_rtt() function is used by the iSCSI initiator to handle
1860 * the R2T PDU sent by the iSCSI target indicating that it is ready to
1861 * accept data. This gets the Initiator Task Tag (itt) from the PDU BHS
1862 * and looks up the task in the task tree using the itt to get the output
1863 * buffers associated the task. The R2T PDU contains the offset of the
1864 * requested data and the data length. This function then constructs a
1865 * sequence of iSCSI PDUs and outputs the requested data. Each Data-Out
1866 * PDU is associated with the R2T by the Target Transfer Tag (ttt).
1867 */
1868
1869 static void
idm_so_rx_rtt(idm_conn_t * ic,idm_pdu_t * pdu)1870 idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu)
1871 {
1872 idm_task_t *idt;
1873 idm_buf_t *idb;
1874 iscsi_rtt_hdr_t *rtt_hdr;
1875 uint32_t data_offset;
1876 uint32_t data_length;
1877
1878 ASSERT(ic != NULL);
1879 ASSERT(pdu != NULL);
1880
1881 rtt_hdr = (iscsi_rtt_hdr_t *)pdu->isp_hdr;
1882 data_offset = ntohl(rtt_hdr->data_offset);
1883 data_length = ntohl(rtt_hdr->data_length);
1884 idt = idm_task_find(ic, rtt_hdr->itt, rtt_hdr->ttt);
1885
1886 if (idt == NULL) {
1887 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find task");
1888 idm_pdu_rx_protocol_error(ic, pdu);
1889 return;
1890 }
1891
1892 /* Find the buffer bound to the task by the iSCSI initiator */
1893 mutex_enter(&idt->idt_mutex);
1894 idb = idm_buf_find(&idt->idt_outbufv, data_offset);
1895 if (idb == NULL) {
1896 mutex_exit(&idt->idt_mutex);
1897 idm_task_rele(idt);
1898 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find buffer");
1899 idm_pdu_rx_protocol_error(ic, pdu);
1900 return;
1901 }
1902
1903 /* return buffer contains this data */
1904 if (data_offset + data_length > idb->idb_buflen) {
1905 /* Overflow */
1906 mutex_exit(&idt->idt_mutex);
1907 idm_task_rele(idt);
1908 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: read from outside "
1909 "buffer");
1910 idm_pdu_rx_protocol_error(ic, pdu);
1911 return;
1912 }
1913
1914 idt->idt_r2t_ttt = rtt_hdr->ttt;
1915 idt->idt_exp_datasn = 0;
1916
1917 idm_so_send_rtt_data(ic, idt, idb, data_offset,
1918 ntohl(rtt_hdr->data_length));
1919 /*
1920 * the idt_mutex is released in idm_so_send_rtt_data
1921 */
1922
1923 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1924 idm_task_rele(idt);
1925
1926 }
1927
1928 idm_status_t
idm_sorecvdata(idm_conn_t * ic,idm_pdu_t * pdu)1929 idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu)
1930 {
1931 uint8_t pad[ISCSI_PAD_WORD_LEN];
1932 int pad_len;
1933 uint32_t data_digest_crc;
1934 uint32_t crc_calculated;
1935 int total_len;
1936 idm_so_conn_t *so_conn;
1937
1938 so_conn = ic->ic_transport_private;
1939
1940 pad_len = ((ISCSI_PAD_WORD_LEN -
1941 (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) &
1942 (ISCSI_PAD_WORD_LEN - 1));
1943
1944 ASSERT(pdu->isp_iovlen < (PDU_MAX_IOVLEN - 2)); /* pad + data digest */
1945
1946 total_len = pdu->isp_datalen;
1947
1948 if (pad_len) {
1949 pdu->isp_iov[pdu->isp_iovlen].iov_base = (char *)&pad;
1950 pdu->isp_iov[pdu->isp_iovlen].iov_len = pad_len;
1951 total_len += pad_len;
1952 pdu->isp_iovlen++;
1953 }
1954
1955 /* setup data digest */
1956 if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) {
1957 pdu->isp_iov[pdu->isp_iovlen].iov_base =
1958 (char *)&data_digest_crc;
1959 pdu->isp_iov[pdu->isp_iovlen].iov_len =
1960 sizeof (data_digest_crc);
1961 total_len += sizeof (data_digest_crc);
1962 pdu->isp_iovlen++;
1963 }
1964
1965 pdu->isp_data = (uint8_t *)(uintptr_t)pdu->isp_iov[0].iov_base;
1966
1967 if (idm_iov_sorecv(so_conn->ic_so, &pdu->isp_iov[0],
1968 pdu->isp_iovlen, total_len) != 0) {
1969 return (IDM_STATUS_IO);
1970 }
1971
1972 if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) {
1973 crc_calculated = idm_crc32c(pdu->isp_data,
1974 pdu->isp_datalen);
1975 if (pad_len) {
1976 crc_calculated = idm_crc32c_continued((char *)&pad,
1977 pad_len, crc_calculated);
1978 }
1979 if (crc_calculated != data_digest_crc) {
1980 IDM_CONN_LOG(CE_WARN,
1981 "idm_sorecvdata: "
1982 "CRC error: actual 0x%x, calc 0x%x",
1983 data_digest_crc, crc_calculated);
1984
1985 /* Invalid Data Digest */
1986 return (IDM_STATUS_DATA_DIGEST);
1987 }
1988 }
1989
1990 return (IDM_STATUS_SUCCESS);
1991 }
1992
1993 /*
1994 * idm_sorecv_scsidata() is used to receive scsi data from the socket. The
1995 * Data-type PDU header must be read into the idm_pdu_t structure prior to
1996 * calling this function.
1997 */
1998 idm_status_t
idm_sorecv_scsidata(idm_conn_t * ic,idm_pdu_t * pdu)1999 idm_sorecv_scsidata(idm_conn_t *ic, idm_pdu_t *pdu)
2000 {
2001 iscsi_data_hdr_t *bhs;
2002 idm_task_t *task;
2003 uint32_t offset;
2004 uint8_t opcode;
2005 uint32_t dlength;
2006 list_t *buflst;
2007 uint32_t xfer_bytes;
2008 idm_status_t status;
2009
2010 ASSERT(ic != NULL);
2011 ASSERT(pdu != NULL);
2012
2013 bhs = (iscsi_data_hdr_t *)pdu->isp_hdr;
2014
2015 offset = ntohl(bhs->offset);
2016 opcode = IDM_PDU_OPCODE(pdu);
2017 dlength = n2h24(bhs->dlength);
2018
2019 ASSERT((opcode == ISCSI_OP_SCSI_DATA_RSP) ||
2020 (opcode == ISCSI_OP_SCSI_DATA));
2021
2022 /*
2023 * Successful lookup implicitly gets a "hold" on the task. This
2024 * hold must be released before leaving this function. At one
2025 * point we were caching this task context and retaining the hold
2026 * but it turned out to be very difficult to release the hold properly.
2027 * The task can be aborted and the connection shutdown between this
2028 * call and the subsequent expected call to idm_so_rx_datain/
2029 * idm_so_rx_dataout (in which case those functions are not called).
2030 * Releasing the hold in the PDU callback doesn't work well either
2031 * because the whole task may be completed by then at which point
2032 * it is too late to release the hold -- for better or worse this
2033 * code doesn't wait on the refcnts during normal operation.
2034 * idm_task_find() is very fast and it is not a huge burden if we
2035 * have to do it twice.
2036 */
2037 task = idm_task_find(ic, bhs->itt, bhs->ttt);
2038 if (task == NULL) {
2039 IDM_CONN_LOG(CE_WARN,
2040 "idm_sorecv_scsidata: could not find task");
2041 return (IDM_STATUS_FAIL);
2042 }
2043
2044 mutex_enter(&task->idt_mutex);
2045 buflst = (opcode == ISCSI_OP_SCSI_DATA_RSP) ?
2046 &task->idt_inbufv : &task->idt_outbufv;
2047 pdu->isp_sorx_buf = idm_buf_find(buflst, offset);
2048 mutex_exit(&task->idt_mutex);
2049
2050 if (pdu->isp_sorx_buf == NULL) {
2051 idm_task_rele(task);
2052 IDM_CONN_LOG(CE_WARN, "idm_sorecv_scsidata: could not find "
2053 "buffer for offset %x opcode=%x",
2054 offset, opcode);
2055 return (IDM_STATUS_FAIL);
2056 }
2057
2058 xfer_bytes = idm_fill_iov(pdu, pdu->isp_sorx_buf, offset, dlength);
2059 ASSERT(xfer_bytes != 0);
2060 if (xfer_bytes != dlength) {
2061 idm_task_rele(task);
2062 /*
2063 * Buffer overflow, connection error. The PDU data is still
2064 * sitting in the socket so we can't use the connection
2065 * again until that data is drained.
2066 */
2067 return (IDM_STATUS_FAIL);
2068 }
2069
2070 status = idm_sorecvdata(ic, pdu);
2071
2072 idm_task_rele(task);
2073
2074 return (status);
2075 }
2076
2077 static uint32_t
idm_fill_iov(idm_pdu_t * pdu,idm_buf_t * idb,uint32_t ro,uint32_t dlength)2078 idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb, uint32_t ro, uint32_t dlength)
2079 {
2080 uint32_t buf_ro = ro - idb->idb_bufoffset;
2081 uint32_t xfer_len = min(dlength, idb->idb_buflen - buf_ro);
2082
2083 ASSERT(ro >= idb->idb_bufoffset);
2084
2085 pdu->isp_iov[pdu->isp_iovlen].iov_base =
2086 (caddr_t)idb->idb_buf + buf_ro;
2087 pdu->isp_iov[pdu->isp_iovlen].iov_len = xfer_len;
2088 pdu->isp_iovlen++;
2089
2090 return (xfer_len);
2091 }
2092
2093 int
idm_sorecv_nonscsidata(idm_conn_t * ic,idm_pdu_t * pdu)2094 idm_sorecv_nonscsidata(idm_conn_t *ic, idm_pdu_t *pdu)
2095 {
2096 pdu->isp_data = kmem_alloc(pdu->isp_datalen, KM_SLEEP);
2097 ASSERT(pdu->isp_data != NULL);
2098
2099 pdu->isp_databuflen = pdu->isp_datalen;
2100 pdu->isp_iov[0].iov_base = (caddr_t)pdu->isp_data;
2101 pdu->isp_iov[0].iov_len = pdu->isp_datalen;
2102 pdu->isp_iovlen = 1;
2103 /*
2104 * Since we are associating a new data buffer with this received
2105 * PDU we need to set a specific callback to free the data
2106 * after the PDU is processed.
2107 */
2108 pdu->isp_flags |= IDM_PDU_ADDL_DATA;
2109 pdu->isp_callback = idm_sorx_addl_pdu_cb;
2110
2111 return (idm_sorecvdata(ic, pdu));
2112 }
2113
2114 void
idm_sorx_thread(void * arg)2115 idm_sorx_thread(void *arg)
2116 {
2117 boolean_t conn_failure = B_FALSE;
2118 idm_conn_t *ic = (idm_conn_t *)arg;
2119 idm_so_conn_t *so_conn;
2120 idm_pdu_t *pdu;
2121 idm_status_t rc;
2122
2123 idm_conn_hold(ic);
2124
2125 mutex_enter(&ic->ic_mutex);
2126
2127 so_conn = ic->ic_transport_private;
2128 so_conn->ic_rx_thread_running = B_TRUE;
2129 so_conn->ic_rx_thread_did = so_conn->ic_rx_thread->t_did;
2130 cv_signal(&ic->ic_cv);
2131
2132 while (so_conn->ic_rx_thread_running) {
2133 mutex_exit(&ic->ic_mutex);
2134
2135 /*
2136 * Get PDU with default header size (large enough for
2137 * BHS plus any anticipated AHS). PDU from
2138 * the cache will have all values set correctly
2139 * for sockets RX including callback.
2140 */
2141 pdu = kmem_cache_alloc(idm.idm_sorx_pdu_cache, KM_SLEEP);
2142 pdu->isp_ic = ic;
2143 pdu->isp_flags = 0;
2144 pdu->isp_transport_hdrlen = 0;
2145
2146 if ((rc = idm_sorecvhdr(ic, pdu)) != 0) {
2147 /*
2148 * Call idm_pdu_complete so that we call the callback
2149 * and ensure any memory allocated in idm_sorecvhdr
2150 * gets freed up.
2151 */
2152 idm_pdu_complete(pdu, IDM_STATUS_FAIL);
2153
2154 /*
2155 * If ic_rx_thread_running is still set then
2156 * this is some kind of connection problem
2157 * on the socket. In this case we want to
2158 * generate an event. Otherwise some other
2159 * thread closed the socket due to another
2160 * issue in which case we don't need to
2161 * generate an event.
2162 */
2163 mutex_enter(&ic->ic_mutex);
2164 if (so_conn->ic_rx_thread_running) {
2165 conn_failure = B_TRUE;
2166 so_conn->ic_rx_thread_running = B_FALSE;
2167 }
2168
2169 continue;
2170 }
2171
2172 /*
2173 * Header has been read and validated. Now we need
2174 * to read the PDU data payload (if present). SCSI data
2175 * need to be transferred from the socket directly into
2176 * the associated transfer buffer for the SCSI task.
2177 */
2178 if (pdu->isp_datalen != 0) {
2179 if ((IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA) ||
2180 (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP)) {
2181 rc = idm_sorecv_scsidata(ic, pdu);
2182 /*
2183 * All SCSI errors are fatal to the
2184 * connection right now since we have no
2185 * place to put the data. What we need
2186 * is some kind of sink to dispose of unwanted
2187 * SCSI data. For example an invalid task tag
2188 * should not kill the connection (although
2189 * we may want to drop the connection).
2190 */
2191 } else {
2192 /*
2193 * Not data PDUs so allocate a buffer for the
2194 * data segment and read the remaining data.
2195 */
2196 rc = idm_sorecv_nonscsidata(ic, pdu);
2197 }
2198 if (rc != 0) {
2199 /*
2200 * Call idm_pdu_complete so that we call the
2201 * callback and ensure any memory allocated
2202 * in idm_sorecvhdr gets freed up.
2203 */
2204 idm_pdu_complete(pdu, IDM_STATUS_FAIL);
2205
2206 /*
2207 * If ic_rx_thread_running is still set then
2208 * this is some kind of connection problem
2209 * on the socket. In this case we want to
2210 * generate an event. Otherwise some other
2211 * thread closed the socket due to another
2212 * issue in which case we don't need to
2213 * generate an event.
2214 */
2215 mutex_enter(&ic->ic_mutex);
2216 if (so_conn->ic_rx_thread_running) {
2217 conn_failure = B_TRUE;
2218 so_conn->ic_rx_thread_running = B_FALSE;
2219 }
2220 continue;
2221 }
2222 }
2223
2224 /*
2225 * Process RX PDU
2226 */
2227 idm_pdu_rx(ic, pdu);
2228
2229 mutex_enter(&ic->ic_mutex);
2230 }
2231
2232 mutex_exit(&ic->ic_mutex);
2233
2234 /*
2235 * If we dropped out of the RX processing loop because of
2236 * a socket problem or other connection failure (including
2237 * digest errors) then we need to generate a state machine
2238 * event to shut the connection down.
2239 * If the state machine is already in, for example, INIT_ERROR, this
2240 * event will get dropped, and the TX thread will never be notified
2241 * to shut down. To be safe, we'll just notify it here.
2242 */
2243 if (conn_failure) {
2244 if (so_conn->ic_tx_thread_running) {
2245 so_conn->ic_tx_thread_running = B_FALSE;
2246 mutex_enter(&so_conn->ic_tx_mutex);
2247 cv_signal(&so_conn->ic_tx_cv);
2248 mutex_exit(&so_conn->ic_tx_mutex);
2249 }
2250
2251 idm_conn_event(ic, CE_TRANSPORT_FAIL, rc);
2252 }
2253
2254 idm_conn_rele(ic);
2255
2256 thread_exit();
2257 }
2258
2259 /*
2260 * idm_so_tx
2261 *
2262 * This is the implementation of idm_transport_ops_t's it_tx_pdu entry
2263 * point. By definition, it is supposed to be fast. So, simply queue
2264 * the entry and return. The real work is done by idm_i_so_tx() via
2265 * idm_sotx_thread().
2266 */
2267
2268 static void
idm_so_tx(idm_conn_t * ic,idm_pdu_t * pdu)2269 idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu)
2270 {
2271 idm_so_conn_t *so_conn = ic->ic_transport_private;
2272
2273 ASSERT(pdu->isp_ic == ic);
2274 mutex_enter(&so_conn->ic_tx_mutex);
2275
2276 if (!so_conn->ic_tx_thread_running) {
2277 mutex_exit(&so_conn->ic_tx_mutex);
2278 idm_pdu_complete(pdu, IDM_STATUS_ABORTED);
2279 return;
2280 }
2281
2282 list_insert_tail(&so_conn->ic_tx_list, (void *)pdu);
2283 cv_signal(&so_conn->ic_tx_cv);
2284 mutex_exit(&so_conn->ic_tx_mutex);
2285 }
2286
2287 static idm_status_t
idm_i_so_tx(idm_pdu_t * pdu)2288 idm_i_so_tx(idm_pdu_t *pdu)
2289 {
2290 idm_conn_t *ic = pdu->isp_ic;
2291 idm_status_t status = IDM_STATUS_SUCCESS;
2292 uint8_t pad[ISCSI_PAD_WORD_LEN];
2293 int pad_len;
2294 uint32_t hdr_digest_crc;
2295 uint32_t data_digest_crc = 0;
2296 int total_len = 0;
2297 int iovlen = 0;
2298 struct iovec iov[6];
2299 idm_so_conn_t *so_conn;
2300
2301 so_conn = ic->ic_transport_private;
2302
2303 /* Setup BHS */
2304 iov[iovlen].iov_base = (caddr_t)pdu->isp_hdr;
2305 iov[iovlen].iov_len = pdu->isp_hdrlen;
2306 total_len += iov[iovlen].iov_len;
2307 iovlen++;
2308
2309 /* Setup header digest */
2310 if (((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) &&
2311 (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST)) {
2312 hdr_digest_crc = idm_crc32c(pdu->isp_hdr, pdu->isp_hdrlen);
2313
2314 iov[iovlen].iov_base = (caddr_t)&hdr_digest_crc;
2315 iov[iovlen].iov_len = sizeof (hdr_digest_crc);
2316 total_len += iov[iovlen].iov_len;
2317 iovlen++;
2318 }
2319
2320 /* Setup the data */
2321 if (pdu->isp_datalen) {
2322 idm_task_t *idt;
2323 idm_buf_t *idb;
2324 iscsi_data_hdr_t *ihp;
2325 ihp = (iscsi_data_hdr_t *)pdu->isp_hdr;
2326 /* Write of immediate data */
2327 if (ic->ic_ffp &&
2328 (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_CMD ||
2329 IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA)) {
2330 idt = idm_task_find(ic, ihp->itt, ihp->ttt);
2331 if (idt) {
2332 mutex_enter(&idt->idt_mutex);
2333 idb = idm_buf_find(&idt->idt_outbufv, 0);
2334 mutex_exit(&idt->idt_mutex);
2335 /*
2336 * If the initiator call to idm_buf_alloc
2337 * failed then we can get to this point
2338 * without a bound buffer. The associated
2339 * connection failure will clean things up
2340 * later. It would be nice to come up with
2341 * a cleaner way to handle this. In
2342 * particular it seems absurd to look up
2343 * the task and the buffer just to update
2344 * this counter.
2345 */
2346 if (idb)
2347 idb->idb_xfer_len += pdu->isp_datalen;
2348 idm_task_rele(idt);
2349 }
2350 }
2351
2352 iov[iovlen].iov_base = (caddr_t)pdu->isp_data;
2353 iov[iovlen].iov_len = pdu->isp_datalen;
2354 total_len += iov[iovlen].iov_len;
2355 iovlen++;
2356 }
2357
2358 /* Setup the data pad if necessary */
2359 pad_len = ((ISCSI_PAD_WORD_LEN -
2360 (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) &
2361 (ISCSI_PAD_WORD_LEN - 1));
2362
2363 if (pad_len) {
2364 bzero(pad, sizeof (pad));
2365 iov[iovlen].iov_base = (void *)&pad;
2366 iov[iovlen].iov_len = pad_len;
2367 total_len += iov[iovlen].iov_len;
2368 iovlen++;
2369 }
2370
2371 /*
2372 * Setup the data digest if enabled. Data-digest is not sent
2373 * for login-phase PDUs.
2374 */
2375 if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) &&
2376 ((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) &&
2377 (pdu->isp_datalen || pad_len)) {
2378 /*
2379 * RFC3720/10.2.3: A zero-length Data Segment also
2380 * implies a zero-length data digest.
2381 */
2382 if (pdu->isp_datalen) {
2383 data_digest_crc = idm_crc32c(pdu->isp_data,
2384 pdu->isp_datalen);
2385 }
2386 if (pad_len) {
2387 data_digest_crc = idm_crc32c_continued(&pad,
2388 pad_len, data_digest_crc);
2389 }
2390
2391 iov[iovlen].iov_base = (caddr_t)&data_digest_crc;
2392 iov[iovlen].iov_len = sizeof (data_digest_crc);
2393 total_len += iov[iovlen].iov_len;
2394 iovlen++;
2395 }
2396
2397 /* Transmit the PDU */
2398 if (idm_iov_sosend(so_conn->ic_so, &iov[0], iovlen,
2399 total_len) != 0) {
2400 /* Set error status */
2401 IDM_CONN_LOG(CE_WARN,
2402 "idm_so_tx: failed to transmit the PDU, so: %p ic: %p "
2403 "data: %p", (void *) so_conn->ic_so, (void *) ic,
2404 (void *) pdu->isp_data);
2405 status = IDM_STATUS_IO;
2406 }
2407
2408 /*
2409 * Success does not mean that the PDU actually reached the
2410 * remote node since it could get dropped along the way.
2411 */
2412 idm_pdu_complete(pdu, status);
2413
2414 return (status);
2415 }
2416
2417 /*
2418 * The idm_so_buf_tx_to_ini() is used by the target iSCSI layer to transmit the
2419 * Data-In PDUs using sockets. Based on the negotiated MaxRecvDataSegmentLength,
2420 * the buffer is segmented into a sequence of Data-In PDUs, ordered by DataSN.
2421 * A target can invoke this function multiple times for a single read command
2422 * (identified by the same ITT) to split the input into several sequences.
2423 *
2424 * DataSN starts with 0 for the first data PDU of an input command and advances
2425 * by 1 for each subsequent data PDU. Each sequence will have its own F bit,
2426 * which is set to 1 for the last data PDU of a sequence.
2427 * If the initiator supports phase collapse, the status bit must be set along
2428 * with the F bit to indicate that the status is shipped together with the last
2429 * Data-In PDU.
2430 *
2431 * The data PDUs within a sequence will be sent in order with the buffer offset
2432 * in increasing order. i.e. initiator and target must have negotiated the
2433 * "DataPDUInOrder" to "Yes". The order between sequences is not enforced.
2434 *
2435 * Caller holds idt->idt_mutex
2436 */
2437 static idm_status_t
idm_so_buf_tx_to_ini(idm_task_t * idt,idm_buf_t * idb)2438 idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb)
2439 {
2440 idm_so_conn_t *so_conn = idb->idb_ic->ic_transport_private;
2441 idm_pdu_t tmppdu;
2442
2443 ASSERT(mutex_owned(&idt->idt_mutex));
2444
2445 /*
2446 * Put the idm_buf_t on the tx queue. It will be transmitted by
2447 * idm_sotx_thread.
2448 */
2449 mutex_enter(&so_conn->ic_tx_mutex);
2450
2451 DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic,
2452 uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2453 uint64_t, 0, uint32_t, 0, uint32_t, 0,
2454 uint32_t, idb->idb_xfer_len, int, XFER_BUF_TX_TO_INI);
2455
2456 if (!so_conn->ic_tx_thread_running) {
2457 mutex_exit(&so_conn->ic_tx_mutex);
2458 /*
2459 * Don't release idt->idt_mutex since we're supposed to hold
2460 * in when calling idm_buf_tx_to_ini_done
2461 */
2462 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
2463 uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2464 uint64_t, 0, uint32_t, 0, uint32_t, 0,
2465 uint32_t, idb->idb_xfer_len,
2466 int, XFER_BUF_TX_TO_INI);
2467 idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
2468 return (IDM_STATUS_FAIL);
2469 }
2470
2471 /*
2472 * Build a template for the data PDU headers we will use so that
2473 * the SN values will stay consistent with other PDU's we are
2474 * transmitting like R2T and SCSI status.
2475 */
2476 bzero(&idb->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t));
2477 tmppdu.isp_hdr = &idb->idb_data_hdr_tmpl;
2478 (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu,
2479 ISCSI_OP_SCSI_DATA_RSP);
2480 idb->idb_tx_thread = B_TRUE;
2481 list_insert_tail(&so_conn->ic_tx_list, (void *)idb);
2482 cv_signal(&so_conn->ic_tx_cv);
2483 mutex_exit(&so_conn->ic_tx_mutex);
2484 mutex_exit(&idt->idt_mutex);
2485
2486 /*
2487 * Returning success here indicates the transfer was successfully
2488 * dispatched -- it does not mean that the transfer completed
2489 * successfully.
2490 */
2491 return (IDM_STATUS_SUCCESS);
2492 }
2493
2494 /*
2495 * The idm_so_buf_rx_from_ini() is used by the target iSCSI layer to specify the
2496 * data blocks it is ready to receive from the initiator in response to a WRITE
2497 * SCSI command. The target iSCSI layer passes the information about the desired
2498 * data blocks to the initiator in one R2T PDU. The receiving buffer, the buffer
2499 * offset and datalen are passed via the 'idb' argument.
2500 *
2501 * Scope for Prototype build:
2502 * R2Ts are required for any Data-Out PDU, i.e. initiator and target must have
2503 * negotiated the "InitialR2T" to "Yes".
2504 *
2505 * Caller holds idt->idt_mutex
2506 */
2507 static idm_status_t
idm_so_buf_rx_from_ini(idm_task_t * idt,idm_buf_t * idb)2508 idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb)
2509 {
2510 idm_pdu_t *pdu;
2511 iscsi_rtt_hdr_t *rtt;
2512
2513 ASSERT(mutex_owned(&idt->idt_mutex));
2514
2515 DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic,
2516 uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2517 uint64_t, 0, uint32_t, 0, uint32_t, 0,
2518 uint32_t, idb->idb_xfer_len, int, XFER_BUF_RX_FROM_INI);
2519
2520 pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP);
2521 pdu->isp_ic = idt->idt_ic;
2522 pdu->isp_flags = IDM_PDU_SET_STATSN;
2523 bzero(pdu->isp_hdr, sizeof (iscsi_rtt_hdr_t));
2524
2525 /* iSCSI layer fills the TTT, ITT, ExpCmdSN, MaxCmdSN */
2526 (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, pdu, ISCSI_OP_RTT_RSP);
2527
2528 /* set the rttsn, rtt.flags, rtt.data_offset and rtt.data_length */
2529 rtt = (iscsi_rtt_hdr_t *)(pdu->isp_hdr);
2530
2531 rtt->opcode = ISCSI_OP_RTT_RSP;
2532 rtt->flags = ISCSI_FLAG_FINAL;
2533 rtt->data_offset = htonl(idb->idb_bufoffset);
2534 rtt->data_length = htonl(idb->idb_xfer_len);
2535 rtt->rttsn = htonl(idt->idt_exp_rttsn++);
2536
2537 /* Keep track of buffer offsets */
2538 idb->idb_exp_offset = idb->idb_bufoffset;
2539 mutex_exit(&idt->idt_mutex);
2540
2541 /*
2542 * Transmit the PDU.
2543 */
2544 idm_pdu_tx(pdu);
2545
2546 return (IDM_STATUS_SUCCESS);
2547 }
2548
2549 static idm_status_t
idm_so_buf_alloc(idm_buf_t * idb,uint64_t buflen)2550 idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen)
2551 {
2552 if ((buflen > IDM_SO_BUF_CACHE_LB) && (buflen <= IDM_SO_BUF_CACHE_UB)) {
2553 idb->idb_buf = kmem_cache_alloc(idm.idm_so_128k_buf_cache,
2554 KM_NOSLEEP);
2555 idb->idb_buf_private = idm.idm_so_128k_buf_cache;
2556 } else {
2557 idb->idb_buf = kmem_alloc(buflen, KM_NOSLEEP);
2558 idb->idb_buf_private = NULL;
2559 }
2560
2561 if (idb->idb_buf == NULL) {
2562 IDM_CONN_LOG(CE_NOTE,
2563 "idm_so_buf_alloc: failed buffer allocation");
2564 return (IDM_STATUS_FAIL);
2565 }
2566
2567 return (IDM_STATUS_SUCCESS);
2568 }
2569
2570 /* ARGSUSED */
2571 static idm_status_t
idm_so_buf_setup(idm_buf_t * idb)2572 idm_so_buf_setup(idm_buf_t *idb)
2573 {
2574 /* Ensure bufalloc'd flag is unset */
2575 idb->idb_bufalloc = B_FALSE;
2576
2577 return (IDM_STATUS_SUCCESS);
2578 }
2579
2580 /* ARGSUSED */
2581 static void
idm_so_buf_teardown(idm_buf_t * idb)2582 idm_so_buf_teardown(idm_buf_t *idb)
2583 {
2584 /* nothing to do here */
2585 }
2586
2587 static void
idm_so_buf_free(idm_buf_t * idb)2588 idm_so_buf_free(idm_buf_t *idb)
2589 {
2590 if (idb->idb_buf_private == NULL) {
2591 kmem_free(idb->idb_buf, idb->idb_buflen);
2592 } else {
2593 kmem_cache_free(idb->idb_buf_private, idb->idb_buf);
2594 }
2595 }
2596
2597 static void
idm_so_send_rtt_data(idm_conn_t * ic,idm_task_t * idt,idm_buf_t * idb,uint32_t offset,uint32_t length)2598 idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt, idm_buf_t *idb,
2599 uint32_t offset, uint32_t length)
2600 {
2601 idm_so_conn_t *so_conn = ic->ic_transport_private;
2602 idm_pdu_t tmppdu;
2603 idm_buf_t *rtt_buf;
2604
2605 ASSERT(mutex_owned(&idt->idt_mutex));
2606
2607 /*
2608 * Allocate a buffer to represent the RTT transfer. We could further
2609 * optimize this by allocating the buffers internally from an rtt
2610 * specific buffer cache since this is socket-specific code but for
2611 * now we will keep it simple.
2612 */
2613 rtt_buf = idm_buf_alloc(ic, (uint8_t *)idb->idb_buf + offset, length);
2614 if (rtt_buf == NULL) {
2615 /*
2616 * If we're in FFP then the failure was likely a resource
2617 * allocation issue and we should close the connection by
2618 * sending a CE_TRANSPORT_FAIL event.
2619 *
2620 * If we're not in FFP then idm_buf_alloc will always
2621 * fail and the state is transitioning to "complete" anyway
2622 * so we won't bother to send an event.
2623 */
2624 mutex_enter(&ic->ic_state_mutex);
2625 if (ic->ic_ffp)
2626 idm_conn_event_locked(ic, CE_TRANSPORT_FAIL,
2627 (uintptr_t)NULL, CT_NONE);
2628 mutex_exit(&ic->ic_state_mutex);
2629 mutex_exit(&idt->idt_mutex);
2630 return;
2631 }
2632
2633 rtt_buf->idb_buf_cb = NULL;
2634 rtt_buf->idb_cb_arg = NULL;
2635 rtt_buf->idb_bufoffset = offset;
2636 rtt_buf->idb_xfer_len = length;
2637 rtt_buf->idb_ic = idt->idt_ic;
2638 rtt_buf->idb_task_binding = idt;
2639
2640 /*
2641 * The new buffer (if any) represents an additional
2642 * reference on the task
2643 */
2644 idm_task_hold(idt);
2645 mutex_exit(&idt->idt_mutex);
2646
2647 /*
2648 * Put the idm_buf_t on the tx queue. It will be transmitted by
2649 * idm_sotx_thread.
2650 */
2651 mutex_enter(&so_conn->ic_tx_mutex);
2652
2653 if (!so_conn->ic_tx_thread_running) {
2654 idm_buf_free(rtt_buf);
2655 mutex_exit(&so_conn->ic_tx_mutex);
2656 idm_task_rele(idt);
2657 return;
2658 }
2659
2660 /*
2661 * Build a template for the data PDU headers we will use so that
2662 * the SN values will stay consistent with other PDU's we are
2663 * transmitting like R2T and SCSI status.
2664 */
2665 bzero(&rtt_buf->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t));
2666 tmppdu.isp_hdr = &rtt_buf->idb_data_hdr_tmpl;
2667 (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu,
2668 ISCSI_OP_SCSI_DATA);
2669 rtt_buf->idb_tx_thread = B_TRUE;
2670 rtt_buf->idb_in_transport = B_TRUE;
2671 list_insert_tail(&so_conn->ic_tx_list, (void *)rtt_buf);
2672 cv_signal(&so_conn->ic_tx_cv);
2673 mutex_exit(&so_conn->ic_tx_mutex);
2674 }
2675
2676 static void
idm_so_send_rtt_data_done(idm_task_t * idt,idm_buf_t * idb)2677 idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb)
2678 {
2679 /*
2680 * Don't worry about status -- we assume any error handling
2681 * is performed by the caller (idm_sotx_thread).
2682 */
2683 idb->idb_in_transport = B_FALSE;
2684 idm_task_rele(idt);
2685 idm_buf_free(idb);
2686 }
2687
2688 static idm_status_t
idm_so_send_buf_region(idm_task_t * idt,idm_buf_t * idb,uint32_t buf_region_offset,uint32_t buf_region_length)2689 idm_so_send_buf_region(idm_task_t *idt, idm_buf_t *idb,
2690 uint32_t buf_region_offset, uint32_t buf_region_length)
2691 {
2692 idm_conn_t *ic;
2693 uint32_t max_dataseglen;
2694 size_t remainder, chunk;
2695 uint32_t data_offset = buf_region_offset;
2696 iscsi_data_hdr_t *bhs;
2697 idm_pdu_t *pdu;
2698 idm_status_t tx_status;
2699
2700 ASSERT(mutex_owned(&idt->idt_mutex));
2701
2702 ic = idt->idt_ic;
2703
2704 max_dataseglen = ic->ic_conn_params.max_xmit_dataseglen;
2705 remainder = buf_region_length;
2706
2707 while (remainder) {
2708 if (idt->idt_state != TASK_ACTIVE) {
2709 ASSERT((idt->idt_state != TASK_IDLE) &&
2710 (idt->idt_state != TASK_COMPLETE));
2711 return (IDM_STATUS_ABORTED);
2712 }
2713
2714 /* check to see if we need to chunk the data */
2715 if (remainder > max_dataseglen) {
2716 chunk = max_dataseglen;
2717 } else {
2718 chunk = remainder;
2719 }
2720
2721 /* Data PDU headers will always be sizeof (iscsi_hdr_t) */
2722 pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP);
2723 pdu->isp_ic = ic;
2724 pdu->isp_flags = 0; /* initialize isp_flags */
2725
2726 /*
2727 * We've already built a build a header template
2728 * to use during the transfer. Use this template so that
2729 * the SN values stay consistent with any unrelated PDU's
2730 * being transmitted.
2731 */
2732 bcopy(&idb->idb_data_hdr_tmpl, pdu->isp_hdr,
2733 sizeof (iscsi_hdr_t));
2734
2735 /*
2736 * Set DataSN, data offset, and flags in BHS
2737 * For the prototype build, A = 0, S = 0, U = 0
2738 */
2739 bhs = (iscsi_data_hdr_t *)(pdu->isp_hdr);
2740
2741 bhs->datasn = htonl(idt->idt_exp_datasn++);
2742
2743 hton24(bhs->dlength, chunk);
2744 bhs->offset = htonl(idb->idb_bufoffset + data_offset);
2745
2746 /* setup data */
2747 pdu->isp_data = (uint8_t *)idb->idb_buf + data_offset;
2748 pdu->isp_datalen = (uint_t)chunk;
2749
2750 if (chunk == remainder) {
2751 bhs->flags = ISCSI_FLAG_FINAL; /* F bit set to 1 */
2752 /* Piggyback the status with the last data PDU */
2753 if (idt->idt_flags & IDM_TASK_PHASECOLLAPSE_REQ) {
2754 pdu->isp_flags |= IDM_PDU_SET_STATSN |
2755 IDM_PDU_ADVANCE_STATSN;
2756 (*idt->idt_ic->ic_conn_ops.icb_update_statsn)
2757 (idt, pdu);
2758 idt->idt_flags |=
2759 IDM_TASK_PHASECOLLAPSE_SUCCESS;
2760
2761 }
2762 }
2763
2764 remainder -= chunk;
2765 data_offset += chunk;
2766
2767 /* Instrument the data-send DTrace probe. */
2768 if (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP) {
2769 DTRACE_ISCSI_2(data__send,
2770 idm_conn_t *, idt->idt_ic,
2771 iscsi_data_rsp_hdr_t *,
2772 (iscsi_data_rsp_hdr_t *)pdu->isp_hdr);
2773 }
2774
2775 /*
2776 * Now that we're done working with idt_exp_datasn,
2777 * idt->idt_state and idb->idb_bufoffset we can release
2778 * the task lock -- don't want to hold it across the
2779 * call to idm_i_so_tx since we could block.
2780 */
2781 mutex_exit(&idt->idt_mutex);
2782
2783 /*
2784 * Transmit the PDU. Call the internal routine directly
2785 * as there is already implicit ordering.
2786 */
2787 if ((tx_status = idm_i_so_tx(pdu)) != IDM_STATUS_SUCCESS) {
2788 mutex_enter(&idt->idt_mutex);
2789 return (tx_status);
2790 }
2791
2792 mutex_enter(&idt->idt_mutex);
2793 idt->idt_tx_bytes += chunk;
2794 }
2795
2796 return (IDM_STATUS_SUCCESS);
2797 }
2798
2799 /*
2800 * TX PDU cache
2801 */
2802 /* ARGSUSED */
2803 int
idm_sotx_pdu_constructor(void * hdl,void * arg,int flags)2804 idm_sotx_pdu_constructor(void *hdl, void *arg, int flags)
2805 {
2806 idm_pdu_t *pdu = hdl;
2807
2808 bzero(pdu, sizeof (idm_pdu_t));
2809 pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */
2810 pdu->isp_hdrlen = sizeof (iscsi_hdr_t);
2811 pdu->isp_callback = idm_sotx_cache_pdu_cb;
2812 pdu->isp_magic = IDM_PDU_MAGIC;
2813 bzero(pdu->isp_hdr, sizeof (iscsi_hdr_t));
2814
2815 return (0);
2816 }
2817
2818 /* ARGSUSED */
2819 void
idm_sotx_cache_pdu_cb(idm_pdu_t * pdu,idm_status_t status)2820 idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2821 {
2822 /* reset values between use */
2823 pdu->isp_datalen = 0;
2824
2825 kmem_cache_free(idm.idm_sotx_pdu_cache, pdu);
2826 }
2827
2828 /*
2829 * RX PDU cache
2830 */
2831 /* ARGSUSED */
2832 int
idm_sorx_pdu_constructor(void * hdl,void * arg,int flags)2833 idm_sorx_pdu_constructor(void *hdl, void *arg, int flags)
2834 {
2835 idm_pdu_t *pdu = hdl;
2836
2837 bzero(pdu, sizeof (idm_pdu_t));
2838 pdu->isp_magic = IDM_PDU_MAGIC;
2839 pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */
2840 pdu->isp_callback = idm_sorx_cache_pdu_cb;
2841
2842 return (0);
2843 }
2844
2845 /* ARGSUSED */
2846 static void
idm_sorx_cache_pdu_cb(idm_pdu_t * pdu,idm_status_t status)2847 idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2848 {
2849 pdu->isp_iovlen = 0;
2850 pdu->isp_sorx_buf = 0;
2851 kmem_cache_free(idm.idm_sorx_pdu_cache, pdu);
2852 }
2853
2854 static void
idm_sorx_addl_pdu_cb(idm_pdu_t * pdu,idm_status_t status)2855 idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2856 {
2857 /*
2858 * We had to modify our cached RX PDU with a longer header buffer
2859 * and/or a longer data buffer. Release the new buffers and fix
2860 * the fields back to what we would expect for a cached RX PDU.
2861 */
2862 if (pdu->isp_flags & IDM_PDU_ADDL_HDR) {
2863 kmem_free(pdu->isp_hdr, pdu->isp_hdrlen);
2864 }
2865 if (pdu->isp_flags & IDM_PDU_ADDL_DATA) {
2866 kmem_free(pdu->isp_data, pdu->isp_datalen);
2867 }
2868 pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1);
2869 pdu->isp_hdrlen = sizeof (iscsi_hdr_t);
2870 pdu->isp_data = NULL;
2871 pdu->isp_datalen = 0;
2872 pdu->isp_sorx_buf = 0;
2873 pdu->isp_callback = idm_sorx_cache_pdu_cb;
2874 idm_sorx_cache_pdu_cb(pdu, status);
2875 }
2876
2877 /*
2878 * This thread is only active when I/O is queued for transmit
2879 * because the socket is busy.
2880 */
2881 void
idm_sotx_thread(void * arg)2882 idm_sotx_thread(void *arg)
2883 {
2884 idm_conn_t *ic = arg;
2885 idm_tx_obj_t *object, *next;
2886 idm_so_conn_t *so_conn;
2887 idm_status_t status = IDM_STATUS_SUCCESS;
2888
2889 idm_conn_hold(ic);
2890
2891 mutex_enter(&ic->ic_mutex);
2892 so_conn = ic->ic_transport_private;
2893 so_conn->ic_tx_thread_running = B_TRUE;
2894 so_conn->ic_tx_thread_did = so_conn->ic_tx_thread->t_did;
2895 cv_signal(&ic->ic_cv);
2896 mutex_exit(&ic->ic_mutex);
2897
2898 mutex_enter(&so_conn->ic_tx_mutex);
2899
2900 while (so_conn->ic_tx_thread_running) {
2901 while (list_is_empty(&so_conn->ic_tx_list)) {
2902 DTRACE_PROBE1(soconn__tx__sleep, idm_conn_t *, ic);
2903 cv_wait(&so_conn->ic_tx_cv, &so_conn->ic_tx_mutex);
2904 DTRACE_PROBE1(soconn__tx__wakeup, idm_conn_t *, ic);
2905
2906 if (!so_conn->ic_tx_thread_running) {
2907 goto tx_bail;
2908 }
2909 }
2910
2911 object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list);
2912 list_remove(&so_conn->ic_tx_list, object);
2913 mutex_exit(&so_conn->ic_tx_mutex);
2914
2915 switch (object->idm_tx_obj_magic) {
2916 case IDM_PDU_MAGIC: {
2917 idm_pdu_t *pdu = (idm_pdu_t *)object;
2918 DTRACE_PROBE2(soconn__tx__pdu, idm_conn_t *, ic,
2919 idm_pdu_t *, (idm_pdu_t *)object);
2920
2921 if (pdu->isp_flags & IDM_PDU_SET_STATSN) {
2922 /* No IDM task */
2923 (ic->ic_conn_ops.icb_update_statsn)(NULL, pdu);
2924 }
2925 status = idm_i_so_tx((idm_pdu_t *)object);
2926 break;
2927 }
2928 case IDM_BUF_MAGIC: {
2929 idm_buf_t *idb = (idm_buf_t *)object;
2930 idm_task_t *idt = idb->idb_task_binding;
2931
2932 DTRACE_PROBE2(soconn__tx__buf, idm_conn_t *, ic,
2933 idm_buf_t *, idb);
2934
2935 mutex_enter(&idt->idt_mutex);
2936 status = idm_so_send_buf_region(idt,
2937 idb, 0, idb->idb_xfer_len);
2938
2939 /*
2940 * TX thread owns the buffer so we expect it to
2941 * be "in transport"
2942 */
2943 ASSERT(idb->idb_in_transport);
2944 if (IDM_CONN_ISTGT(ic)) {
2945 /*
2946 * idm_buf_tx_to_ini_done releases
2947 * idt->idt_mutex
2948 */
2949 DTRACE_ISCSI_8(xfer__done,
2950 idm_conn_t *, idt->idt_ic,
2951 uintptr_t, idb->idb_buf,
2952 uint32_t, idb->idb_bufoffset,
2953 uint64_t, 0, uint32_t, 0, uint32_t, 0,
2954 uint32_t, idb->idb_xfer_len,
2955 int, XFER_BUF_TX_TO_INI);
2956 idm_buf_tx_to_ini_done(idt, idb, status);
2957 } else {
2958 idm_so_send_rtt_data_done(idt, idb);
2959 mutex_exit(&idt->idt_mutex);
2960 }
2961 break;
2962 }
2963
2964 default:
2965 IDM_CONN_LOG(CE_WARN, "idm_sotx_thread: Unknown magic "
2966 "(0x%08x)", object->idm_tx_obj_magic);
2967 status = IDM_STATUS_FAIL;
2968 }
2969
2970 mutex_enter(&so_conn->ic_tx_mutex);
2971
2972 if (status != IDM_STATUS_SUCCESS) {
2973 so_conn->ic_tx_thread_running = B_FALSE;
2974 idm_conn_event(ic, CE_TRANSPORT_FAIL, status);
2975 }
2976 }
2977
2978 /*
2979 * Before we leave, we need to abort every item remaining in the
2980 * TX list.
2981 */
2982
2983 tx_bail:
2984 object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list);
2985
2986 while (object != NULL) {
2987 next = list_next(&so_conn->ic_tx_list, object);
2988
2989 list_remove(&so_conn->ic_tx_list, object);
2990 switch (object->idm_tx_obj_magic) {
2991 case IDM_PDU_MAGIC:
2992 idm_pdu_complete((idm_pdu_t *)object,
2993 IDM_STATUS_ABORTED);
2994 break;
2995
2996 case IDM_BUF_MAGIC: {
2997 idm_buf_t *idb = (idm_buf_t *)object;
2998 idm_task_t *idt = idb->idb_task_binding;
2999 mutex_exit(&so_conn->ic_tx_mutex);
3000 mutex_enter(&idt->idt_mutex);
3001 /*
3002 * TX thread owns the buffer so we expect it to
3003 * be "in transport"
3004 */
3005 ASSERT(idb->idb_in_transport);
3006 if (IDM_CONN_ISTGT(ic)) {
3007 /*
3008 * idm_buf_tx_to_ini_done releases
3009 * idt->idt_mutex
3010 */
3011 DTRACE_ISCSI_8(xfer__done,
3012 idm_conn_t *, idt->idt_ic,
3013 uintptr_t, idb->idb_buf,
3014 uint32_t, idb->idb_bufoffset,
3015 uint64_t, 0, uint32_t, 0, uint32_t, 0,
3016 uint32_t, idb->idb_xfer_len,
3017 int, XFER_BUF_TX_TO_INI);
3018 idm_buf_tx_to_ini_done(idt, idb,
3019 IDM_STATUS_ABORTED);
3020 } else {
3021 idm_so_send_rtt_data_done(idt, idb);
3022 mutex_exit(&idt->idt_mutex);
3023 }
3024 mutex_enter(&so_conn->ic_tx_mutex);
3025 break;
3026 }
3027 default:
3028 IDM_CONN_LOG(CE_WARN,
3029 "idm_sotx_thread: Unexpected magic "
3030 "(0x%08x)", object->idm_tx_obj_magic);
3031 }
3032
3033 object = next;
3034 }
3035
3036 mutex_exit(&so_conn->ic_tx_mutex);
3037 idm_conn_rele(ic);
3038 thread_exit();
3039 /*NOTREACHED*/
3040 }
3041
3042 static void
idm_so_socket_set_nonblock(struct sonode * node)3043 idm_so_socket_set_nonblock(struct sonode *node)
3044 {
3045 (void) VOP_SETFL(node->so_vnode, node->so_flag,
3046 (node->so_state | FNONBLOCK), CRED(), NULL);
3047 }
3048
3049 static void
idm_so_socket_set_block(struct sonode * node)3050 idm_so_socket_set_block(struct sonode *node)
3051 {
3052 (void) VOP_SETFL(node->so_vnode, node->so_flag,
3053 (node->so_state & (~FNONBLOCK)), CRED(), NULL);
3054 }
3055
3056
3057 /*
3058 * Called by kernel sockets when the connection has been accepted or
3059 * rejected. In early volo, a "disconnect" callback was sent instead of
3060 * "connectfailed", so we check for both.
3061 */
3062 /* ARGSUSED */
3063 void
idm_so_timed_socket_connect_cb(ksocket_t ks,ksocket_callback_event_t ev,void * arg,uintptr_t info)3064 idm_so_timed_socket_connect_cb(ksocket_t ks,
3065 ksocket_callback_event_t ev, void *arg, uintptr_t info)
3066 {
3067 idm_so_timed_socket_t *itp = arg;
3068 ASSERT(itp != NULL);
3069 ASSERT(ev == KSOCKET_EV_CONNECTED ||
3070 ev == KSOCKET_EV_CONNECTFAILED ||
3071 ev == KSOCKET_EV_DISCONNECTED);
3072
3073 mutex_enter(&idm_so_timed_socket_mutex);
3074 itp->it_callback_called = B_TRUE;
3075 if (ev == KSOCKET_EV_CONNECTED) {
3076 itp->it_socket_error_code = 0;
3077 } else {
3078 /* Make sure the error code is non-zero on error */
3079 if (info == 0)
3080 info = ECONNRESET;
3081 itp->it_socket_error_code = (int)info;
3082 }
3083 cv_signal(&itp->it_cv);
3084 mutex_exit(&idm_so_timed_socket_mutex);
3085 }
3086
3087 int
idm_so_timed_socket_connect(ksocket_t ks,struct sockaddr_storage * sa,int sa_sz,int login_max_usec)3088 idm_so_timed_socket_connect(ksocket_t ks,
3089 struct sockaddr_storage *sa, int sa_sz, int login_max_usec)
3090 {
3091 clock_t conn_login_max;
3092 int rc, nonblocking, rval;
3093 idm_so_timed_socket_t it;
3094 ksocket_callbacks_t ks_cb;
3095
3096 conn_login_max = ddi_get_lbolt() + drv_usectohz(login_max_usec);
3097
3098 /*
3099 * Set to non-block socket mode, with callback on connect
3100 * Early volo used "disconnected" instead of "connectfailed",
3101 * so set callback to look for both.
3102 */
3103 bzero(&it, sizeof (it));
3104 ks_cb.ksock_cb_flags = KSOCKET_CB_CONNECTED |
3105 KSOCKET_CB_CONNECTFAILED | KSOCKET_CB_DISCONNECTED;
3106 ks_cb.ksock_cb_connected = idm_so_timed_socket_connect_cb;
3107 ks_cb.ksock_cb_connectfailed = idm_so_timed_socket_connect_cb;
3108 ks_cb.ksock_cb_disconnected = idm_so_timed_socket_connect_cb;
3109 cv_init(&it.it_cv, NULL, CV_DEFAULT, NULL);
3110 rc = ksocket_setcallbacks(ks, &ks_cb, &it, CRED());
3111 if (rc != 0)
3112 return (rc);
3113
3114 /* Set to non-blocking mode */
3115 nonblocking = 1;
3116 rc = ksocket_ioctl(ks, FIONBIO, (intptr_t)&nonblocking, &rval,
3117 CRED());
3118 if (rc != 0)
3119 goto cleanup;
3120
3121 bzero(&it, sizeof (it));
3122 for (;;) {
3123 /*
3124 * Warning -- in a loopback scenario, the call to
3125 * the connect_cb can occur inside the call to
3126 * ksocket_connect. Do not hold the mutex around the
3127 * call to ksocket_connect.
3128 */
3129 rc = ksocket_connect(ks, (struct sockaddr *)sa, sa_sz, CRED());
3130 if (rc == 0 || rc == EISCONN) {
3131 /* socket success or already success */
3132 rc = 0;
3133 break;
3134 }
3135 if ((rc != EINPROGRESS) && (rc != EALREADY)) {
3136 break;
3137 }
3138
3139 /* TCP connect still in progress. See if out of time. */
3140 if (ddi_get_lbolt() > conn_login_max) {
3141 /*
3142 * Connection retry timeout,
3143 * failed connect to target.
3144 */
3145 rc = ETIMEDOUT;
3146 break;
3147 }
3148
3149 /*
3150 * TCP connect still in progress. Sleep until callback.
3151 * Do NOT go to sleep if the callback already occurred!
3152 */
3153 mutex_enter(&idm_so_timed_socket_mutex);
3154 if (!it.it_callback_called) {
3155 (void) cv_timedwait(&it.it_cv,
3156 &idm_so_timed_socket_mutex, conn_login_max);
3157 }
3158 if (it.it_callback_called) {
3159 rc = it.it_socket_error_code;
3160 mutex_exit(&idm_so_timed_socket_mutex);
3161 break;
3162 }
3163 /* If timer expires, go call ksocket_connect one last time. */
3164 mutex_exit(&idm_so_timed_socket_mutex);
3165 }
3166
3167 /* resume blocking mode */
3168 nonblocking = 0;
3169 (void) ksocket_ioctl(ks, FIONBIO, (intptr_t)&nonblocking, &rval,
3170 CRED());
3171 cleanup:
3172 (void) ksocket_setcallbacks(ks, NULL, NULL, CRED());
3173 cv_destroy(&it.it_cv);
3174 if (rc != 0) {
3175 idm_soshutdown(ks);
3176 }
3177 return (rc);
3178 }
3179
3180
3181 void
idm_addr_to_sa(idm_addr_t * dportal,struct sockaddr_storage * sa)3182 idm_addr_to_sa(idm_addr_t *dportal, struct sockaddr_storage *sa)
3183 {
3184 int dp_addr_size;
3185 struct sockaddr_in *sin;
3186 struct sockaddr_in6 *sin6;
3187
3188 /* Build sockaddr_storage for this portal (idm_addr_t) */
3189 bzero(sa, sizeof (*sa));
3190 dp_addr_size = dportal->a_addr.i_insize;
3191 if (dp_addr_size == sizeof (struct in_addr)) {
3192 /* IPv4 */
3193 sa->ss_family = AF_INET;
3194 sin = (struct sockaddr_in *)sa;
3195 sin->sin_port = htons(dportal->a_port);
3196 bcopy(&dportal->a_addr.i_addr.in4,
3197 &sin->sin_addr, sizeof (struct in_addr));
3198 } else if (dp_addr_size == sizeof (struct in6_addr)) {
3199 /* IPv6 */
3200 sa->ss_family = AF_INET6;
3201 sin6 = (struct sockaddr_in6 *)sa;
3202 sin6->sin6_port = htons(dportal->a_port);
3203 bcopy(&dportal->a_addr.i_addr.in6,
3204 &sin6->sin6_addr, sizeof (struct in6_addr));
3205 } else {
3206 ASSERT(0);
3207 }
3208 }
3209
3210
/*
 * Return a human-readable form of a sockaddr_storage, in the form
 * [ip-address].port, written into the caller-supplied buffer `buf' of
 * `size' bytes.  The return value is always `buf'.
 *
 * This is used in calls to logging functions.  If several calls to
 * idm_sa_ntop are made within the same invocation of a logging function,
 * then each one needs its own buf.
 *
 * The placeholder "[0].-1" (truncated to fit if necessary) is produced
 * instead when the address family is neither AF_INET nor AF_INET6, when
 * the address cannot be converted to text, or when `buf' could not hold
 * the address together with a five-digit port.
 */
const char *
idm_sa_ntop(const struct sockaddr_storage *sa,
    char *buf, size_t size)
{
	static const char bogus_ip[] = "[0].-1";
	char tmp[INET6_ADDRSTRLEN];
	const void *addr;
	unsigned int port;

	/* Pick out the family-specific address and port */
	switch (sa->ss_family) {
	case AF_INET6: {
		const struct sockaddr_in6 *in6 =
		    (const struct sockaddr_in6 *)sa;

		addr = &in6->sin6_addr;
		port = ntohs(in6->sin6_port);
		break;
	}
	case AF_INET: {
		const struct sockaddr_in *in = (const struct sockaddr_in *)sa;

		addr = &in->sin_addr;
		port = ntohs(in->sin_port);
		break;
	}
	default:
		goto err;
	}

	/* Do not look at tmp unless the conversion actually filled it in */
	if (inet_ntop(sa->ss_family, addr, tmp, sizeof (tmp)) == NULL)
		goto err;

	/* Require room for the widest possible port, plus the NUL */
	if (strlen(tmp) + sizeof ("[].65535") > size)
		goto err;

	(void) snprintf(buf, size, "[%s].%u", tmp, port);
	return (buf);

err:
	(void) snprintf(buf, size, "%s", bogus_ip);
	return (buf);
}
3256