/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright (c) 2013 by Delphix. All rights reserved.
 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2017, Joyent, Inc. All rights reserved.
 */

#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/priv.h>
#include <sys/cpuvar.h>
#include <sys/socket.h>
#include <sys/strsubr.h>
#include <sys/sysmacros.h>
#include <sys/sdt.h>
#include <netinet/tcp.h>
#include <inet/tcp.h>
#include <sys/socketvar.h>
#include <sys/pathname.h>
#include <sys/fs/snode.h>
#include <sys/fs/dv_node.h>
#include <sys/vnode.h>
#include <netinet/in.h>
#include <net/if.h>
#include <sys/sockio.h>
#include <sys/ksocket.h>
#include <sys/filio.h>		/* FIONBIO */
#include <sys/iscsi_protocol.h>
#include <sys/idm/idm.h>
#include <sys/idm/idm_so.h>
#include <sys/idm/idm_text.h>

/*
 * Seconds to sleep between connect attempts while a non-blocking connect
 * reports EINPROGRESS/EALREADY (see idm_so_ini_conn_connect()).
 */
#define	IN_PROGRESS_DELAY	1

/*
 * in6addr_any is currently all zeroes, but use the macro in case this
 * ever changes.
 */
static const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;

/*
 * PDU completion callbacks
 */
static void idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
static void idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
static void idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);

/*
 * Connection setup/teardown shared by the initiator and target paths
 */
static idm_status_t idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so);
static void idm_so_conn_destroy_common(idm_conn_t *ic);
static void idm_so_conn_connect_common(idm_conn_t *ic);

static void idm_set_ini_preconnect_options(idm_so_conn_t *sc,
    boolean_t boot_conn);
static void idm_set_postconnect_options(ksocket_t so);
static idm_status_t idm_i_so_tx(idm_pdu_t *pdu);

static idm_status_t idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu);
static void idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt,
    idm_buf_t *idb, uint32_t offset, uint32_t length);
static void idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb);
static idm_status_t idm_so_send_buf_region(idm_task_t *idt,
    idm_buf_t *idb, uint32_t buf_region_offset, uint32_t buf_region_length);

static uint32_t idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb,
    uint32_t ro, uint32_t dlength);

static idm_status_t idm_so_handle_digest(idm_conn_t *it,
    nvpair_t *digest_choice, const idm_kv_xlate_t *ikvx);

static void idm_so_socket_set_nonblock(struct sonode *node);
static void idm_so_socket_set_block(struct sonode *node);

/*
 * Transport ops prototypes
 */
static void idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu);
static idm_status_t idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb);
static idm_status_t idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb);
static void idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu);
static void idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu);
static void idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu);
static idm_status_t idm_so_free_task_rsrc(idm_task_t *idt);
static kv_status_t idm_so_negotiate_key_values(idm_conn_t *it,
    nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl);
static void idm_so_notice_key_values(idm_conn_t *it,
    nvlist_t *negotiated_nvl);
static kv_status_t idm_so_declare_key_values(idm_conn_t *it,
    nvlist_t *config_nvl, nvlist_t *outgoing_nvl);
static boolean_t idm_so_conn_is_capable(idm_conn_req_t *ic,
    idm_transport_caps_t *caps);
static idm_status_t idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen);
static void idm_so_buf_free(idm_buf_t *idb);
static idm_status_t idm_so_buf_setup(idm_buf_t *idb);
static void idm_so_buf_teardown(idm_buf_t *idb);
static idm_status_t idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is);
static void idm_so_tgt_svc_destroy(idm_svc_t *is);
static idm_status_t idm_so_tgt_svc_online(idm_svc_t *is);
static void idm_so_tgt_svc_offline(idm_svc_t *is);
static void idm_so_tgt_conn_destroy(idm_conn_t *ic);
static idm_status_t idm_so_tgt_conn_connect(idm_conn_t *ic);
static void idm_so_conn_disconnect(idm_conn_t *ic);
static idm_status_t idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic);
static void idm_so_ini_conn_destroy(idm_conn_t *ic);
static idm_status_t idm_so_ini_conn_connect(idm_conn_t *ic);

/*
 * IDM Native Sockets transport operations
 *
 * The initializer is positional, so the entries must stay in the order of
 * the idm_transport_ops_t members named in the trailing comments.  Slots
 * set to NULL are not implemented by this transport.  The same routine,
 * idm_so_conn_disconnect(), serves both the target and the initiator
 * disconnect slots.
 */
static
idm_transport_ops_t idm_so_transport_ops = {
	idm_so_tx,			/* it_tx_pdu */
	idm_so_buf_tx_to_ini,		/* it_buf_tx_to_ini */
	idm_so_buf_rx_from_ini,		/* it_buf_rx_from_ini */
	idm_so_rx_datain,		/* it_rx_datain */
	idm_so_rx_rtt,			/* it_rx_rtt */
	idm_so_rx_dataout,		/* it_rx_dataout */
	NULL,				/* it_alloc_conn_rsrc */
	NULL,				/* it_free_conn_rsrc */
	NULL,				/* it_tgt_enable_datamover */
	NULL,				/* it_ini_enable_datamover */
	NULL,				/* it_conn_terminate */
	idm_so_free_task_rsrc,		/* it_free_task_rsrc */
	idm_so_negotiate_key_values,	/* it_negotiate_key_values */
	idm_so_notice_key_values,	/* it_notice_key_values */
	idm_so_conn_is_capable,		/* it_conn_is_capable */
	idm_so_buf_alloc,		/* it_buf_alloc */
	idm_so_buf_free,		/* it_buf_free */
	idm_so_buf_setup,		/* it_buf_setup */
	idm_so_buf_teardown,		/* it_buf_teardown */
	idm_so_tgt_svc_create,		/* it_tgt_svc_create */
	idm_so_tgt_svc_destroy,		/* it_tgt_svc_destroy */
	idm_so_tgt_svc_online,		/* it_tgt_svc_online */
	idm_so_tgt_svc_offline,		/* it_tgt_svc_offline */
	idm_so_tgt_conn_destroy,	/* it_tgt_conn_destroy */
	idm_so_tgt_conn_connect,	/* it_tgt_conn_connect */
	idm_so_conn_disconnect,		/* it_tgt_conn_disconnect */
	idm_so_ini_conn_create,		/* it_ini_conn_create */
	idm_so_ini_conn_destroy,	/* it_ini_conn_destroy */
	idm_so_ini_conn_connect,	/* it_ini_conn_connect */
	idm_so_conn_disconnect,		/* it_ini_conn_disconnect */
	idm_so_declare_key_values	/* it_declare_key_values */
};

/* Initialized in idm_so_init() and destroyed in idm_so_fini() */
kmutex_t	idm_so_timed_socket_mutex;

/*
 * Socket buffer sizes applied through SO_SNDBUF/SO_RCVBUF by
 * idm_set_postconnect_options().
 */
int32_t idm_so_sndbuf = IDM_SNDBUF_SIZE;
int32_t idm_so_rcvbuf = IDM_RCVBUF_SIZE;

173 /* 174 * idm_so_init() 175 * Sockets transport initialization 176 */ 177 void 178 idm_so_init(idm_transport_t *it) 179 { 180 /* Cache for IDM Data and R2T Transmit PDU's */ 181 idm.idm_sotx_pdu_cache = kmem_cache_create("idm_tx_pdu_cache", 182 sizeof (idm_pdu_t) + sizeof (iscsi_hdr_t), 8, 183 &idm_sotx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP); 184 185 /* Cache for IDM Receive PDU's */ 186 idm.idm_sorx_pdu_cache = kmem_cache_create("idm_rx_pdu_cache", 187 sizeof (idm_pdu_t) + IDM_SORX_CACHE_HDRLEN, 8, 188 &idm_sorx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP); 189 190 /* 128k buffer cache */ 191 idm.idm_so_128k_buf_cache = kmem_cache_create("idm_128k_buf_cache", 192 IDM_SO_BUF_CACHE_UB, 8, NULL, NULL, NULL, NULL, NULL, KM_SLEEP); 193 194 /* Set the sockets transport ops */ 195 it->it_ops = &idm_so_transport_ops; 196 197 mutex_init(&idm_so_timed_socket_mutex, NULL, MUTEX_DEFAULT, NULL); 198 199 } 200 201 /* 202 * idm_so_fini() 203 * Sockets transport teardown 204 */ 205 void 206 idm_so_fini(void) 207 { 208 kmem_cache_destroy(idm.idm_so_128k_buf_cache); 209 kmem_cache_destroy(idm.idm_sotx_pdu_cache); 210 kmem_cache_destroy(idm.idm_sorx_pdu_cache); 211 mutex_destroy(&idm_so_timed_socket_mutex); 212 } 213 214 ksocket_t 215 idm_socreate(int domain, int type, int protocol) 216 { 217 ksocket_t ks; 218 219 if (!ksocket_socket(&ks, domain, type, protocol, KSOCKET_NOSLEEP, 220 CRED())) { 221 return (ks); 222 } else { 223 return (NULL); 224 } 225 } 226 227 /* 228 * idm_soshutdown will disconnect the socket and prevent subsequent PDU 229 * reception and transmission. The sonode still exists but its state 230 * gets modified to indicate it is no longer connected. Calls to 231 * idm_sorecv/idm_iov_sorecv will return so idm_soshutdown can be used 232 * regain control of a thread stuck in idm_sorecv. 
233 */ 234 void 235 idm_soshutdown(ksocket_t so) 236 { 237 (void) ksocket_shutdown(so, SHUT_RDWR, CRED()); 238 } 239 240 /* 241 * idm_sodestroy releases all resources associated with a socket previously 242 * created with idm_socreate. The socket must be shutdown using 243 * idm_soshutdown before the socket is destroyed with idm_sodestroy, 244 * otherwise undefined behavior will result. 245 */ 246 void 247 idm_sodestroy(ksocket_t ks) 248 { 249 (void) ksocket_close(ks, CRED()); 250 } 251 252 /* 253 * Function to compare two addresses in sockaddr_storage format 254 */ 255 256 int 257 idm_ss_compare(const struct sockaddr_storage *cmp_ss1, 258 const struct sockaddr_storage *cmp_ss2, 259 boolean_t v4_mapped_as_v4, 260 boolean_t compare_ports) 261 { 262 struct sockaddr_storage mapped_v4_ss1, mapped_v4_ss2; 263 const struct sockaddr_storage *ss1, *ss2; 264 struct in_addr *in1, *in2; 265 struct in6_addr *in61, *in62; 266 int i; 267 268 /* 269 * Normalize V4-mapped IPv6 addresses into V4 format if 270 * v4_mapped_as_v4 is B_TRUE. 
271 */ 272 ss1 = cmp_ss1; 273 ss2 = cmp_ss2; 274 if (v4_mapped_as_v4 && (ss1->ss_family == AF_INET6)) { 275 in61 = &((struct sockaddr_in6 *)ss1)->sin6_addr; 276 if (IN6_IS_ADDR_V4MAPPED(in61)) { 277 bzero(&mapped_v4_ss1, sizeof (mapped_v4_ss1)); 278 mapped_v4_ss1.ss_family = AF_INET; 279 ((struct sockaddr_in *)&mapped_v4_ss1)->sin_port = 280 ((struct sockaddr_in *)ss1)->sin_port; 281 IN6_V4MAPPED_TO_INADDR(in61, 282 &((struct sockaddr_in *)&mapped_v4_ss1)->sin_addr); 283 ss1 = &mapped_v4_ss1; 284 } 285 } 286 ss2 = cmp_ss2; 287 if (v4_mapped_as_v4 && (ss2->ss_family == AF_INET6)) { 288 in62 = &((struct sockaddr_in6 *)ss2)->sin6_addr; 289 if (IN6_IS_ADDR_V4MAPPED(in62)) { 290 bzero(&mapped_v4_ss2, sizeof (mapped_v4_ss2)); 291 mapped_v4_ss2.ss_family = AF_INET; 292 ((struct sockaddr_in *)&mapped_v4_ss2)->sin_port = 293 ((struct sockaddr_in *)ss2)->sin_port; 294 IN6_V4MAPPED_TO_INADDR(in62, 295 &((struct sockaddr_in *)&mapped_v4_ss2)->sin_addr); 296 ss2 = &mapped_v4_ss2; 297 } 298 } 299 300 /* 301 * Compare ports, then address family, then ip address 302 */ 303 if (compare_ports && 304 (((struct sockaddr_in *)ss1)->sin_port != 305 ((struct sockaddr_in *)ss2)->sin_port)) { 306 if (((struct sockaddr_in *)ss1)->sin_port > 307 ((struct sockaddr_in *)ss2)->sin_port) 308 return (1); 309 else 310 return (-1); 311 } 312 313 /* 314 * ports are the same 315 */ 316 if (ss1->ss_family != ss2->ss_family) { 317 if (ss1->ss_family == AF_INET) 318 return (1); 319 else 320 return (-1); 321 } 322 323 /* 324 * address families are the same 325 */ 326 if (ss1->ss_family == AF_INET) { 327 in1 = &((struct sockaddr_in *)ss1)->sin_addr; 328 in2 = &((struct sockaddr_in *)ss2)->sin_addr; 329 330 if (in1->s_addr > in2->s_addr) 331 return (1); 332 else if (in1->s_addr < in2->s_addr) 333 return (-1); 334 else 335 return (0); 336 } else if (ss1->ss_family == AF_INET6) { 337 in61 = &((struct sockaddr_in6 *)ss1)->sin6_addr; 338 in62 = &((struct sockaddr_in6 *)ss2)->sin6_addr; 339 340 for (i = 0; i < 
4; i++) { 341 if (in61->s6_addr32[i] > in62->s6_addr32[i]) 342 return (1); 343 else if (in61->s6_addr32[i] < in62->s6_addr32[i]) 344 return (-1); 345 } 346 return (0); 347 } 348 349 return (1); 350 } 351 352 /* 353 * IP address filter functions to flag addresses that should not 354 * go out to initiators through discovery. 355 */ 356 static boolean_t 357 idm_v4_addr_okay(struct in_addr *in_addr) 358 { 359 in_addr_t addr = ntohl(in_addr->s_addr); 360 361 if ((INADDR_NONE == addr) || 362 (IN_MULTICAST(addr)) || 363 ((addr >> IN_CLASSA_NSHIFT) == 0) || 364 ((addr >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) { 365 return (B_FALSE); 366 } 367 return (B_TRUE); 368 } 369 370 static boolean_t 371 idm_v6_addr_okay(struct in6_addr *addr6) 372 { 373 374 if ((IN6_IS_ADDR_UNSPECIFIED(addr6)) || 375 (IN6_IS_ADDR_LOOPBACK(addr6)) || 376 (IN6_IS_ADDR_MULTICAST(addr6)) || 377 (IN6_IS_ADDR_V4MAPPED(addr6)) || 378 (IN6_IS_ADDR_V4COMPAT(addr6)) || 379 (IN6_IS_ADDR_LINKLOCAL(addr6))) { 380 return (B_FALSE); 381 } 382 return (B_TRUE); 383 } 384 385 /* 386 * idm_get_ipaddr will retrieve a list of IP Addresses which the host is 387 * configured with by sending down a sequence of kernel ioctl to IP STREAMS. 
388 */ 389 int 390 idm_get_ipaddr(idm_addr_list_t **ipaddr_p) 391 { 392 ksocket_t so4, so6; 393 struct lifnum lifn; 394 struct lifconf lifc; 395 struct lifreq *lp; 396 int rval; 397 int numifs; 398 int bufsize; 399 void *buf; 400 int i, j, n, rc; 401 struct sockaddr_storage ss; 402 struct sockaddr_in *sin; 403 struct sockaddr_in6 *sin6; 404 idm_addr_t *ip; 405 idm_addr_list_t *ipaddr = NULL; 406 int size_ipaddr; 407 408 *ipaddr_p = NULL; 409 size_ipaddr = 0; 410 buf = NULL; 411 412 /* create an ipv4 and ipv6 UDP socket */ 413 if ((so6 = idm_socreate(PF_INET6, SOCK_DGRAM, 0)) == NULL) 414 return (0); 415 if ((so4 = idm_socreate(PF_INET, SOCK_DGRAM, 0)) == NULL) { 416 idm_sodestroy(so6); 417 return (0); 418 } 419 420 421 retry_count: 422 /* snapshot the current number of interfaces */ 423 lifn.lifn_family = PF_UNSPEC; 424 lifn.lifn_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES; 425 lifn.lifn_count = 0; 426 /* use vp6 for ioctls with unspecified families by default */ 427 if (ksocket_ioctl(so6, SIOCGLIFNUM, (intptr_t)&lifn, &rval, CRED()) 428 != 0) { 429 goto cleanup; 430 } 431 432 numifs = lifn.lifn_count; 433 if (numifs <= 0) { 434 goto cleanup; 435 } 436 437 /* allocate extra room in case more interfaces appear */ 438 numifs += 10; 439 440 /* get the interface names and ip addresses */ 441 bufsize = numifs * sizeof (struct lifreq); 442 buf = kmem_alloc(bufsize, KM_SLEEP); 443 444 lifc.lifc_family = AF_UNSPEC; 445 lifc.lifc_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES; 446 lifc.lifc_len = bufsize; 447 lifc.lifc_buf = buf; 448 rc = ksocket_ioctl(so6, SIOCGLIFCONF, (intptr_t)&lifc, &rval, CRED()); 449 if (rc != 0) { 450 goto cleanup; 451 } 452 /* if our extra room is used up, try again */ 453 if (bufsize <= lifc.lifc_len) { 454 kmem_free(buf, bufsize); 455 buf = NULL; 456 goto retry_count; 457 } 458 /* calc actual number of ifconfs */ 459 n = lifc.lifc_len / sizeof (struct lifreq); 460 461 /* get ip address */ 462 if (n > 0) { 463 size_ipaddr = sizeof 
(idm_addr_list_t) + 464 (n - 1) * sizeof (idm_addr_t); 465 ipaddr = kmem_zalloc(size_ipaddr, KM_SLEEP); 466 } else { 467 goto cleanup; 468 } 469 470 /* 471 * Examine the array of interfaces and filter uninteresting ones 472 */ 473 for (i = 0, j = 0, lp = lifc.lifc_req; i < n; i++, lp++) { 474 475 /* 476 * Copy the address as the SIOCGLIFFLAGS ioctl is destructive 477 */ 478 ss = lp->lifr_addr; 479 /* 480 * fetch the flags using the socket of the correct family 481 */ 482 switch (ss.ss_family) { 483 case AF_INET: 484 rc = ksocket_ioctl(so4, SIOCGLIFFLAGS, (intptr_t)lp, 485 &rval, CRED()); 486 break; 487 case AF_INET6: 488 rc = ksocket_ioctl(so6, SIOCGLIFFLAGS, (intptr_t)lp, 489 &rval, CRED()); 490 break; 491 default: 492 continue; 493 } 494 if (rc == 0) { 495 /* 496 * If we got the flags, skip uninteresting 497 * interfaces based on flags 498 */ 499 if ((lp->lifr_flags & IFF_UP) != IFF_UP) 500 continue; 501 if (lp->lifr_flags & 502 (IFF_ANYCAST|IFF_NOLOCAL|IFF_DEPRECATED)) 503 continue; 504 } 505 506 /* save ip address */ 507 ip = &ipaddr->al_addrs[j]; 508 switch (ss.ss_family) { 509 case AF_INET: 510 sin = (struct sockaddr_in *)&ss; 511 if (!idm_v4_addr_okay(&sin->sin_addr)) 512 continue; 513 ip->a_addr.i_addr.in4 = sin->sin_addr; 514 ip->a_addr.i_insize = sizeof (struct in_addr); 515 break; 516 case AF_INET6: 517 sin6 = (struct sockaddr_in6 *)&ss; 518 if (!idm_v6_addr_okay(&sin6->sin6_addr)) 519 continue; 520 ip->a_addr.i_addr.in6 = sin6->sin6_addr; 521 ip->a_addr.i_insize = sizeof (struct in6_addr); 522 break; 523 default: 524 continue; 525 } 526 j++; 527 } 528 529 if (j == 0) { 530 /* no valid ifaddr */ 531 kmem_free(ipaddr, size_ipaddr); 532 size_ipaddr = 0; 533 ipaddr = NULL; 534 } else { 535 ipaddr->al_out_cnt = j; 536 } 537 538 539 cleanup: 540 idm_sodestroy(so6); 541 idm_sodestroy(so4); 542 543 if (buf != NULL) 544 kmem_free(buf, bufsize); 545 546 *ipaddr_p = ipaddr; 547 return (size_ipaddr); 548 } 549 550 int 551 idm_sorecv(ksocket_t so, void *msg, size_t 
len) 552 { 553 iovec_t iov; 554 555 ASSERT(so != NULL); 556 ASSERT(len != 0); 557 558 /* 559 * Fill in iovec and receive data 560 */ 561 iov.iov_base = msg; 562 iov.iov_len = len; 563 564 return (idm_iov_sorecv(so, &iov, 1, len)); 565 } 566 567 /* 568 * idm_sosendto - Sends a buffered data on a non-connected socket. 569 * 570 * This function puts the data provided on the wire by calling sosendmsg. 571 * It will return only when all the data has been sent or if an error 572 * occurs. 573 * 574 * Returns 0 for success, the socket errno value if sosendmsg fails, and 575 * -1 if sosendmsg returns success but uio_resid != 0 576 */ 577 int 578 idm_sosendto(ksocket_t so, void *buff, size_t len, 579 struct sockaddr *name, socklen_t namelen) 580 { 581 struct msghdr msg; 582 struct iovec iov[1]; 583 int error; 584 size_t sent = 0; 585 586 iov[0].iov_base = buff; 587 iov[0].iov_len = len; 588 589 /* Initialization of the message header. */ 590 bzero(&msg, sizeof (msg)); 591 msg.msg_iov = iov; 592 msg.msg_iovlen = 1; 593 msg.msg_name = name; 594 msg.msg_namelen = namelen; 595 596 if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED())) == 0) { 597 /* Data sent */ 598 if (sent == len) { 599 /* All data sent. Success. */ 600 return (0); 601 } else { 602 /* Not all data was sent. Failure */ 603 return (-1); 604 } 605 } 606 607 /* Send failed */ 608 return (error); 609 } 610 611 /* 612 * idm_iov_sosend - Sends an iovec on a connection. 613 * 614 * This function puts the data provided on the wire by calling sosendmsg. 615 * It will return only when all the data has been sent or if an error 616 * occurs. 617 * 618 * Returns 0 for success, the socket errno value if sosendmsg fails, and 619 * -1 if sosendmsg returns success but uio_resid != 0 620 */ 621 int 622 idm_iov_sosend(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len) 623 { 624 struct msghdr msg; 625 int error; 626 size_t sent = 0; 627 628 ASSERT(iop != NULL); 629 630 /* Initialization of the message header. 
*/ 631 bzero(&msg, sizeof (msg)); 632 msg.msg_iov = iop; 633 msg.msg_iovlen = iovlen; 634 635 if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED())) 636 == 0) { 637 /* Data sent */ 638 if (sent == total_len) { 639 /* All data sent. Success. */ 640 return (0); 641 } else { 642 /* Not all data was sent. Failure */ 643 return (-1); 644 } 645 } 646 647 /* Send failed */ 648 return (error); 649 } 650 651 /* 652 * idm_iov_sorecv - Receives an iovec from a connection 653 * 654 * This function gets the data asked for from the socket. It will return 655 * only when all the requested data has been retrieved or if an error 656 * occurs. 657 * 658 * Returns 0 for success, the socket errno value if sorecvmsg fails, and 659 * -1 if sorecvmsg returns success but uio_resid != 0 660 */ 661 int 662 idm_iov_sorecv(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len) 663 { 664 struct msghdr msg; 665 int error; 666 size_t recv; 667 int flags; 668 669 ASSERT(iop != NULL); 670 671 /* Initialization of the message header. */ 672 bzero(&msg, sizeof (msg)); 673 msg.msg_iov = iop; 674 msg.msg_iovlen = iovlen; 675 flags = MSG_WAITALL; 676 677 if ((error = ksocket_recvmsg(so, &msg, flags, &recv, CRED())) 678 == 0) { 679 /* Received data */ 680 if (recv == total_len) { 681 /* All requested data received. Success */ 682 return (0); 683 } else { 684 /* 685 * Not all data was received. The connection has 686 * probably failed. 
687 */ 688 return (-1); 689 } 690 } 691 692 /* Receive failed */ 693 return (error); 694 } 695 696 static void 697 idm_set_ini_preconnect_options(idm_so_conn_t *sc, boolean_t boot_conn) 698 { 699 int conn_abort = 10000; 700 int conn_notify = 2000; 701 int abort = 30000; 702 703 /* Pre-connect socket options */ 704 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP, 705 TCP_CONN_NOTIFY_THRESHOLD, (char *)&conn_notify, sizeof (int), 706 CRED()); 707 if (boot_conn == B_FALSE) { 708 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP, 709 TCP_CONN_ABORT_THRESHOLD, (char *)&conn_abort, sizeof (int), 710 CRED()); 711 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP, 712 TCP_ABORT_THRESHOLD, 713 (char *)&abort, sizeof (int), CRED()); 714 } 715 } 716 717 static void 718 idm_set_postconnect_options(ksocket_t ks) 719 { 720 const int on = 1; 721 722 /* Set connect options */ 723 (void) ksocket_setsockopt(ks, SOL_SOCKET, SO_RCVBUF, 724 (char *)&idm_so_rcvbuf, sizeof (idm_so_rcvbuf), CRED()); 725 (void) ksocket_setsockopt(ks, SOL_SOCKET, SO_SNDBUF, 726 (char *)&idm_so_sndbuf, sizeof (idm_so_sndbuf), CRED()); 727 (void) ksocket_setsockopt(ks, IPPROTO_TCP, TCP_NODELAY, 728 (char *)&on, sizeof (on), CRED()); 729 } 730 731 static uint32_t 732 n2h24(const uchar_t *ptr) 733 { 734 return ((ptr[0] << 16) | (ptr[1] << 8) | ptr[2]); 735 } 736 737 static boolean_t 738 idm_dataseglenokay(idm_conn_t *ic, idm_pdu_t *pdu) 739 { 740 iscsi_hdr_t *bhs; 741 742 if (ic->ic_conn_type == CONN_TYPE_TGT && 743 pdu->isp_datalen > ic->ic_conn_params.max_recv_dataseglen) { 744 IDM_CONN_LOG(CE_WARN, 745 "idm_dataseglenokay: exceeded the max data segment length"); 746 return (B_FALSE); 747 } 748 749 bhs = pdu->isp_hdr; 750 /* 751 * Filter out any RFC3720 data-size violations. 
752 */ 753 switch (IDM_PDU_OPCODE(pdu)) { 754 case ISCSI_OP_SCSI_TASK_MGT_MSG: 755 case ISCSI_OP_SCSI_TASK_MGT_RSP: 756 case ISCSI_OP_RTT_RSP: 757 case ISCSI_OP_LOGOUT_CMD: 758 /* 759 * Data-segment not allowed and additional headers not allowed. 760 * (both must be zero according to the RFC3720.) 761 */ 762 if (bhs->hlength != 0 || pdu->isp_datalen != 0) 763 return (B_FALSE); 764 break; 765 case ISCSI_OP_NOOP_OUT: 766 case ISCSI_OP_LOGIN_CMD: 767 case ISCSI_OP_TEXT_CMD: 768 case ISCSI_OP_SNACK_CMD: 769 case ISCSI_OP_NOOP_IN: 770 case ISCSI_OP_SCSI_RSP: 771 case ISCSI_OP_LOGIN_RSP: 772 case ISCSI_OP_TEXT_RSP: 773 case ISCSI_OP_SCSI_DATA_RSP: 774 case ISCSI_OP_LOGOUT_RSP: 775 case ISCSI_OP_ASYNC_EVENT: 776 case ISCSI_OP_REJECT_MSG: 777 /* 778 * Additional headers not allowed. 779 * (must be zero according to RFC3720.) 780 */ 781 if (bhs->hlength) 782 return (B_FALSE); 783 break; 784 case ISCSI_OP_SCSI_CMD: 785 /* 786 * See RFC3720, section 10.3 787 * 788 * For pure read cmds, data-segment-length must be zero. 789 * For non-final transfers, data-size must be even number of 790 * 4-byte words. 791 * For any transfer, an expected byte count must be provided. 792 * For bidirectional transfers, an additional-header must be 793 * provided (for the read byte-count.) 
794 */ 795 if (pdu->isp_datalen) { 796 if ((bhs->flags & (ISCSI_FLAG_CMD_READ | 797 ISCSI_FLAG_CMD_WRITE)) == ISCSI_FLAG_CMD_READ) 798 return (B_FALSE); 799 if ((bhs->flags & ISCSI_FLAG_FINAL) == 0 && 800 ((pdu->isp_datalen & 0x3) != 0)) 801 return (B_FALSE); 802 } 803 if (bhs->flags & (ISCSI_FLAG_CMD_READ | 804 ISCSI_FLAG_CMD_WRITE)) { 805 iscsi_scsi_cmd_hdr_t *cmdhdr = 806 (iscsi_scsi_cmd_hdr_t *)bhs; 807 /* 808 * we're transfering some data, we must have a 809 * byte count 810 */ 811 if (cmdhdr->data_length == 0) 812 return (B_FALSE); 813 } 814 break; 815 case ISCSI_OP_SCSI_DATA: 816 /* 817 * See RFC3720, section 10.7 818 * 819 * Additional headers aren't allowed, and the data-size must 820 * be an even number of 4-byte words (unless the final bit 821 * is set.) 822 */ 823 if (bhs->hlength) 824 return (B_FALSE); 825 if ((bhs->flags & ISCSI_FLAG_FINAL) == 0 && 826 ((pdu->isp_datalen & 0x3) != 0)) 827 return (B_FALSE); 828 break; 829 default: 830 break; 831 } 832 return (B_TRUE); 833 } 834 835 static idm_status_t 836 idm_sorecvhdr(idm_conn_t *ic, idm_pdu_t *pdu) 837 { 838 iscsi_hdr_t *bhs; 839 uint32_t hdr_digest_crc; 840 uint32_t crc_calculated; 841 void *new_hdr; 842 int ahslen = 0; 843 int total_len = 0; 844 int iovlen = 0; 845 struct iovec iov[2]; 846 idm_so_conn_t *so_conn; 847 int rc; 848 849 so_conn = ic->ic_transport_private; 850 851 /* 852 * Read BHS 853 */ 854 bhs = pdu->isp_hdr; 855 rc = idm_sorecv(so_conn->ic_so, pdu->isp_hdr, sizeof (iscsi_hdr_t)); 856 if (rc != IDM_STATUS_SUCCESS) { 857 return (IDM_STATUS_FAIL); 858 } 859 860 /* 861 * Check actual AHS length against the amount available in the buffer 862 */ 863 if ((IDM_PDU_OPCODE(pdu) != ISCSI_OP_SCSI_CMD) && 864 (bhs->hlength != 0)) { 865 /* ---- hlength is only only valid for SCSI Request ---- */ 866 return (IDM_STATUS_FAIL); 867 } 868 pdu->isp_hdrlen = sizeof (iscsi_hdr_t) + 869 (bhs->hlength * sizeof (uint32_t)); 870 pdu->isp_datalen = n2h24(bhs->dlength); 871 872 if (!idm_dataseglenokay(ic, 
pdu)) { 873 IDM_CONN_LOG(CE_WARN, 874 "idm_sorecvhdr: invalid data segment length"); 875 return (IDM_STATUS_FAIL); 876 } 877 if (bhs->hlength > IDM_SORX_WIRE_AHSLEN) { 878 /* Allocate a new header segment and change the callback */ 879 new_hdr = kmem_alloc(pdu->isp_hdrlen, KM_SLEEP); 880 bcopy(pdu->isp_hdr, new_hdr, sizeof (iscsi_hdr_t)); 881 pdu->isp_hdr = new_hdr; 882 pdu->isp_flags |= IDM_PDU_ADDL_HDR; 883 884 /* 885 * This callback will restore the expected values after 886 * the RX PDU has been processed. 887 */ 888 pdu->isp_callback = idm_sorx_addl_pdu_cb; 889 } 890 891 /* 892 * Setup receipt of additional header and header digest (if enabled). 893 */ 894 if (bhs->hlength > 0) { 895 iov[iovlen].iov_base = (caddr_t)(pdu->isp_hdr + 1); 896 ahslen = pdu->isp_hdrlen - sizeof (iscsi_hdr_t); 897 iov[iovlen].iov_len = ahslen; 898 total_len += iov[iovlen].iov_len; 899 iovlen++; 900 } 901 902 if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) { 903 iov[iovlen].iov_base = (caddr_t)&hdr_digest_crc; 904 iov[iovlen].iov_len = sizeof (hdr_digest_crc); 905 total_len += iov[iovlen].iov_len; 906 iovlen++; 907 } 908 909 if ((iovlen != 0) && 910 (idm_iov_sorecv(so_conn->ic_so, &iov[0], iovlen, 911 total_len) != 0)) { 912 return (IDM_STATUS_FAIL); 913 } 914 915 /* 916 * Validate header digest if enabled 917 */ 918 if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) { 919 crc_calculated = idm_crc32c(pdu->isp_hdr, 920 sizeof (iscsi_hdr_t) + ahslen); 921 if (crc_calculated != hdr_digest_crc) { 922 /* Invalid Header Digest */ 923 return (IDM_STATUS_HEADER_DIGEST); 924 } 925 } 926 927 return (0); 928 } 929 930 /* 931 * idm_so_ini_conn_create() 932 * Allocate the sockets transport connection resources. 
933 */ 934 static idm_status_t 935 idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic) 936 { 937 ksocket_t so; 938 idm_so_conn_t *so_conn; 939 idm_status_t idmrc; 940 941 so = idm_socreate(cr->cr_domain, cr->cr_type, 942 cr->cr_protocol); 943 if (so == NULL) { 944 return (IDM_STATUS_FAIL); 945 } 946 947 /* Bind the socket if configured to do so */ 948 if (cr->cr_bound) { 949 if (ksocket_bind(so, &cr->cr_bound_addr.sin, 950 SIZEOF_SOCKADDR(&cr->cr_bound_addr.sin), CRED()) != 0) { 951 idm_sodestroy(so); 952 return (IDM_STATUS_FAIL); 953 } 954 } 955 956 idmrc = idm_so_conn_create_common(ic, so); 957 if (idmrc != IDM_STATUS_SUCCESS) { 958 idm_soshutdown(so); 959 idm_sodestroy(so); 960 return (IDM_STATUS_FAIL); 961 } 962 963 so_conn = ic->ic_transport_private; 964 /* Set up socket options */ 965 idm_set_ini_preconnect_options(so_conn, cr->cr_boot_conn); 966 967 return (IDM_STATUS_SUCCESS); 968 } 969 970 /* 971 * idm_so_ini_conn_destroy() 972 * Tear down the sockets transport connection resources. 973 */ 974 static void 975 idm_so_ini_conn_destroy(idm_conn_t *ic) 976 { 977 idm_so_conn_destroy_common(ic); 978 } 979 980 /* 981 * idm_so_ini_conn_connect() 982 * Establish the connection referred to by the handle previously allocated via 983 * idm_so_ini_conn_create(). 
 *
 * With ic_conn_params.nonblock_socket set, the connect is issued in
 * non-blocking mode and retried until it succeeds, fails with
 * ETIMEDOUT/ECONNREFUSED/ECONNRESET, or the current lbolt passes
 * ic_conn_params.conn_login_max.  Otherwise a single blocking connect is
 * made.  On success the tx/rx threads are started and the post-connect
 * socket options applied.
 *
 * Returns IDM_STATUS_SUCCESS, or IDM_STATUS_FAIL after shutting the
 * socket down.
 */
static idm_status_t
idm_so_ini_conn_connect(idm_conn_t *ic)
{
	idm_so_conn_t	*so_conn;
	struct sonode	*node = NULL;
	int		rc;
	clock_t		lbolt, conn_login_max, conn_login_interval;
	boolean_t	nonblock;

	so_conn = ic->ic_transport_private;
	nonblock = ic->ic_conn_params.nonblock_socket;
	/*
	 * NOTE(review): conn_login_max is compared directly against
	 * ddi_get_lbolt() below, so it is presumably an absolute lbolt
	 * deadline supplied by the caller -- confirm.
	 */
	conn_login_max = ic->ic_conn_params.conn_login_max;
	/* lbolt at which the current retry interval ends */
	conn_login_interval = ddi_get_lbolt() +
	    SEC_TO_TICK(ic->ic_conn_params.conn_login_interval);

	if (nonblock == B_TRUE) {
		node = ((struct sonode *)(so_conn->ic_so));
		/* Switch the socket to non-blocking mode */
		idm_so_socket_set_nonblock(node);
		do {
			rc = ksocket_connect(so_conn->ic_so,
			    &ic->ic_ini_dst_addr.sin,
			    (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)),
			    CRED());
			if (rc == 0 || rc == EISCONN) {
				/* connected now, or already connected */
				rc = IDM_STATUS_SUCCESS;
				break;
			}
			if ((rc == ETIMEDOUT) || (rc == ECONNREFUSED) ||
			    (rc == ECONNRESET)) {
				/* timed out, refused or reset: give up */
				break;
			}
			lbolt = ddi_get_lbolt();
			if (lbolt > conn_login_max) {
				/*
				 * Connection retry timeout,
				 * failed connect to target.
				 */
				break;
			}
			if (lbolt < conn_login_interval) {
				if ((rc == EINPROGRESS) || (rc == EALREADY)) {
					/* TCP connect still in progress */
					delay(SEC_TO_TICK(IN_PROGRESS_DELAY));
					continue;
				} else {
					/* wait out the rest of the interval */
					delay(conn_login_interval - lbolt);
				}
			}
			/* start a new retry interval */
			conn_login_interval = ddi_get_lbolt() +
			    SEC_TO_TICK(ic->ic_conn_params.conn_login_interval);
		} while (rc != 0);
		/* put the socket back into blocking mode once connected */
		if (rc == IDM_STATUS_SUCCESS) {
			idm_so_socket_set_block(node);
		}
	} else {
		rc = ksocket_connect(so_conn->ic_so, &ic->ic_ini_dst_addr.sin,
		    (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)), CRED());
	}

	if (rc != 0) {
		idm_soshutdown(so_conn->ic_so);
		return (IDM_STATUS_FAIL);
	}

	idm_so_conn_connect_common(ic);

	idm_set_postconnect_options(so_conn->ic_so);

	return (IDM_STATUS_SUCCESS);
}

/*
 * idm_so_tgt_conn_create()
 * Attach the sockets transport state to a target connection whose socket
 * new_so already exists.  The post-connect socket options are applied
 * first; the return value is that of idm_so_conn_create_common().
 */
idm_status_t
idm_so_tgt_conn_create(idm_conn_t *ic, ksocket_t new_so)
{
	idm_status_t	idmrc;

	idm_set_postconnect_options(new_so);
	idmrc = idm_so_conn_create_common(ic, new_so);

	return (idmrc);
}

/*
 * idm_so_tgt_conn_destroy()
 * Release the target connection's sockets transport resources; all of
 * the work is done by idm_so_conn_destroy_common().
 */
static void
idm_so_tgt_conn_destroy(idm_conn_t *ic)
{
	idm_so_conn_destroy_common(ic);
}

/*
 * idm_so_tgt_conn_connect()
 * Establish the connection in ic, passed from idm_tgt_conn_finish(), which
 * is invoked from the SM as a result of an inbound connection request.
1081 */ 1082 static idm_status_t 1083 idm_so_tgt_conn_connect(idm_conn_t *ic) 1084 { 1085 idm_so_conn_connect_common(ic); 1086 1087 return (IDM_STATUS_SUCCESS); 1088 } 1089 1090 static idm_status_t 1091 idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so) 1092 { 1093 idm_so_conn_t *so_conn; 1094 1095 so_conn = kmem_zalloc(sizeof (idm_so_conn_t), KM_SLEEP); 1096 so_conn->ic_so = new_so; 1097 1098 ic->ic_transport_private = so_conn; 1099 ic->ic_transport_hdrlen = 0; 1100 1101 /* Set the scoreboarding flag on this connection */ 1102 ic->ic_conn_flags |= IDM_CONN_USE_SCOREBOARD; 1103 ic->ic_conn_params.max_recv_dataseglen = 1104 ISCSI_DEFAULT_MAX_RECV_SEG_LEN; 1105 ic->ic_conn_params.max_xmit_dataseglen = 1106 ISCSI_DEFAULT_MAX_XMIT_SEG_LEN; 1107 1108 /* 1109 * Initialize tx thread mutex and list 1110 */ 1111 mutex_init(&so_conn->ic_tx_mutex, NULL, MUTEX_DEFAULT, NULL); 1112 cv_init(&so_conn->ic_tx_cv, NULL, CV_DEFAULT, NULL); 1113 list_create(&so_conn->ic_tx_list, sizeof (idm_pdu_t), 1114 offsetof(idm_pdu_t, idm_tx_link)); 1115 1116 return (IDM_STATUS_SUCCESS); 1117 } 1118 1119 static void 1120 idm_so_conn_destroy_common(idm_conn_t *ic) 1121 { 1122 idm_so_conn_t *so_conn = ic->ic_transport_private; 1123 1124 ic->ic_transport_private = NULL; 1125 idm_sodestroy(so_conn->ic_so); 1126 list_destroy(&so_conn->ic_tx_list); 1127 mutex_destroy(&so_conn->ic_tx_mutex); 1128 cv_destroy(&so_conn->ic_tx_cv); 1129 1130 kmem_free(so_conn, sizeof (idm_so_conn_t)); 1131 } 1132 1133 static void 1134 idm_so_conn_connect_common(idm_conn_t *ic) 1135 { 1136 idm_so_conn_t *so_conn; 1137 struct sockaddr_in6 t_addr; 1138 socklen_t t_addrlen = 0; 1139 1140 so_conn = ic->ic_transport_private; 1141 bzero(&t_addr, sizeof (struct sockaddr_in6)); 1142 t_addrlen = sizeof (struct sockaddr_in6); 1143 1144 /* Set the local and remote addresses in the idm conn handle */ 1145 (void) ksocket_getsockname(so_conn->ic_so, (struct sockaddr *)&t_addr, 1146 &t_addrlen, CRED()); 1147 bcopy(&t_addr, 
&ic->ic_laddr, t_addrlen); 1148 (void) ksocket_getpeername(so_conn->ic_so, (struct sockaddr *)&t_addr, 1149 &t_addrlen, CRED()); 1150 bcopy(&t_addr, &ic->ic_raddr, t_addrlen); 1151 1152 mutex_enter(&ic->ic_mutex); 1153 so_conn->ic_tx_thread = thread_create(NULL, 0, idm_sotx_thread, ic, 0, 1154 &p0, TS_RUN, minclsyspri); 1155 so_conn->ic_rx_thread = thread_create(NULL, 0, idm_sorx_thread, ic, 0, 1156 &p0, TS_RUN, minclsyspri); 1157 1158 while (so_conn->ic_rx_thread_did == 0 || 1159 so_conn->ic_tx_thread_did == 0) 1160 cv_wait(&ic->ic_cv, &ic->ic_mutex); 1161 mutex_exit(&ic->ic_mutex); 1162 } 1163 1164 /* 1165 * idm_so_conn_disconnect() 1166 * Shutdown the socket connection and stop the thread 1167 */ 1168 static void 1169 idm_so_conn_disconnect(idm_conn_t *ic) 1170 { 1171 idm_so_conn_t *so_conn; 1172 1173 so_conn = ic->ic_transport_private; 1174 1175 mutex_enter(&ic->ic_mutex); 1176 so_conn->ic_rx_thread_running = B_FALSE; 1177 so_conn->ic_tx_thread_running = B_FALSE; 1178 /* We need to wakeup the TX thread */ 1179 mutex_enter(&so_conn->ic_tx_mutex); 1180 cv_signal(&so_conn->ic_tx_cv); 1181 mutex_exit(&so_conn->ic_tx_mutex); 1182 mutex_exit(&ic->ic_mutex); 1183 1184 /* This should wakeup the RX thread if it is sleeping */ 1185 idm_soshutdown(so_conn->ic_so); 1186 1187 thread_join(so_conn->ic_tx_thread_did); 1188 thread_join(so_conn->ic_rx_thread_did); 1189 } 1190 1191 /* 1192 * idm_so_tgt_svc_create() 1193 * Establish a service on an IP address and port. idm_svc_req_t contains 1194 * the service parameters. 
1195 */ 1196 /*ARGSUSED*/ 1197 static idm_status_t 1198 idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is) 1199 { 1200 idm_so_svc_t *so_svc; 1201 1202 so_svc = kmem_zalloc(sizeof (idm_so_svc_t), KM_SLEEP); 1203 1204 /* Set the new sockets service in svc handle */ 1205 is->is_so_svc = (void *)so_svc; 1206 1207 return (IDM_STATUS_SUCCESS); 1208 } 1209 1210 /* 1211 * idm_so_tgt_svc_destroy() 1212 * Teardown sockets resources allocated in idm_so_tgt_svc_create() 1213 */ 1214 static void 1215 idm_so_tgt_svc_destroy(idm_svc_t *is) 1216 { 1217 /* the socket will have been torn down; free the service */ 1218 kmem_free(is->is_so_svc, sizeof (idm_so_svc_t)); 1219 } 1220 1221 /* 1222 * idm_so_tgt_svc_online() 1223 * Launch a watch thread on the svc allocated in idm_so_tgt_svc_create() 1224 */ 1225 1226 static idm_status_t 1227 idm_so_tgt_svc_online(idm_svc_t *is) 1228 { 1229 idm_so_svc_t *so_svc; 1230 idm_svc_req_t *sr = &is->is_svc_req; 1231 struct sockaddr_in6 sin6_ip; 1232 const uint32_t on = 1; 1233 const uint32_t off = 0; 1234 1235 mutex_enter(&is->is_mutex); 1236 so_svc = (idm_so_svc_t *)is->is_so_svc; 1237 1238 /* 1239 * Try creating an IPv6 socket first 1240 */ 1241 if ((so_svc->is_so = idm_socreate(PF_INET6, SOCK_STREAM, 0)) == NULL) { 1242 mutex_exit(&is->is_mutex); 1243 return (IDM_STATUS_FAIL); 1244 } else { 1245 bzero(&sin6_ip, sizeof (sin6_ip)); 1246 sin6_ip.sin6_family = AF_INET6; 1247 sin6_ip.sin6_port = htons(sr->sr_port); 1248 sin6_ip.sin6_addr = in6addr_any; 1249 1250 (void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET, 1251 SO_REUSEADDR, (char *)&on, sizeof (on), CRED()); 1252 /* 1253 * Turn off SO_MAC_EXEMPT so future sobinds succeed 1254 */ 1255 (void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET, 1256 SO_MAC_EXEMPT, (char *)&off, sizeof (off), CRED()); 1257 1258 if (ksocket_bind(so_svc->is_so, (struct sockaddr *)&sin6_ip, 1259 sizeof (sin6_ip), CRED()) != 0) { 1260 mutex_exit(&is->is_mutex); 1261 idm_sodestroy(so_svc->is_so); 1262 return 
(IDM_STATUS_FAIL); 1263 } 1264 } 1265 1266 idm_set_postconnect_options(so_svc->is_so); 1267 1268 if (ksocket_listen(so_svc->is_so, 5, CRED()) != 0) { 1269 mutex_exit(&is->is_mutex); 1270 idm_soshutdown(so_svc->is_so); 1271 idm_sodestroy(so_svc->is_so); 1272 return (IDM_STATUS_FAIL); 1273 } 1274 1275 /* Launch a watch thread */ 1276 so_svc->is_thread = thread_create(NULL, 0, idm_so_svc_port_watcher, 1277 is, 0, &p0, TS_RUN, minclsyspri); 1278 1279 if (so_svc->is_thread == NULL) { 1280 /* Failure to launch; teardown the socket */ 1281 mutex_exit(&is->is_mutex); 1282 idm_soshutdown(so_svc->is_so); 1283 idm_sodestroy(so_svc->is_so); 1284 return (IDM_STATUS_FAIL); 1285 } 1286 ksocket_hold(so_svc->is_so); 1287 /* Wait for the port watcher thread to start */ 1288 while (!so_svc->is_thread_running) 1289 cv_wait(&is->is_cv, &is->is_mutex); 1290 mutex_exit(&is->is_mutex); 1291 1292 return (IDM_STATUS_SUCCESS); 1293 } 1294 1295 /* 1296 * idm_so_tgt_svc_offline 1297 * 1298 * Stop listening on the IP address and port identified by idm_svc_t. 1299 */ 1300 static void 1301 idm_so_tgt_svc_offline(idm_svc_t *is) 1302 { 1303 idm_so_svc_t *so_svc; 1304 mutex_enter(&is->is_mutex); 1305 so_svc = (idm_so_svc_t *)is->is_so_svc; 1306 so_svc->is_thread_running = B_FALSE; 1307 mutex_exit(&is->is_mutex); 1308 1309 /* 1310 * Teardown socket 1311 */ 1312 idm_sodestroy(so_svc->is_so); 1313 1314 /* 1315 * Now we expect the port watcher thread to terminate 1316 */ 1317 thread_join(so_svc->is_thread_did); 1318 } 1319 1320 /* 1321 * Watch thread for target service connection establishment. 
1322 */ 1323 void 1324 idm_so_svc_port_watcher(void *arg) 1325 { 1326 idm_svc_t *svc = arg; 1327 ksocket_t new_so; 1328 idm_conn_t *ic; 1329 idm_status_t idmrc; 1330 idm_so_svc_t *so_svc; 1331 int rc; 1332 const uint32_t off = 0; 1333 struct sockaddr_in6 t_addr; 1334 socklen_t t_addrlen; 1335 1336 bzero(&t_addr, sizeof (struct sockaddr_in6)); 1337 t_addrlen = sizeof (struct sockaddr_in6); 1338 mutex_enter(&svc->is_mutex); 1339 1340 so_svc = svc->is_so_svc; 1341 so_svc->is_thread_running = B_TRUE; 1342 so_svc->is_thread_did = so_svc->is_thread->t_did; 1343 1344 cv_signal(&svc->is_cv); 1345 1346 IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) online", (void *)svc, 1347 svc->is_svc_req.sr_port); 1348 1349 while (so_svc->is_thread_running) { 1350 mutex_exit(&svc->is_mutex); 1351 1352 if ((rc = ksocket_accept(so_svc->is_so, 1353 (struct sockaddr *)&t_addr, &t_addrlen, 1354 &new_so, CRED())) != 0) { 1355 mutex_enter(&svc->is_mutex); 1356 if (rc != ECONNABORTED && rc != EINTR) { 1357 IDM_SVC_LOG(CE_NOTE, "idm_so_svc_port_watcher:" 1358 " ksocket_accept failed %d", rc); 1359 } 1360 /* 1361 * Unclean shutdown of this thread is not handled 1362 * wait for !is_thread_running. 1363 */ 1364 continue; 1365 } 1366 /* 1367 * Turn off SO_MAC_EXEMPT so future sobinds succeed 1368 */ 1369 (void) ksocket_setsockopt(new_so, SOL_SOCKET, SO_MAC_EXEMPT, 1370 (char *)&off, sizeof (off), CRED()); 1371 1372 idmrc = idm_svc_conn_create(svc, IDM_TRANSPORT_TYPE_SOCKETS, 1373 &ic); 1374 if (idmrc != IDM_STATUS_SUCCESS) { 1375 /* Drop connection */ 1376 idm_soshutdown(new_so); 1377 idm_sodestroy(new_so); 1378 mutex_enter(&svc->is_mutex); 1379 continue; 1380 } 1381 1382 idmrc = idm_so_tgt_conn_create(ic, new_so); 1383 if (idmrc != IDM_STATUS_SUCCESS) { 1384 idm_svc_conn_destroy(ic); 1385 idm_soshutdown(new_so); 1386 idm_sodestroy(new_so); 1387 mutex_enter(&svc->is_mutex); 1388 continue; 1389 } 1390 1391 /* 1392 * Kick the state machine. 
At CS_S3_XPT_UP the state machine 1393 * will notify the client (target) about the new connection. 1394 */ 1395 idm_conn_event(ic, CE_CONNECT_ACCEPT, (uintptr_t)NULL); 1396 1397 mutex_enter(&svc->is_mutex); 1398 } 1399 ksocket_rele(so_svc->is_so); 1400 so_svc->is_thread_running = B_FALSE; 1401 mutex_exit(&svc->is_mutex); 1402 1403 IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) offline", (void *)svc, 1404 svc->is_svc_req.sr_port); 1405 1406 thread_exit(); 1407 } 1408 1409 /* 1410 * idm_so_free_task_rsrc() stops any ongoing processing of the task and 1411 * frees resources associated with the task. 1412 * 1413 * It's not clear that this should return idm_status_t. What do we do 1414 * if it fails? 1415 */ 1416 static idm_status_t 1417 idm_so_free_task_rsrc(idm_task_t *idt) 1418 { 1419 idm_buf_t *idb, *next_idb; 1420 1421 /* 1422 * There is nothing to cleanup on initiator connections 1423 */ 1424 if (IDM_CONN_ISINI(idt->idt_ic)) 1425 return (IDM_STATUS_SUCCESS); 1426 1427 /* 1428 * If this is a target connection, call idm_buf_rx_from_ini_done for 1429 * any buffer on the "outbufv" list with idb->idb_in_transport==B_TRUE. 1430 * 1431 * In addition, remove any buffers associated with this task from 1432 * the ic_tx_list. We'll do this by walking the idt_inbufv list, but 1433 * items don't actually get removed from that list (and completion 1434 * routines called) until idm_task_cleanup. 
1435 */ 1436 mutex_enter(&idt->idt_mutex); 1437 1438 for (idb = list_head(&idt->idt_outbufv); idb != NULL; idb = next_idb) { 1439 next_idb = list_next(&idt->idt_outbufv, idb); 1440 if (idb->idb_in_transport) { 1441 /* 1442 * idm_buf_rx_from_ini_done releases idt->idt_mutex 1443 */ 1444 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic, 1445 uintptr_t, idb->idb_buf, 1446 uint32_t, idb->idb_bufoffset, 1447 uint64_t, 0, uint32_t, 0, uint32_t, 0, 1448 uint32_t, idb->idb_xfer_len, 1449 int, XFER_BUF_RX_FROM_INI); 1450 idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_ABORTED); 1451 mutex_enter(&idt->idt_mutex); 1452 } 1453 } 1454 1455 for (idb = list_head(&idt->idt_inbufv); idb != NULL; idb = next_idb) { 1456 next_idb = list_next(&idt->idt_inbufv, idb); 1457 /* 1458 * We want to remove these items from the tx_list as well, 1459 * but knowing it's in the idt_inbufv list is not a guarantee 1460 * that it's in the tx_list. If it's on the tx list then 1461 * let idm_sotx_thread() clean it up. 1462 */ 1463 if (idb->idb_in_transport && !idb->idb_tx_thread) { 1464 /* 1465 * idm_buf_tx_to_ini_done releases idt->idt_mutex 1466 */ 1467 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic, 1468 uintptr_t, idb->idb_buf, 1469 uint32_t, idb->idb_bufoffset, 1470 uint64_t, 0, uint32_t, 0, uint32_t, 0, 1471 uint32_t, idb->idb_xfer_len, 1472 int, XFER_BUF_TX_TO_INI); 1473 idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED); 1474 mutex_enter(&idt->idt_mutex); 1475 } 1476 } 1477 1478 mutex_exit(&idt->idt_mutex); 1479 1480 return (IDM_STATUS_SUCCESS); 1481 } 1482 1483 /* 1484 * idm_so_negotiate_key_values() validates the key values for this connection 1485 */ 1486 /* ARGSUSED */ 1487 static kv_status_t 1488 idm_so_negotiate_key_values(idm_conn_t *it, nvlist_t *request_nvl, 1489 nvlist_t *response_nvl, nvlist_t *negotiated_nvl) 1490 { 1491 /* All parameters are negotiated at the iscsit level */ 1492 return (KV_HANDLED); 1493 } 1494 1495 /* 1496 * idm_so_notice_key_values() activates the 
negotiated key values for 1497 * this connection. 1498 */ 1499 static void 1500 idm_so_notice_key_values(idm_conn_t *it, nvlist_t *negotiated_nvl) 1501 { 1502 char *nvp_name; 1503 nvpair_t *nvp; 1504 nvpair_t *next_nvp; 1505 int nvrc; 1506 idm_status_t idm_status; 1507 const idm_kv_xlate_t *ikvx; 1508 uint64_t num_val; 1509 1510 for (nvp = nvlist_next_nvpair(negotiated_nvl, NULL); 1511 nvp != NULL; nvp = next_nvp) { 1512 next_nvp = nvlist_next_nvpair(negotiated_nvl, nvp); 1513 nvp_name = nvpair_name(nvp); 1514 1515 ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name)); 1516 switch (ikvx->ik_key_id) { 1517 case KI_HEADER_DIGEST: 1518 case KI_DATA_DIGEST: 1519 idm_status = idm_so_handle_digest(it, nvp, ikvx); 1520 ASSERT(idm_status == 0); 1521 1522 /* Remove processed item from negotiated_nvl list */ 1523 nvrc = nvlist_remove_all( 1524 negotiated_nvl, ikvx->ik_key_name); 1525 ASSERT(nvrc == 0); 1526 break; 1527 case KI_MAX_RECV_DATA_SEGMENT_LENGTH: 1528 /* 1529 * Just pass the value down to idm layer. 1530 * No need to remove it from negotiated_nvl list here. 
1531 */ 1532 nvrc = nvpair_value_uint64(nvp, &num_val); 1533 ASSERT(nvrc == 0); 1534 it->ic_conn_params.max_xmit_dataseglen = 1535 (uint32_t)num_val; 1536 break; 1537 default: 1538 break; 1539 } 1540 } 1541 } 1542 1543 /* 1544 * idm_so_declare_key_values() declares the key values for this connection 1545 */ 1546 /* ARGSUSED */ 1547 static kv_status_t 1548 idm_so_declare_key_values(idm_conn_t *it, nvlist_t *config_nvl, 1549 nvlist_t *outgoing_nvl) 1550 { 1551 char *nvp_name; 1552 nvpair_t *nvp; 1553 nvpair_t *next_nvp; 1554 kv_status_t kvrc; 1555 int nvrc = 0; 1556 const idm_kv_xlate_t *ikvx; 1557 uint64_t num_val; 1558 1559 for (nvp = nvlist_next_nvpair(config_nvl, NULL); 1560 nvp != NULL && nvrc == 0; nvp = next_nvp) { 1561 next_nvp = nvlist_next_nvpair(config_nvl, nvp); 1562 nvp_name = nvpair_name(nvp); 1563 1564 ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name)); 1565 switch (ikvx->ik_key_id) { 1566 case KI_MAX_RECV_DATA_SEGMENT_LENGTH: 1567 if ((nvrc = nvpair_value_uint64(nvp, &num_val)) != 0) { 1568 break; 1569 } 1570 if (outgoing_nvl && 1571 (nvrc = nvlist_add_uint64(outgoing_nvl, 1572 nvp_name, num_val)) != 0) { 1573 break; 1574 } 1575 it->ic_conn_params.max_recv_dataseglen = 1576 (uint32_t)num_val; 1577 break; 1578 default: 1579 break; 1580 } 1581 } 1582 kvrc = idm_nvstat_to_kvstat(nvrc); 1583 return (kvrc); 1584 } 1585 1586 static idm_status_t 1587 idm_so_handle_digest(idm_conn_t *it, nvpair_t *digest_choice, 1588 const idm_kv_xlate_t *ikvx) 1589 { 1590 int nvrc; 1591 char *digest_choice_string; 1592 1593 nvrc = nvpair_value_string(digest_choice, 1594 &digest_choice_string); 1595 ASSERT(nvrc == 0); 1596 if (strcasecmp(digest_choice_string, "crc32c") == 0) { 1597 switch (ikvx->ik_key_id) { 1598 case KI_HEADER_DIGEST: 1599 it->ic_conn_flags |= IDM_CONN_HEADER_DIGEST; 1600 break; 1601 case KI_DATA_DIGEST: 1602 it->ic_conn_flags |= IDM_CONN_DATA_DIGEST; 1603 break; 1604 default: 1605 ASSERT(0); 1606 break; 1607 } 1608 } else if 
(strcasecmp(digest_choice_string, "none") == 0) { 1609 switch (ikvx->ik_key_id) { 1610 case KI_HEADER_DIGEST: 1611 it->ic_conn_flags &= ~IDM_CONN_HEADER_DIGEST; 1612 break; 1613 case KI_DATA_DIGEST: 1614 it->ic_conn_flags &= ~IDM_CONN_DATA_DIGEST; 1615 break; 1616 default: 1617 ASSERT(0); 1618 break; 1619 } 1620 } else { 1621 ASSERT(0); 1622 } 1623 1624 return (IDM_STATUS_SUCCESS); 1625 } 1626 1627 1628 /* 1629 * idm_so_conn_is_capable() verifies that the passed connection is provided 1630 * for by the sockets interface. 1631 */ 1632 /* ARGSUSED */ 1633 static boolean_t 1634 idm_so_conn_is_capable(idm_conn_req_t *ic, idm_transport_caps_t *caps) 1635 { 1636 return (B_TRUE); 1637 } 1638 1639 /* 1640 * idm_so_rx_datain() validates the Data Sequence number of the PDU. The 1641 * idm_sorecv_scsidata() function invoked earlier actually reads the data 1642 * off the socket into the appropriate buffers. 1643 */ 1644 static void 1645 idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu) 1646 { 1647 iscsi_data_hdr_t *bhs; 1648 idm_task_t *idt; 1649 idm_buf_t *idb; 1650 uint32_t datasn; 1651 size_t offset; 1652 iscsi_hdr_t *ihp = (iscsi_hdr_t *)pdu->isp_hdr; 1653 iscsi_data_rsp_hdr_t *idrhp = (iscsi_data_rsp_hdr_t *)ihp; 1654 1655 ASSERT(ic != NULL); 1656 ASSERT(pdu != NULL); 1657 ASSERT(IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP); 1658 1659 bhs = (iscsi_data_hdr_t *)pdu->isp_hdr; 1660 datasn = ntohl(bhs->datasn); 1661 offset = ntohl(bhs->offset); 1662 1663 /* 1664 * Look up the task corresponding to the initiator task tag 1665 * to get the buffers affiliated with the task. 
1666 */ 1667 idt = idm_task_find(ic, bhs->itt, bhs->ttt); 1668 if (idt == NULL) { 1669 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: failed to find task"); 1670 idm_pdu_rx_protocol_error(ic, pdu); 1671 return; 1672 } 1673 1674 idb = pdu->isp_sorx_buf; 1675 if (idb == NULL) { 1676 IDM_CONN_LOG(CE_WARN, 1677 "idm_so_rx_datain: failed to find buffer"); 1678 idm_task_rele(idt); 1679 idm_pdu_rx_protocol_error(ic, pdu); 1680 return; 1681 } 1682 1683 /* 1684 * DataSN values should be sequential and should not have any gaps or 1685 * repetitions. Check the DataSN with the one stored in the task. 1686 */ 1687 if (datasn == idt->idt_exp_datasn) { 1688 idt->idt_exp_datasn++; /* keep track of DataSN received */ 1689 } else { 1690 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: datasn out of order"); 1691 idm_task_rele(idt); 1692 idm_pdu_rx_protocol_error(ic, pdu); 1693 return; 1694 } 1695 1696 /* 1697 * PDUs in a sequence should be in continuously increasing 1698 * address offset 1699 */ 1700 if (offset != idb->idb_exp_offset) { 1701 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: unexpected offset"); 1702 idm_task_rele(idt); 1703 idm_pdu_rx_protocol_error(ic, pdu); 1704 return; 1705 } 1706 /* Expected next relative buffer offset */ 1707 idb->idb_exp_offset += n2h24(bhs->dlength); 1708 idt->idt_rx_bytes += n2h24(bhs->dlength); 1709 1710 idm_task_rele(idt); 1711 1712 /* 1713 * For now call scsi_rsp which will process the data rsp 1714 * Revisit, need to provide an explicit client entry point for 1715 * phase collapse completions. 1716 */ 1717 if ((IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP) && 1718 (idrhp->flags & ISCSI_FLAG_DATA_STATUS)) { 1719 (*ic->ic_conn_ops.icb_rx_scsi_rsp)(ic, pdu); 1720 } 1721 1722 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS); 1723 } 1724 1725 /* 1726 * The idm_so_rx_dataout() function is used by the iSCSI target to read 1727 * data from the Data-Out PDU sent by the iSCSI initiator. 
1728 * 1729 * This function gets the Initiator Task Tag from the PDU BHS and looks up the 1730 * task to get the buffers associated with the PDU. A PDU might span buffers. 1731 * The data is then read into the respective buffer. 1732 */ 1733 static void 1734 idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu) 1735 { 1736 1737 iscsi_data_hdr_t *bhs; 1738 idm_task_t *idt; 1739 idm_buf_t *idb; 1740 size_t offset; 1741 1742 ASSERT(ic != NULL); 1743 ASSERT(pdu != NULL); 1744 ASSERT(IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA); 1745 1746 bhs = (iscsi_data_hdr_t *)pdu->isp_hdr; 1747 offset = ntohl(bhs->offset); 1748 1749 /* 1750 * Look up the task corresponding to the initiator task tag 1751 * to get the buffers affiliated with the task. 1752 */ 1753 idt = idm_task_find(ic, bhs->itt, bhs->ttt); 1754 if (idt == NULL) { 1755 IDM_CONN_LOG(CE_WARN, 1756 "idm_so_rx_dataout: failed to find task"); 1757 idm_pdu_rx_protocol_error(ic, pdu); 1758 return; 1759 } 1760 1761 idb = pdu->isp_sorx_buf; 1762 if (idb == NULL) { 1763 IDM_CONN_LOG(CE_WARN, 1764 "idm_so_rx_dataout: failed to find buffer"); 1765 idm_task_rele(idt); 1766 idm_pdu_rx_protocol_error(ic, pdu); 1767 return; 1768 } 1769 1770 /* Keep track of data transferred - check data offsets */ 1771 if (offset != idb->idb_exp_offset) { 1772 IDM_CONN_LOG(CE_NOTE, "idm_so_rx_dataout: offset out of seq: " 1773 "%ld, %d", offset, idb->idb_exp_offset); 1774 idm_task_rele(idt); 1775 idm_pdu_rx_protocol_error(ic, pdu); 1776 return; 1777 } 1778 /* Expected next relative offset */ 1779 idb->idb_exp_offset += ntoh24(bhs->dlength); 1780 idt->idt_rx_bytes += n2h24(bhs->dlength); 1781 1782 /* 1783 * Call the buffer callback when the transfer is complete 1784 * 1785 * The connection state machine should only abort tasks after 1786 * shutting down the connection so we are assured that there 1787 * won't be a simultaneous attempt to abort this task at the 1788 * same time as we are processing this PDU (due to a connection 1789 * state change). 
1790 */ 1791 if (bhs->flags & ISCSI_FLAG_FINAL) { 1792 /* 1793 * We have gotten the last data-message for the current 1794 * transfer. idb_xfer_len represents the data that the 1795 * command intended to transfer, it does not represent the 1796 * actual number of bytes transferred. If we have not 1797 * transferred the expected number of bytes something is 1798 * wrong. 1799 * 1800 * We have two options, when there is a mismatch, we can 1801 * regard the transfer as invalid -- or we can modify our 1802 * notion of "xfer_len." In order to be as stringent as 1803 * possible, here we regard this transfer as in error; and 1804 * bail out. 1805 */ 1806 if (idb->idb_buflen == idb->idb_xfer_len && 1807 idb->idb_buflen != 1808 (idb->idb_exp_offset - idb->idb_bufoffset)) { 1809 printf("idm_so_rx_dataout: incomplete transfer, " 1810 "protocol err"); 1811 IDM_CONN_LOG(CE_NOTE, 1812 "idm_so_rx_dataout: incomplete transfer: %ld, %d", 1813 offset, (int)(idb->idb_exp_offset - offset)); 1814 idm_task_rele(idt); 1815 idm_pdu_rx_protocol_error(ic, pdu); 1816 return; 1817 } 1818 /* 1819 * We only want to call idm_buf_rx_from_ini_done once 1820 * per transfer. It's possible that this task has 1821 * already been aborted in which case 1822 * idm_so_free_task_rsrc will call idm_buf_rx_from_ini_done 1823 * for each buffer with idb_in_transport==B_TRUE. To 1824 * close this window and ensure that this doesn't happen, 1825 * we'll clear idb->idb_in_transport now while holding 1826 * the task mutex. This is only really an issue for 1827 * SCSI task abort -- if tasks were being aborted because 1828 * of a connection state change the state machine would 1829 * have already stopped the receive thread. 1830 */ 1831 mutex_enter(&idt->idt_mutex); 1832 1833 /* 1834 * Release the task hold here (obtained in idm_task_find) 1835 * because the task may complete synchronously during 1836 * idm_buf_rx_from_ini_done. 
Since we still have an active 1837 * buffer we know there is at least one additional hold on idt. 1838 */ 1839 idm_task_rele(idt); 1840 1841 /* 1842 * idm_buf_rx_from_ini_done releases idt->idt_mutex 1843 */ 1844 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic, 1845 uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset, 1846 uint64_t, 0, uint32_t, 0, uint32_t, 0, 1847 uint32_t, idb->idb_xfer_len, 1848 int, XFER_BUF_RX_FROM_INI); 1849 idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_SUCCESS); 1850 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS); 1851 return; 1852 } 1853 1854 idm_task_rele(idt); 1855 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS); 1856 } 1857 1858 /* 1859 * The idm_so_rx_rtt() function is used by the iSCSI initiator to handle 1860 * the R2T PDU sent by the iSCSI target indicating that it is ready to 1861 * accept data. This gets the Initiator Task Tag (itt) from the PDU BHS 1862 * and looks up the task in the task tree using the itt to get the output 1863 * buffers associated the task. The R2T PDU contains the offset of the 1864 * requested data and the data length. This function then constructs a 1865 * sequence of iSCSI PDUs and outputs the requested data. Each Data-Out 1866 * PDU is associated with the R2T by the Target Transfer Tag (ttt). 
1867 */ 1868 1869 static void 1870 idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu) 1871 { 1872 idm_task_t *idt; 1873 idm_buf_t *idb; 1874 iscsi_rtt_hdr_t *rtt_hdr; 1875 uint32_t data_offset; 1876 uint32_t data_length; 1877 1878 ASSERT(ic != NULL); 1879 ASSERT(pdu != NULL); 1880 1881 rtt_hdr = (iscsi_rtt_hdr_t *)pdu->isp_hdr; 1882 data_offset = ntohl(rtt_hdr->data_offset); 1883 data_length = ntohl(rtt_hdr->data_length); 1884 idt = idm_task_find(ic, rtt_hdr->itt, rtt_hdr->ttt); 1885 1886 if (idt == NULL) { 1887 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find task"); 1888 idm_pdu_rx_protocol_error(ic, pdu); 1889 return; 1890 } 1891 1892 /* Find the buffer bound to the task by the iSCSI initiator */ 1893 mutex_enter(&idt->idt_mutex); 1894 idb = idm_buf_find(&idt->idt_outbufv, data_offset); 1895 if (idb == NULL) { 1896 mutex_exit(&idt->idt_mutex); 1897 idm_task_rele(idt); 1898 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find buffer"); 1899 idm_pdu_rx_protocol_error(ic, pdu); 1900 return; 1901 } 1902 1903 /* return buffer contains this data */ 1904 if (data_offset + data_length > idb->idb_buflen) { 1905 /* Overflow */ 1906 mutex_exit(&idt->idt_mutex); 1907 idm_task_rele(idt); 1908 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: read from outside " 1909 "buffer"); 1910 idm_pdu_rx_protocol_error(ic, pdu); 1911 return; 1912 } 1913 1914 idt->idt_r2t_ttt = rtt_hdr->ttt; 1915 idt->idt_exp_datasn = 0; 1916 1917 idm_so_send_rtt_data(ic, idt, idb, data_offset, 1918 ntohl(rtt_hdr->data_length)); 1919 /* 1920 * the idt_mutex is released in idm_so_send_rtt_data 1921 */ 1922 1923 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS); 1924 idm_task_rele(idt); 1925 1926 } 1927 1928 idm_status_t 1929 idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu) 1930 { 1931 uint8_t pad[ISCSI_PAD_WORD_LEN]; 1932 int pad_len; 1933 uint32_t data_digest_crc; 1934 uint32_t crc_calculated; 1935 int total_len; 1936 idm_so_conn_t *so_conn; 1937 1938 so_conn = ic->ic_transport_private; 1939 1940 pad_len = ((ISCSI_PAD_WORD_LEN 
- 1941 (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) & 1942 (ISCSI_PAD_WORD_LEN - 1)); 1943 1944 ASSERT(pdu->isp_iovlen < (PDU_MAX_IOVLEN - 2)); /* pad + data digest */ 1945 1946 total_len = pdu->isp_datalen; 1947 1948 if (pad_len) { 1949 pdu->isp_iov[pdu->isp_iovlen].iov_base = (char *)&pad; 1950 pdu->isp_iov[pdu->isp_iovlen].iov_len = pad_len; 1951 total_len += pad_len; 1952 pdu->isp_iovlen++; 1953 } 1954 1955 /* setup data digest */ 1956 if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) { 1957 pdu->isp_iov[pdu->isp_iovlen].iov_base = 1958 (char *)&data_digest_crc; 1959 pdu->isp_iov[pdu->isp_iovlen].iov_len = 1960 sizeof (data_digest_crc); 1961 total_len += sizeof (data_digest_crc); 1962 pdu->isp_iovlen++; 1963 } 1964 1965 pdu->isp_data = (uint8_t *)(uintptr_t)pdu->isp_iov[0].iov_base; 1966 1967 if (idm_iov_sorecv(so_conn->ic_so, &pdu->isp_iov[0], 1968 pdu->isp_iovlen, total_len) != 0) { 1969 return (IDM_STATUS_IO); 1970 } 1971 1972 if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) { 1973 crc_calculated = idm_crc32c(pdu->isp_data, 1974 pdu->isp_datalen); 1975 if (pad_len) { 1976 crc_calculated = idm_crc32c_continued((char *)&pad, 1977 pad_len, crc_calculated); 1978 } 1979 if (crc_calculated != data_digest_crc) { 1980 IDM_CONN_LOG(CE_WARN, 1981 "idm_sorecvdata: " 1982 "CRC error: actual 0x%x, calc 0x%x", 1983 data_digest_crc, crc_calculated); 1984 1985 /* Invalid Data Digest */ 1986 return (IDM_STATUS_DATA_DIGEST); 1987 } 1988 } 1989 1990 return (IDM_STATUS_SUCCESS); 1991 } 1992 1993 /* 1994 * idm_sorecv_scsidata() is used to receive scsi data from the socket. The 1995 * Data-type PDU header must be read into the idm_pdu_t structure prior to 1996 * calling this function. 
1997 */ 1998 idm_status_t 1999 idm_sorecv_scsidata(idm_conn_t *ic, idm_pdu_t *pdu) 2000 { 2001 iscsi_data_hdr_t *bhs; 2002 idm_task_t *task; 2003 uint32_t offset; 2004 uint8_t opcode; 2005 uint32_t dlength; 2006 list_t *buflst; 2007 uint32_t xfer_bytes; 2008 idm_status_t status; 2009 2010 ASSERT(ic != NULL); 2011 ASSERT(pdu != NULL); 2012 2013 bhs = (iscsi_data_hdr_t *)pdu->isp_hdr; 2014 2015 offset = ntohl(bhs->offset); 2016 opcode = IDM_PDU_OPCODE(pdu); 2017 dlength = n2h24(bhs->dlength); 2018 2019 ASSERT((opcode == ISCSI_OP_SCSI_DATA_RSP) || 2020 (opcode == ISCSI_OP_SCSI_DATA)); 2021 2022 /* 2023 * Successful lookup implicitly gets a "hold" on the task. This 2024 * hold must be released before leaving this function. At one 2025 * point we were caching this task context and retaining the hold 2026 * but it turned out to be very difficult to release the hold properly. 2027 * The task can be aborted and the connection shutdown between this 2028 * call and the subsequent expected call to idm_so_rx_datain/ 2029 * idm_so_rx_dataout (in which case those functions are not called). 2030 * Releasing the hold in the PDU callback doesn't work well either 2031 * because the whole task may be completed by then at which point 2032 * it is too late to release the hold -- for better or worse this 2033 * code doesn't wait on the refcnts during normal operation. 2034 * idm_task_find() is very fast and it is not a huge burden if we 2035 * have to do it twice. 2036 */ 2037 task = idm_task_find(ic, bhs->itt, bhs->ttt); 2038 if (task == NULL) { 2039 IDM_CONN_LOG(CE_WARN, 2040 "idm_sorecv_scsidata: could not find task"); 2041 return (IDM_STATUS_FAIL); 2042 } 2043 2044 mutex_enter(&task->idt_mutex); 2045 buflst = (opcode == ISCSI_OP_SCSI_DATA_RSP) ? 
2046 &task->idt_inbufv : &task->idt_outbufv; 2047 pdu->isp_sorx_buf = idm_buf_find(buflst, offset); 2048 mutex_exit(&task->idt_mutex); 2049 2050 if (pdu->isp_sorx_buf == NULL) { 2051 idm_task_rele(task); 2052 IDM_CONN_LOG(CE_WARN, "idm_sorecv_scsidata: could not find " 2053 "buffer for offset %x opcode=%x", 2054 offset, opcode); 2055 return (IDM_STATUS_FAIL); 2056 } 2057 2058 xfer_bytes = idm_fill_iov(pdu, pdu->isp_sorx_buf, offset, dlength); 2059 ASSERT(xfer_bytes != 0); 2060 if (xfer_bytes != dlength) { 2061 idm_task_rele(task); 2062 /* 2063 * Buffer overflow, connection error. The PDU data is still 2064 * sitting in the socket so we can't use the connection 2065 * again until that data is drained. 2066 */ 2067 return (IDM_STATUS_FAIL); 2068 } 2069 2070 status = idm_sorecvdata(ic, pdu); 2071 2072 idm_task_rele(task); 2073 2074 return (status); 2075 } 2076 2077 static uint32_t 2078 idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb, uint32_t ro, uint32_t dlength) 2079 { 2080 uint32_t buf_ro = ro - idb->idb_bufoffset; 2081 uint32_t xfer_len = min(dlength, idb->idb_buflen - buf_ro); 2082 2083 ASSERT(ro >= idb->idb_bufoffset); 2084 2085 pdu->isp_iov[pdu->isp_iovlen].iov_base = 2086 (caddr_t)idb->idb_buf + buf_ro; 2087 pdu->isp_iov[pdu->isp_iovlen].iov_len = xfer_len; 2088 pdu->isp_iovlen++; 2089 2090 return (xfer_len); 2091 } 2092 2093 int 2094 idm_sorecv_nonscsidata(idm_conn_t *ic, idm_pdu_t *pdu) 2095 { 2096 pdu->isp_data = kmem_alloc(pdu->isp_datalen, KM_SLEEP); 2097 ASSERT(pdu->isp_data != NULL); 2098 2099 pdu->isp_databuflen = pdu->isp_datalen; 2100 pdu->isp_iov[0].iov_base = (caddr_t)pdu->isp_data; 2101 pdu->isp_iov[0].iov_len = pdu->isp_datalen; 2102 pdu->isp_iovlen = 1; 2103 /* 2104 * Since we are associating a new data buffer with this received 2105 * PDU we need to set a specific callback to free the data 2106 * after the PDU is processed. 
2107 */ 2108 pdu->isp_flags |= IDM_PDU_ADDL_DATA; 2109 pdu->isp_callback = idm_sorx_addl_pdu_cb; 2110 2111 return (idm_sorecvdata(ic, pdu)); 2112 } 2113 2114 void 2115 idm_sorx_thread(void *arg) 2116 { 2117 boolean_t conn_failure = B_FALSE; 2118 idm_conn_t *ic = (idm_conn_t *)arg; 2119 idm_so_conn_t *so_conn; 2120 idm_pdu_t *pdu; 2121 idm_status_t rc; 2122 2123 idm_conn_hold(ic); 2124 2125 mutex_enter(&ic->ic_mutex); 2126 2127 so_conn = ic->ic_transport_private; 2128 so_conn->ic_rx_thread_running = B_TRUE; 2129 so_conn->ic_rx_thread_did = so_conn->ic_rx_thread->t_did; 2130 cv_signal(&ic->ic_cv); 2131 2132 while (so_conn->ic_rx_thread_running) { 2133 mutex_exit(&ic->ic_mutex); 2134 2135 /* 2136 * Get PDU with default header size (large enough for 2137 * BHS plus any anticipated AHS). PDU from 2138 * the cache will have all values set correctly 2139 * for sockets RX including callback. 2140 */ 2141 pdu = kmem_cache_alloc(idm.idm_sorx_pdu_cache, KM_SLEEP); 2142 pdu->isp_ic = ic; 2143 pdu->isp_flags = 0; 2144 pdu->isp_transport_hdrlen = 0; 2145 2146 if ((rc = idm_sorecvhdr(ic, pdu)) != 0) { 2147 /* 2148 * Call idm_pdu_complete so that we call the callback 2149 * and ensure any memory allocated in idm_sorecvhdr 2150 * gets freed up. 2151 */ 2152 idm_pdu_complete(pdu, IDM_STATUS_FAIL); 2153 2154 /* 2155 * If ic_rx_thread_running is still set then 2156 * this is some kind of connection problem 2157 * on the socket. In this case we want to 2158 * generate an event. Otherwise some other 2159 * thread closed the socket due to another 2160 * issue in which case we don't need to 2161 * generate an event. 2162 */ 2163 mutex_enter(&ic->ic_mutex); 2164 if (so_conn->ic_rx_thread_running) { 2165 conn_failure = B_TRUE; 2166 so_conn->ic_rx_thread_running = B_FALSE; 2167 } 2168 2169 continue; 2170 } 2171 2172 /* 2173 * Header has been read and validated. Now we need 2174 * to read the PDU data payload (if present). 
SCSI data 2175 * need to be transferred from the socket directly into 2176 * the associated transfer buffer for the SCSI task. 2177 */ 2178 if (pdu->isp_datalen != 0) { 2179 if ((IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA) || 2180 (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP)) { 2181 rc = idm_sorecv_scsidata(ic, pdu); 2182 /* 2183 * All SCSI errors are fatal to the 2184 * connection right now since we have no 2185 * place to put the data. What we need 2186 * is some kind of sink to dispose of unwanted 2187 * SCSI data. For example an invalid task tag 2188 * should not kill the connection (although 2189 * we may want to drop the connection). 2190 */ 2191 } else { 2192 /* 2193 * Not data PDUs so allocate a buffer for the 2194 * data segment and read the remaining data. 2195 */ 2196 rc = idm_sorecv_nonscsidata(ic, pdu); 2197 } 2198 if (rc != 0) { 2199 /* 2200 * Call idm_pdu_complete so that we call the 2201 * callback and ensure any memory allocated 2202 * in idm_sorecvhdr gets freed up. 2203 */ 2204 idm_pdu_complete(pdu, IDM_STATUS_FAIL); 2205 2206 /* 2207 * If ic_rx_thread_running is still set then 2208 * this is some kind of connection problem 2209 * on the socket. In this case we want to 2210 * generate an event. Otherwise some other 2211 * thread closed the socket due to another 2212 * issue in which case we don't need to 2213 * generate an event. 2214 */ 2215 mutex_enter(&ic->ic_mutex); 2216 if (so_conn->ic_rx_thread_running) { 2217 conn_failure = B_TRUE; 2218 so_conn->ic_rx_thread_running = B_FALSE; 2219 } 2220 continue; 2221 } 2222 } 2223 2224 /* 2225 * Process RX PDU 2226 */ 2227 idm_pdu_rx(ic, pdu); 2228 2229 mutex_enter(&ic->ic_mutex); 2230 } 2231 2232 mutex_exit(&ic->ic_mutex); 2233 2234 /* 2235 * If we dropped out of the RX processing loop because of 2236 * a socket problem or other connection failure (including 2237 * digest errors) then we need to generate a state machine 2238 * event to shut the connection down. 
2239 * If the state machine is already in, for example, INIT_ERROR, this 2240 * event will get dropped, and the TX thread will never be notified 2241 * to shut down. To be safe, we'll just notify it here. 2242 */ 2243 if (conn_failure) { 2244 if (so_conn->ic_tx_thread_running) { 2245 so_conn->ic_tx_thread_running = B_FALSE; 2246 mutex_enter(&so_conn->ic_tx_mutex); 2247 cv_signal(&so_conn->ic_tx_cv); 2248 mutex_exit(&so_conn->ic_tx_mutex); 2249 } 2250 2251 idm_conn_event(ic, CE_TRANSPORT_FAIL, rc); 2252 } 2253 2254 idm_conn_rele(ic); 2255 2256 thread_exit(); 2257 } 2258 2259 /* 2260 * idm_so_tx 2261 * 2262 * This is the implementation of idm_transport_ops_t's it_tx_pdu entry 2263 * point. By definition, it is supposed to be fast. So, simply queue 2264 * the entry and return. The real work is done by idm_i_so_tx() via 2265 * idm_sotx_thread(). 2266 */ 2267 2268 static void 2269 idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu) 2270 { 2271 idm_so_conn_t *so_conn = ic->ic_transport_private; 2272 2273 ASSERT(pdu->isp_ic == ic); 2274 mutex_enter(&so_conn->ic_tx_mutex); 2275 2276 if (!so_conn->ic_tx_thread_running) { 2277 mutex_exit(&so_conn->ic_tx_mutex); 2278 idm_pdu_complete(pdu, IDM_STATUS_ABORTED); 2279 return; 2280 } 2281 2282 list_insert_tail(&so_conn->ic_tx_list, (void *)pdu); 2283 cv_signal(&so_conn->ic_tx_cv); 2284 mutex_exit(&so_conn->ic_tx_mutex); 2285 } 2286 2287 static idm_status_t 2288 idm_i_so_tx(idm_pdu_t *pdu) 2289 { 2290 idm_conn_t *ic = pdu->isp_ic; 2291 idm_status_t status = IDM_STATUS_SUCCESS; 2292 uint8_t pad[ISCSI_PAD_WORD_LEN]; 2293 int pad_len; 2294 uint32_t hdr_digest_crc; 2295 uint32_t data_digest_crc = 0; 2296 int total_len = 0; 2297 int iovlen = 0; 2298 struct iovec iov[6]; 2299 idm_so_conn_t *so_conn; 2300 2301 so_conn = ic->ic_transport_private; 2302 2303 /* Setup BHS */ 2304 iov[iovlen].iov_base = (caddr_t)pdu->isp_hdr; 2305 iov[iovlen].iov_len = pdu->isp_hdrlen; 2306 total_len += iov[iovlen].iov_len; 2307 iovlen++; 2308 2309 /* Setup header 
digest */ 2310 if (((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) && 2311 (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST)) { 2312 hdr_digest_crc = idm_crc32c(pdu->isp_hdr, pdu->isp_hdrlen); 2313 2314 iov[iovlen].iov_base = (caddr_t)&hdr_digest_crc; 2315 iov[iovlen].iov_len = sizeof (hdr_digest_crc); 2316 total_len += iov[iovlen].iov_len; 2317 iovlen++; 2318 } 2319 2320 /* Setup the data */ 2321 if (pdu->isp_datalen) { 2322 idm_task_t *idt; 2323 idm_buf_t *idb; 2324 iscsi_data_hdr_t *ihp; 2325 ihp = (iscsi_data_hdr_t *)pdu->isp_hdr; 2326 /* Write of immediate data */ 2327 if (ic->ic_ffp && 2328 (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_CMD || 2329 IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA)) { 2330 idt = idm_task_find(ic, ihp->itt, ihp->ttt); 2331 if (idt) { 2332 mutex_enter(&idt->idt_mutex); 2333 idb = idm_buf_find(&idt->idt_outbufv, 0); 2334 mutex_exit(&idt->idt_mutex); 2335 /* 2336 * If the initiator call to idm_buf_alloc 2337 * failed then we can get to this point 2338 * without a bound buffer. The associated 2339 * connection failure will clean things up 2340 * later. It would be nice to come up with 2341 * a cleaner way to handle this. In 2342 * particular it seems absurd to look up 2343 * the task and the buffer just to update 2344 * this counter. 2345 */ 2346 if (idb) 2347 idb->idb_xfer_len += pdu->isp_datalen; 2348 idm_task_rele(idt); 2349 } 2350 } 2351 2352 iov[iovlen].iov_base = (caddr_t)pdu->isp_data; 2353 iov[iovlen].iov_len = pdu->isp_datalen; 2354 total_len += iov[iovlen].iov_len; 2355 iovlen++; 2356 } 2357 2358 /* Setup the data pad if necessary */ 2359 pad_len = ((ISCSI_PAD_WORD_LEN - 2360 (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) & 2361 (ISCSI_PAD_WORD_LEN - 1)); 2362 2363 if (pad_len) { 2364 bzero(pad, sizeof (pad)); 2365 iov[iovlen].iov_base = (void *)&pad; 2366 iov[iovlen].iov_len = pad_len; 2367 total_len += iov[iovlen].iov_len; 2368 iovlen++; 2369 } 2370 2371 /* 2372 * Setup the data digest if enabled. 
Data-digest is not sent 2373 * for login-phase PDUs. 2374 */ 2375 if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) && 2376 ((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) && 2377 (pdu->isp_datalen || pad_len)) { 2378 /* 2379 * RFC3720/10.2.3: A zero-length Data Segment also 2380 * implies a zero-length data digest. 2381 */ 2382 if (pdu->isp_datalen) { 2383 data_digest_crc = idm_crc32c(pdu->isp_data, 2384 pdu->isp_datalen); 2385 } 2386 if (pad_len) { 2387 data_digest_crc = idm_crc32c_continued(&pad, 2388 pad_len, data_digest_crc); 2389 } 2390 2391 iov[iovlen].iov_base = (caddr_t)&data_digest_crc; 2392 iov[iovlen].iov_len = sizeof (data_digest_crc); 2393 total_len += iov[iovlen].iov_len; 2394 iovlen++; 2395 } 2396 2397 /* Transmit the PDU */ 2398 if (idm_iov_sosend(so_conn->ic_so, &iov[0], iovlen, 2399 total_len) != 0) { 2400 /* Set error status */ 2401 IDM_CONN_LOG(CE_WARN, 2402 "idm_so_tx: failed to transmit the PDU, so: %p ic: %p " 2403 "data: %p", (void *) so_conn->ic_so, (void *) ic, 2404 (void *) pdu->isp_data); 2405 status = IDM_STATUS_IO; 2406 } 2407 2408 /* 2409 * Success does not mean that the PDU actually reached the 2410 * remote node since it could get dropped along the way. 2411 */ 2412 idm_pdu_complete(pdu, status); 2413 2414 return (status); 2415 } 2416 2417 /* 2418 * The idm_so_buf_tx_to_ini() is used by the target iSCSI layer to transmit the 2419 * Data-In PDUs using sockets. Based on the negotiated MaxRecvDataSegmentLength, 2420 * the buffer is segmented into a sequence of Data-In PDUs, ordered by DataSN. 2421 * A target can invoke this function multiple times for a single read command 2422 * (identified by the same ITT) to split the input into several sequences. 2423 * 2424 * DataSN starts with 0 for the first data PDU of an input command and advances 2425 * by 1 for each subsequent data PDU. Each sequence will have its own F bit, 2426 * which is set to 1 for the last data PDU of a sequence. 
2427 * If the initiator supports phase collapse, the status bit must be set along 2428 * with the F bit to indicate that the status is shipped together with the last 2429 * Data-In PDU. 2430 * 2431 * The data PDUs within a sequence will be sent in order with the buffer offset 2432 * in increasing order. i.e. initiator and target must have negotiated the 2433 * "DataPDUInOrder" to "Yes". The order between sequences is not enforced. 2434 * 2435 * Caller holds idt->idt_mutex 2436 */ 2437 static idm_status_t 2438 idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb) 2439 { 2440 idm_so_conn_t *so_conn = idb->idb_ic->ic_transport_private; 2441 idm_pdu_t tmppdu; 2442 2443 ASSERT(mutex_owned(&idt->idt_mutex)); 2444 2445 /* 2446 * Put the idm_buf_t on the tx queue. It will be transmitted by 2447 * idm_sotx_thread. 2448 */ 2449 mutex_enter(&so_conn->ic_tx_mutex); 2450 2451 DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic, 2452 uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset, 2453 uint64_t, 0, uint32_t, 0, uint32_t, 0, 2454 uint32_t, idb->idb_xfer_len, int, XFER_BUF_TX_TO_INI); 2455 2456 if (!so_conn->ic_tx_thread_running) { 2457 mutex_exit(&so_conn->ic_tx_mutex); 2458 /* 2459 * Don't release idt->idt_mutex since we're supposed to hold 2460 * in when calling idm_buf_tx_to_ini_done 2461 */ 2462 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic, 2463 uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset, 2464 uint64_t, 0, uint32_t, 0, uint32_t, 0, 2465 uint32_t, idb->idb_xfer_len, 2466 int, XFER_BUF_TX_TO_INI); 2467 idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED); 2468 return (IDM_STATUS_FAIL); 2469 } 2470 2471 /* 2472 * Build a template for the data PDU headers we will use so that 2473 * the SN values will stay consistent with other PDU's we are 2474 * transmitting like R2T and SCSI status. 
2475 */ 2476 bzero(&idb->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t)); 2477 tmppdu.isp_hdr = &idb->idb_data_hdr_tmpl; 2478 (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu, 2479 ISCSI_OP_SCSI_DATA_RSP); 2480 idb->idb_tx_thread = B_TRUE; 2481 list_insert_tail(&so_conn->ic_tx_list, (void *)idb); 2482 cv_signal(&so_conn->ic_tx_cv); 2483 mutex_exit(&so_conn->ic_tx_mutex); 2484 mutex_exit(&idt->idt_mutex); 2485 2486 /* 2487 * Returning success here indicates the transfer was successfully 2488 * dispatched -- it does not mean that the transfer completed 2489 * successfully. 2490 */ 2491 return (IDM_STATUS_SUCCESS); 2492 } 2493 2494 /* 2495 * The idm_so_buf_rx_from_ini() is used by the target iSCSI layer to specify the 2496 * data blocks it is ready to receive from the initiator in response to a WRITE 2497 * SCSI command. The target iSCSI layer passes the information about the desired 2498 * data blocks to the initiator in one R2T PDU. The receiving buffer, the buffer 2499 * offset and datalen are passed via the 'idb' argument. 2500 * 2501 * Scope for Prototype build: 2502 * R2Ts are required for any Data-Out PDU, i.e. initiator and target must have 2503 * negotiated the "InitialR2T" to "Yes". 
2504 * 2505 * Caller holds idt->idt_mutex 2506 */ 2507 static idm_status_t 2508 idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb) 2509 { 2510 idm_pdu_t *pdu; 2511 iscsi_rtt_hdr_t *rtt; 2512 2513 ASSERT(mutex_owned(&idt->idt_mutex)); 2514 2515 DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic, 2516 uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset, 2517 uint64_t, 0, uint32_t, 0, uint32_t, 0, 2518 uint32_t, idb->idb_xfer_len, int, XFER_BUF_RX_FROM_INI); 2519 2520 pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP); 2521 pdu->isp_ic = idt->idt_ic; 2522 pdu->isp_flags = IDM_PDU_SET_STATSN; 2523 bzero(pdu->isp_hdr, sizeof (iscsi_rtt_hdr_t)); 2524 2525 /* iSCSI layer fills the TTT, ITT, ExpCmdSN, MaxCmdSN */ 2526 (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, pdu, ISCSI_OP_RTT_RSP); 2527 2528 /* set the rttsn, rtt.flags, rtt.data_offset and rtt.data_length */ 2529 rtt = (iscsi_rtt_hdr_t *)(pdu->isp_hdr); 2530 2531 rtt->opcode = ISCSI_OP_RTT_RSP; 2532 rtt->flags = ISCSI_FLAG_FINAL; 2533 rtt->data_offset = htonl(idb->idb_bufoffset); 2534 rtt->data_length = htonl(idb->idb_xfer_len); 2535 rtt->rttsn = htonl(idt->idt_exp_rttsn++); 2536 2537 /* Keep track of buffer offsets */ 2538 idb->idb_exp_offset = idb->idb_bufoffset; 2539 mutex_exit(&idt->idt_mutex); 2540 2541 /* 2542 * Transmit the PDU. 
2543 */ 2544 idm_pdu_tx(pdu); 2545 2546 return (IDM_STATUS_SUCCESS); 2547 } 2548 2549 static idm_status_t 2550 idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen) 2551 { 2552 if ((buflen > IDM_SO_BUF_CACHE_LB) && (buflen <= IDM_SO_BUF_CACHE_UB)) { 2553 idb->idb_buf = kmem_cache_alloc(idm.idm_so_128k_buf_cache, 2554 KM_NOSLEEP); 2555 idb->idb_buf_private = idm.idm_so_128k_buf_cache; 2556 } else { 2557 idb->idb_buf = kmem_alloc(buflen, KM_NOSLEEP); 2558 idb->idb_buf_private = NULL; 2559 } 2560 2561 if (idb->idb_buf == NULL) { 2562 IDM_CONN_LOG(CE_NOTE, 2563 "idm_so_buf_alloc: failed buffer allocation"); 2564 return (IDM_STATUS_FAIL); 2565 } 2566 2567 return (IDM_STATUS_SUCCESS); 2568 } 2569 2570 /* ARGSUSED */ 2571 static idm_status_t 2572 idm_so_buf_setup(idm_buf_t *idb) 2573 { 2574 /* Ensure bufalloc'd flag is unset */ 2575 idb->idb_bufalloc = B_FALSE; 2576 2577 return (IDM_STATUS_SUCCESS); 2578 } 2579 2580 /* ARGSUSED */ 2581 static void 2582 idm_so_buf_teardown(idm_buf_t *idb) 2583 { 2584 /* nothing to do here */ 2585 } 2586 2587 static void 2588 idm_so_buf_free(idm_buf_t *idb) 2589 { 2590 if (idb->idb_buf_private == NULL) { 2591 kmem_free(idb->idb_buf, idb->idb_buflen); 2592 } else { 2593 kmem_cache_free(idb->idb_buf_private, idb->idb_buf); 2594 } 2595 } 2596 2597 static void 2598 idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt, idm_buf_t *idb, 2599 uint32_t offset, uint32_t length) 2600 { 2601 idm_so_conn_t *so_conn = ic->ic_transport_private; 2602 idm_pdu_t tmppdu; 2603 idm_buf_t *rtt_buf; 2604 2605 ASSERT(mutex_owned(&idt->idt_mutex)); 2606 2607 /* 2608 * Allocate a buffer to represent the RTT transfer. We could further 2609 * optimize this by allocating the buffers internally from an rtt 2610 * specific buffer cache since this is socket-specific code but for 2611 * now we will keep it simple. 
2612 */ 2613 rtt_buf = idm_buf_alloc(ic, (uint8_t *)idb->idb_buf + offset, length); 2614 if (rtt_buf == NULL) { 2615 /* 2616 * If we're in FFP then the failure was likely a resource 2617 * allocation issue and we should close the connection by 2618 * sending a CE_TRANSPORT_FAIL event. 2619 * 2620 * If we're not in FFP then idm_buf_alloc will always 2621 * fail and the state is transitioning to "complete" anyway 2622 * so we won't bother to send an event. 2623 */ 2624 mutex_enter(&ic->ic_state_mutex); 2625 if (ic->ic_ffp) 2626 idm_conn_event_locked(ic, CE_TRANSPORT_FAIL, 2627 (uintptr_t)NULL, CT_NONE); 2628 mutex_exit(&ic->ic_state_mutex); 2629 mutex_exit(&idt->idt_mutex); 2630 return; 2631 } 2632 2633 rtt_buf->idb_buf_cb = NULL; 2634 rtt_buf->idb_cb_arg = NULL; 2635 rtt_buf->idb_bufoffset = offset; 2636 rtt_buf->idb_xfer_len = length; 2637 rtt_buf->idb_ic = idt->idt_ic; 2638 rtt_buf->idb_task_binding = idt; 2639 2640 /* 2641 * The new buffer (if any) represents an additional 2642 * reference on the task 2643 */ 2644 idm_task_hold(idt); 2645 mutex_exit(&idt->idt_mutex); 2646 2647 /* 2648 * Put the idm_buf_t on the tx queue. It will be transmitted by 2649 * idm_sotx_thread. 2650 */ 2651 mutex_enter(&so_conn->ic_tx_mutex); 2652 2653 if (!so_conn->ic_tx_thread_running) { 2654 idm_buf_free(rtt_buf); 2655 mutex_exit(&so_conn->ic_tx_mutex); 2656 idm_task_rele(idt); 2657 return; 2658 } 2659 2660 /* 2661 * Build a template for the data PDU headers we will use so that 2662 * the SN values will stay consistent with other PDU's we are 2663 * transmitting like R2T and SCSI status. 
2664 */ 2665 bzero(&rtt_buf->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t)); 2666 tmppdu.isp_hdr = &rtt_buf->idb_data_hdr_tmpl; 2667 (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu, 2668 ISCSI_OP_SCSI_DATA); 2669 rtt_buf->idb_tx_thread = B_TRUE; 2670 rtt_buf->idb_in_transport = B_TRUE; 2671 list_insert_tail(&so_conn->ic_tx_list, (void *)rtt_buf); 2672 cv_signal(&so_conn->ic_tx_cv); 2673 mutex_exit(&so_conn->ic_tx_mutex); 2674 } 2675 2676 static void 2677 idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb) 2678 { 2679 /* 2680 * Don't worry about status -- we assume any error handling 2681 * is performed by the caller (idm_sotx_thread). 2682 */ 2683 idb->idb_in_transport = B_FALSE; 2684 idm_task_rele(idt); 2685 idm_buf_free(idb); 2686 } 2687 2688 static idm_status_t 2689 idm_so_send_buf_region(idm_task_t *idt, idm_buf_t *idb, 2690 uint32_t buf_region_offset, uint32_t buf_region_length) 2691 { 2692 idm_conn_t *ic; 2693 uint32_t max_dataseglen; 2694 size_t remainder, chunk; 2695 uint32_t data_offset = buf_region_offset; 2696 iscsi_data_hdr_t *bhs; 2697 idm_pdu_t *pdu; 2698 idm_status_t tx_status; 2699 2700 ASSERT(mutex_owned(&idt->idt_mutex)); 2701 2702 ic = idt->idt_ic; 2703 2704 max_dataseglen = ic->ic_conn_params.max_xmit_dataseglen; 2705 remainder = buf_region_length; 2706 2707 while (remainder) { 2708 if (idt->idt_state != TASK_ACTIVE) { 2709 ASSERT((idt->idt_state != TASK_IDLE) && 2710 (idt->idt_state != TASK_COMPLETE)); 2711 return (IDM_STATUS_ABORTED); 2712 } 2713 2714 /* check to see if we need to chunk the data */ 2715 if (remainder > max_dataseglen) { 2716 chunk = max_dataseglen; 2717 } else { 2718 chunk = remainder; 2719 } 2720 2721 /* Data PDU headers will always be sizeof (iscsi_hdr_t) */ 2722 pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP); 2723 pdu->isp_ic = ic; 2724 pdu->isp_flags = 0; /* initialize isp_flags */ 2725 2726 /* 2727 * We've already built a build a header template 2728 * to use during the transfer. 
Use this template so that 2729 * the SN values stay consistent with any unrelated PDU's 2730 * being transmitted. 2731 */ 2732 bcopy(&idb->idb_data_hdr_tmpl, pdu->isp_hdr, 2733 sizeof (iscsi_hdr_t)); 2734 2735 /* 2736 * Set DataSN, data offset, and flags in BHS 2737 * For the prototype build, A = 0, S = 0, U = 0 2738 */ 2739 bhs = (iscsi_data_hdr_t *)(pdu->isp_hdr); 2740 2741 bhs->datasn = htonl(idt->idt_exp_datasn++); 2742 2743 hton24(bhs->dlength, chunk); 2744 bhs->offset = htonl(idb->idb_bufoffset + data_offset); 2745 2746 /* setup data */ 2747 pdu->isp_data = (uint8_t *)idb->idb_buf + data_offset; 2748 pdu->isp_datalen = (uint_t)chunk; 2749 2750 if (chunk == remainder) { 2751 bhs->flags = ISCSI_FLAG_FINAL; /* F bit set to 1 */ 2752 /* Piggyback the status with the last data PDU */ 2753 if (idt->idt_flags & IDM_TASK_PHASECOLLAPSE_REQ) { 2754 pdu->isp_flags |= IDM_PDU_SET_STATSN | 2755 IDM_PDU_ADVANCE_STATSN; 2756 (*idt->idt_ic->ic_conn_ops.icb_update_statsn) 2757 (idt, pdu); 2758 idt->idt_flags |= 2759 IDM_TASK_PHASECOLLAPSE_SUCCESS; 2760 2761 } 2762 } 2763 2764 remainder -= chunk; 2765 data_offset += chunk; 2766 2767 /* Instrument the data-send DTrace probe. */ 2768 if (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP) { 2769 DTRACE_ISCSI_2(data__send, 2770 idm_conn_t *, idt->idt_ic, 2771 iscsi_data_rsp_hdr_t *, 2772 (iscsi_data_rsp_hdr_t *)pdu->isp_hdr); 2773 } 2774 2775 /* 2776 * Now that we're done working with idt_exp_datasn, 2777 * idt->idt_state and idb->idb_bufoffset we can release 2778 * the task lock -- don't want to hold it across the 2779 * call to idm_i_so_tx since we could block. 2780 */ 2781 mutex_exit(&idt->idt_mutex); 2782 2783 /* 2784 * Transmit the PDU. Call the internal routine directly 2785 * as there is already implicit ordering. 
2786 */ 2787 if ((tx_status = idm_i_so_tx(pdu)) != IDM_STATUS_SUCCESS) { 2788 mutex_enter(&idt->idt_mutex); 2789 return (tx_status); 2790 } 2791 2792 mutex_enter(&idt->idt_mutex); 2793 idt->idt_tx_bytes += chunk; 2794 } 2795 2796 return (IDM_STATUS_SUCCESS); 2797 } 2798 2799 /* 2800 * TX PDU cache 2801 */ 2802 /* ARGSUSED */ 2803 int 2804 idm_sotx_pdu_constructor(void *hdl, void *arg, int flags) 2805 { 2806 idm_pdu_t *pdu = hdl; 2807 2808 bzero(pdu, sizeof (idm_pdu_t)); 2809 pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */ 2810 pdu->isp_hdrlen = sizeof (iscsi_hdr_t); 2811 pdu->isp_callback = idm_sotx_cache_pdu_cb; 2812 pdu->isp_magic = IDM_PDU_MAGIC; 2813 bzero(pdu->isp_hdr, sizeof (iscsi_hdr_t)); 2814 2815 return (0); 2816 } 2817 2818 /* ARGSUSED */ 2819 void 2820 idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status) 2821 { 2822 /* reset values between use */ 2823 pdu->isp_datalen = 0; 2824 2825 kmem_cache_free(idm.idm_sotx_pdu_cache, pdu); 2826 } 2827 2828 /* 2829 * RX PDU cache 2830 */ 2831 /* ARGSUSED */ 2832 int 2833 idm_sorx_pdu_constructor(void *hdl, void *arg, int flags) 2834 { 2835 idm_pdu_t *pdu = hdl; 2836 2837 bzero(pdu, sizeof (idm_pdu_t)); 2838 pdu->isp_magic = IDM_PDU_MAGIC; 2839 pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */ 2840 pdu->isp_callback = idm_sorx_cache_pdu_cb; 2841 2842 return (0); 2843 } 2844 2845 /* ARGSUSED */ 2846 static void 2847 idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status) 2848 { 2849 pdu->isp_iovlen = 0; 2850 pdu->isp_sorx_buf = 0; 2851 kmem_cache_free(idm.idm_sorx_pdu_cache, pdu); 2852 } 2853 2854 static void 2855 idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status) 2856 { 2857 /* 2858 * We had to modify our cached RX PDU with a longer header buffer 2859 * and/or a longer data buffer. Release the new buffers and fix 2860 * the fields back to what we would expect for a cached RX PDU. 
2861 */ 2862 if (pdu->isp_flags & IDM_PDU_ADDL_HDR) { 2863 kmem_free(pdu->isp_hdr, pdu->isp_hdrlen); 2864 } 2865 if (pdu->isp_flags & IDM_PDU_ADDL_DATA) { 2866 kmem_free(pdu->isp_data, pdu->isp_datalen); 2867 } 2868 pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); 2869 pdu->isp_hdrlen = sizeof (iscsi_hdr_t); 2870 pdu->isp_data = NULL; 2871 pdu->isp_datalen = 0; 2872 pdu->isp_sorx_buf = 0; 2873 pdu->isp_callback = idm_sorx_cache_pdu_cb; 2874 idm_sorx_cache_pdu_cb(pdu, status); 2875 } 2876 2877 /* 2878 * This thread is only active when I/O is queued for transmit 2879 * because the socket is busy. 2880 */ 2881 void 2882 idm_sotx_thread(void *arg) 2883 { 2884 idm_conn_t *ic = arg; 2885 idm_tx_obj_t *object, *next; 2886 idm_so_conn_t *so_conn; 2887 idm_status_t status = IDM_STATUS_SUCCESS; 2888 2889 idm_conn_hold(ic); 2890 2891 mutex_enter(&ic->ic_mutex); 2892 so_conn = ic->ic_transport_private; 2893 so_conn->ic_tx_thread_running = B_TRUE; 2894 so_conn->ic_tx_thread_did = so_conn->ic_tx_thread->t_did; 2895 cv_signal(&ic->ic_cv); 2896 mutex_exit(&ic->ic_mutex); 2897 2898 mutex_enter(&so_conn->ic_tx_mutex); 2899 2900 while (so_conn->ic_tx_thread_running) { 2901 while (list_is_empty(&so_conn->ic_tx_list)) { 2902 DTRACE_PROBE1(soconn__tx__sleep, idm_conn_t *, ic); 2903 cv_wait(&so_conn->ic_tx_cv, &so_conn->ic_tx_mutex); 2904 DTRACE_PROBE1(soconn__tx__wakeup, idm_conn_t *, ic); 2905 2906 if (!so_conn->ic_tx_thread_running) { 2907 goto tx_bail; 2908 } 2909 } 2910 2911 object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list); 2912 list_remove(&so_conn->ic_tx_list, object); 2913 mutex_exit(&so_conn->ic_tx_mutex); 2914 2915 switch (object->idm_tx_obj_magic) { 2916 case IDM_PDU_MAGIC: { 2917 idm_pdu_t *pdu = (idm_pdu_t *)object; 2918 DTRACE_PROBE2(soconn__tx__pdu, idm_conn_t *, ic, 2919 idm_pdu_t *, (idm_pdu_t *)object); 2920 2921 if (pdu->isp_flags & IDM_PDU_SET_STATSN) { 2922 /* No IDM task */ 2923 (ic->ic_conn_ops.icb_update_statsn)(NULL, pdu); 2924 } 2925 status = 
idm_i_so_tx((idm_pdu_t *)object); 2926 break; 2927 } 2928 case IDM_BUF_MAGIC: { 2929 idm_buf_t *idb = (idm_buf_t *)object; 2930 idm_task_t *idt = idb->idb_task_binding; 2931 2932 DTRACE_PROBE2(soconn__tx__buf, idm_conn_t *, ic, 2933 idm_buf_t *, idb); 2934 2935 mutex_enter(&idt->idt_mutex); 2936 status = idm_so_send_buf_region(idt, 2937 idb, 0, idb->idb_xfer_len); 2938 2939 /* 2940 * TX thread owns the buffer so we expect it to 2941 * be "in transport" 2942 */ 2943 ASSERT(idb->idb_in_transport); 2944 if (IDM_CONN_ISTGT(ic)) { 2945 /* 2946 * idm_buf_tx_to_ini_done releases 2947 * idt->idt_mutex 2948 */ 2949 DTRACE_ISCSI_8(xfer__done, 2950 idm_conn_t *, idt->idt_ic, 2951 uintptr_t, idb->idb_buf, 2952 uint32_t, idb->idb_bufoffset, 2953 uint64_t, 0, uint32_t, 0, uint32_t, 0, 2954 uint32_t, idb->idb_xfer_len, 2955 int, XFER_BUF_TX_TO_INI); 2956 idm_buf_tx_to_ini_done(idt, idb, status); 2957 } else { 2958 idm_so_send_rtt_data_done(idt, idb); 2959 mutex_exit(&idt->idt_mutex); 2960 } 2961 break; 2962 } 2963 2964 default: 2965 IDM_CONN_LOG(CE_WARN, "idm_sotx_thread: Unknown magic " 2966 "(0x%08x)", object->idm_tx_obj_magic); 2967 status = IDM_STATUS_FAIL; 2968 } 2969 2970 mutex_enter(&so_conn->ic_tx_mutex); 2971 2972 if (status != IDM_STATUS_SUCCESS) { 2973 so_conn->ic_tx_thread_running = B_FALSE; 2974 idm_conn_event(ic, CE_TRANSPORT_FAIL, status); 2975 } 2976 } 2977 2978 /* 2979 * Before we leave, we need to abort every item remaining in the 2980 * TX list. 
2981 */ 2982 2983 tx_bail: 2984 object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list); 2985 2986 while (object != NULL) { 2987 next = list_next(&so_conn->ic_tx_list, object); 2988 2989 list_remove(&so_conn->ic_tx_list, object); 2990 switch (object->idm_tx_obj_magic) { 2991 case IDM_PDU_MAGIC: 2992 idm_pdu_complete((idm_pdu_t *)object, 2993 IDM_STATUS_ABORTED); 2994 break; 2995 2996 case IDM_BUF_MAGIC: { 2997 idm_buf_t *idb = (idm_buf_t *)object; 2998 idm_task_t *idt = idb->idb_task_binding; 2999 mutex_exit(&so_conn->ic_tx_mutex); 3000 mutex_enter(&idt->idt_mutex); 3001 /* 3002 * TX thread owns the buffer so we expect it to 3003 * be "in transport" 3004 */ 3005 ASSERT(idb->idb_in_transport); 3006 if (IDM_CONN_ISTGT(ic)) { 3007 /* 3008 * idm_buf_tx_to_ini_done releases 3009 * idt->idt_mutex 3010 */ 3011 DTRACE_ISCSI_8(xfer__done, 3012 idm_conn_t *, idt->idt_ic, 3013 uintptr_t, idb->idb_buf, 3014 uint32_t, idb->idb_bufoffset, 3015 uint64_t, 0, uint32_t, 0, uint32_t, 0, 3016 uint32_t, idb->idb_xfer_len, 3017 int, XFER_BUF_TX_TO_INI); 3018 idm_buf_tx_to_ini_done(idt, idb, 3019 IDM_STATUS_ABORTED); 3020 } else { 3021 idm_so_send_rtt_data_done(idt, idb); 3022 mutex_exit(&idt->idt_mutex); 3023 } 3024 mutex_enter(&so_conn->ic_tx_mutex); 3025 break; 3026 } 3027 default: 3028 IDM_CONN_LOG(CE_WARN, 3029 "idm_sotx_thread: Unexpected magic " 3030 "(0x%08x)", object->idm_tx_obj_magic); 3031 } 3032 3033 object = next; 3034 } 3035 3036 mutex_exit(&so_conn->ic_tx_mutex); 3037 idm_conn_rele(ic); 3038 thread_exit(); 3039 /*NOTREACHED*/ 3040 } 3041 3042 static void 3043 idm_so_socket_set_nonblock(struct sonode *node) 3044 { 3045 (void) VOP_SETFL(node->so_vnode, node->so_flag, 3046 (node->so_state | FNONBLOCK), CRED(), NULL); 3047 } 3048 3049 static void 3050 idm_so_socket_set_block(struct sonode *node) 3051 { 3052 (void) VOP_SETFL(node->so_vnode, node->so_flag, 3053 (node->so_state & (~FNONBLOCK)), CRED(), NULL); 3054 } 3055 3056 3057 /* 3058 * Called by kernel sockets when the 
connection has been accepted or 3059 * rejected. In early volo, a "disconnect" callback was sent instead of 3060 * "connectfailed", so we check for both. 3061 */ 3062 /* ARGSUSED */ 3063 void 3064 idm_so_timed_socket_connect_cb(ksocket_t ks, 3065 ksocket_callback_event_t ev, void *arg, uintptr_t info) 3066 { 3067 idm_so_timed_socket_t *itp = arg; 3068 ASSERT(itp != NULL); 3069 ASSERT(ev == KSOCKET_EV_CONNECTED || 3070 ev == KSOCKET_EV_CONNECTFAILED || 3071 ev == KSOCKET_EV_DISCONNECTED); 3072 3073 mutex_enter(&idm_so_timed_socket_mutex); 3074 itp->it_callback_called = B_TRUE; 3075 if (ev == KSOCKET_EV_CONNECTED) { 3076 itp->it_socket_error_code = 0; 3077 } else { 3078 /* Make sure the error code is non-zero on error */ 3079 if (info == 0) 3080 info = ECONNRESET; 3081 itp->it_socket_error_code = (int)info; 3082 } 3083 cv_signal(&itp->it_cv); 3084 mutex_exit(&idm_so_timed_socket_mutex); 3085 } 3086 3087 int 3088 idm_so_timed_socket_connect(ksocket_t ks, 3089 struct sockaddr_storage *sa, int sa_sz, int login_max_usec) 3090 { 3091 clock_t conn_login_max; 3092 int rc, nonblocking, rval; 3093 idm_so_timed_socket_t it; 3094 ksocket_callbacks_t ks_cb; 3095 3096 conn_login_max = ddi_get_lbolt() + drv_usectohz(login_max_usec); 3097 3098 /* 3099 * Set to non-block socket mode, with callback on connect 3100 * Early volo used "disconnected" instead of "connectfailed", 3101 * so set callback to look for both. 
3102 */ 3103 bzero(&it, sizeof (it)); 3104 ks_cb.ksock_cb_flags = KSOCKET_CB_CONNECTED | 3105 KSOCKET_CB_CONNECTFAILED | KSOCKET_CB_DISCONNECTED; 3106 ks_cb.ksock_cb_connected = idm_so_timed_socket_connect_cb; 3107 ks_cb.ksock_cb_connectfailed = idm_so_timed_socket_connect_cb; 3108 ks_cb.ksock_cb_disconnected = idm_so_timed_socket_connect_cb; 3109 cv_init(&it.it_cv, NULL, CV_DEFAULT, NULL); 3110 rc = ksocket_setcallbacks(ks, &ks_cb, &it, CRED()); 3111 if (rc != 0) 3112 return (rc); 3113 3114 /* Set to non-blocking mode */ 3115 nonblocking = 1; 3116 rc = ksocket_ioctl(ks, FIONBIO, (intptr_t)&nonblocking, &rval, 3117 CRED()); 3118 if (rc != 0) 3119 goto cleanup; 3120 3121 bzero(&it, sizeof (it)); 3122 for (;;) { 3123 /* 3124 * Warning -- in a loopback scenario, the call to 3125 * the connect_cb can occur inside the call to 3126 * ksocket_connect. Do not hold the mutex around the 3127 * call to ksocket_connect. 3128 */ 3129 rc = ksocket_connect(ks, (struct sockaddr *)sa, sa_sz, CRED()); 3130 if (rc == 0 || rc == EISCONN) { 3131 /* socket success or already success */ 3132 rc = 0; 3133 break; 3134 } 3135 if ((rc != EINPROGRESS) && (rc != EALREADY)) { 3136 break; 3137 } 3138 3139 /* TCP connect still in progress. See if out of time. */ 3140 if (ddi_get_lbolt() > conn_login_max) { 3141 /* 3142 * Connection retry timeout, 3143 * failed connect to target. 3144 */ 3145 rc = ETIMEDOUT; 3146 break; 3147 } 3148 3149 /* 3150 * TCP connect still in progress. Sleep until callback. 3151 * Do NOT go to sleep if the callback already occurred! 3152 */ 3153 mutex_enter(&idm_so_timed_socket_mutex); 3154 if (!it.it_callback_called) { 3155 (void) cv_timedwait(&it.it_cv, 3156 &idm_so_timed_socket_mutex, conn_login_max); 3157 } 3158 if (it.it_callback_called) { 3159 rc = it.it_socket_error_code; 3160 mutex_exit(&idm_so_timed_socket_mutex); 3161 break; 3162 } 3163 /* If timer expires, go call ksocket_connect one last time. 
*/ 3164 mutex_exit(&idm_so_timed_socket_mutex); 3165 } 3166 3167 /* resume blocking mode */ 3168 nonblocking = 0; 3169 (void) ksocket_ioctl(ks, FIONBIO, (intptr_t)&nonblocking, &rval, 3170 CRED()); 3171 cleanup: 3172 (void) ksocket_setcallbacks(ks, NULL, NULL, CRED()); 3173 cv_destroy(&it.it_cv); 3174 if (rc != 0) { 3175 idm_soshutdown(ks); 3176 } 3177 return (rc); 3178 } 3179 3180 3181 void 3182 idm_addr_to_sa(idm_addr_t *dportal, struct sockaddr_storage *sa) 3183 { 3184 int dp_addr_size; 3185 struct sockaddr_in *sin; 3186 struct sockaddr_in6 *sin6; 3187 3188 /* Build sockaddr_storage for this portal (idm_addr_t) */ 3189 bzero(sa, sizeof (*sa)); 3190 dp_addr_size = dportal->a_addr.i_insize; 3191 if (dp_addr_size == sizeof (struct in_addr)) { 3192 /* IPv4 */ 3193 sa->ss_family = AF_INET; 3194 sin = (struct sockaddr_in *)sa; 3195 sin->sin_port = htons(dportal->a_port); 3196 bcopy(&dportal->a_addr.i_addr.in4, 3197 &sin->sin_addr, sizeof (struct in_addr)); 3198 } else if (dp_addr_size == sizeof (struct in6_addr)) { 3199 /* IPv6 */ 3200 sa->ss_family = AF_INET6; 3201 sin6 = (struct sockaddr_in6 *)sa; 3202 sin6->sin6_port = htons(dportal->a_port); 3203 bcopy(&dportal->a_addr.i_addr.in6, 3204 &sin6->sin6_addr, sizeof (struct in6_addr)); 3205 } else { 3206 ASSERT(0); 3207 } 3208 } 3209 3210 3211 /* 3212 * return a human-readable form of a sockaddr_storage, in the form 3213 * [ip-address]:port. This is used in calls to logging functions. 3214 * If several calls to idm_sa_ntop are made within the same invocation 3215 * of a logging function, then each one needs its own buf. 
3216 */ 3217 const char * 3218 idm_sa_ntop(const struct sockaddr_storage *sa, 3219 char *buf, size_t size) 3220 { 3221 static const char bogus_ip[] = "[0].-1"; 3222 char tmp[INET6_ADDRSTRLEN]; 3223 3224 switch (sa->ss_family) { 3225 case AF_INET6: { 3226 const struct sockaddr_in6 *in6 = 3227 (const struct sockaddr_in6 *) sa; 3228 3229 (void) inet_ntop(in6->sin6_family, &in6->sin6_addr, tmp, 3230 sizeof (tmp)); 3231 if (strlen(tmp) + sizeof ("[].65535") > size) 3232 goto err; 3233 /* struct sockaddr_storage gets port info from v4 loc */ 3234 (void) snprintf(buf, size, "[%s].%u", tmp, 3235 ntohs(in6->sin6_port)); 3236 return (buf); 3237 } 3238 case AF_INET: { 3239 const struct sockaddr_in *in = (const struct sockaddr_in *) sa; 3240 3241 (void) inet_ntop(in->sin_family, &in->sin_addr, tmp, 3242 sizeof (tmp)); 3243 if (strlen(tmp) + sizeof ("[].65535") > size) 3244 goto err; 3245 (void) snprintf(buf, size, "[%s].%u", tmp, 3246 ntohs(in->sin_port)); 3247 return (buf); 3248 } 3249 default: 3250 break; 3251 } 3252 err: 3253 (void) snprintf(buf, size, "%s", bogus_ip); 3254 return (buf); 3255 } 3256