1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 2013 by Delphix. All rights reserved. 27 * Copyright (c) 2017, Joyent, Inc. All rights reserved. 
28 */ 29 30 #include <sys/conf.h> 31 #include <sys/stat.h> 32 #include <sys/file.h> 33 #include <sys/ddi.h> 34 #include <sys/sunddi.h> 35 #include <sys/modctl.h> 36 #include <sys/priv.h> 37 #include <sys/cpuvar.h> 38 #include <sys/socket.h> 39 #include <sys/strsubr.h> 40 #include <sys/sysmacros.h> 41 #include <sys/sdt.h> 42 #include <netinet/tcp.h> 43 #include <inet/tcp.h> 44 #include <sys/socketvar.h> 45 #include <sys/pathname.h> 46 #include <sys/fs/snode.h> 47 #include <sys/fs/dv_node.h> 48 #include <sys/vnode.h> 49 #include <netinet/in.h> 50 #include <net/if.h> 51 #include <sys/sockio.h> 52 #include <sys/ksocket.h> 53 #include <sys/filio.h> /* FIONBIO */ 54 #include <sys/iscsi_protocol.h> 55 #include <sys/idm/idm.h> 56 #include <sys/idm/idm_so.h> 57 #include <sys/idm/idm_text.h> 58 59 #define IN_PROGRESS_DELAY 1 60 61 /* 62 * in6addr_any is currently all zeroes, but use the macro in case this 63 * ever changes. 64 */ 65 static const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT; 66 67 static void idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status); 68 static void idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status); 69 static void idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status); 70 71 static idm_status_t idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so); 72 static void idm_so_conn_destroy_common(idm_conn_t *ic); 73 static void idm_so_conn_connect_common(idm_conn_t *ic); 74 75 static void idm_set_ini_preconnect_options(idm_so_conn_t *sc, 76 boolean_t boot_conn); 77 static void idm_set_postconnect_options(ksocket_t so); 78 static idm_status_t idm_i_so_tx(idm_pdu_t *pdu); 79 80 static idm_status_t idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu); 81 static void idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt, 82 idm_buf_t *idb, uint32_t offset, uint32_t length); 83 static void idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb); 84 static idm_status_t idm_so_send_buf_region(idm_task_t *idt, 85 idm_buf_t *idb, uint32_t 
buf_region_offset, uint32_t buf_region_length); 86 87 static uint32_t idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb, 88 uint32_t ro, uint32_t dlength); 89 90 static idm_status_t idm_so_handle_digest(idm_conn_t *it, 91 nvpair_t *digest_choice, const idm_kv_xlate_t *ikvx); 92 93 static void idm_so_socket_set_nonblock(struct sonode *node); 94 static void idm_so_socket_set_block(struct sonode *node); 95 96 /* 97 * Transport ops prototypes 98 */ 99 static void idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu); 100 static idm_status_t idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb); 101 static idm_status_t idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb); 102 static void idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu); 103 static void idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu); 104 static void idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu); 105 static idm_status_t idm_so_free_task_rsrc(idm_task_t *idt); 106 static kv_status_t idm_so_negotiate_key_values(idm_conn_t *it, 107 nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl); 108 static void idm_so_notice_key_values(idm_conn_t *it, 109 nvlist_t *negotiated_nvl); 110 static kv_status_t idm_so_declare_key_values(idm_conn_t *it, 111 nvlist_t *config_nvl, nvlist_t *outgoing_nvl); 112 static boolean_t idm_so_conn_is_capable(idm_conn_req_t *ic, 113 idm_transport_caps_t *caps); 114 static idm_status_t idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen); 115 static void idm_so_buf_free(idm_buf_t *idb); 116 static idm_status_t idm_so_buf_setup(idm_buf_t *idb); 117 static void idm_so_buf_teardown(idm_buf_t *idb); 118 static idm_status_t idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is); 119 static void idm_so_tgt_svc_destroy(idm_svc_t *is); 120 static idm_status_t idm_so_tgt_svc_online(idm_svc_t *is); 121 static void idm_so_tgt_svc_offline(idm_svc_t *is); 122 static void idm_so_tgt_conn_destroy(idm_conn_t *ic); 123 static idm_status_t idm_so_tgt_conn_connect(idm_conn_t *ic); 124 static void 
idm_so_conn_disconnect(idm_conn_t *ic); 125 static idm_status_t idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic); 126 static void idm_so_ini_conn_destroy(idm_conn_t *ic); 127 static idm_status_t idm_so_ini_conn_connect(idm_conn_t *ic); 128 129 /* 130 * IDM Native Sockets transport operations 131 */ 132 static 133 idm_transport_ops_t idm_so_transport_ops = { 134 idm_so_tx, /* it_tx_pdu */ 135 idm_so_buf_tx_to_ini, /* it_buf_tx_to_ini */ 136 idm_so_buf_rx_from_ini, /* it_buf_rx_from_ini */ 137 idm_so_rx_datain, /* it_rx_datain */ 138 idm_so_rx_rtt, /* it_rx_rtt */ 139 idm_so_rx_dataout, /* it_rx_dataout */ 140 NULL, /* it_alloc_conn_rsrc */ 141 NULL, /* it_free_conn_rsrc */ 142 NULL, /* it_tgt_enable_datamover */ 143 NULL, /* it_ini_enable_datamover */ 144 NULL, /* it_conn_terminate */ 145 idm_so_free_task_rsrc, /* it_free_task_rsrc */ 146 idm_so_negotiate_key_values, /* it_negotiate_key_values */ 147 idm_so_notice_key_values, /* it_notice_key_values */ 148 idm_so_conn_is_capable, /* it_conn_is_capable */ 149 idm_so_buf_alloc, /* it_buf_alloc */ 150 idm_so_buf_free, /* it_buf_free */ 151 idm_so_buf_setup, /* it_buf_setup */ 152 idm_so_buf_teardown, /* it_buf_teardown */ 153 idm_so_tgt_svc_create, /* it_tgt_svc_create */ 154 idm_so_tgt_svc_destroy, /* it_tgt_svc_destroy */ 155 idm_so_tgt_svc_online, /* it_tgt_svc_online */ 156 idm_so_tgt_svc_offline, /* it_tgt_svc_offline */ 157 idm_so_tgt_conn_destroy, /* it_tgt_conn_destroy */ 158 idm_so_tgt_conn_connect, /* it_tgt_conn_connect */ 159 idm_so_conn_disconnect, /* it_tgt_conn_disconnect */ 160 idm_so_ini_conn_create, /* it_ini_conn_create */ 161 idm_so_ini_conn_destroy, /* it_ini_conn_destroy */ 162 idm_so_ini_conn_connect, /* it_ini_conn_connect */ 163 idm_so_conn_disconnect, /* it_ini_conn_disconnect */ 164 idm_so_declare_key_values /* it_declare_key_values */ 165 }; 166 167 kmutex_t idm_so_timed_socket_mutex; 168 169 int32_t idm_so_sndbuf = IDM_SNDBUF_SIZE; 170 int32_t idm_so_rcvbuf = IDM_RCVBUF_SIZE; 171 
172 /* 173 * idm_so_init() 174 * Sockets transport initialization 175 */ 176 void 177 idm_so_init(idm_transport_t *it) 178 { 179 /* Cache for IDM Data and R2T Transmit PDU's */ 180 idm.idm_sotx_pdu_cache = kmem_cache_create("idm_tx_pdu_cache", 181 sizeof (idm_pdu_t) + sizeof (iscsi_hdr_t), 8, 182 &idm_sotx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP); 183 184 /* Cache for IDM Receive PDU's */ 185 idm.idm_sorx_pdu_cache = kmem_cache_create("idm_rx_pdu_cache", 186 sizeof (idm_pdu_t) + IDM_SORX_CACHE_HDRLEN, 8, 187 &idm_sorx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP); 188 189 /* 128k buffer cache */ 190 idm.idm_so_128k_buf_cache = kmem_cache_create("idm_128k_buf_cache", 191 IDM_SO_BUF_CACHE_UB, 8, NULL, NULL, NULL, NULL, NULL, KM_SLEEP); 192 193 /* Set the sockets transport ops */ 194 it->it_ops = &idm_so_transport_ops; 195 196 mutex_init(&idm_so_timed_socket_mutex, NULL, MUTEX_DEFAULT, NULL); 197 198 } 199 200 /* 201 * idm_so_fini() 202 * Sockets transport teardown 203 */ 204 void 205 idm_so_fini(void) 206 { 207 kmem_cache_destroy(idm.idm_so_128k_buf_cache); 208 kmem_cache_destroy(idm.idm_sotx_pdu_cache); 209 kmem_cache_destroy(idm.idm_sorx_pdu_cache); 210 mutex_destroy(&idm_so_timed_socket_mutex); 211 } 212 213 ksocket_t 214 idm_socreate(int domain, int type, int protocol) 215 { 216 ksocket_t ks; 217 218 if (!ksocket_socket(&ks, domain, type, protocol, KSOCKET_NOSLEEP, 219 CRED())) { 220 return (ks); 221 } else { 222 return (NULL); 223 } 224 } 225 226 /* 227 * idm_soshutdown will disconnect the socket and prevent subsequent PDU 228 * reception and transmission. The sonode still exists but its state 229 * gets modified to indicate it is no longer connected. Calls to 230 * idm_sorecv/idm_iov_sorecv will return so idm_soshutdown can be used 231 * regain control of a thread stuck in idm_sorecv. 
232 */ 233 void 234 idm_soshutdown(ksocket_t so) 235 { 236 (void) ksocket_shutdown(so, SHUT_RDWR, CRED()); 237 } 238 239 /* 240 * idm_sodestroy releases all resources associated with a socket previously 241 * created with idm_socreate. The socket must be shutdown using 242 * idm_soshutdown before the socket is destroyed with idm_sodestroy, 243 * otherwise undefined behavior will result. 244 */ 245 void 246 idm_sodestroy(ksocket_t ks) 247 { 248 (void) ksocket_close(ks, CRED()); 249 } 250 251 /* 252 * Function to compare two addresses in sockaddr_storage format 253 */ 254 255 int 256 idm_ss_compare(const struct sockaddr_storage *cmp_ss1, 257 const struct sockaddr_storage *cmp_ss2, 258 boolean_t v4_mapped_as_v4, 259 boolean_t compare_ports) 260 { 261 struct sockaddr_storage mapped_v4_ss1, mapped_v4_ss2; 262 const struct sockaddr_storage *ss1, *ss2; 263 struct in_addr *in1, *in2; 264 struct in6_addr *in61, *in62; 265 int i; 266 267 /* 268 * Normalize V4-mapped IPv6 addresses into V4 format if 269 * v4_mapped_as_v4 is B_TRUE. 
270 */ 271 ss1 = cmp_ss1; 272 ss2 = cmp_ss2; 273 if (v4_mapped_as_v4 && (ss1->ss_family == AF_INET6)) { 274 in61 = &((struct sockaddr_in6 *)ss1)->sin6_addr; 275 if (IN6_IS_ADDR_V4MAPPED(in61)) { 276 bzero(&mapped_v4_ss1, sizeof (mapped_v4_ss1)); 277 mapped_v4_ss1.ss_family = AF_INET; 278 ((struct sockaddr_in *)&mapped_v4_ss1)->sin_port = 279 ((struct sockaddr_in *)ss1)->sin_port; 280 IN6_V4MAPPED_TO_INADDR(in61, 281 &((struct sockaddr_in *)&mapped_v4_ss1)->sin_addr); 282 ss1 = &mapped_v4_ss1; 283 } 284 } 285 ss2 = cmp_ss2; 286 if (v4_mapped_as_v4 && (ss2->ss_family == AF_INET6)) { 287 in62 = &((struct sockaddr_in6 *)ss2)->sin6_addr; 288 if (IN6_IS_ADDR_V4MAPPED(in62)) { 289 bzero(&mapped_v4_ss2, sizeof (mapped_v4_ss2)); 290 mapped_v4_ss2.ss_family = AF_INET; 291 ((struct sockaddr_in *)&mapped_v4_ss2)->sin_port = 292 ((struct sockaddr_in *)ss2)->sin_port; 293 IN6_V4MAPPED_TO_INADDR(in62, 294 &((struct sockaddr_in *)&mapped_v4_ss2)->sin_addr); 295 ss2 = &mapped_v4_ss2; 296 } 297 } 298 299 /* 300 * Compare ports, then address family, then ip address 301 */ 302 if (compare_ports && 303 (((struct sockaddr_in *)ss1)->sin_port != 304 ((struct sockaddr_in *)ss2)->sin_port)) { 305 if (((struct sockaddr_in *)ss1)->sin_port > 306 ((struct sockaddr_in *)ss2)->sin_port) 307 return (1); 308 else 309 return (-1); 310 } 311 312 /* 313 * ports are the same 314 */ 315 if (ss1->ss_family != ss2->ss_family) { 316 if (ss1->ss_family == AF_INET) 317 return (1); 318 else 319 return (-1); 320 } 321 322 /* 323 * address families are the same 324 */ 325 if (ss1->ss_family == AF_INET) { 326 in1 = &((struct sockaddr_in *)ss1)->sin_addr; 327 in2 = &((struct sockaddr_in *)ss2)->sin_addr; 328 329 if (in1->s_addr > in2->s_addr) 330 return (1); 331 else if (in1->s_addr < in2->s_addr) 332 return (-1); 333 else 334 return (0); 335 } else if (ss1->ss_family == AF_INET6) { 336 in61 = &((struct sockaddr_in6 *)ss1)->sin6_addr; 337 in62 = &((struct sockaddr_in6 *)ss2)->sin6_addr; 338 339 for (i = 0; i < 
4; i++) { 340 if (in61->s6_addr32[i] > in62->s6_addr32[i]) 341 return (1); 342 else if (in61->s6_addr32[i] < in62->s6_addr32[i]) 343 return (-1); 344 } 345 return (0); 346 } 347 348 return (1); 349 } 350 351 /* 352 * IP address filter functions to flag addresses that should not 353 * go out to initiators through discovery. 354 */ 355 static boolean_t 356 idm_v4_addr_okay(struct in_addr *in_addr) 357 { 358 in_addr_t addr = ntohl(in_addr->s_addr); 359 360 if ((INADDR_NONE == addr) || 361 (IN_MULTICAST(addr)) || 362 ((addr >> IN_CLASSA_NSHIFT) == 0) || 363 ((addr >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) { 364 return (B_FALSE); 365 } 366 return (B_TRUE); 367 } 368 369 static boolean_t 370 idm_v6_addr_okay(struct in6_addr *addr6) 371 { 372 373 if ((IN6_IS_ADDR_UNSPECIFIED(addr6)) || 374 (IN6_IS_ADDR_LOOPBACK(addr6)) || 375 (IN6_IS_ADDR_MULTICAST(addr6)) || 376 (IN6_IS_ADDR_V4MAPPED(addr6)) || 377 (IN6_IS_ADDR_V4COMPAT(addr6)) || 378 (IN6_IS_ADDR_LINKLOCAL(addr6))) { 379 return (B_FALSE); 380 } 381 return (B_TRUE); 382 } 383 384 /* 385 * idm_get_ipaddr will retrieve a list of IP Addresses which the host is 386 * configured with by sending down a sequence of kernel ioctl to IP STREAMS. 
387 */ 388 int 389 idm_get_ipaddr(idm_addr_list_t **ipaddr_p) 390 { 391 ksocket_t so4, so6; 392 struct lifnum lifn; 393 struct lifconf lifc; 394 struct lifreq *lp; 395 int rval; 396 int numifs; 397 int bufsize; 398 void *buf; 399 int i, j, n, rc; 400 struct sockaddr_storage ss; 401 struct sockaddr_in *sin; 402 struct sockaddr_in6 *sin6; 403 idm_addr_t *ip; 404 idm_addr_list_t *ipaddr = NULL; 405 int size_ipaddr; 406 407 *ipaddr_p = NULL; 408 size_ipaddr = 0; 409 buf = NULL; 410 411 /* create an ipv4 and ipv6 UDP socket */ 412 if ((so6 = idm_socreate(PF_INET6, SOCK_DGRAM, 0)) == NULL) 413 return (0); 414 if ((so4 = idm_socreate(PF_INET, SOCK_DGRAM, 0)) == NULL) { 415 idm_sodestroy(so6); 416 return (0); 417 } 418 419 420 retry_count: 421 /* snapshot the current number of interfaces */ 422 lifn.lifn_family = PF_UNSPEC; 423 lifn.lifn_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES; 424 lifn.lifn_count = 0; 425 /* use vp6 for ioctls with unspecified families by default */ 426 if (ksocket_ioctl(so6, SIOCGLIFNUM, (intptr_t)&lifn, &rval, CRED()) 427 != 0) { 428 goto cleanup; 429 } 430 431 numifs = lifn.lifn_count; 432 if (numifs <= 0) { 433 goto cleanup; 434 } 435 436 /* allocate extra room in case more interfaces appear */ 437 numifs += 10; 438 439 /* get the interface names and ip addresses */ 440 bufsize = numifs * sizeof (struct lifreq); 441 buf = kmem_alloc(bufsize, KM_SLEEP); 442 443 lifc.lifc_family = AF_UNSPEC; 444 lifc.lifc_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES; 445 lifc.lifc_len = bufsize; 446 lifc.lifc_buf = buf; 447 rc = ksocket_ioctl(so6, SIOCGLIFCONF, (intptr_t)&lifc, &rval, CRED()); 448 if (rc != 0) { 449 goto cleanup; 450 } 451 /* if our extra room is used up, try again */ 452 if (bufsize <= lifc.lifc_len) { 453 kmem_free(buf, bufsize); 454 buf = NULL; 455 goto retry_count; 456 } 457 /* calc actual number of ifconfs */ 458 n = lifc.lifc_len / sizeof (struct lifreq); 459 460 /* get ip address */ 461 if (n > 0) { 462 size_ipaddr = sizeof 
(idm_addr_list_t) + 463 (n - 1) * sizeof (idm_addr_t); 464 ipaddr = kmem_zalloc(size_ipaddr, KM_SLEEP); 465 } else { 466 goto cleanup; 467 } 468 469 /* 470 * Examine the array of interfaces and filter uninteresting ones 471 */ 472 for (i = 0, j = 0, lp = lifc.lifc_req; i < n; i++, lp++) { 473 474 /* 475 * Copy the address as the SIOCGLIFFLAGS ioctl is destructive 476 */ 477 ss = lp->lifr_addr; 478 /* 479 * fetch the flags using the socket of the correct family 480 */ 481 switch (ss.ss_family) { 482 case AF_INET: 483 rc = ksocket_ioctl(so4, SIOCGLIFFLAGS, (intptr_t)lp, 484 &rval, CRED()); 485 break; 486 case AF_INET6: 487 rc = ksocket_ioctl(so6, SIOCGLIFFLAGS, (intptr_t)lp, 488 &rval, CRED()); 489 break; 490 default: 491 continue; 492 } 493 if (rc == 0) { 494 /* 495 * If we got the flags, skip uninteresting 496 * interfaces based on flags 497 */ 498 if ((lp->lifr_flags & IFF_UP) != IFF_UP) 499 continue; 500 if (lp->lifr_flags & 501 (IFF_ANYCAST|IFF_NOLOCAL|IFF_DEPRECATED)) 502 continue; 503 } 504 505 /* save ip address */ 506 ip = &ipaddr->al_addrs[j]; 507 switch (ss.ss_family) { 508 case AF_INET: 509 sin = (struct sockaddr_in *)&ss; 510 if (!idm_v4_addr_okay(&sin->sin_addr)) 511 continue; 512 ip->a_addr.i_addr.in4 = sin->sin_addr; 513 ip->a_addr.i_insize = sizeof (struct in_addr); 514 break; 515 case AF_INET6: 516 sin6 = (struct sockaddr_in6 *)&ss; 517 if (!idm_v6_addr_okay(&sin6->sin6_addr)) 518 continue; 519 ip->a_addr.i_addr.in6 = sin6->sin6_addr; 520 ip->a_addr.i_insize = sizeof (struct in6_addr); 521 break; 522 default: 523 continue; 524 } 525 j++; 526 } 527 528 if (j == 0) { 529 /* no valid ifaddr */ 530 kmem_free(ipaddr, size_ipaddr); 531 size_ipaddr = 0; 532 ipaddr = NULL; 533 } else { 534 ipaddr->al_out_cnt = j; 535 } 536 537 538 cleanup: 539 idm_sodestroy(so6); 540 idm_sodestroy(so4); 541 542 if (buf != NULL) 543 kmem_free(buf, bufsize); 544 545 *ipaddr_p = ipaddr; 546 return (size_ipaddr); 547 } 548 549 int 550 idm_sorecv(ksocket_t so, void *msg, size_t 
len) 551 { 552 iovec_t iov; 553 554 ASSERT(so != NULL); 555 ASSERT(len != 0); 556 557 /* 558 * Fill in iovec and receive data 559 */ 560 iov.iov_base = msg; 561 iov.iov_len = len; 562 563 return (idm_iov_sorecv(so, &iov, 1, len)); 564 } 565 566 /* 567 * idm_sosendto - Sends a buffered data on a non-connected socket. 568 * 569 * This function puts the data provided on the wire by calling sosendmsg. 570 * It will return only when all the data has been sent or if an error 571 * occurs. 572 * 573 * Returns 0 for success, the socket errno value if sosendmsg fails, and 574 * -1 if sosendmsg returns success but uio_resid != 0 575 */ 576 int 577 idm_sosendto(ksocket_t so, void *buff, size_t len, 578 struct sockaddr *name, socklen_t namelen) 579 { 580 struct msghdr msg; 581 struct iovec iov[1]; 582 int error; 583 size_t sent = 0; 584 585 iov[0].iov_base = buff; 586 iov[0].iov_len = len; 587 588 /* Initialization of the message header. */ 589 bzero(&msg, sizeof (msg)); 590 msg.msg_iov = iov; 591 msg.msg_iovlen = 1; 592 msg.msg_name = name; 593 msg.msg_namelen = namelen; 594 595 if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED())) == 0) { 596 /* Data sent */ 597 if (sent == len) { 598 /* All data sent. Success. */ 599 return (0); 600 } else { 601 /* Not all data was sent. Failure */ 602 return (-1); 603 } 604 } 605 606 /* Send failed */ 607 return (error); 608 } 609 610 /* 611 * idm_iov_sosend - Sends an iovec on a connection. 612 * 613 * This function puts the data provided on the wire by calling sosendmsg. 614 * It will return only when all the data has been sent or if an error 615 * occurs. 616 * 617 * Returns 0 for success, the socket errno value if sosendmsg fails, and 618 * -1 if sosendmsg returns success but uio_resid != 0 619 */ 620 int 621 idm_iov_sosend(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len) 622 { 623 struct msghdr msg; 624 int error; 625 size_t sent = 0; 626 627 ASSERT(iop != NULL); 628 629 /* Initialization of the message header. 
*/ 630 bzero(&msg, sizeof (msg)); 631 msg.msg_iov = iop; 632 msg.msg_iovlen = iovlen; 633 634 if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED())) 635 == 0) { 636 /* Data sent */ 637 if (sent == total_len) { 638 /* All data sent. Success. */ 639 return (0); 640 } else { 641 /* Not all data was sent. Failure */ 642 return (-1); 643 } 644 } 645 646 /* Send failed */ 647 return (error); 648 } 649 650 /* 651 * idm_iov_sorecv - Receives an iovec from a connection 652 * 653 * This function gets the data asked for from the socket. It will return 654 * only when all the requested data has been retrieved or if an error 655 * occurs. 656 * 657 * Returns 0 for success, the socket errno value if sorecvmsg fails, and 658 * -1 if sorecvmsg returns success but uio_resid != 0 659 */ 660 int 661 idm_iov_sorecv(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len) 662 { 663 struct msghdr msg; 664 int error; 665 size_t recv; 666 int flags; 667 668 ASSERT(iop != NULL); 669 670 /* Initialization of the message header. */ 671 bzero(&msg, sizeof (msg)); 672 msg.msg_iov = iop; 673 msg.msg_iovlen = iovlen; 674 flags = MSG_WAITALL; 675 676 if ((error = ksocket_recvmsg(so, &msg, flags, &recv, CRED())) 677 == 0) { 678 /* Received data */ 679 if (recv == total_len) { 680 /* All requested data received. Success */ 681 return (0); 682 } else { 683 /* 684 * Not all data was received. The connection has 685 * probably failed. 
686 */ 687 return (-1); 688 } 689 } 690 691 /* Receive failed */ 692 return (error); 693 } 694 695 static void 696 idm_set_ini_preconnect_options(idm_so_conn_t *sc, boolean_t boot_conn) 697 { 698 int conn_abort = 10000; 699 int conn_notify = 2000; 700 int abort = 30000; 701 702 /* Pre-connect socket options */ 703 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP, 704 TCP_CONN_NOTIFY_THRESHOLD, (char *)&conn_notify, sizeof (int), 705 CRED()); 706 if (boot_conn == B_FALSE) { 707 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP, 708 TCP_CONN_ABORT_THRESHOLD, (char *)&conn_abort, sizeof (int), 709 CRED()); 710 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP, 711 TCP_ABORT_THRESHOLD, 712 (char *)&abort, sizeof (int), CRED()); 713 } 714 } 715 716 static void 717 idm_set_postconnect_options(ksocket_t ks) 718 { 719 const int on = 1; 720 721 /* Set connect options */ 722 (void) ksocket_setsockopt(ks, SOL_SOCKET, SO_RCVBUF, 723 (char *)&idm_so_rcvbuf, sizeof (idm_so_rcvbuf), CRED()); 724 (void) ksocket_setsockopt(ks, SOL_SOCKET, SO_SNDBUF, 725 (char *)&idm_so_sndbuf, sizeof (idm_so_sndbuf), CRED()); 726 (void) ksocket_setsockopt(ks, IPPROTO_TCP, TCP_NODELAY, 727 (char *)&on, sizeof (on), CRED()); 728 } 729 730 static uint32_t 731 n2h24(const uchar_t *ptr) 732 { 733 return ((ptr[0] << 16) | (ptr[1] << 8) | ptr[2]); 734 } 735 736 static boolean_t 737 idm_dataseglenokay(idm_conn_t *ic, idm_pdu_t *pdu) 738 { 739 iscsi_hdr_t *bhs; 740 741 if (ic->ic_conn_type == CONN_TYPE_TGT && 742 pdu->isp_datalen > ic->ic_conn_params.max_recv_dataseglen) { 743 IDM_CONN_LOG(CE_WARN, 744 "idm_dataseglenokay: exceeded the max data segment length"); 745 return (B_FALSE); 746 } 747 748 bhs = pdu->isp_hdr; 749 /* 750 * Filter out any RFC3720 data-size violations. 
751 */ 752 switch (IDM_PDU_OPCODE(pdu)) { 753 case ISCSI_OP_SCSI_TASK_MGT_MSG: 754 case ISCSI_OP_SCSI_TASK_MGT_RSP: 755 case ISCSI_OP_RTT_RSP: 756 case ISCSI_OP_LOGOUT_CMD: 757 /* 758 * Data-segment not allowed and additional headers not allowed. 759 * (both must be zero according to the RFC3720.) 760 */ 761 if (bhs->hlength != 0 || pdu->isp_datalen != 0) 762 return (B_FALSE); 763 break; 764 case ISCSI_OP_NOOP_OUT: 765 case ISCSI_OP_LOGIN_CMD: 766 case ISCSI_OP_TEXT_CMD: 767 case ISCSI_OP_SNACK_CMD: 768 case ISCSI_OP_NOOP_IN: 769 case ISCSI_OP_SCSI_RSP: 770 case ISCSI_OP_LOGIN_RSP: 771 case ISCSI_OP_TEXT_RSP: 772 case ISCSI_OP_SCSI_DATA_RSP: 773 case ISCSI_OP_LOGOUT_RSP: 774 case ISCSI_OP_ASYNC_EVENT: 775 case ISCSI_OP_REJECT_MSG: 776 /* 777 * Additional headers not allowed. 778 * (must be zero according to RFC3720.) 779 */ 780 if (bhs->hlength) 781 return (B_FALSE); 782 break; 783 case ISCSI_OP_SCSI_CMD: 784 /* 785 * See RFC3720, section 10.3 786 * 787 * For pure read cmds, data-segment-length must be zero. 788 * For non-final transfers, data-size must be even number of 789 * 4-byte words. 790 * For any transfer, an expected byte count must be provided. 791 * For bidirectional transfers, an additional-header must be 792 * provided (for the read byte-count.) 
793 */ 794 if (pdu->isp_datalen) { 795 if ((bhs->flags & (ISCSI_FLAG_CMD_READ | 796 ISCSI_FLAG_CMD_WRITE)) == ISCSI_FLAG_CMD_READ) 797 return (B_FALSE); 798 if ((bhs->flags & ISCSI_FLAG_FINAL) == 0 && 799 ((pdu->isp_datalen & 0x3) != 0)) 800 return (B_FALSE); 801 } 802 if (bhs->flags & (ISCSI_FLAG_CMD_READ | 803 ISCSI_FLAG_CMD_WRITE)) { 804 iscsi_scsi_cmd_hdr_t *cmdhdr = 805 (iscsi_scsi_cmd_hdr_t *)bhs; 806 /* 807 * we're transfering some data, we must have a 808 * byte count 809 */ 810 if (cmdhdr->data_length == 0) 811 return (B_FALSE); 812 } 813 break; 814 case ISCSI_OP_SCSI_DATA: 815 /* 816 * See RFC3720, section 10.7 817 * 818 * Additional headers aren't allowed, and the data-size must 819 * be an even number of 4-byte words (unless the final bit 820 * is set.) 821 */ 822 if (bhs->hlength) 823 return (B_FALSE); 824 if ((bhs->flags & ISCSI_FLAG_FINAL) == 0 && 825 ((pdu->isp_datalen & 0x3) != 0)) 826 return (B_FALSE); 827 break; 828 default: 829 break; 830 } 831 return (B_TRUE); 832 } 833 834 static idm_status_t 835 idm_sorecvhdr(idm_conn_t *ic, idm_pdu_t *pdu) 836 { 837 iscsi_hdr_t *bhs; 838 uint32_t hdr_digest_crc; 839 uint32_t crc_calculated; 840 void *new_hdr; 841 int ahslen = 0; 842 int total_len = 0; 843 int iovlen = 0; 844 struct iovec iov[2]; 845 idm_so_conn_t *so_conn; 846 int rc; 847 848 so_conn = ic->ic_transport_private; 849 850 /* 851 * Read BHS 852 */ 853 bhs = pdu->isp_hdr; 854 rc = idm_sorecv(so_conn->ic_so, pdu->isp_hdr, sizeof (iscsi_hdr_t)); 855 if (rc != IDM_STATUS_SUCCESS) { 856 return (IDM_STATUS_FAIL); 857 } 858 859 /* 860 * Check actual AHS length against the amount available in the buffer 861 */ 862 pdu->isp_hdrlen = sizeof (iscsi_hdr_t) + 863 (bhs->hlength * sizeof (uint32_t)); 864 pdu->isp_datalen = n2h24(bhs->dlength); 865 866 if (!idm_dataseglenokay(ic, pdu)) { 867 IDM_CONN_LOG(CE_WARN, 868 "idm_sorecvhdr: invalid data segment length"); 869 return (IDM_STATUS_FAIL); 870 } 871 if (bhs->hlength > IDM_SORX_CACHE_AHSLEN) { 872 /* Allocate 
a new header segment and change the callback */ 873 new_hdr = kmem_alloc(pdu->isp_hdrlen, KM_SLEEP); 874 bcopy(pdu->isp_hdr, new_hdr, sizeof (iscsi_hdr_t)); 875 pdu->isp_hdr = new_hdr; 876 pdu->isp_flags |= IDM_PDU_ADDL_HDR; 877 878 /* 879 * This callback will restore the expected values after 880 * the RX PDU has been processed. 881 */ 882 pdu->isp_callback = idm_sorx_addl_pdu_cb; 883 } 884 885 /* 886 * Setup receipt of additional header and header digest (if enabled). 887 */ 888 if (bhs->hlength > 0) { 889 iov[iovlen].iov_base = (caddr_t)(pdu->isp_hdr + 1); 890 ahslen = pdu->isp_hdrlen - sizeof (iscsi_hdr_t); 891 iov[iovlen].iov_len = ahslen; 892 total_len += iov[iovlen].iov_len; 893 iovlen++; 894 } 895 896 if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) { 897 iov[iovlen].iov_base = (caddr_t)&hdr_digest_crc; 898 iov[iovlen].iov_len = sizeof (hdr_digest_crc); 899 total_len += iov[iovlen].iov_len; 900 iovlen++; 901 } 902 903 if ((iovlen != 0) && 904 (idm_iov_sorecv(so_conn->ic_so, &iov[0], iovlen, 905 total_len) != 0)) { 906 return (IDM_STATUS_FAIL); 907 } 908 909 /* 910 * Validate header digest if enabled 911 */ 912 if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) { 913 crc_calculated = idm_crc32c(pdu->isp_hdr, 914 sizeof (iscsi_hdr_t) + ahslen); 915 if (crc_calculated != hdr_digest_crc) { 916 /* Invalid Header Digest */ 917 return (IDM_STATUS_HEADER_DIGEST); 918 } 919 } 920 921 return (0); 922 } 923 924 /* 925 * idm_so_ini_conn_create() 926 * Allocate the sockets transport connection resources. 
927 */ 928 static idm_status_t 929 idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic) 930 { 931 ksocket_t so; 932 idm_so_conn_t *so_conn; 933 idm_status_t idmrc; 934 935 so = idm_socreate(cr->cr_domain, cr->cr_type, 936 cr->cr_protocol); 937 if (so == NULL) { 938 return (IDM_STATUS_FAIL); 939 } 940 941 /* Bind the socket if configured to do so */ 942 if (cr->cr_bound) { 943 if (ksocket_bind(so, &cr->cr_bound_addr.sin, 944 SIZEOF_SOCKADDR(&cr->cr_bound_addr.sin), CRED()) != 0) { 945 idm_sodestroy(so); 946 return (IDM_STATUS_FAIL); 947 } 948 } 949 950 idmrc = idm_so_conn_create_common(ic, so); 951 if (idmrc != IDM_STATUS_SUCCESS) { 952 idm_soshutdown(so); 953 idm_sodestroy(so); 954 return (IDM_STATUS_FAIL); 955 } 956 957 so_conn = ic->ic_transport_private; 958 /* Set up socket options */ 959 idm_set_ini_preconnect_options(so_conn, cr->cr_boot_conn); 960 961 return (IDM_STATUS_SUCCESS); 962 } 963 964 /* 965 * idm_so_ini_conn_destroy() 966 * Tear down the sockets transport connection resources. 967 */ 968 static void 969 idm_so_ini_conn_destroy(idm_conn_t *ic) 970 { 971 idm_so_conn_destroy_common(ic); 972 } 973 974 /* 975 * idm_so_ini_conn_connect() 976 * Establish the connection referred to by the handle previously allocated via 977 * idm_so_ini_conn_create(). 
978 */ 979 static idm_status_t 980 idm_so_ini_conn_connect(idm_conn_t *ic) 981 { 982 idm_so_conn_t *so_conn; 983 struct sonode *node = NULL; 984 int rc; 985 clock_t lbolt, conn_login_max, conn_login_interval; 986 boolean_t nonblock; 987 988 so_conn = ic->ic_transport_private; 989 nonblock = ic->ic_conn_params.nonblock_socket; 990 conn_login_max = ic->ic_conn_params.conn_login_max; 991 conn_login_interval = ddi_get_lbolt() + 992 SEC_TO_TICK(ic->ic_conn_params.conn_login_interval); 993 994 if (nonblock == B_TRUE) { 995 node = ((struct sonode *)(so_conn->ic_so)); 996 /* Set to none block socket mode */ 997 idm_so_socket_set_nonblock(node); 998 do { 999 rc = ksocket_connect(so_conn->ic_so, 1000 &ic->ic_ini_dst_addr.sin, 1001 (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)), 1002 CRED()); 1003 if (rc == 0 || rc == EISCONN) { 1004 /* socket success or already success */ 1005 rc = IDM_STATUS_SUCCESS; 1006 break; 1007 } 1008 if ((rc == ETIMEDOUT) || (rc == ECONNREFUSED) || 1009 (rc == ECONNRESET)) { 1010 /* socket connection timeout or refuse */ 1011 break; 1012 } 1013 lbolt = ddi_get_lbolt(); 1014 if (lbolt > conn_login_max) { 1015 /* 1016 * Connection retry timeout, 1017 * failed connect to target. 
1018 */ 1019 break; 1020 } 1021 if (lbolt < conn_login_interval) { 1022 if ((rc == EINPROGRESS) || (rc == EALREADY)) { 1023 /* TCP connect still in progress */ 1024 delay(SEC_TO_TICK(IN_PROGRESS_DELAY)); 1025 continue; 1026 } else { 1027 delay(conn_login_interval - lbolt); 1028 } 1029 } 1030 conn_login_interval = ddi_get_lbolt() + 1031 SEC_TO_TICK(ic->ic_conn_params.conn_login_interval); 1032 } while (rc != 0); 1033 /* resume to nonblock mode */ 1034 if (rc == IDM_STATUS_SUCCESS) { 1035 idm_so_socket_set_block(node); 1036 } 1037 } else { 1038 rc = ksocket_connect(so_conn->ic_so, &ic->ic_ini_dst_addr.sin, 1039 (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)), CRED()); 1040 } 1041 1042 if (rc != 0) { 1043 idm_soshutdown(so_conn->ic_so); 1044 return (IDM_STATUS_FAIL); 1045 } 1046 1047 idm_so_conn_connect_common(ic); 1048 1049 idm_set_postconnect_options(so_conn->ic_so); 1050 1051 return (IDM_STATUS_SUCCESS); 1052 } 1053 1054 idm_status_t 1055 idm_so_tgt_conn_create(idm_conn_t *ic, ksocket_t new_so) 1056 { 1057 idm_status_t idmrc; 1058 1059 idm_set_postconnect_options(new_so); 1060 idmrc = idm_so_conn_create_common(ic, new_so); 1061 1062 return (idmrc); 1063 } 1064 1065 static void 1066 idm_so_tgt_conn_destroy(idm_conn_t *ic) 1067 { 1068 idm_so_conn_destroy_common(ic); 1069 } 1070 1071 /* 1072 * idm_so_tgt_conn_connect() 1073 * Establish the connection in ic, passed from idm_tgt_conn_finish(), which 1074 * is invoked from the SM as a result of an inbound connection request. 
1075 */ 1076 static idm_status_t 1077 idm_so_tgt_conn_connect(idm_conn_t *ic) 1078 { 1079 idm_so_conn_connect_common(ic); 1080 1081 return (IDM_STATUS_SUCCESS); 1082 } 1083 1084 static idm_status_t 1085 idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so) 1086 { 1087 idm_so_conn_t *so_conn; 1088 1089 so_conn = kmem_zalloc(sizeof (idm_so_conn_t), KM_SLEEP); 1090 so_conn->ic_so = new_so; 1091 1092 ic->ic_transport_private = so_conn; 1093 ic->ic_transport_hdrlen = 0; 1094 1095 /* Set the scoreboarding flag on this connection */ 1096 ic->ic_conn_flags |= IDM_CONN_USE_SCOREBOARD; 1097 ic->ic_conn_params.max_recv_dataseglen = 1098 ISCSI_DEFAULT_MAX_RECV_SEG_LEN; 1099 ic->ic_conn_params.max_xmit_dataseglen = 1100 ISCSI_DEFAULT_MAX_XMIT_SEG_LEN; 1101 1102 /* 1103 * Initialize tx thread mutex and list 1104 */ 1105 mutex_init(&so_conn->ic_tx_mutex, NULL, MUTEX_DEFAULT, NULL); 1106 cv_init(&so_conn->ic_tx_cv, NULL, CV_DEFAULT, NULL); 1107 list_create(&so_conn->ic_tx_list, sizeof (idm_pdu_t), 1108 offsetof(idm_pdu_t, idm_tx_link)); 1109 1110 return (IDM_STATUS_SUCCESS); 1111 } 1112 1113 static void 1114 idm_so_conn_destroy_common(idm_conn_t *ic) 1115 { 1116 idm_so_conn_t *so_conn = ic->ic_transport_private; 1117 1118 ic->ic_transport_private = NULL; 1119 idm_sodestroy(so_conn->ic_so); 1120 list_destroy(&so_conn->ic_tx_list); 1121 mutex_destroy(&so_conn->ic_tx_mutex); 1122 cv_destroy(&so_conn->ic_tx_cv); 1123 1124 kmem_free(so_conn, sizeof (idm_so_conn_t)); 1125 } 1126 1127 static void 1128 idm_so_conn_connect_common(idm_conn_t *ic) 1129 { 1130 idm_so_conn_t *so_conn; 1131 struct sockaddr_in6 t_addr; 1132 socklen_t t_addrlen = 0; 1133 1134 so_conn = ic->ic_transport_private; 1135 bzero(&t_addr, sizeof (struct sockaddr_in6)); 1136 t_addrlen = sizeof (struct sockaddr_in6); 1137 1138 /* Set the local and remote addresses in the idm conn handle */ 1139 (void) ksocket_getsockname(so_conn->ic_so, (struct sockaddr *)&t_addr, 1140 &t_addrlen, CRED()); 1141 bcopy(&t_addr, 
&ic->ic_laddr, t_addrlen); 1142 (void) ksocket_getpeername(so_conn->ic_so, (struct sockaddr *)&t_addr, 1143 &t_addrlen, CRED()); 1144 bcopy(&t_addr, &ic->ic_raddr, t_addrlen); 1145 1146 mutex_enter(&ic->ic_mutex); 1147 so_conn->ic_tx_thread = thread_create(NULL, 0, idm_sotx_thread, ic, 0, 1148 &p0, TS_RUN, minclsyspri); 1149 so_conn->ic_rx_thread = thread_create(NULL, 0, idm_sorx_thread, ic, 0, 1150 &p0, TS_RUN, minclsyspri); 1151 1152 while (so_conn->ic_rx_thread_did == 0 || 1153 so_conn->ic_tx_thread_did == 0) 1154 cv_wait(&ic->ic_cv, &ic->ic_mutex); 1155 mutex_exit(&ic->ic_mutex); 1156 } 1157 1158 /* 1159 * idm_so_conn_disconnect() 1160 * Shutdown the socket connection and stop the thread 1161 */ 1162 static void 1163 idm_so_conn_disconnect(idm_conn_t *ic) 1164 { 1165 idm_so_conn_t *so_conn; 1166 1167 so_conn = ic->ic_transport_private; 1168 1169 mutex_enter(&ic->ic_mutex); 1170 so_conn->ic_rx_thread_running = B_FALSE; 1171 so_conn->ic_tx_thread_running = B_FALSE; 1172 /* We need to wakeup the TX thread */ 1173 mutex_enter(&so_conn->ic_tx_mutex); 1174 cv_signal(&so_conn->ic_tx_cv); 1175 mutex_exit(&so_conn->ic_tx_mutex); 1176 mutex_exit(&ic->ic_mutex); 1177 1178 /* This should wakeup the RX thread if it is sleeping */ 1179 idm_soshutdown(so_conn->ic_so); 1180 1181 thread_join(so_conn->ic_tx_thread_did); 1182 thread_join(so_conn->ic_rx_thread_did); 1183 } 1184 1185 /* 1186 * idm_so_tgt_svc_create() 1187 * Establish a service on an IP address and port. idm_svc_req_t contains 1188 * the service parameters. 
1189 */ 1190 /*ARGSUSED*/ 1191 static idm_status_t 1192 idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is) 1193 { 1194 idm_so_svc_t *so_svc; 1195 1196 so_svc = kmem_zalloc(sizeof (idm_so_svc_t), KM_SLEEP); 1197 1198 /* Set the new sockets service in svc handle */ 1199 is->is_so_svc = (void *)so_svc; 1200 1201 return (IDM_STATUS_SUCCESS); 1202 } 1203 1204 /* 1205 * idm_so_tgt_svc_destroy() 1206 * Teardown sockets resources allocated in idm_so_tgt_svc_create() 1207 */ 1208 static void 1209 idm_so_tgt_svc_destroy(idm_svc_t *is) 1210 { 1211 /* the socket will have been torn down; free the service */ 1212 kmem_free(is->is_so_svc, sizeof (idm_so_svc_t)); 1213 } 1214 1215 /* 1216 * idm_so_tgt_svc_online() 1217 * Launch a watch thread on the svc allocated in idm_so_tgt_svc_create() 1218 */ 1219 1220 static idm_status_t 1221 idm_so_tgt_svc_online(idm_svc_t *is) 1222 { 1223 idm_so_svc_t *so_svc; 1224 idm_svc_req_t *sr = &is->is_svc_req; 1225 struct sockaddr_in6 sin6_ip; 1226 const uint32_t on = 1; 1227 const uint32_t off = 0; 1228 1229 mutex_enter(&is->is_mutex); 1230 so_svc = (idm_so_svc_t *)is->is_so_svc; 1231 1232 /* 1233 * Try creating an IPv6 socket first 1234 */ 1235 if ((so_svc->is_so = idm_socreate(PF_INET6, SOCK_STREAM, 0)) == NULL) { 1236 mutex_exit(&is->is_mutex); 1237 return (IDM_STATUS_FAIL); 1238 } else { 1239 bzero(&sin6_ip, sizeof (sin6_ip)); 1240 sin6_ip.sin6_family = AF_INET6; 1241 sin6_ip.sin6_port = htons(sr->sr_port); 1242 sin6_ip.sin6_addr = in6addr_any; 1243 1244 (void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET, 1245 SO_REUSEADDR, (char *)&on, sizeof (on), CRED()); 1246 /* 1247 * Turn off SO_MAC_EXEMPT so future sobinds succeed 1248 */ 1249 (void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET, 1250 SO_MAC_EXEMPT, (char *)&off, sizeof (off), CRED()); 1251 1252 if (ksocket_bind(so_svc->is_so, (struct sockaddr *)&sin6_ip, 1253 sizeof (sin6_ip), CRED()) != 0) { 1254 mutex_exit(&is->is_mutex); 1255 idm_sodestroy(so_svc->is_so); 1256 return 
(IDM_STATUS_FAIL); 1257 } 1258 } 1259 1260 idm_set_postconnect_options(so_svc->is_so); 1261 1262 if (ksocket_listen(so_svc->is_so, 5, CRED()) != 0) { 1263 mutex_exit(&is->is_mutex); 1264 idm_soshutdown(so_svc->is_so); 1265 idm_sodestroy(so_svc->is_so); 1266 return (IDM_STATUS_FAIL); 1267 } 1268 1269 /* Launch a watch thread */ 1270 so_svc->is_thread = thread_create(NULL, 0, idm_so_svc_port_watcher, 1271 is, 0, &p0, TS_RUN, minclsyspri); 1272 1273 if (so_svc->is_thread == NULL) { 1274 /* Failure to launch; teardown the socket */ 1275 mutex_exit(&is->is_mutex); 1276 idm_soshutdown(so_svc->is_so); 1277 idm_sodestroy(so_svc->is_so); 1278 return (IDM_STATUS_FAIL); 1279 } 1280 ksocket_hold(so_svc->is_so); 1281 /* Wait for the port watcher thread to start */ 1282 while (!so_svc->is_thread_running) 1283 cv_wait(&is->is_cv, &is->is_mutex); 1284 mutex_exit(&is->is_mutex); 1285 1286 return (IDM_STATUS_SUCCESS); 1287 } 1288 1289 /* 1290 * idm_so_tgt_svc_offline 1291 * 1292 * Stop listening on the IP address and port identified by idm_svc_t. 1293 */ 1294 static void 1295 idm_so_tgt_svc_offline(idm_svc_t *is) 1296 { 1297 idm_so_svc_t *so_svc; 1298 mutex_enter(&is->is_mutex); 1299 so_svc = (idm_so_svc_t *)is->is_so_svc; 1300 so_svc->is_thread_running = B_FALSE; 1301 mutex_exit(&is->is_mutex); 1302 1303 /* 1304 * Teardown socket 1305 */ 1306 idm_sodestroy(so_svc->is_so); 1307 1308 /* 1309 * Now we expect the port watcher thread to terminate 1310 */ 1311 thread_join(so_svc->is_thread_did); 1312 } 1313 1314 /* 1315 * Watch thread for target service connection establishment. 
*/
/*
 * idm_so_svc_port_watcher()
 * Thread body; arg is the idm_svc_t being served.  Publishes its thread id
 * and running flag under is_mutex and signals is_cv, then loops accepting
 * connections on the service socket until is_thread_running is cleared.
 * For every accepted socket an idm connection is created and the
 * connection state machine is sent CE_CONNECT_ACCEPT.
 *
 * is_mutex is held at the top of every loop iteration and dropped around
 * the blocking accept and connection setup; every "continue" path
 * re-acquires it first.
 */
void
idm_so_svc_port_watcher(void *arg)
{
	idm_svc_t		*svc = arg;
	ksocket_t		new_so;
	idm_conn_t		*ic;
	idm_status_t		idmrc;
	idm_so_svc_t		*so_svc;
	int			rc;
	const uint32_t		off = 0;
	struct sockaddr_in6	t_addr;
	socklen_t		t_addrlen;

	bzero(&t_addr, sizeof (struct sockaddr_in6));
	t_addrlen = sizeof (struct sockaddr_in6);
	mutex_enter(&svc->is_mutex);

	so_svc = svc->is_so_svc;
	so_svc->is_thread_running = B_TRUE;
	so_svc->is_thread_did = so_svc->is_thread->t_did;

	/* Wake idm_so_tgt_svc_online(), which waits for the running flag */
	cv_signal(&svc->is_cv);

	IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) online", (void *)svc,
	    svc->is_svc_req.sr_port);

	while (so_svc->is_thread_running) {
		mutex_exit(&svc->is_mutex);

		if ((rc = ksocket_accept(so_svc->is_so,
		    (struct sockaddr *)&t_addr, &t_addrlen,
		    &new_so, CRED())) != 0) {
			mutex_enter(&svc->is_mutex);
			/* ECONNABORTED and EINTR are not worth logging */
			if (rc != ECONNABORTED && rc != EINTR) {
				IDM_SVC_LOG(CE_NOTE, "idm_so_svc_port_watcher:"
				    " ksocket_accept failed %d", rc);
			}
			/*
			 * Unclean shutdown of this thread is not handled
			 * wait for !is_thread_running.
			 */
			continue;
		}
		/*
		 * Turn off SO_MAC_EXEMPT so future sobinds succeed
		 */
		(void) ksocket_setsockopt(new_so, SOL_SOCKET, SO_MAC_EXEMPT,
		    (char *)&off, sizeof (off), CRED());

		idmrc = idm_svc_conn_create(svc, IDM_TRANSPORT_TYPE_SOCKETS,
		    &ic);
		if (idmrc != IDM_STATUS_SUCCESS) {
			/* Drop connection */
			idm_soshutdown(new_so);
			idm_sodestroy(new_so);
			mutex_enter(&svc->is_mutex);
			continue;
		}

		idmrc = idm_so_tgt_conn_create(ic, new_so);
		if (idmrc != IDM_STATUS_SUCCESS) {
			/* Undo the idm connection, then drop the socket */
			idm_svc_conn_destroy(ic);
			idm_soshutdown(new_so);
			idm_sodestroy(new_so);
			mutex_enter(&svc->is_mutex);
			continue;
		}

		/*
		 * Kick the state machine.  At CS_S3_XPT_UP the state machine
		 * will notify the client (target) about the new connection.
		 */
		idm_conn_event(ic, CE_CONNECT_ACCEPT, NULL);

		mutex_enter(&svc->is_mutex);
	}
	/* Drop the socket hold taken in idm_so_tgt_svc_online() */
	ksocket_rele(so_svc->is_so);
	so_svc->is_thread_running = B_FALSE;
	mutex_exit(&svc->is_mutex);

	IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) offline", (void *)svc,
	    svc->is_svc_req.sr_port);

	thread_exit();
}

/*
 * idm_so_free_task_rsrc() stops any ongoing processing of the task and
 * frees resources associated with the task.
 *
 * It's not clear that this should return idm_status_t.  What do we do
 * if it fails?
 *
 * As written it always returns IDM_STATUS_SUCCESS.
 */
static idm_status_t
idm_so_free_task_rsrc(idm_task_t *idt)
{
	idm_buf_t	*idb, *next_idb;

	/*
	 * There is nothing to cleanup on initiator connections
	 */
	if (IDM_CONN_ISINI(idt->idt_ic))
		return (IDM_STATUS_SUCCESS);

	/*
	 * If this is a target connection, call idm_buf_rx_from_ini_done for
	 * any buffer on the "outbufv" list with idb->idb_in_transport==B_TRUE.
	 *
	 * In addition, remove any buffers associated with this task from
	 * the ic_tx_list.  We'll do this by walking the idt_inbufv list, but
	 * items don't actually get removed from that list (and completion
	 * routines called) until idm_task_cleanup.
	 */
	mutex_enter(&idt->idt_mutex);

	for (idb = list_head(&idt->idt_outbufv); idb != NULL; idb = next_idb) {
		/* Fetch the successor before the mutex can be dropped below */
		next_idb = list_next(&idt->idt_outbufv, idb);
		if (idb->idb_in_transport) {
			/*
			 * idm_buf_rx_from_ini_done releases idt->idt_mutex
			 */
			DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
			    uintptr_t, idb->idb_buf,
			    uint32_t, idb->idb_bufoffset,
			    uint64_t, 0, uint32_t, 0, uint32_t, 0,
			    uint32_t, idb->idb_xfer_len,
			    int, XFER_BUF_RX_FROM_INI);
			idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_ABORTED);
			/* Re-take the mutex to continue the walk */
			mutex_enter(&idt->idt_mutex);
		}
	}

	for (idb = list_head(&idt->idt_inbufv); idb != NULL; idb = next_idb) {
		next_idb = list_next(&idt->idt_inbufv, idb);
		/*
		 * We want to remove these items from the tx_list as well,
		 * but knowing it's in the idt_inbufv list is not a guarantee
		 * that it's in the tx_list.  If it's on the tx list then
		 * let idm_sotx_thread() clean it up.
		 */
		if (idb->idb_in_transport && !idb->idb_tx_thread) {
			/*
			 * idm_buf_tx_to_ini_done releases idt->idt_mutex
			 */
			DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
			    uintptr_t, idb->idb_buf,
			    uint32_t, idb->idb_bufoffset,
			    uint64_t, 0, uint32_t, 0, uint32_t, 0,
			    uint32_t, idb->idb_xfer_len,
			    int, XFER_BUF_TX_TO_INI);
			idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
			/* Re-take the mutex to continue the walk */
			mutex_enter(&idt->idt_mutex);
		}
	}

	mutex_exit(&idt->idt_mutex);

	return (IDM_STATUS_SUCCESS);
}

/*
 * idm_so_negotiate_key_values() validates the key values for this connection
 *
 * The sockets transport does no negotiation of its own; all arguments are
 * ignored and KV_HANDLED is returned.
 */
/* ARGSUSED */
static kv_status_t
idm_so_negotiate_key_values(idm_conn_t *it, nvlist_t *request_nvl,
    nvlist_t *response_nvl, nvlist_t *negotiated_nvl)
{
	/* All parameters are negotiated at the iscsit level */
	return (KV_HANDLED);
}

/*
 * idm_so_notice_key_values() activates the
negotiated key values for 1491 * this connection. 1492 */ 1493 static void 1494 idm_so_notice_key_values(idm_conn_t *it, nvlist_t *negotiated_nvl) 1495 { 1496 char *nvp_name; 1497 nvpair_t *nvp; 1498 nvpair_t *next_nvp; 1499 int nvrc; 1500 idm_status_t idm_status; 1501 const idm_kv_xlate_t *ikvx; 1502 uint64_t num_val; 1503 1504 for (nvp = nvlist_next_nvpair(negotiated_nvl, NULL); 1505 nvp != NULL; nvp = next_nvp) { 1506 next_nvp = nvlist_next_nvpair(negotiated_nvl, nvp); 1507 nvp_name = nvpair_name(nvp); 1508 1509 ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name)); 1510 switch (ikvx->ik_key_id) { 1511 case KI_HEADER_DIGEST: 1512 case KI_DATA_DIGEST: 1513 idm_status = idm_so_handle_digest(it, nvp, ikvx); 1514 ASSERT(idm_status == 0); 1515 1516 /* Remove processed item from negotiated_nvl list */ 1517 nvrc = nvlist_remove_all( 1518 negotiated_nvl, ikvx->ik_key_name); 1519 ASSERT(nvrc == 0); 1520 break; 1521 case KI_MAX_RECV_DATA_SEGMENT_LENGTH: 1522 /* 1523 * Just pass the value down to idm layer. 1524 * No need to remove it from negotiated_nvl list here. 
1525 */ 1526 nvrc = nvpair_value_uint64(nvp, &num_val); 1527 ASSERT(nvrc == 0); 1528 it->ic_conn_params.max_xmit_dataseglen = 1529 (uint32_t)num_val; 1530 break; 1531 default: 1532 break; 1533 } 1534 } 1535 } 1536 1537 /* 1538 * idm_so_declare_key_values() declares the key values for this connection 1539 */ 1540 /* ARGSUSED */ 1541 static kv_status_t 1542 idm_so_declare_key_values(idm_conn_t *it, nvlist_t *config_nvl, 1543 nvlist_t *outgoing_nvl) 1544 { 1545 char *nvp_name; 1546 nvpair_t *nvp; 1547 nvpair_t *next_nvp; 1548 kv_status_t kvrc; 1549 int nvrc = 0; 1550 const idm_kv_xlate_t *ikvx; 1551 uint64_t num_val; 1552 1553 for (nvp = nvlist_next_nvpair(config_nvl, NULL); 1554 nvp != NULL && nvrc == 0; nvp = next_nvp) { 1555 next_nvp = nvlist_next_nvpair(config_nvl, nvp); 1556 nvp_name = nvpair_name(nvp); 1557 1558 ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name)); 1559 switch (ikvx->ik_key_id) { 1560 case KI_MAX_RECV_DATA_SEGMENT_LENGTH: 1561 if ((nvrc = nvpair_value_uint64(nvp, &num_val)) != 0) { 1562 break; 1563 } 1564 if (outgoing_nvl && 1565 (nvrc = nvlist_add_uint64(outgoing_nvl, 1566 nvp_name, num_val)) != 0) { 1567 break; 1568 } 1569 it->ic_conn_params.max_recv_dataseglen = 1570 (uint32_t)num_val; 1571 break; 1572 default: 1573 break; 1574 } 1575 } 1576 kvrc = idm_nvstat_to_kvstat(nvrc); 1577 return (kvrc); 1578 } 1579 1580 static idm_status_t 1581 idm_so_handle_digest(idm_conn_t *it, nvpair_t *digest_choice, 1582 const idm_kv_xlate_t *ikvx) 1583 { 1584 int nvrc; 1585 char *digest_choice_string; 1586 1587 nvrc = nvpair_value_string(digest_choice, 1588 &digest_choice_string); 1589 ASSERT(nvrc == 0); 1590 if (strcasecmp(digest_choice_string, "crc32c") == 0) { 1591 switch (ikvx->ik_key_id) { 1592 case KI_HEADER_DIGEST: 1593 it->ic_conn_flags |= IDM_CONN_HEADER_DIGEST; 1594 break; 1595 case KI_DATA_DIGEST: 1596 it->ic_conn_flags |= IDM_CONN_DATA_DIGEST; 1597 break; 1598 default: 1599 ASSERT(0); 1600 break; 1601 } 1602 } else if 
(strcasecmp(digest_choice_string, "none") == 0) { 1603 switch (ikvx->ik_key_id) { 1604 case KI_HEADER_DIGEST: 1605 it->ic_conn_flags &= ~IDM_CONN_HEADER_DIGEST; 1606 break; 1607 case KI_DATA_DIGEST: 1608 it->ic_conn_flags &= ~IDM_CONN_DATA_DIGEST; 1609 break; 1610 default: 1611 ASSERT(0); 1612 break; 1613 } 1614 } else { 1615 ASSERT(0); 1616 } 1617 1618 return (IDM_STATUS_SUCCESS); 1619 } 1620 1621 1622 /* 1623 * idm_so_conn_is_capable() verifies that the passed connection is provided 1624 * for by the sockets interface. 1625 */ 1626 /* ARGSUSED */ 1627 static boolean_t 1628 idm_so_conn_is_capable(idm_conn_req_t *ic, idm_transport_caps_t *caps) 1629 { 1630 return (B_TRUE); 1631 } 1632 1633 /* 1634 * idm_so_rx_datain() validates the Data Sequence number of the PDU. The 1635 * idm_sorecv_scsidata() function invoked earlier actually reads the data 1636 * off the socket into the appropriate buffers. 1637 */ 1638 static void 1639 idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu) 1640 { 1641 iscsi_data_hdr_t *bhs; 1642 idm_task_t *idt; 1643 idm_buf_t *idb; 1644 uint32_t datasn; 1645 size_t offset; 1646 iscsi_hdr_t *ihp = (iscsi_hdr_t *)pdu->isp_hdr; 1647 iscsi_data_rsp_hdr_t *idrhp = (iscsi_data_rsp_hdr_t *)ihp; 1648 1649 ASSERT(ic != NULL); 1650 ASSERT(pdu != NULL); 1651 ASSERT(IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP); 1652 1653 bhs = (iscsi_data_hdr_t *)pdu->isp_hdr; 1654 datasn = ntohl(bhs->datasn); 1655 offset = ntohl(bhs->offset); 1656 1657 /* 1658 * Look up the task corresponding to the initiator task tag 1659 * to get the buffers affiliated with the task. 
1660 */ 1661 idt = idm_task_find(ic, bhs->itt, bhs->ttt); 1662 if (idt == NULL) { 1663 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: failed to find task"); 1664 idm_pdu_rx_protocol_error(ic, pdu); 1665 return; 1666 } 1667 1668 idb = pdu->isp_sorx_buf; 1669 if (idb == NULL) { 1670 IDM_CONN_LOG(CE_WARN, 1671 "idm_so_rx_datain: failed to find buffer"); 1672 idm_task_rele(idt); 1673 idm_pdu_rx_protocol_error(ic, pdu); 1674 return; 1675 } 1676 1677 /* 1678 * DataSN values should be sequential and should not have any gaps or 1679 * repetitions. Check the DataSN with the one stored in the task. 1680 */ 1681 if (datasn == idt->idt_exp_datasn) { 1682 idt->idt_exp_datasn++; /* keep track of DataSN received */ 1683 } else { 1684 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: datasn out of order"); 1685 idm_task_rele(idt); 1686 idm_pdu_rx_protocol_error(ic, pdu); 1687 return; 1688 } 1689 1690 /* 1691 * PDUs in a sequence should be in continuously increasing 1692 * address offset 1693 */ 1694 if (offset != idb->idb_exp_offset) { 1695 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: unexpected offset"); 1696 idm_task_rele(idt); 1697 idm_pdu_rx_protocol_error(ic, pdu); 1698 return; 1699 } 1700 /* Expected next relative buffer offset */ 1701 idb->idb_exp_offset += n2h24(bhs->dlength); 1702 idt->idt_rx_bytes += n2h24(bhs->dlength); 1703 1704 idm_task_rele(idt); 1705 1706 /* 1707 * For now call scsi_rsp which will process the data rsp 1708 * Revisit, need to provide an explicit client entry point for 1709 * phase collapse completions. 1710 */ 1711 if ((IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP) && 1712 (idrhp->flags & ISCSI_FLAG_DATA_STATUS)) { 1713 (*ic->ic_conn_ops.icb_rx_scsi_rsp)(ic, pdu); 1714 } 1715 1716 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS); 1717 } 1718 1719 /* 1720 * The idm_so_rx_dataout() function is used by the iSCSI target to read 1721 * data from the Data-Out PDU sent by the iSCSI initiator. 
1722 * 1723 * This function gets the Initiator Task Tag from the PDU BHS and looks up the 1724 * task to get the buffers associated with the PDU. A PDU might span buffers. 1725 * The data is then read into the respective buffer. 1726 */ 1727 static void 1728 idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu) 1729 { 1730 1731 iscsi_data_hdr_t *bhs; 1732 idm_task_t *idt; 1733 idm_buf_t *idb; 1734 size_t offset; 1735 1736 ASSERT(ic != NULL); 1737 ASSERT(pdu != NULL); 1738 ASSERT(IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA); 1739 1740 bhs = (iscsi_data_hdr_t *)pdu->isp_hdr; 1741 offset = ntohl(bhs->offset); 1742 1743 /* 1744 * Look up the task corresponding to the initiator task tag 1745 * to get the buffers affiliated with the task. 1746 */ 1747 idt = idm_task_find(ic, bhs->itt, bhs->ttt); 1748 if (idt == NULL) { 1749 IDM_CONN_LOG(CE_WARN, 1750 "idm_so_rx_dataout: failed to find task"); 1751 idm_pdu_rx_protocol_error(ic, pdu); 1752 return; 1753 } 1754 1755 idb = pdu->isp_sorx_buf; 1756 if (idb == NULL) { 1757 IDM_CONN_LOG(CE_WARN, 1758 "idm_so_rx_dataout: failed to find buffer"); 1759 idm_task_rele(idt); 1760 idm_pdu_rx_protocol_error(ic, pdu); 1761 return; 1762 } 1763 1764 /* Keep track of data transferred - check data offsets */ 1765 if (offset != idb->idb_exp_offset) { 1766 IDM_CONN_LOG(CE_NOTE, "idm_so_rx_dataout: offset out of seq: " 1767 "%ld, %d", offset, idb->idb_exp_offset); 1768 idm_task_rele(idt); 1769 idm_pdu_rx_protocol_error(ic, pdu); 1770 return; 1771 } 1772 /* Expected next relative offset */ 1773 idb->idb_exp_offset += ntoh24(bhs->dlength); 1774 idt->idt_rx_bytes += n2h24(bhs->dlength); 1775 1776 /* 1777 * Call the buffer callback when the transfer is complete 1778 * 1779 * The connection state machine should only abort tasks after 1780 * shutting down the connection so we are assured that there 1781 * won't be a simultaneous attempt to abort this task at the 1782 * same time as we are processing this PDU (due to a connection 1783 * state change). 
1784 */ 1785 if (bhs->flags & ISCSI_FLAG_FINAL) { 1786 /* 1787 * We have gotten the last data-message for the current 1788 * transfer. idb_xfer_len represents the data that the 1789 * command intended to transfer, it does not represent the 1790 * actual number of bytes transferred. If we have not 1791 * transferred the expected number of bytes something is 1792 * wrong. 1793 * 1794 * We have two options, when there is a mismatch, we can 1795 * regard the transfer as invalid -- or we can modify our 1796 * notion of "xfer_len." In order to be as stringent as 1797 * possible, here we regard this transfer as in error; and 1798 * bail out. 1799 */ 1800 if (idb->idb_buflen == idb->idb_xfer_len && 1801 idb->idb_buflen != 1802 (idb->idb_exp_offset - idb->idb_bufoffset)) { 1803 printf("idm_so_rx_dataout: incomplete transfer, " 1804 "protocol err"); 1805 IDM_CONN_LOG(CE_NOTE, 1806 "idm_so_rx_dataout: incomplete transfer: %ld, %d", 1807 offset, (int)(idb->idb_exp_offset - offset)); 1808 idm_task_rele(idt); 1809 idm_pdu_rx_protocol_error(ic, pdu); 1810 return; 1811 } 1812 /* 1813 * We only want to call idm_buf_rx_from_ini_done once 1814 * per transfer. It's possible that this task has 1815 * already been aborted in which case 1816 * idm_so_free_task_rsrc will call idm_buf_rx_from_ini_done 1817 * for each buffer with idb_in_transport==B_TRUE. To 1818 * close this window and ensure that this doesn't happen, 1819 * we'll clear idb->idb_in_transport now while holding 1820 * the task mutex. This is only really an issue for 1821 * SCSI task abort -- if tasks were being aborted because 1822 * of a connection state change the state machine would 1823 * have already stopped the receive thread. 1824 */ 1825 mutex_enter(&idt->idt_mutex); 1826 1827 /* 1828 * Release the task hold here (obtained in idm_task_find) 1829 * because the task may complete synchronously during 1830 * idm_buf_rx_from_ini_done. 
Since we still have an active 1831 * buffer we know there is at least one additional hold on idt. 1832 */ 1833 idm_task_rele(idt); 1834 1835 /* 1836 * idm_buf_rx_from_ini_done releases idt->idt_mutex 1837 */ 1838 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic, 1839 uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset, 1840 uint64_t, 0, uint32_t, 0, uint32_t, 0, 1841 uint32_t, idb->idb_xfer_len, 1842 int, XFER_BUF_RX_FROM_INI); 1843 idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_SUCCESS); 1844 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS); 1845 return; 1846 } 1847 1848 idm_task_rele(idt); 1849 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS); 1850 } 1851 1852 /* 1853 * The idm_so_rx_rtt() function is used by the iSCSI initiator to handle 1854 * the R2T PDU sent by the iSCSI target indicating that it is ready to 1855 * accept data. This gets the Initiator Task Tag (itt) from the PDU BHS 1856 * and looks up the task in the task tree using the itt to get the output 1857 * buffers associated the task. The R2T PDU contains the offset of the 1858 * requested data and the data length. This function then constructs a 1859 * sequence of iSCSI PDUs and outputs the requested data. Each Data-Out 1860 * PDU is associated with the R2T by the Target Transfer Tag (ttt). 
1861 */ 1862 1863 static void 1864 idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu) 1865 { 1866 idm_task_t *idt; 1867 idm_buf_t *idb; 1868 iscsi_rtt_hdr_t *rtt_hdr; 1869 uint32_t data_offset; 1870 uint32_t data_length; 1871 1872 ASSERT(ic != NULL); 1873 ASSERT(pdu != NULL); 1874 1875 rtt_hdr = (iscsi_rtt_hdr_t *)pdu->isp_hdr; 1876 data_offset = ntohl(rtt_hdr->data_offset); 1877 data_length = ntohl(rtt_hdr->data_length); 1878 idt = idm_task_find(ic, rtt_hdr->itt, rtt_hdr->ttt); 1879 1880 if (idt == NULL) { 1881 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find task"); 1882 idm_pdu_rx_protocol_error(ic, pdu); 1883 return; 1884 } 1885 1886 /* Find the buffer bound to the task by the iSCSI initiator */ 1887 mutex_enter(&idt->idt_mutex); 1888 idb = idm_buf_find(&idt->idt_outbufv, data_offset); 1889 if (idb == NULL) { 1890 mutex_exit(&idt->idt_mutex); 1891 idm_task_rele(idt); 1892 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find buffer"); 1893 idm_pdu_rx_protocol_error(ic, pdu); 1894 return; 1895 } 1896 1897 /* return buffer contains this data */ 1898 if (data_offset + data_length > idb->idb_buflen) { 1899 /* Overflow */ 1900 mutex_exit(&idt->idt_mutex); 1901 idm_task_rele(idt); 1902 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: read from outside " 1903 "buffer"); 1904 idm_pdu_rx_protocol_error(ic, pdu); 1905 return; 1906 } 1907 1908 idt->idt_r2t_ttt = rtt_hdr->ttt; 1909 idt->idt_exp_datasn = 0; 1910 1911 idm_so_send_rtt_data(ic, idt, idb, data_offset, 1912 ntohl(rtt_hdr->data_length)); 1913 /* 1914 * the idt_mutex is released in idm_so_send_rtt_data 1915 */ 1916 1917 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS); 1918 idm_task_rele(idt); 1919 1920 } 1921 1922 idm_status_t 1923 idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu) 1924 { 1925 uint8_t pad[ISCSI_PAD_WORD_LEN]; 1926 int pad_len; 1927 uint32_t data_digest_crc; 1928 uint32_t crc_calculated; 1929 int total_len; 1930 idm_so_conn_t *so_conn; 1931 1932 so_conn = ic->ic_transport_private; 1933 1934 pad_len = ((ISCSI_PAD_WORD_LEN 
- 1935 (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) & 1936 (ISCSI_PAD_WORD_LEN - 1)); 1937 1938 ASSERT(pdu->isp_iovlen < (PDU_MAX_IOVLEN - 2)); /* pad + data digest */ 1939 1940 total_len = pdu->isp_datalen; 1941 1942 if (pad_len) { 1943 pdu->isp_iov[pdu->isp_iovlen].iov_base = (char *)&pad; 1944 pdu->isp_iov[pdu->isp_iovlen].iov_len = pad_len; 1945 total_len += pad_len; 1946 pdu->isp_iovlen++; 1947 } 1948 1949 /* setup data digest */ 1950 if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) { 1951 pdu->isp_iov[pdu->isp_iovlen].iov_base = 1952 (char *)&data_digest_crc; 1953 pdu->isp_iov[pdu->isp_iovlen].iov_len = 1954 sizeof (data_digest_crc); 1955 total_len += sizeof (data_digest_crc); 1956 pdu->isp_iovlen++; 1957 } 1958 1959 pdu->isp_data = (uint8_t *)(uintptr_t)pdu->isp_iov[0].iov_base; 1960 1961 if (idm_iov_sorecv(so_conn->ic_so, &pdu->isp_iov[0], 1962 pdu->isp_iovlen, total_len) != 0) { 1963 return (IDM_STATUS_IO); 1964 } 1965 1966 if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) { 1967 crc_calculated = idm_crc32c(pdu->isp_data, 1968 pdu->isp_datalen); 1969 if (pad_len) { 1970 crc_calculated = idm_crc32c_continued((char *)&pad, 1971 pad_len, crc_calculated); 1972 } 1973 if (crc_calculated != data_digest_crc) { 1974 IDM_CONN_LOG(CE_WARN, 1975 "idm_sorecvdata: " 1976 "CRC error: actual 0x%x, calc 0x%x", 1977 data_digest_crc, crc_calculated); 1978 1979 /* Invalid Data Digest */ 1980 return (IDM_STATUS_DATA_DIGEST); 1981 } 1982 } 1983 1984 return (IDM_STATUS_SUCCESS); 1985 } 1986 1987 /* 1988 * idm_sorecv_scsidata() is used to receive scsi data from the socket. The 1989 * Data-type PDU header must be read into the idm_pdu_t structure prior to 1990 * calling this function. 
*/
/*
 * Returns IDM_STATUS_FAIL when the task or a buffer covering the PDU's
 * offset cannot be found, or when the buffer cannot hold the whole data
 * segment; otherwise returns the status of idm_sorecvdata().  On success
 * pdu->isp_sorx_buf identifies the buffer that received the data.
 */
idm_status_t
idm_sorecv_scsidata(idm_conn_t *ic, idm_pdu_t *pdu)
{
	iscsi_data_hdr_t	*bhs;
	idm_task_t		*task;
	uint32_t		offset;
	uint8_t			opcode;
	uint32_t		dlength;
	list_t			*buflst;
	uint32_t		xfer_bytes;
	idm_status_t		status;

	ASSERT(ic != NULL);
	ASSERT(pdu != NULL);

	bhs	= (iscsi_data_hdr_t *)pdu->isp_hdr;

	offset	= ntohl(bhs->offset);
	opcode	= IDM_PDU_OPCODE(pdu);
	dlength = n2h24(bhs->dlength);

	ASSERT((opcode == ISCSI_OP_SCSI_DATA_RSP) ||
	    (opcode == ISCSI_OP_SCSI_DATA));

	/*
	 * Successful lookup implicitly gets a "hold" on the task.  This
	 * hold must be released before leaving this function.  At one
	 * point we were caching this task context and retaining the hold
	 * but it turned out to be very difficult to release the hold properly.
	 * The task can be aborted and the connection shutdown between this
	 * call and the subsequent expected call to idm_so_rx_datain/
	 * idm_so_rx_dataout (in which case those functions are not called).
	 * Releasing the hold in the PDU callback doesn't work well either
	 * because the whole task may be completed by then at which point
	 * it is too late to release the hold -- for better or worse this
	 * code doesn't wait on the refcnts during normal operation.
	 * idm_task_find() is very fast and it is not a huge burden if we
	 * have to do it twice.
	 */
	task = idm_task_find(ic, bhs->itt, bhs->ttt);
	if (task == NULL) {
		IDM_CONN_LOG(CE_WARN,
		    "idm_sorecv_scsidata: could not find task");
		return (IDM_STATUS_FAIL);
	}

	/* Data-In PDUs fill the task's inbufv list, Data-Out the outbufv */
	mutex_enter(&task->idt_mutex);
	buflst	= (opcode == ISCSI_OP_SCSI_DATA_RSP) ?
	    &task->idt_inbufv : &task->idt_outbufv;
	pdu->isp_sorx_buf = idm_buf_find(buflst, offset);
	mutex_exit(&task->idt_mutex);

	if (pdu->isp_sorx_buf == NULL) {
		idm_task_rele(task);
		IDM_CONN_LOG(CE_WARN, "idm_sorecv_scsidata: could not find "
		    "buffer for offset %x opcode=%x",
		    offset, opcode);
		return (IDM_STATUS_FAIL);
	}

	/* Point the PDU's iovec at the destination inside the buffer */
	xfer_bytes = idm_fill_iov(pdu, pdu->isp_sorx_buf, offset, dlength);
	ASSERT(xfer_bytes != 0);
	if (xfer_bytes != dlength) {
		idm_task_rele(task);
		/*
		 * Buffer overflow, connection error.  The PDU data is still
		 * sitting in the socket so we can't use the connection
		 * again until that data is drained.
		 */
		return (IDM_STATUS_FAIL);
	}

	status = idm_sorecvdata(ic, pdu);

	idm_task_rele(task);

	return (status);
}

/*
 * idm_fill_iov()
 * Append one iovec entry to pdu that points into idb at relative offset
 * ro (an offset in the same space as idb_bufoffset).  The entry length is
 * dlength clipped to the space left in the buffer; that length is
 * returned, so a return value smaller than dlength means the data does
 * not fit.
 */
static uint32_t
idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb, uint32_t ro, uint32_t dlength)
{
	uint32_t	buf_ro = ro - idb->idb_bufoffset;
	uint32_t	xfer_len = min(dlength, idb->idb_buflen - buf_ro);

	ASSERT(ro >= idb->idb_bufoffset);

	pdu->isp_iov[pdu->isp_iovlen].iov_base	=
	    (caddr_t)idb->idb_buf + buf_ro;
	pdu->isp_iov[pdu->isp_iovlen].iov_len	= xfer_len;
	pdu->isp_iovlen++;

	return (xfer_len);
}

/*
 * idm_sorecv_nonscsidata()
 * Receive the data segment of a non-SCSI-data PDU.  Allocates a buffer of
 * isp_datalen bytes, makes it the PDU's only iovec entry, marks the PDU
 * as carrying additional data and reads the segment with
 * idm_sorecvdata(), whose status is returned.
 */
int
idm_sorecv_nonscsidata(idm_conn_t *ic, idm_pdu_t *pdu)
{
	pdu->isp_data = kmem_alloc(pdu->isp_datalen, KM_SLEEP);
	ASSERT(pdu->isp_data != NULL);

	pdu->isp_databuflen = pdu->isp_datalen;
	pdu->isp_iov[0].iov_base = (caddr_t)pdu->isp_data;
	pdu->isp_iov[0].iov_len = pdu->isp_datalen;
	pdu->isp_iovlen = 1;
	/*
	 * Since we are associating a new data buffer with this received
	 * PDU we need to set a specific callback to free the data
	 * after the PDU is processed.
	 */
	pdu->isp_flags |= IDM_PDU_ADDL_DATA;
	pdu->isp_callback = idm_sorx_addl_pdu_cb;

	return (idm_sorecvdata(ic, pdu));
}

/*
 * idm_sorx_thread()
 * Receive thread body; arg is the idm_conn_t.  Holds the connection for
 * the life of the thread, publishes its thread id and running flag under
 * ic_mutex and signals ic_cv, then loops reading one PDU at a time (header
 * first, then any data segment) and passing it to idm_pdu_rx() until
 * ic_rx_thread_running is cleared.
 *
 * ic_mutex is held at the top of every loop iteration and dropped while a
 * PDU is read and processed; each "continue" path re-acquires it first.
 */
void
idm_sorx_thread(void *arg)
{
	boolean_t	conn_failure = B_FALSE;
	idm_conn_t	*ic = (idm_conn_t *)arg;
	idm_so_conn_t	*so_conn;
	idm_pdu_t	*pdu;
	idm_status_t	rc;

	idm_conn_hold(ic);

	mutex_enter(&ic->ic_mutex);

	so_conn = ic->ic_transport_private;
	so_conn->ic_rx_thread_running = B_TRUE;
	so_conn->ic_rx_thread_did = so_conn->ic_rx_thread->t_did;
	/* Wake idm_so_conn_connect_common(), which waits for the thread id */
	cv_signal(&ic->ic_cv);

	while (so_conn->ic_rx_thread_running) {
		mutex_exit(&ic->ic_mutex);

		/*
		 * Get PDU with default header size (large enough for
		 * BHS plus any anticipated AHS).  PDU from
		 * the cache will have all values set correctly
		 * for sockets RX including callback.
		 */
		pdu = kmem_cache_alloc(idm.idm_sorx_pdu_cache, KM_SLEEP);
		pdu->isp_ic = ic;
		pdu->isp_flags = 0;
		pdu->isp_transport_hdrlen = 0;

		if ((rc = idm_sorecvhdr(ic, pdu)) != 0) {
			/*
			 * Call idm_pdu_complete so that we call the callback
			 * and ensure any memory allocated in idm_sorecvhdr
			 * gets freed up.
			 */
			idm_pdu_complete(pdu, IDM_STATUS_FAIL);

			/*
			 * If ic_rx_thread_running is still set then
			 * this is some kind of connection problem
			 * on the socket.  In this case we want to
			 * generate an event.  Otherwise some other
			 * thread closed the socket due to another
			 * issue in which case we don't need to
			 * generate an event.
			 */
			mutex_enter(&ic->ic_mutex);
			if (so_conn->ic_rx_thread_running) {
				conn_failure = B_TRUE;
				so_conn->ic_rx_thread_running = B_FALSE;
			}

			continue;
		}

		/*
		 * Header has been read and validated.  Now we need
		 * to read the PDU data payload (if present).  SCSI data
		 * need to be transferred from the socket directly into
		 * the associated transfer buffer for the SCSI task.
		 */
		if (pdu->isp_datalen != 0) {
			if ((IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA) ||
			    (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP)) {
				rc = idm_sorecv_scsidata(ic, pdu);
				/*
				 * All SCSI errors are fatal to the
				 * connection right now since we have no
				 * place to put the data.  What we need
				 * is some kind of sink to dispose of unwanted
				 * SCSI data.  For example an invalid task tag
				 * should not kill the connection (although
				 * we may want to drop the connection).
				 */
			} else {
				/*
				 * Not data PDUs so allocate a buffer for the
				 * data segment and read the remaining data.
				 */
				rc = idm_sorecv_nonscsidata(ic, pdu);
			}
			if (rc != 0) {
				/*
				 * Call idm_pdu_complete so that we call the
				 * callback and ensure any memory allocated
				 * in idm_sorecvhdr gets freed up.
				 */
				idm_pdu_complete(pdu, IDM_STATUS_FAIL);

				/*
				 * If ic_rx_thread_running is still set then
				 * this is some kind of connection problem
				 * on the socket.  In this case we want to
				 * generate an event.  Otherwise some other
				 * thread closed the socket due to another
				 * issue in which case we don't need to
				 * generate an event.
				 */
				mutex_enter(&ic->ic_mutex);
				if (so_conn->ic_rx_thread_running) {
					conn_failure = B_TRUE;
					so_conn->ic_rx_thread_running = B_FALSE;
				}
				continue;
			}
		}

		/*
		 * Process RX PDU
		 */
		idm_pdu_rx(ic, pdu);

		mutex_enter(&ic->ic_mutex);
	}

	mutex_exit(&ic->ic_mutex);

	/*
	 * If we dropped out of the RX processing loop because of
	 * a socket problem or other connection failure (including
	 * digest errors) then we need to generate a state machine
	 * event to shut the connection down.
	 * If the state machine is already in, for example, INIT_ERROR, this
	 * event will get dropped, and the TX thread will never be notified
	 * to shut down.  To be safe, we'll just notify it here.
	 */
	if (conn_failure) {
		if (so_conn->ic_tx_thread_running) {
			so_conn->ic_tx_thread_running = B_FALSE;
			mutex_enter(&so_conn->ic_tx_mutex);
			cv_signal(&so_conn->ic_tx_cv);
			mutex_exit(&so_conn->ic_tx_mutex);
		}

		/* rc still holds the status of the receive that failed */
		idm_conn_event(ic, CE_TRANSPORT_FAIL, rc);
	}

	idm_conn_rele(ic);

	thread_exit();
}

/*
 * idm_so_tx
 *
 * This is the implementation of idm_transport_ops_t's it_tx_pdu entry
 * point.  By definition, it is supposed to be fast.  So, simply queue
 * the entry and return.  The real work is done by idm_i_so_tx() via
 * idm_sotx_thread().
 *
 * If the TX thread is no longer running the PDU is not queued; it is
 * completed immediately with IDM_STATUS_ABORTED.
 */

static void
idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu)
{
	idm_so_conn_t *so_conn = ic->ic_transport_private;

	ASSERT(pdu->isp_ic == ic);
	mutex_enter(&so_conn->ic_tx_mutex);

	if (!so_conn->ic_tx_thread_running) {
		mutex_exit(&so_conn->ic_tx_mutex);
		idm_pdu_complete(pdu, IDM_STATUS_ABORTED);
		return;
	}

	/* Queue at the tail and wake the TX thread */
	list_insert_tail(&so_conn->ic_tx_list, (void *)pdu);
	cv_signal(&so_conn->ic_tx_cv);
	mutex_exit(&so_conn->ic_tx_mutex);
}

static idm_status_t
idm_i_so_tx(idm_pdu_t *pdu)
{
	idm_conn_t	*ic = pdu->isp_ic;
	idm_status_t	status = IDM_STATUS_SUCCESS;
	uint8_t		pad[ISCSI_PAD_WORD_LEN];
	int		pad_len;
	uint32_t	hdr_digest_crc;
	uint32_t	data_digest_crc = 0;
	int		total_len = 0;
	int		iovlen = 0;
	struct iovec	iov[6];
	idm_so_conn_t	*so_conn;

	so_conn = ic->ic_transport_private;

	/* Setup BHS */
	iov[iovlen].iov_base	= (caddr_t)pdu->isp_hdr;
	iov[iovlen].iov_len	= pdu->isp_hdrlen;
	total_len		+= iov[iovlen].iov_len;
	iovlen++;

	/* Setup header
digest */ 2304 if (((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) && 2305 (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST)) { 2306 hdr_digest_crc = idm_crc32c(pdu->isp_hdr, pdu->isp_hdrlen); 2307 2308 iov[iovlen].iov_base = (caddr_t)&hdr_digest_crc; 2309 iov[iovlen].iov_len = sizeof (hdr_digest_crc); 2310 total_len += iov[iovlen].iov_len; 2311 iovlen++; 2312 } 2313 2314 /* Setup the data */ 2315 if (pdu->isp_datalen) { 2316 idm_task_t *idt; 2317 idm_buf_t *idb; 2318 iscsi_data_hdr_t *ihp; 2319 ihp = (iscsi_data_hdr_t *)pdu->isp_hdr; 2320 /* Write of immediate data */ 2321 if (ic->ic_ffp && 2322 (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_CMD || 2323 IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA)) { 2324 idt = idm_task_find(ic, ihp->itt, ihp->ttt); 2325 if (idt) { 2326 mutex_enter(&idt->idt_mutex); 2327 idb = idm_buf_find(&idt->idt_outbufv, 0); 2328 mutex_exit(&idt->idt_mutex); 2329 /* 2330 * If the initiator call to idm_buf_alloc 2331 * failed then we can get to this point 2332 * without a bound buffer. The associated 2333 * connection failure will clean things up 2334 * later. It would be nice to come up with 2335 * a cleaner way to handle this. In 2336 * particular it seems absurd to look up 2337 * the task and the buffer just to update 2338 * this counter. 2339 */ 2340 if (idb) 2341 idb->idb_xfer_len += pdu->isp_datalen; 2342 idm_task_rele(idt); 2343 } 2344 } 2345 2346 iov[iovlen].iov_base = (caddr_t)pdu->isp_data; 2347 iov[iovlen].iov_len = pdu->isp_datalen; 2348 total_len += iov[iovlen].iov_len; 2349 iovlen++; 2350 } 2351 2352 /* Setup the data pad if necessary */ 2353 pad_len = ((ISCSI_PAD_WORD_LEN - 2354 (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) & 2355 (ISCSI_PAD_WORD_LEN - 1)); 2356 2357 if (pad_len) { 2358 bzero(pad, sizeof (pad)); 2359 iov[iovlen].iov_base = (void *)&pad; 2360 iov[iovlen].iov_len = pad_len; 2361 total_len += iov[iovlen].iov_len; 2362 iovlen++; 2363 } 2364 2365 /* 2366 * Setup the data digest if enabled. 
Data-digest is not sent 2367 * for login-phase PDUs. 2368 */ 2369 if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) && 2370 ((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) && 2371 (pdu->isp_datalen || pad_len)) { 2372 /* 2373 * RFC3720/10.2.3: A zero-length Data Segment also 2374 * implies a zero-length data digest. 2375 */ 2376 if (pdu->isp_datalen) { 2377 data_digest_crc = idm_crc32c(pdu->isp_data, 2378 pdu->isp_datalen); 2379 } 2380 if (pad_len) { 2381 data_digest_crc = idm_crc32c_continued(&pad, 2382 pad_len, data_digest_crc); 2383 } 2384 2385 iov[iovlen].iov_base = (caddr_t)&data_digest_crc; 2386 iov[iovlen].iov_len = sizeof (data_digest_crc); 2387 total_len += iov[iovlen].iov_len; 2388 iovlen++; 2389 } 2390 2391 /* Transmit the PDU */ 2392 if (idm_iov_sosend(so_conn->ic_so, &iov[0], iovlen, 2393 total_len) != 0) { 2394 /* Set error status */ 2395 IDM_CONN_LOG(CE_WARN, 2396 "idm_so_tx: failed to transmit the PDU, so: %p ic: %p " 2397 "data: %p", (void *) so_conn->ic_so, (void *) ic, 2398 (void *) pdu->isp_data); 2399 status = IDM_STATUS_IO; 2400 } 2401 2402 /* 2403 * Success does not mean that the PDU actually reached the 2404 * remote node since it could get dropped along the way. 2405 */ 2406 idm_pdu_complete(pdu, status); 2407 2408 return (status); 2409 } 2410 2411 /* 2412 * The idm_so_buf_tx_to_ini() is used by the target iSCSI layer to transmit the 2413 * Data-In PDUs using sockets. Based on the negotiated MaxRecvDataSegmentLength, 2414 * the buffer is segmented into a sequence of Data-In PDUs, ordered by DataSN. 2415 * A target can invoke this function multiple times for a single read command 2416 * (identified by the same ITT) to split the input into several sequences. 2417 * 2418 * DataSN starts with 0 for the first data PDU of an input command and advances 2419 * by 1 for each subsequent data PDU. Each sequence will have its own F bit, 2420 * which is set to 1 for the last data PDU of a sequence. 
2421 * If the initiator supports phase collapse, the status bit must be set along 2422 * with the F bit to indicate that the status is shipped together with the last 2423 * Data-In PDU. 2424 * 2425 * The data PDUs within a sequence will be sent in order with the buffer offset 2426 * in increasing order. i.e. initiator and target must have negotiated the 2427 * "DataPDUInOrder" to "Yes". The order between sequences is not enforced. 2428 * 2429 * Caller holds idt->idt_mutex 2430 */ 2431 static idm_status_t 2432 idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb) 2433 { 2434 idm_so_conn_t *so_conn = idb->idb_ic->ic_transport_private; 2435 idm_pdu_t tmppdu; 2436 2437 ASSERT(mutex_owned(&idt->idt_mutex)); 2438 2439 /* 2440 * Put the idm_buf_t on the tx queue. It will be transmitted by 2441 * idm_sotx_thread. 2442 */ 2443 mutex_enter(&so_conn->ic_tx_mutex); 2444 2445 DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic, 2446 uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset, 2447 uint64_t, 0, uint32_t, 0, uint32_t, 0, 2448 uint32_t, idb->idb_xfer_len, int, XFER_BUF_TX_TO_INI); 2449 2450 if (!so_conn->ic_tx_thread_running) { 2451 mutex_exit(&so_conn->ic_tx_mutex); 2452 /* 2453 * Don't release idt->idt_mutex since we're supposed to hold 2454 * in when calling idm_buf_tx_to_ini_done 2455 */ 2456 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic, 2457 uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset, 2458 uint64_t, 0, uint32_t, 0, uint32_t, 0, 2459 uint32_t, idb->idb_xfer_len, 2460 int, XFER_BUF_TX_TO_INI); 2461 idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED); 2462 return (IDM_STATUS_FAIL); 2463 } 2464 2465 /* 2466 * Build a template for the data PDU headers we will use so that 2467 * the SN values will stay consistent with other PDU's we are 2468 * transmitting like R2T and SCSI status. 
2469 */ 2470 bzero(&idb->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t)); 2471 tmppdu.isp_hdr = &idb->idb_data_hdr_tmpl; 2472 (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu, 2473 ISCSI_OP_SCSI_DATA_RSP); 2474 idb->idb_tx_thread = B_TRUE; 2475 list_insert_tail(&so_conn->ic_tx_list, (void *)idb); 2476 cv_signal(&so_conn->ic_tx_cv); 2477 mutex_exit(&so_conn->ic_tx_mutex); 2478 mutex_exit(&idt->idt_mutex); 2479 2480 /* 2481 * Returning success here indicates the transfer was successfully 2482 * dispatched -- it does not mean that the transfer completed 2483 * successfully. 2484 */ 2485 return (IDM_STATUS_SUCCESS); 2486 } 2487 2488 /* 2489 * The idm_so_buf_rx_from_ini() is used by the target iSCSI layer to specify the 2490 * data blocks it is ready to receive from the initiator in response to a WRITE 2491 * SCSI command. The target iSCSI layer passes the information about the desired 2492 * data blocks to the initiator in one R2T PDU. The receiving buffer, the buffer 2493 * offset and datalen are passed via the 'idb' argument. 2494 * 2495 * Scope for Prototype build: 2496 * R2Ts are required for any Data-Out PDU, i.e. initiator and target must have 2497 * negotiated the "InitialR2T" to "Yes". 
2498 * 2499 * Caller holds idt->idt_mutex 2500 */ 2501 static idm_status_t 2502 idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb) 2503 { 2504 idm_pdu_t *pdu; 2505 iscsi_rtt_hdr_t *rtt; 2506 2507 ASSERT(mutex_owned(&idt->idt_mutex)); 2508 2509 DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic, 2510 uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset, 2511 uint64_t, 0, uint32_t, 0, uint32_t, 0, 2512 uint32_t, idb->idb_xfer_len, int, XFER_BUF_RX_FROM_INI); 2513 2514 pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP); 2515 pdu->isp_ic = idt->idt_ic; 2516 pdu->isp_flags = IDM_PDU_SET_STATSN; 2517 bzero(pdu->isp_hdr, sizeof (iscsi_rtt_hdr_t)); 2518 2519 /* iSCSI layer fills the TTT, ITT, ExpCmdSN, MaxCmdSN */ 2520 (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, pdu, ISCSI_OP_RTT_RSP); 2521 2522 /* set the rttsn, rtt.flags, rtt.data_offset and rtt.data_length */ 2523 rtt = (iscsi_rtt_hdr_t *)(pdu->isp_hdr); 2524 2525 rtt->opcode = ISCSI_OP_RTT_RSP; 2526 rtt->flags = ISCSI_FLAG_FINAL; 2527 rtt->data_offset = htonl(idb->idb_bufoffset); 2528 rtt->data_length = htonl(idb->idb_xfer_len); 2529 rtt->rttsn = htonl(idt->idt_exp_rttsn++); 2530 2531 /* Keep track of buffer offsets */ 2532 idb->idb_exp_offset = idb->idb_bufoffset; 2533 mutex_exit(&idt->idt_mutex); 2534 2535 /* 2536 * Transmit the PDU. 
2537 */ 2538 idm_pdu_tx(pdu); 2539 2540 return (IDM_STATUS_SUCCESS); 2541 } 2542 2543 static idm_status_t 2544 idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen) 2545 { 2546 if ((buflen > IDM_SO_BUF_CACHE_LB) && (buflen <= IDM_SO_BUF_CACHE_UB)) { 2547 idb->idb_buf = kmem_cache_alloc(idm.idm_so_128k_buf_cache, 2548 KM_NOSLEEP); 2549 idb->idb_buf_private = idm.idm_so_128k_buf_cache; 2550 } else { 2551 idb->idb_buf = kmem_alloc(buflen, KM_NOSLEEP); 2552 idb->idb_buf_private = NULL; 2553 } 2554 2555 if (idb->idb_buf == NULL) { 2556 IDM_CONN_LOG(CE_NOTE, 2557 "idm_so_buf_alloc: failed buffer allocation"); 2558 return (IDM_STATUS_FAIL); 2559 } 2560 2561 return (IDM_STATUS_SUCCESS); 2562 } 2563 2564 /* ARGSUSED */ 2565 static idm_status_t 2566 idm_so_buf_setup(idm_buf_t *idb) 2567 { 2568 /* Ensure bufalloc'd flag is unset */ 2569 idb->idb_bufalloc = B_FALSE; 2570 2571 return (IDM_STATUS_SUCCESS); 2572 } 2573 2574 /* ARGSUSED */ 2575 static void 2576 idm_so_buf_teardown(idm_buf_t *idb) 2577 { 2578 /* nothing to do here */ 2579 } 2580 2581 static void 2582 idm_so_buf_free(idm_buf_t *idb) 2583 { 2584 if (idb->idb_buf_private == NULL) { 2585 kmem_free(idb->idb_buf, idb->idb_buflen); 2586 } else { 2587 kmem_cache_free(idb->idb_buf_private, idb->idb_buf); 2588 } 2589 } 2590 2591 static void 2592 idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt, idm_buf_t *idb, 2593 uint32_t offset, uint32_t length) 2594 { 2595 idm_so_conn_t *so_conn = ic->ic_transport_private; 2596 idm_pdu_t tmppdu; 2597 idm_buf_t *rtt_buf; 2598 2599 ASSERT(mutex_owned(&idt->idt_mutex)); 2600 2601 /* 2602 * Allocate a buffer to represent the RTT transfer. We could further 2603 * optimize this by allocating the buffers internally from an rtt 2604 * specific buffer cache since this is socket-specific code but for 2605 * now we will keep it simple. 
2606 */ 2607 rtt_buf = idm_buf_alloc(ic, (uint8_t *)idb->idb_buf + offset, length); 2608 if (rtt_buf == NULL) { 2609 /* 2610 * If we're in FFP then the failure was likely a resource 2611 * allocation issue and we should close the connection by 2612 * sending a CE_TRANSPORT_FAIL event. 2613 * 2614 * If we're not in FFP then idm_buf_alloc will always 2615 * fail and the state is transitioning to "complete" anyway 2616 * so we won't bother to send an event. 2617 */ 2618 mutex_enter(&ic->ic_state_mutex); 2619 if (ic->ic_ffp) 2620 idm_conn_event_locked(ic, CE_TRANSPORT_FAIL, 2621 NULL, CT_NONE); 2622 mutex_exit(&ic->ic_state_mutex); 2623 mutex_exit(&idt->idt_mutex); 2624 return; 2625 } 2626 2627 rtt_buf->idb_buf_cb = NULL; 2628 rtt_buf->idb_cb_arg = NULL; 2629 rtt_buf->idb_bufoffset = offset; 2630 rtt_buf->idb_xfer_len = length; 2631 rtt_buf->idb_ic = idt->idt_ic; 2632 rtt_buf->idb_task_binding = idt; 2633 2634 /* 2635 * The new buffer (if any) represents an additional 2636 * reference on the task 2637 */ 2638 idm_task_hold(idt); 2639 mutex_exit(&idt->idt_mutex); 2640 2641 /* 2642 * Put the idm_buf_t on the tx queue. It will be transmitted by 2643 * idm_sotx_thread. 2644 */ 2645 mutex_enter(&so_conn->ic_tx_mutex); 2646 2647 if (!so_conn->ic_tx_thread_running) { 2648 idm_buf_free(rtt_buf); 2649 mutex_exit(&so_conn->ic_tx_mutex); 2650 idm_task_rele(idt); 2651 return; 2652 } 2653 2654 /* 2655 * Build a template for the data PDU headers we will use so that 2656 * the SN values will stay consistent with other PDU's we are 2657 * transmitting like R2T and SCSI status. 
2658 */ 2659 bzero(&rtt_buf->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t)); 2660 tmppdu.isp_hdr = &rtt_buf->idb_data_hdr_tmpl; 2661 (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu, 2662 ISCSI_OP_SCSI_DATA); 2663 rtt_buf->idb_tx_thread = B_TRUE; 2664 rtt_buf->idb_in_transport = B_TRUE; 2665 list_insert_tail(&so_conn->ic_tx_list, (void *)rtt_buf); 2666 cv_signal(&so_conn->ic_tx_cv); 2667 mutex_exit(&so_conn->ic_tx_mutex); 2668 } 2669 2670 static void 2671 idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb) 2672 { 2673 /* 2674 * Don't worry about status -- we assume any error handling 2675 * is performed by the caller (idm_sotx_thread). 2676 */ 2677 idb->idb_in_transport = B_FALSE; 2678 idm_task_rele(idt); 2679 idm_buf_free(idb); 2680 } 2681 2682 static idm_status_t 2683 idm_so_send_buf_region(idm_task_t *idt, idm_buf_t *idb, 2684 uint32_t buf_region_offset, uint32_t buf_region_length) 2685 { 2686 idm_conn_t *ic; 2687 uint32_t max_dataseglen; 2688 size_t remainder, chunk; 2689 uint32_t data_offset = buf_region_offset; 2690 iscsi_data_hdr_t *bhs; 2691 idm_pdu_t *pdu; 2692 idm_status_t tx_status; 2693 2694 ASSERT(mutex_owned(&idt->idt_mutex)); 2695 2696 ic = idt->idt_ic; 2697 2698 max_dataseglen = ic->ic_conn_params.max_xmit_dataseglen; 2699 remainder = buf_region_length; 2700 2701 while (remainder) { 2702 if (idt->idt_state != TASK_ACTIVE) { 2703 ASSERT((idt->idt_state != TASK_IDLE) && 2704 (idt->idt_state != TASK_COMPLETE)); 2705 return (IDM_STATUS_ABORTED); 2706 } 2707 2708 /* check to see if we need to chunk the data */ 2709 if (remainder > max_dataseglen) { 2710 chunk = max_dataseglen; 2711 } else { 2712 chunk = remainder; 2713 } 2714 2715 /* Data PDU headers will always be sizeof (iscsi_hdr_t) */ 2716 pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP); 2717 pdu->isp_ic = ic; 2718 pdu->isp_flags = 0; /* initialize isp_flags */ 2719 2720 /* 2721 * We've already built a build a header template 2722 * to use during the transfer. 
Use this template so that 2723 * the SN values stay consistent with any unrelated PDU's 2724 * being transmitted. 2725 */ 2726 bcopy(&idb->idb_data_hdr_tmpl, pdu->isp_hdr, 2727 sizeof (iscsi_hdr_t)); 2728 2729 /* 2730 * Set DataSN, data offset, and flags in BHS 2731 * For the prototype build, A = 0, S = 0, U = 0 2732 */ 2733 bhs = (iscsi_data_hdr_t *)(pdu->isp_hdr); 2734 2735 bhs->datasn = htonl(idt->idt_exp_datasn++); 2736 2737 hton24(bhs->dlength, chunk); 2738 bhs->offset = htonl(idb->idb_bufoffset + data_offset); 2739 2740 /* setup data */ 2741 pdu->isp_data = (uint8_t *)idb->idb_buf + data_offset; 2742 pdu->isp_datalen = (uint_t)chunk; 2743 2744 if (chunk == remainder) { 2745 bhs->flags = ISCSI_FLAG_FINAL; /* F bit set to 1 */ 2746 /* Piggyback the status with the last data PDU */ 2747 if (idt->idt_flags & IDM_TASK_PHASECOLLAPSE_REQ) { 2748 pdu->isp_flags |= IDM_PDU_SET_STATSN | 2749 IDM_PDU_ADVANCE_STATSN; 2750 (*idt->idt_ic->ic_conn_ops.icb_update_statsn) 2751 (idt, pdu); 2752 idt->idt_flags |= 2753 IDM_TASK_PHASECOLLAPSE_SUCCESS; 2754 2755 } 2756 } 2757 2758 remainder -= chunk; 2759 data_offset += chunk; 2760 2761 /* Instrument the data-send DTrace probe. */ 2762 if (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP) { 2763 DTRACE_ISCSI_2(data__send, 2764 idm_conn_t *, idt->idt_ic, 2765 iscsi_data_rsp_hdr_t *, 2766 (iscsi_data_rsp_hdr_t *)pdu->isp_hdr); 2767 } 2768 2769 /* 2770 * Now that we're done working with idt_exp_datasn, 2771 * idt->idt_state and idb->idb_bufoffset we can release 2772 * the task lock -- don't want to hold it across the 2773 * call to idm_i_so_tx since we could block. 2774 */ 2775 mutex_exit(&idt->idt_mutex); 2776 2777 /* 2778 * Transmit the PDU. Call the internal routine directly 2779 * as there is already implicit ordering. 
2780 */ 2781 if ((tx_status = idm_i_so_tx(pdu)) != IDM_STATUS_SUCCESS) { 2782 mutex_enter(&idt->idt_mutex); 2783 return (tx_status); 2784 } 2785 2786 mutex_enter(&idt->idt_mutex); 2787 idt->idt_tx_bytes += chunk; 2788 } 2789 2790 return (IDM_STATUS_SUCCESS); 2791 } 2792 2793 /* 2794 * TX PDU cache 2795 */ 2796 /* ARGSUSED */ 2797 int 2798 idm_sotx_pdu_constructor(void *hdl, void *arg, int flags) 2799 { 2800 idm_pdu_t *pdu = hdl; 2801 2802 bzero(pdu, sizeof (idm_pdu_t)); 2803 pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */ 2804 pdu->isp_hdrlen = sizeof (iscsi_hdr_t); 2805 pdu->isp_callback = idm_sotx_cache_pdu_cb; 2806 pdu->isp_magic = IDM_PDU_MAGIC; 2807 bzero(pdu->isp_hdr, sizeof (iscsi_hdr_t)); 2808 2809 return (0); 2810 } 2811 2812 /* ARGSUSED */ 2813 void 2814 idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status) 2815 { 2816 /* reset values between use */ 2817 pdu->isp_datalen = 0; 2818 2819 kmem_cache_free(idm.idm_sotx_pdu_cache, pdu); 2820 } 2821 2822 /* 2823 * RX PDU cache 2824 */ 2825 /* ARGSUSED */ 2826 int 2827 idm_sorx_pdu_constructor(void *hdl, void *arg, int flags) 2828 { 2829 idm_pdu_t *pdu = hdl; 2830 2831 bzero(pdu, sizeof (idm_pdu_t)); 2832 pdu->isp_magic = IDM_PDU_MAGIC; 2833 pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */ 2834 pdu->isp_callback = idm_sorx_cache_pdu_cb; 2835 2836 return (0); 2837 } 2838 2839 /* ARGSUSED */ 2840 static void 2841 idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status) 2842 { 2843 pdu->isp_iovlen = 0; 2844 pdu->isp_sorx_buf = 0; 2845 kmem_cache_free(idm.idm_sorx_pdu_cache, pdu); 2846 } 2847 2848 static void 2849 idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status) 2850 { 2851 /* 2852 * We had to modify our cached RX PDU with a longer header buffer 2853 * and/or a longer data buffer. Release the new buffers and fix 2854 * the fields back to what we would expect for a cached RX PDU. 
2855 */ 2856 if (pdu->isp_flags & IDM_PDU_ADDL_HDR) { 2857 kmem_free(pdu->isp_hdr, pdu->isp_hdrlen); 2858 } 2859 if (pdu->isp_flags & IDM_PDU_ADDL_DATA) { 2860 kmem_free(pdu->isp_data, pdu->isp_datalen); 2861 } 2862 pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); 2863 pdu->isp_hdrlen = sizeof (iscsi_hdr_t); 2864 pdu->isp_data = NULL; 2865 pdu->isp_datalen = 0; 2866 pdu->isp_sorx_buf = 0; 2867 pdu->isp_callback = idm_sorx_cache_pdu_cb; 2868 idm_sorx_cache_pdu_cb(pdu, status); 2869 } 2870 2871 /* 2872 * This thread is only active when I/O is queued for transmit 2873 * because the socket is busy. 2874 */ 2875 void 2876 idm_sotx_thread(void *arg) 2877 { 2878 idm_conn_t *ic = arg; 2879 idm_tx_obj_t *object, *next; 2880 idm_so_conn_t *so_conn; 2881 idm_status_t status = IDM_STATUS_SUCCESS; 2882 2883 idm_conn_hold(ic); 2884 2885 mutex_enter(&ic->ic_mutex); 2886 so_conn = ic->ic_transport_private; 2887 so_conn->ic_tx_thread_running = B_TRUE; 2888 so_conn->ic_tx_thread_did = so_conn->ic_tx_thread->t_did; 2889 cv_signal(&ic->ic_cv); 2890 mutex_exit(&ic->ic_mutex); 2891 2892 mutex_enter(&so_conn->ic_tx_mutex); 2893 2894 while (so_conn->ic_tx_thread_running) { 2895 while (list_is_empty(&so_conn->ic_tx_list)) { 2896 DTRACE_PROBE1(soconn__tx__sleep, idm_conn_t *, ic); 2897 cv_wait(&so_conn->ic_tx_cv, &so_conn->ic_tx_mutex); 2898 DTRACE_PROBE1(soconn__tx__wakeup, idm_conn_t *, ic); 2899 2900 if (!so_conn->ic_tx_thread_running) { 2901 goto tx_bail; 2902 } 2903 } 2904 2905 object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list); 2906 list_remove(&so_conn->ic_tx_list, object); 2907 mutex_exit(&so_conn->ic_tx_mutex); 2908 2909 switch (object->idm_tx_obj_magic) { 2910 case IDM_PDU_MAGIC: { 2911 idm_pdu_t *pdu = (idm_pdu_t *)object; 2912 DTRACE_PROBE2(soconn__tx__pdu, idm_conn_t *, ic, 2913 idm_pdu_t *, (idm_pdu_t *)object); 2914 2915 if (pdu->isp_flags & IDM_PDU_SET_STATSN) { 2916 /* No IDM task */ 2917 (ic->ic_conn_ops.icb_update_statsn)(NULL, pdu); 2918 } 2919 status = 
idm_i_so_tx((idm_pdu_t *)object); 2920 break; 2921 } 2922 case IDM_BUF_MAGIC: { 2923 idm_buf_t *idb = (idm_buf_t *)object; 2924 idm_task_t *idt = idb->idb_task_binding; 2925 2926 DTRACE_PROBE2(soconn__tx__buf, idm_conn_t *, ic, 2927 idm_buf_t *, idb); 2928 2929 mutex_enter(&idt->idt_mutex); 2930 status = idm_so_send_buf_region(idt, 2931 idb, 0, idb->idb_xfer_len); 2932 2933 /* 2934 * TX thread owns the buffer so we expect it to 2935 * be "in transport" 2936 */ 2937 ASSERT(idb->idb_in_transport); 2938 if (IDM_CONN_ISTGT(ic)) { 2939 /* 2940 * idm_buf_tx_to_ini_done releases 2941 * idt->idt_mutex 2942 */ 2943 DTRACE_ISCSI_8(xfer__done, 2944 idm_conn_t *, idt->idt_ic, 2945 uintptr_t, idb->idb_buf, 2946 uint32_t, idb->idb_bufoffset, 2947 uint64_t, 0, uint32_t, 0, uint32_t, 0, 2948 uint32_t, idb->idb_xfer_len, 2949 int, XFER_BUF_TX_TO_INI); 2950 idm_buf_tx_to_ini_done(idt, idb, status); 2951 } else { 2952 idm_so_send_rtt_data_done(idt, idb); 2953 mutex_exit(&idt->idt_mutex); 2954 } 2955 break; 2956 } 2957 2958 default: 2959 IDM_CONN_LOG(CE_WARN, "idm_sotx_thread: Unknown magic " 2960 "(0x%08x)", object->idm_tx_obj_magic); 2961 status = IDM_STATUS_FAIL; 2962 } 2963 2964 mutex_enter(&so_conn->ic_tx_mutex); 2965 2966 if (status != IDM_STATUS_SUCCESS) { 2967 so_conn->ic_tx_thread_running = B_FALSE; 2968 idm_conn_event(ic, CE_TRANSPORT_FAIL, status); 2969 } 2970 } 2971 2972 /* 2973 * Before we leave, we need to abort every item remaining in the 2974 * TX list. 
2975 */ 2976 2977 tx_bail: 2978 object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list); 2979 2980 while (object != NULL) { 2981 next = list_next(&so_conn->ic_tx_list, object); 2982 2983 list_remove(&so_conn->ic_tx_list, object); 2984 switch (object->idm_tx_obj_magic) { 2985 case IDM_PDU_MAGIC: 2986 idm_pdu_complete((idm_pdu_t *)object, 2987 IDM_STATUS_ABORTED); 2988 break; 2989 2990 case IDM_BUF_MAGIC: { 2991 idm_buf_t *idb = (idm_buf_t *)object; 2992 idm_task_t *idt = idb->idb_task_binding; 2993 mutex_exit(&so_conn->ic_tx_mutex); 2994 mutex_enter(&idt->idt_mutex); 2995 /* 2996 * TX thread owns the buffer so we expect it to 2997 * be "in transport" 2998 */ 2999 ASSERT(idb->idb_in_transport); 3000 if (IDM_CONN_ISTGT(ic)) { 3001 /* 3002 * idm_buf_tx_to_ini_done releases 3003 * idt->idt_mutex 3004 */ 3005 DTRACE_ISCSI_8(xfer__done, 3006 idm_conn_t *, idt->idt_ic, 3007 uintptr_t, idb->idb_buf, 3008 uint32_t, idb->idb_bufoffset, 3009 uint64_t, 0, uint32_t, 0, uint32_t, 0, 3010 uint32_t, idb->idb_xfer_len, 3011 int, XFER_BUF_TX_TO_INI); 3012 idm_buf_tx_to_ini_done(idt, idb, 3013 IDM_STATUS_ABORTED); 3014 } else { 3015 idm_so_send_rtt_data_done(idt, idb); 3016 mutex_exit(&idt->idt_mutex); 3017 } 3018 mutex_enter(&so_conn->ic_tx_mutex); 3019 break; 3020 } 3021 default: 3022 IDM_CONN_LOG(CE_WARN, 3023 "idm_sotx_thread: Unexpected magic " 3024 "(0x%08x)", object->idm_tx_obj_magic); 3025 } 3026 3027 object = next; 3028 } 3029 3030 mutex_exit(&so_conn->ic_tx_mutex); 3031 idm_conn_rele(ic); 3032 thread_exit(); 3033 /*NOTREACHED*/ 3034 } 3035 3036 static void 3037 idm_so_socket_set_nonblock(struct sonode *node) 3038 { 3039 (void) VOP_SETFL(node->so_vnode, node->so_flag, 3040 (node->so_state | FNONBLOCK), CRED(), NULL); 3041 } 3042 3043 static void 3044 idm_so_socket_set_block(struct sonode *node) 3045 { 3046 (void) VOP_SETFL(node->so_vnode, node->so_flag, 3047 (node->so_state & (~FNONBLOCK)), CRED(), NULL); 3048 } 3049 3050 3051 /* 3052 * Called by kernel sockets when the 
connection has been accepted or 3053 * rejected. In early volo, a "disconnect" callback was sent instead of 3054 * "connectfailed", so we check for both. 3055 */ 3056 /* ARGSUSED */ 3057 void 3058 idm_so_timed_socket_connect_cb(ksocket_t ks, 3059 ksocket_callback_event_t ev, void *arg, uintptr_t info) 3060 { 3061 idm_so_timed_socket_t *itp = arg; 3062 ASSERT(itp != NULL); 3063 ASSERT(ev == KSOCKET_EV_CONNECTED || 3064 ev == KSOCKET_EV_CONNECTFAILED || 3065 ev == KSOCKET_EV_DISCONNECTED); 3066 3067 mutex_enter(&idm_so_timed_socket_mutex); 3068 itp->it_callback_called = B_TRUE; 3069 if (ev == KSOCKET_EV_CONNECTED) { 3070 itp->it_socket_error_code = 0; 3071 } else { 3072 /* Make sure the error code is non-zero on error */ 3073 if (info == 0) 3074 info = ECONNRESET; 3075 itp->it_socket_error_code = (int)info; 3076 } 3077 cv_signal(&itp->it_cv); 3078 mutex_exit(&idm_so_timed_socket_mutex); 3079 } 3080 3081 int 3082 idm_so_timed_socket_connect(ksocket_t ks, 3083 struct sockaddr_storage *sa, int sa_sz, int login_max_usec) 3084 { 3085 clock_t conn_login_max; 3086 int rc, nonblocking, rval; 3087 idm_so_timed_socket_t it; 3088 ksocket_callbacks_t ks_cb; 3089 3090 conn_login_max = ddi_get_lbolt() + drv_usectohz(login_max_usec); 3091 3092 /* 3093 * Set to non-block socket mode, with callback on connect 3094 * Early volo used "disconnected" instead of "connectfailed", 3095 * so set callback to look for both. 
3096 */ 3097 bzero(&it, sizeof (it)); 3098 ks_cb.ksock_cb_flags = KSOCKET_CB_CONNECTED | 3099 KSOCKET_CB_CONNECTFAILED | KSOCKET_CB_DISCONNECTED; 3100 ks_cb.ksock_cb_connected = idm_so_timed_socket_connect_cb; 3101 ks_cb.ksock_cb_connectfailed = idm_so_timed_socket_connect_cb; 3102 ks_cb.ksock_cb_disconnected = idm_so_timed_socket_connect_cb; 3103 cv_init(&it.it_cv, NULL, CV_DEFAULT, NULL); 3104 rc = ksocket_setcallbacks(ks, &ks_cb, &it, CRED()); 3105 if (rc != 0) 3106 return (rc); 3107 3108 /* Set to non-blocking mode */ 3109 nonblocking = 1; 3110 rc = ksocket_ioctl(ks, FIONBIO, (intptr_t)&nonblocking, &rval, 3111 CRED()); 3112 if (rc != 0) 3113 goto cleanup; 3114 3115 bzero(&it, sizeof (it)); 3116 for (;;) { 3117 /* 3118 * Warning -- in a loopback scenario, the call to 3119 * the connect_cb can occur inside the call to 3120 * ksocket_connect. Do not hold the mutex around the 3121 * call to ksocket_connect. 3122 */ 3123 rc = ksocket_connect(ks, (struct sockaddr *)sa, sa_sz, CRED()); 3124 if (rc == 0 || rc == EISCONN) { 3125 /* socket success or already success */ 3126 rc = 0; 3127 break; 3128 } 3129 if ((rc != EINPROGRESS) && (rc != EALREADY)) { 3130 break; 3131 } 3132 3133 /* TCP connect still in progress. See if out of time. */ 3134 if (ddi_get_lbolt() > conn_login_max) { 3135 /* 3136 * Connection retry timeout, 3137 * failed connect to target. 3138 */ 3139 rc = ETIMEDOUT; 3140 break; 3141 } 3142 3143 /* 3144 * TCP connect still in progress. Sleep until callback. 3145 * Do NOT go to sleep if the callback already occurred! 3146 */ 3147 mutex_enter(&idm_so_timed_socket_mutex); 3148 if (!it.it_callback_called) { 3149 (void) cv_timedwait(&it.it_cv, 3150 &idm_so_timed_socket_mutex, conn_login_max); 3151 } 3152 if (it.it_callback_called) { 3153 rc = it.it_socket_error_code; 3154 mutex_exit(&idm_so_timed_socket_mutex); 3155 break; 3156 } 3157 /* If timer expires, go call ksocket_connect one last time. 
*/ 3158 mutex_exit(&idm_so_timed_socket_mutex); 3159 } 3160 3161 /* resume blocking mode */ 3162 nonblocking = 0; 3163 (void) ksocket_ioctl(ks, FIONBIO, (intptr_t)&nonblocking, &rval, 3164 CRED()); 3165 cleanup: 3166 (void) ksocket_setcallbacks(ks, NULL, NULL, CRED()); 3167 cv_destroy(&it.it_cv); 3168 if (rc != 0) { 3169 idm_soshutdown(ks); 3170 } 3171 return (rc); 3172 } 3173 3174 3175 void 3176 idm_addr_to_sa(idm_addr_t *dportal, struct sockaddr_storage *sa) 3177 { 3178 int dp_addr_size; 3179 struct sockaddr_in *sin; 3180 struct sockaddr_in6 *sin6; 3181 3182 /* Build sockaddr_storage for this portal (idm_addr_t) */ 3183 bzero(sa, sizeof (*sa)); 3184 dp_addr_size = dportal->a_addr.i_insize; 3185 if (dp_addr_size == sizeof (struct in_addr)) { 3186 /* IPv4 */ 3187 sa->ss_family = AF_INET; 3188 sin = (struct sockaddr_in *)sa; 3189 sin->sin_port = htons(dportal->a_port); 3190 bcopy(&dportal->a_addr.i_addr.in4, 3191 &sin->sin_addr, sizeof (struct in_addr)); 3192 } else if (dp_addr_size == sizeof (struct in6_addr)) { 3193 /* IPv6 */ 3194 sa->ss_family = AF_INET6; 3195 sin6 = (struct sockaddr_in6 *)sa; 3196 sin6->sin6_port = htons(dportal->a_port); 3197 bcopy(&dportal->a_addr.i_addr.in6, 3198 &sin6->sin6_addr, sizeof (struct in6_addr)); 3199 } else { 3200 ASSERT(0); 3201 } 3202 } 3203 3204 3205 /* 3206 * return a human-readable form of a sockaddr_storage, in the form 3207 * [ip-address]:port. This is used in calls to logging functions. 3208 * If several calls to idm_sa_ntop are made within the same invocation 3209 * of a logging function, then each one needs its own buf. 
3210 */ 3211 const char * 3212 idm_sa_ntop(const struct sockaddr_storage *sa, 3213 char *buf, size_t size) 3214 { 3215 static const char bogus_ip[] = "[0].-1"; 3216 char tmp[INET6_ADDRSTRLEN]; 3217 3218 switch (sa->ss_family) { 3219 case AF_INET6: { 3220 const struct sockaddr_in6 *in6 = 3221 (const struct sockaddr_in6 *) sa; 3222 3223 (void) inet_ntop(in6->sin6_family, &in6->sin6_addr, tmp, 3224 sizeof (tmp)); 3225 if (strlen(tmp) + sizeof ("[].65535") > size) 3226 goto err; 3227 /* struct sockaddr_storage gets port info from v4 loc */ 3228 (void) snprintf(buf, size, "[%s].%u", tmp, 3229 ntohs(in6->sin6_port)); 3230 return (buf); 3231 } 3232 case AF_INET: { 3233 const struct sockaddr_in *in = (const struct sockaddr_in *) sa; 3234 3235 (void) inet_ntop(in->sin_family, &in->sin_addr, tmp, 3236 sizeof (tmp)); 3237 if (strlen(tmp) + sizeof ("[].65535") > size) 3238 goto err; 3239 (void) snprintf(buf, size, "[%s].%u", tmp, 3240 ntohs(in->sin_port)); 3241 return (buf); 3242 } 3243 default: 3244 break; 3245 } 3246 err: 3247 (void) snprintf(buf, size, "%s", bogus_ip); 3248 return (buf); 3249 } 3250