1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/conf.h> 27 #include <sys/stat.h> 28 #include <sys/file.h> 29 #include <sys/ddi.h> 30 #include <sys/sunddi.h> 31 #include <sys/modctl.h> 32 #include <sys/priv.h> 33 #include <sys/cpuvar.h> 34 #include <sys/socket.h> 35 #include <sys/strsubr.h> 36 #include <sys/sysmacros.h> 37 #include <sys/sdt.h> 38 #include <netinet/tcp.h> 39 #include <inet/tcp.h> 40 #include <sys/socketvar.h> 41 #include <sys/pathname.h> 42 #include <sys/fs/snode.h> 43 #include <sys/fs/dv_node.h> 44 #include <sys/vnode.h> 45 #include <netinet/in.h> 46 #include <net/if.h> 47 #include <sys/sockio.h> 48 #include <sys/ksocket.h> 49 #include <sys/idm/idm.h> 50 #include <sys/idm/idm_so.h> 51 #include <sys/idm/idm_text.h> 52 53 #define IN_PROGRESS_DELAY 1 54 55 /* 56 * in6addr_any is currently all zeroes, but use the macro in case this 57 * ever changes. 58 */ 59 static const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT; 60 61 static void idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status); 62 static void idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status); 63 static void idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status); 64 65 static idm_status_t idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so); 66 static void idm_so_conn_destroy_common(idm_conn_t *ic); 67 static void idm_so_conn_connect_common(idm_conn_t *ic); 68 69 static void idm_set_ini_preconnect_options(idm_so_conn_t *sc); 70 static void idm_set_ini_postconnect_options(idm_so_conn_t *sc); 71 static void idm_set_tgt_connect_options(ksocket_t so); 72 static idm_status_t idm_i_so_tx(idm_pdu_t *pdu); 73 74 static idm_status_t idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu); 75 static void idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt, 76 idm_buf_t *idb, uint32_t offset, uint32_t length); 77 static void idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb); 78 static idm_status_t idm_so_send_buf_region(idm_task_t *idt, 79 idm_buf_t *idb, uint32_t buf_region_offset, uint32_t buf_region_length); 80 81 static uint32_t idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb, 82 uint32_t ro, uint32_t dlength); 83 84 static idm_status_t idm_so_handle_digest(idm_conn_t *it, 85 nvpair_t *digest_choice, const idm_kv_xlate_t *ikvx); 86 87 static void idm_so_socket_set_nonblock(struct sonode *node); 88 static void idm_so_socket_set_block(struct sonode *node); 89 90 /* 91 * Transport ops prototypes 92 */ 93 static void idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu); 94 static idm_status_t idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb); 95 static idm_status_t idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb); 96 static void idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu); 97 static void idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu); 98 static void idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu); 99 static idm_status_t idm_so_free_task_rsrc(idm_task_t *idt); 100 static kv_status_t idm_so_negotiate_key_values(idm_conn_t *it, 101 nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl); 102 static void idm_so_notice_key_values(idm_conn_t *it, 103 nvlist_t *negotiated_nvl); 104 static boolean_t idm_so_conn_is_capable(idm_conn_req_t *ic, 105 idm_transport_caps_t *caps); 106 static idm_status_t idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen); 107 static void idm_so_buf_free(idm_buf_t *idb); 108 static idm_status_t idm_so_buf_setup(idm_buf_t *idb); 109 static void idm_so_buf_teardown(idm_buf_t *idb); 110 static idm_status_t idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is); 111 static void idm_so_tgt_svc_destroy(idm_svc_t *is); 112 static idm_status_t idm_so_tgt_svc_online(idm_svc_t *is); 113 static void idm_so_tgt_svc_offline(idm_svc_t *is); 114 static void idm_so_tgt_conn_destroy(idm_conn_t *ic); 115 static idm_status_t idm_so_tgt_conn_connect(idm_conn_t *ic); 116 static void idm_so_conn_disconnect(idm_conn_t *ic); 117 static idm_status_t idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic); 118 static void idm_so_ini_conn_destroy(idm_conn_t *ic); 119 static idm_status_t idm_so_ini_conn_connect(idm_conn_t *ic); 120 121 /* 122 * IDM Native Sockets transport operations 123 */ 124 static 125 idm_transport_ops_t idm_so_transport_ops = { 126 idm_so_tx, /* it_tx_pdu */ 127 idm_so_buf_tx_to_ini, /* it_buf_tx_to_ini */ 128 idm_so_buf_rx_from_ini, /* it_buf_rx_from_ini */ 129 idm_so_rx_datain, /* it_rx_datain */ 130 idm_so_rx_rtt, /* it_rx_rtt */ 131 idm_so_rx_dataout, /* it_rx_dataout */ 132 NULL, /* it_alloc_conn_rsrc */ 133 NULL, /* it_free_conn_rsrc */ 134 NULL, /* it_tgt_enable_datamover */ 135 NULL, /* it_ini_enable_datamover */ 136 NULL, /* it_conn_terminate */ 137 idm_so_free_task_rsrc, /* it_free_task_rsrc */ 138 idm_so_negotiate_key_values, /* it_negotiate_key_values */ 139 idm_so_notice_key_values, /* it_notice_key_values */ 140 idm_so_conn_is_capable, /* it_conn_is_capable */ 141 idm_so_buf_alloc, /* it_buf_alloc */ 142 idm_so_buf_free, /* it_buf_free */ 143 idm_so_buf_setup, /* it_buf_setup */ 144 idm_so_buf_teardown, /* it_buf_teardown */ 145 idm_so_tgt_svc_create, /* it_tgt_svc_create */ 146 idm_so_tgt_svc_destroy, /* it_tgt_svc_destroy */ 147 idm_so_tgt_svc_online, /* it_tgt_svc_online */ 148 idm_so_tgt_svc_offline, /* it_tgt_svc_offline */ 149 idm_so_tgt_conn_destroy, /* it_tgt_conn_destroy */ 150 idm_so_tgt_conn_connect, /* it_tgt_conn_connect */ 151 idm_so_conn_disconnect, /* it_tgt_conn_disconnect */ 152 idm_so_ini_conn_create, /* it_ini_conn_create */ 153 idm_so_ini_conn_destroy, /* it_ini_conn_destroy */ 154 idm_so_ini_conn_connect, /* it_ini_conn_connect */ 155 idm_so_conn_disconnect /* it_ini_conn_disconnect */ 156 }; 157 158 /* 159 * idm_so_init() 160 * Sockets transport initialization 161 */ 162 void 163 idm_so_init(idm_transport_t *it) 164 { 165 /* Cache for IDM Data and R2T Transmit PDU's */ 166 idm.idm_sotx_pdu_cache = kmem_cache_create("idm_tx_pdu_cache", 167 sizeof (idm_pdu_t) + sizeof (iscsi_hdr_t), 8, 168 &idm_sotx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP); 169 170 /* Cache for IDM Receive PDU's */ 171 idm.idm_sorx_pdu_cache = kmem_cache_create("idm_rx_pdu_cache", 172 sizeof (idm_pdu_t) + IDM_SORX_CACHE_HDRLEN, 8, 173 &idm_sorx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP); 174 175 /* 128k buffer cache */ 176 idm.idm_so_128k_buf_cache = kmem_cache_create("idm_128k_buf_cache", 177 IDM_SO_BUF_CACHE_UB, 8, NULL, NULL, NULL, NULL, NULL, KM_SLEEP); 178 179 /* Set the sockets transport ops */ 180 it->it_ops = &idm_so_transport_ops; 181 } 182 183 /* 184 * idm_so_fini() 185 * Sockets transport teardown 186 */ 187 void 188 idm_so_fini(void) 189 { 190 kmem_cache_destroy(idm.idm_so_128k_buf_cache); 191 kmem_cache_destroy(idm.idm_sotx_pdu_cache); 192 kmem_cache_destroy(idm.idm_sorx_pdu_cache); 193 } 194 195 ksocket_t 196 idm_socreate(int domain, int type, int protocol) 197 { 198 ksocket_t ks; 199 200 if (!ksocket_socket(&ks, domain, type, protocol, KSOCKET_NOSLEEP, 201 CRED())) { 202 return (ks); 203 } else { 204 return (NULL); 205 } 206 } 207 208 /* 209 * idm_soshutdown will disconnect the socket and prevent subsequent PDU 210 * reception and transmission. The sonode still exists but its state 211 * gets modified to indicate it is no longer connected. Calls to 212 * idm_sorecv/idm_iov_sorecv will return so idm_soshutdown can be used 213 * regain control of a thread stuck in idm_sorecv. 214 */ 215 void 216 idm_soshutdown(ksocket_t so) 217 { 218 (void) ksocket_shutdown(so, SHUT_RDWR, CRED()); 219 } 220 221 /* 222 * idm_sodestroy releases all resources associated with a socket previously 223 * created with idm_socreate. The socket must be shutdown using 224 * idm_soshutdown before the socket is destroyed with idm_sodestroy, 225 * otherwise undefined behavior will result. 226 */ 227 void 228 idm_sodestroy(ksocket_t ks) 229 { 230 (void) ksocket_close(ks, CRED()); 231 } 232 233 /* 234 * Function to compare two addresses in sockaddr_storage format 235 */ 236 237 int 238 idm_ss_compare(const struct sockaddr_storage *cmp_ss1, 239 const struct sockaddr_storage *cmp_ss2, 240 boolean_t v4_mapped_as_v4) 241 { 242 struct sockaddr_storage mapped_v4_ss1, mapped_v4_ss2; 243 const struct sockaddr_storage *ss1, *ss2; 244 struct in_addr *in1, *in2; 245 struct in6_addr *in61, *in62; 246 int i; 247 248 /* 249 * Normalize V4-mapped IPv6 addresses into V4 format if 250 * v4_mapped_as_v4 is B_TRUE. 251 */ 252 ss1 = cmp_ss1; 253 ss2 = cmp_ss2; 254 if (v4_mapped_as_v4 && (ss1->ss_family == AF_INET6)) { 255 in61 = &((struct sockaddr_in6 *)ss1)->sin6_addr; 256 if (IN6_IS_ADDR_V4MAPPED(in61)) { 257 bzero(&mapped_v4_ss1, sizeof (mapped_v4_ss1)); 258 mapped_v4_ss1.ss_family = AF_INET; 259 ((struct sockaddr_in *)&mapped_v4_ss1)->sin_port = 260 ((struct sockaddr_in *)ss1)->sin_port; 261 IN6_V4MAPPED_TO_INADDR(in61, 262 &((struct sockaddr_in *)&mapped_v4_ss1)->sin_addr); 263 ss1 = &mapped_v4_ss1; 264 } 265 } 266 ss2 = cmp_ss2; 267 if (v4_mapped_as_v4 && (ss2->ss_family == AF_INET6)) { 268 in62 = &((struct sockaddr_in6 *)ss2)->sin6_addr; 269 if (IN6_IS_ADDR_V4MAPPED(in62)) { 270 bzero(&mapped_v4_ss2, sizeof (mapped_v4_ss2)); 271 mapped_v4_ss2.ss_family = AF_INET; 272 ((struct sockaddr_in *)&mapped_v4_ss2)->sin_port = 273 ((struct sockaddr_in *)ss2)->sin_port; 274 IN6_V4MAPPED_TO_INADDR(in62, 275 &((struct sockaddr_in *)&mapped_v4_ss2)->sin_addr); 276 ss2 = &mapped_v4_ss2; 277 } 278 } 279 280 /* 281 * Compare ports, then address family, then ip address 282 */ 283 if (((struct sockaddr_in *)ss1)->sin_port != 284 ((struct sockaddr_in *)ss2)->sin_port) { 285 if (((struct sockaddr_in *)ss1)->sin_port > 286 ((struct sockaddr_in *)ss2)->sin_port) 287 return (1); 288 else 289 return (-1); 290 } 291 292 /* 293 * ports are the same 294 */ 295 if (ss1->ss_family != ss2->ss_family) { 296 if (ss1->ss_family == AF_INET) 297 return (1); 298 else 299 return (-1); 300 } 301 302 /* 303 * address families are the same 304 */ 305 if (ss1->ss_family == AF_INET) { 306 in1 = &((struct sockaddr_in *)ss1)->sin_addr; 307 in2 = &((struct sockaddr_in *)ss2)->sin_addr; 308 309 if (in1->s_addr > in2->s_addr) 310 return (1); 311 else if (in1->s_addr < in2->s_addr) 312 return (-1); 313 else 314 return (0); 315 } else if (ss1->ss_family == AF_INET6) { 316 in61 = &((struct sockaddr_in6 *)ss1)->sin6_addr; 317 in62 = &((struct sockaddr_in6 *)ss2)->sin6_addr; 318 319 for (i = 0; i < 4; i++) { 320 if (in61->s6_addr32[i] > in62->s6_addr32[i]) 321 return (1); 322 else if (in61->s6_addr32[i] < in62->s6_addr32[i]) 323 return (-1); 324 } 325 return (0); 326 } 327 328 return (1); 329 } 330 331 /* 332 * IP address filter functions to flag addresses that should not 333 * go out to initiators through discovery. 334 */ 335 static boolean_t 336 idm_v4_addr_okay(struct in_addr *in_addr) 337 { 338 in_addr_t addr = ntohl(in_addr->s_addr); 339 340 if ((INADDR_NONE == addr) || 341 (IN_MULTICAST(addr)) || 342 ((addr >> IN_CLASSA_NSHIFT) == 0) || 343 ((addr >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) { 344 return (B_FALSE); 345 } 346 return (B_TRUE); 347 } 348 349 static boolean_t 350 idm_v6_addr_okay(struct in6_addr *addr6) 351 { 352 353 if ((IN6_IS_ADDR_UNSPECIFIED(addr6)) || 354 (IN6_IS_ADDR_LOOPBACK(addr6)) || 355 (IN6_IS_ADDR_MULTICAST(addr6)) || 356 (IN6_IS_ADDR_V4MAPPED(addr6)) || 357 (IN6_IS_ADDR_V4COMPAT(addr6)) || 358 (IN6_IS_ADDR_LINKLOCAL(addr6))) { 359 return (B_FALSE); 360 } 361 return (B_TRUE); 362 } 363 364 /* 365 * idm_get_ipaddr will retrieve a list of IP Addresses which the host is 366 * configured with by sending down a sequence of kernel ioctl to IP STREAMS. 367 */ 368 int 369 idm_get_ipaddr(idm_addr_list_t **ipaddr_p) 370 { 371 ksocket_t so4, so6; 372 struct lifnum lifn; 373 struct lifconf lifc; 374 struct lifreq *lp; 375 int rval; 376 int numifs; 377 int bufsize; 378 void *buf; 379 int i, j, n, rc; 380 struct sockaddr_storage ss; 381 struct sockaddr_in *sin; 382 struct sockaddr_in6 *sin6; 383 idm_addr_t *ip; 384 idm_addr_list_t *ipaddr; 385 int size_ipaddr; 386 387 *ipaddr_p = NULL; 388 size_ipaddr = 0; 389 buf = NULL; 390 391 /* create an ipv4 and ipv6 UDP socket */ 392 if ((so6 = idm_socreate(PF_INET6, SOCK_DGRAM, 0)) == NULL) 393 return (0); 394 if ((so4 = idm_socreate(PF_INET, SOCK_DGRAM, 0)) == NULL) { 395 idm_sodestroy(so6); 396 return (0); 397 } 398 399 400 retry_count: 401 /* snapshot the current number of interfaces */ 402 lifn.lifn_family = PF_UNSPEC; 403 lifn.lifn_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES; 404 lifn.lifn_count = 0; 405 /* use vp6 for ioctls with unspecified families by default */ 406 if (ksocket_ioctl(so6, SIOCGLIFNUM, (intptr_t)&lifn, &rval, CRED()) 407 != 0) { 408 goto cleanup; 409 } 410 411 numifs = lifn.lifn_count; 412 if (numifs <= 0) { 413 goto cleanup; 414 } 415 416 /* allocate extra room in case more interfaces appear */ 417 numifs += 10; 418 419 /* get the interface names and ip addresses */ 420 bufsize = numifs * sizeof (struct lifreq); 421 buf = kmem_alloc(bufsize, KM_SLEEP); 422 423 lifc.lifc_family = AF_UNSPEC; 424 lifc.lifc_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES; 425 lifc.lifc_len = bufsize; 426 lifc.lifc_buf = buf; 427 rc = ksocket_ioctl(so6, SIOCGLIFCONF, (intptr_t)&lifc, &rval, CRED()); 428 if (rc != 0) { 429 goto cleanup; 430 } 431 /* if our extra room is used up, try again */ 432 if (bufsize <= lifc.lifc_len) { 433 kmem_free(buf, bufsize); 434 buf = NULL; 435 goto retry_count; 436 } 437 /* calc actual number of ifconfs */ 438 n = lifc.lifc_len / sizeof (struct lifreq); 439 440 /* get ip address */ 441 if (n > 0) { 442 size_ipaddr = sizeof (idm_addr_list_t) + 443 (n - 1) * sizeof (idm_addr_t); 444 ipaddr = kmem_zalloc(size_ipaddr, KM_SLEEP); 445 } else { 446 goto cleanup; 447 } 448 449 /* 450 * Examine the array of interfaces and filter uninteresting ones 451 */ 452 for (i = 0, j = 0, lp = lifc.lifc_req; i < n; i++, lp++) { 453 454 /* 455 * Copy the address as the SIOCGLIFFLAGS ioctl is destructive 456 */ 457 ss = lp->lifr_addr; 458 /* 459 * fetch the flags using the socket of the correct family 460 */ 461 switch (ss.ss_family) { 462 case AF_INET: 463 rc = ksocket_ioctl(so4, SIOCGLIFFLAGS, (intptr_t)lp, 464 &rval, CRED()); 465 break; 466 case AF_INET6: 467 rc = ksocket_ioctl(so6, SIOCGLIFFLAGS, (intptr_t)lp, 468 &rval, CRED()); 469 break; 470 default: 471 continue; 472 } 473 if (rc == 0) { 474 /* 475 * If we got the flags, skip uninteresting 476 * interfaces based on flags 477 */ 478 if ((lp->lifr_flags & IFF_UP) != IFF_UP) 479 continue; 480 if (lp->lifr_flags & 481 (IFF_ANYCAST|IFF_NOLOCAL|IFF_DEPRECATED)) 482 continue; 483 } 484 485 /* save ip address */ 486 ip = &ipaddr->al_addrs[j]; 487 switch (ss.ss_family) { 488 case AF_INET: 489 sin = (struct sockaddr_in *)&ss; 490 if (!idm_v4_addr_okay(&sin->sin_addr)) 491 continue; 492 ip->a_addr.i_addr.in4 = sin->sin_addr; 493 ip->a_addr.i_insize = sizeof (struct in_addr); 494 break; 495 case AF_INET6: 496 sin6 = (struct sockaddr_in6 *)&ss; 497 if (!idm_v6_addr_okay(&sin6->sin6_addr)) 498 continue; 499 ip->a_addr.i_addr.in6 = sin6->sin6_addr; 500 ip->a_addr.i_insize = sizeof (struct in6_addr); 501 break; 502 default: 503 continue; 504 } 505 j++; 506 } 507 508 if (j == 0) { 509 /* no valid ifaddr */ 510 kmem_free(ipaddr, size_ipaddr); 511 size_ipaddr = 0; 512 ipaddr = NULL; 513 } else { 514 ipaddr->al_out_cnt = j; 515 } 516 517 518 cleanup: 519 idm_sodestroy(so6); 520 idm_sodestroy(so4); 521 522 if (buf != NULL) 523 kmem_free(buf, bufsize); 524 525 *ipaddr_p = ipaddr; 526 return (size_ipaddr); 527 } 528 529 int 530 idm_sorecv(ksocket_t so, void *msg, size_t len) 531 { 532 iovec_t iov; 533 534 ASSERT(so != NULL); 535 ASSERT(len != 0); 536 537 /* 538 * Fill in iovec and receive data 539 */ 540 iov.iov_base = msg; 541 iov.iov_len = len; 542 543 return (idm_iov_sorecv(so, &iov, 1, len)); 544 } 545 546 /* 547 * idm_sosendto - Sends a buffered data on a non-connected socket. 548 * 549 * This function puts the data provided on the wire by calling sosendmsg. 550 * It will return only when all the data has been sent or if an error 551 * occurs. 552 * 553 * Returns 0 for success, the socket errno value if sosendmsg fails, and 554 * -1 if sosendmsg returns success but uio_resid != 0 555 */ 556 int 557 idm_sosendto(ksocket_t so, void *buff, size_t len, 558 struct sockaddr *name, socklen_t namelen) 559 { 560 struct msghdr msg; 561 struct iovec iov[1]; 562 int error; 563 size_t sent = 0; 564 565 iov[0].iov_base = buff; 566 iov[0].iov_len = len; 567 568 /* Initialization of the message header. */ 569 bzero(&msg, sizeof (msg)); 570 msg.msg_iov = iov; 571 msg.msg_iovlen = 1; 572 msg.msg_name = name; 573 msg.msg_namelen = namelen; 574 575 if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED())) == 0) { 576 /* Data sent */ 577 if (sent == len) { 578 /* All data sent. Success. */ 579 return (0); 580 } else { 581 /* Not all data was sent. Failure */ 582 return (-1); 583 } 584 } 585 586 /* Send failed */ 587 return (error); 588 } 589 590 /* 591 * idm_iov_sosend - Sends an iovec on a connection. 592 * 593 * This function puts the data provided on the wire by calling sosendmsg. 594 * It will return only when all the data has been sent or if an error 595 * occurs. 596 * 597 * Returns 0 for success, the socket errno value if sosendmsg fails, and 598 * -1 if sosendmsg returns success but uio_resid != 0 599 */ 600 int 601 idm_iov_sosend(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len) 602 { 603 struct msghdr msg; 604 int error; 605 size_t sent = 0; 606 607 ASSERT(iop != NULL); 608 609 /* Initialization of the message header. */ 610 bzero(&msg, sizeof (msg)); 611 msg.msg_iov = iop; 612 msg.msg_iovlen = iovlen; 613 614 if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED())) 615 == 0) { 616 /* Data sent */ 617 if (sent == total_len) { 618 /* All data sent. Success. */ 619 return (0); 620 } else { 621 /* Not all data was sent. Failure */ 622 return (-1); 623 } 624 } 625 626 /* Send failed */ 627 return (error); 628 } 629 630 /* 631 * idm_iov_sorecv - Receives an iovec from a connection 632 * 633 * This function gets the data asked for from the socket. It will return 634 * only when all the requested data has been retrieved or if an error 635 * occurs. 636 * 637 * Returns 0 for success, the socket errno value if sorecvmsg fails, and 638 * -1 if sorecvmsg returns success but uio_resid != 0 639 */ 640 int 641 idm_iov_sorecv(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len) 642 { 643 struct msghdr msg; 644 int error; 645 size_t recv; 646 int flags; 647 648 ASSERT(iop != NULL); 649 650 /* Initialization of the message header. */ 651 bzero(&msg, sizeof (msg)); 652 msg.msg_iov = iop; 653 msg.msg_iovlen = iovlen; 654 flags = MSG_WAITALL; 655 656 if ((error = ksocket_recvmsg(so, &msg, flags, &recv, CRED())) 657 == 0) { 658 /* Received data */ 659 if (recv == total_len) { 660 /* All requested data received. Success */ 661 return (0); 662 } else { 663 /* 664 * Not all data was received. The connection has 665 * probably failed. 666 */ 667 return (-1); 668 } 669 } 670 671 /* Receive failed */ 672 return (error); 673 } 674 675 static void 676 idm_set_ini_preconnect_options(idm_so_conn_t *sc) 677 { 678 int conn_abort = 10000; 679 int conn_notify = 2000; 680 int abort = 30000; 681 682 /* Pre-connect socket options */ 683 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP, 684 TCP_CONN_NOTIFY_THRESHOLD, (char *)&conn_notify, sizeof (int), 685 CRED()); 686 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP, 687 TCP_CONN_ABORT_THRESHOLD, (char *)&conn_abort, sizeof (int), 688 CRED()); 689 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP, TCP_ABORT_THRESHOLD, 690 (char *)&abort, sizeof (int), CRED()); 691 } 692 693 static void 694 idm_set_ini_postconnect_options(idm_so_conn_t *sc) 695 { 696 int32_t rcvbuf = IDM_RCVBUF_SIZE; 697 int32_t sndbuf = IDM_SNDBUF_SIZE; 698 const int on = 1; 699 700 /* Set postconnect options */ 701 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP, TCP_NODELAY, 702 (char *)&on, sizeof (int), CRED()); 703 (void) ksocket_setsockopt(sc->ic_so, SOL_SOCKET, SO_RCVBUF, 704 (char *)&rcvbuf, sizeof (int), CRED()); 705 (void) ksocket_setsockopt(sc->ic_so, SOL_SOCKET, SO_SNDBUF, 706 (char *)&sndbuf, sizeof (int), CRED()); 707 } 708 709 static void 710 idm_set_tgt_connect_options(ksocket_t ks) 711 { 712 int32_t rcvbuf = IDM_RCVBUF_SIZE; 713 int32_t sndbuf = IDM_SNDBUF_SIZE; 714 const int on = 1; 715 716 /* Set connect options */ 717 (void) ksocket_setsockopt(ks, SOL_SOCKET, SO_RCVBUF, 718 (char *)&rcvbuf, sizeof (int), CRED()); 719 (void) ksocket_setsockopt(ks, SOL_SOCKET, SO_SNDBUF, 720 (char *)&sndbuf, sizeof (int), CRED()); 721 (void) ksocket_setsockopt(ks, IPPROTO_TCP, TCP_NODELAY, 722 (char *)&on, sizeof (on), CRED()); 723 } 724 725 static uint32_t 726 n2h24(const uchar_t *ptr) 727 { 728 return ((ptr[0] << 16) | (ptr[1] << 8) | ptr[2]); 729 } 730 731 732 static idm_status_t 733 idm_sorecvhdr(idm_conn_t *ic, idm_pdu_t *pdu) 734 { 735 iscsi_hdr_t *bhs; 736 uint32_t hdr_digest_crc; 737 uint32_t crc_calculated; 738 void *new_hdr; 739 int ahslen = 0; 740 int total_len = 0; 741 int iovlen = 0; 742 struct iovec iov[2]; 743 idm_so_conn_t *so_conn; 744 int rc; 745 746 so_conn = ic->ic_transport_private; 747 748 /* 749 * Read BHS 750 */ 751 bhs = pdu->isp_hdr; 752 rc = idm_sorecv(so_conn->ic_so, pdu->isp_hdr, sizeof (iscsi_hdr_t)); 753 if (rc != IDM_STATUS_SUCCESS) { 754 return (IDM_STATUS_FAIL); 755 } 756 757 /* 758 * Check actual AHS length against the amount available in the buffer 759 */ 760 pdu->isp_hdrlen = sizeof (iscsi_hdr_t) + 761 (bhs->hlength * sizeof (uint32_t)); 762 pdu->isp_datalen = n2h24(bhs->dlength); 763 if (bhs->hlength > IDM_SORX_CACHE_AHSLEN) { 764 /* Allocate a new header segment and change the callback */ 765 new_hdr = kmem_alloc(pdu->isp_hdrlen, KM_SLEEP); 766 bcopy(pdu->isp_hdr, new_hdr, sizeof (iscsi_hdr_t)); 767 pdu->isp_hdr = new_hdr; 768 pdu->isp_flags |= IDM_PDU_ADDL_HDR; 769 770 /* 771 * This callback will restore the expected values after 772 * the RX PDU has been processed. 773 */ 774 pdu->isp_callback = idm_sorx_addl_pdu_cb; 775 } 776 777 /* 778 * Setup receipt of additional header and header digest (if enabled). 779 */ 780 if (bhs->hlength > 0) { 781 iov[iovlen].iov_base = (caddr_t)(pdu->isp_hdr + 1); 782 ahslen = pdu->isp_hdrlen - sizeof (iscsi_hdr_t); 783 iov[iovlen].iov_len = ahslen; 784 total_len += iov[iovlen].iov_len; 785 iovlen++; 786 } 787 788 if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) { 789 iov[iovlen].iov_base = (caddr_t)&hdr_digest_crc; 790 iov[iovlen].iov_len = sizeof (hdr_digest_crc); 791 total_len += iov[iovlen].iov_len; 792 iovlen++; 793 } 794 795 if ((iovlen != 0) && 796 (idm_iov_sorecv(so_conn->ic_so, &iov[0], iovlen, 797 total_len) != 0)) { 798 return (IDM_STATUS_FAIL); 799 } 800 801 /* 802 * Validate header digest if enabled 803 */ 804 if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) { 805 crc_calculated = idm_crc32c(pdu->isp_hdr, 806 sizeof (iscsi_hdr_t) + ahslen); 807 if (crc_calculated != hdr_digest_crc) { 808 /* Invalid Header Digest */ 809 return (IDM_STATUS_HEADER_DIGEST); 810 } 811 } 812 813 return (0); 814 } 815 816 /* 817 * idm_so_ini_conn_create() 818 * Allocate the sockets transport connection resources. 819 */ 820 static idm_status_t 821 idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic) 822 { 823 ksocket_t so; 824 idm_so_conn_t *so_conn; 825 idm_status_t idmrc; 826 827 so = idm_socreate(cr->cr_domain, cr->cr_type, 828 cr->cr_protocol); 829 if (so == NULL) { 830 return (IDM_STATUS_FAIL); 831 } 832 833 /* Bind the socket if configured to do so */ 834 if (cr->cr_bound) { 835 if (ksocket_bind(so, &cr->cr_bound_addr.sin, 836 SIZEOF_SOCKADDR(&cr->cr_bound_addr.sin), CRED()) != 0) { 837 idm_sodestroy(so); 838 return (IDM_STATUS_FAIL); 839 } 840 } 841 842 idmrc = idm_so_conn_create_common(ic, so); 843 if (idmrc != IDM_STATUS_SUCCESS) { 844 idm_soshutdown(so); 845 idm_sodestroy(so); 846 return (IDM_STATUS_FAIL); 847 } 848 849 so_conn = ic->ic_transport_private; 850 /* Set up socket options */ 851 idm_set_ini_preconnect_options(so_conn); 852 853 return (IDM_STATUS_SUCCESS); 854 } 855 856 /* 857 * idm_so_ini_conn_destroy() 858 * Tear down the sockets transport connection resources. 859 */ 860 static void 861 idm_so_ini_conn_destroy(idm_conn_t *ic) 862 { 863 idm_so_conn_destroy_common(ic); 864 } 865 866 /* 867 * idm_so_ini_conn_connect() 868 * Establish the connection referred to by the handle previously allocated via 869 * idm_so_ini_conn_create(). 870 */ 871 static idm_status_t 872 idm_so_ini_conn_connect(idm_conn_t *ic) 873 { 874 idm_so_conn_t *so_conn; 875 struct sonode *node = NULL; 876 int rc; 877 clock_t lbolt, conn_login_max, conn_login_interval; 878 boolean_t nonblock; 879 880 so_conn = ic->ic_transport_private; 881 nonblock = ic->ic_conn_params.nonblock_socket; 882 conn_login_max = ic->ic_conn_params.conn_login_max; 883 conn_login_interval = ddi_get_lbolt() + 884 SEC_TO_TICK(ic->ic_conn_params.conn_login_interval); 885 886 if (nonblock == B_TRUE) { 887 node = ((struct sonode *)(so_conn->ic_so)); 888 /* Set to none block socket mode */ 889 idm_so_socket_set_nonblock(node); 890 do { 891 rc = ksocket_connect(so_conn->ic_so, 892 &ic->ic_ini_dst_addr.sin, 893 (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)), 894 CRED()); 895 if (rc == 0 || rc == EISCONN) { 896 /* socket success or already success */ 897 rc = IDM_STATUS_SUCCESS; 898 break; 899 } 900 if ((rc == ETIMEDOUT) || (rc == ECONNREFUSED) || 901 (rc == ECONNRESET)) { 902 /* socket connection timeout or refuse */ 903 break; 904 } 905 lbolt = ddi_get_lbolt(); 906 if (lbolt > conn_login_max) { 907 /* 908 * Connection retry timeout, 909 * failed connect to target. 910 */ 911 break; 912 } 913 if (lbolt < conn_login_interval) { 914 if ((rc == EINPROGRESS) || (rc == EALREADY)) { 915 /* TCP connect still in progress */ 916 delay(SEC_TO_TICK(IN_PROGRESS_DELAY)); 917 continue; 918 } else { 919 delay(conn_login_interval - lbolt); 920 } 921 } 922 conn_login_interval = ddi_get_lbolt() + 923 SEC_TO_TICK(ic->ic_conn_params.conn_login_interval); 924 } while (rc != 0); 925 /* resume to nonblock mode */ 926 if (rc == IDM_STATUS_SUCCESS) { 927 idm_so_socket_set_block(node); 928 } 929 } else { 930 rc = ksocket_connect(so_conn->ic_so, &ic->ic_ini_dst_addr.sin, 931 (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)), CRED()); 932 } 933 934 if (rc != 0) { 935 idm_soshutdown(so_conn->ic_so); 936 return (IDM_STATUS_FAIL); 937 } 938 939 idm_so_conn_connect_common(ic); 940 941 idm_set_ini_postconnect_options(so_conn); 942 943 return (IDM_STATUS_SUCCESS); 944 } 945 946 idm_status_t 947 idm_so_tgt_conn_create(idm_conn_t *ic, ksocket_t new_so) 948 { 949 idm_status_t idmrc; 950 951 idmrc = idm_so_conn_create_common(ic, new_so); 952 953 return (idmrc); 954 } 955 956 static void 957 idm_so_tgt_conn_destroy(idm_conn_t *ic) 958 { 959 idm_so_conn_destroy_common(ic); 960 } 961 962 /* 963 * idm_so_tgt_conn_connect() 964 * Establish the connection in ic, passed from idm_tgt_conn_finish(), which 965 * is invoked from the SM as a result of an inbound connection request. 966 */ 967 static idm_status_t 968 idm_so_tgt_conn_connect(idm_conn_t *ic) 969 { 970 idm_so_conn_connect_common(ic); 971 972 return (IDM_STATUS_SUCCESS); 973 } 974 975 static idm_status_t 976 idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so) 977 { 978 idm_so_conn_t *so_conn; 979 980 so_conn = kmem_zalloc(sizeof (idm_so_conn_t), KM_SLEEP); 981 so_conn->ic_so = new_so; 982 983 ic->ic_transport_private = so_conn; 984 ic->ic_transport_hdrlen = 0; 985 986 /* Set the scoreboarding flag on this connection */ 987 ic->ic_conn_flags |= IDM_CONN_USE_SCOREBOARD; 988 989 /* 990 * Initialize tx thread mutex and list 991 */ 992 mutex_init(&so_conn->ic_tx_mutex, NULL, MUTEX_DEFAULT, NULL); 993 cv_init(&so_conn->ic_tx_cv, NULL, CV_DEFAULT, NULL); 994 list_create(&so_conn->ic_tx_list, sizeof (idm_pdu_t), 995 offsetof(idm_pdu_t, idm_tx_link)); 996 997 return (IDM_STATUS_SUCCESS); 998 } 999 1000 static void 1001 idm_so_conn_destroy_common(idm_conn_t *ic) 1002 { 1003 idm_so_conn_t *so_conn = ic->ic_transport_private; 1004 1005 ic->ic_transport_private = NULL; 1006 idm_sodestroy(so_conn->ic_so); 1007 list_destroy(&so_conn->ic_tx_list); 1008 mutex_destroy(&so_conn->ic_tx_mutex); 1009 cv_destroy(&so_conn->ic_tx_cv); 1010 1011 kmem_free(so_conn, sizeof (idm_so_conn_t)); 1012 } 1013 1014 static void 1015 idm_so_conn_connect_common(idm_conn_t *ic) 1016 { 1017 idm_so_conn_t *so_conn; 1018 struct sockaddr_in6 t_addr; 1019 socklen_t t_addrlen = 0; 1020 1021 so_conn = ic->ic_transport_private; 1022 bzero(&t_addr, sizeof (struct sockaddr_in6)); 1023 t_addrlen = sizeof (struct sockaddr_in6); 1024 1025 /* Set the local and remote addresses in the idm conn handle */ 1026 ksocket_getsockname(so_conn->ic_so, (struct sockaddr *)&t_addr, 1027 &t_addrlen, CRED()); 1028 bcopy(&t_addr, &ic->ic_laddr, t_addrlen); 1029 ksocket_getpeername(so_conn->ic_so, (struct sockaddr *)&t_addr, 1030 &t_addrlen, CRED()); 1031 bcopy(&t_addr, &ic->ic_raddr, t_addrlen); 1032 1033 mutex_enter(&ic->ic_mutex); 1034 so_conn->ic_tx_thread = thread_create(NULL, 0, idm_sotx_thread, ic, 0, 1035 &p0, TS_RUN, minclsyspri); 1036 so_conn->ic_rx_thread = thread_create(NULL, 0, idm_sorx_thread, ic, 0, 1037 &p0, TS_RUN, minclsyspri); 1038 1039 while (!so_conn->ic_rx_thread_running || !so_conn->ic_tx_thread_running) 1040 cv_wait(&ic->ic_cv, &ic->ic_mutex); 1041 mutex_exit(&ic->ic_mutex); 1042 } 1043 1044 /* 1045 * idm_so_conn_disconnect() 1046 * Shutdown the socket connection and stop the thread 1047 */ 1048 static void 1049 idm_so_conn_disconnect(idm_conn_t *ic) 1050 { 1051 idm_so_conn_t *so_conn; 1052 1053 so_conn = ic->ic_transport_private; 1054 1055 mutex_enter(&ic->ic_mutex); 1056 so_conn->ic_rx_thread_running = B_FALSE; 1057 so_conn->ic_tx_thread_running = B_FALSE; 1058 /* We need to wakeup the TX thread */ 1059 mutex_enter(&so_conn->ic_tx_mutex); 1060 cv_signal(&so_conn->ic_tx_cv); 1061 mutex_exit(&so_conn->ic_tx_mutex); 1062 mutex_exit(&ic->ic_mutex); 1063 1064 /* This should wakeup the RX thread if it is sleeping */ 1065 idm_soshutdown(so_conn->ic_so); 1066 1067 thread_join(so_conn->ic_tx_thread_did); 1068 thread_join(so_conn->ic_rx_thread_did); 1069 } 1070 1071 /* 1072 * idm_so_tgt_svc_create() 1073 * Establish a service on an IP address and port. idm_svc_req_t contains 1074 * the service parameters. 1075 */ 1076 /*ARGSUSED*/ 1077 static idm_status_t 1078 idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is) 1079 { 1080 idm_so_svc_t *so_svc; 1081 1082 so_svc = kmem_zalloc(sizeof (idm_so_svc_t), KM_SLEEP); 1083 1084 /* Set the new sockets service in svc handle */ 1085 is->is_so_svc = (void *)so_svc; 1086 1087 return (IDM_STATUS_SUCCESS); 1088 } 1089 1090 /* 1091 * idm_so_tgt_svc_destroy() 1092 * Teardown sockets resources allocated in idm_so_tgt_svc_create() 1093 */ 1094 static void 1095 idm_so_tgt_svc_destroy(idm_svc_t *is) 1096 { 1097 /* the socket will have been torn down; free the service */ 1098 kmem_free(is->is_so_svc, sizeof (idm_so_svc_t)); 1099 } 1100 1101 /* 1102 * idm_so_tgt_svc_online() 1103 * Launch a watch thread on the svc allocated in idm_so_tgt_svc_create() 1104 */ 1105 1106 static idm_status_t 1107 idm_so_tgt_svc_online(idm_svc_t *is) 1108 { 1109 idm_so_svc_t *so_svc; 1110 idm_svc_req_t *sr = &is->is_svc_req; 1111 struct sockaddr_in6 sin6_ip; 1112 const uint32_t on = 1; 1113 const uint32_t off = 0; 1114 1115 mutex_enter(&is->is_mutex); 1116 so_svc = (idm_so_svc_t *)is->is_so_svc; 1117 1118 /* 1119 * Try creating an IPv6 socket first 1120 */ 1121 if ((so_svc->is_so = idm_socreate(PF_INET6, SOCK_STREAM, 0)) == NULL) { 1122 mutex_exit(&is->is_mutex); 1123 return (IDM_STATUS_FAIL); 1124 } else { 1125 bzero(&sin6_ip, sizeof (sin6_ip)); 1126 sin6_ip.sin6_family = AF_INET6; 1127 sin6_ip.sin6_port = htons(sr->sr_port); 1128 sin6_ip.sin6_addr = in6addr_any; 1129 1130 (void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET, 1131 SO_REUSEADDR, (char *)&on, sizeof (on), CRED()); 1132 /* 1133 * Turn off SO_MAC_EXEMPT so future sobinds succeed 1134 */ 1135 (void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET, 1136 SO_MAC_EXEMPT, (char *)&off, sizeof (off), CRED()); 1137 1138 if (ksocket_bind(so_svc->is_so, (struct sockaddr *)&sin6_ip, 1139 sizeof (sin6_ip), CRED()) != 0) { 1140 mutex_exit(&is->is_mutex); 1141 idm_sodestroy(so_svc->is_so); 1142 return (IDM_STATUS_FAIL); 1143 } 1144 } 1145 1146 idm_set_tgt_connect_options(so_svc->is_so); 1147 1148 if (ksocket_listen(so_svc->is_so, 5, CRED()) != 0) { 1149 mutex_exit(&is->is_mutex); 1150 idm_soshutdown(so_svc->is_so); 1151 idm_sodestroy(so_svc->is_so); 1152 return (IDM_STATUS_FAIL); 1153 } 1154 1155 /* Launch a watch thread */ 1156 so_svc->is_thread = thread_create(NULL, 0, idm_so_svc_port_watcher, 1157 is, 0, &p0, TS_RUN, minclsyspri); 1158 1159 if (so_svc->is_thread == NULL) { 1160 /* Failure to launch; teardown the socket */ 1161 mutex_exit(&is->is_mutex); 1162 idm_soshutdown(so_svc->is_so); 1163 idm_sodestroy(so_svc->is_so); 1164 return (IDM_STATUS_FAIL); 1165 } 1166 ksocket_hold(so_svc->is_so); 1167 /* Wait for the port watcher thread to start */ 1168 while (!so_svc->is_thread_running) 1169 cv_wait(&is->is_cv, &is->is_mutex); 1170 mutex_exit(&is->is_mutex); 1171 1172 return (IDM_STATUS_SUCCESS); 1173 } 1174 1175 /* 1176 * idm_so_tgt_svc_offline 1177 * 1178 * Stop listening on the IP address and port identified by idm_svc_t. 1179 */ 1180 static void 1181 idm_so_tgt_svc_offline(idm_svc_t *is) 1182 { 1183 idm_so_svc_t *so_svc; 1184 mutex_enter(&is->is_mutex); 1185 so_svc = (idm_so_svc_t *)is->is_so_svc; 1186 so_svc->is_thread_running = B_FALSE; 1187 mutex_exit(&is->is_mutex); 1188 1189 /* 1190 * Teardown socket 1191 */ 1192 idm_sodestroy(so_svc->is_so); 1193 1194 /* 1195 * Now we expect the port watcher thread to terminate 1196 */ 1197 thread_join(so_svc->is_thread_did); 1198 } 1199 1200 /* 1201 * Watch thread for target service connection establishment. 1202 */ 1203 void 1204 idm_so_svc_port_watcher(void *arg) 1205 { 1206 idm_svc_t *svc = arg; 1207 ksocket_t new_so; 1208 idm_conn_t *ic; 1209 idm_status_t idmrc; 1210 idm_so_svc_t *so_svc; 1211 int rc; 1212 const uint32_t off = 0; 1213 struct sockaddr_in6 t_addr; 1214 socklen_t t_addrlen; 1215 1216 bzero(&t_addr, sizeof (struct sockaddr_in6)); 1217 t_addrlen = sizeof (struct sockaddr_in6); 1218 mutex_enter(&svc->is_mutex); 1219 1220 so_svc = svc->is_so_svc; 1221 so_svc->is_thread_running = B_TRUE; 1222 so_svc->is_thread_did = so_svc->is_thread->t_did; 1223 1224 cv_signal(&svc->is_cv); 1225 1226 IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) online", (void *)svc, 1227 svc->is_svc_req.sr_port); 1228 1229 while (so_svc->is_thread_running) { 1230 mutex_exit(&svc->is_mutex); 1231 1232 if ((rc = ksocket_accept(so_svc->is_so, 1233 (struct sockaddr *)&t_addr, &t_addrlen, 1234 &new_so, CRED())) != 0) { 1235 mutex_enter(&svc->is_mutex); 1236 if (rc == ECONNABORTED) 1237 continue; 1238 /* Connection problem */ 1239 break; 1240 } 1241 /* 1242 * Turn off SO_MAC_EXEMPT so future sobinds succeed 1243 */ 1244 (void) ksocket_setsockopt(new_so, SOL_SOCKET, SO_MAC_EXEMPT, 1245 (char *)&off, sizeof (off), CRED()); 1246 1247 idmrc = idm_svc_conn_create(svc, IDM_TRANSPORT_TYPE_SOCKETS, 1248 &ic); 1249 if (idmrc != IDM_STATUS_SUCCESS) { 1250 /* Drop connection */ 1251 idm_soshutdown(new_so); 1252 idm_sodestroy(new_so); 1253 mutex_enter(&svc->is_mutex); 1254 continue; 1255 } 1256 1257 idmrc = idm_so_tgt_conn_create(ic, new_so); 1258 if (idmrc != IDM_STATUS_SUCCESS) { 1259 idm_svc_conn_destroy(ic); 1260 idm_soshutdown(new_so); 1261 idm_sodestroy(new_so); 1262 mutex_enter(&svc->is_mutex); 1263 continue; 1264 } 1265 1266 /* 1267 * Kick the state machine. At CS_S3_XPT_UP the state machine 1268 * will notify the client (target) about the new connection. 1269 */ 1270 idm_conn_event(ic, CE_CONNECT_ACCEPT, NULL); 1271 1272 mutex_enter(&svc->is_mutex); 1273 } 1274 ksocket_rele(so_svc->is_so); 1275 so_svc->is_thread_running = B_FALSE; 1276 mutex_exit(&svc->is_mutex); 1277 1278 IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) offline", (void *)svc, 1279 svc->is_svc_req.sr_port); 1280 1281 thread_exit(); 1282 } 1283 1284 /* 1285 * idm_so_free_task_rsrc() stops any ongoing processing of the task and 1286 * frees resources associated with the task. 1287 * 1288 * It's not clear that this should return idm_status_t. What do we do 1289 * if it fails? 1290 */ 1291 static idm_status_t 1292 idm_so_free_task_rsrc(idm_task_t *idt) 1293 { 1294 idm_buf_t *idb; 1295 1296 /* 1297 * There is nothing to cleanup on initiator connections 1298 */ 1299 if (IDM_CONN_ISINI(idt->idt_ic)) 1300 return (IDM_STATUS_SUCCESS); 1301 1302 /* 1303 * If this is a target connection, call idm_buf_rx_from_ini_done for 1304 * any buffer on the "outbufv" list with idb->idb_in_transport==B_TRUE. 1305 * 1306 * In addition, remove any buffers associated with this task from 1307 * the ic_tx_list. We'll do this by walking the idt_inbufv list, but 1308 * items don't actually get removed from that list (and completion 1309 * routines called) until idm_task_cleanup. 1310 */ 1311 mutex_enter(&idt->idt_mutex); 1312 1313 for (idb = list_head(&idt->idt_outbufv); idb != NULL; 1314 idb = list_next(&idt->idt_outbufv, idb)) { 1315 if (idb->idb_in_transport) { 1316 /* 1317 * idm_buf_rx_from_ini_done releases idt->idt_mutex 1318 */ 1319 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic, 1320 uintptr_t, idb->idb_buf, 1321 uint32_t, idb->idb_bufoffset, 1322 uint64_t, 0, uint32_t, 0, uint32_t, 0, 1323 uint32_t, idb->idb_xfer_len, 1324 int, XFER_BUF_RX_FROM_INI); 1325 idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_ABORTED); 1326 mutex_enter(&idt->idt_mutex); 1327 } 1328 } 1329 1330 for (idb = list_head(&idt->idt_inbufv); idb != NULL; 1331 idb = list_next(&idt->idt_inbufv, idb)) { 1332 /* 1333 * We want to remove these items from the tx_list as well, 1334 * but knowing it's in the idt_inbufv list is not a guarantee 1335 * that it's in the tx_list. If it's on the tx list then 1336 * let idm_sotx_thread() clean it up. 1337 */ 1338 if (idb->idb_in_transport && !idb->idb_tx_thread) { 1339 /* 1340 * idm_buf_tx_to_ini_done releases idt->idt_mutex 1341 */ 1342 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic, 1343 uintptr_t, idb->idb_buf, 1344 uint32_t, idb->idb_bufoffset, 1345 uint64_t, 0, uint32_t, 0, uint32_t, 0, 1346 uint32_t, idb->idb_xfer_len, 1347 int, XFER_BUF_TX_TO_INI); 1348 idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED); 1349 mutex_enter(&idt->idt_mutex); 1350 } 1351 } 1352 1353 mutex_exit(&idt->idt_mutex); 1354 1355 return (IDM_STATUS_SUCCESS); 1356 } 1357 1358 /* 1359 * idm_so_negotiate_key_values() validates the key values for this connection 1360 */ 1361 /* ARGSUSED */ 1362 static kv_status_t 1363 idm_so_negotiate_key_values(idm_conn_t *it, nvlist_t *request_nvl, 1364 nvlist_t *response_nvl, nvlist_t *negotiated_nvl) 1365 { 1366 /* All parameters are negotiated at the iscsit level */ 1367 return (KV_HANDLED); 1368 } 1369 1370 /* 1371 * idm_so_notice_key_values() activates the negotiated key values for 1372 * this connection. 1373 */ 1374 static void 1375 idm_so_notice_key_values(idm_conn_t *it, nvlist_t *negotiated_nvl) 1376 { 1377 char *nvp_name; 1378 nvpair_t *nvp; 1379 nvpair_t *next_nvp; 1380 int nvrc; 1381 idm_status_t idm_status; 1382 const idm_kv_xlate_t *ikvx; 1383 1384 for (nvp = nvlist_next_nvpair(negotiated_nvl, NULL); 1385 nvp != NULL; nvp = next_nvp) { 1386 next_nvp = nvlist_next_nvpair(negotiated_nvl, nvp); 1387 nvp_name = nvpair_name(nvp); 1388 1389 ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name)); 1390 switch (ikvx->ik_key_id) { 1391 case KI_HEADER_DIGEST: 1392 case KI_DATA_DIGEST: 1393 idm_status = idm_so_handle_digest(it, nvp, ikvx); 1394 ASSERT(idm_status == 0); 1395 1396 /* Remove processed item from negotiated_nvl list */ 1397 nvrc = nvlist_remove_all( 1398 negotiated_nvl, ikvx->ik_key_name); 1399 ASSERT(nvrc == 0); 1400 break; 1401 default: 1402 break; 1403 } 1404 } 1405 } 1406 1407 1408 static idm_status_t 1409 idm_so_handle_digest(idm_conn_t *it, nvpair_t *digest_choice, 1410 const idm_kv_xlate_t *ikvx) 1411 { 1412 int nvrc; 1413 char *digest_choice_string; 1414 1415 nvrc = nvpair_value_string(digest_choice, 1416 &digest_choice_string); 1417 ASSERT(nvrc == 0); 1418 if (strcasecmp(digest_choice_string, "crc32c") == 0) { 1419 switch (ikvx->ik_key_id) { 1420 case KI_HEADER_DIGEST: 1421 it->ic_conn_flags |= IDM_CONN_HEADER_DIGEST; 1422 break; 1423 case KI_DATA_DIGEST: 1424 it->ic_conn_flags |= IDM_CONN_DATA_DIGEST; 1425 break; 1426 default: 1427 ASSERT(0); 1428 break; 1429 } 1430 } else if (strcasecmp(digest_choice_string, "none") == 0) { 1431 switch (ikvx->ik_key_id) { 1432 case KI_HEADER_DIGEST: 1433 it->ic_conn_flags &= ~IDM_CONN_HEADER_DIGEST; 1434 break; 1435 case KI_DATA_DIGEST: 1436 it->ic_conn_flags &= ~IDM_CONN_DATA_DIGEST; 1437 break; 1438 default: 1439 ASSERT(0); 1440 break; 1441 } 1442 } else { 1443 ASSERT(0); 1444 } 1445 1446 return (IDM_STATUS_SUCCESS); 1447 } 1448 1449 1450 /* 1451 * idm_so_conn_is_capable() verifies that the passed connection is provided 1452 * for by the sockets interface. 1453 */ 1454 /* ARGSUSED */ 1455 static boolean_t 1456 idm_so_conn_is_capable(idm_conn_req_t *ic, idm_transport_caps_t *caps) 1457 { 1458 return (B_TRUE); 1459 } 1460 1461 /* 1462 * idm_so_rx_datain() validates the Data Sequence number of the PDU. The 1463 * idm_sorecv_scsidata() function invoked earlier actually reads the data 1464 * off the socket into the appropriate buffers. 1465 */ 1466 static void 1467 idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu) 1468 { 1469 iscsi_data_hdr_t *bhs; 1470 idm_task_t *idt; 1471 idm_buf_t *idb; 1472 uint32_t datasn; 1473 size_t offset; 1474 iscsi_hdr_t *ihp = (iscsi_hdr_t *)pdu->isp_hdr; 1475 iscsi_data_rsp_hdr_t *idrhp = (iscsi_data_rsp_hdr_t *)ihp; 1476 1477 ASSERT(ic != NULL); 1478 ASSERT(pdu != NULL); 1479 1480 bhs = (iscsi_data_hdr_t *)pdu->isp_hdr; 1481 datasn = ntohl(bhs->datasn); 1482 offset = ntohl(bhs->offset); 1483 1484 ASSERT(bhs->opcode == ISCSI_OP_SCSI_DATA_RSP); 1485 1486 /* 1487 * Look up the task corresponding to the initiator task tag 1488 * to get the buffers affiliated with the task. 1489 */ 1490 idt = idm_task_find(ic, bhs->itt, bhs->ttt); 1491 if (idt == NULL) { 1492 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: failed to find task"); 1493 idm_pdu_rx_protocol_error(ic, pdu); 1494 return; 1495 } 1496 1497 idb = pdu->isp_sorx_buf; 1498 if (idb == NULL) { 1499 IDM_CONN_LOG(CE_WARN, 1500 "idm_so_rx_datain: failed to find buffer"); 1501 idm_task_rele(idt); 1502 idm_pdu_rx_protocol_error(ic, pdu); 1503 return; 1504 } 1505 1506 /* 1507 * DataSN values should be sequential and should not have any gaps or 1508 * repetitions. Check the DataSN with the one stored in the task. 1509 */ 1510 if (datasn == idt->idt_exp_datasn) { 1511 idt->idt_exp_datasn++; /* keep track of DataSN received */ 1512 } else { 1513 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: datasn out of order"); 1514 idm_task_rele(idt); 1515 idm_pdu_rx_protocol_error(ic, pdu); 1516 return; 1517 } 1518 1519 /* 1520 * PDUs in a sequence should be in continuously increasing 1521 * address offset 1522 */ 1523 if (offset != idb->idb_exp_offset) { 1524 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: unexpected offset"); 1525 idm_task_rele(idt); 1526 idm_pdu_rx_protocol_error(ic, pdu); 1527 return; 1528 } 1529 /* Expected next relative buffer offset */ 1530 idb->idb_exp_offset += n2h24(bhs->dlength); 1531 idt->idt_rx_bytes += n2h24(bhs->dlength); 1532 1533 idm_task_rele(idt); 1534 1535 /* 1536 * For now call scsi_rsp which will process the data rsp 1537 * Revisit, need to provide an explicit client entry point for 1538 * phase collapse completions. 1539 */ 1540 if (((ihp->opcode & ISCSI_OPCODE_MASK) == ISCSI_OP_SCSI_DATA_RSP) && 1541 (idrhp->flags & ISCSI_FLAG_DATA_STATUS)) { 1542 (*ic->ic_conn_ops.icb_rx_scsi_rsp)(ic, pdu); 1543 } 1544 1545 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS); 1546 } 1547 1548 /* 1549 * The idm_so_rx_dataout() function is used by the iSCSI target to read 1550 * data from the Data-Out PDU sent by the iSCSI initiator. 1551 * 1552 * This function gets the Initiator Task Tag from the PDU BHS and looks up the 1553 * task to get the buffers associated with the PDU. A PDU might span buffers. 1554 * The data is then read into the respective buffer. 1555 */ 1556 static void 1557 idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu) 1558 { 1559 1560 iscsi_data_hdr_t *bhs; 1561 idm_task_t *idt; 1562 idm_buf_t *idb; 1563 size_t offset; 1564 1565 ASSERT(ic != NULL); 1566 ASSERT(pdu != NULL); 1567 1568 bhs = (iscsi_data_hdr_t *)pdu->isp_hdr; 1569 offset = ntohl(bhs->offset); 1570 ASSERT(bhs->opcode == ISCSI_OP_SCSI_DATA); 1571 1572 /* 1573 * Look up the task corresponding to the initiator task tag 1574 * to get the buffers affiliated with the task. 1575 */ 1576 idt = idm_task_find(ic, bhs->itt, bhs->ttt); 1577 if (idt == NULL) { 1578 IDM_CONN_LOG(CE_WARN, 1579 "idm_so_rx_dataout: failed to find task"); 1580 idm_pdu_rx_protocol_error(ic, pdu); 1581 return; 1582 } 1583 1584 idb = pdu->isp_sorx_buf; 1585 if (idb == NULL) { 1586 IDM_CONN_LOG(CE_WARN, 1587 "idm_so_rx_dataout: failed to find buffer"); 1588 idm_task_rele(idt); 1589 idm_pdu_rx_protocol_error(ic, pdu); 1590 return; 1591 } 1592 1593 /* Keep track of data transferred - check data offsets */ 1594 if (offset != idb->idb_exp_offset) { 1595 IDM_CONN_LOG(CE_NOTE, "idm_so_rx_dataout: offset out of seq: " 1596 "%ld, %d", offset, idb->idb_exp_offset); 1597 idm_task_rele(idt); 1598 idm_pdu_rx_protocol_error(ic, pdu); 1599 return; 1600 } 1601 /* Expected next relative offset */ 1602 idb->idb_exp_offset += ntoh24(bhs->dlength); 1603 idt->idt_rx_bytes += n2h24(bhs->dlength); 1604 1605 /* 1606 * Call the buffer callback when the transfer is complete 1607 * 1608 * The connection state machine should only abort tasks after 1609 * shutting down the connection so we are assured that there 1610 * won't be a simultaneous attempt to abort this task at the 1611 * same time as we are processing this PDU (due to a connection 1612 * state change). 1613 */ 1614 if (bhs->flags & ISCSI_FLAG_FINAL) { 1615 /* 1616 * We only want to call idm_buf_rx_from_ini_done once 1617 * per transfer. It's possible that this task has 1618 * already been aborted in which case 1619 * idm_so_free_task_rsrc will call idm_buf_rx_from_ini_done 1620 * for each buffer with idb_in_transport==B_TRUE. To 1621 * close this window and ensure that this doesn't happen, 1622 * we'll clear idb->idb_in_transport now while holding 1623 * the task mutex. This is only really an issue for 1624 * SCSI task abort -- if tasks were being aborted because 1625 * of a connection state change the state machine would 1626 * have already stopped the receive thread. 1627 */ 1628 mutex_enter(&idt->idt_mutex); 1629 1630 /* 1631 * Release the task hold here (obtained in idm_task_find) 1632 * because the task may complete synchronously during 1633 * idm_buf_rx_from_ini_done. Since we still have an active 1634 * buffer we know there is at least one additional hold on idt. 1635 */ 1636 idm_task_rele(idt); 1637 1638 /* 1639 * idm_buf_rx_from_ini_done releases idt->idt_mutex 1640 */ 1641 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic, 1642 uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset, 1643 uint64_t, 0, uint32_t, 0, uint32_t, 0, 1644 uint32_t, idb->idb_xfer_len, 1645 int, XFER_BUF_RX_FROM_INI); 1646 idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_SUCCESS); 1647 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS); 1648 return; 1649 } 1650 1651 idm_task_rele(idt); 1652 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS); 1653 } 1654 1655 /* 1656 * The idm_so_rx_rtt() function is used by the iSCSI initiator to handle 1657 * the R2T PDU sent by the iSCSI target indicating that it is ready to 1658 * accept data. This gets the Initiator Task Tag (itt) from the PDU BHS 1659 * and looks up the task in the task tree using the itt to get the output 1660 * buffers associated the task. The R2T PDU contains the offset of the 1661 * requested data and the data length. This function then constructs a 1662 * sequence of iSCSI PDUs and outputs the requested data. Each Data-Out 1663 * PDU is associated with the R2T by the Target Transfer Tag (ttt). 1664 */ 1665 1666 static void 1667 idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu) 1668 { 1669 idm_task_t *idt; 1670 idm_buf_t *idb; 1671 iscsi_rtt_hdr_t *rtt_hdr; 1672 uint32_t data_offset; 1673 uint32_t data_length; 1674 1675 ASSERT(ic != NULL); 1676 ASSERT(pdu != NULL); 1677 1678 rtt_hdr = (iscsi_rtt_hdr_t *)pdu->isp_hdr; 1679 data_offset = ntohl(rtt_hdr->data_offset); 1680 data_length = ntohl(rtt_hdr->data_length); 1681 idt = idm_task_find(ic, rtt_hdr->itt, rtt_hdr->ttt); 1682 1683 if (idt == NULL) { 1684 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find task"); 1685 idm_pdu_rx_protocol_error(ic, pdu); 1686 return; 1687 } 1688 1689 /* Find the buffer bound to the task by the iSCSI initiator */ 1690 mutex_enter(&idt->idt_mutex); 1691 idb = idm_buf_find(&idt->idt_outbufv, data_offset); 1692 if (idb == NULL) { 1693 mutex_exit(&idt->idt_mutex); 1694 idm_task_rele(idt); 1695 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find buffer"); 1696 idm_pdu_rx_protocol_error(ic, pdu); 1697 return; 1698 } 1699 1700 /* return buffer contains this data */ 1701 if (data_offset + data_length > idb->idb_buflen) { 1702 /* Overflow */ 1703 mutex_exit(&idt->idt_mutex); 1704 idm_task_rele(idt); 1705 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: read from outside " 1706 "buffer"); 1707 idm_pdu_rx_protocol_error(ic, pdu); 1708 return; 1709 } 1710 1711 idt->idt_r2t_ttt = rtt_hdr->ttt; 1712 idt->idt_exp_datasn = 0; 1713 1714 idm_so_send_rtt_data(ic, idt, idb, data_offset, 1715 ntohl(rtt_hdr->data_length)); 1716 mutex_exit(&idt->idt_mutex); 1717 1718 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS); 1719 idm_task_rele(idt); 1720 1721 } 1722 1723 idm_status_t 1724 idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu) 1725 { 1726 uint8_t pad[ISCSI_PAD_WORD_LEN]; 1727 int pad_len; 1728 uint32_t data_digest_crc; 1729 uint32_t crc_calculated; 1730 int total_len; 1731 idm_so_conn_t *so_conn; 1732 1733 so_conn = ic->ic_transport_private; 1734 1735 pad_len = ((ISCSI_PAD_WORD_LEN - 1736 (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) & 1737 (ISCSI_PAD_WORD_LEN - 1)); 1738 1739 ASSERT(pdu->isp_iovlen < (PDU_MAX_IOVLEN - 2)); /* pad + data digest */ 1740 1741 total_len = pdu->isp_datalen; 1742 1743 if (pad_len) { 1744 pdu->isp_iov[pdu->isp_iovlen].iov_base = (char *)&pad; 1745 pdu->isp_iov[pdu->isp_iovlen].iov_len = pad_len; 1746 total_len += pad_len; 1747 pdu->isp_iovlen++; 1748 } 1749 1750 /* setup data digest */ 1751 if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) { 1752 pdu->isp_iov[pdu->isp_iovlen].iov_base = 1753 (char *)&data_digest_crc; 1754 pdu->isp_iov[pdu->isp_iovlen].iov_len = 1755 sizeof (data_digest_crc); 1756 total_len += sizeof (data_digest_crc); 1757 pdu->isp_iovlen++; 1758 } 1759 1760 pdu->isp_data = (uint8_t *)(uintptr_t)pdu->isp_iov[0].iov_base; 1761 1762 if (idm_iov_sorecv(so_conn->ic_so, &pdu->isp_iov[0], 1763 pdu->isp_iovlen, total_len) != 0) { 1764 return (IDM_STATUS_IO); 1765 } 1766 1767 if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) { 1768 crc_calculated = idm_crc32c(pdu->isp_data, 1769 pdu->isp_datalen); 1770 if (pad_len) { 1771 crc_calculated = idm_crc32c_continued((char *)&pad, 1772 pad_len, crc_calculated); 1773 } 1774 if (crc_calculated != data_digest_crc) { 1775 IDM_CONN_LOG(CE_WARN, 1776 "idm_sorecvdata: " 1777 "CRC error: actual 0x%x, calc 0x%x", 1778 data_digest_crc, crc_calculated); 1779 1780 /* Invalid Data Digest */ 1781 return (IDM_STATUS_DATA_DIGEST); 1782 } 1783 } 1784 1785 return (IDM_STATUS_SUCCESS); 1786 } 1787 1788 /* 1789 * idm_sorecv_scsidata() is used to receive scsi data from the socket. The 1790 * Data-type PDU header must be read into the idm_pdu_t structure prior to 1791 * calling this function. 1792 */ 1793 idm_status_t 1794 idm_sorecv_scsidata(idm_conn_t *ic, idm_pdu_t *pdu) 1795 { 1796 iscsi_data_hdr_t *bhs; 1797 idm_task_t *task; 1798 uint32_t offset; 1799 uint8_t opcode; 1800 uint32_t dlength; 1801 list_t *buflst; 1802 uint32_t xfer_bytes; 1803 idm_status_t status; 1804 1805 ASSERT(ic != NULL); 1806 ASSERT(pdu != NULL); 1807 1808 bhs = (iscsi_data_hdr_t *)pdu->isp_hdr; 1809 1810 offset = ntohl(bhs->offset); 1811 opcode = bhs->opcode; 1812 dlength = n2h24(bhs->dlength); 1813 1814 ASSERT((opcode == ISCSI_OP_SCSI_DATA_RSP) || 1815 (opcode == ISCSI_OP_SCSI_DATA)); 1816 1817 /* 1818 * Successful lookup implicitly gets a "hold" on the task. This 1819 * hold must be released before leaving this function. At one 1820 * point we were caching this task context and retaining the hold 1821 * but it turned out to be very difficult to release the hold properly. 1822 * The task can be aborted and the connection shutdown between this 1823 * call and the subsequent expected call to idm_so_rx_datain/ 1824 * idm_so_rx_dataout (in which case those functions are not called). 1825 * Releasing the hold in the PDU callback doesn't work well either 1826 * because the whole task may be completed by then at which point 1827 * it is too late to release the hold -- for better or worse this 1828 * code doesn't wait on the refcnts during normal operation. 1829 * idm_task_find() is very fast and it is not a huge burden if we 1830 * have to do it twice. 1831 */ 1832 task = idm_task_find(ic, bhs->itt, bhs->ttt); 1833 if (task == NULL) { 1834 IDM_CONN_LOG(CE_WARN, 1835 "idm_sorecv_scsidata: could not find task"); 1836 return (IDM_STATUS_FAIL); 1837 } 1838 1839 mutex_enter(&task->idt_mutex); 1840 buflst = (opcode == ISCSI_OP_SCSI_DATA_RSP) ? 1841 &task->idt_inbufv : &task->idt_outbufv; 1842 pdu->isp_sorx_buf = idm_buf_find(buflst, offset); 1843 mutex_exit(&task->idt_mutex); 1844 1845 if (pdu->isp_sorx_buf == NULL) { 1846 idm_task_rele(task); 1847 IDM_CONN_LOG(CE_WARN, "idm_sorecv_scsidata: could not find " 1848 "buffer for offset %x opcode=%x", 1849 offset, opcode); 1850 return (IDM_STATUS_FAIL); 1851 } 1852 1853 xfer_bytes = idm_fill_iov(pdu, pdu->isp_sorx_buf, offset, dlength); 1854 ASSERT(xfer_bytes != 0); 1855 if (xfer_bytes != dlength) { 1856 idm_task_rele(task); 1857 /* 1858 * Buffer overflow, connection error. The PDU data is still 1859 * sitting in the socket so we can't use the connection 1860 * again until that data is drained. 1861 */ 1862 return (IDM_STATUS_FAIL); 1863 } 1864 1865 status = idm_sorecvdata(ic, pdu); 1866 1867 idm_task_rele(task); 1868 1869 return (status); 1870 } 1871 1872 static uint32_t 1873 idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb, uint32_t ro, uint32_t dlength) 1874 { 1875 uint32_t buf_ro = ro - idb->idb_bufoffset; 1876 uint32_t xfer_len = min(dlength, idb->idb_buflen - buf_ro); 1877 1878 ASSERT(ro >= idb->idb_bufoffset); 1879 1880 pdu->isp_iov[pdu->isp_iovlen].iov_base = 1881 (caddr_t)idb->idb_buf + buf_ro; 1882 pdu->isp_iov[pdu->isp_iovlen].iov_len = xfer_len; 1883 pdu->isp_iovlen++; 1884 1885 return (xfer_len); 1886 } 1887 1888 int 1889 idm_sorecv_nonscsidata(idm_conn_t *ic, idm_pdu_t *pdu) 1890 { 1891 pdu->isp_data = kmem_alloc(pdu->isp_datalen, KM_SLEEP); 1892 ASSERT(pdu->isp_data != NULL); 1893 1894 pdu->isp_databuflen = pdu->isp_datalen; 1895 pdu->isp_iov[0].iov_base = (caddr_t)pdu->isp_data; 1896 pdu->isp_iov[0].iov_len = pdu->isp_datalen; 1897 pdu->isp_iovlen = 1; 1898 /* 1899 * Since we are associating a new data buffer with this received 1900 * PDU we need to set a specific callback to free the data 1901 * after the PDU is processed. 1902 */ 1903 pdu->isp_flags |= IDM_PDU_ADDL_DATA; 1904 pdu->isp_callback = idm_sorx_addl_pdu_cb; 1905 1906 return (idm_sorecvdata(ic, pdu)); 1907 } 1908 1909 void 1910 idm_sorx_thread(void *arg) 1911 { 1912 boolean_t conn_failure = B_FALSE; 1913 idm_conn_t *ic = (idm_conn_t *)arg; 1914 idm_so_conn_t *so_conn; 1915 idm_pdu_t *pdu; 1916 idm_status_t rc; 1917 1918 idm_conn_hold(ic); 1919 1920 mutex_enter(&ic->ic_mutex); 1921 1922 so_conn = ic->ic_transport_private; 1923 so_conn->ic_rx_thread_running = B_TRUE; 1924 so_conn->ic_rx_thread_did = so_conn->ic_rx_thread->t_did; 1925 cv_signal(&ic->ic_cv); 1926 1927 while (so_conn->ic_rx_thread_running) { 1928 mutex_exit(&ic->ic_mutex); 1929 1930 /* 1931 * Get PDU with default header size (large enough for 1932 * BHS plus any anticipated AHS). PDU from 1933 * the cache will have all values set correctly 1934 * for sockets RX including callback. 1935 */ 1936 pdu = kmem_cache_alloc(idm.idm_sorx_pdu_cache, KM_SLEEP); 1937 pdu->isp_ic = ic; 1938 pdu->isp_flags = 0; 1939 pdu->isp_transport_hdrlen = 0; 1940 1941 if ((rc = idm_sorecvhdr(ic, pdu)) != 0) { 1942 /* 1943 * Call idm_pdu_complete so that we call the callback 1944 * and ensure any memory allocated in idm_sorecvhdr 1945 * gets freed up. 1946 */ 1947 idm_pdu_complete(pdu, IDM_STATUS_FAIL); 1948 1949 /* 1950 * If ic_rx_thread_running is still set then 1951 * this is some kind of connection problem 1952 * on the socket. In this case we want to 1953 * generate an event. Otherwise some other 1954 * thread closed the socket due to another 1955 * issue in which case we don't need to 1956 * generate an event. 1957 */ 1958 mutex_enter(&ic->ic_mutex); 1959 if (so_conn->ic_rx_thread_running) { 1960 conn_failure = B_TRUE; 1961 so_conn->ic_rx_thread_running = B_FALSE; 1962 } 1963 1964 continue; 1965 } 1966 1967 /* 1968 * Header has been read and validated. Now we need 1969 * to read the PDU data payload (if present). SCSI data 1970 * need to be transferred from the socket directly into 1971 * the associated transfer buffer for the SCSI task. 1972 */ 1973 if (pdu->isp_datalen != 0) { 1974 if ((IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA) || 1975 (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP)) { 1976 rc = idm_sorecv_scsidata(ic, pdu); 1977 /* 1978 * All SCSI errors are fatal to the 1979 * connection right now since we have no 1980 * place to put the data. What we need 1981 * is some kind of sink to dispose of unwanted 1982 * SCSI data. For example an invalid task tag 1983 * should not kill the connection (although 1984 * we may want to drop the connection). 1985 */ 1986 } else { 1987 /* 1988 * Not data PDUs so allocate a buffer for the 1989 * data segment and read the remaining data. 1990 */ 1991 rc = idm_sorecv_nonscsidata(ic, pdu); 1992 } 1993 if (rc != 0) { 1994 /* 1995 * Call idm_pdu_complete so that we call the 1996 * callback and ensure any memory allocated 1997 * in idm_sorecvhdr gets freed up. 1998 */ 1999 idm_pdu_complete(pdu, IDM_STATUS_FAIL); 2000 2001 /* 2002 * If ic_rx_thread_running is still set then 2003 * this is some kind of connection problem 2004 * on the socket. In this case we want to 2005 * generate an event. Otherwise some other 2006 * thread closed the socket due to another 2007 * issue in which case we don't need to 2008 * generate an event. 2009 */ 2010 mutex_enter(&ic->ic_mutex); 2011 if (so_conn->ic_rx_thread_running) { 2012 conn_failure = B_TRUE; 2013 so_conn->ic_rx_thread_running = B_FALSE; 2014 } 2015 continue; 2016 } 2017 } 2018 2019 /* 2020 * Process RX PDU 2021 */ 2022 idm_pdu_rx(ic, pdu); 2023 2024 mutex_enter(&ic->ic_mutex); 2025 } 2026 2027 mutex_exit(&ic->ic_mutex); 2028 2029 /* 2030 * If we dropped out of the RX processing loop because of 2031 * a socket problem or other connection failure (including 2032 * digest errors) then we need to generate a state machine 2033 * event to shut the connection down. 2034 * If the state machine is already in, for example, INIT_ERROR, this 2035 * event will get dropped, and the TX thread will never be notified 2036 * to shut down. To be safe, we'll just notify it here. 2037 */ 2038 if (conn_failure) { 2039 if (so_conn->ic_tx_thread_running) { 2040 so_conn->ic_tx_thread_running = B_FALSE; 2041 mutex_enter(&so_conn->ic_tx_mutex); 2042 cv_signal(&so_conn->ic_tx_cv); 2043 mutex_exit(&so_conn->ic_tx_mutex); 2044 } 2045 2046 idm_conn_event(ic, CE_TRANSPORT_FAIL, rc); 2047 } 2048 2049 idm_conn_rele(ic); 2050 2051 thread_exit(); 2052 } 2053 2054 /* 2055 * idm_so_tx 2056 * 2057 * This is the implementation of idm_transport_ops_t's it_tx_pdu entry 2058 * point. By definition, it is supposed to be fast. So, simply queue 2059 * the entry and return. The real work is done by idm_i_so_tx() via 2060 * idm_sotx_thread(). 2061 */ 2062 2063 static void 2064 idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu) 2065 { 2066 idm_so_conn_t *so_conn = ic->ic_transport_private; 2067 2068 ASSERT(pdu->isp_ic == ic); 2069 mutex_enter(&so_conn->ic_tx_mutex); 2070 2071 if (!so_conn->ic_tx_thread_running) { 2072 mutex_exit(&so_conn->ic_tx_mutex); 2073 idm_pdu_complete(pdu, IDM_STATUS_ABORTED); 2074 return; 2075 } 2076 2077 list_insert_tail(&so_conn->ic_tx_list, (void *)pdu); 2078 cv_signal(&so_conn->ic_tx_cv); 2079 mutex_exit(&so_conn->ic_tx_mutex); 2080 } 2081 2082 static idm_status_t 2083 idm_i_so_tx(idm_pdu_t *pdu) 2084 { 2085 idm_conn_t *ic = pdu->isp_ic; 2086 idm_status_t status = IDM_STATUS_SUCCESS; 2087 uint8_t pad[ISCSI_PAD_WORD_LEN]; 2088 int pad_len; 2089 uint32_t hdr_digest_crc; 2090 uint32_t data_digest_crc = 0; 2091 int total_len = 0; 2092 int iovlen = 0; 2093 struct iovec iov[6]; 2094 idm_so_conn_t *so_conn; 2095 2096 so_conn = ic->ic_transport_private; 2097 2098 /* Setup BHS */ 2099 iov[iovlen].iov_base = (caddr_t)pdu->isp_hdr; 2100 iov[iovlen].iov_len = pdu->isp_hdrlen; 2101 total_len += iov[iovlen].iov_len; 2102 iovlen++; 2103 2104 /* Setup header digest */ 2105 if (((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) && 2106 (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST)) { 2107 hdr_digest_crc = idm_crc32c(pdu->isp_hdr, pdu->isp_hdrlen); 2108 2109 iov[iovlen].iov_base = (caddr_t)&hdr_digest_crc; 2110 iov[iovlen].iov_len = sizeof (hdr_digest_crc); 2111 total_len += iov[iovlen].iov_len; 2112 iovlen++; 2113 } 2114 2115 /* Setup the data */ 2116 if (pdu->isp_datalen) { 2117 idm_task_t *idt; 2118 idm_buf_t *idb; 2119 iscsi_data_hdr_t *ihp; 2120 ihp = (iscsi_data_hdr_t *)pdu->isp_hdr; 2121 /* Write of immediate data */ 2122 if (ic->ic_ffp && 2123 (ihp->opcode == ISCSI_OP_SCSI_CMD || 2124 ihp->opcode == ISCSI_OP_SCSI_DATA)) { 2125 idt = idm_task_find(ic, ihp->itt, ihp->ttt); 2126 if (idt) { 2127 mutex_enter(&idt->idt_mutex); 2128 idb = idm_buf_find(&idt->idt_outbufv, 0); 2129 mutex_exit(&idt->idt_mutex); 2130 /* 2131 * If the initiator call to idm_buf_alloc 2132 * failed then we can get to this point 2133 * without a bound buffer. The associated 2134 * connection failure will clean things up 2135 * later. It would be nice to come up with 2136 * a cleaner way to handle this. In 2137 * particular it seems absurd to look up 2138 * the task and the buffer just to update 2139 * this counter. 2140 */ 2141 if (idb) 2142 idb->idb_xfer_len += pdu->isp_datalen; 2143 idm_task_rele(idt); 2144 } 2145 } 2146 2147 iov[iovlen].iov_base = (caddr_t)pdu->isp_data; 2148 iov[iovlen].iov_len = pdu->isp_datalen; 2149 total_len += iov[iovlen].iov_len; 2150 iovlen++; 2151 } 2152 2153 /* Setup the data pad if necessary */ 2154 pad_len = ((ISCSI_PAD_WORD_LEN - 2155 (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) & 2156 (ISCSI_PAD_WORD_LEN - 1)); 2157 2158 if (pad_len) { 2159 bzero(pad, sizeof (pad)); 2160 iov[iovlen].iov_base = (void *)&pad; 2161 iov[iovlen].iov_len = pad_len; 2162 total_len += iov[iovlen].iov_len; 2163 iovlen++; 2164 } 2165 2166 /* 2167 * Setup the data digest if enabled. Data-digest is not sent 2168 * for login-phase PDUs. 2169 */ 2170 if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) && 2171 ((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) && 2172 (pdu->isp_datalen || pad_len)) { 2173 /* 2174 * RFC3720/10.2.3: A zero-length Data Segment also 2175 * implies a zero-length data digest. 2176 */ 2177 if (pdu->isp_datalen) { 2178 data_digest_crc = idm_crc32c(pdu->isp_data, 2179 pdu->isp_datalen); 2180 } 2181 if (pad_len) { 2182 data_digest_crc = idm_crc32c_continued(&pad, 2183 pad_len, data_digest_crc); 2184 } 2185 2186 iov[iovlen].iov_base = (caddr_t)&data_digest_crc; 2187 iov[iovlen].iov_len = sizeof (data_digest_crc); 2188 total_len += iov[iovlen].iov_len; 2189 iovlen++; 2190 } 2191 2192 /* Transmit the PDU */ 2193 if (idm_iov_sosend(so_conn->ic_so, &iov[0], iovlen, 2194 total_len) != 0) { 2195 /* Set error status */ 2196 IDM_CONN_LOG(CE_WARN, 2197 "idm_so_tx: failed to transmit the PDU, so: %p ic: %p " 2198 "data: %p", (void *) so_conn->ic_so, (void *) ic, 2199 (void *) pdu->isp_data); 2200 status = IDM_STATUS_IO; 2201 } 2202 2203 /* 2204 * Success does not mean that the PDU actually reached the 2205 * remote node since it could get dropped along the way. 2206 */ 2207 idm_pdu_complete(pdu, status); 2208 2209 return (status); 2210 } 2211 2212 /* 2213 * The idm_so_buf_tx_to_ini() is used by the target iSCSI layer to transmit the 2214 * Data-In PDUs using sockets. Based on the negotiated MaxRecvDataSegmentLength, 2215 * the buffer is segmented into a sequence of Data-In PDUs, ordered by DataSN. 2216 * A target can invoke this function multiple times for a single read command 2217 * (identified by the same ITT) to split the input into several sequences. 2218 * 2219 * DataSN starts with 0 for the first data PDU of an input command and advances 2220 * by 1 for each subsequent data PDU. Each sequence will have its own F bit, 2221 * which is set to 1 for the last data PDU of a sequence. 2222 * 2223 * Scope for Prototype build: 2224 * The data PDUs within a sequence will be sent in order with the buffer offset 2225 * in increasing order. i.e. initiator and target must have negotiated the 2226 * "DataPDUInOrder" to "Yes". The order between sequences is not enforced. 2227 * 2228 * Caller holds idt->idt_mutex 2229 */ 2230 static idm_status_t 2231 idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb) 2232 { 2233 idm_so_conn_t *so_conn = idb->idb_ic->ic_transport_private; 2234 idm_pdu_t tmppdu; 2235 2236 ASSERT(mutex_owned(&idt->idt_mutex)); 2237 2238 /* 2239 * Put the idm_buf_t on the tx queue. It will be transmitted by 2240 * idm_sotx_thread. 2241 */ 2242 mutex_enter(&so_conn->ic_tx_mutex); 2243 2244 DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic, 2245 uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset, 2246 uint64_t, 0, uint32_t, 0, uint32_t, 0, 2247 uint32_t, idb->idb_xfer_len, int, XFER_BUF_TX_TO_INI); 2248 2249 if (!so_conn->ic_tx_thread_running) { 2250 mutex_exit(&so_conn->ic_tx_mutex); 2251 /* 2252 * Don't release idt->idt_mutex since we're supposed to hold 2253 * in when calling idm_buf_tx_to_ini_done 2254 */ 2255 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic, 2256 uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset, 2257 uint64_t, 0, uint32_t, 0, uint32_t, 0, 2258 uint32_t, idb->idb_xfer_len, 2259 int, XFER_BUF_TX_TO_INI); 2260 idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED); 2261 return (IDM_STATUS_FAIL); 2262 } 2263 2264 /* 2265 * Build a template for the data PDU headers we will use so that 2266 * the SN values will stay consistent with other PDU's we are 2267 * transmitting like R2T and SCSI status. 2268 */ 2269 bzero(&idb->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t)); 2270 tmppdu.isp_hdr = &idb->idb_data_hdr_tmpl; 2271 (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu, 2272 ISCSI_OP_SCSI_DATA_RSP); 2273 idb->idb_tx_thread = B_TRUE; 2274 list_insert_tail(&so_conn->ic_tx_list, (void *)idb); 2275 cv_signal(&so_conn->ic_tx_cv); 2276 mutex_exit(&so_conn->ic_tx_mutex); 2277 mutex_exit(&idt->idt_mutex); 2278 2279 /* 2280 * Returning success here indicates the transfer was successfully 2281 * dispatched -- it does not mean that the transfer completed 2282 * successfully. 2283 */ 2284 return (IDM_STATUS_SUCCESS); 2285 } 2286 2287 /* 2288 * The idm_so_buf_rx_from_ini() is used by the target iSCSI layer to specify the 2289 * data blocks it is ready to receive from the initiator in response to a WRITE 2290 * SCSI command. The target iSCSI layer passes the information about the desired 2291 * data blocks to the initiator in one R2T PDU. The receiving buffer, the buffer 2292 * offset and datalen are passed via the 'idb' argument. 2293 * 2294 * Scope for Prototype build: 2295 * R2Ts are required for any Data-Out PDU, i.e. initiator and target must have 2296 * negotiated the "InitialR2T" to "Yes". 2297 * 2298 * Caller holds idt->idt_mutex 2299 */ 2300 static idm_status_t 2301 idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb) 2302 { 2303 idm_pdu_t *pdu; 2304 iscsi_rtt_hdr_t *rtt; 2305 2306 ASSERT(mutex_owned(&idt->idt_mutex)); 2307 2308 DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic, 2309 uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset, 2310 uint64_t, 0, uint32_t, 0, uint32_t, 0, 2311 uint32_t, idb->idb_xfer_len, int, XFER_BUF_RX_FROM_INI); 2312 2313 pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP); 2314 pdu->isp_ic = idt->idt_ic; 2315 bzero(pdu->isp_hdr, sizeof (iscsi_rtt_hdr_t)); 2316 2317 /* iSCSI layer fills the TTT, ITT, StatSN, ExpCmdSN, MaxCmdSN */ 2318 (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, pdu, ISCSI_OP_RTT_RSP); 2319 2320 /* set the rttsn, rtt.flags, rtt.data_offset and rtt.data_length */ 2321 rtt = (iscsi_rtt_hdr_t *)(pdu->isp_hdr); 2322 2323 rtt->opcode = ISCSI_OP_RTT_RSP; 2324 rtt->flags = ISCSI_FLAG_FINAL; 2325 rtt->data_offset = htonl(idb->idb_bufoffset); 2326 rtt->data_length = htonl(idb->idb_xfer_len); 2327 rtt->rttsn = htonl(idt->idt_exp_rttsn++); 2328 2329 /* Keep track of buffer offsets */ 2330 idb->idb_exp_offset = idb->idb_bufoffset; 2331 mutex_exit(&idt->idt_mutex); 2332 2333 /* 2334 * Transmit the PDU. 2335 */ 2336 idm_pdu_tx(pdu); 2337 2338 return (IDM_STATUS_SUCCESS); 2339 } 2340 2341 static idm_status_t 2342 idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen) 2343 { 2344 if ((buflen > IDM_SO_BUF_CACHE_LB) && (buflen <= IDM_SO_BUF_CACHE_UB)) { 2345 idb->idb_buf = kmem_cache_alloc(idm.idm_so_128k_buf_cache, 2346 KM_NOSLEEP); 2347 idb->idb_buf_private = idm.idm_so_128k_buf_cache; 2348 } else { 2349 idb->idb_buf = kmem_alloc(buflen, KM_NOSLEEP); 2350 idb->idb_buf_private = NULL; 2351 } 2352 2353 if (idb->idb_buf == NULL) { 2354 IDM_CONN_LOG(CE_NOTE, 2355 "idm_so_buf_alloc: failed buffer allocation"); 2356 return (IDM_STATUS_FAIL); 2357 } 2358 2359 return (IDM_STATUS_SUCCESS); 2360 } 2361 2362 /* ARGSUSED */ 2363 static idm_status_t 2364 idm_so_buf_setup(idm_buf_t *idb) 2365 { 2366 /* Ensure bufalloc'd flag is unset */ 2367 idb->idb_bufalloc = B_FALSE; 2368 2369 return (IDM_STATUS_SUCCESS); 2370 } 2371 2372 /* ARGSUSED */ 2373 static void 2374 idm_so_buf_teardown(idm_buf_t *idb) 2375 { 2376 /* nothing to do here */ 2377 } 2378 2379 static void 2380 idm_so_buf_free(idm_buf_t *idb) 2381 { 2382 if (idb->idb_buf_private == NULL) { 2383 kmem_free(idb->idb_buf, idb->idb_buflen); 2384 } else { 2385 kmem_cache_free(idb->idb_buf_private, idb->idb_buf); 2386 } 2387 } 2388 2389 static void 2390 idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt, idm_buf_t *idb, 2391 uint32_t offset, uint32_t length) 2392 { 2393 idm_so_conn_t *so_conn = ic->ic_transport_private; 2394 idm_pdu_t tmppdu; 2395 idm_buf_t *rtt_buf; 2396 2397 ASSERT(mutex_owned(&idt->idt_mutex)); 2398 2399 /* 2400 * Allocate a buffer to represent the RTT transfer. We could further 2401 * optimize this by allocating the buffers internally from an rtt 2402 * specific buffer cache since this is socket-specific code but for 2403 * now we will keep it simple. 2404 */ 2405 rtt_buf = idm_buf_alloc(ic, (uint8_t *)idb->idb_buf + offset, length); 2406 if (rtt_buf == NULL) { 2407 /* 2408 * If we're in FFP then the failure was likely a resource 2409 * allocation issue and we should close the connection by 2410 * sending a CE_TRANSPORT_FAIL event. 2411 * 2412 * If we're not in FFP then idm_buf_alloc will always 2413 * fail and the state is transitioning to "complete" anyway 2414 * so we won't bother to send an event. 2415 */ 2416 mutex_enter(&ic->ic_state_mutex); 2417 if (ic->ic_ffp) 2418 idm_conn_event_locked(ic, CE_TRANSPORT_FAIL, 2419 NULL, CT_NONE); 2420 mutex_exit(&ic->ic_state_mutex); 2421 return; 2422 } 2423 2424 rtt_buf->idb_buf_cb = NULL; 2425 rtt_buf->idb_cb_arg = NULL; 2426 rtt_buf->idb_bufoffset = offset; 2427 rtt_buf->idb_xfer_len = length; 2428 rtt_buf->idb_ic = idt->idt_ic; 2429 rtt_buf->idb_task_binding = idt; 2430 2431 /* 2432 * Put the idm_buf_t on the tx queue. It will be transmitted by 2433 * idm_sotx_thread. 2434 */ 2435 mutex_enter(&so_conn->ic_tx_mutex); 2436 2437 if (!so_conn->ic_tx_thread_running) { 2438 idm_buf_free(rtt_buf); 2439 mutex_exit(&so_conn->ic_tx_mutex); 2440 return; 2441 } 2442 2443 /* 2444 * This new buffer represents an additional reference on the task 2445 */ 2446 idm_task_hold(idt); 2447 2448 /* 2449 * Build a template for the data PDU headers we will use so that 2450 * the SN values will stay consistent with other PDU's we are 2451 * transmitting like R2T and SCSI status. 2452 */ 2453 bzero(&rtt_buf->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t)); 2454 tmppdu.isp_hdr = &rtt_buf->idb_data_hdr_tmpl; 2455 (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu, 2456 ISCSI_OP_SCSI_DATA); 2457 rtt_buf->idb_tx_thread = B_TRUE; 2458 rtt_buf->idb_in_transport = B_TRUE; 2459 list_insert_tail(&so_conn->ic_tx_list, (void *)rtt_buf); 2460 cv_signal(&so_conn->ic_tx_cv); 2461 mutex_exit(&so_conn->ic_tx_mutex); 2462 } 2463 2464 static void 2465 idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb) 2466 { 2467 /* 2468 * Don't worry about status -- we assume any error handling 2469 * is performed by the caller (idm_sotx_thread). 2470 */ 2471 idb->idb_in_transport = B_FALSE; 2472 idm_task_rele(idt); 2473 idm_buf_free(idb); 2474 } 2475 2476 static idm_status_t 2477 idm_so_send_buf_region(idm_task_t *idt, idm_buf_t *idb, 2478 uint32_t buf_region_offset, uint32_t buf_region_length) 2479 { 2480 idm_conn_t *ic; 2481 uint32_t max_dataseglen; 2482 size_t remainder, chunk; 2483 uint32_t data_offset = buf_region_offset; 2484 iscsi_data_hdr_t *bhs; 2485 idm_pdu_t *pdu; 2486 idm_status_t tx_status; 2487 2488 ASSERT(mutex_owned(&idt->idt_mutex)); 2489 2490 ic = idt->idt_ic; 2491 2492 max_dataseglen = 8192; /* Need value from login negotiation */ 2493 remainder = buf_region_length; 2494 2495 while (remainder) { 2496 if (idt->idt_state != TASK_ACTIVE) { 2497 ASSERT((idt->idt_state != TASK_IDLE) && 2498 (idt->idt_state != TASK_COMPLETE)); 2499 return (IDM_STATUS_ABORTED); 2500 } 2501 2502 /* check to see if we need to chunk the data */ 2503 if (remainder > max_dataseglen) { 2504 chunk = max_dataseglen; 2505 } else { 2506 chunk = remainder; 2507 } 2508 2509 /* Data PDU headers will always be sizeof (iscsi_hdr_t) */ 2510 pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP); 2511 pdu->isp_ic = ic; 2512 2513 /* 2514 * We've already built a build a header template 2515 * to use during the transfer. Use this template so that 2516 * the SN values stay consistent with any unrelated PDU's 2517 * being transmitted. 2518 */ 2519 bcopy(&idb->idb_data_hdr_tmpl, pdu->isp_hdr, 2520 sizeof (iscsi_hdr_t)); 2521 2522 /* 2523 * Set DataSN, data offset, and flags in BHS 2524 * For the prototype build, A = 0, S = 0, U = 0 2525 */ 2526 bhs = (iscsi_data_hdr_t *)(pdu->isp_hdr); 2527 2528 bhs->datasn = htonl(idt->idt_exp_datasn++); 2529 2530 hton24(bhs->dlength, chunk); 2531 bhs->offset = htonl(idb->idb_bufoffset + data_offset); 2532 2533 if (chunk == remainder) { 2534 bhs->flags = ISCSI_FLAG_FINAL; /* F bit set to 1 */ 2535 } 2536 2537 /* Instrument the data-send DTrace probe. */ 2538 if (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP) { 2539 DTRACE_ISCSI_2(data__send, 2540 idm_conn_t *, idt->idt_ic, 2541 iscsi_data_rsp_hdr_t *, 2542 (iscsi_data_rsp_hdr_t *)pdu->isp_hdr); 2543 } 2544 /* setup data */ 2545 pdu->isp_data = (uint8_t *)idb->idb_buf + data_offset; 2546 pdu->isp_datalen = (uint_t)chunk; 2547 remainder -= chunk; 2548 data_offset += chunk; 2549 2550 /* 2551 * Now that we're done working with idt_exp_datasn, 2552 * idt->idt_state and idb->idb_bufoffset we can release 2553 * the task lock -- don't want to hold it across the 2554 * call to idm_i_so_tx since we could block. 2555 */ 2556 mutex_exit(&idt->idt_mutex); 2557 2558 /* 2559 * Transmit the PDU. Call the internal routine directly 2560 * as there is already implicit ordering. 2561 */ 2562 if ((tx_status = idm_i_so_tx(pdu)) != IDM_STATUS_SUCCESS) { 2563 mutex_enter(&idt->idt_mutex); 2564 return (tx_status); 2565 } 2566 2567 mutex_enter(&idt->idt_mutex); 2568 idt->idt_tx_bytes += chunk; 2569 } 2570 2571 return (IDM_STATUS_SUCCESS); 2572 } 2573 2574 /* 2575 * TX PDU cache 2576 */ 2577 /* ARGSUSED */ 2578 int 2579 idm_sotx_pdu_constructor(void *hdl, void *arg, int flags) 2580 { 2581 idm_pdu_t *pdu = hdl; 2582 2583 bzero(pdu, sizeof (idm_pdu_t)); 2584 pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */ 2585 pdu->isp_hdrlen = sizeof (iscsi_hdr_t); 2586 pdu->isp_callback = idm_sotx_cache_pdu_cb; 2587 pdu->isp_magic = IDM_PDU_MAGIC; 2588 bzero(pdu->isp_hdr, sizeof (iscsi_hdr_t)); 2589 2590 return (0); 2591 } 2592 2593 /* ARGSUSED */ 2594 void 2595 idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status) 2596 { 2597 /* reset values between use */ 2598 pdu->isp_datalen = 0; 2599 2600 kmem_cache_free(idm.idm_sotx_pdu_cache, pdu); 2601 } 2602 2603 /* 2604 * RX PDU cache 2605 */ 2606 /* ARGSUSED */ 2607 int 2608 idm_sorx_pdu_constructor(void *hdl, void *arg, int flags) 2609 { 2610 idm_pdu_t *pdu = hdl; 2611 2612 bzero(pdu, sizeof (idm_pdu_t)); 2613 pdu->isp_magic = IDM_PDU_MAGIC; 2614 pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */ 2615 pdu->isp_callback = idm_sorx_cache_pdu_cb; 2616 2617 return (0); 2618 } 2619 2620 /* ARGSUSED */ 2621 static void 2622 idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status) 2623 { 2624 pdu->isp_iovlen = 0; 2625 pdu->isp_sorx_buf = 0; 2626 kmem_cache_free(idm.idm_sorx_pdu_cache, pdu); 2627 } 2628 2629 static void 2630 idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status) 2631 { 2632 /* 2633 * We had to modify our cached RX PDU with a longer header buffer 2634 * and/or a longer data buffer. Release the new buffers and fix 2635 * the fields back to what we would expect for a cached RX PDU. 2636 */ 2637 if (pdu->isp_flags & IDM_PDU_ADDL_HDR) { 2638 kmem_free(pdu->isp_hdr, pdu->isp_hdrlen); 2639 } 2640 if (pdu->isp_flags & IDM_PDU_ADDL_DATA) { 2641 kmem_free(pdu->isp_data, pdu->isp_datalen); 2642 } 2643 pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); 2644 pdu->isp_hdrlen = sizeof (iscsi_hdr_t); 2645 pdu->isp_data = NULL; 2646 pdu->isp_datalen = 0; 2647 pdu->isp_sorx_buf = 0; 2648 pdu->isp_callback = idm_sorx_cache_pdu_cb; 2649 idm_sorx_cache_pdu_cb(pdu, status); 2650 } 2651 2652 /* 2653 * This thread is only active when I/O is queued for transmit 2654 * because the socket is busy. 2655 */ 2656 void 2657 idm_sotx_thread(void *arg) 2658 { 2659 idm_conn_t *ic = arg; 2660 idm_tx_obj_t *object, *next; 2661 idm_so_conn_t *so_conn; 2662 idm_status_t status = IDM_STATUS_SUCCESS; 2663 2664 idm_conn_hold(ic); 2665 2666 mutex_enter(&ic->ic_mutex); 2667 so_conn = ic->ic_transport_private; 2668 so_conn->ic_tx_thread_running = B_TRUE; 2669 so_conn->ic_tx_thread_did = so_conn->ic_tx_thread->t_did; 2670 cv_signal(&ic->ic_cv); 2671 mutex_exit(&ic->ic_mutex); 2672 2673 mutex_enter(&so_conn->ic_tx_mutex); 2674 2675 while (so_conn->ic_tx_thread_running) { 2676 while (list_is_empty(&so_conn->ic_tx_list)) { 2677 DTRACE_PROBE1(soconn__tx__sleep, idm_conn_t *, ic); 2678 cv_wait(&so_conn->ic_tx_cv, &so_conn->ic_tx_mutex); 2679 DTRACE_PROBE1(soconn__tx__wakeup, idm_conn_t *, ic); 2680 2681 if (!so_conn->ic_tx_thread_running) { 2682 goto tx_bail; 2683 } 2684 } 2685 2686 object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list); 2687 list_remove(&so_conn->ic_tx_list, object); 2688 mutex_exit(&so_conn->ic_tx_mutex); 2689 2690 switch (object->idm_tx_obj_magic) { 2691 case IDM_PDU_MAGIC: 2692 DTRACE_PROBE2(soconn__tx__pdu, idm_conn_t *, ic, 2693 idm_pdu_t *, (idm_pdu_t *)object); 2694 2695 status = idm_i_so_tx((idm_pdu_t *)object); 2696 break; 2697 2698 case IDM_BUF_MAGIC: { 2699 idm_buf_t *idb = (idm_buf_t *)object; 2700 idm_task_t *idt = idb->idb_task_binding; 2701 2702 DTRACE_PROBE2(soconn__tx__buf, idm_conn_t *, ic, 2703 idm_buf_t *, idb); 2704 2705 mutex_enter(&idt->idt_mutex); 2706 status = idm_so_send_buf_region(idt, 2707 idb, 0, idb->idb_xfer_len); 2708 2709 /* 2710 * TX thread owns the buffer so we expect it to 2711 * be "in transport" 2712 */ 2713 ASSERT(idb->idb_in_transport); 2714 if (IDM_CONN_ISTGT(ic)) { 2715 /* 2716 * idm_buf_tx_to_ini_done releases 2717 * idt->idt_mutex 2718 */ 2719 DTRACE_ISCSI_8(xfer__done, 2720 idm_conn_t *, idt->idt_ic, 2721 uintptr_t, idb->idb_buf, 2722 uint32_t, idb->idb_bufoffset, 2723 uint64_t, 0, uint32_t, 0, uint32_t, 0, 2724 uint32_t, idb->idb_xfer_len, 2725 int, XFER_BUF_TX_TO_INI); 2726 idm_buf_tx_to_ini_done(idt, idb, status); 2727 } else { 2728 idm_so_send_rtt_data_done(idt, idb); 2729 mutex_exit(&idt->idt_mutex); 2730 } 2731 break; 2732 } 2733 2734 default: 2735 IDM_CONN_LOG(CE_WARN, "idm_sotx_thread: Unknown magic " 2736 "(0x%08x)", object->idm_tx_obj_magic); 2737 status = IDM_STATUS_FAIL; 2738 } 2739 2740 mutex_enter(&so_conn->ic_tx_mutex); 2741 2742 if (status != IDM_STATUS_SUCCESS) { 2743 so_conn->ic_tx_thread_running = B_FALSE; 2744 idm_conn_event(ic, CE_TRANSPORT_FAIL, status); 2745 } 2746 } 2747 2748 /* 2749 * Before we leave, we need to abort every item remaining in the 2750 * TX list. 2751 */ 2752 2753 tx_bail: 2754 object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list); 2755 2756 while (object != NULL) { 2757 next = list_next(&so_conn->ic_tx_list, object); 2758 2759 list_remove(&so_conn->ic_tx_list, object); 2760 switch (object->idm_tx_obj_magic) { 2761 case IDM_PDU_MAGIC: 2762 idm_pdu_complete((idm_pdu_t *)object, 2763 IDM_STATUS_ABORTED); 2764 break; 2765 2766 case IDM_BUF_MAGIC: { 2767 idm_buf_t *idb = (idm_buf_t *)object; 2768 idm_task_t *idt = idb->idb_task_binding; 2769 mutex_exit(&so_conn->ic_tx_mutex); 2770 mutex_enter(&idt->idt_mutex); 2771 /* 2772 * TX thread owns the buffer so we expect it to 2773 * be "in transport" 2774 */ 2775 ASSERT(idb->idb_in_transport); 2776 if (IDM_CONN_ISTGT(ic)) { 2777 /* 2778 * idm_buf_tx_to_ini_done releases 2779 * idt->idt_mutex 2780 */ 2781 DTRACE_ISCSI_8(xfer__done, 2782 idm_conn_t *, idt->idt_ic, 2783 uintptr_t, idb->idb_buf, 2784 uint32_t, idb->idb_bufoffset, 2785 uint64_t, 0, uint32_t, 0, uint32_t, 0, 2786 uint32_t, idb->idb_xfer_len, 2787 int, XFER_BUF_TX_TO_INI); 2788 idm_buf_tx_to_ini_done(idt, idb, 2789 IDM_STATUS_ABORTED); 2790 } else { 2791 idm_so_send_rtt_data_done(idt, idb); 2792 mutex_exit(&idt->idt_mutex); 2793 } 2794 mutex_enter(&so_conn->ic_tx_mutex); 2795 break; 2796 } 2797 default: 2798 IDM_CONN_LOG(CE_WARN, 2799 "idm_sotx_thread: Unexpected magic " 2800 "(0x%08x)", object->idm_tx_obj_magic); 2801 } 2802 2803 object = next; 2804 } 2805 2806 mutex_exit(&so_conn->ic_tx_mutex); 2807 idm_conn_rele(ic); 2808 thread_exit(); 2809 /*NOTREACHED*/ 2810 } 2811 2812 static void 2813 idm_so_socket_set_nonblock(struct sonode *node) 2814 { 2815 (void) VOP_SETFL(node->so_vnode, node->so_flag, 2816 (node->so_state | FNONBLOCK), CRED(), NULL); 2817 } 2818 2819 static void 2820 idm_so_socket_set_block(struct sonode *node) 2821 { 2822 (void) VOP_SETFL(node->so_vnode, node->so_flag, 2823 (node->so_state & (~FNONBLOCK)), CRED(), NULL); 2824 } 2825