1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 * 25 * iSCSI Software Initiator 26 */ 27 28 #include <sys/socket.h> /* networking stuff */ 29 #include <sys/strsubr.h> /* networking stuff */ 30 #include <netinet/tcp.h> /* TCP_NODELAY */ 31 #include <sys/socketvar.h> /* _ALLOC_SLEEP */ 32 #include <sys/pathname.h> /* declares: lookupname */ 33 #include <sys/fs/snode.h> /* defines: VTOS */ 34 #include <sys/fs/dv_node.h> /* declares: devfs_lookupname */ 35 #include <sys/bootconf.h> 36 #include <sys/bootprops.h> 37 38 #include "iscsi.h" 39 40 /* 41 * This is a high level description of the default 42 * iscsi_net transport interfaces. These are used 43 * to create, send, recv, and close standard TCP/IP 44 * messages. In addition there are extensions to send 45 * and recv iSCSI PDU data. 46 * 47 * NOTE: It would be very easy for an iSCSI HBA vendor 48 * to register their own functions over the top of 49 * the default interfaces. This would allow an iSCSI 50 * HBA to use the same iscsiadm management interfaces 51 * and the Solaris iSCSI session / connection management. 52 * The current problem with this approach is we only 53 * allow one one registered transport table. This 54 * would be pretty easy to correct although will require 55 * additional CLI changes to manage multiple interfaces. 56 * If a vendor can present compelling performance data, 57 * then Sun will be willing to enhance this support for 58 * multiple interface tables and better CLI management. 59 * 60 * The following listing describes the iscsi_net 61 * entry points: 62 * 63 * socket - Creates TCP/IP socket connection. In the 64 * default implementation creates a sonode 65 * via the sockfs kernel layer. 66 * bind - Performs standard TCP/IP BSD operation. In 67 * the default implementation this only act 68 * as a soft binding based on the IP and routing 69 * tables. It would be preferred if this was 70 * a hard binding but that is currently not 71 * possible with Solaris's networking stack. 72 * connect - Performs standard TCP/IP BSD operation. This 73 * establishes the TCP SYN to the peer IP address. 74 * listen - Performs standard TCP/IP BSD operation. This 75 * listens for incoming peer connections. 76 * accept - Performs standard TCP/IP BSD operation. This 77 * accepts incoming peer connections. 78 * shutdown - This disconnects the TCP/IP connection while 79 * maintaining the resources. 80 * close - This disconnects the TCP/IP connection and 81 * releases the resources. 82 * 83 * getsockopt - Gets socket option for specified socket. 84 * setsockopt - Sets socket option for specified socket. 85 * 86 * The current socket options that are used by the initiator 87 * are listed below. 88 * 89 * TCP_CONN_NOTIFY_THRESHOLD 90 * TCP_CONN_ABORT_THRESHOLD 91 * TCP_ABORT_THRESHOLD 92 * TCP_NODELAY 93 * SO_RCVBUF 94 * SO_SNDBUF 95 * 96 * iscsi_net_poll - Poll socket interface for a specified amount 97 * of data. If data not received in timeout 98 * period fail request. 99 * iscsi_net_sendmsg - Send message on socket connection 100 * iscsi_net_recvmsg - Receive message on socket connection 101 * 102 * iscsi_net_sendpdu - Send iSCSI PDU on socket connection 103 * iscsi_net_recvhdr - Receive iSCSI header on socket connection 104 * iscsi_net_recvdata - Receive iSCSI data on socket connection 105 * 106 * The iSCSI interfaces have the below optional flags. 107 * 108 * ISCSI_NET_HEADER_DIGEST - The interface should either 109 * generate or validate the iSCSI 110 * header digest CRC. 111 * ISCSI_NET_DATA_DIGESt - The interface should either 112 * generate or validate the iSCSI 113 * data digest CRC. 114 */ 115 116 117 /* global */ 118 iscsi_network_t *iscsi_net; 119 120 /* consts */ 121 122 /* 123 * This table is used for quick validation of incoming 124 * iSCSI PDU opcodes. A value of '0' in the table below 125 * indicated that the opcode is invalid for an iSCSI 126 * initiator to receive. 127 */ 128 const int is_incoming_opcode_invalid[256] = { 129 /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ 130 /* 0x0X */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 131 /* 0x1X */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 132 /* 0x2X */ 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 133 /* 0x3X */ 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 134 /* 0x4X */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 135 /* 0x5X */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 136 /* 0x6X */ 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 137 /* 0x7X */ 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 138 /* 0x8X */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 139 /* 0x9X */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 140 /* 0xAX */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 141 /* 0xBX */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 142 /* 0xCX */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 143 /* 0xDX */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 144 /* 0xEX */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 145 /* 0xFX */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 146 }; 147 /* 148 * Define macros to manipulate snode, vnode, and open device flags 149 */ 150 #define VTYP_VALID(i) (((i) == VCHR) || ((i) == VBLK)) 151 #define STYP_VALID(i) (((i) == S_IFCHR) || ((i) == S_IFBLK)) 152 #define STYP_TO_VTYP(i) (((i) == S_IFCHR) ? VCHR : VBLK) 153 154 #define IP_4_BITS 32 155 #define IP_6_BITS 128 156 157 extern int modrootloaded; 158 extern ib_boot_prop_t *iscsiboot_prop; 159 160 /* prototypes */ 161 162 /* for iSCSI boot */ 163 static int net_up = 0; 164 static iscsi_status_t iscsi_net_interface(); 165 static int iscsi_ldi_vp_from_name(char *path, vnode_t **vpp); 166 /* boot prototypes end */ 167 168 static void * iscsi_net_socket(int domain, int type, int protocol); 169 static int iscsi_net_bind(void *socket, struct sockaddr * 170 name, int name_len, int backlog, int flags); 171 static int iscsi_net_connect(void *socket, struct sockaddr * 172 name, int name_len, int fflag, int flags); 173 static int iscsi_net_listen(void *socket, int backlog); 174 static void * iscsi_net_accept(void *socket, struct sockaddr *addr, 175 int *addr_len); 176 static int iscsi_net_getsockname(void *socket); 177 static int iscsi_net_getsockopt(void *socket, int level, 178 int option_name, void *option_val, int *option_len, int flags); 179 static int iscsi_net_setsockopt(void *socket, int level, 180 int option_name, void *option_val, int option_len); 181 static int iscsi_net_shutdown(void *socket, int how); 182 static void iscsi_net_close(void *socket); 183 184 static size_t iscsi_net_poll(void *socket, clock_t timeout); 185 static size_t iscsi_net_sendmsg(void *socket, struct msghdr *msg); 186 static size_t iscsi_net_recvmsg(void *socket, 187 struct msghdr *msg, int timeout); 188 189 static iscsi_status_t iscsi_net_sendpdu(void *socket, iscsi_hdr_t *ihp, 190 char *data, int flags); 191 static iscsi_status_t iscsi_net_recvdata(void *socket, iscsi_hdr_t *ihp, 192 char *data, int max_data_length, int timeout, int flags); 193 static iscsi_status_t iscsi_net_recvhdr(void *socket, iscsi_hdr_t *ihp, 194 int header_length, int timeout, int flags); 195 196 static void iscsi_net_set_preconnect_options(void *socket); 197 static void iscsi_net_set_postconnect_options(void *socket); 198 199 /* 200 * +--------------------------------------------------------------------+ 201 * | network interface registration functions | 202 * +--------------------------------------------------------------------+ 203 */ 204 205 /* 206 * iscsi_net_init - initialize network interface 207 */ 208 void 209 iscsi_net_init() 210 { 211 iscsi_net = kmem_zalloc(sizeof (*iscsi_net), KM_SLEEP); 212 213 iscsi_net->socket = iscsi_net_socket; 214 215 iscsi_net->bind = iscsi_net_bind; 216 iscsi_net->connect = iscsi_net_connect; 217 iscsi_net->listen = iscsi_net_listen; 218 iscsi_net->accept = iscsi_net_accept; 219 iscsi_net->shutdown = iscsi_net_shutdown; 220 iscsi_net->close = iscsi_net_close; 221 222 iscsi_net->getsockname = iscsi_net_getsockname; 223 iscsi_net->getsockopt = iscsi_net_getsockopt; 224 iscsi_net->setsockopt = iscsi_net_setsockopt; 225 226 iscsi_net->poll = iscsi_net_poll; 227 iscsi_net->sendmsg = iscsi_net_sendmsg; 228 iscsi_net->recvmsg = iscsi_net_recvmsg; 229 230 iscsi_net->sendpdu = iscsi_net_sendpdu; 231 iscsi_net->recvhdr = iscsi_net_recvhdr; 232 iscsi_net->recvdata = iscsi_net_recvdata; 233 } 234 235 /* 236 * iscsi_net_fini - release network interface 237 */ 238 void 239 iscsi_net_fini() 240 { 241 kmem_free(iscsi_net, sizeof (*iscsi_net)); 242 iscsi_net = NULL; 243 } 244 245 246 /* 247 * iscsi_net_set_preconnect_options - 248 */ 249 static void 250 iscsi_net_set_preconnect_options(void *socket) 251 { 252 int ret = 0; 253 ret += iscsi_net->setsockopt(socket, IPPROTO_TCP, 254 TCP_CONN_NOTIFY_THRESHOLD, (char *)&iscsi_net->tweaks. 255 conn_notify_threshold, sizeof (int)); 256 ret += iscsi_net->setsockopt(socket, IPPROTO_TCP, 257 TCP_CONN_ABORT_THRESHOLD, (char *)&iscsi_net->tweaks. 258 conn_abort_threshold, sizeof (int)); 259 ret += iscsi_net->setsockopt(socket, IPPROTO_TCP, TCP_ABORT_THRESHOLD, 260 (char *)&iscsi_net->tweaks.abort_threshold, sizeof (int)); 261 if (ret != 0) { 262 cmn_err(CE_NOTE, "iscsi connection failed to set socket option" 263 "TCP_CONN_NOTIFY_THRESHOLD, TCP_CONN_ABORT_THRESHOLD or " 264 "TCP_ABORT_THRESHOLD"); 265 } 266 } 267 268 /* 269 * iscsi_net_set_postconnect_options - 270 */ 271 static void 272 iscsi_net_set_postconnect_options(void *socket) 273 { 274 int ret = 0; 275 ret += iscsi_net->setsockopt(socket, IPPROTO_TCP, TCP_NODELAY, 276 (char *)&iscsi_net->tweaks.nodelay, sizeof (int)); 277 ret += iscsi_net->setsockopt(socket, SOL_SOCKET, SO_RCVBUF, 278 (char *)&iscsi_net->tweaks.rcvbuf, sizeof (int)); 279 ret += iscsi_net->setsockopt(socket, SOL_SOCKET, SO_SNDBUF, 280 (char *)&iscsi_net->tweaks.sndbuf, sizeof (int)); 281 if (ret != 0) { 282 cmn_err(CE_NOTE, "iscsi connection failed to set socket option" 283 "TCP_NODELAY, SO_RCVBUF or SO_SNDBUF"); 284 } 285 } 286 287 288 /* 289 * +--------------------------------------------------------------------+ 290 * | register network interfaces | 291 * +--------------------------------------------------------------------+ 292 */ 293 294 /* 295 * iscsi_net_socket - create socket 296 */ 297 static void * 298 iscsi_net_socket(int domain, int type, int protocol) 299 { 300 vnode_t *dvp = NULL, 301 *vp = NULL; 302 struct snode *csp = NULL; 303 int err = 0; 304 major_t maj; 305 306 if (!modrootloaded && !net_up && iscsiboot_prop) { 307 if (iscsi_net_interface() == ISCSI_STATUS_SUCCESS) 308 net_up = 1; 309 } 310 311 /* ---- solookup: start ---- */ 312 if ((vp = solookup(domain, type, protocol, NULL, &err)) == NULL) { 313 314 /* 315 * solookup calls sogetvp if the vp is not found in 316 * the cache. Since the call to sogetvp is hardwired 317 * to use USERSPACE and declared static we'll do the 318 * work here instead. 319 */ 320 if (!modrootloaded) { 321 err = iscsi_ldi_vp_from_name("/devices/pseudo/tcp@0:" 322 "tcp", &vp); 323 } else { 324 err = lookupname(type == SOCK_STREAM ? "/dev/tcp" : 325 "/dev/udp", UIO_SYSSPACE, FOLLOW, NULLVPP, &vp); 326 } 327 if (err) { 328 return (NULL); 329 } 330 331 /* ---- check that it is the correct vnode ---- */ 332 if (vp->v_type != VCHR) { 333 VN_RELE(vp); 334 return (NULL); 335 } 336 337 csp = VTOS(VTOS(vp)->s_commonvp); 338 if (!(csp->s_flag & SDIPSET)) { 339 char *pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 340 err = ddi_dev_pathname(vp->v_rdev, S_IFCHR, 341 pathname); 342 if (err == 0) { 343 err = devfs_lookupname(pathname, NULLVPP, 344 &dvp); 345 } 346 VN_RELE(vp); 347 kmem_free(pathname, MAXPATHLEN); 348 if (err != 0) { 349 return (NULL); 350 } 351 vp = dvp; 352 } 353 354 maj = getmajor(vp->v_rdev); 355 if (!STREAMSTAB(maj)) { 356 VN_RELE(vp); 357 return (NULL); 358 } 359 } 360 /* ---- solookup: end ---- */ 361 return (socreate(vp, domain, type, protocol, SOV_DEFAULT, NULL, &err)); 362 } 363 364 /* 365 * iscsi_net_bind - bind socket to a specific sockaddr 366 */ 367 static int 368 iscsi_net_bind(void *socket, struct sockaddr *name, int name_len, 369 int backlog, int flags) 370 { 371 return (sobind((struct sonode *)socket, name, name_len, 372 backlog, flags)); 373 } 374 375 /* 376 * iscsi_net_connect - connect socket to peer sockaddr 377 */ 378 static int 379 iscsi_net_connect(void *socket, struct sockaddr *name, int name_len, 380 int fflag, int flags) 381 { 382 int rval; 383 384 iscsi_net_set_preconnect_options(socket); 385 rval = soconnect((struct sonode *)socket, name, 386 name_len, fflag, flags); 387 iscsi_net_set_postconnect_options(socket); 388 389 return (rval); 390 } 391 392 /* 393 * iscsi_net_listen - listen to socket for peer connections 394 */ 395 static int 396 iscsi_net_listen(void *socket, int backlog) 397 { 398 return (solisten((struct sonode *)socket, backlog)); 399 } 400 401 /* 402 * iscsi_net_accept - accept peer socket connections 403 */ 404 static void * 405 iscsi_net_accept(void *socket, struct sockaddr *addr, int *addr_len) 406 { 407 struct sonode *listening_socket; 408 409 (void) soaccept((struct sonode *)socket, 410 ((struct sonode *)socket)->so_flag, 411 &listening_socket); 412 if (listening_socket != NULL) { 413 bcopy(listening_socket->so_faddr_sa, addr, 414 (socklen_t)listening_socket->so_faddr_len); 415 *addr_len = listening_socket->so_faddr_len; 416 } else { 417 *addr_len = 0; 418 } 419 420 return ((void *)listening_socket); 421 } 422 423 /* 424 * iscsi_net_getsockname - 425 */ 426 static int 427 iscsi_net_getsockname(void *socket) 428 { 429 return (sogetsockname((struct sonode *)socket)); 430 } 431 432 /* 433 * iscsi_net_getsockopt - get value of option on socket 434 */ 435 static int 436 iscsi_net_getsockopt(void *socket, int level, int option_name, 437 void *option_val, int *option_len, int flags) 438 { 439 return (sogetsockopt((struct sonode *)socket, level, 440 option_name, option_val, (socklen_t *)option_len, 441 flags)); 442 } 443 444 /* 445 * iscsi_net_setsockopt - set value for option on socket 446 */ 447 static int 448 iscsi_net_setsockopt(void *socket, int level, int option_name, 449 void *option_val, int option_len) 450 { 451 return (sosetsockopt((struct sonode *)socket, level, 452 option_name, option_val, option_len)); 453 } 454 455 /* 456 * iscsi_net_shutdown - shutdown socket connection 457 */ 458 static int 459 iscsi_net_shutdown(void *socket, int how) 460 { 461 return (soshutdown((struct sonode *)socket, how)); 462 } 463 464 /* 465 * iscsi_net_close - shutdown socket connection and release resources 466 */ 467 static void 468 iscsi_net_close(void *socket) 469 { 470 vnode_t *vp = SOTOV((struct sonode *)socket); 471 (void) soshutdown((struct sonode *)socket, 2); 472 (void) VOP_CLOSE(vp, 0, 1, 0, kcred, NULL); 473 VN_RELE(vp); 474 } 475 476 /* 477 * iscsi_net_poll - poll socket for data 478 */ 479 static size_t 480 iscsi_net_poll(void *socket, clock_t timeout) 481 { 482 int pflag; 483 uchar_t pri; 484 rval_t rval; 485 486 pri = 0; 487 pflag = MSG_ANY; 488 return (kstrgetmsg(SOTOV((struct sonode *)socket), NULL, NULL, 489 &pri, &pflag, timeout, &rval)); 490 } 491 492 /* 493 * iscsi_net_sendmsg - send message on socket 494 */ 495 /* ARGSUSED */ 496 static size_t 497 iscsi_net_sendmsg(void *socket, struct msghdr *msg) 498 { 499 int i = 0; 500 int total_len = 0; 501 struct uio uio; 502 503 /* Initialization of the uio structure. */ 504 bzero(&uio, sizeof (uio)); 505 uio.uio_iov = msg->msg_iov; 506 uio.uio_iovcnt = msg->msg_iovlen; 507 uio.uio_segflg = UIO_SYSSPACE; 508 509 for (i = 0; i < msg->msg_iovlen; i++) { 510 total_len += (msg->msg_iov)[i].iov_len; 511 } 512 uio.uio_resid = total_len; 513 514 (void) sosendmsg((struct sonode *)socket, msg, &uio); 515 DTRACE_PROBE2(sosendmsg, size_t, total_len, size_t, uio.uio_resid); 516 return (total_len - uio.uio_resid); 517 } 518 519 /* 520 * iscsi_net_recvmsg - receive message on socket 521 */ 522 /* ARGSUSED */ 523 static size_t 524 iscsi_net_recvmsg(void *socket, struct msghdr *msg, int timeout) 525 { 526 int idx; 527 int total_len = 0; 528 struct uio uio; 529 uchar_t pri = 0; 530 int prflag = MSG_ANY; 531 rval_t rval; 532 struct sonode *sonode = (struct sonode *)socket; 533 534 /* Initialization of the uio structure. */ 535 bzero(&uio, sizeof (uio)); 536 uio.uio_iov = msg->msg_iov; 537 uio.uio_iovcnt = msg->msg_iovlen; 538 uio.uio_segflg = UIO_SYSSPACE; 539 540 for (idx = 0; idx < msg->msg_iovlen; idx++) { 541 total_len += (msg->msg_iov)[idx].iov_len; 542 } 543 uio.uio_resid = total_len; 544 545 /* If timeout requested on receive */ 546 if (timeout > 0) { 547 boolean_t loopback = B_FALSE; 548 549 /* And this isn't a loopback connection */ 550 if (sonode->so_laddr.soa_sa->sa_family == AF_INET) { 551 struct sockaddr_in *lin = 552 (struct sockaddr_in *)sonode->so_laddr.soa_sa; 553 struct sockaddr_in *fin = 554 (struct sockaddr_in *)sonode->so_faddr.soa_sa; 555 556 if ((lin->sin_family == fin->sin_family) && 557 (bcmp(&lin->sin_addr, &fin->sin_addr, 558 sizeof (struct in_addr)) == 0)) { 559 loopback = B_TRUE; 560 } 561 } else { 562 struct sockaddr_in6 *lin6 = 563 (struct sockaddr_in6 *)sonode->so_laddr.soa_sa; 564 struct sockaddr_in6 *fin6 = 565 (struct sockaddr_in6 *)sonode->so_faddr.soa_sa; 566 567 if ((lin6->sin6_family == fin6->sin6_family) && 568 (bcmp(&lin6->sin6_addr, &fin6->sin6_addr, 569 sizeof (struct in6_addr)) == 0)) { 570 loopback = B_TRUE; 571 } 572 } 573 574 if (loopback == B_FALSE) { 575 /* 576 * Then poll device for up to the timeout 577 * period or the requested data is received. 578 */ 579 if (kstrgetmsg(SOTOV(sonode), 580 NULL, NULL, &pri, &prflag, timeout * 1000, 581 &rval) == ETIME) { 582 return (0); 583 } 584 } 585 } 586 587 /* 588 * Receive the requested data. Block until all 589 * data is received. 590 * 591 * resid occurs only when the connection is 592 * disconnected. In that case it will return 593 * the amount of data that was not received. 594 * In general this is the total amount we 595 * requested. 596 */ 597 (void) sorecvmsg((struct sonode *)socket, msg, &uio); 598 DTRACE_PROBE2(sorecvmsg, size_t, total_len, size_t, uio.uio_resid); 599 return (total_len - uio.uio_resid); 600 } 601 602 /* 603 * iscsi_net_sendpdu - send iscsi pdu on socket 604 */ 605 static iscsi_status_t 606 iscsi_net_sendpdu(void *socket, iscsi_hdr_t *ihp, char *data, int flags) 607 { 608 uint32_t pad; 609 uint32_t crc_hdr; 610 uint32_t crc_data; 611 uint32_t pad_len; 612 uint32_t data_len; 613 iovec_t iovec[ISCSI_MAX_IOVEC]; 614 int iovlen = 0; 615 size_t total_len = 0; 616 size_t send_len; 617 struct msghdr msg; 618 619 ASSERT(socket != NULL); 620 ASSERT(ihp != NULL); 621 622 /* 623 * Let's send the header first. 'hlength' is in 32-bit 624 * quantities, so we need to multiply by four to get bytes 625 */ 626 ASSERT(iovlen < ISCSI_MAX_IOVEC); 627 iovec[iovlen].iov_base = (void *)ihp; 628 iovec[iovlen].iov_len = sizeof (*ihp) + ihp->hlength * 4; 629 total_len += sizeof (*ihp) + ihp->hlength * 4; 630 iovlen++; 631 632 /* Let's transmit the header digest if we have to. */ 633 if ((flags & ISCSI_NET_HEADER_DIGEST) != 0) { 634 ASSERT(iovlen < ISCSI_MAX_IOVEC); 635 /* 636 * Converting the calculated CRC via htonl is not 637 * necessary because iscsi_crc32c calculates 638 * the value as it expects to be written 639 */ 640 crc_hdr = iscsi_crc32c((char *)ihp, 641 sizeof (iscsi_hdr_t) + ihp->hlength * 4); 642 643 iovec[iovlen].iov_base = (void *)&crc_hdr; 644 iovec[iovlen].iov_len = sizeof (crc_hdr); 645 total_len += sizeof (crc_hdr); 646 iovlen++; 647 } 648 649 /* Let's transmit the data if any. */ 650 data_len = ntoh24(ihp->dlength); 651 652 if (data_len) { 653 654 ASSERT(iovlen < ISCSI_MAX_IOVEC); 655 iovec[iovlen].iov_base = (void *)data; 656 iovec[iovlen].iov_len = data_len; 657 total_len += data_len; 658 iovlen++; 659 660 pad_len = ((ISCSI_PAD_WORD_LEN - 661 (data_len & (ISCSI_PAD_WORD_LEN - 1))) & 662 (ISCSI_PAD_WORD_LEN - 1)); 663 664 /* Let's transmit the data pad if any. */ 665 if (pad_len) { 666 667 ASSERT(iovlen < ISCSI_MAX_IOVEC); 668 pad = 0; 669 iovec[iovlen].iov_base = (void *)&pad; 670 iovec[iovlen].iov_len = pad_len; 671 total_len += pad_len; 672 iovlen++; 673 } 674 675 /* Let's transmit the data digest if we have to. */ 676 if ((flags & ISCSI_NET_DATA_DIGEST) != 0) { 677 678 ASSERT(iovlen < ISCSI_MAX_IOVEC); 679 /* 680 * Converting the calculated CRC via htonl is not 681 * necessary because iscsi_crc32c calculates the 682 * value as it expects to be written 683 */ 684 crc_data = iscsi_crc32c(data, data_len); 685 crc_data = iscsi_crc32c_continued( 686 (char *)&pad, pad_len, crc_data); 687 688 iovec[iovlen].iov_base = (void *)&crc_data; 689 iovec[iovlen].iov_len = sizeof (crc_data); 690 total_len += sizeof (crc_data); 691 iovlen++; 692 } 693 } 694 695 DTRACE_PROBE4(tx, void *, socket, iovec_t *, &iovec[0], 696 int, iovlen, int, total_len); 697 698 /* Initialization of the message header. */ 699 bzero(&msg, sizeof (msg)); 700 msg.msg_iov = &iovec[0]; 701 msg.msg_flags = MSG_WAITALL; 702 msg.msg_iovlen = iovlen; 703 704 send_len = iscsi_net->sendmsg((struct sonode *)socket, &msg); 705 DTRACE_PROBE2(sendmsg, size_t, total_len, size_t, send_len); 706 if (total_len != send_len) { 707 return (ISCSI_STATUS_TCP_TX_ERROR); 708 } 709 return (ISCSI_STATUS_SUCCESS); 710 } 711 712 /* 713 * iscsi_net_recvhdr - receive iscsi hdr on socket 714 */ 715 static iscsi_status_t 716 iscsi_net_recvhdr(void *socket, iscsi_hdr_t *ihp, int header_length, 717 int timeout, int flags) 718 { 719 iovec_t iov[ISCSI_MAX_IOVEC]; 720 int iovlen = 1; 721 int total_len = 0; 722 uint32_t crc_actual = 0; 723 uint32_t crc_calculated = 0; 724 char *adhdr = NULL; 725 int adhdr_length = 0; 726 struct msghdr msg; 727 size_t recv_len; 728 729 ASSERT(socket != NULL); 730 ASSERT(ihp != NULL); 731 732 if (header_length < sizeof (iscsi_hdr_t)) { 733 ASSERT(FALSE); 734 return (ISCSI_STATUS_INTERNAL_ERROR); 735 } 736 737 /* 738 * Receive primary header 739 */ 740 iov[0].iov_base = (char *)ihp; 741 iov[0].iov_len = sizeof (iscsi_hdr_t); 742 743 bzero(&msg, sizeof (msg)); 744 msg.msg_iov = iov; 745 msg.msg_flags = MSG_WAITALL; 746 msg.msg_iovlen = iovlen; 747 748 recv_len = iscsi_net->recvmsg(socket, &msg, timeout); 749 if (recv_len != sizeof (iscsi_hdr_t)) { 750 return (ISCSI_STATUS_TCP_RX_ERROR); 751 } 752 753 DTRACE_PROBE2(rx_hdr, void *, socket, iovec_t *iop, &iov[0]); 754 755 /* verify incoming opcode is a valid operation */ 756 if (is_incoming_opcode_invalid[ihp->opcode]) { 757 cmn_err(CE_WARN, "iscsi connection(%p) protocol error - " 758 "received an unsupported opcode:0x%02x", 759 socket, ihp->opcode); 760 return (ISCSI_STATUS_PROTOCOL_ERROR); 761 } 762 763 /* 764 * Setup receipt of additional header 765 */ 766 if (ihp->hlength > 0) { 767 adhdr = ((char *)ihp) + sizeof (iscsi_hdr_t); 768 adhdr_length = header_length - sizeof (iscsi_hdr_t); 769 /* make sure enough space is available for adhdr */ 770 if (ihp->hlength > adhdr_length) { 771 ASSERT(FALSE); 772 return (ISCSI_STATUS_INTERNAL_ERROR); 773 } 774 775 ASSERT(iovlen < ISCSI_MAX_IOVEC); 776 iov[iovlen].iov_base = adhdr; 777 iov[iovlen].iov_len = adhdr_length; 778 total_len += adhdr_length; 779 iovlen++; 780 } 781 782 /* 783 * Setup receipt of header digest if enabled and connection 784 * is in full feature mode. 785 */ 786 if ((flags & ISCSI_NET_HEADER_DIGEST) != 0) { 787 ASSERT(iovlen < ISCSI_MAX_IOVEC); 788 iov[iovlen].iov_base = (char *)&crc_actual; 789 iov[iovlen].iov_len = sizeof (uint32_t); 790 total_len += sizeof (uint32_t); 791 iovlen++; 792 } 793 794 /* 795 * Read additional header and/or header digest if pieces 796 * are available 797 */ 798 if (iovlen > 1) { 799 800 bzero(&msg, sizeof (msg)); 801 msg.msg_iov = iov; 802 msg.msg_flags = MSG_WAITALL; 803 msg.msg_iovlen = iovlen; 804 805 recv_len = iscsi_net->recvmsg(socket, &msg, timeout); 806 if (recv_len != total_len) { 807 return (ISCSI_STATUS_TCP_RX_ERROR); 808 } 809 810 DTRACE_PROBE4(rx_adhdr_digest, void *, socket, 811 iovec_t *iop, &iov[0], int, iovlen, int, total_len); 812 813 /* 814 * Verify header digest if enabled and connection 815 * is in full feature mode 816 */ 817 if ((flags & ISCSI_NET_HEADER_DIGEST) != 0) { 818 crc_calculated = iscsi_crc32c((uchar_t *)ihp, 819 sizeof (iscsi_hdr_t) + ihp->hlength * 4); 820 821 /* 822 * Converting actual CRC read via ntohl is not 823 * necessary because iscsi_crc32c calculates the 824 * value as it expect to be read 825 */ 826 if (crc_calculated != crc_actual) { 827 /* Invalid Header Digest */ 828 cmn_err(CE_WARN, "iscsi connection(%p) " 829 "protocol error - encountered a header " 830 "digest error expected:0x%08x " 831 "received:0x%08x", socket, 832 crc_calculated, crc_actual); 833 return (ISCSI_STATUS_HEADER_DIGEST_ERROR); 834 } 835 } 836 } 837 return (ISCSI_STATUS_SUCCESS); 838 } 839 840 841 /* 842 * iscsi_net_recvdata - receive iscsi data payload from socket 843 */ 844 static iscsi_status_t 845 iscsi_net_recvdata(void *socket, iscsi_hdr_t *ihp, char *data, 846 int max_data_length, int timeout, int flags) 847 { 848 struct iovec iov[3]; 849 int iovlen = 1; 850 int total_len = 0; 851 int dlength = 0; 852 int pad_len = 0; 853 uint8_t pad[ISCSI_PAD_WORD_LEN]; 854 uint32_t crc_calculated = 0; 855 uint32_t crc_actual = 0; 856 struct msghdr msg; 857 size_t recv_len; 858 859 ASSERT(socket != NULL); 860 ASSERT(ihp != NULL); 861 ASSERT(data != NULL); 862 863 /* short hand dlength */ 864 dlength = ntoh24(ihp->dlength); 865 866 /* verify dlength is valid */ 867 if (dlength > max_data_length) { 868 cmn_err(CE_WARN, "iscsi connection(%p) protocol error - " 869 "invalid data lengths itt:0x%x received:0x%x " 870 "max expected:0x%x", socket, ihp->itt, 871 dlength, max_data_length); 872 return (ISCSI_STATUS_PROTOCOL_ERROR); 873 } 874 875 if (dlength) { 876 877 /* calculate pad */ 878 pad_len = ((ISCSI_PAD_WORD_LEN - 879 (dlength & (ISCSI_PAD_WORD_LEN - 1))) & 880 (ISCSI_PAD_WORD_LEN - 1)); 881 882 /* setup data iovec */ 883 iov[0].iov_base = (char *)data; 884 iov[0].iov_len = dlength; 885 total_len = dlength; 886 887 /* if pad setup pad iovec */ 888 if (pad_len) { 889 iov[iovlen].iov_base = (char *)&pad; 890 iov[iovlen].iov_len = pad_len; 891 total_len += pad_len; 892 iovlen++; 893 } 894 895 /* setup data digest */ 896 if ((flags & ISCSI_NET_DATA_DIGEST) != 0) { 897 iov[iovlen].iov_base = (char *)&crc_actual; 898 iov[iovlen].iov_len = sizeof (crc_actual); 899 total_len += sizeof (crc_actual); 900 iovlen++; 901 } 902 903 bzero(&msg, sizeof (msg)); 904 msg.msg_iov = iov; 905 msg.msg_flags = MSG_WAITALL; 906 msg.msg_iovlen = iovlen; 907 908 recv_len = iscsi_net->recvmsg(socket, &msg, timeout); 909 if (recv_len != total_len) { 910 return (ISCSI_STATUS_TCP_RX_ERROR); 911 } 912 913 DTRACE_PROBE4(rx_data, void *, socket, iovec_t *iop, 914 &iov[0], int, iovlen, int, total_len); 915 916 /* verify data digest is present */ 917 if ((flags & ISCSI_NET_DATA_DIGEST) != 0) { 918 919 crc_calculated = iscsi_crc32c(data, dlength); 920 crc_calculated = iscsi_crc32c_continued( 921 (char *)&pad, pad_len, crc_calculated); 922 923 /* 924 * Converting actual CRC read via ntohl is not 925 * necessary because iscsi_crc32c calculates the 926 * value as it expects to be read 927 */ 928 if (crc_calculated != crc_actual) { 929 cmn_err(CE_WARN, "iscsi connection(%p) " 930 "protocol error - encountered a data " 931 "digest error itt:0x%x expected:0x%08x " 932 "received:0x%08x", socket, 933 ihp->itt, crc_calculated, crc_actual); 934 return (ISCSI_STATUS_DATA_DIGEST_ERROR); 935 } 936 } 937 } 938 return (ISCSI_STATUS_SUCCESS); 939 } 940 941 /* 942 * Convert a prefix length to a mask. 943 */ 944 static iscsi_status_t 945 iscsi_prefixlentomask(int prefixlen, int maxlen, uchar_t *mask) 946 { 947 if (prefixlen < 0 || prefixlen > maxlen || mask == NULL) { 948 return (ISCSI_STATUS_INTERNAL_ERROR); 949 } 950 951 while (prefixlen > 0) { 952 if (prefixlen >= 8) { 953 *mask = 0xff; 954 mask++; 955 prefixlen = prefixlen - 8; 956 continue; 957 } 958 *mask = *mask | (1 << (8 - prefixlen)); 959 prefixlen--; 960 } 961 return (ISCSI_STATUS_SUCCESS); 962 } 963 964 static iscsi_status_t 965 iscsi_net_interface() 966 { 967 struct in_addr braddr; 968 struct in_addr subnet; 969 struct in_addr myaddr; 970 struct in_addr defgateway; 971 struct in6_addr myaddr6; 972 struct in6_addr subnet6; 973 uchar_t mask_prefix = 0; 974 int mask_bits = 1; 975 TIUSER *tiptr; 976 TIUSER *tiptr6; 977 char ifname[16] = {0}; 978 iscsi_status_t status; 979 980 struct knetconfig dl_udp_netconf = { 981 NC_TPI_CLTS, 982 NC_INET, 983 NC_UDP, 984 0, }; 985 struct knetconfig dl_udp6_netconf = { 986 NC_TPI_CLTS, 987 NC_INET6, 988 NC_UDP, 989 0, }; 990 991 (void) strlcpy(ifname, rootfs.bo_ifname, sizeof (ifname)); 992 993 if (iscsiboot_prop->boot_nic.sin_family == AF_INET) { 994 /* 995 * Assumes only one linkage array element. 996 */ 997 dl_udp_netconf.knc_rdev = 998 makedevice(clone_major, ddi_name_to_major("udp")); 999 1000 myaddr.s_addr = 1001 iscsiboot_prop->boot_nic.nic_ip_u.u_in4.s_addr; 1002 1003 mask_prefix = iscsiboot_prop->boot_nic.sub_mask_prefix; 1004 (void) memset(&subnet.s_addr, 0, sizeof (subnet)); 1005 status = iscsi_prefixlentomask(mask_prefix, IP_4_BITS, 1006 (uchar_t *)&subnet.s_addr); 1007 if (status != ISCSI_STATUS_SUCCESS) { 1008 return (status); 1009 } 1010 1011 mask_bits = mask_bits << (IP_4_BITS - mask_prefix); 1012 mask_bits = mask_bits - 1; 1013 /* 1014 * Set the last mask bits of the ip address with 1, then 1015 * we can get the broadcast address. 1016 */ 1017 braddr.s_addr = myaddr.s_addr | mask_bits; 1018 1019 defgateway.s_addr = 1020 iscsiboot_prop->boot_nic.nic_gw_u.u_in4.s_addr; 1021 1022 /* initialize interface */ 1023 if (t_kopen((file_t *)NULL, dl_udp_netconf.knc_rdev, 1024 FREAD|FWRITE, &tiptr, CRED()) == 0) { 1025 if (kdlifconfig(tiptr, AF_INET, &myaddr, &subnet, 1026 &braddr, &defgateway, ifname)) { 1027 cmn_err(CE_WARN, "Failed to configure" 1028 " iSCSI boot nic"); 1029 (void) t_kclose(tiptr, 0); 1030 return (ISCSI_STATUS_INTERNAL_ERROR); 1031 } 1032 } else { 1033 cmn_err(CE_WARN, "Failed to configure" 1034 " iSCSI boot nic"); 1035 return (ISCSI_STATUS_INTERNAL_ERROR); 1036 } 1037 return (ISCSI_STATUS_SUCCESS); 1038 } else { 1039 dl_udp6_netconf.knc_rdev = 1040 makedevice(clone_major, ddi_name_to_major("udp6")); 1041 1042 bcopy(&iscsiboot_prop->boot_nic.nic_ip_u.u_in6.s6_addr, 1043 &myaddr6.s6_addr, 16); 1044 1045 (void) memset(&subnet6, 0, sizeof (subnet6)); 1046 mask_prefix = iscsiboot_prop->boot_nic.sub_mask_prefix; 1047 status = iscsi_prefixlentomask(mask_prefix, IP_6_BITS, 1048 (uchar_t *)&subnet6.s6_addr); 1049 if (status != ISCSI_STATUS_SUCCESS) { 1050 return (status); 1051 } 1052 1053 if (t_kopen((file_t *)NULL, dl_udp6_netconf.knc_rdev, 1054 FREAD|FWRITE, &tiptr6, CRED()) == 0) { 1055 if (kdlifconfig(tiptr6, AF_INET6, &myaddr6, 1056 &subnet6, NULL, NULL, ifname)) { 1057 cmn_err(CE_WARN, "Failed to configure" 1058 " iSCSI boot nic"); 1059 (void) t_kclose(tiptr, 0); 1060 return (ISCSI_STATUS_INTERNAL_ERROR); 1061 } 1062 } else { 1063 cmn_err(CE_WARN, "Failed to configure" 1064 " iSCSI boot nic"); 1065 return (ISCSI_STATUS_INTERNAL_ERROR); 1066 } 1067 return (ISCSI_STATUS_SUCCESS); 1068 } 1069 } 1070 1071 /* 1072 * vp is needed to create the socket for the time being. 1073 */ 1074 static int 1075 iscsi_ldi_vp_from_name(char *path, vnode_t **vpp) 1076 { 1077 vnode_t *vp = NULL; 1078 int ret; 1079 1080 /* sanity check required input parameters */ 1081 if ((path == NULL) || (vpp == NULL)) 1082 return (EINVAL); 1083 1084 if (modrootloaded) { 1085 cred_t *saved_cred = curthread->t_cred; 1086 1087 /* we don't want lookupname to fail because of credentials */ 1088 curthread->t_cred = kcred; 1089 1090 /* 1091 * all lookups should be done in the global zone. but 1092 * lookupnameat() won't actually do this if an absolute 1093 * path is passed in. since the ldi interfaces require an 1094 * absolute path we pass lookupnameat() a pointer to 1095 * the character after the leading '/' and tell it to 1096 * start searching at the current system root directory. 1097 */ 1098 ASSERT(*path == '/'); 1099 ret = lookupnameat(path + 1, UIO_SYSSPACE, FOLLOW, NULLVPP, 1100 &vp, rootdir); 1101 1102 /* restore this threads credentials */ 1103 curthread->t_cred = saved_cred; 1104 1105 if (ret == 0) { 1106 if (!vn_matchops(vp, spec_getvnodeops()) || 1107 !VTYP_VALID(vp->v_type)) { 1108 VN_RELE(vp); 1109 return (ENXIO); 1110 } 1111 } 1112 } 1113 1114 if (vp == NULL) { 1115 dev_info_t *dip; 1116 dev_t dev; 1117 int spec_type; 1118 1119 /* 1120 * Root is not mounted, the minor node is not specified, 1121 * or an OBP path has been specified. 1122 */ 1123 1124 /* 1125 * Determine if path can be pruned to produce an 1126 * OBP or devfs path for resolve_pathname. 1127 */ 1128 if (strncmp(path, "/devices/", 9) == 0) 1129 path += strlen("/devices"); 1130 1131 /* 1132 * if no minor node was specified the DEFAULT minor node 1133 * will be returned. if there is no DEFAULT minor node 1134 * one will be fabricated of type S_IFCHR with the minor 1135 * number equal to the instance number. 1136 */ 1137 ret = resolve_pathname(path, &dip, &dev, &spec_type); 1138 if (ret != 0) 1139 return (ENODEV); 1140 1141 ASSERT(STYP_VALID(spec_type)); 1142 vp = makespecvp(dev, STYP_TO_VTYP(spec_type)); 1143 spec_assoc_vp_with_devi(vp, dip); 1144 ddi_release_devi(dip); 1145 } 1146 1147 *vpp = vp; 1148 return (0); 1149 } 1150