xref: /titanic_51/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_net.c (revision bbe725837bc5e084e6762c7984672bc05d76baf1)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  *
25  * iSCSI Software Initiator
26  */
27 
28 #include <sys/socket.h>		/* networking stuff */
29 #include <sys/strsubr.h>	/* networking stuff */
30 #include <netinet/tcp.h>	/* TCP_NODELAY */
31 #include <sys/socketvar.h>	/* _ALLOC_SLEEP */
32 #include <sys/pathname.h>	/* declares:	lookupname */
33 #include <sys/fs/snode.h>	/* defines:	VTOS */
34 #include <sys/fs/dv_node.h>	/* declares:	devfs_lookupname */
35 #include <sys/bootconf.h>
36 #include <sys/bootprops.h>
37 #include <netinet/in.h>
38 #include "iscsi.h"
39 #include <sys/ksocket.h>
40 
41 /*
42  * This is a high level description of the default
43  * iscsi_net transport interfaces.  These are used
44  * to create, send, recv, and close standard TCP/IP
45  * messages.  In addition there are extensions to send
46  * and recv iSCSI PDU data.
47  *
48  * NOTE: It would be very easy for an iSCSI HBA vendor
49  * to register their own functions over the top of
50  * the default interfaces.  This would allow an iSCSI
51  * HBA to use the same iscsiadm management interfaces
52  * and the Solaris iSCSI session / connection management.
53  * The current problem with this approach is we only
54  * allow one registered transport table.  This
55  * would be pretty easy to correct although will require
56  * additional CLI changes to manage multiple interfaces.
57  * If a vendor can present compelling performance data,
58  * then Sun will be willing to enhance this support for
59  * multiple interface tables and better CLI management.
60  *
61  * The following listing describes the iscsi_net
62  * entry points:
63  *
64  *   socket	    - Creates TCP/IP socket connection.  In the
65  *		       default implementation creates a sonode
66  *		       via the sockfs kernel layer.
67  *   bind	      - Performs standard TCP/IP BSD operation.  In
68  *		       the default implementation this only acts
69  *		       as a soft binding based on the IP and routing
70  *			 tables.  It would be preferred if this was
71  *			 a hard binding but that is currently not
72  *			 possible with Solaris's networking stack.
73  *   connect	   - Performs standard TCP/IP BSD operation.  This
74  *		       establishes the TCP SYN to the peer IP address.
75  *   listen	    - Performs standard TCP/IP BSD operation.  This
76  *		       listens for incoming peer connections.
77  *   accept	    - Performs standard TCP/IP BSD operation.  This
78  *		       accepts incoming peer connections.
79  *   shutdown	  - This disconnects the TCP/IP connection while
80  *		       maintaining the resources.
81  *   close	     - This disconnects the TCP/IP connection and
82  *		       releases the resources.
83  *
84  *   getsockopt	- Gets socket option for specified socket.
85  *   setsockopt	- Sets socket option for specified socket.
86  *
87  *      The current socket options that are used by the initiator
88  *      are listed below.
89  *
90  *	TCP_CONN_NOTIFY_THRESHOLD
91  *	TCP_CONN_ABORT_THRESHOLD
92  *	TCP_ABORT_THRESHOLD
93  *	TCP_NODELAY
94  *	SO_RCVBUF
95  *	SO_SNDBUF
96  *
97  *   iscsi_net_poll    - Poll socket interface for a specified amount
98  *		       of data.  If data not received in timeout
99  *		       period fail request.
100  *   iscsi_net_sendmsg - Send message on socket connection
101  *   iscsi_net_recvmsg - Receive message on socket connection
102  *
103  *   iscsi_net_sendpdu - Send iSCSI PDU on socket connection
104  *   iscsi_net_recvhdr - Receive iSCSI header on socket connection
105  *   iscsi_net_recvdata - Receive iSCSI data on socket connection
106  *
107  *     The iSCSI interfaces have the below optional flags.
108  *
109  *       ISCSI_NET_HEADER_DIGEST - The interface should either
110  *				generate or validate the iSCSI
111  *				header digest CRC.
112  *       ISCSI_NET_DATA_DIGEST   - The interface should either
113  *			      generate or validate the iSCSI
114  *			      data digest CRC.
115  */
116 
117 
118 /* global */
119 iscsi_network_t *iscsi_net;
120 
121 /* consts */
122 
123 /*
124  * This table is used for quick validation of incoming
125  * iSCSI PDU opcodes.  A non-zero value in the table below
126  * indicates that the opcode is invalid for an iSCSI
127  * initiator to receive; '0' marks an opcode that is accepted.
128  */
129 const int   is_incoming_opcode_invalid[256] = {
130 	/*		0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F */
131 	/* 0x0X */	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
132 	/* 0x1X */	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
133 	/* 0x2X */	0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
134 	/* 0x3X */	1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
135 	/* 0x4X */	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
136 	/* 0x5X */	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
137 	/* 0x6X */	0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
138 	/* 0x7X */	1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
139 	/* 0x8X */	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
140 	/* 0x9X */	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
141 	/* 0xAX */	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
142 	/* 0xBX */	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
143 	/* 0xCX */	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
144 	/* 0xDX */	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
145 	/* 0xEX */	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
146 	/* 0xFX */	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
147 };
148 
149 #define	IP_4_BITS	32
150 #define	IP_6_BITS	128
151 
152 extern int modrootloaded;
153 extern ib_boot_prop_t   *iscsiboot_prop;
154 
155 /* prototypes */
156 static void * iscsi_net_socket(int domain, int type, int protocol);
157 static int iscsi_net_bind(void *socket, struct sockaddr *
158     name, int name_len, int backlog, int flags);
159 static int iscsi_net_connect(void *socket, struct sockaddr *
160     name, int name_len, int fflag, int flags);
161 static int iscsi_net_listen(void *socket, int backlog);
162 static void * iscsi_net_accept(void *socket, struct sockaddr *addr,
163     int *addr_len);
164 static int iscsi_net_getsockname(void *socket, struct sockaddr *, socklen_t *);
165 static int iscsi_net_getsockopt(void *socket, int level,
166     int option_name, void *option_val, int *option_len, int flags);
167 static int iscsi_net_setsockopt(void *socket, int level,
168     int option_name, void *option_val, int option_len);
169 static int iscsi_net_shutdown(void *socket, int how);
170 static void iscsi_net_close(void *socket);
171 
172 static size_t iscsi_net_poll(void *socket, clock_t timeout);
173 static size_t iscsi_net_sendmsg(void *socket, struct msghdr *msg);
174 static size_t iscsi_net_recvmsg(void *socket,
175     struct msghdr *msg, int timeout);
176 
177 static iscsi_status_t iscsi_net_sendpdu(void *socket, iscsi_hdr_t *ihp,
178     char *data, int flags);
179 static iscsi_status_t iscsi_net_recvdata(void *socket, iscsi_hdr_t *ihp,
180     char *data, int max_data_length, int timeout, int flags);
181 static iscsi_status_t iscsi_net_recvhdr(void *socket, iscsi_hdr_t *ihp,
182     int header_length, int timeout, int flags);
183 
184 static void iscsi_net_set_connect_options(void *socket);
185 
186 /*
187  * +--------------------------------------------------------------------+
188  * | network interface registration functions			   |
189  * +--------------------------------------------------------------------+
190  */
191 
192 /*
193  * iscsi_net_init - initialize network interface
194  */
195 void
196 iscsi_net_init()
197 {
198 	iscsi_net = kmem_zalloc(sizeof (*iscsi_net), KM_SLEEP);
199 
200 	iscsi_net->socket	= iscsi_net_socket;
201 
202 	iscsi_net->bind		= iscsi_net_bind;
203 	iscsi_net->connect	= iscsi_net_connect;
204 	iscsi_net->listen	= iscsi_net_listen;
205 	iscsi_net->accept	= iscsi_net_accept;
206 	iscsi_net->shutdown	= iscsi_net_shutdown;
207 	iscsi_net->close	= iscsi_net_close;
208 
209 	iscsi_net->getsockname	= iscsi_net_getsockname;
210 	iscsi_net->getsockopt	= iscsi_net_getsockopt;
211 	iscsi_net->setsockopt	= iscsi_net_setsockopt;
212 
213 	iscsi_net->poll		= iscsi_net_poll;
214 	iscsi_net->sendmsg	= iscsi_net_sendmsg;
215 	iscsi_net->recvmsg	= iscsi_net_recvmsg;
216 
217 	iscsi_net->sendpdu	= iscsi_net_sendpdu;
218 	iscsi_net->recvhdr	= iscsi_net_recvhdr;
219 	iscsi_net->recvdata	= iscsi_net_recvdata;
220 }
221 
222 /*
223  * iscsi_net_fini - release network interface
224  */
225 void
226 iscsi_net_fini()
227 {
228 	kmem_free(iscsi_net, sizeof (*iscsi_net));
229 	iscsi_net = NULL;
230 }
231 
232 /*
233  * iscsi_net_set_connect_options -
234  */
235 static void
236 iscsi_net_set_connect_options(void *socket)
237 {
238 	int ret = 0;
239 	ret += iscsi_net->setsockopt(socket, IPPROTO_TCP,
240 	    TCP_CONN_NOTIFY_THRESHOLD, (char *)&iscsi_net->tweaks.
241 	    conn_notify_threshold, sizeof (int));
242 	ret += iscsi_net->setsockopt(socket, IPPROTO_TCP,
243 	    TCP_CONN_ABORT_THRESHOLD, (char *)&iscsi_net->tweaks.
244 	    conn_abort_threshold, sizeof (int));
245 	ret += iscsi_net->setsockopt(socket, IPPROTO_TCP, TCP_ABORT_THRESHOLD,
246 	    (char *)&iscsi_net->tweaks.abort_threshold, sizeof (int));
247 	ret += iscsi_net->setsockopt(socket, IPPROTO_TCP, TCP_NODELAY,
248 	    (char *)&iscsi_net->tweaks.nodelay, sizeof (int));
249 	ret += iscsi_net->setsockopt(socket, SOL_SOCKET, SO_RCVBUF,
250 	    (char *)&iscsi_net->tweaks.rcvbuf, sizeof (int));
251 	ret += iscsi_net->setsockopt(socket, SOL_SOCKET, SO_SNDBUF,
252 	    (char *)&iscsi_net->tweaks.sndbuf, sizeof (int));
253 	if (ret != 0) {
254 		cmn_err(CE_NOTE, "iscsi connection failed to set socket option"
255 		    "TCP_CONN_NOTIFY_THRESHOLD, TCP_CONN_ABORT_THRESHOLD,"
256 		    "TCP_ABORT_THRESHOLD, TCP_NODELAY, SO_RCVBUF or SO_SNDBUF");
257 	}
258 }
259 
260 /*
261  * +--------------------------------------------------------------------+
262  * | register network interfaces					|
263  * +--------------------------------------------------------------------+
264  */
265 
266 /*
267  * iscsi_net_socket - create socket
268  */
269 static void *
270 iscsi_net_socket(int domain, int type, int protocol)
271 {
272 	ksocket_t	socket;
273 	int 		err	= 0;
274 
275 	err = ksocket_socket(&socket, domain, type, protocol, KSOCKET_SLEEP,
276 	    CRED());
277 	if (!err)
278 		return ((void *)socket);
279 	else
280 		return (NULL);
281 
282 }
283 
284 /*
285  * iscsi_net_bind - bind socket to a specific sockaddr
286  */
287 /* ARGSUSED */
288 static int
289 iscsi_net_bind(void *socket, struct sockaddr *name, int name_len,
290 	int backlog, int flags)
291 {
292 	ksocket_t ks = (ksocket_t)socket;
293 	int error;
294 	error = ksocket_bind(ks, name, name_len, CRED());
295 	if (error == 0 && backlog != 0)
296 		error = ksocket_listen(ks, backlog, CRED());
297 
298 	return (error);
299 }
300 
301 /*
302  * iscsi_net_connect - connect socket to peer sockaddr
303  */
304 /* ARGSUSED */
305 static int
306 iscsi_net_connect(void *socket, struct sockaddr *name, int name_len,
307 	int fflag, int flags)
308 {
309 	ksocket_t ks = (ksocket_t)socket;
310 	int rval;
311 
312 	iscsi_net_set_connect_options(socket);
313 	rval = ksocket_connect(ks, name, name_len, CRED());
314 
315 	return (rval);
316 }
317 
318 /*
319  * iscsi_net_listen - listen to socket for peer connections
320  */
321 static int
322 iscsi_net_listen(void *socket, int backlog)
323 {
324 	ksocket_t ks = (ksocket_t)socket;
325 	return (ksocket_listen(ks, backlog, CRED()));
326 }
327 
328 /*
329  * iscsi_net_accept - accept peer socket connections
330  */
331 static void *
332 iscsi_net_accept(void *socket, struct sockaddr *addr, int *addr_len)
333 {
334 	ksocket_t listen_ks;
335 	ksocket_t ks = (ksocket_t)socket;
336 
337 	(void) ksocket_accept(ks, addr, (socklen_t *)addr_len, &listen_ks,
338 	    CRED());
339 
340 	return ((void *)listen_ks);
341 }
342 
343 /*
344  * iscsi_net_getsockname -
345  */
346 static int
347 iscsi_net_getsockname(void *socket, struct sockaddr *addr, socklen_t *addrlen)
348 {
349 	ksocket_t ks = (ksocket_t)socket;
350 	return (ksocket_getsockname(ks, addr, addrlen, CRED()));
351 }
352 
353 /*
354  * iscsi_net_getsockopt - get value of option on socket
355  */
356 /* ARGSUSED */
357 static int
358 iscsi_net_getsockopt(void *socket, int level, int option_name,
359 	void *option_val, int *option_len, int flags)
360 {
361 	ksocket_t ks = (ksocket_t)socket;
362 	return (ksocket_getsockopt(ks, level, option_name, option_val,
363 	    option_len, CRED()));
364 }
365 
366 /*
367  * iscsi_net_setsockopt - set value for option on socket
368  */
369 static int
370 iscsi_net_setsockopt(void *socket, int level, int option_name,
371 	void *option_val, int option_len)
372 {
373 	ksocket_t ks = (ksocket_t)socket;
374 	return (ksocket_setsockopt(ks, level, option_name, option_val,
375 	    option_len, CRED()));
376 }
377 
378 /*
379  * iscsi_net_shutdown - shutdown socket connection
380  */
381 static int
382 iscsi_net_shutdown(void *socket, int how)
383 {
384 	ksocket_t ks = (ksocket_t)socket;
385 	return (ksocket_shutdown(ks, how, CRED()));
386 }
387 
388 /*
389  * iscsi_net_close - shutdown socket connection and release resources
390  */
391 static void
392 iscsi_net_close(void *socket)
393 {
394 	ksocket_t ks = (ksocket_t)socket;
395 	(void) ksocket_close(ks, CRED());
396 }
397 
398 /*
399  * iscsi_net_poll - poll socket for data
400  */
401 /* ARGSUSED */
402 static size_t
403 iscsi_net_poll(void *socket, clock_t timeout)
404 {
405 	int pflag;
406 	char msg[64];
407 	size_t recv = 0;
408 	ksocket_t ks = (ksocket_t)socket;
409 
410 	if (get_udatamodel() == DATAMODEL_NONE ||
411 	    get_udatamodel() == DATAMODEL_NATIVE) {
412 		struct timeval tl;
413 
414 		/* timeout is millisecond */
415 		tl.tv_sec = timeout / 1000;
416 		tl.tv_usec = (timeout % 1000) * 1000;
417 		if (ksocket_setsockopt(ks, SOL_SOCKET, SO_RCVTIMEO, &tl,
418 		    sizeof (struct timeval), CRED()))
419 			return (0);
420 	} else {
421 		struct timeval32 tl;
422 
423 		/* timeout is millisecond */
424 		tl.tv_sec = timeout / 1000;
425 		tl.tv_usec = (timeout % 1000) * 1000;
426 		if (ksocket_setsockopt(ks, SOL_SOCKET, SO_RCVTIMEO, &tl,
427 		    sizeof (struct timeval32), CRED()))
428 			return (0);
429 	}
430 
431 	pflag = MSG_ANY;
432 	bzero(msg, sizeof (msg));
433 	return (ksocket_recv(ks, msg, sizeof (msg), pflag, &recv, CRED()));
434 }
435 
436 /*
437  * iscsi_net_sendmsg - send message on socket
438  */
439 /* ARGSUSED */
440 static size_t
441 iscsi_net_sendmsg(void *socket, struct msghdr *msg)
442 {
443 	ksocket_t ks = (ksocket_t)socket;
444 	size_t sent = 0;
445 	int flag = msg->msg_flags;
446 	(void) ksocket_sendmsg(ks, msg, flag, &sent, CRED());
447 	DTRACE_PROBE1(ksocket_sendmsg, size_t, sent);
448 	return (sent);
449 }
450 
451 /*
452  * iscsi_net_recvmsg - receive message on socket
453  */
454 /* ARGSUSED */
455 static size_t
456 iscsi_net_recvmsg(void *socket, struct msghdr *msg, int timeout)
457 {
458 	int		prflag	    = msg->msg_flags;
459 	ksocket_t	ks	    = (ksocket_t)socket;
460 	size_t 		recv	    = 0;
461 
462 	/* Set recv timeout */
463 	if (get_udatamodel() == DATAMODEL_NONE ||
464 	    get_udatamodel() == DATAMODEL_NATIVE) {
465 		struct timeval tl;
466 
467 		tl.tv_sec = timeout;
468 		tl.tv_usec = 0;
469 		if (ksocket_setsockopt(ks, SOL_SOCKET, SO_RCVTIMEO, &tl,
470 		    sizeof (struct timeval), CRED()))
471 			return (0);
472 	} else {
473 		struct timeval32 tl;
474 
475 		tl.tv_sec = timeout;
476 		tl.tv_usec = 0;
477 		if (ksocket_setsockopt(ks, SOL_SOCKET, SO_RCVTIMEO, &tl,
478 		    sizeof (struct timeval32), CRED()))
479 			return (0);
480 	}
481 	/*
482 	 * Receive the requested data.  Block until all
483 	 * data is received or timeout.
484 	 */
485 	ksocket_hold(ks);
486 	(void) ksocket_recvmsg(ks, msg, prflag, &recv, CRED());
487 	ksocket_rele(ks);
488 	DTRACE_PROBE1(ksocket_recvmsg, size_t, recv);
489 	return (recv);
490 }
491 
492 /*
493  * iscsi_net_sendpdu - send iscsi pdu on socket
494  */
495 static iscsi_status_t
496 iscsi_net_sendpdu(void *socket, iscsi_hdr_t *ihp, char *data, int flags)
497 {
498 	uint32_t	pad;
499 	uint32_t	crc_hdr;
500 	uint32_t	crc_data;
501 	uint32_t	pad_len;
502 	uint32_t	data_len;
503 	iovec_t		iovec[ISCSI_MAX_IOVEC];
504 	int		iovlen = 0;
505 	size_t		total_len = 0;
506 	size_t		send_len;
507 	struct msghdr	msg;
508 
509 	ASSERT(socket != NULL);
510 	ASSERT(ihp != NULL);
511 
512 	/*
513 	 * Let's send the header first.  'hlength' is in 32-bit
514 	 * quantities, so we need to multiply by four to get bytes
515 	 */
516 	ASSERT(iovlen < ISCSI_MAX_IOVEC);
517 	iovec[iovlen].iov_base = (void *)ihp;
518 	iovec[iovlen].iov_len  = sizeof (*ihp) + ihp->hlength * 4;
519 	total_len += sizeof (*ihp) + ihp->hlength * 4;
520 	iovlen++;
521 
522 	/* Let's transmit the header digest if we have to. */
523 	if ((flags & ISCSI_NET_HEADER_DIGEST) != 0) {
524 		ASSERT(iovlen < ISCSI_MAX_IOVEC);
525 		/*
526 		 * Converting the calculated CRC via htonl is not
527 		 * necessary because iscsi_crc32c calculates
528 		 * the value as it expects to be written
529 		 */
530 		crc_hdr = iscsi_crc32c((char *)ihp,
531 		    sizeof (iscsi_hdr_t) + ihp->hlength * 4);
532 
533 		iovec[iovlen].iov_base = (void *)&crc_hdr;
534 		iovec[iovlen].iov_len  = sizeof (crc_hdr);
535 		total_len += sizeof (crc_hdr);
536 		iovlen++;
537 	}
538 
539 	/* Let's transmit the data if any. */
540 	data_len = ntoh24(ihp->dlength);
541 
542 	if (data_len) {
543 
544 		ASSERT(iovlen < ISCSI_MAX_IOVEC);
545 		iovec[iovlen].iov_base = (void *)data;
546 		iovec[iovlen].iov_len  = data_len;
547 		total_len += data_len;
548 		iovlen++;
549 
550 		pad_len = ((ISCSI_PAD_WORD_LEN -
551 		    (data_len & (ISCSI_PAD_WORD_LEN - 1))) &
552 		    (ISCSI_PAD_WORD_LEN - 1));
553 
554 		/* Let's transmit the data pad if any. */
555 		if (pad_len) {
556 
557 			ASSERT(iovlen < ISCSI_MAX_IOVEC);
558 			pad = 0;
559 			iovec[iovlen].iov_base = (void *)&pad;
560 			iovec[iovlen].iov_len  = pad_len;
561 			total_len += pad_len;
562 			iovlen++;
563 		}
564 
565 		/* Let's transmit the data digest if we have to. */
566 		if ((flags & ISCSI_NET_DATA_DIGEST) != 0) {
567 
568 			ASSERT(iovlen < ISCSI_MAX_IOVEC);
569 			/*
570 			 * Converting the calculated CRC via htonl is not
571 			 * necessary because iscsi_crc32c calculates the
572 			 * value as it expects to be written
573 			 */
574 			crc_data = iscsi_crc32c(data, data_len);
575 			crc_data = iscsi_crc32c_continued(
576 			    (char *)&pad, pad_len, crc_data);
577 
578 			iovec[iovlen].iov_base = (void *)&crc_data;
579 			iovec[iovlen].iov_len  = sizeof (crc_data);
580 			total_len += sizeof (crc_data);
581 			iovlen++;
582 		}
583 	}
584 
585 	DTRACE_PROBE4(tx, void *, socket, iovec_t *, &iovec[0],
586 	    int, iovlen, int, total_len);
587 
588 	/* Initialization of the message header. */
589 	bzero(&msg, sizeof (msg));
590 	msg.msg_iov	= &iovec[0];
591 	msg.msg_flags	= MSG_WAITALL;
592 	msg.msg_iovlen	= iovlen;
593 
594 	send_len = iscsi_net->sendmsg(socket, &msg);
595 	DTRACE_PROBE2(sendmsg, size_t, total_len, size_t, send_len);
596 	if (total_len != send_len) {
597 		return (ISCSI_STATUS_TCP_TX_ERROR);
598 	}
599 	return (ISCSI_STATUS_SUCCESS);
600 }
601 
602 /*
603  * iscsi_net_recvhdr - receive iscsi hdr on socket
604  */
605 static iscsi_status_t
606 iscsi_net_recvhdr(void *socket, iscsi_hdr_t *ihp, int header_length,
607     int timeout, int flags)
608 {
609 	iovec_t		    iov[ISCSI_MAX_IOVEC];
610 	int		    iovlen		= 1;
611 	int		    total_len		= 0;
612 	uint32_t	    crc_actual		= 0;
613 	uint32_t	    crc_calculated	= 0;
614 	char		    *adhdr		= NULL;
615 	int		    adhdr_length	= 0;
616 	struct msghdr	    msg;
617 	size_t		    recv_len;
618 
619 	ASSERT(socket != NULL);
620 	ASSERT(ihp != NULL);
621 
622 	if (header_length < sizeof (iscsi_hdr_t)) {
623 		ASSERT(FALSE);
624 		return (ISCSI_STATUS_INTERNAL_ERROR);
625 	}
626 
627 	/*
628 	 * Receive primary header
629 	 */
630 	iov[0].iov_base = (char *)ihp;
631 	iov[0].iov_len = sizeof (iscsi_hdr_t);
632 
633 	bzero(&msg, sizeof (msg));
634 	msg.msg_iov	= iov;
635 	msg.msg_flags	= MSG_WAITALL;
636 	msg.msg_iovlen	= iovlen;
637 
638 	recv_len = iscsi_net->recvmsg(socket, &msg, timeout);
639 	if (recv_len != sizeof (iscsi_hdr_t)) {
640 		return (ISCSI_STATUS_TCP_RX_ERROR);
641 	}
642 
643 	DTRACE_PROBE2(rx_hdr, void *, socket, iovec_t *iop, &iov[0]);
644 
645 	/* verify incoming opcode is a valid operation */
646 	if (is_incoming_opcode_invalid[ihp->opcode]) {
647 		cmn_err(CE_WARN, "iscsi connection(%p) protocol error - "
648 		    "received an unsupported opcode:0x%02x",
649 		    socket, ihp->opcode);
650 		return (ISCSI_STATUS_PROTOCOL_ERROR);
651 	}
652 
653 	/*
654 	 * Setup receipt of additional header
655 	 */
656 	if (ihp->hlength > 0) {
657 		adhdr = ((char *)ihp) + sizeof (iscsi_hdr_t);
658 		adhdr_length = header_length - sizeof (iscsi_hdr_t);
659 		/* make sure enough space is available for adhdr */
660 		if (ihp->hlength > adhdr_length) {
661 			ASSERT(FALSE);
662 			return (ISCSI_STATUS_INTERNAL_ERROR);
663 		}
664 
665 		ASSERT(iovlen < ISCSI_MAX_IOVEC);
666 		iov[iovlen].iov_base = adhdr;
667 		iov[iovlen].iov_len = adhdr_length;
668 		total_len += adhdr_length;
669 		iovlen++;
670 	}
671 
672 	/*
673 	 * Setup receipt of header digest if enabled and connection
674 	 * is in full feature mode.
675 	 */
676 	if ((flags & ISCSI_NET_HEADER_DIGEST) != 0) {
677 		ASSERT(iovlen < ISCSI_MAX_IOVEC);
678 		iov[iovlen].iov_base = (char *)&crc_actual;
679 		iov[iovlen].iov_len = sizeof (uint32_t);
680 		total_len += sizeof (uint32_t);
681 		iovlen++;
682 	}
683 
684 	/*
685 	 * Read additional header and/or header digest if pieces
686 	 * are available
687 	 */
688 	if (iovlen > 1) {
689 
690 		bzero(&msg, sizeof (msg));
691 		msg.msg_iov	= iov;
692 		msg.msg_flags	= MSG_WAITALL;
693 		msg.msg_iovlen	= iovlen;
694 
695 		recv_len = iscsi_net->recvmsg(socket, &msg, timeout);
696 		if (recv_len != total_len) {
697 			return (ISCSI_STATUS_TCP_RX_ERROR);
698 		}
699 
700 		DTRACE_PROBE4(rx_adhdr_digest, void *, socket,
701 		    iovec_t *iop, &iov[0], int, iovlen, int, total_len);
702 
703 		/*
704 		 * Verify header digest if enabled and connection
705 		 * is in full feature mode
706 		 */
707 		if ((flags & ISCSI_NET_HEADER_DIGEST) != 0) {
708 			crc_calculated = iscsi_crc32c((uchar_t *)ihp,
709 			    sizeof (iscsi_hdr_t) + ihp->hlength * 4);
710 
711 			/*
712 			 * Converting actual CRC read via ntohl is not
713 			 * necessary because iscsi_crc32c calculates the
714 			 * value as it expect to be read
715 			 */
716 			if (crc_calculated != crc_actual) {
717 				/* Invalid Header Digest */
718 				cmn_err(CE_WARN, "iscsi connection(%p) "
719 				    "protocol error - encountered a header "
720 				    "digest error expected:0x%08x "
721 				    "received:0x%08x", socket,
722 				    crc_calculated, crc_actual);
723 				return (ISCSI_STATUS_HEADER_DIGEST_ERROR);
724 			}
725 		}
726 	}
727 	return (ISCSI_STATUS_SUCCESS);
728 }
729 
730 
731 /*
732  * iscsi_net_recvdata - receive iscsi data payload from socket
733  */
734 static iscsi_status_t
735 iscsi_net_recvdata(void *socket, iscsi_hdr_t *ihp, char *data,
736     int max_data_length, int timeout, int flags)
737 {
738 	struct iovec	iov[3];
739 	int		iovlen			= 1;
740 	int		total_len		= 0;
741 	int		dlength			= 0;
742 	int		pad_len			= 0;
743 	uint8_t		pad[ISCSI_PAD_WORD_LEN];
744 	uint32_t	crc_calculated		= 0;
745 	uint32_t	crc_actual		= 0;
746 	struct msghdr	msg;
747 	size_t		recv_len;
748 
749 	ASSERT(socket != NULL);
750 	ASSERT(ihp != NULL);
751 	ASSERT(data != NULL);
752 
753 	/* short hand dlength */
754 	dlength = ntoh24(ihp->dlength);
755 
756 	/* verify dlength is valid */
757 	if (dlength > max_data_length) {
758 		cmn_err(CE_WARN, "iscsi connection(%p) protocol error - "
759 		    "invalid data lengths itt:0x%x received:0x%x "
760 		    "max expected:0x%x", socket, ihp->itt,
761 		    dlength, max_data_length);
762 		return (ISCSI_STATUS_PROTOCOL_ERROR);
763 	}
764 
765 	if (dlength) {
766 		/* calculate pad */
767 		pad_len = ((ISCSI_PAD_WORD_LEN -
768 		    (dlength & (ISCSI_PAD_WORD_LEN - 1))) &
769 		    (ISCSI_PAD_WORD_LEN - 1));
770 
771 		/* setup data iovec */
772 		iov[0].iov_base	= (char *)data;
773 		iov[0].iov_len	= dlength;
774 		total_len	= dlength;
775 
776 		/* if pad setup pad iovec */
777 		if (pad_len) {
778 			iov[iovlen].iov_base	= (char *)&pad;
779 			iov[iovlen].iov_len	= pad_len;
780 			total_len		+= pad_len;
781 			iovlen++;
782 		}
783 
784 		/* setup data digest */
785 		if ((flags & ISCSI_NET_DATA_DIGEST) != 0) {
786 			iov[iovlen].iov_base	= (char *)&crc_actual;
787 			iov[iovlen].iov_len	= sizeof (crc_actual);
788 			total_len		+= sizeof (crc_actual);
789 			iovlen++;
790 		}
791 
792 		bzero(&msg, sizeof (msg));
793 		msg.msg_iov	= iov;
794 		msg.msg_flags	= MSG_WAITALL;
795 		msg.msg_iovlen	= iovlen;
796 
797 		recv_len = iscsi_net->recvmsg(socket, &msg, timeout);
798 		if (recv_len != total_len) {
799 			return (ISCSI_STATUS_TCP_RX_ERROR);
800 		}
801 
802 		DTRACE_PROBE4(rx_data, void *, socket, iovec_t *iop,
803 		    &iov[0], int, iovlen, int, total_len);
804 
805 		/* verify data digest is present */
806 		if ((flags & ISCSI_NET_DATA_DIGEST) != 0) {
807 
808 			crc_calculated = iscsi_crc32c(data, dlength);
809 			crc_calculated = iscsi_crc32c_continued(
810 			    (char *)&pad, pad_len, crc_calculated);
811 
812 			/*
813 			 * Converting actual CRC read via ntohl is not
814 			 * necessary because iscsi_crc32c calculates the
815 			 * value as it expects to be read
816 			 */
817 			if (crc_calculated != crc_actual) {
818 				cmn_err(CE_WARN, "iscsi connection(%p) "
819 				    "protocol error - encountered a data "
820 				    "digest error itt:0x%x expected:0x%08x "
821 				    "received:0x%08x", socket,
822 				    ihp->itt, crc_calculated, crc_actual);
823 				return (ISCSI_STATUS_DATA_DIGEST_ERROR);
824 			}
825 		}
826 	}
827 	return (ISCSI_STATUS_SUCCESS);
828 }
829 
830 /*
831  * Convert a prefix length to a mask.
832  */
833 static iscsi_status_t
834 iscsi_prefixlentomask(int prefixlen, int maxlen, uchar_t *mask)
835 {
836 	if (prefixlen < 0 || prefixlen > maxlen || mask == NULL) {
837 		return (ISCSI_STATUS_INTERNAL_ERROR);
838 	}
839 
840 	while (prefixlen > 0) {
841 		if (prefixlen >= 8) {
842 			*mask = 0xff;
843 			mask++;
844 			prefixlen = prefixlen - 8;
845 			continue;
846 		}
847 		*mask = *mask | (1 << (8 - prefixlen));
848 		prefixlen--;
849 	}
850 	return (ISCSI_STATUS_SUCCESS);
851 }
852 
853 iscsi_status_t
854 iscsi_net_interface(boolean_t reset)
855 {
856 	struct in_addr	braddr;
857 	struct in_addr	subnet;
858 	struct in_addr	myaddr;
859 	struct in_addr	defgateway;
860 	struct in6_addr myaddr6;
861 	struct in6_addr subnet6;
862 	uchar_t		mask_prefix = 0;
863 	int		mask_bits   = 1;
864 	TIUSER		*tiptr;
865 	TIUSER		*tiptr6;
866 	char		ifname[16]	= {0};
867 	iscsi_status_t	status;
868 
869 	struct knetconfig dl_udp_netconf = {
870 	    NC_TPI_CLTS,
871 	    NC_INET,
872 	    NC_UDP,
873 	    0, };
874 	struct knetconfig dl_udp6_netconf = {
875 	    NC_TPI_CLTS,
876 	    NC_INET6,
877 	    NC_UDP,
878 	    0, };
879 
880 	(void) strlcpy(ifname, rootfs.bo_ifname, sizeof (ifname));
881 
882 	if (iscsiboot_prop->boot_nic.sin_family == AF_INET) {
883 		/*
884 		 * Assumes only one linkage array element.
885 		 */
886 		dl_udp_netconf.knc_rdev =
887 		    makedevice(clone_major, ddi_name_to_major("udp"));
888 
889 		myaddr.s_addr =
890 		    iscsiboot_prop->boot_nic.nic_ip_u.u_in4.s_addr;
891 
892 		mask_prefix = iscsiboot_prop->boot_nic.sub_mask_prefix;
893 		(void) memset(&subnet.s_addr, 0, sizeof (subnet));
894 		status = iscsi_prefixlentomask(mask_prefix, IP_4_BITS,
895 		    (uchar_t *)&subnet.s_addr);
896 		if (status != ISCSI_STATUS_SUCCESS) {
897 			return (status);
898 		}
899 
900 		mask_bits = mask_bits << (IP_4_BITS - mask_prefix);
901 		mask_bits = mask_bits - 1;
902 		/*
903 		 * Set the last mask bits of the ip address with 1, then
904 		 * we can get the broadcast address.
905 		 */
906 		braddr.s_addr = myaddr.s_addr | mask_bits;
907 
908 		defgateway.s_addr =
909 		    iscsiboot_prop->boot_nic.nic_gw_u.u_in4.s_addr;
910 
911 		/* initialize interface */
912 		if (t_kopen((file_t *)NULL, dl_udp_netconf.knc_rdev,
913 		    FREAD|FWRITE, &tiptr, CRED()) == 0) {
914 			int	ret	= 0;
915 			if (reset == B_TRUE) {
916 				ret = kdlifconfig(tiptr, AF_INET, &myaddr,
917 				    &subnet, NULL, NULL, ifname);
918 			} else if (defgateway.s_addr == 0) {
919 				/* No default gate way specified */
920 				ret = kdlifconfig(tiptr, AF_INET, &myaddr,
921 				    &subnet, &braddr, NULL, ifname);
922 			} else {
923 				ret = kdlifconfig(tiptr, AF_INET, &myaddr,
924 				    &subnet, &braddr, &defgateway, ifname);
925 			}
926 			if (ret != 0) {
927 				(void) t_kclose(tiptr, 0);
928 				cmn_err(CE_WARN, "Failed to configure"
929 				    " iSCSI boot nic");
930 				return (ISCSI_STATUS_INTERNAL_ERROR);
931 			}
932 			(void) t_kclose(tiptr, 0);
933 		} else {
934 			cmn_err(CE_WARN, "Failed to configure"
935 			    " iSCSI boot nic");
936 			return (ISCSI_STATUS_INTERNAL_ERROR);
937 		}
938 		return (ISCSI_STATUS_SUCCESS);
939 	} else {
940 		dl_udp6_netconf.knc_rdev =
941 		    makedevice(clone_major, ddi_name_to_major("udp6"));
942 
943 		bcopy(&iscsiboot_prop->boot_nic.nic_ip_u.u_in6.s6_addr,
944 		    &myaddr6.s6_addr, 16);
945 
946 		(void) memset(&subnet6, 0, sizeof (subnet6));
947 		mask_prefix = iscsiboot_prop->boot_nic.sub_mask_prefix;
948 		status = iscsi_prefixlentomask(mask_prefix, IP_6_BITS,
949 		    (uchar_t *)&subnet6.s6_addr);
950 		if (status != ISCSI_STATUS_SUCCESS) {
951 			return (status);
952 		}
953 
954 		if (t_kopen((file_t *)NULL, dl_udp6_netconf.knc_rdev,
955 		    FREAD|FWRITE, &tiptr6, CRED()) == 0) {
956 			if (kdlifconfig(tiptr6, AF_INET6, &myaddr6,
957 			    &subnet6, NULL, NULL, ifname)) {
958 				cmn_err(CE_WARN, "Failed to configure"
959 				    " iSCSI boot nic");
960 				(void) t_kclose(tiptr6, 0);
961 				return (ISCSI_STATUS_INTERNAL_ERROR);
962 			}
963 			(void) t_kclose(tiptr6, 0);
964 		} else {
965 			cmn_err(CE_WARN, "Failed to configure"
966 			    " iSCSI boot nic");
967 			return (ISCSI_STATUS_INTERNAL_ERROR);
968 		}
969 		return (ISCSI_STATUS_SUCCESS);
970 	}
971 }
972