xref: /titanic_50/usr/src/uts/common/io/idm/idm_impl.c (revision 286caa64ad16838107c004ca772b1d153e7c7510)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/conf.h>
28 #include <sys/file.h>
29 #include <sys/ddi.h>
30 #include <sys/sunddi.h>
31 #include <sys/cpuvar.h>
32 
33 #include <sys/socket.h>
34 #include <sys/strsubr.h>
35 #include <sys/socketvar.h>
36 #include <sys/sysmacros.h>
37 
38 #include <sys/idm/idm.h>
39 #include <sys/idm/idm_so.h>
40 
41 extern idm_transport_t  idm_transport_list[];
42 
43 void
44 idm_pdu_rx(idm_conn_t *ic, idm_pdu_t *pdu)
45 {
46 	iscsi_async_evt_hdr_t *async_evt;
47 
48 	/*
49 	 * If we are in full-featured mode then route SCSI-related
50 	 * commands to the appropriate function vector
51 	 */
52 	ic->ic_timestamp = ddi_get_lbolt();
53 	mutex_enter(&ic->ic_state_mutex);
54 	if (ic->ic_ffp && ic->ic_pdu_events == 0) {
55 		mutex_exit(&ic->ic_state_mutex);
56 
57 		if (idm_pdu_rx_forward_ffp(ic, pdu) == B_TRUE) {
58 			/* Forwarded SCSI-related commands */
59 			return;
60 		}
61 		mutex_enter(&ic->ic_state_mutex);
62 	}
63 
64 	/*
65 	 * If we get here with a SCSI-related PDU then we are not in
66 	 * full-feature mode and the PDU is a protocol error (SCSI command
67 	 * PDU's may sometimes be an exception, see below).  All
68 	 * non-SCSI PDU's get treated them the same regardless of whether
69 	 * we are in full-feature mode.
70 	 *
71 	 * Look at the opcode and in some cases the PDU status and
72 	 * determine the appropriate event to send to the connection
73 	 * state machine.  Generate the event, passing the PDU as data.
74 	 * If the current connection state allows reception of the event
75 	 * the PDU will be submitted to the IDM client for processing,
76 	 * otherwise the PDU will be dropped.
77 	 */
78 	switch (IDM_PDU_OPCODE(pdu)) {
79 	case ISCSI_OP_LOGIN_CMD:
80 		idm_conn_rx_pdu_event(ic, CE_LOGIN_RCV, (uintptr_t)pdu);
81 		break;
82 	case ISCSI_OP_LOGIN_RSP:
83 		idm_parse_login_rsp(ic, pdu, /* RX */ B_TRUE);
84 		break;
85 	case ISCSI_OP_LOGOUT_CMD:
86 		idm_parse_logout_req(ic, pdu, /* RX */ B_TRUE);
87 		break;
88 	case ISCSI_OP_LOGOUT_RSP:
89 		idm_parse_logout_rsp(ic, pdu, /* RX */ B_TRUE);
90 		break;
91 	case ISCSI_OP_ASYNC_EVENT:
92 		async_evt = (iscsi_async_evt_hdr_t *)pdu->isp_hdr;
93 		switch (async_evt->async_event) {
94 		case ISCSI_ASYNC_EVENT_REQUEST_LOGOUT:
95 			idm_conn_rx_pdu_event(ic, CE_ASYNC_LOGOUT_RCV,
96 			    (uintptr_t)pdu);
97 			break;
98 		case ISCSI_ASYNC_EVENT_DROPPING_CONNECTION:
99 			idm_conn_rx_pdu_event(ic, CE_ASYNC_DROP_CONN_RCV,
100 			    (uintptr_t)pdu);
101 			break;
102 		case ISCSI_ASYNC_EVENT_DROPPING_ALL_CONNECTIONS:
103 			idm_conn_rx_pdu_event(ic, CE_ASYNC_DROP_ALL_CONN_RCV,
104 			    (uintptr_t)pdu);
105 			break;
106 		case ISCSI_ASYNC_EVENT_SCSI_EVENT:
107 		case ISCSI_ASYNC_EVENT_PARAM_NEGOTIATION:
108 		default:
109 			idm_conn_rx_pdu_event(ic, CE_MISC_RX,
110 			    (uintptr_t)pdu);
111 			break;
112 		}
113 		break;
114 	case ISCSI_OP_SCSI_CMD:
115 		/*
116 		 * Consider this scenario:  We are a target connection
117 		 * in "in login" state and a "login success sent" event has
118 		 * been generated but not yet handled.  Since we've sent
119 		 * the login response but we haven't actually transitioned
120 		 * to FFP mode we might conceivably receive a SCSI command
121 		 * from the initiator before we are ready.  We are actually
122 		 * in FFP we just don't know it yet -- to address this we
123 		 * can generate an event corresponding to the SCSI command.
124 		 * At the point when the event is handled by the state
125 		 * machine the login request will have been handled and we
126 		 * should be in FFP.  If we are not in FFP by that time
127 		 * we can reject the SCSI command with a protocol error.
128 		 *
129 		 * This scenario only applies to the target.
130 		 */
131 	case ISCSI_OP_SCSI_DATA:
132 	case ISCSI_OP_SCSI_DATA_RSP:
133 	case ISCSI_OP_RTT_RSP:
134 	case ISCSI_OP_SNACK_CMD:
135 	case ISCSI_OP_NOOP_IN:
136 	case ISCSI_OP_NOOP_OUT:
137 	case ISCSI_OP_TEXT_CMD:
138 	case ISCSI_OP_TEXT_RSP:
139 	case ISCSI_OP_REJECT_MSG:
140 	case ISCSI_OP_SCSI_TASK_MGT_MSG:
141 	case ISCSI_OP_SCSI_TASK_MGT_RSP:
142 		/* Validate received PDU against current state */
143 		idm_conn_rx_pdu_event(ic, CE_MISC_RX,
144 		    (uintptr_t)pdu);
145 		break;
146 	}
147 	mutex_exit(&ic->ic_state_mutex);
148 }
149 
150 void
151 idm_pdu_tx_forward(idm_conn_t *ic, idm_pdu_t *pdu)
152 {
153 	(*ic->ic_transport_ops->it_tx_pdu)(ic, pdu);
154 }
155 
156 boolean_t
157 idm_pdu_rx_forward_ffp(idm_conn_t *ic, idm_pdu_t *pdu)
158 {
159 	/*
160 	 * If this is an FFP request, call the appropriate handler
161 	 * and return B_TRUE, otherwise return B_FALSE.
162 	 */
163 	switch (IDM_PDU_OPCODE(pdu)) {
164 	case ISCSI_OP_SCSI_CMD:
165 		(*ic->ic_conn_ops.icb_rx_scsi_cmd)(ic, pdu);
166 		return (B_TRUE);
167 	case ISCSI_OP_SCSI_RSP:
168 		(*ic->ic_conn_ops.icb_rx_scsi_rsp)(ic, pdu);
169 		return (B_TRUE);
170 	case ISCSI_OP_SCSI_DATA:
171 		(*ic->ic_transport_ops->it_rx_dataout)(ic, pdu);
172 		return (B_TRUE);
173 	case ISCSI_OP_SCSI_DATA_RSP:
174 		(*ic->ic_transport_ops->it_rx_datain)(ic, pdu);
175 		return (B_TRUE);
176 	case ISCSI_OP_RTT_RSP:
177 		(*ic->ic_transport_ops->it_rx_rtt)(ic, pdu);
178 		return (B_TRUE);
179 	case ISCSI_OP_SCSI_TASK_MGT_MSG:
180 	case ISCSI_OP_SCSI_TASK_MGT_RSP:
181 	case ISCSI_OP_TEXT_CMD:
182 	case ISCSI_OP_TEXT_RSP:
183 	case ISCSI_OP_NOOP_OUT:
184 	case ISCSI_OP_NOOP_IN:
185 		(*ic->ic_conn_ops.icb_rx_misc)(ic, pdu);
186 		return (B_TRUE);
187 	default:
188 		return (B_FALSE);
189 	}
190 	/*NOTREACHED*/
191 }
192 
193 void
194 idm_pdu_rx_forward(idm_conn_t *ic, idm_pdu_t *pdu)
195 {
196 	/*
197 	 * Some PDU's specific to FFP get special handling.  This function
198 	 * will normally never be called in FFP with an FFP PDU since this
199 	 * is a slow path but in can happen on the target side during
200 	 * the transition to FFP.  We primarily call
201 	 * idm_pdu_rx_forward_ffp here to avoid code duplication.
202 	 */
203 	if (idm_pdu_rx_forward_ffp(ic, pdu) == B_FALSE) {
204 		/*
205 		 * Non-FFP PDU, use generic RC handler
206 		 */
207 		(*ic->ic_conn_ops.icb_rx_misc)(ic, pdu);
208 	}
209 }
210 
211 void
212 idm_parse_login_rsp(idm_conn_t *ic, idm_pdu_t *login_rsp_pdu, boolean_t rx)
213 {
214 	iscsi_login_rsp_hdr_t	*login_rsp =
215 	    (iscsi_login_rsp_hdr_t *)login_rsp_pdu->isp_hdr;
216 	idm_conn_event_t	new_event;
217 
218 	if (login_rsp->status_class == ISCSI_STATUS_CLASS_SUCCESS) {
219 		if (!(login_rsp->flags & ISCSI_FLAG_LOGIN_CONTINUE) &&
220 		    (login_rsp->flags & ISCSI_FLAG_LOGIN_TRANSIT) &&
221 		    (ISCSI_LOGIN_NEXT_STAGE(login_rsp->flags) ==
222 		    ISCSI_FULL_FEATURE_PHASE)) {
223 			new_event = (rx ? CE_LOGIN_SUCCESS_RCV :
224 			    CE_LOGIN_SUCCESS_SND);
225 		} else {
226 			new_event = (rx ? CE_MISC_RX : CE_MISC_TX);
227 		}
228 	} else {
229 		new_event = (rx ? CE_LOGIN_FAIL_RCV : CE_LOGIN_FAIL_SND);
230 	}
231 
232 	if (rx) {
233 		idm_conn_rx_pdu_event(ic, new_event, (uintptr_t)login_rsp_pdu);
234 	} else {
235 		idm_conn_tx_pdu_event(ic, new_event, (uintptr_t)login_rsp_pdu);
236 	}
237 }
238 
239 
240 void
241 idm_parse_logout_req(idm_conn_t *ic, idm_pdu_t *logout_req_pdu, boolean_t rx)
242 {
243 	iscsi_logout_hdr_t 	*logout_req =
244 	    (iscsi_logout_hdr_t *)logout_req_pdu->isp_hdr;
245 	idm_conn_event_t	new_event;
246 	uint8_t			reason =
247 	    (logout_req->flags & ISCSI_FLAG_LOGOUT_REASON_MASK);
248 
249 	/*
250 	 *	For a normal logout (close connection or close session) IDM
251 	 *	will terminate processing of all tasks completing the tasks
252 	 *	back to the client with a status indicating the connection
253 	 *	was logged out.  These tasks do not get completed.
254 	 *
255 	 *	For a "close connection for recovery logout) IDM suspends
256 	 *	processing of all tasks and completes them back to the client
257 	 *	with a status indicating connection was logged out for
258 	 *	recovery.  Both initiator and target hang onto these tasks.
259 	 *	When we add ERL2 support IDM will need to provide mechanisms
260 	 *	to change the task and buffer associations to a new connection.
261 	 *
262 	 *	This code doesn't address the possibility of MC/S.  We'll
263 	 *	need to decide how the separate connections get handled
264 	 *	in that case.  One simple option is to make the client
265 	 *	generate the events for the other connections.
266 	 */
267 	if (reason == ISCSI_LOGOUT_REASON_CLOSE_SESSION) {
268 		new_event =
269 		    (rx ? CE_LOGOUT_SESSION_RCV : CE_LOGOUT_SESSION_SND);
270 	} else if ((reason == ISCSI_LOGOUT_REASON_CLOSE_CONNECTION) ||
271 	    (reason == ISCSI_LOGOUT_REASON_RECOVERY)) {
272 		/* Check logout CID against this connection's CID */
273 		if (ntohs(logout_req->cid) == ic->ic_login_cid) {
274 			/* Logout is for this connection */
275 			new_event = (rx ? CE_LOGOUT_THIS_CONN_RCV :
276 			    CE_LOGOUT_THIS_CONN_SND);
277 		} else {
278 			/*
279 			 * Logout affects another connection.  This is not
280 			 * a relevant event for this connection so we'll
281 			 * just treat it as a normal PDU event.  Client
282 			 * will need to lookup the other connection and
283 			 * generate the event.
284 			 */
285 			new_event = (rx ? CE_MISC_RX : CE_MISC_TX);
286 		}
287 	} else {
288 		/* Invalid reason code */
289 		new_event = (rx ? CE_RX_PROTOCOL_ERROR : CE_TX_PROTOCOL_ERROR);
290 	}
291 
292 	if (rx) {
293 		idm_conn_rx_pdu_event(ic, new_event, (uintptr_t)logout_req_pdu);
294 	} else {
295 		idm_conn_tx_pdu_event(ic, new_event, (uintptr_t)logout_req_pdu);
296 	}
297 }
298 
299 
300 
301 void
302 idm_parse_logout_rsp(idm_conn_t *ic, idm_pdu_t *logout_rsp_pdu, boolean_t rx)
303 {
304 	idm_conn_event_t	new_event;
305 	iscsi_logout_rsp_hdr_t *logout_rsp =
306 	    (iscsi_logout_rsp_hdr_t *)logout_rsp_pdu->isp_hdr;
307 
308 	if (logout_rsp->response == ISCSI_STATUS_CLASS_SUCCESS) {
309 		new_event = rx ? CE_LOGOUT_SUCCESS_RCV : CE_LOGOUT_SUCCESS_SND;
310 	} else {
311 		new_event = rx ? CE_LOGOUT_FAIL_RCV : CE_LOGOUT_FAIL_SND;
312 	}
313 
314 	if (rx) {
315 		idm_conn_rx_pdu_event(ic, new_event, (uintptr_t)logout_rsp_pdu);
316 	} else {
317 		idm_conn_tx_pdu_event(ic, new_event, (uintptr_t)logout_rsp_pdu);
318 	}
319 }
320 
321 /*
322  * idm_svc_conn_create()
323  * Transport-agnostic service connection creation, invoked from the transport
324  * layer.
325  */
326 idm_status_t
327 idm_svc_conn_create(idm_svc_t *is, idm_transport_type_t tt,
328     idm_conn_t **ic_result)
329 {
330 	idm_conn_t	*ic;
331 	idm_status_t	rc;
332 
333 	mutex_enter(&is->is_mutex);
334 	if (!is->is_online) {
335 		mutex_exit(&is->is_mutex);
336 		return (IDM_STATUS_FAIL);
337 	}
338 	mutex_exit(&is->is_mutex);
339 
340 	ic = idm_conn_create_common(CONN_TYPE_TGT, tt,
341 	    &is->is_svc_req.sr_conn_ops);
342 	ic->ic_svc_binding = is;
343 
344 	/*
345 	 * Prepare connection state machine
346 	 */
347 	if ((rc = idm_conn_sm_init(ic)) != 0) {
348 		idm_conn_destroy_common(ic);
349 		return (rc);
350 	}
351 
352 
353 	*ic_result = ic;
354 
355 	mutex_enter(&idm.idm_global_mutex);
356 	list_insert_tail(&idm.idm_tgt_conn_list, ic);
357 	idm.idm_tgt_conn_count++;
358 	mutex_exit(&idm.idm_global_mutex);
359 
360 	return (IDM_STATUS_SUCCESS);
361 }
362 
363 void
364 idm_svc_conn_destroy(idm_conn_t *ic)
365 {
366 	mutex_enter(&idm.idm_global_mutex);
367 	list_remove(&idm.idm_tgt_conn_list, ic);
368 	idm.idm_tgt_conn_count--;
369 	mutex_exit(&idm.idm_global_mutex);
370 
371 	if (ic->ic_transport_private != NULL) {
372 		ic->ic_transport_ops->it_tgt_conn_destroy(ic);
373 	}
374 	idm_conn_destroy_common(ic);
375 }
376 
377 /*
378  * idm_conn_create_common()
379  *
380  * Allocate and initialize IDM connection context
381  */
382 idm_conn_t *
383 idm_conn_create_common(idm_conn_type_t conn_type, idm_transport_type_t tt,
384     idm_conn_ops_t *conn_ops)
385 {
386 	idm_conn_t		*ic;
387 	idm_transport_t		*it;
388 	idm_transport_type_t	type;
389 
390 	for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {
391 		it = &idm_transport_list[type];
392 
393 		if ((it->it_ops != NULL) && (it->it_type == tt))
394 			break;
395 	}
396 	ASSERT(it->it_type == tt);
397 	if (it->it_type != tt)
398 		return (NULL);
399 
400 	ic = kmem_zalloc(sizeof (idm_conn_t), KM_SLEEP);
401 
402 	/* Initialize data */
403 	ic->ic_conn_type = conn_type;
404 	ic->ic_conn_ops = *conn_ops;
405 	ic->ic_transport_ops = it->it_ops;
406 	ic->ic_transport_type = tt;
407 	ic->ic_transport_private = NULL; /* Set by transport service */
408 	ic->ic_internal_cid = idm_cid_alloc();
409 	if (ic->ic_internal_cid == 0) {
410 		kmem_free(ic, sizeof (idm_conn_t));
411 		return (NULL);
412 	}
413 	mutex_init(&ic->ic_mutex, NULL, MUTEX_DEFAULT, NULL);
414 	cv_init(&ic->ic_cv, NULL, CV_DEFAULT, NULL);
415 	idm_refcnt_init(&ic->ic_refcnt, ic);
416 
417 	return (ic);
418 }
419 
420 void
421 idm_conn_destroy_common(idm_conn_t *ic)
422 {
423 	idm_conn_sm_fini(ic);
424 	idm_refcnt_destroy(&ic->ic_refcnt);
425 	cv_destroy(&ic->ic_cv);
426 	mutex_destroy(&ic->ic_mutex);
427 	idm_cid_free(ic->ic_internal_cid);
428 
429 	kmem_free(ic, sizeof (idm_conn_t));
430 }
431 
432 /*
433  * Invoked from the SM as a result of client's invocation of
434  * idm_ini_conn_connect()
435  */
436 idm_status_t
437 idm_ini_conn_finish(idm_conn_t *ic)
438 {
439 	/* invoke transport-specific connection */
440 	return (ic->ic_transport_ops->it_ini_conn_connect(ic));
441 }
442 
443 idm_status_t
444 idm_tgt_conn_finish(idm_conn_t *ic)
445 {
446 	idm_status_t rc;
447 
448 	rc = idm_notify_client(ic, CN_CONNECT_ACCEPT, NULL);
449 	if (rc != IDM_STATUS_SUCCESS) {
450 		return (IDM_STATUS_REJECT);
451 	}
452 
453 	/* Target client is ready to receive a login, start connection */
454 	return (ic->ic_transport_ops->it_tgt_conn_connect(ic));
455 }
456 
457 idm_transport_t *
458 idm_transport_lookup(idm_conn_req_t *cr)
459 {
460 	idm_transport_type_t	type;
461 	idm_transport_t		*it;
462 	idm_transport_caps_t	caps;
463 
464 	/*
465 	 * Make sure all available transports are setup.  We call this now
466 	 * instead of at initialization time in case IB has become available
467 	 * since we started (hotplug, etc).
468 	 */
469 	idm_transport_setup(cr->cr_li);
470 
471 	/* Determine the transport for this connection */
472 	for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {
473 		it = &idm_transport_list[type];
474 
475 		if (it->it_ops == NULL) {
476 			/* transport is not registered */
477 			continue;
478 		}
479 
480 		if (it->it_ops->it_conn_is_capable(cr, &caps)) {
481 			return (it);
482 		}
483 	}
484 
485 	ASSERT(0);
486 	return (NULL); /* Make gcc happy */
487 }
488 
489 void
490 idm_transport_setup(ldi_ident_t li)
491 {
492 	idm_transport_type_t	type;
493 	idm_transport_t		*it;
494 	int			rc;
495 
496 	for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {
497 		it = &idm_transport_list[type];
498 		/*
499 		 * We may want to store the LDI handle in the idm_svc_t
500 		 * and then allow multiple calls to ldi_open_by_name.  This
501 		 * would enable the LDI code to track who has the device open
502 		 * which could be useful in the case where we have multiple
503 		 * services and perhaps also have initiator and target opening
504 		 * the transport simultaneously.  For now we stick with the
505 		 * plan.
506 		 */
507 		if (it->it_ops == NULL) {
508 			/* transport is not ready, try to initialize it */
509 			if (it->it_type == IDM_TRANSPORT_TYPE_SOCKETS) {
510 				idm_so_init(it);
511 			} else {
512 				rc = ldi_open_by_name(it->it_device_path,
513 				    FREAD | FWRITE, kcred, &it->it_ldi_hdl, li);
514 				/*
515 				 * If the open is successful we will have
516 				 * filled in the LDI handle in the transport
517 				 * table and we expect that the transport
518 				 * registered itself.
519 				 */
520 				if (rc != 0) {
521 					it->it_ldi_hdl = NULL;
522 				}
523 			}
524 		}
525 	}
526 }
527 
528 void
529 idm_transport_teardown()
530 {
531 	idm_transport_type_t	type;
532 	idm_transport_t		*it;
533 
534 	ASSERT(mutex_owned(&idm.idm_global_mutex));
535 
536 	/* Caller holds the IDM global mutex */
537 	for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {
538 		it = &idm_transport_list[type];
539 		/* If we have an open LDI handle on this driver, close it */
540 		if (it->it_ldi_hdl != NULL) {
541 			(void) ldi_close(it->it_ldi_hdl, FNDELAY, kcred);
542 			it->it_ldi_hdl = NULL;
543 		}
544 	}
545 }
546 
547 /*
548  * ID pool code.  We use this to generate unique structure identifiers without
549  * searching the existing structures.  This avoids the need to lock entire
550  * sets of structures at inopportune times.  Adapted from the CIFS server code.
551  *
552  *    A pool of IDs is a pool of 16 bit numbers. It is implemented as a bitmap.
553  *    A bit set to '1' indicates that that particular value has been allocated.
554  *    The allocation process is done shifting a bit through the whole bitmap.
555  *    The current position of that index bit is kept in the idm_idpool_t
556  *    structure and represented by a byte index (0 to buffer size minus 1) and
557  *    a bit index (0 to 7).
558  *
559  *    The pools start with a size of 8 bytes or 64 IDs. Each time the pool runs
560  *    out of IDs its current size is doubled until it reaches its maximum size
561  *    (8192 bytes or 65536 IDs). The IDs 0 and 65535 are never given out which
562  *    means that a pool can have a maximum number of 65534 IDs available.
563  */
564 
565 static int
566 idm_idpool_increment(
567     idm_idpool_t	*pool)
568 {
569 	uint8_t		*new_pool;
570 	uint32_t	new_size;
571 
572 	ASSERT(pool->id_magic == IDM_IDPOOL_MAGIC);
573 
574 	new_size = pool->id_size * 2;
575 	if (new_size <= IDM_IDPOOL_MAX_SIZE) {
576 		new_pool = kmem_alloc(new_size / 8, KM_NOSLEEP);
577 		if (new_pool) {
578 			bzero(new_pool, new_size / 8);
579 			bcopy(pool->id_pool, new_pool, pool->id_size / 8);
580 			kmem_free(pool->id_pool, pool->id_size / 8);
581 			pool->id_pool = new_pool;
582 			pool->id_free_counter += new_size - pool->id_size;
583 			pool->id_max_free_counter += new_size - pool->id_size;
584 			pool->id_size = new_size;
585 			pool->id_idx_msk = (new_size / 8) - 1;
586 			if (new_size >= IDM_IDPOOL_MAX_SIZE) {
587 				/* id -1 made unavailable */
588 				pool->id_pool[pool->id_idx_msk] = 0x80;
589 				pool->id_free_counter--;
590 				pool->id_max_free_counter--;
591 			}
592 			return (0);
593 		}
594 	}
595 	return (-1);
596 }
597 
598 /*
599  * idm_idpool_constructor
600  *
601  * This function initializes the pool structure provided.
602  */
603 
604 int
605 idm_idpool_create(idm_idpool_t *pool)
606 {
607 
608 	ASSERT(pool->id_magic != IDM_IDPOOL_MAGIC);
609 
610 	pool->id_size = IDM_IDPOOL_MIN_SIZE;
611 	pool->id_idx_msk = (IDM_IDPOOL_MIN_SIZE / 8) - 1;
612 	pool->id_free_counter = IDM_IDPOOL_MIN_SIZE - 1;
613 	pool->id_max_free_counter = IDM_IDPOOL_MIN_SIZE - 1;
614 	pool->id_bit = 0x02;
615 	pool->id_bit_idx = 1;
616 	pool->id_idx = 0;
617 	pool->id_pool = (uint8_t *)kmem_alloc((IDM_IDPOOL_MIN_SIZE / 8),
618 	    KM_SLEEP);
619 	bzero(pool->id_pool, (IDM_IDPOOL_MIN_SIZE / 8));
620 	/* -1 id made unavailable */
621 	pool->id_pool[0] = 0x01;		/* id 0 made unavailable */
622 	mutex_init(&pool->id_mutex, NULL, MUTEX_DEFAULT, NULL);
623 	pool->id_magic = IDM_IDPOOL_MAGIC;
624 	return (0);
625 }
626 
627 /*
628  * idm_idpool_destructor
629  *
630  * This function tears down and frees the resources associated with the
631  * pool provided.
632  */
633 
634 void
635 idm_idpool_destroy(idm_idpool_t *pool)
636 {
637 	ASSERT(pool->id_magic == IDM_IDPOOL_MAGIC);
638 	ASSERT(pool->id_free_counter == pool->id_max_free_counter);
639 	pool->id_magic = (uint32_t)~IDM_IDPOOL_MAGIC;
640 	mutex_destroy(&pool->id_mutex);
641 	kmem_free(pool->id_pool, (size_t)(pool->id_size / 8));
642 }
643 
644 /*
645  * idm_idpool_alloc
646  *
647  * This function allocates an ID from the pool provided.
648  */
649 int
650 idm_idpool_alloc(idm_idpool_t *pool, uint16_t *id)
651 {
652 	uint32_t	i;
653 	uint8_t		bit;
654 	uint8_t		bit_idx;
655 	uint8_t		byte;
656 
657 	ASSERT(pool->id_magic == IDM_IDPOOL_MAGIC);
658 
659 	mutex_enter(&pool->id_mutex);
660 	if ((pool->id_free_counter == 0) && idm_idpool_increment(pool)) {
661 		mutex_exit(&pool->id_mutex);
662 		return (-1);
663 	}
664 
665 	i = pool->id_size;
666 	while (i) {
667 		bit = pool->id_bit;
668 		bit_idx = pool->id_bit_idx;
669 		byte = pool->id_pool[pool->id_idx];
670 		while (bit) {
671 			if (byte & bit) {
672 				bit = bit << 1;
673 				bit_idx++;
674 				continue;
675 			}
676 			pool->id_pool[pool->id_idx] |= bit;
677 			*id = (uint16_t)(pool->id_idx * 8 + (uint32_t)bit_idx);
678 			pool->id_free_counter--;
679 			pool->id_bit = bit;
680 			pool->id_bit_idx = bit_idx;
681 			mutex_exit(&pool->id_mutex);
682 			return (0);
683 		}
684 		pool->id_bit = 1;
685 		pool->id_bit_idx = 0;
686 		pool->id_idx++;
687 		pool->id_idx &= pool->id_idx_msk;
688 		--i;
689 	}
690 	/*
691 	 * This section of code shouldn't be reached. If there are IDs
692 	 * available and none could be found there's a problem.
693 	 */
694 	ASSERT(0);
695 	mutex_exit(&pool->id_mutex);
696 	return (-1);
697 }
698 
699 /*
700  * idm_idpool_free
701  *
702  * This function frees the ID provided.
703  */
704 void
705 idm_idpool_free(idm_idpool_t *pool, uint16_t id)
706 {
707 	ASSERT(pool->id_magic == IDM_IDPOOL_MAGIC);
708 	ASSERT(id != 0);
709 	ASSERT(id != 0xFFFF);
710 
711 	mutex_enter(&pool->id_mutex);
712 	if (pool->id_pool[id >> 3] & (1 << (id & 7))) {
713 		pool->id_pool[id >> 3] &= ~(1 << (id & 7));
714 		pool->id_free_counter++;
715 		ASSERT(pool->id_free_counter <= pool->id_max_free_counter);
716 		mutex_exit(&pool->id_mutex);
717 		return;
718 	}
719 	/* Freeing a free ID. */
720 	ASSERT(0);
721 	mutex_exit(&pool->id_mutex);
722 }
723 
724 uint32_t
725 idm_cid_alloc(void)
726 {
727 	/*
728 	 * ID pool works with 16-bit identifiers right now.  That should
729 	 * be plenty since we will probably never have more than 2^16
730 	 * connections simultaneously.
731 	 */
732 	uint16_t cid16;
733 
734 	if (idm_idpool_alloc(&idm.idm_conn_id_pool, &cid16) == -1) {
735 		return (0); /* Fail */
736 	}
737 
738 	return ((uint32_t)cid16);
739 }
740 
741 void
742 idm_cid_free(uint32_t cid)
743 {
744 	idm_idpool_free(&idm.idm_conn_id_pool, (uint16_t)cid);
745 }
746 
747 
/*
 * Code for generating the header and data digests
 *
 * Lookup table for CRC-32C, indexed by one byte of input.
 * Generated with:
 *   width                = 32 bits
 *   poly                 = 0x1EDC6F41
 *   reflect input bytes  = true
 *   reflect output bytes = true
 *
 * The comment at the end of each row gives the index of its first entry.
 */

uint32_t idm_crc32c_table[256] =
{
	0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4,	/* 0x00 */
	0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB,	/* 0x04 */
	0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B,	/* 0x08 */
	0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24,	/* 0x0C */
	0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B,	/* 0x10 */
	0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384,	/* 0x14 */
	0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54,	/* 0x18 */
	0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B,	/* 0x1C */
	0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A,	/* 0x20 */
	0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35,	/* 0x24 */
	0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5,	/* 0x28 */
	0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA,	/* 0x2C */
	0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45,	/* 0x30 */
	0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A,	/* 0x34 */
	0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A,	/* 0x38 */
	0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595,	/* 0x3C */
	0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48,	/* 0x40 */
	0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957,	/* 0x44 */
	0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687,	/* 0x48 */
	0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198,	/* 0x4C */
	0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927,	/* 0x50 */
	0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38,	/* 0x54 */
	0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8,	/* 0x58 */
	0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7,	/* 0x5C */
	0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096,	/* 0x60 */
	0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789,	/* 0x64 */
	0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859,	/* 0x68 */
	0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46,	/* 0x6C */
	0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9,	/* 0x70 */
	0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6,	/* 0x74 */
	0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36,	/* 0x78 */
	0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829,	/* 0x7C */
	0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C,	/* 0x80 */
	0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93,	/* 0x84 */
	0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043,	/* 0x88 */
	0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C,	/* 0x8C */
	0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3,	/* 0x90 */
	0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC,	/* 0x94 */
	0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C,	/* 0x98 */
	0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033,	/* 0x9C */
	0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652,	/* 0xA0 */
	0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D,	/* 0xA4 */
	0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D,	/* 0xA8 */
	0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982,	/* 0xAC */
	0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D,	/* 0xB0 */
	0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622,	/* 0xB4 */
	0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2,	/* 0xB8 */
	0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED,	/* 0xBC */
	0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530,	/* 0xC0 */
	0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F,	/* 0xC4 */
	0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF,	/* 0xC8 */
	0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0,	/* 0xCC */
	0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F,	/* 0xD0 */
	0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540,	/* 0xD4 */
	0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90,	/* 0xD8 */
	0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F,	/* 0xDC */
	0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE,	/* 0xE0 */
	0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1,	/* 0xE4 */
	0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321,	/* 0xE8 */
	0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E,	/* 0xEC */
	0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81,	/* 0xF0 */
	0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E,	/* 0xF4 */
	0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E,	/* 0xF8 */
	0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351	/* 0xFC */
};
826 
827 /*
828  * iscsi_crc32c - Steps through buffer one byte at at time, calculates
829  * reflected crc using table.
830  */
831 uint32_t
832 idm_crc32c(void *address, unsigned long length)
833 {
834 	uint8_t *buffer = address;
835 	uint32_t crc = 0xffffffff, result;
836 #ifdef _BIG_ENDIAN
837 	uint8_t byte0, byte1, byte2, byte3;
838 #endif
839 
840 	ASSERT(address != NULL);
841 
842 	while (length--) {
843 		crc = idm_crc32c_table[(crc ^ *buffer++) & 0xFFL] ^
844 		    (crc >> 8);
845 	}
846 	result = crc ^ 0xffffffff;
847 
848 #ifdef	_BIG_ENDIAN
849 	byte0 = (uint8_t)(result & 0xFF);
850 	byte1 = (uint8_t)((result >> 8) & 0xFF);
851 	byte2 = (uint8_t)((result >> 16) & 0xFF);
852 	byte3 = (uint8_t)((result >> 24) & 0xFF);
853 	result = ((byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3);
854 #endif	/* _BIG_ENDIAN */
855 
856 	return (result);
857 }
858 
859 
860 /*
861  * idm_crc32c_continued - Continues stepping through buffer one
862  * byte at at time, calculates reflected crc using table.
863  */
864 uint32_t
865 idm_crc32c_continued(void *address, unsigned long length, uint32_t crc)
866 {
867 	uint8_t *buffer = address;
868 	uint32_t result;
869 #ifdef	_BIG_ENDIAN
870 	uint8_t byte0, byte1, byte2, byte3;
871 #endif
872 
873 	ASSERT(address != NULL);
874 
875 #ifdef	_BIG_ENDIAN
876 	byte0 = (uint8_t)((crc >> 24) & 0xFF);
877 	byte1 = (uint8_t)((crc >> 16) & 0xFF);
878 	byte2 = (uint8_t)((crc >> 8) & 0xFF);
879 	byte3 = (uint8_t)(crc & 0xFF);
880 	crc = ((byte3 << 24) | (byte2 << 16) | (byte1 << 8) | byte0);
881 #endif
882 
883 	crc = crc ^ 0xffffffff;
884 	while (length--) {
885 		crc = idm_crc32c_table[(crc ^ *buffer++) & 0xFFL] ^
886 		    (crc >> 8);
887 	}
888 	result = crc ^ 0xffffffff;
889 
890 #ifdef	_BIG_ENDIAN
891 	byte0 = (uint8_t)(result & 0xFF);
892 	byte1 = (uint8_t)((result >> 8) & 0xFF);
893 	byte2 = (uint8_t)((result >> 16) & 0xFF);
894 	byte3 = (uint8_t)((result >> 24) & 0xFF);
895 	result = ((byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3);
896 #endif
897 	return (result);
898 }
899 
900 /* ARGSUSED */
901 int
902 idm_task_constructor(void *hdl, void *arg, int flags)
903 {
904 	idm_task_t *idt = (idm_task_t *)hdl;
905 	uint32_t next_task;
906 
907 	mutex_init(&idt->idt_mutex, NULL, MUTEX_DEFAULT, NULL);
908 
909 	/* Find the next free task ID */
910 	rw_enter(&idm.idm_taskid_table_lock, RW_WRITER);
911 	next_task = idm.idm_taskid_next;
912 	while (idm.idm_taskid_table[next_task]) {
913 		next_task++;
914 		if (next_task == idm.idm_taskid_max)
915 			next_task = 0;
916 		if (next_task == idm.idm_taskid_next) {
917 			rw_exit(&idm.idm_taskid_table_lock);
918 			return (-1);
919 		}
920 	}
921 
922 	idm.idm_taskid_table[next_task] = idt;
923 	idm.idm_taskid_next = (next_task + 1) % idm.idm_taskid_max;
924 	rw_exit(&idm.idm_taskid_table_lock);
925 
926 	idt->idt_tt = next_task;
927 
928 	list_create(&idt->idt_inbufv, sizeof (idm_buf_t),
929 	    offsetof(idm_buf_t, idb_buflink));
930 	list_create(&idt->idt_outbufv, sizeof (idm_buf_t),
931 	    offsetof(idm_buf_t, idb_buflink));
932 	idm_refcnt_init(&idt->idt_refcnt, idt);
933 
934 	/*
935 	 * Set the transport header pointer explicitly.  This removes the
936 	 * need for per-transport header allocation, which simplifies cache
937 	 * init considerably.  If at a later date we have an additional IDM
938 	 * transport that requires a different size, we'll revisit this.
939 	 */
940 	idt->idt_transport_hdr = (void *)(idt + 1); /* pointer arithmetic */
941 
942 	return (0);
943 }
944 
945 /* ARGSUSED */
946 void
947 idm_task_destructor(void *hdl, void *arg)
948 {
949 	idm_task_t *idt = (idm_task_t *)hdl;
950 
951 	/* Remove the task from the ID table */
952 	rw_enter(&idm.idm_taskid_table_lock, RW_WRITER);
953 	idm.idm_taskid_table[idt->idt_tt] = NULL;
954 	rw_exit(&idm.idm_taskid_table_lock);
955 
956 	/* free the inbuf and outbuf */
957 	idm_refcnt_destroy(&idt->idt_refcnt);
958 	list_destroy(&idt->idt_inbufv);
959 	list_destroy(&idt->idt_outbufv);
960 
961 	/*
962 	 * The final call to idm_task_rele may happen with the task
963 	 * mutex held which may invoke this destructor immediately.
964 	 * Stall here until the task mutex owner lets go.
965 	 */
966 	mutex_enter(&idt->idt_mutex);
967 	mutex_destroy(&idt->idt_mutex);
968 }
969 
970 /*
971  * idm_listbuf_insert searches from the back of the list looking for the
972  * insertion point.
973  */
974 void
975 idm_listbuf_insert(list_t *lst, idm_buf_t *buf)
976 {
977 	idm_buf_t	*idb;
978 
979 	/* iterate through the list to find the insertion point */
980 	for (idb = list_tail(lst); idb != NULL; idb = list_prev(lst, idb)) {
981 
982 		if (idb->idb_bufoffset < buf->idb_bufoffset) {
983 
984 			list_insert_after(lst, idb, buf);
985 			return;
986 		}
987 	}
988 
989 	/* add the buf to the head of the list */
990 	list_insert_head(lst, buf);
991 
992 }
993 
994 /*ARGSUSED*/
995 void
996 idm_wd_thread(void *arg)
997 {
998 	idm_conn_t	*ic;
999 	clock_t		wake_time;
1000 	clock_t		idle_time;
1001 
1002 	/* Record the thread id for thread_join() */
1003 	idm.idm_wd_thread_did = curthread->t_did;
1004 	mutex_enter(&idm.idm_global_mutex);
1005 	idm.idm_wd_thread_running = B_TRUE;
1006 	cv_signal(&idm.idm_wd_cv);
1007 
1008 	while (idm.idm_wd_thread_running) {
1009 		for (ic = list_head(&idm.idm_tgt_conn_list);
1010 		    ic != NULL;
1011 		    ic = list_next(&idm.idm_tgt_conn_list, ic)) {
1012 			idle_time = ddi_get_lbolt() - ic->ic_timestamp;
1013 
1014 			/*
1015 			 * If this connection is in FFP then grab a hold
1016 			 * and check the various timeout thresholds.  Otherwise
1017 			 * the connection is closing and we should just
1018 			 * move on to the next one.
1019 			 */
1020 			mutex_enter(&ic->ic_state_mutex);
1021 			if (ic->ic_ffp) {
1022 				idm_conn_hold(ic);
1023 			} else {
1024 				mutex_exit(&ic->ic_state_mutex);
1025 				continue;
1026 			}
1027 
1028 			/*
1029 			 * If there hasn't been any activity on this
1030 			 * connection for the keepalive timeout period
1031 			 * and if the client has provided a keepalive
1032 			 * callback then call the keepalive callback.
1033 			 * This allows the client to take action to keep
1034 			 * the link alive (like send a nop PDU).
1035 			 */
1036 			if ((TICK_TO_SEC(idle_time) >=
1037 			    IDM_TRANSPORT_KEEPALIVE_IDLE_TIMEOUT) &&
1038 			    !ic->ic_keepalive) {
1039 				ic->ic_keepalive = B_TRUE;
1040 				if (ic->ic_conn_ops.icb_keepalive) {
1041 					mutex_exit(&ic->ic_state_mutex);
1042 					mutex_exit(&idm.idm_global_mutex);
1043 					(*ic->ic_conn_ops.icb_keepalive)(ic);
1044 					mutex_enter(&idm.idm_global_mutex);
1045 					mutex_enter(&ic->ic_state_mutex);
1046 				}
1047 			} else if ((TICK_TO_SEC(idle_time) <
1048 			    IDM_TRANSPORT_KEEPALIVE_IDLE_TIMEOUT)) {
1049 				/* Reset keepalive */
1050 				ic->ic_keepalive = B_FALSE;
1051 			}
1052 
1053 			/*
1054 			 * If there hasn't been any activity on this
1055 			 * connection for the failure timeout period then
1056 			 * drop the connection.  We expect the initiator
1057 			 * to keep the connection alive if it wants the
1058 			 * connection to stay open.
1059 			 *
1060 			 * If it turns out to be desireable to take a
1061 			 * more active role in maintaining the connect
1062 			 * we could add a client callback to send
1063 			 * a "keepalive" kind of message (no doubt a nop)
1064 			 * and fire that on a shorter timer.
1065 			 */
1066 			if (TICK_TO_SEC(idle_time) >
1067 			    IDM_TRANSPORT_FAIL_IDLE_TIMEOUT) {
1068 				mutex_exit(&ic->ic_state_mutex);
1069 				mutex_exit(&idm.idm_global_mutex);
1070 				IDM_SM_LOG(CE_WARN, "idm_wd_thread: "
1071 				    "conn %p idle for %d seconds, "
1072 				    "sending CE_TRANSPORT_FAIL",
1073 				    (void *)ic, (int)idle_time);
1074 				idm_conn_event(ic, CE_TRANSPORT_FAIL, NULL);
1075 				mutex_enter(&idm.idm_global_mutex);
1076 				mutex_enter(&ic->ic_state_mutex);
1077 			}
1078 
1079 			idm_conn_rele(ic);
1080 
1081 			mutex_exit(&ic->ic_state_mutex);
1082 		}
1083 
1084 		wake_time = ddi_get_lbolt() + SEC_TO_TICK(IDM_WD_INTERVAL);
1085 		(void) cv_timedwait(&idm.idm_wd_cv, &idm.idm_global_mutex,
1086 		    wake_time);
1087 	}
1088 	mutex_exit(&idm.idm_global_mutex);
1089 
1090 	thread_exit();
1091 }
1092