xref: /illumos-gate/usr/src/uts/common/io/idm/idm_impl.c (revision 74ceea2d43ae370f7678d3f98ea3e47fde931098)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/conf.h>
28 #include <sys/file.h>
29 #include <sys/ddi.h>
30 #include <sys/sunddi.h>
31 #include <sys/cpuvar.h>
32 
33 #include <sys/socket.h>
34 #include <sys/strsubr.h>
35 #include <sys/socketvar.h>
36 #include <sys/sysmacros.h>
37 
38 #include <sys/idm/idm.h>
39 #include <sys/idm/idm_so.h>
40 
41 extern idm_transport_t  idm_transport_list[];
42 
/*
 * idm_pdu_rx()
 *
 * Entry point for a PDU received on connection 'ic'.  The connection
 * timestamp is refreshed, then the PDU is either forwarded directly through
 * idm_pdu_rx_forward_ffp() (when ic_ffp is set and ic_pdu_events is zero)
 * or turned into a connection state machine event chosen from its opcode.
 *
 * ic_state_mutex is dropped around the call to idm_pdu_rx_forward_ffp() and
 * is held for the whole of the opcode switch below.
 */
void
idm_pdu_rx(idm_conn_t *ic, idm_pdu_t *pdu)
{
	iscsi_async_evt_hdr_t *async_evt;

	/*
	 * If we are in full-featured mode then route SCSI-related
	 * commands to the appropriate function vector
	 */
	ic->ic_timestamp = ddi_get_lbolt();
	mutex_enter(&ic->ic_state_mutex);
	if (ic->ic_ffp && ic->ic_pdu_events == 0) {
		/* Do not hold the state mutex across the handler call */
		mutex_exit(&ic->ic_state_mutex);

		if (idm_pdu_rx_forward_ffp(ic, pdu) == B_TRUE) {
			/* Forwarded SCSI-related commands */
			return;
		}
		/* Not an FFP opcode; re-take the mutex for the event path */
		mutex_enter(&ic->ic_state_mutex);
	}

	/*
	 * If we get here with a SCSI-related PDU then we are not in
	 * full-feature mode and the PDU is a protocol error (SCSI command
	 * PDU's may sometimes be an exception, see below).  All
	 * non-SCSI PDU's get treated the same regardless of whether
	 * we are in full-feature mode.
	 *
	 * Look at the opcode and in some cases the PDU status and
	 * determine the appropriate event to send to the connection
	 * state machine.  Generate the event, passing the PDU as data.
	 * If the current connection state allows reception of the event
	 * the PDU will be submitted to the IDM client for processing,
	 * otherwise the PDU will be dropped.
	 *
	 * NOTE(review): the switch has no default case, so an opcode that is
	 * not listed (ISCSI_OP_SCSI_RSP, for example, is handled by
	 * idm_pdu_rx_forward_ffp() but does not appear here) generates no
	 * event at all.  Confirm who owns the PDU in that case.
	 */
	switch (IDM_PDU_OPCODE(pdu)) {
	case ISCSI_OP_LOGIN_CMD:
		idm_conn_rx_pdu_event(ic, CE_LOGIN_RCV, (uintptr_t)pdu);
		break;
	case ISCSI_OP_LOGIN_RSP:
		idm_parse_login_rsp(ic, pdu, /* RX */ B_TRUE);
		break;
	case ISCSI_OP_LOGOUT_CMD:
		idm_parse_logout_req(ic, pdu, /* RX */ B_TRUE);
		break;
	case ISCSI_OP_LOGOUT_RSP:
		idm_parse_logout_rsp(ic, pdu, /* RX */ B_TRUE);
		break;
	case ISCSI_OP_ASYNC_EVENT:
		/* The async event code selects the state machine event */
		async_evt = (iscsi_async_evt_hdr_t *)pdu->isp_hdr;
		switch (async_evt->opcode) {
		case ISCSI_ASYNC_EVENT_REQUEST_LOGOUT:
			idm_conn_rx_pdu_event(ic, CE_ASYNC_LOGOUT_RCV,
			    (uintptr_t)pdu);
			break;
		case ISCSI_ASYNC_EVENT_DROPPING_CONNECTION:
			idm_conn_rx_pdu_event(ic, CE_ASYNC_DROP_CONN_RCV,
			    (uintptr_t)pdu);
			break;
		case ISCSI_ASYNC_EVENT_DROPPING_ALL_CONNECTIONS:
			idm_conn_rx_pdu_event(ic, CE_ASYNC_DROP_ALL_CONN_RCV,
			    (uintptr_t)pdu);
			break;
		case ISCSI_ASYNC_EVENT_SCSI_EVENT:
		case ISCSI_ASYNC_EVENT_PARAM_NEGOTIATION:
		default:
			idm_conn_rx_pdu_event(ic, CE_MISC_RX,
			    (uintptr_t)pdu);
			break;
		}
		break;
	case ISCSI_OP_SCSI_CMD:
		/*
		 * Consider this scenario:  We are a target connection
		 * in "in login" state and a "login success sent" event has
		 * been generated but not yet handled.  Since we've sent
		 * the login response but we haven't actually transitioned
		 * to FFP mode we might conceivably receive a SCSI command
		 * from the initiator before we are ready.  We are actually
		 * in FFP we just don't know it yet -- to address this we
		 * can generate an event corresponding to the SCSI command.
		 * At the point when the event is handled by the state
		 * machine the login request will have been handled and we
		 * should be in FFP.  If we are not in FFP by that time
		 * we can reject the SCSI command with a protocol error.
		 *
		 * This scenario only applies to the target.
		 */
		/* FALLTHROUGH */
	case ISCSI_OP_SCSI_DATA:
	case ISCSI_OP_SCSI_DATA_RSP:
	case ISCSI_OP_RTT_RSP:
	case ISCSI_OP_SNACK_CMD:
	case ISCSI_OP_NOOP_IN:
	case ISCSI_OP_NOOP_OUT:
	case ISCSI_OP_TEXT_CMD:
	case ISCSI_OP_TEXT_RSP:
	case ISCSI_OP_REJECT_MSG:
	case ISCSI_OP_SCSI_TASK_MGT_MSG:
	case ISCSI_OP_SCSI_TASK_MGT_RSP:
		/* Validate received PDU against current state */
		idm_conn_rx_pdu_event(ic, CE_MISC_RX,
		    (uintptr_t)pdu);
		break;
	}
	mutex_exit(&ic->ic_state_mutex);
}
149 
150 void
151 idm_pdu_tx_forward(idm_conn_t *ic, idm_pdu_t *pdu)
152 {
153 	(*ic->ic_transport_ops->it_tx_pdu)(ic, pdu);
154 }
155 
156 boolean_t
157 idm_pdu_rx_forward_ffp(idm_conn_t *ic, idm_pdu_t *pdu)
158 {
159 	/*
160 	 * If this is an FFP request, call the appropriate handler
161 	 * and return B_TRUE, otherwise return B_FALSE.
162 	 */
163 	switch (IDM_PDU_OPCODE(pdu)) {
164 	case ISCSI_OP_SCSI_CMD:
165 		(*ic->ic_conn_ops.icb_rx_scsi_cmd)(ic, pdu);
166 		return (B_TRUE);
167 	case ISCSI_OP_SCSI_RSP:
168 		(*ic->ic_conn_ops.icb_rx_scsi_rsp)(ic, pdu);
169 		return (B_TRUE);
170 	case ISCSI_OP_SCSI_DATA:
171 		(*ic->ic_transport_ops->it_rx_dataout)(ic, pdu);
172 		return (B_TRUE);
173 	case ISCSI_OP_SCSI_DATA_RSP:
174 		(*ic->ic_transport_ops->it_rx_datain)(ic, pdu);
175 		return (B_TRUE);
176 	case ISCSI_OP_RTT_RSP:
177 		(*ic->ic_transport_ops->it_rx_rtt)(ic, pdu);
178 		return (B_TRUE);
179 	case ISCSI_OP_SCSI_TASK_MGT_MSG:
180 	case ISCSI_OP_SCSI_TASK_MGT_RSP:
181 	case ISCSI_OP_TEXT_CMD:
182 	case ISCSI_OP_TEXT_RSP:
183 	case ISCSI_OP_NOOP_OUT:
184 	case ISCSI_OP_NOOP_IN:
185 		(*ic->ic_conn_ops.icb_rx_misc)(ic, pdu);
186 		return (B_TRUE);
187 	default:
188 		return (B_FALSE);
189 	}
190 	/*NOTREACHED*/
191 }
192 
193 void
194 idm_pdu_rx_forward(idm_conn_t *ic, idm_pdu_t *pdu)
195 {
196 	/*
197 	 * Some PDU's specific to FFP get special handling.  This function
198 	 * will normally never be called in FFP with an FFP PDU since this
199 	 * is a slow path but in can happen on the target side during
200 	 * the transition to FFP.  We primarily call
201 	 * idm_pdu_rx_forward_ffp here to avoid code duplication.
202 	 */
203 	if (idm_pdu_rx_forward_ffp(ic, pdu) == B_FALSE) {
204 		/*
205 		 * Non-FFP PDU, use generic RC handler
206 		 */
207 		(*ic->ic_conn_ops.icb_rx_misc)(ic, pdu);
208 	}
209 }
210 
211 void
212 idm_parse_login_rsp(idm_conn_t *ic, idm_pdu_t *login_rsp_pdu, boolean_t rx)
213 {
214 	iscsi_login_rsp_hdr_t	*login_rsp =
215 	    (iscsi_login_rsp_hdr_t *)login_rsp_pdu->isp_hdr;
216 	idm_conn_event_t	new_event;
217 
218 	if (login_rsp->status_class == ISCSI_STATUS_CLASS_SUCCESS) {
219 		if (!(login_rsp->flags & ISCSI_FLAG_LOGIN_CONTINUE) &&
220 		    (login_rsp->flags & ISCSI_FLAG_LOGIN_TRANSIT) &&
221 		    (ISCSI_LOGIN_NEXT_STAGE(login_rsp->flags) ==
222 		    ISCSI_FULL_FEATURE_PHASE)) {
223 			new_event = (rx ? CE_LOGIN_SUCCESS_RCV :
224 			    CE_LOGIN_SUCCESS_SND);
225 		} else {
226 			new_event = (rx ? CE_MISC_RX : CE_MISC_TX);
227 		}
228 	} else {
229 		new_event = (rx ? CE_LOGIN_FAIL_RCV : CE_LOGIN_FAIL_SND);
230 	}
231 
232 	if (rx) {
233 		idm_conn_rx_pdu_event(ic, new_event, (uintptr_t)login_rsp_pdu);
234 	} else {
235 		idm_conn_tx_pdu_event(ic, new_event, (uintptr_t)login_rsp_pdu);
236 	}
237 }
238 
239 
240 void
241 idm_parse_logout_req(idm_conn_t *ic, idm_pdu_t *logout_req_pdu, boolean_t rx)
242 {
243 	iscsi_logout_hdr_t 	*logout_req =
244 	    (iscsi_logout_hdr_t *)logout_req_pdu->isp_hdr;
245 	idm_conn_event_t	new_event;
246 	uint8_t			reason =
247 	    (logout_req->flags & ISCSI_FLAG_LOGOUT_REASON_MASK);
248 
249 	/*
250 	 *	For a normal logout (close connection or close session) IDM
251 	 *	will terminate processing of all tasks completing the tasks
252 	 *	back to the client with a status indicating the connection
253 	 *	was logged out.  These tasks do not get completed.
254 	 *
255 	 *	For a "close connection for recovery logout) IDM suspends
256 	 *	processing of all tasks and completes them back to the client
257 	 *	with a status indicating connection was logged out for
258 	 *	recovery.  Both initiator and target hang onto these tasks.
259 	 *	When we add ERL2 support IDM will need to provide mechanisms
260 	 *	to change the task and buffer associations to a new connection.
261 	 *
262 	 *	This code doesn't address the possibility of MC/S.  We'll
263 	 *	need to decide how the separate connections get handled
264 	 *	in that case.  One simple option is to make the client
265 	 *	generate the events for the other connections.
266 	 */
267 	if (reason == ISCSI_LOGOUT_REASON_CLOSE_SESSION) {
268 		new_event =
269 		    (rx ? CE_LOGOUT_SESSION_RCV : CE_LOGOUT_SESSION_SND);
270 	} else if ((reason == ISCSI_LOGOUT_REASON_CLOSE_CONNECTION) ||
271 	    (reason == ISCSI_LOGOUT_REASON_RECOVERY)) {
272 		/* Check logout CID against this connection's CID */
273 		if (ntohs(logout_req->cid) == ic->ic_login_cid) {
274 			/* Logout is for this connection */
275 			new_event = (rx ? CE_LOGOUT_THIS_CONN_RCV :
276 			    CE_LOGOUT_THIS_CONN_SND);
277 		} else {
278 			/*
279 			 * Logout affects another connection.  This is not
280 			 * a relevant event for this connection so we'll
281 			 * just treat it as a normal PDU event.  Client
282 			 * will need to lookup the other connection and
283 			 * generate the event.
284 			 */
285 			new_event = (rx ? CE_MISC_RX : CE_MISC_TX);
286 		}
287 	} else {
288 		/* Invalid reason code */
289 		new_event = (rx ? CE_RX_PROTOCOL_ERROR : CE_TX_PROTOCOL_ERROR);
290 	}
291 
292 	if (rx) {
293 		idm_conn_rx_pdu_event(ic, new_event, (uintptr_t)logout_req_pdu);
294 	} else {
295 		idm_conn_tx_pdu_event(ic, new_event, (uintptr_t)logout_req_pdu);
296 	}
297 }
298 
299 
300 
301 void
302 idm_parse_logout_rsp(idm_conn_t *ic, idm_pdu_t *logout_rsp_pdu, boolean_t rx)
303 {
304 	idm_conn_event_t	new_event;
305 	iscsi_logout_rsp_hdr_t *logout_rsp =
306 	    (iscsi_logout_rsp_hdr_t *)logout_rsp_pdu->isp_hdr;
307 
308 	if (logout_rsp->response == ISCSI_STATUS_CLASS_SUCCESS) {
309 		new_event = rx ? CE_LOGOUT_SUCCESS_RCV : CE_LOGOUT_SUCCESS_SND;
310 	} else {
311 		new_event = rx ? CE_LOGOUT_FAIL_RCV : CE_LOGOUT_FAIL_SND;
312 	}
313 
314 	if (rx) {
315 		idm_conn_rx_pdu_event(ic, new_event, (uintptr_t)logout_rsp_pdu);
316 	} else {
317 		idm_conn_tx_pdu_event(ic, new_event, (uintptr_t)logout_rsp_pdu);
318 	}
319 }
320 
321 /*
322  * idm_svc_conn_create()
323  * Transport-agnostic service connection creation, invoked from the transport
324  * layer.
325  */
326 idm_status_t
327 idm_svc_conn_create(idm_svc_t *is, idm_transport_type_t tt,
328     idm_conn_t **ic_result)
329 {
330 	idm_conn_t	*ic;
331 	idm_status_t	rc;
332 
333 	ic = idm_conn_create_common(CONN_TYPE_TGT, tt,
334 	    &is->is_svc_req.sr_conn_ops);
335 	ic->ic_svc_binding = is;
336 
337 	/*
338 	 * Prepare connection state machine
339 	 */
340 	if ((rc = idm_conn_sm_init(ic)) != 0) {
341 		idm_conn_destroy_common(ic);
342 		return (rc);
343 	}
344 
345 
346 	*ic_result = ic;
347 
348 	mutex_enter(&idm.idm_global_mutex);
349 	list_insert_tail(&idm.idm_tgt_conn_list, ic);
350 	idm.idm_tgt_conn_count++;
351 	mutex_exit(&idm.idm_global_mutex);
352 
353 	return (0);
354 }
355 
356 void
357 idm_svc_conn_destroy(idm_conn_t *ic)
358 {
359 	mutex_enter(&idm.idm_global_mutex);
360 	list_remove(&idm.idm_tgt_conn_list, ic);
361 	idm.idm_tgt_conn_count--;
362 	mutex_exit(&idm.idm_global_mutex);
363 
364 	idm_conn_sm_fini(ic);
365 
366 	if (ic->ic_transport_private != NULL) {
367 		ic->ic_transport_ops->it_tgt_conn_destroy(ic);
368 	}
369 	idm_conn_destroy_common(ic);
370 }
371 
372 /*
373  * idm_conn_create_common()
374  *
375  * Allocate and initialize IDM connection context
376  */
377 idm_conn_t *
378 idm_conn_create_common(idm_conn_type_t conn_type, idm_transport_type_t tt,
379     idm_conn_ops_t *conn_ops)
380 {
381 	idm_conn_t		*ic;
382 	idm_transport_t		*it;
383 	idm_transport_type_t	type;
384 
385 	for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {
386 		it = &idm_transport_list[type];
387 
388 		if ((it->it_ops != NULL) && (it->it_type == tt))
389 			break;
390 	}
391 	ASSERT(it->it_type == tt);
392 	if (it->it_type != tt)
393 		return (NULL);
394 
395 	ic = kmem_zalloc(sizeof (idm_conn_t), KM_SLEEP);
396 
397 	/* Initialize data */
398 	ic->ic_conn_type = conn_type;
399 	ic->ic_conn_ops = *conn_ops;
400 	ic->ic_transport_ops = it->it_ops;
401 	ic->ic_transport_type = tt;
402 	ic->ic_transport_private = NULL; /* Set by transport service */
403 	ic->ic_internal_cid = idm_cid_alloc();
404 	if (ic->ic_internal_cid == 0) {
405 		kmem_free(ic, sizeof (idm_conn_t));
406 		return (NULL);
407 	}
408 	mutex_init(&ic->ic_mutex, NULL, MUTEX_DEFAULT, NULL);
409 	cv_init(&ic->ic_cv, NULL, CV_DEFAULT, NULL);
410 	idm_refcnt_init(&ic->ic_refcnt, ic);
411 
412 	return (ic);
413 }
414 
415 void
416 idm_conn_destroy_common(idm_conn_t *ic)
417 {
418 	idm_refcnt_destroy(&ic->ic_refcnt);
419 	cv_destroy(&ic->ic_cv);
420 	mutex_destroy(&ic->ic_mutex);
421 	idm_cid_free(ic->ic_internal_cid);
422 
423 	kmem_free(ic, sizeof (idm_conn_t));
424 }
425 
426 /*
427  * Invoked from the SM as a result of client's invocation of
428  * idm_ini_conn_connect()
429  */
430 idm_status_t
431 idm_ini_conn_finish(idm_conn_t *ic)
432 {
433 	/* invoke transport-specific connection */
434 	return (ic->ic_transport_ops->it_ini_conn_connect(ic));
435 }
436 
437 idm_status_t
438 idm_tgt_conn_finish(idm_conn_t *ic)
439 {
440 	idm_status_t rc;
441 
442 	rc = idm_notify_client(ic, CN_CONNECT_ACCEPT, NULL);
443 	if (rc != IDM_STATUS_SUCCESS) {
444 		return (IDM_STATUS_REJECT);
445 	}
446 
447 	/* Target client is ready to receive a login, start connection */
448 	return (ic->ic_transport_ops->it_tgt_conn_connect(ic));
449 }
450 
451 idm_transport_t *
452 idm_transport_lookup(idm_conn_req_t *cr)
453 {
454 	idm_transport_type_t	type;
455 	idm_transport_t		*it;
456 	idm_transport_caps_t	caps;
457 
458 	/*
459 	 * Make sure all available transports are setup.  We call this now
460 	 * instead of at initialization time in case IB has become available
461 	 * since we started (hotplug, etc).
462 	 */
463 	idm_transport_setup(cr->cr_li);
464 
465 	/* Determine the transport for this connection */
466 	for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {
467 		it = &idm_transport_list[type];
468 
469 		if (it->it_ops == NULL) {
470 			/* transport is not registered */
471 			continue;
472 		}
473 
474 		if (it->it_ops->it_conn_is_capable(cr, &caps)) {
475 			return (it);
476 		}
477 	}
478 
479 	ASSERT(0);
480 	return (NULL); /* Make gcc happy */
481 }
482 
483 void
484 idm_transport_setup(ldi_ident_t li)
485 {
486 	idm_transport_type_t	type;
487 	idm_transport_t		*it;
488 	int			rc;
489 
490 	for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {
491 		it = &idm_transport_list[type];
492 		/*
493 		 * We may want to store the LDI handle in the idm_svc_t
494 		 * and then allow multiple calls to ldi_open_by_name.  This
495 		 * would enable the LDI code to track who has the device open
496 		 * which could be useful in the case where we have multiple
497 		 * services and perhaps also have initiator and target opening
498 		 * the transport simultaneously.  For now we stick with the
499 		 * plan.
500 		 */
501 		if (it->it_ops == NULL) {
502 			/* transport is not ready, try to initialize it */
503 			if (it->it_type == IDM_TRANSPORT_TYPE_SOCKETS) {
504 				idm_so_init(it);
505 			} else {
506 				rc = ldi_open_by_name(it->it_device_path,
507 				    FREAD | FWRITE, kcred, &it->it_ldi_hdl, li);
508 				/*
509 				 * If the open is successful we will have
510 				 * filled in the LDI handle in the transport
511 				 * table and we expect that the transport
512 				 * registered itself.
513 				 */
514 				if (rc != 0) {
515 					it->it_ldi_hdl = NULL;
516 				}
517 			}
518 		}
519 	}
520 }
521 
522 /*
523  * ID pool code.  We use this to generate unique structure identifiers without
524  * searching the existing structures.  This avoids the need to lock entire
525  * sets of structures at inopportune times.  Adapted from the CIFS server code.
526  *
527  *    A pool of IDs is a pool of 16 bit numbers. It is implemented as a bitmap.
528  *    A bit set to '1' indicates that that particular value has been allocated.
529  *    The allocation process is done shifting a bit through the whole bitmap.
530  *    The current position of that index bit is kept in the idm_idpool_t
531  *    structure and represented by a byte index (0 to buffer size minus 1) and
532  *    a bit index (0 to 7).
533  *
534  *    The pools start with a size of 8 bytes or 64 IDs. Each time the pool runs
535  *    out of IDs its current size is doubled until it reaches its maximum size
536  *    (8192 bytes or 65536 IDs). The IDs 0 and 65535 are never given out which
537  *    means that a pool can have a maximum number of 65534 IDs available.
538  */
539 
540 static int
541 idm_idpool_increment(
542     idm_idpool_t	*pool)
543 {
544 	uint8_t		*new_pool;
545 	uint32_t	new_size;
546 
547 	ASSERT(pool->id_magic == IDM_IDPOOL_MAGIC);
548 
549 	new_size = pool->id_size * 2;
550 	if (new_size <= IDM_IDPOOL_MAX_SIZE) {
551 		new_pool = kmem_alloc(new_size / 8, KM_NOSLEEP);
552 		if (new_pool) {
553 			bzero(new_pool, new_size / 8);
554 			bcopy(pool->id_pool, new_pool, pool->id_size / 8);
555 			kmem_free(pool->id_pool, pool->id_size / 8);
556 			pool->id_pool = new_pool;
557 			pool->id_free_counter += new_size - pool->id_size;
558 			pool->id_max_free_counter += new_size - pool->id_size;
559 			pool->id_size = new_size;
560 			pool->id_idx_msk = (new_size / 8) - 1;
561 			if (new_size >= IDM_IDPOOL_MAX_SIZE) {
562 				/* id -1 made unavailable */
563 				pool->id_pool[pool->id_idx_msk] = 0x80;
564 				pool->id_free_counter--;
565 				pool->id_max_free_counter--;
566 			}
567 			return (0);
568 		}
569 	}
570 	return (-1);
571 }
572 
573 /*
574  * idm_idpool_constructor
575  *
576  * This function initializes the pool structure provided.
577  */
578 
579 int
580 idm_idpool_create(idm_idpool_t *pool)
581 {
582 
583 	ASSERT(pool->id_magic != IDM_IDPOOL_MAGIC);
584 
585 	pool->id_size = IDM_IDPOOL_MIN_SIZE;
586 	pool->id_idx_msk = (IDM_IDPOOL_MIN_SIZE / 8) - 1;
587 	pool->id_free_counter = IDM_IDPOOL_MIN_SIZE - 1;
588 	pool->id_max_free_counter = IDM_IDPOOL_MIN_SIZE - 1;
589 	pool->id_bit = 0x02;
590 	pool->id_bit_idx = 1;
591 	pool->id_idx = 0;
592 	pool->id_pool = (uint8_t *)kmem_alloc((IDM_IDPOOL_MIN_SIZE / 8),
593 	    KM_SLEEP);
594 	bzero(pool->id_pool, (IDM_IDPOOL_MIN_SIZE / 8));
595 	/* -1 id made unavailable */
596 	pool->id_pool[0] = 0x01;		/* id 0 made unavailable */
597 	mutex_init(&pool->id_mutex, NULL, MUTEX_DEFAULT, NULL);
598 	pool->id_magic = IDM_IDPOOL_MAGIC;
599 	return (0);
600 }
601 
602 /*
603  * idm_idpool_destructor
604  *
605  * This function tears down and frees the resources associated with the
606  * pool provided.
607  */
608 
609 void
610 idm_idpool_destroy(idm_idpool_t *pool)
611 {
612 	ASSERT(pool->id_magic == IDM_IDPOOL_MAGIC);
613 	ASSERT(pool->id_free_counter == pool->id_max_free_counter);
614 	pool->id_magic = (uint32_t)~IDM_IDPOOL_MAGIC;
615 	mutex_destroy(&pool->id_mutex);
616 	kmem_free(pool->id_pool, (size_t)(pool->id_size / 8));
617 }
618 
/*
 * idm_idpool_alloc
 *
 * This function allocates an ID from the pool provided.
 *
 * The search resumes at the cursor (byte id_idx, bit id_bit) left by the
 * previous allocation and wraps around the bitmap using id_idx_msk.  If
 * the pool has no free IDs it is first grown with idm_idpool_increment().
 *
 * Returns 0 and stores the new ID in *id on success.  Returns -1 if the
 * pool is full and cannot be grown; *id is not written in that case.
 */
int
idm_idpool_alloc(idm_idpool_t *pool, uint16_t *id)
{
	uint32_t	i;
	uint8_t		bit;
	uint8_t		bit_idx;
	uint8_t		byte;

	ASSERT(pool->id_magic == IDM_IDPOOL_MAGIC);

	mutex_enter(&pool->id_mutex);
	/* Grow only when empty; a non-zero return means growth failed */
	if ((pool->id_free_counter == 0) && idm_idpool_increment(pool)) {
		mutex_exit(&pool->id_mutex);
		return (-1);
	}

	/*
	 * Each pass of the outer loop examines one byte of the bitmap.  The
	 * bound is the pool size in IDs, which is more than the number of
	 * bytes, so every byte is visited from bit 0 at least once even
	 * though the first byte is entered at the cursor bit.
	 */
	i = pool->id_size;
	while (i) {
		bit = pool->id_bit;
		bit_idx = pool->id_bit_idx;
		byte = pool->id_pool[pool->id_idx];
		/* 'bit' is 8 bits wide: shifting past 0x80 makes it zero */
		while (bit) {
			if (byte & bit) {
				/* ID in use, try the next bit of this byte */
				bit = bit << 1;
				bit_idx++;
				continue;
			}
			/* Free ID found: claim it and park the cursor on it */
			pool->id_pool[pool->id_idx] |= bit;
			*id = (uint16_t)(pool->id_idx * 8 + (uint32_t)bit_idx);
			pool->id_free_counter--;
			pool->id_bit = bit;
			pool->id_bit_idx = bit_idx;
			mutex_exit(&pool->id_mutex);
			return (0);
		}
		/* Byte exhausted: move the cursor to bit 0 of the next byte */
		pool->id_bit = 1;
		pool->id_bit_idx = 0;
		pool->id_idx++;
		pool->id_idx &= pool->id_idx_msk;
		--i;
	}
	/*
	 * This section of code shouldn't be reached. If there are IDs
	 * available and none could be found there's a problem.
	 */
	ASSERT(0);
	mutex_exit(&pool->id_mutex);
	return (-1);
}
673 
674 /*
675  * idm_idpool_free
676  *
677  * This function frees the ID provided.
678  */
679 void
680 idm_idpool_free(idm_idpool_t *pool, uint16_t id)
681 {
682 	ASSERT(pool->id_magic == IDM_IDPOOL_MAGIC);
683 	ASSERT(id != 0);
684 	ASSERT(id != 0xFFFF);
685 
686 	mutex_enter(&pool->id_mutex);
687 	if (pool->id_pool[id >> 3] & (1 << (id & 7))) {
688 		pool->id_pool[id >> 3] &= ~(1 << (id & 7));
689 		pool->id_free_counter++;
690 		ASSERT(pool->id_free_counter <= pool->id_max_free_counter);
691 		mutex_exit(&pool->id_mutex);
692 		return;
693 	}
694 	/* Freeing a free ID. */
695 	ASSERT(0);
696 	mutex_exit(&pool->id_mutex);
697 }
698 
699 uint32_t
700 idm_cid_alloc(void)
701 {
702 	/*
703 	 * ID pool works with 16-bit identifiers right now.  That should
704 	 * be plenty since we will probably never have more than 2^16
705 	 * connections simultaneously.
706 	 */
707 	uint16_t cid16;
708 
709 	if (idm_idpool_alloc(&idm.idm_conn_id_pool, &cid16) == -1) {
710 		return (0); /* Fail */
711 	}
712 
713 	return ((uint32_t)cid16);
714 }
715 
716 void
717 idm_cid_free(uint32_t cid)
718 {
719 	idm_idpool_free(&idm.idm_conn_id_pool, (uint16_t)cid);
720 }
721 
722 
/*
 * Code for generating the header and data digests
 *
 * This is the CRC-32C table
 * Generated with:
 * width = 32 bits
 * poly = 0x1EDC6F41
 * reflect input bytes = true
 * reflect output bytes = true
 *
 * The table has one entry per possible byte value.  Because input and
 * output are reflected, the entries correspond to the bit-reversed form
 * of the polynomial (entry 0x80 is 0x82F63B78).  idm_crc32c() and
 * idm_crc32c_continued() index it with the low byte of the running CRC
 * xor'ed with the next data byte.
 */

uint32_t idm_crc32c_table[256] =
{
	0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4,
	0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB,
	0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B,
	0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24,
	0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B,
	0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384,
	0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54,
	0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B,
	0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A,
	0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35,
	0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5,
	0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA,
	0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45,
	0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A,
	0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A,
	0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595,
	0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48,
	0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957,
	0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687,
	0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198,
	0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927,
	0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38,
	0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8,
	0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7,
	0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096,
	0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789,
	0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859,
	0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46,
	0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9,
	0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6,
	0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36,
	0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829,
	0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C,
	0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93,
	0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043,
	0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C,
	0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3,
	0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC,
	0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C,
	0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033,
	0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652,
	0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D,
	0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D,
	0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982,
	0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D,
	0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622,
	0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2,
	0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED,
	0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530,
	0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F,
	0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF,
	0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0,
	0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F,
	0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540,
	0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90,
	0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F,
	0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE,
	0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1,
	0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321,
	0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E,
	0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81,
	0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E,
	0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E,
	0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351
};
801 
802 /*
803  * iscsi_crc32c - Steps through buffer one byte at at time, calculates
804  * reflected crc using table.
805  */
806 uint32_t
807 idm_crc32c(void *address, unsigned long length)
808 {
809 	uint8_t *buffer = address;
810 	uint32_t crc = 0xffffffff, result;
811 #ifdef _BIG_ENDIAN
812 	uint8_t byte0, byte1, byte2, byte3;
813 #endif
814 
815 	ASSERT(address != NULL);
816 
817 	while (length--) {
818 		crc = idm_crc32c_table[(crc ^ *buffer++) & 0xFFL] ^
819 		    (crc >> 8);
820 	}
821 	result = crc ^ 0xffffffff;
822 
823 #ifdef	_BIG_ENDIAN
824 	byte0 = (uint8_t)(result & 0xFF);
825 	byte1 = (uint8_t)((result >> 8) & 0xFF);
826 	byte2 = (uint8_t)((result >> 16) & 0xFF);
827 	byte3 = (uint8_t)((result >> 24) & 0xFF);
828 	result = ((byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3);
829 #endif	/* _BIG_ENDIAN */
830 
831 	return (result);
832 }
833 
834 
835 /*
836  * idm_crc32c_continued - Continues stepping through buffer one
837  * byte at at time, calculates reflected crc using table.
838  */
839 uint32_t
840 idm_crc32c_continued(void *address, unsigned long length, uint32_t crc)
841 {
842 	uint8_t *buffer = address;
843 	uint32_t result;
844 #ifdef	_BIG_ENDIAN
845 	uint8_t byte0, byte1, byte2, byte3;
846 #endif
847 
848 	ASSERT(address != NULL);
849 
850 #ifdef	_BIG_ENDIAN
851 	byte0 = (uint8_t)((crc >> 24) & 0xFF);
852 	byte1 = (uint8_t)((crc >> 16) & 0xFF);
853 	byte2 = (uint8_t)((crc >> 8) & 0xFF);
854 	byte3 = (uint8_t)(crc & 0xFF);
855 	crc = ((byte3 << 24) | (byte2 << 16) | (byte1 << 8) | byte0);
856 #endif
857 
858 	crc = crc ^ 0xffffffff;
859 	while (length--) {
860 		crc = idm_crc32c_table[(crc ^ *buffer++) & 0xFFL] ^
861 		    (crc >> 8);
862 	}
863 	result = crc ^ 0xffffffff;
864 
865 #ifdef	_BIG_ENDIAN
866 	byte0 = (uint8_t)(result & 0xFF);
867 	byte1 = (uint8_t)((result >> 8) & 0xFF);
868 	byte2 = (uint8_t)((result >> 16) & 0xFF);
869 	byte3 = (uint8_t)((result >> 24) & 0xFF);
870 	result = ((byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3);
871 #endif
872 	return (result);
873 }
874 
875 /* ARGSUSED */
876 int
877 idm_task_constructor(void *hdl, void *arg, int flags)
878 {
879 	idm_task_t *idt = (idm_task_t *)hdl;
880 	uint32_t next_task;
881 
882 	mutex_init(&idt->idt_mutex, NULL, MUTEX_DEFAULT, NULL);
883 
884 	/* Find the next free task ID */
885 	rw_enter(&idm.idm_taskid_table_lock, RW_WRITER);
886 	next_task = idm.idm_taskid_next;
887 	while (idm.idm_taskid_table[next_task]) {
888 		next_task++;
889 		if (next_task == idm.idm_taskid_max)
890 			next_task = 0;
891 		if (next_task == idm.idm_taskid_next) {
892 			rw_exit(&idm.idm_taskid_table_lock);
893 			return (-1);
894 		}
895 	}
896 
897 	idm.idm_taskid_table[next_task] = idt;
898 	idm.idm_taskid_next = (next_task + 1) % idm.idm_taskid_max;
899 	rw_exit(&idm.idm_taskid_table_lock);
900 
901 	idt->idt_tt = next_task;
902 
903 	list_create(&idt->idt_inbufv, sizeof (idm_buf_t),
904 	    offsetof(idm_buf_t, idb_buflink));
905 	list_create(&idt->idt_outbufv, sizeof (idm_buf_t),
906 	    offsetof(idm_buf_t, idb_buflink));
907 	idm_refcnt_init(&idt->idt_refcnt, idt);
908 
909 	/*
910 	 * Set the transport header pointer explicitly.  This removes the
911 	 * need for per-transport header allocation, which simplifies cache
912 	 * init considerably.  If at a later date we have an additional IDM
913 	 * transport that requires a different size, we'll revisit this.
914 	 */
915 	idt->idt_transport_hdr = (void *)(idt + 1); /* pointer arithmetic */
916 
917 	return (0);
918 }
919 
920 /* ARGSUSED */
921 void
922 idm_task_destructor(void *hdl, void *arg)
923 {
924 	idm_task_t *idt = (idm_task_t *)hdl;
925 
926 	/* Remove the task from the ID table */
927 	rw_enter(&idm.idm_taskid_table_lock, RW_WRITER);
928 	idm.idm_taskid_table[idt->idt_tt] = NULL;
929 	rw_exit(&idm.idm_taskid_table_lock);
930 
931 	/* free the inbuf and outbuf */
932 	idm_refcnt_destroy(&idt->idt_refcnt);
933 	list_destroy(&idt->idt_inbufv);
934 	list_destroy(&idt->idt_outbufv);
935 
936 	mutex_destroy(&idt->idt_mutex);
937 }
938 
939 /*
940  * idm_listbuf_insert searches from the back of the list looking for the
941  * insertion point.
942  */
943 void
944 idm_listbuf_insert(list_t *lst, idm_buf_t *buf)
945 {
946 	idm_buf_t	*idb;
947 
948 	/* iterate through the list to find the insertion point */
949 	for (idb = list_tail(lst); idb != NULL; idb = list_prev(lst, idb)) {
950 
951 		if (idb->idb_bufoffset < buf->idb_bufoffset) {
952 
953 			list_insert_after(lst, idb, buf);
954 			return;
955 		}
956 	}
957 
958 	/* add the buf to the head of the list */
959 	list_insert_head(lst, buf);
960 
961 }
962 
963 /*ARGSUSED*/
964 void
965 idm_wd_thread(void *arg)
966 {
967 	idm_conn_t	*ic;
968 	clock_t		wake_time;
969 	clock_t		idle_time;
970 
971 	/* Record the thread id for thread_join() */
972 	idm.idm_wd_thread_did = curthread->t_did;
973 	mutex_enter(&idm.idm_global_mutex);
974 	idm.idm_wd_thread_running = B_TRUE;
975 	cv_signal(&idm.idm_wd_cv);
976 
977 	while (idm.idm_wd_thread_running) {
978 		for (ic = list_head(&idm.idm_tgt_conn_list);
979 		    ic != NULL;
980 		    ic = list_next(&idm.idm_tgt_conn_list, ic)) {
981 			idle_time = ddi_get_lbolt() - ic->ic_timestamp;
982 
983 			/*
984 			 * If there hasn't been any activity on this
985 			 * connection for the specified period then
986 			 * drop the connection.  We expect the initiator
987 			 * to keep the connection alive if it wants the
988 			 * connection to stay open.
989 			 *
990 			 * If it turns out to be desireable to take a
991 			 * more active role in maintaining the connect
992 			 * we could add a client callback to send
993 			 * a "keepalive" kind of message (no doubt a nop)
994 			 * and fire that on a shorter timer.
995 			 */
996 			if (TICK_TO_SEC(idle_time) >
997 			    IDM_TRANSPORT_FAIL_IDLE_TIMEOUT) {
998 				/*
999 				 * Only send the transport fail if we're in
1000 				 * FFP.  State machine timers should handle
1001 				 * problems in non-ffp states.
1002 				 */
1003 				if (ic->ic_ffp) {
1004 					mutex_exit(&idm.idm_global_mutex);
1005 					IDM_SM_LOG(CE_WARN, "idm_wd_thread: "
1006 					    "conn %p idle for %d seconds, "
1007 					    "sending CE_TRANSPORT_FAIL",
1008 					    (void *)ic, (int)idle_time);
1009 					idm_conn_event(ic, CE_TRANSPORT_FAIL,
1010 					    NULL);
1011 					mutex_enter(&idm.idm_global_mutex);
1012 				}
1013 			}
1014 		}
1015 
1016 		wake_time = lbolt + SEC_TO_TICK(IDM_WD_INTERVAL);
1017 		(void) cv_timedwait(&idm.idm_wd_cv, &idm.idm_global_mutex,
1018 		    wake_time);
1019 	}
1020 	mutex_exit(&idm.idm_global_mutex);
1021 
1022 	thread_exit();
1023 }
1024