xref: /illumos-gate/usr/src/uts/common/io/comstar/port/srpt/srpt_ch.c (revision 4558d122136f151d62acbbc02ddb42df89a5ef66)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * RDMA channel interface for Solaris SCSI RDMA Protocol Target (SRP)
 * transport port provider module for the COMSTAR framework.
 */

#include <sys/cpuvar.h>
#include <sys/types.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/sysmacros.h>
#include <sys/sdt.h>
#include <sys/taskq.h>
#include <sys/scsi/scsi.h>
#include <sys/ib/ibtl/ibti.h>

#include <sys/stmf.h>
#include <sys/stmf_ioctl.h>
#include <sys/portif.h>

#include "srp.h"
#include "srpt_impl.h"
#include "srpt_ioc.h"
#include "srpt_stp.h"
#include "srpt_ch.h"

extern srpt_ctxt_t *srpt_ctxt;
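/*
 * srpt_send_msg_depth bounds the number of outstanding SRP responses
 * per channel; it is used below to size the completion queues and the
 * send work queue.
 */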
extern uint16_t srpt_send_msg_depth;

/*
 * Prototypes.
 */
static void srpt_ch_scq_hdlr(ibt_cq_hdl_t cq_hdl, void *arg);
static void srpt_ch_rcq_hdlr(ibt_cq_hdl_t cq_hdl, void *arg);
static void srpt_ch_process_iu(srpt_channel_t *ch, srpt_iu_t *iu);

/*
 * srpt_ch_alloc()
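 *
 * Allocate an RC channel and its send/receive completion queues for a
 * new initiator login on the given HCA port.  Receive buffers come
 * from the I/O Controller's shared receive queue (SRQ), so no
 * dedicated receive work queue is allocated for the channel.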
 */
srpt_channel_t *
srpt_ch_alloc(srpt_target_port_t *tgt, uint8_t port)
{
	ibt_status_t			status;
	srpt_channel_t			*ch;
	ibt_cq_attr_t			cq_attr;
	ibt_rc_chan_alloc_args_t	ch_args;
	uint32_t			cq_real_size;
	srpt_ioc_t			*ioc;

	ASSERT(tgt != NULL);
	ioc = tgt->tp_ioc;
	ASSERT(ioc != NULL);

	ch = kmem_zalloc(sizeof (*ch), KM_SLEEP);
	rw_init(&ch->ch_rwlock, NULL, RW_DRIVER, NULL);
	mutex_init(&ch->ch_reflock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&ch->ch_cv_complete, NULL, CV_DRIVER, NULL);
	ch->ch_refcnt	= 1;
	ch->ch_cv_waiters = 0;

	ch->ch_state  = SRPT_CHANNEL_CONNECTING;
	ch->ch_tgt    = tgt;
	ch->ch_req_lim_delta = 0;
	ch->ch_ti_iu_len = 0;

	cq_attr.cq_size	 = srpt_send_msg_depth * 2;
	cq_attr.cq_sched = 0;
	cq_attr.cq_flags = IBT_CQ_NO_FLAGS;

	status = ibt_alloc_cq(ioc->ioc_ibt_hdl, &cq_attr, &ch->ch_scq_hdl,
	    &cq_real_size);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ch_alloc, send CQ alloc error (%d)",
		    status);
		goto scq_alloc_err;
	}

	cq_attr.cq_size	 = srpt_send_msg_depth + 1;
	cq_attr.cq_sched = 0;
	cq_attr.cq_flags = IBT_CQ_NO_FLAGS;

	status = ibt_alloc_cq(ioc->ioc_ibt_hdl, &cq_attr, &ch->ch_rcq_hdl,
	    &cq_real_size);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L2("ch_alloc, receive CQ alloc error (%d)",
		    status);
		goto rcq_alloc_err;
	}

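	/*
	 * Completion handlers fire once per CQ arming; each handler
	 * re-arms its CQ and polls again before returning in order to
	 * close the race between the final poll and the next completion.
	 */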
	ibt_set_cq_handler(ch->ch_scq_hdl, srpt_ch_scq_hdlr, ch);
	ibt_set_cq_handler(ch->ch_rcq_hdl, srpt_ch_rcq_hdlr, ch);
	(void) ibt_enable_cq_notify(ch->ch_scq_hdl, IBT_NEXT_COMPLETION);
	(void) ibt_enable_cq_notify(ch->ch_rcq_hdl, IBT_NEXT_COMPLETION);

	ch_args.rc_flags   = IBT_WR_SIGNALED;

	/* Make certain the initiator can not read/write our memory */
	ch_args.rc_control = 0;

	ch_args.rc_hca_port_num = port;

	/*
	 * Any SRP IU can result in a number of STMF data buffer transfers
	 * and those transfers themselves could span multiple initiator
	 * buffers.  Therefore, the number of send WQE's actually required
	 * can vary.  Here we assume that on average an I/O will require
	 * no more than SRPT_MAX_OUT_IO_PER_CMD send WQE's.  In practice
	 * this will prevent send work queue overrun, but we will also
	 * inform STMF to throttle I/O should the work queue become full.
	 *
	 * If the HCA tells us the max outstanding WRs for a channel is
	 * lower than our default, use the HCA value.
	 */
	ch_args.rc_sizes.cs_sq = min(ioc->ioc_attr.hca_max_chan_sz,
	    (srpt_send_msg_depth * SRPT_MAX_OUT_IO_PER_CMD));
	ch_args.rc_sizes.cs_rq = 0;
	ch_args.rc_sizes.cs_sq_sgl = 2;
	ch_args.rc_sizes.cs_rq_sgl = 0;

	ch_args.rc_scq = ch->ch_scq_hdl;
	ch_args.rc_rcq = ch->ch_rcq_hdl;
	ch_args.rc_pd  = ioc->ioc_pd_hdl;
	ch_args.rc_clone_chan = NULL;
	ch_args.rc_srq = ioc->ioc_srq_hdl;

	status = ibt_alloc_rc_channel(ioc->ioc_ibt_hdl, IBT_ACHAN_USES_SRQ,
	    &ch_args, &ch->ch_chan_hdl, &ch->ch_sizes);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L2("ch_alloc, IBT channel alloc error (%d)",
		    status);
		goto qp_alloc_err;
	}

	/*
	 * Create pool of send WQE entries to map send wqe work IDs
	 * to various types (specifically in error cases where OP
	 * is not known).
	 */
	ch->ch_num_swqe = ch->ch_sizes.cs_sq;
	SRPT_DPRINTF_L2("ch_alloc, number of SWQEs = %u", ch->ch_num_swqe);
	ch->ch_swqe = kmem_zalloc(sizeof (srpt_swqe_t) * ch->ch_num_swqe,
	    KM_SLEEP);
	if (ch->ch_swqe == NULL) {
		SRPT_DPRINTF_L2("ch_alloc, SWQE alloc error");
		(void) ibt_free_channel(ch->ch_chan_hdl);
		goto qp_alloc_err;
	}
	mutex_init(&ch->ch_swqe_lock, NULL, MUTEX_DRIVER, NULL);
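	/*
	 * Build the SWQE free list.  Entry 0 is deliberately excluded: a
	 * work request ID of zero is reserved to mark unsignaled
	 * intermediate RDMA posts in the send completion handler, and a
	 * zero return from srpt_ch_alloc_swqe_wrid() means no slot is
	 * free.
	 */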
	ch->ch_head = 1;
	for (ch->ch_tail = 1; ch->ch_tail < ch->ch_num_swqe -1; ch->ch_tail++) {
		ch->ch_swqe[ch->ch_tail].sw_next = ch->ch_tail + 1;
	}
	ch->ch_swqe[ch->ch_tail].sw_next = 0;

	ibt_set_chan_private(ch->ch_chan_hdl, ch);
	return (ch);

qp_alloc_err:
	(void) ibt_free_cq(ch->ch_rcq_hdl);

rcq_alloc_err:
	(void) ibt_free_cq(ch->ch_scq_hdl);

scq_alloc_err:
	cv_destroy(&ch->ch_cv_complete);
	mutex_destroy(&ch->ch_reflock);
	rw_destroy(&ch->ch_rwlock);
	kmem_free(ch, sizeof (*ch));

	return (NULL);
}

/*
 * srpt_ch_add_ref()
 */
void
srpt_ch_add_ref(srpt_channel_t *ch)
{
	mutex_enter(&ch->ch_reflock);
	ch->ch_refcnt++;
	SRPT_DPRINTF_L4("ch_add_ref, ch (%p), refcnt (%d)",
	    (void *)ch, ch->ch_refcnt);
	ASSERT(ch->ch_refcnt != 0);
	mutex_exit(&ch->ch_reflock);
}

/*
 * srpt_ch_release_ref()
 *
 * A non-zero value for wait causes the thread to block until all
 * references to the channel are released.
 */
void
srpt_ch_release_ref(srpt_channel_t *ch, uint_t wait)
{
	mutex_enter(&ch->ch_reflock);

	SRPT_DPRINTF_L4("ch_release_ref, ch (%p), refcnt (%d), wait (%d)",
	    (void *)ch, ch->ch_refcnt, wait);

	ASSERT(ch->ch_refcnt != 0);

	ch->ch_refcnt--;

	if (ch->ch_refcnt != 0) {
		if (wait) {
			ch->ch_cv_waiters++;
			while (ch->ch_refcnt != 0) {
				cv_wait(&ch->ch_cv_complete, &ch->ch_reflock);
			}
			ch->ch_cv_waiters--;
		} else {
			mutex_exit(&ch->ch_reflock);
			return;
		}
	}

	/*
	 * Last thread out frees the IB resources, locks/conditions and memory
	 */
	if (ch->ch_cv_waiters > 0) {
		/* we're not last, wake someone else up */
		cv_signal(&ch->ch_cv_complete);
		mutex_exit(&ch->ch_reflock);
		return;
	}

	SRPT_DPRINTF_L3("ch_release_ref - release resources");
	if (ch->ch_chan_hdl) {
		SRPT_DPRINTF_L3("ch_release_ref - free channel");
		(void) ibt_free_channel(ch->ch_chan_hdl);
	}

	if (ch->ch_scq_hdl) {
		(void) ibt_free_cq(ch->ch_scq_hdl);
	}

	if (ch->ch_rcq_hdl) {
		(void) ibt_free_cq(ch->ch_rcq_hdl);
	}

	/*
	 * There should be no IU's associated with this
	 * channel on the SCSI session.
	 */
	if (ch->ch_session != NULL) {
		ASSERT(list_is_empty(&ch->ch_session->ss_task_list));

		/*
		 * Currently only have one channel per session, we will
		 * need to release a reference when support is added
		 * for multi-channel target login.
		 */
		srpt_stp_free_session(ch->ch_session);
		ch->ch_session = NULL;
	}

	kmem_free(ch->ch_swqe, sizeof (srpt_swqe_t) * ch->ch_num_swqe);
	mutex_destroy(&ch->ch_swqe_lock);
	mutex_exit(&ch->ch_reflock);
	mutex_destroy(&ch->ch_reflock);
	rw_destroy(&ch->ch_rwlock);
	kmem_free(ch, sizeof (srpt_channel_t));
}

/*
 * srpt_ch_disconnect()
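 *
 * Transition the channel to the disconnecting state and initiate an
 * asynchronous CM disconnect (DREQ); final cleanup happens when CM
 * calls back.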
 */
void
srpt_ch_disconnect(srpt_channel_t *ch)
{
	ibt_status_t		status;

	SRPT_DPRINTF_L3("ch_disconnect, invoked for ch (%p)",
	    (void *)ch);

	rw_enter(&ch->ch_rwlock, RW_WRITER);

	/*
	 * If we are already in the process of disconnecting then
	 * nothing need be done, CM will call-back into us when done.
	 */
	if (ch->ch_state == SRPT_CHANNEL_DISCONNECTING) {
		SRPT_DPRINTF_L2("ch_disconnect, called when"
		    " disconnect in progress");
		rw_exit(&ch->ch_rwlock);
		return;
	}
	ch->ch_state = SRPT_CHANNEL_DISCONNECTING;
	rw_exit(&ch->ch_rwlock);

	/*
	 * Initiate the sending of the CM DREQ message; the private data
	 * should be the SRP Target logout IU.  We don't really care about
	 * the remote CM DREP message returned.  We issue this in an
	 * asynchronous manner and will clean up when called back by CM.
	 */
	status = ibt_close_rc_channel(ch->ch_chan_hdl, IBT_NONBLOCKING,
	    NULL, 0, NULL, NULL, 0);

	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L2("ch_disconnect, close RC channel"
		    " err(%d)", status);
	}
}

/*
 * srpt_ch_cleanup()
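 *
 * Tear down channel state after disconnect: remove the channel from
 * the target port's channel list, drain the receive CQ, and abort any
 * SCSI tasks still outstanding on the channel's session.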
 */
void
srpt_ch_cleanup(srpt_channel_t *ch)
{
	srpt_iu_t		*iu;
	srpt_iu_t		*next;
	ibt_wc_t		wc;
	srpt_target_port_t	*tgt;
	srpt_channel_t		*tgt_ch;
	scsi_task_t		*iutask;

	SRPT_DPRINTF_L3("ch_cleanup, invoked for ch(%p), state(%d)",
	    (void *)ch, ch->ch_state);

	/* add a ref for the channel until we're done */
	srpt_ch_add_ref(ch);

	tgt = ch->ch_tgt;
	ASSERT(tgt != NULL);

	/*
	 * Make certain the channel is in the target port's list of
	 * known channels and remove it (releasing the target
	 * port's reference to the channel).
	 */
	mutex_enter(&tgt->tp_ch_list_lock);
	tgt_ch = list_head(&tgt->tp_ch_list);
	while (tgt_ch != NULL) {
		if (tgt_ch == ch) {
			list_remove(&tgt->tp_ch_list, tgt_ch);
			srpt_ch_release_ref(tgt_ch, 0);
			break;
		}
		tgt_ch = list_next(&tgt->tp_ch_list, tgt_ch);
	}
	mutex_exit(&tgt->tp_ch_list_lock);

	if (tgt_ch == NULL) {
		SRPT_DPRINTF_L2("ch_cleanup, target channel no"
		    " longer known to target");
		srpt_ch_release_ref(ch, 0);
		return;
	}

	rw_enter(&ch->ch_rwlock, RW_WRITER);
	ch->ch_state = SRPT_CHANNEL_DISCONNECTING;
	rw_exit(&ch->ch_rwlock);

	/*
	 * Don't accept any further incoming requests, and clean
	 * up the receive queue.  The send queue is left alone
	 * so tasks can finish and clean up (whether normally
	 * or via abort).
	 */
	if (ch->ch_rcq_hdl) {
		ibt_set_cq_handler(ch->ch_rcq_hdl, NULL, NULL);

		while (ibt_poll_cq(ch->ch_rcq_hdl, &wc, 1, NULL) ==
		    IBT_SUCCESS) {
			iu = (srpt_iu_t *)(uintptr_t)wc.wc_id;
			SRPT_DPRINTF_L4("ch_cleanup, recovering"
			    " outstanding RX iu(%p)", (void *)iu);
			mutex_enter(&iu->iu_lock);
			srpt_ioc_repost_recv_iu(iu->iu_ioc, iu);
			/*
			 * Channel reference has not yet been added for this
			 * IU, so do not decrement.
			 */
			mutex_exit(&iu->iu_lock);
		}
	}

	/*
	 * Go through the list of outstanding IUs for the channel's SCSI
	 * session and for each one either initiate an abort or complete
	 * an abort already in progress.
	 */
	rw_enter(&ch->ch_rwlock, RW_READER);
	if (ch->ch_session != NULL) {
		rw_enter(&ch->ch_session->ss_rwlock, RW_READER);
		iu = list_head(&ch->ch_session->ss_task_list);
		while (iu != NULL) {
			next = list_next(&ch->ch_session->ss_task_list, iu);

			mutex_enter(&iu->iu_lock);
			if (ch == iu->iu_ch) {
				if (iu->iu_stmf_task == NULL) {
					cmn_err(CE_NOTE,
					    "ch_cleanup, NULL stmf task");
					ASSERT(0);
				}
				iutask = iu->iu_stmf_task;
			} else {
				iutask = NULL;
			}
			mutex_exit(&iu->iu_lock);

			if (iutask != NULL) {
				SRPT_DPRINTF_L4("ch_cleanup, aborting "
				    "task(%p)", (void *)iutask);
				stmf_abort(STMF_QUEUE_TASK_ABORT, iutask,
				    STMF_ABORTED, NULL);
			}
			iu = next;
		}
		rw_exit(&ch->ch_session->ss_rwlock);
	}
	rw_exit(&ch->ch_rwlock);

	srpt_ch_release_ref(ch, 0);
}

/*
 * srpt_ch_rsp_comp()
 *
 * Process a completion for an IB SEND message.  A SEND completion
 * is for an SRP response packet sent back to the initiator.  It
 * will not have a STMF SCSI task associated with it if it was
 * sent for a rejected IU, or was a task management abort response.
 */
static void
srpt_ch_rsp_comp(srpt_channel_t *ch, srpt_iu_t *iu,
	ibt_wc_status_t wc_status)
{
	stmf_status_t	st = STMF_SUCCESS;

	ASSERT(iu->iu_ch == ch);

	/*
	 * Process the completion regardless of whether it's a failure or
	 * success.  At this point, we've processed as far as we can and
	 * just need to complete the associated task.
	 */

	if (wc_status != IBT_SUCCESS) {
		SRPT_DPRINTF_L2("ch_rsp_comp, WC status err(%d)",
		    wc_status);

		st = STMF_FAILURE;

		if (wc_status != IBT_WC_WR_FLUSHED_ERR) {
			srpt_ch_disconnect(ch);
		}
	}

	/*
	 * If the IU response completion is not associated with
	 * a SCSI task, release the IU to return the resource
	 * and the reference to the channel it holds.
	 */
	mutex_enter(&iu->iu_lock);
	atomic_dec_32(&iu->iu_sq_posted_cnt);

	if (iu->iu_stmf_task == NULL) {
		srpt_ioc_repost_recv_iu(iu->iu_ioc, iu);
		mutex_exit(&iu->iu_lock);
		srpt_ch_release_ref(ch, 0);
		return;
	}

	/*
	 * We should not get a SEND completion where the task has already
	 * completed aborting and STMF has been informed.
	 */
	ASSERT((iu->iu_flags & SRPT_IU_ABORTED) == 0);

	/*
	 * Let STMF know we are done.
	 */
	mutex_exit(&iu->iu_lock);

	stmf_send_status_done(iu->iu_stmf_task, st, STMF_IOF_LPORT_DONE);
}

/*
 * srpt_ch_data_comp()
 *
 * Process an IB completion for an RDMA operation.  This completion
 * should be associated with the last RDMA operation for any
 * data buffer transfer.
 */
static void
srpt_ch_data_comp(srpt_channel_t *ch, stmf_data_buf_t *stmf_dbuf,
	ibt_wc_status_t wc_status)
{
	srpt_ds_dbuf_t		*dbuf;
	srpt_iu_t		*iu;
	stmf_status_t		status;

	ASSERT(stmf_dbuf != NULL);

	dbuf = (srpt_ds_dbuf_t *)stmf_dbuf->db_port_private;

	ASSERT(dbuf != NULL);

	iu = dbuf->db_iu;

	ASSERT(iu != NULL);
	ASSERT(iu->iu_ch == ch);

	/*
	 * If the work completion indicates a non-flush failure, then
	 * start a channel disconnect (asynchronous) and release
	 * the reference to the IU.  The task will be cleaned
	 * up with STMF during channel shutdown processing.
	 */
	if (wc_status != IBT_SUCCESS) {
		SRPT_DPRINTF_L2("ch_data_comp, WC status err(%d)",
		    wc_status);
		if (wc_status != IBT_WC_WR_FLUSHED_ERR) {
			srpt_ch_disconnect(ch);
		}
		atomic_dec_32(&iu->iu_sq_posted_cnt);
		return;
	}

	/*
	 * If STMF has requested this task be aborted, then if this is the
	 * last I/O operation outstanding, notify STMF the task has been
	 * aborted and ignore the completion.
	 */
	mutex_enter(&iu->iu_lock);
	atomic_dec_32(&iu->iu_sq_posted_cnt);

	if ((iu->iu_flags & SRPT_IU_STMF_ABORTING) != 0) {
		scsi_task_t	*abort_task = iu->iu_stmf_task;

		mutex_exit(&iu->iu_lock);
		stmf_abort(STMF_REQUEUE_TASK_ABORT_LPORT, abort_task,
		    STMF_ABORTED, NULL);
		return;
	}

	/*
	 * We should not get an RDMA completion where the task has already
	 * completed aborting and STMF has been informed.
	 */
	ASSERT((iu->iu_flags & SRPT_IU_ABORTED) == 0);

	/*
	 * Good completion for the last RDMA op associated with a data
	 * buffer I/O; if requested, initiate status, otherwise let STMF
	 * know we are done.
	 */
	stmf_dbuf->db_xfer_status = STMF_SUCCESS;
	mutex_exit(&iu->iu_lock);

	DTRACE_SRP_8(xfer__done, srpt_channel_t, ch,
	    ibt_wr_ds_t, &(dbuf->db_sge), srpt_iu_t, iu,
	    ibt_send_wr_t, 0, uint32_t, stmf_dbuf->db_data_size,
	    uint32_t, 0, uint32_t, 0,
	    uint32_t, (stmf_dbuf->db_flags & DB_DIRECTION_TO_RPORT) ? 1 : 0);

	if ((stmf_dbuf->db_flags & DB_SEND_STATUS_GOOD) != 0) {
		status = srpt_stp_send_status(dbuf->db_iu->iu_stmf_task, 0);
		if (status == STMF_SUCCESS) {
			return;
		}
		stmf_dbuf->db_xfer_status = STMF_FAILURE;
	}
	stmf_data_xfer_done(dbuf->db_iu->iu_stmf_task, stmf_dbuf, 0);
}

/*
 * srpt_ch_scq_hdlr()
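 *
 * Send CQ handler.  Polls completed send work requests, dispatching
 * SRP response completions to srpt_ch_rsp_comp() and RDMA data
 * completions to srpt_ch_data_comp(), then recycles each send WQE
 * slot.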
 */
static void
srpt_ch_scq_hdlr(ibt_cq_hdl_t cq_hdl, void *arg)
{
	ibt_status_t		status;
	srpt_channel_t		*ch = arg;
	ibt_wc_t		wc[SRPT_SEND_WC_POLL_SIZE];
	ibt_wc_t		*wcp;
	int			i;
	uint32_t		cq_rearmed = 0;
	uint32_t		entries;
	srpt_swqe_t		*swqe;

	ASSERT(ch != NULL);

	/* Reference channel for the duration of this call */
	srpt_ch_add_ref(ch);

	for (;;) {
		status = ibt_poll_cq(cq_hdl, &wc[0], SRPT_SEND_WC_POLL_SIZE,
		    &entries);

		if (status != IBT_SUCCESS) {
			if (status != IBT_CQ_EMPTY) {
				/*
				 * This error should not happen. It indicates
				 * something abnormal has gone wrong and means
				 * either a hardware or programming logic error.
				 */
				SRPT_DPRINTF_L2(
				    "ch_scq_hdlr, unexpected CQ err(%d)",
				    status);
				srpt_ch_disconnect(ch);
			}

			/*
			 * If we have not rearmed the CQ do so now and poll to
			 * eliminate race; otherwise we are done.
			 */
			if (cq_rearmed == 0) {
				(void) ibt_enable_cq_notify(ch->ch_scq_hdl,
				    IBT_NEXT_COMPLETION);
				cq_rearmed = 1;
				continue;
			} else {
				break;
			}
		}

		for (wcp = wc, i = 0; i < entries; i++, wcp++) {

			/*
			 * A zero work ID indicates this CQE is associated
			 * with an intermediate post of an RDMA data transfer
			 * operation.  Since intermediate data requests are
			 * unsignaled, we should only get these if there was
			 * an error.  No action is required.
			 */
			if (wcp->wc_id == 0) {
				continue;
			}
			swqe = ch->ch_swqe + wcp->wc_id;

			switch (swqe->sw_type) {
			case SRPT_SWQE_TYPE_RESP:
				srpt_ch_rsp_comp(ch, (srpt_iu_t *)
				    swqe->sw_addr, wcp->wc_status);
				break;

			case SRPT_SWQE_TYPE_DATA:
				srpt_ch_data_comp(ch, (stmf_data_buf_t *)
				    swqe->sw_addr, wcp->wc_status);
				break;

			default:
				SRPT_DPRINTF_L2("ch_scq_hdlr, bad type(%d)",
				    swqe->sw_type);
				ASSERT(0);
			}

			srpt_ch_free_swqe_wrid(ch, wcp->wc_id);
		}
	}

	srpt_ch_release_ref(ch, 0);
}

/*
 * srpt_ch_rcq_hdlr()
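 *
 * Receive CQ handler.  Polls completed receive work requests and
 * hands each inbound SRP IU to srpt_ch_process_iu().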
 */
static void
srpt_ch_rcq_hdlr(ibt_cq_hdl_t cq_hdl, void *arg)
{
	ibt_status_t		status;
	srpt_channel_t		*ch = arg;
	ibt_wc_t		wc[SRPT_RECV_WC_POLL_SIZE];
	ibt_wc_t		*wcp;
	int			i;
	uint32_t		entries;
	srpt_iu_t		*iu;
	uint_t			cq_rearmed = 0;

	/*
	 * The channel object will exist while the CQ handler call-back
	 * is installed.
	 */
	ASSERT(ch != NULL);
	srpt_ch_add_ref(ch);

	/*
	 * If we know a channel disconnect has started, do nothing
	 * and let the channel cleanup code recover resources from the CQ.
	 * We are not concerned about races with the state transition
	 * since the code will do the correct thing either way. This
	 * is simply to circumvent rearming the CQ, and it will
	 * catch the state next time.
	 */
	rw_enter(&ch->ch_rwlock, RW_READER);
	if (ch->ch_state == SRPT_CHANNEL_DISCONNECTING) {
		SRPT_DPRINTF_L2("ch_rcq_hdlr, channel disconnecting");
		rw_exit(&ch->ch_rwlock);
		srpt_ch_release_ref(ch, 0);
		return;
	}
	rw_exit(&ch->ch_rwlock);

	for (;;) {
		status = ibt_poll_cq(cq_hdl, &wc[0], SRPT_RECV_WC_POLL_SIZE,
		    &entries);

		if (status != IBT_SUCCESS) {
			if (status != IBT_CQ_EMPTY) {
				/*
				 * This error should not happen. It indicates
				 * something abnormal has gone wrong and means
				 * either a hardware or programming logic error.
				 */
				SRPT_DPRINTF_L2(
				    "ch_rcq_hdlr, unexpected CQ err(%d)",
				    status);
				srpt_ch_disconnect(ch);
				break;
			}

			/*
			 * If we have not rearmed the CQ do so now and poll to
			 * eliminate race; otherwise we are done.
			 */
			if (cq_rearmed == 0) {
				(void) ibt_enable_cq_notify(ch->ch_rcq_hdl,
				    IBT_NEXT_COMPLETION);
				cq_rearmed = 1;
				continue;
			} else {
				break;
			}
		}

		for (wcp = wc, i = 0; i < entries; i++, wcp++) {

			/*
			 * Check wc_status before proceeding.  If the
			 * status indicates a channel problem, stop processing.
			 */
			if (wcp->wc_status != IBT_WC_SUCCESS) {
				if (wcp->wc_status == IBT_WC_WR_FLUSHED_ERR) {
					SRPT_DPRINTF_L2(
					    "ch_rcq, unexpected"
					    " wc_status err(%d)",
					    wcp->wc_status);
					srpt_ch_disconnect(ch);
					goto done;
				} else {
					/* skip IUs with errors */
					SRPT_DPRINTF_L2(
					    "ch_rcq, ERROR comp(%d)",
					    wcp->wc_status);
					/* XXX - verify not leaking IUs */
					continue;
				}
			}

			iu = (srpt_iu_t *)(uintptr_t)wcp->wc_id;
			ASSERT(iu != NULL);

			/*
			 * Process the IU.
			 */
			ASSERT(wcp->wc_type == IBT_WRC_RECV);
			srpt_ch_process_iu(ch, iu);
		}
	}

done:
	srpt_ch_release_ref(ch, 0);
}

/*
 * srpt_ch_srp_cmd()
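 *
 * Parse an inbound SRP_CMD IU, build the combined data-in/data-out
 * descriptor list, and allocate and post a SCSI task to STMF.
 * Returns 0 if the IU was handed off to STMF (or a BUSY response was
 * successfully sent) and non-zero if the caller should repost the IU.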
 */
static int
srpt_ch_srp_cmd(srpt_channel_t *ch, srpt_iu_t *iu)
{
	srp_cmd_req_t		*cmd = (srp_cmd_req_t *)iu->iu_buf;
	srp_indirect_desc_t	*i_desc;
	uint_t			i_di_cnt;
	uint_t			i_do_cnt;
	uint8_t			do_fmt;
	uint8_t			di_fmt;
	uint32_t		*cur_desc_off;
	int			i;
	ibt_status_t		status;
	uint8_t			addlen;

	DTRACE_SRP_2(task__command, srpt_channel_t, ch, srp_cmd_req_t, cmd);
	iu->iu_ch  = ch;
	iu->iu_tag = cmd->cr_tag;

	/*
	 * The SRP specification and SAM require support for bi-directional
	 * data transfer, so we create a single buffer descriptor list in
	 * the IU buffer that covers the data-in and data-out buffers.
	 * In practice we will just see unidirectional transfers with either
	 * data-in or data out descriptors.  If we were to take that as fact,
	 * we could reduce overhead slightly.
	 */

	/*
	 * additional length is a 6-bit number in 4-byte words, so multiply by 4
	 * to get bytes.
	 */
	addlen = cmd->cr_add_cdb_len & 0x3f;	/* mask off 6 bits */

	cur_desc_off = (uint32_t *)(void *)&cmd->cr_add_data;
	cur_desc_off  += addlen;		/* 32-bit arithmetic */
	iu->iu_num_rdescs = 0;
	iu->iu_rdescs = (srp_direct_desc_t *)(void *)cur_desc_off;
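
	/*
	 * The descriptors, when present, immediately follow any additional
	 * CDB bytes in the IU; iu_rdescs accumulates them below as a single
	 * array covering data-in then data-out entries.
	 */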

	/*
	 * Examine buffer description for Data In (i.e. data flows
	 * to the initiator).
	 */
	i_do_cnt = i_di_cnt = 0;
	di_fmt = cmd->cr_buf_fmt >> 4;
	if (di_fmt == SRP_DATA_DESC_DIRECT) {
		iu->iu_num_rdescs = 1;
		cur_desc_off = (uint32_t *)(void *)&iu->iu_rdescs[1];
	} else if (di_fmt == SRP_DATA_DESC_INDIRECT) {
		i_desc = (srp_indirect_desc_t *)iu->iu_rdescs;
		i_di_cnt  = b2h32(i_desc->id_table.dd_len) /
		    sizeof (srp_direct_desc_t);

		/*
		 * Some initiators like OFED occasionally use the wrong counts,
		 * so check total to allow for this.  NOTE: we do not support
		 * reading of the descriptor table from the initiator, so if
		 * not all descriptors are in the IU we drop the task.
		 */
		if (i_di_cnt > (cmd->cr_dicnt + cmd->cr_docnt)) {
			SRPT_DPRINTF_L2("ch_srp_cmd, remote RDMA of"
			    " descriptors not supported");
			SRPT_DPRINTF_L2("ch_srp_cmd, sizeof entry (%d),"
			    " i_di_cnt(%d), cr_dicnt(%d)",
			    (uint_t)sizeof (srp_direct_desc_t),
			    i_di_cnt, cmd->cr_dicnt);
			iu->iu_rdescs = NULL;
			return (1);
		}
		bcopy(&i_desc->id_desc[0], iu->iu_rdescs,
		    sizeof (srp_direct_desc_t) * i_di_cnt);
		iu->iu_num_rdescs += i_di_cnt;
		cur_desc_off = (uint32_t *)(void *)&i_desc->id_desc[i_di_cnt];
	}

	/*
	 * Examine buffer description for Data Out (i.e. data flows
	 * from the initiator).
	 */
	do_fmt = cmd->cr_buf_fmt & 0x0F;
	if (do_fmt == SRP_DATA_DESC_DIRECT) {
		if (di_fmt == SRP_DATA_DESC_DIRECT) {
			bcopy(cur_desc_off, &iu->iu_rdescs[iu->iu_num_rdescs],
			    sizeof (srp_direct_desc_t));
		}
		iu->iu_num_rdescs++;
	} else if (do_fmt == SRP_DATA_DESC_INDIRECT) {
		i_desc = (srp_indirect_desc_t *)cur_desc_off;
		i_do_cnt  = b2h32(i_desc->id_table.dd_len) /
		    sizeof (srp_direct_desc_t);

		/*
		 * Some initiators like OFED occasionally use the wrong counts,
		 * so check total to allow for this.  NOTE: we do not support
		 * reading of the descriptor table from the initiator, so if
		 * not all descriptors are in the IU we drop the task.
		 */
		if ((i_di_cnt + i_do_cnt) > (cmd->cr_dicnt + cmd->cr_docnt)) {
			SRPT_DPRINTF_L2("ch_srp_cmd, remote RDMA of"
			    " descriptors not supported");
			SRPT_DPRINTF_L2("ch_srp_cmd, sizeof entry (%d),"
			    " i_do_cnt(%d), cr_docnt(%d)",
			    (uint_t)sizeof (srp_direct_desc_t),
			    i_do_cnt, cmd->cr_docnt);
			iu->iu_rdescs = NULL;
			return (1);
		}
		bcopy(&i_desc->id_desc[0], &iu->iu_rdescs[iu->iu_num_rdescs],
		    sizeof (srp_direct_desc_t) * i_do_cnt);
		iu->iu_num_rdescs += i_do_cnt;
	}

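	/*
	 * Descriptors arrive in SRP (big-endian) wire order; convert them
	 * to host order and total up the expected transfer length.
	 */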
	iu->iu_tot_xfer_len = 0;
	for (i = 0; i < iu->iu_num_rdescs; i++) {
		iu->iu_rdescs[i].dd_vaddr = b2h64(iu->iu_rdescs[i].dd_vaddr);
		iu->iu_rdescs[i].dd_hdl   = b2h32(iu->iu_rdescs[i].dd_hdl);
		iu->iu_rdescs[i].dd_len   = b2h32(iu->iu_rdescs[i].dd_len);
		iu->iu_tot_xfer_len += iu->iu_rdescs[i].dd_len;
	}

#ifdef DEBUG
	if (srpt_errlevel >= SRPT_LOG_L4) {
		SRPT_DPRINTF_L4("ch_srp_cmd, iu->iu_tot_xfer_len (%d)",
		    iu->iu_tot_xfer_len);
		for (i = 0; i < iu->iu_num_rdescs; i++) {
			SRPT_DPRINTF_L4("ch_srp_cmd, rdescs[%d].dd_vaddr"
			    " (0x%08llx)",
			    i, (u_longlong_t)iu->iu_rdescs[i].dd_vaddr);
			SRPT_DPRINTF_L4("ch_srp_cmd, rdescs[%d].dd_hdl"
			    " (0x%08x)", i, iu->iu_rdescs[i].dd_hdl);
			SRPT_DPRINTF_L4("ch_srp_cmd, rdescs[%d].dd_len (%d)",
			    i, iu->iu_rdescs[i].dd_len);
		}
		SRPT_DPRINTF_L4("ch_srp_cmd, LUN (0x%08lx)",
		    (unsigned long int) *((uint64_t *)(void *) cmd->cr_lun));
	}
#endif
	rw_enter(&ch->ch_rwlock, RW_READER);

	if (ch->ch_state == SRPT_CHANNEL_DISCONNECTING) {
		/*
		 * The channel has begun disconnecting, so ignore the
		 * command, returning the IU resources.
		 */
		rw_exit(&ch->ch_rwlock);
		return (1);
	}

	/*
	 * Once a SCSI task is allocated and assigned to the IU, it
	 * owns those IU resources, which will be held until STMF
	 * is notified the task is done (from a lport perspective).
	 */
	iu->iu_stmf_task = stmf_task_alloc(ch->ch_tgt->tp_lport,
	    ch->ch_session->ss_ss, cmd->cr_lun,
	    SRP_CDB_SIZE + (addlen * 4), 0);
	if (iu->iu_stmf_task == NULL) {
		/*
		 * Could not allocate, return status to the initiator
		 * indicating that we are temporarily unable to process
		 * commands.  If unable to send, immediately return IU
		 * resources.
		 */
		SRPT_DPRINTF_L2("ch_srp_cmd, SCSI task allocation failure");
		rw_exit(&ch->ch_rwlock);
		mutex_enter(&iu->iu_lock);
		status = srpt_stp_send_response(iu, STATUS_BUSY, 0, 0, 0,
		    NULL, SRPT_NO_FENCE_SEND);
		mutex_exit(&iu->iu_lock);
		if (status != IBT_SUCCESS) {
			SRPT_DPRINTF_L2("ch_srp_cmd, error(%d) posting error"
			    " response", status);
			return (1);
		} else {
			return (0);
		}
	}

	iu->iu_stmf_task->task_port_private = iu;
	iu->iu_stmf_task->task_flags = 0;

	if (di_fmt != 0) {
		iu->iu_stmf_task->task_flags |= TF_WRITE_DATA;
	}
	if (do_fmt != 0) {
		iu->iu_stmf_task->task_flags |= TF_READ_DATA;
	}

	switch (cmd->cr_task_attr) {
	case SRP_TSK_ATTR_QTYPE_SIMPLE:
		iu->iu_stmf_task->task_flags |=	TF_ATTR_SIMPLE_QUEUE;
		break;

	case SRP_TSK_ATTR_QTYPE_HEAD_OF_Q:
		iu->iu_stmf_task->task_flags |=	TF_ATTR_HEAD_OF_QUEUE;
		break;

	case SRP_TSK_ATTR_QTYPE_ORDERED:
		iu->iu_stmf_task->task_flags |=	TF_ATTR_ORDERED_QUEUE;
		break;

	case SRP_TSK_ATTR_QTYPE_ACA_Q_TAG:
		iu->iu_stmf_task->task_flags |=	TF_ATTR_ACA;
		break;

	default:
		SRPT_DPRINTF_L2("ch_srp_cmd, reserved task attr (%d)",
		    cmd->cr_task_attr);
		iu->iu_stmf_task->task_flags |=	TF_ATTR_ORDERED_QUEUE;
		break;
	}
	iu->iu_stmf_task->task_additional_flags = 0;
	iu->iu_stmf_task->task_priority		= 0;
	iu->iu_stmf_task->task_mgmt_function    = TM_NONE;
	iu->iu_stmf_task->task_max_nbufs	= STMF_BUFS_MAX;
	iu->iu_stmf_task->task_expected_xfer_length = iu->iu_tot_xfer_len;
	iu->iu_stmf_task->task_csn_size		= 0;

	bcopy(cmd->cr_cdb, iu->iu_stmf_task->task_cdb,
	    SRP_CDB_SIZE);
	if (addlen != 0) {
		bcopy(&cmd->cr_add_data,
		    iu->iu_stmf_task->task_cdb + SRP_CDB_SIZE,
		    addlen * 4);
	}

	/*
	 * Add the IU/task to the session and post to STMF.  The task will
	 * remain in the session's list until STMF is informed by SRP that
	 * it is done with the task.
	 */
	DTRACE_SRP_3(scsi__command, srpt_channel_t, iu->iu_ch,
	    scsi_task_t, iu->iu_stmf_task, srp_cmd_req_t, cmd);
	srpt_stp_add_task(ch->ch_session, iu);

	SRPT_DPRINTF_L3("ch_srp_cmd, new task (%p) posted",
	    (void *)iu->iu_stmf_task);
	stmf_post_task(iu->iu_stmf_task, NULL);
	rw_exit(&ch->ch_rwlock);

	return (0);
}

/*
 * srpt_ch_task_mgmt_abort()
 *
 * Returns IBT_SUCCESS (0) when a task management response has been
 * sent; any other status indicates failure and the IU should be
 * reposted by the caller.
 */
static ibt_status_t
srpt_ch_task_mgmt_abort(srpt_channel_t *ch, srpt_iu_t *iu,
	uint64_t tag_to_abort)
{
	srpt_session_t	*session = ch->ch_session;
	srpt_iu_t	*ss_iu;
	ibt_status_t	status;

	/*
	 * Locate the associated task (tag_to_abort) in the
	 * session's active task list.
	 */
	rw_enter(&session->ss_rwlock, RW_READER);
	ss_iu = list_head(&session->ss_task_list);
	while (ss_iu != NULL) {
		mutex_enter(&ss_iu->iu_lock);
		if (tag_to_abort == ss_iu->iu_tag) {
			mutex_exit(&ss_iu->iu_lock);
			break;
		}
		mutex_exit(&ss_iu->iu_lock);
		ss_iu = list_next(&session->ss_task_list, ss_iu);
	}
	rw_exit(&session->ss_rwlock);

	/*
	 * Take appropriate action based on state of task
	 * to be aborted:
	 * 1) No longer exists - do nothing.
	 * 2) Previously aborted or status queued - do nothing.
	 * 3) Otherwise - initiate abort.
	 */
	if (ss_iu == NULL) {
		goto send_mgmt_resp;
	}

	mutex_enter(&ss_iu->iu_lock);
	if ((ss_iu->iu_flags & (SRPT_IU_STMF_ABORTING |
	    SRPT_IU_ABORTED | SRPT_IU_RESP_SENT)) != 0) {
		mutex_exit(&ss_iu->iu_lock);
		goto send_mgmt_resp;
	}

	/*
	 * Set aborting flag and notify STMF of abort request.  No
	 * additional I/O will be queued for this IU.
	 */
	SRPT_DPRINTF_L3("ch_task_mgmt_abort, task found");
	ss_iu->iu_flags |= SRPT_IU_SRP_ABORTING;
	mutex_exit(&ss_iu->iu_lock);
	stmf_abort(STMF_QUEUE_TASK_ABORT,
	    ss_iu->iu_stmf_task, STMF_ABORTED, NULL);

send_mgmt_resp:
	mutex_enter(&iu->iu_lock);
	status = srpt_stp_send_mgmt_response(iu, SRP_TM_SUCCESS,
	    SRPT_FENCE_SEND);
	mutex_exit(&iu->iu_lock);

	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L2("ch_task_mgmt_abort, err(%d)"
		    " posting abort response", status);
	}

	return (status);
}

/*
 * srpt_ch_srp_task_mgmt()
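 *
 * Handle an inbound SRP_TSK_MGMT IU.  ABORT TASK is processed directly
 * by the driver; the remaining supported functions are mapped to STMF
 * task management codes and posted to STMF as management tasks.
 * Returns non-zero if the caller should repost the IU.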
 */
static int
srpt_ch_srp_task_mgmt(srpt_channel_t *ch, srpt_iu_t *iu)
{
	srp_tsk_mgmt_t		*tsk = (srp_tsk_mgmt_t *)iu->iu_buf;
	uint8_t			tm_fn;
	ibt_status_t		status;

	SRPT_DPRINTF_L3("ch_srp_task_mgmt, SRP TASK MGMT func(%d)",
	    tsk->tm_function);

	/*
	 * Both the tag and lun fields have the same corresponding offsets
	 * in the srp_tsk_mgmt_t and srp_cmd_req_t structures.  The
	 * casting will allow us to use the same dtrace translator.
	 */
	DTRACE_SRP_2(task__command, srpt_channel_t, ch,
	    srp_cmd_req_t, (srp_cmd_req_t *)tsk);

	iu->iu_ch  = ch;
	iu->iu_tag = tsk->tm_tag;

	/*
	 * Task management aborts are processed directly by the SRP driver;
	 * all other task management requests are handed off to STMF.
	 */
	switch (tsk->tm_function) {
	case SRP_TSK_MGMT_ABORT_TASK:
		/*
		 * Initiate SCSI transport protocol specific task abort
		 * logic.
		 */
		status = srpt_ch_task_mgmt_abort(ch, iu, tsk->tm_task_tag);
		if (status != IBT_SUCCESS) {
			/* repost this IU */
			return (1);
		} else {
			return (0);
		}

	case SRP_TSK_MGMT_ABORT_TASK_SET:
		tm_fn = TM_ABORT_TASK_SET;
		break;

	case SRP_TSK_MGMT_CLEAR_TASK_SET:
		tm_fn = TM_CLEAR_TASK_SET;
		break;

	case SRP_TSK_MGMT_LUN_RESET:
		tm_fn = TM_LUN_RESET;
		break;

	case SRP_TSK_MGMT_CLEAR_ACA:
		tm_fn = TM_CLEAR_ACA;
		break;

	default:
		/*
		 * SRP does not support the requested task management
		 * function; return a not supported status in the response.
		 */
		SRPT_DPRINTF_L2("ch_srp_task_mgmt, SRP task mgmt fn(%d)"
		    " not supported", tsk->tm_function);
		mutex_enter(&iu->iu_lock);
		status = srpt_stp_send_mgmt_response(iu,
		    SRP_TM_NOT_SUPPORTED, SRPT_NO_FENCE_SEND);
		mutex_exit(&iu->iu_lock);
		if (status != IBT_SUCCESS) {
			SRPT_DPRINTF_L2("ch_srp_task_mgmt, err(%d) posting"
			    " response", status);
			return (1);
		}
		return (0);
	}

	rw_enter(&ch->ch_rwlock, RW_READER);
	if (ch->ch_state == SRPT_CHANNEL_DISCONNECTING) {
		/*
		 * The channel has begun disconnecting, so ignore the
		 * command, returning the IU resources.
		 */
		rw_exit(&ch->ch_rwlock);
		return (1);
	}

	/*
	 * Once a SCSI mgmt task is allocated and assigned to the IU, it
	 * owns those IU resources, which will be held until we inform
	 * STMF that we are done with the task (from an lport's perspective).
	 */
	iu->iu_stmf_task = stmf_task_alloc(ch->ch_tgt->tp_lport,
	    ch->ch_session->ss_ss, tsk->tm_lun, 0, STMF_TASK_EXT_NONE);
	if (iu->iu_stmf_task == NULL) {
		/*
		 * Could not allocate, return status to the initiator
		 * indicating that we are temporarily unable to process
		 * commands.  If unable to send, immediately return IU
		 * resources.
		 */
		SRPT_DPRINTF_L2("ch_srp_task_mgmt, SCSI task allocation"
		    " failure");
		rw_exit(&ch->ch_rwlock);
		mutex_enter(&iu->iu_lock);
		status = srpt_stp_send_response(iu, STATUS_BUSY, 0, 0, 0,
		    NULL, SRPT_NO_FENCE_SEND);
		mutex_exit(&iu->iu_lock);
		if (status != IBT_SUCCESS) {
			SRPT_DPRINTF_L2("ch_srp_task_mgmt, err(%d) posting"
			    " busy response", status);
			/* repost the IU */
			return (1);
		}
		return (0);
	}

	iu->iu_stmf_task->task_port_private = iu;
	iu->iu_stmf_task->task_flags = 0;
	iu->iu_stmf_task->task_additional_flags =
	    TASK_AF_NO_EXPECTED_XFER_LENGTH;
	iu->iu_stmf_task->task_priority = 0;
	iu->iu_stmf_task->task_mgmt_function = tm_fn;
	iu->iu_stmf_task->task_max_nbufs = STMF_BUFS_MAX;
	iu->iu_stmf_task->task_expected_xfer_length = 0;
	iu->iu_stmf_task->task_csn_size = 0;

	/*
	 * Add the IU/task to the session and post to STMF.  The task will
	 * remain in the session's list until STMF is informed by SRP that
	 * it is done with the task.
	 */
	srpt_stp_add_task(ch->ch_session, iu);

	SRPT_DPRINTF_L3("ch_srp_task_mgmt, new mgmt task(%p) posted",
	    (void *)iu->iu_stmf_task);
	stmf_post_task(iu->iu_stmf_task, NULL);
	rw_exit(&ch->ch_rwlock);

	return (0);
}

/*
 * srpt_ch_process_iu()
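 *
 * Dispatch a newly received IU based on its SRP opcode.  If the IU is
 * not handed off to STMF it is reposted to the SRQ here and the
 * channel reference taken on its behalf is dropped.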
 */
static void
srpt_ch_process_iu(srpt_channel_t *ch, srpt_iu_t *iu)
{
	srpt_iu_data_t	*iud;
	int		status = 1;

	/*
	 * IU adds a reference to the channel which will represent a
	 * reference by STMF.  If for whatever reason the IU
	 * is not handed off to STMF, then this reference will be
	 * released.  Otherwise, the reference will be released when
	 * SRP informs STMF that the associated SCSI task is done.
	 */
	srpt_ch_add_ref(ch);

	/*
	 * Validate login RC channel state. Normally active, if
	 * not active then we need to handle a possible race between the
	 * receipt of an implied RTU and CM calling back to notify of the
	 * state transition.
	 */
	rw_enter(&ch->ch_rwlock, RW_READER);
	if (ch->ch_state == SRPT_CHANNEL_DISCONNECTING) {
		rw_exit(&ch->ch_rwlock);
		goto repost_iu;
	}
	rw_exit(&ch->ch_rwlock);

	iud = iu->iu_buf;

	switch (iud->rx_iu.srp_op) {
	case SRP_IU_CMD:
		status = srpt_ch_srp_cmd(ch, iu);
		break;

	case SRP_IU_TASK_MGMT:
		/* A non-zero return means the IU must be reposted below. */
		status = srpt_ch_srp_task_mgmt(ch, iu);
		break;

	case SRP_IU_I_LOGOUT:
		SRPT_DPRINTF_L3("ch_process_iu, SRP INITIATOR LOGOUT");
		/*
		 * Initiators should logout by issuing a CM disconnect
		 * request (DREQ) with the logout IU in the private data;
		 * however some initiators have been known to send the
		 * IU in-band, if this happens just initiate the logout.
		 * Note that we do not return a response as per the
		 * specification.
		 */
		srpt_stp_logout(ch);
		break;

	case SRP_IU_AER_RSP:
	case SRP_IU_CRED_RSP:
	default:
		/*
		 * We don't send asynchronous events or ask for credit
		 * adjustments, so nothing need be done.  Log we got an
		 * unexpected IU but then just repost the IU to the SRQ.
		 */
		SRPT_DPRINTF_L2("ch_process_iu, invalid IU from initiator,"
		    " IU opcode(%d)", iud->rx_iu.srp_op);
		break;
	}

	if (status == 0) {
		return;
	}

repost_iu:
	SRPT_DPRINTF_L4("process_iu:  reposting iu %p", (void *)iu);
	mutex_enter(&iu->iu_lock);
	srpt_ioc_repost_recv_iu(iu->iu_ioc, iu);
	mutex_exit(&iu->iu_lock);
	srpt_ch_release_ref(ch, 0);
}

/*
 * srpt_ch_post_send()
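 *
 * Post a single SRP response IU on the channel's RC queue pair.  The
 * caller must hold iu->iu_lock across the call.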
 */
ibt_status_t
srpt_ch_post_send(srpt_channel_t *ch, srpt_iu_t *iu, uint32_t len,
	uint_t fence)
{
	ibt_status_t		status;
	ibt_send_wr_t		wr;
	ibt_wr_ds_t		ds;
	uint_t			posted;

	ASSERT(ch != NULL);
	ASSERT(iu != NULL);
	ASSERT(mutex_owned(&iu->iu_lock));

	rw_enter(&ch->ch_rwlock, RW_READER);
	if (ch->ch_state == SRPT_CHANNEL_DISCONNECTING) {
		rw_exit(&ch->ch_rwlock);
		SRPT_DPRINTF_L2("ch_post_send, bad ch state (%d)",
		    ch->ch_state);
		return (IBT_FAILURE);
	}
	rw_exit(&ch->ch_rwlock);

	wr.wr_id = srpt_ch_alloc_swqe_wrid(ch, SRPT_SWQE_TYPE_RESP,
	    (void *)iu);
	if (wr.wr_id == 0) {
		SRPT_DPRINTF_L2("ch_post_send, queue full");
		return (IBT_FAILURE);
	}

	atomic_inc_32(&iu->iu_sq_posted_cnt);

	wr.wr_flags = IBT_WR_SEND_SIGNAL;
	if (fence == SRPT_FENCE_SEND) {
		wr.wr_flags |= IBT_WR_SEND_FENCE;
	}
	wr.wr_opcode = IBT_WRC_SEND;
	wr.wr_trans  = IBT_RC_SRV;
	wr.wr_nds = 1;
	wr.wr_sgl = &ds;

	ds.ds_va = iu->iu_sge.ds_va;
	ds.ds_key = iu->iu_sge.ds_key;
	ds.ds_len = len;

	SRPT_DPRINTF_L4("ch_post_send, posting SRP response to channel"
	    " ds.ds_va (0x%16llx), ds.ds_key (0x%08x), "
	    " ds.ds_len (%d)",
	    (u_longlong_t)ds.ds_va, ds.ds_key, ds.ds_len);

	status = ibt_post_send(ch->ch_chan_hdl, &wr, 1, &posted);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L2("ch_post_send, post_send failed (%d)",
		    status);
		atomic_dec_32(&iu->iu_sq_posted_cnt);
		srpt_ch_free_swqe_wrid(ch, wr.wr_id);
		return (status);
	}

	return (IBT_SUCCESS);
}

/*
 * srpt_ch_alloc_swqe_wrid()
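 *
 * Take the next free send WQE slot off the channel's free list and
 * record its type and buffer address.  The slot index doubles as the
 * IB work request ID; a return of 0 (the reserved sentinel entry)
 * means the send queue is full.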
 */
ibt_wrid_t
srpt_ch_alloc_swqe_wrid(srpt_channel_t *ch,
	srpt_swqe_type_t wqe_type, void *addr)
{
	ibt_wrid_t	wrid;

	mutex_enter(&ch->ch_swqe_lock);
	if (ch->ch_head == ch->ch_tail) {
		mutex_exit(&ch->ch_swqe_lock);
		return ((ibt_wrid_t)0);
	}
	wrid = (ibt_wrid_t)ch->ch_head;
	ch->ch_swqe[ch->ch_head].sw_type = wqe_type;
	ch->ch_swqe[ch->ch_head].sw_addr = addr;
	ch->ch_head = ch->ch_swqe[ch->ch_head].sw_next;
	ch->ch_swqe_posted++;
	mutex_exit(&ch->ch_swqe_lock);
	return (wrid);
}

/*
 * srpt_ch_free_swqe_wrid()
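 *
 * Return a send WQE slot to the tail of the channel's free list once
 * its completion has been processed.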
 */
void
srpt_ch_free_swqe_wrid(srpt_channel_t *ch, ibt_wrid_t id)
{
	mutex_enter(&ch->ch_swqe_lock);
	ch->ch_swqe[ch->ch_tail].sw_next = id;
	ch->ch_tail = (uint32_t)id;
	ch->ch_swqe_posted--;
	mutex_exit(&ch->ch_swqe_lock);
}
1438