/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/conf.h>
#include <sys/file.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/scsi/scsi.h>
#include <sys/scsi/impl/scsi_reset_notify.h>
#include <sys/scsi/generic/mode.h>
#include <sys/disp.h>
#include <sys/byteorder.h>
#include <sys/atomic.h>
#include <sys/sdt.h>
#include <sys/dkio.h>

#include <stmf.h>
#include <lpif.h>
#include <portif.h>
#include <stmf_ioctl.h>
#include <stmf_sbd.h>
#include <stmf_sbd_ioctl.h>
#include <sbd_impl.h>

#define	SCSI2_CONFLICT_FREE_CMDS(cdb)	( \
	/* ----------------------- */                                      \
	/* Refer Both		   */                                      \
	/* SPC-2 (rev 20) Table 10 */                                      \
	/* SPC-3 (rev 23) Table 31 */                                      \
	/* ----------------------- */                                      \
	((cdb[0]) == SCMD_INQUIRY)					|| \
	((cdb[0]) == SCMD_LOG_SENSE_G1)					|| \
	((cdb[0]) == SCMD_RELEASE)					|| \
	((cdb[0]) == SCMD_RELEASE_G1)					|| \
	((cdb[0]) == SCMD_REPORT_LUNS)					|| \
	((cdb[0]) == SCMD_REQUEST_SENSE)				|| \
	/* PREVENT ALLOW MEDIUM REMOVAL with prevent == 0 */               \
	((((cdb[0]) == SCMD_DOORLOCK) && (((cdb[4]) & 0x3) == 0)))	|| \
	/* SERVICE ACTION IN with READ MEDIA SERIAL NUMBER (0x01) */       \
	(((cdb[0]) == SCMD_SVC_ACTION_IN_G5) && (                          \
	    ((cdb[1]) & 0x1F) == 0x01))					|| \
	/* MAINTENANCE IN with service actions REPORT ALIASES (0x0B) */    \
	/* REPORT DEVICE IDENTIFIER (0x05)  REPORT PRIORITY (0x0E) */      \
	/* REPORT TARGET PORT GROUPS (0x0A) REPORT TIMESTAMP (0x0F) */     \
	(((cdb[0]) == SCMD_MAINTENANCE_IN) && (                            \
	    (((cdb[1]) & 0x1F) == 0x0B) ||                                 \
	    (((cdb[1]) & 0x1F) == 0x05) ||                                 \
	    (((cdb[1]) & 0x1F) == 0x0E) ||                                 \
	    (((cdb[1]) & 0x1F) == 0x0A) ||                                 \
	    (((cdb[1]) & 0x1F) == 0x0F)))				|| \
	/* ----------------------- */                                      \
	/* SBC-3 (rev 17) Table 3  */                                      \
	/* ----------------------- */                                      \
	/* READ CAPACITY(10) */                                            \
	((cdb[0]) == SCMD_READ_CAPACITY)				|| \
	/* READ CAPACITY(16) */                                            \
	(((cdb[0]) == SCMD_SVC_ACTION_IN_G4) && (                          \
	    ((cdb[1]) & 0x1F) == 0x10))					|| \
	/* START STOP UNIT with START bit 0 and POWER CONDITION 0  */      \
	(((cdb[0]) == SCMD_START_STOP) && (                                \
	    (((cdb[4]) & 0xF0) == 0) && (((cdb[4]) & 0x01) == 0))))
/* End of SCSI2_CONFLICT_FREE_CMDS */
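
/*
 * Illustrative sketch (not compiled into the driver): how the table
 * above would typically be consulted. A reservation-conflict check
 * lets these commands through even while another initiator holds a
 * SCSI-2 reservation; the caller below is hypothetical.
 */
#if 0
static int
example_cdb_is_conflict_free(const uint8_t *cdb)
{
	/* e.g. cdb[0] == SCMD_INQUIRY always passes */
	return (SCSI2_CONFLICT_FREE_CMDS(cdb) ? 1 : 0);
}
#endif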

stmf_status_t sbd_lu_reset_state(stmf_lu_t *lu);
static void sbd_handle_sync_cache(struct scsi_task *task,
    struct stmf_data_buf *initial_dbuf);
void sbd_handle_read_xfer_completion(struct scsi_task *task,
    sbd_cmd_t *scmd, struct stmf_data_buf *dbuf);
void sbd_handle_short_write_xfer_completion(scsi_task_t *task,
    stmf_data_buf_t *dbuf);
void sbd_handle_short_write_transfers(scsi_task_t *task,
    stmf_data_buf_t *dbuf, uint32_t cdb_xfer_size);
void sbd_handle_mode_select_xfer(scsi_task_t *task, uint8_t *buf,
    uint32_t buflen);
void sbd_handle_mode_select(scsi_task_t *task, stmf_data_buf_t *dbuf);
void sbd_handle_identifying_info(scsi_task_t *task, stmf_data_buf_t *dbuf);

extern void sbd_pgr_initialize_it(scsi_task_t *, sbd_it_data_t *);
extern int sbd_pgr_reservation_conflict(scsi_task_t *);
extern void sbd_pgr_reset(sbd_lu_t *);
extern void sbd_pgr_remove_it_handle(sbd_lu_t *, sbd_it_data_t *);
extern void sbd_handle_pgr_in_cmd(scsi_task_t *, stmf_data_buf_t *);
extern void sbd_handle_pgr_out_cmd(scsi_task_t *, stmf_data_buf_t *);
extern void sbd_handle_pgr_out_data(scsi_task_t *, stmf_data_buf_t *);
void sbd_do_sgl_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
    int first_xfer);
/*
 * IMPORTANT NOTE:
 * =================
 * The whole world here is based on the assumption that everything within
 * a scsi task executes in a single threaded manner, even the aborts.
 * Don't ever change that. There won't be any performance gain, but there
 * will be tons of race conditions.
 */

void
sbd_do_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
					struct stmf_data_buf *dbuf)
{
	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
	uint64_t laddr;
	uint32_t len, buflen, iolen;
	int ndx;
	int bufs_to_take;

	/* Let's try not to hog all the buffers the port has. */
	bufs_to_take = ((task->task_max_nbufs > 2) &&
	    (task->task_cmd_xfer_length < (32 * 1024))) ? 2 :
	    task->task_max_nbufs;

	len = scmd->len > dbuf->db_buf_size ? dbuf->db_buf_size : scmd->len;
	laddr = scmd->addr + scmd->current_ro;

	for (buflen = 0, ndx = 0; (buflen < len) &&
	    (ndx < dbuf->db_sglist_length); ndx++) {
		iolen = min(len - buflen, dbuf->db_sglist[ndx].seg_length);
		if (iolen == 0)
			break;
		if (sbd_data_read(sl, task, laddr, (uint64_t)iolen,
		    dbuf->db_sglist[ndx].seg_addr) != STMF_SUCCESS) {
			scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
			/* Do not need to do xfer anymore, just complete it */
			dbuf->db_data_size = 0;
			dbuf->db_xfer_status = STMF_SUCCESS;
			sbd_handle_read_xfer_completion(task, scmd, dbuf);
			return;
		}
		buflen += iolen;
		laddr += (uint64_t)iolen;
	}
	dbuf->db_relative_offset = scmd->current_ro;
	dbuf->db_data_size = buflen;
	dbuf->db_flags = DB_DIRECTION_TO_RPORT;
	(void) stmf_xfer_data(task, dbuf, 0);
	scmd->len -= buflen;
	scmd->current_ro += buflen;
	if (scmd->len && (scmd->nbufs < bufs_to_take)) {
		uint32_t maxsize, minsize, old_minsize;

		maxsize = (scmd->len > (128*1024)) ? 128*1024 : scmd->len;
		minsize = maxsize >> 2;
		do {
			/*
			 * A bad port implementation can keep on failing
			 * the request while reporting back a false
			 * minsize, so guard against looping forever.
			 */
			old_minsize = minsize;
			dbuf = stmf_alloc_dbuf(task, maxsize, &minsize, 0);
		} while ((dbuf == NULL) && (old_minsize > minsize) &&
		    (minsize >= 512));
		if (dbuf == NULL) {
			return;
		}
		scmd->nbufs++;
		sbd_do_read_xfer(task, scmd, dbuf);
	}
}
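
/*
 * Illustrative sketch (not compiled into the driver) of the allocation
 * retry pattern used above: keep retrying with the port's reported
 * minsize only while that minsize keeps shrinking, so a port that
 * fails but reports an unchanged (false) minsize cannot loop forever.
 * The helper name is hypothetical.
 */
#if 0
static stmf_data_buf_t *
example_alloc_shrinking_dbuf(scsi_task_t *task, uint32_t remaining)
{
	uint32_t maxsize, minsize, old_minsize;
	stmf_data_buf_t *dbuf;

	maxsize = (remaining > (128 * 1024)) ? 128 * 1024 : remaining;
	minsize = maxsize >> 2;
	do {
		old_minsize = minsize;
		dbuf = stmf_alloc_dbuf(task, maxsize, &minsize, 0);
	} while ((dbuf == NULL) && (old_minsize > minsize) &&
	    (minsize >= 512));
	return (dbuf);	/* NULL means the port is out of buffers */
}
#endif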

/*
 * sbd_zcopy: Bail-out switch for reduced copy path.
 *
 * 0 - read & write off
 * 1 - read & write on
 * 2 - only read on
 * 4 - only write on
 */
int sbd_zcopy = 1;	/* enable zcopy read & write path */
uint32_t sbd_max_xfer_len = 0;		/* Valid if non-zero */
uint32_t sbd_1st_xfer_len = 0;		/* Valid if non-zero */
uint32_t sbd_copy_threshold = 0;	/* Valid if non-zero */
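
/*
 * Illustrative sketch (not compiled into the driver): how the sbd_zcopy
 * bits gate the two reduced copy paths. Reads test bits (2|1) and
 * writes test bits (4|1), matching the checks in sbd_handle_read() and
 * sbd_handle_write() below; the helper names are hypothetical.
 */
#if 0
static int
example_zcopy_read_enabled(void)
{
	return ((sbd_zcopy & (2 | 1)) != 0);
}

static int
example_zcopy_write_enabled(void)
{
	return ((sbd_zcopy & (4 | 1)) != 0);
}
#endif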

static void
sbd_do_sgl_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer)
{
	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
	sbd_zvol_io_t *zvio;
	int ret, final_xfer;
	uint64_t offset;
	uint32_t xfer_len, max_len, first_len;
	stmf_status_t xstat;
	stmf_data_buf_t *dbuf;
	uint_t nblks;
	uint64_t blksize = sl->sl_blksize;
	size_t db_private_sz;
	hrtime_t xfer_start, xfer_elapsed;
	uintptr_t pad;

	ASSERT(rw_read_held(&sl->sl_access_state_lock));
	ASSERT((sl->sl_flags & SL_MEDIA_LOADED) != 0);

	/*
	 * Limit xfer_len to the minimum of:
	 *    - the task limit
	 *    - the lun limit
	 *    - the sbd global limit, if set
	 *    - the first xfer limit, if set
	 *
	 * First, protect against a silly over-ride value.
	 */
	if (sbd_max_xfer_len && ((sbd_max_xfer_len % DEV_BSIZE) != 0)) {
		cmn_err(CE_WARN, "sbd_max_xfer_len invalid %d, resetting\n",
		    sbd_max_xfer_len);
		sbd_max_xfer_len = 0;
	}
	if (sbd_1st_xfer_len && ((sbd_1st_xfer_len % DEV_BSIZE) != 0)) {
		cmn_err(CE_WARN, "sbd_1st_xfer_len invalid %d, resetting\n",
		    sbd_1st_xfer_len);
		sbd_1st_xfer_len = 0;
	}

	max_len = MIN(task->task_max_xfer_len, sl->sl_max_xfer_len);
	if (sbd_max_xfer_len)
		max_len = MIN(max_len, sbd_max_xfer_len);
	/*
	 * Special case the first xfer if hints are set.
	 */
	if (first_xfer && (sbd_1st_xfer_len || task->task_1st_xfer_len)) {
		/* global over-ride has precedence */
		if (sbd_1st_xfer_len)
			first_len = sbd_1st_xfer_len;
		else
			first_len = task->task_1st_xfer_len;
	} else {
		first_len = 0;
	}

	while (scmd->len && scmd->nbufs < task->task_max_nbufs) {

		xfer_len = MIN(max_len, scmd->len);
		if (first_len) {
			xfer_len = MIN(xfer_len, first_len);
			first_len = 0;
		}
		if (scmd->len == xfer_len) {
			final_xfer = 1;
		} else {
			/*
			 * Attempt to end xfer on a block boundary.
			 * The only way this does not happen is if the
			 * xfer_len is small enough to stay contained
			 * within the same block.
			 */
			uint64_t xfer_offset, xfer_aligned_end;

			final_xfer = 0;
			xfer_offset = scmd->addr + scmd->current_ro;
			xfer_aligned_end =
			    P2ALIGN(xfer_offset+xfer_len, blksize);
			if (xfer_aligned_end > xfer_offset)
				xfer_len = xfer_aligned_end - xfer_offset;
		}
		/*
		 * Allocate object to track the read and reserve
		 * enough space for scatter/gather list.
		 */
		offset = scmd->addr + scmd->current_ro;
		nblks = sbd_zvol_numsegs(sl, offset, xfer_len);

		db_private_sz = sizeof (*zvio) + sizeof (uintptr_t) /* PAD */ +
		    (nblks * sizeof (stmf_sglist_ent_t));
		dbuf = stmf_alloc(STMF_STRUCT_DATA_BUF, db_private_sz,
		    AF_DONTZERO);
		/*
		 * Setup the dbuf
		 *
		 * XXX Framework does not handle variable length sglists
		 * properly, so setup db_lu_private and db_port_private
		 * fields here. db_stmf_private is properly set for
		 * calls to stmf_free.
		 */
		if (dbuf->db_port_private == NULL) {
			/*
			 * XXX Framework assigns space to PP after db_sglist[0]
			 */
			cmn_err(CE_PANIC, "db_port_private == NULL");
		}
		pad = (uintptr_t)&dbuf->db_sglist[nblks];
		dbuf->db_lu_private = (void *)P2ROUNDUP(pad, sizeof (pad));
		dbuf->db_port_private = NULL;
		dbuf->db_buf_size = xfer_len;
		dbuf->db_data_size = xfer_len;
		dbuf->db_relative_offset = scmd->current_ro;
		dbuf->db_sglist_length = (uint16_t)nblks;
		dbuf->db_xfer_status = 0;
		dbuf->db_handle = 0;

		dbuf->db_flags = (DB_DONT_CACHE | DB_DONT_REUSE |
		    DB_DIRECTION_TO_RPORT | DB_LU_DATA_BUF);
		if (final_xfer)
			dbuf->db_flags |= DB_SEND_STATUS_GOOD;

		zvio = dbuf->db_lu_private;
		/* Need absolute offset for zvol access */
		zvio->zvio_offset = offset;
		zvio->zvio_flags = ZVIO_SYNC;

		/*
		 * Accounting for start of read.
		 * Note there is no buffer address for the probe yet.
		 */
		stmf_lu_xfer_start(task);
		DTRACE_PROBE5(backing__store__read__start, sbd_lu_t *, sl,
		    uint8_t *, NULL, uint64_t, xfer_len,
		    uint64_t, offset, scsi_task_t *, task);
		xfer_start = gethrtime();

		ret = sbd_zvol_alloc_read_bufs(sl, dbuf);

		xfer_elapsed = gethrtime() - xfer_start;

		stmf_lu_xfer_done(task, B_TRUE /* read */, (uint64_t)xfer_len,
		    xfer_elapsed);
		DTRACE_PROBE6(backing__store__read__end, sbd_lu_t *, sl,
		    uint8_t *, NULL, uint64_t, xfer_len,
		    uint64_t, offset, int, ret, scsi_task_t *, task);

		if (ret != 0) {
			/*
			 * Read failure from the backend.
			 */
			stmf_free(dbuf);
			if (scmd->nbufs == 0) {
				/* nothing queued, just finish */
				scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
				stmf_scsilib_send_status(task, STATUS_CHECK,
				    STMF_SAA_READ_ERROR);
				rw_exit(&sl->sl_access_state_lock);
			} else {
				/* process failure when other dbufs finish */
				scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
			}
			return;
		}

		/*
		 * Allow PP to do setup
		 */
		xstat = stmf_setup_dbuf(task, dbuf, 0);
		if (xstat != STMF_SUCCESS) {
			/*
			 * This could happen if the driver cannot get the
			 * DDI resources it needs for this request.
			 * If other dbufs are queued, try again when the next
			 * one completes, otherwise give up.
			 */
			sbd_zvol_rele_read_bufs(sl, dbuf);
			stmf_free(dbuf);
			if (scmd->nbufs > 0) {
				/* completion of previous dbuf will retry */
				return;
			}
			/*
			 * Done with this command.
			 */
			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
			if (first_xfer)
				stmf_scsilib_send_status(task, STATUS_QFULL, 0);
			else
				stmf_scsilib_send_status(task, STATUS_CHECK,
				    STMF_SAA_READ_ERROR);
			rw_exit(&sl->sl_access_state_lock);
			return;
		}
		/*
		 * dbuf is now queued on task
		 */
		scmd->nbufs++;

		/* XXX leave this in for FW? */
		DTRACE_PROBE4(sbd__xfer, struct scsi_task *, task,
		    struct stmf_data_buf *, dbuf, uint64_t, offset,
		    uint32_t, xfer_len);
		/*
		 * Do not pass STMF_IOF_LU_DONE so that the zvol
		 * state can be released in the completion callback.
		 */
		xstat = stmf_xfer_data(task, dbuf, 0);
		switch (xstat) {
		case STMF_SUCCESS:
			break;
		case STMF_BUSY:
			/*
			 * The dbuf is queued on the task, but unknown
			 * to the PP, thus no completion will occur.
			 */
			sbd_zvol_rele_read_bufs(sl, dbuf);
			stmf_teardown_dbuf(task, dbuf);
			stmf_free(dbuf);
			scmd->nbufs--;
			if (scmd->nbufs > 0) {
				/* completion of previous dbuf will retry */
				return;
			}
			/*
			 * Done with this command.
			 */
			rw_exit(&sl->sl_access_state_lock);
			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
			if (first_xfer)
				stmf_scsilib_send_status(task, STATUS_QFULL, 0);
			else
				stmf_scsilib_send_status(task, STATUS_CHECK,
				    STMF_SAA_READ_ERROR);
			return;
		case STMF_ABORTED:
			/*
			 * Completion from task_done will cleanup
			 */
			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
			return;
		}
		/*
		 * Update the xfer progress.
		 */
		ASSERT(scmd->len >= xfer_len);
		scmd->len -= xfer_len;
		scmd->current_ro += xfer_len;
	}
}
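
/*
 * Worked example (illustrative only) of the P2ALIGN trimming above:
 * with blksize 4096, an xfer starting at offset 6144 for 9000 bytes
 * would end at 15144; P2ALIGN rounds that end down to 12288, so the
 * xfer is trimmed to 6144 bytes and the next xfer starts block
 * aligned. The helper below is hypothetical.
 */
#if 0
static uint32_t
example_trim_to_block(uint64_t xfer_offset, uint32_t xfer_len,
    uint64_t blksize)
{
	uint64_t aligned_end = P2ALIGN(xfer_offset + xfer_len, blksize);

	if (aligned_end > xfer_offset)
		return ((uint32_t)(aligned_end - xfer_offset));
	return (xfer_len);	/* sub-block xfer, cannot be aligned */
}
#endif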

void
sbd_handle_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
				struct stmf_data_buf *dbuf)
{
	if (dbuf->db_xfer_status != STMF_SUCCESS) {
		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
		    dbuf->db_xfer_status, NULL);
		return;
	}
	task->task_nbytes_transferred += dbuf->db_data_size;
	if (scmd->len == 0 || scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
		stmf_free_dbuf(task, dbuf);
		scmd->nbufs--;
		if (scmd->nbufs)
			return;	/* wait for all buffers to complete */
		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL)
			stmf_scsilib_send_status(task, STATUS_CHECK,
			    STMF_SAA_READ_ERROR);
		else
			stmf_scsilib_send_status(task, STATUS_GOOD, 0);
		return;
	}
	if (dbuf->db_flags & DB_DONT_REUSE) {
		/* allocate new dbuf */
		uint32_t maxsize, minsize, old_minsize;
		stmf_free_dbuf(task, dbuf);

		maxsize = (scmd->len > (128*1024)) ? 128*1024 : scmd->len;
		minsize = maxsize >> 2;
		do {
			old_minsize = minsize;
			dbuf = stmf_alloc_dbuf(task, maxsize, &minsize, 0);
		} while ((dbuf == NULL) && (old_minsize > minsize) &&
		    (minsize >= 512));
		if (dbuf == NULL) {
			scmd->nbufs--;
			if (scmd->nbufs == 0) {
				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
				    STMF_ALLOC_FAILURE, NULL);
			}
			return;
		}
	}
	sbd_do_read_xfer(task, scmd, dbuf);
}

/*
 * This routine must release the DMU resources and free the dbuf
 * in all cases.  If this is the final dbuf of the task, then drop
 * the reader lock on the LU state. If there are no errors and more
 * work to do, then queue more xfer operations.
 */
void
sbd_handle_sgl_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
				struct stmf_data_buf *dbuf)
{
	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
	stmf_status_t xfer_status;
	uint32_t data_size;
	int scmd_err;

	ASSERT(dbuf->db_lu_private);
	ASSERT(scmd->cmd_type == SBD_CMD_SCSI_READ);

	scmd->nbufs--;	/* account for this dbuf */
	/*
	 * Release the DMU resources.
	 */
	sbd_zvol_rele_read_bufs(sl, dbuf);
	/*
	 * Release the dbuf after retrieving needed fields.
	 */
	xfer_status = dbuf->db_xfer_status;
	data_size = dbuf->db_data_size;
	stmf_teardown_dbuf(task, dbuf);
	stmf_free(dbuf);
	/*
	 * Release the state lock if this is the last completion.
	 * If this is the last dbuf on task and all data has been
	 * transferred or an error encountered, then no more dbufs
	 * will be queued.
	 */
	scmd_err = (((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0) ||
	    (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) ||
	    (xfer_status != STMF_SUCCESS));
	if (scmd->nbufs == 0 && (scmd->len == 0 || scmd_err)) {
		/* all DMU state has been released */
		rw_exit(&sl->sl_access_state_lock);
	}

	/*
	 * If there have been no errors, either complete the task
	 * or issue more data xfer operations.
	 */
	if (!scmd_err) {
		/*
		 * This chunk completed successfully
		 */
		task->task_nbytes_transferred += data_size;
		if (scmd->nbufs == 0 && scmd->len == 0) {
			/*
			 * This command completed successfully
			 *
			 * Status was sent along with data, so no status
			 * completion will occur. Tell stmf we are done.
			 */
			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
			stmf_task_lu_done(task);
			return;
		}
		/*
		 * Start more xfers
		 */
		sbd_do_sgl_read_xfer(task, scmd, 0);
		return;
	}
	/*
	 * Sort out the failure
	 */
	if (scmd->flags & SBD_SCSI_CMD_ACTIVE) {
		/*
		 * If a previous error occurred, leave the command active
		 * and wait for the last completion to send the status check.
		 */
		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
			if (scmd->nbufs == 0) {
				scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
				stmf_scsilib_send_status(task, STATUS_CHECK,
				    STMF_SAA_READ_ERROR);
			}
			return;
		}
		/*
		 * Must have been a failure on current dbuf
		 */
		ASSERT(xfer_status != STMF_SUCCESS);
		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
		stmf_abort(STMF_QUEUE_TASK_ABORT, task, xfer_status, NULL);
	}
}

void
sbd_handle_sgl_write_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
				struct stmf_data_buf *dbuf)
{
	sbd_zvol_io_t *zvio = dbuf->db_lu_private;
	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
	int ret;
	int scmd_err, scmd_xfer_done;
	stmf_status_t xfer_status = dbuf->db_xfer_status;
	uint32_t data_size = dbuf->db_data_size;
	hrtime_t xfer_start;

	ASSERT(zvio);

	/*
	 * Allow PP to free up resources before releasing the write bufs
	 * as writing to the backend could take some time.
	 */
	stmf_teardown_dbuf(task, dbuf);

	scmd->nbufs--;	/* account for this dbuf */
	/*
	 * All data was queued and this is the last completion,
	 * but there could still be an error.
	 */
	scmd_xfer_done = (scmd->len == 0 && scmd->nbufs == 0);
	scmd_err = (((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0) ||
	    (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) ||
	    (xfer_status != STMF_SUCCESS));

	/* start the accounting clock */
	stmf_lu_xfer_start(task);
	DTRACE_PROBE5(backing__store__write__start, sbd_lu_t *, sl,
	    uint8_t *, NULL, uint64_t, data_size,
	    uint64_t, zvio->zvio_offset, scsi_task_t *, task);
	xfer_start = gethrtime();

	if (scmd_err) {
		/* just return the write buffers */
		sbd_zvol_rele_write_bufs_abort(sl, dbuf);
		ret = 0;
	} else {
		if (scmd_xfer_done)
			zvio->zvio_flags = ZVIO_COMMIT;
		else
			zvio->zvio_flags = 0;
		/* write the data */
		ret = sbd_zvol_rele_write_bufs(sl, dbuf);
	}

	/* finalize accounting */
	stmf_lu_xfer_done(task, B_FALSE /* not read */, data_size,
	    (gethrtime() - xfer_start));
	DTRACE_PROBE6(backing__store__write__end, sbd_lu_t *, sl,
	    uint8_t *, NULL, uint64_t, data_size,
	    uint64_t, zvio->zvio_offset, int, ret, scsi_task_t *, task);

	if (ret != 0) {
		/* update the error flag */
		scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
		scmd_err = 1;
	}

	/* Release the dbuf */
	stmf_free(dbuf);

	/*
	 * Release the state lock if this is the last completion.
	 * If this is the last dbuf on task and all data has been
	 * transferred or an error encountered, then no more dbufs
	 * will be queued.
	 */
	if (scmd->nbufs == 0 && (scmd->len == 0 || scmd_err)) {
		/* all DMU state has been released */
		rw_exit(&sl->sl_access_state_lock);
	}
	/*
	 * If there have been no errors, either complete the task
	 * or issue more data xfer operations.
	 */
	if (!scmd_err) {
		/* This chunk completed successfully */
		task->task_nbytes_transferred += data_size;
		if (scmd_xfer_done) {
			/* This command completed successfully */
			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
			if ((scmd->flags & SBD_SCSI_CMD_SYNC_WRITE) &&
			    (sbd_flush_data_cache(sl, 0) != SBD_SUCCESS)) {
				stmf_scsilib_send_status(task, STATUS_CHECK,
				    STMF_SAA_WRITE_ERROR);
			} else {
				stmf_scsilib_send_status(task, STATUS_GOOD, 0);
			}
			return;
		}
		/*
		 * Start more xfers
		 */
		sbd_do_sgl_write_xfer(task, scmd, 0);
		return;
	}
	/*
	 * Sort out the failure
	 */
	if (scmd->flags & SBD_SCSI_CMD_ACTIVE) {
		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
			if (scmd->nbufs == 0) {
				scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
				stmf_scsilib_send_status(task, STATUS_CHECK,
				    STMF_SAA_WRITE_ERROR);
			}
			/*
			 * Leave the command active until last dbuf completes.
			 */
			return;
		}
		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
		ASSERT(xfer_status != STMF_SUCCESS);
		stmf_abort(STMF_QUEUE_TASK_ABORT, task, xfer_status, NULL);
	}
}

/*
 * Handle a copy operation using the zvol interface.
 *
 * Similar to the sbd_data_read/write path, except this goes directly
 * through the zvol interfaces. It can pass a port provider sglist, in
 * the form of a uio, which would be lost through the vn_rdwr path.
 *
 * Returns:
 *	STMF_SUCCESS - request handled
 *	STMF_FAILURE - request not handled, caller must deal with error
 */
static stmf_status_t
sbd_copy_rdwr(scsi_task_t *task, uint64_t laddr, stmf_data_buf_t *dbuf,
    int cmd, int commit)
{
	sbd_lu_t		*sl = task->task_lu->lu_provider_private;
	struct uio		uio;
	struct iovec		*iov, *tiov, iov1[8];
	uint32_t		len, resid;
	int			ret, i, iovcnt, flags;
	hrtime_t		xfer_start;
	boolean_t		is_read;

	ASSERT(cmd == SBD_CMD_SCSI_READ || cmd == SBD_CMD_SCSI_WRITE);

	is_read = (cmd == SBD_CMD_SCSI_READ) ? B_TRUE : B_FALSE;
	iovcnt = dbuf->db_sglist_length;
	/* use the stack for small iovecs */
	if (iovcnt > 8) {
		iov = kmem_alloc(iovcnt * sizeof (*iov), KM_SLEEP);
	} else {
		iov = &iov1[0];
	}

	/* Convert dbuf sglist to iovec format */
	len = dbuf->db_data_size;
	resid = len;
	tiov = iov;
	for (i = 0; i < iovcnt; i++) {
		tiov->iov_base = (caddr_t)dbuf->db_sglist[i].seg_addr;
		tiov->iov_len = MIN(resid, dbuf->db_sglist[i].seg_length);
		resid -= tiov->iov_len;
		tiov++;
	}
	if (resid != 0) {
		cmn_err(CE_WARN, "inconsistent sglist rem %d", resid);
		if (iov != &iov1[0])
			kmem_free(iov, iovcnt * sizeof (*iov));
		return (STMF_FAILURE);
	}
	/* Setup the uio struct */
	uio.uio_iov = iov;
	uio.uio_iovcnt = iovcnt;
	uio.uio_loffset = laddr;
	uio.uio_segflg = (short)UIO_SYSSPACE;
	uio.uio_resid = (uint64_t)len;
	uio.uio_llimit = RLIM64_INFINITY;

	/* start the accounting clock */
	stmf_lu_xfer_start(task);
	xfer_start = gethrtime();
	if (is_read == B_TRUE) {
		uio.uio_fmode = FREAD;
		uio.uio_extflg = UIO_COPY_CACHED;
		DTRACE_PROBE5(backing__store__read__start, sbd_lu_t *, sl,
		    uint8_t *, NULL, uint64_t, len, uint64_t, laddr,
		    scsi_task_t *, task);

		/* Fetch the data */
		ret = sbd_zvol_copy_read(sl, &uio);

		DTRACE_PROBE6(backing__store__read__end, sbd_lu_t *, sl,
		    uint8_t *, NULL, uint64_t, len, uint64_t, laddr, int, ret,
		    scsi_task_t *, task);
	} else {
		uio.uio_fmode = FWRITE;
		uio.uio_extflg = UIO_COPY_DEFAULT;
		DTRACE_PROBE5(backing__store__write__start, sbd_lu_t *, sl,
		    uint8_t *, NULL, uint64_t, len, uint64_t, laddr,
		    scsi_task_t *, task);

		flags = (commit) ? ZVIO_COMMIT : 0;
		/* Write the data */
		ret = sbd_zvol_copy_write(sl, &uio, flags);

		DTRACE_PROBE6(backing__store__write__end, sbd_lu_t *, sl,
		    uint8_t *, NULL, uint64_t, len, uint64_t, laddr, int, ret,
		    scsi_task_t *, task);
	}
	/* finalize accounting */
	stmf_lu_xfer_done(task, is_read, (uint64_t)len,
	    (gethrtime() - xfer_start));

	if (iov != &iov1[0])
		kmem_free(iov, iovcnt * sizeof (*iov));
	if (ret != 0) {
		/* Backend I/O error */
		return (STMF_FAILURE);
	}
	return (STMF_SUCCESS);
}
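
/*
 * Illustrative call sequence (not compiled into the driver): a small
 * read serviced through sbd_copy_rdwr(). The caller must hold
 * sl_access_state_lock as reader and supply a dbuf whose sglist
 * describes the destination segments; the wrapper name is
 * hypothetical. The commit argument only matters for writes, so reads
 * pass 0.
 */
#if 0
static stmf_status_t
example_small_zvol_read(scsi_task_t *task, uint64_t laddr,
    stmf_data_buf_t *dbuf)
{
	return (sbd_copy_rdwr(task, laddr, dbuf, SBD_CMD_SCSI_READ, 0));
}
#endif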

void
sbd_handle_read(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
{
	uint64_t lba, laddr;
	uint32_t len;
	uint8_t op = task->task_cdb[0];
	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
	sbd_cmd_t *scmd;
	stmf_data_buf_t *dbuf;
	int fast_path;

	if (op == SCMD_READ) {
		lba = READ_SCSI21(&task->task_cdb[1], uint64_t);
		len = (uint32_t)task->task_cdb[4];

		if (len == 0) {
			len = 256;
		}
	} else if (op == SCMD_READ_G1) {
		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
		len = READ_SCSI16(&task->task_cdb[7], uint32_t);
	} else if (op == SCMD_READ_G5) {
		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
		len = READ_SCSI32(&task->task_cdb[6], uint32_t);
	} else if (op == SCMD_READ_G4) {
		lba = READ_SCSI64(&task->task_cdb[2], uint64_t);
		len = READ_SCSI32(&task->task_cdb[10], uint32_t);
	} else {
		stmf_scsilib_send_status(task, STATUS_CHECK,
		    STMF_SAA_INVALID_OPCODE);
		return;
	}

	laddr = lba << sl->sl_data_blocksize_shift;
	len <<= sl->sl_data_blocksize_shift;

	if ((laddr + (uint64_t)len) > sl->sl_lu_size) {
		stmf_scsilib_send_status(task, STATUS_CHECK,
		    STMF_SAA_LBA_OUT_OF_RANGE);
		return;
	}

	task->task_cmd_xfer_length = len;
	if (task->task_additional_flags & TASK_AF_NO_EXPECTED_XFER_LENGTH) {
		task->task_expected_xfer_length = len;
	}

	if (len != task->task_expected_xfer_length) {
		fast_path = 0;
		len = (len > task->task_expected_xfer_length) ?
		    task->task_expected_xfer_length : len;
	} else {
		fast_path = 1;
	}

	if (len == 0) {
		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
		return;
	}

	/*
	 * Determine if this read can directly use DMU buffers.
	 */
	if (sbd_zcopy & (2|1) &&		/* Debug switch */
	    initial_dbuf == NULL &&		/* No PP buffer passed in */
	    sl->sl_flags & SL_CALL_ZVOL &&	/* zvol backing store */
	    (task->task_additional_flags &
	    TASK_AF_ACCEPT_LU_DBUF))		/* PP allows it */
	{
		/*
		 * Reduced copy path
		 */
		uint32_t copy_threshold, minsize;
		int ret;

		/*
		 * The sl_access_state_lock will be held shared
		 * for the entire request and released when all
		 * dbufs have completed.
		 */
		rw_enter(&sl->sl_access_state_lock, RW_READER);
		if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
			rw_exit(&sl->sl_access_state_lock);
			stmf_scsilib_send_status(task, STATUS_CHECK,
			    STMF_SAA_READ_ERROR);
			return;
		}

		/*
		 * Check if setup is more expensive than copying the data.
		 *
		 * Use the global over-ride sbd_copy_threshold if set.
		 */
		copy_threshold = (sbd_copy_threshold > 0) ?
		    sbd_copy_threshold : task->task_copy_threshold;
		minsize = len;
		if (len < copy_threshold &&
		    (dbuf = stmf_alloc_dbuf(task, len, &minsize, 0)) != 0) {

			ret = sbd_copy_rdwr(task, laddr, dbuf,
			    SBD_CMD_SCSI_READ, 0);
			/* done with the backend */
			rw_exit(&sl->sl_access_state_lock);
			if (ret != 0) {
				/* backend error */
				stmf_scsilib_send_status(task, STATUS_CHECK,
				    STMF_SAA_READ_ERROR);
			} else {
				/* send along good data */
				dbuf->db_relative_offset = 0;
				dbuf->db_data_size = len;
				dbuf->db_flags = DB_SEND_STATUS_GOOD |
				    DB_DIRECTION_TO_RPORT;
				/* XXX keep for FW? */
				DTRACE_PROBE4(sbd__xfer,
				    struct scsi_task *, task,
				    struct stmf_data_buf *, dbuf,
				    uint64_t, laddr, uint32_t, len);
				(void) stmf_xfer_data(task, dbuf,
				    STMF_IOF_LU_DONE);
			}
			return;
		}

		/* committed to reduced copy */
		if (task->task_lu_private) {
			scmd = (sbd_cmd_t *)task->task_lu_private;
		} else {
			scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t),
			    KM_SLEEP);
			task->task_lu_private = scmd;
		}
		/*
		 * Setup scmd to track read progress.
		 */
		scmd->flags = SBD_SCSI_CMD_ACTIVE;
		scmd->cmd_type = SBD_CMD_SCSI_READ;
		scmd->nbufs = 0;
		scmd->addr = laddr;
		scmd->len = len;
		scmd->current_ro = 0;

		/*
		 * Kick-off the read.
		 */
		sbd_do_sgl_read_xfer(task, scmd, 1);
		return;
	}

	if (initial_dbuf == NULL) {
		uint32_t maxsize, minsize, old_minsize;

		maxsize = (len > (128*1024)) ? 128*1024 : len;
		minsize = maxsize >> 2;
		do {
			old_minsize = minsize;
			initial_dbuf = stmf_alloc_dbuf(task, maxsize,
			    &minsize, 0);
		} while ((initial_dbuf == NULL) && (old_minsize > minsize) &&
		    (minsize >= 512));
		if (initial_dbuf == NULL) {
			stmf_scsilib_send_status(task, STATUS_QFULL, 0);
			return;
		}
	}
	dbuf = initial_dbuf;

	if ((dbuf->db_buf_size >= len) && fast_path &&
	    (dbuf->db_sglist_length == 1)) {
		if (sbd_data_read(sl, task, laddr, (uint64_t)len,
		    dbuf->db_sglist[0].seg_addr) == STMF_SUCCESS) {
			dbuf->db_relative_offset = 0;
			dbuf->db_data_size = len;
			dbuf->db_flags = DB_SEND_STATUS_GOOD |
			    DB_DIRECTION_TO_RPORT;
			/* XXX keep for FW? */
			DTRACE_PROBE4(sbd__xfer, struct scsi_task *, task,
			    struct stmf_data_buf *, dbuf,
			    uint64_t, laddr, uint32_t, len);
			(void) stmf_xfer_data(task, dbuf, STMF_IOF_LU_DONE);
		} else {
			stmf_scsilib_send_status(task, STATUS_CHECK,
			    STMF_SAA_READ_ERROR);
		}
		return;
	}

	if (task->task_lu_private) {
		scmd = (sbd_cmd_t *)task->task_lu_private;
	} else {
		scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
		task->task_lu_private = scmd;
	}
	scmd->flags = SBD_SCSI_CMD_ACTIVE;
	scmd->cmd_type = SBD_CMD_SCSI_READ;
	scmd->nbufs = 1;
	scmd->addr = laddr;
	scmd->len = len;
	scmd->current_ro = 0;

	sbd_do_read_xfer(task, scmd, dbuf);
}
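
/*
 * Worked example (illustrative only) of the CDB decoding above: for a
 * READ(10) CDB (SCMD_READ_G1) the LBA is big-endian in bytes 2-5 and
 * the transfer length in bytes 7-8, which is what READ_SCSI32 and
 * READ_SCSI16 extract. The standalone decoder below is hypothetical.
 */
#if 0
static void
example_decode_read10(const uint8_t *cdb, uint64_t *lba, uint32_t *len)
{
	*lba = ((uint64_t)cdb[2] << 24) | ((uint64_t)cdb[3] << 16) |
	    ((uint64_t)cdb[4] << 8) | (uint64_t)cdb[5];
	*len = ((uint32_t)cdb[7] << 8) | (uint32_t)cdb[8];
}
#endif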

void
sbd_do_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
    struct stmf_data_buf *dbuf, uint8_t dbuf_reusable)
{
	uint32_t len;
	int bufs_to_take;

	if (scmd->len == 0) {
		goto DO_WRITE_XFER_DONE;
	}

	/* Let's try not to hog all the buffers the port has. */
	bufs_to_take = ((task->task_max_nbufs > 2) &&
	    (task->task_cmd_xfer_length < (32 * 1024))) ? 2 :
	    task->task_max_nbufs;

	if ((dbuf != NULL) &&
	    ((dbuf->db_flags & DB_DONT_REUSE) || (dbuf_reusable == 0))) {
		/* free current dbuf and allocate a new one */
		stmf_free_dbuf(task, dbuf);
		dbuf = NULL;
	}
	if (scmd->nbufs >= bufs_to_take) {
		goto DO_WRITE_XFER_DONE;
	}
	if (dbuf == NULL) {
		uint32_t maxsize, minsize, old_minsize;

		maxsize = (scmd->len > (128*1024)) ? 128*1024 :
		    scmd->len;
		minsize = maxsize >> 2;
		do {
			old_minsize = minsize;
			dbuf = stmf_alloc_dbuf(task, maxsize, &minsize, 0);
		} while ((dbuf == NULL) && (old_minsize > minsize) &&
		    (minsize >= 512));
		if (dbuf == NULL) {
			if (scmd->nbufs == 0) {
				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
				    STMF_ALLOC_FAILURE, NULL);
			}
			return;
		}
	}

	len = scmd->len > dbuf->db_buf_size ? dbuf->db_buf_size :
	    scmd->len;

	dbuf->db_relative_offset = scmd->current_ro;
	dbuf->db_data_size = len;
	dbuf->db_flags = DB_DIRECTION_FROM_RPORT;
	(void) stmf_xfer_data(task, dbuf, 0);
	scmd->nbufs++; /* outstanding port xfers and bufs used */
	scmd->len -= len;
	scmd->current_ro += len;

	if ((scmd->len != 0) && (scmd->nbufs < bufs_to_take)) {
		sbd_do_write_xfer(task, scmd, NULL, 0);
	}
	return;

DO_WRITE_XFER_DONE:
	if (dbuf != NULL) {
		stmf_free_dbuf(task, dbuf);
	}
}

void
sbd_do_sgl_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer)
{
	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
	sbd_zvol_io_t *zvio;
	int ret;
	uint32_t xfer_len, max_len, first_len;
	stmf_status_t xstat;
	stmf_data_buf_t *dbuf;
	uint_t nblks;
	uint64_t blksize = sl->sl_blksize;
	uint64_t offset;
	size_t db_private_sz;
	uintptr_t pad;

	ASSERT(rw_read_held(&sl->sl_access_state_lock));
	ASSERT((sl->sl_flags & SL_MEDIA_LOADED) != 0);

	/*
	 * Limit xfer_len to the minimum of:
	 *    - the task limit
	 *    - the lun limit
	 *    - the sbd global limit, if set
	 *    - the first xfer limit, if set
	 *
	 * First, protect against a silly over-ride value.
	 */
	if (sbd_max_xfer_len && ((sbd_max_xfer_len % DEV_BSIZE) != 0)) {
		cmn_err(CE_WARN, "sbd_max_xfer_len invalid %d, resetting\n",
		    sbd_max_xfer_len);
		sbd_max_xfer_len = 0;
	}
	if (sbd_1st_xfer_len && ((sbd_1st_xfer_len % DEV_BSIZE) != 0)) {
		cmn_err(CE_WARN, "sbd_1st_xfer_len invalid %d, resetting\n",
		    sbd_1st_xfer_len);
		sbd_1st_xfer_len = 0;
	}

	max_len = MIN(task->task_max_xfer_len, sl->sl_max_xfer_len);
	if (sbd_max_xfer_len)
		max_len = MIN(max_len, sbd_max_xfer_len);
	/*
	 * Special case the first xfer if hints are set.
	 */
	if (first_xfer && (sbd_1st_xfer_len || task->task_1st_xfer_len)) {
		/* global over-ride has precedence */
		if (sbd_1st_xfer_len)
			first_len = sbd_1st_xfer_len;
		else
			first_len = task->task_1st_xfer_len;
	} else {
		first_len = 0;
	}

	while (scmd->len && scmd->nbufs < task->task_max_nbufs) {

		xfer_len = MIN(max_len, scmd->len);
		if (first_len) {
			xfer_len = MIN(xfer_len, first_len);
			first_len = 0;
		}
		if (xfer_len < scmd->len) {
			/*
			 * Attempt to end xfer on a block boundary.
			 * The only way this does not happen is if the
			 * xfer_len is small enough to stay contained
			 * within the same block.
			 */
			uint64_t xfer_offset, xfer_aligned_end;

			xfer_offset = scmd->addr + scmd->current_ro;
			xfer_aligned_end =
			    P2ALIGN(xfer_offset+xfer_len, blksize);
			if (xfer_aligned_end > xfer_offset)
				xfer_len = xfer_aligned_end - xfer_offset;
		}
		/*
		 * Allocate object to track the write and reserve
		 * enough space for scatter/gather list.
		 */
		offset = scmd->addr + scmd->current_ro;
		nblks = sbd_zvol_numsegs(sl, offset, xfer_len);
		db_private_sz = sizeof (*zvio) + sizeof (uintptr_t) /* PAD */ +
		    (nblks * sizeof (stmf_sglist_ent_t));
		dbuf = stmf_alloc(STMF_STRUCT_DATA_BUF, db_private_sz,
		    AF_DONTZERO);

		/*
		 * Setup the dbuf
		 *
		 * XXX Framework does not handle variable length sglists
		 * properly, so setup db_lu_private and db_port_private
		 * fields here. db_stmf_private is properly set for
		 * calls to stmf_free.
		 */
		if (dbuf->db_port_private == NULL) {
			/*
			 * XXX Framework assigns space to PP after db_sglist[0]
			 */
			cmn_err(CE_PANIC, "db_port_private == NULL");
		}
		pad = (uintptr_t)&dbuf->db_sglist[nblks];
		dbuf->db_lu_private = (void *)P2ROUNDUP(pad, sizeof (pad));
		dbuf->db_port_private = NULL;
		dbuf->db_buf_size = xfer_len;
		dbuf->db_data_size = xfer_len;
		dbuf->db_relative_offset = scmd->current_ro;
		dbuf->db_sglist_length = (uint16_t)nblks;
		dbuf->db_xfer_status = 0;
		dbuf->db_handle = 0;
		dbuf->db_flags = (DB_DONT_CACHE | DB_DONT_REUSE |
		    DB_DIRECTION_FROM_RPORT | DB_LU_DATA_BUF);

		zvio = dbuf->db_lu_private;
		zvio->zvio_offset = offset;

		/* get the buffers */
		ret = sbd_zvol_alloc_write_bufs(sl, dbuf);
		if (ret != 0) {
			/*
			 * Could not allocate buffers from the backend;
			 * treat it like an IO error.
			 */
			stmf_free(dbuf);
			scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
			if (scmd->nbufs == 0) {
				/*
				 * Nothing queued, so no completions coming
				 */
				stmf_scsilib_send_status(task, STATUS_CHECK,
				    STMF_SAA_WRITE_ERROR);
				rw_exit(&sl->sl_access_state_lock);
			}
			/*
			 * Completions of previous buffers will cleanup.
			 */
			return;
		}

		/*
		 * Allow PP to do setup
		 */
		xstat = stmf_setup_dbuf(task, dbuf, 0);
		if (xstat != STMF_SUCCESS) {
			/*
			 * This could happen if the driver cannot get the
			 * DDI resources it needs for this request.
			 * If other dbufs are queued, try again when the next
			 * one completes, otherwise give up.
			 */
			sbd_zvol_rele_write_bufs_abort(sl, dbuf);
			stmf_free(dbuf);
			if (scmd->nbufs > 0) {
				/* completion of previous dbuf will retry */
				return;
			}
			/*
			 * Done with this command.
			 */
			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
			if (first_xfer)
				stmf_scsilib_send_status(task, STATUS_QFULL, 0);
			else
				stmf_scsilib_send_status(task, STATUS_CHECK,
				    STMF_SAA_WRITE_ERROR);
			rw_exit(&sl->sl_access_state_lock);
			return;
		}

		/*
		 * dbuf is now queued on task
		 */
		scmd->nbufs++;

		xstat = stmf_xfer_data(task, dbuf, 0);
		switch (xstat) {
		case STMF_SUCCESS:
			break;
		case STMF_BUSY:
			/*
			 * The dbuf is queued on the task, but unknown
			 * to the PP, thus no completion will occur.
			 */
			sbd_zvol_rele_write_bufs_abort(sl, dbuf);
			stmf_teardown_dbuf(task, dbuf);
			stmf_free(dbuf);
			scmd->nbufs--;
			if (scmd->nbufs > 0) {
				/* completion of previous dbuf will retry */
				return;
			}
			/*
			 * Done with this command.
			 */
			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
			if (first_xfer)
				stmf_scsilib_send_status(task, STATUS_QFULL, 0);
			else
				stmf_scsilib_send_status(task, STATUS_CHECK,
				    STMF_SAA_WRITE_ERROR);
			rw_exit(&sl->sl_access_state_lock);
			return;
		case STMF_ABORTED:
			/*
			 * Completion code will cleanup.
			 */
			scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
			return;
		}
		/*
		 * Update the xfer progress.
		 */
		scmd->len -= xfer_len;
		scmd->current_ro += xfer_len;
	}
}

void
sbd_handle_write_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
    struct stmf_data_buf *dbuf, uint8_t dbuf_reusable)
{
	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
	uint64_t laddr;
	uint32_t buflen, iolen;
	int ndx;

	if (scmd->nbufs > 0) {
		/*
		 * Decrement the count to indicate the port xfer
		 * into the dbuf has completed even though the buf is
		 * still in use here in the LU provider.
		 */
		scmd->nbufs--;
	}

	if (dbuf->db_xfer_status != STMF_SUCCESS) {
		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
		    dbuf->db_xfer_status, NULL);
		return;
	}

	if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
		goto WRITE_XFER_DONE;
	}

	if (scmd->len != 0) {
		/*
		 * Initiate the next port xfer to occur in parallel
		 * with writing this buf.
		 */
		sbd_do_write_xfer(task, scmd, NULL, 0);
	}

	laddr = scmd->addr + dbuf->db_relative_offset;

	/*
	 * If this is going to a zvol, use the direct call to
	 * sbd_zvol_copy_{read,write}. The direct call interface is
	 * restricted to PPs that accept sglists, but that is not required.
	 */
	if (sl->sl_flags & SL_CALL_ZVOL &&
	    (task->task_additional_flags & TASK_AF_ACCEPT_LU_DBUF) &&
	    (sbd_zcopy & (4|1))) {
		int commit;

		commit = (scmd->len == 0 && scmd->nbufs == 0);
		if (sbd_copy_rdwr(task, laddr, dbuf, SBD_CMD_SCSI_WRITE,
		    commit) != STMF_SUCCESS)
			scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
		buflen = dbuf->db_data_size;
	} else {
		for (buflen = 0, ndx = 0; (buflen < dbuf->db_data_size) &&
		    (ndx < dbuf->db_sglist_length); ndx++) {
			iolen = min(dbuf->db_data_size - buflen,
			    dbuf->db_sglist[ndx].seg_length);
			if (iolen == 0)
				break;
			if (sbd_data_write(sl, task, laddr, (uint64_t)iolen,
			    dbuf->db_sglist[ndx].seg_addr) != STMF_SUCCESS) {
				scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
				break;
			}
			buflen += iolen;
			laddr += (uint64_t)iolen;
		}
	}
	task->task_nbytes_transferred += buflen;
WRITE_XFER_DONE:
	if (scmd->len == 0 || scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
		stmf_free_dbuf(task, dbuf);
		if (scmd->nbufs)
			return;	/* wait for all buffers to complete */
		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
			stmf_scsilib_send_status(task, STATUS_CHECK,
			    STMF_SAA_WRITE_ERROR);
		} else {
			/*
			 * If the SYNC_WRITE flag is on then we need to flush
			 * the cache before sending status.
			 * Note: this may be a no-op because of how
			 * SL_WRITEBACK_CACHE_DISABLE and
			 * SL_FLUSH_ON_DISABLED_WRITECACHE are set, but it is
			 * not worth the code complexity of checking those
			 * here; SBD_SCSI_CMD_SYNC_WRITE is rarely set.
			 */
			if ((scmd->flags & SBD_SCSI_CMD_SYNC_WRITE) &&
			    (sbd_flush_data_cache(sl, 0) != SBD_SUCCESS)) {
				stmf_scsilib_send_status(task, STATUS_CHECK,
				    STMF_SAA_WRITE_ERROR);
			} else {
				stmf_scsilib_send_status(task, STATUS_GOOD, 0);
			}
		}
		return;
	}
	sbd_do_write_xfer(task, scmd, dbuf, dbuf_reusable);
}

/*
 * Return true if copy avoidance is beneficial.
 */
static int
sbd_zcopy_write_useful(scsi_task_t *task, uint64_t laddr, uint32_t len,
    uint64_t blksize)
{
	/*
	 * If there is a global copy threshold over-ride, use it.
	 * Otherwise use the PP value with the caveat that at least
	 * 1/2 the data must avoid being copied to be useful.
	 */
	if (sbd_copy_threshold > 0) {
		return (len >= sbd_copy_threshold);
	} else {
		uint64_t no_copy_span;

		/* sub-blocksize writes always copy */
		if (len < task->task_copy_threshold || len < blksize)
			return (0);
		/*
		 * Calculate amount of data that will avoid the copy path.
		 * The calculation is only valid if len >= blksize.
		 */
		no_copy_span = P2ALIGN(laddr+len, blksize) -
		    P2ROUNDUP(laddr, blksize);
		return (no_copy_span >= len/2);
	}
}
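
/*
 * Worked example (illustrative only) for the heuristic above: with
 * blksize 4096, laddr 1536 and len 8192, the write spans bytes
 * 1536-9728. The block aligned middle is P2ALIGN(9728, 4096) -
 * P2ROUNDUP(1536, 4096) = 8192 - 4096 = 4096 bytes, exactly half the
 * data, so copy avoidance is judged useful.
 */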

void
sbd_handle_write(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
{
	uint64_t lba, laddr;
	uint32_t len;
	uint8_t op = task->task_cdb[0], do_immediate_data = 0;
	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
	sbd_cmd_t *scmd;
	stmf_data_buf_t *dbuf;
	uint8_t	sync_wr_flag = 0;

	if (sl->sl_flags & SL_WRITE_PROTECTED) {
		stmf_scsilib_send_status(task, STATUS_CHECK,
		    STMF_SAA_WRITE_PROTECTED);
		return;
	}
	if (op == SCMD_WRITE) {
		lba = READ_SCSI21(&task->task_cdb[1], uint64_t);
		len = (uint32_t)task->task_cdb[4];

		if (len == 0) {
			len = 256;
		}
	} else if (op == SCMD_WRITE_G1) {
		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
		len = READ_SCSI16(&task->task_cdb[7], uint32_t);
	} else if (op == SCMD_WRITE_G5) {
		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
		len = READ_SCSI32(&task->task_cdb[6], uint32_t);
	} else if (op == SCMD_WRITE_G4) {
		lba = READ_SCSI64(&task->task_cdb[2], uint64_t);
		len = READ_SCSI32(&task->task_cdb[10], uint32_t);
	} else if (op == SCMD_WRITE_VERIFY) {
		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
		len = READ_SCSI16(&task->task_cdb[7], uint32_t);
		sync_wr_flag = SBD_SCSI_CMD_SYNC_WRITE;
	} else if (op == SCMD_WRITE_VERIFY_G5) {
		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
		len = READ_SCSI32(&task->task_cdb[6], uint32_t);
		sync_wr_flag = SBD_SCSI_CMD_SYNC_WRITE;
	} else if (op == SCMD_WRITE_VERIFY_G4) {
		lba = READ_SCSI64(&task->task_cdb[2], uint64_t);
		len = READ_SCSI32(&task->task_cdb[10], uint32_t);
		sync_wr_flag = SBD_SCSI_CMD_SYNC_WRITE;
	} else {
		stmf_scsilib_send_status(task, STATUS_CHECK,
		    STMF_SAA_INVALID_OPCODE);
		return;
	}

	laddr = lba << sl->sl_data_blocksize_shift;
	len <<= sl->sl_data_blocksize_shift;

	if ((laddr + (uint64_t)len) > sl->sl_lu_size) {
		stmf_scsilib_send_status(task, STATUS_CHECK,
		    STMF_SAA_LBA_OUT_OF_RANGE);
		return;
	}

	task->task_cmd_xfer_length = len;
	if (task->task_additional_flags & TASK_AF_NO_EXPECTED_XFER_LENGTH) {
		task->task_expected_xfer_length = len;
	}

	len = (len > task->task_expected_xfer_length) ?
	    task->task_expected_xfer_length : len;

	if (len == 0) {
		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
		return;
	}

	if (sbd_zcopy & (4|1) &&		/* Debug switch */
	    initial_dbuf == NULL &&		/* No PP buf passed in */
	    sl->sl_flags & SL_CALL_ZVOL &&	/* zvol backing store */
	    (task->task_additional_flags &
	    TASK_AF_ACCEPT_LU_DBUF) &&		/* PP allows it */
	    sbd_zcopy_write_useful(task, laddr, len, sl->sl_blksize)) {

		/*
		 * XXX Note that disallowing initial_dbuf will eliminate
		 * iSCSI from participating. For small writes, that is
		 * probably ok. For large writes, it may be best to just
		 * copy the data from the initial dbuf and use zcopy for
		 * the rest.
		 */
		rw_enter(&sl->sl_access_state_lock, RW_READER);
		if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
			rw_exit(&sl->sl_access_state_lock);
			stmf_scsilib_send_status(task, STATUS_CHECK,
			    STMF_SAA_READ_ERROR);
			return;
		}
		/*
		 * Setup scmd to track the write progress.
		 */
		if (task->task_lu_private) {
			scmd = (sbd_cmd_t *)task->task_lu_private;
		} else {
			scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t),
			    KM_SLEEP);
			task->task_lu_private = scmd;
		}
		scmd->flags = SBD_SCSI_CMD_ACTIVE | sync_wr_flag;
		scmd->cmd_type = SBD_CMD_SCSI_WRITE;
		scmd->nbufs = 0;
		scmd->addr = laddr;
		scmd->len = len;
		scmd->current_ro = 0;
		sbd_do_sgl_write_xfer(task, scmd, 1);
		return;
	}

	if ((initial_dbuf != NULL) && (task->task_flags & TF_INITIAL_BURST)) {
		if (initial_dbuf->db_data_size > len) {
			if (initial_dbuf->db_data_size >
			    task->task_expected_xfer_length) {
				/* protocol error */
				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
				    STMF_INVALID_ARG, NULL);
				return;
			}
			initial_dbuf->db_data_size = len;
		}
		do_immediate_data = 1;
	}
	dbuf = initial_dbuf;

	if (task->task_lu_private) {
		scmd = (sbd_cmd_t *)task->task_lu_private;
	} else {
		scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
		task->task_lu_private = scmd;
	}
	scmd->flags = SBD_SCSI_CMD_ACTIVE | sync_wr_flag;
	scmd->cmd_type = SBD_CMD_SCSI_WRITE;
	scmd->nbufs = 0;
	scmd->addr = laddr;
	scmd->len = len;
	scmd->current_ro = 0;

	if (do_immediate_data) {
		/*
		 * Account for data passed in this write command
		 */
		(void) stmf_xfer_data(task, dbuf, STMF_IOF_STATS_ONLY);
		scmd->len -= dbuf->db_data_size;
		scmd->current_ro += dbuf->db_data_size;
		dbuf->db_xfer_status = STMF_SUCCESS;
		sbd_handle_write_xfer_completion(task, scmd, dbuf, 0);
	} else {
		sbd_do_write_xfer(task, scmd, dbuf, 0);
	}
}

/*
 * Utility routine to handle small, non-performance-critical data
 * transfers to the initiators. dbuf is an initial data buf (if any),
 * 'p' points to the buffer which is the source of data for the
 * transfer, cdb_xfer_size is the transfer size based on the CDB, and
 * cmd_xfer_size is the actual amount of data this command would
 * transfer (the size of the data pointed to by 'p').
 */
void
sbd_handle_short_read_transfers(scsi_task_t *task, stmf_data_buf_t *dbuf,
    uint8_t *p, uint32_t cdb_xfer_size, uint32_t cmd_xfer_size)
{
	uint32_t bufsize, ndx;
	sbd_cmd_t *scmd;

	cmd_xfer_size = min(cmd_xfer_size, cdb_xfer_size);

	task->task_cmd_xfer_length = cmd_xfer_size;
	if (task->task_additional_flags & TASK_AF_NO_EXPECTED_XFER_LENGTH) {
		task->task_expected_xfer_length = cmd_xfer_size;
	} else {
		cmd_xfer_size = min(cmd_xfer_size,
		    task->task_expected_xfer_length);
	}

	if (cmd_xfer_size == 0) {
		stmf_scsilib_send_status(task, STATUS_CHECK,
		    STMF_SAA_INVALID_FIELD_IN_CDB);
		return;
	}
	if (dbuf == NULL) {
		uint32_t minsize = cmd_xfer_size;

		dbuf = stmf_alloc_dbuf(task, cmd_xfer_size, &minsize, 0);
	}
	if (dbuf == NULL) {
		stmf_scsilib_send_status(task, STATUS_QFULL, 0);
		return;
	}

	for (bufsize = 0, ndx = 0; bufsize < cmd_xfer_size; ndx++) {
		uint8_t *d;
		uint32_t s;

		d = dbuf->db_sglist[ndx].seg_addr;
		s = min((cmd_xfer_size - bufsize),
		    dbuf->db_sglist[ndx].seg_length);
		bcopy(p+bufsize, d, s);
		bufsize += s;
	}
	dbuf->db_relative_offset = 0;
	dbuf->db_data_size = cmd_xfer_size;
	dbuf->db_flags = DB_DIRECTION_TO_RPORT;

	if (task->task_lu_private == NULL) {
		task->task_lu_private =
		    kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
	}
	scmd = (sbd_cmd_t *)task->task_lu_private;

	scmd->cmd_type = SBD_CMD_SMALL_READ;
	scmd->flags = SBD_SCSI_CMD_ACTIVE;
	(void) stmf_xfer_data(task, dbuf, 0);
}

void
sbd_handle_short_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
				struct stmf_data_buf *dbuf)
{
	if (dbuf->db_xfer_status != STMF_SUCCESS) {
		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
		    dbuf->db_xfer_status, NULL);
		return;
	}
	task->task_nbytes_transferred = dbuf->db_data_size;
	scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
	stmf_scsilib_send_status(task, STATUS_GOOD, 0);
}
1663 
1664 void
1665 sbd_handle_short_write_transfers(scsi_task_t *task,
1666     stmf_data_buf_t *dbuf, uint32_t cdb_xfer_size)
1667 {
1668 	sbd_cmd_t *scmd;
1669 
1670 	task->task_cmd_xfer_length = cdb_xfer_size;
1671 	if (task->task_additional_flags & TASK_AF_NO_EXPECTED_XFER_LENGTH) {
1672 		task->task_expected_xfer_length = cdb_xfer_size;
1673 	} else {
1674 		cdb_xfer_size = min(cdb_xfer_size,
1675 		    task->task_expected_xfer_length);
1676 	}
1677 
1678 	if (cdb_xfer_size == 0) {
1679 		stmf_scsilib_send_status(task, STATUS_CHECK,
1680 		    STMF_SAA_INVALID_FIELD_IN_CDB);
1681 		return;
1682 	}
1683 	if (task->task_lu_private == NULL) {
1684 		task->task_lu_private = kmem_zalloc(sizeof (sbd_cmd_t),
1685 		    KM_SLEEP);
1686 	} else {
1687 		bzero(task->task_lu_private, sizeof (sbd_cmd_t));
1688 	}
1689 	scmd = (sbd_cmd_t *)task->task_lu_private;
1690 	scmd->cmd_type = SBD_CMD_SMALL_WRITE;
1691 	scmd->flags = SBD_SCSI_CMD_ACTIVE;
1692 	scmd->len = cdb_xfer_size;
1693 	if (dbuf == NULL) {
1694 		uint32_t minsize = cdb_xfer_size;
1695 
1696 		dbuf = stmf_alloc_dbuf(task, cdb_xfer_size, &minsize, 0);
1697 		if (dbuf == NULL) {
1698 			stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1699 			    STMF_ALLOC_FAILURE, NULL);
1700 			return;
1701 		}
1702 		dbuf->db_data_size = cdb_xfer_size;
1703 		dbuf->db_relative_offset = 0;
1704 		dbuf->db_flags = DB_DIRECTION_FROM_RPORT;
1705 		(void) stmf_xfer_data(task, dbuf, 0);
1706 	} else {
1707 		if (dbuf->db_data_size < cdb_xfer_size) {
1708 			stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1709 			    STMF_ABORTED, NULL);
1710 			return;
1711 		}
1712 		dbuf->db_data_size = cdb_xfer_size;
1713 		sbd_handle_short_write_xfer_completion(task, dbuf);
1714 	}
1715 }
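
/*
 * The only opcodes routed through this short-write path are MODE SELECT
 * and PERSISTENT RESERVE OUT; once the data phase finishes,
 * sbd_handle_short_write_xfer_completion() below dispatches on
 * task_cdb[0] accordingly.
 */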
1716 
1717 void
1718 sbd_handle_short_write_xfer_completion(scsi_task_t *task,
1719     stmf_data_buf_t *dbuf)
1720 {
1721 	sbd_cmd_t *scmd;
1722 	stmf_status_t st_ret;
1723 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1724 
1725 	/*
1726 	 * For now let's assume we will get only one sglist element
1727 	 * for short writes. If that ever changes, we should allocate
1728 	 * a local buffer and copy all the sg elements into one linear space.
1729 	 */
1730 	if ((dbuf->db_xfer_status != STMF_SUCCESS) ||
1731 	    (dbuf->db_sglist_length > 1)) {
1732 		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1733 		    dbuf->db_xfer_status, NULL);
1734 		return;
1735 	}
1736 
1737 	task->task_nbytes_transferred = dbuf->db_data_size;
1738 	scmd = (sbd_cmd_t *)task->task_lu_private;
1739 	scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
1740 
1741 	/* Let's find out whom to call */
1742 	switch (task->task_cdb[0]) {
1743 	case SCMD_MODE_SELECT:
1744 	case SCMD_MODE_SELECT_G1:
1745 		if (sl->sl_access_state == SBD_LU_STANDBY) {
1746 			st_ret = stmf_proxy_scsi_cmd(task, dbuf);
1747 			if (st_ret != STMF_SUCCESS) {
1748 				stmf_scsilib_send_status(task, STATUS_CHECK,
1749 				    STMF_SAA_LU_NO_ACCESS_UNAVAIL);
1750 			}
1751 		} else {
1752 			sbd_handle_mode_select_xfer(task,
1753 			    dbuf->db_sglist[0].seg_addr, dbuf->db_data_size);
1754 		}
1755 		break;
1756 	case SCMD_PERSISTENT_RESERVE_OUT:
1757 		if (sl->sl_access_state == SBD_LU_STANDBY) {
1758 			st_ret = stmf_proxy_scsi_cmd(task, dbuf);
1759 			if (st_ret != STMF_SUCCESS) {
1760 				stmf_scsilib_send_status(task, STATUS_CHECK,
1761 				    STMF_SAA_LU_NO_ACCESS_UNAVAIL);
1762 			}
1763 		} else {
1764 			sbd_handle_pgr_out_data(task, dbuf);
1765 		}
1766 		break;
1767 	default:
1768 		/* This should never happen */
1769 		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1770 		    STMF_ABORTED, NULL);
1771 	}
1772 }
1773 
1774 void
1775 sbd_handle_read_capacity(struct scsi_task *task,
1776     struct stmf_data_buf *initial_dbuf)
1777 {
1778 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1779 	uint32_t cdb_len;
1780 	uint8_t p[32];
1781 	uint64_t s;
1782 	uint16_t blksize;
1783 
1784 	s = sl->sl_lu_size >> sl->sl_data_blocksize_shift;
1785 	s--;
1786 	blksize = ((uint16_t)1) << sl->sl_data_blocksize_shift;
1787 
1788 	switch (task->task_cdb[0]) {
1789 	case SCMD_READ_CAPACITY:
1790 		if (s & 0xffffffff00000000ull) {
1791 			p[0] = p[1] = p[2] = p[3] = 0xFF;
1792 		} else {
1793 			p[0] = (s >> 24) & 0xff;
1794 			p[1] = (s >> 16) & 0xff;
1795 			p[2] = (s >> 8) & 0xff;
1796 			p[3] = s & 0xff;
1797 		}
1798 		p[4] = 0; p[5] = 0;
1799 		p[6] = (blksize >> 8) & 0xff;
1800 		p[7] = blksize & 0xff;
1801 		sbd_handle_short_read_transfers(task, initial_dbuf, p, 8, 8);
1802 		break;
1803 
1804 	case SCMD_SVC_ACTION_IN_G4:
1805 		cdb_len = READ_SCSI32(&task->task_cdb[10], uint32_t);
1806 		bzero(p, 32);
1807 		p[0] = (s >> 56) & 0xff;
1808 		p[1] = (s >> 48) & 0xff;
1809 		p[2] = (s >> 40) & 0xff;
1810 		p[3] = (s >> 32) & 0xff;
1811 		p[4] = (s >> 24) & 0xff;
1812 		p[5] = (s >> 16) & 0xff;
1813 		p[6] = (s >> 8) & 0xff;
1814 		p[7] = s & 0xff;
1815 		p[10] = (blksize >> 8) & 0xff;
1816 		p[11] = blksize & 0xff;
1817 		sbd_handle_short_read_transfers(task, initial_dbuf, p,
1818 		    cdb_len, 32);
1819 		break;
1820 	}
1821 }
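
/*
 * Worked example, assuming 512-byte blocks (sl_data_blocksize_shift
 * of 9): a 1 TiB LU yields s == 2^31 - 1 == 0x7FFFFFFF, which fits in
 * 32 bits, so READ CAPACITY(10) returns 7F FF FF FF 00 00 02 00. At
 * 2 TiB and beyond the returned LBA field reads 0xFFFFFFFF, telling
 * the initiator to retry with READ CAPACITY(16) (SERVICE ACTION IN),
 * whose response is built in the SCMD_SVC_ACTION_IN_G4 arm above.
 */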
1822 
1823 void
1824 sbd_calc_geometry(uint64_t s, uint16_t blksize, uint8_t *nsectors,
1825     uint8_t *nheads, uint32_t *ncyl)
1826 {
1827 	if (s < (4ull * 1024ull * 1024ull * 1024ull)) {
1828 		*nsectors = 32;
1829 		*nheads = 8;
1830 	} else {
1831 		*nsectors = 254;
1832 		*nheads = 254;
1833 	}
1834 	*ncyl = s / ((uint64_t)blksize * (uint64_t)(*nsectors) *
1835 	    (uint64_t)(*nheads));
1836 }
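
/*
 * Worked example: for s == 2 GiB with 512-byte blocks, the small-LU
 * branch applies (nsectors = 32, nheads = 8), so
 *
 *	ncyl = 2^31 / (512 * 32 * 8) = 2^31 / 2^17 = 16384
 *
 * Note that s is the LU size in bytes, which is why blksize appears
 * in the divisor.
 */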
1837 
1838 void
1839 sbd_handle_mode_sense(struct scsi_task *task,
1840     struct stmf_data_buf *initial_dbuf, uint8_t *buf)
1841 {
1842 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1843 	uint32_t cmd_size, n;
1844 	uint8_t *cdb;
1845 	uint32_t ncyl;
1846 	uint8_t nsectors, nheads;
1847 	uint8_t page, ctrl, header_size, pc_valid;
1848 	uint16_t nbytes;
1849 	uint8_t *p;
1850 	uint64_t s = sl->sl_lu_size;
1851 	uint32_t dev_spec_param_offset;
1852 
1853 	p = buf;	/* buf is assumed to be zeroed out and large enough */
1854 	n = 0;
1855 	cdb = &task->task_cdb[0];
1856 	page = cdb[2] & 0x3F;
1857 	ctrl = (cdb[2] >> 6) & 3;
1858 	cmd_size = (cdb[0] == SCMD_MODE_SENSE) ? cdb[4] :
1859 	    READ_SCSI16(&cdb[7], uint32_t);
1860 
1861 	if (cdb[0] == SCMD_MODE_SENSE) {
1862 		header_size = 4;
1863 		dev_spec_param_offset = 2;
1864 	} else {
1865 		header_size = 8;
1866 		dev_spec_param_offset = 3;
1867 	}
1868 
1869 	/* Now validate the command */
1870 	if ((cdb[2] == 0) || (page == MODEPAGE_ALLPAGES) || (page == 0x08) ||
1871 	    (page == 0x0A) || (page == 0x03) || (page == 0x04)) {
1872 		pc_valid = 1;
1873 	} else {
1874 		pc_valid = 0;
1875 	}
1876 	if ((cmd_size < header_size) || (pc_valid == 0)) {
1877 		stmf_scsilib_send_status(task, STATUS_CHECK,
1878 		    STMF_SAA_INVALID_FIELD_IN_CDB);
1879 		return;
1880 	}
1881 
1882 	/* We will update the length in the mode header at the end */
1883 
1884 	/* The device-specific parameter byte in the mode header has the WP bit */
1885 	if (sl->sl_flags & SL_WRITE_PROTECTED) {
1886 		p[n + dev_spec_param_offset] = BIT_7;
1887 	}
1888 	n += header_size;
1889 	/* We are not going to return any block descriptor */
1890 
1891 	nbytes = ((uint16_t)1) << sl->sl_data_blocksize_shift;
1892 	sbd_calc_geometry(s, nbytes, &nsectors, &nheads, &ncyl);
1893 
1894 	if ((page == 0x03) || (page == MODEPAGE_ALLPAGES)) {
1895 		p[n] = 0x03;
1896 		p[n+1] = 0x16;
1897 		if (ctrl != 1) {
1898 			p[n + 11] = nsectors;
1899 			p[n + 12] = nbytes >> 8;
1900 			p[n + 13] = nbytes & 0xff;
1901 			p[n + 20] = 0x80;
1902 		}
1903 		n += 24;
1904 	}
1905 	if ((page == 0x04) || (page == MODEPAGE_ALLPAGES)) {
1906 		p[n] = 0x04;
1907 		p[n + 1] = 0x16;
1908 		if (ctrl != 1) {
1909 			p[n + 2] = ncyl >> 16;
1910 			p[n + 3] = ncyl >> 8;
1911 			p[n + 4] = ncyl & 0xff;
1912 			p[n + 5] = nheads;
1913 			p[n + 20] = 0x15;
1914 			p[n + 21] = 0x18;
1915 		}
1916 		n += 24;
1917 	}
1918 	if ((page == MODEPAGE_CACHING) || (page == MODEPAGE_ALLPAGES)) {
1919 		struct mode_caching *mode_caching_page;
1920 
1921 		mode_caching_page = (struct mode_caching *)&p[n];
1922 
1923 		mode_caching_page->mode_page.code = MODEPAGE_CACHING;
1924 		mode_caching_page->mode_page.ps = 1; /* A saveable page */
1925 		mode_caching_page->mode_page.length = 0x12;
1926 
1927 		switch (ctrl) {
1928 		case (0):
1929 			/* Current */
1930 			if ((sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) == 0) {
1931 				mode_caching_page->wce = 1;
1932 			}
1933 			break;
1934 
1935 		case (1):
1936 			/* Changeable */
1937 			if ((sl->sl_flags &
1938 			    SL_WRITEBACK_CACHE_SET_UNSUPPORTED) == 0) {
1939 				mode_caching_page->wce = 1;
1940 			}
1941 			break;
1942 
1943 		default:
1944 			if ((sl->sl_flags &
1945 			    SL_SAVED_WRITE_CACHE_DISABLE) == 0) {
1946 				mode_caching_page->wce = 1;
1947 			}
1948 			break;
1949 		}
1950 		n += (sizeof (struct mode_page) +
1951 		    mode_caching_page->mode_page.length);
1952 	}
1953 	if ((page == MODEPAGE_CTRL_MODE) || (page == MODEPAGE_ALLPAGES)) {
1954 		struct mode_control_scsi3 *mode_control_page;
1955 
1956 		mode_control_page = (struct mode_control_scsi3 *)&p[n];
1957 
1958 		mode_control_page->mode_page.code = MODEPAGE_CTRL_MODE;
1959 		mode_control_page->mode_page.length =
1960 		    PAGELENGTH_MODE_CONTROL_SCSI3;
1961 		if (ctrl != 1) {
1962 			/* If not looking for changeable values, report this. */
1963 			mode_control_page->que_mod = CTRL_QMOD_UNRESTRICT;
1964 		}
1965 		n += (sizeof (struct mode_page) +
1966 		    mode_control_page->mode_page.length);
1967 	}
1968 
1969 	if (cdb[0] == SCMD_MODE_SENSE) {
1970 		if (n > 255) {
1971 			stmf_scsilib_send_status(task, STATUS_CHECK,
1972 			    STMF_SAA_INVALID_FIELD_IN_CDB);
1973 			return;
1974 		}
1975 		/*
1976 		 * The MODE DATA LENGTH field doesn't count its own bytes,
1977 		 * so subtract the size of the length field from the total:
1978 		 * one byte here, two bytes in the else arm below.
1979 		 */
1980 		buf[0] = (n - 1) & 0xff;
1981 	} else {
1982 		/* Byte count minus header length field size. */
1983 		buf[1] = (n - 2) & 0xff;
1984 		buf[0] = ((n - 2) >> 8) & 0xff;
1985 	}
1986 
1987 	sbd_handle_short_read_transfers(task, initial_dbuf, buf,
1988 	    cmd_size, n);
1989 }
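
/*
 * Worked example of the length fixup above (a sketch, assuming
 * PAGELENGTH_MODE_CONTROL_SCSI3 is 0x0A): a MODE SENSE(6) ALLPAGES
 * request gathers a 4-byte header plus pages of 24, 24, 20 and 12
 * bytes, so n == 84 and buf[0] becomes 83, since the MODE DATA LENGTH
 * field doesn't count its own byte. For MODE SENSE(10) the field is
 * two bytes wide, hence the (n - 2) in the else arm.
 */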
1990 
1991 void
1992 sbd_handle_mode_select(scsi_task_t *task, stmf_data_buf_t *dbuf)
1993 {
1994 	uint32_t cmd_xfer_len;
1995 
1996 	if (task->task_cdb[0] == SCMD_MODE_SELECT) {
1997 		cmd_xfer_len = (uint32_t)task->task_cdb[4];
1998 	} else {
1999 		cmd_xfer_len = READ_SCSI16(&task->task_cdb[7], uint32_t);
2000 	}
2001 
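	/*
	 * cdb[1] bit 4 (0x10) is PF (page format) and bit 0 is SP (save
	 * pages). Accept only PF == 1 with every other non-SP bit clear;
	 * SP is masked off by the 0xFE here and honored later, in
	 * sbd_handle_mode_select_xfer().
	 */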
2002 	if ((task->task_cdb[1] & 0xFE) != 0x10) {
2003 		stmf_scsilib_send_status(task, STATUS_CHECK,
2004 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2005 		return;
2006 	}
2007 
2008 	if (cmd_xfer_len == 0) {
2009 		/* zero-byte MODE SELECTs are allowed */
2010 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2011 		return;
2012 	}
2013 
2014 	sbd_handle_short_write_transfers(task, dbuf, cmd_xfer_len);
2015 }
2016 
2017 void
2018 sbd_handle_mode_select_xfer(scsi_task_t *task, uint8_t *buf, uint32_t buflen)
2019 {
2020 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2021 	sbd_it_data_t *it;
2022 	int hdr_len, bd_len;
2023 	sbd_status_t sret;
2024 	int i;
2025 
2026 	if (task->task_cdb[0] == SCMD_MODE_SELECT) {
2027 		hdr_len = 4;
2028 	} else {
2029 		hdr_len = 8;
2030 	}
2031 
2032 	if (buflen < hdr_len)
2033 		goto mode_sel_param_len_err;
2034 
2035 	bd_len = hdr_len == 4 ? buf[3] : READ_SCSI16(&buf[6], int);
2036 
2037 	if (buflen < (hdr_len + bd_len + 2))
2038 		goto mode_sel_param_len_err;
2039 
2040 	buf += hdr_len + bd_len;
2041 	buflen -= hdr_len + bd_len;
2042 
2043 	if ((buf[0] != 8) || (buflen != ((uint32_t)buf[1] + 2))) {
2044 		goto mode_sel_param_len_err;
2045 	}
2046 
2047 	if (buf[2] & 0xFB) {
2048 		goto mode_sel_param_field_err;
2049 	}
2050 
2051 	for (i = 3; i < (buf[1] + 2); i++) {
2052 		if (buf[i]) {
2053 			goto mode_sel_param_field_err;
2054 		}
2055 	}
2056 
2057 	sret = SBD_SUCCESS;
2058 
2059 	/* All good. Let's handle the write cache change, if any */
2060 	if (buf[2] & BIT_2) {
2061 		sret = sbd_wcd_set(0, sl);
2062 	} else {
2063 		sret = sbd_wcd_set(1, sl);
2064 	}
2065 
2066 	if (sret != SBD_SUCCESS) {
2067 		stmf_scsilib_send_status(task, STATUS_CHECK,
2068 		    STMF_SAA_WRITE_ERROR);
2069 		return;
2070 	}
2071 
2072 	/* The setting took effect on the device; now update the flags */
2073 	mutex_enter(&sl->sl_lock);
2074 	if (buf[2] & BIT_2) {
2075 		sl->sl_flags &= ~SL_WRITEBACK_CACHE_DISABLE;
2076 	} else {
2077 		sl->sl_flags |= SL_WRITEBACK_CACHE_DISABLE;
2078 	}
2079 
2080 	for (it = sl->sl_it_list; it != NULL; it = it->sbd_it_next) {
2081 		if (it == task->task_lu_itl_handle)
2082 			continue;
2083 		it->sbd_it_ua_conditions |= SBD_UA_MODE_PARAMETERS_CHANGED;
2084 	}
2085 
2086 	if (task->task_cdb[1] & 1) {
2087 		if (buf[2] & BIT_2) {
2088 			sl->sl_flags &= ~SL_SAVED_WRITE_CACHE_DISABLE;
2089 		} else {
2090 			sl->sl_flags |= SL_SAVED_WRITE_CACHE_DISABLE;
2091 		}
2092 		mutex_exit(&sl->sl_lock);
2093 		sret = sbd_write_lu_info(sl);
2094 	} else {
2095 		mutex_exit(&sl->sl_lock);
2096 	}
2097 	if (sret == SBD_SUCCESS) {
2098 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2099 	} else {
2100 		stmf_scsilib_send_status(task, STATUS_CHECK,
2101 		    STMF_SAA_WRITE_ERROR);
2102 	}
2103 	return;
2104 
2105 mode_sel_param_len_err:
2106 	stmf_scsilib_send_status(task, STATUS_CHECK,
2107 	    STMF_SAA_PARAM_LIST_LENGTH_ERROR);
2108 	return;
2109 mode_sel_param_field_err:
2110 	stmf_scsilib_send_status(task, STATUS_CHECK,
2111 	    STMF_SAA_INVALID_FIELD_IN_PARAM_LIST);
2112 }
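
/*
 * Example of a parameter list that passes the checks above (a sketch
 * for MODE SELECT(6) with PF=1, so hdr_len == 4 and bd_len == 0):
 *
 *	00 00 00 00                      mode parameter header
 *	08 12 04 00 00 00 00 00 00 00    caching page, WCE (BIT_2) set
 *	00 00 00 00 00 00 00 00 00 00
 *
 * buflen is 24; buf[0] must be exactly 0x08 (the PS bit may not be
 * set on MODE SELECT), buf[1] + 2 must equal the remaining length,
 * only BIT_2 of buf[2] may be nonzero, and every byte past buf[2] is
 * required to be zero.
 */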
2113 
2114 /*
2115  * Command support added from SPC-4 r24
2116  * Supports info type 0, 2, 127
2117  */
2118 void
2119 sbd_handle_identifying_info(struct scsi_task *task,
2120     stmf_data_buf_t *initial_dbuf)
2121 {
2122 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2123 	uint8_t *cdb;
2124 	uint32_t cmd_size;
2125 	uint32_t param_len;
2126 	uint32_t xfer_size;
2127 	uint8_t info_type;
2128 	uint8_t *buf, *p;
2129 
2130 	cdb = &task->task_cdb[0];
2131 	cmd_size = READ_SCSI32(&cdb[6], uint32_t);
2132 	info_type = cdb[10]>>1;
2133 	info_type = cdb[10] >> 1;
2134 	/* Validate the command */
2135 	if (cmd_size < 4) {
2136 		stmf_scsilib_send_status(task, STATUS_CHECK,
2137 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2138 		return;
2139 	}
2140 
2141 	p = buf = kmem_zalloc(260, KM_SLEEP);
2142 
2143 	switch (info_type) {
2144 		case 0:
2145 			/*
2146 			 * No value is supplied but this info type
2147 			 * is mandatory.
2148 			 */
2149 			xfer_size = 4;
2150 			break;
2151 		case 2:
2152 			mutex_enter(&sl->sl_lock);
2153 			param_len = strlcpy((char *)(p+4), sl->sl_alias, 256);
2154 			mutex_exit(&sl->sl_lock);
2155 			/* text info must be null terminated */
2156 			if (++param_len > 256)
2157 				param_len = 256;
2158 			SCSI_WRITE16(p+2, param_len);
2159 			xfer_size = param_len + 4;
2160 			break;
2161 		case 127:
2162 			/* 0 and 2 descriptor supported */
2163 			/* descriptors for info types 0 and 2 supported */
2164 			SCSI_WRITE16(p+2, 8); /* set param length */
2165 			p += 8;
2166 			*p = 4; /* info type 2 in the high 7 bits (2 << 1) */
2167 			SCSI_WRITE16(p, 256); /* 256 max length */
2168 			xfer_size = 12;
2169 			break;
2170 		default:
2171 			stmf_scsilib_send_status(task, STATUS_CHECK,
2172 			    STMF_SAA_INVALID_FIELD_IN_CDB);
2173 			kmem_free(buf, 260);
2174 			return;
2175 	}
2176 	sbd_handle_short_read_transfers(task, initial_dbuf, buf,
2177 	    cmd_size, xfer_size);
2178 	kmem_free(buf, 260);
2179 }
2180 
2181 /*
2182  * This function parses a string, passed to it as a pointer to a string,
2183  * by advancing the pointer past leading whitespace and returning the
2184  * length of the first run of non-space characters. Multiple
2185  * management URLs are stored as a space-delimited string in the
2186  * sl_mgmt_url field of sbd_lu_t; this function retrieves one URL at a time.
2187  *
2188  * i/p : pointer to a pointer to a URL string
2189  * o/p : adjusts the pointer to the first non-whitespace character
2190  *       and returns the length of the URL
2191  */
2192 uint16_t
2193 sbd_parse_mgmt_url(char **url_addr) {
2194 	uint16_t url_length = 0;
2195 	char *url;
2196 	url = *url_addr;
2197 
2198 	while (*url != '\0') {
2199 		if (*url == ' ' || *url == '\t' || *url == '\n') {
2200 			(*url_addr)++;
2201 			url = *url_addr;
2202 		} else {
2203 			break;
2204 		}
2205 	}
2206 
2207 	while (*url != '\0') {
2208 		if (*url == ' ' || *url == '\t' ||
2209 		    *url == '\n' || *url == '\0') {
2210 			break;
2211 		}
2212 		url++;
2213 		url_length++;
2214 	}
2215 	return (url_length);
2216 }
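
/*
 * Usage sketch (this mirrors the VPD page 0x85 loop below): walking a
 * space-delimited list such as "http://a http://b" one URL at a time.
 *
 *	char *url = mgmt_url;
 *	uint16_t url_size = sbd_parse_mgmt_url(&url);
 *
 *	while (url_size != 0) {
 *		...use url_size bytes starting at url...
 *		url += url_size;
 *		url_size = sbd_parse_mgmt_url(&url);
 *	}
 */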
2217 
2218 void
2219 sbd_handle_inquiry(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
2220 {
2221 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2222 	uint8_t *cdbp = (uint8_t *)&task->task_cdb[0];
2223 	uint8_t *p;
2224 	uint8_t byte0;
2225 	uint8_t page_length;
2226 	uint16_t bsize = 512;
2227 	uint16_t cmd_size;
2228 	uint32_t xfer_size = 4;
2229 	uint32_t mgmt_url_size = 0;
2230 	char *mgmt_url = NULL;
2231 
2232 
2233 	byte0 = DTYPE_DIRECT;
2234 	/*
2235 	 * Basic protocol checks.
2236 	 */
2237 
2238 	if ((((cdbp[1] & 1) == 0) && cdbp[2]) || cdbp[5]) {
2239 		stmf_scsilib_send_status(task, STATUS_CHECK,
2240 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2241 		return;
2242 	}
2243 
2244 	/*
2245 	 * Zero byte allocation length is not an error.  Just
2246 	 * return success.
2247 	 */
2248 
2249 	cmd_size = (((uint16_t)cdbp[3]) << 8) | cdbp[4];
2250 
2251 	if (cmd_size == 0) {
2252 		task->task_cmd_xfer_length = 0;
2253 		if (task->task_additional_flags &
2254 		    TASK_AF_NO_EXPECTED_XFER_LENGTH) {
2255 			task->task_expected_xfer_length = 0;
2256 		}
2257 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2258 		return;
2259 	}
2260 
2261 	/*
2262 	 * Standard inquiry
2263 	 */
2264 
2265 	if ((cdbp[1] & 1) == 0) {
2266 		int	i;
2267 		struct scsi_inquiry *inq;
2268 
2269 		p = (uint8_t *)kmem_zalloc(bsize, KM_SLEEP);
2270 		inq = (struct scsi_inquiry *)p;
2271 
2272 		page_length = 69;
2273 		xfer_size = page_length + 5;
2274 
2275 		inq->inq_dtype = DTYPE_DIRECT;
2276 		inq->inq_ansi = 5;	/* SPC-3 */
2277 		inq->inq_hisup = 1;
2278 		inq->inq_rdf = 2;	/* Response data format for SPC-3 */
2279 		inq->inq_len = page_length;
2280 
2281 		inq->inq_tpgs = TPGS_FAILOVER_IMPLICIT;
2282 		inq->inq_cmdque = 1;
2283 
2284 		if (sl->sl_flags & SL_VID_VALID) {
2285 			bcopy(sl->sl_vendor_id, inq->inq_vid, 8);
2286 		} else {
2287 			bcopy(sbd_vendor_id, inq->inq_vid, 8);
2288 		}
2289 
2290 		if (sl->sl_flags & SL_PID_VALID) {
2291 			bcopy(sl->sl_product_id, inq->inq_pid, 16);
2292 		} else {
2293 			bcopy(sbd_product_id, inq->inq_pid, 16);
2294 		}
2295 
2296 		if (sl->sl_flags & SL_REV_VALID) {
2297 			bcopy(sl->sl_revision, inq->inq_revision, 4);
2298 		} else {
2299 			bcopy(sbd_revision, inq->inq_revision, 4);
2300 		}
2301 
2302 		/* Adding Version Descriptors */
2303 		i = 0;
2304 		/* SAM-3 no version */
2305 		inq->inq_vd[i].inq_vd_msb = 0x00;
2306 		inq->inq_vd[i].inq_vd_lsb = 0x60;
2307 		i++;
2308 
2309 		/* transport */
2310 		switch (task->task_lport->lport_id->protocol_id) {
2311 		case PROTOCOL_FIBRE_CHANNEL:
2312 			inq->inq_vd[i].inq_vd_msb = 0x09;
2313 			inq->inq_vd[i].inq_vd_lsb = 0x00;
2314 			i++;
2315 			break;
2316 
2317 		case PROTOCOL_PARALLEL_SCSI:
2318 		case PROTOCOL_SSA:
2319 		case PROTOCOL_IEEE_1394:
2320 			/* Currently no claims of conformance */
2321 			break;
2322 
2323 		case PROTOCOL_SRP:
2324 			inq->inq_vd[i].inq_vd_msb = 0x09;
2325 			inq->inq_vd[i].inq_vd_lsb = 0x40;
2326 			i++;
2327 			break;
2328 
2329 		case PROTOCOL_iSCSI:
2330 			inq->inq_vd[i].inq_vd_msb = 0x09;
2331 			inq->inq_vd[i].inq_vd_lsb = 0x60;
2332 			i++;
2333 			break;
2334 
2335 		case PROTOCOL_SAS:
2336 		case PROTOCOL_ADT:
2337 		case PROTOCOL_ATAPI:
2338 		default:
2339 			/* Currently no claims of conformance */
2340 			break;
2341 		}
2342 
2343 		/* SPC-3 no version */
2344 		inq->inq_vd[i].inq_vd_msb = 0x03;
2345 		inq->inq_vd[i].inq_vd_lsb = 0x00;
2346 		i++;
2347 
2348 		/* SBC-2 no version */
2349 		inq->inq_vd[i].inq_vd_msb = 0x03;
2350 		inq->inq_vd[i].inq_vd_lsb = 0x20;
2351 
2352 		sbd_handle_short_read_transfers(task, initial_dbuf, p, cmd_size,
2353 		    min(cmd_size, xfer_size));
2354 		kmem_free(p, bsize);
2355 
2356 		return;
2357 	}
2358 
2359 	rw_enter(&sbd_global_prop_lock, RW_READER);
2360 	if (sl->sl_mgmt_url) {
2361 		mgmt_url_size = strlen(sl->sl_mgmt_url);
2362 		mgmt_url = sl->sl_mgmt_url;
2363 	} else if (sbd_mgmt_url) {
2364 		mgmt_url_size = strlen(sbd_mgmt_url);
2365 		mgmt_url = sbd_mgmt_url;
2366 	}
2367 
2368 	/*
2369 	 * EVPD handling
2370 	 */
2371 
2372 	/* Default 512 bytes may not be enough, increase bsize if necessary */
2373 	if (cdbp[2] == 0x83 || cdbp[2] == 0x85) {
2374 		if (bsize <  cmd_size)
2375 			bsize = cmd_size;
2376 	}
2377 	p = (uint8_t *)kmem_zalloc(bsize, KM_SLEEP);
2378 
2379 	switch (cdbp[2]) {
2380 	case 0x00:
2381 		page_length = 4 + (mgmt_url_size ? 1 : 0);
2382 
2383 		p[0] = byte0;
2384 		p[3] = page_length;
2385 		/* Supported VPD pages in ascending order (p[4] == 0 is page 0x00) */
2386 		{
2387 			uint8_t i = 5;
2388 
2389 			p[i++] = 0x80;
2390 			p[i++] = 0x83;
2391 			if (mgmt_url_size != 0)
2392 				p[i++] = 0x85;
2393 			p[i++] = 0x86;
2394 		}
2395 		xfer_size = page_length + 4;
2396 		break;
2397 
2398 	case 0x80:
2399 		if (sl->sl_serial_no_size) {
2400 			page_length = sl->sl_serial_no_size;
2401 			bcopy(sl->sl_serial_no, p + 4, sl->sl_serial_no_size);
2402 		} else {
2403 			/* if no serial number is specified, return 4 spaces */
2404 			page_length = 4;
2405 			bcopy("    ", p + 4, 4);
2406 		}
2407 		p[0] = byte0;
2408 		p[1] = 0x80;
2409 		p[3] = page_length;
2410 		xfer_size = page_length + 4;
2411 		break;
2412 
2413 	case 0x83:
2414 		xfer_size = stmf_scsilib_prepare_vpd_page83(task, p,
2415 		    bsize, byte0, STMF_VPD_LU_ID|STMF_VPD_TARGET_ID|
2416 		    STMF_VPD_TP_GROUP|STMF_VPD_RELATIVE_TP_ID);
2417 		break;
2418 
2419 	case 0x85:
2420 		if (mgmt_url_size == 0) {
2421 			stmf_scsilib_send_status(task, STATUS_CHECK,
2422 			    STMF_SAA_INVALID_FIELD_IN_CDB);
2423 			goto err_done;
2424 		}
2425 		{
2426 			uint16_t idx, newidx, sz, url_size;
2427 			char *url;
2428 
2429 			p[0] = byte0;
2430 			p[1] = 0x85;
2431 
2432 			idx = 4;
2433 			url = mgmt_url;
2434 			url_size = sbd_parse_mgmt_url(&url);
2435 			/* Creating Network Service Descriptors */
2436 			while (url_size != 0) {
2437 				/* Null-terminated and 4-byte aligned */
2438 				sz = url_size + 1;
2439 				sz += (sz % 4) ? 4 - (sz % 4) : 0;
2440 				newidx = idx + sz + 4;
2441 
2442 				if (newidx < bsize) {
2443 					/*
2444 					 * SPC-3r23 : Table 320  (Sec 7.6.5)
2445 					 * (Network service descriptor format)
2446 					 *
2447 					 * Note: Hard coding service type as
2448 					 * "Storage Configuration Service".
2449 					 */
2450 					p[idx] = 1;
2451 					SCSI_WRITE16(p + idx + 2, sz);
2452 					bcopy(url, p + idx + 4, url_size);
2453 					xfer_size = newidx + 4;
2454 				}
2455 				idx = newidx;
2456 
2457 				/* skip to next mgmt url if any */
2458 				url += url_size;
2459 				url_size = sbd_parse_mgmt_url(&url);
2460 			}
2461 
2462 			/* Total descriptor length */
2463 			SCSI_WRITE16(p + 2, idx - 4);
2464 			break;
2465 		}
2466 
2467 	case 0x86:
2468 		page_length = 0x3c;
2469 
2470 		p[0] = byte0;
2471 		p[1] = 0x86;		/* Page 86 response */
2472 		p[3] = page_length;
2473 
2474 		/*
2475 		 * Bits 0, 1, and 2 will need to be updated
2476 		 * to reflect the queue tag handling if/when
2477 		 * that is implemented.  For now, we're going
2478 		 * to claim support only for Simple TA.
2479 		 */
2480 		p[5] = 1;
2481 		xfer_size = page_length + 4;
2482 		break;
2483 
2484 	default:
2485 		stmf_scsilib_send_status(task, STATUS_CHECK,
2486 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2487 		goto err_done;
2488 	}
2489 
2490 	sbd_handle_short_read_transfers(task, initial_dbuf, p, cmd_size,
2491 	    min(cmd_size, xfer_size));
2492 err_done:
2493 	kmem_free(p, bsize);
2494 	rw_exit(&sbd_global_prop_lock);
2495 }
2496 
2497 stmf_status_t
2498 sbd_task_alloc(struct scsi_task *task)
2499 {
2500 	if ((task->task_lu_private =
2501 	    kmem_alloc(sizeof (sbd_cmd_t), KM_NOSLEEP)) != NULL) {
2502 		sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
2503 		scmd->flags = 0;
2504 		return (STMF_SUCCESS);
2505 	}
2506 	return (STMF_ALLOC_FAILURE);
2507 }
2508 
2509 void
2510 sbd_remove_it_handle(sbd_lu_t *sl, sbd_it_data_t *it)
2511 {
2512 	sbd_it_data_t **ppit;
2513 
2514 	sbd_pgr_remove_it_handle(sl, it);
2515 	mutex_enter(&sl->sl_lock);
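	/*
	 * Walk the list via a pointer to the link field itself so that
	 * unlinking the head and unlinking an interior node take the
	 * same assignment, with no special case for sl_it_list.
	 */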
2516 	for (ppit = &sl->sl_it_list; *ppit != NULL;
2517 	    ppit = &((*ppit)->sbd_it_next)) {
2518 		if ((*ppit) == it) {
2519 			*ppit = it->sbd_it_next;
2520 			break;
2521 		}
2522 	}
2523 	mutex_exit(&sl->sl_lock);
2524 
2525 	DTRACE_PROBE2(itl__nexus__end, stmf_lu_t *, sl->sl_lu,
2526 	    sbd_it_data_t *, it);
2527 
2528 	kmem_free(it, sizeof (*it));
2529 }
2530 
2531 void
2532 sbd_check_and_clear_scsi2_reservation(sbd_lu_t *sl, sbd_it_data_t *it)
2533 {
2534 	mutex_enter(&sl->sl_lock);
2535 	if ((sl->sl_flags & SL_LU_HAS_SCSI2_RESERVATION) == 0) {
2536 		/* If we don't have any reservations, just get out. */
2537 		mutex_exit(&sl->sl_lock);
2538 		return;
2539 	}
2540 
2541 	if (it == NULL) {
2542 		/* Find the I_T nexus which is holding the reservation. */
2543 		for (it = sl->sl_it_list; it != NULL; it = it->sbd_it_next) {
2544 			if (it->sbd_it_flags & SBD_IT_HAS_SCSI2_RESERVATION) {
2545 				ASSERT(it->sbd_it_session_id ==
2546 				    sl->sl_rs_owner_session_id);
2547 				break;
2548 			}
2549 		}
2550 		ASSERT(it != NULL);
2551 	} else {
2552 		/*
2553 		 * We were passed an I_T nexus. If this nexus does not hold
2554 		 * the reservation, do nothing. This is why this function is
2555 		 * called "check_and_clear".
2556 		 */
2557 		if ((it->sbd_it_flags & SBD_IT_HAS_SCSI2_RESERVATION) == 0) {
2558 			mutex_exit(&sl->sl_lock);
2559 			return;
2560 		}
2561 	}
2562 	it->sbd_it_flags &= ~SBD_IT_HAS_SCSI2_RESERVATION;
2563 	sl->sl_flags &= ~SL_LU_HAS_SCSI2_RESERVATION;
2564 	mutex_exit(&sl->sl_lock);
2565 }
2566 
2567 
2568 
2569 void
2570 sbd_new_task(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
2571 {
2572 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2573 	sbd_it_data_t *it;
2574 	uint8_t cdb0, cdb1;
2575 	stmf_status_t st_ret;
2576 
2577 	if ((it = task->task_lu_itl_handle) == NULL) {
2578 		mutex_enter(&sl->sl_lock);
2579 		for (it = sl->sl_it_list; it != NULL; it = it->sbd_it_next) {
2580 			if (it->sbd_it_session_id ==
2581 			    task->task_session->ss_session_id) {
2582 				mutex_exit(&sl->sl_lock);
2583 				stmf_scsilib_send_status(task, STATUS_BUSY, 0);
2584 				return;
2585 			}
2586 		}
2587 		it = (sbd_it_data_t *)kmem_zalloc(sizeof (*it), KM_NOSLEEP);
2588 		if (it == NULL) {
2589 			mutex_exit(&sl->sl_lock);
2590 			stmf_scsilib_send_status(task, STATUS_BUSY, 0);
2591 			return;
2592 		}
2593 		it->sbd_it_session_id = task->task_session->ss_session_id;
2594 		bcopy(task->task_lun_no, it->sbd_it_lun, 8);
2595 		it->sbd_it_next = sl->sl_it_list;
2596 		sl->sl_it_list = it;
2597 		mutex_exit(&sl->sl_lock);
2598 
2599 		DTRACE_PROBE1(itl__nexus__start, scsi_task *, task);
2600 
2601 		sbd_pgr_initialize_it(task, it);
2602 		if (stmf_register_itl_handle(task->task_lu, task->task_lun_no,
2603 		    task->task_session, it->sbd_it_session_id, it)
2604 		    != STMF_SUCCESS) {
2605 			sbd_remove_it_handle(sl, it);
2606 			stmf_scsilib_send_status(task, STATUS_BUSY, 0);
2607 			return;
2608 		}
2609 		task->task_lu_itl_handle = it;
2610 		if (sl->sl_access_state != SBD_LU_STANDBY) {
2611 			it->sbd_it_ua_conditions = SBD_UA_POR;
2612 		}
2613 	} else if (it->sbd_it_flags & SBD_IT_PGR_CHECK_FLAG) {
2614 		sbd_pgr_initialize_it(task, it);
2615 		mutex_enter(&sl->sl_lock);
2616 		it->sbd_it_flags &= ~SBD_IT_PGR_CHECK_FLAG;
2617 		mutex_exit(&sl->sl_lock);
2618 	}
2619 
2620 	if (task->task_mgmt_function) {
2621 		stmf_scsilib_handle_task_mgmt(task);
2622 		return;
2623 	}
2624 
2625 	/*
2626 	 * if we're transitioning between access
2627 	 * states, return NOT READY
2628 	 */
2629 	if (sl->sl_access_state == SBD_LU_TRANSITION_TO_STANDBY ||
2630 	    sl->sl_access_state == SBD_LU_TRANSITION_TO_ACTIVE) {
2631 		stmf_scsilib_send_status(task, STATUS_CHECK,
2632 		    STMF_SAA_LU_NO_ACCESS_UNAVAIL);
2633 		return;
2634 	}
2635 
2636 	/* Check UA conditions in the order specified by SAM-3 r14 5.3.2 */
2637 	if ((it->sbd_it_ua_conditions) && (task->task_cdb[0] != SCMD_INQUIRY)) {
2638 		uint32_t saa = 0;
2639 
2640 		mutex_enter(&sl->sl_lock);
2641 		if (it->sbd_it_ua_conditions & SBD_UA_POR) {
2642 			it->sbd_it_ua_conditions &= ~SBD_UA_POR;
2643 			saa = STMF_SAA_POR;
2644 		}
2645 		mutex_exit(&sl->sl_lock);
2646 		if (saa) {
2647 			stmf_scsilib_send_status(task, STATUS_CHECK, saa);
2648 			return;
2649 		}
2650 	}
2651 
2652 	/* Reservation conflict checks */
2653 	if (sl->sl_access_state == SBD_LU_ACTIVE) {
2654 		if (SBD_PGR_RSVD(sl->sl_pgr)) {
2655 			if (sbd_pgr_reservation_conflict(task)) {
2656 				stmf_scsilib_send_status(task,
2657 				    STATUS_RESERVATION_CONFLICT, 0);
2658 				return;
2659 			}
2660 		} else if ((sl->sl_flags & SL_LU_HAS_SCSI2_RESERVATION) &&
2661 		    ((it->sbd_it_flags & SBD_IT_HAS_SCSI2_RESERVATION) == 0)) {
2662 			if (!(SCSI2_CONFLICT_FREE_CMDS(task->task_cdb))) {
2663 				stmf_scsilib_send_status(task,
2664 				    STATUS_RESERVATION_CONFLICT, 0);
2665 				return;
2666 			}
2667 		}
2668 	}
2669 
2670 	/* Rest of the UA condition checks */
2671 	if ((it->sbd_it_ua_conditions) && (task->task_cdb[0] != SCMD_INQUIRY)) {
2672 		uint32_t saa = 0;
2673 
2674 		mutex_enter(&sl->sl_lock);
2675 		if (it->sbd_it_ua_conditions & SBD_UA_CAPACITY_CHANGED) {
2676 			it->sbd_it_ua_conditions &= ~SBD_UA_CAPACITY_CHANGED;
2677 			if ((task->task_cdb[0] == SCMD_READ_CAPACITY) ||
2678 			    ((task->task_cdb[0] == SCMD_SVC_ACTION_IN_G4) &&
2679 			    (task->task_cdb[1] ==
2680 			    SSVC_ACTION_READ_CAPACITY_G4))) {
2681 				saa = 0;
2682 			} else {
2683 				saa = STMF_SAA_CAPACITY_DATA_HAS_CHANGED;
2684 			}
2685 		} else if (it->sbd_it_ua_conditions &
2686 		    SBD_UA_MODE_PARAMETERS_CHANGED) {
2687 			it->sbd_it_ua_conditions &=
2688 			    ~SBD_UA_MODE_PARAMETERS_CHANGED;
2689 			saa = STMF_SAA_MODE_PARAMETERS_CHANGED;
2690 		} else if (it->sbd_it_ua_conditions &
2691 		    SBD_UA_ASYMMETRIC_ACCESS_CHANGED) {
2692 			it->sbd_it_ua_conditions &=
2693 			    ~SBD_UA_ASYMMETRIC_ACCESS_CHANGED;
2694 			saa = STMF_SAA_ASYMMETRIC_ACCESS_CHANGED;
2695 		} else if (it->sbd_it_ua_conditions &
2696 		    SBD_UA_ACCESS_STATE_TRANSITION) {
2697 			it->sbd_it_ua_conditions &=
2698 			    ~SBD_UA_ACCESS_STATE_TRANSITION;
2699 			saa = STMF_SAA_LU_NO_ACCESS_TRANSITION;
2700 		} else {
2701 			it->sbd_it_ua_conditions = 0;
2702 			saa = 0;
2703 		}
2704 		mutex_exit(&sl->sl_lock);
2705 		if (saa) {
2706 			stmf_scsilib_send_status(task, STATUS_CHECK, saa);
2707 			return;
2708 		}
2709 	}
2710 
2711 	cdb0 = task->task_cdb[0];
2712 	cdb1 = task->task_cdb[1];
2713 
2714 	if (sl->sl_access_state == SBD_LU_STANDBY) {
2715 		if (cdb0 != SCMD_INQUIRY &&
2716 		    cdb0 != SCMD_MODE_SENSE &&
2717 		    cdb0 != SCMD_MODE_SENSE_G1 &&
2718 		    cdb0 != SCMD_MODE_SELECT &&
2719 		    cdb0 != SCMD_MODE_SELECT_G1 &&
2720 		    cdb0 != SCMD_RESERVE &&
2721 		    cdb0 != SCMD_RELEASE &&
2722 		    cdb0 != SCMD_PERSISTENT_RESERVE_OUT &&
2723 		    cdb0 != SCMD_PERSISTENT_RESERVE_IN &&
2724 		    cdb0 != SCMD_REQUEST_SENSE &&
2725 		    cdb0 != SCMD_READ_CAPACITY &&
2726 		    cdb0 != SCMD_TEST_UNIT_READY &&
2727 		    cdb0 != SCMD_START_STOP &&
2728 		    cdb0 != SCMD_READ &&
2729 		    cdb0 != SCMD_READ_G1 &&
2730 		    cdb0 != SCMD_READ_G4 &&
2731 		    cdb0 != SCMD_READ_G5 &&
2732 		    !(cdb0 == SCMD_SVC_ACTION_IN_G4 &&
2733 		    cdb1 == SSVC_ACTION_READ_CAPACITY_G4) &&
2734 		    !(cdb0 == SCMD_MAINTENANCE_IN &&
2735 		    (cdb1 & 0x1F) == 0x05) &&
2736 		    !(cdb0 == SCMD_MAINTENANCE_IN &&
2737 		    (cdb1 & 0x1F) == 0x0A)) {
2738 			stmf_scsilib_send_status(task, STATUS_CHECK,
2739 			    STMF_SAA_LU_NO_ACCESS_STANDBY);
2740 			return;
2741 		}
2742 
2743 		/*
2744 		 * Is this a short write?
2745 		 * If so, we'll need to wait until we have the buffer
2746 		 * before proxying the command.
2747 		 */
2748 		switch (cdb0) {
2749 			case SCMD_MODE_SELECT:
2750 			case SCMD_MODE_SELECT_G1:
2751 			case SCMD_PERSISTENT_RESERVE_OUT:
2752 				break;
2753 			default:
2754 				st_ret = stmf_proxy_scsi_cmd(task,
2755 				    initial_dbuf);
2756 				if (st_ret != STMF_SUCCESS) {
2757 					stmf_scsilib_send_status(task,
2758 					    STATUS_CHECK,
2759 					    STMF_SAA_LU_NO_ACCESS_UNAVAIL);
2760 				}
2761 				return;
2762 		}
2763 	}
2764 
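	/*
	 * The low 5 bits of an opcode are the command code and the high
	 * 3 bits the group code, so masking with 0x1F folds
	 * READ(6)/(10)/(12)/(16) (0x08, 0x28, 0xA8, 0x88) onto SCMD_READ
	 * and the corresponding WRITE opcodes onto SCMD_WRITE.
	 */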
2765 	cdb0 = task->task_cdb[0] & 0x1F;
2766 
2767 	if ((cdb0 == SCMD_READ) || (cdb0 == SCMD_WRITE)) {
2768 		if (task->task_additional_flags & TASK_AF_PORT_LOAD_HIGH) {
2769 			stmf_scsilib_send_status(task, STATUS_QFULL, 0);
2770 			return;
2771 		}
2772 		if (cdb0 == SCMD_READ) {
2773 			sbd_handle_read(task, initial_dbuf);
2774 			return;
2775 		}
2776 		sbd_handle_write(task, initial_dbuf);
2777 		return;
2778 	}
2779 
2780 	cdb0 = task->task_cdb[0];
2781 	cdb1 = task->task_cdb[1];
2782 
2783 	if (cdb0 == SCMD_INQUIRY) {		/* Inquiry */
2784 		sbd_handle_inquiry(task, initial_dbuf);
2785 		return;
2786 	}
2787 
2788 	if (cdb0 == SCMD_PERSISTENT_RESERVE_OUT) {
2789 		sbd_handle_pgr_out_cmd(task, initial_dbuf);
2790 		return;
2791 	}
2792 
2793 	if (cdb0 == SCMD_PERSISTENT_RESERVE_IN) {
2794 		sbd_handle_pgr_in_cmd(task, initial_dbuf);
2795 		return;
2796 	}
2797 
2798 	if (cdb0 == SCMD_RELEASE) {
2799 		if (cdb1) {
2800 			stmf_scsilib_send_status(task, STATUS_CHECK,
2801 			    STMF_SAA_INVALID_FIELD_IN_CDB);
2802 			return;
2803 		}
2804 
2805 		mutex_enter(&sl->sl_lock);
2806 		if (sl->sl_flags & SL_LU_HAS_SCSI2_RESERVATION) {
2807 			/* If not the owner, don't release it; just return good */
2808 			if (it->sbd_it_session_id !=
2809 			    sl->sl_rs_owner_session_id) {
2810 				mutex_exit(&sl->sl_lock);
2811 				stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2812 				return;
2813 			}
2814 		}
2815 		sl->sl_flags &= ~SL_LU_HAS_SCSI2_RESERVATION;
2816 		it->sbd_it_flags &= ~SBD_IT_HAS_SCSI2_RESERVATION;
2817 		mutex_exit(&sl->sl_lock);
2818 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2819 		return;
2820 	}
2821 
2822 	if (cdb0 == SCMD_RESERVE) {
2823 		if (cdb1) {
2824 			stmf_scsilib_send_status(task, STATUS_CHECK,
2825 			    STMF_SAA_INVALID_FIELD_IN_CDB);
2826 			return;
2827 		}
2828 
2829 		mutex_enter(&sl->sl_lock);
2830 		if (sl->sl_flags & SL_LU_HAS_SCSI2_RESERVATION) {
2831 			/* If not owner, return conflict status */
2832 			if (it->sbd_it_session_id !=
2833 			    sl->sl_rs_owner_session_id) {
2834 				mutex_exit(&sl->sl_lock);
2835 				stmf_scsilib_send_status(task,
2836 				    STATUS_RESERVATION_CONFLICT, 0);
2837 				return;
2838 			}
2839 		}
2840 		sl->sl_flags |= SL_LU_HAS_SCSI2_RESERVATION;
2841 		it->sbd_it_flags |= SBD_IT_HAS_SCSI2_RESERVATION;
2842 		sl->sl_rs_owner_session_id = it->sbd_it_session_id;
2843 		mutex_exit(&sl->sl_lock);
2844 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2845 		return;
2846 	}
2847 
2848 	if (cdb0 == SCMD_REQUEST_SENSE) {
2849 		/*
2850 		 * LU provider needs to store unretrieved sense data
2851 		 * (e.g. after power-on/reset).  For now, we'll just
2852 		 * return good status with no sense.
2853 		 */
2854 
2855 		if ((cdb1 & ~1) || task->task_cdb[2] || task->task_cdb[3] ||
2856 		    task->task_cdb[5]) {
2857 			stmf_scsilib_send_status(task, STATUS_CHECK,
2858 			    STMF_SAA_INVALID_FIELD_IN_CDB);
2859 		} else {
2860 			stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2861 		}
2862 
2863 		return;
2864 	}
2865 
2866 	/* Report Target Port Groups */
2867 	if ((cdb0 == SCMD_MAINTENANCE_IN) &&
2868 	    ((cdb1 & 0x1F) == 0x0A)) {
2869 		stmf_scsilib_handle_report_tpgs(task, initial_dbuf);
2870 		return;
2871 	}
2872 
2873 	/* Report Identifying Information */
2874 	if ((cdb0 == SCMD_MAINTENANCE_IN) &&
2875 	    ((cdb1 & 0x1F) == 0x05)) {
2876 		sbd_handle_identifying_info(task, initial_dbuf);
2877 		return;
2878 	}
2879 
2880 	if (cdb0 == SCMD_START_STOP) {			/* Start stop */
2881 		task->task_cmd_xfer_length = 0;
2882 		if (task->task_cdb[4] & 0xFC) {
2883 			stmf_scsilib_send_status(task, STATUS_CHECK,
2884 			    STMF_SAA_INVALID_FIELD_IN_CDB);
2885 			return;
2886 		}
2887 		if (task->task_cdb[4] & 2) {
2888 			stmf_scsilib_send_status(task, STATUS_CHECK,
2889 			    STMF_SAA_INVALID_FIELD_IN_CDB);
2890 		} else {
2891 			stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2892 		}
2893 		return;
2894 
2895 	}
2896 
2897 	if ((cdb0 == SCMD_MODE_SENSE) || (cdb0 == SCMD_MODE_SENSE_G1)) {
2898 		uint8_t *p;
2899 		p = kmem_zalloc(512, KM_SLEEP);
2900 		sbd_handle_mode_sense(task, initial_dbuf, p);
2901 		kmem_free(p, 512);
2902 		return;
2903 	}
2904 
2905 	if ((cdb0 == SCMD_MODE_SELECT) || (cdb0 == SCMD_MODE_SELECT_G1)) {
2906 		sbd_handle_mode_select(task, initial_dbuf);
2907 		return;
2908 	}
2909 
2910 	if (cdb0 == SCMD_TEST_UNIT_READY) {	/* Test unit ready */
2911 		task->task_cmd_xfer_length = 0;
2912 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2913 		return;
2914 	}
2915 
2916 	if (cdb0 == SCMD_READ_CAPACITY) {		/* Read Capacity */
2917 		sbd_handle_read_capacity(task, initial_dbuf);
2918 		return;
2919 	}
2920 
2921 	if (cdb0 == SCMD_SVC_ACTION_IN_G4) { 	/* Read Capacity or read long */
2922 		if (cdb1 == SSVC_ACTION_READ_CAPACITY_G4) {
2923 			sbd_handle_read_capacity(task, initial_dbuf);
2924 			return;
2925 		/*
2926 		 * } else if (cdb1 == SSVC_ACTION_READ_LONG_G4) {
2927 		 * 	sbd_handle_read(task, initial_dbuf);
2928 		 * 	return;
2929 		 */
2930 		}
2931 	}
2932 
2933 	/*
2934 	 * if (cdb0 == SCMD_SVC_ACTION_OUT_G4) {
2935 	 *	if (cdb1 == SSVC_ACTION_WRITE_LONG_G4) {
2936 	 *		 sbd_handle_write(task, initial_dbuf);
2937 	 * 		return;
2938 	 *	}
2939 	 * }
2940 	 */
2941 
2942 	if (cdb0 == SCMD_VERIFY) {
2943 		/*
2944 		 * Something more likely needs to be done here.
2945 		 */
2946 		task->task_cmd_xfer_length = 0;
2947 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2948 		return;
2949 	}
2950 
2951 	if (cdb0 == SCMD_SYNCHRONIZE_CACHE ||
2952 	    cdb0 == SCMD_SYNCHRONIZE_CACHE_G4) {
2953 		sbd_handle_sync_cache(task, initial_dbuf);
2954 		return;
2955 	}
2956 
2957 	/*
2958 	 * Write and Verify use the same path as write, but don't clutter the
2959 	 * performance path above with checking for write_verify opcodes.  We
2960 	 * rely on zfs's integrity checks for the "Verify" part of Write &
2961 	 * Verify.  (Even if we did a read to "verify" we'd merely be reading
2962 	 * cache, not actual media.)
2963 	 * Therefore we
2964 	 *   a) only support this for zvol-backed LUs (SL_ZFS_META set), and
2965 	 *   b) run the IO through the normal write path with a forced
2966 	 *	sbd_flush_data_cache at the end.
2967 	 */
2968 
2969 	if ((sl->sl_flags & SL_ZFS_META) && (
2970 	    cdb0 == SCMD_WRITE_VERIFY ||
2971 	    cdb0 == SCMD_WRITE_VERIFY_G4 ||
2972 	    cdb0 == SCMD_WRITE_VERIFY_G5)) {
2973 		sbd_handle_write(task, initial_dbuf);
2974 		return;
2975 	}
2976 
2977 	stmf_scsilib_send_status(task, STATUS_CHECK, STMF_SAA_INVALID_OPCODE);
2978 }
2979 
2980 void
2981 sbd_dbuf_xfer_done(struct scsi_task *task, struct stmf_data_buf *dbuf)
2982 {
2983 	sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
2984 
2985 	if (dbuf->db_flags & DB_LU_DATA_BUF) {
2986 		/*
2987 		 * Buffers passed in from the LU always complete
2988 		 * even if the task is no longer active.
2989 		 */
2990 		ASSERT(task->task_additional_flags & TASK_AF_ACCEPT_LU_DBUF);
2991 		ASSERT(scmd);
2992 		switch (scmd->cmd_type) {
2993 		case (SBD_CMD_SCSI_READ):
2994 			sbd_handle_sgl_read_xfer_completion(task, scmd, dbuf);
2995 			break;
2996 		case (SBD_CMD_SCSI_WRITE):
2997 			sbd_handle_sgl_write_xfer_completion(task, scmd, dbuf);
2998 			break;
2999 		default:
3000 			cmn_err(CE_PANIC, "Unknown cmd type, task = %p",
3001 			    (void *)task);
3002 			break;
3003 		}
3004 		return;
3005 	}
3006 
3007 	if ((scmd == NULL) || ((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0))
3008 		return;
3009 
3010 	switch (scmd->cmd_type) {
3011 	case (SBD_CMD_SCSI_READ):
3012 		sbd_handle_read_xfer_completion(task, scmd, dbuf);
3013 		break;
3014 
3015 	case (SBD_CMD_SCSI_WRITE):
3016 		sbd_handle_write_xfer_completion(task, scmd, dbuf, 1);
3017 		break;
3018 
3019 	case (SBD_CMD_SMALL_READ):
3020 		sbd_handle_short_read_xfer_completion(task, scmd, dbuf);
3021 		break;
3022 
3023 	case (SBD_CMD_SMALL_WRITE):
3024 		sbd_handle_short_write_xfer_completion(task, dbuf);
3025 		break;
3026 
3027 	default:
3028 		cmn_err(CE_PANIC, "Unknown cmd type, task = %p", (void *)task);
3029 		break;
3030 	}
3031 }
3032 
3033 /* ARGSUSED */
3034 void
3035 sbd_send_status_done(struct scsi_task *task)
3036 {
3037 	cmn_err(CE_PANIC,
3038 	    "sbd_send_status_done: this should not have been called");
3039 }
3040 
3041 void
3042 sbd_task_free(struct scsi_task *task)
3043 {
3044 	if (task->task_lu_private) {
3045 		sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
3046 		if (scmd->flags & SBD_SCSI_CMD_ACTIVE) {
3047 			cmn_err(CE_PANIC, "cmd is active, task = %p",
3048 			    (void *)task);
3049 		}
3050 		kmem_free(scmd, sizeof (sbd_cmd_t));
3051 	}
3052 }
3053 
3054 /*
3055  * Aborts are synchronous w.r.t. I/O AND
3056  * all the I/O which SBD does is synchronous AND
3057  * everything within a task is single-threaded.
3058  *   IT MEANS
3059  * if this function is called, we are doing nothing with this task
3060  * inside the sbd module.
3061  */
3062 /* ARGSUSED */
3063 stmf_status_t
3064 sbd_abort(struct stmf_lu *lu, int abort_cmd, void *arg, uint32_t flags)
3065 {
3066 	sbd_lu_t *sl = (sbd_lu_t *)lu->lu_provider_private;
3067 	scsi_task_t *task;
3068 
3069 	if (abort_cmd == STMF_LU_RESET_STATE) {
3070 		return (sbd_lu_reset_state(lu));
3071 	}
3072 
3073 	if (abort_cmd == STMF_LU_ITL_HANDLE_REMOVED) {
3074 		sbd_check_and_clear_scsi2_reservation(sl, (sbd_it_data_t *)arg);
3075 		sbd_remove_it_handle(sl, (sbd_it_data_t *)arg);
3076 		return (STMF_SUCCESS);
3077 	}
3078 
3079 	ASSERT(abort_cmd == STMF_LU_ABORT_TASK);
3080 	task = (scsi_task_t *)arg;
3081 	if (task->task_lu_private) {
3082 		sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
3083 
3084 		if (scmd->flags & SBD_SCSI_CMD_ACTIVE) {
3085 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
3086 			return (STMF_ABORT_SUCCESS);
3087 		}
3088 	}
3089 
3090 	return (STMF_NOT_FOUND);
3091 }
3092 
3093 /*
3094  * This function is called during task clean-up if the
3095  * DB_LU_DATA_BUF flag is set on the dbuf. It should only be called
3096  * for abort processing after sbd_abort has been called for the task.
3097  */
3098 void
3099 sbd_dbuf_free(struct scsi_task *task, struct stmf_data_buf *dbuf)
3100 {
3101 	sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
3102 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
3103 
3104 	ASSERT(dbuf->db_lu_private);
3105 	ASSERT(scmd && scmd->nbufs > 0);
3106 	ASSERT((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0);
3107 	ASSERT(dbuf->db_flags & DB_LU_DATA_BUF);
3108 	ASSERT(task->task_additional_flags & TASK_AF_ACCEPT_LU_DBUF);
3109 	ASSERT((curthread->t_flag & T_INTR_THREAD) == 0);
3110 
3111 	if (scmd->cmd_type == SBD_CMD_SCSI_READ) {
3112 		sbd_zvol_rele_read_bufs(sl, dbuf);
3113 	} else if (scmd->cmd_type == SBD_CMD_SCSI_WRITE) {
3114 		sbd_zvol_rele_write_bufs_abort(sl, dbuf);
3115 	} else {
3116 		cmn_err(CE_PANIC, "Unknown cmd type %d, task = %p",
3117 		    scmd->cmd_type, (void *)task);
3118 	}
3119 	if (--scmd->nbufs == 0)
3120 		rw_exit(&sl->sl_access_state_lock);
3121 	stmf_teardown_dbuf(task, dbuf);
3122 	stmf_free(dbuf);
3123 }
3124 
3125 /* ARGSUSED */
3126 void
3127 sbd_ctl(struct stmf_lu *lu, int cmd, void *arg)
3128 {
3129 	sbd_lu_t *sl = (sbd_lu_t *)lu->lu_provider_private;
3130 	stmf_change_status_t st;
3131 
3132 	ASSERT((cmd == STMF_CMD_LU_ONLINE) ||
3133 	    (cmd == STMF_CMD_LU_OFFLINE) ||
3134 	    (cmd == STMF_ACK_LU_ONLINE_COMPLETE) ||
3135 	    (cmd == STMF_ACK_LU_OFFLINE_COMPLETE));
3136 
3137 	st.st_completion_status = STMF_SUCCESS;
3138 	st.st_additional_info = NULL;
3139 
3140 	switch (cmd) {
3141 	case STMF_CMD_LU_ONLINE:
3142 		if (sl->sl_state == STMF_STATE_ONLINE)
3143 			st.st_completion_status = STMF_ALREADY;
3144 		else if (sl->sl_state != STMF_STATE_OFFLINE)
3145 			st.st_completion_status = STMF_FAILURE;
3146 		if (st.st_completion_status == STMF_SUCCESS) {
3147 			sl->sl_state = STMF_STATE_ONLINE;
3148 			sl->sl_state_not_acked = 1;
3149 		}
3150 		(void) stmf_ctl(STMF_CMD_LU_ONLINE_COMPLETE, lu, &st);
3151 		break;
3152 
3153 	case STMF_CMD_LU_OFFLINE:
3154 		if (sl->sl_state == STMF_STATE_OFFLINE)
3155 			st.st_completion_status = STMF_ALREADY;
3156 		else if (sl->sl_state != STMF_STATE_ONLINE)
3157 			st.st_completion_status = STMF_FAILURE;
3158 		if (st.st_completion_status == STMF_SUCCESS) {
3159 			sl->sl_flags &= ~(SL_MEDIUM_REMOVAL_PREVENTED |
3160 			    SL_LU_HAS_SCSI2_RESERVATION);
3161 			sl->sl_state = STMF_STATE_OFFLINE;
3162 			sl->sl_state_not_acked = 1;
3163 			sbd_pgr_reset(sl);
3164 		}
3165 		(void) stmf_ctl(STMF_CMD_LU_OFFLINE_COMPLETE, lu, &st);
3166 		break;
3167 
3168 	case STMF_ACK_LU_ONLINE_COMPLETE:
3169 		/* Fallthrough */
3170 	case STMF_ACK_LU_OFFLINE_COMPLETE:
3171 		sl->sl_state_not_acked = 0;
3172 		break;
3173 
3174 	}
3175 }
3176 
3177 /* ARGSUSED */
3178 stmf_status_t
3179 sbd_info(uint32_t cmd, stmf_lu_t *lu, void *arg, uint8_t *buf,
3180     uint32_t *bufsizep)
3181 {
3182 	return (STMF_NOT_SUPPORTED);
3183 }
3184 
3185 stmf_status_t
3186 sbd_lu_reset_state(stmf_lu_t *lu)
3187 {
3188 	sbd_lu_t *sl = (sbd_lu_t *)lu->lu_provider_private;
3189 
3190 	mutex_enter(&sl->sl_lock);
3191 	if (sl->sl_flags & SL_SAVED_WRITE_CACHE_DISABLE) {
3192 		sl->sl_flags |= SL_WRITEBACK_CACHE_DISABLE;
3193 		mutex_exit(&sl->sl_lock);
3194 		if (sl->sl_access_state == SBD_LU_ACTIVE) {
3195 			(void) sbd_wcd_set(1, sl);
3196 		}
3197 	} else {
3198 		sl->sl_flags &= ~SL_WRITEBACK_CACHE_DISABLE;
3199 		mutex_exit(&sl->sl_lock);
3200 		if (sl->sl_access_state == SBD_LU_ACTIVE) {
3201 			(void) sbd_wcd_set(0, sl);
3202 		}
3203 	}
3204 	sbd_pgr_reset(sl);
3205 	sbd_check_and_clear_scsi2_reservation(sl, NULL);
3206 	if (stmf_deregister_all_lu_itl_handles(lu) != STMF_SUCCESS) {
3207 		return (STMF_FAILURE);
3208 	}
3209 	return (STMF_SUCCESS);
3210 }
3211 
3212 sbd_status_t
3213 sbd_flush_data_cache(sbd_lu_t *sl, int fsync_done)
3214 {
3215 	int r = 0;
3216 	int ret;
3217 
3218 	if (fsync_done)
3219 		goto over_fsync;
3220 	if ((sl->sl_data_vtype == VREG) || (sl->sl_data_vtype == VBLK)) {
3221 		if (VOP_FSYNC(sl->sl_data_vp, FSYNC, kcred, NULL))
3222 			return (SBD_FAILURE);
3223 	}
3224 over_fsync:
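	/*
	 * Character and block devices may have a volatile write cache of
	 * their own, so ask the driver to flush it as well. If the driver
	 * doesn't implement DKIOCFLUSHWRITECACHE, remember that in
	 * SL_NO_DATA_DKIOFLUSH so we don't issue a doomed ioctl on every
	 * SYNCHRONIZE CACHE.
	 */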
3225 	if (((sl->sl_data_vtype == VCHR) || (sl->sl_data_vtype == VBLK)) &&
3226 	    ((sl->sl_flags & SL_NO_DATA_DKIOFLUSH) == 0)) {
3227 		ret = VOP_IOCTL(sl->sl_data_vp, DKIOCFLUSHWRITECACHE, NULL,
3228 		    FKIOCTL, kcred, &r, NULL);
3229 		if ((ret == ENOTTY) || (ret == ENOTSUP)) {
3230 			mutex_enter(&sl->sl_lock);
3231 			sl->sl_flags |= SL_NO_DATA_DKIOFLUSH;
3232 			mutex_exit(&sl->sl_lock);
3233 		} else if (ret != 0) {
3234 			return (SBD_FAILURE);
3235 		}
3236 	}
3237 
3238 	return (SBD_SUCCESS);
3239 }
3240 
3241 /* ARGSUSED */
3242 static void
3243 sbd_handle_sync_cache(struct scsi_task *task,
3244     struct stmf_data_buf *initial_dbuf)
3245 {
3246 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
3247 	uint64_t	lba, laddr;
3248 	sbd_status_t	sret;
3249 	uint32_t	len;
3250 	int		is_g4 = 0;
3251 	int		immed;
3252 
3253 	task->task_cmd_xfer_length = 0;
3254 	/*
3255 	 * Determine if this is a 10 or 16 byte CDB
3256 	 */
3257 
3258 	if (task->task_cdb[0] == SCMD_SYNCHRONIZE_CACHE_G4)
3259 		is_g4 = 1;
3260 
3261 	/*
3262 	 * Determine other requested parameters
3263 	 *
3264 	 * We don't have a non-volatile cache, so don't care about SYNC_NV.
3265 	 * Do not support the IMMED bit.
3266 	 */
3267 
3268 	immed = (task->task_cdb[1] & 0x02);
3269 
3270 	if (immed) {
3271 		stmf_scsilib_send_status(task, STATUS_CHECK,
3272 		    STMF_SAA_INVALID_FIELD_IN_CDB);
3273 		return;
3274 	}
3275 
3276 	/*
3277 	 * Check to be sure we're not being asked to sync an LBA
3278 	 * that is out of range.  While checking, verify reserved fields.
3279 	 */
3280 
3281 	if (is_g4) {
3282 		if ((task->task_cdb[1] & 0xf9) || task->task_cdb[14] ||
3283 		    task->task_cdb[15]) {
3284 			stmf_scsilib_send_status(task, STATUS_CHECK,
3285 			    STMF_SAA_INVALID_FIELD_IN_CDB);
3286 			return;
3287 		}
3288 
3289 		lba = READ_SCSI64(&task->task_cdb[2], uint64_t);
3290 		len = READ_SCSI32(&task->task_cdb[10], uint32_t);
3291 	} else {
3292 		if ((task->task_cdb[1] & 0xf9) || task->task_cdb[6] ||
3293 		    task->task_cdb[9]) {
3294 			stmf_scsilib_send_status(task, STATUS_CHECK,
3295 			    STMF_SAA_INVALID_FIELD_IN_CDB);
3296 			return;
3297 		}
3298 
3299 		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
3300 		len = READ_SCSI16(&task->task_cdb[7], uint32_t);
3301 	}
3302 
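	/*
	 * Convert the block address and count to bytes for the range
	 * check; e.g. with 512-byte blocks, lba 0x1000 and len 0x10
	 * become laddr 0x200000 and len 0x2000. The extent is only
	 * validated here -- sbd_flush_data_cache() flushes the entire
	 * cache regardless of the requested range.
	 */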
3303 	laddr = lba << sl->sl_data_blocksize_shift;
3304 	len <<= sl->sl_data_blocksize_shift;
3305 
3306 	if ((laddr + (uint64_t)len) > sl->sl_lu_size) {
3307 		stmf_scsilib_send_status(task, STATUS_CHECK,
3308 		    STMF_SAA_LBA_OUT_OF_RANGE);
3309 		return;
3310 	}
3311 
3312 	sret = sbd_flush_data_cache(sl, 0);
3313 	if (sret != SBD_SUCCESS) {
3314 		stmf_scsilib_send_status(task, STATUS_CHECK,
3315 		    STMF_SAA_WRITE_ERROR);
3316 		return;
3317 	}
3318 
3319 	stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3320 }
3321