xref: /illumos-gate/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_scsi.c (revision f012ee0c3db17469b492c2cf757226f3d7b1ebbc)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
24  * Copyright (c) 2013 by Delphix. All rights reserved.
25  */
26 
27 #include <sys/conf.h>
28 #include <sys/file.h>
29 #include <sys/ddi.h>
30 #include <sys/sunddi.h>
31 #include <sys/modctl.h>
32 #include <sys/scsi/scsi.h>
33 #include <sys/scsi/impl/scsi_reset_notify.h>
34 #include <sys/scsi/generic/mode.h>
35 #include <sys/disp.h>
36 #include <sys/byteorder.h>
37 #include <sys/atomic.h>
38 #include <sys/sdt.h>
39 #include <sys/dkio.h>
40 
41 #include <sys/stmf.h>
42 #include <sys/lpif.h>
43 #include <sys/portif.h>
44 #include <sys/stmf_ioctl.h>
45 #include <sys/stmf_sbd_ioctl.h>
46 
47 #include "stmf_sbd.h"
48 #include "sbd_impl.h"
49 
/*
 * SCSI2_CONFLICT_FREE_CMDS(cdb)
 *
 * Evaluates to non-zero when the CDB byte array `cdb' carries one of the
 * opcode / service-action combinations enumerated below.  Judging by the
 * macro name and the tables cited, these are presumably the commands that
 * do not conflict with a SCSI-2 reservation held by another initiator;
 * confirm against the cited SPC/SBC tables.
 *
 * Only bytes 0, 1 and 4 of the CDB are inspected.  The argument is
 * expanded many times, so it must be free of side effects.  It is fully
 * parenthesized, so any pointer expression (e.g. `buf + 1') is accepted.
 */
#define	SCSI2_CONFLICT_FREE_CMDS(cdb)	( \
	/* ----------------------- */                                      \
	/* Refer Both		   */                                      \
	/* SPC-2 (rev 20) Table 10 */                                      \
	/* SPC-3 (rev 23) Table 31 */                                      \
	/* ----------------------- */                                      \
	(((cdb)[0]) == SCMD_INQUIRY)					|| \
	(((cdb)[0]) == SCMD_LOG_SENSE_G1)				|| \
	(((cdb)[0]) == SCMD_RELEASE)					|| \
	(((cdb)[0]) == SCMD_RELEASE_G1)					|| \
	(((cdb)[0]) == SCMD_REPORT_LUNS)				|| \
	(((cdb)[0]) == SCMD_REQUEST_SENSE)				|| \
	/* PREVENT ALLOW MEDIUM REMOVAL with prevent == 0 */               \
	((((cdb)[0]) == SCMD_DOORLOCK) && ((((cdb)[4]) & 0x3) == 0))	|| \
	/* SERVICE ACTION IN with READ MEDIA SERIAL NUMBER (0x01) */       \
	((((cdb)[0]) == SCMD_SVC_ACTION_IN_G5) && (                        \
	    (((cdb)[1]) & 0x1F) == 0x01))				|| \
	/* MAINTENANCE IN with service actions REPORT ALIASES (0x0B) */    \
	/* REPORT DEVICE IDENTIFIER (0x05)  REPORT PRIORITY (0x0E) */      \
	/* REPORT TARGET PORT GROUPS (0x0A) REPORT TIMESTAMP (0x0F) */     \
	((((cdb)[0]) == SCMD_MAINTENANCE_IN) && (                          \
	    ((((cdb)[1]) & 0x1F) == 0x0B) ||                               \
	    ((((cdb)[1]) & 0x1F) == 0x05) ||                               \
	    ((((cdb)[1]) & 0x1F) == 0x0E) ||                               \
	    ((((cdb)[1]) & 0x1F) == 0x0A) ||                               \
	    ((((cdb)[1]) & 0x1F) == 0x0F)))				|| \
	/* ----------------------- */                                      \
	/* SBC-3 (rev 17) Table 3  */                                      \
	/* ----------------------- */                                      \
	/* READ CAPACITY(10) */                                            \
	(((cdb)[0]) == SCMD_READ_CAPACITY)				|| \
	/* READ CAPACITY(16) */                                            \
	((((cdb)[0]) == SCMD_SVC_ACTION_IN_G4) && (                        \
	    (((cdb)[1]) & 0x1F) == 0x10))				|| \
	/* START STOP UNIT with START bit 0 and POWER CONDITION 0  */      \
	((((cdb)[0]) == SCMD_START_STOP) && (                              \
	    ((((cdb)[4]) & 0xF0) == 0) && ((((cdb)[4]) & 0x01) == 0))))
/* End of SCSI2_CONFLICT_FREE_CMDS */
88 
89 stmf_status_t sbd_lu_reset_state(stmf_lu_t *lu);
90 static void sbd_handle_sync_cache(struct scsi_task *task,
91     struct stmf_data_buf *initial_dbuf);
92 void sbd_handle_read_xfer_completion(struct scsi_task *task,
93     sbd_cmd_t *scmd, struct stmf_data_buf *dbuf);
94 void sbd_handle_short_write_xfer_completion(scsi_task_t *task,
95     stmf_data_buf_t *dbuf);
96 void sbd_handle_short_write_transfers(scsi_task_t *task,
97     stmf_data_buf_t *dbuf, uint32_t cdb_xfer_size);
98 void sbd_handle_mode_select_xfer(scsi_task_t *task, uint8_t *buf,
99     uint32_t buflen);
100 void sbd_handle_mode_select(scsi_task_t *task, stmf_data_buf_t *dbuf);
101 void sbd_handle_identifying_info(scsi_task_t *task, stmf_data_buf_t *dbuf);
102 
103 static void sbd_handle_unmap_xfer(scsi_task_t *task, uint8_t *buf,
104     uint32_t buflen);
105 static void sbd_handle_unmap(scsi_task_t *task, stmf_data_buf_t *dbuf);
106 
107 extern void sbd_pgr_initialize_it(scsi_task_t *, sbd_it_data_t *);
108 extern int sbd_pgr_reservation_conflict(scsi_task_t *);
109 extern void sbd_pgr_reset(sbd_lu_t *);
110 extern void sbd_pgr_remove_it_handle(sbd_lu_t *, sbd_it_data_t *);
111 extern void sbd_handle_pgr_in_cmd(scsi_task_t *, stmf_data_buf_t *);
112 extern void sbd_handle_pgr_out_cmd(scsi_task_t *, stmf_data_buf_t *);
113 extern void sbd_handle_pgr_out_data(scsi_task_t *, stmf_data_buf_t *);
114 void sbd_do_sgl_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
115     int first_xfer);
116 static void sbd_handle_write_same(scsi_task_t *task,
117     struct stmf_data_buf *initial_dbuf);
118 static void sbd_do_write_same_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
119     struct stmf_data_buf *dbuf, uint8_t dbuf_reusable);
120 static void sbd_handle_write_same_xfer_completion(struct scsi_task *task,
121     sbd_cmd_t *scmd, struct stmf_data_buf *dbuf, uint8_t dbuf_reusable);
/*
 * IMPORTANT NOTE:
 * =================
 * The whole world here is based on the assumption that everything within
 * a scsi task executes in a single threaded manner, even the aborts.
 * Don't ever change that. There won't be any performance gain but there
 * will be tons of race conditions.
 */
130 
131 void
132 sbd_do_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
133 					struct stmf_data_buf *dbuf)
134 {
135 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
136 	uint64_t laddr;
137 	uint32_t len, buflen, iolen;
138 	int ndx;
139 	int bufs_to_take;
140 
141 	/* Lets try not to hog all the buffers the port has. */
142 	bufs_to_take = ((task->task_max_nbufs > 2) &&
143 	    (task->task_cmd_xfer_length < (32 * 1024))) ? 2 :
144 	    task->task_max_nbufs;
145 
146 	len = scmd->len > dbuf->db_buf_size ? dbuf->db_buf_size : scmd->len;
147 	laddr = scmd->addr + scmd->current_ro;
148 
149 	for (buflen = 0, ndx = 0; (buflen < len) &&
150 	    (ndx < dbuf->db_sglist_length); ndx++) {
151 		iolen = min(len - buflen, dbuf->db_sglist[ndx].seg_length);
152 		if (iolen == 0)
153 			break;
154 		if (sbd_data_read(sl, task, laddr, (uint64_t)iolen,
155 		    dbuf->db_sglist[ndx].seg_addr) != STMF_SUCCESS) {
156 			scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
157 			/* Do not need to do xfer anymore, just complete it */
158 			dbuf->db_data_size = 0;
159 			dbuf->db_xfer_status = STMF_SUCCESS;
160 			sbd_handle_read_xfer_completion(task, scmd, dbuf);
161 			return;
162 		}
163 		buflen += iolen;
164 		laddr += (uint64_t)iolen;
165 	}
166 	dbuf->db_relative_offset = scmd->current_ro;
167 	dbuf->db_data_size = buflen;
168 	dbuf->db_flags = DB_DIRECTION_TO_RPORT;
169 	(void) stmf_xfer_data(task, dbuf, 0);
170 	scmd->len -= buflen;
171 	scmd->current_ro += buflen;
172 	if (scmd->len && (scmd->nbufs < bufs_to_take)) {
173 		uint32_t maxsize, minsize, old_minsize;
174 
175 		maxsize = (scmd->len > (128*1024)) ? 128*1024 : scmd->len;
176 		minsize = maxsize >> 2;
177 		do {
178 			/*
179 			 * A bad port implementation can keep on failing the
180 			 * the request but keep on sending us a false
181 			 * minsize.
182 			 */
183 			old_minsize = minsize;
184 			dbuf = stmf_alloc_dbuf(task, maxsize, &minsize, 0);
185 		} while ((dbuf == NULL) && (old_minsize > minsize) &&
186 		    (minsize >= 512));
187 		if (dbuf == NULL) {
188 			return;
189 		}
190 		scmd->nbufs++;
191 		sbd_do_read_xfer(task, scmd, dbuf);
192 	}
193 }
194 
/*
 * Global tunables for the reduced copy (zcopy) data path.
 *
 * sbd_zcopy: Bail-out switch for reduced copy path.
 *
 * 0 - read & write off
 * 1 - read & write on
 * 2 - only read on
 * 4 - only write on
 *
 * The read path tests this value with (sbd_zcopy & (2|1)).
 */
int sbd_zcopy = 1;	/* enable zcopy read & write path */
/*
 * Global cap on a single sgl xfer; must be a multiple of DEV_BSIZE or
 * sbd_do_sgl_read_xfer() resets it to 0.  Valid if non-zero.
 */
uint32_t sbd_max_xfer_len = 0;		/* Valid if non-zero */
/*
 * Global cap on the first sgl xfer of a command; takes precedence over
 * task->task_1st_xfer_len.  Same DEV_BSIZE rule.  Valid if non-zero.
 */
uint32_t sbd_1st_xfer_len = 0;		/* Valid if non-zero */
/*
 * Global over-ride for task->task_copy_threshold: reads shorter than the
 * threshold are attempted via the copy path.  Valid if non-zero.
 */
uint32_t sbd_copy_threshold = 0;		/* Valid if non-zero */
207 
208 static void
209 sbd_do_sgl_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer)
210 {
211 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
212 	sbd_zvol_io_t *zvio;
213 	int ret, final_xfer;
214 	uint64_t offset;
215 	uint32_t xfer_len, max_len, first_len;
216 	stmf_status_t xstat;
217 	stmf_data_buf_t *dbuf;
218 	uint_t nblks;
219 	uint64_t blksize = sl->sl_blksize;
220 	size_t db_private_sz;
221 	uintptr_t pad;
222 
223 	ASSERT(rw_read_held(&sl->sl_access_state_lock));
224 	ASSERT((sl->sl_flags & SL_MEDIA_LOADED) != 0);
225 
226 	/*
227 	 * Calculate the limits on xfer_len to the minimum of :
228 	 *    - task limit
229 	 *    - lun limit
230 	 *    - sbd global limit if set
231 	 *    - first xfer limit if set
232 	 *
233 	 * First, protect against silly over-ride value
234 	 */
235 	if (sbd_max_xfer_len && ((sbd_max_xfer_len % DEV_BSIZE) != 0)) {
236 		cmn_err(CE_WARN, "sbd_max_xfer_len invalid %d, resetting\n",
237 		    sbd_max_xfer_len);
238 		sbd_max_xfer_len = 0;
239 	}
240 	if (sbd_1st_xfer_len && ((sbd_1st_xfer_len % DEV_BSIZE) != 0)) {
241 		cmn_err(CE_WARN, "sbd_1st_xfer_len invalid %d, resetting\n",
242 		    sbd_1st_xfer_len);
243 		sbd_1st_xfer_len = 0;
244 	}
245 
246 	max_len = MIN(task->task_max_xfer_len, sl->sl_max_xfer_len);
247 	if (sbd_max_xfer_len)
248 		max_len = MIN(max_len, sbd_max_xfer_len);
249 	/*
250 	 * Special case the first xfer if hints are set.
251 	 */
252 	if (first_xfer && (sbd_1st_xfer_len || task->task_1st_xfer_len)) {
253 		/* global over-ride has precedence */
254 		if (sbd_1st_xfer_len)
255 			first_len = sbd_1st_xfer_len;
256 		else
257 			first_len = task->task_1st_xfer_len;
258 	} else {
259 		first_len = 0;
260 	}
261 
262 	while (scmd->len && scmd->nbufs < task->task_max_nbufs) {
263 
264 		xfer_len = MIN(max_len, scmd->len);
265 		if (first_len) {
266 			xfer_len = MIN(xfer_len, first_len);
267 			first_len = 0;
268 		}
269 		if (scmd->len == xfer_len) {
270 			final_xfer = 1;
271 		} else {
272 			/*
273 			 * Attempt to end xfer on a block boundary.
274 			 * The only way this does not happen is if the
275 			 * xfer_len is small enough to stay contained
276 			 * within the same block.
277 			 */
278 			uint64_t xfer_offset, xfer_aligned_end;
279 
280 			final_xfer = 0;
281 			xfer_offset = scmd->addr + scmd->current_ro;
282 			xfer_aligned_end =
283 			    P2ALIGN(xfer_offset+xfer_len, blksize);
284 			if (xfer_aligned_end > xfer_offset)
285 				xfer_len = xfer_aligned_end - xfer_offset;
286 		}
287 		/*
288 		 * Allocate object to track the read and reserve
289 		 * enough space for scatter/gather list.
290 		 */
291 		offset = scmd->addr + scmd->current_ro;
292 		nblks = sbd_zvol_numsegs(sl, offset, xfer_len);
293 
294 		db_private_sz = sizeof (*zvio) + sizeof (uintptr_t) /* PAD */ +
295 		    (nblks * sizeof (stmf_sglist_ent_t));
296 		dbuf = stmf_alloc(STMF_STRUCT_DATA_BUF, db_private_sz,
297 		    AF_DONTZERO);
298 		/*
299 		 * Setup the dbuf
300 		 *
301 		 * XXX Framework does not handle variable length sglists
302 		 * properly, so setup db_lu_private and db_port_private
303 		 * fields here. db_stmf_private is properly set for
304 		 * calls to stmf_free.
305 		 */
306 		if (dbuf->db_port_private == NULL) {
307 			/*
308 			 * XXX Framework assigns space to PP after db_sglist[0]
309 			 */
310 			cmn_err(CE_PANIC, "db_port_private == NULL");
311 		}
312 		pad = (uintptr_t)&dbuf->db_sglist[nblks];
313 		dbuf->db_lu_private = (void *)P2ROUNDUP(pad, sizeof (pad));
314 		dbuf->db_port_private = NULL;
315 		dbuf->db_buf_size = xfer_len;
316 		dbuf->db_data_size = xfer_len;
317 		dbuf->db_relative_offset = scmd->current_ro;
318 		dbuf->db_sglist_length = (uint16_t)nblks;
319 		dbuf->db_xfer_status = 0;
320 		dbuf->db_handle = 0;
321 
322 		dbuf->db_flags = (DB_DONT_CACHE | DB_DONT_REUSE |
323 		    DB_DIRECTION_TO_RPORT | DB_LU_DATA_BUF);
324 		if (final_xfer)
325 			dbuf->db_flags |= DB_SEND_STATUS_GOOD;
326 
327 		zvio = dbuf->db_lu_private;
328 		/* Need absolute offset for zvol access */
329 		zvio->zvio_offset = offset;
330 		zvio->zvio_flags = ZVIO_SYNC;
331 
332 		/*
333 		 * Accounting for start of read.
334 		 * Note there is no buffer address for the probe yet.
335 		 */
336 		DTRACE_PROBE5(backing__store__read__start, sbd_lu_t *, sl,
337 		    uint8_t *, NULL, uint64_t, xfer_len,
338 		    uint64_t, offset, scsi_task_t *, task);
339 
340 		ret = sbd_zvol_alloc_read_bufs(sl, dbuf);
341 
342 		DTRACE_PROBE6(backing__store__read__end, sbd_lu_t *, sl,
343 		    uint8_t *, NULL, uint64_t, xfer_len,
344 		    uint64_t, offset, int, ret, scsi_task_t *, task);
345 
346 		if (ret != 0) {
347 			/*
348 			 * Read failure from the backend.
349 			 */
350 			stmf_free(dbuf);
351 			if (scmd->nbufs == 0) {
352 				/* nothing queued, just finish */
353 				scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
354 				stmf_scsilib_send_status(task, STATUS_CHECK,
355 				    STMF_SAA_READ_ERROR);
356 				rw_exit(&sl->sl_access_state_lock);
357 			} else {
358 				/* process failure when other dbufs finish */
359 				scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
360 			}
361 			return;
362 		}
363 
364 
365 		/*
366 		 * Allow PP to do setup
367 		 */
368 		xstat = stmf_setup_dbuf(task, dbuf, 0);
369 		if (xstat != STMF_SUCCESS) {
370 			/*
371 			 * This could happen if the driver cannot get the
372 			 * DDI resources it needs for this request.
373 			 * If other dbufs are queued, try again when the next
374 			 * one completes, otherwise give up.
375 			 */
376 			sbd_zvol_rele_read_bufs(sl, dbuf);
377 			stmf_free(dbuf);
378 			if (scmd->nbufs > 0) {
379 				/* completion of previous dbuf will retry */
380 				return;
381 			}
382 			/*
383 			 * Done with this command.
384 			 */
385 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
386 			if (first_xfer)
387 				stmf_scsilib_send_status(task, STATUS_QFULL, 0);
388 			else
389 				stmf_scsilib_send_status(task, STATUS_CHECK,
390 				    STMF_SAA_READ_ERROR);
391 			rw_exit(&sl->sl_access_state_lock);
392 			return;
393 		}
394 		/*
395 		 * dbuf is now queued on task
396 		 */
397 		scmd->nbufs++;
398 
399 		/* XXX leave this in for FW? */
400 		DTRACE_PROBE4(sbd__xfer, struct scsi_task *, task,
401 		    struct stmf_data_buf *, dbuf, uint64_t, offset,
402 		    uint32_t, xfer_len);
403 		/*
404 		 * Do not pass STMF_IOF_LU_DONE so that the zvol
405 		 * state can be released in the completion callback.
406 		 */
407 		xstat = stmf_xfer_data(task, dbuf, 0);
408 		switch (xstat) {
409 		case STMF_SUCCESS:
410 			break;
411 		case STMF_BUSY:
412 			/*
413 			 * The dbuf is queued on the task, but unknown
414 			 * to the PP, thus no completion will occur.
415 			 */
416 			sbd_zvol_rele_read_bufs(sl, dbuf);
417 			stmf_teardown_dbuf(task, dbuf);
418 			stmf_free(dbuf);
419 			scmd->nbufs--;
420 			if (scmd->nbufs > 0) {
421 				/* completion of previous dbuf will retry */
422 				return;
423 			}
424 			/*
425 			 * Done with this command.
426 			 */
427 			rw_exit(&sl->sl_access_state_lock);
428 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
429 			if (first_xfer)
430 				stmf_scsilib_send_status(task, STATUS_QFULL, 0);
431 			else
432 				stmf_scsilib_send_status(task, STATUS_CHECK,
433 				    STMF_SAA_READ_ERROR);
434 			return;
435 		case STMF_ABORTED:
436 			/*
437 			 * Completion from task_done will cleanup
438 			 */
439 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
440 			return;
441 		}
442 		/*
443 		 * Update the xfer progress.
444 		 */
445 		ASSERT(scmd->len >= xfer_len);
446 		scmd->len -= xfer_len;
447 		scmd->current_ro += xfer_len;
448 	}
449 }
450 
451 void
452 sbd_handle_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
453 				struct stmf_data_buf *dbuf)
454 {
455 	if (dbuf->db_xfer_status != STMF_SUCCESS) {
456 		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
457 		    dbuf->db_xfer_status, NULL);
458 		return;
459 	}
460 	task->task_nbytes_transferred += dbuf->db_data_size;
461 	if (scmd->len == 0 || scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
462 		stmf_free_dbuf(task, dbuf);
463 		scmd->nbufs--;
464 		if (scmd->nbufs)
465 			return;	/* wait for all buffers to complete */
466 		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
467 		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL)
468 			stmf_scsilib_send_status(task, STATUS_CHECK,
469 			    STMF_SAA_READ_ERROR);
470 		else
471 			stmf_scsilib_send_status(task, STATUS_GOOD, 0);
472 		return;
473 	}
474 	if (dbuf->db_flags & DB_DONT_REUSE) {
475 		/* allocate new dbuf */
476 		uint32_t maxsize, minsize, old_minsize;
477 		stmf_free_dbuf(task, dbuf);
478 
479 		maxsize = (scmd->len > (128*1024)) ? 128*1024 : scmd->len;
480 		minsize = maxsize >> 2;
481 		do {
482 			old_minsize = minsize;
483 			dbuf = stmf_alloc_dbuf(task, maxsize, &minsize, 0);
484 		} while ((dbuf == NULL) && (old_minsize > minsize) &&
485 		    (minsize >= 512));
486 		if (dbuf == NULL) {
487 			scmd->nbufs --;
488 			if (scmd->nbufs == 0) {
489 				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
490 				    STMF_ALLOC_FAILURE, NULL);
491 			}
492 			return;
493 		}
494 	}
495 	sbd_do_read_xfer(task, scmd, dbuf);
496 }
497 
498 /*
499  * This routine must release the DMU resources and free the dbuf
500  * in all cases.  If this is the final dbuf of the task, then drop
501  * the reader lock on the LU state. If there are no errors and more
502  * work to do, then queue more xfer operations.
503  */
504 void
505 sbd_handle_sgl_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
506 				struct stmf_data_buf *dbuf)
507 {
508 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
509 	stmf_status_t xfer_status;
510 	uint32_t data_size;
511 	int scmd_err;
512 
513 	ASSERT(dbuf->db_lu_private);
514 	ASSERT(scmd->cmd_type == SBD_CMD_SCSI_READ);
515 
516 	scmd->nbufs--;	/* account for this dbuf */
517 	/*
518 	 * Release the DMU resources.
519 	 */
520 	sbd_zvol_rele_read_bufs(sl, dbuf);
521 	/*
522 	 * Release the dbuf after retrieving needed fields.
523 	 */
524 	xfer_status = dbuf->db_xfer_status;
525 	data_size = dbuf->db_data_size;
526 	stmf_teardown_dbuf(task, dbuf);
527 	stmf_free(dbuf);
528 	/*
529 	 * Release the state lock if this is the last completion.
530 	 * If this is the last dbuf on task and all data has been
531 	 * transferred or an error encountered, then no more dbufs
532 	 * will be queued.
533 	 */
534 	scmd_err = (((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0) ||
535 	    (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) ||
536 	    (xfer_status != STMF_SUCCESS));
537 	if (scmd->nbufs == 0 && (scmd->len == 0 || scmd_err)) {
538 		/* all DMU state has been released */
539 		rw_exit(&sl->sl_access_state_lock);
540 	}
541 
542 	/*
543 	 * If there have been no errors, either complete the task
544 	 * or issue more data xfer operations.
545 	 */
546 	if (!scmd_err) {
547 		/*
548 		 * This chunk completed successfully
549 		 */
550 		task->task_nbytes_transferred += data_size;
551 		if (scmd->nbufs == 0 && scmd->len == 0) {
552 			/*
553 			 * This command completed successfully
554 			 *
555 			 * Status was sent along with data, so no status
556 			 * completion will occur. Tell stmf we are done.
557 			 */
558 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
559 			stmf_task_lu_done(task);
560 			return;
561 		}
562 		/*
563 		 * Start more xfers
564 		 */
565 		sbd_do_sgl_read_xfer(task, scmd, 0);
566 		return;
567 	}
568 	/*
569 	 * Sort out the failure
570 	 */
571 	if (scmd->flags & SBD_SCSI_CMD_ACTIVE) {
572 		/*
573 		 * If a previous error occurred, leave the command active
574 		 * and wait for the last completion to send the status check.
575 		 */
576 		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
577 			if (scmd->nbufs == 0) {
578 				scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
579 				stmf_scsilib_send_status(task, STATUS_CHECK,
580 				    STMF_SAA_READ_ERROR);
581 			}
582 			return;
583 		}
584 		/*
585 		 * Must have been a failure on current dbuf
586 		 */
587 		ASSERT(xfer_status != STMF_SUCCESS);
588 		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
589 		stmf_abort(STMF_QUEUE_TASK_ABORT, task, xfer_status, NULL);
590 	}
591 }
592 
593 void
594 sbd_handle_sgl_write_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
595 				struct stmf_data_buf *dbuf)
596 {
597 	sbd_zvol_io_t *zvio = dbuf->db_lu_private;
598 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
599 	int ret;
600 	int scmd_err, scmd_xfer_done;
601 	stmf_status_t xfer_status = dbuf->db_xfer_status;
602 	uint32_t data_size = dbuf->db_data_size;
603 
604 	ASSERT(zvio);
605 
606 	/*
607 	 * Allow PP to free up resources before releasing the write bufs
608 	 * as writing to the backend could take some time.
609 	 */
610 	stmf_teardown_dbuf(task, dbuf);
611 
612 	scmd->nbufs--;	/* account for this dbuf */
613 	/*
614 	 * All data was queued and this is the last completion,
615 	 * but there could still be an error.
616 	 */
617 	scmd_xfer_done = (scmd->len == 0 && scmd->nbufs == 0);
618 	scmd_err = (((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0) ||
619 	    (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) ||
620 	    (xfer_status != STMF_SUCCESS));
621 
622 	DTRACE_PROBE5(backing__store__write__start, sbd_lu_t *, sl,
623 	    uint8_t *, NULL, uint64_t, data_size,
624 	    uint64_t, zvio->zvio_offset, scsi_task_t *, task);
625 
626 	if (scmd_err) {
627 		/* just return the write buffers */
628 		sbd_zvol_rele_write_bufs_abort(sl, dbuf);
629 		ret = 0;
630 	} else {
631 		if (scmd_xfer_done)
632 			zvio->zvio_flags = ZVIO_COMMIT;
633 		else
634 			zvio->zvio_flags = 0;
635 		/* write the data */
636 		ret = sbd_zvol_rele_write_bufs(sl, dbuf);
637 	}
638 
639 	DTRACE_PROBE6(backing__store__write__end, sbd_lu_t *, sl,
640 	    uint8_t *, NULL, uint64_t, data_size,
641 	    uint64_t, zvio->zvio_offset, int, ret,  scsi_task_t *, task);
642 
643 	if (ret != 0) {
644 		/* update the error flag */
645 		scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
646 		scmd_err = 1;
647 	}
648 
649 	/* Release the dbuf */
650 	stmf_free(dbuf);
651 
652 	/*
653 	 * Release the state lock if this is the last completion.
654 	 * If this is the last dbuf on task and all data has been
655 	 * transferred or an error encountered, then no more dbufs
656 	 * will be queued.
657 	 */
658 	if (scmd->nbufs == 0 && (scmd->len == 0 || scmd_err)) {
659 		/* all DMU state has been released */
660 		rw_exit(&sl->sl_access_state_lock);
661 	}
662 	/*
663 	 * If there have been no errors, either complete the task
664 	 * or issue more data xfer operations.
665 	 */
666 	if (!scmd_err) {
667 		/* This chunk completed successfully */
668 		task->task_nbytes_transferred += data_size;
669 		if (scmd_xfer_done) {
670 			/* This command completed successfully */
671 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
672 			if ((scmd->flags & SBD_SCSI_CMD_SYNC_WRITE) &&
673 			    (sbd_flush_data_cache(sl, 0) != SBD_SUCCESS)) {
674 				stmf_scsilib_send_status(task, STATUS_CHECK,
675 				    STMF_SAA_WRITE_ERROR);
676 			} else {
677 				stmf_scsilib_send_status(task, STATUS_GOOD, 0);
678 			}
679 			return;
680 		}
681 		/*
682 		 * Start more xfers
683 		 */
684 		sbd_do_sgl_write_xfer(task, scmd, 0);
685 		return;
686 	}
687 	/*
688 	 * Sort out the failure
689 	 */
690 	if (scmd->flags & SBD_SCSI_CMD_ACTIVE) {
691 		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
692 			if (scmd->nbufs == 0) {
693 				scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
694 				stmf_scsilib_send_status(task, STATUS_CHECK,
695 				    STMF_SAA_WRITE_ERROR);
696 			}
697 			/*
698 			 * Leave the command active until last dbuf completes.
699 			 */
700 			return;
701 		}
702 		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
703 		ASSERT(xfer_status != STMF_SUCCESS);
704 		stmf_abort(STMF_QUEUE_TASK_ABORT, task, xfer_status, NULL);
705 	}
706 }
707 
708 /*
709  * Handle a copy operation using the zvol interface.
710  *
711  * Similar to the sbd_data_read/write path, except it goes directly through
712  * the zvol interfaces. It can pass a port provider sglist in the
713  * form of uio which is lost through the vn_rdwr path.
714  *
715  * Returns:
716  *	STMF_SUCCESS - request handled
717  *	STMF_FAILURE - request not handled, caller must deal with error
718  */
719 static stmf_status_t
720 sbd_copy_rdwr(scsi_task_t *task, uint64_t laddr, stmf_data_buf_t *dbuf,
721     int cmd, int commit)
722 {
723 	sbd_lu_t		*sl = task->task_lu->lu_provider_private;
724 	struct uio		uio;
725 	struct iovec		*iov, *tiov, iov1[8];
726 	uint32_t		len, resid;
727 	int			ret, i, iovcnt, flags;
728 	boolean_t		is_read;
729 
730 	ASSERT(cmd == SBD_CMD_SCSI_READ || cmd == SBD_CMD_SCSI_WRITE);
731 
732 	is_read = (cmd == SBD_CMD_SCSI_READ) ? B_TRUE : B_FALSE;
733 	iovcnt = dbuf->db_sglist_length;
734 	/* use the stack for small iovecs */
735 	if (iovcnt > 8) {
736 		iov = kmem_alloc(iovcnt * sizeof (*iov), KM_SLEEP);
737 	} else {
738 		iov = &iov1[0];
739 	}
740 
741 	/* Convert dbuf sglist to iovec format */
742 	len = dbuf->db_data_size;
743 	resid = len;
744 	tiov = iov;
745 	for (i = 0; i < iovcnt; i++) {
746 		tiov->iov_base = (caddr_t)dbuf->db_sglist[i].seg_addr;
747 		tiov->iov_len = MIN(resid, dbuf->db_sglist[i].seg_length);
748 		resid -= tiov->iov_len;
749 		tiov++;
750 	}
751 	if (resid != 0) {
752 		cmn_err(CE_WARN, "inconsistant sglist rem %d", resid);
753 		if (iov != &iov1[0])
754 			kmem_free(iov, iovcnt * sizeof (*iov));
755 		return (STMF_FAILURE);
756 	}
757 	/* Setup the uio struct */
758 	uio.uio_iov = iov;
759 	uio.uio_iovcnt = iovcnt;
760 	uio.uio_loffset = laddr;
761 	uio.uio_segflg = (short)UIO_SYSSPACE;
762 	uio.uio_resid = (uint64_t)len;
763 	uio.uio_llimit = RLIM64_INFINITY;
764 
765 	if (is_read == B_TRUE) {
766 		uio.uio_fmode = FREAD;
767 		uio.uio_extflg = UIO_COPY_CACHED;
768 		DTRACE_PROBE5(backing__store__read__start, sbd_lu_t *, sl,
769 		    uint8_t *, NULL, uint64_t, len, uint64_t, laddr,
770 		    scsi_task_t *, task);
771 
772 		/* Fetch the data */
773 		ret = sbd_zvol_copy_read(sl, &uio);
774 
775 		DTRACE_PROBE6(backing__store__read__end, sbd_lu_t *, sl,
776 		    uint8_t *, NULL, uint64_t, len, uint64_t, laddr, int, ret,
777 		    scsi_task_t *, task);
778 	} else {
779 		uio.uio_fmode = FWRITE;
780 		uio.uio_extflg = UIO_COPY_DEFAULT;
781 		DTRACE_PROBE5(backing__store__write__start, sbd_lu_t *, sl,
782 		    uint8_t *, NULL, uint64_t, len, uint64_t, laddr,
783 		    scsi_task_t *, task);
784 
785 		flags = (commit) ? ZVIO_COMMIT : 0;
786 		/* Write the data */
787 		ret = sbd_zvol_copy_write(sl, &uio, flags);
788 
789 		DTRACE_PROBE6(backing__store__write__end, sbd_lu_t *, sl,
790 		    uint8_t *, NULL, uint64_t, len, uint64_t, laddr, int, ret,
791 		    scsi_task_t *, task);
792 	}
793 
794 	if (iov != &iov1[0])
795 		kmem_free(iov, iovcnt * sizeof (*iov));
796 	if (ret != 0) {
797 		/* Backend I/O error */
798 		return (STMF_FAILURE);
799 	}
800 	return (STMF_SUCCESS);
801 }
802 
803 void
804 sbd_handle_read(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
805 {
806 	uint64_t lba, laddr;
807 	uint32_t len;
808 	uint8_t op = task->task_cdb[0];
809 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
810 	sbd_cmd_t *scmd;
811 	stmf_data_buf_t *dbuf;
812 	int fast_path;
813 
814 	if (op == SCMD_READ) {
815 		lba = READ_SCSI21(&task->task_cdb[1], uint64_t);
816 		len = (uint32_t)task->task_cdb[4];
817 
818 		if (len == 0) {
819 			len = 256;
820 		}
821 	} else if (op == SCMD_READ_G1) {
822 		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
823 		len = READ_SCSI16(&task->task_cdb[7], uint32_t);
824 	} else if (op == SCMD_READ_G5) {
825 		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
826 		len = READ_SCSI32(&task->task_cdb[6], uint32_t);
827 	} else if (op == SCMD_READ_G4) {
828 		lba = READ_SCSI64(&task->task_cdb[2], uint64_t);
829 		len = READ_SCSI32(&task->task_cdb[10], uint32_t);
830 	} else {
831 		stmf_scsilib_send_status(task, STATUS_CHECK,
832 		    STMF_SAA_INVALID_OPCODE);
833 		return;
834 	}
835 
836 	laddr = lba << sl->sl_data_blocksize_shift;
837 	len <<= sl->sl_data_blocksize_shift;
838 
839 	if ((laddr + (uint64_t)len) > sl->sl_lu_size) {
840 		stmf_scsilib_send_status(task, STATUS_CHECK,
841 		    STMF_SAA_LBA_OUT_OF_RANGE);
842 		return;
843 	}
844 
845 	task->task_cmd_xfer_length = len;
846 	if (task->task_additional_flags & TASK_AF_NO_EXPECTED_XFER_LENGTH) {
847 		task->task_expected_xfer_length = len;
848 	}
849 
850 	if (len != task->task_expected_xfer_length) {
851 		fast_path = 0;
852 		len = (len > task->task_expected_xfer_length) ?
853 		    task->task_expected_xfer_length : len;
854 	} else {
855 		fast_path = 1;
856 	}
857 
858 	if (len == 0) {
859 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
860 		return;
861 	}
862 
863 	/*
864 	 * Determine if this read can directly use DMU buffers.
865 	 */
866 	if (sbd_zcopy & (2|1) &&		/* Debug switch */
867 	    initial_dbuf == NULL &&		/* No PP buffer passed in */
868 	    sl->sl_flags & SL_CALL_ZVOL &&	/* zvol backing store */
869 	    (task->task_additional_flags &
870 	    TASK_AF_ACCEPT_LU_DBUF))		/* PP allows it */
871 	{
872 		/*
873 		 * Reduced copy path
874 		 */
875 		uint32_t copy_threshold, minsize;
876 		int ret;
877 
878 		/*
879 		 * The sl_access_state_lock will be held shared
880 		 * for the entire request and released when all
881 		 * dbufs have completed.
882 		 */
883 		rw_enter(&sl->sl_access_state_lock, RW_READER);
884 		if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
885 			rw_exit(&sl->sl_access_state_lock);
886 			stmf_scsilib_send_status(task, STATUS_CHECK,
887 			    STMF_SAA_READ_ERROR);
888 			return;
889 		}
890 
891 		/*
892 		 * Check if setup is more expensive than copying the data.
893 		 *
894 		 * Use the global over-ride sbd_zcopy_threshold if set.
895 		 */
896 		copy_threshold = (sbd_copy_threshold > 0) ?
897 		    sbd_copy_threshold : task->task_copy_threshold;
898 		minsize = len;
899 		if (len < copy_threshold &&
900 		    (dbuf = stmf_alloc_dbuf(task, len, &minsize, 0)) != 0) {
901 
902 			ret = sbd_copy_rdwr(task, laddr, dbuf,
903 			    SBD_CMD_SCSI_READ, 0);
904 			/* done with the backend */
905 			rw_exit(&sl->sl_access_state_lock);
906 			if (ret != 0) {
907 				/* backend error */
908 				stmf_scsilib_send_status(task, STATUS_CHECK,
909 				    STMF_SAA_READ_ERROR);
910 			} else {
911 				/* send along good data */
912 				dbuf->db_relative_offset = 0;
913 				dbuf->db_data_size = len;
914 				dbuf->db_flags = DB_SEND_STATUS_GOOD |
915 				    DB_DIRECTION_TO_RPORT;
916 				/* XXX keep for FW? */
917 				DTRACE_PROBE4(sbd__xfer,
918 				    struct scsi_task *, task,
919 				    struct stmf_data_buf *, dbuf,
920 				    uint64_t, laddr, uint32_t, len);
921 				(void) stmf_xfer_data(task, dbuf,
922 				    STMF_IOF_LU_DONE);
923 			}
924 			return;
925 		}
926 
927 		/* committed to reduced copy */
928 		if (task->task_lu_private) {
929 			scmd = (sbd_cmd_t *)task->task_lu_private;
930 		} else {
931 			scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t),
932 			    KM_SLEEP);
933 			task->task_lu_private = scmd;
934 		}
935 		/*
936 		 * Setup scmd to track read progress.
937 		 */
938 		scmd->flags = SBD_SCSI_CMD_ACTIVE;
939 		scmd->cmd_type = SBD_CMD_SCSI_READ;
940 		scmd->nbufs = 0;
941 		scmd->addr = laddr;
942 		scmd->len = len;
943 		scmd->current_ro = 0;
944 
945 		/*
946 		 * Kick-off the read.
947 		 */
948 		sbd_do_sgl_read_xfer(task, scmd, 1);
949 		return;
950 	}
951 
952 	if (initial_dbuf == NULL) {
953 		uint32_t maxsize, minsize, old_minsize;
954 
955 		maxsize = (len > (128*1024)) ? 128*1024 : len;
956 		minsize = maxsize >> 2;
957 		do {
958 			old_minsize = minsize;
959 			initial_dbuf = stmf_alloc_dbuf(task, maxsize,
960 			    &minsize, 0);
961 		} while ((initial_dbuf == NULL) && (old_minsize > minsize) &&
962 		    (minsize >= 512));
963 		if (initial_dbuf == NULL) {
964 			stmf_scsilib_send_status(task, STATUS_QFULL, 0);
965 			return;
966 		}
967 	}
968 	dbuf = initial_dbuf;
969 
970 	if ((dbuf->db_buf_size >= len) && fast_path &&
971 	    (dbuf->db_sglist_length == 1)) {
972 		if (sbd_data_read(sl, task, laddr, (uint64_t)len,
973 		    dbuf->db_sglist[0].seg_addr) == STMF_SUCCESS) {
974 			dbuf->db_relative_offset = 0;
975 			dbuf->db_data_size = len;
976 			dbuf->db_flags = DB_SEND_STATUS_GOOD |
977 			    DB_DIRECTION_TO_RPORT;
978 			/* XXX keep for FW? */
979 			DTRACE_PROBE4(sbd__xfer, struct scsi_task *, task,
980 			    struct stmf_data_buf *, dbuf,
981 			    uint64_t, laddr, uint32_t, len);
982 			(void) stmf_xfer_data(task, dbuf, STMF_IOF_LU_DONE);
983 		} else {
984 			stmf_scsilib_send_status(task, STATUS_CHECK,
985 			    STMF_SAA_READ_ERROR);
986 		}
987 		return;
988 	}
989 
990 	if (task->task_lu_private) {
991 		scmd = (sbd_cmd_t *)task->task_lu_private;
992 	} else {
993 		scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
994 		task->task_lu_private = scmd;
995 	}
996 	scmd->flags = SBD_SCSI_CMD_ACTIVE;
997 	scmd->cmd_type = SBD_CMD_SCSI_READ;
998 	scmd->nbufs = 1;
999 	scmd->addr = laddr;
1000 	scmd->len = len;
1001 	scmd->current_ro = 0;
1002 
1003 	sbd_do_read_xfer(task, scmd, dbuf);
1004 }
1005 
/*
 * Queue the next port-to-LU (DB_DIRECTION_FROM_RPORT) data transfer for a
 * buffered write command.
 *
 * task			the SCSI task being serviced
 * scmd			write tracking state: scmd->len is the number of bytes
 *			not yet requested from the port, scmd->current_ro the
 *			relative offset of the next request, and scmd->nbufs
 *			the number of port transfers currently outstanding
 * dbuf			data buffer to use for the transfer, or NULL to have
 *			one allocated here
 * dbuf_reusable	zero if a passed-in dbuf must be freed, not reused
 *
 * No more than bufs_to_take transfers are kept outstanding.  After queueing
 * one transfer the function calls itself (with no dbuf) to queue another
 * while data remains and the limit has not been reached.
 */
1006 void
1007 sbd_do_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
1008     struct stmf_data_buf *dbuf, uint8_t dbuf_reusable)
1009 {
1010 	uint32_t len;
1011 	int bufs_to_take;
1012 
	/* Nothing left to request; just drop any dbuf we were handed. */
1013 	if (scmd->len == 0) {
1014 		goto DO_WRITE_XFER_DONE;
1015 	}
1016 
1017 	/* Lets try not to hog all the buffers the port has. */
1018 	bufs_to_take = ((task->task_max_nbufs > 2) &&
1019 	    (task->task_cmd_xfer_length < (32 * 1024))) ? 2 :
1020 	    task->task_max_nbufs;
1021 
1022 	if ((dbuf != NULL) &&
1023 	    ((dbuf->db_flags & DB_DONT_REUSE) || (dbuf_reusable == 0))) {
1024 		/* free current dbuf and allocate a new one */
1025 		stmf_free_dbuf(task, dbuf);
1026 		dbuf = NULL;
1027 	}
	/* Already at the outstanding-transfer limit; do not queue more. */
1028 	if (scmd->nbufs >= bufs_to_take) {
1029 		goto DO_WRITE_XFER_DONE;
1030 	}
1031 	if (dbuf == NULL) {
1032 		uint32_t maxsize, minsize, old_minsize;
1033 
		/*
		 * Ask for at most 128K and accept as little as a quarter of
		 * that.  The allocation is retried for as long as it fails
		 * while stmf_alloc_dbuf() has lowered minsize and minsize
		 * is still at least 512.
		 */
1034 		maxsize = (scmd->len > (128*1024)) ? 128*1024 :
1035 		    scmd->len;
1036 		minsize = maxsize >> 2;
1037 		do {
1038 			old_minsize = minsize;
1039 			dbuf = stmf_alloc_dbuf(task, maxsize, &minsize, 0);
1040 		} while ((dbuf == NULL) && (old_minsize > minsize) &&
1041 		    (minsize >= 512));
1042 		if (dbuf == NULL) {
			/*
			 * With nothing outstanding the command cannot make
			 * progress, so abort it.  Otherwise simply return;
			 * presumably the completion of an outstanding
			 * transfer retries -- TODO confirm.
			 */
1043 			if (scmd->nbufs == 0) {
1044 				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1045 				    STMF_ALLOC_FAILURE, NULL);
1046 			}
1047 			return;
1048 		}
1049 	}
1050 
	/* Request no more than the buffer can hold. */
1051 	len = scmd->len > dbuf->db_buf_size ? dbuf->db_buf_size :
1052 	    scmd->len;
1053 
1054 	dbuf->db_relative_offset = scmd->current_ro;
1055 	dbuf->db_data_size = len;
1056 	dbuf->db_flags = DB_DIRECTION_FROM_RPORT;
	/* NOTE(review): the return status of stmf_xfer_data() is ignored. */
1057 	(void) stmf_xfer_data(task, dbuf, 0);
1058 	scmd->nbufs++; /* outstanding port xfers and bufs used */
1059 	scmd->len -= len;
1060 	scmd->current_ro += len;
1061 
1062 	if ((scmd->len != 0) && (scmd->nbufs < bufs_to_take)) {
1063 		sbd_do_write_xfer(task, scmd, NULL, 0);
1064 	}
1065 	return;
1066 
1067 DO_WRITE_XFER_DONE:
1068 	if (dbuf != NULL) {
1069 		stmf_free_dbuf(task, dbuf);
1070 	}
1071 }
1072 
/*
 * Queue data-out transfers for a write using LU-provided (backend) buffers
 * described by a scatter/gather list, instead of port-provided buffers.
 *
 * task		the SCSI task being serviced
 * scmd		write tracking state (addr, len, current_ro, nbufs, flags)
 * first_xfer	non-zero on the initial call for a command; enables the
 *		first-transfer length hint and selects STATUS_QFULL rather
 *		than a write error when no transfer could be started
 *
 * The caller must hold sl_access_state_lock as reader with the media loaded
 * (both are asserted).  Transfers are queued until scmd->len is exhausted or
 * task->task_max_nbufs are outstanding.  On the paths where the command is
 * finished here with nothing outstanding, status is sent and the lock is
 * released; otherwise cleanup is left to the completion of queued dbufs.
 */
1073 void
1074 sbd_do_sgl_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer)
1075 {
1076 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1077 	sbd_zvol_io_t *zvio;
1078 	int ret;
1079 	uint32_t xfer_len, max_len, first_len;
1080 	stmf_status_t xstat;
1081 	stmf_data_buf_t *dbuf;
1082 	uint_t nblks;
1083 	uint64_t blksize = sl->sl_blksize;
1084 	uint64_t offset;
1085 	size_t db_private_sz;
1086 	uintptr_t pad;
1087 
1088 	ASSERT(rw_read_held(&sl->sl_access_state_lock));
1089 	ASSERT((sl->sl_flags & SL_MEDIA_LOADED) != 0);
1090 
1091 	/*
1092 	 * Calculate the limits on xfer_len to the minimum of :
1093 	 *    - task limit
1094 	 *    - lun limit
1095 	 *    - sbd global limit if set
1096 	 *    - first xfer limit if set
1097 	 *
1098 	 * First, protect against silly over-ride value
1099 	 */
1100 	if (sbd_max_xfer_len && ((sbd_max_xfer_len % DEV_BSIZE) != 0)) {
1101 		cmn_err(CE_WARN, "sbd_max_xfer_len invalid %d, resetting\n",
1102 		    sbd_max_xfer_len);
1103 		sbd_max_xfer_len = 0;
1104 	}
1105 	if (sbd_1st_xfer_len && ((sbd_1st_xfer_len % DEV_BSIZE) != 0)) {
1106 		cmn_err(CE_WARN, "sbd_1st_xfer_len invalid %d, resetting\n",
1107 		    sbd_1st_xfer_len);
1108 		sbd_1st_xfer_len = 0;
1109 	}
1110 
1111 	max_len = MIN(task->task_max_xfer_len, sl->sl_max_xfer_len);
1112 	if (sbd_max_xfer_len)
1113 		max_len = MIN(max_len, sbd_max_xfer_len);
1114 	/*
1115 	 * Special case the first xfer if hints are set.
1116 	 */
1117 	if (first_xfer && (sbd_1st_xfer_len || task->task_1st_xfer_len)) {
1118 		/* global over-ride has precedence */
1119 		if (sbd_1st_xfer_len)
1120 			first_len = sbd_1st_xfer_len;
1121 		else
1122 			first_len = task->task_1st_xfer_len;
1123 	} else {
1124 		first_len = 0;
1125 	}
1126 
1127 
1128 	while (scmd->len && scmd->nbufs < task->task_max_nbufs) {
1129 
1130 		xfer_len = MIN(max_len, scmd->len);
		/* The first-transfer hint applies to one transfer only. */
1131 		if (first_len) {
1132 			xfer_len = MIN(xfer_len, first_len);
1133 			first_len = 0;
1134 		}
1135 		if (xfer_len < scmd->len) {
1136 			/*
1137 			 * Attempt to end xfer on a block boundary.
1138 			 * The only way this does not happen is if the
1139 			 * xfer_len is small enough to stay contained
1140 			 * within the same block.
1141 			 */
1142 			uint64_t xfer_offset, xfer_aligned_end;
1143 
1144 			xfer_offset = scmd->addr + scmd->current_ro;
1145 			xfer_aligned_end =
1146 			    P2ALIGN(xfer_offset+xfer_len, blksize);
1147 			if (xfer_aligned_end > xfer_offset)
1148 				xfer_len = xfer_aligned_end - xfer_offset;
1149 		}
1150 		/*
1151 		 * Allocate object to track the write and reserve
1152 		 * enough space for scatter/gather list.
1153 		 */
1154 		offset = scmd->addr + scmd->current_ro;
1155 		nblks = sbd_zvol_numsegs(sl, offset, xfer_len);
1156 		db_private_sz = sizeof (*zvio) + sizeof (uintptr_t) /* PAD */ +
1157 		    (nblks * sizeof (stmf_sglist_ent_t));
		/*
		 * NOTE(review): dbuf is dereferenced below without a NULL
		 * check; this assumes stmf_alloc() cannot fail here -- TODO
		 * confirm.
		 */
1158 		dbuf = stmf_alloc(STMF_STRUCT_DATA_BUF, db_private_sz,
1159 		    AF_DONTZERO);
1160 
1161 		/*
1162 		 * Setup the dbuf
1163 		 *
1164 		 * XXX Framework does not handle variable length sglists
1165 		 * properly, so setup db_lu_private and db_port_private
1166 		 * fields here. db_stmf_private is properly set for
1167 		 * calls to stmf_free.
1168 		 */
1169 		if (dbuf->db_port_private == NULL) {
1170 			/*
1171 			 * XXX Framework assigns space to PP after db_sglist[0]
1172 			 */
1173 			cmn_err(CE_PANIC, "db_port_private == NULL");
1174 		}
		/*
		 * Place the LU private area (the zvio) at the first
		 * pointer-aligned address past the nblks sglist entries.
		 * Every field is set explicitly because the dbuf was
		 * allocated with AF_DONTZERO.
		 */
1175 		pad = (uintptr_t)&dbuf->db_sglist[nblks];
1176 		dbuf->db_lu_private = (void *)P2ROUNDUP(pad, sizeof (pad));
1177 		dbuf->db_port_private = NULL;
1178 		dbuf->db_buf_size = xfer_len;
1179 		dbuf->db_data_size = xfer_len;
1180 		dbuf->db_relative_offset = scmd->current_ro;
1181 		dbuf->db_sglist_length = (uint16_t)nblks;
1182 		dbuf->db_xfer_status = 0;
1183 		dbuf->db_handle = 0;
1184 		dbuf->db_flags = (DB_DONT_CACHE | DB_DONT_REUSE |
1185 		    DB_DIRECTION_FROM_RPORT | DB_LU_DATA_BUF);
1186 
1187 		zvio = dbuf->db_lu_private;
1188 		zvio->zvio_offset = offset;
1189 
1190 		/* get the buffers */
1191 		ret = sbd_zvol_alloc_write_bufs(sl, dbuf);
1192 		if (ret != 0) {
1193 			/*
1194 			 * Could not allocate buffers from the backend;
1195 			 * treat it like an IO error.
1196 			 */
1197 			stmf_free(dbuf);
1198 			scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
1199 			if (scmd->nbufs == 0) {
1200 				/*
1201 				 * Nothing queued, so no completions coming
1202 				 */
1203 				stmf_scsilib_send_status(task, STATUS_CHECK,
1204 				    STMF_SAA_WRITE_ERROR);
1205 				rw_exit(&sl->sl_access_state_lock);
1206 			}
1207 			/*
1208 			 * Completions of previous buffers will cleanup.
1209 			 */
1210 			return;
1211 		}
1212 
1213 		/*
1214 		 * Allow PP to do setup
1215 		 */
1216 		xstat = stmf_setup_dbuf(task, dbuf, 0);
1217 		if (xstat != STMF_SUCCESS) {
1218 			/*
1219 			 * This could happen if the driver cannot get the
1220 			 * DDI resources it needs for this request.
1221 			 * If other dbufs are queued, try again when the next
1222 			 * one completes, otherwise give up.
1223 			 */
1224 			sbd_zvol_rele_write_bufs_abort(sl, dbuf);
1225 			stmf_free(dbuf);
1226 			if (scmd->nbufs > 0) {
1227 				/* completion of previous dbuf will retry */
1228 				return;
1229 			}
1230 			/*
1231 			 * Done with this command.
1232 			 */
1233 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
1234 			if (first_xfer)
1235 				stmf_scsilib_send_status(task, STATUS_QFULL, 0);
1236 			else
1237 				stmf_scsilib_send_status(task, STATUS_CHECK,
1238 				    STMF_SAA_WRITE_ERROR);
1239 			rw_exit(&sl->sl_access_state_lock);
1240 			return;
1241 		}
1242 
1243 		/*
1244 		 * dbuf is now queued on task
1245 		 */
1246 		scmd->nbufs++;
1247 
		/*
		 * NOTE(review): the switch below has no default case, so any
		 * status other than BUSY or ABORTED is treated like success
		 * and the transfer progress is advanced.
		 */
1248 		xstat = stmf_xfer_data(task, dbuf, 0);
1249 		switch (xstat) {
1250 		case STMF_SUCCESS:
1251 			break;
1252 		case STMF_BUSY:
1253 			/*
1254 			 * The dbuf is queued on the task, but unknown
1255 			 * to the PP, thus no completion will occur.
1256 			 */
1257 			sbd_zvol_rele_write_bufs_abort(sl, dbuf);
1258 			stmf_teardown_dbuf(task, dbuf);
1259 			stmf_free(dbuf);
1260 			scmd->nbufs--;
1261 			if (scmd->nbufs > 0) {
1262 				/* completion of previous dbuf will retry */
1263 				return;
1264 			}
1265 			/*
1266 			 * Done with this command.
1267 			 */
1268 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
1269 			if (first_xfer)
1270 				stmf_scsilib_send_status(task, STATUS_QFULL, 0);
1271 			else
1272 				stmf_scsilib_send_status(task, STATUS_CHECK,
1273 				    STMF_SAA_WRITE_ERROR);
1274 			rw_exit(&sl->sl_access_state_lock);
1275 			return;
1276 		case STMF_ABORTED:
1277 			/*
1278 			 * Completion code will cleanup.
1279 			 */
1280 			scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
1281 			return;
1282 		}
1283 		/*
1284 		 * Update the xfer progress.
1285 		 */
1286 		scmd->len -= xfer_len;
1287 		scmd->current_ro += xfer_len;
1288 	}
1289 }
1290 
/*
 * Process a data-out buffer whose contents have arrived from the port for
 * a buffered write, and write those contents to the backing store.
 *
 * task			the SCSI task being serviced
 * scmd			write tracking state for the task
 * dbuf			the buffer holding the received data
 * dbuf_reusable	passed through to sbd_do_write_xfer() when the dbuf
 *			is handed back for a further transfer
 *
 * A failed port transfer aborts the task.  Otherwise the next port transfer
 * is started (if data remains), the received data is written, and either
 * final status is sent once no data remains (or a failure was recorded) and
 * no transfers are outstanding, or the dbuf is recycled for more data.
 */
1291 void
1292 sbd_handle_write_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
1293     struct stmf_data_buf *dbuf, uint8_t dbuf_reusable)
1294 {
1295 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1296 	uint64_t laddr;
1297 	uint32_t buflen, iolen;
1298 	int ndx;
1299 
1300 	if (scmd->nbufs > 0) {
1301 		/*
1302 		 * Decrement the count to indicate the port xfer
1303 		 * into the dbuf has completed even though the buf is
1304 		 * still in use here in the LU provider.
1305 		 */
1306 		scmd->nbufs--;
1307 	}
1308 
1309 	if (dbuf->db_xfer_status != STMF_SUCCESS) {
1310 		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1311 		    dbuf->db_xfer_status, NULL);
1312 		return;
1313 	}
1314 
	/* An earlier buffer already failed; skip the write and clean up. */
1315 	if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
1316 		goto WRITE_XFER_DONE;
1317 	}
1318 
1319 	if (scmd->len != 0) {
1320 		/*
1321 		 * Initiate the next port xfer to occur in parallel
1322 		 * with writing this buf.
1323 		 */
1324 		sbd_do_write_xfer(task, scmd, NULL, 0);
1325 	}
1326 
	/* Byte address on the LU where this buffer's data belongs. */
1327 	laddr = scmd->addr + dbuf->db_relative_offset;
1328 
1329 	/*
1330 	 * If this is going to a zvol, use the direct call to
1331 	 * sbd_zvol_copy_{read,write}. The direct call interface is
1332 	 * restricted to PPs that accept sglists, but that is not required.
1333 	 */
1334 	if (sl->sl_flags & SL_CALL_ZVOL &&
1335 	    (task->task_additional_flags & TASK_AF_ACCEPT_LU_DBUF) &&
1336 	    (sbd_zcopy & (4|1))) {
1337 		int commit;
1338 
		/*
		 * commit is set only for the last buffer of the command:
		 * nothing left to request and nothing outstanding.
		 */
1339 		commit = (scmd->len == 0 && scmd->nbufs == 0);
1340 		if (sbd_copy_rdwr(task, laddr, dbuf, SBD_CMD_SCSI_WRITE,
1341 		    commit) != STMF_SUCCESS)
1342 			scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
1343 		buflen = dbuf->db_data_size;
1344 	} else {
		/*
		 * Write the data one sglist segment at a time, stopping at
		 * db_data_size bytes, an empty segment, or the first
		 * failed write.
		 */
1345 		for (buflen = 0, ndx = 0; (buflen < dbuf->db_data_size) &&
1346 		    (ndx < dbuf->db_sglist_length); ndx++) {
1347 			iolen = min(dbuf->db_data_size - buflen,
1348 			    dbuf->db_sglist[ndx].seg_length);
1349 			if (iolen == 0)
1350 				break;
1351 			if (sbd_data_write(sl, task, laddr, (uint64_t)iolen,
1352 			    dbuf->db_sglist[ndx].seg_addr) != STMF_SUCCESS) {
1353 				scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
1354 				break;
1355 			}
1356 			buflen += iolen;
1357 			laddr += (uint64_t)iolen;
1358 		}
1359 	}
1360 	task->task_nbytes_transferred += buflen;
1361 WRITE_XFER_DONE:
1362 	if (scmd->len == 0 || scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
1363 		stmf_free_dbuf(task, dbuf);
1364 		if (scmd->nbufs)
1365 			return;	/* wait for all buffers to complete */
1366 		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
1367 		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
1368 			stmf_scsilib_send_status(task, STATUS_CHECK,
1369 			    STMF_SAA_WRITE_ERROR);
1370 		} else {
1371 			/*
1372 			 * If SYNC_WRITE flag is on then we need to flush
1373 			 * cache before sending status.
1374 			 * Note: this may be a no-op because of how
1375 			 * SL_WRITEBACK_CACHE_DISABLE and
1376 			 * SL_FLUSH_ON_DISABLED_WRITECACHE are set, but not
1377 			 * worth code complexity of checking those in this code
1378 			 * path, SBD_SCSI_CMD_SYNC_WRITE is rarely set.
1379 			 */
1380 			if ((scmd->flags & SBD_SCSI_CMD_SYNC_WRITE) &&
1381 			    (sbd_flush_data_cache(sl, 0) != SBD_SUCCESS)) {
1382 				stmf_scsilib_send_status(task, STATUS_CHECK,
1383 				    STMF_SAA_WRITE_ERROR);
1384 			} else {
1385 				stmf_scsilib_send_status(task, STATUS_GOOD, 0);
1386 			}
1387 		}
1388 		return;
1389 	}
	/* More data to fetch: hand this dbuf back for another transfer. */
1390 	sbd_do_write_xfer(task, scmd, dbuf, dbuf_reusable);
1391 }
1392 
1393 /*
1394  * Return true if copy avoidance is beneficial.
1395  */
1396 static int
1397 sbd_zcopy_write_useful(scsi_task_t *task, uint64_t laddr, uint32_t len,
1398     uint64_t blksize)
1399 {
1400 	/*
1401 	 * If there is a global copy threshold over-ride, use it.
1402 	 * Otherwise use the PP value with the caveat that at least
1403 	 * 1/2 the data must avoid being copied to be useful.
1404 	 */
1405 	if (sbd_copy_threshold > 0) {
1406 		return (len >= sbd_copy_threshold);
1407 	} else {
1408 		uint64_t no_copy_span;
1409 
1410 		/* sub-blocksize writes always copy */
1411 		if (len < task->task_copy_threshold || len < blksize)
1412 			return (0);
1413 		/*
1414 		 * Calculate amount of data that will avoid the copy path.
1415 		 * The calculation is only valid if len >= blksize.
1416 		 */
1417 		no_copy_span = P2ALIGN(laddr+len, blksize) -
1418 		    P2ROUNDUP(laddr, blksize);
1419 		return (no_copy_span >= len/2);
1420 	}
1421 }
1422 
1423 void
1424 sbd_handle_write(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
1425 {
1426 	uint64_t lba, laddr;
1427 	uint32_t len;
1428 	uint8_t op = task->task_cdb[0], do_immediate_data = 0;
1429 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1430 	sbd_cmd_t *scmd;
1431 	stmf_data_buf_t *dbuf;
1432 	uint8_t	sync_wr_flag = 0;
1433 
1434 	if (sl->sl_flags & SL_WRITE_PROTECTED) {
1435 		stmf_scsilib_send_status(task, STATUS_CHECK,
1436 		    STMF_SAA_WRITE_PROTECTED);
1437 		return;
1438 	}
1439 	if (op == SCMD_WRITE) {
1440 		lba = READ_SCSI21(&task->task_cdb[1], uint64_t);
1441 		len = (uint32_t)task->task_cdb[4];
1442 
1443 		if (len == 0) {
1444 			len = 256;
1445 		}
1446 	} else if (op == SCMD_WRITE_G1) {
1447 		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
1448 		len = READ_SCSI16(&task->task_cdb[7], uint32_t);
1449 	} else if (op == SCMD_WRITE_G5) {
1450 		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
1451 		len = READ_SCSI32(&task->task_cdb[6], uint32_t);
1452 	} else if (op == SCMD_WRITE_G4) {
1453 		lba = READ_SCSI64(&task->task_cdb[2], uint64_t);
1454 		len = READ_SCSI32(&task->task_cdb[10], uint32_t);
1455 	} else if (op == SCMD_WRITE_VERIFY) {
1456 		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
1457 		len = READ_SCSI16(&task->task_cdb[7], uint32_t);
1458 		sync_wr_flag = SBD_SCSI_CMD_SYNC_WRITE;
1459 	} else if (op == SCMD_WRITE_VERIFY_G5) {
1460 		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
1461 		len = READ_SCSI32(&task->task_cdb[6], uint32_t);
1462 		sync_wr_flag = SBD_SCSI_CMD_SYNC_WRITE;
1463 	} else if (op == SCMD_WRITE_VERIFY_G4) {
1464 		lba = READ_SCSI64(&task->task_cdb[2], uint64_t);
1465 		len = READ_SCSI32(&task->task_cdb[10], uint32_t);
1466 		sync_wr_flag = SBD_SCSI_CMD_SYNC_WRITE;
1467 	} else {
1468 		stmf_scsilib_send_status(task, STATUS_CHECK,
1469 		    STMF_SAA_INVALID_OPCODE);
1470 		return;
1471 	}
1472 
1473 	laddr = lba << sl->sl_data_blocksize_shift;
1474 	len <<= sl->sl_data_blocksize_shift;
1475 
1476 	if ((laddr + (uint64_t)len) > sl->sl_lu_size) {
1477 		stmf_scsilib_send_status(task, STATUS_CHECK,
1478 		    STMF_SAA_LBA_OUT_OF_RANGE);
1479 		return;
1480 	}
1481 
1482 	task->task_cmd_xfer_length = len;
1483 	if (task->task_additional_flags & TASK_AF_NO_EXPECTED_XFER_LENGTH) {
1484 		task->task_expected_xfer_length = len;
1485 	}
1486 
1487 	len = (len > task->task_expected_xfer_length) ?
1488 	    task->task_expected_xfer_length : len;
1489 
1490 	if (len == 0) {
1491 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
1492 		return;
1493 	}
1494 
1495 	if (sbd_zcopy & (4|1) &&		/* Debug switch */
1496 	    initial_dbuf == NULL &&		/* No PP buf passed in */
1497 	    sl->sl_flags & SL_CALL_ZVOL &&	/* zvol backing store */
1498 	    (task->task_additional_flags &
1499 	    TASK_AF_ACCEPT_LU_DBUF) &&		/* PP allows it */
1500 	    sbd_zcopy_write_useful(task, laddr, len, sl->sl_blksize)) {
1501 
1502 		/*
1503 		 * XXX Note that disallowing initial_dbuf will eliminate
1504 		 * iSCSI from participating. For small writes, that is
1505 		 * probably ok. For large writes, it may be best to just
1506 		 * copy the data from the initial dbuf and use zcopy for
1507 		 * the rest.
1508 		 */
1509 		rw_enter(&sl->sl_access_state_lock, RW_READER);
1510 		if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
1511 			rw_exit(&sl->sl_access_state_lock);
1512 			stmf_scsilib_send_status(task, STATUS_CHECK,
1513 			    STMF_SAA_READ_ERROR);
1514 			return;
1515 		}
1516 		/*
1517 		 * Setup scmd to track the write progress.
1518 		 */
1519 		if (task->task_lu_private) {
1520 			scmd = (sbd_cmd_t *)task->task_lu_private;
1521 		} else {
1522 			scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t),
1523 			    KM_SLEEP);
1524 			task->task_lu_private = scmd;
1525 		}
1526 		scmd->flags = SBD_SCSI_CMD_ACTIVE | sync_wr_flag;
1527 		scmd->cmd_type = SBD_CMD_SCSI_WRITE;
1528 		scmd->nbufs = 0;
1529 		scmd->addr = laddr;
1530 		scmd->len = len;
1531 		scmd->current_ro = 0;
1532 		sbd_do_sgl_write_xfer(task, scmd, 1);
1533 		return;
1534 	}
1535 
1536 	if ((initial_dbuf != NULL) && (task->task_flags & TF_INITIAL_BURST)) {
1537 		if (initial_dbuf->db_data_size > len) {
1538 			if (initial_dbuf->db_data_size >
1539 			    task->task_expected_xfer_length) {
1540 				/* protocol error */
1541 				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1542 				    STMF_INVALID_ARG, NULL);
1543 				return;
1544 			}
1545 			initial_dbuf->db_data_size = len;
1546 		}
1547 		do_immediate_data = 1;
1548 	}
1549 	dbuf = initial_dbuf;
1550 
1551 	if (task->task_lu_private) {
1552 		scmd = (sbd_cmd_t *)task->task_lu_private;
1553 	} else {
1554 		scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
1555 		task->task_lu_private = scmd;
1556 	}
1557 	scmd->flags = SBD_SCSI_CMD_ACTIVE | sync_wr_flag;
1558 	scmd->cmd_type = SBD_CMD_SCSI_WRITE;
1559 	scmd->nbufs = 0;
1560 	scmd->addr = laddr;
1561 	scmd->len = len;
1562 	scmd->current_ro = 0;
1563 
1564 	if (do_immediate_data) {
1565 		/*
1566 		 * Account for data passed in this write command
1567 		 */
1568 		(void) stmf_xfer_data(task, dbuf, STMF_IOF_STATS_ONLY);
1569 		scmd->len -= dbuf->db_data_size;
1570 		scmd->current_ro += dbuf->db_data_size;
1571 		dbuf->db_xfer_status = STMF_SUCCESS;
1572 		sbd_handle_write_xfer_completion(task, scmd, dbuf, 0);
1573 	} else {
1574 		sbd_do_write_xfer(task, scmd, dbuf, 0);
1575 	}
1576 }
1577 
1578 /*
1579  * Utility routine to handle small non performance data transfers to the
1580  * initiators. dbuf is an initial data buf (if any), 'p' points to a data
1581  * buffer which is source of data for transfer, cdb_xfer_size is the
1582  * transfer size based on CDB, cmd_xfer_size is the actual amount of data
1583  * which this command would transfer (the size of data pointed to by 'p').
 *
 * The amount sent is the smallest of cmd_xfer_size, cdb_xfer_size and,
 * unless TASK_AF_NO_EXPECTED_XFER_LENGTH is set, the task's expected
 * transfer length.  If that amount is zero the command is failed with
 * STMF_SAA_INVALID_FIELD_IN_CDB; if no data buffer can be allocated
 * STATUS_QFULL is returned.  Otherwise the data is copied into the dbuf,
 * the task's sbd_cmd_t is marked as an active SBD_CMD_SMALL_READ and the
 * transfer to the initiator is started.
1584  */
1585 void
1586 sbd_handle_short_read_transfers(scsi_task_t *task, stmf_data_buf_t *dbuf,
1587     uint8_t *p, uint32_t cdb_xfer_size, uint32_t cmd_xfer_size)
1588 {
1589 	uint32_t bufsize, ndx;
1590 	sbd_cmd_t *scmd;
1591 
1592 	cmd_xfer_size = min(cmd_xfer_size, cdb_xfer_size);
1593 
1594 	task->task_cmd_xfer_length = cmd_xfer_size;
1595 	if (task->task_additional_flags & TASK_AF_NO_EXPECTED_XFER_LENGTH) {
1596 		task->task_expected_xfer_length = cmd_xfer_size;
1597 	} else {
1598 		cmd_xfer_size = min(cmd_xfer_size,
1599 		    task->task_expected_xfer_length);
1600 	}
1601 
1602 	if (cmd_xfer_size == 0) {
1603 		stmf_scsilib_send_status(task, STATUS_CHECK,
1604 		    STMF_SAA_INVALID_FIELD_IN_CDB);
1605 		return;
1606 	}
1607 	if (dbuf == NULL) {
		/* minsize == size: only a buffer that holds it all will do */
1608 		uint32_t minsize = cmd_xfer_size;
1609 
1610 		dbuf = stmf_alloc_dbuf(task, cmd_xfer_size, &minsize, 0);
1611 	}
1612 	if (dbuf == NULL) {
1613 		stmf_scsilib_send_status(task, STATUS_QFULL, 0);
1614 		return;
1615 	}
1616 
	/*
	 * Scatter the source data across the dbuf's sglist segments.
	 * NOTE(review): the loop is not bounded by db_sglist_length, so it
	 * relies on the segments together holding at least cmd_xfer_size
	 * bytes (and on no segment having zero length) -- confirm this
	 * holds for a caller-supplied dbuf.
	 */
1617 	for (bufsize = 0, ndx = 0; bufsize < cmd_xfer_size; ndx++) {
1618 		uint8_t *d;
1619 		uint32_t s;
1620 
1621 		d = dbuf->db_sglist[ndx].seg_addr;
1622 		s = min((cmd_xfer_size - bufsize),
1623 		    dbuf->db_sglist[ndx].seg_length);
1624 		bcopy(p+bufsize, d, s);
1625 		bufsize += s;
1626 	}
1627 	dbuf->db_relative_offset = 0;
1628 	dbuf->db_data_size = cmd_xfer_size;
1629 	dbuf->db_flags = DB_DIRECTION_TO_RPORT;
1630 
1631 	if (task->task_lu_private == NULL) {
1632 		task->task_lu_private =
1633 		    kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
1634 	}
1635 	scmd = (sbd_cmd_t *)task->task_lu_private;
1636 
	/* Only cmd_type and flags are set; other scmd fields are untouched. */
1637 	scmd->cmd_type = SBD_CMD_SMALL_READ;
1638 	scmd->flags = SBD_SCSI_CMD_ACTIVE;
1639 	(void) stmf_xfer_data(task, dbuf, 0);
1640 }
1641 
1642 void
1643 sbd_handle_short_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
1644 				struct stmf_data_buf *dbuf)
1645 {
1646 	if (dbuf->db_xfer_status != STMF_SUCCESS) {
1647 		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1648 		    dbuf->db_xfer_status, NULL);
1649 		return;
1650 	}
1651 	task->task_nbytes_transferred = dbuf->db_data_size;
1652 	scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
1653 	stmf_scsilib_send_status(task, STATUS_GOOD, 0);
1654 }
1655 
1656 void
1657 sbd_handle_short_write_transfers(scsi_task_t *task,
1658     stmf_data_buf_t *dbuf, uint32_t cdb_xfer_size)
1659 {
1660 	sbd_cmd_t *scmd;
1661 
1662 	task->task_cmd_xfer_length = cdb_xfer_size;
1663 	if (task->task_additional_flags & TASK_AF_NO_EXPECTED_XFER_LENGTH) {
1664 		task->task_expected_xfer_length = cdb_xfer_size;
1665 	} else {
1666 		cdb_xfer_size = min(cdb_xfer_size,
1667 		    task->task_expected_xfer_length);
1668 	}
1669 
1670 	if (cdb_xfer_size == 0) {
1671 		stmf_scsilib_send_status(task, STATUS_CHECK,
1672 		    STMF_SAA_INVALID_FIELD_IN_CDB);
1673 		return;
1674 	}
1675 	if (task->task_lu_private == NULL) {
1676 		task->task_lu_private = kmem_zalloc(sizeof (sbd_cmd_t),
1677 		    KM_SLEEP);
1678 	} else {
1679 		bzero(task->task_lu_private, sizeof (sbd_cmd_t));
1680 	}
1681 	scmd = (sbd_cmd_t *)task->task_lu_private;
1682 	scmd->cmd_type = SBD_CMD_SMALL_WRITE;
1683 	scmd->flags = SBD_SCSI_CMD_ACTIVE;
1684 	scmd->len = cdb_xfer_size;
1685 	if (dbuf == NULL) {
1686 		uint32_t minsize = cdb_xfer_size;
1687 
1688 		dbuf = stmf_alloc_dbuf(task, cdb_xfer_size, &minsize, 0);
1689 		if (dbuf == NULL) {
1690 			stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1691 			    STMF_ALLOC_FAILURE, NULL);
1692 			return;
1693 		}
1694 		dbuf->db_data_size = cdb_xfer_size;
1695 		dbuf->db_relative_offset = 0;
1696 		dbuf->db_flags = DB_DIRECTION_FROM_RPORT;
1697 		(void) stmf_xfer_data(task, dbuf, 0);
1698 	} else {
1699 		if (dbuf->db_data_size < cdb_xfer_size) {
1700 			stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1701 			    STMF_ABORTED, NULL);
1702 			return;
1703 		}
1704 		dbuf->db_data_size = cdb_xfer_size;
1705 		sbd_handle_short_write_xfer_completion(task, dbuf);
1706 	}
1707 }
1708 
/*
 * Process the parameter data of a small data-out command once it is
 * available in dbuf, dispatching on the CDB opcode.
 *
 * task		the SCSI task being serviced; task_lu_private must already
 *		point at its sbd_cmd_t (it is dereferenced unconditionally)
 * dbuf		buffer holding the received parameter data
 *
 * MODE SELECT and PERSISTENT RESERVE OUT are proxied with
 * stmf_proxy_scsi_cmd() while the LU is in SBD_LU_STANDBY and handled
 * locally otherwise; UNMAP is always handled locally.  Any other opcode
 * aborts the task.
 */
1709 void
1710 sbd_handle_short_write_xfer_completion(scsi_task_t *task,
1711     stmf_data_buf_t *dbuf)
1712 {
1713 	sbd_cmd_t *scmd;
1714 	stmf_status_t st_ret;
1715 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1716 
1717 	/*
1718 	 * For now lets assume we will get only one sglist element
1719 	 * for short writes. If that ever changes, we should allocate
1720 	 * a local buffer and copy all the sg elements to one linear space.
1721 	 */
	/*
	 * NOTE(review): when the transfer succeeded but the sglist has more
	 * than one element, the abort below is issued with db_xfer_status,
	 * i.e. STMF_SUCCESS, as its status -- confirm that is intended.
	 */
1722 	if ((dbuf->db_xfer_status != STMF_SUCCESS) ||
1723 	    (dbuf->db_sglist_length > 1)) {
1724 		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1725 		    dbuf->db_xfer_status, NULL);
1726 		return;
1727 	}
1728 
1729 	task->task_nbytes_transferred = dbuf->db_data_size;
1730 	scmd = (sbd_cmd_t *)task->task_lu_private;
1731 	scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
1732 
1733 	/* Lets find out who to call */
1734 	switch (task->task_cdb[0]) {
1735 	case SCMD_MODE_SELECT:
1736 	case SCMD_MODE_SELECT_G1:
1737 		if (sl->sl_access_state == SBD_LU_STANDBY) {
1738 			st_ret = stmf_proxy_scsi_cmd(task, dbuf);
1739 			if (st_ret != STMF_SUCCESS) {
1740 				stmf_scsilib_send_status(task, STATUS_CHECK,
1741 				    STMF_SAA_LU_NO_ACCESS_UNAVAIL);
1742 			}
1743 		} else {
1744 			sbd_handle_mode_select_xfer(task,
1745 			    dbuf->db_sglist[0].seg_addr, dbuf->db_data_size);
1746 		}
1747 		break;
1748 	case SCMD_UNMAP:
1749 		sbd_handle_unmap_xfer(task,
1750 		    dbuf->db_sglist[0].seg_addr, dbuf->db_data_size);
1751 		break;
1752 	case SCMD_PERSISTENT_RESERVE_OUT:
1753 		if (sl->sl_access_state == SBD_LU_STANDBY) {
1754 			st_ret = stmf_proxy_scsi_cmd(task, dbuf);
1755 			if (st_ret != STMF_SUCCESS) {
1756 				stmf_scsilib_send_status(task, STATUS_CHECK,
1757 				    STMF_SAA_LU_NO_ACCESS_UNAVAIL);
1758 			}
1759 		} else {
1760 			sbd_handle_pgr_out_data(task, dbuf);
1761 		}
1762 		break;
1763 	default:
1764 		/* This should never happen */
1765 		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1766 		    STMF_ABORTED, NULL);
1767 	}
1768 }
1769 
1770 void
1771 sbd_handle_read_capacity(struct scsi_task *task,
1772     struct stmf_data_buf *initial_dbuf)
1773 {
1774 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1775 	uint32_t cdb_len;
1776 	uint8_t p[32];
1777 	uint64_t s;
1778 	uint16_t blksize;
1779 
1780 	s = sl->sl_lu_size >> sl->sl_data_blocksize_shift;
1781 	s--;
1782 	blksize = ((uint16_t)1) << sl->sl_data_blocksize_shift;
1783 
1784 	switch (task->task_cdb[0]) {
1785 	case SCMD_READ_CAPACITY:
1786 		if (s & 0xffffffff00000000ull) {
1787 			p[0] = p[1] = p[2] = p[3] = 0xFF;
1788 		} else {
1789 			p[0] = (s >> 24) & 0xff;
1790 			p[1] = (s >> 16) & 0xff;
1791 			p[2] = (s >> 8) & 0xff;
1792 			p[3] = s & 0xff;
1793 		}
1794 		p[4] = 0; p[5] = 0;
1795 		p[6] = (blksize >> 8) & 0xff;
1796 		p[7] = blksize & 0xff;
1797 		sbd_handle_short_read_transfers(task, initial_dbuf, p, 8, 8);
1798 		break;
1799 
1800 	case SCMD_SVC_ACTION_IN_G4:
1801 		cdb_len = READ_SCSI32(&task->task_cdb[10], uint32_t);
1802 		bzero(p, 32);
1803 		p[0] = (s >> 56) & 0xff;
1804 		p[1] = (s >> 48) & 0xff;
1805 		p[2] = (s >> 40) & 0xff;
1806 		p[3] = (s >> 32) & 0xff;
1807 		p[4] = (s >> 24) & 0xff;
1808 		p[5] = (s >> 16) & 0xff;
1809 		p[6] = (s >> 8) & 0xff;
1810 		p[7] = s & 0xff;
1811 		p[10] = (blksize >> 8) & 0xff;
1812 		p[11] = blksize & 0xff;
1813 		if (sl->sl_flags & SL_UNMAP_ENABLED) {
1814 			p[14] = 0x80;
1815 		}
1816 		sbd_handle_short_read_transfers(task, initial_dbuf, p,
1817 		    cdb_len, 32);
1818 		break;
1819 	}
1820 }
1821 
/*
 * Derive a fabricated disk geometry for a logical unit.
 *
 * s		size of the logical unit in bytes
 * blksize	logical block size in bytes
 * nsectors	out: sectors per track
 * nheads	out: number of heads
 * ncyl		out: number of cylinders
 *
 * Units smaller than 4 GiB are reported with 32 sectors and 8 heads, all
 * others with 254 sectors and 254 heads.  The cylinder count is the size
 * divided by the bytes per cylinder, truncated to 32 bits.  A block size
 * of zero yields zero cylinders instead of dividing by zero.
 */
void
sbd_calc_geometry(uint64_t s, uint16_t blksize, uint8_t *nsectors,
    uint8_t *nheads, uint32_t *ncyl)
{
	uint64_t cyl_bytes;

	if (s < (4ull * 1024ull * 1024ull * 1024ull)) {
		*nsectors = 32;
		*nheads = 8;
	} else {
		*nsectors = 254;
		*nheads = 254;
	}

	cyl_bytes = (uint64_t)blksize * (uint64_t)(*nsectors) *
	    (uint64_t)(*nheads);
	if (cyl_bytes == 0) {
		/* Degenerate block size; there is nothing to divide by. */
		*ncyl = 0;
		return;
	}
	*ncyl = (uint32_t)(s / cyl_bytes);
}
1836 
/*
 * Build and send the response to MODE SENSE(6) or MODE SENSE(10).
 *
 * task		the SCSI task carrying the CDB
 * initial_dbuf	data buffer handed in by the port provider, or NULL
 * buf		caller-supplied scratch buffer in which the response is
 *		assembled; it must be zeroed and large enough for the mode
 *		parameter header plus every page that may be returned
 *
 * The response is a mode parameter header (4 bytes for the 6-byte command,
 * 8 bytes for the 10-byte one), no block descriptors, and then the
 * requested page or, for MODEPAGE_ALLPAGES, pages 0x03, 0x04, the caching
 * page and the control mode page in that order.  An allocation length
 * shorter than the header, or an unsupported page code, fails the command
 * with STMF_SAA_INVALID_FIELD_IN_CDB.
 */
1837 void
1838 sbd_handle_mode_sense(struct scsi_task *task,
1839     struct stmf_data_buf *initial_dbuf, uint8_t *buf)
1840 {
1841 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1842 	uint32_t cmd_size, n;
1843 	uint8_t *cdb;
1844 	uint32_t ncyl;
1845 	uint8_t nsectors, nheads;
1846 	uint8_t page, ctrl, header_size, pc_valid;
1847 	uint16_t nbytes;
1848 	uint8_t *p;
1849 	uint64_t s = sl->sl_lu_size;
1850 	uint32_t dev_spec_param_offset;
1851 
1852 	p = buf;	/* buf is assumed to be zeroed out and large enough */
1853 	n = 0;
1854 	cdb = &task->task_cdb[0];
	/*
	 * Byte 2 of the CDB holds the page code in its low six bits and
	 * the page control field in its top two.  cmd_size is the
	 * allocation length from the CDB.
	 */
1855 	page = cdb[2] & 0x3F;
1856 	ctrl = (cdb[2] >> 6) & 3;
1857 	cmd_size = (cdb[0] == SCMD_MODE_SENSE) ? cdb[4] :
1858 	    READ_SCSI16(&cdb[7], uint32_t);
1859 
1860 	if (cdb[0] == SCMD_MODE_SENSE) {
1861 		header_size = 4;
1862 		dev_spec_param_offset = 2;
1863 	} else {
1864 		header_size = 8;
1865 		dev_spec_param_offset = 3;
1866 	}
1867 
1868 	/* Now validate the command */
	/*
	 * NOTE(review): 0x08 and 0x0A here are presumably MODEPAGE_CACHING
	 * and MODEPAGE_CTRL_MODE, the names tested further down -- confirm.
	 */
1869 	if ((cdb[2] == 0) || (page == MODEPAGE_ALLPAGES) || (page == 0x08) ||
1870 	    (page == 0x0A) || (page == 0x03) || (page == 0x04)) {
1871 		pc_valid = 1;
1872 	} else {
1873 		pc_valid = 0;
1874 	}
1875 	if ((cmd_size < header_size) || (pc_valid == 0)) {
1876 		stmf_scsilib_send_status(task, STATUS_CHECK,
1877 		    STMF_SAA_INVALID_FIELD_IN_CDB);
1878 		return;
1879 	}
1880 
1881 	/* We will update the length in the mode header at the end */
1882 
1883 	/* Block dev device specific param in mode param header has wp bit */
1884 	if (sl->sl_flags & SL_WRITE_PROTECTED) {
1885 		p[n + dev_spec_param_offset] = BIT_7;
1886 	}
1887 	n += header_size;
1888 	/* We are not going to return any block descriptor */
1889 
	/* nbytes is the logical block size; it feeds pages 0x03 and 0x04. */
1890 	nbytes = ((uint16_t)1) << sl->sl_data_blocksize_shift;
1891 	sbd_calc_geometry(s, nbytes, &nsectors, &nheads, &ncyl);
1892 
	/*
	 * Page 0x03: 2-byte page header plus 0x16 bytes.  As for page 0x04
	 * below, only the page header is filled in when ctrl is 1 (the
	 * "Changeable" case of the caching page switch further down).
	 */
1893 	if ((page == 0x03) || (page == MODEPAGE_ALLPAGES)) {
1894 		p[n] = 0x03;
1895 		p[n+1] = 0x16;
1896 		if (ctrl != 1) {
1897 			p[n + 11] = nsectors;
1898 			p[n + 12] = nbytes >> 8;
1899 			p[n + 13] = nbytes & 0xff;
1900 			p[n + 20] = 0x80;
1901 		}
1902 		n += 24;
1903 	}
	/* Page 0x04: 2-byte page header plus 0x16 bytes. */
1904 	if ((page == 0x04) || (page == MODEPAGE_ALLPAGES)) {
1905 		p[n] = 0x04;
1906 		p[n + 1] = 0x16;
1907 		if (ctrl != 1) {
1908 			p[n + 2] = ncyl >> 16;
1909 			p[n + 3] = ncyl >> 8;
1910 			p[n + 4] = ncyl & 0xff;
1911 			p[n + 5] = nheads;
1912 			p[n + 20] = 0x15;
1913 			p[n + 21] = 0x18;
1914 		}
1915 		n += 24;
1916 	}
1917 	if ((page == MODEPAGE_CACHING) || (page == MODEPAGE_ALLPAGES)) {
1918 		struct mode_caching *mode_caching_page;
1919 
1920 		mode_caching_page = (struct mode_caching *)&p[n];
1921 
1922 		mode_caching_page->mode_page.code = MODEPAGE_CACHING;
1923 		mode_caching_page->mode_page.ps = 1; /* A saveable page */
1924 		mode_caching_page->mode_page.length = 0x12;
1925 
		/*
		 * The wce bit is derived from a different LU flag for each
		 * page control value.
		 */
1926 		switch (ctrl) {
1927 		case (0):
1928 			/* Current */
1929 			if ((sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) == 0) {
1930 				mode_caching_page->wce = 1;
1931 			}
1932 			break;
1933 
1934 		case (1):
1935 			/* Changeable */
1936 			if ((sl->sl_flags &
1937 			    SL_WRITEBACK_CACHE_SET_UNSUPPORTED) == 0) {
1938 				mode_caching_page->wce = 1;
1939 			}
1940 			break;
1941 
1942 		default:
			/* Remaining page control values (2 and 3). */
1943 			if ((sl->sl_flags &
1944 			    SL_SAVED_WRITE_CACHE_DISABLE) == 0) {
1945 				mode_caching_page->wce = 1;
1946 			}
1947 			break;
1948 		}
1949 		n += (sizeof (struct mode_page) +
1950 		    mode_caching_page->mode_page.length);
1951 	}
1952 	if ((page == MODEPAGE_CTRL_MODE) || (page == MODEPAGE_ALLPAGES)) {
1953 		struct mode_control_scsi3 *mode_control_page;
1954 
1955 		mode_control_page = (struct mode_control_scsi3 *)&p[n];
1956 
1957 		mode_control_page->mode_page.code = MODEPAGE_CTRL_MODE;
1958 		mode_control_page->mode_page.length =
1959 		    PAGELENGTH_MODE_CONTROL_SCSI3;
1960 		if (ctrl != 1) {
1961 			/* If not looking for changeable values, report this. */
1962 			mode_control_page->que_mod = CTRL_QMOD_UNRESTRICT;
1963 		}
1964 		n += (sizeof (struct mode_page) +
1965 		    mode_control_page->mode_page.length);
1966 	}
1967 
	/* n is now the total response length; fill in the header. */
1968 	if (cdb[0] == SCMD_MODE_SENSE) {
		/* The 6-byte header has a one-byte length field. */
1969 		if (n > 255) {
1970 			stmf_scsilib_send_status(task, STATUS_CHECK,
1971 			    STMF_SAA_INVALID_FIELD_IN_CDB);
1972 			return;
1973 		}
1974 		/*
1975 		 * Mode parameter header length doesn't include the number
1976 		 * of bytes in the length field, so adjust the count.
1977 		 * Byte count minus header length field size.
1978 		 */
1979 		buf[0] = (n - 1) & 0xff;
1980 	} else {
1981 		/* Byte count minus header length field size. */
1982 		buf[1] = (n - 2) & 0xff;
1983 		buf[0] = ((n - 2) >> 8) & 0xff;
1984 	}
1985 
	/* The helper sends the smaller of the allocation length and n. */
1986 	sbd_handle_short_read_transfers(task, initial_dbuf, buf,
1987 	    cmd_size, n);
1988 }
1989 
1990 void
1991 sbd_handle_mode_select(scsi_task_t *task, stmf_data_buf_t *dbuf)
1992 {
1993 	uint32_t cmd_xfer_len;
1994 
1995 	if (task->task_cdb[0] == SCMD_MODE_SELECT) {
1996 		cmd_xfer_len = (uint32_t)task->task_cdb[4];
1997 	} else {
1998 		cmd_xfer_len = READ_SCSI16(&task->task_cdb[7], uint32_t);
1999 	}
2000 
2001 	if ((task->task_cdb[1] & 0xFE) != 0x10) {
2002 		stmf_scsilib_send_status(task, STATUS_CHECK,
2003 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2004 		return;
2005 	}
2006 
2007 	if (cmd_xfer_len == 0) {
2008 		/* zero byte mode selects are allowed */
2009 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2010 		return;
2011 	}
2012 
2013 	sbd_handle_short_write_transfers(task, dbuf, cmd_xfer_len);
2014 }
2015 
2016 void
2017 sbd_handle_mode_select_xfer(scsi_task_t *task, uint8_t *buf, uint32_t buflen)
2018 {
2019 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2020 	sbd_it_data_t *it;
2021 	int hdr_len, bd_len;
2022 	sbd_status_t sret;
2023 	int i;
2024 
2025 	if (task->task_cdb[0] == SCMD_MODE_SELECT) {
2026 		hdr_len = 4;
2027 	} else {
2028 		hdr_len = 8;
2029 	}
2030 
2031 	if (buflen < hdr_len)
2032 		goto mode_sel_param_len_err;
2033 
2034 	bd_len = hdr_len == 4 ? buf[3] : READ_SCSI16(&buf[6], int);
2035 
2036 	if (buflen < (hdr_len + bd_len + 2))
2037 		goto mode_sel_param_len_err;
2038 
2039 	buf += hdr_len + bd_len;
2040 	buflen -= hdr_len + bd_len;
2041 
2042 	if ((buf[0] != 8) || (buflen != ((uint32_t)buf[1] + 2))) {
2043 		goto mode_sel_param_len_err;
2044 	}
2045 
2046 	if (buf[2] & 0xFB) {
2047 		goto mode_sel_param_field_err;
2048 	}
2049 
2050 	for (i = 3; i < (buf[1] + 2); i++) {
2051 		if (buf[i]) {
2052 			goto mode_sel_param_field_err;
2053 		}
2054 	}
2055 
2056 	sret = SBD_SUCCESS;
2057 
2058 	/* All good. Lets handle the write cache change, if any */
2059 	if (buf[2] & BIT_2) {
2060 		sret = sbd_wcd_set(0, sl);
2061 	} else {
2062 		sret = sbd_wcd_set(1, sl);
2063 	}
2064 
2065 	if (sret != SBD_SUCCESS) {
2066 		stmf_scsilib_send_status(task, STATUS_CHECK,
2067 		    STMF_SAA_WRITE_ERROR);
2068 		return;
2069 	}
2070 
2071 	/* set on the device passed, now set the flags */
2072 	mutex_enter(&sl->sl_lock);
2073 	if (buf[2] & BIT_2) {
2074 		sl->sl_flags &= ~SL_WRITEBACK_CACHE_DISABLE;
2075 	} else {
2076 		sl->sl_flags |= SL_WRITEBACK_CACHE_DISABLE;
2077 	}
2078 
2079 	for (it = sl->sl_it_list; it != NULL; it = it->sbd_it_next) {
2080 		if (it == task->task_lu_itl_handle)
2081 			continue;
2082 		it->sbd_it_ua_conditions |= SBD_UA_MODE_PARAMETERS_CHANGED;
2083 	}
2084 
2085 	if (task->task_cdb[1] & 1) {
2086 		if (buf[2] & BIT_2) {
2087 			sl->sl_flags &= ~SL_SAVED_WRITE_CACHE_DISABLE;
2088 		} else {
2089 			sl->sl_flags |= SL_SAVED_WRITE_CACHE_DISABLE;
2090 		}
2091 		mutex_exit(&sl->sl_lock);
2092 		sret = sbd_write_lu_info(sl);
2093 	} else {
2094 		mutex_exit(&sl->sl_lock);
2095 	}
2096 	if (sret == SBD_SUCCESS) {
2097 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2098 	} else {
2099 		stmf_scsilib_send_status(task, STATUS_CHECK,
2100 		    STMF_SAA_WRITE_ERROR);
2101 	}
2102 	return;
2103 
2104 mode_sel_param_len_err:
2105 	stmf_scsilib_send_status(task, STATUS_CHECK,
2106 	    STMF_SAA_PARAM_LIST_LENGTH_ERROR);
2107 	return;
2108 mode_sel_param_field_err:
2109 	stmf_scsilib_send_status(task, STATUS_CHECK,
2110 	    STMF_SAA_INVALID_FIELD_IN_PARAM_LIST);
2111 }
2112 
2113 /*
2114  * Command support added from SPC-4 r24
2115  * Supports info type 0, 2, 127
2116  */
2117 void
2118 sbd_handle_identifying_info(struct scsi_task *task,
2119     stmf_data_buf_t *initial_dbuf)
2120 {
2121 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2122 	uint8_t *cdb;
2123 	uint32_t cmd_size;
2124 	uint32_t param_len;
2125 	uint32_t xfer_size;
2126 	uint8_t info_type;
2127 	uint8_t *buf, *p;
2128 
2129 	cdb = &task->task_cdb[0];
2130 	cmd_size = READ_SCSI32(&cdb[6], uint32_t);
2131 	info_type = cdb[10]>>1;
2132 
2133 	/* Validate the command */
2134 	if (cmd_size < 4) {
2135 		stmf_scsilib_send_status(task, STATUS_CHECK,
2136 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2137 		return;
2138 	}
2139 
2140 	p = buf = kmem_zalloc(260, KM_SLEEP);
2141 
2142 	switch (info_type) {
2143 		case 0:
2144 			/*
2145 			 * No value is supplied but this info type
2146 			 * is mandatory.
2147 			 */
2148 			xfer_size = 4;
2149 			break;
2150 		case 2:
2151 			mutex_enter(&sl->sl_lock);
2152 			param_len = strlcpy((char *)(p+4), sl->sl_alias, 256);
2153 			mutex_exit(&sl->sl_lock);
2154 			/* text info must be null terminated */
2155 			if (++param_len > 256)
2156 				param_len = 256;
2157 			SCSI_WRITE16(p+2, param_len);
2158 			xfer_size = param_len + 4;
2159 			break;
2160 		case 127:
2161 			/* 0 and 2 descriptor supported */
2162 			SCSI_WRITE16(p+2, 8); /* set param length */
2163 			p += 8;
2164 			*p = 4; /* set type to 2 (7 hi bits) */
2165 			p += 2;
2166 			SCSI_WRITE16(p, 256); /* 256 max length */
2167 			xfer_size = 12;
2168 			break;
2169 		default:
2170 			stmf_scsilib_send_status(task, STATUS_CHECK,
2171 			    STMF_SAA_INVALID_FIELD_IN_CDB);
2172 			kmem_free(buf, 260);
2173 			return;
2174 	}
2175 	sbd_handle_short_read_transfers(task, initial_dbuf, buf,
2176 	    cmd_size, xfer_size);
2177 	kmem_free(buf, 260);
2178 }
2179 
/*
 * Extract one URL from a white-space delimited list of URLs.
 *
 * Multiple Management URLs are stored as a single string in the
 * sl_mgmt_url field of sbd_lu_t, separated by blanks, tabs or newlines.
 * This function is used to retrieve one URL at a time: the caller passes
 * the address of its cursor into the string, consumes the returned number
 * of characters, advances the cursor by that amount and calls again.
 *
 * i/p : url_addr - pointer to a pointer into a NUL terminated string
 * o/p : *url_addr is advanced past any leading white space so that it
 *       points at the first character of the next URL (or at the
 *       terminating NUL if there is none).
 *
 * Returns the length of that URL, i.e. the number of characters up to
 * the next white space or the end of the string; 0 when no URL is left.
 */
uint16_t
sbd_parse_mgmt_url(char **url_addr)
{
	uint16_t url_length = 0;
	char *url = *url_addr;

	/* Skip leading white space and hand the new position back. */
	while (*url == ' ' || *url == '\t' || *url == '\n')
		url++;
	*url_addr = url;

	/* Measure the run of non white space characters that follows. */
	while (*url != '\0' && *url != ' ' && *url != '\t' && *url != '\n') {
		url++;
		url_length++;
	}
	return (url_length);
}
2216 
2217 /* Try to make this the size of a kmem allocation cache. */
2218 static uint_t sbd_write_same_optimal_chunk = 128 * 1024;
2219 
2220 static sbd_status_t
2221 sbd_write_same_data(struct scsi_task *task, sbd_cmd_t *scmd)
2222 {
2223 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2224 	uint64_t addr, len, sz_done;
2225 	uint32_t big_buf_size, xfer_size, off;
2226 	uint8_t *big_buf;
2227 	sbd_status_t ret;
2228 
2229 	if (task->task_cdb[0] == SCMD_WRITE_SAME_G1) {
2230 		addr = READ_SCSI32(&task->task_cdb[2], uint64_t);
2231 		len = READ_SCSI16(&task->task_cdb[7], uint64_t);
2232 	} else {
2233 		addr = READ_SCSI64(&task->task_cdb[2], uint64_t);
2234 		len = READ_SCSI32(&task->task_cdb[10], uint64_t);
2235 	}
2236 	addr <<= sl->sl_data_blocksize_shift;
2237 	len <<= sl->sl_data_blocksize_shift;
2238 
2239 	/*
2240 	 * Reminders:
2241 	 *    "len" is total size of what we wish to "write same".
2242 	 *
2243 	 *    xfer_size will be scmd->trans_data_len, which is the length
2244 	 *    of the pattern we wish to replicate over "len".  We replicate
2245 	 *    "xfer_size" of pattern over "len".
2246 	 *
2247 	 *    big_buf_size is set to an ideal actual-write size for an output
2248 	 *    operation.  It may be the same as "len".  If it's not, it should
2249 	 *    be an exact multiple of "xfer_size" so we don't get pattern
2250 	 *    breakage until the very end of "len".
2251 	 */
2252 	big_buf_size = len > sbd_write_same_optimal_chunk ?
2253 	    sbd_write_same_optimal_chunk : (uint32_t)len;
2254 	xfer_size = scmd->trans_data_len;
2255 
2256 	/*
2257 	 * All transfers should be an integral multiple of the sector size.
2258 	 */
2259 	ASSERT((big_buf_size % xfer_size) == 0);
2260 
2261 	/*
2262 	 * Don't sleep for the allocation, and don't make the system
2263 	 * reclaim memory.  Trade higher I/Os if in a low-memory situation.
2264 	 */
2265 	big_buf = kmem_alloc(big_buf_size, KM_NOSLEEP | KM_NORMALPRI);
2266 
2267 	if (big_buf == NULL) {
2268 		/*
2269 		 * Just send it in terms of of the transmitted data.  This
2270 		 * will be very slow.
2271 		 */
2272 		DTRACE_PROBE1(write__same__low__memory, uint64_t, big_buf_size);
2273 		big_buf = scmd->trans_data;
2274 		big_buf_size = scmd->trans_data_len;
2275 	} else {
2276 		/*
2277 		 * We already ASSERT()ed big_buf_size is an integral multiple
2278 		 * of xfer_size.
2279 		 */
2280 		for (off = 0; off < big_buf_size; off += xfer_size)
2281 			bcopy(scmd->trans_data, big_buf + off, xfer_size);
2282 	}
2283 
2284 	/* Do the actual I/O.  Recycle xfer_size now to be write size. */
2285 	DTRACE_PROBE1(write__same__io__begin, uint64_t, len);
2286 	for (sz_done = 0; sz_done < len; sz_done += (uint64_t)xfer_size) {
2287 		xfer_size = ((big_buf_size + sz_done) <= len) ? big_buf_size :
2288 		    len - sz_done;
2289 		ret = sbd_data_write(sl, task, addr + sz_done,
2290 		    (uint64_t)xfer_size, big_buf);
2291 		if (ret != SBD_SUCCESS)
2292 			break;
2293 	}
2294 	DTRACE_PROBE2(write__same__io__end, uint64_t, len, uint64_t, sz_done);
2295 
2296 	if (big_buf != scmd->trans_data)
2297 		kmem_free(big_buf, big_buf_size);
2298 
2299 	return (ret);
2300 }
2301 
2302 static void
2303 sbd_handle_write_same_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
2304     struct stmf_data_buf *dbuf, uint8_t dbuf_reusable)
2305 {
2306 	uint64_t laddr;
2307 	uint32_t buflen, iolen;
2308 	int ndx, ret;
2309 
2310 	if (dbuf->db_xfer_status != STMF_SUCCESS) {
2311 		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
2312 		    dbuf->db_xfer_status, NULL);
2313 		return;
2314 	}
2315 
2316 	if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
2317 		goto write_same_xfer_done;
2318 	}
2319 
2320 	if (scmd->len != 0) {
2321 		/*
2322 		 * Initiate the next port xfer to occur in parallel
2323 		 * with writing this buf.
2324 		 */
2325 		sbd_do_write_same_xfer(task, scmd, NULL, 0);
2326 	}
2327 
2328 	laddr = dbuf->db_relative_offset;
2329 
2330 	for (buflen = 0, ndx = 0; (buflen < dbuf->db_data_size) &&
2331 	    (ndx < dbuf->db_sglist_length); ndx++) {
2332 		iolen = min(dbuf->db_data_size - buflen,
2333 		    dbuf->db_sglist[ndx].seg_length);
2334 		if (iolen == 0)
2335 			break;
2336 		bcopy(dbuf->db_sglist[ndx].seg_addr, &scmd->trans_data[laddr],
2337 		    iolen);
2338 		buflen += iolen;
2339 		laddr += (uint64_t)iolen;
2340 	}
2341 	task->task_nbytes_transferred += buflen;
2342 
2343 write_same_xfer_done:
2344 	if (scmd->len == 0 || scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
2345 		stmf_free_dbuf(task, dbuf);
2346 		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
2347 		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
2348 			stmf_scsilib_send_status(task, STATUS_CHECK,
2349 			    STMF_SAA_WRITE_ERROR);
2350 		} else {
2351 			ret = sbd_write_same_data(task, scmd);
2352 			if (ret != SBD_SUCCESS) {
2353 				stmf_scsilib_send_status(task, STATUS_CHECK,
2354 				    STMF_SAA_WRITE_ERROR);
2355 			} else {
2356 				stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2357 			}
2358 		}
2359 		/*
2360 		 * Only way we should get here is via handle_write_same(),
2361 		 * and that should make the following assertion always pass.
2362 		 */
2363 		ASSERT((scmd->flags & SBD_SCSI_CMD_TRANS_DATA) &&
2364 		    scmd->trans_data != NULL);
2365 		kmem_free(scmd->trans_data, scmd->trans_data_len);
2366 		scmd->flags &= ~SBD_SCSI_CMD_TRANS_DATA;
2367 		return;
2368 	}
2369 	sbd_do_write_same_xfer(task, scmd, dbuf, dbuf_reusable);
2370 }
2371 
2372 static void
2373 sbd_do_write_same_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
2374     struct stmf_data_buf *dbuf, uint8_t dbuf_reusable)
2375 {
2376 	uint32_t len;
2377 
2378 	if (scmd->len == 0) {
2379 		if (dbuf != NULL)
2380 			stmf_free_dbuf(task, dbuf);
2381 		return;
2382 	}
2383 
2384 	if ((dbuf != NULL) &&
2385 	    ((dbuf->db_flags & DB_DONT_REUSE) || (dbuf_reusable == 0))) {
2386 		/* free current dbuf and allocate a new one */
2387 		stmf_free_dbuf(task, dbuf);
2388 		dbuf = NULL;
2389 	}
2390 	if (dbuf == NULL) {
2391 		uint32_t maxsize, minsize, old_minsize;
2392 
2393 		maxsize = (scmd->len > (128*1024)) ? 128*1024 :
2394 		    scmd->len;
2395 		minsize = maxsize >> 2;
2396 		do {
2397 			old_minsize = minsize;
2398 			dbuf = stmf_alloc_dbuf(task, maxsize, &minsize, 0);
2399 		} while ((dbuf == NULL) && (old_minsize > minsize) &&
2400 		    (minsize >= 512));
2401 		if (dbuf == NULL) {
2402 			if (scmd->nbufs == 0) {
2403 				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
2404 				    STMF_ALLOC_FAILURE, NULL);
2405 			}
2406 			return;
2407 		}
2408 	}
2409 
2410 	len = scmd->len > dbuf->db_buf_size ? dbuf->db_buf_size :
2411 	    scmd->len;
2412 
2413 	dbuf->db_relative_offset = scmd->current_ro;
2414 	dbuf->db_data_size = len;
2415 	dbuf->db_flags = DB_DIRECTION_FROM_RPORT;
2416 	(void) stmf_xfer_data(task, dbuf, 0);
2417 	scmd->nbufs++; /* outstanding port xfers and bufs used */
2418 	scmd->len -= len;
2419 	scmd->current_ro += len;
2420 }
2421 
2422 static void
2423 sbd_handle_write_same(scsi_task_t *task, struct stmf_data_buf *initial_dbuf)
2424 {
2425 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2426 	uint64_t addr, len;
2427 	sbd_cmd_t *scmd;
2428 	stmf_data_buf_t *dbuf;
2429 	uint8_t unmap;
2430 	uint8_t do_immediate_data = 0;
2431 
2432 	task->task_cmd_xfer_length = 0;
2433 	if (task->task_additional_flags &
2434 	    TASK_AF_NO_EXPECTED_XFER_LENGTH) {
2435 		task->task_expected_xfer_length = 0;
2436 	}
2437 	if (sl->sl_flags & SL_WRITE_PROTECTED) {
2438 		stmf_scsilib_send_status(task, STATUS_CHECK,
2439 		    STMF_SAA_WRITE_PROTECTED);
2440 		return;
2441 	}
2442 	if (task->task_cdb[1] & 0xF7) {
2443 		stmf_scsilib_send_status(task, STATUS_CHECK,
2444 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2445 		return;
2446 	}
2447 	unmap = task->task_cdb[1] & 0x08;
2448 	if (unmap && ((sl->sl_flags & SL_UNMAP_ENABLED) == 0)) {
2449 		stmf_scsilib_send_status(task, STATUS_CHECK,
2450 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2451 		return;
2452 	}
2453 	if (task->task_cdb[0] == SCMD_WRITE_SAME_G1) {
2454 		addr = READ_SCSI32(&task->task_cdb[2], uint64_t);
2455 		len = READ_SCSI16(&task->task_cdb[7], uint64_t);
2456 	} else {
2457 		addr = READ_SCSI64(&task->task_cdb[2], uint64_t);
2458 		len = READ_SCSI32(&task->task_cdb[10], uint64_t);
2459 	}
2460 	if (len == 0) {
2461 		stmf_scsilib_send_status(task, STATUS_CHECK,
2462 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2463 		return;
2464 	}
2465 	addr <<= sl->sl_data_blocksize_shift;
2466 	len <<= sl->sl_data_blocksize_shift;
2467 
2468 	/* Check if the command is for the unmap function */
2469 	if (unmap) {
2470 		if (sbd_unmap(sl, addr, len) != 0) {
2471 			stmf_scsilib_send_status(task, STATUS_CHECK,
2472 			    STMF_SAA_LBA_OUT_OF_RANGE);
2473 		} else {
2474 			stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2475 		}
2476 		return;
2477 	}
2478 
2479 	/* Write same function */
2480 
2481 	task->task_cmd_xfer_length = 1 << sl->sl_data_blocksize_shift;
2482 	if (task->task_additional_flags &
2483 	    TASK_AF_NO_EXPECTED_XFER_LENGTH) {
2484 		task->task_expected_xfer_length = task->task_cmd_xfer_length;
2485 	}
2486 	if ((addr + len) > sl->sl_lu_size) {
2487 		stmf_scsilib_send_status(task, STATUS_CHECK,
2488 		    STMF_SAA_LBA_OUT_OF_RANGE);
2489 		return;
2490 	}
2491 
2492 	/* For rest of this I/O the transfer length is 1 block */
2493 	len = ((uint64_t)1) << sl->sl_data_blocksize_shift;
2494 
2495 	/* Some basic checks */
2496 	if ((len == 0) || (len != task->task_expected_xfer_length)) {
2497 		stmf_scsilib_send_status(task, STATUS_CHECK,
2498 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2499 		return;
2500 	}
2501 
2502 
2503 	if ((initial_dbuf != NULL) && (task->task_flags & TF_INITIAL_BURST)) {
2504 		if (initial_dbuf->db_data_size > len) {
2505 			if (initial_dbuf->db_data_size >
2506 			    task->task_expected_xfer_length) {
2507 				/* protocol error */
2508 				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
2509 				    STMF_INVALID_ARG, NULL);
2510 				return;
2511 			}
2512 			initial_dbuf->db_data_size = (uint32_t)len;
2513 		}
2514 		do_immediate_data = 1;
2515 	}
2516 	dbuf = initial_dbuf;
2517 
2518 	if (task->task_lu_private) {
2519 		scmd = (sbd_cmd_t *)task->task_lu_private;
2520 	} else {
2521 		scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
2522 		task->task_lu_private = scmd;
2523 	}
2524 	scmd->flags = SBD_SCSI_CMD_ACTIVE | SBD_SCSI_CMD_TRANS_DATA;
2525 	scmd->cmd_type = SBD_CMD_SCSI_WRITE;
2526 	scmd->nbufs = 0;
2527 	scmd->len = (uint32_t)len;
2528 	scmd->trans_data_len = (uint32_t)len;
2529 	scmd->trans_data = kmem_alloc((size_t)len, KM_SLEEP);
2530 	scmd->current_ro = 0;
2531 
2532 	if (do_immediate_data) {
2533 		/*
2534 		 * Account for data passed in this write command
2535 		 */
2536 		(void) stmf_xfer_data(task, dbuf, STMF_IOF_STATS_ONLY);
2537 		scmd->len -= dbuf->db_data_size;
2538 		scmd->current_ro += dbuf->db_data_size;
2539 		dbuf->db_xfer_status = STMF_SUCCESS;
2540 		sbd_handle_write_same_xfer_completion(task, scmd, dbuf, 0);
2541 	} else {
2542 		sbd_do_write_same_xfer(task, scmd, dbuf, 0);
2543 	}
2544 }
2545 
2546 static void
2547 sbd_handle_unmap(scsi_task_t *task, stmf_data_buf_t *dbuf)
2548 {
2549 	uint32_t cmd_xfer_len;
2550 
2551 	cmd_xfer_len = READ_SCSI16(&task->task_cdb[7], uint32_t);
2552 
2553 	if (task->task_cdb[1] & 1) {
2554 		stmf_scsilib_send_status(task, STATUS_CHECK,
2555 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2556 		return;
2557 	}
2558 
2559 	if (cmd_xfer_len == 0) {
2560 		task->task_cmd_xfer_length = 0;
2561 		if (task->task_additional_flags &
2562 		    TASK_AF_NO_EXPECTED_XFER_LENGTH) {
2563 			task->task_expected_xfer_length = 0;
2564 		}
2565 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2566 		return;
2567 	}
2568 
2569 	sbd_handle_short_write_transfers(task, dbuf, cmd_xfer_len);
2570 }
2571 
2572 static void
2573 sbd_handle_unmap_xfer(scsi_task_t *task, uint8_t *buf, uint32_t buflen)
2574 {
2575 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2576 	uint32_t ulen, dlen, num_desc;
2577 	uint64_t addr, len;
2578 	uint8_t *p;
2579 	int ret;
2580 
2581 	if (buflen < 24) {
2582 		stmf_scsilib_send_status(task, STATUS_CHECK,
2583 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2584 		return;
2585 	}
2586 	ulen = READ_SCSI16(buf, uint32_t);
2587 	dlen = READ_SCSI16(buf + 2, uint32_t);
2588 	num_desc = dlen >> 4;
2589 	if (((ulen + 2) != buflen) || ((dlen + 8) != buflen) || (dlen & 0xf) ||
2590 	    (num_desc == 0)) {
2591 		stmf_scsilib_send_status(task, STATUS_CHECK,
2592 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2593 		return;
2594 	}
2595 
2596 	for (p = buf + 8; num_desc; num_desc--, p += 16) {
2597 		addr = READ_SCSI64(p, uint64_t);
2598 		addr <<= sl->sl_data_blocksize_shift;
2599 		len = READ_SCSI32(p+8, uint64_t);
2600 		len <<= sl->sl_data_blocksize_shift;
2601 		ret = sbd_unmap(sl, addr, len);
2602 		if (ret != 0) {
2603 			stmf_scsilib_send_status(task, STATUS_CHECK,
2604 			    STMF_SAA_LBA_OUT_OF_RANGE);
2605 			return;
2606 		}
2607 	}
2608 
2609 unmap_done:
2610 	stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2611 }
2612 
2613 void
2614 sbd_handle_inquiry(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
2615 {
2616 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2617 	uint8_t *cdbp = (uint8_t *)&task->task_cdb[0];
2618 	uint8_t *p;
2619 	uint8_t byte0;
2620 	uint8_t page_length;
2621 	uint16_t bsize = 512;
2622 	uint16_t cmd_size;
2623 	uint32_t xfer_size = 4;
2624 	uint32_t mgmt_url_size = 0;
2625 	uint8_t exp;
2626 	uint64_t s;
2627 	char *mgmt_url = NULL;
2628 
2629 
2630 	byte0 = DTYPE_DIRECT;
2631 	/*
2632 	 * Basic protocol checks.
2633 	 */
2634 
2635 	if ((((cdbp[1] & 1) == 0) && cdbp[2]) || cdbp[5]) {
2636 		stmf_scsilib_send_status(task, STATUS_CHECK,
2637 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2638 		return;
2639 	}
2640 
2641 	/*
2642 	 * Zero byte allocation length is not an error.  Just
2643 	 * return success.
2644 	 */
2645 
2646 	cmd_size = (((uint16_t)cdbp[3]) << 8) | cdbp[4];
2647 
2648 	if (cmd_size == 0) {
2649 		task->task_cmd_xfer_length = 0;
2650 		if (task->task_additional_flags &
2651 		    TASK_AF_NO_EXPECTED_XFER_LENGTH) {
2652 			task->task_expected_xfer_length = 0;
2653 		}
2654 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2655 		return;
2656 	}
2657 
2658 	/*
2659 	 * Standard inquiry
2660 	 */
2661 
2662 	if ((cdbp[1] & 1) == 0) {
2663 		int	i;
2664 		struct scsi_inquiry *inq;
2665 
2666 		p = (uint8_t *)kmem_zalloc(bsize, KM_SLEEP);
2667 		inq = (struct scsi_inquiry *)p;
2668 
2669 		page_length = 69;
2670 		xfer_size = page_length + 5;
2671 
2672 		inq->inq_dtype = DTYPE_DIRECT;
2673 		inq->inq_ansi = 5;	/* SPC-3 */
2674 		inq->inq_hisup = 1;
2675 		inq->inq_rdf = 2;	/* Response data format for SPC-3 */
2676 		inq->inq_len = page_length;
2677 
2678 		inq->inq_tpgs = TPGS_FAILOVER_IMPLICIT;
2679 		inq->inq_cmdque = 1;
2680 
2681 		if (sl->sl_flags & SL_VID_VALID) {
2682 			bcopy(sl->sl_vendor_id, inq->inq_vid, 8);
2683 		} else {
2684 			bcopy(sbd_vendor_id, inq->inq_vid, 8);
2685 		}
2686 
2687 		if (sl->sl_flags & SL_PID_VALID) {
2688 			bcopy(sl->sl_product_id, inq->inq_pid, 16);
2689 		} else {
2690 			bcopy(sbd_product_id, inq->inq_pid, 16);
2691 		}
2692 
2693 		if (sl->sl_flags & SL_REV_VALID) {
2694 			bcopy(sl->sl_revision, inq->inq_revision, 4);
2695 		} else {
2696 			bcopy(sbd_revision, inq->inq_revision, 4);
2697 		}
2698 
2699 		/* Adding Version Descriptors */
2700 		i = 0;
2701 		/* SAM-3 no version */
2702 		inq->inq_vd[i].inq_vd_msb = 0x00;
2703 		inq->inq_vd[i].inq_vd_lsb = 0x60;
2704 		i++;
2705 
2706 		/* transport */
2707 		switch (task->task_lport->lport_id->protocol_id) {
2708 		case PROTOCOL_FIBRE_CHANNEL:
2709 			inq->inq_vd[i].inq_vd_msb = 0x09;
2710 			inq->inq_vd[i].inq_vd_lsb = 0x00;
2711 			i++;
2712 			break;
2713 
2714 		case PROTOCOL_PARALLEL_SCSI:
2715 		case PROTOCOL_SSA:
2716 		case PROTOCOL_IEEE_1394:
2717 			/* Currently no claims of conformance */
2718 			break;
2719 
2720 		case PROTOCOL_SRP:
2721 			inq->inq_vd[i].inq_vd_msb = 0x09;
2722 			inq->inq_vd[i].inq_vd_lsb = 0x40;
2723 			i++;
2724 			break;
2725 
2726 		case PROTOCOL_iSCSI:
2727 			inq->inq_vd[i].inq_vd_msb = 0x09;
2728 			inq->inq_vd[i].inq_vd_lsb = 0x60;
2729 			i++;
2730 			break;
2731 
2732 		case PROTOCOL_SAS:
2733 		case PROTOCOL_ADT:
2734 		case PROTOCOL_ATAPI:
2735 		default:
2736 			/* Currently no claims of conformance */
2737 			break;
2738 		}
2739 
2740 		/* SPC-3 no version */
2741 		inq->inq_vd[i].inq_vd_msb = 0x03;
2742 		inq->inq_vd[i].inq_vd_lsb = 0x00;
2743 		i++;
2744 
2745 		/* SBC-2 no version */
2746 		inq->inq_vd[i].inq_vd_msb = 0x03;
2747 		inq->inq_vd[i].inq_vd_lsb = 0x20;
2748 
2749 		sbd_handle_short_read_transfers(task, initial_dbuf, p, cmd_size,
2750 		    min(cmd_size, xfer_size));
2751 		kmem_free(p, bsize);
2752 
2753 		return;
2754 	}
2755 
2756 	rw_enter(&sbd_global_prop_lock, RW_READER);
2757 	if (sl->sl_mgmt_url) {
2758 		mgmt_url_size = strlen(sl->sl_mgmt_url);
2759 		mgmt_url = sl->sl_mgmt_url;
2760 	} else if (sbd_mgmt_url) {
2761 		mgmt_url_size = strlen(sbd_mgmt_url);
2762 		mgmt_url = sbd_mgmt_url;
2763 	}
2764 
2765 	/*
2766 	 * EVPD handling
2767 	 */
2768 
2769 	/* Default 512 bytes may not be enough, increase bsize if necessary */
2770 	if (cdbp[2] == 0x83 || cdbp[2] == 0x85) {
2771 		if (bsize <  cmd_size)
2772 			bsize = cmd_size;
2773 	}
2774 	p = (uint8_t *)kmem_zalloc(bsize, KM_SLEEP);
2775 
2776 	switch (cdbp[2]) {
2777 	case 0x00:
2778 		page_length = 4 + (mgmt_url_size ? 1 : 0);
2779 		if (sl->sl_flags & SL_UNMAP_ENABLED)
2780 			page_length += 2;
2781 
2782 		p[0] = byte0;
2783 		p[3] = page_length;
2784 		/* Supported VPD pages in ascending order */
2785 		{
2786 			uint8_t i = 5;
2787 
2788 			p[i++] = 0x80;
2789 			p[i++] = 0x83;
2790 			if (mgmt_url_size != 0)
2791 				p[i++] = 0x85;
2792 			p[i++] = 0x86;
2793 			if (sl->sl_flags & SL_UNMAP_ENABLED) {
2794 				p[i++] = 0xb0;
2795 				p[i++] = 0xb2;
2796 			}
2797 		}
2798 		xfer_size = page_length + 4;
2799 		break;
2800 
2801 	case 0x80:
2802 		if (sl->sl_serial_no_size) {
2803 			page_length = sl->sl_serial_no_size;
2804 			bcopy(sl->sl_serial_no, p + 4, sl->sl_serial_no_size);
2805 		} else {
2806 			/* if no serial num is specified set 4 spaces */
2807 			page_length = 4;
2808 			bcopy("    ", p + 4, 4);
2809 		}
2810 		p[0] = byte0;
2811 		p[1] = 0x80;
2812 		p[3] = page_length;
2813 		xfer_size = page_length + 4;
2814 		break;
2815 
2816 	case 0x83:
2817 		xfer_size = stmf_scsilib_prepare_vpd_page83(task, p,
2818 		    bsize, byte0, STMF_VPD_LU_ID|STMF_VPD_TARGET_ID|
2819 		    STMF_VPD_TP_GROUP|STMF_VPD_RELATIVE_TP_ID);
2820 		break;
2821 
2822 	case 0x85:
2823 		if (mgmt_url_size == 0) {
2824 			stmf_scsilib_send_status(task, STATUS_CHECK,
2825 			    STMF_SAA_INVALID_FIELD_IN_CDB);
2826 			goto err_done;
2827 		}
2828 		{
2829 			uint16_t idx, newidx, sz, url_size;
2830 			char *url;
2831 
2832 			p[0] = byte0;
2833 			p[1] = 0x85;
2834 
2835 			idx = 4;
2836 			url = mgmt_url;
2837 			url_size = sbd_parse_mgmt_url(&url);
2838 			/* Creating Network Service Descriptors */
2839 			while (url_size != 0) {
2840 				/* Null terminated and 4 Byte aligned */
2841 				sz = url_size + 1;
2842 				sz += (sz % 4) ? 4 - (sz % 4) : 0;
2843 				newidx = idx + sz + 4;
2844 
2845 				if (newidx < bsize) {
2846 					/*
2847 					 * SPC-3r23 : Table 320  (Sec 7.6.5)
2848 					 * (Network service descriptor format
2849 					 *
2850 					 * Note: Hard coding service type as
2851 					 * "Storage Configuration Service".
2852 					 */
2853 					p[idx] = 1;
2854 					SCSI_WRITE16(p + idx + 2, sz);
2855 					bcopy(url, p + idx + 4, url_size);
2856 					xfer_size = newidx + 4;
2857 				}
2858 				idx = newidx;
2859 
2860 				/* skip to next mgmt url if any */
2861 				url += url_size;
2862 				url_size = sbd_parse_mgmt_url(&url);
2863 			}
2864 
2865 			/* Total descriptor length */
2866 			SCSI_WRITE16(p + 2, idx - 4);
2867 			break;
2868 		}
2869 
2870 	case 0x86:
2871 		page_length = 0x3c;
2872 
2873 		p[0] = byte0;
2874 		p[1] = 0x86;		/* Page 86 response */
2875 		p[3] = page_length;
2876 
2877 		/*
2878 		 * Bits 0, 1, and 2 will need to be updated
2879 		 * to reflect the queue tag handling if/when
2880 		 * that is implemented.  For now, we're going
2881 		 * to claim support only for Simple TA.
2882 		 */
2883 		p[5] = 1;
2884 		xfer_size = page_length + 4;
2885 		break;
2886 
2887 	case 0xb0:
2888 		if ((sl->sl_flags & SL_UNMAP_ENABLED) == 0) {
2889 			stmf_scsilib_send_status(task, STATUS_CHECK,
2890 			    STMF_SAA_INVALID_FIELD_IN_CDB);
2891 			goto err_done;
2892 		}
2893 		page_length = 0x3c;
2894 		p[0] = byte0;
2895 		p[1] = 0xb0;
2896 		p[3] = page_length;
2897 		p[20] = p[21] = p[22] = p[23] = 0xFF;
2898 		p[24] = p[25] = p[26] = p[27] = 0xFF;
2899 		xfer_size = page_length + 4;
2900 		break;
2901 
2902 	case 0xb2:
2903 		if ((sl->sl_flags & SL_UNMAP_ENABLED) == 0) {
2904 			stmf_scsilib_send_status(task, STATUS_CHECK,
2905 			    STMF_SAA_INVALID_FIELD_IN_CDB);
2906 			goto err_done;
2907 		}
2908 		page_length = 4;
2909 		p[0] = byte0;
2910 		p[1] = 0xb2;
2911 		p[3] = page_length;
2912 
2913 		exp = (uint8_t)sl->sl_data_blocksize_shift;
2914 		s = sl->sl_lu_size >> sl->sl_data_blocksize_shift;
2915 		while (s & ((uint64_t)0xFFFFFFFF80000000ull)) {
2916 			s >>= 1;
2917 			exp++;
2918 		}
2919 		p[4] = exp;
2920 		p[5] = 0xc0;
2921 		xfer_size = page_length + 4;
2922 		break;
2923 
2924 	default:
2925 		stmf_scsilib_send_status(task, STATUS_CHECK,
2926 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2927 		goto err_done;
2928 	}
2929 
2930 	sbd_handle_short_read_transfers(task, initial_dbuf, p, cmd_size,
2931 	    min(cmd_size, xfer_size));
2932 err_done:
2933 	kmem_free(p, bsize);
2934 	rw_exit(&sbd_global_prop_lock);
2935 }
2936 
2937 stmf_status_t
2938 sbd_task_alloc(struct scsi_task *task)
2939 {
2940 	if ((task->task_lu_private =
2941 	    kmem_alloc(sizeof (sbd_cmd_t), KM_NOSLEEP)) != NULL) {
2942 		sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
2943 		scmd->flags = 0;
2944 		return (STMF_SUCCESS);
2945 	}
2946 	return (STMF_ALLOC_FAILURE);
2947 }
2948 
2949 void
2950 sbd_remove_it_handle(sbd_lu_t *sl, sbd_it_data_t *it)
2951 {
2952 	sbd_it_data_t **ppit;
2953 
2954 	sbd_pgr_remove_it_handle(sl, it);
2955 	mutex_enter(&sl->sl_lock);
2956 	for (ppit = &sl->sl_it_list; *ppit != NULL;
2957 	    ppit = &((*ppit)->sbd_it_next)) {
2958 		if ((*ppit) == it) {
2959 			*ppit = it->sbd_it_next;
2960 			break;
2961 		}
2962 	}
2963 	mutex_exit(&sl->sl_lock);
2964 
2965 	DTRACE_PROBE2(itl__nexus__end, stmf_lu_t *, sl->sl_lu,
2966 	    sbd_it_data_t *, it);
2967 
2968 	kmem_free(it, sizeof (*it));
2969 }
2970 
2971 void
2972 sbd_check_and_clear_scsi2_reservation(sbd_lu_t *sl, sbd_it_data_t *it)
2973 {
2974 	mutex_enter(&sl->sl_lock);
2975 	if ((sl->sl_flags & SL_LU_HAS_SCSI2_RESERVATION) == 0) {
2976 		/* If we dont have any reservations, just get out. */
2977 		mutex_exit(&sl->sl_lock);
2978 		return;
2979 	}
2980 
2981 	if (it == NULL) {
2982 		/* Find the I_T nexus which is holding the reservation. */
2983 		for (it = sl->sl_it_list; it != NULL; it = it->sbd_it_next) {
2984 			if (it->sbd_it_flags & SBD_IT_HAS_SCSI2_RESERVATION) {
2985 				ASSERT(it->sbd_it_session_id ==
2986 				    sl->sl_rs_owner_session_id);
2987 				break;
2988 			}
2989 		}
2990 		ASSERT(it != NULL);
2991 	} else {
2992 		/*
2993 		 * We were passed an I_T nexus. If this nexus does not hold
2994 		 * the reservation, do nothing. This is why this function is
2995 		 * called "check_and_clear".
2996 		 */
2997 		if ((it->sbd_it_flags & SBD_IT_HAS_SCSI2_RESERVATION) == 0) {
2998 			mutex_exit(&sl->sl_lock);
2999 			return;
3000 		}
3001 	}
3002 	it->sbd_it_flags &= ~SBD_IT_HAS_SCSI2_RESERVATION;
3003 	sl->sl_flags &= ~SL_LU_HAS_SCSI2_RESERVATION;
3004 	mutex_exit(&sl->sl_lock);
3005 }
3006 
3007 
3008 
/*
 * Process a newly arrived task (SCSI command or task management function)
 * addressed to this logical unit.
 *
 * Order of processing, as implemented below:
 *   1. Find the I_T nexus record cached on the task, or create, link and
 *	register a new one.
 *   2. Pass task management functions to stmf_scsilib_handle_task_mgmt().
 *   3. Fail every command while the LU is transitioning between access
 *	states.
 *   4. Report a pending SBD_UA_POR unit attention.
 *   5. Apply PGR and SCSI-2 reservation conflict checks (active LU only).
 *   6. Report at most one of the remaining unit attention conditions.
 *   7. On a standby LU, filter the opcode and proxy the command.
 *   8. Dispatch on the opcode; anything unrecognized is completed with
 *	STMF_SAA_INVALID_OPCODE.
 *
 * Nothing is returned: each path either calls a handler or completes the
 * task through stmf_scsilib_send_status().
 *
 * task:		the incoming task.
 * initial_dbuf:	handed unchanged to the handlers and to the proxy call.
 */
void
sbd_new_task(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
{
	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
	sbd_it_data_t *it;
	uint8_t cdb0, cdb1;
	stmf_status_t st_ret;

	if ((it = task->task_lu_itl_handle) == NULL) {
		/*
		 * No nexus record is cached on the task.  If a record for
		 * this session is already on the list the command is bounced
		 * with BUSY (presumably another task is still registering
		 * the handle -- TODO confirm).
		 */
		mutex_enter(&sl->sl_lock);
		for (it = sl->sl_it_list; it != NULL; it = it->sbd_it_next) {
			if (it->sbd_it_session_id ==
			    task->task_session->ss_session_id) {
				mutex_exit(&sl->sl_lock);
				stmf_scsilib_send_status(task, STATUS_BUSY, 0);
				return;
			}
		}
		/* Allocation must not sleep here; failure is reported as BUSY. */
		it = (sbd_it_data_t *)kmem_zalloc(sizeof (*it), KM_NOSLEEP);
		if (it == NULL) {
			mutex_exit(&sl->sl_lock);
			stmf_scsilib_send_status(task, STATUS_BUSY, 0);
			return;
		}
		/* Fill in the record and push it on the head of the list. */
		it->sbd_it_session_id = task->task_session->ss_session_id;
		bcopy(task->task_lun_no, it->sbd_it_lun, 8);
		it->sbd_it_next = sl->sl_it_list;
		sl->sl_it_list = it;
		mutex_exit(&sl->sl_lock);

		DTRACE_PROBE1(itl__nexus__start, scsi_task *, task);

		sbd_pgr_initialize_it(task, it);
		if (stmf_register_itl_handle(task->task_lu, task->task_lun_no,
		    task->task_session, it->sbd_it_session_id, it)
		    != STMF_SUCCESS) {
			/* Registration failed: unlink and free the record. */
			sbd_remove_it_handle(sl, it);
			stmf_scsilib_send_status(task, STATUS_BUSY, 0);
			return;
		}
		task->task_lu_itl_handle = it;
		/* A new nexus starts with a POR unit attention unless standby. */
		if (sl->sl_access_state != SBD_LU_STANDBY) {
			it->sbd_it_ua_conditions = SBD_UA_POR;
		}
	} else if (it->sbd_it_flags & SBD_IT_PGR_CHECK_FLAG) {
		/*
		 * Existing nexus flagged for a PGR re-check: clear the flag
		 * under the lock, then redo the PGR initialization.
		 */
		mutex_enter(&sl->sl_lock);
		it->sbd_it_flags &= ~SBD_IT_PGR_CHECK_FLAG;
		mutex_exit(&sl->sl_lock);
		sbd_pgr_initialize_it(task, it);
	}

	/* Task management functions are not SCSI commands; hand them off. */
	if (task->task_mgmt_function) {
		stmf_scsilib_handle_task_mgmt(task);
		return;
	}

	/*
	 * if we're transitioning between access
	 * states, return NOT READY
	 */
	if (sl->sl_access_state == SBD_LU_TRANSITION_TO_STANDBY ||
	    sl->sl_access_state == SBD_LU_TRANSITION_TO_ACTIVE) {
		stmf_scsilib_send_status(task, STATUS_CHECK,
		    STMF_SAA_LU_NO_ACCESS_UNAVAIL);
		return;
	}

	/*
	 * Checking ua conditions as per SAM3R14 5.3.2 specified order.
	 * INQUIRY is exempt from unit attention reporting.  The unlocked
	 * test is only a pre-check; the bit is re-tested under sl_lock.
	 */
	if ((it->sbd_it_ua_conditions) && (task->task_cdb[0] != SCMD_INQUIRY)) {
		uint32_t saa = 0;

		mutex_enter(&sl->sl_lock);
		if (it->sbd_it_ua_conditions & SBD_UA_POR) {
			it->sbd_it_ua_conditions &= ~SBD_UA_POR;
			saa = STMF_SAA_POR;
		}
		mutex_exit(&sl->sl_lock);
		if (saa) {
			stmf_scsilib_send_status(task, STATUS_CHECK, saa);
			return;
		}
	}

	/*
	 * Reservation conflict checks.  Only done on an active LU.  When a
	 * PGR reservation exists it alone decides; otherwise a SCSI-2
	 * reservation held by some other nexus blocks every command that is
	 * not in the SCSI2_CONFLICT_FREE_CMDS set.
	 */
	if (sl->sl_access_state == SBD_LU_ACTIVE) {
		if (SBD_PGR_RSVD(sl->sl_pgr)) {
			if (sbd_pgr_reservation_conflict(task)) {
				stmf_scsilib_send_status(task,
				    STATUS_RESERVATION_CONFLICT, 0);
				return;
			}
		} else if ((sl->sl_flags & SL_LU_HAS_SCSI2_RESERVATION) &&
		    ((it->sbd_it_flags & SBD_IT_HAS_SCSI2_RESERVATION) == 0)) {
			if (!(SCSI2_CONFLICT_FREE_CMDS(task->task_cdb))) {
				stmf_scsilib_send_status(task,
				    STATUS_RESERVATION_CONFLICT, 0);
				return;
			}
		}
	}

	/*
	 * Rest of the ua condition checks.  At most one condition is
	 * consumed per command, in the order of the else-if chain below.
	 */
	if ((it->sbd_it_ua_conditions) && (task->task_cdb[0] != SCMD_INQUIRY)) {
		uint32_t saa = 0;

		mutex_enter(&sl->sl_lock);
		if (it->sbd_it_ua_conditions & SBD_UA_CAPACITY_CHANGED) {
			it->sbd_it_ua_conditions &= ~SBD_UA_CAPACITY_CHANGED;
			/*
			 * A READ CAPACITY command clears the condition
			 * without a CHECK CONDITION being reported.
			 */
			if ((task->task_cdb[0] == SCMD_READ_CAPACITY) ||
			    ((task->task_cdb[0] == SCMD_SVC_ACTION_IN_G4) &&
			    (task->task_cdb[1] ==
			    SSVC_ACTION_READ_CAPACITY_G4))) {
				saa = 0;
			} else {
				saa = STMF_SAA_CAPACITY_DATA_HAS_CHANGED;
			}
		} else if (it->sbd_it_ua_conditions &
		    SBD_UA_MODE_PARAMETERS_CHANGED) {
			it->sbd_it_ua_conditions &=
			    ~SBD_UA_MODE_PARAMETERS_CHANGED;
			saa = STMF_SAA_MODE_PARAMETERS_CHANGED;
		} else if (it->sbd_it_ua_conditions &
		    SBD_UA_ASYMMETRIC_ACCESS_CHANGED) {
			it->sbd_it_ua_conditions &=
			    ~SBD_UA_ASYMMETRIC_ACCESS_CHANGED;
			saa = STMF_SAA_ASYMMETRIC_ACCESS_CHANGED;
		} else if (it->sbd_it_ua_conditions &
		    SBD_UA_ACCESS_STATE_TRANSITION) {
			it->sbd_it_ua_conditions &=
			    ~SBD_UA_ACCESS_STATE_TRANSITION;
			saa = STMF_SAA_LU_NO_ACCESS_TRANSITION;
		} else {
			/* Only bits not handled above remain; drop them. */
			it->sbd_it_ua_conditions = 0;
			saa = 0;
		}
		mutex_exit(&sl->sl_lock);
		if (saa) {
			stmf_scsilib_send_status(task, STATUS_CHECK, saa);
			return;
		}
	}

	cdb0 = task->task_cdb[0];
	cdb1 = task->task_cdb[1];

	if (sl->sl_access_state == SBD_LU_STANDBY) {
		/*
		 * Standby LU: only the opcodes listed here are let through;
		 * everything else fails with STMF_SAA_LU_NO_ACCESS_STANDBY.
		 * The MAINTENANCE IN service actions 0x05 and 0x0A are the
		 * same two that are dispatched further down (Report
		 * Identifying Information, Report Target Port Groups).
		 */
		if (cdb0 != SCMD_INQUIRY &&
		    cdb0 != SCMD_MODE_SENSE &&
		    cdb0 != SCMD_MODE_SENSE_G1 &&
		    cdb0 != SCMD_MODE_SELECT &&
		    cdb0 != SCMD_MODE_SELECT_G1 &&
		    cdb0 != SCMD_RESERVE &&
		    cdb0 != SCMD_RELEASE &&
		    cdb0 != SCMD_PERSISTENT_RESERVE_OUT &&
		    cdb0 != SCMD_PERSISTENT_RESERVE_IN &&
		    cdb0 != SCMD_REQUEST_SENSE &&
		    cdb0 != SCMD_READ_CAPACITY &&
		    cdb0 != SCMD_TEST_UNIT_READY &&
		    cdb0 != SCMD_START_STOP &&
		    cdb0 != SCMD_READ &&
		    cdb0 != SCMD_READ_G1 &&
		    cdb0 != SCMD_READ_G4 &&
		    cdb0 != SCMD_READ_G5 &&
		    !(cdb0 == SCMD_SVC_ACTION_IN_G4 &&
		    cdb1 == SSVC_ACTION_READ_CAPACITY_G4) &&
		    !(cdb0 == SCMD_MAINTENANCE_IN &&
		    (cdb1 & 0x1F) == 0x05) &&
		    !(cdb0 == SCMD_MAINTENANCE_IN &&
		    (cdb1 & 0x1F) == 0x0A)) {
			stmf_scsilib_send_status(task, STATUS_CHECK,
			    STMF_SAA_LU_NO_ACCESS_STANDBY);
			return;
		}

		/*
		 * is this a short write?
		 * if so, we'll need to wait until we have the buffer
		 * before proxying the command
		 *
		 * Those opcodes break out of the switch and continue into
		 * the regular dispatch below; every other permitted opcode
		 * is proxied right here and this function returns.
		 */
		switch (cdb0) {
			case SCMD_MODE_SELECT:
			case SCMD_MODE_SELECT_G1:
			case SCMD_PERSISTENT_RESERVE_OUT:
				break;
			default:
				st_ret = stmf_proxy_scsi_cmd(task,
				    initial_dbuf);
				if (st_ret != STMF_SUCCESS) {
					stmf_scsilib_send_status(task,
					    STATUS_CHECK,
					    STMF_SAA_LU_NO_ACCESS_UNAVAIL);
				}
				return;
		}
	}

	/*
	 * Performance path.  Masking off the top three bits of the opcode
	 * lets a single comparison match every opcode that shares the low
	 * five bits of SCMD_READ or SCMD_WRITE.
	 */
	cdb0 = task->task_cdb[0] & 0x1F;

	if ((cdb0 == SCMD_READ) || (cdb0 == SCMD_WRITE)) {
		/* The port flagged high load: push back with QFULL. */
		if (task->task_additional_flags & TASK_AF_PORT_LOAD_HIGH) {
			stmf_scsilib_send_status(task, STATUS_QFULL, 0);
			return;
		}
		if (cdb0 == SCMD_READ) {
			sbd_handle_read(task, initial_dbuf);
			return;
		}
		sbd_handle_write(task, initial_dbuf);
		return;
	}

	/* From here on the full, unmasked opcode is used again. */
	cdb0 = task->task_cdb[0];
	cdb1 = task->task_cdb[1];

	if (cdb0 == SCMD_INQUIRY) {		/* Inquiry */
		sbd_handle_inquiry(task, initial_dbuf);
		return;
	}

	if (cdb0  == SCMD_PERSISTENT_RESERVE_OUT) {
		sbd_handle_pgr_out_cmd(task, initial_dbuf);
		return;
	}

	if (cdb0  == SCMD_PERSISTENT_RESERVE_IN) {
		sbd_handle_pgr_in_cmd(task, initial_dbuf);
		return;
	}

	if (cdb0 == SCMD_RELEASE) {
		/* Any bit set in CDB byte 1 is rejected. */
		if (cdb1) {
			stmf_scsilib_send_status(task, STATUS_CHECK,
			    STMF_SAA_INVALID_FIELD_IN_CDB);
			return;
		}

		mutex_enter(&sl->sl_lock);
		if (sl->sl_flags & SL_LU_HAS_SCSI2_RESERVATION) {
			/* If not owner don't release it, just return good */
			if (it->sbd_it_session_id !=
			    sl->sl_rs_owner_session_id) {
				mutex_exit(&sl->sl_lock);
				stmf_scsilib_send_status(task, STATUS_GOOD, 0);
				return;
			}
		}
		/* Owner, or no reservation at all: clear both flags. */
		sl->sl_flags &= ~SL_LU_HAS_SCSI2_RESERVATION;
		it->sbd_it_flags &= ~SBD_IT_HAS_SCSI2_RESERVATION;
		mutex_exit(&sl->sl_lock);
		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
		return;
	}

	if (cdb0 == SCMD_RESERVE) {
		/* Any bit set in CDB byte 1 is rejected. */
		if (cdb1) {
			stmf_scsilib_send_status(task, STATUS_CHECK,
			    STMF_SAA_INVALID_FIELD_IN_CDB);
			return;
		}

		mutex_enter(&sl->sl_lock);
		if (sl->sl_flags & SL_LU_HAS_SCSI2_RESERVATION) {
			/* If not owner, return conflict status */
			if (it->sbd_it_session_id !=
			    sl->sl_rs_owner_session_id) {
				mutex_exit(&sl->sl_lock);
				stmf_scsilib_send_status(task,
				    STATUS_RESERVATION_CONFLICT, 0);
				return;
			}
		}
		/* Take (or re-take) the reservation and record the owner. */
		sl->sl_flags |= SL_LU_HAS_SCSI2_RESERVATION;
		it->sbd_it_flags |= SBD_IT_HAS_SCSI2_RESERVATION;
		sl->sl_rs_owner_session_id = it->sbd_it_session_id;
		mutex_exit(&sl->sl_lock);
		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
		return;
	}

	if (cdb0 == SCMD_REQUEST_SENSE) {
		/*
		 * LU provider needs to store unretrieved sense data
		 * (e.g. after power-on/reset).  For now, we'll just
		 * return good status with no sense.
		 */

		/* Only bit 0 of byte 1 may be set; bytes 2, 3, 5 must be 0. */
		if ((cdb1 & ~1) || task->task_cdb[2] || task->task_cdb[3] ||
		    task->task_cdb[5]) {
			stmf_scsilib_send_status(task, STATUS_CHECK,
			    STMF_SAA_INVALID_FIELD_IN_CDB);
		} else {
			stmf_scsilib_send_status(task, STATUS_GOOD, 0);
		}

		return;
	}

	/* Report Target Port Groups */
	if ((cdb0 == SCMD_MAINTENANCE_IN) &&
	    ((cdb1 & 0x1F) == 0x0A)) {
		stmf_scsilib_handle_report_tpgs(task, initial_dbuf);
		return;
	}

	/* Report Identifying Information */
	if ((cdb0 == SCMD_MAINTENANCE_IN) &&
	    ((cdb1 & 0x1F) == 0x05)) {
		sbd_handle_identifying_info(task, initial_dbuf);
		return;
	}

	if (cdb0 == SCMD_START_STOP) {			/* Start stop */
		task->task_cmd_xfer_length = 0;
		/* Bits 2-7 of byte 4 are not accepted. */
		if (task->task_cdb[4] & 0xFC) {
			stmf_scsilib_send_status(task, STATUS_CHECK,
			    STMF_SAA_INVALID_FIELD_IN_CDB);
			return;
		}
		/* Bit 1 of byte 4 is rejected as well; otherwise a no-op. */
		if (task->task_cdb[4] & 2) {
			stmf_scsilib_send_status(task, STATUS_CHECK,
			    STMF_SAA_INVALID_FIELD_IN_CDB);
		} else {
			stmf_scsilib_send_status(task, STATUS_GOOD, 0);
		}
		return;

	}

	if ((cdb0 == SCMD_MODE_SENSE) || (cdb0 == SCMD_MODE_SENSE_G1)) {
		/*
		 * 512-byte zeroed scratch buffer handed to the handler and
		 * freed as soon as the handler returns.
		 */
		uint8_t *p;
		p = kmem_zalloc(512, KM_SLEEP);
		sbd_handle_mode_sense(task, initial_dbuf, p);
		kmem_free(p, 512);
		return;
	}

	if ((cdb0 == SCMD_MODE_SELECT) || (cdb0 == SCMD_MODE_SELECT_G1)) {
		sbd_handle_mode_select(task, initial_dbuf);
		return;
	}

	/* UNMAP is only dispatched when enabled on this LU. */
	if ((cdb0 == SCMD_UNMAP) && (sl->sl_flags & SL_UNMAP_ENABLED)) {
		sbd_handle_unmap(task, initial_dbuf);
		return;
	}

	if ((cdb0 == SCMD_WRITE_SAME_G4) || (cdb0 == SCMD_WRITE_SAME_G1)) {
		sbd_handle_write_same(task, initial_dbuf);
		return;
	}

	if (cdb0 == SCMD_TEST_UNIT_READY) {	/* Test unit ready */
		task->task_cmd_xfer_length = 0;
		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
		return;
	}

	if (cdb0 == SCMD_READ_CAPACITY) {		/* Read Capacity */
		sbd_handle_read_capacity(task, initial_dbuf);
		return;
	}

	if (cdb0 == SCMD_SVC_ACTION_IN_G4) { /* Read Capacity or read long */
		if (cdb1 == SSVC_ACTION_READ_CAPACITY_G4) {
			sbd_handle_read_capacity(task, initial_dbuf);
			return;
		/*
		 * } else if (cdb1 == SSVC_ACTION_READ_LONG_G4) {
		 * 	sbd_handle_read(task, initial_dbuf);
		 * 	return;
		 */
		}
		/* Other service actions fall through to INVALID OPCODE. */
	}

	/*
	 * if (cdb0 == SCMD_SVC_ACTION_OUT_G4) {
	 *	if (cdb1 == SSVC_ACTION_WRITE_LONG_G4) {
	 *		 sbd_handle_write(task, initial_dbuf);
	 * 		return;
	 *	}
	 * }
	 */

	if (cdb0 == SCMD_VERIFY) {
		/*
		 * Something more likely needs to be done here.
		 */
		task->task_cmd_xfer_length = 0;
		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
		return;
	}

	if (cdb0 == SCMD_SYNCHRONIZE_CACHE ||
	    cdb0 == SCMD_SYNCHRONIZE_CACHE_G4) {
		sbd_handle_sync_cache(task, initial_dbuf);
		return;
	}

	/*
	 * Write and Verify use the same path as write, but don't clutter the
	 * performance path above with checking for write_verify opcodes.  We
	 * rely on zfs's integrity checks for the "Verify" part of Write &
	 * Verify.  (Even if we did a read to "verify" we'd merely be reading
	 * cache, not actual media.)
	 * Therefore we
	 *   a) only support this if sbd_is_zvol, and
	 *   b) run the IO through the normal write path with a forced
	 *	sbd_flush_data_cache at the end.
	 */

	if ((sl->sl_flags & SL_ZFS_META) && (
	    cdb0 == SCMD_WRITE_VERIFY ||
	    cdb0 == SCMD_WRITE_VERIFY_G4 ||
	    cdb0 == SCMD_WRITE_VERIFY_G5)) {
		sbd_handle_write(task, initial_dbuf);
		return;
	}

	/* Nothing above claimed the opcode. */
	stmf_scsilib_send_status(task, STATUS_CHECK, STMF_SAA_INVALID_OPCODE);
}
3429 
3430 void
3431 sbd_dbuf_xfer_done(struct scsi_task *task, struct stmf_data_buf *dbuf)
3432 {
3433 	sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
3434 
3435 	if (dbuf->db_flags & DB_LU_DATA_BUF) {
3436 		/*
3437 		 * Buffers passed in from the LU always complete
3438 		 * even if the task is no longer active.
3439 		 */
3440 		ASSERT(task->task_additional_flags & TASK_AF_ACCEPT_LU_DBUF);
3441 		ASSERT(scmd);
3442 		switch (scmd->cmd_type) {
3443 		case (SBD_CMD_SCSI_READ):
3444 			sbd_handle_sgl_read_xfer_completion(task, scmd, dbuf);
3445 			break;
3446 		case (SBD_CMD_SCSI_WRITE):
3447 			sbd_handle_sgl_write_xfer_completion(task, scmd, dbuf);
3448 			break;
3449 		default:
3450 			cmn_err(CE_PANIC, "Unknown cmd type, task = %p",
3451 			    (void *)task);
3452 			break;
3453 		}
3454 		return;
3455 	}
3456 
3457 	if ((scmd == NULL) || ((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0))
3458 		return;
3459 
3460 	switch (scmd->cmd_type) {
3461 	case (SBD_CMD_SCSI_READ):
3462 		sbd_handle_read_xfer_completion(task, scmd, dbuf);
3463 		break;
3464 
3465 	case (SBD_CMD_SCSI_WRITE):
3466 		if ((task->task_cdb[0] == SCMD_WRITE_SAME_G1) ||
3467 		    (task->task_cdb[0] == SCMD_WRITE_SAME_G4)) {
3468 			sbd_handle_write_same_xfer_completion(task, scmd, dbuf,
3469 			    1);
3470 		} else {
3471 			sbd_handle_write_xfer_completion(task, scmd, dbuf, 1);
3472 		}
3473 		break;
3474 
3475 	case (SBD_CMD_SMALL_READ):
3476 		sbd_handle_short_read_xfer_completion(task, scmd, dbuf);
3477 		break;
3478 
3479 	case (SBD_CMD_SMALL_WRITE):
3480 		sbd_handle_short_write_xfer_completion(task, dbuf);
3481 		break;
3482 
3483 	default:
3484 		cmn_err(CE_PANIC, "Unknown cmd type, task = %p", (void *)task);
3485 		break;
3486 	}
3487 }
3488 
/*
 * Panics unconditionally.  As the panic message states, this provider never
 * expects this entry point to be invoked; `task' is not examined.
 */
/* ARGSUSED */
void
sbd_send_status_done(struct scsi_task *task)
{
	cmn_err(CE_PANIC,
	    "sbd_send_status_done: this should not have been called");
}
3496 
3497 void
3498 sbd_task_free(struct scsi_task *task)
3499 {
3500 	if (task->task_lu_private) {
3501 		sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
3502 		if (scmd->flags & SBD_SCSI_CMD_ACTIVE) {
3503 			cmn_err(CE_PANIC, "cmd is active, task = %p",
3504 			    (void *)task);
3505 		}
3506 		kmem_free(scmd, sizeof (sbd_cmd_t));
3507 	}
3508 }
3509 
3510 /*
3511  * Aborts are synchronus w.r.t. I/O AND
3512  * All the I/O which SBD does is synchronous AND
3513  * Everything within a task is single threaded.
3514  *   IT MEANS
3515  * If this function is called, we are doing nothing with this task
3516  * inside of sbd module.
3517  */
3518 /* ARGSUSED */
3519 stmf_status_t
3520 sbd_abort(struct stmf_lu *lu, int abort_cmd, void *arg, uint32_t flags)
3521 {
3522 	sbd_lu_t *sl = (sbd_lu_t *)lu->lu_provider_private;
3523 	scsi_task_t *task;
3524 
3525 	if (abort_cmd == STMF_LU_RESET_STATE) {
3526 		return (sbd_lu_reset_state(lu));
3527 	}
3528 
3529 	if (abort_cmd == STMF_LU_ITL_HANDLE_REMOVED) {
3530 		sbd_check_and_clear_scsi2_reservation(sl, (sbd_it_data_t *)arg);
3531 		sbd_remove_it_handle(sl, (sbd_it_data_t *)arg);
3532 		return (STMF_SUCCESS);
3533 	}
3534 
3535 	ASSERT(abort_cmd == STMF_LU_ABORT_TASK);
3536 	task = (scsi_task_t *)arg;
3537 	if (task->task_lu_private) {
3538 		sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
3539 
3540 		if (scmd->flags & SBD_SCSI_CMD_ACTIVE) {
3541 			if (scmd->flags & SBD_SCSI_CMD_TRANS_DATA) {
3542 				kmem_free(scmd->trans_data,
3543 				    scmd->trans_data_len);
3544 				scmd->flags &= ~SBD_SCSI_CMD_TRANS_DATA;
3545 			}
3546 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
3547 			return (STMF_ABORT_SUCCESS);
3548 		}
3549 	}
3550 
3551 	return (STMF_NOT_FOUND);
3552 }
3553 
3554 /*
3555  * This function is called during task clean-up if the
3556  * DB_LU_FLAG is set on the dbuf. This should only be called for
3557  * abort processing after sbd_abort has been called for the task.
3558  */
3559 void
3560 sbd_dbuf_free(struct scsi_task *task, struct stmf_data_buf *dbuf)
3561 {
3562 	sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
3563 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
3564 
3565 	ASSERT(dbuf->db_lu_private);
3566 	ASSERT(scmd && scmd->nbufs > 0);
3567 	ASSERT((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0);
3568 	ASSERT(dbuf->db_flags & DB_LU_DATA_BUF);
3569 	ASSERT(task->task_additional_flags & TASK_AF_ACCEPT_LU_DBUF);
3570 	ASSERT((curthread->t_flag & T_INTR_THREAD) == 0);
3571 
3572 	if (scmd->cmd_type == SBD_CMD_SCSI_READ) {
3573 		sbd_zvol_rele_read_bufs(sl, dbuf);
3574 	} else if (scmd->cmd_type == SBD_CMD_SCSI_WRITE) {
3575 		sbd_zvol_rele_write_bufs_abort(sl, dbuf);
3576 	} else {
3577 		cmn_err(CE_PANIC, "Unknown cmd type %d, task = %p",
3578 		    scmd->cmd_type, (void *)task);
3579 	}
3580 	if (--scmd->nbufs == 0)
3581 		rw_exit(&sl->sl_access_state_lock);
3582 	stmf_teardown_dbuf(task, dbuf);
3583 	stmf_free(dbuf);
3584 }
3585 
3586 /* ARGSUSED */
3587 void
3588 sbd_ctl(struct stmf_lu *lu, int cmd, void *arg)
3589 {
3590 	sbd_lu_t *sl = (sbd_lu_t *)lu->lu_provider_private;
3591 	stmf_change_status_t st;
3592 
3593 	ASSERT((cmd == STMF_CMD_LU_ONLINE) ||
3594 	    (cmd == STMF_CMD_LU_OFFLINE) ||
3595 	    (cmd == STMF_ACK_LU_ONLINE_COMPLETE) ||
3596 	    (cmd == STMF_ACK_LU_OFFLINE_COMPLETE));
3597 
3598 	st.st_completion_status = STMF_SUCCESS;
3599 	st.st_additional_info = NULL;
3600 
3601 	switch (cmd) {
3602 	case STMF_CMD_LU_ONLINE:
3603 		if (sl->sl_state == STMF_STATE_ONLINE)
3604 			st.st_completion_status = STMF_ALREADY;
3605 		else if (sl->sl_state != STMF_STATE_OFFLINE)
3606 			st.st_completion_status = STMF_FAILURE;
3607 		if (st.st_completion_status == STMF_SUCCESS) {
3608 			sl->sl_state = STMF_STATE_ONLINE;
3609 			sl->sl_state_not_acked = 1;
3610 		}
3611 		(void) stmf_ctl(STMF_CMD_LU_ONLINE_COMPLETE, lu, &st);
3612 		break;
3613 
3614 	case STMF_CMD_LU_OFFLINE:
3615 		if (sl->sl_state == STMF_STATE_OFFLINE)
3616 			st.st_completion_status = STMF_ALREADY;
3617 		else if (sl->sl_state != STMF_STATE_ONLINE)
3618 			st.st_completion_status = STMF_FAILURE;
3619 		if (st.st_completion_status == STMF_SUCCESS) {
3620 			sl->sl_flags &= ~(SL_MEDIUM_REMOVAL_PREVENTED |
3621 			    SL_LU_HAS_SCSI2_RESERVATION);
3622 			sl->sl_state = STMF_STATE_OFFLINE;
3623 			sl->sl_state_not_acked = 1;
3624 			sbd_pgr_reset(sl);
3625 		}
3626 		(void) stmf_ctl(STMF_CMD_LU_OFFLINE_COMPLETE, lu, &st);
3627 		break;
3628 
3629 	case STMF_ACK_LU_ONLINE_COMPLETE:
3630 		/* Fallthrough */
3631 	case STMF_ACK_LU_OFFLINE_COMPLETE:
3632 		sl->sl_state_not_acked = 0;
3633 		break;
3634 
3635 	}
3636 }
3637 
/*
 * Information query entry point.  No query is implemented: none of the
 * arguments are examined and STMF_NOT_SUPPORTED is always returned.
 */
/* ARGSUSED */
stmf_status_t
sbd_info(uint32_t cmd, stmf_lu_t *lu, void *arg, uint8_t *buf,
    uint32_t *bufsizep)
{
	return (STMF_NOT_SUPPORTED);
}
3645 
3646 stmf_status_t
3647 sbd_lu_reset_state(stmf_lu_t *lu)
3648 {
3649 	sbd_lu_t *sl = (sbd_lu_t *)lu->lu_provider_private;
3650 
3651 	mutex_enter(&sl->sl_lock);
3652 	if (sl->sl_flags & SL_SAVED_WRITE_CACHE_DISABLE) {
3653 		sl->sl_flags |= SL_WRITEBACK_CACHE_DISABLE;
3654 		mutex_exit(&sl->sl_lock);
3655 		if (sl->sl_access_state == SBD_LU_ACTIVE) {
3656 			(void) sbd_wcd_set(1, sl);
3657 		}
3658 	} else {
3659 		sl->sl_flags &= ~SL_WRITEBACK_CACHE_DISABLE;
3660 		mutex_exit(&sl->sl_lock);
3661 		if (sl->sl_access_state == SBD_LU_ACTIVE) {
3662 			(void) sbd_wcd_set(0, sl);
3663 		}
3664 	}
3665 	sbd_pgr_reset(sl);
3666 	sbd_check_and_clear_scsi2_reservation(sl, NULL);
3667 	if (stmf_deregister_all_lu_itl_handles(lu) != STMF_SUCCESS) {
3668 		return (STMF_FAILURE);
3669 	}
3670 	return (STMF_SUCCESS);
3671 }
3672 
3673 sbd_status_t
3674 sbd_flush_data_cache(sbd_lu_t *sl, int fsync_done)
3675 {
3676 	int r = 0;
3677 	int ret;
3678 
3679 	if (fsync_done)
3680 		goto over_fsync;
3681 	if ((sl->sl_data_vtype == VREG) || (sl->sl_data_vtype == VBLK)) {
3682 		if (VOP_FSYNC(sl->sl_data_vp, FSYNC, kcred, NULL))
3683 			return (SBD_FAILURE);
3684 	}
3685 over_fsync:
3686 	if (((sl->sl_data_vtype == VCHR) || (sl->sl_data_vtype == VBLK)) &&
3687 	    ((sl->sl_flags & SL_NO_DATA_DKIOFLUSH) == 0)) {
3688 		ret = VOP_IOCTL(sl->sl_data_vp, DKIOCFLUSHWRITECACHE, NULL,
3689 		    FKIOCTL, kcred, &r, NULL);
3690 		if ((ret == ENOTTY) || (ret == ENOTSUP)) {
3691 			mutex_enter(&sl->sl_lock);
3692 			sl->sl_flags |= SL_NO_DATA_DKIOFLUSH;
3693 			mutex_exit(&sl->sl_lock);
3694 		} else if (ret != 0) {
3695 			return (SBD_FAILURE);
3696 		}
3697 	}
3698 
3699 	return (SBD_SUCCESS);
3700 }
3701 
3702 /* ARGSUSED */
3703 static void
3704 sbd_handle_sync_cache(struct scsi_task *task,
3705     struct stmf_data_buf *initial_dbuf)
3706 {
3707 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
3708 	uint64_t	lba, laddr;
3709 	sbd_status_t	sret;
3710 	uint32_t	len;
3711 	int		is_g4 = 0;
3712 	int		immed;
3713 
3714 	task->task_cmd_xfer_length = 0;
3715 	/*
3716 	 * Determine if this is a 10 or 16 byte CDB
3717 	 */
3718 
3719 	if (task->task_cdb[0] == SCMD_SYNCHRONIZE_CACHE_G4)
3720 		is_g4 = 1;
3721 
3722 	/*
3723 	 * Determine other requested parameters
3724 	 *
3725 	 * We don't have a non-volatile cache, so don't care about SYNC_NV.
3726 	 * Do not support the IMMED bit.
3727 	 */
3728 
3729 	immed = (task->task_cdb[1] & 0x02);
3730 
3731 	if (immed) {
3732 		stmf_scsilib_send_status(task, STATUS_CHECK,
3733 		    STMF_SAA_INVALID_FIELD_IN_CDB);
3734 		return;
3735 	}
3736 
3737 	/*
3738 	 * Check to be sure we're not being asked to sync an LBA
3739 	 * that is out of range.  While checking, verify reserved fields.
3740 	 */
3741 
3742 	if (is_g4) {
3743 		if ((task->task_cdb[1] & 0xf9) || task->task_cdb[14] ||
3744 		    task->task_cdb[15]) {
3745 			stmf_scsilib_send_status(task, STATUS_CHECK,
3746 			    STMF_SAA_INVALID_FIELD_IN_CDB);
3747 			return;
3748 		}
3749 
3750 		lba = READ_SCSI64(&task->task_cdb[2], uint64_t);
3751 		len = READ_SCSI32(&task->task_cdb[10], uint32_t);
3752 	} else {
3753 		if ((task->task_cdb[1] & 0xf9) || task->task_cdb[6] ||
3754 		    task->task_cdb[9]) {
3755 			stmf_scsilib_send_status(task, STATUS_CHECK,
3756 			    STMF_SAA_INVALID_FIELD_IN_CDB);
3757 			return;
3758 		}
3759 
3760 		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
3761 		len = READ_SCSI16(&task->task_cdb[7], uint32_t);
3762 	}
3763 
3764 	laddr = lba << sl->sl_data_blocksize_shift;
3765 	len <<= sl->sl_data_blocksize_shift;
3766 
3767 	if ((laddr + (uint64_t)len) > sl->sl_lu_size) {
3768 		stmf_scsilib_send_status(task, STATUS_CHECK,
3769 		    STMF_SAA_LBA_OUT_OF_RANGE);
3770 		return;
3771 	}
3772 
3773 	sret = sbd_flush_data_cache(sl, 0);
3774 	if (sret != SBD_SUCCESS) {
3775 		stmf_scsilib_send_status(task, STATUS_CHECK,
3776 		    STMF_SAA_WRITE_ERROR);
3777 		return;
3778 	}
3779 
3780 	stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3781 }
3782