xref: /illumos-gate/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_scsi.c (revision a629ded1d7b2e67c2028ccbc5ba9099328cc4e1b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright 2017 Nexenta Systems, Inc.  All rights reserved.
25  * Copyright (c) 2013 by Delphix. All rights reserved.
26  */
27 
28 #include <sys/conf.h>
29 #include <sys/file.h>
30 #include <sys/ddi.h>
31 #include <sys/sunddi.h>
32 #include <sys/modctl.h>
33 #include <sys/scsi/scsi.h>
34 #include <sys/scsi/impl/scsi_reset_notify.h>
35 #include <sys/scsi/generic/mode.h>
36 #include <sys/disp.h>
37 #include <sys/byteorder.h>
38 #include <sys/atomic.h>
39 #include <sys/sdt.h>
40 #include <sys/dkio.h>
41 #include <sys/dkioc_free_util.h>
42 
43 #include <sys/stmf.h>
44 #include <sys/lpif.h>
45 #include <sys/portif.h>
46 #include <sys/stmf_ioctl.h>
47 #include <sys/stmf_sbd_ioctl.h>
48 
49 #include "stmf_sbd.h"
50 #include "sbd_impl.h"
51 
52 #define	SCSI2_CONFLICT_FREE_CMDS(cdb)	( \
53 	/* ----------------------- */                                      \
54 	/* Refer Both		   */                                      \
55 	/* SPC-2 (rev 20) Table 10 */                                      \
56 	/* SPC-3 (rev 23) Table 31 */                                      \
57 	/* ----------------------- */                                      \
58 	((cdb[0]) == SCMD_INQUIRY)					|| \
59 	((cdb[0]) == SCMD_LOG_SENSE_G1)					|| \
60 	((cdb[0]) == SCMD_RELEASE)					|| \
61 	((cdb[0]) == SCMD_RELEASE_G1)					|| \
62 	((cdb[0]) == SCMD_REPORT_LUNS)					|| \
63 	((cdb[0]) == SCMD_REQUEST_SENSE)				|| \
64 	/* PREVENT ALLOW MEDIUM REMOVAL with prevent == 0 */               \
65 	((((cdb[0]) == SCMD_DOORLOCK) && (((cdb[4]) & 0x3) == 0)))	|| \
66 	/* SERVICE ACTION IN with READ MEDIA SERIAL NUMBER (0x01) */       \
67 	(((cdb[0]) == SCMD_SVC_ACTION_IN_G5) && (                          \
68 	    ((cdb[1]) & 0x1F) == 0x01))					|| \
69 	/* MAINTENANCE IN with service actions REPORT ALIASES (0x0B) */    \
70 	/* REPORT DEVICE IDENTIFIER (0x05)  REPORT PRIORITY (0x0E) */      \
71 	/* REPORT TARGET PORT GROUPS (0x0A) REPORT TIMESTAMP (0x0F) */     \
72 	(((cdb[0]) == SCMD_MAINTENANCE_IN) && (                            \
73 	    (((cdb[1]) & 0x1F) == 0x0B) ||                                 \
74 	    (((cdb[1]) & 0x1F) == 0x05) ||                                 \
75 	    (((cdb[1]) & 0x1F) == 0x0E) ||                                 \
76 	    (((cdb[1]) & 0x1F) == 0x0A) ||                                 \
77 	    (((cdb[1]) & 0x1F) == 0x0F)))				|| \
78 	/* ----------------------- */                                      \
79 	/* SBC-3 (rev 17) Table 3  */                                      \
80 	/* ----------------------- */                                      \
81 	/* READ CAPACITY(10) */                                            \
82 	((cdb[0]) == SCMD_READ_CAPACITY)				|| \
83 	/* READ CAPACITY(16) */                                            \
84 	(((cdb[0]) == SCMD_SVC_ACTION_IN_G4) && (                          \
85 	    ((cdb[1]) & 0x1F) == 0x10))					|| \
86 	/* START STOP UNIT with START bit 0 and POWER CONDITION 0  */      \
87 	(((cdb[0]) == SCMD_START_STOP) && (                                \
88 	    (((cdb[4]) & 0xF0) == 0) && (((cdb[4]) & 0x01) == 0))))
89 /* End of SCSI2_CONFLICT_FREE_CMDS */
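/*
 * A sketch of the intended use (the actual reservation-conflict check is
 * elsewhere in sbd): when a SCSI-2 reservation is held by another I_T
 * nexus, a command is failed with RESERVATION CONFLICT unless
 * SCSI2_CONFLICT_FREE_CMDS(task->task_cdb) is true.
 */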
90 
91 uint8_t HardwareAcceleratedInit = 1;
92 uint8_t sbd_unmap_enable = 1;		/* allow unmap by default */
93 
94 /*
95  * An /etc/system tunable which specifies the maximum number of LBAs supported
96  * in a single UNMAP operation. Default is 0x2000 blocks, or 4MB with 512-byte blocks.
97  */
98 int stmf_sbd_unmap_max_nblks  = 0x002000;
99 
100 /*
101  * An /etc/system tunable which indicates if READ ops can run on the standby
102  * path or return an error.
103  */
104 int stmf_standby_fail_reads = 0;
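/*
 * These tunables can be set from /etc/system; a sketch, assuming the
 * module name is stmf_sbd:
 *	set stmf_sbd:stmf_sbd_unmap_max_nblks = 0x4000
 *	set stmf_sbd:stmf_standby_fail_reads = 1
 */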
105 
106 stmf_status_t sbd_lu_reset_state(stmf_lu_t *lu);
107 static void sbd_handle_sync_cache(struct scsi_task *task,
108     struct stmf_data_buf *initial_dbuf);
109 void sbd_handle_read_xfer_completion(struct scsi_task *task,
110     sbd_cmd_t *scmd, struct stmf_data_buf *dbuf);
111 void sbd_handle_short_write_xfer_completion(scsi_task_t *task,
112     stmf_data_buf_t *dbuf);
113 void sbd_handle_mode_select_xfer(scsi_task_t *task, uint8_t *buf,
114     uint32_t buflen);
115 void sbd_handle_mode_select(scsi_task_t *task, stmf_data_buf_t *dbuf);
116 void sbd_handle_identifying_info(scsi_task_t *task, stmf_data_buf_t *dbuf);
117 
118 static void sbd_handle_unmap_xfer(scsi_task_t *task, uint8_t *buf,
119     uint32_t buflen);
120 static void sbd_handle_unmap(scsi_task_t *task, stmf_data_buf_t *dbuf);
121 
122 extern void sbd_pgr_initialize_it(scsi_task_t *, sbd_it_data_t *);
123 extern int sbd_pgr_reservation_conflict(scsi_task_t *, struct sbd_lu *sl);
124 extern void sbd_pgr_reset(sbd_lu_t *);
125 extern void sbd_pgr_remove_it_handle(sbd_lu_t *, sbd_it_data_t *);
126 extern void sbd_handle_pgr_in_cmd(scsi_task_t *, stmf_data_buf_t *);
127 extern void sbd_handle_pgr_out_cmd(scsi_task_t *, stmf_data_buf_t *);
128 extern void sbd_handle_pgr_out_data(scsi_task_t *, stmf_data_buf_t *);
129 void sbd_do_sgl_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
130     int first_xfer);
131 static void sbd_handle_write_same(scsi_task_t *task,
132     struct stmf_data_buf *initial_dbuf);
133 static void sbd_do_write_same_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
134     struct stmf_data_buf *dbuf, uint8_t dbuf_reusable);
135 static void sbd_handle_write_same_xfer_completion(struct scsi_task *task,
136     sbd_cmd_t *scmd, struct stmf_data_buf *dbuf, uint8_t dbuf_reusable);
137 /*
138  * IMPORTANT NOTE:
139  * =================
140  * The whole world here is based on the assumption that everything within
141  * a SCSI task executes in a single-threaded manner, even the aborts.
142  * Don't ever change that. There won't be any performance gain, but there
143  * will be tons of race conditions.
144  */
145 
146 void
147 sbd_do_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
148     struct stmf_data_buf *dbuf)
149 {
150 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
151 	uint64_t laddr;
152 	uint32_t len, buflen, iolen;
153 	int ndx;
154 	int bufs_to_take;
155 
156 	/* Let's try not to hog all the buffers the port has. */
157 	bufs_to_take = ((task->task_max_nbufs > 2) &&
158 	    (task->task_cmd_xfer_length < (32 * 1024))) ? 2 :
159 	    task->task_max_nbufs;
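	/*
	 * I.e. small commands (< 32K) are limited to two outstanding dbufs;
	 * larger commands may use the port's full allowance.
	 */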
160 
161 	len = ATOMIC32_GET(scmd->len) > dbuf->db_buf_size ?
162 	    dbuf->db_buf_size : ATOMIC32_GET(scmd->len);
163 	laddr = scmd->addr + scmd->current_ro;
164 
165 	for (buflen = 0, ndx = 0; (buflen < len) &&
166 	    (ndx < dbuf->db_sglist_length); ndx++) {
167 		iolen = min(len - buflen, dbuf->db_sglist[ndx].seg_length);
168 		if (iolen == 0)
169 			break;
170 		if (sbd_data_read(sl, task, laddr, (uint64_t)iolen,
171 		    dbuf->db_sglist[ndx].seg_addr) != STMF_SUCCESS) {
172 			scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
173 			/* Do not need to do xfer anymore, just complete it */
174 			dbuf->db_data_size = 0;
175 			dbuf->db_xfer_status = STMF_SUCCESS;
176 			sbd_handle_read_xfer_completion(task, scmd, dbuf);
177 			return;
178 		}
179 		buflen += iolen;
180 		laddr += (uint64_t)iolen;
181 	}
182 	dbuf->db_relative_offset = scmd->current_ro;
183 	dbuf->db_data_size = buflen;
184 	dbuf->db_flags = DB_DIRECTION_TO_RPORT;
185 	(void) stmf_xfer_data(task, dbuf, 0);
186 	atomic_add_32(&scmd->len, -buflen);
187 	scmd->current_ro += buflen;
188 	if (ATOMIC32_GET(scmd->len) &&
189 	    (ATOMIC8_GET(scmd->nbufs) < bufs_to_take)) {
190 		uint32_t maxsize, minsize, old_minsize;
191 
192 		maxsize = (ATOMIC32_GET(scmd->len) > (128*1024)) ? 128 * 1024 :
193 		    ATOMIC32_GET(scmd->len);
194 		minsize = maxsize >> 2;
195 		do {
196 			/*
197 			 * A bad port implementation can keep on failing the
198 			 * the request but keep on sending us a false
199 			 * request but keep on sending us a false
200 			 */
201 			old_minsize = minsize;
202 			dbuf = stmf_alloc_dbuf(task, maxsize, &minsize, 0);
203 		} while ((dbuf == NULL) && (old_minsize > minsize) &&
204 		    (minsize >= 512));
205 		if (dbuf == NULL) {
206 			return;
207 		}
208 		atomic_inc_8(&scmd->nbufs);
209 		sbd_do_read_xfer(task, scmd, dbuf);
210 	}
211 }
212 
213 /*
214  * sbd_zcopy: Bail-out switch for reduced copy path.
215  *
216  * 0 - read & write off
217  * 1 - read & write on
218  * 2 - only read on
219  * 4 - only write on
220  */
221 int sbd_zcopy = 1;	/* enable zcopy read & write path */
222 uint32_t sbd_max_xfer_len = 0;		/* Valid if non-zero */
223 uint32_t sbd_1st_xfer_len = 0;		/* Valid if non-zero */
224 uint32_t sbd_copy_threshold = 0;		/* Valid if non-zero */
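/*
 * Note that sbd_zcopy is tested as a bit mask below: the read path checks
 * sbd_zcopy & (2|1) and the write path checks sbd_zcopy & (4|1).
 * Like the tunables above, these can presumably be set from /etc/system,
 * e.g. "set stmf_sbd:sbd_zcopy = 0" (module name assumed).
 */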
225 
226 static void
227 sbd_do_sgl_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer)
228 {
229 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
230 	sbd_zvol_io_t *zvio;
231 	int ret, final_xfer;
232 	uint64_t offset;
233 	uint32_t xfer_len, max_len, first_len;
234 	stmf_status_t xstat;
235 	stmf_data_buf_t *dbuf;
236 	uint_t nblks;
237 	uint64_t blksize = sl->sl_blksize;
238 	size_t db_private_sz;
239 	hrtime_t xfer_start;
240 	uintptr_t pad;
241 
242 	ASSERT(rw_read_held(&sl->sl_access_state_lock));
243 	ASSERT((sl->sl_flags & SL_MEDIA_LOADED) != 0);
244 
245 	/*
246 	 * Calculate the limits on xfer_len to the minimum of :
247 	 *    - task limit
248 	 *    - lun limit
249 	 *    - sbd global limit if set
250 	 *    - first xfer limit if set
251 	 *
252 	 * First, protect against silly over-ride value
253 	 */
254 	if (sbd_max_xfer_len && ((sbd_max_xfer_len % DEV_BSIZE) != 0)) {
255 		cmn_err(CE_WARN, "sbd_max_xfer_len invalid %d, resetting\n",
256 		    sbd_max_xfer_len);
257 		sbd_max_xfer_len = 0;
258 	}
259 	if (sbd_1st_xfer_len && ((sbd_1st_xfer_len % DEV_BSIZE) != 0)) {
260 		cmn_err(CE_WARN, "sbd_1st_xfer_len invalid %d, resetting\n",
261 		    sbd_1st_xfer_len);
262 		sbd_1st_xfer_len = 0;
263 	}
264 
265 	max_len = MIN(task->task_max_xfer_len, sl->sl_max_xfer_len);
266 	if (sbd_max_xfer_len)
267 		max_len = MIN(max_len, sbd_max_xfer_len);
268 	/*
269 	 * Special case the first xfer if hints are set.
270 	 */
271 	if (first_xfer && (sbd_1st_xfer_len || task->task_1st_xfer_len)) {
272 		/* global over-ride has precedence */
273 		if (sbd_1st_xfer_len)
274 			first_len = sbd_1st_xfer_len;
275 		else
276 			first_len = task->task_1st_xfer_len;
277 	} else {
278 		first_len = 0;
279 	}
280 
281 	while (ATOMIC32_GET(scmd->len) &&
282 	    ATOMIC8_GET(scmd->nbufs) < task->task_max_nbufs) {
283 
284 		xfer_len = MIN(max_len, ATOMIC32_GET(scmd->len));
285 		if (first_len) {
286 			xfer_len = MIN(xfer_len, first_len);
287 			first_len = 0;
288 		}
289 		if (ATOMIC32_GET(scmd->len) == xfer_len) {
290 			final_xfer = 1;
291 		} else {
292 			/*
293 			 * Attempt to end xfer on a block boundary.
294 			 * The only way this does not happen is if the
295 			 * xfer_len is small enough to stay contained
296 			 * within the same block.
297 			 */
298 			uint64_t xfer_offset, xfer_aligned_end;
299 
300 			final_xfer = 0;
301 			xfer_offset = scmd->addr + scmd->current_ro;
302 			xfer_aligned_end =
303 			    P2ALIGN(xfer_offset+xfer_len, blksize);
304 			if (xfer_aligned_end > xfer_offset)
305 				xfer_len = xfer_aligned_end - xfer_offset;
306 		}
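		/*
		 * Example, assuming blksize is 4096: an xfer at offset 6144
		 * for 16384 bytes is trimmed to 14336 bytes so that it ends
		 * on the block boundary at 20480.
		 */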
307 		/*
308 		 * Allocate object to track the read and reserve
309 		 * enough space for scatter/gather list.
310 		 */
311 		offset = scmd->addr + scmd->current_ro;
312 		nblks = sbd_zvol_numsegs(sl, offset, xfer_len);
313 
314 		db_private_sz = sizeof (*zvio) + sizeof (uintptr_t) /* PAD */ +
315 		    (nblks * sizeof (stmf_sglist_ent_t));
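		/*
		 * Private area layout (a sketch): the nblks sglist entries
		 * come first, then up to sizeof (uintptr_t) - 1 bytes of
		 * padding so that the sbd_zvol_io_t placed at db_lu_private
		 * below is naturally aligned.
		 */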
316 		dbuf = stmf_alloc(STMF_STRUCT_DATA_BUF, db_private_sz,
317 		    AF_DONTZERO);
318 		/*
319 		 * Setup the dbuf
320 		 *
321 		 * XXX Framework does not handle variable length sglists
322 		 * properly, so setup db_lu_private and db_port_private
323 		 * fields here. db_stmf_private is properly set for
324 		 * calls to stmf_free.
325 		 */
326 		if (dbuf->db_port_private == NULL) {
327 			/*
328 			 * XXX Framework assigns space to PP after db_sglist[0]
329 			 */
330 			cmn_err(CE_PANIC, "db_port_private == NULL");
331 		}
332 		pad = (uintptr_t)&dbuf->db_sglist[nblks];
333 		dbuf->db_lu_private = (void *)P2ROUNDUP(pad, sizeof (pad));
334 		dbuf->db_port_private = NULL;
335 		dbuf->db_buf_size = xfer_len;
336 		dbuf->db_data_size = xfer_len;
337 		dbuf->db_relative_offset = scmd->current_ro;
338 		dbuf->db_sglist_length = (uint16_t)nblks;
339 		dbuf->db_xfer_status = 0;
340 		dbuf->db_handle = 0;
341 
342 		dbuf->db_flags = (DB_DONT_CACHE | DB_DONT_REUSE |
343 		    DB_DIRECTION_TO_RPORT | DB_LU_DATA_BUF);
344 		if (final_xfer)
345 			dbuf->db_flags |= DB_SEND_STATUS_GOOD;
346 
347 		zvio = dbuf->db_lu_private;
348 		/* Need absolute offset for zvol access */
349 		zvio->zvio_offset = offset;
350 		zvio->zvio_flags = ZVIO_SYNC;
351 
352 		/*
353 		 * Accounting for start of read.
354 		 * Note there is no buffer address for the probe yet.
355 		 */
356 		xfer_start = gethrtime();
357 		DTRACE_PROBE5(backing__store__read__start, sbd_lu_t *, sl,
358 		    uint8_t *, NULL, uint64_t, xfer_len,
359 		    uint64_t, offset, scsi_task_t *, task);
360 
361 		ret = sbd_zvol_alloc_read_bufs(sl, dbuf);
362 
363 		stmf_lu_xfer_done(task, B_TRUE /* read */,
364 		    (gethrtime() - xfer_start));
365 		DTRACE_PROBE6(backing__store__read__end, sbd_lu_t *, sl,
366 		    uint8_t *, NULL, uint64_t, xfer_len,
367 		    uint64_t, offset, int, ret, scsi_task_t *, task);
368 
369 		if (ret != 0) {
370 			/*
371 			 * Read failure from the backend.
372 			 */
373 			stmf_free(dbuf);
374 			if (ATOMIC8_GET(scmd->nbufs) == 0) {
375 				/* nothing queued, just finish */
376 				scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
377 				sbd_ats_remove_by_task(task);
378 				stmf_scsilib_send_status(task, STATUS_CHECK,
379 				    STMF_SAA_READ_ERROR);
380 				rw_exit(&sl->sl_access_state_lock);
381 			} else {
382 				/* process failure when other dbufs finish */
383 				scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
384 			}
385 			return;
386 		}
387 
388 		/*
389 		 * Allow PP to do setup
390 		 */
391 		xstat = stmf_setup_dbuf(task, dbuf, 0);
392 		if (xstat != STMF_SUCCESS) {
393 			/*
394 			 * This could happen if the driver cannot get the
395 			 * DDI resources it needs for this request.
396 			 * If other dbufs are queued, try again when the next
397 			 * one completes, otherwise give up.
398 			 */
399 			sbd_zvol_rele_read_bufs(sl, dbuf);
400 			stmf_free(dbuf);
401 			if (ATOMIC8_GET(scmd->nbufs) > 0) {
402 				/* completion of previous dbuf will retry */
403 				return;
404 			}
405 			/*
406 			 * Done with this command.
407 			 */
408 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
409 			sbd_ats_remove_by_task(task);
410 			if (first_xfer)
411 				stmf_scsilib_send_status(task, STATUS_QFULL, 0);
412 			else
413 				stmf_scsilib_send_status(task, STATUS_CHECK,
414 				    STMF_SAA_READ_ERROR);
415 			rw_exit(&sl->sl_access_state_lock);
416 			return;
417 		}
418 		/*
419 		 * dbuf is now queued on task
420 		 */
421 		atomic_inc_8(&scmd->nbufs);
422 
423 		/* XXX leave this in for FW? */
424 		DTRACE_PROBE4(sbd__xfer, struct scsi_task *, task,
425 		    struct stmf_data_buf *, dbuf, uint64_t, offset,
426 		    uint32_t, xfer_len);
427 		/*
428 		 * Do not pass STMF_IOF_LU_DONE so that the zvol
429 		 * state can be released in the completion callback.
430 		 */
431 		xstat = stmf_xfer_data(task, dbuf, 0);
432 		switch (xstat) {
433 		case STMF_SUCCESS:
434 			break;
435 		case STMF_BUSY:
436 			/*
437 			 * The dbuf is queued on the task, but unknown
438 			 * to the PP, thus no completion will occur.
439 			 */
440 			sbd_zvol_rele_read_bufs(sl, dbuf);
441 			stmf_teardown_dbuf(task, dbuf);
442 			stmf_free(dbuf);
443 			atomic_dec_8(&scmd->nbufs);
444 			if (ATOMIC8_GET(scmd->nbufs) > 0) {
445 				/* completion of previous dbuf will retry */
446 				return;
447 			}
448 			/*
449 			 * Done with this command.
450 			 */
451 			rw_exit(&sl->sl_access_state_lock);
452 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
453 			sbd_ats_remove_by_task(task);
454 			if (first_xfer)
455 				stmf_scsilib_send_status(task, STATUS_QFULL, 0);
456 			else
457 				stmf_scsilib_send_status(task, STATUS_CHECK,
458 				    STMF_SAA_READ_ERROR);
459 			return;
460 		case STMF_ABORTED:
461 			/*
462 			 * Completion from task_done will cleanup
463 			 */
464 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
465 			sbd_ats_remove_by_task(task);
466 			return;
467 		}
468 		/*
469 		 * Update the xfer progress.
470 		 */
471 		ASSERT(scmd->len >= xfer_len);
472 		atomic_add_32(&scmd->len, -xfer_len);
473 		scmd->current_ro += xfer_len;
474 	}
475 }
476 
477 void
478 sbd_handle_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
479     struct stmf_data_buf *dbuf)
480 {
481 	if (dbuf->db_xfer_status != STMF_SUCCESS) {
482 		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
483 		    dbuf->db_xfer_status, NULL);
484 		return;
485 	}
486 	task->task_nbytes_transferred += dbuf->db_data_size;
487 	if (ATOMIC32_GET(scmd->len) == 0 ||
488 	    scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
489 		stmf_free_dbuf(task, dbuf);
490 		atomic_dec_8(&scmd->nbufs);
491 		if (ATOMIC8_GET(scmd->nbufs))
492 			return;	/* wait for all buffers to complete */
493 		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
494 		sbd_ats_remove_by_task(task);
495 		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL)
496 			stmf_scsilib_send_status(task, STATUS_CHECK,
497 			    STMF_SAA_READ_ERROR);
498 		else
499 			stmf_scsilib_send_status(task, STATUS_GOOD, 0);
500 		return;
501 	}
502 	if (dbuf->db_flags & DB_DONT_REUSE) {
503 		/* allocate new dbuf */
504 		uint32_t maxsize, minsize, old_minsize;
505 		stmf_free_dbuf(task, dbuf);
506 
507 		maxsize = (ATOMIC32_GET(scmd->len) > (128*1024)) ?
508 		    128 * 1024 : ATOMIC32_GET(scmd->len);
509 		minsize = maxsize >> 2;
510 		do {
511 			old_minsize = minsize;
512 			dbuf = stmf_alloc_dbuf(task, maxsize, &minsize, 0);
513 		} while ((dbuf == NULL) && (old_minsize > minsize) &&
514 		    (minsize >= 512));
515 		if (dbuf == NULL) {
516 			atomic_dec_8(&scmd->nbufs);
517 			if (ATOMIC8_GET(scmd->nbufs) == 0) {
518 				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
519 				    STMF_ALLOC_FAILURE, NULL);
520 			}
521 			return;
522 		}
523 	}
524 	sbd_do_read_xfer(task, scmd, dbuf);
525 }
526 
527 /*
528  * This routine must release the DMU resources and free the dbuf
529  * in all cases.  If this is the final dbuf of the task, then drop
530  * the reader lock on the LU state. If there are no errors and more
531  * work to do, then queue more xfer operations.
532  */
533 void
534 sbd_handle_sgl_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
535     struct stmf_data_buf *dbuf)
536 {
537 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
538 	stmf_status_t xfer_status;
539 	uint32_t data_size;
540 	int scmd_err;
541 
542 	ASSERT(dbuf->db_lu_private);
543 	ASSERT(scmd->cmd_type == SBD_CMD_SCSI_READ);
544 
545 	atomic_dec_8(&scmd->nbufs);	/* account for this dbuf */
546 	/*
547 	 * Release the DMU resources.
548 	 */
549 	sbd_zvol_rele_read_bufs(sl, dbuf);
550 	/*
551 	 * Release the dbuf after retrieving needed fields.
552 	 */
553 	xfer_status = dbuf->db_xfer_status;
554 	data_size = dbuf->db_data_size;
555 	stmf_teardown_dbuf(task, dbuf);
556 	stmf_free(dbuf);
557 	/*
558 	 * Release the state lock if this is the last completion.
559 	 * If this is the last dbuf on task and all data has been
560 	 * transferred or an error encountered, then no more dbufs
561 	 * will be queued.
562 	 */
563 	scmd_err = (((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0) ||
564 	    (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) ||
565 	    (xfer_status != STMF_SUCCESS));
566 	if ((ATOMIC8_GET(scmd->nbufs) == 0) &&
567 	    (ATOMIC32_GET(scmd->len) == 0 || scmd_err)) {
568 		/* all DMU state has been released */
569 		rw_exit(&sl->sl_access_state_lock);
570 	}
571 
572 	/*
573 	 * If there have been no errors, either complete the task
574 	 * or issue more data xfer operations.
575 	 */
576 	if (!scmd_err) {
577 		/*
578 		 * This chunk completed successfully
579 		 */
580 		task->task_nbytes_transferred += data_size;
581 		if (ATOMIC8_GET(scmd->nbufs) == 0 &&
582 		    ATOMIC32_GET(scmd->len) == 0) {
583 			/*
584 			 * This command completed successfully
585 			 *
586 			 * Status was sent along with data, so no status
587 			 * completion will occur. Tell stmf we are done.
588 			 */
589 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
590 			sbd_ats_remove_by_task(task);
591 			stmf_task_lu_done(task);
592 			return;
593 		}
594 		/*
595 		 * Start more xfers
596 		 */
597 		sbd_do_sgl_read_xfer(task, scmd, 0);
598 		return;
599 	}
600 	/*
601 	 * Sort out the failure
602 	 */
603 	if (scmd->flags & SBD_SCSI_CMD_ACTIVE) {
604 		/*
605 		 * If a previous error occurred, leave the command active
606 		 * and wait for the last completion to send the status check.
607 		 */
608 		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
609 			if (ATOMIC8_GET(scmd->nbufs) == 0) {
610 				scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
611 				sbd_ats_remove_by_task(task);
612 				stmf_scsilib_send_status(task, STATUS_CHECK,
613 				    STMF_SAA_READ_ERROR);
614 			}
615 			return;
616 		}
617 		/*
618 		 * Must have been a failure on current dbuf
619 		 */
620 		ASSERT(xfer_status != STMF_SUCCESS);
621 
622 		/*
623 		 * Actually this is a bug. stmf abort should have reset the
624 		 * active flag, but since it's been there for some time,
625 		 * I won't change it.
626 		 */
627 		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
628 		sbd_ats_remove_by_task(task);
629 		stmf_abort(STMF_QUEUE_TASK_ABORT, task, xfer_status, NULL);
630 	}
631 }
632 
633 void
634 sbd_handle_sgl_write_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
635     struct stmf_data_buf *dbuf)
636 {
637 	sbd_zvol_io_t *zvio = dbuf->db_lu_private;
638 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
639 	int ret;
640 	int scmd_err, scmd_xfer_done;
641 	stmf_status_t xfer_status = dbuf->db_xfer_status;
642 	uint32_t data_size = dbuf->db_data_size;
643 	hrtime_t xfer_start;
644 
645 	ASSERT(zvio);
646 
647 	/*
648 	 * Allow PP to free up resources before releasing the write bufs
649 	 * as writing to the backend could take some time.
650 	 */
651 	stmf_teardown_dbuf(task, dbuf);
652 
653 	atomic_dec_8(&scmd->nbufs);	/* account for this dbuf */
654 	/*
655 	 * All data was queued and this is the last completion,
656 	 * but there could still be an error.
657 	 */
658 	scmd_xfer_done = (ATOMIC32_GET(scmd->len) == 0 &&
659 	    (ATOMIC8_GET(scmd->nbufs) == 0));
660 	scmd_err = (((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0) ||
661 	    (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) ||
662 	    (xfer_status != STMF_SUCCESS));
663 
664 	xfer_start = gethrtime();
665 	DTRACE_PROBE5(backing__store__write__start, sbd_lu_t *, sl,
666 	    uint8_t *, NULL, uint64_t, data_size,
667 	    uint64_t, zvio->zvio_offset, scsi_task_t *, task);
668 
669 	if (scmd_err) {
670 		/* just return the write buffers */
671 		sbd_zvol_rele_write_bufs_abort(sl, dbuf);
672 		ret = 0;
673 	} else {
674 		if (scmd_xfer_done)
675 			zvio->zvio_flags = ZVIO_COMMIT;
676 		else
677 			zvio->zvio_flags = 0;
678 		/* write the data */
679 		ret = sbd_zvol_rele_write_bufs(sl, dbuf);
680 	}
681 
682 	stmf_lu_xfer_done(task, B_FALSE /* write */,
683 	    (gethrtime() - xfer_start));
684 	DTRACE_PROBE6(backing__store__write__end, sbd_lu_t *, sl,
685 	    uint8_t *, NULL, uint64_t, data_size,
686 	    uint64_t, zvio->zvio_offset, int, ret,  scsi_task_t *, task);
687 
688 	if (ret != 0) {
689 		/* update the error flag */
690 		scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
691 		scmd_err = 1;
692 	}
693 
694 	/* Release the dbuf */
695 	stmf_free(dbuf);
696 
697 	/*
698 	 * Release the state lock if this is the last completion.
699 	 * If this is the last dbuf on task and all data has been
700 	 * transferred or an error encountered, then no more dbufs
701 	 * will be queued.
702 	 */
703 	if ((ATOMIC8_GET(scmd->nbufs) == 0) &&
704 	    (ATOMIC32_GET(scmd->len) == 0 || scmd_err)) {
705 		/* all DMU state has been released */
706 		rw_exit(&sl->sl_access_state_lock);
707 	}
708 	/*
709 	 * If there have been no errors, either complete the task
710 	 * or issue more data xfer operations.
711 	 */
712 	if (!scmd_err) {
713 		/* This chunk completed successfully */
714 		task->task_nbytes_transferred += data_size;
715 		if (scmd_xfer_done) {
716 			/* This command completed successfully */
717 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
718 			sbd_ats_remove_by_task(task);
719 			if ((scmd->flags & SBD_SCSI_CMD_SYNC_WRITE) &&
720 			    (sbd_flush_data_cache(sl, 0) != SBD_SUCCESS)) {
721 				stmf_scsilib_send_status(task, STATUS_CHECK,
722 				    STMF_SAA_WRITE_ERROR);
723 			} else {
724 				stmf_scsilib_send_status(task, STATUS_GOOD, 0);
725 			}
726 			return;
727 		}
728 		/*
729 		 * Start more xfers
730 		 */
731 		sbd_do_sgl_write_xfer(task, scmd, 0);
732 		return;
733 	}
734 	/*
735 	 * Sort out the failure
736 	 */
737 	if (scmd->flags & SBD_SCSI_CMD_ACTIVE) {
738 		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
739 			if (ATOMIC8_GET(scmd->nbufs) == 0) {
740 				scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
741 				sbd_ats_remove_by_task(task);
742 				stmf_scsilib_send_status(task, STATUS_CHECK,
743 				    STMF_SAA_WRITE_ERROR);
744 			}
745 			/*
746 			 * Leave the command active until last dbuf completes.
747 			 */
748 			return;
749 		}
750 		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
751 		sbd_ats_remove_by_task(task);
752 		ASSERT(xfer_status != STMF_SUCCESS);
753 		stmf_abort(STMF_QUEUE_TASK_ABORT, task, xfer_status, NULL);
754 	}
755 }
756 
757 /*
758  * Handle a copy operation using the zvol interface.
759  *
760  * Similar to the sbd_data_read/write path, except it goes directly through
761  * the zvol interfaces. It can pass a port provider sglist in the
762  * form of uio which is lost through the vn_rdwr path.
763  *
764  * Returns:
765  *	STMF_SUCCESS - request handled
766  *	STMF_FAILURE - request not handled, caller must deal with error
767  */
768 static stmf_status_t
769 sbd_copy_rdwr(scsi_task_t *task, uint64_t laddr, stmf_data_buf_t *dbuf,
770     int cmd, int commit)
771 {
772 	sbd_lu_t		*sl = task->task_lu->lu_provider_private;
773 	struct uio		uio;
774 	struct iovec		*iov, *tiov, iov1[8];
775 	uint32_t		len, resid;
776 	int			ret, i, iovcnt, flags;
777 	hrtime_t		xfer_start;
778 	boolean_t		is_read;
779 
780 	ASSERT(cmd == SBD_CMD_SCSI_READ || cmd == SBD_CMD_SCSI_WRITE);
781 
782 	is_read = (cmd == SBD_CMD_SCSI_READ) ? B_TRUE : B_FALSE;
783 	iovcnt = dbuf->db_sglist_length;
784 	/* use the stack for small iovecs */
785 	if (iovcnt > 8) {
786 		iov = kmem_alloc(iovcnt * sizeof (*iov), KM_SLEEP);
787 	} else {
788 		iov = &iov1[0];
789 	}
790 
791 	/* Convert dbuf sglist to iovec format */
792 	len = dbuf->db_data_size;
793 	resid = len;
794 	tiov = iov;
795 	for (i = 0; i < iovcnt; i++) {
796 		tiov->iov_base = (caddr_t)dbuf->db_sglist[i].seg_addr;
797 		tiov->iov_len = MIN(resid, dbuf->db_sglist[i].seg_length);
798 		resid -= tiov->iov_len;
799 		tiov++;
800 	}
801 	if (resid != 0) {
802 		cmn_err(CE_WARN, "inconsistent sglist rem %d", resid);
803 		if (iov != &iov1[0])
804 			kmem_free(iov, iovcnt * sizeof (*iov));
805 		return (STMF_FAILURE);
806 	}
807 	/* Setup the uio struct */
808 	uio.uio_iov = iov;
809 	uio.uio_iovcnt = iovcnt;
810 	uio.uio_loffset = laddr;
811 	uio.uio_segflg = (short)UIO_SYSSPACE;
812 	uio.uio_resid = (uint64_t)len;
813 	uio.uio_llimit = RLIM64_INFINITY;
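	/* The sglist segments are kernel addresses, hence UIO_SYSSPACE above. */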
814 
815 	xfer_start = gethrtime();
816 	if (is_read == B_TRUE) {
817 		uio.uio_fmode = FREAD;
818 		uio.uio_extflg = UIO_COPY_CACHED;
819 		DTRACE_PROBE5(backing__store__read__start, sbd_lu_t *, sl,
820 		    uint8_t *, NULL, uint64_t, len, uint64_t, laddr,
821 		    scsi_task_t *, task);
822 
823 		/* Fetch the data */
824 		ret = sbd_zvol_copy_read(sl, &uio);
825 
826 		DTRACE_PROBE6(backing__store__read__end, sbd_lu_t *, sl,
827 		    uint8_t *, NULL, uint64_t, len, uint64_t, laddr, int, ret,
828 		    scsi_task_t *, task);
829 	} else {
830 		uio.uio_fmode = FWRITE;
831 		uio.uio_extflg = UIO_COPY_DEFAULT;
832 		DTRACE_PROBE5(backing__store__write__start, sbd_lu_t *, sl,
833 		    uint8_t *, NULL, uint64_t, len, uint64_t, laddr,
834 		    scsi_task_t *, task);
835 
836 		flags = (commit) ? ZVIO_COMMIT : 0;
837 		/* Write the data */
838 		ret = sbd_zvol_copy_write(sl, &uio, flags);
839 
840 		DTRACE_PROBE6(backing__store__write__end, sbd_lu_t *, sl,
841 		    uint8_t *, NULL, uint64_t, len, uint64_t, laddr, int, ret,
842 		    scsi_task_t *, task);
843 	}
844 	/* finalize accounting */
845 	stmf_lu_xfer_done(task, is_read, (gethrtime() - xfer_start));
846 
847 	if (iov != &iov1[0])
848 		kmem_free(iov, iovcnt * sizeof (*iov));
849 	if (ret != 0) {
850 		/* Backend I/O error */
851 		return (STMF_FAILURE);
852 	}
853 	return (STMF_SUCCESS);
854 }
855 
856 void
857 sbd_handle_read(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
858 {
859 	uint64_t lba, laddr;
860 	uint64_t blkcount;
861 	uint32_t len;
862 	uint8_t op = task->task_cdb[0];
863 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
864 	sbd_cmd_t *scmd;
865 	stmf_data_buf_t *dbuf;
866 	int fast_path;
867 	boolean_t fua_bit = B_FALSE;
868 
869 	/*
870 	 * Check to see if the command is READ(10), READ(12), or READ(16).
871 	 * If it is then check for bit 3 being set to indicate if Forced
872 	 * Unit Access is being requested. If so, we'll bypass the use of
873 	 * DMA buffers to simplify support of this feature.
874 	 */
875 	if (((op == SCMD_READ_G1) || (op == SCMD_READ_G4) ||
876 	    (op == SCMD_READ_G5)) &&
877 	    (task->task_cdb[1] & BIT_3)) {
878 		fua_bit = B_TRUE;
879 	}
880 	if (op == SCMD_READ) {
881 		lba = READ_SCSI21(&task->task_cdb[1], uint64_t);
882 		len = (uint32_t)task->task_cdb[4];
883 
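		/* For READ(6), a CDB transfer length of zero means 256 blocks. */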
884 		if (len == 0) {
885 			len = 256;
886 		}
887 	} else if (op == SCMD_READ_G1) {
888 		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
889 		len = READ_SCSI16(&task->task_cdb[7], uint32_t);
890 	} else if (op == SCMD_READ_G5) {
891 		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
892 		len = READ_SCSI32(&task->task_cdb[6], uint32_t);
893 	} else if (op == SCMD_READ_G4) {
894 		lba = READ_SCSI64(&task->task_cdb[2], uint64_t);
895 		len = READ_SCSI32(&task->task_cdb[10], uint32_t);
896 	} else {
897 		stmf_scsilib_send_status(task, STATUS_CHECK,
898 		    STMF_SAA_INVALID_OPCODE);
899 		return;
900 	}
901 
902 	laddr = lba << sl->sl_data_blocksize_shift;
903 	blkcount = len;
904 	len <<= sl->sl_data_blocksize_shift;
905 
906 	if ((laddr + (uint64_t)len) > sl->sl_lu_size) {
907 		stmf_scsilib_send_status(task, STATUS_CHECK,
908 		    STMF_SAA_LBA_OUT_OF_RANGE);
909 		return;
910 	}
911 
912 	task->task_cmd_xfer_length = len;
913 	if (task->task_additional_flags & TASK_AF_NO_EXPECTED_XFER_LENGTH) {
914 		task->task_expected_xfer_length = len;
915 	}
916 
917 	if (len != task->task_expected_xfer_length) {
918 		fast_path = 0;
919 		len = (len > task->task_expected_xfer_length) ?
920 		    task->task_expected_xfer_length : len;
921 	} else {
922 		fast_path = 1;
923 	}
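	/*
	 * fast_path means the transfer length matches exactly what the
	 * initiator expects, so a single dbuf can carry all the data along
	 * with status (DB_SEND_STATUS_GOOD) below.
	 */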
924 
925 	if (len == 0) {
926 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
927 		return;
928 	}
929 
930 	if (sbd_ats_handling_before_io(task, sl, lba, blkcount) !=
931 	    SBD_SUCCESS) {
932 		if (stmf_task_poll_lu(task, 10) != STMF_SUCCESS) {
933 			stmf_scsilib_send_status(task, STATUS_BUSY, 0);
934 		}
935 		return;
936 	}
937 	/*
938 	 * Determine if this read can directly use DMU buffers.
939 	 */
940 	if (sbd_zcopy & (2|1) &&		/* Debug switch */
941 	    initial_dbuf == NULL &&		/* No PP buffer passed in */
942 	    sl->sl_flags & SL_CALL_ZVOL &&	/* zvol backing store */
943 	    (task->task_additional_flags &
944 	    TASK_AF_ACCEPT_LU_DBUF) &&		/* PP allows it */
945 	    !fua_bit) {
946 		/*
947 		 * Reduced copy path
948 		 */
949 		uint32_t copy_threshold, minsize;
950 		int ret;
951 
952 		/*
953 		 * The sl_access_state_lock will be held shared
954 		 * for the entire request and released when all
955 		 * dbufs have completed.
956 		 */
957 		rw_enter(&sl->sl_access_state_lock, RW_READER);
958 		if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
959 			rw_exit(&sl->sl_access_state_lock);
960 			sbd_ats_remove_by_task(task);
961 			stmf_scsilib_send_status(task, STATUS_CHECK,
962 			    STMF_SAA_READ_ERROR);
963 			return;
964 		}
965 
966 		/*
967 		 * Check if setup is more expensive than copying the data.
968 		 *
969 		 * Use the global over-ride sbd_copy_threshold if set.
970 		 */
971 		copy_threshold = (sbd_copy_threshold > 0) ?
972 		    sbd_copy_threshold : task->task_copy_threshold;
973 		minsize = len;
974 		if (len < copy_threshold &&
975 		    (dbuf = stmf_alloc_dbuf(task, len, &minsize, 0)) != 0) {
976 
977 			ret = sbd_copy_rdwr(task, laddr, dbuf,
978 			    SBD_CMD_SCSI_READ, 0);
979 			/* done with the backend */
980 			rw_exit(&sl->sl_access_state_lock);
981 			sbd_ats_remove_by_task(task);
982 			if (ret != 0) {
983 				/* backend error */
984 				stmf_scsilib_send_status(task, STATUS_CHECK,
985 				    STMF_SAA_READ_ERROR);
986 			} else {
987 				/* send along good data */
988 				dbuf->db_relative_offset = 0;
989 				dbuf->db_data_size = len;
990 				dbuf->db_flags = DB_SEND_STATUS_GOOD |
991 				    DB_DIRECTION_TO_RPORT;
992 				/* XXX keep for FW? */
993 				DTRACE_PROBE4(sbd__xfer,
994 				    struct scsi_task *, task,
995 				    struct stmf_data_buf *, dbuf,
996 				    uint64_t, laddr, uint32_t, len);
997 				(void) stmf_xfer_data(task, dbuf,
998 				    STMF_IOF_LU_DONE);
999 			}
1000 			return;
1001 		}
1002 
1003 		/* committed to reduced copy */
1004 		if (task->task_lu_private) {
1005 			scmd = (sbd_cmd_t *)task->task_lu_private;
1006 		} else {
1007 			scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t),
1008 			    KM_SLEEP);
1009 			task->task_lu_private = scmd;
1010 		}
1011 		/*
1012 		 * Setup scmd to track read progress.
1013 		 */
1014 		scmd->flags = SBD_SCSI_CMD_ACTIVE | SBD_SCSI_CMD_ATS_RELATED;
1015 		scmd->cmd_type = SBD_CMD_SCSI_READ;
1016 		scmd->nbufs = 0;
1017 		scmd->addr = laddr;
1018 		scmd->len = len;
1019 		scmd->current_ro = 0;
1020 		/*
1021 		 * Kick-off the read.
1022 		 */
1023 		sbd_do_sgl_read_xfer(task, scmd, 1);
1024 		return;
1025 	}
1026 
1027 	if (initial_dbuf == NULL) {
1028 		uint32_t maxsize, minsize, old_minsize;
1029 
1030 		maxsize = (len > (128*1024)) ? 128*1024 : len;
1031 		minsize = maxsize >> 2;
1032 		do {
1033 			old_minsize = minsize;
1034 			initial_dbuf = stmf_alloc_dbuf(task, maxsize,
1035 			    &minsize, 0);
1036 		} while ((initial_dbuf == NULL) && (old_minsize > minsize) &&
1037 		    (minsize >= 512));
1038 		if (initial_dbuf == NULL) {
1039 			sbd_ats_remove_by_task(task);
1040 			stmf_scsilib_send_status(task, STATUS_QFULL, 0);
1041 			return;
1042 		}
1043 	}
1044 	dbuf = initial_dbuf;
1045 
1046 	if ((dbuf->db_buf_size >= len) && fast_path &&
1047 	    (dbuf->db_sglist_length == 1)) {
1048 		if (sbd_data_read(sl, task, laddr, (uint64_t)len,
1049 		    dbuf->db_sglist[0].seg_addr) == STMF_SUCCESS) {
1050 			dbuf->db_relative_offset = 0;
1051 			dbuf->db_data_size = len;
1052 			dbuf->db_flags = DB_SEND_STATUS_GOOD |
1053 			    DB_DIRECTION_TO_RPORT;
1054 			/* XXX keep for FW? */
1055 			DTRACE_PROBE4(sbd__xfer, struct scsi_task *, task,
1056 			    struct stmf_data_buf *, dbuf,
1057 			    uint64_t, laddr, uint32_t, len);
1058 			(void) stmf_xfer_data(task, dbuf, STMF_IOF_LU_DONE);
1059 		} else {
1060 			stmf_scsilib_send_status(task, STATUS_CHECK,
1061 			    STMF_SAA_READ_ERROR);
1062 		}
1063 		sbd_ats_remove_by_task(task);
1064 		return;
1065 	}
1066 
1067 	if (task->task_lu_private) {
1068 		scmd = (sbd_cmd_t *)task->task_lu_private;
1069 	} else {
1070 		scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
1071 		task->task_lu_private = scmd;
1072 	}
1073 	scmd->flags = SBD_SCSI_CMD_ACTIVE | SBD_SCSI_CMD_ATS_RELATED;
1074 	scmd->cmd_type = SBD_CMD_SCSI_READ;
1075 	scmd->nbufs = 1;
1076 	scmd->addr = laddr;
1077 	scmd->len = len;
1078 	scmd->current_ro = 0;
1079 
1080 	sbd_do_read_xfer(task, scmd, dbuf);
1081 }
1082 
1083 void
1084 sbd_do_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
1085     struct stmf_data_buf *dbuf, uint8_t dbuf_reusable)
1086 {
1087 	uint32_t len;
1088 	int bufs_to_take;
1089 
1090 	if (ATOMIC32_GET(scmd->len) == 0) {
1091 		goto DO_WRITE_XFER_DONE;
1092 	}
1093 
1094 	/* Let's try not to hog all the buffers the port has. */
1095 	bufs_to_take = ((task->task_max_nbufs > 2) &&
1096 	    (task->task_cmd_xfer_length < (32 * 1024))) ? 2 :
1097 	    task->task_max_nbufs;
1098 
1099 	if ((dbuf != NULL) &&
1100 	    ((dbuf->db_flags & DB_DONT_REUSE) || (dbuf_reusable == 0))) {
1101 		/* free current dbuf and allocate a new one */
1102 		stmf_free_dbuf(task, dbuf);
1103 		dbuf = NULL;
1104 	}
1105 	if (ATOMIC8_GET(scmd->nbufs) >= bufs_to_take) {
1106 		goto DO_WRITE_XFER_DONE;
1107 	}
1108 	if (dbuf == NULL) {
1109 		uint32_t maxsize, minsize, old_minsize;
1110 
1111 		maxsize = (ATOMIC32_GET(scmd->len) > (128*1024)) ? 128*1024 :
1112 		    ATOMIC32_GET(scmd->len);
1113 		minsize = maxsize >> 2;
1114 		do {
1115 			old_minsize = minsize;
1116 			dbuf = stmf_alloc_dbuf(task, maxsize, &minsize, 0);
1117 		} while ((dbuf == NULL) && (old_minsize > minsize) &&
1118 		    (minsize >= 512));
1119 		if (dbuf == NULL) {
1120 			if (ATOMIC8_GET(scmd->nbufs) == 0) {
1121 				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1122 				    STMF_ALLOC_FAILURE, NULL);
1123 			}
1124 			return;
1125 		}
1126 	}
1127 
1128 	len = ATOMIC32_GET(scmd->len) > dbuf->db_buf_size ? dbuf->db_buf_size :
1129 	    ATOMIC32_GET(scmd->len);
1130 
1131 	dbuf->db_relative_offset = scmd->current_ro;
1132 	dbuf->db_data_size = len;
1133 	dbuf->db_flags = DB_DIRECTION_FROM_RPORT;
1134 	(void) stmf_xfer_data(task, dbuf, 0);
1135 	/* outstanding port xfers and bufs used */
1136 	atomic_inc_8(&scmd->nbufs);
1137 	atomic_add_32(&scmd->len, -len);
1138 	scmd->current_ro += len;
1139 
1140 	if ((ATOMIC32_GET(scmd->len) != 0) &&
1141 	    (ATOMIC8_GET(scmd->nbufs) < bufs_to_take)) {
1142 		sbd_do_write_xfer(task, scmd, NULL, 0);
1143 	}
1144 	return;
1145 
1146 DO_WRITE_XFER_DONE:
1147 	if (dbuf != NULL) {
1148 		stmf_free_dbuf(task, dbuf);
1149 	}
1150 }
1151 
1152 void
1153 sbd_do_sgl_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer)
1154 {
1155 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1156 	sbd_zvol_io_t *zvio;
1157 	int ret;
1158 	uint32_t xfer_len, max_len, first_len;
1159 	stmf_status_t xstat;
1160 	stmf_data_buf_t *dbuf;
1161 	uint_t nblks;
1162 	uint64_t blksize = sl->sl_blksize;
1163 	uint64_t offset;
1164 	size_t db_private_sz;
1165 	uintptr_t pad;
1166 
1167 	ASSERT(rw_read_held(&sl->sl_access_state_lock));
1168 	ASSERT((sl->sl_flags & SL_MEDIA_LOADED) != 0);
1169 
1170 	/*
1171 	 * Calculate the limits on xfer_len to the minimum of :
1172 	 *    - task limit
1173 	 *    - lun limit
1174 	 *    - sbd global limit if set
1175 	 *    - first xfer limit if set
1176 	 *
1177 	 * First, protect against silly over-ride value
1178 	 */
1179 	if (sbd_max_xfer_len && ((sbd_max_xfer_len % DEV_BSIZE) != 0)) {
1180 		cmn_err(CE_WARN, "sbd_max_xfer_len invalid %d, resetting\n",
1181 		    sbd_max_xfer_len);
1182 		sbd_max_xfer_len = 0;
1183 	}
1184 	if (sbd_1st_xfer_len && ((sbd_1st_xfer_len % DEV_BSIZE) != 0)) {
1185 		cmn_err(CE_WARN, "sbd_1st_xfer_len invalid %d, resetting\n",
1186 		    sbd_1st_xfer_len);
1187 		sbd_1st_xfer_len = 0;
1188 	}
1189 
1190 	max_len = MIN(task->task_max_xfer_len, sl->sl_max_xfer_len);
1191 	if (sbd_max_xfer_len)
1192 		max_len = MIN(max_len, sbd_max_xfer_len);
1193 	/*
1194 	 * Special case the first xfer if hints are set.
1195 	 */
1196 	if (first_xfer && (sbd_1st_xfer_len || task->task_1st_xfer_len)) {
1197 		/* global over-ride has precedence */
1198 		if (sbd_1st_xfer_len)
1199 			first_len = sbd_1st_xfer_len;
1200 		else
1201 			first_len = task->task_1st_xfer_len;
1202 	} else {
1203 		first_len = 0;
1204 	}
1205 
1206 
1207 	while (ATOMIC32_GET(scmd->len) &&
1208 	    ATOMIC8_GET(scmd->nbufs) < task->task_max_nbufs) {
1209 		xfer_len = MIN(max_len, ATOMIC32_GET(scmd->len));
1210 		if (first_len) {
1211 			xfer_len = MIN(xfer_len, first_len);
1212 			first_len = 0;
1213 		}
1214 		if (xfer_len < ATOMIC32_GET(scmd->len)) {
1215 			/*
1216 			 * Attempt to end xfer on a block boundary.
1217 			 * The only way this does not happen is if the
1218 			 * xfer_len is small enough to stay contained
1219 			 * within the same block.
1220 			 */
1221 			uint64_t xfer_offset, xfer_aligned_end;
1222 
1223 			xfer_offset = scmd->addr + scmd->current_ro;
1224 			xfer_aligned_end =
1225 			    P2ALIGN(xfer_offset+xfer_len, blksize);
1226 			if (xfer_aligned_end > xfer_offset)
1227 				xfer_len = xfer_aligned_end - xfer_offset;
1228 		}
1229 		/*
1230 		 * Allocate object to track the write and reserve
1231 		 * enough space for scatter/gather list.
1232 		 */
1233 		offset = scmd->addr + scmd->current_ro;
1234 		nblks = sbd_zvol_numsegs(sl, offset, xfer_len);
1235 		db_private_sz = sizeof (*zvio) + sizeof (uintptr_t) /* PAD */ +
1236 		    (nblks * sizeof (stmf_sglist_ent_t));
1237 		dbuf = stmf_alloc(STMF_STRUCT_DATA_BUF, db_private_sz,
1238 		    AF_DONTZERO);
1239 
1240 		/*
1241 		 * Setup the dbuf
1242 		 *
1243 		 * XXX Framework does not handle variable length sglists
1244 		 * properly, so setup db_lu_private and db_port_private
1245 		 * fields here. db_stmf_private is properly set for
1246 		 * calls to stmf_free.
1247 		 */
1248 		if (dbuf->db_port_private == NULL) {
1249 			/*
1250 			 * XXX Framework assigns space to PP after db_sglist[0]
1251 			 */
1252 			cmn_err(CE_PANIC, "db_port_private == NULL");
1253 		}
1254 		pad = (uintptr_t)&dbuf->db_sglist[nblks];
1255 		dbuf->db_lu_private = (void *)P2ROUNDUP(pad, sizeof (pad));
1256 		dbuf->db_port_private = NULL;
1257 		dbuf->db_buf_size = xfer_len;
1258 		dbuf->db_data_size = xfer_len;
1259 		dbuf->db_relative_offset = scmd->current_ro;
1260 		dbuf->db_sglist_length = (uint16_t)nblks;
1261 		dbuf->db_xfer_status = 0;
1262 		dbuf->db_handle = 0;
1263 		dbuf->db_flags = (DB_DONT_CACHE | DB_DONT_REUSE |
1264 		    DB_DIRECTION_FROM_RPORT | DB_LU_DATA_BUF);
1265 
1266 		zvio = dbuf->db_lu_private;
1267 		zvio->zvio_offset = offset;
1268 
1269 		/* get the buffers */
1270 		ret = sbd_zvol_alloc_write_bufs(sl, dbuf);
1271 		if (ret != 0) {
1272 			/*
1273 			 * Could not allocate buffers from the backend;
1274 			 * treat it like an IO error.
1275 			 */
1276 			stmf_free(dbuf);
1277 			scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
1278 			if (ATOMIC8_GET(scmd->nbufs) == 0) {
1279 				/*
1280 				 * Nothing queued, so no completions coming
1281 				 */
1282 				sbd_ats_remove_by_task(task);
1283 				stmf_scsilib_send_status(task, STATUS_CHECK,
1284 				    STMF_SAA_WRITE_ERROR);
1285 				rw_exit(&sl->sl_access_state_lock);
1286 			}
1287 			/*
1288 			 * Completions of previous buffers will cleanup.
1289 			 */
1290 			return;
1291 		}
1292 
1293 		/*
1294 		 * Allow PP to do setup
1295 		 */
1296 		xstat = stmf_setup_dbuf(task, dbuf, 0);
1297 		if (xstat != STMF_SUCCESS) {
1298 			/*
1299 			 * This could happen if the driver cannot get the
1300 			 * DDI resources it needs for this request.
1301 			 * If other dbufs are queued, try again when the next
1302 			 * one completes, otherwise give up.
1303 			 */
1304 			sbd_zvol_rele_write_bufs_abort(sl, dbuf);
1305 			stmf_free(dbuf);
1306 			if (ATOMIC8_GET(scmd->nbufs) > 0) {
1307 				/* completion of previous dbuf will retry */
1308 				return;
1309 			}
1310 			/*
1311 			 * Done with this command.
1312 			 */
1313 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
1314 			sbd_ats_remove_by_task(task);
1315 			if (first_xfer)
1316 				stmf_scsilib_send_status(task, STATUS_QFULL, 0);
1317 			else
1318 				stmf_scsilib_send_status(task, STATUS_CHECK,
1319 				    STMF_SAA_WRITE_ERROR);
1320 			rw_exit(&sl->sl_access_state_lock);
1321 			return;
1322 		}
1323 
1324 		/*
1325 		 * dbuf is now queued on task
1326 		 */
1327 		atomic_inc_8(&scmd->nbufs);
1328 
1329 		xstat = stmf_xfer_data(task, dbuf, 0);
1330 		switch (xstat) {
1331 		case STMF_SUCCESS:
1332 			break;
1333 		case STMF_BUSY:
1334 			/*
1335 			 * The dbuf is queued on the task, but unknown
1336 			 * to the PP, thus no completion will occur.
1337 			 */
1338 			sbd_zvol_rele_write_bufs_abort(sl, dbuf);
1339 			stmf_teardown_dbuf(task, dbuf);
1340 			stmf_free(dbuf);
1341 			atomic_dec_8(&scmd->nbufs);
1342 			if (ATOMIC8_GET(scmd->nbufs) > 0) {
1343 				/* completion of previous dbuf will retry */
1344 				return;
1345 			}
1346 			/*
1347 			 * Done with this command.
1348 			 */
1349 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
1350 			sbd_ats_remove_by_task(task);
1351 			if (first_xfer)
1352 				stmf_scsilib_send_status(task, STATUS_QFULL, 0);
1353 			else
1354 				stmf_scsilib_send_status(task, STATUS_CHECK,
1355 				    STMF_SAA_WRITE_ERROR);
1356 			rw_exit(&sl->sl_access_state_lock);
1357 			return;
1358 		case STMF_ABORTED:
1359 			/*
1360 			 * Completion code will cleanup.
1361 			 */
1362 			scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
1363 			return;
1364 		}
1365 		/*
1366 		 * Update the xfer progress.
1367 		 */
1368 		atomic_add_32(&scmd->len, -xfer_len);
1369 		scmd->current_ro += xfer_len;
1370 	}
1371 }
1372 
1373 void
1374 sbd_handle_write_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
1375     struct stmf_data_buf *dbuf, uint8_t dbuf_reusable)
1376 {
1377 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1378 	uint64_t laddr;
1379 	uint32_t buflen, iolen;
1380 	int ndx;
1381 	uint8_t op = task->task_cdb[0];
1382 	boolean_t fua_bit = B_FALSE;
1383 
1384 	if (ATOMIC8_GET(scmd->nbufs) > 0) {
1385 		/*
1386 		 * Decrement the count to indicate the port xfer
1387 		 * into the dbuf has completed even though the buf is
1388 		 * still in use here in the LU provider.
1389 		 */
1390 		atomic_dec_8(&scmd->nbufs);
1391 	}
1392 
1393 	if (dbuf->db_xfer_status != STMF_SUCCESS) {
1394 		sbd_ats_remove_by_task(task);
1395 		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1396 		    dbuf->db_xfer_status, NULL);
1397 		return;
1398 	}
1399 
1400 	if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
1401 		goto WRITE_XFER_DONE;
1402 	}
1403 
1404 	if (ATOMIC32_GET(scmd->len) != 0) {
1405 		/*
1406 		 * Initiate the next port xfer to occur in parallel
1407 		 * with writing this buf.
1408 		 */
1409 		sbd_do_write_xfer(task, scmd, NULL, 0);
1410 	}
1411 
1412 	/*
1413 	 * Check to see if the command is WRITE(10), WRITE(12), or WRITE(16).
1414 	 * If it is then check for bit 3 being set to indicate if Forced
1415 	 * Unit Access is being requested. If so, we'll bypass the direct
1416 	 * call and handle it in sbd_data_write().
1417 	 */
1418 	if (((op == SCMD_WRITE_G1) || (op == SCMD_WRITE_G4) ||
1419 	    (op == SCMD_WRITE_G5)) && (task->task_cdb[1] & BIT_3)) {
1420 		fua_bit = B_TRUE;
1421 	}
1422 	laddr = scmd->addr + dbuf->db_relative_offset;
1423 
1424 	/*
1425 	 * If this is going to a zvol, use the direct call to
1426 	 * sbd_zvol_copy_{read,write}. The direct call interface is
1427 	 * restricted to PPs that accept sglists, but that is not required.
1428 	 */
1429 	if (sl->sl_flags & SL_CALL_ZVOL &&
1430 	    (task->task_additional_flags & TASK_AF_ACCEPT_LU_DBUF) &&
1431 	    (sbd_zcopy & (4|1)) && !fua_bit) {
1432 		int commit;
1433 
1434 		commit = (ATOMIC32_GET(scmd->len) == 0 &&
1435 		    ATOMIC8_GET(scmd->nbufs) == 0);
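		/*
		 * Only the final buffer of the command asks the zvol layer
		 * to commit (ZVIO_COMMIT) the write.
		 */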
1436 		rw_enter(&sl->sl_access_state_lock, RW_READER);
1437 		if ((sl->sl_flags & SL_MEDIA_LOADED) == 0 ||
1438 		    sbd_copy_rdwr(task, laddr, dbuf, SBD_CMD_SCSI_WRITE,
1439 		    commit) != STMF_SUCCESS)
1440 			scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
1441 		rw_exit(&sl->sl_access_state_lock);
1442 		buflen = dbuf->db_data_size;
1443 	} else {
1444 		for (buflen = 0, ndx = 0; (buflen < dbuf->db_data_size) &&
1445 		    (ndx < dbuf->db_sglist_length); ndx++) {
1446 			iolen = min(dbuf->db_data_size - buflen,
1447 			    dbuf->db_sglist[ndx].seg_length);
1448 			if (iolen == 0)
1449 				break;
1450 			if (sbd_data_write(sl, task, laddr, (uint64_t)iolen,
1451 			    dbuf->db_sglist[ndx].seg_addr) != STMF_SUCCESS) {
1452 				scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
1453 				break;
1454 			}
1455 			buflen += iolen;
1456 			laddr += (uint64_t)iolen;
1457 		}
1458 	}
1459 	task->task_nbytes_transferred += buflen;
1460 WRITE_XFER_DONE:
1461 	if (ATOMIC32_GET(scmd->len) == 0 ||
1462 	    scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
1463 		stmf_free_dbuf(task, dbuf);
1464 		if (ATOMIC8_GET(scmd->nbufs))
1465 			return;	/* wait for all buffers to complete */
1466 		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
1467 		sbd_ats_remove_by_task(task);
1468 		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
1469 			stmf_scsilib_send_status(task, STATUS_CHECK,
1470 			    STMF_SAA_WRITE_ERROR);
1471 		} else {
1472 			/*
1473 			 * If SYNC_WRITE flag is on then we need to flush
1474 			 * cache before sending status.
1475 			 * Note: this may be a no-op because of how
1476 			 * SL_WRITEBACK_CACHE_DISABLE and
1477 			 * SL_FLUSH_ON_DISABLED_WRITECACHE are set, but it is not
1478 			 * worth the code complexity of checking those here since
1479 			 * SBD_SCSI_CMD_SYNC_WRITE is rarely set.
1480 			 */
1481 			if ((scmd->flags & SBD_SCSI_CMD_SYNC_WRITE) &&
1482 			    (sbd_flush_data_cache(sl, 0) != SBD_SUCCESS)) {
1483 				stmf_scsilib_send_status(task, STATUS_CHECK,
1484 				    STMF_SAA_WRITE_ERROR);
1485 			} else {
1486 				stmf_scsilib_send_status(task, STATUS_GOOD, 0);
1487 			}
1488 		}
1489 		return;
1490 	}
1491 	sbd_do_write_xfer(task, scmd, dbuf, dbuf_reusable);
1492 }
1493 
1494 /*
1495  * Return true if copy avoidance is beneficial.
1496  */
1497 static int
1498 sbd_zcopy_write_useful(scsi_task_t *task, uint64_t laddr, uint32_t len,
1499     uint64_t blksize)
1500 {
1501 	/*
1502 	 * If there is a global copy threshold over-ride, use it.
1503 	 * Otherwise use the PP value with the caveat that at least
1504 	 * 1/2 the data must avoid being copied to be useful.
1505 	 */
1506 	if (sbd_copy_threshold > 0) {
1507 		return (len >= sbd_copy_threshold);
1508 	} else {
1509 		uint64_t no_copy_span;
1510 
1511 		/* sub-blocksize writes always copy */
1512 		if (len < task->task_copy_threshold || len < blksize)
1513 			return (0);
1514 		/*
1515 		 * Calculate amount of data that will avoid the copy path.
1516 		 * The calculation is only valid if len >= blksize.
1517 		 */
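		/*
		 * For example, with blksize 4096: laddr 0 and len 8192 give
		 * no_copy_span 8192 (zcopy useful), while laddr 2048 and
		 * len 4096 give no_copy_span 0 (copy path).
		 */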
1518 		no_copy_span = P2ALIGN(laddr+len, blksize) -
1519 		    P2ROUNDUP(laddr, blksize);
1520 		return (no_copy_span >= len/2);
1521 	}
1522 }
1523 
1524 void
1525 sbd_handle_write(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
1526 {
1527 	uint64_t lba, laddr;
1528 	uint32_t len;
1529 	uint8_t op = task->task_cdb[0], do_immediate_data = 0;
1530 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1531 	sbd_cmd_t *scmd;
1532 	stmf_data_buf_t *dbuf;
1533 	uint64_t blkcount;
1534 	uint8_t	sync_wr_flag = 0;
1535 	boolean_t fua_bit = B_FALSE;
1536 
1537 	if (sl->sl_flags & SL_WRITE_PROTECTED) {
1538 		stmf_scsilib_send_status(task, STATUS_CHECK,
1539 		    STMF_SAA_WRITE_PROTECTED);
1540 		return;
1541 	}
1542 	/*
1543 	 * Check to see if the command is WRITE(10), WRITE(12), or WRITE(16).
1544 	 * If it is then check for bit 3 being set to indicate if Forced
1545 	 * Unit Access is being requested. If so, we'll bypass the fast path
1546 	 * code to simplify support of this feature.
1547 	 */
1548 	if (((op == SCMD_WRITE_G1) || (op == SCMD_WRITE_G4) ||
1549 	    (op == SCMD_WRITE_G5)) && (task->task_cdb[1] & BIT_3)) {
1550 		fua_bit = B_TRUE;
1551 	}
1552 	if (op == SCMD_WRITE) {
1553 		lba = READ_SCSI21(&task->task_cdb[1], uint64_t);
1554 		len = (uint32_t)task->task_cdb[4];
1555 
1556 		if (len == 0) {
1557 			len = 256;
1558 		}
1559 	} else if (op == SCMD_WRITE_G1) {
1560 		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
1561 		len = READ_SCSI16(&task->task_cdb[7], uint32_t);
1562 	} else if (op == SCMD_WRITE_G5) {
1563 		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
1564 		len = READ_SCSI32(&task->task_cdb[6], uint32_t);
1565 	} else if (op == SCMD_WRITE_G4) {
1566 		lba = READ_SCSI64(&task->task_cdb[2], uint64_t);
1567 		len = READ_SCSI32(&task->task_cdb[10], uint32_t);
1568 	} else if (op == SCMD_WRITE_VERIFY) {
1569 		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
1570 		len = READ_SCSI16(&task->task_cdb[7], uint32_t);
1571 		sync_wr_flag = SBD_SCSI_CMD_SYNC_WRITE;
1572 	} else if (op == SCMD_WRITE_VERIFY_G5) {
1573 		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
1574 		len = READ_SCSI32(&task->task_cdb[6], uint32_t);
1575 		sync_wr_flag = SBD_SCSI_CMD_SYNC_WRITE;
1576 	} else if (op == SCMD_WRITE_VERIFY_G4) {
1577 		lba = READ_SCSI64(&task->task_cdb[2], uint64_t);
1578 		len = READ_SCSI32(&task->task_cdb[10], uint32_t);
1579 		sync_wr_flag = SBD_SCSI_CMD_SYNC_WRITE;
1580 	} else {
1581 		stmf_scsilib_send_status(task, STATUS_CHECK,
1582 		    STMF_SAA_INVALID_OPCODE);
1583 		return;
1584 	}
1585 
1586 	laddr = lba << sl->sl_data_blocksize_shift;
1587 	blkcount = len;
1588 	len <<= sl->sl_data_blocksize_shift;
1589 
1590 	if ((laddr + (uint64_t)len) > sl->sl_lu_size) {
1591 		stmf_scsilib_send_status(task, STATUS_CHECK,
1592 		    STMF_SAA_LBA_OUT_OF_RANGE);
1593 		return;
1594 	}
1595 
1596 	task->task_cmd_xfer_length = len;
1597 	if (task->task_additional_flags & TASK_AF_NO_EXPECTED_XFER_LENGTH) {
1598 		task->task_expected_xfer_length = len;
1599 	}
1600 
1601 	len = (len > task->task_expected_xfer_length) ?
1602 	    task->task_expected_xfer_length : len;
1603 
1604 	if (len == 0) {
1605 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
1606 		return;
1607 	}
1608 
1609 	if (sbd_ats_handling_before_io(task, sl, lba, blkcount) !=
1610 	    SBD_SUCCESS) {
1611 		if (stmf_task_poll_lu(task, 10) != STMF_SUCCESS) {
1612 			stmf_scsilib_send_status(task, STATUS_BUSY, 0);
1613 		}
1614 		return;
1615 	}
1616 
1617 	if (sbd_zcopy & (4|1) &&		/* Debug switch */
1618 	    initial_dbuf == NULL &&		/* No PP buf passed in */
1619 	    sl->sl_flags & SL_CALL_ZVOL &&	/* zvol backing store */
1620 	    (task->task_additional_flags &
1621 	    TASK_AF_ACCEPT_LU_DBUF) &&		/* PP allows it */
1622 	    sbd_zcopy_write_useful(task, laddr, len, sl->sl_blksize) &&
1623 	    !fua_bit) {
1624 
1625 		/*
1626 		 * XXX Note that disallowing initial_dbuf will eliminate
1627 		 * iSCSI from participating. For small writes, that is
1628 		 * probably ok. For large writes, it may be best to just
1629 		 * copy the data from the initial dbuf and use zcopy for
1630 		 * the rest.
1631 		 */
1632 		rw_enter(&sl->sl_access_state_lock, RW_READER);
1633 		if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
1634 			rw_exit(&sl->sl_access_state_lock);
1635 			sbd_ats_remove_by_task(task);
1636 			stmf_scsilib_send_status(task, STATUS_CHECK,
1637 			    STMF_SAA_READ_ERROR);
1638 			return;
1639 		}
1640 		/*
1641 		 * Setup scmd to track the write progress.
1642 		 */
1643 		if (task->task_lu_private) {
1644 			scmd = (sbd_cmd_t *)task->task_lu_private;
1645 		} else {
1646 			scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t),
1647 			    KM_SLEEP);
1648 			task->task_lu_private = scmd;
1649 		}
1650 		scmd->flags = SBD_SCSI_CMD_ACTIVE | SBD_SCSI_CMD_ATS_RELATED |
1651 		    sync_wr_flag;
1652 		scmd->cmd_type = SBD_CMD_SCSI_WRITE;
1653 		scmd->nbufs = 0;
1654 		scmd->addr = laddr;
1655 		scmd->len = len;
1656 		scmd->current_ro = 0;
1657 		sbd_do_sgl_write_xfer(task, scmd, 1);
1658 		return;
1659 	}
1660 
1661 	if ((initial_dbuf != NULL) && (task->task_flags & TF_INITIAL_BURST)) {
1662 		if (initial_dbuf->db_data_size > len) {
1663 			if (initial_dbuf->db_data_size >
1664 			    task->task_expected_xfer_length) {
1665 				/* protocol error */
1666 				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1667 				    STMF_INVALID_ARG, NULL);
1668 				return;
1669 			}
1670 			initial_dbuf->db_data_size = len;
1671 		}
1672 		do_immediate_data = 1;
1673 	}
1674 	dbuf = initial_dbuf;
1675 
1676 	if (task->task_lu_private) {
1677 		scmd = (sbd_cmd_t *)task->task_lu_private;
1678 	} else {
1679 		scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
1680 		task->task_lu_private = scmd;
1681 	}
1682 	scmd->flags = SBD_SCSI_CMD_ACTIVE | SBD_SCSI_CMD_ATS_RELATED |
1683 	    sync_wr_flag;
1684 	scmd->cmd_type = SBD_CMD_SCSI_WRITE;
1685 	scmd->nbufs = 0;
1686 	scmd->addr = laddr;
1687 	scmd->len = len;
1688 	scmd->current_ro = 0;
1689 
1690 	if (do_immediate_data) {
1691 		/*
1692 		 * Account for data passed in this write command
1693 		 */
1694 		(void) stmf_xfer_data(task, dbuf, STMF_IOF_STATS_ONLY);
1695 		atomic_add_32(&scmd->len, -dbuf->db_data_size);
1696 		scmd->current_ro += dbuf->db_data_size;
1697 		dbuf->db_xfer_status = STMF_SUCCESS;
1698 		sbd_handle_write_xfer_completion(task, scmd, dbuf, 0);
1699 	} else {
1700 		sbd_do_write_xfer(task, scmd, dbuf, 0);
1701 	}
1702 }
1703 
1704 /*
1705  * Utility routine to handle small, non-performance-critical data transfers
1706  * to the initiators. dbuf is an initial data buf (if any), 'p' points to
1707  * the data buffer that is the source of the transfer, cdb_xfer_size is the
1708  * transfer size requested by the CDB, and cmd_xfer_size is the actual amount
1709  * of data this command would transfer (the size of the data pointed to by 'p').
1710  */
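/*
 * For example, sbd_handle_read_capacity() below builds an 8- or 32-byte
 * payload on the stack and hands it to this routine.
 */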
1711 void
1712 sbd_handle_short_read_transfers(scsi_task_t *task, stmf_data_buf_t *dbuf,
1713     uint8_t *p, uint32_t cdb_xfer_size, uint32_t cmd_xfer_size)
1714 {
1715 	uint32_t bufsize, ndx;
1716 	sbd_cmd_t *scmd;
1717 
1718 	cmd_xfer_size = min(cmd_xfer_size, cdb_xfer_size);
1719 
1720 	task->task_cmd_xfer_length = cmd_xfer_size;
1721 	if (task->task_additional_flags & TASK_AF_NO_EXPECTED_XFER_LENGTH) {
1722 		task->task_expected_xfer_length = cmd_xfer_size;
1723 	} else {
1724 		cmd_xfer_size = min(cmd_xfer_size,
1725 		    task->task_expected_xfer_length);
1726 	}
1727 
1728 	if (cmd_xfer_size == 0) {
1729 		stmf_scsilib_send_status(task, STATUS_CHECK,
1730 		    STMF_SAA_INVALID_FIELD_IN_CDB);
1731 		return;
1732 	}
1733 	if (dbuf == NULL) {
1734 		uint32_t minsize = cmd_xfer_size;
1735 
1736 		dbuf = stmf_alloc_dbuf(task, cmd_xfer_size, &minsize, 0);
1737 	}
1738 	if (dbuf == NULL) {
1739 		stmf_scsilib_send_status(task, STATUS_QFULL, 0);
1740 		return;
1741 	}
1742 
1743 	for (bufsize = 0, ndx = 0; bufsize < cmd_xfer_size; ndx++) {
1744 		uint8_t *d;
1745 		uint32_t s;
1746 
1747 		d = dbuf->db_sglist[ndx].seg_addr;
1748 		s = min((cmd_xfer_size - bufsize),
1749 		    dbuf->db_sglist[ndx].seg_length);
1750 		bcopy(p+bufsize, d, s);
1751 		bufsize += s;
1752 	}
1753 	dbuf->db_relative_offset = 0;
1754 	dbuf->db_data_size = cmd_xfer_size;
1755 	dbuf->db_flags = DB_DIRECTION_TO_RPORT;
1756 
1757 	if (task->task_lu_private == NULL) {
1758 		task->task_lu_private =
1759 		    kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
1760 	}
1761 	scmd = (sbd_cmd_t *)task->task_lu_private;
1762 
1763 	scmd->cmd_type = SBD_CMD_SMALL_READ;
1764 	scmd->flags = SBD_SCSI_CMD_ACTIVE;
1765 	(void) stmf_xfer_data(task, dbuf, 0);
1766 }
1767 
1768 void
1769 sbd_handle_short_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
1770     struct stmf_data_buf *dbuf)
1771 {
1772 	if (dbuf->db_xfer_status != STMF_SUCCESS) {
1773 		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1774 		    dbuf->db_xfer_status, NULL);
1775 		return;
1776 	}
1777 	task->task_nbytes_transferred = dbuf->db_data_size;
1778 	scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
1779 	stmf_scsilib_send_status(task, STATUS_GOOD, 0);
1780 }
1781 
1782 void
1783 sbd_handle_short_write_transfers(scsi_task_t *task,
1784     stmf_data_buf_t *dbuf, uint32_t cdb_xfer_size)
1785 {
1786 	sbd_cmd_t *scmd;
1787 
1788 	task->task_cmd_xfer_length = cdb_xfer_size;
1789 	if (task->task_additional_flags & TASK_AF_NO_EXPECTED_XFER_LENGTH) {
1790 		task->task_expected_xfer_length = cdb_xfer_size;
1791 	} else {
1792 		cdb_xfer_size = min(cdb_xfer_size,
1793 		    task->task_expected_xfer_length);
1794 	}
1795 
1796 	if (cdb_xfer_size == 0) {
1797 		stmf_scsilib_send_status(task, STATUS_CHECK,
1798 		    STMF_SAA_INVALID_FIELD_IN_CDB);
1799 		return;
1800 	}
1801 	if (task->task_lu_private == NULL) {
1802 		task->task_lu_private = kmem_zalloc(sizeof (sbd_cmd_t),
1803 		    KM_SLEEP);
1804 	} else {
1805 		bzero(task->task_lu_private, sizeof (sbd_cmd_t));
1806 	}
1807 	scmd = (sbd_cmd_t *)task->task_lu_private;
1808 	scmd->cmd_type = SBD_CMD_SMALL_WRITE;
1809 	scmd->flags = SBD_SCSI_CMD_ACTIVE;
1810 	scmd->len = cdb_xfer_size;
1811 	if (dbuf == NULL) {
1812 		uint32_t minsize = cdb_xfer_size;
1813 
1814 		dbuf = stmf_alloc_dbuf(task, cdb_xfer_size, &minsize, 0);
1815 		if (dbuf == NULL) {
1816 			stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1817 			    STMF_ALLOC_FAILURE, NULL);
1818 			return;
1819 		}
1820 		dbuf->db_data_size = cdb_xfer_size;
1821 		dbuf->db_relative_offset = 0;
1822 		dbuf->db_flags = DB_DIRECTION_FROM_RPORT;
1823 		(void) stmf_xfer_data(task, dbuf, 0);
1824 	} else {
1825 		if (dbuf->db_data_size < cdb_xfer_size) {
1826 			stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1827 			    STMF_ABORTED, NULL);
1828 			return;
1829 		}
1830 		dbuf->db_data_size = cdb_xfer_size;
1831 		sbd_handle_short_write_xfer_completion(task, dbuf);
1832 	}
1833 }
1834 
1835 void
1836 sbd_handle_short_write_xfer_completion(scsi_task_t *task,
1837     stmf_data_buf_t *dbuf)
1838 {
1839 	sbd_cmd_t *scmd;
1840 	stmf_status_t st_ret;
1841 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1842 
1843 	/*
1844 	 * For now let's assume we will get only one sglist element
1845 	 * for short writes. If that ever changes, we should allocate
1846 	 * a local buffer and copy all the sg elements to one linear space.
1847 	 */
1848 	if ((dbuf->db_xfer_status != STMF_SUCCESS) ||
1849 	    (dbuf->db_sglist_length > 1)) {
1850 		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1851 		    dbuf->db_xfer_status, NULL);
1852 		return;
1853 	}
1854 
1855 	task->task_nbytes_transferred = dbuf->db_data_size;
1856 	scmd = (sbd_cmd_t *)task->task_lu_private;
1857 	scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
1858 
1859 	/* Let's find out who to call */
1860 	switch (task->task_cdb[0]) {
1861 	case SCMD_MODE_SELECT:
1862 	case SCMD_MODE_SELECT_G1:
1863 		if (sl->sl_access_state == SBD_LU_STANDBY) {
1864 			st_ret = stmf_proxy_scsi_cmd(task, dbuf);
1865 			if (st_ret != STMF_SUCCESS) {
1866 				stmf_scsilib_send_status(task, STATUS_CHECK,
1867 				    STMF_SAA_LU_NO_ACCESS_UNAVAIL);
1868 			}
1869 		} else {
1870 			sbd_handle_mode_select_xfer(task,
1871 			    dbuf->db_sglist[0].seg_addr, dbuf->db_data_size);
1872 		}
1873 		break;
1874 	case SCMD_UNMAP:
1875 		sbd_handle_unmap_xfer(task,
1876 		    dbuf->db_sglist[0].seg_addr, dbuf->db_data_size);
1877 		break;
1878 	case SCMD_EXTENDED_COPY:
1879 		sbd_handle_xcopy_xfer(task, dbuf->db_sglist[0].seg_addr);
1880 		break;
1881 	case SCMD_PERSISTENT_RESERVE_OUT:
1882 		if (sl->sl_access_state == SBD_LU_STANDBY) {
1883 			st_ret = stmf_proxy_scsi_cmd(task, dbuf);
1884 			if (st_ret != STMF_SUCCESS) {
1885 				stmf_scsilib_send_status(task, STATUS_CHECK,
1886 				    STMF_SAA_LU_NO_ACCESS_UNAVAIL);
1887 			}
1888 		} else {
1889 			sbd_handle_pgr_out_data(task, dbuf);
1890 		}
1891 		break;
1892 	default:
1893 		/* This should never happen */
1894 		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1895 		    STMF_ABORTED, NULL);
1896 	}
1897 }
1898 
1899 void
1900 sbd_handle_read_capacity(struct scsi_task *task,
1901     struct stmf_data_buf *initial_dbuf)
1902 {
1903 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1904 	uint32_t cdb_len;
1905 	uint8_t p[32];
1906 	uint64_t s;
1907 	uint16_t blksize;
1908 
1909 	s = sl->sl_lu_size >> sl->sl_data_blocksize_shift;
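	/*
	 * Both READ CAPACITY variants report the address of the last LBA,
	 * not the block count, hence the decrement below.
	 */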
1910 	s--;
1911 	blksize = ((uint16_t)1) << sl->sl_data_blocksize_shift;
1912 
1913 	switch (task->task_cdb[0]) {
1914 	case SCMD_READ_CAPACITY:
1915 		if (s & 0xffffffff00000000ull) {
1916 			p[0] = p[1] = p[2] = p[3] = 0xFF;
1917 		} else {
1918 			p[0] = (s >> 24) & 0xff;
1919 			p[1] = (s >> 16) & 0xff;
1920 			p[2] = (s >> 8) & 0xff;
1921 			p[3] = s & 0xff;
1922 		}
1923 		p[4] = 0; p[5] = 0;
1924 		p[6] = (blksize >> 8) & 0xff;
1925 		p[7] = blksize & 0xff;
1926 		sbd_handle_short_read_transfers(task, initial_dbuf, p, 8, 8);
1927 		break;
1928 
1929 	case SCMD_SVC_ACTION_IN_G4:
1930 		cdb_len = READ_SCSI32(&task->task_cdb[10], uint32_t);
1931 		bzero(p, 32);
1932 		p[0] = (s >> 56) & 0xff;
1933 		p[1] = (s >> 48) & 0xff;
1934 		p[2] = (s >> 40) & 0xff;
1935 		p[3] = (s >> 32) & 0xff;
1936 		p[4] = (s >> 24) & 0xff;
1937 		p[5] = (s >> 16) & 0xff;
1938 		p[6] = (s >> 8) & 0xff;
1939 		p[7] = s & 0xff;
1940 		p[10] = (blksize >> 8) & 0xff;
1941 		p[11] = blksize & 0xff;
1942 		if (sl->sl_flags & SL_UNMAP_ENABLED) {
1943 			p[14] = 0x80;
1944 		}
1945 		sbd_handle_short_read_transfers(task, initial_dbuf, p,
1946 		    cdb_len, 32);
1947 		break;
1948 	}
1949 }
1950 
1951 void
1952 sbd_calc_geometry(uint64_t s, uint16_t blksize, uint8_t *nsectors,
1953     uint8_t *nheads, uint32_t *ncyl)
1954 {
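	/*
	 * Illustrative example: a 2 GiB LU with 512-byte blocks takes the
	 * small-device branch, giving 32 sectors, 8 heads and
	 * 2^31 / (512 * 32 * 8) = 16384 cylinders.
	 */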
1955 	if (s < (4ull * 1024ull * 1024ull * 1024ull)) {
1956 		*nsectors = 32;
1957 		*nheads = 8;
1958 	} else {
1959 		*nsectors = 254;
1960 		*nheads = 254;
1961 	}
1962 	*ncyl = s / ((uint64_t)blksize * (uint64_t)(*nsectors) *
1963 	    (uint64_t)(*nheads));
1964 }
1965 
1966 void
1967 sbd_handle_mode_sense(struct scsi_task *task,
1968     struct stmf_data_buf *initial_dbuf, uint8_t *buf)
1969 {
1970 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1971 	uint32_t cmd_size, n;
1972 	uint8_t *cdb;
1973 	uint32_t ncyl;
1974 	uint8_t nsectors, nheads;
1975 	uint8_t page, ctrl, header_size;
1976 	uint16_t nbytes;
1977 	uint8_t *p;
1978 	uint64_t s = sl->sl_lu_size;
1979 	uint32_t dev_spec_param_offset;
1980 
1981 	p = buf;	/* buf is assumed to be zeroed out and large enough */
1982 	n = 0;
1983 	cdb = &task->task_cdb[0];
1984 	page = cdb[2] & 0x3F;
1985 	ctrl = (cdb[2] >> 6) & 3;
1986 
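	/*
	 * MODE SENSE(6) uses a 4-byte mode parameter header with the
	 * device-specific parameter at byte 2; MODE SENSE(10) uses an
	 * 8-byte header with it at byte 3.
	 */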
1987 	if (cdb[0] == SCMD_MODE_SENSE) {
1988 		cmd_size = cdb[4];
1989 		header_size = 4;
1990 		dev_spec_param_offset = 2;
1991 	} else {
1992 		cmd_size = READ_SCSI16(&cdb[7], uint32_t);
1993 		header_size = 8;
1994 		dev_spec_param_offset = 3;
1995 	}
1996 
1997 	/* Now validate the command */
1998 	if ((cdb[2] != 0) && (page != MODEPAGE_ALLPAGES) &&
1999 	    (page != MODEPAGE_CACHING) && (page != MODEPAGE_CTRL_MODE) &&
2000 	    (page != MODEPAGE_FORMAT) && (page != MODEPAGE_GEOMETRY)) {
2001 		stmf_scsilib_send_status(task, STATUS_CHECK,
2002 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2003 		return;
2004 	}
2005 
2006 	/* We will update the length in the mode header at the end */
2007 
2008 	/* Block dev device specific param in mode param header has wp bit */
2009 	if (sl->sl_flags & SL_WRITE_PROTECTED) {
2010 		p[n + dev_spec_param_offset] = BIT_7;
2011 	}
2012 	n += header_size;
2013 	/* We are not going to return any block descriptor */
2014 
2015 	nbytes = ((uint16_t)1) << sl->sl_data_blocksize_shift;
2016 	sbd_calc_geometry(s, nbytes, &nsectors, &nheads, &ncyl);
2017 
2018 	if ((page == MODEPAGE_FORMAT) || (page == MODEPAGE_ALLPAGES)) {
2019 		p[n] = 0x03;
2020 		p[n+1] = 0x16;
2021 		if (ctrl != 1) {
2022 			p[n + 11] = nsectors;
2023 			p[n + 12] = nbytes >> 8;
2024 			p[n + 13] = nbytes & 0xff;
2025 			p[n + 20] = 0x80;
2026 		}
2027 		n += 24;
2028 	}
2029 	if ((page == MODEPAGE_GEOMETRY) || (page == MODEPAGE_ALLPAGES)) {
2030 		p[n] = 0x04;
2031 		p[n + 1] = 0x16;
2032 		if (ctrl != 1) {
2033 			p[n + 2] = ncyl >> 16;
2034 			p[n + 3] = ncyl >> 8;
2035 			p[n + 4] = ncyl & 0xff;
2036 			p[n + 5] = nheads;
2037 			p[n + 20] = 0x15;
2038 			p[n + 21] = 0x18;
2039 		}
2040 		n += 24;
2041 	}
2042 	if ((page == MODEPAGE_CACHING) || (page == MODEPAGE_ALLPAGES)) {
2043 		struct mode_caching *mode_caching_page;
2044 
2045 		mode_caching_page = (struct mode_caching *)&p[n];
2046 
2047 		mode_caching_page->mode_page.code = MODEPAGE_CACHING;
2048 		mode_caching_page->mode_page.ps = 1; /* A saveable page */
2049 		mode_caching_page->mode_page.length = 0x12;
2050 
2051 		switch (ctrl) {
2052 		case (0):
2053 			/* Current */
2054 			if ((sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) == 0) {
2055 				mode_caching_page->wce = 1;
2056 			}
2057 			break;
2058 
2059 		case (1):
2060 			/* Changeable */
2061 			if ((sl->sl_flags &
2062 			    SL_WRITEBACK_CACHE_SET_UNSUPPORTED) == 0) {
2063 				mode_caching_page->wce = 1;
2064 			}
2065 			break;
2066 
2067 		default:
2068 			if ((sl->sl_flags &
2069 			    SL_SAVED_WRITE_CACHE_DISABLE) == 0) {
2070 				mode_caching_page->wce = 1;
2071 			}
2072 			break;
2073 		}
2074 		n += (sizeof (struct mode_page) +
2075 		    mode_caching_page->mode_page.length);
2076 	}
2077 	if ((page == MODEPAGE_CTRL_MODE) || (page == MODEPAGE_ALLPAGES)) {
2078 		struct mode_control_scsi3 *mode_control_page;
2079 
2080 		mode_control_page = (struct mode_control_scsi3 *)&p[n];
2081 
2082 		mode_control_page->mode_page.code = MODEPAGE_CTRL_MODE;
2083 		mode_control_page->mode_page.length =
2084 		    PAGELENGTH_MODE_CONTROL_SCSI3;
2085 		if (ctrl != 1) {
2086 			/* If not looking for changeable values, report this. */
2087 			mode_control_page->que_mod = CTRL_QMOD_UNRESTRICT;
2088 		}
2089 		n += (sizeof (struct mode_page) +
2090 		    mode_control_page->mode_page.length);
2091 	}
2092 
2093 	if (cdb[0] == SCMD_MODE_SENSE) {
2094 		if (n > 255) {
2095 			stmf_scsilib_send_status(task, STATUS_CHECK,
2096 			    STMF_SAA_INVALID_FIELD_IN_CDB);
2097 			return;
2098 		}
2099 		/*
2100 		 * Mode parameter header length doesn't include the number
2101 		 * of bytes in the length field, so adjust the count.
2102 		 * Byte count minus header length field size.
2103 		 */
2104 		buf[0] = (n - header_size) & 0xff;
2105 	} else {
2106 		/* Byte count minus header length field size. */
2107 		buf[1] = (n - header_size) & 0xff;
2108 		buf[0] = ((n - header_size) >> 8) & 0xff;
2109 	}
2110 
2111 	sbd_handle_short_read_transfers(task, initial_dbuf, buf,
2112 	    cmd_size, n);
2113 }
2114 
2115 void
2116 sbd_handle_mode_select(scsi_task_t *task, stmf_data_buf_t *dbuf)
2117 {
2118 	uint32_t cmd_xfer_len;
2119 
2120 	if (task->task_cdb[0] == SCMD_MODE_SELECT) {
2121 		cmd_xfer_len = (uint32_t)task->task_cdb[4];
2122 	} else {
2123 		cmd_xfer_len = READ_SCSI16(&task->task_cdb[7], uint32_t);
2124 	}
2125 
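	/*
	 * Byte 1 must have the PF bit (0x10) set and, apart from SP (bit 0),
	 * no other bits set.
	 */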
2126 	if ((task->task_cdb[1] & 0xFE) != 0x10) {
2127 		stmf_scsilib_send_status(task, STATUS_CHECK,
2128 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2129 		return;
2130 	}
2131 
2132 	if (cmd_xfer_len == 0) {
2133 		/* zero byte mode selects are allowed */
2134 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2135 		return;
2136 	}
2137 
2138 	sbd_handle_short_write_transfers(task, dbuf, cmd_xfer_len);
2139 }
2140 
2141 void
2142 sbd_handle_mode_select_xfer(scsi_task_t *task, uint8_t *buf, uint32_t buflen)
2143 {
2144 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2145 	sbd_it_data_t *it;
2146 	int hdr_len, bd_len;
2147 	sbd_status_t sret;
2148 	int i;
2149 
2150 	if (task->task_cdb[0] == SCMD_MODE_SELECT) {
2151 		hdr_len = 4;
2152 	} else {
2153 		hdr_len = 8;
2154 	}
2155 
2156 	if (buflen < hdr_len)
2157 		goto mode_sel_param_len_err;
2158 
2159 	bd_len = hdr_len == 4 ? buf[3] : READ_SCSI16(&buf[6], int);
2160 
2161 	if (buflen < (hdr_len + bd_len + 2))
2162 		goto mode_sel_param_len_err;
2163 
2164 	buf += hdr_len + bd_len;
2165 	buflen -= hdr_len + bd_len;
2166 
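	/*
	 * Only the caching mode page (page code 8) is accepted here, and only
	 * its WCE bit (byte 2, bit 2) may be set; all other page bytes must
	 * be zero.
	 */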
2167 	if ((buf[0] != 8) || (buflen != ((uint32_t)buf[1] + 2))) {
2168 		goto mode_sel_param_len_err;
2169 	}
2170 
2171 	if (buf[2] & 0xFB) {
2172 		goto mode_sel_param_field_err;
2173 	}
2174 
2175 	for (i = 3; i < (buf[1] + 2); i++) {
2176 		if (buf[i]) {
2177 			goto mode_sel_param_field_err;
2178 		}
2179 	}
2180 
2181 	sret = SBD_SUCCESS;
2182 
2183 	/* All good. Lets handle the write cache change, if any */
2184 	if (buf[2] & BIT_2) {
2185 		sret = sbd_wcd_set(0, sl);
2186 	} else {
2187 		sret = sbd_wcd_set(1, sl);
2188 	}
2189 
2190 	if (sret != SBD_SUCCESS) {
2191 		stmf_scsilib_send_status(task, STATUS_CHECK,
2192 		    STMF_SAA_WRITE_ERROR);
2193 		return;
2194 	}
2195 
2196 	/* set on the device passed, now set the flags */
2197 	mutex_enter(&sl->sl_lock);
2198 	if (buf[2] & BIT_2) {
2199 		sl->sl_flags &= ~SL_WRITEBACK_CACHE_DISABLE;
2200 	} else {
2201 		sl->sl_flags |= SL_WRITEBACK_CACHE_DISABLE;
2202 	}
2203 
2204 	for (it = sl->sl_it_list; it != NULL; it = it->sbd_it_next) {
2205 		if (it == task->task_lu_itl_handle)
2206 			continue;
2207 		it->sbd_it_ua_conditions |= SBD_UA_MODE_PARAMETERS_CHANGED;
2208 	}
2209 
2210 	if (task->task_cdb[1] & 1) {
2211 		if (buf[2] & BIT_2) {
2212 			sl->sl_flags &= ~SL_SAVED_WRITE_CACHE_DISABLE;
2213 		} else {
2214 			sl->sl_flags |= SL_SAVED_WRITE_CACHE_DISABLE;
2215 		}
2216 		mutex_exit(&sl->sl_lock);
2217 		sret = sbd_write_lu_info(sl);
2218 	} else {
2219 		mutex_exit(&sl->sl_lock);
2220 	}
2221 	if (sret == SBD_SUCCESS) {
2222 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2223 	} else {
2224 		stmf_scsilib_send_status(task, STATUS_CHECK,
2225 		    STMF_SAA_WRITE_ERROR);
2226 	}
2227 	return;
2228 
2229 mode_sel_param_len_err:
2230 	stmf_scsilib_send_status(task, STATUS_CHECK,
2231 	    STMF_SAA_PARAM_LIST_LENGTH_ERROR);
2232 	return;
2233 mode_sel_param_field_err:
2234 	stmf_scsilib_send_status(task, STATUS_CHECK,
2235 	    STMF_SAA_INVALID_FIELD_IN_PARAM_LIST);
2236 }
2237 
2238 /*
2239  * Command support added from SPC-4 r24
2240  * Supports info type 0, 2, 127
2241  */
2242 void
2243 sbd_handle_identifying_info(struct scsi_task *task,
2244     stmf_data_buf_t *initial_dbuf)
2245 {
2246 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2247 	uint8_t *cdb;
2248 	uint32_t cmd_size;
2249 	uint32_t param_len;
2250 	uint32_t xfer_size;
2251 	uint8_t info_type;
2252 	uint8_t *buf, *p;
2253 
2254 	cdb = &task->task_cdb[0];
2255 	cmd_size = READ_SCSI32(&cdb[6], uint32_t);
2256 	info_type = cdb[10]>>1;
2257 
2258 	/* Validate the command */
2259 	if (cmd_size < 4) {
2260 		stmf_scsilib_send_status(task, STATUS_CHECK,
2261 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2262 		return;
2263 	}
2264 
2265 	p = buf = kmem_zalloc(260, KM_SLEEP);
2266 
2267 	switch (info_type) {
2268 		case 0:
2269 			/*
2270 			 * No value is supplied but this info type
2271 			 * is mandatory.
2272 			 */
2273 			xfer_size = 4;
2274 			break;
2275 		case 2:
2276 			mutex_enter(&sl->sl_lock);
2277 			param_len = strlcpy((char *)(p+4), sl->sl_alias, 256);
2278 			mutex_exit(&sl->sl_lock);
2279 			/* text info must be null terminated */
2280 			if (++param_len > 256)
2281 				param_len = 256;
2282 			SCSI_WRITE16(p+2, param_len);
2283 			xfer_size = param_len + 4;
2284 			break;
2285 		case 127:
2286 			/* descriptors for info types 0 and 2 are supported */
2287 			SCSI_WRITE16(p+2, 8); /* set param length */
2288 			p += 8;
2289 			*p = 4; /* info type 2 in the upper 7 bits (2 << 1) */
2290 			p += 2;
2291 			SCSI_WRITE16(p, 256); /* 256 max length */
2292 			xfer_size = 12;
2293 			break;
2294 		default:
2295 			stmf_scsilib_send_status(task, STATUS_CHECK,
2296 			    STMF_SAA_INVALID_FIELD_IN_CDB);
2297 			kmem_free(buf, 260);
2298 			return;
2299 	}
2300 	sbd_handle_short_read_transfers(task, initial_dbuf, buf,
2301 	    cmd_size, xfer_size);
2302 	kmem_free(buf, 260);
2303 }
2304 
2305 /*
2306  * This function parses a string, passed in as a pointer to a pointer to the
2307  * string, by advancing the pointer to the first non-space character and
2308  * returning the length of the first run of non-space characters. Multiple
2309  * management URLs are stored as a space-delimited string in the sl_mgmt_url
2310  * field of sbd_lu_t. This function is used to retrieve one URL at a time.
2311  *
2312  * i/p : pointer to a pointer to a URL string
2313  * o/p : advances the pointer to the URL to the first non-space character
2314  *       and returns the length of the URL
2315  */
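/*
 * For example, with *url_addr pointing at "  http://a http://b", the first
 * call advances *url_addr past the leading spaces and returns 8 (the length
 * of "http://a"); the caller then skips that many characters and calls again
 * for the next URL (see the INQUIRY VPD page 0x85 handler below).
 */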
2316 uint16_t
2317 sbd_parse_mgmt_url(char **url_addr)
2318 {
2319 	uint16_t url_length = 0;
2320 	char *url;
2321 	url = *url_addr;
2322 
2323 	while (*url != '\0') {
2324 		if (*url == ' ' || *url == '\t' || *url == '\n') {
2325 			(*url_addr)++;
2326 			url = *url_addr;
2327 		} else {
2328 			break;
2329 		}
2330 	}
2331 
2332 	while (*url != '\0') {
2333 		if (*url == ' ' || *url == '\t' ||
2334 		    *url == '\n' || *url == '\0') {
2335 			break;
2336 		}
2337 		url++;
2338 		url_length++;
2339 	}
2340 	return (url_length);
2341 }
2342 
2343 /* Try to make this the size of a kmem allocation cache. */
2344 static uint_t sbd_write_same_optimal_chunk = 128 * 1024;
2345 
2346 static sbd_status_t
2347 sbd_write_same_data(struct scsi_task *task, sbd_cmd_t *scmd)
2348 {
2349 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2350 	uint64_t addr, len, sz_done;
2351 	uint32_t big_buf_size, xfer_size, off;
2352 	uint8_t *big_buf;
2353 	sbd_status_t ret;
2354 
2355 	if (task->task_cdb[0] == SCMD_WRITE_SAME_G1) {
2356 		addr = READ_SCSI32(&task->task_cdb[2], uint64_t);
2357 		len = READ_SCSI16(&task->task_cdb[7], uint64_t);
2358 	} else {
2359 		addr = READ_SCSI64(&task->task_cdb[2], uint64_t);
2360 		len = READ_SCSI32(&task->task_cdb[10], uint64_t);
2361 	}
2362 	addr <<= sl->sl_data_blocksize_shift;
2363 	len <<= sl->sl_data_blocksize_shift;
2364 
2365 	/*
2366 	 * Reminders:
2367 	 *    "len" is total size of what we wish to "write same".
2368 	 *
2369 	 *    xfer_size will be scmd->trans_data_len, which is the length
2370 	 *    of the pattern we wish to replicate over "len".  We replicate
2371 	 *    "xfer_size" of pattern over "len".
2372 	 *
2373 	 *    big_buf_size is set to an ideal actual-write size for an output
2374 	 *    operation.  It may be the same as "len".  If it's not, it should
2375 	 *    be an exact multiple of "xfer_size" so we don't get pattern
2376 	 *    breakage until the very end of "len".
2377 	 */
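	/*
	 * For illustration, assuming 512-byte blocks: with a one-block pattern
	 * and "len" of 1 MiB, big_buf_size is capped at the 128 KiB chunk,
	 * which holds 256 copies of the pattern, and the loop below issues
	 * eight 128 KiB writes.
	 */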
2378 	big_buf_size = len > sbd_write_same_optimal_chunk ?
2379 	    sbd_write_same_optimal_chunk : (uint32_t)len;
2380 	xfer_size = scmd->trans_data_len;
2381 
2382 	/*
2383 	 * All transfers should be an integral multiple of the sector size.
2384 	 */
2385 	ASSERT((big_buf_size % xfer_size) == 0);
2386 
2387 	/*
2388 	 * Don't sleep for the allocation, and don't make the system
2389 	 * reclaim memory.  Trade higher I/Os if in a low-memory situation.
2390 	 */
2391 	big_buf = kmem_alloc(big_buf_size, KM_NOSLEEP | KM_NORMALPRI);
2392 
2393 	if (big_buf == NULL) {
2394 		/*
2395 		 * Just send it in terms of the transmitted data.  This
2396 		 * will be very slow.
2397 		 */
2398 		DTRACE_PROBE1(write__same__low__memory, uint64_t, big_buf_size);
2399 		big_buf = scmd->trans_data;
2400 		big_buf_size = scmd->trans_data_len;
2401 	} else {
2402 		/*
2403 		 * We already ASSERT()ed big_buf_size is an integral multiple
2404 		 * of xfer_size.
2405 		 */
2406 		for (off = 0; off < big_buf_size; off += xfer_size)
2407 			bcopy(scmd->trans_data, big_buf + off, xfer_size);
2408 	}
2409 
2410 	/* Do the actual I/O.  Recycle xfer_size now to be write size. */
2411 	DTRACE_PROBE1(write__same__io__begin, uint64_t, len);
2412 	for (sz_done = 0; sz_done < len; sz_done += (uint64_t)xfer_size) {
2413 		xfer_size = ((big_buf_size + sz_done) <= len) ? big_buf_size :
2414 		    len - sz_done;
2415 		ret = sbd_data_write(sl, task, addr + sz_done,
2416 		    (uint64_t)xfer_size, big_buf);
2417 		if (ret != SBD_SUCCESS)
2418 			break;
2419 	}
2420 	DTRACE_PROBE2(write__same__io__end, uint64_t, len, uint64_t, sz_done);
2421 
2422 	if (big_buf != scmd->trans_data)
2423 		kmem_free(big_buf, big_buf_size);
2424 
2425 	return (ret);
2426 }
2427 
2428 static void
2429 sbd_write_same_release_resources(struct scsi_task *task)
2430 {
2431 	sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
2432 
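	/* nbufs is unsigned; 0xFF suggests it was decremented past zero */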
2433 	if (scmd->nbufs == 0XFF)
2434 		cmn_err(CE_WARN, "%s invalid buffer count %x",
2435 		    __func__, scmd->nbufs);
2436 	if ((scmd->trans_data_len != 0) && (scmd->trans_data != NULL))
2437 		kmem_free(scmd->trans_data, scmd->trans_data_len);
2438 	scmd->trans_data = NULL;
2439 	scmd->trans_data_len = 0;
2440 	scmd->flags &= ~SBD_SCSI_CMD_TRANS_DATA;
2441 }
2442 
2443 static void
2444 sbd_handle_write_same_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
2445     struct stmf_data_buf *dbuf, uint8_t dbuf_reusable)
2446 {
2447 	uint64_t laddr;
2448 	uint32_t buflen, iolen;
2449 	int ndx, ret;
2450 
2451 	if (ATOMIC8_GET(scmd->nbufs) > 0) {
2452 		atomic_dec_8(&scmd->nbufs);
2453 	}
2454 
2455 	if (dbuf->db_xfer_status != STMF_SUCCESS) {
2456 		sbd_write_same_release_resources(task);
2457 		sbd_ats_remove_by_task(task);
2458 		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
2459 		    dbuf->db_xfer_status, NULL);
2460 		return;
2461 	}
2462 
2463 	if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
2464 		goto write_same_xfer_done;
2465 	}
2466 
2467 	/* If this is an unnecessary callback, just return */
2468 	if (((scmd->flags & SBD_SCSI_CMD_TRANS_DATA) == 0) ||
2469 	    ((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0) ||
2470 	    (scmd->trans_data == NULL)) {
2471 		sbd_ats_remove_by_task(task);
2472 		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
2473 		return;
2474 	}
2475 
2476 	if (ATOMIC32_GET(scmd->len) != 0) {
2477 		/*
2478 		 * Initiate the next port xfer to occur in parallel
2479 		 * with writing this buf.
2480 		 */
2481 		sbd_do_write_same_xfer(task, scmd, NULL, 0);
2482 	}
2483 
2484 	laddr = dbuf->db_relative_offset;
2485 
2486 	for (buflen = 0, ndx = 0; (buflen < dbuf->db_data_size) &&
2487 	    (ndx < dbuf->db_sglist_length); ndx++) {
2488 		iolen = min(dbuf->db_data_size - buflen,
2489 		    dbuf->db_sglist[ndx].seg_length);
2490 		if (iolen == 0)
2491 			break;
2492 		bcopy(dbuf->db_sglist[ndx].seg_addr, &scmd->trans_data[laddr],
2493 		    iolen);
2494 		buflen += iolen;
2495 		laddr += (uint64_t)iolen;
2496 	}
2497 	task->task_nbytes_transferred += buflen;
2498 
2499 write_same_xfer_done:
2500 	if (ATOMIC32_GET(scmd->len) == 0 ||
2501 	    scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
2502 		stmf_free_dbuf(task, dbuf);
2503 		if (ATOMIC8_GET(scmd->nbufs) > 0)
2504 			return;
2505 		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
2506 		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
2507 			sbd_ats_remove_by_task(task);
2508 			sbd_write_same_release_resources(task);
2509 			stmf_scsilib_send_status(task, STATUS_CHECK,
2510 			    STMF_SAA_WRITE_ERROR);
2511 		} else {
2512 			ret = sbd_write_same_data(task, scmd);
2513 			sbd_ats_remove_by_task(task);
2514 			sbd_write_same_release_resources(task);
2515 			if (ret != SBD_SUCCESS) {
2516 				stmf_scsilib_send_status(task, STATUS_CHECK,
2517 				    STMF_SAA_WRITE_ERROR);
2518 			} else {
2519 				stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2520 			}
2521 		}
2522 		return;
2523 	}
2524 	sbd_do_write_same_xfer(task, scmd, dbuf, dbuf_reusable);
2525 }
2526 
2527 static void
2528 sbd_do_write_same_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
2529     struct stmf_data_buf *dbuf, uint8_t dbuf_reusable)
2530 {
2531 	uint32_t len;
2532 
2533 	if (ATOMIC32_GET(scmd->len) == 0) {
2534 		if (dbuf != NULL)
2535 			stmf_free_dbuf(task, dbuf);
2536 		return;
2537 	}
2538 
2539 	if ((dbuf != NULL) &&
2540 	    ((dbuf->db_flags & DB_DONT_REUSE) || (dbuf_reusable == 0))) {
2541 		/* free current dbuf and allocate a new one */
2542 		stmf_free_dbuf(task, dbuf);
2543 		dbuf = NULL;
2544 	}
2545 	if (dbuf == NULL) {
2546 		uint32_t maxsize, minsize, old_minsize;
2547 
2548 		maxsize = (ATOMIC32_GET(scmd->len) > (128*1024)) ? 128*1024 :
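		/*
		 * Ask for up to 128K, retrying with progressively smaller
		 * minimums (no smaller than 512 bytes) if the port cannot
		 * supply a buffer.
		 */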
2549 		    ATOMIC32_GET(scmd->len);
2550 		minsize = maxsize >> 2;
2551 		do {
2552 			old_minsize = minsize;
2553 			dbuf = stmf_alloc_dbuf(task, maxsize, &minsize, 0);
2554 		} while ((dbuf == NULL) && (old_minsize > minsize) &&
2555 		    (minsize >= 512));
2556 		if (dbuf == NULL) {
2557 			sbd_ats_remove_by_task(task);
2558 			sbd_write_same_release_resources(task);
2559 			if (ATOMIC8_GET(scmd->nbufs) == 0) {
2560 				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
2561 				    STMF_ALLOC_FAILURE, NULL);
2562 			}
2563 			return;
2564 		}
2565 	}
2566 
2567 	len = ATOMIC32_GET(scmd->len) > dbuf->db_buf_size ? dbuf->db_buf_size :
2568 	    ATOMIC32_GET(scmd->len);
2569 
2570 	dbuf->db_relative_offset = scmd->current_ro;
2571 	dbuf->db_data_size = len;
2572 	dbuf->db_flags = DB_DIRECTION_FROM_RPORT;
2573 	(void) stmf_xfer_data(task, dbuf, 0);
2574 	/* outstanding port xfers and bufs used */
2575 	atomic_inc_8(&scmd->nbufs);
2576 	atomic_add_32(&scmd->len, -len);
2577 	scmd->current_ro += len;
2578 }
2579 
2580 static void
2581 sbd_handle_write_same(scsi_task_t *task, struct stmf_data_buf *initial_dbuf)
2582 {
2583 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2584 	uint64_t addr, len;
2585 	sbd_cmd_t *scmd;
2586 	stmf_data_buf_t *dbuf;
2587 	uint8_t unmap;
2588 	uint8_t do_immediate_data = 0;
2589 
2590 	if (HardwareAcceleratedInit == 0) {
2591 		stmf_scsilib_send_status(task, STATUS_CHECK,
2592 		    STMF_SAA_INVALID_OPCODE);
2593 		return;
2594 	}
2595 
2596 	task->task_cmd_xfer_length = 0;
2597 	if (task->task_additional_flags &
2598 	    TASK_AF_NO_EXPECTED_XFER_LENGTH) {
2599 		task->task_expected_xfer_length = 0;
2600 	}
2601 	if (sl->sl_flags & SL_WRITE_PROTECTED) {
2602 		stmf_scsilib_send_status(task, STATUS_CHECK,
2603 		    STMF_SAA_WRITE_PROTECTED);
2604 		return;
2605 	}
2606 	if (task->task_cdb[1] & 0xF7) {
2607 		stmf_scsilib_send_status(task, STATUS_CHECK,
2608 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2609 		return;
2610 	}
2611 	unmap = task->task_cdb[1] & 0x08;
2612 
2613 	if (unmap && ((sl->sl_flags & SL_UNMAP_ENABLED) == 0)) {
2614 		stmf_scsilib_send_status(task, STATUS_CHECK,
2615 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2616 		return;
2617 	}
2618 
2619 	if (task->task_cdb[0] == SCMD_WRITE_SAME_G1) {
2620 		addr = READ_SCSI32(&task->task_cdb[2], uint64_t);
2621 		len = READ_SCSI16(&task->task_cdb[7], uint64_t);
2622 	} else {
2623 		addr = READ_SCSI64(&task->task_cdb[2], uint64_t);
2624 		len = READ_SCSI32(&task->task_cdb[10], uint64_t);
2625 	}
2626 
2627 	if (len == 0) {
2628 		stmf_scsilib_send_status(task, STATUS_CHECK,
2629 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2630 		return;
2631 	}
2632 
2633 	if (sbd_ats_handling_before_io(task, sl, addr, len) !=
2634 	    SBD_SUCCESS) {
2635 		if (stmf_task_poll_lu(task, 10) != STMF_SUCCESS)
2636 			stmf_scsilib_send_status(task, STATUS_BUSY, 0);
2637 		return;
2638 	}
2639 
2640 	addr <<= sl->sl_data_blocksize_shift;
2641 	len <<= sl->sl_data_blocksize_shift;
2642 
2643 	/* Check if the command is for the unmap function */
2644 	if (unmap) {
2645 		dkioc_free_list_t *dfl = kmem_zalloc(DFL_SZ(1), KM_SLEEP);
2646 
2647 		dfl->dfl_num_exts = 1;
2648 		dfl->dfl_exts[0].dfle_start = addr;
2649 		dfl->dfl_exts[0].dfle_length = len;
2650 		if (sbd_unmap(sl, dfl) != 0) {
2651 			stmf_scsilib_send_status(task, STATUS_CHECK,
2652 			    STMF_SAA_LBA_OUT_OF_RANGE);
2653 		} else {
2654 			stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2655 		}
2656 		dfl_free(dfl);
2657 		return;
2658 	}
2659 
2660 	/* Write same function */
2661 
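	/*
	 * WRITE SAME transfers exactly one block of pattern data from the
	 * initiator, no matter how many blocks are written to the media.
	 */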
2662 	task->task_cmd_xfer_length = 1 << sl->sl_data_blocksize_shift;
2663 	if (task->task_additional_flags &
2664 	    TASK_AF_NO_EXPECTED_XFER_LENGTH) {
2665 		task->task_expected_xfer_length = task->task_cmd_xfer_length;
2666 	}
2667 	if ((addr + len) > sl->sl_lu_size) {
2668 		sbd_ats_remove_by_task(task);
2669 		stmf_scsilib_send_status(task, STATUS_CHECK,
2670 		    STMF_SAA_LBA_OUT_OF_RANGE);
2671 		return;
2672 	}
2673 
2674 	/* For rest of this I/O the transfer length is 1 block */
2675 	len = ((uint64_t)1) << sl->sl_data_blocksize_shift;
2676 
2677 	/* Some basic checks */
2678 	if ((len == 0) || (len != task->task_expected_xfer_length)) {
2679 		sbd_ats_remove_by_task(task);
2680 		stmf_scsilib_send_status(task, STATUS_CHECK,
2681 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2682 		return;
2683 	}
2684 
2685 
2686 	if ((initial_dbuf != NULL) && (task->task_flags & TF_INITIAL_BURST)) {
2687 		if (initial_dbuf->db_data_size > len) {
2688 			if (initial_dbuf->db_data_size >
2689 			    task->task_expected_xfer_length) {
2690 				/* protocol error */
2691 				sbd_ats_remove_by_task(task);
2692 				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
2693 				    STMF_INVALID_ARG, NULL);
2694 				return;
2695 			}
2696 			initial_dbuf->db_data_size = (uint32_t)len;
2697 		}
2698 		do_immediate_data = 1;
2699 	}
2700 	dbuf = initial_dbuf;
2701 
2702 	if (task->task_lu_private) {
2703 		scmd = (sbd_cmd_t *)task->task_lu_private;
2704 	} else {
2705 		scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
2706 		task->task_lu_private = scmd;
2707 	}
2708 	scmd->flags = SBD_SCSI_CMD_ACTIVE | SBD_SCSI_CMD_TRANS_DATA |
2709 	    SBD_SCSI_CMD_ATS_RELATED;
2710 	scmd->cmd_type = SBD_CMD_SCSI_WRITE;
2711 	scmd->nbufs = 0;
2712 	scmd->len = (uint32_t)len;
2713 	scmd->trans_data_len = (uint32_t)len;
2714 	scmd->trans_data = kmem_alloc((size_t)len, KM_SLEEP);
2715 	scmd->current_ro = 0;
2716 
2717 	if (do_immediate_data) {
2718 		/*
2719 		 * Account for data passed in this write command
2720 		 */
2721 		(void) stmf_xfer_data(task, dbuf, STMF_IOF_STATS_ONLY);
2722 		atomic_add_32(&scmd->len, -dbuf->db_data_size);
2723 		scmd->current_ro += dbuf->db_data_size;
2724 		dbuf->db_xfer_status = STMF_SUCCESS;
2725 		sbd_handle_write_same_xfer_completion(task, scmd, dbuf, 0);
2726 	} else {
2727 		sbd_do_write_same_xfer(task, scmd, dbuf, 0);
2728 	}
2729 }
2730 
2731 static void
2732 sbd_handle_unmap(scsi_task_t *task, stmf_data_buf_t *dbuf)
2733 {
2734 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2735 	uint32_t cmd_xfer_len;
2736 
2737 	if (sbd_unmap_enable == 0) {
2738 		stmf_scsilib_send_status(task, STATUS_CHECK,
2739 		    STMF_SAA_INVALID_OPCODE);
2740 		return;
2741 	}
2742 
2743 	if (sl->sl_flags & SL_WRITE_PROTECTED) {
2744 		stmf_scsilib_send_status(task, STATUS_CHECK,
2745 		    STMF_SAA_WRITE_PROTECTED);
2746 		return;
2747 	}
2748 	cmd_xfer_len = READ_SCSI16(&task->task_cdb[7], uint32_t);
2749 
2750 	if (task->task_cdb[1] & 1) {
2751 		stmf_scsilib_send_status(task, STATUS_CHECK,
2752 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2753 		return;
2754 	}
2755 
2756 	if (cmd_xfer_len == 0) {
2757 		task->task_cmd_xfer_length = 0;
2758 		if (task->task_additional_flags &
2759 		    TASK_AF_NO_EXPECTED_XFER_LENGTH) {
2760 			task->task_expected_xfer_length = 0;
2761 		}
2762 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2763 		return;
2764 	}
2765 
2766 	sbd_handle_short_write_transfers(task, dbuf, cmd_xfer_len);
2767 }
2768 
2769 static void
2770 sbd_handle_unmap_xfer(scsi_task_t *task, uint8_t *buf, uint32_t buflen)
2771 {
2772 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2773 	uint32_t ulen, dlen, num_desc;
2774 	uint64_t addr, len;
2775 	uint8_t *p;
2776 	dkioc_free_list_t *dfl;
2777 	int ret;
2778 	int i;
2779 
2780 	if (buflen < 24) {
2781 		stmf_scsilib_send_status(task, STATUS_CHECK,
2782 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2783 		return;
2784 	}
2785 	ulen = READ_SCSI16(buf, uint32_t);
2786 	dlen = READ_SCSI16(buf + 2, uint32_t);
2787 	num_desc = dlen >> 4;
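	/*
	 * Each UNMAP block descriptor is 16 bytes; ulen counts the bytes that
	 * follow the data-length field (buflen - 2) and dlen counts the
	 * descriptor bytes (buflen - 8).
	 */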
2788 	if (((ulen + 2) != buflen) || ((dlen + 8) != buflen) || (dlen & 0xf) ||
2789 	    (num_desc == 0)) {
2790 		stmf_scsilib_send_status(task, STATUS_CHECK,
2791 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2792 		return;
2793 	}
2794 
2795 	dfl = kmem_zalloc(DFL_SZ(num_desc), KM_SLEEP);
2796 	dfl->dfl_num_exts = num_desc;
2797 	/*
2798 	 * This should use ATS locking, but that was disabled by the
2799 	 * changes to ZFS to take advantage of TRIM in SSDs.
2800 	 *
2801 	 * Since the entire list is passed to ZFS as one list, ATS
2802 	 * locking is not done.  This may be detectable, and if it is,
2803 	 * the entire list needs to be locked and then, after the
2804 	 * unmap completes, the entire list must be unlocked.
2805 	 */
2806 	for (p = buf + 8, i = 0; num_desc; num_desc--, p += 16, i++) {
2807 		addr = READ_SCSI64(p, uint64_t);
2808 		len = READ_SCSI32(p+8, uint64_t);
2809 		addr <<= sl->sl_data_blocksize_shift;
2810 		len <<= sl->sl_data_blocksize_shift;
2811 
2812 		/* Prepare a list of extents to unmap */
2813 		dfl->dfl_exts[i].dfle_start = addr;
2814 		dfl->dfl_exts[i].dfle_length = len;
2815 
2816 		/* no per-extent ATS overlap to release; see the note above */
2817 	}
2818 	ASSERT(i == dfl->dfl_num_exts);
2819 
2820 	/* Finally execute the unmap operations in a single step */
2821 	ret = sbd_unmap(sl, dfl);
2822 	dfl_free(dfl);
2823 	if (ret != 0) {
2824 		stmf_scsilib_send_status(task, STATUS_CHECK,
2825 		    STMF_SAA_LBA_OUT_OF_RANGE);
2826 		return;
2827 	}
2828 
2829 	stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2830 }
2831 
2832 void
2833 sbd_handle_inquiry(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
2834 {
2835 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2836 	uint8_t *cdbp = (uint8_t *)&task->task_cdb[0];
2837 	uint8_t *p;
2838 	uint8_t byte0;
2839 	uint8_t page_length;
2840 	uint16_t bsize = 512;
2841 	uint16_t cmd_size;
2842 	uint32_t xfer_size = 4;
2843 	uint32_t mgmt_url_size = 0;
2844 	uint8_t exp;
2845 	uint64_t s;
2846 	char *mgmt_url = NULL;
2847 
2848 
2849 	byte0 = DTYPE_DIRECT;
2850 	/*
2851 	 * Basic protocol checks.
2852 	 */
2853 
2854 	if ((((cdbp[1] & 1) == 0) && cdbp[2]) || cdbp[5]) {
2855 		stmf_scsilib_send_status(task, STATUS_CHECK,
2856 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2857 		return;
2858 	}
2859 
2860 	/*
2861 	 * Zero byte allocation length is not an error.  Just
2862 	 * return success.
2863 	 */
2864 
2865 	cmd_size = (((uint16_t)cdbp[3]) << 8) | cdbp[4];
2866 
2867 	if (cmd_size == 0) {
2868 		task->task_cmd_xfer_length = 0;
2869 		if (task->task_additional_flags &
2870 		    TASK_AF_NO_EXPECTED_XFER_LENGTH) {
2871 			task->task_expected_xfer_length = 0;
2872 		}
2873 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2874 		return;
2875 	}
2876 
2877 	/*
2878 	 * Standard inquiry
2879 	 */
2880 
2881 	if ((cdbp[1] & 1) == 0) {
2882 		int	i;
2883 		struct scsi_inquiry *inq;
2884 
2885 		p = (uint8_t *)kmem_zalloc(bsize, KM_SLEEP);
2886 		inq = (struct scsi_inquiry *)p;
2887 
2888 		page_length = 69;
2889 		xfer_size = page_length + 5;
2890 
2891 		inq->inq_dtype = DTYPE_DIRECT;
2892 		inq->inq_ansi = 5;	/* SPC-3 */
2893 		inq->inq_hisup = 1;
2894 		inq->inq_rdf = 2;	/* Response data format for SPC-3 */
2895 		inq->inq_len = page_length;
2896 
2897 		inq->inq_tpgs = TPGS_FAILOVER_IMPLICIT;
2898 		inq->inq_cmdque = 1;
2899 		inq->inq_3pc = 1;
2900 
2901 		if (sl->sl_flags & SL_VID_VALID) {
2902 			bcopy(sl->sl_vendor_id, inq->inq_vid, 8);
2903 		} else {
2904 			bcopy(sbd_vendor_id, inq->inq_vid, 8);
2905 		}
2906 
2907 		if (sl->sl_flags & SL_PID_VALID) {
2908 			bcopy(sl->sl_product_id, inq->inq_pid, 16);
2909 		} else {
2910 			bcopy(sbd_product_id, inq->inq_pid, 16);
2911 		}
2912 
2913 		if (sl->sl_flags & SL_REV_VALID) {
2914 			bcopy(sl->sl_revision, inq->inq_revision, 4);
2915 		} else {
2916 			bcopy(sbd_revision, inq->inq_revision, 4);
2917 		}
2918 
2919 		/* Adding Version Descriptors */
2920 		i = 0;
2921 		/* SAM-3 no version */
2922 		inq->inq_vd[i].inq_vd_msb = 0x00;
2923 		inq->inq_vd[i].inq_vd_lsb = 0x60;
2924 		i++;
2925 
2926 		/* transport */
2927 		switch (task->task_lport->lport_id->protocol_id) {
2928 		case PROTOCOL_FIBRE_CHANNEL:
2929 			inq->inq_vd[i].inq_vd_msb = 0x09;
2930 			inq->inq_vd[i].inq_vd_lsb = 0x00;
2931 			i++;
2932 			break;
2933 
2934 		case PROTOCOL_PARALLEL_SCSI:
2935 		case PROTOCOL_SSA:
2936 		case PROTOCOL_IEEE_1394:
2937 			/* Currently no claims of conformance */
2938 			break;
2939 
2940 		case PROTOCOL_SRP:
2941 			inq->inq_vd[i].inq_vd_msb = 0x09;
2942 			inq->inq_vd[i].inq_vd_lsb = 0x40;
2943 			i++;
2944 			break;
2945 
2946 		case PROTOCOL_iSCSI:
2947 			inq->inq_vd[i].inq_vd_msb = 0x09;
2948 			inq->inq_vd[i].inq_vd_lsb = 0x60;
2949 			i++;
2950 			break;
2951 
2952 		case PROTOCOL_SAS:
2953 		case PROTOCOL_ADT:
2954 		case PROTOCOL_ATAPI:
2955 		default:
2956 			/* Currently no claims of conformance */
2957 			break;
2958 		}
2959 
2960 		/* SPC-3 no version */
2961 		inq->inq_vd[i].inq_vd_msb = 0x03;
2962 		inq->inq_vd[i].inq_vd_lsb = 0x00;
2963 		i++;
2964 
2965 		/* SBC-2 no version */
2966 		inq->inq_vd[i].inq_vd_msb = 0x03;
2967 		inq->inq_vd[i].inq_vd_lsb = 0x20;
2968 
2969 		sbd_handle_short_read_transfers(task, initial_dbuf, p, cmd_size,
2970 		    min(cmd_size, xfer_size));
2971 		kmem_free(p, bsize);
2972 
2973 		return;
2974 	}
2975 
2976 	rw_enter(&sbd_global_prop_lock, RW_READER);
2977 	if (sl->sl_mgmt_url) {
2978 		mgmt_url_size = strlen(sl->sl_mgmt_url);
2979 		mgmt_url = sl->sl_mgmt_url;
2980 	} else if (sbd_mgmt_url) {
2981 		mgmt_url_size = strlen(sbd_mgmt_url);
2982 		mgmt_url = sbd_mgmt_url;
2983 	}
2984 
2985 	/*
2986 	 * EVPD handling
2987 	 */
2988 
2989 	/* Default 512 bytes may not be enough, increase bsize if necessary */
2990 	if (cdbp[2] == 0x83 || cdbp[2] == 0x85) {
2991 		if (bsize <  cmd_size)
2992 			bsize = cmd_size;
2993 	}
2994 	p = (uint8_t *)kmem_zalloc(bsize, KM_SLEEP);
2995 
2996 	switch (cdbp[2]) {
2997 	case 0x00:
2998 		page_length = 5 + (mgmt_url_size ? 1 : 0);
2999 
3000 		if (sl->sl_flags & SL_UNMAP_ENABLED)
3001 			page_length += 1;
3002 
3003 		p[0] = byte0;
3004 		p[3] = page_length;
3005 		/* Supported VPD pages in ascending order */
3006 		/* CSTYLED */
3007 		{
3008 			uint8_t i = 5;
3009 
3010 			p[i++] = 0x80;
3011 			p[i++] = 0x83;
3012 			if (mgmt_url_size != 0)
3013 				p[i++] = 0x85;
3014 			p[i++] = 0x86;
3015 			p[i++] = 0xb0;
3016 			if (sl->sl_flags & SL_UNMAP_ENABLED) {
3017 				p[i++] = 0xb2;
3018 			}
3019 		}
3020 		xfer_size = page_length + 4;
3021 		break;
3022 
3023 	case 0x80:
3024 		if (sl->sl_serial_no_size) {
3025 			page_length = sl->sl_serial_no_size;
3026 			bcopy(sl->sl_serial_no, p + 4, sl->sl_serial_no_size);
3027 		} else {
3028 			/* if no serial number is specified, return 4 spaces */
3029 			page_length = 4;
3030 			bcopy("    ", p + 4, 4);
3031 		}
3032 		p[0] = byte0;
3033 		p[1] = 0x80;
3034 		p[3] = page_length;
3035 		xfer_size = page_length + 4;
3036 		break;
3037 
3038 	case 0x83:
3039 		xfer_size = stmf_scsilib_prepare_vpd_page83(task, p,
3040 		    bsize, byte0, STMF_VPD_LU_ID|STMF_VPD_TARGET_ID|
3041 		    STMF_VPD_TP_GROUP|STMF_VPD_RELATIVE_TP_ID);
3042 		break;
3043 
3044 	case 0x85:
3045 		if (mgmt_url_size == 0) {
3046 			stmf_scsilib_send_status(task, STATUS_CHECK,
3047 			    STMF_SAA_INVALID_FIELD_IN_CDB);
3048 			goto err_done;
3049 		} /* CSTYLED */
3050 		{
3051 			uint16_t idx, newidx, sz, url_size;
3052 			char *url;
3053 
3054 			p[0] = byte0;
3055 			p[1] = 0x85;
3056 
3057 			idx = 4;
3058 			url = mgmt_url;
3059 			url_size = sbd_parse_mgmt_url(&url);
3060 			/* Creating Network Service Descriptors */
3061 			while (url_size != 0) {
3062 				/* Null terminated and 4 Byte aligned */
3063 				sz = url_size + 1;
3064 				sz += (sz % 4) ? 4 - (sz % 4) : 0;
3065 				newidx = idx + sz + 4;
3066 
3067 				if (newidx < bsize) {
3068 					/*
3069 					 * SPC-3r23 : Table 320  (Sec 7.6.5)
3070 					 * (network service descriptor format)
3071 					 *
3072 					 * Note: Hard coding service type as
3073 					 * "Storage Configuration Service".
3074 					 */
3075 					p[idx] = 1;
3076 					SCSI_WRITE16(p + idx + 2, sz);
3077 					bcopy(url, p + idx + 4, url_size);
3078 					xfer_size = newidx + 4;
3079 				}
3080 				idx = newidx;
3081 
3082 				/* skip to next mgmt url if any */
3083 				url += url_size;
3084 				url_size = sbd_parse_mgmt_url(&url);
3085 			}
3086 
3087 			/* Total descriptor length */
3088 			SCSI_WRITE16(p + 2, idx - 4);
3089 			break;
3090 		}
3091 
3092 	case 0x86:
3093 		page_length = 0x3c;
3094 
3095 		p[0] = byte0;
3096 		p[1] = 0x86;		/* Page 86 response */
3097 		p[3] = page_length;
3098 
3099 		/*
3100 		 * Bits 0, 1, and 2 will need to be updated
3101 		 * to reflect the queue tag handling if/when
3102 		 * that is implemented.  For now, we're going
3103 		 * to claim support only for Simple TA.
3104 		 */
3105 		p[5] = 1;
3106 		xfer_size = page_length + 4;
3107 		break;
3108 
3109 	case 0xb0:
3110 		page_length = 0x3c;
3111 		p[0] = byte0;
3112 		p[1] = 0xb0;
3113 		p[3] = page_length;
3114 		p[4] = 1;
3115 		p[5] = sbd_ats_max_nblks();
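		/*
		 * Block Limits VPD (SBC-3): bytes 20-23 carry the maximum
		 * UNMAP LBA count and bytes 24-27 the maximum number of
		 * UNMAP block descriptors (255 here).
		 */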
3116 		if (sl->sl_flags & SL_UNMAP_ENABLED && sbd_unmap_enable) {
3117 			p[20] = (stmf_sbd_unmap_max_nblks >> 24) & 0xff;
3118 			p[21] = (stmf_sbd_unmap_max_nblks >> 16) & 0xff;
3119 			p[22] = (stmf_sbd_unmap_max_nblks >> 8) & 0xff;
3120 			p[23] = stmf_sbd_unmap_max_nblks & 0xff;
3121 
3122 			p[24] = 0;
3123 			p[25] = 0;
3124 			p[26] = 0;
3125 			p[27] = 0xFF;
3126 		}
3127 		xfer_size = page_length + 4;
3128 		break;
3129 
3130 	case 0xb2:
3131 		if ((sl->sl_flags & SL_UNMAP_ENABLED) == 0) {
3132 			stmf_scsilib_send_status(task, STATUS_CHECK,
3133 			    STMF_SAA_INVALID_FIELD_IN_CDB);
3134 			goto err_done;
3135 		}
3136 		page_length = 4;
3137 		p[0] = byte0;
3138 		p[1] = 0xb2;
3139 		p[3] = page_length;
3140 
3141 		exp = (uint8_t)sl->sl_data_blocksize_shift;
3142 		s = sl->sl_lu_size >> sl->sl_data_blocksize_shift;
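		/*
		 * Scale the block count down until it fits in 31 bits,
		 * accumulating the shift count; the result is reported as
		 * the exponent in byte 4.
		 */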
3143 		while (s & ((uint64_t)0xFFFFFFFF80000000ull)) {
3144 			s >>= 1;
3145 			exp++;
3146 		}
3147 		p[4] = exp;
3148 		p[5] = 0xc0;	/* Logical provisioning UNMAP and WRITE SAME */
3149 		xfer_size = page_length + 4;
3150 		break;
3151 
3152 	default:
3153 		stmf_scsilib_send_status(task, STATUS_CHECK,
3154 		    STMF_SAA_INVALID_FIELD_IN_CDB);
3155 		goto err_done;
3156 	}
3157 
3158 	sbd_handle_short_read_transfers(task, initial_dbuf, p, cmd_size,
3159 	    min(cmd_size, xfer_size));
3160 err_done:
3161 	kmem_free(p, bsize);
3162 	rw_exit(&sbd_global_prop_lock);
3163 }
3164 
3165 stmf_status_t
3166 sbd_task_alloc(struct scsi_task *task)
3167 {
3168 	if ((task->task_lu_private =
3169 	    kmem_zalloc(sizeof (sbd_cmd_t), KM_NOSLEEP)) != NULL) {
3170 		sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
3171 		scmd->flags = 0;
3172 		return (STMF_SUCCESS);
3173 	}
3174 	return (STMF_ALLOC_FAILURE);
3175 }
3176 
3177 void
3178 sbd_remove_it_handle(sbd_lu_t *sl, sbd_it_data_t *it)
3179 {
3180 	sbd_it_data_t **ppit;
3181 
3182 	sbd_pgr_remove_it_handle(sl, it);
3183 	mutex_enter(&sl->sl_lock);
3184 	for (ppit = &sl->sl_it_list; *ppit != NULL;
3185 	    ppit = &((*ppit)->sbd_it_next)) {
3186 		if ((*ppit) == it) {
3187 			*ppit = it->sbd_it_next;
3188 			break;
3189 		}
3190 	}
3191 	mutex_exit(&sl->sl_lock);
3192 
3193 	DTRACE_PROBE2(itl__nexus__end, stmf_lu_t *, sl->sl_lu,
3194 	    sbd_it_data_t *, it);
3195 
3196 	kmem_free(it, sizeof (*it));
3197 }
3198 
3199 void
3200 sbd_check_and_clear_scsi2_reservation(sbd_lu_t *sl, sbd_it_data_t *it)
3201 {
3202 	mutex_enter(&sl->sl_lock);
3203 	if ((sl->sl_flags & SL_LU_HAS_SCSI2_RESERVATION) == 0) {
3204 		/* If we don't have any reservations, just get out. */
3205 		mutex_exit(&sl->sl_lock);
3206 		return;
3207 	}
3208 
3209 	if (it == NULL) {
3210 		/* Find the I_T nexus which is holding the reservation. */
3211 		for (it = sl->sl_it_list; it != NULL; it = it->sbd_it_next) {
3212 			if (it->sbd_it_flags & SBD_IT_HAS_SCSI2_RESERVATION) {
3213 				ASSERT(it->sbd_it_session_id ==
3214 				    sl->sl_rs_owner_session_id);
3215 				break;
3216 			}
3217 		}
3218 		ASSERT(it != NULL);
3219 	} else {
3220 		/*
3221 		 * We were passed an I_T nexus. If this nexus does not hold
3222 		 * the reservation, do nothing. This is why this function is
3223 		 * called "check_and_clear".
3224 		 */
3225 		if ((it->sbd_it_flags & SBD_IT_HAS_SCSI2_RESERVATION) == 0) {
3226 			mutex_exit(&sl->sl_lock);
3227 			return;
3228 		}
3229 	}
3230 	it->sbd_it_flags &= ~SBD_IT_HAS_SCSI2_RESERVATION;
3231 	sl->sl_flags &= ~SL_LU_HAS_SCSI2_RESERVATION;
3232 	mutex_exit(&sl->sl_lock);
3233 }
3234 
3235 /*
3236  * Given a LU and a task, check if the task is causing reservation
3237  * conflict. Returns 1 in case of conflict, 0 otherwise.
3238  * Note that the LU might not be the same LU as in the task but the
3239  * caller makes sure that the LU can be accessed.
3240  */
3241 int
3242 sbd_check_reservation_conflict(struct sbd_lu *sl, struct scsi_task *task)
3243 {
3244 	sbd_it_data_t *it;
3245 
3246 	it = task->task_lu_itl_handle;
3247 	ASSERT(it);
3248 	if (sl->sl_access_state == SBD_LU_ACTIVE) {
3249 		if (SBD_PGR_RSVD(sl->sl_pgr)) {
3250 			if (sbd_pgr_reservation_conflict(task, sl)) {
3251 				return (1);
3252 			}
3253 		} else if ((sl->sl_flags & SL_LU_HAS_SCSI2_RESERVATION) &&
3254 		    ((it->sbd_it_flags & SBD_IT_HAS_SCSI2_RESERVATION) == 0)) {
3255 			if (!(SCSI2_CONFLICT_FREE_CMDS(task->task_cdb))) {
3256 				return (1);
3257 			}
3258 		}
3259 	}
3260 
3261 	return (0);
3262 }
3263 
3264 /*
3265  * Keep in mind that sbd_new_task can be called multiple times for the same
3266  * task because of us calling stmf_task_poll_lu resulting in a call to
3267  * sbd_task_poll().
3268  */
3269 void
3270 sbd_new_task(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
3271 {
3272 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
3273 	sbd_it_data_t *it;
3274 	uint8_t cdb0, cdb1;
3275 	stmf_status_t st_ret;
3276 
3277 	if ((it = task->task_lu_itl_handle) == NULL) {
3278 		mutex_enter(&sl->sl_lock);
3279 		for (it = sl->sl_it_list; it != NULL; it = it->sbd_it_next) {
3280 			if (it->sbd_it_session_id ==
3281 			    task->task_session->ss_session_id) {
3282 				mutex_exit(&sl->sl_lock);
3283 				stmf_scsilib_send_status(task, STATUS_BUSY, 0);
3284 				return;
3285 			}
3286 		}
3287 		it = (sbd_it_data_t *)kmem_zalloc(sizeof (*it), KM_NOSLEEP);
3288 		if (it == NULL) {
3289 			mutex_exit(&sl->sl_lock);
3290 			stmf_scsilib_send_status(task, STATUS_BUSY, 0);
3291 			return;
3292 		}
3293 		it->sbd_it_session_id = task->task_session->ss_session_id;
3294 		bcopy(task->task_lun_no, it->sbd_it_lun, 8);
3295 		it->sbd_it_next = sl->sl_it_list;
3296 		sl->sl_it_list = it;
3297 		mutex_exit(&sl->sl_lock);
3298 
3299 		DTRACE_PROBE1(itl__nexus__start, scsi_task *, task);
3300 
3301 		sbd_pgr_initialize_it(task, it);
3302 		if (stmf_register_itl_handle(task->task_lu, task->task_lun_no,
3303 		    task->task_session, it->sbd_it_session_id, it)
3304 		    != STMF_SUCCESS) {
3305 			sbd_remove_it_handle(sl, it);
3306 			stmf_scsilib_send_status(task, STATUS_BUSY, 0);
3307 			return;
3308 		}
3309 		task->task_lu_itl_handle = it;
3310 		if (sl->sl_access_state != SBD_LU_STANDBY) {
3311 			it->sbd_it_ua_conditions = SBD_UA_POR;
3312 		}
3313 	} else if (it->sbd_it_flags & SBD_IT_PGR_CHECK_FLAG) {
3314 		mutex_enter(&sl->sl_lock);
3315 		it->sbd_it_flags &= ~SBD_IT_PGR_CHECK_FLAG;
3316 		mutex_exit(&sl->sl_lock);
3317 		sbd_pgr_initialize_it(task, it);
3318 	}
3319 
3320 	if (task->task_mgmt_function) {
3321 		stmf_scsilib_handle_task_mgmt(task);
3322 		return;
3323 	}
3324 
3325 	/*
3326 	 * if we're transitioning between access
3327 	 * states, return NOT READY
3328 	 */
3329 	if (sl->sl_access_state == SBD_LU_TRANSITION_TO_STANDBY ||
3330 	    sl->sl_access_state == SBD_LU_TRANSITION_TO_ACTIVE) {
3331 		stmf_scsilib_send_status(task, STATUS_CHECK,
3332 		    STMF_SAA_LU_NO_ACCESS_TRANSITION);
3333 		return;
3334 	}
3335 
3336 	cdb0 = task->task_cdb[0];
3337 	cdb1 = task->task_cdb[1];
3338 	/*
3339 	 * Special case for different versions of Windows.
3340 	 * 1) Windows 2012 and VMware will fail to discover LUs if a READ
3341 	 *    operation sent down the standby path returns an error. By default
3342 	 *    standby_fail_reads will be set to 0.
3343 	 * 2) Windows 2008 R2 has a severe performance problem if READ ops
3344 	 *    aren't rejected on the standby path. 2008 sends commands
3345 	 *    down the standby path which then must be proxied over to the
3346 	 *    active node and back.
3347 	 */
3348 	if ((sl->sl_access_state == SBD_LU_STANDBY) &&
3349 	    stmf_standby_fail_reads &&
3350 	    (cdb0 == SCMD_READ || cdb0 == SCMD_READ_G1 ||
3351 	    cdb0 == SCMD_READ_G4 || cdb0 == SCMD_READ_G5)) {
3352 		stmf_scsilib_send_status(task, STATUS_CHECK,
3353 		    STMF_SAA_LU_NO_ACCESS_STANDBY);
3354 		return;
3355 	}
3356 
3357 	/*
3358 	 * Don't go further if cmd is unsupported in standby mode
3359 	 */
3360 	if (sl->sl_access_state == SBD_LU_STANDBY) {
3361 		if (cdb0 != SCMD_INQUIRY &&
3362 		    cdb0 != SCMD_MODE_SENSE &&
3363 		    cdb0 != SCMD_MODE_SENSE_G1 &&
3364 		    cdb0 != SCMD_MODE_SELECT &&
3365 		    cdb0 != SCMD_MODE_SELECT_G1 &&
3366 		    cdb0 != SCMD_RESERVE &&
3367 		    cdb0 != SCMD_RELEASE &&
3368 		    cdb0 != SCMD_PERSISTENT_RESERVE_OUT &&
3369 		    cdb0 != SCMD_PERSISTENT_RESERVE_IN &&
3370 		    cdb0 != SCMD_REQUEST_SENSE &&
3371 		    cdb0 != SCMD_READ_CAPACITY &&
3372 		    cdb0 != SCMD_TEST_UNIT_READY &&
3373 		    cdb0 != SCMD_START_STOP &&
3374 		    cdb0 != SCMD_READ &&
3375 		    cdb0 != SCMD_READ_G1 &&
3376 		    cdb0 != SCMD_READ_G4 &&
3377 		    cdb0 != SCMD_READ_G5 &&
3378 		    !(cdb0 == SCMD_SVC_ACTION_IN_G4 &&
3379 		    cdb1 == SSVC_ACTION_READ_CAPACITY_G4) &&
3380 		    !(cdb0 == SCMD_MAINTENANCE_IN &&
3381 		    (cdb1 & 0x1F) == 0x05) &&
3382 		    !(cdb0 == SCMD_MAINTENANCE_IN &&
3383 		    (cdb1 & 0x1F) == 0x0A)) {
3384 			stmf_scsilib_send_status(task, STATUS_CHECK,
3385 			    STMF_SAA_LU_NO_ACCESS_STANDBY);
3386 			return;
3387 		}
3388 	}
3389 
3390 	/*
3391 	 * Checking ua conditions as per SAM3R14 5.3.2 specified order. During
3392 	 * MPIO/ALUA failover, cmds come in through local ports and the proxy
3393 	 * port provider (i.e. pppt); we want to report unit attention to
3394 	 * only local cmds since initiators (Windows MPIO/DSM) would continue
3395 	 * sending I/O to the target that reported unit attention.
3396 	 */
3397 	if ((it->sbd_it_ua_conditions) &&
3398 	    !(task->task_additional_flags & TASK_AF_PPPT_TASK) &&
3399 	    (task->task_cdb[0] != SCMD_INQUIRY)) {
3400 		uint32_t saa = 0;
3401 
3402 		mutex_enter(&sl->sl_lock);
3403 		if (it->sbd_it_ua_conditions & SBD_UA_POR) {
3404 			it->sbd_it_ua_conditions &= ~SBD_UA_POR;
3405 			saa = STMF_SAA_POR;
3406 		} else if (it->sbd_it_ua_conditions &
3407 		    SBD_UA_ASYMMETRIC_ACCESS_CHANGED) {
3408 			it->sbd_it_ua_conditions &=
3409 			    ~SBD_UA_ASYMMETRIC_ACCESS_CHANGED;
3410 			saa = STMF_SAA_ASYMMETRIC_ACCESS_CHANGED;
3411 		}
3412 		mutex_exit(&sl->sl_lock);
3413 		if (saa) {
3414 			stmf_scsilib_send_status(task, STATUS_CHECK, saa);
3415 			return;
3416 		}
3417 	}
3418 
3419 	/* Reservation conflict checks */
3420 	if (sbd_check_reservation_conflict(sl, task)) {
3421 		stmf_scsilib_send_status(task,
3422 		    STATUS_RESERVATION_CONFLICT, 0);
3423 		return;
3424 	}
3425 
3426 	/* Rest of the UA condition checks */
3427 	if ((it->sbd_it_ua_conditions) && (task->task_cdb[0] != SCMD_INQUIRY)) {
3428 		uint32_t saa = 0;
3429 
3430 		mutex_enter(&sl->sl_lock);
3431 		if (it->sbd_it_ua_conditions & SBD_UA_CAPACITY_CHANGED) {
3432 			it->sbd_it_ua_conditions &= ~SBD_UA_CAPACITY_CHANGED;
3433 			if ((task->task_cdb[0] == SCMD_READ_CAPACITY) ||
3434 			    ((task->task_cdb[0] == SCMD_SVC_ACTION_IN_G4) &&
3435 			    (task->task_cdb[1] ==
3436 			    SSVC_ACTION_READ_CAPACITY_G4))) {
3437 				saa = 0;
3438 			} else {
3439 				saa = STMF_SAA_CAPACITY_DATA_HAS_CHANGED;
3440 			}
3441 		} else if (it->sbd_it_ua_conditions &
3442 		    SBD_UA_MODE_PARAMETERS_CHANGED) {
3443 			it->sbd_it_ua_conditions &=
3444 			    ~SBD_UA_MODE_PARAMETERS_CHANGED;
3445 			saa = STMF_SAA_MODE_PARAMETERS_CHANGED;
3446 		} else if (it->sbd_it_ua_conditions &
3447 		    SBD_UA_ASYMMETRIC_ACCESS_CHANGED) {
3448 			saa = 0;
3449 		} else if (it->sbd_it_ua_conditions & SBD_UA_POR) {
3450 			saa = 0;
3451 		} else if (it->sbd_it_ua_conditions &
3452 		    SBD_UA_ACCESS_STATE_TRANSITION) {
3453 			it->sbd_it_ua_conditions &=
3454 			    ~SBD_UA_ACCESS_STATE_TRANSITION;
3455 			saa = STMF_SAA_LU_NO_ACCESS_TRANSITION;
3456 		} else {
3457 			it->sbd_it_ua_conditions = 0;
3458 			saa = 0;
3459 		}
3460 		mutex_exit(&sl->sl_lock);
3461 		if (saa) {
3462 			stmf_scsilib_send_status(task, STATUS_CHECK, saa);
3463 			return;
3464 		}
3465 	}
3466 
3467 	if (sl->sl_access_state == SBD_LU_STANDBY) {
3468 		/*
3469 		 * is this a short write?
3470 		 * if so, we'll need to wait until we have the buffer
3471 		 * before proxying the command
3472 		 */
3473 		switch (cdb0) {
3474 			case SCMD_MODE_SELECT:
3475 			case SCMD_MODE_SELECT_G1:
3476 			case SCMD_PERSISTENT_RESERVE_OUT:
3477 				break;
3478 			default:
3479 				st_ret = stmf_proxy_scsi_cmd(task,
3480 				    initial_dbuf);
3481 				if (st_ret != STMF_SUCCESS) {
3482 					stmf_scsilib_send_status(task,
3483 					    STATUS_CHECK,
3484 					    STMF_SAA_LU_NO_ACCESS_UNAVAIL);
3485 				}
3486 				return;
3487 		}
3488 	}
3489 
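	/*
	 * Fast path for media access: mask off the CDB group code bits so
	 * that READ(6/10/12/16) and WRITE(6/10/12/16) all collapse to
	 * SCMD_READ and SCMD_WRITE respectively.
	 */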
3490 	cdb0 = task->task_cdb[0] & 0x1F;
3491 
3492 	if ((cdb0 == SCMD_READ) || (cdb0 == SCMD_WRITE)) {
3493 		if (task->task_additional_flags & TASK_AF_PORT_LOAD_HIGH) {
3494 			stmf_scsilib_send_status(task, STATUS_QFULL, 0);
3495 			return;
3496 		}
3497 		if (cdb0 == SCMD_READ) {
3498 			sbd_handle_read(task, initial_dbuf);
3499 			return;
3500 		}
3501 		sbd_handle_write(task, initial_dbuf);
3502 		return;
3503 	}
3504 
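	/* Not a plain READ/WRITE; dispatch on the full opcode below. */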
3505 	cdb0 = task->task_cdb[0];
3506 	cdb1 = task->task_cdb[1];
3507 
3508 	if (cdb0 == SCMD_INQUIRY) {		/* Inquiry */
3509 		sbd_handle_inquiry(task, initial_dbuf);
3510 		return;
3511 	}
3512 
3513 	if (cdb0  == SCMD_PERSISTENT_RESERVE_OUT) {
3514 		sbd_handle_pgr_out_cmd(task, initial_dbuf);
3515 		return;
3516 	}
3517 
3518 	if (cdb0  == SCMD_PERSISTENT_RESERVE_IN) {
3519 		sbd_handle_pgr_in_cmd(task, initial_dbuf);
3520 		return;
3521 	}
3522 
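	/*
	 * SCSI-2 RESERVE/RELEASE.  Per SPC-2, a RELEASE from an initiator
	 * that does not hold the reservation is not an error, while a
	 * RESERVE from a non-owner while a reservation exists gets
	 * RESERVATION CONFLICT.
	 */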
3523 	if (cdb0 == SCMD_RELEASE) {
3524 		if (cdb1) {
3525 			stmf_scsilib_send_status(task, STATUS_CHECK,
3526 			    STMF_SAA_INVALID_FIELD_IN_CDB);
3527 			return;
3528 		}
3529 
3530 		mutex_enter(&sl->sl_lock);
3531 		if (sl->sl_flags & SL_LU_HAS_SCSI2_RESERVATION) {
3532 			/* If not the owner, don't release it, just return good */
3533 			if (it->sbd_it_session_id !=
3534 			    sl->sl_rs_owner_session_id) {
3535 				mutex_exit(&sl->sl_lock);
3536 				stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3537 				return;
3538 			}
3539 		}
3540 		sl->sl_flags &= ~SL_LU_HAS_SCSI2_RESERVATION;
3541 		it->sbd_it_flags &= ~SBD_IT_HAS_SCSI2_RESERVATION;
3542 		mutex_exit(&sl->sl_lock);
3543 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3544 		return;
3545 	}
3546 
3547 	if (cdb0 == SCMD_RESERVE) {
3548 		if (cdb1) {
3549 			stmf_scsilib_send_status(task, STATUS_CHECK,
3550 			    STMF_SAA_INVALID_FIELD_IN_CDB);
3551 			return;
3552 		}
3553 
3554 		mutex_enter(&sl->sl_lock);
3555 		if (sl->sl_flags & SL_LU_HAS_SCSI2_RESERVATION) {
3556 			/* If not owner, return conflict status */
3557 			if (it->sbd_it_session_id !=
3558 			    sl->sl_rs_owner_session_id) {
3559 				mutex_exit(&sl->sl_lock);
3560 				stmf_scsilib_send_status(task,
3561 				    STATUS_RESERVATION_CONFLICT, 0);
3562 				return;
3563 			}
3564 		}
3565 		sl->sl_flags |= SL_LU_HAS_SCSI2_RESERVATION;
3566 		it->sbd_it_flags |= SBD_IT_HAS_SCSI2_RESERVATION;
3567 		sl->sl_rs_owner_session_id = it->sbd_it_session_id;
3568 		mutex_exit(&sl->sl_lock);
3569 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3570 		return;
3571 	}
3572 
3573 	if (cdb0 == SCMD_REQUEST_SENSE) {
3574 		/*
3575 		 * LU provider needs to store unretrieved sense data
3576 		 * (e.g. after power-on/reset).  For now, we'll just
3577 		 * return good status with no sense.
3578 		 */
3579 
3580 		if ((cdb1 & ~1) || task->task_cdb[2] || task->task_cdb[3] ||
3581 		    task->task_cdb[5]) {
3582 			stmf_scsilib_send_status(task, STATUS_CHECK,
3583 			    STMF_SAA_INVALID_FIELD_IN_CDB);
3584 		} else {
3585 			stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3586 		}
3587 
3588 		return;
3589 	}
3590 
3591 	/* Report Target Port Groups */
3592 	if ((cdb0 == SCMD_MAINTENANCE_IN) &&
3593 	    ((cdb1 & 0x1F) == 0x0A)) {
3594 		stmf_scsilib_handle_report_tpgs(task, initial_dbuf);
3595 		return;
3596 	}
3597 
3598 	/* Report Identifying Information */
3599 	if ((cdb0 == SCMD_MAINTENANCE_IN) &&
3600 	    ((cdb1 & 0x1F) == 0x05)) {
3601 		sbd_handle_identifying_info(task, initial_dbuf);
3602 		return;
3603 	}
3604 
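	/*
	 * START STOP UNIT: reject reserved bits and the LOEJ (eject) bit,
	 * otherwise there is nothing to do, so just report success.
	 */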
3605 	if (cdb0 == SCMD_START_STOP) {			/* Start stop */
3606 		task->task_cmd_xfer_length = 0;
3607 		if (task->task_cdb[4] & 0xFC) {
3608 			stmf_scsilib_send_status(task, STATUS_CHECK,
3609 			    STMF_SAA_INVALID_FIELD_IN_CDB);
3610 			return;
3611 		}
3612 		if (task->task_cdb[4] & 2) {
3613 			stmf_scsilib_send_status(task, STATUS_CHECK,
3614 			    STMF_SAA_INVALID_FIELD_IN_CDB);
3615 		} else {
3616 			stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3617 		}
3618 		return;
3619 
3620 	}
3621 
3622 	if ((cdb0 == SCMD_MODE_SENSE) || (cdb0 == SCMD_MODE_SENSE_G1)) {
3623 		uint8_t *p;
3624 		p = kmem_zalloc(512, KM_SLEEP);
3625 		sbd_handle_mode_sense(task, initial_dbuf, p);
3626 		kmem_free(p, 512);
3627 		return;
3628 	}
3629 
3630 	if ((cdb0 == SCMD_MODE_SELECT) || (cdb0 == SCMD_MODE_SELECT_G1)) {
3631 		sbd_handle_mode_select(task, initial_dbuf);
3632 		return;
3633 	}
3634 
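	/* UNMAP is only honored when unmapping is enabled on this LU. */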
3635 	if ((cdb0 == SCMD_UNMAP) && (sl->sl_flags & SL_UNMAP_ENABLED)) {
3636 		sbd_handle_unmap(task, initial_dbuf);
3637 		return;
3638 	}
3639 
3640 	if ((cdb0 == SCMD_WRITE_SAME_G4) || (cdb0 == SCMD_WRITE_SAME_G1)) {
3641 		sbd_handle_write_same(task, initial_dbuf);
3642 		return;
3643 	}
3644 
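	/* COMPARE AND WRITE is handled as ATS (atomic test and set). */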
3645 	if (cdb0 == SCMD_COMPARE_AND_WRITE) {
3646 		sbd_handle_ats(task, initial_dbuf);
3647 		return;
3648 	}
3649 
3650 	if (cdb0 == SCMD_EXTENDED_COPY) {
3651 		sbd_handle_xcopy(task, initial_dbuf);
3652 		return;
3653 	}
3654 
3655 	if (cdb0 == SCMD_RECV_COPY_RESULTS) {
3656 		sbd_handle_recv_copy_results(task, initial_dbuf);
3657 		return;
3658 	}
3659 
3660 	if (cdb0 == SCMD_TEST_UNIT_READY) {	/* Test unit ready */
3661 		task->task_cmd_xfer_length = 0;
3662 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3663 		return;
3664 	}
3665 
3666 	if (cdb0 == SCMD_READ_CAPACITY) {		/* Read Capacity */
3667 		sbd_handle_read_capacity(task, initial_dbuf);
3668 		return;
3669 	}
3670 
3671 	if (cdb0 == SCMD_SVC_ACTION_IN_G4) { /* Read Capacity or read long */
3672 		if (cdb1 == SSVC_ACTION_READ_CAPACITY_G4) {
3673 			sbd_handle_read_capacity(task, initial_dbuf);
3674 			return;
3675 		/*
3676 		 * } else if (cdb1 == SSVC_ACTION_READ_LONG_G4) {
3677 		 *	sbd_handle_read(task, initial_dbuf);
3678 		 *	return;
3679 		 */
3680 		}
3681 	}
3682 
3683 	/*
3684 	 * if (cdb0 == SCMD_SVC_ACTION_OUT_G4) {
3685 	 *	if (cdb1 == SSVC_ACTION_WRITE_LONG_G4) {
3686 	 *		 sbd_handle_write(task, initial_dbuf);
3687 	 *		return;
3688 	 *	}
3689 	 * }
3690 	 */
3691 
3692 	if (cdb0 == SCMD_VERIFY) {
3693 		/*
3694 		 * More work likely needs to be done here.
3695 		 */
3696 		task->task_cmd_xfer_length = 0;
3697 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3698 		return;
3699 	}
3700 
3701 	if (cdb0 == SCMD_SYNCHRONIZE_CACHE ||
3702 	    cdb0 == SCMD_SYNCHRONIZE_CACHE_G4) {
3703 		sbd_handle_sync_cache(task, initial_dbuf);
3704 		return;
3705 	}
3706 
3707 	/*
3708 	 * Write and Verify use the same path as write, but don't clutter the
3709 	 * performance path above with checking for write_verify opcodes.  We
3710 	 * rely on zfs's integrity checks for the "Verify" part of Write &
3711 	 * Verify.  (Even if we did a read to "verify" we'd merely be reading
3712 	 * cache, not actual media.)
3713 	 * Therefore we
3714 	 *   a) only support this if sbd_is_zvol, and
3715 	 *   b) run the IO through the normal write path with a forced
3716 	 *	sbd_flush_data_cache at the end.
3717 	 */
3718 
3719 	if ((sl->sl_flags & SL_ZFS_META) && (
3720 	    cdb0 == SCMD_WRITE_VERIFY ||
3721 	    cdb0 == SCMD_WRITE_VERIFY_G4 ||
3722 	    cdb0 == SCMD_WRITE_VERIFY_G5)) {
3723 		sbd_handle_write(task, initial_dbuf);
3724 		return;
3725 	}
3726 	stmf_scsilib_send_status(task, STATUS_CHECK, STMF_SAA_INVALID_OPCODE);
3727 }
3728 
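/*
 * Data-phase completion callback.  Dispatch on the sbd command type saved
 * in task_lu_private; dbufs owned by the LU (DB_LU_DATA_BUF) complete even
 * if the command is no longer active, all others are ignored once the
 * command has gone inactive.
 */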
3729 void
3730 sbd_dbuf_xfer_done(struct scsi_task *task, struct stmf_data_buf *dbuf)
3731 {
3732 	sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
3733 
3734 	if (dbuf->db_flags & DB_LU_DATA_BUF) {
3735 		/*
3736 		 * Buffers passed in from the LU always complete
3737 		 * even if the task is no longer active.
3738 		 */
3739 		ASSERT(task->task_additional_flags & TASK_AF_ACCEPT_LU_DBUF);
3740 		ASSERT(scmd);
3741 		switch (scmd->cmd_type) {
3742 		case (SBD_CMD_SCSI_READ):
3743 			sbd_handle_sgl_read_xfer_completion(task, scmd, dbuf);
3744 			break;
3745 		case (SBD_CMD_SCSI_WRITE):
3746 			sbd_handle_sgl_write_xfer_completion(task, scmd, dbuf);
3747 			break;
3748 		default:
3749 			cmn_err(CE_PANIC, "Unknown cmd type, task = %p",
3750 			    (void *)task);
3751 			break;
3752 		}
3753 		return;
3754 	}
3755 
3756 	if ((scmd == NULL) || ((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0))
3757 		return;
3758 
3759 	switch (scmd->cmd_type) {
3760 	case (SBD_CMD_SCSI_READ):
3761 		sbd_handle_read_xfer_completion(task, scmd, dbuf);
3762 		break;
3763 
3764 	case (SBD_CMD_SCSI_WRITE):
3765 		switch (task->task_cdb[0]) {
3766 		case SCMD_WRITE_SAME_G1:
3767 		case SCMD_WRITE_SAME_G4:
3768 			sbd_handle_write_same_xfer_completion(task, scmd, dbuf,
3769 			    1);
3770 			break;
3771 		case SCMD_COMPARE_AND_WRITE:
3772 			sbd_handle_ats_xfer_completion(task, scmd, dbuf, 1);
3773 			break;
3774 		default:
3775 			sbd_handle_write_xfer_completion(task, scmd, dbuf, 1);
3776 			break;
3777 		}
3778 		break;
3779 
3780 	case (SBD_CMD_SMALL_READ):
3781 		sbd_handle_short_read_xfer_completion(task, scmd, dbuf);
3782 		break;
3783 
3784 	case (SBD_CMD_SMALL_WRITE):
3785 		sbd_handle_short_write_xfer_completion(task, dbuf);
3786 		break;
3787 
3788 	default:
3789 		cmn_err(CE_PANIC, "Unknown cmd type, task = %p", (void *)task);
3790 		break;
3791 	}
3792 }
3793 
3794 /* ARGSUSED */
3795 void
3796 sbd_send_status_done(struct scsi_task *task)
3797 {
3798 	cmn_err(CE_PANIC,
3799 	    "sbd_send_status_done: this should not have been called");
3800 }
3801 
3802 void
3803 sbd_task_free(struct scsi_task *task)
3804 {
3805 	if (task->task_lu_private) {
3806 		sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
3807 		if (scmd->flags & SBD_SCSI_CMD_ACTIVE) {
3808 			cmn_err(CE_PANIC, "cmd is active, task = %p",
3809 			    (void *)task);
3810 		}
3811 		kmem_free(scmd, sizeof (sbd_cmd_t));
3812 	}
3813 }
3814 
3815 /*
3816  * Aborts are synchronous w.r.t. I/O AND
3817  * All the I/O which SBD does is synchronous AND
3818  * Everything within a task is single threaded.
3819  *   IT MEANS
3820  * If this function is called, we are doing nothing with this task
3821  * inside of the sbd module.
3822  */
3823 /* ARGSUSED */
3824 stmf_status_t
3825 sbd_abort(struct stmf_lu *lu, int abort_cmd, void *arg, uint32_t flags)
3826 {
3827 	sbd_lu_t *sl = (sbd_lu_t *)lu->lu_provider_private;
3828 	scsi_task_t *task;
3829 
3830 	if (abort_cmd == STMF_LU_RESET_STATE) {
3831 		return (sbd_lu_reset_state(lu));
3832 	}
3833 
3834 	if (abort_cmd == STMF_LU_ITL_HANDLE_REMOVED) {
3835 		sbd_check_and_clear_scsi2_reservation(sl, (sbd_it_data_t *)arg);
3836 		sbd_remove_it_handle(sl, (sbd_it_data_t *)arg);
3837 		return (STMF_SUCCESS);
3838 	}
3839 
3840 	ASSERT(abort_cmd == STMF_LU_ABORT_TASK);
3841 	task = (scsi_task_t *)arg;
3842 	sbd_ats_remove_by_task(task);
3843 	if (task->task_lu_private) {
3844 		sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
3845 
3846 		if (scmd->flags & SBD_SCSI_CMD_ACTIVE) {
3847 			if (scmd->flags & SBD_SCSI_CMD_TRANS_DATA) {
3848 				kmem_free(scmd->trans_data,
3849 				    scmd->trans_data_len);
3850 				scmd->flags &= ~SBD_SCSI_CMD_TRANS_DATA;
3851 			}
3852 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
3853 			return (STMF_ABORT_SUCCESS);
3854 		}
3855 	}
3856 
3857 	return (STMF_NOT_FOUND);
3858 }
3859 
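/*
 * Poll entry point: re-drive the task through sbd_new_task() using the
 * dbuf for handle 0 as the initial buffer.
 */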
3860 void
3861 sbd_task_poll(struct scsi_task *task)
3862 {
3863 	stmf_data_buf_t *initial_dbuf;
3864 
3865 	initial_dbuf = stmf_handle_to_buf(task, 0);
3866 	sbd_new_task(task, initial_dbuf);
3867 }
3868 
3869 /*
3870  * This function is called during task clean-up if the
3871  * DB_LU_DATA_BUF flag is set on the dbuf. This should only be called for
3872  * abort processing after sbd_abort has been called for the task.
3873  */
3874 void
3875 sbd_dbuf_free(struct scsi_task *task, struct stmf_data_buf *dbuf)
3876 {
3877 	sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
3878 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
3879 
3880 	ASSERT(dbuf->db_lu_private);
3881 	ASSERT(scmd && ATOMIC8_GET(scmd->nbufs) > 0);
3882 	ASSERT((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0);
3883 	ASSERT(dbuf->db_flags & DB_LU_DATA_BUF);
3884 	ASSERT(task->task_additional_flags & TASK_AF_ACCEPT_LU_DBUF);
3885 	ASSERT((curthread->t_flag & T_INTR_THREAD) == 0);
3886 
3887 	if (scmd->cmd_type == SBD_CMD_SCSI_READ) {
3888 		sbd_zvol_rele_read_bufs(sl, dbuf);
3889 	} else if (scmd->cmd_type == SBD_CMD_SCSI_WRITE) {
3890 		sbd_zvol_rele_write_bufs_abort(sl, dbuf);
3891 	} else {
3892 		cmn_err(CE_PANIC, "Unknown cmd type %d, task = %p",
3893 		    scmd->cmd_type, (void *)task);
3894 	}
3895 	if (atomic_dec_8_nv(&scmd->nbufs) == 0)
3896 		rw_exit(&sl->sl_access_state_lock);
3897 	stmf_teardown_dbuf(task, dbuf);
3898 	stmf_free(dbuf);
3899 }
3900 
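/*
 * LU online/offline control.  Going offline drops the SCSI-2 reservation
 * and medium-removal-prevention state and resets PGR data; the
 * *_COMPLETE acks just clear the not-acked flag.
 */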
3901 /* ARGSUSED */
3902 void
3903 sbd_ctl(struct stmf_lu *lu, int cmd, void *arg)
3904 {
3905 	sbd_lu_t *sl = (sbd_lu_t *)lu->lu_provider_private;
3906 	stmf_change_status_t st;
3907 
3908 	ASSERT((cmd == STMF_CMD_LU_ONLINE) ||
3909 	    (cmd == STMF_CMD_LU_OFFLINE) ||
3910 	    (cmd == STMF_ACK_LU_ONLINE_COMPLETE) ||
3911 	    (cmd == STMF_ACK_LU_OFFLINE_COMPLETE));
3912 
3913 	st.st_completion_status = STMF_SUCCESS;
3914 	st.st_additional_info = NULL;
3915 
3916 	switch (cmd) {
3917 	case STMF_CMD_LU_ONLINE:
3918 		if (sl->sl_state == STMF_STATE_ONLINE)
3919 			st.st_completion_status = STMF_ALREADY;
3920 		else if (sl->sl_state != STMF_STATE_OFFLINE)
3921 			st.st_completion_status = STMF_FAILURE;
3922 		if (st.st_completion_status == STMF_SUCCESS) {
3923 			sl->sl_state = STMF_STATE_ONLINE;
3924 			sl->sl_state_not_acked = 1;
3925 		}
3926 		(void) stmf_ctl(STMF_CMD_LU_ONLINE_COMPLETE, lu, &st);
3927 		break;
3928 
3929 	case STMF_CMD_LU_OFFLINE:
3930 		if (sl->sl_state == STMF_STATE_OFFLINE)
3931 			st.st_completion_status = STMF_ALREADY;
3932 		else if (sl->sl_state != STMF_STATE_ONLINE)
3933 			st.st_completion_status = STMF_FAILURE;
3934 		if (st.st_completion_status == STMF_SUCCESS) {
3935 			sl->sl_flags &= ~(SL_MEDIUM_REMOVAL_PREVENTED |
3936 			    SL_LU_HAS_SCSI2_RESERVATION);
3937 			sl->sl_state = STMF_STATE_OFFLINE;
3938 			sl->sl_state_not_acked = 1;
3939 			sbd_pgr_reset(sl);
3940 		}
3941 		(void) stmf_ctl(STMF_CMD_LU_OFFLINE_COMPLETE, lu, &st);
3942 		break;
3943 
3944 	case STMF_ACK_LU_ONLINE_COMPLETE:
3945 		/* Fallthrough */
3946 	case STMF_ACK_LU_OFFLINE_COMPLETE:
3947 		sl->sl_state_not_acked = 0;
3948 		break;
3949 
3950 	}
3951 }
3952 
3953 /* ARGSUSED */
3954 stmf_status_t
3955 sbd_info(uint32_t cmd, stmf_lu_t *lu, void *arg, uint8_t *buf,
3956     uint32_t *bufsizep)
3957 {
3958 	return (STMF_NOT_SUPPORTED);
3959 }
3960 
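/*
 * LU reset: restore the saved write-cache setting, clear PGR and SCSI-2
 * reservation state, and deregister all ITL handles with stmf.
 */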
3961 stmf_status_t
3962 sbd_lu_reset_state(stmf_lu_t *lu)
3963 {
3964 	sbd_lu_t *sl = (sbd_lu_t *)lu->lu_provider_private;
3965 
3966 	mutex_enter(&sl->sl_lock);
3967 	if (sl->sl_flags & SL_SAVED_WRITE_CACHE_DISABLE) {
3968 		sl->sl_flags |= SL_WRITEBACK_CACHE_DISABLE;
3969 		mutex_exit(&sl->sl_lock);
3970 		if (sl->sl_access_state == SBD_LU_ACTIVE) {
3971 			(void) sbd_wcd_set(1, sl);
3972 		}
3973 	} else {
3974 		sl->sl_flags &= ~SL_WRITEBACK_CACHE_DISABLE;
3975 		mutex_exit(&sl->sl_lock);
3976 		if (sl->sl_access_state == SBD_LU_ACTIVE) {
3977 			(void) sbd_wcd_set(0, sl);
3978 		}
3979 	}
3980 	sbd_pgr_reset(sl);
3981 	sbd_check_and_clear_scsi2_reservation(sl, NULL);
3982 	if (stmf_deregister_all_lu_itl_handles(lu) != STMF_SUCCESS) {
3983 		return (STMF_FAILURE);
3984 	}
3985 	return (STMF_SUCCESS);
3986 }
3987 
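/*
 * Flush the backing store: VOP_FSYNC() for regular-file or block-device
 * backing (unless the caller already did the fsync), then a
 * DKIOCFLUSHWRITECACHE ioctl for character/block device backing.  If the
 * device does not support the ioctl, SL_NO_DATA_DKIOFLUSH is set so it
 * is not issued again.
 */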
3988 sbd_status_t
3989 sbd_flush_data_cache(sbd_lu_t *sl, int fsync_done)
3990 {
3991 	int r = 0;
3992 	sbd_status_t ret = SBD_SUCCESS;
3993 
3994 	rw_enter(&sl->sl_access_state_lock, RW_READER);
3995 	if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
3996 		ret = SBD_FILEIO_FAILURE;
3997 		goto flush_fail;
3998 	}
3999 	if (fsync_done)
4000 		goto over_fsync;
4001 	if ((sl->sl_data_vtype == VREG) || (sl->sl_data_vtype == VBLK)) {
4002 		if (VOP_FSYNC(sl->sl_data_vp, FSYNC, kcred, NULL)) {
4003 			ret = SBD_FAILURE;
4004 			goto flush_fail;
4005 		}
4006 	}
4007 over_fsync:
4008 	if (((sl->sl_data_vtype == VCHR) || (sl->sl_data_vtype == VBLK)) &&
4009 	    ((sl->sl_flags & SL_NO_DATA_DKIOFLUSH) == 0)) {
4010 		ret = VOP_IOCTL(sl->sl_data_vp, DKIOCFLUSHWRITECACHE, 0,
4011 		    FKIOCTL, kcred, &r, NULL);
4012 		if ((ret == ENOTTY) || (ret == ENOTSUP)) {
4013 			mutex_enter(&sl->sl_lock);
4014 			sl->sl_flags |= SL_NO_DATA_DKIOFLUSH;
4015 			mutex_exit(&sl->sl_lock);
4016 			ret = 0;	/* lack of flush support is not fatal */
4017 		}
4018 		ret = (ret != 0) ? SBD_FAILURE : SBD_SUCCESS;
4019 	}
4020 flush_fail:
4021 	rw_exit(&sl->sl_access_state_lock);
4022 
4023 	return (ret);
4024 }
4025 
4026 /* ARGSUSED */
4027 static void
4028 sbd_handle_sync_cache(struct scsi_task *task,
4029     struct stmf_data_buf *initial_dbuf)
4030 {
4031 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
4032 	uint64_t	lba, laddr;
4033 	sbd_status_t	sret;
4034 	uint32_t	len;
4035 	int		is_g4 = 0;
4036 	int		immed;
4037 
4038 	task->task_cmd_xfer_length = 0;
4039 	/*
4040 	 * Determine if this is a 10 or 16 byte CDB
4041 	 */
4042 
4043 	if (task->task_cdb[0] == SCMD_SYNCHRONIZE_CACHE_G4)
4044 		is_g4 = 1;
4045 
4046 	/*
4047 	 * Determine other requested parameters
4048 	 *
4049 	 * We don't have a non-volatile cache, so don't care about SYNC_NV.
4050 	 * Do not support the IMMED bit.
4051 	 */
4052 
4053 	immed = (task->task_cdb[1] & 0x02);
4054 
4055 	if (immed) {
4056 		stmf_scsilib_send_status(task, STATUS_CHECK,
4057 		    STMF_SAA_INVALID_FIELD_IN_CDB);
4058 		return;
4059 	}
4060 
4061 	/*
4062 	 * Check to be sure we're not being asked to sync an LBA
4063 	 * that is out of range.  While checking, verify reserved fields.
4064 	 */
4065 
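	/*
	 * SYNCHRONIZE CACHE(16) carries the LBA in CDB bytes 2-9 and the
	 * number of blocks in bytes 10-13; SYNCHRONIZE CACHE(10) carries
	 * the LBA in bytes 2-5 and the number of blocks in bytes 7-8.
	 */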
4066 	if (is_g4) {
4067 		if ((task->task_cdb[1] & 0xf9) || task->task_cdb[14] ||
4068 		    task->task_cdb[15]) {
4069 			stmf_scsilib_send_status(task, STATUS_CHECK,
4070 			    STMF_SAA_INVALID_FIELD_IN_CDB);
4071 			return;
4072 		}
4073 
4074 		lba = READ_SCSI64(&task->task_cdb[2], uint64_t);
4075 		len = READ_SCSI32(&task->task_cdb[10], uint32_t);
4076 	} else {
4077 		if ((task->task_cdb[1] & 0xf9) || task->task_cdb[6] ||
4078 		    task->task_cdb[9]) {
4079 			stmf_scsilib_send_status(task, STATUS_CHECK,
4080 			    STMF_SAA_INVALID_FIELD_IN_CDB);
4081 			return;
4082 		}
4083 
4084 		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
4085 		len = READ_SCSI16(&task->task_cdb[7], uint32_t);
4086 	}
4087 
4088 	laddr = lba << sl->sl_data_blocksize_shift;
4089 	len <<= sl->sl_data_blocksize_shift;
4090 
4091 	if ((laddr + (uint64_t)len) > sl->sl_lu_size) {
4092 		stmf_scsilib_send_status(task, STATUS_CHECK,
4093 		    STMF_SAA_LBA_OUT_OF_RANGE);
4094 		return;
4095 	}
4096 
4097 	sret = sbd_flush_data_cache(sl, 0);
4098 	if (sret != SBD_SUCCESS) {
4099 		stmf_scsilib_send_status(task, STATUS_CHECK,
4100 		    STMF_SAA_WRITE_ERROR);
4101 		return;
4102 	}
4103 
4104 	stmf_scsilib_send_status(task, STATUS_GOOD, 0);
4105 }
4106