xref: /illumos-gate/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_scsi.c (revision 5a6b1e2b91f40072b96efcba827bf8ca1d9b2bdd)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright 2017 Nexenta Systems, Inc.  All rights reserved.
25  * Copyright (c) 2013 by Delphix. All rights reserved.
26  * Copyright 2019 Joyent, Inc.
27  * Copyright 2024 MNX Cloud, Inc.
28  */
29 
30 #include <sys/conf.h>
31 #include <sys/file.h>
32 #include <sys/ddi.h>
33 #include <sys/sunddi.h>
34 #include <sys/modctl.h>
35 #include <sys/scsi/scsi.h>
36 #include <sys/scsi/impl/scsi_reset_notify.h>
37 #include <sys/scsi/generic/mode.h>
38 #include <sys/disp.h>
39 #include <sys/byteorder.h>
40 #include <sys/atomic.h>
41 #include <sys/sdt.h>
42 #include <sys/dkio.h>
43 #include <sys/dkioc_free_util.h>
44 
45 #include <sys/stmf.h>
46 #include <sys/lpif.h>
47 #include <sys/portif.h>
48 #include <sys/stmf_ioctl.h>
49 #include <sys/stmf_sbd_ioctl.h>
50 
51 #include "stmf_sbd.h"
52 #include "sbd_impl.h"
53 
54 #define	SCSI2_CONFLICT_FREE_CMDS(cdb)	( \
55 	/* ----------------------- */                                      \
56 	/* Refer Both		   */                                      \
57 	/* SPC-2 (rev 20) Table 10 */                                      \
58 	/* SPC-3 (rev 23) Table 31 */                                      \
59 	/* ----------------------- */                                      \
60 	((cdb[0]) == SCMD_INQUIRY)					|| \
61 	((cdb[0]) == SCMD_LOG_SENSE_G1)					|| \
62 	((cdb[0]) == SCMD_RELEASE)					|| \
63 	((cdb[0]) == SCMD_RELEASE_G1)					|| \
64 	((cdb[0]) == SCMD_REPORT_LUNS)					|| \
65 	((cdb[0]) == SCMD_REQUEST_SENSE)				|| \
66 	/* PREVENT ALLOW MEDIUM REMOVAL with prevent == 0 */               \
67 	((((cdb[0]) == SCMD_DOORLOCK) && (((cdb[4]) & 0x3) == 0)))	|| \
68 	/* SERVICE ACTION IN with READ MEDIA SERIAL NUMBER (0x01) */       \
69 	(((cdb[0]) == SCMD_SVC_ACTION_IN_G5) && (                          \
70 	    ((cdb[1]) & 0x1F) == 0x01))					|| \
71 	/* MAINTENANCE IN with service actions REPORT ALIASES (0x0Bh) */   \
72 	/* REPORT DEVICE IDENTIFIER (0x05)  REPORT PRIORITY (0x0Eh) */     \
73 	/* REPORT TARGET PORT GROUPS (0x0A) REPORT TIMESTAMP (0x0F) */     \
74 	(((cdb[0]) == SCMD_MAINTENANCE_IN) && (                            \
75 	    (((cdb[1]) & 0x1F) == 0x0B) ||                                 \
76 	    (((cdb[1]) & 0x1F) == 0x05) ||                                 \
77 	    (((cdb[1]) & 0x1F) == 0x0E) ||                                 \
78 	    (((cdb[1]) & 0x1F) == 0x0A) ||                                 \
79 	    (((cdb[1]) & 0x1F) == 0x0F)))				|| \
80 	/* ----------------------- */                                      \
81 	/* SBC-3 (rev 17) Table 3  */                                      \
82 	/* ----------------------- */                                      \
83 	/* READ CAPACITY(10) */                                            \
84 	((cdb[0]) == SCMD_READ_CAPACITY)				|| \
85 	/* READ CAPACITY(16) */                                            \
86 	(((cdb[0]) == SCMD_SVC_ACTION_IN_G4) && (                          \
87 	    ((cdb[1]) & 0x1F) == 0x10))					|| \
88 	/* START STOP UNIT with START bit 0 and POWER CONDITION 0  */      \
89 	(((cdb[0]) == SCMD_START_STOP) && (                                \
90 	    (((cdb[4]) & 0xF0) == 0) && (((cdb[4]) & 0x01) == 0))))
91 /* End of SCSI2_CONFLICT_FREE_CMDS */
92 
93 uint8_t HardwareAcceleratedInit = 1;
94 uint8_t sbd_unmap_enable = 1;		/* allow unmap by default */
95 
96 /*
97  * An /etc/system tunable which specifies the maximum number of LBAs supported
98  * in a single UNMAP operation. Default is 0x002000 blocks or 4MB in size.
99  */
100 int stmf_sbd_unmap_max_nblks  = 0x002000;
101 
102 /*
103  * An /etc/system tunable which indicates if READ ops can run on the standby
104  * path or return an error.
105  */
106 int stmf_standby_fail_reads = 0;
107 
108 stmf_status_t sbd_lu_reset_state(stmf_lu_t *lu);
109 static void sbd_handle_sync_cache(struct scsi_task *task,
110     struct stmf_data_buf *initial_dbuf);
111 void sbd_handle_read_xfer_completion(struct scsi_task *task,
112     sbd_cmd_t *scmd, struct stmf_data_buf *dbuf);
113 void sbd_handle_short_write_xfer_completion(scsi_task_t *task,
114     stmf_data_buf_t *dbuf);
115 void sbd_handle_mode_select_xfer(scsi_task_t *task, uint8_t *buf,
116     uint32_t buflen);
117 void sbd_handle_mode_select(scsi_task_t *task, stmf_data_buf_t *dbuf);
118 void sbd_handle_identifying_info(scsi_task_t *task, stmf_data_buf_t *dbuf);
119 
120 static void sbd_handle_unmap_xfer(scsi_task_t *task, uint8_t *buf,
121     uint32_t buflen);
122 static void sbd_handle_unmap(scsi_task_t *task, stmf_data_buf_t *dbuf);
123 
124 extern void sbd_pgr_initialize_it(scsi_task_t *, sbd_it_data_t *);
125 extern int sbd_pgr_reservation_conflict(scsi_task_t *, struct sbd_lu *sl);
126 extern void sbd_pgr_reset(sbd_lu_t *);
127 extern void sbd_pgr_remove_it_handle(sbd_lu_t *, sbd_it_data_t *);
128 extern void sbd_handle_pgr_in_cmd(scsi_task_t *, stmf_data_buf_t *);
129 extern void sbd_handle_pgr_out_cmd(scsi_task_t *, stmf_data_buf_t *);
130 extern void sbd_handle_pgr_out_data(scsi_task_t *, stmf_data_buf_t *);
131 void sbd_do_sgl_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
132     int first_xfer);
133 static void sbd_handle_write_same(scsi_task_t *task,
134     struct stmf_data_buf *initial_dbuf);
135 static void sbd_do_write_same_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
136     struct stmf_data_buf *dbuf, uint8_t dbuf_reusable);
137 static void sbd_handle_write_same_xfer_completion(struct scsi_task *task,
138     sbd_cmd_t *scmd, struct stmf_data_buf *dbuf, uint8_t dbuf_reusable);
139 /*
140  * IMPORTANT NOTE:
141  * =================
142  * The whole world here is based on the assumption that everything within
143  * a scsi task executes in a single threaded manner, even the aborts.
144  * Dont ever change that. There wont be any performance gain but there
145  * will be tons of race conditions.
146  */
147 
148 void
sbd_do_read_xfer(struct scsi_task * task,sbd_cmd_t * scmd,struct stmf_data_buf * dbuf)149 sbd_do_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
150     struct stmf_data_buf *dbuf)
151 {
152 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
153 	uint64_t laddr;
154 	uint32_t len, buflen, iolen;
155 	int ndx;
156 	int bufs_to_take;
157 
158 	/* Lets try not to hog all the buffers the port has. */
159 	bufs_to_take = ((task->task_max_nbufs > 2) &&
160 	    (task->task_cmd_xfer_length < (32 * 1024))) ? 2 :
161 	    task->task_max_nbufs;
162 
163 	len = ATOMIC32_GET(scmd->len) > dbuf->db_buf_size ?
164 	    dbuf->db_buf_size : ATOMIC32_GET(scmd->len);
165 	laddr = scmd->addr + scmd->current_ro;
166 
167 	for (buflen = 0, ndx = 0; (buflen < len) &&
168 	    (ndx < dbuf->db_sglist_length); ndx++) {
169 		iolen = min(len - buflen, dbuf->db_sglist[ndx].seg_length);
170 		if (iolen == 0)
171 			break;
172 		if (sbd_data_read(sl, task, laddr, (uint64_t)iolen,
173 		    dbuf->db_sglist[ndx].seg_addr) != STMF_SUCCESS) {
174 			scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
175 			/* Do not need to do xfer anymore, just complete it */
176 			dbuf->db_data_size = 0;
177 			dbuf->db_xfer_status = STMF_SUCCESS;
178 			sbd_handle_read_xfer_completion(task, scmd, dbuf);
179 			return;
180 		}
181 		buflen += iolen;
182 		laddr += (uint64_t)iolen;
183 	}
184 	dbuf->db_relative_offset = scmd->current_ro;
185 	dbuf->db_data_size = buflen;
186 	dbuf->db_flags = DB_DIRECTION_TO_RPORT;
187 	(void) stmf_xfer_data(task, dbuf, 0);
188 	atomic_add_32(&scmd->len, -buflen);
189 	scmd->current_ro += buflen;
190 	if (ATOMIC32_GET(scmd->len) &&
191 	    (ATOMIC8_GET(scmd->nbufs) < bufs_to_take)) {
192 		uint32_t maxsize, minsize, old_minsize;
193 
194 		maxsize = (ATOMIC32_GET(scmd->len) > (128*1024)) ? 128 * 1024 :
195 		    ATOMIC32_GET(scmd->len);
196 		minsize = maxsize >> 2;
197 		do {
198 			/*
199 			 * A bad port implementation can keep on failing the
200 			 * the request but keep on sending us a false
201 			 * minsize.
202 			 */
203 			old_minsize = minsize;
204 			dbuf = stmf_alloc_dbuf(task, maxsize, &minsize, 0);
205 		} while ((dbuf == NULL) && (old_minsize > minsize) &&
206 		    (minsize >= 512));
207 		if (dbuf == NULL) {
208 			return;
209 		}
210 		atomic_inc_8(&scmd->nbufs);
211 		sbd_do_read_xfer(task, scmd, dbuf);
212 	}
213 }
214 
215 /*
216  * sbd_zcopy: Bail-out switch for reduced copy path.
217  *
218  * 0 - read & write off
219  * 1 - read & write on
220  * 2 - only read on
221  * 4 - only write on
222  */
223 int sbd_zcopy = 1;	/* enable zcopy read & write path */
224 uint32_t sbd_max_xfer_len = 0;		/* Valid if non-zero */
225 uint32_t sbd_1st_xfer_len = 0;		/* Valid if non-zero */
226 uint32_t sbd_copy_threshold = 0;		/* Valid if non-zero */
227 
228 static void
sbd_do_sgl_read_xfer(struct scsi_task * task,sbd_cmd_t * scmd,int first_xfer)229 sbd_do_sgl_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer)
230 {
231 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
232 	sbd_zvol_io_t *zvio;
233 	int ret, final_xfer;
234 	uint64_t offset;
235 	uint32_t xfer_len, max_len, first_len;
236 	stmf_status_t xstat;
237 	stmf_data_buf_t *dbuf;
238 	uint_t nblks;
239 	uint64_t blksize = sl->sl_blksize;
240 	size_t db_private_sz;
241 	hrtime_t xfer_start;
242 	uintptr_t pad;
243 
244 	ASSERT(rw_read_held(&sl->sl_access_state_lock));
245 	ASSERT((sl->sl_flags & SL_MEDIA_LOADED) != 0);
246 
247 	/*
248 	 * Calculate the limits on xfer_len to the minimum of :
249 	 *    - task limit
250 	 *    - lun limit
251 	 *    - sbd global limit if set
252 	 *    - first xfer limit if set
253 	 *
254 	 * First, protect against silly over-ride value
255 	 */
256 	if (sbd_max_xfer_len && ((sbd_max_xfer_len % DEV_BSIZE) != 0)) {
257 		cmn_err(CE_WARN, "sbd_max_xfer_len invalid %d, resetting\n",
258 		    sbd_max_xfer_len);
259 		sbd_max_xfer_len = 0;
260 	}
261 	if (sbd_1st_xfer_len && ((sbd_1st_xfer_len % DEV_BSIZE) != 0)) {
262 		cmn_err(CE_WARN, "sbd_1st_xfer_len invalid %d, resetting\n",
263 		    sbd_1st_xfer_len);
264 		sbd_1st_xfer_len = 0;
265 	}
266 
267 	max_len = MIN(task->task_max_xfer_len, sl->sl_max_xfer_len);
268 	if (sbd_max_xfer_len)
269 		max_len = MIN(max_len, sbd_max_xfer_len);
270 	/*
271 	 * Special case the first xfer if hints are set.
272 	 */
273 	if (first_xfer && (sbd_1st_xfer_len || task->task_1st_xfer_len)) {
274 		/* global over-ride has precedence */
275 		if (sbd_1st_xfer_len)
276 			first_len = sbd_1st_xfer_len;
277 		else
278 			first_len = task->task_1st_xfer_len;
279 	} else {
280 		first_len = 0;
281 	}
282 
283 	while (ATOMIC32_GET(scmd->len) &&
284 	    ATOMIC8_GET(scmd->nbufs) < task->task_max_nbufs) {
285 
286 		xfer_len = MIN(max_len, ATOMIC32_GET(scmd->len));
287 		if (first_len) {
288 			xfer_len = MIN(xfer_len, first_len);
289 			first_len = 0;
290 		}
291 		if (ATOMIC32_GET(scmd->len) == xfer_len) {
292 			final_xfer = 1;
293 		} else {
294 			/*
295 			 * Attempt to end xfer on a block boundary.
296 			 * The only way this does not happen is if the
297 			 * xfer_len is small enough to stay contained
298 			 * within the same block.
299 			 */
300 			uint64_t xfer_offset, xfer_aligned_end;
301 
302 			final_xfer = 0;
303 			xfer_offset = scmd->addr + scmd->current_ro;
304 			xfer_aligned_end =
305 			    P2ALIGN(xfer_offset+xfer_len, blksize);
306 			if (xfer_aligned_end > xfer_offset)
307 				xfer_len = xfer_aligned_end - xfer_offset;
308 		}
309 		/*
310 		 * Allocate object to track the read and reserve
311 		 * enough space for scatter/gather list.
312 		 */
313 		offset = scmd->addr + scmd->current_ro;
314 		nblks = sbd_zvol_numsegs(sl, offset, xfer_len);
315 
316 		db_private_sz = sizeof (*zvio) + sizeof (uintptr_t) /* PAD */ +
317 		    (nblks * sizeof (stmf_sglist_ent_t));
318 		dbuf = stmf_alloc(STMF_STRUCT_DATA_BUF, db_private_sz,
319 		    AF_DONTZERO);
320 		/*
321 		 * Setup the dbuf
322 		 *
323 		 * XXX Framework does not handle variable length sglists
324 		 * properly, so setup db_lu_private and db_port_private
325 		 * fields here. db_stmf_private is properly set for
326 		 * calls to stmf_free.
327 		 */
328 		if (dbuf->db_port_private == NULL) {
329 			/*
330 			 * XXX Framework assigns space to PP after db_sglist[0]
331 			 */
332 			cmn_err(CE_PANIC, "db_port_private == NULL");
333 		}
334 		pad = (uintptr_t)&dbuf->db_sglist[nblks];
335 		dbuf->db_lu_private = (void *)P2ROUNDUP(pad, sizeof (pad));
336 		dbuf->db_port_private = NULL;
337 		dbuf->db_buf_size = xfer_len;
338 		dbuf->db_data_size = xfer_len;
339 		dbuf->db_relative_offset = scmd->current_ro;
340 		dbuf->db_sglist_length = (uint16_t)nblks;
341 		dbuf->db_xfer_status = 0;
342 		dbuf->db_handle = 0;
343 
344 		dbuf->db_flags = (DB_DONT_CACHE | DB_DONT_REUSE |
345 		    DB_DIRECTION_TO_RPORT | DB_LU_DATA_BUF);
346 		if (final_xfer)
347 			dbuf->db_flags |= DB_SEND_STATUS_GOOD;
348 
349 		zvio = dbuf->db_lu_private;
350 		/* Need absolute offset for zvol access */
351 		zvio->zvio_offset = offset;
352 		zvio->zvio_flags = ZVIO_SYNC;
353 
354 		/*
355 		 * Accounting for start of read.
356 		 * Note there is no buffer address for the probe yet.
357 		 */
358 		xfer_start = gethrtime();
359 		DTRACE_PROBE5(backing__store__read__start, sbd_lu_t *, sl,
360 		    uint8_t *, NULL, uint64_t, xfer_len,
361 		    uint64_t, offset, scsi_task_t *, task);
362 
363 		ret = sbd_zvol_alloc_read_bufs(sl, dbuf);
364 
365 		stmf_lu_xfer_done(task, B_TRUE /* read */,
366 		    (gethrtime() - xfer_start));
367 		DTRACE_PROBE6(backing__store__read__end, sbd_lu_t *, sl,
368 		    uint8_t *, NULL, uint64_t, xfer_len,
369 		    uint64_t, offset, int, ret, scsi_task_t *, task);
370 
371 		if (ret != 0) {
372 			/*
373 			 * Read failure from the backend.
374 			 */
375 			stmf_free(dbuf);
376 			if (ATOMIC8_GET(scmd->nbufs) == 0) {
377 				/* nothing queued, just finish */
378 				scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
379 				sbd_ats_remove_by_task(task);
380 				stmf_scsilib_send_status(task, STATUS_CHECK,
381 				    STMF_SAA_READ_ERROR);
382 				rw_exit(&sl->sl_access_state_lock);
383 			} else {
384 				/* process failure when other dbufs finish */
385 				scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
386 			}
387 			return;
388 		}
389 
390 		/*
391 		 * Allow PP to do setup
392 		 */
393 		xstat = stmf_setup_dbuf(task, dbuf, 0);
394 		if (xstat != STMF_SUCCESS) {
395 			/*
396 			 * This could happen if the driver cannot get the
397 			 * DDI resources it needs for this request.
398 			 * If other dbufs are queued, try again when the next
399 			 * one completes, otherwise give up.
400 			 */
401 			sbd_zvol_rele_read_bufs(sl, dbuf);
402 			stmf_free(dbuf);
403 			if (ATOMIC8_GET(scmd->nbufs) > 0) {
404 				/* completion of previous dbuf will retry */
405 				return;
406 			}
407 			/*
408 			 * Done with this command.
409 			 */
410 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
411 			sbd_ats_remove_by_task(task);
412 			if (first_xfer)
413 				stmf_scsilib_send_status(task, STATUS_QFULL, 0);
414 			else
415 				stmf_scsilib_send_status(task, STATUS_CHECK,
416 				    STMF_SAA_READ_ERROR);
417 			rw_exit(&sl->sl_access_state_lock);
418 			return;
419 		}
420 		/*
421 		 * dbuf is now queued on task
422 		 */
423 		atomic_inc_8(&scmd->nbufs);
424 
425 		/* XXX leave this in for FW? */
426 		DTRACE_PROBE4(sbd__xfer, struct scsi_task *, task,
427 		    struct stmf_data_buf *, dbuf, uint64_t, offset,
428 		    uint32_t, xfer_len);
429 		/*
430 		 * Do not pass STMF_IOF_LU_DONE so that the zvol
431 		 * state can be released in the completion callback.
432 		 */
433 		xstat = stmf_xfer_data(task, dbuf, 0);
434 		switch (xstat) {
435 		case STMF_SUCCESS:
436 			break;
437 		case STMF_BUSY:
438 			/*
439 			 * The dbuf is queued on the task, but unknown
440 			 * to the PP, thus no completion will occur.
441 			 */
442 			sbd_zvol_rele_read_bufs(sl, dbuf);
443 			stmf_teardown_dbuf(task, dbuf);
444 			stmf_free(dbuf);
445 			atomic_dec_8(&scmd->nbufs);
446 			if (ATOMIC8_GET(scmd->nbufs) > 0) {
447 				/* completion of previous dbuf will retry */
448 				return;
449 			}
450 			/*
451 			 * Done with this command.
452 			 */
453 			rw_exit(&sl->sl_access_state_lock);
454 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
455 			sbd_ats_remove_by_task(task);
456 			if (first_xfer)
457 				stmf_scsilib_send_status(task, STATUS_QFULL, 0);
458 			else
459 				stmf_scsilib_send_status(task, STATUS_CHECK,
460 				    STMF_SAA_READ_ERROR);
461 			return;
462 		case STMF_ABORTED:
463 			/*
464 			 * Completion from task_done will cleanup
465 			 */
466 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
467 			sbd_ats_remove_by_task(task);
468 			return;
469 		}
470 		/*
471 		 * Update the xfer progress.
472 		 */
473 		ASSERT(scmd->len >= xfer_len);
474 		atomic_add_32(&scmd->len, -xfer_len);
475 		scmd->current_ro += xfer_len;
476 	}
477 }
478 
479 void
sbd_handle_read_xfer_completion(struct scsi_task * task,sbd_cmd_t * scmd,struct stmf_data_buf * dbuf)480 sbd_handle_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
481     struct stmf_data_buf *dbuf)
482 {
483 	if (dbuf->db_xfer_status != STMF_SUCCESS) {
484 		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
485 		    dbuf->db_xfer_status, NULL);
486 		return;
487 	}
488 	task->task_nbytes_transferred += dbuf->db_data_size;
489 	if (ATOMIC32_GET(scmd->len) == 0 ||
490 	    scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
491 		stmf_free_dbuf(task, dbuf);
492 		atomic_dec_8(&scmd->nbufs);
493 		if (ATOMIC8_GET(scmd->nbufs))
494 			return;	/* wait for all buffers to complete */
495 		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
496 		sbd_ats_remove_by_task(task);
497 		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL)
498 			stmf_scsilib_send_status(task, STATUS_CHECK,
499 			    STMF_SAA_READ_ERROR);
500 		else
501 			stmf_scsilib_send_status(task, STATUS_GOOD, 0);
502 		return;
503 	}
504 	if (dbuf->db_flags & DB_DONT_REUSE) {
505 		/* allocate new dbuf */
506 		uint32_t maxsize, minsize, old_minsize;
507 		stmf_free_dbuf(task, dbuf);
508 
509 		maxsize = (ATOMIC32_GET(scmd->len) > (128*1024)) ?
510 		    128 * 1024 : ATOMIC32_GET(scmd->len);
511 		minsize = maxsize >> 2;
512 		do {
513 			old_minsize = minsize;
514 			dbuf = stmf_alloc_dbuf(task, maxsize, &minsize, 0);
515 		} while ((dbuf == NULL) && (old_minsize > minsize) &&
516 		    (minsize >= 512));
517 		if (dbuf == NULL) {
518 			atomic_dec_8(&scmd->nbufs);
519 			if (ATOMIC8_GET(scmd->nbufs) == 0) {
520 				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
521 				    STMF_ALLOC_FAILURE, NULL);
522 			}
523 			return;
524 		}
525 	}
526 	sbd_do_read_xfer(task, scmd, dbuf);
527 }
528 
529 /*
530  * This routine must release the DMU resources and free the dbuf
531  * in all cases.  If this is the final dbuf of the task, then drop
532  * the reader lock on the LU state. If there are no errors and more
533  * work to do, then queue more xfer operations.
534  */
535 void
sbd_handle_sgl_read_xfer_completion(struct scsi_task * task,sbd_cmd_t * scmd,struct stmf_data_buf * dbuf)536 sbd_handle_sgl_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
537     struct stmf_data_buf *dbuf)
538 {
539 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
540 	stmf_status_t xfer_status;
541 	uint32_t data_size;
542 	int scmd_err;
543 
544 	ASSERT(dbuf->db_lu_private);
545 	ASSERT(scmd->cmd_type == SBD_CMD_SCSI_READ);
546 
547 	atomic_dec_8(&scmd->nbufs);	/* account for this dbuf */
548 	/*
549 	 * Release the DMU resources.
550 	 */
551 	sbd_zvol_rele_read_bufs(sl, dbuf);
552 	/*
553 	 * Release the dbuf after retrieving needed fields.
554 	 */
555 	xfer_status = dbuf->db_xfer_status;
556 	data_size = dbuf->db_data_size;
557 	stmf_teardown_dbuf(task, dbuf);
558 	stmf_free(dbuf);
559 	/*
560 	 * Release the state lock if this is the last completion.
561 	 * If this is the last dbuf on task and all data has been
562 	 * transferred or an error encountered, then no more dbufs
563 	 * will be queued.
564 	 */
565 	scmd_err = (((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0) ||
566 	    (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) ||
567 	    (xfer_status != STMF_SUCCESS));
568 	if ((ATOMIC8_GET(scmd->nbufs) == 0) &&
569 	    (ATOMIC32_GET(scmd->len) == 0 || scmd_err)) {
570 		/* all DMU state has been released */
571 		rw_exit(&sl->sl_access_state_lock);
572 	}
573 
574 	/*
575 	 * If there have been no errors, either complete the task
576 	 * or issue more data xfer operations.
577 	 */
578 	if (!scmd_err) {
579 		/*
580 		 * This chunk completed successfully
581 		 */
582 		task->task_nbytes_transferred += data_size;
583 		if (ATOMIC8_GET(scmd->nbufs) == 0 &&
584 		    ATOMIC32_GET(scmd->len) == 0) {
585 			/*
586 			 * This command completed successfully
587 			 *
588 			 * Status was sent along with data, so no status
589 			 * completion will occur. Tell stmf we are done.
590 			 */
591 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
592 			sbd_ats_remove_by_task(task);
593 			stmf_task_lu_done(task);
594 			return;
595 		}
596 		/*
597 		 * Start more xfers
598 		 */
599 		sbd_do_sgl_read_xfer(task, scmd, 0);
600 		return;
601 	}
602 	/*
603 	 * Sort out the failure
604 	 */
605 	if (scmd->flags & SBD_SCSI_CMD_ACTIVE) {
606 		/*
607 		 * If a previous error occurred, leave the command active
608 		 * and wait for the last completion to send the status check.
609 		 */
610 		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
611 			if (ATOMIC8_GET(scmd->nbufs) == 0) {
612 				scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
613 				sbd_ats_remove_by_task(task);
614 				stmf_scsilib_send_status(task, STATUS_CHECK,
615 				    STMF_SAA_READ_ERROR);
616 			}
617 			return;
618 		}
619 		/*
620 		 * Must have been a failure on current dbuf
621 		 */
622 		ASSERT(xfer_status != STMF_SUCCESS);
623 
624 		/*
625 		 * Actually this is a bug. stmf abort should have reset the
626 		 * active flag but since its been there for some time.
627 		 * I wont change it.
628 		 */
629 		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
630 		sbd_ats_remove_by_task(task);
631 		stmf_abort(STMF_QUEUE_TASK_ABORT, task, xfer_status, NULL);
632 	}
633 }
634 
635 void
sbd_handle_sgl_write_xfer_completion(struct scsi_task * task,sbd_cmd_t * scmd,struct stmf_data_buf * dbuf)636 sbd_handle_sgl_write_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
637     struct stmf_data_buf *dbuf)
638 {
639 	sbd_zvol_io_t *zvio = dbuf->db_lu_private;
640 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
641 	int ret;
642 	int scmd_err, scmd_xfer_done;
643 	stmf_status_t xfer_status = dbuf->db_xfer_status;
644 	uint32_t data_size = dbuf->db_data_size;
645 	hrtime_t xfer_start;
646 
647 	ASSERT(zvio);
648 
649 	/*
650 	 * Allow PP to free up resources before releasing the write bufs
651 	 * as writing to the backend could take some time.
652 	 */
653 	stmf_teardown_dbuf(task, dbuf);
654 
655 	atomic_dec_8(&scmd->nbufs);	/* account for this dbuf */
656 	/*
657 	 * All data was queued and this is the last completion,
658 	 * but there could still be an error.
659 	 */
660 	scmd_xfer_done = (ATOMIC32_GET(scmd->len) == 0 &&
661 	    (ATOMIC8_GET(scmd->nbufs) == 0));
662 	scmd_err = (((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0) ||
663 	    (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) ||
664 	    (xfer_status != STMF_SUCCESS));
665 
666 	xfer_start = gethrtime();
667 	DTRACE_PROBE5(backing__store__write__start, sbd_lu_t *, sl,
668 	    uint8_t *, NULL, uint64_t, data_size,
669 	    uint64_t, zvio->zvio_offset, scsi_task_t *, task);
670 
671 	if (scmd_err) {
672 		/* just return the write buffers */
673 		sbd_zvol_rele_write_bufs_abort(sl, dbuf);
674 		ret = 0;
675 	} else {
676 		if (scmd_xfer_done)
677 			zvio->zvio_flags = ZVIO_COMMIT;
678 		else
679 			zvio->zvio_flags = 0;
680 		/* write the data */
681 		ret = sbd_zvol_rele_write_bufs(sl, dbuf);
682 	}
683 
684 	stmf_lu_xfer_done(task, B_FALSE /* write */,
685 	    (gethrtime() - xfer_start));
686 	DTRACE_PROBE6(backing__store__write__end, sbd_lu_t *, sl,
687 	    uint8_t *, NULL, uint64_t, data_size,
688 	    uint64_t, zvio->zvio_offset, int, ret,  scsi_task_t *, task);
689 
690 	if (ret != 0) {
691 		/* update the error flag */
692 		scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
693 		scmd_err = 1;
694 	}
695 
696 	/* Release the dbuf */
697 	stmf_free(dbuf);
698 
699 	/*
700 	 * Release the state lock if this is the last completion.
701 	 * If this is the last dbuf on task and all data has been
702 	 * transferred or an error encountered, then no more dbufs
703 	 * will be queued.
704 	 */
705 	if ((ATOMIC8_GET(scmd->nbufs) == 0) &&
706 	    (ATOMIC32_GET(scmd->len) == 0 || scmd_err)) {
707 		/* all DMU state has been released */
708 		rw_exit(&sl->sl_access_state_lock);
709 	}
710 	/*
711 	 * If there have been no errors, either complete the task
712 	 * or issue more data xfer operations.
713 	 */
714 	if (!scmd_err) {
715 		/* This chunk completed successfully */
716 		task->task_nbytes_transferred += data_size;
717 		if (scmd_xfer_done) {
718 			/* This command completed successfully */
719 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
720 			sbd_ats_remove_by_task(task);
721 			if ((scmd->flags & SBD_SCSI_CMD_SYNC_WRITE) &&
722 			    (sbd_flush_data_cache(sl, 0) != SBD_SUCCESS)) {
723 				stmf_scsilib_send_status(task, STATUS_CHECK,
724 				    STMF_SAA_WRITE_ERROR);
725 			} else {
726 				stmf_scsilib_send_status(task, STATUS_GOOD, 0);
727 			}
728 			return;
729 		}
730 		/*
731 		 * Start more xfers
732 		 */
733 		sbd_do_sgl_write_xfer(task, scmd, 0);
734 		return;
735 	}
736 	/*
737 	 * Sort out the failure
738 	 */
739 	if (scmd->flags & SBD_SCSI_CMD_ACTIVE) {
740 		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
741 			if (ATOMIC8_GET(scmd->nbufs) == 0) {
742 				scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
743 				sbd_ats_remove_by_task(task);
744 				stmf_scsilib_send_status(task, STATUS_CHECK,
745 				    STMF_SAA_WRITE_ERROR);
746 			}
747 			/*
748 			 * Leave the command active until last dbuf completes.
749 			 */
750 			return;
751 		}
752 		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
753 		sbd_ats_remove_by_task(task);
754 		ASSERT(xfer_status != STMF_SUCCESS);
755 		stmf_abort(STMF_QUEUE_TASK_ABORT, task, xfer_status, NULL);
756 	}
757 }
758 
759 /*
760  * Handle a copy operation using the zvol interface.
761  *
762  * Similar to the sbd_data_read/write path, except it goes directly through
763  * the zvol interfaces. It can pass a port provider sglist in the
764  * form of uio which is lost through the vn_rdwr path.
765  *
766  * Returns:
767  *	STMF_SUCCESS - request handled
768  *	STMF_FAILURE - request not handled, caller must deal with error
769  */
770 static stmf_status_t
sbd_copy_rdwr(scsi_task_t * task,uint64_t laddr,stmf_data_buf_t * dbuf,int cmd,int commit)771 sbd_copy_rdwr(scsi_task_t *task, uint64_t laddr, stmf_data_buf_t *dbuf,
772     int cmd, int commit)
773 {
774 	sbd_lu_t		*sl = task->task_lu->lu_provider_private;
775 	struct uio		uio;
776 	struct iovec		*iov, *tiov, iov1[8];
777 	uint32_t		len, resid;
778 	int			ret, i, iovcnt, flags;
779 	hrtime_t		xfer_start;
780 	boolean_t		is_read;
781 
782 	ASSERT(cmd == SBD_CMD_SCSI_READ || cmd == SBD_CMD_SCSI_WRITE);
783 
784 	is_read = (cmd == SBD_CMD_SCSI_READ) ? B_TRUE : B_FALSE;
785 	iovcnt = dbuf->db_sglist_length;
786 	/* use the stack for small iovecs */
787 	if (iovcnt > 8) {
788 		iov = kmem_alloc(iovcnt * sizeof (*iov), KM_SLEEP);
789 	} else {
790 		iov = &iov1[0];
791 	}
792 
793 	/* Convert dbuf sglist to iovec format */
794 	len = dbuf->db_data_size;
795 	resid = len;
796 	tiov = iov;
797 	for (i = 0; i < iovcnt; i++) {
798 		tiov->iov_base = (caddr_t)dbuf->db_sglist[i].seg_addr;
799 		tiov->iov_len = MIN(resid, dbuf->db_sglist[i].seg_length);
800 		resid -= tiov->iov_len;
801 		tiov++;
802 	}
803 	if (resid != 0) {
804 		cmn_err(CE_WARN, "inconsistant sglist rem %d", resid);
805 		if (iov != &iov1[0])
806 			kmem_free(iov, iovcnt * sizeof (*iov));
807 		return (STMF_FAILURE);
808 	}
809 	/* Setup the uio struct */
810 	uio.uio_iov = iov;
811 	uio.uio_iovcnt = iovcnt;
812 	uio.uio_loffset = laddr;
813 	uio.uio_segflg = (short)UIO_SYSSPACE;
814 	uio.uio_resid = (uint64_t)len;
815 	uio.uio_llimit = RLIM64_INFINITY;
816 
817 	xfer_start = gethrtime();
818 	if (is_read == B_TRUE) {
819 		uio.uio_fmode = FREAD;
820 		uio.uio_extflg = UIO_COPY_CACHED;
821 		DTRACE_PROBE5(backing__store__read__start, sbd_lu_t *, sl,
822 		    uint8_t *, NULL, uint64_t, len, uint64_t, laddr,
823 		    scsi_task_t *, task);
824 
825 		/* Fetch the data */
826 		ret = sbd_zvol_copy_read(sl, &uio);
827 
828 		DTRACE_PROBE6(backing__store__read__end, sbd_lu_t *, sl,
829 		    uint8_t *, NULL, uint64_t, len, uint64_t, laddr, int, ret,
830 		    scsi_task_t *, task);
831 	} else {
832 		uio.uio_fmode = FWRITE;
833 		uio.uio_extflg = UIO_COPY_DEFAULT;
834 		DTRACE_PROBE5(backing__store__write__start, sbd_lu_t *, sl,
835 		    uint8_t *, NULL, uint64_t, len, uint64_t, laddr,
836 		    scsi_task_t *, task);
837 
838 		flags = (commit) ? ZVIO_COMMIT : 0;
839 		/* Write the data */
840 		ret = sbd_zvol_copy_write(sl, &uio, flags);
841 
842 		DTRACE_PROBE6(backing__store__write__end, sbd_lu_t *, sl,
843 		    uint8_t *, NULL, uint64_t, len, uint64_t, laddr, int, ret,
844 		    scsi_task_t *, task);
845 	}
846 	/* finalize accounting */
847 	stmf_lu_xfer_done(task, is_read, (gethrtime() - xfer_start));
848 
849 	if (iov != &iov1[0])
850 		kmem_free(iov, iovcnt * sizeof (*iov));
851 	if (ret != 0) {
852 		/* Backend I/O error */
853 		return (STMF_FAILURE);
854 	}
855 	return (STMF_SUCCESS);
856 }
857 
858 void
sbd_handle_read(struct scsi_task * task,struct stmf_data_buf * initial_dbuf)859 sbd_handle_read(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
860 {
861 	uint64_t lba, laddr;
862 	uint64_t blkcount;
863 	uint32_t len;
864 	uint8_t op = task->task_cdb[0];
865 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
866 	sbd_cmd_t *scmd;
867 	stmf_data_buf_t *dbuf;
868 	int fast_path;
869 	boolean_t fua_bit = B_FALSE;
870 
871 	/*
872 	 * Check to see if the command is READ(10), READ(12), or READ(16).
873 	 * If it is then check for bit 3 being set to indicate if Forced
874 	 * Unit Access is being requested. If so, we'll bypass the use of
875 	 * DMA buffers to simplify support of this feature.
876 	 */
877 	if (((op == SCMD_READ_G1) || (op == SCMD_READ_G4) ||
878 	    (op == SCMD_READ_G5)) &&
879 	    (task->task_cdb[1] & BIT_3)) {
880 		fua_bit = B_TRUE;
881 	}
882 	if (op == SCMD_READ) {
883 		lba = READ_SCSI21(&task->task_cdb[1], uint64_t);
884 		len = (uint32_t)task->task_cdb[4];
885 
886 		if (len == 0) {
887 			len = 256;
888 		}
889 	} else if (op == SCMD_READ_G1) {
890 		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
891 		len = READ_SCSI16(&task->task_cdb[7], uint32_t);
892 	} else if (op == SCMD_READ_G5) {
893 		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
894 		len = READ_SCSI32(&task->task_cdb[6], uint32_t);
895 	} else if (op == SCMD_READ_G4) {
896 		lba = READ_SCSI64(&task->task_cdb[2], uint64_t);
897 		len = READ_SCSI32(&task->task_cdb[10], uint32_t);
898 	} else {
899 		stmf_scsilib_send_status(task, STATUS_CHECK,
900 		    STMF_SAA_INVALID_OPCODE);
901 		return;
902 	}
903 
904 	laddr = lba << sl->sl_data_blocksize_shift;
905 	blkcount = len;
906 	len <<= sl->sl_data_blocksize_shift;
907 
908 	if ((laddr + (uint64_t)len) > sl->sl_lu_size) {
909 		stmf_scsilib_send_status(task, STATUS_CHECK,
910 		    STMF_SAA_LBA_OUT_OF_RANGE);
911 		return;
912 	}
913 
914 	task->task_cmd_xfer_length = len;
915 	if (task->task_additional_flags & TASK_AF_NO_EXPECTED_XFER_LENGTH) {
916 		task->task_expected_xfer_length = len;
917 	}
918 
919 	if (len != task->task_expected_xfer_length) {
920 		fast_path = 0;
921 		len = (len > task->task_expected_xfer_length) ?
922 		    task->task_expected_xfer_length : len;
923 	} else {
924 		fast_path = 1;
925 	}
926 
927 	if (len == 0) {
928 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
929 		return;
930 	}
931 
932 	if (sbd_ats_handling_before_io(task, sl, lba, blkcount) !=
933 	    SBD_SUCCESS) {
934 		if (stmf_task_poll_lu(task, 10) != STMF_SUCCESS) {
935 			stmf_scsilib_send_status(task, STATUS_BUSY, 0);
936 		}
937 		return;
938 	}
939 	/*
940 	 * Determine if this read can directly use DMU buffers.
941 	 */
942 	if (sbd_zcopy & (2|1) &&		/* Debug switch */
943 	    initial_dbuf == NULL &&		/* No PP buffer passed in */
944 	    sl->sl_flags & SL_CALL_ZVOL &&	/* zvol backing store */
945 	    (task->task_additional_flags &
946 	    TASK_AF_ACCEPT_LU_DBUF) &&		/* PP allows it */
947 	    !fua_bit) {
948 		/*
949 		 * Reduced copy path
950 		 */
951 		uint32_t copy_threshold, minsize;
952 		int ret;
953 
954 		/*
955 		 * The sl_access_state_lock will be held shared
956 		 * for the entire request and released when all
957 		 * dbufs have completed.
958 		 */
959 		rw_enter(&sl->sl_access_state_lock, RW_READER);
960 		if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
961 			rw_exit(&sl->sl_access_state_lock);
962 			sbd_ats_remove_by_task(task);
963 			stmf_scsilib_send_status(task, STATUS_CHECK,
964 			    STMF_SAA_READ_ERROR);
965 			return;
966 		}
967 
968 		/*
969 		 * Check if setup is more expensive than copying the data.
970 		 *
971 		 * Use the global over-ride sbd_zcopy_threshold if set.
972 		 */
973 		copy_threshold = (sbd_copy_threshold > 0) ?
974 		    sbd_copy_threshold : task->task_copy_threshold;
975 		minsize = len;
976 		if (len < copy_threshold &&
977 		    (dbuf = stmf_alloc_dbuf(task, len, &minsize, 0)) != 0) {
978 
979 			ret = sbd_copy_rdwr(task, laddr, dbuf,
980 			    SBD_CMD_SCSI_READ, 0);
981 			/* done with the backend */
982 			rw_exit(&sl->sl_access_state_lock);
983 			sbd_ats_remove_by_task(task);
984 			if (ret != 0) {
985 				/* backend error */
986 				stmf_scsilib_send_status(task, STATUS_CHECK,
987 				    STMF_SAA_READ_ERROR);
988 			} else {
989 				/* send along good data */
990 				dbuf->db_relative_offset = 0;
991 				dbuf->db_data_size = len;
992 				dbuf->db_flags = DB_SEND_STATUS_GOOD |
993 				    DB_DIRECTION_TO_RPORT;
994 				/* XXX keep for FW? */
995 				DTRACE_PROBE4(sbd__xfer,
996 				    struct scsi_task *, task,
997 				    struct stmf_data_buf *, dbuf,
998 				    uint64_t, laddr, uint32_t, len);
999 				(void) stmf_xfer_data(task, dbuf,
1000 				    STMF_IOF_LU_DONE);
1001 			}
1002 			return;
1003 		}
1004 
1005 		/* committed to reduced copy */
1006 		if (task->task_lu_private) {
1007 			scmd = (sbd_cmd_t *)task->task_lu_private;
1008 		} else {
1009 			scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t),
1010 			    KM_SLEEP);
1011 			task->task_lu_private = scmd;
1012 		}
1013 		/*
1014 		 * Setup scmd to track read progress.
1015 		 */
1016 		scmd->flags = SBD_SCSI_CMD_ACTIVE | SBD_SCSI_CMD_ATS_RELATED;
1017 		scmd->cmd_type = SBD_CMD_SCSI_READ;
1018 		scmd->nbufs = 0;
1019 		scmd->addr = laddr;
1020 		scmd->len = len;
1021 		scmd->current_ro = 0;
1022 		/*
1023 		 * Kick-off the read.
1024 		 */
1025 		sbd_do_sgl_read_xfer(task, scmd, 1);
1026 		return;
1027 	}
1028 
1029 	if (initial_dbuf == NULL) {
1030 		uint32_t maxsize, minsize, old_minsize;
1031 
1032 		maxsize = (len > (128*1024)) ? 128*1024 : len;
1033 		minsize = maxsize >> 2;
1034 		do {
1035 			old_minsize = minsize;
1036 			initial_dbuf = stmf_alloc_dbuf(task, maxsize,
1037 			    &minsize, 0);
1038 		} while ((initial_dbuf == NULL) && (old_minsize > minsize) &&
1039 		    (minsize >= 512));
1040 		if (initial_dbuf == NULL) {
1041 			sbd_ats_remove_by_task(task);
1042 			stmf_scsilib_send_status(task, STATUS_QFULL, 0);
1043 			return;
1044 		}
1045 	}
1046 	dbuf = initial_dbuf;
1047 
1048 	if ((dbuf->db_buf_size >= len) && fast_path &&
1049 	    (dbuf->db_sglist_length == 1)) {
1050 		if (sbd_data_read(sl, task, laddr, (uint64_t)len,
1051 		    dbuf->db_sglist[0].seg_addr) == STMF_SUCCESS) {
1052 			dbuf->db_relative_offset = 0;
1053 			dbuf->db_data_size = len;
1054 			dbuf->db_flags = DB_SEND_STATUS_GOOD |
1055 			    DB_DIRECTION_TO_RPORT;
1056 			/* XXX keep for FW? */
1057 			DTRACE_PROBE4(sbd__xfer, struct scsi_task *, task,
1058 			    struct stmf_data_buf *, dbuf,
1059 			    uint64_t, laddr, uint32_t, len);
1060 			(void) stmf_xfer_data(task, dbuf, STMF_IOF_LU_DONE);
1061 		} else {
1062 			stmf_scsilib_send_status(task, STATUS_CHECK,
1063 			    STMF_SAA_READ_ERROR);
1064 		}
1065 		sbd_ats_remove_by_task(task);
1066 		return;
1067 	}
1068 
1069 	if (task->task_lu_private) {
1070 		scmd = (sbd_cmd_t *)task->task_lu_private;
1071 	} else {
1072 		scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
1073 		task->task_lu_private = scmd;
1074 	}
1075 	scmd->flags = SBD_SCSI_CMD_ACTIVE | SBD_SCSI_CMD_ATS_RELATED;
1076 	scmd->cmd_type = SBD_CMD_SCSI_READ;
1077 	scmd->nbufs = 1;
1078 	scmd->addr = laddr;
1079 	scmd->len = len;
1080 	scmd->current_ro = 0;
1081 
1082 	sbd_do_read_xfer(task, scmd, dbuf);
1083 }
1084 
1085 void
sbd_do_write_xfer(struct scsi_task * task,sbd_cmd_t * scmd,struct stmf_data_buf * dbuf,uint8_t dbuf_reusable)1086 sbd_do_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
1087     struct stmf_data_buf *dbuf, uint8_t dbuf_reusable)
1088 {
1089 	uint32_t len;
1090 	int bufs_to_take;
1091 
1092 	if (ATOMIC32_GET(scmd->len) == 0) {
1093 		goto DO_WRITE_XFER_DONE;
1094 	}
1095 
1096 	/* Lets try not to hog all the buffers the port has. */
1097 	bufs_to_take = ((task->task_max_nbufs > 2) &&
1098 	    (task->task_cmd_xfer_length < (32 * 1024))) ? 2 :
1099 	    task->task_max_nbufs;
1100 
1101 	if ((dbuf != NULL) &&
1102 	    ((dbuf->db_flags & DB_DONT_REUSE) || (dbuf_reusable == 0))) {
1103 		/* free current dbuf and allocate a new one */
1104 		stmf_free_dbuf(task, dbuf);
1105 		dbuf = NULL;
1106 	}
1107 	if (ATOMIC8_GET(scmd->nbufs) >= bufs_to_take) {
1108 		goto DO_WRITE_XFER_DONE;
1109 	}
1110 	if (dbuf == NULL) {
1111 		uint32_t maxsize, minsize, old_minsize;
1112 
1113 		maxsize = (ATOMIC32_GET(scmd->len) > (128*1024)) ? 128*1024 :
1114 		    ATOMIC32_GET(scmd->len);
1115 		minsize = maxsize >> 2;
1116 		do {
1117 			old_minsize = minsize;
1118 			dbuf = stmf_alloc_dbuf(task, maxsize, &minsize, 0);
1119 		} while ((dbuf == NULL) && (old_minsize > minsize) &&
1120 		    (minsize >= 512));
1121 		if (dbuf == NULL) {
1122 			if (ATOMIC8_GET(scmd->nbufs) == 0) {
1123 				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1124 				    STMF_ALLOC_FAILURE, NULL);
1125 			}
1126 			return;
1127 		}
1128 	}
1129 
1130 	len = ATOMIC32_GET(scmd->len) > dbuf->db_buf_size ? dbuf->db_buf_size :
1131 	    ATOMIC32_GET(scmd->len);
1132 
1133 	dbuf->db_relative_offset = scmd->current_ro;
1134 	dbuf->db_data_size = len;
1135 	dbuf->db_flags = DB_DIRECTION_FROM_RPORT;
1136 	(void) stmf_xfer_data(task, dbuf, 0);
1137 	/* outstanding port xfers and bufs used */
1138 	atomic_inc_8(&scmd->nbufs);
1139 	atomic_add_32(&scmd->len, -len);
1140 	scmd->current_ro += len;
1141 
1142 	if ((ATOMIC32_GET(scmd->len) != 0) &&
1143 	    (ATOMIC8_GET(scmd->nbufs) < bufs_to_take)) {
1144 		sbd_do_write_xfer(task, scmd, NULL, 0);
1145 	}
1146 	return;
1147 
1148 DO_WRITE_XFER_DONE:
1149 	if (dbuf != NULL) {
1150 		stmf_free_dbuf(task, dbuf);
1151 	}
1152 }
1153 
1154 void
sbd_do_sgl_write_xfer(struct scsi_task * task,sbd_cmd_t * scmd,int first_xfer)1155 sbd_do_sgl_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer)
1156 {
1157 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1158 	sbd_zvol_io_t *zvio;
1159 	int ret;
1160 	uint32_t xfer_len, max_len, first_len;
1161 	stmf_status_t xstat;
1162 	stmf_data_buf_t *dbuf;
1163 	uint_t nblks;
1164 	uint64_t blksize = sl->sl_blksize;
1165 	uint64_t offset;
1166 	size_t db_private_sz;
1167 	uintptr_t pad;
1168 
1169 	ASSERT(rw_read_held(&sl->sl_access_state_lock));
1170 	ASSERT((sl->sl_flags & SL_MEDIA_LOADED) != 0);
1171 
1172 	/*
1173 	 * Calculate the limits on xfer_len to the minimum of :
1174 	 *    - task limit
1175 	 *    - lun limit
1176 	 *    - sbd global limit if set
1177 	 *    - first xfer limit if set
1178 	 *
1179 	 * First, protect against silly over-ride value
1180 	 */
1181 	if (sbd_max_xfer_len && ((sbd_max_xfer_len % DEV_BSIZE) != 0)) {
1182 		cmn_err(CE_WARN, "sbd_max_xfer_len invalid %d, resetting\n",
1183 		    sbd_max_xfer_len);
1184 		sbd_max_xfer_len = 0;
1185 	}
1186 	if (sbd_1st_xfer_len && ((sbd_1st_xfer_len % DEV_BSIZE) != 0)) {
1187 		cmn_err(CE_WARN, "sbd_1st_xfer_len invalid %d, resetting\n",
1188 		    sbd_1st_xfer_len);
1189 		sbd_1st_xfer_len = 0;
1190 	}
1191 
1192 	max_len = MIN(task->task_max_xfer_len, sl->sl_max_xfer_len);
1193 	if (sbd_max_xfer_len)
1194 		max_len = MIN(max_len, sbd_max_xfer_len);
1195 	/*
1196 	 * Special case the first xfer if hints are set.
1197 	 */
1198 	if (first_xfer && (sbd_1st_xfer_len || task->task_1st_xfer_len)) {
1199 		/* global over-ride has precedence */
1200 		if (sbd_1st_xfer_len)
1201 			first_len = sbd_1st_xfer_len;
1202 		else
1203 			first_len = task->task_1st_xfer_len;
1204 	} else {
1205 		first_len = 0;
1206 	}
1207 
1208 
1209 	while (ATOMIC32_GET(scmd->len) &&
1210 	    ATOMIC8_GET(scmd->nbufs) < task->task_max_nbufs) {
1211 		xfer_len = MIN(max_len, ATOMIC32_GET(scmd->len));
1212 		if (first_len) {
1213 			xfer_len = MIN(xfer_len, first_len);
1214 			first_len = 0;
1215 		}
1216 		if (xfer_len < ATOMIC32_GET(scmd->len)) {
1217 			/*
1218 			 * Attempt to end xfer on a block boundary.
1219 			 * The only way this does not happen is if the
1220 			 * xfer_len is small enough to stay contained
1221 			 * within the same block.
1222 			 */
1223 			uint64_t xfer_offset, xfer_aligned_end;
1224 
1225 			xfer_offset = scmd->addr + scmd->current_ro;
1226 			xfer_aligned_end =
1227 			    P2ALIGN(xfer_offset+xfer_len, blksize);
1228 			if (xfer_aligned_end > xfer_offset)
1229 				xfer_len = xfer_aligned_end - xfer_offset;
1230 		}
1231 		/*
1232 		 * Allocate object to track the write and reserve
1233 		 * enough space for scatter/gather list.
1234 		 */
1235 		offset = scmd->addr + scmd->current_ro;
1236 		nblks = sbd_zvol_numsegs(sl, offset, xfer_len);
1237 		db_private_sz = sizeof (*zvio) + sizeof (uintptr_t) /* PAD */ +
1238 		    (nblks * sizeof (stmf_sglist_ent_t));
1239 		dbuf = stmf_alloc(STMF_STRUCT_DATA_BUF, db_private_sz,
1240 		    AF_DONTZERO);
1241 
1242 		/*
1243 		 * Setup the dbuf
1244 		 *
1245 		 * XXX Framework does not handle variable length sglists
1246 		 * properly, so setup db_lu_private and db_port_private
1247 		 * fields here. db_stmf_private is properly set for
1248 		 * calls to stmf_free.
1249 		 */
1250 		if (dbuf->db_port_private == NULL) {
1251 			/*
1252 			 * XXX Framework assigns space to PP after db_sglist[0]
1253 			 */
1254 			cmn_err(CE_PANIC, "db_port_private == NULL");
1255 		}
1256 		pad = (uintptr_t)&dbuf->db_sglist[nblks];
1257 		dbuf->db_lu_private = (void *)P2ROUNDUP(pad, sizeof (pad));
1258 		dbuf->db_port_private = NULL;
1259 		dbuf->db_buf_size = xfer_len;
1260 		dbuf->db_data_size = xfer_len;
1261 		dbuf->db_relative_offset = scmd->current_ro;
1262 		dbuf->db_sglist_length = (uint16_t)nblks;
1263 		dbuf->db_xfer_status = 0;
1264 		dbuf->db_handle = 0;
1265 		dbuf->db_flags = (DB_DONT_CACHE | DB_DONT_REUSE |
1266 		    DB_DIRECTION_FROM_RPORT | DB_LU_DATA_BUF);
1267 
1268 		zvio = dbuf->db_lu_private;
1269 		zvio->zvio_offset = offset;
1270 
1271 		/* get the buffers */
1272 		ret = sbd_zvol_alloc_write_bufs(sl, dbuf);
1273 		if (ret != 0) {
1274 			/*
1275 			 * Could not allocate buffers from the backend;
1276 			 * treat it like an IO error.
1277 			 */
1278 			stmf_free(dbuf);
1279 			scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
1280 			if (ATOMIC8_GET(scmd->nbufs) == 0) {
1281 				/*
1282 				 * Nothing queued, so no completions coming
1283 				 */
1284 				sbd_ats_remove_by_task(task);
1285 				stmf_scsilib_send_status(task, STATUS_CHECK,
1286 				    STMF_SAA_WRITE_ERROR);
1287 				rw_exit(&sl->sl_access_state_lock);
1288 			}
1289 			/*
1290 			 * Completions of previous buffers will cleanup.
1291 			 */
1292 			return;
1293 		}
1294 
1295 		/*
1296 		 * Allow PP to do setup
1297 		 */
1298 		xstat = stmf_setup_dbuf(task, dbuf, 0);
1299 		if (xstat != STMF_SUCCESS) {
1300 			/*
1301 			 * This could happen if the driver cannot get the
1302 			 * DDI resources it needs for this request.
1303 			 * If other dbufs are queued, try again when the next
1304 			 * one completes, otherwise give up.
1305 			 */
1306 			sbd_zvol_rele_write_bufs_abort(sl, dbuf);
1307 			stmf_free(dbuf);
1308 			if (ATOMIC8_GET(scmd->nbufs) > 0) {
1309 				/* completion of previous dbuf will retry */
1310 				return;
1311 			}
1312 			/*
1313 			 * Done with this command.
1314 			 */
1315 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
1316 			sbd_ats_remove_by_task(task);
1317 			if (first_xfer)
1318 				stmf_scsilib_send_status(task, STATUS_QFULL, 0);
1319 			else
1320 				stmf_scsilib_send_status(task, STATUS_CHECK,
1321 				    STMF_SAA_WRITE_ERROR);
1322 			rw_exit(&sl->sl_access_state_lock);
1323 			return;
1324 		}
1325 
1326 		/*
1327 		 * dbuf is now queued on task
1328 		 */
1329 		atomic_inc_8(&scmd->nbufs);
1330 
1331 		xstat = stmf_xfer_data(task, dbuf, 0);
1332 		switch (xstat) {
1333 		case STMF_SUCCESS:
1334 			break;
1335 		case STMF_BUSY:
1336 			/*
1337 			 * The dbuf is queued on the task, but unknown
1338 			 * to the PP, thus no completion will occur.
1339 			 */
1340 			sbd_zvol_rele_write_bufs_abort(sl, dbuf);
1341 			stmf_teardown_dbuf(task, dbuf);
1342 			stmf_free(dbuf);
1343 			atomic_dec_8(&scmd->nbufs);
1344 			if (ATOMIC8_GET(scmd->nbufs) > 0) {
1345 				/* completion of previous dbuf will retry */
1346 				return;
1347 			}
1348 			/*
1349 			 * Done with this command.
1350 			 */
1351 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
1352 			sbd_ats_remove_by_task(task);
1353 			if (first_xfer)
1354 				stmf_scsilib_send_status(task, STATUS_QFULL, 0);
1355 			else
1356 				stmf_scsilib_send_status(task, STATUS_CHECK,
1357 				    STMF_SAA_WRITE_ERROR);
1358 			rw_exit(&sl->sl_access_state_lock);
1359 			return;
1360 		case STMF_ABORTED:
1361 			/*
1362 			 * Completion code will cleanup.
1363 			 */
1364 			scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
1365 			return;
1366 		}
1367 		/*
1368 		 * Update the xfer progress.
1369 		 */
1370 		atomic_add_32(&scmd->len, -xfer_len);
1371 		scmd->current_ro += xfer_len;
1372 	}
1373 }
1374 
1375 void
sbd_handle_write_xfer_completion(struct scsi_task * task,sbd_cmd_t * scmd,struct stmf_data_buf * dbuf,uint8_t dbuf_reusable)1376 sbd_handle_write_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
1377     struct stmf_data_buf *dbuf, uint8_t dbuf_reusable)
1378 {
1379 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1380 	uint64_t laddr;
1381 	uint32_t buflen, iolen;
1382 	int ndx;
1383 	uint8_t op = task->task_cdb[0];
1384 	boolean_t fua_bit = B_FALSE;
1385 
1386 	if (ATOMIC8_GET(scmd->nbufs) > 0) {
1387 		/*
1388 		 * Decrement the count to indicate the port xfer
1389 		 * into the dbuf has completed even though the buf is
1390 		 * still in use here in the LU provider.
1391 		 */
1392 		atomic_dec_8(&scmd->nbufs);
1393 	}
1394 
1395 	if (dbuf->db_xfer_status != STMF_SUCCESS) {
1396 		sbd_ats_remove_by_task(task);
1397 		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1398 		    dbuf->db_xfer_status, NULL);
1399 		return;
1400 	}
1401 
1402 	if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
1403 		goto WRITE_XFER_DONE;
1404 	}
1405 
1406 	if (ATOMIC32_GET(scmd->len) != 0) {
1407 		/*
1408 		 * Initiate the next port xfer to occur in parallel
1409 		 * with writing this buf.
1410 		 */
1411 		sbd_do_write_xfer(task, scmd, NULL, 0);
1412 	}
1413 
1414 	/*
1415 	 * Check to see if the command is WRITE(10), WRITE(12), or WRITE(16).
1416 	 * If it is then check for bit 3 being set to indicate if Forced
1417 	 * Unit Access is being requested. If so, we'll bypass the direct
1418 	 * call and handle it in sbd_data_write().
1419 	 */
1420 	if (((op == SCMD_WRITE_G1) || (op == SCMD_WRITE_G4) ||
1421 	    (op == SCMD_WRITE_G5)) && (task->task_cdb[1] & BIT_3)) {
1422 		fua_bit = B_TRUE;
1423 	}
1424 	laddr = scmd->addr + dbuf->db_relative_offset;
1425 
1426 	/*
1427 	 * If this is going to a zvol, use the direct call to
1428 	 * sbd_zvol_copy_{read,write}. The direct call interface is
1429 	 * restricted to PPs that accept sglists, but that is not required.
1430 	 */
1431 	if (sl->sl_flags & SL_CALL_ZVOL &&
1432 	    (task->task_additional_flags & TASK_AF_ACCEPT_LU_DBUF) &&
1433 	    (sbd_zcopy & (4|1)) && !fua_bit) {
1434 		int commit;
1435 
1436 		commit = (ATOMIC32_GET(scmd->len) == 0 &&
1437 		    ATOMIC8_GET(scmd->nbufs) == 0);
1438 		rw_enter(&sl->sl_access_state_lock, RW_READER);
1439 		if ((sl->sl_flags & SL_MEDIA_LOADED) == 0 ||
1440 		    sbd_copy_rdwr(task, laddr, dbuf, SBD_CMD_SCSI_WRITE,
1441 		    commit) != STMF_SUCCESS)
1442 			scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
1443 		rw_exit(&sl->sl_access_state_lock);
1444 		buflen = dbuf->db_data_size;
1445 	} else {
1446 		for (buflen = 0, ndx = 0; (buflen < dbuf->db_data_size) &&
1447 		    (ndx < dbuf->db_sglist_length); ndx++) {
1448 			iolen = min(dbuf->db_data_size - buflen,
1449 			    dbuf->db_sglist[ndx].seg_length);
1450 			if (iolen == 0)
1451 				break;
1452 			if (sbd_data_write(sl, task, laddr, (uint64_t)iolen,
1453 			    dbuf->db_sglist[ndx].seg_addr) != STMF_SUCCESS) {
1454 				scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
1455 				break;
1456 			}
1457 			buflen += iolen;
1458 			laddr += (uint64_t)iolen;
1459 		}
1460 	}
1461 	task->task_nbytes_transferred += buflen;
1462 WRITE_XFER_DONE:
1463 	if (ATOMIC32_GET(scmd->len) == 0 ||
1464 	    scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
1465 		stmf_free_dbuf(task, dbuf);
1466 		if (ATOMIC8_GET(scmd->nbufs))
1467 			return;	/* wait for all buffers to complete */
1468 		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
1469 		sbd_ats_remove_by_task(task);
1470 		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
1471 			stmf_scsilib_send_status(task, STATUS_CHECK,
1472 			    STMF_SAA_WRITE_ERROR);
1473 		} else {
1474 			/*
1475 			 * If SYNC_WRITE flag is on then we need to flush
1476 			 * cache before sending status.
1477 			 * Note: this may be a no-op because of how
1478 			 * SL_WRITEBACK_CACHE_DISABLE and
1479 			 * SL_FLUSH_ON_DISABLED_WRITECACHE are set, but not
1480 			 * worth code complexity of checking those in this code
1481 			 * path, SBD_SCSI_CMD_SYNC_WRITE is rarely set.
1482 			 */
1483 			if ((scmd->flags & SBD_SCSI_CMD_SYNC_WRITE) &&
1484 			    (sbd_flush_data_cache(sl, 0) != SBD_SUCCESS)) {
1485 				stmf_scsilib_send_status(task, STATUS_CHECK,
1486 				    STMF_SAA_WRITE_ERROR);
1487 			} else {
1488 				stmf_scsilib_send_status(task, STATUS_GOOD, 0);
1489 			}
1490 		}
1491 		return;
1492 	}
1493 	sbd_do_write_xfer(task, scmd, dbuf, dbuf_reusable);
1494 }
1495 
1496 /*
1497  * Return true if copy avoidance is beneficial.
1498  */
1499 static int
sbd_zcopy_write_useful(scsi_task_t * task,uint64_t laddr,uint32_t len,uint64_t blksize)1500 sbd_zcopy_write_useful(scsi_task_t *task, uint64_t laddr, uint32_t len,
1501     uint64_t blksize)
1502 {
1503 	/*
1504 	 * If there is a global copy threshold over-ride, use it.
1505 	 * Otherwise use the PP value with the caveat that at least
1506 	 * 1/2 the data must avoid being copied to be useful.
1507 	 */
1508 	if (sbd_copy_threshold > 0) {
1509 		return (len >= sbd_copy_threshold);
1510 	} else {
1511 		uint64_t no_copy_span;
1512 
1513 		/* sub-blocksize writes always copy */
1514 		if (len < task->task_copy_threshold || len < blksize)
1515 			return (0);
1516 		/*
1517 		 * Calculate amount of data that will avoid the copy path.
1518 		 * The calculation is only valid if len >= blksize.
1519 		 */
1520 		no_copy_span = P2ALIGN(laddr+len, blksize) -
1521 		    P2ROUNDUP(laddr, blksize);
1522 		return (no_copy_span >= len/2);
1523 	}
1524 }
1525 
1526 void
sbd_handle_write(struct scsi_task * task,struct stmf_data_buf * initial_dbuf)1527 sbd_handle_write(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
1528 {
1529 	uint64_t lba, laddr;
1530 	uint32_t len;
1531 	uint8_t op = task->task_cdb[0], do_immediate_data = 0;
1532 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1533 	sbd_cmd_t *scmd;
1534 	stmf_data_buf_t *dbuf;
1535 	uint64_t blkcount;
1536 	uint8_t	sync_wr_flag = 0;
1537 	boolean_t fua_bit = B_FALSE;
1538 
1539 	if (sl->sl_flags & SL_WRITE_PROTECTED) {
1540 		stmf_scsilib_send_status(task, STATUS_CHECK,
1541 		    STMF_SAA_WRITE_PROTECTED);
1542 		return;
1543 	}
1544 	/*
1545 	 * Check to see if the command is WRITE(10), WRITE(12), or WRITE(16).
1546 	 * If it is then check for bit 3 being set to indicate if Forced
1547 	 * Unit Access is being requested. If so, we'll bypass the fast path
1548 	 * code to simplify support of this feature.
1549 	 */
1550 	if (((op == SCMD_WRITE_G1) || (op == SCMD_WRITE_G4) ||
1551 	    (op == SCMD_WRITE_G5)) && (task->task_cdb[1] & BIT_3)) {
1552 		fua_bit = B_TRUE;
1553 	}
1554 	if (op == SCMD_WRITE) {
1555 		lba = READ_SCSI21(&task->task_cdb[1], uint64_t);
1556 		len = (uint32_t)task->task_cdb[4];
1557 
1558 		if (len == 0) {
1559 			len = 256;
1560 		}
1561 	} else if (op == SCMD_WRITE_G1) {
1562 		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
1563 		len = READ_SCSI16(&task->task_cdb[7], uint32_t);
1564 	} else if (op == SCMD_WRITE_G5) {
1565 		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
1566 		len = READ_SCSI32(&task->task_cdb[6], uint32_t);
1567 	} else if (op == SCMD_WRITE_G4) {
1568 		lba = READ_SCSI64(&task->task_cdb[2], uint64_t);
1569 		len = READ_SCSI32(&task->task_cdb[10], uint32_t);
1570 	} else if (op == SCMD_WRITE_VERIFY) {
1571 		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
1572 		len = READ_SCSI16(&task->task_cdb[7], uint32_t);
1573 		sync_wr_flag = SBD_SCSI_CMD_SYNC_WRITE;
1574 	} else if (op == SCMD_WRITE_VERIFY_G5) {
1575 		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
1576 		len = READ_SCSI32(&task->task_cdb[6], uint32_t);
1577 		sync_wr_flag = SBD_SCSI_CMD_SYNC_WRITE;
1578 	} else if (op == SCMD_WRITE_VERIFY_G4) {
1579 		lba = READ_SCSI64(&task->task_cdb[2], uint64_t);
1580 		len = READ_SCSI32(&task->task_cdb[10], uint32_t);
1581 		sync_wr_flag = SBD_SCSI_CMD_SYNC_WRITE;
1582 	} else {
1583 		stmf_scsilib_send_status(task, STATUS_CHECK,
1584 		    STMF_SAA_INVALID_OPCODE);
1585 		return;
1586 	}
1587 
1588 	laddr = lba << sl->sl_data_blocksize_shift;
1589 	blkcount = len;
1590 	len <<= sl->sl_data_blocksize_shift;
1591 
1592 	if ((laddr + (uint64_t)len) > sl->sl_lu_size) {
1593 		stmf_scsilib_send_status(task, STATUS_CHECK,
1594 		    STMF_SAA_LBA_OUT_OF_RANGE);
1595 		return;
1596 	}
1597 
1598 	task->task_cmd_xfer_length = len;
1599 	if (task->task_additional_flags & TASK_AF_NO_EXPECTED_XFER_LENGTH) {
1600 		task->task_expected_xfer_length = len;
1601 	}
1602 
1603 	len = (len > task->task_expected_xfer_length) ?
1604 	    task->task_expected_xfer_length : len;
1605 
1606 	if (len == 0) {
1607 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
1608 		return;
1609 	}
1610 
1611 	if (sbd_ats_handling_before_io(task, sl, lba, blkcount) !=
1612 	    SBD_SUCCESS) {
1613 		if (stmf_task_poll_lu(task, 10) != STMF_SUCCESS) {
1614 			stmf_scsilib_send_status(task, STATUS_BUSY, 0);
1615 		}
1616 		return;
1617 	}
1618 
1619 	if (sbd_zcopy & (4|1) &&		/* Debug switch */
1620 	    initial_dbuf == NULL &&		/* No PP buf passed in */
1621 	    sl->sl_flags & SL_CALL_ZVOL &&	/* zvol backing store */
1622 	    (task->task_additional_flags &
1623 	    TASK_AF_ACCEPT_LU_DBUF) &&		/* PP allows it */
1624 	    sbd_zcopy_write_useful(task, laddr, len, sl->sl_blksize) &&
1625 	    !fua_bit) {
1626 
1627 		/*
1628 		 * XXX Note that disallowing initial_dbuf will eliminate
1629 		 * iSCSI from participating. For small writes, that is
1630 		 * probably ok. For large writes, it may be best to just
1631 		 * copy the data from the initial dbuf and use zcopy for
1632 		 * the rest.
1633 		 */
1634 		rw_enter(&sl->sl_access_state_lock, RW_READER);
1635 		if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
1636 			rw_exit(&sl->sl_access_state_lock);
1637 			sbd_ats_remove_by_task(task);
1638 			stmf_scsilib_send_status(task, STATUS_CHECK,
1639 			    STMF_SAA_READ_ERROR);
1640 			return;
1641 		}
1642 		/*
1643 		 * Setup scmd to track the write progress.
1644 		 */
1645 		if (task->task_lu_private) {
1646 			scmd = (sbd_cmd_t *)task->task_lu_private;
1647 		} else {
1648 			scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t),
1649 			    KM_SLEEP);
1650 			task->task_lu_private = scmd;
1651 		}
1652 		scmd->flags = SBD_SCSI_CMD_ACTIVE | SBD_SCSI_CMD_ATS_RELATED |
1653 		    sync_wr_flag;
1654 		scmd->cmd_type = SBD_CMD_SCSI_WRITE;
1655 		scmd->nbufs = 0;
1656 		scmd->addr = laddr;
1657 		scmd->len = len;
1658 		scmd->current_ro = 0;
1659 		sbd_do_sgl_write_xfer(task, scmd, 1);
1660 		return;
1661 	}
1662 
1663 	if ((initial_dbuf != NULL) && (task->task_flags & TF_INITIAL_BURST)) {
1664 		if (initial_dbuf->db_data_size > len) {
1665 			if (initial_dbuf->db_data_size >
1666 			    task->task_expected_xfer_length) {
1667 				/* protocol error */
1668 				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1669 				    STMF_INVALID_ARG, NULL);
1670 				return;
1671 			}
1672 			initial_dbuf->db_data_size = len;
1673 		}
1674 		do_immediate_data = 1;
1675 	}
1676 	dbuf = initial_dbuf;
1677 
1678 	if (task->task_lu_private) {
1679 		scmd = (sbd_cmd_t *)task->task_lu_private;
1680 	} else {
1681 		scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
1682 		task->task_lu_private = scmd;
1683 	}
1684 	scmd->flags = SBD_SCSI_CMD_ACTIVE | SBD_SCSI_CMD_ATS_RELATED |
1685 	    sync_wr_flag;
1686 	scmd->cmd_type = SBD_CMD_SCSI_WRITE;
1687 	scmd->nbufs = 0;
1688 	scmd->addr = laddr;
1689 	scmd->len = len;
1690 	scmd->current_ro = 0;
1691 
1692 	if (do_immediate_data) {
1693 		/*
1694 		 * Account for data passed in this write command
1695 		 */
1696 		(void) stmf_xfer_data(task, dbuf, STMF_IOF_STATS_ONLY);
1697 		atomic_add_32(&scmd->len, -dbuf->db_data_size);
1698 		scmd->current_ro += dbuf->db_data_size;
1699 		dbuf->db_xfer_status = STMF_SUCCESS;
1700 		sbd_handle_write_xfer_completion(task, scmd, dbuf, 0);
1701 	} else {
1702 		sbd_do_write_xfer(task, scmd, dbuf, 0);
1703 	}
1704 }
1705 
1706 /*
1707  * Utility routine to handle small non performance data transfers to the
1708  * initiators. dbuf is an initial data buf (if any), 'p' points to a data
1709  * buffer which is source of data for transfer, cdb_xfer_size is the
1710  * transfer size based on CDB, cmd_xfer_size is the actual amount of data
1711  * which this command would transfer (the size of data pointed to by 'p').
1712  */
1713 void
sbd_handle_short_read_transfers(scsi_task_t * task,stmf_data_buf_t * dbuf,uint8_t * p,uint32_t cdb_xfer_size,uint32_t cmd_xfer_size)1714 sbd_handle_short_read_transfers(scsi_task_t *task, stmf_data_buf_t *dbuf,
1715     uint8_t *p, uint32_t cdb_xfer_size, uint32_t cmd_xfer_size)
1716 {
1717 	uint32_t bufsize, ndx;
1718 	sbd_cmd_t *scmd;
1719 
1720 	cmd_xfer_size = min(cmd_xfer_size, cdb_xfer_size);
1721 
1722 	task->task_cmd_xfer_length = cmd_xfer_size;
1723 	if (task->task_additional_flags & TASK_AF_NO_EXPECTED_XFER_LENGTH) {
1724 		task->task_expected_xfer_length = cmd_xfer_size;
1725 	} else {
1726 		cmd_xfer_size = min(cmd_xfer_size,
1727 		    task->task_expected_xfer_length);
1728 	}
1729 
1730 	if (cmd_xfer_size == 0) {
1731 		stmf_scsilib_send_status(task, STATUS_CHECK,
1732 		    STMF_SAA_INVALID_FIELD_IN_CDB);
1733 		return;
1734 	}
1735 	if (dbuf == NULL) {
1736 		uint32_t minsize = cmd_xfer_size;
1737 
1738 		dbuf = stmf_alloc_dbuf(task, cmd_xfer_size, &minsize, 0);
1739 	}
1740 	if (dbuf == NULL) {
1741 		stmf_scsilib_send_status(task, STATUS_QFULL, 0);
1742 		return;
1743 	}
1744 
1745 	for (bufsize = 0, ndx = 0; bufsize < cmd_xfer_size; ndx++) {
1746 		uint8_t *d;
1747 		uint32_t s;
1748 
1749 		d = dbuf->db_sglist[ndx].seg_addr;
1750 		s = min((cmd_xfer_size - bufsize),
1751 		    dbuf->db_sglist[ndx].seg_length);
1752 		bcopy(p+bufsize, d, s);
1753 		bufsize += s;
1754 	}
1755 	dbuf->db_relative_offset = 0;
1756 	dbuf->db_data_size = cmd_xfer_size;
1757 	dbuf->db_flags = DB_DIRECTION_TO_RPORT;
1758 
1759 	if (task->task_lu_private == NULL) {
1760 		task->task_lu_private =
1761 		    kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
1762 	}
1763 	scmd = (sbd_cmd_t *)task->task_lu_private;
1764 
1765 	scmd->cmd_type = SBD_CMD_SMALL_READ;
1766 	scmd->flags = SBD_SCSI_CMD_ACTIVE;
1767 	(void) stmf_xfer_data(task, dbuf, 0);
1768 }
1769 
1770 void
sbd_handle_short_read_xfer_completion(struct scsi_task * task,sbd_cmd_t * scmd,struct stmf_data_buf * dbuf)1771 sbd_handle_short_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
1772     struct stmf_data_buf *dbuf)
1773 {
1774 	if (dbuf->db_xfer_status != STMF_SUCCESS) {
1775 		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1776 		    dbuf->db_xfer_status, NULL);
1777 		return;
1778 	}
1779 	task->task_nbytes_transferred = dbuf->db_data_size;
1780 	scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
1781 	stmf_scsilib_send_status(task, STATUS_GOOD, 0);
1782 }
1783 
1784 void
sbd_handle_short_write_transfers(scsi_task_t * task,stmf_data_buf_t * dbuf,uint32_t cdb_xfer_size)1785 sbd_handle_short_write_transfers(scsi_task_t *task,
1786     stmf_data_buf_t *dbuf, uint32_t cdb_xfer_size)
1787 {
1788 	sbd_cmd_t *scmd;
1789 
1790 	task->task_cmd_xfer_length = cdb_xfer_size;
1791 	if (task->task_additional_flags & TASK_AF_NO_EXPECTED_XFER_LENGTH) {
1792 		task->task_expected_xfer_length = cdb_xfer_size;
1793 	} else {
1794 		cdb_xfer_size = min(cdb_xfer_size,
1795 		    task->task_expected_xfer_length);
1796 	}
1797 
1798 	if (cdb_xfer_size == 0) {
1799 		stmf_scsilib_send_status(task, STATUS_CHECK,
1800 		    STMF_SAA_INVALID_FIELD_IN_CDB);
1801 		return;
1802 	}
1803 	if (task->task_lu_private == NULL) {
1804 		task->task_lu_private = kmem_zalloc(sizeof (sbd_cmd_t),
1805 		    KM_SLEEP);
1806 	} else {
1807 		bzero(task->task_lu_private, sizeof (sbd_cmd_t));
1808 	}
1809 	scmd = (sbd_cmd_t *)task->task_lu_private;
1810 	scmd->cmd_type = SBD_CMD_SMALL_WRITE;
1811 	scmd->flags = SBD_SCSI_CMD_ACTIVE;
1812 	scmd->len = cdb_xfer_size;
1813 	if (dbuf == NULL) {
1814 		uint32_t minsize = cdb_xfer_size;
1815 
1816 		dbuf = stmf_alloc_dbuf(task, cdb_xfer_size, &minsize, 0);
1817 		if (dbuf == NULL) {
1818 			stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1819 			    STMF_ALLOC_FAILURE, NULL);
1820 			return;
1821 		}
1822 		dbuf->db_data_size = cdb_xfer_size;
1823 		dbuf->db_relative_offset = 0;
1824 		dbuf->db_flags = DB_DIRECTION_FROM_RPORT;
1825 		(void) stmf_xfer_data(task, dbuf, 0);
1826 	} else {
1827 		if (dbuf->db_data_size < cdb_xfer_size) {
1828 			stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1829 			    STMF_ABORTED, NULL);
1830 			return;
1831 		}
1832 		dbuf->db_data_size = cdb_xfer_size;
1833 		sbd_handle_short_write_xfer_completion(task, dbuf);
1834 	}
1835 }
1836 
1837 void
sbd_handle_short_write_xfer_completion(scsi_task_t * task,stmf_data_buf_t * dbuf)1838 sbd_handle_short_write_xfer_completion(scsi_task_t *task,
1839     stmf_data_buf_t *dbuf)
1840 {
1841 	sbd_cmd_t *scmd;
1842 	stmf_status_t st_ret;
1843 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1844 
1845 	/*
1846 	 * For now lets assume we will get only one sglist element
1847 	 * for short writes. If that ever changes, we should allocate
1848 	 * a local buffer and copy all the sg elements to one linear space.
1849 	 */
1850 	if ((dbuf->db_xfer_status != STMF_SUCCESS) ||
1851 	    (dbuf->db_sglist_length > 1)) {
1852 		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1853 		    dbuf->db_xfer_status, NULL);
1854 		return;
1855 	}
1856 
1857 	task->task_nbytes_transferred = dbuf->db_data_size;
1858 	scmd = (sbd_cmd_t *)task->task_lu_private;
1859 	scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
1860 
1861 	/* Lets find out who to call */
1862 	switch (task->task_cdb[0]) {
1863 	case SCMD_MODE_SELECT:
1864 	case SCMD_MODE_SELECT_G1:
1865 		if (sl->sl_access_state == SBD_LU_STANDBY) {
1866 			st_ret = stmf_proxy_scsi_cmd(task, dbuf);
1867 			if (st_ret != STMF_SUCCESS) {
1868 				stmf_scsilib_send_status(task, STATUS_CHECK,
1869 				    STMF_SAA_LU_NO_ACCESS_UNAVAIL);
1870 			}
1871 		} else {
1872 			sbd_handle_mode_select_xfer(task,
1873 			    dbuf->db_sglist[0].seg_addr, dbuf->db_data_size);
1874 		}
1875 		break;
1876 	case SCMD_UNMAP:
1877 		sbd_handle_unmap_xfer(task,
1878 		    dbuf->db_sglist[0].seg_addr, dbuf->db_data_size);
1879 		break;
1880 	case SCMD_EXTENDED_COPY:
1881 		sbd_handle_xcopy_xfer(task, dbuf->db_sglist[0].seg_addr);
1882 		break;
1883 	case SCMD_PERSISTENT_RESERVE_OUT:
1884 		if (sl->sl_access_state == SBD_LU_STANDBY) {
1885 			st_ret = stmf_proxy_scsi_cmd(task, dbuf);
1886 			if (st_ret != STMF_SUCCESS) {
1887 				stmf_scsilib_send_status(task, STATUS_CHECK,
1888 				    STMF_SAA_LU_NO_ACCESS_UNAVAIL);
1889 			}
1890 		} else {
1891 			sbd_handle_pgr_out_data(task, dbuf);
1892 		}
1893 		break;
1894 	default:
1895 		/* This should never happen */
1896 		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1897 		    STMF_ABORTED, NULL);
1898 	}
1899 }
1900 
1901 void
sbd_handle_read_capacity(struct scsi_task * task,struct stmf_data_buf * initial_dbuf)1902 sbd_handle_read_capacity(struct scsi_task *task,
1903     struct stmf_data_buf *initial_dbuf)
1904 {
1905 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1906 	uint32_t cdb_len;
1907 	uint8_t p[32];
1908 	uint64_t s;
1909 	uint16_t blksize;
1910 
1911 	s = sl->sl_lu_size >> sl->sl_data_blocksize_shift;
1912 	s--;
1913 	blksize = ((uint16_t)1) << sl->sl_data_blocksize_shift;
1914 
1915 	switch (task->task_cdb[0]) {
1916 	case SCMD_READ_CAPACITY:
1917 		if (s & 0xffffffff00000000ull) {
1918 			p[0] = p[1] = p[2] = p[3] = 0xFF;
1919 		} else {
1920 			p[0] = (s >> 24) & 0xff;
1921 			p[1] = (s >> 16) & 0xff;
1922 			p[2] = (s >> 8) & 0xff;
1923 			p[3] = s & 0xff;
1924 		}
1925 		p[4] = 0; p[5] = 0;
1926 		p[6] = (blksize >> 8) & 0xff;
1927 		p[7] = blksize & 0xff;
1928 		sbd_handle_short_read_transfers(task, initial_dbuf, p, 8, 8);
1929 		break;
1930 
1931 	case SCMD_SVC_ACTION_IN_G4:
1932 		cdb_len = READ_SCSI32(&task->task_cdb[10], uint32_t);
1933 		bzero(p, 32);
1934 		p[0] = (s >> 56) & 0xff;
1935 		p[1] = (s >> 48) & 0xff;
1936 		p[2] = (s >> 40) & 0xff;
1937 		p[3] = (s >> 32) & 0xff;
1938 		p[4] = (s >> 24) & 0xff;
1939 		p[5] = (s >> 16) & 0xff;
1940 		p[6] = (s >> 8) & 0xff;
1941 		p[7] = s & 0xff;
1942 		p[10] = (blksize >> 8) & 0xff;
1943 		p[11] = blksize & 0xff;
1944 		if (sl->sl_flags & SL_UNMAP_ENABLED) {
1945 			p[14] = 0x80;
1946 		}
1947 		sbd_handle_short_read_transfers(task, initial_dbuf, p,
1948 		    cdb_len, 32);
1949 		break;
1950 	}
1951 }
1952 
1953 void
sbd_calc_geometry(uint64_t s,uint16_t blksize,uint8_t * nsectors,uint8_t * nheads,uint32_t * ncyl)1954 sbd_calc_geometry(uint64_t s, uint16_t blksize, uint8_t *nsectors,
1955     uint8_t *nheads, uint32_t *ncyl)
1956 {
1957 	if (s < (4ull * 1024ull * 1024ull * 1024ull)) {
1958 		*nsectors = 32;
1959 		*nheads = 8;
1960 	} else {
1961 		*nsectors = 254;
1962 		*nheads = 254;
1963 	}
1964 	*ncyl = s / ((uint64_t)blksize * (uint64_t)(*nsectors) *
1965 	    (uint64_t)(*nheads));
1966 }
1967 
1968 void
sbd_handle_mode_sense(struct scsi_task * task,struct stmf_data_buf * initial_dbuf,uint8_t * buf)1969 sbd_handle_mode_sense(struct scsi_task *task,
1970     struct stmf_data_buf *initial_dbuf, uint8_t *buf)
1971 {
1972 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1973 	uint32_t cmd_size, n;
1974 	uint8_t *cdb;
1975 	uint32_t ncyl;
1976 	uint8_t nsectors, nheads;
1977 	uint8_t page, ctrl, header_size;
1978 	uint16_t nbytes;
1979 	uint8_t *p;
1980 	uint64_t s = sl->sl_lu_size;
1981 	uint32_t dev_spec_param_offset;
1982 
1983 	p = buf;	/* buf is assumed to be zeroed out and large enough */
1984 	n = 0;
1985 	cdb = &task->task_cdb[0];
1986 	page = cdb[2] & 0x3F;
1987 	ctrl = (cdb[2] >> 6) & 3;
1988 
1989 	if (cdb[0] == SCMD_MODE_SENSE) {
1990 		cmd_size = cdb[4];
1991 		header_size = 4;
1992 		dev_spec_param_offset = 2;
1993 	} else {
1994 		cmd_size = READ_SCSI16(&cdb[7], uint32_t);
1995 		header_size = 8;
1996 		dev_spec_param_offset = 3;
1997 	}
1998 
1999 	/* Now validate the command */
2000 	if ((cdb[2] != 0) && (page != MODEPAGE_ALLPAGES) &&
2001 	    (page != MODEPAGE_CACHING) && (page != MODEPAGE_CTRL_MODE) &&
2002 	    (page != MODEPAGE_FORMAT) && (page != MODEPAGE_GEOMETRY)) {
2003 		stmf_scsilib_send_status(task, STATUS_CHECK,
2004 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2005 		return;
2006 	}
2007 
2008 	/* We will update the length in the mode header at the end */
2009 
2010 	/* Block dev device specific param in mode param header has wp bit */
2011 	if (sl->sl_flags & SL_WRITE_PROTECTED) {
2012 		p[n + dev_spec_param_offset] = BIT_7;
2013 	}
2014 	n += header_size;
2015 	/* We are not going to return any block descriptor */
2016 
2017 	nbytes = ((uint16_t)1) << sl->sl_data_blocksize_shift;
2018 	sbd_calc_geometry(s, nbytes, &nsectors, &nheads, &ncyl);
2019 
2020 	if ((page == MODEPAGE_FORMAT) || (page == MODEPAGE_ALLPAGES)) {
2021 		p[n] = 0x03;
2022 		p[n+1] = 0x16;
2023 		if (ctrl != 1) {
2024 			p[n + 11] = nsectors;
2025 			p[n + 12] = nbytes >> 8;
2026 			p[n + 13] = nbytes & 0xff;
2027 			p[n + 20] = 0x80;
2028 		}
2029 		n += 24;
2030 	}
2031 	if ((page == MODEPAGE_GEOMETRY) || (page == MODEPAGE_ALLPAGES)) {
2032 		p[n] = 0x04;
2033 		p[n + 1] = 0x16;
2034 		if (ctrl != 1) {
2035 			p[n + 2] = ncyl >> 16;
2036 			p[n + 3] = ncyl >> 8;
2037 			p[n + 4] = ncyl & 0xff;
2038 			p[n + 5] = nheads;
2039 			p[n + 20] = 0x15;
2040 			p[n + 21] = 0x18;
2041 		}
2042 		n += 24;
2043 	}
2044 	if ((page == MODEPAGE_CACHING) || (page == MODEPAGE_ALLPAGES)) {
2045 		struct mode_caching *mode_caching_page;
2046 
2047 		mode_caching_page = (struct mode_caching *)&p[n];
2048 
2049 		mode_caching_page->mode_page.code = MODEPAGE_CACHING;
2050 		mode_caching_page->mode_page.ps = 1; /* A saveable page */
2051 		mode_caching_page->mode_page.length = 0x12;
2052 
2053 		switch (ctrl) {
2054 		case (0):
2055 			/* Current */
2056 			if ((sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) == 0) {
2057 				mode_caching_page->wce = 1;
2058 			}
2059 			break;
2060 
2061 		case (1):
2062 			/* Changeable */
2063 			if ((sl->sl_flags &
2064 			    SL_WRITEBACK_CACHE_SET_UNSUPPORTED) == 0) {
2065 				mode_caching_page->wce = 1;
2066 			}
2067 			break;
2068 
2069 		default:
2070 			if ((sl->sl_flags &
2071 			    SL_SAVED_WRITE_CACHE_DISABLE) == 0) {
2072 				mode_caching_page->wce = 1;
2073 			}
2074 			break;
2075 		}
2076 		n += (sizeof (struct mode_page) +
2077 		    mode_caching_page->mode_page.length);
2078 	}
2079 	if ((page == MODEPAGE_CTRL_MODE) || (page == MODEPAGE_ALLPAGES)) {
2080 		struct mode_control_scsi3 *mode_control_page;
2081 
2082 		mode_control_page = (struct mode_control_scsi3 *)&p[n];
2083 
2084 		mode_control_page->mode_page.code = MODEPAGE_CTRL_MODE;
2085 		mode_control_page->mode_page.length =
2086 		    PAGELENGTH_MODE_CONTROL_SCSI3;
2087 		if (ctrl != 1) {
2088 			/* If not looking for changeable values, report this. */
2089 			mode_control_page->que_mod = CTRL_QMOD_UNRESTRICT;
2090 		}
2091 		n += (sizeof (struct mode_page) +
2092 		    mode_control_page->mode_page.length);
2093 	}
2094 
2095 	if (cdb[0] == SCMD_MODE_SENSE) {
2096 		if (n > 255) {
2097 			stmf_scsilib_send_status(task, STATUS_CHECK,
2098 			    STMF_SAA_INVALID_FIELD_IN_CDB);
2099 			return;
2100 		}
2101 		/*
2102 		 * Mode parameter header length doesn't include the number
2103 		 * of bytes in the length field, so adjust the count.
2104 		 * Byte count minus header length field size.
2105 		 */
2106 		buf[0] = (n - header_size) & 0xff;
2107 	} else {
2108 		/* Byte count minus header length field size. */
2109 		buf[1] = (n - header_size) & 0xff;
2110 		buf[0] = ((n - header_size) >> 8) & 0xff;
2111 	}
2112 
2113 	sbd_handle_short_read_transfers(task, initial_dbuf, buf,
2114 	    cmd_size, n);
2115 }
2116 
2117 void
sbd_handle_mode_select(scsi_task_t * task,stmf_data_buf_t * dbuf)2118 sbd_handle_mode_select(scsi_task_t *task, stmf_data_buf_t *dbuf)
2119 {
2120 	uint32_t cmd_xfer_len;
2121 
2122 	if (task->task_cdb[0] == SCMD_MODE_SELECT) {
2123 		cmd_xfer_len = (uint32_t)task->task_cdb[4];
2124 	} else {
2125 		cmd_xfer_len = READ_SCSI16(&task->task_cdb[7], uint32_t);
2126 	}
2127 
2128 	if ((task->task_cdb[1] & 0xFE) != 0x10) {
2129 		stmf_scsilib_send_status(task, STATUS_CHECK,
2130 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2131 		return;
2132 	}
2133 
2134 	if (cmd_xfer_len == 0) {
2135 		/* zero byte mode selects are allowed */
2136 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2137 		return;
2138 	}
2139 
2140 	sbd_handle_short_write_transfers(task, dbuf, cmd_xfer_len);
2141 }
2142 
2143 void
sbd_handle_mode_select_xfer(scsi_task_t * task,uint8_t * buf,uint32_t buflen)2144 sbd_handle_mode_select_xfer(scsi_task_t *task, uint8_t *buf, uint32_t buflen)
2145 {
2146 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2147 	sbd_it_data_t *it;
2148 	int hdr_len, bd_len;
2149 	sbd_status_t sret;
2150 	int i;
2151 
2152 	if (task->task_cdb[0] == SCMD_MODE_SELECT) {
2153 		hdr_len = 4;
2154 	} else {
2155 		hdr_len = 8;
2156 	}
2157 
2158 	if (buflen < hdr_len)
2159 		goto mode_sel_param_len_err;
2160 
2161 	bd_len = hdr_len == 4 ? buf[3] : READ_SCSI16(&buf[6], int);
2162 
2163 	if (buflen < (hdr_len + bd_len + 2))
2164 		goto mode_sel_param_len_err;
2165 
2166 	buf += hdr_len + bd_len;
2167 	buflen -= hdr_len + bd_len;
2168 
2169 	if ((buf[0] != 8) || (buflen != ((uint32_t)buf[1] + 2))) {
2170 		goto mode_sel_param_len_err;
2171 	}
2172 
2173 	if (buf[2] & 0xFB) {
2174 		goto mode_sel_param_field_err;
2175 	}
2176 
2177 	for (i = 3; i < (buf[1] + 2); i++) {
2178 		if (buf[i]) {
2179 			goto mode_sel_param_field_err;
2180 		}
2181 	}
2182 
2183 	sret = SBD_SUCCESS;
2184 
2185 	/* All good. Lets handle the write cache change, if any */
2186 	if (buf[2] & BIT_2) {
2187 		sret = sbd_wcd_set(0, sl);
2188 	} else {
2189 		sret = sbd_wcd_set(1, sl);
2190 	}
2191 
2192 	if (sret != SBD_SUCCESS) {
2193 		stmf_scsilib_send_status(task, STATUS_CHECK,
2194 		    STMF_SAA_WRITE_ERROR);
2195 		return;
2196 	}
2197 
2198 	/* set on the device passed, now set the flags */
2199 	mutex_enter(&sl->sl_lock);
2200 	if (buf[2] & BIT_2) {
2201 		sl->sl_flags &= ~SL_WRITEBACK_CACHE_DISABLE;
2202 	} else {
2203 		sl->sl_flags |= SL_WRITEBACK_CACHE_DISABLE;
2204 	}
2205 
2206 	for (it = sl->sl_it_list; it != NULL; it = it->sbd_it_next) {
2207 		if (it == task->task_lu_itl_handle)
2208 			continue;
2209 		it->sbd_it_ua_conditions |= SBD_UA_MODE_PARAMETERS_CHANGED;
2210 	}
2211 
2212 	if (task->task_cdb[1] & 1) {
2213 		if (buf[2] & BIT_2) {
2214 			sl->sl_flags &= ~SL_SAVED_WRITE_CACHE_DISABLE;
2215 		} else {
2216 			sl->sl_flags |= SL_SAVED_WRITE_CACHE_DISABLE;
2217 		}
2218 		mutex_exit(&sl->sl_lock);
2219 		sret = sbd_write_lu_info(sl);
2220 	} else {
2221 		mutex_exit(&sl->sl_lock);
2222 	}
2223 	if (sret == SBD_SUCCESS) {
2224 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2225 	} else {
2226 		stmf_scsilib_send_status(task, STATUS_CHECK,
2227 		    STMF_SAA_WRITE_ERROR);
2228 	}
2229 	return;
2230 
2231 mode_sel_param_len_err:
2232 	stmf_scsilib_send_status(task, STATUS_CHECK,
2233 	    STMF_SAA_PARAM_LIST_LENGTH_ERROR);
2234 	return;
2235 mode_sel_param_field_err:
2236 	stmf_scsilib_send_status(task, STATUS_CHECK,
2237 	    STMF_SAA_INVALID_FIELD_IN_PARAM_LIST);
2238 }
2239 
2240 /*
2241  * Command support added from SPC-4 r24
2242  * Supports info type 0, 2, 127
2243  */
2244 void
sbd_handle_identifying_info(struct scsi_task * task,stmf_data_buf_t * initial_dbuf)2245 sbd_handle_identifying_info(struct scsi_task *task,
2246     stmf_data_buf_t *initial_dbuf)
2247 {
2248 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2249 	uint8_t *cdb;
2250 	uint32_t cmd_size;
2251 	uint32_t param_len;
2252 	uint32_t xfer_size;
2253 	uint8_t info_type;
2254 	uint8_t *buf, *p;
2255 
2256 	cdb = &task->task_cdb[0];
2257 	cmd_size = READ_SCSI32(&cdb[6], uint32_t);
2258 	info_type = cdb[10]>>1;
2259 
2260 	/* Validate the command */
2261 	if (cmd_size < 4) {
2262 		stmf_scsilib_send_status(task, STATUS_CHECK,
2263 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2264 		return;
2265 	}
2266 
2267 	p = buf = kmem_zalloc(260, KM_SLEEP);
2268 
2269 	switch (info_type) {
2270 		case 0:
2271 			/*
2272 			 * No value is supplied but this info type
2273 			 * is mandatory.
2274 			 */
2275 			xfer_size = 4;
2276 			break;
2277 		case 2:
2278 			mutex_enter(&sl->sl_lock);
2279 			param_len = strlcpy((char *)(p+4), sl->sl_alias, 256);
2280 			mutex_exit(&sl->sl_lock);
2281 			/* text info must be null terminated */
2282 			if (++param_len > 256)
2283 				param_len = 256;
2284 			SCSI_WRITE16(p+2, param_len);
2285 			xfer_size = param_len + 4;
2286 			break;
2287 		case 127:
2288 			/* 0 and 2 descriptor supported */
2289 			SCSI_WRITE16(p+2, 8); /* set param length */
2290 			p += 8;
2291 			*p = 4; /* set type to 2 (7 hi bits) */
2292 			p += 2;
2293 			SCSI_WRITE16(p, 256); /* 256 max length */
2294 			xfer_size = 12;
2295 			break;
2296 		default:
2297 			stmf_scsilib_send_status(task, STATUS_CHECK,
2298 			    STMF_SAA_INVALID_FIELD_IN_CDB);
2299 			kmem_free(buf, 260);
2300 			return;
2301 	}
2302 	sbd_handle_short_read_transfers(task, initial_dbuf, buf,
2303 	    cmd_size, xfer_size);
2304 	kmem_free(buf, 260);
2305 }
2306 
2307 /*
2308  * This function parse through a string, passed to it as a pointer to a string,
2309  * by adjusting the pointer to the first non-space character and returns
2310  * the count/length of the first bunch of non-space characters. Multiple
2311  * Management URLs are stored as a space delimited string in sl_mgmt_url
2312  * field of sbd_lu_t. This function is used to retrieve one url at a time.
2313  *
2314  * i/p : pointer to pointer to a url string
2315  * o/p : Adjust the pointer to the url to the first non white character
2316  *       and returns the length of the URL
2317  */
2318 uint16_t
sbd_parse_mgmt_url(char ** url_addr)2319 sbd_parse_mgmt_url(char **url_addr)
2320 {
2321 	uint16_t url_length = 0;
2322 	char *url;
2323 	url = *url_addr;
2324 
2325 	while (*url != '\0') {
2326 		if (*url == ' ' || *url == '\t' || *url == '\n') {
2327 			(*url_addr)++;
2328 			url = *url_addr;
2329 		} else {
2330 			break;
2331 		}
2332 	}
2333 
2334 	while (*url != '\0') {
2335 		if (*url == ' ' || *url == '\t' ||
2336 		    *url == '\n' || *url == '\0') {
2337 			break;
2338 		}
2339 		url++;
2340 		url_length++;
2341 	}
2342 	return (url_length);
2343 }
2344 
2345 /* Try to make this the size of a kmem allocation cache. */
2346 static uint_t sbd_write_same_optimal_chunk = 128 * 1024;
2347 
2348 static sbd_status_t
sbd_write_same_data_common(sbd_lu_t * sl,struct scsi_task * task,uint64_t addr,uint8_t * buf,uint32_t bufsz,uint32_t xfer_size,uint64_t len)2349 sbd_write_same_data_common(sbd_lu_t *sl, struct scsi_task *task, uint64_t addr,
2350     uint8_t *buf, uint32_t bufsz, uint32_t xfer_size, uint64_t len)
2351 {
2352 	uint64_t sz_done;
2353 	sbd_status_t ret = SBD_SUCCESS;
2354 
2355 	/* Do the actual I/O.  Recycle xfer_size now to be write size. */
2356 	DTRACE_PROBE1(write__same__io__begin, uint64_t, len);
2357 	for (sz_done = 0; sz_done < len; sz_done += (uint64_t)xfer_size) {
2358 		xfer_size = ((bufsz + sz_done) <= len) ? bufsz :
2359 		    len - sz_done;
2360 		ret = sbd_data_write(sl, task, addr + sz_done,
2361 		    (uint64_t)xfer_size, buf);
2362 		if (ret != SBD_SUCCESS)
2363 			break;
2364 	}
2365 	DTRACE_PROBE2(write__same__io__end, uint64_t, len, uint64_t, sz_done);
2366 	return (ret);
2367 }
2368 
2369 static sbd_status_t
sbd_write_same_data(struct scsi_task * task,sbd_cmd_t * scmd)2370 sbd_write_same_data(struct scsi_task *task, sbd_cmd_t *scmd)
2371 {
2372 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2373 	uint64_t addr, len;
2374 	uint32_t big_buf_size, xfer_size, off;
2375 	uint8_t *big_buf;
2376 	sbd_status_t ret;
2377 
2378 	if (task->task_cdb[0] == SCMD_WRITE_SAME_G1) {
2379 		addr = READ_SCSI32(&task->task_cdb[2], uint64_t);
2380 		len = READ_SCSI16(&task->task_cdb[7], uint64_t);
2381 	} else {
2382 		addr = READ_SCSI64(&task->task_cdb[2], uint64_t);
2383 		len = READ_SCSI32(&task->task_cdb[10], uint64_t);
2384 	}
2385 	addr <<= sl->sl_data_blocksize_shift;
2386 	len <<= sl->sl_data_blocksize_shift;
2387 
2388 	/*
2389 	 * Reminders:
2390 	 *    "len" is total size of what we wish to "write same".
2391 	 *
2392 	 *    xfer_size will be scmd->trans_data_len, which is the length
2393 	 *    of the pattern we wish to replicate over "len".  We replicate
2394 	 *    "xfer_size" of pattern over "len".
2395 	 *
2396 	 *    big_buf_size is set to an ideal actual-write size for an output
2397 	 *    operation.  It may be the same as "len".  If it's not, it should
2398 	 *    be an exact multiple of "xfer_size" so we don't get pattern
2399 	 *    breakage until the very end of "len".
2400 	 */
2401 	big_buf_size = len > sbd_write_same_optimal_chunk ?
2402 	    sbd_write_same_optimal_chunk : (uint32_t)len;
2403 	xfer_size = scmd->trans_data_len;
2404 
2405 	/*
2406 	 * All transfers should be an integral multiple of the sector size.
2407 	 */
2408 	ASSERT((big_buf_size % xfer_size) == 0);
2409 
2410 	/*
2411 	 * Don't sleep for the allocation, and don't make the system
2412 	 * reclaim memory.  Trade higher I/Os if in a low-memory situation.
2413 	 */
2414 	big_buf = kmem_alloc(big_buf_size, KM_NOSLEEP_LAZY);
2415 	if (big_buf == NULL) {
2416 		/*
2417 		 * Just send it in terms of of the transmitted data.  This
2418 		 * will be very slow.
2419 		 */
2420 		DTRACE_PROBE1(write__same__low__memory, uint64_t, big_buf_size);
2421 		ret = sbd_write_same_data_common(sl, task, addr,
2422 		    scmd->trans_data, scmd->trans_data_len, xfer_size, len);
2423 	} else {
2424 		/*
2425 		 * We already ASSERT()ed big_buf_size is an integral multiple
2426 		 * of xfer_size.
2427 		 */
2428 		for (off = 0; off < big_buf_size; off += xfer_size)
2429 			bcopy(scmd->trans_data, big_buf + off, xfer_size);
2430 
2431 		ret = sbd_write_same_data_common(sl, task, addr, big_buf,
2432 		    big_buf_size, xfer_size, len);
2433 
2434 		kmem_free(big_buf, big_buf_size);
2435 	}
2436 
2437 	return (ret);
2438 }
2439 
2440 static void
sbd_write_same_release_resources(struct scsi_task * task)2441 sbd_write_same_release_resources(struct scsi_task *task)
2442 {
2443 	sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
2444 
2445 	if (scmd->nbufs == 0XFF)
2446 		cmn_err(CE_WARN, "%s invalid buffer count %x",
2447 		    __func__, scmd->nbufs);
2448 	if ((scmd->trans_data_len != 0) && (scmd->trans_data != NULL))
2449 		kmem_free(scmd->trans_data, scmd->trans_data_len);
2450 	scmd->trans_data = NULL;
2451 	scmd->trans_data_len = 0;
2452 	scmd->flags &= ~SBD_SCSI_CMD_TRANS_DATA;
2453 }
2454 
2455 static void
sbd_handle_write_same_xfer_completion(struct scsi_task * task,sbd_cmd_t * scmd,struct stmf_data_buf * dbuf,uint8_t dbuf_reusable)2456 sbd_handle_write_same_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
2457     struct stmf_data_buf *dbuf, uint8_t dbuf_reusable)
2458 {
2459 	uint64_t laddr;
2460 	uint32_t buflen, iolen;
2461 	int ndx, ret;
2462 
2463 	if (ATOMIC8_GET(scmd->nbufs) > 0) {
2464 		atomic_dec_8(&scmd->nbufs);
2465 	}
2466 
2467 	if (dbuf->db_xfer_status != STMF_SUCCESS) {
2468 		sbd_write_same_release_resources(task);
2469 		sbd_ats_remove_by_task(task);
2470 		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
2471 		    dbuf->db_xfer_status, NULL);
2472 		return;
2473 	}
2474 
2475 	if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
2476 		goto write_same_xfer_done;
2477 	}
2478 
2479 	/* if this is a unnessary callback just return */
2480 	if (((scmd->flags & SBD_SCSI_CMD_TRANS_DATA) == 0) ||
2481 	    ((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0) ||
2482 	    (scmd->trans_data == NULL)) {
2483 		sbd_ats_remove_by_task(task);
2484 		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
2485 		return;
2486 	}
2487 
2488 	if (ATOMIC32_GET(scmd->len) != 0) {
2489 		/*
2490 		 * Initiate the next port xfer to occur in parallel
2491 		 * with writing this buf.
2492 		 */
2493 		sbd_do_write_same_xfer(task, scmd, NULL, 0);
2494 	}
2495 
2496 	laddr = dbuf->db_relative_offset;
2497 
2498 	for (buflen = 0, ndx = 0; (buflen < dbuf->db_data_size) &&
2499 	    (ndx < dbuf->db_sglist_length); ndx++) {
2500 		iolen = min(dbuf->db_data_size - buflen,
2501 		    dbuf->db_sglist[ndx].seg_length);
2502 		if (iolen == 0)
2503 			break;
2504 		bcopy(dbuf->db_sglist[ndx].seg_addr, &scmd->trans_data[laddr],
2505 		    iolen);
2506 		buflen += iolen;
2507 		laddr += (uint64_t)iolen;
2508 	}
2509 	task->task_nbytes_transferred += buflen;
2510 
2511 write_same_xfer_done:
2512 	if (ATOMIC32_GET(scmd->len) == 0 ||
2513 	    scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
2514 		stmf_free_dbuf(task, dbuf);
2515 		if (ATOMIC8_GET(scmd->nbufs) > 0)
2516 			return;
2517 		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
2518 		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
2519 			sbd_ats_remove_by_task(task);
2520 			sbd_write_same_release_resources(task);
2521 			stmf_scsilib_send_status(task, STATUS_CHECK,
2522 			    STMF_SAA_WRITE_ERROR);
2523 		} else {
2524 			ret = sbd_write_same_data(task, scmd);
2525 			sbd_ats_remove_by_task(task);
2526 			sbd_write_same_release_resources(task);
2527 			if (ret != SBD_SUCCESS) {
2528 				stmf_scsilib_send_status(task, STATUS_CHECK,
2529 				    STMF_SAA_WRITE_ERROR);
2530 			} else {
2531 				stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2532 			}
2533 		}
2534 		return;
2535 	}
2536 	sbd_do_write_same_xfer(task, scmd, dbuf, dbuf_reusable);
2537 }
2538 
2539 static void
sbd_do_write_same_xfer(struct scsi_task * task,sbd_cmd_t * scmd,struct stmf_data_buf * dbuf,uint8_t dbuf_reusable)2540 sbd_do_write_same_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
2541     struct stmf_data_buf *dbuf, uint8_t dbuf_reusable)
2542 {
2543 	uint32_t len;
2544 
2545 	if (ATOMIC32_GET(scmd->len) == 0) {
2546 		if (dbuf != NULL)
2547 			stmf_free_dbuf(task, dbuf);
2548 		return;
2549 	}
2550 
2551 	if ((dbuf != NULL) &&
2552 	    ((dbuf->db_flags & DB_DONT_REUSE) || (dbuf_reusable == 0))) {
2553 		/* free current dbuf and allocate a new one */
2554 		stmf_free_dbuf(task, dbuf);
2555 		dbuf = NULL;
2556 	}
2557 	if (dbuf == NULL) {
2558 		uint32_t maxsize, minsize, old_minsize;
2559 
2560 		maxsize = (ATOMIC32_GET(scmd->len) > (128*1024)) ? 128*1024 :
2561 		    ATOMIC32_GET(scmd->len);
2562 		minsize = maxsize >> 2;
2563 		do {
2564 			old_minsize = minsize;
2565 			dbuf = stmf_alloc_dbuf(task, maxsize, &minsize, 0);
2566 		} while ((dbuf == NULL) && (old_minsize > minsize) &&
2567 		    (minsize >= 512));
2568 		if (dbuf == NULL) {
2569 			sbd_ats_remove_by_task(task);
2570 			sbd_write_same_release_resources(task);
2571 			if (ATOMIC8_GET(scmd->nbufs) == 0) {
2572 				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
2573 				    STMF_ALLOC_FAILURE, NULL);
2574 			}
2575 			return;
2576 		}
2577 	}
2578 
2579 	len = ATOMIC32_GET(scmd->len) > dbuf->db_buf_size ? dbuf->db_buf_size :
2580 	    ATOMIC32_GET(scmd->len);
2581 
2582 	dbuf->db_relative_offset = scmd->current_ro;
2583 	dbuf->db_data_size = len;
2584 	dbuf->db_flags = DB_DIRECTION_FROM_RPORT;
2585 	(void) stmf_xfer_data(task, dbuf, 0);
2586 	/* outstanding port xfers and bufs used */
2587 	atomic_inc_8(&scmd->nbufs);
2588 	atomic_add_32(&scmd->len, -len);
2589 	scmd->current_ro += len;
2590 }
2591 
2592 static void
sbd_handle_write_same(scsi_task_t * task,struct stmf_data_buf * initial_dbuf)2593 sbd_handle_write_same(scsi_task_t *task, struct stmf_data_buf *initial_dbuf)
2594 {
2595 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2596 	uint64_t addr, len;
2597 	sbd_cmd_t *scmd;
2598 	stmf_data_buf_t *dbuf;
2599 	uint8_t unmap;
2600 	uint8_t do_immediate_data = 0;
2601 
2602 	if (HardwareAcceleratedInit == 0) {
2603 		stmf_scsilib_send_status(task, STATUS_CHECK,
2604 		    STMF_SAA_INVALID_OPCODE);
2605 		return;
2606 	}
2607 
2608 	task->task_cmd_xfer_length = 0;
2609 	if (task->task_additional_flags &
2610 	    TASK_AF_NO_EXPECTED_XFER_LENGTH) {
2611 		task->task_expected_xfer_length = 0;
2612 	}
2613 	if (sl->sl_flags & SL_WRITE_PROTECTED) {
2614 		stmf_scsilib_send_status(task, STATUS_CHECK,
2615 		    STMF_SAA_WRITE_PROTECTED);
2616 		return;
2617 	}
2618 	if (task->task_cdb[1] & 0xF7) {
2619 		stmf_scsilib_send_status(task, STATUS_CHECK,
2620 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2621 		return;
2622 	}
2623 	unmap = task->task_cdb[1] & 0x08;
2624 
2625 	if (unmap && ((sl->sl_flags & SL_UNMAP_ENABLED) == 0)) {
2626 		stmf_scsilib_send_status(task, STATUS_CHECK,
2627 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2628 		return;
2629 	}
2630 
2631 	if (task->task_cdb[0] == SCMD_WRITE_SAME_G1) {
2632 		addr = READ_SCSI32(&task->task_cdb[2], uint64_t);
2633 		len = READ_SCSI16(&task->task_cdb[7], uint64_t);
2634 	} else {
2635 		addr = READ_SCSI64(&task->task_cdb[2], uint64_t);
2636 		len = READ_SCSI32(&task->task_cdb[10], uint64_t);
2637 	}
2638 
2639 	if (len == 0) {
2640 		stmf_scsilib_send_status(task, STATUS_CHECK,
2641 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2642 		return;
2643 	}
2644 
2645 	if (sbd_ats_handling_before_io(task, sl, addr, len) !=
2646 	    SBD_SUCCESS) {
2647 		if (stmf_task_poll_lu(task, 10) != STMF_SUCCESS)
2648 			stmf_scsilib_send_status(task, STATUS_BUSY, 0);
2649 		return;
2650 	}
2651 
2652 	addr <<= sl->sl_data_blocksize_shift;
2653 	len <<= sl->sl_data_blocksize_shift;
2654 
2655 	/* Check if the command is for the unmap function */
2656 	if (unmap) {
2657 		dkioc_free_list_t *dfl = kmem_zalloc(DFL_SZ(1), KM_SLEEP);
2658 
2659 		dfl->dfl_num_exts = 1;
2660 		dfl->dfl_exts[0].dfle_start = addr;
2661 		dfl->dfl_exts[0].dfle_length = len;
2662 		if (sbd_unmap(sl, dfl) != 0) {
2663 			stmf_scsilib_send_status(task, STATUS_CHECK,
2664 			    STMF_SAA_LBA_OUT_OF_RANGE);
2665 		} else {
2666 			stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2667 		}
2668 		dfl_free(dfl);
2669 		return;
2670 	}
2671 
2672 	/* Write same function */
2673 
2674 	task->task_cmd_xfer_length = 1 << sl->sl_data_blocksize_shift;
2675 	if (task->task_additional_flags &
2676 	    TASK_AF_NO_EXPECTED_XFER_LENGTH) {
2677 		task->task_expected_xfer_length = task->task_cmd_xfer_length;
2678 	}
2679 	if ((addr + len) > sl->sl_lu_size) {
2680 		sbd_ats_remove_by_task(task);
2681 		stmf_scsilib_send_status(task, STATUS_CHECK,
2682 		    STMF_SAA_LBA_OUT_OF_RANGE);
2683 		return;
2684 	}
2685 
2686 	/* For rest of this I/O the transfer length is 1 block */
2687 	len = ((uint64_t)1) << sl->sl_data_blocksize_shift;
2688 
2689 	/* Some basic checks */
2690 	if ((len == 0) || (len != task->task_expected_xfer_length)) {
2691 		sbd_ats_remove_by_task(task);
2692 		stmf_scsilib_send_status(task, STATUS_CHECK,
2693 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2694 		return;
2695 	}
2696 
2697 
2698 	if ((initial_dbuf != NULL) && (task->task_flags & TF_INITIAL_BURST)) {
2699 		if (initial_dbuf->db_data_size > len) {
2700 			if (initial_dbuf->db_data_size >
2701 			    task->task_expected_xfer_length) {
2702 				/* protocol error */
2703 				sbd_ats_remove_by_task(task);
2704 				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
2705 				    STMF_INVALID_ARG, NULL);
2706 				return;
2707 			}
2708 			initial_dbuf->db_data_size = (uint32_t)len;
2709 		}
2710 		do_immediate_data = 1;
2711 	}
2712 	dbuf = initial_dbuf;
2713 
2714 	if (task->task_lu_private) {
2715 		scmd = (sbd_cmd_t *)task->task_lu_private;
2716 	} else {
2717 		scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
2718 		task->task_lu_private = scmd;
2719 	}
2720 	scmd->flags = SBD_SCSI_CMD_ACTIVE | SBD_SCSI_CMD_TRANS_DATA |
2721 	    SBD_SCSI_CMD_ATS_RELATED;
2722 	scmd->cmd_type = SBD_CMD_SCSI_WRITE;
2723 	scmd->nbufs = 0;
2724 	scmd->len = (uint32_t)len;
2725 	scmd->trans_data_len = (uint32_t)len;
2726 	scmd->trans_data = kmem_alloc((size_t)len, KM_SLEEP);
2727 	scmd->current_ro = 0;
2728 
2729 	if (do_immediate_data) {
2730 		/*
2731 		 * Account for data passed in this write command
2732 		 */
2733 		(void) stmf_xfer_data(task, dbuf, STMF_IOF_STATS_ONLY);
2734 		atomic_add_32(&scmd->len, -dbuf->db_data_size);
2735 		scmd->current_ro += dbuf->db_data_size;
2736 		dbuf->db_xfer_status = STMF_SUCCESS;
2737 		sbd_handle_write_same_xfer_completion(task, scmd, dbuf, 0);
2738 	} else {
2739 		sbd_do_write_same_xfer(task, scmd, dbuf, 0);
2740 	}
2741 }
2742 
2743 static void
sbd_handle_unmap(scsi_task_t * task,stmf_data_buf_t * dbuf)2744 sbd_handle_unmap(scsi_task_t *task, stmf_data_buf_t *dbuf)
2745 {
2746 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2747 	uint32_t cmd_xfer_len;
2748 
2749 	if (sbd_unmap_enable == 0) {
2750 		stmf_scsilib_send_status(task, STATUS_CHECK,
2751 		    STMF_SAA_INVALID_OPCODE);
2752 		return;
2753 	}
2754 
2755 	if (sl->sl_flags & SL_WRITE_PROTECTED) {
2756 		stmf_scsilib_send_status(task, STATUS_CHECK,
2757 		    STMF_SAA_WRITE_PROTECTED);
2758 		return;
2759 	}
2760 	cmd_xfer_len = READ_SCSI16(&task->task_cdb[7], uint32_t);
2761 
2762 	if (task->task_cdb[1] & 1) {
2763 		stmf_scsilib_send_status(task, STATUS_CHECK,
2764 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2765 		return;
2766 	}
2767 
2768 	if (cmd_xfer_len == 0) {
2769 		task->task_cmd_xfer_length = 0;
2770 		if (task->task_additional_flags &
2771 		    TASK_AF_NO_EXPECTED_XFER_LENGTH) {
2772 			task->task_expected_xfer_length = 0;
2773 		}
2774 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2775 		return;
2776 	}
2777 
2778 	sbd_handle_short_write_transfers(task, dbuf, cmd_xfer_len);
2779 }
2780 
2781 static void
sbd_handle_unmap_xfer(scsi_task_t * task,uint8_t * buf,uint32_t buflen)2782 sbd_handle_unmap_xfer(scsi_task_t *task, uint8_t *buf, uint32_t buflen)
2783 {
2784 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2785 	uint32_t ulen, dlen, num_desc;
2786 	uint64_t addr, len;
2787 	uint8_t *p;
2788 	dkioc_free_list_t *dfl;
2789 	int ret;
2790 	int i;
2791 
2792 	if (buflen < 24) {
2793 		stmf_scsilib_send_status(task, STATUS_CHECK,
2794 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2795 		return;
2796 	}
2797 	ulen = READ_SCSI16(buf, uint32_t);
2798 	dlen = READ_SCSI16(buf + 2, uint32_t);
2799 	num_desc = dlen >> 4;
2800 	if (((ulen + 2) != buflen) || ((dlen + 8) != buflen) || (dlen & 0xf) ||
2801 	    (num_desc == 0)) {
2802 		stmf_scsilib_send_status(task, STATUS_CHECK,
2803 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2804 		return;
2805 	}
2806 
2807 	dfl = kmem_zalloc(DFL_SZ(num_desc), KM_SLEEP);
2808 	dfl->dfl_num_exts = num_desc;
2809 	/*
2810 	 * This should use ATS locking but that was disabled by the
2811 	 * changes to ZFS top take advantage of TRIM in SSDs.
2812 	 *
2813 	 * Since the entire list is passed to ZFS in one list ATS
2814 	 * locking is not done.  This may be detectable, and if it is
2815 	 * then the entire list needs to be locked and then after the
2816 	 * unmap completes the entire list must be unlocked
2817 	 */
2818 	for (p = buf + 8, i = 0; num_desc; num_desc--, p += 16, i++) {
2819 		addr = READ_SCSI64(p, uint64_t);
2820 		len = READ_SCSI32(p+8, uint64_t);
2821 		addr <<= sl->sl_data_blocksize_shift;
2822 		len <<= sl->sl_data_blocksize_shift;
2823 
2824 		/* Prepare a list of extents to unmap */
2825 		dfl->dfl_exts[i].dfle_start = addr;
2826 		dfl->dfl_exts[i].dfle_length = len;
2827 
2828 		/* release the overlap */
2829 	}
2830 	ASSERT(i == dfl->dfl_num_exts);
2831 
2832 	/* Finally execute the unmap operations in a single step */
2833 	ret = sbd_unmap(sl, dfl);
2834 	dfl_free(dfl);
2835 	if (ret != 0) {
2836 		stmf_scsilib_send_status(task, STATUS_CHECK,
2837 		    STMF_SAA_LBA_OUT_OF_RANGE);
2838 		return;
2839 	}
2840 
2841 	stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2842 }
2843 
2844 void
sbd_handle_inquiry(struct scsi_task * task,struct stmf_data_buf * initial_dbuf)2845 sbd_handle_inquiry(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
2846 {
2847 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2848 	uint8_t *cdbp = (uint8_t *)&task->task_cdb[0];
2849 	uint8_t *p;
2850 	uint8_t byte0;
2851 	uint8_t page_length;
2852 	uint16_t bsize = 512;
2853 	uint16_t cmd_size;
2854 	uint32_t xfer_size = 4;
2855 	uint32_t mgmt_url_size = 0;
2856 	uint8_t exp;
2857 	uint64_t s;
2858 	char *mgmt_url = NULL;
2859 
2860 
2861 	byte0 = DTYPE_DIRECT;
2862 	/*
2863 	 * Basic protocol checks.
2864 	 */
2865 
2866 	if ((((cdbp[1] & 1) == 0) && cdbp[2]) || cdbp[5]) {
2867 		stmf_scsilib_send_status(task, STATUS_CHECK,
2868 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2869 		return;
2870 	}
2871 
2872 	/*
2873 	 * Zero byte allocation length is not an error.  Just
2874 	 * return success.
2875 	 */
2876 
2877 	cmd_size = (((uint16_t)cdbp[3]) << 8) | cdbp[4];
2878 
2879 	if (cmd_size == 0) {
2880 		task->task_cmd_xfer_length = 0;
2881 		if (task->task_additional_flags &
2882 		    TASK_AF_NO_EXPECTED_XFER_LENGTH) {
2883 			task->task_expected_xfer_length = 0;
2884 		}
2885 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2886 		return;
2887 	}
2888 
2889 	/*
2890 	 * Standard inquiry
2891 	 */
2892 
2893 	if ((cdbp[1] & 1) == 0) {
2894 		int	i;
2895 		struct scsi_inquiry *inq;
2896 
2897 		p = (uint8_t *)kmem_zalloc(bsize, KM_SLEEP);
2898 		inq = (struct scsi_inquiry *)p;
2899 
2900 		page_length = 69;
2901 		xfer_size = page_length + 5;
2902 
2903 		inq->inq_dtype = DTYPE_DIRECT;
2904 		inq->inq_ansi = 5;	/* SPC-3 */
2905 		inq->inq_hisup = 1;
2906 		inq->inq_rdf = 2;	/* Response data format for SPC-3 */
2907 		inq->inq_len = page_length;
2908 
2909 		inq->inq_tpgs = TPGS_FAILOVER_IMPLICIT;
2910 		inq->inq_cmdque = 1;
2911 		inq->inq_3pc = 1;
2912 
2913 		if (sl->sl_flags & SL_VID_VALID) {
2914 			bcopy(sl->sl_vendor_id, inq->inq_vid, 8);
2915 		} else {
2916 			bcopy(sbd_vendor_id, inq->inq_vid, 8);
2917 		}
2918 
2919 		if (sl->sl_flags & SL_PID_VALID) {
2920 			bcopy(sl->sl_product_id, inq->inq_pid, 16);
2921 		} else {
2922 			bcopy(sbd_product_id, inq->inq_pid, 16);
2923 		}
2924 
2925 		if (sl->sl_flags & SL_REV_VALID) {
2926 			bcopy(sl->sl_revision, inq->inq_revision, 4);
2927 		} else {
2928 			bcopy(sbd_revision, inq->inq_revision, 4);
2929 		}
2930 
2931 		/* Adding Version Descriptors */
2932 		i = 0;
2933 		/* SAM-3 no version */
2934 		inq->inq_vd[i].inq_vd_msb = 0x00;
2935 		inq->inq_vd[i].inq_vd_lsb = 0x60;
2936 		i++;
2937 
2938 		/* transport */
2939 		switch (task->task_lport->lport_id->protocol_id) {
2940 		case PROTOCOL_FIBRE_CHANNEL:
2941 			inq->inq_vd[i].inq_vd_msb = 0x09;
2942 			inq->inq_vd[i].inq_vd_lsb = 0x00;
2943 			i++;
2944 			break;
2945 
2946 		case PROTOCOL_PARALLEL_SCSI:
2947 		case PROTOCOL_SSA:
2948 		case PROTOCOL_IEEE_1394:
2949 			/* Currently no claims of conformance */
2950 			break;
2951 
2952 		case PROTOCOL_SRP:
2953 			inq->inq_vd[i].inq_vd_msb = 0x09;
2954 			inq->inq_vd[i].inq_vd_lsb = 0x40;
2955 			i++;
2956 			break;
2957 
2958 		case PROTOCOL_iSCSI:
2959 			inq->inq_vd[i].inq_vd_msb = 0x09;
2960 			inq->inq_vd[i].inq_vd_lsb = 0x60;
2961 			i++;
2962 			break;
2963 
2964 		case PROTOCOL_SAS:
2965 		case PROTOCOL_ADT:
2966 		case PROTOCOL_ATAPI:
2967 		default:
2968 			/* Currently no claims of conformance */
2969 			break;
2970 		}
2971 
2972 		/* SPC-3 no version */
2973 		inq->inq_vd[i].inq_vd_msb = 0x03;
2974 		inq->inq_vd[i].inq_vd_lsb = 0x00;
2975 		i++;
2976 
2977 		/* SBC-2 no version */
2978 		inq->inq_vd[i].inq_vd_msb = 0x03;
2979 		inq->inq_vd[i].inq_vd_lsb = 0x20;
2980 
2981 		sbd_handle_short_read_transfers(task, initial_dbuf, p, cmd_size,
2982 		    min(cmd_size, xfer_size));
2983 		kmem_free(p, bsize);
2984 
2985 		return;
2986 	}
2987 
2988 	rw_enter(&sbd_global_prop_lock, RW_READER);
2989 	if (sl->sl_mgmt_url) {
2990 		mgmt_url_size = strlen(sl->sl_mgmt_url);
2991 		mgmt_url = sl->sl_mgmt_url;
2992 	} else if (sbd_mgmt_url) {
2993 		mgmt_url_size = strlen(sbd_mgmt_url);
2994 		mgmt_url = sbd_mgmt_url;
2995 	}
2996 
2997 	/*
2998 	 * EVPD handling
2999 	 */
3000 
3001 	/* Default 512 bytes may not be enough, increase bsize if necessary */
3002 	if (cdbp[2] == 0x83 || cdbp[2] == 0x85) {
3003 		if (bsize <  cmd_size)
3004 			bsize = cmd_size;
3005 	}
3006 	p = (uint8_t *)kmem_zalloc(bsize, KM_SLEEP);
3007 
3008 	switch (cdbp[2]) {
3009 	case 0x00:
3010 		page_length = 5 + (mgmt_url_size ? 1 : 0);
3011 
3012 		if (sl->sl_flags & SL_UNMAP_ENABLED)
3013 			page_length += 1;
3014 
3015 		p[0] = byte0;
3016 		p[3] = page_length;
3017 		/* Supported VPD pages in ascending order */
3018 		/* CSTYLED */
3019 		{
3020 			uint8_t i = 5;
3021 
3022 			p[i++] = 0x80;
3023 			p[i++] = 0x83;
3024 			if (mgmt_url_size != 0)
3025 				p[i++] = 0x85;
3026 			p[i++] = 0x86;
3027 			p[i++] = 0xb0;
3028 			if (sl->sl_flags & SL_UNMAP_ENABLED) {
3029 				p[i++] = 0xb2;
3030 			}
3031 		}
3032 		xfer_size = page_length + 4;
3033 		break;
3034 
3035 	case 0x80:
3036 		if (sl->sl_serial_no_size) {
3037 			page_length = sl->sl_serial_no_size;
3038 			bcopy(sl->sl_serial_no, p + 4, sl->sl_serial_no_size);
3039 		} else {
3040 			/* if no serial num is specified set 4 spaces */
3041 			page_length = 4;
3042 			bcopy("    ", p + 4, 4);
3043 		}
3044 		p[0] = byte0;
3045 		p[1] = 0x80;
3046 		p[3] = page_length;
3047 		xfer_size = page_length + 4;
3048 		break;
3049 
3050 	case 0x83:
3051 		xfer_size = stmf_scsilib_prepare_vpd_page83(task, p,
3052 		    bsize, byte0, STMF_VPD_LU_ID|STMF_VPD_TARGET_ID|
3053 		    STMF_VPD_TP_GROUP|STMF_VPD_RELATIVE_TP_ID);
3054 		break;
3055 
3056 	case 0x85:
3057 		if (mgmt_url_size == 0) {
3058 			stmf_scsilib_send_status(task, STATUS_CHECK,
3059 			    STMF_SAA_INVALID_FIELD_IN_CDB);
3060 			goto err_done;
3061 		} /* CSTYLED */
3062 		{
3063 			uint16_t idx, newidx, sz, url_size;
3064 			char *url;
3065 
3066 			p[0] = byte0;
3067 			p[1] = 0x85;
3068 
3069 			idx = 4;
3070 			url = mgmt_url;
3071 			url_size = sbd_parse_mgmt_url(&url);
3072 			/* Creating Network Service Descriptors */
3073 			while (url_size != 0) {
3074 				/* Null terminated and 4 Byte aligned */
3075 				sz = url_size + 1;
3076 				sz += (sz % 4) ? 4 - (sz % 4) : 0;
3077 				newidx = idx + sz + 4;
3078 
3079 				if (newidx < bsize) {
3080 					/*
3081 					 * SPC-3r23 : Table 320  (Sec 7.6.5)
3082 					 * (Network service descriptor format
3083 					 *
3084 					 * Note: Hard coding service type as
3085 					 * "Storage Configuration Service".
3086 					 */
3087 					p[idx] = 1;
3088 					SCSI_WRITE16(p + idx + 2, sz);
3089 					bcopy(url, p + idx + 4, url_size);
3090 					xfer_size = newidx + 4;
3091 				}
3092 				idx = newidx;
3093 
3094 				/* skip to next mgmt url if any */
3095 				url += url_size;
3096 				url_size = sbd_parse_mgmt_url(&url);
3097 			}
3098 
3099 			/* Total descriptor length */
3100 			SCSI_WRITE16(p + 2, idx - 4);
3101 			break;
3102 		}
3103 
3104 	case 0x86:
3105 		page_length = 0x3c;
3106 
3107 		p[0] = byte0;
3108 		p[1] = 0x86;		/* Page 86 response */
3109 		p[3] = page_length;
3110 
3111 		/*
3112 		 * Bits 0, 1, and 2 will need to be updated
3113 		 * to reflect the queue tag handling if/when
3114 		 * that is implemented.  For now, we're going
3115 		 * to claim support only for Simple TA.
3116 		 */
3117 		p[5] = 1;
3118 		xfer_size = page_length + 4;
3119 		break;
3120 
3121 	case 0xb0:
3122 		page_length = 0x3c;
3123 		p[0] = byte0;
3124 		p[1] = 0xb0;
3125 		p[3] = page_length;
3126 		p[4] = 1;
3127 		p[5] = sbd_ats_max_nblks();
3128 		if (sl->sl_flags & SL_UNMAP_ENABLED && sbd_unmap_enable) {
3129 			p[20] = (stmf_sbd_unmap_max_nblks >> 24) & 0xff;
3130 			p[21] = (stmf_sbd_unmap_max_nblks >> 16) & 0xff;
3131 			p[22] = (stmf_sbd_unmap_max_nblks >> 8) & 0xff;
3132 			p[23] = stmf_sbd_unmap_max_nblks & 0xff;
3133 
3134 			p[24] = 0;
3135 			p[25] = 0;
3136 			p[26] = 0;
3137 			p[27] = 0xFF;
3138 		}
3139 		xfer_size = page_length + 4;
3140 		break;
3141 
3142 	case 0xb2:
3143 		if ((sl->sl_flags & SL_UNMAP_ENABLED) == 0) {
3144 			stmf_scsilib_send_status(task, STATUS_CHECK,
3145 			    STMF_SAA_INVALID_FIELD_IN_CDB);
3146 			goto err_done;
3147 		}
3148 		page_length = 4;
3149 		p[0] = byte0;
3150 		p[1] = 0xb2;
3151 		p[3] = page_length;
3152 
3153 		exp = (uint8_t)sl->sl_data_blocksize_shift;
3154 		s = sl->sl_lu_size >> sl->sl_data_blocksize_shift;
3155 		while (s & ((uint64_t)0xFFFFFFFF80000000ull)) {
3156 			s >>= 1;
3157 			exp++;
3158 		}
3159 		p[4] = exp;
3160 		p[5] = 0xc0;	/* Logical provisioning UNMAP and WRITE SAME */
3161 		xfer_size = page_length + 4;
3162 		break;
3163 
3164 	default:
3165 		stmf_scsilib_send_status(task, STATUS_CHECK,
3166 		    STMF_SAA_INVALID_FIELD_IN_CDB);
3167 		goto err_done;
3168 	}
3169 
3170 	sbd_handle_short_read_transfers(task, initial_dbuf, p, cmd_size,
3171 	    min(cmd_size, xfer_size));
3172 err_done:
3173 	kmem_free(p, bsize);
3174 	rw_exit(&sbd_global_prop_lock);
3175 }
3176 
3177 stmf_status_t
sbd_task_alloc(struct scsi_task * task)3178 sbd_task_alloc(struct scsi_task *task)
3179 {
3180 	if ((task->task_lu_private =
3181 	    kmem_zalloc(sizeof (sbd_cmd_t), KM_NOSLEEP)) != NULL) {
3182 		sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
3183 		scmd->flags = 0;
3184 		return (STMF_SUCCESS);
3185 	}
3186 	return (STMF_ALLOC_FAILURE);
3187 }
3188 
3189 void
sbd_remove_it_handle(sbd_lu_t * sl,sbd_it_data_t * it)3190 sbd_remove_it_handle(sbd_lu_t *sl, sbd_it_data_t *it)
3191 {
3192 	sbd_it_data_t **ppit;
3193 
3194 	sbd_pgr_remove_it_handle(sl, it);
3195 	mutex_enter(&sl->sl_lock);
3196 	for (ppit = &sl->sl_it_list; *ppit != NULL;
3197 	    ppit = &((*ppit)->sbd_it_next)) {
3198 		if ((*ppit) == it) {
3199 			*ppit = it->sbd_it_next;
3200 			break;
3201 		}
3202 	}
3203 	mutex_exit(&sl->sl_lock);
3204 
3205 	DTRACE_PROBE2(itl__nexus__end, stmf_lu_t *, sl->sl_lu,
3206 	    sbd_it_data_t *, it);
3207 
3208 	kmem_free(it, sizeof (*it));
3209 }
3210 
3211 void
sbd_check_and_clear_scsi2_reservation(sbd_lu_t * sl,sbd_it_data_t * it)3212 sbd_check_and_clear_scsi2_reservation(sbd_lu_t *sl, sbd_it_data_t *it)
3213 {
3214 	mutex_enter(&sl->sl_lock);
3215 	if ((sl->sl_flags & SL_LU_HAS_SCSI2_RESERVATION) == 0) {
3216 		/* If we dont have any reservations, just get out. */
3217 		mutex_exit(&sl->sl_lock);
3218 		return;
3219 	}
3220 
3221 	if (it == NULL) {
3222 		/* Find the I_T nexus which is holding the reservation. */
3223 		for (it = sl->sl_it_list; it != NULL; it = it->sbd_it_next) {
3224 			if (it->sbd_it_flags & SBD_IT_HAS_SCSI2_RESERVATION) {
3225 				ASSERT(it->sbd_it_session_id ==
3226 				    sl->sl_rs_owner_session_id);
3227 				break;
3228 			}
3229 		}
3230 		ASSERT(it != NULL);
3231 	} else {
3232 		/*
3233 		 * We were passed an I_T nexus. If this nexus does not hold
3234 		 * the reservation, do nothing. This is why this function is
3235 		 * called "check_and_clear".
3236 		 */
3237 		if ((it->sbd_it_flags & SBD_IT_HAS_SCSI2_RESERVATION) == 0) {
3238 			mutex_exit(&sl->sl_lock);
3239 			return;
3240 		}
3241 	}
3242 	it->sbd_it_flags &= ~SBD_IT_HAS_SCSI2_RESERVATION;
3243 	sl->sl_flags &= ~SL_LU_HAS_SCSI2_RESERVATION;
3244 	mutex_exit(&sl->sl_lock);
3245 }
3246 
3247 /*
3248  * Given a LU and a task, check if the task is causing reservation
3249  * conflict. Returns 1 in case of conflict, 0 otherwise.
3250  * Note that the LU might not be the same LU as in the task but the
3251  * caller makes sure that the LU can be accessed.
3252  */
3253 int
sbd_check_reservation_conflict(struct sbd_lu * sl,struct scsi_task * task)3254 sbd_check_reservation_conflict(struct sbd_lu *sl, struct scsi_task *task)
3255 {
3256 	sbd_it_data_t *it;
3257 
3258 	it = task->task_lu_itl_handle;
3259 	ASSERT(it);
3260 	if (sl->sl_access_state == SBD_LU_ACTIVE) {
3261 		if (SBD_PGR_RSVD(sl->sl_pgr)) {
3262 			if (sbd_pgr_reservation_conflict(task, sl)) {
3263 				return (1);
3264 			}
3265 		} else if ((sl->sl_flags & SL_LU_HAS_SCSI2_RESERVATION) &&
3266 		    ((it->sbd_it_flags & SBD_IT_HAS_SCSI2_RESERVATION) == 0)) {
3267 			if (!(SCSI2_CONFLICT_FREE_CMDS(task->task_cdb))) {
3268 				return (1);
3269 			}
3270 		}
3271 	}
3272 
3273 	return (0);
3274 }
3275 
3276 /*
3277  * Keep in mind that sbd_new_task can be called multiple times for the same
3278  * task because of us calling stmf_task_poll_lu resulting in a call to
3279  * sbd_task_poll().
3280  */
3281 void
sbd_new_task(struct scsi_task * task,struct stmf_data_buf * initial_dbuf)3282 sbd_new_task(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
3283 {
3284 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
3285 	sbd_it_data_t *it;
3286 	uint8_t cdb0, cdb1;
3287 	stmf_status_t st_ret;
3288 
3289 	if ((it = task->task_lu_itl_handle) == NULL) {
3290 		mutex_enter(&sl->sl_lock);
3291 		for (it = sl->sl_it_list; it != NULL; it = it->sbd_it_next) {
3292 			if (it->sbd_it_session_id ==
3293 			    task->task_session->ss_session_id) {
3294 				mutex_exit(&sl->sl_lock);
3295 				stmf_scsilib_send_status(task, STATUS_BUSY, 0);
3296 				return;
3297 			}
3298 		}
3299 		it = (sbd_it_data_t *)kmem_zalloc(sizeof (*it), KM_NOSLEEP);
3300 		if (it == NULL) {
3301 			mutex_exit(&sl->sl_lock);
3302 			stmf_scsilib_send_status(task, STATUS_BUSY, 0);
3303 			return;
3304 		}
3305 		it->sbd_it_session_id = task->task_session->ss_session_id;
3306 		bcopy(task->task_lun_no, it->sbd_it_lun, 8);
3307 		it->sbd_it_next = sl->sl_it_list;
3308 		sl->sl_it_list = it;
3309 		mutex_exit(&sl->sl_lock);
3310 
3311 		DTRACE_PROBE1(itl__nexus__start, scsi_task *, task);
3312 
3313 		sbd_pgr_initialize_it(task, it);
3314 		if (stmf_register_itl_handle(task->task_lu, task->task_lun_no,
3315 		    task->task_session, it->sbd_it_session_id, it)
3316 		    != STMF_SUCCESS) {
3317 			sbd_remove_it_handle(sl, it);
3318 			stmf_scsilib_send_status(task, STATUS_BUSY, 0);
3319 			return;
3320 		}
3321 		task->task_lu_itl_handle = it;
3322 		if (sl->sl_access_state != SBD_LU_STANDBY) {
3323 			it->sbd_it_ua_conditions = SBD_UA_POR;
3324 		}
3325 	} else if (it->sbd_it_flags & SBD_IT_PGR_CHECK_FLAG) {
3326 		mutex_enter(&sl->sl_lock);
3327 		it->sbd_it_flags &= ~SBD_IT_PGR_CHECK_FLAG;
3328 		mutex_exit(&sl->sl_lock);
3329 		sbd_pgr_initialize_it(task, it);
3330 	}
3331 
3332 	if (task->task_mgmt_function) {
3333 		stmf_scsilib_handle_task_mgmt(task);
3334 		return;
3335 	}
3336 
3337 	/*
3338 	 * if we're transitioning between access
3339 	 * states, return NOT READY
3340 	 */
3341 	if (sl->sl_access_state == SBD_LU_TRANSITION_TO_STANDBY ||
3342 	    sl->sl_access_state == SBD_LU_TRANSITION_TO_ACTIVE) {
3343 		stmf_scsilib_send_status(task, STATUS_CHECK,
3344 		    STMF_SAA_LU_NO_ACCESS_TRANSITION);
3345 		return;
3346 	}
3347 
3348 	cdb0 = task->task_cdb[0];
3349 	cdb1 = task->task_cdb[1];
3350 	/*
3351 	 * Special case for different versions of Windows.
3352 	 * 1) Windows 2012 and VMWare will fail to discover LU's if a READ
3353 	 *    operation sent down the standby path returns an error. By default
3354 	 *    standby_fail_reads will be set to 0.
3355 	 * 2) Windows 2008 R2 has a severe performace problem if READ ops
3356 	 *    aren't rejected on the standby path. 2008 sends commands
3357 	 *    down the standby path which then must be proxied over to the
3358 	 *    active node and back.
3359 	 */
3360 	if ((sl->sl_access_state == SBD_LU_STANDBY) &&
3361 	    stmf_standby_fail_reads &&
3362 	    (cdb0 == SCMD_READ || cdb0 == SCMD_READ_G1 ||
3363 	    cdb0 == SCMD_READ_G4 || cdb0 == SCMD_READ_G5)) {
3364 		stmf_scsilib_send_status(task, STATUS_CHECK,
3365 		    STMF_SAA_LU_NO_ACCESS_STANDBY);
3366 		return;
3367 	}
3368 
3369 	/*
3370 	 * Don't go further if cmd is unsupported in standby mode
3371 	 */
3372 	if (sl->sl_access_state == SBD_LU_STANDBY) {
3373 		if (cdb0 != SCMD_INQUIRY &&
3374 		    cdb0 != SCMD_MODE_SENSE &&
3375 		    cdb0 != SCMD_MODE_SENSE_G1 &&
3376 		    cdb0 != SCMD_MODE_SELECT &&
3377 		    cdb0 != SCMD_MODE_SELECT_G1 &&
3378 		    cdb0 != SCMD_RESERVE &&
3379 		    cdb0 != SCMD_RELEASE &&
3380 		    cdb0 != SCMD_PERSISTENT_RESERVE_OUT &&
3381 		    cdb0 != SCMD_PERSISTENT_RESERVE_IN &&
3382 		    cdb0 != SCMD_REQUEST_SENSE &&
3383 		    cdb0 != SCMD_READ_CAPACITY &&
3384 		    cdb0 != SCMD_TEST_UNIT_READY &&
3385 		    cdb0 != SCMD_START_STOP &&
3386 		    cdb0 != SCMD_READ &&
3387 		    cdb0 != SCMD_READ_G1 &&
3388 		    cdb0 != SCMD_READ_G4 &&
3389 		    cdb0 != SCMD_READ_G5 &&
3390 		    !(cdb0 == SCMD_SVC_ACTION_IN_G4 &&
3391 		    cdb1 == SSVC_ACTION_READ_CAPACITY_G4) &&
3392 		    !(cdb0 == SCMD_MAINTENANCE_IN &&
3393 		    (cdb1 & 0x1F) == 0x05) &&
3394 		    !(cdb0 == SCMD_MAINTENANCE_IN &&
3395 		    (cdb1 & 0x1F) == 0x0A)) {
3396 			stmf_scsilib_send_status(task, STATUS_CHECK,
3397 			    STMF_SAA_LU_NO_ACCESS_STANDBY);
3398 			return;
3399 		}
3400 	}
3401 
3402 	/*
3403 	 * Checking ua conditions as per SAM3R14 5.3.2 specified order. During
3404 	 * MPIO/ALUA failover, cmds come in through local ports and proxy port
3405 	 * port provider (i.e. pppt), we want to report unit attention to
3406 	 * only local cmds since initiators (Windows MPIO/DSM) would continue
3407 	 * sending I/O to the target that reported unit attention.
3408 	 */
3409 	if ((it->sbd_it_ua_conditions) &&
3410 	    !(task->task_additional_flags & TASK_AF_PPPT_TASK) &&
3411 	    (task->task_cdb[0] != SCMD_INQUIRY)) {
3412 		uint32_t saa = 0;
3413 
3414 		mutex_enter(&sl->sl_lock);
3415 		if (it->sbd_it_ua_conditions & SBD_UA_POR) {
3416 			it->sbd_it_ua_conditions &= ~SBD_UA_POR;
3417 			saa = STMF_SAA_POR;
3418 		} else if (it->sbd_it_ua_conditions &
3419 		    SBD_UA_ASYMMETRIC_ACCESS_CHANGED) {
3420 			it->sbd_it_ua_conditions &=
3421 			    ~SBD_UA_ASYMMETRIC_ACCESS_CHANGED;
3422 			saa = STMF_SAA_ASYMMETRIC_ACCESS_CHANGED;
3423 		}
3424 		mutex_exit(&sl->sl_lock);
3425 		if (saa) {
3426 			stmf_scsilib_send_status(task, STATUS_CHECK, saa);
3427 			return;
3428 		}
3429 	}
3430 
3431 	/* Reservation conflict checks */
3432 	if (sbd_check_reservation_conflict(sl, task)) {
3433 		stmf_scsilib_send_status(task,
3434 		    STATUS_RESERVATION_CONFLICT, 0);
3435 		return;
3436 	}
3437 
3438 	/* Rest of the ua conndition checks */
3439 	if ((it->sbd_it_ua_conditions) && (task->task_cdb[0] != SCMD_INQUIRY)) {
3440 		uint32_t saa = 0;
3441 
3442 		mutex_enter(&sl->sl_lock);
3443 		if (it->sbd_it_ua_conditions & SBD_UA_CAPACITY_CHANGED) {
3444 			it->sbd_it_ua_conditions &= ~SBD_UA_CAPACITY_CHANGED;
3445 			if ((task->task_cdb[0] == SCMD_READ_CAPACITY) ||
3446 			    ((task->task_cdb[0] == SCMD_SVC_ACTION_IN_G4) &&
3447 			    (task->task_cdb[1] ==
3448 			    SSVC_ACTION_READ_CAPACITY_G4))) {
3449 				saa = 0;
3450 			} else {
3451 				saa = STMF_SAA_CAPACITY_DATA_HAS_CHANGED;
3452 			}
3453 		} else if (it->sbd_it_ua_conditions &
3454 		    SBD_UA_MODE_PARAMETERS_CHANGED) {
3455 			it->sbd_it_ua_conditions &=
3456 			    ~SBD_UA_MODE_PARAMETERS_CHANGED;
3457 			saa = STMF_SAA_MODE_PARAMETERS_CHANGED;
3458 		} else if (it->sbd_it_ua_conditions &
3459 		    SBD_UA_ASYMMETRIC_ACCESS_CHANGED) {
3460 			saa = 0;
3461 		} else if (it->sbd_it_ua_conditions & SBD_UA_POR) {
3462 			saa = 0;
3463 		} else if (it->sbd_it_ua_conditions &
3464 		    SBD_UA_ACCESS_STATE_TRANSITION) {
3465 			it->sbd_it_ua_conditions &=
3466 			    ~SBD_UA_ACCESS_STATE_TRANSITION;
3467 			saa = STMF_SAA_LU_NO_ACCESS_TRANSITION;
3468 		} else {
3469 			it->sbd_it_ua_conditions = 0;
3470 			saa = 0;
3471 		}
3472 		mutex_exit(&sl->sl_lock);
3473 		if (saa) {
3474 			stmf_scsilib_send_status(task, STATUS_CHECK, saa);
3475 			return;
3476 		}
3477 	}
3478 
3479 	if (sl->sl_access_state == SBD_LU_STANDBY) {
3480 		/*
3481 		 * is this a short write?
3482 		 * if so, we'll need to wait until we have the buffer
3483 		 * before proxying the command
3484 		 */
3485 		switch (cdb0) {
3486 			case SCMD_MODE_SELECT:
3487 			case SCMD_MODE_SELECT_G1:
3488 			case SCMD_PERSISTENT_RESERVE_OUT:
3489 				break;
3490 			default:
3491 				st_ret = stmf_proxy_scsi_cmd(task,
3492 				    initial_dbuf);
3493 				if (st_ret != STMF_SUCCESS) {
3494 					stmf_scsilib_send_status(task,
3495 					    STATUS_CHECK,
3496 					    STMF_SAA_LU_NO_ACCESS_UNAVAIL);
3497 				}
3498 				return;
3499 		}
3500 	}
3501 
3502 	cdb0 = task->task_cdb[0] & 0x1F;
3503 
3504 	if ((cdb0 == SCMD_READ) || (cdb0 == SCMD_WRITE)) {
3505 		if (task->task_additional_flags & TASK_AF_PORT_LOAD_HIGH) {
3506 			stmf_scsilib_send_status(task, STATUS_QFULL, 0);
3507 			return;
3508 		}
3509 		if (cdb0 == SCMD_READ) {
3510 			sbd_handle_read(task, initial_dbuf);
3511 			return;
3512 		}
3513 		sbd_handle_write(task, initial_dbuf);
3514 		return;
3515 	}
3516 
3517 	cdb0 = task->task_cdb[0];
3518 	cdb1 = task->task_cdb[1];
3519 
3520 	if (cdb0 == SCMD_INQUIRY) {		/* Inquiry */
3521 		sbd_handle_inquiry(task, initial_dbuf);
3522 		return;
3523 	}
3524 
3525 	if (cdb0  == SCMD_PERSISTENT_RESERVE_OUT) {
3526 		sbd_handle_pgr_out_cmd(task, initial_dbuf);
3527 		return;
3528 	}
3529 
3530 	if (cdb0  == SCMD_PERSISTENT_RESERVE_IN) {
3531 		sbd_handle_pgr_in_cmd(task, initial_dbuf);
3532 		return;
3533 	}
3534 
3535 	if (cdb0 == SCMD_RELEASE) {
3536 		if (cdb1) {
3537 			stmf_scsilib_send_status(task, STATUS_CHECK,
3538 			    STMF_SAA_INVALID_FIELD_IN_CDB);
3539 			return;
3540 		}
3541 
3542 		mutex_enter(&sl->sl_lock);
3543 		if (sl->sl_flags & SL_LU_HAS_SCSI2_RESERVATION) {
3544 			/* If not owner don't release it, just return good */
3545 			if (it->sbd_it_session_id !=
3546 			    sl->sl_rs_owner_session_id) {
3547 				mutex_exit(&sl->sl_lock);
3548 				stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3549 				return;
3550 			}
3551 		}
3552 		sl->sl_flags &= ~SL_LU_HAS_SCSI2_RESERVATION;
3553 		it->sbd_it_flags &= ~SBD_IT_HAS_SCSI2_RESERVATION;
3554 		mutex_exit(&sl->sl_lock);
3555 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3556 		return;
3557 	}
3558 
3559 	if (cdb0 == SCMD_RESERVE) {
3560 		if (cdb1) {
3561 			stmf_scsilib_send_status(task, STATUS_CHECK,
3562 			    STMF_SAA_INVALID_FIELD_IN_CDB);
3563 			return;
3564 		}
3565 
3566 		mutex_enter(&sl->sl_lock);
3567 		if (sl->sl_flags & SL_LU_HAS_SCSI2_RESERVATION) {
3568 			/* If not owner, return conflict status */
3569 			if (it->sbd_it_session_id !=
3570 			    sl->sl_rs_owner_session_id) {
3571 				mutex_exit(&sl->sl_lock);
3572 				stmf_scsilib_send_status(task,
3573 				    STATUS_RESERVATION_CONFLICT, 0);
3574 				return;
3575 			}
3576 		}
3577 		sl->sl_flags |= SL_LU_HAS_SCSI2_RESERVATION;
3578 		it->sbd_it_flags |= SBD_IT_HAS_SCSI2_RESERVATION;
3579 		sl->sl_rs_owner_session_id = it->sbd_it_session_id;
3580 		mutex_exit(&sl->sl_lock);
3581 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3582 		return;
3583 	}
3584 
3585 	if (cdb0 == SCMD_REQUEST_SENSE) {
3586 		/*
3587 		 * LU provider needs to store unretrieved sense data
3588 		 * (e.g. after power-on/reset).  For now, we'll just
3589 		 * return good status with no sense.
3590 		 */
3591 
3592 		if ((cdb1 & ~1) || task->task_cdb[2] || task->task_cdb[3] ||
3593 		    task->task_cdb[5]) {
3594 			stmf_scsilib_send_status(task, STATUS_CHECK,
3595 			    STMF_SAA_INVALID_FIELD_IN_CDB);
3596 		} else {
3597 			stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3598 		}
3599 
3600 		return;
3601 	}
3602 
3603 	/* Report Target Port Groups */
3604 	if ((cdb0 == SCMD_MAINTENANCE_IN) &&
3605 	    ((cdb1 & 0x1F) == 0x0A)) {
3606 		stmf_scsilib_handle_report_tpgs(task, initial_dbuf);
3607 		return;
3608 	}
3609 
3610 	/* Report Identifying Information */
3611 	if ((cdb0 == SCMD_MAINTENANCE_IN) &&
3612 	    ((cdb1 & 0x1F) == 0x05)) {
3613 		sbd_handle_identifying_info(task, initial_dbuf);
3614 		return;
3615 	}
3616 
3617 	if (cdb0 == SCMD_START_STOP) {			/* Start stop */
3618 		task->task_cmd_xfer_length = 0;
3619 		if (task->task_cdb[4] & 0xFC) {
3620 			stmf_scsilib_send_status(task, STATUS_CHECK,
3621 			    STMF_SAA_INVALID_FIELD_IN_CDB);
3622 			return;
3623 		}
3624 		if (task->task_cdb[4] & 2) {
3625 			stmf_scsilib_send_status(task, STATUS_CHECK,
3626 			    STMF_SAA_INVALID_FIELD_IN_CDB);
3627 		} else {
3628 			stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3629 		}
3630 		return;
3631 
3632 	}
3633 
3634 	if ((cdb0 == SCMD_MODE_SENSE) || (cdb0 == SCMD_MODE_SENSE_G1)) {
3635 		uint8_t *p;
3636 		p = kmem_zalloc(512, KM_SLEEP);
3637 		sbd_handle_mode_sense(task, initial_dbuf, p);
3638 		kmem_free(p, 512);
3639 		return;
3640 	}
3641 
3642 	if ((cdb0 == SCMD_MODE_SELECT) || (cdb0 == SCMD_MODE_SELECT_G1)) {
3643 		sbd_handle_mode_select(task, initial_dbuf);
3644 		return;
3645 	}
3646 
3647 	if ((cdb0 == SCMD_UNMAP) && (sl->sl_flags & SL_UNMAP_ENABLED)) {
3648 		sbd_handle_unmap(task, initial_dbuf);
3649 		return;
3650 	}
3651 
3652 	if ((cdb0 == SCMD_WRITE_SAME_G4) || (cdb0 == SCMD_WRITE_SAME_G1)) {
3653 		sbd_handle_write_same(task, initial_dbuf);
3654 		return;
3655 	}
3656 
3657 	if (cdb0 == SCMD_COMPARE_AND_WRITE) {
3658 		sbd_handle_ats(task, initial_dbuf);
3659 		return;
3660 	}
3661 
3662 	if (cdb0 == SCMD_EXTENDED_COPY) {
3663 		sbd_handle_xcopy(task, initial_dbuf);
3664 		return;
3665 	}
3666 
3667 	if (cdb0 == SCMD_RECV_COPY_RESULTS) {
3668 		sbd_handle_recv_copy_results(task, initial_dbuf);
3669 		return;
3670 	}
3671 
3672 	if (cdb0 == SCMD_TEST_UNIT_READY) {	/* Test unit ready */
3673 		task->task_cmd_xfer_length = 0;
3674 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3675 		return;
3676 	}
3677 
3678 	if (cdb0 == SCMD_READ_CAPACITY) {		/* Read Capacity */
3679 		sbd_handle_read_capacity(task, initial_dbuf);
3680 		return;
3681 	}
3682 
3683 	if (cdb0 == SCMD_SVC_ACTION_IN_G4) { /* Read Capacity or read long */
3684 		if (cdb1 == SSVC_ACTION_READ_CAPACITY_G4) {
3685 			sbd_handle_read_capacity(task, initial_dbuf);
3686 			return;
3687 		/*
3688 		 * } else if (cdb1 == SSVC_ACTION_READ_LONG_G4) {
3689 		 *	sbd_handle_read(task, initial_dbuf);
3690 		 *	return;
3691 		 */
3692 		}
3693 	}
3694 
3695 	/*
3696 	 * if (cdb0 == SCMD_SVC_ACTION_OUT_G4) {
3697 	 *	if (cdb1 == SSVC_ACTION_WRITE_LONG_G4) {
3698 	 *		 sbd_handle_write(task, initial_dbuf);
3699 	 *		return;
3700 	 *	}
3701 	 * }
3702 	 */
3703 
3704 	if (cdb0 == SCMD_VERIFY) {
3705 		/*
3706 		 * Something more likely needs to be done here.
3707 		 */
3708 		task->task_cmd_xfer_length = 0;
3709 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3710 		return;
3711 	}
3712 
3713 	if (cdb0 == SCMD_SYNCHRONIZE_CACHE ||
3714 	    cdb0 == SCMD_SYNCHRONIZE_CACHE_G4) {
3715 		sbd_handle_sync_cache(task, initial_dbuf);
3716 		return;
3717 	}
3718 
3719 	/*
3720 	 * Write and Verify use the same path as write, but don't clutter the
3721 	 * performance path above with checking for write_verify opcodes.  We
3722 	 * rely on zfs's integrity checks for the "Verify" part of Write &
3723 	 * Verify.  (Even if we did a read to "verify" we'd merely be reading
3724 	 * cache, not actual media.)
3725 	 * Therefore we
3726 	 *   a) only support this if sbd_is_zvol, and
3727 	 *   b) run the IO through the normal write path with a forced
3728 	 *	sbd_flush_data_cache at the end.
3729 	 */
3730 
3731 	if ((sl->sl_flags & SL_ZFS_META) && (
3732 	    cdb0 == SCMD_WRITE_VERIFY ||
3733 	    cdb0 == SCMD_WRITE_VERIFY_G4 ||
3734 	    cdb0 == SCMD_WRITE_VERIFY_G5)) {
3735 		sbd_handle_write(task, initial_dbuf);
3736 		return;
3737 	}
3738 	stmf_scsilib_send_status(task, STATUS_CHECK, STMF_SAA_INVALID_OPCODE);
3739 }
3740 
3741 void
sbd_dbuf_xfer_done(struct scsi_task * task,struct stmf_data_buf * dbuf)3742 sbd_dbuf_xfer_done(struct scsi_task *task, struct stmf_data_buf *dbuf)
3743 {
3744 	sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
3745 
3746 	if (dbuf->db_flags & DB_LU_DATA_BUF) {
3747 		/*
3748 		 * Buffers passed in from the LU always complete
3749 		 * even if the task is no longer active.
3750 		 */
3751 		ASSERT(task->task_additional_flags & TASK_AF_ACCEPT_LU_DBUF);
3752 		ASSERT(scmd);
3753 		switch (scmd->cmd_type) {
3754 		case (SBD_CMD_SCSI_READ):
3755 			sbd_handle_sgl_read_xfer_completion(task, scmd, dbuf);
3756 			break;
3757 		case (SBD_CMD_SCSI_WRITE):
3758 			sbd_handle_sgl_write_xfer_completion(task, scmd, dbuf);
3759 			break;
3760 		default:
3761 			cmn_err(CE_PANIC, "Unknown cmd type, task = %p",
3762 			    (void *)task);
3763 			break;
3764 		}
3765 		return;
3766 	}
3767 
3768 	if ((scmd == NULL) || ((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0))
3769 		return;
3770 
3771 	switch (scmd->cmd_type) {
3772 	case (SBD_CMD_SCSI_READ):
3773 		sbd_handle_read_xfer_completion(task, scmd, dbuf);
3774 		break;
3775 
3776 	case (SBD_CMD_SCSI_WRITE):
3777 		switch (task->task_cdb[0]) {
3778 		case SCMD_WRITE_SAME_G1:
3779 		case SCMD_WRITE_SAME_G4:
3780 			sbd_handle_write_same_xfer_completion(task, scmd, dbuf,
3781 			    1);
3782 			break;
3783 		case SCMD_COMPARE_AND_WRITE:
3784 			sbd_handle_ats_xfer_completion(task, scmd, dbuf, 1);
3785 			break;
3786 		default:
3787 			sbd_handle_write_xfer_completion(task, scmd, dbuf, 1);
3788 			/* FALLTHRU */
3789 		}
3790 		break;
3791 
3792 	case (SBD_CMD_SMALL_READ):
3793 		sbd_handle_short_read_xfer_completion(task, scmd, dbuf);
3794 		break;
3795 
3796 	case (SBD_CMD_SMALL_WRITE):
3797 		sbd_handle_short_write_xfer_completion(task, dbuf);
3798 		break;
3799 
3800 	default:
3801 		cmn_err(CE_PANIC, "Unknown cmd type, task = %p", (void *)task);
3802 		break;
3803 	}
3804 }
3805 
3806 /* ARGSUSED */
3807 void
sbd_send_status_done(struct scsi_task * task)3808 sbd_send_status_done(struct scsi_task *task)
3809 {
3810 	cmn_err(CE_PANIC,
3811 	    "sbd_send_status_done: this should not have been called");
3812 }
3813 
3814 void
sbd_task_free(struct scsi_task * task)3815 sbd_task_free(struct scsi_task *task)
3816 {
3817 	if (task->task_lu_private) {
3818 		sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
3819 		if (scmd->flags & SBD_SCSI_CMD_ACTIVE) {
3820 			cmn_err(CE_PANIC, "cmd is active, task = %p",
3821 			    (void *)task);
3822 		}
3823 		kmem_free(scmd, sizeof (sbd_cmd_t));
3824 	}
3825 }
3826 
3827 /*
3828  * Aborts are synchronus w.r.t. I/O AND
3829  * All the I/O which SBD does is synchronous AND
3830  * Everything within a task is single threaded.
3831  *   IT MEANS
3832  * If this function is called, we are doing nothing with this task
3833  * inside of sbd module.
3834  */
3835 /* ARGSUSED */
3836 stmf_status_t
sbd_abort(struct stmf_lu * lu,int abort_cmd,void * arg,uint32_t flags)3837 sbd_abort(struct stmf_lu *lu, int abort_cmd, void *arg, uint32_t flags)
3838 {
3839 	sbd_lu_t *sl = (sbd_lu_t *)lu->lu_provider_private;
3840 	scsi_task_t *task;
3841 
3842 	if (abort_cmd == STMF_LU_RESET_STATE) {
3843 		return (sbd_lu_reset_state(lu));
3844 	}
3845 
3846 	if (abort_cmd == STMF_LU_ITL_HANDLE_REMOVED) {
3847 		sbd_check_and_clear_scsi2_reservation(sl, (sbd_it_data_t *)arg);
3848 		sbd_remove_it_handle(sl, (sbd_it_data_t *)arg);
3849 		return (STMF_SUCCESS);
3850 	}
3851 
3852 	ASSERT(abort_cmd == STMF_LU_ABORT_TASK);
3853 	task = (scsi_task_t *)arg;
3854 	sbd_ats_remove_by_task(task);
3855 	if (task->task_lu_private) {
3856 		sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
3857 
3858 		if (scmd->flags & SBD_SCSI_CMD_ACTIVE) {
3859 			if (scmd->flags & SBD_SCSI_CMD_TRANS_DATA) {
3860 				kmem_free(scmd->trans_data,
3861 				    scmd->trans_data_len);
3862 				scmd->flags &= ~SBD_SCSI_CMD_TRANS_DATA;
3863 			}
3864 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
3865 			return (STMF_ABORT_SUCCESS);
3866 		}
3867 	}
3868 
3869 	return (STMF_NOT_FOUND);
3870 }
3871 
3872 void
sbd_task_poll(struct scsi_task * task)3873 sbd_task_poll(struct scsi_task *task)
3874 {
3875 	stmf_data_buf_t *initial_dbuf;
3876 
3877 	initial_dbuf = stmf_handle_to_buf(task, 0);
3878 	sbd_new_task(task, initial_dbuf);
3879 }
3880 
3881 /*
3882  * This function is called during task clean-up if the
3883  * DB_LU_FLAG is set on the dbuf. This should only be called for
3884  * abort processing after sbd_abort has been called for the task.
3885  */
3886 void
sbd_dbuf_free(struct scsi_task * task,struct stmf_data_buf * dbuf)3887 sbd_dbuf_free(struct scsi_task *task, struct stmf_data_buf *dbuf)
3888 {
3889 	sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
3890 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
3891 
3892 	ASSERT(dbuf->db_lu_private);
3893 	ASSERT(scmd && ATOMIC8_GET(scmd->nbufs) > 0);
3894 	ASSERT((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0);
3895 	ASSERT(dbuf->db_flags & DB_LU_DATA_BUF);
3896 	ASSERT(task->task_additional_flags & TASK_AF_ACCEPT_LU_DBUF);
3897 	ASSERT((curthread->t_flag & T_INTR_THREAD) == 0);
3898 
3899 	if (scmd->cmd_type == SBD_CMD_SCSI_READ) {
3900 		sbd_zvol_rele_read_bufs(sl, dbuf);
3901 	} else if (scmd->cmd_type == SBD_CMD_SCSI_WRITE) {
3902 		sbd_zvol_rele_write_bufs_abort(sl, dbuf);
3903 	} else {
3904 		cmn_err(CE_PANIC, "Unknown cmd type %d, task = %p",
3905 		    scmd->cmd_type, (void *)task);
3906 	}
3907 	if (atomic_dec_8_nv(&scmd->nbufs) == 0)
3908 		rw_exit(&sl->sl_access_state_lock);
3909 	stmf_teardown_dbuf(task, dbuf);
3910 	stmf_free(dbuf);
3911 }
3912 
3913 /* ARGSUSED */
3914 void
sbd_ctl(struct stmf_lu * lu,int cmd,void * arg)3915 sbd_ctl(struct stmf_lu *lu, int cmd, void *arg)
3916 {
3917 	sbd_lu_t *sl = (sbd_lu_t *)lu->lu_provider_private;
3918 	stmf_change_status_t st;
3919 
3920 	ASSERT((cmd == STMF_CMD_LU_ONLINE) ||
3921 	    (cmd == STMF_CMD_LU_OFFLINE) ||
3922 	    (cmd == STMF_ACK_LU_ONLINE_COMPLETE) ||
3923 	    (cmd == STMF_ACK_LU_OFFLINE_COMPLETE));
3924 
3925 	st.st_completion_status = STMF_SUCCESS;
3926 	st.st_additional_info = NULL;
3927 
3928 	switch (cmd) {
3929 	case STMF_CMD_LU_ONLINE:
3930 		if (sl->sl_state == STMF_STATE_ONLINE)
3931 			st.st_completion_status = STMF_ALREADY;
3932 		else if (sl->sl_state != STMF_STATE_OFFLINE)
3933 			st.st_completion_status = STMF_FAILURE;
3934 		if (st.st_completion_status == STMF_SUCCESS) {
3935 			sl->sl_state = STMF_STATE_ONLINE;
3936 			sl->sl_state_not_acked = 1;
3937 		}
3938 		(void) stmf_ctl(STMF_CMD_LU_ONLINE_COMPLETE, lu, &st);
3939 		break;
3940 
3941 	case STMF_CMD_LU_OFFLINE:
3942 		if (sl->sl_state == STMF_STATE_OFFLINE)
3943 			st.st_completion_status = STMF_ALREADY;
3944 		else if (sl->sl_state != STMF_STATE_ONLINE)
3945 			st.st_completion_status = STMF_FAILURE;
3946 		if (st.st_completion_status == STMF_SUCCESS) {
3947 			sl->sl_flags &= ~(SL_MEDIUM_REMOVAL_PREVENTED |
3948 			    SL_LU_HAS_SCSI2_RESERVATION);
3949 			sl->sl_state = STMF_STATE_OFFLINE;
3950 			sl->sl_state_not_acked = 1;
3951 			sbd_pgr_reset(sl);
3952 		}
3953 		(void) stmf_ctl(STMF_CMD_LU_OFFLINE_COMPLETE, lu, &st);
3954 		break;
3955 
3956 	case STMF_ACK_LU_ONLINE_COMPLETE:
3957 		/* Fallthrough */
3958 	case STMF_ACK_LU_OFFLINE_COMPLETE:
3959 		sl->sl_state_not_acked = 0;
3960 		break;
3961 
3962 	}
3963 }
3964 
3965 /* ARGSUSED */
3966 stmf_status_t
sbd_info(uint32_t cmd,stmf_lu_t * lu,void * arg,uint8_t * buf,uint32_t * bufsizep)3967 sbd_info(uint32_t cmd, stmf_lu_t *lu, void *arg, uint8_t *buf,
3968     uint32_t *bufsizep)
3969 {
3970 	return (STMF_NOT_SUPPORTED);
3971 }
3972 
3973 stmf_status_t
sbd_lu_reset_state(stmf_lu_t * lu)3974 sbd_lu_reset_state(stmf_lu_t *lu)
3975 {
3976 	sbd_lu_t *sl = (sbd_lu_t *)lu->lu_provider_private;
3977 
3978 	mutex_enter(&sl->sl_lock);
3979 	if (sl->sl_flags & SL_SAVED_WRITE_CACHE_DISABLE) {
3980 		sl->sl_flags |= SL_WRITEBACK_CACHE_DISABLE;
3981 		mutex_exit(&sl->sl_lock);
3982 		if (sl->sl_access_state == SBD_LU_ACTIVE) {
3983 			(void) sbd_wcd_set(1, sl);
3984 		}
3985 	} else {
3986 		sl->sl_flags &= ~SL_WRITEBACK_CACHE_DISABLE;
3987 		mutex_exit(&sl->sl_lock);
3988 		if (sl->sl_access_state == SBD_LU_ACTIVE) {
3989 			(void) sbd_wcd_set(0, sl);
3990 		}
3991 	}
3992 	sbd_pgr_reset(sl);
3993 	sbd_check_and_clear_scsi2_reservation(sl, NULL);
3994 	if (stmf_deregister_all_lu_itl_handles(lu) != STMF_SUCCESS) {
3995 		return (STMF_FAILURE);
3996 	}
3997 	return (STMF_SUCCESS);
3998 }
3999 
4000 sbd_status_t
sbd_flush_data_cache(sbd_lu_t * sl,int fsync_done)4001 sbd_flush_data_cache(sbd_lu_t *sl, int fsync_done)
4002 {
4003 	sbd_status_t ret = SBD_SUCCESS;
4004 
4005 	rw_enter(&sl->sl_access_state_lock, RW_READER);
4006 	if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
4007 		ret = SBD_FILEIO_FAILURE;
4008 		goto flush_fail;
4009 	}
4010 	if (fsync_done)
4011 		goto over_fsync;
4012 	if ((sl->sl_data_vtype == VREG) || (sl->sl_data_vtype == VBLK)) {
4013 		if (VOP_FSYNC(sl->sl_data_vp, FSYNC, kcred, NULL)) {
4014 			ret = SBD_FAILURE;
4015 			goto flush_fail;
4016 		}
4017 	}
4018 over_fsync:
4019 	if (((sl->sl_data_vtype == VCHR) || (sl->sl_data_vtype == VBLK)) &&
4020 	    ((sl->sl_flags & SL_NO_DATA_DKIOFLUSH) == 0)) {
4021 		int r = 0;
4022 
4023 		ret = VOP_IOCTL(sl->sl_data_vp, DKIOCFLUSHWRITECACHE, 0,
4024 		    FKIOCTL, kcred, &r, NULL);
4025 		if ((ret == ENOTTY) || (ret == ENOTSUP)) {
4026 			mutex_enter(&sl->sl_lock);
4027 			sl->sl_flags |= SL_NO_DATA_DKIOFLUSH;
4028 			mutex_exit(&sl->sl_lock);
4029 		} else {
4030 			ret = (ret != 0) ? SBD_FAILURE : SBD_SUCCESS;
4031 		}
4032 	}
4033 flush_fail:
4034 	rw_exit(&sl->sl_access_state_lock);
4035 
4036 	return (ret);
4037 }
4038 
4039 /* ARGSUSED */
4040 static void
sbd_handle_sync_cache(struct scsi_task * task,struct stmf_data_buf * initial_dbuf)4041 sbd_handle_sync_cache(struct scsi_task *task,
4042     struct stmf_data_buf *initial_dbuf)
4043 {
4044 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
4045 	uint64_t	lba, laddr;
4046 	sbd_status_t	sret;
4047 	uint32_t	len;
4048 	int		is_g4 = 0;
4049 	int		immed;
4050 
4051 	task->task_cmd_xfer_length = 0;
4052 	/*
4053 	 * Determine if this is a 10 or 16 byte CDB
4054 	 */
4055 
4056 	if (task->task_cdb[0] == SCMD_SYNCHRONIZE_CACHE_G4)
4057 		is_g4 = 1;
4058 
4059 	/*
4060 	 * Determine other requested parameters
4061 	 *
4062 	 * We don't have a non-volatile cache, so don't care about SYNC_NV.
4063 	 * Do not support the IMMED bit.
4064 	 */
4065 
4066 	immed = (task->task_cdb[1] & 0x02);
4067 
4068 	if (immed) {
4069 		stmf_scsilib_send_status(task, STATUS_CHECK,
4070 		    STMF_SAA_INVALID_FIELD_IN_CDB);
4071 		return;
4072 	}
4073 
4074 	/*
4075 	 * Check to be sure we're not being asked to sync an LBA
4076 	 * that is out of range.  While checking, verify reserved fields.
4077 	 */
4078 
4079 	if (is_g4) {
4080 		if ((task->task_cdb[1] & 0xf9) || task->task_cdb[14] ||
4081 		    task->task_cdb[15]) {
4082 			stmf_scsilib_send_status(task, STATUS_CHECK,
4083 			    STMF_SAA_INVALID_FIELD_IN_CDB);
4084 			return;
4085 		}
4086 
4087 		lba = READ_SCSI64(&task->task_cdb[2], uint64_t);
4088 		len = READ_SCSI32(&task->task_cdb[10], uint32_t);
4089 	} else {
4090 		if ((task->task_cdb[1] & 0xf9) || task->task_cdb[6] ||
4091 		    task->task_cdb[9]) {
4092 			stmf_scsilib_send_status(task, STATUS_CHECK,
4093 			    STMF_SAA_INVALID_FIELD_IN_CDB);
4094 			return;
4095 		}
4096 
4097 		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
4098 		len = READ_SCSI16(&task->task_cdb[7], uint32_t);
4099 	}
4100 
4101 	laddr = lba << sl->sl_data_blocksize_shift;
4102 	len <<= sl->sl_data_blocksize_shift;
4103 
4104 	if ((laddr + (uint64_t)len) > sl->sl_lu_size) {
4105 		stmf_scsilib_send_status(task, STATUS_CHECK,
4106 		    STMF_SAA_LBA_OUT_OF_RANGE);
4107 		return;
4108 	}
4109 
4110 	sret = sbd_flush_data_cache(sl, 0);
4111 	if (sret != SBD_SUCCESS) {
4112 		stmf_scsilib_send_status(task, STATUS_CHECK,
4113 		    STMF_SAA_WRITE_ERROR);
4114 		return;
4115 	}
4116 
4117 	stmf_scsilib_send_status(task, STATUS_GOOD, 0);
4118 }
4119