xref: /illumos-gate/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_scsi.c (revision ca783257c986cddcc674ae22916a6766b98a2d36)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright 2017 Nexenta Systems, Inc.  All rights reserved.
25  * Copyright (c) 2013 by Delphix. All rights reserved.
26  * Copyright 2019 Joyent, Inc.
27  */
28 
29 #include <sys/conf.h>
30 #include <sys/file.h>
31 #include <sys/ddi.h>
32 #include <sys/sunddi.h>
33 #include <sys/modctl.h>
34 #include <sys/scsi/scsi.h>
35 #include <sys/scsi/impl/scsi_reset_notify.h>
36 #include <sys/scsi/generic/mode.h>
37 #include <sys/disp.h>
38 #include <sys/byteorder.h>
39 #include <sys/atomic.h>
40 #include <sys/sdt.h>
41 #include <sys/dkio.h>
42 #include <sys/dkioc_free_util.h>
43 
44 #include <sys/stmf.h>
45 #include <sys/lpif.h>
46 #include <sys/portif.h>
47 #include <sys/stmf_ioctl.h>
48 #include <sys/stmf_sbd_ioctl.h>
49 
50 #include "stmf_sbd.h"
51 #include "sbd_impl.h"
52 
53 #define	SCSI2_CONFLICT_FREE_CMDS(cdb)	( \
54 	/* ----------------------- */                                      \
55 	/* Refer Both		   */                                      \
56 	/* SPC-2 (rev 20) Table 10 */                                      \
57 	/* SPC-3 (rev 23) Table 31 */                                      \
58 	/* ----------------------- */                                      \
59 	((cdb[0]) == SCMD_INQUIRY)					|| \
60 	((cdb[0]) == SCMD_LOG_SENSE_G1)					|| \
61 	((cdb[0]) == SCMD_RELEASE)					|| \
62 	((cdb[0]) == SCMD_RELEASE_G1)					|| \
63 	((cdb[0]) == SCMD_REPORT_LUNS)					|| \
64 	((cdb[0]) == SCMD_REQUEST_SENSE)				|| \
65 	/* PREVENT ALLOW MEDIUM REMOVAL with prevent == 0 */               \
66 	((((cdb[0]) == SCMD_DOORLOCK) && (((cdb[4]) & 0x3) == 0)))	|| \
67 	/* SERVICE ACTION IN with READ MEDIA SERIAL NUMBER (0x01) */       \
68 	(((cdb[0]) == SCMD_SVC_ACTION_IN_G5) && (                          \
69 	    ((cdb[1]) & 0x1F) == 0x01))					|| \
70 	/* MAINTENANCE IN with service actions REPORT ALIASES (0x0B) */    \
71 	/* REPORT DEVICE IDENTIFIER (0x05)  REPORT PRIORITY (0x0E) */      \
72 	/* REPORT TARGET PORT GROUPS (0x0A) REPORT TIMESTAMP (0x0F) */     \
73 	(((cdb[0]) == SCMD_MAINTENANCE_IN) && (                            \
74 	    (((cdb[1]) & 0x1F) == 0x0B) ||                                 \
75 	    (((cdb[1]) & 0x1F) == 0x05) ||                                 \
76 	    (((cdb[1]) & 0x1F) == 0x0E) ||                                 \
77 	    (((cdb[1]) & 0x1F) == 0x0A) ||                                 \
78 	    (((cdb[1]) & 0x1F) == 0x0F)))				|| \
79 	/* ----------------------- */                                      \
80 	/* SBC-3 (rev 17) Table 3  */                                      \
81 	/* ----------------------- */                                      \
82 	/* READ CAPACITY(10) */                                            \
83 	((cdb[0]) == SCMD_READ_CAPACITY)				|| \
84 	/* READ CAPACITY(16) */                                            \
85 	(((cdb[0]) == SCMD_SVC_ACTION_IN_G4) && (                          \
86 	    ((cdb[1]) & 0x1F) == 0x10))					|| \
87 	/* START STOP UNIT with START bit 0 and POWER CONDITION 0  */      \
88 	(((cdb[0]) == SCMD_START_STOP) && (                                \
89 	    (((cdb[4]) & 0xF0) == 0) && (((cdb[4]) & 0x01) == 0))))
90 /* End of SCSI2_CONFLICT_FREE_CMDS */
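/*
 * A usage sketch, not taken verbatim from this file: the reservation
 * conflict checks are assumed to let the commands above through even when
 * another initiator holds a SCSI-2 reservation, along the lines of:
 *
 *	if (scsi2_reservation_held_by_other_it &&	(placeholder condition)
 *	    !SCSI2_CONFLICT_FREE_CMDS(task->task_cdb)) {
 *		stmf_scsilib_send_status(task,
 *		    STATUS_RESERVATION_CONFLICT, 0);
 *		return;
 *	}
 */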
91 
92 uint8_t HardwareAcceleratedInit = 1;
93 uint8_t sbd_unmap_enable = 1;		/* allow unmap by default */
94 
95 /*
96  * An /etc/system tunable which specifies the maximum number of LBAs supported
97  * in a single UNMAP operation. The default is 0x002000 (8192) blocks, i.e. 4MB.
98  */
99 int stmf_sbd_unmap_max_nblks  = 0x002000;
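/*
 * Example override, assuming the module name stmf_sbd, in /etc/system:
 *	set stmf_sbd:stmf_sbd_unmap_max_nblks = 0x4000
 */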
100 
101 /*
102  * An /etc/system tunable which indicates whether READ ops on the standby
103  * path are allowed to run or must return an error.
104  */
105 int stmf_standby_fail_reads = 0;
106 
107 stmf_status_t sbd_lu_reset_state(stmf_lu_t *lu);
108 static void sbd_handle_sync_cache(struct scsi_task *task,
109     struct stmf_data_buf *initial_dbuf);
110 void sbd_handle_read_xfer_completion(struct scsi_task *task,
111     sbd_cmd_t *scmd, struct stmf_data_buf *dbuf);
112 void sbd_handle_short_write_xfer_completion(scsi_task_t *task,
113     stmf_data_buf_t *dbuf);
114 void sbd_handle_mode_select_xfer(scsi_task_t *task, uint8_t *buf,
115     uint32_t buflen);
116 void sbd_handle_mode_select(scsi_task_t *task, stmf_data_buf_t *dbuf);
117 void sbd_handle_identifying_info(scsi_task_t *task, stmf_data_buf_t *dbuf);
118 
119 static void sbd_handle_unmap_xfer(scsi_task_t *task, uint8_t *buf,
120     uint32_t buflen);
121 static void sbd_handle_unmap(scsi_task_t *task, stmf_data_buf_t *dbuf);
122 
123 extern void sbd_pgr_initialize_it(scsi_task_t *, sbd_it_data_t *);
124 extern int sbd_pgr_reservation_conflict(scsi_task_t *, struct sbd_lu *sl);
125 extern void sbd_pgr_reset(sbd_lu_t *);
126 extern void sbd_pgr_remove_it_handle(sbd_lu_t *, sbd_it_data_t *);
127 extern void sbd_handle_pgr_in_cmd(scsi_task_t *, stmf_data_buf_t *);
128 extern void sbd_handle_pgr_out_cmd(scsi_task_t *, stmf_data_buf_t *);
129 extern void sbd_handle_pgr_out_data(scsi_task_t *, stmf_data_buf_t *);
130 void sbd_do_sgl_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
131     int first_xfer);
132 static void sbd_handle_write_same(scsi_task_t *task,
133     struct stmf_data_buf *initial_dbuf);
134 static void sbd_do_write_same_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
135     struct stmf_data_buf *dbuf, uint8_t dbuf_reusable);
136 static void sbd_handle_write_same_xfer_completion(struct scsi_task *task,
137     sbd_cmd_t *scmd, struct stmf_data_buf *dbuf, uint8_t dbuf_reusable);
138 /*
139  * IMPORTANT NOTE:
140  * =================
141  * The whole world here is based on the assumption that everything within
142  * a scsi task executes in a single-threaded manner, even the aborts.
143  * Don't ever change that. There won't be any performance gain, but there
144  * will be tons of race conditions.
145  */
146 
147 void
148 sbd_do_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
149     struct stmf_data_buf *dbuf)
150 {
151 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
152 	uint64_t laddr;
153 	uint32_t len, buflen, iolen;
154 	int ndx;
155 	int bufs_to_take;
156 
157 	/* Let's try not to hog all the buffers the port has. */
158 	bufs_to_take = ((task->task_max_nbufs > 2) &&
159 	    (task->task_cmd_xfer_length < (32 * 1024))) ? 2 :
160 	    task->task_max_nbufs;
161 
162 	len = ATOMIC32_GET(scmd->len) > dbuf->db_buf_size ?
163 	    dbuf->db_buf_size : ATOMIC32_GET(scmd->len);
164 	laddr = scmd->addr + scmd->current_ro;
165 
166 	for (buflen = 0, ndx = 0; (buflen < len) &&
167 	    (ndx < dbuf->db_sglist_length); ndx++) {
168 		iolen = min(len - buflen, dbuf->db_sglist[ndx].seg_length);
169 		if (iolen == 0)
170 			break;
171 		if (sbd_data_read(sl, task, laddr, (uint64_t)iolen,
172 		    dbuf->db_sglist[ndx].seg_addr) != STMF_SUCCESS) {
173 			scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
174 			/* Do not need to do xfer anymore, just complete it */
175 			dbuf->db_data_size = 0;
176 			dbuf->db_xfer_status = STMF_SUCCESS;
177 			sbd_handle_read_xfer_completion(task, scmd, dbuf);
178 			return;
179 		}
180 		buflen += iolen;
181 		laddr += (uint64_t)iolen;
182 	}
183 	dbuf->db_relative_offset = scmd->current_ro;
184 	dbuf->db_data_size = buflen;
185 	dbuf->db_flags = DB_DIRECTION_TO_RPORT;
186 	(void) stmf_xfer_data(task, dbuf, 0);
187 	atomic_add_32(&scmd->len, -buflen);
188 	scmd->current_ro += buflen;
189 	if (ATOMIC32_GET(scmd->len) &&
190 	    (ATOMIC8_GET(scmd->nbufs) < bufs_to_take)) {
191 		uint32_t maxsize, minsize, old_minsize;
192 
193 		maxsize = (ATOMIC32_GET(scmd->len) > (128*1024)) ? 128 * 1024 :
194 		    ATOMIC32_GET(scmd->len);
195 		minsize = maxsize >> 2;
196 		do {
197 			/*
198 			 * A bad port implementation can keep on failing the
199 			 * request while sending us a false
200 			 * minsize.
201 			 */
202 			old_minsize = minsize;
203 			dbuf = stmf_alloc_dbuf(task, maxsize, &minsize, 0);
204 		} while ((dbuf == NULL) && (old_minsize > minsize) &&
205 		    (minsize >= 512));
206 		if (dbuf == NULL) {
207 			return;
208 		}
209 		atomic_inc_8(&scmd->nbufs);
210 		sbd_do_read_xfer(task, scmd, dbuf);
211 	}
212 }
213 
214 /*
215  * sbd_zcopy: Bail-out switch for reduced copy path.
216  *
217  * 0 - read & write off
218  * 1 - read & write on
219  * 2 - only read on
220  * 4 - only write on
221  */
222 int sbd_zcopy = 1;	/* enable zcopy read & write path */
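/*
 * The values above are tested as bit masks: the read path checks
 * (sbd_zcopy & (2|1)) and the write path checks (sbd_zcopy & (4|1)),
 * so the read-only (2) and write-only (4) settings may be combined.
 */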
223 uint32_t sbd_max_xfer_len = 0;		/* Valid if non-zero */
224 uint32_t sbd_1st_xfer_len = 0;		/* Valid if non-zero */
225 uint32_t sbd_copy_threshold = 0;		/* Valid if non-zero */
226 
227 static void
228 sbd_do_sgl_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer)
229 {
230 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
231 	sbd_zvol_io_t *zvio;
232 	int ret, final_xfer;
233 	uint64_t offset;
234 	uint32_t xfer_len, max_len, first_len;
235 	stmf_status_t xstat;
236 	stmf_data_buf_t *dbuf;
237 	uint_t nblks;
238 	uint64_t blksize = sl->sl_blksize;
239 	size_t db_private_sz;
240 	hrtime_t xfer_start;
241 	uintptr_t pad;
242 
243 	ASSERT(rw_read_held(&sl->sl_access_state_lock));
244 	ASSERT((sl->sl_flags & SL_MEDIA_LOADED) != 0);
245 
246 	/*
247 	 * Calculate the limit on xfer_len as the minimum of:
248 	 *    - task limit
249 	 *    - lun limit
250 	 *    - sbd global limit if set
251 	 *    - first xfer limit if set
252 	 *
253 	 * First, protect against silly over-ride value
254 	 */
255 	if (sbd_max_xfer_len && ((sbd_max_xfer_len % DEV_BSIZE) != 0)) {
256 		cmn_err(CE_WARN, "sbd_max_xfer_len invalid %d, resetting\n",
257 		    sbd_max_xfer_len);
258 		sbd_max_xfer_len = 0;
259 	}
260 	if (sbd_1st_xfer_len && ((sbd_1st_xfer_len % DEV_BSIZE) != 0)) {
261 		cmn_err(CE_WARN, "sbd_1st_xfer_len invalid %d, resetting\n",
262 		    sbd_1st_xfer_len);
263 		sbd_1st_xfer_len = 0;
264 	}
265 
266 	max_len = MIN(task->task_max_xfer_len, sl->sl_max_xfer_len);
267 	if (sbd_max_xfer_len)
268 		max_len = MIN(max_len, sbd_max_xfer_len);
269 	/*
270 	 * Special case the first xfer if hints are set.
271 	 */
272 	if (first_xfer && (sbd_1st_xfer_len || task->task_1st_xfer_len)) {
273 		/* global over-ride has precedence */
274 		if (sbd_1st_xfer_len)
275 			first_len = sbd_1st_xfer_len;
276 		else
277 			first_len = task->task_1st_xfer_len;
278 	} else {
279 		first_len = 0;
280 	}
281 
282 	while (ATOMIC32_GET(scmd->len) &&
283 	    ATOMIC8_GET(scmd->nbufs) < task->task_max_nbufs) {
284 
285 		xfer_len = MIN(max_len, ATOMIC32_GET(scmd->len));
286 		if (first_len) {
287 			xfer_len = MIN(xfer_len, first_len);
288 			first_len = 0;
289 		}
290 		if (ATOMIC32_GET(scmd->len) == xfer_len) {
291 			final_xfer = 1;
292 		} else {
293 			/*
294 			 * Attempt to end xfer on a block boundary.
295 			 * The only way this does not happen is if the
296 			 * xfer_len is small enough to stay contained
297 			 * within the same block.
298 			 */
299 			uint64_t xfer_offset, xfer_aligned_end;
300 
301 			final_xfer = 0;
302 			xfer_offset = scmd->addr + scmd->current_ro;
303 			xfer_aligned_end =
304 			    P2ALIGN(xfer_offset+xfer_len, blksize);
305 			if (xfer_aligned_end > xfer_offset)
306 				xfer_len = xfer_aligned_end - xfer_offset;
307 		}
308 		/*
309 		 * Allocate object to track the read and reserve
310 		 * enough space for scatter/gather list.
311 		 */
312 		offset = scmd->addr + scmd->current_ro;
313 		nblks = sbd_zvol_numsegs(sl, offset, xfer_len);
314 
315 		db_private_sz = sizeof (*zvio) + sizeof (uintptr_t) /* PAD */ +
316 		    (nblks * sizeof (stmf_sglist_ent_t));
317 		dbuf = stmf_alloc(STMF_STRUCT_DATA_BUF, db_private_sz,
318 		    AF_DONTZERO);
319 		/*
320 		 * Setup the dbuf
321 		 *
322 		 * XXX Framework does not handle variable length sglists
323 		 * properly, so setup db_lu_private and db_port_private
324 		 * fields here. db_stmf_private is properly set for
325 		 * calls to stmf_free.
326 		 */
327 		if (dbuf->db_port_private == NULL) {
328 			/*
329 			 * XXX Framework assigns space to PP after db_sglist[0]
330 			 */
331 			cmn_err(CE_PANIC, "db_port_private == NULL");
332 		}
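		/*
		 * The zvol I/O tracking struct (zvio) lives in the same
		 * allocation, immediately after the sglist entries; round
		 * its address up to pointer alignment using the PAD space
		 * reserved in db_private_sz above.
		 */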
333 		pad = (uintptr_t)&dbuf->db_sglist[nblks];
334 		dbuf->db_lu_private = (void *)P2ROUNDUP(pad, sizeof (pad));
335 		dbuf->db_port_private = NULL;
336 		dbuf->db_buf_size = xfer_len;
337 		dbuf->db_data_size = xfer_len;
338 		dbuf->db_relative_offset = scmd->current_ro;
339 		dbuf->db_sglist_length = (uint16_t)nblks;
340 		dbuf->db_xfer_status = 0;
341 		dbuf->db_handle = 0;
342 
343 		dbuf->db_flags = (DB_DONT_CACHE | DB_DONT_REUSE |
344 		    DB_DIRECTION_TO_RPORT | DB_LU_DATA_BUF);
345 		if (final_xfer)
346 			dbuf->db_flags |= DB_SEND_STATUS_GOOD;
347 
348 		zvio = dbuf->db_lu_private;
349 		/* Need absolute offset for zvol access */
350 		zvio->zvio_offset = offset;
351 		zvio->zvio_flags = ZVIO_SYNC;
352 
353 		/*
354 		 * Accounting for start of read.
355 		 * Note there is no buffer address for the probe yet.
356 		 */
357 		xfer_start = gethrtime();
358 		DTRACE_PROBE5(backing__store__read__start, sbd_lu_t *, sl,
359 		    uint8_t *, NULL, uint64_t, xfer_len,
360 		    uint64_t, offset, scsi_task_t *, task);
361 
362 		ret = sbd_zvol_alloc_read_bufs(sl, dbuf);
363 
364 		stmf_lu_xfer_done(task, B_TRUE /* read */,
365 		    (gethrtime() - xfer_start));
366 		DTRACE_PROBE6(backing__store__read__end, sbd_lu_t *, sl,
367 		    uint8_t *, NULL, uint64_t, xfer_len,
368 		    uint64_t, offset, int, ret, scsi_task_t *, task);
369 
370 		if (ret != 0) {
371 			/*
372 			 * Read failure from the backend.
373 			 */
374 			stmf_free(dbuf);
375 			if (ATOMIC8_GET(scmd->nbufs) == 0) {
376 				/* nothing queued, just finish */
377 				scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
378 				sbd_ats_remove_by_task(task);
379 				stmf_scsilib_send_status(task, STATUS_CHECK,
380 				    STMF_SAA_READ_ERROR);
381 				rw_exit(&sl->sl_access_state_lock);
382 			} else {
383 				/* process failure when other dbufs finish */
384 				scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
385 			}
386 			return;
387 		}
388 
389 		/*
390 		 * Allow PP to do setup
391 		 */
392 		xstat = stmf_setup_dbuf(task, dbuf, 0);
393 		if (xstat != STMF_SUCCESS) {
394 			/*
395 			 * This could happen if the driver cannot get the
396 			 * DDI resources it needs for this request.
397 			 * If other dbufs are queued, try again when the next
398 			 * one completes, otherwise give up.
399 			 */
400 			sbd_zvol_rele_read_bufs(sl, dbuf);
401 			stmf_free(dbuf);
402 			if (ATOMIC8_GET(scmd->nbufs) > 0) {
403 				/* completion of previous dbuf will retry */
404 				return;
405 			}
406 			/*
407 			 * Done with this command.
408 			 */
409 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
410 			sbd_ats_remove_by_task(task);
411 			if (first_xfer)
412 				stmf_scsilib_send_status(task, STATUS_QFULL, 0);
413 			else
414 				stmf_scsilib_send_status(task, STATUS_CHECK,
415 				    STMF_SAA_READ_ERROR);
416 			rw_exit(&sl->sl_access_state_lock);
417 			return;
418 		}
419 		/*
420 		 * dbuf is now queued on task
421 		 */
422 		atomic_inc_8(&scmd->nbufs);
423 
424 		/* XXX leave this in for FW? */
425 		DTRACE_PROBE4(sbd__xfer, struct scsi_task *, task,
426 		    struct stmf_data_buf *, dbuf, uint64_t, offset,
427 		    uint32_t, xfer_len);
428 		/*
429 		 * Do not pass STMF_IOF_LU_DONE so that the zvol
430 		 * state can be released in the completion callback.
431 		 */
432 		xstat = stmf_xfer_data(task, dbuf, 0);
433 		switch (xstat) {
434 		case STMF_SUCCESS:
435 			break;
436 		case STMF_BUSY:
437 			/*
438 			 * The dbuf is queued on the task, but unknown
439 			 * to the PP, thus no completion will occur.
440 			 */
441 			sbd_zvol_rele_read_bufs(sl, dbuf);
442 			stmf_teardown_dbuf(task, dbuf);
443 			stmf_free(dbuf);
444 			atomic_dec_8(&scmd->nbufs);
445 			if (ATOMIC8_GET(scmd->nbufs) > 0) {
446 				/* completion of previous dbuf will retry */
447 				return;
448 			}
449 			/*
450 			 * Done with this command.
451 			 */
452 			rw_exit(&sl->sl_access_state_lock);
453 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
454 			sbd_ats_remove_by_task(task);
455 			if (first_xfer)
456 				stmf_scsilib_send_status(task, STATUS_QFULL, 0);
457 			else
458 				stmf_scsilib_send_status(task, STATUS_CHECK,
459 				    STMF_SAA_READ_ERROR);
460 			return;
461 		case STMF_ABORTED:
462 			/*
463 			 * Completion from task_done will cleanup
464 			 */
465 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
466 			sbd_ats_remove_by_task(task);
467 			return;
468 		}
469 		/*
470 		 * Update the xfer progress.
471 		 */
472 		ASSERT(scmd->len >= xfer_len);
473 		atomic_add_32(&scmd->len, -xfer_len);
474 		scmd->current_ro += xfer_len;
475 	}
476 }
477 
478 void
479 sbd_handle_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
480     struct stmf_data_buf *dbuf)
481 {
482 	if (dbuf->db_xfer_status != STMF_SUCCESS) {
483 		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
484 		    dbuf->db_xfer_status, NULL);
485 		return;
486 	}
487 	task->task_nbytes_transferred += dbuf->db_data_size;
488 	if (ATOMIC32_GET(scmd->len) == 0 ||
489 	    scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
490 		stmf_free_dbuf(task, dbuf);
491 		atomic_dec_8(&scmd->nbufs);
492 		if (ATOMIC8_GET(scmd->nbufs))
493 			return;	/* wait for all buffers to complete */
494 		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
495 		sbd_ats_remove_by_task(task);
496 		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL)
497 			stmf_scsilib_send_status(task, STATUS_CHECK,
498 			    STMF_SAA_READ_ERROR);
499 		else
500 			stmf_scsilib_send_status(task, STATUS_GOOD, 0);
501 		return;
502 	}
503 	if (dbuf->db_flags & DB_DONT_REUSE) {
504 		/* allocate new dbuf */
505 		uint32_t maxsize, minsize, old_minsize;
506 		stmf_free_dbuf(task, dbuf);
507 
508 		maxsize = (ATOMIC32_GET(scmd->len) > (128*1024)) ?
509 		    128 * 1024 : ATOMIC32_GET(scmd->len);
510 		minsize = maxsize >> 2;
511 		do {
512 			old_minsize = minsize;
513 			dbuf = stmf_alloc_dbuf(task, maxsize, &minsize, 0);
514 		} while ((dbuf == NULL) && (old_minsize > minsize) &&
515 		    (minsize >= 512));
516 		if (dbuf == NULL) {
517 			atomic_dec_8(&scmd->nbufs);
518 			if (ATOMIC8_GET(scmd->nbufs) == 0) {
519 				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
520 				    STMF_ALLOC_FAILURE, NULL);
521 			}
522 			return;
523 		}
524 	}
525 	sbd_do_read_xfer(task, scmd, dbuf);
526 }
527 
528 /*
529  * This routine must release the DMU resources and free the dbuf
530  * in all cases.  If this is the final dbuf of the task, then drop
531  * the reader lock on the LU state. If there are no errors and more
532  * work to do, then queue more xfer operations.
533  */
534 void
535 sbd_handle_sgl_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
536     struct stmf_data_buf *dbuf)
537 {
538 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
539 	stmf_status_t xfer_status;
540 	uint32_t data_size;
541 	int scmd_err;
542 
543 	ASSERT(dbuf->db_lu_private);
544 	ASSERT(scmd->cmd_type == SBD_CMD_SCSI_READ);
545 
546 	atomic_dec_8(&scmd->nbufs);	/* account for this dbuf */
547 	/*
548 	 * Release the DMU resources.
549 	 */
550 	sbd_zvol_rele_read_bufs(sl, dbuf);
551 	/*
552 	 * Release the dbuf after retrieving needed fields.
553 	 */
554 	xfer_status = dbuf->db_xfer_status;
555 	data_size = dbuf->db_data_size;
556 	stmf_teardown_dbuf(task, dbuf);
557 	stmf_free(dbuf);
558 	/*
559 	 * Release the state lock if this is the last completion.
560 	 * If this is the last dbuf on task and all data has been
561 	 * transferred or an error encountered, then no more dbufs
562 	 * will be queued.
563 	 */
564 	scmd_err = (((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0) ||
565 	    (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) ||
566 	    (xfer_status != STMF_SUCCESS));
567 	if ((ATOMIC8_GET(scmd->nbufs) == 0) &&
568 	    (ATOMIC32_GET(scmd->len) == 0 || scmd_err)) {
569 		/* all DMU state has been released */
570 		rw_exit(&sl->sl_access_state_lock);
571 	}
572 
573 	/*
574 	 * If there have been no errors, either complete the task
575 	 * or issue more data xfer operations.
576 	 */
577 	if (!scmd_err) {
578 		/*
579 		 * This chunk completed successfully
580 		 */
581 		task->task_nbytes_transferred += data_size;
582 		if (ATOMIC8_GET(scmd->nbufs) == 0 &&
583 		    ATOMIC32_GET(scmd->len) == 0) {
584 			/*
585 			 * This command completed successfully
586 			 *
587 			 * Status was sent along with data, so no status
588 			 * completion will occur. Tell stmf we are done.
589 			 */
590 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
591 			sbd_ats_remove_by_task(task);
592 			stmf_task_lu_done(task);
593 			return;
594 		}
595 		/*
596 		 * Start more xfers
597 		 */
598 		sbd_do_sgl_read_xfer(task, scmd, 0);
599 		return;
600 	}
601 	/*
602 	 * Sort out the failure
603 	 */
604 	if (scmd->flags & SBD_SCSI_CMD_ACTIVE) {
605 		/*
606 		 * If a previous error occurred, leave the command active
607 		 * and wait for the last completion to send the status check.
608 		 */
609 		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
610 			if (ATOMIC8_GET(scmd->nbufs) == 0) {
611 				scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
612 				sbd_ats_remove_by_task(task);
613 				stmf_scsilib_send_status(task, STATUS_CHECK,
614 				    STMF_SAA_READ_ERROR);
615 			}
616 			return;
617 		}
618 		/*
619 		 * Must have been a failure on current dbuf
620 		 */
621 		ASSERT(xfer_status != STMF_SUCCESS);
622 
623 		/*
624 		 * Actually this is a bug. stmf abort should have reset the
625 		 * active flag, but since it's been this way for some time,
626 		 * I won't change it.
627 		 */
628 		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
629 		sbd_ats_remove_by_task(task);
630 		stmf_abort(STMF_QUEUE_TASK_ABORT, task, xfer_status, NULL);
631 	}
632 }
633 
634 void
635 sbd_handle_sgl_write_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
636     struct stmf_data_buf *dbuf)
637 {
638 	sbd_zvol_io_t *zvio = dbuf->db_lu_private;
639 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
640 	int ret;
641 	int scmd_err, scmd_xfer_done;
642 	stmf_status_t xfer_status = dbuf->db_xfer_status;
643 	uint32_t data_size = dbuf->db_data_size;
644 	hrtime_t xfer_start;
645 
646 	ASSERT(zvio);
647 
648 	/*
649 	 * Allow PP to free up resources before releasing the write bufs
650 	 * as writing to the backend could take some time.
651 	 */
652 	stmf_teardown_dbuf(task, dbuf);
653 
654 	atomic_dec_8(&scmd->nbufs);	/* account for this dbuf */
655 	/*
656 	 * All data was queued and this is the last completion,
657 	 * but there could still be an error.
658 	 */
659 	scmd_xfer_done = (ATOMIC32_GET(scmd->len) == 0 &&
660 	    (ATOMIC8_GET(scmd->nbufs) == 0));
661 	scmd_err = (((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0) ||
662 	    (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) ||
663 	    (xfer_status != STMF_SUCCESS));
664 
665 	xfer_start = gethrtime();
666 	DTRACE_PROBE5(backing__store__write__start, sbd_lu_t *, sl,
667 	    uint8_t *, NULL, uint64_t, data_size,
668 	    uint64_t, zvio->zvio_offset, scsi_task_t *, task);
669 
670 	if (scmd_err) {
671 		/* just return the write buffers */
672 		sbd_zvol_rele_write_bufs_abort(sl, dbuf);
673 		ret = 0;
674 	} else {
675 		if (scmd_xfer_done)
676 			zvio->zvio_flags = ZVIO_COMMIT;
677 		else
678 			zvio->zvio_flags = 0;
679 		/* write the data */
680 		ret = sbd_zvol_rele_write_bufs(sl, dbuf);
681 	}
682 
683 	stmf_lu_xfer_done(task, B_FALSE /* write */,
684 	    (gethrtime() - xfer_start));
685 	DTRACE_PROBE6(backing__store__write__end, sbd_lu_t *, sl,
686 	    uint8_t *, NULL, uint64_t, data_size,
687 	    uint64_t, zvio->zvio_offset, int, ret,  scsi_task_t *, task);
688 
689 	if (ret != 0) {
690 		/* update the error flag */
691 		scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
692 		scmd_err = 1;
693 	}
694 
695 	/* Release the dbuf */
696 	stmf_free(dbuf);
697 
698 	/*
699 	 * Release the state lock if this is the last completion.
700 	 * If this is the last dbuf on task and all data has been
701 	 * transferred or an error encountered, then no more dbufs
702 	 * will be queued.
703 	 */
704 	if ((ATOMIC8_GET(scmd->nbufs) == 0) &&
705 	    (ATOMIC32_GET(scmd->len) == 0 || scmd_err)) {
706 		/* all DMU state has been released */
707 		rw_exit(&sl->sl_access_state_lock);
708 	}
709 	/*
710 	 * If there have been no errors, either complete the task
711 	 * or issue more data xfer operations.
712 	 */
713 	if (!scmd_err) {
714 		/* This chunk completed successfully */
715 		task->task_nbytes_transferred += data_size;
716 		if (scmd_xfer_done) {
717 			/* This command completed successfully */
718 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
719 			sbd_ats_remove_by_task(task);
720 			if ((scmd->flags & SBD_SCSI_CMD_SYNC_WRITE) &&
721 			    (sbd_flush_data_cache(sl, 0) != SBD_SUCCESS)) {
722 				stmf_scsilib_send_status(task, STATUS_CHECK,
723 				    STMF_SAA_WRITE_ERROR);
724 			} else {
725 				stmf_scsilib_send_status(task, STATUS_GOOD, 0);
726 			}
727 			return;
728 		}
729 		/*
730 		 * Start more xfers
731 		 */
732 		sbd_do_sgl_write_xfer(task, scmd, 0);
733 		return;
734 	}
735 	/*
736 	 * Sort out the failure
737 	 */
738 	if (scmd->flags & SBD_SCSI_CMD_ACTIVE) {
739 		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
740 			if (ATOMIC8_GET(scmd->nbufs) == 0) {
741 				scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
742 				sbd_ats_remove_by_task(task);
743 				stmf_scsilib_send_status(task, STATUS_CHECK,
744 				    STMF_SAA_WRITE_ERROR);
745 			}
746 			/*
747 			 * Leave the command active until last dbuf completes.
748 			 */
749 			return;
750 		}
751 		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
752 		sbd_ats_remove_by_task(task);
753 		ASSERT(xfer_status != STMF_SUCCESS);
754 		stmf_abort(STMF_QUEUE_TASK_ABORT, task, xfer_status, NULL);
755 	}
756 }
757 
758 /*
759  * Handle a copy operation using the zvol interface.
760  *
761  * Similar to the sbd_data_read/write path, except it goes directly through
762  * the zvol interfaces. It can pass a port-provider sglist, in the
763  * form of a uio, which would otherwise be lost through the vn_rdwr path.
764  *
765  * Returns:
766  *	STMF_SUCCESS - request handled
767  *	STMF_FAILURE - request not handled, caller must deal with error
768  */
769 static stmf_status_t
770 sbd_copy_rdwr(scsi_task_t *task, uint64_t laddr, stmf_data_buf_t *dbuf,
771     int cmd, int commit)
772 {
773 	sbd_lu_t		*sl = task->task_lu->lu_provider_private;
774 	struct uio		uio;
775 	struct iovec		*iov, *tiov, iov1[8];
776 	uint32_t		len, resid;
777 	int			ret, i, iovcnt, flags;
778 	hrtime_t		xfer_start;
779 	boolean_t		is_read;
780 
781 	ASSERT(cmd == SBD_CMD_SCSI_READ || cmd == SBD_CMD_SCSI_WRITE);
782 
783 	is_read = (cmd == SBD_CMD_SCSI_READ) ? B_TRUE : B_FALSE;
784 	iovcnt = dbuf->db_sglist_length;
785 	/* use the stack for small iovecs */
786 	if (iovcnt > 8) {
787 		iov = kmem_alloc(iovcnt * sizeof (*iov), KM_SLEEP);
788 	} else {
789 		iov = &iov1[0];
790 	}
791 
792 	/* Convert dbuf sglist to iovec format */
793 	len = dbuf->db_data_size;
794 	resid = len;
795 	tiov = iov;
796 	for (i = 0; i < iovcnt; i++) {
797 		tiov->iov_base = (caddr_t)dbuf->db_sglist[i].seg_addr;
798 		tiov->iov_len = MIN(resid, dbuf->db_sglist[i].seg_length);
799 		resid -= tiov->iov_len;
800 		tiov++;
801 	}
802 	if (resid != 0) {
803 		cmn_err(CE_WARN, "inconsistant sglist rem %d", resid);
804 		if (iov != &iov1[0])
805 			kmem_free(iov, iovcnt * sizeof (*iov));
806 		return (STMF_FAILURE);
807 	}
808 	/* Setup the uio struct */
809 	uio.uio_iov = iov;
810 	uio.uio_iovcnt = iovcnt;
811 	uio.uio_loffset = laddr;
812 	uio.uio_segflg = (short)UIO_SYSSPACE;
813 	uio.uio_resid = (uint64_t)len;
814 	uio.uio_llimit = RLIM64_INFINITY;
815 
816 	xfer_start = gethrtime();
817 	if (is_read == B_TRUE) {
818 		uio.uio_fmode = FREAD;
819 		uio.uio_extflg = UIO_COPY_CACHED;
820 		DTRACE_PROBE5(backing__store__read__start, sbd_lu_t *, sl,
821 		    uint8_t *, NULL, uint64_t, len, uint64_t, laddr,
822 		    scsi_task_t *, task);
823 
824 		/* Fetch the data */
825 		ret = sbd_zvol_copy_read(sl, &uio);
826 
827 		DTRACE_PROBE6(backing__store__read__end, sbd_lu_t *, sl,
828 		    uint8_t *, NULL, uint64_t, len, uint64_t, laddr, int, ret,
829 		    scsi_task_t *, task);
830 	} else {
831 		uio.uio_fmode = FWRITE;
832 		uio.uio_extflg = UIO_COPY_DEFAULT;
833 		DTRACE_PROBE5(backing__store__write__start, sbd_lu_t *, sl,
834 		    uint8_t *, NULL, uint64_t, len, uint64_t, laddr,
835 		    scsi_task_t *, task);
836 
837 		flags = (commit) ? ZVIO_COMMIT : 0;
838 		/* Write the data */
839 		ret = sbd_zvol_copy_write(sl, &uio, flags);
840 
841 		DTRACE_PROBE6(backing__store__write__end, sbd_lu_t *, sl,
842 		    uint8_t *, NULL, uint64_t, len, uint64_t, laddr, int, ret,
843 		    scsi_task_t *, task);
844 	}
845 	/* finalize accounting */
846 	stmf_lu_xfer_done(task, is_read, (gethrtime() - xfer_start));
847 
848 	if (iov != &iov1[0])
849 		kmem_free(iov, iovcnt * sizeof (*iov));
850 	if (ret != 0) {
851 		/* Backend I/O error */
852 		return (STMF_FAILURE);
853 	}
854 	return (STMF_SUCCESS);
855 }
856 
857 void
858 sbd_handle_read(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
859 {
860 	uint64_t lba, laddr;
861 	uint64_t blkcount;
862 	uint32_t len;
863 	uint8_t op = task->task_cdb[0];
864 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
865 	sbd_cmd_t *scmd;
866 	stmf_data_buf_t *dbuf;
867 	int fast_path;
868 	boolean_t fua_bit = B_FALSE;
869 
870 	/*
871 	 * Check to see if the command is READ(10), READ(12), or READ(16).
872 	 * If it is then check for bit 3 being set to indicate if Forced
873 	 * Unit Access is being requested. If so, we'll bypass the use of
874  * DMU buffers to simplify support of this feature.
875 	 */
876 	if (((op == SCMD_READ_G1) || (op == SCMD_READ_G4) ||
877 	    (op == SCMD_READ_G5)) &&
878 	    (task->task_cdb[1] & BIT_3)) {
879 		fua_bit = B_TRUE;
880 	}
881 	if (op == SCMD_READ) {
882 		lba = READ_SCSI21(&task->task_cdb[1], uint64_t);
883 		len = (uint32_t)task->task_cdb[4];
884 
885 		if (len == 0) {
886 			len = 256;
887 		}
888 	} else if (op == SCMD_READ_G1) {
889 		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
890 		len = READ_SCSI16(&task->task_cdb[7], uint32_t);
891 	} else if (op == SCMD_READ_G5) {
892 		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
893 		len = READ_SCSI32(&task->task_cdb[6], uint32_t);
894 	} else if (op == SCMD_READ_G4) {
895 		lba = READ_SCSI64(&task->task_cdb[2], uint64_t);
896 		len = READ_SCSI32(&task->task_cdb[10], uint32_t);
897 	} else {
898 		stmf_scsilib_send_status(task, STATUS_CHECK,
899 		    STMF_SAA_INVALID_OPCODE);
900 		return;
901 	}
902 
903 	laddr = lba << sl->sl_data_blocksize_shift;
904 	blkcount = len;
905 	len <<= sl->sl_data_blocksize_shift;
906 
907 	if ((laddr + (uint64_t)len) > sl->sl_lu_size) {
908 		stmf_scsilib_send_status(task, STATUS_CHECK,
909 		    STMF_SAA_LBA_OUT_OF_RANGE);
910 		return;
911 	}
912 
913 	task->task_cmd_xfer_length = len;
914 	if (task->task_additional_flags & TASK_AF_NO_EXPECTED_XFER_LENGTH) {
915 		task->task_expected_xfer_length = len;
916 	}
917 
918 	if (len != task->task_expected_xfer_length) {
919 		fast_path = 0;
920 		len = (len > task->task_expected_xfer_length) ?
921 		    task->task_expected_xfer_length : len;
922 	} else {
923 		fast_path = 1;
924 	}
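	/*
	 * fast_path means the transfer length matches what the initiator
	 * expects, so a single dbuf covering the whole read can carry the
	 * data and the status together (see the single-xfer case below).
	 */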
925 
926 	if (len == 0) {
927 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
928 		return;
929 	}
930 
931 	if (sbd_ats_handling_before_io(task, sl, lba, blkcount) !=
932 	    SBD_SUCCESS) {
933 		if (stmf_task_poll_lu(task, 10) != STMF_SUCCESS) {
934 			stmf_scsilib_send_status(task, STATUS_BUSY, 0);
935 		}
936 		return;
937 	}
938 	/*
939 	 * Determine if this read can directly use DMU buffers.
940 	 */
941 	if (sbd_zcopy & (2|1) &&		/* Debug switch */
942 	    initial_dbuf == NULL &&		/* No PP buffer passed in */
943 	    sl->sl_flags & SL_CALL_ZVOL &&	/* zvol backing store */
944 	    (task->task_additional_flags &
945 	    TASK_AF_ACCEPT_LU_DBUF) &&		/* PP allows it */
946 	    !fua_bit) {
947 		/*
948 		 * Reduced copy path
949 		 */
950 		uint32_t copy_threshold, minsize;
951 		int ret;
952 
953 		/*
954 		 * The sl_access_state_lock will be held shared
955 		 * for the entire request and released when all
956 		 * dbufs have completed.
957 		 */
958 		rw_enter(&sl->sl_access_state_lock, RW_READER);
959 		if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
960 			rw_exit(&sl->sl_access_state_lock);
961 			sbd_ats_remove_by_task(task);
962 			stmf_scsilib_send_status(task, STATUS_CHECK,
963 			    STMF_SAA_READ_ERROR);
964 			return;
965 		}
966 
967 		/*
968 		 * Check if setup is more expensive than copying the data.
969 		 *
970 		 * Use the global over-ride sbd_zcopy_threshold if set.
971 		 */
972 		copy_threshold = (sbd_copy_threshold > 0) ?
973 		    sbd_copy_threshold : task->task_copy_threshold;
974 		minsize = len;
975 		if (len < copy_threshold &&
976 		    (dbuf = stmf_alloc_dbuf(task, len, &minsize, 0)) != 0) {
977 
978 			ret = sbd_copy_rdwr(task, laddr, dbuf,
979 			    SBD_CMD_SCSI_READ, 0);
980 			/* done with the backend */
981 			rw_exit(&sl->sl_access_state_lock);
982 			sbd_ats_remove_by_task(task);
983 			if (ret != 0) {
984 				/* backend error */
985 				stmf_scsilib_send_status(task, STATUS_CHECK,
986 				    STMF_SAA_READ_ERROR);
987 			} else {
988 				/* send along good data */
989 				dbuf->db_relative_offset = 0;
990 				dbuf->db_data_size = len;
991 				dbuf->db_flags = DB_SEND_STATUS_GOOD |
992 				    DB_DIRECTION_TO_RPORT;
993 				/* XXX keep for FW? */
994 				DTRACE_PROBE4(sbd__xfer,
995 				    struct scsi_task *, task,
996 				    struct stmf_data_buf *, dbuf,
997 				    uint64_t, laddr, uint32_t, len);
998 				(void) stmf_xfer_data(task, dbuf,
999 				    STMF_IOF_LU_DONE);
1000 			}
1001 			return;
1002 		}
1003 
1004 		/* committed to reduced copy */
1005 		if (task->task_lu_private) {
1006 			scmd = (sbd_cmd_t *)task->task_lu_private;
1007 		} else {
1008 			scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t),
1009 			    KM_SLEEP);
1010 			task->task_lu_private = scmd;
1011 		}
1012 		/*
1013 		 * Setup scmd to track read progress.
1014 		 */
1015 		scmd->flags = SBD_SCSI_CMD_ACTIVE | SBD_SCSI_CMD_ATS_RELATED;
1016 		scmd->cmd_type = SBD_CMD_SCSI_READ;
1017 		scmd->nbufs = 0;
1018 		scmd->addr = laddr;
1019 		scmd->len = len;
1020 		scmd->current_ro = 0;
1021 		/*
1022 		 * Kick-off the read.
1023 		 */
1024 		sbd_do_sgl_read_xfer(task, scmd, 1);
1025 		return;
1026 	}
1027 
1028 	if (initial_dbuf == NULL) {
1029 		uint32_t maxsize, minsize, old_minsize;
1030 
1031 		maxsize = (len > (128*1024)) ? 128*1024 : len;
1032 		minsize = maxsize >> 2;
1033 		do {
1034 			old_minsize = minsize;
1035 			initial_dbuf = stmf_alloc_dbuf(task, maxsize,
1036 			    &minsize, 0);
1037 		} while ((initial_dbuf == NULL) && (old_minsize > minsize) &&
1038 		    (minsize >= 512));
1039 		if (initial_dbuf == NULL) {
1040 			sbd_ats_remove_by_task(task);
1041 			stmf_scsilib_send_status(task, STATUS_QFULL, 0);
1042 			return;
1043 		}
1044 	}
1045 	dbuf = initial_dbuf;
1046 
1047 	if ((dbuf->db_buf_size >= len) && fast_path &&
1048 	    (dbuf->db_sglist_length == 1)) {
1049 		if (sbd_data_read(sl, task, laddr, (uint64_t)len,
1050 		    dbuf->db_sglist[0].seg_addr) == STMF_SUCCESS) {
1051 			dbuf->db_relative_offset = 0;
1052 			dbuf->db_data_size = len;
1053 			dbuf->db_flags = DB_SEND_STATUS_GOOD |
1054 			    DB_DIRECTION_TO_RPORT;
1055 			/* XXX keep for FW? */
1056 			DTRACE_PROBE4(sbd__xfer, struct scsi_task *, task,
1057 			    struct stmf_data_buf *, dbuf,
1058 			    uint64_t, laddr, uint32_t, len);
1059 			(void) stmf_xfer_data(task, dbuf, STMF_IOF_LU_DONE);
1060 		} else {
1061 			stmf_scsilib_send_status(task, STATUS_CHECK,
1062 			    STMF_SAA_READ_ERROR);
1063 		}
1064 		sbd_ats_remove_by_task(task);
1065 		return;
1066 	}
1067 
1068 	if (task->task_lu_private) {
1069 		scmd = (sbd_cmd_t *)task->task_lu_private;
1070 	} else {
1071 		scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
1072 		task->task_lu_private = scmd;
1073 	}
1074 	scmd->flags = SBD_SCSI_CMD_ACTIVE | SBD_SCSI_CMD_ATS_RELATED;
1075 	scmd->cmd_type = SBD_CMD_SCSI_READ;
1076 	scmd->nbufs = 1;
1077 	scmd->addr = laddr;
1078 	scmd->len = len;
1079 	scmd->current_ro = 0;
1080 
1081 	sbd_do_read_xfer(task, scmd, dbuf);
1082 }
1083 
1084 void
1085 sbd_do_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
1086     struct stmf_data_buf *dbuf, uint8_t dbuf_reusable)
1087 {
1088 	uint32_t len;
1089 	int bufs_to_take;
1090 
1091 	if (ATOMIC32_GET(scmd->len) == 0) {
1092 		goto DO_WRITE_XFER_DONE;
1093 	}
1094 
1095 	/* Let's try not to hog all the buffers the port has. */
1096 	bufs_to_take = ((task->task_max_nbufs > 2) &&
1097 	    (task->task_cmd_xfer_length < (32 * 1024))) ? 2 :
1098 	    task->task_max_nbufs;
1099 
1100 	if ((dbuf != NULL) &&
1101 	    ((dbuf->db_flags & DB_DONT_REUSE) || (dbuf_reusable == 0))) {
1102 		/* free current dbuf and allocate a new one */
1103 		stmf_free_dbuf(task, dbuf);
1104 		dbuf = NULL;
1105 	}
1106 	if (ATOMIC8_GET(scmd->nbufs) >= bufs_to_take) {
1107 		goto DO_WRITE_XFER_DONE;
1108 	}
1109 	if (dbuf == NULL) {
1110 		uint32_t maxsize, minsize, old_minsize;
1111 
1112 		maxsize = (ATOMIC32_GET(scmd->len) > (128*1024)) ? 128*1024 :
1113 		    ATOMIC32_GET(scmd->len);
1114 		minsize = maxsize >> 2;
1115 		do {
1116 			old_minsize = minsize;
1117 			dbuf = stmf_alloc_dbuf(task, maxsize, &minsize, 0);
1118 		} while ((dbuf == NULL) && (old_minsize > minsize) &&
1119 		    (minsize >= 512));
1120 		if (dbuf == NULL) {
1121 			if (ATOMIC8_GET(scmd->nbufs) == 0) {
1122 				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1123 				    STMF_ALLOC_FAILURE, NULL);
1124 			}
1125 			return;
1126 		}
1127 	}
1128 
1129 	len = ATOMIC32_GET(scmd->len) > dbuf->db_buf_size ? dbuf->db_buf_size :
1130 	    ATOMIC32_GET(scmd->len);
1131 
1132 	dbuf->db_relative_offset = scmd->current_ro;
1133 	dbuf->db_data_size = len;
1134 	dbuf->db_flags = DB_DIRECTION_FROM_RPORT;
1135 	(void) stmf_xfer_data(task, dbuf, 0);
1136 	/* outstanding port xfers and bufs used */
1137 	atomic_inc_8(&scmd->nbufs);
1138 	atomic_add_32(&scmd->len, -len);
1139 	scmd->current_ro += len;
1140 
1141 	if ((ATOMIC32_GET(scmd->len) != 0) &&
1142 	    (ATOMIC8_GET(scmd->nbufs) < bufs_to_take)) {
1143 		sbd_do_write_xfer(task, scmd, NULL, 0);
1144 	}
1145 	return;
1146 
1147 DO_WRITE_XFER_DONE:
1148 	if (dbuf != NULL) {
1149 		stmf_free_dbuf(task, dbuf);
1150 	}
1151 }
1152 
1153 void
1154 sbd_do_sgl_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer)
1155 {
1156 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1157 	sbd_zvol_io_t *zvio;
1158 	int ret;
1159 	uint32_t xfer_len, max_len, first_len;
1160 	stmf_status_t xstat;
1161 	stmf_data_buf_t *dbuf;
1162 	uint_t nblks;
1163 	uint64_t blksize = sl->sl_blksize;
1164 	uint64_t offset;
1165 	size_t db_private_sz;
1166 	uintptr_t pad;
1167 
1168 	ASSERT(rw_read_held(&sl->sl_access_state_lock));
1169 	ASSERT((sl->sl_flags & SL_MEDIA_LOADED) != 0);
1170 
1171 	/*
1172 	 * Calculate the limit on xfer_len as the minimum of:
1173 	 *    - task limit
1174 	 *    - lun limit
1175 	 *    - sbd global limit if set
1176 	 *    - first xfer limit if set
1177 	 *
1178 	 * First, protect against silly over-ride value
1179 	 */
1180 	if (sbd_max_xfer_len && ((sbd_max_xfer_len % DEV_BSIZE) != 0)) {
1181 		cmn_err(CE_WARN, "sbd_max_xfer_len invalid %d, resetting\n",
1182 		    sbd_max_xfer_len);
1183 		sbd_max_xfer_len = 0;
1184 	}
1185 	if (sbd_1st_xfer_len && ((sbd_1st_xfer_len % DEV_BSIZE) != 0)) {
1186 		cmn_err(CE_WARN, "sbd_1st_xfer_len invalid %d, resetting\n",
1187 		    sbd_1st_xfer_len);
1188 		sbd_1st_xfer_len = 0;
1189 	}
1190 
1191 	max_len = MIN(task->task_max_xfer_len, sl->sl_max_xfer_len);
1192 	if (sbd_max_xfer_len)
1193 		max_len = MIN(max_len, sbd_max_xfer_len);
1194 	/*
1195 	 * Special case the first xfer if hints are set.
1196 	 */
1197 	if (first_xfer && (sbd_1st_xfer_len || task->task_1st_xfer_len)) {
1198 		/* global over-ride has precedence */
1199 		if (sbd_1st_xfer_len)
1200 			first_len = sbd_1st_xfer_len;
1201 		else
1202 			first_len = task->task_1st_xfer_len;
1203 	} else {
1204 		first_len = 0;
1205 	}
1206 
1207 
1208 	while (ATOMIC32_GET(scmd->len) &&
1209 	    ATOMIC8_GET(scmd->nbufs) < task->task_max_nbufs) {
1210 		xfer_len = MIN(max_len, ATOMIC32_GET(scmd->len));
1211 		if (first_len) {
1212 			xfer_len = MIN(xfer_len, first_len);
1213 			first_len = 0;
1214 		}
1215 		if (xfer_len < ATOMIC32_GET(scmd->len)) {
1216 			/*
1217 			 * Attempt to end xfer on a block boundary.
1218 			 * The only way this does not happen is if the
1219 			 * xfer_len is small enough to stay contained
1220 			 * within the same block.
1221 			 */
1222 			uint64_t xfer_offset, xfer_aligned_end;
1223 
1224 			xfer_offset = scmd->addr + scmd->current_ro;
1225 			xfer_aligned_end =
1226 			    P2ALIGN(xfer_offset+xfer_len, blksize);
1227 			if (xfer_aligned_end > xfer_offset)
1228 				xfer_len = xfer_aligned_end - xfer_offset;
1229 		}
1230 		/*
1231 		 * Allocate object to track the write and reserve
1232 		 * enough space for scatter/gather list.
1233 		 */
1234 		offset = scmd->addr + scmd->current_ro;
1235 		nblks = sbd_zvol_numsegs(sl, offset, xfer_len);
1236 		db_private_sz = sizeof (*zvio) + sizeof (uintptr_t) /* PAD */ +
1237 		    (nblks * sizeof (stmf_sglist_ent_t));
1238 		dbuf = stmf_alloc(STMF_STRUCT_DATA_BUF, db_private_sz,
1239 		    AF_DONTZERO);
1240 
1241 		/*
1242 		 * Setup the dbuf
1243 		 *
1244 		 * XXX Framework does not handle variable length sglists
1245 		 * properly, so setup db_lu_private and db_port_private
1246 		 * fields here. db_stmf_private is properly set for
1247 		 * calls to stmf_free.
1248 		 */
1249 		if (dbuf->db_port_private == NULL) {
1250 			/*
1251 			 * XXX Framework assigns space to PP after db_sglist[0]
1252 			 */
1253 			cmn_err(CE_PANIC, "db_port_private == NULL");
1254 		}
1255 		pad = (uintptr_t)&dbuf->db_sglist[nblks];
1256 		dbuf->db_lu_private = (void *)P2ROUNDUP(pad, sizeof (pad));
1257 		dbuf->db_port_private = NULL;
1258 		dbuf->db_buf_size = xfer_len;
1259 		dbuf->db_data_size = xfer_len;
1260 		dbuf->db_relative_offset = scmd->current_ro;
1261 		dbuf->db_sglist_length = (uint16_t)nblks;
1262 		dbuf->db_xfer_status = 0;
1263 		dbuf->db_handle = 0;
1264 		dbuf->db_flags = (DB_DONT_CACHE | DB_DONT_REUSE |
1265 		    DB_DIRECTION_FROM_RPORT | DB_LU_DATA_BUF);
1266 
1267 		zvio = dbuf->db_lu_private;
1268 		zvio->zvio_offset = offset;
1269 
1270 		/* get the buffers */
1271 		ret = sbd_zvol_alloc_write_bufs(sl, dbuf);
1272 		if (ret != 0) {
1273 			/*
1274 			 * Could not allocate buffers from the backend;
1275 			 * treat it like an IO error.
1276 			 */
1277 			stmf_free(dbuf);
1278 			scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
1279 			if (ATOMIC8_GET(scmd->nbufs) == 0) {
1280 				/*
1281 				 * Nothing queued, so no completions coming
1282 				 */
1283 				sbd_ats_remove_by_task(task);
1284 				stmf_scsilib_send_status(task, STATUS_CHECK,
1285 				    STMF_SAA_WRITE_ERROR);
1286 				rw_exit(&sl->sl_access_state_lock);
1287 			}
1288 			/*
1289 			 * Completions of previous buffers will cleanup.
1290 			 */
1291 			return;
1292 		}
1293 
1294 		/*
1295 		 * Allow PP to do setup
1296 		 */
1297 		xstat = stmf_setup_dbuf(task, dbuf, 0);
1298 		if (xstat != STMF_SUCCESS) {
1299 			/*
1300 			 * This could happen if the driver cannot get the
1301 			 * DDI resources it needs for this request.
1302 			 * If other dbufs are queued, try again when the next
1303 			 * one completes, otherwise give up.
1304 			 */
1305 			sbd_zvol_rele_write_bufs_abort(sl, dbuf);
1306 			stmf_free(dbuf);
1307 			if (ATOMIC8_GET(scmd->nbufs) > 0) {
1308 				/* completion of previous dbuf will retry */
1309 				return;
1310 			}
1311 			/*
1312 			 * Done with this command.
1313 			 */
1314 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
1315 			sbd_ats_remove_by_task(task);
1316 			if (first_xfer)
1317 				stmf_scsilib_send_status(task, STATUS_QFULL, 0);
1318 			else
1319 				stmf_scsilib_send_status(task, STATUS_CHECK,
1320 				    STMF_SAA_WRITE_ERROR);
1321 			rw_exit(&sl->sl_access_state_lock);
1322 			return;
1323 		}
1324 
1325 		/*
1326 		 * dbuf is now queued on task
1327 		 */
1328 		atomic_inc_8(&scmd->nbufs);
1329 
1330 		xstat = stmf_xfer_data(task, dbuf, 0);
1331 		switch (xstat) {
1332 		case STMF_SUCCESS:
1333 			break;
1334 		case STMF_BUSY:
1335 			/*
1336 			 * The dbuf is queued on the task, but unknown
1337 			 * to the PP, thus no completion will occur.
1338 			 */
1339 			sbd_zvol_rele_write_bufs_abort(sl, dbuf);
1340 			stmf_teardown_dbuf(task, dbuf);
1341 			stmf_free(dbuf);
1342 			atomic_dec_8(&scmd->nbufs);
1343 			if (ATOMIC8_GET(scmd->nbufs) > 0) {
1344 				/* completion of previous dbuf will retry */
1345 				return;
1346 			}
1347 			/*
1348 			 * Done with this command.
1349 			 */
1350 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
1351 			sbd_ats_remove_by_task(task);
1352 			if (first_xfer)
1353 				stmf_scsilib_send_status(task, STATUS_QFULL, 0);
1354 			else
1355 				stmf_scsilib_send_status(task, STATUS_CHECK,
1356 				    STMF_SAA_WRITE_ERROR);
1357 			rw_exit(&sl->sl_access_state_lock);
1358 			return;
1359 		case STMF_ABORTED:
1360 			/*
1361 			 * Completion code will cleanup.
1362 			 */
1363 			scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
1364 			return;
1365 		}
1366 		/*
1367 		 * Update the xfer progress.
1368 		 */
1369 		atomic_add_32(&scmd->len, -xfer_len);
1370 		scmd->current_ro += xfer_len;
1371 	}
1372 }
1373 
1374 void
1375 sbd_handle_write_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
1376     struct stmf_data_buf *dbuf, uint8_t dbuf_reusable)
1377 {
1378 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1379 	uint64_t laddr;
1380 	uint32_t buflen, iolen;
1381 	int ndx;
1382 	uint8_t op = task->task_cdb[0];
1383 	boolean_t fua_bit = B_FALSE;
1384 
1385 	if (ATOMIC8_GET(scmd->nbufs) > 0) {
1386 		/*
1387 		 * Decrement the count to indicate the port xfer
1388 		 * into the dbuf has completed even though the buf is
1389 		 * still in use here in the LU provider.
1390 		 */
1391 		atomic_dec_8(&scmd->nbufs);
1392 	}
1393 
1394 	if (dbuf->db_xfer_status != STMF_SUCCESS) {
1395 		sbd_ats_remove_by_task(task);
1396 		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1397 		    dbuf->db_xfer_status, NULL);
1398 		return;
1399 	}
1400 
1401 	if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
1402 		goto WRITE_XFER_DONE;
1403 	}
1404 
1405 	if (ATOMIC32_GET(scmd->len) != 0) {
1406 		/*
1407 		 * Initiate the next port xfer to occur in parallel
1408 		 * with writing this buf.
1409 		 */
1410 		sbd_do_write_xfer(task, scmd, NULL, 0);
1411 	}
1412 
1413 	/*
1414 	 * Check to see if the command is WRITE(10), WRITE(12), or WRITE(16).
1415 	 * If it is then check for bit 3 being set to indicate if Forced
1416 	 * Unit Access is being requested. If so, we'll bypass the direct
1417 	 * call and handle it in sbd_data_write().
1418 	 */
1419 	if (((op == SCMD_WRITE_G1) || (op == SCMD_WRITE_G4) ||
1420 	    (op == SCMD_WRITE_G5)) && (task->task_cdb[1] & BIT_3)) {
1421 		fua_bit = B_TRUE;
1422 	}
1423 	laddr = scmd->addr + dbuf->db_relative_offset;
1424 
1425 	/*
1426 	 * If this is going to a zvol, use the direct call to
1427 	 * sbd_zvol_copy_{read,write}. The direct call interface is
1428 	 * restricted to PPs that accept sglists, but that is not required.
1429 	 */
1430 	if (sl->sl_flags & SL_CALL_ZVOL &&
1431 	    (task->task_additional_flags & TASK_AF_ACCEPT_LU_DBUF) &&
1432 	    (sbd_zcopy & (4|1)) && !fua_bit) {
1433 		int commit;
1434 
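		/*
		 * Ask the zvol layer to commit (flush) only when this dbuf
		 * is the last one of the command, i.e. no data is left to
		 * move and no other dbufs are outstanding.
		 */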
1435 		commit = (ATOMIC32_GET(scmd->len) == 0 &&
1436 		    ATOMIC8_GET(scmd->nbufs) == 0);
1437 		rw_enter(&sl->sl_access_state_lock, RW_READER);
1438 		if ((sl->sl_flags & SL_MEDIA_LOADED) == 0 ||
1439 		    sbd_copy_rdwr(task, laddr, dbuf, SBD_CMD_SCSI_WRITE,
1440 		    commit) != STMF_SUCCESS)
1441 			scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
1442 		rw_exit(&sl->sl_access_state_lock);
1443 		buflen = dbuf->db_data_size;
1444 	} else {
1445 		for (buflen = 0, ndx = 0; (buflen < dbuf->db_data_size) &&
1446 		    (ndx < dbuf->db_sglist_length); ndx++) {
1447 			iolen = min(dbuf->db_data_size - buflen,
1448 			    dbuf->db_sglist[ndx].seg_length);
1449 			if (iolen == 0)
1450 				break;
1451 			if (sbd_data_write(sl, task, laddr, (uint64_t)iolen,
1452 			    dbuf->db_sglist[ndx].seg_addr) != STMF_SUCCESS) {
1453 				scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
1454 				break;
1455 			}
1456 			buflen += iolen;
1457 			laddr += (uint64_t)iolen;
1458 		}
1459 	}
1460 	task->task_nbytes_transferred += buflen;
1461 WRITE_XFER_DONE:
1462 	if (ATOMIC32_GET(scmd->len) == 0 ||
1463 	    scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
1464 		stmf_free_dbuf(task, dbuf);
1465 		if (ATOMIC8_GET(scmd->nbufs))
1466 			return;	/* wait for all buffers to complete */
1467 		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
1468 		sbd_ats_remove_by_task(task);
1469 		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
1470 			stmf_scsilib_send_status(task, STATUS_CHECK,
1471 			    STMF_SAA_WRITE_ERROR);
1472 		} else {
1473 			/*
1474 			 * If SYNC_WRITE flag is on then we need to flush
1475 			 * cache before sending status.
1476 			 * Note: this may be a no-op because of how
1477 			 * SL_WRITEBACK_CACHE_DISABLE and
1478 			 * SL_FLUSH_ON_DISABLED_WRITECACHE are set, but not
1479 			 * worth code complexity of checking those in this code
1480 			 * path, SBD_SCSI_CMD_SYNC_WRITE is rarely set.
1481 			 */
1482 			if ((scmd->flags & SBD_SCSI_CMD_SYNC_WRITE) &&
1483 			    (sbd_flush_data_cache(sl, 0) != SBD_SUCCESS)) {
1484 				stmf_scsilib_send_status(task, STATUS_CHECK,
1485 				    STMF_SAA_WRITE_ERROR);
1486 			} else {
1487 				stmf_scsilib_send_status(task, STATUS_GOOD, 0);
1488 			}
1489 		}
1490 		return;
1491 	}
1492 	sbd_do_write_xfer(task, scmd, dbuf, dbuf_reusable);
1493 }
1494 
1495 /*
1496  * Return true if copy avoidance is beneficial.
1497  */
1498 static int
1499 sbd_zcopy_write_useful(scsi_task_t *task, uint64_t laddr, uint32_t len,
1500     uint64_t blksize)
1501 {
1502 	/*
1503 	 * If there is a global copy threshold over-ride, use it.
1504 	 * Otherwise use the PP value with the caveat that at least
1505 	 * 1/2 the data must avoid being copied to be useful.
1506 	 */
1507 	if (sbd_copy_threshold > 0) {
1508 		return (len >= sbd_copy_threshold);
1509 	} else {
1510 		uint64_t no_copy_span;
1511 
1512 		/* sub-blocksize writes always copy */
1513 		if (len < task->task_copy_threshold || len < blksize)
1514 			return (0);
1515 		/*
1516 		 * Calculate amount of data that will avoid the copy path.
1517 		 * The calculation is only valid if len >= blksize.
1518 		 */
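		/*
		 * Worked example with hypothetical numbers: blksize = 4096,
		 * laddr = 6144, len = 16384.  P2ROUNDUP(6144, 4096) = 8192,
		 * P2ALIGN(22528, 4096) = 20480, so no_copy_span = 12288,
		 * which is >= len/2 (8192), so zcopy is considered useful.
		 */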
1519 		no_copy_span = P2ALIGN(laddr+len, blksize) -
1520 		    P2ROUNDUP(laddr, blksize);
1521 		return (no_copy_span >= len/2);
1522 	}
1523 }
1524 
1525 void
1526 sbd_handle_write(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
1527 {
1528 	uint64_t lba, laddr;
1529 	uint32_t len;
1530 	uint8_t op = task->task_cdb[0], do_immediate_data = 0;
1531 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1532 	sbd_cmd_t *scmd;
1533 	stmf_data_buf_t *dbuf;
1534 	uint64_t blkcount;
1535 	uint8_t	sync_wr_flag = 0;
1536 	boolean_t fua_bit = B_FALSE;
1537 
1538 	if (sl->sl_flags & SL_WRITE_PROTECTED) {
1539 		stmf_scsilib_send_status(task, STATUS_CHECK,
1540 		    STMF_SAA_WRITE_PROTECTED);
1541 		return;
1542 	}
1543 	/*
1544 	 * Check to see if the command is WRITE(10), WRITE(12), or WRITE(16).
1545 	 * If it is then check for bit 3 being set to indicate if Forced
1546 	 * Unit Access is being requested. If so, we'll bypass the fast path
1547 	 * code to simplify support of this feature.
1548 	 */
1549 	if (((op == SCMD_WRITE_G1) || (op == SCMD_WRITE_G4) ||
1550 	    (op == SCMD_WRITE_G5)) && (task->task_cdb[1] & BIT_3)) {
1551 		fua_bit = B_TRUE;
1552 	}
1553 	if (op == SCMD_WRITE) {
1554 		lba = READ_SCSI21(&task->task_cdb[1], uint64_t);
1555 		len = (uint32_t)task->task_cdb[4];
1556 
1557 		if (len == 0) {
1558 			len = 256;
1559 		}
1560 	} else if (op == SCMD_WRITE_G1) {
1561 		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
1562 		len = READ_SCSI16(&task->task_cdb[7], uint32_t);
1563 	} else if (op == SCMD_WRITE_G5) {
1564 		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
1565 		len = READ_SCSI32(&task->task_cdb[6], uint32_t);
1566 	} else if (op == SCMD_WRITE_G4) {
1567 		lba = READ_SCSI64(&task->task_cdb[2], uint64_t);
1568 		len = READ_SCSI32(&task->task_cdb[10], uint32_t);
1569 	} else if (op == SCMD_WRITE_VERIFY) {
1570 		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
1571 		len = READ_SCSI16(&task->task_cdb[7], uint32_t);
1572 		sync_wr_flag = SBD_SCSI_CMD_SYNC_WRITE;
1573 	} else if (op == SCMD_WRITE_VERIFY_G5) {
1574 		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
1575 		len = READ_SCSI32(&task->task_cdb[6], uint32_t);
1576 		sync_wr_flag = SBD_SCSI_CMD_SYNC_WRITE;
1577 	} else if (op == SCMD_WRITE_VERIFY_G4) {
1578 		lba = READ_SCSI64(&task->task_cdb[2], uint64_t);
1579 		len = READ_SCSI32(&task->task_cdb[10], uint32_t);
1580 		sync_wr_flag = SBD_SCSI_CMD_SYNC_WRITE;
1581 	} else {
1582 		stmf_scsilib_send_status(task, STATUS_CHECK,
1583 		    STMF_SAA_INVALID_OPCODE);
1584 		return;
1585 	}
1586 
1587 	laddr = lba << sl->sl_data_blocksize_shift;
1588 	blkcount = len;
1589 	len <<= sl->sl_data_blocksize_shift;
1590 
1591 	if ((laddr + (uint64_t)len) > sl->sl_lu_size) {
1592 		stmf_scsilib_send_status(task, STATUS_CHECK,
1593 		    STMF_SAA_LBA_OUT_OF_RANGE);
1594 		return;
1595 	}
1596 
1597 	task->task_cmd_xfer_length = len;
1598 	if (task->task_additional_flags & TASK_AF_NO_EXPECTED_XFER_LENGTH) {
1599 		task->task_expected_xfer_length = len;
1600 	}
1601 
1602 	len = (len > task->task_expected_xfer_length) ?
1603 	    task->task_expected_xfer_length : len;
1604 
1605 	if (len == 0) {
1606 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
1607 		return;
1608 	}
1609 
1610 	if (sbd_ats_handling_before_io(task, sl, lba, blkcount) !=
1611 	    SBD_SUCCESS) {
1612 		if (stmf_task_poll_lu(task, 10) != STMF_SUCCESS) {
1613 			stmf_scsilib_send_status(task, STATUS_BUSY, 0);
1614 		}
1615 		return;
1616 	}
1617 
1618 	if (sbd_zcopy & (4|1) &&		/* Debug switch */
1619 	    initial_dbuf == NULL &&		/* No PP buf passed in */
1620 	    sl->sl_flags & SL_CALL_ZVOL &&	/* zvol backing store */
1621 	    (task->task_additional_flags &
1622 	    TASK_AF_ACCEPT_LU_DBUF) &&		/* PP allows it */
1623 	    sbd_zcopy_write_useful(task, laddr, len, sl->sl_blksize) &&
1624 	    !fua_bit) {
1625 
1626 		/*
1627 		 * XXX Note that disallowing initial_dbuf will eliminate
1628 		 * iSCSI from participating. For small writes, that is
1629 		 * probably ok. For large writes, it may be best to just
1630 		 * copy the data from the initial dbuf and use zcopy for
1631 		 * the rest.
1632 		 */
1633 		rw_enter(&sl->sl_access_state_lock, RW_READER);
1634 		if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
1635 			rw_exit(&sl->sl_access_state_lock);
1636 			sbd_ats_remove_by_task(task);
1637 			stmf_scsilib_send_status(task, STATUS_CHECK,
1638 			    STMF_SAA_READ_ERROR);
1639 			return;
1640 		}
1641 		/*
1642 		 * Setup scmd to track the write progress.
1643 		 */
1644 		if (task->task_lu_private) {
1645 			scmd = (sbd_cmd_t *)task->task_lu_private;
1646 		} else {
1647 			scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t),
1648 			    KM_SLEEP);
1649 			task->task_lu_private = scmd;
1650 		}
1651 		scmd->flags = SBD_SCSI_CMD_ACTIVE | SBD_SCSI_CMD_ATS_RELATED |
1652 		    sync_wr_flag;
1653 		scmd->cmd_type = SBD_CMD_SCSI_WRITE;
1654 		scmd->nbufs = 0;
1655 		scmd->addr = laddr;
1656 		scmd->len = len;
1657 		scmd->current_ro = 0;
1658 		sbd_do_sgl_write_xfer(task, scmd, 1);
1659 		return;
1660 	}
1661 
1662 	if ((initial_dbuf != NULL) && (task->task_flags & TF_INITIAL_BURST)) {
1663 		if (initial_dbuf->db_data_size > len) {
1664 			if (initial_dbuf->db_data_size >
1665 			    task->task_expected_xfer_length) {
1666 				/* protocol error */
1667 				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1668 				    STMF_INVALID_ARG, NULL);
1669 				return;
1670 			}
1671 			initial_dbuf->db_data_size = len;
1672 		}
1673 		do_immediate_data = 1;
1674 	}
1675 	dbuf = initial_dbuf;
1676 
1677 	if (task->task_lu_private) {
1678 		scmd = (sbd_cmd_t *)task->task_lu_private;
1679 	} else {
1680 		scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
1681 		task->task_lu_private = scmd;
1682 	}
1683 	scmd->flags = SBD_SCSI_CMD_ACTIVE | SBD_SCSI_CMD_ATS_RELATED |
1684 	    sync_wr_flag;
1685 	scmd->cmd_type = SBD_CMD_SCSI_WRITE;
1686 	scmd->nbufs = 0;
1687 	scmd->addr = laddr;
1688 	scmd->len = len;
1689 	scmd->current_ro = 0;
1690 
1691 	if (do_immediate_data) {
1692 		/*
1693 		 * Account for data passed in this write command
1694 		 */
1695 		(void) stmf_xfer_data(task, dbuf, STMF_IOF_STATS_ONLY);
1696 		atomic_add_32(&scmd->len, -dbuf->db_data_size);
1697 		scmd->current_ro += dbuf->db_data_size;
1698 		dbuf->db_xfer_status = STMF_SUCCESS;
1699 		sbd_handle_write_xfer_completion(task, scmd, dbuf, 0);
1700 	} else {
1701 		sbd_do_write_xfer(task, scmd, dbuf, 0);
1702 	}
1703 }
1704 
1705 /*
1706  * Utility routine to handle small, non-performance-critical data transfers
1707  * to the initiators. dbuf is an initial data buf (if any), 'p' points to a
1708  * data buffer which is the source of data for the transfer, cdb_xfer_size
1709  * is the transfer size based on the CDB, and cmd_xfer_size is the actual
1710  * amount of data this command would transfer (the size of the data at 'p').
1711  */
1712 void
1713 sbd_handle_short_read_transfers(scsi_task_t *task, stmf_data_buf_t *dbuf,
1714     uint8_t *p, uint32_t cdb_xfer_size, uint32_t cmd_xfer_size)
1715 {
1716 	uint32_t bufsize, ndx;
1717 	sbd_cmd_t *scmd;
1718 
1719 	cmd_xfer_size = min(cmd_xfer_size, cdb_xfer_size);
1720 
1721 	task->task_cmd_xfer_length = cmd_xfer_size;
1722 	if (task->task_additional_flags & TASK_AF_NO_EXPECTED_XFER_LENGTH) {
1723 		task->task_expected_xfer_length = cmd_xfer_size;
1724 	} else {
1725 		cmd_xfer_size = min(cmd_xfer_size,
1726 		    task->task_expected_xfer_length);
1727 	}
1728 
1729 	if (cmd_xfer_size == 0) {
1730 		stmf_scsilib_send_status(task, STATUS_CHECK,
1731 		    STMF_SAA_INVALID_FIELD_IN_CDB);
1732 		return;
1733 	}
1734 	if (dbuf == NULL) {
1735 		uint32_t minsize = cmd_xfer_size;
1736 
1737 		dbuf = stmf_alloc_dbuf(task, cmd_xfer_size, &minsize, 0);
1738 	}
1739 	if (dbuf == NULL) {
1740 		stmf_scsilib_send_status(task, STATUS_QFULL, 0);
1741 		return;
1742 	}
1743 
1744 	for (bufsize = 0, ndx = 0; bufsize < cmd_xfer_size; ndx++) {
1745 		uint8_t *d;
1746 		uint32_t s;
1747 
1748 		d = dbuf->db_sglist[ndx].seg_addr;
1749 		s = min((cmd_xfer_size - bufsize),
1750 		    dbuf->db_sglist[ndx].seg_length);
1751 		bcopy(p+bufsize, d, s);
1752 		bufsize += s;
1753 	}
1754 	dbuf->db_relative_offset = 0;
1755 	dbuf->db_data_size = cmd_xfer_size;
1756 	dbuf->db_flags = DB_DIRECTION_TO_RPORT;
1757 
1758 	if (task->task_lu_private == NULL) {
1759 		task->task_lu_private =
1760 		    kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
1761 	}
1762 	scmd = (sbd_cmd_t *)task->task_lu_private;
1763 
1764 	scmd->cmd_type = SBD_CMD_SMALL_READ;
1765 	scmd->flags = SBD_SCSI_CMD_ACTIVE;
1766 	(void) stmf_xfer_data(task, dbuf, 0);
1767 }
1768 
1769 void
1770 sbd_handle_short_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
1771     struct stmf_data_buf *dbuf)
1772 {
1773 	if (dbuf->db_xfer_status != STMF_SUCCESS) {
1774 		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1775 		    dbuf->db_xfer_status, NULL);
1776 		return;
1777 	}
1778 	task->task_nbytes_transferred = dbuf->db_data_size;
1779 	scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
1780 	stmf_scsilib_send_status(task, STATUS_GOOD, 0);
1781 }
1782 
1783 void
1784 sbd_handle_short_write_transfers(scsi_task_t *task,
1785     stmf_data_buf_t *dbuf, uint32_t cdb_xfer_size)
1786 {
1787 	sbd_cmd_t *scmd;
1788 
1789 	task->task_cmd_xfer_length = cdb_xfer_size;
1790 	if (task->task_additional_flags & TASK_AF_NO_EXPECTED_XFER_LENGTH) {
1791 		task->task_expected_xfer_length = cdb_xfer_size;
1792 	} else {
1793 		cdb_xfer_size = min(cdb_xfer_size,
1794 		    task->task_expected_xfer_length);
1795 	}
1796 
1797 	if (cdb_xfer_size == 0) {
1798 		stmf_scsilib_send_status(task, STATUS_CHECK,
1799 		    STMF_SAA_INVALID_FIELD_IN_CDB);
1800 		return;
1801 	}
1802 	if (task->task_lu_private == NULL) {
1803 		task->task_lu_private = kmem_zalloc(sizeof (sbd_cmd_t),
1804 		    KM_SLEEP);
1805 	} else {
1806 		bzero(task->task_lu_private, sizeof (sbd_cmd_t));
1807 	}
1808 	scmd = (sbd_cmd_t *)task->task_lu_private;
1809 	scmd->cmd_type = SBD_CMD_SMALL_WRITE;
1810 	scmd->flags = SBD_SCSI_CMD_ACTIVE;
1811 	scmd->len = cdb_xfer_size;
1812 	if (dbuf == NULL) {
1813 		uint32_t minsize = cdb_xfer_size;
1814 
1815 		dbuf = stmf_alloc_dbuf(task, cdb_xfer_size, &minsize, 0);
1816 		if (dbuf == NULL) {
1817 			stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1818 			    STMF_ALLOC_FAILURE, NULL);
1819 			return;
1820 		}
1821 		dbuf->db_data_size = cdb_xfer_size;
1822 		dbuf->db_relative_offset = 0;
1823 		dbuf->db_flags = DB_DIRECTION_FROM_RPORT;
1824 		(void) stmf_xfer_data(task, dbuf, 0);
1825 	} else {
1826 		if (dbuf->db_data_size < cdb_xfer_size) {
1827 			stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1828 			    STMF_ABORTED, NULL);
1829 			return;
1830 		}
1831 		dbuf->db_data_size = cdb_xfer_size;
1832 		sbd_handle_short_write_xfer_completion(task, dbuf);
1833 	}
1834 }
1835 
1836 void
1837 sbd_handle_short_write_xfer_completion(scsi_task_t *task,
1838     stmf_data_buf_t *dbuf)
1839 {
1840 	sbd_cmd_t *scmd;
1841 	stmf_status_t st_ret;
1842 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1843 
1844 	/*
1845 	 * For now let's assume we will get only one sglist element
1846 	 * for short writes. If that ever changes, we should allocate
1847 	 * a local buffer and copy all the sg elements to one linear space.
1848 	 */
1849 	if ((dbuf->db_xfer_status != STMF_SUCCESS) ||
1850 	    (dbuf->db_sglist_length > 1)) {
1851 		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1852 		    dbuf->db_xfer_status, NULL);
1853 		return;
1854 	}
1855 
1856 	task->task_nbytes_transferred = dbuf->db_data_size;
1857 	scmd = (sbd_cmd_t *)task->task_lu_private;
1858 	scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
1859 
1860 	/* Let's find out who to call */
1861 	switch (task->task_cdb[0]) {
1862 	case SCMD_MODE_SELECT:
1863 	case SCMD_MODE_SELECT_G1:
1864 		if (sl->sl_access_state == SBD_LU_STANDBY) {
1865 			st_ret = stmf_proxy_scsi_cmd(task, dbuf);
1866 			if (st_ret != STMF_SUCCESS) {
1867 				stmf_scsilib_send_status(task, STATUS_CHECK,
1868 				    STMF_SAA_LU_NO_ACCESS_UNAVAIL);
1869 			}
1870 		} else {
1871 			sbd_handle_mode_select_xfer(task,
1872 			    dbuf->db_sglist[0].seg_addr, dbuf->db_data_size);
1873 		}
1874 		break;
1875 	case SCMD_UNMAP:
1876 		sbd_handle_unmap_xfer(task,
1877 		    dbuf->db_sglist[0].seg_addr, dbuf->db_data_size);
1878 		break;
1879 	case SCMD_EXTENDED_COPY:
1880 		sbd_handle_xcopy_xfer(task, dbuf->db_sglist[0].seg_addr);
1881 		break;
1882 	case SCMD_PERSISTENT_RESERVE_OUT:
1883 		if (sl->sl_access_state == SBD_LU_STANDBY) {
1884 			st_ret = stmf_proxy_scsi_cmd(task, dbuf);
1885 			if (st_ret != STMF_SUCCESS) {
1886 				stmf_scsilib_send_status(task, STATUS_CHECK,
1887 				    STMF_SAA_LU_NO_ACCESS_UNAVAIL);
1888 			}
1889 		} else {
1890 			sbd_handle_pgr_out_data(task, dbuf);
1891 		}
1892 		break;
1893 	default:
1894 		/* This should never happen */
1895 		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1896 		    STMF_ABORTED, NULL);
1897 	}
1898 }
1899 
1900 void
1901 sbd_handle_read_capacity(struct scsi_task *task,
1902     struct stmf_data_buf *initial_dbuf)
1903 {
1904 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1905 	uint32_t cdb_len;
1906 	uint8_t p[32];
1907 	uint64_t s;
1908 	uint16_t blksize;
1909 
1910 	s = sl->sl_lu_size >> sl->sl_data_blocksize_shift;
1911 	s--;
1912 	blksize = ((uint16_t)1) << sl->sl_data_blocksize_shift;
1913 
1914 	switch (task->task_cdb[0]) {
1915 	case SCMD_READ_CAPACITY:
1916 		if (s & 0xffffffff00000000ull) {
1917 			p[0] = p[1] = p[2] = p[3] = 0xFF;
1918 		} else {
1919 			p[0] = (s >> 24) & 0xff;
1920 			p[1] = (s >> 16) & 0xff;
1921 			p[2] = (s >> 8) & 0xff;
1922 			p[3] = s & 0xff;
1923 		}
1924 		p[4] = 0; p[5] = 0;
1925 		p[6] = (blksize >> 8) & 0xff;
1926 		p[7] = blksize & 0xff;
1927 		sbd_handle_short_read_transfers(task, initial_dbuf, p, 8, 8);
1928 		break;
1929 
1930 	case SCMD_SVC_ACTION_IN_G4:
1931 		cdb_len = READ_SCSI32(&task->task_cdb[10], uint32_t);
1932 		bzero(p, 32);
1933 		p[0] = (s >> 56) & 0xff;
1934 		p[1] = (s >> 48) & 0xff;
1935 		p[2] = (s >> 40) & 0xff;
1936 		p[3] = (s >> 32) & 0xff;
1937 		p[4] = (s >> 24) & 0xff;
1938 		p[5] = (s >> 16) & 0xff;
1939 		p[6] = (s >> 8) & 0xff;
1940 		p[7] = s & 0xff;
1941 		p[10] = (blksize >> 8) & 0xff;
1942 		p[11] = blksize & 0xff;
1943 		if (sl->sl_flags & SL_UNMAP_ENABLED) {
1944 			p[14] = 0x80;
1945 		}
1946 		sbd_handle_short_read_transfers(task, initial_dbuf, p,
1947 		    cdb_len, 32);
1948 		break;
1949 	}
1950 }
1951 
1952 void
1953 sbd_calc_geometry(uint64_t s, uint16_t blksize, uint8_t *nsectors,
1954     uint8_t *nheads, uint32_t *ncyl)
1955 {
1956 	if (s < (4ull * 1024ull * 1024ull * 1024ull)) {
1957 		*nsectors = 32;
1958 		*nheads = 8;
1959 	} else {
1960 		*nsectors = 254;
1961 		*nheads = 254;
1962 	}
1963 	*ncyl = s / ((uint64_t)blksize * (uint64_t)(*nsectors) *
1964 	    (uint64_t)(*nheads));
1965 }
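/*
 * Example (illustrative): a 2 GiB LU with 512-byte blocks is below the
 * 4 GiB cutoff, so sbd_calc_geometry() reports 32 sectors and 8 heads,
 * giving 2^31 / (512 * 32 * 8) = 16384 cylinders; larger LUs use the
 * 254/254 geometry instead.
 */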
1966 
1967 void
1968 sbd_handle_mode_sense(struct scsi_task *task,
1969     struct stmf_data_buf *initial_dbuf, uint8_t *buf)
1970 {
1971 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1972 	uint32_t cmd_size, n;
1973 	uint8_t *cdb;
1974 	uint32_t ncyl;
1975 	uint8_t nsectors, nheads;
1976 	uint8_t page, ctrl, header_size;
1977 	uint16_t nbytes;
1978 	uint8_t *p;
1979 	uint64_t s = sl->sl_lu_size;
1980 	uint32_t dev_spec_param_offset;
1981 
1982 	p = buf;	/* buf is assumed to be zeroed out and large enough */
1983 	n = 0;
1984 	cdb = &task->task_cdb[0];
1985 	page = cdb[2] & 0x3F;
1986 	ctrl = (cdb[2] >> 6) & 3;
1987 
1988 	if (cdb[0] == SCMD_MODE_SENSE) {
1989 		cmd_size = cdb[4];
1990 		header_size = 4;
1991 		dev_spec_param_offset = 2;
1992 	} else {
1993 		cmd_size = READ_SCSI16(&cdb[7], uint32_t);
1994 		header_size = 8;
1995 		dev_spec_param_offset = 3;
1996 	}
1997 
1998 	/* Now validate the command */
1999 	if ((cdb[2] != 0) && (page != MODEPAGE_ALLPAGES) &&
2000 	    (page != MODEPAGE_CACHING) && (page != MODEPAGE_CTRL_MODE) &&
2001 	    (page != MODEPAGE_FORMAT) && (page != MODEPAGE_GEOMETRY)) {
2002 		stmf_scsilib_send_status(task, STATUS_CHECK,
2003 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2004 		return;
2005 	}
2006 
2007 	/* We will update the length in the mode header at the end */
2008 
2009 	/* Block dev device specific param in mode param header has wp bit */
2010 	if (sl->sl_flags & SL_WRITE_PROTECTED) {
2011 		p[n + dev_spec_param_offset] = BIT_7;
2012 	}
2013 	n += header_size;
2014 	/* We are not going to return any block descriptor */
2015 
2016 	nbytes = ((uint16_t)1) << sl->sl_data_blocksize_shift;
2017 	sbd_calc_geometry(s, nbytes, &nsectors, &nheads, &ncyl);
2018 
2019 	if ((page == MODEPAGE_FORMAT) || (page == MODEPAGE_ALLPAGES)) {
2020 		p[n] = 0x03;
2021 		p[n+1] = 0x16;
2022 		if (ctrl != 1) {
2023 			p[n + 11] = nsectors;
2024 			p[n + 12] = nbytes >> 8;
2025 			p[n + 13] = nbytes & 0xff;
2026 			p[n + 20] = 0x80;
2027 		}
2028 		n += 24;
2029 	}
2030 	if ((page == MODEPAGE_GEOMETRY) || (page == MODEPAGE_ALLPAGES)) {
2031 		p[n] = 0x04;
2032 		p[n + 1] = 0x16;
2033 		if (ctrl != 1) {
2034 			p[n + 2] = ncyl >> 16;
2035 			p[n + 3] = ncyl >> 8;
2036 			p[n + 4] = ncyl & 0xff;
2037 			p[n + 5] = nheads;
2038 			p[n + 20] = 0x15;
2039 			p[n + 21] = 0x18;
2040 		}
2041 		n += 24;
2042 	}
2043 	if ((page == MODEPAGE_CACHING) || (page == MODEPAGE_ALLPAGES)) {
2044 		struct mode_caching *mode_caching_page;
2045 
2046 		mode_caching_page = (struct mode_caching *)&p[n];
2047 
2048 		mode_caching_page->mode_page.code = MODEPAGE_CACHING;
2049 		mode_caching_page->mode_page.ps = 1; /* A saveable page */
2050 		mode_caching_page->mode_page.length = 0x12;
2051 
2052 		switch (ctrl) {
2053 		case (0):
2054 			/* Current */
2055 			if ((sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) == 0) {
2056 				mode_caching_page->wce = 1;
2057 			}
2058 			break;
2059 
2060 		case (1):
2061 			/* Changeable */
2062 			if ((sl->sl_flags &
2063 			    SL_WRITEBACK_CACHE_SET_UNSUPPORTED) == 0) {
2064 				mode_caching_page->wce = 1;
2065 			}
2066 			break;
2067 
2068 		default:
2069 			if ((sl->sl_flags &
2070 			    SL_SAVED_WRITE_CACHE_DISABLE) == 0) {
2071 				mode_caching_page->wce = 1;
2072 			}
2073 			break;
2074 		}
2075 		n += (sizeof (struct mode_page) +
2076 		    mode_caching_page->mode_page.length);
2077 	}
2078 	if ((page == MODEPAGE_CTRL_MODE) || (page == MODEPAGE_ALLPAGES)) {
2079 		struct mode_control_scsi3 *mode_control_page;
2080 
2081 		mode_control_page = (struct mode_control_scsi3 *)&p[n];
2082 
2083 		mode_control_page->mode_page.code = MODEPAGE_CTRL_MODE;
2084 		mode_control_page->mode_page.length =
2085 		    PAGELENGTH_MODE_CONTROL_SCSI3;
2086 		if (ctrl != 1) {
2087 			/* If not looking for changeable values, report this. */
2088 			mode_control_page->que_mod = CTRL_QMOD_UNRESTRICT;
2089 		}
2090 		n += (sizeof (struct mode_page) +
2091 		    mode_control_page->mode_page.length);
2092 	}
2093 
2094 	if (cdb[0] == SCMD_MODE_SENSE) {
2095 		if (n > 255) {
2096 			stmf_scsilib_send_status(task, STATUS_CHECK,
2097 			    STMF_SAA_INVALID_FIELD_IN_CDB);
2098 			return;
2099 		}
2100 		/*
2101 		 * Mode parameter header length doesn't include the number
2102 		 * of bytes in the length field, so adjust the count.
2103 		 * Byte count minus header length field size.
2104 		 */
2105 		buf[0] = (n - header_size) & 0xff;
2106 	} else {
2107 		/* Byte count minus header length field size. */
2108 		buf[1] = (n - header_size) & 0xff;
2109 		buf[0] = ((n - header_size) >> 8) & 0xff;
2110 	}
2111 
2112 	sbd_handle_short_read_transfers(task, initial_dbuf, buf,
2113 	    cmd_size, n);
2114 }
2115 
2116 void
2117 sbd_handle_mode_select(scsi_task_t *task, stmf_data_buf_t *dbuf)
2118 {
2119 	uint32_t cmd_xfer_len;
2120 
2121 	if (task->task_cdb[0] == SCMD_MODE_SELECT) {
2122 		cmd_xfer_len = (uint32_t)task->task_cdb[4];
2123 	} else {
2124 		cmd_xfer_len = READ_SCSI16(&task->task_cdb[7], uint32_t);
2125 	}
2126 
2127 	if ((task->task_cdb[1] & 0xFE) != 0x10) {
2128 		stmf_scsilib_send_status(task, STATUS_CHECK,
2129 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2130 		return;
2131 	}
2132 
2133 	if (cmd_xfer_len == 0) {
2134 		/* zero byte mode selects are allowed */
2135 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2136 		return;
2137 	}
2138 
2139 	sbd_handle_short_write_transfers(task, dbuf, cmd_xfer_len);
2140 }
2141 
2142 void
2143 sbd_handle_mode_select_xfer(scsi_task_t *task, uint8_t *buf, uint32_t buflen)
2144 {
2145 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2146 	sbd_it_data_t *it;
2147 	int hdr_len, bd_len;
2148 	sbd_status_t sret;
2149 	int i;
2150 
2151 	if (task->task_cdb[0] == SCMD_MODE_SELECT) {
2152 		hdr_len = 4;
2153 	} else {
2154 		hdr_len = 8;
2155 	}
2156 
2157 	if (buflen < hdr_len)
2158 		goto mode_sel_param_len_err;
2159 
2160 	bd_len = hdr_len == 4 ? buf[3] : READ_SCSI16(&buf[6], int);
2161 
2162 	if (buflen < (hdr_len + bd_len + 2))
2163 		goto mode_sel_param_len_err;
2164 
2165 	buf += hdr_len + bd_len;
2166 	buflen -= hdr_len + bd_len;
2167 
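	/*
	 * At this point 'buf' points at the first mode page of the parameter
	 * list. Only the caching page (page code 8) is accepted below; WCE is
	 * bit 2 of byte 2 of that page and every other changeable bit must be
	 * zero.
	 */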
2168 	if ((buf[0] != 8) || (buflen != ((uint32_t)buf[1] + 2))) {
2169 		goto mode_sel_param_len_err;
2170 	}
2171 
2172 	if (buf[2] & 0xFB) {
2173 		goto mode_sel_param_field_err;
2174 	}
2175 
2176 	for (i = 3; i < (buf[1] + 2); i++) {
2177 		if (buf[i]) {
2178 			goto mode_sel_param_field_err;
2179 		}
2180 	}
2181 
2182 	sret = SBD_SUCCESS;
2183 
2184 	/* All good. Let's handle the write cache change, if any */
2185 	if (buf[2] & BIT_2) {
2186 		sret = sbd_wcd_set(0, sl);
2187 	} else {
2188 		sret = sbd_wcd_set(1, sl);
2189 	}
2190 
2191 	if (sret != SBD_SUCCESS) {
2192 		stmf_scsilib_send_status(task, STATUS_CHECK,
2193 		    STMF_SAA_WRITE_ERROR);
2194 		return;
2195 	}
2196 
2197 	/* set on the device passed, now set the flags */
2198 	mutex_enter(&sl->sl_lock);
2199 	if (buf[2] & BIT_2) {
2200 		sl->sl_flags &= ~SL_WRITEBACK_CACHE_DISABLE;
2201 	} else {
2202 		sl->sl_flags |= SL_WRITEBACK_CACHE_DISABLE;
2203 	}
2204 
2205 	for (it = sl->sl_it_list; it != NULL; it = it->sbd_it_next) {
2206 		if (it == task->task_lu_itl_handle)
2207 			continue;
2208 		it->sbd_it_ua_conditions |= SBD_UA_MODE_PARAMETERS_CHANGED;
2209 	}
2210 
2211 	if (task->task_cdb[1] & 1) {
2212 		if (buf[2] & BIT_2) {
2213 			sl->sl_flags &= ~SL_SAVED_WRITE_CACHE_DISABLE;
2214 		} else {
2215 			sl->sl_flags |= SL_SAVED_WRITE_CACHE_DISABLE;
2216 		}
2217 		mutex_exit(&sl->sl_lock);
2218 		sret = sbd_write_lu_info(sl);
2219 	} else {
2220 		mutex_exit(&sl->sl_lock);
2221 	}
2222 	if (sret == SBD_SUCCESS) {
2223 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2224 	} else {
2225 		stmf_scsilib_send_status(task, STATUS_CHECK,
2226 		    STMF_SAA_WRITE_ERROR);
2227 	}
2228 	return;
2229 
2230 mode_sel_param_len_err:
2231 	stmf_scsilib_send_status(task, STATUS_CHECK,
2232 	    STMF_SAA_PARAM_LIST_LENGTH_ERROR);
2233 	return;
2234 mode_sel_param_field_err:
2235 	stmf_scsilib_send_status(task, STATUS_CHECK,
2236 	    STMF_SAA_INVALID_FIELD_IN_PARAM_LIST);
2237 }
2238 
2239 /*
2240  * Command support added from SPC-4 r24
2241  * Supports info type 0, 2, 127
2242  */
2243 void
2244 sbd_handle_identifying_info(struct scsi_task *task,
2245     stmf_data_buf_t *initial_dbuf)
2246 {
2247 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2248 	uint8_t *cdb;
2249 	uint32_t cmd_size;
2250 	uint32_t param_len;
2251 	uint32_t xfer_size;
2252 	uint8_t info_type;
2253 	uint8_t *buf, *p;
2254 
2255 	cdb = &task->task_cdb[0];
2256 	cmd_size = READ_SCSI32(&cdb[6], uint32_t);
2257 	info_type = cdb[10]>>1;
2258 
2259 	/* Validate the command */
2260 	if (cmd_size < 4) {
2261 		stmf_scsilib_send_status(task, STATUS_CHECK,
2262 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2263 		return;
2264 	}
2265 
2266 	p = buf = kmem_zalloc(260, KM_SLEEP);
2267 
2268 	switch (info_type) {
2269 		case 0:
2270 			/*
2271 			 * No value is supplied but this info type
2272 			 * is mandatory.
2273 			 */
2274 			xfer_size = 4;
2275 			break;
2276 		case 2:
2277 			mutex_enter(&sl->sl_lock);
2278 			param_len = strlcpy((char *)(p+4), sl->sl_alias, 256);
2279 			mutex_exit(&sl->sl_lock);
2280 			/* text info must be null terminated */
2281 			if (++param_len > 256)
2282 				param_len = 256;
2283 			SCSI_WRITE16(p+2, param_len);
2284 			xfer_size = param_len + 4;
2285 			break;
2286 		case 127:
2287 			/* 0 and 2 descriptors supported */
2288 			SCSI_WRITE16(p+2, 8); /* set param length */
2289 			p += 8;
2290 			*p = 4; /* set type to 2 (7 hi bits) */
2291 			p += 2;
2292 			SCSI_WRITE16(p, 256); /* 256 max length */
2293 			xfer_size = 12;
2294 			break;
2295 		default:
2296 			stmf_scsilib_send_status(task, STATUS_CHECK,
2297 			    STMF_SAA_INVALID_FIELD_IN_CDB);
2298 			kmem_free(buf, 260);
2299 			return;
2300 	}
2301 	sbd_handle_short_read_transfers(task, initial_dbuf, buf,
2302 	    cmd_size, xfer_size);
2303 	kmem_free(buf, 260);
2304 }
2305 
2306 /*
2307  * This function parses a string, passed to it as a pointer to a string,
2308  * by adjusting the pointer to the first non-space character and returning
2309  * the count/length of the first run of non-space characters. Multiple
2310  * Management URLs are stored as a space delimited string in sl_mgmt_url
2311  * field of sbd_lu_t. This function is used to retrieve one url at a time.
2312  *
2313  * i/p : pointer to pointer to a url string
2314  * o/p : Adjusts the pointer to the URL to the first non-whitespace
2315  *       character and returns the length of the URL
2316  */
2317 uint16_t
2318 sbd_parse_mgmt_url(char **url_addr)
2319 {
2320 	uint16_t url_length = 0;
2321 	char *url;
2322 	url = *url_addr;
2323 
2324 	while (*url != '\0') {
2325 		if (*url == ' ' || *url == '\t' || *url == '\n') {
2326 			(*url_addr)++;
2327 			url = *url_addr;
2328 		} else {
2329 			break;
2330 		}
2331 	}
2332 
2333 	while (*url != '\0') {
2334 		if (*url == ' ' || *url == '\t' ||
2335 		    *url == '\n' || *url == '\0') {
2336 			break;
2337 		}
2338 		url++;
2339 		url_length++;
2340 	}
2341 	return (url_length);
2342 }
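/*
 * Illustrative usage sketch (hypothetical caller, not part of the original
 * source): walking a space-delimited list such as
 * "http://a.example http://b.example" one URL at a time.
 *
 *	char *url = mgmt_url_list;
 *	uint16_t len;
 *
 *	while ((len = sbd_parse_mgmt_url(&url)) != 0) {
 *		// 'url' points at a URL of 'len' bytes (not NUL terminated)
 *		url += len;
 *	}
 */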
2343 
2344 /* Try to make this the size of a kmem allocation cache. */
2345 static uint_t sbd_write_same_optimal_chunk = 128 * 1024;
2346 
2347 static sbd_status_t
2348 sbd_write_same_data(struct scsi_task *task, sbd_cmd_t *scmd)
2349 {
2350 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2351 	uint64_t addr, len, sz_done;
2352 	uint32_t big_buf_size, xfer_size, off;
2353 	uint8_t *big_buf;
2354 	sbd_status_t ret;
2355 
2356 	if (task->task_cdb[0] == SCMD_WRITE_SAME_G1) {
2357 		addr = READ_SCSI32(&task->task_cdb[2], uint64_t);
2358 		len = READ_SCSI16(&task->task_cdb[7], uint64_t);
2359 	} else {
2360 		addr = READ_SCSI64(&task->task_cdb[2], uint64_t);
2361 		len = READ_SCSI32(&task->task_cdb[10], uint64_t);
2362 	}
2363 	addr <<= sl->sl_data_blocksize_shift;
2364 	len <<= sl->sl_data_blocksize_shift;
2365 
2366 	/*
2367 	 * Reminders:
2368 	 *    "len" is total size of what we wish to "write same".
2369 	 *
2370 	 *    xfer_size will be scmd->trans_data_len, which is the length
2371 	 *    of the pattern we wish to replicate over "len".  We replicate
2372 	 *    "xfer_size" of pattern over "len".
2373 	 *
2374 	 *    big_buf_size is set to an ideal actual-write size for an output
2375 	 *    operation.  It may be the same as "len".  If it's not, it should
2376 	 *    be an exact multiple of "xfer_size" so we don't get pattern
2377 	 *    breakage until the very end of "len".
2378 	 */
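	/*
	 * Worked example (illustrative): replicating a single 512-byte
	 * pattern (xfer_size = 512) over a 1 MiB extent (len = 1 MiB):
	 * big_buf_size is capped at sbd_write_same_optimal_chunk (128 KiB),
	 * an exact multiple of 512, so the pattern is copied 256 times into
	 * big_buf and written out in eight 128 KiB chunks.
	 */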
2379 	big_buf_size = len > sbd_write_same_optimal_chunk ?
2380 	    sbd_write_same_optimal_chunk : (uint32_t)len;
2381 	xfer_size = scmd->trans_data_len;
2382 
2383 	/*
2384 	 * All transfers should be an integral multiple of the sector size.
2385 	 */
2386 	ASSERT((big_buf_size % xfer_size) == 0);
2387 
2388 	/*
2389 	 * Don't sleep for the allocation, and don't make the system
2390 	 * reclaim memory.  Trade higher I/Os if in a low-memory situation.
2391 	 */
2392 	big_buf = kmem_alloc(big_buf_size, KM_NOSLEEP_LAZY);
2393 
2394 	if (big_buf == NULL) {
2395 		/*
2396 		 * Just send it in terms of the transmitted data.  This
2397 		 * will be very slow.
2398 		 */
2399 		DTRACE_PROBE1(write__same__low__memory, uint64_t, big_buf_size);
2400 		big_buf = scmd->trans_data;
2401 		big_buf_size = scmd->trans_data_len;
2402 	} else {
2403 		/*
2404 		 * We already ASSERT()ed big_buf_size is an integral multiple
2405 		 * of xfer_size.
2406 		 */
2407 		for (off = 0; off < big_buf_size; off += xfer_size)
2408 			bcopy(scmd->trans_data, big_buf + off, xfer_size);
2409 	}
2410 
2411 	/* Do the actual I/O.  Recycle xfer_size now to be write size. */
2412 	DTRACE_PROBE1(write__same__io__begin, uint64_t, len);
2413 	for (sz_done = 0; sz_done < len; sz_done += (uint64_t)xfer_size) {
2414 		xfer_size = ((big_buf_size + sz_done) <= len) ? big_buf_size :
2415 		    len - sz_done;
2416 		ret = sbd_data_write(sl, task, addr + sz_done,
2417 		    (uint64_t)xfer_size, big_buf);
2418 		if (ret != SBD_SUCCESS)
2419 			break;
2420 	}
2421 	DTRACE_PROBE2(write__same__io__end, uint64_t, len, uint64_t, sz_done);
2422 
2423 	if (big_buf != scmd->trans_data)
2424 		kmem_free(big_buf, big_buf_size);
2425 
2426 	return (ret);
2427 }
2428 
2429 static void
2430 sbd_write_same_release_resources(struct scsi_task *task)
2431 {
2432 	sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
2433 
2434 	if (scmd->nbufs == 0XFF)
2435 		cmn_err(CE_WARN, "%s invalid buffer count %x",
2436 		    __func__, scmd->nbufs);
2437 	if ((scmd->trans_data_len != 0) && (scmd->trans_data != NULL))
2438 		kmem_free(scmd->trans_data, scmd->trans_data_len);
2439 	scmd->trans_data = NULL;
2440 	scmd->trans_data_len = 0;
2441 	scmd->flags &= ~SBD_SCSI_CMD_TRANS_DATA;
2442 }
2443 
2444 static void
2445 sbd_handle_write_same_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
2446     struct stmf_data_buf *dbuf, uint8_t dbuf_reusable)
2447 {
2448 	uint64_t laddr;
2449 	uint32_t buflen, iolen;
2450 	int ndx, ret;
2451 
2452 	if (ATOMIC8_GET(scmd->nbufs) > 0) {
2453 		atomic_dec_8(&scmd->nbufs);
2454 	}
2455 
2456 	if (dbuf->db_xfer_status != STMF_SUCCESS) {
2457 		sbd_write_same_release_resources(task);
2458 		sbd_ats_remove_by_task(task);
2459 		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
2460 		    dbuf->db_xfer_status, NULL);
2461 		return;
2462 	}
2463 
2464 	if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
2465 		goto write_same_xfer_done;
2466 	}
2467 
2468 	/* if this is an unnecessary callback just return */
2469 	if (((scmd->flags & SBD_SCSI_CMD_TRANS_DATA) == 0) ||
2470 	    ((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0) ||
2471 	    (scmd->trans_data == NULL)) {
2472 		sbd_ats_remove_by_task(task);
2473 		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
2474 		return;
2475 	}
2476 
2477 	if (ATOMIC32_GET(scmd->len) != 0) {
2478 		/*
2479 		 * Initiate the next port xfer to occur in parallel
2480 		 * with writing this buf.
2481 		 */
2482 		sbd_do_write_same_xfer(task, scmd, NULL, 0);
2483 	}
2484 
2485 	laddr = dbuf->db_relative_offset;
2486 
2487 	for (buflen = 0, ndx = 0; (buflen < dbuf->db_data_size) &&
2488 	    (ndx < dbuf->db_sglist_length); ndx++) {
2489 		iolen = min(dbuf->db_data_size - buflen,
2490 		    dbuf->db_sglist[ndx].seg_length);
2491 		if (iolen == 0)
2492 			break;
2493 		bcopy(dbuf->db_sglist[ndx].seg_addr, &scmd->trans_data[laddr],
2494 		    iolen);
2495 		buflen += iolen;
2496 		laddr += (uint64_t)iolen;
2497 	}
2498 	task->task_nbytes_transferred += buflen;
2499 
2500 write_same_xfer_done:
2501 	if (ATOMIC32_GET(scmd->len) == 0 ||
2502 	    scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
2503 		stmf_free_dbuf(task, dbuf);
2504 		if (ATOMIC8_GET(scmd->nbufs) > 0)
2505 			return;
2506 		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
2507 		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
2508 			sbd_ats_remove_by_task(task);
2509 			sbd_write_same_release_resources(task);
2510 			stmf_scsilib_send_status(task, STATUS_CHECK,
2511 			    STMF_SAA_WRITE_ERROR);
2512 		} else {
2513 			ret = sbd_write_same_data(task, scmd);
2514 			sbd_ats_remove_by_task(task);
2515 			sbd_write_same_release_resources(task);
2516 			if (ret != SBD_SUCCESS) {
2517 				stmf_scsilib_send_status(task, STATUS_CHECK,
2518 				    STMF_SAA_WRITE_ERROR);
2519 			} else {
2520 				stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2521 			}
2522 		}
2523 		return;
2524 	}
2525 	sbd_do_write_same_xfer(task, scmd, dbuf, dbuf_reusable);
2526 }
2527 
2528 static void
2529 sbd_do_write_same_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
2530     struct stmf_data_buf *dbuf, uint8_t dbuf_reusable)
2531 {
2532 	uint32_t len;
2533 
2534 	if (ATOMIC32_GET(scmd->len) == 0) {
2535 		if (dbuf != NULL)
2536 			stmf_free_dbuf(task, dbuf);
2537 		return;
2538 	}
2539 
2540 	if ((dbuf != NULL) &&
2541 	    ((dbuf->db_flags & DB_DONT_REUSE) || (dbuf_reusable == 0))) {
2542 		/* free current dbuf and allocate a new one */
2543 		stmf_free_dbuf(task, dbuf);
2544 		dbuf = NULL;
2545 	}
2546 	if (dbuf == NULL) {
2547 		uint32_t maxsize, minsize, old_minsize;
2548 
2549 		maxsize = (ATOMIC32_GET(scmd->len) > (128*1024)) ? 128*1024 :
2550 		    ATOMIC32_GET(scmd->len);
2551 		minsize = maxsize >> 2;
2552 		do {
2553 			old_minsize = minsize;
2554 			dbuf = stmf_alloc_dbuf(task, maxsize, &minsize, 0);
2555 		} while ((dbuf == NULL) && (old_minsize > minsize) &&
2556 		    (minsize >= 512));
2557 		if (dbuf == NULL) {
2558 			sbd_ats_remove_by_task(task);
2559 			sbd_write_same_release_resources(task);
2560 			if (ATOMIC8_GET(scmd->nbufs) == 0) {
2561 				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
2562 				    STMF_ALLOC_FAILURE, NULL);
2563 			}
2564 			return;
2565 		}
2566 	}
2567 
2568 	len = ATOMIC32_GET(scmd->len) > dbuf->db_buf_size ? dbuf->db_buf_size :
2569 	    ATOMIC32_GET(scmd->len);
2570 
2571 	dbuf->db_relative_offset = scmd->current_ro;
2572 	dbuf->db_data_size = len;
2573 	dbuf->db_flags = DB_DIRECTION_FROM_RPORT;
2574 	(void) stmf_xfer_data(task, dbuf, 0);
2575 	/* outstanding port xfers and bufs used */
2576 	atomic_inc_8(&scmd->nbufs);
2577 	atomic_add_32(&scmd->len, -len);
2578 	scmd->current_ro += len;
2579 }
2580 
2581 static void
2582 sbd_handle_write_same(scsi_task_t *task, struct stmf_data_buf *initial_dbuf)
2583 {
2584 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2585 	uint64_t addr, len;
2586 	sbd_cmd_t *scmd;
2587 	stmf_data_buf_t *dbuf;
2588 	uint8_t unmap;
2589 	uint8_t do_immediate_data = 0;
2590 
2591 	if (HardwareAcceleratedInit == 0) {
2592 		stmf_scsilib_send_status(task, STATUS_CHECK,
2593 		    STMF_SAA_INVALID_OPCODE);
2594 		return;
2595 	}
2596 
2597 	task->task_cmd_xfer_length = 0;
2598 	if (task->task_additional_flags &
2599 	    TASK_AF_NO_EXPECTED_XFER_LENGTH) {
2600 		task->task_expected_xfer_length = 0;
2601 	}
2602 	if (sl->sl_flags & SL_WRITE_PROTECTED) {
2603 		stmf_scsilib_send_status(task, STATUS_CHECK,
2604 		    STMF_SAA_WRITE_PROTECTED);
2605 		return;
2606 	}
2607 	if (task->task_cdb[1] & 0xF7) {
2608 		stmf_scsilib_send_status(task, STATUS_CHECK,
2609 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2610 		return;
2611 	}
2612 	unmap = task->task_cdb[1] & 0x08;
2613 
2614 	if (unmap && ((sl->sl_flags & SL_UNMAP_ENABLED) == 0)) {
2615 		stmf_scsilib_send_status(task, STATUS_CHECK,
2616 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2617 		return;
2618 	}
2619 
2620 	if (task->task_cdb[0] == SCMD_WRITE_SAME_G1) {
2621 		addr = READ_SCSI32(&task->task_cdb[2], uint64_t);
2622 		len = READ_SCSI16(&task->task_cdb[7], uint64_t);
2623 	} else {
2624 		addr = READ_SCSI64(&task->task_cdb[2], uint64_t);
2625 		len = READ_SCSI32(&task->task_cdb[10], uint64_t);
2626 	}
2627 
2628 	if (len == 0) {
2629 		stmf_scsilib_send_status(task, STATUS_CHECK,
2630 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2631 		return;
2632 	}
2633 
2634 	if (sbd_ats_handling_before_io(task, sl, addr, len) !=
2635 	    SBD_SUCCESS) {
2636 		if (stmf_task_poll_lu(task, 10) != STMF_SUCCESS)
2637 			stmf_scsilib_send_status(task, STATUS_BUSY, 0);
2638 		return;
2639 	}
2640 
2641 	addr <<= sl->sl_data_blocksize_shift;
2642 	len <<= sl->sl_data_blocksize_shift;
2643 
2644 	/* Check if the command is for the unmap function */
2645 	if (unmap) {
2646 		dkioc_free_list_t *dfl = kmem_zalloc(DFL_SZ(1), KM_SLEEP);
2647 
2648 		dfl->dfl_num_exts = 1;
2649 		dfl->dfl_exts[0].dfle_start = addr;
2650 		dfl->dfl_exts[0].dfle_length = len;
2651 		if (sbd_unmap(sl, dfl) != 0) {
2652 			stmf_scsilib_send_status(task, STATUS_CHECK,
2653 			    STMF_SAA_LBA_OUT_OF_RANGE);
2654 		} else {
2655 			stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2656 		}
2657 		dfl_free(dfl);
2658 		return;
2659 	}
2660 
2661 	/* Write same function */
2662 
2663 	task->task_cmd_xfer_length = 1 << sl->sl_data_blocksize_shift;
2664 	if (task->task_additional_flags &
2665 	    TASK_AF_NO_EXPECTED_XFER_LENGTH) {
2666 		task->task_expected_xfer_length = task->task_cmd_xfer_length;
2667 	}
2668 	if ((addr + len) > sl->sl_lu_size) {
2669 		sbd_ats_remove_by_task(task);
2670 		stmf_scsilib_send_status(task, STATUS_CHECK,
2671 		    STMF_SAA_LBA_OUT_OF_RANGE);
2672 		return;
2673 	}
2674 
2675 	/* For rest of this I/O the transfer length is 1 block */
2676 	len = ((uint64_t)1) << sl->sl_data_blocksize_shift;
2677 
2678 	/* Some basic checks */
2679 	if ((len == 0) || (len != task->task_expected_xfer_length)) {
2680 		sbd_ats_remove_by_task(task);
2681 		stmf_scsilib_send_status(task, STATUS_CHECK,
2682 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2683 		return;
2684 	}
2685 
2686 
2687 	if ((initial_dbuf != NULL) && (task->task_flags & TF_INITIAL_BURST)) {
2688 		if (initial_dbuf->db_data_size > len) {
2689 			if (initial_dbuf->db_data_size >
2690 			    task->task_expected_xfer_length) {
2691 				/* protocol error */
2692 				sbd_ats_remove_by_task(task);
2693 				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
2694 				    STMF_INVALID_ARG, NULL);
2695 				return;
2696 			}
2697 			initial_dbuf->db_data_size = (uint32_t)len;
2698 		}
2699 		do_immediate_data = 1;
2700 	}
2701 	dbuf = initial_dbuf;
2702 
2703 	if (task->task_lu_private) {
2704 		scmd = (sbd_cmd_t *)task->task_lu_private;
2705 	} else {
2706 		scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
2707 		task->task_lu_private = scmd;
2708 	}
2709 	scmd->flags = SBD_SCSI_CMD_ACTIVE | SBD_SCSI_CMD_TRANS_DATA |
2710 	    SBD_SCSI_CMD_ATS_RELATED;
2711 	scmd->cmd_type = SBD_CMD_SCSI_WRITE;
2712 	scmd->nbufs = 0;
2713 	scmd->len = (uint32_t)len;
2714 	scmd->trans_data_len = (uint32_t)len;
2715 	scmd->trans_data = kmem_alloc((size_t)len, KM_SLEEP);
2716 	scmd->current_ro = 0;
2717 
2718 	if (do_immediate_data) {
2719 		/*
2720 		 * Account for data passed in this write command
2721 		 */
2722 		(void) stmf_xfer_data(task, dbuf, STMF_IOF_STATS_ONLY);
2723 		atomic_add_32(&scmd->len, -dbuf->db_data_size);
2724 		scmd->current_ro += dbuf->db_data_size;
2725 		dbuf->db_xfer_status = STMF_SUCCESS;
2726 		sbd_handle_write_same_xfer_completion(task, scmd, dbuf, 0);
2727 	} else {
2728 		sbd_do_write_same_xfer(task, scmd, dbuf, 0);
2729 	}
2730 }
2731 
2732 static void
2733 sbd_handle_unmap(scsi_task_t *task, stmf_data_buf_t *dbuf)
2734 {
2735 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2736 	uint32_t cmd_xfer_len;
2737 
2738 	if (sbd_unmap_enable == 0) {
2739 		stmf_scsilib_send_status(task, STATUS_CHECK,
2740 		    STMF_SAA_INVALID_OPCODE);
2741 		return;
2742 	}
2743 
2744 	if (sl->sl_flags & SL_WRITE_PROTECTED) {
2745 		stmf_scsilib_send_status(task, STATUS_CHECK,
2746 		    STMF_SAA_WRITE_PROTECTED);
2747 		return;
2748 	}
2749 	cmd_xfer_len = READ_SCSI16(&task->task_cdb[7], uint32_t);
2750 
2751 	if (task->task_cdb[1] & 1) {
2752 		stmf_scsilib_send_status(task, STATUS_CHECK,
2753 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2754 		return;
2755 	}
2756 
2757 	if (cmd_xfer_len == 0) {
2758 		task->task_cmd_xfer_length = 0;
2759 		if (task->task_additional_flags &
2760 		    TASK_AF_NO_EXPECTED_XFER_LENGTH) {
2761 			task->task_expected_xfer_length = 0;
2762 		}
2763 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2764 		return;
2765 	}
2766 
2767 	sbd_handle_short_write_transfers(task, dbuf, cmd_xfer_len);
2768 }
2769 
2770 static void
2771 sbd_handle_unmap_xfer(scsi_task_t *task, uint8_t *buf, uint32_t buflen)
2772 {
2773 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2774 	uint32_t ulen, dlen, num_desc;
2775 	uint64_t addr, len;
2776 	uint8_t *p;
2777 	dkioc_free_list_t *dfl;
2778 	int ret;
2779 	int i;
2780 
2781 	if (buflen < 24) {
2782 		stmf_scsilib_send_status(task, STATUS_CHECK,
2783 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2784 		return;
2785 	}
2786 	ulen = READ_SCSI16(buf, uint32_t);
2787 	dlen = READ_SCSI16(buf + 2, uint32_t);
2788 	num_desc = dlen >> 4;
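	/*
	 * The UNMAP parameter list is an 8-byte header (UNMAP data length,
	 * block descriptor data length, reserved bytes) followed by 16-byte
	 * descriptors: an 8-byte LBA, a 4-byte block count and 4 reserved
	 * bytes. For example, a single-descriptor request arrives with
	 * buflen = 24, ulen = 22 and dlen = 16, satisfying the checks below.
	 */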
2789 	if (((ulen + 2) != buflen) || ((dlen + 8) != buflen) || (dlen & 0xf) ||
2790 	    (num_desc == 0)) {
2791 		stmf_scsilib_send_status(task, STATUS_CHECK,
2792 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2793 		return;
2794 	}
2795 
2796 	dfl = kmem_zalloc(DFL_SZ(num_desc), KM_SLEEP);
2797 	dfl->dfl_num_exts = num_desc;
2798 	/*
2799 	 * This should use ATS locking but that was disabled by the
2800 	 * changes to ZFS to take advantage of TRIM in SSDs.
2801 	 *
2802 	 * Since the entire list is passed to ZFS as one list, ATS
2803 	 * locking is not done.  This may be detectable, and if it is,
2804 	 * then the entire list needs to be locked and, after the
2805 	 * unmap completes, the entire list must be unlocked.
2806 	 */
2807 	for (p = buf + 8, i = 0; num_desc; num_desc--, p += 16, i++) {
2808 		addr = READ_SCSI64(p, uint64_t);
2809 		len = READ_SCSI32(p+8, uint64_t);
2810 		addr <<= sl->sl_data_blocksize_shift;
2811 		len <<= sl->sl_data_blocksize_shift;
2812 
2813 		/* Prepare a list of extents to unmap */
2814 		dfl->dfl_exts[i].dfle_start = addr;
2815 		dfl->dfl_exts[i].dfle_length = len;
2816 
2817 		/* release the overlap */
2818 	}
2819 	ASSERT(i == dfl->dfl_num_exts);
2820 
2821 	/* Finally execute the unmap operations in a single step */
2822 	ret = sbd_unmap(sl, dfl);
2823 	dfl_free(dfl);
2824 	if (ret != 0) {
2825 		stmf_scsilib_send_status(task, STATUS_CHECK,
2826 		    STMF_SAA_LBA_OUT_OF_RANGE);
2827 		return;
2828 	}
2829 
2830 	stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2831 }
2832 
2833 void
2834 sbd_handle_inquiry(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
2835 {
2836 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2837 	uint8_t *cdbp = (uint8_t *)&task->task_cdb[0];
2838 	uint8_t *p;
2839 	uint8_t byte0;
2840 	uint8_t page_length;
2841 	uint16_t bsize = 512;
2842 	uint16_t cmd_size;
2843 	uint32_t xfer_size = 4;
2844 	uint32_t mgmt_url_size = 0;
2845 	uint8_t exp;
2846 	uint64_t s;
2847 	char *mgmt_url = NULL;
2848 
2849 
2850 	byte0 = DTYPE_DIRECT;
2851 	/*
2852 	 * Basic protocol checks.
2853 	 */
2854 
2855 	if ((((cdbp[1] & 1) == 0) && cdbp[2]) || cdbp[5]) {
2856 		stmf_scsilib_send_status(task, STATUS_CHECK,
2857 		    STMF_SAA_INVALID_FIELD_IN_CDB);
2858 		return;
2859 	}
2860 
2861 	/*
2862 	 * Zero byte allocation length is not an error.  Just
2863 	 * return success.
2864 	 */
2865 
2866 	cmd_size = (((uint16_t)cdbp[3]) << 8) | cdbp[4];
2867 
2868 	if (cmd_size == 0) {
2869 		task->task_cmd_xfer_length = 0;
2870 		if (task->task_additional_flags &
2871 		    TASK_AF_NO_EXPECTED_XFER_LENGTH) {
2872 			task->task_expected_xfer_length = 0;
2873 		}
2874 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2875 		return;
2876 	}
2877 
2878 	/*
2879 	 * Standard inquiry
2880 	 */
2881 
2882 	if ((cdbp[1] & 1) == 0) {
2883 		int	i;
2884 		struct scsi_inquiry *inq;
2885 
2886 		p = (uint8_t *)kmem_zalloc(bsize, KM_SLEEP);
2887 		inq = (struct scsi_inquiry *)p;
2888 
2889 		page_length = 69;
2890 		xfer_size = page_length + 5;
2891 
2892 		inq->inq_dtype = DTYPE_DIRECT;
2893 		inq->inq_ansi = 5;	/* SPC-3 */
2894 		inq->inq_hisup = 1;
2895 		inq->inq_rdf = 2;	/* Response data format for SPC-3 */
2896 		inq->inq_len = page_length;
2897 
2898 		inq->inq_tpgs = TPGS_FAILOVER_IMPLICIT;
2899 		inq->inq_cmdque = 1;
2900 		inq->inq_3pc = 1;
2901 
2902 		if (sl->sl_flags & SL_VID_VALID) {
2903 			bcopy(sl->sl_vendor_id, inq->inq_vid, 8);
2904 		} else {
2905 			bcopy(sbd_vendor_id, inq->inq_vid, 8);
2906 		}
2907 
2908 		if (sl->sl_flags & SL_PID_VALID) {
2909 			bcopy(sl->sl_product_id, inq->inq_pid, 16);
2910 		} else {
2911 			bcopy(sbd_product_id, inq->inq_pid, 16);
2912 		}
2913 
2914 		if (sl->sl_flags & SL_REV_VALID) {
2915 			bcopy(sl->sl_revision, inq->inq_revision, 4);
2916 		} else {
2917 			bcopy(sbd_revision, inq->inq_revision, 4);
2918 		}
2919 
2920 		/* Adding Version Descriptors */
2921 		i = 0;
2922 		/* SAM-3 no version */
2923 		inq->inq_vd[i].inq_vd_msb = 0x00;
2924 		inq->inq_vd[i].inq_vd_lsb = 0x60;
2925 		i++;
2926 
2927 		/* transport */
2928 		switch (task->task_lport->lport_id->protocol_id) {
2929 		case PROTOCOL_FIBRE_CHANNEL:
2930 			inq->inq_vd[i].inq_vd_msb = 0x09;
2931 			inq->inq_vd[i].inq_vd_lsb = 0x00;
2932 			i++;
2933 			break;
2934 
2935 		case PROTOCOL_PARALLEL_SCSI:
2936 		case PROTOCOL_SSA:
2937 		case PROTOCOL_IEEE_1394:
2938 			/* Currently no claims of conformance */
2939 			break;
2940 
2941 		case PROTOCOL_SRP:
2942 			inq->inq_vd[i].inq_vd_msb = 0x09;
2943 			inq->inq_vd[i].inq_vd_lsb = 0x40;
2944 			i++;
2945 			break;
2946 
2947 		case PROTOCOL_iSCSI:
2948 			inq->inq_vd[i].inq_vd_msb = 0x09;
2949 			inq->inq_vd[i].inq_vd_lsb = 0x60;
2950 			i++;
2951 			break;
2952 
2953 		case PROTOCOL_SAS:
2954 		case PROTOCOL_ADT:
2955 		case PROTOCOL_ATAPI:
2956 		default:
2957 			/* Currently no claims of conformance */
2958 			break;
2959 		}
2960 
2961 		/* SPC-3 no version */
2962 		inq->inq_vd[i].inq_vd_msb = 0x03;
2963 		inq->inq_vd[i].inq_vd_lsb = 0x00;
2964 		i++;
2965 
2966 		/* SBC-2 no version */
2967 		inq->inq_vd[i].inq_vd_msb = 0x03;
2968 		inq->inq_vd[i].inq_vd_lsb = 0x20;
2969 
2970 		sbd_handle_short_read_transfers(task, initial_dbuf, p, cmd_size,
2971 		    min(cmd_size, xfer_size));
2972 		kmem_free(p, bsize);
2973 
2974 		return;
2975 	}
2976 
2977 	rw_enter(&sbd_global_prop_lock, RW_READER);
2978 	if (sl->sl_mgmt_url) {
2979 		mgmt_url_size = strlen(sl->sl_mgmt_url);
2980 		mgmt_url = sl->sl_mgmt_url;
2981 	} else if (sbd_mgmt_url) {
2982 		mgmt_url_size = strlen(sbd_mgmt_url);
2983 		mgmt_url = sbd_mgmt_url;
2984 	}
2985 
2986 	/*
2987 	 * EVPD handling
2988 	 */
2989 
2990 	/* Default 512 bytes may not be enough, increase bsize if necessary */
2991 	if (cdbp[2] == 0x83 || cdbp[2] == 0x85) {
2992 		if (bsize <  cmd_size)
2993 			bsize = cmd_size;
2994 	}
2995 	p = (uint8_t *)kmem_zalloc(bsize, KM_SLEEP);
2996 
2997 	switch (cdbp[2]) {
2998 	case 0x00:
2999 		page_length = 5 + (mgmt_url_size ? 1 : 0);
3000 
3001 		if (sl->sl_flags & SL_UNMAP_ENABLED)
3002 			page_length += 1;
3003 
3004 		p[0] = byte0;
3005 		p[3] = page_length;
3006 		/* Supported VPD pages in ascending order */
3007 		/* CSTYLED */
3008 		{
3009 			uint8_t i = 5;
3010 
3011 			p[i++] = 0x80;
3012 			p[i++] = 0x83;
3013 			if (mgmt_url_size != 0)
3014 				p[i++] = 0x85;
3015 			p[i++] = 0x86;
3016 			p[i++] = 0xb0;
3017 			if (sl->sl_flags & SL_UNMAP_ENABLED) {
3018 				p[i++] = 0xb2;
3019 			}
3020 		}
3021 		xfer_size = page_length + 4;
3022 		break;
3023 
3024 	case 0x80:
3025 		if (sl->sl_serial_no_size) {
3026 			page_length = sl->sl_serial_no_size;
3027 			bcopy(sl->sl_serial_no, p + 4, sl->sl_serial_no_size);
3028 		} else {
3029 			/* if no serial num is specified set 4 spaces */
3030 			page_length = 4;
3031 			bcopy("    ", p + 4, 4);
3032 		}
3033 		p[0] = byte0;
3034 		p[1] = 0x80;
3035 		p[3] = page_length;
3036 		xfer_size = page_length + 4;
3037 		break;
3038 
3039 	case 0x83:
3040 		xfer_size = stmf_scsilib_prepare_vpd_page83(task, p,
3041 		    bsize, byte0, STMF_VPD_LU_ID|STMF_VPD_TARGET_ID|
3042 		    STMF_VPD_TP_GROUP|STMF_VPD_RELATIVE_TP_ID);
3043 		break;
3044 
3045 	case 0x85:
3046 		if (mgmt_url_size == 0) {
3047 			stmf_scsilib_send_status(task, STATUS_CHECK,
3048 			    STMF_SAA_INVALID_FIELD_IN_CDB);
3049 			goto err_done;
3050 		} /* CSTYLED */
3051 		{
3052 			uint16_t idx, newidx, sz, url_size;
3053 			char *url;
3054 
3055 			p[0] = byte0;
3056 			p[1] = 0x85;
3057 
3058 			idx = 4;
3059 			url = mgmt_url;
3060 			url_size = sbd_parse_mgmt_url(&url);
3061 			/* Creating Network Service Descriptors */
3062 			while (url_size != 0) {
3063 				/* Null terminated and 4 Byte aligned */
3064 				sz = url_size + 1;
3065 				sz += (sz % 4) ? 4 - (sz % 4) : 0;
3066 				newidx = idx + sz + 4;
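				/*
				 * Example (illustrative): a 16-character URL
				 * gives sz = 17 (with the NUL), padded to 20
				 * for 4-byte alignment, so this descriptor
				 * takes 4 header bytes plus 20 data bytes and
				 * newidx advances by 24.
				 */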
3067 
3068 				if (newidx < bsize) {
3069 					/*
3070 					 * SPC-3r23 : Table 320  (Sec 7.6.5)
3071 					 * (Network service descriptor format
3072 					 *
3073 					 * Note: Hard coding service type as
3074 					 * "Storage Configuration Service".
3075 					 */
3076 					p[idx] = 1;
3077 					SCSI_WRITE16(p + idx + 2, sz);
3078 					bcopy(url, p + idx + 4, url_size);
3079 					xfer_size = newidx + 4;
3080 				}
3081 				idx = newidx;
3082 
3083 				/* skip to next mgmt url if any */
3084 				url += url_size;
3085 				url_size = sbd_parse_mgmt_url(&url);
3086 			}
3087 
3088 			/* Total descriptor length */
3089 			SCSI_WRITE16(p + 2, idx - 4);
3090 			break;
3091 		}
3092 
3093 	case 0x86:
3094 		page_length = 0x3c;
3095 
3096 		p[0] = byte0;
3097 		p[1] = 0x86;		/* Page 86 response */
3098 		p[3] = page_length;
3099 
3100 		/*
3101 		 * Bits 0, 1, and 2 will need to be updated
3102 		 * to reflect the queue tag handling if/when
3103 		 * that is implemented.  For now, we're going
3104 		 * to claim support only for Simple TA.
3105 		 */
3106 		p[5] = 1;
3107 		xfer_size = page_length + 4;
3108 		break;
3109 
3110 	case 0xb0:
3111 		page_length = 0x3c;
3112 		p[0] = byte0;
3113 		p[1] = 0xb0;
3114 		p[3] = page_length;
3115 		p[4] = 1;
3116 		p[5] = sbd_ats_max_nblks();
3117 		if (sl->sl_flags & SL_UNMAP_ENABLED && sbd_unmap_enable) {
3118 			p[20] = (stmf_sbd_unmap_max_nblks >> 24) & 0xff;
3119 			p[21] = (stmf_sbd_unmap_max_nblks >> 16) & 0xff;
3120 			p[22] = (stmf_sbd_unmap_max_nblks >> 8) & 0xff;
3121 			p[23] = stmf_sbd_unmap_max_nblks & 0xff;
3122 
3123 			p[24] = 0;
3124 			p[25] = 0;
3125 			p[26] = 0;
3126 			p[27] = 0xFF;
3127 		}
3128 		xfer_size = page_length + 4;
3129 		break;
3130 
3131 	case 0xb2:
3132 		if ((sl->sl_flags & SL_UNMAP_ENABLED) == 0) {
3133 			stmf_scsilib_send_status(task, STATUS_CHECK,
3134 			    STMF_SAA_INVALID_FIELD_IN_CDB);
3135 			goto err_done;
3136 		}
3137 		page_length = 4;
3138 		p[0] = byte0;
3139 		p[1] = 0xb2;
3140 		p[3] = page_length;
3141 
3142 		exp = (uint8_t)sl->sl_data_blocksize_shift;
3143 		s = sl->sl_lu_size >> sl->sl_data_blocksize_shift;
3144 		while (s & ((uint64_t)0xFFFFFFFF80000000ull)) {
3145 			s >>= 1;
3146 			exp++;
3147 		}
3148 		p[4] = exp;
3149 		p[5] = 0xc0;	/* Logical provisioning UNMAP and WRITE SAME */
3150 		xfer_size = page_length + 4;
3151 		break;
3152 
3153 	default:
3154 		stmf_scsilib_send_status(task, STATUS_CHECK,
3155 		    STMF_SAA_INVALID_FIELD_IN_CDB);
3156 		goto err_done;
3157 	}
3158 
3159 	sbd_handle_short_read_transfers(task, initial_dbuf, p, cmd_size,
3160 	    min(cmd_size, xfer_size));
3161 err_done:
3162 	kmem_free(p, bsize);
3163 	rw_exit(&sbd_global_prop_lock);
3164 }
3165 
3166 stmf_status_t
3167 sbd_task_alloc(struct scsi_task *task)
3168 {
3169 	if ((task->task_lu_private =
3170 	    kmem_zalloc(sizeof (sbd_cmd_t), KM_NOSLEEP)) != NULL) {
3171 		sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
3172 		scmd->flags = 0;
3173 		return (STMF_SUCCESS);
3174 	}
3175 	return (STMF_ALLOC_FAILURE);
3176 }
3177 
3178 void
3179 sbd_remove_it_handle(sbd_lu_t *sl, sbd_it_data_t *it)
3180 {
3181 	sbd_it_data_t **ppit;
3182 
3183 	sbd_pgr_remove_it_handle(sl, it);
3184 	mutex_enter(&sl->sl_lock);
3185 	for (ppit = &sl->sl_it_list; *ppit != NULL;
3186 	    ppit = &((*ppit)->sbd_it_next)) {
3187 		if ((*ppit) == it) {
3188 			*ppit = it->sbd_it_next;
3189 			break;
3190 		}
3191 	}
3192 	mutex_exit(&sl->sl_lock);
3193 
3194 	DTRACE_PROBE2(itl__nexus__end, stmf_lu_t *, sl->sl_lu,
3195 	    sbd_it_data_t *, it);
3196 
3197 	kmem_free(it, sizeof (*it));
3198 }
3199 
3200 void
3201 sbd_check_and_clear_scsi2_reservation(sbd_lu_t *sl, sbd_it_data_t *it)
3202 {
3203 	mutex_enter(&sl->sl_lock);
3204 	if ((sl->sl_flags & SL_LU_HAS_SCSI2_RESERVATION) == 0) {
3205 		/* If we don't have any reservations, just get out. */
3206 		mutex_exit(&sl->sl_lock);
3207 		return;
3208 	}
3209 
3210 	if (it == NULL) {
3211 		/* Find the I_T nexus which is holding the reservation. */
3212 		for (it = sl->sl_it_list; it != NULL; it = it->sbd_it_next) {
3213 			if (it->sbd_it_flags & SBD_IT_HAS_SCSI2_RESERVATION) {
3214 				ASSERT(it->sbd_it_session_id ==
3215 				    sl->sl_rs_owner_session_id);
3216 				break;
3217 			}
3218 		}
3219 		ASSERT(it != NULL);
3220 	} else {
3221 		/*
3222 		 * We were passed an I_T nexus. If this nexus does not hold
3223 		 * the reservation, do nothing. This is why this function is
3224 		 * called "check_and_clear".
3225 		 */
3226 		if ((it->sbd_it_flags & SBD_IT_HAS_SCSI2_RESERVATION) == 0) {
3227 			mutex_exit(&sl->sl_lock);
3228 			return;
3229 		}
3230 	}
3231 	it->sbd_it_flags &= ~SBD_IT_HAS_SCSI2_RESERVATION;
3232 	sl->sl_flags &= ~SL_LU_HAS_SCSI2_RESERVATION;
3233 	mutex_exit(&sl->sl_lock);
3234 }
3235 
3236 /*
3237  * Given a LU and a task, check if the task is causing reservation
3238  * conflict. Returns 1 in case of conflict, 0 otherwise.
3239  * Note that the LU might not be the same LU as in the task but the
3240  * caller makes sure that the LU can be accessed.
3241  */
3242 int
3243 sbd_check_reservation_conflict(struct sbd_lu *sl, struct scsi_task *task)
3244 {
3245 	sbd_it_data_t *it;
3246 
3247 	it = task->task_lu_itl_handle;
3248 	ASSERT(it);
3249 	if (sl->sl_access_state == SBD_LU_ACTIVE) {
3250 		if (SBD_PGR_RSVD(sl->sl_pgr)) {
3251 			if (sbd_pgr_reservation_conflict(task, sl)) {
3252 				return (1);
3253 			}
3254 		} else if ((sl->sl_flags & SL_LU_HAS_SCSI2_RESERVATION) &&
3255 		    ((it->sbd_it_flags & SBD_IT_HAS_SCSI2_RESERVATION) == 0)) {
3256 			if (!(SCSI2_CONFLICT_FREE_CMDS(task->task_cdb))) {
3257 				return (1);
3258 			}
3259 		}
3260 	}
3261 
3262 	return (0);
3263 }
3264 
3265 /*
3266  * Keep in mind that sbd_new_task can be called multiple times for the same
3267  * task because of us calling stmf_task_poll_lu resulting in a call to
3268  * sbd_task_poll().
3269  */
3270 void
3271 sbd_new_task(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
3272 {
3273 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
3274 	sbd_it_data_t *it;
3275 	uint8_t cdb0, cdb1;
3276 	stmf_status_t st_ret;
3277 
3278 	if ((it = task->task_lu_itl_handle) == NULL) {
3279 		mutex_enter(&sl->sl_lock);
3280 		for (it = sl->sl_it_list; it != NULL; it = it->sbd_it_next) {
3281 			if (it->sbd_it_session_id ==
3282 			    task->task_session->ss_session_id) {
3283 				mutex_exit(&sl->sl_lock);
3284 				stmf_scsilib_send_status(task, STATUS_BUSY, 0);
3285 				return;
3286 			}
3287 		}
3288 		it = (sbd_it_data_t *)kmem_zalloc(sizeof (*it), KM_NOSLEEP);
3289 		if (it == NULL) {
3290 			mutex_exit(&sl->sl_lock);
3291 			stmf_scsilib_send_status(task, STATUS_BUSY, 0);
3292 			return;
3293 		}
3294 		it->sbd_it_session_id = task->task_session->ss_session_id;
3295 		bcopy(task->task_lun_no, it->sbd_it_lun, 8);
3296 		it->sbd_it_next = sl->sl_it_list;
3297 		sl->sl_it_list = it;
3298 		mutex_exit(&sl->sl_lock);
3299 
3300 		DTRACE_PROBE1(itl__nexus__start, scsi_task *, task);
3301 
3302 		sbd_pgr_initialize_it(task, it);
3303 		if (stmf_register_itl_handle(task->task_lu, task->task_lun_no,
3304 		    task->task_session, it->sbd_it_session_id, it)
3305 		    != STMF_SUCCESS) {
3306 			sbd_remove_it_handle(sl, it);
3307 			stmf_scsilib_send_status(task, STATUS_BUSY, 0);
3308 			return;
3309 		}
3310 		task->task_lu_itl_handle = it;
3311 		if (sl->sl_access_state != SBD_LU_STANDBY) {
3312 			it->sbd_it_ua_conditions = SBD_UA_POR;
3313 		}
3314 	} else if (it->sbd_it_flags & SBD_IT_PGR_CHECK_FLAG) {
3315 		mutex_enter(&sl->sl_lock);
3316 		it->sbd_it_flags &= ~SBD_IT_PGR_CHECK_FLAG;
3317 		mutex_exit(&sl->sl_lock);
3318 		sbd_pgr_initialize_it(task, it);
3319 	}
3320 
3321 	if (task->task_mgmt_function) {
3322 		stmf_scsilib_handle_task_mgmt(task);
3323 		return;
3324 	}
3325 
3326 	/*
3327 	 * if we're transitioning between access
3328 	 * states, return NOT READY
3329 	 */
3330 	if (sl->sl_access_state == SBD_LU_TRANSITION_TO_STANDBY ||
3331 	    sl->sl_access_state == SBD_LU_TRANSITION_TO_ACTIVE) {
3332 		stmf_scsilib_send_status(task, STATUS_CHECK,
3333 		    STMF_SAA_LU_NO_ACCESS_TRANSITION);
3334 		return;
3335 	}
3336 
3337 	cdb0 = task->task_cdb[0];
3338 	cdb1 = task->task_cdb[1];
3339 	/*
3340 	 * Special case for different versions of Windows.
3341  * 1) Windows 2012 and VMWare will fail to discover LUs if a READ
3342 	 *    operation sent down the standby path returns an error. By default
3343 	 *    standby_fail_reads will be set to 0.
3344  * 2) Windows 2008 R2 has a severe performance problem if READ ops
3345 	 *    aren't rejected on the standby path. 2008 sends commands
3346 	 *    down the standby path which then must be proxied over to the
3347 	 *    active node and back.
3348 	 */
3349 	if ((sl->sl_access_state == SBD_LU_STANDBY) &&
3350 	    stmf_standby_fail_reads &&
3351 	    (cdb0 == SCMD_READ || cdb0 == SCMD_READ_G1 ||
3352 	    cdb0 == SCMD_READ_G4 || cdb0 == SCMD_READ_G5)) {
3353 		stmf_scsilib_send_status(task, STATUS_CHECK,
3354 		    STMF_SAA_LU_NO_ACCESS_STANDBY);
3355 		return;
3356 	}
3357 
3358 	/*
3359 	 * Don't go further if cmd is unsupported in standby mode
3360 	 */
3361 	if (sl->sl_access_state == SBD_LU_STANDBY) {
3362 		if (cdb0 != SCMD_INQUIRY &&
3363 		    cdb0 != SCMD_MODE_SENSE &&
3364 		    cdb0 != SCMD_MODE_SENSE_G1 &&
3365 		    cdb0 != SCMD_MODE_SELECT &&
3366 		    cdb0 != SCMD_MODE_SELECT_G1 &&
3367 		    cdb0 != SCMD_RESERVE &&
3368 		    cdb0 != SCMD_RELEASE &&
3369 		    cdb0 != SCMD_PERSISTENT_RESERVE_OUT &&
3370 		    cdb0 != SCMD_PERSISTENT_RESERVE_IN &&
3371 		    cdb0 != SCMD_REQUEST_SENSE &&
3372 		    cdb0 != SCMD_READ_CAPACITY &&
3373 		    cdb0 != SCMD_TEST_UNIT_READY &&
3374 		    cdb0 != SCMD_START_STOP &&
3375 		    cdb0 != SCMD_READ &&
3376 		    cdb0 != SCMD_READ_G1 &&
3377 		    cdb0 != SCMD_READ_G4 &&
3378 		    cdb0 != SCMD_READ_G5 &&
3379 		    !(cdb0 == SCMD_SVC_ACTION_IN_G4 &&
3380 		    cdb1 == SSVC_ACTION_READ_CAPACITY_G4) &&
3381 		    !(cdb0 == SCMD_MAINTENANCE_IN &&
3382 		    (cdb1 & 0x1F) == 0x05) &&
3383 		    !(cdb0 == SCMD_MAINTENANCE_IN &&
3384 		    (cdb1 & 0x1F) == 0x0A)) {
3385 			stmf_scsilib_send_status(task, STATUS_CHECK,
3386 			    STMF_SAA_LU_NO_ACCESS_STANDBY);
3387 			return;
3388 		}
3389 	}
3390 
3391 	/*
3392 	 * Checking ua conditions as per SAM3R14 5.3.2 specified order. During
3393  * MPIO/ALUA failover, cmds come in through local ports and the proxy
3394 	 * port provider (i.e. pppt), we want to report unit attention to
3395 	 * only local cmds since initiators (Windows MPIO/DSM) would continue
3396 	 * sending I/O to the target that reported unit attention.
3397 	 */
3398 	if ((it->sbd_it_ua_conditions) &&
3399 	    !(task->task_additional_flags & TASK_AF_PPPT_TASK) &&
3400 	    (task->task_cdb[0] != SCMD_INQUIRY)) {
3401 		uint32_t saa = 0;
3402 
3403 		mutex_enter(&sl->sl_lock);
3404 		if (it->sbd_it_ua_conditions & SBD_UA_POR) {
3405 			it->sbd_it_ua_conditions &= ~SBD_UA_POR;
3406 			saa = STMF_SAA_POR;
3407 		} else if (it->sbd_it_ua_conditions &
3408 		    SBD_UA_ASYMMETRIC_ACCESS_CHANGED) {
3409 			it->sbd_it_ua_conditions &=
3410 			    ~SBD_UA_ASYMMETRIC_ACCESS_CHANGED;
3411 			saa = STMF_SAA_ASYMMETRIC_ACCESS_CHANGED;
3412 		}
3413 		mutex_exit(&sl->sl_lock);
3414 		if (saa) {
3415 			stmf_scsilib_send_status(task, STATUS_CHECK, saa);
3416 			return;
3417 		}
3418 	}
3419 
3420 	/* Reservation conflict checks */
3421 	if (sbd_check_reservation_conflict(sl, task)) {
3422 		stmf_scsilib_send_status(task,
3423 		    STATUS_RESERVATION_CONFLICT, 0);
3424 		return;
3425 	}
3426 
3427 	/* Rest of the UA condition checks */
3428 	if ((it->sbd_it_ua_conditions) && (task->task_cdb[0] != SCMD_INQUIRY)) {
3429 		uint32_t saa = 0;
3430 
3431 		mutex_enter(&sl->sl_lock);
3432 		if (it->sbd_it_ua_conditions & SBD_UA_CAPACITY_CHANGED) {
3433 			it->sbd_it_ua_conditions &= ~SBD_UA_CAPACITY_CHANGED;
3434 			if ((task->task_cdb[0] == SCMD_READ_CAPACITY) ||
3435 			    ((task->task_cdb[0] == SCMD_SVC_ACTION_IN_G4) &&
3436 			    (task->task_cdb[1] ==
3437 			    SSVC_ACTION_READ_CAPACITY_G4))) {
3438 				saa = 0;
3439 			} else {
3440 				saa = STMF_SAA_CAPACITY_DATA_HAS_CHANGED;
3441 			}
3442 		} else if (it->sbd_it_ua_conditions &
3443 		    SBD_UA_MODE_PARAMETERS_CHANGED) {
3444 			it->sbd_it_ua_conditions &=
3445 			    ~SBD_UA_MODE_PARAMETERS_CHANGED;
3446 			saa = STMF_SAA_MODE_PARAMETERS_CHANGED;
3447 		} else if (it->sbd_it_ua_conditions &
3448 		    SBD_UA_ASYMMETRIC_ACCESS_CHANGED) {
3449 			saa = 0;
3450 		} else if (it->sbd_it_ua_conditions & SBD_UA_POR) {
3451 			saa = 0;
3452 		} else if (it->sbd_it_ua_conditions &
3453 		    SBD_UA_ACCESS_STATE_TRANSITION) {
3454 			it->sbd_it_ua_conditions &=
3455 			    ~SBD_UA_ACCESS_STATE_TRANSITION;
3456 			saa = STMF_SAA_LU_NO_ACCESS_TRANSITION;
3457 		} else {
3458 			it->sbd_it_ua_conditions = 0;
3459 			saa = 0;
3460 		}
3461 		mutex_exit(&sl->sl_lock);
3462 		if (saa) {
3463 			stmf_scsilib_send_status(task, STATUS_CHECK, saa);
3464 			return;
3465 		}
3466 	}
3467 
3468 	if (sl->sl_access_state == SBD_LU_STANDBY) {
3469 		/*
3470 		 * is this a short write?
3471 		 * if so, we'll need to wait until we have the buffer
3472 		 * before proxying the command
3473 		 */
3474 		switch (cdb0) {
3475 			case SCMD_MODE_SELECT:
3476 			case SCMD_MODE_SELECT_G1:
3477 			case SCMD_PERSISTENT_RESERVE_OUT:
3478 				break;
3479 			default:
3480 				st_ret = stmf_proxy_scsi_cmd(task,
3481 				    initial_dbuf);
3482 				if (st_ret != STMF_SUCCESS) {
3483 					stmf_scsilib_send_status(task,
3484 					    STATUS_CHECK,
3485 					    STMF_SAA_LU_NO_ACCESS_UNAVAIL);
3486 				}
3487 				return;
3488 		}
3489 	}
3490 
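	/*
	 * Masking off the CDB group-code bits (cdb[0] & 0x1F) collapses the
	 * 6/10/12/16-byte READ opcodes (0x08/0x28/0xA8/0x88) and WRITE
	 * opcodes (0x0A/0x2A/0xAA/0x8A) onto SCMD_READ and SCMD_WRITE, so a
	 * single check below covers every transfer-length variant.
	 */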
3491 	cdb0 = task->task_cdb[0] & 0x1F;
3492 
3493 	if ((cdb0 == SCMD_READ) || (cdb0 == SCMD_WRITE)) {
3494 		if (task->task_additional_flags & TASK_AF_PORT_LOAD_HIGH) {
3495 			stmf_scsilib_send_status(task, STATUS_QFULL, 0);
3496 			return;
3497 		}
3498 		if (cdb0 == SCMD_READ) {
3499 			sbd_handle_read(task, initial_dbuf);
3500 			return;
3501 		}
3502 		sbd_handle_write(task, initial_dbuf);
3503 		return;
3504 	}
3505 
3506 	cdb0 = task->task_cdb[0];
3507 	cdb1 = task->task_cdb[1];
3508 
3509 	if (cdb0 == SCMD_INQUIRY) {		/* Inquiry */
3510 		sbd_handle_inquiry(task, initial_dbuf);
3511 		return;
3512 	}
3513 
3514 	if (cdb0  == SCMD_PERSISTENT_RESERVE_OUT) {
3515 		sbd_handle_pgr_out_cmd(task, initial_dbuf);
3516 		return;
3517 	}
3518 
3519 	if (cdb0  == SCMD_PERSISTENT_RESERVE_IN) {
3520 		sbd_handle_pgr_in_cmd(task, initial_dbuf);
3521 		return;
3522 	}
3523 
3524 	if (cdb0 == SCMD_RELEASE) {
3525 		if (cdb1) {
3526 			stmf_scsilib_send_status(task, STATUS_CHECK,
3527 			    STMF_SAA_INVALID_FIELD_IN_CDB);
3528 			return;
3529 		}
3530 
3531 		mutex_enter(&sl->sl_lock);
3532 		if (sl->sl_flags & SL_LU_HAS_SCSI2_RESERVATION) {
3533 			/* If not owner don't release it, just return good */
3534 			if (it->sbd_it_session_id !=
3535 			    sl->sl_rs_owner_session_id) {
3536 				mutex_exit(&sl->sl_lock);
3537 				stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3538 				return;
3539 			}
3540 		}
3541 		sl->sl_flags &= ~SL_LU_HAS_SCSI2_RESERVATION;
3542 		it->sbd_it_flags &= ~SBD_IT_HAS_SCSI2_RESERVATION;
3543 		mutex_exit(&sl->sl_lock);
3544 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3545 		return;
3546 	}
3547 
3548 	if (cdb0 == SCMD_RESERVE) {
3549 		if (cdb1) {
3550 			stmf_scsilib_send_status(task, STATUS_CHECK,
3551 			    STMF_SAA_INVALID_FIELD_IN_CDB);
3552 			return;
3553 		}
3554 
3555 		mutex_enter(&sl->sl_lock);
3556 		if (sl->sl_flags & SL_LU_HAS_SCSI2_RESERVATION) {
3557 			/* If not owner, return conflict status */
3558 			if (it->sbd_it_session_id !=
3559 			    sl->sl_rs_owner_session_id) {
3560 				mutex_exit(&sl->sl_lock);
3561 				stmf_scsilib_send_status(task,
3562 				    STATUS_RESERVATION_CONFLICT, 0);
3563 				return;
3564 			}
3565 		}
3566 		sl->sl_flags |= SL_LU_HAS_SCSI2_RESERVATION;
3567 		it->sbd_it_flags |= SBD_IT_HAS_SCSI2_RESERVATION;
3568 		sl->sl_rs_owner_session_id = it->sbd_it_session_id;
3569 		mutex_exit(&sl->sl_lock);
3570 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3571 		return;
3572 	}
3573 
3574 	if (cdb0 == SCMD_REQUEST_SENSE) {
3575 		/*
3576 		 * LU provider needs to store unretrieved sense data
3577 		 * (e.g. after power-on/reset).  For now, we'll just
3578 		 * return good status with no sense.
3579 		 */
3580 
3581 		if ((cdb1 & ~1) || task->task_cdb[2] || task->task_cdb[3] ||
3582 		    task->task_cdb[5]) {
3583 			stmf_scsilib_send_status(task, STATUS_CHECK,
3584 			    STMF_SAA_INVALID_FIELD_IN_CDB);
3585 		} else {
3586 			stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3587 		}
3588 
3589 		return;
3590 	}
3591 
3592 	/* Report Target Port Groups */
3593 	if ((cdb0 == SCMD_MAINTENANCE_IN) &&
3594 	    ((cdb1 & 0x1F) == 0x0A)) {
3595 		stmf_scsilib_handle_report_tpgs(task, initial_dbuf);
3596 		return;
3597 	}
3598 
3599 	/* Report Identifying Information */
3600 	if ((cdb0 == SCMD_MAINTENANCE_IN) &&
3601 	    ((cdb1 & 0x1F) == 0x05)) {
3602 		sbd_handle_identifying_info(task, initial_dbuf);
3603 		return;
3604 	}
3605 
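	/*
	 * START STOP UNIT: cdb[4] & 0xFC rejects everything except the START
	 * and LOEJ bits (POWER CONDITION and reserved bits must be zero), and
	 * LOEJ (cdb[4] & 0x02) itself is then rejected as unsupported.
	 */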
3606 	if (cdb0 == SCMD_START_STOP) {			/* Start stop */
3607 		task->task_cmd_xfer_length = 0;
3608 		if (task->task_cdb[4] & 0xFC) {
3609 			stmf_scsilib_send_status(task, STATUS_CHECK,
3610 			    STMF_SAA_INVALID_FIELD_IN_CDB);
3611 			return;
3612 		}
3613 		if (task->task_cdb[4] & 2) {
3614 			stmf_scsilib_send_status(task, STATUS_CHECK,
3615 			    STMF_SAA_INVALID_FIELD_IN_CDB);
3616 		} else {
3617 			stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3618 		}
3619 		return;
3620 
3621 	}
3622 
3623 	if ((cdb0 == SCMD_MODE_SENSE) || (cdb0 == SCMD_MODE_SENSE_G1)) {
3624 		uint8_t *p;
3625 		p = kmem_zalloc(512, KM_SLEEP);
3626 		sbd_handle_mode_sense(task, initial_dbuf, p);
3627 		kmem_free(p, 512);
3628 		return;
3629 	}
3630 
3631 	if ((cdb0 == SCMD_MODE_SELECT) || (cdb0 == SCMD_MODE_SELECT_G1)) {
3632 		sbd_handle_mode_select(task, initial_dbuf);
3633 		return;
3634 	}
3635 
3636 	if ((cdb0 == SCMD_UNMAP) && (sl->sl_flags & SL_UNMAP_ENABLED)) {
3637 		sbd_handle_unmap(task, initial_dbuf);
3638 		return;
3639 	}
3640 
3641 	if ((cdb0 == SCMD_WRITE_SAME_G4) || (cdb0 == SCMD_WRITE_SAME_G1)) {
3642 		sbd_handle_write_same(task, initial_dbuf);
3643 		return;
3644 	}
3645 
3646 	if (cdb0 == SCMD_COMPARE_AND_WRITE) {
3647 		sbd_handle_ats(task, initial_dbuf);
3648 		return;
3649 	}
3650 
3651 	if (cdb0 == SCMD_EXTENDED_COPY) {
3652 		sbd_handle_xcopy(task, initial_dbuf);
3653 		return;
3654 	}
3655 
3656 	if (cdb0 == SCMD_RECV_COPY_RESULTS) {
3657 		sbd_handle_recv_copy_results(task, initial_dbuf);
3658 		return;
3659 	}
3660 
3661 	if (cdb0 == SCMD_TEST_UNIT_READY) {	/* Test unit ready */
3662 		task->task_cmd_xfer_length = 0;
3663 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3664 		return;
3665 	}
3666 
3667 	if (cdb0 == SCMD_READ_CAPACITY) {		/* Read Capacity */
3668 		sbd_handle_read_capacity(task, initial_dbuf);
3669 		return;
3670 	}
3671 
3672 	if (cdb0 == SCMD_SVC_ACTION_IN_G4) { /* Read Capacity or read long */
3673 		if (cdb1 == SSVC_ACTION_READ_CAPACITY_G4) {
3674 			sbd_handle_read_capacity(task, initial_dbuf);
3675 			return;
3676 		/*
3677 		 * } else if (cdb1 == SSVC_ACTION_READ_LONG_G4) {
3678 		 *	sbd_handle_read(task, initial_dbuf);
3679 		 *	return;
3680 		 */
3681 		}
3682 	}
3683 
3684 	/*
3685 	 * if (cdb0 == SCMD_SVC_ACTION_OUT_G4) {
3686 	 *	if (cdb1 == SSVC_ACTION_WRITE_LONG_G4) {
3687 	 *		 sbd_handle_write(task, initial_dbuf);
3688 	 *		return;
3689 	 *	}
3690 	 * }
3691 	 */
3692 
3693 	if (cdb0 == SCMD_VERIFY) {
3694 		/*
3695 		 * Something more probably needs to be done here.
3696 		 */
3697 		task->task_cmd_xfer_length = 0;
3698 		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3699 		return;
3700 	}
3701 
3702 	if (cdb0 == SCMD_SYNCHRONIZE_CACHE ||
3703 	    cdb0 == SCMD_SYNCHRONIZE_CACHE_G4) {
3704 		sbd_handle_sync_cache(task, initial_dbuf);
3705 		return;
3706 	}
3707 
3708 	/*
3709 	 * Write and Verify use the same path as write, but don't clutter the
3710 	 * performance path above with checking for write_verify opcodes.  We
3711 	 * rely on zfs's integrity checks for the "Verify" part of Write &
3712 	 * Verify.  (Even if we did a read to "verify" we'd merely be reading
3713 	 * cache, not actual media.)
3714 	 * Therefore we
3715 	 *   a) only support this if sbd_is_zvol, and
3716 	 *   b) run the IO through the normal write path with a forced
3717 	 *	sbd_flush_data_cache at the end.
3718 	 */
3719 
3720 	if ((sl->sl_flags & SL_ZFS_META) && (
3721 	    cdb0 == SCMD_WRITE_VERIFY ||
3722 	    cdb0 == SCMD_WRITE_VERIFY_G4 ||
3723 	    cdb0 == SCMD_WRITE_VERIFY_G5)) {
3724 		sbd_handle_write(task, initial_dbuf);
3725 		return;
3726 	}
3727 	stmf_scsilib_send_status(task, STATUS_CHECK, STMF_SAA_INVALID_OPCODE);
3728 }
3729 
3730 void
3731 sbd_dbuf_xfer_done(struct scsi_task *task, struct stmf_data_buf *dbuf)
3732 {
3733 	sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
3734 
3735 	if (dbuf->db_flags & DB_LU_DATA_BUF) {
3736 		/*
3737 		 * Buffers passed in from the LU always complete
3738 		 * even if the task is no longer active.
3739 		 */
3740 		ASSERT(task->task_additional_flags & TASK_AF_ACCEPT_LU_DBUF);
3741 		ASSERT(scmd);
3742 		switch (scmd->cmd_type) {
3743 		case (SBD_CMD_SCSI_READ):
3744 			sbd_handle_sgl_read_xfer_completion(task, scmd, dbuf);
3745 			break;
3746 		case (SBD_CMD_SCSI_WRITE):
3747 			sbd_handle_sgl_write_xfer_completion(task, scmd, dbuf);
3748 			break;
3749 		default:
3750 			cmn_err(CE_PANIC, "Unknown cmd type, task = %p",
3751 			    (void *)task);
3752 			break;
3753 		}
3754 		return;
3755 	}
3756 
3757 	if ((scmd == NULL) || ((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0))
3758 		return;
3759 
3760 	switch (scmd->cmd_type) {
3761 	case (SBD_CMD_SCSI_READ):
3762 		sbd_handle_read_xfer_completion(task, scmd, dbuf);
3763 		break;
3764 
3765 	case (SBD_CMD_SCSI_WRITE):
3766 		switch (task->task_cdb[0]) {
3767 		case SCMD_WRITE_SAME_G1:
3768 		case SCMD_WRITE_SAME_G4:
3769 			sbd_handle_write_same_xfer_completion(task, scmd, dbuf,
3770 			    1);
3771 			break;
3772 		case SCMD_COMPARE_AND_WRITE:
3773 			sbd_handle_ats_xfer_completion(task, scmd, dbuf, 1);
3774 			break;
3775 		default:
3776 			sbd_handle_write_xfer_completion(task, scmd, dbuf, 1);
3777 			/* FALLTHRU */
3778 		}
3779 		break;
3780 
3781 	case (SBD_CMD_SMALL_READ):
3782 		sbd_handle_short_read_xfer_completion(task, scmd, dbuf);
3783 		break;
3784 
3785 	case (SBD_CMD_SMALL_WRITE):
3786 		sbd_handle_short_write_xfer_completion(task, dbuf);
3787 		break;
3788 
3789 	default:
3790 		cmn_err(CE_PANIC, "Unknown cmd type, task = %p", (void *)task);
3791 		break;
3792 	}
3793 }
3794 
3795 /* ARGSUSED */
3796 void
3797 sbd_send_status_done(struct scsi_task *task)
3798 {
3799 	cmn_err(CE_PANIC,
3800 	    "sbd_send_status_done: this should not have been called");
3801 }
3802 
3803 void
3804 sbd_task_free(struct scsi_task *task)
3805 {
3806 	if (task->task_lu_private) {
3807 		sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
3808 		if (scmd->flags & SBD_SCSI_CMD_ACTIVE) {
3809 			cmn_err(CE_PANIC, "cmd is active, task = %p",
3810 			    (void *)task);
3811 		}
3812 		kmem_free(scmd, sizeof (sbd_cmd_t));
3813 	}
3814 }
3815 
3816 /*
3817  * Aborts are synchronous w.r.t. I/O AND
3818  * All the I/O which SBD does is synchronous AND
3819  * Everything within a task is single threaded.
3820  *   IT MEANS
3821  * If this function is called, we are doing nothing with this task
3822  * inside the sbd module.
3823  */
3824 /* ARGSUSED */
3825 stmf_status_t
3826 sbd_abort(struct stmf_lu *lu, int abort_cmd, void *arg, uint32_t flags)
3827 {
3828 	sbd_lu_t *sl = (sbd_lu_t *)lu->lu_provider_private;
3829 	scsi_task_t *task;
3830 
3831 	if (abort_cmd == STMF_LU_RESET_STATE) {
3832 		return (sbd_lu_reset_state(lu));
3833 	}
3834 
3835 	if (abort_cmd == STMF_LU_ITL_HANDLE_REMOVED) {
3836 		sbd_check_and_clear_scsi2_reservation(sl, (sbd_it_data_t *)arg);
3837 		sbd_remove_it_handle(sl, (sbd_it_data_t *)arg);
3838 		return (STMF_SUCCESS);
3839 	}
3840 
3841 	ASSERT(abort_cmd == STMF_LU_ABORT_TASK);
3842 	task = (scsi_task_t *)arg;
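	/*
	 * Drop any pending ATS (COMPARE AND WRITE) state tied to this task
	 * before examining the command state itself.
	 */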
3843 	sbd_ats_remove_by_task(task);
3844 	if (task->task_lu_private) {
3845 		sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
3846 
3847 		if (scmd->flags & SBD_SCSI_CMD_ACTIVE) {
3848 			if (scmd->flags & SBD_SCSI_CMD_TRANS_DATA) {
3849 				kmem_free(scmd->trans_data,
3850 				    scmd->trans_data_len);
3851 				scmd->flags &= ~SBD_SCSI_CMD_TRANS_DATA;
3852 			}
3853 			scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
3854 			return (STMF_ABORT_SUCCESS);
3855 		}
3856 	}
3857 
3858 	return (STMF_NOT_FOUND);
3859 }
3860 
3861 void
3862 sbd_task_poll(struct scsi_task *task)
3863 {
3864 	stmf_data_buf_t *initial_dbuf;
3865 
3866 	initial_dbuf = stmf_handle_to_buf(task, 0);
3867 	sbd_new_task(task, initial_dbuf);
3868 }
3869 
3870 /*
3871  * This function is called during task clean-up if the
3872  * DB_LU_DATA_BUF flag is set on the dbuf. This should only be called for
3873  * abort processing after sbd_abort has been called for the task.
3874  */
3875 void
3876 sbd_dbuf_free(struct scsi_task *task, struct stmf_data_buf *dbuf)
3877 {
3878 	sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
3879 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
3880 
3881 	ASSERT(dbuf->db_lu_private);
3882 	ASSERT(scmd && ATOMIC8_GET(scmd->nbufs) > 0);
3883 	ASSERT((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0);
3884 	ASSERT(dbuf->db_flags & DB_LU_DATA_BUF);
3885 	ASSERT(task->task_additional_flags & TASK_AF_ACCEPT_LU_DBUF);
3886 	ASSERT((curthread->t_flag & T_INTR_THREAD) == 0);
3887 
3888 	if (scmd->cmd_type == SBD_CMD_SCSI_READ) {
3889 		sbd_zvol_rele_read_bufs(sl, dbuf);
3890 	} else if (scmd->cmd_type == SBD_CMD_SCSI_WRITE) {
3891 		sbd_zvol_rele_write_bufs_abort(sl, dbuf);
3892 	} else {
3893 		cmn_err(CE_PANIC, "Unknown cmd type %d, task = %p",
3894 		    scmd->cmd_type, (void *)task);
3895 	}
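	/*
	 * Once the last outstanding buffer for this command is released,
	 * drop the reader hold on sl_access_state_lock.
	 */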
3896 	if (atomic_dec_8_nv(&scmd->nbufs) == 0)
3897 		rw_exit(&sl->sl_access_state_lock);
3898 	stmf_teardown_dbuf(task, dbuf);
3899 	stmf_free(dbuf);
3900 }
3901 
3902 /* ARGSUSED */
3903 void
3904 sbd_ctl(struct stmf_lu *lu, int cmd, void *arg)
3905 {
3906 	sbd_lu_t *sl = (sbd_lu_t *)lu->lu_provider_private;
3907 	stmf_change_status_t st;
3908 
3909 	ASSERT((cmd == STMF_CMD_LU_ONLINE) ||
3910 	    (cmd == STMF_CMD_LU_OFFLINE) ||
3911 	    (cmd == STMF_ACK_LU_ONLINE_COMPLETE) ||
3912 	    (cmd == STMF_ACK_LU_OFFLINE_COMPLETE));
3913 
3914 	st.st_completion_status = STMF_SUCCESS;
3915 	st.st_additional_info = NULL;
3916 
3917 	switch (cmd) {
3918 	case STMF_CMD_LU_ONLINE:
3919 		if (sl->sl_state == STMF_STATE_ONLINE)
3920 			st.st_completion_status = STMF_ALREADY;
3921 		else if (sl->sl_state != STMF_STATE_OFFLINE)
3922 			st.st_completion_status = STMF_FAILURE;
3923 		if (st.st_completion_status == STMF_SUCCESS) {
3924 			sl->sl_state = STMF_STATE_ONLINE;
3925 			sl->sl_state_not_acked = 1;
3926 		}
3927 		(void) stmf_ctl(STMF_CMD_LU_ONLINE_COMPLETE, lu, &st);
3928 		break;
3929 
3930 	case STMF_CMD_LU_OFFLINE:
3931 		if (sl->sl_state == STMF_STATE_OFFLINE)
3932 			st.st_completion_status = STMF_ALREADY;
3933 		else if (sl->sl_state != STMF_STATE_ONLINE)
3934 			st.st_completion_status = STMF_FAILURE;
3935 		if (st.st_completion_status == STMF_SUCCESS) {
3936 			sl->sl_flags &= ~(SL_MEDIUM_REMOVAL_PREVENTED |
3937 			    SL_LU_HAS_SCSI2_RESERVATION);
3938 			sl->sl_state = STMF_STATE_OFFLINE;
3939 			sl->sl_state_not_acked = 1;
3940 			sbd_pgr_reset(sl);
3941 		}
3942 		(void) stmf_ctl(STMF_CMD_LU_OFFLINE_COMPLETE, lu, &st);
3943 		break;
3944 
3945 	case STMF_ACK_LU_ONLINE_COMPLETE:
3946 		/* Fallthrough */
3947 	case STMF_ACK_LU_OFFLINE_COMPLETE:
3948 		sl->sl_state_not_acked = 0;
3949 		break;
3950 
3951 	}
3952 }
3953 
3954 /* ARGSUSED */
3955 stmf_status_t
3956 sbd_info(uint32_t cmd, stmf_lu_t *lu, void *arg, uint8_t *buf,
3957     uint32_t *bufsizep)
3958 {
3959 	return (STMF_NOT_SUPPORTED);
3960 }
3961 
3962 stmf_status_t
3963 sbd_lu_reset_state(stmf_lu_t *lu)
3964 {
3965 	sbd_lu_t *sl = (sbd_lu_t *)lu->lu_provider_private;
3966 
3967 	mutex_enter(&sl->sl_lock);
3968 	if (sl->sl_flags & SL_SAVED_WRITE_CACHE_DISABLE) {
3969 		sl->sl_flags |= SL_WRITEBACK_CACHE_DISABLE;
3970 		mutex_exit(&sl->sl_lock);
3971 		if (sl->sl_access_state == SBD_LU_ACTIVE) {
3972 			(void) sbd_wcd_set(1, sl);
3973 		}
3974 	} else {
3975 		sl->sl_flags &= ~SL_WRITEBACK_CACHE_DISABLE;
3976 		mutex_exit(&sl->sl_lock);
3977 		if (sl->sl_access_state == SBD_LU_ACTIVE) {
3978 			(void) sbd_wcd_set(0, sl);
3979 		}
3980 	}
3981 	sbd_pgr_reset(sl);
3982 	sbd_check_and_clear_scsi2_reservation(sl, NULL);
3983 	if (stmf_deregister_all_lu_itl_handles(lu) != STMF_SUCCESS) {
3984 		return (STMF_FAILURE);
3985 	}
3986 	return (STMF_SUCCESS);
3987 }
3988 
3989 sbd_status_t
3990 sbd_flush_data_cache(sbd_lu_t *sl, int fsync_done)
3991 {
3992 	sbd_status_t ret = SBD_SUCCESS;
3993 
3994 	rw_enter(&sl->sl_access_state_lock, RW_READER);
3995 	if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
3996 		ret = SBD_FILEIO_FAILURE;
3997 		goto flush_fail;
3998 	}
3999 	if (fsync_done)
4000 		goto over_fsync;
4001 	if ((sl->sl_data_vtype == VREG) || (sl->sl_data_vtype == VBLK)) {
4002 		if (VOP_FSYNC(sl->sl_data_vp, FSYNC, kcred, NULL)) {
4003 			ret = SBD_FAILURE;
4004 			goto flush_fail;
4005 		}
4006 	}
4007 over_fsync:
4008 	if (((sl->sl_data_vtype == VCHR) || (sl->sl_data_vtype == VBLK)) &&
4009 	    ((sl->sl_flags & SL_NO_DATA_DKIOFLUSH) == 0)) {
4010 		int r = 0;
4011 
4012 		ret = VOP_IOCTL(sl->sl_data_vp, DKIOCFLUSHWRITECACHE, 0,
4013 		    FKIOCTL, kcred, &r, NULL);
4014 		if ((ret == ENOTTY) || (ret == ENOTSUP)) {
4015 			mutex_enter(&sl->sl_lock);
4016 			sl->sl_flags |= SL_NO_DATA_DKIOFLUSH;
4017 			mutex_exit(&sl->sl_lock);
4018 		} else {
4019 			ret = (ret != 0) ? SBD_FAILURE : SBD_SUCCESS;
4020 		}
4021 	}
4022 flush_fail:
4023 	rw_exit(&sl->sl_access_state_lock);
4024 
4025 	return (ret);
4026 }
4027 
4028 /* ARGSUSED */
4029 static void
4030 sbd_handle_sync_cache(struct scsi_task *task,
4031     struct stmf_data_buf *initial_dbuf)
4032 {
4033 	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
4034 	uint64_t	lba, laddr;
4035 	sbd_status_t	sret;
4036 	uint32_t	len;
4037 	int		is_g4 = 0;
4038 	int		immed;
4039 
4040 	task->task_cmd_xfer_length = 0;
4041 	/*
4042 	 * Determine if this is a 10 or 16 byte CDB
4043 	 */
4044 
4045 	if (task->task_cdb[0] == SCMD_SYNCHRONIZE_CACHE_G4)
4046 		is_g4 = 1;
4047 
4048 	/*
4049 	 * Determine other requested parameters
4050 	 *
4051 	 * We don't have a non-volatile cache, so don't care about SYNC_NV.
4052 	 * Do not support the IMMED bit.
4053 	 */
4054 
4055 	immed = (task->task_cdb[1] & 0x02);
4056 
4057 	if (immed) {
4058 		stmf_scsilib_send_status(task, STATUS_CHECK,
4059 		    STMF_SAA_INVALID_FIELD_IN_CDB);
4060 		return;
4061 	}
4062 
4063 	/*
4064 	 * Check to be sure we're not being asked to sync an LBA
4065 	 * that is out of range.  While checking, verify reserved fields.
4066 	 */
4067 
4068 	if (is_g4) {
4069 		if ((task->task_cdb[1] & 0xf9) || task->task_cdb[14] ||
4070 		    task->task_cdb[15]) {
4071 			stmf_scsilib_send_status(task, STATUS_CHECK,
4072 			    STMF_SAA_INVALID_FIELD_IN_CDB);
4073 			return;
4074 		}
4075 
4076 		lba = READ_SCSI64(&task->task_cdb[2], uint64_t);
4077 		len = READ_SCSI32(&task->task_cdb[10], uint32_t);
4078 	} else {
4079 		if ((task->task_cdb[1] & 0xf9) || task->task_cdb[6] ||
4080 		    task->task_cdb[9]) {
4081 			stmf_scsilib_send_status(task, STATUS_CHECK,
4082 			    STMF_SAA_INVALID_FIELD_IN_CDB);
4083 			return;
4084 		}
4085 
4086 		lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
4087 		len = READ_SCSI16(&task->task_cdb[7], uint32_t);
4088 	}
4089 
4090 	laddr = lba << sl->sl_data_blocksize_shift;
4091 	len <<= sl->sl_data_blocksize_shift;
4092 
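	/*
	 * laddr and len are now byte quantities; e.g. with 512-byte blocks
	 * (shift of 9) lba 0x1000 and a transfer of 8 blocks become
	 * laddr 0x200000 and len 0x1000, which must fit within sl_lu_size.
	 */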
4093 	if ((laddr + (uint64_t)len) > sl->sl_lu_size) {
4094 		stmf_scsilib_send_status(task, STATUS_CHECK,
4095 		    STMF_SAA_LBA_OUT_OF_RANGE);
4096 		return;
4097 	}
4098 
4099 	sret = sbd_flush_data_cache(sl, 0);
4100 	if (sret != SBD_SUCCESS) {
4101 		stmf_scsilib_send_status(task, STATUS_CHECK,
4102 		    STMF_SAA_WRITE_ERROR);
4103 		return;
4104 	}
4105 
4106 	stmf_scsilib_send_status(task, STATUS_GOOD, 0);
4107 }
4108