/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2019 Nexenta Systems, Inc.  All rights reserved.
 */

#include <sys/conf.h>
#include <sys/file.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/scsi/scsi.h>
#include <sys/scsi/impl/scsi_reset_notify.h>
#include <sys/scsi/generic/mode.h>
#include <sys/disp.h>
#include <sys/byteorder.h>
#include <sys/atomic.h>
#include <sys/sdt.h>
#include <sys/dkio.h>

#include <sys/dmu.h>
#include <sys/txg.h>
#include <sys/refcount.h>
#include <sys/zvol.h>

#include <sys/stmf.h>
#include <sys/lpif.h>
#include <sys/portif.h>
#include <sys/stmf_ioctl.h>
#include <sys/stmf_sbd_ioctl.h>

#include "stmf_sbd.h"
#include "sbd_impl.h"

/* ATS tuning parameters */
#define	OVERLAP_OFF 0
#define	OVERLAP_LOW 1
#define	OVERLAP_MEDIUM 2
#define	OVERLAP_HIGH 3
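/*
 * Overlap checking levels (see sbd_ats_do_handling_before_io() below):
 * OFF skips the check entirely, LOW checks only COMPARE AND WRITE
 * commands against the in-flight list, MEDIUM also checks WRITEs, and
 * HIGH checks every command that is entered on the list.
 */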
uint8_t ats_overlap_check = OVERLAP_LOW; /* check for rw overlap with ATS */

uint8_t HardwareAcceleratedLocking = 1; /* 0 for disabled */
uint8_t HardwareAcceleratedMove = 1;
uint64_t sbd_list_length = 0;

#define	SBD_ATS_MAX_NBLKS	32
/* ATS routines. */
uint8_t
sbd_ats_max_nblks(void)
{
	if (HardwareAcceleratedLocking == 0)
		return (0);
	return (SBD_ATS_MAX_NBLKS);
}

#define	is_overlapping(start1, len1, start2, len2) \
	((start2) > (start1) ? ((start2) - (start1)) < (len1) : \
	((start1) - (start2)) < (len2))
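
/*
 * Worked example (illustrative only): for start1 = 100, len1 = 8 and
 * start2 = 104, len2 = 8, start2 > start1 and (104 - 100) = 4 < 8, so
 * the ranges [100, 108) and [104, 112) overlap.  With start2 = 108 the
 * difference is 8, which is not < 8, so adjacent ranges do not overlap.
 */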

/*ARGSUSED*/
static sbd_status_t
sbd_ats_do_handling_before_io(scsi_task_t *task, struct sbd_lu *sl,
    uint64_t lba, uint64_t count, uint32_t flags)
{
	sbd_status_t ret = SBD_SUCCESS;
	ats_state_t *ats_state, *ats_state_ret;
	sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
	uint8_t cdb0 = task->task_cdb[0];

	if (scmd == NULL)
		return (SBD_SUCCESS);

	if (HardwareAcceleratedLocking == 0)
		return (SBD_SUCCESS);
	/*
	 * If ATS overlap checking is disabled just return.  The check
	 * is deliberately not done in the function that removes items
	 * from the list, which allows this value to be changed at
	 * runtime.  If checking is turned on at runtime the remove path
	 * just starts taking items off the list; if it is turned off at
	 * runtime the list is still cleaned up.
	 */
	if (ats_overlap_check == OVERLAP_OFF)
		return (SBD_SUCCESS);

	/* overlap checking for compare and write only */
	if (ats_overlap_check == OVERLAP_LOW) {
		if (cdb0 != SCMD_COMPARE_AND_WRITE)
			return (SBD_SUCCESS);
	}

	/* overlap checking for compare and write, and for writes */
	if (ats_overlap_check == OVERLAP_MEDIUM) {
		if ((cdb0 != SCMD_COMPARE_AND_WRITE) && (cdb0 != SCMD_WRITE))
			return (SBD_SUCCESS);
	}

	mutex_enter(&sl->sl_lock);
	/*
	 * If the list is empty then just add the element to the list and
	 * return success; there is no overlap.  This is done for every
	 * read, write or compare and write.
	 */
	if (list_is_empty(&sl->sl_ats_io_list)) {
		goto done;
	}

	/*
	 * There are inflight operations.  As a result the list must be
	 * scanned, and if there are any overlaps SBD_BUSY should be
	 * returned.
	 *
	 * Duplicate reads and writes are allowed and kept on the list
	 * since there is no reason that overlapping IO operations should
	 * be delayed.
	 *
	 * A command that conflicts with a running compare and write will
	 * be rescheduled and rerun.  This is handled by stmf_task_poll_lu.
	 * There is a possibility that a command can be starved and still
	 * return busy, which is valid in the SCSI protocol.
	 */

	for (ats_state = list_head(&sl->sl_ats_io_list); ats_state != NULL;
	    ats_state = list_next(&sl->sl_ats_io_list, ats_state)) {

		if (is_overlapping(ats_state->as_cur_ats_lba,
		    ats_state->as_cur_ats_len, lba, count) == 0)
			continue;

		/* if the task is already listed just return */
		if (task == ats_state->as_cur_ats_task) {
			cmn_err(CE_WARN, "sbd_ats_handling_before_io: "
			    "task %p already on list", (void *) task);
			ret = SBD_SUCCESS;
			goto exit;
		}
		/*
		 * If either the current command or the listed command is
		 * a compare and write, any overlap is an error.
		 */
		if ((cdb0 == SCMD_COMPARE_AND_WRITE) ||
		    (ats_state->as_cmd == SCMD_COMPARE_AND_WRITE)) {
			ret = SBD_BUSY;
			goto exit;
		}
	}
done:
	ats_state_ret =
	    (ats_state_t *)kmem_zalloc(sizeof (ats_state_t), KM_SLEEP);
	ats_state_ret->as_cur_ats_lba = lba;
	ats_state_ret->as_cur_ats_len = count;
	ats_state_ret->as_cmd = cdb0;
	ats_state_ret->as_cur_ats_task = task;
	if (list_is_empty(&sl->sl_ats_io_list)) {
		list_insert_head(&sl->sl_ats_io_list, ats_state_ret);
	} else {
		list_insert_tail(&sl->sl_ats_io_list, ats_state_ret);
	}
	scmd->flags |= SBD_SCSI_CMD_ATS_RELATED;
	/* link the command to the list entry just inserted */
	scmd->ats_state = ats_state_ret;
	sbd_list_length++;
	mutex_exit(&sl->sl_lock);
	return (SBD_SUCCESS);

exit:
	mutex_exit(&sl->sl_lock);
	return (ret);
}

sbd_status_t
sbd_ats_handling_before_io(scsi_task_t *task, struct sbd_lu *sl,
    uint64_t lba, uint64_t count)
{
	return (sbd_ats_do_handling_before_io(task, sl, lba, count, 0));
}

void
sbd_ats_remove_by_task(scsi_task_t *task)
{
	ats_state_t *ats_state;
	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
	sbd_cmd_t *scmd = task->task_lu_private;

	if (scmd == NULL)
		return;
	/*
	 * Scan the list and take the task off of it.  It is possible
	 * that the call is made in a situation where the task is not
	 * listed; that is a valid but unlikely case, and if it happens
	 * we just fall through and return.  The removal is done by task,
	 * not by LBA range, and a task cannot be active for more than
	 * one command, so there is never a question of removing the
	 * wrong element.
	 */
	mutex_enter(&sl->sl_lock);
	if (list_is_empty(&sl->sl_ats_io_list)) {
		mutex_exit(&sl->sl_lock);
		return;
	}

	for (ats_state = list_head(&sl->sl_ats_io_list); ats_state != NULL;
	    ats_state = list_next(&sl->sl_ats_io_list, ats_state)) {

		if (ats_state->as_cur_ats_task == task) {
			list_remove(&sl->sl_ats_io_list, ats_state);
			kmem_free(ats_state, sizeof (ats_state_t));
			scmd->flags &= ~SBD_SCSI_CMD_ATS_RELATED;
			scmd->ats_state = NULL;
			sbd_list_length--;
			break;
		}
	}
	mutex_exit(&sl->sl_lock);
}

static sbd_status_t
sbd_compare_and_write(struct scsi_task *task, sbd_cmd_t *scmd,
    uint32_t *ret_off)
{
	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
	uint8_t *buf;
	sbd_status_t ret;
	uint64_t addr;
	uint32_t len, i;

	addr = READ_SCSI64(&task->task_cdb[2], uint64_t);
	len = (uint32_t)task->task_cdb[13];

	addr <<= sl->sl_data_blocksize_shift;
	len <<= sl->sl_data_blocksize_shift;
	buf = kmem_alloc(len, KM_SLEEP);
	ret = sbd_data_read(sl, task, addr, (uint64_t)len, buf);
	if (ret != SBD_SUCCESS) {
		goto compare_and_write_done;
	}
	/*
	 * Can't use bcmp here; we need the offset of the first mismatch.
	 */
	for (i = 0; i < len; i++) {
		if (buf[i] != scmd->trans_data[i])
			break;
	}
	if (i != len) {
		*ret_off = i;
		ret = SBD_COMPARE_FAILED;
		goto compare_and_write_done;
	}

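	/*
	 * The first len bytes of trans_data are the verify (compare)
	 * data; the second len bytes are the blocks to write when the
	 * compare succeeds, hence the "+ len" below.
	 */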
	ret = sbd_data_write(sl, task, addr, (uint64_t)len,
	    scmd->trans_data + len);

compare_and_write_done:
	kmem_free(buf, len);
	return (ret);
}

static void
sbd_send_miscompare_status(struct scsi_task *task, uint32_t miscompare_off)
{
	uint8_t sd[18];

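	/*
	 * Build fixed-format sense data by hand (a sketch of the layout,
	 * per the SPC fixed-format sense definition): 0xF0 is "current
	 * error" with the VALID bit set, sense key 0xE is MISCOMPARE,
	 * byte 7 is the additional sense length (10), and ASC 0x1D is
	 * MISCOMPARE DURING VERIFY OPERATION.  The INFORMATION field
	 * (bytes 3-6) carries the offset of the first mismatching byte.
	 */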
	task->task_scsi_status = STATUS_CHECK;
	bzero(sd, 18);
	sd[0] = 0xF0;
	sd[2] = 0xe;
	SCSI_WRITE32(&sd[3], miscompare_off);
	sd[7] = 10;
	sd[12] = 0x1D;
	task->task_sense_data = sd;
	task->task_sense_length = 18;
	(void) stmf_send_scsi_status(task, STMF_IOF_LU_DONE);
}

static void
sbd_ats_release_resources(struct scsi_task *task)
{
	sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;

	/*
	 * A few basic checks here to be sure that there are not multiple
	 * calls going on.  If scmd is NULL just return; this is very
	 * unlikely, but could happen if the task is freed by an abort.
	 * If nbufs is invalid, warn but ignore the error.  Last, if
	 * trans_data is either NULL or its length is zero, skip the
	 * free and leak the memory buffer.
	 */
	if (scmd == NULL)
		return;

	if (scmd->nbufs == 0xFF)
		cmn_err(CE_WARN, "%s invalid buffer count %x", __func__,
		    scmd->nbufs);

	if ((scmd->trans_data != NULL) && (scmd->trans_data_len != 0))
		kmem_free(scmd->trans_data, scmd->trans_data_len);

	scmd->trans_data = NULL; /* force panic later if re-entered */
	scmd->trans_data_len = 0;
	scmd->flags &= ~SBD_SCSI_CMD_TRANS_DATA;
}

void
sbd_handle_ats_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
    struct stmf_data_buf *dbuf, uint8_t dbuf_reusable)
{
	uint64_t laddr;
	uint32_t buflen, iolen, miscompare_off;
	int ndx;
	sbd_status_t ret;

	if (ATOMIC8_GET(scmd->nbufs) > 0) {
		atomic_dec_8(&scmd->nbufs);
	}

	if (dbuf->db_xfer_status != STMF_SUCCESS) {
		sbd_ats_remove_by_task(task);
		sbd_ats_release_resources(task);
		stmf_abort(STMF_QUEUE_TASK_ABORT, task,
		    dbuf->db_xfer_status, NULL);
		return;
	}

	if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
		goto ATS_XFER_DONE;
	}

	/* if state is confused drop the command */
	if ((scmd->trans_data == NULL) ||
	    ((scmd->flags & SBD_SCSI_CMD_TRANS_DATA) == 0) ||
	    ((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0)) {
		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
		return;
	}

	if (ATOMIC32_GET(scmd->len) != 0) {
		/*
		 * Initiate the next port xfer to occur in parallel
		 * with writing this buf.  A side effect of sbd_do_ats_xfer
		 * is that it may set scmd->len to 0.  This means all the
		 * data transfers have been started, not that they are done.
		 */
		sbd_do_ats_xfer(task, scmd, NULL, 0);
	}

	/*
	 * Move the most recent data transfer to the temporary buffer
	 * used for the compare and write function.
	 */
	laddr = dbuf->db_relative_offset;
	for (buflen = 0, ndx = 0; (buflen < dbuf->db_data_size) &&
	    (ndx < dbuf->db_sglist_length); ndx++) {
		iolen = min(dbuf->db_data_size - buflen,
		    dbuf->db_sglist[ndx].seg_length);
		if (iolen == 0)
			break;
		bcopy(dbuf->db_sglist[ndx].seg_addr, &scmd->trans_data[laddr],
		    iolen);
		buflen += iolen;
		laddr += (uint64_t)iolen;
	}
	task->task_nbytes_transferred += buflen;

ATS_XFER_DONE:
	if (ATOMIC32_GET(scmd->len) == 0 ||
	    scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
		stmf_free_dbuf(task, dbuf);
		/*
		 * If this is not the last buffer to be transferred then
		 * exit and wait for the next buffer.  Once nbufs is 0 all
		 * the data has arrived and the compare can be done.
		 */
		if (ATOMIC8_GET(scmd->nbufs) > 0)
			return;
		scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
		if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
			sbd_ats_remove_by_task(task);
			stmf_scsilib_send_status(task, STATUS_CHECK,
			    STMF_SAA_WRITE_ERROR);
		} else {
			ret = sbd_compare_and_write(task, scmd,
			    &miscompare_off);
			sbd_ats_remove_by_task(task);
			sbd_ats_release_resources(task);
			if (ret != SBD_SUCCESS) {
				if (ret != SBD_COMPARE_FAILED) {
					stmf_scsilib_send_status(task,
					    STATUS_CHECK, STMF_SAA_WRITE_ERROR);
				} else {
					sbd_send_miscompare_status(task,
					    miscompare_off);
				}
			} else {
				stmf_scsilib_send_status(task, STATUS_GOOD, 0);
			}
		}
		return;
	}
	sbd_do_ats_xfer(task, scmd, dbuf, dbuf_reusable);
}

void
sbd_do_ats_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
    struct stmf_data_buf *dbuf, uint8_t dbuf_reusable)
{
	uint32_t len;

	if (ATOMIC32_GET(scmd->len) == 0) {
		if (dbuf != NULL) {
			stmf_free_dbuf(task, dbuf);
		}
		return;
	}

	if ((dbuf != NULL) &&
	    ((dbuf->db_flags & DB_DONT_REUSE) || (dbuf_reusable == 0))) {
		/* free current dbuf and allocate a new one */
		stmf_free_dbuf(task, dbuf);
		dbuf = NULL;
	}
	if (dbuf == NULL) {
		uint32_t maxsize, minsize, old_minsize;

		maxsize = (ATOMIC32_GET(scmd->len) > (128*1024)) ? 128*1024 :
		    ATOMIC32_GET(scmd->len);
		minsize = maxsize >> 2;
		do {
			old_minsize = minsize;
			dbuf = stmf_alloc_dbuf(task, maxsize, &minsize, 0);
		} while ((dbuf == NULL) && (old_minsize > minsize) &&
		    (minsize >= 512));
		if (dbuf == NULL) {
			if (ATOMIC8_GET(scmd->nbufs) == 0) {
				sbd_ats_remove_by_task(task);
				sbd_ats_release_resources(task);
				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
				    STMF_ALLOC_FAILURE, NULL);
			}
			return;
		}
	}

	len = ATOMIC32_GET(scmd->len) > dbuf->db_buf_size ? dbuf->db_buf_size :
	    ATOMIC32_GET(scmd->len);

	dbuf->db_relative_offset = scmd->current_ro;
	dbuf->db_data_size = len;
	dbuf->db_flags = DB_DIRECTION_FROM_RPORT;
	(void) stmf_xfer_data(task, dbuf, 0);
	/*
	 * scmd->nbufs is the number of outstanding transfers;
	 * scmd->len is the number of bytes remaining to be requested.
	 */
	atomic_inc_8(&scmd->nbufs);
	atomic_add_32(&scmd->len, -len);
	scmd->current_ro += len;
}

void
sbd_handle_ats(scsi_task_t *task, struct stmf_data_buf *initial_dbuf)
{
	sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
	uint64_t addr, len;
	sbd_cmd_t *scmd;
	stmf_data_buf_t *dbuf;
	uint8_t do_immediate_data = 0;

	if (HardwareAcceleratedLocking == 0) {
		stmf_scsilib_send_status(task, STATUS_CHECK,
		    STMF_SAA_INVALID_OPCODE);
		return;
	}

	task->task_cmd_xfer_length = 0;
	if (task->task_additional_flags &
	    TASK_AF_NO_EXPECTED_XFER_LENGTH) {
		task->task_expected_xfer_length = 0;
	}
	if (sl->sl_flags & SL_WRITE_PROTECTED) {
		stmf_scsilib_send_status(task, STATUS_CHECK,
		    STMF_SAA_WRITE_PROTECTED);
		return;
	}
	addr = READ_SCSI64(&task->task_cdb[2], uint64_t);
	len = (uint64_t)task->task_cdb[13];

	if ((task->task_cdb[1]) || (len > SBD_ATS_MAX_NBLKS)) {
		stmf_scsilib_send_status(task, STATUS_CHECK,
		    STMF_SAA_INVALID_FIELD_IN_CDB);
		return;
	}
	if (len == 0) {
		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
		return;
	}

	/*
	 * This may be called again for the same task if it was
	 * rescheduled by stmf_task_poll_lu; in that case the task is
	 * already on the list and SBD_SUCCESS is simply returned again.
	 */
	if (sbd_ats_handling_before_io(task, sl, addr, len) != SBD_SUCCESS) {
		if (stmf_task_poll_lu(task, 10) != STMF_SUCCESS) {
			stmf_scsilib_send_status(task, STATUS_BUSY, 0);
		}
		return;
	}

	addr <<= sl->sl_data_blocksize_shift;
	len <<= sl->sl_data_blocksize_shift;

	task->task_cmd_xfer_length = len << 1;	/* actual amount is 2x */
	if (task->task_additional_flags &
	    TASK_AF_NO_EXPECTED_XFER_LENGTH) {
		task->task_expected_xfer_length = task->task_cmd_xfer_length;
	}
	if ((addr + len) > sl->sl_lu_size) {
		sbd_ats_remove_by_task(task);
		stmf_scsilib_send_status(task, STATUS_CHECK,
		    STMF_SAA_LBA_OUT_OF_RANGE);
		return;
	}

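	/*
	 * The data-out buffer for COMPARE AND WRITE carries both the
	 * verify data and the write data, so the transfer length is
	 * twice the length implied by NUMBER OF LOGICAL BLOCKS.
	 */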
	len <<= 1;

	if (len != task->task_expected_xfer_length) {
		sbd_ats_remove_by_task(task);
		stmf_scsilib_send_status(task, STATUS_CHECK,
		    STMF_SAA_INVALID_FIELD_IN_CDB);
		return;
	}

	if ((initial_dbuf != NULL) && (task->task_flags & TF_INITIAL_BURST)) {
		if (initial_dbuf->db_data_size > len) {
			if (initial_dbuf->db_data_size >
			    task->task_expected_xfer_length) {
				/* protocol error */
				sbd_ats_remove_by_task(task);
				stmf_abort(STMF_QUEUE_TASK_ABORT, task,
				    STMF_INVALID_ARG, NULL);
				return;
			}
			ASSERT(len <= 0xFFFFFFFFull);
			initial_dbuf->db_data_size = (uint32_t)len;
		}
		do_immediate_data = 1;
	}
	dbuf = initial_dbuf;

	if (task->task_lu_private) {
		scmd = (sbd_cmd_t *)task->task_lu_private;
	} else {
		scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
		task->task_lu_private = scmd;
	}

	/* We don't set the ATS_RELATED flag here */
	scmd->flags = SBD_SCSI_CMD_ACTIVE | SBD_SCSI_CMD_TRANS_DATA;
	scmd->cmd_type = SBD_CMD_SCSI_WRITE;
	scmd->nbufs = 0;
	ASSERT(len <= 0xFFFFFFFFull);
	scmd->len = (uint32_t)len;
	scmd->trans_data_len = (uint32_t)len;
	scmd->trans_data = kmem_alloc((size_t)len, KM_SLEEP);
	scmd->current_ro = 0;

	if (do_immediate_data) {
		/*
		 * Account for data passed in this write command.
		 */
		(void) stmf_xfer_data(task, dbuf, STMF_IOF_STATS_ONLY);
		atomic_add_32(&scmd->len, -dbuf->db_data_size);
		scmd->current_ro += dbuf->db_data_size;
		dbuf->db_xfer_status = STMF_SUCCESS;
		sbd_handle_ats_xfer_completion(task, scmd, dbuf, 0);
	} else {
		sbd_do_ats_xfer(task, scmd, dbuf, 0);
	}
}

/*
 * SCSI Copy Manager
 *
 * The SCSI copy manager is the state machine that implements SCSI
 * EXTENDED COPY functionality (SPC).  There is one cpmgr instance per
 * extended copy command.
 *
 * Exported block-copy functions:
 *   cpmgr_create()  - Creates the state machine.
 *   cpmgr_destroy() - Cleans up a completed cpmgr.
 *   cpmgr_run()     - Performs a time-bounded copy.
 *   cpmgr_abort()   - Aborts a cpmgr (if not already completed).
 *   cpmgr_done()    - Tests if the copy is done.
 */
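
/*
 * Typical lifecycle, sketched from the caller in sbd_handle_xcopy_xfer()
 * below:
 *
 *	h = cpmgr_create(task, params);
 *	while (!cpmgr_done(h))
 *		cpmgr_run(h, ddi_get_lbolt() + tick);
 *	status = cpmgr_status(h);
 *	cpmgr_destroy(h);
 */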

static void cpmgr_completion_cleanup(cpmgr_t *cm);
int sbd_check_reservation_conflict(sbd_lu_t *sl, scsi_task_t *task);

static uint8_t sbd_recv_copy_results_op_params[] = {
    0, 0, 0, 42, 1, 0, 0, 0,
    0, 2, 0, 1, 0, 0, 0xFF, 0xFF, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0,
    0xFF, 0xFF, 0, 9, 0, 0, 0, 0, 0,
    2, 2, 0xE4
};
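
/*
 * A decoding of the canned RECEIVE COPY OPERATING PARAMETERS data above
 * (our reading of the SPC layout; treat the offsets as assumptions):
 * bytes 0-3 available data (42), byte 4 SNLID = 1 (list IDs not
 * supported), bytes 8-9 maximum target descriptor count (2), bytes
 * 10-11 maximum segment descriptor count (1), bytes 12-15 maximum
 * descriptor list length (0xFFFF), byte 37 data segment granularity
 * (2^9 = 512), byte 43 implemented descriptor list length (2), followed
 * by the supported descriptor type codes: 0x02 (block-to-block segment)
 * and 0xE4 (identification target descriptor), matching the checks in
 * cpmgr_create() below.
 */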

cpmgr_handle_t
cpmgr_create(scsi_task_t *task, uint8_t *params)
{
	cpmgr_t *cm = NULL;
	uint8_t *p;
	uint32_t plist_len;
	uint32_t dbl;
	int i;
	uint16_t tdlen;
	uint16_t n;

	cm = kmem_zalloc(sizeof (*cm), KM_NOSLEEP);
	if (cm == NULL)
		return (CPMGR_INVALID_HANDLE);

	cm->cm_task = task;
	p = task->task_cdb;
	plist_len = READ_SCSI32(&p[10], uint32_t);

	/*
	 * Assume failure for now; on success this is changed to
	 * CM_COPYING.
	 */
	cm->cm_state = CM_COMPLETE;

	if (plist_len == 0) {
		cm->cm_status = 0;
		goto cpmgr_create_done;
	}

	if (plist_len < CPMGR_PARAM_HDR_LEN) {
		cm->cm_status = CPMGR_PARAM_LIST_LEN_ERROR;
		goto cpmgr_create_done;
	} else if ((params[0] != 0) || ((params[1] & 0x18) != 0x18)) {
		/*
		 * The current implementation does not allow the use of
		 * the list ID field.
		 */
		cm->cm_status = CPMGR_INVALID_FIELD_IN_PARAM_LIST;
		goto cpmgr_create_done;
	}
	/* No inline data either */
	if (*((uint32_t *)(&params[12])) != 0) {
		cm->cm_status = CPMGR_INVALID_FIELD_IN_PARAM_LIST;
		goto cpmgr_create_done;
	}

	tdlen = READ_SCSI16(&params[2], uint16_t);
	if ((tdlen == 0) || (tdlen % CPMGR_TARGET_DESCRIPTOR_SIZE) ||
	    (plist_len < (CPMGR_PARAM_HDR_LEN + tdlen))) {
		cm->cm_status = CPMGR_PARAM_LIST_LEN_ERROR;
		goto cpmgr_create_done;
	}
	cm->cm_td_count = tdlen / CPMGR_TARGET_DESCRIPTOR_SIZE;
	if (cm->cm_td_count > CPMGR_MAX_TARGET_DESCRIPTORS) {
		cm->cm_status = CPMGR_TOO_MANY_TARGET_DESCRIPTORS;
		goto cpmgr_create_done;
	}
	if (plist_len != (CPMGR_PARAM_HDR_LEN + tdlen +
	    CPMGR_B2B_SEGMENT_DESCRIPTOR_SIZE)) {
		cm->cm_status = CPMGR_PARAM_LIST_LEN_ERROR;
		goto cpmgr_create_done;
	}
	for (i = 0; i < cm->cm_td_count; i++) {
		p = params + CPMGR_PARAM_HDR_LEN;
		p += i * CPMGR_TARGET_DESCRIPTOR_SIZE;
		if ((p[0] != CPMGR_IDENT_TARGET_DESCRIPTOR) ||
		    ((p[5] & 0x30) != 0) || (p[7] != 16)) {
			cm->cm_status = CPMGR_UNSUPPORTED_TARGET_DESCRIPTOR;
			goto cpmgr_create_done;
		}
		/*
		 * stmf should be able to find this LU and hold it.  Also
		 * make sure that it is indeed an sbd LU.
		 */
		if (((cm->cm_tds[i].td_lu =
		    stmf_check_and_hold_lu(task, &p[8])) == NULL) ||
		    (!sbd_is_valid_lu(cm->cm_tds[i].td_lu))) {
			cm->cm_status = CPMGR_COPY_TARGET_NOT_REACHABLE;
			goto cpmgr_create_done;
		}
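		/*
		 * DISK BLOCK LENGTH: a 24-bit big-endian value taken
		 * from bytes 29-31 of the identification descriptor.
		 */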
		dbl = p[29];
		dbl <<= 8;
		dbl |= p[30];
		dbl <<= 8;
		dbl |= p[31];
		cm->cm_tds[i].td_disk_block_len = dbl;
		cm->cm_tds[i].td_lbasize_shift =
		    sbd_get_lbasize_shift(cm->cm_tds[i].td_lu);
	}
	/* p now points to the segment descriptor */
	p += CPMGR_TARGET_DESCRIPTOR_SIZE;

	if (p[0] != CPMGR_B2B_SEGMENT_DESCRIPTOR) {
		cm->cm_status = CPMGR_UNSUPPORTED_SEGMENT_DESCRIPTOR;
		goto cpmgr_create_done;
	}
	n = READ_SCSI16(&p[2], uint16_t);
	if (n != (CPMGR_B2B_SEGMENT_DESCRIPTOR_SIZE - 4)) {
		cm->cm_status = CPMGR_INVALID_FIELD_IN_PARAM_LIST;
		goto cpmgr_create_done;
	}

	n = READ_SCSI16(&p[4], uint16_t);
	if (n >= cm->cm_td_count) {
		cm->cm_status = CPMGR_INVALID_FIELD_IN_PARAM_LIST;
		goto cpmgr_create_done;
	}
	cm->cm_src_td_ndx = n;

	n = READ_SCSI16(&p[6], uint16_t);
	if (n >= cm->cm_td_count) {
		cm->cm_status = CPMGR_INVALID_FIELD_IN_PARAM_LIST;
		goto cpmgr_create_done;
	}
	cm->cm_dst_td_ndx = n;

	cm->cm_copy_size = READ_SCSI16(&p[10], uint64_t);
	cm->cm_copy_size *= (uint64_t)(cm->cm_tds[(p[1] & 2) ?
	    cm->cm_dst_td_ndx : cm->cm_src_td_ndx].td_disk_block_len);
	cm->cm_src_offset = (READ_SCSI64(&p[12], uint64_t)) <<
	    cm->cm_tds[cm->cm_src_td_ndx].td_lbasize_shift;
	cm->cm_dst_offset = (READ_SCSI64(&p[20], uint64_t)) <<
	    cm->cm_tds[cm->cm_dst_td_ndx].td_lbasize_shift;

	/* Allocate the xfer buffer. */
	cm->cm_xfer_buf = kmem_alloc(CPMGR_XFER_BUF_SIZE, KM_NOSLEEP);
	if (cm->cm_xfer_buf == NULL) {
		cm->cm_status = CPMGR_INSUFFICIENT_RESOURCES;
		goto cpmgr_create_done;
	}

	/*
	 * No need to check block limits; cpmgr_run() will take care
	 * of that.
	 */

	/* All checks passed */
	cm->cm_state = CM_COPYING;

cpmgr_create_done:
	if (cm->cm_state == CM_COMPLETE) {
		cpmgr_completion_cleanup(cm);
	}
	return (cm);
}

void
cpmgr_destroy(cpmgr_handle_t h)
{
	cpmgr_t *cm = (cpmgr_t *)h;

	ASSERT(cm->cm_state == CM_COMPLETE);
	kmem_free(cm, sizeof (*cm));
}

static void
cpmgr_completion_cleanup(cpmgr_t *cm)
{
	int i;

	for (i = 0; i < cm->cm_td_count; i++) {
		if (cm->cm_tds[i].td_lu) {
			stmf_release_lu(cm->cm_tds[i].td_lu);
			cm->cm_tds[i].td_lu = NULL;
		}
	}
	if (cm->cm_xfer_buf) {
		kmem_free(cm->cm_xfer_buf, CPMGR_XFER_BUF_SIZE);
		cm->cm_xfer_buf = NULL;
	}
}

void
cpmgr_run(cpmgr_t *cm, clock_t preemption_point)
{
	stmf_lu_t *lu;
	sbd_lu_t *src_slu, *dst_slu;
	uint64_t xfer_size, start, end;
	sbd_status_t ret;

	/*
	 * Reservation conflicts and read-only (write-protected) LUs are
	 * checked on every run.
	 */
	ASSERT(cm->cm_state == CM_COPYING);
	lu = cm->cm_tds[cm->cm_src_td_ndx].td_lu;
	src_slu = (sbd_lu_t *)lu->lu_provider_private;
	if (sbd_check_reservation_conflict(src_slu, cm->cm_task)) {
		cpmgr_abort(cm, CPMGR_RESERVATION_CONFLICT);
		return;
	}

	lu = cm->cm_tds[cm->cm_dst_td_ndx].td_lu;
	dst_slu = (sbd_lu_t *)lu->lu_provider_private;
	if (sbd_check_reservation_conflict(dst_slu, cm->cm_task)) {
		cpmgr_abort(cm, CPMGR_RESERVATION_CONFLICT);
		return;
	}
	if (dst_slu->sl_flags & SL_WRITE_PROTECTED) {
		cpmgr_abort(cm, STMF_SAA_WRITE_PROTECTED);
		return;
	}

	while (cm->cm_size_done < cm->cm_copy_size) {
		xfer_size = ((cm->cm_copy_size - cm->cm_size_done) >
		    CPMGR_XFER_BUF_SIZE) ? CPMGR_XFER_BUF_SIZE :
		    (cm->cm_copy_size - cm->cm_size_done);
		start = cm->cm_src_offset + cm->cm_size_done;
		ret = sbd_data_read(src_slu, cm->cm_task, start, xfer_size,
		    cm->cm_xfer_buf);
		if (ret != SBD_SUCCESS) {
			if (ret == SBD_IO_PAST_EOF) {
				cpmgr_abort(cm, CPMGR_LBA_OUT_OF_RANGE);
			} else {
				cpmgr_abort(cm,
				    CPMGR_THIRD_PARTY_DEVICE_FAILURE);
			}
			break;
		}
		end = cm->cm_dst_offset + cm->cm_size_done;
		ret = sbd_data_write(dst_slu, cm->cm_task, end, xfer_size,
		    cm->cm_xfer_buf);
		if (ret != SBD_SUCCESS) {
			if (ret == SBD_IO_PAST_EOF) {
				cpmgr_abort(cm, CPMGR_LBA_OUT_OF_RANGE);
			} else {
				cpmgr_abort(cm,
				    CPMGR_THIRD_PARTY_DEVICE_FAILURE);
			}
			break;
		}
		cm->cm_size_done += xfer_size;
		if (ddi_get_lbolt() >= preemption_point)
			break;
	}
	if (cm->cm_size_done == cm->cm_copy_size) {
		cm->cm_state = CM_COMPLETE;
		cm->cm_status = 0;
		cpmgr_completion_cleanup(cm);
	}
}

void
cpmgr_abort(cpmgr_t *cm, uint32_t s)
{
	if (cm->cm_state == CM_COPYING) {
		cm->cm_state = CM_COMPLETE;
		cm->cm_status = s;
		cpmgr_completion_cleanup(cm);
	}
}

void
sbd_handle_xcopy(scsi_task_t *task, stmf_data_buf_t *dbuf)
{
	uint32_t cmd_xfer_len;

	if (HardwareAcceleratedMove == 0) {
		stmf_scsilib_send_status(task, STATUS_CHECK,
		    STMF_SAA_INVALID_OPCODE);
		return;
	}

	cmd_xfer_len = READ_SCSI32(&task->task_cdb[10], uint32_t);

	if (cmd_xfer_len == 0) {
		task->task_cmd_xfer_length = 0;
		if (task->task_additional_flags &
		    TASK_AF_NO_EXPECTED_XFER_LENGTH) {
			task->task_expected_xfer_length = 0;
		}
		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
		return;
	}

	sbd_handle_short_write_transfers(task, dbuf, cmd_xfer_len);
}

void
sbd_handle_xcopy_xfer(scsi_task_t *task, uint8_t *buf)
{
	cpmgr_handle_t h;
	uint32_t s;
	clock_t tic, end;

	/*
	 * No need to pass the buffer size; it's taken from the CDB.
	 */
	h = cpmgr_create(task, buf);
	if (h == CPMGR_INVALID_HANDLE) {
		stmf_scsilib_send_status(task, STATUS_CHECK,
		    CPMGR_INSUFFICIENT_RESOURCES);
		return;
	}
	tic = drv_usectohz(1000000);
	end = ddi_get_lbolt() + (CPMGR_DEFAULT_TIMEOUT * tic);
	while (!cpmgr_done(h)) {
		if (stmf_is_task_being_aborted(task) || (ddi_get_lbolt() > end))
			cpmgr_abort(h, CPMGR_THIRD_PARTY_DEVICE_FAILURE);
		else
			cpmgr_run(h, ddi_get_lbolt() + tic);
	}
	s = cpmgr_status(h);
	if (s) {
		if (s == CPMGR_RESERVATION_CONFLICT) {
			stmf_scsilib_send_status(task,
			    STATUS_RESERVATION_CONFLICT, 0);
		} else {
			stmf_scsilib_send_status(task, STATUS_CHECK, s);
		}
	} else {
		stmf_scsilib_send_status(task, STATUS_GOOD, 0);
	}
	cpmgr_destroy(h);
}

void
sbd_handle_recv_copy_results(struct scsi_task *task,
    struct stmf_data_buf *initial_dbuf)
{
	uint32_t cdb_len;

	cdb_len = READ_SCSI32(&task->task_cdb[10], uint32_t);
	if ((task->task_cdb[1] & 0x1F) != 3) {
		stmf_scsilib_send_status(task, STATUS_CHECK,
		    STMF_SAA_INVALID_FIELD_IN_CDB);
		return;
	}
	sbd_handle_short_read_transfers(task, initial_dbuf,
	    sbd_recv_copy_results_op_params, cdb_len,
	    sizeof (sbd_recv_copy_results_op_params));
}
945