xref: /illumos-gate/usr/src/cmd/bhyve/common/pci_virtio_scsi_uscsi.c (revision 5f016a21b06f5c6d125fdb35da2350c7d582f5ad)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2016 Jakub Klama <jceel@FreeBSD.org>.
5  * Copyright (c) 2018 Marcelo Araujo <araujo@FreeBSD.org>.
6  * Copyright (c) 2026 Hans Rosenfeld
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer
14  *    in this position and unchanged.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <sys/param.h>
33 #include <sys/linker_set.h>
34 #include <sys/types.h>
35 #include <sys/uio.h>
36 #include <sys/time.h>
37 #include <sys/queue.h>
38 
39 #include <alloca.h>
40 #include <errno.h>
41 #include <fcntl.h>
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <stdbool.h>
45 #include <string.h>
46 #include <unistd.h>
47 #include <assert.h>
48 #include <pthread.h>
49 #include <pthread_np.h>
50 
51 #include <scsi/libscsi.h>
52 #include <sys/scsi/generic/commands.h>
53 #include <sys/scsi/generic/status.h>
54 #include <sys/scsi/impl/uscsi.h>
55 
56 #include "bhyverun.h"
57 #include "config.h"
58 #include "debug.h"
59 #include "pci_emul.h"
60 #include "virtio.h"
61 #include "iov.h"
62 #include "privileges.h"
63 #include "pci_virtio_scsi.h"
64 
65 struct vtscsi_uscsi_backend {
66 	struct pci_vtscsi_backend	vub_backend;
67 	libscsi_hdl_t			*vub_scsi_hdl;
68 };
69 
70 static int vtscsi_uscsi_init(struct pci_vtscsi_softc *,
71     struct pci_vtscsi_backend *, nvlist_t *);
72 static int vtscsi_uscsi_open(struct pci_vtscsi_softc *, const char *, long);
73 static void vtscsi_uscsi_reset(struct pci_vtscsi_softc *);
74 
75 static void *vtscsi_uscsi_req_alloc(struct pci_vtscsi_softc *);
76 static void vtscsi_uscsi_req_clear(void  *);
77 static void vtscsi_uscsi_req_free(void *);
78 
79 static void vtscsi_uscsi_tmf_hdl(struct pci_vtscsi_softc *, int,
80     struct pci_vtscsi_ctrl_tmf *);
81 static void vtscsi_uscsi_an_hdl(struct pci_vtscsi_softc *, int,
82     struct pci_vtscsi_ctrl_an *);
83 static int vtscsi_uscsi_req_hdl(struct pci_vtscsi_softc *, int,
84     struct pci_vtscsi_request *);
85 
86 static void vtscsi_uscsi_make_check_condition(struct uscsi_cmd *, char, char,
87     char);
88 static void vtscsi_uscsi_filter_post_report_luns(struct uscsi_cmd *);
89 static void vtscsi_uscsi_filter_post(struct uscsi_cmd *);
90 
91 
92 static int
vtscsi_uscsi_init(struct pci_vtscsi_softc * sc,struct pci_vtscsi_backend * backend,nvlist_t * nvl __unused)93 vtscsi_uscsi_init(struct pci_vtscsi_softc *sc,
94     struct pci_vtscsi_backend *backend, nvlist_t *nvl __unused)
95 {
96 	struct vtscsi_uscsi_backend *uscsi_backend;
97 	libscsi_errno_t serr;
98 
99 	uscsi_backend = calloc(1, sizeof (struct vtscsi_uscsi_backend));
100 	if (uscsi_backend == NULL) {
101 		EPRINTLN("failed to allocate backend data: %s",
102 		    strerror(errno));
103 		return (-1);
104 	}
105 
106 	uscsi_backend->vub_backend = *backend;
107 
108 	uscsi_backend->vub_scsi_hdl = libscsi_init(LIBSCSI_VERSION, &serr);
109 	if (uscsi_backend->vub_scsi_hdl == NULL) {
110 		EPRINTLN("failed to initialize libscsi: %s",
111 		    libscsi_strerror(serr));
112 		free(uscsi_backend);
113 		return (-1);
114 	}
115 
116 	sc->vss_backend = &uscsi_backend->vub_backend;
117 
118 	return (0);
119 }
120 
121 static int
vtscsi_uscsi_open(struct pci_vtscsi_softc * sc,const char * path,long target)122 vtscsi_uscsi_open(struct pci_vtscsi_softc *sc, const char *path, long target)
123 {
124 	struct pci_vtscsi_target *tgt = &sc->vss_targets[target];
125 	uscsi_xfer_t maxxfer = 0;
126 
127 	/*
128 	 * Most SCSI target drivers require the SYS_DEVICES privilege to send
129 	 * USCSI commands.
130 	 */
131 	illumos_priv_add_min(PRIV_SYS_DEVICES, "scsi");
132 
133 	/*
134 	 * Open the target.
135 	 */
136 	tgt->vst_fd = open(path, O_RDWR);
137 	if (tgt->vst_fd < 0)
138 		return (-1);
139 
140 	/*
141 	 * Get the maximum transfer size of the backend device.
142 	 */
143 	if (ioctl(tgt->vst_fd, USCSIMAXXFER, &maxxfer) < 0) {
144 		int errno_save = errno;
145 
146 		if (errno == ENOTTY) {
147 			/*
148 			 * The underlying device doesn't support this ioctl.
149 			 * Limit max_sectors to 128MB, which is as good as
150 			 * any other assumption.
151 			 */
152 			tgt->vst_max_sectors = 128 << (20 - 9);
153 			return (0);
154 		}
155 
156 		WPRINTF("USCSIMAXXFER: unexpected error: errno=%d (%s)",
157 		    strerrorname_np(errno), strerror(errno));
158 		(void) close(tgt->vst_fd);
159 		tgt->vst_fd = -1;
160 		errno = errno_save;
161 		return (-1);
162 	}
163 
164 	/*
165 	 * Even though the virtio spec isn't particularly verbose about what
166 	 * "max_sectors" actually means and what size a sector is, Linux seems
167 	 * to treat it as a number of 512b sectors.
168 	 *
169 	 * In any case, we need to limit maxxfer such that it fits into a signed
170 	 * 32bit int.
171 	 */
172 	if (maxxfer > INT32_MAX)
173 		maxxfer = INT32_MAX;
174 
175 	tgt->vst_max_sectors = maxxfer >> 9;
176 
177 	return (0);
178 }
179 
180 static void
vtscsi_uscsi_reset(struct pci_vtscsi_softc * sc)181 vtscsi_uscsi_reset(struct pci_vtscsi_softc *sc)
182 {
183 	size_t i;
184 
185 	sc->vss_config.max_sectors = INT32_MAX;
186 
187 	/*
188 	 * As we may be configured to use a variety of differing backend devices
189 	 * with varying maximum transfer sizes but virtio-scsi supports only one
190 	 * max_sectors limit per instance, we'll use the smallest maximum
191 	 * transfer size found.
192 	 */
193 	for (i = 0; i < sc->vss_num_target; i++) {
194 		struct pci_vtscsi_target *tgt = &sc->vss_targets[i];
195 
196 		if (tgt->vst_max_sectors < sc->vss_config.max_sectors)
197 			sc->vss_config.max_sectors = tgt->vst_max_sectors;
198 	}
199 }
200 
201 static void *
vtscsi_uscsi_req_alloc(struct pci_vtscsi_softc * sc)202 vtscsi_uscsi_req_alloc(struct pci_vtscsi_softc *sc)
203 {
204 	return (calloc(1, sizeof (struct uscsi_cmd)));
205 }
206 
207 static void
vtscsi_uscsi_req_clear(void * io)208 vtscsi_uscsi_req_clear(void *io)
209 {
210 	bzero(io, sizeof (struct uscsi_cmd));
211 }
212 
/*
 * Release the backend-specific part of a request. Passing NULL is harmless.
 */
static void
vtscsi_uscsi_req_free(void *io)
{
	free(io);
}
218 
219 static void
vtscsi_uscsi_tmf_hdl(struct pci_vtscsi_softc * sc __unused,int fd,struct pci_vtscsi_ctrl_tmf * tmf)220 vtscsi_uscsi_tmf_hdl(struct pci_vtscsi_softc *sc __unused, int fd,
221     struct pci_vtscsi_ctrl_tmf *tmf)
222 {
223 	struct uscsi_cmd cmd;
224 	int err;
225 
226 	/* We currently support only LUN 0. */
227 	if (pci_vtscsi_get_lun(sc, tmf->lun) != 0) {
228 		tmf->response = VIRTIO_SCSI_S_BAD_TARGET;
229 		return;
230 	}
231 
232 	tmf->response = VIRTIO_SCSI_S_FUNCTION_COMPLETE;
233 
234 	memset(&cmd, 0, sizeof (cmd));
235 	cmd.uscsi_status = -1;
236 	cmd.uscsi_flags = USCSI_DIAGNOSE | USCSI_SILENT;
237 
238 
239 	/* The only TMF requests that we can handle here are RESETs. */
240 	switch (tmf->subtype) {
241 	case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET:
242 		cmd.uscsi_flags |= USCSI_RESET_TARGET;
243 		break;
244 
245 	case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET:
246 		cmd.uscsi_flags |= USCSI_RESET_LUN;
247 		break;
248 
249 	default:
250 		/*
251 		 * For all other TMF requests, return FUNCTION COMPLETE as
252 		 * there is nothing we can or need to do for them.
253 		 *
254 		 * See the comments in pci_vtscsi_tmf_handle() for additional
255 		 * information on how the common code and the backend-specific
256 		 * code interact for TMF requests.
257 		 */
258 		tmf->response = VIRTIO_SCSI_S_FUNCTION_COMPLETE;
259 		return;
260 	}
261 
262 	err = ioctl(fd, USCSICMD, &cmd);
263 
264 	if (err != 0) {
265 		WPRINTF("USCSICMD: unexpected TMF error, errno=%d (%s)",
266 		    strerrorname_np(errno), strerror(errno));
267 		tmf->response = VIRTIO_SCSI_S_FAILURE;
268 	}
269 }
270 
271 static void
vtscsi_uscsi_an_hdl(struct pci_vtscsi_softc * sc __unused,int fd __unused,struct pci_vtscsi_ctrl_an * an)272 vtscsi_uscsi_an_hdl(struct pci_vtscsi_softc *sc __unused, int fd __unused,
273     struct pci_vtscsi_ctrl_an *an)
274 {
275 	/* We currently support only LUN 0. */
276 	if (pci_vtscsi_get_lun(sc, an->lun) != 0) {
277 		an->response = VIRTIO_SCSI_S_BAD_TARGET;
278 		return;
279 	}
280 
281 	an->response = VIRTIO_SCSI_S_FAILURE;
282 }
283 
284 static int
vtscsi_uscsi_req_hdl(struct pci_vtscsi_softc * sc,int fd,struct pci_vtscsi_request * req)285 vtscsi_uscsi_req_hdl(struct pci_vtscsi_softc *sc, int fd,
286     struct pci_vtscsi_request *req)
287 {
288 	struct vtscsi_uscsi_backend *uscsi =
289 	    (struct vtscsi_uscsi_backend *)sc->vss_backend;
290 	struct uscsi_cmd *cmd = req->vsr_backend;
291 	void *ext_data = NULL;
292 	ssize_t ext_data_len = 0;
293 	int nxferred = 0;
294 
295 	/* We currently support only LUN 0. */
296 	if (pci_vtscsi_get_lun(sc, req->vsr_cmd_rd->lun) != 0) {
297 		req->vsr_cmd_wr->response = VIRTIO_SCSI_S_BAD_TARGET;
298 		return (0);
299 	}
300 
301 	if (req->vsr_data_niov_in > 0) {
302 		ext_data_len = iov_to_buf(req->vsr_data_iov_in,
303 		    req->vsr_data_niov_in, &ext_data);
304 		cmd->uscsi_flags |= USCSI_WRITE;
305 	} else if (req->vsr_data_niov_out > 0) {
306 		ext_data_len = count_iov(req->vsr_data_iov_out,
307 		    req->vsr_data_niov_out);
308 		ext_data = malloc(ext_data_len);
309 		cmd->uscsi_flags |= USCSI_READ;
310 	}
311 
312 	/* Stop here if we failed to allocate ext_data. */
313 	if (ext_data == NULL && ext_data_len != 0) {
314 		WPRINTF("failed to allocate buffer for ext_data");
315 		req->vsr_cmd_wr->response = VIRTIO_SCSI_S_FAILURE;
316 		return (0);
317 	}
318 
319 	cmd->uscsi_buflen = ext_data_len;
320 	cmd->uscsi_bufaddr = ext_data;
321 
322 	cmd->uscsi_cdb = (caddr_t)req->vsr_cmd_rd->cdb;
323 	cmd->uscsi_cdblen = libscsi_cmd_cdblen(uscsi->vub_scsi_hdl,
324 	    req->vsr_cmd_rd->cdb[0]);
325 
326 	cmd->uscsi_status = -1;
327 
328 	/*
329 	 * We set an unreasonably large timeout here. The virtio spec doesn't
330 	 * provide a way for the guest driver to pass a I/O timeout value to
331 	 * the device, but if our timeout here is larger than any timeout the
332 	 * guest uses, we can expect them to abort the command before we would.
333 	 *
334 	 * INT16_MAX corresponds to a bit over 9 hours, which should be enough.
335 	 */
336 	cmd->uscsi_timeout = INT16_MAX;
337 	cmd->uscsi_flags |= USCSI_DIAGNOSE;
338 	cmd->uscsi_rqlen = sc->vss_config.sense_size;
339 	cmd->uscsi_rqbuf = (caddr_t)req->vsr_cmd_wr->sense;
340 	cmd->uscsi_flags |= USCSI_RQENABLE;
341 
342 	switch (req->vsr_cmd_rd->task_attr) {
343 	case VIRTIO_SCSI_S_ORDERED:
344 		cmd->uscsi_flags |= USCSI_OTAG;
345 		break;
346 	case VIRTIO_SCSI_S_HEAD:
347 		cmd->uscsi_flags |= USCSI_HEAD|USCSI_HTAG;
348 		break;
349 	case VIRTIO_SCSI_S_SIMPLE:
350 		break;
351 
352 	case VIRTIO_SCSI_S_ACA:
353 		/*
354 		 * I haven't found any indication in our code that would
355 		 * suggest that we support ACA in any way in illumos. In
356 		 * fact, scsi_transport() asserts that NACA isn't set in
357 		 * a packet, and scsi_uscsi_pktinit() warns about it and
358 		 * clears the flag if found set. There's a tunable to
359 		 * override that behaviour (scsi_pkt_allow_naca), but there
360 		 * really seems to be no code properly handling ACA or
361 		 * setting the ACA flag.
362 		 *
363 		 * I guess this makes sense since we're doing ARQ anyway,
364 		 * so let's just pretend no target is ever in ACA state
365 		 * and thus no packet will ever require this.
366 		 */
367 	default:
368 		WPRINTF("USCSICMD: unexpected task attr in request: 0x%x",
369 		    req->vsr_cmd_rd->task_attr);
370 		req->vsr_cmd_wr->response = VIRTIO_SCSI_S_FAILURE;
371 		return (0);
372 	}
373 
374 	errno = 0;
375 	(void) ioctl(fd, USCSICMD, cmd);
376 
377 	switch (errno) {
378 	case EIO:
379 		/*
380 		 * EIO may indicate that a SCSI error occured. If that's the
381 		 * case, uscsi_status should have been set to a valid value,
382 		 * and we want to continue to process the request normally.
383 		 */
384 		if (cmd->uscsi_status == -1) {
385 			req->vsr_cmd_wr->response = VIRTIO_SCSI_S_FAILURE;
386 			break;
387 		}
388 
389 		/*FALLTHRU*/
390 	case 0:
391 		/*
392 		 * If the command completed successfully, apply any necessary
393 		 * post-completion filtering.
394 		 */
395 		if (cmd->uscsi_status == STATUS_GOOD)
396 			vtscsi_uscsi_filter_post(cmd);
397 
398 		req->vsr_cmd_wr->sense_len =
399 		    sc->vss_config.sense_size - cmd->uscsi_rqresid;
400 		req->vsr_cmd_wr->residual = cmd->uscsi_resid;
401 		req->vsr_cmd_wr->status = cmd->uscsi_status;
402 		req->vsr_cmd_wr->response = VIRTIO_SCSI_S_OK;
403 
404 		nxferred = ext_data_len - req->vsr_cmd_wr->residual;
405 
406 		if (req->vsr_data_niov_out > 0) {
407 			(void) buf_to_iov(ext_data, nxferred,
408 			    req->vsr_data_iov_out, req->vsr_data_niov_out);
409 		}
410 		break;
411 
412 	case EAGAIN:
413 		/*
414 		 * Despite not being documented in uscsi(4I), sd(4D) returns
415 		 * this when the device is busy formatting.
416 		 */
417 		req->vsr_cmd_wr->response = VIRTIO_SCSI_S_BUSY;
418 		break;
419 
420 	case EINVAL:
421 		/*
422 		 * This may happen if packet allocation fails, which in turn
423 		 * may happen if we didn't honor USCSIMAXXFER.
424 		 */
425 		req->vsr_cmd_wr->response = VIRTIO_SCSI_S_OVERRUN;
426 		break;
427 
428 	case EFAULT:
429 		/*
430 		 * EFAULT should never happen as we never send bogus memory
431 		 * addresses in our USCSI commands.
432 		 */
433 
434 	case EPERM:
435 		/*
436 		 * EPERM should never happen as we have the SYS_DEVICES
437 		 * privilege.
438 		 */
439 
440 	default:
441 		WPRINTF("USCSICMD: unexpected I/O error: errno=%d (%s)",
442 		    strerrorname_np(errno), strerror(errno));
443 		abort();
444 	}
445 
446 	free(ext_data);
447 
448 	return (nxferred);
449 }
450 
451 /*
452  * Return a CHECK CONDITION and fill in the sense data with the given sense key,
453  * additional sense code, and additional sense qualifier.
454  */
455 static void
vtscsi_uscsi_make_check_condition(struct uscsi_cmd * cmd,char key,char asc,char qual)456 vtscsi_uscsi_make_check_condition(struct uscsi_cmd *cmd, char key, char asc,
457     char qual)
458 {
459 	cmd->uscsi_status = STATUS_CHECK;
460 	cmd->uscsi_resid = cmd->uscsi_buflen;
461 	cmd->uscsi_rqstatus = STATUS_GOOD;
462 
463 	bzero(cmd->uscsi_rqbuf, cmd->uscsi_rqlen);
464 
465 	if (cmd->uscsi_rqlen >= 1)
466 		cmd->uscsi_rqbuf[0] = 0x70;
467 	if (cmd->uscsi_rqlen >= 3)
468 		cmd->uscsi_rqbuf[2] = key;
469 	if (cmd->uscsi_rqlen >= 8)
470 		cmd->uscsi_rqbuf[7] = cmd->uscsi_rqlen - 8;
471 	if (cmd->uscsi_rqlen >= 13)
472 		cmd->uscsi_rqbuf[12] = asc;
473 	if (cmd->uscsi_rqlen >= 14)
474 		cmd->uscsi_rqbuf[13] = qual;
475 }
476 
477 /*
478  * We currently only support LUN 0. Make sure we never report anything else.
479  *
480  * We make no assumption about the buffer size. If it's large enough to hold the
481  * LUN list length, we'll set the LUN list length to 8. The resid is adjusted if
482  * the buffer size is larger than 16 bytes, which is the length needed to hold
483  * one LUN address.
484  */
485 static void
vtscsi_uscsi_filter_post_report_luns(struct uscsi_cmd * cmd)486 vtscsi_uscsi_filter_post_report_luns(struct uscsi_cmd *cmd)
487 {
488 	uint8_t report = (uint8_t)cmd->uscsi_cdb[2];
489 
490 	bzero(cmd->uscsi_bufaddr, cmd->uscsi_buflen);
491 
492 	switch (report) {
493 	case 0:
494 	case 2:
495 		/*
496 		 * We'll overwrite the output from the device to report just one
497 		 * LUN with an all-zero address:
498 		 * - LUN list length is 8
499 		 * - LUN 1 address is 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
500 		 */
501 		if (cmd->uscsi_buflen >= 4)
502 			cmd->uscsi_bufaddr[3] = 8;
503 		if (cmd->uscsi_buflen >= 16)
504 			cmd->uscsi_resid = cmd->uscsi_buflen - 16;
505 		break;
506 	case 1:
507 		/*
508 		 * We don't report any Well-Known LUNs either, because we have
509 		 * no way to address them anyway using USCSICMD.
510 		 */
511 		cmd->uscsi_resid = cmd->uscsi_buflen;
512 		break;
513 	default:
514 		/*
515 		 * All other values for "select report" are either invalid or
516 		 * vendor-specific and thus unsupported. Return the command with
517 		 * CHECK CONDITION, and fill in sense data to report a ILLEGAL
518 		 * REQUEST with INVALID FIELD IN CDB.
519 		 */
520 		vtscsi_uscsi_make_check_condition(cmd, KEY_ILLEGAL_REQUEST,
521 		    0x24, 0x00);
522 	}
523 }
524 
525 static void
vtscsi_uscsi_filter_post(struct uscsi_cmd * cmd)526 vtscsi_uscsi_filter_post(struct uscsi_cmd *cmd)
527 {
528 	switch ((uint8_t)cmd->uscsi_cdb[0]) {
529 	case SCMD_REPORT_LUNS:
530 		vtscsi_uscsi_filter_post_report_luns(cmd);
531 		break;
532 
533 	default:
534 		break;
535 	}
536 }
537 
538 static const struct pci_vtscsi_backend vtscsi_uscsi_backend = {
539 	.vsb_name = "uscsi",
540 	.vsb_init = vtscsi_uscsi_init,
541 	.vsb_open = vtscsi_uscsi_open,
542 	.vsb_reset = vtscsi_uscsi_reset,
543 
544 	.vsb_req_alloc = vtscsi_uscsi_req_alloc,
545 	.vsb_req_clear = vtscsi_uscsi_req_clear,
546 	.vsb_req_free = vtscsi_uscsi_req_free,
547 
548 	.vsb_tmf_hdl = vtscsi_uscsi_tmf_hdl,
549 	.vsb_an_hdl = vtscsi_uscsi_an_hdl,
550 	.vsb_req_hdl = vtscsi_uscsi_req_hdl
551 };
552 PCI_VTSCSI_BACKEND_SET(vtscsi_uscsi_backend);
553