1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2016 Jakub Klama <jceel@FreeBSD.org>.
5 * Copyright (c) 2018 Marcelo Araujo <araujo@FreeBSD.org>.
6 * Copyright (c) 2026 Hans Rosenfeld
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer
14 * in this position and unchanged.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 #include <sys/param.h>
33 #include <sys/linker_set.h>
34 #include <sys/types.h>
35 #include <sys/uio.h>
36 #include <sys/time.h>
37 #include <sys/queue.h>
38
39 #include <alloca.h>
40 #include <errno.h>
41 #include <fcntl.h>
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <stdbool.h>
45 #include <string.h>
46 #include <unistd.h>
47 #include <assert.h>
48 #include <pthread.h>
49 #include <pthread_np.h>
50
51 #include <scsi/libscsi.h>
52 #include <sys/scsi/generic/commands.h>
53 #include <sys/scsi/generic/status.h>
54 #include <sys/scsi/impl/uscsi.h>
55
56 #include "bhyverun.h"
57 #include "config.h"
58 #include "debug.h"
59 #include "pci_emul.h"
60 #include "virtio.h"
61 #include "iov.h"
62 #include "privileges.h"
63 #include "pci_virtio_scsi.h"
64
65 struct vtscsi_uscsi_backend {
66 struct pci_vtscsi_backend vub_backend;
67 libscsi_hdl_t *vub_scsi_hdl;
68 };
69
70 static int vtscsi_uscsi_init(struct pci_vtscsi_softc *,
71 struct pci_vtscsi_backend *, nvlist_t *);
72 static int vtscsi_uscsi_open(struct pci_vtscsi_softc *, const char *, long);
73 static void vtscsi_uscsi_reset(struct pci_vtscsi_softc *);
74
75 static void *vtscsi_uscsi_req_alloc(struct pci_vtscsi_softc *);
76 static void vtscsi_uscsi_req_clear(void *);
77 static void vtscsi_uscsi_req_free(void *);
78
79 static void vtscsi_uscsi_tmf_hdl(struct pci_vtscsi_softc *, int,
80 struct pci_vtscsi_ctrl_tmf *);
81 static void vtscsi_uscsi_an_hdl(struct pci_vtscsi_softc *, int,
82 struct pci_vtscsi_ctrl_an *);
83 static int vtscsi_uscsi_req_hdl(struct pci_vtscsi_softc *, int,
84 struct pci_vtscsi_request *);
85
86 static void vtscsi_uscsi_make_check_condition(struct uscsi_cmd *, char, char,
87 char);
88 static void vtscsi_uscsi_filter_post_report_luns(struct uscsi_cmd *);
89 static void vtscsi_uscsi_filter_post(struct uscsi_cmd *);
90
91
92 static int
vtscsi_uscsi_init(struct pci_vtscsi_softc * sc,struct pci_vtscsi_backend * backend,nvlist_t * nvl __unused)93 vtscsi_uscsi_init(struct pci_vtscsi_softc *sc,
94 struct pci_vtscsi_backend *backend, nvlist_t *nvl __unused)
95 {
96 struct vtscsi_uscsi_backend *uscsi_backend;
97 libscsi_errno_t serr;
98
99 uscsi_backend = calloc(1, sizeof (struct vtscsi_uscsi_backend));
100 if (uscsi_backend == NULL) {
101 EPRINTLN("failed to allocate backend data: %s",
102 strerror(errno));
103 return (-1);
104 }
105
106 uscsi_backend->vub_backend = *backend;
107
108 uscsi_backend->vub_scsi_hdl = libscsi_init(LIBSCSI_VERSION, &serr);
109 if (uscsi_backend->vub_scsi_hdl == NULL) {
110 EPRINTLN("failed to initialize libscsi: %s",
111 libscsi_strerror(serr));
112 free(uscsi_backend);
113 return (-1);
114 }
115
116 sc->vss_backend = &uscsi_backend->vub_backend;
117
118 return (0);
119 }
120
121 static int
vtscsi_uscsi_open(struct pci_vtscsi_softc * sc,const char * path,long target)122 vtscsi_uscsi_open(struct pci_vtscsi_softc *sc, const char *path, long target)
123 {
124 struct pci_vtscsi_target *tgt = &sc->vss_targets[target];
125 uscsi_xfer_t maxxfer = 0;
126
127 /*
128 * Most SCSI target drivers require the SYS_DEVICES privilege to send
129 * USCSI commands.
130 */
131 illumos_priv_add_min(PRIV_SYS_DEVICES, "scsi");
132
133 /*
134 * Open the target.
135 */
136 tgt->vst_fd = open(path, O_RDWR);
137 if (tgt->vst_fd < 0)
138 return (-1);
139
140 /*
141 * Get the maximum transfer size of the backend device.
142 */
143 if (ioctl(tgt->vst_fd, USCSIMAXXFER, &maxxfer) < 0) {
144 int errno_save = errno;
145
146 if (errno == ENOTTY) {
147 /*
148 * The underlying device doesn't support this ioctl.
149 * Limit max_sectors to 128MB, which is as good as
150 * any other assumption.
151 */
152 tgt->vst_max_sectors = 128 << (20 - 9);
153 return (0);
154 }
155
156 WPRINTF("USCSIMAXXFER: unexpected error: errno=%d (%s)",
157 strerrorname_np(errno), strerror(errno));
158 (void) close(tgt->vst_fd);
159 tgt->vst_fd = -1;
160 errno = errno_save;
161 return (-1);
162 }
163
164 /*
165 * Even though the virtio spec isn't particularly verbose about what
166 * "max_sectors" actually means and what size a sector is, Linux seems
167 * to treat it as a number of 512b sectors.
168 *
169 * In any case, we need to limit maxxfer such that it fits into a signed
170 * 32bit int.
171 */
172 if (maxxfer > INT32_MAX)
173 maxxfer = INT32_MAX;
174
175 tgt->vst_max_sectors = maxxfer >> 9;
176
177 return (0);
178 }
179
180 static void
vtscsi_uscsi_reset(struct pci_vtscsi_softc * sc)181 vtscsi_uscsi_reset(struct pci_vtscsi_softc *sc)
182 {
183 size_t i;
184
185 sc->vss_config.max_sectors = INT32_MAX;
186
187 /*
188 * As we may be configured to use a variety of differing backend devices
189 * with varying maximum transfer sizes but virtio-scsi supports only one
190 * max_sectors limit per instance, we'll use the smallest maximum
191 * transfer size found.
192 */
193 for (i = 0; i < sc->vss_num_target; i++) {
194 struct pci_vtscsi_target *tgt = &sc->vss_targets[i];
195
196 if (tgt->vst_max_sectors < sc->vss_config.max_sectors)
197 sc->vss_config.max_sectors = tgt->vst_max_sectors;
198 }
199 }
200
201 static void *
vtscsi_uscsi_req_alloc(struct pci_vtscsi_softc * sc)202 vtscsi_uscsi_req_alloc(struct pci_vtscsi_softc *sc)
203 {
204 return (calloc(1, sizeof (struct uscsi_cmd)));
205 }
206
207 static void
vtscsi_uscsi_req_clear(void * io)208 vtscsi_uscsi_req_clear(void *io)
209 {
210 bzero(io, sizeof (struct uscsi_cmd));
211 }
212
/*
 * Release a struct uscsi_cmd obtained from vtscsi_uscsi_req_alloc().
 */
static void
vtscsi_uscsi_req_free(void *io)
{
	struct uscsi_cmd *cmd = io;

	free(cmd);
}
218
219 static void
vtscsi_uscsi_tmf_hdl(struct pci_vtscsi_softc * sc __unused,int fd,struct pci_vtscsi_ctrl_tmf * tmf)220 vtscsi_uscsi_tmf_hdl(struct pci_vtscsi_softc *sc __unused, int fd,
221 struct pci_vtscsi_ctrl_tmf *tmf)
222 {
223 struct uscsi_cmd cmd;
224 int err;
225
226 /* We currently support only LUN 0. */
227 if (pci_vtscsi_get_lun(sc, tmf->lun) != 0) {
228 tmf->response = VIRTIO_SCSI_S_BAD_TARGET;
229 return;
230 }
231
232 tmf->response = VIRTIO_SCSI_S_FUNCTION_COMPLETE;
233
234 memset(&cmd, 0, sizeof (cmd));
235 cmd.uscsi_status = -1;
236 cmd.uscsi_flags = USCSI_DIAGNOSE | USCSI_SILENT;
237
238
239 /* The only TMF requests that we can handle here are RESETs. */
240 switch (tmf->subtype) {
241 case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET:
242 cmd.uscsi_flags |= USCSI_RESET_TARGET;
243 break;
244
245 case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET:
246 cmd.uscsi_flags |= USCSI_RESET_LUN;
247 break;
248
249 default:
250 /*
251 * For all other TMF requests, return FUNCTION COMPLETE as
252 * there is nothing we can or need to do for them.
253 *
254 * See the comments in pci_vtscsi_tmf_handle() for additional
255 * information on how the common code and the backend-specific
256 * code interact for TMF requests.
257 */
258 tmf->response = VIRTIO_SCSI_S_FUNCTION_COMPLETE;
259 return;
260 }
261
262 err = ioctl(fd, USCSICMD, &cmd);
263
264 if (err != 0) {
265 WPRINTF("USCSICMD: unexpected TMF error, errno=%d (%s)",
266 strerrorname_np(errno), strerror(errno));
267 tmf->response = VIRTIO_SCSI_S_FAILURE;
268 }
269 }
270
271 static void
vtscsi_uscsi_an_hdl(struct pci_vtscsi_softc * sc __unused,int fd __unused,struct pci_vtscsi_ctrl_an * an)272 vtscsi_uscsi_an_hdl(struct pci_vtscsi_softc *sc __unused, int fd __unused,
273 struct pci_vtscsi_ctrl_an *an)
274 {
275 /* We currently support only LUN 0. */
276 if (pci_vtscsi_get_lun(sc, an->lun) != 0) {
277 an->response = VIRTIO_SCSI_S_BAD_TARGET;
278 return;
279 }
280
281 an->response = VIRTIO_SCSI_S_FAILURE;
282 }
283
284 static int
vtscsi_uscsi_req_hdl(struct pci_vtscsi_softc * sc,int fd,struct pci_vtscsi_request * req)285 vtscsi_uscsi_req_hdl(struct pci_vtscsi_softc *sc, int fd,
286 struct pci_vtscsi_request *req)
287 {
288 struct vtscsi_uscsi_backend *uscsi =
289 (struct vtscsi_uscsi_backend *)sc->vss_backend;
290 struct uscsi_cmd *cmd = req->vsr_backend;
291 void *ext_data = NULL;
292 ssize_t ext_data_len = 0;
293 int nxferred = 0;
294
295 /* We currently support only LUN 0. */
296 if (pci_vtscsi_get_lun(sc, req->vsr_cmd_rd->lun) != 0) {
297 req->vsr_cmd_wr->response = VIRTIO_SCSI_S_BAD_TARGET;
298 return (0);
299 }
300
301 if (req->vsr_data_niov_in > 0) {
302 ext_data_len = iov_to_buf(req->vsr_data_iov_in,
303 req->vsr_data_niov_in, &ext_data);
304 cmd->uscsi_flags |= USCSI_WRITE;
305 } else if (req->vsr_data_niov_out > 0) {
306 ext_data_len = count_iov(req->vsr_data_iov_out,
307 req->vsr_data_niov_out);
308 ext_data = malloc(ext_data_len);
309 cmd->uscsi_flags |= USCSI_READ;
310 }
311
312 /* Stop here if we failed to allocate ext_data. */
313 if (ext_data == NULL && ext_data_len != 0) {
314 WPRINTF("failed to allocate buffer for ext_data");
315 req->vsr_cmd_wr->response = VIRTIO_SCSI_S_FAILURE;
316 return (0);
317 }
318
319 cmd->uscsi_buflen = ext_data_len;
320 cmd->uscsi_bufaddr = ext_data;
321
322 cmd->uscsi_cdb = (caddr_t)req->vsr_cmd_rd->cdb;
323 cmd->uscsi_cdblen = libscsi_cmd_cdblen(uscsi->vub_scsi_hdl,
324 req->vsr_cmd_rd->cdb[0]);
325
326 cmd->uscsi_status = -1;
327
328 /*
329 * We set an unreasonably large timeout here. The virtio spec doesn't
330 * provide a way for the guest driver to pass a I/O timeout value to
331 * the device, but if our timeout here is larger than any timeout the
332 * guest uses, we can expect them to abort the command before we would.
333 *
334 * INT16_MAX corresponds to a bit over 9 hours, which should be enough.
335 */
336 cmd->uscsi_timeout = INT16_MAX;
337 cmd->uscsi_flags |= USCSI_DIAGNOSE;
338 cmd->uscsi_rqlen = sc->vss_config.sense_size;
339 cmd->uscsi_rqbuf = (caddr_t)req->vsr_cmd_wr->sense;
340 cmd->uscsi_flags |= USCSI_RQENABLE;
341
342 switch (req->vsr_cmd_rd->task_attr) {
343 case VIRTIO_SCSI_S_ORDERED:
344 cmd->uscsi_flags |= USCSI_OTAG;
345 break;
346 case VIRTIO_SCSI_S_HEAD:
347 cmd->uscsi_flags |= USCSI_HEAD|USCSI_HTAG;
348 break;
349 case VIRTIO_SCSI_S_SIMPLE:
350 break;
351
352 case VIRTIO_SCSI_S_ACA:
353 /*
354 * I haven't found any indication in our code that would
355 * suggest that we support ACA in any way in illumos. In
356 * fact, scsi_transport() asserts that NACA isn't set in
357 * a packet, and scsi_uscsi_pktinit() warns about it and
358 * clears the flag if found set. There's a tunable to
359 * override that behaviour (scsi_pkt_allow_naca), but there
360 * really seems to be no code properly handling ACA or
361 * setting the ACA flag.
362 *
363 * I guess this makes sense since we're doing ARQ anyway,
364 * so let's just pretend no target is ever in ACA state
365 * and thus no packet will ever require this.
366 */
367 default:
368 WPRINTF("USCSICMD: unexpected task attr in request: 0x%x",
369 req->vsr_cmd_rd->task_attr);
370 req->vsr_cmd_wr->response = VIRTIO_SCSI_S_FAILURE;
371 return (0);
372 }
373
374 errno = 0;
375 (void) ioctl(fd, USCSICMD, cmd);
376
377 switch (errno) {
378 case EIO:
379 /*
380 * EIO may indicate that a SCSI error occured. If that's the
381 * case, uscsi_status should have been set to a valid value,
382 * and we want to continue to process the request normally.
383 */
384 if (cmd->uscsi_status == -1) {
385 req->vsr_cmd_wr->response = VIRTIO_SCSI_S_FAILURE;
386 break;
387 }
388
389 /*FALLTHRU*/
390 case 0:
391 /*
392 * If the command completed successfully, apply any necessary
393 * post-completion filtering.
394 */
395 if (cmd->uscsi_status == STATUS_GOOD)
396 vtscsi_uscsi_filter_post(cmd);
397
398 req->vsr_cmd_wr->sense_len =
399 sc->vss_config.sense_size - cmd->uscsi_rqresid;
400 req->vsr_cmd_wr->residual = cmd->uscsi_resid;
401 req->vsr_cmd_wr->status = cmd->uscsi_status;
402 req->vsr_cmd_wr->response = VIRTIO_SCSI_S_OK;
403
404 nxferred = ext_data_len - req->vsr_cmd_wr->residual;
405
406 if (req->vsr_data_niov_out > 0) {
407 (void) buf_to_iov(ext_data, nxferred,
408 req->vsr_data_iov_out, req->vsr_data_niov_out);
409 }
410 break;
411
412 case EAGAIN:
413 /*
414 * Despite not being documented in uscsi(4I), sd(4D) returns
415 * this when the device is busy formatting.
416 */
417 req->vsr_cmd_wr->response = VIRTIO_SCSI_S_BUSY;
418 break;
419
420 case EINVAL:
421 /*
422 * This may happen if packet allocation fails, which in turn
423 * may happen if we didn't honor USCSIMAXXFER.
424 */
425 req->vsr_cmd_wr->response = VIRTIO_SCSI_S_OVERRUN;
426 break;
427
428 case EFAULT:
429 /*
430 * EFAULT should never happen as we never send bogus memory
431 * addresses in our USCSI commands.
432 */
433
434 case EPERM:
435 /*
436 * EPERM should never happen as we have the SYS_DEVICES
437 * privilege.
438 */
439
440 default:
441 WPRINTF("USCSICMD: unexpected I/O error: errno=%d (%s)",
442 strerrorname_np(errno), strerror(errno));
443 abort();
444 }
445
446 free(ext_data);
447
448 return (nxferred);
449 }
450
451 /*
452 * Return a CHECK CONDITION and fill in the sense data with the given sense key,
453 * additional sense code, and additional sense qualifier.
454 */
455 static void
vtscsi_uscsi_make_check_condition(struct uscsi_cmd * cmd,char key,char asc,char qual)456 vtscsi_uscsi_make_check_condition(struct uscsi_cmd *cmd, char key, char asc,
457 char qual)
458 {
459 cmd->uscsi_status = STATUS_CHECK;
460 cmd->uscsi_resid = cmd->uscsi_buflen;
461 cmd->uscsi_rqstatus = STATUS_GOOD;
462
463 bzero(cmd->uscsi_rqbuf, cmd->uscsi_rqlen);
464
465 if (cmd->uscsi_rqlen >= 1)
466 cmd->uscsi_rqbuf[0] = 0x70;
467 if (cmd->uscsi_rqlen >= 3)
468 cmd->uscsi_rqbuf[2] = key;
469 if (cmd->uscsi_rqlen >= 8)
470 cmd->uscsi_rqbuf[7] = cmd->uscsi_rqlen - 8;
471 if (cmd->uscsi_rqlen >= 13)
472 cmd->uscsi_rqbuf[12] = asc;
473 if (cmd->uscsi_rqlen >= 14)
474 cmd->uscsi_rqbuf[13] = qual;
475 }
476
477 /*
478 * We currently only support LUN 0. Make sure we never report anything else.
479 *
480 * We make no assumption about the buffer size. If it's large enough to hold the
481 * LUN list length, we'll set the LUN list length to 8. The resid is adjusted if
482 * the buffer size is larger than 16 bytes, which is the length needed to hold
483 * one LUN address.
484 */
485 static void
vtscsi_uscsi_filter_post_report_luns(struct uscsi_cmd * cmd)486 vtscsi_uscsi_filter_post_report_luns(struct uscsi_cmd *cmd)
487 {
488 uint8_t report = (uint8_t)cmd->uscsi_cdb[2];
489
490 bzero(cmd->uscsi_bufaddr, cmd->uscsi_buflen);
491
492 switch (report) {
493 case 0:
494 case 2:
495 /*
496 * We'll overwrite the output from the device to report just one
497 * LUN with an all-zero address:
498 * - LUN list length is 8
499 * - LUN 1 address is 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
500 */
501 if (cmd->uscsi_buflen >= 4)
502 cmd->uscsi_bufaddr[3] = 8;
503 if (cmd->uscsi_buflen >= 16)
504 cmd->uscsi_resid = cmd->uscsi_buflen - 16;
505 break;
506 case 1:
507 /*
508 * We don't report any Well-Known LUNs either, because we have
509 * no way to address them anyway using USCSICMD.
510 */
511 cmd->uscsi_resid = cmd->uscsi_buflen;
512 break;
513 default:
514 /*
515 * All other values for "select report" are either invalid or
516 * vendor-specific and thus unsupported. Return the command with
517 * CHECK CONDITION, and fill in sense data to report a ILLEGAL
518 * REQUEST with INVALID FIELD IN CDB.
519 */
520 vtscsi_uscsi_make_check_condition(cmd, KEY_ILLEGAL_REQUEST,
521 0x24, 0x00);
522 }
523 }
524
525 static void
vtscsi_uscsi_filter_post(struct uscsi_cmd * cmd)526 vtscsi_uscsi_filter_post(struct uscsi_cmd *cmd)
527 {
528 switch ((uint8_t)cmd->uscsi_cdb[0]) {
529 case SCMD_REPORT_LUNS:
530 vtscsi_uscsi_filter_post_report_luns(cmd);
531 break;
532
533 default:
534 break;
535 }
536 }
537
538 static const struct pci_vtscsi_backend vtscsi_uscsi_backend = {
539 .vsb_name = "uscsi",
540 .vsb_init = vtscsi_uscsi_init,
541 .vsb_open = vtscsi_uscsi_open,
542 .vsb_reset = vtscsi_uscsi_reset,
543
544 .vsb_req_alloc = vtscsi_uscsi_req_alloc,
545 .vsb_req_clear = vtscsi_uscsi_req_clear,
546 .vsb_req_free = vtscsi_uscsi_req_free,
547
548 .vsb_tmf_hdl = vtscsi_uscsi_tmf_hdl,
549 .vsb_an_hdl = vtscsi_uscsi_an_hdl,
550 .vsb_req_hdl = vtscsi_uscsi_req_hdl
551 };
552 PCI_VTSCSI_BACKEND_SET(vtscsi_uscsi_backend);
553