xref: /freebsd/usr.sbin/bhyve/pci_ahci.c (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2013  Zhixiang Yu <zcore@freebsd.org>
5  * Copyright (c) 2015-2016 Alexander Motin <mav@FreeBSD.org>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/param.h>
31 #include <sys/linker_set.h>
32 #include <sys/stat.h>
33 #include <sys/uio.h>
34 #include <sys/ioctl.h>
35 #include <sys/disk.h>
36 #include <sys/ata.h>
37 #include <sys/endian.h>
38 
39 #include <errno.h>
40 #include <fcntl.h>
41 #include <stdio.h>
42 #include <stdlib.h>
43 #include <stdint.h>
44 #include <string.h>
45 #include <strings.h>
46 #include <unistd.h>
47 #include <assert.h>
48 #include <pthread.h>
49 #include <pthread_np.h>
50 #include <inttypes.h>
51 #include <md5.h>
52 
53 #include "bhyverun.h"
54 #include "config.h"
55 #include "debug.h"
56 #include "pci_emul.h"
57 #ifdef BHYVE_SNAPSHOT
58 #include "snapshot.h"
59 #endif
60 #include "ahci.h"
61 #include "block_if.h"
62 
/*
 * Port counts.
 */
#define	DEF_PORTS	6	/* Intel ICH8 AHCI supports 6 ports */
#define	MAX_PORTS	32	/* AHCI supports 32 ports */

/*
 * Device signatures stored in a port's 'sig' field on reset.
 */
#define	PxSIG_ATA	0x00000101	/* ATA drive */
#define	PxSIG_ATAPI	0xeb140101	/* ATAPI drive */
68 
/*
 * SATA Frame Information Structure (FIS) type codes.  The value is the
 * first byte of the corresponding FIS.
 */
enum sata_fis_type {
	/* Register FIS - host to device */
	FIS_TYPE_REGH2D		= 0x27,
	/* Register FIS - device to host */
	FIS_TYPE_REGD2H		= 0x34,
	/* DMA activate FIS - device to host */
	FIS_TYPE_DMAACT		= 0x39,
	/* DMA setup FIS - bidirectional */
	FIS_TYPE_DMASETUP	= 0x41,
	/* Data FIS - bidirectional */
	FIS_TYPE_DATA		= 0x46,
	/* BIST activate FIS - bidirectional */
	FIS_TYPE_BIST		= 0x58,
	/* PIO setup FIS - device to host */
	FIS_TYPE_PIOSETUP	= 0x5F,
	/* Set dev bits FIS - device to host */
	FIS_TYPE_SETDEVBITS	= 0xA1,
};
79 
/*
 * SCSI opcodes (first byte of a command packet)
 */
#define	TEST_UNIT_READY			0x00
#define	REQUEST_SENSE			0x03
#define	INQUIRY				0x12
#define	START_STOP_UNIT			0x1B
#define	PREVENT_ALLOW			0x1E
#define	READ_CAPACITY			0x25
#define	READ_10				0x28
#define	POSITION_TO_ELEMENT		0x2B
#define	READ_TOC			0x43
#define	GET_EVENT_STATUS_NOTIFICATION	0x4A
#define	MODE_SENSE_10			0x5A
#define	REPORT_LUNS			0xA0
#define	READ_12				0xA8
#define	READ_CD				0xBE

/*
 * SCSI mode page codes
 */
#define	MODEPAGE_RW_ERROR_RECOVERY	0x01
#define	MODEPAGE_CD_CAPABILITIES	0x2A

/*
 * ATA commands: feature-related codes not taken from the system headers
 */
#define	ATA_SF_ENAB_SATA_SF		0x10
#define	ATA_SATA_SF_AN			0x05
#define	ATA_SF_DIS_SATA_SF		0x90
110 
/*
 * Debug printf.
 *
 * With AHCI_DEBUG defined, DPRINTF() formats its arguments to the 'dbg'
 * stream and flushes it right away; without it the macro expands to
 * nothing, so its arguments are not evaluated.
 */
#ifdef AHCI_DEBUG
static FILE *dbg;
#define DPRINTF(format, arg...)				\
	do {						\
		fprintf(dbg, format, ##arg);		\
		fflush(dbg);				\
	} while (0)
#else
#define DPRINTF(format, arg...)
#endif
120 
/*
 * Size of a port identification buffer: 20 characters plus the
 * terminating NUL.  The expansion is parenthesized so the macro keeps
 * its value inside larger expressions (e.g. 2 * AHCI_PORT_IDENT).
 */
#define AHCI_PORT_IDENT (20 + 1)
122 
123 struct ahci_ioreq {
124 	struct blockif_req io_req;
125 	struct ahci_port *io_pr;
126 	STAILQ_ENTRY(ahci_ioreq) io_flist;
127 	TAILQ_ENTRY(ahci_ioreq) io_blist;
128 	uint8_t *cfis;
129 	uint8_t *dsm;
130 	uint32_t len;
131 	uint32_t done;
132 	int slot;
133 	int more;
134 	int readop;
135 };
136 
137 struct ahci_port {
138 	struct blockif_ctxt *bctx;
139 	struct pci_ahci_softc *pr_sc;
140 	struct ata_params ata_ident;
141 	uint8_t *cmd_lst;
142 	uint8_t *rfis;
143 	int port;
144 	int atapi;
145 	int reset;
146 	int waitforclear;
147 	int mult_sectors;
148 	uint8_t xfermode;
149 	uint8_t err_cfis[20];
150 	uint8_t sense_key;
151 	uint8_t asc;
152 	u_int ccs;
153 	uint32_t pending;
154 
155 	uint32_t clb;
156 	uint32_t clbu;
157 	uint32_t fb;
158 	uint32_t fbu;
159 	uint32_t is;
160 	uint32_t ie;
161 	uint32_t cmd;
162 	uint32_t unused0;
163 	uint32_t tfd;
164 	uint32_t sig;
165 	uint32_t ssts;
166 	uint32_t sctl;
167 	uint32_t serr;
168 	uint32_t sact;
169 	uint32_t ci;
170 	uint32_t sntf;
171 	uint32_t fbs;
172 
173 	/*
174 	 * i/o request info
175 	 */
176 	struct ahci_ioreq *ioreq;
177 	int ioqsz;
178 	STAILQ_HEAD(ahci_fhead, ahci_ioreq) iofhd;
179 	TAILQ_HEAD(ahci_bhead, ahci_ioreq) iobhd;
180 };
181 
/*
 * One entry of a port's command list, as laid out in guest memory.
 */
struct ahci_cmd_hdr {
	uint16_t flags;
	/* Number of entries in the PRDT that follows the command FIS. */
	uint16_t prdtl;
	/* Byte count transferred, written back on completion. */
	uint32_t prdbc;
	uint64_t ctba;
	uint32_t reserved[4];
};
189 
/*
 * One physical region descriptor: a guest-physical buffer address plus
 * its byte count.  The low bits of 'dbc' selected by DBCMASK hold the
 * length minus one.
 */
struct ahci_prdt_entry {
	uint64_t dba;
	uint32_t reserved;
#define	DBCMASK		0x3fffff
	uint32_t dbc;
};
196 
197 struct pci_ahci_softc {
198 	struct pci_devinst *asc_pi;
199 	pthread_mutex_t	mtx;
200 	int ports;
201 	uint32_t cap;
202 	uint32_t ghc;
203 	uint32_t is;
204 	uint32_t pi;
205 	uint32_t vs;
206 	uint32_t ccc_ctl;
207 	uint32_t ccc_pts;
208 	uint32_t em_loc;
209 	uint32_t em_ctl;
210 	uint32_t cap2;
211 	uint32_t bohc;
212 	uint32_t lintr;
213 	struct ahci_port port[MAX_PORTS];
214 };
215 #define	ahci_ctx(sc)	((sc)->asc_pi->pi_vmctx)
216 
217 static void ahci_handle_next_trim(struct ahci_port *p, int slot, uint8_t *cfis,
218     uint8_t *buf, uint32_t len, uint32_t done);
219 static void ahci_handle_port(struct ahci_port *p);
220 
/*
 * Convert a logical block address into minute/second/frame form.
 * The address is first offset by 150 frames; 75 frames make a second.
 * buf[0] receives the minutes, buf[1] the seconds, buf[2] the frames.
 */
static inline void
lba_to_msf(uint8_t *buf, int lba)
{
	int frames, seconds;

	frames = lba + 150;
	seconds = frames / 75;

	buf[0] = seconds / 60;
	buf[1] = seconds % 60;
	buf[2] = frames % 75;
}
228 
229 /*
230  * Generate HBA interrupts on global IS register write.
231  */
232 static void
233 ahci_generate_intr(struct pci_ahci_softc *sc, uint32_t mask)
234 {
235 	struct pci_devinst *pi = sc->asc_pi;
236 	struct ahci_port *p;
237 	int i, nmsg;
238 	uint32_t mmask;
239 
240 	/* Update global IS from PxIS/PxIE. */
241 	for (i = 0; i < sc->ports; i++) {
242 		p = &sc->port[i];
243 		if (p->is & p->ie)
244 			sc->is |= (1 << i);
245 	}
246 	DPRINTF("%s(%08x) %08x", __func__, mask, sc->is);
247 
248 	/* If there is nothing enabled -- clear legacy interrupt and exit. */
249 	if (sc->is == 0 || (sc->ghc & AHCI_GHC_IE) == 0) {
250 		if (sc->lintr) {
251 			pci_lintr_deassert(pi);
252 			sc->lintr = 0;
253 		}
254 		return;
255 	}
256 
257 	/* If there is anything and no MSI -- assert legacy interrupt. */
258 	nmsg = pci_msi_maxmsgnum(pi);
259 	if (nmsg == 0) {
260 		if (!sc->lintr) {
261 			sc->lintr = 1;
262 			pci_lintr_assert(pi);
263 		}
264 		return;
265 	}
266 
267 	/* Assert respective MSIs for ports that were touched. */
268 	for (i = 0; i < nmsg; i++) {
269 		if (sc->ports <= nmsg || i < nmsg - 1)
270 			mmask = 1 << i;
271 		else
272 			mmask = 0xffffffff << i;
273 		if (sc->is & mask && mmask & mask)
274 			pci_generate_msi(pi, i);
275 	}
276 }
277 
278 /*
279  * Generate HBA interrupt on specific port event.
280  */
281 static void
282 ahci_port_intr(struct ahci_port *p)
283 {
284 	struct pci_ahci_softc *sc = p->pr_sc;
285 	struct pci_devinst *pi = sc->asc_pi;
286 	int nmsg;
287 
288 	DPRINTF("%s(%d) %08x/%08x %08x", __func__,
289 	    p->port, p->is, p->ie, sc->is);
290 
291 	/* If there is nothing enabled -- we are done. */
292 	if ((p->is & p->ie) == 0)
293 		return;
294 
295 	/* In case of non-shared MSI always generate interrupt. */
296 	nmsg = pci_msi_maxmsgnum(pi);
297 	if (sc->ports <= nmsg || p->port < nmsg - 1) {
298 		sc->is |= (1 << p->port);
299 		if ((sc->ghc & AHCI_GHC_IE) == 0)
300 			return;
301 		pci_generate_msi(pi, p->port);
302 		return;
303 	}
304 
305 	/* If IS for this port is already set -- do nothing. */
306 	if (sc->is & (1 << p->port))
307 		return;
308 
309 	sc->is |= (1 << p->port);
310 
311 	/* If interrupts are enabled -- generate one. */
312 	if ((sc->ghc & AHCI_GHC_IE) == 0)
313 		return;
314 	if (nmsg > 0) {
315 		pci_generate_msi(pi, nmsg - 1);
316 	} else if (!sc->lintr) {
317 		sc->lintr = 1;
318 		pci_lintr_assert(pi);
319 	}
320 }
321 
322 static void
323 ahci_write_fis(struct ahci_port *p, enum sata_fis_type ft, uint8_t *fis)
324 {
325 	int offset, len, irq;
326 
327 	if (p->rfis == NULL || !(p->cmd & AHCI_P_CMD_FRE))
328 		return;
329 
330 	switch (ft) {
331 	case FIS_TYPE_REGD2H:
332 		offset = 0x40;
333 		len = 20;
334 		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_DHR : 0;
335 		break;
336 	case FIS_TYPE_SETDEVBITS:
337 		offset = 0x58;
338 		len = 8;
339 		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_SDB : 0;
340 		break;
341 	case FIS_TYPE_PIOSETUP:
342 		offset = 0x20;
343 		len = 20;
344 		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_PS : 0;
345 		break;
346 	default:
347 		EPRINTLN("unsupported fis type %d", ft);
348 		return;
349 	}
350 	if (fis[2] & ATA_S_ERROR) {
351 		p->waitforclear = 1;
352 		irq |= AHCI_P_IX_TFE;
353 	}
354 	memcpy(p->rfis + offset, fis, len);
355 	if (irq) {
356 		if (~p->is & irq) {
357 			p->is |= irq;
358 			ahci_port_intr(p);
359 		}
360 	}
361 }
362 
363 static void
364 ahci_write_fis_piosetup(struct ahci_port *p)
365 {
366 	uint8_t fis[20];
367 
368 	memset(fis, 0, sizeof(fis));
369 	fis[0] = FIS_TYPE_PIOSETUP;
370 	ahci_write_fis(p, FIS_TYPE_PIOSETUP, fis);
371 }
372 
373 static void
374 ahci_write_fis_sdb(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
375 {
376 	uint8_t fis[8];
377 	uint8_t error;
378 
379 	error = (tfd >> 8) & 0xff;
380 	tfd &= 0x77;
381 	memset(fis, 0, sizeof(fis));
382 	fis[0] = FIS_TYPE_SETDEVBITS;
383 	fis[1] = (1 << 6);
384 	fis[2] = tfd;
385 	fis[3] = error;
386 	if (fis[2] & ATA_S_ERROR) {
387 		p->err_cfis[0] = slot;
388 		p->err_cfis[2] = tfd;
389 		p->err_cfis[3] = error;
390 		memcpy(&p->err_cfis[4], cfis + 4, 16);
391 	} else {
392 		*(uint32_t *)(fis + 4) = (1 << slot);
393 		p->sact &= ~(1 << slot);
394 	}
395 	p->tfd &= ~0x77;
396 	p->tfd |= tfd;
397 	ahci_write_fis(p, FIS_TYPE_SETDEVBITS, fis);
398 }
399 
400 static void
401 ahci_write_fis_d2h(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
402 {
403 	uint8_t fis[20];
404 	uint8_t error;
405 
406 	error = (tfd >> 8) & 0xff;
407 	memset(fis, 0, sizeof(fis));
408 	fis[0] = FIS_TYPE_REGD2H;
409 	fis[1] = (1 << 6);
410 	fis[2] = tfd & 0xff;
411 	fis[3] = error;
412 	fis[4] = cfis[4];
413 	fis[5] = cfis[5];
414 	fis[6] = cfis[6];
415 	fis[7] = cfis[7];
416 	fis[8] = cfis[8];
417 	fis[9] = cfis[9];
418 	fis[10] = cfis[10];
419 	fis[11] = cfis[11];
420 	fis[12] = cfis[12];
421 	fis[13] = cfis[13];
422 	if (fis[2] & ATA_S_ERROR) {
423 		p->err_cfis[0] = 0x80;
424 		p->err_cfis[2] = tfd & 0xff;
425 		p->err_cfis[3] = error;
426 		memcpy(&p->err_cfis[4], cfis + 4, 16);
427 	} else
428 		p->ci &= ~(1 << slot);
429 	p->tfd = tfd;
430 	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
431 }
432 
433 static void
434 ahci_write_fis_d2h_ncq(struct ahci_port *p, int slot)
435 {
436 	uint8_t fis[20];
437 
438 	p->tfd = ATA_S_READY | ATA_S_DSC;
439 	memset(fis, 0, sizeof(fis));
440 	fis[0] = FIS_TYPE_REGD2H;
441 	fis[1] = 0;			/* No interrupt */
442 	fis[2] = p->tfd;		/* Status */
443 	fis[3] = 0;			/* No error */
444 	p->ci &= ~(1 << slot);
445 	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
446 }
447 
448 static void
449 ahci_write_reset_fis_d2h(struct ahci_port *p)
450 {
451 	uint8_t fis[20];
452 
453 	memset(fis, 0, sizeof(fis));
454 	fis[0] = FIS_TYPE_REGD2H;
455 	fis[3] = 1;
456 	fis[4] = 1;
457 	if (p->atapi) {
458 		fis[5] = 0x14;
459 		fis[6] = 0xeb;
460 	}
461 	fis[12] = 1;
462 	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
463 }
464 
465 static void
466 ahci_check_stopped(struct ahci_port *p)
467 {
468 	/*
469 	 * If we are no longer processing the command list and nothing
470 	 * is in-flight, clear the running bit, the current command
471 	 * slot, the command issue and active bits.
472 	 */
473 	if (!(p->cmd & AHCI_P_CMD_ST)) {
474 		if (p->pending == 0) {
475 			p->ccs = 0;
476 			p->cmd &= ~(AHCI_P_CMD_CR | AHCI_P_CMD_CCS_MASK);
477 			p->ci = 0;
478 			p->sact = 0;
479 			p->waitforclear = 0;
480 		}
481 	}
482 }
483 
484 static void
485 ahci_port_stop(struct ahci_port *p)
486 {
487 	struct ahci_ioreq *aior;
488 	uint8_t *cfis;
489 	int slot;
490 	int error;
491 
492 	assert(pthread_mutex_isowned_np(&p->pr_sc->mtx));
493 
494 	TAILQ_FOREACH(aior, &p->iobhd, io_blist) {
495 		/*
496 		 * Try to cancel the outstanding blockif request.
497 		 */
498 		error = blockif_cancel(p->bctx, &aior->io_req);
499 		if (error != 0)
500 			continue;
501 
502 		slot = aior->slot;
503 		cfis = aior->cfis;
504 		if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
505 		    cfis[2] == ATA_READ_FPDMA_QUEUED ||
506 		    cfis[2] == ATA_SEND_FPDMA_QUEUED)
507 			p->sact &= ~(1 << slot);	/* NCQ */
508 		else
509 			p->ci &= ~(1 << slot);
510 
511 		/*
512 		 * This command is now done.
513 		 */
514 		p->pending &= ~(1 << slot);
515 
516 		/*
517 		 * Delete the blockif request from the busy list
518 		 */
519 		TAILQ_REMOVE(&p->iobhd, aior, io_blist);
520 
521 		/*
522 		 * Move the blockif request back to the free list
523 		 */
524 		STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
525 	}
526 
527 	ahci_check_stopped(p);
528 }
529 
530 static void
531 ahci_port_reset(struct ahci_port *pr)
532 {
533 	pr->serr = 0;
534 	pr->sact = 0;
535 	pr->xfermode = ATA_UDMA6;
536 	pr->mult_sectors = 128;
537 
538 	if (!pr->bctx) {
539 		pr->ssts = ATA_SS_DET_NO_DEVICE;
540 		pr->sig = 0xFFFFFFFF;
541 		pr->tfd = 0x7F;
542 		return;
543 	}
544 	pr->ssts = ATA_SS_DET_PHY_ONLINE | ATA_SS_IPM_ACTIVE;
545 	if (pr->sctl & ATA_SC_SPD_MASK)
546 		pr->ssts |= (pr->sctl & ATA_SC_SPD_MASK);
547 	else
548 		pr->ssts |= ATA_SS_SPD_GEN3;
549 	pr->tfd = (1 << 8) | ATA_S_DSC | ATA_S_DMA;
550 	if (!pr->atapi) {
551 		pr->sig = PxSIG_ATA;
552 		pr->tfd |= ATA_S_READY;
553 	} else
554 		pr->sig = PxSIG_ATAPI;
555 	ahci_write_reset_fis_d2h(pr);
556 }
557 
558 static void
559 ahci_reset(struct pci_ahci_softc *sc)
560 {
561 	int i;
562 
563 	sc->ghc = AHCI_GHC_AE;
564 	sc->is = 0;
565 
566 	if (sc->lintr) {
567 		pci_lintr_deassert(sc->asc_pi);
568 		sc->lintr = 0;
569 	}
570 
571 	for (i = 0; i < sc->ports; i++) {
572 		sc->port[i].ie = 0;
573 		sc->port[i].is = 0;
574 		sc->port[i].cmd = (AHCI_P_CMD_SUD | AHCI_P_CMD_POD);
575 		if (sc->port[i].bctx)
576 			sc->port[i].cmd |= AHCI_P_CMD_CPS;
577 		sc->port[i].sctl = 0;
578 		ahci_port_reset(&sc->port[i]);
579 	}
580 }
581 
/*
 * Copy 'src' into the 'len'-byte field 'dest' in ATA string format: the
 * two bytes of every pair are swapped and the field is padded with
 * spaces once 'src' is exhausted.  No NUL terminator is written.
 */
static void
ata_string(uint8_t *dest, const char *src, int len)
{
	uint8_t ch;
	int pos;

	for (pos = 0; pos < len; pos++) {
		ch = ' ';
		if (*src != '\0')
			ch = *src++;
		/* XOR with 1 swaps the bytes of each pair. */
		dest[pos ^ 1] = ch;
	}
}
594 
/*
 * Copy 'src' into the 'len'-byte field 'dest', padding with spaces once
 * 'src' is exhausted.  No NUL terminator is written.
 */
static void
atapi_string(uint8_t *dest, const char *src, int len)
{
	uint8_t *out, *end;

	end = dest + len;
	for (out = dest; out < end; out++) {
		if (*src != '\0')
			*out = *src++;
		else
			*out = ' ';
	}
}
607 
608 /*
609  * Build up the iovec based on the PRDT, 'done' and 'len'.
610  */
611 static void
612 ahci_build_iov(struct ahci_port *p, struct ahci_ioreq *aior,
613     struct ahci_prdt_entry *prdt, uint16_t prdtl)
614 {
615 	struct blockif_req *breq = &aior->io_req;
616 	uint32_t dbcsz, extra, left, skip, todo;
617 	int i, j;
618 
619 	assert(aior->len >= aior->done);
620 
621 	/* Copy part of PRDT between 'done' and 'len' bytes into the iov. */
622 	skip = aior->done;
623 	left = aior->len - aior->done;
624 	todo = 0;
625 	for (i = 0, j = 0; i < prdtl && j < BLOCKIF_IOV_MAX && left > 0;
626 	    i++, prdt++) {
627 		dbcsz = (prdt->dbc & DBCMASK) + 1;
628 		/* Skip already done part of the PRDT */
629 		if (dbcsz <= skip) {
630 			skip -= dbcsz;
631 			continue;
632 		}
633 		dbcsz -= skip;
634 		if (dbcsz > left)
635 			dbcsz = left;
636 		breq->br_iov[j].iov_base = paddr_guest2host(ahci_ctx(p->pr_sc),
637 		    prdt->dba + skip, dbcsz);
638 		breq->br_iov[j].iov_len = dbcsz;
639 		todo += dbcsz;
640 		left -= dbcsz;
641 		skip = 0;
642 		j++;
643 	}
644 
645 	/* If we got limited by IOV length, round I/O down to sector size. */
646 	if (j == BLOCKIF_IOV_MAX) {
647 		extra = todo % blockif_sectsz(p->bctx);
648 		todo -= extra;
649 		assert(todo > 0);
650 		while (extra > 0) {
651 			if (breq->br_iov[j - 1].iov_len > extra) {
652 				breq->br_iov[j - 1].iov_len -= extra;
653 				break;
654 			}
655 			extra -= breq->br_iov[j - 1].iov_len;
656 			j--;
657 		}
658 	}
659 
660 	breq->br_iovcnt = j;
661 	breq->br_resid = todo;
662 	aior->done += todo;
663 	aior->more = (aior->done < aior->len && i < prdtl);
664 }
665 
666 static void
667 ahci_handle_rw(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
668 {
669 	struct ahci_ioreq *aior;
670 	struct blockif_req *breq;
671 	struct ahci_prdt_entry *prdt;
672 	struct ahci_cmd_hdr *hdr;
673 	uint64_t lba;
674 	uint32_t len;
675 	int err, first, ncq, readop;
676 
677 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
678 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
679 	ncq = 0;
680 	readop = 1;
681 	first = (done == 0);
682 
683 	if (cfis[2] == ATA_WRITE || cfis[2] == ATA_WRITE48 ||
684 	    cfis[2] == ATA_WRITE_MUL || cfis[2] == ATA_WRITE_MUL48 ||
685 	    cfis[2] == ATA_WRITE_DMA || cfis[2] == ATA_WRITE_DMA48 ||
686 	    cfis[2] == ATA_WRITE_FPDMA_QUEUED)
687 		readop = 0;
688 
689 	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
690 	    cfis[2] == ATA_READ_FPDMA_QUEUED) {
691 		lba = ((uint64_t)cfis[10] << 40) |
692 			((uint64_t)cfis[9] << 32) |
693 			((uint64_t)cfis[8] << 24) |
694 			((uint64_t)cfis[6] << 16) |
695 			((uint64_t)cfis[5] << 8) |
696 			cfis[4];
697 		len = cfis[11] << 8 | cfis[3];
698 		if (!len)
699 			len = 65536;
700 		ncq = 1;
701 	} else if (cfis[2] == ATA_READ48 || cfis[2] == ATA_WRITE48 ||
702 	    cfis[2] == ATA_READ_MUL48 || cfis[2] == ATA_WRITE_MUL48 ||
703 	    cfis[2] == ATA_READ_DMA48 || cfis[2] == ATA_WRITE_DMA48) {
704 		lba = ((uint64_t)cfis[10] << 40) |
705 			((uint64_t)cfis[9] << 32) |
706 			((uint64_t)cfis[8] << 24) |
707 			((uint64_t)cfis[6] << 16) |
708 			((uint64_t)cfis[5] << 8) |
709 			cfis[4];
710 		len = cfis[13] << 8 | cfis[12];
711 		if (!len)
712 			len = 65536;
713 	} else {
714 		lba = ((cfis[7] & 0xf) << 24) | (cfis[6] << 16) |
715 			(cfis[5] << 8) | cfis[4];
716 		len = cfis[12];
717 		if (!len)
718 			len = 256;
719 	}
720 	lba *= blockif_sectsz(p->bctx);
721 	len *= blockif_sectsz(p->bctx);
722 
723 	/* Pull request off free list */
724 	aior = STAILQ_FIRST(&p->iofhd);
725 	assert(aior != NULL);
726 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
727 
728 	aior->cfis = cfis;
729 	aior->slot = slot;
730 	aior->len = len;
731 	aior->done = done;
732 	aior->readop = readop;
733 	breq = &aior->io_req;
734 	breq->br_offset = lba + done;
735 	ahci_build_iov(p, aior, prdt, hdr->prdtl);
736 
737 	/* Mark this command in-flight. */
738 	p->pending |= 1 << slot;
739 
740 	/* Stuff request onto busy list. */
741 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
742 
743 	if (ncq && first)
744 		ahci_write_fis_d2h_ncq(p, slot);
745 
746 	if (readop)
747 		err = blockif_read(p->bctx, breq);
748 	else
749 		err = blockif_write(p->bctx, breq);
750 	assert(err == 0);
751 }
752 
753 static void
754 ahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis)
755 {
756 	struct ahci_ioreq *aior;
757 	struct blockif_req *breq;
758 	int err;
759 
760 	/*
761 	 * Pull request off free list
762 	 */
763 	aior = STAILQ_FIRST(&p->iofhd);
764 	assert(aior != NULL);
765 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
766 	aior->cfis = cfis;
767 	aior->slot = slot;
768 	aior->len = 0;
769 	aior->done = 0;
770 	aior->more = 0;
771 	breq = &aior->io_req;
772 
773 	/*
774 	 * Mark this command in-flight.
775 	 */
776 	p->pending |= 1 << slot;
777 
778 	/*
779 	 * Stuff request onto busy list
780 	 */
781 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
782 
783 	err = blockif_flush(p->bctx, breq);
784 	assert(err == 0);
785 }
786 
787 static inline unsigned int
788 read_prdt(struct ahci_port *p, int slot, uint8_t *cfis, void *buf,
789     unsigned int size)
790 {
791 	struct ahci_cmd_hdr *hdr;
792 	struct ahci_prdt_entry *prdt;
793 	uint8_t *to;
794 	unsigned int len;
795 	int i;
796 
797 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
798 	len = size;
799 	to = buf;
800 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
801 	for (i = 0; i < hdr->prdtl && len; i++) {
802 		uint8_t *ptr;
803 		uint32_t dbcsz;
804 		unsigned int sublen;
805 
806 		dbcsz = (prdt->dbc & DBCMASK) + 1;
807 		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
808 		sublen = MIN(len, dbcsz);
809 		memcpy(to, ptr, sublen);
810 		len -= sublen;
811 		to += sublen;
812 		prdt++;
813 	}
814 	return (size - len);
815 }
816 
817 static void
818 ahci_handle_dsm_trim(struct ahci_port *p, int slot, uint8_t *cfis)
819 {
820 	uint32_t len;
821 	int ncq;
822 	uint8_t *buf;
823 	unsigned int nread;
824 
825 	buf = NULL;
826 	if (cfis[2] == ATA_DATA_SET_MANAGEMENT) {
827 		len = (uint16_t)cfis[13] << 8 | cfis[12];
828 		len *= 512;
829 		ncq = 0;
830 	} else { /* ATA_SEND_FPDMA_QUEUED */
831 		len = (uint16_t)cfis[11] << 8 | cfis[3];
832 		len *= 512;
833 		ncq = 1;
834 	}
835 
836 	/* Support for only a single block is advertised via IDENTIFY. */
837 	if (len > 512) {
838 		goto invalid_command;
839 	}
840 
841 	buf = malloc(len);
842 	nread = read_prdt(p, slot, cfis, buf, len);
843 	if (nread != len) {
844 		goto invalid_command;
845 	}
846 	ahci_handle_next_trim(p, slot, cfis, buf, len, 0);
847 	return;
848 
849 invalid_command:
850 	free(buf);
851 	if (ncq) {
852 		ahci_write_fis_d2h_ncq(p, slot);
853 		ahci_write_fis_sdb(p, slot, cfis,
854 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
855 	} else {
856 		ahci_write_fis_d2h(p, slot, cfis,
857 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
858 	}
859 }
860 
861 static void
862 ahci_handle_next_trim(struct ahci_port *p, int slot, uint8_t *cfis,
863     uint8_t *buf, uint32_t len, uint32_t done)
864 {
865 	struct ahci_ioreq *aior;
866 	struct blockif_req *breq;
867 	uint8_t *entry;
868 	uint64_t elba;
869 	uint32_t elen;
870 	int err;
871 	bool first, ncq;
872 
873 	first = (done == 0);
874 	if (cfis[2] == ATA_DATA_SET_MANAGEMENT) {
875 		ncq = false;
876 	} else { /* ATA_SEND_FPDMA_QUEUED */
877 		ncq = true;
878 	}
879 
880 	/* Find the next range to TRIM. */
881 	while (done < len) {
882 		entry = &buf[done];
883 		elba = ((uint64_t)entry[5] << 40) |
884 		    ((uint64_t)entry[4] << 32) |
885 		    ((uint64_t)entry[3] << 24) |
886 		    ((uint64_t)entry[2] << 16) |
887 		    ((uint64_t)entry[1] << 8) |
888 		    entry[0];
889 		elen = (uint16_t)entry[7] << 8 | entry[6];
890 		done += 8;
891 		if (elen != 0)
892 			break;
893 	}
894 
895 	/* All remaining ranges were empty. */
896 	if (done == len) {
897 		free(buf);
898 		if (ncq) {
899 			if (first)
900 				ahci_write_fis_d2h_ncq(p, slot);
901 			ahci_write_fis_sdb(p, slot, cfis,
902 			    ATA_S_READY | ATA_S_DSC);
903 		} else {
904 			ahci_write_fis_d2h(p, slot, cfis,
905 			    ATA_S_READY | ATA_S_DSC);
906 		}
907 		if (!first) {
908 			p->pending &= ~(1 << slot);
909 			ahci_check_stopped(p);
910 			ahci_handle_port(p);
911 		}
912 		return;
913 	}
914 
915 	/*
916 	 * Pull request off free list
917 	 */
918 	aior = STAILQ_FIRST(&p->iofhd);
919 	assert(aior != NULL);
920 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
921 	aior->cfis = cfis;
922 	aior->slot = slot;
923 	aior->len = len;
924 	aior->done = done;
925 	aior->dsm = buf;
926 	aior->more = (len != done);
927 
928 	breq = &aior->io_req;
929 	breq->br_offset = elba * blockif_sectsz(p->bctx);
930 	breq->br_resid = elen * blockif_sectsz(p->bctx);
931 
932 	/*
933 	 * Mark this command in-flight.
934 	 */
935 	p->pending |= 1 << slot;
936 
937 	/*
938 	 * Stuff request onto busy list
939 	 */
940 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
941 
942 	if (ncq && first)
943 		ahci_write_fis_d2h_ncq(p, slot);
944 
945 	err = blockif_delete(p->bctx, breq);
946 	assert(err == 0);
947 }
948 
949 static inline void
950 write_prdt(struct ahci_port *p, int slot, uint8_t *cfis, void *buf,
951     unsigned int size)
952 {
953 	struct ahci_cmd_hdr *hdr;
954 	struct ahci_prdt_entry *prdt;
955 	uint8_t *from;
956 	unsigned int len;
957 	int i;
958 
959 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
960 	len = size;
961 	from = buf;
962 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
963 	for (i = 0; i < hdr->prdtl && len; i++) {
964 		uint8_t *ptr;
965 		uint32_t dbcsz;
966 		int sublen;
967 
968 		dbcsz = (prdt->dbc & DBCMASK) + 1;
969 		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
970 		sublen = MIN(len, dbcsz);
971 		memcpy(ptr, from, sublen);
972 		len -= sublen;
973 		from += sublen;
974 		prdt++;
975 	}
976 	hdr->prdbc = size - len;
977 }
978 
/*
 * Store a checksum in the last byte of 'buf' so that all 'size' bytes
 * add up to zero modulo 256.
 */
static void
ahci_checksum(uint8_t *buf, int size)
{
	uint8_t acc;
	int pos;

	acc = 0;
	pos = 0;
	while (pos < size - 1) {
		acc += buf[pos];
		pos++;
	}
	buf[size - 1] = 0x100 - acc;
}
989 
990 static void
991 ahci_handle_read_log(struct ahci_port *p, int slot, uint8_t *cfis)
992 {
993 	struct ahci_cmd_hdr *hdr;
994 	uint32_t buf[128];
995 	uint8_t *buf8 = (uint8_t *)buf;
996 	uint16_t *buf16 = (uint16_t *)buf;
997 
998 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
999 	if (p->atapi || hdr->prdtl == 0 || cfis[5] != 0 ||
1000 	    cfis[9] != 0 || cfis[12] != 1 || cfis[13] != 0) {
1001 		ahci_write_fis_d2h(p, slot, cfis,
1002 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1003 		return;
1004 	}
1005 
1006 	memset(buf, 0, sizeof(buf));
1007 	if (cfis[4] == 0x00) {	/* Log directory */
1008 		buf16[0x00] = 1; /* Version -- 1 */
1009 		buf16[0x10] = 1; /* NCQ Command Error Log -- 1 page */
1010 		buf16[0x13] = 1; /* SATA NCQ Send and Receive Log -- 1 page */
1011 	} else if (cfis[4] == 0x10) {	/* NCQ Command Error Log */
1012 		memcpy(buf8, p->err_cfis, sizeof(p->err_cfis));
1013 		ahci_checksum(buf8, sizeof(buf));
1014 	} else if (cfis[4] == 0x13) {	/* SATA NCQ Send and Receive Log */
1015 		if (blockif_candelete(p->bctx) && !blockif_is_ro(p->bctx)) {
1016 			buf[0x00] = 1;	/* SFQ DSM supported */
1017 			buf[0x01] = 1;	/* SFQ DSM TRIM supported */
1018 		}
1019 	} else {
1020 		ahci_write_fis_d2h(p, slot, cfis,
1021 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1022 		return;
1023 	}
1024 
1025 	if (cfis[2] == ATA_READ_LOG_EXT)
1026 		ahci_write_fis_piosetup(p);
1027 	write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
1028 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
1029 }
1030 
1031 static void
1032 handle_identify(struct ahci_port *p, int slot, uint8_t *cfis)
1033 {
1034 	struct ahci_cmd_hdr *hdr;
1035 
1036 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1037 	if (p->atapi || hdr->prdtl == 0) {
1038 		ahci_write_fis_d2h(p, slot, cfis,
1039 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1040 	} else {
1041 		ahci_write_fis_piosetup(p);
1042 		write_prdt(p, slot, cfis, (void*)&p->ata_ident, sizeof(struct ata_params));
1043 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
1044 	}
1045 }
1046 
1047 static void
1048 ata_identify_init(struct ahci_port* p, int atapi)
1049 {
1050 	struct ata_params* ata_ident = &p->ata_ident;
1051 
1052 	if (atapi) {
1053 		ata_ident->config = ATA_PROTO_ATAPI | ATA_ATAPI_TYPE_CDROM |
1054 		    ATA_ATAPI_REMOVABLE | ATA_DRQ_FAST;
1055 		ata_ident->capabilities1 = ATA_SUPPORT_LBA |
1056 			ATA_SUPPORT_DMA;
1057 		ata_ident->capabilities2 = (1 << 14 | 1);
1058 		ata_ident->atavalid = ATA_FLAG_64_70 | ATA_FLAG_88;
1059 		ata_ident->obsolete62 = 0x3f;
1060 		ata_ident->mwdmamodes = 7;
1061 		if (p->xfermode & ATA_WDMA0)
1062 			ata_ident->mwdmamodes |= (1 << ((p->xfermode & 7) + 8));
1063 		ata_ident->apiomodes = 3;
1064 		ata_ident->mwdmamin = 0x0078;
1065 		ata_ident->mwdmarec = 0x0078;
1066 		ata_ident->pioblind = 0x0078;
1067 		ata_ident->pioiordy = 0x0078;
1068 		ata_ident->satacapabilities = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3);
1069 		ata_ident->satacapabilities2 = ((p->ssts & ATA_SS_SPD_MASK) >> 3);
1070 		ata_ident->satasupport = ATA_SUPPORT_NCQ_STREAM;
1071 		ata_ident->version_major = 0x3f0;
1072 		ata_ident->support.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
1073 			ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
1074 		ata_ident->support.command2 = (1 << 14);
1075 		ata_ident->support.extension = (1 << 14);
1076 		ata_ident->enabled.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
1077 			ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
1078 		ata_ident->enabled.extension = (1 << 14);
1079 		ata_ident->udmamodes = 0x7f;
1080 		if (p->xfermode & ATA_UDMA0)
1081 			ata_ident->udmamodes |= (1 << ((p->xfermode & 7) + 8));
1082 		ata_ident->transport_major = 0x1020;
1083 		ata_ident->integrity = 0x00a5;
1084 	} else {
1085 		uint64_t sectors;
1086 		int sectsz, psectsz, psectoff, candelete, ro;
1087 		uint16_t cyl;
1088 		uint8_t sech, heads;
1089 
1090 		ro = blockif_is_ro(p->bctx);
1091 		candelete = blockif_candelete(p->bctx);
1092 		sectsz = blockif_sectsz(p->bctx);
1093 		sectors = blockif_size(p->bctx) / sectsz;
1094 		blockif_chs(p->bctx, &cyl, &heads, &sech);
1095 		blockif_psectsz(p->bctx, &psectsz, &psectoff);
1096 		ata_ident->config = ATA_DRQ_FAST;
1097 		ata_ident->cylinders = cyl;
1098 		ata_ident->heads = heads;
1099 		ata_ident->sectors = sech;
1100 
1101 		ata_ident->sectors_intr = (0x8000 | 128);
1102 		ata_ident->tcg = 0;
1103 
1104 		ata_ident->capabilities1 = ATA_SUPPORT_DMA |
1105 			ATA_SUPPORT_LBA | ATA_SUPPORT_IORDY;
1106 		ata_ident->capabilities2 = (1 << 14);
1107 		ata_ident->atavalid = ATA_FLAG_64_70 | ATA_FLAG_88;
1108 		if (p->mult_sectors)
1109 			ata_ident->multi = (ATA_MULTI_VALID | p->mult_sectors);
1110 		if (sectors <= 0x0fffffff) {
1111 			ata_ident->lba_size_1 = sectors;
1112 			ata_ident->lba_size_2 = (sectors >> 16);
1113 		} else {
1114 			ata_ident->lba_size_1 = 0xffff;
1115 			ata_ident->lba_size_2 = 0x0fff;
1116 		}
1117 		ata_ident->mwdmamodes = 0x7;
1118 		if (p->xfermode & ATA_WDMA0)
1119 			ata_ident->mwdmamodes |= (1 << ((p->xfermode & 7) + 8));
1120 		ata_ident->apiomodes = 0x3;
1121 		ata_ident->mwdmamin = 0x0078;
1122 		ata_ident->mwdmarec = 0x0078;
1123 		ata_ident->pioblind = 0x0078;
1124 		ata_ident->pioiordy = 0x0078;
1125 		ata_ident->support3 = 0;
1126 		ata_ident->queue = 31;
1127 		ata_ident->satacapabilities = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3 |
1128 			ATA_SUPPORT_NCQ);
1129 		ata_ident->satacapabilities2 = (ATA_SUPPORT_RCVSND_FPDMA_QUEUED |
1130 			(p->ssts & ATA_SS_SPD_MASK) >> 3);
1131 		ata_ident->version_major = 0x3f0;
1132 		ata_ident->version_minor = 0x28;
1133 		ata_ident->support.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE |
1134 			ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
1135 		ata_ident->support.command2 = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
1136 			ATA_SUPPORT_FLUSHCACHE48 | 1 << 14);
1137 		ata_ident->support.extension = (1 << 14);
1138 		ata_ident->enabled.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE |
1139 			ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
1140 		ata_ident->enabled.command2 = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
1141 			ATA_SUPPORT_FLUSHCACHE48 | 1 << 15);
1142 		ata_ident->enabled.extension = (1 << 14);
1143 		ata_ident->udmamodes = 0x7f;
1144 		if (p->xfermode & ATA_UDMA0)
1145 			ata_ident->udmamodes |= (1 << ((p->xfermode & 7) + 8));
1146 		ata_ident->lba_size48_1 = sectors;
1147 		ata_ident->lba_size48_2 = (sectors >> 16);
1148 		ata_ident->lba_size48_3 = (sectors >> 32);
1149 		ata_ident->lba_size48_4 = (sectors >> 48);
1150 
1151 		if (candelete && !ro) {
1152 			ata_ident->support3 |= ATA_SUPPORT_RZAT | ATA_SUPPORT_DRAT;
1153 			ata_ident->max_dsm_blocks = 1;
1154 			ata_ident->support_dsm = ATA_SUPPORT_DSM_TRIM;
1155 		}
1156 		ata_ident->pss = ATA_PSS_VALID_VALUE;
1157 		ata_ident->lsalign = 0x4000;
1158 		if (psectsz > sectsz) {
1159 			ata_ident->pss |= ATA_PSS_MULTLS;
1160 			ata_ident->pss |= ffsl(psectsz / sectsz) - 1;
1161 			ata_ident->lsalign |= (psectoff / sectsz);
1162 		}
1163 		if (sectsz > 512) {
1164 			ata_ident->pss |= ATA_PSS_LSSABOVE512;
1165 			ata_ident->lss_1 = sectsz / 2;
1166 			ata_ident->lss_2 = ((sectsz / 2) >> 16);
1167 		}
1168 		ata_ident->support2 = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
1169 		ata_ident->enabled2 = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
1170 		ata_ident->transport_major = 0x1020;
1171 		ata_ident->integrity = 0x00a5;
1172 	}
1173 	ahci_checksum((uint8_t*)ata_ident, sizeof(struct ata_params));
1174 }
1175 
1176 static void
1177 handle_atapi_identify(struct ahci_port *p, int slot, uint8_t *cfis)
1178 {
1179 	if (!p->atapi) {
1180 		ahci_write_fis_d2h(p, slot, cfis,
1181 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1182 	} else {
1183 		ahci_write_fis_piosetup(p);
1184 		write_prdt(p, slot, cfis, (void *)&p->ata_ident, sizeof(struct ata_params));
1185 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
1186 	}
1187 }
1188 
1189 static void
1190 atapi_inquiry(struct ahci_port *p, int slot, uint8_t *cfis)
1191 {
1192 	uint8_t buf[36];
1193 	uint8_t *acmd;
1194 	unsigned int len;
1195 	uint32_t tfd;
1196 
1197 	acmd = cfis + 0x40;
1198 
1199 	if (acmd[1] & 1) {		/* VPD */
1200 		if (acmd[2] == 0) {	/* Supported VPD pages */
1201 			buf[0] = 0x05;
1202 			buf[1] = 0;
1203 			buf[2] = 0;
1204 			buf[3] = 1;
1205 			buf[4] = 0;
1206 			len = 4 + buf[3];
1207 		} else {
1208 			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1209 			p->asc = 0x24;
1210 			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1211 			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1212 			ahci_write_fis_d2h(p, slot, cfis, tfd);
1213 			return;
1214 		}
1215 	} else {
1216 		buf[0] = 0x05;
1217 		buf[1] = 0x80;
1218 		buf[2] = 0x00;
1219 		buf[3] = 0x21;
1220 		buf[4] = 31;
1221 		buf[5] = 0;
1222 		buf[6] = 0;
1223 		buf[7] = 0;
1224 		atapi_string(buf + 8, "BHYVE", 8);
1225 		atapi_string(buf + 16, "BHYVE DVD-ROM", 16);
1226 		atapi_string(buf + 32, "001", 4);
1227 		len = sizeof(buf);
1228 	}
1229 
1230 	if (len > acmd[4])
1231 		len = acmd[4];
1232 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1233 	write_prdt(p, slot, cfis, buf, len);
1234 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1235 }
1236 
1237 static void
1238 atapi_read_capacity(struct ahci_port *p, int slot, uint8_t *cfis)
1239 {
1240 	uint8_t buf[8];
1241 	uint64_t sectors;
1242 
1243 	sectors = blockif_size(p->bctx) / 2048;
1244 	be32enc(buf, sectors - 1);
1245 	be32enc(buf + 4, 2048);
1246 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1247 	write_prdt(p, slot, cfis, buf, sizeof(buf));
1248 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1249 }
1250 
1251 static void
1252 atapi_read_toc(struct ahci_port *p, int slot, uint8_t *cfis)
1253 {
1254 	uint8_t *acmd;
1255 	uint8_t format;
1256 	unsigned int len;
1257 
1258 	acmd = cfis + 0x40;
1259 
1260 	len = be16dec(acmd + 7);
1261 	format = acmd[9] >> 6;
1262 	switch (format) {
1263 	case 0:
1264 	{
1265 		size_t size;
1266 		int msf;
1267 		uint64_t sectors;
1268 		uint8_t start_track, buf[20], *bp;
1269 
1270 		msf = (acmd[1] >> 1) & 1;
1271 		start_track = acmd[6];
1272 		if (start_track > 1 && start_track != 0xaa) {
1273 			uint32_t tfd;
1274 			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1275 			p->asc = 0x24;
1276 			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1277 			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1278 			ahci_write_fis_d2h(p, slot, cfis, tfd);
1279 			return;
1280 		}
1281 		bp = buf + 2;
1282 		*bp++ = 1;
1283 		*bp++ = 1;
1284 		if (start_track <= 1) {
1285 			*bp++ = 0;
1286 			*bp++ = 0x14;
1287 			*bp++ = 1;
1288 			*bp++ = 0;
1289 			if (msf) {
1290 				*bp++ = 0;
1291 				lba_to_msf(bp, 0);
1292 				bp += 3;
1293 			} else {
1294 				*bp++ = 0;
1295 				*bp++ = 0;
1296 				*bp++ = 0;
1297 				*bp++ = 0;
1298 			}
1299 		}
1300 		*bp++ = 0;
1301 		*bp++ = 0x14;
1302 		*bp++ = 0xaa;
1303 		*bp++ = 0;
1304 		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
1305 		sectors >>= 2;
1306 		if (msf) {
1307 			*bp++ = 0;
1308 			lba_to_msf(bp, sectors);
1309 			bp += 3;
1310 		} else {
1311 			be32enc(bp, sectors);
1312 			bp += 4;
1313 		}
1314 		size = bp - buf;
1315 		be16enc(buf, size - 2);
1316 		if (len > size)
1317 			len = size;
1318 		write_prdt(p, slot, cfis, buf, len);
1319 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1320 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1321 		break;
1322 	}
1323 	case 1:
1324 	{
1325 		uint8_t buf[12];
1326 
1327 		memset(buf, 0, sizeof(buf));
1328 		buf[1] = 0xa;
1329 		buf[2] = 0x1;
1330 		buf[3] = 0x1;
1331 		if (len > sizeof(buf))
1332 			len = sizeof(buf);
1333 		write_prdt(p, slot, cfis, buf, len);
1334 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1335 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1336 		break;
1337 	}
1338 	case 2:
1339 	{
1340 		size_t size;
1341 		int msf;
1342 		uint64_t sectors;
1343 		uint8_t *bp, buf[50];
1344 
1345 		msf = (acmd[1] >> 1) & 1;
1346 		bp = buf + 2;
1347 		*bp++ = 1;
1348 		*bp++ = 1;
1349 
1350 		*bp++ = 1;
1351 		*bp++ = 0x14;
1352 		*bp++ = 0;
1353 		*bp++ = 0xa0;
1354 		*bp++ = 0;
1355 		*bp++ = 0;
1356 		*bp++ = 0;
1357 		*bp++ = 0;
1358 		*bp++ = 1;
1359 		*bp++ = 0;
1360 		*bp++ = 0;
1361 
1362 		*bp++ = 1;
1363 		*bp++ = 0x14;
1364 		*bp++ = 0;
1365 		*bp++ = 0xa1;
1366 		*bp++ = 0;
1367 		*bp++ = 0;
1368 		*bp++ = 0;
1369 		*bp++ = 0;
1370 		*bp++ = 1;
1371 		*bp++ = 0;
1372 		*bp++ = 0;
1373 
1374 		*bp++ = 1;
1375 		*bp++ = 0x14;
1376 		*bp++ = 0;
1377 		*bp++ = 0xa2;
1378 		*bp++ = 0;
1379 		*bp++ = 0;
1380 		*bp++ = 0;
1381 		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
1382 		sectors >>= 2;
1383 		if (msf) {
1384 			*bp++ = 0;
1385 			lba_to_msf(bp, sectors);
1386 			bp += 3;
1387 		} else {
1388 			be32enc(bp, sectors);
1389 			bp += 4;
1390 		}
1391 
1392 		*bp++ = 1;
1393 		*bp++ = 0x14;
1394 		*bp++ = 0;
1395 		*bp++ = 1;
1396 		*bp++ = 0;
1397 		*bp++ = 0;
1398 		*bp++ = 0;
1399 		if (msf) {
1400 			*bp++ = 0;
1401 			lba_to_msf(bp, 0);
1402 			bp += 3;
1403 		} else {
1404 			*bp++ = 0;
1405 			*bp++ = 0;
1406 			*bp++ = 0;
1407 			*bp++ = 0;
1408 		}
1409 
1410 		size = bp - buf;
1411 		be16enc(buf, size - 2);
1412 		if (len > size)
1413 			len = size;
1414 		write_prdt(p, slot, cfis, buf, len);
1415 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1416 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1417 		break;
1418 	}
1419 	default:
1420 	{
1421 		uint32_t tfd;
1422 
1423 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1424 		p->asc = 0x24;
1425 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1426 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1427 		ahci_write_fis_d2h(p, slot, cfis, tfd);
1428 		break;
1429 	}
1430 	}
1431 }
1432 
1433 static void
1434 atapi_report_luns(struct ahci_port *p, int slot, uint8_t *cfis)
1435 {
1436 	uint8_t buf[16];
1437 
1438 	memset(buf, 0, sizeof(buf));
1439 	buf[3] = 8;
1440 
1441 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1442 	write_prdt(p, slot, cfis, buf, sizeof(buf));
1443 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1444 }
1445 
1446 static void
1447 atapi_read(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
1448 {
1449 	struct ahci_ioreq *aior;
1450 	struct ahci_cmd_hdr *hdr;
1451 	struct ahci_prdt_entry *prdt;
1452 	struct blockif_req *breq;
1453 	uint8_t *acmd;
1454 	uint64_t lba;
1455 	uint32_t len;
1456 	int err;
1457 
1458 	acmd = cfis + 0x40;
1459 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1460 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1461 
1462 	lba = be32dec(acmd + 2);
1463 	if (acmd[0] == READ_10)
1464 		len = be16dec(acmd + 7);
1465 	else
1466 		len = be32dec(acmd + 6);
1467 	if (len == 0) {
1468 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1469 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1470 	}
1471 	lba *= 2048;
1472 	len *= 2048;
1473 
1474 	/*
1475 	 * Pull request off free list
1476 	 */
1477 	aior = STAILQ_FIRST(&p->iofhd);
1478 	assert(aior != NULL);
1479 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
1480 	aior->cfis = cfis;
1481 	aior->slot = slot;
1482 	aior->len = len;
1483 	aior->done = done;
1484 	aior->readop = 1;
1485 	breq = &aior->io_req;
1486 	breq->br_offset = lba + done;
1487 	ahci_build_iov(p, aior, prdt, hdr->prdtl);
1488 
1489 	/* Mark this command in-flight. */
1490 	p->pending |= 1 << slot;
1491 
1492 	/* Stuff request onto busy list. */
1493 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
1494 
1495 	err = blockif_read(p->bctx, breq);
1496 	assert(err == 0);
1497 }
1498 
1499 static void
1500 atapi_request_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1501 {
1502 	uint8_t buf[64];
1503 	uint8_t *acmd;
1504 	unsigned int len;
1505 
1506 	acmd = cfis + 0x40;
1507 	len = acmd[4];
1508 	if (len > sizeof(buf))
1509 		len = sizeof(buf);
1510 	memset(buf, 0, len);
1511 	buf[0] = 0x70 | (1 << 7);
1512 	buf[2] = p->sense_key;
1513 	buf[7] = 10;
1514 	buf[12] = p->asc;
1515 	write_prdt(p, slot, cfis, buf, len);
1516 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1517 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1518 }
1519 
1520 static void
1521 atapi_start_stop_unit(struct ahci_port *p, int slot, uint8_t *cfis)
1522 {
1523 	uint8_t *acmd = cfis + 0x40;
1524 	uint32_t tfd;
1525 
1526 	switch (acmd[4] & 3) {
1527 	case 0:
1528 	case 1:
1529 	case 3:
1530 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1531 		tfd = ATA_S_READY | ATA_S_DSC;
1532 		break;
1533 	case 2:
1534 		/* TODO eject media */
1535 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1536 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1537 		p->asc = 0x53;
1538 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1539 		break;
1540 	}
1541 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1542 }
1543 
1544 static void
1545 atapi_mode_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1546 {
1547 	uint8_t *acmd;
1548 	uint32_t tfd;
1549 	uint8_t pc, code;
1550 	unsigned int len;
1551 
1552 	acmd = cfis + 0x40;
1553 	len = be16dec(acmd + 7);
1554 	pc = acmd[2] >> 6;
1555 	code = acmd[2] & 0x3f;
1556 
1557 	switch (pc) {
1558 	case 0:
1559 		switch (code) {
1560 		case MODEPAGE_RW_ERROR_RECOVERY:
1561 		{
1562 			uint8_t buf[16];
1563 
1564 			if (len > sizeof(buf))
1565 				len = sizeof(buf);
1566 
1567 			memset(buf, 0, sizeof(buf));
1568 			be16enc(buf, 16 - 2);
1569 			buf[2] = 0x70;
1570 			buf[8] = 0x01;
1571 			buf[9] = 16 - 10;
1572 			buf[11] = 0x05;
1573 			write_prdt(p, slot, cfis, buf, len);
1574 			tfd = ATA_S_READY | ATA_S_DSC;
1575 			break;
1576 		}
1577 		case MODEPAGE_CD_CAPABILITIES:
1578 		{
1579 			uint8_t buf[30];
1580 
1581 			if (len > sizeof(buf))
1582 				len = sizeof(buf);
1583 
1584 			memset(buf, 0, sizeof(buf));
1585 			be16enc(buf, 30 - 2);
1586 			buf[2] = 0x70;
1587 			buf[8] = 0x2A;
1588 			buf[9] = 30 - 10;
1589 			buf[10] = 0x08;
1590 			buf[12] = 0x71;
1591 			be16enc(&buf[18], 2);
1592 			be16enc(&buf[20], 512);
1593 			write_prdt(p, slot, cfis, buf, len);
1594 			tfd = ATA_S_READY | ATA_S_DSC;
1595 			break;
1596 		}
1597 		default:
1598 			goto error;
1599 			break;
1600 		}
1601 		break;
1602 	case 3:
1603 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1604 		p->asc = 0x39;
1605 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1606 		break;
1607 error:
1608 	case 1:
1609 	case 2:
1610 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1611 		p->asc = 0x24;
1612 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1613 		break;
1614 	}
1615 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1616 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1617 }
1618 
1619 static void
1620 atapi_get_event_status_notification(struct ahci_port *p, int slot,
1621     uint8_t *cfis)
1622 {
1623 	uint8_t *acmd;
1624 	uint32_t tfd;
1625 
1626 	acmd = cfis + 0x40;
1627 
1628 	/* we don't support asynchronous operation */
1629 	if (!(acmd[1] & 1)) {
1630 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1631 		p->asc = 0x24;
1632 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1633 	} else {
1634 		uint8_t buf[8];
1635 		unsigned int len;
1636 
1637 		len = be16dec(acmd + 7);
1638 		if (len > sizeof(buf))
1639 			len = sizeof(buf);
1640 
1641 		memset(buf, 0, sizeof(buf));
1642 		be16enc(buf, 8 - 2);
1643 		buf[2] = 0x04;
1644 		buf[3] = 0x10;
1645 		buf[5] = 0x02;
1646 		write_prdt(p, slot, cfis, buf, len);
1647 		tfd = ATA_S_READY | ATA_S_DSC;
1648 	}
1649 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1650 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1651 }
1652 
1653 static void
1654 handle_packet_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1655 {
1656 	uint8_t *acmd;
1657 
1658 	acmd = cfis + 0x40;
1659 
1660 #ifdef AHCI_DEBUG
1661 	{
1662 		int i;
1663 		DPRINTF("ACMD:");
1664 		for (i = 0; i < 16; i++)
1665 			DPRINTF("%02x ", acmd[i]);
1666 		DPRINTF("");
1667 	}
1668 #endif
1669 
1670 	switch (acmd[0]) {
1671 	case TEST_UNIT_READY:
1672 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1673 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1674 		break;
1675 	case INQUIRY:
1676 		atapi_inquiry(p, slot, cfis);
1677 		break;
1678 	case READ_CAPACITY:
1679 		atapi_read_capacity(p, slot, cfis);
1680 		break;
1681 	case PREVENT_ALLOW:
1682 		/* TODO */
1683 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1684 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1685 		break;
1686 	case READ_TOC:
1687 		atapi_read_toc(p, slot, cfis);
1688 		break;
1689 	case REPORT_LUNS:
1690 		atapi_report_luns(p, slot, cfis);
1691 		break;
1692 	case READ_10:
1693 	case READ_12:
1694 		atapi_read(p, slot, cfis, 0);
1695 		break;
1696 	case REQUEST_SENSE:
1697 		atapi_request_sense(p, slot, cfis);
1698 		break;
1699 	case START_STOP_UNIT:
1700 		atapi_start_stop_unit(p, slot, cfis);
1701 		break;
1702 	case MODE_SENSE_10:
1703 		atapi_mode_sense(p, slot, cfis);
1704 		break;
1705 	case GET_EVENT_STATUS_NOTIFICATION:
1706 		atapi_get_event_status_notification(p, slot, cfis);
1707 		break;
1708 	default:
1709 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1710 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1711 		p->asc = 0x20;
1712 		ahci_write_fis_d2h(p, slot, cfis, (p->sense_key << 12) |
1713 				ATA_S_READY | ATA_S_ERROR);
1714 		break;
1715 	}
1716 }
1717 
1718 static void
1719 ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1720 {
1721 
1722 	p->tfd |= ATA_S_BUSY;
1723 	switch (cfis[2]) {
1724 	case ATA_ATA_IDENTIFY:
1725 		handle_identify(p, slot, cfis);
1726 		break;
1727 	case ATA_SETFEATURES:
1728 	{
1729 		switch (cfis[3]) {
1730 		case ATA_SF_ENAB_SATA_SF:
1731 			switch (cfis[12]) {
1732 			case ATA_SATA_SF_AN:
1733 				p->tfd = ATA_S_DSC | ATA_S_READY;
1734 				break;
1735 			default:
1736 				p->tfd = ATA_S_ERROR | ATA_S_READY;
1737 				p->tfd |= (ATA_ERROR_ABORT << 8);
1738 				break;
1739 			}
1740 			break;
1741 		case ATA_SF_ENAB_WCACHE:
1742 		case ATA_SF_DIS_WCACHE:
1743 		case ATA_SF_ENAB_RCACHE:
1744 		case ATA_SF_DIS_RCACHE:
1745 			p->tfd = ATA_S_DSC | ATA_S_READY;
1746 			break;
1747 		case ATA_SF_SETXFER:
1748 		{
1749 			switch (cfis[12] & 0xf8) {
1750 			case ATA_PIO:
1751 			case ATA_PIO0:
1752 				break;
1753 			case ATA_WDMA0:
1754 			case ATA_UDMA0:
1755 				p->xfermode = (cfis[12] & 0x7);
1756 				break;
1757 			}
1758 			p->tfd = ATA_S_DSC | ATA_S_READY;
1759 			break;
1760 		}
1761 		default:
1762 			p->tfd = ATA_S_ERROR | ATA_S_READY;
1763 			p->tfd |= (ATA_ERROR_ABORT << 8);
1764 			break;
1765 		}
1766 		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1767 		break;
1768 	}
1769 	case ATA_SET_MULTI:
1770 		if (cfis[12] != 0 &&
1771 			(cfis[12] > 128 || (cfis[12] & (cfis[12] - 1)))) {
1772 			p->tfd = ATA_S_ERROR | ATA_S_READY;
1773 			p->tfd |= (ATA_ERROR_ABORT << 8);
1774 		} else {
1775 			p->mult_sectors = cfis[12];
1776 			p->tfd = ATA_S_DSC | ATA_S_READY;
1777 		}
1778 		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1779 		break;
1780 	case ATA_READ:
1781 	case ATA_WRITE:
1782 	case ATA_READ48:
1783 	case ATA_WRITE48:
1784 	case ATA_READ_MUL:
1785 	case ATA_WRITE_MUL:
1786 	case ATA_READ_MUL48:
1787 	case ATA_WRITE_MUL48:
1788 	case ATA_READ_DMA:
1789 	case ATA_WRITE_DMA:
1790 	case ATA_READ_DMA48:
1791 	case ATA_WRITE_DMA48:
1792 	case ATA_READ_FPDMA_QUEUED:
1793 	case ATA_WRITE_FPDMA_QUEUED:
1794 		ahci_handle_rw(p, slot, cfis, 0);
1795 		break;
1796 	case ATA_FLUSHCACHE:
1797 	case ATA_FLUSHCACHE48:
1798 		ahci_handle_flush(p, slot, cfis);
1799 		break;
1800 	case ATA_DATA_SET_MANAGEMENT:
1801 		if (cfis[11] == 0 && cfis[3] == ATA_DSM_TRIM &&
1802 		    cfis[13] == 0 && cfis[12] == 1) {
1803 			ahci_handle_dsm_trim(p, slot, cfis);
1804 			break;
1805 		}
1806 		ahci_write_fis_d2h(p, slot, cfis,
1807 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1808 		break;
1809 	case ATA_SEND_FPDMA_QUEUED:
1810 		if ((cfis[13] & 0x1f) == ATA_SFPDMA_DSM &&
1811 		    cfis[17] == 0 && cfis[16] == ATA_DSM_TRIM &&
1812 		    cfis[11] == 0 && cfis[3] == 1) {
1813 			ahci_handle_dsm_trim(p, slot, cfis);
1814 			break;
1815 		}
1816 		ahci_write_fis_d2h(p, slot, cfis,
1817 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1818 		break;
1819 	case ATA_READ_LOG_EXT:
1820 	case ATA_READ_LOG_DMA_EXT:
1821 		ahci_handle_read_log(p, slot, cfis);
1822 		break;
1823 	case ATA_SECURITY_FREEZE_LOCK:
1824 	case ATA_SMART_CMD:
1825 	case ATA_NOP:
1826 		ahci_write_fis_d2h(p, slot, cfis,
1827 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1828 		break;
1829 	case ATA_CHECK_POWER_MODE:
1830 		cfis[12] = 0xff;	/* always on */
1831 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1832 		break;
1833 	case ATA_STANDBY_CMD:
1834 	case ATA_STANDBY_IMMEDIATE:
1835 	case ATA_IDLE_CMD:
1836 	case ATA_IDLE_IMMEDIATE:
1837 	case ATA_SLEEP:
1838 	case ATA_READ_VERIFY:
1839 	case ATA_READ_VERIFY48:
1840 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1841 		break;
1842 	case ATA_ATAPI_IDENTIFY:
1843 		handle_atapi_identify(p, slot, cfis);
1844 		break;
1845 	case ATA_PACKET_CMD:
1846 		if (!p->atapi) {
1847 			ahci_write_fis_d2h(p, slot, cfis,
1848 			    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1849 		} else
1850 			handle_packet_cmd(p, slot, cfis);
1851 		break;
1852 	default:
1853 		EPRINTLN("Unsupported cmd:%02x", cfis[2]);
1854 		ahci_write_fis_d2h(p, slot, cfis,
1855 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1856 		break;
1857 	}
1858 }
1859 
1860 static void
1861 ahci_handle_slot(struct ahci_port *p, int slot)
1862 {
1863 	struct ahci_cmd_hdr *hdr;
1864 #ifdef AHCI_DEBUG
1865 	struct ahci_prdt_entry *prdt;
1866 #endif
1867 	struct pci_ahci_softc *sc;
1868 	uint8_t *cfis;
1869 #ifdef AHCI_DEBUG
1870 	int cfl, i;
1871 #endif
1872 
1873 	sc = p->pr_sc;
1874 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1875 #ifdef AHCI_DEBUG
1876 	cfl = (hdr->flags & 0x1f) * 4;
1877 #endif
1878 	cfis = paddr_guest2host(ahci_ctx(sc), hdr->ctba,
1879 			0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry));
1880 #ifdef AHCI_DEBUG
1881 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1882 
1883 	DPRINTF("cfis:");
1884 	for (i = 0; i < cfl; i++) {
1885 		if (i % 10 == 0)
1886 			DPRINTF("");
1887 		DPRINTF("%02x ", cfis[i]);
1888 	}
1889 	DPRINTF("");
1890 
1891 	for (i = 0; i < hdr->prdtl; i++) {
1892 		DPRINTF("%d@%08"PRIx64"", prdt->dbc & 0x3fffff, prdt->dba);
1893 		prdt++;
1894 	}
1895 #endif
1896 
1897 	if (cfis[0] != FIS_TYPE_REGH2D) {
1898 		EPRINTLN("Not a H2D FIS:%02x", cfis[0]);
1899 		return;
1900 	}
1901 
1902 	if (cfis[1] & 0x80) {
1903 		ahci_handle_cmd(p, slot, cfis);
1904 	} else {
1905 		if (cfis[15] & (1 << 2))
1906 			p->reset = 1;
1907 		else if (p->reset) {
1908 			p->reset = 0;
1909 			ahci_port_reset(p);
1910 		}
1911 		p->ci &= ~(1 << slot);
1912 	}
1913 }
1914 
1915 static void
1916 ahci_handle_port(struct ahci_port *p)
1917 {
1918 
1919 	if (!(p->cmd & AHCI_P_CMD_ST))
1920 		return;
1921 
1922 	/*
1923 	 * Search for any new commands to issue ignoring those that
1924 	 * are already in-flight.  Stop if device is busy or in error.
1925 	 */
1926 	for (; (p->ci & ~p->pending) != 0; p->ccs = ((p->ccs + 1) & 31)) {
1927 		if ((p->tfd & (ATA_S_BUSY | ATA_S_DRQ)) != 0)
1928 			break;
1929 		if (p->waitforclear)
1930 			break;
1931 		if ((p->ci & ~p->pending & (1 << p->ccs)) != 0) {
1932 			p->cmd &= ~AHCI_P_CMD_CCS_MASK;
1933 			p->cmd |= p->ccs << AHCI_P_CMD_CCS_SHIFT;
1934 			ahci_handle_slot(p, p->ccs);
1935 		}
1936 	}
1937 }
1938 
1939 /*
1940  * blockif callback routine - this runs in the context of the blockif
1941  * i/o thread, so the mutex needs to be acquired.
1942  */
1943 static void
1944 ata_ioreq_cb(struct blockif_req *br, int err)
1945 {
1946 	struct ahci_cmd_hdr *hdr;
1947 	struct ahci_ioreq *aior;
1948 	struct ahci_port *p;
1949 	struct pci_ahci_softc *sc;
1950 	uint32_t tfd;
1951 	uint8_t *cfis, *dsm;
1952 	int slot, ncq;
1953 
1954 	DPRINTF("%s %d", __func__, err);
1955 
1956 	ncq = 0;
1957 	aior = br->br_param;
1958 	p = aior->io_pr;
1959 	cfis = aior->cfis;
1960 	slot = aior->slot;
1961 	sc = p->pr_sc;
1962 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1963 
1964 	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
1965 	    cfis[2] == ATA_READ_FPDMA_QUEUED ||
1966 	    cfis[2] == ATA_SEND_FPDMA_QUEUED)
1967 		ncq = 1;
1968 	dsm = aior->dsm;
1969 	aior->dsm = NULL;
1970 
1971 	pthread_mutex_lock(&sc->mtx);
1972 
1973 	/*
1974 	 * Delete the blockif request from the busy list
1975 	 */
1976 	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1977 
1978 	/*
1979 	 * Move the blockif request back to the free list
1980 	 */
1981 	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
1982 
1983 	if (!err)
1984 		hdr->prdbc = aior->done;
1985 
1986 	if (!err && aior->more) {
1987 		if (dsm != NULL)
1988 			ahci_handle_next_trim(p, slot, cfis, dsm,
1989 			    aior->len, aior->done);
1990 		else
1991 			ahci_handle_rw(p, slot, cfis, aior->done);
1992 		goto out;
1993 	}
1994 
1995 	if (!err)
1996 		tfd = ATA_S_READY | ATA_S_DSC;
1997 	else
1998 		tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1999 	if (ncq)
2000 		ahci_write_fis_sdb(p, slot, cfis, tfd);
2001 	else
2002 		ahci_write_fis_d2h(p, slot, cfis, tfd);
2003 
2004 	/*
2005 	 * This command is now complete.
2006 	 */
2007 	p->pending &= ~(1 << slot);
2008 
2009 	ahci_check_stopped(p);
2010 	ahci_handle_port(p);
2011 	free(dsm);
2012 out:
2013 	pthread_mutex_unlock(&sc->mtx);
2014 	DPRINTF("%s exit", __func__);
2015 }
2016 
2017 static void
2018 atapi_ioreq_cb(struct blockif_req *br, int err)
2019 {
2020 	struct ahci_cmd_hdr *hdr;
2021 	struct ahci_ioreq *aior;
2022 	struct ahci_port *p;
2023 	struct pci_ahci_softc *sc;
2024 	uint8_t *cfis;
2025 	uint32_t tfd;
2026 	int slot;
2027 
2028 	DPRINTF("%s %d", __func__, err);
2029 
2030 	aior = br->br_param;
2031 	p = aior->io_pr;
2032 	cfis = aior->cfis;
2033 	slot = aior->slot;
2034 	sc = p->pr_sc;
2035 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + aior->slot * AHCI_CL_SIZE);
2036 
2037 	pthread_mutex_lock(&sc->mtx);
2038 
2039 	/*
2040 	 * Delete the blockif request from the busy list
2041 	 */
2042 	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
2043 
2044 	/*
2045 	 * Move the blockif request back to the free list
2046 	 */
2047 	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
2048 
2049 	if (!err)
2050 		hdr->prdbc = aior->done;
2051 
2052 	if (!err && aior->more) {
2053 		atapi_read(p, slot, cfis, aior->done);
2054 		goto out;
2055 	}
2056 
2057 	if (!err) {
2058 		tfd = ATA_S_READY | ATA_S_DSC;
2059 	} else {
2060 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
2061 		p->asc = 0x21;
2062 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
2063 	}
2064 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
2065 	ahci_write_fis_d2h(p, slot, cfis, tfd);
2066 
2067 	/*
2068 	 * This command is now complete.
2069 	 */
2070 	p->pending &= ~(1 << slot);
2071 
2072 	ahci_check_stopped(p);
2073 	ahci_handle_port(p);
2074 out:
2075 	pthread_mutex_unlock(&sc->mtx);
2076 	DPRINTF("%s exit", __func__);
2077 }
2078 
2079 static void
2080 pci_ahci_ioreq_init(struct ahci_port *pr)
2081 {
2082 	struct ahci_ioreq *vr;
2083 	int i;
2084 
2085 	pr->ioqsz = blockif_queuesz(pr->bctx);
2086 	pr->ioreq = calloc(pr->ioqsz, sizeof(struct ahci_ioreq));
2087 	STAILQ_INIT(&pr->iofhd);
2088 
2089 	/*
2090 	 * Add all i/o request entries to the free queue
2091 	 */
2092 	for (i = 0; i < pr->ioqsz; i++) {
2093 		vr = &pr->ioreq[i];
2094 		vr->io_pr = pr;
2095 		if (!pr->atapi)
2096 			vr->io_req.br_callback = ata_ioreq_cb;
2097 		else
2098 			vr->io_req.br_callback = atapi_ioreq_cb;
2099 		vr->io_req.br_param = vr;
2100 		STAILQ_INSERT_TAIL(&pr->iofhd, vr, io_flist);
2101 	}
2102 
2103 	TAILQ_INIT(&pr->iobhd);
2104 }
2105 
2106 static void
2107 pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
2108 {
2109 	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
2110 	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
2111 	struct ahci_port *p = &sc->port[port];
2112 
2113 	DPRINTF("pci_ahci_port %d: write offset 0x%"PRIx64" value 0x%"PRIx64"",
2114 		port, offset, value);
2115 
2116 	switch (offset) {
2117 	case AHCI_P_CLB:
2118 		p->clb = value;
2119 		break;
2120 	case AHCI_P_CLBU:
2121 		p->clbu = value;
2122 		break;
2123 	case AHCI_P_FB:
2124 		p->fb = value;
2125 		break;
2126 	case AHCI_P_FBU:
2127 		p->fbu = value;
2128 		break;
2129 	case AHCI_P_IS:
2130 		p->is &= ~value;
2131 		ahci_port_intr(p);
2132 		break;
2133 	case AHCI_P_IE:
2134 		p->ie = value & 0xFDC000FF;
2135 		ahci_port_intr(p);
2136 		break;
2137 	case AHCI_P_CMD:
2138 	{
2139 		p->cmd &= ~(AHCI_P_CMD_ST | AHCI_P_CMD_SUD | AHCI_P_CMD_POD |
2140 		    AHCI_P_CMD_CLO | AHCI_P_CMD_FRE | AHCI_P_CMD_APSTE |
2141 		    AHCI_P_CMD_ATAPI | AHCI_P_CMD_DLAE | AHCI_P_CMD_ALPE |
2142 		    AHCI_P_CMD_ASP | AHCI_P_CMD_ICC_MASK);
2143 		p->cmd |= (AHCI_P_CMD_ST | AHCI_P_CMD_SUD | AHCI_P_CMD_POD |
2144 		    AHCI_P_CMD_CLO | AHCI_P_CMD_FRE | AHCI_P_CMD_APSTE |
2145 		    AHCI_P_CMD_ATAPI | AHCI_P_CMD_DLAE | AHCI_P_CMD_ALPE |
2146 		    AHCI_P_CMD_ASP | AHCI_P_CMD_ICC_MASK) & value;
2147 
2148 		if (!(value & AHCI_P_CMD_ST)) {
2149 			ahci_port_stop(p);
2150 		} else {
2151 			uint64_t clb;
2152 
2153 			p->cmd |= AHCI_P_CMD_CR;
2154 			clb = (uint64_t)p->clbu << 32 | p->clb;
2155 			p->cmd_lst = paddr_guest2host(ahci_ctx(sc), clb,
2156 					AHCI_CL_SIZE * AHCI_MAX_SLOTS);
2157 		}
2158 
2159 		if (value & AHCI_P_CMD_FRE) {
2160 			uint64_t fb;
2161 
2162 			p->cmd |= AHCI_P_CMD_FR;
2163 			fb = (uint64_t)p->fbu << 32 | p->fb;
2164 			/* we don't support FBSCP, so rfis size is 256Bytes */
2165 			p->rfis = paddr_guest2host(ahci_ctx(sc), fb, 256);
2166 		} else {
2167 			p->cmd &= ~AHCI_P_CMD_FR;
2168 		}
2169 
2170 		if (value & AHCI_P_CMD_CLO) {
2171 			p->tfd &= ~(ATA_S_BUSY | ATA_S_DRQ);
2172 			p->cmd &= ~AHCI_P_CMD_CLO;
2173 		}
2174 
2175 		if (value & AHCI_P_CMD_ICC_MASK) {
2176 			p->cmd &= ~AHCI_P_CMD_ICC_MASK;
2177 		}
2178 
2179 		ahci_handle_port(p);
2180 		break;
2181 	}
2182 	case AHCI_P_TFD:
2183 	case AHCI_P_SIG:
2184 	case AHCI_P_SSTS:
2185 		EPRINTLN("pci_ahci_port: read only registers 0x%"PRIx64"", offset);
2186 		break;
2187 	case AHCI_P_SCTL:
2188 		p->sctl = value;
2189 		if (!(p->cmd & AHCI_P_CMD_ST)) {
2190 			if (value & ATA_SC_DET_RESET)
2191 				ahci_port_reset(p);
2192 		}
2193 		break;
2194 	case AHCI_P_SERR:
2195 		p->serr &= ~value;
2196 		break;
2197 	case AHCI_P_SACT:
2198 		p->sact |= value;
2199 		break;
2200 	case AHCI_P_CI:
2201 		p->ci |= value;
2202 		ahci_handle_port(p);
2203 		break;
2204 	case AHCI_P_SNTF:
2205 	case AHCI_P_FBS:
2206 	default:
2207 		break;
2208 	}
2209 }
2210 
2211 static void
2212 pci_ahci_host_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
2213 {
2214 	DPRINTF("pci_ahci_host: write offset 0x%"PRIx64" value 0x%"PRIx64"",
2215 		offset, value);
2216 
2217 	switch (offset) {
2218 	case AHCI_CAP:
2219 	case AHCI_PI:
2220 	case AHCI_VS:
2221 	case AHCI_CAP2:
2222 		DPRINTF("pci_ahci_host: read only registers 0x%"PRIx64"", offset);
2223 		break;
2224 	case AHCI_GHC:
2225 		if (value & AHCI_GHC_HR) {
2226 			ahci_reset(sc);
2227 			break;
2228 		}
2229 		if (value & AHCI_GHC_IE)
2230 			sc->ghc |= AHCI_GHC_IE;
2231 		else
2232 			sc->ghc &= ~AHCI_GHC_IE;
2233 		ahci_generate_intr(sc, 0xffffffff);
2234 		break;
2235 	case AHCI_IS:
2236 		sc->is &= ~value;
2237 		ahci_generate_intr(sc, value);
2238 		break;
2239 	default:
2240 		break;
2241 	}
2242 }
2243 
2244 static void
2245 pci_ahci_write(struct pci_devinst *pi, int baridx, uint64_t offset, int size,
2246     uint64_t value)
2247 {
2248 	struct pci_ahci_softc *sc = pi->pi_arg;
2249 
2250 	assert(baridx == 5);
2251 	assert((offset % 4) == 0 && size == 4);
2252 
2253 	pthread_mutex_lock(&sc->mtx);
2254 
2255 	if (offset < AHCI_OFFSET)
2256 		pci_ahci_host_write(sc, offset, value);
2257 	else if (offset < (uint64_t)AHCI_OFFSET + sc->ports * AHCI_STEP)
2258 		pci_ahci_port_write(sc, offset, value);
2259 	else
2260 		EPRINTLN("pci_ahci: unknown i/o write offset 0x%"PRIx64"", offset);
2261 
2262 	pthread_mutex_unlock(&sc->mtx);
2263 }
2264 
2265 static uint64_t
2266 pci_ahci_host_read(struct pci_ahci_softc *sc, uint64_t offset)
2267 {
2268 	uint32_t value;
2269 
2270 	switch (offset) {
2271 	case AHCI_CAP:
2272 	case AHCI_GHC:
2273 	case AHCI_IS:
2274 	case AHCI_PI:
2275 	case AHCI_VS:
2276 	case AHCI_CCCC:
2277 	case AHCI_CCCP:
2278 	case AHCI_EM_LOC:
2279 	case AHCI_EM_CTL:
2280 	case AHCI_CAP2:
2281 	{
2282 		uint32_t *p = &sc->cap;
2283 		p += (offset - AHCI_CAP) / sizeof(uint32_t);
2284 		value = *p;
2285 		break;
2286 	}
2287 	default:
2288 		value = 0;
2289 		break;
2290 	}
2291 	DPRINTF("pci_ahci_host: read offset 0x%"PRIx64" value 0x%x",
2292 		offset, value);
2293 
2294 	return (value);
2295 }
2296 
2297 static uint64_t
2298 pci_ahci_port_read(struct pci_ahci_softc *sc, uint64_t offset)
2299 {
2300 	uint32_t value;
2301 	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
2302 	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
2303 
2304 	switch (offset) {
2305 	case AHCI_P_CLB:
2306 	case AHCI_P_CLBU:
2307 	case AHCI_P_FB:
2308 	case AHCI_P_FBU:
2309 	case AHCI_P_IS:
2310 	case AHCI_P_IE:
2311 	case AHCI_P_CMD:
2312 	case AHCI_P_TFD:
2313 	case AHCI_P_SIG:
2314 	case AHCI_P_SSTS:
2315 	case AHCI_P_SCTL:
2316 	case AHCI_P_SERR:
2317 	case AHCI_P_SACT:
2318 	case AHCI_P_CI:
2319 	case AHCI_P_SNTF:
2320 	case AHCI_P_FBS:
2321 	{
2322 		uint32_t *p= &sc->port[port].clb;
2323 		p += (offset - AHCI_P_CLB) / sizeof(uint32_t);
2324 		value = *p;
2325 		break;
2326 	}
2327 	default:
2328 		value = 0;
2329 		break;
2330 	}
2331 
2332 	DPRINTF("pci_ahci_port %d: read offset 0x%"PRIx64" value 0x%x",
2333 		port, offset, value);
2334 
2335 	return value;
2336 }
2337 
2338 static uint64_t
2339 pci_ahci_read(struct pci_devinst *pi, int baridx, uint64_t regoff, int size)
2340 {
2341 	struct pci_ahci_softc *sc = pi->pi_arg;
2342 	uint64_t offset;
2343 	uint32_t value;
2344 
2345 	assert(baridx == 5);
2346 	assert(size == 1 || size == 2 || size == 4);
2347 	assert((regoff & (size - 1)) == 0);
2348 
2349 	pthread_mutex_lock(&sc->mtx);
2350 
2351 	offset = regoff & ~0x3;	    /* round down to a multiple of 4 bytes */
2352 	if (offset < AHCI_OFFSET)
2353 		value = pci_ahci_host_read(sc, offset);
2354 	else if (offset < (uint64_t)AHCI_OFFSET + sc->ports * AHCI_STEP)
2355 		value = pci_ahci_port_read(sc, offset);
2356 	else {
2357 		value = 0;
2358 		EPRINTLN("pci_ahci: unknown i/o read offset 0x%"PRIx64"",
2359 		    regoff);
2360 	}
2361 	value >>= 8 * (regoff & 0x3);
2362 
2363 	pthread_mutex_unlock(&sc->mtx);
2364 
2365 	return (value);
2366 }
2367 
2368 /*
2369  * Each AHCI controller has a "port" node which contains nodes for
2370  * each port named after the decimal number of the port (no leading
2371  * zeroes).  Port nodes contain a "type" ("hd" or "cd"), as well as
2372  * options for blockif.  For example:
2373  *
2374  * pci.0.1.0
2375  *          .device="ahci"
2376  *          .port
2377  *               .0
2378  *                 .type="hd"
2379  *                 .path="/path/to/image"
2380  */
2381 static int
2382 pci_ahci_legacy_config_port(nvlist_t *nvl, int port, const char *type,
2383     const char *opts)
2384 {
2385 	char node_name[sizeof("XX")];
2386 	nvlist_t *port_nvl;
2387 
2388 	snprintf(node_name, sizeof(node_name), "%d", port);
2389 	port_nvl = create_relative_config_node(nvl, node_name);
2390 	set_config_value_node(port_nvl, "type", type);
2391 	return (blockif_legacy_config(port_nvl, opts));
2392 }
2393 
2394 static int
2395 pci_ahci_legacy_config(nvlist_t *nvl, const char *opts)
2396 {
2397 	nvlist_t *ports_nvl;
2398 	const char *type;
2399 	char *next, *next2, *str, *tofree;
2400 	int p, ret;
2401 
2402 	if (opts == NULL)
2403 		return (0);
2404 
2405 	ports_nvl = create_relative_config_node(nvl, "port");
2406 	ret = 1;
2407 	tofree = str = strdup(opts);
2408 	for (p = 0; p < MAX_PORTS && str != NULL; p++, str = next) {
2409 		/* Identify and cut off type of present port. */
2410 		if (strncmp(str, "hd:", 3) == 0) {
2411 			type = "hd";
2412 			str += 3;
2413 		} else if (strncmp(str, "cd:", 3) == 0) {
2414 			type = "cd";
2415 			str += 3;
2416 		} else
2417 			type = NULL;
2418 
2419 		/* Find and cut off the next port options. */
2420 		next = strstr(str, ",hd:");
2421 		next2 = strstr(str, ",cd:");
2422 		if (next == NULL || (next2 != NULL && next2 < next))
2423 			next = next2;
2424 		if (next != NULL) {
2425 			next[0] = 0;
2426 			next++;
2427 		}
2428 
2429 		if (str[0] == 0)
2430 			continue;
2431 
2432 		if (type == NULL) {
2433 			EPRINTLN("Missing or invalid type for port %d: \"%s\"",
2434 			    p, str);
2435 			goto out;
2436 		}
2437 
2438 		if (pci_ahci_legacy_config_port(ports_nvl, p, type, str) != 0)
2439 			goto out;
2440 	}
2441 	ret = 0;
2442 out:
2443 	free(tofree);
2444 	return (ret);
2445 }
2446 
2447 static int
2448 pci_ahci_cd_legacy_config(nvlist_t *nvl, const char *opts)
2449 {
2450 	nvlist_t *ports_nvl;
2451 
2452 	ports_nvl = create_relative_config_node(nvl, "port");
2453 	return (pci_ahci_legacy_config_port(ports_nvl, 0, "cd", opts));
2454 }
2455 
2456 static int
2457 pci_ahci_hd_legacy_config(nvlist_t *nvl, const char *opts)
2458 {
2459 	nvlist_t *ports_nvl;
2460 
2461 	ports_nvl = create_relative_config_node(nvl, "port");
2462 	return (pci_ahci_legacy_config_port(ports_nvl, 0, "hd", opts));
2463 }
2464 
2465 static int
2466 pci_ahci_init(struct pci_devinst *pi, nvlist_t *nvl)
2467 {
2468 	char bident[sizeof("XXX:XXX:XXX")];
2469 	char node_name[sizeof("XX")];
2470 	struct blockif_ctxt *bctxt;
2471 	struct pci_ahci_softc *sc;
2472 	int atapi, ret, slots, p;
2473 	MD5_CTX mdctx;
2474 	u_char digest[16];
2475 	const char *path, *type, *value;
2476 	nvlist_t *ports_nvl, *port_nvl;
2477 
2478 	ret = 0;
2479 
2480 #ifdef AHCI_DEBUG
2481 	dbg = fopen("/tmp/log", "w+");
2482 #endif
2483 
2484 	sc = calloc(1, sizeof(struct pci_ahci_softc));
2485 	pi->pi_arg = sc;
2486 	sc->asc_pi = pi;
2487 	pthread_mutex_init(&sc->mtx, NULL);
2488 	sc->ports = 0;
2489 	sc->pi = 0;
2490 	slots = 32;
2491 
2492 	ports_nvl = find_relative_config_node(nvl, "port");
2493 	for (p = 0; ports_nvl != NULL && p < MAX_PORTS; p++) {
2494 		struct ata_params *ata_ident = &sc->port[p].ata_ident;
2495 		char ident[AHCI_PORT_IDENT];
2496 
2497 		snprintf(node_name, sizeof(node_name), "%d", p);
2498 		port_nvl = find_relative_config_node(ports_nvl, node_name);
2499 		if (port_nvl == NULL)
2500 			continue;
2501 
2502 		type = get_config_value_node(port_nvl, "type");
2503 		if (type == NULL)
2504 			continue;
2505 
2506 		if (strcmp(type, "hd") == 0)
2507 			atapi = 0;
2508 		else
2509 			atapi = 1;
2510 
2511 		/*
2512 		 * Attempt to open the backing image. Use the PCI slot/func
2513 		 * and the port number for the identifier string.
2514 		 */
2515 		snprintf(bident, sizeof(bident), "%u:%u:%u", pi->pi_slot,
2516 		    pi->pi_func, p);
2517 
2518 		bctxt = blockif_open(port_nvl, bident);
2519 		if (bctxt == NULL) {
2520 			sc->ports = p;
2521 			ret = 1;
2522 			goto open_fail;
2523 		}
2524 
2525 		ret = blockif_add_boot_device(pi, bctxt);
2526 		if (ret) {
2527 			sc->ports = p;
2528 			goto open_fail;
2529 		}
2530 
2531 		sc->port[p].bctx = bctxt;
2532 		sc->port[p].pr_sc = sc;
2533 		sc->port[p].port = p;
2534 		sc->port[p].atapi = atapi;
2535 
2536 		/*
2537 		 * Create an identifier for the backing file.
2538 		 * Use parts of the md5 sum of the filename
2539 		 */
2540 		path = get_config_value_node(port_nvl, "path");
2541 		MD5Init(&mdctx);
2542 		MD5Update(&mdctx, path, strlen(path));
2543 		MD5Final(digest, &mdctx);
2544 		snprintf(ident, AHCI_PORT_IDENT,
2545 			"BHYVE-%02X%02X-%02X%02X-%02X%02X",
2546 			digest[0], digest[1], digest[2], digest[3], digest[4],
2547 			digest[5]);
2548 
2549 		memset(ata_ident, 0, sizeof(struct ata_params));
2550 		ata_string((uint8_t*)&ata_ident->serial, ident, 20);
2551 		ata_string((uint8_t*)&ata_ident->revision, "001", 8);
2552 		if (atapi)
2553 			ata_string((uint8_t*)&ata_ident->model, "BHYVE SATA DVD ROM", 40);
2554 		else
2555 			ata_string((uint8_t*)&ata_ident->model, "BHYVE SATA DISK", 40);
2556 		value = get_config_value_node(port_nvl, "nmrr");
2557 		if (value != NULL)
2558 			ata_ident->media_rotation_rate = atoi(value);
2559 		value = get_config_value_node(port_nvl, "ser");
2560 		if (value != NULL)
2561 			ata_string((uint8_t*)(&ata_ident->serial), value, 20);
2562 		value = get_config_value_node(port_nvl, "rev");
2563 		if (value != NULL)
2564 			ata_string((uint8_t*)(&ata_ident->revision), value, 8);
2565 		value = get_config_value_node(port_nvl, "model");
2566 		if (value != NULL)
2567 			ata_string((uint8_t*)(&ata_ident->model), value, 40);
2568 		ata_identify_init(&sc->port[p], atapi);
2569 
2570 		/*
2571 		 * Allocate blockif request structures and add them
2572 		 * to the free list
2573 		 */
2574 		pci_ahci_ioreq_init(&sc->port[p]);
2575 
2576 		sc->pi |= (1 << p);
2577 		if (sc->port[p].ioqsz < slots)
2578 			slots = sc->port[p].ioqsz;
2579 	}
2580 	sc->ports = p;
2581 
2582 	/* Intel ICH8 AHCI */
2583 	--slots;
2584 	if (sc->ports < DEF_PORTS)
2585 		sc->ports = DEF_PORTS;
2586 	sc->cap = AHCI_CAP_64BIT | AHCI_CAP_SNCQ | AHCI_CAP_SSNTF |
2587 	    AHCI_CAP_SMPS | AHCI_CAP_SSS | AHCI_CAP_SALP |
2588 	    AHCI_CAP_SAL | AHCI_CAP_SCLO | (0x3 << AHCI_CAP_ISS_SHIFT)|
2589 	    AHCI_CAP_PMD | AHCI_CAP_SSC | AHCI_CAP_PSC |
2590 	    (slots << AHCI_CAP_NCS_SHIFT) | AHCI_CAP_SXS | (sc->ports - 1);
2591 
2592 	sc->vs = 0x10300;
2593 	sc->cap2 = AHCI_CAP2_APST;
2594 	ahci_reset(sc);
2595 
2596 	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x2821);
2597 	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x8086);
2598 	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
2599 	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_STORAGE_SATA);
2600 	pci_set_cfgdata8(pi, PCIR_PROGIF, PCIP_STORAGE_SATA_AHCI_1_0);
2601 	p = MIN(sc->ports, 16);
2602 	p = flsl(p) - ((p & (p - 1)) ? 0 : 1);
2603 	pci_emul_add_msicap(pi, 1 << p);
2604 	pci_emul_alloc_bar(pi, 5, PCIBAR_MEM32,
2605 	    AHCI_OFFSET + sc->ports * AHCI_STEP);
2606 
2607 	pci_lintr_request(pi);
2608 
2609 open_fail:
2610 	if (ret) {
2611 		for (p = 0; p < sc->ports; p++) {
2612 			if (sc->port[p].bctx != NULL)
2613 				blockif_close(sc->port[p].bctx);
2614 		}
2615 		free(sc);
2616 	}
2617 
2618 	return (ret);
2619 }
2620 
2621 #ifdef BHYVE_SNAPSHOT
/*
 * Save or restore the controller state through the snapshot 'meta' handle.
 *
 * The same sequence of SNAPSHOT_* macro invocations is used for both
 * directions (meta->op selects which), so the order of the fields below
 * defines the snapshot layout and must not be changed casually.
 * NOTE(review): the macros presumably store an error in 'ret' and jump to
 * 'done' on failure, and set 'ret' to 0 on success - confirm against their
 * definition.
 *
 * Returns the last value stored in 'ret' by the macros, or EINVAL when
 * the snapshot's port population does not match the running configuration.
 */
static int
pci_ahci_snapshot(struct vm_snapshot_meta *meta)
{
	int i, ret;
	void *bctx;
	struct pci_devinst *pi;
	struct pci_ahci_softc *sc;
	struct ahci_port *port;

	pi = meta->dev_data;
	sc = pi->pi_arg;

	/* TODO: add mtx lock/unlock */

	/* Controller-wide (generic host control) state. */
	SNAPSHOT_VAR_OR_LEAVE(sc->ports, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->cap, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->ghc, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->is, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->pi, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->vs, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->ccc_ctl, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->ccc_pts, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->em_loc, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->em_ctl, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->cap2, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->bohc, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->lintr, meta, ret, done);

	/* Per-port state; every slot up to MAX_PORTS gets a header entry. */
	for (i = 0; i < MAX_PORTS; i++) {
		port = &sc->port[i];

		/*
		 * The blockif pointer is recorded only as a "port present"
		 * marker: below it is merely compared against NULL, never
		 * dereferenced or written back into the port.
		 */
		if (meta->op == VM_SNAPSHOT_SAVE)
			bctx = port->bctx;

		SNAPSHOT_VAR_OR_LEAVE(bctx, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->port, meta, ret, done);

		/* Mostly for restore; save is ensured by the lines above. */
		if (((bctx == NULL) && (port->bctx != NULL)) ||
		    ((bctx != NULL) && (port->bctx == NULL))) {
			EPRINTLN("%s: ports not matching", __func__);
			ret = EINVAL;
			goto done;
		}

		/* Unpopulated port: nothing further is stored for it. */
		if (port->bctx == NULL)
			continue;

		/* The recorded port number must agree with the slot index. */
		if (port->port != i) {
			EPRINTLN("%s: ports not matching: "
			    "actual: %d expected: %d", __func__, port->port, i);
			ret = EINVAL;
			goto done;
		}

		/*
		 * cmd_lst and rfis are host pointers into guest memory.
		 * NOTE(review): the macro presumably converts between host
		 * and guest-physical addresses across save/restore - confirm.
		 */
		SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(pi->pi_vmctx, port->cmd_lst,
			AHCI_CL_SIZE * AHCI_MAX_SLOTS, false, meta, ret, done);
		SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(pi->pi_vmctx, port->rfis, 256,
		    false, meta, ret, done);

		/* Emulated device state. */
		SNAPSHOT_VAR_OR_LEAVE(port->ata_ident, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->atapi, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->reset, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->waitforclear, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->mult_sectors, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->xfermode, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->err_cfis, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->sense_key, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->asc, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->ccs, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->pending, meta, ret, done);

		/* Guest-visible per-port register fields. */
		SNAPSHOT_VAR_OR_LEAVE(port->clb, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->clbu, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->fb, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->fbu, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->ie, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->cmd, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->unused0, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->tfd, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->sig, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->ssts, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->sctl, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->serr, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->sact, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->ci, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->sntf, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->fbs, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->ioqsz, meta, ret, done);

		/* The busy-request list is expected to be empty at this point. */
		assert(TAILQ_EMPTY(&port->iobhd));
	}

done:
	return (ret);
}
2718 
2719 static int
2720 pci_ahci_pause(struct pci_devinst *pi)
2721 {
2722 	struct pci_ahci_softc *sc;
2723 	struct blockif_ctxt *bctxt;
2724 	int i;
2725 
2726 	sc = pi->pi_arg;
2727 
2728 	for (i = 0; i < MAX_PORTS; i++) {
2729 		bctxt = sc->port[i].bctx;
2730 		if (bctxt == NULL)
2731 			continue;
2732 
2733 		blockif_pause(bctxt);
2734 	}
2735 
2736 	return (0);
2737 }
2738 
2739 static int
2740 pci_ahci_resume(struct pci_devinst *pi)
2741 {
2742 	struct pci_ahci_softc *sc;
2743 	struct blockif_ctxt *bctxt;
2744 	int i;
2745 
2746 	sc = pi->pi_arg;
2747 
2748 	for (i = 0; i < MAX_PORTS; i++) {
2749 		bctxt = sc->port[i].bctx;
2750 		if (bctxt == NULL)
2751 			continue;
2752 
2753 		blockif_resume(bctxt);
2754 	}
2755 
2756 	return (0);
2757 }
2758 #endif	/* BHYVE_SNAPSHOT */
2759 
/*
 * Use separate emulation names to distinguish drive and atapi devices.
 *
 * "ahci" is the full emulation entry: it supplies the init, legacy-config
 * and BAR read/write handlers, plus the snapshot, pause and resume handlers
 * when built with BHYVE_SNAPSHOT.
 */
static const struct pci_devemu pci_de_ahci = {
	.pe_emu =	"ahci",
	.pe_init =	pci_ahci_init,
	.pe_legacy_config = pci_ahci_legacy_config,
	.pe_barwrite =	pci_ahci_write,
	.pe_barread =	pci_ahci_read,
#ifdef BHYVE_SNAPSHOT
	.pe_snapshot =	pci_ahci_snapshot,
	.pe_pause =	pci_ahci_pause,
	.pe_resume =	pci_ahci_resume,
#endif
};
/* NOTE(review): presumably registers the entry in the PCI emulation set. */
PCI_EMUL_SET(pci_de_ahci);
2776 
/*
 * "ahci-hd": provides only a legacy-config handler, which configures the
 * option string as a single "hd" device on port 0, and names "ahci" as its
 * alias.
 * NOTE(review): presumably the PCI emulation core resolves pe_alias so the
 * device is serviced by the "ahci" entry - confirm there.
 */
static const struct pci_devemu pci_de_ahci_hd = {
	.pe_emu =	"ahci-hd",
	.pe_legacy_config = pci_ahci_hd_legacy_config,
	.pe_alias =	"ahci",
};
PCI_EMUL_SET(pci_de_ahci_hd);
2783 
/*
 * "ahci-cd": provides only a legacy-config handler, which configures the
 * option string as a single "cd" device on port 0, and names "ahci" as its
 * alias.
 * NOTE(review): presumably the PCI emulation core resolves pe_alias so the
 * device is serviced by the "ahci" entry - confirm there.
 */
static const struct pci_devemu pci_de_ahci_cd = {
	.pe_emu =	"ahci-cd",
	.pe_legacy_config = pci_ahci_cd_legacy_config,
	.pe_alias =	"ahci",
};
PCI_EMUL_SET(pci_de_ahci_cd);
2790