xref: /freebsd/usr.sbin/bhyve/pci_ahci.c (revision c40487d49bde43806672a0917a7ccc5d1e6301fd)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2013  Zhixiang Yu <zcore@freebsd.org>
5  * Copyright (c) 2015-2016 Alexander Motin <mav@FreeBSD.org>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * $FreeBSD$
30  */
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include <sys/param.h>
36 #include <sys/linker_set.h>
37 #include <sys/stat.h>
38 #include <sys/uio.h>
39 #include <sys/ioctl.h>
40 #include <sys/disk.h>
41 #include <sys/ata.h>
42 #include <sys/endian.h>
43 
44 #include <machine/vmm_snapshot.h>
45 
46 #include <errno.h>
47 #include <fcntl.h>
48 #include <stdio.h>
49 #include <stdlib.h>
50 #include <stdint.h>
51 #include <string.h>
52 #include <strings.h>
53 #include <unistd.h>
54 #include <assert.h>
55 #include <pthread.h>
56 #include <pthread_np.h>
57 #include <inttypes.h>
58 #include <md5.h>
59 
60 #include "bhyverun.h"
61 #include "pci_emul.h"
62 #include "ahci.h"
63 #include "block_if.h"
64 
/* Number of ports exposed by the emulated controller. */
#define	DEF_PORTS	6		/* Intel ICH8 AHCI supports 6 ports */
#define	MAX_PORTS	32		/* AHCI supports 32 ports */

/* Values stored in a port's signature field (see ahci_port_reset()). */
#define	PxSIG_ATA	0x00000101	/* ATA drive */
#define	PxSIG_ATAPI	0xeb140101	/* ATAPI drive */
70 
/*
 * SATA FIS type codes.  The FIS builders in this file store the code
 * in byte 0 of the FIS they produce.
 */
enum sata_fis_type {
	FIS_TYPE_REGH2D		= 0x27,	/* Register FIS, host to device */
	FIS_TYPE_REGD2H		= 0x34,	/* Register FIS, device to host */
	FIS_TYPE_DMAACT		= 0x39,	/* DMA activate FIS, device to host */
	FIS_TYPE_DMASETUP	= 0x41,	/* DMA setup FIS, bidirectional */
	FIS_TYPE_DATA		= 0x46,	/* Data FIS, bidirectional */
	FIS_TYPE_BIST		= 0x58,	/* BIST activate FIS, bidirectional */
	FIS_TYPE_PIOSETUP	= 0x5F,	/* PIO setup FIS, device to host */
	FIS_TYPE_SETDEVBITS	= 0xA1,	/* Set device bits FIS, device to host */
};
81 
/*
 * SCSI command opcodes (carried in ATAPI packet commands)
 */
#define	TEST_UNIT_READY			0x00
#define	REQUEST_SENSE			0x03
#define	INQUIRY				0x12
#define	START_STOP_UNIT			0x1B
#define	PREVENT_ALLOW			0x1E
#define	READ_CAPACITY			0x25
#define	READ_10				0x28
#define	POSITION_TO_ELEMENT		0x2B
#define	READ_TOC			0x43
#define	GET_EVENT_STATUS_NOTIFICATION	0x4A
#define	MODE_SENSE_10			0x5A
#define	REPORT_LUNS			0xA0
#define	READ_12				0xA8
#define	READ_CD				0xBE

/*
 * SCSI mode page codes
 */
#define	MODEPAGE_RW_ERROR_RECOVERY	0x01
#define	MODEPAGE_CD_CAPABILITIES	0x2A

/*
 * ATA SET FEATURES subcommand codes and SATA feature identifiers
 */
#define	ATA_SF_ENAB_SATA_SF		0x10
#define	ATA_SATA_SF_AN			0x05
#define	ATA_SF_DIS_SATA_SF		0x90
112 
/*
 * Diagnostic output.
 *
 * DPRINTF() expands to nothing unless AHCI_DEBUG is defined, in which
 * case it formats to the 'dbg' stream and flushes it after every
 * message.  WPRINTF() always prints to stdout.
 */
#ifdef AHCI_DEBUG
static FILE *dbg;
#define	DPRINTF(format, ...)						\
	do{fprintf(dbg, format, ##__VA_ARGS__);fflush(dbg);}while(0)
#else
#define	DPRINTF(format, ...)
#endif
#define	WPRINTF(format, ...)	printf(format, ##__VA_ARGS__)
123 
/*
 * Size of a port identifier buffer (presumably 20 identifier
 * characters plus a terminating NUL -- confirm against the users of
 * this macro).  The expansion is parenthesized so that it stays a
 * single value when used inside a larger expression, e.g.
 * AHCI_PORT_IDENT * 2.
 */
#define AHCI_PORT_IDENT (20 + 1)
125 
126 struct ahci_ioreq {
127 	struct blockif_req io_req;
128 	struct ahci_port *io_pr;
129 	STAILQ_ENTRY(ahci_ioreq) io_flist;
130 	TAILQ_ENTRY(ahci_ioreq) io_blist;
131 	uint8_t *cfis;
132 	uint32_t len;
133 	uint32_t done;
134 	int slot;
135 	int more;
136 	int readop;
137 };
138 
139 struct ahci_port {
140 	struct blockif_ctxt *bctx;
141 	struct pci_ahci_softc *pr_sc;
142 	struct ata_params ata_ident;
143 	uint8_t *cmd_lst;
144 	uint8_t *rfis;
145 	int port;
146 	int atapi;
147 	int reset;
148 	int waitforclear;
149 	int mult_sectors;
150 	uint8_t xfermode;
151 	uint8_t err_cfis[20];
152 	uint8_t sense_key;
153 	uint8_t asc;
154 	u_int ccs;
155 	uint32_t pending;
156 
157 	uint32_t clb;
158 	uint32_t clbu;
159 	uint32_t fb;
160 	uint32_t fbu;
161 	uint32_t is;
162 	uint32_t ie;
163 	uint32_t cmd;
164 	uint32_t unused0;
165 	uint32_t tfd;
166 	uint32_t sig;
167 	uint32_t ssts;
168 	uint32_t sctl;
169 	uint32_t serr;
170 	uint32_t sact;
171 	uint32_t ci;
172 	uint32_t sntf;
173 	uint32_t fbs;
174 
175 	/*
176 	 * i/o request info
177 	 */
178 	struct ahci_ioreq *ioreq;
179 	int ioqsz;
180 	STAILQ_HEAD(ahci_fhead, ahci_ioreq) iofhd;
181 	TAILQ_HEAD(ahci_bhead, ahci_ioreq) iobhd;
182 };
183 
/*
 * Command header: one entry of a port's command list, located at
 * cmd_lst + slot * AHCI_CL_SIZE.
 */
struct ahci_cmd_hdr {
	uint16_t flags;
	uint16_t prdtl;		/* number of PRDT entries */
	uint32_t prdbc;		/* byte count transferred (set by write_prdt) */
	uint64_t ctba;
	uint32_t reserved[4];
};
191 
/*
 * Physical region descriptor: one guest buffer of a command.  The
 * table starts at offset 0x80 from the command FIS.
 */
struct ahci_prdt_entry {
	uint64_t dba;		/* guest physical address of the buffer */
	uint32_t reserved;
#define	DBCMASK		0x3fffff
	uint32_t dbc;		/* (dbc & DBCMASK) + 1 is the byte count */
};
198 
199 struct pci_ahci_softc {
200 	struct pci_devinst *asc_pi;
201 	pthread_mutex_t	mtx;
202 	int ports;
203 	uint32_t cap;
204 	uint32_t ghc;
205 	uint32_t is;
206 	uint32_t pi;
207 	uint32_t vs;
208 	uint32_t ccc_ctl;
209 	uint32_t ccc_pts;
210 	uint32_t em_loc;
211 	uint32_t em_ctl;
212 	uint32_t cap2;
213 	uint32_t bohc;
214 	uint32_t lintr;
215 	struct ahci_port port[MAX_PORTS];
216 };
217 #define	ahci_ctx(sc)	((sc)->asc_pi->pi_vmctx)
218 
219 static void ahci_handle_port(struct ahci_port *p);
220 
/*
 * Convert a logical block address to minute/second/frame form, stored
 * in buf[0], buf[1] and buf[2].  There are 75 frames per second and
 * the address is biased by 150 frames before conversion.
 */
static inline void
lba_to_msf(uint8_t *buf, int lba)
{
	int frames, seconds;

	frames = lba + 150;
	seconds = frames / 75;

	buf[0] = seconds / 60;
	buf[1] = seconds % 60;
	buf[2] = frames % 75;
}
228 
229 /*
230  * Generate HBA interrupts on global IS register write.
231  */
232 static void
233 ahci_generate_intr(struct pci_ahci_softc *sc, uint32_t mask)
234 {
235 	struct pci_devinst *pi = sc->asc_pi;
236 	struct ahci_port *p;
237 	int i, nmsg;
238 	uint32_t mmask;
239 
240 	/* Update global IS from PxIS/PxIE. */
241 	for (i = 0; i < sc->ports; i++) {
242 		p = &sc->port[i];
243 		if (p->is & p->ie)
244 			sc->is |= (1 << i);
245 	}
246 	DPRINTF("%s(%08x) %08x", __func__, mask, sc->is);
247 
248 	/* If there is nothing enabled -- clear legacy interrupt and exit. */
249 	if (sc->is == 0 || (sc->ghc & AHCI_GHC_IE) == 0) {
250 		if (sc->lintr) {
251 			pci_lintr_deassert(pi);
252 			sc->lintr = 0;
253 		}
254 		return;
255 	}
256 
257 	/* If there is anything and no MSI -- assert legacy interrupt. */
258 	nmsg = pci_msi_maxmsgnum(pi);
259 	if (nmsg == 0) {
260 		if (!sc->lintr) {
261 			sc->lintr = 1;
262 			pci_lintr_assert(pi);
263 		}
264 		return;
265 	}
266 
267 	/* Assert respective MSIs for ports that were touched. */
268 	for (i = 0; i < nmsg; i++) {
269 		if (sc->ports <= nmsg || i < nmsg - 1)
270 			mmask = 1 << i;
271 		else
272 			mmask = 0xffffffff << i;
273 		if (sc->is & mask && mmask & mask)
274 			pci_generate_msi(pi, i);
275 	}
276 }
277 
278 /*
279  * Generate HBA interrupt on specific port event.
280  */
281 static void
282 ahci_port_intr(struct ahci_port *p)
283 {
284 	struct pci_ahci_softc *sc = p->pr_sc;
285 	struct pci_devinst *pi = sc->asc_pi;
286 	int nmsg;
287 
288 	DPRINTF("%s(%d) %08x/%08x %08x", __func__,
289 	    p->port, p->is, p->ie, sc->is);
290 
291 	/* If there is nothing enabled -- we are done. */
292 	if ((p->is & p->ie) == 0)
293 		return;
294 
295 	/* In case of non-shared MSI always generate interrupt. */
296 	nmsg = pci_msi_maxmsgnum(pi);
297 	if (sc->ports <= nmsg || p->port < nmsg - 1) {
298 		sc->is |= (1 << p->port);
299 		if ((sc->ghc & AHCI_GHC_IE) == 0)
300 			return;
301 		pci_generate_msi(pi, p->port);
302 		return;
303 	}
304 
305 	/* If IS for this port is already set -- do nothing. */
306 	if (sc->is & (1 << p->port))
307 		return;
308 
309 	sc->is |= (1 << p->port);
310 
311 	/* If interrupts are enabled -- generate one. */
312 	if ((sc->ghc & AHCI_GHC_IE) == 0)
313 		return;
314 	if (nmsg > 0) {
315 		pci_generate_msi(pi, nmsg - 1);
316 	} else if (!sc->lintr) {
317 		sc->lintr = 1;
318 		pci_lintr_assert(pi);
319 	}
320 }
321 
322 static void
323 ahci_write_fis(struct ahci_port *p, enum sata_fis_type ft, uint8_t *fis)
324 {
325 	int offset, len, irq;
326 
327 	if (p->rfis == NULL || !(p->cmd & AHCI_P_CMD_FRE))
328 		return;
329 
330 	switch (ft) {
331 	case FIS_TYPE_REGD2H:
332 		offset = 0x40;
333 		len = 20;
334 		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_DHR : 0;
335 		break;
336 	case FIS_TYPE_SETDEVBITS:
337 		offset = 0x58;
338 		len = 8;
339 		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_SDB : 0;
340 		break;
341 	case FIS_TYPE_PIOSETUP:
342 		offset = 0x20;
343 		len = 20;
344 		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_PS : 0;
345 		break;
346 	default:
347 		WPRINTF("unsupported fis type %d", ft);
348 		return;
349 	}
350 	if (fis[2] & ATA_S_ERROR) {
351 		p->waitforclear = 1;
352 		irq |= AHCI_P_IX_TFE;
353 	}
354 	memcpy(p->rfis + offset, fis, len);
355 	if (irq) {
356 		if (~p->is & irq) {
357 			p->is |= irq;
358 			ahci_port_intr(p);
359 		}
360 	}
361 }
362 
363 static void
364 ahci_write_fis_piosetup(struct ahci_port *p)
365 {
366 	uint8_t fis[20];
367 
368 	memset(fis, 0, sizeof(fis));
369 	fis[0] = FIS_TYPE_PIOSETUP;
370 	ahci_write_fis(p, FIS_TYPE_PIOSETUP, fis);
371 }
372 
373 static void
374 ahci_write_fis_sdb(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
375 {
376 	uint8_t fis[8];
377 	uint8_t error;
378 
379 	error = (tfd >> 8) & 0xff;
380 	tfd &= 0x77;
381 	memset(fis, 0, sizeof(fis));
382 	fis[0] = FIS_TYPE_SETDEVBITS;
383 	fis[1] = (1 << 6);
384 	fis[2] = tfd;
385 	fis[3] = error;
386 	if (fis[2] & ATA_S_ERROR) {
387 		p->err_cfis[0] = slot;
388 		p->err_cfis[2] = tfd;
389 		p->err_cfis[3] = error;
390 		memcpy(&p->err_cfis[4], cfis + 4, 16);
391 	} else {
392 		*(uint32_t *)(fis + 4) = (1 << slot);
393 		p->sact &= ~(1 << slot);
394 	}
395 	p->tfd &= ~0x77;
396 	p->tfd |= tfd;
397 	ahci_write_fis(p, FIS_TYPE_SETDEVBITS, fis);
398 }
399 
400 static void
401 ahci_write_fis_d2h(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
402 {
403 	uint8_t fis[20];
404 	uint8_t error;
405 
406 	error = (tfd >> 8) & 0xff;
407 	memset(fis, 0, sizeof(fis));
408 	fis[0] = FIS_TYPE_REGD2H;
409 	fis[1] = (1 << 6);
410 	fis[2] = tfd & 0xff;
411 	fis[3] = error;
412 	fis[4] = cfis[4];
413 	fis[5] = cfis[5];
414 	fis[6] = cfis[6];
415 	fis[7] = cfis[7];
416 	fis[8] = cfis[8];
417 	fis[9] = cfis[9];
418 	fis[10] = cfis[10];
419 	fis[11] = cfis[11];
420 	fis[12] = cfis[12];
421 	fis[13] = cfis[13];
422 	if (fis[2] & ATA_S_ERROR) {
423 		p->err_cfis[0] = 0x80;
424 		p->err_cfis[2] = tfd & 0xff;
425 		p->err_cfis[3] = error;
426 		memcpy(&p->err_cfis[4], cfis + 4, 16);
427 	} else
428 		p->ci &= ~(1 << slot);
429 	p->tfd = tfd;
430 	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
431 }
432 
433 static void
434 ahci_write_fis_d2h_ncq(struct ahci_port *p, int slot)
435 {
436 	uint8_t fis[20];
437 
438 	p->tfd = ATA_S_READY | ATA_S_DSC;
439 	memset(fis, 0, sizeof(fis));
440 	fis[0] = FIS_TYPE_REGD2H;
441 	fis[1] = 0;			/* No interrupt */
442 	fis[2] = p->tfd;		/* Status */
443 	fis[3] = 0;			/* No error */
444 	p->ci &= ~(1 << slot);
445 	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
446 }
447 
448 static void
449 ahci_write_reset_fis_d2h(struct ahci_port *p)
450 {
451 	uint8_t fis[20];
452 
453 	memset(fis, 0, sizeof(fis));
454 	fis[0] = FIS_TYPE_REGD2H;
455 	fis[3] = 1;
456 	fis[4] = 1;
457 	if (p->atapi) {
458 		fis[5] = 0x14;
459 		fis[6] = 0xeb;
460 	}
461 	fis[12] = 1;
462 	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
463 }
464 
465 static void
466 ahci_check_stopped(struct ahci_port *p)
467 {
468 	/*
469 	 * If we are no longer processing the command list and nothing
470 	 * is in-flight, clear the running bit, the current command
471 	 * slot, the command issue and active bits.
472 	 */
473 	if (!(p->cmd & AHCI_P_CMD_ST)) {
474 		if (p->pending == 0) {
475 			p->ccs = 0;
476 			p->cmd &= ~(AHCI_P_CMD_CR | AHCI_P_CMD_CCS_MASK);
477 			p->ci = 0;
478 			p->sact = 0;
479 			p->waitforclear = 0;
480 		}
481 	}
482 }
483 
484 static void
485 ahci_port_stop(struct ahci_port *p)
486 {
487 	struct ahci_ioreq *aior;
488 	uint8_t *cfis;
489 	int slot;
490 	int error;
491 
492 	assert(pthread_mutex_isowned_np(&p->pr_sc->mtx));
493 
494 	TAILQ_FOREACH(aior, &p->iobhd, io_blist) {
495 		/*
496 		 * Try to cancel the outstanding blockif request.
497 		 */
498 		error = blockif_cancel(p->bctx, &aior->io_req);
499 		if (error != 0)
500 			continue;
501 
502 		slot = aior->slot;
503 		cfis = aior->cfis;
504 		if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
505 		    cfis[2] == ATA_READ_FPDMA_QUEUED ||
506 		    cfis[2] == ATA_SEND_FPDMA_QUEUED)
507 			p->sact &= ~(1 << slot);	/* NCQ */
508 		else
509 			p->ci &= ~(1 << slot);
510 
511 		/*
512 		 * This command is now done.
513 		 */
514 		p->pending &= ~(1 << slot);
515 
516 		/*
517 		 * Delete the blockif request from the busy list
518 		 */
519 		TAILQ_REMOVE(&p->iobhd, aior, io_blist);
520 
521 		/*
522 		 * Move the blockif request back to the free list
523 		 */
524 		STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
525 	}
526 
527 	ahci_check_stopped(p);
528 }
529 
530 static void
531 ahci_port_reset(struct ahci_port *pr)
532 {
533 	pr->serr = 0;
534 	pr->sact = 0;
535 	pr->xfermode = ATA_UDMA6;
536 	pr->mult_sectors = 128;
537 
538 	if (!pr->bctx) {
539 		pr->ssts = ATA_SS_DET_NO_DEVICE;
540 		pr->sig = 0xFFFFFFFF;
541 		pr->tfd = 0x7F;
542 		return;
543 	}
544 	pr->ssts = ATA_SS_DET_PHY_ONLINE | ATA_SS_IPM_ACTIVE;
545 	if (pr->sctl & ATA_SC_SPD_MASK)
546 		pr->ssts |= (pr->sctl & ATA_SC_SPD_MASK);
547 	else
548 		pr->ssts |= ATA_SS_SPD_GEN3;
549 	pr->tfd = (1 << 8) | ATA_S_DSC | ATA_S_DMA;
550 	if (!pr->atapi) {
551 		pr->sig = PxSIG_ATA;
552 		pr->tfd |= ATA_S_READY;
553 	} else
554 		pr->sig = PxSIG_ATAPI;
555 	ahci_write_reset_fis_d2h(pr);
556 }
557 
558 static void
559 ahci_reset(struct pci_ahci_softc *sc)
560 {
561 	int i;
562 
563 	sc->ghc = AHCI_GHC_AE;
564 	sc->is = 0;
565 
566 	if (sc->lintr) {
567 		pci_lintr_deassert(sc->asc_pi);
568 		sc->lintr = 0;
569 	}
570 
571 	for (i = 0; i < sc->ports; i++) {
572 		sc->port[i].ie = 0;
573 		sc->port[i].is = 0;
574 		sc->port[i].cmd = (AHCI_P_CMD_SUD | AHCI_P_CMD_POD);
575 		if (sc->port[i].bctx)
576 			sc->port[i].cmd |= AHCI_P_CMD_CPS;
577 		sc->port[i].sctl = 0;
578 		ahci_port_reset(&sc->port[i]);
579 	}
580 }
581 
/*
 * Copy 'src' into the 'len'-byte field 'dest' in ATA string format:
 * the two bytes of every 16-bit word are swapped, and the field is
 * padded with spaces once 'src' is exhausted.  No NUL is stored.
 */
static void
ata_string(uint8_t *dest, const char *src, int len)
{
	int i;

	for (i = 0; i < len; i++)
		dest[i ^ 1] = (*src != '\0') ? *src++ : ' ';
}
594 
/*
 * Copy 'src' into the 'len'-byte field 'dest', padding with spaces once
 * 'src' is exhausted.  No NUL is stored and bytes are not swapped.
 */
static void
atapi_string(uint8_t *dest, const char *src, int len)
{
	int i;

	for (i = 0; i < len; i++)
		dest[i] = (*src != '\0') ? *src++ : ' ';
}
607 
608 /*
609  * Build up the iovec based on the PRDT, 'done' and 'len'.
610  */
611 static void
612 ahci_build_iov(struct ahci_port *p, struct ahci_ioreq *aior,
613     struct ahci_prdt_entry *prdt, uint16_t prdtl)
614 {
615 	struct blockif_req *breq = &aior->io_req;
616 	int i, j, skip, todo, left, extra;
617 	uint32_t dbcsz;
618 
619 	/* Copy part of PRDT between 'done' and 'len' bytes into the iov. */
620 	skip = aior->done;
621 	left = aior->len - aior->done;
622 	todo = 0;
623 	for (i = 0, j = 0; i < prdtl && j < BLOCKIF_IOV_MAX && left > 0;
624 	    i++, prdt++) {
625 		dbcsz = (prdt->dbc & DBCMASK) + 1;
626 		/* Skip already done part of the PRDT */
627 		if (dbcsz <= skip) {
628 			skip -= dbcsz;
629 			continue;
630 		}
631 		dbcsz -= skip;
632 		if (dbcsz > left)
633 			dbcsz = left;
634 		breq->br_iov[j].iov_base = paddr_guest2host(ahci_ctx(p->pr_sc),
635 		    prdt->dba + skip, dbcsz);
636 		breq->br_iov[j].iov_len = dbcsz;
637 		todo += dbcsz;
638 		left -= dbcsz;
639 		skip = 0;
640 		j++;
641 	}
642 
643 	/* If we got limited by IOV length, round I/O down to sector size. */
644 	if (j == BLOCKIF_IOV_MAX) {
645 		extra = todo % blockif_sectsz(p->bctx);
646 		todo -= extra;
647 		assert(todo > 0);
648 		while (extra > 0) {
649 			if (breq->br_iov[j - 1].iov_len > extra) {
650 				breq->br_iov[j - 1].iov_len -= extra;
651 				break;
652 			}
653 			extra -= breq->br_iov[j - 1].iov_len;
654 			j--;
655 		}
656 	}
657 
658 	breq->br_iovcnt = j;
659 	breq->br_resid = todo;
660 	aior->done += todo;
661 	aior->more = (aior->done < aior->len && i < prdtl);
662 }
663 
664 static void
665 ahci_handle_rw(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
666 {
667 	struct ahci_ioreq *aior;
668 	struct blockif_req *breq;
669 	struct ahci_prdt_entry *prdt;
670 	struct ahci_cmd_hdr *hdr;
671 	uint64_t lba;
672 	uint32_t len;
673 	int err, first, ncq, readop;
674 
675 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
676 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
677 	ncq = 0;
678 	readop = 1;
679 	first = (done == 0);
680 
681 	if (cfis[2] == ATA_WRITE || cfis[2] == ATA_WRITE48 ||
682 	    cfis[2] == ATA_WRITE_MUL || cfis[2] == ATA_WRITE_MUL48 ||
683 	    cfis[2] == ATA_WRITE_DMA || cfis[2] == ATA_WRITE_DMA48 ||
684 	    cfis[2] == ATA_WRITE_FPDMA_QUEUED)
685 		readop = 0;
686 
687 	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
688 	    cfis[2] == ATA_READ_FPDMA_QUEUED) {
689 		lba = ((uint64_t)cfis[10] << 40) |
690 			((uint64_t)cfis[9] << 32) |
691 			((uint64_t)cfis[8] << 24) |
692 			((uint64_t)cfis[6] << 16) |
693 			((uint64_t)cfis[5] << 8) |
694 			cfis[4];
695 		len = cfis[11] << 8 | cfis[3];
696 		if (!len)
697 			len = 65536;
698 		ncq = 1;
699 	} else if (cfis[2] == ATA_READ48 || cfis[2] == ATA_WRITE48 ||
700 	    cfis[2] == ATA_READ_MUL48 || cfis[2] == ATA_WRITE_MUL48 ||
701 	    cfis[2] == ATA_READ_DMA48 || cfis[2] == ATA_WRITE_DMA48) {
702 		lba = ((uint64_t)cfis[10] << 40) |
703 			((uint64_t)cfis[9] << 32) |
704 			((uint64_t)cfis[8] << 24) |
705 			((uint64_t)cfis[6] << 16) |
706 			((uint64_t)cfis[5] << 8) |
707 			cfis[4];
708 		len = cfis[13] << 8 | cfis[12];
709 		if (!len)
710 			len = 65536;
711 	} else {
712 		lba = ((cfis[7] & 0xf) << 24) | (cfis[6] << 16) |
713 			(cfis[5] << 8) | cfis[4];
714 		len = cfis[12];
715 		if (!len)
716 			len = 256;
717 	}
718 	lba *= blockif_sectsz(p->bctx);
719 	len *= blockif_sectsz(p->bctx);
720 
721 	/* Pull request off free list */
722 	aior = STAILQ_FIRST(&p->iofhd);
723 	assert(aior != NULL);
724 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
725 
726 	aior->cfis = cfis;
727 	aior->slot = slot;
728 	aior->len = len;
729 	aior->done = done;
730 	aior->readop = readop;
731 	breq = &aior->io_req;
732 	breq->br_offset = lba + done;
733 	ahci_build_iov(p, aior, prdt, hdr->prdtl);
734 
735 	/* Mark this command in-flight. */
736 	p->pending |= 1 << slot;
737 
738 	/* Stuff request onto busy list. */
739 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
740 
741 	if (ncq && first)
742 		ahci_write_fis_d2h_ncq(p, slot);
743 
744 	if (readop)
745 		err = blockif_read(p->bctx, breq);
746 	else
747 		err = blockif_write(p->bctx, breq);
748 	assert(err == 0);
749 }
750 
751 static void
752 ahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis)
753 {
754 	struct ahci_ioreq *aior;
755 	struct blockif_req *breq;
756 	int err;
757 
758 	/*
759 	 * Pull request off free list
760 	 */
761 	aior = STAILQ_FIRST(&p->iofhd);
762 	assert(aior != NULL);
763 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
764 	aior->cfis = cfis;
765 	aior->slot = slot;
766 	aior->len = 0;
767 	aior->done = 0;
768 	aior->more = 0;
769 	breq = &aior->io_req;
770 
771 	/*
772 	 * Mark this command in-flight.
773 	 */
774 	p->pending |= 1 << slot;
775 
776 	/*
777 	 * Stuff request onto busy list
778 	 */
779 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
780 
781 	err = blockif_flush(p->bctx, breq);
782 	assert(err == 0);
783 }
784 
785 static inline void
786 read_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
787 		void *buf, int size)
788 {
789 	struct ahci_cmd_hdr *hdr;
790 	struct ahci_prdt_entry *prdt;
791 	void *to;
792 	int i, len;
793 
794 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
795 	len = size;
796 	to = buf;
797 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
798 	for (i = 0; i < hdr->prdtl && len; i++) {
799 		uint8_t *ptr;
800 		uint32_t dbcsz;
801 		int sublen;
802 
803 		dbcsz = (prdt->dbc & DBCMASK) + 1;
804 		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
805 		sublen = MIN(len, dbcsz);
806 		memcpy(to, ptr, sublen);
807 		len -= sublen;
808 		to += sublen;
809 		prdt++;
810 	}
811 }
812 
813 static void
814 ahci_handle_dsm_trim(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
815 {
816 	struct ahci_ioreq *aior;
817 	struct blockif_req *breq;
818 	uint8_t *entry;
819 	uint64_t elba;
820 	uint32_t len, elen;
821 	int err, first, ncq;
822 	uint8_t buf[512];
823 
824 	first = (done == 0);
825 	if (cfis[2] == ATA_DATA_SET_MANAGEMENT) {
826 		len = (uint16_t)cfis[13] << 8 | cfis[12];
827 		len *= 512;
828 		ncq = 0;
829 	} else { /* ATA_SEND_FPDMA_QUEUED */
830 		len = (uint16_t)cfis[11] << 8 | cfis[3];
831 		len *= 512;
832 		ncq = 1;
833 	}
834 	read_prdt(p, slot, cfis, buf, sizeof(buf));
835 
836 next:
837 	entry = &buf[done];
838 	elba = ((uint64_t)entry[5] << 40) |
839 		((uint64_t)entry[4] << 32) |
840 		((uint64_t)entry[3] << 24) |
841 		((uint64_t)entry[2] << 16) |
842 		((uint64_t)entry[1] << 8) |
843 		entry[0];
844 	elen = (uint16_t)entry[7] << 8 | entry[6];
845 	done += 8;
846 	if (elen == 0) {
847 		if (done >= len) {
848 			if (ncq) {
849 				if (first)
850 					ahci_write_fis_d2h_ncq(p, slot);
851 				ahci_write_fis_sdb(p, slot, cfis,
852 				    ATA_S_READY | ATA_S_DSC);
853 			} else {
854 				ahci_write_fis_d2h(p, slot, cfis,
855 				    ATA_S_READY | ATA_S_DSC);
856 			}
857 			p->pending &= ~(1 << slot);
858 			ahci_check_stopped(p);
859 			if (!first)
860 				ahci_handle_port(p);
861 			return;
862 		}
863 		goto next;
864 	}
865 
866 	/*
867 	 * Pull request off free list
868 	 */
869 	aior = STAILQ_FIRST(&p->iofhd);
870 	assert(aior != NULL);
871 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
872 	aior->cfis = cfis;
873 	aior->slot = slot;
874 	aior->len = len;
875 	aior->done = done;
876 	aior->more = (len != done);
877 
878 	breq = &aior->io_req;
879 	breq->br_offset = elba * blockif_sectsz(p->bctx);
880 	breq->br_resid = elen * blockif_sectsz(p->bctx);
881 
882 	/*
883 	 * Mark this command in-flight.
884 	 */
885 	p->pending |= 1 << slot;
886 
887 	/*
888 	 * Stuff request onto busy list
889 	 */
890 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
891 
892 	if (ncq && first)
893 		ahci_write_fis_d2h_ncq(p, slot);
894 
895 	err = blockif_delete(p->bctx, breq);
896 	assert(err == 0);
897 }
898 
899 static inline void
900 write_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
901 		void *buf, int size)
902 {
903 	struct ahci_cmd_hdr *hdr;
904 	struct ahci_prdt_entry *prdt;
905 	void *from;
906 	int i, len;
907 
908 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
909 	len = size;
910 	from = buf;
911 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
912 	for (i = 0; i < hdr->prdtl && len; i++) {
913 		uint8_t *ptr;
914 		uint32_t dbcsz;
915 		int sublen;
916 
917 		dbcsz = (prdt->dbc & DBCMASK) + 1;
918 		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
919 		sublen = MIN(len, dbcsz);
920 		memcpy(ptr, from, sublen);
921 		len -= sublen;
922 		from += sublen;
923 		prdt++;
924 	}
925 	hdr->prdbc = size - len;
926 }
927 
/*
 * Store in the last byte of 'buf' the value that makes all 'size' bytes
 * sum to zero modulo 256.
 */
static void
ahci_checksum(uint8_t *buf, int size)
{
	uint8_t sum = 0;
	int i = 0;

	while (i < size - 1)
		sum += buf[i++];
	buf[size - 1] = (uint8_t)(0x100 - sum);
}
938 
939 static void
940 ahci_handle_read_log(struct ahci_port *p, int slot, uint8_t *cfis)
941 {
942 	struct ahci_cmd_hdr *hdr;
943 	uint32_t buf[128];
944 	uint8_t *buf8 = (uint8_t *)buf;
945 	uint16_t *buf16 = (uint16_t *)buf;
946 
947 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
948 	if (p->atapi || hdr->prdtl == 0 || cfis[5] != 0 ||
949 	    cfis[9] != 0 || cfis[12] != 1 || cfis[13] != 0) {
950 		ahci_write_fis_d2h(p, slot, cfis,
951 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
952 		return;
953 	}
954 
955 	memset(buf, 0, sizeof(buf));
956 	if (cfis[4] == 0x00) {	/* Log directory */
957 		buf16[0x00] = 1; /* Version -- 1 */
958 		buf16[0x10] = 1; /* NCQ Command Error Log -- 1 page */
959 		buf16[0x13] = 1; /* SATA NCQ Send and Receive Log -- 1 page */
960 	} else if (cfis[4] == 0x10) {	/* NCQ Command Error Log */
961 		memcpy(buf8, p->err_cfis, sizeof(p->err_cfis));
962 		ahci_checksum(buf8, sizeof(buf));
963 	} else if (cfis[4] == 0x13) {	/* SATA NCQ Send and Receive Log */
964 		if (blockif_candelete(p->bctx) && !blockif_is_ro(p->bctx)) {
965 			buf[0x00] = 1;	/* SFQ DSM supported */
966 			buf[0x01] = 1;	/* SFQ DSM TRIM supported */
967 		}
968 	} else {
969 		ahci_write_fis_d2h(p, slot, cfis,
970 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
971 		return;
972 	}
973 
974 	if (cfis[2] == ATA_READ_LOG_EXT)
975 		ahci_write_fis_piosetup(p);
976 	write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
977 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
978 }
979 
980 static void
981 handle_identify(struct ahci_port *p, int slot, uint8_t *cfis)
982 {
983 	struct ahci_cmd_hdr *hdr;
984 
985 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
986 	if (p->atapi || hdr->prdtl == 0) {
987 		ahci_write_fis_d2h(p, slot, cfis,
988 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
989 	} else {
990 		ahci_write_fis_piosetup(p);
991 		write_prdt(p, slot, cfis, (void*)&p->ata_ident, sizeof(struct ata_params));
992 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
993 	}
994 }
995 
996 static void
997 ata_identify_init(struct ahci_port* p, int atapi)
998 {
999 	struct ata_params* ata_ident = &p->ata_ident;
1000 
1001 	if (atapi) {
1002 		ata_ident->config = ATA_PROTO_ATAPI | ATA_ATAPI_TYPE_CDROM |
1003 		    ATA_ATAPI_REMOVABLE | ATA_DRQ_FAST;
1004 		ata_ident->capabilities1 = ATA_SUPPORT_LBA |
1005 			ATA_SUPPORT_DMA;
1006 		ata_ident->capabilities2 = (1 << 14 | 1);
1007 		ata_ident->atavalid = ATA_FLAG_54_58 | ATA_FLAG_64_70;
1008 		ata_ident->obsolete62 = 0x3f;
1009 		ata_ident->mwdmamodes = 7;
1010 		if (p->xfermode & ATA_WDMA0)
1011 			ata_ident->mwdmamodes |= (1 << ((p->xfermode & 7) + 8));
1012 		ata_ident->apiomodes = 3;
1013 		ata_ident->mwdmamin = 0x0078;
1014 		ata_ident->mwdmarec = 0x0078;
1015 		ata_ident->pioblind = 0x0078;
1016 		ata_ident->pioiordy = 0x0078;
1017 		ata_ident->satacapabilities = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3);
1018 		ata_ident->satacapabilities2 = ((p->ssts & ATA_SS_SPD_MASK) >> 3);
1019 		ata_ident->satasupport = ATA_SUPPORT_NCQ_STREAM;
1020 		ata_ident->version_major = 0x3f0;
1021 		ata_ident->support.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
1022 			ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
1023 		ata_ident->support.command2 = (1 << 14);
1024 		ata_ident->support.extension = (1 << 14);
1025 		ata_ident->enabled.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
1026 			ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
1027 		ata_ident->enabled.extension = (1 << 14);
1028 		ata_ident->udmamodes = 0x7f;
1029 		if (p->xfermode & ATA_UDMA0)
1030 			ata_ident->udmamodes |= (1 << ((p->xfermode & 7) + 8));
1031 		ata_ident->transport_major = 0x1020;
1032 		ata_ident->integrity = 0x00a5;
1033 	} else {
1034 		uint64_t sectors;
1035 		int sectsz, psectsz, psectoff, candelete, ro;
1036 		uint16_t cyl;
1037 		uint8_t sech, heads;
1038 
1039 		ro = blockif_is_ro(p->bctx);
1040 		candelete = blockif_candelete(p->bctx);
1041 		sectsz = blockif_sectsz(p->bctx);
1042 		sectors = blockif_size(p->bctx) / sectsz;
1043 		blockif_chs(p->bctx, &cyl, &heads, &sech);
1044 		blockif_psectsz(p->bctx, &psectsz, &psectoff);
1045 		ata_ident->config = ATA_DRQ_FAST;
1046 		ata_ident->cylinders = cyl;
1047 		ata_ident->heads = heads;
1048 		ata_ident->sectors = sech;
1049 
1050 		ata_ident->sectors_intr = (0x8000 | 128);
1051 		ata_ident->tcg = 0;
1052 
1053 		ata_ident->capabilities1 = ATA_SUPPORT_DMA |
1054 			ATA_SUPPORT_LBA | ATA_SUPPORT_IORDY;
1055 		ata_ident->capabilities2 = (1 << 14);
1056 		ata_ident->atavalid = ATA_FLAG_54_58 |
1057 			ATA_FLAG_64_70;
1058 		if (p->mult_sectors)
1059 			ata_ident->multi = (ATA_MULTI_VALID | p->mult_sectors);
1060 		if (sectors <= 0x0fffffff) {
1061 			ata_ident->lba_size_1 = sectors;
1062 			ata_ident->lba_size_2 = (sectors >> 16);
1063 		} else {
1064 			ata_ident->lba_size_1 = 0xffff;
1065 			ata_ident->lba_size_2 = 0x0fff;
1066 		}
1067 		ata_ident->mwdmamodes = 0x7;
1068 		if (p->xfermode & ATA_WDMA0)
1069 			ata_ident->mwdmamodes |= (1 << ((p->xfermode & 7) + 8));
1070 		ata_ident->apiomodes = 0x3;
1071 		ata_ident->mwdmamin = 0x0078;
1072 		ata_ident->mwdmarec = 0x0078;
1073 		ata_ident->pioblind = 0x0078;
1074 		ata_ident->pioiordy = 0x0078;
1075 		ata_ident->support3 = 0;
1076 		ata_ident->queue = 31;
1077 		ata_ident->satacapabilities = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3 |
1078 			ATA_SUPPORT_NCQ);
1079 		ata_ident->satacapabilities2 = (ATA_SUPPORT_RCVSND_FPDMA_QUEUED |
1080 			(p->ssts & ATA_SS_SPD_MASK) >> 3);
1081 		ata_ident->version_major = 0x3f0;
1082 		ata_ident->version_minor = 0x28;
1083 		ata_ident->support.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE |
1084 			ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
1085 		ata_ident->support.command2 = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
1086 			ATA_SUPPORT_FLUSHCACHE48 | 1 << 14);
1087 		ata_ident->support.extension = (1 << 14);
1088 		ata_ident->enabled.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE |
1089 			ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
1090 		ata_ident->enabled.command2 = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
1091 			ATA_SUPPORT_FLUSHCACHE48 | 1 << 15);
1092 		ata_ident->enabled.extension = (1 << 14);
1093 		ata_ident->udmamodes = 0x7f;
1094 		if (p->xfermode & ATA_UDMA0)
1095 			ata_ident->udmamodes |= (1 << ((p->xfermode & 7) + 8));
1096 		ata_ident->lba_size48_1 = sectors;
1097 		ata_ident->lba_size48_2 = (sectors >> 16);
1098 		ata_ident->lba_size48_3 = (sectors >> 32);
1099 		ata_ident->lba_size48_4 = (sectors >> 48);
1100 
1101 		if (candelete && !ro) {
1102 			ata_ident->support3 |= ATA_SUPPORT_RZAT | ATA_SUPPORT_DRAT;
1103 			ata_ident->max_dsm_blocks = 1;
1104 			ata_ident->support_dsm = ATA_SUPPORT_DSM_TRIM;
1105 		}
1106 		ata_ident->pss = ATA_PSS_VALID_VALUE;
1107 		ata_ident->lsalign = 0x4000;
1108 		if (psectsz > sectsz) {
1109 			ata_ident->pss |= ATA_PSS_MULTLS;
1110 			ata_ident->pss |= ffsl(psectsz / sectsz) - 1;
1111 			ata_ident->lsalign |= (psectoff / sectsz);
1112 		}
1113 		if (sectsz > 512) {
1114 			ata_ident->pss |= ATA_PSS_LSSABOVE512;
1115 			ata_ident->lss_1 = sectsz / 2;
1116 			ata_ident->lss_2 = ((sectsz / 2) >> 16);
1117 		}
1118 		ata_ident->support2 = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
1119 		ata_ident->enabled2 = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
1120 		ata_ident->transport_major = 0x1020;
1121 		ata_ident->integrity = 0x00a5;
1122 	}
1123 	ahci_checksum((uint8_t*)ata_ident, sizeof(struct ata_params));
1124 }
1125 
1126 static void
1127 handle_atapi_identify(struct ahci_port *p, int slot, uint8_t *cfis)
1128 {
1129 	if (!p->atapi) {
1130 		ahci_write_fis_d2h(p, slot, cfis,
1131 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1132 	} else {
1133 		ahci_write_fis_piosetup(p);
1134 		write_prdt(p, slot, cfis, (void *)&p->ata_ident, sizeof(struct ata_params));
1135 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
1136 	}
1137 }
1138 
1139 static void
1140 atapi_inquiry(struct ahci_port *p, int slot, uint8_t *cfis)
1141 {
1142 	uint8_t buf[36];
1143 	uint8_t *acmd;
1144 	int len;
1145 	uint32_t tfd;
1146 
1147 	acmd = cfis + 0x40;
1148 
1149 	if (acmd[1] & 1) {		/* VPD */
1150 		if (acmd[2] == 0) {	/* Supported VPD pages */
1151 			buf[0] = 0x05;
1152 			buf[1] = 0;
1153 			buf[2] = 0;
1154 			buf[3] = 1;
1155 			buf[4] = 0;
1156 			len = 4 + buf[3];
1157 		} else {
1158 			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1159 			p->asc = 0x24;
1160 			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1161 			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1162 			ahci_write_fis_d2h(p, slot, cfis, tfd);
1163 			return;
1164 		}
1165 	} else {
1166 		buf[0] = 0x05;
1167 		buf[1] = 0x80;
1168 		buf[2] = 0x00;
1169 		buf[3] = 0x21;
1170 		buf[4] = 31;
1171 		buf[5] = 0;
1172 		buf[6] = 0;
1173 		buf[7] = 0;
1174 		atapi_string(buf + 8, "BHYVE", 8);
1175 		atapi_string(buf + 16, "BHYVE DVD-ROM", 16);
1176 		atapi_string(buf + 32, "001", 4);
1177 		len = sizeof(buf);
1178 	}
1179 
1180 	if (len > acmd[4])
1181 		len = acmd[4];
1182 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1183 	write_prdt(p, slot, cfis, buf, len);
1184 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1185 }
1186 
1187 static void
1188 atapi_read_capacity(struct ahci_port *p, int slot, uint8_t *cfis)
1189 {
1190 	uint8_t buf[8];
1191 	uint64_t sectors;
1192 
1193 	sectors = blockif_size(p->bctx) / 2048;
1194 	be32enc(buf, sectors - 1);
1195 	be32enc(buf + 4, 2048);
1196 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1197 	write_prdt(p, slot, cfis, buf, sizeof(buf));
1198 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1199 }
1200 
1201 static void
1202 atapi_read_toc(struct ahci_port *p, int slot, uint8_t *cfis)
1203 {
1204 	uint8_t *acmd;
1205 	uint8_t format;
1206 	int len;
1207 
1208 	acmd = cfis + 0x40;
1209 
1210 	len = be16dec(acmd + 7);
1211 	format = acmd[9] >> 6;
1212 	switch (format) {
1213 	case 0:
1214 	{
1215 		int msf, size;
1216 		uint64_t sectors;
1217 		uint8_t start_track, buf[20], *bp;
1218 
1219 		msf = (acmd[1] >> 1) & 1;
1220 		start_track = acmd[6];
1221 		if (start_track > 1 && start_track != 0xaa) {
1222 			uint32_t tfd;
1223 			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1224 			p->asc = 0x24;
1225 			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1226 			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1227 			ahci_write_fis_d2h(p, slot, cfis, tfd);
1228 			return;
1229 		}
1230 		bp = buf + 2;
1231 		*bp++ = 1;
1232 		*bp++ = 1;
1233 		if (start_track <= 1) {
1234 			*bp++ = 0;
1235 			*bp++ = 0x14;
1236 			*bp++ = 1;
1237 			*bp++ = 0;
1238 			if (msf) {
1239 				*bp++ = 0;
1240 				lba_to_msf(bp, 0);
1241 				bp += 3;
1242 			} else {
1243 				*bp++ = 0;
1244 				*bp++ = 0;
1245 				*bp++ = 0;
1246 				*bp++ = 0;
1247 			}
1248 		}
1249 		*bp++ = 0;
1250 		*bp++ = 0x14;
1251 		*bp++ = 0xaa;
1252 		*bp++ = 0;
1253 		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
1254 		sectors >>= 2;
1255 		if (msf) {
1256 			*bp++ = 0;
1257 			lba_to_msf(bp, sectors);
1258 			bp += 3;
1259 		} else {
1260 			be32enc(bp, sectors);
1261 			bp += 4;
1262 		}
1263 		size = bp - buf;
1264 		be16enc(buf, size - 2);
1265 		if (len > size)
1266 			len = size;
1267 		write_prdt(p, slot, cfis, buf, len);
1268 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1269 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1270 		break;
1271 	}
1272 	case 1:
1273 	{
1274 		uint8_t buf[12];
1275 
1276 		memset(buf, 0, sizeof(buf));
1277 		buf[1] = 0xa;
1278 		buf[2] = 0x1;
1279 		buf[3] = 0x1;
1280 		if (len > sizeof(buf))
1281 			len = sizeof(buf);
1282 		write_prdt(p, slot, cfis, buf, len);
1283 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1284 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1285 		break;
1286 	}
1287 	case 2:
1288 	{
1289 		int msf, size;
1290 		uint64_t sectors;
1291 		uint8_t *bp, buf[50];
1292 
1293 		msf = (acmd[1] >> 1) & 1;
1294 		bp = buf + 2;
1295 		*bp++ = 1;
1296 		*bp++ = 1;
1297 
1298 		*bp++ = 1;
1299 		*bp++ = 0x14;
1300 		*bp++ = 0;
1301 		*bp++ = 0xa0;
1302 		*bp++ = 0;
1303 		*bp++ = 0;
1304 		*bp++ = 0;
1305 		*bp++ = 0;
1306 		*bp++ = 1;
1307 		*bp++ = 0;
1308 		*bp++ = 0;
1309 
1310 		*bp++ = 1;
1311 		*bp++ = 0x14;
1312 		*bp++ = 0;
1313 		*bp++ = 0xa1;
1314 		*bp++ = 0;
1315 		*bp++ = 0;
1316 		*bp++ = 0;
1317 		*bp++ = 0;
1318 		*bp++ = 1;
1319 		*bp++ = 0;
1320 		*bp++ = 0;
1321 
1322 		*bp++ = 1;
1323 		*bp++ = 0x14;
1324 		*bp++ = 0;
1325 		*bp++ = 0xa2;
1326 		*bp++ = 0;
1327 		*bp++ = 0;
1328 		*bp++ = 0;
1329 		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
1330 		sectors >>= 2;
1331 		if (msf) {
1332 			*bp++ = 0;
1333 			lba_to_msf(bp, sectors);
1334 			bp += 3;
1335 		} else {
1336 			be32enc(bp, sectors);
1337 			bp += 4;
1338 		}
1339 
1340 		*bp++ = 1;
1341 		*bp++ = 0x14;
1342 		*bp++ = 0;
1343 		*bp++ = 1;
1344 		*bp++ = 0;
1345 		*bp++ = 0;
1346 		*bp++ = 0;
1347 		if (msf) {
1348 			*bp++ = 0;
1349 			lba_to_msf(bp, 0);
1350 			bp += 3;
1351 		} else {
1352 			*bp++ = 0;
1353 			*bp++ = 0;
1354 			*bp++ = 0;
1355 			*bp++ = 0;
1356 		}
1357 
1358 		size = bp - buf;
1359 		be16enc(buf, size - 2);
1360 		if (len > size)
1361 			len = size;
1362 		write_prdt(p, slot, cfis, buf, len);
1363 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1364 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1365 		break;
1366 	}
1367 	default:
1368 	{
1369 		uint32_t tfd;
1370 
1371 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1372 		p->asc = 0x24;
1373 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1374 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1375 		ahci_write_fis_d2h(p, slot, cfis, tfd);
1376 		break;
1377 	}
1378 	}
1379 }
1380 
1381 static void
1382 atapi_report_luns(struct ahci_port *p, int slot, uint8_t *cfis)
1383 {
1384 	uint8_t buf[16];
1385 
1386 	memset(buf, 0, sizeof(buf));
1387 	buf[3] = 8;
1388 
1389 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1390 	write_prdt(p, slot, cfis, buf, sizeof(buf));
1391 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1392 }
1393 
1394 static void
1395 atapi_read(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
1396 {
1397 	struct ahci_ioreq *aior;
1398 	struct ahci_cmd_hdr *hdr;
1399 	struct ahci_prdt_entry *prdt;
1400 	struct blockif_req *breq;
1401 	uint8_t *acmd;
1402 	uint64_t lba;
1403 	uint32_t len;
1404 	int err;
1405 
1406 	acmd = cfis + 0x40;
1407 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1408 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1409 
1410 	lba = be32dec(acmd + 2);
1411 	if (acmd[0] == READ_10)
1412 		len = be16dec(acmd + 7);
1413 	else
1414 		len = be32dec(acmd + 6);
1415 	if (len == 0) {
1416 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1417 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1418 	}
1419 	lba *= 2048;
1420 	len *= 2048;
1421 
1422 	/*
1423 	 * Pull request off free list
1424 	 */
1425 	aior = STAILQ_FIRST(&p->iofhd);
1426 	assert(aior != NULL);
1427 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
1428 	aior->cfis = cfis;
1429 	aior->slot = slot;
1430 	aior->len = len;
1431 	aior->done = done;
1432 	aior->readop = 1;
1433 	breq = &aior->io_req;
1434 	breq->br_offset = lba + done;
1435 	ahci_build_iov(p, aior, prdt, hdr->prdtl);
1436 
1437 	/* Mark this command in-flight. */
1438 	p->pending |= 1 << slot;
1439 
1440 	/* Stuff request onto busy list. */
1441 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
1442 
1443 	err = blockif_read(p->bctx, breq);
1444 	assert(err == 0);
1445 }
1446 
1447 static void
1448 atapi_request_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1449 {
1450 	uint8_t buf[64];
1451 	uint8_t *acmd;
1452 	int len;
1453 
1454 	acmd = cfis + 0x40;
1455 	len = acmd[4];
1456 	if (len > sizeof(buf))
1457 		len = sizeof(buf);
1458 	memset(buf, 0, len);
1459 	buf[0] = 0x70 | (1 << 7);
1460 	buf[2] = p->sense_key;
1461 	buf[7] = 10;
1462 	buf[12] = p->asc;
1463 	write_prdt(p, slot, cfis, buf, len);
1464 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1465 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1466 }
1467 
1468 static void
1469 atapi_start_stop_unit(struct ahci_port *p, int slot, uint8_t *cfis)
1470 {
1471 	uint8_t *acmd = cfis + 0x40;
1472 	uint32_t tfd;
1473 
1474 	switch (acmd[4] & 3) {
1475 	case 0:
1476 	case 1:
1477 	case 3:
1478 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1479 		tfd = ATA_S_READY | ATA_S_DSC;
1480 		break;
1481 	case 2:
1482 		/* TODO eject media */
1483 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1484 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1485 		p->asc = 0x53;
1486 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1487 		break;
1488 	}
1489 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1490 }
1491 
1492 static void
1493 atapi_mode_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1494 {
1495 	uint8_t *acmd;
1496 	uint32_t tfd;
1497 	uint8_t pc, code;
1498 	int len;
1499 
1500 	acmd = cfis + 0x40;
1501 	len = be16dec(acmd + 7);
1502 	pc = acmd[2] >> 6;
1503 	code = acmd[2] & 0x3f;
1504 
1505 	switch (pc) {
1506 	case 0:
1507 		switch (code) {
1508 		case MODEPAGE_RW_ERROR_RECOVERY:
1509 		{
1510 			uint8_t buf[16];
1511 
1512 			if (len > sizeof(buf))
1513 				len = sizeof(buf);
1514 
1515 			memset(buf, 0, sizeof(buf));
1516 			be16enc(buf, 16 - 2);
1517 			buf[2] = 0x70;
1518 			buf[8] = 0x01;
1519 			buf[9] = 16 - 10;
1520 			buf[11] = 0x05;
1521 			write_prdt(p, slot, cfis, buf, len);
1522 			tfd = ATA_S_READY | ATA_S_DSC;
1523 			break;
1524 		}
1525 		case MODEPAGE_CD_CAPABILITIES:
1526 		{
1527 			uint8_t buf[30];
1528 
1529 			if (len > sizeof(buf))
1530 				len = sizeof(buf);
1531 
1532 			memset(buf, 0, sizeof(buf));
1533 			be16enc(buf, 30 - 2);
1534 			buf[2] = 0x70;
1535 			buf[8] = 0x2A;
1536 			buf[9] = 30 - 10;
1537 			buf[10] = 0x08;
1538 			buf[12] = 0x71;
1539 			be16enc(&buf[18], 2);
1540 			be16enc(&buf[20], 512);
1541 			write_prdt(p, slot, cfis, buf, len);
1542 			tfd = ATA_S_READY | ATA_S_DSC;
1543 			break;
1544 		}
1545 		default:
1546 			goto error;
1547 			break;
1548 		}
1549 		break;
1550 	case 3:
1551 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1552 		p->asc = 0x39;
1553 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1554 		break;
1555 error:
1556 	case 1:
1557 	case 2:
1558 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1559 		p->asc = 0x24;
1560 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1561 		break;
1562 	}
1563 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1564 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1565 }
1566 
1567 static void
1568 atapi_get_event_status_notification(struct ahci_port *p, int slot,
1569     uint8_t *cfis)
1570 {
1571 	uint8_t *acmd;
1572 	uint32_t tfd;
1573 
1574 	acmd = cfis + 0x40;
1575 
1576 	/* we don't support asynchronous operation */
1577 	if (!(acmd[1] & 1)) {
1578 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1579 		p->asc = 0x24;
1580 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1581 	} else {
1582 		uint8_t buf[8];
1583 		int len;
1584 
1585 		len = be16dec(acmd + 7);
1586 		if (len > sizeof(buf))
1587 			len = sizeof(buf);
1588 
1589 		memset(buf, 0, sizeof(buf));
1590 		be16enc(buf, 8 - 2);
1591 		buf[2] = 0x04;
1592 		buf[3] = 0x10;
1593 		buf[5] = 0x02;
1594 		write_prdt(p, slot, cfis, buf, len);
1595 		tfd = ATA_S_READY | ATA_S_DSC;
1596 	}
1597 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1598 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1599 }
1600 
1601 static void
1602 handle_packet_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1603 {
1604 	uint8_t *acmd;
1605 
1606 	acmd = cfis + 0x40;
1607 
1608 #ifdef AHCI_DEBUG
1609 	{
1610 		int i;
1611 		DPRINTF("ACMD:");
1612 		for (i = 0; i < 16; i++)
1613 			DPRINTF("%02x ", acmd[i]);
1614 		DPRINTF("");
1615 	}
1616 #endif
1617 
1618 	switch (acmd[0]) {
1619 	case TEST_UNIT_READY:
1620 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1621 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1622 		break;
1623 	case INQUIRY:
1624 		atapi_inquiry(p, slot, cfis);
1625 		break;
1626 	case READ_CAPACITY:
1627 		atapi_read_capacity(p, slot, cfis);
1628 		break;
1629 	case PREVENT_ALLOW:
1630 		/* TODO */
1631 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1632 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1633 		break;
1634 	case READ_TOC:
1635 		atapi_read_toc(p, slot, cfis);
1636 		break;
1637 	case REPORT_LUNS:
1638 		atapi_report_luns(p, slot, cfis);
1639 		break;
1640 	case READ_10:
1641 	case READ_12:
1642 		atapi_read(p, slot, cfis, 0);
1643 		break;
1644 	case REQUEST_SENSE:
1645 		atapi_request_sense(p, slot, cfis);
1646 		break;
1647 	case START_STOP_UNIT:
1648 		atapi_start_stop_unit(p, slot, cfis);
1649 		break;
1650 	case MODE_SENSE_10:
1651 		atapi_mode_sense(p, slot, cfis);
1652 		break;
1653 	case GET_EVENT_STATUS_NOTIFICATION:
1654 		atapi_get_event_status_notification(p, slot, cfis);
1655 		break;
1656 	default:
1657 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1658 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1659 		p->asc = 0x20;
1660 		ahci_write_fis_d2h(p, slot, cfis, (p->sense_key << 12) |
1661 				ATA_S_READY | ATA_S_ERROR);
1662 		break;
1663 	}
1664 }
1665 
1666 static void
1667 ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1668 {
1669 
1670 	p->tfd |= ATA_S_BUSY;
1671 	switch (cfis[2]) {
1672 	case ATA_ATA_IDENTIFY:
1673 		handle_identify(p, slot, cfis);
1674 		break;
1675 	case ATA_SETFEATURES:
1676 	{
1677 		switch (cfis[3]) {
1678 		case ATA_SF_ENAB_SATA_SF:
1679 			switch (cfis[12]) {
1680 			case ATA_SATA_SF_AN:
1681 				p->tfd = ATA_S_DSC | ATA_S_READY;
1682 				break;
1683 			default:
1684 				p->tfd = ATA_S_ERROR | ATA_S_READY;
1685 				p->tfd |= (ATA_ERROR_ABORT << 8);
1686 				break;
1687 			}
1688 			break;
1689 		case ATA_SF_ENAB_WCACHE:
1690 		case ATA_SF_DIS_WCACHE:
1691 		case ATA_SF_ENAB_RCACHE:
1692 		case ATA_SF_DIS_RCACHE:
1693 			p->tfd = ATA_S_DSC | ATA_S_READY;
1694 			break;
1695 		case ATA_SF_SETXFER:
1696 		{
1697 			switch (cfis[12] & 0xf8) {
1698 			case ATA_PIO:
1699 			case ATA_PIO0:
1700 				break;
1701 			case ATA_WDMA0:
1702 			case ATA_UDMA0:
1703 				p->xfermode = (cfis[12] & 0x7);
1704 				break;
1705 			}
1706 			p->tfd = ATA_S_DSC | ATA_S_READY;
1707 			break;
1708 		}
1709 		default:
1710 			p->tfd = ATA_S_ERROR | ATA_S_READY;
1711 			p->tfd |= (ATA_ERROR_ABORT << 8);
1712 			break;
1713 		}
1714 		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1715 		break;
1716 	}
1717 	case ATA_SET_MULTI:
1718 		if (cfis[12] != 0 &&
1719 			(cfis[12] > 128 || (cfis[12] & (cfis[12] - 1)))) {
1720 			p->tfd = ATA_S_ERROR | ATA_S_READY;
1721 			p->tfd |= (ATA_ERROR_ABORT << 8);
1722 		} else {
1723 			p->mult_sectors = cfis[12];
1724 			p->tfd = ATA_S_DSC | ATA_S_READY;
1725 		}
1726 		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1727 		break;
1728 	case ATA_READ:
1729 	case ATA_WRITE:
1730 	case ATA_READ48:
1731 	case ATA_WRITE48:
1732 	case ATA_READ_MUL:
1733 	case ATA_WRITE_MUL:
1734 	case ATA_READ_MUL48:
1735 	case ATA_WRITE_MUL48:
1736 	case ATA_READ_DMA:
1737 	case ATA_WRITE_DMA:
1738 	case ATA_READ_DMA48:
1739 	case ATA_WRITE_DMA48:
1740 	case ATA_READ_FPDMA_QUEUED:
1741 	case ATA_WRITE_FPDMA_QUEUED:
1742 		ahci_handle_rw(p, slot, cfis, 0);
1743 		break;
1744 	case ATA_FLUSHCACHE:
1745 	case ATA_FLUSHCACHE48:
1746 		ahci_handle_flush(p, slot, cfis);
1747 		break;
1748 	case ATA_DATA_SET_MANAGEMENT:
1749 		if (cfis[11] == 0 && cfis[3] == ATA_DSM_TRIM &&
1750 		    cfis[13] == 0 && cfis[12] == 1) {
1751 			ahci_handle_dsm_trim(p, slot, cfis, 0);
1752 			break;
1753 		}
1754 		ahci_write_fis_d2h(p, slot, cfis,
1755 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1756 		break;
1757 	case ATA_SEND_FPDMA_QUEUED:
1758 		if ((cfis[13] & 0x1f) == ATA_SFPDMA_DSM &&
1759 		    cfis[17] == 0 && cfis[16] == ATA_DSM_TRIM &&
1760 		    cfis[11] == 0 && cfis[3] == 1) {
1761 			ahci_handle_dsm_trim(p, slot, cfis, 0);
1762 			break;
1763 		}
1764 		ahci_write_fis_d2h(p, slot, cfis,
1765 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1766 		break;
1767 	case ATA_READ_LOG_EXT:
1768 	case ATA_READ_LOG_DMA_EXT:
1769 		ahci_handle_read_log(p, slot, cfis);
1770 		break;
1771 	case ATA_SECURITY_FREEZE_LOCK:
1772 	case ATA_SMART_CMD:
1773 	case ATA_NOP:
1774 		ahci_write_fis_d2h(p, slot, cfis,
1775 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1776 		break;
1777 	case ATA_CHECK_POWER_MODE:
1778 		cfis[12] = 0xff;	/* always on */
1779 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1780 		break;
1781 	case ATA_STANDBY_CMD:
1782 	case ATA_STANDBY_IMMEDIATE:
1783 	case ATA_IDLE_CMD:
1784 	case ATA_IDLE_IMMEDIATE:
1785 	case ATA_SLEEP:
1786 	case ATA_READ_VERIFY:
1787 	case ATA_READ_VERIFY48:
1788 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1789 		break;
1790 	case ATA_ATAPI_IDENTIFY:
1791 		handle_atapi_identify(p, slot, cfis);
1792 		break;
1793 	case ATA_PACKET_CMD:
1794 		if (!p->atapi) {
1795 			ahci_write_fis_d2h(p, slot, cfis,
1796 			    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1797 		} else
1798 			handle_packet_cmd(p, slot, cfis);
1799 		break;
1800 	default:
1801 		WPRINTF("Unsupported cmd:%02x", cfis[2]);
1802 		ahci_write_fis_d2h(p, slot, cfis,
1803 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1804 		break;
1805 	}
1806 }
1807 
1808 static void
1809 ahci_handle_slot(struct ahci_port *p, int slot)
1810 {
1811 	struct ahci_cmd_hdr *hdr;
1812 #ifdef AHCI_DEBUG
1813 	struct ahci_prdt_entry *prdt;
1814 #endif
1815 	struct pci_ahci_softc *sc;
1816 	uint8_t *cfis;
1817 #ifdef AHCI_DEBUG
1818 	int cfl, i;
1819 #endif
1820 
1821 	sc = p->pr_sc;
1822 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1823 #ifdef AHCI_DEBUG
1824 	cfl = (hdr->flags & 0x1f) * 4;
1825 #endif
1826 	cfis = paddr_guest2host(ahci_ctx(sc), hdr->ctba,
1827 			0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry));
1828 #ifdef AHCI_DEBUG
1829 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1830 
1831 	DPRINTF("cfis:");
1832 	for (i = 0; i < cfl; i++) {
1833 		if (i % 10 == 0)
1834 			DPRINTF("");
1835 		DPRINTF("%02x ", cfis[i]);
1836 	}
1837 	DPRINTF("");
1838 
1839 	for (i = 0; i < hdr->prdtl; i++) {
1840 		DPRINTF("%d@%08"PRIx64"", prdt->dbc & 0x3fffff, prdt->dba);
1841 		prdt++;
1842 	}
1843 #endif
1844 
1845 	if (cfis[0] != FIS_TYPE_REGH2D) {
1846 		WPRINTF("Not a H2D FIS:%02x", cfis[0]);
1847 		return;
1848 	}
1849 
1850 	if (cfis[1] & 0x80) {
1851 		ahci_handle_cmd(p, slot, cfis);
1852 	} else {
1853 		if (cfis[15] & (1 << 2))
1854 			p->reset = 1;
1855 		else if (p->reset) {
1856 			p->reset = 0;
1857 			ahci_port_reset(p);
1858 		}
1859 		p->ci &= ~(1 << slot);
1860 	}
1861 }
1862 
1863 static void
1864 ahci_handle_port(struct ahci_port *p)
1865 {
1866 
1867 	if (!(p->cmd & AHCI_P_CMD_ST))
1868 		return;
1869 
1870 	/*
1871 	 * Search for any new commands to issue ignoring those that
1872 	 * are already in-flight.  Stop if device is busy or in error.
1873 	 */
1874 	for (; (p->ci & ~p->pending) != 0; p->ccs = ((p->ccs + 1) & 31)) {
1875 		if ((p->tfd & (ATA_S_BUSY | ATA_S_DRQ)) != 0)
1876 			break;
1877 		if (p->waitforclear)
1878 			break;
1879 		if ((p->ci & ~p->pending & (1 << p->ccs)) != 0) {
1880 			p->cmd &= ~AHCI_P_CMD_CCS_MASK;
1881 			p->cmd |= p->ccs << AHCI_P_CMD_CCS_SHIFT;
1882 			ahci_handle_slot(p, p->ccs);
1883 		}
1884 	}
1885 }
1886 
1887 /*
1888  * blockif callback routine - this runs in the context of the blockif
1889  * i/o thread, so the mutex needs to be acquired.
1890  */
1891 static void
1892 ata_ioreq_cb(struct blockif_req *br, int err)
1893 {
1894 	struct ahci_cmd_hdr *hdr;
1895 	struct ahci_ioreq *aior;
1896 	struct ahci_port *p;
1897 	struct pci_ahci_softc *sc;
1898 	uint32_t tfd;
1899 	uint8_t *cfis;
1900 	int slot, ncq, dsm;
1901 
1902 	DPRINTF("%s %d", __func__, err);
1903 
1904 	ncq = dsm = 0;
1905 	aior = br->br_param;
1906 	p = aior->io_pr;
1907 	cfis = aior->cfis;
1908 	slot = aior->slot;
1909 	sc = p->pr_sc;
1910 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1911 
1912 	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
1913 	    cfis[2] == ATA_READ_FPDMA_QUEUED ||
1914 	    cfis[2] == ATA_SEND_FPDMA_QUEUED)
1915 		ncq = 1;
1916 	if (cfis[2] == ATA_DATA_SET_MANAGEMENT ||
1917 	    (cfis[2] == ATA_SEND_FPDMA_QUEUED &&
1918 	     (cfis[13] & 0x1f) == ATA_SFPDMA_DSM))
1919 		dsm = 1;
1920 
1921 	pthread_mutex_lock(&sc->mtx);
1922 
1923 	/*
1924 	 * Delete the blockif request from the busy list
1925 	 */
1926 	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1927 
1928 	/*
1929 	 * Move the blockif request back to the free list
1930 	 */
1931 	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
1932 
1933 	if (!err)
1934 		hdr->prdbc = aior->done;
1935 
1936 	if (!err && aior->more) {
1937 		if (dsm)
1938 			ahci_handle_dsm_trim(p, slot, cfis, aior->done);
1939 		else
1940 			ahci_handle_rw(p, slot, cfis, aior->done);
1941 		goto out;
1942 	}
1943 
1944 	if (!err)
1945 		tfd = ATA_S_READY | ATA_S_DSC;
1946 	else
1947 		tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1948 	if (ncq)
1949 		ahci_write_fis_sdb(p, slot, cfis, tfd);
1950 	else
1951 		ahci_write_fis_d2h(p, slot, cfis, tfd);
1952 
1953 	/*
1954 	 * This command is now complete.
1955 	 */
1956 	p->pending &= ~(1 << slot);
1957 
1958 	ahci_check_stopped(p);
1959 	ahci_handle_port(p);
1960 out:
1961 	pthread_mutex_unlock(&sc->mtx);
1962 	DPRINTF("%s exit", __func__);
1963 }
1964 
1965 static void
1966 atapi_ioreq_cb(struct blockif_req *br, int err)
1967 {
1968 	struct ahci_cmd_hdr *hdr;
1969 	struct ahci_ioreq *aior;
1970 	struct ahci_port *p;
1971 	struct pci_ahci_softc *sc;
1972 	uint8_t *cfis;
1973 	uint32_t tfd;
1974 	int slot;
1975 
1976 	DPRINTF("%s %d", __func__, err);
1977 
1978 	aior = br->br_param;
1979 	p = aior->io_pr;
1980 	cfis = aior->cfis;
1981 	slot = aior->slot;
1982 	sc = p->pr_sc;
1983 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + aior->slot * AHCI_CL_SIZE);
1984 
1985 	pthread_mutex_lock(&sc->mtx);
1986 
1987 	/*
1988 	 * Delete the blockif request from the busy list
1989 	 */
1990 	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1991 
1992 	/*
1993 	 * Move the blockif request back to the free list
1994 	 */
1995 	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
1996 
1997 	if (!err)
1998 		hdr->prdbc = aior->done;
1999 
2000 	if (!err && aior->more) {
2001 		atapi_read(p, slot, cfis, aior->done);
2002 		goto out;
2003 	}
2004 
2005 	if (!err) {
2006 		tfd = ATA_S_READY | ATA_S_DSC;
2007 	} else {
2008 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
2009 		p->asc = 0x21;
2010 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
2011 	}
2012 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
2013 	ahci_write_fis_d2h(p, slot, cfis, tfd);
2014 
2015 	/*
2016 	 * This command is now complete.
2017 	 */
2018 	p->pending &= ~(1 << slot);
2019 
2020 	ahci_check_stopped(p);
2021 	ahci_handle_port(p);
2022 out:
2023 	pthread_mutex_unlock(&sc->mtx);
2024 	DPRINTF("%s exit", __func__);
2025 }
2026 
2027 static void
2028 pci_ahci_ioreq_init(struct ahci_port *pr)
2029 {
2030 	struct ahci_ioreq *vr;
2031 	int i;
2032 
2033 	pr->ioqsz = blockif_queuesz(pr->bctx);
2034 	pr->ioreq = calloc(pr->ioqsz, sizeof(struct ahci_ioreq));
2035 	STAILQ_INIT(&pr->iofhd);
2036 
2037 	/*
2038 	 * Add all i/o request entries to the free queue
2039 	 */
2040 	for (i = 0; i < pr->ioqsz; i++) {
2041 		vr = &pr->ioreq[i];
2042 		vr->io_pr = pr;
2043 		if (!pr->atapi)
2044 			vr->io_req.br_callback = ata_ioreq_cb;
2045 		else
2046 			vr->io_req.br_callback = atapi_ioreq_cb;
2047 		vr->io_req.br_param = vr;
2048 		STAILQ_INSERT_TAIL(&pr->iofhd, vr, io_flist);
2049 	}
2050 
2051 	TAILQ_INIT(&pr->iobhd);
2052 }
2053 
2054 static void
2055 pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
2056 {
2057 	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
2058 	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
2059 	struct ahci_port *p = &sc->port[port];
2060 
2061 	DPRINTF("pci_ahci_port %d: write offset 0x%"PRIx64" value 0x%"PRIx64"",
2062 		port, offset, value);
2063 
2064 	switch (offset) {
2065 	case AHCI_P_CLB:
2066 		p->clb = value;
2067 		break;
2068 	case AHCI_P_CLBU:
2069 		p->clbu = value;
2070 		break;
2071 	case AHCI_P_FB:
2072 		p->fb = value;
2073 		break;
2074 	case AHCI_P_FBU:
2075 		p->fbu = value;
2076 		break;
2077 	case AHCI_P_IS:
2078 		p->is &= ~value;
2079 		ahci_port_intr(p);
2080 		break;
2081 	case AHCI_P_IE:
2082 		p->ie = value & 0xFDC000FF;
2083 		ahci_port_intr(p);
2084 		break;
2085 	case AHCI_P_CMD:
2086 	{
2087 		p->cmd &= ~(AHCI_P_CMD_ST | AHCI_P_CMD_SUD | AHCI_P_CMD_POD |
2088 		    AHCI_P_CMD_CLO | AHCI_P_CMD_FRE | AHCI_P_CMD_APSTE |
2089 		    AHCI_P_CMD_ATAPI | AHCI_P_CMD_DLAE | AHCI_P_CMD_ALPE |
2090 		    AHCI_P_CMD_ASP | AHCI_P_CMD_ICC_MASK);
2091 		p->cmd |= (AHCI_P_CMD_ST | AHCI_P_CMD_SUD | AHCI_P_CMD_POD |
2092 		    AHCI_P_CMD_CLO | AHCI_P_CMD_FRE | AHCI_P_CMD_APSTE |
2093 		    AHCI_P_CMD_ATAPI | AHCI_P_CMD_DLAE | AHCI_P_CMD_ALPE |
2094 		    AHCI_P_CMD_ASP | AHCI_P_CMD_ICC_MASK) & value;
2095 
2096 		if (!(value & AHCI_P_CMD_ST)) {
2097 			ahci_port_stop(p);
2098 		} else {
2099 			uint64_t clb;
2100 
2101 			p->cmd |= AHCI_P_CMD_CR;
2102 			clb = (uint64_t)p->clbu << 32 | p->clb;
2103 			p->cmd_lst = paddr_guest2host(ahci_ctx(sc), clb,
2104 					AHCI_CL_SIZE * AHCI_MAX_SLOTS);
2105 		}
2106 
2107 		if (value & AHCI_P_CMD_FRE) {
2108 			uint64_t fb;
2109 
2110 			p->cmd |= AHCI_P_CMD_FR;
2111 			fb = (uint64_t)p->fbu << 32 | p->fb;
2112 			/* we don't support FBSCP, so rfis size is 256Bytes */
2113 			p->rfis = paddr_guest2host(ahci_ctx(sc), fb, 256);
2114 		} else {
2115 			p->cmd &= ~AHCI_P_CMD_FR;
2116 		}
2117 
2118 		if (value & AHCI_P_CMD_CLO) {
2119 			p->tfd &= ~(ATA_S_BUSY | ATA_S_DRQ);
2120 			p->cmd &= ~AHCI_P_CMD_CLO;
2121 		}
2122 
2123 		if (value & AHCI_P_CMD_ICC_MASK) {
2124 			p->cmd &= ~AHCI_P_CMD_ICC_MASK;
2125 		}
2126 
2127 		ahci_handle_port(p);
2128 		break;
2129 	}
2130 	case AHCI_P_TFD:
2131 	case AHCI_P_SIG:
2132 	case AHCI_P_SSTS:
2133 		WPRINTF("pci_ahci_port: read only registers 0x%"PRIx64"", offset);
2134 		break;
2135 	case AHCI_P_SCTL:
2136 		p->sctl = value;
2137 		if (!(p->cmd & AHCI_P_CMD_ST)) {
2138 			if (value & ATA_SC_DET_RESET)
2139 				ahci_port_reset(p);
2140 		}
2141 		break;
2142 	case AHCI_P_SERR:
2143 		p->serr &= ~value;
2144 		break;
2145 	case AHCI_P_SACT:
2146 		p->sact |= value;
2147 		break;
2148 	case AHCI_P_CI:
2149 		p->ci |= value;
2150 		ahci_handle_port(p);
2151 		break;
2152 	case AHCI_P_SNTF:
2153 	case AHCI_P_FBS:
2154 	default:
2155 		break;
2156 	}
2157 }
2158 
2159 static void
2160 pci_ahci_host_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
2161 {
2162 	DPRINTF("pci_ahci_host: write offset 0x%"PRIx64" value 0x%"PRIx64"",
2163 		offset, value);
2164 
2165 	switch (offset) {
2166 	case AHCI_CAP:
2167 	case AHCI_PI:
2168 	case AHCI_VS:
2169 	case AHCI_CAP2:
2170 		DPRINTF("pci_ahci_host: read only registers 0x%"PRIx64"", offset);
2171 		break;
2172 	case AHCI_GHC:
2173 		if (value & AHCI_GHC_HR) {
2174 			ahci_reset(sc);
2175 			break;
2176 		}
2177 		if (value & AHCI_GHC_IE)
2178 			sc->ghc |= AHCI_GHC_IE;
2179 		else
2180 			sc->ghc &= ~AHCI_GHC_IE;
2181 		ahci_generate_intr(sc, 0xffffffff);
2182 		break;
2183 	case AHCI_IS:
2184 		sc->is &= ~value;
2185 		ahci_generate_intr(sc, value);
2186 		break;
2187 	default:
2188 		break;
2189 	}
2190 }
2191 
2192 static void
2193 pci_ahci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
2194 		int baridx, uint64_t offset, int size, uint64_t value)
2195 {
2196 	struct pci_ahci_softc *sc = pi->pi_arg;
2197 
2198 	assert(baridx == 5);
2199 	assert((offset % 4) == 0 && size == 4);
2200 
2201 	pthread_mutex_lock(&sc->mtx);
2202 
2203 	if (offset < AHCI_OFFSET)
2204 		pci_ahci_host_write(sc, offset, value);
2205 	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
2206 		pci_ahci_port_write(sc, offset, value);
2207 	else
2208 		WPRINTF("pci_ahci: unknown i/o write offset 0x%"PRIx64"", offset);
2209 
2210 	pthread_mutex_unlock(&sc->mtx);
2211 }
2212 
2213 static uint64_t
2214 pci_ahci_host_read(struct pci_ahci_softc *sc, uint64_t offset)
2215 {
2216 	uint32_t value;
2217 
2218 	switch (offset) {
2219 	case AHCI_CAP:
2220 	case AHCI_GHC:
2221 	case AHCI_IS:
2222 	case AHCI_PI:
2223 	case AHCI_VS:
2224 	case AHCI_CCCC:
2225 	case AHCI_CCCP:
2226 	case AHCI_EM_LOC:
2227 	case AHCI_EM_CTL:
2228 	case AHCI_CAP2:
2229 	{
2230 		uint32_t *p = &sc->cap;
2231 		p += (offset - AHCI_CAP) / sizeof(uint32_t);
2232 		value = *p;
2233 		break;
2234 	}
2235 	default:
2236 		value = 0;
2237 		break;
2238 	}
2239 	DPRINTF("pci_ahci_host: read offset 0x%"PRIx64" value 0x%x",
2240 		offset, value);
2241 
2242 	return (value);
2243 }
2244 
2245 static uint64_t
2246 pci_ahci_port_read(struct pci_ahci_softc *sc, uint64_t offset)
2247 {
2248 	uint32_t value;
2249 	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
2250 	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
2251 
2252 	switch (offset) {
2253 	case AHCI_P_CLB:
2254 	case AHCI_P_CLBU:
2255 	case AHCI_P_FB:
2256 	case AHCI_P_FBU:
2257 	case AHCI_P_IS:
2258 	case AHCI_P_IE:
2259 	case AHCI_P_CMD:
2260 	case AHCI_P_TFD:
2261 	case AHCI_P_SIG:
2262 	case AHCI_P_SSTS:
2263 	case AHCI_P_SCTL:
2264 	case AHCI_P_SERR:
2265 	case AHCI_P_SACT:
2266 	case AHCI_P_CI:
2267 	case AHCI_P_SNTF:
2268 	case AHCI_P_FBS:
2269 	{
2270 		uint32_t *p= &sc->port[port].clb;
2271 		p += (offset - AHCI_P_CLB) / sizeof(uint32_t);
2272 		value = *p;
2273 		break;
2274 	}
2275 	default:
2276 		value = 0;
2277 		break;
2278 	}
2279 
2280 	DPRINTF("pci_ahci_port %d: read offset 0x%"PRIx64" value 0x%x",
2281 		port, offset, value);
2282 
2283 	return value;
2284 }
2285 
2286 static uint64_t
2287 pci_ahci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
2288     uint64_t regoff, int size)
2289 {
2290 	struct pci_ahci_softc *sc = pi->pi_arg;
2291 	uint64_t offset;
2292 	uint32_t value;
2293 
2294 	assert(baridx == 5);
2295 	assert(size == 1 || size == 2 || size == 4);
2296 	assert((regoff & (size - 1)) == 0);
2297 
2298 	pthread_mutex_lock(&sc->mtx);
2299 
2300 	offset = regoff & ~0x3;	    /* round down to a multiple of 4 bytes */
2301 	if (offset < AHCI_OFFSET)
2302 		value = pci_ahci_host_read(sc, offset);
2303 	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
2304 		value = pci_ahci_port_read(sc, offset);
2305 	else {
2306 		value = 0;
2307 		WPRINTF("pci_ahci: unknown i/o read offset 0x%"PRIx64"",
2308 		    regoff);
2309 	}
2310 	value >>= 8 * (regoff & 0x3);
2311 
2312 	pthread_mutex_unlock(&sc->mtx);
2313 
2314 	return (value);
2315 }
2316 
2317 static int
2318 pci_ahci_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts, int atapi)
2319 {
2320 	char bident[sizeof("XX:XX:XX")];
2321 	struct blockif_ctxt *bctxt;
2322 	struct pci_ahci_softc *sc;
2323 	int ret, slots, p;
2324 	MD5_CTX mdctx;
2325 	u_char digest[16];
2326 	char *next, *next2;
2327 	char *bopt, *uopt, *xopts, *config;
2328 	FILE* fp;
2329 	size_t block_len;
2330 	int comma, optpos;
2331 
2332 	ret = 0;
2333 
2334 #ifdef AHCI_DEBUG
2335 	dbg = fopen("/tmp/log", "w+");
2336 #endif
2337 
2338 	sc = calloc(1, sizeof(struct pci_ahci_softc));
2339 	pi->pi_arg = sc;
2340 	sc->asc_pi = pi;
2341 	pthread_mutex_init(&sc->mtx, NULL);
2342 	sc->ports = 0;
2343 	sc->pi = 0;
2344 	slots = 32;
2345 
2346 	for (p = 0; p < MAX_PORTS && opts != NULL; p++, opts = next) {
2347 		struct ata_params *ata_ident = &sc->port[p].ata_ident;
2348 		memset(ata_ident, 0, sizeof(struct ata_params));
2349 
2350 		/* Identify and cut off type of present port. */
2351 		if (strncmp(opts, "hd:", 3) == 0) {
2352 			atapi = 0;
2353 			opts += 3;
2354 		} else if (strncmp(opts, "cd:", 3) == 0) {
2355 			atapi = 1;
2356 			opts += 3;
2357 		}
2358 
2359 		/* Find and cut off the next port options. */
2360 		next = strstr(opts, ",hd:");
2361 		next2 = strstr(opts, ",cd:");
2362 		if (next == NULL || (next2 != NULL && next2 < next))
2363 			next = next2;
2364 		if (next != NULL) {
2365 			next[0] = 0;
2366 			next++;
2367 		}
2368 
2369 		if (opts[0] == 0)
2370 			continue;
2371 
2372 		uopt = strdup(opts);
2373 		bopt = NULL;
2374 		fp = open_memstream(&bopt, &block_len);
2375 		comma = 0;
2376 		optpos = 0;
2377 
2378 		for (xopts = strtok(uopt, ",");
2379 		     xopts != NULL;
2380 		     xopts = strtok(NULL, ",")) {
2381 
2382 			/* First option assume as block filename. */
2383 			if (optpos == 0) {
2384 				/*
2385 				 * Create an identifier for the backing file.
2386 				 * Use parts of the md5 sum of the filename
2387 				 */
2388 				char ident[AHCI_PORT_IDENT];
2389 				MD5Init(&mdctx);
2390 				MD5Update(&mdctx, opts, strlen(opts));
2391 				MD5Final(digest, &mdctx);
2392 				snprintf(ident, AHCI_PORT_IDENT,
2393 					"BHYVE-%02X%02X-%02X%02X-%02X%02X",
2394 					digest[0], digest[1], digest[2], digest[3], digest[4],
2395 					digest[5]);
2396 				ata_string((uint8_t*)&ata_ident->serial, ident, 20);
2397 				ata_string((uint8_t*)&ata_ident->revision, "001", 8);
2398 				if (atapi) {
2399 					ata_string((uint8_t*)&ata_ident->model, "BHYVE SATA DVD ROM", 40);
2400 				}
2401 				else {
2402 					ata_string((uint8_t*)&ata_ident->model, "BHYVE SATA DISK", 40);
2403 				}
2404 			}
2405 
2406 			if ((config = strchr(xopts, '=')) != NULL) {
2407 				*config++ = '\0';
2408 				if (!strcmp("nmrr", xopts)) {
2409 					ata_ident->media_rotation_rate = atoi(config);
2410 				}
2411 				else if (!strcmp("ser", xopts)) {
2412 					ata_string((uint8_t*)(&ata_ident->serial), config, 20);
2413 				}
2414 				else if (!strcmp("rev", xopts)) {
2415 					ata_string((uint8_t*)(&ata_ident->revision), config, 8);
2416 				}
2417 				else if (!strcmp("model", xopts)) {
2418 					ata_string((uint8_t*)(&ata_ident->model), config, 40);
2419 				}
2420 				else {
2421 					/* Pass all other options to blockif_open. */
2422 					*--config = '=';
2423 					fprintf(fp, "%s%s", comma ? "," : "", xopts);
2424 					comma = 1;
2425 				}
2426 			}
2427 			else {
2428 				/* Pass all other options to blockif_open. */
2429 				fprintf(fp, "%s%s", comma ? "," : "", xopts);
2430 				comma = 1;
2431 			}
2432 			optpos++;
2433 		}
2434 		free(uopt);
2435 		fclose(fp);
2436 
2437 		DPRINTF("%s\n", bopt);
2438 
2439 		/*
2440 		 * Attempt to open the backing image. Use the PCI slot/func
2441 		 * and the port number for the identifier string.
2442 		 */
2443 		snprintf(bident, sizeof(bident), "%d:%d:%d", pi->pi_slot,
2444 		    pi->pi_func, p);
2445 		bctxt = blockif_open(bopt, bident);
2446 		free(bopt);
2447 
2448 		if (bctxt == NULL) {
2449 			sc->ports = p;
2450 			ret = 1;
2451 			goto open_fail;
2452 		}
2453 		sc->port[p].bctx = bctxt;
2454 		sc->port[p].pr_sc = sc;
2455 		sc->port[p].port = p;
2456 		sc->port[p].atapi = atapi;
2457 
2458 		ata_identify_init(&sc->port[p], atapi);
2459 
2460 		/*
2461 		 * Allocate blockif request structures and add them
2462 		 * to the free list
2463 		 */
2464 		pci_ahci_ioreq_init(&sc->port[p]);
2465 
2466 		sc->pi |= (1 << p);
2467 		if (sc->port[p].ioqsz < slots)
2468 			slots = sc->port[p].ioqsz;
2469 	}
2470 	sc->ports = p;
2471 
2472 	/* Intel ICH8 AHCI */
2473 	--slots;
2474 	if (sc->ports < DEF_PORTS)
2475 		sc->ports = DEF_PORTS;
2476 	sc->cap = AHCI_CAP_64BIT | AHCI_CAP_SNCQ | AHCI_CAP_SSNTF |
2477 	    AHCI_CAP_SMPS | AHCI_CAP_SSS | AHCI_CAP_SALP |
2478 	    AHCI_CAP_SAL | AHCI_CAP_SCLO | (0x3 << AHCI_CAP_ISS_SHIFT)|
2479 	    AHCI_CAP_PMD | AHCI_CAP_SSC | AHCI_CAP_PSC |
2480 	    (slots << AHCI_CAP_NCS_SHIFT) | AHCI_CAP_SXS | (sc->ports - 1);
2481 
2482 	sc->vs = 0x10300;
2483 	sc->cap2 = AHCI_CAP2_APST;
2484 	ahci_reset(sc);
2485 
2486 	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x2821);
2487 	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x8086);
2488 	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
2489 	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_STORAGE_SATA);
2490 	pci_set_cfgdata8(pi, PCIR_PROGIF, PCIP_STORAGE_SATA_AHCI_1_0);
2491 	p = MIN(sc->ports, 16);
2492 	p = flsl(p) - ((p & (p - 1)) ? 0 : 1);
2493 	pci_emul_add_msicap(pi, 1 << p);
2494 	pci_emul_alloc_bar(pi, 5, PCIBAR_MEM32,
2495 	    AHCI_OFFSET + sc->ports * AHCI_STEP);
2496 
2497 	pci_lintr_request(pi);
2498 
2499 open_fail:
2500 	if (ret) {
2501 		for (p = 0; p < sc->ports; p++) {
2502 			if (sc->port[p].bctx != NULL)
2503 				blockif_close(sc->port[p].bctx);
2504 		}
2505 		free(sc);
2506 	}
2507 
2508 	return (ret);
2509 }
2510 
/*
 * pe_init hook for the hard-disk flavours: forwards to pci_ahci_init()
 * with atapi == 0, so ports without an explicit "hd:"/"cd:" prefix are
 * set up as disks.
 */
static int
pci_ahci_hd_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int atapi = 0;

	return (pci_ahci_init(ctx, pi, opts, atapi));
}
2517 
/*
 * pe_init hook for the ATAPI flavour: forwards to pci_ahci_init() with
 * atapi == 1, so ports without an explicit "hd:"/"cd:" prefix are set up
 * as ATAPI devices.
 */
static int
pci_ahci_atapi_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int atapi = 1;

	return (pci_ahci_init(ctx, pi, opts, atapi));
}
2524 
2525 #ifdef BHYVE_SNAPSHOT
2526 static int
2527 pci_ahci_snapshot_save_queues(struct ahci_port *port,
2528 			      struct vm_snapshot_meta *meta)
2529 {
2530 	int ret;
2531 	int idx;
2532 	struct ahci_ioreq *ioreq;
2533 
2534 	STAILQ_FOREACH(ioreq, &port->iofhd, io_flist) {
2535 		idx = ((void *) ioreq - (void *) port->ioreq) / sizeof(*ioreq);
2536 		SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
2537 	}
2538 
2539 	idx = -1;
2540 	SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
2541 
2542 	TAILQ_FOREACH(ioreq, &port->iobhd, io_blist) {
2543 		idx = ((void *) ioreq - (void *) port->ioreq) / sizeof(*ioreq);
2544 		SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
2545 
2546 		/*
2547 		 * Snapshot only the busy requests; other requests are
2548 		 * not valid.
2549 		 */
2550 		ret = blockif_snapshot_req(&ioreq->io_req, meta);
2551 		if (ret != 0) {
2552 			fprintf(stderr, "%s: failed to snapshot req\r\n",
2553 				__func__);
2554 			goto done;
2555 		}
2556 	}
2557 
2558 	idx = -1;
2559 	SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
2560 
2561 done:
2562 	return (ret);
2563 }
2564 
2565 static int
2566 pci_ahci_snapshot_restore_queues(struct ahci_port *port,
2567 				 struct vm_snapshot_meta *meta)
2568 {
2569 	int ret;
2570 	int idx;
2571 	struct ahci_ioreq *ioreq;
2572 
2573 	/* Empty the free queue before restoring. */
2574 	while (!STAILQ_EMPTY(&port->iofhd))
2575 		STAILQ_REMOVE_HEAD(&port->iofhd, io_flist);
2576 
2577 	/* Restore the free queue. */
2578 	while (1) {
2579 		SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
2580 		if (idx == -1)
2581 			break;
2582 
2583 		STAILQ_INSERT_TAIL(&port->iofhd, &port->ioreq[idx], io_flist);
2584 	}
2585 
2586 	/* Restore the busy queue. */
2587 	while (1) {
2588 		SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
2589 		if (idx == -1)
2590 			break;
2591 
2592 		ioreq = &port->ioreq[idx];
2593 		TAILQ_INSERT_TAIL(&port->iobhd, ioreq, io_blist);
2594 
2595 		/*
2596 		 * Restore only the busy requests; other requests are
2597 		 * not valid.
2598 		 */
2599 		ret = blockif_snapshot_req(&ioreq->io_req, meta);
2600 		if (ret != 0) {
2601 			fprintf(stderr, "%s: failed to restore request\r\n",
2602 				__func__);
2603 			goto done;
2604 		}
2605 
2606 		/* Re-enqueue the requests in the block interface. */
2607 		if (ioreq->readop)
2608 			ret = blockif_read(port->bctx, &ioreq->io_req);
2609 		else
2610 			ret = blockif_write(port->bctx, &ioreq->io_req);
2611 
2612 		if (ret != 0) {
2613 			fprintf(stderr,
2614 				"%s: failed to re-enqueue request\r\n",
2615 				__func__);
2616 			goto done;
2617 		}
2618 	}
2619 
2620 done:
2621 	return (ret);
2622 }
2623 
2624 static int
2625 pci_ahci_snapshot(struct vm_snapshot_meta *meta)
2626 {
2627 	int i, j, ret;
2628 	void *bctx;
2629 	struct pci_devinst *pi;
2630 	struct pci_ahci_softc *sc;
2631 	struct ahci_port *port;
2632 	struct ahci_cmd_hdr *hdr;
2633 	struct ahci_ioreq *ioreq;
2634 
2635 	pi = meta->dev_data;
2636 	sc = pi->pi_arg;
2637 
2638 	/* TODO: add mtx lock/unlock */
2639 
2640 	SNAPSHOT_VAR_OR_LEAVE(sc->ports, meta, ret, done);
2641 	SNAPSHOT_VAR_OR_LEAVE(sc->cap, meta, ret, done);
2642 	SNAPSHOT_VAR_OR_LEAVE(sc->ghc, meta, ret, done);
2643 	SNAPSHOT_VAR_OR_LEAVE(sc->is, meta, ret, done);
2644 	SNAPSHOT_VAR_OR_LEAVE(sc->pi, meta, ret, done);
2645 	SNAPSHOT_VAR_OR_LEAVE(sc->vs, meta, ret, done);
2646 	SNAPSHOT_VAR_OR_LEAVE(sc->ccc_ctl, meta, ret, done);
2647 	SNAPSHOT_VAR_OR_LEAVE(sc->ccc_pts, meta, ret, done);
2648 	SNAPSHOT_VAR_OR_LEAVE(sc->em_loc, meta, ret, done);
2649 	SNAPSHOT_VAR_OR_LEAVE(sc->em_ctl, meta, ret, done);
2650 	SNAPSHOT_VAR_OR_LEAVE(sc->cap2, meta, ret, done);
2651 	SNAPSHOT_VAR_OR_LEAVE(sc->bohc, meta, ret, done);
2652 	SNAPSHOT_VAR_OR_LEAVE(sc->lintr, meta, ret, done);
2653 
2654 	for (i = 0; i < MAX_PORTS; i++) {
2655 		port = &sc->port[i];
2656 
2657 		if (meta->op == VM_SNAPSHOT_SAVE)
2658 			bctx = port->bctx;
2659 
2660 		SNAPSHOT_VAR_OR_LEAVE(bctx, meta, ret, done);
2661 		SNAPSHOT_VAR_OR_LEAVE(port->port, meta, ret, done);
2662 
2663 		/* Mostly for restore; save is ensured by the lines above. */
2664 		if (((bctx == NULL) && (port->bctx != NULL)) ||
2665 		    ((bctx != NULL) && (port->bctx == NULL))) {
2666 			fprintf(stderr, "%s: ports not matching\r\n", __func__);
2667 			ret = EINVAL;
2668 			goto done;
2669 		}
2670 
2671 		if (port->bctx == NULL)
2672 			continue;
2673 
2674 		if (port->port != i) {
2675 			fprintf(stderr, "%s: ports not matching: "
2676 					"actual: %d expected: %d\r\n",
2677 					__func__, port->port, i);
2678 			ret = EINVAL;
2679 			goto done;
2680 		}
2681 
2682 		SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(port->cmd_lst,
2683 			AHCI_CL_SIZE * AHCI_MAX_SLOTS, false, meta, ret, done);
2684 		SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(port->rfis, 256, false, meta,
2685 			ret, done);
2686 
2687 		SNAPSHOT_VAR_OR_LEAVE(port->ata_ident, meta, ret, done);
2688 		SNAPSHOT_VAR_OR_LEAVE(port->atapi, meta, ret, done);
2689 		SNAPSHOT_VAR_OR_LEAVE(port->reset, meta, ret, done);
2690 		SNAPSHOT_VAR_OR_LEAVE(port->waitforclear, meta, ret, done);
2691 		SNAPSHOT_VAR_OR_LEAVE(port->mult_sectors, meta, ret, done);
2692 		SNAPSHOT_VAR_OR_LEAVE(port->xfermode, meta, ret, done);
2693 		SNAPSHOT_VAR_OR_LEAVE(port->err_cfis, meta, ret, done);
2694 		SNAPSHOT_VAR_OR_LEAVE(port->sense_key, meta, ret, done);
2695 		SNAPSHOT_VAR_OR_LEAVE(port->asc, meta, ret, done);
2696 		SNAPSHOT_VAR_OR_LEAVE(port->ccs, meta, ret, done);
2697 		SNAPSHOT_VAR_OR_LEAVE(port->pending, meta, ret, done);
2698 
2699 		SNAPSHOT_VAR_OR_LEAVE(port->clb, meta, ret, done);
2700 		SNAPSHOT_VAR_OR_LEAVE(port->clbu, meta, ret, done);
2701 		SNAPSHOT_VAR_OR_LEAVE(port->fb, meta, ret, done);
2702 		SNAPSHOT_VAR_OR_LEAVE(port->fbu, meta, ret, done);
2703 		SNAPSHOT_VAR_OR_LEAVE(port->ie, meta, ret, done);
2704 		SNAPSHOT_VAR_OR_LEAVE(port->cmd, meta, ret, done);
2705 		SNAPSHOT_VAR_OR_LEAVE(port->unused0, meta, ret, done);
2706 		SNAPSHOT_VAR_OR_LEAVE(port->tfd, meta, ret, done);
2707 		SNAPSHOT_VAR_OR_LEAVE(port->sig, meta, ret, done);
2708 		SNAPSHOT_VAR_OR_LEAVE(port->ssts, meta, ret, done);
2709 		SNAPSHOT_VAR_OR_LEAVE(port->sctl, meta, ret, done);
2710 		SNAPSHOT_VAR_OR_LEAVE(port->serr, meta, ret, done);
2711 		SNAPSHOT_VAR_OR_LEAVE(port->sact, meta, ret, done);
2712 		SNAPSHOT_VAR_OR_LEAVE(port->ci, meta, ret, done);
2713 		SNAPSHOT_VAR_OR_LEAVE(port->sntf, meta, ret, done);
2714 		SNAPSHOT_VAR_OR_LEAVE(port->fbs, meta, ret, done);
2715 		SNAPSHOT_VAR_OR_LEAVE(port->ioqsz, meta, ret, done);
2716 
2717 		for (j = 0; j < port->ioqsz; j++) {
2718 			ioreq = &port->ioreq[j];
2719 
2720 			/* blockif_req snapshot done only for busy requests. */
2721 			hdr = (struct ahci_cmd_hdr *)(port->cmd_lst +
2722 				ioreq->slot * AHCI_CL_SIZE);
2723 			SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(ioreq->cfis,
2724 				0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry),
2725 				false, meta, ret, done);
2726 
2727 			SNAPSHOT_VAR_OR_LEAVE(ioreq->len, meta, ret, done);
2728 			SNAPSHOT_VAR_OR_LEAVE(ioreq->done, meta, ret, done);
2729 			SNAPSHOT_VAR_OR_LEAVE(ioreq->slot, meta, ret, done);
2730 			SNAPSHOT_VAR_OR_LEAVE(ioreq->more, meta, ret, done);
2731 			SNAPSHOT_VAR_OR_LEAVE(ioreq->readop, meta, ret, done);
2732 		}
2733 
2734 		/* Perform save / restore specific operations. */
2735 		if (meta->op == VM_SNAPSHOT_SAVE) {
2736 			ret = pci_ahci_snapshot_save_queues(port, meta);
2737 			if (ret != 0)
2738 				goto done;
2739 		} else if (meta->op == VM_SNAPSHOT_RESTORE) {
2740 			ret = pci_ahci_snapshot_restore_queues(port, meta);
2741 			if (ret != 0)
2742 				goto done;
2743 		} else {
2744 			ret = EINVAL;
2745 			goto done;
2746 		}
2747 
2748 		ret = blockif_snapshot(port->bctx, meta);
2749 		if (ret != 0) {
2750 			fprintf(stderr, "%s: failed to restore blockif\r\n",
2751 				__func__);
2752 			goto done;
2753 		}
2754 	}
2755 
2756 done:
2757 	return (ret);
2758 }
2759 
2760 static int
2761 pci_ahci_pause(struct vmctx *ctx, struct pci_devinst *pi)
2762 {
2763 	struct pci_ahci_softc *sc;
2764 	struct blockif_ctxt *bctxt;
2765 	int i;
2766 
2767 	sc = pi->pi_arg;
2768 
2769 	for (i = 0; i < MAX_PORTS; i++) {
2770 		bctxt = sc->port[i].bctx;
2771 		if (bctxt == NULL)
2772 			continue;
2773 
2774 		blockif_pause(bctxt);
2775 	}
2776 
2777 	return (0);
2778 }
2779 
2780 static int
2781 pci_ahci_resume(struct vmctx *ctx, struct pci_devinst *pi)
2782 {
2783 	struct pci_ahci_softc *sc;
2784 	struct blockif_ctxt *bctxt;
2785 	int i;
2786 
2787 	sc = pi->pi_arg;
2788 
2789 	for (i = 0; i < MAX_PORTS; i++) {
2790 		bctxt = sc->port[i].bctx;
2791 		if (bctxt == NULL)
2792 			continue;
2793 
2794 		blockif_resume(bctxt);
2795 	}
2796 
2797 	return (0);
2798 }
2799 #endif
2800 
2801 /*
2802  * Use separate emulation names to distinguish drive and atapi devices
2803  */
2804 struct pci_devemu pci_de_ahci = {
2805 	.pe_emu =	"ahci",
2806 	.pe_init =	pci_ahci_hd_init,
2807 	.pe_barwrite =	pci_ahci_write,
2808 	.pe_barread =	pci_ahci_read,
2809 #ifdef BHYVE_SNAPSHOT
2810 	.pe_snapshot =	pci_ahci_snapshot,
2811 	.pe_pause =	pci_ahci_pause,
2812 	.pe_resume =	pci_ahci_resume,
2813 #endif
2814 };
2815 PCI_EMUL_SET(pci_de_ahci);
2816 
2817 struct pci_devemu pci_de_ahci_hd = {
2818 	.pe_emu =	"ahci-hd",
2819 	.pe_init =	pci_ahci_hd_init,
2820 	.pe_barwrite =	pci_ahci_write,
2821 	.pe_barread =	pci_ahci_read,
2822 #ifdef BHYVE_SNAPSHOT
2823 	.pe_snapshot =	pci_ahci_snapshot,
2824 	.pe_pause =	pci_ahci_pause,
2825 	.pe_resume =	pci_ahci_resume,
2826 #endif
2827 };
2828 PCI_EMUL_SET(pci_de_ahci_hd);
2829 
2830 struct pci_devemu pci_de_ahci_cd = {
2831 	.pe_emu =	"ahci-cd",
2832 	.pe_init =	pci_ahci_atapi_init,
2833 	.pe_barwrite =	pci_ahci_write,
2834 	.pe_barread =	pci_ahci_read,
2835 #ifdef BHYVE_SNAPSHOT
2836 	.pe_snapshot =	pci_ahci_snapshot,
2837 	.pe_pause =	pci_ahci_pause,
2838 	.pe_resume =	pci_ahci_resume,
2839 #endif
2840 };
2841 PCI_EMUL_SET(pci_de_ahci_cd);
2842