xref: /freebsd/usr.sbin/bhyve/pci_ahci.c (revision 1323ec571215a77ddd21294f0871979d5ad6b992)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2013  Zhixiang Yu <zcore@freebsd.org>
5  * Copyright (c) 2015-2016 Alexander Motin <mav@FreeBSD.org>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * $FreeBSD$
30  */
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include <sys/param.h>
36 #include <sys/linker_set.h>
37 #include <sys/stat.h>
38 #include <sys/uio.h>
39 #include <sys/ioctl.h>
40 #include <sys/disk.h>
41 #include <sys/ata.h>
42 #include <sys/endian.h>
43 
44 #include <machine/vmm_snapshot.h>
45 
46 #include <errno.h>
47 #include <fcntl.h>
48 #include <stdio.h>
49 #include <stdlib.h>
50 #include <stdint.h>
51 #include <string.h>
52 #include <strings.h>
53 #include <unistd.h>
54 #include <assert.h>
55 #include <pthread.h>
56 #include <pthread_np.h>
57 #include <inttypes.h>
58 #include <md5.h>
59 
60 #include "bhyverun.h"
61 #include "config.h"
62 #include "debug.h"
63 #include "pci_emul.h"
64 #include "ahci.h"
65 #include "block_if.h"
66 
/*
 * Port counts.  MAX_PORTS sizes the port[] array in struct pci_ahci_softc.
 * DEF_PORTS is presumably the number of ports emulated by default; its use
 * is not visible in this part of the file.
 */
#define	DEF_PORTS	6	/* Intel ICH8 AHCI supports 6 ports */
#define	MAX_PORTS	32	/* AHCI supports 32 ports */

/* Device signatures stored in a port's 'sig' field by ahci_port_reset(). */
#define	PxSIG_ATA	0x00000101 /* ATA drive */
#define	PxSIG_ATAPI	0xeb140101 /* ATAPI drive */
72 
/*
 * SATA FIS type codes; the code is stored in byte 0 of a FIS.
 * ahci_write_fis() only accepts FIS_TYPE_REGD2H, FIS_TYPE_SETDEVBITS and
 * FIS_TYPE_PIOSETUP and logs a warning for any other type.
 */
enum sata_fis_type {
	FIS_TYPE_REGH2D		= 0x27,	/* Register FIS - host to device */
	FIS_TYPE_REGD2H		= 0x34,	/* Register FIS - device to host */
	FIS_TYPE_DMAACT		= 0x39,	/* DMA activate FIS - device to host */
	FIS_TYPE_DMASETUP	= 0x41,	/* DMA setup FIS - bidirectional */
	FIS_TYPE_DATA		= 0x46,	/* Data FIS - bidirectional */
	FIS_TYPE_BIST		= 0x58,	/* BIST activate FIS - bidirectional */
	FIS_TYPE_PIOSETUP	= 0x5F,	/* PIO setup FIS - device to host */
	FIS_TYPE_SETDEVBITS	= 0xA1,	/* Set dev bits FIS - device to host */
};
83 
/*
 * SCSI opcodes
 *
 * NOTE(review): presumably compared against the first byte of the ATAPI
 * command packet; the dispatch code is not in this part of the file.
 */
#define	TEST_UNIT_READY		0x00
#define	REQUEST_SENSE		0x03
#define	INQUIRY			0x12
#define	START_STOP_UNIT		0x1B
#define	PREVENT_ALLOW		0x1E
#define	READ_CAPACITY		0x25
#define	READ_10			0x28
#define	POSITION_TO_ELEMENT	0x2B
#define	READ_TOC		0x43
#define	GET_EVENT_STATUS_NOTIFICATION 0x4A
#define	MODE_SENSE_10		0x5A
#define	REPORT_LUNS		0xA0
#define	READ_12			0xA8
#define	READ_CD			0xBE

/*
 * SCSI mode page codes
 */
#define	MODEPAGE_RW_ERROR_RECOVERY	0x01
#define	MODEPAGE_CD_CAPABILITIES	0x2A

/*
 * ATA commands
 *
 * NOTE(review): judging by the ATA_SF_ prefix these look like SET FEATURES
 * sub-codes rather than command opcodes -- confirm against <sys/ata.h>.
 */
#define	ATA_SF_ENAB_SATA_SF		0x10
#define	ATA_SATA_SF_AN			0x05
#define	ATA_SF_DIS_SATA_SF		0x90
114 
/*
 * Debug printf
 *
 * DPRINTF() expands to nothing unless AHCI_DEBUG is defined.  When it is
 * defined, output goes to the 'dbg' stream and is flushed after every
 * message; 'dbg' must be opened elsewhere before the first use.
 * WPRINTF() is always compiled in and prints through printf().
 * Neither macro appends a newline.
 */
#ifdef AHCI_DEBUG
static FILE *dbg;
#define DPRINTF(format, arg...)	do{fprintf(dbg, format, ##arg);fflush(dbg);}while(0)
#else
#define DPRINTF(format, arg...)
#endif
#define WPRINTF(format, arg...) printf(format, ##arg)
125 
/*
 * Size of a port identifier buffer: 20 characters plus one more byte
 * (presumably the terminating NUL -- confirm at the point of use).
 * Parenthesized so the macro expands safely inside larger expressions.
 */
#define AHCI_PORT_IDENT (20 + 1)
127 
/*
 * One in-flight (or idle) backend request belonging to a port.
 * Idle entries sit on the port's free list (iofhd); entries handed to the
 * block backend sit on the busy list (iobhd).
 */
struct ahci_ioreq {
	struct blockif_req io_req;	/* request passed to blockif_*() */
	struct ahci_port *io_pr;	/* presumably the owning port */
	STAILQ_ENTRY(ahci_ioreq) io_flist;	/* free-list linkage */
	TAILQ_ENTRY(ahci_ioreq) io_blist;	/* busy-list linkage */
	uint8_t *cfis;		/* command FIS of the command being served */
	uint32_t len;		/* total length of the command, in bytes */
	uint32_t done;		/* bytes covered so far, incl. this request */
	int slot;		/* command slot number */
	int more;		/* non-zero if another request must follow */
	int readop;		/* 1 for reads, 0 for writes (ahci_handle_rw) */
};
140 
/*
 * Per-port state: backend handle, emulated device state, the port
 * register values and the pool of backend requests.
 */
struct ahci_port {
	struct blockif_ctxt *bctx;	/* block backend; NULL if no device */
	struct pci_ahci_softc *pr_sc;	/* controller this port belongs to */
	struct ata_params ata_ident;	/* filled in by ata_identify_init() */
	uint8_t *cmd_lst;	/* command list; slot n at n * AHCI_CL_SIZE */
	uint8_t *rfis;		/* received-FIS area written by ahci_write_fis() */
	int port;		/* port number, used as bit index in 'is' */
	int atapi;		/* non-zero for an ATAPI device */
	int reset;
	int waitforclear;	/* set when a FIS with ATA_S_ERROR is posted */
	int mult_sectors;	/* reported in the identify 'multi' word */
	uint8_t xfermode;	/* current transfer mode (ATA_UDMA6 on reset) */
	uint8_t err_cfis[20];	/* saved details of the last failed command */
	uint8_t sense_key;	/* ATAPI sense key of the last error */
	uint8_t asc;		/* ATAPI additional sense code */
	u_int ccs;		/* current command slot */
	uint32_t pending;	/* bitmask of slots with backend I/O in flight */

	/* Port register values. */
	uint32_t clb;
	uint32_t clbu;
	uint32_t fb;
	uint32_t fbu;
	uint32_t is;
	uint32_t ie;
	uint32_t cmd;
	uint32_t unused0;
	uint32_t tfd;
	uint32_t sig;
	uint32_t ssts;
	uint32_t sctl;
	uint32_t serr;
	uint32_t sact;
	uint32_t ci;
	uint32_t sntf;
	uint32_t fbs;

	/*
	 * i/o request info
	 */
	struct ahci_ioreq *ioreq;	/* presumably the backing array of requests */
	int ioqsz;			/* presumably the number of requests */
	STAILQ_HEAD(ahci_fhead, ahci_ioreq) iofhd;	/* free requests */
	TAILQ_HEAD(ahci_bhead, ahci_ioreq) iobhd;	/* busy requests */
};
185 
/*
 * Command header: one entry of a port's command list, located at
 * cmd_lst + slot * AHCI_CL_SIZE.  The layout is shared with the guest and
 * must not be changed.
 */
struct ahci_cmd_hdr {
	uint16_t flags;
	uint16_t prdtl;		/* number of PRDT entries for this command */
	uint32_t prdbc;		/* byte count transferred; set by write_prdt() */
	uint64_t ctba;
	uint32_t reserved[4];
};
193 
/*
 * One PRDT entry describing a chunk of guest memory.  The PRDT starts at
 * offset 0x80 from the command FIS.  The layout is shared with the guest.
 */
struct ahci_prdt_entry {
	uint64_t dba;		/* guest physical address of the chunk */
	uint32_t reserved;
#define	DBCMASK		0x3fffff
	uint32_t dbc;		/* (dbc & DBCMASK) + 1 is the chunk size in bytes */
};
200 
/*
 * Controller-wide state: the PCI device instance, the lock that protects
 * the controller (ahci_port_stop() asserts it is held), the global register
 * values and the array of ports.
 */
struct pci_ahci_softc {
	struct pci_devinst *asc_pi;	/* PCI device instance */
	pthread_mutex_t	mtx;		/* controller lock */
	int ports;			/* number of ports in use in port[] */
	uint32_t cap;
	uint32_t ghc;			/* AHCI_GHC_IE gates interrupt delivery */
	uint32_t is;			/* one bit per port with pending interrupt */
	uint32_t pi;
	uint32_t vs;
	uint32_t ccc_ctl;
	uint32_t ccc_pts;
	uint32_t em_loc;
	uint32_t em_ctl;
	uint32_t cap2;
	uint32_t bohc;
	uint32_t lintr;			/* non-zero while the legacy interrupt is asserted */
	struct ahci_port port[MAX_PORTS];
};
/* VM context of the controller, as needed by paddr_guest2host(). */
#define	ahci_ctx(sc)	((sc)->asc_pi->pi_vmctx)
220 
/* Forward declaration: ahci_handle_dsm_trim() calls this before it is defined. */
static void ahci_handle_port(struct ahci_port *p);
222 
/*
 * Convert a logical block address into a three-byte M/S/F triple.
 * The address is first biased by 150; the result is split into units of
 * 75 (buf[2]), 60 (buf[1]) and the remaining quotient (buf[0]).
 */
static inline void lba_to_msf(uint8_t *buf, int lba)
{
	int frames = lba + 150;
	int seconds = frames / 75;

	buf[0] = seconds / 60;
	buf[1] = seconds % 60;
	buf[2] = frames % 75;
}
230 
231 /*
232  * Generate HBA interrupts on global IS register write.
233  */
234 static void
235 ahci_generate_intr(struct pci_ahci_softc *sc, uint32_t mask)
236 {
237 	struct pci_devinst *pi = sc->asc_pi;
238 	struct ahci_port *p;
239 	int i, nmsg;
240 	uint32_t mmask;
241 
242 	/* Update global IS from PxIS/PxIE. */
243 	for (i = 0; i < sc->ports; i++) {
244 		p = &sc->port[i];
245 		if (p->is & p->ie)
246 			sc->is |= (1 << i);
247 	}
248 	DPRINTF("%s(%08x) %08x", __func__, mask, sc->is);
249 
250 	/* If there is nothing enabled -- clear legacy interrupt and exit. */
251 	if (sc->is == 0 || (sc->ghc & AHCI_GHC_IE) == 0) {
252 		if (sc->lintr) {
253 			pci_lintr_deassert(pi);
254 			sc->lintr = 0;
255 		}
256 		return;
257 	}
258 
259 	/* If there is anything and no MSI -- assert legacy interrupt. */
260 	nmsg = pci_msi_maxmsgnum(pi);
261 	if (nmsg == 0) {
262 		if (!sc->lintr) {
263 			sc->lintr = 1;
264 			pci_lintr_assert(pi);
265 		}
266 		return;
267 	}
268 
269 	/* Assert respective MSIs for ports that were touched. */
270 	for (i = 0; i < nmsg; i++) {
271 		if (sc->ports <= nmsg || i < nmsg - 1)
272 			mmask = 1 << i;
273 		else
274 			mmask = 0xffffffff << i;
275 		if (sc->is & mask && mmask & mask)
276 			pci_generate_msi(pi, i);
277 	}
278 }
279 
280 /*
281  * Generate HBA interrupt on specific port event.
282  */
283 static void
284 ahci_port_intr(struct ahci_port *p)
285 {
286 	struct pci_ahci_softc *sc = p->pr_sc;
287 	struct pci_devinst *pi = sc->asc_pi;
288 	int nmsg;
289 
290 	DPRINTF("%s(%d) %08x/%08x %08x", __func__,
291 	    p->port, p->is, p->ie, sc->is);
292 
293 	/* If there is nothing enabled -- we are done. */
294 	if ((p->is & p->ie) == 0)
295 		return;
296 
297 	/* In case of non-shared MSI always generate interrupt. */
298 	nmsg = pci_msi_maxmsgnum(pi);
299 	if (sc->ports <= nmsg || p->port < nmsg - 1) {
300 		sc->is |= (1 << p->port);
301 		if ((sc->ghc & AHCI_GHC_IE) == 0)
302 			return;
303 		pci_generate_msi(pi, p->port);
304 		return;
305 	}
306 
307 	/* If IS for this port is already set -- do nothing. */
308 	if (sc->is & (1 << p->port))
309 		return;
310 
311 	sc->is |= (1 << p->port);
312 
313 	/* If interrupts are enabled -- generate one. */
314 	if ((sc->ghc & AHCI_GHC_IE) == 0)
315 		return;
316 	if (nmsg > 0) {
317 		pci_generate_msi(pi, nmsg - 1);
318 	} else if (!sc->lintr) {
319 		sc->lintr = 1;
320 		pci_lintr_assert(pi);
321 	}
322 }
323 
324 static void
325 ahci_write_fis(struct ahci_port *p, enum sata_fis_type ft, uint8_t *fis)
326 {
327 	int offset, len, irq;
328 
329 	if (p->rfis == NULL || !(p->cmd & AHCI_P_CMD_FRE))
330 		return;
331 
332 	switch (ft) {
333 	case FIS_TYPE_REGD2H:
334 		offset = 0x40;
335 		len = 20;
336 		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_DHR : 0;
337 		break;
338 	case FIS_TYPE_SETDEVBITS:
339 		offset = 0x58;
340 		len = 8;
341 		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_SDB : 0;
342 		break;
343 	case FIS_TYPE_PIOSETUP:
344 		offset = 0x20;
345 		len = 20;
346 		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_PS : 0;
347 		break;
348 	default:
349 		WPRINTF("unsupported fis type %d", ft);
350 		return;
351 	}
352 	if (fis[2] & ATA_S_ERROR) {
353 		p->waitforclear = 1;
354 		irq |= AHCI_P_IX_TFE;
355 	}
356 	memcpy(p->rfis + offset, fis, len);
357 	if (irq) {
358 		if (~p->is & irq) {
359 			p->is |= irq;
360 			ahci_port_intr(p);
361 		}
362 	}
363 }
364 
365 static void
366 ahci_write_fis_piosetup(struct ahci_port *p)
367 {
368 	uint8_t fis[20];
369 
370 	memset(fis, 0, sizeof(fis));
371 	fis[0] = FIS_TYPE_PIOSETUP;
372 	ahci_write_fis(p, FIS_TYPE_PIOSETUP, fis);
373 }
374 
375 static void
376 ahci_write_fis_sdb(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
377 {
378 	uint8_t fis[8];
379 	uint8_t error;
380 
381 	error = (tfd >> 8) & 0xff;
382 	tfd &= 0x77;
383 	memset(fis, 0, sizeof(fis));
384 	fis[0] = FIS_TYPE_SETDEVBITS;
385 	fis[1] = (1 << 6);
386 	fis[2] = tfd;
387 	fis[3] = error;
388 	if (fis[2] & ATA_S_ERROR) {
389 		p->err_cfis[0] = slot;
390 		p->err_cfis[2] = tfd;
391 		p->err_cfis[3] = error;
392 		memcpy(&p->err_cfis[4], cfis + 4, 16);
393 	} else {
394 		*(uint32_t *)(fis + 4) = (1 << slot);
395 		p->sact &= ~(1 << slot);
396 	}
397 	p->tfd &= ~0x77;
398 	p->tfd |= tfd;
399 	ahci_write_fis(p, FIS_TYPE_SETDEVBITS, fis);
400 }
401 
402 static void
403 ahci_write_fis_d2h(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
404 {
405 	uint8_t fis[20];
406 	uint8_t error;
407 
408 	error = (tfd >> 8) & 0xff;
409 	memset(fis, 0, sizeof(fis));
410 	fis[0] = FIS_TYPE_REGD2H;
411 	fis[1] = (1 << 6);
412 	fis[2] = tfd & 0xff;
413 	fis[3] = error;
414 	fis[4] = cfis[4];
415 	fis[5] = cfis[5];
416 	fis[6] = cfis[6];
417 	fis[7] = cfis[7];
418 	fis[8] = cfis[8];
419 	fis[9] = cfis[9];
420 	fis[10] = cfis[10];
421 	fis[11] = cfis[11];
422 	fis[12] = cfis[12];
423 	fis[13] = cfis[13];
424 	if (fis[2] & ATA_S_ERROR) {
425 		p->err_cfis[0] = 0x80;
426 		p->err_cfis[2] = tfd & 0xff;
427 		p->err_cfis[3] = error;
428 		memcpy(&p->err_cfis[4], cfis + 4, 16);
429 	} else
430 		p->ci &= ~(1 << slot);
431 	p->tfd = tfd;
432 	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
433 }
434 
435 static void
436 ahci_write_fis_d2h_ncq(struct ahci_port *p, int slot)
437 {
438 	uint8_t fis[20];
439 
440 	p->tfd = ATA_S_READY | ATA_S_DSC;
441 	memset(fis, 0, sizeof(fis));
442 	fis[0] = FIS_TYPE_REGD2H;
443 	fis[1] = 0;			/* No interrupt */
444 	fis[2] = p->tfd;		/* Status */
445 	fis[3] = 0;			/* No error */
446 	p->ci &= ~(1 << slot);
447 	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
448 }
449 
450 static void
451 ahci_write_reset_fis_d2h(struct ahci_port *p)
452 {
453 	uint8_t fis[20];
454 
455 	memset(fis, 0, sizeof(fis));
456 	fis[0] = FIS_TYPE_REGD2H;
457 	fis[3] = 1;
458 	fis[4] = 1;
459 	if (p->atapi) {
460 		fis[5] = 0x14;
461 		fis[6] = 0xeb;
462 	}
463 	fis[12] = 1;
464 	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
465 }
466 
467 static void
468 ahci_check_stopped(struct ahci_port *p)
469 {
470 	/*
471 	 * If we are no longer processing the command list and nothing
472 	 * is in-flight, clear the running bit, the current command
473 	 * slot, the command issue and active bits.
474 	 */
475 	if (!(p->cmd & AHCI_P_CMD_ST)) {
476 		if (p->pending == 0) {
477 			p->ccs = 0;
478 			p->cmd &= ~(AHCI_P_CMD_CR | AHCI_P_CMD_CCS_MASK);
479 			p->ci = 0;
480 			p->sact = 0;
481 			p->waitforclear = 0;
482 		}
483 	}
484 }
485 
486 static void
487 ahci_port_stop(struct ahci_port *p)
488 {
489 	struct ahci_ioreq *aior;
490 	uint8_t *cfis;
491 	int slot;
492 	int error;
493 
494 	assert(pthread_mutex_isowned_np(&p->pr_sc->mtx));
495 
496 	TAILQ_FOREACH(aior, &p->iobhd, io_blist) {
497 		/*
498 		 * Try to cancel the outstanding blockif request.
499 		 */
500 		error = blockif_cancel(p->bctx, &aior->io_req);
501 		if (error != 0)
502 			continue;
503 
504 		slot = aior->slot;
505 		cfis = aior->cfis;
506 		if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
507 		    cfis[2] == ATA_READ_FPDMA_QUEUED ||
508 		    cfis[2] == ATA_SEND_FPDMA_QUEUED)
509 			p->sact &= ~(1 << slot);	/* NCQ */
510 		else
511 			p->ci &= ~(1 << slot);
512 
513 		/*
514 		 * This command is now done.
515 		 */
516 		p->pending &= ~(1 << slot);
517 
518 		/*
519 		 * Delete the blockif request from the busy list
520 		 */
521 		TAILQ_REMOVE(&p->iobhd, aior, io_blist);
522 
523 		/*
524 		 * Move the blockif request back to the free list
525 		 */
526 		STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
527 	}
528 
529 	ahci_check_stopped(p);
530 }
531 
532 static void
533 ahci_port_reset(struct ahci_port *pr)
534 {
535 	pr->serr = 0;
536 	pr->sact = 0;
537 	pr->xfermode = ATA_UDMA6;
538 	pr->mult_sectors = 128;
539 
540 	if (!pr->bctx) {
541 		pr->ssts = ATA_SS_DET_NO_DEVICE;
542 		pr->sig = 0xFFFFFFFF;
543 		pr->tfd = 0x7F;
544 		return;
545 	}
546 	pr->ssts = ATA_SS_DET_PHY_ONLINE | ATA_SS_IPM_ACTIVE;
547 	if (pr->sctl & ATA_SC_SPD_MASK)
548 		pr->ssts |= (pr->sctl & ATA_SC_SPD_MASK);
549 	else
550 		pr->ssts |= ATA_SS_SPD_GEN3;
551 	pr->tfd = (1 << 8) | ATA_S_DSC | ATA_S_DMA;
552 	if (!pr->atapi) {
553 		pr->sig = PxSIG_ATA;
554 		pr->tfd |= ATA_S_READY;
555 	} else
556 		pr->sig = PxSIG_ATAPI;
557 	ahci_write_reset_fis_d2h(pr);
558 }
559 
560 static void
561 ahci_reset(struct pci_ahci_softc *sc)
562 {
563 	int i;
564 
565 	sc->ghc = AHCI_GHC_AE;
566 	sc->is = 0;
567 
568 	if (sc->lintr) {
569 		pci_lintr_deassert(sc->asc_pi);
570 		sc->lintr = 0;
571 	}
572 
573 	for (i = 0; i < sc->ports; i++) {
574 		sc->port[i].ie = 0;
575 		sc->port[i].is = 0;
576 		sc->port[i].cmd = (AHCI_P_CMD_SUD | AHCI_P_CMD_POD);
577 		if (sc->port[i].bctx)
578 			sc->port[i].cmd |= AHCI_P_CMD_CPS;
579 		sc->port[i].sctl = 0;
580 		ahci_port_reset(&sc->port[i]);
581 	}
582 }
583 
/*
 * Copy 'src' into the 'len'-byte field 'dest' with each pair of bytes
 * swapped (character i lands at index i ^ 1).  If 'src' is shorter than
 * the field the remainder is padded with spaces; no NUL is stored.
 * 'len' is expected to be even, otherwise index 'len' is written.
 */
static void
ata_string(uint8_t *dest, const char *src, int len)
{
	int pos;
	uint8_t ch;

	for (pos = 0; pos < len; pos++) {
		ch = ' ';
		if (*src != '\0')
			ch = *src++;
		dest[pos ^ 1] = ch;
	}
}
596 
/*
 * Copy 'src' into the 'len'-byte field 'dest' in natural byte order.
 * If 'src' is shorter than the field the remainder is padded with
 * spaces; no NUL is stored.
 */
static void
atapi_string(uint8_t *dest, const char *src, int len)
{
	int pos;
	uint8_t ch;

	for (pos = 0; pos < len; pos++) {
		ch = ' ';
		if (*src != '\0')
			ch = *src++;
		dest[pos] = ch;
	}
}
609 
610 /*
611  * Build up the iovec based on the PRDT, 'done' and 'len'.
612  */
613 static void
614 ahci_build_iov(struct ahci_port *p, struct ahci_ioreq *aior,
615     struct ahci_prdt_entry *prdt, uint16_t prdtl)
616 {
617 	struct blockif_req *breq = &aior->io_req;
618 	int i, j, skip, todo, left, extra;
619 	uint32_t dbcsz;
620 
621 	/* Copy part of PRDT between 'done' and 'len' bytes into the iov. */
622 	skip = aior->done;
623 	left = aior->len - aior->done;
624 	todo = 0;
625 	for (i = 0, j = 0; i < prdtl && j < BLOCKIF_IOV_MAX && left > 0;
626 	    i++, prdt++) {
627 		dbcsz = (prdt->dbc & DBCMASK) + 1;
628 		/* Skip already done part of the PRDT */
629 		if (dbcsz <= skip) {
630 			skip -= dbcsz;
631 			continue;
632 		}
633 		dbcsz -= skip;
634 		if (dbcsz > left)
635 			dbcsz = left;
636 		breq->br_iov[j].iov_base = paddr_guest2host(ahci_ctx(p->pr_sc),
637 		    prdt->dba + skip, dbcsz);
638 		breq->br_iov[j].iov_len = dbcsz;
639 		todo += dbcsz;
640 		left -= dbcsz;
641 		skip = 0;
642 		j++;
643 	}
644 
645 	/* If we got limited by IOV length, round I/O down to sector size. */
646 	if (j == BLOCKIF_IOV_MAX) {
647 		extra = todo % blockif_sectsz(p->bctx);
648 		todo -= extra;
649 		assert(todo > 0);
650 		while (extra > 0) {
651 			if (breq->br_iov[j - 1].iov_len > extra) {
652 				breq->br_iov[j - 1].iov_len -= extra;
653 				break;
654 			}
655 			extra -= breq->br_iov[j - 1].iov_len;
656 			j--;
657 		}
658 	}
659 
660 	breq->br_iovcnt = j;
661 	breq->br_resid = todo;
662 	aior->done += todo;
663 	aior->more = (aior->done < aior->len && i < prdtl);
664 }
665 
666 static void
667 ahci_handle_rw(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
668 {
669 	struct ahci_ioreq *aior;
670 	struct blockif_req *breq;
671 	struct ahci_prdt_entry *prdt;
672 	struct ahci_cmd_hdr *hdr;
673 	uint64_t lba;
674 	uint32_t len;
675 	int err, first, ncq, readop;
676 
677 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
678 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
679 	ncq = 0;
680 	readop = 1;
681 	first = (done == 0);
682 
683 	if (cfis[2] == ATA_WRITE || cfis[2] == ATA_WRITE48 ||
684 	    cfis[2] == ATA_WRITE_MUL || cfis[2] == ATA_WRITE_MUL48 ||
685 	    cfis[2] == ATA_WRITE_DMA || cfis[2] == ATA_WRITE_DMA48 ||
686 	    cfis[2] == ATA_WRITE_FPDMA_QUEUED)
687 		readop = 0;
688 
689 	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
690 	    cfis[2] == ATA_READ_FPDMA_QUEUED) {
691 		lba = ((uint64_t)cfis[10] << 40) |
692 			((uint64_t)cfis[9] << 32) |
693 			((uint64_t)cfis[8] << 24) |
694 			((uint64_t)cfis[6] << 16) |
695 			((uint64_t)cfis[5] << 8) |
696 			cfis[4];
697 		len = cfis[11] << 8 | cfis[3];
698 		if (!len)
699 			len = 65536;
700 		ncq = 1;
701 	} else if (cfis[2] == ATA_READ48 || cfis[2] == ATA_WRITE48 ||
702 	    cfis[2] == ATA_READ_MUL48 || cfis[2] == ATA_WRITE_MUL48 ||
703 	    cfis[2] == ATA_READ_DMA48 || cfis[2] == ATA_WRITE_DMA48) {
704 		lba = ((uint64_t)cfis[10] << 40) |
705 			((uint64_t)cfis[9] << 32) |
706 			((uint64_t)cfis[8] << 24) |
707 			((uint64_t)cfis[6] << 16) |
708 			((uint64_t)cfis[5] << 8) |
709 			cfis[4];
710 		len = cfis[13] << 8 | cfis[12];
711 		if (!len)
712 			len = 65536;
713 	} else {
714 		lba = ((cfis[7] & 0xf) << 24) | (cfis[6] << 16) |
715 			(cfis[5] << 8) | cfis[4];
716 		len = cfis[12];
717 		if (!len)
718 			len = 256;
719 	}
720 	lba *= blockif_sectsz(p->bctx);
721 	len *= blockif_sectsz(p->bctx);
722 
723 	/* Pull request off free list */
724 	aior = STAILQ_FIRST(&p->iofhd);
725 	assert(aior != NULL);
726 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
727 
728 	aior->cfis = cfis;
729 	aior->slot = slot;
730 	aior->len = len;
731 	aior->done = done;
732 	aior->readop = readop;
733 	breq = &aior->io_req;
734 	breq->br_offset = lba + done;
735 	ahci_build_iov(p, aior, prdt, hdr->prdtl);
736 
737 	/* Mark this command in-flight. */
738 	p->pending |= 1 << slot;
739 
740 	/* Stuff request onto busy list. */
741 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
742 
743 	if (ncq && first)
744 		ahci_write_fis_d2h_ncq(p, slot);
745 
746 	if (readop)
747 		err = blockif_read(p->bctx, breq);
748 	else
749 		err = blockif_write(p->bctx, breq);
750 	assert(err == 0);
751 }
752 
753 static void
754 ahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis)
755 {
756 	struct ahci_ioreq *aior;
757 	struct blockif_req *breq;
758 	int err;
759 
760 	/*
761 	 * Pull request off free list
762 	 */
763 	aior = STAILQ_FIRST(&p->iofhd);
764 	assert(aior != NULL);
765 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
766 	aior->cfis = cfis;
767 	aior->slot = slot;
768 	aior->len = 0;
769 	aior->done = 0;
770 	aior->more = 0;
771 	breq = &aior->io_req;
772 
773 	/*
774 	 * Mark this command in-flight.
775 	 */
776 	p->pending |= 1 << slot;
777 
778 	/*
779 	 * Stuff request onto busy list
780 	 */
781 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
782 
783 	err = blockif_flush(p->bctx, breq);
784 	assert(err == 0);
785 }
786 
787 static inline void
788 read_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
789 		void *buf, int size)
790 {
791 	struct ahci_cmd_hdr *hdr;
792 	struct ahci_prdt_entry *prdt;
793 	void *to;
794 	int i, len;
795 
796 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
797 	len = size;
798 	to = buf;
799 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
800 	for (i = 0; i < hdr->prdtl && len; i++) {
801 		uint8_t *ptr;
802 		uint32_t dbcsz;
803 		int sublen;
804 
805 		dbcsz = (prdt->dbc & DBCMASK) + 1;
806 		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
807 		sublen = MIN(len, dbcsz);
808 		memcpy(to, ptr, sublen);
809 		len -= sublen;
810 		to += sublen;
811 		prdt++;
812 	}
813 }
814 
815 static void
816 ahci_handle_dsm_trim(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
817 {
818 	struct ahci_ioreq *aior;
819 	struct blockif_req *breq;
820 	uint8_t *entry;
821 	uint64_t elba;
822 	uint32_t len, elen;
823 	int err, first, ncq;
824 	uint8_t buf[512];
825 
826 	first = (done == 0);
827 	if (cfis[2] == ATA_DATA_SET_MANAGEMENT) {
828 		len = (uint16_t)cfis[13] << 8 | cfis[12];
829 		len *= 512;
830 		ncq = 0;
831 	} else { /* ATA_SEND_FPDMA_QUEUED */
832 		len = (uint16_t)cfis[11] << 8 | cfis[3];
833 		len *= 512;
834 		ncq = 1;
835 	}
836 	read_prdt(p, slot, cfis, buf, sizeof(buf));
837 
838 next:
839 	entry = &buf[done];
840 	elba = ((uint64_t)entry[5] << 40) |
841 		((uint64_t)entry[4] << 32) |
842 		((uint64_t)entry[3] << 24) |
843 		((uint64_t)entry[2] << 16) |
844 		((uint64_t)entry[1] << 8) |
845 		entry[0];
846 	elen = (uint16_t)entry[7] << 8 | entry[6];
847 	done += 8;
848 	if (elen == 0) {
849 		if (done >= len) {
850 			if (ncq) {
851 				if (first)
852 					ahci_write_fis_d2h_ncq(p, slot);
853 				ahci_write_fis_sdb(p, slot, cfis,
854 				    ATA_S_READY | ATA_S_DSC);
855 			} else {
856 				ahci_write_fis_d2h(p, slot, cfis,
857 				    ATA_S_READY | ATA_S_DSC);
858 			}
859 			p->pending &= ~(1 << slot);
860 			ahci_check_stopped(p);
861 			if (!first)
862 				ahci_handle_port(p);
863 			return;
864 		}
865 		goto next;
866 	}
867 
868 	/*
869 	 * Pull request off free list
870 	 */
871 	aior = STAILQ_FIRST(&p->iofhd);
872 	assert(aior != NULL);
873 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
874 	aior->cfis = cfis;
875 	aior->slot = slot;
876 	aior->len = len;
877 	aior->done = done;
878 	aior->more = (len != done);
879 
880 	breq = &aior->io_req;
881 	breq->br_offset = elba * blockif_sectsz(p->bctx);
882 	breq->br_resid = elen * blockif_sectsz(p->bctx);
883 
884 	/*
885 	 * Mark this command in-flight.
886 	 */
887 	p->pending |= 1 << slot;
888 
889 	/*
890 	 * Stuff request onto busy list
891 	 */
892 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
893 
894 	if (ncq && first)
895 		ahci_write_fis_d2h_ncq(p, slot);
896 
897 	err = blockif_delete(p->bctx, breq);
898 	assert(err == 0);
899 }
900 
901 static inline void
902 write_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
903 		void *buf, int size)
904 {
905 	struct ahci_cmd_hdr *hdr;
906 	struct ahci_prdt_entry *prdt;
907 	void *from;
908 	int i, len;
909 
910 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
911 	len = size;
912 	from = buf;
913 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
914 	for (i = 0; i < hdr->prdtl && len; i++) {
915 		uint8_t *ptr;
916 		uint32_t dbcsz;
917 		int sublen;
918 
919 		dbcsz = (prdt->dbc & DBCMASK) + 1;
920 		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
921 		sublen = MIN(len, dbcsz);
922 		memcpy(ptr, from, sublen);
923 		len -= sublen;
924 		from += sublen;
925 		prdt++;
926 	}
927 	hdr->prdbc = size - len;
928 }
929 
/*
 * Store a checksum in the last byte of 'buf' such that the 8-bit sum of
 * all 'size' bytes is zero.  'size' must be at least 1.
 */
static void
ahci_checksum(uint8_t *buf, int size)
{
	const int tail = size - 1;
	uint8_t acc = 0;
	int idx = 0;

	while (idx < tail)
		acc += buf[idx++];
	buf[tail] = 0x100 - acc;
}
940 
941 static void
942 ahci_handle_read_log(struct ahci_port *p, int slot, uint8_t *cfis)
943 {
944 	struct ahci_cmd_hdr *hdr;
945 	uint32_t buf[128];
946 	uint8_t *buf8 = (uint8_t *)buf;
947 	uint16_t *buf16 = (uint16_t *)buf;
948 
949 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
950 	if (p->atapi || hdr->prdtl == 0 || cfis[5] != 0 ||
951 	    cfis[9] != 0 || cfis[12] != 1 || cfis[13] != 0) {
952 		ahci_write_fis_d2h(p, slot, cfis,
953 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
954 		return;
955 	}
956 
957 	memset(buf, 0, sizeof(buf));
958 	if (cfis[4] == 0x00) {	/* Log directory */
959 		buf16[0x00] = 1; /* Version -- 1 */
960 		buf16[0x10] = 1; /* NCQ Command Error Log -- 1 page */
961 		buf16[0x13] = 1; /* SATA NCQ Send and Receive Log -- 1 page */
962 	} else if (cfis[4] == 0x10) {	/* NCQ Command Error Log */
963 		memcpy(buf8, p->err_cfis, sizeof(p->err_cfis));
964 		ahci_checksum(buf8, sizeof(buf));
965 	} else if (cfis[4] == 0x13) {	/* SATA NCQ Send and Receive Log */
966 		if (blockif_candelete(p->bctx) && !blockif_is_ro(p->bctx)) {
967 			buf[0x00] = 1;	/* SFQ DSM supported */
968 			buf[0x01] = 1;	/* SFQ DSM TRIM supported */
969 		}
970 	} else {
971 		ahci_write_fis_d2h(p, slot, cfis,
972 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
973 		return;
974 	}
975 
976 	if (cfis[2] == ATA_READ_LOG_EXT)
977 		ahci_write_fis_piosetup(p);
978 	write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
979 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
980 }
981 
982 static void
983 handle_identify(struct ahci_port *p, int slot, uint8_t *cfis)
984 {
985 	struct ahci_cmd_hdr *hdr;
986 
987 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
988 	if (p->atapi || hdr->prdtl == 0) {
989 		ahci_write_fis_d2h(p, slot, cfis,
990 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
991 	} else {
992 		ahci_write_fis_piosetup(p);
993 		write_prdt(p, slot, cfis, (void*)&p->ata_ident, sizeof(struct ata_params));
994 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
995 	}
996 }
997 
998 static void
999 ata_identify_init(struct ahci_port* p, int atapi)
1000 {
1001 	struct ata_params* ata_ident = &p->ata_ident;
1002 
1003 	if (atapi) {
1004 		ata_ident->config = ATA_PROTO_ATAPI | ATA_ATAPI_TYPE_CDROM |
1005 		    ATA_ATAPI_REMOVABLE | ATA_DRQ_FAST;
1006 		ata_ident->capabilities1 = ATA_SUPPORT_LBA |
1007 			ATA_SUPPORT_DMA;
1008 		ata_ident->capabilities2 = (1 << 14 | 1);
1009 		ata_ident->atavalid = ATA_FLAG_64_70 | ATA_FLAG_88;
1010 		ata_ident->obsolete62 = 0x3f;
1011 		ata_ident->mwdmamodes = 7;
1012 		if (p->xfermode & ATA_WDMA0)
1013 			ata_ident->mwdmamodes |= (1 << ((p->xfermode & 7) + 8));
1014 		ata_ident->apiomodes = 3;
1015 		ata_ident->mwdmamin = 0x0078;
1016 		ata_ident->mwdmarec = 0x0078;
1017 		ata_ident->pioblind = 0x0078;
1018 		ata_ident->pioiordy = 0x0078;
1019 		ata_ident->satacapabilities = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3);
1020 		ata_ident->satacapabilities2 = ((p->ssts & ATA_SS_SPD_MASK) >> 3);
1021 		ata_ident->satasupport = ATA_SUPPORT_NCQ_STREAM;
1022 		ata_ident->version_major = 0x3f0;
1023 		ata_ident->support.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
1024 			ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
1025 		ata_ident->support.command2 = (1 << 14);
1026 		ata_ident->support.extension = (1 << 14);
1027 		ata_ident->enabled.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
1028 			ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
1029 		ata_ident->enabled.extension = (1 << 14);
1030 		ata_ident->udmamodes = 0x7f;
1031 		if (p->xfermode & ATA_UDMA0)
1032 			ata_ident->udmamodes |= (1 << ((p->xfermode & 7) + 8));
1033 		ata_ident->transport_major = 0x1020;
1034 		ata_ident->integrity = 0x00a5;
1035 	} else {
1036 		uint64_t sectors;
1037 		int sectsz, psectsz, psectoff, candelete, ro;
1038 		uint16_t cyl;
1039 		uint8_t sech, heads;
1040 
1041 		ro = blockif_is_ro(p->bctx);
1042 		candelete = blockif_candelete(p->bctx);
1043 		sectsz = blockif_sectsz(p->bctx);
1044 		sectors = blockif_size(p->bctx) / sectsz;
1045 		blockif_chs(p->bctx, &cyl, &heads, &sech);
1046 		blockif_psectsz(p->bctx, &psectsz, &psectoff);
1047 		ata_ident->config = ATA_DRQ_FAST;
1048 		ata_ident->cylinders = cyl;
1049 		ata_ident->heads = heads;
1050 		ata_ident->sectors = sech;
1051 
1052 		ata_ident->sectors_intr = (0x8000 | 128);
1053 		ata_ident->tcg = 0;
1054 
1055 		ata_ident->capabilities1 = ATA_SUPPORT_DMA |
1056 			ATA_SUPPORT_LBA | ATA_SUPPORT_IORDY;
1057 		ata_ident->capabilities2 = (1 << 14);
1058 		ata_ident->atavalid = ATA_FLAG_64_70 | ATA_FLAG_88;
1059 		if (p->mult_sectors)
1060 			ata_ident->multi = (ATA_MULTI_VALID | p->mult_sectors);
1061 		if (sectors <= 0x0fffffff) {
1062 			ata_ident->lba_size_1 = sectors;
1063 			ata_ident->lba_size_2 = (sectors >> 16);
1064 		} else {
1065 			ata_ident->lba_size_1 = 0xffff;
1066 			ata_ident->lba_size_2 = 0x0fff;
1067 		}
1068 		ata_ident->mwdmamodes = 0x7;
1069 		if (p->xfermode & ATA_WDMA0)
1070 			ata_ident->mwdmamodes |= (1 << ((p->xfermode & 7) + 8));
1071 		ata_ident->apiomodes = 0x3;
1072 		ata_ident->mwdmamin = 0x0078;
1073 		ata_ident->mwdmarec = 0x0078;
1074 		ata_ident->pioblind = 0x0078;
1075 		ata_ident->pioiordy = 0x0078;
1076 		ata_ident->support3 = 0;
1077 		ata_ident->queue = 31;
1078 		ata_ident->satacapabilities = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3 |
1079 			ATA_SUPPORT_NCQ);
1080 		ata_ident->satacapabilities2 = (ATA_SUPPORT_RCVSND_FPDMA_QUEUED |
1081 			(p->ssts & ATA_SS_SPD_MASK) >> 3);
1082 		ata_ident->version_major = 0x3f0;
1083 		ata_ident->version_minor = 0x28;
1084 		ata_ident->support.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE |
1085 			ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
1086 		ata_ident->support.command2 = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
1087 			ATA_SUPPORT_FLUSHCACHE48 | 1 << 14);
1088 		ata_ident->support.extension = (1 << 14);
1089 		ata_ident->enabled.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE |
1090 			ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
1091 		ata_ident->enabled.command2 = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
1092 			ATA_SUPPORT_FLUSHCACHE48 | 1 << 15);
1093 		ata_ident->enabled.extension = (1 << 14);
1094 		ata_ident->udmamodes = 0x7f;
1095 		if (p->xfermode & ATA_UDMA0)
1096 			ata_ident->udmamodes |= (1 << ((p->xfermode & 7) + 8));
1097 		ata_ident->lba_size48_1 = sectors;
1098 		ata_ident->lba_size48_2 = (sectors >> 16);
1099 		ata_ident->lba_size48_3 = (sectors >> 32);
1100 		ata_ident->lba_size48_4 = (sectors >> 48);
1101 
1102 		if (candelete && !ro) {
1103 			ata_ident->support3 |= ATA_SUPPORT_RZAT | ATA_SUPPORT_DRAT;
1104 			ata_ident->max_dsm_blocks = 1;
1105 			ata_ident->support_dsm = ATA_SUPPORT_DSM_TRIM;
1106 		}
1107 		ata_ident->pss = ATA_PSS_VALID_VALUE;
1108 		ata_ident->lsalign = 0x4000;
1109 		if (psectsz > sectsz) {
1110 			ata_ident->pss |= ATA_PSS_MULTLS;
1111 			ata_ident->pss |= ffsl(psectsz / sectsz) - 1;
1112 			ata_ident->lsalign |= (psectoff / sectsz);
1113 		}
1114 		if (sectsz > 512) {
1115 			ata_ident->pss |= ATA_PSS_LSSABOVE512;
1116 			ata_ident->lss_1 = sectsz / 2;
1117 			ata_ident->lss_2 = ((sectsz / 2) >> 16);
1118 		}
1119 		ata_ident->support2 = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
1120 		ata_ident->enabled2 = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
1121 		ata_ident->transport_major = 0x1020;
1122 		ata_ident->integrity = 0x00a5;
1123 	}
1124 	ahci_checksum((uint8_t*)ata_ident, sizeof(struct ata_params));
1125 }
1126 
1127 static void
1128 handle_atapi_identify(struct ahci_port *p, int slot, uint8_t *cfis)
1129 {
1130 	if (!p->atapi) {
1131 		ahci_write_fis_d2h(p, slot, cfis,
1132 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1133 	} else {
1134 		ahci_write_fis_piosetup(p);
1135 		write_prdt(p, slot, cfis, (void *)&p->ata_ident, sizeof(struct ata_params));
1136 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
1137 	}
1138 }
1139 
1140 static void
1141 atapi_inquiry(struct ahci_port *p, int slot, uint8_t *cfis)
1142 {
1143 	uint8_t buf[36];
1144 	uint8_t *acmd;
1145 	int len;
1146 	uint32_t tfd;
1147 
1148 	acmd = cfis + 0x40;
1149 
1150 	if (acmd[1] & 1) {		/* VPD */
1151 		if (acmd[2] == 0) {	/* Supported VPD pages */
1152 			buf[0] = 0x05;
1153 			buf[1] = 0;
1154 			buf[2] = 0;
1155 			buf[3] = 1;
1156 			buf[4] = 0;
1157 			len = 4 + buf[3];
1158 		} else {
1159 			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1160 			p->asc = 0x24;
1161 			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1162 			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1163 			ahci_write_fis_d2h(p, slot, cfis, tfd);
1164 			return;
1165 		}
1166 	} else {
1167 		buf[0] = 0x05;
1168 		buf[1] = 0x80;
1169 		buf[2] = 0x00;
1170 		buf[3] = 0x21;
1171 		buf[4] = 31;
1172 		buf[5] = 0;
1173 		buf[6] = 0;
1174 		buf[7] = 0;
1175 		atapi_string(buf + 8, "BHYVE", 8);
1176 		atapi_string(buf + 16, "BHYVE DVD-ROM", 16);
1177 		atapi_string(buf + 32, "001", 4);
1178 		len = sizeof(buf);
1179 	}
1180 
1181 	if (len > acmd[4])
1182 		len = acmd[4];
1183 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1184 	write_prdt(p, slot, cfis, buf, len);
1185 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1186 }
1187 
1188 static void
1189 atapi_read_capacity(struct ahci_port *p, int slot, uint8_t *cfis)
1190 {
1191 	uint8_t buf[8];
1192 	uint64_t sectors;
1193 
1194 	sectors = blockif_size(p->bctx) / 2048;
1195 	be32enc(buf, sectors - 1);
1196 	be32enc(buf + 4, 2048);
1197 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1198 	write_prdt(p, slot, cfis, buf, sizeof(buf));
1199 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1200 }
1201 
1202 static void
1203 atapi_read_toc(struct ahci_port *p, int slot, uint8_t *cfis)
1204 {
1205 	uint8_t *acmd;
1206 	uint8_t format;
1207 	int len;
1208 
1209 	acmd = cfis + 0x40;
1210 
1211 	len = be16dec(acmd + 7);
1212 	format = acmd[9] >> 6;
1213 	switch (format) {
1214 	case 0:
1215 	{
1216 		int msf, size;
1217 		uint64_t sectors;
1218 		uint8_t start_track, buf[20], *bp;
1219 
1220 		msf = (acmd[1] >> 1) & 1;
1221 		start_track = acmd[6];
1222 		if (start_track > 1 && start_track != 0xaa) {
1223 			uint32_t tfd;
1224 			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1225 			p->asc = 0x24;
1226 			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1227 			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1228 			ahci_write_fis_d2h(p, slot, cfis, tfd);
1229 			return;
1230 		}
1231 		bp = buf + 2;
1232 		*bp++ = 1;
1233 		*bp++ = 1;
1234 		if (start_track <= 1) {
1235 			*bp++ = 0;
1236 			*bp++ = 0x14;
1237 			*bp++ = 1;
1238 			*bp++ = 0;
1239 			if (msf) {
1240 				*bp++ = 0;
1241 				lba_to_msf(bp, 0);
1242 				bp += 3;
1243 			} else {
1244 				*bp++ = 0;
1245 				*bp++ = 0;
1246 				*bp++ = 0;
1247 				*bp++ = 0;
1248 			}
1249 		}
1250 		*bp++ = 0;
1251 		*bp++ = 0x14;
1252 		*bp++ = 0xaa;
1253 		*bp++ = 0;
1254 		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
1255 		sectors >>= 2;
1256 		if (msf) {
1257 			*bp++ = 0;
1258 			lba_to_msf(bp, sectors);
1259 			bp += 3;
1260 		} else {
1261 			be32enc(bp, sectors);
1262 			bp += 4;
1263 		}
1264 		size = bp - buf;
1265 		be16enc(buf, size - 2);
1266 		if (len > size)
1267 			len = size;
1268 		write_prdt(p, slot, cfis, buf, len);
1269 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1270 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1271 		break;
1272 	}
1273 	case 1:
1274 	{
1275 		uint8_t buf[12];
1276 
1277 		memset(buf, 0, sizeof(buf));
1278 		buf[1] = 0xa;
1279 		buf[2] = 0x1;
1280 		buf[3] = 0x1;
1281 		if (len > sizeof(buf))
1282 			len = sizeof(buf);
1283 		write_prdt(p, slot, cfis, buf, len);
1284 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1285 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1286 		break;
1287 	}
1288 	case 2:
1289 	{
1290 		int msf, size;
1291 		uint64_t sectors;
1292 		uint8_t *bp, buf[50];
1293 
1294 		msf = (acmd[1] >> 1) & 1;
1295 		bp = buf + 2;
1296 		*bp++ = 1;
1297 		*bp++ = 1;
1298 
1299 		*bp++ = 1;
1300 		*bp++ = 0x14;
1301 		*bp++ = 0;
1302 		*bp++ = 0xa0;
1303 		*bp++ = 0;
1304 		*bp++ = 0;
1305 		*bp++ = 0;
1306 		*bp++ = 0;
1307 		*bp++ = 1;
1308 		*bp++ = 0;
1309 		*bp++ = 0;
1310 
1311 		*bp++ = 1;
1312 		*bp++ = 0x14;
1313 		*bp++ = 0;
1314 		*bp++ = 0xa1;
1315 		*bp++ = 0;
1316 		*bp++ = 0;
1317 		*bp++ = 0;
1318 		*bp++ = 0;
1319 		*bp++ = 1;
1320 		*bp++ = 0;
1321 		*bp++ = 0;
1322 
1323 		*bp++ = 1;
1324 		*bp++ = 0x14;
1325 		*bp++ = 0;
1326 		*bp++ = 0xa2;
1327 		*bp++ = 0;
1328 		*bp++ = 0;
1329 		*bp++ = 0;
1330 		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
1331 		sectors >>= 2;
1332 		if (msf) {
1333 			*bp++ = 0;
1334 			lba_to_msf(bp, sectors);
1335 			bp += 3;
1336 		} else {
1337 			be32enc(bp, sectors);
1338 			bp += 4;
1339 		}
1340 
1341 		*bp++ = 1;
1342 		*bp++ = 0x14;
1343 		*bp++ = 0;
1344 		*bp++ = 1;
1345 		*bp++ = 0;
1346 		*bp++ = 0;
1347 		*bp++ = 0;
1348 		if (msf) {
1349 			*bp++ = 0;
1350 			lba_to_msf(bp, 0);
1351 			bp += 3;
1352 		} else {
1353 			*bp++ = 0;
1354 			*bp++ = 0;
1355 			*bp++ = 0;
1356 			*bp++ = 0;
1357 		}
1358 
1359 		size = bp - buf;
1360 		be16enc(buf, size - 2);
1361 		if (len > size)
1362 			len = size;
1363 		write_prdt(p, slot, cfis, buf, len);
1364 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1365 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1366 		break;
1367 	}
1368 	default:
1369 	{
1370 		uint32_t tfd;
1371 
1372 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1373 		p->asc = 0x24;
1374 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1375 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1376 		ahci_write_fis_d2h(p, slot, cfis, tfd);
1377 		break;
1378 	}
1379 	}
1380 }
1381 
1382 static void
1383 atapi_report_luns(struct ahci_port *p, int slot, uint8_t *cfis)
1384 {
1385 	uint8_t buf[16];
1386 
1387 	memset(buf, 0, sizeof(buf));
1388 	buf[3] = 8;
1389 
1390 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1391 	write_prdt(p, slot, cfis, buf, sizeof(buf));
1392 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1393 }
1394 
1395 static void
1396 atapi_read(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
1397 {
1398 	struct ahci_ioreq *aior;
1399 	struct ahci_cmd_hdr *hdr;
1400 	struct ahci_prdt_entry *prdt;
1401 	struct blockif_req *breq;
1402 	uint8_t *acmd;
1403 	uint64_t lba;
1404 	uint32_t len;
1405 	int err;
1406 
1407 	acmd = cfis + 0x40;
1408 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1409 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1410 
1411 	lba = be32dec(acmd + 2);
1412 	if (acmd[0] == READ_10)
1413 		len = be16dec(acmd + 7);
1414 	else
1415 		len = be32dec(acmd + 6);
1416 	if (len == 0) {
1417 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1418 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1419 	}
1420 	lba *= 2048;
1421 	len *= 2048;
1422 
1423 	/*
1424 	 * Pull request off free list
1425 	 */
1426 	aior = STAILQ_FIRST(&p->iofhd);
1427 	assert(aior != NULL);
1428 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
1429 	aior->cfis = cfis;
1430 	aior->slot = slot;
1431 	aior->len = len;
1432 	aior->done = done;
1433 	aior->readop = 1;
1434 	breq = &aior->io_req;
1435 	breq->br_offset = lba + done;
1436 	ahci_build_iov(p, aior, prdt, hdr->prdtl);
1437 
1438 	/* Mark this command in-flight. */
1439 	p->pending |= 1 << slot;
1440 
1441 	/* Stuff request onto busy list. */
1442 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
1443 
1444 	err = blockif_read(p->bctx, breq);
1445 	assert(err == 0);
1446 }
1447 
1448 static void
1449 atapi_request_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1450 {
1451 	uint8_t buf[64];
1452 	uint8_t *acmd;
1453 	int len;
1454 
1455 	acmd = cfis + 0x40;
1456 	len = acmd[4];
1457 	if (len > sizeof(buf))
1458 		len = sizeof(buf);
1459 	memset(buf, 0, len);
1460 	buf[0] = 0x70 | (1 << 7);
1461 	buf[2] = p->sense_key;
1462 	buf[7] = 10;
1463 	buf[12] = p->asc;
1464 	write_prdt(p, slot, cfis, buf, len);
1465 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1466 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1467 }
1468 
1469 static void
1470 atapi_start_stop_unit(struct ahci_port *p, int slot, uint8_t *cfis)
1471 {
1472 	uint8_t *acmd = cfis + 0x40;
1473 	uint32_t tfd;
1474 
1475 	switch (acmd[4] & 3) {
1476 	case 0:
1477 	case 1:
1478 	case 3:
1479 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1480 		tfd = ATA_S_READY | ATA_S_DSC;
1481 		break;
1482 	case 2:
1483 		/* TODO eject media */
1484 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1485 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1486 		p->asc = 0x53;
1487 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1488 		break;
1489 	}
1490 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1491 }
1492 
1493 static void
1494 atapi_mode_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1495 {
1496 	uint8_t *acmd;
1497 	uint32_t tfd;
1498 	uint8_t pc, code;
1499 	int len;
1500 
1501 	acmd = cfis + 0x40;
1502 	len = be16dec(acmd + 7);
1503 	pc = acmd[2] >> 6;
1504 	code = acmd[2] & 0x3f;
1505 
1506 	switch (pc) {
1507 	case 0:
1508 		switch (code) {
1509 		case MODEPAGE_RW_ERROR_RECOVERY:
1510 		{
1511 			uint8_t buf[16];
1512 
1513 			if (len > sizeof(buf))
1514 				len = sizeof(buf);
1515 
1516 			memset(buf, 0, sizeof(buf));
1517 			be16enc(buf, 16 - 2);
1518 			buf[2] = 0x70;
1519 			buf[8] = 0x01;
1520 			buf[9] = 16 - 10;
1521 			buf[11] = 0x05;
1522 			write_prdt(p, slot, cfis, buf, len);
1523 			tfd = ATA_S_READY | ATA_S_DSC;
1524 			break;
1525 		}
1526 		case MODEPAGE_CD_CAPABILITIES:
1527 		{
1528 			uint8_t buf[30];
1529 
1530 			if (len > sizeof(buf))
1531 				len = sizeof(buf);
1532 
1533 			memset(buf, 0, sizeof(buf));
1534 			be16enc(buf, 30 - 2);
1535 			buf[2] = 0x70;
1536 			buf[8] = 0x2A;
1537 			buf[9] = 30 - 10;
1538 			buf[10] = 0x08;
1539 			buf[12] = 0x71;
1540 			be16enc(&buf[18], 2);
1541 			be16enc(&buf[20], 512);
1542 			write_prdt(p, slot, cfis, buf, len);
1543 			tfd = ATA_S_READY | ATA_S_DSC;
1544 			break;
1545 		}
1546 		default:
1547 			goto error;
1548 			break;
1549 		}
1550 		break;
1551 	case 3:
1552 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1553 		p->asc = 0x39;
1554 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1555 		break;
1556 error:
1557 	case 1:
1558 	case 2:
1559 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1560 		p->asc = 0x24;
1561 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1562 		break;
1563 	}
1564 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1565 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1566 }
1567 
1568 static void
1569 atapi_get_event_status_notification(struct ahci_port *p, int slot,
1570     uint8_t *cfis)
1571 {
1572 	uint8_t *acmd;
1573 	uint32_t tfd;
1574 
1575 	acmd = cfis + 0x40;
1576 
1577 	/* we don't support asynchronous operation */
1578 	if (!(acmd[1] & 1)) {
1579 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1580 		p->asc = 0x24;
1581 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1582 	} else {
1583 		uint8_t buf[8];
1584 		int len;
1585 
1586 		len = be16dec(acmd + 7);
1587 		if (len > sizeof(buf))
1588 			len = sizeof(buf);
1589 
1590 		memset(buf, 0, sizeof(buf));
1591 		be16enc(buf, 8 - 2);
1592 		buf[2] = 0x04;
1593 		buf[3] = 0x10;
1594 		buf[5] = 0x02;
1595 		write_prdt(p, slot, cfis, buf, len);
1596 		tfd = ATA_S_READY | ATA_S_DSC;
1597 	}
1598 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1599 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1600 }
1601 
1602 static void
1603 handle_packet_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1604 {
1605 	uint8_t *acmd;
1606 
1607 	acmd = cfis + 0x40;
1608 
1609 #ifdef AHCI_DEBUG
1610 	{
1611 		int i;
1612 		DPRINTF("ACMD:");
1613 		for (i = 0; i < 16; i++)
1614 			DPRINTF("%02x ", acmd[i]);
1615 		DPRINTF("");
1616 	}
1617 #endif
1618 
1619 	switch (acmd[0]) {
1620 	case TEST_UNIT_READY:
1621 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1622 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1623 		break;
1624 	case INQUIRY:
1625 		atapi_inquiry(p, slot, cfis);
1626 		break;
1627 	case READ_CAPACITY:
1628 		atapi_read_capacity(p, slot, cfis);
1629 		break;
1630 	case PREVENT_ALLOW:
1631 		/* TODO */
1632 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1633 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1634 		break;
1635 	case READ_TOC:
1636 		atapi_read_toc(p, slot, cfis);
1637 		break;
1638 	case REPORT_LUNS:
1639 		atapi_report_luns(p, slot, cfis);
1640 		break;
1641 	case READ_10:
1642 	case READ_12:
1643 		atapi_read(p, slot, cfis, 0);
1644 		break;
1645 	case REQUEST_SENSE:
1646 		atapi_request_sense(p, slot, cfis);
1647 		break;
1648 	case START_STOP_UNIT:
1649 		atapi_start_stop_unit(p, slot, cfis);
1650 		break;
1651 	case MODE_SENSE_10:
1652 		atapi_mode_sense(p, slot, cfis);
1653 		break;
1654 	case GET_EVENT_STATUS_NOTIFICATION:
1655 		atapi_get_event_status_notification(p, slot, cfis);
1656 		break;
1657 	default:
1658 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1659 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1660 		p->asc = 0x20;
1661 		ahci_write_fis_d2h(p, slot, cfis, (p->sense_key << 12) |
1662 				ATA_S_READY | ATA_S_ERROR);
1663 		break;
1664 	}
1665 }
1666 
1667 static void
1668 ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1669 {
1670 
1671 	p->tfd |= ATA_S_BUSY;
1672 	switch (cfis[2]) {
1673 	case ATA_ATA_IDENTIFY:
1674 		handle_identify(p, slot, cfis);
1675 		break;
1676 	case ATA_SETFEATURES:
1677 	{
1678 		switch (cfis[3]) {
1679 		case ATA_SF_ENAB_SATA_SF:
1680 			switch (cfis[12]) {
1681 			case ATA_SATA_SF_AN:
1682 				p->tfd = ATA_S_DSC | ATA_S_READY;
1683 				break;
1684 			default:
1685 				p->tfd = ATA_S_ERROR | ATA_S_READY;
1686 				p->tfd |= (ATA_ERROR_ABORT << 8);
1687 				break;
1688 			}
1689 			break;
1690 		case ATA_SF_ENAB_WCACHE:
1691 		case ATA_SF_DIS_WCACHE:
1692 		case ATA_SF_ENAB_RCACHE:
1693 		case ATA_SF_DIS_RCACHE:
1694 			p->tfd = ATA_S_DSC | ATA_S_READY;
1695 			break;
1696 		case ATA_SF_SETXFER:
1697 		{
1698 			switch (cfis[12] & 0xf8) {
1699 			case ATA_PIO:
1700 			case ATA_PIO0:
1701 				break;
1702 			case ATA_WDMA0:
1703 			case ATA_UDMA0:
1704 				p->xfermode = (cfis[12] & 0x7);
1705 				break;
1706 			}
1707 			p->tfd = ATA_S_DSC | ATA_S_READY;
1708 			break;
1709 		}
1710 		default:
1711 			p->tfd = ATA_S_ERROR | ATA_S_READY;
1712 			p->tfd |= (ATA_ERROR_ABORT << 8);
1713 			break;
1714 		}
1715 		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1716 		break;
1717 	}
1718 	case ATA_SET_MULTI:
1719 		if (cfis[12] != 0 &&
1720 			(cfis[12] > 128 || (cfis[12] & (cfis[12] - 1)))) {
1721 			p->tfd = ATA_S_ERROR | ATA_S_READY;
1722 			p->tfd |= (ATA_ERROR_ABORT << 8);
1723 		} else {
1724 			p->mult_sectors = cfis[12];
1725 			p->tfd = ATA_S_DSC | ATA_S_READY;
1726 		}
1727 		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1728 		break;
1729 	case ATA_READ:
1730 	case ATA_WRITE:
1731 	case ATA_READ48:
1732 	case ATA_WRITE48:
1733 	case ATA_READ_MUL:
1734 	case ATA_WRITE_MUL:
1735 	case ATA_READ_MUL48:
1736 	case ATA_WRITE_MUL48:
1737 	case ATA_READ_DMA:
1738 	case ATA_WRITE_DMA:
1739 	case ATA_READ_DMA48:
1740 	case ATA_WRITE_DMA48:
1741 	case ATA_READ_FPDMA_QUEUED:
1742 	case ATA_WRITE_FPDMA_QUEUED:
1743 		ahci_handle_rw(p, slot, cfis, 0);
1744 		break;
1745 	case ATA_FLUSHCACHE:
1746 	case ATA_FLUSHCACHE48:
1747 		ahci_handle_flush(p, slot, cfis);
1748 		break;
1749 	case ATA_DATA_SET_MANAGEMENT:
1750 		if (cfis[11] == 0 && cfis[3] == ATA_DSM_TRIM &&
1751 		    cfis[13] == 0 && cfis[12] == 1) {
1752 			ahci_handle_dsm_trim(p, slot, cfis, 0);
1753 			break;
1754 		}
1755 		ahci_write_fis_d2h(p, slot, cfis,
1756 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1757 		break;
1758 	case ATA_SEND_FPDMA_QUEUED:
1759 		if ((cfis[13] & 0x1f) == ATA_SFPDMA_DSM &&
1760 		    cfis[17] == 0 && cfis[16] == ATA_DSM_TRIM &&
1761 		    cfis[11] == 0 && cfis[3] == 1) {
1762 			ahci_handle_dsm_trim(p, slot, cfis, 0);
1763 			break;
1764 		}
1765 		ahci_write_fis_d2h(p, slot, cfis,
1766 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1767 		break;
1768 	case ATA_READ_LOG_EXT:
1769 	case ATA_READ_LOG_DMA_EXT:
1770 		ahci_handle_read_log(p, slot, cfis);
1771 		break;
1772 	case ATA_SECURITY_FREEZE_LOCK:
1773 	case ATA_SMART_CMD:
1774 	case ATA_NOP:
1775 		ahci_write_fis_d2h(p, slot, cfis,
1776 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1777 		break;
1778 	case ATA_CHECK_POWER_MODE:
1779 		cfis[12] = 0xff;	/* always on */
1780 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1781 		break;
1782 	case ATA_STANDBY_CMD:
1783 	case ATA_STANDBY_IMMEDIATE:
1784 	case ATA_IDLE_CMD:
1785 	case ATA_IDLE_IMMEDIATE:
1786 	case ATA_SLEEP:
1787 	case ATA_READ_VERIFY:
1788 	case ATA_READ_VERIFY48:
1789 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1790 		break;
1791 	case ATA_ATAPI_IDENTIFY:
1792 		handle_atapi_identify(p, slot, cfis);
1793 		break;
1794 	case ATA_PACKET_CMD:
1795 		if (!p->atapi) {
1796 			ahci_write_fis_d2h(p, slot, cfis,
1797 			    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1798 		} else
1799 			handle_packet_cmd(p, slot, cfis);
1800 		break;
1801 	default:
1802 		WPRINTF("Unsupported cmd:%02x", cfis[2]);
1803 		ahci_write_fis_d2h(p, slot, cfis,
1804 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1805 		break;
1806 	}
1807 }
1808 
1809 static void
1810 ahci_handle_slot(struct ahci_port *p, int slot)
1811 {
1812 	struct ahci_cmd_hdr *hdr;
1813 #ifdef AHCI_DEBUG
1814 	struct ahci_prdt_entry *prdt;
1815 #endif
1816 	struct pci_ahci_softc *sc;
1817 	uint8_t *cfis;
1818 #ifdef AHCI_DEBUG
1819 	int cfl, i;
1820 #endif
1821 
1822 	sc = p->pr_sc;
1823 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1824 #ifdef AHCI_DEBUG
1825 	cfl = (hdr->flags & 0x1f) * 4;
1826 #endif
1827 	cfis = paddr_guest2host(ahci_ctx(sc), hdr->ctba,
1828 			0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry));
1829 #ifdef AHCI_DEBUG
1830 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1831 
1832 	DPRINTF("cfis:");
1833 	for (i = 0; i < cfl; i++) {
1834 		if (i % 10 == 0)
1835 			DPRINTF("");
1836 		DPRINTF("%02x ", cfis[i]);
1837 	}
1838 	DPRINTF("");
1839 
1840 	for (i = 0; i < hdr->prdtl; i++) {
1841 		DPRINTF("%d@%08"PRIx64"", prdt->dbc & 0x3fffff, prdt->dba);
1842 		prdt++;
1843 	}
1844 #endif
1845 
1846 	if (cfis[0] != FIS_TYPE_REGH2D) {
1847 		WPRINTF("Not a H2D FIS:%02x", cfis[0]);
1848 		return;
1849 	}
1850 
1851 	if (cfis[1] & 0x80) {
1852 		ahci_handle_cmd(p, slot, cfis);
1853 	} else {
1854 		if (cfis[15] & (1 << 2))
1855 			p->reset = 1;
1856 		else if (p->reset) {
1857 			p->reset = 0;
1858 			ahci_port_reset(p);
1859 		}
1860 		p->ci &= ~(1 << slot);
1861 	}
1862 }
1863 
1864 static void
1865 ahci_handle_port(struct ahci_port *p)
1866 {
1867 
1868 	if (!(p->cmd & AHCI_P_CMD_ST))
1869 		return;
1870 
1871 	/*
1872 	 * Search for any new commands to issue ignoring those that
1873 	 * are already in-flight.  Stop if device is busy or in error.
1874 	 */
1875 	for (; (p->ci & ~p->pending) != 0; p->ccs = ((p->ccs + 1) & 31)) {
1876 		if ((p->tfd & (ATA_S_BUSY | ATA_S_DRQ)) != 0)
1877 			break;
1878 		if (p->waitforclear)
1879 			break;
1880 		if ((p->ci & ~p->pending & (1 << p->ccs)) != 0) {
1881 			p->cmd &= ~AHCI_P_CMD_CCS_MASK;
1882 			p->cmd |= p->ccs << AHCI_P_CMD_CCS_SHIFT;
1883 			ahci_handle_slot(p, p->ccs);
1884 		}
1885 	}
1886 }
1887 
1888 /*
1889  * blockif callback routine - this runs in the context of the blockif
1890  * i/o thread, so the mutex needs to be acquired.
1891  */
1892 static void
1893 ata_ioreq_cb(struct blockif_req *br, int err)
1894 {
1895 	struct ahci_cmd_hdr *hdr;
1896 	struct ahci_ioreq *aior;
1897 	struct ahci_port *p;
1898 	struct pci_ahci_softc *sc;
1899 	uint32_t tfd;
1900 	uint8_t *cfis;
1901 	int slot, ncq, dsm;
1902 
1903 	DPRINTF("%s %d", __func__, err);
1904 
1905 	ncq = dsm = 0;
1906 	aior = br->br_param;
1907 	p = aior->io_pr;
1908 	cfis = aior->cfis;
1909 	slot = aior->slot;
1910 	sc = p->pr_sc;
1911 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1912 
1913 	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
1914 	    cfis[2] == ATA_READ_FPDMA_QUEUED ||
1915 	    cfis[2] == ATA_SEND_FPDMA_QUEUED)
1916 		ncq = 1;
1917 	if (cfis[2] == ATA_DATA_SET_MANAGEMENT ||
1918 	    (cfis[2] == ATA_SEND_FPDMA_QUEUED &&
1919 	     (cfis[13] & 0x1f) == ATA_SFPDMA_DSM))
1920 		dsm = 1;
1921 
1922 	pthread_mutex_lock(&sc->mtx);
1923 
1924 	/*
1925 	 * Delete the blockif request from the busy list
1926 	 */
1927 	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1928 
1929 	/*
1930 	 * Move the blockif request back to the free list
1931 	 */
1932 	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
1933 
1934 	if (!err)
1935 		hdr->prdbc = aior->done;
1936 
1937 	if (!err && aior->more) {
1938 		if (dsm)
1939 			ahci_handle_dsm_trim(p, slot, cfis, aior->done);
1940 		else
1941 			ahci_handle_rw(p, slot, cfis, aior->done);
1942 		goto out;
1943 	}
1944 
1945 	if (!err)
1946 		tfd = ATA_S_READY | ATA_S_DSC;
1947 	else
1948 		tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1949 	if (ncq)
1950 		ahci_write_fis_sdb(p, slot, cfis, tfd);
1951 	else
1952 		ahci_write_fis_d2h(p, slot, cfis, tfd);
1953 
1954 	/*
1955 	 * This command is now complete.
1956 	 */
1957 	p->pending &= ~(1 << slot);
1958 
1959 	ahci_check_stopped(p);
1960 	ahci_handle_port(p);
1961 out:
1962 	pthread_mutex_unlock(&sc->mtx);
1963 	DPRINTF("%s exit", __func__);
1964 }
1965 
1966 static void
1967 atapi_ioreq_cb(struct blockif_req *br, int err)
1968 {
1969 	struct ahci_cmd_hdr *hdr;
1970 	struct ahci_ioreq *aior;
1971 	struct ahci_port *p;
1972 	struct pci_ahci_softc *sc;
1973 	uint8_t *cfis;
1974 	uint32_t tfd;
1975 	int slot;
1976 
1977 	DPRINTF("%s %d", __func__, err);
1978 
1979 	aior = br->br_param;
1980 	p = aior->io_pr;
1981 	cfis = aior->cfis;
1982 	slot = aior->slot;
1983 	sc = p->pr_sc;
1984 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + aior->slot * AHCI_CL_SIZE);
1985 
1986 	pthread_mutex_lock(&sc->mtx);
1987 
1988 	/*
1989 	 * Delete the blockif request from the busy list
1990 	 */
1991 	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1992 
1993 	/*
1994 	 * Move the blockif request back to the free list
1995 	 */
1996 	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
1997 
1998 	if (!err)
1999 		hdr->prdbc = aior->done;
2000 
2001 	if (!err && aior->more) {
2002 		atapi_read(p, slot, cfis, aior->done);
2003 		goto out;
2004 	}
2005 
2006 	if (!err) {
2007 		tfd = ATA_S_READY | ATA_S_DSC;
2008 	} else {
2009 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
2010 		p->asc = 0x21;
2011 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
2012 	}
2013 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
2014 	ahci_write_fis_d2h(p, slot, cfis, tfd);
2015 
2016 	/*
2017 	 * This command is now complete.
2018 	 */
2019 	p->pending &= ~(1 << slot);
2020 
2021 	ahci_check_stopped(p);
2022 	ahci_handle_port(p);
2023 out:
2024 	pthread_mutex_unlock(&sc->mtx);
2025 	DPRINTF("%s exit", __func__);
2026 }
2027 
2028 static void
2029 pci_ahci_ioreq_init(struct ahci_port *pr)
2030 {
2031 	struct ahci_ioreq *vr;
2032 	int i;
2033 
2034 	pr->ioqsz = blockif_queuesz(pr->bctx);
2035 	pr->ioreq = calloc(pr->ioqsz, sizeof(struct ahci_ioreq));
2036 	STAILQ_INIT(&pr->iofhd);
2037 
2038 	/*
2039 	 * Add all i/o request entries to the free queue
2040 	 */
2041 	for (i = 0; i < pr->ioqsz; i++) {
2042 		vr = &pr->ioreq[i];
2043 		vr->io_pr = pr;
2044 		if (!pr->atapi)
2045 			vr->io_req.br_callback = ata_ioreq_cb;
2046 		else
2047 			vr->io_req.br_callback = atapi_ioreq_cb;
2048 		vr->io_req.br_param = vr;
2049 		STAILQ_INSERT_TAIL(&pr->iofhd, vr, io_flist);
2050 	}
2051 
2052 	TAILQ_INIT(&pr->iobhd);
2053 }
2054 
2055 static void
2056 pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
2057 {
2058 	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
2059 	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
2060 	struct ahci_port *p = &sc->port[port];
2061 
2062 	DPRINTF("pci_ahci_port %d: write offset 0x%"PRIx64" value 0x%"PRIx64"",
2063 		port, offset, value);
2064 
2065 	switch (offset) {
2066 	case AHCI_P_CLB:
2067 		p->clb = value;
2068 		break;
2069 	case AHCI_P_CLBU:
2070 		p->clbu = value;
2071 		break;
2072 	case AHCI_P_FB:
2073 		p->fb = value;
2074 		break;
2075 	case AHCI_P_FBU:
2076 		p->fbu = value;
2077 		break;
2078 	case AHCI_P_IS:
2079 		p->is &= ~value;
2080 		ahci_port_intr(p);
2081 		break;
2082 	case AHCI_P_IE:
2083 		p->ie = value & 0xFDC000FF;
2084 		ahci_port_intr(p);
2085 		break;
2086 	case AHCI_P_CMD:
2087 	{
2088 		p->cmd &= ~(AHCI_P_CMD_ST | AHCI_P_CMD_SUD | AHCI_P_CMD_POD |
2089 		    AHCI_P_CMD_CLO | AHCI_P_CMD_FRE | AHCI_P_CMD_APSTE |
2090 		    AHCI_P_CMD_ATAPI | AHCI_P_CMD_DLAE | AHCI_P_CMD_ALPE |
2091 		    AHCI_P_CMD_ASP | AHCI_P_CMD_ICC_MASK);
2092 		p->cmd |= (AHCI_P_CMD_ST | AHCI_P_CMD_SUD | AHCI_P_CMD_POD |
2093 		    AHCI_P_CMD_CLO | AHCI_P_CMD_FRE | AHCI_P_CMD_APSTE |
2094 		    AHCI_P_CMD_ATAPI | AHCI_P_CMD_DLAE | AHCI_P_CMD_ALPE |
2095 		    AHCI_P_CMD_ASP | AHCI_P_CMD_ICC_MASK) & value;
2096 
2097 		if (!(value & AHCI_P_CMD_ST)) {
2098 			ahci_port_stop(p);
2099 		} else {
2100 			uint64_t clb;
2101 
2102 			p->cmd |= AHCI_P_CMD_CR;
2103 			clb = (uint64_t)p->clbu << 32 | p->clb;
2104 			p->cmd_lst = paddr_guest2host(ahci_ctx(sc), clb,
2105 					AHCI_CL_SIZE * AHCI_MAX_SLOTS);
2106 		}
2107 
2108 		if (value & AHCI_P_CMD_FRE) {
2109 			uint64_t fb;
2110 
2111 			p->cmd |= AHCI_P_CMD_FR;
2112 			fb = (uint64_t)p->fbu << 32 | p->fb;
2113 			/* we don't support FBSCP, so rfis size is 256Bytes */
2114 			p->rfis = paddr_guest2host(ahci_ctx(sc), fb, 256);
2115 		} else {
2116 			p->cmd &= ~AHCI_P_CMD_FR;
2117 		}
2118 
2119 		if (value & AHCI_P_CMD_CLO) {
2120 			p->tfd &= ~(ATA_S_BUSY | ATA_S_DRQ);
2121 			p->cmd &= ~AHCI_P_CMD_CLO;
2122 		}
2123 
2124 		if (value & AHCI_P_CMD_ICC_MASK) {
2125 			p->cmd &= ~AHCI_P_CMD_ICC_MASK;
2126 		}
2127 
2128 		ahci_handle_port(p);
2129 		break;
2130 	}
2131 	case AHCI_P_TFD:
2132 	case AHCI_P_SIG:
2133 	case AHCI_P_SSTS:
2134 		WPRINTF("pci_ahci_port: read only registers 0x%"PRIx64"", offset);
2135 		break;
2136 	case AHCI_P_SCTL:
2137 		p->sctl = value;
2138 		if (!(p->cmd & AHCI_P_CMD_ST)) {
2139 			if (value & ATA_SC_DET_RESET)
2140 				ahci_port_reset(p);
2141 		}
2142 		break;
2143 	case AHCI_P_SERR:
2144 		p->serr &= ~value;
2145 		break;
2146 	case AHCI_P_SACT:
2147 		p->sact |= value;
2148 		break;
2149 	case AHCI_P_CI:
2150 		p->ci |= value;
2151 		ahci_handle_port(p);
2152 		break;
2153 	case AHCI_P_SNTF:
2154 	case AHCI_P_FBS:
2155 	default:
2156 		break;
2157 	}
2158 }
2159 
2160 static void
2161 pci_ahci_host_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
2162 {
2163 	DPRINTF("pci_ahci_host: write offset 0x%"PRIx64" value 0x%"PRIx64"",
2164 		offset, value);
2165 
2166 	switch (offset) {
2167 	case AHCI_CAP:
2168 	case AHCI_PI:
2169 	case AHCI_VS:
2170 	case AHCI_CAP2:
2171 		DPRINTF("pci_ahci_host: read only registers 0x%"PRIx64"", offset);
2172 		break;
2173 	case AHCI_GHC:
2174 		if (value & AHCI_GHC_HR) {
2175 			ahci_reset(sc);
2176 			break;
2177 		}
2178 		if (value & AHCI_GHC_IE)
2179 			sc->ghc |= AHCI_GHC_IE;
2180 		else
2181 			sc->ghc &= ~AHCI_GHC_IE;
2182 		ahci_generate_intr(sc, 0xffffffff);
2183 		break;
2184 	case AHCI_IS:
2185 		sc->is &= ~value;
2186 		ahci_generate_intr(sc, value);
2187 		break;
2188 	default:
2189 		break;
2190 	}
2191 }
2192 
2193 static void
2194 pci_ahci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
2195 		int baridx, uint64_t offset, int size, uint64_t value)
2196 {
2197 	struct pci_ahci_softc *sc = pi->pi_arg;
2198 
2199 	assert(baridx == 5);
2200 	assert((offset % 4) == 0 && size == 4);
2201 
2202 	pthread_mutex_lock(&sc->mtx);
2203 
2204 	if (offset < AHCI_OFFSET)
2205 		pci_ahci_host_write(sc, offset, value);
2206 	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
2207 		pci_ahci_port_write(sc, offset, value);
2208 	else
2209 		WPRINTF("pci_ahci: unknown i/o write offset 0x%"PRIx64"", offset);
2210 
2211 	pthread_mutex_unlock(&sc->mtx);
2212 }
2213 
2214 static uint64_t
2215 pci_ahci_host_read(struct pci_ahci_softc *sc, uint64_t offset)
2216 {
2217 	uint32_t value;
2218 
2219 	switch (offset) {
2220 	case AHCI_CAP:
2221 	case AHCI_GHC:
2222 	case AHCI_IS:
2223 	case AHCI_PI:
2224 	case AHCI_VS:
2225 	case AHCI_CCCC:
2226 	case AHCI_CCCP:
2227 	case AHCI_EM_LOC:
2228 	case AHCI_EM_CTL:
2229 	case AHCI_CAP2:
2230 	{
2231 		uint32_t *p = &sc->cap;
2232 		p += (offset - AHCI_CAP) / sizeof(uint32_t);
2233 		value = *p;
2234 		break;
2235 	}
2236 	default:
2237 		value = 0;
2238 		break;
2239 	}
2240 	DPRINTF("pci_ahci_host: read offset 0x%"PRIx64" value 0x%x",
2241 		offset, value);
2242 
2243 	return (value);
2244 }
2245 
2246 static uint64_t
2247 pci_ahci_port_read(struct pci_ahci_softc *sc, uint64_t offset)
2248 {
2249 	uint32_t value;
2250 	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
2251 	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
2252 
2253 	switch (offset) {
2254 	case AHCI_P_CLB:
2255 	case AHCI_P_CLBU:
2256 	case AHCI_P_FB:
2257 	case AHCI_P_FBU:
2258 	case AHCI_P_IS:
2259 	case AHCI_P_IE:
2260 	case AHCI_P_CMD:
2261 	case AHCI_P_TFD:
2262 	case AHCI_P_SIG:
2263 	case AHCI_P_SSTS:
2264 	case AHCI_P_SCTL:
2265 	case AHCI_P_SERR:
2266 	case AHCI_P_SACT:
2267 	case AHCI_P_CI:
2268 	case AHCI_P_SNTF:
2269 	case AHCI_P_FBS:
2270 	{
2271 		uint32_t *p= &sc->port[port].clb;
2272 		p += (offset - AHCI_P_CLB) / sizeof(uint32_t);
2273 		value = *p;
2274 		break;
2275 	}
2276 	default:
2277 		value = 0;
2278 		break;
2279 	}
2280 
2281 	DPRINTF("pci_ahci_port %d: read offset 0x%"PRIx64" value 0x%x",
2282 		port, offset, value);
2283 
2284 	return value;
2285 }
2286 
2287 static uint64_t
2288 pci_ahci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
2289     uint64_t regoff, int size)
2290 {
2291 	struct pci_ahci_softc *sc = pi->pi_arg;
2292 	uint64_t offset;
2293 	uint32_t value;
2294 
2295 	assert(baridx == 5);
2296 	assert(size == 1 || size == 2 || size == 4);
2297 	assert((regoff & (size - 1)) == 0);
2298 
2299 	pthread_mutex_lock(&sc->mtx);
2300 
2301 	offset = regoff & ~0x3;	    /* round down to a multiple of 4 bytes */
2302 	if (offset < AHCI_OFFSET)
2303 		value = pci_ahci_host_read(sc, offset);
2304 	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
2305 		value = pci_ahci_port_read(sc, offset);
2306 	else {
2307 		value = 0;
2308 		WPRINTF("pci_ahci: unknown i/o read offset 0x%"PRIx64"",
2309 		    regoff);
2310 	}
2311 	value >>= 8 * (regoff & 0x3);
2312 
2313 	pthread_mutex_unlock(&sc->mtx);
2314 
2315 	return (value);
2316 }
2317 
2318 /*
2319  * Each AHCI controller has a "port" node which contains nodes for
2320  * each port named after the decimal number of the port (no leading
2321  * zeroes).  Port nodes contain a "type" ("hd" or "cd"), as well as
2322  * options for blockif.  For example:
2323  *
2324  * pci.0.1.0
2325  *          .device="ahci"
2326  *          .port
2327  *               .0
2328  *                 .type="hd"
2329  *                 .path="/path/to/image"
2330  */
2331 static int
2332 pci_ahci_legacy_config_port(nvlist_t *nvl, int port, const char *type,
2333     const char *opts)
2334 {
2335 	char node_name[sizeof("XX")];
2336 	nvlist_t *port_nvl;
2337 
2338 	snprintf(node_name, sizeof(node_name), "%d", port);
2339 	port_nvl = create_relative_config_node(nvl, node_name);
2340 	set_config_value_node(port_nvl, "type", type);
2341 	return (blockif_legacy_config(port_nvl, opts));
2342 }
2343 
2344 static int
2345 pci_ahci_legacy_config(nvlist_t *nvl, const char *opts)
2346 {
2347 	nvlist_t *ports_nvl;
2348 	const char *type;
2349 	char *next, *next2, *str, *tofree;
2350 	int p, ret;
2351 
2352 	if (opts == NULL)
2353 		return (0);
2354 
2355 	ports_nvl = create_relative_config_node(nvl, "port");
2356 	ret = 1;
2357 	tofree = str = strdup(opts);
2358 	for (p = 0; p < MAX_PORTS && str != NULL; p++, str = next) {
2359 		/* Identify and cut off type of present port. */
2360 		if (strncmp(str, "hd:", 3) == 0) {
2361 			type = "hd";
2362 			str += 3;
2363 		} else if (strncmp(str, "cd:", 3) == 0) {
2364 			type = "cd";
2365 			str += 3;
2366 		} else
2367 			type = NULL;
2368 
2369 		/* Find and cut off the next port options. */
2370 		next = strstr(str, ",hd:");
2371 		next2 = strstr(str, ",cd:");
2372 		if (next == NULL || (next2 != NULL && next2 < next))
2373 			next = next2;
2374 		if (next != NULL) {
2375 			next[0] = 0;
2376 			next++;
2377 		}
2378 
2379 		if (str[0] == 0)
2380 			continue;
2381 
2382 		if (type == NULL) {
2383 			EPRINTLN("Missing or invalid type for port %d: \"%s\"",
2384 			    p, str);
2385 			goto out;
2386 		}
2387 
2388 		if (pci_ahci_legacy_config_port(ports_nvl, p, type, str) != 0)
2389 			goto out;
2390 	}
2391 	ret = 0;
2392 out:
2393 	free(tofree);
2394 	return (ret);
2395 }
2396 
2397 static int
2398 pci_ahci_cd_legacy_config(nvlist_t *nvl, const char *opts)
2399 {
2400 	nvlist_t *ports_nvl;
2401 
2402 	ports_nvl = create_relative_config_node(nvl, "port");
2403 	return (pci_ahci_legacy_config_port(ports_nvl, 0, "cd", opts));
2404 }
2405 
2406 static int
2407 pci_ahci_hd_legacy_config(nvlist_t *nvl, const char *opts)
2408 {
2409 	nvlist_t *ports_nvl;
2410 
2411 	ports_nvl = create_relative_config_node(nvl, "port");
2412 	return (pci_ahci_legacy_config_port(ports_nvl, 0, "hd", opts));
2413 }
2414 
2415 static int
2416 pci_ahci_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl)
2417 {
2418 	char bident[sizeof("XX:XX:XX")];
2419 	char node_name[sizeof("XX")];
2420 	struct blockif_ctxt *bctxt;
2421 	struct pci_ahci_softc *sc;
2422 	int atapi, ret, slots, p;
2423 	MD5_CTX mdctx;
2424 	u_char digest[16];
2425 	const char *path, *type, *value;
2426 	nvlist_t *ports_nvl, *port_nvl;
2427 
2428 	ret = 0;
2429 
2430 #ifdef AHCI_DEBUG
2431 	dbg = fopen("/tmp/log", "w+");
2432 #endif
2433 
2434 	sc = calloc(1, sizeof(struct pci_ahci_softc));
2435 	pi->pi_arg = sc;
2436 	sc->asc_pi = pi;
2437 	pthread_mutex_init(&sc->mtx, NULL);
2438 	sc->ports = 0;
2439 	sc->pi = 0;
2440 	slots = 32;
2441 
2442 	ports_nvl = find_relative_config_node(nvl, "port");
2443 	for (p = 0; ports_nvl != NULL && p < MAX_PORTS; p++) {
2444 		struct ata_params *ata_ident = &sc->port[p].ata_ident;
2445 		char ident[AHCI_PORT_IDENT];
2446 
2447 		snprintf(node_name, sizeof(node_name), "%d", p);
2448 		port_nvl = find_relative_config_node(ports_nvl, node_name);
2449 		if (port_nvl == NULL)
2450 			continue;
2451 
2452 		type = get_config_value_node(port_nvl, "type");
2453 		if (type == NULL)
2454 			continue;
2455 
2456 		if (strcmp(type, "hd") == 0)
2457 			atapi = 0;
2458 		else
2459 			atapi = 1;
2460 
2461 		/*
2462 		 * Attempt to open the backing image. Use the PCI slot/func
2463 		 * and the port number for the identifier string.
2464 		 */
2465 		snprintf(bident, sizeof(bident), "%d:%d:%d", pi->pi_slot,
2466 		    pi->pi_func, p);
2467 
2468 		bctxt = blockif_open(port_nvl, bident);
2469 		if (bctxt == NULL) {
2470 			sc->ports = p;
2471 			ret = 1;
2472 			goto open_fail;
2473 		}
2474 		sc->port[p].bctx = bctxt;
2475 		sc->port[p].pr_sc = sc;
2476 		sc->port[p].port = p;
2477 		sc->port[p].atapi = atapi;
2478 
2479 		/*
2480 		 * Create an identifier for the backing file.
2481 		 * Use parts of the md5 sum of the filename
2482 		 */
2483 		path = get_config_value_node(port_nvl, "path");
2484 		MD5Init(&mdctx);
2485 		MD5Update(&mdctx, path, strlen(path));
2486 		MD5Final(digest, &mdctx);
2487 		snprintf(ident, AHCI_PORT_IDENT,
2488 			"BHYVE-%02X%02X-%02X%02X-%02X%02X",
2489 			digest[0], digest[1], digest[2], digest[3], digest[4],
2490 			digest[5]);
2491 
2492 		memset(ata_ident, 0, sizeof(struct ata_params));
2493 		ata_string((uint8_t*)&ata_ident->serial, ident, 20);
2494 		ata_string((uint8_t*)&ata_ident->revision, "001", 8);
2495 		if (atapi)
2496 			ata_string((uint8_t*)&ata_ident->model, "BHYVE SATA DVD ROM", 40);
2497 		else
2498 			ata_string((uint8_t*)&ata_ident->model, "BHYVE SATA DISK", 40);
2499 		value = get_config_value_node(port_nvl, "nmrr");
2500 		if (value != NULL)
2501 			ata_ident->media_rotation_rate = atoi(value);
2502 		value = get_config_value_node(port_nvl, "ser");
2503 		if (value != NULL)
2504 			ata_string((uint8_t*)(&ata_ident->serial), value, 20);
2505 		value = get_config_value_node(port_nvl, "rev");
2506 		if (value != NULL)
2507 			ata_string((uint8_t*)(&ata_ident->revision), value, 8);
2508 		value = get_config_value_node(port_nvl, "model");
2509 		if (value != NULL)
2510 			ata_string((uint8_t*)(&ata_ident->model), value, 40);
2511 		ata_identify_init(&sc->port[p], atapi);
2512 
2513 		/*
2514 		 * Allocate blockif request structures and add them
2515 		 * to the free list
2516 		 */
2517 		pci_ahci_ioreq_init(&sc->port[p]);
2518 
2519 		sc->pi |= (1 << p);
2520 		if (sc->port[p].ioqsz < slots)
2521 			slots = sc->port[p].ioqsz;
2522 	}
2523 	sc->ports = p;
2524 
2525 	/* Intel ICH8 AHCI */
2526 	--slots;
2527 	if (sc->ports < DEF_PORTS)
2528 		sc->ports = DEF_PORTS;
2529 	sc->cap = AHCI_CAP_64BIT | AHCI_CAP_SNCQ | AHCI_CAP_SSNTF |
2530 	    AHCI_CAP_SMPS | AHCI_CAP_SSS | AHCI_CAP_SALP |
2531 	    AHCI_CAP_SAL | AHCI_CAP_SCLO | (0x3 << AHCI_CAP_ISS_SHIFT)|
2532 	    AHCI_CAP_PMD | AHCI_CAP_SSC | AHCI_CAP_PSC |
2533 	    (slots << AHCI_CAP_NCS_SHIFT) | AHCI_CAP_SXS | (sc->ports - 1);
2534 
2535 	sc->vs = 0x10300;
2536 	sc->cap2 = AHCI_CAP2_APST;
2537 	ahci_reset(sc);
2538 
2539 	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x2821);
2540 	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x8086);
2541 	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
2542 	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_STORAGE_SATA);
2543 	pci_set_cfgdata8(pi, PCIR_PROGIF, PCIP_STORAGE_SATA_AHCI_1_0);
2544 	p = MIN(sc->ports, 16);
2545 	p = flsl(p) - ((p & (p - 1)) ? 0 : 1);
2546 	pci_emul_add_msicap(pi, 1 << p);
2547 	pci_emul_alloc_bar(pi, 5, PCIBAR_MEM32,
2548 	    AHCI_OFFSET + sc->ports * AHCI_STEP);
2549 
2550 	pci_lintr_request(pi);
2551 
2552 open_fail:
2553 	if (ret) {
2554 		for (p = 0; p < sc->ports; p++) {
2555 			if (sc->port[p].bctx != NULL)
2556 				blockif_close(sc->port[p].bctx);
2557 		}
2558 		free(sc);
2559 	}
2560 
2561 	return (ret);
2562 }
2563 
2564 #ifdef BHYVE_SNAPSHOT
2565 static int
2566 pci_ahci_snapshot_save_queues(struct ahci_port *port,
2567 			      struct vm_snapshot_meta *meta)
2568 {
2569 	int ret;
2570 	int idx;
2571 	struct ahci_ioreq *ioreq;
2572 
2573 	STAILQ_FOREACH(ioreq, &port->iofhd, io_flist) {
2574 		idx = ((void *) ioreq - (void *) port->ioreq) / sizeof(*ioreq);
2575 		SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
2576 	}
2577 
2578 	idx = -1;
2579 	SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
2580 
2581 	TAILQ_FOREACH(ioreq, &port->iobhd, io_blist) {
2582 		idx = ((void *) ioreq - (void *) port->ioreq) / sizeof(*ioreq);
2583 		SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
2584 
2585 		/*
2586 		 * Snapshot only the busy requests; other requests are
2587 		 * not valid.
2588 		 */
2589 		ret = blockif_snapshot_req(&ioreq->io_req, meta);
2590 		if (ret != 0) {
2591 			fprintf(stderr, "%s: failed to snapshot req\r\n",
2592 				__func__);
2593 			goto done;
2594 		}
2595 	}
2596 
2597 	idx = -1;
2598 	SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
2599 
2600 done:
2601 	return (ret);
2602 }
2603 
2604 static int
2605 pci_ahci_snapshot_restore_queues(struct ahci_port *port,
2606 				 struct vm_snapshot_meta *meta)
2607 {
2608 	int ret;
2609 	int idx;
2610 	struct ahci_ioreq *ioreq;
2611 
2612 	/* Empty the free queue before restoring. */
2613 	while (!STAILQ_EMPTY(&port->iofhd))
2614 		STAILQ_REMOVE_HEAD(&port->iofhd, io_flist);
2615 
2616 	/* Restore the free queue. */
2617 	while (1) {
2618 		SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
2619 		if (idx == -1)
2620 			break;
2621 
2622 		STAILQ_INSERT_TAIL(&port->iofhd, &port->ioreq[idx], io_flist);
2623 	}
2624 
2625 	/* Restore the busy queue. */
2626 	while (1) {
2627 		SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
2628 		if (idx == -1)
2629 			break;
2630 
2631 		ioreq = &port->ioreq[idx];
2632 		TAILQ_INSERT_TAIL(&port->iobhd, ioreq, io_blist);
2633 
2634 		/*
2635 		 * Restore only the busy requests; other requests are
2636 		 * not valid.
2637 		 */
2638 		ret = blockif_snapshot_req(&ioreq->io_req, meta);
2639 		if (ret != 0) {
2640 			fprintf(stderr, "%s: failed to restore request\r\n",
2641 				__func__);
2642 			goto done;
2643 		}
2644 
2645 		/* Re-enqueue the requests in the block interface. */
2646 		if (ioreq->readop)
2647 			ret = blockif_read(port->bctx, &ioreq->io_req);
2648 		else
2649 			ret = blockif_write(port->bctx, &ioreq->io_req);
2650 
2651 		if (ret != 0) {
2652 			fprintf(stderr,
2653 				"%s: failed to re-enqueue request\r\n",
2654 				__func__);
2655 			goto done;
2656 		}
2657 	}
2658 
2659 done:
2660 	return (ret);
2661 }
2662 
2663 static int
2664 pci_ahci_snapshot(struct vm_snapshot_meta *meta)
2665 {
2666 	int i, j, ret;
2667 	void *bctx;
2668 	struct pci_devinst *pi;
2669 	struct pci_ahci_softc *sc;
2670 	struct ahci_port *port;
2671 	struct ahci_cmd_hdr *hdr;
2672 	struct ahci_ioreq *ioreq;
2673 
2674 	pi = meta->dev_data;
2675 	sc = pi->pi_arg;
2676 
2677 	/* TODO: add mtx lock/unlock */
2678 
2679 	SNAPSHOT_VAR_OR_LEAVE(sc->ports, meta, ret, done);
2680 	SNAPSHOT_VAR_OR_LEAVE(sc->cap, meta, ret, done);
2681 	SNAPSHOT_VAR_OR_LEAVE(sc->ghc, meta, ret, done);
2682 	SNAPSHOT_VAR_OR_LEAVE(sc->is, meta, ret, done);
2683 	SNAPSHOT_VAR_OR_LEAVE(sc->pi, meta, ret, done);
2684 	SNAPSHOT_VAR_OR_LEAVE(sc->vs, meta, ret, done);
2685 	SNAPSHOT_VAR_OR_LEAVE(sc->ccc_ctl, meta, ret, done);
2686 	SNAPSHOT_VAR_OR_LEAVE(sc->ccc_pts, meta, ret, done);
2687 	SNAPSHOT_VAR_OR_LEAVE(sc->em_loc, meta, ret, done);
2688 	SNAPSHOT_VAR_OR_LEAVE(sc->em_ctl, meta, ret, done);
2689 	SNAPSHOT_VAR_OR_LEAVE(sc->cap2, meta, ret, done);
2690 	SNAPSHOT_VAR_OR_LEAVE(sc->bohc, meta, ret, done);
2691 	SNAPSHOT_VAR_OR_LEAVE(sc->lintr, meta, ret, done);
2692 
2693 	for (i = 0; i < MAX_PORTS; i++) {
2694 		port = &sc->port[i];
2695 
2696 		if (meta->op == VM_SNAPSHOT_SAVE)
2697 			bctx = port->bctx;
2698 
2699 		SNAPSHOT_VAR_OR_LEAVE(bctx, meta, ret, done);
2700 		SNAPSHOT_VAR_OR_LEAVE(port->port, meta, ret, done);
2701 
2702 		/* Mostly for restore; save is ensured by the lines above. */
2703 		if (((bctx == NULL) && (port->bctx != NULL)) ||
2704 		    ((bctx != NULL) && (port->bctx == NULL))) {
2705 			fprintf(stderr, "%s: ports not matching\r\n", __func__);
2706 			ret = EINVAL;
2707 			goto done;
2708 		}
2709 
2710 		if (port->bctx == NULL)
2711 			continue;
2712 
2713 		if (port->port != i) {
2714 			fprintf(stderr, "%s: ports not matching: "
2715 					"actual: %d expected: %d\r\n",
2716 					__func__, port->port, i);
2717 			ret = EINVAL;
2718 			goto done;
2719 		}
2720 
2721 		SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(port->cmd_lst,
2722 			AHCI_CL_SIZE * AHCI_MAX_SLOTS, false, meta, ret, done);
2723 		SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(port->rfis, 256, false, meta,
2724 			ret, done);
2725 
2726 		SNAPSHOT_VAR_OR_LEAVE(port->ata_ident, meta, ret, done);
2727 		SNAPSHOT_VAR_OR_LEAVE(port->atapi, meta, ret, done);
2728 		SNAPSHOT_VAR_OR_LEAVE(port->reset, meta, ret, done);
2729 		SNAPSHOT_VAR_OR_LEAVE(port->waitforclear, meta, ret, done);
2730 		SNAPSHOT_VAR_OR_LEAVE(port->mult_sectors, meta, ret, done);
2731 		SNAPSHOT_VAR_OR_LEAVE(port->xfermode, meta, ret, done);
2732 		SNAPSHOT_VAR_OR_LEAVE(port->err_cfis, meta, ret, done);
2733 		SNAPSHOT_VAR_OR_LEAVE(port->sense_key, meta, ret, done);
2734 		SNAPSHOT_VAR_OR_LEAVE(port->asc, meta, ret, done);
2735 		SNAPSHOT_VAR_OR_LEAVE(port->ccs, meta, ret, done);
2736 		SNAPSHOT_VAR_OR_LEAVE(port->pending, meta, ret, done);
2737 
2738 		SNAPSHOT_VAR_OR_LEAVE(port->clb, meta, ret, done);
2739 		SNAPSHOT_VAR_OR_LEAVE(port->clbu, meta, ret, done);
2740 		SNAPSHOT_VAR_OR_LEAVE(port->fb, meta, ret, done);
2741 		SNAPSHOT_VAR_OR_LEAVE(port->fbu, meta, ret, done);
2742 		SNAPSHOT_VAR_OR_LEAVE(port->ie, meta, ret, done);
2743 		SNAPSHOT_VAR_OR_LEAVE(port->cmd, meta, ret, done);
2744 		SNAPSHOT_VAR_OR_LEAVE(port->unused0, meta, ret, done);
2745 		SNAPSHOT_VAR_OR_LEAVE(port->tfd, meta, ret, done);
2746 		SNAPSHOT_VAR_OR_LEAVE(port->sig, meta, ret, done);
2747 		SNAPSHOT_VAR_OR_LEAVE(port->ssts, meta, ret, done);
2748 		SNAPSHOT_VAR_OR_LEAVE(port->sctl, meta, ret, done);
2749 		SNAPSHOT_VAR_OR_LEAVE(port->serr, meta, ret, done);
2750 		SNAPSHOT_VAR_OR_LEAVE(port->sact, meta, ret, done);
2751 		SNAPSHOT_VAR_OR_LEAVE(port->ci, meta, ret, done);
2752 		SNAPSHOT_VAR_OR_LEAVE(port->sntf, meta, ret, done);
2753 		SNAPSHOT_VAR_OR_LEAVE(port->fbs, meta, ret, done);
2754 		SNAPSHOT_VAR_OR_LEAVE(port->ioqsz, meta, ret, done);
2755 
2756 		for (j = 0; j < port->ioqsz; j++) {
2757 			ioreq = &port->ioreq[j];
2758 
2759 			/* blockif_req snapshot done only for busy requests. */
2760 			hdr = (struct ahci_cmd_hdr *)(port->cmd_lst +
2761 				ioreq->slot * AHCI_CL_SIZE);
2762 			SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(ioreq->cfis,
2763 				0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry),
2764 				false, meta, ret, done);
2765 
2766 			SNAPSHOT_VAR_OR_LEAVE(ioreq->len, meta, ret, done);
2767 			SNAPSHOT_VAR_OR_LEAVE(ioreq->done, meta, ret, done);
2768 			SNAPSHOT_VAR_OR_LEAVE(ioreq->slot, meta, ret, done);
2769 			SNAPSHOT_VAR_OR_LEAVE(ioreq->more, meta, ret, done);
2770 			SNAPSHOT_VAR_OR_LEAVE(ioreq->readop, meta, ret, done);
2771 		}
2772 
2773 		/* Perform save / restore specific operations. */
2774 		if (meta->op == VM_SNAPSHOT_SAVE) {
2775 			ret = pci_ahci_snapshot_save_queues(port, meta);
2776 			if (ret != 0)
2777 				goto done;
2778 		} else if (meta->op == VM_SNAPSHOT_RESTORE) {
2779 			ret = pci_ahci_snapshot_restore_queues(port, meta);
2780 			if (ret != 0)
2781 				goto done;
2782 		} else {
2783 			ret = EINVAL;
2784 			goto done;
2785 		}
2786 
2787 		ret = blockif_snapshot(port->bctx, meta);
2788 		if (ret != 0) {
2789 			fprintf(stderr, "%s: failed to restore blockif\r\n",
2790 				__func__);
2791 			goto done;
2792 		}
2793 	}
2794 
2795 done:
2796 	return (ret);
2797 }
2798 
2799 static int
2800 pci_ahci_pause(struct vmctx *ctx, struct pci_devinst *pi)
2801 {
2802 	struct pci_ahci_softc *sc;
2803 	struct blockif_ctxt *bctxt;
2804 	int i;
2805 
2806 	sc = pi->pi_arg;
2807 
2808 	for (i = 0; i < MAX_PORTS; i++) {
2809 		bctxt = sc->port[i].bctx;
2810 		if (bctxt == NULL)
2811 			continue;
2812 
2813 		blockif_pause(bctxt);
2814 	}
2815 
2816 	return (0);
2817 }
2818 
2819 static int
2820 pci_ahci_resume(struct vmctx *ctx, struct pci_devinst *pi)
2821 {
2822 	struct pci_ahci_softc *sc;
2823 	struct blockif_ctxt *bctxt;
2824 	int i;
2825 
2826 	sc = pi->pi_arg;
2827 
2828 	for (i = 0; i < MAX_PORTS; i++) {
2829 		bctxt = sc->port[i].bctx;
2830 		if (bctxt == NULL)
2831 			continue;
2832 
2833 		blockif_resume(bctxt);
2834 	}
2835 
2836 	return (0);
2837 }
2838 #endif
2839 
2840 /*
2841  * Use separate emulation names to distinguish drive and atapi devices
2842  */
2843 struct pci_devemu pci_de_ahci = {
2844 	.pe_emu =	"ahci",
2845 	.pe_init =	pci_ahci_init,
2846 	.pe_legacy_config = pci_ahci_legacy_config,
2847 	.pe_barwrite =	pci_ahci_write,
2848 	.pe_barread =	pci_ahci_read,
2849 #ifdef BHYVE_SNAPSHOT
2850 	.pe_snapshot =	pci_ahci_snapshot,
2851 	.pe_pause =	pci_ahci_pause,
2852 	.pe_resume =	pci_ahci_resume,
2853 #endif
2854 };
2855 PCI_EMUL_SET(pci_de_ahci);
2856 
2857 struct pci_devemu pci_de_ahci_hd = {
2858 	.pe_emu =	"ahci-hd",
2859 	.pe_legacy_config = pci_ahci_hd_legacy_config,
2860 	.pe_alias =	"ahci",
2861 };
2862 PCI_EMUL_SET(pci_de_ahci_hd);
2863 
2864 struct pci_devemu pci_de_ahci_cd = {
2865 	.pe_emu =	"ahci-cd",
2866 	.pe_legacy_config = pci_ahci_cd_legacy_config,
2867 	.pe_alias =	"ahci",
2868 };
2869 PCI_EMUL_SET(pci_de_ahci_cd);
2870