xref: /freebsd/usr.sbin/bhyve/pci_ahci.c (revision e32fecd0c2c3ee37c47ee100f169e7eb0282a873)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2013  Zhixiang Yu <zcore@freebsd.org>
5  * Copyright (c) 2015-2016 Alexander Motin <mav@FreeBSD.org>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * $FreeBSD$
30  */
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include <sys/param.h>
36 #include <sys/linker_set.h>
37 #include <sys/stat.h>
38 #include <sys/uio.h>
39 #include <sys/ioctl.h>
40 #include <sys/disk.h>
41 #include <sys/ata.h>
42 #include <sys/endian.h>
43 
44 #include <errno.h>
45 #include <fcntl.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <stdint.h>
49 #include <string.h>
50 #include <strings.h>
51 #include <unistd.h>
52 #include <assert.h>
53 #include <pthread.h>
54 #include <pthread_np.h>
55 #include <inttypes.h>
56 #include <md5.h>
57 
58 #include "bhyverun.h"
59 #include "config.h"
60 #include "debug.h"
61 #include "pci_emul.h"
62 #ifdef BHYVE_SNAPSHOT
63 #include "snapshot.h"
64 #endif
65 #include "ahci.h"
66 #include "block_if.h"
67 
/*
 * Port counts.  MAX_PORTS sizes the port[] array in struct pci_ahci_softc.
 * NOTE(review): DEF_PORTS is presumably the default number of ports; its
 * users are outside this part of the file -- confirm.
 */
#define	DEF_PORTS	6	/* Intel ICH8 AHCI supports 6 ports */
#define	MAX_PORTS	32	/* AHCI supports 32 ports */

/* Device signatures stored in a port's 'sig' field by ahci_port_reset(). */
#define	PxSIG_ATA	0x00000101 /* ATA drive */
#define	PxSIG_ATAPI	0xeb140101 /* ATAPI drive */
73 
/*
 * SATA FIS type codes (the value placed in byte 0 of a FIS).
 * ahci_write_fis() only posts REGD2H, SETDEVBITS and PIOSETUP; any other
 * type is reported there as unsupported.
 */
enum sata_fis_type {
	FIS_TYPE_REGH2D		= 0x27,	/* Register FIS - host to device */
	FIS_TYPE_REGD2H		= 0x34,	/* Register FIS - device to host */
	FIS_TYPE_DMAACT		= 0x39,	/* DMA activate FIS - device to host */
	FIS_TYPE_DMASETUP	= 0x41,	/* DMA setup FIS - bidirectional */
	FIS_TYPE_DATA		= 0x46,	/* Data FIS - bidirectional */
	FIS_TYPE_BIST		= 0x58,	/* BIST activate FIS - bidirectional */
	FIS_TYPE_PIOSETUP	= 0x5F,	/* PIO setup FIS - device to host */
	FIS_TYPE_SETDEVBITS	= 0xA1,	/* Set dev bits FIS - device to host */
};
84 
/*
 * SCSI opcodes
 *
 * NOTE(review): presumably the packet commands accepted from ATAPI guests;
 * the dispatch code is outside this part of the file -- confirm.
 */
#define	TEST_UNIT_READY		0x00
#define	REQUEST_SENSE		0x03
#define	INQUIRY			0x12
#define	START_STOP_UNIT		0x1B
#define	PREVENT_ALLOW		0x1E
#define	READ_CAPACITY		0x25
#define	READ_10			0x28
#define	POSITION_TO_ELEMENT	0x2B
#define	READ_TOC		0x43
#define	GET_EVENT_STATUS_NOTIFICATION 0x4A
#define	MODE_SENSE_10		0x5A
#define	REPORT_LUNS		0xA0
#define	READ_12			0xA8
#define	READ_CD			0xBE

/*
 * SCSI mode page codes
 */
#define	MODEPAGE_RW_ERROR_RECOVERY	0x01
#define	MODEPAGE_CD_CAPABILITIES	0x2A

/*
 * ATA commands
 *
 * NOTE(review): going by their ATA_SF_ / ATA_SATA_SF_ prefixes these look
 * like SET FEATURES subcommand values rather than command opcodes --
 * confirm against the code that uses them.
 */
#define	ATA_SF_ENAB_SATA_SF		0x10
#define	ATA_SATA_SF_AN			0x05
#define	ATA_SF_DIS_SATA_SF		0x90
115 
/*
 * Debug printf
 *
 * DPRINTF() writes to the 'dbg' stream and flushes it when AHCI_DEBUG is
 * defined, and expands to nothing otherwise.  WPRINTF() always prints
 * through printf().
 */
#ifdef AHCI_DEBUG
static FILE *dbg;
#define DPRINTF(format, arg...)	do{fprintf(dbg, format, ##arg);fflush(dbg);}while(0)
#else
#define DPRINTF(format, arg...)
#endif
#define WPRINTF(format, arg...) printf(format, ##arg)

/*
 * Evaluates to 21.  The expansion is parenthesized so that it behaves as a
 * single operand inside larger expressions (without the parentheses,
 * "2 * AHCI_PORT_IDENT" would evaluate to 41).
 * NOTE(review): presumably 20 identifier characters plus a terminating
 * NUL; the users are outside this part of the file -- confirm.
 */
#define AHCI_PORT_IDENT (20 + 1)
128 
/*
 * One outstanding block I/O request issued on behalf of a command slot.
 * Entries are taken from a port's free list (iofhd) and kept on its busy
 * list (iobhd) while the request is in flight.
 */
struct ahci_ioreq {
	struct blockif_req io_req;	/* request handed to the blockif layer */
	struct ahci_port *io_pr;	/* NOTE(review): presumably the owning port -- confirm */
	STAILQ_ENTRY(ahci_ioreq) io_flist;	/* linkage on the port's free list */
	TAILQ_ENTRY(ahci_ioreq) io_blist;	/* linkage on the port's busy list */
	uint8_t *cfis;			/* command FIS of the command being served */
	uint32_t len;			/* total length of the command, in bytes */
	uint32_t done;			/* bytes already covered by issued requests */
	int slot;			/* command slot number */
	int more;			/* nonzero when further requests are needed */
	int readop;			/* set by ahci_handle_rw(): 1 = read, 0 = write */
};
141 
/*
 * Per-port emulation state.
 */
struct ahci_port {
	/* Backing block device; when NULL ahci_port_reset() reports no device. */
	struct blockif_ctxt *bctx;
	/* Controller this port belongs to. */
	struct pci_ahci_softc *pr_sc;
	/* IDENTIFY data built by ata_identify_init(). */
	struct ata_params ata_ident;
	/* Command list; slot headers are at cmd_lst + slot * AHCI_CL_SIZE. */
	uint8_t *cmd_lst;
	/* Received-FIS area written by ahci_write_fis(); may be NULL. */
	uint8_t *rfis;
	/* Port number; also the bit index used in the controller's 'is'. */
	int port;
	/* Nonzero when the port emulates an ATAPI device. */
	int atapi;
	int reset;
	/* Set when a FIS carrying ATA_S_ERROR is posted; cleared on stop. */
	int waitforclear;
	/* Set to 128 on port reset; reported through the IDENTIFY 'multi' word. */
	int mult_sectors;
	/* Transfer mode; set to ATA_UDMA6 on port reset. */
	uint8_t xfermode;
	/* Record of the last failed command; returned as the NCQ error log. */
	uint8_t err_cfis[20];
	/* Sense data recorded by the ATAPI command handlers. */
	uint8_t sense_key;
	uint8_t asc;
	/* Current command slot; cleared by ahci_check_stopped(). */
	u_int ccs;
	/* Bitmask of slots that have a request in flight. */
	uint32_t pending;

	/*
	 * NOTE(review): the fields from 'clb' through 'fbs' look like shadows
	 * of the AHCI per-port registers (PxCLB ... PxFBS); the register
	 * access code is outside this part of the file -- confirm.
	 */
	uint32_t clb;
	uint32_t clbu;
	uint32_t fb;
	uint32_t fbu;
	uint32_t is;
	uint32_t ie;
	uint32_t cmd;
	uint32_t unused0;
	uint32_t tfd;
	uint32_t sig;
	uint32_t ssts;
	uint32_t sctl;
	uint32_t serr;
	uint32_t sact;
	uint32_t ci;
	uint32_t sntf;
	uint32_t fbs;

	/*
	 * i/o request info
	 *
	 * iofhd is the list of free requests and iobhd the list of requests
	 * currently in flight.
	 * NOTE(review): 'ioreq'/'ioqsz' are presumably the backing array and
	 * its element count; they are set up outside this part of the file.
	 */
	struct ahci_ioreq *ioreq;
	int ioqsz;
	STAILQ_HEAD(ahci_fhead, ahci_ioreq) iofhd;
	TAILQ_HEAD(ahci_bhead, ahci_ioreq) iobhd;
};
186 
/*
 * Command header as found in a port's command list, one per slot.
 */
struct ahci_cmd_hdr {
	uint16_t flags;
	uint16_t prdtl;		/* number of PRDT entries for this command */
	uint32_t prdbc;		/* byte count transferred; set by write_prdt() */
	uint64_t ctba;		/* NOTE(review): presumably the command table address -- confirm */
	uint32_t reserved[4];
};
194 
/*
 * One PRDT entry describing a guest data buffer.  The PRDT is read
 * starting at offset 0x80 from the command FIS.
 */
struct ahci_prdt_entry {
	uint64_t dba;		/* guest-physical address of the buffer */
	uint32_t reserved;
#define	DBCMASK		0x3fffff
	uint32_t dbc;		/* (dbc & DBCMASK) + 1 is the buffer size in bytes */
};
201 
/*
 * Controller-wide emulation state.
 */
struct pci_ahci_softc {
	struct pci_devinst *asc_pi;	/* PCI device instance of the controller */
	pthread_mutex_t	mtx;		/* ahci_port_stop() asserts it is held */
	int ports;			/* number of entries of port[] in use */
	/*
	 * NOTE(review): 'cap' through 'bohc' look like shadows of the AHCI
	 * generic host control registers; the register access code is
	 * outside this part of the file -- confirm.
	 */
	uint32_t cap;
	uint32_t ghc;			/* AHCI_GHC_IE gates interrupt delivery */
	uint32_t is;			/* bit N set: port N has an interrupt pending */
	uint32_t pi;
	uint32_t vs;
	uint32_t ccc_ctl;
	uint32_t ccc_pts;
	uint32_t em_loc;
	uint32_t em_ctl;
	uint32_t cap2;
	uint32_t bohc;
	uint32_t lintr;			/* nonzero while the legacy interrupt is asserted */
	struct ahci_port port[MAX_PORTS];
};
/* VM context of the controller, as needed by paddr_guest2host(). */
#define	ahci_ctx(sc)	((sc)->asc_pi->pi_vmctx)
221 
/* Forward declaration; ahci_handle_dsm_trim() calls this ahead of its definition. */
static void ahci_handle_port(struct ahci_port *p);
223 
/*
 * Convert a logical block address into minute/second/frame form.
 *
 * The address is first biased by 150 frames; with 75 frames per second the
 * result is stored as buf[0] = minutes, buf[1] = seconds, buf[2] = frames.
 */
static inline void lba_to_msf(uint8_t *buf, int lba)
{
	const int frames = lba + 150;
	const int seconds = frames / 75;

	buf[2] = frames % 75;
	buf[1] = seconds % 60;
	buf[0] = seconds / 60;
}
231 
232 /*
233  * Generate HBA interrupts on global IS register write.
234  */
235 static void
236 ahci_generate_intr(struct pci_ahci_softc *sc, uint32_t mask)
237 {
238 	struct pci_devinst *pi = sc->asc_pi;
239 	struct ahci_port *p;
240 	int i, nmsg;
241 	uint32_t mmask;
242 
243 	/* Update global IS from PxIS/PxIE. */
244 	for (i = 0; i < sc->ports; i++) {
245 		p = &sc->port[i];
246 		if (p->is & p->ie)
247 			sc->is |= (1 << i);
248 	}
249 	DPRINTF("%s(%08x) %08x", __func__, mask, sc->is);
250 
251 	/* If there is nothing enabled -- clear legacy interrupt and exit. */
252 	if (sc->is == 0 || (sc->ghc & AHCI_GHC_IE) == 0) {
253 		if (sc->lintr) {
254 			pci_lintr_deassert(pi);
255 			sc->lintr = 0;
256 		}
257 		return;
258 	}
259 
260 	/* If there is anything and no MSI -- assert legacy interrupt. */
261 	nmsg = pci_msi_maxmsgnum(pi);
262 	if (nmsg == 0) {
263 		if (!sc->lintr) {
264 			sc->lintr = 1;
265 			pci_lintr_assert(pi);
266 		}
267 		return;
268 	}
269 
270 	/* Assert respective MSIs for ports that were touched. */
271 	for (i = 0; i < nmsg; i++) {
272 		if (sc->ports <= nmsg || i < nmsg - 1)
273 			mmask = 1 << i;
274 		else
275 			mmask = 0xffffffff << i;
276 		if (sc->is & mask && mmask & mask)
277 			pci_generate_msi(pi, i);
278 	}
279 }
280 
281 /*
282  * Generate HBA interrupt on specific port event.
283  */
284 static void
285 ahci_port_intr(struct ahci_port *p)
286 {
287 	struct pci_ahci_softc *sc = p->pr_sc;
288 	struct pci_devinst *pi = sc->asc_pi;
289 	int nmsg;
290 
291 	DPRINTF("%s(%d) %08x/%08x %08x", __func__,
292 	    p->port, p->is, p->ie, sc->is);
293 
294 	/* If there is nothing enabled -- we are done. */
295 	if ((p->is & p->ie) == 0)
296 		return;
297 
298 	/* In case of non-shared MSI always generate interrupt. */
299 	nmsg = pci_msi_maxmsgnum(pi);
300 	if (sc->ports <= nmsg || p->port < nmsg - 1) {
301 		sc->is |= (1 << p->port);
302 		if ((sc->ghc & AHCI_GHC_IE) == 0)
303 			return;
304 		pci_generate_msi(pi, p->port);
305 		return;
306 	}
307 
308 	/* If IS for this port is already set -- do nothing. */
309 	if (sc->is & (1 << p->port))
310 		return;
311 
312 	sc->is |= (1 << p->port);
313 
314 	/* If interrupts are enabled -- generate one. */
315 	if ((sc->ghc & AHCI_GHC_IE) == 0)
316 		return;
317 	if (nmsg > 0) {
318 		pci_generate_msi(pi, nmsg - 1);
319 	} else if (!sc->lintr) {
320 		sc->lintr = 1;
321 		pci_lintr_assert(pi);
322 	}
323 }
324 
325 static void
326 ahci_write_fis(struct ahci_port *p, enum sata_fis_type ft, uint8_t *fis)
327 {
328 	int offset, len, irq;
329 
330 	if (p->rfis == NULL || !(p->cmd & AHCI_P_CMD_FRE))
331 		return;
332 
333 	switch (ft) {
334 	case FIS_TYPE_REGD2H:
335 		offset = 0x40;
336 		len = 20;
337 		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_DHR : 0;
338 		break;
339 	case FIS_TYPE_SETDEVBITS:
340 		offset = 0x58;
341 		len = 8;
342 		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_SDB : 0;
343 		break;
344 	case FIS_TYPE_PIOSETUP:
345 		offset = 0x20;
346 		len = 20;
347 		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_PS : 0;
348 		break;
349 	default:
350 		WPRINTF("unsupported fis type %d", ft);
351 		return;
352 	}
353 	if (fis[2] & ATA_S_ERROR) {
354 		p->waitforclear = 1;
355 		irq |= AHCI_P_IX_TFE;
356 	}
357 	memcpy(p->rfis + offset, fis, len);
358 	if (irq) {
359 		if (~p->is & irq) {
360 			p->is |= irq;
361 			ahci_port_intr(p);
362 		}
363 	}
364 }
365 
366 static void
367 ahci_write_fis_piosetup(struct ahci_port *p)
368 {
369 	uint8_t fis[20];
370 
371 	memset(fis, 0, sizeof(fis));
372 	fis[0] = FIS_TYPE_PIOSETUP;
373 	ahci_write_fis(p, FIS_TYPE_PIOSETUP, fis);
374 }
375 
376 static void
377 ahci_write_fis_sdb(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
378 {
379 	uint8_t fis[8];
380 	uint8_t error;
381 
382 	error = (tfd >> 8) & 0xff;
383 	tfd &= 0x77;
384 	memset(fis, 0, sizeof(fis));
385 	fis[0] = FIS_TYPE_SETDEVBITS;
386 	fis[1] = (1 << 6);
387 	fis[2] = tfd;
388 	fis[3] = error;
389 	if (fis[2] & ATA_S_ERROR) {
390 		p->err_cfis[0] = slot;
391 		p->err_cfis[2] = tfd;
392 		p->err_cfis[3] = error;
393 		memcpy(&p->err_cfis[4], cfis + 4, 16);
394 	} else {
395 		*(uint32_t *)(fis + 4) = (1 << slot);
396 		p->sact &= ~(1 << slot);
397 	}
398 	p->tfd &= ~0x77;
399 	p->tfd |= tfd;
400 	ahci_write_fis(p, FIS_TYPE_SETDEVBITS, fis);
401 }
402 
403 static void
404 ahci_write_fis_d2h(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
405 {
406 	uint8_t fis[20];
407 	uint8_t error;
408 
409 	error = (tfd >> 8) & 0xff;
410 	memset(fis, 0, sizeof(fis));
411 	fis[0] = FIS_TYPE_REGD2H;
412 	fis[1] = (1 << 6);
413 	fis[2] = tfd & 0xff;
414 	fis[3] = error;
415 	fis[4] = cfis[4];
416 	fis[5] = cfis[5];
417 	fis[6] = cfis[6];
418 	fis[7] = cfis[7];
419 	fis[8] = cfis[8];
420 	fis[9] = cfis[9];
421 	fis[10] = cfis[10];
422 	fis[11] = cfis[11];
423 	fis[12] = cfis[12];
424 	fis[13] = cfis[13];
425 	if (fis[2] & ATA_S_ERROR) {
426 		p->err_cfis[0] = 0x80;
427 		p->err_cfis[2] = tfd & 0xff;
428 		p->err_cfis[3] = error;
429 		memcpy(&p->err_cfis[4], cfis + 4, 16);
430 	} else
431 		p->ci &= ~(1 << slot);
432 	p->tfd = tfd;
433 	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
434 }
435 
436 static void
437 ahci_write_fis_d2h_ncq(struct ahci_port *p, int slot)
438 {
439 	uint8_t fis[20];
440 
441 	p->tfd = ATA_S_READY | ATA_S_DSC;
442 	memset(fis, 0, sizeof(fis));
443 	fis[0] = FIS_TYPE_REGD2H;
444 	fis[1] = 0;			/* No interrupt */
445 	fis[2] = p->tfd;		/* Status */
446 	fis[3] = 0;			/* No error */
447 	p->ci &= ~(1 << slot);
448 	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
449 }
450 
451 static void
452 ahci_write_reset_fis_d2h(struct ahci_port *p)
453 {
454 	uint8_t fis[20];
455 
456 	memset(fis, 0, sizeof(fis));
457 	fis[0] = FIS_TYPE_REGD2H;
458 	fis[3] = 1;
459 	fis[4] = 1;
460 	if (p->atapi) {
461 		fis[5] = 0x14;
462 		fis[6] = 0xeb;
463 	}
464 	fis[12] = 1;
465 	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
466 }
467 
468 static void
469 ahci_check_stopped(struct ahci_port *p)
470 {
471 	/*
472 	 * If we are no longer processing the command list and nothing
473 	 * is in-flight, clear the running bit, the current command
474 	 * slot, the command issue and active bits.
475 	 */
476 	if (!(p->cmd & AHCI_P_CMD_ST)) {
477 		if (p->pending == 0) {
478 			p->ccs = 0;
479 			p->cmd &= ~(AHCI_P_CMD_CR | AHCI_P_CMD_CCS_MASK);
480 			p->ci = 0;
481 			p->sact = 0;
482 			p->waitforclear = 0;
483 		}
484 	}
485 }
486 
487 static void
488 ahci_port_stop(struct ahci_port *p)
489 {
490 	struct ahci_ioreq *aior;
491 	uint8_t *cfis;
492 	int slot;
493 	int error;
494 
495 	assert(pthread_mutex_isowned_np(&p->pr_sc->mtx));
496 
497 	TAILQ_FOREACH(aior, &p->iobhd, io_blist) {
498 		/*
499 		 * Try to cancel the outstanding blockif request.
500 		 */
501 		error = blockif_cancel(p->bctx, &aior->io_req);
502 		if (error != 0)
503 			continue;
504 
505 		slot = aior->slot;
506 		cfis = aior->cfis;
507 		if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
508 		    cfis[2] == ATA_READ_FPDMA_QUEUED ||
509 		    cfis[2] == ATA_SEND_FPDMA_QUEUED)
510 			p->sact &= ~(1 << slot);	/* NCQ */
511 		else
512 			p->ci &= ~(1 << slot);
513 
514 		/*
515 		 * This command is now done.
516 		 */
517 		p->pending &= ~(1 << slot);
518 
519 		/*
520 		 * Delete the blockif request from the busy list
521 		 */
522 		TAILQ_REMOVE(&p->iobhd, aior, io_blist);
523 
524 		/*
525 		 * Move the blockif request back to the free list
526 		 */
527 		STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
528 	}
529 
530 	ahci_check_stopped(p);
531 }
532 
533 static void
534 ahci_port_reset(struct ahci_port *pr)
535 {
536 	pr->serr = 0;
537 	pr->sact = 0;
538 	pr->xfermode = ATA_UDMA6;
539 	pr->mult_sectors = 128;
540 
541 	if (!pr->bctx) {
542 		pr->ssts = ATA_SS_DET_NO_DEVICE;
543 		pr->sig = 0xFFFFFFFF;
544 		pr->tfd = 0x7F;
545 		return;
546 	}
547 	pr->ssts = ATA_SS_DET_PHY_ONLINE | ATA_SS_IPM_ACTIVE;
548 	if (pr->sctl & ATA_SC_SPD_MASK)
549 		pr->ssts |= (pr->sctl & ATA_SC_SPD_MASK);
550 	else
551 		pr->ssts |= ATA_SS_SPD_GEN3;
552 	pr->tfd = (1 << 8) | ATA_S_DSC | ATA_S_DMA;
553 	if (!pr->atapi) {
554 		pr->sig = PxSIG_ATA;
555 		pr->tfd |= ATA_S_READY;
556 	} else
557 		pr->sig = PxSIG_ATAPI;
558 	ahci_write_reset_fis_d2h(pr);
559 }
560 
561 static void
562 ahci_reset(struct pci_ahci_softc *sc)
563 {
564 	int i;
565 
566 	sc->ghc = AHCI_GHC_AE;
567 	sc->is = 0;
568 
569 	if (sc->lintr) {
570 		pci_lintr_deassert(sc->asc_pi);
571 		sc->lintr = 0;
572 	}
573 
574 	for (i = 0; i < sc->ports; i++) {
575 		sc->port[i].ie = 0;
576 		sc->port[i].is = 0;
577 		sc->port[i].cmd = (AHCI_P_CMD_SUD | AHCI_P_CMD_POD);
578 		if (sc->port[i].bctx)
579 			sc->port[i].cmd |= AHCI_P_CMD_CPS;
580 		sc->port[i].sctl = 0;
581 		ahci_port_reset(&sc->port[i]);
582 	}
583 }
584 
/*
 * Store 'src' into an ATA string field of 'len' bytes.
 *
 * The two bytes of every pair are swapped (byte i is written to index
 * i ^ 1), and the field is padded with spaces once 'src' is exhausted.
 * No terminating NUL is written.
 */
static void
ata_string(uint8_t *dest, const char *src, int len)
{
	int pos;

	for (pos = 0; pos < len; pos++)
		dest[pos ^ 1] = (*src != '\0') ? *src++ : ' ';
}
597 
/*
 * Store 'src' into a fixed-width field of 'len' bytes, in order, padding
 * with spaces once 'src' is exhausted.  No terminating NUL is written.
 */
static void
atapi_string(uint8_t *dest, const char *src, int len)
{
	uint8_t *out = dest;
	uint8_t *const end = dest + len;

	while (out < end) {
		if (*src != '\0')
			*out++ = *src++;
		else
			*out++ = ' ';
	}
}
610 
/*
 * Build up the iovec based on the PRDT, 'done' and 'len'.
 *
 * Walks at most 'prdtl' PRDT entries starting at 'prdt' and fills
 * aior->io_req.br_iov with the host mappings of the guest buffers that
 * cover bytes [aior->done, aior->len) of the transfer, using at most
 * BLOCKIF_IOV_MAX iovec entries.  On return br_iovcnt and br_resid describe
 * this chunk, aior->done has been advanced by the chunk size, and
 * aior->more is nonzero when bytes remain and the PRDT walk stopped before
 * reaching entry 'prdtl'.
 */
static void
ahci_build_iov(struct ahci_port *p, struct ahci_ioreq *aior,
    struct ahci_prdt_entry *prdt, uint16_t prdtl)
{
	struct blockif_req *breq = &aior->io_req;
	uint32_t dbcsz, extra, left, skip, todo;
	int i, j;

	assert(aior->len >= aior->done);

	/* Copy part of PRDT between 'done' and 'len' bytes into the iov. */
	skip = aior->done;		/* bytes to pass over before mapping */
	left = aior->len - aior->done;	/* bytes still to be mapped */
	todo = 0;			/* bytes mapped into this chunk */
	for (i = 0, j = 0; i < prdtl && j < BLOCKIF_IOV_MAX && left > 0;
	    i++, prdt++) {
		/* The entry stores its byte count minus one. */
		dbcsz = (prdt->dbc & DBCMASK) + 1;
		/* Skip already done part of the PRDT */
		if (dbcsz <= skip) {
			skip -= dbcsz;
			continue;
		}
		/* Start part-way into this entry and stop at the end of the I/O. */
		dbcsz -= skip;
		if (dbcsz > left)
			dbcsz = left;
		breq->br_iov[j].iov_base = paddr_guest2host(ahci_ctx(p->pr_sc),
		    prdt->dba + skip, dbcsz);
		breq->br_iov[j].iov_len = dbcsz;
		todo += dbcsz;
		left -= dbcsz;
		skip = 0;
		j++;
	}

	/* If we got limited by IOV length, round I/O down to sector size. */
	if (j == BLOCKIF_IOV_MAX) {
		extra = todo % blockif_sectsz(p->bctx);
		todo -= extra;
		assert(todo > 0);
		/* Trim 'extra' bytes off the tail, dropping whole entries as needed. */
		while (extra > 0) {
			if (breq->br_iov[j - 1].iov_len > extra) {
				breq->br_iov[j - 1].iov_len -= extra;
				break;
			}
			extra -= breq->br_iov[j - 1].iov_len;
			j--;
		}
	}

	breq->br_iovcnt = j;
	breq->br_resid = todo;
	aior->done += todo;
	aior->more = (aior->done < aior->len && i < prdtl);
}
668 
669 static void
670 ahci_handle_rw(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
671 {
672 	struct ahci_ioreq *aior;
673 	struct blockif_req *breq;
674 	struct ahci_prdt_entry *prdt;
675 	struct ahci_cmd_hdr *hdr;
676 	uint64_t lba;
677 	uint32_t len;
678 	int err, first, ncq, readop;
679 
680 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
681 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
682 	ncq = 0;
683 	readop = 1;
684 	first = (done == 0);
685 
686 	if (cfis[2] == ATA_WRITE || cfis[2] == ATA_WRITE48 ||
687 	    cfis[2] == ATA_WRITE_MUL || cfis[2] == ATA_WRITE_MUL48 ||
688 	    cfis[2] == ATA_WRITE_DMA || cfis[2] == ATA_WRITE_DMA48 ||
689 	    cfis[2] == ATA_WRITE_FPDMA_QUEUED)
690 		readop = 0;
691 
692 	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
693 	    cfis[2] == ATA_READ_FPDMA_QUEUED) {
694 		lba = ((uint64_t)cfis[10] << 40) |
695 			((uint64_t)cfis[9] << 32) |
696 			((uint64_t)cfis[8] << 24) |
697 			((uint64_t)cfis[6] << 16) |
698 			((uint64_t)cfis[5] << 8) |
699 			cfis[4];
700 		len = cfis[11] << 8 | cfis[3];
701 		if (!len)
702 			len = 65536;
703 		ncq = 1;
704 	} else if (cfis[2] == ATA_READ48 || cfis[2] == ATA_WRITE48 ||
705 	    cfis[2] == ATA_READ_MUL48 || cfis[2] == ATA_WRITE_MUL48 ||
706 	    cfis[2] == ATA_READ_DMA48 || cfis[2] == ATA_WRITE_DMA48) {
707 		lba = ((uint64_t)cfis[10] << 40) |
708 			((uint64_t)cfis[9] << 32) |
709 			((uint64_t)cfis[8] << 24) |
710 			((uint64_t)cfis[6] << 16) |
711 			((uint64_t)cfis[5] << 8) |
712 			cfis[4];
713 		len = cfis[13] << 8 | cfis[12];
714 		if (!len)
715 			len = 65536;
716 	} else {
717 		lba = ((cfis[7] & 0xf) << 24) | (cfis[6] << 16) |
718 			(cfis[5] << 8) | cfis[4];
719 		len = cfis[12];
720 		if (!len)
721 			len = 256;
722 	}
723 	lba *= blockif_sectsz(p->bctx);
724 	len *= blockif_sectsz(p->bctx);
725 
726 	/* Pull request off free list */
727 	aior = STAILQ_FIRST(&p->iofhd);
728 	assert(aior != NULL);
729 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
730 
731 	aior->cfis = cfis;
732 	aior->slot = slot;
733 	aior->len = len;
734 	aior->done = done;
735 	aior->readop = readop;
736 	breq = &aior->io_req;
737 	breq->br_offset = lba + done;
738 	ahci_build_iov(p, aior, prdt, hdr->prdtl);
739 
740 	/* Mark this command in-flight. */
741 	p->pending |= 1 << slot;
742 
743 	/* Stuff request onto busy list. */
744 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
745 
746 	if (ncq && first)
747 		ahci_write_fis_d2h_ncq(p, slot);
748 
749 	if (readop)
750 		err = blockif_read(p->bctx, breq);
751 	else
752 		err = blockif_write(p->bctx, breq);
753 	assert(err == 0);
754 }
755 
756 static void
757 ahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis)
758 {
759 	struct ahci_ioreq *aior;
760 	struct blockif_req *breq;
761 	int err;
762 
763 	/*
764 	 * Pull request off free list
765 	 */
766 	aior = STAILQ_FIRST(&p->iofhd);
767 	assert(aior != NULL);
768 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
769 	aior->cfis = cfis;
770 	aior->slot = slot;
771 	aior->len = 0;
772 	aior->done = 0;
773 	aior->more = 0;
774 	breq = &aior->io_req;
775 
776 	/*
777 	 * Mark this command in-flight.
778 	 */
779 	p->pending |= 1 << slot;
780 
781 	/*
782 	 * Stuff request onto busy list
783 	 */
784 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
785 
786 	err = blockif_flush(p->bctx, breq);
787 	assert(err == 0);
788 }
789 
790 static inline void
791 read_prdt(struct ahci_port *p, int slot, uint8_t *cfis, void *buf,
792     unsigned int size)
793 {
794 	struct ahci_cmd_hdr *hdr;
795 	struct ahci_prdt_entry *prdt;
796 	uint8_t *to;
797 	unsigned int len;
798 	int i;
799 
800 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
801 	len = size;
802 	to = buf;
803 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
804 	for (i = 0; i < hdr->prdtl && len; i++) {
805 		uint8_t *ptr;
806 		uint32_t dbcsz;
807 		unsigned int sublen;
808 
809 		dbcsz = (prdt->dbc & DBCMASK) + 1;
810 		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
811 		sublen = MIN(len, dbcsz);
812 		memcpy(to, ptr, sublen);
813 		len -= sublen;
814 		to += sublen;
815 		prdt++;
816 	}
817 }
818 
819 static void
820 ahci_handle_dsm_trim(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
821 {
822 	struct ahci_ioreq *aior;
823 	struct blockif_req *breq;
824 	uint8_t *entry;
825 	uint64_t elba;
826 	uint32_t len, elen;
827 	int err, first, ncq;
828 	uint8_t buf[512];
829 
830 	first = (done == 0);
831 	if (cfis[2] == ATA_DATA_SET_MANAGEMENT) {
832 		len = (uint16_t)cfis[13] << 8 | cfis[12];
833 		len *= 512;
834 		ncq = 0;
835 	} else { /* ATA_SEND_FPDMA_QUEUED */
836 		len = (uint16_t)cfis[11] << 8 | cfis[3];
837 		len *= 512;
838 		ncq = 1;
839 	}
840 	read_prdt(p, slot, cfis, buf, sizeof(buf));
841 
842 next:
843 	entry = &buf[done];
844 	elba = ((uint64_t)entry[5] << 40) |
845 		((uint64_t)entry[4] << 32) |
846 		((uint64_t)entry[3] << 24) |
847 		((uint64_t)entry[2] << 16) |
848 		((uint64_t)entry[1] << 8) |
849 		entry[0];
850 	elen = (uint16_t)entry[7] << 8 | entry[6];
851 	done += 8;
852 	if (elen == 0) {
853 		if (done >= len) {
854 			if (ncq) {
855 				if (first)
856 					ahci_write_fis_d2h_ncq(p, slot);
857 				ahci_write_fis_sdb(p, slot, cfis,
858 				    ATA_S_READY | ATA_S_DSC);
859 			} else {
860 				ahci_write_fis_d2h(p, slot, cfis,
861 				    ATA_S_READY | ATA_S_DSC);
862 			}
863 			p->pending &= ~(1 << slot);
864 			ahci_check_stopped(p);
865 			if (!first)
866 				ahci_handle_port(p);
867 			return;
868 		}
869 		goto next;
870 	}
871 
872 	/*
873 	 * Pull request off free list
874 	 */
875 	aior = STAILQ_FIRST(&p->iofhd);
876 	assert(aior != NULL);
877 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
878 	aior->cfis = cfis;
879 	aior->slot = slot;
880 	aior->len = len;
881 	aior->done = done;
882 	aior->more = (len != done);
883 
884 	breq = &aior->io_req;
885 	breq->br_offset = elba * blockif_sectsz(p->bctx);
886 	breq->br_resid = elen * blockif_sectsz(p->bctx);
887 
888 	/*
889 	 * Mark this command in-flight.
890 	 */
891 	p->pending |= 1 << slot;
892 
893 	/*
894 	 * Stuff request onto busy list
895 	 */
896 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
897 
898 	if (ncq && first)
899 		ahci_write_fis_d2h_ncq(p, slot);
900 
901 	err = blockif_delete(p->bctx, breq);
902 	assert(err == 0);
903 }
904 
905 static inline void
906 write_prdt(struct ahci_port *p, int slot, uint8_t *cfis, void *buf,
907     unsigned int size)
908 {
909 	struct ahci_cmd_hdr *hdr;
910 	struct ahci_prdt_entry *prdt;
911 	uint8_t *from;
912 	unsigned int len;
913 	int i;
914 
915 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
916 	len = size;
917 	from = buf;
918 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
919 	for (i = 0; i < hdr->prdtl && len; i++) {
920 		uint8_t *ptr;
921 		uint32_t dbcsz;
922 		int sublen;
923 
924 		dbcsz = (prdt->dbc & DBCMASK) + 1;
925 		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
926 		sublen = MIN(len, dbcsz);
927 		memcpy(ptr, from, sublen);
928 		len -= sublen;
929 		from += sublen;
930 		prdt++;
931 	}
932 	hdr->prdbc = size - len;
933 }
934 
/*
 * Store a checksum in the last byte of 'buf' so that the 8-bit sum of all
 * 'size' bytes is zero.  The previous content of the last byte is ignored.
 */
static void
ahci_checksum(uint8_t *buf, int size)
{
	const int last = size - 1;
	uint8_t total = 0;
	int n = 0;

	while (n < last) {
		total += buf[n];
		n++;
	}
	buf[last] = 0x100 - total;
}
945 
946 static void
947 ahci_handle_read_log(struct ahci_port *p, int slot, uint8_t *cfis)
948 {
949 	struct ahci_cmd_hdr *hdr;
950 	uint32_t buf[128];
951 	uint8_t *buf8 = (uint8_t *)buf;
952 	uint16_t *buf16 = (uint16_t *)buf;
953 
954 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
955 	if (p->atapi || hdr->prdtl == 0 || cfis[5] != 0 ||
956 	    cfis[9] != 0 || cfis[12] != 1 || cfis[13] != 0) {
957 		ahci_write_fis_d2h(p, slot, cfis,
958 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
959 		return;
960 	}
961 
962 	memset(buf, 0, sizeof(buf));
963 	if (cfis[4] == 0x00) {	/* Log directory */
964 		buf16[0x00] = 1; /* Version -- 1 */
965 		buf16[0x10] = 1; /* NCQ Command Error Log -- 1 page */
966 		buf16[0x13] = 1; /* SATA NCQ Send and Receive Log -- 1 page */
967 	} else if (cfis[4] == 0x10) {	/* NCQ Command Error Log */
968 		memcpy(buf8, p->err_cfis, sizeof(p->err_cfis));
969 		ahci_checksum(buf8, sizeof(buf));
970 	} else if (cfis[4] == 0x13) {	/* SATA NCQ Send and Receive Log */
971 		if (blockif_candelete(p->bctx) && !blockif_is_ro(p->bctx)) {
972 			buf[0x00] = 1;	/* SFQ DSM supported */
973 			buf[0x01] = 1;	/* SFQ DSM TRIM supported */
974 		}
975 	} else {
976 		ahci_write_fis_d2h(p, slot, cfis,
977 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
978 		return;
979 	}
980 
981 	if (cfis[2] == ATA_READ_LOG_EXT)
982 		ahci_write_fis_piosetup(p);
983 	write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
984 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
985 }
986 
987 static void
988 handle_identify(struct ahci_port *p, int slot, uint8_t *cfis)
989 {
990 	struct ahci_cmd_hdr *hdr;
991 
992 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
993 	if (p->atapi || hdr->prdtl == 0) {
994 		ahci_write_fis_d2h(p, slot, cfis,
995 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
996 	} else {
997 		ahci_write_fis_piosetup(p);
998 		write_prdt(p, slot, cfis, (void*)&p->ata_ident, sizeof(struct ata_params));
999 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
1000 	}
1001 }
1002 
1003 static void
1004 ata_identify_init(struct ahci_port* p, int atapi)
1005 {
1006 	struct ata_params* ata_ident = &p->ata_ident;
1007 
1008 	if (atapi) {
1009 		ata_ident->config = ATA_PROTO_ATAPI | ATA_ATAPI_TYPE_CDROM |
1010 		    ATA_ATAPI_REMOVABLE | ATA_DRQ_FAST;
1011 		ata_ident->capabilities1 = ATA_SUPPORT_LBA |
1012 			ATA_SUPPORT_DMA;
1013 		ata_ident->capabilities2 = (1 << 14 | 1);
1014 		ata_ident->atavalid = ATA_FLAG_64_70 | ATA_FLAG_88;
1015 		ata_ident->obsolete62 = 0x3f;
1016 		ata_ident->mwdmamodes = 7;
1017 		if (p->xfermode & ATA_WDMA0)
1018 			ata_ident->mwdmamodes |= (1 << ((p->xfermode & 7) + 8));
1019 		ata_ident->apiomodes = 3;
1020 		ata_ident->mwdmamin = 0x0078;
1021 		ata_ident->mwdmarec = 0x0078;
1022 		ata_ident->pioblind = 0x0078;
1023 		ata_ident->pioiordy = 0x0078;
1024 		ata_ident->satacapabilities = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3);
1025 		ata_ident->satacapabilities2 = ((p->ssts & ATA_SS_SPD_MASK) >> 3);
1026 		ata_ident->satasupport = ATA_SUPPORT_NCQ_STREAM;
1027 		ata_ident->version_major = 0x3f0;
1028 		ata_ident->support.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
1029 			ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
1030 		ata_ident->support.command2 = (1 << 14);
1031 		ata_ident->support.extension = (1 << 14);
1032 		ata_ident->enabled.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
1033 			ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
1034 		ata_ident->enabled.extension = (1 << 14);
1035 		ata_ident->udmamodes = 0x7f;
1036 		if (p->xfermode & ATA_UDMA0)
1037 			ata_ident->udmamodes |= (1 << ((p->xfermode & 7) + 8));
1038 		ata_ident->transport_major = 0x1020;
1039 		ata_ident->integrity = 0x00a5;
1040 	} else {
1041 		uint64_t sectors;
1042 		int sectsz, psectsz, psectoff, candelete, ro;
1043 		uint16_t cyl;
1044 		uint8_t sech, heads;
1045 
1046 		ro = blockif_is_ro(p->bctx);
1047 		candelete = blockif_candelete(p->bctx);
1048 		sectsz = blockif_sectsz(p->bctx);
1049 		sectors = blockif_size(p->bctx) / sectsz;
1050 		blockif_chs(p->bctx, &cyl, &heads, &sech);
1051 		blockif_psectsz(p->bctx, &psectsz, &psectoff);
1052 		ata_ident->config = ATA_DRQ_FAST;
1053 		ata_ident->cylinders = cyl;
1054 		ata_ident->heads = heads;
1055 		ata_ident->sectors = sech;
1056 
1057 		ata_ident->sectors_intr = (0x8000 | 128);
1058 		ata_ident->tcg = 0;
1059 
1060 		ata_ident->capabilities1 = ATA_SUPPORT_DMA |
1061 			ATA_SUPPORT_LBA | ATA_SUPPORT_IORDY;
1062 		ata_ident->capabilities2 = (1 << 14);
1063 		ata_ident->atavalid = ATA_FLAG_64_70 | ATA_FLAG_88;
1064 		if (p->mult_sectors)
1065 			ata_ident->multi = (ATA_MULTI_VALID | p->mult_sectors);
1066 		if (sectors <= 0x0fffffff) {
1067 			ata_ident->lba_size_1 = sectors;
1068 			ata_ident->lba_size_2 = (sectors >> 16);
1069 		} else {
1070 			ata_ident->lba_size_1 = 0xffff;
1071 			ata_ident->lba_size_2 = 0x0fff;
1072 		}
1073 		ata_ident->mwdmamodes = 0x7;
1074 		if (p->xfermode & ATA_WDMA0)
1075 			ata_ident->mwdmamodes |= (1 << ((p->xfermode & 7) + 8));
1076 		ata_ident->apiomodes = 0x3;
1077 		ata_ident->mwdmamin = 0x0078;
1078 		ata_ident->mwdmarec = 0x0078;
1079 		ata_ident->pioblind = 0x0078;
1080 		ata_ident->pioiordy = 0x0078;
1081 		ata_ident->support3 = 0;
1082 		ata_ident->queue = 31;
1083 		ata_ident->satacapabilities = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3 |
1084 			ATA_SUPPORT_NCQ);
1085 		ata_ident->satacapabilities2 = (ATA_SUPPORT_RCVSND_FPDMA_QUEUED |
1086 			(p->ssts & ATA_SS_SPD_MASK) >> 3);
1087 		ata_ident->version_major = 0x3f0;
1088 		ata_ident->version_minor = 0x28;
1089 		ata_ident->support.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE |
1090 			ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
1091 		ata_ident->support.command2 = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
1092 			ATA_SUPPORT_FLUSHCACHE48 | 1 << 14);
1093 		ata_ident->support.extension = (1 << 14);
1094 		ata_ident->enabled.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE |
1095 			ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
1096 		ata_ident->enabled.command2 = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
1097 			ATA_SUPPORT_FLUSHCACHE48 | 1 << 15);
1098 		ata_ident->enabled.extension = (1 << 14);
1099 		ata_ident->udmamodes = 0x7f;
1100 		if (p->xfermode & ATA_UDMA0)
1101 			ata_ident->udmamodes |= (1 << ((p->xfermode & 7) + 8));
1102 		ata_ident->lba_size48_1 = sectors;
1103 		ata_ident->lba_size48_2 = (sectors >> 16);
1104 		ata_ident->lba_size48_3 = (sectors >> 32);
1105 		ata_ident->lba_size48_4 = (sectors >> 48);
1106 
1107 		if (candelete && !ro) {
1108 			ata_ident->support3 |= ATA_SUPPORT_RZAT | ATA_SUPPORT_DRAT;
1109 			ata_ident->max_dsm_blocks = 1;
1110 			ata_ident->support_dsm = ATA_SUPPORT_DSM_TRIM;
1111 		}
1112 		ata_ident->pss = ATA_PSS_VALID_VALUE;
1113 		ata_ident->lsalign = 0x4000;
1114 		if (psectsz > sectsz) {
1115 			ata_ident->pss |= ATA_PSS_MULTLS;
1116 			ata_ident->pss |= ffsl(psectsz / sectsz) - 1;
1117 			ata_ident->lsalign |= (psectoff / sectsz);
1118 		}
1119 		if (sectsz > 512) {
1120 			ata_ident->pss |= ATA_PSS_LSSABOVE512;
1121 			ata_ident->lss_1 = sectsz / 2;
1122 			ata_ident->lss_2 = ((sectsz / 2) >> 16);
1123 		}
1124 		ata_ident->support2 = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
1125 		ata_ident->enabled2 = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
1126 		ata_ident->transport_major = 0x1020;
1127 		ata_ident->integrity = 0x00a5;
1128 	}
1129 	ahci_checksum((uint8_t*)ata_ident, sizeof(struct ata_params));
1130 }
1131 
1132 static void
1133 handle_atapi_identify(struct ahci_port *p, int slot, uint8_t *cfis)
1134 {
1135 	if (!p->atapi) {
1136 		ahci_write_fis_d2h(p, slot, cfis,
1137 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1138 	} else {
1139 		ahci_write_fis_piosetup(p);
1140 		write_prdt(p, slot, cfis, (void *)&p->ata_ident, sizeof(struct ata_params));
1141 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
1142 	}
1143 }
1144 
1145 static void
1146 atapi_inquiry(struct ahci_port *p, int slot, uint8_t *cfis)
1147 {
1148 	uint8_t buf[36];
1149 	uint8_t *acmd;
1150 	unsigned int len;
1151 	uint32_t tfd;
1152 
1153 	acmd = cfis + 0x40;
1154 
1155 	if (acmd[1] & 1) {		/* VPD */
1156 		if (acmd[2] == 0) {	/* Supported VPD pages */
1157 			buf[0] = 0x05;
1158 			buf[1] = 0;
1159 			buf[2] = 0;
1160 			buf[3] = 1;
1161 			buf[4] = 0;
1162 			len = 4 + buf[3];
1163 		} else {
1164 			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1165 			p->asc = 0x24;
1166 			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1167 			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1168 			ahci_write_fis_d2h(p, slot, cfis, tfd);
1169 			return;
1170 		}
1171 	} else {
1172 		buf[0] = 0x05;
1173 		buf[1] = 0x80;
1174 		buf[2] = 0x00;
1175 		buf[3] = 0x21;
1176 		buf[4] = 31;
1177 		buf[5] = 0;
1178 		buf[6] = 0;
1179 		buf[7] = 0;
1180 		atapi_string(buf + 8, "BHYVE", 8);
1181 		atapi_string(buf + 16, "BHYVE DVD-ROM", 16);
1182 		atapi_string(buf + 32, "001", 4);
1183 		len = sizeof(buf);
1184 	}
1185 
1186 	if (len > acmd[4])
1187 		len = acmd[4];
1188 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1189 	write_prdt(p, slot, cfis, buf, len);
1190 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1191 }
1192 
1193 static void
1194 atapi_read_capacity(struct ahci_port *p, int slot, uint8_t *cfis)
1195 {
1196 	uint8_t buf[8];
1197 	uint64_t sectors;
1198 
1199 	sectors = blockif_size(p->bctx) / 2048;
1200 	be32enc(buf, sectors - 1);
1201 	be32enc(buf + 4, 2048);
1202 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1203 	write_prdt(p, slot, cfis, buf, sizeof(buf));
1204 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1205 }
1206 
1207 static void
1208 atapi_read_toc(struct ahci_port *p, int slot, uint8_t *cfis)
1209 {
1210 	uint8_t *acmd;
1211 	uint8_t format;
1212 	unsigned int len;
1213 
1214 	acmd = cfis + 0x40;
1215 
1216 	len = be16dec(acmd + 7);
1217 	format = acmd[9] >> 6;
1218 	switch (format) {
1219 	case 0:
1220 	{
1221 		size_t size;
1222 		int msf;
1223 		uint64_t sectors;
1224 		uint8_t start_track, buf[20], *bp;
1225 
1226 		msf = (acmd[1] >> 1) & 1;
1227 		start_track = acmd[6];
1228 		if (start_track > 1 && start_track != 0xaa) {
1229 			uint32_t tfd;
1230 			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1231 			p->asc = 0x24;
1232 			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1233 			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1234 			ahci_write_fis_d2h(p, slot, cfis, tfd);
1235 			return;
1236 		}
1237 		bp = buf + 2;
1238 		*bp++ = 1;
1239 		*bp++ = 1;
1240 		if (start_track <= 1) {
1241 			*bp++ = 0;
1242 			*bp++ = 0x14;
1243 			*bp++ = 1;
1244 			*bp++ = 0;
1245 			if (msf) {
1246 				*bp++ = 0;
1247 				lba_to_msf(bp, 0);
1248 				bp += 3;
1249 			} else {
1250 				*bp++ = 0;
1251 				*bp++ = 0;
1252 				*bp++ = 0;
1253 				*bp++ = 0;
1254 			}
1255 		}
1256 		*bp++ = 0;
1257 		*bp++ = 0x14;
1258 		*bp++ = 0xaa;
1259 		*bp++ = 0;
1260 		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
1261 		sectors >>= 2;
1262 		if (msf) {
1263 			*bp++ = 0;
1264 			lba_to_msf(bp, sectors);
1265 			bp += 3;
1266 		} else {
1267 			be32enc(bp, sectors);
1268 			bp += 4;
1269 		}
1270 		size = bp - buf;
1271 		be16enc(buf, size - 2);
1272 		if (len > size)
1273 			len = size;
1274 		write_prdt(p, slot, cfis, buf, len);
1275 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1276 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1277 		break;
1278 	}
1279 	case 1:
1280 	{
1281 		uint8_t buf[12];
1282 
1283 		memset(buf, 0, sizeof(buf));
1284 		buf[1] = 0xa;
1285 		buf[2] = 0x1;
1286 		buf[3] = 0x1;
1287 		if (len > sizeof(buf))
1288 			len = sizeof(buf);
1289 		write_prdt(p, slot, cfis, buf, len);
1290 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1291 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1292 		break;
1293 	}
1294 	case 2:
1295 	{
1296 		size_t size;
1297 		int msf;
1298 		uint64_t sectors;
1299 		uint8_t *bp, buf[50];
1300 
1301 		msf = (acmd[1] >> 1) & 1;
1302 		bp = buf + 2;
1303 		*bp++ = 1;
1304 		*bp++ = 1;
1305 
1306 		*bp++ = 1;
1307 		*bp++ = 0x14;
1308 		*bp++ = 0;
1309 		*bp++ = 0xa0;
1310 		*bp++ = 0;
1311 		*bp++ = 0;
1312 		*bp++ = 0;
1313 		*bp++ = 0;
1314 		*bp++ = 1;
1315 		*bp++ = 0;
1316 		*bp++ = 0;
1317 
1318 		*bp++ = 1;
1319 		*bp++ = 0x14;
1320 		*bp++ = 0;
1321 		*bp++ = 0xa1;
1322 		*bp++ = 0;
1323 		*bp++ = 0;
1324 		*bp++ = 0;
1325 		*bp++ = 0;
1326 		*bp++ = 1;
1327 		*bp++ = 0;
1328 		*bp++ = 0;
1329 
1330 		*bp++ = 1;
1331 		*bp++ = 0x14;
1332 		*bp++ = 0;
1333 		*bp++ = 0xa2;
1334 		*bp++ = 0;
1335 		*bp++ = 0;
1336 		*bp++ = 0;
1337 		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
1338 		sectors >>= 2;
1339 		if (msf) {
1340 			*bp++ = 0;
1341 			lba_to_msf(bp, sectors);
1342 			bp += 3;
1343 		} else {
1344 			be32enc(bp, sectors);
1345 			bp += 4;
1346 		}
1347 
1348 		*bp++ = 1;
1349 		*bp++ = 0x14;
1350 		*bp++ = 0;
1351 		*bp++ = 1;
1352 		*bp++ = 0;
1353 		*bp++ = 0;
1354 		*bp++ = 0;
1355 		if (msf) {
1356 			*bp++ = 0;
1357 			lba_to_msf(bp, 0);
1358 			bp += 3;
1359 		} else {
1360 			*bp++ = 0;
1361 			*bp++ = 0;
1362 			*bp++ = 0;
1363 			*bp++ = 0;
1364 		}
1365 
1366 		size = bp - buf;
1367 		be16enc(buf, size - 2);
1368 		if (len > size)
1369 			len = size;
1370 		write_prdt(p, slot, cfis, buf, len);
1371 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1372 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1373 		break;
1374 	}
1375 	default:
1376 	{
1377 		uint32_t tfd;
1378 
1379 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1380 		p->asc = 0x24;
1381 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1382 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1383 		ahci_write_fis_d2h(p, slot, cfis, tfd);
1384 		break;
1385 	}
1386 	}
1387 }
1388 
1389 static void
1390 atapi_report_luns(struct ahci_port *p, int slot, uint8_t *cfis)
1391 {
1392 	uint8_t buf[16];
1393 
1394 	memset(buf, 0, sizeof(buf));
1395 	buf[3] = 8;
1396 
1397 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1398 	write_prdt(p, slot, cfis, buf, sizeof(buf));
1399 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1400 }
1401 
1402 static void
1403 atapi_read(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
1404 {
1405 	struct ahci_ioreq *aior;
1406 	struct ahci_cmd_hdr *hdr;
1407 	struct ahci_prdt_entry *prdt;
1408 	struct blockif_req *breq;
1409 	uint8_t *acmd;
1410 	uint64_t lba;
1411 	uint32_t len;
1412 	int err;
1413 
1414 	acmd = cfis + 0x40;
1415 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1416 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1417 
1418 	lba = be32dec(acmd + 2);
1419 	if (acmd[0] == READ_10)
1420 		len = be16dec(acmd + 7);
1421 	else
1422 		len = be32dec(acmd + 6);
1423 	if (len == 0) {
1424 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1425 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1426 	}
1427 	lba *= 2048;
1428 	len *= 2048;
1429 
1430 	/*
1431 	 * Pull request off free list
1432 	 */
1433 	aior = STAILQ_FIRST(&p->iofhd);
1434 	assert(aior != NULL);
1435 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
1436 	aior->cfis = cfis;
1437 	aior->slot = slot;
1438 	aior->len = len;
1439 	aior->done = done;
1440 	aior->readop = 1;
1441 	breq = &aior->io_req;
1442 	breq->br_offset = lba + done;
1443 	ahci_build_iov(p, aior, prdt, hdr->prdtl);
1444 
1445 	/* Mark this command in-flight. */
1446 	p->pending |= 1 << slot;
1447 
1448 	/* Stuff request onto busy list. */
1449 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
1450 
1451 	err = blockif_read(p->bctx, breq);
1452 	assert(err == 0);
1453 }
1454 
1455 static void
1456 atapi_request_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1457 {
1458 	uint8_t buf[64];
1459 	uint8_t *acmd;
1460 	unsigned int len;
1461 
1462 	acmd = cfis + 0x40;
1463 	len = acmd[4];
1464 	if (len > sizeof(buf))
1465 		len = sizeof(buf);
1466 	memset(buf, 0, len);
1467 	buf[0] = 0x70 | (1 << 7);
1468 	buf[2] = p->sense_key;
1469 	buf[7] = 10;
1470 	buf[12] = p->asc;
1471 	write_prdt(p, slot, cfis, buf, len);
1472 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1473 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1474 }
1475 
1476 static void
1477 atapi_start_stop_unit(struct ahci_port *p, int slot, uint8_t *cfis)
1478 {
1479 	uint8_t *acmd = cfis + 0x40;
1480 	uint32_t tfd;
1481 
1482 	switch (acmd[4] & 3) {
1483 	case 0:
1484 	case 1:
1485 	case 3:
1486 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1487 		tfd = ATA_S_READY | ATA_S_DSC;
1488 		break;
1489 	case 2:
1490 		/* TODO eject media */
1491 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1492 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1493 		p->asc = 0x53;
1494 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1495 		break;
1496 	}
1497 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1498 }
1499 
1500 static void
1501 atapi_mode_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1502 {
1503 	uint8_t *acmd;
1504 	uint32_t tfd;
1505 	uint8_t pc, code;
1506 	unsigned int len;
1507 
1508 	acmd = cfis + 0x40;
1509 	len = be16dec(acmd + 7);
1510 	pc = acmd[2] >> 6;
1511 	code = acmd[2] & 0x3f;
1512 
1513 	switch (pc) {
1514 	case 0:
1515 		switch (code) {
1516 		case MODEPAGE_RW_ERROR_RECOVERY:
1517 		{
1518 			uint8_t buf[16];
1519 
1520 			if (len > sizeof(buf))
1521 				len = sizeof(buf);
1522 
1523 			memset(buf, 0, sizeof(buf));
1524 			be16enc(buf, 16 - 2);
1525 			buf[2] = 0x70;
1526 			buf[8] = 0x01;
1527 			buf[9] = 16 - 10;
1528 			buf[11] = 0x05;
1529 			write_prdt(p, slot, cfis, buf, len);
1530 			tfd = ATA_S_READY | ATA_S_DSC;
1531 			break;
1532 		}
1533 		case MODEPAGE_CD_CAPABILITIES:
1534 		{
1535 			uint8_t buf[30];
1536 
1537 			if (len > sizeof(buf))
1538 				len = sizeof(buf);
1539 
1540 			memset(buf, 0, sizeof(buf));
1541 			be16enc(buf, 30 - 2);
1542 			buf[2] = 0x70;
1543 			buf[8] = 0x2A;
1544 			buf[9] = 30 - 10;
1545 			buf[10] = 0x08;
1546 			buf[12] = 0x71;
1547 			be16enc(&buf[18], 2);
1548 			be16enc(&buf[20], 512);
1549 			write_prdt(p, slot, cfis, buf, len);
1550 			tfd = ATA_S_READY | ATA_S_DSC;
1551 			break;
1552 		}
1553 		default:
1554 			goto error;
1555 			break;
1556 		}
1557 		break;
1558 	case 3:
1559 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1560 		p->asc = 0x39;
1561 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1562 		break;
1563 error:
1564 	case 1:
1565 	case 2:
1566 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1567 		p->asc = 0x24;
1568 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1569 		break;
1570 	}
1571 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1572 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1573 }
1574 
1575 static void
1576 atapi_get_event_status_notification(struct ahci_port *p, int slot,
1577     uint8_t *cfis)
1578 {
1579 	uint8_t *acmd;
1580 	uint32_t tfd;
1581 
1582 	acmd = cfis + 0x40;
1583 
1584 	/* we don't support asynchronous operation */
1585 	if (!(acmd[1] & 1)) {
1586 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1587 		p->asc = 0x24;
1588 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1589 	} else {
1590 		uint8_t buf[8];
1591 		unsigned int len;
1592 
1593 		len = be16dec(acmd + 7);
1594 		if (len > sizeof(buf))
1595 			len = sizeof(buf);
1596 
1597 		memset(buf, 0, sizeof(buf));
1598 		be16enc(buf, 8 - 2);
1599 		buf[2] = 0x04;
1600 		buf[3] = 0x10;
1601 		buf[5] = 0x02;
1602 		write_prdt(p, slot, cfis, buf, len);
1603 		tfd = ATA_S_READY | ATA_S_DSC;
1604 	}
1605 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1606 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1607 }
1608 
1609 static void
1610 handle_packet_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1611 {
1612 	uint8_t *acmd;
1613 
1614 	acmd = cfis + 0x40;
1615 
1616 #ifdef AHCI_DEBUG
1617 	{
1618 		int i;
1619 		DPRINTF("ACMD:");
1620 		for (i = 0; i < 16; i++)
1621 			DPRINTF("%02x ", acmd[i]);
1622 		DPRINTF("");
1623 	}
1624 #endif
1625 
1626 	switch (acmd[0]) {
1627 	case TEST_UNIT_READY:
1628 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1629 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1630 		break;
1631 	case INQUIRY:
1632 		atapi_inquiry(p, slot, cfis);
1633 		break;
1634 	case READ_CAPACITY:
1635 		atapi_read_capacity(p, slot, cfis);
1636 		break;
1637 	case PREVENT_ALLOW:
1638 		/* TODO */
1639 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1640 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1641 		break;
1642 	case READ_TOC:
1643 		atapi_read_toc(p, slot, cfis);
1644 		break;
1645 	case REPORT_LUNS:
1646 		atapi_report_luns(p, slot, cfis);
1647 		break;
1648 	case READ_10:
1649 	case READ_12:
1650 		atapi_read(p, slot, cfis, 0);
1651 		break;
1652 	case REQUEST_SENSE:
1653 		atapi_request_sense(p, slot, cfis);
1654 		break;
1655 	case START_STOP_UNIT:
1656 		atapi_start_stop_unit(p, slot, cfis);
1657 		break;
1658 	case MODE_SENSE_10:
1659 		atapi_mode_sense(p, slot, cfis);
1660 		break;
1661 	case GET_EVENT_STATUS_NOTIFICATION:
1662 		atapi_get_event_status_notification(p, slot, cfis);
1663 		break;
1664 	default:
1665 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1666 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1667 		p->asc = 0x20;
1668 		ahci_write_fis_d2h(p, slot, cfis, (p->sense_key << 12) |
1669 				ATA_S_READY | ATA_S_ERROR);
1670 		break;
1671 	}
1672 }
1673 
1674 static void
1675 ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1676 {
1677 
1678 	p->tfd |= ATA_S_BUSY;
1679 	switch (cfis[2]) {
1680 	case ATA_ATA_IDENTIFY:
1681 		handle_identify(p, slot, cfis);
1682 		break;
1683 	case ATA_SETFEATURES:
1684 	{
1685 		switch (cfis[3]) {
1686 		case ATA_SF_ENAB_SATA_SF:
1687 			switch (cfis[12]) {
1688 			case ATA_SATA_SF_AN:
1689 				p->tfd = ATA_S_DSC | ATA_S_READY;
1690 				break;
1691 			default:
1692 				p->tfd = ATA_S_ERROR | ATA_S_READY;
1693 				p->tfd |= (ATA_ERROR_ABORT << 8);
1694 				break;
1695 			}
1696 			break;
1697 		case ATA_SF_ENAB_WCACHE:
1698 		case ATA_SF_DIS_WCACHE:
1699 		case ATA_SF_ENAB_RCACHE:
1700 		case ATA_SF_DIS_RCACHE:
1701 			p->tfd = ATA_S_DSC | ATA_S_READY;
1702 			break;
1703 		case ATA_SF_SETXFER:
1704 		{
1705 			switch (cfis[12] & 0xf8) {
1706 			case ATA_PIO:
1707 			case ATA_PIO0:
1708 				break;
1709 			case ATA_WDMA0:
1710 			case ATA_UDMA0:
1711 				p->xfermode = (cfis[12] & 0x7);
1712 				break;
1713 			}
1714 			p->tfd = ATA_S_DSC | ATA_S_READY;
1715 			break;
1716 		}
1717 		default:
1718 			p->tfd = ATA_S_ERROR | ATA_S_READY;
1719 			p->tfd |= (ATA_ERROR_ABORT << 8);
1720 			break;
1721 		}
1722 		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1723 		break;
1724 	}
1725 	case ATA_SET_MULTI:
1726 		if (cfis[12] != 0 &&
1727 			(cfis[12] > 128 || (cfis[12] & (cfis[12] - 1)))) {
1728 			p->tfd = ATA_S_ERROR | ATA_S_READY;
1729 			p->tfd |= (ATA_ERROR_ABORT << 8);
1730 		} else {
1731 			p->mult_sectors = cfis[12];
1732 			p->tfd = ATA_S_DSC | ATA_S_READY;
1733 		}
1734 		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1735 		break;
1736 	case ATA_READ:
1737 	case ATA_WRITE:
1738 	case ATA_READ48:
1739 	case ATA_WRITE48:
1740 	case ATA_READ_MUL:
1741 	case ATA_WRITE_MUL:
1742 	case ATA_READ_MUL48:
1743 	case ATA_WRITE_MUL48:
1744 	case ATA_READ_DMA:
1745 	case ATA_WRITE_DMA:
1746 	case ATA_READ_DMA48:
1747 	case ATA_WRITE_DMA48:
1748 	case ATA_READ_FPDMA_QUEUED:
1749 	case ATA_WRITE_FPDMA_QUEUED:
1750 		ahci_handle_rw(p, slot, cfis, 0);
1751 		break;
1752 	case ATA_FLUSHCACHE:
1753 	case ATA_FLUSHCACHE48:
1754 		ahci_handle_flush(p, slot, cfis);
1755 		break;
1756 	case ATA_DATA_SET_MANAGEMENT:
1757 		if (cfis[11] == 0 && cfis[3] == ATA_DSM_TRIM &&
1758 		    cfis[13] == 0 && cfis[12] == 1) {
1759 			ahci_handle_dsm_trim(p, slot, cfis, 0);
1760 			break;
1761 		}
1762 		ahci_write_fis_d2h(p, slot, cfis,
1763 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1764 		break;
1765 	case ATA_SEND_FPDMA_QUEUED:
1766 		if ((cfis[13] & 0x1f) == ATA_SFPDMA_DSM &&
1767 		    cfis[17] == 0 && cfis[16] == ATA_DSM_TRIM &&
1768 		    cfis[11] == 0 && cfis[3] == 1) {
1769 			ahci_handle_dsm_trim(p, slot, cfis, 0);
1770 			break;
1771 		}
1772 		ahci_write_fis_d2h(p, slot, cfis,
1773 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1774 		break;
1775 	case ATA_READ_LOG_EXT:
1776 	case ATA_READ_LOG_DMA_EXT:
1777 		ahci_handle_read_log(p, slot, cfis);
1778 		break;
1779 	case ATA_SECURITY_FREEZE_LOCK:
1780 	case ATA_SMART_CMD:
1781 	case ATA_NOP:
1782 		ahci_write_fis_d2h(p, slot, cfis,
1783 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1784 		break;
1785 	case ATA_CHECK_POWER_MODE:
1786 		cfis[12] = 0xff;	/* always on */
1787 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1788 		break;
1789 	case ATA_STANDBY_CMD:
1790 	case ATA_STANDBY_IMMEDIATE:
1791 	case ATA_IDLE_CMD:
1792 	case ATA_IDLE_IMMEDIATE:
1793 	case ATA_SLEEP:
1794 	case ATA_READ_VERIFY:
1795 	case ATA_READ_VERIFY48:
1796 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1797 		break;
1798 	case ATA_ATAPI_IDENTIFY:
1799 		handle_atapi_identify(p, slot, cfis);
1800 		break;
1801 	case ATA_PACKET_CMD:
1802 		if (!p->atapi) {
1803 			ahci_write_fis_d2h(p, slot, cfis,
1804 			    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1805 		} else
1806 			handle_packet_cmd(p, slot, cfis);
1807 		break;
1808 	default:
1809 		WPRINTF("Unsupported cmd:%02x", cfis[2]);
1810 		ahci_write_fis_d2h(p, slot, cfis,
1811 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1812 		break;
1813 	}
1814 }
1815 
1816 static void
1817 ahci_handle_slot(struct ahci_port *p, int slot)
1818 {
1819 	struct ahci_cmd_hdr *hdr;
1820 #ifdef AHCI_DEBUG
1821 	struct ahci_prdt_entry *prdt;
1822 #endif
1823 	struct pci_ahci_softc *sc;
1824 	uint8_t *cfis;
1825 #ifdef AHCI_DEBUG
1826 	int cfl, i;
1827 #endif
1828 
1829 	sc = p->pr_sc;
1830 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1831 #ifdef AHCI_DEBUG
1832 	cfl = (hdr->flags & 0x1f) * 4;
1833 #endif
1834 	cfis = paddr_guest2host(ahci_ctx(sc), hdr->ctba,
1835 			0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry));
1836 #ifdef AHCI_DEBUG
1837 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1838 
1839 	DPRINTF("cfis:");
1840 	for (i = 0; i < cfl; i++) {
1841 		if (i % 10 == 0)
1842 			DPRINTF("");
1843 		DPRINTF("%02x ", cfis[i]);
1844 	}
1845 	DPRINTF("");
1846 
1847 	for (i = 0; i < hdr->prdtl; i++) {
1848 		DPRINTF("%d@%08"PRIx64"", prdt->dbc & 0x3fffff, prdt->dba);
1849 		prdt++;
1850 	}
1851 #endif
1852 
1853 	if (cfis[0] != FIS_TYPE_REGH2D) {
1854 		WPRINTF("Not a H2D FIS:%02x", cfis[0]);
1855 		return;
1856 	}
1857 
1858 	if (cfis[1] & 0x80) {
1859 		ahci_handle_cmd(p, slot, cfis);
1860 	} else {
1861 		if (cfis[15] & (1 << 2))
1862 			p->reset = 1;
1863 		else if (p->reset) {
1864 			p->reset = 0;
1865 			ahci_port_reset(p);
1866 		}
1867 		p->ci &= ~(1 << slot);
1868 	}
1869 }
1870 
1871 static void
1872 ahci_handle_port(struct ahci_port *p)
1873 {
1874 
1875 	if (!(p->cmd & AHCI_P_CMD_ST))
1876 		return;
1877 
1878 	/*
1879 	 * Search for any new commands to issue ignoring those that
1880 	 * are already in-flight.  Stop if device is busy or in error.
1881 	 */
1882 	for (; (p->ci & ~p->pending) != 0; p->ccs = ((p->ccs + 1) & 31)) {
1883 		if ((p->tfd & (ATA_S_BUSY | ATA_S_DRQ)) != 0)
1884 			break;
1885 		if (p->waitforclear)
1886 			break;
1887 		if ((p->ci & ~p->pending & (1 << p->ccs)) != 0) {
1888 			p->cmd &= ~AHCI_P_CMD_CCS_MASK;
1889 			p->cmd |= p->ccs << AHCI_P_CMD_CCS_SHIFT;
1890 			ahci_handle_slot(p, p->ccs);
1891 		}
1892 	}
1893 }
1894 
1895 /*
1896  * blockif callback routine - this runs in the context of the blockif
1897  * i/o thread, so the mutex needs to be acquired.
1898  */
1899 static void
1900 ata_ioreq_cb(struct blockif_req *br, int err)
1901 {
1902 	struct ahci_cmd_hdr *hdr;
1903 	struct ahci_ioreq *aior;
1904 	struct ahci_port *p;
1905 	struct pci_ahci_softc *sc;
1906 	uint32_t tfd;
1907 	uint8_t *cfis;
1908 	int slot, ncq, dsm;
1909 
1910 	DPRINTF("%s %d", __func__, err);
1911 
1912 	ncq = dsm = 0;
1913 	aior = br->br_param;
1914 	p = aior->io_pr;
1915 	cfis = aior->cfis;
1916 	slot = aior->slot;
1917 	sc = p->pr_sc;
1918 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1919 
1920 	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
1921 	    cfis[2] == ATA_READ_FPDMA_QUEUED ||
1922 	    cfis[2] == ATA_SEND_FPDMA_QUEUED)
1923 		ncq = 1;
1924 	if (cfis[2] == ATA_DATA_SET_MANAGEMENT ||
1925 	    (cfis[2] == ATA_SEND_FPDMA_QUEUED &&
1926 	     (cfis[13] & 0x1f) == ATA_SFPDMA_DSM))
1927 		dsm = 1;
1928 
1929 	pthread_mutex_lock(&sc->mtx);
1930 
1931 	/*
1932 	 * Delete the blockif request from the busy list
1933 	 */
1934 	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1935 
1936 	/*
1937 	 * Move the blockif request back to the free list
1938 	 */
1939 	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
1940 
1941 	if (!err)
1942 		hdr->prdbc = aior->done;
1943 
1944 	if (!err && aior->more) {
1945 		if (dsm)
1946 			ahci_handle_dsm_trim(p, slot, cfis, aior->done);
1947 		else
1948 			ahci_handle_rw(p, slot, cfis, aior->done);
1949 		goto out;
1950 	}
1951 
1952 	if (!err)
1953 		tfd = ATA_S_READY | ATA_S_DSC;
1954 	else
1955 		tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1956 	if (ncq)
1957 		ahci_write_fis_sdb(p, slot, cfis, tfd);
1958 	else
1959 		ahci_write_fis_d2h(p, slot, cfis, tfd);
1960 
1961 	/*
1962 	 * This command is now complete.
1963 	 */
1964 	p->pending &= ~(1 << slot);
1965 
1966 	ahci_check_stopped(p);
1967 	ahci_handle_port(p);
1968 out:
1969 	pthread_mutex_unlock(&sc->mtx);
1970 	DPRINTF("%s exit", __func__);
1971 }
1972 
1973 static void
1974 atapi_ioreq_cb(struct blockif_req *br, int err)
1975 {
1976 	struct ahci_cmd_hdr *hdr;
1977 	struct ahci_ioreq *aior;
1978 	struct ahci_port *p;
1979 	struct pci_ahci_softc *sc;
1980 	uint8_t *cfis;
1981 	uint32_t tfd;
1982 	int slot;
1983 
1984 	DPRINTF("%s %d", __func__, err);
1985 
1986 	aior = br->br_param;
1987 	p = aior->io_pr;
1988 	cfis = aior->cfis;
1989 	slot = aior->slot;
1990 	sc = p->pr_sc;
1991 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + aior->slot * AHCI_CL_SIZE);
1992 
1993 	pthread_mutex_lock(&sc->mtx);
1994 
1995 	/*
1996 	 * Delete the blockif request from the busy list
1997 	 */
1998 	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1999 
2000 	/*
2001 	 * Move the blockif request back to the free list
2002 	 */
2003 	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
2004 
2005 	if (!err)
2006 		hdr->prdbc = aior->done;
2007 
2008 	if (!err && aior->more) {
2009 		atapi_read(p, slot, cfis, aior->done);
2010 		goto out;
2011 	}
2012 
2013 	if (!err) {
2014 		tfd = ATA_S_READY | ATA_S_DSC;
2015 	} else {
2016 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
2017 		p->asc = 0x21;
2018 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
2019 	}
2020 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
2021 	ahci_write_fis_d2h(p, slot, cfis, tfd);
2022 
2023 	/*
2024 	 * This command is now complete.
2025 	 */
2026 	p->pending &= ~(1 << slot);
2027 
2028 	ahci_check_stopped(p);
2029 	ahci_handle_port(p);
2030 out:
2031 	pthread_mutex_unlock(&sc->mtx);
2032 	DPRINTF("%s exit", __func__);
2033 }
2034 
2035 static void
2036 pci_ahci_ioreq_init(struct ahci_port *pr)
2037 {
2038 	struct ahci_ioreq *vr;
2039 	int i;
2040 
2041 	pr->ioqsz = blockif_queuesz(pr->bctx);
2042 	pr->ioreq = calloc(pr->ioqsz, sizeof(struct ahci_ioreq));
2043 	STAILQ_INIT(&pr->iofhd);
2044 
2045 	/*
2046 	 * Add all i/o request entries to the free queue
2047 	 */
2048 	for (i = 0; i < pr->ioqsz; i++) {
2049 		vr = &pr->ioreq[i];
2050 		vr->io_pr = pr;
2051 		if (!pr->atapi)
2052 			vr->io_req.br_callback = ata_ioreq_cb;
2053 		else
2054 			vr->io_req.br_callback = atapi_ioreq_cb;
2055 		vr->io_req.br_param = vr;
2056 		STAILQ_INSERT_TAIL(&pr->iofhd, vr, io_flist);
2057 	}
2058 
2059 	TAILQ_INIT(&pr->iobhd);
2060 }
2061 
2062 static void
2063 pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
2064 {
2065 	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
2066 	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
2067 	struct ahci_port *p = &sc->port[port];
2068 
2069 	DPRINTF("pci_ahci_port %d: write offset 0x%"PRIx64" value 0x%"PRIx64"",
2070 		port, offset, value);
2071 
2072 	switch (offset) {
2073 	case AHCI_P_CLB:
2074 		p->clb = value;
2075 		break;
2076 	case AHCI_P_CLBU:
2077 		p->clbu = value;
2078 		break;
2079 	case AHCI_P_FB:
2080 		p->fb = value;
2081 		break;
2082 	case AHCI_P_FBU:
2083 		p->fbu = value;
2084 		break;
2085 	case AHCI_P_IS:
2086 		p->is &= ~value;
2087 		ahci_port_intr(p);
2088 		break;
2089 	case AHCI_P_IE:
2090 		p->ie = value & 0xFDC000FF;
2091 		ahci_port_intr(p);
2092 		break;
2093 	case AHCI_P_CMD:
2094 	{
2095 		p->cmd &= ~(AHCI_P_CMD_ST | AHCI_P_CMD_SUD | AHCI_P_CMD_POD |
2096 		    AHCI_P_CMD_CLO | AHCI_P_CMD_FRE | AHCI_P_CMD_APSTE |
2097 		    AHCI_P_CMD_ATAPI | AHCI_P_CMD_DLAE | AHCI_P_CMD_ALPE |
2098 		    AHCI_P_CMD_ASP | AHCI_P_CMD_ICC_MASK);
2099 		p->cmd |= (AHCI_P_CMD_ST | AHCI_P_CMD_SUD | AHCI_P_CMD_POD |
2100 		    AHCI_P_CMD_CLO | AHCI_P_CMD_FRE | AHCI_P_CMD_APSTE |
2101 		    AHCI_P_CMD_ATAPI | AHCI_P_CMD_DLAE | AHCI_P_CMD_ALPE |
2102 		    AHCI_P_CMD_ASP | AHCI_P_CMD_ICC_MASK) & value;
2103 
2104 		if (!(value & AHCI_P_CMD_ST)) {
2105 			ahci_port_stop(p);
2106 		} else {
2107 			uint64_t clb;
2108 
2109 			p->cmd |= AHCI_P_CMD_CR;
2110 			clb = (uint64_t)p->clbu << 32 | p->clb;
2111 			p->cmd_lst = paddr_guest2host(ahci_ctx(sc), clb,
2112 					AHCI_CL_SIZE * AHCI_MAX_SLOTS);
2113 		}
2114 
2115 		if (value & AHCI_P_CMD_FRE) {
2116 			uint64_t fb;
2117 
2118 			p->cmd |= AHCI_P_CMD_FR;
2119 			fb = (uint64_t)p->fbu << 32 | p->fb;
2120 			/* we don't support FBSCP, so rfis size is 256Bytes */
2121 			p->rfis = paddr_guest2host(ahci_ctx(sc), fb, 256);
2122 		} else {
2123 			p->cmd &= ~AHCI_P_CMD_FR;
2124 		}
2125 
2126 		if (value & AHCI_P_CMD_CLO) {
2127 			p->tfd &= ~(ATA_S_BUSY | ATA_S_DRQ);
2128 			p->cmd &= ~AHCI_P_CMD_CLO;
2129 		}
2130 
2131 		if (value & AHCI_P_CMD_ICC_MASK) {
2132 			p->cmd &= ~AHCI_P_CMD_ICC_MASK;
2133 		}
2134 
2135 		ahci_handle_port(p);
2136 		break;
2137 	}
2138 	case AHCI_P_TFD:
2139 	case AHCI_P_SIG:
2140 	case AHCI_P_SSTS:
2141 		WPRINTF("pci_ahci_port: read only registers 0x%"PRIx64"", offset);
2142 		break;
2143 	case AHCI_P_SCTL:
2144 		p->sctl = value;
2145 		if (!(p->cmd & AHCI_P_CMD_ST)) {
2146 			if (value & ATA_SC_DET_RESET)
2147 				ahci_port_reset(p);
2148 		}
2149 		break;
2150 	case AHCI_P_SERR:
2151 		p->serr &= ~value;
2152 		break;
2153 	case AHCI_P_SACT:
2154 		p->sact |= value;
2155 		break;
2156 	case AHCI_P_CI:
2157 		p->ci |= value;
2158 		ahci_handle_port(p);
2159 		break;
2160 	case AHCI_P_SNTF:
2161 	case AHCI_P_FBS:
2162 	default:
2163 		break;
2164 	}
2165 }
2166 
2167 static void
2168 pci_ahci_host_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
2169 {
2170 	DPRINTF("pci_ahci_host: write offset 0x%"PRIx64" value 0x%"PRIx64"",
2171 		offset, value);
2172 
2173 	switch (offset) {
2174 	case AHCI_CAP:
2175 	case AHCI_PI:
2176 	case AHCI_VS:
2177 	case AHCI_CAP2:
2178 		DPRINTF("pci_ahci_host: read only registers 0x%"PRIx64"", offset);
2179 		break;
2180 	case AHCI_GHC:
2181 		if (value & AHCI_GHC_HR) {
2182 			ahci_reset(sc);
2183 			break;
2184 		}
2185 		if (value & AHCI_GHC_IE)
2186 			sc->ghc |= AHCI_GHC_IE;
2187 		else
2188 			sc->ghc &= ~AHCI_GHC_IE;
2189 		ahci_generate_intr(sc, 0xffffffff);
2190 		break;
2191 	case AHCI_IS:
2192 		sc->is &= ~value;
2193 		ahci_generate_intr(sc, value);
2194 		break;
2195 	default:
2196 		break;
2197 	}
2198 }
2199 
2200 static void
2201 pci_ahci_write(struct pci_devinst *pi, int baridx, uint64_t offset, int size,
2202     uint64_t value)
2203 {
2204 	struct pci_ahci_softc *sc = pi->pi_arg;
2205 
2206 	assert(baridx == 5);
2207 	assert((offset % 4) == 0 && size == 4);
2208 
2209 	pthread_mutex_lock(&sc->mtx);
2210 
2211 	if (offset < AHCI_OFFSET)
2212 		pci_ahci_host_write(sc, offset, value);
2213 	else if (offset < (uint64_t)AHCI_OFFSET + sc->ports * AHCI_STEP)
2214 		pci_ahci_port_write(sc, offset, value);
2215 	else
2216 		WPRINTF("pci_ahci: unknown i/o write offset 0x%"PRIx64"", offset);
2217 
2218 	pthread_mutex_unlock(&sc->mtx);
2219 }
2220 
2221 static uint64_t
2222 pci_ahci_host_read(struct pci_ahci_softc *sc, uint64_t offset)
2223 {
2224 	uint32_t value;
2225 
2226 	switch (offset) {
2227 	case AHCI_CAP:
2228 	case AHCI_GHC:
2229 	case AHCI_IS:
2230 	case AHCI_PI:
2231 	case AHCI_VS:
2232 	case AHCI_CCCC:
2233 	case AHCI_CCCP:
2234 	case AHCI_EM_LOC:
2235 	case AHCI_EM_CTL:
2236 	case AHCI_CAP2:
2237 	{
2238 		uint32_t *p = &sc->cap;
2239 		p += (offset - AHCI_CAP) / sizeof(uint32_t);
2240 		value = *p;
2241 		break;
2242 	}
2243 	default:
2244 		value = 0;
2245 		break;
2246 	}
2247 	DPRINTF("pci_ahci_host: read offset 0x%"PRIx64" value 0x%x",
2248 		offset, value);
2249 
2250 	return (value);
2251 }
2252 
2253 static uint64_t
2254 pci_ahci_port_read(struct pci_ahci_softc *sc, uint64_t offset)
2255 {
2256 	uint32_t value;
2257 	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
2258 	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
2259 
2260 	switch (offset) {
2261 	case AHCI_P_CLB:
2262 	case AHCI_P_CLBU:
2263 	case AHCI_P_FB:
2264 	case AHCI_P_FBU:
2265 	case AHCI_P_IS:
2266 	case AHCI_P_IE:
2267 	case AHCI_P_CMD:
2268 	case AHCI_P_TFD:
2269 	case AHCI_P_SIG:
2270 	case AHCI_P_SSTS:
2271 	case AHCI_P_SCTL:
2272 	case AHCI_P_SERR:
2273 	case AHCI_P_SACT:
2274 	case AHCI_P_CI:
2275 	case AHCI_P_SNTF:
2276 	case AHCI_P_FBS:
2277 	{
2278 		uint32_t *p= &sc->port[port].clb;
2279 		p += (offset - AHCI_P_CLB) / sizeof(uint32_t);
2280 		value = *p;
2281 		break;
2282 	}
2283 	default:
2284 		value = 0;
2285 		break;
2286 	}
2287 
2288 	DPRINTF("pci_ahci_port %d: read offset 0x%"PRIx64" value 0x%x",
2289 		port, offset, value);
2290 
2291 	return value;
2292 }
2293 
2294 static uint64_t
2295 pci_ahci_read(struct pci_devinst *pi, int baridx, uint64_t regoff, int size)
2296 {
2297 	struct pci_ahci_softc *sc = pi->pi_arg;
2298 	uint64_t offset;
2299 	uint32_t value;
2300 
2301 	assert(baridx == 5);
2302 	assert(size == 1 || size == 2 || size == 4);
2303 	assert((regoff & (size - 1)) == 0);
2304 
2305 	pthread_mutex_lock(&sc->mtx);
2306 
2307 	offset = regoff & ~0x3;	    /* round down to a multiple of 4 bytes */
2308 	if (offset < AHCI_OFFSET)
2309 		value = pci_ahci_host_read(sc, offset);
2310 	else if (offset < (uint64_t)AHCI_OFFSET + sc->ports * AHCI_STEP)
2311 		value = pci_ahci_port_read(sc, offset);
2312 	else {
2313 		value = 0;
2314 		WPRINTF("pci_ahci: unknown i/o read offset 0x%"PRIx64"",
2315 		    regoff);
2316 	}
2317 	value >>= 8 * (regoff & 0x3);
2318 
2319 	pthread_mutex_unlock(&sc->mtx);
2320 
2321 	return (value);
2322 }
2323 
2324 /*
2325  * Each AHCI controller has a "port" node which contains nodes for
2326  * each port named after the decimal number of the port (no leading
2327  * zeroes).  Port nodes contain a "type" ("hd" or "cd"), as well as
2328  * options for blockif.  For example:
2329  *
2330  * pci.0.1.0
2331  *          .device="ahci"
2332  *          .port
2333  *               .0
2334  *                 .type="hd"
2335  *                 .path="/path/to/image"
2336  */
2337 static int
2338 pci_ahci_legacy_config_port(nvlist_t *nvl, int port, const char *type,
2339     const char *opts)
2340 {
2341 	char node_name[sizeof("XX")];
2342 	nvlist_t *port_nvl;
2343 
2344 	snprintf(node_name, sizeof(node_name), "%d", port);
2345 	port_nvl = create_relative_config_node(nvl, node_name);
2346 	set_config_value_node(port_nvl, "type", type);
2347 	return (blockif_legacy_config(port_nvl, opts));
2348 }
2349 
2350 static int
2351 pci_ahci_legacy_config(nvlist_t *nvl, const char *opts)
2352 {
2353 	nvlist_t *ports_nvl;
2354 	const char *type;
2355 	char *next, *next2, *str, *tofree;
2356 	int p, ret;
2357 
2358 	if (opts == NULL)
2359 		return (0);
2360 
2361 	ports_nvl = create_relative_config_node(nvl, "port");
2362 	ret = 1;
2363 	tofree = str = strdup(opts);
2364 	for (p = 0; p < MAX_PORTS && str != NULL; p++, str = next) {
2365 		/* Identify and cut off type of present port. */
2366 		if (strncmp(str, "hd:", 3) == 0) {
2367 			type = "hd";
2368 			str += 3;
2369 		} else if (strncmp(str, "cd:", 3) == 0) {
2370 			type = "cd";
2371 			str += 3;
2372 		} else
2373 			type = NULL;
2374 
2375 		/* Find and cut off the next port options. */
2376 		next = strstr(str, ",hd:");
2377 		next2 = strstr(str, ",cd:");
2378 		if (next == NULL || (next2 != NULL && next2 < next))
2379 			next = next2;
2380 		if (next != NULL) {
2381 			next[0] = 0;
2382 			next++;
2383 		}
2384 
2385 		if (str[0] == 0)
2386 			continue;
2387 
2388 		if (type == NULL) {
2389 			EPRINTLN("Missing or invalid type for port %d: \"%s\"",
2390 			    p, str);
2391 			goto out;
2392 		}
2393 
2394 		if (pci_ahci_legacy_config_port(ports_nvl, p, type, str) != 0)
2395 			goto out;
2396 	}
2397 	ret = 0;
2398 out:
2399 	free(tofree);
2400 	return (ret);
2401 }
2402 
2403 static int
2404 pci_ahci_cd_legacy_config(nvlist_t *nvl, const char *opts)
2405 {
2406 	nvlist_t *ports_nvl;
2407 
2408 	ports_nvl = create_relative_config_node(nvl, "port");
2409 	return (pci_ahci_legacy_config_port(ports_nvl, 0, "cd", opts));
2410 }
2411 
2412 static int
2413 pci_ahci_hd_legacy_config(nvlist_t *nvl, const char *opts)
2414 {
2415 	nvlist_t *ports_nvl;
2416 
2417 	ports_nvl = create_relative_config_node(nvl, "port");
2418 	return (pci_ahci_legacy_config_port(ports_nvl, 0, "hd", opts));
2419 }
2420 
2421 static int
2422 pci_ahci_init(struct pci_devinst *pi, nvlist_t *nvl)
2423 {
2424 	char bident[sizeof("XXX:XXX:XXX")];
2425 	char node_name[sizeof("XX")];
2426 	struct blockif_ctxt *bctxt;
2427 	struct pci_ahci_softc *sc;
2428 	int atapi, ret, slots, p;
2429 	MD5_CTX mdctx;
2430 	u_char digest[16];
2431 	const char *path, *type, *value;
2432 	nvlist_t *ports_nvl, *port_nvl;
2433 
2434 	ret = 0;
2435 
2436 #ifdef AHCI_DEBUG
2437 	dbg = fopen("/tmp/log", "w+");
2438 #endif
2439 
2440 	sc = calloc(1, sizeof(struct pci_ahci_softc));
2441 	pi->pi_arg = sc;
2442 	sc->asc_pi = pi;
2443 	pthread_mutex_init(&sc->mtx, NULL);
2444 	sc->ports = 0;
2445 	sc->pi = 0;
2446 	slots = 32;
2447 
2448 	ports_nvl = find_relative_config_node(nvl, "port");
2449 	for (p = 0; ports_nvl != NULL && p < MAX_PORTS; p++) {
2450 		struct ata_params *ata_ident = &sc->port[p].ata_ident;
2451 		char ident[AHCI_PORT_IDENT];
2452 
2453 		snprintf(node_name, sizeof(node_name), "%d", p);
2454 		port_nvl = find_relative_config_node(ports_nvl, node_name);
2455 		if (port_nvl == NULL)
2456 			continue;
2457 
2458 		type = get_config_value_node(port_nvl, "type");
2459 		if (type == NULL)
2460 			continue;
2461 
2462 		if (strcmp(type, "hd") == 0)
2463 			atapi = 0;
2464 		else
2465 			atapi = 1;
2466 
2467 		/*
2468 		 * Attempt to open the backing image. Use the PCI slot/func
2469 		 * and the port number for the identifier string.
2470 		 */
2471 		snprintf(bident, sizeof(bident), "%u:%u:%u", pi->pi_slot,
2472 		    pi->pi_func, p);
2473 
2474 		bctxt = blockif_open(port_nvl, bident);
2475 		if (bctxt == NULL) {
2476 			sc->ports = p;
2477 			ret = 1;
2478 			goto open_fail;
2479 		}
2480 		sc->port[p].bctx = bctxt;
2481 		sc->port[p].pr_sc = sc;
2482 		sc->port[p].port = p;
2483 		sc->port[p].atapi = atapi;
2484 
2485 		/*
2486 		 * Create an identifier for the backing file.
2487 		 * Use parts of the md5 sum of the filename
2488 		 */
2489 		path = get_config_value_node(port_nvl, "path");
2490 		MD5Init(&mdctx);
2491 		MD5Update(&mdctx, path, strlen(path));
2492 		MD5Final(digest, &mdctx);
2493 		snprintf(ident, AHCI_PORT_IDENT,
2494 			"BHYVE-%02X%02X-%02X%02X-%02X%02X",
2495 			digest[0], digest[1], digest[2], digest[3], digest[4],
2496 			digest[5]);
2497 
2498 		memset(ata_ident, 0, sizeof(struct ata_params));
2499 		ata_string((uint8_t*)&ata_ident->serial, ident, 20);
2500 		ata_string((uint8_t*)&ata_ident->revision, "001", 8);
2501 		if (atapi)
2502 			ata_string((uint8_t*)&ata_ident->model, "BHYVE SATA DVD ROM", 40);
2503 		else
2504 			ata_string((uint8_t*)&ata_ident->model, "BHYVE SATA DISK", 40);
2505 		value = get_config_value_node(port_nvl, "nmrr");
2506 		if (value != NULL)
2507 			ata_ident->media_rotation_rate = atoi(value);
2508 		value = get_config_value_node(port_nvl, "ser");
2509 		if (value != NULL)
2510 			ata_string((uint8_t*)(&ata_ident->serial), value, 20);
2511 		value = get_config_value_node(port_nvl, "rev");
2512 		if (value != NULL)
2513 			ata_string((uint8_t*)(&ata_ident->revision), value, 8);
2514 		value = get_config_value_node(port_nvl, "model");
2515 		if (value != NULL)
2516 			ata_string((uint8_t*)(&ata_ident->model), value, 40);
2517 		ata_identify_init(&sc->port[p], atapi);
2518 
2519 		/*
2520 		 * Allocate blockif request structures and add them
2521 		 * to the free list
2522 		 */
2523 		pci_ahci_ioreq_init(&sc->port[p]);
2524 
2525 		sc->pi |= (1 << p);
2526 		if (sc->port[p].ioqsz < slots)
2527 			slots = sc->port[p].ioqsz;
2528 	}
2529 	sc->ports = p;
2530 
2531 	/* Intel ICH8 AHCI */
2532 	--slots;
2533 	if (sc->ports < DEF_PORTS)
2534 		sc->ports = DEF_PORTS;
2535 	sc->cap = AHCI_CAP_64BIT | AHCI_CAP_SNCQ | AHCI_CAP_SSNTF |
2536 	    AHCI_CAP_SMPS | AHCI_CAP_SSS | AHCI_CAP_SALP |
2537 	    AHCI_CAP_SAL | AHCI_CAP_SCLO | (0x3 << AHCI_CAP_ISS_SHIFT)|
2538 	    AHCI_CAP_PMD | AHCI_CAP_SSC | AHCI_CAP_PSC |
2539 	    (slots << AHCI_CAP_NCS_SHIFT) | AHCI_CAP_SXS | (sc->ports - 1);
2540 
2541 	sc->vs = 0x10300;
2542 	sc->cap2 = AHCI_CAP2_APST;
2543 	ahci_reset(sc);
2544 
2545 	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x2821);
2546 	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x8086);
2547 	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
2548 	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_STORAGE_SATA);
2549 	pci_set_cfgdata8(pi, PCIR_PROGIF, PCIP_STORAGE_SATA_AHCI_1_0);
2550 	p = MIN(sc->ports, 16);
2551 	p = flsl(p) - ((p & (p - 1)) ? 0 : 1);
2552 	pci_emul_add_msicap(pi, 1 << p);
2553 	pci_emul_alloc_bar(pi, 5, PCIBAR_MEM32,
2554 	    AHCI_OFFSET + sc->ports * AHCI_STEP);
2555 
2556 	pci_lintr_request(pi);
2557 
2558 open_fail:
2559 	if (ret) {
2560 		for (p = 0; p < sc->ports; p++) {
2561 			if (sc->port[p].bctx != NULL)
2562 				blockif_close(sc->port[p].bctx);
2563 		}
2564 		free(sc);
2565 	}
2566 
2567 	return (ret);
2568 }
2569 
2570 #ifdef BHYVE_SNAPSHOT
2571 static int
2572 pci_ahci_snapshot(struct vm_snapshot_meta *meta)
2573 {
2574 	int i, ret;
2575 	void *bctx;
2576 	struct pci_devinst *pi;
2577 	struct pci_ahci_softc *sc;
2578 	struct ahci_port *port;
2579 
2580 	pi = meta->dev_data;
2581 	sc = pi->pi_arg;
2582 
2583 	/* TODO: add mtx lock/unlock */
2584 
2585 	SNAPSHOT_VAR_OR_LEAVE(sc->ports, meta, ret, done);
2586 	SNAPSHOT_VAR_OR_LEAVE(sc->cap, meta, ret, done);
2587 	SNAPSHOT_VAR_OR_LEAVE(sc->ghc, meta, ret, done);
2588 	SNAPSHOT_VAR_OR_LEAVE(sc->is, meta, ret, done);
2589 	SNAPSHOT_VAR_OR_LEAVE(sc->pi, meta, ret, done);
2590 	SNAPSHOT_VAR_OR_LEAVE(sc->vs, meta, ret, done);
2591 	SNAPSHOT_VAR_OR_LEAVE(sc->ccc_ctl, meta, ret, done);
2592 	SNAPSHOT_VAR_OR_LEAVE(sc->ccc_pts, meta, ret, done);
2593 	SNAPSHOT_VAR_OR_LEAVE(sc->em_loc, meta, ret, done);
2594 	SNAPSHOT_VAR_OR_LEAVE(sc->em_ctl, meta, ret, done);
2595 	SNAPSHOT_VAR_OR_LEAVE(sc->cap2, meta, ret, done);
2596 	SNAPSHOT_VAR_OR_LEAVE(sc->bohc, meta, ret, done);
2597 	SNAPSHOT_VAR_OR_LEAVE(sc->lintr, meta, ret, done);
2598 
2599 	for (i = 0; i < MAX_PORTS; i++) {
2600 		port = &sc->port[i];
2601 
2602 		if (meta->op == VM_SNAPSHOT_SAVE)
2603 			bctx = port->bctx;
2604 
2605 		SNAPSHOT_VAR_OR_LEAVE(bctx, meta, ret, done);
2606 		SNAPSHOT_VAR_OR_LEAVE(port->port, meta, ret, done);
2607 
2608 		/* Mostly for restore; save is ensured by the lines above. */
2609 		if (((bctx == NULL) && (port->bctx != NULL)) ||
2610 		    ((bctx != NULL) && (port->bctx == NULL))) {
2611 			fprintf(stderr, "%s: ports not matching\r\n", __func__);
2612 			ret = EINVAL;
2613 			goto done;
2614 		}
2615 
2616 		if (port->bctx == NULL)
2617 			continue;
2618 
2619 		if (port->port != i) {
2620 			fprintf(stderr, "%s: ports not matching: "
2621 					"actual: %d expected: %d\r\n",
2622 					__func__, port->port, i);
2623 			ret = EINVAL;
2624 			goto done;
2625 		}
2626 
2627 		SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(pi->pi_vmctx, port->cmd_lst,
2628 			AHCI_CL_SIZE * AHCI_MAX_SLOTS, false, meta, ret, done);
2629 		SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(pi->pi_vmctx, port->rfis, 256,
2630 		    false, meta, ret, done);
2631 
2632 		SNAPSHOT_VAR_OR_LEAVE(port->ata_ident, meta, ret, done);
2633 		SNAPSHOT_VAR_OR_LEAVE(port->atapi, meta, ret, done);
2634 		SNAPSHOT_VAR_OR_LEAVE(port->reset, meta, ret, done);
2635 		SNAPSHOT_VAR_OR_LEAVE(port->waitforclear, meta, ret, done);
2636 		SNAPSHOT_VAR_OR_LEAVE(port->mult_sectors, meta, ret, done);
2637 		SNAPSHOT_VAR_OR_LEAVE(port->xfermode, meta, ret, done);
2638 		SNAPSHOT_VAR_OR_LEAVE(port->err_cfis, meta, ret, done);
2639 		SNAPSHOT_VAR_OR_LEAVE(port->sense_key, meta, ret, done);
2640 		SNAPSHOT_VAR_OR_LEAVE(port->asc, meta, ret, done);
2641 		SNAPSHOT_VAR_OR_LEAVE(port->ccs, meta, ret, done);
2642 		SNAPSHOT_VAR_OR_LEAVE(port->pending, meta, ret, done);
2643 
2644 		SNAPSHOT_VAR_OR_LEAVE(port->clb, meta, ret, done);
2645 		SNAPSHOT_VAR_OR_LEAVE(port->clbu, meta, ret, done);
2646 		SNAPSHOT_VAR_OR_LEAVE(port->fb, meta, ret, done);
2647 		SNAPSHOT_VAR_OR_LEAVE(port->fbu, meta, ret, done);
2648 		SNAPSHOT_VAR_OR_LEAVE(port->ie, meta, ret, done);
2649 		SNAPSHOT_VAR_OR_LEAVE(port->cmd, meta, ret, done);
2650 		SNAPSHOT_VAR_OR_LEAVE(port->unused0, meta, ret, done);
2651 		SNAPSHOT_VAR_OR_LEAVE(port->tfd, meta, ret, done);
2652 		SNAPSHOT_VAR_OR_LEAVE(port->sig, meta, ret, done);
2653 		SNAPSHOT_VAR_OR_LEAVE(port->ssts, meta, ret, done);
2654 		SNAPSHOT_VAR_OR_LEAVE(port->sctl, meta, ret, done);
2655 		SNAPSHOT_VAR_OR_LEAVE(port->serr, meta, ret, done);
2656 		SNAPSHOT_VAR_OR_LEAVE(port->sact, meta, ret, done);
2657 		SNAPSHOT_VAR_OR_LEAVE(port->ci, meta, ret, done);
2658 		SNAPSHOT_VAR_OR_LEAVE(port->sntf, meta, ret, done);
2659 		SNAPSHOT_VAR_OR_LEAVE(port->fbs, meta, ret, done);
2660 		SNAPSHOT_VAR_OR_LEAVE(port->ioqsz, meta, ret, done);
2661 
2662 		assert(TAILQ_EMPTY(&port->iobhd));
2663 	}
2664 
2665 done:
2666 	return (ret);
2667 }
2668 
2669 static int
2670 pci_ahci_pause(struct pci_devinst *pi)
2671 {
2672 	struct pci_ahci_softc *sc;
2673 	struct blockif_ctxt *bctxt;
2674 	int i;
2675 
2676 	sc = pi->pi_arg;
2677 
2678 	for (i = 0; i < MAX_PORTS; i++) {
2679 		bctxt = sc->port[i].bctx;
2680 		if (bctxt == NULL)
2681 			continue;
2682 
2683 		blockif_pause(bctxt);
2684 	}
2685 
2686 	return (0);
2687 }
2688 
2689 static int
2690 pci_ahci_resume(struct pci_devinst *pi)
2691 {
2692 	struct pci_ahci_softc *sc;
2693 	struct blockif_ctxt *bctxt;
2694 	int i;
2695 
2696 	sc = pi->pi_arg;
2697 
2698 	for (i = 0; i < MAX_PORTS; i++) {
2699 		bctxt = sc->port[i].bctx;
2700 		if (bctxt == NULL)
2701 			continue;
2702 
2703 		blockif_resume(bctxt);
2704 	}
2705 
2706 	return (0);
2707 }
2708 #endif	/* BHYVE_SNAPSHOT */
2709 
2710 /*
2711  * Use separate emulation names to distinguish drive and atapi devices
2712  */
2713 static const struct pci_devemu pci_de_ahci = {
2714 	.pe_emu =	"ahci",
2715 	.pe_init =	pci_ahci_init,
2716 	.pe_legacy_config = pci_ahci_legacy_config,
2717 	.pe_barwrite =	pci_ahci_write,
2718 	.pe_barread =	pci_ahci_read,
2719 #ifdef BHYVE_SNAPSHOT
2720 	.pe_snapshot =	pci_ahci_snapshot,
2721 	.pe_pause =	pci_ahci_pause,
2722 	.pe_resume =	pci_ahci_resume,
2723 #endif
2724 };
2725 PCI_EMUL_SET(pci_de_ahci);
2726 
2727 static const struct pci_devemu pci_de_ahci_hd = {
2728 	.pe_emu =	"ahci-hd",
2729 	.pe_legacy_config = pci_ahci_hd_legacy_config,
2730 	.pe_alias =	"ahci",
2731 };
2732 PCI_EMUL_SET(pci_de_ahci_hd);
2733 
2734 static const struct pci_devemu pci_de_ahci_cd = {
2735 	.pe_emu =	"ahci-cd",
2736 	.pe_legacy_config = pci_ahci_cd_legacy_config,
2737 	.pe_alias =	"ahci",
2738 };
2739 PCI_EMUL_SET(pci_de_ahci_cd);
2740