xref: /freebsd/usr.sbin/bhyve/pci_ahci.c (revision 2008043f386721d58158e37e0d7e50df8095942d)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2013  Zhixiang Yu <zcore@freebsd.org>
5  * Copyright (c) 2015-2016 Alexander Motin <mav@FreeBSD.org>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 #include <sys/param.h>
32 #include <sys/linker_set.h>
33 #include <sys/stat.h>
34 #include <sys/uio.h>
35 #include <sys/ioctl.h>
36 #include <sys/disk.h>
37 #include <sys/ata.h>
38 #include <sys/endian.h>
39 
40 #include <errno.h>
41 #include <fcntl.h>
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <stdint.h>
45 #include <string.h>
46 #include <strings.h>
47 #include <unistd.h>
48 #include <assert.h>
49 #include <pthread.h>
50 #include <pthread_np.h>
51 #include <inttypes.h>
52 #include <md5.h>
53 
54 #include "bhyverun.h"
55 #include "config.h"
56 #include "debug.h"
57 #include "pci_emul.h"
58 #ifdef BHYVE_SNAPSHOT
59 #include "snapshot.h"
60 #endif
61 #include "ahci.h"
62 #include "block_if.h"
63 
/* Port-count limits; MAX_PORTS sizes the port[] array in pci_ahci_softc. */
#define	DEF_PORTS	6	/* Intel ICH8 AHCI supports 6 ports */
#define	MAX_PORTS	32	/* AHCI supports 32 ports */

/* Values stored in a port's 'sig' field by ahci_port_reset(). */
#define	PxSIG_ATA	0x00000101 /* ATA drive */
#define	PxSIG_ATAPI	0xeb140101 /* ATAPI drive */
69 
/*
 * FIS type codes.  The value is stored in byte 0 of every FIS built in this
 * file and is also the selector passed to ahci_write_fis(), which only
 * handles REGD2H, SETDEVBITS and PIOSETUP.
 */
enum sata_fis_type {
	FIS_TYPE_REGH2D		= 0x27,	/* Register FIS - host to device */
	FIS_TYPE_REGD2H		= 0x34,	/* Register FIS - device to host */
	FIS_TYPE_DMAACT		= 0x39,	/* DMA activate FIS - device to host */
	FIS_TYPE_DMASETUP	= 0x41,	/* DMA setup FIS - bidirectional */
	FIS_TYPE_DATA		= 0x46,	/* Data FIS - bidirectional */
	FIS_TYPE_BIST		= 0x58,	/* BIST activate FIS - bidirectional */
	FIS_TYPE_PIOSETUP	= 0x5F,	/* PIO setup FIS - device to host */
	FIS_TYPE_SETDEVBITS	= 0xA1,	/* Set dev bits FIS - device to host */
};
80 
/*
 * SCSI opcodes
 *
 * Operation codes for the packet commands of the emulated ATAPI device.
 */
#define	TEST_UNIT_READY		0x00
#define	REQUEST_SENSE		0x03
#define	INQUIRY			0x12
#define	START_STOP_UNIT		0x1B
#define	PREVENT_ALLOW		0x1E
#define	READ_CAPACITY		0x25
#define	READ_10			0x28
#define	POSITION_TO_ELEMENT	0x2B
#define	READ_TOC		0x43
#define	GET_EVENT_STATUS_NOTIFICATION 0x4A
#define	MODE_SENSE_10		0x5A
#define	REPORT_LUNS		0xA0
#define	READ_12			0xA8
#define	READ_CD			0xBE

/*
 * SCSI mode page codes
 */
#define	MODEPAGE_RW_ERROR_RECOVERY	0x01
#define	MODEPAGE_CD_CAPABILITIES	0x2A

/*
 * ATA commands
 *
 * NOTE(review): the ATA_SF_ / ATA_SATA_SF_ prefixes suggest these are
 * SET FEATURES subcommand values rather than command opcodes -- confirm
 * against the code that consumes them.
 */
#define	ATA_SF_ENAB_SATA_SF		0x10
#define	ATA_SATA_SF_AN			0x05
#define	ATA_SF_DIS_SATA_SF		0x90
111 
/*
 * Debug printf
 *
 * With AHCI_DEBUG defined, DPRINTF() writes to the 'dbg' stream and flushes
 * it after every message.  Otherwise it expands to nothing, so its
 * arguments are never evaluated and must not carry side effects.
 */
#ifdef AHCI_DEBUG
static FILE *dbg;
#define DPRINTF(format, arg...)	do{fprintf(dbg, format, ##arg);fflush(dbg);}while(0)
#else
#define DPRINTF(format, arg...)
#endif
121 
/*
 * Size of a port identifier buffer (presumably 20 identifier characters
 * plus a terminating NUL -- confirm at the use site).  The expansion is
 * parenthesized so it stays a single operand inside larger expressions.
 */
#define AHCI_PORT_IDENT (20 + 1)
123 
/*
 * One outstanding block-backend request issued on behalf of a command
 * slot.  An entry sits on the port's free list (iofhd) when idle and on
 * its busy list (iobhd) while a request is in flight.
 */
struct ahci_ioreq {
	struct blockif_req io_req;	/* request handed to the blockif layer */
	struct ahci_port *io_pr;	/* presumably the owning port -- TODO confirm */
	STAILQ_ENTRY(ahci_ioreq) io_flist;	/* free-list linkage */
	TAILQ_ENTRY(ahci_ioreq) io_blist;	/* busy-list linkage */
	uint8_t *cfis;			/* command FIS of the command being served */
	uint32_t len;			/* total bytes the command covers */
	uint32_t done;			/* bytes already issued/processed */
	int slot;			/* command slot number */
	int more;			/* nonzero if further requests are needed */
	int readop;			/* nonzero for reads, zero for writes */
};
136 
/*
 * Per-port state of the emulated controller.
 */
struct ahci_port {
	struct blockif_ctxt *bctx;	/* block backend; NULL means no device */
	struct pci_ahci_softc *pr_sc;	/* owning controller */
	struct ata_params ata_ident;	/* IDENTIFY data, see ata_identify_init() */
	uint8_t *cmd_lst;		/* command list, indexed by slot * AHCI_CL_SIZE */
	uint8_t *rfis;			/* received-FIS area; NULL until mapped */
	int port;			/* port index, also its bit number in sc->is */
	int atapi;			/* nonzero for an ATAPI device */
	int reset;
	int waitforclear;		/* set when a FIS with ATA_S_ERROR is posted */
	int mult_sectors;		/* reset to 128; reported in IDENTIFY 'multi' */
	uint8_t xfermode;		/* reset to ATA_UDMA6 */
	uint8_t err_cfis[20];		/* failing command, served as NCQ error log */
	uint8_t sense_key;		/* ATAPI sense key of the last error */
	uint8_t asc;			/* ATAPI additional sense code */
	u_int ccs;			/* cleared together with the CMD.CCS field */
	uint32_t pending;		/* bitmask of slots with work in flight */

	/*
	 * NOTE(review): the fields below look like they mirror the AHCI
	 * per-port register block in register order; do not reorder them
	 * without checking the register access code.
	 */
	uint32_t clb;
	uint32_t clbu;
	uint32_t fb;
	uint32_t fbu;
	uint32_t is;
	uint32_t ie;
	uint32_t cmd;
	uint32_t unused0;
	uint32_t tfd;
	uint32_t sig;
	uint32_t ssts;
	uint32_t sctl;
	uint32_t serr;
	uint32_t sact;
	uint32_t ci;
	uint32_t sntf;
	uint32_t fbs;

	/*
	 * i/o request info
	 */
	struct ahci_ioreq *ioreq;	/* presumably the backing array -- TODO confirm */
	int ioqsz;			/* presumably the entry count -- TODO confirm */
	STAILQ_HEAD(ahci_fhead, ahci_ioreq) iofhd;	/* free requests */
	TAILQ_HEAD(ahci_bhead, ahci_ioreq) iobhd;	/* busy (in-flight) requests */
};
181 
/*
 * Command header as laid out in the command list; accessed by casting
 * p->cmd_lst + slot * AHCI_CL_SIZE, so the layout must not change.
 */
struct ahci_cmd_hdr {
	uint16_t flags;
	uint16_t prdtl;		/* number of PRDT entries that follow the CFIS */
	uint32_t prdbc;		/* bytes transferred, written by write_prdt() */
	uint64_t ctba;
	uint32_t reserved[4];
};
189 
/*
 * One PRDT entry; the table starts 0x80 bytes past the command FIS and is
 * accessed by a direct cast, so the layout must not change.
 */
struct ahci_prdt_entry {
	uint64_t dba;		/* guest-physical address of the data buffer */
	uint32_t reserved;
#define	DBCMASK		0x3fffff
	uint32_t dbc;		/* low 22 bits (DBCMASK) hold byte count minus one */
};
196 
/*
 * Controller-wide state: global HBA register values plus all ports.
 */
struct pci_ahci_softc {
	struct pci_devinst *asc_pi;	/* PCI device instance */
	pthread_mutex_t	mtx;		/* must be held in ahci_port_stop() */
	int ports;			/* number of ports in use (<= MAX_PORTS) */
	uint32_t cap;
	uint32_t ghc;			/* AHCI_GHC_IE here gates interrupt delivery */
	uint32_t is;			/* one pending-interrupt bit per port */
	uint32_t pi;
	uint32_t vs;
	uint32_t ccc_ctl;
	uint32_t ccc_pts;
	uint32_t em_loc;
	uint32_t em_ctl;
	uint32_t cap2;
	uint32_t bohc;
	uint32_t lintr;			/* nonzero while the legacy INTx is asserted */
	struct ahci_port port[MAX_PORTS];
};
/* VM context of the controller, as needed by paddr_guest2host(). */
#define	ahci_ctx(sc)	((sc)->asc_pi->pi_vmctx)
216 
/* Forward declaration: ahci_handle_dsm_trim() calls this before it is defined. */
static void ahci_handle_port(struct ahci_port *p);
218 
/*
 * Convert a logical block address into minute/second/frame form.
 *
 * The address is first biased by 150 frames; with 75 frames per second the
 * result is stored as buf[0] = minutes, buf[1] = seconds, buf[2] = frames.
 * 'buf' must have room for three bytes.
 */
static inline void lba_to_msf(uint8_t *buf, int lba)
{
	int frames, seconds;

	frames = lba + 150;
	seconds = frames / 75;

	buf[0] = seconds / 60;
	buf[1] = seconds % 60;
	buf[2] = frames % 75;
}
226 
227 /*
228  * Generate HBA interrupts on global IS register write.
229  */
230 static void
231 ahci_generate_intr(struct pci_ahci_softc *sc, uint32_t mask)
232 {
233 	struct pci_devinst *pi = sc->asc_pi;
234 	struct ahci_port *p;
235 	int i, nmsg;
236 	uint32_t mmask;
237 
238 	/* Update global IS from PxIS/PxIE. */
239 	for (i = 0; i < sc->ports; i++) {
240 		p = &sc->port[i];
241 		if (p->is & p->ie)
242 			sc->is |= (1 << i);
243 	}
244 	DPRINTF("%s(%08x) %08x", __func__, mask, sc->is);
245 
246 	/* If there is nothing enabled -- clear legacy interrupt and exit. */
247 	if (sc->is == 0 || (sc->ghc & AHCI_GHC_IE) == 0) {
248 		if (sc->lintr) {
249 			pci_lintr_deassert(pi);
250 			sc->lintr = 0;
251 		}
252 		return;
253 	}
254 
255 	/* If there is anything and no MSI -- assert legacy interrupt. */
256 	nmsg = pci_msi_maxmsgnum(pi);
257 	if (nmsg == 0) {
258 		if (!sc->lintr) {
259 			sc->lintr = 1;
260 			pci_lintr_assert(pi);
261 		}
262 		return;
263 	}
264 
265 	/* Assert respective MSIs for ports that were touched. */
266 	for (i = 0; i < nmsg; i++) {
267 		if (sc->ports <= nmsg || i < nmsg - 1)
268 			mmask = 1 << i;
269 		else
270 			mmask = 0xffffffff << i;
271 		if (sc->is & mask && mmask & mask)
272 			pci_generate_msi(pi, i);
273 	}
274 }
275 
276 /*
277  * Generate HBA interrupt on specific port event.
278  */
279 static void
280 ahci_port_intr(struct ahci_port *p)
281 {
282 	struct pci_ahci_softc *sc = p->pr_sc;
283 	struct pci_devinst *pi = sc->asc_pi;
284 	int nmsg;
285 
286 	DPRINTF("%s(%d) %08x/%08x %08x", __func__,
287 	    p->port, p->is, p->ie, sc->is);
288 
289 	/* If there is nothing enabled -- we are done. */
290 	if ((p->is & p->ie) == 0)
291 		return;
292 
293 	/* In case of non-shared MSI always generate interrupt. */
294 	nmsg = pci_msi_maxmsgnum(pi);
295 	if (sc->ports <= nmsg || p->port < nmsg - 1) {
296 		sc->is |= (1 << p->port);
297 		if ((sc->ghc & AHCI_GHC_IE) == 0)
298 			return;
299 		pci_generate_msi(pi, p->port);
300 		return;
301 	}
302 
303 	/* If IS for this port is already set -- do nothing. */
304 	if (sc->is & (1 << p->port))
305 		return;
306 
307 	sc->is |= (1 << p->port);
308 
309 	/* If interrupts are enabled -- generate one. */
310 	if ((sc->ghc & AHCI_GHC_IE) == 0)
311 		return;
312 	if (nmsg > 0) {
313 		pci_generate_msi(pi, nmsg - 1);
314 	} else if (!sc->lintr) {
315 		sc->lintr = 1;
316 		pci_lintr_assert(pi);
317 	}
318 }
319 
320 static void
321 ahci_write_fis(struct ahci_port *p, enum sata_fis_type ft, uint8_t *fis)
322 {
323 	int offset, len, irq;
324 
325 	if (p->rfis == NULL || !(p->cmd & AHCI_P_CMD_FRE))
326 		return;
327 
328 	switch (ft) {
329 	case FIS_TYPE_REGD2H:
330 		offset = 0x40;
331 		len = 20;
332 		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_DHR : 0;
333 		break;
334 	case FIS_TYPE_SETDEVBITS:
335 		offset = 0x58;
336 		len = 8;
337 		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_SDB : 0;
338 		break;
339 	case FIS_TYPE_PIOSETUP:
340 		offset = 0x20;
341 		len = 20;
342 		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_PS : 0;
343 		break;
344 	default:
345 		EPRINTLN("unsupported fis type %d", ft);
346 		return;
347 	}
348 	if (fis[2] & ATA_S_ERROR) {
349 		p->waitforclear = 1;
350 		irq |= AHCI_P_IX_TFE;
351 	}
352 	memcpy(p->rfis + offset, fis, len);
353 	if (irq) {
354 		if (~p->is & irq) {
355 			p->is |= irq;
356 			ahci_port_intr(p);
357 		}
358 	}
359 }
360 
361 static void
362 ahci_write_fis_piosetup(struct ahci_port *p)
363 {
364 	uint8_t fis[20];
365 
366 	memset(fis, 0, sizeof(fis));
367 	fis[0] = FIS_TYPE_PIOSETUP;
368 	ahci_write_fis(p, FIS_TYPE_PIOSETUP, fis);
369 }
370 
371 static void
372 ahci_write_fis_sdb(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
373 {
374 	uint8_t fis[8];
375 	uint8_t error;
376 
377 	error = (tfd >> 8) & 0xff;
378 	tfd &= 0x77;
379 	memset(fis, 0, sizeof(fis));
380 	fis[0] = FIS_TYPE_SETDEVBITS;
381 	fis[1] = (1 << 6);
382 	fis[2] = tfd;
383 	fis[3] = error;
384 	if (fis[2] & ATA_S_ERROR) {
385 		p->err_cfis[0] = slot;
386 		p->err_cfis[2] = tfd;
387 		p->err_cfis[3] = error;
388 		memcpy(&p->err_cfis[4], cfis + 4, 16);
389 	} else {
390 		*(uint32_t *)(fis + 4) = (1 << slot);
391 		p->sact &= ~(1 << slot);
392 	}
393 	p->tfd &= ~0x77;
394 	p->tfd |= tfd;
395 	ahci_write_fis(p, FIS_TYPE_SETDEVBITS, fis);
396 }
397 
398 static void
399 ahci_write_fis_d2h(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
400 {
401 	uint8_t fis[20];
402 	uint8_t error;
403 
404 	error = (tfd >> 8) & 0xff;
405 	memset(fis, 0, sizeof(fis));
406 	fis[0] = FIS_TYPE_REGD2H;
407 	fis[1] = (1 << 6);
408 	fis[2] = tfd & 0xff;
409 	fis[3] = error;
410 	fis[4] = cfis[4];
411 	fis[5] = cfis[5];
412 	fis[6] = cfis[6];
413 	fis[7] = cfis[7];
414 	fis[8] = cfis[8];
415 	fis[9] = cfis[9];
416 	fis[10] = cfis[10];
417 	fis[11] = cfis[11];
418 	fis[12] = cfis[12];
419 	fis[13] = cfis[13];
420 	if (fis[2] & ATA_S_ERROR) {
421 		p->err_cfis[0] = 0x80;
422 		p->err_cfis[2] = tfd & 0xff;
423 		p->err_cfis[3] = error;
424 		memcpy(&p->err_cfis[4], cfis + 4, 16);
425 	} else
426 		p->ci &= ~(1 << slot);
427 	p->tfd = tfd;
428 	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
429 }
430 
431 static void
432 ahci_write_fis_d2h_ncq(struct ahci_port *p, int slot)
433 {
434 	uint8_t fis[20];
435 
436 	p->tfd = ATA_S_READY | ATA_S_DSC;
437 	memset(fis, 0, sizeof(fis));
438 	fis[0] = FIS_TYPE_REGD2H;
439 	fis[1] = 0;			/* No interrupt */
440 	fis[2] = p->tfd;		/* Status */
441 	fis[3] = 0;			/* No error */
442 	p->ci &= ~(1 << slot);
443 	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
444 }
445 
446 static void
447 ahci_write_reset_fis_d2h(struct ahci_port *p)
448 {
449 	uint8_t fis[20];
450 
451 	memset(fis, 0, sizeof(fis));
452 	fis[0] = FIS_TYPE_REGD2H;
453 	fis[3] = 1;
454 	fis[4] = 1;
455 	if (p->atapi) {
456 		fis[5] = 0x14;
457 		fis[6] = 0xeb;
458 	}
459 	fis[12] = 1;
460 	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
461 }
462 
463 static void
464 ahci_check_stopped(struct ahci_port *p)
465 {
466 	/*
467 	 * If we are no longer processing the command list and nothing
468 	 * is in-flight, clear the running bit, the current command
469 	 * slot, the command issue and active bits.
470 	 */
471 	if (!(p->cmd & AHCI_P_CMD_ST)) {
472 		if (p->pending == 0) {
473 			p->ccs = 0;
474 			p->cmd &= ~(AHCI_P_CMD_CR | AHCI_P_CMD_CCS_MASK);
475 			p->ci = 0;
476 			p->sact = 0;
477 			p->waitforclear = 0;
478 		}
479 	}
480 }
481 
482 static void
483 ahci_port_stop(struct ahci_port *p)
484 {
485 	struct ahci_ioreq *aior;
486 	uint8_t *cfis;
487 	int slot;
488 	int error;
489 
490 	assert(pthread_mutex_isowned_np(&p->pr_sc->mtx));
491 
492 	TAILQ_FOREACH(aior, &p->iobhd, io_blist) {
493 		/*
494 		 * Try to cancel the outstanding blockif request.
495 		 */
496 		error = blockif_cancel(p->bctx, &aior->io_req);
497 		if (error != 0)
498 			continue;
499 
500 		slot = aior->slot;
501 		cfis = aior->cfis;
502 		if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
503 		    cfis[2] == ATA_READ_FPDMA_QUEUED ||
504 		    cfis[2] == ATA_SEND_FPDMA_QUEUED)
505 			p->sact &= ~(1 << slot);	/* NCQ */
506 		else
507 			p->ci &= ~(1 << slot);
508 
509 		/*
510 		 * This command is now done.
511 		 */
512 		p->pending &= ~(1 << slot);
513 
514 		/*
515 		 * Delete the blockif request from the busy list
516 		 */
517 		TAILQ_REMOVE(&p->iobhd, aior, io_blist);
518 
519 		/*
520 		 * Move the blockif request back to the free list
521 		 */
522 		STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
523 	}
524 
525 	ahci_check_stopped(p);
526 }
527 
528 static void
529 ahci_port_reset(struct ahci_port *pr)
530 {
531 	pr->serr = 0;
532 	pr->sact = 0;
533 	pr->xfermode = ATA_UDMA6;
534 	pr->mult_sectors = 128;
535 
536 	if (!pr->bctx) {
537 		pr->ssts = ATA_SS_DET_NO_DEVICE;
538 		pr->sig = 0xFFFFFFFF;
539 		pr->tfd = 0x7F;
540 		return;
541 	}
542 	pr->ssts = ATA_SS_DET_PHY_ONLINE | ATA_SS_IPM_ACTIVE;
543 	if (pr->sctl & ATA_SC_SPD_MASK)
544 		pr->ssts |= (pr->sctl & ATA_SC_SPD_MASK);
545 	else
546 		pr->ssts |= ATA_SS_SPD_GEN3;
547 	pr->tfd = (1 << 8) | ATA_S_DSC | ATA_S_DMA;
548 	if (!pr->atapi) {
549 		pr->sig = PxSIG_ATA;
550 		pr->tfd |= ATA_S_READY;
551 	} else
552 		pr->sig = PxSIG_ATAPI;
553 	ahci_write_reset_fis_d2h(pr);
554 }
555 
556 static void
557 ahci_reset(struct pci_ahci_softc *sc)
558 {
559 	int i;
560 
561 	sc->ghc = AHCI_GHC_AE;
562 	sc->is = 0;
563 
564 	if (sc->lintr) {
565 		pci_lintr_deassert(sc->asc_pi);
566 		sc->lintr = 0;
567 	}
568 
569 	for (i = 0; i < sc->ports; i++) {
570 		sc->port[i].ie = 0;
571 		sc->port[i].is = 0;
572 		sc->port[i].cmd = (AHCI_P_CMD_SUD | AHCI_P_CMD_POD);
573 		if (sc->port[i].bctx)
574 			sc->port[i].cmd |= AHCI_P_CMD_CPS;
575 		sc->port[i].sctl = 0;
576 		ahci_port_reset(&sc->port[i]);
577 	}
578 }
579 
/*
 * Store 'src' into an ATA string field of 'len' bytes.
 *
 * Characters are written with each pair of bytes swapped (index i goes
 * to i ^ 1); once 'src' is exhausted the field is padded with spaces.
 * No terminator is written.  'len' is expected to be even: for an odd
 * 'len' the final character lands at dest[len], one past the field.
 */
static void
ata_string(uint8_t *dest, const char *src, int len)
{
	uint8_t ch;
	int pos;

	for (pos = 0; pos < len; pos++) {
		ch = ' ';
		if (*src)
			ch = *src++;
		dest[pos ^ 1] = ch;
	}
}
592 
593 static void
594 atapi_string(uint8_t *dest, const char *src, int len)
595 {
596 	int i;
597 
598 	for (i = 0; i < len; i++) {
599 		if (*src)
600 			dest[i] = *src++;
601 		else
602 			dest[i] = ' ';
603 	}
604 }
605 
606 /*
607  * Build up the iovec based on the PRDT, 'done' and 'len'.
608  */
609 static void
610 ahci_build_iov(struct ahci_port *p, struct ahci_ioreq *aior,
611     struct ahci_prdt_entry *prdt, uint16_t prdtl)
612 {
613 	struct blockif_req *breq = &aior->io_req;
614 	uint32_t dbcsz, extra, left, skip, todo;
615 	int i, j;
616 
617 	assert(aior->len >= aior->done);
618 
619 	/* Copy part of PRDT between 'done' and 'len' bytes into the iov. */
620 	skip = aior->done;
621 	left = aior->len - aior->done;
622 	todo = 0;
623 	for (i = 0, j = 0; i < prdtl && j < BLOCKIF_IOV_MAX && left > 0;
624 	    i++, prdt++) {
625 		dbcsz = (prdt->dbc & DBCMASK) + 1;
626 		/* Skip already done part of the PRDT */
627 		if (dbcsz <= skip) {
628 			skip -= dbcsz;
629 			continue;
630 		}
631 		dbcsz -= skip;
632 		if (dbcsz > left)
633 			dbcsz = left;
634 		breq->br_iov[j].iov_base = paddr_guest2host(ahci_ctx(p->pr_sc),
635 		    prdt->dba + skip, dbcsz);
636 		breq->br_iov[j].iov_len = dbcsz;
637 		todo += dbcsz;
638 		left -= dbcsz;
639 		skip = 0;
640 		j++;
641 	}
642 
643 	/* If we got limited by IOV length, round I/O down to sector size. */
644 	if (j == BLOCKIF_IOV_MAX) {
645 		extra = todo % blockif_sectsz(p->bctx);
646 		todo -= extra;
647 		assert(todo > 0);
648 		while (extra > 0) {
649 			if (breq->br_iov[j - 1].iov_len > extra) {
650 				breq->br_iov[j - 1].iov_len -= extra;
651 				break;
652 			}
653 			extra -= breq->br_iov[j - 1].iov_len;
654 			j--;
655 		}
656 	}
657 
658 	breq->br_iovcnt = j;
659 	breq->br_resid = todo;
660 	aior->done += todo;
661 	aior->more = (aior->done < aior->len && i < prdtl);
662 }
663 
664 static void
665 ahci_handle_rw(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
666 {
667 	struct ahci_ioreq *aior;
668 	struct blockif_req *breq;
669 	struct ahci_prdt_entry *prdt;
670 	struct ahci_cmd_hdr *hdr;
671 	uint64_t lba;
672 	uint32_t len;
673 	int err, first, ncq, readop;
674 
675 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
676 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
677 	ncq = 0;
678 	readop = 1;
679 	first = (done == 0);
680 
681 	if (cfis[2] == ATA_WRITE || cfis[2] == ATA_WRITE48 ||
682 	    cfis[2] == ATA_WRITE_MUL || cfis[2] == ATA_WRITE_MUL48 ||
683 	    cfis[2] == ATA_WRITE_DMA || cfis[2] == ATA_WRITE_DMA48 ||
684 	    cfis[2] == ATA_WRITE_FPDMA_QUEUED)
685 		readop = 0;
686 
687 	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
688 	    cfis[2] == ATA_READ_FPDMA_QUEUED) {
689 		lba = ((uint64_t)cfis[10] << 40) |
690 			((uint64_t)cfis[9] << 32) |
691 			((uint64_t)cfis[8] << 24) |
692 			((uint64_t)cfis[6] << 16) |
693 			((uint64_t)cfis[5] << 8) |
694 			cfis[4];
695 		len = cfis[11] << 8 | cfis[3];
696 		if (!len)
697 			len = 65536;
698 		ncq = 1;
699 	} else if (cfis[2] == ATA_READ48 || cfis[2] == ATA_WRITE48 ||
700 	    cfis[2] == ATA_READ_MUL48 || cfis[2] == ATA_WRITE_MUL48 ||
701 	    cfis[2] == ATA_READ_DMA48 || cfis[2] == ATA_WRITE_DMA48) {
702 		lba = ((uint64_t)cfis[10] << 40) |
703 			((uint64_t)cfis[9] << 32) |
704 			((uint64_t)cfis[8] << 24) |
705 			((uint64_t)cfis[6] << 16) |
706 			((uint64_t)cfis[5] << 8) |
707 			cfis[4];
708 		len = cfis[13] << 8 | cfis[12];
709 		if (!len)
710 			len = 65536;
711 	} else {
712 		lba = ((cfis[7] & 0xf) << 24) | (cfis[6] << 16) |
713 			(cfis[5] << 8) | cfis[4];
714 		len = cfis[12];
715 		if (!len)
716 			len = 256;
717 	}
718 	lba *= blockif_sectsz(p->bctx);
719 	len *= blockif_sectsz(p->bctx);
720 
721 	/* Pull request off free list */
722 	aior = STAILQ_FIRST(&p->iofhd);
723 	assert(aior != NULL);
724 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
725 
726 	aior->cfis = cfis;
727 	aior->slot = slot;
728 	aior->len = len;
729 	aior->done = done;
730 	aior->readop = readop;
731 	breq = &aior->io_req;
732 	breq->br_offset = lba + done;
733 	ahci_build_iov(p, aior, prdt, hdr->prdtl);
734 
735 	/* Mark this command in-flight. */
736 	p->pending |= 1 << slot;
737 
738 	/* Stuff request onto busy list. */
739 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
740 
741 	if (ncq && first)
742 		ahci_write_fis_d2h_ncq(p, slot);
743 
744 	if (readop)
745 		err = blockif_read(p->bctx, breq);
746 	else
747 		err = blockif_write(p->bctx, breq);
748 	assert(err == 0);
749 }
750 
751 static void
752 ahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis)
753 {
754 	struct ahci_ioreq *aior;
755 	struct blockif_req *breq;
756 	int err;
757 
758 	/*
759 	 * Pull request off free list
760 	 */
761 	aior = STAILQ_FIRST(&p->iofhd);
762 	assert(aior != NULL);
763 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
764 	aior->cfis = cfis;
765 	aior->slot = slot;
766 	aior->len = 0;
767 	aior->done = 0;
768 	aior->more = 0;
769 	breq = &aior->io_req;
770 
771 	/*
772 	 * Mark this command in-flight.
773 	 */
774 	p->pending |= 1 << slot;
775 
776 	/*
777 	 * Stuff request onto busy list
778 	 */
779 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
780 
781 	err = blockif_flush(p->bctx, breq);
782 	assert(err == 0);
783 }
784 
785 static inline void
786 read_prdt(struct ahci_port *p, int slot, uint8_t *cfis, void *buf,
787     unsigned int size)
788 {
789 	struct ahci_cmd_hdr *hdr;
790 	struct ahci_prdt_entry *prdt;
791 	uint8_t *to;
792 	unsigned int len;
793 	int i;
794 
795 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
796 	len = size;
797 	to = buf;
798 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
799 	for (i = 0; i < hdr->prdtl && len; i++) {
800 		uint8_t *ptr;
801 		uint32_t dbcsz;
802 		unsigned int sublen;
803 
804 		dbcsz = (prdt->dbc & DBCMASK) + 1;
805 		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
806 		sublen = MIN(len, dbcsz);
807 		memcpy(to, ptr, sublen);
808 		len -= sublen;
809 		to += sublen;
810 		prdt++;
811 	}
812 }
813 
814 static void
815 ahci_handle_dsm_trim(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
816 {
817 	struct ahci_ioreq *aior;
818 	struct blockif_req *breq;
819 	uint8_t *entry;
820 	uint64_t elba;
821 	uint32_t len, elen;
822 	int err, first, ncq;
823 	uint8_t buf[512];
824 
825 	first = (done == 0);
826 	if (cfis[2] == ATA_DATA_SET_MANAGEMENT) {
827 		len = (uint16_t)cfis[13] << 8 | cfis[12];
828 		len *= 512;
829 		ncq = 0;
830 	} else { /* ATA_SEND_FPDMA_QUEUED */
831 		len = (uint16_t)cfis[11] << 8 | cfis[3];
832 		len *= 512;
833 		ncq = 1;
834 	}
835 	read_prdt(p, slot, cfis, buf, sizeof(buf));
836 
837 next:
838 	entry = &buf[done];
839 	elba = ((uint64_t)entry[5] << 40) |
840 		((uint64_t)entry[4] << 32) |
841 		((uint64_t)entry[3] << 24) |
842 		((uint64_t)entry[2] << 16) |
843 		((uint64_t)entry[1] << 8) |
844 		entry[0];
845 	elen = (uint16_t)entry[7] << 8 | entry[6];
846 	done += 8;
847 	if (elen == 0) {
848 		if (done >= len) {
849 			if (ncq) {
850 				if (first)
851 					ahci_write_fis_d2h_ncq(p, slot);
852 				ahci_write_fis_sdb(p, slot, cfis,
853 				    ATA_S_READY | ATA_S_DSC);
854 			} else {
855 				ahci_write_fis_d2h(p, slot, cfis,
856 				    ATA_S_READY | ATA_S_DSC);
857 			}
858 			p->pending &= ~(1 << slot);
859 			ahci_check_stopped(p);
860 			if (!first)
861 				ahci_handle_port(p);
862 			return;
863 		}
864 		goto next;
865 	}
866 
867 	/*
868 	 * Pull request off free list
869 	 */
870 	aior = STAILQ_FIRST(&p->iofhd);
871 	assert(aior != NULL);
872 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
873 	aior->cfis = cfis;
874 	aior->slot = slot;
875 	aior->len = len;
876 	aior->done = done;
877 	aior->more = (len != done);
878 
879 	breq = &aior->io_req;
880 	breq->br_offset = elba * blockif_sectsz(p->bctx);
881 	breq->br_resid = elen * blockif_sectsz(p->bctx);
882 
883 	/*
884 	 * Mark this command in-flight.
885 	 */
886 	p->pending |= 1 << slot;
887 
888 	/*
889 	 * Stuff request onto busy list
890 	 */
891 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
892 
893 	if (ncq && first)
894 		ahci_write_fis_d2h_ncq(p, slot);
895 
896 	err = blockif_delete(p->bctx, breq);
897 	assert(err == 0);
898 }
899 
900 static inline void
901 write_prdt(struct ahci_port *p, int slot, uint8_t *cfis, void *buf,
902     unsigned int size)
903 {
904 	struct ahci_cmd_hdr *hdr;
905 	struct ahci_prdt_entry *prdt;
906 	uint8_t *from;
907 	unsigned int len;
908 	int i;
909 
910 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
911 	len = size;
912 	from = buf;
913 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
914 	for (i = 0; i < hdr->prdtl && len; i++) {
915 		uint8_t *ptr;
916 		uint32_t dbcsz;
917 		int sublen;
918 
919 		dbcsz = (prdt->dbc & DBCMASK) + 1;
920 		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
921 		sublen = MIN(len, dbcsz);
922 		memcpy(ptr, from, sublen);
923 		len -= sublen;
924 		from += sublen;
925 		prdt++;
926 	}
927 	hdr->prdbc = size - len;
928 }
929 
/*
 * Store a checksum in the last byte of 'buf' so that the 8-bit sum of
 * all 'size' bytes is zero.  The previous content of the last byte is
 * ignored.  'size' must be at least 1.
 */
static void
ahci_checksum(uint8_t *buf, int size)
{
	const uint8_t *cur = buf;
	uint8_t total = 0;
	int remaining = size - 1;

	while (remaining-- > 0)
		total += *cur++;
	buf[size - 1] = 0x100 - total;
}
940 
/*
 * Handle a read-log command for one 512-byte log page.
 *
 * The command is aborted for ATAPI devices, when no PRDT is supplied, or
 * when the request is for anything but exactly one page at page offset
 * zero.  Supported logs (selected by cfis[4]):
 *   0x00  log directory
 *   0x10  NCQ Command Error log (the recorded failing command, with a
 *         checksum in the last byte)
 *   0x13  SATA NCQ Send and Receive log (advertises queued TRIM when
 *         the backend can delete and is writable)
 * Any other log address is aborted.
 *
 * The page is assembled in a byte buffer with explicit little-endian
 * stores instead of writing a uint32_t array through a uint16_t alias;
 * the bytes produced are unchanged on little-endian hosts.
 */
static void
ahci_handle_read_log(struct ahci_port *p, int slot, uint8_t *cfis)
{
	struct ahci_cmd_hdr *hdr;
	uint8_t buf[512];

	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
	if (p->atapi || hdr->prdtl == 0 || cfis[5] != 0 ||
	    cfis[9] != 0 || cfis[12] != 1 || cfis[13] != 0) {
		ahci_write_fis_d2h(p, slot, cfis,
		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
		return;
	}

	memset(buf, 0, sizeof(buf));
	if (cfis[4] == 0x00) {	/* Log directory */
		/* Entries are 16-bit words indexed by log address. */
		le16enc(&buf[0x00 * 2], 1); /* Version -- 1 */
		le16enc(&buf[0x10 * 2], 1); /* NCQ Command Error Log -- 1 page */
		le16enc(&buf[0x13 * 2], 1); /* SATA NCQ Send and Receive Log -- 1 page */
	} else if (cfis[4] == 0x10) {	/* NCQ Command Error Log */
		memcpy(buf, p->err_cfis, sizeof(p->err_cfis));
		ahci_checksum(buf, sizeof(buf));
	} else if (cfis[4] == 0x13) {	/* SATA NCQ Send and Receive Log */
		if (blockif_candelete(p->bctx) && !blockif_is_ro(p->bctx)) {
			le32enc(&buf[0x00 * 4], 1);	/* SFQ DSM supported */
			le32enc(&buf[0x01 * 4], 1);	/* SFQ DSM TRIM supported */
		}
	} else {
		ahci_write_fis_d2h(p, slot, cfis,
		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
		return;
	}

	/* Only the PIO flavour of the command gets a PIO Setup FIS. */
	if (cfis[2] == ATA_READ_LOG_EXT)
		ahci_write_fis_piosetup(p);
	write_prdt(p, slot, cfis, buf, sizeof(buf));
	ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
}
981 
982 static void
983 handle_identify(struct ahci_port *p, int slot, uint8_t *cfis)
984 {
985 	struct ahci_cmd_hdr *hdr;
986 
987 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
988 	if (p->atapi || hdr->prdtl == 0) {
989 		ahci_write_fis_d2h(p, slot, cfis,
990 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
991 	} else {
992 		ahci_write_fis_piosetup(p);
993 		write_prdt(p, slot, cfis, (void*)&p->ata_ident, sizeof(struct ata_params));
994 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
995 	}
996 }
997 
998 static void
999 ata_identify_init(struct ahci_port* p, int atapi)
1000 {
1001 	struct ata_params* ata_ident = &p->ata_ident;
1002 
1003 	if (atapi) {
1004 		ata_ident->config = ATA_PROTO_ATAPI | ATA_ATAPI_TYPE_CDROM |
1005 		    ATA_ATAPI_REMOVABLE | ATA_DRQ_FAST;
1006 		ata_ident->capabilities1 = ATA_SUPPORT_LBA |
1007 			ATA_SUPPORT_DMA;
1008 		ata_ident->capabilities2 = (1 << 14 | 1);
1009 		ata_ident->atavalid = ATA_FLAG_64_70 | ATA_FLAG_88;
1010 		ata_ident->obsolete62 = 0x3f;
1011 		ata_ident->mwdmamodes = 7;
1012 		if (p->xfermode & ATA_WDMA0)
1013 			ata_ident->mwdmamodes |= (1 << ((p->xfermode & 7) + 8));
1014 		ata_ident->apiomodes = 3;
1015 		ata_ident->mwdmamin = 0x0078;
1016 		ata_ident->mwdmarec = 0x0078;
1017 		ata_ident->pioblind = 0x0078;
1018 		ata_ident->pioiordy = 0x0078;
1019 		ata_ident->satacapabilities = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3);
1020 		ata_ident->satacapabilities2 = ((p->ssts & ATA_SS_SPD_MASK) >> 3);
1021 		ata_ident->satasupport = ATA_SUPPORT_NCQ_STREAM;
1022 		ata_ident->version_major = 0x3f0;
1023 		ata_ident->support.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
1024 			ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
1025 		ata_ident->support.command2 = (1 << 14);
1026 		ata_ident->support.extension = (1 << 14);
1027 		ata_ident->enabled.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
1028 			ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
1029 		ata_ident->enabled.extension = (1 << 14);
1030 		ata_ident->udmamodes = 0x7f;
1031 		if (p->xfermode & ATA_UDMA0)
1032 			ata_ident->udmamodes |= (1 << ((p->xfermode & 7) + 8));
1033 		ata_ident->transport_major = 0x1020;
1034 		ata_ident->integrity = 0x00a5;
1035 	} else {
1036 		uint64_t sectors;
1037 		int sectsz, psectsz, psectoff, candelete, ro;
1038 		uint16_t cyl;
1039 		uint8_t sech, heads;
1040 
1041 		ro = blockif_is_ro(p->bctx);
1042 		candelete = blockif_candelete(p->bctx);
1043 		sectsz = blockif_sectsz(p->bctx);
1044 		sectors = blockif_size(p->bctx) / sectsz;
1045 		blockif_chs(p->bctx, &cyl, &heads, &sech);
1046 		blockif_psectsz(p->bctx, &psectsz, &psectoff);
1047 		ata_ident->config = ATA_DRQ_FAST;
1048 		ata_ident->cylinders = cyl;
1049 		ata_ident->heads = heads;
1050 		ata_ident->sectors = sech;
1051 
1052 		ata_ident->sectors_intr = (0x8000 | 128);
1053 		ata_ident->tcg = 0;
1054 
1055 		ata_ident->capabilities1 = ATA_SUPPORT_DMA |
1056 			ATA_SUPPORT_LBA | ATA_SUPPORT_IORDY;
1057 		ata_ident->capabilities2 = (1 << 14);
1058 		ata_ident->atavalid = ATA_FLAG_64_70 | ATA_FLAG_88;
1059 		if (p->mult_sectors)
1060 			ata_ident->multi = (ATA_MULTI_VALID | p->mult_sectors);
1061 		if (sectors <= 0x0fffffff) {
1062 			ata_ident->lba_size_1 = sectors;
1063 			ata_ident->lba_size_2 = (sectors >> 16);
1064 		} else {
1065 			ata_ident->lba_size_1 = 0xffff;
1066 			ata_ident->lba_size_2 = 0x0fff;
1067 		}
1068 		ata_ident->mwdmamodes = 0x7;
1069 		if (p->xfermode & ATA_WDMA0)
1070 			ata_ident->mwdmamodes |= (1 << ((p->xfermode & 7) + 8));
1071 		ata_ident->apiomodes = 0x3;
1072 		ata_ident->mwdmamin = 0x0078;
1073 		ata_ident->mwdmarec = 0x0078;
1074 		ata_ident->pioblind = 0x0078;
1075 		ata_ident->pioiordy = 0x0078;
1076 		ata_ident->support3 = 0;
1077 		ata_ident->queue = 31;
1078 		ata_ident->satacapabilities = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3 |
1079 			ATA_SUPPORT_NCQ);
1080 		ata_ident->satacapabilities2 = (ATA_SUPPORT_RCVSND_FPDMA_QUEUED |
1081 			(p->ssts & ATA_SS_SPD_MASK) >> 3);
1082 		ata_ident->version_major = 0x3f0;
1083 		ata_ident->version_minor = 0x28;
1084 		ata_ident->support.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE |
1085 			ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
1086 		ata_ident->support.command2 = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
1087 			ATA_SUPPORT_FLUSHCACHE48 | 1 << 14);
1088 		ata_ident->support.extension = (1 << 14);
1089 		ata_ident->enabled.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE |
1090 			ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
1091 		ata_ident->enabled.command2 = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
1092 			ATA_SUPPORT_FLUSHCACHE48 | 1 << 15);
1093 		ata_ident->enabled.extension = (1 << 14);
1094 		ata_ident->udmamodes = 0x7f;
1095 		if (p->xfermode & ATA_UDMA0)
1096 			ata_ident->udmamodes |= (1 << ((p->xfermode & 7) + 8));
1097 		ata_ident->lba_size48_1 = sectors;
1098 		ata_ident->lba_size48_2 = (sectors >> 16);
1099 		ata_ident->lba_size48_3 = (sectors >> 32);
1100 		ata_ident->lba_size48_4 = (sectors >> 48);
1101 
1102 		if (candelete && !ro) {
1103 			ata_ident->support3 |= ATA_SUPPORT_RZAT | ATA_SUPPORT_DRAT;
1104 			ata_ident->max_dsm_blocks = 1;
1105 			ata_ident->support_dsm = ATA_SUPPORT_DSM_TRIM;
1106 		}
1107 		ata_ident->pss = ATA_PSS_VALID_VALUE;
1108 		ata_ident->lsalign = 0x4000;
1109 		if (psectsz > sectsz) {
1110 			ata_ident->pss |= ATA_PSS_MULTLS;
1111 			ata_ident->pss |= ffsl(psectsz / sectsz) - 1;
1112 			ata_ident->lsalign |= (psectoff / sectsz);
1113 		}
1114 		if (sectsz > 512) {
1115 			ata_ident->pss |= ATA_PSS_LSSABOVE512;
1116 			ata_ident->lss_1 = sectsz / 2;
1117 			ata_ident->lss_2 = ((sectsz / 2) >> 16);
1118 		}
1119 		ata_ident->support2 = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
1120 		ata_ident->enabled2 = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
1121 		ata_ident->transport_major = 0x1020;
1122 		ata_ident->integrity = 0x00a5;
1123 	}
1124 	ahci_checksum((uint8_t*)ata_ident, sizeof(struct ata_params));
1125 }
1126 
1127 static void
1128 handle_atapi_identify(struct ahci_port *p, int slot, uint8_t *cfis)
1129 {
1130 	if (!p->atapi) {
1131 		ahci_write_fis_d2h(p, slot, cfis,
1132 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1133 	} else {
1134 		ahci_write_fis_piosetup(p);
1135 		write_prdt(p, slot, cfis, (void *)&p->ata_ident, sizeof(struct ata_params));
1136 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
1137 	}
1138 }
1139 
1140 static void
1141 atapi_inquiry(struct ahci_port *p, int slot, uint8_t *cfis)
1142 {
1143 	uint8_t buf[36];
1144 	uint8_t *acmd;
1145 	unsigned int len;
1146 	uint32_t tfd;
1147 
1148 	acmd = cfis + 0x40;
1149 
1150 	if (acmd[1] & 1) {		/* VPD */
1151 		if (acmd[2] == 0) {	/* Supported VPD pages */
1152 			buf[0] = 0x05;
1153 			buf[1] = 0;
1154 			buf[2] = 0;
1155 			buf[3] = 1;
1156 			buf[4] = 0;
1157 			len = 4 + buf[3];
1158 		} else {
1159 			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1160 			p->asc = 0x24;
1161 			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1162 			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1163 			ahci_write_fis_d2h(p, slot, cfis, tfd);
1164 			return;
1165 		}
1166 	} else {
1167 		buf[0] = 0x05;
1168 		buf[1] = 0x80;
1169 		buf[2] = 0x00;
1170 		buf[3] = 0x21;
1171 		buf[4] = 31;
1172 		buf[5] = 0;
1173 		buf[6] = 0;
1174 		buf[7] = 0;
1175 		atapi_string(buf + 8, "BHYVE", 8);
1176 		atapi_string(buf + 16, "BHYVE DVD-ROM", 16);
1177 		atapi_string(buf + 32, "001", 4);
1178 		len = sizeof(buf);
1179 	}
1180 
1181 	if (len > acmd[4])
1182 		len = acmd[4];
1183 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1184 	write_prdt(p, slot, cfis, buf, len);
1185 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1186 }
1187 
1188 static void
1189 atapi_read_capacity(struct ahci_port *p, int slot, uint8_t *cfis)
1190 {
1191 	uint8_t buf[8];
1192 	uint64_t sectors;
1193 
1194 	sectors = blockif_size(p->bctx) / 2048;
1195 	be32enc(buf, sectors - 1);
1196 	be32enc(buf + 4, 2048);
1197 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1198 	write_prdt(p, slot, cfis, buf, sizeof(buf));
1199 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1200 }
1201 
1202 static void
1203 atapi_read_toc(struct ahci_port *p, int slot, uint8_t *cfis)
1204 {
1205 	uint8_t *acmd;
1206 	uint8_t format;
1207 	unsigned int len;
1208 
1209 	acmd = cfis + 0x40;
1210 
1211 	len = be16dec(acmd + 7);
1212 	format = acmd[9] >> 6;
1213 	switch (format) {
1214 	case 0:
1215 	{
1216 		size_t size;
1217 		int msf;
1218 		uint64_t sectors;
1219 		uint8_t start_track, buf[20], *bp;
1220 
1221 		msf = (acmd[1] >> 1) & 1;
1222 		start_track = acmd[6];
1223 		if (start_track > 1 && start_track != 0xaa) {
1224 			uint32_t tfd;
1225 			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1226 			p->asc = 0x24;
1227 			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1228 			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1229 			ahci_write_fis_d2h(p, slot, cfis, tfd);
1230 			return;
1231 		}
1232 		bp = buf + 2;
1233 		*bp++ = 1;
1234 		*bp++ = 1;
1235 		if (start_track <= 1) {
1236 			*bp++ = 0;
1237 			*bp++ = 0x14;
1238 			*bp++ = 1;
1239 			*bp++ = 0;
1240 			if (msf) {
1241 				*bp++ = 0;
1242 				lba_to_msf(bp, 0);
1243 				bp += 3;
1244 			} else {
1245 				*bp++ = 0;
1246 				*bp++ = 0;
1247 				*bp++ = 0;
1248 				*bp++ = 0;
1249 			}
1250 		}
1251 		*bp++ = 0;
1252 		*bp++ = 0x14;
1253 		*bp++ = 0xaa;
1254 		*bp++ = 0;
1255 		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
1256 		sectors >>= 2;
1257 		if (msf) {
1258 			*bp++ = 0;
1259 			lba_to_msf(bp, sectors);
1260 			bp += 3;
1261 		} else {
1262 			be32enc(bp, sectors);
1263 			bp += 4;
1264 		}
1265 		size = bp - buf;
1266 		be16enc(buf, size - 2);
1267 		if (len > size)
1268 			len = size;
1269 		write_prdt(p, slot, cfis, buf, len);
1270 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1271 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1272 		break;
1273 	}
1274 	case 1:
1275 	{
1276 		uint8_t buf[12];
1277 
1278 		memset(buf, 0, sizeof(buf));
1279 		buf[1] = 0xa;
1280 		buf[2] = 0x1;
1281 		buf[3] = 0x1;
1282 		if (len > sizeof(buf))
1283 			len = sizeof(buf);
1284 		write_prdt(p, slot, cfis, buf, len);
1285 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1286 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1287 		break;
1288 	}
1289 	case 2:
1290 	{
1291 		size_t size;
1292 		int msf;
1293 		uint64_t sectors;
1294 		uint8_t *bp, buf[50];
1295 
1296 		msf = (acmd[1] >> 1) & 1;
1297 		bp = buf + 2;
1298 		*bp++ = 1;
1299 		*bp++ = 1;
1300 
1301 		*bp++ = 1;
1302 		*bp++ = 0x14;
1303 		*bp++ = 0;
1304 		*bp++ = 0xa0;
1305 		*bp++ = 0;
1306 		*bp++ = 0;
1307 		*bp++ = 0;
1308 		*bp++ = 0;
1309 		*bp++ = 1;
1310 		*bp++ = 0;
1311 		*bp++ = 0;
1312 
1313 		*bp++ = 1;
1314 		*bp++ = 0x14;
1315 		*bp++ = 0;
1316 		*bp++ = 0xa1;
1317 		*bp++ = 0;
1318 		*bp++ = 0;
1319 		*bp++ = 0;
1320 		*bp++ = 0;
1321 		*bp++ = 1;
1322 		*bp++ = 0;
1323 		*bp++ = 0;
1324 
1325 		*bp++ = 1;
1326 		*bp++ = 0x14;
1327 		*bp++ = 0;
1328 		*bp++ = 0xa2;
1329 		*bp++ = 0;
1330 		*bp++ = 0;
1331 		*bp++ = 0;
1332 		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
1333 		sectors >>= 2;
1334 		if (msf) {
1335 			*bp++ = 0;
1336 			lba_to_msf(bp, sectors);
1337 			bp += 3;
1338 		} else {
1339 			be32enc(bp, sectors);
1340 			bp += 4;
1341 		}
1342 
1343 		*bp++ = 1;
1344 		*bp++ = 0x14;
1345 		*bp++ = 0;
1346 		*bp++ = 1;
1347 		*bp++ = 0;
1348 		*bp++ = 0;
1349 		*bp++ = 0;
1350 		if (msf) {
1351 			*bp++ = 0;
1352 			lba_to_msf(bp, 0);
1353 			bp += 3;
1354 		} else {
1355 			*bp++ = 0;
1356 			*bp++ = 0;
1357 			*bp++ = 0;
1358 			*bp++ = 0;
1359 		}
1360 
1361 		size = bp - buf;
1362 		be16enc(buf, size - 2);
1363 		if (len > size)
1364 			len = size;
1365 		write_prdt(p, slot, cfis, buf, len);
1366 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1367 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1368 		break;
1369 	}
1370 	default:
1371 	{
1372 		uint32_t tfd;
1373 
1374 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1375 		p->asc = 0x24;
1376 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1377 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1378 		ahci_write_fis_d2h(p, slot, cfis, tfd);
1379 		break;
1380 	}
1381 	}
1382 }
1383 
1384 static void
1385 atapi_report_luns(struct ahci_port *p, int slot, uint8_t *cfis)
1386 {
1387 	uint8_t buf[16];
1388 
1389 	memset(buf, 0, sizeof(buf));
1390 	buf[3] = 8;
1391 
1392 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1393 	write_prdt(p, slot, cfis, buf, sizeof(buf));
1394 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1395 }
1396 
1397 static void
1398 atapi_read(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
1399 {
1400 	struct ahci_ioreq *aior;
1401 	struct ahci_cmd_hdr *hdr;
1402 	struct ahci_prdt_entry *prdt;
1403 	struct blockif_req *breq;
1404 	uint8_t *acmd;
1405 	uint64_t lba;
1406 	uint32_t len;
1407 	int err;
1408 
1409 	acmd = cfis + 0x40;
1410 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1411 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1412 
1413 	lba = be32dec(acmd + 2);
1414 	if (acmd[0] == READ_10)
1415 		len = be16dec(acmd + 7);
1416 	else
1417 		len = be32dec(acmd + 6);
1418 	if (len == 0) {
1419 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1420 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1421 	}
1422 	lba *= 2048;
1423 	len *= 2048;
1424 
1425 	/*
1426 	 * Pull request off free list
1427 	 */
1428 	aior = STAILQ_FIRST(&p->iofhd);
1429 	assert(aior != NULL);
1430 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
1431 	aior->cfis = cfis;
1432 	aior->slot = slot;
1433 	aior->len = len;
1434 	aior->done = done;
1435 	aior->readop = 1;
1436 	breq = &aior->io_req;
1437 	breq->br_offset = lba + done;
1438 	ahci_build_iov(p, aior, prdt, hdr->prdtl);
1439 
1440 	/* Mark this command in-flight. */
1441 	p->pending |= 1 << slot;
1442 
1443 	/* Stuff request onto busy list. */
1444 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
1445 
1446 	err = blockif_read(p->bctx, breq);
1447 	assert(err == 0);
1448 }
1449 
1450 static void
1451 atapi_request_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1452 {
1453 	uint8_t buf[64];
1454 	uint8_t *acmd;
1455 	unsigned int len;
1456 
1457 	acmd = cfis + 0x40;
1458 	len = acmd[4];
1459 	if (len > sizeof(buf))
1460 		len = sizeof(buf);
1461 	memset(buf, 0, len);
1462 	buf[0] = 0x70 | (1 << 7);
1463 	buf[2] = p->sense_key;
1464 	buf[7] = 10;
1465 	buf[12] = p->asc;
1466 	write_prdt(p, slot, cfis, buf, len);
1467 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1468 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1469 }
1470 
1471 static void
1472 atapi_start_stop_unit(struct ahci_port *p, int slot, uint8_t *cfis)
1473 {
1474 	uint8_t *acmd = cfis + 0x40;
1475 	uint32_t tfd;
1476 
1477 	switch (acmd[4] & 3) {
1478 	case 0:
1479 	case 1:
1480 	case 3:
1481 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1482 		tfd = ATA_S_READY | ATA_S_DSC;
1483 		break;
1484 	case 2:
1485 		/* TODO eject media */
1486 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1487 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1488 		p->asc = 0x53;
1489 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1490 		break;
1491 	}
1492 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1493 }
1494 
1495 static void
1496 atapi_mode_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1497 {
1498 	uint8_t *acmd;
1499 	uint32_t tfd;
1500 	uint8_t pc, code;
1501 	unsigned int len;
1502 
1503 	acmd = cfis + 0x40;
1504 	len = be16dec(acmd + 7);
1505 	pc = acmd[2] >> 6;
1506 	code = acmd[2] & 0x3f;
1507 
1508 	switch (pc) {
1509 	case 0:
1510 		switch (code) {
1511 		case MODEPAGE_RW_ERROR_RECOVERY:
1512 		{
1513 			uint8_t buf[16];
1514 
1515 			if (len > sizeof(buf))
1516 				len = sizeof(buf);
1517 
1518 			memset(buf, 0, sizeof(buf));
1519 			be16enc(buf, 16 - 2);
1520 			buf[2] = 0x70;
1521 			buf[8] = 0x01;
1522 			buf[9] = 16 - 10;
1523 			buf[11] = 0x05;
1524 			write_prdt(p, slot, cfis, buf, len);
1525 			tfd = ATA_S_READY | ATA_S_DSC;
1526 			break;
1527 		}
1528 		case MODEPAGE_CD_CAPABILITIES:
1529 		{
1530 			uint8_t buf[30];
1531 
1532 			if (len > sizeof(buf))
1533 				len = sizeof(buf);
1534 
1535 			memset(buf, 0, sizeof(buf));
1536 			be16enc(buf, 30 - 2);
1537 			buf[2] = 0x70;
1538 			buf[8] = 0x2A;
1539 			buf[9] = 30 - 10;
1540 			buf[10] = 0x08;
1541 			buf[12] = 0x71;
1542 			be16enc(&buf[18], 2);
1543 			be16enc(&buf[20], 512);
1544 			write_prdt(p, slot, cfis, buf, len);
1545 			tfd = ATA_S_READY | ATA_S_DSC;
1546 			break;
1547 		}
1548 		default:
1549 			goto error;
1550 			break;
1551 		}
1552 		break;
1553 	case 3:
1554 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1555 		p->asc = 0x39;
1556 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1557 		break;
1558 error:
1559 	case 1:
1560 	case 2:
1561 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1562 		p->asc = 0x24;
1563 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1564 		break;
1565 	}
1566 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1567 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1568 }
1569 
1570 static void
1571 atapi_get_event_status_notification(struct ahci_port *p, int slot,
1572     uint8_t *cfis)
1573 {
1574 	uint8_t *acmd;
1575 	uint32_t tfd;
1576 
1577 	acmd = cfis + 0x40;
1578 
1579 	/* we don't support asynchronous operation */
1580 	if (!(acmd[1] & 1)) {
1581 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1582 		p->asc = 0x24;
1583 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1584 	} else {
1585 		uint8_t buf[8];
1586 		unsigned int len;
1587 
1588 		len = be16dec(acmd + 7);
1589 		if (len > sizeof(buf))
1590 			len = sizeof(buf);
1591 
1592 		memset(buf, 0, sizeof(buf));
1593 		be16enc(buf, 8 - 2);
1594 		buf[2] = 0x04;
1595 		buf[3] = 0x10;
1596 		buf[5] = 0x02;
1597 		write_prdt(p, slot, cfis, buf, len);
1598 		tfd = ATA_S_READY | ATA_S_DSC;
1599 	}
1600 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1601 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1602 }
1603 
1604 static void
1605 handle_packet_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1606 {
1607 	uint8_t *acmd;
1608 
1609 	acmd = cfis + 0x40;
1610 
1611 #ifdef AHCI_DEBUG
1612 	{
1613 		int i;
1614 		DPRINTF("ACMD:");
1615 		for (i = 0; i < 16; i++)
1616 			DPRINTF("%02x ", acmd[i]);
1617 		DPRINTF("");
1618 	}
1619 #endif
1620 
1621 	switch (acmd[0]) {
1622 	case TEST_UNIT_READY:
1623 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1624 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1625 		break;
1626 	case INQUIRY:
1627 		atapi_inquiry(p, slot, cfis);
1628 		break;
1629 	case READ_CAPACITY:
1630 		atapi_read_capacity(p, slot, cfis);
1631 		break;
1632 	case PREVENT_ALLOW:
1633 		/* TODO */
1634 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1635 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1636 		break;
1637 	case READ_TOC:
1638 		atapi_read_toc(p, slot, cfis);
1639 		break;
1640 	case REPORT_LUNS:
1641 		atapi_report_luns(p, slot, cfis);
1642 		break;
1643 	case READ_10:
1644 	case READ_12:
1645 		atapi_read(p, slot, cfis, 0);
1646 		break;
1647 	case REQUEST_SENSE:
1648 		atapi_request_sense(p, slot, cfis);
1649 		break;
1650 	case START_STOP_UNIT:
1651 		atapi_start_stop_unit(p, slot, cfis);
1652 		break;
1653 	case MODE_SENSE_10:
1654 		atapi_mode_sense(p, slot, cfis);
1655 		break;
1656 	case GET_EVENT_STATUS_NOTIFICATION:
1657 		atapi_get_event_status_notification(p, slot, cfis);
1658 		break;
1659 	default:
1660 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1661 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1662 		p->asc = 0x20;
1663 		ahci_write_fis_d2h(p, slot, cfis, (p->sense_key << 12) |
1664 				ATA_S_READY | ATA_S_ERROR);
1665 		break;
1666 	}
1667 }
1668 
1669 static void
1670 ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1671 {
1672 
1673 	p->tfd |= ATA_S_BUSY;
1674 	switch (cfis[2]) {
1675 	case ATA_ATA_IDENTIFY:
1676 		handle_identify(p, slot, cfis);
1677 		break;
1678 	case ATA_SETFEATURES:
1679 	{
1680 		switch (cfis[3]) {
1681 		case ATA_SF_ENAB_SATA_SF:
1682 			switch (cfis[12]) {
1683 			case ATA_SATA_SF_AN:
1684 				p->tfd = ATA_S_DSC | ATA_S_READY;
1685 				break;
1686 			default:
1687 				p->tfd = ATA_S_ERROR | ATA_S_READY;
1688 				p->tfd |= (ATA_ERROR_ABORT << 8);
1689 				break;
1690 			}
1691 			break;
1692 		case ATA_SF_ENAB_WCACHE:
1693 		case ATA_SF_DIS_WCACHE:
1694 		case ATA_SF_ENAB_RCACHE:
1695 		case ATA_SF_DIS_RCACHE:
1696 			p->tfd = ATA_S_DSC | ATA_S_READY;
1697 			break;
1698 		case ATA_SF_SETXFER:
1699 		{
1700 			switch (cfis[12] & 0xf8) {
1701 			case ATA_PIO:
1702 			case ATA_PIO0:
1703 				break;
1704 			case ATA_WDMA0:
1705 			case ATA_UDMA0:
1706 				p->xfermode = (cfis[12] & 0x7);
1707 				break;
1708 			}
1709 			p->tfd = ATA_S_DSC | ATA_S_READY;
1710 			break;
1711 		}
1712 		default:
1713 			p->tfd = ATA_S_ERROR | ATA_S_READY;
1714 			p->tfd |= (ATA_ERROR_ABORT << 8);
1715 			break;
1716 		}
1717 		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1718 		break;
1719 	}
1720 	case ATA_SET_MULTI:
1721 		if (cfis[12] != 0 &&
1722 			(cfis[12] > 128 || (cfis[12] & (cfis[12] - 1)))) {
1723 			p->tfd = ATA_S_ERROR | ATA_S_READY;
1724 			p->tfd |= (ATA_ERROR_ABORT << 8);
1725 		} else {
1726 			p->mult_sectors = cfis[12];
1727 			p->tfd = ATA_S_DSC | ATA_S_READY;
1728 		}
1729 		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1730 		break;
1731 	case ATA_READ:
1732 	case ATA_WRITE:
1733 	case ATA_READ48:
1734 	case ATA_WRITE48:
1735 	case ATA_READ_MUL:
1736 	case ATA_WRITE_MUL:
1737 	case ATA_READ_MUL48:
1738 	case ATA_WRITE_MUL48:
1739 	case ATA_READ_DMA:
1740 	case ATA_WRITE_DMA:
1741 	case ATA_READ_DMA48:
1742 	case ATA_WRITE_DMA48:
1743 	case ATA_READ_FPDMA_QUEUED:
1744 	case ATA_WRITE_FPDMA_QUEUED:
1745 		ahci_handle_rw(p, slot, cfis, 0);
1746 		break;
1747 	case ATA_FLUSHCACHE:
1748 	case ATA_FLUSHCACHE48:
1749 		ahci_handle_flush(p, slot, cfis);
1750 		break;
1751 	case ATA_DATA_SET_MANAGEMENT:
1752 		if (cfis[11] == 0 && cfis[3] == ATA_DSM_TRIM &&
1753 		    cfis[13] == 0 && cfis[12] == 1) {
1754 			ahci_handle_dsm_trim(p, slot, cfis, 0);
1755 			break;
1756 		}
1757 		ahci_write_fis_d2h(p, slot, cfis,
1758 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1759 		break;
1760 	case ATA_SEND_FPDMA_QUEUED:
1761 		if ((cfis[13] & 0x1f) == ATA_SFPDMA_DSM &&
1762 		    cfis[17] == 0 && cfis[16] == ATA_DSM_TRIM &&
1763 		    cfis[11] == 0 && cfis[3] == 1) {
1764 			ahci_handle_dsm_trim(p, slot, cfis, 0);
1765 			break;
1766 		}
1767 		ahci_write_fis_d2h(p, slot, cfis,
1768 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1769 		break;
1770 	case ATA_READ_LOG_EXT:
1771 	case ATA_READ_LOG_DMA_EXT:
1772 		ahci_handle_read_log(p, slot, cfis);
1773 		break;
1774 	case ATA_SECURITY_FREEZE_LOCK:
1775 	case ATA_SMART_CMD:
1776 	case ATA_NOP:
1777 		ahci_write_fis_d2h(p, slot, cfis,
1778 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1779 		break;
1780 	case ATA_CHECK_POWER_MODE:
1781 		cfis[12] = 0xff;	/* always on */
1782 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1783 		break;
1784 	case ATA_STANDBY_CMD:
1785 	case ATA_STANDBY_IMMEDIATE:
1786 	case ATA_IDLE_CMD:
1787 	case ATA_IDLE_IMMEDIATE:
1788 	case ATA_SLEEP:
1789 	case ATA_READ_VERIFY:
1790 	case ATA_READ_VERIFY48:
1791 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1792 		break;
1793 	case ATA_ATAPI_IDENTIFY:
1794 		handle_atapi_identify(p, slot, cfis);
1795 		break;
1796 	case ATA_PACKET_CMD:
1797 		if (!p->atapi) {
1798 			ahci_write_fis_d2h(p, slot, cfis,
1799 			    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1800 		} else
1801 			handle_packet_cmd(p, slot, cfis);
1802 		break;
1803 	default:
1804 		EPRINTLN("Unsupported cmd:%02x", cfis[2]);
1805 		ahci_write_fis_d2h(p, slot, cfis,
1806 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1807 		break;
1808 	}
1809 }
1810 
1811 static void
1812 ahci_handle_slot(struct ahci_port *p, int slot)
1813 {
1814 	struct ahci_cmd_hdr *hdr;
1815 #ifdef AHCI_DEBUG
1816 	struct ahci_prdt_entry *prdt;
1817 #endif
1818 	struct pci_ahci_softc *sc;
1819 	uint8_t *cfis;
1820 #ifdef AHCI_DEBUG
1821 	int cfl, i;
1822 #endif
1823 
1824 	sc = p->pr_sc;
1825 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1826 #ifdef AHCI_DEBUG
1827 	cfl = (hdr->flags & 0x1f) * 4;
1828 #endif
1829 	cfis = paddr_guest2host(ahci_ctx(sc), hdr->ctba,
1830 			0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry));
1831 #ifdef AHCI_DEBUG
1832 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1833 
1834 	DPRINTF("cfis:");
1835 	for (i = 0; i < cfl; i++) {
1836 		if (i % 10 == 0)
1837 			DPRINTF("");
1838 		DPRINTF("%02x ", cfis[i]);
1839 	}
1840 	DPRINTF("");
1841 
1842 	for (i = 0; i < hdr->prdtl; i++) {
1843 		DPRINTF("%d@%08"PRIx64"", prdt->dbc & 0x3fffff, prdt->dba);
1844 		prdt++;
1845 	}
1846 #endif
1847 
1848 	if (cfis[0] != FIS_TYPE_REGH2D) {
1849 		EPRINTLN("Not a H2D FIS:%02x", cfis[0]);
1850 		return;
1851 	}
1852 
1853 	if (cfis[1] & 0x80) {
1854 		ahci_handle_cmd(p, slot, cfis);
1855 	} else {
1856 		if (cfis[15] & (1 << 2))
1857 			p->reset = 1;
1858 		else if (p->reset) {
1859 			p->reset = 0;
1860 			ahci_port_reset(p);
1861 		}
1862 		p->ci &= ~(1 << slot);
1863 	}
1864 }
1865 
1866 static void
1867 ahci_handle_port(struct ahci_port *p)
1868 {
1869 
1870 	if (!(p->cmd & AHCI_P_CMD_ST))
1871 		return;
1872 
1873 	/*
1874 	 * Search for any new commands to issue ignoring those that
1875 	 * are already in-flight.  Stop if device is busy or in error.
1876 	 */
1877 	for (; (p->ci & ~p->pending) != 0; p->ccs = ((p->ccs + 1) & 31)) {
1878 		if ((p->tfd & (ATA_S_BUSY | ATA_S_DRQ)) != 0)
1879 			break;
1880 		if (p->waitforclear)
1881 			break;
1882 		if ((p->ci & ~p->pending & (1 << p->ccs)) != 0) {
1883 			p->cmd &= ~AHCI_P_CMD_CCS_MASK;
1884 			p->cmd |= p->ccs << AHCI_P_CMD_CCS_SHIFT;
1885 			ahci_handle_slot(p, p->ccs);
1886 		}
1887 	}
1888 }
1889 
1890 /*
1891  * blockif callback routine - this runs in the context of the blockif
1892  * i/o thread, so the mutex needs to be acquired.
1893  */
1894 static void
1895 ata_ioreq_cb(struct blockif_req *br, int err)
1896 {
1897 	struct ahci_cmd_hdr *hdr;
1898 	struct ahci_ioreq *aior;
1899 	struct ahci_port *p;
1900 	struct pci_ahci_softc *sc;
1901 	uint32_t tfd;
1902 	uint8_t *cfis;
1903 	int slot, ncq, dsm;
1904 
1905 	DPRINTF("%s %d", __func__, err);
1906 
1907 	ncq = dsm = 0;
1908 	aior = br->br_param;
1909 	p = aior->io_pr;
1910 	cfis = aior->cfis;
1911 	slot = aior->slot;
1912 	sc = p->pr_sc;
1913 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1914 
1915 	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
1916 	    cfis[2] == ATA_READ_FPDMA_QUEUED ||
1917 	    cfis[2] == ATA_SEND_FPDMA_QUEUED)
1918 		ncq = 1;
1919 	if (cfis[2] == ATA_DATA_SET_MANAGEMENT ||
1920 	    (cfis[2] == ATA_SEND_FPDMA_QUEUED &&
1921 	     (cfis[13] & 0x1f) == ATA_SFPDMA_DSM))
1922 		dsm = 1;
1923 
1924 	pthread_mutex_lock(&sc->mtx);
1925 
1926 	/*
1927 	 * Delete the blockif request from the busy list
1928 	 */
1929 	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1930 
1931 	/*
1932 	 * Move the blockif request back to the free list
1933 	 */
1934 	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
1935 
1936 	if (!err)
1937 		hdr->prdbc = aior->done;
1938 
1939 	if (!err && aior->more) {
1940 		if (dsm)
1941 			ahci_handle_dsm_trim(p, slot, cfis, aior->done);
1942 		else
1943 			ahci_handle_rw(p, slot, cfis, aior->done);
1944 		goto out;
1945 	}
1946 
1947 	if (!err)
1948 		tfd = ATA_S_READY | ATA_S_DSC;
1949 	else
1950 		tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1951 	if (ncq)
1952 		ahci_write_fis_sdb(p, slot, cfis, tfd);
1953 	else
1954 		ahci_write_fis_d2h(p, slot, cfis, tfd);
1955 
1956 	/*
1957 	 * This command is now complete.
1958 	 */
1959 	p->pending &= ~(1 << slot);
1960 
1961 	ahci_check_stopped(p);
1962 	ahci_handle_port(p);
1963 out:
1964 	pthread_mutex_unlock(&sc->mtx);
1965 	DPRINTF("%s exit", __func__);
1966 }
1967 
1968 static void
1969 atapi_ioreq_cb(struct blockif_req *br, int err)
1970 {
1971 	struct ahci_cmd_hdr *hdr;
1972 	struct ahci_ioreq *aior;
1973 	struct ahci_port *p;
1974 	struct pci_ahci_softc *sc;
1975 	uint8_t *cfis;
1976 	uint32_t tfd;
1977 	int slot;
1978 
1979 	DPRINTF("%s %d", __func__, err);
1980 
1981 	aior = br->br_param;
1982 	p = aior->io_pr;
1983 	cfis = aior->cfis;
1984 	slot = aior->slot;
1985 	sc = p->pr_sc;
1986 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + aior->slot * AHCI_CL_SIZE);
1987 
1988 	pthread_mutex_lock(&sc->mtx);
1989 
1990 	/*
1991 	 * Delete the blockif request from the busy list
1992 	 */
1993 	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1994 
1995 	/*
1996 	 * Move the blockif request back to the free list
1997 	 */
1998 	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
1999 
2000 	if (!err)
2001 		hdr->prdbc = aior->done;
2002 
2003 	if (!err && aior->more) {
2004 		atapi_read(p, slot, cfis, aior->done);
2005 		goto out;
2006 	}
2007 
2008 	if (!err) {
2009 		tfd = ATA_S_READY | ATA_S_DSC;
2010 	} else {
2011 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
2012 		p->asc = 0x21;
2013 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
2014 	}
2015 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
2016 	ahci_write_fis_d2h(p, slot, cfis, tfd);
2017 
2018 	/*
2019 	 * This command is now complete.
2020 	 */
2021 	p->pending &= ~(1 << slot);
2022 
2023 	ahci_check_stopped(p);
2024 	ahci_handle_port(p);
2025 out:
2026 	pthread_mutex_unlock(&sc->mtx);
2027 	DPRINTF("%s exit", __func__);
2028 }
2029 
2030 static void
2031 pci_ahci_ioreq_init(struct ahci_port *pr)
2032 {
2033 	struct ahci_ioreq *vr;
2034 	int i;
2035 
2036 	pr->ioqsz = blockif_queuesz(pr->bctx);
2037 	pr->ioreq = calloc(pr->ioqsz, sizeof(struct ahci_ioreq));
2038 	STAILQ_INIT(&pr->iofhd);
2039 
2040 	/*
2041 	 * Add all i/o request entries to the free queue
2042 	 */
2043 	for (i = 0; i < pr->ioqsz; i++) {
2044 		vr = &pr->ioreq[i];
2045 		vr->io_pr = pr;
2046 		if (!pr->atapi)
2047 			vr->io_req.br_callback = ata_ioreq_cb;
2048 		else
2049 			vr->io_req.br_callback = atapi_ioreq_cb;
2050 		vr->io_req.br_param = vr;
2051 		STAILQ_INSERT_TAIL(&pr->iofhd, vr, io_flist);
2052 	}
2053 
2054 	TAILQ_INIT(&pr->iobhd);
2055 }
2056 
2057 static void
2058 pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
2059 {
2060 	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
2061 	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
2062 	struct ahci_port *p = &sc->port[port];
2063 
2064 	DPRINTF("pci_ahci_port %d: write offset 0x%"PRIx64" value 0x%"PRIx64"",
2065 		port, offset, value);
2066 
2067 	switch (offset) {
2068 	case AHCI_P_CLB:
2069 		p->clb = value;
2070 		break;
2071 	case AHCI_P_CLBU:
2072 		p->clbu = value;
2073 		break;
2074 	case AHCI_P_FB:
2075 		p->fb = value;
2076 		break;
2077 	case AHCI_P_FBU:
2078 		p->fbu = value;
2079 		break;
2080 	case AHCI_P_IS:
2081 		p->is &= ~value;
2082 		ahci_port_intr(p);
2083 		break;
2084 	case AHCI_P_IE:
2085 		p->ie = value & 0xFDC000FF;
2086 		ahci_port_intr(p);
2087 		break;
2088 	case AHCI_P_CMD:
2089 	{
2090 		p->cmd &= ~(AHCI_P_CMD_ST | AHCI_P_CMD_SUD | AHCI_P_CMD_POD |
2091 		    AHCI_P_CMD_CLO | AHCI_P_CMD_FRE | AHCI_P_CMD_APSTE |
2092 		    AHCI_P_CMD_ATAPI | AHCI_P_CMD_DLAE | AHCI_P_CMD_ALPE |
2093 		    AHCI_P_CMD_ASP | AHCI_P_CMD_ICC_MASK);
2094 		p->cmd |= (AHCI_P_CMD_ST | AHCI_P_CMD_SUD | AHCI_P_CMD_POD |
2095 		    AHCI_P_CMD_CLO | AHCI_P_CMD_FRE | AHCI_P_CMD_APSTE |
2096 		    AHCI_P_CMD_ATAPI | AHCI_P_CMD_DLAE | AHCI_P_CMD_ALPE |
2097 		    AHCI_P_CMD_ASP | AHCI_P_CMD_ICC_MASK) & value;
2098 
2099 		if (!(value & AHCI_P_CMD_ST)) {
2100 			ahci_port_stop(p);
2101 		} else {
2102 			uint64_t clb;
2103 
2104 			p->cmd |= AHCI_P_CMD_CR;
2105 			clb = (uint64_t)p->clbu << 32 | p->clb;
2106 			p->cmd_lst = paddr_guest2host(ahci_ctx(sc), clb,
2107 					AHCI_CL_SIZE * AHCI_MAX_SLOTS);
2108 		}
2109 
2110 		if (value & AHCI_P_CMD_FRE) {
2111 			uint64_t fb;
2112 
2113 			p->cmd |= AHCI_P_CMD_FR;
2114 			fb = (uint64_t)p->fbu << 32 | p->fb;
2115 			/* we don't support FBSCP, so rfis size is 256Bytes */
2116 			p->rfis = paddr_guest2host(ahci_ctx(sc), fb, 256);
2117 		} else {
2118 			p->cmd &= ~AHCI_P_CMD_FR;
2119 		}
2120 
2121 		if (value & AHCI_P_CMD_CLO) {
2122 			p->tfd &= ~(ATA_S_BUSY | ATA_S_DRQ);
2123 			p->cmd &= ~AHCI_P_CMD_CLO;
2124 		}
2125 
2126 		if (value & AHCI_P_CMD_ICC_MASK) {
2127 			p->cmd &= ~AHCI_P_CMD_ICC_MASK;
2128 		}
2129 
2130 		ahci_handle_port(p);
2131 		break;
2132 	}
2133 	case AHCI_P_TFD:
2134 	case AHCI_P_SIG:
2135 	case AHCI_P_SSTS:
2136 		EPRINTLN("pci_ahci_port: read only registers 0x%"PRIx64"", offset);
2137 		break;
2138 	case AHCI_P_SCTL:
2139 		p->sctl = value;
2140 		if (!(p->cmd & AHCI_P_CMD_ST)) {
2141 			if (value & ATA_SC_DET_RESET)
2142 				ahci_port_reset(p);
2143 		}
2144 		break;
2145 	case AHCI_P_SERR:
2146 		p->serr &= ~value;
2147 		break;
2148 	case AHCI_P_SACT:
2149 		p->sact |= value;
2150 		break;
2151 	case AHCI_P_CI:
2152 		p->ci |= value;
2153 		ahci_handle_port(p);
2154 		break;
2155 	case AHCI_P_SNTF:
2156 	case AHCI_P_FBS:
2157 	default:
2158 		break;
2159 	}
2160 }
2161 
2162 static void
2163 pci_ahci_host_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
2164 {
2165 	DPRINTF("pci_ahci_host: write offset 0x%"PRIx64" value 0x%"PRIx64"",
2166 		offset, value);
2167 
2168 	switch (offset) {
2169 	case AHCI_CAP:
2170 	case AHCI_PI:
2171 	case AHCI_VS:
2172 	case AHCI_CAP2:
2173 		DPRINTF("pci_ahci_host: read only registers 0x%"PRIx64"", offset);
2174 		break;
2175 	case AHCI_GHC:
2176 		if (value & AHCI_GHC_HR) {
2177 			ahci_reset(sc);
2178 			break;
2179 		}
2180 		if (value & AHCI_GHC_IE)
2181 			sc->ghc |= AHCI_GHC_IE;
2182 		else
2183 			sc->ghc &= ~AHCI_GHC_IE;
2184 		ahci_generate_intr(sc, 0xffffffff);
2185 		break;
2186 	case AHCI_IS:
2187 		sc->is &= ~value;
2188 		ahci_generate_intr(sc, value);
2189 		break;
2190 	default:
2191 		break;
2192 	}
2193 }
2194 
2195 static void
2196 pci_ahci_write(struct pci_devinst *pi, int baridx, uint64_t offset, int size,
2197     uint64_t value)
2198 {
2199 	struct pci_ahci_softc *sc = pi->pi_arg;
2200 
2201 	assert(baridx == 5);
2202 	assert((offset % 4) == 0 && size == 4);
2203 
2204 	pthread_mutex_lock(&sc->mtx);
2205 
2206 	if (offset < AHCI_OFFSET)
2207 		pci_ahci_host_write(sc, offset, value);
2208 	else if (offset < (uint64_t)AHCI_OFFSET + sc->ports * AHCI_STEP)
2209 		pci_ahci_port_write(sc, offset, value);
2210 	else
2211 		EPRINTLN("pci_ahci: unknown i/o write offset 0x%"PRIx64"", offset);
2212 
2213 	pthread_mutex_unlock(&sc->mtx);
2214 }
2215 
2216 static uint64_t
2217 pci_ahci_host_read(struct pci_ahci_softc *sc, uint64_t offset)
2218 {
2219 	uint32_t value;
2220 
2221 	switch (offset) {
2222 	case AHCI_CAP:
2223 	case AHCI_GHC:
2224 	case AHCI_IS:
2225 	case AHCI_PI:
2226 	case AHCI_VS:
2227 	case AHCI_CCCC:
2228 	case AHCI_CCCP:
2229 	case AHCI_EM_LOC:
2230 	case AHCI_EM_CTL:
2231 	case AHCI_CAP2:
2232 	{
2233 		uint32_t *p = &sc->cap;
2234 		p += (offset - AHCI_CAP) / sizeof(uint32_t);
2235 		value = *p;
2236 		break;
2237 	}
2238 	default:
2239 		value = 0;
2240 		break;
2241 	}
2242 	DPRINTF("pci_ahci_host: read offset 0x%"PRIx64" value 0x%x",
2243 		offset, value);
2244 
2245 	return (value);
2246 }
2247 
2248 static uint64_t
2249 pci_ahci_port_read(struct pci_ahci_softc *sc, uint64_t offset)
2250 {
2251 	uint32_t value;
2252 	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
2253 	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
2254 
2255 	switch (offset) {
2256 	case AHCI_P_CLB:
2257 	case AHCI_P_CLBU:
2258 	case AHCI_P_FB:
2259 	case AHCI_P_FBU:
2260 	case AHCI_P_IS:
2261 	case AHCI_P_IE:
2262 	case AHCI_P_CMD:
2263 	case AHCI_P_TFD:
2264 	case AHCI_P_SIG:
2265 	case AHCI_P_SSTS:
2266 	case AHCI_P_SCTL:
2267 	case AHCI_P_SERR:
2268 	case AHCI_P_SACT:
2269 	case AHCI_P_CI:
2270 	case AHCI_P_SNTF:
2271 	case AHCI_P_FBS:
2272 	{
2273 		uint32_t *p= &sc->port[port].clb;
2274 		p += (offset - AHCI_P_CLB) / sizeof(uint32_t);
2275 		value = *p;
2276 		break;
2277 	}
2278 	default:
2279 		value = 0;
2280 		break;
2281 	}
2282 
2283 	DPRINTF("pci_ahci_port %d: read offset 0x%"PRIx64" value 0x%x",
2284 		port, offset, value);
2285 
2286 	return value;
2287 }
2288 
2289 static uint64_t
2290 pci_ahci_read(struct pci_devinst *pi, int baridx, uint64_t regoff, int size)
2291 {
2292 	struct pci_ahci_softc *sc = pi->pi_arg;
2293 	uint64_t offset;
2294 	uint32_t value;
2295 
2296 	assert(baridx == 5);
2297 	assert(size == 1 || size == 2 || size == 4);
2298 	assert((regoff & (size - 1)) == 0);
2299 
2300 	pthread_mutex_lock(&sc->mtx);
2301 
2302 	offset = regoff & ~0x3;	    /* round down to a multiple of 4 bytes */
2303 	if (offset < AHCI_OFFSET)
2304 		value = pci_ahci_host_read(sc, offset);
2305 	else if (offset < (uint64_t)AHCI_OFFSET + sc->ports * AHCI_STEP)
2306 		value = pci_ahci_port_read(sc, offset);
2307 	else {
2308 		value = 0;
2309 		EPRINTLN("pci_ahci: unknown i/o read offset 0x%"PRIx64"",
2310 		    regoff);
2311 	}
2312 	value >>= 8 * (regoff & 0x3);
2313 
2314 	pthread_mutex_unlock(&sc->mtx);
2315 
2316 	return (value);
2317 }
2318 
2319 /*
2320  * Each AHCI controller has a "port" node which contains nodes for
2321  * each port named after the decimal number of the port (no leading
2322  * zeroes).  Port nodes contain a "type" ("hd" or "cd"), as well as
2323  * options for blockif.  For example:
2324  *
2325  * pci.0.1.0
2326  *          .device="ahci"
2327  *          .port
2328  *               .0
2329  *                 .type="hd"
2330  *                 .path="/path/to/image"
2331  */
2332 static int
2333 pci_ahci_legacy_config_port(nvlist_t *nvl, int port, const char *type,
2334     const char *opts)
2335 {
2336 	char node_name[sizeof("XX")];
2337 	nvlist_t *port_nvl;
2338 
2339 	snprintf(node_name, sizeof(node_name), "%d", port);
2340 	port_nvl = create_relative_config_node(nvl, node_name);
2341 	set_config_value_node(port_nvl, "type", type);
2342 	return (blockif_legacy_config(port_nvl, opts));
2343 }
2344 
2345 static int
2346 pci_ahci_legacy_config(nvlist_t *nvl, const char *opts)
2347 {
2348 	nvlist_t *ports_nvl;
2349 	const char *type;
2350 	char *next, *next2, *str, *tofree;
2351 	int p, ret;
2352 
2353 	if (opts == NULL)
2354 		return (0);
2355 
2356 	ports_nvl = create_relative_config_node(nvl, "port");
2357 	ret = 1;
2358 	tofree = str = strdup(opts);
2359 	for (p = 0; p < MAX_PORTS && str != NULL; p++, str = next) {
2360 		/* Identify and cut off type of present port. */
2361 		if (strncmp(str, "hd:", 3) == 0) {
2362 			type = "hd";
2363 			str += 3;
2364 		} else if (strncmp(str, "cd:", 3) == 0) {
2365 			type = "cd";
2366 			str += 3;
2367 		} else
2368 			type = NULL;
2369 
2370 		/* Find and cut off the next port options. */
2371 		next = strstr(str, ",hd:");
2372 		next2 = strstr(str, ",cd:");
2373 		if (next == NULL || (next2 != NULL && next2 < next))
2374 			next = next2;
2375 		if (next != NULL) {
2376 			next[0] = 0;
2377 			next++;
2378 		}
2379 
2380 		if (str[0] == 0)
2381 			continue;
2382 
2383 		if (type == NULL) {
2384 			EPRINTLN("Missing or invalid type for port %d: \"%s\"",
2385 			    p, str);
2386 			goto out;
2387 		}
2388 
2389 		if (pci_ahci_legacy_config_port(ports_nvl, p, type, str) != 0)
2390 			goto out;
2391 	}
2392 	ret = 0;
2393 out:
2394 	free(tofree);
2395 	return (ret);
2396 }
2397 
2398 static int
2399 pci_ahci_cd_legacy_config(nvlist_t *nvl, const char *opts)
2400 {
2401 	nvlist_t *ports_nvl;
2402 
2403 	ports_nvl = create_relative_config_node(nvl, "port");
2404 	return (pci_ahci_legacy_config_port(ports_nvl, 0, "cd", opts));
2405 }
2406 
2407 static int
2408 pci_ahci_hd_legacy_config(nvlist_t *nvl, const char *opts)
2409 {
2410 	nvlist_t *ports_nvl;
2411 
2412 	ports_nvl = create_relative_config_node(nvl, "port");
2413 	return (pci_ahci_legacy_config_port(ports_nvl, 0, "hd", opts));
2414 }
2415 
2416 static int
2417 pci_ahci_init(struct pci_devinst *pi, nvlist_t *nvl)
2418 {
2419 	char bident[sizeof("XXX:XXX:XXX")];
2420 	char node_name[sizeof("XX")];
2421 	struct blockif_ctxt *bctxt;
2422 	struct pci_ahci_softc *sc;
2423 	int atapi, ret, slots, p;
2424 	MD5_CTX mdctx;
2425 	u_char digest[16];
2426 	const char *path, *type, *value;
2427 	nvlist_t *ports_nvl, *port_nvl;
2428 
2429 	ret = 0;
2430 
2431 #ifdef AHCI_DEBUG
2432 	dbg = fopen("/tmp/log", "w+");
2433 #endif
2434 
2435 	sc = calloc(1, sizeof(struct pci_ahci_softc));
2436 	pi->pi_arg = sc;
2437 	sc->asc_pi = pi;
2438 	pthread_mutex_init(&sc->mtx, NULL);
2439 	sc->ports = 0;
2440 	sc->pi = 0;
2441 	slots = 32;
2442 
2443 	ports_nvl = find_relative_config_node(nvl, "port");
2444 	for (p = 0; ports_nvl != NULL && p < MAX_PORTS; p++) {
2445 		struct ata_params *ata_ident = &sc->port[p].ata_ident;
2446 		char ident[AHCI_PORT_IDENT];
2447 
2448 		snprintf(node_name, sizeof(node_name), "%d", p);
2449 		port_nvl = find_relative_config_node(ports_nvl, node_name);
2450 		if (port_nvl == NULL)
2451 			continue;
2452 
2453 		type = get_config_value_node(port_nvl, "type");
2454 		if (type == NULL)
2455 			continue;
2456 
2457 		if (strcmp(type, "hd") == 0)
2458 			atapi = 0;
2459 		else
2460 			atapi = 1;
2461 
2462 		/*
2463 		 * Attempt to open the backing image. Use the PCI slot/func
2464 		 * and the port number for the identifier string.
2465 		 */
2466 		snprintf(bident, sizeof(bident), "%u:%u:%u", pi->pi_slot,
2467 		    pi->pi_func, p);
2468 
2469 		bctxt = blockif_open(port_nvl, bident);
2470 		if (bctxt == NULL) {
2471 			sc->ports = p;
2472 			ret = 1;
2473 			goto open_fail;
2474 		}
2475 
2476 		ret = blockif_add_boot_device(pi, bctxt);
2477 		if (ret) {
2478 			sc->ports = p;
2479 			goto open_fail;
2480 		}
2481 
2482 		sc->port[p].bctx = bctxt;
2483 		sc->port[p].pr_sc = sc;
2484 		sc->port[p].port = p;
2485 		sc->port[p].atapi = atapi;
2486 
2487 		/*
2488 		 * Create an identifier for the backing file.
2489 		 * Use parts of the md5 sum of the filename
2490 		 */
2491 		path = get_config_value_node(port_nvl, "path");
2492 		MD5Init(&mdctx);
2493 		MD5Update(&mdctx, path, strlen(path));
2494 		MD5Final(digest, &mdctx);
2495 		snprintf(ident, AHCI_PORT_IDENT,
2496 			"BHYVE-%02X%02X-%02X%02X-%02X%02X",
2497 			digest[0], digest[1], digest[2], digest[3], digest[4],
2498 			digest[5]);
2499 
2500 		memset(ata_ident, 0, sizeof(struct ata_params));
2501 		ata_string((uint8_t*)&ata_ident->serial, ident, 20);
2502 		ata_string((uint8_t*)&ata_ident->revision, "001", 8);
2503 		if (atapi)
2504 			ata_string((uint8_t*)&ata_ident->model, "BHYVE SATA DVD ROM", 40);
2505 		else
2506 			ata_string((uint8_t*)&ata_ident->model, "BHYVE SATA DISK", 40);
2507 		value = get_config_value_node(port_nvl, "nmrr");
2508 		if (value != NULL)
2509 			ata_ident->media_rotation_rate = atoi(value);
2510 		value = get_config_value_node(port_nvl, "ser");
2511 		if (value != NULL)
2512 			ata_string((uint8_t*)(&ata_ident->serial), value, 20);
2513 		value = get_config_value_node(port_nvl, "rev");
2514 		if (value != NULL)
2515 			ata_string((uint8_t*)(&ata_ident->revision), value, 8);
2516 		value = get_config_value_node(port_nvl, "model");
2517 		if (value != NULL)
2518 			ata_string((uint8_t*)(&ata_ident->model), value, 40);
2519 		ata_identify_init(&sc->port[p], atapi);
2520 
2521 		/*
2522 		 * Allocate blockif request structures and add them
2523 		 * to the free list
2524 		 */
2525 		pci_ahci_ioreq_init(&sc->port[p]);
2526 
2527 		sc->pi |= (1 << p);
2528 		if (sc->port[p].ioqsz < slots)
2529 			slots = sc->port[p].ioqsz;
2530 	}
2531 	sc->ports = p;
2532 
2533 	/* Intel ICH8 AHCI */
2534 	--slots;
2535 	if (sc->ports < DEF_PORTS)
2536 		sc->ports = DEF_PORTS;
2537 	sc->cap = AHCI_CAP_64BIT | AHCI_CAP_SNCQ | AHCI_CAP_SSNTF |
2538 	    AHCI_CAP_SMPS | AHCI_CAP_SSS | AHCI_CAP_SALP |
2539 	    AHCI_CAP_SAL | AHCI_CAP_SCLO | (0x3 << AHCI_CAP_ISS_SHIFT)|
2540 	    AHCI_CAP_PMD | AHCI_CAP_SSC | AHCI_CAP_PSC |
2541 	    (slots << AHCI_CAP_NCS_SHIFT) | AHCI_CAP_SXS | (sc->ports - 1);
2542 
2543 	sc->vs = 0x10300;
2544 	sc->cap2 = AHCI_CAP2_APST;
2545 	ahci_reset(sc);
2546 
2547 	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x2821);
2548 	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x8086);
2549 	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
2550 	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_STORAGE_SATA);
2551 	pci_set_cfgdata8(pi, PCIR_PROGIF, PCIP_STORAGE_SATA_AHCI_1_0);
2552 	p = MIN(sc->ports, 16);
2553 	p = flsl(p) - ((p & (p - 1)) ? 0 : 1);
2554 	pci_emul_add_msicap(pi, 1 << p);
2555 	pci_emul_alloc_bar(pi, 5, PCIBAR_MEM32,
2556 	    AHCI_OFFSET + sc->ports * AHCI_STEP);
2557 
2558 	pci_lintr_request(pi);
2559 
2560 open_fail:
2561 	if (ret) {
2562 		for (p = 0; p < sc->ports; p++) {
2563 			if (sc->port[p].bctx != NULL)
2564 				blockif_close(sc->port[p].bctx);
2565 		}
2566 		free(sc);
2567 	}
2568 
2569 	return (ret);
2570 }
2571 
2572 #ifdef BHYVE_SNAPSHOT
2573 static int
2574 pci_ahci_snapshot(struct vm_snapshot_meta *meta)
2575 {
2576 	int i, ret;
2577 	void *bctx;
2578 	struct pci_devinst *pi;
2579 	struct pci_ahci_softc *sc;
2580 	struct ahci_port *port;
2581 
2582 	pi = meta->dev_data;
2583 	sc = pi->pi_arg;
2584 
2585 	/* TODO: add mtx lock/unlock */
2586 
2587 	SNAPSHOT_VAR_OR_LEAVE(sc->ports, meta, ret, done);
2588 	SNAPSHOT_VAR_OR_LEAVE(sc->cap, meta, ret, done);
2589 	SNAPSHOT_VAR_OR_LEAVE(sc->ghc, meta, ret, done);
2590 	SNAPSHOT_VAR_OR_LEAVE(sc->is, meta, ret, done);
2591 	SNAPSHOT_VAR_OR_LEAVE(sc->pi, meta, ret, done);
2592 	SNAPSHOT_VAR_OR_LEAVE(sc->vs, meta, ret, done);
2593 	SNAPSHOT_VAR_OR_LEAVE(sc->ccc_ctl, meta, ret, done);
2594 	SNAPSHOT_VAR_OR_LEAVE(sc->ccc_pts, meta, ret, done);
2595 	SNAPSHOT_VAR_OR_LEAVE(sc->em_loc, meta, ret, done);
2596 	SNAPSHOT_VAR_OR_LEAVE(sc->em_ctl, meta, ret, done);
2597 	SNAPSHOT_VAR_OR_LEAVE(sc->cap2, meta, ret, done);
2598 	SNAPSHOT_VAR_OR_LEAVE(sc->bohc, meta, ret, done);
2599 	SNAPSHOT_VAR_OR_LEAVE(sc->lintr, meta, ret, done);
2600 
2601 	for (i = 0; i < MAX_PORTS; i++) {
2602 		port = &sc->port[i];
2603 
2604 		if (meta->op == VM_SNAPSHOT_SAVE)
2605 			bctx = port->bctx;
2606 
2607 		SNAPSHOT_VAR_OR_LEAVE(bctx, meta, ret, done);
2608 		SNAPSHOT_VAR_OR_LEAVE(port->port, meta, ret, done);
2609 
2610 		/* Mostly for restore; save is ensured by the lines above. */
2611 		if (((bctx == NULL) && (port->bctx != NULL)) ||
2612 		    ((bctx != NULL) && (port->bctx == NULL))) {
2613 			EPRINTLN("%s: ports not matching", __func__);
2614 			ret = EINVAL;
2615 			goto done;
2616 		}
2617 
2618 		if (port->bctx == NULL)
2619 			continue;
2620 
2621 		if (port->port != i) {
2622 			EPRINTLN("%s: ports not matching: "
2623 			    "actual: %d expected: %d", __func__, port->port, i);
2624 			ret = EINVAL;
2625 			goto done;
2626 		}
2627 
2628 		SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(pi->pi_vmctx, port->cmd_lst,
2629 			AHCI_CL_SIZE * AHCI_MAX_SLOTS, false, meta, ret, done);
2630 		SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(pi->pi_vmctx, port->rfis, 256,
2631 		    false, meta, ret, done);
2632 
2633 		SNAPSHOT_VAR_OR_LEAVE(port->ata_ident, meta, ret, done);
2634 		SNAPSHOT_VAR_OR_LEAVE(port->atapi, meta, ret, done);
2635 		SNAPSHOT_VAR_OR_LEAVE(port->reset, meta, ret, done);
2636 		SNAPSHOT_VAR_OR_LEAVE(port->waitforclear, meta, ret, done);
2637 		SNAPSHOT_VAR_OR_LEAVE(port->mult_sectors, meta, ret, done);
2638 		SNAPSHOT_VAR_OR_LEAVE(port->xfermode, meta, ret, done);
2639 		SNAPSHOT_VAR_OR_LEAVE(port->err_cfis, meta, ret, done);
2640 		SNAPSHOT_VAR_OR_LEAVE(port->sense_key, meta, ret, done);
2641 		SNAPSHOT_VAR_OR_LEAVE(port->asc, meta, ret, done);
2642 		SNAPSHOT_VAR_OR_LEAVE(port->ccs, meta, ret, done);
2643 		SNAPSHOT_VAR_OR_LEAVE(port->pending, meta, ret, done);
2644 
2645 		SNAPSHOT_VAR_OR_LEAVE(port->clb, meta, ret, done);
2646 		SNAPSHOT_VAR_OR_LEAVE(port->clbu, meta, ret, done);
2647 		SNAPSHOT_VAR_OR_LEAVE(port->fb, meta, ret, done);
2648 		SNAPSHOT_VAR_OR_LEAVE(port->fbu, meta, ret, done);
2649 		SNAPSHOT_VAR_OR_LEAVE(port->ie, meta, ret, done);
2650 		SNAPSHOT_VAR_OR_LEAVE(port->cmd, meta, ret, done);
2651 		SNAPSHOT_VAR_OR_LEAVE(port->unused0, meta, ret, done);
2652 		SNAPSHOT_VAR_OR_LEAVE(port->tfd, meta, ret, done);
2653 		SNAPSHOT_VAR_OR_LEAVE(port->sig, meta, ret, done);
2654 		SNAPSHOT_VAR_OR_LEAVE(port->ssts, meta, ret, done);
2655 		SNAPSHOT_VAR_OR_LEAVE(port->sctl, meta, ret, done);
2656 		SNAPSHOT_VAR_OR_LEAVE(port->serr, meta, ret, done);
2657 		SNAPSHOT_VAR_OR_LEAVE(port->sact, meta, ret, done);
2658 		SNAPSHOT_VAR_OR_LEAVE(port->ci, meta, ret, done);
2659 		SNAPSHOT_VAR_OR_LEAVE(port->sntf, meta, ret, done);
2660 		SNAPSHOT_VAR_OR_LEAVE(port->fbs, meta, ret, done);
2661 		SNAPSHOT_VAR_OR_LEAVE(port->ioqsz, meta, ret, done);
2662 
2663 		assert(TAILQ_EMPTY(&port->iobhd));
2664 	}
2665 
2666 done:
2667 	return (ret);
2668 }
2669 
2670 static int
2671 pci_ahci_pause(struct pci_devinst *pi)
2672 {
2673 	struct pci_ahci_softc *sc;
2674 	struct blockif_ctxt *bctxt;
2675 	int i;
2676 
2677 	sc = pi->pi_arg;
2678 
2679 	for (i = 0; i < MAX_PORTS; i++) {
2680 		bctxt = sc->port[i].bctx;
2681 		if (bctxt == NULL)
2682 			continue;
2683 
2684 		blockif_pause(bctxt);
2685 	}
2686 
2687 	return (0);
2688 }
2689 
2690 static int
2691 pci_ahci_resume(struct pci_devinst *pi)
2692 {
2693 	struct pci_ahci_softc *sc;
2694 	struct blockif_ctxt *bctxt;
2695 	int i;
2696 
2697 	sc = pi->pi_arg;
2698 
2699 	for (i = 0; i < MAX_PORTS; i++) {
2700 		bctxt = sc->port[i].bctx;
2701 		if (bctxt == NULL)
2702 			continue;
2703 
2704 		blockif_resume(bctxt);
2705 	}
2706 
2707 	return (0);
2708 }
2709 #endif	/* BHYVE_SNAPSHOT */
2710 
2711 /*
2712  * Use separate emulation names to distinguish drive and atapi devices
2713  */
2714 static const struct pci_devemu pci_de_ahci = {
2715 	.pe_emu =	"ahci",
2716 	.pe_init =	pci_ahci_init,
2717 	.pe_legacy_config = pci_ahci_legacy_config,
2718 	.pe_barwrite =	pci_ahci_write,
2719 	.pe_barread =	pci_ahci_read,
2720 #ifdef BHYVE_SNAPSHOT
2721 	.pe_snapshot =	pci_ahci_snapshot,
2722 	.pe_pause =	pci_ahci_pause,
2723 	.pe_resume =	pci_ahci_resume,
2724 #endif
2725 };
2726 PCI_EMUL_SET(pci_de_ahci);
2727 
2728 static const struct pci_devemu pci_de_ahci_hd = {
2729 	.pe_emu =	"ahci-hd",
2730 	.pe_legacy_config = pci_ahci_hd_legacy_config,
2731 	.pe_alias =	"ahci",
2732 };
2733 PCI_EMUL_SET(pci_de_ahci_hd);
2734 
2735 static const struct pci_devemu pci_de_ahci_cd = {
2736 	.pe_emu =	"ahci-cd",
2737 	.pe_legacy_config = pci_ahci_cd_legacy_config,
2738 	.pe_alias =	"ahci",
2739 };
2740 PCI_EMUL_SET(pci_de_ahci_cd);
2741