xref: /freebsd/usr.sbin/bhyve/pci_ahci.c (revision 6ef644f5889afbd0f681b08ed1a2f369524af83e)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2013  Zhixiang Yu <zcore@freebsd.org>
5  * Copyright (c) 2015-2016 Alexander Motin <mav@FreeBSD.org>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 #include <sys/param.h>
32 #include <sys/linker_set.h>
33 #include <sys/stat.h>
34 #include <sys/uio.h>
35 #include <sys/ioctl.h>
36 #include <sys/disk.h>
37 #include <sys/ata.h>
38 #include <sys/endian.h>
39 
40 #include <errno.h>
41 #include <fcntl.h>
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <stdint.h>
45 #include <string.h>
46 #include <strings.h>
47 #include <unistd.h>
48 #include <assert.h>
49 #include <pthread.h>
50 #include <pthread_np.h>
51 #include <inttypes.h>
52 #include <md5.h>
53 
54 #include "bhyverun.h"
55 #include "config.h"
56 #include "debug.h"
57 #include "pci_emul.h"
58 #ifdef BHYVE_SNAPSHOT
59 #include "snapshot.h"
60 #endif
61 #include "ahci.h"
62 #include "block_if.h"
63 
/*
 * Port count limits.  MAX_PORTS sizes the port[] array embedded in
 * struct pci_ahci_softc.
 */
#define	DEF_PORTS	6	/* Intel ICH8 AHCI supports 6 ports */
#define	MAX_PORTS	32	/* AHCI supports 32 ports */

/* Device signatures stored in a port's 'sig' field by ahci_port_reset(). */
#define	PxSIG_ATA	0x00000101 /* ATA drive */
#define	PxSIG_ATAPI	0xeb140101 /* ATAPI drive */
69 
/*
 * SATA FIS type codes.  The FIS builders in this file store the code in
 * byte 0 of every FIS they construct and pass it to ahci_write_fis() to
 * select where the FIS is placed in the received-FIS area.
 */
enum sata_fis_type {
	FIS_TYPE_REGH2D		= 0x27,	/* Register FIS - host to device */
	FIS_TYPE_REGD2H		= 0x34,	/* Register FIS - device to host */
	FIS_TYPE_DMAACT		= 0x39,	/* DMA activate FIS - device to host */
	FIS_TYPE_DMASETUP	= 0x41,	/* DMA setup FIS - bidirectional */
	FIS_TYPE_DATA		= 0x46,	/* Data FIS - bidirectional */
	FIS_TYPE_BIST		= 0x58,	/* BIST activate FIS - bidirectional */
	FIS_TYPE_PIOSETUP	= 0x5F,	/* PIO setup FIS - device to host */
	FIS_TYPE_SETDEVBITS	= 0xA1,	/* Set dev bits FIS - device to host */
};
80 
/*
 * SCSI opcodes
 *
 * NOTE(review): presumably the packet commands the ATAPI emulation
 * recognizes; their users are outside this part of the file.
 */
#define	TEST_UNIT_READY		0x00
#define	REQUEST_SENSE		0x03
#define	INQUIRY			0x12
#define	START_STOP_UNIT		0x1B
#define	PREVENT_ALLOW		0x1E
#define	READ_CAPACITY		0x25
#define	READ_10			0x28
#define	POSITION_TO_ELEMENT	0x2B
#define	READ_TOC		0x43
#define	GET_EVENT_STATUS_NOTIFICATION 0x4A
#define	MODE_SENSE_10		0x5A
#define	REPORT_LUNS		0xA0
#define	READ_12			0xA8
#define	READ_CD			0xBE

/*
 * SCSI mode page codes
 */
#define	MODEPAGE_RW_ERROR_RECOVERY	0x01
#define	MODEPAGE_CD_CAPABILITIES	0x2A

/*
 * ATA commands
 *
 * NOTE(review): judging by the names these are SET FEATURES subcommand and
 * SATA feature values rather than command opcodes -- confirm at the users.
 */
#define	ATA_SF_ENAB_SATA_SF		0x10
#define	ATA_SATA_SF_AN			0x05
#define	ATA_SF_DIS_SATA_SF		0x90
111 
/*
 * Debug printf
 *
 * DPRINTF() writes to the 'dbg' stream and flushes it after every message
 * when AHCI_DEBUG is defined; otherwise it expands to nothing, so its
 * arguments are not evaluated.  WPRINTF() always prints to stdout.
 */
#ifdef AHCI_DEBUG
static FILE *dbg;
#define DPRINTF(format, arg...)	do{fprintf(dbg, format, ##arg);fflush(dbg);}while(0)
#else
#define DPRINTF(format, arg...)
#endif
#define WPRINTF(format, arg...) printf(format, ##arg)
122 
/*
 * Size of a port identity buffer (presumably 20 characters plus a
 * terminating NUL -- confirm at the users).  The expansion is parenthesized
 * so the macro keeps its value when used inside a larger expression.
 */
#define AHCI_PORT_IDENT (20 + 1)
124 
/*
 * Per-request bookkeeping wrapped around a blockif request.  An entry sits
 * on its port's free list (iofhd) until a command handler takes it, and on
 * the busy list (iobhd) while the blockif operation is outstanding.
 */
struct ahci_ioreq {
	struct blockif_req io_req;	/* request handed to blockif_*() */
	struct ahci_port *io_pr;	/* presumably the owning port; set outside this view */
	STAILQ_ENTRY(ahci_ioreq) io_flist;	/* free-list linkage */
	TAILQ_ENTRY(ahci_ioreq) io_blist;	/* busy-list linkage */
	uint8_t *cfis;			/* command FIS of the command being served */
	uint32_t len;			/* total length of the command, in bytes */
	uint32_t done;			/* progress made so far, in bytes */
	int slot;			/* command slot the request belongs to */
	int more;			/* non-zero when work remains after this request */
	int readop;			/* non-zero for reads (set by ahci_handle_rw) */
};
137 
/*
 * State of one emulated AHCI port: the backing block device, the guest
 * memory areas in use, emulation bookkeeping, the port register values and
 * the pool of I/O request descriptors.
 */
struct ahci_port {
	struct blockif_ctxt *bctx;	/* backing device; NULL means no device attached */
	struct pci_ahci_softc *pr_sc;	/* controller this port belongs to */
	struct ata_params ata_ident;	/* IDENTIFY data copied to the guest */
	uint8_t *cmd_lst;		/* command list, indexed by slot * AHCI_CL_SIZE */
	uint8_t *rfis;			/* received-FIS area written by ahci_write_fis() */
	int port;			/* index of this port within the controller */
	int atapi;			/* non-zero for an ATAPI device */
	int reset;
	int waitforclear;		/* set when a FIS with ATA_S_ERROR was posted */
	int mult_sectors;
	uint8_t xfermode;
	uint8_t err_cfis[20];		/* record of the last failed command (NCQ error log) */
	uint8_t sense_key;		/* ATAPI sense key of the last error */
	uint8_t asc;			/* ATAPI additional sense code of the last error */
	u_int ccs;
	uint32_t pending;		/* bitmask of slots with blockif work in flight */

	/* Port register values. */
	uint32_t clb;
	uint32_t clbu;
	uint32_t fb;
	uint32_t fbu;
	uint32_t is;			/* interrupt status bits */
	uint32_t ie;			/* interrupt enable bits */
	uint32_t cmd;
	uint32_t unused0;
	uint32_t tfd;			/* task file: status in bits 0-7, error in bits 8-15 */
	uint32_t sig;			/* PxSIG_ATA, PxSIG_ATAPI or 0xFFFFFFFF */
	uint32_t ssts;
	uint32_t sctl;
	uint32_t serr;
	uint32_t sact;			/* bitmask of active NCQ slots */
	uint32_t ci;			/* bitmask of issued command slots */
	uint32_t sntf;
	uint32_t fbs;

	/*
	 * i/o request info
	 */
	struct ahci_ioreq *ioreq;
	int ioqsz;
	STAILQ_HEAD(ahci_fhead, ahci_ioreq) iofhd;	/* free requests */
	TAILQ_HEAD(ahci_bhead, ahci_ioreq) iobhd;	/* requests in flight */
};
182 
/*
 * One entry of a port's command list, found at
 * cmd_lst + slot * AHCI_CL_SIZE.  The structure overlays guest memory, so
 * its layout must not change.
 */
struct ahci_cmd_hdr {
	uint16_t flags;
	uint16_t prdtl;		/* number of PRDT entries that follow the command FIS */
	uint32_t prdbc;		/* bytes transferred; written by write_prdt() */
	uint64_t ctba;		/* presumably the command table base address */
	uint32_t reserved[4];
};
190 
/*
 * One PRDT entry describing a guest data buffer.  The table starts 0x80
 * bytes after the command FIS.
 */
struct ahci_prdt_entry {
	uint64_t dba;		/* guest physical address of the buffer */
	uint32_t reserved;
#define	DBCMASK		0x3fffff
	uint32_t dbc;		/* low 22 bits (DBCMASK): buffer length minus one */
};
197 
/*
 * Controller-wide state: the PCI device instance, the lock the port code
 * asserts ownership of, the controller register values and the ports.
 */
struct pci_ahci_softc {
	struct pci_devinst *asc_pi;	/* PCI device instance used to raise interrupts */
	pthread_mutex_t	mtx;		/* ahci_port_stop() asserts this is held */
	int ports;			/* number of ports in use in port[] */
	uint32_t cap;
	uint32_t ghc;			/* AHCI_GHC_IE gates interrupt delivery */
	uint32_t is;			/* one bit per port with an enabled interrupt pending */
	uint32_t pi;
	uint32_t vs;
	uint32_t ccc_ctl;
	uint32_t ccc_pts;
	uint32_t em_loc;
	uint32_t em_ctl;
	uint32_t cap2;
	uint32_t bohc;
	uint32_t lintr;			/* non-zero while the legacy interrupt is asserted */
	struct ahci_port port[MAX_PORTS];
};
/* VM context of the controller, as needed by paddr_guest2host(). */
#define	ahci_ctx(sc)	((sc)->asc_pi->pi_vmctx)
217 
/* Forward declaration: ahci_handle_dsm_trim() calls this before its definition. */
static void ahci_handle_port(struct ahci_port *p);
219 
/*
 * Convert a logical block address into a three-byte MSF triple.
 *
 * A fixed offset of 150 blocks is added first; the result is then split
 * using 75 blocks per second and 60 seconds per minute.
 *
 * buf receives minutes in buf[0], seconds in buf[1] and the remaining
 * block count in buf[2].
 */
static inline void lba_to_msf(uint8_t *buf, int lba)
{
	int frames, seconds;

	frames = lba + 150;
	seconds = frames / 75;

	buf[0] = seconds / 60;
	buf[1] = seconds % 60;
	buf[2] = frames % 75;
}
227 
228 /*
229  * Generate HBA interrupts on global IS register write.
230  */
231 static void
232 ahci_generate_intr(struct pci_ahci_softc *sc, uint32_t mask)
233 {
234 	struct pci_devinst *pi = sc->asc_pi;
235 	struct ahci_port *p;
236 	int i, nmsg;
237 	uint32_t mmask;
238 
239 	/* Update global IS from PxIS/PxIE. */
240 	for (i = 0; i < sc->ports; i++) {
241 		p = &sc->port[i];
242 		if (p->is & p->ie)
243 			sc->is |= (1 << i);
244 	}
245 	DPRINTF("%s(%08x) %08x", __func__, mask, sc->is);
246 
247 	/* If there is nothing enabled -- clear legacy interrupt and exit. */
248 	if (sc->is == 0 || (sc->ghc & AHCI_GHC_IE) == 0) {
249 		if (sc->lintr) {
250 			pci_lintr_deassert(pi);
251 			sc->lintr = 0;
252 		}
253 		return;
254 	}
255 
256 	/* If there is anything and no MSI -- assert legacy interrupt. */
257 	nmsg = pci_msi_maxmsgnum(pi);
258 	if (nmsg == 0) {
259 		if (!sc->lintr) {
260 			sc->lintr = 1;
261 			pci_lintr_assert(pi);
262 		}
263 		return;
264 	}
265 
266 	/* Assert respective MSIs for ports that were touched. */
267 	for (i = 0; i < nmsg; i++) {
268 		if (sc->ports <= nmsg || i < nmsg - 1)
269 			mmask = 1 << i;
270 		else
271 			mmask = 0xffffffff << i;
272 		if (sc->is & mask && mmask & mask)
273 			pci_generate_msi(pi, i);
274 	}
275 }
276 
277 /*
278  * Generate HBA interrupt on specific port event.
279  */
280 static void
281 ahci_port_intr(struct ahci_port *p)
282 {
283 	struct pci_ahci_softc *sc = p->pr_sc;
284 	struct pci_devinst *pi = sc->asc_pi;
285 	int nmsg;
286 
287 	DPRINTF("%s(%d) %08x/%08x %08x", __func__,
288 	    p->port, p->is, p->ie, sc->is);
289 
290 	/* If there is nothing enabled -- we are done. */
291 	if ((p->is & p->ie) == 0)
292 		return;
293 
294 	/* In case of non-shared MSI always generate interrupt. */
295 	nmsg = pci_msi_maxmsgnum(pi);
296 	if (sc->ports <= nmsg || p->port < nmsg - 1) {
297 		sc->is |= (1 << p->port);
298 		if ((sc->ghc & AHCI_GHC_IE) == 0)
299 			return;
300 		pci_generate_msi(pi, p->port);
301 		return;
302 	}
303 
304 	/* If IS for this port is already set -- do nothing. */
305 	if (sc->is & (1 << p->port))
306 		return;
307 
308 	sc->is |= (1 << p->port);
309 
310 	/* If interrupts are enabled -- generate one. */
311 	if ((sc->ghc & AHCI_GHC_IE) == 0)
312 		return;
313 	if (nmsg > 0) {
314 		pci_generate_msi(pi, nmsg - 1);
315 	} else if (!sc->lintr) {
316 		sc->lintr = 1;
317 		pci_lintr_assert(pi);
318 	}
319 }
320 
321 static void
322 ahci_write_fis(struct ahci_port *p, enum sata_fis_type ft, uint8_t *fis)
323 {
324 	int offset, len, irq;
325 
326 	if (p->rfis == NULL || !(p->cmd & AHCI_P_CMD_FRE))
327 		return;
328 
329 	switch (ft) {
330 	case FIS_TYPE_REGD2H:
331 		offset = 0x40;
332 		len = 20;
333 		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_DHR : 0;
334 		break;
335 	case FIS_TYPE_SETDEVBITS:
336 		offset = 0x58;
337 		len = 8;
338 		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_SDB : 0;
339 		break;
340 	case FIS_TYPE_PIOSETUP:
341 		offset = 0x20;
342 		len = 20;
343 		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_PS : 0;
344 		break;
345 	default:
346 		WPRINTF("unsupported fis type %d", ft);
347 		return;
348 	}
349 	if (fis[2] & ATA_S_ERROR) {
350 		p->waitforclear = 1;
351 		irq |= AHCI_P_IX_TFE;
352 	}
353 	memcpy(p->rfis + offset, fis, len);
354 	if (irq) {
355 		if (~p->is & irq) {
356 			p->is |= irq;
357 			ahci_port_intr(p);
358 		}
359 	}
360 }
361 
362 static void
363 ahci_write_fis_piosetup(struct ahci_port *p)
364 {
365 	uint8_t fis[20];
366 
367 	memset(fis, 0, sizeof(fis));
368 	fis[0] = FIS_TYPE_PIOSETUP;
369 	ahci_write_fis(p, FIS_TYPE_PIOSETUP, fis);
370 }
371 
372 static void
373 ahci_write_fis_sdb(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
374 {
375 	uint8_t fis[8];
376 	uint8_t error;
377 
378 	error = (tfd >> 8) & 0xff;
379 	tfd &= 0x77;
380 	memset(fis, 0, sizeof(fis));
381 	fis[0] = FIS_TYPE_SETDEVBITS;
382 	fis[1] = (1 << 6);
383 	fis[2] = tfd;
384 	fis[3] = error;
385 	if (fis[2] & ATA_S_ERROR) {
386 		p->err_cfis[0] = slot;
387 		p->err_cfis[2] = tfd;
388 		p->err_cfis[3] = error;
389 		memcpy(&p->err_cfis[4], cfis + 4, 16);
390 	} else {
391 		*(uint32_t *)(fis + 4) = (1 << slot);
392 		p->sact &= ~(1 << slot);
393 	}
394 	p->tfd &= ~0x77;
395 	p->tfd |= tfd;
396 	ahci_write_fis(p, FIS_TYPE_SETDEVBITS, fis);
397 }
398 
399 static void
400 ahci_write_fis_d2h(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
401 {
402 	uint8_t fis[20];
403 	uint8_t error;
404 
405 	error = (tfd >> 8) & 0xff;
406 	memset(fis, 0, sizeof(fis));
407 	fis[0] = FIS_TYPE_REGD2H;
408 	fis[1] = (1 << 6);
409 	fis[2] = tfd & 0xff;
410 	fis[3] = error;
411 	fis[4] = cfis[4];
412 	fis[5] = cfis[5];
413 	fis[6] = cfis[6];
414 	fis[7] = cfis[7];
415 	fis[8] = cfis[8];
416 	fis[9] = cfis[9];
417 	fis[10] = cfis[10];
418 	fis[11] = cfis[11];
419 	fis[12] = cfis[12];
420 	fis[13] = cfis[13];
421 	if (fis[2] & ATA_S_ERROR) {
422 		p->err_cfis[0] = 0x80;
423 		p->err_cfis[2] = tfd & 0xff;
424 		p->err_cfis[3] = error;
425 		memcpy(&p->err_cfis[4], cfis + 4, 16);
426 	} else
427 		p->ci &= ~(1 << slot);
428 	p->tfd = tfd;
429 	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
430 }
431 
432 static void
433 ahci_write_fis_d2h_ncq(struct ahci_port *p, int slot)
434 {
435 	uint8_t fis[20];
436 
437 	p->tfd = ATA_S_READY | ATA_S_DSC;
438 	memset(fis, 0, sizeof(fis));
439 	fis[0] = FIS_TYPE_REGD2H;
440 	fis[1] = 0;			/* No interrupt */
441 	fis[2] = p->tfd;		/* Status */
442 	fis[3] = 0;			/* No error */
443 	p->ci &= ~(1 << slot);
444 	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
445 }
446 
447 static void
448 ahci_write_reset_fis_d2h(struct ahci_port *p)
449 {
450 	uint8_t fis[20];
451 
452 	memset(fis, 0, sizeof(fis));
453 	fis[0] = FIS_TYPE_REGD2H;
454 	fis[3] = 1;
455 	fis[4] = 1;
456 	if (p->atapi) {
457 		fis[5] = 0x14;
458 		fis[6] = 0xeb;
459 	}
460 	fis[12] = 1;
461 	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
462 }
463 
464 static void
465 ahci_check_stopped(struct ahci_port *p)
466 {
467 	/*
468 	 * If we are no longer processing the command list and nothing
469 	 * is in-flight, clear the running bit, the current command
470 	 * slot, the command issue and active bits.
471 	 */
472 	if (!(p->cmd & AHCI_P_CMD_ST)) {
473 		if (p->pending == 0) {
474 			p->ccs = 0;
475 			p->cmd &= ~(AHCI_P_CMD_CR | AHCI_P_CMD_CCS_MASK);
476 			p->ci = 0;
477 			p->sact = 0;
478 			p->waitforclear = 0;
479 		}
480 	}
481 }
482 
483 static void
484 ahci_port_stop(struct ahci_port *p)
485 {
486 	struct ahci_ioreq *aior;
487 	uint8_t *cfis;
488 	int slot;
489 	int error;
490 
491 	assert(pthread_mutex_isowned_np(&p->pr_sc->mtx));
492 
493 	TAILQ_FOREACH(aior, &p->iobhd, io_blist) {
494 		/*
495 		 * Try to cancel the outstanding blockif request.
496 		 */
497 		error = blockif_cancel(p->bctx, &aior->io_req);
498 		if (error != 0)
499 			continue;
500 
501 		slot = aior->slot;
502 		cfis = aior->cfis;
503 		if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
504 		    cfis[2] == ATA_READ_FPDMA_QUEUED ||
505 		    cfis[2] == ATA_SEND_FPDMA_QUEUED)
506 			p->sact &= ~(1 << slot);	/* NCQ */
507 		else
508 			p->ci &= ~(1 << slot);
509 
510 		/*
511 		 * This command is now done.
512 		 */
513 		p->pending &= ~(1 << slot);
514 
515 		/*
516 		 * Delete the blockif request from the busy list
517 		 */
518 		TAILQ_REMOVE(&p->iobhd, aior, io_blist);
519 
520 		/*
521 		 * Move the blockif request back to the free list
522 		 */
523 		STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
524 	}
525 
526 	ahci_check_stopped(p);
527 }
528 
529 static void
530 ahci_port_reset(struct ahci_port *pr)
531 {
532 	pr->serr = 0;
533 	pr->sact = 0;
534 	pr->xfermode = ATA_UDMA6;
535 	pr->mult_sectors = 128;
536 
537 	if (!pr->bctx) {
538 		pr->ssts = ATA_SS_DET_NO_DEVICE;
539 		pr->sig = 0xFFFFFFFF;
540 		pr->tfd = 0x7F;
541 		return;
542 	}
543 	pr->ssts = ATA_SS_DET_PHY_ONLINE | ATA_SS_IPM_ACTIVE;
544 	if (pr->sctl & ATA_SC_SPD_MASK)
545 		pr->ssts |= (pr->sctl & ATA_SC_SPD_MASK);
546 	else
547 		pr->ssts |= ATA_SS_SPD_GEN3;
548 	pr->tfd = (1 << 8) | ATA_S_DSC | ATA_S_DMA;
549 	if (!pr->atapi) {
550 		pr->sig = PxSIG_ATA;
551 		pr->tfd |= ATA_S_READY;
552 	} else
553 		pr->sig = PxSIG_ATAPI;
554 	ahci_write_reset_fis_d2h(pr);
555 }
556 
557 static void
558 ahci_reset(struct pci_ahci_softc *sc)
559 {
560 	int i;
561 
562 	sc->ghc = AHCI_GHC_AE;
563 	sc->is = 0;
564 
565 	if (sc->lintr) {
566 		pci_lintr_deassert(sc->asc_pi);
567 		sc->lintr = 0;
568 	}
569 
570 	for (i = 0; i < sc->ports; i++) {
571 		sc->port[i].ie = 0;
572 		sc->port[i].is = 0;
573 		sc->port[i].cmd = (AHCI_P_CMD_SUD | AHCI_P_CMD_POD);
574 		if (sc->port[i].bctx)
575 			sc->port[i].cmd |= AHCI_P_CMD_CPS;
576 		sc->port[i].sctl = 0;
577 		ahci_port_reset(&sc->port[i]);
578 	}
579 }
580 
/*
 * Store 'src' into an ATA string field of 'len' bytes.
 *
 * The two bytes of every pair are swapped (character i lands at index
 * i ^ 1), and the field is padded with spaces once 'src' is exhausted.
 * No NUL terminator is written; a longer 'src' is truncated.
 */
static void
ata_string(uint8_t *dest, const char *src, int len)
{
	int pos = 0;

	while (pos < len) {
		uint8_t ch = ' ';

		if (*src != '\0')
			ch = *src++;
		dest[pos ^ 1] = ch;
		pos++;
	}
}
593 
/*
 * Store 'src' into a fixed-width field of 'len' bytes in natural byte
 * order, padding with spaces once 'src' is exhausted.  No NUL terminator
 * is written; a longer 'src' is truncated.
 */
static void
atapi_string(uint8_t *dest, const char *src, int len)
{
	uint8_t *out = dest;
	uint8_t *end = dest + len;

	while (out < end) {
		if (*src != '\0')
			*out = *src++;
		else
			*out = ' ';
		out++;
	}
}
606 
/*
 * Build up the iovec based on the PRDT, 'done' and 'len'.
 *
 * Fills aior->io_req.br_iov[] with host mappings of the PRDT ranges that
 * cover bytes ['done', 'len') of the transfer, using at most
 * BLOCKIF_IOV_MAX entries.  On return br_iovcnt and br_resid describe this
 * pass, aior->done has been advanced by the number of bytes queued, and
 * aior->more tells whether bytes remain while PRDT entries are left.
 */
static void
ahci_build_iov(struct ahci_port *p, struct ahci_ioreq *aior,
    struct ahci_prdt_entry *prdt, uint16_t prdtl)
{
	struct blockif_req *breq = &aior->io_req;
	uint32_t dbcsz, extra, left, skip, todo;
	int i, j;

	assert(aior->len >= aior->done);

	/* Copy part of PRDT between 'done' and 'len' bytes into the iov. */
	skip = aior->done;
	left = aior->len - aior->done;
	todo = 0;
	for (i = 0, j = 0; i < prdtl && j < BLOCKIF_IOV_MAX && left > 0;
	    i++, prdt++) {
		/* The entry stores its length minus one. */
		dbcsz = (prdt->dbc & DBCMASK) + 1;
		/* Skip already done part of the PRDT */
		if (dbcsz <= skip) {
			skip -= dbcsz;
			continue;
		}
		/* Start inside this entry if it was only partly consumed. */
		dbcsz -= skip;
		if (dbcsz > left)
			dbcsz = left;
		breq->br_iov[j].iov_base = paddr_guest2host(ahci_ctx(p->pr_sc),
		    prdt->dba + skip, dbcsz);
		breq->br_iov[j].iov_len = dbcsz;
		todo += dbcsz;
		left -= dbcsz;
		skip = 0;
		j++;
	}

	/* If we got limited by IOV length, round I/O down to sector size. */
	if (j == BLOCKIF_IOV_MAX) {
		extra = todo % blockif_sectsz(p->bctx);
		todo -= extra;
		assert(todo > 0);
		/* Trim 'extra' bytes off the tail, dropping whole entries if needed. */
		while (extra > 0) {
			if (breq->br_iov[j - 1].iov_len > extra) {
				breq->br_iov[j - 1].iov_len -= extra;
				break;
			}
			extra -= breq->br_iov[j - 1].iov_len;
			j--;
		}
	}

	breq->br_iovcnt = j;
	breq->br_resid = todo;
	aior->done += todo;
	aior->more = (aior->done < aior->len && i < prdtl);
}
664 
665 static void
666 ahci_handle_rw(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
667 {
668 	struct ahci_ioreq *aior;
669 	struct blockif_req *breq;
670 	struct ahci_prdt_entry *prdt;
671 	struct ahci_cmd_hdr *hdr;
672 	uint64_t lba;
673 	uint32_t len;
674 	int err, first, ncq, readop;
675 
676 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
677 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
678 	ncq = 0;
679 	readop = 1;
680 	first = (done == 0);
681 
682 	if (cfis[2] == ATA_WRITE || cfis[2] == ATA_WRITE48 ||
683 	    cfis[2] == ATA_WRITE_MUL || cfis[2] == ATA_WRITE_MUL48 ||
684 	    cfis[2] == ATA_WRITE_DMA || cfis[2] == ATA_WRITE_DMA48 ||
685 	    cfis[2] == ATA_WRITE_FPDMA_QUEUED)
686 		readop = 0;
687 
688 	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
689 	    cfis[2] == ATA_READ_FPDMA_QUEUED) {
690 		lba = ((uint64_t)cfis[10] << 40) |
691 			((uint64_t)cfis[9] << 32) |
692 			((uint64_t)cfis[8] << 24) |
693 			((uint64_t)cfis[6] << 16) |
694 			((uint64_t)cfis[5] << 8) |
695 			cfis[4];
696 		len = cfis[11] << 8 | cfis[3];
697 		if (!len)
698 			len = 65536;
699 		ncq = 1;
700 	} else if (cfis[2] == ATA_READ48 || cfis[2] == ATA_WRITE48 ||
701 	    cfis[2] == ATA_READ_MUL48 || cfis[2] == ATA_WRITE_MUL48 ||
702 	    cfis[2] == ATA_READ_DMA48 || cfis[2] == ATA_WRITE_DMA48) {
703 		lba = ((uint64_t)cfis[10] << 40) |
704 			((uint64_t)cfis[9] << 32) |
705 			((uint64_t)cfis[8] << 24) |
706 			((uint64_t)cfis[6] << 16) |
707 			((uint64_t)cfis[5] << 8) |
708 			cfis[4];
709 		len = cfis[13] << 8 | cfis[12];
710 		if (!len)
711 			len = 65536;
712 	} else {
713 		lba = ((cfis[7] & 0xf) << 24) | (cfis[6] << 16) |
714 			(cfis[5] << 8) | cfis[4];
715 		len = cfis[12];
716 		if (!len)
717 			len = 256;
718 	}
719 	lba *= blockif_sectsz(p->bctx);
720 	len *= blockif_sectsz(p->bctx);
721 
722 	/* Pull request off free list */
723 	aior = STAILQ_FIRST(&p->iofhd);
724 	assert(aior != NULL);
725 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
726 
727 	aior->cfis = cfis;
728 	aior->slot = slot;
729 	aior->len = len;
730 	aior->done = done;
731 	aior->readop = readop;
732 	breq = &aior->io_req;
733 	breq->br_offset = lba + done;
734 	ahci_build_iov(p, aior, prdt, hdr->prdtl);
735 
736 	/* Mark this command in-flight. */
737 	p->pending |= 1 << slot;
738 
739 	/* Stuff request onto busy list. */
740 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
741 
742 	if (ncq && first)
743 		ahci_write_fis_d2h_ncq(p, slot);
744 
745 	if (readop)
746 		err = blockif_read(p->bctx, breq);
747 	else
748 		err = blockif_write(p->bctx, breq);
749 	assert(err == 0);
750 }
751 
752 static void
753 ahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis)
754 {
755 	struct ahci_ioreq *aior;
756 	struct blockif_req *breq;
757 	int err;
758 
759 	/*
760 	 * Pull request off free list
761 	 */
762 	aior = STAILQ_FIRST(&p->iofhd);
763 	assert(aior != NULL);
764 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
765 	aior->cfis = cfis;
766 	aior->slot = slot;
767 	aior->len = 0;
768 	aior->done = 0;
769 	aior->more = 0;
770 	breq = &aior->io_req;
771 
772 	/*
773 	 * Mark this command in-flight.
774 	 */
775 	p->pending |= 1 << slot;
776 
777 	/*
778 	 * Stuff request onto busy list
779 	 */
780 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
781 
782 	err = blockif_flush(p->bctx, breq);
783 	assert(err == 0);
784 }
785 
786 static inline void
787 read_prdt(struct ahci_port *p, int slot, uint8_t *cfis, void *buf,
788     unsigned int size)
789 {
790 	struct ahci_cmd_hdr *hdr;
791 	struct ahci_prdt_entry *prdt;
792 	uint8_t *to;
793 	unsigned int len;
794 	int i;
795 
796 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
797 	len = size;
798 	to = buf;
799 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
800 	for (i = 0; i < hdr->prdtl && len; i++) {
801 		uint8_t *ptr;
802 		uint32_t dbcsz;
803 		unsigned int sublen;
804 
805 		dbcsz = (prdt->dbc & DBCMASK) + 1;
806 		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
807 		sublen = MIN(len, dbcsz);
808 		memcpy(to, ptr, sublen);
809 		len -= sublen;
810 		to += sublen;
811 		prdt++;
812 	}
813 }
814 
815 static void
816 ahci_handle_dsm_trim(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
817 {
818 	struct ahci_ioreq *aior;
819 	struct blockif_req *breq;
820 	uint8_t *entry;
821 	uint64_t elba;
822 	uint32_t len, elen;
823 	int err, first, ncq;
824 	uint8_t buf[512];
825 
826 	first = (done == 0);
827 	if (cfis[2] == ATA_DATA_SET_MANAGEMENT) {
828 		len = (uint16_t)cfis[13] << 8 | cfis[12];
829 		len *= 512;
830 		ncq = 0;
831 	} else { /* ATA_SEND_FPDMA_QUEUED */
832 		len = (uint16_t)cfis[11] << 8 | cfis[3];
833 		len *= 512;
834 		ncq = 1;
835 	}
836 	read_prdt(p, slot, cfis, buf, sizeof(buf));
837 
838 next:
839 	entry = &buf[done];
840 	elba = ((uint64_t)entry[5] << 40) |
841 		((uint64_t)entry[4] << 32) |
842 		((uint64_t)entry[3] << 24) |
843 		((uint64_t)entry[2] << 16) |
844 		((uint64_t)entry[1] << 8) |
845 		entry[0];
846 	elen = (uint16_t)entry[7] << 8 | entry[6];
847 	done += 8;
848 	if (elen == 0) {
849 		if (done >= len) {
850 			if (ncq) {
851 				if (first)
852 					ahci_write_fis_d2h_ncq(p, slot);
853 				ahci_write_fis_sdb(p, slot, cfis,
854 				    ATA_S_READY | ATA_S_DSC);
855 			} else {
856 				ahci_write_fis_d2h(p, slot, cfis,
857 				    ATA_S_READY | ATA_S_DSC);
858 			}
859 			p->pending &= ~(1 << slot);
860 			ahci_check_stopped(p);
861 			if (!first)
862 				ahci_handle_port(p);
863 			return;
864 		}
865 		goto next;
866 	}
867 
868 	/*
869 	 * Pull request off free list
870 	 */
871 	aior = STAILQ_FIRST(&p->iofhd);
872 	assert(aior != NULL);
873 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
874 	aior->cfis = cfis;
875 	aior->slot = slot;
876 	aior->len = len;
877 	aior->done = done;
878 	aior->more = (len != done);
879 
880 	breq = &aior->io_req;
881 	breq->br_offset = elba * blockif_sectsz(p->bctx);
882 	breq->br_resid = elen * blockif_sectsz(p->bctx);
883 
884 	/*
885 	 * Mark this command in-flight.
886 	 */
887 	p->pending |= 1 << slot;
888 
889 	/*
890 	 * Stuff request onto busy list
891 	 */
892 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
893 
894 	if (ncq && first)
895 		ahci_write_fis_d2h_ncq(p, slot);
896 
897 	err = blockif_delete(p->bctx, breq);
898 	assert(err == 0);
899 }
900 
901 static inline void
902 write_prdt(struct ahci_port *p, int slot, uint8_t *cfis, void *buf,
903     unsigned int size)
904 {
905 	struct ahci_cmd_hdr *hdr;
906 	struct ahci_prdt_entry *prdt;
907 	uint8_t *from;
908 	unsigned int len;
909 	int i;
910 
911 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
912 	len = size;
913 	from = buf;
914 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
915 	for (i = 0; i < hdr->prdtl && len; i++) {
916 		uint8_t *ptr;
917 		uint32_t dbcsz;
918 		int sublen;
919 
920 		dbcsz = (prdt->dbc & DBCMASK) + 1;
921 		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
922 		sublen = MIN(len, dbcsz);
923 		memcpy(ptr, from, sublen);
924 		len -= sublen;
925 		from += sublen;
926 		prdt++;
927 	}
928 	hdr->prdbc = size - len;
929 }
930 
/*
 * Store a checksum in the last byte of 'buf' so that all 'size' bytes sum
 * to zero modulo 256.
 *
 * buf   buffer to protect; its last byte is overwritten
 * size  length of the buffer in bytes
 *
 * A non-positive 'size' leaves the buffer untouched; previously it caused
 * a write to buf[size - 1], in front of the buffer.
 */
static void
ahci_checksum(uint8_t *buf, int size)
{
	int i;
	uint8_t sum = 0;

	if (size <= 0)
		return;

	for (i = 0; i < size - 1; i++)
		sum += buf[i];
	buf[size - 1] = 0x100 - sum;
}
941 
942 static void
943 ahci_handle_read_log(struct ahci_port *p, int slot, uint8_t *cfis)
944 {
945 	struct ahci_cmd_hdr *hdr;
946 	uint32_t buf[128];
947 	uint8_t *buf8 = (uint8_t *)buf;
948 	uint16_t *buf16 = (uint16_t *)buf;
949 
950 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
951 	if (p->atapi || hdr->prdtl == 0 || cfis[5] != 0 ||
952 	    cfis[9] != 0 || cfis[12] != 1 || cfis[13] != 0) {
953 		ahci_write_fis_d2h(p, slot, cfis,
954 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
955 		return;
956 	}
957 
958 	memset(buf, 0, sizeof(buf));
959 	if (cfis[4] == 0x00) {	/* Log directory */
960 		buf16[0x00] = 1; /* Version -- 1 */
961 		buf16[0x10] = 1; /* NCQ Command Error Log -- 1 page */
962 		buf16[0x13] = 1; /* SATA NCQ Send and Receive Log -- 1 page */
963 	} else if (cfis[4] == 0x10) {	/* NCQ Command Error Log */
964 		memcpy(buf8, p->err_cfis, sizeof(p->err_cfis));
965 		ahci_checksum(buf8, sizeof(buf));
966 	} else if (cfis[4] == 0x13) {	/* SATA NCQ Send and Receive Log */
967 		if (blockif_candelete(p->bctx) && !blockif_is_ro(p->bctx)) {
968 			buf[0x00] = 1;	/* SFQ DSM supported */
969 			buf[0x01] = 1;	/* SFQ DSM TRIM supported */
970 		}
971 	} else {
972 		ahci_write_fis_d2h(p, slot, cfis,
973 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
974 		return;
975 	}
976 
977 	if (cfis[2] == ATA_READ_LOG_EXT)
978 		ahci_write_fis_piosetup(p);
979 	write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
980 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
981 }
982 
983 static void
984 handle_identify(struct ahci_port *p, int slot, uint8_t *cfis)
985 {
986 	struct ahci_cmd_hdr *hdr;
987 
988 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
989 	if (p->atapi || hdr->prdtl == 0) {
990 		ahci_write_fis_d2h(p, slot, cfis,
991 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
992 	} else {
993 		ahci_write_fis_piosetup(p);
994 		write_prdt(p, slot, cfis, (void*)&p->ata_ident, sizeof(struct ata_params));
995 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
996 	}
997 }
998 
/*
 * Fill in the port's identify data (p->ata_ident).
 *
 * p      port whose identify data is prepared
 * atapi  non-zero to describe an ATAPI CD-ROM, zero to describe an ATA
 *        disk backed by p->bctx
 *
 * Only the fields assigned here are written (NOTE(review): the structure is
 * presumably zeroed by the caller -- confirm).  The final byte of the
 * structure is set by ahci_checksum() so that the whole block sums to zero.
 */
static void
ata_identify_init(struct ahci_port* p, int atapi)
{
	struct ata_params* ata_ident = &p->ata_ident;

	if (atapi) {
		ata_ident->config = ATA_PROTO_ATAPI | ATA_ATAPI_TYPE_CDROM |
		    ATA_ATAPI_REMOVABLE | ATA_DRQ_FAST;
		ata_ident->capabilities1 = ATA_SUPPORT_LBA |
			ATA_SUPPORT_DMA;
		ata_ident->capabilities2 = (1 << 14 | 1);
		ata_ident->atavalid = ATA_FLAG_64_70 | ATA_FLAG_88;
		ata_ident->obsolete62 = 0x3f;
		ata_ident->mwdmamodes = 7;
		/* Flag the currently selected multiword DMA mode, if any. */
		if (p->xfermode & ATA_WDMA0)
			ata_ident->mwdmamodes |= (1 << ((p->xfermode & 7) + 8));
		ata_ident->apiomodes = 3;
		ata_ident->mwdmamin = 0x0078;
		ata_ident->mwdmarec = 0x0078;
		ata_ident->pioblind = 0x0078;
		ata_ident->pioiordy = 0x0078;
		ata_ident->satacapabilities = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3);
		/* Report the link speed currently shown in the port's 'ssts'. */
		ata_ident->satacapabilities2 = ((p->ssts & ATA_SS_SPD_MASK) >> 3);
		ata_ident->satasupport = ATA_SUPPORT_NCQ_STREAM;
		ata_ident->version_major = 0x3f0;
		ata_ident->support.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
			ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
		ata_ident->support.command2 = (1 << 14);
		ata_ident->support.extension = (1 << 14);
		ata_ident->enabled.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
			ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
		ata_ident->enabled.extension = (1 << 14);
		ata_ident->udmamodes = 0x7f;
		/* Flag the currently selected Ultra DMA mode, if any. */
		if (p->xfermode & ATA_UDMA0)
			ata_ident->udmamodes |= (1 << ((p->xfermode & 7) + 8));
		ata_ident->transport_major = 0x1020;
		ata_ident->integrity = 0x00a5;
	} else {
		uint64_t sectors;
		int sectsz, psectsz, psectoff, candelete, ro;
		uint16_t cyl;
		uint8_t sech, heads;

		/* Query the backing device for its properties. */
		ro = blockif_is_ro(p->bctx);
		candelete = blockif_candelete(p->bctx);
		sectsz = blockif_sectsz(p->bctx);
		sectors = blockif_size(p->bctx) / sectsz;
		blockif_chs(p->bctx, &cyl, &heads, &sech);
		blockif_psectsz(p->bctx, &psectsz, &psectoff);
		ata_ident->config = ATA_DRQ_FAST;
		ata_ident->cylinders = cyl;
		ata_ident->heads = heads;
		ata_ident->sectors = sech;

		ata_ident->sectors_intr = (0x8000 | 128);
		ata_ident->tcg = 0;

		ata_ident->capabilities1 = ATA_SUPPORT_DMA |
			ATA_SUPPORT_LBA | ATA_SUPPORT_IORDY;
		ata_ident->capabilities2 = (1 << 14);
		ata_ident->atavalid = ATA_FLAG_64_70 | ATA_FLAG_88;
		if (p->mult_sectors)
			ata_ident->multi = (ATA_MULTI_VALID | p->mult_sectors);
		/* The 28-bit size field saturates for larger devices. */
		if (sectors <= 0x0fffffff) {
			ata_ident->lba_size_1 = sectors;
			ata_ident->lba_size_2 = (sectors >> 16);
		} else {
			ata_ident->lba_size_1 = 0xffff;
			ata_ident->lba_size_2 = 0x0fff;
		}
		ata_ident->mwdmamodes = 0x7;
		/* Flag the currently selected multiword DMA mode, if any. */
		if (p->xfermode & ATA_WDMA0)
			ata_ident->mwdmamodes |= (1 << ((p->xfermode & 7) + 8));
		ata_ident->apiomodes = 0x3;
		ata_ident->mwdmamin = 0x0078;
		ata_ident->mwdmarec = 0x0078;
		ata_ident->pioblind = 0x0078;
		ata_ident->pioiordy = 0x0078;
		ata_ident->support3 = 0;
		ata_ident->queue = 31;
		ata_ident->satacapabilities = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3 |
			ATA_SUPPORT_NCQ);
		ata_ident->satacapabilities2 = (ATA_SUPPORT_RCVSND_FPDMA_QUEUED |
			(p->ssts & ATA_SS_SPD_MASK) >> 3);
		ata_ident->version_major = 0x3f0;
		ata_ident->version_minor = 0x28;
		ata_ident->support.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE |
			ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
		ata_ident->support.command2 = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
			ATA_SUPPORT_FLUSHCACHE48 | 1 << 14);
		ata_ident->support.extension = (1 << 14);
		ata_ident->enabled.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE |
			ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
		ata_ident->enabled.command2 = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
			ATA_SUPPORT_FLUSHCACHE48 | 1 << 15);
		ata_ident->enabled.extension = (1 << 14);
		ata_ident->udmamodes = 0x7f;
		/* Flag the currently selected Ultra DMA mode, if any. */
		if (p->xfermode & ATA_UDMA0)
			ata_ident->udmamodes |= (1 << ((p->xfermode & 7) + 8));
		/* Full sector count, split into four 16-bit words. */
		ata_ident->lba_size48_1 = sectors;
		ata_ident->lba_size48_2 = (sectors >> 16);
		ata_ident->lba_size48_3 = (sectors >> 32);
		ata_ident->lba_size48_4 = (sectors >> 48);

		/* TRIM is only offered on writable devices that can delete. */
		if (candelete && !ro) {
			ata_ident->support3 |= ATA_SUPPORT_RZAT | ATA_SUPPORT_DRAT;
			ata_ident->max_dsm_blocks = 1;
			ata_ident->support_dsm = ATA_SUPPORT_DSM_TRIM;
		}
		ata_ident->pss = ATA_PSS_VALID_VALUE;
		ata_ident->lsalign = 0x4000;
		/* Physical sectors larger than logical ones: report ratio and offset. */
		if (psectsz > sectsz) {
			ata_ident->pss |= ATA_PSS_MULTLS;
			ata_ident->pss |= ffsl(psectsz / sectsz) - 1;
			ata_ident->lsalign |= (psectoff / sectsz);
		}
		/* Logical sectors above 512 bytes: size is reported in 16-bit words. */
		if (sectsz > 512) {
			ata_ident->pss |= ATA_PSS_LSSABOVE512;
			ata_ident->lss_1 = sectsz / 2;
			ata_ident->lss_2 = ((sectsz / 2) >> 16);
		}
		ata_ident->support2 = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
		ata_ident->enabled2 = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
		ata_ident->transport_major = 0x1020;
		ata_ident->integrity = 0x00a5;
	}
	ahci_checksum((uint8_t*)ata_ident, sizeof(struct ata_params));
}
1127 
1128 static void
1129 handle_atapi_identify(struct ahci_port *p, int slot, uint8_t *cfis)
1130 {
1131 	if (!p->atapi) {
1132 		ahci_write_fis_d2h(p, slot, cfis,
1133 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1134 	} else {
1135 		ahci_write_fis_piosetup(p);
1136 		write_prdt(p, slot, cfis, (void *)&p->ata_ident, sizeof(struct ata_params));
1137 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
1138 	}
1139 }
1140 
1141 static void
1142 atapi_inquiry(struct ahci_port *p, int slot, uint8_t *cfis)
1143 {
1144 	uint8_t buf[36];
1145 	uint8_t *acmd;
1146 	unsigned int len;
1147 	uint32_t tfd;
1148 
1149 	acmd = cfis + 0x40;
1150 
1151 	if (acmd[1] & 1) {		/* VPD */
1152 		if (acmd[2] == 0) {	/* Supported VPD pages */
1153 			buf[0] = 0x05;
1154 			buf[1] = 0;
1155 			buf[2] = 0;
1156 			buf[3] = 1;
1157 			buf[4] = 0;
1158 			len = 4 + buf[3];
1159 		} else {
1160 			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1161 			p->asc = 0x24;
1162 			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1163 			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1164 			ahci_write_fis_d2h(p, slot, cfis, tfd);
1165 			return;
1166 		}
1167 	} else {
1168 		buf[0] = 0x05;
1169 		buf[1] = 0x80;
1170 		buf[2] = 0x00;
1171 		buf[3] = 0x21;
1172 		buf[4] = 31;
1173 		buf[5] = 0;
1174 		buf[6] = 0;
1175 		buf[7] = 0;
1176 		atapi_string(buf + 8, "BHYVE", 8);
1177 		atapi_string(buf + 16, "BHYVE DVD-ROM", 16);
1178 		atapi_string(buf + 32, "001", 4);
1179 		len = sizeof(buf);
1180 	}
1181 
1182 	if (len > acmd[4])
1183 		len = acmd[4];
1184 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1185 	write_prdt(p, slot, cfis, buf, len);
1186 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1187 }
1188 
1189 static void
1190 atapi_read_capacity(struct ahci_port *p, int slot, uint8_t *cfis)
1191 {
1192 	uint8_t buf[8];
1193 	uint64_t sectors;
1194 
1195 	sectors = blockif_size(p->bctx) / 2048;
1196 	be32enc(buf, sectors - 1);
1197 	be32enc(buf + 4, 2048);
1198 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1199 	write_prdt(p, slot, cfis, buf, sizeof(buf));
1200 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1201 }
1202 
1203 static void
1204 atapi_read_toc(struct ahci_port *p, int slot, uint8_t *cfis)
1205 {
1206 	uint8_t *acmd;
1207 	uint8_t format;
1208 	unsigned int len;
1209 
1210 	acmd = cfis + 0x40;
1211 
1212 	len = be16dec(acmd + 7);
1213 	format = acmd[9] >> 6;
1214 	switch (format) {
1215 	case 0:
1216 	{
1217 		size_t size;
1218 		int msf;
1219 		uint64_t sectors;
1220 		uint8_t start_track, buf[20], *bp;
1221 
1222 		msf = (acmd[1] >> 1) & 1;
1223 		start_track = acmd[6];
1224 		if (start_track > 1 && start_track != 0xaa) {
1225 			uint32_t tfd;
1226 			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1227 			p->asc = 0x24;
1228 			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1229 			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1230 			ahci_write_fis_d2h(p, slot, cfis, tfd);
1231 			return;
1232 		}
1233 		bp = buf + 2;
1234 		*bp++ = 1;
1235 		*bp++ = 1;
1236 		if (start_track <= 1) {
1237 			*bp++ = 0;
1238 			*bp++ = 0x14;
1239 			*bp++ = 1;
1240 			*bp++ = 0;
1241 			if (msf) {
1242 				*bp++ = 0;
1243 				lba_to_msf(bp, 0);
1244 				bp += 3;
1245 			} else {
1246 				*bp++ = 0;
1247 				*bp++ = 0;
1248 				*bp++ = 0;
1249 				*bp++ = 0;
1250 			}
1251 		}
1252 		*bp++ = 0;
1253 		*bp++ = 0x14;
1254 		*bp++ = 0xaa;
1255 		*bp++ = 0;
1256 		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
1257 		sectors >>= 2;
1258 		if (msf) {
1259 			*bp++ = 0;
1260 			lba_to_msf(bp, sectors);
1261 			bp += 3;
1262 		} else {
1263 			be32enc(bp, sectors);
1264 			bp += 4;
1265 		}
1266 		size = bp - buf;
1267 		be16enc(buf, size - 2);
1268 		if (len > size)
1269 			len = size;
1270 		write_prdt(p, slot, cfis, buf, len);
1271 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1272 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1273 		break;
1274 	}
1275 	case 1:
1276 	{
1277 		uint8_t buf[12];
1278 
1279 		memset(buf, 0, sizeof(buf));
1280 		buf[1] = 0xa;
1281 		buf[2] = 0x1;
1282 		buf[3] = 0x1;
1283 		if (len > sizeof(buf))
1284 			len = sizeof(buf);
1285 		write_prdt(p, slot, cfis, buf, len);
1286 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1287 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1288 		break;
1289 	}
1290 	case 2:
1291 	{
1292 		size_t size;
1293 		int msf;
1294 		uint64_t sectors;
1295 		uint8_t *bp, buf[50];
1296 
1297 		msf = (acmd[1] >> 1) & 1;
1298 		bp = buf + 2;
1299 		*bp++ = 1;
1300 		*bp++ = 1;
1301 
1302 		*bp++ = 1;
1303 		*bp++ = 0x14;
1304 		*bp++ = 0;
1305 		*bp++ = 0xa0;
1306 		*bp++ = 0;
1307 		*bp++ = 0;
1308 		*bp++ = 0;
1309 		*bp++ = 0;
1310 		*bp++ = 1;
1311 		*bp++ = 0;
1312 		*bp++ = 0;
1313 
1314 		*bp++ = 1;
1315 		*bp++ = 0x14;
1316 		*bp++ = 0;
1317 		*bp++ = 0xa1;
1318 		*bp++ = 0;
1319 		*bp++ = 0;
1320 		*bp++ = 0;
1321 		*bp++ = 0;
1322 		*bp++ = 1;
1323 		*bp++ = 0;
1324 		*bp++ = 0;
1325 
1326 		*bp++ = 1;
1327 		*bp++ = 0x14;
1328 		*bp++ = 0;
1329 		*bp++ = 0xa2;
1330 		*bp++ = 0;
1331 		*bp++ = 0;
1332 		*bp++ = 0;
1333 		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
1334 		sectors >>= 2;
1335 		if (msf) {
1336 			*bp++ = 0;
1337 			lba_to_msf(bp, sectors);
1338 			bp += 3;
1339 		} else {
1340 			be32enc(bp, sectors);
1341 			bp += 4;
1342 		}
1343 
1344 		*bp++ = 1;
1345 		*bp++ = 0x14;
1346 		*bp++ = 0;
1347 		*bp++ = 1;
1348 		*bp++ = 0;
1349 		*bp++ = 0;
1350 		*bp++ = 0;
1351 		if (msf) {
1352 			*bp++ = 0;
1353 			lba_to_msf(bp, 0);
1354 			bp += 3;
1355 		} else {
1356 			*bp++ = 0;
1357 			*bp++ = 0;
1358 			*bp++ = 0;
1359 			*bp++ = 0;
1360 		}
1361 
1362 		size = bp - buf;
1363 		be16enc(buf, size - 2);
1364 		if (len > size)
1365 			len = size;
1366 		write_prdt(p, slot, cfis, buf, len);
1367 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1368 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1369 		break;
1370 	}
1371 	default:
1372 	{
1373 		uint32_t tfd;
1374 
1375 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1376 		p->asc = 0x24;
1377 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1378 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1379 		ahci_write_fis_d2h(p, slot, cfis, tfd);
1380 		break;
1381 	}
1382 	}
1383 }
1384 
1385 static void
1386 atapi_report_luns(struct ahci_port *p, int slot, uint8_t *cfis)
1387 {
1388 	uint8_t buf[16];
1389 
1390 	memset(buf, 0, sizeof(buf));
1391 	buf[3] = 8;
1392 
1393 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1394 	write_prdt(p, slot, cfis, buf, sizeof(buf));
1395 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1396 }
1397 
1398 static void
1399 atapi_read(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
1400 {
1401 	struct ahci_ioreq *aior;
1402 	struct ahci_cmd_hdr *hdr;
1403 	struct ahci_prdt_entry *prdt;
1404 	struct blockif_req *breq;
1405 	uint8_t *acmd;
1406 	uint64_t lba;
1407 	uint32_t len;
1408 	int err;
1409 
1410 	acmd = cfis + 0x40;
1411 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1412 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1413 
1414 	lba = be32dec(acmd + 2);
1415 	if (acmd[0] == READ_10)
1416 		len = be16dec(acmd + 7);
1417 	else
1418 		len = be32dec(acmd + 6);
1419 	if (len == 0) {
1420 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1421 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1422 	}
1423 	lba *= 2048;
1424 	len *= 2048;
1425 
1426 	/*
1427 	 * Pull request off free list
1428 	 */
1429 	aior = STAILQ_FIRST(&p->iofhd);
1430 	assert(aior != NULL);
1431 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
1432 	aior->cfis = cfis;
1433 	aior->slot = slot;
1434 	aior->len = len;
1435 	aior->done = done;
1436 	aior->readop = 1;
1437 	breq = &aior->io_req;
1438 	breq->br_offset = lba + done;
1439 	ahci_build_iov(p, aior, prdt, hdr->prdtl);
1440 
1441 	/* Mark this command in-flight. */
1442 	p->pending |= 1 << slot;
1443 
1444 	/* Stuff request onto busy list. */
1445 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
1446 
1447 	err = blockif_read(p->bctx, breq);
1448 	assert(err == 0);
1449 }
1450 
1451 static void
1452 atapi_request_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1453 {
1454 	uint8_t buf[64];
1455 	uint8_t *acmd;
1456 	unsigned int len;
1457 
1458 	acmd = cfis + 0x40;
1459 	len = acmd[4];
1460 	if (len > sizeof(buf))
1461 		len = sizeof(buf);
1462 	memset(buf, 0, len);
1463 	buf[0] = 0x70 | (1 << 7);
1464 	buf[2] = p->sense_key;
1465 	buf[7] = 10;
1466 	buf[12] = p->asc;
1467 	write_prdt(p, slot, cfis, buf, len);
1468 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1469 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1470 }
1471 
1472 static void
1473 atapi_start_stop_unit(struct ahci_port *p, int slot, uint8_t *cfis)
1474 {
1475 	uint8_t *acmd = cfis + 0x40;
1476 	uint32_t tfd;
1477 
1478 	switch (acmd[4] & 3) {
1479 	case 0:
1480 	case 1:
1481 	case 3:
1482 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1483 		tfd = ATA_S_READY | ATA_S_DSC;
1484 		break;
1485 	case 2:
1486 		/* TODO eject media */
1487 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1488 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1489 		p->asc = 0x53;
1490 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1491 		break;
1492 	}
1493 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1494 }
1495 
1496 static void
1497 atapi_mode_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1498 {
1499 	uint8_t *acmd;
1500 	uint32_t tfd;
1501 	uint8_t pc, code;
1502 	unsigned int len;
1503 
1504 	acmd = cfis + 0x40;
1505 	len = be16dec(acmd + 7);
1506 	pc = acmd[2] >> 6;
1507 	code = acmd[2] & 0x3f;
1508 
1509 	switch (pc) {
1510 	case 0:
1511 		switch (code) {
1512 		case MODEPAGE_RW_ERROR_RECOVERY:
1513 		{
1514 			uint8_t buf[16];
1515 
1516 			if (len > sizeof(buf))
1517 				len = sizeof(buf);
1518 
1519 			memset(buf, 0, sizeof(buf));
1520 			be16enc(buf, 16 - 2);
1521 			buf[2] = 0x70;
1522 			buf[8] = 0x01;
1523 			buf[9] = 16 - 10;
1524 			buf[11] = 0x05;
1525 			write_prdt(p, slot, cfis, buf, len);
1526 			tfd = ATA_S_READY | ATA_S_DSC;
1527 			break;
1528 		}
1529 		case MODEPAGE_CD_CAPABILITIES:
1530 		{
1531 			uint8_t buf[30];
1532 
1533 			if (len > sizeof(buf))
1534 				len = sizeof(buf);
1535 
1536 			memset(buf, 0, sizeof(buf));
1537 			be16enc(buf, 30 - 2);
1538 			buf[2] = 0x70;
1539 			buf[8] = 0x2A;
1540 			buf[9] = 30 - 10;
1541 			buf[10] = 0x08;
1542 			buf[12] = 0x71;
1543 			be16enc(&buf[18], 2);
1544 			be16enc(&buf[20], 512);
1545 			write_prdt(p, slot, cfis, buf, len);
1546 			tfd = ATA_S_READY | ATA_S_DSC;
1547 			break;
1548 		}
1549 		default:
1550 			goto error;
1551 			break;
1552 		}
1553 		break;
1554 	case 3:
1555 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1556 		p->asc = 0x39;
1557 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1558 		break;
1559 error:
1560 	case 1:
1561 	case 2:
1562 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1563 		p->asc = 0x24;
1564 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1565 		break;
1566 	}
1567 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1568 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1569 }
1570 
1571 static void
1572 atapi_get_event_status_notification(struct ahci_port *p, int slot,
1573     uint8_t *cfis)
1574 {
1575 	uint8_t *acmd;
1576 	uint32_t tfd;
1577 
1578 	acmd = cfis + 0x40;
1579 
1580 	/* we don't support asynchronous operation */
1581 	if (!(acmd[1] & 1)) {
1582 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1583 		p->asc = 0x24;
1584 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1585 	} else {
1586 		uint8_t buf[8];
1587 		unsigned int len;
1588 
1589 		len = be16dec(acmd + 7);
1590 		if (len > sizeof(buf))
1591 			len = sizeof(buf);
1592 
1593 		memset(buf, 0, sizeof(buf));
1594 		be16enc(buf, 8 - 2);
1595 		buf[2] = 0x04;
1596 		buf[3] = 0x10;
1597 		buf[5] = 0x02;
1598 		write_prdt(p, slot, cfis, buf, len);
1599 		tfd = ATA_S_READY | ATA_S_DSC;
1600 	}
1601 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1602 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1603 }
1604 
1605 static void
1606 handle_packet_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1607 {
1608 	uint8_t *acmd;
1609 
1610 	acmd = cfis + 0x40;
1611 
1612 #ifdef AHCI_DEBUG
1613 	{
1614 		int i;
1615 		DPRINTF("ACMD:");
1616 		for (i = 0; i < 16; i++)
1617 			DPRINTF("%02x ", acmd[i]);
1618 		DPRINTF("");
1619 	}
1620 #endif
1621 
1622 	switch (acmd[0]) {
1623 	case TEST_UNIT_READY:
1624 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1625 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1626 		break;
1627 	case INQUIRY:
1628 		atapi_inquiry(p, slot, cfis);
1629 		break;
1630 	case READ_CAPACITY:
1631 		atapi_read_capacity(p, slot, cfis);
1632 		break;
1633 	case PREVENT_ALLOW:
1634 		/* TODO */
1635 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1636 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1637 		break;
1638 	case READ_TOC:
1639 		atapi_read_toc(p, slot, cfis);
1640 		break;
1641 	case REPORT_LUNS:
1642 		atapi_report_luns(p, slot, cfis);
1643 		break;
1644 	case READ_10:
1645 	case READ_12:
1646 		atapi_read(p, slot, cfis, 0);
1647 		break;
1648 	case REQUEST_SENSE:
1649 		atapi_request_sense(p, slot, cfis);
1650 		break;
1651 	case START_STOP_UNIT:
1652 		atapi_start_stop_unit(p, slot, cfis);
1653 		break;
1654 	case MODE_SENSE_10:
1655 		atapi_mode_sense(p, slot, cfis);
1656 		break;
1657 	case GET_EVENT_STATUS_NOTIFICATION:
1658 		atapi_get_event_status_notification(p, slot, cfis);
1659 		break;
1660 	default:
1661 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1662 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1663 		p->asc = 0x20;
1664 		ahci_write_fis_d2h(p, slot, cfis, (p->sense_key << 12) |
1665 				ATA_S_READY | ATA_S_ERROR);
1666 		break;
1667 	}
1668 }
1669 
1670 static void
1671 ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1672 {
1673 
1674 	p->tfd |= ATA_S_BUSY;
1675 	switch (cfis[2]) {
1676 	case ATA_ATA_IDENTIFY:
1677 		handle_identify(p, slot, cfis);
1678 		break;
1679 	case ATA_SETFEATURES:
1680 	{
1681 		switch (cfis[3]) {
1682 		case ATA_SF_ENAB_SATA_SF:
1683 			switch (cfis[12]) {
1684 			case ATA_SATA_SF_AN:
1685 				p->tfd = ATA_S_DSC | ATA_S_READY;
1686 				break;
1687 			default:
1688 				p->tfd = ATA_S_ERROR | ATA_S_READY;
1689 				p->tfd |= (ATA_ERROR_ABORT << 8);
1690 				break;
1691 			}
1692 			break;
1693 		case ATA_SF_ENAB_WCACHE:
1694 		case ATA_SF_DIS_WCACHE:
1695 		case ATA_SF_ENAB_RCACHE:
1696 		case ATA_SF_DIS_RCACHE:
1697 			p->tfd = ATA_S_DSC | ATA_S_READY;
1698 			break;
1699 		case ATA_SF_SETXFER:
1700 		{
1701 			switch (cfis[12] & 0xf8) {
1702 			case ATA_PIO:
1703 			case ATA_PIO0:
1704 				break;
1705 			case ATA_WDMA0:
1706 			case ATA_UDMA0:
1707 				p->xfermode = (cfis[12] & 0x7);
1708 				break;
1709 			}
1710 			p->tfd = ATA_S_DSC | ATA_S_READY;
1711 			break;
1712 		}
1713 		default:
1714 			p->tfd = ATA_S_ERROR | ATA_S_READY;
1715 			p->tfd |= (ATA_ERROR_ABORT << 8);
1716 			break;
1717 		}
1718 		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1719 		break;
1720 	}
1721 	case ATA_SET_MULTI:
1722 		if (cfis[12] != 0 &&
1723 			(cfis[12] > 128 || (cfis[12] & (cfis[12] - 1)))) {
1724 			p->tfd = ATA_S_ERROR | ATA_S_READY;
1725 			p->tfd |= (ATA_ERROR_ABORT << 8);
1726 		} else {
1727 			p->mult_sectors = cfis[12];
1728 			p->tfd = ATA_S_DSC | ATA_S_READY;
1729 		}
1730 		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1731 		break;
1732 	case ATA_READ:
1733 	case ATA_WRITE:
1734 	case ATA_READ48:
1735 	case ATA_WRITE48:
1736 	case ATA_READ_MUL:
1737 	case ATA_WRITE_MUL:
1738 	case ATA_READ_MUL48:
1739 	case ATA_WRITE_MUL48:
1740 	case ATA_READ_DMA:
1741 	case ATA_WRITE_DMA:
1742 	case ATA_READ_DMA48:
1743 	case ATA_WRITE_DMA48:
1744 	case ATA_READ_FPDMA_QUEUED:
1745 	case ATA_WRITE_FPDMA_QUEUED:
1746 		ahci_handle_rw(p, slot, cfis, 0);
1747 		break;
1748 	case ATA_FLUSHCACHE:
1749 	case ATA_FLUSHCACHE48:
1750 		ahci_handle_flush(p, slot, cfis);
1751 		break;
1752 	case ATA_DATA_SET_MANAGEMENT:
1753 		if (cfis[11] == 0 && cfis[3] == ATA_DSM_TRIM &&
1754 		    cfis[13] == 0 && cfis[12] == 1) {
1755 			ahci_handle_dsm_trim(p, slot, cfis, 0);
1756 			break;
1757 		}
1758 		ahci_write_fis_d2h(p, slot, cfis,
1759 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1760 		break;
1761 	case ATA_SEND_FPDMA_QUEUED:
1762 		if ((cfis[13] & 0x1f) == ATA_SFPDMA_DSM &&
1763 		    cfis[17] == 0 && cfis[16] == ATA_DSM_TRIM &&
1764 		    cfis[11] == 0 && cfis[3] == 1) {
1765 			ahci_handle_dsm_trim(p, slot, cfis, 0);
1766 			break;
1767 		}
1768 		ahci_write_fis_d2h(p, slot, cfis,
1769 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1770 		break;
1771 	case ATA_READ_LOG_EXT:
1772 	case ATA_READ_LOG_DMA_EXT:
1773 		ahci_handle_read_log(p, slot, cfis);
1774 		break;
1775 	case ATA_SECURITY_FREEZE_LOCK:
1776 	case ATA_SMART_CMD:
1777 	case ATA_NOP:
1778 		ahci_write_fis_d2h(p, slot, cfis,
1779 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1780 		break;
1781 	case ATA_CHECK_POWER_MODE:
1782 		cfis[12] = 0xff;	/* always on */
1783 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1784 		break;
1785 	case ATA_STANDBY_CMD:
1786 	case ATA_STANDBY_IMMEDIATE:
1787 	case ATA_IDLE_CMD:
1788 	case ATA_IDLE_IMMEDIATE:
1789 	case ATA_SLEEP:
1790 	case ATA_READ_VERIFY:
1791 	case ATA_READ_VERIFY48:
1792 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1793 		break;
1794 	case ATA_ATAPI_IDENTIFY:
1795 		handle_atapi_identify(p, slot, cfis);
1796 		break;
1797 	case ATA_PACKET_CMD:
1798 		if (!p->atapi) {
1799 			ahci_write_fis_d2h(p, slot, cfis,
1800 			    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1801 		} else
1802 			handle_packet_cmd(p, slot, cfis);
1803 		break;
1804 	default:
1805 		WPRINTF("Unsupported cmd:%02x", cfis[2]);
1806 		ahci_write_fis_d2h(p, slot, cfis,
1807 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1808 		break;
1809 	}
1810 }
1811 
1812 static void
1813 ahci_handle_slot(struct ahci_port *p, int slot)
1814 {
1815 	struct ahci_cmd_hdr *hdr;
1816 #ifdef AHCI_DEBUG
1817 	struct ahci_prdt_entry *prdt;
1818 #endif
1819 	struct pci_ahci_softc *sc;
1820 	uint8_t *cfis;
1821 #ifdef AHCI_DEBUG
1822 	int cfl, i;
1823 #endif
1824 
1825 	sc = p->pr_sc;
1826 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1827 #ifdef AHCI_DEBUG
1828 	cfl = (hdr->flags & 0x1f) * 4;
1829 #endif
1830 	cfis = paddr_guest2host(ahci_ctx(sc), hdr->ctba,
1831 			0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry));
1832 #ifdef AHCI_DEBUG
1833 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1834 
1835 	DPRINTF("cfis:");
1836 	for (i = 0; i < cfl; i++) {
1837 		if (i % 10 == 0)
1838 			DPRINTF("");
1839 		DPRINTF("%02x ", cfis[i]);
1840 	}
1841 	DPRINTF("");
1842 
1843 	for (i = 0; i < hdr->prdtl; i++) {
1844 		DPRINTF("%d@%08"PRIx64"", prdt->dbc & 0x3fffff, prdt->dba);
1845 		prdt++;
1846 	}
1847 #endif
1848 
1849 	if (cfis[0] != FIS_TYPE_REGH2D) {
1850 		WPRINTF("Not a H2D FIS:%02x", cfis[0]);
1851 		return;
1852 	}
1853 
1854 	if (cfis[1] & 0x80) {
1855 		ahci_handle_cmd(p, slot, cfis);
1856 	} else {
1857 		if (cfis[15] & (1 << 2))
1858 			p->reset = 1;
1859 		else if (p->reset) {
1860 			p->reset = 0;
1861 			ahci_port_reset(p);
1862 		}
1863 		p->ci &= ~(1 << slot);
1864 	}
1865 }
1866 
1867 static void
1868 ahci_handle_port(struct ahci_port *p)
1869 {
1870 
1871 	if (!(p->cmd & AHCI_P_CMD_ST))
1872 		return;
1873 
1874 	/*
1875 	 * Search for any new commands to issue ignoring those that
1876 	 * are already in-flight.  Stop if device is busy or in error.
1877 	 */
1878 	for (; (p->ci & ~p->pending) != 0; p->ccs = ((p->ccs + 1) & 31)) {
1879 		if ((p->tfd & (ATA_S_BUSY | ATA_S_DRQ)) != 0)
1880 			break;
1881 		if (p->waitforclear)
1882 			break;
1883 		if ((p->ci & ~p->pending & (1 << p->ccs)) != 0) {
1884 			p->cmd &= ~AHCI_P_CMD_CCS_MASK;
1885 			p->cmd |= p->ccs << AHCI_P_CMD_CCS_SHIFT;
1886 			ahci_handle_slot(p, p->ccs);
1887 		}
1888 	}
1889 }
1890 
1891 /*
1892  * blockif callback routine - this runs in the context of the blockif
1893  * i/o thread, so the mutex needs to be acquired.
1894  */
1895 static void
1896 ata_ioreq_cb(struct blockif_req *br, int err)
1897 {
1898 	struct ahci_cmd_hdr *hdr;
1899 	struct ahci_ioreq *aior;
1900 	struct ahci_port *p;
1901 	struct pci_ahci_softc *sc;
1902 	uint32_t tfd;
1903 	uint8_t *cfis;
1904 	int slot, ncq, dsm;
1905 
1906 	DPRINTF("%s %d", __func__, err);
1907 
1908 	ncq = dsm = 0;
1909 	aior = br->br_param;
1910 	p = aior->io_pr;
1911 	cfis = aior->cfis;
1912 	slot = aior->slot;
1913 	sc = p->pr_sc;
1914 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1915 
1916 	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
1917 	    cfis[2] == ATA_READ_FPDMA_QUEUED ||
1918 	    cfis[2] == ATA_SEND_FPDMA_QUEUED)
1919 		ncq = 1;
1920 	if (cfis[2] == ATA_DATA_SET_MANAGEMENT ||
1921 	    (cfis[2] == ATA_SEND_FPDMA_QUEUED &&
1922 	     (cfis[13] & 0x1f) == ATA_SFPDMA_DSM))
1923 		dsm = 1;
1924 
1925 	pthread_mutex_lock(&sc->mtx);
1926 
1927 	/*
1928 	 * Delete the blockif request from the busy list
1929 	 */
1930 	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1931 
1932 	/*
1933 	 * Move the blockif request back to the free list
1934 	 */
1935 	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
1936 
1937 	if (!err)
1938 		hdr->prdbc = aior->done;
1939 
1940 	if (!err && aior->more) {
1941 		if (dsm)
1942 			ahci_handle_dsm_trim(p, slot, cfis, aior->done);
1943 		else
1944 			ahci_handle_rw(p, slot, cfis, aior->done);
1945 		goto out;
1946 	}
1947 
1948 	if (!err)
1949 		tfd = ATA_S_READY | ATA_S_DSC;
1950 	else
1951 		tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1952 	if (ncq)
1953 		ahci_write_fis_sdb(p, slot, cfis, tfd);
1954 	else
1955 		ahci_write_fis_d2h(p, slot, cfis, tfd);
1956 
1957 	/*
1958 	 * This command is now complete.
1959 	 */
1960 	p->pending &= ~(1 << slot);
1961 
1962 	ahci_check_stopped(p);
1963 	ahci_handle_port(p);
1964 out:
1965 	pthread_mutex_unlock(&sc->mtx);
1966 	DPRINTF("%s exit", __func__);
1967 }
1968 
1969 static void
1970 atapi_ioreq_cb(struct blockif_req *br, int err)
1971 {
1972 	struct ahci_cmd_hdr *hdr;
1973 	struct ahci_ioreq *aior;
1974 	struct ahci_port *p;
1975 	struct pci_ahci_softc *sc;
1976 	uint8_t *cfis;
1977 	uint32_t tfd;
1978 	int slot;
1979 
1980 	DPRINTF("%s %d", __func__, err);
1981 
1982 	aior = br->br_param;
1983 	p = aior->io_pr;
1984 	cfis = aior->cfis;
1985 	slot = aior->slot;
1986 	sc = p->pr_sc;
1987 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + aior->slot * AHCI_CL_SIZE);
1988 
1989 	pthread_mutex_lock(&sc->mtx);
1990 
1991 	/*
1992 	 * Delete the blockif request from the busy list
1993 	 */
1994 	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1995 
1996 	/*
1997 	 * Move the blockif request back to the free list
1998 	 */
1999 	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
2000 
2001 	if (!err)
2002 		hdr->prdbc = aior->done;
2003 
2004 	if (!err && aior->more) {
2005 		atapi_read(p, slot, cfis, aior->done);
2006 		goto out;
2007 	}
2008 
2009 	if (!err) {
2010 		tfd = ATA_S_READY | ATA_S_DSC;
2011 	} else {
2012 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
2013 		p->asc = 0x21;
2014 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
2015 	}
2016 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
2017 	ahci_write_fis_d2h(p, slot, cfis, tfd);
2018 
2019 	/*
2020 	 * This command is now complete.
2021 	 */
2022 	p->pending &= ~(1 << slot);
2023 
2024 	ahci_check_stopped(p);
2025 	ahci_handle_port(p);
2026 out:
2027 	pthread_mutex_unlock(&sc->mtx);
2028 	DPRINTF("%s exit", __func__);
2029 }
2030 
2031 static void
2032 pci_ahci_ioreq_init(struct ahci_port *pr)
2033 {
2034 	struct ahci_ioreq *vr;
2035 	int i;
2036 
2037 	pr->ioqsz = blockif_queuesz(pr->bctx);
2038 	pr->ioreq = calloc(pr->ioqsz, sizeof(struct ahci_ioreq));
2039 	STAILQ_INIT(&pr->iofhd);
2040 
2041 	/*
2042 	 * Add all i/o request entries to the free queue
2043 	 */
2044 	for (i = 0; i < pr->ioqsz; i++) {
2045 		vr = &pr->ioreq[i];
2046 		vr->io_pr = pr;
2047 		if (!pr->atapi)
2048 			vr->io_req.br_callback = ata_ioreq_cb;
2049 		else
2050 			vr->io_req.br_callback = atapi_ioreq_cb;
2051 		vr->io_req.br_param = vr;
2052 		STAILQ_INSERT_TAIL(&pr->iofhd, vr, io_flist);
2053 	}
2054 
2055 	TAILQ_INIT(&pr->iobhd);
2056 }
2057 
2058 static void
2059 pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
2060 {
2061 	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
2062 	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
2063 	struct ahci_port *p = &sc->port[port];
2064 
2065 	DPRINTF("pci_ahci_port %d: write offset 0x%"PRIx64" value 0x%"PRIx64"",
2066 		port, offset, value);
2067 
2068 	switch (offset) {
2069 	case AHCI_P_CLB:
2070 		p->clb = value;
2071 		break;
2072 	case AHCI_P_CLBU:
2073 		p->clbu = value;
2074 		break;
2075 	case AHCI_P_FB:
2076 		p->fb = value;
2077 		break;
2078 	case AHCI_P_FBU:
2079 		p->fbu = value;
2080 		break;
2081 	case AHCI_P_IS:
2082 		p->is &= ~value;
2083 		ahci_port_intr(p);
2084 		break;
2085 	case AHCI_P_IE:
2086 		p->ie = value & 0xFDC000FF;
2087 		ahci_port_intr(p);
2088 		break;
2089 	case AHCI_P_CMD:
2090 	{
2091 		p->cmd &= ~(AHCI_P_CMD_ST | AHCI_P_CMD_SUD | AHCI_P_CMD_POD |
2092 		    AHCI_P_CMD_CLO | AHCI_P_CMD_FRE | AHCI_P_CMD_APSTE |
2093 		    AHCI_P_CMD_ATAPI | AHCI_P_CMD_DLAE | AHCI_P_CMD_ALPE |
2094 		    AHCI_P_CMD_ASP | AHCI_P_CMD_ICC_MASK);
2095 		p->cmd |= (AHCI_P_CMD_ST | AHCI_P_CMD_SUD | AHCI_P_CMD_POD |
2096 		    AHCI_P_CMD_CLO | AHCI_P_CMD_FRE | AHCI_P_CMD_APSTE |
2097 		    AHCI_P_CMD_ATAPI | AHCI_P_CMD_DLAE | AHCI_P_CMD_ALPE |
2098 		    AHCI_P_CMD_ASP | AHCI_P_CMD_ICC_MASK) & value;
2099 
2100 		if (!(value & AHCI_P_CMD_ST)) {
2101 			ahci_port_stop(p);
2102 		} else {
2103 			uint64_t clb;
2104 
2105 			p->cmd |= AHCI_P_CMD_CR;
2106 			clb = (uint64_t)p->clbu << 32 | p->clb;
2107 			p->cmd_lst = paddr_guest2host(ahci_ctx(sc), clb,
2108 					AHCI_CL_SIZE * AHCI_MAX_SLOTS);
2109 		}
2110 
2111 		if (value & AHCI_P_CMD_FRE) {
2112 			uint64_t fb;
2113 
2114 			p->cmd |= AHCI_P_CMD_FR;
2115 			fb = (uint64_t)p->fbu << 32 | p->fb;
2116 			/* we don't support FBSCP, so rfis size is 256Bytes */
2117 			p->rfis = paddr_guest2host(ahci_ctx(sc), fb, 256);
2118 		} else {
2119 			p->cmd &= ~AHCI_P_CMD_FR;
2120 		}
2121 
2122 		if (value & AHCI_P_CMD_CLO) {
2123 			p->tfd &= ~(ATA_S_BUSY | ATA_S_DRQ);
2124 			p->cmd &= ~AHCI_P_CMD_CLO;
2125 		}
2126 
2127 		if (value & AHCI_P_CMD_ICC_MASK) {
2128 			p->cmd &= ~AHCI_P_CMD_ICC_MASK;
2129 		}
2130 
2131 		ahci_handle_port(p);
2132 		break;
2133 	}
2134 	case AHCI_P_TFD:
2135 	case AHCI_P_SIG:
2136 	case AHCI_P_SSTS:
2137 		WPRINTF("pci_ahci_port: read only registers 0x%"PRIx64"", offset);
2138 		break;
2139 	case AHCI_P_SCTL:
2140 		p->sctl = value;
2141 		if (!(p->cmd & AHCI_P_CMD_ST)) {
2142 			if (value & ATA_SC_DET_RESET)
2143 				ahci_port_reset(p);
2144 		}
2145 		break;
2146 	case AHCI_P_SERR:
2147 		p->serr &= ~value;
2148 		break;
2149 	case AHCI_P_SACT:
2150 		p->sact |= value;
2151 		break;
2152 	case AHCI_P_CI:
2153 		p->ci |= value;
2154 		ahci_handle_port(p);
2155 		break;
2156 	case AHCI_P_SNTF:
2157 	case AHCI_P_FBS:
2158 	default:
2159 		break;
2160 	}
2161 }
2162 
2163 static void
2164 pci_ahci_host_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
2165 {
2166 	DPRINTF("pci_ahci_host: write offset 0x%"PRIx64" value 0x%"PRIx64"",
2167 		offset, value);
2168 
2169 	switch (offset) {
2170 	case AHCI_CAP:
2171 	case AHCI_PI:
2172 	case AHCI_VS:
2173 	case AHCI_CAP2:
2174 		DPRINTF("pci_ahci_host: read only registers 0x%"PRIx64"", offset);
2175 		break;
2176 	case AHCI_GHC:
2177 		if (value & AHCI_GHC_HR) {
2178 			ahci_reset(sc);
2179 			break;
2180 		}
2181 		if (value & AHCI_GHC_IE)
2182 			sc->ghc |= AHCI_GHC_IE;
2183 		else
2184 			sc->ghc &= ~AHCI_GHC_IE;
2185 		ahci_generate_intr(sc, 0xffffffff);
2186 		break;
2187 	case AHCI_IS:
2188 		sc->is &= ~value;
2189 		ahci_generate_intr(sc, value);
2190 		break;
2191 	default:
2192 		break;
2193 	}
2194 }
2195 
2196 static void
2197 pci_ahci_write(struct pci_devinst *pi, int baridx, uint64_t offset, int size,
2198     uint64_t value)
2199 {
2200 	struct pci_ahci_softc *sc = pi->pi_arg;
2201 
2202 	assert(baridx == 5);
2203 	assert((offset % 4) == 0 && size == 4);
2204 
2205 	pthread_mutex_lock(&sc->mtx);
2206 
2207 	if (offset < AHCI_OFFSET)
2208 		pci_ahci_host_write(sc, offset, value);
2209 	else if (offset < (uint64_t)AHCI_OFFSET + sc->ports * AHCI_STEP)
2210 		pci_ahci_port_write(sc, offset, value);
2211 	else
2212 		WPRINTF("pci_ahci: unknown i/o write offset 0x%"PRIx64"", offset);
2213 
2214 	pthread_mutex_unlock(&sc->mtx);
2215 }
2216 
2217 static uint64_t
2218 pci_ahci_host_read(struct pci_ahci_softc *sc, uint64_t offset)
2219 {
2220 	uint32_t value;
2221 
2222 	switch (offset) {
2223 	case AHCI_CAP:
2224 	case AHCI_GHC:
2225 	case AHCI_IS:
2226 	case AHCI_PI:
2227 	case AHCI_VS:
2228 	case AHCI_CCCC:
2229 	case AHCI_CCCP:
2230 	case AHCI_EM_LOC:
2231 	case AHCI_EM_CTL:
2232 	case AHCI_CAP2:
2233 	{
2234 		uint32_t *p = &sc->cap;
2235 		p += (offset - AHCI_CAP) / sizeof(uint32_t);
2236 		value = *p;
2237 		break;
2238 	}
2239 	default:
2240 		value = 0;
2241 		break;
2242 	}
2243 	DPRINTF("pci_ahci_host: read offset 0x%"PRIx64" value 0x%x",
2244 		offset, value);
2245 
2246 	return (value);
2247 }
2248 
2249 static uint64_t
2250 pci_ahci_port_read(struct pci_ahci_softc *sc, uint64_t offset)
2251 {
2252 	uint32_t value;
2253 	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
2254 	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
2255 
2256 	switch (offset) {
2257 	case AHCI_P_CLB:
2258 	case AHCI_P_CLBU:
2259 	case AHCI_P_FB:
2260 	case AHCI_P_FBU:
2261 	case AHCI_P_IS:
2262 	case AHCI_P_IE:
2263 	case AHCI_P_CMD:
2264 	case AHCI_P_TFD:
2265 	case AHCI_P_SIG:
2266 	case AHCI_P_SSTS:
2267 	case AHCI_P_SCTL:
2268 	case AHCI_P_SERR:
2269 	case AHCI_P_SACT:
2270 	case AHCI_P_CI:
2271 	case AHCI_P_SNTF:
2272 	case AHCI_P_FBS:
2273 	{
2274 		uint32_t *p= &sc->port[port].clb;
2275 		p += (offset - AHCI_P_CLB) / sizeof(uint32_t);
2276 		value = *p;
2277 		break;
2278 	}
2279 	default:
2280 		value = 0;
2281 		break;
2282 	}
2283 
2284 	DPRINTF("pci_ahci_port %d: read offset 0x%"PRIx64" value 0x%x",
2285 		port, offset, value);
2286 
2287 	return value;
2288 }
2289 
2290 static uint64_t
2291 pci_ahci_read(struct pci_devinst *pi, int baridx, uint64_t regoff, int size)
2292 {
2293 	struct pci_ahci_softc *sc = pi->pi_arg;
2294 	uint64_t offset;
2295 	uint32_t value;
2296 
2297 	assert(baridx == 5);
2298 	assert(size == 1 || size == 2 || size == 4);
2299 	assert((regoff & (size - 1)) == 0);
2300 
2301 	pthread_mutex_lock(&sc->mtx);
2302 
2303 	offset = regoff & ~0x3;	    /* round down to a multiple of 4 bytes */
2304 	if (offset < AHCI_OFFSET)
2305 		value = pci_ahci_host_read(sc, offset);
2306 	else if (offset < (uint64_t)AHCI_OFFSET + sc->ports * AHCI_STEP)
2307 		value = pci_ahci_port_read(sc, offset);
2308 	else {
2309 		value = 0;
2310 		WPRINTF("pci_ahci: unknown i/o read offset 0x%"PRIx64"",
2311 		    regoff);
2312 	}
2313 	value >>= 8 * (regoff & 0x3);
2314 
2315 	pthread_mutex_unlock(&sc->mtx);
2316 
2317 	return (value);
2318 }
2319 
2320 /*
2321  * Each AHCI controller has a "port" node which contains nodes for
2322  * each port named after the decimal number of the port (no leading
2323  * zeroes).  Port nodes contain a "type" ("hd" or "cd"), as well as
2324  * options for blockif.  For example:
2325  *
2326  * pci.0.1.0
2327  *          .device="ahci"
2328  *          .port
2329  *               .0
2330  *                 .type="hd"
2331  *                 .path="/path/to/image"
2332  */
2333 static int
2334 pci_ahci_legacy_config_port(nvlist_t *nvl, int port, const char *type,
2335     const char *opts)
2336 {
2337 	char node_name[sizeof("XX")];
2338 	nvlist_t *port_nvl;
2339 
2340 	snprintf(node_name, sizeof(node_name), "%d", port);
2341 	port_nvl = create_relative_config_node(nvl, node_name);
2342 	set_config_value_node(port_nvl, "type", type);
2343 	return (blockif_legacy_config(port_nvl, opts));
2344 }
2345 
2346 static int
2347 pci_ahci_legacy_config(nvlist_t *nvl, const char *opts)
2348 {
2349 	nvlist_t *ports_nvl;
2350 	const char *type;
2351 	char *next, *next2, *str, *tofree;
2352 	int p, ret;
2353 
2354 	if (opts == NULL)
2355 		return (0);
2356 
2357 	ports_nvl = create_relative_config_node(nvl, "port");
2358 	ret = 1;
2359 	tofree = str = strdup(opts);
2360 	for (p = 0; p < MAX_PORTS && str != NULL; p++, str = next) {
2361 		/* Identify and cut off type of present port. */
2362 		if (strncmp(str, "hd:", 3) == 0) {
2363 			type = "hd";
2364 			str += 3;
2365 		} else if (strncmp(str, "cd:", 3) == 0) {
2366 			type = "cd";
2367 			str += 3;
2368 		} else
2369 			type = NULL;
2370 
2371 		/* Find and cut off the next port options. */
2372 		next = strstr(str, ",hd:");
2373 		next2 = strstr(str, ",cd:");
2374 		if (next == NULL || (next2 != NULL && next2 < next))
2375 			next = next2;
2376 		if (next != NULL) {
2377 			next[0] = 0;
2378 			next++;
2379 		}
2380 
2381 		if (str[0] == 0)
2382 			continue;
2383 
2384 		if (type == NULL) {
2385 			EPRINTLN("Missing or invalid type for port %d: \"%s\"",
2386 			    p, str);
2387 			goto out;
2388 		}
2389 
2390 		if (pci_ahci_legacy_config_port(ports_nvl, p, type, str) != 0)
2391 			goto out;
2392 	}
2393 	ret = 0;
2394 out:
2395 	free(tofree);
2396 	return (ret);
2397 }
2398 
2399 static int
2400 pci_ahci_cd_legacy_config(nvlist_t *nvl, const char *opts)
2401 {
2402 	nvlist_t *ports_nvl;
2403 
2404 	ports_nvl = create_relative_config_node(nvl, "port");
2405 	return (pci_ahci_legacy_config_port(ports_nvl, 0, "cd", opts));
2406 }
2407 
2408 static int
2409 pci_ahci_hd_legacy_config(nvlist_t *nvl, const char *opts)
2410 {
2411 	nvlist_t *ports_nvl;
2412 
2413 	ports_nvl = create_relative_config_node(nvl, "port");
2414 	return (pci_ahci_legacy_config_port(ports_nvl, 0, "hd", opts));
2415 }
2416 
2417 static int
2418 pci_ahci_init(struct pci_devinst *pi, nvlist_t *nvl)
2419 {
2420 	char bident[sizeof("XXX:XXX:XXX")];
2421 	char node_name[sizeof("XX")];
2422 	struct blockif_ctxt *bctxt;
2423 	struct pci_ahci_softc *sc;
2424 	int atapi, ret, slots, p;
2425 	MD5_CTX mdctx;
2426 	u_char digest[16];
2427 	const char *path, *type, *value;
2428 	nvlist_t *ports_nvl, *port_nvl;
2429 
2430 	ret = 0;
2431 
2432 #ifdef AHCI_DEBUG
2433 	dbg = fopen("/tmp/log", "w+");
2434 #endif
2435 
2436 	sc = calloc(1, sizeof(struct pci_ahci_softc));
2437 	pi->pi_arg = sc;
2438 	sc->asc_pi = pi;
2439 	pthread_mutex_init(&sc->mtx, NULL);
2440 	sc->ports = 0;
2441 	sc->pi = 0;
2442 	slots = 32;
2443 
2444 	ports_nvl = find_relative_config_node(nvl, "port");
2445 	for (p = 0; ports_nvl != NULL && p < MAX_PORTS; p++) {
2446 		struct ata_params *ata_ident = &sc->port[p].ata_ident;
2447 		char ident[AHCI_PORT_IDENT];
2448 
2449 		snprintf(node_name, sizeof(node_name), "%d", p);
2450 		port_nvl = find_relative_config_node(ports_nvl, node_name);
2451 		if (port_nvl == NULL)
2452 			continue;
2453 
2454 		type = get_config_value_node(port_nvl, "type");
2455 		if (type == NULL)
2456 			continue;
2457 
2458 		if (strcmp(type, "hd") == 0)
2459 			atapi = 0;
2460 		else
2461 			atapi = 1;
2462 
2463 		/*
2464 		 * Attempt to open the backing image. Use the PCI slot/func
2465 		 * and the port number for the identifier string.
2466 		 */
2467 		snprintf(bident, sizeof(bident), "%u:%u:%u", pi->pi_slot,
2468 		    pi->pi_func, p);
2469 
2470 		bctxt = blockif_open(port_nvl, bident);
2471 		if (bctxt == NULL) {
2472 			sc->ports = p;
2473 			ret = 1;
2474 			goto open_fail;
2475 		}
2476 
2477 		ret = blockif_add_boot_device(pi, bctxt);
2478 		if (ret) {
2479 			sc->ports = p;
2480 			goto open_fail;
2481 		}
2482 
2483 		sc->port[p].bctx = bctxt;
2484 		sc->port[p].pr_sc = sc;
2485 		sc->port[p].port = p;
2486 		sc->port[p].atapi = atapi;
2487 
2488 		/*
2489 		 * Create an identifier for the backing file.
2490 		 * Use parts of the md5 sum of the filename
2491 		 */
2492 		path = get_config_value_node(port_nvl, "path");
2493 		MD5Init(&mdctx);
2494 		MD5Update(&mdctx, path, strlen(path));
2495 		MD5Final(digest, &mdctx);
2496 		snprintf(ident, AHCI_PORT_IDENT,
2497 			"BHYVE-%02X%02X-%02X%02X-%02X%02X",
2498 			digest[0], digest[1], digest[2], digest[3], digest[4],
2499 			digest[5]);
2500 
2501 		memset(ata_ident, 0, sizeof(struct ata_params));
2502 		ata_string((uint8_t*)&ata_ident->serial, ident, 20);
2503 		ata_string((uint8_t*)&ata_ident->revision, "001", 8);
2504 		if (atapi)
2505 			ata_string((uint8_t*)&ata_ident->model, "BHYVE SATA DVD ROM", 40);
2506 		else
2507 			ata_string((uint8_t*)&ata_ident->model, "BHYVE SATA DISK", 40);
2508 		value = get_config_value_node(port_nvl, "nmrr");
2509 		if (value != NULL)
2510 			ata_ident->media_rotation_rate = atoi(value);
2511 		value = get_config_value_node(port_nvl, "ser");
2512 		if (value != NULL)
2513 			ata_string((uint8_t*)(&ata_ident->serial), value, 20);
2514 		value = get_config_value_node(port_nvl, "rev");
2515 		if (value != NULL)
2516 			ata_string((uint8_t*)(&ata_ident->revision), value, 8);
2517 		value = get_config_value_node(port_nvl, "model");
2518 		if (value != NULL)
2519 			ata_string((uint8_t*)(&ata_ident->model), value, 40);
2520 		ata_identify_init(&sc->port[p], atapi);
2521 
2522 		/*
2523 		 * Allocate blockif request structures and add them
2524 		 * to the free list
2525 		 */
2526 		pci_ahci_ioreq_init(&sc->port[p]);
2527 
2528 		sc->pi |= (1 << p);
2529 		if (sc->port[p].ioqsz < slots)
2530 			slots = sc->port[p].ioqsz;
2531 	}
2532 	sc->ports = p;
2533 
2534 	/* Intel ICH8 AHCI */
2535 	--slots;
2536 	if (sc->ports < DEF_PORTS)
2537 		sc->ports = DEF_PORTS;
2538 	sc->cap = AHCI_CAP_64BIT | AHCI_CAP_SNCQ | AHCI_CAP_SSNTF |
2539 	    AHCI_CAP_SMPS | AHCI_CAP_SSS | AHCI_CAP_SALP |
2540 	    AHCI_CAP_SAL | AHCI_CAP_SCLO | (0x3 << AHCI_CAP_ISS_SHIFT)|
2541 	    AHCI_CAP_PMD | AHCI_CAP_SSC | AHCI_CAP_PSC |
2542 	    (slots << AHCI_CAP_NCS_SHIFT) | AHCI_CAP_SXS | (sc->ports - 1);
2543 
2544 	sc->vs = 0x10300;
2545 	sc->cap2 = AHCI_CAP2_APST;
2546 	ahci_reset(sc);
2547 
2548 	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x2821);
2549 	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x8086);
2550 	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
2551 	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_STORAGE_SATA);
2552 	pci_set_cfgdata8(pi, PCIR_PROGIF, PCIP_STORAGE_SATA_AHCI_1_0);
2553 	p = MIN(sc->ports, 16);
2554 	p = flsl(p) - ((p & (p - 1)) ? 0 : 1);
2555 	pci_emul_add_msicap(pi, 1 << p);
2556 	pci_emul_alloc_bar(pi, 5, PCIBAR_MEM32,
2557 	    AHCI_OFFSET + sc->ports * AHCI_STEP);
2558 
2559 	pci_lintr_request(pi);
2560 
2561 open_fail:
2562 	if (ret) {
2563 		for (p = 0; p < sc->ports; p++) {
2564 			if (sc->port[p].bctx != NULL)
2565 				blockif_close(sc->port[p].bctx);
2566 		}
2567 		free(sc);
2568 	}
2569 
2570 	return (ret);
2571 }
2572 
2573 #ifdef BHYVE_SNAPSHOT
2574 static int
2575 pci_ahci_snapshot(struct vm_snapshot_meta *meta)
2576 {
2577 	int i, ret;
2578 	void *bctx;
2579 	struct pci_devinst *pi;
2580 	struct pci_ahci_softc *sc;
2581 	struct ahci_port *port;
2582 
2583 	pi = meta->dev_data;
2584 	sc = pi->pi_arg;
2585 
2586 	/* TODO: add mtx lock/unlock */
2587 
2588 	SNAPSHOT_VAR_OR_LEAVE(sc->ports, meta, ret, done);
2589 	SNAPSHOT_VAR_OR_LEAVE(sc->cap, meta, ret, done);
2590 	SNAPSHOT_VAR_OR_LEAVE(sc->ghc, meta, ret, done);
2591 	SNAPSHOT_VAR_OR_LEAVE(sc->is, meta, ret, done);
2592 	SNAPSHOT_VAR_OR_LEAVE(sc->pi, meta, ret, done);
2593 	SNAPSHOT_VAR_OR_LEAVE(sc->vs, meta, ret, done);
2594 	SNAPSHOT_VAR_OR_LEAVE(sc->ccc_ctl, meta, ret, done);
2595 	SNAPSHOT_VAR_OR_LEAVE(sc->ccc_pts, meta, ret, done);
2596 	SNAPSHOT_VAR_OR_LEAVE(sc->em_loc, meta, ret, done);
2597 	SNAPSHOT_VAR_OR_LEAVE(sc->em_ctl, meta, ret, done);
2598 	SNAPSHOT_VAR_OR_LEAVE(sc->cap2, meta, ret, done);
2599 	SNAPSHOT_VAR_OR_LEAVE(sc->bohc, meta, ret, done);
2600 	SNAPSHOT_VAR_OR_LEAVE(sc->lintr, meta, ret, done);
2601 
2602 	for (i = 0; i < MAX_PORTS; i++) {
2603 		port = &sc->port[i];
2604 
2605 		if (meta->op == VM_SNAPSHOT_SAVE)
2606 			bctx = port->bctx;
2607 
2608 		SNAPSHOT_VAR_OR_LEAVE(bctx, meta, ret, done);
2609 		SNAPSHOT_VAR_OR_LEAVE(port->port, meta, ret, done);
2610 
2611 		/* Mostly for restore; save is ensured by the lines above. */
2612 		if (((bctx == NULL) && (port->bctx != NULL)) ||
2613 		    ((bctx != NULL) && (port->bctx == NULL))) {
2614 			fprintf(stderr, "%s: ports not matching\r\n", __func__);
2615 			ret = EINVAL;
2616 			goto done;
2617 		}
2618 
2619 		if (port->bctx == NULL)
2620 			continue;
2621 
2622 		if (port->port != i) {
2623 			fprintf(stderr, "%s: ports not matching: "
2624 					"actual: %d expected: %d\r\n",
2625 					__func__, port->port, i);
2626 			ret = EINVAL;
2627 			goto done;
2628 		}
2629 
2630 		SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(pi->pi_vmctx, port->cmd_lst,
2631 			AHCI_CL_SIZE * AHCI_MAX_SLOTS, false, meta, ret, done);
2632 		SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(pi->pi_vmctx, port->rfis, 256,
2633 		    false, meta, ret, done);
2634 
2635 		SNAPSHOT_VAR_OR_LEAVE(port->ata_ident, meta, ret, done);
2636 		SNAPSHOT_VAR_OR_LEAVE(port->atapi, meta, ret, done);
2637 		SNAPSHOT_VAR_OR_LEAVE(port->reset, meta, ret, done);
2638 		SNAPSHOT_VAR_OR_LEAVE(port->waitforclear, meta, ret, done);
2639 		SNAPSHOT_VAR_OR_LEAVE(port->mult_sectors, meta, ret, done);
2640 		SNAPSHOT_VAR_OR_LEAVE(port->xfermode, meta, ret, done);
2641 		SNAPSHOT_VAR_OR_LEAVE(port->err_cfis, meta, ret, done);
2642 		SNAPSHOT_VAR_OR_LEAVE(port->sense_key, meta, ret, done);
2643 		SNAPSHOT_VAR_OR_LEAVE(port->asc, meta, ret, done);
2644 		SNAPSHOT_VAR_OR_LEAVE(port->ccs, meta, ret, done);
2645 		SNAPSHOT_VAR_OR_LEAVE(port->pending, meta, ret, done);
2646 
2647 		SNAPSHOT_VAR_OR_LEAVE(port->clb, meta, ret, done);
2648 		SNAPSHOT_VAR_OR_LEAVE(port->clbu, meta, ret, done);
2649 		SNAPSHOT_VAR_OR_LEAVE(port->fb, meta, ret, done);
2650 		SNAPSHOT_VAR_OR_LEAVE(port->fbu, meta, ret, done);
2651 		SNAPSHOT_VAR_OR_LEAVE(port->ie, meta, ret, done);
2652 		SNAPSHOT_VAR_OR_LEAVE(port->cmd, meta, ret, done);
2653 		SNAPSHOT_VAR_OR_LEAVE(port->unused0, meta, ret, done);
2654 		SNAPSHOT_VAR_OR_LEAVE(port->tfd, meta, ret, done);
2655 		SNAPSHOT_VAR_OR_LEAVE(port->sig, meta, ret, done);
2656 		SNAPSHOT_VAR_OR_LEAVE(port->ssts, meta, ret, done);
2657 		SNAPSHOT_VAR_OR_LEAVE(port->sctl, meta, ret, done);
2658 		SNAPSHOT_VAR_OR_LEAVE(port->serr, meta, ret, done);
2659 		SNAPSHOT_VAR_OR_LEAVE(port->sact, meta, ret, done);
2660 		SNAPSHOT_VAR_OR_LEAVE(port->ci, meta, ret, done);
2661 		SNAPSHOT_VAR_OR_LEAVE(port->sntf, meta, ret, done);
2662 		SNAPSHOT_VAR_OR_LEAVE(port->fbs, meta, ret, done);
2663 		SNAPSHOT_VAR_OR_LEAVE(port->ioqsz, meta, ret, done);
2664 
2665 		assert(TAILQ_EMPTY(&port->iobhd));
2666 	}
2667 
2668 done:
2669 	return (ret);
2670 }
2671 
2672 static int
2673 pci_ahci_pause(struct pci_devinst *pi)
2674 {
2675 	struct pci_ahci_softc *sc;
2676 	struct blockif_ctxt *bctxt;
2677 	int i;
2678 
2679 	sc = pi->pi_arg;
2680 
2681 	for (i = 0; i < MAX_PORTS; i++) {
2682 		bctxt = sc->port[i].bctx;
2683 		if (bctxt == NULL)
2684 			continue;
2685 
2686 		blockif_pause(bctxt);
2687 	}
2688 
2689 	return (0);
2690 }
2691 
2692 static int
2693 pci_ahci_resume(struct pci_devinst *pi)
2694 {
2695 	struct pci_ahci_softc *sc;
2696 	struct blockif_ctxt *bctxt;
2697 	int i;
2698 
2699 	sc = pi->pi_arg;
2700 
2701 	for (i = 0; i < MAX_PORTS; i++) {
2702 		bctxt = sc->port[i].bctx;
2703 		if (bctxt == NULL)
2704 			continue;
2705 
2706 		blockif_resume(bctxt);
2707 	}
2708 
2709 	return (0);
2710 }
2711 #endif	/* BHYVE_SNAPSHOT */
2712 
2713 /*
2714  * Use separate emulation names to distinguish drive and atapi devices
2715  */
2716 static const struct pci_devemu pci_de_ahci = {
2717 	.pe_emu =	"ahci",
2718 	.pe_init =	pci_ahci_init,
2719 	.pe_legacy_config = pci_ahci_legacy_config,
2720 	.pe_barwrite =	pci_ahci_write,
2721 	.pe_barread =	pci_ahci_read,
2722 #ifdef BHYVE_SNAPSHOT
2723 	.pe_snapshot =	pci_ahci_snapshot,
2724 	.pe_pause =	pci_ahci_pause,
2725 	.pe_resume =	pci_ahci_resume,
2726 #endif
2727 };
2728 PCI_EMUL_SET(pci_de_ahci);
2729 
2730 static const struct pci_devemu pci_de_ahci_hd = {
2731 	.pe_emu =	"ahci-hd",
2732 	.pe_legacy_config = pci_ahci_hd_legacy_config,
2733 	.pe_alias =	"ahci",
2734 };
2735 PCI_EMUL_SET(pci_de_ahci_hd);
2736 
2737 static const struct pci_devemu pci_de_ahci_cd = {
2738 	.pe_emu =	"ahci-cd",
2739 	.pe_legacy_config = pci_ahci_cd_legacy_config,
2740 	.pe_alias =	"ahci",
2741 };
2742 PCI_EMUL_SET(pci_de_ahci_cd);
2743