xref: /freebsd/usr.sbin/bhyve/pci_ahci.c (revision ec0e626bafb335b30c499d06066997f54b10c092)
1 /*-
2  * Copyright (c) 2013  Zhixiang Yu <zcore@freebsd.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/linker_set.h>
34 #include <sys/stat.h>
35 #include <sys/uio.h>
36 #include <sys/ioctl.h>
37 #include <sys/disk.h>
38 #include <sys/ata.h>
39 #include <sys/endian.h>
40 
41 #include <errno.h>
42 #include <fcntl.h>
43 #include <stdio.h>
44 #include <stdlib.h>
45 #include <stdint.h>
46 #include <string.h>
47 #include <strings.h>
48 #include <unistd.h>
49 #include <assert.h>
50 #include <pthread.h>
51 #include <pthread_np.h>
52 #include <inttypes.h>
53 #include <md5.h>
54 
55 #include "bhyverun.h"
56 #include "pci_emul.h"
57 #include "ahci.h"
58 #include "block_if.h"
59 
#define	MAX_PORTS	6	/* Intel ICH8 AHCI supports 6 ports */

/*
 * Port signature values; ahci_port_reset() stores one of these in the
 * port's 'sig' field depending on whether the port is ATA or ATAPI.
 */
#define	PxSIG_ATA	0x00000101 /* ATA drive */
#define	PxSIG_ATAPI	0xeb140101 /* ATAPI drive */
64 
/*
 * SATA FIS type codes.  The FIS builders in this file place the code in
 * byte 0 of the FIS, and ahci_write_fis() uses it to pick the offset
 * inside the port's received-FIS area.
 */
enum sata_fis_type {
	FIS_TYPE_REGH2D		= 0x27,	/* Register FIS - host to device */
	FIS_TYPE_REGD2H		= 0x34,	/* Register FIS - device to host */
	FIS_TYPE_DMAACT		= 0x39,	/* DMA activate FIS - device to host */
	FIS_TYPE_DMASETUP	= 0x41,	/* DMA setup FIS - bidirectional */
	FIS_TYPE_DATA		= 0x46,	/* Data FIS - bidirectional */
	FIS_TYPE_BIST		= 0x58,	/* BIST activate FIS - bidirectional */
	FIS_TYPE_PIOSETUP	= 0x5F,	/* PIO setup FIS - device to host */
	FIS_TYPE_SETDEVBITS	= 0xA1,	/* Set dev bits FIS - device to host */
};
75 
/*
 * SCSI opcodes
 *
 * These are compared against byte 0 of the ATAPI packet, which the
 * handlers in this file locate at offset 0x40 of the command table.
 */
#define	TEST_UNIT_READY		0x00
#define	REQUEST_SENSE		0x03
#define	INQUIRY			0x12
#define	START_STOP_UNIT		0x1B
#define	PREVENT_ALLOW		0x1E
#define	READ_CAPACITY		0x25
#define	READ_10			0x28
#define	POSITION_TO_ELEMENT	0x2B
#define	READ_TOC		0x43
#define	GET_EVENT_STATUS_NOTIFICATION 0x4A
#define	MODE_SENSE_10		0x5A
#define	REPORT_LUNS		0xA0
#define	READ_12			0xA8
#define	READ_CD			0xBE

/*
 * SCSI mode page codes
 */
#define	MODEPAGE_RW_ERROR_RECOVERY	0x01
#define	MODEPAGE_CD_CAPABILITIES	0x2A

/*
 * ATA commands
 *
 * NOTE(review): judging by the ATA_SF_ prefix these look like SET FEATURES
 * subcommand values rather than command opcodes -- confirm against the
 * code that consumes them (not in this part of the file).
 */
#define	ATA_SF_ENAB_SATA_SF		0x10
#define		ATA_SATA_SF_AN		0x05
#define	ATA_SF_DIS_SATA_SF		0x90
106 
/*
 * Debug printf
 *
 * DPRINTF writes to the 'dbg' stream and flushes it after every message
 * when AHCI_DEBUG is defined; otherwise it expands to nothing, so its
 * arguments are not evaluated.  WPRINTF always prints to stdout.
 */
#ifdef AHCI_DEBUG
static FILE *dbg;
#define DPRINTF(format, arg...)	do{fprintf(dbg, format, ##arg);fflush(dbg);}while(0)
#else
#define DPRINTF(format, arg...)
#endif
#define WPRINTF(format, arg...) printf(format, ##arg)
117 
/*
 * Per-request bookkeeping that wraps one blockif request.  An entry sits
 * either on its port's free list (iofhd) or on its busy list (iobhd).
 */
struct ahci_ioreq {
	struct blockif_req io_req;	/* request handed to the blockif_*() calls */
	struct ahci_port *io_pr;	/* presumably the owning port -- TODO confirm */
	STAILQ_ENTRY(ahci_ioreq) io_flist;	/* linkage on the port free list */
	TAILQ_ENTRY(ahci_ioreq) io_blist;	/* linkage on the port busy list */
	uint8_t *cfis;			/* command FIS of the command being serviced */
	uint32_t len;			/* total length of the command, in bytes */
	uint32_t done;			/* bytes accounted for so far */
	int slot;			/* command slot this request belongs to */
	int prdtl;			/* PRDT entries left over after this request */
};
129 
/*
 * State of a single AHCI port.
 */
struct ahci_port {
	struct blockif_ctxt *bctx;	/* block backend; NULL is treated as "no device" */
	struct pci_ahci_softc *pr_sc;	/* owning controller */
	uint8_t *cmd_lst;		/* command list; indexed by slot * AHCI_CL_SIZE */
	uint8_t *rfis;			/* received-FIS area written by ahci_write_fis() */
	char ident[20 + 1];		/* string reported in the IDENTIFY data (20 chars) */
	int atapi;			/* non-zero when the port is an ATAPI device */
	int reset;
	int mult_sectors;		/* reported in IDENTIFY word 59 when non-zero */
	uint8_t xfermode;		/* current transfer mode (ATA_UDMA6 after reset) */
	uint8_t err_cfis[20];		/* error record saved by the FIS writers */
	uint8_t sense_key;		/* ATAPI sense key of the last failed command */
	uint8_t asc;			/* ATAPI additional sense code */
	uint32_t pending;		/* bitmap of slots with a request in flight */

	/*
	 * NOTE(review): the fields below look like a mirror of the AHCI
	 * per-port register block, in register order -- confirm before
	 * reordering or inserting members.
	 */
	uint32_t clb;
	uint32_t clbu;
	uint32_t fb;
	uint32_t fbu;
	uint32_t is;
	uint32_t ie;
	uint32_t cmd;
	uint32_t unused0;
	uint32_t tfd;
	uint32_t sig;
	uint32_t ssts;
	uint32_t sctl;
	uint32_t serr;
	uint32_t sact;
	uint32_t ci;
	uint32_t sntf;
	uint32_t fbs;

	/*
	 * i/o request info
	 */
	struct ahci_ioreq *ioreq;
	int ioqsz;
	STAILQ_HEAD(ahci_fhead, ahci_ioreq) iofhd;	/* free requests */
	TAILQ_HEAD(ahci_bhead, ahci_ioreq) iobhd;	/* requests in flight */
};
171 
/*
 * One command-list entry.  The handlers in this file locate it at
 * cmd_lst + slot * AHCI_CL_SIZE.
 */
struct ahci_cmd_hdr {
	uint16_t flags;
	uint16_t prdtl;		/* number of PRDT entries for the command */
	uint32_t prdbc;		/* byte count; write_prdt() stores the bytes it copied */
	uint64_t ctba;
	uint32_t reserved[4];
};
179 
/*
 * One PRDT (scatter/gather) entry.  The PRDT is read starting at offset
 * 0x80 from the command FIS pointer.
 */
struct ahci_prdt_entry {
	uint64_t dba;		/* guest-physical address of the data buffer */
	uint32_t reserved;
#define	DBCMASK		0x3fffff
	uint32_t dbc;		/* (dbc & DBCMASK) + 1 is the buffer size in bytes */
};
186 
/*
 * Controller-wide state.
 */
struct pci_ahci_softc {
	struct pci_devinst *asc_pi;	/* PCI device instance used to raise interrupts */
	pthread_mutex_t	mtx;		/* ahci_port_stop() asserts that this is held */
	int ports;			/* number of entries of port[] in use */
	uint32_t cap;
	uint32_t ghc;			/* AHCI_GHC_IE here gates interrupt delivery */
	uint32_t is;			/* one bit per port with an interrupt pending */
	uint32_t pi;
	uint32_t vs;
	uint32_t ccc_ctl;
	uint32_t ccc_pts;
	uint32_t em_loc;
	uint32_t em_ctl;
	uint32_t cap2;
	uint32_t bohc;
	uint32_t lintr;			/* non-zero while the pin interrupt is asserted */
	struct ahci_port port[MAX_PORTS];
};
/* VM context of the controller, as needed by paddr_guest2host(). */
#define	ahci_ctx(sc)	((sc)->asc_pi->pi_vmctx)
206 
/*
 * Convert a logical block address into minute/second/frame form.
 *
 * 150 frames are added to the address first; there are 75 frames per
 * second and 60 seconds per minute.  The result is stored as
 * buf[0] = minutes, buf[1] = seconds, buf[2] = frames.
 */
static inline void lba_to_msf(uint8_t *buf, int lba)
{
	int frames, seconds;

	frames = lba + 150;
	seconds = frames / 75;

	buf[0] = seconds / 60;
	buf[1] = seconds % 60;
	buf[2] = frames % 75;
}
214 
215 /*
216  * generate HBA intr depending on whether or not ports within
217  * the controller have an interrupt pending.
218  */
219 static void
220 ahci_generate_intr(struct pci_ahci_softc *sc)
221 {
222 	struct pci_devinst *pi;
223 	int i;
224 
225 	pi = sc->asc_pi;
226 
227 	for (i = 0; i < sc->ports; i++) {
228 		struct ahci_port *pr;
229 		pr = &sc->port[i];
230 		if (pr->is & pr->ie)
231 			sc->is |= (1 << i);
232 	}
233 
234 	DPRINTF("%s %x\n", __func__, sc->is);
235 
236 	if (sc->is && (sc->ghc & AHCI_GHC_IE)) {
237 		if (pci_msi_enabled(pi)) {
238 			/*
239 			 * Generate an MSI interrupt on every edge
240 			 */
241 			pci_generate_msi(pi, 0);
242 		} else if (!sc->lintr) {
243 			/*
244 			 * Only generate a pin-based interrupt if one wasn't
245 			 * in progress
246 			 */
247 			sc->lintr = 1;
248 			pci_lintr_assert(pi);
249 		}
250 	} else if (sc->lintr) {
251 		/*
252 		 * No interrupts: deassert pin-based signal if it had
253 		 * been asserted
254 		 */
255 		pci_lintr_deassert(pi);
256 		sc->lintr = 0;
257 	}
258 }
259 
260 static void
261 ahci_write_fis(struct ahci_port *p, enum sata_fis_type ft, uint8_t *fis)
262 {
263 	int offset, len, irq;
264 
265 	if (p->rfis == NULL || !(p->cmd & AHCI_P_CMD_FRE))
266 		return;
267 
268 	switch (ft) {
269 	case FIS_TYPE_REGD2H:
270 		offset = 0x40;
271 		len = 20;
272 		irq = AHCI_P_IX_DHR;
273 		break;
274 	case FIS_TYPE_SETDEVBITS:
275 		offset = 0x58;
276 		len = 8;
277 		irq = AHCI_P_IX_SDB;
278 		break;
279 	case FIS_TYPE_PIOSETUP:
280 		offset = 0x20;
281 		len = 20;
282 		irq = 0;
283 		break;
284 	default:
285 		WPRINTF("unsupported fis type %d\n", ft);
286 		return;
287 	}
288 	memcpy(p->rfis + offset, fis, len);
289 	if (irq) {
290 		p->is |= irq;
291 		ahci_generate_intr(p->pr_sc);
292 	}
293 }
294 
295 static void
296 ahci_write_fis_piosetup(struct ahci_port *p)
297 {
298 	uint8_t fis[20];
299 
300 	memset(fis, 0, sizeof(fis));
301 	fis[0] = FIS_TYPE_PIOSETUP;
302 	ahci_write_fis(p, FIS_TYPE_PIOSETUP, fis);
303 }
304 
305 static void
306 ahci_write_fis_sdb(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
307 {
308 	uint8_t fis[8];
309 	uint8_t error;
310 
311 	error = (tfd >> 8) & 0xff;
312 	memset(fis, 0, sizeof(fis));
313 	fis[0] = FIS_TYPE_SETDEVBITS;
314 	fis[1] = (1 << 6);
315 	fis[2] = tfd & 0x77;
316 	fis[3] = error;
317 	if (fis[2] & ATA_S_ERROR) {
318 		p->is |= AHCI_P_IX_TFE;
319 		p->err_cfis[0] = slot;
320 		p->err_cfis[2] = tfd & 0x77;
321 		p->err_cfis[3] = error;
322 		memcpy(&p->err_cfis[4], cfis + 4, 16);
323 	} else {
324 		*(uint32_t *)(fis + 4) = (1 << slot);
325 		p->sact &= ~(1 << slot);
326 	}
327 	p->tfd = tfd;
328 	ahci_write_fis(p, FIS_TYPE_SETDEVBITS, fis);
329 }
330 
331 static void
332 ahci_write_fis_d2h(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
333 {
334 	uint8_t fis[20];
335 	uint8_t error;
336 
337 	error = (tfd >> 8) & 0xff;
338 	memset(fis, 0, sizeof(fis));
339 	fis[0] = FIS_TYPE_REGD2H;
340 	fis[1] = (1 << 6);
341 	fis[2] = tfd & 0xff;
342 	fis[3] = error;
343 	fis[4] = cfis[4];
344 	fis[5] = cfis[5];
345 	fis[6] = cfis[6];
346 	fis[7] = cfis[7];
347 	fis[8] = cfis[8];
348 	fis[9] = cfis[9];
349 	fis[10] = cfis[10];
350 	fis[11] = cfis[11];
351 	fis[12] = cfis[12];
352 	fis[13] = cfis[13];
353 	if (fis[2] & ATA_S_ERROR) {
354 		p->is |= AHCI_P_IX_TFE;
355 		p->err_cfis[0] = 0x80;
356 		p->err_cfis[2] = tfd & 0xff;
357 		p->err_cfis[3] = error;
358 		memcpy(&p->err_cfis[4], cfis + 4, 16);
359 	} else
360 		p->ci &= ~(1 << slot);
361 	p->tfd = tfd;
362 	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
363 }
364 
365 static void
366 ahci_write_reset_fis_d2h(struct ahci_port *p)
367 {
368 	uint8_t fis[20];
369 
370 	memset(fis, 0, sizeof(fis));
371 	fis[0] = FIS_TYPE_REGD2H;
372 	fis[3] = 1;
373 	fis[4] = 1;
374 	if (p->atapi) {
375 		fis[5] = 0x14;
376 		fis[6] = 0xeb;
377 	}
378 	fis[12] = 1;
379 	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
380 }
381 
382 static void
383 ahci_check_stopped(struct ahci_port *p)
384 {
385 	/*
386 	 * If we are no longer processing the command list and nothing
387 	 * is in-flight, clear the running bit, the current command
388 	 * slot, the command issue and active bits.
389 	 */
390 	if (!(p->cmd & AHCI_P_CMD_ST)) {
391 		if (p->pending == 0) {
392 			p->cmd &= ~(AHCI_P_CMD_CR | AHCI_P_CMD_CCS_MASK);
393 			p->ci = 0;
394 			p->sact = 0;
395 		}
396 	}
397 }
398 
399 static void
400 ahci_port_stop(struct ahci_port *p)
401 {
402 	struct ahci_ioreq *aior;
403 	uint8_t *cfis;
404 	int slot;
405 	int ncq;
406 	int error;
407 
408 	assert(pthread_mutex_isowned_np(&p->pr_sc->mtx));
409 
410 	TAILQ_FOREACH(aior, &p->iobhd, io_blist) {
411 		/*
412 		 * Try to cancel the outstanding blockif request.
413 		 */
414 		error = blockif_cancel(p->bctx, &aior->io_req);
415 		if (error != 0)
416 			continue;
417 
418 		slot = aior->slot;
419 		cfis = aior->cfis;
420 		if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
421 		    cfis[2] == ATA_READ_FPDMA_QUEUED)
422 			ncq = 1;
423 
424 		if (ncq)
425 			p->sact &= ~(1 << slot);
426 		else
427 			p->ci &= ~(1 << slot);
428 
429 		/*
430 		 * This command is now done.
431 		 */
432 		p->pending &= ~(1 << slot);
433 
434 		/*
435 		 * Delete the blockif request from the busy list
436 		 */
437 		TAILQ_REMOVE(&p->iobhd, aior, io_blist);
438 
439 		/*
440 		 * Move the blockif request back to the free list
441 		 */
442 		STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
443 	}
444 
445 	ahci_check_stopped(p);
446 }
447 
448 static void
449 ahci_port_reset(struct ahci_port *pr)
450 {
451 	pr->serr = 0;
452 	pr->sact = 0;
453 	pr->xfermode = ATA_UDMA6;
454 	pr->mult_sectors = 128;
455 
456 	if (!pr->bctx) {
457 		pr->ssts = ATA_SS_DET_NO_DEVICE;
458 		pr->sig = 0xFFFFFFFF;
459 		pr->tfd = 0x7F;
460 		return;
461 	}
462 	pr->ssts = ATA_SS_DET_PHY_ONLINE | ATA_SS_IPM_ACTIVE;
463 	if (pr->sctl & ATA_SC_SPD_MASK)
464 		pr->ssts |= (pr->sctl & ATA_SC_SPD_MASK);
465 	else
466 		pr->ssts |= ATA_SS_SPD_GEN3;
467 	pr->tfd = (1 << 8) | ATA_S_DSC | ATA_S_DMA;
468 	if (!pr->atapi) {
469 		pr->sig = PxSIG_ATA;
470 		pr->tfd |= ATA_S_READY;
471 	} else
472 		pr->sig = PxSIG_ATAPI;
473 	ahci_write_reset_fis_d2h(pr);
474 }
475 
476 static void
477 ahci_reset(struct pci_ahci_softc *sc)
478 {
479 	int i;
480 
481 	sc->ghc = AHCI_GHC_AE;
482 	sc->is = 0;
483 
484 	if (sc->lintr) {
485 		pci_lintr_deassert(sc->asc_pi);
486 		sc->lintr = 0;
487 	}
488 
489 	for (i = 0; i < sc->ports; i++) {
490 		sc->port[i].ie = 0;
491 		sc->port[i].is = 0;
492 		sc->port[i].sctl = 0;
493 		ahci_port_reset(&sc->port[i]);
494 	}
495 }
496 
/*
 * Fill 'len' bytes of 'dest' from the NUL-terminated string 'src',
 * padding with spaces once the string runs out.  The two bytes of every
 * 16-bit word are swapped (byte i lands at index i ^ 1), so 'len' is
 * expected to be even.
 */
static void
ata_string(uint8_t *dest, const char *src, int len)
{
	int pos;

	for (pos = 0; pos < len; pos++) {
		uint8_t ch = ' ';

		if (*src != '\0')
			ch = *src++;
		dest[pos ^ 1] = ch;
	}
}
509 
/*
 * Fill 'len' bytes of 'dest' from the NUL-terminated string 'src' in
 * natural byte order, padding with spaces once the string runs out.
 * The result is not NUL-terminated.
 */
static void
atapi_string(uint8_t *dest, const char *src, int len)
{
	uint8_t *out;
	uint8_t *end;

	end = dest + len;
	for (out = dest; out < end; out++) {
		if (*src != '\0')
			*out = *src++;
		else
			*out = ' ';
	}
}
522 
523 static void
524 ahci_handle_dma(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done,
525     int seek)
526 {
527 	struct ahci_ioreq *aior;
528 	struct blockif_req *breq;
529 	struct pci_ahci_softc *sc;
530 	struct ahci_prdt_entry *prdt;
531 	struct ahci_cmd_hdr *hdr;
532 	uint64_t lba;
533 	uint32_t len;
534 	int i, err, iovcnt, ncq, readop;
535 
536 	sc = p->pr_sc;
537 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
538 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
539 	ncq = 0;
540 	readop = 1;
541 
542 	prdt += seek;
543 	if (cfis[2] == ATA_WRITE || cfis[2] == ATA_WRITE48 ||
544 	    cfis[2] == ATA_WRITE_MUL || cfis[2] == ATA_WRITE_MUL48 ||
545 	    cfis[2] == ATA_WRITE_DMA || cfis[2] == ATA_WRITE_DMA48 ||
546 	    cfis[2] == ATA_WRITE_FPDMA_QUEUED)
547 		readop = 0;
548 
549 	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
550 	    cfis[2] == ATA_READ_FPDMA_QUEUED) {
551 		lba = ((uint64_t)cfis[10] << 40) |
552 			((uint64_t)cfis[9] << 32) |
553 			((uint64_t)cfis[8] << 24) |
554 			((uint64_t)cfis[6] << 16) |
555 			((uint64_t)cfis[5] << 8) |
556 			cfis[4];
557 		len = cfis[11] << 8 | cfis[3];
558 		if (!len)
559 			len = 65536;
560 		ncq = 1;
561 	} else if (cfis[2] == ATA_READ48 || cfis[2] == ATA_WRITE48 ||
562 	    cfis[2] == ATA_READ_MUL48 || cfis[2] == ATA_WRITE_MUL48 ||
563 	    cfis[2] == ATA_READ_DMA48 || cfis[2] == ATA_WRITE_DMA48) {
564 		lba = ((uint64_t)cfis[10] << 40) |
565 			((uint64_t)cfis[9] << 32) |
566 			((uint64_t)cfis[8] << 24) |
567 			((uint64_t)cfis[6] << 16) |
568 			((uint64_t)cfis[5] << 8) |
569 			cfis[4];
570 		len = cfis[13] << 8 | cfis[12];
571 		if (!len)
572 			len = 65536;
573 	} else {
574 		lba = ((cfis[7] & 0xf) << 24) | (cfis[6] << 16) |
575 			(cfis[5] << 8) | cfis[4];
576 		len = cfis[12];
577 		if (!len)
578 			len = 256;
579 	}
580 	lba *= blockif_sectsz(p->bctx);
581 	len *= blockif_sectsz(p->bctx);
582 
583 	/*
584 	 * Pull request off free list
585 	 */
586 	aior = STAILQ_FIRST(&p->iofhd);
587 	assert(aior != NULL);
588 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
589 	aior->cfis = cfis;
590 	aior->slot = slot;
591 	aior->len = len;
592 	aior->done = done;
593 	breq = &aior->io_req;
594 	breq->br_offset = lba + done;
595 	iovcnt = hdr->prdtl - seek;
596 	if (iovcnt > BLOCKIF_IOV_MAX) {
597 		aior->prdtl = iovcnt - BLOCKIF_IOV_MAX;
598 		iovcnt = BLOCKIF_IOV_MAX;
599 	} else
600 		aior->prdtl = 0;
601 	breq->br_iovcnt = iovcnt;
602 
603 	/*
604 	 * Mark this command in-flight.
605 	 */
606 	p->pending |= 1 << slot;
607 
608 	/*
609 	 * Stuff request onto busy list
610 	 */
611 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
612 
613 	/*
614 	 * Build up the iovec based on the prdt
615 	 */
616 	for (i = 0; i < iovcnt; i++) {
617 		uint32_t dbcsz;
618 
619 		dbcsz = (prdt->dbc & DBCMASK) + 1;
620 		breq->br_iov[i].iov_base = paddr_guest2host(ahci_ctx(sc),
621 		    prdt->dba, dbcsz);
622 		breq->br_iov[i].iov_len = dbcsz;
623 		aior->done += dbcsz;
624 		prdt++;
625 	}
626 	if (readop)
627 		err = blockif_read(p->bctx, breq);
628 	else
629 		err = blockif_write(p->bctx, breq);
630 	assert(err == 0);
631 
632 	if (ncq)
633 		p->ci &= ~(1 << slot);
634 }
635 
636 static void
637 ahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis)
638 {
639 	struct ahci_ioreq *aior;
640 	struct blockif_req *breq;
641 	int err;
642 
643 	/*
644 	 * Pull request off free list
645 	 */
646 	aior = STAILQ_FIRST(&p->iofhd);
647 	assert(aior != NULL);
648 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
649 	aior->cfis = cfis;
650 	aior->slot = slot;
651 	aior->len = 0;
652 	aior->done = 0;
653 	aior->prdtl = 0;
654 	breq = &aior->io_req;
655 
656 	/*
657 	 * Mark this command in-flight.
658 	 */
659 	p->pending |= 1 << slot;
660 
661 	/*
662 	 * Stuff request onto busy list
663 	 */
664 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
665 
666 	err = blockif_flush(p->bctx, breq);
667 	assert(err == 0);
668 }
669 
670 static inline void
671 read_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
672 		void *buf, int size)
673 {
674 	struct ahci_cmd_hdr *hdr;
675 	struct ahci_prdt_entry *prdt;
676 	void *to;
677 	int i, len;
678 
679 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
680 	len = size;
681 	to = buf;
682 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
683 	for (i = 0; i < hdr->prdtl && len; i++) {
684 		uint8_t *ptr;
685 		uint32_t dbcsz;
686 		int sublen;
687 
688 		dbcsz = (prdt->dbc & DBCMASK) + 1;
689 		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
690 		sublen = len < dbcsz ? len : dbcsz;
691 		memcpy(to, ptr, sublen);
692 		len -= sublen;
693 		to += sublen;
694 		prdt++;
695 	}
696 }
697 
698 static void
699 ahci_handle_dsm_trim(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
700 {
701 	struct ahci_ioreq *aior;
702 	struct blockif_req *breq;
703 	uint8_t *entry;
704 	uint64_t elba;
705 	uint32_t len, elen;
706 	int err;
707 	uint8_t buf[512];
708 
709 	if (cfis[2] == ATA_DATA_SET_MANAGEMENT) {
710 		len = (uint16_t)cfis[13] << 8 | cfis[12];
711 		len *= 512;
712 	} else { /* ATA_SEND_FPDMA_QUEUED */
713 		len = (uint16_t)cfis[11] << 8 | cfis[3];
714 		len *= 512;
715 	}
716 	read_prdt(p, slot, cfis, buf, sizeof(buf));
717 
718 next:
719 	entry = &buf[done];
720 	elba = ((uint64_t)entry[5] << 40) |
721 		((uint64_t)entry[4] << 32) |
722 		((uint64_t)entry[3] << 24) |
723 		((uint64_t)entry[2] << 16) |
724 		((uint64_t)entry[1] << 8) |
725 		entry[0];
726 	elen = (uint16_t)entry[7] << 8 | entry[6];
727 	done += 8;
728 	if (elen == 0) {
729 		if (done >= len) {
730 			ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
731 			p->pending &= ~(1 << slot);
732 			ahci_check_stopped(p);
733 			return;
734 		}
735 		goto next;
736 	}
737 
738 	/*
739 	 * Pull request off free list
740 	 */
741 	aior = STAILQ_FIRST(&p->iofhd);
742 	assert(aior != NULL);
743 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
744 	aior->cfis = cfis;
745 	aior->slot = slot;
746 	aior->len = len;
747 	aior->done = done;
748 	aior->prdtl = 0;
749 
750 	breq = &aior->io_req;
751 	breq->br_offset = elba * blockif_sectsz(p->bctx);
752 	breq->br_iovcnt = 1;
753 	breq->br_iov[0].iov_len = elen * blockif_sectsz(p->bctx);
754 
755 	/*
756 	 * Mark this command in-flight.
757 	 */
758 	p->pending |= 1 << slot;
759 
760 	/*
761 	 * Stuff request onto busy list
762 	 */
763 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
764 
765 	err = blockif_delete(p->bctx, breq);
766 	assert(err == 0);
767 }
768 
769 static inline void
770 write_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
771 		void *buf, int size)
772 {
773 	struct ahci_cmd_hdr *hdr;
774 	struct ahci_prdt_entry *prdt;
775 	void *from;
776 	int i, len;
777 
778 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
779 	len = size;
780 	from = buf;
781 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
782 	for (i = 0; i < hdr->prdtl && len; i++) {
783 		uint8_t *ptr;
784 		uint32_t dbcsz;
785 		int sublen;
786 
787 		dbcsz = (prdt->dbc & DBCMASK) + 1;
788 		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
789 		sublen = len < dbcsz ? len : dbcsz;
790 		memcpy(ptr, from, sublen);
791 		len -= sublen;
792 		from += sublen;
793 		prdt++;
794 	}
795 	hdr->prdbc = size - len;
796 }
797 
/*
 * Store a checksum in the last byte of 'buf' so that the 8-bit sum of
 * all 'size' bytes is zero.
 *
 * buf	 buffer to checksum; its last byte is overwritten
 * size	 length of the buffer in bytes; nothing is done when size <= 0
 *	 (previously that case wrote to buf[-1])
 */
static void
ahci_checksum(uint8_t *buf, int size)
{
	int i;
	uint8_t sum = 0;

	if (size <= 0)
		return;

	for (i = 0; i < size - 1; i++)
		sum += buf[i];
	buf[size - 1] = 0x100 - sum;
}
808 
809 static void
810 ahci_handle_read_log(struct ahci_port *p, int slot, uint8_t *cfis)
811 {
812 	struct ahci_cmd_hdr *hdr;
813 	uint8_t buf[512];
814 
815 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
816 	if (p->atapi || hdr->prdtl == 0 || cfis[4] != 0x10 ||
817 	    cfis[5] != 0 || cfis[9] != 0 || cfis[12] != 1 || cfis[13] != 0) {
818 		ahci_write_fis_d2h(p, slot, cfis,
819 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
820 		return;
821 	}
822 
823 	memset(buf, 0, sizeof(buf));
824 	memcpy(buf, p->err_cfis, sizeof(p->err_cfis));
825 	ahci_checksum(buf, sizeof(buf));
826 
827 	if (cfis[2] == ATA_READ_LOG_EXT)
828 		ahci_write_fis_piosetup(p);
829 	write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
830 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
831 }
832 
833 static void
834 handle_identify(struct ahci_port *p, int slot, uint8_t *cfis)
835 {
836 	struct ahci_cmd_hdr *hdr;
837 
838 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
839 	if (p->atapi || hdr->prdtl == 0) {
840 		ahci_write_fis_d2h(p, slot, cfis,
841 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
842 	} else {
843 		uint16_t buf[256];
844 		uint64_t sectors;
845 		int sectsz, psectsz, psectoff, candelete, ro;
846 		uint16_t cyl;
847 		uint8_t sech, heads;
848 
849 		ro = blockif_is_ro(p->bctx);
850 		candelete = blockif_candelete(p->bctx);
851 		sectsz = blockif_sectsz(p->bctx);
852 		sectors = blockif_size(p->bctx) / sectsz;
853 		blockif_chs(p->bctx, &cyl, &heads, &sech);
854 		blockif_psectsz(p->bctx, &psectsz, &psectoff);
855 		memset(buf, 0, sizeof(buf));
856 		buf[0] = 0x0040;
857 		buf[1] = cyl;
858 		buf[3] = heads;
859 		buf[6] = sech;
860 		ata_string((uint8_t *)(buf+10), p->ident, 20);
861 		ata_string((uint8_t *)(buf+23), "001", 8);
862 		ata_string((uint8_t *)(buf+27), "BHYVE SATA DISK", 40);
863 		buf[47] = (0x8000 | 128);
864 		buf[48] = 0x1;
865 		buf[49] = (1 << 8 | 1 << 9 | 1 << 11);
866 		buf[50] = (1 << 14);
867 		buf[53] = (1 << 1 | 1 << 2);
868 		if (p->mult_sectors)
869 			buf[59] = (0x100 | p->mult_sectors);
870 		if (sectors <= 0x0fffffff) {
871 			buf[60] = sectors;
872 			buf[61] = (sectors >> 16);
873 		} else {
874 			buf[60] = 0xffff;
875 			buf[61] = 0x0fff;
876 		}
877 		buf[63] = 0x7;
878 		if (p->xfermode & ATA_WDMA0)
879 			buf[63] |= (1 << ((p->xfermode & 7) + 8));
880 		buf[64] = 0x3;
881 		buf[65] = 120;
882 		buf[66] = 120;
883 		buf[67] = 120;
884 		buf[68] = 120;
885 		buf[69] = 0;
886 		buf[75] = 31;
887 		buf[76] = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3 |
888 			   ATA_SUPPORT_NCQ);
889 		buf[77] = (ATA_SUPPORT_RCVSND_FPDMA_QUEUED |
890 			   (p->ssts & ATA_SS_SPD_MASK) >> 3);
891 		buf[80] = 0x3f0;
892 		buf[81] = 0x28;
893 		buf[82] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE|
894 			   ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
895 		buf[83] = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
896 			   ATA_SUPPORT_FLUSHCACHE48 | 1 << 14);
897 		buf[84] = (1 << 14);
898 		buf[85] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE|
899 			   ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
900 		buf[86] = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
901 			   ATA_SUPPORT_FLUSHCACHE48 | 1 << 15);
902 		buf[87] = (1 << 14);
903 		buf[88] = 0x7f;
904 		if (p->xfermode & ATA_UDMA0)
905 			buf[88] |= (1 << ((p->xfermode & 7) + 8));
906 		buf[93] = (1 | 1 <<14);
907 		buf[100] = sectors;
908 		buf[101] = (sectors >> 16);
909 		buf[102] = (sectors >> 32);
910 		buf[103] = (sectors >> 48);
911 		if (candelete && !ro) {
912 			buf[69] |= ATA_SUPPORT_RZAT | ATA_SUPPORT_DRAT;
913 			buf[105] = 1;
914 			buf[169] = ATA_SUPPORT_DSM_TRIM;
915 		}
916 		buf[106] = 0x4000;
917 		buf[209] = 0x4000;
918 		if (psectsz > sectsz) {
919 			buf[106] |= 0x2000;
920 			buf[106] |= ffsl(psectsz / sectsz) - 1;
921 			buf[209] |= (psectoff / sectsz);
922 		}
923 		if (sectsz > 512) {
924 			buf[106] |= 0x1000;
925 			buf[117] = sectsz / 2;
926 			buf[118] = ((sectsz / 2) >> 16);
927 		}
928 		buf[119] = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
929 		buf[120] = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
930 		buf[222] = 0x1020;
931 		buf[255] = 0x00a5;
932 		ahci_checksum((uint8_t *)buf, sizeof(buf));
933 		ahci_write_fis_piosetup(p);
934 		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
935 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
936 	}
937 }
938 
939 static void
940 handle_atapi_identify(struct ahci_port *p, int slot, uint8_t *cfis)
941 {
942 	if (!p->atapi) {
943 		ahci_write_fis_d2h(p, slot, cfis,
944 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
945 	} else {
946 		uint16_t buf[256];
947 
948 		memset(buf, 0, sizeof(buf));
949 		buf[0] = (2 << 14 | 5 << 8 | 1 << 7 | 2 << 5);
950 		ata_string((uint8_t *)(buf+10), p->ident, 20);
951 		ata_string((uint8_t *)(buf+23), "001", 8);
952 		ata_string((uint8_t *)(buf+27), "BHYVE SATA DVD ROM", 40);
953 		buf[49] = (1 << 9 | 1 << 8);
954 		buf[50] = (1 << 14 | 1);
955 		buf[53] = (1 << 2 | 1 << 1);
956 		buf[62] = 0x3f;
957 		buf[63] = 7;
958 		if (p->xfermode & ATA_WDMA0)
959 			buf[63] |= (1 << ((p->xfermode & 7) + 8));
960 		buf[64] = 3;
961 		buf[65] = 120;
962 		buf[66] = 120;
963 		buf[67] = 120;
964 		buf[68] = 120;
965 		buf[76] = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3);
966 		buf[77] = ((p->ssts & ATA_SS_SPD_MASK) >> 3);
967 		buf[78] = (1 << 5);
968 		buf[80] = 0x3f0;
969 		buf[82] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
970 			   ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
971 		buf[83] = (1 << 14);
972 		buf[84] = (1 << 14);
973 		buf[85] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
974 			   ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
975 		buf[87] = (1 << 14);
976 		buf[88] = 0x7f;
977 		if (p->xfermode & ATA_UDMA0)
978 			buf[88] |= (1 << ((p->xfermode & 7) + 8));
979 		buf[222] = 0x1020;
980 		buf[255] = 0x00a5;
981 		ahci_checksum((uint8_t *)buf, sizeof(buf));
982 		ahci_write_fis_piosetup(p);
983 		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
984 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
985 	}
986 }
987 
988 static void
989 atapi_inquiry(struct ahci_port *p, int slot, uint8_t *cfis)
990 {
991 	uint8_t buf[36];
992 	uint8_t *acmd;
993 	int len;
994 	uint32_t tfd;
995 
996 	acmd = cfis + 0x40;
997 
998 	if (acmd[1] & 1) {		/* VPD */
999 		if (acmd[2] == 0) {	/* Supported VPD pages */
1000 			buf[0] = 0x05;
1001 			buf[1] = 0;
1002 			buf[2] = 0;
1003 			buf[3] = 1;
1004 			buf[4] = 0;
1005 			len = 4 + buf[3];
1006 		} else {
1007 			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1008 			p->asc = 0x24;
1009 			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1010 			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1011 			ahci_write_fis_d2h(p, slot, cfis, tfd);
1012 			return;
1013 		}
1014 	} else {
1015 		buf[0] = 0x05;
1016 		buf[1] = 0x80;
1017 		buf[2] = 0x00;
1018 		buf[3] = 0x21;
1019 		buf[4] = 31;
1020 		buf[5] = 0;
1021 		buf[6] = 0;
1022 		buf[7] = 0;
1023 		atapi_string(buf + 8, "BHYVE", 8);
1024 		atapi_string(buf + 16, "BHYVE DVD-ROM", 16);
1025 		atapi_string(buf + 32, "001", 4);
1026 		len = sizeof(buf);
1027 	}
1028 
1029 	if (len > acmd[4])
1030 		len = acmd[4];
1031 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1032 	write_prdt(p, slot, cfis, buf, len);
1033 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1034 }
1035 
1036 static void
1037 atapi_read_capacity(struct ahci_port *p, int slot, uint8_t *cfis)
1038 {
1039 	uint8_t buf[8];
1040 	uint64_t sectors;
1041 
1042 	sectors = blockif_size(p->bctx) / 2048;
1043 	be32enc(buf, sectors - 1);
1044 	be32enc(buf + 4, 2048);
1045 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1046 	write_prdt(p, slot, cfis, buf, sizeof(buf));
1047 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1048 }
1049 
/*
 * Handle the SCSI READ TOC packet command.
 *
 * The allocation length is taken from CDB bytes 7-8 and the requested
 * format from the top two bits of CDB byte 9.  Formats 0, 1 and 2 build
 * a fixed-layout reply describing a single track followed by the end of
 * the medium; any other format fails with an illegal-request sense key
 * and ASC 0x24.  The reply is truncated to the allocation length.
 */
static void
atapi_read_toc(struct ahci_port *p, int slot, uint8_t *cfis)
{
	uint8_t *acmd;
	uint8_t format;
	int len;

	acmd = cfis + 0x40;

	len = be16dec(acmd + 7);
	format = acmd[9] >> 6;
	switch (format) {
	case 0:
	{
		int msf, size;
		uint64_t sectors;
		uint8_t start_track, buf[20], *bp;

		/* Bit 1 of CDB byte 1 selects MSF instead of LBA addresses. */
		msf = (acmd[1] >> 1) & 1;
		start_track = acmd[6];
		/* Only tracks 0, 1 and 0xaa are accepted. */
		if (start_track > 1 && start_track != 0xaa) {
			uint32_t tfd;
			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
			p->asc = 0x24;
			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
			ahci_write_fis_d2h(p, slot, cfis, tfd);
			return;
		}
		/* Bytes 0-1 (data length) are filled in at the end. */
		bp = buf + 2;
		*bp++ = 1;
		*bp++ = 1;
		if (start_track <= 1) {
			/* Descriptor for track 1, starting at address 0. */
			*bp++ = 0;
			*bp++ = 0x14;
			*bp++ = 1;
			*bp++ = 0;
			if (msf) {
				*bp++ = 0;
				lba_to_msf(bp, 0);
				bp += 3;
			} else {
				*bp++ = 0;
				*bp++ = 0;
				*bp++ = 0;
				*bp++ = 0;
			}
		}
		/* Descriptor for track 0xaa, located at the end of the medium. */
		*bp++ = 0;
		*bp++ = 0x14;
		*bp++ = 0xaa;
		*bp++ = 0;
		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
		/*
		 * NOTE(review): the shift presumably converts 512-byte
		 * sectors into 2048-byte blocks -- confirm for backends
		 * with a different sector size.
		 */
		sectors >>= 2;
		if (msf) {
			*bp++ = 0;
			lba_to_msf(bp, sectors);
			bp += 3;
		} else {
			be32enc(bp, sectors);
			bp += 4;
		}
		size = bp - buf;
		/* Data length excludes the two length bytes themselves. */
		be16enc(buf, size - 2);
		if (len > size)
			len = size;
		write_prdt(p, slot, cfis, buf, len);
		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
		break;
	}
	case 1:
	{
		uint8_t buf[12];

		/* Fixed 12-byte reply: length 0x0a, first/last field both 1. */
		memset(buf, 0, sizeof(buf));
		buf[1] = 0xa;
		buf[2] = 0x1;
		buf[3] = 0x1;
		if (len > sizeof(buf))
			len = sizeof(buf);
		write_prdt(p, slot, cfis, buf, len);
		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
		break;
	}
	case 2:
	{
		int msf, size;
		uint64_t sectors;
		uint8_t start_track, *bp, buf[50];

		msf = (acmd[1] >> 1) & 1;
		/* start_track is read but not used by this format. */
		start_track = acmd[6];
		/* Bytes 0-1 (data length) are filled in at the end. */
		bp = buf + 2;
		*bp++ = 1;
		*bp++ = 1;

		/* 11-byte descriptor for point 0xa0. */
		*bp++ = 1;
		*bp++ = 0x14;
		*bp++ = 0;
		*bp++ = 0xa0;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 1;
		*bp++ = 0;
		*bp++ = 0;

		/* 11-byte descriptor for point 0xa1. */
		*bp++ = 1;
		*bp++ = 0x14;
		*bp++ = 0;
		*bp++ = 0xa1;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 1;
		*bp++ = 0;
		*bp++ = 0;

		/* Descriptor for point 0xa2, carrying the end-of-medium address. */
		*bp++ = 1;
		*bp++ = 0x14;
		*bp++ = 0;
		*bp++ = 0xa2;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
		sectors >>= 2;
		if (msf) {
			*bp++ = 0;
			lba_to_msf(bp, sectors);
			bp += 3;
		} else {
			be32enc(bp, sectors);
			bp += 4;
		}

		/* Descriptor for point 1, starting at address 0. */
		*bp++ = 1;
		*bp++ = 0x14;
		*bp++ = 0;
		*bp++ = 1;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		if (msf) {
			*bp++ = 0;
			lba_to_msf(bp, 0);
			bp += 3;
		} else {
			*bp++ = 0;
			*bp++ = 0;
			*bp++ = 0;
			*bp++ = 0;
		}

		size = bp - buf;
		be16enc(buf, size - 2);
		if (len > size)
			len = size;
		write_prdt(p, slot, cfis, buf, len);
		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
		break;
	}
	default:
	{
		uint32_t tfd;

		/* Unsupported format: fail with illegal request. */
		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
		p->asc = 0x24;
		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
		ahci_write_fis_d2h(p, slot, cfis, tfd);
		break;
	}
	}
}
1230 
1231 static void
1232 atapi_report_luns(struct ahci_port *p, int slot, uint8_t *cfis)
1233 {
1234 	uint8_t buf[16];
1235 
1236 	memset(buf, 0, sizeof(buf));
1237 	buf[3] = 8;
1238 
1239 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1240 	write_prdt(p, slot, cfis, buf, sizeof(buf));
1241 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1242 }
1243 
1244 static void
1245 atapi_read(struct ahci_port *p, int slot, uint8_t *cfis,
1246 		uint32_t done, int seek)
1247 {
1248 	struct ahci_ioreq *aior;
1249 	struct ahci_cmd_hdr *hdr;
1250 	struct ahci_prdt_entry *prdt;
1251 	struct blockif_req *breq;
1252 	struct pci_ahci_softc *sc;
1253 	uint8_t *acmd;
1254 	uint64_t lba;
1255 	uint32_t len;
1256 	int i, err, iovcnt;
1257 
1258 	sc = p->pr_sc;
1259 	acmd = cfis + 0x40;
1260 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1261 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1262 
1263 	prdt += seek;
1264 	lba = be32dec(acmd + 2);
1265 	if (acmd[0] == READ_10)
1266 		len = be16dec(acmd + 7);
1267 	else
1268 		len = be32dec(acmd + 6);
1269 	if (len == 0) {
1270 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1271 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1272 	}
1273 	lba *= 2048;
1274 	len *= 2048;
1275 
1276 	/*
1277 	 * Pull request off free list
1278 	 */
1279 	aior = STAILQ_FIRST(&p->iofhd);
1280 	assert(aior != NULL);
1281 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
1282 	aior->cfis = cfis;
1283 	aior->slot = slot;
1284 	aior->len = len;
1285 	aior->done = done;
1286 	breq = &aior->io_req;
1287 	breq->br_offset = lba + done;
1288 	iovcnt = hdr->prdtl - seek;
1289 	if (iovcnt > BLOCKIF_IOV_MAX) {
1290 		aior->prdtl = iovcnt - BLOCKIF_IOV_MAX;
1291 		iovcnt = BLOCKIF_IOV_MAX;
1292 	} else
1293 		aior->prdtl = 0;
1294 	breq->br_iovcnt = iovcnt;
1295 
1296 	/*
1297 	 * Mark this command in-flight.
1298 	 */
1299 	p->pending |= 1 << slot;
1300 
1301 	/*
1302 	 * Stuff request onto busy list
1303 	 */
1304 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
1305 
1306 	/*
1307 	 * Build up the iovec based on the prdt
1308 	 */
1309 	for (i = 0; i < iovcnt; i++) {
1310 		uint32_t dbcsz;
1311 
1312 		dbcsz = (prdt->dbc & DBCMASK) + 1;
1313 		breq->br_iov[i].iov_base = paddr_guest2host(ahci_ctx(sc),
1314 		    prdt->dba, dbcsz);
1315 		breq->br_iov[i].iov_len = dbcsz;
1316 		aior->done += dbcsz;
1317 		prdt++;
1318 	}
1319 	err = blockif_read(p->bctx, breq);
1320 	assert(err == 0);
1321 }
1322 
1323 static void
1324 atapi_request_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1325 {
1326 	uint8_t buf[64];
1327 	uint8_t *acmd;
1328 	int len;
1329 
1330 	acmd = cfis + 0x40;
1331 	len = acmd[4];
1332 	if (len > sizeof(buf))
1333 		len = sizeof(buf);
1334 	memset(buf, 0, len);
1335 	buf[0] = 0x70 | (1 << 7);
1336 	buf[2] = p->sense_key;
1337 	buf[7] = 10;
1338 	buf[12] = p->asc;
1339 	write_prdt(p, slot, cfis, buf, len);
1340 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1341 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1342 }
1343 
1344 static void
1345 atapi_start_stop_unit(struct ahci_port *p, int slot, uint8_t *cfis)
1346 {
1347 	uint8_t *acmd = cfis + 0x40;
1348 	uint32_t tfd;
1349 
1350 	switch (acmd[4] & 3) {
1351 	case 0:
1352 	case 1:
1353 	case 3:
1354 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1355 		tfd = ATA_S_READY | ATA_S_DSC;
1356 		break;
1357 	case 2:
1358 		/* TODO eject media */
1359 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1360 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1361 		p->asc = 0x53;
1362 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1363 		break;
1364 	}
1365 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1366 }
1367 
1368 static void
1369 atapi_mode_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1370 {
1371 	uint8_t *acmd;
1372 	uint32_t tfd;
1373 	uint8_t pc, code;
1374 	int len;
1375 
1376 	acmd = cfis + 0x40;
1377 	len = be16dec(acmd + 7);
1378 	pc = acmd[2] >> 6;
1379 	code = acmd[2] & 0x3f;
1380 
1381 	switch (pc) {
1382 	case 0:
1383 		switch (code) {
1384 		case MODEPAGE_RW_ERROR_RECOVERY:
1385 		{
1386 			uint8_t buf[16];
1387 
1388 			if (len > sizeof(buf))
1389 				len = sizeof(buf);
1390 
1391 			memset(buf, 0, sizeof(buf));
1392 			be16enc(buf, 16 - 2);
1393 			buf[2] = 0x70;
1394 			buf[8] = 0x01;
1395 			buf[9] = 16 - 10;
1396 			buf[11] = 0x05;
1397 			write_prdt(p, slot, cfis, buf, len);
1398 			tfd = ATA_S_READY | ATA_S_DSC;
1399 			break;
1400 		}
1401 		case MODEPAGE_CD_CAPABILITIES:
1402 		{
1403 			uint8_t buf[30];
1404 
1405 			if (len > sizeof(buf))
1406 				len = sizeof(buf);
1407 
1408 			memset(buf, 0, sizeof(buf));
1409 			be16enc(buf, 30 - 2);
1410 			buf[2] = 0x70;
1411 			buf[8] = 0x2A;
1412 			buf[9] = 30 - 10;
1413 			buf[10] = 0x08;
1414 			buf[12] = 0x71;
1415 			be16enc(&buf[18], 2);
1416 			be16enc(&buf[20], 512);
1417 			write_prdt(p, slot, cfis, buf, len);
1418 			tfd = ATA_S_READY | ATA_S_DSC;
1419 			break;
1420 		}
1421 		default:
1422 			goto error;
1423 			break;
1424 		}
1425 		break;
1426 	case 3:
1427 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1428 		p->asc = 0x39;
1429 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1430 		break;
1431 error:
1432 	case 1:
1433 	case 2:
1434 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1435 		p->asc = 0x24;
1436 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1437 		break;
1438 	}
1439 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1440 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1441 }
1442 
1443 static void
1444 atapi_get_event_status_notification(struct ahci_port *p, int slot,
1445     uint8_t *cfis)
1446 {
1447 	uint8_t *acmd;
1448 	uint32_t tfd;
1449 
1450 	acmd = cfis + 0x40;
1451 
1452 	/* we don't support asynchronous operation */
1453 	if (!(acmd[1] & 1)) {
1454 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1455 		p->asc = 0x24;
1456 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1457 	} else {
1458 		uint8_t buf[8];
1459 		int len;
1460 
1461 		len = be16dec(acmd + 7);
1462 		if (len > sizeof(buf))
1463 			len = sizeof(buf);
1464 
1465 		memset(buf, 0, sizeof(buf));
1466 		be16enc(buf, 8 - 2);
1467 		buf[2] = 0x04;
1468 		buf[3] = 0x10;
1469 		buf[5] = 0x02;
1470 		write_prdt(p, slot, cfis, buf, len);
1471 		tfd = ATA_S_READY | ATA_S_DSC;
1472 	}
1473 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1474 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1475 }
1476 
1477 static void
1478 handle_packet_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1479 {
1480 	uint8_t *acmd;
1481 
1482 	acmd = cfis + 0x40;
1483 
1484 #ifdef AHCI_DEBUG
1485 	{
1486 		int i;
1487 		DPRINTF("ACMD:");
1488 		for (i = 0; i < 16; i++)
1489 			DPRINTF("%02x ", acmd[i]);
1490 		DPRINTF("\n");
1491 	}
1492 #endif
1493 
1494 	switch (acmd[0]) {
1495 	case TEST_UNIT_READY:
1496 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1497 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1498 		break;
1499 	case INQUIRY:
1500 		atapi_inquiry(p, slot, cfis);
1501 		break;
1502 	case READ_CAPACITY:
1503 		atapi_read_capacity(p, slot, cfis);
1504 		break;
1505 	case PREVENT_ALLOW:
1506 		/* TODO */
1507 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1508 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1509 		break;
1510 	case READ_TOC:
1511 		atapi_read_toc(p, slot, cfis);
1512 		break;
1513 	case REPORT_LUNS:
1514 		atapi_report_luns(p, slot, cfis);
1515 		break;
1516 	case READ_10:
1517 	case READ_12:
1518 		atapi_read(p, slot, cfis, 0, 0);
1519 		break;
1520 	case REQUEST_SENSE:
1521 		atapi_request_sense(p, slot, cfis);
1522 		break;
1523 	case START_STOP_UNIT:
1524 		atapi_start_stop_unit(p, slot, cfis);
1525 		break;
1526 	case MODE_SENSE_10:
1527 		atapi_mode_sense(p, slot, cfis);
1528 		break;
1529 	case GET_EVENT_STATUS_NOTIFICATION:
1530 		atapi_get_event_status_notification(p, slot, cfis);
1531 		break;
1532 	default:
1533 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1534 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1535 		p->asc = 0x20;
1536 		ahci_write_fis_d2h(p, slot, cfis, (p->sense_key << 12) |
1537 				ATA_S_READY | ATA_S_ERROR);
1538 		break;
1539 	}
1540 }
1541 
1542 static void
1543 ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1544 {
1545 
1546 	switch (cfis[2]) {
1547 	case ATA_ATA_IDENTIFY:
1548 		handle_identify(p, slot, cfis);
1549 		break;
1550 	case ATA_SETFEATURES:
1551 	{
1552 		switch (cfis[3]) {
1553 		case ATA_SF_ENAB_SATA_SF:
1554 			switch (cfis[12]) {
1555 			case ATA_SATA_SF_AN:
1556 				p->tfd = ATA_S_DSC | ATA_S_READY;
1557 				break;
1558 			default:
1559 				p->tfd = ATA_S_ERROR | ATA_S_READY;
1560 				p->tfd |= (ATA_ERROR_ABORT << 8);
1561 				break;
1562 			}
1563 			break;
1564 		case ATA_SF_ENAB_WCACHE:
1565 		case ATA_SF_DIS_WCACHE:
1566 		case ATA_SF_ENAB_RCACHE:
1567 		case ATA_SF_DIS_RCACHE:
1568 			p->tfd = ATA_S_DSC | ATA_S_READY;
1569 			break;
1570 		case ATA_SF_SETXFER:
1571 		{
1572 			switch (cfis[12] & 0xf8) {
1573 			case ATA_PIO:
1574 			case ATA_PIO0:
1575 				break;
1576 			case ATA_WDMA0:
1577 			case ATA_UDMA0:
1578 				p->xfermode = (cfis[12] & 0x7);
1579 				break;
1580 			}
1581 			p->tfd = ATA_S_DSC | ATA_S_READY;
1582 			break;
1583 		}
1584 		default:
1585 			p->tfd = ATA_S_ERROR | ATA_S_READY;
1586 			p->tfd |= (ATA_ERROR_ABORT << 8);
1587 			break;
1588 		}
1589 		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1590 		break;
1591 	}
1592 	case ATA_SET_MULTI:
1593 		if (cfis[12] != 0 &&
1594 			(cfis[12] > 128 || (cfis[12] & (cfis[12] - 1)))) {
1595 			p->tfd = ATA_S_ERROR | ATA_S_READY;
1596 			p->tfd |= (ATA_ERROR_ABORT << 8);
1597 		} else {
1598 			p->mult_sectors = cfis[12];
1599 			p->tfd = ATA_S_DSC | ATA_S_READY;
1600 		}
1601 		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1602 		break;
1603 	case ATA_READ:
1604 	case ATA_WRITE:
1605 	case ATA_READ48:
1606 	case ATA_WRITE48:
1607 	case ATA_READ_MUL:
1608 	case ATA_WRITE_MUL:
1609 	case ATA_READ_MUL48:
1610 	case ATA_WRITE_MUL48:
1611 	case ATA_READ_DMA:
1612 	case ATA_WRITE_DMA:
1613 	case ATA_READ_DMA48:
1614 	case ATA_WRITE_DMA48:
1615 	case ATA_READ_FPDMA_QUEUED:
1616 	case ATA_WRITE_FPDMA_QUEUED:
1617 		ahci_handle_dma(p, slot, cfis, 0, 0);
1618 		break;
1619 	case ATA_FLUSHCACHE:
1620 	case ATA_FLUSHCACHE48:
1621 		ahci_handle_flush(p, slot, cfis);
1622 		break;
1623 	case ATA_DATA_SET_MANAGEMENT:
1624 		if (cfis[11] == 0 && cfis[3] == ATA_DSM_TRIM &&
1625 		    cfis[13] == 0 && cfis[12] == 1) {
1626 			ahci_handle_dsm_trim(p, slot, cfis, 0);
1627 			break;
1628 		}
1629 		ahci_write_fis_d2h(p, slot, cfis,
1630 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1631 		break;
1632 	case ATA_SEND_FPDMA_QUEUED:
1633 		if ((cfis[13] & 0x1f) == ATA_SFPDMA_DSM &&
1634 		    cfis[17] == 0 && cfis[16] == ATA_DSM_TRIM &&
1635 		    cfis[11] == 0 && cfis[13] == 1) {
1636 			ahci_handle_dsm_trim(p, slot, cfis, 0);
1637 			break;
1638 		}
1639 		ahci_write_fis_d2h(p, slot, cfis,
1640 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1641 		break;
1642 	case ATA_READ_LOG_EXT:
1643 	case ATA_READ_LOG_DMA_EXT:
1644 		ahci_handle_read_log(p, slot, cfis);
1645 		break;
1646 	case ATA_NOP:
1647 		ahci_write_fis_d2h(p, slot, cfis,
1648 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1649 		break;
1650 	case ATA_STANDBY_CMD:
1651 	case ATA_STANDBY_IMMEDIATE:
1652 	case ATA_IDLE_CMD:
1653 	case ATA_IDLE_IMMEDIATE:
1654 	case ATA_SLEEP:
1655 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1656 		break;
1657 	case ATA_ATAPI_IDENTIFY:
1658 		handle_atapi_identify(p, slot, cfis);
1659 		break;
1660 	case ATA_PACKET_CMD:
1661 		if (!p->atapi) {
1662 			ahci_write_fis_d2h(p, slot, cfis,
1663 			    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1664 		} else
1665 			handle_packet_cmd(p, slot, cfis);
1666 		break;
1667 	default:
1668 		WPRINTF("Unsupported cmd:%02x\n", cfis[2]);
1669 		ahci_write_fis_d2h(p, slot, cfis,
1670 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1671 		break;
1672 	}
1673 }
1674 
1675 static void
1676 ahci_handle_slot(struct ahci_port *p, int slot)
1677 {
1678 	struct ahci_cmd_hdr *hdr;
1679 	struct ahci_prdt_entry *prdt;
1680 	struct pci_ahci_softc *sc;
1681 	uint8_t *cfis;
1682 	int cfl;
1683 
1684 	sc = p->pr_sc;
1685 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1686 	cfl = (hdr->flags & 0x1f) * 4;
1687 	cfis = paddr_guest2host(ahci_ctx(sc), hdr->ctba,
1688 			0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry));
1689 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1690 
1691 #ifdef AHCI_DEBUG
1692 	DPRINTF("\ncfis:");
1693 	for (i = 0; i < cfl; i++) {
1694 		if (i % 10 == 0)
1695 			DPRINTF("\n");
1696 		DPRINTF("%02x ", cfis[i]);
1697 	}
1698 	DPRINTF("\n");
1699 
1700 	for (i = 0; i < hdr->prdtl; i++) {
1701 		DPRINTF("%d@%08"PRIx64"\n", prdt->dbc & 0x3fffff, prdt->dba);
1702 		prdt++;
1703 	}
1704 #endif
1705 
1706 	if (cfis[0] != FIS_TYPE_REGH2D) {
1707 		WPRINTF("Not a H2D FIS:%02x\n", cfis[0]);
1708 		return;
1709 	}
1710 
1711 	if (cfis[1] & 0x80) {
1712 		ahci_handle_cmd(p, slot, cfis);
1713 	} else {
1714 		if (cfis[15] & (1 << 2))
1715 			p->reset = 1;
1716 		else if (p->reset) {
1717 			p->reset = 0;
1718 			ahci_port_reset(p);
1719 		}
1720 		p->ci &= ~(1 << slot);
1721 	}
1722 }
1723 
1724 static void
1725 ahci_handle_port(struct ahci_port *p)
1726 {
1727 	int i;
1728 
1729 	if (!(p->cmd & AHCI_P_CMD_ST))
1730 		return;
1731 
1732 	/*
1733 	 * Search for any new commands to issue ignoring those that
1734 	 * are already in-flight.
1735 	 */
1736 	for (i = 0; (i < 32) && p->ci; i++) {
1737 		if ((p->ci & (1 << i)) && !(p->pending & (1 << i))) {
1738 			p->cmd &= ~AHCI_P_CMD_CCS_MASK;
1739 			p->cmd |= i << AHCI_P_CMD_CCS_SHIFT;
1740 			ahci_handle_slot(p, i);
1741 		}
1742 	}
1743 }
1744 
1745 /*
1746  * blockif callback routine - this runs in the context of the blockif
1747  * i/o thread, so the mutex needs to be acquired.
1748  */
1749 static void
1750 ata_ioreq_cb(struct blockif_req *br, int err)
1751 {
1752 	struct ahci_cmd_hdr *hdr;
1753 	struct ahci_ioreq *aior;
1754 	struct ahci_port *p;
1755 	struct pci_ahci_softc *sc;
1756 	uint32_t tfd;
1757 	uint8_t *cfis;
1758 	int pending, slot, ncq, dsm;
1759 
1760 	DPRINTF("%s %d\n", __func__, err);
1761 
1762 	ncq = 0;
1763 	aior = br->br_param;
1764 	p = aior->io_pr;
1765 	cfis = aior->cfis;
1766 	slot = aior->slot;
1767 	pending = aior->prdtl;
1768 	sc = p->pr_sc;
1769 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1770 
1771 	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
1772 	    cfis[2] == ATA_READ_FPDMA_QUEUED ||
1773 	    cfis[2] == ATA_SEND_FPDMA_QUEUED)
1774 		ncq = 1;
1775 	if (cfis[2] == ATA_DATA_SET_MANAGEMENT ||
1776 	    (cfis[2] == ATA_SEND_FPDMA_QUEUED &&
1777 	     (cfis[13] & 0x1f) == ATA_SFPDMA_DSM))
1778 		dsm = 1;
1779 
1780 	pthread_mutex_lock(&sc->mtx);
1781 
1782 	/*
1783 	 * Delete the blockif request from the busy list
1784 	 */
1785 	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1786 
1787 	/*
1788 	 * Move the blockif request back to the free list
1789 	 */
1790 	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
1791 
1792 	if (!err)
1793 		hdr->prdbc = aior->done;
1794 
1795 	if (dsm) {
1796 		if (aior->done != aior->len && !err) {
1797 			ahci_handle_dsm_trim(p, slot, cfis, aior->done);
1798 			goto out;
1799 		}
1800 	} else {
1801 		if (pending && !err) {
1802 			ahci_handle_dma(p, slot, cfis, aior->done,
1803 			    hdr->prdtl - pending);
1804 			goto out;
1805 		}
1806 	}
1807 
1808 	if (!err && aior->done == aior->len) {
1809 		tfd = ATA_S_READY | ATA_S_DSC;
1810 	} else {
1811 		tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1812 	}
1813 
1814 	if (ncq)
1815 		ahci_write_fis_sdb(p, slot, cfis, tfd);
1816 	else
1817 		ahci_write_fis_d2h(p, slot, cfis, tfd);
1818 
1819 	/*
1820 	 * This command is now complete.
1821 	 */
1822 	p->pending &= ~(1 << slot);
1823 
1824 	ahci_check_stopped(p);
1825 out:
1826 	pthread_mutex_unlock(&sc->mtx);
1827 	DPRINTF("%s exit\n", __func__);
1828 }
1829 
1830 static void
1831 atapi_ioreq_cb(struct blockif_req *br, int err)
1832 {
1833 	struct ahci_cmd_hdr *hdr;
1834 	struct ahci_ioreq *aior;
1835 	struct ahci_port *p;
1836 	struct pci_ahci_softc *sc;
1837 	uint8_t *cfis;
1838 	uint32_t tfd;
1839 	int pending, slot;
1840 
1841 	DPRINTF("%s %d\n", __func__, err);
1842 
1843 	aior = br->br_param;
1844 	p = aior->io_pr;
1845 	cfis = aior->cfis;
1846 	slot = aior->slot;
1847 	pending = aior->prdtl;
1848 	sc = p->pr_sc;
1849 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + aior->slot * AHCI_CL_SIZE);
1850 
1851 	pthread_mutex_lock(&sc->mtx);
1852 
1853 	/*
1854 	 * Delete the blockif request from the busy list
1855 	 */
1856 	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1857 
1858 	/*
1859 	 * Move the blockif request back to the free list
1860 	 */
1861 	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
1862 
1863 	if (!err)
1864 		hdr->prdbc = aior->done;
1865 
1866 	if (pending && !err) {
1867 		atapi_read(p, slot, cfis, aior->done, hdr->prdtl - pending);
1868 		goto out;
1869 	}
1870 
1871 	if (!err && aior->done == aior->len) {
1872 		tfd = ATA_S_READY | ATA_S_DSC;
1873 	} else {
1874 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1875 		p->asc = 0x21;
1876 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1877 	}
1878 
1879 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1880 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1881 
1882 	/*
1883 	 * This command is now complete.
1884 	 */
1885 	p->pending &= ~(1 << slot);
1886 
1887 	ahci_check_stopped(p);
1888 out:
1889 	pthread_mutex_unlock(&sc->mtx);
1890 	DPRINTF("%s exit\n", __func__);
1891 }
1892 
1893 static void
1894 pci_ahci_ioreq_init(struct ahci_port *pr)
1895 {
1896 	struct ahci_ioreq *vr;
1897 	int i;
1898 
1899 	pr->ioqsz = blockif_queuesz(pr->bctx);
1900 	pr->ioreq = calloc(pr->ioqsz, sizeof(struct ahci_ioreq));
1901 	STAILQ_INIT(&pr->iofhd);
1902 
1903 	/*
1904 	 * Add all i/o request entries to the free queue
1905 	 */
1906 	for (i = 0; i < pr->ioqsz; i++) {
1907 		vr = &pr->ioreq[i];
1908 		vr->io_pr = pr;
1909 		if (!pr->atapi)
1910 			vr->io_req.br_callback = ata_ioreq_cb;
1911 		else
1912 			vr->io_req.br_callback = atapi_ioreq_cb;
1913 		vr->io_req.br_param = vr;
1914 		STAILQ_INSERT_TAIL(&pr->iofhd, vr, io_flist);
1915 	}
1916 
1917 	TAILQ_INIT(&pr->iobhd);
1918 }
1919 
1920 static void
1921 pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
1922 {
1923 	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
1924 	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
1925 	struct ahci_port *p = &sc->port[port];
1926 
1927 	DPRINTF("pci_ahci_port %d: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
1928 		port, offset, value);
1929 
1930 	switch (offset) {
1931 	case AHCI_P_CLB:
1932 		p->clb = value;
1933 		break;
1934 	case AHCI_P_CLBU:
1935 		p->clbu = value;
1936 		break;
1937 	case AHCI_P_FB:
1938 		p->fb = value;
1939 		break;
1940 	case AHCI_P_FBU:
1941 		p->fbu = value;
1942 		break;
1943 	case AHCI_P_IS:
1944 		p->is &= ~value;
1945 		break;
1946 	case AHCI_P_IE:
1947 		p->ie = value & 0xFDC000FF;
1948 		ahci_generate_intr(sc);
1949 		break;
1950 	case AHCI_P_CMD:
1951 	{
1952 		p->cmd = value;
1953 
1954 		if (!(value & AHCI_P_CMD_ST)) {
1955 			ahci_port_stop(p);
1956 		} else {
1957 			uint64_t clb;
1958 
1959 			p->cmd |= AHCI_P_CMD_CR;
1960 			clb = (uint64_t)p->clbu << 32 | p->clb;
1961 			p->cmd_lst = paddr_guest2host(ahci_ctx(sc), clb,
1962 					AHCI_CL_SIZE * AHCI_MAX_SLOTS);
1963 		}
1964 
1965 		if (value & AHCI_P_CMD_FRE) {
1966 			uint64_t fb;
1967 
1968 			p->cmd |= AHCI_P_CMD_FR;
1969 			fb = (uint64_t)p->fbu << 32 | p->fb;
1970 			/* we don't support FBSCP, so rfis size is 256Bytes */
1971 			p->rfis = paddr_guest2host(ahci_ctx(sc), fb, 256);
1972 		} else {
1973 			p->cmd &= ~AHCI_P_CMD_FR;
1974 		}
1975 
1976 		if (value & AHCI_P_CMD_CLO) {
1977 			p->tfd = 0;
1978 			p->cmd &= ~AHCI_P_CMD_CLO;
1979 		}
1980 
1981 		ahci_handle_port(p);
1982 		break;
1983 	}
1984 	case AHCI_P_TFD:
1985 	case AHCI_P_SIG:
1986 	case AHCI_P_SSTS:
1987 		WPRINTF("pci_ahci_port: read only registers 0x%"PRIx64"\n", offset);
1988 		break;
1989 	case AHCI_P_SCTL:
1990 		p->sctl = value;
1991 		if (!(p->cmd & AHCI_P_CMD_ST)) {
1992 			if (value & ATA_SC_DET_RESET)
1993 				ahci_port_reset(p);
1994 		}
1995 		break;
1996 	case AHCI_P_SERR:
1997 		p->serr &= ~value;
1998 		break;
1999 	case AHCI_P_SACT:
2000 		p->sact |= value;
2001 		break;
2002 	case AHCI_P_CI:
2003 		p->ci |= value;
2004 		ahci_handle_port(p);
2005 		break;
2006 	case AHCI_P_SNTF:
2007 	case AHCI_P_FBS:
2008 	default:
2009 		break;
2010 	}
2011 }
2012 
2013 static void
2014 pci_ahci_host_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
2015 {
2016 	DPRINTF("pci_ahci_host: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
2017 		offset, value);
2018 
2019 	switch (offset) {
2020 	case AHCI_CAP:
2021 	case AHCI_PI:
2022 	case AHCI_VS:
2023 	case AHCI_CAP2:
2024 		DPRINTF("pci_ahci_host: read only registers 0x%"PRIx64"\n", offset);
2025 		break;
2026 	case AHCI_GHC:
2027 		if (value & AHCI_GHC_HR)
2028 			ahci_reset(sc);
2029 		else if (value & AHCI_GHC_IE) {
2030 			sc->ghc |= AHCI_GHC_IE;
2031 			ahci_generate_intr(sc);
2032 		}
2033 		break;
2034 	case AHCI_IS:
2035 		sc->is &= ~value;
2036 		ahci_generate_intr(sc);
2037 		break;
2038 	default:
2039 		break;
2040 	}
2041 }
2042 
2043 static void
2044 pci_ahci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
2045 		int baridx, uint64_t offset, int size, uint64_t value)
2046 {
2047 	struct pci_ahci_softc *sc = pi->pi_arg;
2048 
2049 	assert(baridx == 5);
2050 	assert(size == 4);
2051 
2052 	pthread_mutex_lock(&sc->mtx);
2053 
2054 	if (offset < AHCI_OFFSET)
2055 		pci_ahci_host_write(sc, offset, value);
2056 	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
2057 		pci_ahci_port_write(sc, offset, value);
2058 	else
2059 		WPRINTF("pci_ahci: unknown i/o write offset 0x%"PRIx64"\n", offset);
2060 
2061 	pthread_mutex_unlock(&sc->mtx);
2062 }
2063 
2064 static uint64_t
2065 pci_ahci_host_read(struct pci_ahci_softc *sc, uint64_t offset)
2066 {
2067 	uint32_t value;
2068 
2069 	switch (offset) {
2070 	case AHCI_CAP:
2071 	case AHCI_GHC:
2072 	case AHCI_IS:
2073 	case AHCI_PI:
2074 	case AHCI_VS:
2075 	case AHCI_CCCC:
2076 	case AHCI_CCCP:
2077 	case AHCI_EM_LOC:
2078 	case AHCI_EM_CTL:
2079 	case AHCI_CAP2:
2080 	{
2081 		uint32_t *p = &sc->cap;
2082 		p += (offset - AHCI_CAP) / sizeof(uint32_t);
2083 		value = *p;
2084 		break;
2085 	}
2086 	default:
2087 		value = 0;
2088 		break;
2089 	}
2090 	DPRINTF("pci_ahci_host: read offset 0x%"PRIx64" value 0x%x\n",
2091 		offset, value);
2092 
2093 	return (value);
2094 }
2095 
2096 static uint64_t
2097 pci_ahci_port_read(struct pci_ahci_softc *sc, uint64_t offset)
2098 {
2099 	uint32_t value;
2100 	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
2101 	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
2102 
2103 	switch (offset) {
2104 	case AHCI_P_CLB:
2105 	case AHCI_P_CLBU:
2106 	case AHCI_P_FB:
2107 	case AHCI_P_FBU:
2108 	case AHCI_P_IS:
2109 	case AHCI_P_IE:
2110 	case AHCI_P_CMD:
2111 	case AHCI_P_TFD:
2112 	case AHCI_P_SIG:
2113 	case AHCI_P_SSTS:
2114 	case AHCI_P_SCTL:
2115 	case AHCI_P_SERR:
2116 	case AHCI_P_SACT:
2117 	case AHCI_P_CI:
2118 	case AHCI_P_SNTF:
2119 	case AHCI_P_FBS:
2120 	{
2121 		uint32_t *p= &sc->port[port].clb;
2122 		p += (offset - AHCI_P_CLB) / sizeof(uint32_t);
2123 		value = *p;
2124 		break;
2125 	}
2126 	default:
2127 		value = 0;
2128 		break;
2129 	}
2130 
2131 	DPRINTF("pci_ahci_port %d: read offset 0x%"PRIx64" value 0x%x\n",
2132 		port, offset, value);
2133 
2134 	return value;
2135 }
2136 
2137 static uint64_t
2138 pci_ahci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
2139     uint64_t offset, int size)
2140 {
2141 	struct pci_ahci_softc *sc = pi->pi_arg;
2142 	uint32_t value;
2143 
2144 	assert(baridx == 5);
2145 	assert(size == 4);
2146 
2147 	pthread_mutex_lock(&sc->mtx);
2148 
2149 	if (offset < AHCI_OFFSET)
2150 		value = pci_ahci_host_read(sc, offset);
2151 	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
2152 		value = pci_ahci_port_read(sc, offset);
2153 	else {
2154 		value = 0;
2155 		WPRINTF("pci_ahci: unknown i/o read offset 0x%"PRIx64"\n", offset);
2156 	}
2157 
2158 	pthread_mutex_unlock(&sc->mtx);
2159 
2160 	return (value);
2161 }
2162 
2163 static int
2164 pci_ahci_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts, int atapi)
2165 {
2166 	char bident[sizeof("XX:X:X")];
2167 	struct blockif_ctxt *bctxt;
2168 	struct pci_ahci_softc *sc;
2169 	int ret, slots;
2170 	MD5_CTX mdctx;
2171 	u_char digest[16];
2172 
2173 	ret = 0;
2174 
2175 	if (opts == NULL) {
2176 		fprintf(stderr, "pci_ahci: backing device required\n");
2177 		return (1);
2178 	}
2179 
2180 #ifdef AHCI_DEBUG
2181 	dbg = fopen("/tmp/log", "w+");
2182 #endif
2183 
2184 	sc = calloc(1, sizeof(struct pci_ahci_softc));
2185 	pi->pi_arg = sc;
2186 	sc->asc_pi = pi;
2187 	sc->ports = MAX_PORTS;
2188 
2189 	/*
2190 	 * Only use port 0 for a backing device. All other ports will be
2191 	 * marked as unused
2192 	 */
2193 	sc->port[0].atapi = atapi;
2194 
2195 	/*
2196 	 * Attempt to open the backing image. Use the PCI
2197 	 * slot/func for the identifier string.
2198 	 */
2199 	snprintf(bident, sizeof(bident), "%d:%d", pi->pi_slot, pi->pi_func);
2200 	bctxt = blockif_open(opts, bident);
2201 	if (bctxt == NULL) {
2202 		ret = 1;
2203 		goto open_fail;
2204 	}
2205 	sc->port[0].bctx = bctxt;
2206 	sc->port[0].pr_sc = sc;
2207 
2208 	/*
2209 	 * Create an identifier for the backing file. Use parts of the
2210 	 * md5 sum of the filename
2211 	 */
2212 	MD5Init(&mdctx);
2213 	MD5Update(&mdctx, opts, strlen(opts));
2214 	MD5Final(digest, &mdctx);
2215 	sprintf(sc->port[0].ident, "BHYVE-%02X%02X-%02X%02X-%02X%02X",
2216 	    digest[0], digest[1], digest[2], digest[3], digest[4], digest[5]);
2217 
2218 	/*
2219 	 * Allocate blockif request structures and add them
2220 	 * to the free list
2221 	 */
2222 	pci_ahci_ioreq_init(&sc->port[0]);
2223 
2224 	pthread_mutex_init(&sc->mtx, NULL);
2225 
2226 	/* Intel ICH8 AHCI */
2227 	slots = sc->port[0].ioqsz;
2228 	if (slots > 32)
2229 		slots = 32;
2230 	--slots;
2231 	sc->cap = AHCI_CAP_64BIT | AHCI_CAP_SNCQ | AHCI_CAP_SSNTF |
2232 	    AHCI_CAP_SMPS | AHCI_CAP_SSS | AHCI_CAP_SALP |
2233 	    AHCI_CAP_SAL | AHCI_CAP_SCLO | (0x3 << AHCI_CAP_ISS_SHIFT)|
2234 	    AHCI_CAP_PMD | AHCI_CAP_SSC | AHCI_CAP_PSC |
2235 	    (slots << AHCI_CAP_NCS_SHIFT) | AHCI_CAP_SXS | (sc->ports - 1);
2236 
2237 	/* Only port 0 implemented */
2238 	sc->pi = 1;
2239 	sc->vs = 0x10300;
2240 	sc->cap2 = AHCI_CAP2_APST;
2241 	ahci_reset(sc);
2242 
2243 	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x2821);
2244 	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x8086);
2245 	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
2246 	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_STORAGE_SATA);
2247 	pci_set_cfgdata8(pi, PCIR_PROGIF, PCIP_STORAGE_SATA_AHCI_1_0);
2248 	pci_emul_add_msicap(pi, 1);
2249 	pci_emul_alloc_bar(pi, 5, PCIBAR_MEM32,
2250 	    AHCI_OFFSET + sc->ports * AHCI_STEP);
2251 
2252 	pci_lintr_request(pi);
2253 
2254 open_fail:
2255 	if (ret) {
2256 		if (sc->port[0].bctx != NULL)
2257 			blockif_close(sc->port[0].bctx);
2258 		free(sc);
2259 	}
2260 
2261 	return (ret);
2262 }
2263 
/*
 * Init entry point for the "ahci-hd" emulation: common AHCI
 * initialisation with the atapi flag clear.
 */
static int
pci_ahci_hd_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int is_atapi = 0;

	return (pci_ahci_init(ctx, pi, opts, is_atapi));
}
2270 
/*
 * Init entry point for the "ahci-cd" emulation: common AHCI
 * initialisation with the atapi flag set.
 */
static int
pci_ahci_atapi_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int is_atapi = 1;

	return (pci_ahci_init(ctx, pi, opts, is_atapi));
}
2277 
2278 /*
2279  * Use separate emulation names to distinguish drive and atapi devices
2280  */
2281 struct pci_devemu pci_de_ahci_hd = {
2282 	.pe_emu =	"ahci-hd",
2283 	.pe_init =	pci_ahci_hd_init,
2284 	.pe_barwrite =	pci_ahci_write,
2285 	.pe_barread =	pci_ahci_read
2286 };
2287 PCI_EMUL_SET(pci_de_ahci_hd);
2288 
2289 struct pci_devemu pci_de_ahci_cd = {
2290 	.pe_emu =	"ahci-cd",
2291 	.pe_init =	pci_ahci_atapi_init,
2292 	.pe_barwrite =	pci_ahci_write,
2293 	.pe_barread =	pci_ahci_read
2294 };
2295 PCI_EMUL_SET(pci_de_ahci_cd);
2296