xref: /freebsd/usr.sbin/bhyve/pci_ahci.c (revision fba3cde907930eed2adb8a320524bc250338c729)
1 /*-
2  * Copyright (c) 2013  Zhixiang Yu <zcore@freebsd.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/linker_set.h>
34 #include <sys/stat.h>
35 #include <sys/uio.h>
36 #include <sys/ioctl.h>
37 #include <sys/disk.h>
38 #include <sys/ata.h>
39 #include <sys/endian.h>
40 
41 #include <errno.h>
42 #include <fcntl.h>
43 #include <stdio.h>
44 #include <stdlib.h>
45 #include <stdint.h>
46 #include <string.h>
47 #include <strings.h>
48 #include <unistd.h>
49 #include <assert.h>
50 #include <pthread.h>
51 #include <inttypes.h>
52 
53 #include "bhyverun.h"
54 #include "pci_emul.h"
55 #include "ahci.h"
56 #include "block_if.h"
57 
/*
 * Number of port slots in the emulated controller; sizes the port[]
 * array in struct pci_ahci_softc.
 */
#define	MAX_PORTS	6	/* Intel ICH8 AHCI supports 6 ports */

/* Device signatures stored in a port's 'sig' field by ahci_port_reset(). */
#define	PxSIG_ATA	0x00000101 /* ATA drive */
#define	PxSIG_ATAPI	0xeb140101 /* ATAPI drive */
62 
/*
 * FIS type codes.  The code is the first byte of a FIS: ahci_handle_slot()
 * compares cfis[0] against FIS_TYPE_REGH2D, and the D2H builders store
 * FIS_TYPE_REGD2H in fis[0].  Only REGD2H, SETDEVBITS and PIOSETUP are
 * accepted by ahci_write_fis().
 */
enum sata_fis_type {
	FIS_TYPE_REGH2D		= 0x27,	/* Register FIS - host to device */
	FIS_TYPE_REGD2H		= 0x34,	/* Register FIS - device to host */
	FIS_TYPE_DMAACT		= 0x39,	/* DMA activate FIS - device to host */
	FIS_TYPE_DMASETUP	= 0x41,	/* DMA setup FIS - bidirectional */
	FIS_TYPE_DATA		= 0x46,	/* Data FIS - bidirectional */
	FIS_TYPE_BIST		= 0x58,	/* BIST activate FIS - bidirectional */
	FIS_TYPE_PIOSETUP	= 0x5F,	/* PIO setup FIS - device to host */
	FIS_TYPE_SETDEVBITS	= 0xA1,	/* Set dev bits FIS - device to host */
};
73 
/*
 * SCSI opcodes
 *
 * Byte 0 of the ATAPI command packet (located at cfis + 0x40).
 * handle_packet_cmd() dispatches on these; opcodes it has no case for
 * (POSITION_TO_ELEMENT, READ_CD, anything unlisted) are rejected there
 * with an ILLEGAL REQUEST sense key.
 */
#define	TEST_UNIT_READY		0x00
#define	REQUEST_SENSE		0x03
#define	INQUIRY			0x12
#define	START_STOP_UNIT		0x1B
#define	PREVENT_ALLOW		0x1E
#define	READ_CAPACITY		0x25
#define	READ_10			0x28
#define	POSITION_TO_ELEMENT	0x2B
#define	READ_TOC		0x43
#define	GET_EVENT_STATUS_NOTIFICATION 0x4A
#define	MODE_SENSE_10		0x5A
#define	READ_12			0xA8
#define	READ_CD			0xBE

/*
 * SCSI mode page codes
 *
 * Matched against the low six bits of byte 2 of a MODE SENSE(10)
 * packet in atapi_mode_sense().
 */
#define	MODEPAGE_RW_ERROR_RECOVERY	0x01
#define	MODEPAGE_CD_CAPABILITIES	0x2A
96 
/*
 * Debug printf
 *
 * With AHCI_DEBUG defined, DPRINTF formats to the 'dbg' stream and
 * flushes it after every message; otherwise it expands to nothing.
 * NOTE(review): 'dbg' is not assigned in this part of the file -- it
 * must be opened before the first DPRINTF; confirm in the init path.
 * WPRINTF is unconditional and prints to stdout.
 */
#ifdef AHCI_DEBUG
static FILE *dbg;
#define DPRINTF(format, arg...)	do{fprintf(dbg, format, ##arg);fflush(dbg);}while(0)
#else
#define DPRINTF(format, arg...)
#endif
#define WPRINTF(format, arg...) printf(format, ##arg)
107 
/*
 * Bookkeeping for one operation handed to the block backend.  Idle
 * entries sit on the owning port's 'iofhd' free list; a handler takes
 * the head entry before submitting.
 */
struct ahci_ioreq {
	struct blockif_req io_req;	/* request given to blockif_read/write/flush */
	struct ahci_port *io_pr;	/* owning port; read back in ata_ioreq_cb() */
	STAILQ_ENTRY(ahci_ioreq) io_list;	/* linkage on the port's free list */
	uint8_t *cfis;			/* command FIS that started the request */
	uint32_t len;			/* total transfer length in bytes */
	uint32_t done;			/* bytes covered by PRD entries queued so far */
	int slot;			/* command slot the request belongs to */
	int prdtl;			/* PRD entries left over after this submission */
};
118 
/*
 * State of one emulated AHCI port and of the device (if any) behind it.
 */
struct ahci_port {
	struct blockif_ctxt *bctx;	/* block backend; NULL means no device */
	struct pci_ahci_softc *pr_sc;	/* owning controller */
	uint8_t *cmd_lst;		/* host pointer to command list, indexed by slot */
	uint8_t *rfis;			/* host pointer to received-FIS area, or NULL */
	int atapi;			/* non-zero: device is presented as ATAPI */
	int reset;			/* a control FIS with the reset bit was seen */
	int mult_sectors;		/* value set by ATA_SET_MULTI (128 after reset) */
	uint8_t xfermode;		/* transfer mode reported by IDENTIFY */
	uint8_t sense_key;		/* last ATAPI sense key (see REQUEST SENSE) */
	uint8_t asc;			/* last ATAPI additional sense code */

	/*
	 * Per-port register values.
	 * NOTE(review): the names follow the AHCI PxCLB..PxFBS port
	 * registers; the MMIO accessors are not in this part of the
	 * file -- confirm the mapping there.
	 */
	uint32_t clb;
	uint32_t clbu;
	uint32_t fb;
	uint32_t fbu;
	uint32_t is;
	uint32_t ie;
	uint32_t cmd;
	uint32_t unused0;
	uint32_t tfd;
	uint32_t sig;
	uint32_t ssts;
	uint32_t sctl;
	uint32_t serr;
	uint32_t sact;
	uint32_t ci;
	uint32_t sntf;
	uint32_t fbs;

	/*
	 * i/o request info
	 */
	struct ahci_ioreq *ioreq;	/* presumably the backing array for the free list */
	int ioqsz;			/* presumably the number of entries in 'ioreq' */
	STAILQ_HEAD(ahci_fhead, ahci_ioreq) iofhd;	/* free request list */
};
156 
/*
 * Command header as laid out in guest memory: the handlers cast
 * cmd_lst + slot * AHCI_CL_SIZE to this type, so field order and sizes
 * must not change.
 */
struct ahci_cmd_hdr {
	uint16_t flags;		/* low 5 bits: command FIS length in dwords */
	uint16_t prdtl;		/* number of PRD table entries */
	uint32_t prdbc;		/* byte count transferred; set by write_prdt() */
	uint64_t ctba;		/* guest-physical address of the command table */
	uint32_t reserved[4];
};
164 
/*
 * One PRD table entry as laid out in guest memory; the table starts at
 * offset 0x80 of the command table.
 */
struct ahci_prdt_entry {
	uint64_t dba;		/* guest-physical address of the data buffer */
	uint32_t reserved;
	uint32_t dbc;		/* byte count minus one (debug output masks it with 0x3fffff) */
};
170 
/*
 * Controller-wide state: the PCI device instance, a mutex, the global
 * register values and the per-port state array.
 */
struct pci_ahci_softc {
	struct pci_devinst *asc_pi;	/* PCI device instance; used to raise MSIs */
	pthread_mutex_t	mtx;		/* taken by the blockif completion callback */
	int ports;			/* number of entries of port[] in use */
	/*
	 * Global register values.
	 * NOTE(review): names follow the AHCI generic host control
	 * registers; the accessors are outside this part of the file.
	 */
	uint32_t cap;
	uint32_t ghc;
	uint32_t is;			/* one bit per port with a pending interrupt */
	uint32_t pi;
	uint32_t vs;
	uint32_t ccc_ctl;
	uint32_t ccc_pts;
	uint32_t em_loc;
	uint32_t em_ctl;
	uint32_t cap2;
	uint32_t bohc;
	struct ahci_port port[MAX_PORTS];
};
/* VM context of the controller's PCI device, as handed to paddr_guest2host(). */
#define	ahci_ctx(sc)	((sc)->asc_pi->pi_vmctx)
189 
/*
 * Convert a logical block address to minute/second/frame form.
 *
 * A fixed offset of 150 frames is added first; a second holds 75
 * frames.  buf[0] receives the minutes, buf[1] the seconds and buf[2]
 * the frames.
 */
static inline void
lba_to_msf(uint8_t *buf, int lba)
{
	int frames, seconds;

	frames = lba + 150;
	seconds = frames / 75;

	buf[0] = seconds / 60;
	buf[1] = seconds % 60;
	buf[2] = frames % 75;
}
197 
198 /*
199  * generate HBA intr depending on whether or not ports within
200  * the controller have an interrupt pending.
201  */
202 static void
203 ahci_generate_intr(struct pci_ahci_softc *sc)
204 {
205 	int i;
206 
207 	for (i = 0; i < sc->ports; i++) {
208 		struct ahci_port *pr;
209 		pr = &sc->port[i];
210 		if (pr->is & pr->ie)
211 			sc->is |= (1 << i);
212 	}
213 
214 	DPRINTF("%s %x\n", __func__, sc->is);
215 
216 	if (sc->is && (sc->ghc & AHCI_GHC_IE))
217 		pci_generate_msi(sc->asc_pi, 0);
218 }
219 
220 static void
221 ahci_write_fis(struct ahci_port *p, enum sata_fis_type ft, uint8_t *fis)
222 {
223 	int offset, len, irq;
224 
225 	if (p->rfis == NULL || !(p->cmd & AHCI_P_CMD_FRE))
226 		return;
227 
228 	switch (ft) {
229 	case FIS_TYPE_REGD2H:
230 		offset = 0x40;
231 		len = 20;
232 		irq = AHCI_P_IX_DHR;
233 		break;
234 	case FIS_TYPE_SETDEVBITS:
235 		offset = 0x58;
236 		len = 8;
237 		irq = AHCI_P_IX_SDB;
238 		break;
239 	case FIS_TYPE_PIOSETUP:
240 		offset = 0x20;
241 		len = 20;
242 		irq = 0;
243 		break;
244 	default:
245 		WPRINTF("unsupported fis type %d\n", ft);
246 		return;
247 	}
248 	memcpy(p->rfis + offset, fis, len);
249 	if (irq) {
250 		p->is |= irq;
251 		ahci_generate_intr(p->pr_sc);
252 	}
253 }
254 
255 static void
256 ahci_write_fis_sdb(struct ahci_port *p, int slot, uint32_t tfd)
257 {
258 	uint8_t fis[8];
259 	uint8_t error;
260 
261 	error = (tfd >> 8) & 0xff;
262 	memset(fis, 0, sizeof(fis));
263 	fis[0] = error;
264 	fis[2] = tfd & 0x77;
265 	*(uint32_t *)(fis + 4) = (1 << slot);
266 	if (fis[2] & ATA_S_ERROR)
267 		p->is |= AHCI_P_IX_TFE;
268 	p->tfd = tfd;
269 	ahci_write_fis(p, FIS_TYPE_SETDEVBITS, fis);
270 }
271 
272 static void
273 ahci_write_fis_d2h(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
274 {
275 	uint8_t fis[20];
276 	uint8_t error;
277 
278 	error = (tfd >> 8) & 0xff;
279 	memset(fis, 0, sizeof(fis));
280 	fis[0] = FIS_TYPE_REGD2H;
281 	fis[1] = (1 << 6);
282 	fis[2] = tfd & 0xff;
283 	fis[3] = error;
284 	fis[4] = cfis[4];
285 	fis[5] = cfis[5];
286 	fis[6] = cfis[6];
287 	fis[7] = cfis[7];
288 	fis[8] = cfis[8];
289 	fis[9] = cfis[9];
290 	fis[10] = cfis[10];
291 	fis[11] = cfis[11];
292 	fis[12] = cfis[12];
293 	fis[13] = cfis[13];
294 	if (fis[2] & ATA_S_ERROR)
295 		p->is |= AHCI_P_IX_TFE;
296 	p->tfd = tfd;
297 	p->ci &= ~(1 << slot);
298 	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
299 }
300 
301 static void
302 ahci_write_reset_fis_d2h(struct ahci_port *p)
303 {
304 	uint8_t fis[20];
305 
306 	memset(fis, 0, sizeof(fis));
307 	fis[0] = FIS_TYPE_REGD2H;
308 	fis[3] = 1;
309 	fis[4] = 1;
310 	if (p->atapi) {
311 		fis[5] = 0x14;
312 		fis[6] = 0xeb;
313 	}
314 	fis[12] = 1;
315 	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
316 }
317 
318 static void
319 ahci_port_reset(struct ahci_port *pr)
320 {
321 	pr->sctl = 0;
322 	pr->serr = 0;
323 	pr->sact = 0;
324 	pr->xfermode = ATA_UDMA6;
325 	pr->mult_sectors = 128;
326 
327 	if (!pr->bctx) {
328 		pr->ssts = ATA_SS_DET_NO_DEVICE;
329 		pr->sig = 0xFFFFFFFF;
330 		pr->tfd = 0x7F;
331 		return;
332 	}
333 	pr->ssts = ATA_SS_DET_PHY_ONLINE | ATA_SS_SPD_GEN2 |
334 		ATA_SS_IPM_ACTIVE;
335 	pr->tfd = (1 << 8) | ATA_S_DSC | ATA_S_DMA;
336 	if (!pr->atapi) {
337 		pr->sig = PxSIG_ATA;
338 		pr->tfd |= ATA_S_READY;
339 	} else
340 		pr->sig = PxSIG_ATAPI;
341 	ahci_write_reset_fis_d2h(pr);
342 }
343 
344 static void
345 ahci_reset(struct pci_ahci_softc *sc)
346 {
347 	int i;
348 
349 	sc->ghc = AHCI_GHC_AE;
350 	sc->is = 0;
351 	for (i = 0; i < sc->ports; i++) {
352 		sc->port[i].ie = 0;
353 		sc->port[i].is = 0;
354 		ahci_port_reset(&sc->port[i]);
355 	}
356 }
357 
/*
 * Fill a 'len'-byte field with 'src', padding with spaces once the
 * string ends.  The two bytes of every pair are swapped (index ^ 1).
 * 'len' is expected to be even: for an odd length the last byte would
 * land at dest[len].
 */
static void
ata_string(uint8_t *dest, const char *src, int len)
{
	int pos;

	for (pos = 0; pos < len; pos++)
		dest[pos ^ 1] = (*src != '\0') ? *src++ : ' ';
}
370 
/*
 * Fill a 'len'-byte field with 'src' in natural byte order, padding
 * with spaces once the string ends.  No terminator is written.
 */
static void
atapi_string(uint8_t *dest, const char *src, int len)
{
	uint8_t *out, *end;

	end = dest + len;
	for (out = dest; out < end; out++)
		*out = (*src != '\0') ? *src++ : ' ';
}
383 
384 static void
385 ahci_handle_dma(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done,
386     int seek)
387 {
388 	struct ahci_ioreq *aior;
389 	struct blockif_req *breq;
390 	struct pci_ahci_softc *sc;
391 	struct ahci_prdt_entry *prdt;
392 	struct ahci_cmd_hdr *hdr;
393 	uint64_t lba;
394 	uint32_t len;
395 	int i, err, iovcnt, ncq, readop;
396 
397 	sc = p->pr_sc;
398 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
399 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
400 	ncq = 0;
401 	readop = 1;
402 
403 	prdt += seek;
404 	if (cfis[2] == ATA_WRITE_DMA || cfis[2] == ATA_WRITE_DMA48 ||
405 			cfis[2] == ATA_WRITE_FPDMA_QUEUED)
406 		readop = 0;
407 
408 	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
409 			cfis[2] == ATA_READ_FPDMA_QUEUED) {
410 		lba = ((uint64_t)cfis[10] << 40) |
411 			((uint64_t)cfis[9] << 32) |
412 			((uint64_t)cfis[8] << 24) |
413 			((uint64_t)cfis[6] << 16) |
414 			((uint64_t)cfis[5] << 8) |
415 			cfis[4];
416 		len = cfis[11] << 8 | cfis[3];
417 		if (!len)
418 			len = 65536;
419 		ncq = 1;
420 	} else if (cfis[2] == ATA_READ_DMA48 || cfis[2] == ATA_WRITE_DMA48) {
421 		lba = ((uint64_t)cfis[10] << 40) |
422 			((uint64_t)cfis[9] << 32) |
423 			((uint64_t)cfis[8] << 24) |
424 			((uint64_t)cfis[6] << 16) |
425 			((uint64_t)cfis[5] << 8) |
426 			cfis[4];
427 		len = cfis[13] << 8 | cfis[12];
428 		if (!len)
429 			len = 65536;
430 	} else {
431 		lba = ((cfis[7] & 0xf) << 24) | (cfis[6] << 16) |
432 			(cfis[5] << 8) | cfis[4];
433 		len = cfis[12];
434 		if (!len)
435 			len = 256;
436 	}
437 	lba *= blockif_sectsz(p->bctx);
438 	len *= blockif_sectsz(p->bctx);
439 
440 	/*
441 	 * Pull request off free list
442 	 */
443 	aior = STAILQ_FIRST(&p->iofhd);
444 	assert(aior != NULL);
445 	STAILQ_REMOVE_HEAD(&p->iofhd, io_list);
446 	aior->cfis = cfis;
447 	aior->slot = slot;
448 	aior->len = len;
449 	aior->done = done;
450 	breq = &aior->io_req;
451 	breq->br_offset = lba + done;
452 	iovcnt = hdr->prdtl - seek;
453 	if (iovcnt > BLOCKIF_IOV_MAX) {
454 		aior->prdtl = iovcnt - BLOCKIF_IOV_MAX;
455 		iovcnt = BLOCKIF_IOV_MAX;
456 	} else
457 		aior->prdtl = 0;
458 	breq->br_iovcnt = iovcnt;
459 
460 	/*
461 	 * Build up the iovec based on the prdt
462 	 */
463 	for (i = 0; i < iovcnt; i++) {
464 		breq->br_iov[i].iov_base = paddr_guest2host(ahci_ctx(sc),
465 				prdt->dba, prdt->dbc + 1);
466 		breq->br_iov[i].iov_len = prdt->dbc + 1;
467 		aior->done += (prdt->dbc + 1);
468 		prdt++;
469 	}
470 	if (readop)
471 		err = blockif_read(p->bctx, breq);
472 	else
473 		err = blockif_write(p->bctx, breq);
474 	assert(err == 0);
475 
476 	if (!aior->prdtl && ncq)
477 		p->ci &= ~(1 << slot);
478 }
479 
480 static void
481 ahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis)
482 {
483 	struct ahci_ioreq *aior;
484 	struct blockif_req *breq;
485 	int err;
486 
487 	/*
488 	 * Pull request off free list
489 	 */
490 	aior = STAILQ_FIRST(&p->iofhd);
491 	assert(aior != NULL);
492 	STAILQ_REMOVE_HEAD(&p->iofhd, io_list);
493 	aior->cfis = cfis;
494 	aior->slot = slot;
495 	aior->len = 0;
496 	breq = &aior->io_req;
497 
498 	err = blockif_flush(p->bctx, breq);
499 	assert(err == 0);
500 }
501 
502 static inline void
503 write_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
504 		void *buf, int size)
505 {
506 	struct ahci_cmd_hdr *hdr;
507 	struct ahci_prdt_entry *prdt;
508 	void *from;
509 	int i, len;
510 
511 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
512 	len = size;
513 	from = buf;
514 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
515 	for (i = 0; i < hdr->prdtl && len; i++) {
516 		uint8_t *ptr = paddr_guest2host(ahci_ctx(p->pr_sc),
517 				prdt->dba, prdt->dbc + 1);
518 		memcpy(ptr, from, prdt->dbc + 1);
519 		len -= (prdt->dbc + 1);
520 		from += (prdt->dbc + 1);
521 		prdt++;
522 	}
523 	hdr->prdbc = size - len;
524 }
525 
526 static void
527 handle_identify(struct ahci_port *p, int slot, uint8_t *cfis)
528 {
529 	struct ahci_cmd_hdr *hdr;
530 
531 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
532 	if (p->atapi || hdr->prdtl == 0) {
533 		p->tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
534 		p->is |= AHCI_P_IX_TFE;
535 	} else {
536 		uint16_t buf[256];
537 		uint64_t sectors;
538 
539 		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
540 		memset(buf, 0, sizeof(buf));
541 		buf[0] = 0x0040;
542 		/* TODO emulate different serial? */
543 		ata_string((uint8_t *)(buf+10), "123456", 20);
544 		ata_string((uint8_t *)(buf+23), "001", 8);
545 		ata_string((uint8_t *)(buf+27), "BHYVE SATA DISK", 40);
546 		buf[47] = (0x8000 | 128);
547 		buf[48] = 0x1;
548 		buf[49] = (1 << 8 | 1 << 9 | 1 << 11);
549 		buf[50] = (1 << 14);
550 		buf[53] = (1 << 1 | 1 << 2);
551 		if (p->mult_sectors)
552 			buf[59] = (0x100 | p->mult_sectors);
553 		buf[60] = sectors;
554 		buf[61] = (sectors >> 16);
555 		buf[63] = 0x7;
556 		if (p->xfermode & ATA_WDMA0)
557 			buf[63] |= (1 << ((p->xfermode & 7) + 8));
558 		buf[64] = 0x3;
559 		buf[65] = 100;
560 		buf[66] = 100;
561 		buf[67] = 100;
562 		buf[68] = 100;
563 		buf[75] = 31;
564 		buf[76] = (1 << 8 | 1 << 2);
565 		buf[80] = 0x1f0;
566 		buf[81] = 0x28;
567 		buf[82] = (1 << 5 | 1 << 14);
568 		buf[83] = (1 << 10 | 1 << 12 | 1 << 13 | 1 << 14);
569 		buf[84] = (1 << 14);
570 		buf[85] = (1 << 5 | 1 << 14);
571 		buf[86] = (1 << 10 | 1 << 12 | 1 << 13);
572 		buf[87] = (1 << 14);
573 		buf[88] = 0x7f;
574 		if (p->xfermode & ATA_UDMA0)
575 			buf[88] |= (1 << ((p->xfermode & 7) + 8));
576 		buf[93] = (1 | 1 <<14);
577 		buf[100] = sectors;
578 		buf[101] = (sectors >> 16);
579 		buf[102] = (sectors >> 32);
580 		buf[103] = (sectors >> 48);
581 		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
582 		p->tfd = ATA_S_DSC | ATA_S_READY;
583 		p->is |= AHCI_P_IX_DP;
584 	}
585 	p->ci &= ~(1 << slot);
586 	ahci_generate_intr(p->pr_sc);
587 }
588 
589 static void
590 handle_atapi_identify(struct ahci_port *p, int slot, uint8_t *cfis)
591 {
592 	if (!p->atapi) {
593 		p->tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
594 		p->is |= AHCI_P_IX_TFE;
595 	} else {
596 		uint16_t buf[256];
597 
598 		memset(buf, 0, sizeof(buf));
599 		buf[0] = (2 << 14 | 5 << 8 | 1 << 7 | 2 << 5);
600 		/* TODO emulate different serial? */
601 		ata_string((uint8_t *)(buf+10), "123456", 20);
602 		ata_string((uint8_t *)(buf+23), "001", 8);
603 		ata_string((uint8_t *)(buf+27), "BHYVE SATA DVD ROM", 40);
604 		buf[49] = (1 << 9 | 1 << 8);
605 		buf[50] = (1 << 14 | 1);
606 		buf[53] = (1 << 2 | 1 << 1);
607 		buf[62] = 0x3f;
608 		buf[63] = 7;
609 		buf[64] = 3;
610 		buf[65] = 100;
611 		buf[66] = 100;
612 		buf[67] = 100;
613 		buf[68] = 100;
614 		buf[76] = (1 << 2 | 1 << 1);
615 		buf[78] = (1 << 5);
616 		buf[80] = (0x1f << 4);
617 		buf[82] = (1 << 4);
618 		buf[83] = (1 << 14);
619 		buf[84] = (1 << 14);
620 		buf[85] = (1 << 4);
621 		buf[87] = (1 << 14);
622 		buf[88] = (1 << 14 | 0x7f);
623 		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
624 		p->tfd = ATA_S_DSC | ATA_S_READY;
625 		p->is |= AHCI_P_IX_DHR;
626 	}
627 	p->ci &= ~(1 << slot);
628 	ahci_generate_intr(p->pr_sc);
629 }
630 
631 static void
632 atapi_inquiry(struct ahci_port *p, int slot, uint8_t *cfis)
633 {
634 	uint8_t buf[36];
635 	uint8_t *acmd;
636 	int len;
637 
638 	acmd = cfis + 0x40;
639 
640 	buf[0] = 0x05;
641 	buf[1] = 0x80;
642 	buf[2] = 0x00;
643 	buf[3] = 0x21;
644 	buf[4] = 31;
645 	buf[5] = 0;
646 	buf[6] = 0;
647 	buf[7] = 0;
648 	atapi_string(buf + 8, "BHYVE", 8);
649 	atapi_string(buf + 16, "BHYVE DVD-ROM", 16);
650 	atapi_string(buf + 32, "001", 4);
651 
652 	len = sizeof(buf);
653 	if (len > acmd[4])
654 		len = acmd[4];
655 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
656 	write_prdt(p, slot, cfis, buf, len);
657 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
658 }
659 
660 static void
661 atapi_read_capacity(struct ahci_port *p, int slot, uint8_t *cfis)
662 {
663 	uint8_t buf[8];
664 	uint64_t sectors;
665 
666 	sectors = blockif_size(p->bctx) / 2048;
667 	be32enc(buf, sectors - 1);
668 	be32enc(buf + 4, 2048);
669 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
670 	write_prdt(p, slot, cfis, buf, sizeof(buf));
671 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
672 }
673 
674 static void
675 atapi_read_toc(struct ahci_port *p, int slot, uint8_t *cfis)
676 {
677 	uint8_t *acmd;
678 	uint8_t format;
679 	int len;
680 
681 	acmd = cfis + 0x40;
682 
683 	len = be16dec(acmd + 7);
684 	format = acmd[9] >> 6;
685 	switch (format) {
686 	case 0:
687 	{
688 		int msf, size;
689 		uint64_t sectors;
690 		uint8_t start_track, buf[20], *bp;
691 
692 		msf = (acmd[1] >> 1) & 1;
693 		start_track = acmd[6];
694 		if (start_track > 1 && start_track != 0xaa) {
695 			uint32_t tfd;
696 			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
697 			p->asc = 0x24;
698 			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
699 			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
700 			ahci_write_fis_d2h(p, slot, cfis, tfd);
701 			return;
702 		}
703 		bp = buf + 2;
704 		*bp++ = 1;
705 		*bp++ = 1;
706 		if (start_track <= 1) {
707 			*bp++ = 0;
708 			*bp++ = 0x14;
709 			*bp++ = 1;
710 			*bp++ = 0;
711 			if (msf) {
712 				*bp++ = 0;
713 				lba_to_msf(bp, 0);
714 				bp += 3;
715 			} else {
716 				*bp++ = 0;
717 				*bp++ = 0;
718 				*bp++ = 0;
719 				*bp++ = 0;
720 			}
721 		}
722 		*bp++ = 0;
723 		*bp++ = 0x14;
724 		*bp++ = 0xaa;
725 		*bp++ = 0;
726 		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
727 		sectors >>= 2;
728 		if (msf) {
729 			*bp++ = 0;
730 			lba_to_msf(bp, sectors);
731 			bp += 3;
732 		} else {
733 			be32enc(bp, sectors);
734 			bp += 4;
735 		}
736 		size = bp - buf;
737 		be16enc(buf, size - 2);
738 		if (len > size)
739 			len = size;
740 		write_prdt(p, slot, cfis, buf, len);
741 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
742 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
743 		break;
744 	}
745 	case 1:
746 	{
747 		uint8_t buf[12];
748 
749 		memset(buf, 0, sizeof(buf));
750 		buf[1] = 0xa;
751 		buf[2] = 0x1;
752 		buf[3] = 0x1;
753 		if (len > sizeof(buf))
754 			len = sizeof(buf);
755 		write_prdt(p, slot, cfis, buf, len);
756 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
757 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
758 		break;
759 	}
760 	case 2:
761 	{
762 		int msf, size;
763 		uint64_t sectors;
764 		uint8_t start_track, *bp, buf[50];
765 
766 		msf = (acmd[1] >> 1) & 1;
767 		start_track = acmd[6];
768 		bp = buf + 2;
769 		*bp++ = 1;
770 		*bp++ = 1;
771 
772 		*bp++ = 1;
773 		*bp++ = 0x14;
774 		*bp++ = 0;
775 		*bp++ = 0xa0;
776 		*bp++ = 0;
777 		*bp++ = 0;
778 		*bp++ = 0;
779 		*bp++ = 0;
780 		*bp++ = 1;
781 		*bp++ = 0;
782 		*bp++ = 0;
783 
784 		*bp++ = 1;
785 		*bp++ = 0x14;
786 		*bp++ = 0;
787 		*bp++ = 0xa1;
788 		*bp++ = 0;
789 		*bp++ = 0;
790 		*bp++ = 0;
791 		*bp++ = 0;
792 		*bp++ = 1;
793 		*bp++ = 0;
794 		*bp++ = 0;
795 
796 		*bp++ = 1;
797 		*bp++ = 0x14;
798 		*bp++ = 0;
799 		*bp++ = 0xa2;
800 		*bp++ = 0;
801 		*bp++ = 0;
802 		*bp++ = 0;
803 		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
804 		sectors >>= 2;
805 		if (msf) {
806 			*bp++ = 0;
807 			lba_to_msf(bp, sectors);
808 			bp += 3;
809 		} else {
810 			be32enc(bp, sectors);
811 			bp += 4;
812 		}
813 
814 		*bp++ = 1;
815 		*bp++ = 0x14;
816 		*bp++ = 0;
817 		*bp++ = 1;
818 		*bp++ = 0;
819 		*bp++ = 0;
820 		*bp++ = 0;
821 		if (msf) {
822 			*bp++ = 0;
823 			lba_to_msf(bp, 0);
824 			bp += 3;
825 		} else {
826 			*bp++ = 0;
827 			*bp++ = 0;
828 			*bp++ = 0;
829 			*bp++ = 0;
830 		}
831 
832 		size = bp - buf;
833 		be16enc(buf, size - 2);
834 		if (len > size)
835 			len = size;
836 		write_prdt(p, slot, cfis, buf, len);
837 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
838 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
839 		break;
840 	}
841 	default:
842 	{
843 		uint32_t tfd;
844 
845 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
846 		p->asc = 0x24;
847 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
848 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
849 		ahci_write_fis_d2h(p, slot, cfis, tfd);
850 		break;
851 	}
852 	}
853 }
854 
855 static void
856 atapi_read(struct ahci_port *p, int slot, uint8_t *cfis,
857 		uint32_t done, int seek)
858 {
859 	struct ahci_ioreq *aior;
860 	struct ahci_cmd_hdr *hdr;
861 	struct ahci_prdt_entry *prdt;
862 	struct blockif_req *breq;
863 	struct pci_ahci_softc *sc;
864 	uint8_t *acmd;
865 	uint64_t lba;
866 	uint32_t len;
867 	int i, err, iovcnt;
868 
869 	sc = p->pr_sc;
870 	acmd = cfis + 0x40;
871 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
872 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
873 
874 	prdt += seek;
875 	lba = be32dec(acmd + 2);
876 	if (acmd[0] == READ_10)
877 		len = be16dec(acmd + 7);
878 	else
879 		len = be32dec(acmd + 6);
880 	if (len == 0) {
881 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
882 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
883 	}
884 	lba *= 2048;
885 	len *= 2048;
886 
887 	/*
888 	 * Pull request off free list
889 	 */
890 	aior = STAILQ_FIRST(&p->iofhd);
891 	assert(aior != NULL);
892 	STAILQ_REMOVE_HEAD(&p->iofhd, io_list);
893 	aior->cfis = cfis;
894 	aior->slot = slot;
895 	aior->len = len;
896 	aior->done = done;
897 	breq = &aior->io_req;
898 	breq->br_offset = lba + done;
899 	iovcnt = hdr->prdtl - seek;
900 	if (iovcnt > BLOCKIF_IOV_MAX) {
901 		aior->prdtl = iovcnt - BLOCKIF_IOV_MAX;
902 		iovcnt = BLOCKIF_IOV_MAX;
903 	} else
904 		aior->prdtl = 0;
905 	breq->br_iovcnt = iovcnt;
906 
907 	/*
908 	 * Build up the iovec based on the prdt
909 	 */
910 	for (i = 0; i < iovcnt; i++) {
911 		breq->br_iov[i].iov_base = paddr_guest2host(ahci_ctx(sc),
912 		    prdt->dba, prdt->dbc + 1);
913 		breq->br_iov[i].iov_len = prdt->dbc + 1;
914 		aior->done += (prdt->dbc + 1);
915 		prdt++;
916 	}
917 	err = blockif_read(p->bctx, breq);
918 	assert(err == 0);
919 }
920 
921 static void
922 atapi_request_sense(struct ahci_port *p, int slot, uint8_t *cfis)
923 {
924 	uint8_t buf[64];
925 	uint8_t *acmd;
926 	int len;
927 
928 	acmd = cfis + 0x40;
929 	len = acmd[4];
930 	if (len > sizeof(buf))
931 		len = sizeof(buf);
932 	memset(buf, 0, len);
933 	buf[0] = 0x70 | (1 << 7);
934 	buf[2] = p->sense_key;
935 	buf[7] = 10;
936 	buf[12] = p->asc;
937 	write_prdt(p, slot, cfis, buf, len);
938 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
939 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
940 }
941 
942 static void
943 atapi_start_stop_unit(struct ahci_port *p, int slot, uint8_t *cfis)
944 {
945 	uint8_t *acmd = cfis + 0x40;
946 	uint32_t tfd;
947 
948 	switch (acmd[4] & 3) {
949 	case 0:
950 	case 1:
951 	case 3:
952 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
953 		tfd = ATA_S_READY | ATA_S_DSC;
954 		break;
955 	case 2:
956 		/* TODO eject media */
957 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
958 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
959 		p->asc = 0x53;
960 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
961 		break;
962 	}
963 	ahci_write_fis_d2h(p, slot, cfis, tfd);
964 }
965 
966 static void
967 atapi_mode_sense(struct ahci_port *p, int slot, uint8_t *cfis)
968 {
969 	uint8_t *acmd;
970 	uint32_t tfd;
971 	uint8_t pc, code;
972 	int len;
973 
974 	acmd = cfis + 0x40;
975 	len = be16dec(acmd + 7);
976 	pc = acmd[2] >> 6;
977 	code = acmd[2] & 0x3f;
978 
979 	switch (pc) {
980 	case 0:
981 		switch (code) {
982 		case MODEPAGE_RW_ERROR_RECOVERY:
983 		{
984 			uint8_t buf[16];
985 
986 			if (len > sizeof(buf))
987 				len = sizeof(buf);
988 
989 			memset(buf, 0, sizeof(buf));
990 			be16enc(buf, 16 - 2);
991 			buf[2] = 0x70;
992 			buf[8] = 0x01;
993 			buf[9] = 16 - 10;
994 			buf[11] = 0x05;
995 			write_prdt(p, slot, cfis, buf, len);
996 			tfd = ATA_S_READY | ATA_S_DSC;
997 			break;
998 		}
999 		case MODEPAGE_CD_CAPABILITIES:
1000 		{
1001 			uint8_t buf[30];
1002 
1003 			if (len > sizeof(buf))
1004 				len = sizeof(buf);
1005 
1006 			memset(buf, 0, sizeof(buf));
1007 			be16enc(buf, 30 - 2);
1008 			buf[2] = 0x70;
1009 			buf[8] = 0x2A;
1010 			buf[9] = 30 - 10;
1011 			buf[10] = 0x08;
1012 			buf[12] = 0x71;
1013 			be16enc(&buf[18], 2);
1014 			be16enc(&buf[20], 512);
1015 			write_prdt(p, slot, cfis, buf, len);
1016 			tfd = ATA_S_READY | ATA_S_DSC;
1017 			break;
1018 		}
1019 		default:
1020 			goto error;
1021 			break;
1022 		}
1023 		break;
1024 	case 3:
1025 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1026 		p->asc = 0x39;
1027 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1028 		break;
1029 error:
1030 	case 1:
1031 	case 2:
1032 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1033 		p->asc = 0x24;
1034 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1035 		break;
1036 	}
1037 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1038 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1039 }
1040 
1041 static void
1042 atapi_get_event_status_notification(struct ahci_port *p, int slot,
1043     uint8_t *cfis)
1044 {
1045 	uint8_t *acmd;
1046 	uint32_t tfd;
1047 
1048 	acmd = cfis + 0x40;
1049 
1050 	/* we don't support asynchronous operation */
1051 	if (!(acmd[1] & 1)) {
1052 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1053 		p->asc = 0x24;
1054 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1055 	} else {
1056 		uint8_t buf[8];
1057 		int len;
1058 
1059 		len = be16dec(acmd + 7);
1060 		if (len > sizeof(buf))
1061 			len = sizeof(buf);
1062 
1063 		memset(buf, 0, sizeof(buf));
1064 		be16enc(buf, 8 - 2);
1065 		buf[2] = 0x04;
1066 		buf[3] = 0x10;
1067 		buf[5] = 0x02;
1068 		write_prdt(p, slot, cfis, buf, len);
1069 		tfd = ATA_S_READY | ATA_S_DSC;
1070 	}
1071 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1072 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1073 }
1074 
1075 static void
1076 handle_packet_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1077 {
1078 	uint8_t *acmd;
1079 
1080 	acmd = cfis + 0x40;
1081 
1082 #ifdef AHCI_DEBUG
1083 	{
1084 		int i;
1085 		DPRINTF("ACMD:");
1086 		for (i = 0; i < 16; i++)
1087 			DPRINTF("%02x ", acmd[i]);
1088 		DPRINTF("\n");
1089 	}
1090 #endif
1091 
1092 	switch (acmd[0]) {
1093 	case TEST_UNIT_READY:
1094 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1095 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1096 		break;
1097 	case INQUIRY:
1098 		atapi_inquiry(p, slot, cfis);
1099 		break;
1100 	case READ_CAPACITY:
1101 		atapi_read_capacity(p, slot, cfis);
1102 		break;
1103 	case PREVENT_ALLOW:
1104 		/* TODO */
1105 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1106 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1107 		break;
1108 	case READ_TOC:
1109 		atapi_read_toc(p, slot, cfis);
1110 		break;
1111 	case READ_10:
1112 	case READ_12:
1113 		atapi_read(p, slot, cfis, 0, 0);
1114 		break;
1115 	case REQUEST_SENSE:
1116 		atapi_request_sense(p, slot, cfis);
1117 		break;
1118 	case START_STOP_UNIT:
1119 		atapi_start_stop_unit(p, slot, cfis);
1120 		break;
1121 	case MODE_SENSE_10:
1122 		atapi_mode_sense(p, slot, cfis);
1123 		break;
1124 	case GET_EVENT_STATUS_NOTIFICATION:
1125 		atapi_get_event_status_notification(p, slot, cfis);
1126 		break;
1127 	default:
1128 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1129 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1130 		p->asc = 0x20;
1131 		ahci_write_fis_d2h(p, slot, cfis, (p->sense_key << 12) |
1132 				ATA_S_READY | ATA_S_ERROR);
1133 		break;
1134 	}
1135 }
1136 
1137 static void
1138 ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1139 {
1140 
1141 	switch (cfis[2]) {
1142 	case ATA_ATA_IDENTIFY:
1143 		handle_identify(p, slot, cfis);
1144 		break;
1145 	case ATA_SETFEATURES:
1146 	{
1147 		switch (cfis[3]) {
1148 		case ATA_SF_ENAB_WCACHE:
1149 		case ATA_SF_DIS_WCACHE:
1150 		case ATA_SF_ENAB_RCACHE:
1151 		case ATA_SF_DIS_RCACHE:
1152 			p->tfd = ATA_S_DSC | ATA_S_READY;
1153 			break;
1154 		case ATA_SF_SETXFER:
1155 		{
1156 			switch (cfis[12] & 0xf8) {
1157 			case ATA_PIO:
1158 			case ATA_PIO0:
1159 				break;
1160 			case ATA_WDMA0:
1161 			case ATA_UDMA0:
1162 				p->xfermode = (cfis[12] & 0x7);
1163 				break;
1164 			}
1165 			p->tfd = ATA_S_DSC | ATA_S_READY;
1166 			break;
1167 		}
1168 		default:
1169 			p->tfd = ATA_S_ERROR | ATA_S_READY;
1170 			p->tfd |= (ATA_ERROR_ABORT << 8);
1171 			break;
1172 		}
1173 		p->is |= AHCI_P_IX_DP;
1174 		p->ci &= ~(1 << slot);
1175 		ahci_generate_intr(p->pr_sc);
1176 		break;
1177 	}
1178 	case ATA_SET_MULTI:
1179 		if (cfis[12] != 0 &&
1180 			(cfis[12] > 128 || (cfis[12] & (cfis[12] - 1)))) {
1181 			p->tfd = ATA_S_ERROR | ATA_S_READY;
1182 			p->tfd |= (ATA_ERROR_ABORT << 8);
1183 		} else {
1184 			p->mult_sectors = cfis[12];
1185 			p->tfd = ATA_S_DSC | ATA_S_READY;
1186 		}
1187 		p->is |= AHCI_P_IX_DP;
1188 		p->ci &= ~(1 << slot);
1189 		ahci_generate_intr(p->pr_sc);
1190 		break;
1191 	case ATA_READ_DMA:
1192 	case ATA_WRITE_DMA:
1193 	case ATA_READ_DMA48:
1194 	case ATA_WRITE_DMA48:
1195 	case ATA_READ_FPDMA_QUEUED:
1196 	case ATA_WRITE_FPDMA_QUEUED:
1197 		ahci_handle_dma(p, slot, cfis, 0, 0);
1198 		break;
1199 	case ATA_FLUSHCACHE:
1200 	case ATA_FLUSHCACHE48:
1201 		ahci_handle_flush(p, slot, cfis);
1202 		break;
1203 	case ATA_STANDBY_CMD:
1204 		break;
1205 	case ATA_NOP:
1206 	case ATA_STANDBY_IMMEDIATE:
1207 	case ATA_IDLE_IMMEDIATE:
1208 	case ATA_SLEEP:
1209 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1210 		break;
1211 	case ATA_ATAPI_IDENTIFY:
1212 		handle_atapi_identify(p, slot, cfis);
1213 		break;
1214 	case ATA_PACKET_CMD:
1215 		if (!p->atapi) {
1216 			p->tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1217 			p->is |= AHCI_P_IX_TFE;
1218 			p->ci &= ~(1 << slot);
1219 			ahci_generate_intr(p->pr_sc);
1220 		} else
1221 			handle_packet_cmd(p, slot, cfis);
1222 		break;
1223 	default:
1224 		WPRINTF("Unsupported cmd:%02x\n", cfis[2]);
1225 		p->tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1226 		p->is |= AHCI_P_IX_TFE;
1227 		p->ci &= ~(1 << slot);
1228 		ahci_generate_intr(p->pr_sc);
1229 		break;
1230 	}
1231 }
1232 
1233 static void
1234 ahci_handle_slot(struct ahci_port *p, int slot)
1235 {
1236 	struct ahci_cmd_hdr *hdr;
1237 	struct ahci_prdt_entry *prdt;
1238 	struct pci_ahci_softc *sc;
1239 	uint8_t *cfis;
1240 	int cfl;
1241 
1242 	sc = p->pr_sc;
1243 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1244 	cfl = (hdr->flags & 0x1f) * 4;
1245 	cfis = paddr_guest2host(ahci_ctx(sc), hdr->ctba,
1246 			0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry));
1247 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1248 
1249 #ifdef AHCI_DEBUG
1250 	DPRINTF("\ncfis:");
1251 	for (i = 0; i < cfl; i++) {
1252 		if (i % 10 == 0)
1253 			DPRINTF("\n");
1254 		DPRINTF("%02x ", cfis[i]);
1255 	}
1256 	DPRINTF("\n");
1257 
1258 	for (i = 0; i < hdr->prdtl; i++) {
1259 		DPRINTF("%d@%08"PRIx64"\n", prdt->dbc & 0x3fffff, prdt->dba);
1260 		prdt++;
1261 	}
1262 #endif
1263 
1264 	if (cfis[0] != FIS_TYPE_REGH2D) {
1265 		WPRINTF("Not a H2D FIS:%02x\n", cfis[0]);
1266 		return;
1267 	}
1268 
1269 	if (cfis[1] & 0x80) {
1270 		ahci_handle_cmd(p, slot, cfis);
1271 	} else {
1272 		if (cfis[15] & (1 << 2))
1273 			p->reset = 1;
1274 		else if (p->reset) {
1275 			p->reset = 0;
1276 			ahci_port_reset(p);
1277 		}
1278 		p->ci &= ~(1 << slot);
1279 	}
1280 }
1281 
1282 static void
1283 ahci_handle_port(struct ahci_port *p)
1284 {
1285 	int i;
1286 
1287 	if (!(p->cmd & AHCI_P_CMD_ST))
1288 		return;
1289 
1290 	for (i = 0; (i < 32) && p->ci; i++) {
1291 		if (p->ci & (1 << i))
1292 			ahci_handle_slot(p, i);
1293 	}
1294 }
1295 
1296 /*
1297  * blockif callback routine - this runs in the context of the blockif
1298  * i/o thread, so the mutex needs to be acquired.
1299  */
1300 static void
1301 ata_ioreq_cb(struct blockif_req *br, int err)
1302 {
1303 	struct ahci_cmd_hdr *hdr;
1304 	struct ahci_ioreq *aior;
1305 	struct ahci_port *p;
1306 	struct pci_ahci_softc *sc;
1307 	uint32_t tfd;
1308 	uint8_t *cfis;
1309 	int pending, slot, ncq;
1310 
1311 	DPRINTF("%s %d\n", __func__, err);
1312 
1313 	ncq = 0;
1314 	aior = br->br_param;
1315 	p = aior->io_pr;
1316 	cfis = aior->cfis;
1317 	slot = aior->slot;
1318 	pending = aior->prdtl;
1319 	sc = p->pr_sc;
1320 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1321 
1322 	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
1323 			cfis[2] == ATA_READ_FPDMA_QUEUED)
1324 		ncq = 1;
1325 
1326 	pthread_mutex_lock(&sc->mtx);
1327 
1328 	/*
1329 	 * Move the blockif request back to the free list
1330 	 */
1331 	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_list);
1332 
1333 	if (pending && !err) {
1334 		ahci_handle_dma(p, slot, cfis, aior->done,
1335 		    hdr->prdtl - pending);
1336 		goto out;
1337 	}
1338 
1339 	if (!err && aior->done == aior->len) {
1340 		tfd = ATA_S_READY | ATA_S_DSC;
1341 		if (ncq)
1342 			hdr->prdbc = 0;
1343 		else
1344 			hdr->prdbc = aior->len;
1345 	} else {
1346 		tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1347 		hdr->prdbc = 0;
1348 		if (ncq)
1349 			p->serr |= (1 << slot);
1350 	}
1351 
1352 	if (ncq) {
1353 		p->sact &= ~(1 << slot);
1354 		ahci_write_fis_sdb(p, slot, tfd);
1355 	} else
1356 		ahci_write_fis_d2h(p, slot, cfis, tfd);
1357 
1358 out:
1359 	pthread_mutex_unlock(&sc->mtx);
1360 	DPRINTF("%s exit\n", __func__);
1361 }
1362 
1363 static void
1364 atapi_ioreq_cb(struct blockif_req *br, int err)
1365 {
1366 	struct ahci_cmd_hdr *hdr;
1367 	struct ahci_ioreq *aior;
1368 	struct ahci_port *p;
1369 	struct pci_ahci_softc *sc;
1370 	uint8_t *cfis;
1371 	uint32_t tfd;
1372 	int pending, slot;
1373 
1374 	DPRINTF("%s %d\n", __func__, err);
1375 
1376 	aior = br->br_param;
1377 	p = aior->io_pr;
1378 	cfis = aior->cfis;
1379 	slot = aior->slot;
1380 	pending = aior->prdtl;
1381 	sc = p->pr_sc;
1382 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + aior->slot * AHCI_CL_SIZE);
1383 
1384 	pthread_mutex_lock(&sc->mtx);
1385 
1386 	/*
1387 	 * Move the blockif request back to the free list
1388 	 */
1389 	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_list);
1390 
1391 	if (pending && !err) {
1392 		atapi_read(p, slot, cfis, aior->done, hdr->prdtl - pending);
1393 		goto out;
1394 	}
1395 
1396 	if (!err && aior->done == aior->len) {
1397 		tfd = ATA_S_READY | ATA_S_DSC;
1398 		hdr->prdbc = aior->len;
1399 	} else {
1400 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1401 		p->asc = 0x21;
1402 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1403 		hdr->prdbc = 0;
1404 	}
1405 
1406 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1407 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1408 
1409 out:
1410 	pthread_mutex_unlock(&sc->mtx);
1411 	DPRINTF("%s exit\n", __func__);
1412 }
1413 
1414 static void
1415 pci_ahci_ioreq_init(struct ahci_port *pr)
1416 {
1417 	struct ahci_ioreq *vr;
1418 	int i;
1419 
1420 	pr->ioqsz = blockif_queuesz(pr->bctx);
1421 	pr->ioreq = calloc(pr->ioqsz, sizeof(struct ahci_ioreq));
1422 	STAILQ_INIT(&pr->iofhd);
1423 
1424 	/*
1425 	 * Add all i/o request entries to the free queue
1426 	 */
1427 	for (i = 0; i < pr->ioqsz; i++) {
1428 		vr = &pr->ioreq[i];
1429 		vr->io_pr = pr;
1430 		if (!pr->atapi)
1431 			vr->io_req.br_callback = ata_ioreq_cb;
1432 		else
1433 			vr->io_req.br_callback = atapi_ioreq_cb;
1434 		vr->io_req.br_param = vr;
1435 		STAILQ_INSERT_TAIL(&pr->iofhd, vr, io_list);
1436 	}
1437 }
1438 
1439 static void
1440 pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
1441 {
1442 	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
1443 	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
1444 	struct ahci_port *p = &sc->port[port];
1445 
1446 	DPRINTF("pci_ahci_port %d: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
1447 		port, offset, value);
1448 
1449 	switch (offset) {
1450 	case AHCI_P_CLB:
1451 		p->clb = value;
1452 		break;
1453 	case AHCI_P_CLBU:
1454 		p->clbu = value;
1455 		break;
1456 	case AHCI_P_FB:
1457 		p->fb = value;
1458 		break;
1459 	case AHCI_P_FBU:
1460 		p->fbu = value;
1461 		break;
1462 	case AHCI_P_IS:
1463 		p->is &= ~value;
1464 		break;
1465 	case AHCI_P_IE:
1466 		p->ie = value & 0xFDC000FF;
1467 		ahci_generate_intr(sc);
1468 		break;
1469 	case AHCI_P_CMD:
1470 	{
1471 		p->cmd = value;
1472 
1473 		if (!(value & AHCI_P_CMD_ST)) {
1474 			p->cmd &= ~(AHCI_P_CMD_CR | AHCI_P_CMD_CCS_MASK);
1475 			p->ci = 0;
1476 			p->sact = 0;
1477 		} else {
1478 			uint64_t clb;
1479 
1480 			p->cmd |= AHCI_P_CMD_CR;
1481 			clb = (uint64_t)p->clbu << 32 | p->clb;
1482 			p->cmd_lst = paddr_guest2host(ahci_ctx(sc), clb,
1483 					AHCI_CL_SIZE * AHCI_MAX_SLOTS);
1484 		}
1485 
1486 		if (value & AHCI_P_CMD_FRE) {
1487 			uint64_t fb;
1488 
1489 			p->cmd |= AHCI_P_CMD_FR;
1490 			fb = (uint64_t)p->fbu << 32 | p->fb;
1491 			/* we don't support FBSCP, so rfis size is 256Bytes */
1492 			p->rfis = paddr_guest2host(ahci_ctx(sc), fb, 256);
1493 		} else {
1494 			p->cmd &= ~AHCI_P_CMD_FR;
1495 		}
1496 
1497 		if (value & AHCI_P_CMD_CLO) {
1498 			p->tfd = 0;
1499 			p->cmd &= ~AHCI_P_CMD_CLO;
1500 		}
1501 
1502 		ahci_handle_port(p);
1503 		break;
1504 	}
1505 	case AHCI_P_TFD:
1506 	case AHCI_P_SIG:
1507 	case AHCI_P_SSTS:
1508 		WPRINTF("pci_ahci_port: read only registers 0x%"PRIx64"\n", offset);
1509 		break;
1510 	case AHCI_P_SCTL:
1511 		if (!(p->cmd & AHCI_P_CMD_ST)) {
1512 			if (value & ATA_SC_DET_RESET)
1513 				ahci_port_reset(p);
1514 			p->sctl = value;
1515 		}
1516 		break;
1517 	case AHCI_P_SERR:
1518 		p->serr &= ~value;
1519 		break;
1520 	case AHCI_P_SACT:
1521 		p->sact |= value;
1522 		break;
1523 	case AHCI_P_CI:
1524 		p->ci |= value;
1525 		ahci_handle_port(p);
1526 		break;
1527 	case AHCI_P_SNTF:
1528 	case AHCI_P_FBS:
1529 	default:
1530 		break;
1531 	}
1532 }
1533 
1534 static void
1535 pci_ahci_host_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
1536 {
1537 	DPRINTF("pci_ahci_host: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
1538 		offset, value);
1539 
1540 	switch (offset) {
1541 	case AHCI_CAP:
1542 	case AHCI_PI:
1543 	case AHCI_VS:
1544 	case AHCI_CAP2:
1545 		DPRINTF("pci_ahci_host: read only registers 0x%"PRIx64"\n", offset);
1546 		break;
1547 	case AHCI_GHC:
1548 		if (value & AHCI_GHC_HR)
1549 			ahci_reset(sc);
1550 		else if (value & AHCI_GHC_IE) {
1551 			sc->ghc |= AHCI_GHC_IE;
1552 			ahci_generate_intr(sc);
1553 		}
1554 		break;
1555 	case AHCI_IS:
1556 		sc->is &= ~value;
1557 		ahci_generate_intr(sc);
1558 		break;
1559 	default:
1560 		break;
1561 	}
1562 }
1563 
1564 static void
1565 pci_ahci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
1566 		int baridx, uint64_t offset, int size, uint64_t value)
1567 {
1568 	struct pci_ahci_softc *sc = pi->pi_arg;
1569 
1570 	assert(baridx == 5);
1571 	assert(size == 4);
1572 
1573 	pthread_mutex_lock(&sc->mtx);
1574 
1575 	if (offset < AHCI_OFFSET)
1576 		pci_ahci_host_write(sc, offset, value);
1577 	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
1578 		pci_ahci_port_write(sc, offset, value);
1579 	else
1580 		WPRINTF("pci_ahci: unknown i/o write offset 0x%"PRIx64"\n", offset);
1581 
1582 	pthread_mutex_unlock(&sc->mtx);
1583 }
1584 
1585 static uint64_t
1586 pci_ahci_host_read(struct pci_ahci_softc *sc, uint64_t offset)
1587 {
1588 	uint32_t value;
1589 
1590 	switch (offset) {
1591 	case AHCI_CAP:
1592 	case AHCI_GHC:
1593 	case AHCI_IS:
1594 	case AHCI_PI:
1595 	case AHCI_VS:
1596 	case AHCI_CCCC:
1597 	case AHCI_CCCP:
1598 	case AHCI_EM_LOC:
1599 	case AHCI_EM_CTL:
1600 	case AHCI_CAP2:
1601 	{
1602 		uint32_t *p = &sc->cap;
1603 		p += (offset - AHCI_CAP) / sizeof(uint32_t);
1604 		value = *p;
1605 		break;
1606 	}
1607 	default:
1608 		value = 0;
1609 		break;
1610 	}
1611 	DPRINTF("pci_ahci_host: read offset 0x%"PRIx64" value 0x%x\n",
1612 		offset, value);
1613 
1614 	return (value);
1615 }
1616 
1617 static uint64_t
1618 pci_ahci_port_read(struct pci_ahci_softc *sc, uint64_t offset)
1619 {
1620 	uint32_t value;
1621 	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
1622 	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
1623 
1624 	switch (offset) {
1625 	case AHCI_P_CLB:
1626 	case AHCI_P_CLBU:
1627 	case AHCI_P_FB:
1628 	case AHCI_P_FBU:
1629 	case AHCI_P_IS:
1630 	case AHCI_P_IE:
1631 	case AHCI_P_CMD:
1632 	case AHCI_P_TFD:
1633 	case AHCI_P_SIG:
1634 	case AHCI_P_SSTS:
1635 	case AHCI_P_SCTL:
1636 	case AHCI_P_SERR:
1637 	case AHCI_P_SACT:
1638 	case AHCI_P_CI:
1639 	case AHCI_P_SNTF:
1640 	case AHCI_P_FBS:
1641 	{
1642 		uint32_t *p= &sc->port[port].clb;
1643 		p += (offset - AHCI_P_CLB) / sizeof(uint32_t);
1644 		value = *p;
1645 		break;
1646 	}
1647 	default:
1648 		value = 0;
1649 		break;
1650 	}
1651 
1652 	DPRINTF("pci_ahci_port %d: read offset 0x%"PRIx64" value 0x%x\n",
1653 		port, offset, value);
1654 
1655 	return value;
1656 }
1657 
1658 static uint64_t
1659 pci_ahci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
1660     uint64_t offset, int size)
1661 {
1662 	struct pci_ahci_softc *sc = pi->pi_arg;
1663 	uint32_t value;
1664 
1665 	assert(baridx == 5);
1666 	assert(size == 4);
1667 
1668 	pthread_mutex_lock(&sc->mtx);
1669 
1670 	if (offset < AHCI_OFFSET)
1671 		value = pci_ahci_host_read(sc, offset);
1672 	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
1673 		value = pci_ahci_port_read(sc, offset);
1674 	else {
1675 		value = 0;
1676 		WPRINTF("pci_ahci: unknown i/o read offset 0x%"PRIx64"\n", offset);
1677 	}
1678 
1679 	pthread_mutex_unlock(&sc->mtx);
1680 
1681 	return (value);
1682 }
1683 
1684 static int
1685 pci_ahci_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts, int atapi)
1686 {
1687 	char bident[sizeof("XX:X:X")];
1688 	struct blockif_ctxt *bctxt;
1689 	struct pci_ahci_softc *sc;
1690 	int ret, slots;
1691 
1692 	ret = 0;
1693 
1694 	if (opts == NULL) {
1695 		fprintf(stderr, "pci_ahci: backing device required\n");
1696 		return (1);
1697 	}
1698 
1699 #ifdef AHCI_DEBUG
1700 	dbg = fopen("/tmp/log", "w+");
1701 #endif
1702 
1703        	sc = malloc(sizeof(struct pci_ahci_softc));
1704 	memset(sc, 0, sizeof(struct pci_ahci_softc));
1705 	pi->pi_arg = sc;
1706 	sc->asc_pi = pi;
1707 	sc->ports = MAX_PORTS;
1708 
1709 	/*
1710 	 * Only use port 0 for a backing device. All other ports will be
1711 	 * marked as unused
1712 	 */
1713 	sc->port[0].atapi = atapi;
1714 
1715 	/*
1716 	 * Attempt to open the backing image. Use the PCI
1717 	 * slot/func for the identifier string.
1718 	 */
1719 	snprintf(bident, sizeof(bident), "%d:%d", pi->pi_slot, pi->pi_func);
1720 	bctxt = blockif_open(opts, bident);
1721 	if (bctxt == NULL) {
1722 		ret = 1;
1723 		goto open_fail;
1724 	}
1725 	sc->port[0].bctx = bctxt;
1726 	sc->port[0].pr_sc = sc;
1727 
1728 	/*
1729 	 * Allocate blockif request structures and add them
1730 	 * to the free list
1731 	 */
1732 	pci_ahci_ioreq_init(&sc->port[0]);
1733 
1734 	pthread_mutex_init(&sc->mtx, NULL);
1735 
1736 	/* Intel ICH8 AHCI */
1737 	slots = sc->port[0].ioqsz;
1738 	if (slots > 32)
1739 		slots = 32;
1740 	--slots;
1741 	sc->cap = AHCI_CAP_64BIT | AHCI_CAP_SNCQ | AHCI_CAP_SSNTF |
1742 	    AHCI_CAP_SMPS | AHCI_CAP_SSS | AHCI_CAP_SALP |
1743 	    AHCI_CAP_SAL | AHCI_CAP_SCLO | (0x3 << AHCI_CAP_ISS_SHIFT)|
1744 	    AHCI_CAP_PMD | AHCI_CAP_SSC | AHCI_CAP_PSC |
1745 	    (slots << AHCI_CAP_NCS_SHIFT) | AHCI_CAP_SXS | (sc->ports - 1);
1746 
1747 	/* Only port 0 implemented */
1748 	sc->pi = 1;
1749 	sc->vs = 0x10300;
1750 	sc->cap2 = AHCI_CAP2_APST;
1751 	ahci_reset(sc);
1752 
1753 	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x2821);
1754 	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x8086);
1755 	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
1756 	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_STORAGE_SATA);
1757 	pci_set_cfgdata8(pi, PCIR_PROGIF, PCIP_STORAGE_SATA_AHCI_1_0);
1758 	pci_emul_add_msicap(pi, 1);
1759 	pci_emul_alloc_bar(pi, 5, PCIBAR_MEM32,
1760 	    AHCI_OFFSET + sc->ports * AHCI_STEP);
1761 
1762 open_fail:
1763 	if (ret) {
1764 		blockif_close(sc->port[0].bctx);
1765 		free(sc);
1766 	}
1767 
1768 	return (ret);
1769 }
1770 
/*
 * Init hook of the disk emulation: the common initialisation with the
 * ATAPI flag off.  Returns whatever pci_ahci_init() returns.
 */
static int
pci_ahci_hd_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int is_atapi = 0;

	return (pci_ahci_init(ctx, pi, opts, is_atapi));
}
1777 
/*
 * Init hook of the ATAPI emulation: the common initialisation with the
 * ATAPI flag on.  Returns whatever pci_ahci_init() returns.
 */
static int
pci_ahci_atapi_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int is_atapi = 1;

	return (pci_ahci_init(ctx, pi, opts, is_atapi));
}
1784 
1785 /*
1786  * Use separate emulation names to distinguish drive and atapi devices
1787  */
1788 struct pci_devemu pci_de_ahci_hd = {
1789 	.pe_emu =	"ahci-hd",
1790 	.pe_init =	pci_ahci_hd_init,
1791 	.pe_barwrite =	pci_ahci_write,
1792 	.pe_barread =	pci_ahci_read
1793 };
1794 PCI_EMUL_SET(pci_de_ahci_hd);
1795 
1796 struct pci_devemu pci_de_ahci_cd = {
1797 	.pe_emu =	"ahci-cd",
1798 	.pe_init =	pci_ahci_atapi_init,
1799 	.pe_barwrite =	pci_ahci_write,
1800 	.pe_barread =	pci_ahci_read
1801 };
1802 PCI_EMUL_SET(pci_de_ahci_cd);
1803