xref: /freebsd/usr.sbin/bhyve/pci_ahci.c (revision d8b88105c2ccf7686552516877f541efb54fb6c8)
1 /*-
2  * Copyright (c) 2013  Zhixiang Yu <zcore@freebsd.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/linker_set.h>
34 #include <sys/stat.h>
35 #include <sys/uio.h>
36 #include <sys/ioctl.h>
37 #include <sys/disk.h>
38 #include <sys/ata.h>
39 #include <sys/endian.h>
40 
41 #include <errno.h>
42 #include <fcntl.h>
43 #include <stdio.h>
44 #include <stdlib.h>
45 #include <stdint.h>
46 #include <string.h>
47 #include <strings.h>
48 #include <unistd.h>
49 #include <assert.h>
50 #include <pthread.h>
51 #include <inttypes.h>
52 
53 #include "bhyverun.h"
54 #include "pci_emul.h"
55 #include "ahci.h"
56 #include "block_if.h"
57 
/* Size of the per-controller port array (struct pci_ahci_softc). */
#define	MAX_PORTS	6	/* Intel ICH8 AHCI supports 6 ports */

/* Device signatures stored in a port's `sig` field on port reset. */
#define	PxSIG_ATA	0x00000101 /* ATA drive */
#define	PxSIG_ATAPI	0xeb140101 /* ATAPI drive */
62 
/*
 * SATA Frame Information Structure type codes.  The value is the first
 * byte of a FIS; ahci_write_fis() uses it to pick the slot in the
 * received-FIS area that a device-to-host FIS is copied into.
 */
enum sata_fis_type {
	FIS_TYPE_REGH2D		= 0x27,	/* Register FIS - host to device */
	FIS_TYPE_REGD2H		= 0x34,	/* Register FIS - device to host */
	FIS_TYPE_DMAACT		= 0x39,	/* DMA activate FIS - device to host */
	FIS_TYPE_DMASETUP	= 0x41,	/* DMA setup FIS - bidirectional */
	FIS_TYPE_DATA		= 0x46,	/* Data FIS - bidirectional */
	FIS_TYPE_BIST		= 0x58,	/* BIST activate FIS - bidirectional */
	FIS_TYPE_PIOSETUP	= 0x5F,	/* PIO setup FIS - device to host */
	FIS_TYPE_SETDEVBITS	= 0xA1,	/* Set dev bits FIS - device to host */
};
73 
/*
 * SCSI opcodes
 *
 * First byte of the ATAPI command packet (located at offset 0x40 of the
 * command table); handle_packet_cmd() dispatches on these.
 */
#define	TEST_UNIT_READY		0x00
#define	REQUEST_SENSE		0x03
#define	INQUIRY			0x12
#define	START_STOP_UNIT		0x1B
#define	PREVENT_ALLOW		0x1E
#define	READ_CAPACITY		0x25
#define	READ_10			0x28
#define	POSITION_TO_ELEMENT	0x2B
#define	READ_TOC		0x43
#define	GET_EVENT_STATUS_NOTIFICATION 0x4A
#define	MODE_SENSE_10		0x5A
#define	READ_12			0xA8
#define	READ_CD			0xBE

/*
 * SCSI mode page codes
 *
 * Low six bits of byte 2 of a MODE SENSE(10) packet.
 */
#define	MODEPAGE_RW_ERROR_RECOVERY	0x01
#define	MODEPAGE_CD_CAPABILITIES	0x2A
96 
/*
 * Debug printf
 *
 * DPRINTF() writes to the `dbg` stream and flushes it when AHCI_DEBUG is
 * defined; otherwise it expands to an empty statement so that it can be
 * used safely as the sole body of an if/else.  WPRINTF() always prints to
 * stdout.  The `##` before __VA_ARGS__ lets both macros be invoked with a
 * format string only.
 */
#ifdef AHCI_DEBUG
static FILE *dbg;
#define DPRINTF(format, ...)						\
	do {								\
		fprintf(dbg, format, ##__VA_ARGS__);			\
		fflush(dbg);						\
	} while (0)
#else
#define DPRINTF(format, ...)	do { } while (0)
#endif
#define WPRINTF(format, ...) printf(format, ##__VA_ARGS__)
107 
108 struct ahci_ioreq {
109 	struct blockif_req io_req;
110 	struct ahci_port *io_pr;
111 	STAILQ_ENTRY(ahci_ioreq) io_list;
112 	uint8_t *cfis;
113 	uint32_t len;
114 	uint32_t done;
115 	int slot;
116 	int prdtl;
117 };
118 
119 struct ahci_port {
120 	struct blockif_ctxt *bctx;
121 	struct pci_ahci_softc *pr_sc;
122 	uint8_t *cmd_lst;
123 	uint8_t *rfis;
124 	int atapi;
125 	int reset;
126 	int mult_sectors;
127 	uint8_t xfermode;
128 	uint8_t sense_key;
129 	uint8_t asc;
130 
131 	uint32_t clb;
132 	uint32_t clbu;
133 	uint32_t fb;
134 	uint32_t fbu;
135 	uint32_t is;
136 	uint32_t ie;
137 	uint32_t cmd;
138 	uint32_t unused0;
139 	uint32_t tfd;
140 	uint32_t sig;
141 	uint32_t ssts;
142 	uint32_t sctl;
143 	uint32_t serr;
144 	uint32_t sact;
145 	uint32_t ci;
146 	uint32_t sntf;
147 	uint32_t fbs;
148 
149 	/*
150 	 * i/o request info
151 	 */
152 	struct ahci_ioreq *ioreq;
153 	int ioqsz;
154 	STAILQ_HEAD(ahci_fhead, ahci_ioreq) iofhd;
155 };
156 
/*
 * Command header: one entry of a port's command list.  Entries are read
 * directly from guest memory at cmd_lst + slot * AHCI_CL_SIZE.
 */
struct ahci_cmd_hdr {
	uint16_t flags;		/* low 5 bits: command FIS length in dwords */
	uint16_t prdtl;		/* number of PRDT entries in the command table */
	uint32_t prdbc;		/* byte count transferred, filled in by write_prdt() */
	uint64_t ctba;		/* guest physical address of the command table */
	uint32_t reserved[4];
};
164 
/*
 * Physical region descriptor: one scatter/gather element of a command
 * table.  The table of descriptors starts at offset 0x80 of the command
 * table.
 */
struct ahci_prdt_entry {
	uint64_t dba;		/* guest physical address of the data buffer */
	uint32_t reserved;
#define	DBCMASK		0x3fffff
	uint32_t dbc;		/* (dbc & DBCMASK) + 1 is the buffer size in bytes */
};
171 
172 struct pci_ahci_softc {
173 	struct pci_devinst *asc_pi;
174 	pthread_mutex_t	mtx;
175 	int ports;
176 	uint32_t cap;
177 	uint32_t ghc;
178 	uint32_t is;
179 	uint32_t pi;
180 	uint32_t vs;
181 	uint32_t ccc_ctl;
182 	uint32_t ccc_pts;
183 	uint32_t em_loc;
184 	uint32_t em_ctl;
185 	uint32_t cap2;
186 	uint32_t bohc;
187 	struct ahci_port port[MAX_PORTS];
188 };
189 #define	ahci_ctx(sc)	((sc)->asc_pi->pi_vmctx)
190 
/*
 * Convert a logical block address to a minute/second/frame triple.
 *
 * A fixed offset of 150 frames is added first; there are 75 frames per
 * second and 60 seconds per minute.  The result is stored as
 * buf[0] = minute, buf[1] = second, buf[2] = frame.
 */
static inline void lba_to_msf(uint8_t *buf, int lba)
{
	int frames, seconds;

	frames = lba + 150;
	seconds = frames / 75;

	buf[0] = seconds / 60;
	buf[1] = seconds % 60;
	buf[2] = frames % 75;
}
198 
199 /*
200  * generate HBA intr depending on whether or not ports within
201  * the controller have an interrupt pending.
202  */
203 static void
204 ahci_generate_intr(struct pci_ahci_softc *sc)
205 {
206 	int i;
207 
208 	for (i = 0; i < sc->ports; i++) {
209 		struct ahci_port *pr;
210 		pr = &sc->port[i];
211 		if (pr->is & pr->ie)
212 			sc->is |= (1 << i);
213 	}
214 
215 	DPRINTF("%s %x\n", __func__, sc->is);
216 
217 	if (sc->is && (sc->ghc & AHCI_GHC_IE))
218 		pci_generate_msi(sc->asc_pi, 0);
219 }
220 
221 static void
222 ahci_write_fis(struct ahci_port *p, enum sata_fis_type ft, uint8_t *fis)
223 {
224 	int offset, len, irq;
225 
226 	if (p->rfis == NULL || !(p->cmd & AHCI_P_CMD_FRE))
227 		return;
228 
229 	switch (ft) {
230 	case FIS_TYPE_REGD2H:
231 		offset = 0x40;
232 		len = 20;
233 		irq = AHCI_P_IX_DHR;
234 		break;
235 	case FIS_TYPE_SETDEVBITS:
236 		offset = 0x58;
237 		len = 8;
238 		irq = AHCI_P_IX_SDB;
239 		break;
240 	case FIS_TYPE_PIOSETUP:
241 		offset = 0x20;
242 		len = 20;
243 		irq = 0;
244 		break;
245 	default:
246 		WPRINTF("unsupported fis type %d\n", ft);
247 		return;
248 	}
249 	memcpy(p->rfis + offset, fis, len);
250 	if (irq) {
251 		p->is |= irq;
252 		ahci_generate_intr(p->pr_sc);
253 	}
254 }
255 
256 static void
257 ahci_write_fis_sdb(struct ahci_port *p, int slot, uint32_t tfd)
258 {
259 	uint8_t fis[8];
260 	uint8_t error;
261 
262 	error = (tfd >> 8) & 0xff;
263 	memset(fis, 0, sizeof(fis));
264 	fis[0] = error;
265 	fis[2] = tfd & 0x77;
266 	*(uint32_t *)(fis + 4) = (1 << slot);
267 	if (fis[2] & ATA_S_ERROR)
268 		p->is |= AHCI_P_IX_TFE;
269 	p->tfd = tfd;
270 	ahci_write_fis(p, FIS_TYPE_SETDEVBITS, fis);
271 }
272 
273 static void
274 ahci_write_fis_d2h(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
275 {
276 	uint8_t fis[20];
277 	uint8_t error;
278 
279 	error = (tfd >> 8) & 0xff;
280 	memset(fis, 0, sizeof(fis));
281 	fis[0] = FIS_TYPE_REGD2H;
282 	fis[1] = (1 << 6);
283 	fis[2] = tfd & 0xff;
284 	fis[3] = error;
285 	fis[4] = cfis[4];
286 	fis[5] = cfis[5];
287 	fis[6] = cfis[6];
288 	fis[7] = cfis[7];
289 	fis[8] = cfis[8];
290 	fis[9] = cfis[9];
291 	fis[10] = cfis[10];
292 	fis[11] = cfis[11];
293 	fis[12] = cfis[12];
294 	fis[13] = cfis[13];
295 	if (fis[2] & ATA_S_ERROR)
296 		p->is |= AHCI_P_IX_TFE;
297 	p->tfd = tfd;
298 	p->ci &= ~(1 << slot);
299 	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
300 }
301 
302 static void
303 ahci_write_reset_fis_d2h(struct ahci_port *p)
304 {
305 	uint8_t fis[20];
306 
307 	memset(fis, 0, sizeof(fis));
308 	fis[0] = FIS_TYPE_REGD2H;
309 	fis[3] = 1;
310 	fis[4] = 1;
311 	if (p->atapi) {
312 		fis[5] = 0x14;
313 		fis[6] = 0xeb;
314 	}
315 	fis[12] = 1;
316 	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
317 }
318 
319 static void
320 ahci_port_reset(struct ahci_port *pr)
321 {
322 	pr->sctl = 0;
323 	pr->serr = 0;
324 	pr->sact = 0;
325 	pr->xfermode = ATA_UDMA6;
326 	pr->mult_sectors = 128;
327 
328 	if (!pr->bctx) {
329 		pr->ssts = ATA_SS_DET_NO_DEVICE;
330 		pr->sig = 0xFFFFFFFF;
331 		pr->tfd = 0x7F;
332 		return;
333 	}
334 	pr->ssts = ATA_SS_DET_PHY_ONLINE | ATA_SS_SPD_GEN2 |
335 		ATA_SS_IPM_ACTIVE;
336 	pr->tfd = (1 << 8) | ATA_S_DSC | ATA_S_DMA;
337 	if (!pr->atapi) {
338 		pr->sig = PxSIG_ATA;
339 		pr->tfd |= ATA_S_READY;
340 	} else
341 		pr->sig = PxSIG_ATAPI;
342 	ahci_write_reset_fis_d2h(pr);
343 }
344 
345 static void
346 ahci_reset(struct pci_ahci_softc *sc)
347 {
348 	int i;
349 
350 	sc->ghc = AHCI_GHC_AE;
351 	sc->is = 0;
352 	for (i = 0; i < sc->ports; i++) {
353 		sc->port[i].ie = 0;
354 		sc->port[i].is = 0;
355 		ahci_port_reset(&sc->port[i]);
356 	}
357 }
358 
/*
 * Store `src` into an ATA identify string field of `len` bytes.
 *
 * The two bytes of every 16-bit word are swapped (byte i is written to
 * index i ^ 1), and the field is padded with spaces once `src` runs out.
 * No terminating NUL is written.  `len` is expected to be even, since an
 * odd length would make the last byte land at index `len`.
 */
static void
ata_string(uint8_t *dest, const char *src, int len)
{
	int pos;
	uint8_t ch;

	for (pos = 0; pos < len; pos++) {
		if (*src != '\0')
			ch = *src++;
		else
			ch = ' ';
		dest[pos ^ 1] = ch;
	}
}
371 
/*
 * Store `src` into a fixed-width field of `len` bytes in natural byte
 * order, padding with spaces once `src` runs out.  No terminating NUL is
 * written.
 */
static void
atapi_string(uint8_t *dest, const char *src, int len)
{
	uint8_t *out, *end;

	end = dest + len;
	for (out = dest; out < end; out++) {
		if (*src != '\0')
			*out = *src++;
		else
			*out = ' ';
	}
}
384 
385 static void
386 ahci_handle_dma(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done,
387     int seek)
388 {
389 	struct ahci_ioreq *aior;
390 	struct blockif_req *breq;
391 	struct pci_ahci_softc *sc;
392 	struct ahci_prdt_entry *prdt;
393 	struct ahci_cmd_hdr *hdr;
394 	uint64_t lba;
395 	uint32_t len;
396 	int i, err, iovcnt, ncq, readop;
397 
398 	sc = p->pr_sc;
399 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
400 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
401 	ncq = 0;
402 	readop = 1;
403 
404 	prdt += seek;
405 	if (cfis[2] == ATA_WRITE_DMA || cfis[2] == ATA_WRITE_DMA48 ||
406 			cfis[2] == ATA_WRITE_FPDMA_QUEUED)
407 		readop = 0;
408 
409 	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
410 			cfis[2] == ATA_READ_FPDMA_QUEUED) {
411 		lba = ((uint64_t)cfis[10] << 40) |
412 			((uint64_t)cfis[9] << 32) |
413 			((uint64_t)cfis[8] << 24) |
414 			((uint64_t)cfis[6] << 16) |
415 			((uint64_t)cfis[5] << 8) |
416 			cfis[4];
417 		len = cfis[11] << 8 | cfis[3];
418 		if (!len)
419 			len = 65536;
420 		ncq = 1;
421 	} else if (cfis[2] == ATA_READ_DMA48 || cfis[2] == ATA_WRITE_DMA48) {
422 		lba = ((uint64_t)cfis[10] << 40) |
423 			((uint64_t)cfis[9] << 32) |
424 			((uint64_t)cfis[8] << 24) |
425 			((uint64_t)cfis[6] << 16) |
426 			((uint64_t)cfis[5] << 8) |
427 			cfis[4];
428 		len = cfis[13] << 8 | cfis[12];
429 		if (!len)
430 			len = 65536;
431 	} else {
432 		lba = ((cfis[7] & 0xf) << 24) | (cfis[6] << 16) |
433 			(cfis[5] << 8) | cfis[4];
434 		len = cfis[12];
435 		if (!len)
436 			len = 256;
437 	}
438 	lba *= blockif_sectsz(p->bctx);
439 	len *= blockif_sectsz(p->bctx);
440 
441 	/*
442 	 * Pull request off free list
443 	 */
444 	aior = STAILQ_FIRST(&p->iofhd);
445 	assert(aior != NULL);
446 	STAILQ_REMOVE_HEAD(&p->iofhd, io_list);
447 	aior->cfis = cfis;
448 	aior->slot = slot;
449 	aior->len = len;
450 	aior->done = done;
451 	breq = &aior->io_req;
452 	breq->br_offset = lba + done;
453 	iovcnt = hdr->prdtl - seek;
454 	if (iovcnt > BLOCKIF_IOV_MAX) {
455 		aior->prdtl = iovcnt - BLOCKIF_IOV_MAX;
456 		iovcnt = BLOCKIF_IOV_MAX;
457 	} else
458 		aior->prdtl = 0;
459 	breq->br_iovcnt = iovcnt;
460 
461 	/*
462 	 * Build up the iovec based on the prdt
463 	 */
464 	for (i = 0; i < iovcnt; i++) {
465 		uint32_t dbcsz;
466 
467 		dbcsz = (prdt->dbc & DBCMASK) + 1;
468 		breq->br_iov[i].iov_base = paddr_guest2host(ahci_ctx(sc),
469 		    prdt->dba, dbcsz);
470 		breq->br_iov[i].iov_len = dbcsz;
471 		aior->done += dbcsz;
472 		prdt++;
473 	}
474 	if (readop)
475 		err = blockif_read(p->bctx, breq);
476 	else
477 		err = blockif_write(p->bctx, breq);
478 	assert(err == 0);
479 
480 	if (!aior->prdtl && ncq)
481 		p->ci &= ~(1 << slot);
482 }
483 
484 static void
485 ahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis)
486 {
487 	struct ahci_ioreq *aior;
488 	struct blockif_req *breq;
489 	int err;
490 
491 	/*
492 	 * Pull request off free list
493 	 */
494 	aior = STAILQ_FIRST(&p->iofhd);
495 	assert(aior != NULL);
496 	STAILQ_REMOVE_HEAD(&p->iofhd, io_list);
497 	aior->cfis = cfis;
498 	aior->slot = slot;
499 	aior->len = 0;
500 	aior->done = 0;
501 	aior->prdtl = 0;
502 	breq = &aior->io_req;
503 
504 	err = blockif_flush(p->bctx, breq);
505 	assert(err == 0);
506 }
507 
508 static inline void
509 write_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
510 		void *buf, int size)
511 {
512 	struct ahci_cmd_hdr *hdr;
513 	struct ahci_prdt_entry *prdt;
514 	void *from;
515 	int i, len;
516 
517 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
518 	len = size;
519 	from = buf;
520 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
521 	for (i = 0; i < hdr->prdtl && len; i++) {
522 		uint8_t *ptr;
523 		uint32_t dbcsz;
524 
525 		dbcsz = (prdt->dbc & DBCMASK) + 1;
526 		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
527 		memcpy(ptr, from, dbcsz);
528 		len -= dbcsz;
529 		from += dbcsz;
530 		prdt++;
531 	}
532 	hdr->prdbc = size - len;
533 }
534 
535 static void
536 handle_identify(struct ahci_port *p, int slot, uint8_t *cfis)
537 {
538 	struct ahci_cmd_hdr *hdr;
539 
540 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
541 	if (p->atapi || hdr->prdtl == 0) {
542 		p->tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
543 		p->is |= AHCI_P_IX_TFE;
544 	} else {
545 		uint16_t buf[256];
546 		uint64_t sectors;
547 
548 		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
549 		memset(buf, 0, sizeof(buf));
550 		buf[0] = 0x0040;
551 		/* TODO emulate different serial? */
552 		ata_string((uint8_t *)(buf+10), "123456", 20);
553 		ata_string((uint8_t *)(buf+23), "001", 8);
554 		ata_string((uint8_t *)(buf+27), "BHYVE SATA DISK", 40);
555 		buf[47] = (0x8000 | 128);
556 		buf[48] = 0x1;
557 		buf[49] = (1 << 8 | 1 << 9 | 1 << 11);
558 		buf[50] = (1 << 14);
559 		buf[53] = (1 << 1 | 1 << 2);
560 		if (p->mult_sectors)
561 			buf[59] = (0x100 | p->mult_sectors);
562 		buf[60] = sectors;
563 		buf[61] = (sectors >> 16);
564 		buf[63] = 0x7;
565 		if (p->xfermode & ATA_WDMA0)
566 			buf[63] |= (1 << ((p->xfermode & 7) + 8));
567 		buf[64] = 0x3;
568 		buf[65] = 100;
569 		buf[66] = 100;
570 		buf[67] = 100;
571 		buf[68] = 100;
572 		buf[75] = 31;
573 		buf[76] = (1 << 8 | 1 << 2);
574 		buf[80] = 0x1f0;
575 		buf[81] = 0x28;
576 		buf[82] = (1 << 5 | 1 << 14);
577 		buf[83] = (1 << 10 | 1 << 12 | 1 << 13 | 1 << 14);
578 		buf[84] = (1 << 14);
579 		buf[85] = (1 << 5 | 1 << 14);
580 		buf[86] = (1 << 10 | 1 << 12 | 1 << 13);
581 		buf[87] = (1 << 14);
582 		buf[88] = 0x7f;
583 		if (p->xfermode & ATA_UDMA0)
584 			buf[88] |= (1 << ((p->xfermode & 7) + 8));
585 		buf[93] = (1 | 1 <<14);
586 		buf[100] = sectors;
587 		buf[101] = (sectors >> 16);
588 		buf[102] = (sectors >> 32);
589 		buf[103] = (sectors >> 48);
590 		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
591 		p->tfd = ATA_S_DSC | ATA_S_READY;
592 		p->is |= AHCI_P_IX_DP;
593 	}
594 	p->ci &= ~(1 << slot);
595 	ahci_generate_intr(p->pr_sc);
596 }
597 
598 static void
599 handle_atapi_identify(struct ahci_port *p, int slot, uint8_t *cfis)
600 {
601 	if (!p->atapi) {
602 		p->tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
603 		p->is |= AHCI_P_IX_TFE;
604 	} else {
605 		uint16_t buf[256];
606 
607 		memset(buf, 0, sizeof(buf));
608 		buf[0] = (2 << 14 | 5 << 8 | 1 << 7 | 2 << 5);
609 		/* TODO emulate different serial? */
610 		ata_string((uint8_t *)(buf+10), "123456", 20);
611 		ata_string((uint8_t *)(buf+23), "001", 8);
612 		ata_string((uint8_t *)(buf+27), "BHYVE SATA DVD ROM", 40);
613 		buf[49] = (1 << 9 | 1 << 8);
614 		buf[50] = (1 << 14 | 1);
615 		buf[53] = (1 << 2 | 1 << 1);
616 		buf[62] = 0x3f;
617 		buf[63] = 7;
618 		buf[64] = 3;
619 		buf[65] = 100;
620 		buf[66] = 100;
621 		buf[67] = 100;
622 		buf[68] = 100;
623 		buf[76] = (1 << 2 | 1 << 1);
624 		buf[78] = (1 << 5);
625 		buf[80] = (0x1f << 4);
626 		buf[82] = (1 << 4);
627 		buf[83] = (1 << 14);
628 		buf[84] = (1 << 14);
629 		buf[85] = (1 << 4);
630 		buf[87] = (1 << 14);
631 		buf[88] = (1 << 14 | 0x7f);
632 		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
633 		p->tfd = ATA_S_DSC | ATA_S_READY;
634 		p->is |= AHCI_P_IX_DHR;
635 	}
636 	p->ci &= ~(1 << slot);
637 	ahci_generate_intr(p->pr_sc);
638 }
639 
640 static void
641 atapi_inquiry(struct ahci_port *p, int slot, uint8_t *cfis)
642 {
643 	uint8_t buf[36];
644 	uint8_t *acmd;
645 	int len;
646 
647 	acmd = cfis + 0x40;
648 
649 	buf[0] = 0x05;
650 	buf[1] = 0x80;
651 	buf[2] = 0x00;
652 	buf[3] = 0x21;
653 	buf[4] = 31;
654 	buf[5] = 0;
655 	buf[6] = 0;
656 	buf[7] = 0;
657 	atapi_string(buf + 8, "BHYVE", 8);
658 	atapi_string(buf + 16, "BHYVE DVD-ROM", 16);
659 	atapi_string(buf + 32, "001", 4);
660 
661 	len = sizeof(buf);
662 	if (len > acmd[4])
663 		len = acmd[4];
664 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
665 	write_prdt(p, slot, cfis, buf, len);
666 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
667 }
668 
669 static void
670 atapi_read_capacity(struct ahci_port *p, int slot, uint8_t *cfis)
671 {
672 	uint8_t buf[8];
673 	uint64_t sectors;
674 
675 	sectors = blockif_size(p->bctx) / 2048;
676 	be32enc(buf, sectors - 1);
677 	be32enc(buf + 4, 2048);
678 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
679 	write_prdt(p, slot, cfis, buf, sizeof(buf));
680 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
681 }
682 
683 static void
684 atapi_read_toc(struct ahci_port *p, int slot, uint8_t *cfis)
685 {
686 	uint8_t *acmd;
687 	uint8_t format;
688 	int len;
689 
690 	acmd = cfis + 0x40;
691 
692 	len = be16dec(acmd + 7);
693 	format = acmd[9] >> 6;
694 	switch (format) {
695 	case 0:
696 	{
697 		int msf, size;
698 		uint64_t sectors;
699 		uint8_t start_track, buf[20], *bp;
700 
701 		msf = (acmd[1] >> 1) & 1;
702 		start_track = acmd[6];
703 		if (start_track > 1 && start_track != 0xaa) {
704 			uint32_t tfd;
705 			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
706 			p->asc = 0x24;
707 			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
708 			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
709 			ahci_write_fis_d2h(p, slot, cfis, tfd);
710 			return;
711 		}
712 		bp = buf + 2;
713 		*bp++ = 1;
714 		*bp++ = 1;
715 		if (start_track <= 1) {
716 			*bp++ = 0;
717 			*bp++ = 0x14;
718 			*bp++ = 1;
719 			*bp++ = 0;
720 			if (msf) {
721 				*bp++ = 0;
722 				lba_to_msf(bp, 0);
723 				bp += 3;
724 			} else {
725 				*bp++ = 0;
726 				*bp++ = 0;
727 				*bp++ = 0;
728 				*bp++ = 0;
729 			}
730 		}
731 		*bp++ = 0;
732 		*bp++ = 0x14;
733 		*bp++ = 0xaa;
734 		*bp++ = 0;
735 		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
736 		sectors >>= 2;
737 		if (msf) {
738 			*bp++ = 0;
739 			lba_to_msf(bp, sectors);
740 			bp += 3;
741 		} else {
742 			be32enc(bp, sectors);
743 			bp += 4;
744 		}
745 		size = bp - buf;
746 		be16enc(buf, size - 2);
747 		if (len > size)
748 			len = size;
749 		write_prdt(p, slot, cfis, buf, len);
750 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
751 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
752 		break;
753 	}
754 	case 1:
755 	{
756 		uint8_t buf[12];
757 
758 		memset(buf, 0, sizeof(buf));
759 		buf[1] = 0xa;
760 		buf[2] = 0x1;
761 		buf[3] = 0x1;
762 		if (len > sizeof(buf))
763 			len = sizeof(buf);
764 		write_prdt(p, slot, cfis, buf, len);
765 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
766 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
767 		break;
768 	}
769 	case 2:
770 	{
771 		int msf, size;
772 		uint64_t sectors;
773 		uint8_t start_track, *bp, buf[50];
774 
775 		msf = (acmd[1] >> 1) & 1;
776 		start_track = acmd[6];
777 		bp = buf + 2;
778 		*bp++ = 1;
779 		*bp++ = 1;
780 
781 		*bp++ = 1;
782 		*bp++ = 0x14;
783 		*bp++ = 0;
784 		*bp++ = 0xa0;
785 		*bp++ = 0;
786 		*bp++ = 0;
787 		*bp++ = 0;
788 		*bp++ = 0;
789 		*bp++ = 1;
790 		*bp++ = 0;
791 		*bp++ = 0;
792 
793 		*bp++ = 1;
794 		*bp++ = 0x14;
795 		*bp++ = 0;
796 		*bp++ = 0xa1;
797 		*bp++ = 0;
798 		*bp++ = 0;
799 		*bp++ = 0;
800 		*bp++ = 0;
801 		*bp++ = 1;
802 		*bp++ = 0;
803 		*bp++ = 0;
804 
805 		*bp++ = 1;
806 		*bp++ = 0x14;
807 		*bp++ = 0;
808 		*bp++ = 0xa2;
809 		*bp++ = 0;
810 		*bp++ = 0;
811 		*bp++ = 0;
812 		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
813 		sectors >>= 2;
814 		if (msf) {
815 			*bp++ = 0;
816 			lba_to_msf(bp, sectors);
817 			bp += 3;
818 		} else {
819 			be32enc(bp, sectors);
820 			bp += 4;
821 		}
822 
823 		*bp++ = 1;
824 		*bp++ = 0x14;
825 		*bp++ = 0;
826 		*bp++ = 1;
827 		*bp++ = 0;
828 		*bp++ = 0;
829 		*bp++ = 0;
830 		if (msf) {
831 			*bp++ = 0;
832 			lba_to_msf(bp, 0);
833 			bp += 3;
834 		} else {
835 			*bp++ = 0;
836 			*bp++ = 0;
837 			*bp++ = 0;
838 			*bp++ = 0;
839 		}
840 
841 		size = bp - buf;
842 		be16enc(buf, size - 2);
843 		if (len > size)
844 			len = size;
845 		write_prdt(p, slot, cfis, buf, len);
846 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
847 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
848 		break;
849 	}
850 	default:
851 	{
852 		uint32_t tfd;
853 
854 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
855 		p->asc = 0x24;
856 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
857 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
858 		ahci_write_fis_d2h(p, slot, cfis, tfd);
859 		break;
860 	}
861 	}
862 }
863 
864 static void
865 atapi_read(struct ahci_port *p, int slot, uint8_t *cfis,
866 		uint32_t done, int seek)
867 {
868 	struct ahci_ioreq *aior;
869 	struct ahci_cmd_hdr *hdr;
870 	struct ahci_prdt_entry *prdt;
871 	struct blockif_req *breq;
872 	struct pci_ahci_softc *sc;
873 	uint8_t *acmd;
874 	uint64_t lba;
875 	uint32_t len;
876 	int i, err, iovcnt;
877 
878 	sc = p->pr_sc;
879 	acmd = cfis + 0x40;
880 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
881 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
882 
883 	prdt += seek;
884 	lba = be32dec(acmd + 2);
885 	if (acmd[0] == READ_10)
886 		len = be16dec(acmd + 7);
887 	else
888 		len = be32dec(acmd + 6);
889 	if (len == 0) {
890 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
891 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
892 	}
893 	lba *= 2048;
894 	len *= 2048;
895 
896 	/*
897 	 * Pull request off free list
898 	 */
899 	aior = STAILQ_FIRST(&p->iofhd);
900 	assert(aior != NULL);
901 	STAILQ_REMOVE_HEAD(&p->iofhd, io_list);
902 	aior->cfis = cfis;
903 	aior->slot = slot;
904 	aior->len = len;
905 	aior->done = done;
906 	breq = &aior->io_req;
907 	breq->br_offset = lba + done;
908 	iovcnt = hdr->prdtl - seek;
909 	if (iovcnt > BLOCKIF_IOV_MAX) {
910 		aior->prdtl = iovcnt - BLOCKIF_IOV_MAX;
911 		iovcnt = BLOCKIF_IOV_MAX;
912 	} else
913 		aior->prdtl = 0;
914 	breq->br_iovcnt = iovcnt;
915 
916 	/*
917 	 * Build up the iovec based on the prdt
918 	 */
919 	for (i = 0; i < iovcnt; i++) {
920 		uint32_t dbcsz;
921 
922 		dbcsz = (prdt->dbc & DBCMASK) + 1;
923 		breq->br_iov[i].iov_base = paddr_guest2host(ahci_ctx(sc),
924 		    prdt->dba, dbcsz);
925 		breq->br_iov[i].iov_len = dbcsz;
926 		aior->done += dbcsz;
927 		prdt++;
928 	}
929 	err = blockif_read(p->bctx, breq);
930 	assert(err == 0);
931 }
932 
933 static void
934 atapi_request_sense(struct ahci_port *p, int slot, uint8_t *cfis)
935 {
936 	uint8_t buf[64];
937 	uint8_t *acmd;
938 	int len;
939 
940 	acmd = cfis + 0x40;
941 	len = acmd[4];
942 	if (len > sizeof(buf))
943 		len = sizeof(buf);
944 	memset(buf, 0, len);
945 	buf[0] = 0x70 | (1 << 7);
946 	buf[2] = p->sense_key;
947 	buf[7] = 10;
948 	buf[12] = p->asc;
949 	write_prdt(p, slot, cfis, buf, len);
950 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
951 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
952 }
953 
954 static void
955 atapi_start_stop_unit(struct ahci_port *p, int slot, uint8_t *cfis)
956 {
957 	uint8_t *acmd = cfis + 0x40;
958 	uint32_t tfd;
959 
960 	switch (acmd[4] & 3) {
961 	case 0:
962 	case 1:
963 	case 3:
964 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
965 		tfd = ATA_S_READY | ATA_S_DSC;
966 		break;
967 	case 2:
968 		/* TODO eject media */
969 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
970 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
971 		p->asc = 0x53;
972 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
973 		break;
974 	}
975 	ahci_write_fis_d2h(p, slot, cfis, tfd);
976 }
977 
978 static void
979 atapi_mode_sense(struct ahci_port *p, int slot, uint8_t *cfis)
980 {
981 	uint8_t *acmd;
982 	uint32_t tfd;
983 	uint8_t pc, code;
984 	int len;
985 
986 	acmd = cfis + 0x40;
987 	len = be16dec(acmd + 7);
988 	pc = acmd[2] >> 6;
989 	code = acmd[2] & 0x3f;
990 
991 	switch (pc) {
992 	case 0:
993 		switch (code) {
994 		case MODEPAGE_RW_ERROR_RECOVERY:
995 		{
996 			uint8_t buf[16];
997 
998 			if (len > sizeof(buf))
999 				len = sizeof(buf);
1000 
1001 			memset(buf, 0, sizeof(buf));
1002 			be16enc(buf, 16 - 2);
1003 			buf[2] = 0x70;
1004 			buf[8] = 0x01;
1005 			buf[9] = 16 - 10;
1006 			buf[11] = 0x05;
1007 			write_prdt(p, slot, cfis, buf, len);
1008 			tfd = ATA_S_READY | ATA_S_DSC;
1009 			break;
1010 		}
1011 		case MODEPAGE_CD_CAPABILITIES:
1012 		{
1013 			uint8_t buf[30];
1014 
1015 			if (len > sizeof(buf))
1016 				len = sizeof(buf);
1017 
1018 			memset(buf, 0, sizeof(buf));
1019 			be16enc(buf, 30 - 2);
1020 			buf[2] = 0x70;
1021 			buf[8] = 0x2A;
1022 			buf[9] = 30 - 10;
1023 			buf[10] = 0x08;
1024 			buf[12] = 0x71;
1025 			be16enc(&buf[18], 2);
1026 			be16enc(&buf[20], 512);
1027 			write_prdt(p, slot, cfis, buf, len);
1028 			tfd = ATA_S_READY | ATA_S_DSC;
1029 			break;
1030 		}
1031 		default:
1032 			goto error;
1033 			break;
1034 		}
1035 		break;
1036 	case 3:
1037 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1038 		p->asc = 0x39;
1039 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1040 		break;
1041 error:
1042 	case 1:
1043 	case 2:
1044 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1045 		p->asc = 0x24;
1046 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1047 		break;
1048 	}
1049 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1050 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1051 }
1052 
1053 static void
1054 atapi_get_event_status_notification(struct ahci_port *p, int slot,
1055     uint8_t *cfis)
1056 {
1057 	uint8_t *acmd;
1058 	uint32_t tfd;
1059 
1060 	acmd = cfis + 0x40;
1061 
1062 	/* we don't support asynchronous operation */
1063 	if (!(acmd[1] & 1)) {
1064 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1065 		p->asc = 0x24;
1066 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1067 	} else {
1068 		uint8_t buf[8];
1069 		int len;
1070 
1071 		len = be16dec(acmd + 7);
1072 		if (len > sizeof(buf))
1073 			len = sizeof(buf);
1074 
1075 		memset(buf, 0, sizeof(buf));
1076 		be16enc(buf, 8 - 2);
1077 		buf[2] = 0x04;
1078 		buf[3] = 0x10;
1079 		buf[5] = 0x02;
1080 		write_prdt(p, slot, cfis, buf, len);
1081 		tfd = ATA_S_READY | ATA_S_DSC;
1082 	}
1083 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1084 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1085 }
1086 
1087 static void
1088 handle_packet_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1089 {
1090 	uint8_t *acmd;
1091 
1092 	acmd = cfis + 0x40;
1093 
1094 #ifdef AHCI_DEBUG
1095 	{
1096 		int i;
1097 		DPRINTF("ACMD:");
1098 		for (i = 0; i < 16; i++)
1099 			DPRINTF("%02x ", acmd[i]);
1100 		DPRINTF("\n");
1101 	}
1102 #endif
1103 
1104 	switch (acmd[0]) {
1105 	case TEST_UNIT_READY:
1106 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1107 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1108 		break;
1109 	case INQUIRY:
1110 		atapi_inquiry(p, slot, cfis);
1111 		break;
1112 	case READ_CAPACITY:
1113 		atapi_read_capacity(p, slot, cfis);
1114 		break;
1115 	case PREVENT_ALLOW:
1116 		/* TODO */
1117 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1118 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1119 		break;
1120 	case READ_TOC:
1121 		atapi_read_toc(p, slot, cfis);
1122 		break;
1123 	case READ_10:
1124 	case READ_12:
1125 		atapi_read(p, slot, cfis, 0, 0);
1126 		break;
1127 	case REQUEST_SENSE:
1128 		atapi_request_sense(p, slot, cfis);
1129 		break;
1130 	case START_STOP_UNIT:
1131 		atapi_start_stop_unit(p, slot, cfis);
1132 		break;
1133 	case MODE_SENSE_10:
1134 		atapi_mode_sense(p, slot, cfis);
1135 		break;
1136 	case GET_EVENT_STATUS_NOTIFICATION:
1137 		atapi_get_event_status_notification(p, slot, cfis);
1138 		break;
1139 	default:
1140 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1141 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1142 		p->asc = 0x20;
1143 		ahci_write_fis_d2h(p, slot, cfis, (p->sense_key << 12) |
1144 				ATA_S_READY | ATA_S_ERROR);
1145 		break;
1146 	}
1147 }
1148 
1149 static void
1150 ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1151 {
1152 
1153 	switch (cfis[2]) {
1154 	case ATA_ATA_IDENTIFY:
1155 		handle_identify(p, slot, cfis);
1156 		break;
1157 	case ATA_SETFEATURES:
1158 	{
1159 		switch (cfis[3]) {
1160 		case ATA_SF_ENAB_WCACHE:
1161 		case ATA_SF_DIS_WCACHE:
1162 		case ATA_SF_ENAB_RCACHE:
1163 		case ATA_SF_DIS_RCACHE:
1164 			p->tfd = ATA_S_DSC | ATA_S_READY;
1165 			break;
1166 		case ATA_SF_SETXFER:
1167 		{
1168 			switch (cfis[12] & 0xf8) {
1169 			case ATA_PIO:
1170 			case ATA_PIO0:
1171 				break;
1172 			case ATA_WDMA0:
1173 			case ATA_UDMA0:
1174 				p->xfermode = (cfis[12] & 0x7);
1175 				break;
1176 			}
1177 			p->tfd = ATA_S_DSC | ATA_S_READY;
1178 			break;
1179 		}
1180 		default:
1181 			p->tfd = ATA_S_ERROR | ATA_S_READY;
1182 			p->tfd |= (ATA_ERROR_ABORT << 8);
1183 			break;
1184 		}
1185 		p->is |= AHCI_P_IX_DP;
1186 		p->ci &= ~(1 << slot);
1187 		ahci_generate_intr(p->pr_sc);
1188 		break;
1189 	}
1190 	case ATA_SET_MULTI:
1191 		if (cfis[12] != 0 &&
1192 			(cfis[12] > 128 || (cfis[12] & (cfis[12] - 1)))) {
1193 			p->tfd = ATA_S_ERROR | ATA_S_READY;
1194 			p->tfd |= (ATA_ERROR_ABORT << 8);
1195 		} else {
1196 			p->mult_sectors = cfis[12];
1197 			p->tfd = ATA_S_DSC | ATA_S_READY;
1198 		}
1199 		p->is |= AHCI_P_IX_DP;
1200 		p->ci &= ~(1 << slot);
1201 		ahci_generate_intr(p->pr_sc);
1202 		break;
1203 	case ATA_READ_DMA:
1204 	case ATA_WRITE_DMA:
1205 	case ATA_READ_DMA48:
1206 	case ATA_WRITE_DMA48:
1207 	case ATA_READ_FPDMA_QUEUED:
1208 	case ATA_WRITE_FPDMA_QUEUED:
1209 		ahci_handle_dma(p, slot, cfis, 0, 0);
1210 		break;
1211 	case ATA_FLUSHCACHE:
1212 	case ATA_FLUSHCACHE48:
1213 		ahci_handle_flush(p, slot, cfis);
1214 		break;
1215 	case ATA_STANDBY_CMD:
1216 		break;
1217 	case ATA_NOP:
1218 	case ATA_STANDBY_IMMEDIATE:
1219 	case ATA_IDLE_IMMEDIATE:
1220 	case ATA_SLEEP:
1221 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1222 		break;
1223 	case ATA_ATAPI_IDENTIFY:
1224 		handle_atapi_identify(p, slot, cfis);
1225 		break;
1226 	case ATA_PACKET_CMD:
1227 		if (!p->atapi) {
1228 			p->tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1229 			p->is |= AHCI_P_IX_TFE;
1230 			p->ci &= ~(1 << slot);
1231 			ahci_generate_intr(p->pr_sc);
1232 		} else
1233 			handle_packet_cmd(p, slot, cfis);
1234 		break;
1235 	default:
1236 		WPRINTF("Unsupported cmd:%02x\n", cfis[2]);
1237 		p->tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1238 		p->is |= AHCI_P_IX_TFE;
1239 		p->ci &= ~(1 << slot);
1240 		ahci_generate_intr(p->pr_sc);
1241 		break;
1242 	}
1243 }
1244 
1245 static void
1246 ahci_handle_slot(struct ahci_port *p, int slot)
1247 {
1248 	struct ahci_cmd_hdr *hdr;
1249 	struct ahci_prdt_entry *prdt;
1250 	struct pci_ahci_softc *sc;
1251 	uint8_t *cfis;
1252 	int cfl;
1253 
1254 	sc = p->pr_sc;
1255 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1256 	cfl = (hdr->flags & 0x1f) * 4;
1257 	cfis = paddr_guest2host(ahci_ctx(sc), hdr->ctba,
1258 			0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry));
1259 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1260 
1261 #ifdef AHCI_DEBUG
1262 	DPRINTF("\ncfis:");
1263 	for (i = 0; i < cfl; i++) {
1264 		if (i % 10 == 0)
1265 			DPRINTF("\n");
1266 		DPRINTF("%02x ", cfis[i]);
1267 	}
1268 	DPRINTF("\n");
1269 
1270 	for (i = 0; i < hdr->prdtl; i++) {
1271 		DPRINTF("%d@%08"PRIx64"\n", prdt->dbc & 0x3fffff, prdt->dba);
1272 		prdt++;
1273 	}
1274 #endif
1275 
1276 	if (cfis[0] != FIS_TYPE_REGH2D) {
1277 		WPRINTF("Not a H2D FIS:%02x\n", cfis[0]);
1278 		return;
1279 	}
1280 
1281 	if (cfis[1] & 0x80) {
1282 		ahci_handle_cmd(p, slot, cfis);
1283 	} else {
1284 		if (cfis[15] & (1 << 2))
1285 			p->reset = 1;
1286 		else if (p->reset) {
1287 			p->reset = 0;
1288 			ahci_port_reset(p);
1289 		}
1290 		p->ci &= ~(1 << slot);
1291 	}
1292 }
1293 
1294 static void
1295 ahci_handle_port(struct ahci_port *p)
1296 {
1297 	int i;
1298 
1299 	if (!(p->cmd & AHCI_P_CMD_ST))
1300 		return;
1301 
1302 	for (i = 0; (i < 32) && p->ci; i++) {
1303 		if (p->ci & (1 << i))
1304 			ahci_handle_slot(p, i);
1305 	}
1306 }
1307 
1308 /*
1309  * blockif callback routine - this runs in the context of the blockif
1310  * i/o thread, so the mutex needs to be acquired.
1311  */
1312 static void
1313 ata_ioreq_cb(struct blockif_req *br, int err)
1314 {
1315 	struct ahci_cmd_hdr *hdr;
1316 	struct ahci_ioreq *aior;
1317 	struct ahci_port *p;
1318 	struct pci_ahci_softc *sc;
1319 	uint32_t tfd;
1320 	uint8_t *cfis;
1321 	int pending, slot, ncq;
1322 
1323 	DPRINTF("%s %d\n", __func__, err);
1324 
1325 	ncq = 0;
1326 	aior = br->br_param;
1327 	p = aior->io_pr;
1328 	cfis = aior->cfis;
1329 	slot = aior->slot;
1330 	pending = aior->prdtl;
1331 	sc = p->pr_sc;
1332 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1333 
1334 	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
1335 			cfis[2] == ATA_READ_FPDMA_QUEUED)
1336 		ncq = 1;
1337 
1338 	pthread_mutex_lock(&sc->mtx);
1339 
1340 	/*
1341 	 * Move the blockif request back to the free list
1342 	 */
1343 	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_list);
1344 
1345 	if (pending && !err) {
1346 		ahci_handle_dma(p, slot, cfis, aior->done,
1347 		    hdr->prdtl - pending);
1348 		goto out;
1349 	}
1350 
1351 	if (!err && aior->done == aior->len) {
1352 		tfd = ATA_S_READY | ATA_S_DSC;
1353 		if (ncq)
1354 			hdr->prdbc = 0;
1355 		else
1356 			hdr->prdbc = aior->len;
1357 	} else {
1358 		tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1359 		hdr->prdbc = 0;
1360 		if (ncq)
1361 			p->serr |= (1 << slot);
1362 	}
1363 
1364 	if (ncq) {
1365 		p->sact &= ~(1 << slot);
1366 		ahci_write_fis_sdb(p, slot, tfd);
1367 	} else
1368 		ahci_write_fis_d2h(p, slot, cfis, tfd);
1369 
1370 out:
1371 	pthread_mutex_unlock(&sc->mtx);
1372 	DPRINTF("%s exit\n", __func__);
1373 }
1374 
1375 static void
1376 atapi_ioreq_cb(struct blockif_req *br, int err)
1377 {
1378 	struct ahci_cmd_hdr *hdr;
1379 	struct ahci_ioreq *aior;
1380 	struct ahci_port *p;
1381 	struct pci_ahci_softc *sc;
1382 	uint8_t *cfis;
1383 	uint32_t tfd;
1384 	int pending, slot;
1385 
1386 	DPRINTF("%s %d\n", __func__, err);
1387 
1388 	aior = br->br_param;
1389 	p = aior->io_pr;
1390 	cfis = aior->cfis;
1391 	slot = aior->slot;
1392 	pending = aior->prdtl;
1393 	sc = p->pr_sc;
1394 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + aior->slot * AHCI_CL_SIZE);
1395 
1396 	pthread_mutex_lock(&sc->mtx);
1397 
1398 	/*
1399 	 * Move the blockif request back to the free list
1400 	 */
1401 	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_list);
1402 
1403 	if (pending && !err) {
1404 		atapi_read(p, slot, cfis, aior->done, hdr->prdtl - pending);
1405 		goto out;
1406 	}
1407 
1408 	if (!err && aior->done == aior->len) {
1409 		tfd = ATA_S_READY | ATA_S_DSC;
1410 		hdr->prdbc = aior->len;
1411 	} else {
1412 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1413 		p->asc = 0x21;
1414 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1415 		hdr->prdbc = 0;
1416 	}
1417 
1418 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1419 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1420 
1421 out:
1422 	pthread_mutex_unlock(&sc->mtx);
1423 	DPRINTF("%s exit\n", __func__);
1424 }
1425 
1426 static void
1427 pci_ahci_ioreq_init(struct ahci_port *pr)
1428 {
1429 	struct ahci_ioreq *vr;
1430 	int i;
1431 
1432 	pr->ioqsz = blockif_queuesz(pr->bctx);
1433 	pr->ioreq = calloc(pr->ioqsz, sizeof(struct ahci_ioreq));
1434 	STAILQ_INIT(&pr->iofhd);
1435 
1436 	/*
1437 	 * Add all i/o request entries to the free queue
1438 	 */
1439 	for (i = 0; i < pr->ioqsz; i++) {
1440 		vr = &pr->ioreq[i];
1441 		vr->io_pr = pr;
1442 		if (!pr->atapi)
1443 			vr->io_req.br_callback = ata_ioreq_cb;
1444 		else
1445 			vr->io_req.br_callback = atapi_ioreq_cb;
1446 		vr->io_req.br_param = vr;
1447 		STAILQ_INSERT_TAIL(&pr->iofhd, vr, io_list);
1448 	}
1449 }
1450 
1451 static void
1452 pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
1453 {
1454 	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
1455 	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
1456 	struct ahci_port *p = &sc->port[port];
1457 
1458 	DPRINTF("pci_ahci_port %d: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
1459 		port, offset, value);
1460 
1461 	switch (offset) {
1462 	case AHCI_P_CLB:
1463 		p->clb = value;
1464 		break;
1465 	case AHCI_P_CLBU:
1466 		p->clbu = value;
1467 		break;
1468 	case AHCI_P_FB:
1469 		p->fb = value;
1470 		break;
1471 	case AHCI_P_FBU:
1472 		p->fbu = value;
1473 		break;
1474 	case AHCI_P_IS:
1475 		p->is &= ~value;
1476 		break;
1477 	case AHCI_P_IE:
1478 		p->ie = value & 0xFDC000FF;
1479 		ahci_generate_intr(sc);
1480 		break;
1481 	case AHCI_P_CMD:
1482 	{
1483 		p->cmd = value;
1484 
1485 		if (!(value & AHCI_P_CMD_ST)) {
1486 			p->cmd &= ~(AHCI_P_CMD_CR | AHCI_P_CMD_CCS_MASK);
1487 			p->ci = 0;
1488 			p->sact = 0;
1489 		} else {
1490 			uint64_t clb;
1491 
1492 			p->cmd |= AHCI_P_CMD_CR;
1493 			clb = (uint64_t)p->clbu << 32 | p->clb;
1494 			p->cmd_lst = paddr_guest2host(ahci_ctx(sc), clb,
1495 					AHCI_CL_SIZE * AHCI_MAX_SLOTS);
1496 		}
1497 
1498 		if (value & AHCI_P_CMD_FRE) {
1499 			uint64_t fb;
1500 
1501 			p->cmd |= AHCI_P_CMD_FR;
1502 			fb = (uint64_t)p->fbu << 32 | p->fb;
1503 			/* we don't support FBSCP, so rfis size is 256Bytes */
1504 			p->rfis = paddr_guest2host(ahci_ctx(sc), fb, 256);
1505 		} else {
1506 			p->cmd &= ~AHCI_P_CMD_FR;
1507 		}
1508 
1509 		if (value & AHCI_P_CMD_CLO) {
1510 			p->tfd = 0;
1511 			p->cmd &= ~AHCI_P_CMD_CLO;
1512 		}
1513 
1514 		ahci_handle_port(p);
1515 		break;
1516 	}
1517 	case AHCI_P_TFD:
1518 	case AHCI_P_SIG:
1519 	case AHCI_P_SSTS:
1520 		WPRINTF("pci_ahci_port: read only registers 0x%"PRIx64"\n", offset);
1521 		break;
1522 	case AHCI_P_SCTL:
1523 		if (!(p->cmd & AHCI_P_CMD_ST)) {
1524 			if (value & ATA_SC_DET_RESET)
1525 				ahci_port_reset(p);
1526 			p->sctl = value;
1527 		}
1528 		break;
1529 	case AHCI_P_SERR:
1530 		p->serr &= ~value;
1531 		break;
1532 	case AHCI_P_SACT:
1533 		p->sact |= value;
1534 		break;
1535 	case AHCI_P_CI:
1536 		p->ci |= value;
1537 		ahci_handle_port(p);
1538 		break;
1539 	case AHCI_P_SNTF:
1540 	case AHCI_P_FBS:
1541 	default:
1542 		break;
1543 	}
1544 }
1545 
1546 static void
1547 pci_ahci_host_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
1548 {
1549 	DPRINTF("pci_ahci_host: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
1550 		offset, value);
1551 
1552 	switch (offset) {
1553 	case AHCI_CAP:
1554 	case AHCI_PI:
1555 	case AHCI_VS:
1556 	case AHCI_CAP2:
1557 		DPRINTF("pci_ahci_host: read only registers 0x%"PRIx64"\n", offset);
1558 		break;
1559 	case AHCI_GHC:
1560 		if (value & AHCI_GHC_HR)
1561 			ahci_reset(sc);
1562 		else if (value & AHCI_GHC_IE) {
1563 			sc->ghc |= AHCI_GHC_IE;
1564 			ahci_generate_intr(sc);
1565 		}
1566 		break;
1567 	case AHCI_IS:
1568 		sc->is &= ~value;
1569 		ahci_generate_intr(sc);
1570 		break;
1571 	default:
1572 		break;
1573 	}
1574 }
1575 
1576 static void
1577 pci_ahci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
1578 		int baridx, uint64_t offset, int size, uint64_t value)
1579 {
1580 	struct pci_ahci_softc *sc = pi->pi_arg;
1581 
1582 	assert(baridx == 5);
1583 	assert(size == 4);
1584 
1585 	pthread_mutex_lock(&sc->mtx);
1586 
1587 	if (offset < AHCI_OFFSET)
1588 		pci_ahci_host_write(sc, offset, value);
1589 	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
1590 		pci_ahci_port_write(sc, offset, value);
1591 	else
1592 		WPRINTF("pci_ahci: unknown i/o write offset 0x%"PRIx64"\n", offset);
1593 
1594 	pthread_mutex_unlock(&sc->mtx);
1595 }
1596 
1597 static uint64_t
1598 pci_ahci_host_read(struct pci_ahci_softc *sc, uint64_t offset)
1599 {
1600 	uint32_t value;
1601 
1602 	switch (offset) {
1603 	case AHCI_CAP:
1604 	case AHCI_GHC:
1605 	case AHCI_IS:
1606 	case AHCI_PI:
1607 	case AHCI_VS:
1608 	case AHCI_CCCC:
1609 	case AHCI_CCCP:
1610 	case AHCI_EM_LOC:
1611 	case AHCI_EM_CTL:
1612 	case AHCI_CAP2:
1613 	{
1614 		uint32_t *p = &sc->cap;
1615 		p += (offset - AHCI_CAP) / sizeof(uint32_t);
1616 		value = *p;
1617 		break;
1618 	}
1619 	default:
1620 		value = 0;
1621 		break;
1622 	}
1623 	DPRINTF("pci_ahci_host: read offset 0x%"PRIx64" value 0x%x\n",
1624 		offset, value);
1625 
1626 	return (value);
1627 }
1628 
1629 static uint64_t
1630 pci_ahci_port_read(struct pci_ahci_softc *sc, uint64_t offset)
1631 {
1632 	uint32_t value;
1633 	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
1634 	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
1635 
1636 	switch (offset) {
1637 	case AHCI_P_CLB:
1638 	case AHCI_P_CLBU:
1639 	case AHCI_P_FB:
1640 	case AHCI_P_FBU:
1641 	case AHCI_P_IS:
1642 	case AHCI_P_IE:
1643 	case AHCI_P_CMD:
1644 	case AHCI_P_TFD:
1645 	case AHCI_P_SIG:
1646 	case AHCI_P_SSTS:
1647 	case AHCI_P_SCTL:
1648 	case AHCI_P_SERR:
1649 	case AHCI_P_SACT:
1650 	case AHCI_P_CI:
1651 	case AHCI_P_SNTF:
1652 	case AHCI_P_FBS:
1653 	{
1654 		uint32_t *p= &sc->port[port].clb;
1655 		p += (offset - AHCI_P_CLB) / sizeof(uint32_t);
1656 		value = *p;
1657 		break;
1658 	}
1659 	default:
1660 		value = 0;
1661 		break;
1662 	}
1663 
1664 	DPRINTF("pci_ahci_port %d: read offset 0x%"PRIx64" value 0x%x\n",
1665 		port, offset, value);
1666 
1667 	return value;
1668 }
1669 
1670 static uint64_t
1671 pci_ahci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
1672     uint64_t offset, int size)
1673 {
1674 	struct pci_ahci_softc *sc = pi->pi_arg;
1675 	uint32_t value;
1676 
1677 	assert(baridx == 5);
1678 	assert(size == 4);
1679 
1680 	pthread_mutex_lock(&sc->mtx);
1681 
1682 	if (offset < AHCI_OFFSET)
1683 		value = pci_ahci_host_read(sc, offset);
1684 	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
1685 		value = pci_ahci_port_read(sc, offset);
1686 	else {
1687 		value = 0;
1688 		WPRINTF("pci_ahci: unknown i/o read offset 0x%"PRIx64"\n", offset);
1689 	}
1690 
1691 	pthread_mutex_unlock(&sc->mtx);
1692 
1693 	return (value);
1694 }
1695 
1696 static int
1697 pci_ahci_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts, int atapi)
1698 {
1699 	char bident[sizeof("XX:X:X")];
1700 	struct blockif_ctxt *bctxt;
1701 	struct pci_ahci_softc *sc;
1702 	int ret, slots;
1703 
1704 	ret = 0;
1705 
1706 	if (opts == NULL) {
1707 		fprintf(stderr, "pci_ahci: backing device required\n");
1708 		return (1);
1709 	}
1710 
1711 #ifdef AHCI_DEBUG
1712 	dbg = fopen("/tmp/log", "w+");
1713 #endif
1714 
1715        	sc = malloc(sizeof(struct pci_ahci_softc));
1716 	memset(sc, 0, sizeof(struct pci_ahci_softc));
1717 	pi->pi_arg = sc;
1718 	sc->asc_pi = pi;
1719 	sc->ports = MAX_PORTS;
1720 
1721 	/*
1722 	 * Only use port 0 for a backing device. All other ports will be
1723 	 * marked as unused
1724 	 */
1725 	sc->port[0].atapi = atapi;
1726 
1727 	/*
1728 	 * Attempt to open the backing image. Use the PCI
1729 	 * slot/func for the identifier string.
1730 	 */
1731 	snprintf(bident, sizeof(bident), "%d:%d", pi->pi_slot, pi->pi_func);
1732 	bctxt = blockif_open(opts, bident);
1733 	if (bctxt == NULL) {
1734 		ret = 1;
1735 		goto open_fail;
1736 	}
1737 	sc->port[0].bctx = bctxt;
1738 	sc->port[0].pr_sc = sc;
1739 
1740 	/*
1741 	 * Allocate blockif request structures and add them
1742 	 * to the free list
1743 	 */
1744 	pci_ahci_ioreq_init(&sc->port[0]);
1745 
1746 	pthread_mutex_init(&sc->mtx, NULL);
1747 
1748 	/* Intel ICH8 AHCI */
1749 	slots = sc->port[0].ioqsz;
1750 	if (slots > 32)
1751 		slots = 32;
1752 	--slots;
1753 	sc->cap = AHCI_CAP_64BIT | AHCI_CAP_SNCQ | AHCI_CAP_SSNTF |
1754 	    AHCI_CAP_SMPS | AHCI_CAP_SSS | AHCI_CAP_SALP |
1755 	    AHCI_CAP_SAL | AHCI_CAP_SCLO | (0x3 << AHCI_CAP_ISS_SHIFT)|
1756 	    AHCI_CAP_PMD | AHCI_CAP_SSC | AHCI_CAP_PSC |
1757 	    (slots << AHCI_CAP_NCS_SHIFT) | AHCI_CAP_SXS | (sc->ports - 1);
1758 
1759 	/* Only port 0 implemented */
1760 	sc->pi = 1;
1761 	sc->vs = 0x10300;
1762 	sc->cap2 = AHCI_CAP2_APST;
1763 	ahci_reset(sc);
1764 
1765 	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x2821);
1766 	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x8086);
1767 	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
1768 	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_STORAGE_SATA);
1769 	pci_set_cfgdata8(pi, PCIR_PROGIF, PCIP_STORAGE_SATA_AHCI_1_0);
1770 	pci_emul_add_msicap(pi, 1);
1771 	pci_emul_alloc_bar(pi, 5, PCIBAR_MEM32,
1772 	    AHCI_OFFSET + sc->ports * AHCI_STEP);
1773 
1774 open_fail:
1775 	if (ret) {
1776 		blockif_close(sc->port[0].bctx);
1777 		free(sc);
1778 	}
1779 
1780 	return (ret);
1781 }
1782 
/*
 * Init hook for the "ahci-hd" emulation: common AHCI initialisation with
 * the ATAPI flag off.  Returns whatever pci_ahci_init() returns.
 */
static int
pci_ahci_hd_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int atapi = 0;

	return (pci_ahci_init(ctx, pi, opts, atapi));
}
1789 
/*
 * Init hook for the "ahci-cd" emulation: common AHCI initialisation with
 * the ATAPI flag on.  Returns whatever pci_ahci_init() returns.
 */
static int
pci_ahci_atapi_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int atapi = 1;

	return (pci_ahci_init(ctx, pi, opts, atapi));
}
1796 
1797 /*
1798  * Use separate emulation names to distinguish drive and atapi devices
1799  */
1800 struct pci_devemu pci_de_ahci_hd = {
1801 	.pe_emu =	"ahci-hd",
1802 	.pe_init =	pci_ahci_hd_init,
1803 	.pe_barwrite =	pci_ahci_write,
1804 	.pe_barread =	pci_ahci_read
1805 };
1806 PCI_EMUL_SET(pci_de_ahci_hd);
1807 
1808 struct pci_devemu pci_de_ahci_cd = {
1809 	.pe_emu =	"ahci-cd",
1810 	.pe_init =	pci_ahci_atapi_init,
1811 	.pe_barwrite =	pci_ahci_write,
1812 	.pe_barread =	pci_ahci_read
1813 };
1814 PCI_EMUL_SET(pci_de_ahci_cd);
1815