xref: /freebsd/usr.sbin/bhyve/pci_ahci.c (revision a9e8641da961bcf3d24afc85fd657f2083a872a2)
1 /*-
2  * Copyright (c) 2013  Zhixiang Yu <zcore@freebsd.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/linker_set.h>
34 #include <sys/stat.h>
35 #include <sys/uio.h>
36 #include <sys/ioctl.h>
37 #include <sys/disk.h>
38 #include <sys/ata.h>
39 #include <sys/endian.h>
40 
41 #include <errno.h>
42 #include <fcntl.h>
43 #include <stdio.h>
44 #include <stdlib.h>
45 #include <stdint.h>
46 #include <string.h>
47 #include <strings.h>
48 #include <unistd.h>
49 #include <assert.h>
50 #include <pthread.h>
51 #include <inttypes.h>
52 
53 #include "bhyverun.h"
54 #include "pci_emul.h"
55 #include "ahci.h"
56 #include "block_if.h"
57 
/* Upper bound on ports per controller; sizes the softc's port[] array. */
#define	MAX_PORTS	6	/* Intel ICH8 AHCI supports 6 ports */

/* Values stored in a port's sig field by ahci_port_reset(). */
#define	PxSIG_ATA	0x00000101 /* ATA drive */
#define	PxSIG_ATAPI	0xeb140101 /* ATAPI drive */
62 
/*
 * SATA FIS type codes.  The value is stored in byte 0 of the FIS
 * buffers built in this file and also selects the receive-area slot
 * in ahci_write_fis().
 */
enum sata_fis_type {
	FIS_TYPE_REGH2D		= 0x27,	/* Register FIS - host to device */
	FIS_TYPE_REGD2H		= 0x34,	/* Register FIS - device to host */
	FIS_TYPE_DMAACT		= 0x39,	/* DMA activate FIS - device to host */
	FIS_TYPE_DMASETUP	= 0x41,	/* DMA setup FIS - bidirectional */
	FIS_TYPE_DATA		= 0x46,	/* Data FIS - bidirectional */
	FIS_TYPE_BIST		= 0x58,	/* BIST activate FIS - bidirectional */
	FIS_TYPE_PIOSETUP	= 0x5F,	/* PIO setup FIS - device to host */
	FIS_TYPE_SETDEVBITS	= 0xA1,	/* Set dev bits FIS - device to host */
};
73 
/*
 * SCSI opcodes
 *
 * Compared against byte 0 of the ATAPI command packet (cfis + 0x40)
 * in handle_packet_cmd().  Opcodes without a case there are answered
 * with an ILLEGAL REQUEST sense key.
 */
#define	TEST_UNIT_READY		0x00
#define	REQUEST_SENSE		0x03
#define	INQUIRY			0x12
#define	START_STOP_UNIT		0x1B
#define	PREVENT_ALLOW		0x1E
#define	READ_CAPACITY		0x25
#define	READ_10			0x28
#define	POSITION_TO_ELEMENT	0x2B
#define	READ_TOC		0x43
#define	GET_EVENT_STATUS_NOTIFICATION 0x4A
#define	MODE_SENSE_10		0x5A
#define	READ_12			0xA8
#define	READ_CD			0xBE
90 
/*
 * SCSI mode page codes
 *
 * Matched against the low six bits of byte 2 of a MODE SENSE(10)
 * packet in atapi_mode_sense().
 */
#define	MODEPAGE_RW_ERROR_RECOVERY	0x01
#define	MODEPAGE_CD_CAPABILITIES	0x2A
96 
/*
 * Debug printf
 *
 * With AHCI_DEBUG defined, DPRINTF formats to the 'dbg' stream and
 * flushes it after every message; otherwise it expands to nothing.
 * WPRINTF always prints to stdout via printf().
 */
#ifdef AHCI_DEBUG
static FILE *dbg;
#define DPRINTF(format, arg...)	do{fprintf(dbg, format, ##arg);fflush(dbg);}while(0)
#else
#define DPRINTF(format, arg...)
#endif
#define WPRINTF(format, arg...) printf(format, ##arg)
107 
/*
 * Per-command bookkeeping for one block I/O request handed to the
 * blockif layer.
 */
struct ahci_ioreq {
	struct blockif_req io_req;	/* request passed to blockif_read/write/flush */
	struct ahci_port *io_pr;	/* presumably the owning port - TODO confirm */
	STAILQ_ENTRY(ahci_ioreq) io_list; /* linkage on the port's iofhd free list */
	uint8_t *cfis;			/* command FIS this request was built from */
	uint32_t len;			/* total transfer length in bytes */
	uint32_t done;			/* bytes covered by iovecs built so far */
	int slot;			/* command slot number */
	int prdtl;			/* PRDT entries left over after this request */
};
118 
/*
 * State of one emulated AHCI port.
 */
struct ahci_port {
	struct blockif_ctxt *bctx;	/* backing block device; NULL means no device */
	struct pci_ahci_softc *pr_sc;	/* controller this port belongs to */
	uint8_t *cmd_lst;		/* host pointer to the command list (slot headers) */
	uint8_t *rfis;			/* host pointer to the received-FIS area */
	int atapi;			/* non-zero if the device is ATAPI */
	int reset;			/* set when a FIS with the reset control bit was seen */
	int mult_sectors;		/* value set by ATA_SET_MULTI; reported in IDENTIFY */
	uint8_t xfermode;		/* current transfer mode; reported in IDENTIFY */
	uint8_t sense_key;		/* last ATAPI sense key, returned by REQUEST SENSE */
	uint8_t asc;			/* last ATAPI additional sense code */

	/*
	 * The names below match the AHCI per-port registers.
	 * NOTE(review): presumably accessed by register offset elsewhere,
	 * so the order should not be changed - confirm against the
	 * register read/write handlers.
	 */
	uint32_t clb;
	uint32_t clbu;
	uint32_t fb;
	uint32_t fbu;
	uint32_t is;
	uint32_t ie;
	uint32_t cmd;
	uint32_t unused0;
	uint32_t tfd;
	uint32_t sig;
	uint32_t ssts;
	uint32_t sctl;
	uint32_t serr;
	uint32_t sact;
	uint32_t ci;
	uint32_t sntf;
	uint32_t fbs;

	/*
	 * i/o request info
	 */
	struct ahci_ioreq *ioreq;	/* presumably the backing array of requests - TODO confirm */
	int ioqsz;			/* presumably the number of entries in ioreq - TODO confirm */
	STAILQ_HEAD(ahci_fhead, ahci_ioreq) iofhd; /* free list of requests */
};
156 
/*
 * One command-list slot header, read directly from guest memory at
 * cmd_lst + slot * AHCI_CL_SIZE.
 */
struct ahci_cmd_hdr {
	uint16_t flags;		/* low 5 bits: command FIS length in dwords */
	uint16_t prdtl;		/* number of PRDT entries in the command table */
	uint32_t prdbc;		/* byte count transferred; written by write_prdt() */
	uint64_t ctba;		/* guest physical address of the command table */
	uint32_t reserved[4];
};
164 
/*
 * One physical region descriptor; the table starts at offset 0x80 of
 * the command table.
 */
struct ahci_prdt_entry {
	uint64_t dba;		/* guest physical address of the data buffer */
	uint32_t reserved;
#define	DBCMASK		0x3fffff
	uint32_t dbc;		/* (dbc & DBCMASK) + 1 is the buffer size in bytes */
};
171 
/*
 * Per-controller state.
 */
struct pci_ahci_softc {
	struct pci_devinst *asc_pi;	/* PCI device instance; used to raise MSIs */
	pthread_mutex_t	mtx;		/* NOTE(review): presumably guards this state against the blockif thread - confirm */
	int ports;			/* number of entries of port[] in use */
	/* The names below match the AHCI generic host control registers. */
	uint32_t cap;
	uint32_t ghc;
	uint32_t is;			/* bit i set when port i has an enabled interrupt pending */
	uint32_t pi;
	uint32_t vs;
	uint32_t ccc_ctl;
	uint32_t ccc_pts;
	uint32_t em_loc;
	uint32_t em_ctl;
	uint32_t cap2;
	uint32_t bohc;
	struct ahci_port port[MAX_PORTS];
};
/* VM context of the controller's PCI device, as needed by paddr_guest2host(). */
#define	ahci_ctx(sc)	((sc)->asc_pi->pi_vmctx)
190 
/*
 * Convert a logical block address to minute/second/frame form.
 * The address is biased by 150 frames, there are 75 frames per second
 * and 60 seconds per minute.  buf receives three bytes: M, S, F.
 */
static inline void lba_to_msf(uint8_t *buf, int lba)
{
	int frames, seconds;

	frames = lba + 150;
	seconds = frames / 75;

	buf[0] = seconds / 60;
	buf[1] = seconds % 60;
	buf[2] = frames % 75;
}
198 
199 /*
200  * generate HBA intr depending on whether or not ports within
201  * the controller have an interrupt pending.
202  */
203 static void
204 ahci_generate_intr(struct pci_ahci_softc *sc)
205 {
206 	int i;
207 
208 	for (i = 0; i < sc->ports; i++) {
209 		struct ahci_port *pr;
210 		pr = &sc->port[i];
211 		if (pr->is & pr->ie)
212 			sc->is |= (1 << i);
213 	}
214 
215 	DPRINTF("%s %x\n", __func__, sc->is);
216 
217 	if (sc->is && (sc->ghc & AHCI_GHC_IE))
218 		pci_generate_msi(sc->asc_pi, 0);
219 }
220 
221 static void
222 ahci_write_fis(struct ahci_port *p, enum sata_fis_type ft, uint8_t *fis)
223 {
224 	int offset, len, irq;
225 
226 	if (p->rfis == NULL || !(p->cmd & AHCI_P_CMD_FRE))
227 		return;
228 
229 	switch (ft) {
230 	case FIS_TYPE_REGD2H:
231 		offset = 0x40;
232 		len = 20;
233 		irq = AHCI_P_IX_DHR;
234 		break;
235 	case FIS_TYPE_SETDEVBITS:
236 		offset = 0x58;
237 		len = 8;
238 		irq = AHCI_P_IX_SDB;
239 		break;
240 	case FIS_TYPE_PIOSETUP:
241 		offset = 0x20;
242 		len = 20;
243 		irq = 0;
244 		break;
245 	default:
246 		WPRINTF("unsupported fis type %d\n", ft);
247 		return;
248 	}
249 	memcpy(p->rfis + offset, fis, len);
250 	if (irq) {
251 		p->is |= irq;
252 		ahci_generate_intr(p->pr_sc);
253 	}
254 }
255 
256 static void
257 ahci_write_fis_piosetup(struct ahci_port *p)
258 {
259 	uint8_t fis[20];
260 
261 	memset(fis, 0, sizeof(fis));
262 	fis[0] = FIS_TYPE_PIOSETUP;
263 	ahci_write_fis(p, FIS_TYPE_PIOSETUP, fis);
264 }
265 
266 static void
267 ahci_write_fis_sdb(struct ahci_port *p, int slot, uint32_t tfd)
268 {
269 	uint8_t fis[8];
270 	uint8_t error;
271 
272 	error = (tfd >> 8) & 0xff;
273 	memset(fis, 0, sizeof(fis));
274 	fis[0] = error;
275 	fis[2] = tfd & 0x77;
276 	*(uint32_t *)(fis + 4) = (1 << slot);
277 	if (fis[2] & ATA_S_ERROR)
278 		p->is |= AHCI_P_IX_TFE;
279 	p->tfd = tfd;
280 	ahci_write_fis(p, FIS_TYPE_SETDEVBITS, fis);
281 }
282 
283 static void
284 ahci_write_fis_d2h(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
285 {
286 	uint8_t fis[20];
287 	uint8_t error;
288 
289 	error = (tfd >> 8) & 0xff;
290 	memset(fis, 0, sizeof(fis));
291 	fis[0] = FIS_TYPE_REGD2H;
292 	fis[1] = (1 << 6);
293 	fis[2] = tfd & 0xff;
294 	fis[3] = error;
295 	fis[4] = cfis[4];
296 	fis[5] = cfis[5];
297 	fis[6] = cfis[6];
298 	fis[7] = cfis[7];
299 	fis[8] = cfis[8];
300 	fis[9] = cfis[9];
301 	fis[10] = cfis[10];
302 	fis[11] = cfis[11];
303 	fis[12] = cfis[12];
304 	fis[13] = cfis[13];
305 	if (fis[2] & ATA_S_ERROR)
306 		p->is |= AHCI_P_IX_TFE;
307 	p->tfd = tfd;
308 	p->ci &= ~(1 << slot);
309 	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
310 }
311 
312 static void
313 ahci_write_reset_fis_d2h(struct ahci_port *p)
314 {
315 	uint8_t fis[20];
316 
317 	memset(fis, 0, sizeof(fis));
318 	fis[0] = FIS_TYPE_REGD2H;
319 	fis[3] = 1;
320 	fis[4] = 1;
321 	if (p->atapi) {
322 		fis[5] = 0x14;
323 		fis[6] = 0xeb;
324 	}
325 	fis[12] = 1;
326 	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
327 }
328 
329 static void
330 ahci_port_reset(struct ahci_port *pr)
331 {
332 	pr->sctl = 0;
333 	pr->serr = 0;
334 	pr->sact = 0;
335 	pr->xfermode = ATA_UDMA6;
336 	pr->mult_sectors = 128;
337 
338 	if (!pr->bctx) {
339 		pr->ssts = ATA_SS_DET_NO_DEVICE;
340 		pr->sig = 0xFFFFFFFF;
341 		pr->tfd = 0x7F;
342 		return;
343 	}
344 	pr->ssts = ATA_SS_DET_PHY_ONLINE | ATA_SS_SPD_GEN2 |
345 		ATA_SS_IPM_ACTIVE;
346 	pr->tfd = (1 << 8) | ATA_S_DSC | ATA_S_DMA;
347 	if (!pr->atapi) {
348 		pr->sig = PxSIG_ATA;
349 		pr->tfd |= ATA_S_READY;
350 	} else
351 		pr->sig = PxSIG_ATAPI;
352 	ahci_write_reset_fis_d2h(pr);
353 }
354 
355 static void
356 ahci_reset(struct pci_ahci_softc *sc)
357 {
358 	int i;
359 
360 	sc->ghc = AHCI_GHC_AE;
361 	sc->is = 0;
362 	for (i = 0; i < sc->ports; i++) {
363 		sc->port[i].ie = 0;
364 		sc->port[i].is = 0;
365 		ahci_port_reset(&sc->port[i]);
366 	}
367 }
368 
/*
 * Store 'src' into an ATA identify string field of 'len' bytes.
 * Each pair of bytes is swapped (index i is written at i ^ 1) and the
 * field is padded with spaces once 'src' is exhausted.
 */
static void
ata_string(uint8_t *dest, const char *src, int len)
{
	int pos = 0;

	while (pos < len) {
		uint8_t ch = ' ';

		if (*src != '\0')
			ch = *src++;
		dest[pos ^ 1] = ch;
		pos++;
	}
}
381 
/*
 * Store 'src' into a fixed-width field of 'len' bytes in natural byte
 * order, padding with spaces once 'src' is exhausted.
 */
static void
atapi_string(uint8_t *dest, const char *src, int len)
{
	uint8_t *out = dest;
	uint8_t *end = dest + len;

	while (out < end) {
		if (*src != '\0')
			*out = *src++;
		else
			*out = ' ';
		out++;
	}
}
394 
395 static void
396 ahci_handle_dma(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done,
397     int seek)
398 {
399 	struct ahci_ioreq *aior;
400 	struct blockif_req *breq;
401 	struct pci_ahci_softc *sc;
402 	struct ahci_prdt_entry *prdt;
403 	struct ahci_cmd_hdr *hdr;
404 	uint64_t lba;
405 	uint32_t len;
406 	int i, err, iovcnt, ncq, readop;
407 
408 	sc = p->pr_sc;
409 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
410 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
411 	ncq = 0;
412 	readop = 1;
413 
414 	prdt += seek;
415 	if (cfis[2] == ATA_WRITE_DMA || cfis[2] == ATA_WRITE_DMA48 ||
416 			cfis[2] == ATA_WRITE_FPDMA_QUEUED)
417 		readop = 0;
418 
419 	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
420 			cfis[2] == ATA_READ_FPDMA_QUEUED) {
421 		lba = ((uint64_t)cfis[10] << 40) |
422 			((uint64_t)cfis[9] << 32) |
423 			((uint64_t)cfis[8] << 24) |
424 			((uint64_t)cfis[6] << 16) |
425 			((uint64_t)cfis[5] << 8) |
426 			cfis[4];
427 		len = cfis[11] << 8 | cfis[3];
428 		if (!len)
429 			len = 65536;
430 		ncq = 1;
431 	} else if (cfis[2] == ATA_READ_DMA48 || cfis[2] == ATA_WRITE_DMA48) {
432 		lba = ((uint64_t)cfis[10] << 40) |
433 			((uint64_t)cfis[9] << 32) |
434 			((uint64_t)cfis[8] << 24) |
435 			((uint64_t)cfis[6] << 16) |
436 			((uint64_t)cfis[5] << 8) |
437 			cfis[4];
438 		len = cfis[13] << 8 | cfis[12];
439 		if (!len)
440 			len = 65536;
441 	} else {
442 		lba = ((cfis[7] & 0xf) << 24) | (cfis[6] << 16) |
443 			(cfis[5] << 8) | cfis[4];
444 		len = cfis[12];
445 		if (!len)
446 			len = 256;
447 	}
448 	lba *= blockif_sectsz(p->bctx);
449 	len *= blockif_sectsz(p->bctx);
450 
451 	/*
452 	 * Pull request off free list
453 	 */
454 	aior = STAILQ_FIRST(&p->iofhd);
455 	assert(aior != NULL);
456 	STAILQ_REMOVE_HEAD(&p->iofhd, io_list);
457 	aior->cfis = cfis;
458 	aior->slot = slot;
459 	aior->len = len;
460 	aior->done = done;
461 	breq = &aior->io_req;
462 	breq->br_offset = lba + done;
463 	iovcnt = hdr->prdtl - seek;
464 	if (iovcnt > BLOCKIF_IOV_MAX) {
465 		aior->prdtl = iovcnt - BLOCKIF_IOV_MAX;
466 		iovcnt = BLOCKIF_IOV_MAX;
467 	} else
468 		aior->prdtl = 0;
469 	breq->br_iovcnt = iovcnt;
470 
471 	/*
472 	 * Build up the iovec based on the prdt
473 	 */
474 	for (i = 0; i < iovcnt; i++) {
475 		uint32_t dbcsz;
476 
477 		dbcsz = (prdt->dbc & DBCMASK) + 1;
478 		breq->br_iov[i].iov_base = paddr_guest2host(ahci_ctx(sc),
479 		    prdt->dba, dbcsz);
480 		breq->br_iov[i].iov_len = dbcsz;
481 		aior->done += dbcsz;
482 		prdt++;
483 	}
484 	if (readop)
485 		err = blockif_read(p->bctx, breq);
486 	else
487 		err = blockif_write(p->bctx, breq);
488 	assert(err == 0);
489 
490 	if (!aior->prdtl && ncq)
491 		p->ci &= ~(1 << slot);
492 }
493 
494 static void
495 ahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis)
496 {
497 	struct ahci_ioreq *aior;
498 	struct blockif_req *breq;
499 	int err;
500 
501 	/*
502 	 * Pull request off free list
503 	 */
504 	aior = STAILQ_FIRST(&p->iofhd);
505 	assert(aior != NULL);
506 	STAILQ_REMOVE_HEAD(&p->iofhd, io_list);
507 	aior->cfis = cfis;
508 	aior->slot = slot;
509 	aior->len = 0;
510 	aior->done = 0;
511 	aior->prdtl = 0;
512 	breq = &aior->io_req;
513 
514 	err = blockif_flush(p->bctx, breq);
515 	assert(err == 0);
516 }
517 
518 static inline void
519 write_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
520 		void *buf, int size)
521 {
522 	struct ahci_cmd_hdr *hdr;
523 	struct ahci_prdt_entry *prdt;
524 	void *from;
525 	int i, len;
526 
527 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
528 	len = size;
529 	from = buf;
530 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
531 	for (i = 0; i < hdr->prdtl && len; i++) {
532 		uint8_t *ptr;
533 		uint32_t dbcsz;
534 
535 		dbcsz = (prdt->dbc & DBCMASK) + 1;
536 		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
537 		memcpy(ptr, from, dbcsz);
538 		len -= dbcsz;
539 		from += dbcsz;
540 		prdt++;
541 	}
542 	hdr->prdbc = size - len;
543 }
544 
545 static void
546 handle_identify(struct ahci_port *p, int slot, uint8_t *cfis)
547 {
548 	struct ahci_cmd_hdr *hdr;
549 
550 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
551 	if (p->atapi || hdr->prdtl == 0) {
552 		p->tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
553 		p->is |= AHCI_P_IX_TFE;
554 	} else {
555 		uint16_t buf[256];
556 		uint64_t sectors;
557 
558 		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
559 		memset(buf, 0, sizeof(buf));
560 		buf[0] = 0x0040;
561 		/* TODO emulate different serial? */
562 		ata_string((uint8_t *)(buf+10), "123456", 20);
563 		ata_string((uint8_t *)(buf+23), "001", 8);
564 		ata_string((uint8_t *)(buf+27), "BHYVE SATA DISK", 40);
565 		buf[47] = (0x8000 | 128);
566 		buf[48] = 0x1;
567 		buf[49] = (1 << 8 | 1 << 9 | 1 << 11);
568 		buf[50] = (1 << 14);
569 		buf[53] = (1 << 1 | 1 << 2);
570 		if (p->mult_sectors)
571 			buf[59] = (0x100 | p->mult_sectors);
572 		buf[60] = sectors;
573 		buf[61] = (sectors >> 16);
574 		buf[63] = 0x7;
575 		if (p->xfermode & ATA_WDMA0)
576 			buf[63] |= (1 << ((p->xfermode & 7) + 8));
577 		buf[64] = 0x3;
578 		buf[65] = 100;
579 		buf[66] = 100;
580 		buf[67] = 100;
581 		buf[68] = 100;
582 		buf[75] = 31;
583 		buf[76] = (1 << 8 | 1 << 2);
584 		buf[80] = 0x1f0;
585 		buf[81] = 0x28;
586 		buf[82] = (1 << 5 | 1 << 14);
587 		buf[83] = (1 << 10 | 1 << 12 | 1 << 13 | 1 << 14);
588 		buf[84] = (1 << 14);
589 		buf[85] = (1 << 5 | 1 << 14);
590 		buf[86] = (1 << 10 | 1 << 12 | 1 << 13);
591 		buf[87] = (1 << 14);
592 		buf[88] = 0x7f;
593 		if (p->xfermode & ATA_UDMA0)
594 			buf[88] |= (1 << ((p->xfermode & 7) + 8));
595 		buf[93] = (1 | 1 <<14);
596 		buf[100] = sectors;
597 		buf[101] = (sectors >> 16);
598 		buf[102] = (sectors >> 32);
599 		buf[103] = (sectors >> 48);
600 		ahci_write_fis_piosetup(p);
601 		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
602 		p->tfd = ATA_S_DSC | ATA_S_READY;
603 		p->is |= AHCI_P_IX_DP;
604 	}
605 	p->ci &= ~(1 << slot);
606 	ahci_generate_intr(p->pr_sc);
607 }
608 
609 static void
610 handle_atapi_identify(struct ahci_port *p, int slot, uint8_t *cfis)
611 {
612 	if (!p->atapi) {
613 		p->tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
614 		p->is |= AHCI_P_IX_TFE;
615 	} else {
616 		uint16_t buf[256];
617 
618 		memset(buf, 0, sizeof(buf));
619 		buf[0] = (2 << 14 | 5 << 8 | 1 << 7 | 2 << 5);
620 		/* TODO emulate different serial? */
621 		ata_string((uint8_t *)(buf+10), "123456", 20);
622 		ata_string((uint8_t *)(buf+23), "001", 8);
623 		ata_string((uint8_t *)(buf+27), "BHYVE SATA DVD ROM", 40);
624 		buf[49] = (1 << 9 | 1 << 8);
625 		buf[50] = (1 << 14 | 1);
626 		buf[53] = (1 << 2 | 1 << 1);
627 		buf[62] = 0x3f;
628 		buf[63] = 7;
629 		buf[64] = 3;
630 		buf[65] = 100;
631 		buf[66] = 100;
632 		buf[67] = 100;
633 		buf[68] = 100;
634 		buf[76] = (1 << 2 | 1 << 1);
635 		buf[78] = (1 << 5);
636 		buf[80] = (0x1f << 4);
637 		buf[82] = (1 << 4);
638 		buf[83] = (1 << 14);
639 		buf[84] = (1 << 14);
640 		buf[85] = (1 << 4);
641 		buf[87] = (1 << 14);
642 		buf[88] = (1 << 14 | 0x7f);
643 		ahci_write_fis_piosetup(p);
644 		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
645 		p->tfd = ATA_S_DSC | ATA_S_READY;
646 		p->is |= AHCI_P_IX_DHR;
647 	}
648 	p->ci &= ~(1 << slot);
649 	ahci_generate_intr(p->pr_sc);
650 }
651 
652 static void
653 atapi_inquiry(struct ahci_port *p, int slot, uint8_t *cfis)
654 {
655 	uint8_t buf[36];
656 	uint8_t *acmd;
657 	int len;
658 
659 	acmd = cfis + 0x40;
660 
661 	buf[0] = 0x05;
662 	buf[1] = 0x80;
663 	buf[2] = 0x00;
664 	buf[3] = 0x21;
665 	buf[4] = 31;
666 	buf[5] = 0;
667 	buf[6] = 0;
668 	buf[7] = 0;
669 	atapi_string(buf + 8, "BHYVE", 8);
670 	atapi_string(buf + 16, "BHYVE DVD-ROM", 16);
671 	atapi_string(buf + 32, "001", 4);
672 
673 	len = sizeof(buf);
674 	if (len > acmd[4])
675 		len = acmd[4];
676 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
677 	write_prdt(p, slot, cfis, buf, len);
678 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
679 }
680 
681 static void
682 atapi_read_capacity(struct ahci_port *p, int slot, uint8_t *cfis)
683 {
684 	uint8_t buf[8];
685 	uint64_t sectors;
686 
687 	sectors = blockif_size(p->bctx) / 2048;
688 	be32enc(buf, sectors - 1);
689 	be32enc(buf + 4, 2048);
690 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
691 	write_prdt(p, slot, cfis, buf, sizeof(buf));
692 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
693 }
694 
695 static void
696 atapi_read_toc(struct ahci_port *p, int slot, uint8_t *cfis)
697 {
698 	uint8_t *acmd;
699 	uint8_t format;
700 	int len;
701 
702 	acmd = cfis + 0x40;
703 
704 	len = be16dec(acmd + 7);
705 	format = acmd[9] >> 6;
706 	switch (format) {
707 	case 0:
708 	{
709 		int msf, size;
710 		uint64_t sectors;
711 		uint8_t start_track, buf[20], *bp;
712 
713 		msf = (acmd[1] >> 1) & 1;
714 		start_track = acmd[6];
715 		if (start_track > 1 && start_track != 0xaa) {
716 			uint32_t tfd;
717 			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
718 			p->asc = 0x24;
719 			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
720 			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
721 			ahci_write_fis_d2h(p, slot, cfis, tfd);
722 			return;
723 		}
724 		bp = buf + 2;
725 		*bp++ = 1;
726 		*bp++ = 1;
727 		if (start_track <= 1) {
728 			*bp++ = 0;
729 			*bp++ = 0x14;
730 			*bp++ = 1;
731 			*bp++ = 0;
732 			if (msf) {
733 				*bp++ = 0;
734 				lba_to_msf(bp, 0);
735 				bp += 3;
736 			} else {
737 				*bp++ = 0;
738 				*bp++ = 0;
739 				*bp++ = 0;
740 				*bp++ = 0;
741 			}
742 		}
743 		*bp++ = 0;
744 		*bp++ = 0x14;
745 		*bp++ = 0xaa;
746 		*bp++ = 0;
747 		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
748 		sectors >>= 2;
749 		if (msf) {
750 			*bp++ = 0;
751 			lba_to_msf(bp, sectors);
752 			bp += 3;
753 		} else {
754 			be32enc(bp, sectors);
755 			bp += 4;
756 		}
757 		size = bp - buf;
758 		be16enc(buf, size - 2);
759 		if (len > size)
760 			len = size;
761 		write_prdt(p, slot, cfis, buf, len);
762 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
763 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
764 		break;
765 	}
766 	case 1:
767 	{
768 		uint8_t buf[12];
769 
770 		memset(buf, 0, sizeof(buf));
771 		buf[1] = 0xa;
772 		buf[2] = 0x1;
773 		buf[3] = 0x1;
774 		if (len > sizeof(buf))
775 			len = sizeof(buf);
776 		write_prdt(p, slot, cfis, buf, len);
777 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
778 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
779 		break;
780 	}
781 	case 2:
782 	{
783 		int msf, size;
784 		uint64_t sectors;
785 		uint8_t start_track, *bp, buf[50];
786 
787 		msf = (acmd[1] >> 1) & 1;
788 		start_track = acmd[6];
789 		bp = buf + 2;
790 		*bp++ = 1;
791 		*bp++ = 1;
792 
793 		*bp++ = 1;
794 		*bp++ = 0x14;
795 		*bp++ = 0;
796 		*bp++ = 0xa0;
797 		*bp++ = 0;
798 		*bp++ = 0;
799 		*bp++ = 0;
800 		*bp++ = 0;
801 		*bp++ = 1;
802 		*bp++ = 0;
803 		*bp++ = 0;
804 
805 		*bp++ = 1;
806 		*bp++ = 0x14;
807 		*bp++ = 0;
808 		*bp++ = 0xa1;
809 		*bp++ = 0;
810 		*bp++ = 0;
811 		*bp++ = 0;
812 		*bp++ = 0;
813 		*bp++ = 1;
814 		*bp++ = 0;
815 		*bp++ = 0;
816 
817 		*bp++ = 1;
818 		*bp++ = 0x14;
819 		*bp++ = 0;
820 		*bp++ = 0xa2;
821 		*bp++ = 0;
822 		*bp++ = 0;
823 		*bp++ = 0;
824 		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
825 		sectors >>= 2;
826 		if (msf) {
827 			*bp++ = 0;
828 			lba_to_msf(bp, sectors);
829 			bp += 3;
830 		} else {
831 			be32enc(bp, sectors);
832 			bp += 4;
833 		}
834 
835 		*bp++ = 1;
836 		*bp++ = 0x14;
837 		*bp++ = 0;
838 		*bp++ = 1;
839 		*bp++ = 0;
840 		*bp++ = 0;
841 		*bp++ = 0;
842 		if (msf) {
843 			*bp++ = 0;
844 			lba_to_msf(bp, 0);
845 			bp += 3;
846 		} else {
847 			*bp++ = 0;
848 			*bp++ = 0;
849 			*bp++ = 0;
850 			*bp++ = 0;
851 		}
852 
853 		size = bp - buf;
854 		be16enc(buf, size - 2);
855 		if (len > size)
856 			len = size;
857 		write_prdt(p, slot, cfis, buf, len);
858 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
859 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
860 		break;
861 	}
862 	default:
863 	{
864 		uint32_t tfd;
865 
866 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
867 		p->asc = 0x24;
868 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
869 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
870 		ahci_write_fis_d2h(p, slot, cfis, tfd);
871 		break;
872 	}
873 	}
874 }
875 
876 static void
877 atapi_read(struct ahci_port *p, int slot, uint8_t *cfis,
878 		uint32_t done, int seek)
879 {
880 	struct ahci_ioreq *aior;
881 	struct ahci_cmd_hdr *hdr;
882 	struct ahci_prdt_entry *prdt;
883 	struct blockif_req *breq;
884 	struct pci_ahci_softc *sc;
885 	uint8_t *acmd;
886 	uint64_t lba;
887 	uint32_t len;
888 	int i, err, iovcnt;
889 
890 	sc = p->pr_sc;
891 	acmd = cfis + 0x40;
892 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
893 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
894 
895 	prdt += seek;
896 	lba = be32dec(acmd + 2);
897 	if (acmd[0] == READ_10)
898 		len = be16dec(acmd + 7);
899 	else
900 		len = be32dec(acmd + 6);
901 	if (len == 0) {
902 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
903 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
904 	}
905 	lba *= 2048;
906 	len *= 2048;
907 
908 	/*
909 	 * Pull request off free list
910 	 */
911 	aior = STAILQ_FIRST(&p->iofhd);
912 	assert(aior != NULL);
913 	STAILQ_REMOVE_HEAD(&p->iofhd, io_list);
914 	aior->cfis = cfis;
915 	aior->slot = slot;
916 	aior->len = len;
917 	aior->done = done;
918 	breq = &aior->io_req;
919 	breq->br_offset = lba + done;
920 	iovcnt = hdr->prdtl - seek;
921 	if (iovcnt > BLOCKIF_IOV_MAX) {
922 		aior->prdtl = iovcnt - BLOCKIF_IOV_MAX;
923 		iovcnt = BLOCKIF_IOV_MAX;
924 	} else
925 		aior->prdtl = 0;
926 	breq->br_iovcnt = iovcnt;
927 
928 	/*
929 	 * Build up the iovec based on the prdt
930 	 */
931 	for (i = 0; i < iovcnt; i++) {
932 		uint32_t dbcsz;
933 
934 		dbcsz = (prdt->dbc & DBCMASK) + 1;
935 		breq->br_iov[i].iov_base = paddr_guest2host(ahci_ctx(sc),
936 		    prdt->dba, dbcsz);
937 		breq->br_iov[i].iov_len = dbcsz;
938 		aior->done += dbcsz;
939 		prdt++;
940 	}
941 	err = blockif_read(p->bctx, breq);
942 	assert(err == 0);
943 }
944 
945 static void
946 atapi_request_sense(struct ahci_port *p, int slot, uint8_t *cfis)
947 {
948 	uint8_t buf[64];
949 	uint8_t *acmd;
950 	int len;
951 
952 	acmd = cfis + 0x40;
953 	len = acmd[4];
954 	if (len > sizeof(buf))
955 		len = sizeof(buf);
956 	memset(buf, 0, len);
957 	buf[0] = 0x70 | (1 << 7);
958 	buf[2] = p->sense_key;
959 	buf[7] = 10;
960 	buf[12] = p->asc;
961 	write_prdt(p, slot, cfis, buf, len);
962 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
963 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
964 }
965 
966 static void
967 atapi_start_stop_unit(struct ahci_port *p, int slot, uint8_t *cfis)
968 {
969 	uint8_t *acmd = cfis + 0x40;
970 	uint32_t tfd;
971 
972 	switch (acmd[4] & 3) {
973 	case 0:
974 	case 1:
975 	case 3:
976 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
977 		tfd = ATA_S_READY | ATA_S_DSC;
978 		break;
979 	case 2:
980 		/* TODO eject media */
981 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
982 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
983 		p->asc = 0x53;
984 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
985 		break;
986 	}
987 	ahci_write_fis_d2h(p, slot, cfis, tfd);
988 }
989 
990 static void
991 atapi_mode_sense(struct ahci_port *p, int slot, uint8_t *cfis)
992 {
993 	uint8_t *acmd;
994 	uint32_t tfd;
995 	uint8_t pc, code;
996 	int len;
997 
998 	acmd = cfis + 0x40;
999 	len = be16dec(acmd + 7);
1000 	pc = acmd[2] >> 6;
1001 	code = acmd[2] & 0x3f;
1002 
1003 	switch (pc) {
1004 	case 0:
1005 		switch (code) {
1006 		case MODEPAGE_RW_ERROR_RECOVERY:
1007 		{
1008 			uint8_t buf[16];
1009 
1010 			if (len > sizeof(buf))
1011 				len = sizeof(buf);
1012 
1013 			memset(buf, 0, sizeof(buf));
1014 			be16enc(buf, 16 - 2);
1015 			buf[2] = 0x70;
1016 			buf[8] = 0x01;
1017 			buf[9] = 16 - 10;
1018 			buf[11] = 0x05;
1019 			write_prdt(p, slot, cfis, buf, len);
1020 			tfd = ATA_S_READY | ATA_S_DSC;
1021 			break;
1022 		}
1023 		case MODEPAGE_CD_CAPABILITIES:
1024 		{
1025 			uint8_t buf[30];
1026 
1027 			if (len > sizeof(buf))
1028 				len = sizeof(buf);
1029 
1030 			memset(buf, 0, sizeof(buf));
1031 			be16enc(buf, 30 - 2);
1032 			buf[2] = 0x70;
1033 			buf[8] = 0x2A;
1034 			buf[9] = 30 - 10;
1035 			buf[10] = 0x08;
1036 			buf[12] = 0x71;
1037 			be16enc(&buf[18], 2);
1038 			be16enc(&buf[20], 512);
1039 			write_prdt(p, slot, cfis, buf, len);
1040 			tfd = ATA_S_READY | ATA_S_DSC;
1041 			break;
1042 		}
1043 		default:
1044 			goto error;
1045 			break;
1046 		}
1047 		break;
1048 	case 3:
1049 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1050 		p->asc = 0x39;
1051 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1052 		break;
1053 error:
1054 	case 1:
1055 	case 2:
1056 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1057 		p->asc = 0x24;
1058 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1059 		break;
1060 	}
1061 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1062 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1063 }
1064 
1065 static void
1066 atapi_get_event_status_notification(struct ahci_port *p, int slot,
1067     uint8_t *cfis)
1068 {
1069 	uint8_t *acmd;
1070 	uint32_t tfd;
1071 
1072 	acmd = cfis + 0x40;
1073 
1074 	/* we don't support asynchronous operation */
1075 	if (!(acmd[1] & 1)) {
1076 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1077 		p->asc = 0x24;
1078 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1079 	} else {
1080 		uint8_t buf[8];
1081 		int len;
1082 
1083 		len = be16dec(acmd + 7);
1084 		if (len > sizeof(buf))
1085 			len = sizeof(buf);
1086 
1087 		memset(buf, 0, sizeof(buf));
1088 		be16enc(buf, 8 - 2);
1089 		buf[2] = 0x04;
1090 		buf[3] = 0x10;
1091 		buf[5] = 0x02;
1092 		write_prdt(p, slot, cfis, buf, len);
1093 		tfd = ATA_S_READY | ATA_S_DSC;
1094 	}
1095 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1096 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1097 }
1098 
1099 static void
1100 handle_packet_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1101 {
1102 	uint8_t *acmd;
1103 
1104 	acmd = cfis + 0x40;
1105 
1106 #ifdef AHCI_DEBUG
1107 	{
1108 		int i;
1109 		DPRINTF("ACMD:");
1110 		for (i = 0; i < 16; i++)
1111 			DPRINTF("%02x ", acmd[i]);
1112 		DPRINTF("\n");
1113 	}
1114 #endif
1115 
1116 	switch (acmd[0]) {
1117 	case TEST_UNIT_READY:
1118 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1119 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1120 		break;
1121 	case INQUIRY:
1122 		atapi_inquiry(p, slot, cfis);
1123 		break;
1124 	case READ_CAPACITY:
1125 		atapi_read_capacity(p, slot, cfis);
1126 		break;
1127 	case PREVENT_ALLOW:
1128 		/* TODO */
1129 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1130 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1131 		break;
1132 	case READ_TOC:
1133 		atapi_read_toc(p, slot, cfis);
1134 		break;
1135 	case READ_10:
1136 	case READ_12:
1137 		atapi_read(p, slot, cfis, 0, 0);
1138 		break;
1139 	case REQUEST_SENSE:
1140 		atapi_request_sense(p, slot, cfis);
1141 		break;
1142 	case START_STOP_UNIT:
1143 		atapi_start_stop_unit(p, slot, cfis);
1144 		break;
1145 	case MODE_SENSE_10:
1146 		atapi_mode_sense(p, slot, cfis);
1147 		break;
1148 	case GET_EVENT_STATUS_NOTIFICATION:
1149 		atapi_get_event_status_notification(p, slot, cfis);
1150 		break;
1151 	default:
1152 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1153 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1154 		p->asc = 0x20;
1155 		ahci_write_fis_d2h(p, slot, cfis, (p->sense_key << 12) |
1156 				ATA_S_READY | ATA_S_ERROR);
1157 		break;
1158 	}
1159 }
1160 
1161 static void
1162 ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1163 {
1164 
1165 	switch (cfis[2]) {
1166 	case ATA_ATA_IDENTIFY:
1167 		handle_identify(p, slot, cfis);
1168 		break;
1169 	case ATA_SETFEATURES:
1170 	{
1171 		switch (cfis[3]) {
1172 		case ATA_SF_ENAB_WCACHE:
1173 		case ATA_SF_DIS_WCACHE:
1174 		case ATA_SF_ENAB_RCACHE:
1175 		case ATA_SF_DIS_RCACHE:
1176 			p->tfd = ATA_S_DSC | ATA_S_READY;
1177 			break;
1178 		case ATA_SF_SETXFER:
1179 		{
1180 			switch (cfis[12] & 0xf8) {
1181 			case ATA_PIO:
1182 			case ATA_PIO0:
1183 				break;
1184 			case ATA_WDMA0:
1185 			case ATA_UDMA0:
1186 				p->xfermode = (cfis[12] & 0x7);
1187 				break;
1188 			}
1189 			p->tfd = ATA_S_DSC | ATA_S_READY;
1190 			break;
1191 		}
1192 		default:
1193 			p->tfd = ATA_S_ERROR | ATA_S_READY;
1194 			p->tfd |= (ATA_ERROR_ABORT << 8);
1195 			break;
1196 		}
1197 		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1198 		break;
1199 	}
1200 	case ATA_SET_MULTI:
1201 		if (cfis[12] != 0 &&
1202 			(cfis[12] > 128 || (cfis[12] & (cfis[12] - 1)))) {
1203 			p->tfd = ATA_S_ERROR | ATA_S_READY;
1204 			p->tfd |= (ATA_ERROR_ABORT << 8);
1205 		} else {
1206 			p->mult_sectors = cfis[12];
1207 			p->tfd = ATA_S_DSC | ATA_S_READY;
1208 		}
1209 		p->is |= AHCI_P_IX_DP;
1210 		p->ci &= ~(1 << slot);
1211 		ahci_generate_intr(p->pr_sc);
1212 		break;
1213 	case ATA_READ_DMA:
1214 	case ATA_WRITE_DMA:
1215 	case ATA_READ_DMA48:
1216 	case ATA_WRITE_DMA48:
1217 	case ATA_READ_FPDMA_QUEUED:
1218 	case ATA_WRITE_FPDMA_QUEUED:
1219 		ahci_handle_dma(p, slot, cfis, 0, 0);
1220 		break;
1221 	case ATA_FLUSHCACHE:
1222 	case ATA_FLUSHCACHE48:
1223 		ahci_handle_flush(p, slot, cfis);
1224 		break;
1225 	case ATA_STANDBY_CMD:
1226 		break;
1227 	case ATA_NOP:
1228 	case ATA_STANDBY_IMMEDIATE:
1229 	case ATA_IDLE_IMMEDIATE:
1230 	case ATA_SLEEP:
1231 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1232 		break;
1233 	case ATA_ATAPI_IDENTIFY:
1234 		handle_atapi_identify(p, slot, cfis);
1235 		break;
1236 	case ATA_PACKET_CMD:
1237 		if (!p->atapi) {
1238 			p->tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1239 			p->is |= AHCI_P_IX_TFE;
1240 			p->ci &= ~(1 << slot);
1241 			ahci_generate_intr(p->pr_sc);
1242 		} else
1243 			handle_packet_cmd(p, slot, cfis);
1244 		break;
1245 	default:
1246 		WPRINTF("Unsupported cmd:%02x\n", cfis[2]);
1247 		p->tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1248 		p->is |= AHCI_P_IX_TFE;
1249 		p->ci &= ~(1 << slot);
1250 		ahci_generate_intr(p->pr_sc);
1251 		break;
1252 	}
1253 }
1254 
1255 static void
1256 ahci_handle_slot(struct ahci_port *p, int slot)
1257 {
1258 	struct ahci_cmd_hdr *hdr;
1259 	struct ahci_prdt_entry *prdt;
1260 	struct pci_ahci_softc *sc;
1261 	uint8_t *cfis;
1262 	int cfl;
1263 
1264 	sc = p->pr_sc;
1265 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1266 	cfl = (hdr->flags & 0x1f) * 4;
1267 	cfis = paddr_guest2host(ahci_ctx(sc), hdr->ctba,
1268 			0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry));
1269 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1270 
1271 #ifdef AHCI_DEBUG
1272 	DPRINTF("\ncfis:");
1273 	for (i = 0; i < cfl; i++) {
1274 		if (i % 10 == 0)
1275 			DPRINTF("\n");
1276 		DPRINTF("%02x ", cfis[i]);
1277 	}
1278 	DPRINTF("\n");
1279 
1280 	for (i = 0; i < hdr->prdtl; i++) {
1281 		DPRINTF("%d@%08"PRIx64"\n", prdt->dbc & 0x3fffff, prdt->dba);
1282 		prdt++;
1283 	}
1284 #endif
1285 
1286 	if (cfis[0] != FIS_TYPE_REGH2D) {
1287 		WPRINTF("Not a H2D FIS:%02x\n", cfis[0]);
1288 		return;
1289 	}
1290 
1291 	if (cfis[1] & 0x80) {
1292 		ahci_handle_cmd(p, slot, cfis);
1293 	} else {
1294 		if (cfis[15] & (1 << 2))
1295 			p->reset = 1;
1296 		else if (p->reset) {
1297 			p->reset = 0;
1298 			ahci_port_reset(p);
1299 		}
1300 		p->ci &= ~(1 << slot);
1301 	}
1302 }
1303 
1304 static void
1305 ahci_handle_port(struct ahci_port *p)
1306 {
1307 	int i;
1308 
1309 	if (!(p->cmd & AHCI_P_CMD_ST))
1310 		return;
1311 
1312 	for (i = 0; (i < 32) && p->ci; i++) {
1313 		if (p->ci & (1 << i))
1314 			ahci_handle_slot(p, i);
1315 	}
1316 }
1317 
1318 /*
1319  * blockif callback routine - this runs in the context of the blockif
1320  * i/o thread, so the mutex needs to be acquired.
1321  */
1322 static void
1323 ata_ioreq_cb(struct blockif_req *br, int err)
1324 {
1325 	struct ahci_cmd_hdr *hdr;
1326 	struct ahci_ioreq *aior;
1327 	struct ahci_port *p;
1328 	struct pci_ahci_softc *sc;
1329 	uint32_t tfd;
1330 	uint8_t *cfis;
1331 	int pending, slot, ncq;
1332 
1333 	DPRINTF("%s %d\n", __func__, err);
1334 
1335 	ncq = 0;
1336 	aior = br->br_param;
1337 	p = aior->io_pr;
1338 	cfis = aior->cfis;
1339 	slot = aior->slot;
1340 	pending = aior->prdtl;
1341 	sc = p->pr_sc;
1342 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1343 
1344 	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
1345 			cfis[2] == ATA_READ_FPDMA_QUEUED)
1346 		ncq = 1;
1347 
1348 	pthread_mutex_lock(&sc->mtx);
1349 
1350 	/*
1351 	 * Move the blockif request back to the free list
1352 	 */
1353 	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_list);
1354 
1355 	if (pending && !err) {
1356 		ahci_handle_dma(p, slot, cfis, aior->done,
1357 		    hdr->prdtl - pending);
1358 		goto out;
1359 	}
1360 
1361 	if (!err && aior->done == aior->len) {
1362 		tfd = ATA_S_READY | ATA_S_DSC;
1363 		if (ncq)
1364 			hdr->prdbc = 0;
1365 		else
1366 			hdr->prdbc = aior->len;
1367 	} else {
1368 		tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1369 		hdr->prdbc = 0;
1370 		if (ncq)
1371 			p->serr |= (1 << slot);
1372 	}
1373 
1374 	if (ncq) {
1375 		p->sact &= ~(1 << slot);
1376 		ahci_write_fis_sdb(p, slot, tfd);
1377 	} else
1378 		ahci_write_fis_d2h(p, slot, cfis, tfd);
1379 
1380 out:
1381 	pthread_mutex_unlock(&sc->mtx);
1382 	DPRINTF("%s exit\n", __func__);
1383 }
1384 
1385 static void
1386 atapi_ioreq_cb(struct blockif_req *br, int err)
1387 {
1388 	struct ahci_cmd_hdr *hdr;
1389 	struct ahci_ioreq *aior;
1390 	struct ahci_port *p;
1391 	struct pci_ahci_softc *sc;
1392 	uint8_t *cfis;
1393 	uint32_t tfd;
1394 	int pending, slot;
1395 
1396 	DPRINTF("%s %d\n", __func__, err);
1397 
1398 	aior = br->br_param;
1399 	p = aior->io_pr;
1400 	cfis = aior->cfis;
1401 	slot = aior->slot;
1402 	pending = aior->prdtl;
1403 	sc = p->pr_sc;
1404 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + aior->slot * AHCI_CL_SIZE);
1405 
1406 	pthread_mutex_lock(&sc->mtx);
1407 
1408 	/*
1409 	 * Move the blockif request back to the free list
1410 	 */
1411 	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_list);
1412 
1413 	if (pending && !err) {
1414 		atapi_read(p, slot, cfis, aior->done, hdr->prdtl - pending);
1415 		goto out;
1416 	}
1417 
1418 	if (!err && aior->done == aior->len) {
1419 		tfd = ATA_S_READY | ATA_S_DSC;
1420 		hdr->prdbc = aior->len;
1421 	} else {
1422 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1423 		p->asc = 0x21;
1424 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1425 		hdr->prdbc = 0;
1426 	}
1427 
1428 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1429 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1430 
1431 out:
1432 	pthread_mutex_unlock(&sc->mtx);
1433 	DPRINTF("%s exit\n", __func__);
1434 }
1435 
1436 static void
1437 pci_ahci_ioreq_init(struct ahci_port *pr)
1438 {
1439 	struct ahci_ioreq *vr;
1440 	int i;
1441 
1442 	pr->ioqsz = blockif_queuesz(pr->bctx);
1443 	pr->ioreq = calloc(pr->ioqsz, sizeof(struct ahci_ioreq));
1444 	STAILQ_INIT(&pr->iofhd);
1445 
1446 	/*
1447 	 * Add all i/o request entries to the free queue
1448 	 */
1449 	for (i = 0; i < pr->ioqsz; i++) {
1450 		vr = &pr->ioreq[i];
1451 		vr->io_pr = pr;
1452 		if (!pr->atapi)
1453 			vr->io_req.br_callback = ata_ioreq_cb;
1454 		else
1455 			vr->io_req.br_callback = atapi_ioreq_cb;
1456 		vr->io_req.br_param = vr;
1457 		STAILQ_INSERT_TAIL(&pr->iofhd, vr, io_list);
1458 	}
1459 }
1460 
1461 static void
1462 pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
1463 {
1464 	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
1465 	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
1466 	struct ahci_port *p = &sc->port[port];
1467 
1468 	DPRINTF("pci_ahci_port %d: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
1469 		port, offset, value);
1470 
1471 	switch (offset) {
1472 	case AHCI_P_CLB:
1473 		p->clb = value;
1474 		break;
1475 	case AHCI_P_CLBU:
1476 		p->clbu = value;
1477 		break;
1478 	case AHCI_P_FB:
1479 		p->fb = value;
1480 		break;
1481 	case AHCI_P_FBU:
1482 		p->fbu = value;
1483 		break;
1484 	case AHCI_P_IS:
1485 		p->is &= ~value;
1486 		break;
1487 	case AHCI_P_IE:
1488 		p->ie = value & 0xFDC000FF;
1489 		ahci_generate_intr(sc);
1490 		break;
1491 	case AHCI_P_CMD:
1492 	{
1493 		p->cmd = value;
1494 
1495 		if (!(value & AHCI_P_CMD_ST)) {
1496 			p->cmd &= ~(AHCI_P_CMD_CR | AHCI_P_CMD_CCS_MASK);
1497 			p->ci = 0;
1498 			p->sact = 0;
1499 		} else {
1500 			uint64_t clb;
1501 
1502 			p->cmd |= AHCI_P_CMD_CR;
1503 			clb = (uint64_t)p->clbu << 32 | p->clb;
1504 			p->cmd_lst = paddr_guest2host(ahci_ctx(sc), clb,
1505 					AHCI_CL_SIZE * AHCI_MAX_SLOTS);
1506 		}
1507 
1508 		if (value & AHCI_P_CMD_FRE) {
1509 			uint64_t fb;
1510 
1511 			p->cmd |= AHCI_P_CMD_FR;
1512 			fb = (uint64_t)p->fbu << 32 | p->fb;
1513 			/* we don't support FBSCP, so rfis size is 256Bytes */
1514 			p->rfis = paddr_guest2host(ahci_ctx(sc), fb, 256);
1515 		} else {
1516 			p->cmd &= ~AHCI_P_CMD_FR;
1517 		}
1518 
1519 		if (value & AHCI_P_CMD_CLO) {
1520 			p->tfd = 0;
1521 			p->cmd &= ~AHCI_P_CMD_CLO;
1522 		}
1523 
1524 		ahci_handle_port(p);
1525 		break;
1526 	}
1527 	case AHCI_P_TFD:
1528 	case AHCI_P_SIG:
1529 	case AHCI_P_SSTS:
1530 		WPRINTF("pci_ahci_port: read only registers 0x%"PRIx64"\n", offset);
1531 		break;
1532 	case AHCI_P_SCTL:
1533 		if (!(p->cmd & AHCI_P_CMD_ST)) {
1534 			if (value & ATA_SC_DET_RESET)
1535 				ahci_port_reset(p);
1536 			p->sctl = value;
1537 		}
1538 		break;
1539 	case AHCI_P_SERR:
1540 		p->serr &= ~value;
1541 		break;
1542 	case AHCI_P_SACT:
1543 		p->sact |= value;
1544 		break;
1545 	case AHCI_P_CI:
1546 		p->ci |= value;
1547 		ahci_handle_port(p);
1548 		break;
1549 	case AHCI_P_SNTF:
1550 	case AHCI_P_FBS:
1551 	default:
1552 		break;
1553 	}
1554 }
1555 
1556 static void
1557 pci_ahci_host_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
1558 {
1559 	DPRINTF("pci_ahci_host: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
1560 		offset, value);
1561 
1562 	switch (offset) {
1563 	case AHCI_CAP:
1564 	case AHCI_PI:
1565 	case AHCI_VS:
1566 	case AHCI_CAP2:
1567 		DPRINTF("pci_ahci_host: read only registers 0x%"PRIx64"\n", offset);
1568 		break;
1569 	case AHCI_GHC:
1570 		if (value & AHCI_GHC_HR)
1571 			ahci_reset(sc);
1572 		else if (value & AHCI_GHC_IE) {
1573 			sc->ghc |= AHCI_GHC_IE;
1574 			ahci_generate_intr(sc);
1575 		}
1576 		break;
1577 	case AHCI_IS:
1578 		sc->is &= ~value;
1579 		ahci_generate_intr(sc);
1580 		break;
1581 	default:
1582 		break;
1583 	}
1584 }
1585 
1586 static void
1587 pci_ahci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
1588 		int baridx, uint64_t offset, int size, uint64_t value)
1589 {
1590 	struct pci_ahci_softc *sc = pi->pi_arg;
1591 
1592 	assert(baridx == 5);
1593 	assert(size == 4);
1594 
1595 	pthread_mutex_lock(&sc->mtx);
1596 
1597 	if (offset < AHCI_OFFSET)
1598 		pci_ahci_host_write(sc, offset, value);
1599 	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
1600 		pci_ahci_port_write(sc, offset, value);
1601 	else
1602 		WPRINTF("pci_ahci: unknown i/o write offset 0x%"PRIx64"\n", offset);
1603 
1604 	pthread_mutex_unlock(&sc->mtx);
1605 }
1606 
1607 static uint64_t
1608 pci_ahci_host_read(struct pci_ahci_softc *sc, uint64_t offset)
1609 {
1610 	uint32_t value;
1611 
1612 	switch (offset) {
1613 	case AHCI_CAP:
1614 	case AHCI_GHC:
1615 	case AHCI_IS:
1616 	case AHCI_PI:
1617 	case AHCI_VS:
1618 	case AHCI_CCCC:
1619 	case AHCI_CCCP:
1620 	case AHCI_EM_LOC:
1621 	case AHCI_EM_CTL:
1622 	case AHCI_CAP2:
1623 	{
1624 		uint32_t *p = &sc->cap;
1625 		p += (offset - AHCI_CAP) / sizeof(uint32_t);
1626 		value = *p;
1627 		break;
1628 	}
1629 	default:
1630 		value = 0;
1631 		break;
1632 	}
1633 	DPRINTF("pci_ahci_host: read offset 0x%"PRIx64" value 0x%x\n",
1634 		offset, value);
1635 
1636 	return (value);
1637 }
1638 
1639 static uint64_t
1640 pci_ahci_port_read(struct pci_ahci_softc *sc, uint64_t offset)
1641 {
1642 	uint32_t value;
1643 	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
1644 	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
1645 
1646 	switch (offset) {
1647 	case AHCI_P_CLB:
1648 	case AHCI_P_CLBU:
1649 	case AHCI_P_FB:
1650 	case AHCI_P_FBU:
1651 	case AHCI_P_IS:
1652 	case AHCI_P_IE:
1653 	case AHCI_P_CMD:
1654 	case AHCI_P_TFD:
1655 	case AHCI_P_SIG:
1656 	case AHCI_P_SSTS:
1657 	case AHCI_P_SCTL:
1658 	case AHCI_P_SERR:
1659 	case AHCI_P_SACT:
1660 	case AHCI_P_CI:
1661 	case AHCI_P_SNTF:
1662 	case AHCI_P_FBS:
1663 	{
1664 		uint32_t *p= &sc->port[port].clb;
1665 		p += (offset - AHCI_P_CLB) / sizeof(uint32_t);
1666 		value = *p;
1667 		break;
1668 	}
1669 	default:
1670 		value = 0;
1671 		break;
1672 	}
1673 
1674 	DPRINTF("pci_ahci_port %d: read offset 0x%"PRIx64" value 0x%x\n",
1675 		port, offset, value);
1676 
1677 	return value;
1678 }
1679 
1680 static uint64_t
1681 pci_ahci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
1682     uint64_t offset, int size)
1683 {
1684 	struct pci_ahci_softc *sc = pi->pi_arg;
1685 	uint32_t value;
1686 
1687 	assert(baridx == 5);
1688 	assert(size == 4);
1689 
1690 	pthread_mutex_lock(&sc->mtx);
1691 
1692 	if (offset < AHCI_OFFSET)
1693 		value = pci_ahci_host_read(sc, offset);
1694 	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
1695 		value = pci_ahci_port_read(sc, offset);
1696 	else {
1697 		value = 0;
1698 		WPRINTF("pci_ahci: unknown i/o read offset 0x%"PRIx64"\n", offset);
1699 	}
1700 
1701 	pthread_mutex_unlock(&sc->mtx);
1702 
1703 	return (value);
1704 }
1705 
1706 static int
1707 pci_ahci_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts, int atapi)
1708 {
1709 	char bident[sizeof("XX:X:X")];
1710 	struct blockif_ctxt *bctxt;
1711 	struct pci_ahci_softc *sc;
1712 	int ret, slots;
1713 
1714 	ret = 0;
1715 
1716 	if (opts == NULL) {
1717 		fprintf(stderr, "pci_ahci: backing device required\n");
1718 		return (1);
1719 	}
1720 
1721 #ifdef AHCI_DEBUG
1722 	dbg = fopen("/tmp/log", "w+");
1723 #endif
1724 
1725        	sc = malloc(sizeof(struct pci_ahci_softc));
1726 	memset(sc, 0, sizeof(struct pci_ahci_softc));
1727 	pi->pi_arg = sc;
1728 	sc->asc_pi = pi;
1729 	sc->ports = MAX_PORTS;
1730 
1731 	/*
1732 	 * Only use port 0 for a backing device. All other ports will be
1733 	 * marked as unused
1734 	 */
1735 	sc->port[0].atapi = atapi;
1736 
1737 	/*
1738 	 * Attempt to open the backing image. Use the PCI
1739 	 * slot/func for the identifier string.
1740 	 */
1741 	snprintf(bident, sizeof(bident), "%d:%d", pi->pi_slot, pi->pi_func);
1742 	bctxt = blockif_open(opts, bident);
1743 	if (bctxt == NULL) {
1744 		ret = 1;
1745 		goto open_fail;
1746 	}
1747 	sc->port[0].bctx = bctxt;
1748 	sc->port[0].pr_sc = sc;
1749 
1750 	/*
1751 	 * Allocate blockif request structures and add them
1752 	 * to the free list
1753 	 */
1754 	pci_ahci_ioreq_init(&sc->port[0]);
1755 
1756 	pthread_mutex_init(&sc->mtx, NULL);
1757 
1758 	/* Intel ICH8 AHCI */
1759 	slots = sc->port[0].ioqsz;
1760 	if (slots > 32)
1761 		slots = 32;
1762 	--slots;
1763 	sc->cap = AHCI_CAP_64BIT | AHCI_CAP_SNCQ | AHCI_CAP_SSNTF |
1764 	    AHCI_CAP_SMPS | AHCI_CAP_SSS | AHCI_CAP_SALP |
1765 	    AHCI_CAP_SAL | AHCI_CAP_SCLO | (0x3 << AHCI_CAP_ISS_SHIFT)|
1766 	    AHCI_CAP_PMD | AHCI_CAP_SSC | AHCI_CAP_PSC |
1767 	    (slots << AHCI_CAP_NCS_SHIFT) | AHCI_CAP_SXS | (sc->ports - 1);
1768 
1769 	/* Only port 0 implemented */
1770 	sc->pi = 1;
1771 	sc->vs = 0x10300;
1772 	sc->cap2 = AHCI_CAP2_APST;
1773 	ahci_reset(sc);
1774 
1775 	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x2821);
1776 	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x8086);
1777 	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
1778 	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_STORAGE_SATA);
1779 	pci_set_cfgdata8(pi, PCIR_PROGIF, PCIP_STORAGE_SATA_AHCI_1_0);
1780 	pci_emul_add_msicap(pi, 1);
1781 	pci_emul_alloc_bar(pi, 5, PCIBAR_MEM32,
1782 	    AHCI_OFFSET + sc->ports * AHCI_STEP);
1783 
1784 open_fail:
1785 	if (ret) {
1786 		blockif_close(sc->port[0].bctx);
1787 		free(sc);
1788 	}
1789 
1790 	return (ret);
1791 }
1792 
/*
 * Init hook for the disk flavour of the emulation: forwards to
 * pci_ahci_init() with the ATAPI flag cleared.
 */
static int
pci_ahci_hd_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int atapi = 0;

	return (pci_ahci_init(ctx, pi, opts, atapi));
}
1799 
/*
 * Init hook for the ATAPI flavour of the emulation: forwards to
 * pci_ahci_init() with the ATAPI flag set.
 */
static int
pci_ahci_atapi_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int atapi = 1;

	return (pci_ahci_init(ctx, pi, opts, atapi));
}
1806 
1807 /*
1808  * Use separate emulation names to distinguish drive and atapi devices
1809  */
1810 struct pci_devemu pci_de_ahci_hd = {
1811 	.pe_emu =	"ahci-hd",
1812 	.pe_init =	pci_ahci_hd_init,
1813 	.pe_barwrite =	pci_ahci_write,
1814 	.pe_barread =	pci_ahci_read
1815 };
1816 PCI_EMUL_SET(pci_de_ahci_hd);
1817 
1818 struct pci_devemu pci_de_ahci_cd = {
1819 	.pe_emu =	"ahci-cd",
1820 	.pe_init =	pci_ahci_atapi_init,
1821 	.pe_barwrite =	pci_ahci_write,
1822 	.pe_barread =	pci_ahci_read
1823 };
1824 PCI_EMUL_SET(pci_de_ahci_cd);
1825