xref: /freebsd/usr.sbin/bhyve/pci_ahci.c (revision 6186fd1857626de0f7cb1a9e4dff19082f9ebb11)
1 /*-
2  * Copyright (c) 2013  Zhixiang Yu <zcore@freebsd.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/linker_set.h>
34 #include <sys/stat.h>
35 #include <sys/uio.h>
36 #include <sys/ioctl.h>
37 #include <sys/disk.h>
38 #include <sys/ata.h>
39 #include <sys/endian.h>
40 
41 #include <errno.h>
42 #include <fcntl.h>
43 #include <stdio.h>
44 #include <stdlib.h>
45 #include <stdint.h>
46 #include <string.h>
47 #include <strings.h>
48 #include <unistd.h>
49 #include <assert.h>
50 #include <pthread.h>
51 #include <inttypes.h>
52 
53 #include "bhyverun.h"
54 #include "pci_emul.h"
55 #include "ahci.h"
56 #include "block_if.h"
57 
/* Size of the per-controller port array (see struct pci_ahci_softc). */
#define	MAX_PORTS	6	/* Intel ICH8 AHCI supports 6 ports */

/* Values loaded into a port's `sig` field by ahci_port_reset(). */
#define	PxSIG_ATA	0x00000101 /* ATA drive */
#define	PxSIG_ATAPI	0xeb140101 /* ATAPI drive */
62 
/*
 * FIS type codes.  The FIS builders in this file store the type in
 * byte 0 of the FIS, and ahci_write_fis() uses it to pick the slot in
 * the received-FIS area.
 */
enum sata_fis_type {
	FIS_TYPE_REGH2D		= 0x27,	/* Register FIS - host to device */
	FIS_TYPE_REGD2H		= 0x34,	/* Register FIS - device to host */
	FIS_TYPE_DMAACT		= 0x39,	/* DMA activate FIS - device to host */
	FIS_TYPE_DMASETUP	= 0x41,	/* DMA setup FIS - bidirectional */
	FIS_TYPE_DATA		= 0x46,	/* Data FIS - bidirectional */
	FIS_TYPE_BIST		= 0x58,	/* BIST activate FIS - bidirectional */
	FIS_TYPE_PIOSETUP	= 0x5F,	/* PIO setup FIS - device to host */
	FIS_TYPE_SETDEVBITS	= 0xA1,	/* Set dev bits FIS - device to host */
};
73 
/*
 * SCSI opcodes
 *
 * Matched against byte 0 of the ATAPI command packet in
 * handle_packet_cmd().  Not every opcode listed here has a handler
 * there; unhandled ones are rejected as illegal requests.
 */
#define	TEST_UNIT_READY		0x00
#define	REQUEST_SENSE		0x03
#define	INQUIRY			0x12
#define	START_STOP_UNIT		0x1B
#define	PREVENT_ALLOW		0x1E
#define	READ_CAPACITY		0x25
#define	READ_10			0x28
#define	POSITION_TO_ELEMENT	0x2B
#define	READ_TOC		0x43
#define	GET_EVENT_STATUS_NOTIFICATION 0x4A
#define	MODE_SENSE_10		0x5A
#define	READ_12			0xA8
#define	READ_CD			0xBE

/*
 * SCSI mode page codes
 *
 * Compared with the low six bits of byte 2 of a MODE SENSE(10) packet
 * in atapi_mode_sense().
 */
#define	MODEPAGE_RW_ERROR_RECOVERY	0x01
#define	MODEPAGE_CD_CAPABILITIES	0x2A

/*
 * ATA SET FEATURES sub-codes
 *
 * ATA_SF_ENAB_SATA_SF is matched against cfis[3] of an ATA_SETFEATURES
 * command and ATA_SATA_SF_AN against cfis[12] (see ahci_handle_cmd()).
 * ATA_SF_DIS_SATA_SF is defined here but not referenced in the command
 * handling visible in this part of the file.
 */
#define	ATA_SF_ENAB_SATA_SF		0x10
#define		ATA_SATA_SF_AN		0x05
#define	ATA_SF_DIS_SATA_SF		0x90
103 
/*
 * Debug printf
 *
 * With AHCI_DEBUG defined, DPRINTF() formats to the file-scope stream
 * `dbg` and flushes it after every message; otherwise it expands to
 * nothing.  WPRINTF() always prints to stdout via printf().
 */
#ifdef AHCI_DEBUG
static FILE *dbg;
#define DPRINTF(format, arg...)	do{fprintf(dbg, format, ##arg);fflush(dbg);}while(0)
#else
#define DPRINTF(format, arg...)
#endif
#define WPRINTF(format, arg...) printf(format, ##arg)
114 
/*
 * Per-command I/O request.  Requests are taken from a port's free list
 * (iofhd) when a command is handed to the block layer.
 */
struct ahci_ioreq {
	struct blockif_req io_req;	/* request passed to blockif_read/write/flush */
	struct ahci_port *io_pr;	/* presumably the owning port - set outside this part of the file */
	STAILQ_ENTRY(ahci_ioreq) io_list;	/* linkage on the port's free list */
	uint8_t *cfis;			/* command FIS of the command being serviced */
	uint32_t len;			/* total transfer length in bytes */
	uint32_t done;			/* bytes covered so far, advanced per PRDT entry queued */
	int slot;			/* command slot this request belongs to */
	int prdtl;			/* PRDT entries left over beyond BLOCKIF_IOV_MAX, else 0 */
};
125 
/*
 * State of one emulated AHCI port.
 */
struct ahci_port {
	struct blockif_ctxt *bctx;	/* backing block device; NULL means no device attached */
	struct pci_ahci_softc *pr_sc;	/* owning controller */
	uint8_t *cmd_lst;		/* command list: headers indexed by slot * AHCI_CL_SIZE */
	uint8_t *rfis;			/* received-FIS area written by ahci_write_fis() */
	int atapi;			/* non-zero when the port emulates an ATAPI device */
	int reset;
	int mult_sectors;		/* SET MULTIPLE value, reported in IDENTIFY word 59 */
	uint8_t xfermode;		/* current transfer mode, reported in IDENTIFY */
	uint8_t sense_key;		/* last ATAPI sense key, returned by REQUEST SENSE */
	uint8_t asc;			/* last ATAPI additional sense code */
	uint32_t pending;		/* bitmask of slots marked in-flight by ahci_handle_dma() */

	/*
	 * NOTE(review): the fields below look like they follow the AHCI
	 * port register layout (unused0 as a placeholder) - confirm
	 * before reordering.
	 */
	uint32_t clb;
	uint32_t clbu;
	uint32_t fb;
	uint32_t fbu;
	uint32_t is;			/* interrupt status bits (AHCI_P_IX_*) */
	uint32_t ie;			/* interrupt enable mask, ANDed with `is` */
	uint32_t cmd;			/* AHCI_P_CMD_FRE is checked before writing a FIS */
	uint32_t unused0;
	uint32_t tfd;			/* task file: status in bits 7:0, error in bits 15:8 */
	uint32_t sig;			/* device signature (PxSIG_*) */
	uint32_t ssts;
	uint32_t sctl;
	uint32_t serr;
	uint32_t sact;
	uint32_t ci;			/* per-slot bits, cleared when a command completes */
	uint32_t sntf;
	uint32_t fbs;

	/*
	 * i/o request info
	 */
	struct ahci_ioreq *ioreq;
	int ioqsz;
	STAILQ_HEAD(ahci_fhead, ahci_ioreq) iofhd;	/* free list of I/O requests */
};
164 
/*
 * One command header in a port's command list (located at
 * cmd_lst + slot * AHCI_CL_SIZE).
 */
struct ahci_cmd_hdr {
	uint16_t flags;
	uint16_t prdtl;		/* number of PRDT entries for this command */
	uint32_t prdbc;		/* byte count transferred, stored by write_prdt() */
	uint64_t ctba;		/* presumably the command table base address - confirm */
	uint32_t reserved[4];
};
172 
/*
 * One PRDT entry.  The table is read starting at offset 0x80 from the
 * command FIS pointer.
 */
struct ahci_prdt_entry {
	uint64_t dba;		/* guest-physical buffer address (fed to paddr_guest2host) */
	uint32_t reserved;
#define	DBCMASK		0x3fffff
	uint32_t dbc;		/* low 22 bits (DBCMASK) hold the byte count minus one */
};
179 
/*
 * Per-controller state.
 */
struct pci_ahci_softc {
	struct pci_devinst *asc_pi;	/* PCI device instance used for interrupts */
	pthread_mutex_t	mtx;
	int ports;			/* number of entries of port[] in use */
	uint32_t cap;
	uint32_t ghc;			/* AHCI_GHC_IE gates interrupt delivery */
	uint32_t is;			/* one bit per port with an enabled pending cause */
	uint32_t pi;
	uint32_t vs;
	uint32_t ccc_ctl;
	uint32_t ccc_pts;
	uint32_t em_loc;
	uint32_t em_ctl;
	uint32_t cap2;
	uint32_t bohc;
	uint32_t lintr;			/* non-zero while the pin-based interrupt is asserted */
	struct ahci_port port[MAX_PORTS];
};
/* VM context of the controller, as needed by paddr_guest2host(). */
#define	ahci_ctx(sc)	((sc)->asc_pi->pi_vmctx)
199 
/*
 * Convert a logical block address to a minute/second/frame triple.
 * The address is biased by 150 frames and split at 75 frames per
 * second and 60 seconds per minute; buf receives three bytes.
 */
static inline void lba_to_msf(uint8_t *buf, int lba)
{
	int frames, seconds;

	frames = lba + 150;
	seconds = frames / 75;

	buf[0] = seconds / 60;		/* minutes */
	buf[1] = seconds % 60;		/* seconds */
	buf[2] = frames % 75;		/* frames */
}
207 
208 /*
209  * generate HBA intr depending on whether or not ports within
210  * the controller have an interrupt pending.
211  */
212 static void
213 ahci_generate_intr(struct pci_ahci_softc *sc)
214 {
215 	struct pci_devinst *pi;
216 	int i;
217 
218 	pi = sc->asc_pi;
219 
220 	for (i = 0; i < sc->ports; i++) {
221 		struct ahci_port *pr;
222 		pr = &sc->port[i];
223 		if (pr->is & pr->ie)
224 			sc->is |= (1 << i);
225 	}
226 
227 	DPRINTF("%s %x\n", __func__, sc->is);
228 
229 	if (sc->is && (sc->ghc & AHCI_GHC_IE)) {
230 		if (pci_msi_enabled(pi)) {
231 			/*
232 			 * Generate an MSI interrupt on every edge
233 			 */
234 			pci_generate_msi(pi, 0);
235 		} else if (!sc->lintr) {
236 			/*
237 			 * Only generate a pin-based interrupt if one wasn't
238 			 * in progress
239 			 */
240 			sc->lintr = 1;
241 			pci_lintr_assert(pi);
242 		}
243 	} else if (sc->lintr) {
244 		/*
245 		 * No interrupts: deassert pin-based signal if it had
246 		 * been asserted
247 		 */
248 		pci_lintr_deassert(pi);
249 		sc->lintr = 0;
250 	}
251 }
252 
253 static void
254 ahci_write_fis(struct ahci_port *p, enum sata_fis_type ft, uint8_t *fis)
255 {
256 	int offset, len, irq;
257 
258 	if (p->rfis == NULL || !(p->cmd & AHCI_P_CMD_FRE))
259 		return;
260 
261 	switch (ft) {
262 	case FIS_TYPE_REGD2H:
263 		offset = 0x40;
264 		len = 20;
265 		irq = AHCI_P_IX_DHR;
266 		break;
267 	case FIS_TYPE_SETDEVBITS:
268 		offset = 0x58;
269 		len = 8;
270 		irq = AHCI_P_IX_SDB;
271 		break;
272 	case FIS_TYPE_PIOSETUP:
273 		offset = 0x20;
274 		len = 20;
275 		irq = 0;
276 		break;
277 	default:
278 		WPRINTF("unsupported fis type %d\n", ft);
279 		return;
280 	}
281 	memcpy(p->rfis + offset, fis, len);
282 	if (irq) {
283 		p->is |= irq;
284 		ahci_generate_intr(p->pr_sc);
285 	}
286 }
287 
288 static void
289 ahci_write_fis_piosetup(struct ahci_port *p)
290 {
291 	uint8_t fis[20];
292 
293 	memset(fis, 0, sizeof(fis));
294 	fis[0] = FIS_TYPE_PIOSETUP;
295 	ahci_write_fis(p, FIS_TYPE_PIOSETUP, fis);
296 }
297 
298 static void
299 ahci_write_fis_sdb(struct ahci_port *p, int slot, uint32_t tfd)
300 {
301 	uint8_t fis[8];
302 	uint8_t error;
303 
304 	error = (tfd >> 8) & 0xff;
305 	memset(fis, 0, sizeof(fis));
306 	fis[0] = error;
307 	fis[2] = tfd & 0x77;
308 	*(uint32_t *)(fis + 4) = (1 << slot);
309 	if (fis[2] & ATA_S_ERROR)
310 		p->is |= AHCI_P_IX_TFE;
311 	p->tfd = tfd;
312 	ahci_write_fis(p, FIS_TYPE_SETDEVBITS, fis);
313 }
314 
315 static void
316 ahci_write_fis_d2h(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
317 {
318 	uint8_t fis[20];
319 	uint8_t error;
320 
321 	error = (tfd >> 8) & 0xff;
322 	memset(fis, 0, sizeof(fis));
323 	fis[0] = FIS_TYPE_REGD2H;
324 	fis[1] = (1 << 6);
325 	fis[2] = tfd & 0xff;
326 	fis[3] = error;
327 	fis[4] = cfis[4];
328 	fis[5] = cfis[5];
329 	fis[6] = cfis[6];
330 	fis[7] = cfis[7];
331 	fis[8] = cfis[8];
332 	fis[9] = cfis[9];
333 	fis[10] = cfis[10];
334 	fis[11] = cfis[11];
335 	fis[12] = cfis[12];
336 	fis[13] = cfis[13];
337 	if (fis[2] & ATA_S_ERROR)
338 		p->is |= AHCI_P_IX_TFE;
339 	else
340 		p->ci &= ~(1 << slot);
341 	p->tfd = tfd;
342 	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
343 }
344 
345 static void
346 ahci_write_reset_fis_d2h(struct ahci_port *p)
347 {
348 	uint8_t fis[20];
349 
350 	memset(fis, 0, sizeof(fis));
351 	fis[0] = FIS_TYPE_REGD2H;
352 	fis[3] = 1;
353 	fis[4] = 1;
354 	if (p->atapi) {
355 		fis[5] = 0x14;
356 		fis[6] = 0xeb;
357 	}
358 	fis[12] = 1;
359 	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
360 }
361 
362 static void
363 ahci_port_reset(struct ahci_port *pr)
364 {
365 	pr->sctl = 0;
366 	pr->serr = 0;
367 	pr->sact = 0;
368 	pr->xfermode = ATA_UDMA6;
369 	pr->mult_sectors = 128;
370 
371 	if (!pr->bctx) {
372 		pr->ssts = ATA_SS_DET_NO_DEVICE;
373 		pr->sig = 0xFFFFFFFF;
374 		pr->tfd = 0x7F;
375 		return;
376 	}
377 	pr->ssts = ATA_SS_DET_PHY_ONLINE | ATA_SS_SPD_GEN2 |
378 		ATA_SS_IPM_ACTIVE;
379 	pr->tfd = (1 << 8) | ATA_S_DSC | ATA_S_DMA;
380 	if (!pr->atapi) {
381 		pr->sig = PxSIG_ATA;
382 		pr->tfd |= ATA_S_READY;
383 	} else
384 		pr->sig = PxSIG_ATAPI;
385 	ahci_write_reset_fis_d2h(pr);
386 }
387 
388 static void
389 ahci_reset(struct pci_ahci_softc *sc)
390 {
391 	int i;
392 
393 	sc->ghc = AHCI_GHC_AE;
394 	sc->is = 0;
395 
396 	if (sc->lintr) {
397 		pci_lintr_deassert(sc->asc_pi);
398 		sc->lintr = 0;
399 	}
400 
401 	for (i = 0; i < sc->ports; i++) {
402 		sc->port[i].ie = 0;
403 		sc->port[i].is = 0;
404 		ahci_port_reset(&sc->port[i]);
405 	}
406 }
407 
/*
 * Copy `src` into an ATA identify string field of `len` bytes.
 *
 * Each pair of characters is stored byte-swapped (index i goes to
 * i ^ 1) and the field is padded with spaces once `src` runs out.
 * No terminating NUL is written.  Callers pass an even `len`.
 */
static void
ata_string(uint8_t *dest, const char *src, int len)
{
	int pos;

	for (pos = 0; pos < len; pos++)
		dest[pos ^ 1] = (*src != '\0') ? *src++ : ' ';
}
420 
/*
 * Copy `src` into a fixed-width field of `len` bytes, padding with
 * spaces once `src` is exhausted.  No terminating NUL is written.
 */
static void
atapi_string(uint8_t *dest, const char *src, int len)
{
	uint8_t *end = dest + len;

	while (dest < end)
		*dest++ = (*src != '\0') ? *src++ : ' ';
}
433 
434 static void
435 ahci_handle_dma(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done,
436     int seek)
437 {
438 	struct ahci_ioreq *aior;
439 	struct blockif_req *breq;
440 	struct pci_ahci_softc *sc;
441 	struct ahci_prdt_entry *prdt;
442 	struct ahci_cmd_hdr *hdr;
443 	uint64_t lba;
444 	uint32_t len;
445 	int i, err, iovcnt, ncq, readop;
446 
447 	sc = p->pr_sc;
448 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
449 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
450 	ncq = 0;
451 	readop = 1;
452 
453 	prdt += seek;
454 	if (cfis[2] == ATA_WRITE_DMA || cfis[2] == ATA_WRITE_DMA48 ||
455 			cfis[2] == ATA_WRITE_FPDMA_QUEUED)
456 		readop = 0;
457 
458 	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
459 			cfis[2] == ATA_READ_FPDMA_QUEUED) {
460 		lba = ((uint64_t)cfis[10] << 40) |
461 			((uint64_t)cfis[9] << 32) |
462 			((uint64_t)cfis[8] << 24) |
463 			((uint64_t)cfis[6] << 16) |
464 			((uint64_t)cfis[5] << 8) |
465 			cfis[4];
466 		len = cfis[11] << 8 | cfis[3];
467 		if (!len)
468 			len = 65536;
469 		ncq = 1;
470 	} else if (cfis[2] == ATA_READ_DMA48 || cfis[2] == ATA_WRITE_DMA48) {
471 		lba = ((uint64_t)cfis[10] << 40) |
472 			((uint64_t)cfis[9] << 32) |
473 			((uint64_t)cfis[8] << 24) |
474 			((uint64_t)cfis[6] << 16) |
475 			((uint64_t)cfis[5] << 8) |
476 			cfis[4];
477 		len = cfis[13] << 8 | cfis[12];
478 		if (!len)
479 			len = 65536;
480 	} else {
481 		lba = ((cfis[7] & 0xf) << 24) | (cfis[6] << 16) |
482 			(cfis[5] << 8) | cfis[4];
483 		len = cfis[12];
484 		if (!len)
485 			len = 256;
486 	}
487 	lba *= blockif_sectsz(p->bctx);
488 	len *= blockif_sectsz(p->bctx);
489 
490 	/*
491 	 * Pull request off free list
492 	 */
493 	aior = STAILQ_FIRST(&p->iofhd);
494 	assert(aior != NULL);
495 	STAILQ_REMOVE_HEAD(&p->iofhd, io_list);
496 	aior->cfis = cfis;
497 	aior->slot = slot;
498 	aior->len = len;
499 	aior->done = done;
500 	breq = &aior->io_req;
501 	breq->br_offset = lba + done;
502 	iovcnt = hdr->prdtl - seek;
503 	if (iovcnt > BLOCKIF_IOV_MAX) {
504 		aior->prdtl = iovcnt - BLOCKIF_IOV_MAX;
505 		iovcnt = BLOCKIF_IOV_MAX;
506 		/*
507 		 * Mark this command in-flight.
508 		 */
509 		p->pending |= 1 << slot;
510 	} else
511 		aior->prdtl = 0;
512 	breq->br_iovcnt = iovcnt;
513 
514 	/*
515 	 * Build up the iovec based on the prdt
516 	 */
517 	for (i = 0; i < iovcnt; i++) {
518 		uint32_t dbcsz;
519 
520 		dbcsz = (prdt->dbc & DBCMASK) + 1;
521 		breq->br_iov[i].iov_base = paddr_guest2host(ahci_ctx(sc),
522 		    prdt->dba, dbcsz);
523 		breq->br_iov[i].iov_len = dbcsz;
524 		aior->done += dbcsz;
525 		prdt++;
526 	}
527 	if (readop)
528 		err = blockif_read(p->bctx, breq);
529 	else
530 		err = blockif_write(p->bctx, breq);
531 	assert(err == 0);
532 
533 	if (ncq)
534 		p->ci &= ~(1 << slot);
535 }
536 
537 static void
538 ahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis)
539 {
540 	struct ahci_ioreq *aior;
541 	struct blockif_req *breq;
542 	int err;
543 
544 	/*
545 	 * Pull request off free list
546 	 */
547 	aior = STAILQ_FIRST(&p->iofhd);
548 	assert(aior != NULL);
549 	STAILQ_REMOVE_HEAD(&p->iofhd, io_list);
550 	aior->cfis = cfis;
551 	aior->slot = slot;
552 	aior->len = 0;
553 	aior->done = 0;
554 	aior->prdtl = 0;
555 	breq = &aior->io_req;
556 
557 	err = blockif_flush(p->bctx, breq);
558 	assert(err == 0);
559 }
560 
561 static inline void
562 write_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
563 		void *buf, int size)
564 {
565 	struct ahci_cmd_hdr *hdr;
566 	struct ahci_prdt_entry *prdt;
567 	void *from;
568 	int i, len;
569 
570 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
571 	len = size;
572 	from = buf;
573 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
574 	for (i = 0; i < hdr->prdtl && len; i++) {
575 		uint8_t *ptr;
576 		uint32_t dbcsz;
577 		int sublen;
578 
579 		dbcsz = (prdt->dbc & DBCMASK) + 1;
580 		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
581 		sublen = len < dbcsz ? len : dbcsz;
582 		memcpy(ptr, from, sublen);
583 		len -= sublen;
584 		from += sublen;
585 		prdt++;
586 	}
587 	hdr->prdbc = size - len;
588 }
589 
590 static void
591 handle_identify(struct ahci_port *p, int slot, uint8_t *cfis)
592 {
593 	struct ahci_cmd_hdr *hdr;
594 
595 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
596 	if (p->atapi || hdr->prdtl == 0) {
597 		p->tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
598 		p->is |= AHCI_P_IX_TFE;
599 	} else {
600 		uint16_t buf[256];
601 		uint64_t sectors;
602 		uint16_t cyl;
603 		uint8_t sech, heads;
604 
605 		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
606 		blockif_chs(p->bctx, &cyl, &heads, &sech);
607 		memset(buf, 0, sizeof(buf));
608 		buf[0] = 0x0040;
609 		buf[1] = cyl;
610 		buf[3] = heads;
611 		buf[6] = sech;
612 		/* TODO emulate different serial? */
613 		ata_string((uint8_t *)(buf+10), "123456", 20);
614 		ata_string((uint8_t *)(buf+23), "001", 8);
615 		ata_string((uint8_t *)(buf+27), "BHYVE SATA DISK", 40);
616 		buf[47] = (0x8000 | 128);
617 		buf[48] = 0x1;
618 		buf[49] = (1 << 8 | 1 << 9 | 1 << 11);
619 		buf[50] = (1 << 14);
620 		buf[53] = (1 << 1 | 1 << 2);
621 		if (p->mult_sectors)
622 			buf[59] = (0x100 | p->mult_sectors);
623 		buf[60] = sectors;
624 		buf[61] = (sectors >> 16);
625 		buf[63] = 0x7;
626 		if (p->xfermode & ATA_WDMA0)
627 			buf[63] |= (1 << ((p->xfermode & 7) + 8));
628 		buf[64] = 0x3;
629 		buf[65] = 100;
630 		buf[66] = 100;
631 		buf[67] = 100;
632 		buf[68] = 100;
633 		buf[75] = 31;
634 		buf[76] = (1 << 8 | 1 << 2);
635 		buf[80] = 0x1f0;
636 		buf[81] = 0x28;
637 		buf[82] = (1 << 5 | 1 << 14);
638 		buf[83] = (1 << 10 | 1 << 12 | 1 << 13 | 1 << 14);
639 		buf[84] = (1 << 14);
640 		buf[85] = (1 << 5 | 1 << 14);
641 		buf[86] = (1 << 10 | 1 << 12 | 1 << 13);
642 		buf[87] = (1 << 14);
643 		buf[88] = 0x7f;
644 		if (p->xfermode & ATA_UDMA0)
645 			buf[88] |= (1 << ((p->xfermode & 7) + 8));
646 		buf[93] = (1 | 1 <<14);
647 		buf[100] = sectors;
648 		buf[101] = (sectors >> 16);
649 		buf[102] = (sectors >> 32);
650 		buf[103] = (sectors >> 48);
651 		ahci_write_fis_piosetup(p);
652 		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
653 		p->tfd = ATA_S_DSC | ATA_S_READY;
654 		p->is |= AHCI_P_IX_DP;
655 		p->ci &= ~(1 << slot);
656 	}
657 	ahci_generate_intr(p->pr_sc);
658 }
659 
660 static void
661 handle_atapi_identify(struct ahci_port *p, int slot, uint8_t *cfis)
662 {
663 	if (!p->atapi) {
664 		p->tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
665 		p->is |= AHCI_P_IX_TFE;
666 	} else {
667 		uint16_t buf[256];
668 
669 		memset(buf, 0, sizeof(buf));
670 		buf[0] = (2 << 14 | 5 << 8 | 1 << 7 | 2 << 5);
671 		/* TODO emulate different serial? */
672 		ata_string((uint8_t *)(buf+10), "123456", 20);
673 		ata_string((uint8_t *)(buf+23), "001", 8);
674 		ata_string((uint8_t *)(buf+27), "BHYVE SATA DVD ROM", 40);
675 		buf[49] = (1 << 9 | 1 << 8);
676 		buf[50] = (1 << 14 | 1);
677 		buf[53] = (1 << 2 | 1 << 1);
678 		buf[62] = 0x3f;
679 		buf[63] = 7;
680 		buf[64] = 3;
681 		buf[65] = 100;
682 		buf[66] = 100;
683 		buf[67] = 100;
684 		buf[68] = 100;
685 		buf[76] = (1 << 2 | 1 << 1);
686 		buf[78] = (1 << 5);
687 		buf[80] = (0x1f << 4);
688 		buf[82] = (1 << 4);
689 		buf[83] = (1 << 14);
690 		buf[84] = (1 << 14);
691 		buf[85] = (1 << 4);
692 		buf[87] = (1 << 14);
693 		buf[88] = (1 << 14 | 0x7f);
694 		ahci_write_fis_piosetup(p);
695 		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
696 		p->tfd = ATA_S_DSC | ATA_S_READY;
697 		p->is |= AHCI_P_IX_DHR;
698 		p->ci &= ~(1 << slot);
699 	}
700 	ahci_generate_intr(p->pr_sc);
701 }
702 
703 static void
704 atapi_inquiry(struct ahci_port *p, int slot, uint8_t *cfis)
705 {
706 	uint8_t buf[36];
707 	uint8_t *acmd;
708 	int len;
709 
710 	acmd = cfis + 0x40;
711 
712 	buf[0] = 0x05;
713 	buf[1] = 0x80;
714 	buf[2] = 0x00;
715 	buf[3] = 0x21;
716 	buf[4] = 31;
717 	buf[5] = 0;
718 	buf[6] = 0;
719 	buf[7] = 0;
720 	atapi_string(buf + 8, "BHYVE", 8);
721 	atapi_string(buf + 16, "BHYVE DVD-ROM", 16);
722 	atapi_string(buf + 32, "001", 4);
723 
724 	len = sizeof(buf);
725 	if (len > acmd[4])
726 		len = acmd[4];
727 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
728 	write_prdt(p, slot, cfis, buf, len);
729 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
730 }
731 
732 static void
733 atapi_read_capacity(struct ahci_port *p, int slot, uint8_t *cfis)
734 {
735 	uint8_t buf[8];
736 	uint64_t sectors;
737 
738 	sectors = blockif_size(p->bctx) / 2048;
739 	be32enc(buf, sectors - 1);
740 	be32enc(buf + 4, 2048);
741 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
742 	write_prdt(p, slot, cfis, buf, sizeof(buf));
743 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
744 }
745 
746 static void
747 atapi_read_toc(struct ahci_port *p, int slot, uint8_t *cfis)
748 {
749 	uint8_t *acmd;
750 	uint8_t format;
751 	int len;
752 
753 	acmd = cfis + 0x40;
754 
755 	len = be16dec(acmd + 7);
756 	format = acmd[9] >> 6;
757 	switch (format) {
758 	case 0:
759 	{
760 		int msf, size;
761 		uint64_t sectors;
762 		uint8_t start_track, buf[20], *bp;
763 
764 		msf = (acmd[1] >> 1) & 1;
765 		start_track = acmd[6];
766 		if (start_track > 1 && start_track != 0xaa) {
767 			uint32_t tfd;
768 			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
769 			p->asc = 0x24;
770 			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
771 			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
772 			ahci_write_fis_d2h(p, slot, cfis, tfd);
773 			return;
774 		}
775 		bp = buf + 2;
776 		*bp++ = 1;
777 		*bp++ = 1;
778 		if (start_track <= 1) {
779 			*bp++ = 0;
780 			*bp++ = 0x14;
781 			*bp++ = 1;
782 			*bp++ = 0;
783 			if (msf) {
784 				*bp++ = 0;
785 				lba_to_msf(bp, 0);
786 				bp += 3;
787 			} else {
788 				*bp++ = 0;
789 				*bp++ = 0;
790 				*bp++ = 0;
791 				*bp++ = 0;
792 			}
793 		}
794 		*bp++ = 0;
795 		*bp++ = 0x14;
796 		*bp++ = 0xaa;
797 		*bp++ = 0;
798 		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
799 		sectors >>= 2;
800 		if (msf) {
801 			*bp++ = 0;
802 			lba_to_msf(bp, sectors);
803 			bp += 3;
804 		} else {
805 			be32enc(bp, sectors);
806 			bp += 4;
807 		}
808 		size = bp - buf;
809 		be16enc(buf, size - 2);
810 		if (len > size)
811 			len = size;
812 		write_prdt(p, slot, cfis, buf, len);
813 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
814 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
815 		break;
816 	}
817 	case 1:
818 	{
819 		uint8_t buf[12];
820 
821 		memset(buf, 0, sizeof(buf));
822 		buf[1] = 0xa;
823 		buf[2] = 0x1;
824 		buf[3] = 0x1;
825 		if (len > sizeof(buf))
826 			len = sizeof(buf);
827 		write_prdt(p, slot, cfis, buf, len);
828 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
829 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
830 		break;
831 	}
832 	case 2:
833 	{
834 		int msf, size;
835 		uint64_t sectors;
836 		uint8_t start_track, *bp, buf[50];
837 
838 		msf = (acmd[1] >> 1) & 1;
839 		start_track = acmd[6];
840 		bp = buf + 2;
841 		*bp++ = 1;
842 		*bp++ = 1;
843 
844 		*bp++ = 1;
845 		*bp++ = 0x14;
846 		*bp++ = 0;
847 		*bp++ = 0xa0;
848 		*bp++ = 0;
849 		*bp++ = 0;
850 		*bp++ = 0;
851 		*bp++ = 0;
852 		*bp++ = 1;
853 		*bp++ = 0;
854 		*bp++ = 0;
855 
856 		*bp++ = 1;
857 		*bp++ = 0x14;
858 		*bp++ = 0;
859 		*bp++ = 0xa1;
860 		*bp++ = 0;
861 		*bp++ = 0;
862 		*bp++ = 0;
863 		*bp++ = 0;
864 		*bp++ = 1;
865 		*bp++ = 0;
866 		*bp++ = 0;
867 
868 		*bp++ = 1;
869 		*bp++ = 0x14;
870 		*bp++ = 0;
871 		*bp++ = 0xa2;
872 		*bp++ = 0;
873 		*bp++ = 0;
874 		*bp++ = 0;
875 		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
876 		sectors >>= 2;
877 		if (msf) {
878 			*bp++ = 0;
879 			lba_to_msf(bp, sectors);
880 			bp += 3;
881 		} else {
882 			be32enc(bp, sectors);
883 			bp += 4;
884 		}
885 
886 		*bp++ = 1;
887 		*bp++ = 0x14;
888 		*bp++ = 0;
889 		*bp++ = 1;
890 		*bp++ = 0;
891 		*bp++ = 0;
892 		*bp++ = 0;
893 		if (msf) {
894 			*bp++ = 0;
895 			lba_to_msf(bp, 0);
896 			bp += 3;
897 		} else {
898 			*bp++ = 0;
899 			*bp++ = 0;
900 			*bp++ = 0;
901 			*bp++ = 0;
902 		}
903 
904 		size = bp - buf;
905 		be16enc(buf, size - 2);
906 		if (len > size)
907 			len = size;
908 		write_prdt(p, slot, cfis, buf, len);
909 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
910 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
911 		break;
912 	}
913 	default:
914 	{
915 		uint32_t tfd;
916 
917 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
918 		p->asc = 0x24;
919 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
920 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
921 		ahci_write_fis_d2h(p, slot, cfis, tfd);
922 		break;
923 	}
924 	}
925 }
926 
927 static void
928 atapi_read(struct ahci_port *p, int slot, uint8_t *cfis,
929 		uint32_t done, int seek)
930 {
931 	struct ahci_ioreq *aior;
932 	struct ahci_cmd_hdr *hdr;
933 	struct ahci_prdt_entry *prdt;
934 	struct blockif_req *breq;
935 	struct pci_ahci_softc *sc;
936 	uint8_t *acmd;
937 	uint64_t lba;
938 	uint32_t len;
939 	int i, err, iovcnt;
940 
941 	sc = p->pr_sc;
942 	acmd = cfis + 0x40;
943 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
944 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
945 
946 	prdt += seek;
947 	lba = be32dec(acmd + 2);
948 	if (acmd[0] == READ_10)
949 		len = be16dec(acmd + 7);
950 	else
951 		len = be32dec(acmd + 6);
952 	if (len == 0) {
953 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
954 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
955 	}
956 	lba *= 2048;
957 	len *= 2048;
958 
959 	/*
960 	 * Pull request off free list
961 	 */
962 	aior = STAILQ_FIRST(&p->iofhd);
963 	assert(aior != NULL);
964 	STAILQ_REMOVE_HEAD(&p->iofhd, io_list);
965 	aior->cfis = cfis;
966 	aior->slot = slot;
967 	aior->len = len;
968 	aior->done = done;
969 	breq = &aior->io_req;
970 	breq->br_offset = lba + done;
971 	iovcnt = hdr->prdtl - seek;
972 	if (iovcnt > BLOCKIF_IOV_MAX) {
973 		aior->prdtl = iovcnt - BLOCKIF_IOV_MAX;
974 		iovcnt = BLOCKIF_IOV_MAX;
975 	} else
976 		aior->prdtl = 0;
977 	breq->br_iovcnt = iovcnt;
978 
979 	/*
980 	 * Build up the iovec based on the prdt
981 	 */
982 	for (i = 0; i < iovcnt; i++) {
983 		uint32_t dbcsz;
984 
985 		dbcsz = (prdt->dbc & DBCMASK) + 1;
986 		breq->br_iov[i].iov_base = paddr_guest2host(ahci_ctx(sc),
987 		    prdt->dba, dbcsz);
988 		breq->br_iov[i].iov_len = dbcsz;
989 		aior->done += dbcsz;
990 		prdt++;
991 	}
992 	err = blockif_read(p->bctx, breq);
993 	assert(err == 0);
994 }
995 
996 static void
997 atapi_request_sense(struct ahci_port *p, int slot, uint8_t *cfis)
998 {
999 	uint8_t buf[64];
1000 	uint8_t *acmd;
1001 	int len;
1002 
1003 	acmd = cfis + 0x40;
1004 	len = acmd[4];
1005 	if (len > sizeof(buf))
1006 		len = sizeof(buf);
1007 	memset(buf, 0, len);
1008 	buf[0] = 0x70 | (1 << 7);
1009 	buf[2] = p->sense_key;
1010 	buf[7] = 10;
1011 	buf[12] = p->asc;
1012 	write_prdt(p, slot, cfis, buf, len);
1013 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1014 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1015 }
1016 
1017 static void
1018 atapi_start_stop_unit(struct ahci_port *p, int slot, uint8_t *cfis)
1019 {
1020 	uint8_t *acmd = cfis + 0x40;
1021 	uint32_t tfd;
1022 
1023 	switch (acmd[4] & 3) {
1024 	case 0:
1025 	case 1:
1026 	case 3:
1027 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1028 		tfd = ATA_S_READY | ATA_S_DSC;
1029 		break;
1030 	case 2:
1031 		/* TODO eject media */
1032 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1033 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1034 		p->asc = 0x53;
1035 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1036 		break;
1037 	}
1038 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1039 }
1040 
1041 static void
1042 atapi_mode_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1043 {
1044 	uint8_t *acmd;
1045 	uint32_t tfd;
1046 	uint8_t pc, code;
1047 	int len;
1048 
1049 	acmd = cfis + 0x40;
1050 	len = be16dec(acmd + 7);
1051 	pc = acmd[2] >> 6;
1052 	code = acmd[2] & 0x3f;
1053 
1054 	switch (pc) {
1055 	case 0:
1056 		switch (code) {
1057 		case MODEPAGE_RW_ERROR_RECOVERY:
1058 		{
1059 			uint8_t buf[16];
1060 
1061 			if (len > sizeof(buf))
1062 				len = sizeof(buf);
1063 
1064 			memset(buf, 0, sizeof(buf));
1065 			be16enc(buf, 16 - 2);
1066 			buf[2] = 0x70;
1067 			buf[8] = 0x01;
1068 			buf[9] = 16 - 10;
1069 			buf[11] = 0x05;
1070 			write_prdt(p, slot, cfis, buf, len);
1071 			tfd = ATA_S_READY | ATA_S_DSC;
1072 			break;
1073 		}
1074 		case MODEPAGE_CD_CAPABILITIES:
1075 		{
1076 			uint8_t buf[30];
1077 
1078 			if (len > sizeof(buf))
1079 				len = sizeof(buf);
1080 
1081 			memset(buf, 0, sizeof(buf));
1082 			be16enc(buf, 30 - 2);
1083 			buf[2] = 0x70;
1084 			buf[8] = 0x2A;
1085 			buf[9] = 30 - 10;
1086 			buf[10] = 0x08;
1087 			buf[12] = 0x71;
1088 			be16enc(&buf[18], 2);
1089 			be16enc(&buf[20], 512);
1090 			write_prdt(p, slot, cfis, buf, len);
1091 			tfd = ATA_S_READY | ATA_S_DSC;
1092 			break;
1093 		}
1094 		default:
1095 			goto error;
1096 			break;
1097 		}
1098 		break;
1099 	case 3:
1100 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1101 		p->asc = 0x39;
1102 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1103 		break;
1104 error:
1105 	case 1:
1106 	case 2:
1107 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1108 		p->asc = 0x24;
1109 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1110 		break;
1111 	}
1112 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1113 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1114 }
1115 
1116 static void
1117 atapi_get_event_status_notification(struct ahci_port *p, int slot,
1118     uint8_t *cfis)
1119 {
1120 	uint8_t *acmd;
1121 	uint32_t tfd;
1122 
1123 	acmd = cfis + 0x40;
1124 
1125 	/* we don't support asynchronous operation */
1126 	if (!(acmd[1] & 1)) {
1127 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1128 		p->asc = 0x24;
1129 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1130 	} else {
1131 		uint8_t buf[8];
1132 		int len;
1133 
1134 		len = be16dec(acmd + 7);
1135 		if (len > sizeof(buf))
1136 			len = sizeof(buf);
1137 
1138 		memset(buf, 0, sizeof(buf));
1139 		be16enc(buf, 8 - 2);
1140 		buf[2] = 0x04;
1141 		buf[3] = 0x10;
1142 		buf[5] = 0x02;
1143 		write_prdt(p, slot, cfis, buf, len);
1144 		tfd = ATA_S_READY | ATA_S_DSC;
1145 	}
1146 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1147 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1148 }
1149 
1150 static void
1151 handle_packet_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1152 {
1153 	uint8_t *acmd;
1154 
1155 	acmd = cfis + 0x40;
1156 
1157 #ifdef AHCI_DEBUG
1158 	{
1159 		int i;
1160 		DPRINTF("ACMD:");
1161 		for (i = 0; i < 16; i++)
1162 			DPRINTF("%02x ", acmd[i]);
1163 		DPRINTF("\n");
1164 	}
1165 #endif
1166 
1167 	switch (acmd[0]) {
1168 	case TEST_UNIT_READY:
1169 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1170 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1171 		break;
1172 	case INQUIRY:
1173 		atapi_inquiry(p, slot, cfis);
1174 		break;
1175 	case READ_CAPACITY:
1176 		atapi_read_capacity(p, slot, cfis);
1177 		break;
1178 	case PREVENT_ALLOW:
1179 		/* TODO */
1180 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1181 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1182 		break;
1183 	case READ_TOC:
1184 		atapi_read_toc(p, slot, cfis);
1185 		break;
1186 	case READ_10:
1187 	case READ_12:
1188 		atapi_read(p, slot, cfis, 0, 0);
1189 		break;
1190 	case REQUEST_SENSE:
1191 		atapi_request_sense(p, slot, cfis);
1192 		break;
1193 	case START_STOP_UNIT:
1194 		atapi_start_stop_unit(p, slot, cfis);
1195 		break;
1196 	case MODE_SENSE_10:
1197 		atapi_mode_sense(p, slot, cfis);
1198 		break;
1199 	case GET_EVENT_STATUS_NOTIFICATION:
1200 		atapi_get_event_status_notification(p, slot, cfis);
1201 		break;
1202 	default:
1203 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1204 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1205 		p->asc = 0x20;
1206 		ahci_write_fis_d2h(p, slot, cfis, (p->sense_key << 12) |
1207 				ATA_S_READY | ATA_S_ERROR);
1208 		break;
1209 	}
1210 }
1211 
1212 static void
1213 ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1214 {
1215 
1216 	switch (cfis[2]) {
1217 	case ATA_ATA_IDENTIFY:
1218 		handle_identify(p, slot, cfis);
1219 		break;
1220 	case ATA_SETFEATURES:
1221 	{
1222 		switch (cfis[3]) {
1223 		case ATA_SF_ENAB_SATA_SF:
1224 			switch (cfis[12]) {
1225 			case ATA_SATA_SF_AN:
1226 				p->tfd = ATA_S_DSC | ATA_S_READY;
1227 				break;
1228 			default:
1229 				p->tfd = ATA_S_ERROR | ATA_S_READY;
1230 				p->tfd |= (ATA_ERROR_ABORT << 8);
1231 				break;
1232 			}
1233 			break;
1234 		case ATA_SF_ENAB_WCACHE:
1235 		case ATA_SF_DIS_WCACHE:
1236 		case ATA_SF_ENAB_RCACHE:
1237 		case ATA_SF_DIS_RCACHE:
1238 			p->tfd = ATA_S_DSC | ATA_S_READY;
1239 			break;
1240 		case ATA_SF_SETXFER:
1241 		{
1242 			switch (cfis[12] & 0xf8) {
1243 			case ATA_PIO:
1244 			case ATA_PIO0:
1245 				break;
1246 			case ATA_WDMA0:
1247 			case ATA_UDMA0:
1248 				p->xfermode = (cfis[12] & 0x7);
1249 				break;
1250 			}
1251 			p->tfd = ATA_S_DSC | ATA_S_READY;
1252 			break;
1253 		}
1254 		default:
1255 			p->tfd = ATA_S_ERROR | ATA_S_READY;
1256 			p->tfd |= (ATA_ERROR_ABORT << 8);
1257 			break;
1258 		}
1259 		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1260 		break;
1261 	}
1262 	case ATA_SET_MULTI:
1263 		if (cfis[12] != 0 &&
1264 			(cfis[12] > 128 || (cfis[12] & (cfis[12] - 1)))) {
1265 			p->tfd = ATA_S_ERROR | ATA_S_READY;
1266 			p->tfd |= (ATA_ERROR_ABORT << 8);
1267 		} else {
1268 			p->mult_sectors = cfis[12];
1269 			p->tfd = ATA_S_DSC | ATA_S_READY;
1270 		}
1271 		p->is |= AHCI_P_IX_DP;
1272 		p->ci &= ~(1 << slot);
1273 		ahci_generate_intr(p->pr_sc);
1274 		break;
1275 	case ATA_READ_DMA:
1276 	case ATA_WRITE_DMA:
1277 	case ATA_READ_DMA48:
1278 	case ATA_WRITE_DMA48:
1279 	case ATA_READ_FPDMA_QUEUED:
1280 	case ATA_WRITE_FPDMA_QUEUED:
1281 		ahci_handle_dma(p, slot, cfis, 0, 0);
1282 		break;
1283 	case ATA_FLUSHCACHE:
1284 	case ATA_FLUSHCACHE48:
1285 		ahci_handle_flush(p, slot, cfis);
1286 		break;
1287 	case ATA_STANDBY_CMD:
1288 		break;
1289 	case ATA_NOP:
1290 	case ATA_STANDBY_IMMEDIATE:
1291 	case ATA_IDLE_IMMEDIATE:
1292 	case ATA_SLEEP:
1293 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1294 		break;
1295 	case ATA_ATAPI_IDENTIFY:
1296 		handle_atapi_identify(p, slot, cfis);
1297 		break;
1298 	case ATA_PACKET_CMD:
1299 		if (!p->atapi) {
1300 			p->tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1301 			p->is |= AHCI_P_IX_TFE;
1302 			ahci_generate_intr(p->pr_sc);
1303 		} else
1304 			handle_packet_cmd(p, slot, cfis);
1305 		break;
1306 	default:
1307 		WPRINTF("Unsupported cmd:%02x\n", cfis[2]);
1308 		p->tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1309 		p->is |= AHCI_P_IX_TFE;
1310 		ahci_generate_intr(p->pr_sc);
1311 		break;
1312 	}
1313 }
1314 
1315 static void
1316 ahci_handle_slot(struct ahci_port *p, int slot)
1317 {
1318 	struct ahci_cmd_hdr *hdr;
1319 	struct ahci_prdt_entry *prdt;
1320 	struct pci_ahci_softc *sc;
1321 	uint8_t *cfis;
1322 	int cfl;
1323 
1324 	sc = p->pr_sc;
1325 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1326 	cfl = (hdr->flags & 0x1f) * 4;
1327 	cfis = paddr_guest2host(ahci_ctx(sc), hdr->ctba,
1328 			0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry));
1329 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1330 
1331 #ifdef AHCI_DEBUG
1332 	DPRINTF("\ncfis:");
1333 	for (i = 0; i < cfl; i++) {
1334 		if (i % 10 == 0)
1335 			DPRINTF("\n");
1336 		DPRINTF("%02x ", cfis[i]);
1337 	}
1338 	DPRINTF("\n");
1339 
1340 	for (i = 0; i < hdr->prdtl; i++) {
1341 		DPRINTF("%d@%08"PRIx64"\n", prdt->dbc & 0x3fffff, prdt->dba);
1342 		prdt++;
1343 	}
1344 #endif
1345 
1346 	if (cfis[0] != FIS_TYPE_REGH2D) {
1347 		WPRINTF("Not a H2D FIS:%02x\n", cfis[0]);
1348 		return;
1349 	}
1350 
1351 	if (cfis[1] & 0x80) {
1352 		ahci_handle_cmd(p, slot, cfis);
1353 	} else {
1354 		if (cfis[15] & (1 << 2))
1355 			p->reset = 1;
1356 		else if (p->reset) {
1357 			p->reset = 0;
1358 			ahci_port_reset(p);
1359 		}
1360 		p->ci &= ~(1 << slot);
1361 	}
1362 }
1363 
1364 static void
1365 ahci_handle_port(struct ahci_port *p)
1366 {
1367 	int i;
1368 
1369 	if (!(p->cmd & AHCI_P_CMD_ST))
1370 		return;
1371 
1372 	/*
1373 	 * Search for any new commands to issue ignoring those that
1374 	 * are already in-flight.
1375 	 */
1376 	for (i = 0; (i < 32) && p->ci; i++) {
1377 		if ((p->ci & (1 << i)) && !(p->pending & (1 << i))) {
1378 			p->cmd &= ~AHCI_P_CMD_CCS_MASK;
1379 			p->cmd |= i << AHCI_P_CMD_CCS_SHIFT;
1380 			ahci_handle_slot(p, i);
1381 		}
1382 	}
1383 }
1384 
1385 /*
1386  * blockif callback routine - this runs in the context of the blockif
1387  * i/o thread, so the mutex needs to be acquired.
1388  */
1389 static void
1390 ata_ioreq_cb(struct blockif_req *br, int err)
1391 {
1392 	struct ahci_cmd_hdr *hdr;
1393 	struct ahci_ioreq *aior;
1394 	struct ahci_port *p;
1395 	struct pci_ahci_softc *sc;
1396 	uint32_t tfd;
1397 	uint8_t *cfis;
1398 	int pending, slot, ncq;
1399 
1400 	DPRINTF("%s %d\n", __func__, err);
1401 
1402 	ncq = 0;
1403 	aior = br->br_param;
1404 	p = aior->io_pr;
1405 	cfis = aior->cfis;
1406 	slot = aior->slot;
1407 	pending = aior->prdtl;
1408 	sc = p->pr_sc;
1409 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1410 
1411 	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
1412 			cfis[2] == ATA_READ_FPDMA_QUEUED)
1413 		ncq = 1;
1414 
1415 	pthread_mutex_lock(&sc->mtx);
1416 
1417 	/*
1418 	 * Move the blockif request back to the free list
1419 	 */
1420 	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_list);
1421 
1422 	if (pending && !err) {
1423 		ahci_handle_dma(p, slot, cfis, aior->done,
1424 		    hdr->prdtl - pending);
1425 		goto out;
1426 	}
1427 
1428 	if (!err && aior->done == aior->len) {
1429 		tfd = ATA_S_READY | ATA_S_DSC;
1430 		if (ncq)
1431 			hdr->prdbc = 0;
1432 		else
1433 			hdr->prdbc = aior->len;
1434 	} else {
1435 		tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1436 		hdr->prdbc = 0;
1437 		if (ncq)
1438 			p->serr |= (1 << slot);
1439 	}
1440 
1441 	/*
1442 	 * This command is now complete.
1443 	 */
1444 	p->pending &= ~(1 << slot);
1445 
1446 	if (ncq) {
1447 		p->sact &= ~(1 << slot);
1448 		ahci_write_fis_sdb(p, slot, tfd);
1449 	} else
1450 		ahci_write_fis_d2h(p, slot, cfis, tfd);
1451 
1452 out:
1453 	pthread_mutex_unlock(&sc->mtx);
1454 	DPRINTF("%s exit\n", __func__);
1455 }
1456 
1457 static void
1458 atapi_ioreq_cb(struct blockif_req *br, int err)
1459 {
1460 	struct ahci_cmd_hdr *hdr;
1461 	struct ahci_ioreq *aior;
1462 	struct ahci_port *p;
1463 	struct pci_ahci_softc *sc;
1464 	uint8_t *cfis;
1465 	uint32_t tfd;
1466 	int pending, slot;
1467 
1468 	DPRINTF("%s %d\n", __func__, err);
1469 
1470 	aior = br->br_param;
1471 	p = aior->io_pr;
1472 	cfis = aior->cfis;
1473 	slot = aior->slot;
1474 	pending = aior->prdtl;
1475 	sc = p->pr_sc;
1476 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + aior->slot * AHCI_CL_SIZE);
1477 
1478 	pthread_mutex_lock(&sc->mtx);
1479 
1480 	/*
1481 	 * Move the blockif request back to the free list
1482 	 */
1483 	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_list);
1484 
1485 	if (pending && !err) {
1486 		atapi_read(p, slot, cfis, aior->done, hdr->prdtl - pending);
1487 		goto out;
1488 	}
1489 
1490 	if (!err && aior->done == aior->len) {
1491 		tfd = ATA_S_READY | ATA_S_DSC;
1492 		hdr->prdbc = aior->len;
1493 	} else {
1494 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1495 		p->asc = 0x21;
1496 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1497 		hdr->prdbc = 0;
1498 	}
1499 
1500 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1501 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1502 
1503 out:
1504 	pthread_mutex_unlock(&sc->mtx);
1505 	DPRINTF("%s exit\n", __func__);
1506 }
1507 
1508 static void
1509 pci_ahci_ioreq_init(struct ahci_port *pr)
1510 {
1511 	struct ahci_ioreq *vr;
1512 	int i;
1513 
1514 	pr->ioqsz = blockif_queuesz(pr->bctx);
1515 	pr->ioreq = calloc(pr->ioqsz, sizeof(struct ahci_ioreq));
1516 	STAILQ_INIT(&pr->iofhd);
1517 
1518 	/*
1519 	 * Add all i/o request entries to the free queue
1520 	 */
1521 	for (i = 0; i < pr->ioqsz; i++) {
1522 		vr = &pr->ioreq[i];
1523 		vr->io_pr = pr;
1524 		if (!pr->atapi)
1525 			vr->io_req.br_callback = ata_ioreq_cb;
1526 		else
1527 			vr->io_req.br_callback = atapi_ioreq_cb;
1528 		vr->io_req.br_param = vr;
1529 		STAILQ_INSERT_TAIL(&pr->iofhd, vr, io_list);
1530 	}
1531 }
1532 
1533 static void
1534 pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
1535 {
1536 	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
1537 	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
1538 	struct ahci_port *p = &sc->port[port];
1539 
1540 	DPRINTF("pci_ahci_port %d: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
1541 		port, offset, value);
1542 
1543 	switch (offset) {
1544 	case AHCI_P_CLB:
1545 		p->clb = value;
1546 		break;
1547 	case AHCI_P_CLBU:
1548 		p->clbu = value;
1549 		break;
1550 	case AHCI_P_FB:
1551 		p->fb = value;
1552 		break;
1553 	case AHCI_P_FBU:
1554 		p->fbu = value;
1555 		break;
1556 	case AHCI_P_IS:
1557 		p->is &= ~value;
1558 		break;
1559 	case AHCI_P_IE:
1560 		p->ie = value & 0xFDC000FF;
1561 		ahci_generate_intr(sc);
1562 		break;
1563 	case AHCI_P_CMD:
1564 	{
1565 		p->cmd = value;
1566 
1567 		if (!(value & AHCI_P_CMD_ST)) {
1568 			p->cmd &= ~(AHCI_P_CMD_CR | AHCI_P_CMD_CCS_MASK);
1569 			p->ci = 0;
1570 			p->sact = 0;
1571 		} else {
1572 			uint64_t clb;
1573 
1574 			p->cmd |= AHCI_P_CMD_CR;
1575 			clb = (uint64_t)p->clbu << 32 | p->clb;
1576 			p->cmd_lst = paddr_guest2host(ahci_ctx(sc), clb,
1577 					AHCI_CL_SIZE * AHCI_MAX_SLOTS);
1578 		}
1579 
1580 		if (value & AHCI_P_CMD_FRE) {
1581 			uint64_t fb;
1582 
1583 			p->cmd |= AHCI_P_CMD_FR;
1584 			fb = (uint64_t)p->fbu << 32 | p->fb;
1585 			/* we don't support FBSCP, so rfis size is 256Bytes */
1586 			p->rfis = paddr_guest2host(ahci_ctx(sc), fb, 256);
1587 		} else {
1588 			p->cmd &= ~AHCI_P_CMD_FR;
1589 		}
1590 
1591 		if (value & AHCI_P_CMD_CLO) {
1592 			p->tfd = 0;
1593 			p->cmd &= ~AHCI_P_CMD_CLO;
1594 		}
1595 
1596 		ahci_handle_port(p);
1597 		break;
1598 	}
1599 	case AHCI_P_TFD:
1600 	case AHCI_P_SIG:
1601 	case AHCI_P_SSTS:
1602 		WPRINTF("pci_ahci_port: read only registers 0x%"PRIx64"\n", offset);
1603 		break;
1604 	case AHCI_P_SCTL:
1605 		if (!(p->cmd & AHCI_P_CMD_ST)) {
1606 			if (value & ATA_SC_DET_RESET)
1607 				ahci_port_reset(p);
1608 			p->sctl = value;
1609 		}
1610 		break;
1611 	case AHCI_P_SERR:
1612 		p->serr &= ~value;
1613 		break;
1614 	case AHCI_P_SACT:
1615 		p->sact |= value;
1616 		break;
1617 	case AHCI_P_CI:
1618 		p->ci |= value;
1619 		ahci_handle_port(p);
1620 		break;
1621 	case AHCI_P_SNTF:
1622 	case AHCI_P_FBS:
1623 	default:
1624 		break;
1625 	}
1626 }
1627 
1628 static void
1629 pci_ahci_host_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
1630 {
1631 	DPRINTF("pci_ahci_host: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
1632 		offset, value);
1633 
1634 	switch (offset) {
1635 	case AHCI_CAP:
1636 	case AHCI_PI:
1637 	case AHCI_VS:
1638 	case AHCI_CAP2:
1639 		DPRINTF("pci_ahci_host: read only registers 0x%"PRIx64"\n", offset);
1640 		break;
1641 	case AHCI_GHC:
1642 		if (value & AHCI_GHC_HR)
1643 			ahci_reset(sc);
1644 		else if (value & AHCI_GHC_IE) {
1645 			sc->ghc |= AHCI_GHC_IE;
1646 			ahci_generate_intr(sc);
1647 		}
1648 		break;
1649 	case AHCI_IS:
1650 		sc->is &= ~value;
1651 		ahci_generate_intr(sc);
1652 		break;
1653 	default:
1654 		break;
1655 	}
1656 }
1657 
1658 static void
1659 pci_ahci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
1660 		int baridx, uint64_t offset, int size, uint64_t value)
1661 {
1662 	struct pci_ahci_softc *sc = pi->pi_arg;
1663 
1664 	assert(baridx == 5);
1665 	assert(size == 4);
1666 
1667 	pthread_mutex_lock(&sc->mtx);
1668 
1669 	if (offset < AHCI_OFFSET)
1670 		pci_ahci_host_write(sc, offset, value);
1671 	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
1672 		pci_ahci_port_write(sc, offset, value);
1673 	else
1674 		WPRINTF("pci_ahci: unknown i/o write offset 0x%"PRIx64"\n", offset);
1675 
1676 	pthread_mutex_unlock(&sc->mtx);
1677 }
1678 
1679 static uint64_t
1680 pci_ahci_host_read(struct pci_ahci_softc *sc, uint64_t offset)
1681 {
1682 	uint32_t value;
1683 
1684 	switch (offset) {
1685 	case AHCI_CAP:
1686 	case AHCI_GHC:
1687 	case AHCI_IS:
1688 	case AHCI_PI:
1689 	case AHCI_VS:
1690 	case AHCI_CCCC:
1691 	case AHCI_CCCP:
1692 	case AHCI_EM_LOC:
1693 	case AHCI_EM_CTL:
1694 	case AHCI_CAP2:
1695 	{
1696 		uint32_t *p = &sc->cap;
1697 		p += (offset - AHCI_CAP) / sizeof(uint32_t);
1698 		value = *p;
1699 		break;
1700 	}
1701 	default:
1702 		value = 0;
1703 		break;
1704 	}
1705 	DPRINTF("pci_ahci_host: read offset 0x%"PRIx64" value 0x%x\n",
1706 		offset, value);
1707 
1708 	return (value);
1709 }
1710 
1711 static uint64_t
1712 pci_ahci_port_read(struct pci_ahci_softc *sc, uint64_t offset)
1713 {
1714 	uint32_t value;
1715 	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
1716 	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
1717 
1718 	switch (offset) {
1719 	case AHCI_P_CLB:
1720 	case AHCI_P_CLBU:
1721 	case AHCI_P_FB:
1722 	case AHCI_P_FBU:
1723 	case AHCI_P_IS:
1724 	case AHCI_P_IE:
1725 	case AHCI_P_CMD:
1726 	case AHCI_P_TFD:
1727 	case AHCI_P_SIG:
1728 	case AHCI_P_SSTS:
1729 	case AHCI_P_SCTL:
1730 	case AHCI_P_SERR:
1731 	case AHCI_P_SACT:
1732 	case AHCI_P_CI:
1733 	case AHCI_P_SNTF:
1734 	case AHCI_P_FBS:
1735 	{
1736 		uint32_t *p= &sc->port[port].clb;
1737 		p += (offset - AHCI_P_CLB) / sizeof(uint32_t);
1738 		value = *p;
1739 		break;
1740 	}
1741 	default:
1742 		value = 0;
1743 		break;
1744 	}
1745 
1746 	DPRINTF("pci_ahci_port %d: read offset 0x%"PRIx64" value 0x%x\n",
1747 		port, offset, value);
1748 
1749 	return value;
1750 }
1751 
1752 static uint64_t
1753 pci_ahci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
1754     uint64_t offset, int size)
1755 {
1756 	struct pci_ahci_softc *sc = pi->pi_arg;
1757 	uint32_t value;
1758 
1759 	assert(baridx == 5);
1760 	assert(size == 4);
1761 
1762 	pthread_mutex_lock(&sc->mtx);
1763 
1764 	if (offset < AHCI_OFFSET)
1765 		value = pci_ahci_host_read(sc, offset);
1766 	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
1767 		value = pci_ahci_port_read(sc, offset);
1768 	else {
1769 		value = 0;
1770 		WPRINTF("pci_ahci: unknown i/o read offset 0x%"PRIx64"\n", offset);
1771 	}
1772 
1773 	pthread_mutex_unlock(&sc->mtx);
1774 
1775 	return (value);
1776 }
1777 
1778 static int
1779 pci_ahci_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts, int atapi)
1780 {
1781 	char bident[sizeof("XX:X:X")];
1782 	struct blockif_ctxt *bctxt;
1783 	struct pci_ahci_softc *sc;
1784 	int ret, slots;
1785 
1786 	ret = 0;
1787 
1788 	if (opts == NULL) {
1789 		fprintf(stderr, "pci_ahci: backing device required\n");
1790 		return (1);
1791 	}
1792 
1793 #ifdef AHCI_DEBUG
1794 	dbg = fopen("/tmp/log", "w+");
1795 #endif
1796 
1797 	sc = calloc(1, sizeof(struct pci_ahci_softc));
1798 	pi->pi_arg = sc;
1799 	sc->asc_pi = pi;
1800 	sc->ports = MAX_PORTS;
1801 
1802 	/*
1803 	 * Only use port 0 for a backing device. All other ports will be
1804 	 * marked as unused
1805 	 */
1806 	sc->port[0].atapi = atapi;
1807 
1808 	/*
1809 	 * Attempt to open the backing image. Use the PCI
1810 	 * slot/func for the identifier string.
1811 	 */
1812 	snprintf(bident, sizeof(bident), "%d:%d", pi->pi_slot, pi->pi_func);
1813 	bctxt = blockif_open(opts, bident);
1814 	if (bctxt == NULL) {
1815 		ret = 1;
1816 		goto open_fail;
1817 	}
1818 	sc->port[0].bctx = bctxt;
1819 	sc->port[0].pr_sc = sc;
1820 
1821 	/*
1822 	 * Allocate blockif request structures and add them
1823 	 * to the free list
1824 	 */
1825 	pci_ahci_ioreq_init(&sc->port[0]);
1826 
1827 	pthread_mutex_init(&sc->mtx, NULL);
1828 
1829 	/* Intel ICH8 AHCI */
1830 	slots = sc->port[0].ioqsz;
1831 	if (slots > 32)
1832 		slots = 32;
1833 	--slots;
1834 	sc->cap = AHCI_CAP_64BIT | AHCI_CAP_SNCQ | AHCI_CAP_SSNTF |
1835 	    AHCI_CAP_SMPS | AHCI_CAP_SSS | AHCI_CAP_SALP |
1836 	    AHCI_CAP_SAL | AHCI_CAP_SCLO | (0x3 << AHCI_CAP_ISS_SHIFT)|
1837 	    AHCI_CAP_PMD | AHCI_CAP_SSC | AHCI_CAP_PSC |
1838 	    (slots << AHCI_CAP_NCS_SHIFT) | AHCI_CAP_SXS | (sc->ports - 1);
1839 
1840 	/* Only port 0 implemented */
1841 	sc->pi = 1;
1842 	sc->vs = 0x10300;
1843 	sc->cap2 = AHCI_CAP2_APST;
1844 	ahci_reset(sc);
1845 
1846 	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x2821);
1847 	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x8086);
1848 	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
1849 	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_STORAGE_SATA);
1850 	pci_set_cfgdata8(pi, PCIR_PROGIF, PCIP_STORAGE_SATA_AHCI_1_0);
1851 	pci_emul_add_msicap(pi, 1);
1852 	pci_emul_alloc_bar(pi, 5, PCIBAR_MEM32,
1853 	    AHCI_OFFSET + sc->ports * AHCI_STEP);
1854 
1855 	pci_lintr_request(pi);
1856 
1857 open_fail:
1858 	if (ret) {
1859 		blockif_close(sc->port[0].bctx);
1860 		free(sc);
1861 	}
1862 
1863 	return (ret);
1864 }
1865 
/*
 * pe_init entry point for the "ahci-hd" emulation: runs the common
 * initialisation with the ATAPI flag cleared.
 */
static int
pci_ahci_hd_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int atapi = 0;

	return (pci_ahci_init(ctx, pi, opts, atapi));
}
1872 
/*
 * pe_init entry point for the "ahci-cd" emulation: runs the common
 * initialisation with the ATAPI flag set.
 */
static int
pci_ahci_atapi_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int atapi = 1;

	return (pci_ahci_init(ctx, pi, opts, atapi));
}
1879 
1880 /*
1881  * Use separate emulation names to distinguish drive and atapi devices
1882  */
1883 struct pci_devemu pci_de_ahci_hd = {
1884 	.pe_emu =	"ahci-hd",
1885 	.pe_init =	pci_ahci_hd_init,
1886 	.pe_barwrite =	pci_ahci_write,
1887 	.pe_barread =	pci_ahci_read
1888 };
1889 PCI_EMUL_SET(pci_de_ahci_hd);
1890 
1891 struct pci_devemu pci_de_ahci_cd = {
1892 	.pe_emu =	"ahci-cd",
1893 	.pe_init =	pci_ahci_atapi_init,
1894 	.pe_barwrite =	pci_ahci_write,
1895 	.pe_barread =	pci_ahci_read
1896 };
1897 PCI_EMUL_SET(pci_de_ahci_cd);
1898