xref: /freebsd/usr.sbin/bhyve/pci_ahci.c (revision bd1da0a002e9a43cfb5220835c7a42804d90dc56)
1 /*-
2  * Copyright (c) 2013  Zhixiang Yu <zcore@freebsd.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/linker_set.h>
34 #include <sys/stat.h>
35 #include <sys/uio.h>
36 #include <sys/ioctl.h>
37 #include <sys/disk.h>
38 #include <sys/ata.h>
39 #include <sys/endian.h>
40 
41 #include <errno.h>
42 #include <fcntl.h>
43 #include <stdio.h>
44 #include <stdlib.h>
45 #include <stdint.h>
46 #include <string.h>
47 #include <strings.h>
48 #include <unistd.h>
49 #include <assert.h>
50 #include <pthread.h>
51 #include <pthread_np.h>
52 #include <inttypes.h>
53 #include <md5.h>
54 
55 #include "bhyverun.h"
56 #include "pci_emul.h"
57 #include "ahci.h"
58 #include "block_if.h"
59 
/* Size of the per-controller port array (Intel ICH8 AHCI supports 6 ports). */
#define	MAX_PORTS	6

/* Device signatures stored in a port's 'sig' field on reset. */
#define	PxSIG_ATA	0x00000101	/* ATA drive */
#define	PxSIG_ATAPI	0xeb140101	/* ATAPI drive */
64 
/*
 * SATA FIS type codes.  The value is placed in byte 0 of every FIS built
 * in this file and also selects the receive-area slot in ahci_write_fis().
 */
enum sata_fis_type {
	/* Register FIS, host to device */
	FIS_TYPE_REGH2D		= 0x27,
	/* Register FIS, device to host */
	FIS_TYPE_REGD2H		= 0x34,
	/* DMA activate FIS, device to host */
	FIS_TYPE_DMAACT		= 0x39,
	/* DMA setup FIS, bidirectional */
	FIS_TYPE_DMASETUP	= 0x41,
	/* Data FIS, bidirectional */
	FIS_TYPE_DATA		= 0x46,
	/* BIST activate FIS, bidirectional */
	FIS_TYPE_BIST		= 0x58,
	/* PIO setup FIS, device to host */
	FIS_TYPE_PIOSETUP	= 0x5f,
	/* Set device bits FIS, device to host */
	FIS_TYPE_SETDEVBITS	= 0xa1,
};
75 
/* SCSI opcodes (first byte of the ATAPI command packet). */
#define	TEST_UNIT_READY			0x00
#define	REQUEST_SENSE			0x03
#define	INQUIRY				0x12
#define	START_STOP_UNIT			0x1B
#define	PREVENT_ALLOW			0x1E
#define	READ_CAPACITY			0x25
#define	READ_10				0x28
#define	POSITION_TO_ELEMENT		0x2B
#define	READ_TOC			0x43
#define	GET_EVENT_STATUS_NOTIFICATION	0x4A
#define	MODE_SENSE_10			0x5A
#define	REPORT_LUNS			0xA0
#define	READ_12				0xA8
#define	READ_CD				0xBE

/* SCSI mode page codes. */
#define	MODEPAGE_RW_ERROR_RECOVERY	0x01
#define	MODEPAGE_CD_CAPABILITIES	0x2A

/*
 * ATA command parameters.
 * NOTE(review): going by the ATA_SF_ prefix these look like SET FEATURES
 * subcommand values rather than command opcodes, with ATA_SATA_SF_AN being
 * the SATA feature selector used with them -- confirm against the users.
 */
#define	ATA_SF_ENAB_SATA_SF		0x10
#define		ATA_SATA_SF_AN		0x05
#define	ATA_SF_DIS_SATA_SF		0x90
106 
/*
 * Diagnostic output helpers.
 *
 * DPRINTF() writes to the 'dbg' stream and flushes it when AHCI_DEBUG is
 * defined; otherwise it expands to nothing and its arguments are not
 * evaluated.  WPRINTF() always prints to stdout via printf().
 */
#ifdef AHCI_DEBUG
static FILE *dbg;
#define DPRINTF(format, arg...)						\
	do {								\
		fprintf(dbg, format, ##arg);				\
		fflush(dbg);						\
	} while (0)
#else
#define DPRINTF(format, arg...)
#endif
#define WPRINTF(format, arg...)	printf(format, ##arg)
117 
118 struct ahci_ioreq {
119 	struct blockif_req io_req;
120 	struct ahci_port *io_pr;
121 	STAILQ_ENTRY(ahci_ioreq) io_flist;
122 	TAILQ_ENTRY(ahci_ioreq) io_blist;
123 	uint8_t *cfis;
124 	uint32_t len;
125 	uint32_t done;
126 	int slot;
127 	int more;
128 };
129 
130 struct ahci_port {
131 	struct blockif_ctxt *bctx;
132 	struct pci_ahci_softc *pr_sc;
133 	uint8_t *cmd_lst;
134 	uint8_t *rfis;
135 	char ident[20 + 1];
136 	int atapi;
137 	int reset;
138 	int mult_sectors;
139 	uint8_t xfermode;
140 	uint8_t err_cfis[20];
141 	uint8_t sense_key;
142 	uint8_t asc;
143 	uint32_t pending;
144 
145 	uint32_t clb;
146 	uint32_t clbu;
147 	uint32_t fb;
148 	uint32_t fbu;
149 	uint32_t is;
150 	uint32_t ie;
151 	uint32_t cmd;
152 	uint32_t unused0;
153 	uint32_t tfd;
154 	uint32_t sig;
155 	uint32_t ssts;
156 	uint32_t sctl;
157 	uint32_t serr;
158 	uint32_t sact;
159 	uint32_t ci;
160 	uint32_t sntf;
161 	uint32_t fbs;
162 
163 	/*
164 	 * i/o request info
165 	 */
166 	struct ahci_ioreq *ioreq;
167 	int ioqsz;
168 	STAILQ_HEAD(ahci_fhead, ahci_ioreq) iofhd;
169 	TAILQ_HEAD(ahci_bhead, ahci_ioreq) iobhd;
170 };
171 
/*
 * One command list entry as laid out in guest memory.  The code in this
 * file reads 'prdtl' (number of PRDT entries) and writes 'prdbc' (bytes
 * transferred); entries are located at slot * AHCI_CL_SIZE in the list.
 */
struct ahci_cmd_hdr {
	uint16_t	flags;
	uint16_t	prdtl;		/* PRDT entry count */
	uint32_t	prdbc;		/* byte count transferred */
	uint64_t	ctba;		/* command table address */
	uint32_t	reserved[4];
};
179 
/* Mask extracting the byte count from ahci_prdt_entry.dbc. */
#define	DBCMASK		0x3fffff

/*
 * One physical region descriptor: a guest-physical data buffer address
 * and its length.  The stored count is the length minus one, so users
 * compute (dbc & DBCMASK) + 1.
 */
struct ahci_prdt_entry {
	uint64_t	dba;		/* guest-physical buffer address */
	uint32_t	reserved;
	uint32_t	dbc;		/* byte count - 1, plus flag bits */
};
186 
187 struct pci_ahci_softc {
188 	struct pci_devinst *asc_pi;
189 	pthread_mutex_t	mtx;
190 	int ports;
191 	uint32_t cap;
192 	uint32_t ghc;
193 	uint32_t is;
194 	uint32_t pi;
195 	uint32_t vs;
196 	uint32_t ccc_ctl;
197 	uint32_t ccc_pts;
198 	uint32_t em_loc;
199 	uint32_t em_ctl;
200 	uint32_t cap2;
201 	uint32_t bohc;
202 	uint32_t lintr;
203 	struct ahci_port port[MAX_PORTS];
204 };
205 #define	ahci_ctx(sc)	((sc)->asc_pi->pi_vmctx)
206 
/*
 * Convert a logical block address into a minute/second/frame triple.
 *
 * The address is biased by 150 frames and split using 75 frames per
 * second and 60 seconds per minute.  buf[0] receives the minutes, buf[1]
 * the seconds and buf[2] the frames.
 */
static inline void lba_to_msf(uint8_t *buf, int lba)
{
	int frames = lba + 150;
	int seconds = frames / 75;

	buf[0] = seconds / 60;
	buf[1] = seconds % 60;
	buf[2] = frames % 75;
}
214 
215 /*
216  * generate HBA intr depending on whether or not ports within
217  * the controller have an interrupt pending.
218  */
219 static void
220 ahci_generate_intr(struct pci_ahci_softc *sc)
221 {
222 	struct pci_devinst *pi;
223 	int i;
224 
225 	pi = sc->asc_pi;
226 
227 	for (i = 0; i < sc->ports; i++) {
228 		struct ahci_port *pr;
229 		pr = &sc->port[i];
230 		if (pr->is & pr->ie)
231 			sc->is |= (1 << i);
232 	}
233 
234 	DPRINTF("%s %x\n", __func__, sc->is);
235 
236 	if (sc->is && (sc->ghc & AHCI_GHC_IE)) {
237 		if (pci_msi_enabled(pi)) {
238 			/*
239 			 * Generate an MSI interrupt on every edge
240 			 */
241 			pci_generate_msi(pi, 0);
242 		} else if (!sc->lintr) {
243 			/*
244 			 * Only generate a pin-based interrupt if one wasn't
245 			 * in progress
246 			 */
247 			sc->lintr = 1;
248 			pci_lintr_assert(pi);
249 		}
250 	} else if (sc->lintr) {
251 		/*
252 		 * No interrupts: deassert pin-based signal if it had
253 		 * been asserted
254 		 */
255 		pci_lintr_deassert(pi);
256 		sc->lintr = 0;
257 	}
258 }
259 
260 static void
261 ahci_write_fis(struct ahci_port *p, enum sata_fis_type ft, uint8_t *fis)
262 {
263 	int offset, len, irq;
264 
265 	if (p->rfis == NULL || !(p->cmd & AHCI_P_CMD_FRE))
266 		return;
267 
268 	switch (ft) {
269 	case FIS_TYPE_REGD2H:
270 		offset = 0x40;
271 		len = 20;
272 		irq = AHCI_P_IX_DHR;
273 		break;
274 	case FIS_TYPE_SETDEVBITS:
275 		offset = 0x58;
276 		len = 8;
277 		irq = AHCI_P_IX_SDB;
278 		break;
279 	case FIS_TYPE_PIOSETUP:
280 		offset = 0x20;
281 		len = 20;
282 		irq = 0;
283 		break;
284 	default:
285 		WPRINTF("unsupported fis type %d\n", ft);
286 		return;
287 	}
288 	memcpy(p->rfis + offset, fis, len);
289 	if (irq) {
290 		p->is |= irq;
291 		ahci_generate_intr(p->pr_sc);
292 	}
293 }
294 
295 static void
296 ahci_write_fis_piosetup(struct ahci_port *p)
297 {
298 	uint8_t fis[20];
299 
300 	memset(fis, 0, sizeof(fis));
301 	fis[0] = FIS_TYPE_PIOSETUP;
302 	ahci_write_fis(p, FIS_TYPE_PIOSETUP, fis);
303 }
304 
305 static void
306 ahci_write_fis_sdb(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
307 {
308 	uint8_t fis[8];
309 	uint8_t error;
310 
311 	error = (tfd >> 8) & 0xff;
312 	memset(fis, 0, sizeof(fis));
313 	fis[0] = FIS_TYPE_SETDEVBITS;
314 	fis[1] = (1 << 6);
315 	fis[2] = tfd & 0x77;
316 	fis[3] = error;
317 	if (fis[2] & ATA_S_ERROR) {
318 		p->is |= AHCI_P_IX_TFE;
319 		p->err_cfis[0] = slot;
320 		p->err_cfis[2] = tfd & 0x77;
321 		p->err_cfis[3] = error;
322 		memcpy(&p->err_cfis[4], cfis + 4, 16);
323 	} else {
324 		*(uint32_t *)(fis + 4) = (1 << slot);
325 		p->sact &= ~(1 << slot);
326 	}
327 	p->tfd = tfd;
328 	ahci_write_fis(p, FIS_TYPE_SETDEVBITS, fis);
329 }
330 
331 static void
332 ahci_write_fis_d2h(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
333 {
334 	uint8_t fis[20];
335 	uint8_t error;
336 
337 	error = (tfd >> 8) & 0xff;
338 	memset(fis, 0, sizeof(fis));
339 	fis[0] = FIS_TYPE_REGD2H;
340 	fis[1] = (1 << 6);
341 	fis[2] = tfd & 0xff;
342 	fis[3] = error;
343 	fis[4] = cfis[4];
344 	fis[5] = cfis[5];
345 	fis[6] = cfis[6];
346 	fis[7] = cfis[7];
347 	fis[8] = cfis[8];
348 	fis[9] = cfis[9];
349 	fis[10] = cfis[10];
350 	fis[11] = cfis[11];
351 	fis[12] = cfis[12];
352 	fis[13] = cfis[13];
353 	if (fis[2] & ATA_S_ERROR) {
354 		p->is |= AHCI_P_IX_TFE;
355 		p->err_cfis[0] = 0x80;
356 		p->err_cfis[2] = tfd & 0xff;
357 		p->err_cfis[3] = error;
358 		memcpy(&p->err_cfis[4], cfis + 4, 16);
359 	} else
360 		p->ci &= ~(1 << slot);
361 	p->tfd = tfd;
362 	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
363 }
364 
365 static void
366 ahci_write_reset_fis_d2h(struct ahci_port *p)
367 {
368 	uint8_t fis[20];
369 
370 	memset(fis, 0, sizeof(fis));
371 	fis[0] = FIS_TYPE_REGD2H;
372 	fis[3] = 1;
373 	fis[4] = 1;
374 	if (p->atapi) {
375 		fis[5] = 0x14;
376 		fis[6] = 0xeb;
377 	}
378 	fis[12] = 1;
379 	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
380 }
381 
382 static void
383 ahci_check_stopped(struct ahci_port *p)
384 {
385 	/*
386 	 * If we are no longer processing the command list and nothing
387 	 * is in-flight, clear the running bit, the current command
388 	 * slot, the command issue and active bits.
389 	 */
390 	if (!(p->cmd & AHCI_P_CMD_ST)) {
391 		if (p->pending == 0) {
392 			p->cmd &= ~(AHCI_P_CMD_CR | AHCI_P_CMD_CCS_MASK);
393 			p->ci = 0;
394 			p->sact = 0;
395 		}
396 	}
397 }
398 
399 static void
400 ahci_port_stop(struct ahci_port *p)
401 {
402 	struct ahci_ioreq *aior;
403 	uint8_t *cfis;
404 	int slot;
405 	int ncq;
406 	int error;
407 
408 	assert(pthread_mutex_isowned_np(&p->pr_sc->mtx));
409 
410 	TAILQ_FOREACH(aior, &p->iobhd, io_blist) {
411 		/*
412 		 * Try to cancel the outstanding blockif request.
413 		 */
414 		error = blockif_cancel(p->bctx, &aior->io_req);
415 		if (error != 0)
416 			continue;
417 
418 		slot = aior->slot;
419 		cfis = aior->cfis;
420 		if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
421 		    cfis[2] == ATA_READ_FPDMA_QUEUED ||
422 		    cfis[2] == ATA_SEND_FPDMA_QUEUED)
423 			ncq = 1;
424 
425 		if (ncq)
426 			p->sact &= ~(1 << slot);
427 		else
428 			p->ci &= ~(1 << slot);
429 
430 		/*
431 		 * This command is now done.
432 		 */
433 		p->pending &= ~(1 << slot);
434 
435 		/*
436 		 * Delete the blockif request from the busy list
437 		 */
438 		TAILQ_REMOVE(&p->iobhd, aior, io_blist);
439 
440 		/*
441 		 * Move the blockif request back to the free list
442 		 */
443 		STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
444 	}
445 
446 	ahci_check_stopped(p);
447 }
448 
449 static void
450 ahci_port_reset(struct ahci_port *pr)
451 {
452 	pr->serr = 0;
453 	pr->sact = 0;
454 	pr->xfermode = ATA_UDMA6;
455 	pr->mult_sectors = 128;
456 
457 	if (!pr->bctx) {
458 		pr->ssts = ATA_SS_DET_NO_DEVICE;
459 		pr->sig = 0xFFFFFFFF;
460 		pr->tfd = 0x7F;
461 		return;
462 	}
463 	pr->ssts = ATA_SS_DET_PHY_ONLINE | ATA_SS_IPM_ACTIVE;
464 	if (pr->sctl & ATA_SC_SPD_MASK)
465 		pr->ssts |= (pr->sctl & ATA_SC_SPD_MASK);
466 	else
467 		pr->ssts |= ATA_SS_SPD_GEN3;
468 	pr->tfd = (1 << 8) | ATA_S_DSC | ATA_S_DMA;
469 	if (!pr->atapi) {
470 		pr->sig = PxSIG_ATA;
471 		pr->tfd |= ATA_S_READY;
472 	} else
473 		pr->sig = PxSIG_ATAPI;
474 	ahci_write_reset_fis_d2h(pr);
475 }
476 
477 static void
478 ahci_reset(struct pci_ahci_softc *sc)
479 {
480 	int i;
481 
482 	sc->ghc = AHCI_GHC_AE;
483 	sc->is = 0;
484 
485 	if (sc->lintr) {
486 		pci_lintr_deassert(sc->asc_pi);
487 		sc->lintr = 0;
488 	}
489 
490 	for (i = 0; i < sc->ports; i++) {
491 		sc->port[i].ie = 0;
492 		sc->port[i].is = 0;
493 		sc->port[i].cmd = (AHCI_P_CMD_SUD | AHCI_P_CMD_POD);
494 		if (sc->port[i].bctx)
495 			sc->port[i].cmd |= AHCI_P_CMD_CPS;
496 		sc->port[i].sctl = 0;
497 		ahci_port_reset(&sc->port[i]);
498 	}
499 }
500 
/*
 * Store 'src' into a 'len'-byte ATA string field.
 *
 * Each pair of characters is swapped (character i lands at index i ^ 1)
 * and the field is padded with spaces once 'src' is exhausted.  No NUL
 * terminator is written.  'len' is expected to be even.
 */
static void
ata_string(uint8_t *dest, const char *src, int len)
{
	int pos = 0;

	while (pos < len) {
		dest[pos ^ 1] = (*src != '\0') ? *src++ : ' ';
		pos++;
	}
}
513 
/*
 * Store 'src' into a 'len'-byte field in natural byte order, padding
 * with spaces once 'src' is exhausted.  No NUL terminator is written.
 */
static void
atapi_string(uint8_t *dest, const char *src, int len)
{
	int pos = 0;

	while (pos < len) {
		dest[pos] = (*src != '\0') ? *src++ : ' ';
		pos++;
	}
}
526 
527 /*
528  * Build up the iovec based on the PRDT, 'done' and 'len'.
529  */
530 static void
531 ahci_build_iov(struct ahci_port *p, struct ahci_ioreq *aior,
532     struct ahci_prdt_entry *prdt, uint16_t prdtl)
533 {
534 	struct blockif_req *breq = &aior->io_req;
535 	int i, j, skip, todo, left, extra;
536 	uint32_t dbcsz;
537 
538 	/* Copy part of PRDT between 'done' and 'len' bytes into the iov. */
539 	skip = aior->done;
540 	left = aior->len - aior->done;
541 	todo = 0;
542 	for (i = 0, j = 0; i < prdtl && j < BLOCKIF_IOV_MAX && left > 0;
543 	    i++, prdt++) {
544 		dbcsz = (prdt->dbc & DBCMASK) + 1;
545 		/* Skip already done part of the PRDT */
546 		if (dbcsz <= skip) {
547 			skip -= dbcsz;
548 			continue;
549 		}
550 		dbcsz -= skip;
551 		if (dbcsz > left)
552 			dbcsz = left;
553 		breq->br_iov[j].iov_base = paddr_guest2host(ahci_ctx(p->pr_sc),
554 		    prdt->dba + skip, dbcsz);
555 		breq->br_iov[j].iov_len = dbcsz;
556 		todo += dbcsz;
557 		left -= dbcsz;
558 		skip = 0;
559 		j++;
560 	}
561 
562 	/* If we got limited by IOV length, round I/O down to sector size. */
563 	if (j == BLOCKIF_IOV_MAX) {
564 		extra = todo % blockif_sectsz(p->bctx);
565 		todo -= extra;
566 		assert(todo > 0);
567 		while (extra > 0) {
568 			if (breq->br_iov[j - 1].iov_len > extra) {
569 				breq->br_iov[j - 1].iov_len -= extra;
570 				break;
571 			}
572 			extra -= breq->br_iov[j - 1].iov_len;
573 			j--;
574 		}
575 	}
576 
577 	breq->br_iovcnt = j;
578 	breq->br_resid = todo;
579 	aior->done += todo;
580 	aior->more = (aior->done < aior->len && i < prdtl);
581 }
582 
583 static void
584 ahci_handle_rw(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
585 {
586 	struct ahci_ioreq *aior;
587 	struct blockif_req *breq;
588 	struct ahci_prdt_entry *prdt;
589 	struct ahci_cmd_hdr *hdr;
590 	uint64_t lba;
591 	uint32_t len;
592 	int err, ncq, readop;
593 
594 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
595 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
596 	ncq = 0;
597 	readop = 1;
598 
599 	if (cfis[2] == ATA_WRITE || cfis[2] == ATA_WRITE48 ||
600 	    cfis[2] == ATA_WRITE_MUL || cfis[2] == ATA_WRITE_MUL48 ||
601 	    cfis[2] == ATA_WRITE_DMA || cfis[2] == ATA_WRITE_DMA48 ||
602 	    cfis[2] == ATA_WRITE_FPDMA_QUEUED)
603 		readop = 0;
604 
605 	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
606 	    cfis[2] == ATA_READ_FPDMA_QUEUED) {
607 		lba = ((uint64_t)cfis[10] << 40) |
608 			((uint64_t)cfis[9] << 32) |
609 			((uint64_t)cfis[8] << 24) |
610 			((uint64_t)cfis[6] << 16) |
611 			((uint64_t)cfis[5] << 8) |
612 			cfis[4];
613 		len = cfis[11] << 8 | cfis[3];
614 		if (!len)
615 			len = 65536;
616 		ncq = 1;
617 	} else if (cfis[2] == ATA_READ48 || cfis[2] == ATA_WRITE48 ||
618 	    cfis[2] == ATA_READ_MUL48 || cfis[2] == ATA_WRITE_MUL48 ||
619 	    cfis[2] == ATA_READ_DMA48 || cfis[2] == ATA_WRITE_DMA48) {
620 		lba = ((uint64_t)cfis[10] << 40) |
621 			((uint64_t)cfis[9] << 32) |
622 			((uint64_t)cfis[8] << 24) |
623 			((uint64_t)cfis[6] << 16) |
624 			((uint64_t)cfis[5] << 8) |
625 			cfis[4];
626 		len = cfis[13] << 8 | cfis[12];
627 		if (!len)
628 			len = 65536;
629 	} else {
630 		lba = ((cfis[7] & 0xf) << 24) | (cfis[6] << 16) |
631 			(cfis[5] << 8) | cfis[4];
632 		len = cfis[12];
633 		if (!len)
634 			len = 256;
635 	}
636 	lba *= blockif_sectsz(p->bctx);
637 	len *= blockif_sectsz(p->bctx);
638 
639 	/* Pull request off free list */
640 	aior = STAILQ_FIRST(&p->iofhd);
641 	assert(aior != NULL);
642 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
643 
644 	aior->cfis = cfis;
645 	aior->slot = slot;
646 	aior->len = len;
647 	aior->done = done;
648 	breq = &aior->io_req;
649 	breq->br_offset = lba + done;
650 	ahci_build_iov(p, aior, prdt, hdr->prdtl);
651 
652 	/* Mark this command in-flight. */
653 	p->pending |= 1 << slot;
654 
655 	/* Stuff request onto busy list. */
656 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
657 
658 	if (readop)
659 		err = blockif_read(p->bctx, breq);
660 	else
661 		err = blockif_write(p->bctx, breq);
662 	assert(err == 0);
663 
664 	if (ncq)
665 		p->ci &= ~(1 << slot);
666 }
667 
668 static void
669 ahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis)
670 {
671 	struct ahci_ioreq *aior;
672 	struct blockif_req *breq;
673 	int err;
674 
675 	/*
676 	 * Pull request off free list
677 	 */
678 	aior = STAILQ_FIRST(&p->iofhd);
679 	assert(aior != NULL);
680 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
681 	aior->cfis = cfis;
682 	aior->slot = slot;
683 	aior->len = 0;
684 	aior->done = 0;
685 	aior->more = 0;
686 	breq = &aior->io_req;
687 
688 	/*
689 	 * Mark this command in-flight.
690 	 */
691 	p->pending |= 1 << slot;
692 
693 	/*
694 	 * Stuff request onto busy list
695 	 */
696 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
697 
698 	err = blockif_flush(p->bctx, breq);
699 	assert(err == 0);
700 }
701 
702 static inline void
703 read_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
704 		void *buf, int size)
705 {
706 	struct ahci_cmd_hdr *hdr;
707 	struct ahci_prdt_entry *prdt;
708 	void *to;
709 	int i, len;
710 
711 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
712 	len = size;
713 	to = buf;
714 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
715 	for (i = 0; i < hdr->prdtl && len; i++) {
716 		uint8_t *ptr;
717 		uint32_t dbcsz;
718 		int sublen;
719 
720 		dbcsz = (prdt->dbc & DBCMASK) + 1;
721 		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
722 		sublen = len < dbcsz ? len : dbcsz;
723 		memcpy(to, ptr, sublen);
724 		len -= sublen;
725 		to += sublen;
726 		prdt++;
727 	}
728 }
729 
730 static void
731 ahci_handle_dsm_trim(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
732 {
733 	struct ahci_ioreq *aior;
734 	struct blockif_req *breq;
735 	uint8_t *entry;
736 	uint64_t elba;
737 	uint32_t len, elen;
738 	int err;
739 	uint8_t buf[512];
740 
741 	if (cfis[2] == ATA_DATA_SET_MANAGEMENT) {
742 		len = (uint16_t)cfis[13] << 8 | cfis[12];
743 		len *= 512;
744 	} else { /* ATA_SEND_FPDMA_QUEUED */
745 		len = (uint16_t)cfis[11] << 8 | cfis[3];
746 		len *= 512;
747 	}
748 	read_prdt(p, slot, cfis, buf, sizeof(buf));
749 
750 next:
751 	entry = &buf[done];
752 	elba = ((uint64_t)entry[5] << 40) |
753 		((uint64_t)entry[4] << 32) |
754 		((uint64_t)entry[3] << 24) |
755 		((uint64_t)entry[2] << 16) |
756 		((uint64_t)entry[1] << 8) |
757 		entry[0];
758 	elen = (uint16_t)entry[7] << 8 | entry[6];
759 	done += 8;
760 	if (elen == 0) {
761 		if (done >= len) {
762 			ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
763 			p->pending &= ~(1 << slot);
764 			ahci_check_stopped(p);
765 			return;
766 		}
767 		goto next;
768 	}
769 
770 	/*
771 	 * Pull request off free list
772 	 */
773 	aior = STAILQ_FIRST(&p->iofhd);
774 	assert(aior != NULL);
775 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
776 	aior->cfis = cfis;
777 	aior->slot = slot;
778 	aior->len = len;
779 	aior->done = done;
780 	aior->more = (len != done);
781 
782 	breq = &aior->io_req;
783 	breq->br_offset = elba * blockif_sectsz(p->bctx);
784 	breq->br_resid = elen * blockif_sectsz(p->bctx);
785 
786 	/*
787 	 * Mark this command in-flight.
788 	 */
789 	p->pending |= 1 << slot;
790 
791 	/*
792 	 * Stuff request onto busy list
793 	 */
794 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
795 
796 	err = blockif_delete(p->bctx, breq);
797 	assert(err == 0);
798 }
799 
800 static inline void
801 write_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
802 		void *buf, int size)
803 {
804 	struct ahci_cmd_hdr *hdr;
805 	struct ahci_prdt_entry *prdt;
806 	void *from;
807 	int i, len;
808 
809 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
810 	len = size;
811 	from = buf;
812 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
813 	for (i = 0; i < hdr->prdtl && len; i++) {
814 		uint8_t *ptr;
815 		uint32_t dbcsz;
816 		int sublen;
817 
818 		dbcsz = (prdt->dbc & DBCMASK) + 1;
819 		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
820 		sublen = len < dbcsz ? len : dbcsz;
821 		memcpy(ptr, from, sublen);
822 		len -= sublen;
823 		from += sublen;
824 		prdt++;
825 	}
826 	hdr->prdbc = size - len;
827 }
828 
/*
 * Store a checksum in the last byte of 'buf' such that the 8-bit sum of
 * all 'size' bytes is zero.  The previous content of the last byte is
 * ignored.
 */
static void
ahci_checksum(uint8_t *buf, int size)
{
	int last = size - 1;
	uint8_t acc = 0;
	int pos;

	for (pos = 0; pos < last; pos++)
		acc += buf[pos];
	buf[last] = (uint8_t)(0x100 - acc);
}
839 
840 static void
841 ahci_handle_read_log(struct ahci_port *p, int slot, uint8_t *cfis)
842 {
843 	struct ahci_cmd_hdr *hdr;
844 	uint8_t buf[512];
845 
846 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
847 	if (p->atapi || hdr->prdtl == 0 || cfis[4] != 0x10 ||
848 	    cfis[5] != 0 || cfis[9] != 0 || cfis[12] != 1 || cfis[13] != 0) {
849 		ahci_write_fis_d2h(p, slot, cfis,
850 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
851 		return;
852 	}
853 
854 	memset(buf, 0, sizeof(buf));
855 	memcpy(buf, p->err_cfis, sizeof(p->err_cfis));
856 	ahci_checksum(buf, sizeof(buf));
857 
858 	if (cfis[2] == ATA_READ_LOG_EXT)
859 		ahci_write_fis_piosetup(p);
860 	write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
861 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
862 }
863 
864 static void
865 handle_identify(struct ahci_port *p, int slot, uint8_t *cfis)
866 {
867 	struct ahci_cmd_hdr *hdr;
868 
869 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
870 	if (p->atapi || hdr->prdtl == 0) {
871 		ahci_write_fis_d2h(p, slot, cfis,
872 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
873 	} else {
874 		uint16_t buf[256];
875 		uint64_t sectors;
876 		int sectsz, psectsz, psectoff, candelete, ro;
877 		uint16_t cyl;
878 		uint8_t sech, heads;
879 
880 		ro = blockif_is_ro(p->bctx);
881 		candelete = blockif_candelete(p->bctx);
882 		sectsz = blockif_sectsz(p->bctx);
883 		sectors = blockif_size(p->bctx) / sectsz;
884 		blockif_chs(p->bctx, &cyl, &heads, &sech);
885 		blockif_psectsz(p->bctx, &psectsz, &psectoff);
886 		memset(buf, 0, sizeof(buf));
887 		buf[0] = 0x0040;
888 		buf[1] = cyl;
889 		buf[3] = heads;
890 		buf[6] = sech;
891 		ata_string((uint8_t *)(buf+10), p->ident, 20);
892 		ata_string((uint8_t *)(buf+23), "001", 8);
893 		ata_string((uint8_t *)(buf+27), "BHYVE SATA DISK", 40);
894 		buf[47] = (0x8000 | 128);
895 		buf[48] = 0x1;
896 		buf[49] = (1 << 8 | 1 << 9 | 1 << 11);
897 		buf[50] = (1 << 14);
898 		buf[53] = (1 << 1 | 1 << 2);
899 		if (p->mult_sectors)
900 			buf[59] = (0x100 | p->mult_sectors);
901 		if (sectors <= 0x0fffffff) {
902 			buf[60] = sectors;
903 			buf[61] = (sectors >> 16);
904 		} else {
905 			buf[60] = 0xffff;
906 			buf[61] = 0x0fff;
907 		}
908 		buf[63] = 0x7;
909 		if (p->xfermode & ATA_WDMA0)
910 			buf[63] |= (1 << ((p->xfermode & 7) + 8));
911 		buf[64] = 0x3;
912 		buf[65] = 120;
913 		buf[66] = 120;
914 		buf[67] = 120;
915 		buf[68] = 120;
916 		buf[69] = 0;
917 		buf[75] = 31;
918 		buf[76] = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3 |
919 			   ATA_SUPPORT_NCQ);
920 		buf[77] = (ATA_SUPPORT_RCVSND_FPDMA_QUEUED |
921 			   (p->ssts & ATA_SS_SPD_MASK) >> 3);
922 		buf[80] = 0x3f0;
923 		buf[81] = 0x28;
924 		buf[82] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE|
925 			   ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
926 		buf[83] = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
927 			   ATA_SUPPORT_FLUSHCACHE48 | 1 << 14);
928 		buf[84] = (1 << 14);
929 		buf[85] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE|
930 			   ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
931 		buf[86] = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
932 			   ATA_SUPPORT_FLUSHCACHE48 | 1 << 15);
933 		buf[87] = (1 << 14);
934 		buf[88] = 0x7f;
935 		if (p->xfermode & ATA_UDMA0)
936 			buf[88] |= (1 << ((p->xfermode & 7) + 8));
937 		buf[100] = sectors;
938 		buf[101] = (sectors >> 16);
939 		buf[102] = (sectors >> 32);
940 		buf[103] = (sectors >> 48);
941 		if (candelete && !ro) {
942 			buf[69] |= ATA_SUPPORT_RZAT | ATA_SUPPORT_DRAT;
943 			buf[105] = 1;
944 			buf[169] = ATA_SUPPORT_DSM_TRIM;
945 		}
946 		buf[106] = 0x4000;
947 		buf[209] = 0x4000;
948 		if (psectsz > sectsz) {
949 			buf[106] |= 0x2000;
950 			buf[106] |= ffsl(psectsz / sectsz) - 1;
951 			buf[209] |= (psectoff / sectsz);
952 		}
953 		if (sectsz > 512) {
954 			buf[106] |= 0x1000;
955 			buf[117] = sectsz / 2;
956 			buf[118] = ((sectsz / 2) >> 16);
957 		}
958 		buf[119] = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
959 		buf[120] = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
960 		buf[222] = 0x1020;
961 		buf[255] = 0x00a5;
962 		ahci_checksum((uint8_t *)buf, sizeof(buf));
963 		ahci_write_fis_piosetup(p);
964 		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
965 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
966 	}
967 }
968 
969 static void
970 handle_atapi_identify(struct ahci_port *p, int slot, uint8_t *cfis)
971 {
972 	if (!p->atapi) {
973 		ahci_write_fis_d2h(p, slot, cfis,
974 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
975 	} else {
976 		uint16_t buf[256];
977 
978 		memset(buf, 0, sizeof(buf));
979 		buf[0] = (2 << 14 | 5 << 8 | 1 << 7 | 2 << 5);
980 		ata_string((uint8_t *)(buf+10), p->ident, 20);
981 		ata_string((uint8_t *)(buf+23), "001", 8);
982 		ata_string((uint8_t *)(buf+27), "BHYVE SATA DVD ROM", 40);
983 		buf[49] = (1 << 9 | 1 << 8);
984 		buf[50] = (1 << 14 | 1);
985 		buf[53] = (1 << 2 | 1 << 1);
986 		buf[62] = 0x3f;
987 		buf[63] = 7;
988 		if (p->xfermode & ATA_WDMA0)
989 			buf[63] |= (1 << ((p->xfermode & 7) + 8));
990 		buf[64] = 3;
991 		buf[65] = 120;
992 		buf[66] = 120;
993 		buf[67] = 120;
994 		buf[68] = 120;
995 		buf[76] = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3);
996 		buf[77] = ((p->ssts & ATA_SS_SPD_MASK) >> 3);
997 		buf[78] = (1 << 5);
998 		buf[80] = 0x3f0;
999 		buf[82] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
1000 			   ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
1001 		buf[83] = (1 << 14);
1002 		buf[84] = (1 << 14);
1003 		buf[85] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
1004 			   ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
1005 		buf[87] = (1 << 14);
1006 		buf[88] = 0x7f;
1007 		if (p->xfermode & ATA_UDMA0)
1008 			buf[88] |= (1 << ((p->xfermode & 7) + 8));
1009 		buf[222] = 0x1020;
1010 		buf[255] = 0x00a5;
1011 		ahci_checksum((uint8_t *)buf, sizeof(buf));
1012 		ahci_write_fis_piosetup(p);
1013 		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
1014 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
1015 	}
1016 }
1017 
1018 static void
1019 atapi_inquiry(struct ahci_port *p, int slot, uint8_t *cfis)
1020 {
1021 	uint8_t buf[36];
1022 	uint8_t *acmd;
1023 	int len;
1024 	uint32_t tfd;
1025 
1026 	acmd = cfis + 0x40;
1027 
1028 	if (acmd[1] & 1) {		/* VPD */
1029 		if (acmd[2] == 0) {	/* Supported VPD pages */
1030 			buf[0] = 0x05;
1031 			buf[1] = 0;
1032 			buf[2] = 0;
1033 			buf[3] = 1;
1034 			buf[4] = 0;
1035 			len = 4 + buf[3];
1036 		} else {
1037 			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1038 			p->asc = 0x24;
1039 			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1040 			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1041 			ahci_write_fis_d2h(p, slot, cfis, tfd);
1042 			return;
1043 		}
1044 	} else {
1045 		buf[0] = 0x05;
1046 		buf[1] = 0x80;
1047 		buf[2] = 0x00;
1048 		buf[3] = 0x21;
1049 		buf[4] = 31;
1050 		buf[5] = 0;
1051 		buf[6] = 0;
1052 		buf[7] = 0;
1053 		atapi_string(buf + 8, "BHYVE", 8);
1054 		atapi_string(buf + 16, "BHYVE DVD-ROM", 16);
1055 		atapi_string(buf + 32, "001", 4);
1056 		len = sizeof(buf);
1057 	}
1058 
1059 	if (len > acmd[4])
1060 		len = acmd[4];
1061 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1062 	write_prdt(p, slot, cfis, buf, len);
1063 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1064 }
1065 
1066 static void
1067 atapi_read_capacity(struct ahci_port *p, int slot, uint8_t *cfis)
1068 {
1069 	uint8_t buf[8];
1070 	uint64_t sectors;
1071 
1072 	sectors = blockif_size(p->bctx) / 2048;
1073 	be32enc(buf, sectors - 1);
1074 	be32enc(buf + 4, 2048);
1075 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1076 	write_prdt(p, slot, cfis, buf, sizeof(buf));
1077 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1078 }
1079 
1080 static void
1081 atapi_read_toc(struct ahci_port *p, int slot, uint8_t *cfis)
1082 {
1083 	uint8_t *acmd;
1084 	uint8_t format;
1085 	int len;
1086 
1087 	acmd = cfis + 0x40;
1088 
1089 	len = be16dec(acmd + 7);
1090 	format = acmd[9] >> 6;
1091 	switch (format) {
1092 	case 0:
1093 	{
1094 		int msf, size;
1095 		uint64_t sectors;
1096 		uint8_t start_track, buf[20], *bp;
1097 
1098 		msf = (acmd[1] >> 1) & 1;
1099 		start_track = acmd[6];
1100 		if (start_track > 1 && start_track != 0xaa) {
1101 			uint32_t tfd;
1102 			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1103 			p->asc = 0x24;
1104 			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1105 			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1106 			ahci_write_fis_d2h(p, slot, cfis, tfd);
1107 			return;
1108 		}
1109 		bp = buf + 2;
1110 		*bp++ = 1;
1111 		*bp++ = 1;
1112 		if (start_track <= 1) {
1113 			*bp++ = 0;
1114 			*bp++ = 0x14;
1115 			*bp++ = 1;
1116 			*bp++ = 0;
1117 			if (msf) {
1118 				*bp++ = 0;
1119 				lba_to_msf(bp, 0);
1120 				bp += 3;
1121 			} else {
1122 				*bp++ = 0;
1123 				*bp++ = 0;
1124 				*bp++ = 0;
1125 				*bp++ = 0;
1126 			}
1127 		}
1128 		*bp++ = 0;
1129 		*bp++ = 0x14;
1130 		*bp++ = 0xaa;
1131 		*bp++ = 0;
1132 		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
1133 		sectors >>= 2;
1134 		if (msf) {
1135 			*bp++ = 0;
1136 			lba_to_msf(bp, sectors);
1137 			bp += 3;
1138 		} else {
1139 			be32enc(bp, sectors);
1140 			bp += 4;
1141 		}
1142 		size = bp - buf;
1143 		be16enc(buf, size - 2);
1144 		if (len > size)
1145 			len = size;
1146 		write_prdt(p, slot, cfis, buf, len);
1147 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1148 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1149 		break;
1150 	}
1151 	case 1:
1152 	{
1153 		uint8_t buf[12];
1154 
1155 		memset(buf, 0, sizeof(buf));
1156 		buf[1] = 0xa;
1157 		buf[2] = 0x1;
1158 		buf[3] = 0x1;
1159 		if (len > sizeof(buf))
1160 			len = sizeof(buf);
1161 		write_prdt(p, slot, cfis, buf, len);
1162 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1163 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1164 		break;
1165 	}
1166 	case 2:
1167 	{
1168 		int msf, size;
1169 		uint64_t sectors;
1170 		uint8_t start_track, *bp, buf[50];
1171 
1172 		msf = (acmd[1] >> 1) & 1;
1173 		start_track = acmd[6];
1174 		bp = buf + 2;
1175 		*bp++ = 1;
1176 		*bp++ = 1;
1177 
1178 		*bp++ = 1;
1179 		*bp++ = 0x14;
1180 		*bp++ = 0;
1181 		*bp++ = 0xa0;
1182 		*bp++ = 0;
1183 		*bp++ = 0;
1184 		*bp++ = 0;
1185 		*bp++ = 0;
1186 		*bp++ = 1;
1187 		*bp++ = 0;
1188 		*bp++ = 0;
1189 
1190 		*bp++ = 1;
1191 		*bp++ = 0x14;
1192 		*bp++ = 0;
1193 		*bp++ = 0xa1;
1194 		*bp++ = 0;
1195 		*bp++ = 0;
1196 		*bp++ = 0;
1197 		*bp++ = 0;
1198 		*bp++ = 1;
1199 		*bp++ = 0;
1200 		*bp++ = 0;
1201 
1202 		*bp++ = 1;
1203 		*bp++ = 0x14;
1204 		*bp++ = 0;
1205 		*bp++ = 0xa2;
1206 		*bp++ = 0;
1207 		*bp++ = 0;
1208 		*bp++ = 0;
1209 		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
1210 		sectors >>= 2;
1211 		if (msf) {
1212 			*bp++ = 0;
1213 			lba_to_msf(bp, sectors);
1214 			bp += 3;
1215 		} else {
1216 			be32enc(bp, sectors);
1217 			bp += 4;
1218 		}
1219 
1220 		*bp++ = 1;
1221 		*bp++ = 0x14;
1222 		*bp++ = 0;
1223 		*bp++ = 1;
1224 		*bp++ = 0;
1225 		*bp++ = 0;
1226 		*bp++ = 0;
1227 		if (msf) {
1228 			*bp++ = 0;
1229 			lba_to_msf(bp, 0);
1230 			bp += 3;
1231 		} else {
1232 			*bp++ = 0;
1233 			*bp++ = 0;
1234 			*bp++ = 0;
1235 			*bp++ = 0;
1236 		}
1237 
1238 		size = bp - buf;
1239 		be16enc(buf, size - 2);
1240 		if (len > size)
1241 			len = size;
1242 		write_prdt(p, slot, cfis, buf, len);
1243 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1244 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1245 		break;
1246 	}
1247 	default:
1248 	{
1249 		uint32_t tfd;
1250 
1251 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1252 		p->asc = 0x24;
1253 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1254 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1255 		ahci_write_fis_d2h(p, slot, cfis, tfd);
1256 		break;
1257 	}
1258 	}
1259 }
1260 
1261 static void
1262 atapi_report_luns(struct ahci_port *p, int slot, uint8_t *cfis)
1263 {
1264 	uint8_t buf[16];
1265 
1266 	memset(buf, 0, sizeof(buf));
1267 	buf[3] = 8;
1268 
1269 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1270 	write_prdt(p, slot, cfis, buf, sizeof(buf));
1271 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1272 }
1273 
1274 static void
1275 atapi_read(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
1276 {
1277 	struct ahci_ioreq *aior;
1278 	struct ahci_cmd_hdr *hdr;
1279 	struct ahci_prdt_entry *prdt;
1280 	struct blockif_req *breq;
1281 	struct pci_ahci_softc *sc;
1282 	uint8_t *acmd;
1283 	uint64_t lba;
1284 	uint32_t len;
1285 	int err;
1286 
1287 	sc = p->pr_sc;
1288 	acmd = cfis + 0x40;
1289 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1290 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1291 
1292 	lba = be32dec(acmd + 2);
1293 	if (acmd[0] == READ_10)
1294 		len = be16dec(acmd + 7);
1295 	else
1296 		len = be32dec(acmd + 6);
1297 	if (len == 0) {
1298 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1299 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1300 	}
1301 	lba *= 2048;
1302 	len *= 2048;
1303 
1304 	/*
1305 	 * Pull request off free list
1306 	 */
1307 	aior = STAILQ_FIRST(&p->iofhd);
1308 	assert(aior != NULL);
1309 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
1310 	aior->cfis = cfis;
1311 	aior->slot = slot;
1312 	aior->len = len;
1313 	aior->done = done;
1314 	breq = &aior->io_req;
1315 	breq->br_offset = lba + done;
1316 	ahci_build_iov(p, aior, prdt, hdr->prdtl);
1317 
1318 	/* Mark this command in-flight. */
1319 	p->pending |= 1 << slot;
1320 
1321 	/* Stuff request onto busy list. */
1322 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
1323 
1324 	err = blockif_read(p->bctx, breq);
1325 	assert(err == 0);
1326 }
1327 
1328 static void
1329 atapi_request_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1330 {
1331 	uint8_t buf[64];
1332 	uint8_t *acmd;
1333 	int len;
1334 
1335 	acmd = cfis + 0x40;
1336 	len = acmd[4];
1337 	if (len > sizeof(buf))
1338 		len = sizeof(buf);
1339 	memset(buf, 0, len);
1340 	buf[0] = 0x70 | (1 << 7);
1341 	buf[2] = p->sense_key;
1342 	buf[7] = 10;
1343 	buf[12] = p->asc;
1344 	write_prdt(p, slot, cfis, buf, len);
1345 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1346 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1347 }
1348 
1349 static void
1350 atapi_start_stop_unit(struct ahci_port *p, int slot, uint8_t *cfis)
1351 {
1352 	uint8_t *acmd = cfis + 0x40;
1353 	uint32_t tfd;
1354 
1355 	switch (acmd[4] & 3) {
1356 	case 0:
1357 	case 1:
1358 	case 3:
1359 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1360 		tfd = ATA_S_READY | ATA_S_DSC;
1361 		break;
1362 	case 2:
1363 		/* TODO eject media */
1364 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1365 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1366 		p->asc = 0x53;
1367 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1368 		break;
1369 	}
1370 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1371 }
1372 
1373 static void
1374 atapi_mode_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1375 {
1376 	uint8_t *acmd;
1377 	uint32_t tfd;
1378 	uint8_t pc, code;
1379 	int len;
1380 
1381 	acmd = cfis + 0x40;
1382 	len = be16dec(acmd + 7);
1383 	pc = acmd[2] >> 6;
1384 	code = acmd[2] & 0x3f;
1385 
1386 	switch (pc) {
1387 	case 0:
1388 		switch (code) {
1389 		case MODEPAGE_RW_ERROR_RECOVERY:
1390 		{
1391 			uint8_t buf[16];
1392 
1393 			if (len > sizeof(buf))
1394 				len = sizeof(buf);
1395 
1396 			memset(buf, 0, sizeof(buf));
1397 			be16enc(buf, 16 - 2);
1398 			buf[2] = 0x70;
1399 			buf[8] = 0x01;
1400 			buf[9] = 16 - 10;
1401 			buf[11] = 0x05;
1402 			write_prdt(p, slot, cfis, buf, len);
1403 			tfd = ATA_S_READY | ATA_S_DSC;
1404 			break;
1405 		}
1406 		case MODEPAGE_CD_CAPABILITIES:
1407 		{
1408 			uint8_t buf[30];
1409 
1410 			if (len > sizeof(buf))
1411 				len = sizeof(buf);
1412 
1413 			memset(buf, 0, sizeof(buf));
1414 			be16enc(buf, 30 - 2);
1415 			buf[2] = 0x70;
1416 			buf[8] = 0x2A;
1417 			buf[9] = 30 - 10;
1418 			buf[10] = 0x08;
1419 			buf[12] = 0x71;
1420 			be16enc(&buf[18], 2);
1421 			be16enc(&buf[20], 512);
1422 			write_prdt(p, slot, cfis, buf, len);
1423 			tfd = ATA_S_READY | ATA_S_DSC;
1424 			break;
1425 		}
1426 		default:
1427 			goto error;
1428 			break;
1429 		}
1430 		break;
1431 	case 3:
1432 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1433 		p->asc = 0x39;
1434 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1435 		break;
1436 error:
1437 	case 1:
1438 	case 2:
1439 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1440 		p->asc = 0x24;
1441 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1442 		break;
1443 	}
1444 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1445 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1446 }
1447 
1448 static void
1449 atapi_get_event_status_notification(struct ahci_port *p, int slot,
1450     uint8_t *cfis)
1451 {
1452 	uint8_t *acmd;
1453 	uint32_t tfd;
1454 
1455 	acmd = cfis + 0x40;
1456 
1457 	/* we don't support asynchronous operation */
1458 	if (!(acmd[1] & 1)) {
1459 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1460 		p->asc = 0x24;
1461 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1462 	} else {
1463 		uint8_t buf[8];
1464 		int len;
1465 
1466 		len = be16dec(acmd + 7);
1467 		if (len > sizeof(buf))
1468 			len = sizeof(buf);
1469 
1470 		memset(buf, 0, sizeof(buf));
1471 		be16enc(buf, 8 - 2);
1472 		buf[2] = 0x04;
1473 		buf[3] = 0x10;
1474 		buf[5] = 0x02;
1475 		write_prdt(p, slot, cfis, buf, len);
1476 		tfd = ATA_S_READY | ATA_S_DSC;
1477 	}
1478 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1479 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1480 }
1481 
1482 static void
1483 handle_packet_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1484 {
1485 	uint8_t *acmd;
1486 
1487 	acmd = cfis + 0x40;
1488 
1489 #ifdef AHCI_DEBUG
1490 	{
1491 		int i;
1492 		DPRINTF("ACMD:");
1493 		for (i = 0; i < 16; i++)
1494 			DPRINTF("%02x ", acmd[i]);
1495 		DPRINTF("\n");
1496 	}
1497 #endif
1498 
1499 	switch (acmd[0]) {
1500 	case TEST_UNIT_READY:
1501 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1502 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1503 		break;
1504 	case INQUIRY:
1505 		atapi_inquiry(p, slot, cfis);
1506 		break;
1507 	case READ_CAPACITY:
1508 		atapi_read_capacity(p, slot, cfis);
1509 		break;
1510 	case PREVENT_ALLOW:
1511 		/* TODO */
1512 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1513 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1514 		break;
1515 	case READ_TOC:
1516 		atapi_read_toc(p, slot, cfis);
1517 		break;
1518 	case REPORT_LUNS:
1519 		atapi_report_luns(p, slot, cfis);
1520 		break;
1521 	case READ_10:
1522 	case READ_12:
1523 		atapi_read(p, slot, cfis, 0);
1524 		break;
1525 	case REQUEST_SENSE:
1526 		atapi_request_sense(p, slot, cfis);
1527 		break;
1528 	case START_STOP_UNIT:
1529 		atapi_start_stop_unit(p, slot, cfis);
1530 		break;
1531 	case MODE_SENSE_10:
1532 		atapi_mode_sense(p, slot, cfis);
1533 		break;
1534 	case GET_EVENT_STATUS_NOTIFICATION:
1535 		atapi_get_event_status_notification(p, slot, cfis);
1536 		break;
1537 	default:
1538 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1539 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1540 		p->asc = 0x20;
1541 		ahci_write_fis_d2h(p, slot, cfis, (p->sense_key << 12) |
1542 				ATA_S_READY | ATA_S_ERROR);
1543 		break;
1544 	}
1545 }
1546 
1547 static void
1548 ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1549 {
1550 
1551 	switch (cfis[2]) {
1552 	case ATA_ATA_IDENTIFY:
1553 		handle_identify(p, slot, cfis);
1554 		break;
1555 	case ATA_SETFEATURES:
1556 	{
1557 		switch (cfis[3]) {
1558 		case ATA_SF_ENAB_SATA_SF:
1559 			switch (cfis[12]) {
1560 			case ATA_SATA_SF_AN:
1561 				p->tfd = ATA_S_DSC | ATA_S_READY;
1562 				break;
1563 			default:
1564 				p->tfd = ATA_S_ERROR | ATA_S_READY;
1565 				p->tfd |= (ATA_ERROR_ABORT << 8);
1566 				break;
1567 			}
1568 			break;
1569 		case ATA_SF_ENAB_WCACHE:
1570 		case ATA_SF_DIS_WCACHE:
1571 		case ATA_SF_ENAB_RCACHE:
1572 		case ATA_SF_DIS_RCACHE:
1573 			p->tfd = ATA_S_DSC | ATA_S_READY;
1574 			break;
1575 		case ATA_SF_SETXFER:
1576 		{
1577 			switch (cfis[12] & 0xf8) {
1578 			case ATA_PIO:
1579 			case ATA_PIO0:
1580 				break;
1581 			case ATA_WDMA0:
1582 			case ATA_UDMA0:
1583 				p->xfermode = (cfis[12] & 0x7);
1584 				break;
1585 			}
1586 			p->tfd = ATA_S_DSC | ATA_S_READY;
1587 			break;
1588 		}
1589 		default:
1590 			p->tfd = ATA_S_ERROR | ATA_S_READY;
1591 			p->tfd |= (ATA_ERROR_ABORT << 8);
1592 			break;
1593 		}
1594 		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1595 		break;
1596 	}
1597 	case ATA_SET_MULTI:
1598 		if (cfis[12] != 0 &&
1599 			(cfis[12] > 128 || (cfis[12] & (cfis[12] - 1)))) {
1600 			p->tfd = ATA_S_ERROR | ATA_S_READY;
1601 			p->tfd |= (ATA_ERROR_ABORT << 8);
1602 		} else {
1603 			p->mult_sectors = cfis[12];
1604 			p->tfd = ATA_S_DSC | ATA_S_READY;
1605 		}
1606 		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1607 		break;
1608 	case ATA_READ:
1609 	case ATA_WRITE:
1610 	case ATA_READ48:
1611 	case ATA_WRITE48:
1612 	case ATA_READ_MUL:
1613 	case ATA_WRITE_MUL:
1614 	case ATA_READ_MUL48:
1615 	case ATA_WRITE_MUL48:
1616 	case ATA_READ_DMA:
1617 	case ATA_WRITE_DMA:
1618 	case ATA_READ_DMA48:
1619 	case ATA_WRITE_DMA48:
1620 	case ATA_READ_FPDMA_QUEUED:
1621 	case ATA_WRITE_FPDMA_QUEUED:
1622 		ahci_handle_rw(p, slot, cfis, 0);
1623 		break;
1624 	case ATA_FLUSHCACHE:
1625 	case ATA_FLUSHCACHE48:
1626 		ahci_handle_flush(p, slot, cfis);
1627 		break;
1628 	case ATA_DATA_SET_MANAGEMENT:
1629 		if (cfis[11] == 0 && cfis[3] == ATA_DSM_TRIM &&
1630 		    cfis[13] == 0 && cfis[12] == 1) {
1631 			ahci_handle_dsm_trim(p, slot, cfis, 0);
1632 			break;
1633 		}
1634 		ahci_write_fis_d2h(p, slot, cfis,
1635 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1636 		break;
1637 	case ATA_SEND_FPDMA_QUEUED:
1638 		if ((cfis[13] & 0x1f) == ATA_SFPDMA_DSM &&
1639 		    cfis[17] == 0 && cfis[16] == ATA_DSM_TRIM &&
1640 		    cfis[11] == 0 && cfis[13] == 1) {
1641 			ahci_handle_dsm_trim(p, slot, cfis, 0);
1642 			break;
1643 		}
1644 		ahci_write_fis_d2h(p, slot, cfis,
1645 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1646 		break;
1647 	case ATA_READ_LOG_EXT:
1648 	case ATA_READ_LOG_DMA_EXT:
1649 		ahci_handle_read_log(p, slot, cfis);
1650 		break;
1651 	case ATA_NOP:
1652 		ahci_write_fis_d2h(p, slot, cfis,
1653 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1654 		break;
1655 	case ATA_STANDBY_CMD:
1656 	case ATA_STANDBY_IMMEDIATE:
1657 	case ATA_IDLE_CMD:
1658 	case ATA_IDLE_IMMEDIATE:
1659 	case ATA_SLEEP:
1660 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1661 		break;
1662 	case ATA_ATAPI_IDENTIFY:
1663 		handle_atapi_identify(p, slot, cfis);
1664 		break;
1665 	case ATA_PACKET_CMD:
1666 		if (!p->atapi) {
1667 			ahci_write_fis_d2h(p, slot, cfis,
1668 			    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1669 		} else
1670 			handle_packet_cmd(p, slot, cfis);
1671 		break;
1672 	default:
1673 		WPRINTF("Unsupported cmd:%02x\n", cfis[2]);
1674 		ahci_write_fis_d2h(p, slot, cfis,
1675 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1676 		break;
1677 	}
1678 }
1679 
1680 static void
1681 ahci_handle_slot(struct ahci_port *p, int slot)
1682 {
1683 	struct ahci_cmd_hdr *hdr;
1684 	struct ahci_prdt_entry *prdt;
1685 	struct pci_ahci_softc *sc;
1686 	uint8_t *cfis;
1687 	int cfl;
1688 
1689 	sc = p->pr_sc;
1690 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1691 	cfl = (hdr->flags & 0x1f) * 4;
1692 	cfis = paddr_guest2host(ahci_ctx(sc), hdr->ctba,
1693 			0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry));
1694 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1695 
1696 #ifdef AHCI_DEBUG
1697 	DPRINTF("\ncfis:");
1698 	for (i = 0; i < cfl; i++) {
1699 		if (i % 10 == 0)
1700 			DPRINTF("\n");
1701 		DPRINTF("%02x ", cfis[i]);
1702 	}
1703 	DPRINTF("\n");
1704 
1705 	for (i = 0; i < hdr->prdtl; i++) {
1706 		DPRINTF("%d@%08"PRIx64"\n", prdt->dbc & 0x3fffff, prdt->dba);
1707 		prdt++;
1708 	}
1709 #endif
1710 
1711 	if (cfis[0] != FIS_TYPE_REGH2D) {
1712 		WPRINTF("Not a H2D FIS:%02x\n", cfis[0]);
1713 		return;
1714 	}
1715 
1716 	if (cfis[1] & 0x80) {
1717 		ahci_handle_cmd(p, slot, cfis);
1718 	} else {
1719 		if (cfis[15] & (1 << 2))
1720 			p->reset = 1;
1721 		else if (p->reset) {
1722 			p->reset = 0;
1723 			ahci_port_reset(p);
1724 		}
1725 		p->ci &= ~(1 << slot);
1726 	}
1727 }
1728 
1729 static void
1730 ahci_handle_port(struct ahci_port *p)
1731 {
1732 	int i;
1733 
1734 	if (!(p->cmd & AHCI_P_CMD_ST))
1735 		return;
1736 
1737 	/*
1738 	 * Search for any new commands to issue ignoring those that
1739 	 * are already in-flight.
1740 	 */
1741 	for (i = 0; (i < 32) && p->ci; i++) {
1742 		if ((p->ci & (1 << i)) && !(p->pending & (1 << i))) {
1743 			p->cmd &= ~AHCI_P_CMD_CCS_MASK;
1744 			p->cmd |= i << AHCI_P_CMD_CCS_SHIFT;
1745 			ahci_handle_slot(p, i);
1746 		}
1747 	}
1748 }
1749 
1750 /*
1751  * blockif callback routine - this runs in the context of the blockif
1752  * i/o thread, so the mutex needs to be acquired.
1753  */
1754 static void
1755 ata_ioreq_cb(struct blockif_req *br, int err)
1756 {
1757 	struct ahci_cmd_hdr *hdr;
1758 	struct ahci_ioreq *aior;
1759 	struct ahci_port *p;
1760 	struct pci_ahci_softc *sc;
1761 	uint32_t tfd;
1762 	uint8_t *cfis;
1763 	int slot, ncq, dsm;
1764 
1765 	DPRINTF("%s %d\n", __func__, err);
1766 
1767 	ncq = dsm = 0;
1768 	aior = br->br_param;
1769 	p = aior->io_pr;
1770 	cfis = aior->cfis;
1771 	slot = aior->slot;
1772 	sc = p->pr_sc;
1773 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1774 
1775 	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
1776 	    cfis[2] == ATA_READ_FPDMA_QUEUED ||
1777 	    cfis[2] == ATA_SEND_FPDMA_QUEUED)
1778 		ncq = 1;
1779 	if (cfis[2] == ATA_DATA_SET_MANAGEMENT ||
1780 	    (cfis[2] == ATA_SEND_FPDMA_QUEUED &&
1781 	     (cfis[13] & 0x1f) == ATA_SFPDMA_DSM))
1782 		dsm = 1;
1783 
1784 	pthread_mutex_lock(&sc->mtx);
1785 
1786 	/*
1787 	 * Delete the blockif request from the busy list
1788 	 */
1789 	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1790 
1791 	/*
1792 	 * Move the blockif request back to the free list
1793 	 */
1794 	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
1795 
1796 	if (!err)
1797 		hdr->prdbc = aior->done;
1798 
1799 	if (!err && aior->more) {
1800 		if (dsm)
1801 			ahci_handle_dsm_trim(p, slot, cfis, aior->done);
1802 		else
1803 			ahci_handle_rw(p, slot, cfis, aior->done);
1804 		goto out;
1805 	}
1806 
1807 	if (!err)
1808 		tfd = ATA_S_READY | ATA_S_DSC;
1809 	else
1810 		tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1811 	if (ncq)
1812 		ahci_write_fis_sdb(p, slot, cfis, tfd);
1813 	else
1814 		ahci_write_fis_d2h(p, slot, cfis, tfd);
1815 
1816 	/*
1817 	 * This command is now complete.
1818 	 */
1819 	p->pending &= ~(1 << slot);
1820 
1821 	ahci_check_stopped(p);
1822 out:
1823 	pthread_mutex_unlock(&sc->mtx);
1824 	DPRINTF("%s exit\n", __func__);
1825 }
1826 
1827 static void
1828 atapi_ioreq_cb(struct blockif_req *br, int err)
1829 {
1830 	struct ahci_cmd_hdr *hdr;
1831 	struct ahci_ioreq *aior;
1832 	struct ahci_port *p;
1833 	struct pci_ahci_softc *sc;
1834 	uint8_t *cfis;
1835 	uint32_t tfd;
1836 	int slot;
1837 
1838 	DPRINTF("%s %d\n", __func__, err);
1839 
1840 	aior = br->br_param;
1841 	p = aior->io_pr;
1842 	cfis = aior->cfis;
1843 	slot = aior->slot;
1844 	sc = p->pr_sc;
1845 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + aior->slot * AHCI_CL_SIZE);
1846 
1847 	pthread_mutex_lock(&sc->mtx);
1848 
1849 	/*
1850 	 * Delete the blockif request from the busy list
1851 	 */
1852 	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1853 
1854 	/*
1855 	 * Move the blockif request back to the free list
1856 	 */
1857 	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
1858 
1859 	if (!err)
1860 		hdr->prdbc = aior->done;
1861 
1862 	if (!err && aior->more) {
1863 		atapi_read(p, slot, cfis, aior->done);
1864 		goto out;
1865 	}
1866 
1867 	if (!err) {
1868 		tfd = ATA_S_READY | ATA_S_DSC;
1869 	} else {
1870 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1871 		p->asc = 0x21;
1872 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1873 	}
1874 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1875 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1876 
1877 	/*
1878 	 * This command is now complete.
1879 	 */
1880 	p->pending &= ~(1 << slot);
1881 
1882 	ahci_check_stopped(p);
1883 out:
1884 	pthread_mutex_unlock(&sc->mtx);
1885 	DPRINTF("%s exit\n", __func__);
1886 }
1887 
1888 static void
1889 pci_ahci_ioreq_init(struct ahci_port *pr)
1890 {
1891 	struct ahci_ioreq *vr;
1892 	int i;
1893 
1894 	pr->ioqsz = blockif_queuesz(pr->bctx);
1895 	pr->ioreq = calloc(pr->ioqsz, sizeof(struct ahci_ioreq));
1896 	STAILQ_INIT(&pr->iofhd);
1897 
1898 	/*
1899 	 * Add all i/o request entries to the free queue
1900 	 */
1901 	for (i = 0; i < pr->ioqsz; i++) {
1902 		vr = &pr->ioreq[i];
1903 		vr->io_pr = pr;
1904 		if (!pr->atapi)
1905 			vr->io_req.br_callback = ata_ioreq_cb;
1906 		else
1907 			vr->io_req.br_callback = atapi_ioreq_cb;
1908 		vr->io_req.br_param = vr;
1909 		STAILQ_INSERT_TAIL(&pr->iofhd, vr, io_flist);
1910 	}
1911 
1912 	TAILQ_INIT(&pr->iobhd);
1913 }
1914 
1915 static void
1916 pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
1917 {
1918 	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
1919 	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
1920 	struct ahci_port *p = &sc->port[port];
1921 
1922 	DPRINTF("pci_ahci_port %d: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
1923 		port, offset, value);
1924 
1925 	switch (offset) {
1926 	case AHCI_P_CLB:
1927 		p->clb = value;
1928 		break;
1929 	case AHCI_P_CLBU:
1930 		p->clbu = value;
1931 		break;
1932 	case AHCI_P_FB:
1933 		p->fb = value;
1934 		break;
1935 	case AHCI_P_FBU:
1936 		p->fbu = value;
1937 		break;
1938 	case AHCI_P_IS:
1939 		p->is &= ~value;
1940 		break;
1941 	case AHCI_P_IE:
1942 		p->ie = value & 0xFDC000FF;
1943 		ahci_generate_intr(sc);
1944 		break;
1945 	case AHCI_P_CMD:
1946 	{
1947 		p->cmd &= ~(AHCI_P_CMD_ST | AHCI_P_CMD_SUD | AHCI_P_CMD_POD |
1948 		    AHCI_P_CMD_CLO | AHCI_P_CMD_FRE | AHCI_P_CMD_APSTE |
1949 		    AHCI_P_CMD_ATAPI | AHCI_P_CMD_DLAE | AHCI_P_CMD_ALPE |
1950 		    AHCI_P_CMD_ASP | AHCI_P_CMD_ICC_MASK);
1951 		p->cmd |= (AHCI_P_CMD_ST | AHCI_P_CMD_SUD | AHCI_P_CMD_POD |
1952 		    AHCI_P_CMD_CLO | AHCI_P_CMD_FRE | AHCI_P_CMD_APSTE |
1953 		    AHCI_P_CMD_ATAPI | AHCI_P_CMD_DLAE | AHCI_P_CMD_ALPE |
1954 		    AHCI_P_CMD_ASP | AHCI_P_CMD_ICC_MASK) & value;
1955 
1956 		if (!(value & AHCI_P_CMD_ST)) {
1957 			ahci_port_stop(p);
1958 		} else {
1959 			uint64_t clb;
1960 
1961 			p->cmd |= AHCI_P_CMD_CR;
1962 			clb = (uint64_t)p->clbu << 32 | p->clb;
1963 			p->cmd_lst = paddr_guest2host(ahci_ctx(sc), clb,
1964 					AHCI_CL_SIZE * AHCI_MAX_SLOTS);
1965 		}
1966 
1967 		if (value & AHCI_P_CMD_FRE) {
1968 			uint64_t fb;
1969 
1970 			p->cmd |= AHCI_P_CMD_FR;
1971 			fb = (uint64_t)p->fbu << 32 | p->fb;
1972 			/* we don't support FBSCP, so rfis size is 256Bytes */
1973 			p->rfis = paddr_guest2host(ahci_ctx(sc), fb, 256);
1974 		} else {
1975 			p->cmd &= ~AHCI_P_CMD_FR;
1976 		}
1977 
1978 		if (value & AHCI_P_CMD_CLO) {
1979 			p->tfd = 0;
1980 			p->cmd &= ~AHCI_P_CMD_CLO;
1981 		}
1982 
1983 		if (value & AHCI_P_CMD_ICC_MASK) {
1984 			p->cmd &= ~AHCI_P_CMD_ICC_MASK;
1985 		}
1986 
1987 		ahci_handle_port(p);
1988 		break;
1989 	}
1990 	case AHCI_P_TFD:
1991 	case AHCI_P_SIG:
1992 	case AHCI_P_SSTS:
1993 		WPRINTF("pci_ahci_port: read only registers 0x%"PRIx64"\n", offset);
1994 		break;
1995 	case AHCI_P_SCTL:
1996 		p->sctl = value;
1997 		if (!(p->cmd & AHCI_P_CMD_ST)) {
1998 			if (value & ATA_SC_DET_RESET)
1999 				ahci_port_reset(p);
2000 		}
2001 		break;
2002 	case AHCI_P_SERR:
2003 		p->serr &= ~value;
2004 		break;
2005 	case AHCI_P_SACT:
2006 		p->sact |= value;
2007 		break;
2008 	case AHCI_P_CI:
2009 		p->ci |= value;
2010 		ahci_handle_port(p);
2011 		break;
2012 	case AHCI_P_SNTF:
2013 	case AHCI_P_FBS:
2014 	default:
2015 		break;
2016 	}
2017 }
2018 
2019 static void
2020 pci_ahci_host_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
2021 {
2022 	DPRINTF("pci_ahci_host: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
2023 		offset, value);
2024 
2025 	switch (offset) {
2026 	case AHCI_CAP:
2027 	case AHCI_PI:
2028 	case AHCI_VS:
2029 	case AHCI_CAP2:
2030 		DPRINTF("pci_ahci_host: read only registers 0x%"PRIx64"\n", offset);
2031 		break;
2032 	case AHCI_GHC:
2033 		if (value & AHCI_GHC_HR)
2034 			ahci_reset(sc);
2035 		else if (value & AHCI_GHC_IE) {
2036 			sc->ghc |= AHCI_GHC_IE;
2037 			ahci_generate_intr(sc);
2038 		}
2039 		break;
2040 	case AHCI_IS:
2041 		sc->is &= ~value;
2042 		ahci_generate_intr(sc);
2043 		break;
2044 	default:
2045 		break;
2046 	}
2047 }
2048 
2049 static void
2050 pci_ahci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
2051 		int baridx, uint64_t offset, int size, uint64_t value)
2052 {
2053 	struct pci_ahci_softc *sc = pi->pi_arg;
2054 
2055 	assert(baridx == 5);
2056 	assert(size == 4);
2057 
2058 	pthread_mutex_lock(&sc->mtx);
2059 
2060 	if (offset < AHCI_OFFSET)
2061 		pci_ahci_host_write(sc, offset, value);
2062 	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
2063 		pci_ahci_port_write(sc, offset, value);
2064 	else
2065 		WPRINTF("pci_ahci: unknown i/o write offset 0x%"PRIx64"\n", offset);
2066 
2067 	pthread_mutex_unlock(&sc->mtx);
2068 }
2069 
2070 static uint64_t
2071 pci_ahci_host_read(struct pci_ahci_softc *sc, uint64_t offset)
2072 {
2073 	uint32_t value;
2074 
2075 	switch (offset) {
2076 	case AHCI_CAP:
2077 	case AHCI_GHC:
2078 	case AHCI_IS:
2079 	case AHCI_PI:
2080 	case AHCI_VS:
2081 	case AHCI_CCCC:
2082 	case AHCI_CCCP:
2083 	case AHCI_EM_LOC:
2084 	case AHCI_EM_CTL:
2085 	case AHCI_CAP2:
2086 	{
2087 		uint32_t *p = &sc->cap;
2088 		p += (offset - AHCI_CAP) / sizeof(uint32_t);
2089 		value = *p;
2090 		break;
2091 	}
2092 	default:
2093 		value = 0;
2094 		break;
2095 	}
2096 	DPRINTF("pci_ahci_host: read offset 0x%"PRIx64" value 0x%x\n",
2097 		offset, value);
2098 
2099 	return (value);
2100 }
2101 
2102 static uint64_t
2103 pci_ahci_port_read(struct pci_ahci_softc *sc, uint64_t offset)
2104 {
2105 	uint32_t value;
2106 	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
2107 	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
2108 
2109 	switch (offset) {
2110 	case AHCI_P_CLB:
2111 	case AHCI_P_CLBU:
2112 	case AHCI_P_FB:
2113 	case AHCI_P_FBU:
2114 	case AHCI_P_IS:
2115 	case AHCI_P_IE:
2116 	case AHCI_P_CMD:
2117 	case AHCI_P_TFD:
2118 	case AHCI_P_SIG:
2119 	case AHCI_P_SSTS:
2120 	case AHCI_P_SCTL:
2121 	case AHCI_P_SERR:
2122 	case AHCI_P_SACT:
2123 	case AHCI_P_CI:
2124 	case AHCI_P_SNTF:
2125 	case AHCI_P_FBS:
2126 	{
2127 		uint32_t *p= &sc->port[port].clb;
2128 		p += (offset - AHCI_P_CLB) / sizeof(uint32_t);
2129 		value = *p;
2130 		break;
2131 	}
2132 	default:
2133 		value = 0;
2134 		break;
2135 	}
2136 
2137 	DPRINTF("pci_ahci_port %d: read offset 0x%"PRIx64" value 0x%x\n",
2138 		port, offset, value);
2139 
2140 	return value;
2141 }
2142 
2143 static uint64_t
2144 pci_ahci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
2145     uint64_t offset, int size)
2146 {
2147 	struct pci_ahci_softc *sc = pi->pi_arg;
2148 	uint32_t value;
2149 
2150 	assert(baridx == 5);
2151 	assert(size == 4);
2152 
2153 	pthread_mutex_lock(&sc->mtx);
2154 
2155 	if (offset < AHCI_OFFSET)
2156 		value = pci_ahci_host_read(sc, offset);
2157 	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
2158 		value = pci_ahci_port_read(sc, offset);
2159 	else {
2160 		value = 0;
2161 		WPRINTF("pci_ahci: unknown i/o read offset 0x%"PRIx64"\n", offset);
2162 	}
2163 
2164 	pthread_mutex_unlock(&sc->mtx);
2165 
2166 	return (value);
2167 }
2168 
2169 static int
2170 pci_ahci_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts, int atapi)
2171 {
2172 	char bident[sizeof("XX:X:X")];
2173 	struct blockif_ctxt *bctxt;
2174 	struct pci_ahci_softc *sc;
2175 	int ret, slots;
2176 	MD5_CTX mdctx;
2177 	u_char digest[16];
2178 
2179 	ret = 0;
2180 
2181 	if (opts == NULL) {
2182 		fprintf(stderr, "pci_ahci: backing device required\n");
2183 		return (1);
2184 	}
2185 
2186 #ifdef AHCI_DEBUG
2187 	dbg = fopen("/tmp/log", "w+");
2188 #endif
2189 
2190 	sc = calloc(1, sizeof(struct pci_ahci_softc));
2191 	pi->pi_arg = sc;
2192 	sc->asc_pi = pi;
2193 	sc->ports = MAX_PORTS;
2194 
2195 	/*
2196 	 * Only use port 0 for a backing device. All other ports will be
2197 	 * marked as unused
2198 	 */
2199 	sc->port[0].atapi = atapi;
2200 
2201 	/*
2202 	 * Attempt to open the backing image. Use the PCI
2203 	 * slot/func for the identifier string.
2204 	 */
2205 	snprintf(bident, sizeof(bident), "%d:%d", pi->pi_slot, pi->pi_func);
2206 	bctxt = blockif_open(opts, bident);
2207 	if (bctxt == NULL) {
2208 		ret = 1;
2209 		goto open_fail;
2210 	}
2211 	sc->port[0].bctx = bctxt;
2212 	sc->port[0].pr_sc = sc;
2213 
2214 	/*
2215 	 * Create an identifier for the backing file. Use parts of the
2216 	 * md5 sum of the filename
2217 	 */
2218 	MD5Init(&mdctx);
2219 	MD5Update(&mdctx, opts, strlen(opts));
2220 	MD5Final(digest, &mdctx);
2221 	sprintf(sc->port[0].ident, "BHYVE-%02X%02X-%02X%02X-%02X%02X",
2222 	    digest[0], digest[1], digest[2], digest[3], digest[4], digest[5]);
2223 
2224 	/*
2225 	 * Allocate blockif request structures and add them
2226 	 * to the free list
2227 	 */
2228 	pci_ahci_ioreq_init(&sc->port[0]);
2229 
2230 	pthread_mutex_init(&sc->mtx, NULL);
2231 
2232 	/* Intel ICH8 AHCI */
2233 	slots = sc->port[0].ioqsz;
2234 	if (slots > 32)
2235 		slots = 32;
2236 	--slots;
2237 	sc->cap = AHCI_CAP_64BIT | AHCI_CAP_SNCQ | AHCI_CAP_SSNTF |
2238 	    AHCI_CAP_SMPS | AHCI_CAP_SSS | AHCI_CAP_SALP |
2239 	    AHCI_CAP_SAL | AHCI_CAP_SCLO | (0x3 << AHCI_CAP_ISS_SHIFT)|
2240 	    AHCI_CAP_PMD | AHCI_CAP_SSC | AHCI_CAP_PSC |
2241 	    (slots << AHCI_CAP_NCS_SHIFT) | AHCI_CAP_SXS | (sc->ports - 1);
2242 
2243 	/* Only port 0 implemented */
2244 	sc->pi = 1;
2245 	sc->vs = 0x10300;
2246 	sc->cap2 = AHCI_CAP2_APST;
2247 	ahci_reset(sc);
2248 
2249 	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x2821);
2250 	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x8086);
2251 	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
2252 	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_STORAGE_SATA);
2253 	pci_set_cfgdata8(pi, PCIR_PROGIF, PCIP_STORAGE_SATA_AHCI_1_0);
2254 	pci_emul_add_msicap(pi, 1);
2255 	pci_emul_alloc_bar(pi, 5, PCIBAR_MEM32,
2256 	    AHCI_OFFSET + sc->ports * AHCI_STEP);
2257 
2258 	pci_lintr_request(pi);
2259 
2260 open_fail:
2261 	if (ret) {
2262 		if (sc->port[0].bctx != NULL)
2263 			blockif_close(sc->port[0].bctx);
2264 		free(sc);
2265 	}
2266 
2267 	return (ret);
2268 }
2269 
/* Device-model init entry point for "ahci-hd": a non-ATAPI (disk) port. */
static int
pci_ahci_hd_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int is_atapi = 0;

	return (pci_ahci_init(ctx, pi, opts, is_atapi));
}
2276 
/* Device-model init entry point for "ahci-cd": an ATAPI port. */
static int
pci_ahci_atapi_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int is_atapi = 1;

	return (pci_ahci_init(ctx, pi, opts, is_atapi));
}
2283 
2284 /*
2285  * Use separate emulation names to distinguish drive and atapi devices
2286  */
2287 struct pci_devemu pci_de_ahci_hd = {
2288 	.pe_emu =	"ahci-hd",
2289 	.pe_init =	pci_ahci_hd_init,
2290 	.pe_barwrite =	pci_ahci_write,
2291 	.pe_barread =	pci_ahci_read
2292 };
2293 PCI_EMUL_SET(pci_de_ahci_hd);
2294 
2295 struct pci_devemu pci_de_ahci_cd = {
2296 	.pe_emu =	"ahci-cd",
2297 	.pe_init =	pci_ahci_atapi_init,
2298 	.pe_barwrite =	pci_ahci_write,
2299 	.pe_barread =	pci_ahci_read
2300 };
2301 PCI_EMUL_SET(pci_de_ahci_cd);
2302