xref: /freebsd/usr.sbin/bhyve/pci_ahci.c (revision 49a26c1f9368c5c1106c9d018c3f553dc07042e4)
1 /*-
2  * Copyright (c) 2013  Zhixiang Yu <zcore@freebsd.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/linker_set.h>
34 #include <sys/stat.h>
35 #include <sys/uio.h>
36 #include <sys/ioctl.h>
37 #include <sys/disk.h>
38 #include <sys/ata.h>
39 #include <sys/endian.h>
40 
41 #include <errno.h>
42 #include <fcntl.h>
43 #include <stdio.h>
44 #include <stdlib.h>
45 #include <stdint.h>
46 #include <string.h>
47 #include <strings.h>
48 #include <unistd.h>
49 #include <assert.h>
50 #include <pthread.h>
51 #include <pthread_np.h>
52 #include <inttypes.h>
53 #include <md5.h>
54 
55 #include "bhyverun.h"
56 #include "pci_emul.h"
57 #include "ahci.h"
58 #include "block_if.h"
59 
/* Size of the per-controller port array (Intel ICH8 AHCI supports 6 ports). */
#define	MAX_PORTS	6

/* Port signature values assigned by ahci_port_reset() per device class. */
#define	PxSIG_ATA	0x00000101	/* ATA drive */
#define	PxSIG_ATAPI	0xeb140101	/* ATAPI drive */
64 
/*
 * FIS type codes.  The value is stored in byte 0 of every FIS built by
 * the ahci_write_fis_*() helpers.
 */
enum sata_fis_type {
	/* Host to device */
	FIS_TYPE_REGH2D		= 0x27,	/* Register FIS */

	/* Device to host */
	FIS_TYPE_REGD2H		= 0x34,	/* Register FIS */
	FIS_TYPE_DMAACT		= 0x39,	/* DMA activate FIS */
	FIS_TYPE_PIOSETUP	= 0x5F,	/* PIO setup FIS */
	FIS_TYPE_SETDEVBITS	= 0xA1,	/* Set device bits FIS */

	/* Bidirectional */
	FIS_TYPE_DMASETUP	= 0x41,	/* DMA setup FIS */
	FIS_TYPE_DATA		= 0x46,	/* Data FIS */
	FIS_TYPE_BIST		= 0x58,	/* BIST activate FIS */
};
75 
/*
 * SCSI opcodes (byte 0 of the ATAPI packet command block)
 */
#define	TEST_UNIT_READY			0x00
#define	REQUEST_SENSE			0x03
#define	INQUIRY				0x12
#define	START_STOP_UNIT			0x1B
#define	PREVENT_ALLOW			0x1E
#define	READ_CAPACITY			0x25
#define	READ_10				0x28
#define	POSITION_TO_ELEMENT		0x2B
#define	READ_TOC			0x43
#define	GET_EVENT_STATUS_NOTIFICATION	0x4A
#define	MODE_SENSE_10			0x5A
#define	REPORT_LUNS			0xA0
#define	READ_12				0xA8
#define	READ_CD				0xBE
93 
/*
 * SCSI mode page codes
 */
#define	MODEPAGE_RW_ERROR_RECOVERY	0x01	/* read/write error recovery */
#define	MODEPAGE_CD_CAPABILITIES	0x2A	/* CD capabilities */
99 
/*
 * ATA commands
 *
 * NOTE(review): these look like SET FEATURES subcommand codes, with
 * ATA_SATA_SF_AN being a feature selector used together with the
 * enable/disable codes -- confirm against the SET FEATURES handler.
 */
#define	ATA_SF_ENAB_SATA_SF	0x10
#define		ATA_SATA_SF_AN	0x05
#define	ATA_SF_DIS_SATA_SF	0x90
106 
/*
 * Debug printf
 *
 * DPRINTF() writes to (and flushes) the 'dbg' stream when AHCI_DEBUG is
 * defined and expands to nothing otherwise.  WPRINTF() always prints to
 * stdout.
 */
#ifdef AHCI_DEBUG
static FILE *dbg;
#define	DPRINTF(format, ...)						\
	do{fprintf(dbg, format, ##__VA_ARGS__);fflush(dbg);}while(0)
#else
#define	DPRINTF(format, ...)
#endif
#define	WPRINTF(format, ...)	printf(format, ##__VA_ARGS__)
117 
118 struct ahci_ioreq {
119 	struct blockif_req io_req;
120 	struct ahci_port *io_pr;
121 	STAILQ_ENTRY(ahci_ioreq) io_flist;
122 	TAILQ_ENTRY(ahci_ioreq) io_blist;
123 	uint8_t *cfis;
124 	uint32_t len;
125 	uint32_t done;
126 	int slot;
127 	int more;
128 };
129 
130 struct ahci_port {
131 	struct blockif_ctxt *bctx;
132 	struct pci_ahci_softc *pr_sc;
133 	uint8_t *cmd_lst;
134 	uint8_t *rfis;
135 	char ident[20 + 1];
136 	int atapi;
137 	int reset;
138 	int mult_sectors;
139 	uint8_t xfermode;
140 	uint8_t err_cfis[20];
141 	uint8_t sense_key;
142 	uint8_t asc;
143 	uint32_t pending;
144 
145 	uint32_t clb;
146 	uint32_t clbu;
147 	uint32_t fb;
148 	uint32_t fbu;
149 	uint32_t is;
150 	uint32_t ie;
151 	uint32_t cmd;
152 	uint32_t unused0;
153 	uint32_t tfd;
154 	uint32_t sig;
155 	uint32_t ssts;
156 	uint32_t sctl;
157 	uint32_t serr;
158 	uint32_t sact;
159 	uint32_t ci;
160 	uint32_t sntf;
161 	uint32_t fbs;
162 
163 	/*
164 	 * i/o request info
165 	 */
166 	struct ahci_ioreq *ioreq;
167 	int ioqsz;
168 	STAILQ_HEAD(ahci_fhead, ahci_ioreq) iofhd;
169 	TAILQ_HEAD(ahci_bhead, ahci_ioreq) iobhd;
170 };
171 
/*
 * Command header: one entry of a port's command list, located at
 * cmd_lst + slot * AHCI_CL_SIZE.
 */
struct ahci_cmd_hdr {
	uint16_t	flags;
	uint16_t	prdtl;		/* number of PRDT entries */
	uint32_t	prdbc;		/* byte count transferred, set by write_prdt() */
	uint64_t	ctba;
	uint32_t	reserved[4];
};
179 
/* Mask selecting the byte-count bits of ahci_prdt_entry.dbc. */
#define	DBCMASK		0x3fffff

/*
 * Physical region descriptor: one guest buffer segment.  The PRDT
 * starts 0x80 bytes into the command table.
 */
struct ahci_prdt_entry {
	uint64_t	dba;		/* guest physical address of the segment */
	uint32_t	reserved;
	uint32_t	dbc;		/* (dbc & DBCMASK) + 1 is the length in bytes */
};
186 
187 struct pci_ahci_softc {
188 	struct pci_devinst *asc_pi;
189 	pthread_mutex_t	mtx;
190 	int ports;
191 	uint32_t cap;
192 	uint32_t ghc;
193 	uint32_t is;
194 	uint32_t pi;
195 	uint32_t vs;
196 	uint32_t ccc_ctl;
197 	uint32_t ccc_pts;
198 	uint32_t em_loc;
199 	uint32_t em_ctl;
200 	uint32_t cap2;
201 	uint32_t bohc;
202 	uint32_t lintr;
203 	struct ahci_port port[MAX_PORTS];
204 };
205 #define	ahci_ctx(sc)	((sc)->asc_pi->pi_vmctx)
206 
/*
 * Convert a logical block address into a minute/second/frame triple
 * stored in buf[0..2].  There are 75 frames per second and the address
 * is biased by 150 frames before conversion.
 */
static inline void
lba_to_msf(uint8_t *buf, int lba)
{
	int frames, seconds;

	frames = lba + 150;
	seconds = frames / 75;

	buf[0] = seconds / 60;
	buf[1] = seconds % 60;
	buf[2] = frames % 75;
}
214 
215 /*
216  * generate HBA intr depending on whether or not ports within
217  * the controller have an interrupt pending.
218  */
219 static void
220 ahci_generate_intr(struct pci_ahci_softc *sc)
221 {
222 	struct pci_devinst *pi;
223 	int i;
224 
225 	pi = sc->asc_pi;
226 
227 	for (i = 0; i < sc->ports; i++) {
228 		struct ahci_port *pr;
229 		pr = &sc->port[i];
230 		if (pr->is & pr->ie)
231 			sc->is |= (1 << i);
232 	}
233 
234 	DPRINTF("%s %x\n", __func__, sc->is);
235 
236 	if (sc->is && (sc->ghc & AHCI_GHC_IE)) {
237 		if (pci_msi_enabled(pi)) {
238 			/*
239 			 * Generate an MSI interrupt on every edge
240 			 */
241 			pci_generate_msi(pi, 0);
242 		} else if (!sc->lintr) {
243 			/*
244 			 * Only generate a pin-based interrupt if one wasn't
245 			 * in progress
246 			 */
247 			sc->lintr = 1;
248 			pci_lintr_assert(pi);
249 		}
250 	} else if (sc->lintr) {
251 		/*
252 		 * No interrupts: deassert pin-based signal if it had
253 		 * been asserted
254 		 */
255 		pci_lintr_deassert(pi);
256 		sc->lintr = 0;
257 	}
258 }
259 
260 static void
261 ahci_write_fis(struct ahci_port *p, enum sata_fis_type ft, uint8_t *fis)
262 {
263 	int offset, len, irq;
264 
265 	if (p->rfis == NULL || !(p->cmd & AHCI_P_CMD_FRE))
266 		return;
267 
268 	switch (ft) {
269 	case FIS_TYPE_REGD2H:
270 		offset = 0x40;
271 		len = 20;
272 		irq = AHCI_P_IX_DHR;
273 		break;
274 	case FIS_TYPE_SETDEVBITS:
275 		offset = 0x58;
276 		len = 8;
277 		irq = AHCI_P_IX_SDB;
278 		break;
279 	case FIS_TYPE_PIOSETUP:
280 		offset = 0x20;
281 		len = 20;
282 		irq = 0;
283 		break;
284 	default:
285 		WPRINTF("unsupported fis type %d\n", ft);
286 		return;
287 	}
288 	memcpy(p->rfis + offset, fis, len);
289 	if (irq) {
290 		p->is |= irq;
291 		ahci_generate_intr(p->pr_sc);
292 	}
293 }
294 
295 static void
296 ahci_write_fis_piosetup(struct ahci_port *p)
297 {
298 	uint8_t fis[20];
299 
300 	memset(fis, 0, sizeof(fis));
301 	fis[0] = FIS_TYPE_PIOSETUP;
302 	ahci_write_fis(p, FIS_TYPE_PIOSETUP, fis);
303 }
304 
305 static void
306 ahci_write_fis_sdb(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
307 {
308 	uint8_t fis[8];
309 	uint8_t error;
310 
311 	error = (tfd >> 8) & 0xff;
312 	memset(fis, 0, sizeof(fis));
313 	fis[0] = FIS_TYPE_SETDEVBITS;
314 	fis[1] = (1 << 6);
315 	fis[2] = tfd & 0x77;
316 	fis[3] = error;
317 	if (fis[2] & ATA_S_ERROR) {
318 		p->is |= AHCI_P_IX_TFE;
319 		p->err_cfis[0] = slot;
320 		p->err_cfis[2] = tfd & 0x77;
321 		p->err_cfis[3] = error;
322 		memcpy(&p->err_cfis[4], cfis + 4, 16);
323 	} else {
324 		*(uint32_t *)(fis + 4) = (1 << slot);
325 		p->sact &= ~(1 << slot);
326 	}
327 	p->tfd = tfd;
328 	ahci_write_fis(p, FIS_TYPE_SETDEVBITS, fis);
329 }
330 
331 static void
332 ahci_write_fis_d2h(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
333 {
334 	uint8_t fis[20];
335 	uint8_t error;
336 
337 	error = (tfd >> 8) & 0xff;
338 	memset(fis, 0, sizeof(fis));
339 	fis[0] = FIS_TYPE_REGD2H;
340 	fis[1] = (1 << 6);
341 	fis[2] = tfd & 0xff;
342 	fis[3] = error;
343 	fis[4] = cfis[4];
344 	fis[5] = cfis[5];
345 	fis[6] = cfis[6];
346 	fis[7] = cfis[7];
347 	fis[8] = cfis[8];
348 	fis[9] = cfis[9];
349 	fis[10] = cfis[10];
350 	fis[11] = cfis[11];
351 	fis[12] = cfis[12];
352 	fis[13] = cfis[13];
353 	if (fis[2] & ATA_S_ERROR) {
354 		p->is |= AHCI_P_IX_TFE;
355 		p->err_cfis[0] = 0x80;
356 		p->err_cfis[2] = tfd & 0xff;
357 		p->err_cfis[3] = error;
358 		memcpy(&p->err_cfis[4], cfis + 4, 16);
359 	} else
360 		p->ci &= ~(1 << slot);
361 	p->tfd = tfd;
362 	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
363 }
364 
365 static void
366 ahci_write_reset_fis_d2h(struct ahci_port *p)
367 {
368 	uint8_t fis[20];
369 
370 	memset(fis, 0, sizeof(fis));
371 	fis[0] = FIS_TYPE_REGD2H;
372 	fis[3] = 1;
373 	fis[4] = 1;
374 	if (p->atapi) {
375 		fis[5] = 0x14;
376 		fis[6] = 0xeb;
377 	}
378 	fis[12] = 1;
379 	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
380 }
381 
382 static void
383 ahci_check_stopped(struct ahci_port *p)
384 {
385 	/*
386 	 * If we are no longer processing the command list and nothing
387 	 * is in-flight, clear the running bit, the current command
388 	 * slot, the command issue and active bits.
389 	 */
390 	if (!(p->cmd & AHCI_P_CMD_ST)) {
391 		if (p->pending == 0) {
392 			p->cmd &= ~(AHCI_P_CMD_CR | AHCI_P_CMD_CCS_MASK);
393 			p->ci = 0;
394 			p->sact = 0;
395 		}
396 	}
397 }
398 
399 static void
400 ahci_port_stop(struct ahci_port *p)
401 {
402 	struct ahci_ioreq *aior;
403 	uint8_t *cfis;
404 	int slot;
405 	int ncq;
406 	int error;
407 
408 	assert(pthread_mutex_isowned_np(&p->pr_sc->mtx));
409 
410 	TAILQ_FOREACH(aior, &p->iobhd, io_blist) {
411 		/*
412 		 * Try to cancel the outstanding blockif request.
413 		 */
414 		error = blockif_cancel(p->bctx, &aior->io_req);
415 		if (error != 0)
416 			continue;
417 
418 		slot = aior->slot;
419 		cfis = aior->cfis;
420 		if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
421 		    cfis[2] == ATA_READ_FPDMA_QUEUED)
422 			ncq = 1;
423 
424 		if (ncq)
425 			p->sact &= ~(1 << slot);
426 		else
427 			p->ci &= ~(1 << slot);
428 
429 		/*
430 		 * This command is now done.
431 		 */
432 		p->pending &= ~(1 << slot);
433 
434 		/*
435 		 * Delete the blockif request from the busy list
436 		 */
437 		TAILQ_REMOVE(&p->iobhd, aior, io_blist);
438 
439 		/*
440 		 * Move the blockif request back to the free list
441 		 */
442 		STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
443 	}
444 
445 	ahci_check_stopped(p);
446 }
447 
448 static void
449 ahci_port_reset(struct ahci_port *pr)
450 {
451 	pr->serr = 0;
452 	pr->sact = 0;
453 	pr->xfermode = ATA_UDMA6;
454 	pr->mult_sectors = 128;
455 
456 	if (!pr->bctx) {
457 		pr->ssts = ATA_SS_DET_NO_DEVICE;
458 		pr->sig = 0xFFFFFFFF;
459 		pr->tfd = 0x7F;
460 		return;
461 	}
462 	pr->ssts = ATA_SS_DET_PHY_ONLINE | ATA_SS_IPM_ACTIVE;
463 	if (pr->sctl & ATA_SC_SPD_MASK)
464 		pr->ssts |= (pr->sctl & ATA_SC_SPD_MASK);
465 	else
466 		pr->ssts |= ATA_SS_SPD_GEN3;
467 	pr->tfd = (1 << 8) | ATA_S_DSC | ATA_S_DMA;
468 	if (!pr->atapi) {
469 		pr->sig = PxSIG_ATA;
470 		pr->tfd |= ATA_S_READY;
471 	} else
472 		pr->sig = PxSIG_ATAPI;
473 	ahci_write_reset_fis_d2h(pr);
474 }
475 
476 static void
477 ahci_reset(struct pci_ahci_softc *sc)
478 {
479 	int i;
480 
481 	sc->ghc = AHCI_GHC_AE;
482 	sc->is = 0;
483 
484 	if (sc->lintr) {
485 		pci_lintr_deassert(sc->asc_pi);
486 		sc->lintr = 0;
487 	}
488 
489 	for (i = 0; i < sc->ports; i++) {
490 		sc->port[i].ie = 0;
491 		sc->port[i].is = 0;
492 		sc->port[i].sctl = 0;
493 		ahci_port_reset(&sc->port[i]);
494 	}
495 }
496 
/*
 * Copy 'src' into the 'len'-byte field 'dest' with the two bytes of
 * every 16-bit word swapped, padding with spaces once 'src' runs out.
 * The result is not NUL-terminated.
 */
static void
ata_string(uint8_t *dest, const char *src, int len)
{
	int pos;

	for (pos = 0; pos < len; pos++)
		dest[pos ^ 1] = (*src != '\0') ? *src++ : ' ';
}
509 
/*
 * Copy 'src' into the 'len'-byte field 'dest', padding with spaces once
 * 'src' runs out.  The result is not NUL-terminated.
 */
static void
atapi_string(uint8_t *dest, const char *src, int len)
{
	uint8_t *out, *end;

	out = dest;
	end = dest + (len > 0 ? len : 0);
	while (out < end)
		*out++ = (*src != '\0') ? *src++ : ' ';
}
522 
523 /*
524  * Build up the iovec based on the PRDT, 'done' and 'len'.
525  */
526 static void
527 ahci_build_iov(struct ahci_port *p, struct ahci_ioreq *aior,
528     struct ahci_prdt_entry *prdt, uint16_t prdtl)
529 {
530 	struct blockif_req *breq = &aior->io_req;
531 	int i, j, skip, todo, left, extra;
532 	uint32_t dbcsz;
533 
534 	/* Copy part of PRDT between 'done' and 'len' bytes into the iov. */
535 	skip = aior->done;
536 	left = aior->len - aior->done;
537 	todo = 0;
538 	for (i = 0, j = 0; i < prdtl && j < BLOCKIF_IOV_MAX && left > 0;
539 	    i++, prdt++) {
540 		dbcsz = (prdt->dbc & DBCMASK) + 1;
541 		/* Skip already done part of the PRDT */
542 		if (dbcsz <= skip) {
543 			skip -= dbcsz;
544 			continue;
545 		}
546 		dbcsz -= skip;
547 		if (dbcsz > left)
548 			dbcsz = left;
549 		breq->br_iov[j].iov_base = paddr_guest2host(ahci_ctx(p->pr_sc),
550 		    prdt->dba + skip, dbcsz);
551 		breq->br_iov[j].iov_len = dbcsz;
552 		todo += dbcsz;
553 		left -= dbcsz;
554 		skip = 0;
555 		j++;
556 	}
557 
558 	/* If we got limited by IOV length, round I/O down to sector size. */
559 	if (j == BLOCKIF_IOV_MAX) {
560 		extra = todo % blockif_sectsz(p->bctx);
561 		todo -= extra;
562 		assert(todo > 0);
563 		while (extra > 0) {
564 			if (breq->br_iov[j - 1].iov_len > extra) {
565 				breq->br_iov[j - 1].iov_len -= extra;
566 				break;
567 			}
568 			extra -= breq->br_iov[j - 1].iov_len;
569 			j--;
570 		}
571 	}
572 
573 	breq->br_iovcnt = j;
574 	breq->br_resid = todo;
575 	aior->done += todo;
576 	aior->more = (aior->done < aior->len && i < prdtl);
577 }
578 
579 static void
580 ahci_handle_rw(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
581 {
582 	struct ahci_ioreq *aior;
583 	struct blockif_req *breq;
584 	struct ahci_prdt_entry *prdt;
585 	struct ahci_cmd_hdr *hdr;
586 	uint64_t lba;
587 	uint32_t len;
588 	int err, ncq, readop;
589 
590 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
591 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
592 	ncq = 0;
593 	readop = 1;
594 
595 	if (cfis[2] == ATA_WRITE || cfis[2] == ATA_WRITE48 ||
596 	    cfis[2] == ATA_WRITE_MUL || cfis[2] == ATA_WRITE_MUL48 ||
597 	    cfis[2] == ATA_WRITE_DMA || cfis[2] == ATA_WRITE_DMA48 ||
598 	    cfis[2] == ATA_WRITE_FPDMA_QUEUED)
599 		readop = 0;
600 
601 	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
602 	    cfis[2] == ATA_READ_FPDMA_QUEUED) {
603 		lba = ((uint64_t)cfis[10] << 40) |
604 			((uint64_t)cfis[9] << 32) |
605 			((uint64_t)cfis[8] << 24) |
606 			((uint64_t)cfis[6] << 16) |
607 			((uint64_t)cfis[5] << 8) |
608 			cfis[4];
609 		len = cfis[11] << 8 | cfis[3];
610 		if (!len)
611 			len = 65536;
612 		ncq = 1;
613 	} else if (cfis[2] == ATA_READ48 || cfis[2] == ATA_WRITE48 ||
614 	    cfis[2] == ATA_READ_MUL48 || cfis[2] == ATA_WRITE_MUL48 ||
615 	    cfis[2] == ATA_READ_DMA48 || cfis[2] == ATA_WRITE_DMA48) {
616 		lba = ((uint64_t)cfis[10] << 40) |
617 			((uint64_t)cfis[9] << 32) |
618 			((uint64_t)cfis[8] << 24) |
619 			((uint64_t)cfis[6] << 16) |
620 			((uint64_t)cfis[5] << 8) |
621 			cfis[4];
622 		len = cfis[13] << 8 | cfis[12];
623 		if (!len)
624 			len = 65536;
625 	} else {
626 		lba = ((cfis[7] & 0xf) << 24) | (cfis[6] << 16) |
627 			(cfis[5] << 8) | cfis[4];
628 		len = cfis[12];
629 		if (!len)
630 			len = 256;
631 	}
632 	lba *= blockif_sectsz(p->bctx);
633 	len *= blockif_sectsz(p->bctx);
634 
635 	/* Pull request off free list */
636 	aior = STAILQ_FIRST(&p->iofhd);
637 	assert(aior != NULL);
638 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
639 
640 	aior->cfis = cfis;
641 	aior->slot = slot;
642 	aior->len = len;
643 	aior->done = done;
644 	breq = &aior->io_req;
645 	breq->br_offset = lba + done;
646 	ahci_build_iov(p, aior, prdt, hdr->prdtl);
647 
648 	/* Mark this command in-flight. */
649 	p->pending |= 1 << slot;
650 
651 	/* Stuff request onto busy list. */
652 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
653 
654 	if (readop)
655 		err = blockif_read(p->bctx, breq);
656 	else
657 		err = blockif_write(p->bctx, breq);
658 	assert(err == 0);
659 
660 	if (ncq)
661 		p->ci &= ~(1 << slot);
662 }
663 
664 static void
665 ahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis)
666 {
667 	struct ahci_ioreq *aior;
668 	struct blockif_req *breq;
669 	int err;
670 
671 	/*
672 	 * Pull request off free list
673 	 */
674 	aior = STAILQ_FIRST(&p->iofhd);
675 	assert(aior != NULL);
676 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
677 	aior->cfis = cfis;
678 	aior->slot = slot;
679 	aior->len = 0;
680 	aior->done = 0;
681 	aior->more = 0;
682 	breq = &aior->io_req;
683 
684 	/*
685 	 * Mark this command in-flight.
686 	 */
687 	p->pending |= 1 << slot;
688 
689 	/*
690 	 * Stuff request onto busy list
691 	 */
692 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
693 
694 	err = blockif_flush(p->bctx, breq);
695 	assert(err == 0);
696 }
697 
698 static inline void
699 read_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
700 		void *buf, int size)
701 {
702 	struct ahci_cmd_hdr *hdr;
703 	struct ahci_prdt_entry *prdt;
704 	void *to;
705 	int i, len;
706 
707 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
708 	len = size;
709 	to = buf;
710 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
711 	for (i = 0; i < hdr->prdtl && len; i++) {
712 		uint8_t *ptr;
713 		uint32_t dbcsz;
714 		int sublen;
715 
716 		dbcsz = (prdt->dbc & DBCMASK) + 1;
717 		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
718 		sublen = len < dbcsz ? len : dbcsz;
719 		memcpy(to, ptr, sublen);
720 		len -= sublen;
721 		to += sublen;
722 		prdt++;
723 	}
724 }
725 
726 static void
727 ahci_handle_dsm_trim(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
728 {
729 	struct ahci_ioreq *aior;
730 	struct blockif_req *breq;
731 	uint8_t *entry;
732 	uint64_t elba;
733 	uint32_t len, elen;
734 	int err;
735 	uint8_t buf[512];
736 
737 	if (cfis[2] == ATA_DATA_SET_MANAGEMENT) {
738 		len = (uint16_t)cfis[13] << 8 | cfis[12];
739 		len *= 512;
740 	} else { /* ATA_SEND_FPDMA_QUEUED */
741 		len = (uint16_t)cfis[11] << 8 | cfis[3];
742 		len *= 512;
743 	}
744 	read_prdt(p, slot, cfis, buf, sizeof(buf));
745 
746 next:
747 	entry = &buf[done];
748 	elba = ((uint64_t)entry[5] << 40) |
749 		((uint64_t)entry[4] << 32) |
750 		((uint64_t)entry[3] << 24) |
751 		((uint64_t)entry[2] << 16) |
752 		((uint64_t)entry[1] << 8) |
753 		entry[0];
754 	elen = (uint16_t)entry[7] << 8 | entry[6];
755 	done += 8;
756 	if (elen == 0) {
757 		if (done >= len) {
758 			ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
759 			p->pending &= ~(1 << slot);
760 			ahci_check_stopped(p);
761 			return;
762 		}
763 		goto next;
764 	}
765 
766 	/*
767 	 * Pull request off free list
768 	 */
769 	aior = STAILQ_FIRST(&p->iofhd);
770 	assert(aior != NULL);
771 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
772 	aior->cfis = cfis;
773 	aior->slot = slot;
774 	aior->len = len;
775 	aior->done = done;
776 	aior->more = (len != done);
777 
778 	breq = &aior->io_req;
779 	breq->br_offset = elba * blockif_sectsz(p->bctx);
780 	breq->br_resid = elen * blockif_sectsz(p->bctx);
781 
782 	/*
783 	 * Mark this command in-flight.
784 	 */
785 	p->pending |= 1 << slot;
786 
787 	/*
788 	 * Stuff request onto busy list
789 	 */
790 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
791 
792 	err = blockif_delete(p->bctx, breq);
793 	assert(err == 0);
794 }
795 
796 static inline void
797 write_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
798 		void *buf, int size)
799 {
800 	struct ahci_cmd_hdr *hdr;
801 	struct ahci_prdt_entry *prdt;
802 	void *from;
803 	int i, len;
804 
805 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
806 	len = size;
807 	from = buf;
808 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
809 	for (i = 0; i < hdr->prdtl && len; i++) {
810 		uint8_t *ptr;
811 		uint32_t dbcsz;
812 		int sublen;
813 
814 		dbcsz = (prdt->dbc & DBCMASK) + 1;
815 		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
816 		sublen = len < dbcsz ? len : dbcsz;
817 		memcpy(ptr, from, sublen);
818 		len -= sublen;
819 		from += sublen;
820 		prdt++;
821 	}
822 	hdr->prdbc = size - len;
823 }
824 
/*
 * Store a checksum in the last byte of 'buf' such that the byte-wise
 * sum of all 'size' bytes is zero modulo 256.
 */
static void
ahci_checksum(uint8_t *buf, int size)
{
	const int last = size - 1;
	uint8_t total = 0;
	int idx = 0;

	while (idx < last)
		total += buf[idx++];
	buf[last] = (uint8_t)(0x100 - total);
}
835 
836 static void
837 ahci_handle_read_log(struct ahci_port *p, int slot, uint8_t *cfis)
838 {
839 	struct ahci_cmd_hdr *hdr;
840 	uint8_t buf[512];
841 
842 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
843 	if (p->atapi || hdr->prdtl == 0 || cfis[4] != 0x10 ||
844 	    cfis[5] != 0 || cfis[9] != 0 || cfis[12] != 1 || cfis[13] != 0) {
845 		ahci_write_fis_d2h(p, slot, cfis,
846 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
847 		return;
848 	}
849 
850 	memset(buf, 0, sizeof(buf));
851 	memcpy(buf, p->err_cfis, sizeof(p->err_cfis));
852 	ahci_checksum(buf, sizeof(buf));
853 
854 	if (cfis[2] == ATA_READ_LOG_EXT)
855 		ahci_write_fis_piosetup(p);
856 	write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
857 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
858 }
859 
860 static void
861 handle_identify(struct ahci_port *p, int slot, uint8_t *cfis)
862 {
863 	struct ahci_cmd_hdr *hdr;
864 
865 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
866 	if (p->atapi || hdr->prdtl == 0) {
867 		ahci_write_fis_d2h(p, slot, cfis,
868 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
869 	} else {
870 		uint16_t buf[256];
871 		uint64_t sectors;
872 		int sectsz, psectsz, psectoff, candelete, ro;
873 		uint16_t cyl;
874 		uint8_t sech, heads;
875 
876 		ro = blockif_is_ro(p->bctx);
877 		candelete = blockif_candelete(p->bctx);
878 		sectsz = blockif_sectsz(p->bctx);
879 		sectors = blockif_size(p->bctx) / sectsz;
880 		blockif_chs(p->bctx, &cyl, &heads, &sech);
881 		blockif_psectsz(p->bctx, &psectsz, &psectoff);
882 		memset(buf, 0, sizeof(buf));
883 		buf[0] = 0x0040;
884 		buf[1] = cyl;
885 		buf[3] = heads;
886 		buf[6] = sech;
887 		ata_string((uint8_t *)(buf+10), p->ident, 20);
888 		ata_string((uint8_t *)(buf+23), "001", 8);
889 		ata_string((uint8_t *)(buf+27), "BHYVE SATA DISK", 40);
890 		buf[47] = (0x8000 | 128);
891 		buf[48] = 0x1;
892 		buf[49] = (1 << 8 | 1 << 9 | 1 << 11);
893 		buf[50] = (1 << 14);
894 		buf[53] = (1 << 1 | 1 << 2);
895 		if (p->mult_sectors)
896 			buf[59] = (0x100 | p->mult_sectors);
897 		if (sectors <= 0x0fffffff) {
898 			buf[60] = sectors;
899 			buf[61] = (sectors >> 16);
900 		} else {
901 			buf[60] = 0xffff;
902 			buf[61] = 0x0fff;
903 		}
904 		buf[63] = 0x7;
905 		if (p->xfermode & ATA_WDMA0)
906 			buf[63] |= (1 << ((p->xfermode & 7) + 8));
907 		buf[64] = 0x3;
908 		buf[65] = 120;
909 		buf[66] = 120;
910 		buf[67] = 120;
911 		buf[68] = 120;
912 		buf[69] = 0;
913 		buf[75] = 31;
914 		buf[76] = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3 |
915 			   ATA_SUPPORT_NCQ);
916 		buf[77] = (ATA_SUPPORT_RCVSND_FPDMA_QUEUED |
917 			   (p->ssts & ATA_SS_SPD_MASK) >> 3);
918 		buf[80] = 0x3f0;
919 		buf[81] = 0x28;
920 		buf[82] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE|
921 			   ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
922 		buf[83] = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
923 			   ATA_SUPPORT_FLUSHCACHE48 | 1 << 14);
924 		buf[84] = (1 << 14);
925 		buf[85] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE|
926 			   ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
927 		buf[86] = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
928 			   ATA_SUPPORT_FLUSHCACHE48 | 1 << 15);
929 		buf[87] = (1 << 14);
930 		buf[88] = 0x7f;
931 		if (p->xfermode & ATA_UDMA0)
932 			buf[88] |= (1 << ((p->xfermode & 7) + 8));
933 		buf[100] = sectors;
934 		buf[101] = (sectors >> 16);
935 		buf[102] = (sectors >> 32);
936 		buf[103] = (sectors >> 48);
937 		if (candelete && !ro) {
938 			buf[69] |= ATA_SUPPORT_RZAT | ATA_SUPPORT_DRAT;
939 			buf[105] = 1;
940 			buf[169] = ATA_SUPPORT_DSM_TRIM;
941 		}
942 		buf[106] = 0x4000;
943 		buf[209] = 0x4000;
944 		if (psectsz > sectsz) {
945 			buf[106] |= 0x2000;
946 			buf[106] |= ffsl(psectsz / sectsz) - 1;
947 			buf[209] |= (psectoff / sectsz);
948 		}
949 		if (sectsz > 512) {
950 			buf[106] |= 0x1000;
951 			buf[117] = sectsz / 2;
952 			buf[118] = ((sectsz / 2) >> 16);
953 		}
954 		buf[119] = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
955 		buf[120] = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
956 		buf[222] = 0x1020;
957 		buf[255] = 0x00a5;
958 		ahci_checksum((uint8_t *)buf, sizeof(buf));
959 		ahci_write_fis_piosetup(p);
960 		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
961 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
962 	}
963 }
964 
965 static void
966 handle_atapi_identify(struct ahci_port *p, int slot, uint8_t *cfis)
967 {
968 	if (!p->atapi) {
969 		ahci_write_fis_d2h(p, slot, cfis,
970 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
971 	} else {
972 		uint16_t buf[256];
973 
974 		memset(buf, 0, sizeof(buf));
975 		buf[0] = (2 << 14 | 5 << 8 | 1 << 7 | 2 << 5);
976 		ata_string((uint8_t *)(buf+10), p->ident, 20);
977 		ata_string((uint8_t *)(buf+23), "001", 8);
978 		ata_string((uint8_t *)(buf+27), "BHYVE SATA DVD ROM", 40);
979 		buf[49] = (1 << 9 | 1 << 8);
980 		buf[50] = (1 << 14 | 1);
981 		buf[53] = (1 << 2 | 1 << 1);
982 		buf[62] = 0x3f;
983 		buf[63] = 7;
984 		if (p->xfermode & ATA_WDMA0)
985 			buf[63] |= (1 << ((p->xfermode & 7) + 8));
986 		buf[64] = 3;
987 		buf[65] = 120;
988 		buf[66] = 120;
989 		buf[67] = 120;
990 		buf[68] = 120;
991 		buf[76] = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3);
992 		buf[77] = ((p->ssts & ATA_SS_SPD_MASK) >> 3);
993 		buf[78] = (1 << 5);
994 		buf[80] = 0x3f0;
995 		buf[82] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
996 			   ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
997 		buf[83] = (1 << 14);
998 		buf[84] = (1 << 14);
999 		buf[85] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
1000 			   ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
1001 		buf[87] = (1 << 14);
1002 		buf[88] = 0x7f;
1003 		if (p->xfermode & ATA_UDMA0)
1004 			buf[88] |= (1 << ((p->xfermode & 7) + 8));
1005 		buf[222] = 0x1020;
1006 		buf[255] = 0x00a5;
1007 		ahci_checksum((uint8_t *)buf, sizeof(buf));
1008 		ahci_write_fis_piosetup(p);
1009 		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
1010 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
1011 	}
1012 }
1013 
1014 static void
1015 atapi_inquiry(struct ahci_port *p, int slot, uint8_t *cfis)
1016 {
1017 	uint8_t buf[36];
1018 	uint8_t *acmd;
1019 	int len;
1020 	uint32_t tfd;
1021 
1022 	acmd = cfis + 0x40;
1023 
1024 	if (acmd[1] & 1) {		/* VPD */
1025 		if (acmd[2] == 0) {	/* Supported VPD pages */
1026 			buf[0] = 0x05;
1027 			buf[1] = 0;
1028 			buf[2] = 0;
1029 			buf[3] = 1;
1030 			buf[4] = 0;
1031 			len = 4 + buf[3];
1032 		} else {
1033 			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1034 			p->asc = 0x24;
1035 			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1036 			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1037 			ahci_write_fis_d2h(p, slot, cfis, tfd);
1038 			return;
1039 		}
1040 	} else {
1041 		buf[0] = 0x05;
1042 		buf[1] = 0x80;
1043 		buf[2] = 0x00;
1044 		buf[3] = 0x21;
1045 		buf[4] = 31;
1046 		buf[5] = 0;
1047 		buf[6] = 0;
1048 		buf[7] = 0;
1049 		atapi_string(buf + 8, "BHYVE", 8);
1050 		atapi_string(buf + 16, "BHYVE DVD-ROM", 16);
1051 		atapi_string(buf + 32, "001", 4);
1052 		len = sizeof(buf);
1053 	}
1054 
1055 	if (len > acmd[4])
1056 		len = acmd[4];
1057 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1058 	write_prdt(p, slot, cfis, buf, len);
1059 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1060 }
1061 
1062 static void
1063 atapi_read_capacity(struct ahci_port *p, int slot, uint8_t *cfis)
1064 {
1065 	uint8_t buf[8];
1066 	uint64_t sectors;
1067 
1068 	sectors = blockif_size(p->bctx) / 2048;
1069 	be32enc(buf, sectors - 1);
1070 	be32enc(buf + 4, 2048);
1071 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1072 	write_prdt(p, slot, cfis, buf, sizeof(buf));
1073 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1074 }
1075 
1076 static void
1077 atapi_read_toc(struct ahci_port *p, int slot, uint8_t *cfis)
1078 {
1079 	uint8_t *acmd;
1080 	uint8_t format;
1081 	int len;
1082 
1083 	acmd = cfis + 0x40;
1084 
1085 	len = be16dec(acmd + 7);
1086 	format = acmd[9] >> 6;
1087 	switch (format) {
1088 	case 0:
1089 	{
1090 		int msf, size;
1091 		uint64_t sectors;
1092 		uint8_t start_track, buf[20], *bp;
1093 
1094 		msf = (acmd[1] >> 1) & 1;
1095 		start_track = acmd[6];
1096 		if (start_track > 1 && start_track != 0xaa) {
1097 			uint32_t tfd;
1098 			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1099 			p->asc = 0x24;
1100 			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1101 			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1102 			ahci_write_fis_d2h(p, slot, cfis, tfd);
1103 			return;
1104 		}
1105 		bp = buf + 2;
1106 		*bp++ = 1;
1107 		*bp++ = 1;
1108 		if (start_track <= 1) {
1109 			*bp++ = 0;
1110 			*bp++ = 0x14;
1111 			*bp++ = 1;
1112 			*bp++ = 0;
1113 			if (msf) {
1114 				*bp++ = 0;
1115 				lba_to_msf(bp, 0);
1116 				bp += 3;
1117 			} else {
1118 				*bp++ = 0;
1119 				*bp++ = 0;
1120 				*bp++ = 0;
1121 				*bp++ = 0;
1122 			}
1123 		}
1124 		*bp++ = 0;
1125 		*bp++ = 0x14;
1126 		*bp++ = 0xaa;
1127 		*bp++ = 0;
1128 		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
1129 		sectors >>= 2;
1130 		if (msf) {
1131 			*bp++ = 0;
1132 			lba_to_msf(bp, sectors);
1133 			bp += 3;
1134 		} else {
1135 			be32enc(bp, sectors);
1136 			bp += 4;
1137 		}
1138 		size = bp - buf;
1139 		be16enc(buf, size - 2);
1140 		if (len > size)
1141 			len = size;
1142 		write_prdt(p, slot, cfis, buf, len);
1143 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1144 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1145 		break;
1146 	}
1147 	case 1:
1148 	{
1149 		uint8_t buf[12];
1150 
1151 		memset(buf, 0, sizeof(buf));
1152 		buf[1] = 0xa;
1153 		buf[2] = 0x1;
1154 		buf[3] = 0x1;
1155 		if (len > sizeof(buf))
1156 			len = sizeof(buf);
1157 		write_prdt(p, slot, cfis, buf, len);
1158 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1159 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1160 		break;
1161 	}
1162 	case 2:
1163 	{
1164 		int msf, size;
1165 		uint64_t sectors;
1166 		uint8_t start_track, *bp, buf[50];
1167 
1168 		msf = (acmd[1] >> 1) & 1;
1169 		start_track = acmd[6];
1170 		bp = buf + 2;
1171 		*bp++ = 1;
1172 		*bp++ = 1;
1173 
1174 		*bp++ = 1;
1175 		*bp++ = 0x14;
1176 		*bp++ = 0;
1177 		*bp++ = 0xa0;
1178 		*bp++ = 0;
1179 		*bp++ = 0;
1180 		*bp++ = 0;
1181 		*bp++ = 0;
1182 		*bp++ = 1;
1183 		*bp++ = 0;
1184 		*bp++ = 0;
1185 
1186 		*bp++ = 1;
1187 		*bp++ = 0x14;
1188 		*bp++ = 0;
1189 		*bp++ = 0xa1;
1190 		*bp++ = 0;
1191 		*bp++ = 0;
1192 		*bp++ = 0;
1193 		*bp++ = 0;
1194 		*bp++ = 1;
1195 		*bp++ = 0;
1196 		*bp++ = 0;
1197 
1198 		*bp++ = 1;
1199 		*bp++ = 0x14;
1200 		*bp++ = 0;
1201 		*bp++ = 0xa2;
1202 		*bp++ = 0;
1203 		*bp++ = 0;
1204 		*bp++ = 0;
1205 		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
1206 		sectors >>= 2;
1207 		if (msf) {
1208 			*bp++ = 0;
1209 			lba_to_msf(bp, sectors);
1210 			bp += 3;
1211 		} else {
1212 			be32enc(bp, sectors);
1213 			bp += 4;
1214 		}
1215 
1216 		*bp++ = 1;
1217 		*bp++ = 0x14;
1218 		*bp++ = 0;
1219 		*bp++ = 1;
1220 		*bp++ = 0;
1221 		*bp++ = 0;
1222 		*bp++ = 0;
1223 		if (msf) {
1224 			*bp++ = 0;
1225 			lba_to_msf(bp, 0);
1226 			bp += 3;
1227 		} else {
1228 			*bp++ = 0;
1229 			*bp++ = 0;
1230 			*bp++ = 0;
1231 			*bp++ = 0;
1232 		}
1233 
1234 		size = bp - buf;
1235 		be16enc(buf, size - 2);
1236 		if (len > size)
1237 			len = size;
1238 		write_prdt(p, slot, cfis, buf, len);
1239 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1240 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1241 		break;
1242 	}
1243 	default:
1244 	{
1245 		uint32_t tfd;
1246 
1247 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1248 		p->asc = 0x24;
1249 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1250 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1251 		ahci_write_fis_d2h(p, slot, cfis, tfd);
1252 		break;
1253 	}
1254 	}
1255 }
1256 
1257 static void
1258 atapi_report_luns(struct ahci_port *p, int slot, uint8_t *cfis)
1259 {
1260 	uint8_t buf[16];
1261 
1262 	memset(buf, 0, sizeof(buf));
1263 	buf[3] = 8;
1264 
1265 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1266 	write_prdt(p, slot, cfis, buf, sizeof(buf));
1267 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1268 }
1269 
1270 static void
1271 atapi_read(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
1272 {
1273 	struct ahci_ioreq *aior;
1274 	struct ahci_cmd_hdr *hdr;
1275 	struct ahci_prdt_entry *prdt;
1276 	struct blockif_req *breq;
1277 	struct pci_ahci_softc *sc;
1278 	uint8_t *acmd;
1279 	uint64_t lba;
1280 	uint32_t len;
1281 	int err;
1282 
1283 	sc = p->pr_sc;
1284 	acmd = cfis + 0x40;
1285 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1286 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1287 
1288 	lba = be32dec(acmd + 2);
1289 	if (acmd[0] == READ_10)
1290 		len = be16dec(acmd + 7);
1291 	else
1292 		len = be32dec(acmd + 6);
1293 	if (len == 0) {
1294 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1295 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1296 	}
1297 	lba *= 2048;
1298 	len *= 2048;
1299 
1300 	/*
1301 	 * Pull request off free list
1302 	 */
1303 	aior = STAILQ_FIRST(&p->iofhd);
1304 	assert(aior != NULL);
1305 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
1306 	aior->cfis = cfis;
1307 	aior->slot = slot;
1308 	aior->len = len;
1309 	aior->done = done;
1310 	breq = &aior->io_req;
1311 	breq->br_offset = lba + done;
1312 	ahci_build_iov(p, aior, prdt, hdr->prdtl);
1313 
1314 	/* Mark this command in-flight. */
1315 	p->pending |= 1 << slot;
1316 
1317 	/* Stuff request onto busy list. */
1318 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
1319 
1320 	err = blockif_read(p->bctx, breq);
1321 	assert(err == 0);
1322 }
1323 
1324 static void
1325 atapi_request_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1326 {
1327 	uint8_t buf[64];
1328 	uint8_t *acmd;
1329 	int len;
1330 
1331 	acmd = cfis + 0x40;
1332 	len = acmd[4];
1333 	if (len > sizeof(buf))
1334 		len = sizeof(buf);
1335 	memset(buf, 0, len);
1336 	buf[0] = 0x70 | (1 << 7);
1337 	buf[2] = p->sense_key;
1338 	buf[7] = 10;
1339 	buf[12] = p->asc;
1340 	write_prdt(p, slot, cfis, buf, len);
1341 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1342 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1343 }
1344 
1345 static void
1346 atapi_start_stop_unit(struct ahci_port *p, int slot, uint8_t *cfis)
1347 {
1348 	uint8_t *acmd = cfis + 0x40;
1349 	uint32_t tfd;
1350 
1351 	switch (acmd[4] & 3) {
1352 	case 0:
1353 	case 1:
1354 	case 3:
1355 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1356 		tfd = ATA_S_READY | ATA_S_DSC;
1357 		break;
1358 	case 2:
1359 		/* TODO eject media */
1360 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1361 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1362 		p->asc = 0x53;
1363 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1364 		break;
1365 	}
1366 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1367 }
1368 
1369 static void
1370 atapi_mode_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1371 {
1372 	uint8_t *acmd;
1373 	uint32_t tfd;
1374 	uint8_t pc, code;
1375 	int len;
1376 
1377 	acmd = cfis + 0x40;
1378 	len = be16dec(acmd + 7);
1379 	pc = acmd[2] >> 6;
1380 	code = acmd[2] & 0x3f;
1381 
1382 	switch (pc) {
1383 	case 0:
1384 		switch (code) {
1385 		case MODEPAGE_RW_ERROR_RECOVERY:
1386 		{
1387 			uint8_t buf[16];
1388 
1389 			if (len > sizeof(buf))
1390 				len = sizeof(buf);
1391 
1392 			memset(buf, 0, sizeof(buf));
1393 			be16enc(buf, 16 - 2);
1394 			buf[2] = 0x70;
1395 			buf[8] = 0x01;
1396 			buf[9] = 16 - 10;
1397 			buf[11] = 0x05;
1398 			write_prdt(p, slot, cfis, buf, len);
1399 			tfd = ATA_S_READY | ATA_S_DSC;
1400 			break;
1401 		}
1402 		case MODEPAGE_CD_CAPABILITIES:
1403 		{
1404 			uint8_t buf[30];
1405 
1406 			if (len > sizeof(buf))
1407 				len = sizeof(buf);
1408 
1409 			memset(buf, 0, sizeof(buf));
1410 			be16enc(buf, 30 - 2);
1411 			buf[2] = 0x70;
1412 			buf[8] = 0x2A;
1413 			buf[9] = 30 - 10;
1414 			buf[10] = 0x08;
1415 			buf[12] = 0x71;
1416 			be16enc(&buf[18], 2);
1417 			be16enc(&buf[20], 512);
1418 			write_prdt(p, slot, cfis, buf, len);
1419 			tfd = ATA_S_READY | ATA_S_DSC;
1420 			break;
1421 		}
1422 		default:
1423 			goto error;
1424 			break;
1425 		}
1426 		break;
1427 	case 3:
1428 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1429 		p->asc = 0x39;
1430 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1431 		break;
1432 error:
1433 	case 1:
1434 	case 2:
1435 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1436 		p->asc = 0x24;
1437 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1438 		break;
1439 	}
1440 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1441 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1442 }
1443 
1444 static void
1445 atapi_get_event_status_notification(struct ahci_port *p, int slot,
1446     uint8_t *cfis)
1447 {
1448 	uint8_t *acmd;
1449 	uint32_t tfd;
1450 
1451 	acmd = cfis + 0x40;
1452 
1453 	/* we don't support asynchronous operation */
1454 	if (!(acmd[1] & 1)) {
1455 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1456 		p->asc = 0x24;
1457 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1458 	} else {
1459 		uint8_t buf[8];
1460 		int len;
1461 
1462 		len = be16dec(acmd + 7);
1463 		if (len > sizeof(buf))
1464 			len = sizeof(buf);
1465 
1466 		memset(buf, 0, sizeof(buf));
1467 		be16enc(buf, 8 - 2);
1468 		buf[2] = 0x04;
1469 		buf[3] = 0x10;
1470 		buf[5] = 0x02;
1471 		write_prdt(p, slot, cfis, buf, len);
1472 		tfd = ATA_S_READY | ATA_S_DSC;
1473 	}
1474 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1475 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1476 }
1477 
1478 static void
1479 handle_packet_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1480 {
1481 	uint8_t *acmd;
1482 
1483 	acmd = cfis + 0x40;
1484 
1485 #ifdef AHCI_DEBUG
1486 	{
1487 		int i;
1488 		DPRINTF("ACMD:");
1489 		for (i = 0; i < 16; i++)
1490 			DPRINTF("%02x ", acmd[i]);
1491 		DPRINTF("\n");
1492 	}
1493 #endif
1494 
1495 	switch (acmd[0]) {
1496 	case TEST_UNIT_READY:
1497 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1498 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1499 		break;
1500 	case INQUIRY:
1501 		atapi_inquiry(p, slot, cfis);
1502 		break;
1503 	case READ_CAPACITY:
1504 		atapi_read_capacity(p, slot, cfis);
1505 		break;
1506 	case PREVENT_ALLOW:
1507 		/* TODO */
1508 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1509 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1510 		break;
1511 	case READ_TOC:
1512 		atapi_read_toc(p, slot, cfis);
1513 		break;
1514 	case REPORT_LUNS:
1515 		atapi_report_luns(p, slot, cfis);
1516 		break;
1517 	case READ_10:
1518 	case READ_12:
1519 		atapi_read(p, slot, cfis, 0);
1520 		break;
1521 	case REQUEST_SENSE:
1522 		atapi_request_sense(p, slot, cfis);
1523 		break;
1524 	case START_STOP_UNIT:
1525 		atapi_start_stop_unit(p, slot, cfis);
1526 		break;
1527 	case MODE_SENSE_10:
1528 		atapi_mode_sense(p, slot, cfis);
1529 		break;
1530 	case GET_EVENT_STATUS_NOTIFICATION:
1531 		atapi_get_event_status_notification(p, slot, cfis);
1532 		break;
1533 	default:
1534 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1535 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1536 		p->asc = 0x20;
1537 		ahci_write_fis_d2h(p, slot, cfis, (p->sense_key << 12) |
1538 				ATA_S_READY | ATA_S_ERROR);
1539 		break;
1540 	}
1541 }
1542 
1543 static void
1544 ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1545 {
1546 
1547 	switch (cfis[2]) {
1548 	case ATA_ATA_IDENTIFY:
1549 		handle_identify(p, slot, cfis);
1550 		break;
1551 	case ATA_SETFEATURES:
1552 	{
1553 		switch (cfis[3]) {
1554 		case ATA_SF_ENAB_SATA_SF:
1555 			switch (cfis[12]) {
1556 			case ATA_SATA_SF_AN:
1557 				p->tfd = ATA_S_DSC | ATA_S_READY;
1558 				break;
1559 			default:
1560 				p->tfd = ATA_S_ERROR | ATA_S_READY;
1561 				p->tfd |= (ATA_ERROR_ABORT << 8);
1562 				break;
1563 			}
1564 			break;
1565 		case ATA_SF_ENAB_WCACHE:
1566 		case ATA_SF_DIS_WCACHE:
1567 		case ATA_SF_ENAB_RCACHE:
1568 		case ATA_SF_DIS_RCACHE:
1569 			p->tfd = ATA_S_DSC | ATA_S_READY;
1570 			break;
1571 		case ATA_SF_SETXFER:
1572 		{
1573 			switch (cfis[12] & 0xf8) {
1574 			case ATA_PIO:
1575 			case ATA_PIO0:
1576 				break;
1577 			case ATA_WDMA0:
1578 			case ATA_UDMA0:
1579 				p->xfermode = (cfis[12] & 0x7);
1580 				break;
1581 			}
1582 			p->tfd = ATA_S_DSC | ATA_S_READY;
1583 			break;
1584 		}
1585 		default:
1586 			p->tfd = ATA_S_ERROR | ATA_S_READY;
1587 			p->tfd |= (ATA_ERROR_ABORT << 8);
1588 			break;
1589 		}
1590 		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1591 		break;
1592 	}
1593 	case ATA_SET_MULTI:
1594 		if (cfis[12] != 0 &&
1595 			(cfis[12] > 128 || (cfis[12] & (cfis[12] - 1)))) {
1596 			p->tfd = ATA_S_ERROR | ATA_S_READY;
1597 			p->tfd |= (ATA_ERROR_ABORT << 8);
1598 		} else {
1599 			p->mult_sectors = cfis[12];
1600 			p->tfd = ATA_S_DSC | ATA_S_READY;
1601 		}
1602 		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1603 		break;
1604 	case ATA_READ:
1605 	case ATA_WRITE:
1606 	case ATA_READ48:
1607 	case ATA_WRITE48:
1608 	case ATA_READ_MUL:
1609 	case ATA_WRITE_MUL:
1610 	case ATA_READ_MUL48:
1611 	case ATA_WRITE_MUL48:
1612 	case ATA_READ_DMA:
1613 	case ATA_WRITE_DMA:
1614 	case ATA_READ_DMA48:
1615 	case ATA_WRITE_DMA48:
1616 	case ATA_READ_FPDMA_QUEUED:
1617 	case ATA_WRITE_FPDMA_QUEUED:
1618 		ahci_handle_rw(p, slot, cfis, 0);
1619 		break;
1620 	case ATA_FLUSHCACHE:
1621 	case ATA_FLUSHCACHE48:
1622 		ahci_handle_flush(p, slot, cfis);
1623 		break;
1624 	case ATA_DATA_SET_MANAGEMENT:
1625 		if (cfis[11] == 0 && cfis[3] == ATA_DSM_TRIM &&
1626 		    cfis[13] == 0 && cfis[12] == 1) {
1627 			ahci_handle_dsm_trim(p, slot, cfis, 0);
1628 			break;
1629 		}
1630 		ahci_write_fis_d2h(p, slot, cfis,
1631 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1632 		break;
1633 	case ATA_SEND_FPDMA_QUEUED:
1634 		if ((cfis[13] & 0x1f) == ATA_SFPDMA_DSM &&
1635 		    cfis[17] == 0 && cfis[16] == ATA_DSM_TRIM &&
1636 		    cfis[11] == 0 && cfis[13] == 1) {
1637 			ahci_handle_dsm_trim(p, slot, cfis, 0);
1638 			break;
1639 		}
1640 		ahci_write_fis_d2h(p, slot, cfis,
1641 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1642 		break;
1643 	case ATA_READ_LOG_EXT:
1644 	case ATA_READ_LOG_DMA_EXT:
1645 		ahci_handle_read_log(p, slot, cfis);
1646 		break;
1647 	case ATA_NOP:
1648 		ahci_write_fis_d2h(p, slot, cfis,
1649 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1650 		break;
1651 	case ATA_STANDBY_CMD:
1652 	case ATA_STANDBY_IMMEDIATE:
1653 	case ATA_IDLE_CMD:
1654 	case ATA_IDLE_IMMEDIATE:
1655 	case ATA_SLEEP:
1656 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1657 		break;
1658 	case ATA_ATAPI_IDENTIFY:
1659 		handle_atapi_identify(p, slot, cfis);
1660 		break;
1661 	case ATA_PACKET_CMD:
1662 		if (!p->atapi) {
1663 			ahci_write_fis_d2h(p, slot, cfis,
1664 			    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1665 		} else
1666 			handle_packet_cmd(p, slot, cfis);
1667 		break;
1668 	default:
1669 		WPRINTF("Unsupported cmd:%02x\n", cfis[2]);
1670 		ahci_write_fis_d2h(p, slot, cfis,
1671 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1672 		break;
1673 	}
1674 }
1675 
1676 static void
1677 ahci_handle_slot(struct ahci_port *p, int slot)
1678 {
1679 	struct ahci_cmd_hdr *hdr;
1680 	struct ahci_prdt_entry *prdt;
1681 	struct pci_ahci_softc *sc;
1682 	uint8_t *cfis;
1683 	int cfl;
1684 
1685 	sc = p->pr_sc;
1686 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1687 	cfl = (hdr->flags & 0x1f) * 4;
1688 	cfis = paddr_guest2host(ahci_ctx(sc), hdr->ctba,
1689 			0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry));
1690 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1691 
1692 #ifdef AHCI_DEBUG
1693 	DPRINTF("\ncfis:");
1694 	for (i = 0; i < cfl; i++) {
1695 		if (i % 10 == 0)
1696 			DPRINTF("\n");
1697 		DPRINTF("%02x ", cfis[i]);
1698 	}
1699 	DPRINTF("\n");
1700 
1701 	for (i = 0; i < hdr->prdtl; i++) {
1702 		DPRINTF("%d@%08"PRIx64"\n", prdt->dbc & 0x3fffff, prdt->dba);
1703 		prdt++;
1704 	}
1705 #endif
1706 
1707 	if (cfis[0] != FIS_TYPE_REGH2D) {
1708 		WPRINTF("Not a H2D FIS:%02x\n", cfis[0]);
1709 		return;
1710 	}
1711 
1712 	if (cfis[1] & 0x80) {
1713 		ahci_handle_cmd(p, slot, cfis);
1714 	} else {
1715 		if (cfis[15] & (1 << 2))
1716 			p->reset = 1;
1717 		else if (p->reset) {
1718 			p->reset = 0;
1719 			ahci_port_reset(p);
1720 		}
1721 		p->ci &= ~(1 << slot);
1722 	}
1723 }
1724 
1725 static void
1726 ahci_handle_port(struct ahci_port *p)
1727 {
1728 	int i;
1729 
1730 	if (!(p->cmd & AHCI_P_CMD_ST))
1731 		return;
1732 
1733 	/*
1734 	 * Search for any new commands to issue ignoring those that
1735 	 * are already in-flight.
1736 	 */
1737 	for (i = 0; (i < 32) && p->ci; i++) {
1738 		if ((p->ci & (1 << i)) && !(p->pending & (1 << i))) {
1739 			p->cmd &= ~AHCI_P_CMD_CCS_MASK;
1740 			p->cmd |= i << AHCI_P_CMD_CCS_SHIFT;
1741 			ahci_handle_slot(p, i);
1742 		}
1743 	}
1744 }
1745 
1746 /*
1747  * blockif callback routine - this runs in the context of the blockif
1748  * i/o thread, so the mutex needs to be acquired.
1749  */
1750 static void
1751 ata_ioreq_cb(struct blockif_req *br, int err)
1752 {
1753 	struct ahci_cmd_hdr *hdr;
1754 	struct ahci_ioreq *aior;
1755 	struct ahci_port *p;
1756 	struct pci_ahci_softc *sc;
1757 	uint32_t tfd;
1758 	uint8_t *cfis;
1759 	int slot, ncq, dsm;
1760 
1761 	DPRINTF("%s %d\n", __func__, err);
1762 
1763 	ncq = dsm = 0;
1764 	aior = br->br_param;
1765 	p = aior->io_pr;
1766 	cfis = aior->cfis;
1767 	slot = aior->slot;
1768 	sc = p->pr_sc;
1769 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1770 
1771 	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
1772 	    cfis[2] == ATA_READ_FPDMA_QUEUED ||
1773 	    cfis[2] == ATA_SEND_FPDMA_QUEUED)
1774 		ncq = 1;
1775 	if (cfis[2] == ATA_DATA_SET_MANAGEMENT ||
1776 	    (cfis[2] == ATA_SEND_FPDMA_QUEUED &&
1777 	     (cfis[13] & 0x1f) == ATA_SFPDMA_DSM))
1778 		dsm = 1;
1779 
1780 	pthread_mutex_lock(&sc->mtx);
1781 
1782 	/*
1783 	 * Delete the blockif request from the busy list
1784 	 */
1785 	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1786 
1787 	/*
1788 	 * Move the blockif request back to the free list
1789 	 */
1790 	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
1791 
1792 	if (!err)
1793 		hdr->prdbc = aior->done;
1794 
1795 	if (!err && aior->more) {
1796 		if (dsm)
1797 			ahci_handle_dsm_trim(p, slot, cfis, aior->done);
1798 		else
1799 			ahci_handle_rw(p, slot, cfis, aior->done);
1800 		goto out;
1801 	}
1802 
1803 	if (!err)
1804 		tfd = ATA_S_READY | ATA_S_DSC;
1805 	else
1806 		tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1807 	if (ncq)
1808 		ahci_write_fis_sdb(p, slot, cfis, tfd);
1809 	else
1810 		ahci_write_fis_d2h(p, slot, cfis, tfd);
1811 
1812 	/*
1813 	 * This command is now complete.
1814 	 */
1815 	p->pending &= ~(1 << slot);
1816 
1817 	ahci_check_stopped(p);
1818 out:
1819 	pthread_mutex_unlock(&sc->mtx);
1820 	DPRINTF("%s exit\n", __func__);
1821 }
1822 
1823 static void
1824 atapi_ioreq_cb(struct blockif_req *br, int err)
1825 {
1826 	struct ahci_cmd_hdr *hdr;
1827 	struct ahci_ioreq *aior;
1828 	struct ahci_port *p;
1829 	struct pci_ahci_softc *sc;
1830 	uint8_t *cfis;
1831 	uint32_t tfd;
1832 	int slot;
1833 
1834 	DPRINTF("%s %d\n", __func__, err);
1835 
1836 	aior = br->br_param;
1837 	p = aior->io_pr;
1838 	cfis = aior->cfis;
1839 	slot = aior->slot;
1840 	sc = p->pr_sc;
1841 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + aior->slot * AHCI_CL_SIZE);
1842 
1843 	pthread_mutex_lock(&sc->mtx);
1844 
1845 	/*
1846 	 * Delete the blockif request from the busy list
1847 	 */
1848 	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1849 
1850 	/*
1851 	 * Move the blockif request back to the free list
1852 	 */
1853 	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
1854 
1855 	if (!err)
1856 		hdr->prdbc = aior->done;
1857 
1858 	if (!err && aior->more) {
1859 		atapi_read(p, slot, cfis, aior->done);
1860 		goto out;
1861 	}
1862 
1863 	if (!err) {
1864 		tfd = ATA_S_READY | ATA_S_DSC;
1865 	} else {
1866 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1867 		p->asc = 0x21;
1868 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1869 	}
1870 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1871 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1872 
1873 	/*
1874 	 * This command is now complete.
1875 	 */
1876 	p->pending &= ~(1 << slot);
1877 
1878 	ahci_check_stopped(p);
1879 out:
1880 	pthread_mutex_unlock(&sc->mtx);
1881 	DPRINTF("%s exit\n", __func__);
1882 }
1883 
1884 static void
1885 pci_ahci_ioreq_init(struct ahci_port *pr)
1886 {
1887 	struct ahci_ioreq *vr;
1888 	int i;
1889 
1890 	pr->ioqsz = blockif_queuesz(pr->bctx);
1891 	pr->ioreq = calloc(pr->ioqsz, sizeof(struct ahci_ioreq));
1892 	STAILQ_INIT(&pr->iofhd);
1893 
1894 	/*
1895 	 * Add all i/o request entries to the free queue
1896 	 */
1897 	for (i = 0; i < pr->ioqsz; i++) {
1898 		vr = &pr->ioreq[i];
1899 		vr->io_pr = pr;
1900 		if (!pr->atapi)
1901 			vr->io_req.br_callback = ata_ioreq_cb;
1902 		else
1903 			vr->io_req.br_callback = atapi_ioreq_cb;
1904 		vr->io_req.br_param = vr;
1905 		STAILQ_INSERT_TAIL(&pr->iofhd, vr, io_flist);
1906 	}
1907 
1908 	TAILQ_INIT(&pr->iobhd);
1909 }
1910 
1911 static void
1912 pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
1913 {
1914 	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
1915 	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
1916 	struct ahci_port *p = &sc->port[port];
1917 
1918 	DPRINTF("pci_ahci_port %d: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
1919 		port, offset, value);
1920 
1921 	switch (offset) {
1922 	case AHCI_P_CLB:
1923 		p->clb = value;
1924 		break;
1925 	case AHCI_P_CLBU:
1926 		p->clbu = value;
1927 		break;
1928 	case AHCI_P_FB:
1929 		p->fb = value;
1930 		break;
1931 	case AHCI_P_FBU:
1932 		p->fbu = value;
1933 		break;
1934 	case AHCI_P_IS:
1935 		p->is &= ~value;
1936 		break;
1937 	case AHCI_P_IE:
1938 		p->ie = value & 0xFDC000FF;
1939 		ahci_generate_intr(sc);
1940 		break;
1941 	case AHCI_P_CMD:
1942 	{
1943 		p->cmd = value;
1944 
1945 		if (!(value & AHCI_P_CMD_ST)) {
1946 			ahci_port_stop(p);
1947 		} else {
1948 			uint64_t clb;
1949 
1950 			p->cmd |= AHCI_P_CMD_CR;
1951 			clb = (uint64_t)p->clbu << 32 | p->clb;
1952 			p->cmd_lst = paddr_guest2host(ahci_ctx(sc), clb,
1953 					AHCI_CL_SIZE * AHCI_MAX_SLOTS);
1954 		}
1955 
1956 		if (value & AHCI_P_CMD_FRE) {
1957 			uint64_t fb;
1958 
1959 			p->cmd |= AHCI_P_CMD_FR;
1960 			fb = (uint64_t)p->fbu << 32 | p->fb;
1961 			/* we don't support FBSCP, so rfis size is 256Bytes */
1962 			p->rfis = paddr_guest2host(ahci_ctx(sc), fb, 256);
1963 		} else {
1964 			p->cmd &= ~AHCI_P_CMD_FR;
1965 		}
1966 
1967 		if (value & AHCI_P_CMD_CLO) {
1968 			p->tfd = 0;
1969 			p->cmd &= ~AHCI_P_CMD_CLO;
1970 		}
1971 
1972 		ahci_handle_port(p);
1973 		break;
1974 	}
1975 	case AHCI_P_TFD:
1976 	case AHCI_P_SIG:
1977 	case AHCI_P_SSTS:
1978 		WPRINTF("pci_ahci_port: read only registers 0x%"PRIx64"\n", offset);
1979 		break;
1980 	case AHCI_P_SCTL:
1981 		p->sctl = value;
1982 		if (!(p->cmd & AHCI_P_CMD_ST)) {
1983 			if (value & ATA_SC_DET_RESET)
1984 				ahci_port_reset(p);
1985 		}
1986 		break;
1987 	case AHCI_P_SERR:
1988 		p->serr &= ~value;
1989 		break;
1990 	case AHCI_P_SACT:
1991 		p->sact |= value;
1992 		break;
1993 	case AHCI_P_CI:
1994 		p->ci |= value;
1995 		ahci_handle_port(p);
1996 		break;
1997 	case AHCI_P_SNTF:
1998 	case AHCI_P_FBS:
1999 	default:
2000 		break;
2001 	}
2002 }
2003 
2004 static void
2005 pci_ahci_host_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
2006 {
2007 	DPRINTF("pci_ahci_host: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
2008 		offset, value);
2009 
2010 	switch (offset) {
2011 	case AHCI_CAP:
2012 	case AHCI_PI:
2013 	case AHCI_VS:
2014 	case AHCI_CAP2:
2015 		DPRINTF("pci_ahci_host: read only registers 0x%"PRIx64"\n", offset);
2016 		break;
2017 	case AHCI_GHC:
2018 		if (value & AHCI_GHC_HR)
2019 			ahci_reset(sc);
2020 		else if (value & AHCI_GHC_IE) {
2021 			sc->ghc |= AHCI_GHC_IE;
2022 			ahci_generate_intr(sc);
2023 		}
2024 		break;
2025 	case AHCI_IS:
2026 		sc->is &= ~value;
2027 		ahci_generate_intr(sc);
2028 		break;
2029 	default:
2030 		break;
2031 	}
2032 }
2033 
2034 static void
2035 pci_ahci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
2036 		int baridx, uint64_t offset, int size, uint64_t value)
2037 {
2038 	struct pci_ahci_softc *sc = pi->pi_arg;
2039 
2040 	assert(baridx == 5);
2041 	assert(size == 4);
2042 
2043 	pthread_mutex_lock(&sc->mtx);
2044 
2045 	if (offset < AHCI_OFFSET)
2046 		pci_ahci_host_write(sc, offset, value);
2047 	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
2048 		pci_ahci_port_write(sc, offset, value);
2049 	else
2050 		WPRINTF("pci_ahci: unknown i/o write offset 0x%"PRIx64"\n", offset);
2051 
2052 	pthread_mutex_unlock(&sc->mtx);
2053 }
2054 
2055 static uint64_t
2056 pci_ahci_host_read(struct pci_ahci_softc *sc, uint64_t offset)
2057 {
2058 	uint32_t value;
2059 
2060 	switch (offset) {
2061 	case AHCI_CAP:
2062 	case AHCI_GHC:
2063 	case AHCI_IS:
2064 	case AHCI_PI:
2065 	case AHCI_VS:
2066 	case AHCI_CCCC:
2067 	case AHCI_CCCP:
2068 	case AHCI_EM_LOC:
2069 	case AHCI_EM_CTL:
2070 	case AHCI_CAP2:
2071 	{
2072 		uint32_t *p = &sc->cap;
2073 		p += (offset - AHCI_CAP) / sizeof(uint32_t);
2074 		value = *p;
2075 		break;
2076 	}
2077 	default:
2078 		value = 0;
2079 		break;
2080 	}
2081 	DPRINTF("pci_ahci_host: read offset 0x%"PRIx64" value 0x%x\n",
2082 		offset, value);
2083 
2084 	return (value);
2085 }
2086 
2087 static uint64_t
2088 pci_ahci_port_read(struct pci_ahci_softc *sc, uint64_t offset)
2089 {
2090 	uint32_t value;
2091 	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
2092 	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
2093 
2094 	switch (offset) {
2095 	case AHCI_P_CLB:
2096 	case AHCI_P_CLBU:
2097 	case AHCI_P_FB:
2098 	case AHCI_P_FBU:
2099 	case AHCI_P_IS:
2100 	case AHCI_P_IE:
2101 	case AHCI_P_CMD:
2102 	case AHCI_P_TFD:
2103 	case AHCI_P_SIG:
2104 	case AHCI_P_SSTS:
2105 	case AHCI_P_SCTL:
2106 	case AHCI_P_SERR:
2107 	case AHCI_P_SACT:
2108 	case AHCI_P_CI:
2109 	case AHCI_P_SNTF:
2110 	case AHCI_P_FBS:
2111 	{
2112 		uint32_t *p= &sc->port[port].clb;
2113 		p += (offset - AHCI_P_CLB) / sizeof(uint32_t);
2114 		value = *p;
2115 		break;
2116 	}
2117 	default:
2118 		value = 0;
2119 		break;
2120 	}
2121 
2122 	DPRINTF("pci_ahci_port %d: read offset 0x%"PRIx64" value 0x%x\n",
2123 		port, offset, value);
2124 
2125 	return value;
2126 }
2127 
2128 static uint64_t
2129 pci_ahci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
2130     uint64_t offset, int size)
2131 {
2132 	struct pci_ahci_softc *sc = pi->pi_arg;
2133 	uint32_t value;
2134 
2135 	assert(baridx == 5);
2136 	assert(size == 4);
2137 
2138 	pthread_mutex_lock(&sc->mtx);
2139 
2140 	if (offset < AHCI_OFFSET)
2141 		value = pci_ahci_host_read(sc, offset);
2142 	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
2143 		value = pci_ahci_port_read(sc, offset);
2144 	else {
2145 		value = 0;
2146 		WPRINTF("pci_ahci: unknown i/o read offset 0x%"PRIx64"\n", offset);
2147 	}
2148 
2149 	pthread_mutex_unlock(&sc->mtx);
2150 
2151 	return (value);
2152 }
2153 
2154 static int
2155 pci_ahci_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts, int atapi)
2156 {
2157 	char bident[sizeof("XX:X:X")];
2158 	struct blockif_ctxt *bctxt;
2159 	struct pci_ahci_softc *sc;
2160 	int ret, slots;
2161 	MD5_CTX mdctx;
2162 	u_char digest[16];
2163 
2164 	ret = 0;
2165 
2166 	if (opts == NULL) {
2167 		fprintf(stderr, "pci_ahci: backing device required\n");
2168 		return (1);
2169 	}
2170 
2171 #ifdef AHCI_DEBUG
2172 	dbg = fopen("/tmp/log", "w+");
2173 #endif
2174 
2175 	sc = calloc(1, sizeof(struct pci_ahci_softc));
2176 	pi->pi_arg = sc;
2177 	sc->asc_pi = pi;
2178 	sc->ports = MAX_PORTS;
2179 
2180 	/*
2181 	 * Only use port 0 for a backing device. All other ports will be
2182 	 * marked as unused
2183 	 */
2184 	sc->port[0].atapi = atapi;
2185 
2186 	/*
2187 	 * Attempt to open the backing image. Use the PCI
2188 	 * slot/func for the identifier string.
2189 	 */
2190 	snprintf(bident, sizeof(bident), "%d:%d", pi->pi_slot, pi->pi_func);
2191 	bctxt = blockif_open(opts, bident);
2192 	if (bctxt == NULL) {
2193 		ret = 1;
2194 		goto open_fail;
2195 	}
2196 	sc->port[0].bctx = bctxt;
2197 	sc->port[0].pr_sc = sc;
2198 
2199 	/*
2200 	 * Create an identifier for the backing file. Use parts of the
2201 	 * md5 sum of the filename
2202 	 */
2203 	MD5Init(&mdctx);
2204 	MD5Update(&mdctx, opts, strlen(opts));
2205 	MD5Final(digest, &mdctx);
2206 	sprintf(sc->port[0].ident, "BHYVE-%02X%02X-%02X%02X-%02X%02X",
2207 	    digest[0], digest[1], digest[2], digest[3], digest[4], digest[5]);
2208 
2209 	/*
2210 	 * Allocate blockif request structures and add them
2211 	 * to the free list
2212 	 */
2213 	pci_ahci_ioreq_init(&sc->port[0]);
2214 
2215 	pthread_mutex_init(&sc->mtx, NULL);
2216 
2217 	/* Intel ICH8 AHCI */
2218 	slots = sc->port[0].ioqsz;
2219 	if (slots > 32)
2220 		slots = 32;
2221 	--slots;
2222 	sc->cap = AHCI_CAP_64BIT | AHCI_CAP_SNCQ | AHCI_CAP_SSNTF |
2223 	    AHCI_CAP_SMPS | AHCI_CAP_SSS | AHCI_CAP_SALP |
2224 	    AHCI_CAP_SAL | AHCI_CAP_SCLO | (0x3 << AHCI_CAP_ISS_SHIFT)|
2225 	    AHCI_CAP_PMD | AHCI_CAP_SSC | AHCI_CAP_PSC |
2226 	    (slots << AHCI_CAP_NCS_SHIFT) | AHCI_CAP_SXS | (sc->ports - 1);
2227 
2228 	/* Only port 0 implemented */
2229 	sc->pi = 1;
2230 	sc->vs = 0x10300;
2231 	sc->cap2 = AHCI_CAP2_APST;
2232 	ahci_reset(sc);
2233 
2234 	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x2821);
2235 	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x8086);
2236 	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
2237 	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_STORAGE_SATA);
2238 	pci_set_cfgdata8(pi, PCIR_PROGIF, PCIP_STORAGE_SATA_AHCI_1_0);
2239 	pci_emul_add_msicap(pi, 1);
2240 	pci_emul_alloc_bar(pi, 5, PCIBAR_MEM32,
2241 	    AHCI_OFFSET + sc->ports * AHCI_STEP);
2242 
2243 	pci_lintr_request(pi);
2244 
2245 open_fail:
2246 	if (ret) {
2247 		if (sc->port[0].bctx != NULL)
2248 			blockif_close(sc->port[0].bctx);
2249 		free(sc);
2250 	}
2251 
2252 	return (ret);
2253 }
2254 
/*
 * Init hook for the "ahci-hd" emulation: sets the device up with the
 * ATAPI flag cleared.
 */
static int
pci_ahci_hd_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int is_atapi = 0;

	return (pci_ahci_init(ctx, pi, opts, is_atapi));
}
2261 
/*
 * Init hook for the "ahci-cd" emulation: sets the device up with the
 * ATAPI flag set.
 */
static int
pci_ahci_atapi_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int is_atapi = 1;

	return (pci_ahci_init(ctx, pi, opts, is_atapi));
}
2268 
2269 /*
2270  * Use separate emulation names to distinguish drive and atapi devices
2271  */
2272 struct pci_devemu pci_de_ahci_hd = {
2273 	.pe_emu =	"ahci-hd",
2274 	.pe_init =	pci_ahci_hd_init,
2275 	.pe_barwrite =	pci_ahci_write,
2276 	.pe_barread =	pci_ahci_read
2277 };
2278 PCI_EMUL_SET(pci_de_ahci_hd);
2279 
2280 struct pci_devemu pci_de_ahci_cd = {
2281 	.pe_emu =	"ahci-cd",
2282 	.pe_init =	pci_ahci_atapi_init,
2283 	.pe_barwrite =	pci_ahci_write,
2284 	.pe_barread =	pci_ahci_read
2285 };
2286 PCI_EMUL_SET(pci_de_ahci_cd);
2287