xref: /freebsd/usr.sbin/bhyve/pci_ahci.c (revision bd66c1b43e33540205dbc1187c2f2a15c58b57ba)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2013  Zhixiang Yu <zcore@freebsd.org>
5  * Copyright (c) 2015-2016 Alexander Motin <mav@FreeBSD.org>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/param.h>
31 #include <sys/linker_set.h>
32 #include <sys/stat.h>
33 #include <sys/uio.h>
34 #include <sys/ioctl.h>
35 #include <sys/disk.h>
36 #include <sys/ata.h>
37 #include <sys/endian.h>
38 
39 #include <errno.h>
40 #include <fcntl.h>
41 #include <stdio.h>
42 #include <stdlib.h>
43 #include <stdint.h>
44 #include <string.h>
45 #include <strings.h>
46 #include <unistd.h>
47 #include <assert.h>
48 #include <pthread.h>
49 #include <pthread_np.h>
50 #include <inttypes.h>
51 #include <md5.h>
52 
53 #include "bhyverun.h"
54 #include "config.h"
55 #include "debug.h"
56 #include "pci_emul.h"
57 #ifdef BHYVE_SNAPSHOT
58 #include "snapshot.h"
59 #endif
60 #include "ahci.h"
61 #include "block_if.h"
62 
/*
 * Port-count constants.  MAX_PORTS sizes the port[] array embedded in
 * struct pci_ahci_softc.
 */
#define	DEF_PORTS	6	/* Intel ICH8 AHCI supports 6 ports */
#define	MAX_PORTS	32	/* AHCI supports 32 ports */

/*
 * Device signature values; ahci_port_reset() stores one of these in the
 * port's 'sig' field depending on whether the port is ATAPI.
 */
#define	PxSIG_ATA	0x00000101 /* ATA drive */
#define	PxSIG_ATAPI	0xeb140101 /* ATAPI drive */
68 
/*
 * SATA FIS type codes.  The FIS builders in this file store one of these
 * values in byte 0 of the FIS, and ahci_write_fis() uses the type to pick
 * the destination offset inside the received-FIS area.
 */
enum sata_fis_type {
	FIS_TYPE_REGH2D		= 0x27,	/* Register FIS - host to device */
	FIS_TYPE_REGD2H		= 0x34,	/* Register FIS - device to host */
	FIS_TYPE_DMAACT		= 0x39,	/* DMA activate FIS - device to host */
	FIS_TYPE_DMASETUP	= 0x41,	/* DMA setup FIS - bidirectional */
	FIS_TYPE_DATA		= 0x46,	/* Data FIS - bidirectional */
	FIS_TYPE_BIST		= 0x58,	/* BIST activate FIS - bidirectional */
	FIS_TYPE_PIOSETUP	= 0x5F,	/* PIO setup FIS - device to host */
	FIS_TYPE_SETDEVBITS	= 0xA1,	/* Set dev bits FIS - device to host */
};
79 
/*
 * SCSI opcodes
 *
 * NOTE(review): presumably these are the packet-command opcodes handled by
 * the ATAPI emulation; the dispatch code is not visible here -- confirm.
 */
#define	TEST_UNIT_READY		0x00
#define	REQUEST_SENSE		0x03
#define	INQUIRY			0x12
#define	START_STOP_UNIT		0x1B
#define	PREVENT_ALLOW		0x1E
#define	READ_CAPACITY		0x25
#define	READ_10			0x28
#define	POSITION_TO_ELEMENT	0x2B
#define	READ_TOC		0x43
#define	GET_EVENT_STATUS_NOTIFICATION 0x4A
#define	MODE_SENSE_10		0x5A
#define	REPORT_LUNS		0xA0
#define	READ_12			0xA8
#define	READ_CD			0xBE

/*
 * SCSI mode page codes
 */
#define	MODEPAGE_RW_ERROR_RECOVERY	0x01
#define	MODEPAGE_CD_CAPABILITIES	0x2A

/*
 * ATA commands
 *
 * NOTE(review): the ATA_SF_ / ATA_SATA_SF_ prefixes suggest these are
 * SET FEATURES subcommand / SATA feature codes rather than command
 * opcodes -- verify against the code that consumes them.
 */
#define	ATA_SF_ENAB_SATA_SF		0x10
#define	ATA_SATA_SF_AN			0x05
#define	ATA_SF_DIS_SATA_SF		0x90
110 
/*
 * Debug printf
 *
 * With AHCI_DEBUG defined, DPRINTF() formats to the file-scope stream
 * 'dbg' and flushes it after every message.  Without AHCI_DEBUG the macro
 * expands to nothing, so its arguments are never evaluated -- do not put
 * side effects in them.  'dbg' is not assigned in this part of the file;
 * it must be opened before the first DPRINTF() call.
 */
#ifdef AHCI_DEBUG
static FILE *dbg;
#define DPRINTF(format, arg...)	do{fprintf(dbg, format, ##arg);fflush(dbg);}while(0)
#else
#define DPRINTF(format, arg...)
#endif
120 
/*
 * Length of a port identification buffer (presumably 20 characters plus a
 * terminating NUL -- confirm against the users of this macro).
 * The expansion is parenthesized so the macro behaves as a single value
 * inside larger expressions such as AHCI_PORT_IDENT * 2.
 */
#define AHCI_PORT_IDENT (20 + 1)
122 
/*
 * Per-command I/O request.  Each request lives either on its port's free
 * list (iofhd) or its busy list (iobhd) while a blockif operation is
 * outstanding.
 */
struct ahci_ioreq {
	struct blockif_req io_req;	/* request passed to blockif_*() */
	struct ahci_port *io_pr;	/* presumably the owning port -- not set in the visible code */
	STAILQ_ENTRY(ahci_ioreq) io_flist;	/* free-list linkage */
	TAILQ_ENTRY(ahci_ioreq) io_blist;	/* busy-list linkage */
	uint8_t *cfis;		/* command FIS of the command being served */
	uint32_t len;		/* total length of the command, in bytes */
	uint32_t done;		/* bytes already covered by issued I/O */
	int slot;		/* command slot number */
	int more;		/* non-zero if another pass is needed */
	int readop;		/* 1 for reads, 0 for writes (see ahci_handle_rw) */
};
135 
/*
 * State of one emulated AHCI port.
 */
struct ahci_port {
	struct blockif_ctxt *bctx;	/* block backend; NULL means no device attached */
	struct pci_ahci_softc *pr_sc;	/* owning controller */
	struct ata_params ata_ident;	/* IDENTIFY data built by ata_identify_init() */
	uint8_t *cmd_lst;	/* command list, indexed by slot * AHCI_CL_SIZE */
	uint8_t *rfis;		/* received-FIS area written by ahci_write_fis() */
	int port;		/* port index; also the bit position in the global IS */
	int atapi;		/* non-zero if the port emulates an ATAPI device */
	int reset;		/* not referenced in the visible code */
	int waitforclear;	/* set when an error FIS is posted, cleared in ahci_check_stopped() */
	int mult_sectors;	/* set to 128 on port reset; reported in IDENTIFY 'multi' */
	uint8_t xfermode;	/* transfer mode; ATA_UDMA6 after port reset */
	uint8_t err_cfis[20];	/* copy of the last failed command, returned by log page 0x10 */
	uint8_t sense_key;	/* ATAPI sense key of the last error */
	uint8_t asc;		/* ATAPI additional sense code of the last error */
	u_int ccs;		/* current command slot; zeroed in ahci_check_stopped() */
	uint32_t pending;	/* bitmap of slots with I/O in flight */

	/*
	 * Port register values.  NOTE(review): the names appear to follow
	 * the AHCI PxCLB..PxFBS port registers -- confirm against the
	 * register read/write handlers, which are not visible here.
	 */
	uint32_t clb;
	uint32_t clbu;
	uint32_t fb;
	uint32_t fbu;
	uint32_t is;
	uint32_t ie;
	uint32_t cmd;
	uint32_t unused0;
	uint32_t tfd;
	uint32_t sig;
	uint32_t ssts;
	uint32_t sctl;
	uint32_t serr;
	uint32_t sact;
	uint32_t ci;
	uint32_t sntf;
	uint32_t fbs;

	/*
	 * i/o request info
	 */
	struct ahci_ioreq *ioreq;	/* presumably the backing array of requests -- allocation not visible */
	int ioqsz;			/* presumably the number of entries in ioreq -- confirm */
	STAILQ_HEAD(ahci_fhead, ahci_ioreq) iofhd;	/* free requests */
	TAILQ_HEAD(ahci_bhead, ahci_ioreq) iobhd;	/* busy (in-flight) requests */
};
180 
/*
 * Command header as laid out in the command list.  The code in this file
 * overlays it on p->cmd_lst + slot * AHCI_CL_SIZE, so the field order and
 * sizes must not change.
 */
struct ahci_cmd_hdr {
	uint16_t flags;
	uint16_t prdtl;		/* number of PRDT entries to walk */
	uint32_t prdbc;		/* byte count; write_prdt() stores the bytes it copied */
	uint64_t ctba;
	uint32_t reserved[4];
};
188 
/*
 * One PRDT entry.  The code in this file overlays an array of these at
 * cfis + 0x80, so the layout must not change.
 */
struct ahci_prdt_entry {
	uint64_t dba;		/* guest physical address of the data buffer */
	uint32_t reserved;
#define	DBCMASK		0x3fffff
	uint32_t dbc;		/* (dbc & DBCMASK) + 1 is the buffer length in bytes */
};
195 
/*
 * Per-controller state.
 */
struct pci_ahci_softc {
	struct pci_devinst *asc_pi;	/* PCI device instance used for interrupts */
	pthread_mutex_t	mtx;		/* ahci_port_stop() asserts that this is held */
	int ports;			/* number of ports in use in port[] */
	/*
	 * Controller register values.  NOTE(review): the names appear to
	 * mirror the AHCI generic host control registers -- confirm against
	 * the register handlers, which are not visible here.
	 */
	uint32_t cap;
	uint32_t ghc;
	uint32_t is;			/* one bit per port with a pending interrupt */
	uint32_t pi;
	uint32_t vs;
	uint32_t ccc_ctl;
	uint32_t ccc_pts;
	uint32_t em_loc;
	uint32_t em_ctl;
	uint32_t cap2;
	uint32_t bohc;
	uint32_t lintr;			/* non-zero while the legacy interrupt is asserted */
	struct ahci_port port[MAX_PORTS];
};
/* VM context of the controller, as needed by paddr_guest2host(). */
#define	ahci_ctx(sc)	((sc)->asc_pi->pi_vmctx)
215 
/* Forward declaration: ahci_handle_dsm_trim() calls this before a definition is in scope. */
static void ahci_handle_port(struct ahci_port *p);
217 
/*
 * Convert a logical block address to minute/second/frame form.
 *
 * 150 is added to 'lba' first; the sum is then split using 75 frames per
 * second and 60 seconds per minute.  buf[0] receives the minute, buf[1]
 * the second and buf[2] the frame.
 */
static inline void lba_to_msf(uint8_t *buf, int lba)
{
	int frames, seconds;

	frames = lba + 150;
	seconds = frames / 75;

	buf[2] = frames % 75;
	buf[1] = seconds % 60;
	buf[0] = seconds / 60;
}
225 
226 /*
227  * Generate HBA interrupts on global IS register write.
228  */
229 static void
230 ahci_generate_intr(struct pci_ahci_softc *sc, uint32_t mask)
231 {
232 	struct pci_devinst *pi = sc->asc_pi;
233 	struct ahci_port *p;
234 	int i, nmsg;
235 	uint32_t mmask;
236 
237 	/* Update global IS from PxIS/PxIE. */
238 	for (i = 0; i < sc->ports; i++) {
239 		p = &sc->port[i];
240 		if (p->is & p->ie)
241 			sc->is |= (1 << i);
242 	}
243 	DPRINTF("%s(%08x) %08x", __func__, mask, sc->is);
244 
245 	/* If there is nothing enabled -- clear legacy interrupt and exit. */
246 	if (sc->is == 0 || (sc->ghc & AHCI_GHC_IE) == 0) {
247 		if (sc->lintr) {
248 			pci_lintr_deassert(pi);
249 			sc->lintr = 0;
250 		}
251 		return;
252 	}
253 
254 	/* If there is anything and no MSI -- assert legacy interrupt. */
255 	nmsg = pci_msi_maxmsgnum(pi);
256 	if (nmsg == 0) {
257 		if (!sc->lintr) {
258 			sc->lintr = 1;
259 			pci_lintr_assert(pi);
260 		}
261 		return;
262 	}
263 
264 	/* Assert respective MSIs for ports that were touched. */
265 	for (i = 0; i < nmsg; i++) {
266 		if (sc->ports <= nmsg || i < nmsg - 1)
267 			mmask = 1 << i;
268 		else
269 			mmask = 0xffffffff << i;
270 		if (sc->is & mask && mmask & mask)
271 			pci_generate_msi(pi, i);
272 	}
273 }
274 
275 /*
276  * Generate HBA interrupt on specific port event.
277  */
278 static void
279 ahci_port_intr(struct ahci_port *p)
280 {
281 	struct pci_ahci_softc *sc = p->pr_sc;
282 	struct pci_devinst *pi = sc->asc_pi;
283 	int nmsg;
284 
285 	DPRINTF("%s(%d) %08x/%08x %08x", __func__,
286 	    p->port, p->is, p->ie, sc->is);
287 
288 	/* If there is nothing enabled -- we are done. */
289 	if ((p->is & p->ie) == 0)
290 		return;
291 
292 	/* In case of non-shared MSI always generate interrupt. */
293 	nmsg = pci_msi_maxmsgnum(pi);
294 	if (sc->ports <= nmsg || p->port < nmsg - 1) {
295 		sc->is |= (1 << p->port);
296 		if ((sc->ghc & AHCI_GHC_IE) == 0)
297 			return;
298 		pci_generate_msi(pi, p->port);
299 		return;
300 	}
301 
302 	/* If IS for this port is already set -- do nothing. */
303 	if (sc->is & (1 << p->port))
304 		return;
305 
306 	sc->is |= (1 << p->port);
307 
308 	/* If interrupts are enabled -- generate one. */
309 	if ((sc->ghc & AHCI_GHC_IE) == 0)
310 		return;
311 	if (nmsg > 0) {
312 		pci_generate_msi(pi, nmsg - 1);
313 	} else if (!sc->lintr) {
314 		sc->lintr = 1;
315 		pci_lintr_assert(pi);
316 	}
317 }
318 
319 static void
320 ahci_write_fis(struct ahci_port *p, enum sata_fis_type ft, uint8_t *fis)
321 {
322 	int offset, len, irq;
323 
324 	if (p->rfis == NULL || !(p->cmd & AHCI_P_CMD_FRE))
325 		return;
326 
327 	switch (ft) {
328 	case FIS_TYPE_REGD2H:
329 		offset = 0x40;
330 		len = 20;
331 		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_DHR : 0;
332 		break;
333 	case FIS_TYPE_SETDEVBITS:
334 		offset = 0x58;
335 		len = 8;
336 		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_SDB : 0;
337 		break;
338 	case FIS_TYPE_PIOSETUP:
339 		offset = 0x20;
340 		len = 20;
341 		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_PS : 0;
342 		break;
343 	default:
344 		EPRINTLN("unsupported fis type %d", ft);
345 		return;
346 	}
347 	if (fis[2] & ATA_S_ERROR) {
348 		p->waitforclear = 1;
349 		irq |= AHCI_P_IX_TFE;
350 	}
351 	memcpy(p->rfis + offset, fis, len);
352 	if (irq) {
353 		if (~p->is & irq) {
354 			p->is |= irq;
355 			ahci_port_intr(p);
356 		}
357 	}
358 }
359 
360 static void
361 ahci_write_fis_piosetup(struct ahci_port *p)
362 {
363 	uint8_t fis[20];
364 
365 	memset(fis, 0, sizeof(fis));
366 	fis[0] = FIS_TYPE_PIOSETUP;
367 	ahci_write_fis(p, FIS_TYPE_PIOSETUP, fis);
368 }
369 
370 static void
371 ahci_write_fis_sdb(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
372 {
373 	uint8_t fis[8];
374 	uint8_t error;
375 
376 	error = (tfd >> 8) & 0xff;
377 	tfd &= 0x77;
378 	memset(fis, 0, sizeof(fis));
379 	fis[0] = FIS_TYPE_SETDEVBITS;
380 	fis[1] = (1 << 6);
381 	fis[2] = tfd;
382 	fis[3] = error;
383 	if (fis[2] & ATA_S_ERROR) {
384 		p->err_cfis[0] = slot;
385 		p->err_cfis[2] = tfd;
386 		p->err_cfis[3] = error;
387 		memcpy(&p->err_cfis[4], cfis + 4, 16);
388 	} else {
389 		*(uint32_t *)(fis + 4) = (1 << slot);
390 		p->sact &= ~(1 << slot);
391 	}
392 	p->tfd &= ~0x77;
393 	p->tfd |= tfd;
394 	ahci_write_fis(p, FIS_TYPE_SETDEVBITS, fis);
395 }
396 
397 static void
398 ahci_write_fis_d2h(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
399 {
400 	uint8_t fis[20];
401 	uint8_t error;
402 
403 	error = (tfd >> 8) & 0xff;
404 	memset(fis, 0, sizeof(fis));
405 	fis[0] = FIS_TYPE_REGD2H;
406 	fis[1] = (1 << 6);
407 	fis[2] = tfd & 0xff;
408 	fis[3] = error;
409 	fis[4] = cfis[4];
410 	fis[5] = cfis[5];
411 	fis[6] = cfis[6];
412 	fis[7] = cfis[7];
413 	fis[8] = cfis[8];
414 	fis[9] = cfis[9];
415 	fis[10] = cfis[10];
416 	fis[11] = cfis[11];
417 	fis[12] = cfis[12];
418 	fis[13] = cfis[13];
419 	if (fis[2] & ATA_S_ERROR) {
420 		p->err_cfis[0] = 0x80;
421 		p->err_cfis[2] = tfd & 0xff;
422 		p->err_cfis[3] = error;
423 		memcpy(&p->err_cfis[4], cfis + 4, 16);
424 	} else
425 		p->ci &= ~(1 << slot);
426 	p->tfd = tfd;
427 	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
428 }
429 
430 static void
431 ahci_write_fis_d2h_ncq(struct ahci_port *p, int slot)
432 {
433 	uint8_t fis[20];
434 
435 	p->tfd = ATA_S_READY | ATA_S_DSC;
436 	memset(fis, 0, sizeof(fis));
437 	fis[0] = FIS_TYPE_REGD2H;
438 	fis[1] = 0;			/* No interrupt */
439 	fis[2] = p->tfd;		/* Status */
440 	fis[3] = 0;			/* No error */
441 	p->ci &= ~(1 << slot);
442 	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
443 }
444 
445 static void
446 ahci_write_reset_fis_d2h(struct ahci_port *p)
447 {
448 	uint8_t fis[20];
449 
450 	memset(fis, 0, sizeof(fis));
451 	fis[0] = FIS_TYPE_REGD2H;
452 	fis[3] = 1;
453 	fis[4] = 1;
454 	if (p->atapi) {
455 		fis[5] = 0x14;
456 		fis[6] = 0xeb;
457 	}
458 	fis[12] = 1;
459 	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
460 }
461 
462 static void
463 ahci_check_stopped(struct ahci_port *p)
464 {
465 	/*
466 	 * If we are no longer processing the command list and nothing
467 	 * is in-flight, clear the running bit, the current command
468 	 * slot, the command issue and active bits.
469 	 */
470 	if (!(p->cmd & AHCI_P_CMD_ST)) {
471 		if (p->pending == 0) {
472 			p->ccs = 0;
473 			p->cmd &= ~(AHCI_P_CMD_CR | AHCI_P_CMD_CCS_MASK);
474 			p->ci = 0;
475 			p->sact = 0;
476 			p->waitforclear = 0;
477 		}
478 	}
479 }
480 
481 static void
482 ahci_port_stop(struct ahci_port *p)
483 {
484 	struct ahci_ioreq *aior;
485 	uint8_t *cfis;
486 	int slot;
487 	int error;
488 
489 	assert(pthread_mutex_isowned_np(&p->pr_sc->mtx));
490 
491 	TAILQ_FOREACH(aior, &p->iobhd, io_blist) {
492 		/*
493 		 * Try to cancel the outstanding blockif request.
494 		 */
495 		error = blockif_cancel(p->bctx, &aior->io_req);
496 		if (error != 0)
497 			continue;
498 
499 		slot = aior->slot;
500 		cfis = aior->cfis;
501 		if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
502 		    cfis[2] == ATA_READ_FPDMA_QUEUED ||
503 		    cfis[2] == ATA_SEND_FPDMA_QUEUED)
504 			p->sact &= ~(1 << slot);	/* NCQ */
505 		else
506 			p->ci &= ~(1 << slot);
507 
508 		/*
509 		 * This command is now done.
510 		 */
511 		p->pending &= ~(1 << slot);
512 
513 		/*
514 		 * Delete the blockif request from the busy list
515 		 */
516 		TAILQ_REMOVE(&p->iobhd, aior, io_blist);
517 
518 		/*
519 		 * Move the blockif request back to the free list
520 		 */
521 		STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
522 	}
523 
524 	ahci_check_stopped(p);
525 }
526 
527 static void
528 ahci_port_reset(struct ahci_port *pr)
529 {
530 	pr->serr = 0;
531 	pr->sact = 0;
532 	pr->xfermode = ATA_UDMA6;
533 	pr->mult_sectors = 128;
534 
535 	if (!pr->bctx) {
536 		pr->ssts = ATA_SS_DET_NO_DEVICE;
537 		pr->sig = 0xFFFFFFFF;
538 		pr->tfd = 0x7F;
539 		return;
540 	}
541 	pr->ssts = ATA_SS_DET_PHY_ONLINE | ATA_SS_IPM_ACTIVE;
542 	if (pr->sctl & ATA_SC_SPD_MASK)
543 		pr->ssts |= (pr->sctl & ATA_SC_SPD_MASK);
544 	else
545 		pr->ssts |= ATA_SS_SPD_GEN3;
546 	pr->tfd = (1 << 8) | ATA_S_DSC | ATA_S_DMA;
547 	if (!pr->atapi) {
548 		pr->sig = PxSIG_ATA;
549 		pr->tfd |= ATA_S_READY;
550 	} else
551 		pr->sig = PxSIG_ATAPI;
552 	ahci_write_reset_fis_d2h(pr);
553 }
554 
555 static void
556 ahci_reset(struct pci_ahci_softc *sc)
557 {
558 	int i;
559 
560 	sc->ghc = AHCI_GHC_AE;
561 	sc->is = 0;
562 
563 	if (sc->lintr) {
564 		pci_lintr_deassert(sc->asc_pi);
565 		sc->lintr = 0;
566 	}
567 
568 	for (i = 0; i < sc->ports; i++) {
569 		sc->port[i].ie = 0;
570 		sc->port[i].is = 0;
571 		sc->port[i].cmd = (AHCI_P_CMD_SUD | AHCI_P_CMD_POD);
572 		if (sc->port[i].bctx)
573 			sc->port[i].cmd |= AHCI_P_CMD_CPS;
574 		sc->port[i].sctl = 0;
575 		ahci_port_reset(&sc->port[i]);
576 	}
577 }
578 
/*
 * Copy 'src' into the 'len'-byte field 'dest' in ATA string layout:
 * the two bytes of every pair are swapped (index i is stored at i ^ 1)
 * and the field is padded with spaces once 'src' is exhausted.  No NUL
 * terminator is written.
 */
static void
ata_string(uint8_t *dest, const char *src, int len)
{
	uint8_t ch;
	int pos;

	for (pos = 0; pos < len; pos++) {
		ch = ' ';
		if (*src != '\0')
			ch = *src++;
		dest[pos ^ 1] = ch;
	}
}
591 
/*
 * Copy 'src' into the 'len'-byte field 'dest' in natural byte order,
 * padding with spaces once 'src' is exhausted.  No NUL terminator is
 * written.
 */
static void
atapi_string(uint8_t *dest, const char *src, int len)
{
	uint8_t *out, *end;

	end = dest + len;
	for (out = dest; out < end; out++) {
		if (*src != '\0')
			*out = *src++;
		else
			*out = ' ';
	}
}
604 
/*
 * Build up the iovec based on the PRDT, 'done' and 'len'.
 *
 * p:     port; supplies the VM context and, for rounding, the sector size.
 * aior:  request whose io_req iovec is filled in.  aior->done is the
 *        number of bytes already handled and aior->len the total; both
 *        are in bytes.
 * prdt:  first PRDT entry of the command.
 * prdtl: number of PRDT entries.
 *
 * On return breq->br_iovcnt and breq->br_resid describe this pass,
 * aior->done has been advanced by the bytes covered, and aior->more is
 * non-zero when another pass is required.
 */
static void
ahci_build_iov(struct ahci_port *p, struct ahci_ioreq *aior,
    struct ahci_prdt_entry *prdt, uint16_t prdtl)
{
	struct blockif_req *breq = &aior->io_req;
	uint32_t dbcsz, extra, left, skip, todo;
	int i, j;

	assert(aior->len >= aior->done);

	/* Copy part of PRDT between 'done' and 'len' bytes into the iov. */
	skip = aior->done;
	left = aior->len - aior->done;
	todo = 0;
	for (i = 0, j = 0; i < prdtl && j < BLOCKIF_IOV_MAX && left > 0;
	    i++, prdt++) {
		/* The stored count is the buffer length minus one. */
		dbcsz = (prdt->dbc & DBCMASK) + 1;
		/* Skip already done part of the PRDT */
		if (dbcsz <= skip) {
			skip -= dbcsz;
			continue;
		}
		/* Partially consumed entry: start 'skip' bytes into it. */
		dbcsz -= skip;
		if (dbcsz > left)
			dbcsz = left;
		breq->br_iov[j].iov_base = paddr_guest2host(ahci_ctx(p->pr_sc),
		    prdt->dba + skip, dbcsz);
		breq->br_iov[j].iov_len = dbcsz;
		todo += dbcsz;
		left -= dbcsz;
		skip = 0;
		j++;
	}

	/* If we got limited by IOV length, round I/O down to sector size. */
	if (j == BLOCKIF_IOV_MAX) {
		extra = todo % blockif_sectsz(p->bctx);
		todo -= extra;
		assert(todo > 0);
		/* Trim 'extra' bytes off the tail, dropping whole iovecs as needed. */
		while (extra > 0) {
			if (breq->br_iov[j - 1].iov_len > extra) {
				breq->br_iov[j - 1].iov_len -= extra;
				break;
			}
			extra -= breq->br_iov[j - 1].iov_len;
			j--;
		}
	}

	breq->br_iovcnt = j;
	breq->br_resid = todo;
	aior->done += todo;
	/* More work remains only if bytes are left and PRDT entries were not exhausted. */
	aior->more = (aior->done < aior->len && i < prdtl);
}
662 
663 static void
664 ahci_handle_rw(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
665 {
666 	struct ahci_ioreq *aior;
667 	struct blockif_req *breq;
668 	struct ahci_prdt_entry *prdt;
669 	struct ahci_cmd_hdr *hdr;
670 	uint64_t lba;
671 	uint32_t len;
672 	int err, first, ncq, readop;
673 
674 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
675 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
676 	ncq = 0;
677 	readop = 1;
678 	first = (done == 0);
679 
680 	if (cfis[2] == ATA_WRITE || cfis[2] == ATA_WRITE48 ||
681 	    cfis[2] == ATA_WRITE_MUL || cfis[2] == ATA_WRITE_MUL48 ||
682 	    cfis[2] == ATA_WRITE_DMA || cfis[2] == ATA_WRITE_DMA48 ||
683 	    cfis[2] == ATA_WRITE_FPDMA_QUEUED)
684 		readop = 0;
685 
686 	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
687 	    cfis[2] == ATA_READ_FPDMA_QUEUED) {
688 		lba = ((uint64_t)cfis[10] << 40) |
689 			((uint64_t)cfis[9] << 32) |
690 			((uint64_t)cfis[8] << 24) |
691 			((uint64_t)cfis[6] << 16) |
692 			((uint64_t)cfis[5] << 8) |
693 			cfis[4];
694 		len = cfis[11] << 8 | cfis[3];
695 		if (!len)
696 			len = 65536;
697 		ncq = 1;
698 	} else if (cfis[2] == ATA_READ48 || cfis[2] == ATA_WRITE48 ||
699 	    cfis[2] == ATA_READ_MUL48 || cfis[2] == ATA_WRITE_MUL48 ||
700 	    cfis[2] == ATA_READ_DMA48 || cfis[2] == ATA_WRITE_DMA48) {
701 		lba = ((uint64_t)cfis[10] << 40) |
702 			((uint64_t)cfis[9] << 32) |
703 			((uint64_t)cfis[8] << 24) |
704 			((uint64_t)cfis[6] << 16) |
705 			((uint64_t)cfis[5] << 8) |
706 			cfis[4];
707 		len = cfis[13] << 8 | cfis[12];
708 		if (!len)
709 			len = 65536;
710 	} else {
711 		lba = ((cfis[7] & 0xf) << 24) | (cfis[6] << 16) |
712 			(cfis[5] << 8) | cfis[4];
713 		len = cfis[12];
714 		if (!len)
715 			len = 256;
716 	}
717 	lba *= blockif_sectsz(p->bctx);
718 	len *= blockif_sectsz(p->bctx);
719 
720 	/* Pull request off free list */
721 	aior = STAILQ_FIRST(&p->iofhd);
722 	assert(aior != NULL);
723 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
724 
725 	aior->cfis = cfis;
726 	aior->slot = slot;
727 	aior->len = len;
728 	aior->done = done;
729 	aior->readop = readop;
730 	breq = &aior->io_req;
731 	breq->br_offset = lba + done;
732 	ahci_build_iov(p, aior, prdt, hdr->prdtl);
733 
734 	/* Mark this command in-flight. */
735 	p->pending |= 1 << slot;
736 
737 	/* Stuff request onto busy list. */
738 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
739 
740 	if (ncq && first)
741 		ahci_write_fis_d2h_ncq(p, slot);
742 
743 	if (readop)
744 		err = blockif_read(p->bctx, breq);
745 	else
746 		err = blockif_write(p->bctx, breq);
747 	assert(err == 0);
748 }
749 
750 static void
751 ahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis)
752 {
753 	struct ahci_ioreq *aior;
754 	struct blockif_req *breq;
755 	int err;
756 
757 	/*
758 	 * Pull request off free list
759 	 */
760 	aior = STAILQ_FIRST(&p->iofhd);
761 	assert(aior != NULL);
762 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
763 	aior->cfis = cfis;
764 	aior->slot = slot;
765 	aior->len = 0;
766 	aior->done = 0;
767 	aior->more = 0;
768 	breq = &aior->io_req;
769 
770 	/*
771 	 * Mark this command in-flight.
772 	 */
773 	p->pending |= 1 << slot;
774 
775 	/*
776 	 * Stuff request onto busy list
777 	 */
778 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
779 
780 	err = blockif_flush(p->bctx, breq);
781 	assert(err == 0);
782 }
783 
784 static inline unsigned int
785 read_prdt(struct ahci_port *p, int slot, uint8_t *cfis, void *buf,
786     unsigned int size)
787 {
788 	struct ahci_cmd_hdr *hdr;
789 	struct ahci_prdt_entry *prdt;
790 	uint8_t *to;
791 	unsigned int len;
792 	int i;
793 
794 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
795 	len = size;
796 	to = buf;
797 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
798 	for (i = 0; i < hdr->prdtl && len; i++) {
799 		uint8_t *ptr;
800 		uint32_t dbcsz;
801 		unsigned int sublen;
802 
803 		dbcsz = (prdt->dbc & DBCMASK) + 1;
804 		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
805 		sublen = MIN(len, dbcsz);
806 		memcpy(to, ptr, sublen);
807 		len -= sublen;
808 		to += sublen;
809 		prdt++;
810 	}
811 	return (size - len);
812 }
813 
814 static void
815 ahci_handle_dsm_trim(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
816 {
817 	struct ahci_ioreq *aior;
818 	struct blockif_req *breq;
819 	uint8_t *entry;
820 	uint64_t elba;
821 	uint32_t len, elen;
822 	int err, first, ncq;
823 	uint8_t buf[512];
824 	unsigned int written;
825 
826 	first = (done == 0);
827 	if (cfis[2] == ATA_DATA_SET_MANAGEMENT) {
828 		len = (uint16_t)cfis[13] << 8 | cfis[12];
829 		len *= 512;
830 		ncq = 0;
831 	} else { /* ATA_SEND_FPDMA_QUEUED */
832 		len = (uint16_t)cfis[11] << 8 | cfis[3];
833 		len *= 512;
834 		ncq = 1;
835 	}
836 	written = read_prdt(p, slot, cfis, buf, sizeof(buf));
837 	memset(buf + written, 0, sizeof(buf) - written);
838 
839 next:
840 	if (done >= sizeof(buf) - 8)
841 		return;
842 	entry = &buf[done];
843 	elba = ((uint64_t)entry[5] << 40) |
844 		((uint64_t)entry[4] << 32) |
845 		((uint64_t)entry[3] << 24) |
846 		((uint64_t)entry[2] << 16) |
847 		((uint64_t)entry[1] << 8) |
848 		entry[0];
849 	elen = (uint16_t)entry[7] << 8 | entry[6];
850 	done += 8;
851 	if (elen == 0) {
852 		if (done >= len) {
853 			if (ncq) {
854 				if (first)
855 					ahci_write_fis_d2h_ncq(p, slot);
856 				ahci_write_fis_sdb(p, slot, cfis,
857 				    ATA_S_READY | ATA_S_DSC);
858 			} else {
859 				ahci_write_fis_d2h(p, slot, cfis,
860 				    ATA_S_READY | ATA_S_DSC);
861 			}
862 			p->pending &= ~(1 << slot);
863 			ahci_check_stopped(p);
864 			if (!first)
865 				ahci_handle_port(p);
866 			return;
867 		}
868 		goto next;
869 	}
870 
871 	/*
872 	 * Pull request off free list
873 	 */
874 	aior = STAILQ_FIRST(&p->iofhd);
875 	assert(aior != NULL);
876 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
877 	aior->cfis = cfis;
878 	aior->slot = slot;
879 	aior->len = len;
880 	aior->done = done;
881 	aior->more = (len != done);
882 
883 	breq = &aior->io_req;
884 	breq->br_offset = elba * blockif_sectsz(p->bctx);
885 	breq->br_resid = elen * blockif_sectsz(p->bctx);
886 
887 	/*
888 	 * Mark this command in-flight.
889 	 */
890 	p->pending |= 1 << slot;
891 
892 	/*
893 	 * Stuff request onto busy list
894 	 */
895 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
896 
897 	if (ncq && first)
898 		ahci_write_fis_d2h_ncq(p, slot);
899 
900 	err = blockif_delete(p->bctx, breq);
901 	assert(err == 0);
902 }
903 
904 static inline void
905 write_prdt(struct ahci_port *p, int slot, uint8_t *cfis, void *buf,
906     unsigned int size)
907 {
908 	struct ahci_cmd_hdr *hdr;
909 	struct ahci_prdt_entry *prdt;
910 	uint8_t *from;
911 	unsigned int len;
912 	int i;
913 
914 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
915 	len = size;
916 	from = buf;
917 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
918 	for (i = 0; i < hdr->prdtl && len; i++) {
919 		uint8_t *ptr;
920 		uint32_t dbcsz;
921 		int sublen;
922 
923 		dbcsz = (prdt->dbc & DBCMASK) + 1;
924 		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
925 		sublen = MIN(len, dbcsz);
926 		memcpy(ptr, from, sublen);
927 		len -= sublen;
928 		from += sublen;
929 		prdt++;
930 	}
931 	hdr->prdbc = size - len;
932 }
933 
/*
 * Store a checksum in the last byte of 'buf' so that the 8-bit sum of all
 * 'size' bytes is zero.  The previous content of the last byte is ignored.
 */
static void
ahci_checksum(uint8_t *buf, int size)
{
	uint8_t total;
	int last, k;

	last = size - 1;
	total = 0;
	for (k = 0; k < last; k++)
		total += buf[k];
	buf[last] = 0x100 - total;
}
944 
945 static void
946 ahci_handle_read_log(struct ahci_port *p, int slot, uint8_t *cfis)
947 {
948 	struct ahci_cmd_hdr *hdr;
949 	uint32_t buf[128];
950 	uint8_t *buf8 = (uint8_t *)buf;
951 	uint16_t *buf16 = (uint16_t *)buf;
952 
953 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
954 	if (p->atapi || hdr->prdtl == 0 || cfis[5] != 0 ||
955 	    cfis[9] != 0 || cfis[12] != 1 || cfis[13] != 0) {
956 		ahci_write_fis_d2h(p, slot, cfis,
957 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
958 		return;
959 	}
960 
961 	memset(buf, 0, sizeof(buf));
962 	if (cfis[4] == 0x00) {	/* Log directory */
963 		buf16[0x00] = 1; /* Version -- 1 */
964 		buf16[0x10] = 1; /* NCQ Command Error Log -- 1 page */
965 		buf16[0x13] = 1; /* SATA NCQ Send and Receive Log -- 1 page */
966 	} else if (cfis[4] == 0x10) {	/* NCQ Command Error Log */
967 		memcpy(buf8, p->err_cfis, sizeof(p->err_cfis));
968 		ahci_checksum(buf8, sizeof(buf));
969 	} else if (cfis[4] == 0x13) {	/* SATA NCQ Send and Receive Log */
970 		if (blockif_candelete(p->bctx) && !blockif_is_ro(p->bctx)) {
971 			buf[0x00] = 1;	/* SFQ DSM supported */
972 			buf[0x01] = 1;	/* SFQ DSM TRIM supported */
973 		}
974 	} else {
975 		ahci_write_fis_d2h(p, slot, cfis,
976 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
977 		return;
978 	}
979 
980 	if (cfis[2] == ATA_READ_LOG_EXT)
981 		ahci_write_fis_piosetup(p);
982 	write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
983 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
984 }
985 
986 static void
987 handle_identify(struct ahci_port *p, int slot, uint8_t *cfis)
988 {
989 	struct ahci_cmd_hdr *hdr;
990 
991 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
992 	if (p->atapi || hdr->prdtl == 0) {
993 		ahci_write_fis_d2h(p, slot, cfis,
994 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
995 	} else {
996 		ahci_write_fis_piosetup(p);
997 		write_prdt(p, slot, cfis, (void*)&p->ata_ident, sizeof(struct ata_params));
998 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
999 	}
1000 }
1001 
1002 static void
1003 ata_identify_init(struct ahci_port* p, int atapi)
1004 {
1005 	struct ata_params* ata_ident = &p->ata_ident;
1006 
1007 	if (atapi) {
1008 		ata_ident->config = ATA_PROTO_ATAPI | ATA_ATAPI_TYPE_CDROM |
1009 		    ATA_ATAPI_REMOVABLE | ATA_DRQ_FAST;
1010 		ata_ident->capabilities1 = ATA_SUPPORT_LBA |
1011 			ATA_SUPPORT_DMA;
1012 		ata_ident->capabilities2 = (1 << 14 | 1);
1013 		ata_ident->atavalid = ATA_FLAG_64_70 | ATA_FLAG_88;
1014 		ata_ident->obsolete62 = 0x3f;
1015 		ata_ident->mwdmamodes = 7;
1016 		if (p->xfermode & ATA_WDMA0)
1017 			ata_ident->mwdmamodes |= (1 << ((p->xfermode & 7) + 8));
1018 		ata_ident->apiomodes = 3;
1019 		ata_ident->mwdmamin = 0x0078;
1020 		ata_ident->mwdmarec = 0x0078;
1021 		ata_ident->pioblind = 0x0078;
1022 		ata_ident->pioiordy = 0x0078;
1023 		ata_ident->satacapabilities = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3);
1024 		ata_ident->satacapabilities2 = ((p->ssts & ATA_SS_SPD_MASK) >> 3);
1025 		ata_ident->satasupport = ATA_SUPPORT_NCQ_STREAM;
1026 		ata_ident->version_major = 0x3f0;
1027 		ata_ident->support.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
1028 			ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
1029 		ata_ident->support.command2 = (1 << 14);
1030 		ata_ident->support.extension = (1 << 14);
1031 		ata_ident->enabled.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
1032 			ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
1033 		ata_ident->enabled.extension = (1 << 14);
1034 		ata_ident->udmamodes = 0x7f;
1035 		if (p->xfermode & ATA_UDMA0)
1036 			ata_ident->udmamodes |= (1 << ((p->xfermode & 7) + 8));
1037 		ata_ident->transport_major = 0x1020;
1038 		ata_ident->integrity = 0x00a5;
1039 	} else {
1040 		uint64_t sectors;
1041 		int sectsz, psectsz, psectoff, candelete, ro;
1042 		uint16_t cyl;
1043 		uint8_t sech, heads;
1044 
1045 		ro = blockif_is_ro(p->bctx);
1046 		candelete = blockif_candelete(p->bctx);
1047 		sectsz = blockif_sectsz(p->bctx);
1048 		sectors = blockif_size(p->bctx) / sectsz;
1049 		blockif_chs(p->bctx, &cyl, &heads, &sech);
1050 		blockif_psectsz(p->bctx, &psectsz, &psectoff);
1051 		ata_ident->config = ATA_DRQ_FAST;
1052 		ata_ident->cylinders = cyl;
1053 		ata_ident->heads = heads;
1054 		ata_ident->sectors = sech;
1055 
1056 		ata_ident->sectors_intr = (0x8000 | 128);
1057 		ata_ident->tcg = 0;
1058 
1059 		ata_ident->capabilities1 = ATA_SUPPORT_DMA |
1060 			ATA_SUPPORT_LBA | ATA_SUPPORT_IORDY;
1061 		ata_ident->capabilities2 = (1 << 14);
1062 		ata_ident->atavalid = ATA_FLAG_64_70 | ATA_FLAG_88;
1063 		if (p->mult_sectors)
1064 			ata_ident->multi = (ATA_MULTI_VALID | p->mult_sectors);
1065 		if (sectors <= 0x0fffffff) {
1066 			ata_ident->lba_size_1 = sectors;
1067 			ata_ident->lba_size_2 = (sectors >> 16);
1068 		} else {
1069 			ata_ident->lba_size_1 = 0xffff;
1070 			ata_ident->lba_size_2 = 0x0fff;
1071 		}
1072 		ata_ident->mwdmamodes = 0x7;
1073 		if (p->xfermode & ATA_WDMA0)
1074 			ata_ident->mwdmamodes |= (1 << ((p->xfermode & 7) + 8));
1075 		ata_ident->apiomodes = 0x3;
1076 		ata_ident->mwdmamin = 0x0078;
1077 		ata_ident->mwdmarec = 0x0078;
1078 		ata_ident->pioblind = 0x0078;
1079 		ata_ident->pioiordy = 0x0078;
1080 		ata_ident->support3 = 0;
1081 		ata_ident->queue = 31;
1082 		ata_ident->satacapabilities = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3 |
1083 			ATA_SUPPORT_NCQ);
1084 		ata_ident->satacapabilities2 = (ATA_SUPPORT_RCVSND_FPDMA_QUEUED |
1085 			(p->ssts & ATA_SS_SPD_MASK) >> 3);
1086 		ata_ident->version_major = 0x3f0;
1087 		ata_ident->version_minor = 0x28;
1088 		ata_ident->support.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE |
1089 			ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
1090 		ata_ident->support.command2 = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
1091 			ATA_SUPPORT_FLUSHCACHE48 | 1 << 14);
1092 		ata_ident->support.extension = (1 << 14);
1093 		ata_ident->enabled.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE |
1094 			ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
1095 		ata_ident->enabled.command2 = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
1096 			ATA_SUPPORT_FLUSHCACHE48 | 1 << 15);
1097 		ata_ident->enabled.extension = (1 << 14);
1098 		ata_ident->udmamodes = 0x7f;
1099 		if (p->xfermode & ATA_UDMA0)
1100 			ata_ident->udmamodes |= (1 << ((p->xfermode & 7) + 8));
1101 		ata_ident->lba_size48_1 = sectors;
1102 		ata_ident->lba_size48_2 = (sectors >> 16);
1103 		ata_ident->lba_size48_3 = (sectors >> 32);
1104 		ata_ident->lba_size48_4 = (sectors >> 48);
1105 
1106 		if (candelete && !ro) {
1107 			ata_ident->support3 |= ATA_SUPPORT_RZAT | ATA_SUPPORT_DRAT;
1108 			ata_ident->max_dsm_blocks = 1;
1109 			ata_ident->support_dsm = ATA_SUPPORT_DSM_TRIM;
1110 		}
1111 		ata_ident->pss = ATA_PSS_VALID_VALUE;
1112 		ata_ident->lsalign = 0x4000;
1113 		if (psectsz > sectsz) {
1114 			ata_ident->pss |= ATA_PSS_MULTLS;
1115 			ata_ident->pss |= ffsl(psectsz / sectsz) - 1;
1116 			ata_ident->lsalign |= (psectoff / sectsz);
1117 		}
1118 		if (sectsz > 512) {
1119 			ata_ident->pss |= ATA_PSS_LSSABOVE512;
1120 			ata_ident->lss_1 = sectsz / 2;
1121 			ata_ident->lss_2 = ((sectsz / 2) >> 16);
1122 		}
1123 		ata_ident->support2 = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
1124 		ata_ident->enabled2 = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
1125 		ata_ident->transport_major = 0x1020;
1126 		ata_ident->integrity = 0x00a5;
1127 	}
1128 	ahci_checksum((uint8_t*)ata_ident, sizeof(struct ata_params));
1129 }
1130 
1131 static void
1132 handle_atapi_identify(struct ahci_port *p, int slot, uint8_t *cfis)
1133 {
1134 	if (!p->atapi) {
1135 		ahci_write_fis_d2h(p, slot, cfis,
1136 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1137 	} else {
1138 		ahci_write_fis_piosetup(p);
1139 		write_prdt(p, slot, cfis, (void *)&p->ata_ident, sizeof(struct ata_params));
1140 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
1141 	}
1142 }
1143 
1144 static void
1145 atapi_inquiry(struct ahci_port *p, int slot, uint8_t *cfis)
1146 {
1147 	uint8_t buf[36];
1148 	uint8_t *acmd;
1149 	unsigned int len;
1150 	uint32_t tfd;
1151 
1152 	acmd = cfis + 0x40;
1153 
1154 	if (acmd[1] & 1) {		/* VPD */
1155 		if (acmd[2] == 0) {	/* Supported VPD pages */
1156 			buf[0] = 0x05;
1157 			buf[1] = 0;
1158 			buf[2] = 0;
1159 			buf[3] = 1;
1160 			buf[4] = 0;
1161 			len = 4 + buf[3];
1162 		} else {
1163 			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1164 			p->asc = 0x24;
1165 			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1166 			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1167 			ahci_write_fis_d2h(p, slot, cfis, tfd);
1168 			return;
1169 		}
1170 	} else {
1171 		buf[0] = 0x05;
1172 		buf[1] = 0x80;
1173 		buf[2] = 0x00;
1174 		buf[3] = 0x21;
1175 		buf[4] = 31;
1176 		buf[5] = 0;
1177 		buf[6] = 0;
1178 		buf[7] = 0;
1179 		atapi_string(buf + 8, "BHYVE", 8);
1180 		atapi_string(buf + 16, "BHYVE DVD-ROM", 16);
1181 		atapi_string(buf + 32, "001", 4);
1182 		len = sizeof(buf);
1183 	}
1184 
1185 	if (len > acmd[4])
1186 		len = acmd[4];
1187 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1188 	write_prdt(p, slot, cfis, buf, len);
1189 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1190 }
1191 
1192 static void
1193 atapi_read_capacity(struct ahci_port *p, int slot, uint8_t *cfis)
1194 {
1195 	uint8_t buf[8];
1196 	uint64_t sectors;
1197 
1198 	sectors = blockif_size(p->bctx) / 2048;
1199 	be32enc(buf, sectors - 1);
1200 	be32enc(buf + 4, 2048);
1201 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1202 	write_prdt(p, slot, cfis, buf, sizeof(buf));
1203 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1204 }
1205 
1206 static void
1207 atapi_read_toc(struct ahci_port *p, int slot, uint8_t *cfis)
1208 {
1209 	uint8_t *acmd;
1210 	uint8_t format;
1211 	unsigned int len;
1212 
1213 	acmd = cfis + 0x40;
1214 
1215 	len = be16dec(acmd + 7);
1216 	format = acmd[9] >> 6;
1217 	switch (format) {
1218 	case 0:
1219 	{
1220 		size_t size;
1221 		int msf;
1222 		uint64_t sectors;
1223 		uint8_t start_track, buf[20], *bp;
1224 
1225 		msf = (acmd[1] >> 1) & 1;
1226 		start_track = acmd[6];
1227 		if (start_track > 1 && start_track != 0xaa) {
1228 			uint32_t tfd;
1229 			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1230 			p->asc = 0x24;
1231 			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1232 			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1233 			ahci_write_fis_d2h(p, slot, cfis, tfd);
1234 			return;
1235 		}
1236 		bp = buf + 2;
1237 		*bp++ = 1;
1238 		*bp++ = 1;
1239 		if (start_track <= 1) {
1240 			*bp++ = 0;
1241 			*bp++ = 0x14;
1242 			*bp++ = 1;
1243 			*bp++ = 0;
1244 			if (msf) {
1245 				*bp++ = 0;
1246 				lba_to_msf(bp, 0);
1247 				bp += 3;
1248 			} else {
1249 				*bp++ = 0;
1250 				*bp++ = 0;
1251 				*bp++ = 0;
1252 				*bp++ = 0;
1253 			}
1254 		}
1255 		*bp++ = 0;
1256 		*bp++ = 0x14;
1257 		*bp++ = 0xaa;
1258 		*bp++ = 0;
1259 		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
1260 		sectors >>= 2;
1261 		if (msf) {
1262 			*bp++ = 0;
1263 			lba_to_msf(bp, sectors);
1264 			bp += 3;
1265 		} else {
1266 			be32enc(bp, sectors);
1267 			bp += 4;
1268 		}
1269 		size = bp - buf;
1270 		be16enc(buf, size - 2);
1271 		if (len > size)
1272 			len = size;
1273 		write_prdt(p, slot, cfis, buf, len);
1274 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1275 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1276 		break;
1277 	}
1278 	case 1:
1279 	{
1280 		uint8_t buf[12];
1281 
1282 		memset(buf, 0, sizeof(buf));
1283 		buf[1] = 0xa;
1284 		buf[2] = 0x1;
1285 		buf[3] = 0x1;
1286 		if (len > sizeof(buf))
1287 			len = sizeof(buf);
1288 		write_prdt(p, slot, cfis, buf, len);
1289 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1290 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1291 		break;
1292 	}
1293 	case 2:
1294 	{
1295 		size_t size;
1296 		int msf;
1297 		uint64_t sectors;
1298 		uint8_t *bp, buf[50];
1299 
1300 		msf = (acmd[1] >> 1) & 1;
1301 		bp = buf + 2;
1302 		*bp++ = 1;
1303 		*bp++ = 1;
1304 
1305 		*bp++ = 1;
1306 		*bp++ = 0x14;
1307 		*bp++ = 0;
1308 		*bp++ = 0xa0;
1309 		*bp++ = 0;
1310 		*bp++ = 0;
1311 		*bp++ = 0;
1312 		*bp++ = 0;
1313 		*bp++ = 1;
1314 		*bp++ = 0;
1315 		*bp++ = 0;
1316 
1317 		*bp++ = 1;
1318 		*bp++ = 0x14;
1319 		*bp++ = 0;
1320 		*bp++ = 0xa1;
1321 		*bp++ = 0;
1322 		*bp++ = 0;
1323 		*bp++ = 0;
1324 		*bp++ = 0;
1325 		*bp++ = 1;
1326 		*bp++ = 0;
1327 		*bp++ = 0;
1328 
1329 		*bp++ = 1;
1330 		*bp++ = 0x14;
1331 		*bp++ = 0;
1332 		*bp++ = 0xa2;
1333 		*bp++ = 0;
1334 		*bp++ = 0;
1335 		*bp++ = 0;
1336 		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
1337 		sectors >>= 2;
1338 		if (msf) {
1339 			*bp++ = 0;
1340 			lba_to_msf(bp, sectors);
1341 			bp += 3;
1342 		} else {
1343 			be32enc(bp, sectors);
1344 			bp += 4;
1345 		}
1346 
1347 		*bp++ = 1;
1348 		*bp++ = 0x14;
1349 		*bp++ = 0;
1350 		*bp++ = 1;
1351 		*bp++ = 0;
1352 		*bp++ = 0;
1353 		*bp++ = 0;
1354 		if (msf) {
1355 			*bp++ = 0;
1356 			lba_to_msf(bp, 0);
1357 			bp += 3;
1358 		} else {
1359 			*bp++ = 0;
1360 			*bp++ = 0;
1361 			*bp++ = 0;
1362 			*bp++ = 0;
1363 		}
1364 
1365 		size = bp - buf;
1366 		be16enc(buf, size - 2);
1367 		if (len > size)
1368 			len = size;
1369 		write_prdt(p, slot, cfis, buf, len);
1370 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1371 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1372 		break;
1373 	}
1374 	default:
1375 	{
1376 		uint32_t tfd;
1377 
1378 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1379 		p->asc = 0x24;
1380 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1381 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1382 		ahci_write_fis_d2h(p, slot, cfis, tfd);
1383 		break;
1384 	}
1385 	}
1386 }
1387 
1388 static void
1389 atapi_report_luns(struct ahci_port *p, int slot, uint8_t *cfis)
1390 {
1391 	uint8_t buf[16];
1392 
1393 	memset(buf, 0, sizeof(buf));
1394 	buf[3] = 8;
1395 
1396 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1397 	write_prdt(p, slot, cfis, buf, sizeof(buf));
1398 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1399 }
1400 
1401 static void
1402 atapi_read(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
1403 {
1404 	struct ahci_ioreq *aior;
1405 	struct ahci_cmd_hdr *hdr;
1406 	struct ahci_prdt_entry *prdt;
1407 	struct blockif_req *breq;
1408 	uint8_t *acmd;
1409 	uint64_t lba;
1410 	uint32_t len;
1411 	int err;
1412 
1413 	acmd = cfis + 0x40;
1414 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1415 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1416 
1417 	lba = be32dec(acmd + 2);
1418 	if (acmd[0] == READ_10)
1419 		len = be16dec(acmd + 7);
1420 	else
1421 		len = be32dec(acmd + 6);
1422 	if (len == 0) {
1423 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1424 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1425 	}
1426 	lba *= 2048;
1427 	len *= 2048;
1428 
1429 	/*
1430 	 * Pull request off free list
1431 	 */
1432 	aior = STAILQ_FIRST(&p->iofhd);
1433 	assert(aior != NULL);
1434 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
1435 	aior->cfis = cfis;
1436 	aior->slot = slot;
1437 	aior->len = len;
1438 	aior->done = done;
1439 	aior->readop = 1;
1440 	breq = &aior->io_req;
1441 	breq->br_offset = lba + done;
1442 	ahci_build_iov(p, aior, prdt, hdr->prdtl);
1443 
1444 	/* Mark this command in-flight. */
1445 	p->pending |= 1 << slot;
1446 
1447 	/* Stuff request onto busy list. */
1448 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
1449 
1450 	err = blockif_read(p->bctx, breq);
1451 	assert(err == 0);
1452 }
1453 
1454 static void
1455 atapi_request_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1456 {
1457 	uint8_t buf[64];
1458 	uint8_t *acmd;
1459 	unsigned int len;
1460 
1461 	acmd = cfis + 0x40;
1462 	len = acmd[4];
1463 	if (len > sizeof(buf))
1464 		len = sizeof(buf);
1465 	memset(buf, 0, len);
1466 	buf[0] = 0x70 | (1 << 7);
1467 	buf[2] = p->sense_key;
1468 	buf[7] = 10;
1469 	buf[12] = p->asc;
1470 	write_prdt(p, slot, cfis, buf, len);
1471 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1472 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1473 }
1474 
1475 static void
1476 atapi_start_stop_unit(struct ahci_port *p, int slot, uint8_t *cfis)
1477 {
1478 	uint8_t *acmd = cfis + 0x40;
1479 	uint32_t tfd;
1480 
1481 	switch (acmd[4] & 3) {
1482 	case 0:
1483 	case 1:
1484 	case 3:
1485 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1486 		tfd = ATA_S_READY | ATA_S_DSC;
1487 		break;
1488 	case 2:
1489 		/* TODO eject media */
1490 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1491 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1492 		p->asc = 0x53;
1493 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1494 		break;
1495 	}
1496 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1497 }
1498 
1499 static void
1500 atapi_mode_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1501 {
1502 	uint8_t *acmd;
1503 	uint32_t tfd;
1504 	uint8_t pc, code;
1505 	unsigned int len;
1506 
1507 	acmd = cfis + 0x40;
1508 	len = be16dec(acmd + 7);
1509 	pc = acmd[2] >> 6;
1510 	code = acmd[2] & 0x3f;
1511 
1512 	switch (pc) {
1513 	case 0:
1514 		switch (code) {
1515 		case MODEPAGE_RW_ERROR_RECOVERY:
1516 		{
1517 			uint8_t buf[16];
1518 
1519 			if (len > sizeof(buf))
1520 				len = sizeof(buf);
1521 
1522 			memset(buf, 0, sizeof(buf));
1523 			be16enc(buf, 16 - 2);
1524 			buf[2] = 0x70;
1525 			buf[8] = 0x01;
1526 			buf[9] = 16 - 10;
1527 			buf[11] = 0x05;
1528 			write_prdt(p, slot, cfis, buf, len);
1529 			tfd = ATA_S_READY | ATA_S_DSC;
1530 			break;
1531 		}
1532 		case MODEPAGE_CD_CAPABILITIES:
1533 		{
1534 			uint8_t buf[30];
1535 
1536 			if (len > sizeof(buf))
1537 				len = sizeof(buf);
1538 
1539 			memset(buf, 0, sizeof(buf));
1540 			be16enc(buf, 30 - 2);
1541 			buf[2] = 0x70;
1542 			buf[8] = 0x2A;
1543 			buf[9] = 30 - 10;
1544 			buf[10] = 0x08;
1545 			buf[12] = 0x71;
1546 			be16enc(&buf[18], 2);
1547 			be16enc(&buf[20], 512);
1548 			write_prdt(p, slot, cfis, buf, len);
1549 			tfd = ATA_S_READY | ATA_S_DSC;
1550 			break;
1551 		}
1552 		default:
1553 			goto error;
1554 			break;
1555 		}
1556 		break;
1557 	case 3:
1558 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1559 		p->asc = 0x39;
1560 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1561 		break;
1562 error:
1563 	case 1:
1564 	case 2:
1565 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1566 		p->asc = 0x24;
1567 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1568 		break;
1569 	}
1570 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1571 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1572 }
1573 
1574 static void
1575 atapi_get_event_status_notification(struct ahci_port *p, int slot,
1576     uint8_t *cfis)
1577 {
1578 	uint8_t *acmd;
1579 	uint32_t tfd;
1580 
1581 	acmd = cfis + 0x40;
1582 
1583 	/* we don't support asynchronous operation */
1584 	if (!(acmd[1] & 1)) {
1585 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1586 		p->asc = 0x24;
1587 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1588 	} else {
1589 		uint8_t buf[8];
1590 		unsigned int len;
1591 
1592 		len = be16dec(acmd + 7);
1593 		if (len > sizeof(buf))
1594 			len = sizeof(buf);
1595 
1596 		memset(buf, 0, sizeof(buf));
1597 		be16enc(buf, 8 - 2);
1598 		buf[2] = 0x04;
1599 		buf[3] = 0x10;
1600 		buf[5] = 0x02;
1601 		write_prdt(p, slot, cfis, buf, len);
1602 		tfd = ATA_S_READY | ATA_S_DSC;
1603 	}
1604 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1605 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1606 }
1607 
1608 static void
1609 handle_packet_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1610 {
1611 	uint8_t *acmd;
1612 
1613 	acmd = cfis + 0x40;
1614 
1615 #ifdef AHCI_DEBUG
1616 	{
1617 		int i;
1618 		DPRINTF("ACMD:");
1619 		for (i = 0; i < 16; i++)
1620 			DPRINTF("%02x ", acmd[i]);
1621 		DPRINTF("");
1622 	}
1623 #endif
1624 
1625 	switch (acmd[0]) {
1626 	case TEST_UNIT_READY:
1627 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1628 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1629 		break;
1630 	case INQUIRY:
1631 		atapi_inquiry(p, slot, cfis);
1632 		break;
1633 	case READ_CAPACITY:
1634 		atapi_read_capacity(p, slot, cfis);
1635 		break;
1636 	case PREVENT_ALLOW:
1637 		/* TODO */
1638 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1639 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1640 		break;
1641 	case READ_TOC:
1642 		atapi_read_toc(p, slot, cfis);
1643 		break;
1644 	case REPORT_LUNS:
1645 		atapi_report_luns(p, slot, cfis);
1646 		break;
1647 	case READ_10:
1648 	case READ_12:
1649 		atapi_read(p, slot, cfis, 0);
1650 		break;
1651 	case REQUEST_SENSE:
1652 		atapi_request_sense(p, slot, cfis);
1653 		break;
1654 	case START_STOP_UNIT:
1655 		atapi_start_stop_unit(p, slot, cfis);
1656 		break;
1657 	case MODE_SENSE_10:
1658 		atapi_mode_sense(p, slot, cfis);
1659 		break;
1660 	case GET_EVENT_STATUS_NOTIFICATION:
1661 		atapi_get_event_status_notification(p, slot, cfis);
1662 		break;
1663 	default:
1664 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1665 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1666 		p->asc = 0x20;
1667 		ahci_write_fis_d2h(p, slot, cfis, (p->sense_key << 12) |
1668 				ATA_S_READY | ATA_S_ERROR);
1669 		break;
1670 	}
1671 }
1672 
1673 static void
1674 ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1675 {
1676 
1677 	p->tfd |= ATA_S_BUSY;
1678 	switch (cfis[2]) {
1679 	case ATA_ATA_IDENTIFY:
1680 		handle_identify(p, slot, cfis);
1681 		break;
1682 	case ATA_SETFEATURES:
1683 	{
1684 		switch (cfis[3]) {
1685 		case ATA_SF_ENAB_SATA_SF:
1686 			switch (cfis[12]) {
1687 			case ATA_SATA_SF_AN:
1688 				p->tfd = ATA_S_DSC | ATA_S_READY;
1689 				break;
1690 			default:
1691 				p->tfd = ATA_S_ERROR | ATA_S_READY;
1692 				p->tfd |= (ATA_ERROR_ABORT << 8);
1693 				break;
1694 			}
1695 			break;
1696 		case ATA_SF_ENAB_WCACHE:
1697 		case ATA_SF_DIS_WCACHE:
1698 		case ATA_SF_ENAB_RCACHE:
1699 		case ATA_SF_DIS_RCACHE:
1700 			p->tfd = ATA_S_DSC | ATA_S_READY;
1701 			break;
1702 		case ATA_SF_SETXFER:
1703 		{
1704 			switch (cfis[12] & 0xf8) {
1705 			case ATA_PIO:
1706 			case ATA_PIO0:
1707 				break;
1708 			case ATA_WDMA0:
1709 			case ATA_UDMA0:
1710 				p->xfermode = (cfis[12] & 0x7);
1711 				break;
1712 			}
1713 			p->tfd = ATA_S_DSC | ATA_S_READY;
1714 			break;
1715 		}
1716 		default:
1717 			p->tfd = ATA_S_ERROR | ATA_S_READY;
1718 			p->tfd |= (ATA_ERROR_ABORT << 8);
1719 			break;
1720 		}
1721 		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1722 		break;
1723 	}
1724 	case ATA_SET_MULTI:
1725 		if (cfis[12] != 0 &&
1726 			(cfis[12] > 128 || (cfis[12] & (cfis[12] - 1)))) {
1727 			p->tfd = ATA_S_ERROR | ATA_S_READY;
1728 			p->tfd |= (ATA_ERROR_ABORT << 8);
1729 		} else {
1730 			p->mult_sectors = cfis[12];
1731 			p->tfd = ATA_S_DSC | ATA_S_READY;
1732 		}
1733 		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1734 		break;
1735 	case ATA_READ:
1736 	case ATA_WRITE:
1737 	case ATA_READ48:
1738 	case ATA_WRITE48:
1739 	case ATA_READ_MUL:
1740 	case ATA_WRITE_MUL:
1741 	case ATA_READ_MUL48:
1742 	case ATA_WRITE_MUL48:
1743 	case ATA_READ_DMA:
1744 	case ATA_WRITE_DMA:
1745 	case ATA_READ_DMA48:
1746 	case ATA_WRITE_DMA48:
1747 	case ATA_READ_FPDMA_QUEUED:
1748 	case ATA_WRITE_FPDMA_QUEUED:
1749 		ahci_handle_rw(p, slot, cfis, 0);
1750 		break;
1751 	case ATA_FLUSHCACHE:
1752 	case ATA_FLUSHCACHE48:
1753 		ahci_handle_flush(p, slot, cfis);
1754 		break;
1755 	case ATA_DATA_SET_MANAGEMENT:
1756 		if (cfis[11] == 0 && cfis[3] == ATA_DSM_TRIM &&
1757 		    cfis[13] == 0 && cfis[12] == 1) {
1758 			ahci_handle_dsm_trim(p, slot, cfis, 0);
1759 			break;
1760 		}
1761 		ahci_write_fis_d2h(p, slot, cfis,
1762 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1763 		break;
1764 	case ATA_SEND_FPDMA_QUEUED:
1765 		if ((cfis[13] & 0x1f) == ATA_SFPDMA_DSM &&
1766 		    cfis[17] == 0 && cfis[16] == ATA_DSM_TRIM &&
1767 		    cfis[11] == 0 && cfis[3] == 1) {
1768 			ahci_handle_dsm_trim(p, slot, cfis, 0);
1769 			break;
1770 		}
1771 		ahci_write_fis_d2h(p, slot, cfis,
1772 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1773 		break;
1774 	case ATA_READ_LOG_EXT:
1775 	case ATA_READ_LOG_DMA_EXT:
1776 		ahci_handle_read_log(p, slot, cfis);
1777 		break;
1778 	case ATA_SECURITY_FREEZE_LOCK:
1779 	case ATA_SMART_CMD:
1780 	case ATA_NOP:
1781 		ahci_write_fis_d2h(p, slot, cfis,
1782 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1783 		break;
1784 	case ATA_CHECK_POWER_MODE:
1785 		cfis[12] = 0xff;	/* always on */
1786 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1787 		break;
1788 	case ATA_STANDBY_CMD:
1789 	case ATA_STANDBY_IMMEDIATE:
1790 	case ATA_IDLE_CMD:
1791 	case ATA_IDLE_IMMEDIATE:
1792 	case ATA_SLEEP:
1793 	case ATA_READ_VERIFY:
1794 	case ATA_READ_VERIFY48:
1795 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1796 		break;
1797 	case ATA_ATAPI_IDENTIFY:
1798 		handle_atapi_identify(p, slot, cfis);
1799 		break;
1800 	case ATA_PACKET_CMD:
1801 		if (!p->atapi) {
1802 			ahci_write_fis_d2h(p, slot, cfis,
1803 			    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1804 		} else
1805 			handle_packet_cmd(p, slot, cfis);
1806 		break;
1807 	default:
1808 		EPRINTLN("Unsupported cmd:%02x", cfis[2]);
1809 		ahci_write_fis_d2h(p, slot, cfis,
1810 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1811 		break;
1812 	}
1813 }
1814 
1815 static void
1816 ahci_handle_slot(struct ahci_port *p, int slot)
1817 {
1818 	struct ahci_cmd_hdr *hdr;
1819 #ifdef AHCI_DEBUG
1820 	struct ahci_prdt_entry *prdt;
1821 #endif
1822 	struct pci_ahci_softc *sc;
1823 	uint8_t *cfis;
1824 #ifdef AHCI_DEBUG
1825 	int cfl, i;
1826 #endif
1827 
1828 	sc = p->pr_sc;
1829 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1830 #ifdef AHCI_DEBUG
1831 	cfl = (hdr->flags & 0x1f) * 4;
1832 #endif
1833 	cfis = paddr_guest2host(ahci_ctx(sc), hdr->ctba,
1834 			0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry));
1835 #ifdef AHCI_DEBUG
1836 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1837 
1838 	DPRINTF("cfis:");
1839 	for (i = 0; i < cfl; i++) {
1840 		if (i % 10 == 0)
1841 			DPRINTF("");
1842 		DPRINTF("%02x ", cfis[i]);
1843 	}
1844 	DPRINTF("");
1845 
1846 	for (i = 0; i < hdr->prdtl; i++) {
1847 		DPRINTF("%d@%08"PRIx64"", prdt->dbc & 0x3fffff, prdt->dba);
1848 		prdt++;
1849 	}
1850 #endif
1851 
1852 	if (cfis[0] != FIS_TYPE_REGH2D) {
1853 		EPRINTLN("Not a H2D FIS:%02x", cfis[0]);
1854 		return;
1855 	}
1856 
1857 	if (cfis[1] & 0x80) {
1858 		ahci_handle_cmd(p, slot, cfis);
1859 	} else {
1860 		if (cfis[15] & (1 << 2))
1861 			p->reset = 1;
1862 		else if (p->reset) {
1863 			p->reset = 0;
1864 			ahci_port_reset(p);
1865 		}
1866 		p->ci &= ~(1 << slot);
1867 	}
1868 }
1869 
1870 static void
1871 ahci_handle_port(struct ahci_port *p)
1872 {
1873 
1874 	if (!(p->cmd & AHCI_P_CMD_ST))
1875 		return;
1876 
1877 	/*
1878 	 * Search for any new commands to issue ignoring those that
1879 	 * are already in-flight.  Stop if device is busy or in error.
1880 	 */
1881 	for (; (p->ci & ~p->pending) != 0; p->ccs = ((p->ccs + 1) & 31)) {
1882 		if ((p->tfd & (ATA_S_BUSY | ATA_S_DRQ)) != 0)
1883 			break;
1884 		if (p->waitforclear)
1885 			break;
1886 		if ((p->ci & ~p->pending & (1 << p->ccs)) != 0) {
1887 			p->cmd &= ~AHCI_P_CMD_CCS_MASK;
1888 			p->cmd |= p->ccs << AHCI_P_CMD_CCS_SHIFT;
1889 			ahci_handle_slot(p, p->ccs);
1890 		}
1891 	}
1892 }
1893 
1894 /*
1895  * blockif callback routine - this runs in the context of the blockif
1896  * i/o thread, so the mutex needs to be acquired.
1897  */
1898 static void
1899 ata_ioreq_cb(struct blockif_req *br, int err)
1900 {
1901 	struct ahci_cmd_hdr *hdr;
1902 	struct ahci_ioreq *aior;
1903 	struct ahci_port *p;
1904 	struct pci_ahci_softc *sc;
1905 	uint32_t tfd;
1906 	uint8_t *cfis;
1907 	int slot, ncq, dsm;
1908 
1909 	DPRINTF("%s %d", __func__, err);
1910 
1911 	ncq = dsm = 0;
1912 	aior = br->br_param;
1913 	p = aior->io_pr;
1914 	cfis = aior->cfis;
1915 	slot = aior->slot;
1916 	sc = p->pr_sc;
1917 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1918 
1919 	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
1920 	    cfis[2] == ATA_READ_FPDMA_QUEUED ||
1921 	    cfis[2] == ATA_SEND_FPDMA_QUEUED)
1922 		ncq = 1;
1923 	if (cfis[2] == ATA_DATA_SET_MANAGEMENT ||
1924 	    (cfis[2] == ATA_SEND_FPDMA_QUEUED &&
1925 	     (cfis[13] & 0x1f) == ATA_SFPDMA_DSM))
1926 		dsm = 1;
1927 
1928 	pthread_mutex_lock(&sc->mtx);
1929 
1930 	/*
1931 	 * Delete the blockif request from the busy list
1932 	 */
1933 	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1934 
1935 	/*
1936 	 * Move the blockif request back to the free list
1937 	 */
1938 	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
1939 
1940 	if (!err)
1941 		hdr->prdbc = aior->done;
1942 
1943 	if (!err && aior->more) {
1944 		if (dsm)
1945 			ahci_handle_dsm_trim(p, slot, cfis, aior->done);
1946 		else
1947 			ahci_handle_rw(p, slot, cfis, aior->done);
1948 		goto out;
1949 	}
1950 
1951 	if (!err)
1952 		tfd = ATA_S_READY | ATA_S_DSC;
1953 	else
1954 		tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1955 	if (ncq)
1956 		ahci_write_fis_sdb(p, slot, cfis, tfd);
1957 	else
1958 		ahci_write_fis_d2h(p, slot, cfis, tfd);
1959 
1960 	/*
1961 	 * This command is now complete.
1962 	 */
1963 	p->pending &= ~(1 << slot);
1964 
1965 	ahci_check_stopped(p);
1966 	ahci_handle_port(p);
1967 out:
1968 	pthread_mutex_unlock(&sc->mtx);
1969 	DPRINTF("%s exit", __func__);
1970 }
1971 
1972 static void
1973 atapi_ioreq_cb(struct blockif_req *br, int err)
1974 {
1975 	struct ahci_cmd_hdr *hdr;
1976 	struct ahci_ioreq *aior;
1977 	struct ahci_port *p;
1978 	struct pci_ahci_softc *sc;
1979 	uint8_t *cfis;
1980 	uint32_t tfd;
1981 	int slot;
1982 
1983 	DPRINTF("%s %d", __func__, err);
1984 
1985 	aior = br->br_param;
1986 	p = aior->io_pr;
1987 	cfis = aior->cfis;
1988 	slot = aior->slot;
1989 	sc = p->pr_sc;
1990 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + aior->slot * AHCI_CL_SIZE);
1991 
1992 	pthread_mutex_lock(&sc->mtx);
1993 
1994 	/*
1995 	 * Delete the blockif request from the busy list
1996 	 */
1997 	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1998 
1999 	/*
2000 	 * Move the blockif request back to the free list
2001 	 */
2002 	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
2003 
2004 	if (!err)
2005 		hdr->prdbc = aior->done;
2006 
2007 	if (!err && aior->more) {
2008 		atapi_read(p, slot, cfis, aior->done);
2009 		goto out;
2010 	}
2011 
2012 	if (!err) {
2013 		tfd = ATA_S_READY | ATA_S_DSC;
2014 	} else {
2015 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
2016 		p->asc = 0x21;
2017 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
2018 	}
2019 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
2020 	ahci_write_fis_d2h(p, slot, cfis, tfd);
2021 
2022 	/*
2023 	 * This command is now complete.
2024 	 */
2025 	p->pending &= ~(1 << slot);
2026 
2027 	ahci_check_stopped(p);
2028 	ahci_handle_port(p);
2029 out:
2030 	pthread_mutex_unlock(&sc->mtx);
2031 	DPRINTF("%s exit", __func__);
2032 }
2033 
2034 static void
2035 pci_ahci_ioreq_init(struct ahci_port *pr)
2036 {
2037 	struct ahci_ioreq *vr;
2038 	int i;
2039 
2040 	pr->ioqsz = blockif_queuesz(pr->bctx);
2041 	pr->ioreq = calloc(pr->ioqsz, sizeof(struct ahci_ioreq));
2042 	STAILQ_INIT(&pr->iofhd);
2043 
2044 	/*
2045 	 * Add all i/o request entries to the free queue
2046 	 */
2047 	for (i = 0; i < pr->ioqsz; i++) {
2048 		vr = &pr->ioreq[i];
2049 		vr->io_pr = pr;
2050 		if (!pr->atapi)
2051 			vr->io_req.br_callback = ata_ioreq_cb;
2052 		else
2053 			vr->io_req.br_callback = atapi_ioreq_cb;
2054 		vr->io_req.br_param = vr;
2055 		STAILQ_INSERT_TAIL(&pr->iofhd, vr, io_flist);
2056 	}
2057 
2058 	TAILQ_INIT(&pr->iobhd);
2059 }
2060 
2061 static void
2062 pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
2063 {
2064 	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
2065 	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
2066 	struct ahci_port *p = &sc->port[port];
2067 
2068 	DPRINTF("pci_ahci_port %d: write offset 0x%"PRIx64" value 0x%"PRIx64"",
2069 		port, offset, value);
2070 
2071 	switch (offset) {
2072 	case AHCI_P_CLB:
2073 		p->clb = value;
2074 		break;
2075 	case AHCI_P_CLBU:
2076 		p->clbu = value;
2077 		break;
2078 	case AHCI_P_FB:
2079 		p->fb = value;
2080 		break;
2081 	case AHCI_P_FBU:
2082 		p->fbu = value;
2083 		break;
2084 	case AHCI_P_IS:
2085 		p->is &= ~value;
2086 		ahci_port_intr(p);
2087 		break;
2088 	case AHCI_P_IE:
2089 		p->ie = value & 0xFDC000FF;
2090 		ahci_port_intr(p);
2091 		break;
2092 	case AHCI_P_CMD:
2093 	{
2094 		p->cmd &= ~(AHCI_P_CMD_ST | AHCI_P_CMD_SUD | AHCI_P_CMD_POD |
2095 		    AHCI_P_CMD_CLO | AHCI_P_CMD_FRE | AHCI_P_CMD_APSTE |
2096 		    AHCI_P_CMD_ATAPI | AHCI_P_CMD_DLAE | AHCI_P_CMD_ALPE |
2097 		    AHCI_P_CMD_ASP | AHCI_P_CMD_ICC_MASK);
2098 		p->cmd |= (AHCI_P_CMD_ST | AHCI_P_CMD_SUD | AHCI_P_CMD_POD |
2099 		    AHCI_P_CMD_CLO | AHCI_P_CMD_FRE | AHCI_P_CMD_APSTE |
2100 		    AHCI_P_CMD_ATAPI | AHCI_P_CMD_DLAE | AHCI_P_CMD_ALPE |
2101 		    AHCI_P_CMD_ASP | AHCI_P_CMD_ICC_MASK) & value;
2102 
2103 		if (!(value & AHCI_P_CMD_ST)) {
2104 			ahci_port_stop(p);
2105 		} else {
2106 			uint64_t clb;
2107 
2108 			p->cmd |= AHCI_P_CMD_CR;
2109 			clb = (uint64_t)p->clbu << 32 | p->clb;
2110 			p->cmd_lst = paddr_guest2host(ahci_ctx(sc), clb,
2111 					AHCI_CL_SIZE * AHCI_MAX_SLOTS);
2112 		}
2113 
2114 		if (value & AHCI_P_CMD_FRE) {
2115 			uint64_t fb;
2116 
2117 			p->cmd |= AHCI_P_CMD_FR;
2118 			fb = (uint64_t)p->fbu << 32 | p->fb;
2119 			/* we don't support FBSCP, so rfis size is 256Bytes */
2120 			p->rfis = paddr_guest2host(ahci_ctx(sc), fb, 256);
2121 		} else {
2122 			p->cmd &= ~AHCI_P_CMD_FR;
2123 		}
2124 
2125 		if (value & AHCI_P_CMD_CLO) {
2126 			p->tfd &= ~(ATA_S_BUSY | ATA_S_DRQ);
2127 			p->cmd &= ~AHCI_P_CMD_CLO;
2128 		}
2129 
2130 		if (value & AHCI_P_CMD_ICC_MASK) {
2131 			p->cmd &= ~AHCI_P_CMD_ICC_MASK;
2132 		}
2133 
2134 		ahci_handle_port(p);
2135 		break;
2136 	}
2137 	case AHCI_P_TFD:
2138 	case AHCI_P_SIG:
2139 	case AHCI_P_SSTS:
2140 		EPRINTLN("pci_ahci_port: read only registers 0x%"PRIx64"", offset);
2141 		break;
2142 	case AHCI_P_SCTL:
2143 		p->sctl = value;
2144 		if (!(p->cmd & AHCI_P_CMD_ST)) {
2145 			if (value & ATA_SC_DET_RESET)
2146 				ahci_port_reset(p);
2147 		}
2148 		break;
2149 	case AHCI_P_SERR:
2150 		p->serr &= ~value;
2151 		break;
2152 	case AHCI_P_SACT:
2153 		p->sact |= value;
2154 		break;
2155 	case AHCI_P_CI:
2156 		p->ci |= value;
2157 		ahci_handle_port(p);
2158 		break;
2159 	case AHCI_P_SNTF:
2160 	case AHCI_P_FBS:
2161 	default:
2162 		break;
2163 	}
2164 }
2165 
2166 static void
2167 pci_ahci_host_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
2168 {
2169 	DPRINTF("pci_ahci_host: write offset 0x%"PRIx64" value 0x%"PRIx64"",
2170 		offset, value);
2171 
2172 	switch (offset) {
2173 	case AHCI_CAP:
2174 	case AHCI_PI:
2175 	case AHCI_VS:
2176 	case AHCI_CAP2:
2177 		DPRINTF("pci_ahci_host: read only registers 0x%"PRIx64"", offset);
2178 		break;
2179 	case AHCI_GHC:
2180 		if (value & AHCI_GHC_HR) {
2181 			ahci_reset(sc);
2182 			break;
2183 		}
2184 		if (value & AHCI_GHC_IE)
2185 			sc->ghc |= AHCI_GHC_IE;
2186 		else
2187 			sc->ghc &= ~AHCI_GHC_IE;
2188 		ahci_generate_intr(sc, 0xffffffff);
2189 		break;
2190 	case AHCI_IS:
2191 		sc->is &= ~value;
2192 		ahci_generate_intr(sc, value);
2193 		break;
2194 	default:
2195 		break;
2196 	}
2197 }
2198 
2199 static void
2200 pci_ahci_write(struct pci_devinst *pi, int baridx, uint64_t offset, int size,
2201     uint64_t value)
2202 {
2203 	struct pci_ahci_softc *sc = pi->pi_arg;
2204 
2205 	assert(baridx == 5);
2206 	assert((offset % 4) == 0 && size == 4);
2207 
2208 	pthread_mutex_lock(&sc->mtx);
2209 
2210 	if (offset < AHCI_OFFSET)
2211 		pci_ahci_host_write(sc, offset, value);
2212 	else if (offset < (uint64_t)AHCI_OFFSET + sc->ports * AHCI_STEP)
2213 		pci_ahci_port_write(sc, offset, value);
2214 	else
2215 		EPRINTLN("pci_ahci: unknown i/o write offset 0x%"PRIx64"", offset);
2216 
2217 	pthread_mutex_unlock(&sc->mtx);
2218 }
2219 
2220 static uint64_t
2221 pci_ahci_host_read(struct pci_ahci_softc *sc, uint64_t offset)
2222 {
2223 	uint32_t value;
2224 
2225 	switch (offset) {
2226 	case AHCI_CAP:
2227 	case AHCI_GHC:
2228 	case AHCI_IS:
2229 	case AHCI_PI:
2230 	case AHCI_VS:
2231 	case AHCI_CCCC:
2232 	case AHCI_CCCP:
2233 	case AHCI_EM_LOC:
2234 	case AHCI_EM_CTL:
2235 	case AHCI_CAP2:
2236 	{
2237 		uint32_t *p = &sc->cap;
2238 		p += (offset - AHCI_CAP) / sizeof(uint32_t);
2239 		value = *p;
2240 		break;
2241 	}
2242 	default:
2243 		value = 0;
2244 		break;
2245 	}
2246 	DPRINTF("pci_ahci_host: read offset 0x%"PRIx64" value 0x%x",
2247 		offset, value);
2248 
2249 	return (value);
2250 }
2251 
2252 static uint64_t
2253 pci_ahci_port_read(struct pci_ahci_softc *sc, uint64_t offset)
2254 {
2255 	uint32_t value;
2256 	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
2257 	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
2258 
2259 	switch (offset) {
2260 	case AHCI_P_CLB:
2261 	case AHCI_P_CLBU:
2262 	case AHCI_P_FB:
2263 	case AHCI_P_FBU:
2264 	case AHCI_P_IS:
2265 	case AHCI_P_IE:
2266 	case AHCI_P_CMD:
2267 	case AHCI_P_TFD:
2268 	case AHCI_P_SIG:
2269 	case AHCI_P_SSTS:
2270 	case AHCI_P_SCTL:
2271 	case AHCI_P_SERR:
2272 	case AHCI_P_SACT:
2273 	case AHCI_P_CI:
2274 	case AHCI_P_SNTF:
2275 	case AHCI_P_FBS:
2276 	{
2277 		uint32_t *p= &sc->port[port].clb;
2278 		p += (offset - AHCI_P_CLB) / sizeof(uint32_t);
2279 		value = *p;
2280 		break;
2281 	}
2282 	default:
2283 		value = 0;
2284 		break;
2285 	}
2286 
2287 	DPRINTF("pci_ahci_port %d: read offset 0x%"PRIx64" value 0x%x",
2288 		port, offset, value);
2289 
2290 	return value;
2291 }
2292 
2293 static uint64_t
2294 pci_ahci_read(struct pci_devinst *pi, int baridx, uint64_t regoff, int size)
2295 {
2296 	struct pci_ahci_softc *sc = pi->pi_arg;
2297 	uint64_t offset;
2298 	uint32_t value;
2299 
2300 	assert(baridx == 5);
2301 	assert(size == 1 || size == 2 || size == 4);
2302 	assert((regoff & (size - 1)) == 0);
2303 
2304 	pthread_mutex_lock(&sc->mtx);
2305 
2306 	offset = regoff & ~0x3;	    /* round down to a multiple of 4 bytes */
2307 	if (offset < AHCI_OFFSET)
2308 		value = pci_ahci_host_read(sc, offset);
2309 	else if (offset < (uint64_t)AHCI_OFFSET + sc->ports * AHCI_STEP)
2310 		value = pci_ahci_port_read(sc, offset);
2311 	else {
2312 		value = 0;
2313 		EPRINTLN("pci_ahci: unknown i/o read offset 0x%"PRIx64"",
2314 		    regoff);
2315 	}
2316 	value >>= 8 * (regoff & 0x3);
2317 
2318 	pthread_mutex_unlock(&sc->mtx);
2319 
2320 	return (value);
2321 }
2322 
2323 /*
2324  * Each AHCI controller has a "port" node which contains nodes for
2325  * each port named after the decimal number of the port (no leading
2326  * zeroes).  Port nodes contain a "type" ("hd" or "cd"), as well as
2327  * options for blockif.  For example:
2328  *
2329  * pci.0.1.0
2330  *          .device="ahci"
2331  *          .port
2332  *               .0
2333  *                 .type="hd"
2334  *                 .path="/path/to/image"
2335  */
2336 static int
2337 pci_ahci_legacy_config_port(nvlist_t *nvl, int port, const char *type,
2338     const char *opts)
2339 {
2340 	char node_name[sizeof("XX")];
2341 	nvlist_t *port_nvl;
2342 
2343 	snprintf(node_name, sizeof(node_name), "%d", port);
2344 	port_nvl = create_relative_config_node(nvl, node_name);
2345 	set_config_value_node(port_nvl, "type", type);
2346 	return (blockif_legacy_config(port_nvl, opts));
2347 }
2348 
2349 static int
2350 pci_ahci_legacy_config(nvlist_t *nvl, const char *opts)
2351 {
2352 	nvlist_t *ports_nvl;
2353 	const char *type;
2354 	char *next, *next2, *str, *tofree;
2355 	int p, ret;
2356 
2357 	if (opts == NULL)
2358 		return (0);
2359 
2360 	ports_nvl = create_relative_config_node(nvl, "port");
2361 	ret = 1;
2362 	tofree = str = strdup(opts);
2363 	for (p = 0; p < MAX_PORTS && str != NULL; p++, str = next) {
2364 		/* Identify and cut off type of present port. */
2365 		if (strncmp(str, "hd:", 3) == 0) {
2366 			type = "hd";
2367 			str += 3;
2368 		} else if (strncmp(str, "cd:", 3) == 0) {
2369 			type = "cd";
2370 			str += 3;
2371 		} else
2372 			type = NULL;
2373 
2374 		/* Find and cut off the next port options. */
2375 		next = strstr(str, ",hd:");
2376 		next2 = strstr(str, ",cd:");
2377 		if (next == NULL || (next2 != NULL && next2 < next))
2378 			next = next2;
2379 		if (next != NULL) {
2380 			next[0] = 0;
2381 			next++;
2382 		}
2383 
2384 		if (str[0] == 0)
2385 			continue;
2386 
2387 		if (type == NULL) {
2388 			EPRINTLN("Missing or invalid type for port %d: \"%s\"",
2389 			    p, str);
2390 			goto out;
2391 		}
2392 
2393 		if (pci_ahci_legacy_config_port(ports_nvl, p, type, str) != 0)
2394 			goto out;
2395 	}
2396 	ret = 0;
2397 out:
2398 	free(tofree);
2399 	return (ret);
2400 }
2401 
2402 static int
2403 pci_ahci_cd_legacy_config(nvlist_t *nvl, const char *opts)
2404 {
2405 	nvlist_t *ports_nvl;
2406 
2407 	ports_nvl = create_relative_config_node(nvl, "port");
2408 	return (pci_ahci_legacy_config_port(ports_nvl, 0, "cd", opts));
2409 }
2410 
2411 static int
2412 pci_ahci_hd_legacy_config(nvlist_t *nvl, const char *opts)
2413 {
2414 	nvlist_t *ports_nvl;
2415 
2416 	ports_nvl = create_relative_config_node(nvl, "port");
2417 	return (pci_ahci_legacy_config_port(ports_nvl, 0, "hd", opts));
2418 }
2419 
2420 static int
2421 pci_ahci_init(struct pci_devinst *pi, nvlist_t *nvl)
2422 {
2423 	char bident[sizeof("XXX:XXX:XXX")];
2424 	char node_name[sizeof("XX")];
2425 	struct blockif_ctxt *bctxt;
2426 	struct pci_ahci_softc *sc;
2427 	int atapi, ret, slots, p;
2428 	MD5_CTX mdctx;
2429 	u_char digest[16];
2430 	const char *path, *type, *value;
2431 	nvlist_t *ports_nvl, *port_nvl;
2432 
2433 	ret = 0;
2434 
2435 #ifdef AHCI_DEBUG
2436 	dbg = fopen("/tmp/log", "w+");
2437 #endif
2438 
2439 	sc = calloc(1, sizeof(struct pci_ahci_softc));
2440 	pi->pi_arg = sc;
2441 	sc->asc_pi = pi;
2442 	pthread_mutex_init(&sc->mtx, NULL);
2443 	sc->ports = 0;
2444 	sc->pi = 0;
2445 	slots = 32;
2446 
2447 	ports_nvl = find_relative_config_node(nvl, "port");
2448 	for (p = 0; ports_nvl != NULL && p < MAX_PORTS; p++) {
2449 		struct ata_params *ata_ident = &sc->port[p].ata_ident;
2450 		char ident[AHCI_PORT_IDENT];
2451 
2452 		snprintf(node_name, sizeof(node_name), "%d", p);
2453 		port_nvl = find_relative_config_node(ports_nvl, node_name);
2454 		if (port_nvl == NULL)
2455 			continue;
2456 
2457 		type = get_config_value_node(port_nvl, "type");
2458 		if (type == NULL)
2459 			continue;
2460 
2461 		if (strcmp(type, "hd") == 0)
2462 			atapi = 0;
2463 		else
2464 			atapi = 1;
2465 
2466 		/*
2467 		 * Attempt to open the backing image. Use the PCI slot/func
2468 		 * and the port number for the identifier string.
2469 		 */
2470 		snprintf(bident, sizeof(bident), "%u:%u:%u", pi->pi_slot,
2471 		    pi->pi_func, p);
2472 
2473 		bctxt = blockif_open(port_nvl, bident);
2474 		if (bctxt == NULL) {
2475 			sc->ports = p;
2476 			ret = 1;
2477 			goto open_fail;
2478 		}
2479 
2480 		ret = blockif_add_boot_device(pi, bctxt);
2481 		if (ret) {
2482 			sc->ports = p;
2483 			goto open_fail;
2484 		}
2485 
2486 		sc->port[p].bctx = bctxt;
2487 		sc->port[p].pr_sc = sc;
2488 		sc->port[p].port = p;
2489 		sc->port[p].atapi = atapi;
2490 
2491 		/*
2492 		 * Create an identifier for the backing file.
2493 		 * Use parts of the md5 sum of the filename
2494 		 */
2495 		path = get_config_value_node(port_nvl, "path");
2496 		MD5Init(&mdctx);
2497 		MD5Update(&mdctx, path, strlen(path));
2498 		MD5Final(digest, &mdctx);
2499 		snprintf(ident, AHCI_PORT_IDENT,
2500 			"BHYVE-%02X%02X-%02X%02X-%02X%02X",
2501 			digest[0], digest[1], digest[2], digest[3], digest[4],
2502 			digest[5]);
2503 
2504 		memset(ata_ident, 0, sizeof(struct ata_params));
2505 		ata_string((uint8_t*)&ata_ident->serial, ident, 20);
2506 		ata_string((uint8_t*)&ata_ident->revision, "001", 8);
2507 		if (atapi)
2508 			ata_string((uint8_t*)&ata_ident->model, "BHYVE SATA DVD ROM", 40);
2509 		else
2510 			ata_string((uint8_t*)&ata_ident->model, "BHYVE SATA DISK", 40);
2511 		value = get_config_value_node(port_nvl, "nmrr");
2512 		if (value != NULL)
2513 			ata_ident->media_rotation_rate = atoi(value);
2514 		value = get_config_value_node(port_nvl, "ser");
2515 		if (value != NULL)
2516 			ata_string((uint8_t*)(&ata_ident->serial), value, 20);
2517 		value = get_config_value_node(port_nvl, "rev");
2518 		if (value != NULL)
2519 			ata_string((uint8_t*)(&ata_ident->revision), value, 8);
2520 		value = get_config_value_node(port_nvl, "model");
2521 		if (value != NULL)
2522 			ata_string((uint8_t*)(&ata_ident->model), value, 40);
2523 		ata_identify_init(&sc->port[p], atapi);
2524 
2525 		/*
2526 		 * Allocate blockif request structures and add them
2527 		 * to the free list
2528 		 */
2529 		pci_ahci_ioreq_init(&sc->port[p]);
2530 
2531 		sc->pi |= (1 << p);
2532 		if (sc->port[p].ioqsz < slots)
2533 			slots = sc->port[p].ioqsz;
2534 	}
2535 	sc->ports = p;
2536 
2537 	/* Intel ICH8 AHCI */
2538 	--slots;
2539 	if (sc->ports < DEF_PORTS)
2540 		sc->ports = DEF_PORTS;
2541 	sc->cap = AHCI_CAP_64BIT | AHCI_CAP_SNCQ | AHCI_CAP_SSNTF |
2542 	    AHCI_CAP_SMPS | AHCI_CAP_SSS | AHCI_CAP_SALP |
2543 	    AHCI_CAP_SAL | AHCI_CAP_SCLO | (0x3 << AHCI_CAP_ISS_SHIFT)|
2544 	    AHCI_CAP_PMD | AHCI_CAP_SSC | AHCI_CAP_PSC |
2545 	    (slots << AHCI_CAP_NCS_SHIFT) | AHCI_CAP_SXS | (sc->ports - 1);
2546 
2547 	sc->vs = 0x10300;
2548 	sc->cap2 = AHCI_CAP2_APST;
2549 	ahci_reset(sc);
2550 
2551 	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x2821);
2552 	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x8086);
2553 	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
2554 	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_STORAGE_SATA);
2555 	pci_set_cfgdata8(pi, PCIR_PROGIF, PCIP_STORAGE_SATA_AHCI_1_0);
2556 	p = MIN(sc->ports, 16);
2557 	p = flsl(p) - ((p & (p - 1)) ? 0 : 1);
2558 	pci_emul_add_msicap(pi, 1 << p);
2559 	pci_emul_alloc_bar(pi, 5, PCIBAR_MEM32,
2560 	    AHCI_OFFSET + sc->ports * AHCI_STEP);
2561 
2562 	pci_lintr_request(pi);
2563 
2564 open_fail:
2565 	if (ret) {
2566 		for (p = 0; p < sc->ports; p++) {
2567 			if (sc->port[p].bctx != NULL)
2568 				blockif_close(sc->port[p].bctx);
2569 		}
2570 		free(sc);
2571 	}
2572 
2573 	return (ret);
2574 }
2575 
2576 #ifdef BHYVE_SNAPSHOT
2577 static int
2578 pci_ahci_snapshot(struct vm_snapshot_meta *meta)
2579 {
2580 	int i, ret;
2581 	void *bctx;
2582 	struct pci_devinst *pi;
2583 	struct pci_ahci_softc *sc;
2584 	struct ahci_port *port;
2585 
2586 	pi = meta->dev_data;
2587 	sc = pi->pi_arg;
2588 
2589 	/* TODO: add mtx lock/unlock */
2590 
2591 	SNAPSHOT_VAR_OR_LEAVE(sc->ports, meta, ret, done);
2592 	SNAPSHOT_VAR_OR_LEAVE(sc->cap, meta, ret, done);
2593 	SNAPSHOT_VAR_OR_LEAVE(sc->ghc, meta, ret, done);
2594 	SNAPSHOT_VAR_OR_LEAVE(sc->is, meta, ret, done);
2595 	SNAPSHOT_VAR_OR_LEAVE(sc->pi, meta, ret, done);
2596 	SNAPSHOT_VAR_OR_LEAVE(sc->vs, meta, ret, done);
2597 	SNAPSHOT_VAR_OR_LEAVE(sc->ccc_ctl, meta, ret, done);
2598 	SNAPSHOT_VAR_OR_LEAVE(sc->ccc_pts, meta, ret, done);
2599 	SNAPSHOT_VAR_OR_LEAVE(sc->em_loc, meta, ret, done);
2600 	SNAPSHOT_VAR_OR_LEAVE(sc->em_ctl, meta, ret, done);
2601 	SNAPSHOT_VAR_OR_LEAVE(sc->cap2, meta, ret, done);
2602 	SNAPSHOT_VAR_OR_LEAVE(sc->bohc, meta, ret, done);
2603 	SNAPSHOT_VAR_OR_LEAVE(sc->lintr, meta, ret, done);
2604 
2605 	for (i = 0; i < MAX_PORTS; i++) {
2606 		port = &sc->port[i];
2607 
2608 		if (meta->op == VM_SNAPSHOT_SAVE)
2609 			bctx = port->bctx;
2610 
2611 		SNAPSHOT_VAR_OR_LEAVE(bctx, meta, ret, done);
2612 		SNAPSHOT_VAR_OR_LEAVE(port->port, meta, ret, done);
2613 
2614 		/* Mostly for restore; save is ensured by the lines above. */
2615 		if (((bctx == NULL) && (port->bctx != NULL)) ||
2616 		    ((bctx != NULL) && (port->bctx == NULL))) {
2617 			EPRINTLN("%s: ports not matching", __func__);
2618 			ret = EINVAL;
2619 			goto done;
2620 		}
2621 
2622 		if (port->bctx == NULL)
2623 			continue;
2624 
2625 		if (port->port != i) {
2626 			EPRINTLN("%s: ports not matching: "
2627 			    "actual: %d expected: %d", __func__, port->port, i);
2628 			ret = EINVAL;
2629 			goto done;
2630 		}
2631 
2632 		SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(pi->pi_vmctx, port->cmd_lst,
2633 			AHCI_CL_SIZE * AHCI_MAX_SLOTS, false, meta, ret, done);
2634 		SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(pi->pi_vmctx, port->rfis, 256,
2635 		    false, meta, ret, done);
2636 
2637 		SNAPSHOT_VAR_OR_LEAVE(port->ata_ident, meta, ret, done);
2638 		SNAPSHOT_VAR_OR_LEAVE(port->atapi, meta, ret, done);
2639 		SNAPSHOT_VAR_OR_LEAVE(port->reset, meta, ret, done);
2640 		SNAPSHOT_VAR_OR_LEAVE(port->waitforclear, meta, ret, done);
2641 		SNAPSHOT_VAR_OR_LEAVE(port->mult_sectors, meta, ret, done);
2642 		SNAPSHOT_VAR_OR_LEAVE(port->xfermode, meta, ret, done);
2643 		SNAPSHOT_VAR_OR_LEAVE(port->err_cfis, meta, ret, done);
2644 		SNAPSHOT_VAR_OR_LEAVE(port->sense_key, meta, ret, done);
2645 		SNAPSHOT_VAR_OR_LEAVE(port->asc, meta, ret, done);
2646 		SNAPSHOT_VAR_OR_LEAVE(port->ccs, meta, ret, done);
2647 		SNAPSHOT_VAR_OR_LEAVE(port->pending, meta, ret, done);
2648 
2649 		SNAPSHOT_VAR_OR_LEAVE(port->clb, meta, ret, done);
2650 		SNAPSHOT_VAR_OR_LEAVE(port->clbu, meta, ret, done);
2651 		SNAPSHOT_VAR_OR_LEAVE(port->fb, meta, ret, done);
2652 		SNAPSHOT_VAR_OR_LEAVE(port->fbu, meta, ret, done);
2653 		SNAPSHOT_VAR_OR_LEAVE(port->ie, meta, ret, done);
2654 		SNAPSHOT_VAR_OR_LEAVE(port->cmd, meta, ret, done);
2655 		SNAPSHOT_VAR_OR_LEAVE(port->unused0, meta, ret, done);
2656 		SNAPSHOT_VAR_OR_LEAVE(port->tfd, meta, ret, done);
2657 		SNAPSHOT_VAR_OR_LEAVE(port->sig, meta, ret, done);
2658 		SNAPSHOT_VAR_OR_LEAVE(port->ssts, meta, ret, done);
2659 		SNAPSHOT_VAR_OR_LEAVE(port->sctl, meta, ret, done);
2660 		SNAPSHOT_VAR_OR_LEAVE(port->serr, meta, ret, done);
2661 		SNAPSHOT_VAR_OR_LEAVE(port->sact, meta, ret, done);
2662 		SNAPSHOT_VAR_OR_LEAVE(port->ci, meta, ret, done);
2663 		SNAPSHOT_VAR_OR_LEAVE(port->sntf, meta, ret, done);
2664 		SNAPSHOT_VAR_OR_LEAVE(port->fbs, meta, ret, done);
2665 		SNAPSHOT_VAR_OR_LEAVE(port->ioqsz, meta, ret, done);
2666 
2667 		assert(TAILQ_EMPTY(&port->iobhd));
2668 	}
2669 
2670 done:
2671 	return (ret);
2672 }
2673 
2674 static int
2675 pci_ahci_pause(struct pci_devinst *pi)
2676 {
2677 	struct pci_ahci_softc *sc;
2678 	struct blockif_ctxt *bctxt;
2679 	int i;
2680 
2681 	sc = pi->pi_arg;
2682 
2683 	for (i = 0; i < MAX_PORTS; i++) {
2684 		bctxt = sc->port[i].bctx;
2685 		if (bctxt == NULL)
2686 			continue;
2687 
2688 		blockif_pause(bctxt);
2689 	}
2690 
2691 	return (0);
2692 }
2693 
2694 static int
2695 pci_ahci_resume(struct pci_devinst *pi)
2696 {
2697 	struct pci_ahci_softc *sc;
2698 	struct blockif_ctxt *bctxt;
2699 	int i;
2700 
2701 	sc = pi->pi_arg;
2702 
2703 	for (i = 0; i < MAX_PORTS; i++) {
2704 		bctxt = sc->port[i].bctx;
2705 		if (bctxt == NULL)
2706 			continue;
2707 
2708 		blockif_resume(bctxt);
2709 	}
2710 
2711 	return (0);
2712 }
2713 #endif	/* BHYVE_SNAPSHOT */
2714 
2715 /*
2716  * Use separate emulation names to distinguish drive and atapi devices
2717  */
2718 static const struct pci_devemu pci_de_ahci = {
2719 	.pe_emu =	"ahci",
2720 	.pe_init =	pci_ahci_init,
2721 	.pe_legacy_config = pci_ahci_legacy_config,
2722 	.pe_barwrite =	pci_ahci_write,
2723 	.pe_barread =	pci_ahci_read,
2724 #ifdef BHYVE_SNAPSHOT
2725 	.pe_snapshot =	pci_ahci_snapshot,
2726 	.pe_pause =	pci_ahci_pause,
2727 	.pe_resume =	pci_ahci_resume,
2728 #endif
2729 };
2730 PCI_EMUL_SET(pci_de_ahci);
2731 
2732 static const struct pci_devemu pci_de_ahci_hd = {
2733 	.pe_emu =	"ahci-hd",
2734 	.pe_legacy_config = pci_ahci_hd_legacy_config,
2735 	.pe_alias =	"ahci",
2736 };
2737 PCI_EMUL_SET(pci_de_ahci_hd);
2738 
2739 static const struct pci_devemu pci_de_ahci_cd = {
2740 	.pe_emu =	"ahci-cd",
2741 	.pe_legacy_config = pci_ahci_cd_legacy_config,
2742 	.pe_alias =	"ahci",
2743 };
2744 PCI_EMUL_SET(pci_de_ahci_cd);
2745