xref: /illumos-gate/usr/src/cmd/bhyve/common/pci_ahci.c (revision 4b9db4f6425b1a08fca4390f446072c4a6aae8d5)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2013  Zhixiang Yu <zcore@freebsd.org>
5  * Copyright (c) 2015-2016 Alexander Motin <mav@FreeBSD.org>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 
31 #include <sys/param.h>
32 #include <sys/linker_set.h>
33 #include <sys/stat.h>
34 #include <sys/uio.h>
35 #include <sys/ioctl.h>
36 #include <sys/disk.h>
37 #include <sys/ata.h>
38 #include <sys/endian.h>
39 
40 #include <errno.h>
41 #include <fcntl.h>
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <stdint.h>
45 #include <string.h>
46 #include <strings.h>
47 #include <unistd.h>
48 #include <assert.h>
49 #include <pthread.h>
50 #include <pthread_np.h>
51 #include <inttypes.h>
52 #include <md5.h>
53 
54 #include "bhyverun.h"
55 #include "config.h"
56 #include "debug.h"
57 #include "pci_emul.h"
58 #include "ahci.h"
59 #include "block_if.h"
60 
/* Port-count limits; MAX_PORTS also sizes the per-controller port array. */
#define	DEF_PORTS	6	/* Intel ICH8 AHCI supports 6 ports */
#define	MAX_PORTS	32	/* AHCI supports 32 ports */

/* Signature values loaded into a port's 'sig' field by ahci_port_reset(). */
#define	PxSIG_ATA	0x00000101 /* ATA drive */
#define	PxSIG_ATAPI	0xeb140101 /* ATAPI drive */
66 
/*
 * SATA Frame Information Structure (FIS) type codes.  The FIS builders in
 * this file store the code in byte 0 of each FIS and pass it to
 * ahci_write_fis(), which only accepts REGD2H, SETDEVBITS and PIOSETUP.
 */
enum sata_fis_type {
	FIS_TYPE_REGH2D		= 0x27,	/* Register FIS - host to device */
	FIS_TYPE_REGD2H		= 0x34,	/* Register FIS - device to host */
	FIS_TYPE_DMAACT		= 0x39,	/* DMA activate FIS - device to host */
	FIS_TYPE_DMASETUP	= 0x41,	/* DMA setup FIS - bidirectional */
	FIS_TYPE_DATA		= 0x46,	/* Data FIS - bidirectional */
	FIS_TYPE_BIST		= 0x58,	/* BIST activate FIS - bidirectional */
	FIS_TYPE_PIOSETUP	= 0x5F,	/* PIO setup FIS - device to host */
	FIS_TYPE_SETDEVBITS	= 0xA1,	/* Set dev bits FIS - device to host */
};
77 
/*
 * SCSI opcodes
 *
 * NOTE(review): presumably matched against the first byte of an ATAPI
 * packet command; the dispatch code is outside this part of the file.
 */
#define	TEST_UNIT_READY		0x00
#define	REQUEST_SENSE		0x03
#define	INQUIRY			0x12
#define	START_STOP_UNIT		0x1B
#define	PREVENT_ALLOW		0x1E
#define	READ_CAPACITY		0x25
#define	READ_10			0x28
#define	POSITION_TO_ELEMENT	0x2B
#define	READ_TOC		0x43
#define	GET_EVENT_STATUS_NOTIFICATION 0x4A
#define	MODE_SENSE_10		0x5A
#define	REPORT_LUNS		0xA0
#define	READ_12			0xA8
#define	READ_CD			0xBE

/*
 * SCSI mode page codes
 */
#define	MODEPAGE_RW_ERROR_RECOVERY	0x01
#define	MODEPAGE_CD_CAPABILITIES	0x2A

/*
 * ATA commands
 *
 * NOTE(review): judging by the names these look like SET FEATURES
 * subcommand / SATA feature codes rather than command opcodes -- confirm
 * against the users of these macros.
 */
#define	ATA_SF_ENAB_SATA_SF		0x10
#define	ATA_SATA_SF_AN			0x05
#define	ATA_SF_DIS_SATA_SF		0x90
108 
/*
 * Debug tracing.  With AHCI_DEBUG defined, DPRINTF() formats its arguments
 * to the 'dbg' stream and flushes it; otherwise it expands to nothing.
 */
#ifdef AHCI_DEBUG
static FILE *dbg;
#define	DPRINTF(fmt, ...)					\
	do {							\
		fprintf(dbg, fmt, ##__VA_ARGS__);		\
		fflush(dbg);					\
	} while (0)
#else
#define	DPRINTF(fmt, ...)
#endif
118 
/*
 * Length of a port identity buffer (presumably 20 characters plus a
 * terminating NUL).  Parenthesized so the macro is safe inside larger
 * expressions such as 'AHCI_PORT_IDENT * 2'.
 */
#define	AHCI_PORT_IDENT	(20 + 1)
120 
/*
 * One outstanding block I/O issued on behalf of a command slot.  Requests
 * are taken from the port's free list (iofhd) and kept on its busy list
 * (iobhd) while they are in flight.
 */
struct ahci_ioreq {
	struct blockif_req io_req;	/* request handed to blockif_*() */
	struct ahci_port *io_pr;	/* port pointer (set outside this part of the file) */
	STAILQ_ENTRY(ahci_ioreq) io_flist;	/* linkage on the free list */
	TAILQ_ENTRY(ahci_ioreq) io_blist;	/* linkage on the busy list */
	uint8_t *cfis;			/* command FIS of the originating command */
	uint32_t len;			/* total byte length of the command */
	uint32_t done;			/* bytes already issued */
	int slot;			/* command slot number */
	int more;			/* nonzero if more I/O is needed to finish */
};
132 
/*
 * Per-port emulation state.
 */
struct ahci_port {
	struct blockif_ctxt *bctx;	/* backing store; NULL means no device */
	struct pci_ahci_softc *pr_sc;	/* owning controller */
	struct ata_params ata_ident;	/* IDENTIFY data, see ata_identify_init() */
	uint8_t *cmd_lst;		/* command list, indexed by slot */
	uint8_t *rfis;			/* received-FIS area written by ahci_write_fis() */
	int port;			/* port number, used as bit index in sc->is */
	int atapi;			/* nonzero for an ATAPI device */
	int reset;
	int waitforclear;		/* set when an error FIS was posted */
	int mult_sectors;		/* reset to 128 by ahci_port_reset() */
	uint8_t xfermode;		/* reset to ATA_UDMA6 by ahci_port_reset() */
	uint8_t err_cfis[20];		/* saved error data, returned as log page 0x10 */
	uint8_t sense_key;		/* ATAPI sense key */
	uint8_t asc;			/* ATAPI additional sense code */
	u_int ccs;			/* current command slot */
	uint32_t pending;		/* bitmap of slots with I/O in flight */

	/*
	 * Port register values (field names follow the AHCI Px* registers).
	 */
	uint32_t clb;
	uint32_t clbu;
	uint32_t fb;
	uint32_t fbu;
	uint32_t is;
	uint32_t ie;
	uint32_t cmd;
	uint32_t unused0;
	uint32_t tfd;
	uint32_t sig;
	uint32_t ssts;
	uint32_t sctl;
	uint32_t serr;
	uint32_t sact;
	uint32_t ci;
	uint32_t sntf;
	uint32_t fbs;

	/*
	 * i/o request info
	 */
	struct ahci_ioreq *ioreq;
	int ioqsz;
	STAILQ_HEAD(ahci_fhead, ahci_ioreq) iofhd;	/* free requests */
	TAILQ_HEAD(ahci_bhead, ahci_ioreq) iobhd;	/* busy (in-flight) requests */
};
177 
/*
 * Command header, one per slot in the port's command list
 * (located at cmd_lst + slot * AHCI_CL_SIZE).
 */
struct ahci_cmd_hdr {
	uint16_t flags;
	uint16_t prdtl;		/* number of PRDT entries for the command */
	uint32_t prdbc;		/* bytes transferred; updated by write_prdt() */
	uint64_t ctba;
	uint32_t reserved[4];
};
185 
/*
 * Physical region descriptor table entry; the table starts 0x80 bytes into
 * the command table that holds the command FIS.
 */
struct ahci_prdt_entry {
	uint64_t dba;		/* guest-physical address of the data buffer */
	uint32_t reserved;
#define	DBCMASK		0x3fffff
	uint32_t dbc;		/* (dbc & DBCMASK) is the byte count minus one */
};
192 
/*
 * Per-controller state: the HBA-wide registers plus the array of ports.
 */
struct pci_ahci_softc {
	struct pci_devinst *asc_pi;	/* PCI device instance */
	pthread_mutex_t	mtx;		/* must be held in ahci_port_stop() */
	int ports;			/* number of ports in use */
	uint32_t cap;
	uint32_t ghc;			/* global control; AHCI_GHC_IE gates interrupts */
	uint32_t is;			/* global interrupt status, one bit per port */
	uint32_t pi;
	uint32_t vs;
	uint32_t ccc_ctl;
	uint32_t ccc_pts;
	uint32_t em_loc;
	uint32_t em_ctl;
	uint32_t cap2;
	uint32_t bohc;
	uint32_t lintr;			/* nonzero while the legacy interrupt is asserted */
	struct ahci_port port[MAX_PORTS];
};
/* VM context of the controller, as needed by paddr_guest2host(). */
#define	ahci_ctx(sc)	((sc)->asc_pi->pi_vmctx)
212 
213 static void ahci_handle_port(struct ahci_port *p);
214 
/*
 * Convert a logical block address to minute/second/frame form and store it
 * in buf[0..2].  150 frames are added first; a second holds 75 frames and a
 * minute 60 seconds.
 */
static inline void
lba_to_msf(uint8_t *buf, int lba)
{
	int frames = lba + 150;
	int seconds = frames / 75;

	buf[0] = seconds / 60;
	buf[1] = seconds % 60;
	buf[2] = frames % 75;
}
222 
223 /*
224  * Generate HBA interrupts on global IS register write.
225  */
226 static void
227 ahci_generate_intr(struct pci_ahci_softc *sc, uint32_t mask)
228 {
229 	struct pci_devinst *pi = sc->asc_pi;
230 	struct ahci_port *p;
231 	int i, nmsg;
232 	uint32_t mmask;
233 
234 	/* Update global IS from PxIS/PxIE. */
235 	for (i = 0; i < sc->ports; i++) {
236 		p = &sc->port[i];
237 		if (p->is & p->ie)
238 			sc->is |= (1 << i);
239 	}
240 	DPRINTF("%s(%08x) %08x", __func__, mask, sc->is);
241 
242 	/* If there is nothing enabled -- clear legacy interrupt and exit. */
243 	if (sc->is == 0 || (sc->ghc & AHCI_GHC_IE) == 0) {
244 		if (sc->lintr) {
245 			pci_lintr_deassert(pi);
246 			sc->lintr = 0;
247 		}
248 		return;
249 	}
250 
251 	/* If there is anything and no MSI -- assert legacy interrupt. */
252 	nmsg = pci_msi_maxmsgnum(pi);
253 	if (nmsg == 0) {
254 		if (!sc->lintr) {
255 			sc->lintr = 1;
256 			pci_lintr_assert(pi);
257 		}
258 		return;
259 	}
260 
261 	/* Assert respective MSIs for ports that were touched. */
262 	for (i = 0; i < nmsg; i++) {
263 		if (sc->ports <= nmsg || i < nmsg - 1)
264 			mmask = 1 << i;
265 		else
266 			mmask = 0xffffffff << i;
267 		if (sc->is & mask && mmask & mask)
268 			pci_generate_msi(pi, i);
269 	}
270 }
271 
272 /*
273  * Generate HBA interrupt on specific port event.
274  */
275 static void
276 ahci_port_intr(struct ahci_port *p)
277 {
278 	struct pci_ahci_softc *sc = p->pr_sc;
279 	struct pci_devinst *pi = sc->asc_pi;
280 	int nmsg;
281 
282 	DPRINTF("%s(%d) %08x/%08x %08x", __func__,
283 	    p->port, p->is, p->ie, sc->is);
284 
285 	/* If there is nothing enabled -- we are done. */
286 	if ((p->is & p->ie) == 0)
287 		return;
288 
289 	/* In case of non-shared MSI always generate interrupt. */
290 	nmsg = pci_msi_maxmsgnum(pi);
291 	if (sc->ports <= nmsg || p->port < nmsg - 1) {
292 		sc->is |= (1 << p->port);
293 		if ((sc->ghc & AHCI_GHC_IE) == 0)
294 			return;
295 		pci_generate_msi(pi, p->port);
296 		return;
297 	}
298 
299 	/* If IS for this port is already set -- do nothing. */
300 	if (sc->is & (1 << p->port))
301 		return;
302 
303 	sc->is |= (1 << p->port);
304 
305 	/* If interrupts are enabled -- generate one. */
306 	if ((sc->ghc & AHCI_GHC_IE) == 0)
307 		return;
308 	if (nmsg > 0) {
309 		pci_generate_msi(pi, nmsg - 1);
310 	} else if (!sc->lintr) {
311 		sc->lintr = 1;
312 		pci_lintr_assert(pi);
313 	}
314 }
315 
316 static void
317 ahci_write_fis(struct ahci_port *p, enum sata_fis_type ft, uint8_t *fis)
318 {
319 	int offset, len, irq;
320 
321 	if (p->rfis == NULL || !(p->cmd & AHCI_P_CMD_FRE))
322 		return;
323 
324 	switch (ft) {
325 	case FIS_TYPE_REGD2H:
326 		offset = 0x40;
327 		len = 20;
328 		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_DHR : 0;
329 		break;
330 	case FIS_TYPE_SETDEVBITS:
331 		offset = 0x58;
332 		len = 8;
333 		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_SDB : 0;
334 		break;
335 	case FIS_TYPE_PIOSETUP:
336 		offset = 0x20;
337 		len = 20;
338 		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_PS : 0;
339 		break;
340 	default:
341 		EPRINTLN("unsupported fis type %d", ft);
342 		return;
343 	}
344 	if (fis[2] & ATA_S_ERROR) {
345 		p->waitforclear = 1;
346 		irq |= AHCI_P_IX_TFE;
347 	}
348 	memcpy(p->rfis + offset, fis, len);
349 	if (irq) {
350 		if (~p->is & irq) {
351 			p->is |= irq;
352 			ahci_port_intr(p);
353 		}
354 	}
355 }
356 
357 static void
358 ahci_write_fis_piosetup(struct ahci_port *p)
359 {
360 	uint8_t fis[20];
361 
362 	memset(fis, 0, sizeof(fis));
363 	fis[0] = FIS_TYPE_PIOSETUP;
364 	ahci_write_fis(p, FIS_TYPE_PIOSETUP, fis);
365 }
366 
367 static void
368 ahci_write_fis_sdb(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
369 {
370 	uint8_t fis[8];
371 	uint8_t error;
372 
373 	error = (tfd >> 8) & 0xff;
374 	tfd &= 0x77;
375 	memset(fis, 0, sizeof(fis));
376 	fis[0] = FIS_TYPE_SETDEVBITS;
377 	fis[1] = (1 << 6);
378 	fis[2] = tfd;
379 	fis[3] = error;
380 	if (fis[2] & ATA_S_ERROR) {
381 		p->err_cfis[0] = slot;
382 		p->err_cfis[2] = tfd;
383 		p->err_cfis[3] = error;
384 		memcpy(&p->err_cfis[4], cfis + 4, 16);
385 	} else {
386 		*(uint32_t *)(fis + 4) = (1 << slot);
387 		p->sact &= ~(1 << slot);
388 	}
389 	p->tfd &= ~0x77;
390 	p->tfd |= tfd;
391 	ahci_write_fis(p, FIS_TYPE_SETDEVBITS, fis);
392 }
393 
394 static void
395 ahci_write_fis_d2h(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
396 {
397 	uint8_t fis[20];
398 	uint8_t error;
399 
400 	error = (tfd >> 8) & 0xff;
401 	memset(fis, 0, sizeof(fis));
402 	fis[0] = FIS_TYPE_REGD2H;
403 	fis[1] = (1 << 6);
404 	fis[2] = tfd & 0xff;
405 	fis[3] = error;
406 	fis[4] = cfis[4];
407 	fis[5] = cfis[5];
408 	fis[6] = cfis[6];
409 	fis[7] = cfis[7];
410 	fis[8] = cfis[8];
411 	fis[9] = cfis[9];
412 	fis[10] = cfis[10];
413 	fis[11] = cfis[11];
414 	fis[12] = cfis[12];
415 	fis[13] = cfis[13];
416 	if (fis[2] & ATA_S_ERROR) {
417 		p->err_cfis[0] = 0x80;
418 		p->err_cfis[2] = tfd & 0xff;
419 		p->err_cfis[3] = error;
420 		memcpy(&p->err_cfis[4], cfis + 4, 16);
421 	} else
422 		p->ci &= ~(1 << slot);
423 	p->tfd = tfd;
424 	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
425 }
426 
427 static void
428 ahci_write_fis_d2h_ncq(struct ahci_port *p, int slot)
429 {
430 	uint8_t fis[20];
431 
432 	p->tfd = ATA_S_READY | ATA_S_DSC;
433 	memset(fis, 0, sizeof(fis));
434 	fis[0] = FIS_TYPE_REGD2H;
435 	fis[1] = 0;			/* No interrupt */
436 	fis[2] = p->tfd;		/* Status */
437 	fis[3] = 0;			/* No error */
438 	p->ci &= ~(1 << slot);
439 	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
440 }
441 
442 static void
443 ahci_write_reset_fis_d2h(struct ahci_port *p)
444 {
445 	uint8_t fis[20];
446 
447 	memset(fis, 0, sizeof(fis));
448 	fis[0] = FIS_TYPE_REGD2H;
449 	fis[3] = 1;
450 	fis[4] = 1;
451 	if (p->atapi) {
452 		fis[5] = 0x14;
453 		fis[6] = 0xeb;
454 	}
455 	fis[12] = 1;
456 	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
457 }
458 
459 static void
460 ahci_check_stopped(struct ahci_port *p)
461 {
462 	/*
463 	 * If we are no longer processing the command list and nothing
464 	 * is in-flight, clear the running bit, the current command
465 	 * slot, the command issue and active bits.
466 	 */
467 	if (!(p->cmd & AHCI_P_CMD_ST)) {
468 		if (p->pending == 0) {
469 			p->ccs = 0;
470 			p->cmd &= ~(AHCI_P_CMD_CR | AHCI_P_CMD_CCS_MASK);
471 			p->ci = 0;
472 			p->sact = 0;
473 			p->waitforclear = 0;
474 		}
475 	}
476 }
477 
478 static void
479 ahci_port_stop(struct ahci_port *p)
480 {
481 	struct ahci_ioreq *aior;
482 	uint8_t *cfis;
483 	int slot;
484 	int error;
485 
486 	assert(pthread_mutex_isowned_np(&p->pr_sc->mtx));
487 
488 	TAILQ_FOREACH(aior, &p->iobhd, io_blist) {
489 		/*
490 		 * Try to cancel the outstanding blockif request.
491 		 */
492 		error = blockif_cancel(p->bctx, &aior->io_req);
493 		if (error != 0)
494 			continue;
495 
496 		slot = aior->slot;
497 		cfis = aior->cfis;
498 		if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
499 		    cfis[2] == ATA_READ_FPDMA_QUEUED ||
500 		    cfis[2] == ATA_SEND_FPDMA_QUEUED)
501 			p->sact &= ~(1 << slot);	/* NCQ */
502 		else
503 			p->ci &= ~(1 << slot);
504 
505 		/*
506 		 * This command is now done.
507 		 */
508 		p->pending &= ~(1 << slot);
509 
510 		/*
511 		 * Delete the blockif request from the busy list
512 		 */
513 		TAILQ_REMOVE(&p->iobhd, aior, io_blist);
514 
515 		/*
516 		 * Move the blockif request back to the free list
517 		 */
518 		STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
519 	}
520 
521 	ahci_check_stopped(p);
522 }
523 
524 static void
525 ahci_port_reset(struct ahci_port *pr)
526 {
527 	pr->serr = 0;
528 	pr->sact = 0;
529 	pr->xfermode = ATA_UDMA6;
530 	pr->mult_sectors = 128;
531 
532 	if (!pr->bctx) {
533 		pr->ssts = ATA_SS_DET_NO_DEVICE;
534 		pr->sig = 0xFFFFFFFF;
535 		pr->tfd = 0x7F;
536 		return;
537 	}
538 	pr->ssts = ATA_SS_DET_PHY_ONLINE | ATA_SS_IPM_ACTIVE;
539 	if (pr->sctl & ATA_SC_SPD_MASK)
540 		pr->ssts |= (pr->sctl & ATA_SC_SPD_MASK);
541 	else
542 		pr->ssts |= ATA_SS_SPD_GEN3;
543 	pr->tfd = (1 << 8) | ATA_S_DSC | ATA_S_DMA;
544 	if (!pr->atapi) {
545 		pr->sig = PxSIG_ATA;
546 		pr->tfd |= ATA_S_READY;
547 	} else
548 		pr->sig = PxSIG_ATAPI;
549 	ahci_write_reset_fis_d2h(pr);
550 }
551 
552 static void
553 ahci_reset(struct pci_ahci_softc *sc)
554 {
555 	int i;
556 
557 	sc->ghc = AHCI_GHC_AE;
558 	sc->is = 0;
559 
560 	if (sc->lintr) {
561 		pci_lintr_deassert(sc->asc_pi);
562 		sc->lintr = 0;
563 	}
564 
565 	for (i = 0; i < sc->ports; i++) {
566 		sc->port[i].ie = 0;
567 		sc->port[i].is = 0;
568 		sc->port[i].cmd = (AHCI_P_CMD_SUD | AHCI_P_CMD_POD);
569 		if (sc->port[i].bctx)
570 			sc->port[i].cmd |= AHCI_P_CMD_CPS;
571 		sc->port[i].sctl = 0;
572 		ahci_port_reset(&sc->port[i]);
573 	}
574 }
575 
/*
 * Copy 'src' into 'dest' as an ATA string: each pair of bytes is swapped
 * and the field is padded with spaces up to 'len' bytes.  No NUL is stored.
 * 'len' is expected to be even; with an odd length the final byte would be
 * written at dest[len].
 */
static void
ata_string(uint8_t *dest, const char *src, int len)
{
	int pos;

	for (pos = 0; pos < len; pos++)
		dest[pos ^ 1] = (*src != '\0') ? *src++ : ' ';
}
588 
/*
 * Copy 'src' into 'dest' in natural byte order, padding with spaces up to
 * 'len' bytes.  No NUL terminator is stored.
 */
static void
atapi_string(uint8_t *dest, const char *src, int len)
{
	uint8_t *out = dest;
	uint8_t *end = dest + (len > 0 ? len : 0);

	while (out < end)
		*out++ = (*src != '\0') ? *src++ : ' ';
}
601 
602 /*
603  * Build up the iovec based on the PRDT, 'done' and 'len'.
604  */
605 static void
606 ahci_build_iov(struct ahci_port *p, struct ahci_ioreq *aior,
607     struct ahci_prdt_entry *prdt, uint16_t prdtl)
608 {
609 	struct blockif_req *breq = &aior->io_req;
610 	uint32_t dbcsz, extra, left, skip, todo;
611 	int i, j;
612 
613 	assert(aior->len >= aior->done);
614 
615 	/* Copy part of PRDT between 'done' and 'len' bytes into the iov. */
616 	skip = aior->done;
617 	left = aior->len - aior->done;
618 	todo = 0;
619 	for (i = 0, j = 0; i < prdtl && j < BLOCKIF_IOV_MAX && left > 0;
620 	    i++, prdt++) {
621 		dbcsz = (prdt->dbc & DBCMASK) + 1;
622 		/* Skip already done part of the PRDT */
623 		if (dbcsz <= skip) {
624 			skip -= dbcsz;
625 			continue;
626 		}
627 		dbcsz -= skip;
628 		if (dbcsz > left)
629 			dbcsz = left;
630 		breq->br_iov[j].iov_base = paddr_guest2host(ahci_ctx(p->pr_sc),
631 		    prdt->dba + skip, dbcsz);
632 		breq->br_iov[j].iov_len = dbcsz;
633 		todo += dbcsz;
634 		left -= dbcsz;
635 		skip = 0;
636 		j++;
637 	}
638 
639 	/* If we got limited by IOV length, round I/O down to sector size. */
640 	if (j == BLOCKIF_IOV_MAX) {
641 		extra = todo % blockif_sectsz(p->bctx);
642 		todo -= extra;
643 		assert(todo > 0);
644 		while (extra > 0) {
645 			if (breq->br_iov[j - 1].iov_len > extra) {
646 				breq->br_iov[j - 1].iov_len -= extra;
647 				break;
648 			}
649 			extra -= breq->br_iov[j - 1].iov_len;
650 			j--;
651 		}
652 	}
653 
654 	breq->br_iovcnt = j;
655 	breq->br_resid = todo;
656 	aior->done += todo;
657 	aior->more = (aior->done < aior->len && i < prdtl);
658 }
659 
660 static void
661 ahci_handle_rw(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
662 {
663 	struct ahci_ioreq *aior;
664 	struct blockif_req *breq;
665 	struct ahci_prdt_entry *prdt;
666 	struct ahci_cmd_hdr *hdr;
667 	uint64_t lba;
668 	uint32_t len;
669 	int err, first, ncq, readop;
670 
671 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
672 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
673 	ncq = 0;
674 	readop = 1;
675 	first = (done == 0);
676 
677 	if (cfis[2] == ATA_WRITE || cfis[2] == ATA_WRITE48 ||
678 	    cfis[2] == ATA_WRITE_MUL || cfis[2] == ATA_WRITE_MUL48 ||
679 	    cfis[2] == ATA_WRITE_DMA || cfis[2] == ATA_WRITE_DMA48 ||
680 	    cfis[2] == ATA_WRITE_FPDMA_QUEUED)
681 		readop = 0;
682 
683 	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
684 	    cfis[2] == ATA_READ_FPDMA_QUEUED) {
685 		lba = ((uint64_t)cfis[10] << 40) |
686 			((uint64_t)cfis[9] << 32) |
687 			((uint64_t)cfis[8] << 24) |
688 			((uint64_t)cfis[6] << 16) |
689 			((uint64_t)cfis[5] << 8) |
690 			cfis[4];
691 		len = cfis[11] << 8 | cfis[3];
692 		if (!len)
693 			len = 65536;
694 		ncq = 1;
695 	} else if (cfis[2] == ATA_READ48 || cfis[2] == ATA_WRITE48 ||
696 	    cfis[2] == ATA_READ_MUL48 || cfis[2] == ATA_WRITE_MUL48 ||
697 	    cfis[2] == ATA_READ_DMA48 || cfis[2] == ATA_WRITE_DMA48) {
698 		lba = ((uint64_t)cfis[10] << 40) |
699 			((uint64_t)cfis[9] << 32) |
700 			((uint64_t)cfis[8] << 24) |
701 			((uint64_t)cfis[6] << 16) |
702 			((uint64_t)cfis[5] << 8) |
703 			cfis[4];
704 		len = cfis[13] << 8 | cfis[12];
705 		if (!len)
706 			len = 65536;
707 	} else {
708 		lba = ((cfis[7] & 0xf) << 24) | (cfis[6] << 16) |
709 			(cfis[5] << 8) | cfis[4];
710 		len = cfis[12];
711 		if (!len)
712 			len = 256;
713 	}
714 	lba *= blockif_sectsz(p->bctx);
715 	len *= blockif_sectsz(p->bctx);
716 
717 	/* Pull request off free list */
718 	aior = STAILQ_FIRST(&p->iofhd);
719 	assert(aior != NULL);
720 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
721 
722 	aior->cfis = cfis;
723 	aior->slot = slot;
724 	aior->len = len;
725 	aior->done = done;
726 	breq = &aior->io_req;
727 	breq->br_offset = lba + done;
728 	ahci_build_iov(p, aior, prdt, hdr->prdtl);
729 
730 	/* Mark this command in-flight. */
731 	p->pending |= 1 << slot;
732 
733 	/* Stuff request onto busy list. */
734 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
735 
736 	if (ncq && first)
737 		ahci_write_fis_d2h_ncq(p, slot);
738 
739 	if (readop)
740 		err = blockif_read(p->bctx, breq);
741 	else
742 		err = blockif_write(p->bctx, breq);
743 	assert(err == 0);
744 }
745 
746 static void
747 ahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis)
748 {
749 	struct ahci_ioreq *aior;
750 	struct blockif_req *breq;
751 	int err;
752 
753 	/*
754 	 * Pull request off free list
755 	 */
756 	aior = STAILQ_FIRST(&p->iofhd);
757 	assert(aior != NULL);
758 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
759 	aior->cfis = cfis;
760 	aior->slot = slot;
761 	aior->len = 0;
762 	aior->done = 0;
763 	aior->more = 0;
764 	breq = &aior->io_req;
765 
766 	/*
767 	 * Mark this command in-flight.
768 	 */
769 	p->pending |= 1 << slot;
770 
771 	/*
772 	 * Stuff request onto busy list
773 	 */
774 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
775 
776 	err = blockif_flush(p->bctx, breq);
777 	assert(err == 0);
778 }
779 
780 static inline void
781 read_prdt(struct ahci_port *p, int slot, uint8_t *cfis, void *buf,
782     unsigned int size)
783 {
784 	struct ahci_cmd_hdr *hdr;
785 	struct ahci_prdt_entry *prdt;
786 	uint8_t *to;
787 	unsigned int len;
788 	int i;
789 
790 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
791 	len = size;
792 	to = buf;
793 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
794 	for (i = 0; i < hdr->prdtl && len; i++) {
795 		uint8_t *ptr;
796 		uint32_t dbcsz;
797 		unsigned int sublen;
798 
799 		dbcsz = (prdt->dbc & DBCMASK) + 1;
800 		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
801 		sublen = MIN(len, dbcsz);
802 		memcpy(to, ptr, sublen);
803 		len -= sublen;
804 		to += sublen;
805 		prdt++;
806 	}
807 }
808 
809 static void
810 ahci_handle_dsm_trim(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
811 {
812 	struct ahci_ioreq *aior;
813 	struct blockif_req *breq;
814 	uint8_t *entry;
815 	uint64_t elba;
816 	uint32_t len, elen;
817 	int err, first, ncq;
818 	uint8_t buf[512];
819 
820 	first = (done == 0);
821 	if (cfis[2] == ATA_DATA_SET_MANAGEMENT) {
822 		len = (uint16_t)cfis[13] << 8 | cfis[12];
823 		len *= 512;
824 		ncq = 0;
825 	} else { /* ATA_SEND_FPDMA_QUEUED */
826 		len = (uint16_t)cfis[11] << 8 | cfis[3];
827 		len *= 512;
828 		ncq = 1;
829 	}
830 	read_prdt(p, slot, cfis, buf, sizeof(buf));
831 
832 next:
833 	entry = &buf[done];
834 	elba = ((uint64_t)entry[5] << 40) |
835 		((uint64_t)entry[4] << 32) |
836 		((uint64_t)entry[3] << 24) |
837 		((uint64_t)entry[2] << 16) |
838 		((uint64_t)entry[1] << 8) |
839 		entry[0];
840 	elen = (uint16_t)entry[7] << 8 | entry[6];
841 	done += 8;
842 	if (elen == 0) {
843 		if (done >= len) {
844 			if (ncq) {
845 				if (first)
846 					ahci_write_fis_d2h_ncq(p, slot);
847 				ahci_write_fis_sdb(p, slot, cfis,
848 				    ATA_S_READY | ATA_S_DSC);
849 			} else {
850 				ahci_write_fis_d2h(p, slot, cfis,
851 				    ATA_S_READY | ATA_S_DSC);
852 			}
853 			p->pending &= ~(1 << slot);
854 			ahci_check_stopped(p);
855 			if (!first)
856 				ahci_handle_port(p);
857 			return;
858 		}
859 		goto next;
860 	}
861 
862 	/*
863 	 * Pull request off free list
864 	 */
865 	aior = STAILQ_FIRST(&p->iofhd);
866 	assert(aior != NULL);
867 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
868 	aior->cfis = cfis;
869 	aior->slot = slot;
870 	aior->len = len;
871 	aior->done = done;
872 	aior->more = (len != done);
873 
874 	breq = &aior->io_req;
875 	breq->br_offset = elba * blockif_sectsz(p->bctx);
876 	breq->br_resid = elen * blockif_sectsz(p->bctx);
877 
878 	/*
879 	 * Mark this command in-flight.
880 	 */
881 	p->pending |= 1 << slot;
882 
883 	/*
884 	 * Stuff request onto busy list
885 	 */
886 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
887 
888 	if (ncq && first)
889 		ahci_write_fis_d2h_ncq(p, slot);
890 
891 	err = blockif_delete(p->bctx, breq);
892 	assert(err == 0);
893 }
894 
895 static inline void
896 write_prdt(struct ahci_port *p, int slot, uint8_t *cfis, void *buf,
897     unsigned int size)
898 {
899 	struct ahci_cmd_hdr *hdr;
900 	struct ahci_prdt_entry *prdt;
901 	uint8_t *from;
902 	unsigned int len;
903 	int i;
904 
905 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
906 	len = size;
907 	from = buf;
908 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
909 	for (i = 0; i < hdr->prdtl && len; i++) {
910 		uint8_t *ptr;
911 		uint32_t dbcsz;
912 		int sublen;
913 
914 		dbcsz = (prdt->dbc & DBCMASK) + 1;
915 		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
916 		sublen = MIN(len, dbcsz);
917 		memcpy(ptr, from, sublen);
918 		len -= sublen;
919 		from += sublen;
920 		prdt++;
921 	}
922 	hdr->prdbc = size - len;
923 }
924 
/*
 * Store a checksum in the last byte of 'buf' such that the 8-bit sum of all
 * 'size' bytes is zero.
 *
 * buf  - buffer to checksum; buf[size - 1] is overwritten
 * size - buffer length in bytes; nothing is done if it is not positive
 *        (previously that case wrote to buf[-1])
 */
static void
ahci_checksum(uint8_t *buf, int size)
{
	int i;
	uint8_t sum = 0;

	if (size <= 0)
		return;

	for (i = 0; i < size - 1; i++)
		sum += buf[i];
	buf[size - 1] = 0x100 - sum;
}
935 
936 static void
937 ahci_handle_read_log(struct ahci_port *p, int slot, uint8_t *cfis)
938 {
939 	struct ahci_cmd_hdr *hdr;
940 	uint32_t buf[128];
941 	uint8_t *buf8 = (uint8_t *)buf;
942 	uint16_t *buf16 = (uint16_t *)buf;
943 
944 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
945 	if (p->atapi || hdr->prdtl == 0 || cfis[5] != 0 ||
946 	    cfis[9] != 0 || cfis[12] != 1 || cfis[13] != 0) {
947 		ahci_write_fis_d2h(p, slot, cfis,
948 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
949 		return;
950 	}
951 
952 	memset(buf, 0, sizeof(buf));
953 	if (cfis[4] == 0x00) {	/* Log directory */
954 		buf16[0x00] = 1; /* Version -- 1 */
955 		buf16[0x10] = 1; /* NCQ Command Error Log -- 1 page */
956 		buf16[0x13] = 1; /* SATA NCQ Send and Receive Log -- 1 page */
957 	} else if (cfis[4] == 0x10) {	/* NCQ Command Error Log */
958 		memcpy(buf8, p->err_cfis, sizeof(p->err_cfis));
959 		ahci_checksum(buf8, sizeof(buf));
960 	} else if (cfis[4] == 0x13) {	/* SATA NCQ Send and Receive Log */
961 		if (blockif_candelete(p->bctx) && !blockif_is_ro(p->bctx)) {
962 			buf[0x00] = 1;	/* SFQ DSM supported */
963 			buf[0x01] = 1;	/* SFQ DSM TRIM supported */
964 		}
965 	} else {
966 		ahci_write_fis_d2h(p, slot, cfis,
967 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
968 		return;
969 	}
970 
971 	if (cfis[2] == ATA_READ_LOG_EXT)
972 		ahci_write_fis_piosetup(p);
973 	write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
974 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
975 }
976 
977 static void
978 handle_identify(struct ahci_port *p, int slot, uint8_t *cfis)
979 {
980 	struct ahci_cmd_hdr *hdr;
981 
982 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
983 	if (p->atapi || hdr->prdtl == 0) {
984 		ahci_write_fis_d2h(p, slot, cfis,
985 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
986 	} else {
987 		ahci_write_fis_piosetup(p);
988 		write_prdt(p, slot, cfis, (void*)&p->ata_ident, sizeof(struct ata_params));
989 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
990 	}
991 }
992 
993 static void
994 ata_identify_init(struct ahci_port* p, int atapi)
995 {
996 	struct ata_params* ata_ident = &p->ata_ident;
997 
998 	if (atapi) {
999 		ata_ident->config = ATA_PROTO_ATAPI | ATA_ATAPI_TYPE_CDROM |
1000 		    ATA_ATAPI_REMOVABLE | ATA_DRQ_FAST;
1001 		ata_ident->capabilities1 = ATA_SUPPORT_LBA |
1002 			ATA_SUPPORT_DMA;
1003 		ata_ident->capabilities2 = (1 << 14 | 1);
1004 		ata_ident->atavalid = ATA_FLAG_64_70 | ATA_FLAG_88;
1005 		ata_ident->obsolete62 = 0x3f;
1006 		ata_ident->mwdmamodes = 7;
1007 		if (p->xfermode & ATA_WDMA0)
1008 			ata_ident->mwdmamodes |= (1 << ((p->xfermode & 7) + 8));
1009 		ata_ident->apiomodes = 3;
1010 		ata_ident->mwdmamin = 0x0078;
1011 		ata_ident->mwdmarec = 0x0078;
1012 		ata_ident->pioblind = 0x0078;
1013 		ata_ident->pioiordy = 0x0078;
1014 		ata_ident->satacapabilities = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3);
1015 		ata_ident->satacapabilities2 = ((p->ssts & ATA_SS_SPD_MASK) >> 3);
1016 		ata_ident->satasupport = ATA_SUPPORT_NCQ_STREAM;
1017 		ata_ident->version_major = 0x3f0;
1018 		ata_ident->support.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
1019 			ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
1020 		ata_ident->support.command2 = (1 << 14);
1021 		ata_ident->support.extension = (1 << 14);
1022 		ata_ident->enabled.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
1023 			ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
1024 		ata_ident->enabled.extension = (1 << 14);
1025 		ata_ident->udmamodes = 0x7f;
1026 		if (p->xfermode & ATA_UDMA0)
1027 			ata_ident->udmamodes |= (1 << ((p->xfermode & 7) + 8));
1028 		ata_ident->transport_major = 0x1020;
1029 		ata_ident->integrity = 0x00a5;
1030 	} else {
1031 		uint64_t sectors;
1032 		int sectsz, psectsz, psectoff, candelete, ro;
1033 		uint16_t cyl;
1034 		uint8_t sech, heads;
1035 
1036 		ro = blockif_is_ro(p->bctx);
1037 		candelete = blockif_candelete(p->bctx);
1038 		sectsz = blockif_sectsz(p->bctx);
1039 		sectors = blockif_size(p->bctx) / sectsz;
1040 		blockif_chs(p->bctx, &cyl, &heads, &sech);
1041 		blockif_psectsz(p->bctx, &psectsz, &psectoff);
1042 		ata_ident->config = ATA_DRQ_FAST;
1043 		ata_ident->cylinders = cyl;
1044 		ata_ident->heads = heads;
1045 		ata_ident->sectors = sech;
1046 
1047 		ata_ident->sectors_intr = (0x8000 | 128);
1048 		ata_ident->tcg = 0;
1049 
1050 		ata_ident->capabilities1 = ATA_SUPPORT_DMA |
1051 			ATA_SUPPORT_LBA | ATA_SUPPORT_IORDY;
1052 		ata_ident->capabilities2 = (1 << 14);
1053 		ata_ident->atavalid = ATA_FLAG_64_70 | ATA_FLAG_88;
1054 		if (p->mult_sectors)
1055 			ata_ident->multi = (ATA_MULTI_VALID | p->mult_sectors);
1056 		if (sectors <= 0x0fffffff) {
1057 			ata_ident->lba_size_1 = sectors;
1058 			ata_ident->lba_size_2 = (sectors >> 16);
1059 		} else {
1060 			ata_ident->lba_size_1 = 0xffff;
1061 			ata_ident->lba_size_2 = 0x0fff;
1062 		}
1063 		ata_ident->mwdmamodes = 0x7;
1064 		if (p->xfermode & ATA_WDMA0)
1065 			ata_ident->mwdmamodes |= (1 << ((p->xfermode & 7) + 8));
1066 		ata_ident->apiomodes = 0x3;
1067 		ata_ident->mwdmamin = 0x0078;
1068 		ata_ident->mwdmarec = 0x0078;
1069 		ata_ident->pioblind = 0x0078;
1070 		ata_ident->pioiordy = 0x0078;
1071 		ata_ident->support3 = 0;
1072 		ata_ident->queue = 31;
1073 		ata_ident->satacapabilities = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3 |
1074 			ATA_SUPPORT_NCQ);
1075 		ata_ident->satacapabilities2 = (ATA_SUPPORT_RCVSND_FPDMA_QUEUED |
1076 			(p->ssts & ATA_SS_SPD_MASK) >> 3);
1077 		ata_ident->version_major = 0x3f0;
1078 		ata_ident->version_minor = 0x28;
1079 		ata_ident->support.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE |
1080 			ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
1081 		ata_ident->support.command2 = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
1082 			ATA_SUPPORT_FLUSHCACHE48 | 1 << 14);
1083 		ata_ident->support.extension = (1 << 14);
1084 		ata_ident->enabled.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE |
1085 			ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
1086 		ata_ident->enabled.command2 = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
1087 			ATA_SUPPORT_FLUSHCACHE48 | 1 << 15);
1088 		ata_ident->enabled.extension = (1 << 14);
1089 		ata_ident->udmamodes = 0x7f;
1090 		if (p->xfermode & ATA_UDMA0)
1091 			ata_ident->udmamodes |= (1 << ((p->xfermode & 7) + 8));
1092 		ata_ident->lba_size48_1 = sectors;
1093 		ata_ident->lba_size48_2 = (sectors >> 16);
1094 		ata_ident->lba_size48_3 = (sectors >> 32);
1095 		ata_ident->lba_size48_4 = (sectors >> 48);
1096 
1097 		if (candelete && !ro) {
1098 			ata_ident->support3 |= ATA_SUPPORT_RZAT | ATA_SUPPORT_DRAT;
1099 			ata_ident->max_dsm_blocks = 1;
1100 			ata_ident->support_dsm = ATA_SUPPORT_DSM_TRIM;
1101 		}
1102 		ata_ident->pss = ATA_PSS_VALID_VALUE;
1103 		ata_ident->lsalign = 0x4000;
1104 		if (psectsz > sectsz) {
1105 			ata_ident->pss |= ATA_PSS_MULTLS;
1106 			ata_ident->pss |= ffsl(psectsz / sectsz) - 1;
1107 			ata_ident->lsalign |= (psectoff / sectsz);
1108 		}
1109 		if (sectsz > 512) {
1110 			ata_ident->pss |= ATA_PSS_LSSABOVE512;
1111 			ata_ident->lss_1 = sectsz / 2;
1112 			ata_ident->lss_2 = ((sectsz / 2) >> 16);
1113 		}
1114 		ata_ident->support2 = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
1115 		ata_ident->enabled2 = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
1116 		ata_ident->transport_major = 0x1020;
1117 		ata_ident->integrity = 0x00a5;
1118 	}
1119 	ahci_checksum((uint8_t*)ata_ident, sizeof(struct ata_params));
1120 }
1121 
1122 static void
1123 handle_atapi_identify(struct ahci_port *p, int slot, uint8_t *cfis)
1124 {
1125 	if (!p->atapi) {
1126 		ahci_write_fis_d2h(p, slot, cfis,
1127 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1128 	} else {
1129 		ahci_write_fis_piosetup(p);
1130 		write_prdt(p, slot, cfis, (void *)&p->ata_ident, sizeof(struct ata_params));
1131 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
1132 	}
1133 }
1134 
1135 static void
1136 atapi_inquiry(struct ahci_port *p, int slot, uint8_t *cfis)
1137 {
1138 	uint8_t buf[36];
1139 	uint8_t *acmd;
1140 	unsigned int len;
1141 	uint32_t tfd;
1142 
1143 	acmd = cfis + 0x40;
1144 
1145 	if (acmd[1] & 1) {		/* VPD */
1146 		if (acmd[2] == 0) {	/* Supported VPD pages */
1147 			buf[0] = 0x05;
1148 			buf[1] = 0;
1149 			buf[2] = 0;
1150 			buf[3] = 1;
1151 			buf[4] = 0;
1152 			len = 4 + buf[3];
1153 		} else {
1154 			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1155 			p->asc = 0x24;
1156 			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1157 			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1158 			ahci_write_fis_d2h(p, slot, cfis, tfd);
1159 			return;
1160 		}
1161 	} else {
1162 		buf[0] = 0x05;
1163 		buf[1] = 0x80;
1164 		buf[2] = 0x00;
1165 		buf[3] = 0x21;
1166 		buf[4] = 31;
1167 		buf[5] = 0;
1168 		buf[6] = 0;
1169 		buf[7] = 0;
1170 		atapi_string(buf + 8, "BHYVE", 8);
1171 		atapi_string(buf + 16, "BHYVE DVD-ROM", 16);
1172 		atapi_string(buf + 32, "001", 4);
1173 		len = sizeof(buf);
1174 	}
1175 
1176 	if (len > acmd[4])
1177 		len = acmd[4];
1178 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1179 	write_prdt(p, slot, cfis, buf, len);
1180 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1181 }
1182 
1183 static void
1184 atapi_read_capacity(struct ahci_port *p, int slot, uint8_t *cfis)
1185 {
1186 	uint8_t buf[8];
1187 	uint64_t sectors;
1188 
1189 	sectors = blockif_size(p->bctx) / 2048;
1190 	be32enc(buf, sectors - 1);
1191 	be32enc(buf + 4, 2048);
1192 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1193 	write_prdt(p, slot, cfis, buf, sizeof(buf));
1194 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1195 }
1196 
1197 static void
1198 atapi_read_toc(struct ahci_port *p, int slot, uint8_t *cfis)
1199 {
1200 	uint8_t *acmd;
1201 	uint8_t format;
1202 	unsigned int len;
1203 
1204 	acmd = cfis + 0x40;
1205 
1206 	len = be16dec(acmd + 7);
1207 	format = acmd[9] >> 6;
1208 	switch (format) {
1209 	case 0:
1210 	{
1211 		size_t size;
1212 		int msf;
1213 		uint64_t sectors;
1214 		uint8_t start_track, buf[20], *bp;
1215 
1216 		msf = (acmd[1] >> 1) & 1;
1217 		start_track = acmd[6];
1218 		if (start_track > 1 && start_track != 0xaa) {
1219 			uint32_t tfd;
1220 			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1221 			p->asc = 0x24;
1222 			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1223 			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1224 			ahci_write_fis_d2h(p, slot, cfis, tfd);
1225 			return;
1226 		}
1227 		bp = buf + 2;
1228 		*bp++ = 1;
1229 		*bp++ = 1;
1230 		if (start_track <= 1) {
1231 			*bp++ = 0;
1232 			*bp++ = 0x14;
1233 			*bp++ = 1;
1234 			*bp++ = 0;
1235 			if (msf) {
1236 				*bp++ = 0;
1237 				lba_to_msf(bp, 0);
1238 				bp += 3;
1239 			} else {
1240 				*bp++ = 0;
1241 				*bp++ = 0;
1242 				*bp++ = 0;
1243 				*bp++ = 0;
1244 			}
1245 		}
1246 		*bp++ = 0;
1247 		*bp++ = 0x14;
1248 		*bp++ = 0xaa;
1249 		*bp++ = 0;
1250 		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
1251 		sectors >>= 2;
1252 		if (msf) {
1253 			*bp++ = 0;
1254 			lba_to_msf(bp, sectors);
1255 			bp += 3;
1256 		} else {
1257 			be32enc(bp, sectors);
1258 			bp += 4;
1259 		}
1260 		size = bp - buf;
1261 		be16enc(buf, size - 2);
1262 		if (len > size)
1263 			len = size;
1264 		write_prdt(p, slot, cfis, buf, len);
1265 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1266 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1267 		break;
1268 	}
1269 	case 1:
1270 	{
1271 		uint8_t buf[12];
1272 
1273 		memset(buf, 0, sizeof(buf));
1274 		buf[1] = 0xa;
1275 		buf[2] = 0x1;
1276 		buf[3] = 0x1;
1277 		if (len > sizeof(buf))
1278 			len = sizeof(buf);
1279 		write_prdt(p, slot, cfis, buf, len);
1280 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1281 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1282 		break;
1283 	}
1284 	case 2:
1285 	{
1286 		size_t size;
1287 		int msf;
1288 		uint64_t sectors;
1289 		uint8_t *bp, buf[50];
1290 
1291 		msf = (acmd[1] >> 1) & 1;
1292 		bp = buf + 2;
1293 		*bp++ = 1;
1294 		*bp++ = 1;
1295 
1296 		*bp++ = 1;
1297 		*bp++ = 0x14;
1298 		*bp++ = 0;
1299 		*bp++ = 0xa0;
1300 		*bp++ = 0;
1301 		*bp++ = 0;
1302 		*bp++ = 0;
1303 		*bp++ = 0;
1304 		*bp++ = 1;
1305 		*bp++ = 0;
1306 		*bp++ = 0;
1307 
1308 		*bp++ = 1;
1309 		*bp++ = 0x14;
1310 		*bp++ = 0;
1311 		*bp++ = 0xa1;
1312 		*bp++ = 0;
1313 		*bp++ = 0;
1314 		*bp++ = 0;
1315 		*bp++ = 0;
1316 		*bp++ = 1;
1317 		*bp++ = 0;
1318 		*bp++ = 0;
1319 
1320 		*bp++ = 1;
1321 		*bp++ = 0x14;
1322 		*bp++ = 0;
1323 		*bp++ = 0xa2;
1324 		*bp++ = 0;
1325 		*bp++ = 0;
1326 		*bp++ = 0;
1327 		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
1328 		sectors >>= 2;
1329 		if (msf) {
1330 			*bp++ = 0;
1331 			lba_to_msf(bp, sectors);
1332 			bp += 3;
1333 		} else {
1334 			be32enc(bp, sectors);
1335 			bp += 4;
1336 		}
1337 
1338 		*bp++ = 1;
1339 		*bp++ = 0x14;
1340 		*bp++ = 0;
1341 		*bp++ = 1;
1342 		*bp++ = 0;
1343 		*bp++ = 0;
1344 		*bp++ = 0;
1345 		if (msf) {
1346 			*bp++ = 0;
1347 			lba_to_msf(bp, 0);
1348 			bp += 3;
1349 		} else {
1350 			*bp++ = 0;
1351 			*bp++ = 0;
1352 			*bp++ = 0;
1353 			*bp++ = 0;
1354 		}
1355 
1356 		size = bp - buf;
1357 		be16enc(buf, size - 2);
1358 		if (len > size)
1359 			len = size;
1360 		write_prdt(p, slot, cfis, buf, len);
1361 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1362 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1363 		break;
1364 	}
1365 	default:
1366 	{
1367 		uint32_t tfd;
1368 
1369 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1370 		p->asc = 0x24;
1371 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1372 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1373 		ahci_write_fis_d2h(p, slot, cfis, tfd);
1374 		break;
1375 	}
1376 	}
1377 }
1378 
1379 static void
1380 atapi_report_luns(struct ahci_port *p, int slot, uint8_t *cfis)
1381 {
1382 	uint8_t buf[16];
1383 
1384 	memset(buf, 0, sizeof(buf));
1385 	buf[3] = 8;
1386 
1387 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1388 	write_prdt(p, slot, cfis, buf, sizeof(buf));
1389 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1390 }
1391 
1392 static void
1393 atapi_read(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
1394 {
1395 	struct ahci_ioreq *aior;
1396 	struct ahci_cmd_hdr *hdr;
1397 	struct ahci_prdt_entry *prdt;
1398 	struct blockif_req *breq;
1399 	uint8_t *acmd;
1400 	uint64_t lba;
1401 	uint32_t len;
1402 	int err;
1403 
1404 	acmd = cfis + 0x40;
1405 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1406 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1407 
1408 	lba = be32dec(acmd + 2);
1409 	if (acmd[0] == READ_10)
1410 		len = be16dec(acmd + 7);
1411 	else
1412 		len = be32dec(acmd + 6);
1413 	if (len == 0) {
1414 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1415 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1416 	}
1417 	lba *= 2048;
1418 	len *= 2048;
1419 
1420 	/*
1421 	 * Pull request off free list
1422 	 */
1423 	aior = STAILQ_FIRST(&p->iofhd);
1424 	assert(aior != NULL);
1425 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
1426 	aior->cfis = cfis;
1427 	aior->slot = slot;
1428 	aior->len = len;
1429 	aior->done = done;
1430 	breq = &aior->io_req;
1431 	breq->br_offset = lba + done;
1432 	ahci_build_iov(p, aior, prdt, hdr->prdtl);
1433 
1434 	/* Mark this command in-flight. */
1435 	p->pending |= 1 << slot;
1436 
1437 	/* Stuff request onto busy list. */
1438 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
1439 
1440 	err = blockif_read(p->bctx, breq);
1441 	assert(err == 0);
1442 }
1443 
1444 static void
1445 atapi_request_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1446 {
1447 	uint8_t buf[64];
1448 	uint8_t *acmd;
1449 	unsigned int len;
1450 
1451 	acmd = cfis + 0x40;
1452 	len = acmd[4];
1453 	if (len > sizeof(buf))
1454 		len = sizeof(buf);
1455 	memset(buf, 0, len);
1456 	buf[0] = 0x70 | (1 << 7);
1457 	buf[2] = p->sense_key;
1458 	buf[7] = 10;
1459 	buf[12] = p->asc;
1460 	write_prdt(p, slot, cfis, buf, len);
1461 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1462 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1463 }
1464 
1465 static void
1466 atapi_start_stop_unit(struct ahci_port *p, int slot, uint8_t *cfis)
1467 {
1468 	uint8_t *acmd = cfis + 0x40;
1469 	uint32_t tfd;
1470 
1471 	switch (acmd[4] & 3) {
1472 	case 0:
1473 	case 1:
1474 	case 3:
1475 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1476 		tfd = ATA_S_READY | ATA_S_DSC;
1477 		break;
1478 	case 2:
1479 		/* TODO eject media */
1480 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1481 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1482 		p->asc = 0x53;
1483 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1484 		break;
1485 	}
1486 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1487 }
1488 
1489 static void
1490 atapi_mode_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1491 {
1492 	uint8_t *acmd;
1493 	uint32_t tfd = 0;
1494 	uint8_t pc, code;
1495 	unsigned int len;
1496 
1497 	acmd = cfis + 0x40;
1498 	len = be16dec(acmd + 7);
1499 	pc = acmd[2] >> 6;
1500 	code = acmd[2] & 0x3f;
1501 
1502 	switch (pc) {
1503 	case 0:
1504 		switch (code) {
1505 		case MODEPAGE_RW_ERROR_RECOVERY:
1506 		{
1507 			uint8_t buf[16];
1508 
1509 			if (len > sizeof(buf))
1510 				len = sizeof(buf);
1511 
1512 			memset(buf, 0, sizeof(buf));
1513 			be16enc(buf, 16 - 2);
1514 			buf[2] = 0x70;
1515 			buf[8] = 0x01;
1516 			buf[9] = 16 - 10;
1517 			buf[11] = 0x05;
1518 			write_prdt(p, slot, cfis, buf, len);
1519 			tfd = ATA_S_READY | ATA_S_DSC;
1520 			break;
1521 		}
1522 		case MODEPAGE_CD_CAPABILITIES:
1523 		{
1524 			uint8_t buf[30];
1525 
1526 			if (len > sizeof(buf))
1527 				len = sizeof(buf);
1528 
1529 			memset(buf, 0, sizeof(buf));
1530 			be16enc(buf, 30 - 2);
1531 			buf[2] = 0x70;
1532 			buf[8] = 0x2A;
1533 			buf[9] = 30 - 10;
1534 			buf[10] = 0x08;
1535 			buf[12] = 0x71;
1536 			be16enc(&buf[18], 2);
1537 			be16enc(&buf[20], 512);
1538 			write_prdt(p, slot, cfis, buf, len);
1539 			tfd = ATA_S_READY | ATA_S_DSC;
1540 			break;
1541 		}
1542 		default:
1543 			goto error;
1544 			break;
1545 		}
1546 		break;
1547 	case 3:
1548 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1549 		p->asc = 0x39;
1550 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1551 		break;
1552 error:
1553 	case 1:
1554 	case 2:
1555 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1556 		p->asc = 0x24;
1557 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1558 		break;
1559 	}
1560 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1561 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1562 }
1563 
1564 static void
1565 atapi_get_event_status_notification(struct ahci_port *p, int slot,
1566     uint8_t *cfis)
1567 {
1568 	uint8_t *acmd;
1569 	uint32_t tfd;
1570 
1571 	acmd = cfis + 0x40;
1572 
1573 	/* we don't support asynchronous operation */
1574 	if (!(acmd[1] & 1)) {
1575 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1576 		p->asc = 0x24;
1577 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1578 	} else {
1579 		uint8_t buf[8];
1580 		unsigned int len;
1581 
1582 		len = be16dec(acmd + 7);
1583 		if (len > sizeof(buf))
1584 			len = sizeof(buf);
1585 
1586 		memset(buf, 0, sizeof(buf));
1587 		be16enc(buf, 8 - 2);
1588 		buf[2] = 0x04;
1589 		buf[3] = 0x10;
1590 		buf[5] = 0x02;
1591 		write_prdt(p, slot, cfis, buf, len);
1592 		tfd = ATA_S_READY | ATA_S_DSC;
1593 	}
1594 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1595 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1596 }
1597 
1598 static void
1599 handle_packet_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1600 {
1601 	uint8_t *acmd;
1602 
1603 	acmd = cfis + 0x40;
1604 
1605 #ifdef AHCI_DEBUG
1606 	{
1607 		int i;
1608 		DPRINTF("ACMD:");
1609 		for (i = 0; i < 16; i++)
1610 			DPRINTF("%02x ", acmd[i]);
1611 		DPRINTF("");
1612 	}
1613 #endif
1614 
1615 	switch (acmd[0]) {
1616 	case TEST_UNIT_READY:
1617 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1618 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1619 		break;
1620 	case INQUIRY:
1621 		atapi_inquiry(p, slot, cfis);
1622 		break;
1623 	case READ_CAPACITY:
1624 		atapi_read_capacity(p, slot, cfis);
1625 		break;
1626 	case PREVENT_ALLOW:
1627 		/* TODO */
1628 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1629 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1630 		break;
1631 	case READ_TOC:
1632 		atapi_read_toc(p, slot, cfis);
1633 		break;
1634 	case REPORT_LUNS:
1635 		atapi_report_luns(p, slot, cfis);
1636 		break;
1637 	case READ_10:
1638 	case READ_12:
1639 		atapi_read(p, slot, cfis, 0);
1640 		break;
1641 	case REQUEST_SENSE:
1642 		atapi_request_sense(p, slot, cfis);
1643 		break;
1644 	case START_STOP_UNIT:
1645 		atapi_start_stop_unit(p, slot, cfis);
1646 		break;
1647 	case MODE_SENSE_10:
1648 		atapi_mode_sense(p, slot, cfis);
1649 		break;
1650 	case GET_EVENT_STATUS_NOTIFICATION:
1651 		atapi_get_event_status_notification(p, slot, cfis);
1652 		break;
1653 	default:
1654 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1655 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1656 		p->asc = 0x20;
1657 		ahci_write_fis_d2h(p, slot, cfis, (p->sense_key << 12) |
1658 				ATA_S_READY | ATA_S_ERROR);
1659 		break;
1660 	}
1661 }
1662 
1663 static void
1664 ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1665 {
1666 
1667 	p->tfd |= ATA_S_BUSY;
1668 	switch (cfis[2]) {
1669 	case ATA_ATA_IDENTIFY:
1670 		handle_identify(p, slot, cfis);
1671 		break;
1672 	case ATA_SETFEATURES:
1673 	{
1674 		switch (cfis[3]) {
1675 		case ATA_SF_ENAB_SATA_SF:
1676 			switch (cfis[12]) {
1677 			case ATA_SATA_SF_AN:
1678 				p->tfd = ATA_S_DSC | ATA_S_READY;
1679 				break;
1680 			default:
1681 				p->tfd = ATA_S_ERROR | ATA_S_READY;
1682 				p->tfd |= (ATA_ERROR_ABORT << 8);
1683 				break;
1684 			}
1685 			break;
1686 		case ATA_SF_ENAB_WCACHE:
1687 		case ATA_SF_DIS_WCACHE:
1688 		case ATA_SF_ENAB_RCACHE:
1689 		case ATA_SF_DIS_RCACHE:
1690 			p->tfd = ATA_S_DSC | ATA_S_READY;
1691 			break;
1692 		case ATA_SF_SETXFER:
1693 		{
1694 			switch (cfis[12] & 0xf8) {
1695 			case ATA_PIO:
1696 			case ATA_PIO0:
1697 				break;
1698 			case ATA_WDMA0:
1699 			case ATA_UDMA0:
1700 				p->xfermode = (cfis[12] & 0x7);
1701 				break;
1702 			}
1703 			p->tfd = ATA_S_DSC | ATA_S_READY;
1704 			break;
1705 		}
1706 		default:
1707 			p->tfd = ATA_S_ERROR | ATA_S_READY;
1708 			p->tfd |= (ATA_ERROR_ABORT << 8);
1709 			break;
1710 		}
1711 		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1712 		break;
1713 	}
1714 	case ATA_SET_MULTI:
1715 		if (cfis[12] != 0 &&
1716 			(cfis[12] > 128 || (cfis[12] & (cfis[12] - 1)))) {
1717 			p->tfd = ATA_S_ERROR | ATA_S_READY;
1718 			p->tfd |= (ATA_ERROR_ABORT << 8);
1719 		} else {
1720 			p->mult_sectors = cfis[12];
1721 			p->tfd = ATA_S_DSC | ATA_S_READY;
1722 		}
1723 		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1724 		break;
1725 	case ATA_READ:
1726 	case ATA_WRITE:
1727 	case ATA_READ48:
1728 	case ATA_WRITE48:
1729 	case ATA_READ_MUL:
1730 	case ATA_WRITE_MUL:
1731 	case ATA_READ_MUL48:
1732 	case ATA_WRITE_MUL48:
1733 	case ATA_READ_DMA:
1734 	case ATA_WRITE_DMA:
1735 	case ATA_READ_DMA48:
1736 	case ATA_WRITE_DMA48:
1737 	case ATA_READ_FPDMA_QUEUED:
1738 	case ATA_WRITE_FPDMA_QUEUED:
1739 		ahci_handle_rw(p, slot, cfis, 0);
1740 		break;
1741 	case ATA_FLUSHCACHE:
1742 	case ATA_FLUSHCACHE48:
1743 		ahci_handle_flush(p, slot, cfis);
1744 		break;
1745 	case ATA_DATA_SET_MANAGEMENT:
1746 		if (cfis[11] == 0 && cfis[3] == ATA_DSM_TRIM &&
1747 		    cfis[13] == 0 && cfis[12] == 1) {
1748 			ahci_handle_dsm_trim(p, slot, cfis, 0);
1749 			break;
1750 		}
1751 		ahci_write_fis_d2h(p, slot, cfis,
1752 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1753 		break;
1754 	case ATA_SEND_FPDMA_QUEUED:
1755 		if ((cfis[13] & 0x1f) == ATA_SFPDMA_DSM &&
1756 		    cfis[17] == 0 && cfis[16] == ATA_DSM_TRIM &&
1757 		    cfis[11] == 0 && cfis[3] == 1) {
1758 			ahci_handle_dsm_trim(p, slot, cfis, 0);
1759 			break;
1760 		}
1761 		ahci_write_fis_d2h(p, slot, cfis,
1762 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1763 		break;
1764 	case ATA_READ_LOG_EXT:
1765 	case ATA_READ_LOG_DMA_EXT:
1766 		ahci_handle_read_log(p, slot, cfis);
1767 		break;
1768 	case ATA_SECURITY_FREEZE_LOCK:
1769 	case ATA_SMART_CMD:
1770 	case ATA_NOP:
1771 		ahci_write_fis_d2h(p, slot, cfis,
1772 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1773 		break;
1774 	case ATA_CHECK_POWER_MODE:
1775 		cfis[12] = 0xff;	/* always on */
1776 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1777 		break;
1778 	case ATA_STANDBY_CMD:
1779 	case ATA_STANDBY_IMMEDIATE:
1780 	case ATA_IDLE_CMD:
1781 	case ATA_IDLE_IMMEDIATE:
1782 	case ATA_SLEEP:
1783 	case ATA_READ_VERIFY:
1784 	case ATA_READ_VERIFY48:
1785 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1786 		break;
1787 	case ATA_ATAPI_IDENTIFY:
1788 		handle_atapi_identify(p, slot, cfis);
1789 		break;
1790 	case ATA_PACKET_CMD:
1791 		if (!p->atapi) {
1792 			ahci_write_fis_d2h(p, slot, cfis,
1793 			    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1794 		} else
1795 			handle_packet_cmd(p, slot, cfis);
1796 		break;
1797 	default:
1798 		EPRINTLN("Unsupported cmd:%02x", cfis[2]);
1799 		ahci_write_fis_d2h(p, slot, cfis,
1800 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1801 		break;
1802 	}
1803 }
1804 
1805 static void
1806 ahci_handle_slot(struct ahci_port *p, int slot)
1807 {
1808 	struct ahci_cmd_hdr *hdr;
1809 #ifdef AHCI_DEBUG
1810 	struct ahci_prdt_entry *prdt;
1811 #endif
1812 	struct pci_ahci_softc *sc;
1813 	uint8_t *cfis;
1814 #ifdef AHCI_DEBUG
1815 	int cfl, i;
1816 #endif
1817 
1818 	sc = p->pr_sc;
1819 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1820 #ifdef AHCI_DEBUG
1821 	cfl = (hdr->flags & 0x1f) * 4;
1822 #endif
1823 	cfis = paddr_guest2host(ahci_ctx(sc), hdr->ctba,
1824 			0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry));
1825 #ifdef AHCI_DEBUG
1826 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1827 
1828 	DPRINTF("cfis:");
1829 	for (i = 0; i < cfl; i++) {
1830 		if (i % 10 == 0)
1831 			DPRINTF("");
1832 		DPRINTF("%02x ", cfis[i]);
1833 	}
1834 	DPRINTF("");
1835 
1836 	for (i = 0; i < hdr->prdtl; i++) {
1837 		DPRINTF("%d@%08"PRIx64"", prdt->dbc & 0x3fffff, prdt->dba);
1838 		prdt++;
1839 	}
1840 #endif
1841 
1842 	if (cfis[0] != FIS_TYPE_REGH2D) {
1843 		EPRINTLN("Not a H2D FIS:%02x", cfis[0]);
1844 		return;
1845 	}
1846 
1847 	if (cfis[1] & 0x80) {
1848 		ahci_handle_cmd(p, slot, cfis);
1849 	} else {
1850 		if (cfis[15] & (1 << 2))
1851 			p->reset = 1;
1852 		else if (p->reset) {
1853 			p->reset = 0;
1854 			ahci_port_reset(p);
1855 		}
1856 		p->ci &= ~(1 << slot);
1857 	}
1858 }
1859 
1860 static void
1861 ahci_handle_port(struct ahci_port *p)
1862 {
1863 
1864 	if (!(p->cmd & AHCI_P_CMD_ST))
1865 		return;
1866 
1867 	/*
1868 	 * Search for any new commands to issue ignoring those that
1869 	 * are already in-flight.  Stop if device is busy or in error.
1870 	 */
1871 	for (; (p->ci & ~p->pending) != 0; p->ccs = ((p->ccs + 1) & 31)) {
1872 		if ((p->tfd & (ATA_S_BUSY | ATA_S_DRQ)) != 0)
1873 			break;
1874 		if (p->waitforclear)
1875 			break;
1876 		if ((p->ci & ~p->pending & (1 << p->ccs)) != 0) {
1877 			p->cmd &= ~AHCI_P_CMD_CCS_MASK;
1878 			p->cmd |= p->ccs << AHCI_P_CMD_CCS_SHIFT;
1879 			ahci_handle_slot(p, p->ccs);
1880 		}
1881 	}
1882 }
1883 
1884 /*
1885  * blockif callback routine - this runs in the context of the blockif
1886  * i/o thread, so the mutex needs to be acquired.
1887  */
1888 static void
1889 ata_ioreq_cb(struct blockif_req *br, int err)
1890 {
1891 	struct ahci_cmd_hdr *hdr;
1892 	struct ahci_ioreq *aior;
1893 	struct ahci_port *p;
1894 	struct pci_ahci_softc *sc;
1895 	uint32_t tfd;
1896 	uint8_t *cfis;
1897 	int slot, ncq, dsm;
1898 
1899 	DPRINTF("%s %d", __func__, err);
1900 
1901 	ncq = dsm = 0;
1902 	aior = br->br_param;
1903 	p = aior->io_pr;
1904 	cfis = aior->cfis;
1905 	slot = aior->slot;
1906 	sc = p->pr_sc;
1907 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1908 
1909 	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
1910 	    cfis[2] == ATA_READ_FPDMA_QUEUED ||
1911 	    cfis[2] == ATA_SEND_FPDMA_QUEUED)
1912 		ncq = 1;
1913 	if (cfis[2] == ATA_DATA_SET_MANAGEMENT ||
1914 	    (cfis[2] == ATA_SEND_FPDMA_QUEUED &&
1915 	     (cfis[13] & 0x1f) == ATA_SFPDMA_DSM))
1916 		dsm = 1;
1917 
1918 	pthread_mutex_lock(&sc->mtx);
1919 
1920 	/*
1921 	 * Delete the blockif request from the busy list
1922 	 */
1923 	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1924 
1925 	/*
1926 	 * Move the blockif request back to the free list
1927 	 */
1928 	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
1929 
1930 	if (!err)
1931 		hdr->prdbc = aior->done;
1932 
1933 	if (!err && aior->more) {
1934 		if (dsm)
1935 			ahci_handle_dsm_trim(p, slot, cfis, aior->done);
1936 		else
1937 			ahci_handle_rw(p, slot, cfis, aior->done);
1938 		goto out;
1939 	}
1940 
1941 	if (!err)
1942 		tfd = ATA_S_READY | ATA_S_DSC;
1943 	else
1944 		tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1945 	if (ncq)
1946 		ahci_write_fis_sdb(p, slot, cfis, tfd);
1947 	else
1948 		ahci_write_fis_d2h(p, slot, cfis, tfd);
1949 
1950 	/*
1951 	 * This command is now complete.
1952 	 */
1953 	p->pending &= ~(1 << slot);
1954 
1955 	ahci_check_stopped(p);
1956 	ahci_handle_port(p);
1957 out:
1958 	pthread_mutex_unlock(&sc->mtx);
1959 	DPRINTF("%s exit", __func__);
1960 }
1961 
1962 static void
1963 atapi_ioreq_cb(struct blockif_req *br, int err)
1964 {
1965 	struct ahci_cmd_hdr *hdr;
1966 	struct ahci_ioreq *aior;
1967 	struct ahci_port *p;
1968 	struct pci_ahci_softc *sc;
1969 	uint8_t *cfis;
1970 	uint32_t tfd;
1971 	int slot;
1972 
1973 	DPRINTF("%s %d", __func__, err);
1974 
1975 	aior = br->br_param;
1976 	p = aior->io_pr;
1977 	cfis = aior->cfis;
1978 	slot = aior->slot;
1979 	sc = p->pr_sc;
1980 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + aior->slot * AHCI_CL_SIZE);
1981 
1982 	pthread_mutex_lock(&sc->mtx);
1983 
1984 	/*
1985 	 * Delete the blockif request from the busy list
1986 	 */
1987 	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1988 
1989 	/*
1990 	 * Move the blockif request back to the free list
1991 	 */
1992 	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
1993 
1994 	if (!err)
1995 		hdr->prdbc = aior->done;
1996 
1997 	if (!err && aior->more) {
1998 		atapi_read(p, slot, cfis, aior->done);
1999 		goto out;
2000 	}
2001 
2002 	if (!err) {
2003 		tfd = ATA_S_READY | ATA_S_DSC;
2004 	} else {
2005 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
2006 		p->asc = 0x21;
2007 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
2008 	}
2009 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
2010 	ahci_write_fis_d2h(p, slot, cfis, tfd);
2011 
2012 	/*
2013 	 * This command is now complete.
2014 	 */
2015 	p->pending &= ~(1 << slot);
2016 
2017 	ahci_check_stopped(p);
2018 	ahci_handle_port(p);
2019 out:
2020 	pthread_mutex_unlock(&sc->mtx);
2021 	DPRINTF("%s exit", __func__);
2022 }
2023 
2024 static void
2025 pci_ahci_ioreq_init(struct ahci_port *pr)
2026 {
2027 	struct ahci_ioreq *vr;
2028 	int i;
2029 
2030 	pr->ioqsz = blockif_queuesz(pr->bctx);
2031 	pr->ioreq = calloc(pr->ioqsz, sizeof(struct ahci_ioreq));
2032 	STAILQ_INIT(&pr->iofhd);
2033 
2034 	/*
2035 	 * Add all i/o request entries to the free queue
2036 	 */
2037 	for (i = 0; i < pr->ioqsz; i++) {
2038 		vr = &pr->ioreq[i];
2039 		vr->io_pr = pr;
2040 		if (!pr->atapi)
2041 			vr->io_req.br_callback = ata_ioreq_cb;
2042 		else
2043 			vr->io_req.br_callback = atapi_ioreq_cb;
2044 		vr->io_req.br_param = vr;
2045 		STAILQ_INSERT_TAIL(&pr->iofhd, vr, io_flist);
2046 	}
2047 
2048 	TAILQ_INIT(&pr->iobhd);
2049 }
2050 
2051 static void
2052 pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
2053 {
2054 	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
2055 	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
2056 	struct ahci_port *p = &sc->port[port];
2057 
2058 	DPRINTF("pci_ahci_port %d: write offset 0x%"PRIx64" value 0x%"PRIx64"",
2059 		port, offset, value);
2060 
2061 	switch (offset) {
2062 	case AHCI_P_CLB:
2063 		p->clb = value;
2064 		break;
2065 	case AHCI_P_CLBU:
2066 		p->clbu = value;
2067 		break;
2068 	case AHCI_P_FB:
2069 		p->fb = value;
2070 		break;
2071 	case AHCI_P_FBU:
2072 		p->fbu = value;
2073 		break;
2074 	case AHCI_P_IS:
2075 		p->is &= ~value;
2076 		ahci_port_intr(p);
2077 		break;
2078 	case AHCI_P_IE:
2079 		p->ie = value & 0xFDC000FF;
2080 		ahci_port_intr(p);
2081 		break;
2082 	case AHCI_P_CMD:
2083 	{
2084 		p->cmd &= ~(AHCI_P_CMD_ST | AHCI_P_CMD_SUD | AHCI_P_CMD_POD |
2085 		    AHCI_P_CMD_CLO | AHCI_P_CMD_FRE | AHCI_P_CMD_APSTE |
2086 		    AHCI_P_CMD_ATAPI | AHCI_P_CMD_DLAE | AHCI_P_CMD_ALPE |
2087 		    AHCI_P_CMD_ASP | AHCI_P_CMD_ICC_MASK);
2088 		p->cmd |= (AHCI_P_CMD_ST | AHCI_P_CMD_SUD | AHCI_P_CMD_POD |
2089 		    AHCI_P_CMD_CLO | AHCI_P_CMD_FRE | AHCI_P_CMD_APSTE |
2090 		    AHCI_P_CMD_ATAPI | AHCI_P_CMD_DLAE | AHCI_P_CMD_ALPE |
2091 		    AHCI_P_CMD_ASP | AHCI_P_CMD_ICC_MASK) & value;
2092 
2093 		if (!(value & AHCI_P_CMD_ST)) {
2094 			ahci_port_stop(p);
2095 		} else {
2096 			uint64_t clb;
2097 
2098 			p->cmd |= AHCI_P_CMD_CR;
2099 			clb = (uint64_t)p->clbu << 32 | p->clb;
2100 			p->cmd_lst = paddr_guest2host(ahci_ctx(sc), clb,
2101 					AHCI_CL_SIZE * AHCI_MAX_SLOTS);
2102 		}
2103 
2104 		if (value & AHCI_P_CMD_FRE) {
2105 			uint64_t fb;
2106 
2107 			p->cmd |= AHCI_P_CMD_FR;
2108 			fb = (uint64_t)p->fbu << 32 | p->fb;
2109 			/* we don't support FBSCP, so rfis size is 256Bytes */
2110 			p->rfis = paddr_guest2host(ahci_ctx(sc), fb, 256);
2111 		} else {
2112 			p->cmd &= ~AHCI_P_CMD_FR;
2113 		}
2114 
2115 		if (value & AHCI_P_CMD_CLO) {
2116 			p->tfd &= ~(ATA_S_BUSY | ATA_S_DRQ);
2117 			p->cmd &= ~AHCI_P_CMD_CLO;
2118 		}
2119 
2120 		if (value & AHCI_P_CMD_ICC_MASK) {
2121 			p->cmd &= ~AHCI_P_CMD_ICC_MASK;
2122 		}
2123 
2124 		ahci_handle_port(p);
2125 		break;
2126 	}
2127 	case AHCI_P_TFD:
2128 	case AHCI_P_SIG:
2129 	case AHCI_P_SSTS:
2130 		EPRINTLN("pci_ahci_port: read only registers 0x%"PRIx64"", offset);
2131 		break;
2132 	case AHCI_P_SCTL:
2133 		p->sctl = value;
2134 		if (!(p->cmd & AHCI_P_CMD_ST)) {
2135 			if (value & ATA_SC_DET_RESET)
2136 				ahci_port_reset(p);
2137 		}
2138 		break;
2139 	case AHCI_P_SERR:
2140 		p->serr &= ~value;
2141 		break;
2142 	case AHCI_P_SACT:
2143 		p->sact |= value;
2144 		break;
2145 	case AHCI_P_CI:
2146 		p->ci |= value;
2147 		ahci_handle_port(p);
2148 		break;
2149 	case AHCI_P_SNTF:
2150 	case AHCI_P_FBS:
2151 	default:
2152 		break;
2153 	}
2154 }
2155 
2156 static void
2157 pci_ahci_host_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
2158 {
2159 	DPRINTF("pci_ahci_host: write offset 0x%"PRIx64" value 0x%"PRIx64"",
2160 		offset, value);
2161 
2162 	switch (offset) {
2163 	case AHCI_CAP:
2164 	case AHCI_PI:
2165 	case AHCI_VS:
2166 	case AHCI_CAP2:
2167 		DPRINTF("pci_ahci_host: read only registers 0x%"PRIx64"", offset);
2168 		break;
2169 	case AHCI_GHC:
2170 		if (value & AHCI_GHC_HR) {
2171 			ahci_reset(sc);
2172 			break;
2173 		}
2174 		if (value & AHCI_GHC_IE)
2175 			sc->ghc |= AHCI_GHC_IE;
2176 		else
2177 			sc->ghc &= ~AHCI_GHC_IE;
2178 		ahci_generate_intr(sc, 0xffffffff);
2179 		break;
2180 	case AHCI_IS:
2181 		sc->is &= ~value;
2182 		ahci_generate_intr(sc, value);
2183 		break;
2184 	default:
2185 		break;
2186 	}
2187 }
2188 
2189 static void
2190 pci_ahci_write(struct pci_devinst *pi, int baridx, uint64_t offset, int size,
2191     uint64_t value)
2192 {
2193 	struct pci_ahci_softc *sc = pi->pi_arg;
2194 
2195 	assert(baridx == 5);
2196 	assert((offset % 4) == 0 && size == 4);
2197 
2198 	pthread_mutex_lock(&sc->mtx);
2199 
2200 	if (offset < AHCI_OFFSET)
2201 		pci_ahci_host_write(sc, offset, value);
2202 	else if (offset < (uint64_t)AHCI_OFFSET + sc->ports * AHCI_STEP)
2203 		pci_ahci_port_write(sc, offset, value);
2204 	else
2205 		EPRINTLN("pci_ahci: unknown i/o write offset 0x%"PRIx64"", offset);
2206 
2207 	pthread_mutex_unlock(&sc->mtx);
2208 }
2209 
2210 static uint64_t
2211 pci_ahci_host_read(struct pci_ahci_softc *sc, uint64_t offset)
2212 {
2213 	uint32_t value;
2214 
2215 	switch (offset) {
2216 	case AHCI_CAP:
2217 	case AHCI_GHC:
2218 	case AHCI_IS:
2219 	case AHCI_PI:
2220 	case AHCI_VS:
2221 	case AHCI_CCCC:
2222 	case AHCI_CCCP:
2223 	case AHCI_EM_LOC:
2224 	case AHCI_EM_CTL:
2225 	case AHCI_CAP2:
2226 	{
2227 		uint32_t *p = &sc->cap;
2228 		p += (offset - AHCI_CAP) / sizeof(uint32_t);
2229 		value = *p;
2230 		break;
2231 	}
2232 	default:
2233 		value = 0;
2234 		break;
2235 	}
2236 	DPRINTF("pci_ahci_host: read offset 0x%"PRIx64" value 0x%x",
2237 		offset, value);
2238 
2239 	return (value);
2240 }
2241 
2242 static uint64_t
2243 pci_ahci_port_read(struct pci_ahci_softc *sc, uint64_t offset)
2244 {
2245 	uint32_t value;
2246 	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
2247 	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
2248 
2249 	switch (offset) {
2250 	case AHCI_P_CLB:
2251 	case AHCI_P_CLBU:
2252 	case AHCI_P_FB:
2253 	case AHCI_P_FBU:
2254 	case AHCI_P_IS:
2255 	case AHCI_P_IE:
2256 	case AHCI_P_CMD:
2257 	case AHCI_P_TFD:
2258 	case AHCI_P_SIG:
2259 	case AHCI_P_SSTS:
2260 	case AHCI_P_SCTL:
2261 	case AHCI_P_SERR:
2262 	case AHCI_P_SACT:
2263 	case AHCI_P_CI:
2264 	case AHCI_P_SNTF:
2265 	case AHCI_P_FBS:
2266 	{
2267 		uint32_t *p= &sc->port[port].clb;
2268 		p += (offset - AHCI_P_CLB) / sizeof(uint32_t);
2269 		value = *p;
2270 		break;
2271 	}
2272 	default:
2273 		value = 0;
2274 		break;
2275 	}
2276 
2277 	DPRINTF("pci_ahci_port %d: read offset 0x%"PRIx64" value 0x%x",
2278 		port, offset, value);
2279 
2280 	return value;
2281 }
2282 
2283 static uint64_t
2284 pci_ahci_read(struct pci_devinst *pi, int baridx, uint64_t regoff, int size)
2285 {
2286 	struct pci_ahci_softc *sc = pi->pi_arg;
2287 	uint64_t offset;
2288 	uint32_t value;
2289 
2290 	assert(baridx == 5);
2291 	assert(size == 1 || size == 2 || size == 4);
2292 	assert((regoff & (size - 1)) == 0);
2293 
2294 	pthread_mutex_lock(&sc->mtx);
2295 
2296 	offset = regoff & ~0x3;	    /* round down to a multiple of 4 bytes */
2297 	if (offset < AHCI_OFFSET)
2298 		value = pci_ahci_host_read(sc, offset);
2299 	else if (offset < (uint64_t)AHCI_OFFSET + sc->ports * AHCI_STEP)
2300 		value = pci_ahci_port_read(sc, offset);
2301 	else {
2302 		value = 0;
2303 		EPRINTLN("pci_ahci: unknown i/o read offset 0x%"PRIx64"",
2304 		    regoff);
2305 	}
2306 	value >>= 8 * (regoff & 0x3);
2307 
2308 	pthread_mutex_unlock(&sc->mtx);
2309 
2310 	return (value);
2311 }
2312 
2313 /*
2314  * Each AHCI controller has a "port" node which contains nodes for
2315  * each port named after the decimal number of the port (no leading
2316  * zeroes).  Port nodes contain a "type" ("hd" or "cd"), as well as
2317  * options for blockif.  For example:
2318  *
2319  * pci.0.1.0
2320  *          .device="ahci"
2321  *          .port
2322  *               .0
2323  *                 .type="hd"
2324  *                 .path="/path/to/image"
2325  */
2326 static int
2327 pci_ahci_legacy_config_port(nvlist_t *nvl, int port, const char *type,
2328     const char *opts)
2329 {
2330 	char node_name[sizeof("XX")];
2331 	nvlist_t *port_nvl;
2332 
2333 	snprintf(node_name, sizeof(node_name), "%d", port);
2334 	port_nvl = create_relative_config_node(nvl, node_name);
2335 	set_config_value_node(port_nvl, "type", type);
2336 	return (blockif_legacy_config(port_nvl, opts));
2337 }
2338 
2339 static int
2340 pci_ahci_legacy_config(nvlist_t *nvl, const char *opts)
2341 {
2342 	nvlist_t *ports_nvl;
2343 	const char *type;
2344 	char *next, *next2, *str, *tofree;
2345 	int p, ret;
2346 
2347 	if (opts == NULL)
2348 		return (0);
2349 
2350 	ports_nvl = create_relative_config_node(nvl, "port");
2351 	ret = 1;
2352 	tofree = str = strdup(opts);
2353 	for (p = 0; p < MAX_PORTS && str != NULL; p++, str = next) {
2354 		/* Identify and cut off type of present port. */
2355 		if (strncmp(str, "hd:", 3) == 0) {
2356 			type = "hd";
2357 			str += 3;
2358 		} else if (strncmp(str, "cd:", 3) == 0) {
2359 			type = "cd";
2360 			str += 3;
2361 		} else
2362 			type = NULL;
2363 
2364 		/* Find and cut off the next port options. */
2365 		next = strstr(str, ",hd:");
2366 		next2 = strstr(str, ",cd:");
2367 		if (next == NULL || (next2 != NULL && next2 < next))
2368 			next = next2;
2369 		if (next != NULL) {
2370 			next[0] = 0;
2371 			next++;
2372 		}
2373 
2374 		if (str[0] == 0)
2375 			continue;
2376 
2377 		if (type == NULL) {
2378 			EPRINTLN("Missing or invalid type for port %d: \"%s\"",
2379 			    p, str);
2380 			goto out;
2381 		}
2382 
2383 		if (pci_ahci_legacy_config_port(ports_nvl, p, type, str) != 0)
2384 			goto out;
2385 	}
2386 	ret = 0;
2387 out:
2388 	free(tofree);
2389 	return (ret);
2390 }
2391 
2392 static int
2393 pci_ahci_cd_legacy_config(nvlist_t *nvl, const char *opts)
2394 {
2395 	nvlist_t *ports_nvl;
2396 
2397 	ports_nvl = create_relative_config_node(nvl, "port");
2398 	return (pci_ahci_legacy_config_port(ports_nvl, 0, "cd", opts));
2399 }
2400 
2401 static int
2402 pci_ahci_hd_legacy_config(nvlist_t *nvl, const char *opts)
2403 {
2404 	nvlist_t *ports_nvl;
2405 
2406 	ports_nvl = create_relative_config_node(nvl, "port");
2407 	return (pci_ahci_legacy_config_port(ports_nvl, 0, "hd", opts));
2408 }
2409 
2410 static int
2411 pci_ahci_init(struct pci_devinst *pi, nvlist_t *nvl)
2412 {
2413 	char bident[sizeof("XXX:XXX:XXX")];
2414 	char node_name[sizeof("XX")];
2415 	struct blockif_ctxt *bctxt;
2416 	struct pci_ahci_softc *sc;
2417 	int atapi, ret, slots, p;
2418 	MD5_CTX mdctx;
2419 	u_char digest[16];
2420 	const char *path, *type, *value;
2421 	nvlist_t *ports_nvl, *port_nvl;
2422 
2423 	ret = 0;
2424 
2425 #ifdef AHCI_DEBUG
2426 	dbg = fopen("/tmp/log", "w+");
2427 #endif
2428 
2429 	sc = calloc(1, sizeof(struct pci_ahci_softc));
2430 	pi->pi_arg = sc;
2431 	sc->asc_pi = pi;
2432 	pthread_mutex_init(&sc->mtx, NULL);
2433 	sc->ports = 0;
2434 	sc->pi = 0;
2435 	slots = 32;
2436 
2437 	ports_nvl = find_relative_config_node(nvl, "port");
2438 	for (p = 0; ports_nvl != NULL && p < MAX_PORTS; p++) {
2439 		struct ata_params *ata_ident = &sc->port[p].ata_ident;
2440 		char ident[AHCI_PORT_IDENT];
2441 
2442 		snprintf(node_name, sizeof(node_name), "%d", p);
2443 		port_nvl = find_relative_config_node(ports_nvl, node_name);
2444 		if (port_nvl == NULL)
2445 			continue;
2446 
2447 		type = get_config_value_node(port_nvl, "type");
2448 		if (type == NULL)
2449 			continue;
2450 
2451 		if (strcmp(type, "hd") == 0)
2452 			atapi = 0;
2453 		else
2454 			atapi = 1;
2455 
2456 		/*
2457 		 * Attempt to open the backing image. Use the PCI slot/func
2458 		 * and the port number for the identifier string.
2459 		 */
2460 		snprintf(bident, sizeof(bident), "%u:%u:%u", pi->pi_slot,
2461 		    pi->pi_func, p);
2462 
2463 		bctxt = blockif_open(port_nvl, bident);
2464 		if (bctxt == NULL) {
2465 			sc->ports = p;
2466 			ret = 1;
2467 			goto open_fail;
2468 		}
2469 
2470 		ret = blockif_add_boot_device(pi, bctxt);
2471 		if (ret) {
2472 			sc->ports = p;
2473 			goto open_fail;
2474 		}
2475 
2476 		sc->port[p].bctx = bctxt;
2477 		sc->port[p].pr_sc = sc;
2478 		sc->port[p].port = p;
2479 		sc->port[p].atapi = atapi;
2480 
2481 		/*
2482 		 * Create an identifier for the backing file.
2483 		 * Use parts of the md5 sum of the filename
2484 		 */
2485 		path = get_config_value_node(port_nvl, "path");
2486 		MD5Init(&mdctx);
2487 		MD5Update(&mdctx, path, strlen(path));
2488 		MD5Final(digest, &mdctx);
2489 		snprintf(ident, AHCI_PORT_IDENT,
2490 			"BHYVE-%02X%02X-%02X%02X-%02X%02X",
2491 			digest[0], digest[1], digest[2], digest[3], digest[4],
2492 			digest[5]);
2493 
2494 		memset(ata_ident, 0, sizeof(struct ata_params));
2495 		ata_string((uint8_t*)&ata_ident->serial, ident, 20);
2496 		ata_string((uint8_t*)&ata_ident->revision, "001", 8);
2497 		if (atapi)
2498 			ata_string((uint8_t*)&ata_ident->model, "BHYVE SATA DVD ROM", 40);
2499 		else
2500 			ata_string((uint8_t*)&ata_ident->model, "BHYVE SATA DISK", 40);
2501 		value = get_config_value_node(port_nvl, "nmrr");
2502 		if (value != NULL)
2503 			ata_ident->media_rotation_rate = atoi(value);
2504 		value = get_config_value_node(port_nvl, "ser");
2505 		if (value != NULL)
2506 			ata_string((uint8_t*)(&ata_ident->serial), value, 20);
2507 		value = get_config_value_node(port_nvl, "rev");
2508 		if (value != NULL)
2509 			ata_string((uint8_t*)(&ata_ident->revision), value, 8);
2510 		value = get_config_value_node(port_nvl, "model");
2511 		if (value != NULL)
2512 			ata_string((uint8_t*)(&ata_ident->model), value, 40);
2513 		ata_identify_init(&sc->port[p], atapi);
2514 
2515 #ifndef __FreeBSD__
2516 		/*
2517 		 * Attempt to enable the write cache for this device, as the
2518 		 * guest will issue FLUSH commands when it requires durability.
2519 		 *
2520 		 * Failure here is fine, since an always-sync device will not
2521 		 * have an impact on correctness.
2522 		 */
2523 		(void) blockif_set_wce(bctxt, 1);
2524 #endif
2525 
2526 		/*
2527 		 * Allocate blockif request structures and add them
2528 		 * to the free list
2529 		 */
2530 		pci_ahci_ioreq_init(&sc->port[p]);
2531 
2532 		sc->pi |= (1 << p);
2533 		if (sc->port[p].ioqsz < slots)
2534 			slots = sc->port[p].ioqsz;
2535 	}
2536 	sc->ports = p;
2537 
2538 	/* Intel ICH8 AHCI */
2539 	--slots;
2540 	if (sc->ports < DEF_PORTS)
2541 		sc->ports = DEF_PORTS;
2542 	sc->cap = AHCI_CAP_64BIT | AHCI_CAP_SNCQ | AHCI_CAP_SSNTF |
2543 	    AHCI_CAP_SMPS | AHCI_CAP_SSS | AHCI_CAP_SALP |
2544 	    AHCI_CAP_SAL | AHCI_CAP_SCLO | (0x3 << AHCI_CAP_ISS_SHIFT)|
2545 	    AHCI_CAP_PMD | AHCI_CAP_SSC | AHCI_CAP_PSC |
2546 	    (slots << AHCI_CAP_NCS_SHIFT) | AHCI_CAP_SXS | (sc->ports - 1);
2547 
2548 	sc->vs = 0x10300;
2549 	sc->cap2 = AHCI_CAP2_APST;
2550 	ahci_reset(sc);
2551 
2552 	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x2821);
2553 	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x8086);
2554 	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
2555 	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_STORAGE_SATA);
2556 	pci_set_cfgdata8(pi, PCIR_PROGIF, PCIP_STORAGE_SATA_AHCI_1_0);
2557 	p = MIN(sc->ports, 16);
2558 	p = flsl(p) - ((p & (p - 1)) ? 0 : 1);
2559 	pci_emul_add_msicap(pi, 1 << p);
2560 	pci_emul_alloc_bar(pi, 5, PCIBAR_MEM32,
2561 	    AHCI_OFFSET + sc->ports * AHCI_STEP);
2562 
2563 	pci_lintr_request(pi);
2564 
2565 open_fail:
2566 	if (ret) {
2567 		for (p = 0; p < sc->ports; p++) {
2568 			if (sc->port[p].bctx != NULL)
2569 				blockif_close(sc->port[p].bctx);
2570 		}
2571 		free(sc);
2572 	}
2573 
2574 	return (ret);
2575 }
2576 
2577 /*
2578  * Use separate emulation names to distinguish drive and atapi devices
2579  */
2580 static const struct pci_devemu pci_de_ahci = {
2581 	.pe_emu =	"ahci",
2582 	.pe_init =	pci_ahci_init,
2583 	.pe_legacy_config = pci_ahci_legacy_config,
2584 	.pe_barwrite =	pci_ahci_write,
2585 	.pe_barread =	pci_ahci_read,
2586 };
2587 PCI_EMUL_SET(pci_de_ahci);
2588 
2589 static const struct pci_devemu pci_de_ahci_hd = {
2590 	.pe_emu =	"ahci-hd",
2591 	.pe_legacy_config = pci_ahci_hd_legacy_config,
2592 	.pe_alias =	"ahci",
2593 };
2594 PCI_EMUL_SET(pci_de_ahci_hd);
2595 
2596 static const struct pci_devemu pci_de_ahci_cd = {
2597 	.pe_emu =	"ahci-cd",
2598 	.pe_legacy_config = pci_ahci_cd_legacy_config,
2599 	.pe_alias =	"ahci",
2600 };
2601 PCI_EMUL_SET(pci_de_ahci_cd);
2602