xref: /freebsd/sys/dev/vmware/pvscsi/pvscsi.c (revision 1719886f6d08408b834d270c59ffcfd821c8f63a)
1 /*-
2  * Copyright (c) 2018 VMware, Inc.
3  *
4  * SPDX-License-Identifier: (BSD-2-Clause OR GPL-2.0)
5  */
6 
7 #include <sys/param.h>
8 #include <sys/bus.h>
9 #include <sys/errno.h>
10 #include <sys/kernel.h>
11 #include <sys/malloc.h>
12 #include <sys/module.h>
13 #include <sys/queue.h>
14 #include <sys/rman.h>
15 #include <sys/sysctl.h>
16 #include <sys/systm.h>
17 
18 #include <machine/bus.h>
19 #include <machine/resource.h>
20 
21 #include <dev/pci/pcireg.h>
22 #include <dev/pci/pcivar.h>
23 
24 #include <cam/cam.h>
25 #include <cam/cam_ccb.h>
26 #include <cam/cam_debug.h>
27 #include <cam/cam_sim.h>
28 #include <cam/cam_xpt_sim.h>
29 #include <cam/scsi/scsi_message.h>
30 
31 #include "pvscsi.h"
32 
33 #define	PVSCSI_DEFAULT_NUM_PAGES_REQ_RING	8
34 #define	PVSCSI_SENSE_LENGTH			256
35 
36 MALLOC_DECLARE(M_PVSCSI);
37 MALLOC_DEFINE(M_PVSCSI, "pvscsi", "PVSCSI memory");
38 
39 #ifdef PVSCSI_DEBUG_LOGGING
40 #define	DEBUG_PRINTF(level, dev, fmt, ...)				\
41 	do {								\
42 		if (pvscsi_log_level >= (level)) {			\
43 			device_printf((dev), (fmt), ##__VA_ARGS__);	\
44 		}							\
45 	} while(0)
46 #else
47 #define DEBUG_PRINTF(level, dev, fmt, ...)
48 #endif /* PVSCSI_DEBUG_LOGGING */
49 
50 #define	ccb_pvscsi_hcb	spriv_ptr0
51 #define	ccb_pvscsi_sc	spriv_ptr1
52 
53 struct pvscsi_softc;
54 struct pvscsi_hcb;
55 struct pvscsi_dma;
56 
57 static inline uint32_t pvscsi_reg_read(struct pvscsi_softc *sc,
58     uint32_t offset);
59 static inline void pvscsi_reg_write(struct pvscsi_softc *sc, uint32_t offset,
60     uint32_t val);
61 static inline uint32_t pvscsi_read_intr_status(struct pvscsi_softc *sc);
62 static inline void pvscsi_write_intr_status(struct pvscsi_softc *sc,
63     uint32_t val);
64 static inline void pvscsi_intr_enable(struct pvscsi_softc *sc);
65 static inline void pvscsi_intr_disable(struct pvscsi_softc *sc);
66 static void pvscsi_kick_io(struct pvscsi_softc *sc, uint8_t cdb0);
67 static void pvscsi_write_cmd(struct pvscsi_softc *sc, uint32_t cmd, void *data,
68     uint32_t len);
69 static uint32_t pvscsi_get_max_targets(struct pvscsi_softc *sc);
70 static int pvscsi_setup_req_call(struct pvscsi_softc *sc, uint32_t enable);
71 static void pvscsi_setup_rings(struct pvscsi_softc *sc);
72 static void pvscsi_setup_msg_ring(struct pvscsi_softc *sc);
73 static int pvscsi_hw_supports_msg(struct pvscsi_softc *sc);
74 
75 static void pvscsi_timeout(void *arg);
76 static void pvscsi_freeze(struct pvscsi_softc *sc);
77 static void pvscsi_adapter_reset(struct pvscsi_softc *sc);
78 static void pvscsi_bus_reset(struct pvscsi_softc *sc);
79 static void pvscsi_device_reset(struct pvscsi_softc *sc, uint32_t target);
80 static void pvscsi_abort(struct pvscsi_softc *sc, uint32_t target,
81     union ccb *ccb);
82 
83 static void pvscsi_process_completion(struct pvscsi_softc *sc,
84     struct pvscsi_ring_cmp_desc *e);
85 static void pvscsi_process_cmp_ring(struct pvscsi_softc *sc);
86 static void pvscsi_process_msg(struct pvscsi_softc *sc,
87     struct pvscsi_ring_msg_desc *e);
88 static void pvscsi_process_msg_ring(struct pvscsi_softc *sc);
89 
90 static void pvscsi_intr_locked(struct pvscsi_softc *sc);
91 static void pvscsi_intr(void *xsc);
92 static void pvscsi_poll(struct cam_sim *sim);
93 
94 static void pvscsi_execute_ccb(void *arg, bus_dma_segment_t *segs, int nseg,
95     int error);
96 static void pvscsi_action(struct cam_sim *sim, union ccb *ccb);
97 
98 static inline uint64_t pvscsi_hcb_to_context(struct pvscsi_softc *sc,
99     struct pvscsi_hcb *hcb);
100 static inline struct pvscsi_hcb* pvscsi_context_to_hcb(struct pvscsi_softc *sc,
101     uint64_t context);
102 static struct pvscsi_hcb * pvscsi_hcb_get(struct pvscsi_softc *sc);
103 static void pvscsi_hcb_put(struct pvscsi_softc *sc, struct pvscsi_hcb *hcb);
104 
105 static void pvscsi_dma_cb(void *arg, bus_dma_segment_t *segs, int nseg,
106     int error);
107 static void pvscsi_dma_free(struct pvscsi_softc *sc, struct pvscsi_dma *dma);
108 static int pvscsi_dma_alloc(struct pvscsi_softc *sc, struct pvscsi_dma *dma,
109     bus_size_t size, bus_size_t alignment);
110 static int pvscsi_dma_alloc_ppns(struct pvscsi_softc *sc,
111     struct pvscsi_dma *dma, uint64_t *ppn_list, uint32_t num_pages);
112 static void pvscsi_dma_free_per_hcb(struct pvscsi_softc *sc,
113     uint32_t hcbs_allocated);
114 static int pvscsi_dma_alloc_per_hcb(struct pvscsi_softc *sc);
115 static void pvscsi_free_rings(struct pvscsi_softc *sc);
116 static int pvscsi_allocate_rings(struct pvscsi_softc *sc);
117 static void pvscsi_free_interrupts(struct pvscsi_softc *sc);
118 static int pvscsi_setup_interrupts(struct pvscsi_softc *sc);
119 static void pvscsi_free_all(struct pvscsi_softc *sc);
120 
121 static int pvscsi_attach(device_t dev);
122 static int pvscsi_detach(device_t dev);
123 static int pvscsi_probe(device_t dev);
124 static int pvscsi_shutdown(device_t dev);
125 static int pvscsi_get_tunable(struct pvscsi_softc *sc, char *name, int value);
126 
127 #ifdef PVSCSI_DEBUG_LOGGING
128 static int pvscsi_log_level = 0;
129 static SYSCTL_NODE(_hw, OID_AUTO, pvscsi, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
130     "PVSCSI driver parameters");
131 SYSCTL_INT(_hw_pvscsi, OID_AUTO, log_level, CTLFLAG_RWTUN, &pvscsi_log_level,
132     0, "PVSCSI debug log level");
133 #endif
134 
135 static int pvscsi_request_ring_pages = 0;
136 TUNABLE_INT("hw.pvscsi.request_ring_pages", &pvscsi_request_ring_pages);
137 
138 static int pvscsi_use_msg = 1;
139 TUNABLE_INT("hw.pvscsi.use_msg", &pvscsi_use_msg);
140 
141 static int pvscsi_use_msi = 1;
142 TUNABLE_INT("hw.pvscsi.use_msi", &pvscsi_use_msi);
143 
144 static int pvscsi_use_msix = 1;
145 TUNABLE_INT("hw.pvscsi.use_msix", &pvscsi_use_msix);
146 
147 static int pvscsi_use_req_call_threshold = 1;
148 TUNABLE_INT("hw.pvscsi.use_req_call_threshold", &pvscsi_use_req_call_threshold);
149 
150 static int pvscsi_max_queue_depth = 0;
151 TUNABLE_INT("hw.pvscsi.max_queue_depth", &pvscsi_max_queue_depth);
152 
153 struct pvscsi_sg_list {
154 	struct pvscsi_sg_element sge[PVSCSI_MAX_SG_ENTRIES_PER_SEGMENT];
155 };
156 
/* Seconds pvscsi_timeout() waits after an abort / after a reset. */
#define	PVSCSI_ABORT_TIMEOUT	2
#define	PVSCSI_RESET_TIMEOUT	10

/*
 * Values of pvscsi_hcb.recovery: the most recent recovery action that
 * pvscsi_timeout() has taken for the command.
 */
#define	PVSCSI_HCB_NONE		0
#define	PVSCSI_HCB_ABORT	1
#define	PVSCSI_HCB_DEVICE_RESET	2
#define	PVSCSI_HCB_BUS_RESET	3
164 
165 struct pvscsi_hcb {
166 	union ccb			*ccb;
167 	struct pvscsi_ring_req_desc	*e;
168 	int				 recovery;
169 	SLIST_ENTRY(pvscsi_hcb)		 links;
170 
171 	struct callout			 callout;
172 	bus_dmamap_t			 dma_map;
173 	void				*sense_buffer;
174 	bus_addr_t			 sense_buffer_paddr;
175 	struct pvscsi_sg_list		*sg_list;
176 	bus_addr_t			 sg_list_paddr;
177 };
178 
179 struct pvscsi_dma
180 {
181 	bus_dma_tag_t	 tag;
182 	bus_dmamap_t	 map;
183 	void		*vaddr;
184 	bus_addr_t	 paddr;
185 	bus_size_t	 size;
186 };
187 
188 struct pvscsi_softc {
189 	device_t		 dev;
190 	struct mtx		 lock;
191 	struct cam_sim		*sim;
192 	struct cam_path		*bus_path;
193 	int			 frozen;
194 	struct pvscsi_rings_state	*rings_state;
195 	struct pvscsi_ring_req_desc	*req_ring;
196 	struct pvscsi_ring_cmp_desc	*cmp_ring;
197 	struct pvscsi_ring_msg_desc	*msg_ring;
198 	uint32_t		 hcb_cnt;
199 	struct pvscsi_hcb	*hcbs;
200 	SLIST_HEAD(, pvscsi_hcb)	free_list;
201 	bus_dma_tag_t		parent_dmat;
202 	bus_dma_tag_t		buffer_dmat;
203 
204 	bool		 use_msg;
205 	uint32_t	 max_targets;
206 	int		 mm_rid;
207 	struct resource	*mm_res;
208 	int		 irq_id;
209 	struct resource	*irq_res;
210 	void		*irq_handler;
211 	int		 use_req_call_threshold;
212 	int		 use_msi_or_msix;
213 
214 	uint64_t	rings_state_ppn;
215 	uint32_t	req_ring_num_pages;
216 	uint64_t	req_ring_ppn[PVSCSI_MAX_NUM_PAGES_REQ_RING];
217 	uint32_t	cmp_ring_num_pages;
218 	uint64_t	cmp_ring_ppn[PVSCSI_MAX_NUM_PAGES_CMP_RING];
219 	uint32_t	msg_ring_num_pages;
220 	uint64_t	msg_ring_ppn[PVSCSI_MAX_NUM_PAGES_MSG_RING];
221 
222 	struct	pvscsi_dma rings_state_dma;
223 	struct	pvscsi_dma req_ring_dma;
224 	struct	pvscsi_dma cmp_ring_dma;
225 	struct	pvscsi_dma msg_ring_dma;
226 
227 	struct	pvscsi_dma sg_list_dma;
228 	struct	pvscsi_dma sense_buffer_dma;
229 };
230 
231 static int pvscsi_get_tunable(struct pvscsi_softc *sc, char *name, int value)
232 {
233 	char cfg[64];
234 
235 	snprintf(cfg, sizeof(cfg), "hw.pvscsi.%d.%s", device_get_unit(sc->dev),
236 	    name);
237 	TUNABLE_INT_FETCH(cfg, &value);
238 
239 	return (value);
240 }
241 
242 static void
243 pvscsi_freeze(struct pvscsi_softc *sc)
244 {
245 
246 	if (!sc->frozen) {
247 		xpt_freeze_simq(sc->sim, 1);
248 		sc->frozen = 1;
249 	}
250 }
251 
252 static inline uint32_t
253 pvscsi_reg_read(struct pvscsi_softc *sc, uint32_t offset)
254 {
255 
256 	return (bus_read_4(sc->mm_res, offset));
257 }
258 
259 static inline void
260 pvscsi_reg_write(struct pvscsi_softc *sc, uint32_t offset, uint32_t val)
261 {
262 
263 	bus_write_4(sc->mm_res, offset, val);
264 }
265 
266 static inline uint32_t
267 pvscsi_read_intr_status(struct pvscsi_softc *sc)
268 {
269 
270 	return (pvscsi_reg_read(sc, PVSCSI_REG_OFFSET_INTR_STATUS));
271 }
272 
273 static inline void
274 pvscsi_write_intr_status(struct pvscsi_softc *sc, uint32_t val)
275 {
276 
277 	pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_INTR_STATUS, val);
278 }
279 
280 static inline void
281 pvscsi_intr_enable(struct pvscsi_softc *sc)
282 {
283 	uint32_t mask;
284 
285 	mask = PVSCSI_INTR_CMPL_MASK;
286 	if (sc->use_msg) {
287 		mask |= PVSCSI_INTR_MSG_MASK;
288 	}
289 
290 	pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_INTR_MASK, mask);
291 }
292 
293 static inline void
294 pvscsi_intr_disable(struct pvscsi_softc *sc)
295 {
296 
297 	pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_INTR_MASK, 0);
298 }
299 
300 static void
301 pvscsi_kick_io(struct pvscsi_softc *sc, uint8_t cdb0)
302 {
303 	struct pvscsi_rings_state *s;
304 
305 	if (cdb0 == READ_6  || cdb0 == READ_10  ||
306 	    cdb0 == READ_12  || cdb0 == READ_16 ||
307 	    cdb0 == WRITE_6 || cdb0 == WRITE_10 ||
308 	    cdb0 == WRITE_12 || cdb0 == WRITE_16) {
309 		s = sc->rings_state;
310 
311 		if (!sc->use_req_call_threshold ||
312 		    (s->req_prod_idx - s->req_cons_idx) >=
313 		     s->req_call_threshold) {
314 			pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_KICK_RW_IO, 0);
315 		}
316 	} else {
317 		pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_KICK_NON_RW_IO, 0);
318 	}
319 }
320 
321 static void
322 pvscsi_write_cmd(struct pvscsi_softc *sc, uint32_t cmd, void *data,
323 		 uint32_t len)
324 {
325 	uint32_t *data_ptr;
326 	int i;
327 
328 	KASSERT(len % sizeof(uint32_t) == 0,
329 		("command size not a multiple of 4"));
330 
331 	data_ptr = data;
332 	len /= sizeof(uint32_t);
333 
334 	pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_COMMAND, cmd);
335 	for (i = 0; i < len; ++i) {
336 		pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_COMMAND_DATA,
337 		   data_ptr[i]);
338 	}
339 }
340 
341 static inline uint64_t pvscsi_hcb_to_context(struct pvscsi_softc *sc,
342     struct pvscsi_hcb *hcb)
343 {
344 
345 	/* Offset by 1 because context must not be 0 */
346 	return (hcb - sc->hcbs + 1);
347 }
348 
349 static inline struct pvscsi_hcb* pvscsi_context_to_hcb(struct pvscsi_softc *sc,
350     uint64_t context)
351 {
352 
353 	return (sc->hcbs + (context - 1));
354 }
355 
356 static struct pvscsi_hcb *
357 pvscsi_hcb_get(struct pvscsi_softc *sc)
358 {
359 	struct pvscsi_hcb *hcb;
360 
361 	mtx_assert(&sc->lock, MA_OWNED);
362 
363 	hcb = SLIST_FIRST(&sc->free_list);
364 	if (hcb) {
365 		SLIST_REMOVE_HEAD(&sc->free_list, links);
366 	}
367 
368 	return (hcb);
369 }
370 
371 static void
372 pvscsi_hcb_put(struct pvscsi_softc *sc, struct pvscsi_hcb *hcb)
373 {
374 
375 	mtx_assert(&sc->lock, MA_OWNED);
376 	hcb->ccb = NULL;
377 	hcb->e = NULL;
378 	hcb->recovery = PVSCSI_HCB_NONE;
379 	SLIST_INSERT_HEAD(&sc->free_list, hcb, links);
380 }
381 
382 static uint32_t
383 pvscsi_get_max_targets(struct pvscsi_softc *sc)
384 {
385 	uint32_t max_targets;
386 
387 	pvscsi_write_cmd(sc, PVSCSI_CMD_GET_MAX_TARGETS, NULL, 0);
388 
389 	max_targets = pvscsi_reg_read(sc, PVSCSI_REG_OFFSET_COMMAND_STATUS);
390 
391 	if (max_targets == ~0) {
392 		max_targets = 16;
393 	}
394 
395 	return (max_targets);
396 }
397 
398 static int pvscsi_setup_req_call(struct pvscsi_softc *sc, uint32_t enable)
399 {
400 	uint32_t status;
401 	struct pvscsi_cmd_desc_setup_req_call cmd;
402 
403 	if (!pvscsi_get_tunable(sc, "pvscsi_use_req_call_threshold",
404 	    pvscsi_use_req_call_threshold)) {
405 		return (0);
406 	}
407 
408 	pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_COMMAND,
409 	    PVSCSI_CMD_SETUP_REQCALLTHRESHOLD);
410 	status = pvscsi_reg_read(sc, PVSCSI_REG_OFFSET_COMMAND_STATUS);
411 
412 	if (status != -1) {
413 		bzero(&cmd, sizeof(cmd));
414 		cmd.enable = enable;
415 		pvscsi_write_cmd(sc, PVSCSI_CMD_SETUP_REQCALLTHRESHOLD,
416 		    &cmd, sizeof(cmd));
417 		status = pvscsi_reg_read(sc, PVSCSI_REG_OFFSET_COMMAND_STATUS);
418 
419 		return (status != 0);
420 	} else {
421 		return (0);
422 	}
423 }
424 
425 static void
426 pvscsi_dma_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
427 {
428 	bus_addr_t *dest;
429 
430 	KASSERT(nseg == 1, ("more than one segment"));
431 
432 	dest = arg;
433 
434 	if (!error) {
435 		*dest = segs->ds_addr;
436 	}
437 }
438 
439 static void
440 pvscsi_dma_free(struct pvscsi_softc *sc, struct pvscsi_dma *dma)
441 {
442 
443 	if (dma->tag != NULL) {
444 		if (dma->paddr != 0) {
445 			bus_dmamap_unload(dma->tag, dma->map);
446 		}
447 
448 		if (dma->vaddr != NULL) {
449 			bus_dmamem_free(dma->tag, dma->vaddr, dma->map);
450 		}
451 
452 		bus_dma_tag_destroy(dma->tag);
453 	}
454 
455 	bzero(dma, sizeof(*dma));
456 }
457 
458 static int
459 pvscsi_dma_alloc(struct pvscsi_softc *sc, struct pvscsi_dma *dma,
460     bus_size_t size, bus_size_t alignment)
461 {
462 	int error;
463 
464 	bzero(dma, sizeof(*dma));
465 
466 	error = bus_dma_tag_create(sc->parent_dmat, alignment, 0,
467 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, size, 1, size,
468 	    BUS_DMA_ALLOCNOW, NULL, NULL, &dma->tag);
469 	if (error) {
470 		device_printf(sc->dev, "error creating dma tag, error %d\n",
471 		    error);
472 		goto fail;
473 	}
474 
475 	error = bus_dmamem_alloc(dma->tag, &dma->vaddr,
476 	    BUS_DMA_NOWAIT | BUS_DMA_ZERO, &dma->map);
477 	if (error) {
478 		device_printf(sc->dev, "error allocating dma mem, error %d\n",
479 		    error);
480 		goto fail;
481 	}
482 
483 	error = bus_dmamap_load(dma->tag, dma->map, dma->vaddr, size,
484 	    pvscsi_dma_cb, &dma->paddr, BUS_DMA_NOWAIT);
485 	if (error) {
486 		device_printf(sc->dev, "error mapping dma mam, error %d\n",
487 		    error);
488 		goto fail;
489 	}
490 
491 	dma->size = size;
492 
493 fail:
494 	if (error) {
495 		pvscsi_dma_free(sc, dma);
496 	}
497 	return (error);
498 }
499 
500 static int
501 pvscsi_dma_alloc_ppns(struct pvscsi_softc *sc, struct pvscsi_dma *dma,
502     uint64_t *ppn_list, uint32_t num_pages)
503 {
504 	int error;
505 	uint32_t i;
506 	uint64_t ppn;
507 
508 	error = pvscsi_dma_alloc(sc, dma, num_pages * PAGE_SIZE, PAGE_SIZE);
509 	if (error) {
510 		device_printf(sc->dev, "Error allocating pages, error %d\n",
511 		    error);
512 		return (error);
513 	}
514 
515 	ppn = dma->paddr >> PAGE_SHIFT;
516 	for (i = 0; i < num_pages; i++) {
517 		ppn_list[i] = ppn + i;
518 	}
519 
520 	return (0);
521 }
522 
523 static void
524 pvscsi_dma_free_per_hcb(struct pvscsi_softc *sc, uint32_t hcbs_allocated)
525 {
526 	int i;
527 	int lock_owned;
528 	struct pvscsi_hcb *hcb;
529 
530 	lock_owned = mtx_owned(&sc->lock);
531 
532 	if (lock_owned) {
533 		mtx_unlock(&sc->lock);
534 	}
535 	for (i = 0; i < hcbs_allocated; ++i) {
536 		hcb = sc->hcbs + i;
537 		callout_drain(&hcb->callout);
538 	};
539 	if (lock_owned) {
540 		mtx_lock(&sc->lock);
541 	}
542 
543 	for (i = 0; i < hcbs_allocated; ++i) {
544 		hcb = sc->hcbs + i;
545 		bus_dmamap_destroy(sc->buffer_dmat, hcb->dma_map);
546 	};
547 
548 	pvscsi_dma_free(sc, &sc->sense_buffer_dma);
549 	pvscsi_dma_free(sc, &sc->sg_list_dma);
550 }
551 
552 static int
553 pvscsi_dma_alloc_per_hcb(struct pvscsi_softc *sc)
554 {
555 	int i;
556 	int error;
557 	struct pvscsi_hcb *hcb;
558 
559 	i = 0;
560 
561 	error = pvscsi_dma_alloc(sc, &sc->sg_list_dma,
562 	    sizeof(struct pvscsi_sg_list) * sc->hcb_cnt, 1);
563 	if (error) {
564 		device_printf(sc->dev,
565 		    "Error allocation sg list DMA memory, error %d\n", error);
566 		goto fail;
567 	}
568 
569 	error = pvscsi_dma_alloc(sc, &sc->sense_buffer_dma,
570 				 PVSCSI_SENSE_LENGTH * sc->hcb_cnt, 1);
571 	if (error) {
572 		device_printf(sc->dev,
573 		    "Error allocation sg list DMA memory, error %d\n", error);
574 		goto fail;
575 	}
576 
577 	for (i = 0; i < sc->hcb_cnt; ++i) {
578 		hcb = sc->hcbs + i;
579 
580 		error = bus_dmamap_create(sc->buffer_dmat, 0, &hcb->dma_map);
581 		if (error) {
582 			device_printf(sc->dev,
583 			    "Error creating dma map for hcb %d, error %d\n",
584 			    i, error);
585 			goto fail;
586 		}
587 
588 		hcb->sense_buffer =
589 		    (void *)((caddr_t)sc->sense_buffer_dma.vaddr +
590 		    PVSCSI_SENSE_LENGTH * i);
591 		hcb->sense_buffer_paddr =
592 		    sc->sense_buffer_dma.paddr + PVSCSI_SENSE_LENGTH * i;
593 
594 		hcb->sg_list =
595 		    (struct pvscsi_sg_list *)((caddr_t)sc->sg_list_dma.vaddr +
596 		    sizeof(struct pvscsi_sg_list) * i);
597 		hcb->sg_list_paddr =
598 		    sc->sg_list_dma.paddr + sizeof(struct pvscsi_sg_list) * i;
599 
600 		callout_init_mtx(&hcb->callout, &sc->lock, 0);
601 	}
602 
603 	SLIST_INIT(&sc->free_list);
604 	for (i = (sc->hcb_cnt - 1); i >= 0; --i) {
605 		hcb = sc->hcbs + i;
606 		SLIST_INSERT_HEAD(&sc->free_list, hcb, links);
607 	}
608 
609 fail:
610 	if (error) {
611 		pvscsi_dma_free_per_hcb(sc, i);
612 	}
613 
614 	return (error);
615 }
616 
617 static void
618 pvscsi_free_rings(struct pvscsi_softc *sc)
619 {
620 
621 	pvscsi_dma_free(sc, &sc->rings_state_dma);
622 	pvscsi_dma_free(sc, &sc->req_ring_dma);
623 	pvscsi_dma_free(sc, &sc->cmp_ring_dma);
624 	if (sc->use_msg) {
625 		pvscsi_dma_free(sc, &sc->msg_ring_dma);
626 	}
627 }
628 
629 static int
630 pvscsi_allocate_rings(struct pvscsi_softc *sc)
631 {
632 	int error;
633 
634 	error = pvscsi_dma_alloc_ppns(sc, &sc->rings_state_dma,
635 	    &sc->rings_state_ppn, 1);
636 	if (error) {
637 		device_printf(sc->dev,
638 		    "Error allocating rings state, error = %d\n", error);
639 		goto fail;
640 	}
641 	sc->rings_state = sc->rings_state_dma.vaddr;
642 
643 	error = pvscsi_dma_alloc_ppns(sc, &sc->req_ring_dma, sc->req_ring_ppn,
644 	    sc->req_ring_num_pages);
645 	if (error) {
646 		device_printf(sc->dev,
647 		    "Error allocating req ring pages, error = %d\n", error);
648 		goto fail;
649 	}
650 	sc->req_ring = sc->req_ring_dma.vaddr;
651 
652 	error = pvscsi_dma_alloc_ppns(sc, &sc->cmp_ring_dma, sc->cmp_ring_ppn,
653 	    sc->cmp_ring_num_pages);
654 	if (error) {
655 		device_printf(sc->dev,
656 		    "Error allocating cmp ring pages, error = %d\n", error);
657 		goto fail;
658 	}
659 	sc->cmp_ring = sc->cmp_ring_dma.vaddr;
660 
661 	sc->msg_ring = NULL;
662 	if (sc->use_msg) {
663 		error = pvscsi_dma_alloc_ppns(sc, &sc->msg_ring_dma,
664 		    sc->msg_ring_ppn, sc->msg_ring_num_pages);
665 		if (error) {
666 			device_printf(sc->dev,
667 			    "Error allocating cmp ring pages, error = %d\n",
668 			    error);
669 			goto fail;
670 		}
671 		sc->msg_ring = sc->msg_ring_dma.vaddr;
672 	}
673 
674 	DEBUG_PRINTF(1, sc->dev, "rings_state: %p\n", sc->rings_state);
675 	DEBUG_PRINTF(1, sc->dev, "req_ring: %p - %u pages\n", sc->req_ring,
676 	    sc->req_ring_num_pages);
677 	DEBUG_PRINTF(1, sc->dev, "cmp_ring: %p - %u pages\n", sc->cmp_ring,
678 	    sc->cmp_ring_num_pages);
679 	DEBUG_PRINTF(1, sc->dev, "msg_ring: %p - %u pages\n", sc->msg_ring,
680 	    sc->msg_ring_num_pages);
681 
682 fail:
683 	if (error) {
684 		pvscsi_free_rings(sc);
685 	}
686 	return (error);
687 }
688 
689 static void
690 pvscsi_setup_rings(struct pvscsi_softc *sc)
691 {
692 	struct pvscsi_cmd_desc_setup_rings cmd;
693 	uint32_t i;
694 
695 	bzero(&cmd, sizeof(cmd));
696 
697 	cmd.rings_state_ppn = sc->rings_state_ppn;
698 
699 	cmd.req_ring_num_pages = sc->req_ring_num_pages;
700 	for (i = 0; i < sc->req_ring_num_pages; ++i) {
701 		cmd.req_ring_ppns[i] = sc->req_ring_ppn[i];
702 	}
703 
704 	cmd.cmp_ring_num_pages = sc->cmp_ring_num_pages;
705 	for (i = 0; i < sc->cmp_ring_num_pages; ++i) {
706 		cmd.cmp_ring_ppns[i] = sc->cmp_ring_ppn[i];
707 	}
708 
709 	pvscsi_write_cmd(sc, PVSCSI_CMD_SETUP_RINGS, &cmd, sizeof(cmd));
710 }
711 
712 static int
713 pvscsi_hw_supports_msg(struct pvscsi_softc *sc)
714 {
715 	uint32_t status;
716 
717 	pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_COMMAND,
718 	    PVSCSI_CMD_SETUP_MSG_RING);
719 	status = pvscsi_reg_read(sc, PVSCSI_REG_OFFSET_COMMAND_STATUS);
720 
721 	return (status != -1);
722 }
723 
724 static void
725 pvscsi_setup_msg_ring(struct pvscsi_softc *sc)
726 {
727 	struct pvscsi_cmd_desc_setup_msg_ring cmd;
728 	uint32_t i;
729 
730 	KASSERT(sc->use_msg, ("msg is not being used"));
731 
732 	bzero(&cmd, sizeof(cmd));
733 
734 	cmd.num_pages = sc->msg_ring_num_pages;
735 	for (i = 0; i < sc->msg_ring_num_pages; ++i) {
736 		cmd.ring_ppns[i] = sc->msg_ring_ppn[i];
737 	}
738 
739 	pvscsi_write_cmd(sc, PVSCSI_CMD_SETUP_MSG_RING, &cmd, sizeof(cmd));
740 }
741 
742 static void
743 pvscsi_adapter_reset(struct pvscsi_softc *sc)
744 {
745 	uint32_t val __unused;
746 
747 	device_printf(sc->dev, "Adapter Reset\n");
748 
749 	pvscsi_write_cmd(sc, PVSCSI_CMD_ADAPTER_RESET, NULL, 0);
750 	val = pvscsi_read_intr_status(sc);
751 
752 	DEBUG_PRINTF(2, sc->dev, "adapter reset done: %u\n", val);
753 }
754 
755 static void
756 pvscsi_bus_reset(struct pvscsi_softc *sc)
757 {
758 
759 	device_printf(sc->dev, "Bus Reset\n");
760 
761 	pvscsi_write_cmd(sc, PVSCSI_CMD_RESET_BUS, NULL, 0);
762 	pvscsi_process_cmp_ring(sc);
763 
764 	DEBUG_PRINTF(2, sc->dev, "bus reset done\n");
765 }
766 
767 static void
768 pvscsi_device_reset(struct pvscsi_softc *sc, uint32_t target)
769 {
770 	struct pvscsi_cmd_desc_reset_device cmd;
771 
772 	memset(&cmd, 0, sizeof(cmd));
773 
774 	cmd.target = target;
775 
776 	device_printf(sc->dev, "Device reset for target %u\n", target);
777 
778 	pvscsi_write_cmd(sc, PVSCSI_CMD_RESET_DEVICE, &cmd, sizeof cmd);
779 	pvscsi_process_cmp_ring(sc);
780 
781 	DEBUG_PRINTF(2, sc->dev, "device reset done\n");
782 }
783 
784 static void
785 pvscsi_abort(struct pvscsi_softc *sc, uint32_t target, union ccb *ccb)
786 {
787 	struct pvscsi_cmd_desc_abort_cmd cmd;
788 	struct pvscsi_hcb *hcb;
789 	uint64_t context;
790 
791 	pvscsi_process_cmp_ring(sc);
792 
793 	hcb = ccb->ccb_h.ccb_pvscsi_hcb;
794 
795 	if (hcb != NULL) {
796 		context = pvscsi_hcb_to_context(sc, hcb);
797 
798 		memset(&cmd, 0, sizeof cmd);
799 		cmd.target = target;
800 		cmd.context = context;
801 
802 		device_printf(sc->dev, "Abort for target %u context %llx\n",
803 		    target, (unsigned long long)context);
804 
805 		pvscsi_write_cmd(sc, PVSCSI_CMD_ABORT_CMD, &cmd, sizeof(cmd));
806 		pvscsi_process_cmp_ring(sc);
807 
808 		DEBUG_PRINTF(2, sc->dev, "abort done\n");
809 	} else {
810 		DEBUG_PRINTF(1, sc->dev,
811 		    "Target %u ccb %p not found for abort\n", target, ccb);
812 	}
813 }
814 
815 static int
816 pvscsi_probe(device_t dev)
817 {
818 
819 	if (pci_get_vendor(dev) == PCI_VENDOR_ID_VMWARE &&
820 	    pci_get_device(dev) == PCI_DEVICE_ID_VMWARE_PVSCSI) {
821 		device_set_desc(dev, "VMware Paravirtual SCSI Controller");
822 		return (BUS_PROBE_DEFAULT);
823 	}
824 	return (ENXIO);
825 }
826 
827 static int
828 pvscsi_shutdown(device_t dev)
829 {
830 
831 	return (0);
832 }
833 
834 static void
835 pvscsi_timeout(void *arg)
836 {
837 	struct pvscsi_hcb *hcb;
838 	struct pvscsi_softc *sc;
839 	union ccb *ccb;
840 
841 	hcb = arg;
842 	ccb = hcb->ccb;
843 
844 	if (ccb == NULL) {
845 		/* Already completed */
846 		return;
847 	}
848 
849 	sc = ccb->ccb_h.ccb_pvscsi_sc;
850 	mtx_assert(&sc->lock, MA_OWNED);
851 
852 	device_printf(sc->dev, "Command timed out hcb=%p ccb=%p.\n", hcb, ccb);
853 
854 	switch (hcb->recovery) {
855 	case PVSCSI_HCB_NONE:
856 		hcb->recovery = PVSCSI_HCB_ABORT;
857 		pvscsi_abort(sc, ccb->ccb_h.target_id, ccb);
858 		callout_reset_sbt(&hcb->callout, PVSCSI_ABORT_TIMEOUT * SBT_1S,
859 		    0, pvscsi_timeout, hcb, 0);
860 		break;
861 	case PVSCSI_HCB_ABORT:
862 		hcb->recovery = PVSCSI_HCB_DEVICE_RESET;
863 		pvscsi_freeze(sc);
864 		pvscsi_device_reset(sc, ccb->ccb_h.target_id);
865 		callout_reset_sbt(&hcb->callout, PVSCSI_RESET_TIMEOUT * SBT_1S,
866 		    0, pvscsi_timeout, hcb, 0);
867 		break;
868 	case PVSCSI_HCB_DEVICE_RESET:
869 		hcb->recovery = PVSCSI_HCB_BUS_RESET;
870 		pvscsi_freeze(sc);
871 		pvscsi_bus_reset(sc);
872 		callout_reset_sbt(&hcb->callout, PVSCSI_RESET_TIMEOUT * SBT_1S,
873 		    0, pvscsi_timeout, hcb, 0);
874 		break;
875 	case PVSCSI_HCB_BUS_RESET:
876 		pvscsi_freeze(sc);
877 		pvscsi_adapter_reset(sc);
878 		break;
879 	};
880 }
881 
882 static void
883 pvscsi_process_completion(struct pvscsi_softc *sc,
884     struct pvscsi_ring_cmp_desc *e)
885 {
886 	struct pvscsi_hcb *hcb;
887 	union ccb *ccb;
888 	uint32_t status;
889 	uint32_t btstat;
890 	uint32_t sdstat;
891 	bus_dmasync_op_t op;
892 
893 	hcb = pvscsi_context_to_hcb(sc, e->context);
894 
895 	callout_stop(&hcb->callout);
896 
897 	ccb = hcb->ccb;
898 
899 	btstat = e->host_status;
900 	sdstat = e->scsi_status;
901 
902 	ccb->csio.scsi_status = sdstat;
903 	ccb->csio.resid = ccb->csio.dxfer_len - e->data_len;
904 
905 	if ((ccb->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_NONE) {
906 		if ((ccb->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN) {
907 			op = BUS_DMASYNC_POSTREAD;
908 		} else {
909 			op = BUS_DMASYNC_POSTWRITE;
910 		}
911 		bus_dmamap_sync(sc->buffer_dmat, hcb->dma_map, op);
912 		bus_dmamap_unload(sc->buffer_dmat, hcb->dma_map);
913 	}
914 
915 	if (btstat == BTSTAT_SUCCESS && sdstat == SCSI_STATUS_OK) {
916 		DEBUG_PRINTF(3, sc->dev,
917 		    "completing command context %llx success\n",
918 		    (unsigned long long)e->context);
919 		ccb->csio.resid = 0;
920 		status = CAM_REQ_CMP;
921 	} else {
922 		switch (btstat) {
923 		case BTSTAT_SUCCESS:
924 		case BTSTAT_LINKED_COMMAND_COMPLETED:
925 		case BTSTAT_LINKED_COMMAND_COMPLETED_WITH_FLAG:
926 			switch (sdstat) {
927 			case SCSI_STATUS_OK:
928 				ccb->csio.resid = 0;
929 				status = CAM_REQ_CMP;
930 				break;
931 			case SCSI_STATUS_CHECK_COND:
932 				status = CAM_SCSI_STATUS_ERROR;
933 
934 				if (ccb->csio.sense_len != 0) {
935 					status |= CAM_AUTOSNS_VALID;
936 
937 					memset(&ccb->csio.sense_data, 0,
938 					    sizeof(ccb->csio.sense_data));
939 					memcpy(&ccb->csio.sense_data,
940 					    hcb->sense_buffer,
941 					    MIN(ccb->csio.sense_len,
942 						e->sense_len));
943 				}
944 				break;
945 			case SCSI_STATUS_BUSY:
946 			case SCSI_STATUS_QUEUE_FULL:
947 				status = CAM_REQUEUE_REQ;
948 				break;
949 			case SCSI_STATUS_CMD_TERMINATED:
950 			case SCSI_STATUS_TASK_ABORTED:
951 				status = CAM_REQ_ABORTED;
952 				break;
953 			default:
954 				DEBUG_PRINTF(1, sc->dev,
955 				    "ccb: %p sdstat=0x%x\n", ccb, sdstat);
956 				status = CAM_SCSI_STATUS_ERROR;
957 				break;
958 			}
959 			break;
960 		case BTSTAT_SELTIMEO:
961 			status = CAM_SEL_TIMEOUT;
962 			break;
963 		case BTSTAT_DATARUN:
964 		case BTSTAT_DATA_UNDERRUN:
965 			status = CAM_DATA_RUN_ERR;
966 			break;
967 		case BTSTAT_ABORTQUEUE:
968 		case BTSTAT_HATIMEOUT:
969 			status = CAM_REQUEUE_REQ;
970 			break;
971 		case BTSTAT_NORESPONSE:
972 		case BTSTAT_SENTRST:
973 		case BTSTAT_RECVRST:
974 		case BTSTAT_BUSRESET:
975 			status = CAM_SCSI_BUS_RESET;
976 			break;
977 		case BTSTAT_SCSIPARITY:
978 			status = CAM_UNCOR_PARITY;
979 			break;
980 		case BTSTAT_BUSFREE:
981 			status = CAM_UNEXP_BUSFREE;
982 			break;
983 		case BTSTAT_INVPHASE:
984 			status = CAM_SEQUENCE_FAIL;
985 			break;
986 		case BTSTAT_SENSFAILED:
987 			status = CAM_AUTOSENSE_FAIL;
988 			break;
989 		case BTSTAT_LUNMISMATCH:
990 		case BTSTAT_TAGREJECT:
991 		case BTSTAT_DISCONNECT:
992 		case BTSTAT_BADMSG:
993 		case BTSTAT_INVPARAM:
994 			status = CAM_REQ_CMP_ERR;
995 			break;
996 		case BTSTAT_HASOFTWARE:
997 		case BTSTAT_HAHARDWARE:
998 			status = CAM_NO_HBA;
999 			break;
1000 		default:
1001 			device_printf(sc->dev, "unknown hba status: 0x%x\n",
1002 			    btstat);
1003 			status = CAM_NO_HBA;
1004 			break;
1005 		}
1006 
1007 		DEBUG_PRINTF(3, sc->dev,
1008 		    "completing command context %llx btstat %x sdstat %x - status %x\n",
1009 		    (unsigned long long)e->context, btstat, sdstat, status);
1010 	}
1011 
1012 	ccb->ccb_h.ccb_pvscsi_hcb = NULL;
1013 	ccb->ccb_h.ccb_pvscsi_sc = NULL;
1014 	pvscsi_hcb_put(sc, hcb);
1015 
1016 	ccb->ccb_h.status =
1017 	    status | (ccb->ccb_h.status & ~(CAM_STATUS_MASK | CAM_SIM_QUEUED));
1018 
1019 	if (sc->frozen) {
1020 		ccb->ccb_h.status |= CAM_RELEASE_SIMQ;
1021 		sc->frozen = 0;
1022 	}
1023 
1024 	if (status != CAM_REQ_CMP) {
1025 		ccb->ccb_h.status |= CAM_DEV_QFRZN;
1026 		xpt_freeze_devq(ccb->ccb_h.path, /*count*/ 1);
1027 	}
1028 	xpt_done(ccb);
1029 }
1030 
1031 static void
1032 pvscsi_process_cmp_ring(struct pvscsi_softc *sc)
1033 {
1034 	struct pvscsi_ring_cmp_desc *ring;
1035 	struct pvscsi_rings_state *s;
1036 	struct pvscsi_ring_cmp_desc *e;
1037 	uint32_t mask;
1038 
1039 	mtx_assert(&sc->lock, MA_OWNED);
1040 
1041 	s = sc->rings_state;
1042 	ring = sc->cmp_ring;
1043 	mask = MASK(s->cmp_num_entries_log2);
1044 
1045 	while (s->cmp_cons_idx != s->cmp_prod_idx) {
1046 		e = ring + (s->cmp_cons_idx & mask);
1047 
1048 		pvscsi_process_completion(sc, e);
1049 
1050 		mb();
1051 		s->cmp_cons_idx++;
1052 	}
1053 }
1054 
1055 static void
1056 pvscsi_process_msg(struct pvscsi_softc *sc, struct pvscsi_ring_msg_desc *e)
1057 {
1058 	struct pvscsi_ring_msg_dev_status_changed *desc;
1059 
1060 	union ccb *ccb;
1061 	switch (e->type) {
1062 	case PVSCSI_MSG_DEV_ADDED:
1063 	case PVSCSI_MSG_DEV_REMOVED: {
1064 		desc = (struct pvscsi_ring_msg_dev_status_changed *)e;
1065 
1066 		device_printf(sc->dev, "MSG: device %s at scsi%u:%u:%u\n",
1067 		    desc->type == PVSCSI_MSG_DEV_ADDED ? "addition" : "removal",
1068 		    desc->bus, desc->target, desc->lun[1]);
1069 
1070 		ccb = xpt_alloc_ccb_nowait();
1071 		if (ccb == NULL) {
1072 			device_printf(sc->dev,
1073 			    "Error allocating CCB for dev change.\n");
1074 			break;
1075 		}
1076 
1077 		if (xpt_create_path(&ccb->ccb_h.path, NULL,
1078 		    cam_sim_path(sc->sim), desc->target, desc->lun[1])
1079 		    != CAM_REQ_CMP) {
1080 			device_printf(sc->dev,
1081 			    "Error creating path for dev change.\n");
1082 			xpt_free_ccb(ccb);
1083 			break;
1084 		}
1085 
1086 		xpt_rescan(ccb);
1087 	} break;
1088 	default:
1089 		device_printf(sc->dev, "Unknown msg type 0x%x\n", e->type);
1090 	};
1091 }
1092 
1093 static void
1094 pvscsi_process_msg_ring(struct pvscsi_softc *sc)
1095 {
1096 	struct pvscsi_ring_msg_desc *ring;
1097 	struct pvscsi_rings_state *s;
1098 	struct pvscsi_ring_msg_desc *e;
1099 	uint32_t mask;
1100 
1101 	mtx_assert(&sc->lock, MA_OWNED);
1102 
1103 	s = sc->rings_state;
1104 	ring = sc->msg_ring;
1105 	mask = MASK(s->msg_num_entries_log2);
1106 
1107 	while (s->msg_cons_idx != s->msg_prod_idx) {
1108 		e = ring + (s->msg_cons_idx & mask);
1109 
1110 		pvscsi_process_msg(sc, e);
1111 
1112 		mb();
1113 		s->msg_cons_idx++;
1114 	}
1115 }
1116 
1117 static void
1118 pvscsi_intr_locked(struct pvscsi_softc *sc)
1119 {
1120 	uint32_t val;
1121 
1122 	mtx_assert(&sc->lock, MA_OWNED);
1123 
1124 	val = pvscsi_read_intr_status(sc);
1125 
1126 	if ((val & PVSCSI_INTR_ALL_SUPPORTED) != 0) {
1127 		pvscsi_write_intr_status(sc, val & PVSCSI_INTR_ALL_SUPPORTED);
1128 		pvscsi_process_cmp_ring(sc);
1129 		if (sc->use_msg) {
1130 			pvscsi_process_msg_ring(sc);
1131 		}
1132 	}
1133 }
1134 
1135 static void
1136 pvscsi_intr(void *xsc)
1137 {
1138 	struct pvscsi_softc *sc;
1139 
1140 	sc = xsc;
1141 
1142 	mtx_assert(&sc->lock, MA_NOTOWNED);
1143 
1144 	mtx_lock(&sc->lock);
1145 	pvscsi_intr_locked(xsc);
1146 	mtx_unlock(&sc->lock);
1147 }
1148 
1149 static void
1150 pvscsi_poll(struct cam_sim *sim)
1151 {
1152 	struct pvscsi_softc *sc;
1153 
1154 	sc = cam_sim_softc(sim);
1155 
1156 	mtx_assert(&sc->lock, MA_OWNED);
1157 	pvscsi_intr_locked(sc);
1158 }
1159 
1160 static void
1161 pvscsi_execute_ccb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
1162 {
1163 	struct pvscsi_hcb *hcb;
1164 	struct pvscsi_ring_req_desc *e;
1165 	union ccb *ccb;
1166 	struct pvscsi_softc *sc;
1167 	struct pvscsi_rings_state *s;
1168 	uint8_t cdb0;
1169 	bus_dmasync_op_t op;
1170 
1171 	hcb = arg;
1172 	ccb = hcb->ccb;
1173 	e = hcb->e;
1174 	sc = ccb->ccb_h.ccb_pvscsi_sc;
1175 	s = sc->rings_state;
1176 
1177 	mtx_assert(&sc->lock, MA_OWNED);
1178 
1179 	if (error) {
1180 		device_printf(sc->dev, "pvscsi_execute_ccb error %d\n", error);
1181 
1182 		if (error == EFBIG) {
1183 			ccb->ccb_h.status = CAM_REQ_TOO_BIG;
1184 		} else {
1185 			ccb->ccb_h.status = CAM_REQ_CMP_ERR;
1186 		}
1187 
1188 		pvscsi_hcb_put(sc, hcb);
1189 		xpt_done(ccb);
1190 		return;
1191 	}
1192 
1193 	e->flags = 0;
1194 	op = 0;
1195 	switch (ccb->ccb_h.flags & CAM_DIR_MASK) {
1196 	case CAM_DIR_NONE:
1197 		e->flags |= PVSCSI_FLAG_CMD_DIR_NONE;
1198 		break;
1199 	case CAM_DIR_IN:
1200 		e->flags |= PVSCSI_FLAG_CMD_DIR_TOHOST;
1201 		op = BUS_DMASYNC_PREREAD;
1202 		break;
1203 	case CAM_DIR_OUT:
1204 		e->flags |= PVSCSI_FLAG_CMD_DIR_TODEVICE;
1205 		op = BUS_DMASYNC_PREWRITE;
1206 		break;
1207 	case CAM_DIR_BOTH:
1208 		/* TODO: does this need handling? */
1209 		break;
1210 	}
1211 
1212 	if (nseg != 0) {
1213 		if (nseg > 1) {
1214 			int i;
1215 			struct pvscsi_sg_element *sge;
1216 
1217 			KASSERT(nseg <= PVSCSI_MAX_SG_ENTRIES_PER_SEGMENT,
1218 			    ("too many sg segments"));
1219 
1220 			sge = hcb->sg_list->sge;
1221 			e->flags |= PVSCSI_FLAG_CMD_WITH_SG_LIST;
1222 
1223 			for (i = 0; i < nseg; ++i) {
1224 				sge[i].addr = segs[i].ds_addr;
1225 				sge[i].length = segs[i].ds_len;
1226 				sge[i].flags = 0;
1227 			}
1228 
1229 			e->data_addr = hcb->sg_list_paddr;
1230 		} else {
1231 			e->data_addr = segs->ds_addr;
1232 		}
1233 
1234 		bus_dmamap_sync(sc->buffer_dmat, hcb->dma_map, op);
1235 	} else {
1236 		e->data_addr = 0;
1237 	}
1238 
1239 	cdb0 = e->cdb[0];
1240 	ccb->ccb_h.status |= CAM_SIM_QUEUED;
1241 
1242 	if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) {
1243 		callout_reset_sbt(&hcb->callout, ccb->ccb_h.timeout * SBT_1MS,
1244 		    0, pvscsi_timeout, hcb, 0);
1245 	}
1246 
1247 	mb();
1248 	s->req_prod_idx++;
1249 	pvscsi_kick_io(sc, cdb0);
1250 }
1251 
1252 static void
1253 pvscsi_action(struct cam_sim *sim, union ccb *ccb)
1254 {
1255 	struct pvscsi_softc *sc;
1256 	struct ccb_hdr *ccb_h;
1257 
1258 	sc = cam_sim_softc(sim);
1259 	ccb_h = &ccb->ccb_h;
1260 
1261 	mtx_assert(&sc->lock, MA_OWNED);
1262 
1263 	switch (ccb_h->func_code) {
1264 	case XPT_SCSI_IO:
1265 	{
1266 		struct ccb_scsiio *csio;
1267 		uint32_t req_num_entries_log2;
1268 		struct pvscsi_ring_req_desc *ring;
1269 		struct pvscsi_ring_req_desc *e;
1270 		struct pvscsi_rings_state *s;
1271 		struct pvscsi_hcb *hcb;
1272 
1273 		csio = &ccb->csio;
1274 		ring = sc->req_ring;
1275 		s = sc->rings_state;
1276 
1277 		hcb = NULL;
1278 
1279 		/*
1280 		 * Check if it was completed already (such as aborted
1281 		 * by upper layers)
1282 		 */
1283 		if ((ccb_h->status & CAM_STATUS_MASK) != CAM_REQ_INPROG) {
1284 			xpt_done(ccb);
1285 			return;
1286 		}
1287 
1288 		req_num_entries_log2 = s->req_num_entries_log2;
1289 
1290 		if (s->req_prod_idx - s->cmp_cons_idx >=
1291 		    (1 << req_num_entries_log2)) {
1292 			device_printf(sc->dev,
1293 			    "Not enough room on completion ring.\n");
1294 			pvscsi_freeze(sc);
1295 			ccb_h->status = CAM_REQUEUE_REQ;
1296 			goto finish_ccb;
1297 		}
1298 
1299 		hcb = pvscsi_hcb_get(sc);
1300 		if (hcb == NULL) {
1301 			device_printf(sc->dev, "No free hcbs.\n");
1302 			pvscsi_freeze(sc);
1303 			ccb_h->status = CAM_REQUEUE_REQ;
1304 			goto finish_ccb;
1305 		}
1306 
1307 		hcb->ccb = ccb;
1308 		ccb_h->ccb_pvscsi_hcb = hcb;
1309 		ccb_h->ccb_pvscsi_sc = sc;
1310 
1311 		if (csio->cdb_len > sizeof(e->cdb)) {
1312 			DEBUG_PRINTF(2, sc->dev, "cdb length %u too large\n",
1313 			    csio->cdb_len);
1314 			ccb_h->status = CAM_REQ_INVALID;
1315 			goto finish_ccb;
1316 		}
1317 
1318 		if (ccb_h->flags & CAM_CDB_PHYS) {
1319 			DEBUG_PRINTF(2, sc->dev,
1320 			    "CAM_CDB_PHYS not implemented\n");
1321 			ccb_h->status = CAM_REQ_INVALID;
1322 			goto finish_ccb;
1323 		}
1324 
1325 		e = ring + (s->req_prod_idx & MASK(req_num_entries_log2));
1326 
1327 		e->bus = cam_sim_bus(sim);
1328 		e->target = ccb_h->target_id;
1329 		memset(e->lun, 0, sizeof(e->lun));
1330 		e->lun[1] = ccb_h->target_lun;
1331 		e->data_addr = 0;
1332 		e->data_len = csio->dxfer_len;
1333 		e->vcpu_hint = curcpu;
1334 
1335 		e->cdb_len = csio->cdb_len;
1336 		memcpy(e->cdb, scsiio_cdb_ptr(csio), csio->cdb_len);
1337 
1338 		e->sense_addr = 0;
1339 		e->sense_len = csio->sense_len;
1340 		if (e->sense_len > 0) {
1341 			e->sense_addr = hcb->sense_buffer_paddr;
1342 		}
1343 
1344 		e->tag = MSG_SIMPLE_Q_TAG;
1345 		if (ccb_h->flags & CAM_TAG_ACTION_VALID) {
1346 			e->tag = csio->tag_action;
1347 		}
1348 
1349 		e->context = pvscsi_hcb_to_context(sc, hcb);
1350 		hcb->e = e;
1351 
1352 		DEBUG_PRINTF(3, sc->dev,
1353 		    " queuing command %02x context %llx\n", e->cdb[0],
1354 		    (unsigned long long)e->context);
1355 		bus_dmamap_load_ccb(sc->buffer_dmat, hcb->dma_map, ccb,
1356 		    pvscsi_execute_ccb, hcb, 0);
1357 		break;
1358 
1359 finish_ccb:
1360 		if (hcb != NULL) {
1361 			pvscsi_hcb_put(sc, hcb);
1362 		}
1363 		xpt_done(ccb);
1364 	} break;
1365 	case XPT_ABORT:
1366 	{
1367 		struct pvscsi_hcb *abort_hcb;
1368 		union ccb *abort_ccb;
1369 
1370 		abort_ccb = ccb->cab.abort_ccb;
1371 		abort_hcb = abort_ccb->ccb_h.ccb_pvscsi_hcb;
1372 
1373 		if (abort_hcb->ccb != NULL && abort_hcb->ccb == abort_ccb) {
1374 			if (abort_ccb->ccb_h.func_code == XPT_SCSI_IO) {
1375 				pvscsi_abort(sc, ccb_h->target_id, abort_ccb);
1376 				ccb_h->status = CAM_REQ_CMP;
1377 			} else {
1378 				ccb_h->status = CAM_UA_ABORT;
1379 			}
1380 		} else {
1381 			device_printf(sc->dev,
1382 			    "Could not find hcb for ccb %p (tgt %u)\n",
1383 			    ccb, ccb_h->target_id);
1384 			ccb_h->status = CAM_REQ_CMP;
1385 		}
1386 		xpt_done(ccb);
1387 	} break;
1388 	case XPT_RESET_DEV:
1389 	{
1390 		pvscsi_device_reset(sc, ccb_h->target_id);
1391 		ccb_h->status = CAM_REQ_CMP;
1392 		xpt_done(ccb);
1393 	} break;
1394 	case XPT_RESET_BUS:
1395 	{
1396 		pvscsi_bus_reset(sc);
1397 		ccb_h->status = CAM_REQ_CMP;
1398 		xpt_done(ccb);
1399 	} break;
1400 	case XPT_PATH_INQ:
1401 	{
1402 		struct ccb_pathinq *cpi;
1403 
1404 		cpi = &ccb->cpi;
1405 
1406 		cpi->version_num = 1;
1407 		cpi->hba_inquiry = PI_TAG_ABLE;
1408 		cpi->target_sprt = 0;
1409 		cpi->hba_misc = PIM_NOBUSRESET | PIM_UNMAPPED;
1410 		cpi->hba_eng_cnt = 0;
1411 		/* cpi->vuhba_flags = 0; */
1412 		cpi->max_target = sc->max_targets - 1;
1413 		cpi->max_lun = 0;
1414 		cpi->async_flags = 0;
1415 		cpi->hpath_id = 0;
1416 		cpi->unit_number = cam_sim_unit(sim);
1417 		cpi->bus_id = cam_sim_bus(sim);
1418 		cpi->initiator_id = 7;
1419 		cpi->base_transfer_speed = 750000;
1420 		strlcpy(cpi->sim_vid, "VMware", SIM_IDLEN);
1421 		strlcpy(cpi->hba_vid, "VMware", HBA_IDLEN);
1422 		strlcpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN);
1423 		/* Limit I/O to 256k since we can't do 512k unaligned I/O */
1424 		cpi->maxio = (PVSCSI_MAX_SG_ENTRIES_PER_SEGMENT / 2) * PAGE_SIZE;
1425 		cpi->protocol = PROTO_SCSI;
1426 		cpi->protocol_version = SCSI_REV_SPC2;
1427 		cpi->transport = XPORT_SAS;
1428 		cpi->transport_version = 0;
1429 
1430 		ccb_h->status = CAM_REQ_CMP;
1431 		xpt_done(ccb);
1432 	} break;
1433 	case XPT_GET_TRAN_SETTINGS:
1434 	{
1435 		struct ccb_trans_settings *cts;
1436 
1437 		cts = &ccb->cts;
1438 
1439 		cts->protocol = PROTO_SCSI;
1440 		cts->protocol_version = SCSI_REV_SPC2;
1441 		cts->transport = XPORT_SAS;
1442 		cts->transport_version = 0;
1443 
1444 		cts->proto_specific.scsi.flags = CTS_SCSI_FLAGS_TAG_ENB;
1445 		cts->proto_specific.scsi.valid = CTS_SCSI_VALID_TQ;
1446 
1447 		ccb_h->status = CAM_REQ_CMP;
1448 		xpt_done(ccb);
1449 	} break;
1450 	case XPT_CALC_GEOMETRY:
1451 	{
1452 		cam_calc_geometry(&ccb->ccg, 1);
1453 		xpt_done(ccb);
1454 	} break;
1455 	default:
1456 		ccb_h->status = CAM_REQ_INVALID;
1457 		xpt_done(ccb);
1458 		break;
1459 	}
1460 }
1461 
1462 static void
1463 pvscsi_free_interrupts(struct pvscsi_softc *sc)
1464 {
1465 
1466 	if (sc->irq_handler != NULL) {
1467 		bus_teardown_intr(sc->dev, sc->irq_res, sc->irq_handler);
1468 	}
1469 	if (sc->irq_res != NULL) {
1470 		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_id,
1471 		    sc->irq_res);
1472 	}
1473 	if (sc->use_msi_or_msix) {
1474 		pci_release_msi(sc->dev);
1475 	}
1476 }
1477 
1478 static int
1479 pvscsi_setup_interrupts(struct pvscsi_softc *sc)
1480 {
1481 	int error;
1482 	int flags;
1483 	int use_msix;
1484 	int use_msi;
1485 	int count;
1486 
1487 	sc->use_msi_or_msix = 0;
1488 
1489 	use_msix = pvscsi_get_tunable(sc, "use_msix", pvscsi_use_msix);
1490 	use_msi = pvscsi_get_tunable(sc, "use_msi", pvscsi_use_msi);
1491 
1492 	if (use_msix && pci_msix_count(sc->dev) > 0) {
1493 		count = 1;
1494 		if (pci_alloc_msix(sc->dev, &count) == 0 && count == 1) {
1495 			sc->use_msi_or_msix = 1;
1496 			device_printf(sc->dev, "Interrupt: MSI-X\n");
1497 		} else {
1498 			pci_release_msi(sc->dev);
1499 		}
1500 	}
1501 
1502 	if (sc->use_msi_or_msix == 0 && use_msi && pci_msi_count(sc->dev) > 0) {
1503 		count = 1;
1504 		if (pci_alloc_msi(sc->dev, &count) == 0 && count == 1) {
1505 			sc->use_msi_or_msix = 1;
1506 			device_printf(sc->dev, "Interrupt: MSI\n");
1507 		} else {
1508 			pci_release_msi(sc->dev);
1509 		}
1510 	}
1511 
1512 	flags = RF_ACTIVE;
1513 	if (sc->use_msi_or_msix) {
1514 		sc->irq_id = 1;
1515 	} else {
1516 		device_printf(sc->dev, "Interrupt: INT\n");
1517 		sc->irq_id = 0;
1518 		flags |= RF_SHAREABLE;
1519 	}
1520 
1521 	sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_id,
1522 	    flags);
1523 	if (sc->irq_res == NULL) {
1524 		device_printf(sc->dev, "IRQ allocation failed\n");
1525 		if (sc->use_msi_or_msix) {
1526 			pci_release_msi(sc->dev);
1527 		}
1528 		return (ENXIO);
1529 	}
1530 
1531 	error = bus_setup_intr(sc->dev, sc->irq_res,
1532 	    INTR_TYPE_CAM | INTR_MPSAFE, NULL, pvscsi_intr, sc,
1533 	    &sc->irq_handler);
1534 	if (error) {
1535 		device_printf(sc->dev, "IRQ handler setup failed\n");
1536 		pvscsi_free_interrupts(sc);
1537 		return (error);
1538 	}
1539 
1540 	return (0);
1541 }
1542 
1543 static void
1544 pvscsi_free_all(struct pvscsi_softc *sc)
1545 {
1546 
1547 	if (sc->sim) {
1548 		int error;
1549 
1550 		if (sc->bus_path) {
1551 			xpt_free_path(sc->bus_path);
1552 		}
1553 
1554 		error = xpt_bus_deregister(cam_sim_path(sc->sim));
1555 		if (error != 0) {
1556 			device_printf(sc->dev,
1557 			    "Error deregistering bus, error %d\n", error);
1558 		}
1559 
1560 		cam_sim_free(sc->sim, TRUE);
1561 	}
1562 
1563 	pvscsi_dma_free_per_hcb(sc, sc->hcb_cnt);
1564 
1565 	if (sc->hcbs) {
1566 		free(sc->hcbs, M_PVSCSI);
1567 	}
1568 
1569 	pvscsi_free_rings(sc);
1570 
1571 	pvscsi_free_interrupts(sc);
1572 
1573 	if (sc->buffer_dmat != NULL) {
1574 		bus_dma_tag_destroy(sc->buffer_dmat);
1575 	}
1576 
1577 	if (sc->parent_dmat != NULL) {
1578 		bus_dma_tag_destroy(sc->parent_dmat);
1579 	}
1580 
1581 	if (sc->mm_res != NULL) {
1582 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->mm_rid,
1583 		    sc->mm_res);
1584 	}
1585 }
1586 
1587 static int
1588 pvscsi_attach(device_t dev)
1589 {
1590 	struct pvscsi_softc *sc;
1591 	int rid;
1592 	int barid;
1593 	int error;
1594 	int max_queue_depth;
1595 	int adapter_queue_size;
1596 	struct cam_devq *devq;
1597 
1598 	sc = device_get_softc(dev);
1599 	sc->dev = dev;
1600 
1601 	mtx_init(&sc->lock, "pvscsi", NULL, MTX_DEF);
1602 
1603 	pci_enable_busmaster(dev);
1604 
1605 	sc->mm_rid = -1;
1606 	for (barid = 0; barid <= PCIR_MAX_BAR_0; ++barid) {
1607 		rid = PCIR_BAR(barid);
1608 
1609 		sc->mm_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
1610 		    RF_ACTIVE);
1611 		if (sc->mm_res != NULL) {
1612 			sc->mm_rid = rid;
1613 			break;
1614 		}
1615 	}
1616 
1617 	if (sc->mm_res == NULL) {
1618 		device_printf(dev, "could not map device memory\n");
1619 		return (ENXIO);
1620 	}
1621 
1622 	error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0,
1623 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE,
1624 	    BUS_SPACE_UNRESTRICTED, BUS_SPACE_MAXSIZE, 0, NULL, NULL,
1625 	    &sc->parent_dmat);
1626 	if (error) {
1627 		device_printf(dev, "parent dma tag create failure, error %d\n",
1628 		    error);
1629 		pvscsi_free_all(sc);
1630 		return (ENXIO);
1631 	}
1632 
1633 	error = bus_dma_tag_create(sc->parent_dmat, 1, 0,
1634 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
1635 	    PVSCSI_MAX_SG_ENTRIES_PER_SEGMENT * PAGE_SIZE,
1636 	    PVSCSI_MAX_SG_ENTRIES_PER_SEGMENT, PAGE_SIZE, BUS_DMA_ALLOCNOW,
1637 	    NULL, NULL, &sc->buffer_dmat);
1638 	if (error) {
1639 		device_printf(dev, "parent dma tag create failure, error %d\n",
1640 		    error);
1641 		pvscsi_free_all(sc);
1642 		return (ENXIO);
1643 	}
1644 
1645 	error = pvscsi_setup_interrupts(sc);
1646 	if (error) {
1647 		device_printf(dev, "Interrupt setup failed\n");
1648 		pvscsi_free_all(sc);
1649 		return (error);
1650 	}
1651 
1652 	sc->max_targets = pvscsi_get_max_targets(sc);
1653 
1654 	sc->use_msg = pvscsi_get_tunable(sc, "use_msg", pvscsi_use_msg) &&
1655 	    pvscsi_hw_supports_msg(sc);
1656 	sc->msg_ring_num_pages = sc->use_msg ? 1 : 0;
1657 
1658 	sc->req_ring_num_pages = pvscsi_get_tunable(sc, "request_ring_pages",
1659 	    pvscsi_request_ring_pages);
1660 	if (sc->req_ring_num_pages <= 0) {
1661 		if (sc->max_targets <= 16) {
1662 			sc->req_ring_num_pages =
1663 			    PVSCSI_DEFAULT_NUM_PAGES_REQ_RING;
1664 		} else {
1665 			sc->req_ring_num_pages = PVSCSI_MAX_NUM_PAGES_REQ_RING;
1666 		}
1667 	} else if (sc->req_ring_num_pages > PVSCSI_MAX_NUM_PAGES_REQ_RING) {
1668 		sc->req_ring_num_pages = PVSCSI_MAX_NUM_PAGES_REQ_RING;
1669 	}
1670 	sc->cmp_ring_num_pages = sc->req_ring_num_pages;
1671 
1672 	max_queue_depth = pvscsi_get_tunable(sc, "max_queue_depth",
1673 	    pvscsi_max_queue_depth);
1674 
1675 	adapter_queue_size = (sc->req_ring_num_pages * PAGE_SIZE) /
1676 	    sizeof(struct pvscsi_ring_req_desc);
1677 	if (max_queue_depth > 0) {
1678 		adapter_queue_size = MIN(adapter_queue_size, max_queue_depth);
1679 	}
1680 	adapter_queue_size = MIN(adapter_queue_size,
1681 	    PVSCSI_MAX_REQ_QUEUE_DEPTH);
1682 
1683 	device_printf(sc->dev, "Use Msg: %d\n", sc->use_msg);
1684 	device_printf(sc->dev, "Max targets: %d\n", sc->max_targets);
1685 	device_printf(sc->dev, "REQ num pages: %d\n", sc->req_ring_num_pages);
1686 	device_printf(sc->dev, "CMP num pages: %d\n", sc->cmp_ring_num_pages);
1687 	device_printf(sc->dev, "MSG num pages: %d\n", sc->msg_ring_num_pages);
1688 	device_printf(sc->dev, "Queue size: %d\n", adapter_queue_size);
1689 
1690 	if (pvscsi_allocate_rings(sc)) {
1691 		device_printf(dev, "ring allocation failed\n");
1692 		pvscsi_free_all(sc);
1693 		return (ENXIO);
1694 	}
1695 
1696 	sc->hcb_cnt = adapter_queue_size;
1697 	sc->hcbs = malloc(sc->hcb_cnt * sizeof(*sc->hcbs), M_PVSCSI,
1698 	    M_NOWAIT | M_ZERO);
1699 	if (sc->hcbs == NULL) {
1700 		device_printf(dev, "error allocating hcb array\n");
1701 		pvscsi_free_all(sc);
1702 		return (ENXIO);
1703 	}
1704 
1705 	if (pvscsi_dma_alloc_per_hcb(sc)) {
1706 		device_printf(dev, "error allocating per hcb dma memory\n");
1707 		pvscsi_free_all(sc);
1708 		return (ENXIO);
1709 	}
1710 
1711 	pvscsi_adapter_reset(sc);
1712 
1713 	devq = cam_simq_alloc(adapter_queue_size);
1714 	if (devq == NULL) {
1715 		device_printf(dev, "cam devq alloc failed\n");
1716 		pvscsi_free_all(sc);
1717 		return (ENXIO);
1718 	}
1719 
1720 	sc->sim = cam_sim_alloc(pvscsi_action, pvscsi_poll, "pvscsi", sc,
1721 	    device_get_unit(dev), &sc->lock, 1, adapter_queue_size, devq);
1722 	if (sc->sim == NULL) {
1723 		device_printf(dev, "cam sim alloc failed\n");
1724 		cam_simq_free(devq);
1725 		pvscsi_free_all(sc);
1726 		return (ENXIO);
1727 	}
1728 
1729 	mtx_lock(&sc->lock);
1730 
1731 	if (xpt_bus_register(sc->sim, dev, 0) != CAM_SUCCESS) {
1732 		device_printf(dev, "xpt bus register failed\n");
1733 		pvscsi_free_all(sc);
1734 		mtx_unlock(&sc->lock);
1735 		return (ENXIO);
1736 	}
1737 
1738 	if (xpt_create_path(&sc->bus_path, NULL, cam_sim_path(sc->sim),
1739 	    CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
1740 		device_printf(dev, "xpt create path failed\n");
1741 		pvscsi_free_all(sc);
1742 		mtx_unlock(&sc->lock);
1743 		return (ENXIO);
1744 	}
1745 
1746 	pvscsi_setup_rings(sc);
1747 	if (sc->use_msg) {
1748 		pvscsi_setup_msg_ring(sc);
1749 	}
1750 
1751 	sc->use_req_call_threshold = pvscsi_setup_req_call(sc, 1);
1752 
1753 	pvscsi_intr_enable(sc);
1754 
1755 	mtx_unlock(&sc->lock);
1756 
1757 	return (0);
1758 }
1759 
1760 static int
1761 pvscsi_detach(device_t dev)
1762 {
1763 	struct pvscsi_softc *sc;
1764 
1765 	sc = device_get_softc(dev);
1766 
1767 	pvscsi_intr_disable(sc);
1768 	pvscsi_adapter_reset(sc);
1769 
1770 	if (sc->irq_handler != NULL) {
1771 		bus_teardown_intr(dev, sc->irq_res, sc->irq_handler);
1772 	}
1773 
1774 	mtx_lock(&sc->lock);
1775 	pvscsi_free_all(sc);
1776 	mtx_unlock(&sc->lock);
1777 
1778 	mtx_destroy(&sc->lock);
1779 
1780 	return (0);
1781 }
1782 
1783 static device_method_t pvscsi_methods[] = {
1784 	DEVMETHOD(device_probe, pvscsi_probe),
1785 	DEVMETHOD(device_shutdown, pvscsi_shutdown),
1786 	DEVMETHOD(device_attach, pvscsi_attach),
1787 	DEVMETHOD(device_detach, pvscsi_detach),
1788 	DEVMETHOD_END
1789 };
1790 
1791 static driver_t pvscsi_driver = {
1792 	"pvscsi", pvscsi_methods, sizeof(struct pvscsi_softc)
1793 };
1794 
1795 DRIVER_MODULE(pvscsi, pci, pvscsi_driver, 0, 0);
1796 
1797 MODULE_DEPEND(pvscsi, pci, 1, 1, 1);
1798 MODULE_DEPEND(pvscsi, cam, 1, 1, 1);
1799