xref: /freebsd/sys/dev/vmware/pvscsi/pvscsi.c (revision 924226fba12cc9a228c73b956e1b7fa24c60b055)
1 /*-
2  * Copyright (c) 2018 VMware, Inc.
3  *
4  * SPDX-License-Identifier: (BSD-2-Clause OR GPL-2.0)
5  */
6 
7 #include <sys/cdefs.h>
8 __FBSDID("$FreeBSD$");
9 
10 #include <sys/param.h>
11 #include <sys/bus.h>
12 #include <sys/errno.h>
13 #include <sys/kernel.h>
14 #include <sys/malloc.h>
15 #include <sys/module.h>
16 #include <sys/queue.h>
17 #include <sys/rman.h>
18 #include <sys/sysctl.h>
19 #include <sys/systm.h>
20 
21 #include <machine/bus.h>
22 #include <machine/resource.h>
23 
24 #include <dev/pci/pcireg.h>
25 #include <dev/pci/pcivar.h>
26 
27 #include <cam/cam.h>
28 #include <cam/cam_ccb.h>
29 #include <cam/cam_debug.h>
30 #include <cam/cam_sim.h>
31 #include <cam/cam_xpt_sim.h>
32 #include <cam/scsi/scsi_message.h>
33 
34 #include "pvscsi.h"
35 
/*
 * Default size of the request ring in pages (per the name; the place where
 * it is consumed is outside this section of the file).
 */
#define	PVSCSI_DEFAULT_NUM_PAGES_REQ_RING	8
/* Bytes of sense buffer reserved for each hcb in the shared DMA area. */
#define	PVSCSI_SENSE_LENGTH			256

MALLOC_DECLARE(M_PVSCSI);
MALLOC_DEFINE(M_PVSCSI, "pvscsi", "PVSCSI memory");

/*
 * DEBUG_PRINTF(level, dev, fmt, ...): when PVSCSI_DEBUG_LOGGING is defined,
 * forward to device_printf() if pvscsi_log_level is at least "level".
 * Otherwise the macro expands to nothing, so its arguments are not
 * evaluated.
 */
#ifdef PVSCSI_DEBUG_LOGGING
#define	DEBUG_PRINTF(level, dev, fmt, ...)				\
	do {								\
		if (pvscsi_log_level >= (level)) {			\
			device_printf((dev), (fmt), ##__VA_ARGS__);	\
		}							\
	} while(0)
#else
#define DEBUG_PRINTF(level, dev, fmt, ...)
#endif /* PVSCSI_DEBUG_LOGGING */

/*
 * Names for the two SIM-private pointers in the CCB header: the hcb that
 * carries the request and the softc it was issued on.
 */
#define	ccb_pvscsi_hcb	spriv_ptr0
#define	ccb_pvscsi_sc	spriv_ptr1
55 
/* Forward declarations of the driver's private types. */
struct pvscsi_softc;
struct pvscsi_hcb;
struct pvscsi_dma;

/* Register access, interrupt masking and device command helpers. */
static inline uint32_t pvscsi_reg_read(struct pvscsi_softc *sc,
    uint32_t offset);
static inline void pvscsi_reg_write(struct pvscsi_softc *sc, uint32_t offset,
    uint32_t val);
static inline uint32_t pvscsi_read_intr_status(struct pvscsi_softc *sc);
static inline void pvscsi_write_intr_status(struct pvscsi_softc *sc,
    uint32_t val);
static inline void pvscsi_intr_enable(struct pvscsi_softc *sc);
static inline void pvscsi_intr_disable(struct pvscsi_softc *sc);
static void pvscsi_kick_io(struct pvscsi_softc *sc, uint8_t cdb0);
static void pvscsi_write_cmd(struct pvscsi_softc *sc, uint32_t cmd, void *data,
    uint32_t len);
static uint32_t pvscsi_get_max_targets(struct pvscsi_softc *sc);
static int pvscsi_setup_req_call(struct pvscsi_softc *sc, uint32_t enable);
static void pvscsi_setup_rings(struct pvscsi_softc *sc);
static void pvscsi_setup_msg_ring(struct pvscsi_softc *sc);
static int pvscsi_hw_supports_msg(struct pvscsi_softc *sc);

/* Timeout handling and the escalating recovery actions. */
static void pvscsi_timeout(void *arg);
static void pvscsi_freeze(struct pvscsi_softc *sc);
static void pvscsi_adapter_reset(struct pvscsi_softc *sc);
static void pvscsi_bus_reset(struct pvscsi_softc *sc);
static void pvscsi_device_reset(struct pvscsi_softc *sc, uint32_t target);
static void pvscsi_abort(struct pvscsi_softc *sc, uint32_t target,
    union ccb *ccb);

/* Completion and message ring processing. */
static void pvscsi_process_completion(struct pvscsi_softc *sc,
    struct pvscsi_ring_cmp_desc *e);
static void pvscsi_process_cmp_ring(struct pvscsi_softc *sc);
static void pvscsi_process_msg(struct pvscsi_softc *sc,
    struct pvscsi_ring_msg_desc *e);
static void pvscsi_process_msg_ring(struct pvscsi_softc *sc);

/* Interrupt and polling entry points. */
static void pvscsi_intr_locked(struct pvscsi_softc *sc);
static void pvscsi_intr(void *xsc);
static void pvscsi_poll(struct cam_sim *sim);

/* CAM request submission. */
static void pvscsi_execute_ccb(void *arg, bus_dma_segment_t *segs, int nseg,
    int error);
static void pvscsi_action(struct cam_sim *sim, union ccb *ccb);

/* hcb <-> device context conversion and the hcb free list. */
static inline uint64_t pvscsi_hcb_to_context(struct pvscsi_softc *sc,
    struct pvscsi_hcb *hcb);
static inline struct pvscsi_hcb* pvscsi_context_to_hcb(struct pvscsi_softc *sc,
    uint64_t context);
static struct pvscsi_hcb * pvscsi_hcb_get(struct pvscsi_softc *sc);
static void pvscsi_hcb_put(struct pvscsi_softc *sc, struct pvscsi_hcb *hcb);

/* DMA memory, ring and interrupt resource management. */
static void pvscsi_dma_cb(void *arg, bus_dma_segment_t *segs, int nseg,
    int error);
static void pvscsi_dma_free(struct pvscsi_softc *sc, struct pvscsi_dma *dma);
static int pvscsi_dma_alloc(struct pvscsi_softc *sc, struct pvscsi_dma *dma,
    bus_size_t size, bus_size_t alignment);
static int pvscsi_dma_alloc_ppns(struct pvscsi_softc *sc,
    struct pvscsi_dma *dma, uint64_t *ppn_list, uint32_t num_pages);
static void pvscsi_dma_free_per_hcb(struct pvscsi_softc *sc,
    uint32_t hcbs_allocated);
static int pvscsi_dma_alloc_per_hcb(struct pvscsi_softc *sc);
static void pvscsi_free_rings(struct pvscsi_softc *sc);
static int pvscsi_allocate_rings(struct pvscsi_softc *sc);
static void pvscsi_free_interrupts(struct pvscsi_softc *sc);
static int pvscsi_setup_interrupts(struct pvscsi_softc *sc);
static void pvscsi_free_all(struct pvscsi_softc *sc);

/* Device (newbus) methods and the per-unit tunable helper. */
static int pvscsi_attach(device_t dev);
static int pvscsi_detach(device_t dev);
static int pvscsi_probe(device_t dev);
static int pvscsi_shutdown(device_t dev);
static int pvscsi_get_tunable(struct pvscsi_softc *sc, char *name, int value);
129 
/*
 * Debug logging threshold consulted by DEBUG_PRINTF(); exposed as the
 * hw.pvscsi.log_level sysctl/tunable when debug logging is compiled in.
 */
#ifdef PVSCSI_DEBUG_LOGGING
static int pvscsi_log_level = 0;
static SYSCTL_NODE(_hw, OID_AUTO, pvscsi, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "PVSCSI driver parameters");
SYSCTL_INT(_hw_pvscsi, OID_AUTO, log_level, CTLFLAG_RWTUN, &pvscsi_log_level,
    0, "PVSCSI debug log level");
#endif

/*
 * Driver-wide loader tunables.  Each variable below holds the global
 * default for the setting named in its TUNABLE_INT() string.
 */
static int pvscsi_request_ring_pages = 0;
TUNABLE_INT("hw.pvscsi.request_ring_pages", &pvscsi_request_ring_pages);

static int pvscsi_use_msg = 1;
TUNABLE_INT("hw.pvscsi.use_msg", &pvscsi_use_msg);

static int pvscsi_use_msi = 1;
TUNABLE_INT("hw.pvscsi.use_msi", &pvscsi_use_msi);

static int pvscsi_use_msix = 1;
TUNABLE_INT("hw.pvscsi.use_msix", &pvscsi_use_msix);

/* Default passed to the per-unit lookup in pvscsi_setup_req_call(). */
static int pvscsi_use_req_call_threshold = 1;
TUNABLE_INT("hw.pvscsi.use_req_call_threshold", &pvscsi_use_req_call_threshold);

static int pvscsi_max_queue_depth = 0;
TUNABLE_INT("hw.pvscsi.max_queue_depth", &pvscsi_max_queue_depth);
155 
/*
 * Scatter/gather table for one request.  One of these is carved out of the
 * shared sg_list DMA area for every hcb.
 */
struct pvscsi_sg_list {
	struct pvscsi_sg_element sge[PVSCSI_MAX_SG_ENTRIES_PER_SEGMENT];
};
159 
/* Seconds pvscsi_timeout() waits after an abort / after a reset. */
#define	PVSCSI_ABORT_TIMEOUT	2
#define	PVSCSI_RESET_TIMEOUT	10

/*
 * Recovery stage recorded in pvscsi_hcb.recovery.  pvscsi_timeout() moves
 * an hcb from one stage to the next each time its callout fires.
 */
#define	PVSCSI_HCB_NONE		0
#define	PVSCSI_HCB_ABORT	1
#define	PVSCSI_HCB_DEVICE_RESET	2
#define	PVSCSI_HCB_BUS_RESET	3
167 
/*
 * Host control block: per-request driver state.  hcbs live in the
 * sc->hcbs array and are handed out from sc->free_list.
 */
struct pvscsi_hcb {
	/* CCB being serviced; NULL while the hcb is on the free list. */
	union ccb			*ccb;
	/* Request ring descriptor for this request; NULL while free. */
	struct pvscsi_ring_req_desc	*e;
	/* Current PVSCSI_HCB_* recovery stage. */
	int				 recovery;
	/* Free list linkage. */
	SLIST_ENTRY(pvscsi_hcb)		 links;

	/* Timeout callout; its handler is pvscsi_timeout(). */
	struct callout			 callout;
	/* DMA map for the data buffer, created from sc->buffer_dmat. */
	bus_dmamap_t			 dma_map;
	/* This hcb's slice of sc->sense_buffer_dma (virtual / bus address). */
	void				*sense_buffer;
	bus_addr_t			 sense_buffer_paddr;
	/* This hcb's slice of sc->sg_list_dma (virtual / bus address). */
	struct pvscsi_sg_list		*sg_list;
	bus_addr_t			 sg_list_paddr;
};
181 
/*
 * One single-segment DMA allocation made by pvscsi_dma_alloc() and
 * released by pvscsi_dma_free().
 */
struct pvscsi_dma
{
	bus_dma_tag_t	 tag;	/* tag created for this allocation */
	bus_dmamap_t	 map;	/* map returned by bus_dmamem_alloc() */
	void		*vaddr;	/* kernel virtual address of the memory */
	bus_addr_t	 paddr;	/* bus address filled in by pvscsi_dma_cb() */
	bus_size_t	 size;	/* size in bytes, set once fully loaded */
};
190 
/* Per-adapter driver state. */
struct pvscsi_softc {
	device_t		 dev;
	/* Mutex the ring, hcb and interrupt paths assert as held. */
	struct mtx		 lock;
	struct cam_sim		*sim;
	struct cam_path		*bus_path;
	/* Non-zero after pvscsi_freeze(); cleared by the next completion. */
	int			 frozen;
	/* Kernel virtual addresses of the shared ring memory. */
	struct pvscsi_rings_state	*rings_state;
	struct pvscsi_ring_req_desc	*req_ring;
	struct pvscsi_ring_cmp_desc	*cmp_ring;
	struct pvscsi_ring_msg_desc	*msg_ring;
	/* hcb array, its element count, and the list of unused hcbs. */
	uint32_t		 hcb_cnt;
	struct pvscsi_hcb	*hcbs;
	SLIST_HEAD(, pvscsi_hcb)	free_list;
	bus_dma_tag_t		parent_dmat;
	/* Tag the per-hcb data buffer maps are created from. */
	bus_dma_tag_t		buffer_dmat;

	/* True when the message ring is in use. */
	bool		 use_msg;
	uint32_t	 max_targets;
	/* Register window resource used by pvscsi_reg_read()/_write(). */
	int		 mm_rid;
	struct resource	*mm_res;
	int		 irq_id;
	struct resource	*irq_res;
	void		*irq_handler;
	/* Non-zero: pvscsi_kick_io() honors the request call threshold. */
	int		 use_req_call_threshold;
	int		 use_msi_or_msix;

	/* Physical page numbers handed to the device for each ring. */
	uint64_t	rings_state_ppn;
	uint32_t	req_ring_num_pages;
	uint64_t	req_ring_ppn[PVSCSI_MAX_NUM_PAGES_REQ_RING];
	uint32_t	cmp_ring_num_pages;
	uint64_t	cmp_ring_ppn[PVSCSI_MAX_NUM_PAGES_CMP_RING];
	uint32_t	msg_ring_num_pages;
	uint64_t	msg_ring_ppn[PVSCSI_MAX_NUM_PAGES_MSG_RING];

	/* DMA allocations backing the rings. */
	struct	pvscsi_dma rings_state_dma;
	struct	pvscsi_dma req_ring_dma;
	struct	pvscsi_dma cmp_ring_dma;
	struct	pvscsi_dma msg_ring_dma;

	/* DMA allocations sliced up per hcb. */
	struct	pvscsi_dma sg_list_dma;
	struct	pvscsi_dma sense_buffer_dma;
};
233 
234 static int pvscsi_get_tunable(struct pvscsi_softc *sc, char *name, int value)
235 {
236 	char cfg[64];
237 
238 	snprintf(cfg, sizeof(cfg), "hw.pvscsi.%d.%s", device_get_unit(sc->dev),
239 	    name);
240 	TUNABLE_INT_FETCH(cfg, &value);
241 
242 	return (value);
243 }
244 
245 static void
246 pvscsi_freeze(struct pvscsi_softc *sc)
247 {
248 
249 	if (!sc->frozen) {
250 		xpt_freeze_simq(sc->sim, 1);
251 		sc->frozen = 1;
252 	}
253 }
254 
255 static inline uint32_t
256 pvscsi_reg_read(struct pvscsi_softc *sc, uint32_t offset)
257 {
258 
259 	return (bus_read_4(sc->mm_res, offset));
260 }
261 
262 static inline void
263 pvscsi_reg_write(struct pvscsi_softc *sc, uint32_t offset, uint32_t val)
264 {
265 
266 	bus_write_4(sc->mm_res, offset, val);
267 }
268 
269 static inline uint32_t
270 pvscsi_read_intr_status(struct pvscsi_softc *sc)
271 {
272 
273 	return (pvscsi_reg_read(sc, PVSCSI_REG_OFFSET_INTR_STATUS));
274 }
275 
276 static inline void
277 pvscsi_write_intr_status(struct pvscsi_softc *sc, uint32_t val)
278 {
279 
280 	pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_INTR_STATUS, val);
281 }
282 
283 static inline void
284 pvscsi_intr_enable(struct pvscsi_softc *sc)
285 {
286 	uint32_t mask;
287 
288 	mask = PVSCSI_INTR_CMPL_MASK;
289 	if (sc->use_msg) {
290 		mask |= PVSCSI_INTR_MSG_MASK;
291 	}
292 
293 	pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_INTR_MASK, mask);
294 }
295 
296 static inline void
297 pvscsi_intr_disable(struct pvscsi_softc *sc)
298 {
299 
300 	pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_INTR_MASK, 0);
301 }
302 
303 static void
304 pvscsi_kick_io(struct pvscsi_softc *sc, uint8_t cdb0)
305 {
306 	struct pvscsi_rings_state *s;
307 
308 	if (cdb0 == READ_6  || cdb0 == READ_10  ||
309 	    cdb0 == READ_12  || cdb0 == READ_16 ||
310 	    cdb0 == WRITE_6 || cdb0 == WRITE_10 ||
311 	    cdb0 == WRITE_12 || cdb0 == WRITE_16) {
312 		s = sc->rings_state;
313 
314 		if (!sc->use_req_call_threshold ||
315 		    (s->req_prod_idx - s->req_cons_idx) >=
316 		     s->req_call_threshold) {
317 			pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_KICK_RW_IO, 0);
318 		}
319 	} else {
320 		pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_KICK_NON_RW_IO, 0);
321 	}
322 }
323 
324 static void
325 pvscsi_write_cmd(struct pvscsi_softc *sc, uint32_t cmd, void *data,
326 		 uint32_t len)
327 {
328 	uint32_t *data_ptr;
329 	int i;
330 
331 	KASSERT(len % sizeof(uint32_t) == 0,
332 		("command size not a multiple of 4"));
333 
334 	data_ptr = data;
335 	len /= sizeof(uint32_t);
336 
337 	pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_COMMAND, cmd);
338 	for (i = 0; i < len; ++i) {
339 		pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_COMMAND_DATA,
340 		   data_ptr[i]);
341 	}
342 }
343 
344 static inline uint64_t pvscsi_hcb_to_context(struct pvscsi_softc *sc,
345     struct pvscsi_hcb *hcb)
346 {
347 
348 	/* Offset by 1 because context must not be 0 */
349 	return (hcb - sc->hcbs + 1);
350 }
351 
352 static inline struct pvscsi_hcb* pvscsi_context_to_hcb(struct pvscsi_softc *sc,
353     uint64_t context)
354 {
355 
356 	return (sc->hcbs + (context - 1));
357 }
358 
359 static struct pvscsi_hcb *
360 pvscsi_hcb_get(struct pvscsi_softc *sc)
361 {
362 	struct pvscsi_hcb *hcb;
363 
364 	mtx_assert(&sc->lock, MA_OWNED);
365 
366 	hcb = SLIST_FIRST(&sc->free_list);
367 	if (hcb) {
368 		SLIST_REMOVE_HEAD(&sc->free_list, links);
369 	}
370 
371 	return (hcb);
372 }
373 
374 static void
375 pvscsi_hcb_put(struct pvscsi_softc *sc, struct pvscsi_hcb *hcb)
376 {
377 
378 	mtx_assert(&sc->lock, MA_OWNED);
379 	hcb->ccb = NULL;
380 	hcb->e = NULL;
381 	hcb->recovery = PVSCSI_HCB_NONE;
382 	SLIST_INSERT_HEAD(&sc->free_list, hcb, links);
383 }
384 
385 static uint32_t
386 pvscsi_get_max_targets(struct pvscsi_softc *sc)
387 {
388 	uint32_t max_targets;
389 
390 	pvscsi_write_cmd(sc, PVSCSI_CMD_GET_MAX_TARGETS, NULL, 0);
391 
392 	max_targets = pvscsi_reg_read(sc, PVSCSI_REG_OFFSET_COMMAND_STATUS);
393 
394 	if (max_targets == ~0) {
395 		max_targets = 16;
396 	}
397 
398 	return (max_targets);
399 }
400 
401 static int pvscsi_setup_req_call(struct pvscsi_softc *sc, uint32_t enable)
402 {
403 	uint32_t status;
404 	struct pvscsi_cmd_desc_setup_req_call cmd;
405 
406 	if (!pvscsi_get_tunable(sc, "pvscsi_use_req_call_threshold",
407 	    pvscsi_use_req_call_threshold)) {
408 		return (0);
409 	}
410 
411 	pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_COMMAND,
412 	    PVSCSI_CMD_SETUP_REQCALLTHRESHOLD);
413 	status = pvscsi_reg_read(sc, PVSCSI_REG_OFFSET_COMMAND_STATUS);
414 
415 	if (status != -1) {
416 		bzero(&cmd, sizeof(cmd));
417 		cmd.enable = enable;
418 		pvscsi_write_cmd(sc, PVSCSI_CMD_SETUP_REQCALLTHRESHOLD,
419 		    &cmd, sizeof(cmd));
420 		status = pvscsi_reg_read(sc, PVSCSI_REG_OFFSET_COMMAND_STATUS);
421 
422 		return (status != 0);
423 	} else {
424 		return (0);
425 	}
426 }
427 
428 static void
429 pvscsi_dma_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
430 {
431 	bus_addr_t *dest;
432 
433 	KASSERT(nseg == 1, ("more than one segment"));
434 
435 	dest = arg;
436 
437 	if (!error) {
438 		*dest = segs->ds_addr;
439 	}
440 }
441 
442 static void
443 pvscsi_dma_free(struct pvscsi_softc *sc, struct pvscsi_dma *dma)
444 {
445 
446 	if (dma->tag != NULL) {
447 		if (dma->paddr != 0) {
448 			bus_dmamap_unload(dma->tag, dma->map);
449 		}
450 
451 		if (dma->vaddr != NULL) {
452 			bus_dmamem_free(dma->tag, dma->vaddr, dma->map);
453 		}
454 
455 		bus_dma_tag_destroy(dma->tag);
456 	}
457 
458 	bzero(dma, sizeof(*dma));
459 }
460 
461 static int
462 pvscsi_dma_alloc(struct pvscsi_softc *sc, struct pvscsi_dma *dma,
463     bus_size_t size, bus_size_t alignment)
464 {
465 	int error;
466 
467 	bzero(dma, sizeof(*dma));
468 
469 	error = bus_dma_tag_create(sc->parent_dmat, alignment, 0,
470 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, size, 1, size,
471 	    BUS_DMA_ALLOCNOW, NULL, NULL, &dma->tag);
472 	if (error) {
473 		device_printf(sc->dev, "error creating dma tag, error %d\n",
474 		    error);
475 		goto fail;
476 	}
477 
478 	error = bus_dmamem_alloc(dma->tag, &dma->vaddr,
479 	    BUS_DMA_NOWAIT | BUS_DMA_ZERO, &dma->map);
480 	if (error) {
481 		device_printf(sc->dev, "error allocating dma mem, error %d\n",
482 		    error);
483 		goto fail;
484 	}
485 
486 	error = bus_dmamap_load(dma->tag, dma->map, dma->vaddr, size,
487 	    pvscsi_dma_cb, &dma->paddr, BUS_DMA_NOWAIT);
488 	if (error) {
489 		device_printf(sc->dev, "error mapping dma mam, error %d\n",
490 		    error);
491 		goto fail;
492 	}
493 
494 	dma->size = size;
495 
496 fail:
497 	if (error) {
498 		pvscsi_dma_free(sc, dma);
499 	}
500 	return (error);
501 }
502 
503 static int
504 pvscsi_dma_alloc_ppns(struct pvscsi_softc *sc, struct pvscsi_dma *dma,
505     uint64_t *ppn_list, uint32_t num_pages)
506 {
507 	int error;
508 	uint32_t i;
509 	uint64_t ppn;
510 
511 	error = pvscsi_dma_alloc(sc, dma, num_pages * PAGE_SIZE, PAGE_SIZE);
512 	if (error) {
513 		device_printf(sc->dev, "Error allocating pages, error %d\n",
514 		    error);
515 		return (error);
516 	}
517 
518 	ppn = dma->paddr >> PAGE_SHIFT;
519 	for (i = 0; i < num_pages; i++) {
520 		ppn_list[i] = ppn + i;
521 	}
522 
523 	return (0);
524 }
525 
526 static void
527 pvscsi_dma_free_per_hcb(struct pvscsi_softc *sc, uint32_t hcbs_allocated)
528 {
529 	int i;
530 	int lock_owned;
531 	struct pvscsi_hcb *hcb;
532 
533 	lock_owned = mtx_owned(&sc->lock);
534 
535 	if (lock_owned) {
536 		mtx_unlock(&sc->lock);
537 	}
538 	for (i = 0; i < hcbs_allocated; ++i) {
539 		hcb = sc->hcbs + i;
540 		callout_drain(&hcb->callout);
541 	};
542 	if (lock_owned) {
543 		mtx_lock(&sc->lock);
544 	}
545 
546 	for (i = 0; i < hcbs_allocated; ++i) {
547 		hcb = sc->hcbs + i;
548 		bus_dmamap_destroy(sc->buffer_dmat, hcb->dma_map);
549 	};
550 
551 	pvscsi_dma_free(sc, &sc->sense_buffer_dma);
552 	pvscsi_dma_free(sc, &sc->sg_list_dma);
553 }
554 
555 static int
556 pvscsi_dma_alloc_per_hcb(struct pvscsi_softc *sc)
557 {
558 	int i;
559 	int error;
560 	struct pvscsi_hcb *hcb;
561 
562 	i = 0;
563 
564 	error = pvscsi_dma_alloc(sc, &sc->sg_list_dma,
565 	    sizeof(struct pvscsi_sg_list) * sc->hcb_cnt, 1);
566 	if (error) {
567 		device_printf(sc->dev,
568 		    "Error allocation sg list DMA memory, error %d\n", error);
569 		goto fail;
570 	}
571 
572 	error = pvscsi_dma_alloc(sc, &sc->sense_buffer_dma,
573 				 PVSCSI_SENSE_LENGTH * sc->hcb_cnt, 1);
574 	if (error) {
575 		device_printf(sc->dev,
576 		    "Error allocation sg list DMA memory, error %d\n", error);
577 		goto fail;
578 	}
579 
580 	for (i = 0; i < sc->hcb_cnt; ++i) {
581 		hcb = sc->hcbs + i;
582 
583 		error = bus_dmamap_create(sc->buffer_dmat, 0, &hcb->dma_map);
584 		if (error) {
585 			device_printf(sc->dev,
586 			    "Error creating dma map for hcb %d, error %d\n",
587 			    i, error);
588 			goto fail;
589 		}
590 
591 		hcb->sense_buffer =
592 		    (void *)((caddr_t)sc->sense_buffer_dma.vaddr +
593 		    PVSCSI_SENSE_LENGTH * i);
594 		hcb->sense_buffer_paddr =
595 		    sc->sense_buffer_dma.paddr + PVSCSI_SENSE_LENGTH * i;
596 
597 		hcb->sg_list =
598 		    (struct pvscsi_sg_list *)((caddr_t)sc->sg_list_dma.vaddr +
599 		    sizeof(struct pvscsi_sg_list) * i);
600 		hcb->sg_list_paddr =
601 		    sc->sg_list_dma.paddr + sizeof(struct pvscsi_sg_list) * i;
602 
603 		callout_init_mtx(&hcb->callout, &sc->lock, 0);
604 	}
605 
606 	SLIST_INIT(&sc->free_list);
607 	for (i = (sc->hcb_cnt - 1); i >= 0; --i) {
608 		hcb = sc->hcbs + i;
609 		SLIST_INSERT_HEAD(&sc->free_list, hcb, links);
610 	}
611 
612 fail:
613 	if (error) {
614 		pvscsi_dma_free_per_hcb(sc, i);
615 	}
616 
617 	return (error);
618 }
619 
620 static void
621 pvscsi_free_rings(struct pvscsi_softc *sc)
622 {
623 
624 	pvscsi_dma_free(sc, &sc->rings_state_dma);
625 	pvscsi_dma_free(sc, &sc->req_ring_dma);
626 	pvscsi_dma_free(sc, &sc->cmp_ring_dma);
627 	if (sc->use_msg) {
628 		pvscsi_dma_free(sc, &sc->msg_ring_dma);
629 	}
630 }
631 
632 static int
633 pvscsi_allocate_rings(struct pvscsi_softc *sc)
634 {
635 	int error;
636 
637 	error = pvscsi_dma_alloc_ppns(sc, &sc->rings_state_dma,
638 	    &sc->rings_state_ppn, 1);
639 	if (error) {
640 		device_printf(sc->dev,
641 		    "Error allocating rings state, error = %d\n", error);
642 		goto fail;
643 	}
644 	sc->rings_state = sc->rings_state_dma.vaddr;
645 
646 	error = pvscsi_dma_alloc_ppns(sc, &sc->req_ring_dma, sc->req_ring_ppn,
647 	    sc->req_ring_num_pages);
648 	if (error) {
649 		device_printf(sc->dev,
650 		    "Error allocating req ring pages, error = %d\n", error);
651 		goto fail;
652 	}
653 	sc->req_ring = sc->req_ring_dma.vaddr;
654 
655 	error = pvscsi_dma_alloc_ppns(sc, &sc->cmp_ring_dma, sc->cmp_ring_ppn,
656 	    sc->cmp_ring_num_pages);
657 	if (error) {
658 		device_printf(sc->dev,
659 		    "Error allocating cmp ring pages, error = %d\n", error);
660 		goto fail;
661 	}
662 	sc->cmp_ring = sc->cmp_ring_dma.vaddr;
663 
664 	sc->msg_ring = NULL;
665 	if (sc->use_msg) {
666 		error = pvscsi_dma_alloc_ppns(sc, &sc->msg_ring_dma,
667 		    sc->msg_ring_ppn, sc->msg_ring_num_pages);
668 		if (error) {
669 			device_printf(sc->dev,
670 			    "Error allocating cmp ring pages, error = %d\n",
671 			    error);
672 			goto fail;
673 		}
674 		sc->msg_ring = sc->msg_ring_dma.vaddr;
675 	}
676 
677 	DEBUG_PRINTF(1, sc->dev, "rings_state: %p\n", sc->rings_state);
678 	DEBUG_PRINTF(1, sc->dev, "req_ring: %p - %u pages\n", sc->req_ring,
679 	    sc->req_ring_num_pages);
680 	DEBUG_PRINTF(1, sc->dev, "cmp_ring: %p - %u pages\n", sc->cmp_ring,
681 	    sc->cmp_ring_num_pages);
682 	DEBUG_PRINTF(1, sc->dev, "msg_ring: %p - %u pages\n", sc->msg_ring,
683 	    sc->msg_ring_num_pages);
684 
685 fail:
686 	if (error) {
687 		pvscsi_free_rings(sc);
688 	}
689 	return (error);
690 }
691 
692 static void
693 pvscsi_setup_rings(struct pvscsi_softc *sc)
694 {
695 	struct pvscsi_cmd_desc_setup_rings cmd;
696 	uint32_t i;
697 
698 	bzero(&cmd, sizeof(cmd));
699 
700 	cmd.rings_state_ppn = sc->rings_state_ppn;
701 
702 	cmd.req_ring_num_pages = sc->req_ring_num_pages;
703 	for (i = 0; i < sc->req_ring_num_pages; ++i) {
704 		cmd.req_ring_ppns[i] = sc->req_ring_ppn[i];
705 	}
706 
707 	cmd.cmp_ring_num_pages = sc->cmp_ring_num_pages;
708 	for (i = 0; i < sc->cmp_ring_num_pages; ++i) {
709 		cmd.cmp_ring_ppns[i] = sc->cmp_ring_ppn[i];
710 	}
711 
712 	pvscsi_write_cmd(sc, PVSCSI_CMD_SETUP_RINGS, &cmd, sizeof(cmd));
713 }
714 
715 static int
716 pvscsi_hw_supports_msg(struct pvscsi_softc *sc)
717 {
718 	uint32_t status;
719 
720 	pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_COMMAND,
721 	    PVSCSI_CMD_SETUP_MSG_RING);
722 	status = pvscsi_reg_read(sc, PVSCSI_REG_OFFSET_COMMAND_STATUS);
723 
724 	return (status != -1);
725 }
726 
727 static void
728 pvscsi_setup_msg_ring(struct pvscsi_softc *sc)
729 {
730 	struct pvscsi_cmd_desc_setup_msg_ring cmd;
731 	uint32_t i;
732 
733 	KASSERT(sc->use_msg, ("msg is not being used"));
734 
735 	bzero(&cmd, sizeof(cmd));
736 
737 	cmd.num_pages = sc->msg_ring_num_pages;
738 	for (i = 0; i < sc->msg_ring_num_pages; ++i) {
739 		cmd.ring_ppns[i] = sc->msg_ring_ppn[i];
740 	}
741 
742 	pvscsi_write_cmd(sc, PVSCSI_CMD_SETUP_MSG_RING, &cmd, sizeof(cmd));
743 }
744 
745 static void
746 pvscsi_adapter_reset(struct pvscsi_softc *sc)
747 {
748 	uint32_t val __unused;
749 
750 	device_printf(sc->dev, "Adapter Reset\n");
751 
752 	pvscsi_write_cmd(sc, PVSCSI_CMD_ADAPTER_RESET, NULL, 0);
753 	val = pvscsi_read_intr_status(sc);
754 
755 	DEBUG_PRINTF(2, sc->dev, "adapter reset done: %u\n", val);
756 }
757 
758 static void
759 pvscsi_bus_reset(struct pvscsi_softc *sc)
760 {
761 
762 	device_printf(sc->dev, "Bus Reset\n");
763 
764 	pvscsi_write_cmd(sc, PVSCSI_CMD_RESET_BUS, NULL, 0);
765 	pvscsi_process_cmp_ring(sc);
766 
767 	DEBUG_PRINTF(2, sc->dev, "bus reset done\n");
768 }
769 
770 static void
771 pvscsi_device_reset(struct pvscsi_softc *sc, uint32_t target)
772 {
773 	struct pvscsi_cmd_desc_reset_device cmd;
774 
775 	memset(&cmd, 0, sizeof(cmd));
776 
777 	cmd.target = target;
778 
779 	device_printf(sc->dev, "Device reset for target %u\n", target);
780 
781 	pvscsi_write_cmd(sc, PVSCSI_CMD_RESET_DEVICE, &cmd, sizeof cmd);
782 	pvscsi_process_cmp_ring(sc);
783 
784 	DEBUG_PRINTF(2, sc->dev, "device reset done\n");
785 }
786 
787 static void
788 pvscsi_abort(struct pvscsi_softc *sc, uint32_t target, union ccb *ccb)
789 {
790 	struct pvscsi_cmd_desc_abort_cmd cmd;
791 	struct pvscsi_hcb *hcb;
792 	uint64_t context;
793 
794 	pvscsi_process_cmp_ring(sc);
795 
796 	hcb = ccb->ccb_h.ccb_pvscsi_hcb;
797 
798 	if (hcb != NULL) {
799 		context = pvscsi_hcb_to_context(sc, hcb);
800 
801 		memset(&cmd, 0, sizeof cmd);
802 		cmd.target = target;
803 		cmd.context = context;
804 
805 		device_printf(sc->dev, "Abort for target %u context %llx\n",
806 		    target, (unsigned long long)context);
807 
808 		pvscsi_write_cmd(sc, PVSCSI_CMD_ABORT_CMD, &cmd, sizeof(cmd));
809 		pvscsi_process_cmp_ring(sc);
810 
811 		DEBUG_PRINTF(2, sc->dev, "abort done\n");
812 	} else {
813 		DEBUG_PRINTF(1, sc->dev,
814 		    "Target %u ccb %p not found for abort\n", target, ccb);
815 	}
816 }
817 
818 static int
819 pvscsi_probe(device_t dev)
820 {
821 
822 	if (pci_get_vendor(dev) == PCI_VENDOR_ID_VMWARE &&
823 	    pci_get_device(dev) == PCI_DEVICE_ID_VMWARE_PVSCSI) {
824 		device_set_desc(dev, "VMware Paravirtual SCSI Controller");
825 		return (BUS_PROBE_DEFAULT);
826 	}
827 	return (ENXIO);
828 }
829 
/* Device shutdown method: does nothing and reports success. */
static int
pvscsi_shutdown(device_t dev)
{

	return (0);
}
836 
837 static void
838 pvscsi_timeout(void *arg)
839 {
840 	struct pvscsi_hcb *hcb;
841 	struct pvscsi_softc *sc;
842 	union ccb *ccb;
843 
844 	hcb = arg;
845 	ccb = hcb->ccb;
846 
847 	if (ccb == NULL) {
848 		/* Already completed */
849 		return;
850 	}
851 
852 	sc = ccb->ccb_h.ccb_pvscsi_sc;
853 	mtx_assert(&sc->lock, MA_OWNED);
854 
855 	device_printf(sc->dev, "Command timed out hcb=%p ccb=%p.\n", hcb, ccb);
856 
857 	switch (hcb->recovery) {
858 	case PVSCSI_HCB_NONE:
859 		hcb->recovery = PVSCSI_HCB_ABORT;
860 		pvscsi_abort(sc, ccb->ccb_h.target_id, ccb);
861 		callout_reset_sbt(&hcb->callout, PVSCSI_ABORT_TIMEOUT * SBT_1S,
862 		    0, pvscsi_timeout, hcb, 0);
863 		break;
864 	case PVSCSI_HCB_ABORT:
865 		hcb->recovery = PVSCSI_HCB_DEVICE_RESET;
866 		pvscsi_freeze(sc);
867 		pvscsi_device_reset(sc, ccb->ccb_h.target_id);
868 		callout_reset_sbt(&hcb->callout, PVSCSI_RESET_TIMEOUT * SBT_1S,
869 		    0, pvscsi_timeout, hcb, 0);
870 		break;
871 	case PVSCSI_HCB_DEVICE_RESET:
872 		hcb->recovery = PVSCSI_HCB_BUS_RESET;
873 		pvscsi_freeze(sc);
874 		pvscsi_bus_reset(sc);
875 		callout_reset_sbt(&hcb->callout, PVSCSI_RESET_TIMEOUT * SBT_1S,
876 		    0, pvscsi_timeout, hcb, 0);
877 		break;
878 	case PVSCSI_HCB_BUS_RESET:
879 		pvscsi_freeze(sc);
880 		pvscsi_adapter_reset(sc);
881 		break;
882 	};
883 }
884 
885 static void
886 pvscsi_process_completion(struct pvscsi_softc *sc,
887     struct pvscsi_ring_cmp_desc *e)
888 {
889 	struct pvscsi_hcb *hcb;
890 	union ccb *ccb;
891 	uint32_t status;
892 	uint32_t btstat;
893 	uint32_t sdstat;
894 	bus_dmasync_op_t op;
895 
896 	hcb = pvscsi_context_to_hcb(sc, e->context);
897 
898 	callout_stop(&hcb->callout);
899 
900 	ccb = hcb->ccb;
901 
902 	btstat = e->host_status;
903 	sdstat = e->scsi_status;
904 
905 	ccb->csio.scsi_status = sdstat;
906 	ccb->csio.resid = ccb->csio.dxfer_len - e->data_len;
907 
908 	if ((ccb->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_NONE) {
909 		if ((ccb->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN) {
910 			op = BUS_DMASYNC_POSTREAD;
911 		} else {
912 			op = BUS_DMASYNC_POSTWRITE;
913 		}
914 		bus_dmamap_sync(sc->buffer_dmat, hcb->dma_map, op);
915 		bus_dmamap_unload(sc->buffer_dmat, hcb->dma_map);
916 	}
917 
918 	if (btstat == BTSTAT_SUCCESS && sdstat == SCSI_STATUS_OK) {
919 		DEBUG_PRINTF(3, sc->dev,
920 		    "completing command context %llx success\n",
921 		    (unsigned long long)e->context);
922 		ccb->csio.resid = 0;
923 		status = CAM_REQ_CMP;
924 	} else {
925 		switch (btstat) {
926 		case BTSTAT_SUCCESS:
927 		case BTSTAT_LINKED_COMMAND_COMPLETED:
928 		case BTSTAT_LINKED_COMMAND_COMPLETED_WITH_FLAG:
929 			switch (sdstat) {
930 			case SCSI_STATUS_OK:
931 				ccb->csio.resid = 0;
932 				status = CAM_REQ_CMP;
933 				break;
934 			case SCSI_STATUS_CHECK_COND:
935 				status = CAM_SCSI_STATUS_ERROR;
936 
937 				if (ccb->csio.sense_len != 0) {
938 					status |= CAM_AUTOSNS_VALID;
939 
940 					memset(&ccb->csio.sense_data, 0,
941 					    sizeof(ccb->csio.sense_data));
942 					memcpy(&ccb->csio.sense_data,
943 					    hcb->sense_buffer,
944 					    MIN(ccb->csio.sense_len,
945 						e->sense_len));
946 				}
947 				break;
948 			case SCSI_STATUS_BUSY:
949 			case SCSI_STATUS_QUEUE_FULL:
950 				status = CAM_REQUEUE_REQ;
951 				break;
952 			case SCSI_STATUS_CMD_TERMINATED:
953 			case SCSI_STATUS_TASK_ABORTED:
954 				status = CAM_REQ_ABORTED;
955 				break;
956 			default:
957 				DEBUG_PRINTF(1, sc->dev,
958 				    "ccb: %p sdstat=0x%x\n", ccb, sdstat);
959 				status = CAM_SCSI_STATUS_ERROR;
960 				break;
961 			}
962 			break;
963 		case BTSTAT_SELTIMEO:
964 			status = CAM_SEL_TIMEOUT;
965 			break;
966 		case BTSTAT_DATARUN:
967 		case BTSTAT_DATA_UNDERRUN:
968 			status = CAM_DATA_RUN_ERR;
969 			break;
970 		case BTSTAT_ABORTQUEUE:
971 		case BTSTAT_HATIMEOUT:
972 			status = CAM_REQUEUE_REQ;
973 			break;
974 		case BTSTAT_NORESPONSE:
975 		case BTSTAT_SENTRST:
976 		case BTSTAT_RECVRST:
977 		case BTSTAT_BUSRESET:
978 			status = CAM_SCSI_BUS_RESET;
979 			break;
980 		case BTSTAT_SCSIPARITY:
981 			status = CAM_UNCOR_PARITY;
982 			break;
983 		case BTSTAT_BUSFREE:
984 			status = CAM_UNEXP_BUSFREE;
985 			break;
986 		case BTSTAT_INVPHASE:
987 			status = CAM_SEQUENCE_FAIL;
988 			break;
989 		case BTSTAT_SENSFAILED:
990 			status = CAM_AUTOSENSE_FAIL;
991 			break;
992 		case BTSTAT_LUNMISMATCH:
993 		case BTSTAT_TAGREJECT:
994 		case BTSTAT_DISCONNECT:
995 		case BTSTAT_BADMSG:
996 		case BTSTAT_INVPARAM:
997 			status = CAM_REQ_CMP_ERR;
998 			break;
999 		case BTSTAT_HASOFTWARE:
1000 		case BTSTAT_HAHARDWARE:
1001 			status = CAM_NO_HBA;
1002 			break;
1003 		default:
1004 			device_printf(sc->dev, "unknown hba status: 0x%x\n",
1005 			    btstat);
1006 			status = CAM_NO_HBA;
1007 			break;
1008 		}
1009 
1010 		DEBUG_PRINTF(3, sc->dev,
1011 		    "completing command context %llx btstat %x sdstat %x - status %x\n",
1012 		    (unsigned long long)e->context, btstat, sdstat, status);
1013 	}
1014 
1015 	ccb->ccb_h.ccb_pvscsi_hcb = NULL;
1016 	ccb->ccb_h.ccb_pvscsi_sc = NULL;
1017 	pvscsi_hcb_put(sc, hcb);
1018 
1019 	ccb->ccb_h.status =
1020 	    status | (ccb->ccb_h.status & ~(CAM_STATUS_MASK | CAM_SIM_QUEUED));
1021 
1022 	if (sc->frozen) {
1023 		ccb->ccb_h.status |= CAM_RELEASE_SIMQ;
1024 		sc->frozen = 0;
1025 	}
1026 
1027 	if (status != CAM_REQ_CMP) {
1028 		ccb->ccb_h.status |= CAM_DEV_QFRZN;
1029 		xpt_freeze_devq(ccb->ccb_h.path, /*count*/ 1);
1030 	}
1031 	xpt_done(ccb);
1032 }
1033 
1034 static void
1035 pvscsi_process_cmp_ring(struct pvscsi_softc *sc)
1036 {
1037 	struct pvscsi_ring_cmp_desc *ring;
1038 	struct pvscsi_rings_state *s;
1039 	struct pvscsi_ring_cmp_desc *e;
1040 	uint32_t mask;
1041 
1042 	mtx_assert(&sc->lock, MA_OWNED);
1043 
1044 	s = sc->rings_state;
1045 	ring = sc->cmp_ring;
1046 	mask = MASK(s->cmp_num_entries_log2);
1047 
1048 	while (s->cmp_cons_idx != s->cmp_prod_idx) {
1049 		e = ring + (s->cmp_cons_idx & mask);
1050 
1051 		pvscsi_process_completion(sc, e);
1052 
1053 		mb();
1054 		s->cmp_cons_idx++;
1055 	}
1056 }
1057 
1058 static void
1059 pvscsi_process_msg(struct pvscsi_softc *sc, struct pvscsi_ring_msg_desc *e)
1060 {
1061 	struct pvscsi_ring_msg_dev_status_changed *desc;
1062 
1063 	union ccb *ccb;
1064 	switch (e->type) {
1065 	case PVSCSI_MSG_DEV_ADDED:
1066 	case PVSCSI_MSG_DEV_REMOVED: {
1067 		desc = (struct pvscsi_ring_msg_dev_status_changed *)e;
1068 
1069 		device_printf(sc->dev, "MSG: device %s at scsi%u:%u:%u\n",
1070 		    desc->type == PVSCSI_MSG_DEV_ADDED ? "addition" : "removal",
1071 		    desc->bus, desc->target, desc->lun[1]);
1072 
1073 		ccb = xpt_alloc_ccb_nowait();
1074 		if (ccb == NULL) {
1075 			device_printf(sc->dev,
1076 			    "Error allocating CCB for dev change.\n");
1077 			break;
1078 		}
1079 
1080 		if (xpt_create_path(&ccb->ccb_h.path, NULL,
1081 		    cam_sim_path(sc->sim), desc->target, desc->lun[1])
1082 		    != CAM_REQ_CMP) {
1083 			device_printf(sc->dev,
1084 			    "Error creating path for dev change.\n");
1085 			xpt_free_ccb(ccb);
1086 			break;
1087 		}
1088 
1089 		xpt_rescan(ccb);
1090 	} break;
1091 	default:
1092 		device_printf(sc->dev, "Unknown msg type 0x%x\n", e->type);
1093 	};
1094 }
1095 
1096 static void
1097 pvscsi_process_msg_ring(struct pvscsi_softc *sc)
1098 {
1099 	struct pvscsi_ring_msg_desc *ring;
1100 	struct pvscsi_rings_state *s;
1101 	struct pvscsi_ring_msg_desc *e;
1102 	uint32_t mask;
1103 
1104 	mtx_assert(&sc->lock, MA_OWNED);
1105 
1106 	s = sc->rings_state;
1107 	ring = sc->msg_ring;
1108 	mask = MASK(s->msg_num_entries_log2);
1109 
1110 	while (s->msg_cons_idx != s->msg_prod_idx) {
1111 		e = ring + (s->msg_cons_idx & mask);
1112 
1113 		pvscsi_process_msg(sc, e);
1114 
1115 		mb();
1116 		s->msg_cons_idx++;
1117 	}
1118 }
1119 
1120 static void
1121 pvscsi_intr_locked(struct pvscsi_softc *sc)
1122 {
1123 	uint32_t val;
1124 
1125 	mtx_assert(&sc->lock, MA_OWNED);
1126 
1127 	val = pvscsi_read_intr_status(sc);
1128 
1129 	if ((val & PVSCSI_INTR_ALL_SUPPORTED) != 0) {
1130 		pvscsi_write_intr_status(sc, val & PVSCSI_INTR_ALL_SUPPORTED);
1131 		pvscsi_process_cmp_ring(sc);
1132 		if (sc->use_msg) {
1133 			pvscsi_process_msg_ring(sc);
1134 		}
1135 	}
1136 }
1137 
1138 static void
1139 pvscsi_intr(void *xsc)
1140 {
1141 	struct pvscsi_softc *sc;
1142 
1143 	sc = xsc;
1144 
1145 	mtx_assert(&sc->lock, MA_NOTOWNED);
1146 
1147 	mtx_lock(&sc->lock);
1148 	pvscsi_intr_locked(xsc);
1149 	mtx_unlock(&sc->lock);
1150 }
1151 
1152 static void
1153 pvscsi_poll(struct cam_sim *sim)
1154 {
1155 	struct pvscsi_softc *sc;
1156 
1157 	sc = cam_sim_softc(sim);
1158 
1159 	mtx_assert(&sc->lock, MA_OWNED);
1160 	pvscsi_intr_locked(sc);
1161 }
1162 
1163 static void
1164 pvscsi_execute_ccb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
1165 {
1166 	struct pvscsi_hcb *hcb;
1167 	struct pvscsi_ring_req_desc *e;
1168 	union ccb *ccb;
1169 	struct pvscsi_softc *sc;
1170 	struct pvscsi_rings_state *s;
1171 	uint8_t cdb0;
1172 	bus_dmasync_op_t op;
1173 
1174 	hcb = arg;
1175 	ccb = hcb->ccb;
1176 	e = hcb->e;
1177 	sc = ccb->ccb_h.ccb_pvscsi_sc;
1178 	s = sc->rings_state;
1179 
1180 	mtx_assert(&sc->lock, MA_OWNED);
1181 
1182 	if (error) {
1183 		device_printf(sc->dev, "pvscsi_execute_ccb error %d\n", error);
1184 
1185 		if (error == EFBIG) {
1186 			ccb->ccb_h.status = CAM_REQ_TOO_BIG;
1187 		} else {
1188 			ccb->ccb_h.status = CAM_REQ_CMP_ERR;
1189 		}
1190 
1191 		pvscsi_hcb_put(sc, hcb);
1192 		xpt_done(ccb);
1193 		return;
1194 	}
1195 
1196 	e->flags = 0;
1197 	op = 0;
1198 	switch (ccb->ccb_h.flags & CAM_DIR_MASK) {
1199 	case CAM_DIR_NONE:
1200 		e->flags |= PVSCSI_FLAG_CMD_DIR_NONE;
1201 		break;
1202 	case CAM_DIR_IN:
1203 		e->flags |= PVSCSI_FLAG_CMD_DIR_TOHOST;
1204 		op = BUS_DMASYNC_PREREAD;
1205 		break;
1206 	case CAM_DIR_OUT:
1207 		e->flags |= PVSCSI_FLAG_CMD_DIR_TODEVICE;
1208 		op = BUS_DMASYNC_PREWRITE;
1209 		break;
1210 	case CAM_DIR_BOTH:
1211 		/* TODO: does this need handling? */
1212 		break;
1213 	}
1214 
1215 	if (nseg != 0) {
1216 		if (nseg > 1) {
1217 			int i;
1218 			struct pvscsi_sg_element *sge;
1219 
1220 			KASSERT(nseg <= PVSCSI_MAX_SG_ENTRIES_PER_SEGMENT,
1221 			    ("too many sg segments"));
1222 
1223 			sge = hcb->sg_list->sge;
1224 			e->flags |= PVSCSI_FLAG_CMD_WITH_SG_LIST;
1225 
1226 			for (i = 0; i < nseg; ++i) {
1227 				sge[i].addr = segs[i].ds_addr;
1228 				sge[i].length = segs[i].ds_len;
1229 				sge[i].flags = 0;
1230 			}
1231 
1232 			e->data_addr = hcb->sg_list_paddr;
1233 		} else {
1234 			e->data_addr = segs->ds_addr;
1235 		}
1236 
1237 		bus_dmamap_sync(sc->buffer_dmat, hcb->dma_map, op);
1238 	} else {
1239 		e->data_addr = 0;
1240 	}
1241 
1242 	cdb0 = e->cdb[0];
1243 	ccb->ccb_h.status |= CAM_SIM_QUEUED;
1244 
1245 	if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) {
1246 		callout_reset_sbt(&hcb->callout, ccb->ccb_h.timeout * SBT_1MS,
1247 		    0, pvscsi_timeout, hcb, 0);
1248 	}
1249 
1250 	mb();
1251 	s->req_prod_idx++;
1252 	pvscsi_kick_io(sc, cdb0);
1253 }
1254 
1255 static void
1256 pvscsi_action(struct cam_sim *sim, union ccb *ccb)
1257 {
1258 	struct pvscsi_softc *sc;
1259 	struct ccb_hdr *ccb_h;
1260 
1261 	sc = cam_sim_softc(sim);
1262 	ccb_h = &ccb->ccb_h;
1263 
1264 	mtx_assert(&sc->lock, MA_OWNED);
1265 
1266 	switch (ccb_h->func_code) {
1267 	case XPT_SCSI_IO:
1268 	{
1269 		struct ccb_scsiio *csio;
1270 		uint32_t req_num_entries_log2;
1271 		struct pvscsi_ring_req_desc *ring;
1272 		struct pvscsi_ring_req_desc *e;
1273 		struct pvscsi_rings_state *s;
1274 		struct pvscsi_hcb *hcb;
1275 
1276 		csio = &ccb->csio;
1277 		ring = sc->req_ring;
1278 		s = sc->rings_state;
1279 
1280 		hcb = NULL;
1281 
1282 		/*
1283 		 * Check if it was completed already (such as aborted
1284 		 * by upper layers)
1285 		 */
1286 		if ((ccb_h->status & CAM_STATUS_MASK) != CAM_REQ_INPROG) {
1287 			xpt_done(ccb);
1288 			return;
1289 		}
1290 
1291 		req_num_entries_log2 = s->req_num_entries_log2;
1292 
1293 		if (s->req_prod_idx - s->cmp_cons_idx >=
1294 		    (1 << req_num_entries_log2)) {
1295 			device_printf(sc->dev,
1296 			    "Not enough room on completion ring.\n");
1297 			pvscsi_freeze(sc);
1298 			ccb_h->status = CAM_REQUEUE_REQ;
1299 			goto finish_ccb;
1300 		}
1301 
1302 		hcb = pvscsi_hcb_get(sc);
1303 		if (hcb == NULL) {
1304 			device_printf(sc->dev, "No free hcbs.\n");
1305 			pvscsi_freeze(sc);
1306 			ccb_h->status = CAM_REQUEUE_REQ;
1307 			goto finish_ccb;
1308 		}
1309 
1310 		hcb->ccb = ccb;
1311 		ccb_h->ccb_pvscsi_hcb = hcb;
1312 		ccb_h->ccb_pvscsi_sc = sc;
1313 
1314 		if (csio->cdb_len > sizeof(e->cdb)) {
1315 			DEBUG_PRINTF(2, sc->dev, "cdb length %u too large\n",
1316 			    csio->cdb_len);
1317 			ccb_h->status = CAM_REQ_INVALID;
1318 			goto finish_ccb;
1319 		}
1320 
1321 		if (ccb_h->flags & CAM_CDB_PHYS) {
1322 			DEBUG_PRINTF(2, sc->dev,
1323 			    "CAM_CDB_PHYS not implemented\n");
1324 			ccb_h->status = CAM_REQ_INVALID;
1325 			goto finish_ccb;
1326 		}
1327 
1328 		e = ring + (s->req_prod_idx & MASK(req_num_entries_log2));
1329 
1330 		e->bus = cam_sim_bus(sim);
1331 		e->target = ccb_h->target_id;
1332 		memset(e->lun, 0, sizeof(e->lun));
1333 		e->lun[1] = ccb_h->target_lun;
1334 		e->data_addr = 0;
1335 		e->data_len = csio->dxfer_len;
1336 		e->vcpu_hint = curcpu;
1337 
1338 		e->cdb_len = csio->cdb_len;
1339 		memcpy(e->cdb, scsiio_cdb_ptr(csio), csio->cdb_len);
1340 
1341 		e->sense_addr = 0;
1342 		e->sense_len = csio->sense_len;
1343 		if (e->sense_len > 0) {
1344 			e->sense_addr = hcb->sense_buffer_paddr;
1345 		}
1346 
1347 		e->tag = MSG_SIMPLE_Q_TAG;
1348 		if (ccb_h->flags & CAM_TAG_ACTION_VALID) {
1349 			e->tag = csio->tag_action;
1350 		}
1351 
1352 		e->context = pvscsi_hcb_to_context(sc, hcb);
1353 		hcb->e = e;
1354 
1355 		DEBUG_PRINTF(3, sc->dev,
1356 		    " queuing command %02x context %llx\n", e->cdb[0],
1357 		    (unsigned long long)e->context);
1358 		bus_dmamap_load_ccb(sc->buffer_dmat, hcb->dma_map, ccb,
1359 		    pvscsi_execute_ccb, hcb, 0);
1360 		break;
1361 
1362 finish_ccb:
1363 		if (hcb != NULL) {
1364 			pvscsi_hcb_put(sc, hcb);
1365 		}
1366 		xpt_done(ccb);
1367 	} break;
1368 	case XPT_ABORT:
1369 	{
1370 		struct pvscsi_hcb *abort_hcb;
1371 		union ccb *abort_ccb;
1372 
1373 		abort_ccb = ccb->cab.abort_ccb;
1374 		abort_hcb = abort_ccb->ccb_h.ccb_pvscsi_hcb;
1375 
1376 		if (abort_hcb->ccb != NULL && abort_hcb->ccb == abort_ccb) {
1377 			if (abort_ccb->ccb_h.func_code == XPT_SCSI_IO) {
1378 				pvscsi_abort(sc, ccb_h->target_id, abort_ccb);
1379 				ccb_h->status = CAM_REQ_CMP;
1380 			} else {
1381 				ccb_h->status = CAM_UA_ABORT;
1382 			}
1383 		} else {
1384 			device_printf(sc->dev,
1385 			    "Could not find hcb for ccb %p (tgt %u)\n",
1386 			    ccb, ccb_h->target_id);
1387 			ccb_h->status = CAM_REQ_CMP;
1388 		}
1389 		xpt_done(ccb);
1390 	} break;
1391 	case XPT_RESET_DEV:
1392 	{
1393 		pvscsi_device_reset(sc, ccb_h->target_id);
1394 		ccb_h->status = CAM_REQ_CMP;
1395 		xpt_done(ccb);
1396 	} break;
1397 	case XPT_RESET_BUS:
1398 	{
1399 		pvscsi_bus_reset(sc);
1400 		ccb_h->status = CAM_REQ_CMP;
1401 		xpt_done(ccb);
1402 	} break;
1403 	case XPT_PATH_INQ:
1404 	{
1405 		struct ccb_pathinq *cpi;
1406 
1407 		cpi = &ccb->cpi;
1408 
1409 		cpi->version_num = 1;
1410 		cpi->hba_inquiry = PI_TAG_ABLE;
1411 		cpi->target_sprt = 0;
1412 		cpi->hba_misc = PIM_NOBUSRESET | PIM_UNMAPPED;
1413 		cpi->hba_eng_cnt = 0;
1414 		/* cpi->vuhba_flags = 0; */
1415 		cpi->max_target = sc->max_targets;
1416 		cpi->max_lun = 0;
1417 		cpi->async_flags = 0;
1418 		cpi->hpath_id = 0;
1419 		cpi->unit_number = cam_sim_unit(sim);
1420 		cpi->bus_id = cam_sim_bus(sim);
1421 		cpi->initiator_id = 7;
1422 		cpi->base_transfer_speed = 750000;
1423 		strlcpy(cpi->sim_vid, "VMware", SIM_IDLEN);
1424 		strlcpy(cpi->hba_vid, "VMware", HBA_IDLEN);
1425 		strlcpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN);
1426 		/* Limit I/O to 256k since we can't do 512k unaligned I/O */
1427 		cpi->maxio = (PVSCSI_MAX_SG_ENTRIES_PER_SEGMENT / 2) * PAGE_SIZE;
1428 		cpi->protocol = PROTO_SCSI;
1429 		cpi->protocol_version = SCSI_REV_SPC2;
1430 		cpi->transport = XPORT_SAS;
1431 		cpi->transport_version = 0;
1432 
1433 		ccb_h->status = CAM_REQ_CMP;
1434 		xpt_done(ccb);
1435 	} break;
1436 	case XPT_GET_TRAN_SETTINGS:
1437 	{
1438 		struct ccb_trans_settings *cts;
1439 
1440 		cts = &ccb->cts;
1441 
1442 		cts->protocol = PROTO_SCSI;
1443 		cts->protocol_version = SCSI_REV_SPC2;
1444 		cts->transport = XPORT_SAS;
1445 		cts->transport_version = 0;
1446 
1447 		cts->proto_specific.scsi.flags = CTS_SCSI_FLAGS_TAG_ENB;
1448 		cts->proto_specific.scsi.valid = CTS_SCSI_VALID_TQ;
1449 
1450 		ccb_h->status = CAM_REQ_CMP;
1451 		xpt_done(ccb);
1452 	} break;
1453 	case XPT_CALC_GEOMETRY:
1454 	{
1455 		cam_calc_geometry(&ccb->ccg, 1);
1456 		xpt_done(ccb);
1457 	} break;
1458 	default:
1459 		ccb_h->status = CAM_REQ_INVALID;
1460 		xpt_done(ccb);
1461 		break;
1462 	}
1463 }
1464 
1465 static void
1466 pvscsi_free_interrupts(struct pvscsi_softc *sc)
1467 {
1468 
1469 	if (sc->irq_handler != NULL) {
1470 		bus_teardown_intr(sc->dev, sc->irq_res, sc->irq_handler);
1471 	}
1472 	if (sc->irq_res != NULL) {
1473 		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_id,
1474 		    sc->irq_res);
1475 	}
1476 	if (sc->use_msi_or_msix) {
1477 		pci_release_msi(sc->dev);
1478 	}
1479 }
1480 
1481 static int
1482 pvscsi_setup_interrupts(struct pvscsi_softc *sc)
1483 {
1484 	int error;
1485 	int flags;
1486 	int use_msix;
1487 	int use_msi;
1488 	int count;
1489 
1490 	sc->use_msi_or_msix = 0;
1491 
1492 	use_msix = pvscsi_get_tunable(sc, "use_msix", pvscsi_use_msix);
1493 	use_msi = pvscsi_get_tunable(sc, "use_msi", pvscsi_use_msi);
1494 
1495 	if (use_msix && pci_msix_count(sc->dev) > 0) {
1496 		count = 1;
1497 		if (pci_alloc_msix(sc->dev, &count) == 0 && count == 1) {
1498 			sc->use_msi_or_msix = 1;
1499 			device_printf(sc->dev, "Interrupt: MSI-X\n");
1500 		} else {
1501 			pci_release_msi(sc->dev);
1502 		}
1503 	}
1504 
1505 	if (sc->use_msi_or_msix == 0 && use_msi && pci_msi_count(sc->dev) > 0) {
1506 		count = 1;
1507 		if (pci_alloc_msi(sc->dev, &count) == 0 && count == 1) {
1508 			sc->use_msi_or_msix = 1;
1509 			device_printf(sc->dev, "Interrupt: MSI\n");
1510 		} else {
1511 			pci_release_msi(sc->dev);
1512 		}
1513 	}
1514 
1515 	flags = RF_ACTIVE;
1516 	if (sc->use_msi_or_msix) {
1517 		sc->irq_id = 1;
1518 	} else {
1519 		device_printf(sc->dev, "Interrupt: INT\n");
1520 		sc->irq_id = 0;
1521 		flags |= RF_SHAREABLE;
1522 	}
1523 
1524 	sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_id,
1525 	    flags);
1526 	if (sc->irq_res == NULL) {
1527 		device_printf(sc->dev, "IRQ allocation failed\n");
1528 		if (sc->use_msi_or_msix) {
1529 			pci_release_msi(sc->dev);
1530 		}
1531 		return (ENXIO);
1532 	}
1533 
1534 	error = bus_setup_intr(sc->dev, sc->irq_res,
1535 	    INTR_TYPE_CAM | INTR_MPSAFE, NULL, pvscsi_intr, sc,
1536 	    &sc->irq_handler);
1537 	if (error) {
1538 		device_printf(sc->dev, "IRQ handler setup failed\n");
1539 		pvscsi_free_interrupts(sc);
1540 		return (error);
1541 	}
1542 
1543 	return (0);
1544 }
1545 
1546 static void
1547 pvscsi_free_all(struct pvscsi_softc *sc)
1548 {
1549 
1550 	if (sc->sim) {
1551 		int error;
1552 
1553 		if (sc->bus_path) {
1554 			xpt_free_path(sc->bus_path);
1555 		}
1556 
1557 		error = xpt_bus_deregister(cam_sim_path(sc->sim));
1558 		if (error != 0) {
1559 			device_printf(sc->dev,
1560 			    "Error deregistering bus, error %d\n", error);
1561 		}
1562 
1563 		cam_sim_free(sc->sim, TRUE);
1564 	}
1565 
1566 	pvscsi_dma_free_per_hcb(sc, sc->hcb_cnt);
1567 
1568 	if (sc->hcbs) {
1569 		free(sc->hcbs, M_PVSCSI);
1570 	}
1571 
1572 	pvscsi_free_rings(sc);
1573 
1574 	pvscsi_free_interrupts(sc);
1575 
1576 	if (sc->buffer_dmat != NULL) {
1577 		bus_dma_tag_destroy(sc->buffer_dmat);
1578 	}
1579 
1580 	if (sc->parent_dmat != NULL) {
1581 		bus_dma_tag_destroy(sc->parent_dmat);
1582 	}
1583 
1584 	if (sc->mm_res != NULL) {
1585 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->mm_rid,
1586 		    sc->mm_res);
1587 	}
1588 }
1589 
1590 static int
1591 pvscsi_attach(device_t dev)
1592 {
1593 	struct pvscsi_softc *sc;
1594 	int rid;
1595 	int barid;
1596 	int error;
1597 	int max_queue_depth;
1598 	int adapter_queue_size;
1599 	struct cam_devq *devq;
1600 
1601 	sc = device_get_softc(dev);
1602 	sc->dev = dev;
1603 
1604 	mtx_init(&sc->lock, "pvscsi", NULL, MTX_DEF);
1605 
1606 	pci_enable_busmaster(dev);
1607 
1608 	sc->mm_rid = -1;
1609 	for (barid = 0; barid <= PCIR_MAX_BAR_0; ++barid) {
1610 		rid = PCIR_BAR(barid);
1611 
1612 		sc->mm_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
1613 		    RF_ACTIVE);
1614 		if (sc->mm_res != NULL) {
1615 			sc->mm_rid = rid;
1616 			break;
1617 		}
1618 	}
1619 
1620 	if (sc->mm_res == NULL) {
1621 		device_printf(dev, "could not map device memory\n");
1622 		return (ENXIO);
1623 	}
1624 
1625 	error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0,
1626 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE,
1627 	    BUS_SPACE_UNRESTRICTED, BUS_SPACE_MAXSIZE, 0, NULL, NULL,
1628 	    &sc->parent_dmat);
1629 	if (error) {
1630 		device_printf(dev, "parent dma tag create failure, error %d\n",
1631 		    error);
1632 		pvscsi_free_all(sc);
1633 		return (ENXIO);
1634 	}
1635 
1636 	error = bus_dma_tag_create(sc->parent_dmat, 1, 0,
1637 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
1638 	    PVSCSI_MAX_SG_ENTRIES_PER_SEGMENT * PAGE_SIZE,
1639 	    PVSCSI_MAX_SG_ENTRIES_PER_SEGMENT, PAGE_SIZE, BUS_DMA_ALLOCNOW,
1640 	    NULL, NULL, &sc->buffer_dmat);
1641 	if (error) {
1642 		device_printf(dev, "parent dma tag create failure, error %d\n",
1643 		    error);
1644 		pvscsi_free_all(sc);
1645 		return (ENXIO);
1646 	}
1647 
1648 	error = pvscsi_setup_interrupts(sc);
1649 	if (error) {
1650 		device_printf(dev, "Interrupt setup failed\n");
1651 		pvscsi_free_all(sc);
1652 		return (error);
1653 	}
1654 
1655 	sc->max_targets = pvscsi_get_max_targets(sc);
1656 
1657 	sc->use_msg = pvscsi_get_tunable(sc, "use_msg", pvscsi_use_msg) &&
1658 	    pvscsi_hw_supports_msg(sc);
1659 	sc->msg_ring_num_pages = sc->use_msg ? 1 : 0;
1660 
1661 	sc->req_ring_num_pages = pvscsi_get_tunable(sc, "request_ring_pages",
1662 	    pvscsi_request_ring_pages);
1663 	if (sc->req_ring_num_pages <= 0) {
1664 		if (sc->max_targets <= 16) {
1665 			sc->req_ring_num_pages =
1666 			    PVSCSI_DEFAULT_NUM_PAGES_REQ_RING;
1667 		} else {
1668 			sc->req_ring_num_pages = PVSCSI_MAX_NUM_PAGES_REQ_RING;
1669 		}
1670 	} else if (sc->req_ring_num_pages > PVSCSI_MAX_NUM_PAGES_REQ_RING) {
1671 		sc->req_ring_num_pages = PVSCSI_MAX_NUM_PAGES_REQ_RING;
1672 	}
1673 	sc->cmp_ring_num_pages = sc->req_ring_num_pages;
1674 
1675 	max_queue_depth = pvscsi_get_tunable(sc, "max_queue_depth",
1676 	    pvscsi_max_queue_depth);
1677 
1678 	adapter_queue_size = (sc->req_ring_num_pages * PAGE_SIZE) /
1679 	    sizeof(struct pvscsi_ring_req_desc);
1680 	if (max_queue_depth > 0) {
1681 		adapter_queue_size = MIN(adapter_queue_size, max_queue_depth);
1682 	}
1683 	adapter_queue_size = MIN(adapter_queue_size,
1684 	    PVSCSI_MAX_REQ_QUEUE_DEPTH);
1685 
1686 	device_printf(sc->dev, "Use Msg: %d\n", sc->use_msg);
1687 	device_printf(sc->dev, "REQ num pages: %d\n", sc->req_ring_num_pages);
1688 	device_printf(sc->dev, "CMP num pages: %d\n", sc->cmp_ring_num_pages);
1689 	device_printf(sc->dev, "MSG num pages: %d\n", sc->msg_ring_num_pages);
1690 	device_printf(sc->dev, "Queue size: %d\n", adapter_queue_size);
1691 
1692 	if (pvscsi_allocate_rings(sc)) {
1693 		device_printf(dev, "ring allocation failed\n");
1694 		pvscsi_free_all(sc);
1695 		return (ENXIO);
1696 	}
1697 
1698 	sc->hcb_cnt = adapter_queue_size;
1699 	sc->hcbs = malloc(sc->hcb_cnt * sizeof(*sc->hcbs), M_PVSCSI,
1700 	    M_NOWAIT | M_ZERO);
1701 	if (sc->hcbs == NULL) {
1702 		device_printf(dev, "error allocating hcb array\n");
1703 		pvscsi_free_all(sc);
1704 		return (ENXIO);
1705 	}
1706 
1707 	if (pvscsi_dma_alloc_per_hcb(sc)) {
1708 		device_printf(dev, "error allocating per hcb dma memory\n");
1709 		pvscsi_free_all(sc);
1710 		return (ENXIO);
1711 	}
1712 
1713 	pvscsi_adapter_reset(sc);
1714 
1715 	devq = cam_simq_alloc(adapter_queue_size);
1716 	if (devq == NULL) {
1717 		device_printf(dev, "cam devq alloc failed\n");
1718 		pvscsi_free_all(sc);
1719 		return (ENXIO);
1720 	}
1721 
1722 	sc->sim = cam_sim_alloc(pvscsi_action, pvscsi_poll, "pvscsi", sc,
1723 	    device_get_unit(dev), &sc->lock, 1, adapter_queue_size, devq);
1724 	if (sc->sim == NULL) {
1725 		device_printf(dev, "cam sim alloc failed\n");
1726 		cam_simq_free(devq);
1727 		pvscsi_free_all(sc);
1728 		return (ENXIO);
1729 	}
1730 
1731 	mtx_lock(&sc->lock);
1732 
1733 	if (xpt_bus_register(sc->sim, dev, 0) != CAM_SUCCESS) {
1734 		device_printf(dev, "xpt bus register failed\n");
1735 		pvscsi_free_all(sc);
1736 		mtx_unlock(&sc->lock);
1737 		return (ENXIO);
1738 	}
1739 
1740 	if (xpt_create_path(&sc->bus_path, NULL, cam_sim_path(sc->sim),
1741 	    CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
1742 		device_printf(dev, "xpt create path failed\n");
1743 		pvscsi_free_all(sc);
1744 		mtx_unlock(&sc->lock);
1745 		return (ENXIO);
1746 	}
1747 
1748 	pvscsi_setup_rings(sc);
1749 	if (sc->use_msg) {
1750 		pvscsi_setup_msg_ring(sc);
1751 	}
1752 
1753 	sc->use_req_call_threshold = pvscsi_setup_req_call(sc, 1);
1754 
1755 	pvscsi_intr_enable(sc);
1756 
1757 	mtx_unlock(&sc->lock);
1758 
1759 	return (0);
1760 }
1761 
1762 static int
1763 pvscsi_detach(device_t dev)
1764 {
1765 	struct pvscsi_softc *sc;
1766 
1767 	sc = device_get_softc(dev);
1768 
1769 	pvscsi_intr_disable(sc);
1770 	pvscsi_adapter_reset(sc);
1771 
1772 	if (sc->irq_handler != NULL) {
1773 		bus_teardown_intr(dev, sc->irq_res, sc->irq_handler);
1774 	}
1775 
1776 	mtx_lock(&sc->lock);
1777 	pvscsi_free_all(sc);
1778 	mtx_unlock(&sc->lock);
1779 
1780 	mtx_destroy(&sc->lock);
1781 
1782 	return (0);
1783 }
1784 
1785 static device_method_t pvscsi_methods[] = {
1786 	DEVMETHOD(device_probe, pvscsi_probe),
1787 	DEVMETHOD(device_shutdown, pvscsi_shutdown),
1788 	DEVMETHOD(device_attach, pvscsi_attach),
1789 	DEVMETHOD(device_detach, pvscsi_detach),
1790 	DEVMETHOD_END
1791 };
1792 
1793 static driver_t pvscsi_driver = {
1794 	"pvscsi", pvscsi_methods, sizeof(struct pvscsi_softc)
1795 };
1796 
1797 DRIVER_MODULE(pvscsi, pci, pvscsi_driver, 0, 0);
1798 
1799 MODULE_DEPEND(pvscsi, pci, 1, 1, 1);
1800 MODULE_DEPEND(pvscsi, cam, 1, 1, 1);
1801