/*-
 * Copyright (c) 2018 VMware, Inc.
 *
 * SPDX-License-Identifier: (BSD-2-Clause OR GPL-2.0)
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/queue.h>
#include <sys/rman.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#include <machine/bus.h>
#include <machine/resource.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>

#include <cam/cam.h>
#include <cam/cam_ccb.h>
#include <cam/cam_debug.h>
#include <cam/cam_sim.h>
#include <cam/cam_xpt_sim.h>
#include <cam/scsi/scsi_message.h>

#include "pvscsi.h"

#define	PVSCSI_DEFAULT_NUM_PAGES_REQ_RING	8
#define	PVSCSI_SENSE_LENGTH			256

MALLOC_DECLARE(M_PVSCSI);
MALLOC_DEFINE(M_PVSCSI, "pvscsi", "PVSCSI memory");

#ifdef PVSCSI_DEBUG_LOGGING
#define	DEBUG_PRINTF(level, dev, fmt, ...)				\
	do {								\
		if (pvscsi_log_level >= (level)) {			\
			device_printf((dev), (fmt), ##__VA_ARGS__);	\
		}							\
	} while (0)
#else
#define	DEBUG_PRINTF(level, dev, fmt, ...)
#endif /* PVSCSI_DEBUG_LOGGING */

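/*
 * CAM scsiio private pointers: the per-command hcb and the softc are
 * stashed in the CCB so the completion, timeout, and abort paths can
 * recover them from a ccb alone.
 */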
#define	ccb_pvscsi_hcb	spriv_ptr0
#define	ccb_pvscsi_sc	spriv_ptr1

struct pvscsi_softc;
static timeout_t pvscsi_timeout;
struct pvscsi_hcb;
struct pvscsi_dma;

static inline uint32_t pvscsi_reg_read(struct pvscsi_softc *sc,
    uint32_t offset);
static inline void pvscsi_reg_write(struct pvscsi_softc *sc, uint32_t offset,
    uint32_t val);
static inline uint32_t pvscsi_read_intr_status(struct pvscsi_softc *sc);
static inline void pvscsi_write_intr_status(struct pvscsi_softc *sc,
    uint32_t val);
static inline void pvscsi_intr_enable(struct pvscsi_softc *sc);
static inline void pvscsi_intr_disable(struct pvscsi_softc *sc);
static void pvscsi_kick_io(struct pvscsi_softc *sc, uint8_t cdb0);
static void pvscsi_write_cmd(struct pvscsi_softc *sc, uint32_t cmd, void *data,
    uint32_t len);
static uint32_t pvscsi_get_max_targets(struct pvscsi_softc *sc);
static int pvscsi_setup_req_call(struct pvscsi_softc *sc, uint32_t enable);
static void pvscsi_setup_rings(struct pvscsi_softc *sc);
static void pvscsi_setup_msg_ring(struct pvscsi_softc *sc);
static int pvscsi_hw_supports_msg(struct pvscsi_softc *sc);

static void pvscsi_timeout(void *arg);
static void pvscsi_freeze(struct pvscsi_softc *sc);
static void pvscsi_adapter_reset(struct pvscsi_softc *sc);
static void pvscsi_bus_reset(struct pvscsi_softc *sc);
static void pvscsi_device_reset(struct pvscsi_softc *sc, uint32_t target);
static void pvscsi_abort(struct pvscsi_softc *sc, uint32_t target,
    union ccb *ccb);

static void pvscsi_process_completion(struct pvscsi_softc *sc,
    struct pvscsi_ring_cmp_desc *e);
static void pvscsi_process_cmp_ring(struct pvscsi_softc *sc);
static void pvscsi_process_msg(struct pvscsi_softc *sc,
    struct pvscsi_ring_msg_desc *e);
static void pvscsi_process_msg_ring(struct pvscsi_softc *sc);

static void pvscsi_intr_locked(struct pvscsi_softc *sc);
static void pvscsi_intr(void *xsc);
static void pvscsi_poll(struct cam_sim *sim);

static void pvscsi_execute_ccb(void *arg, bus_dma_segment_t *segs, int nseg,
    int error);
static void pvscsi_action(struct cam_sim *sim, union ccb *ccb);

static inline uint64_t pvscsi_hcb_to_context(struct pvscsi_softc *sc,
    struct pvscsi_hcb *hcb);
static inline struct pvscsi_hcb *pvscsi_context_to_hcb(struct pvscsi_softc *sc,
    uint64_t context);
static struct pvscsi_hcb *pvscsi_hcb_get(struct pvscsi_softc *sc);
static void pvscsi_hcb_put(struct pvscsi_softc *sc, struct pvscsi_hcb *hcb);

static void pvscsi_dma_cb(void *arg, bus_dma_segment_t *segs, int nseg,
    int error);
static void pvscsi_dma_free(struct pvscsi_softc *sc, struct pvscsi_dma *dma);
static int pvscsi_dma_alloc(struct pvscsi_softc *sc, struct pvscsi_dma *dma,
    bus_size_t size, bus_size_t alignment);
static int pvscsi_dma_alloc_ppns(struct pvscsi_softc *sc,
    struct pvscsi_dma *dma, uint64_t *ppn_list, uint32_t num_pages);
static void pvscsi_dma_free_per_hcb(struct pvscsi_softc *sc,
    uint32_t hcbs_allocated);
static int pvscsi_dma_alloc_per_hcb(struct pvscsi_softc *sc);
static void pvscsi_free_rings(struct pvscsi_softc *sc);
static int pvscsi_allocate_rings(struct pvscsi_softc *sc);
static void pvscsi_free_interrupts(struct pvscsi_softc *sc);
static int pvscsi_setup_interrupts(struct pvscsi_softc *sc);
static void pvscsi_free_all(struct pvscsi_softc *sc);

static int pvscsi_attach(device_t dev);
static int pvscsi_detach(device_t dev);
static int pvscsi_probe(device_t dev);
static int pvscsi_shutdown(device_t dev);
static int pvscsi_get_tunable(struct pvscsi_softc *sc, char *name, int value);


#ifdef PVSCSI_DEBUG_LOGGING
static int pvscsi_log_level = 0;
static SYSCTL_NODE(_hw, OID_AUTO, pvscsi, CTLFLAG_RD, 0,
    "PVSCSI driver parameters");
SYSCTL_INT(_hw_pvscsi, OID_AUTO, log_level, CTLFLAG_RWTUN, &pvscsi_log_level,
    0, "PVSCSI debug log level");
#endif

static int pvscsi_request_ring_pages = 0;
TUNABLE_INT("hw.pvscsi.request_ring_pages", &pvscsi_request_ring_pages);

static int pvscsi_use_msg = 1;
TUNABLE_INT("hw.pvscsi.use_msg", &pvscsi_use_msg);

static int pvscsi_use_msi = 1;
TUNABLE_INT("hw.pvscsi.use_msi", &pvscsi_use_msi);

static int pvscsi_use_msix = 1;
TUNABLE_INT("hw.pvscsi.use_msix", &pvscsi_use_msix);

static int pvscsi_use_req_call_threshold = 1;
TUNABLE_INT("hw.pvscsi.use_req_call_threshold", &pvscsi_use_req_call_threshold);

static int pvscsi_max_queue_depth = 0;
TUNABLE_INT("hw.pvscsi.max_queue_depth", &pvscsi_max_queue_depth);

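/*
 * Illustrative loader.conf(5) settings (names taken from the tunables
 * above; the per-unit hw.pvscsi.<unit>.<name> form is implemented by
 * pvscsi_get_tunable() below):
 *
 *	hw.pvscsi.use_msix="0"			# no MSI-X on any adapter
 *	hw.pvscsi.0.request_ring_pages="4"	# 4 req ring pages on unit 0
 */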

struct pvscsi_sg_list {
	struct pvscsi_sg_element sge[PVSCSI_MAX_SG_ENTRIES_PER_SEGMENT];
};


#define	PVSCSI_ABORT_TIMEOUT	2
#define	PVSCSI_RESET_TIMEOUT	10

#define	PVSCSI_HCB_NONE		0
#define	PVSCSI_HCB_ABORT	1
#define	PVSCSI_HCB_DEVICE_RESET	2
#define	PVSCSI_HCB_BUS_RESET	3

struct pvscsi_hcb {
	union ccb			*ccb;
	struct pvscsi_ring_req_desc	*e;
	int				 recovery;
	SLIST_ENTRY(pvscsi_hcb)		 links;

	struct callout			 callout;
	bus_dmamap_t			 dma_map;
	void				*sense_buffer;
	bus_addr_t			 sense_buffer_paddr;
	struct pvscsi_sg_list		*sg_list;
	bus_addr_t			 sg_list_paddr;
};

struct pvscsi_dma
{
	bus_dma_tag_t	 tag;
	bus_dmamap_t	 map;
	void		*vaddr;
	bus_addr_t	 paddr;
	bus_size_t	 size;
};

struct pvscsi_softc {
	device_t		 dev;
	struct mtx		 lock;
	struct cam_sim		*sim;
	struct cam_path		*bus_path;
	int			 frozen;
	struct pvscsi_rings_state	*rings_state;
	struct pvscsi_ring_req_desc	*req_ring;
	struct pvscsi_ring_cmp_desc	*cmp_ring;
	struct pvscsi_ring_msg_desc	*msg_ring;
	uint32_t		 hcb_cnt;
	struct pvscsi_hcb	*hcbs;
	SLIST_HEAD(, pvscsi_hcb)	free_list;
	bus_dma_tag_t		parent_dmat;
	bus_dma_tag_t		buffer_dmat;

	bool		 use_msg;
	uint32_t	 max_targets;
	int		 mm_rid;
	struct resource	*mm_res;
	int		 irq_id;
	struct resource	*irq_res;
	void		*irq_handler;
	int		 use_req_call_threshold;
	int		 use_msi_or_msix;

	uint64_t	rings_state_ppn;
	uint32_t	req_ring_num_pages;
	uint64_t	req_ring_ppn[PVSCSI_MAX_NUM_PAGES_REQ_RING];
	uint32_t	cmp_ring_num_pages;
	uint64_t	cmp_ring_ppn[PVSCSI_MAX_NUM_PAGES_CMP_RING];
	uint32_t	msg_ring_num_pages;
	uint64_t	msg_ring_ppn[PVSCSI_MAX_NUM_PAGES_MSG_RING];

	struct	pvscsi_dma rings_state_dma;
	struct	pvscsi_dma req_ring_dma;
	struct	pvscsi_dma cmp_ring_dma;
	struct	pvscsi_dma msg_ring_dma;

	struct	pvscsi_dma sg_list_dma;
	struct	pvscsi_dma sense_buffer_dma;
};

static int
pvscsi_get_tunable(struct pvscsi_softc *sc, char *name, int value)
{
	char cfg[64];

	snprintf(cfg, sizeof(cfg), "hw.pvscsi.%d.%s", device_get_unit(sc->dev),
	    name);
	TUNABLE_INT_FETCH(cfg, &value);

	return (value);
}

static void
pvscsi_freeze(struct pvscsi_softc *sc)
{

	if (!sc->frozen) {
		xpt_freeze_simq(sc->sim, 1);
		sc->frozen = 1;
	}
}

static inline uint32_t
pvscsi_reg_read(struct pvscsi_softc *sc, uint32_t offset)
{

	return (bus_read_4(sc->mm_res, offset));
}

static inline void
pvscsi_reg_write(struct pvscsi_softc *sc, uint32_t offset, uint32_t val)
{

	bus_write_4(sc->mm_res, offset, val);
}

static inline uint32_t
pvscsi_read_intr_status(struct pvscsi_softc *sc)
{

	return (pvscsi_reg_read(sc, PVSCSI_REG_OFFSET_INTR_STATUS));
}

static inline void
pvscsi_write_intr_status(struct pvscsi_softc *sc, uint32_t val)
{

	pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_INTR_STATUS, val);
}

static inline void
pvscsi_intr_enable(struct pvscsi_softc *sc)
{
	uint32_t mask;

	mask = PVSCSI_INTR_CMPL_MASK;
	if (sc->use_msg) {
		mask |= PVSCSI_INTR_MSG_MASK;
	}

	pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_INTR_MASK, mask);
}

static inline void
pvscsi_intr_disable(struct pvscsi_softc *sc)
{

	pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_INTR_MASK, 0);
}

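/*
 * Ring the doorbell for a newly queued request.  For READ/WRITE commands
 * the write to the R/W I/O register is skipped while request-call
 * coalescing is in effect and the ring backlog is still below the
 * device-suggested threshold; the intent (an assumption about the
 * virtual hardware, not spelled out here) is to batch doorbell writes,
 * which are comparatively expensive register accesses on an emulated
 * device.  All other commands kick unconditionally.
 */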
static void
pvscsi_kick_io(struct pvscsi_softc *sc, uint8_t cdb0)
{
	struct pvscsi_rings_state *s;

	if (cdb0 == READ_6 || cdb0 == READ_10 ||
	    cdb0 == READ_12 || cdb0 == READ_16 ||
	    cdb0 == WRITE_6 || cdb0 == WRITE_10 ||
	    cdb0 == WRITE_12 || cdb0 == WRITE_16) {
		s = sc->rings_state;

		if (!sc->use_req_call_threshold ||
		    (s->req_prod_idx - s->req_cons_idx) >=
		     s->req_call_threshold) {
			pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_KICK_RW_IO, 0);
		}
	} else {
		pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_KICK_NON_RW_IO, 0);
	}
}

static void
pvscsi_write_cmd(struct pvscsi_softc *sc, uint32_t cmd, void *data,
    uint32_t len)
{
	uint32_t *data_ptr;
	int i;

	KASSERT(len % sizeof(uint32_t) == 0,
	    ("command size not a multiple of 4"));

	data_ptr = data;
	len /= sizeof(uint32_t);

	pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_COMMAND, cmd);
	for (i = 0; i < len; ++i) {
		pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_COMMAND_DATA,
		    data_ptr[i]);
	}
}

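/*
 * Completion descriptors echo back a 64-bit context.  The hcb's 1-based
 * index in the hcbs array is used as the context, so the reverse lookup
 * in pvscsi_context_to_hcb() is a simple subtraction.
 */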
static inline uint64_t
pvscsi_hcb_to_context(struct pvscsi_softc *sc, struct pvscsi_hcb *hcb)
{

	/* Offset by 1 because context must not be 0 */
	return (hcb - sc->hcbs + 1);
}

static inline struct pvscsi_hcb *
pvscsi_context_to_hcb(struct pvscsi_softc *sc, uint64_t context)
{

	return (sc->hcbs + (context - 1));
}

static struct pvscsi_hcb *
pvscsi_hcb_get(struct pvscsi_softc *sc)
{
	struct pvscsi_hcb *hcb;

	mtx_assert(&sc->lock, MA_OWNED);

	hcb = SLIST_FIRST(&sc->free_list);
	if (hcb) {
		SLIST_REMOVE_HEAD(&sc->free_list, links);
	}

	return (hcb);
}

static void
pvscsi_hcb_put(struct pvscsi_softc *sc, struct pvscsi_hcb *hcb)
{

	mtx_assert(&sc->lock, MA_OWNED);
	hcb->ccb = NULL;
	hcb->e = NULL;
	hcb->recovery = PVSCSI_HCB_NONE;
	SLIST_INSERT_HEAD(&sc->free_list, hcb, links);
}

static uint32_t
pvscsi_get_max_targets(struct pvscsi_softc *sc)
{
	uint32_t max_targets;

	pvscsi_write_cmd(sc, PVSCSI_CMD_GET_MAX_TARGETS, NULL, 0);

	max_targets = pvscsi_reg_read(sc, PVSCSI_REG_OFFSET_COMMAND_STATUS);

	if (max_targets == ~0) {
		max_targets = 16;
	}

	return (max_targets);
}

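/*
 * Probe for and enable request-call threshold support.  Writing the
 * command register with no payload acts as a feature probe: a status of
 * -1 means the device does not implement
 * PVSCSI_CMD_SETUP_REQCALLTHRESHOLD.  Returns non-zero when the feature
 * was enabled, in which case pvscsi_kick_io() consults
 * req_call_threshold before ringing the doorbell.
 */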
static int
pvscsi_setup_req_call(struct pvscsi_softc *sc, uint32_t enable)
{
	uint32_t status;
	struct pvscsi_cmd_desc_setup_req_call cmd;

	if (!pvscsi_get_tunable(sc, "use_req_call_threshold",
	    pvscsi_use_req_call_threshold)) {
		return (0);
	}

	pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_COMMAND,
	    PVSCSI_CMD_SETUP_REQCALLTHRESHOLD);
	status = pvscsi_reg_read(sc, PVSCSI_REG_OFFSET_COMMAND_STATUS);

	if (status != -1) {
		bzero(&cmd, sizeof(cmd));
		cmd.enable = enable;
		pvscsi_write_cmd(sc, PVSCSI_CMD_SETUP_REQCALLTHRESHOLD,
		    &cmd, sizeof(cmd));
		status = pvscsi_reg_read(sc, PVSCSI_REG_OFFSET_COMMAND_STATUS);

		return (status != 0);
	} else {
		return (0);
	}
}

static void
pvscsi_dma_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	bus_addr_t *dest;

	KASSERT(nseg == 1, ("more than one segment"));

	dest = arg;

	if (!error) {
		*dest = segs->ds_addr;
	}
}

static void
pvscsi_dma_free(struct pvscsi_softc *sc, struct pvscsi_dma *dma)
{

	if (dma->tag != NULL) {
		if (dma->paddr != 0) {
			bus_dmamap_unload(dma->tag, dma->map);
		}

		if (dma->vaddr != NULL) {
			bus_dmamem_free(dma->tag, dma->vaddr, dma->map);
		}

		bus_dma_tag_destroy(dma->tag);
	}

	bzero(dma, sizeof(*dma));
}

static int
pvscsi_dma_alloc(struct pvscsi_softc *sc, struct pvscsi_dma *dma,
    bus_size_t size, bus_size_t alignment)
{
	int error;

	bzero(dma, sizeof(*dma));

	error = bus_dma_tag_create(sc->parent_dmat, alignment, 0,
	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, size, 1, size,
	    BUS_DMA_ALLOCNOW, NULL, NULL, &dma->tag);
	if (error) {
		device_printf(sc->dev, "error creating dma tag, error %d\n",
		    error);
		goto fail;
	}

	error = bus_dmamem_alloc(dma->tag, &dma->vaddr,
	    BUS_DMA_NOWAIT | BUS_DMA_ZERO, &dma->map);
	if (error) {
		device_printf(sc->dev, "error allocating dma mem, error %d\n",
		    error);
		goto fail;
	}

	error = bus_dmamap_load(dma->tag, dma->map, dma->vaddr, size,
	    pvscsi_dma_cb, &dma->paddr, BUS_DMA_NOWAIT);
	if (error) {
		device_printf(sc->dev, "error mapping dma mem, error %d\n",
		    error);
		goto fail;
	}

	dma->size = size;

fail:
	if (error) {
		pvscsi_dma_free(sc, dma);
	}
	return (error);
}

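/*
 * Allocate a physically contiguous ring and report it to the caller as a
 * list of physical page numbers (PPNs).  Because the allocation is a
 * single contiguous segment, the PPNs are consecutive starting at
 * paddr >> PAGE_SHIFT; the device consumes these lists via the
 * SETUP_RINGS and SETUP_MSG_RING commands.
 */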
static int
pvscsi_dma_alloc_ppns(struct pvscsi_softc *sc, struct pvscsi_dma *dma,
    uint64_t *ppn_list, uint32_t num_pages)
{
	int error;
	uint32_t i;
	uint64_t ppn;

	error = pvscsi_dma_alloc(sc, dma, num_pages * PAGE_SIZE, PAGE_SIZE);
	if (error) {
		device_printf(sc->dev, "Error allocating pages, error %d\n",
		    error);
		return (error);
	}

	ppn = dma->paddr >> PAGE_SHIFT;
	for (i = 0; i < num_pages; i++) {
		ppn_list[i] = ppn + i;
	}

	return (0);
}

static void
pvscsi_dma_free_per_hcb(struct pvscsi_softc *sc, uint32_t hcbs_allocated)
{
	int i;
	int lock_owned;
	struct pvscsi_hcb *hcb;

	lock_owned = mtx_owned(&sc->lock);

	if (lock_owned) {
		mtx_unlock(&sc->lock);
	}
	for (i = 0; i < hcbs_allocated; ++i) {
		hcb = sc->hcbs + i;
		callout_drain(&hcb->callout);
	}
	if (lock_owned) {
		mtx_lock(&sc->lock);
	}

	for (i = 0; i < hcbs_allocated; ++i) {
		hcb = sc->hcbs + i;
		bus_dmamap_destroy(sc->buffer_dmat, hcb->dma_map);
	}

	pvscsi_dma_free(sc, &sc->sense_buffer_dma);
	pvscsi_dma_free(sc, &sc->sg_list_dma);
}

static int
pvscsi_dma_alloc_per_hcb(struct pvscsi_softc *sc)
{
	int i;
	int error;
	struct pvscsi_hcb *hcb;

	i = 0;

	error = pvscsi_dma_alloc(sc, &sc->sg_list_dma,
	    sizeof(struct pvscsi_sg_list) * sc->hcb_cnt, 1);
	if (error) {
		device_printf(sc->dev,
		    "Error allocating sg list DMA memory, error %d\n", error);
		goto fail;
	}

	error = pvscsi_dma_alloc(sc, &sc->sense_buffer_dma,
	    PVSCSI_SENSE_LENGTH * sc->hcb_cnt, 1);
	if (error) {
		device_printf(sc->dev,
		    "Error allocating sense buffer DMA memory, error %d\n",
		    error);
		goto fail;
	}

	for (i = 0; i < sc->hcb_cnt; ++i) {
		hcb = sc->hcbs + i;

		error = bus_dmamap_create(sc->buffer_dmat, 0, &hcb->dma_map);
		if (error) {
			device_printf(sc->dev,
			    "Error creating dma map for hcb %d, error %d\n",
			    i, error);
			goto fail;
		}

		hcb->sense_buffer =
		    (void *)((caddr_t)sc->sense_buffer_dma.vaddr +
		    PVSCSI_SENSE_LENGTH * i);
		hcb->sense_buffer_paddr =
		    sc->sense_buffer_dma.paddr + PVSCSI_SENSE_LENGTH * i;

		hcb->sg_list =
		    (struct pvscsi_sg_list *)((caddr_t)sc->sg_list_dma.vaddr +
		    sizeof(struct pvscsi_sg_list) * i);
		hcb->sg_list_paddr =
		    sc->sg_list_dma.paddr + sizeof(struct pvscsi_sg_list) * i;

		callout_init_mtx(&hcb->callout, &sc->lock, 0);
	}

	SLIST_INIT(&sc->free_list);
	for (i = (sc->hcb_cnt - 1); i >= 0; --i) {
		hcb = sc->hcbs + i;
		SLIST_INSERT_HEAD(&sc->free_list, hcb, links);
	}

fail:
	if (error) {
		pvscsi_dma_free_per_hcb(sc, i);
	}

	return (error);
}

static void
pvscsi_free_rings(struct pvscsi_softc *sc)
{

	pvscsi_dma_free(sc, &sc->rings_state_dma);
	pvscsi_dma_free(sc, &sc->req_ring_dma);
	pvscsi_dma_free(sc, &sc->cmp_ring_dma);
	if (sc->use_msg) {
		pvscsi_dma_free(sc, &sc->msg_ring_dma);
	}
}

static int
pvscsi_allocate_rings(struct pvscsi_softc *sc)
{
	int error;

	error = pvscsi_dma_alloc_ppns(sc, &sc->rings_state_dma,
	    &sc->rings_state_ppn, 1);
	if (error) {
		device_printf(sc->dev,
		    "Error allocating rings state, error = %d\n", error);
		goto fail;
	}
	sc->rings_state = sc->rings_state_dma.vaddr;

	error = pvscsi_dma_alloc_ppns(sc, &sc->req_ring_dma, sc->req_ring_ppn,
	    sc->req_ring_num_pages);
	if (error) {
		device_printf(sc->dev,
		    "Error allocating req ring pages, error = %d\n", error);
		goto fail;
	}
	sc->req_ring = sc->req_ring_dma.vaddr;

	error = pvscsi_dma_alloc_ppns(sc, &sc->cmp_ring_dma, sc->cmp_ring_ppn,
	    sc->cmp_ring_num_pages);
	if (error) {
		device_printf(sc->dev,
		    "Error allocating cmp ring pages, error = %d\n", error);
		goto fail;
	}
	sc->cmp_ring = sc->cmp_ring_dma.vaddr;

	sc->msg_ring = NULL;
	if (sc->use_msg) {
		error = pvscsi_dma_alloc_ppns(sc, &sc->msg_ring_dma,
		    sc->msg_ring_ppn, sc->msg_ring_num_pages);
		if (error) {
			device_printf(sc->dev,
			    "Error allocating msg ring pages, error = %d\n",
			    error);
			goto fail;
		}
		sc->msg_ring = sc->msg_ring_dma.vaddr;
	}

	DEBUG_PRINTF(1, sc->dev, "rings_state: %p\n", sc->rings_state);
	DEBUG_PRINTF(1, sc->dev, "req_ring: %p - %u pages\n", sc->req_ring,
	    sc->req_ring_num_pages);
	DEBUG_PRINTF(1, sc->dev, "cmp_ring: %p - %u pages\n", sc->cmp_ring,
	    sc->cmp_ring_num_pages);
	DEBUG_PRINTF(1, sc->dev, "msg_ring: %p - %u pages\n", sc->msg_ring,
	    sc->msg_ring_num_pages);

fail:
	if (error) {
		pvscsi_free_rings(sc);
	}
	return (error);
}

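/*
 * Hand the rings-state page and the request/completion ring PPN lists to
 * the device.  The descriptor is streamed 32 bits at a time through the
 * COMMAND/COMMAND_DATA registers by pvscsi_write_cmd().
 */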
static void
pvscsi_setup_rings(struct pvscsi_softc *sc)
{
	struct pvscsi_cmd_desc_setup_rings cmd;
	uint32_t i;

	bzero(&cmd, sizeof(cmd));

	cmd.rings_state_ppn = sc->rings_state_ppn;

	cmd.req_ring_num_pages = sc->req_ring_num_pages;
	for (i = 0; i < sc->req_ring_num_pages; ++i) {
		cmd.req_ring_ppns[i] = sc->req_ring_ppn[i];
	}

	cmd.cmp_ring_num_pages = sc->cmp_ring_num_pages;
	for (i = 0; i < sc->cmp_ring_num_pages; ++i) {
		cmd.cmp_ring_ppns[i] = sc->cmp_ring_ppn[i];
	}

	pvscsi_write_cmd(sc, PVSCSI_CMD_SETUP_RINGS, &cmd, sizeof(cmd));
}

static int
pvscsi_hw_supports_msg(struct pvscsi_softc *sc)
{
	uint32_t status;

	pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_COMMAND,
	    PVSCSI_CMD_SETUP_MSG_RING);
	status = pvscsi_reg_read(sc, PVSCSI_REG_OFFSET_COMMAND_STATUS);

	return (status != -1);
}

static void
pvscsi_setup_msg_ring(struct pvscsi_softc *sc)
{
	struct pvscsi_cmd_desc_setup_msg_ring cmd;
	uint32_t i;

	KASSERT(sc->use_msg, ("msg is not being used"));

	bzero(&cmd, sizeof(cmd));

	cmd.num_pages = sc->msg_ring_num_pages;
	for (i = 0; i < sc->msg_ring_num_pages; ++i) {
		cmd.ring_ppns[i] = sc->msg_ring_ppn[i];
	}

	pvscsi_write_cmd(sc, PVSCSI_CMD_SETUP_MSG_RING, &cmd, sizeof(cmd));
}

static void
pvscsi_adapter_reset(struct pvscsi_softc *sc)
{
	uint32_t val;

	device_printf(sc->dev, "Adapter Reset\n");

	pvscsi_write_cmd(sc, PVSCSI_CMD_ADAPTER_RESET, NULL, 0);
	val = pvscsi_read_intr_status(sc);

	DEBUG_PRINTF(2, sc->dev, "adapter reset done: %u\n", val);
}

static void
pvscsi_bus_reset(struct pvscsi_softc *sc)
{

	device_printf(sc->dev, "Bus Reset\n");

	pvscsi_write_cmd(sc, PVSCSI_CMD_RESET_BUS, NULL, 0);
	pvscsi_process_cmp_ring(sc);

	DEBUG_PRINTF(2, sc->dev, "bus reset done\n");
}

static void
pvscsi_device_reset(struct pvscsi_softc *sc, uint32_t target)
{
	struct pvscsi_cmd_desc_reset_device cmd;

	memset(&cmd, 0, sizeof(cmd));

	cmd.target = target;

	device_printf(sc->dev, "Device reset for target %u\n", target);

	pvscsi_write_cmd(sc, PVSCSI_CMD_RESET_DEVICE, &cmd, sizeof(cmd));
	pvscsi_process_cmp_ring(sc);

	DEBUG_PRINTF(2, sc->dev, "device reset done\n");
}

static void
pvscsi_abort(struct pvscsi_softc *sc, uint32_t target, union ccb *ccb)
{
	struct pvscsi_cmd_desc_abort_cmd cmd;
	struct pvscsi_hcb *hcb;
	uint64_t context;

	pvscsi_process_cmp_ring(sc);

	hcb = ccb->ccb_h.ccb_pvscsi_hcb;

	if (hcb != NULL) {
		context = pvscsi_hcb_to_context(sc, hcb);

		memset(&cmd, 0, sizeof(cmd));
		cmd.target = target;
		cmd.context = context;

		device_printf(sc->dev, "Abort for target %u context %llx\n",
		    target, (unsigned long long)context);

		pvscsi_write_cmd(sc, PVSCSI_CMD_ABORT_CMD, &cmd, sizeof(cmd));
		pvscsi_process_cmp_ring(sc);

		DEBUG_PRINTF(2, sc->dev, "abort done\n");
	} else {
		DEBUG_PRINTF(1, sc->dev,
		    "Target %u ccb %p not found for abort\n", target, ccb);
	}
}

static int
pvscsi_probe(device_t dev)
{

	if (pci_get_vendor(dev) == PCI_VENDOR_ID_VMWARE &&
	    pci_get_device(dev) == PCI_DEVICE_ID_VMWARE_PVSCSI) {
		device_set_desc(dev, "VMware Paravirtual SCSI Controller");
		return (BUS_PROBE_DEFAULT);
	}
	return (ENXIO);
}

static int
pvscsi_shutdown(device_t dev)
{

	return (0);
}

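/*
 * Command timeout handler.  Error recovery escalates one step per
 * invocation: abort the command, then reset the device, then the bus,
 * and finally the whole adapter, re-arming the callout between steps
 * (PVSCSI_ABORT_TIMEOUT / PVSCSI_RESET_TIMEOUT seconds).
 */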
static void
pvscsi_timeout(void *arg)
{
	struct pvscsi_hcb *hcb;
	struct pvscsi_softc *sc;
	union ccb *ccb;

	hcb = arg;
	ccb = hcb->ccb;

	if (ccb == NULL) {
		/* Already completed */
		return;
	}

	sc = ccb->ccb_h.ccb_pvscsi_sc;
	mtx_assert(&sc->lock, MA_OWNED);

	device_printf(sc->dev, "Command timed out hcb=%p ccb=%p.\n", hcb, ccb);

	switch (hcb->recovery) {
	case PVSCSI_HCB_NONE:
		hcb->recovery = PVSCSI_HCB_ABORT;
		pvscsi_abort(sc, ccb->ccb_h.target_id, ccb);
		callout_reset_sbt(&hcb->callout, PVSCSI_ABORT_TIMEOUT * SBT_1S,
		    0, pvscsi_timeout, hcb, 0);
		break;
	case PVSCSI_HCB_ABORT:
		hcb->recovery = PVSCSI_HCB_DEVICE_RESET;
		pvscsi_freeze(sc);
		pvscsi_device_reset(sc, ccb->ccb_h.target_id);
		callout_reset_sbt(&hcb->callout, PVSCSI_RESET_TIMEOUT * SBT_1S,
		    0, pvscsi_timeout, hcb, 0);
		break;
	case PVSCSI_HCB_DEVICE_RESET:
		hcb->recovery = PVSCSI_HCB_BUS_RESET;
		pvscsi_freeze(sc);
		pvscsi_bus_reset(sc);
		callout_reset_sbt(&hcb->callout, PVSCSI_RESET_TIMEOUT * SBT_1S,
		    0, pvscsi_timeout, hcb, 0);
		break;
	case PVSCSI_HCB_BUS_RESET:
		pvscsi_freeze(sc);
		pvscsi_adapter_reset(sc);
		break;
	}
}

static void
pvscsi_process_completion(struct pvscsi_softc *sc,
    struct pvscsi_ring_cmp_desc *e)
{
	struct pvscsi_hcb *hcb;
	union ccb *ccb;
	uint32_t status;
	uint32_t btstat;
	uint32_t sdstat;
	bus_dmasync_op_t op;

	hcb = pvscsi_context_to_hcb(sc, e->context);

	callout_stop(&hcb->callout);

	ccb = hcb->ccb;

	btstat = e->host_status;
	sdstat = e->scsi_status;

	ccb->csio.scsi_status = sdstat;
	ccb->csio.resid = ccb->csio.dxfer_len - e->data_len;

	if ((ccb->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_NONE) {
		if ((ccb->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN) {
			op = BUS_DMASYNC_POSTREAD;
		} else {
			op = BUS_DMASYNC_POSTWRITE;
		}
		bus_dmamap_sync(sc->buffer_dmat, hcb->dma_map, op);
		bus_dmamap_unload(sc->buffer_dmat, hcb->dma_map);
	}

	if (btstat == BTSTAT_SUCCESS && sdstat == SCSI_STATUS_OK) {
		DEBUG_PRINTF(3, sc->dev,
		    "completing command context %llx success\n",
		    (unsigned long long)e->context);
		ccb->csio.resid = 0;
		status = CAM_REQ_CMP;
	} else {
		switch (btstat) {
		case BTSTAT_SUCCESS:
		case BTSTAT_LINKED_COMMAND_COMPLETED:
		case BTSTAT_LINKED_COMMAND_COMPLETED_WITH_FLAG:
			switch (sdstat) {
			case SCSI_STATUS_OK:
				ccb->csio.resid = 0;
				status = CAM_REQ_CMP;
				break;
			case SCSI_STATUS_CHECK_COND:
				status = CAM_SCSI_STATUS_ERROR;

				if (ccb->csio.sense_len != 0) {
					status |= CAM_AUTOSNS_VALID;

					memset(&ccb->csio.sense_data, 0,
					    sizeof(ccb->csio.sense_data));
					memcpy(&ccb->csio.sense_data,
					    hcb->sense_buffer,
					    MIN(ccb->csio.sense_len,
						e->sense_len));
				}
				break;
			case SCSI_STATUS_BUSY:
			case SCSI_STATUS_QUEUE_FULL:
				status = CAM_REQUEUE_REQ;
				break;
			case SCSI_STATUS_CMD_TERMINATED:
			case SCSI_STATUS_TASK_ABORTED:
				status = CAM_REQ_ABORTED;
				break;
			default:
				DEBUG_PRINTF(1, sc->dev,
				    "ccb: %p sdstat=0x%x\n", ccb, sdstat);
				status = CAM_SCSI_STATUS_ERROR;
				break;
			}
			break;
		case BTSTAT_SELTIMEO:
			status = CAM_SEL_TIMEOUT;
			break;
		case BTSTAT_DATARUN:
		case BTSTAT_DATA_UNDERRUN:
			status = CAM_DATA_RUN_ERR;
			break;
		case BTSTAT_ABORTQUEUE:
		case BTSTAT_HATIMEOUT:
			status = CAM_REQUEUE_REQ;
			break;
		case BTSTAT_NORESPONSE:
		case BTSTAT_SENTRST:
		case BTSTAT_RECVRST:
		case BTSTAT_BUSRESET:
			status = CAM_SCSI_BUS_RESET;
			break;
		case BTSTAT_SCSIPARITY:
			status = CAM_UNCOR_PARITY;
			break;
		case BTSTAT_BUSFREE:
			status = CAM_UNEXP_BUSFREE;
			break;
		case BTSTAT_INVPHASE:
			status = CAM_SEQUENCE_FAIL;
			break;
		case BTSTAT_SENSFAILED:
			status = CAM_AUTOSENSE_FAIL;
			break;
		case BTSTAT_LUNMISMATCH:
		case BTSTAT_TAGREJECT:
		case BTSTAT_DISCONNECT:
		case BTSTAT_BADMSG:
		case BTSTAT_INVPARAM:
			status = CAM_REQ_CMP_ERR;
			break;
		case BTSTAT_HASOFTWARE:
		case BTSTAT_HAHARDWARE:
			status = CAM_NO_HBA;
			break;
		default:
			device_printf(sc->dev, "unknown hba status: 0x%x\n",
			    btstat);
			status = CAM_NO_HBA;
			break;
		}

		DEBUG_PRINTF(3, sc->dev,
		    "completing command context %llx btstat %x sdstat %x - status %x\n",
		    (unsigned long long)e->context, btstat, sdstat, status);
	}

	ccb->ccb_h.ccb_pvscsi_hcb = NULL;
	ccb->ccb_h.ccb_pvscsi_sc = NULL;
	pvscsi_hcb_put(sc, hcb);

	ccb->ccb_h.status =
	    status | (ccb->ccb_h.status & ~(CAM_STATUS_MASK | CAM_SIM_QUEUED));

	if (sc->frozen) {
		ccb->ccb_h.status |= CAM_RELEASE_SIMQ;
		sc->frozen = 0;
	}

	if (status != CAM_REQ_CMP) {
		ccb->ccb_h.status |= CAM_DEV_QFRZN;
		xpt_freeze_devq(ccb->ccb_h.path, /*count*/ 1);
	}
	xpt_done(ccb);
}

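/*
 * Drain the completion ring.  The producer/consumer indices are
 * free-running; masking with MASK(cmp_num_entries_log2) yields the ring
 * slot.  The barrier before advancing cmp_cons_idx keeps the descriptor
 * reads from being reordered past the store that hands the slot back to
 * the device.
 */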
static void
pvscsi_process_cmp_ring(struct pvscsi_softc *sc)
{
	struct pvscsi_ring_cmp_desc *ring;
	struct pvscsi_rings_state *s;
	struct pvscsi_ring_cmp_desc *e;
	uint32_t mask;

	mtx_assert(&sc->lock, MA_OWNED);

	s = sc->rings_state;
	ring = sc->cmp_ring;
	mask = MASK(s->cmp_num_entries_log2);

	while (s->cmp_cons_idx != s->cmp_prod_idx) {
		e = ring + (s->cmp_cons_idx & mask);

		pvscsi_process_completion(sc, e);

		mb();
		s->cmp_cons_idx++;
	}
}

static void
pvscsi_process_msg(struct pvscsi_softc *sc, struct pvscsi_ring_msg_desc *e)
{
	struct pvscsi_ring_msg_dev_status_changed *desc;
	union ccb *ccb;

	switch (e->type) {
	case PVSCSI_MSG_DEV_ADDED:
	case PVSCSI_MSG_DEV_REMOVED: {
		desc = (struct pvscsi_ring_msg_dev_status_changed *)e;

		device_printf(sc->dev, "MSG: device %s at scsi%u:%u:%u\n",
		    desc->type == PVSCSI_MSG_DEV_ADDED ? "addition" : "removal",
		    desc->bus, desc->target, desc->lun[1]);

		ccb = xpt_alloc_ccb_nowait();
		if (ccb == NULL) {
			device_printf(sc->dev,
			    "Error allocating CCB for dev change.\n");
			break;
		}

		if (xpt_create_path(&ccb->ccb_h.path, NULL,
		    cam_sim_path(sc->sim), desc->target, desc->lun[1])
		    != CAM_REQ_CMP) {
			device_printf(sc->dev,
			    "Error creating path for dev change.\n");
			xpt_free_ccb(ccb);
			break;
		}

		xpt_rescan(ccb);
	} break;
	default:
		device_printf(sc->dev, "Unknown msg type 0x%x\n", e->type);
	}
}

static void
pvscsi_process_msg_ring(struct pvscsi_softc *sc)
{
	struct pvscsi_ring_msg_desc *ring;
	struct pvscsi_rings_state *s;
	struct pvscsi_ring_msg_desc *e;
	uint32_t mask;

	mtx_assert(&sc->lock, MA_OWNED);

	s = sc->rings_state;
	ring = sc->msg_ring;
	mask = MASK(s->msg_num_entries_log2);

	while (s->msg_cons_idx != s->msg_prod_idx) {
		e = ring + (s->msg_cons_idx & mask);

		pvscsi_process_msg(sc, e);

		mb();
		s->msg_cons_idx++;
	}
}

static void
pvscsi_intr_locked(struct pvscsi_softc *sc)
{
	uint32_t val;

	mtx_assert(&sc->lock, MA_OWNED);

	val = pvscsi_read_intr_status(sc);

	if ((val & PVSCSI_INTR_ALL_SUPPORTED) != 0) {
		pvscsi_write_intr_status(sc, val & PVSCSI_INTR_ALL_SUPPORTED);
		pvscsi_process_cmp_ring(sc);
		if (sc->use_msg) {
			pvscsi_process_msg_ring(sc);
		}
	}
}

static void
pvscsi_intr(void *xsc)
{
	struct pvscsi_softc *sc;

	sc = xsc;

	mtx_assert(&sc->lock, MA_NOTOWNED);

	mtx_lock(&sc->lock);
	pvscsi_intr_locked(xsc);
	mtx_unlock(&sc->lock);
}

static void
pvscsi_poll(struct cam_sim *sim)
{
	struct pvscsi_softc *sc;

	sc = cam_sim_softc(sim);

	mtx_assert(&sc->lock, MA_OWNED);
	pvscsi_intr_locked(sc);
}

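/*
 * Invoked by bus_dmamap_load_ccb() once the data buffer is mapped.  With
 * more than one S/G segment the request points at the hcb's per-command
 * pvscsi_sg_element list instead of a single data address.  Publishing
 * the request is a barrier, a req_prod_idx increment, and a doorbell
 * kick.
 */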
static void
pvscsi_execute_ccb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	struct pvscsi_hcb *hcb;
	struct pvscsi_ring_req_desc *e;
	union ccb *ccb;
	struct pvscsi_softc *sc;
	struct pvscsi_rings_state *s;
	uint8_t cdb0;
	bus_dmasync_op_t op;

	hcb = arg;
	ccb = hcb->ccb;
	e = hcb->e;
	sc = ccb->ccb_h.ccb_pvscsi_sc;
	s = sc->rings_state;

	mtx_assert(&sc->lock, MA_OWNED);

	if (error) {
		device_printf(sc->dev, "pvscsi_execute_ccb error %d\n", error);

		if (error == EFBIG) {
			ccb->ccb_h.status = CAM_REQ_TOO_BIG;
		} else {
			ccb->ccb_h.status = CAM_REQ_CMP_ERR;
		}

		pvscsi_hcb_put(sc, hcb);
		xpt_done(ccb);
		return;
	}

	e->flags = 0;
	op = 0;
	switch (ccb->ccb_h.flags & CAM_DIR_MASK) {
	case CAM_DIR_NONE:
		e->flags |= PVSCSI_FLAG_CMD_DIR_NONE;
		break;
	case CAM_DIR_IN:
		e->flags |= PVSCSI_FLAG_CMD_DIR_TOHOST;
		op = BUS_DMASYNC_PREREAD;
		break;
	case CAM_DIR_OUT:
		e->flags |= PVSCSI_FLAG_CMD_DIR_TODEVICE;
		op = BUS_DMASYNC_PREWRITE;
		break;
	case CAM_DIR_BOTH:
		/* TODO: does this need handling? */
		break;
	}

	if (nseg != 0) {
		if (nseg > 1) {
			int i;
			struct pvscsi_sg_element *sge;

			KASSERT(nseg <= PVSCSI_MAX_SG_ENTRIES_PER_SEGMENT,
			    ("too many sg segments"));

			sge = hcb->sg_list->sge;
			e->flags |= PVSCSI_FLAG_CMD_WITH_SG_LIST;

			for (i = 0; i < nseg; ++i) {
				sge[i].addr = segs[i].ds_addr;
				sge[i].length = segs[i].ds_len;
				sge[i].flags = 0;
			}

			e->data_addr = hcb->sg_list_paddr;
		} else {
			e->data_addr = segs->ds_addr;
		}

		bus_dmamap_sync(sc->buffer_dmat, hcb->dma_map, op);
	} else {
		e->data_addr = 0;
	}

	cdb0 = e->cdb[0];
	ccb->ccb_h.status |= CAM_SIM_QUEUED;

	if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) {
		callout_reset_sbt(&hcb->callout, ccb->ccb_h.timeout * SBT_1MS,
		    0, pvscsi_timeout, hcb, 0);
	}

	mb();
	s->req_prod_idx++;
	pvscsi_kick_io(sc, cdb0);
}

static void
pvscsi_action(struct cam_sim *sim, union ccb *ccb)
{
	struct pvscsi_softc *sc;
	struct ccb_hdr *ccb_h;

	sc = cam_sim_softc(sim);
	ccb_h = &ccb->ccb_h;

	mtx_assert(&sc->lock, MA_OWNED);

	switch (ccb_h->func_code) {
	case XPT_SCSI_IO:
	{
		struct ccb_scsiio *csio;
		uint32_t req_num_entries_log2;
		struct pvscsi_ring_req_desc *ring;
		struct pvscsi_ring_req_desc *e;
		struct pvscsi_rings_state *s;
		struct pvscsi_hcb *hcb;

		csio = &ccb->csio;
		ring = sc->req_ring;
		s = sc->rings_state;

		hcb = NULL;

		/*
		 * Check if it was completed already (such as aborted
		 * by upper layers)
		 */
		if ((ccb_h->status & CAM_STATUS_MASK) != CAM_REQ_INPROG) {
			xpt_done(ccb);
			return;
		}

		req_num_entries_log2 = s->req_num_entries_log2;

		if (s->req_prod_idx - s->cmp_cons_idx >=
		    (1 << req_num_entries_log2)) {
			device_printf(sc->dev,
			    "Not enough room on completion ring.\n");
			pvscsi_freeze(sc);
			ccb_h->status = CAM_REQUEUE_REQ;
			goto finish_ccb;
		}

		hcb = pvscsi_hcb_get(sc);
		if (hcb == NULL) {
			device_printf(sc->dev, "No free hcbs.\n");
			pvscsi_freeze(sc);
			ccb_h->status = CAM_REQUEUE_REQ;
			goto finish_ccb;
		}

		hcb->ccb = ccb;
		ccb_h->ccb_pvscsi_hcb = hcb;
		ccb_h->ccb_pvscsi_sc = sc;

		if (csio->cdb_len > sizeof(e->cdb)) {
			DEBUG_PRINTF(2, sc->dev, "cdb length %u too large\n",
			    csio->cdb_len);
			ccb_h->status = CAM_REQ_INVALID;
			goto finish_ccb;
		}

		if (ccb_h->flags & CAM_CDB_PHYS) {
			DEBUG_PRINTF(2, sc->dev,
			    "CAM_CDB_PHYS not implemented\n");
			ccb_h->status = CAM_REQ_INVALID;
			goto finish_ccb;
		}

		e = ring + (s->req_prod_idx & MASK(req_num_entries_log2));

		e->bus = cam_sim_bus(sim);
		e->target = ccb_h->target_id;
		memset(e->lun, 0, sizeof(e->lun));
		e->lun[1] = ccb_h->target_lun;
		e->data_addr = 0;
		e->data_len = csio->dxfer_len;
		e->vcpu_hint = curcpu;

		e->cdb_len = csio->cdb_len;
		memcpy(e->cdb, scsiio_cdb_ptr(csio), csio->cdb_len);

		e->sense_addr = 0;
		e->sense_len = csio->sense_len;
		if (e->sense_len > 0) {
			e->sense_addr = hcb->sense_buffer_paddr;
		}

		e->tag = MSG_SIMPLE_Q_TAG;
		if (ccb_h->flags & CAM_TAG_ACTION_VALID) {
			e->tag = csio->tag_action;
		}

		e->context = pvscsi_hcb_to_context(sc, hcb);
		hcb->e = e;

		DEBUG_PRINTF(3, sc->dev,
		    " queuing command %02x context %llx\n", e->cdb[0],
		    (unsigned long long)e->context);
		bus_dmamap_load_ccb(sc->buffer_dmat, hcb->dma_map, ccb,
		    pvscsi_execute_ccb, hcb, 0);
		break;

finish_ccb:
		if (hcb != NULL) {
			pvscsi_hcb_put(sc, hcb);
		}
		xpt_done(ccb);
	} break;
	case XPT_ABORT:
	{
		struct pvscsi_hcb *abort_hcb;
		union ccb *abort_ccb;

		abort_ccb = ccb->cab.abort_ccb;
		abort_hcb = abort_ccb->ccb_h.ccb_pvscsi_hcb;

		if (abort_hcb->ccb != NULL && abort_hcb->ccb == abort_ccb) {
			if (abort_ccb->ccb_h.func_code == XPT_SCSI_IO) {
				pvscsi_abort(sc, ccb_h->target_id, abort_ccb);
				ccb_h->status = CAM_REQ_CMP;
			} else {
				ccb_h->status = CAM_UA_ABORT;
			}
		} else {
			device_printf(sc->dev,
			    "Could not find hcb for ccb %p (tgt %u)\n",
			    ccb, ccb_h->target_id);
			ccb_h->status = CAM_REQ_CMP;
		}
		xpt_done(ccb);
	} break;
	case XPT_RESET_DEV:
	{
		pvscsi_device_reset(sc, ccb_h->target_id);
		ccb_h->status = CAM_REQ_CMP;
		xpt_done(ccb);
	} break;
	case XPT_RESET_BUS:
	{
		pvscsi_bus_reset(sc);
		ccb_h->status = CAM_REQ_CMP;
		xpt_done(ccb);
	} break;
	case XPT_PATH_INQ:
	{
		struct ccb_pathinq *cpi;

		cpi = &ccb->cpi;

		cpi->version_num = 1;
		cpi->hba_inquiry = PI_TAG_ABLE;
		cpi->target_sprt = 0;
		cpi->hba_misc = PIM_NOBUSRESET | PIM_UNMAPPED;
		cpi->hba_eng_cnt = 0;
		/* cpi->vuhba_flags = 0; */
		cpi->max_target = sc->max_targets;
		cpi->max_lun = 0;
		cpi->async_flags = 0;
		cpi->hpath_id = 0;
		cpi->unit_number = cam_sim_unit(sim);
		cpi->bus_id = cam_sim_bus(sim);
		cpi->initiator_id = 7;
		cpi->base_transfer_speed = 750000;
		strlcpy(cpi->sim_vid, "VMware", SIM_IDLEN);
		strlcpy(cpi->hba_vid, "VMware", HBA_IDLEN);
		strlcpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN);
		cpi->maxio = PVSCSI_MAX_SG_ENTRIES_PER_SEGMENT * PAGE_SIZE;
		cpi->protocol = PROTO_SCSI;
		cpi->protocol_version = SCSI_REV_SPC2;
		cpi->transport = XPORT_SAS;
		cpi->transport_version = 0;

		ccb_h->status = CAM_REQ_CMP;
		xpt_done(ccb);
	} break;
	case XPT_GET_TRAN_SETTINGS:
	{
		struct ccb_trans_settings *cts;

		cts = &ccb->cts;

		cts->protocol = PROTO_SCSI;
		cts->protocol_version = SCSI_REV_SPC2;
		cts->transport = XPORT_SAS;
		cts->transport_version = 0;

		cts->proto_specific.scsi.flags = CTS_SCSI_FLAGS_TAG_ENB;
		cts->proto_specific.scsi.valid = CTS_SCSI_VALID_TQ;

		ccb_h->status = CAM_REQ_CMP;
		xpt_done(ccb);
	} break;
	case XPT_CALC_GEOMETRY:
	{
		cam_calc_geometry(&ccb->ccg, 1);
		xpt_done(ccb);
	} break;
	default:
		ccb_h->status = CAM_REQ_INVALID;
		xpt_done(ccb);
		break;
	}
}

static void
pvscsi_free_interrupts(struct pvscsi_softc *sc)
{

	if (sc->irq_handler != NULL) {
		bus_teardown_intr(sc->dev, sc->irq_res, sc->irq_handler);
	}
	if (sc->irq_res != NULL) {
		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_id,
		    sc->irq_res);
	}
	if (sc->use_msi_or_msix) {
		pci_release_msi(sc->dev);
	}
}

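/*
 * Interrupt allocation policy: try MSI-X first, then MSI, then fall back
 * to a shared legacy INTx line, honoring the use_msix/use_msi tunables.
 * Exactly one vector is used in all cases.
 */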
static int
pvscsi_setup_interrupts(struct pvscsi_softc *sc)
{
	int error;
	int flags;
	int use_msix;
	int use_msi;
	int count;

	sc->use_msi_or_msix = 0;

	use_msix = pvscsi_get_tunable(sc, "use_msix", pvscsi_use_msix);
	use_msi = pvscsi_get_tunable(sc, "use_msi", pvscsi_use_msi);

	if (use_msix && pci_msix_count(sc->dev) > 0) {
		count = 1;
		if (pci_alloc_msix(sc->dev, &count) == 0 && count == 1) {
			sc->use_msi_or_msix = 1;
			device_printf(sc->dev, "Interrupt: MSI-X\n");
		} else {
			pci_release_msi(sc->dev);
		}
	}

	if (sc->use_msi_or_msix == 0 && use_msi && pci_msi_count(sc->dev) > 0) {
		count = 1;
		if (pci_alloc_msi(sc->dev, &count) == 0 && count == 1) {
			sc->use_msi_or_msix = 1;
			device_printf(sc->dev, "Interrupt: MSI\n");
		} else {
			pci_release_msi(sc->dev);
		}
	}

	flags = RF_ACTIVE;
	if (sc->use_msi_or_msix) {
		sc->irq_id = 1;
	} else {
		device_printf(sc->dev, "Interrupt: INT\n");
		sc->irq_id = 0;
		flags |= RF_SHAREABLE;
	}

	sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_id,
	    flags);
	if (sc->irq_res == NULL) {
		device_printf(sc->dev, "IRQ allocation failed\n");
		if (sc->use_msi_or_msix) {
			pci_release_msi(sc->dev);
		}
		return (ENXIO);
	}

	error = bus_setup_intr(sc->dev, sc->irq_res,
	    INTR_TYPE_CAM | INTR_MPSAFE, NULL, pvscsi_intr, sc,
	    &sc->irq_handler);
	if (error) {
		device_printf(sc->dev, "IRQ handler setup failed\n");
		pvscsi_free_interrupts(sc);
		return (error);
	}

	return (0);
}

static void
pvscsi_free_all(struct pvscsi_softc *sc)
{

	if (sc->sim) {
		int32_t status;

		if (sc->bus_path) {
			xpt_free_path(sc->bus_path);
		}

		status = xpt_bus_deregister(cam_sim_path(sc->sim));
		if (status != CAM_REQ_CMP) {
			device_printf(sc->dev,
			    "Error deregistering bus, status=%d\n", status);
		}

		cam_sim_free(sc->sim, TRUE);
	}

	pvscsi_dma_free_per_hcb(sc, sc->hcb_cnt);

	if (sc->hcbs) {
		free(sc->hcbs, M_PVSCSI);
	}

	pvscsi_free_rings(sc);

	pvscsi_free_interrupts(sc);

	if (sc->buffer_dmat != NULL) {
		bus_dma_tag_destroy(sc->buffer_dmat);
	}

	if (sc->parent_dmat != NULL) {
		bus_dma_tag_destroy(sc->parent_dmat);
	}

	if (sc->mm_res != NULL) {
		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->mm_rid,
		    sc->mm_res);
	}
}

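/*
 * Attach sequence: map the first memory BAR that works, create the DMA
 * tags, set up interrupts, size and allocate the rings and per-command
 * state, reset the adapter, register the CAM SIM, and only then program
 * the rings and unmask interrupts.
 */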
static int
pvscsi_attach(device_t dev)
{
	struct pvscsi_softc *sc;
	int rid;
	int barid;
	int error;
	int max_queue_depth;
	int adapter_queue_size;
	struct cam_devq *devq;

	sc = device_get_softc(dev);
	sc->dev = dev;

	mtx_init(&sc->lock, "pvscsi", NULL, MTX_DEF);

	pci_enable_busmaster(dev);

	sc->mm_rid = -1;
	for (barid = 0; barid <= PCIR_MAX_BAR_0; ++barid) {
		rid = PCIR_BAR(barid);

		sc->mm_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
		    RF_ACTIVE);
		if (sc->mm_res != NULL) {
			sc->mm_rid = rid;
			break;
		}
	}

	if (sc->mm_res == NULL) {
		device_printf(dev, "could not map device memory\n");
		return (ENXIO);
	}

	error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0,
	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE,
	    BUS_SPACE_UNRESTRICTED, BUS_SPACE_MAXSIZE, 0, NULL, NULL,
	    &sc->parent_dmat);
	if (error) {
		device_printf(dev, "parent dma tag create failure, error %d\n",
		    error);
		pvscsi_free_all(sc);
		return (ENXIO);
	}

	error = bus_dma_tag_create(sc->parent_dmat, 1, 0,
	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
	    PVSCSI_MAX_SG_ENTRIES_PER_SEGMENT * PAGE_SIZE,
	    PVSCSI_MAX_SG_ENTRIES_PER_SEGMENT, PAGE_SIZE, BUS_DMA_ALLOCNOW,
	    NULL, NULL, &sc->buffer_dmat);
	if (error) {
		device_printf(dev, "buffer dma tag create failure, error %d\n",
		    error);
		pvscsi_free_all(sc);
		return (ENXIO);
	}

	error = pvscsi_setup_interrupts(sc);
	if (error) {
		device_printf(dev, "Interrupt setup failed\n");
		pvscsi_free_all(sc);
		return (error);
	}

	sc->max_targets = pvscsi_get_max_targets(sc);

	sc->use_msg = pvscsi_get_tunable(sc, "use_msg", pvscsi_use_msg) &&
	    pvscsi_hw_supports_msg(sc);
	sc->msg_ring_num_pages = sc->use_msg ? 1 : 0;

	sc->req_ring_num_pages = pvscsi_get_tunable(sc, "request_ring_pages",
	    pvscsi_request_ring_pages);
	if (sc->req_ring_num_pages <= 0) {
		if (sc->max_targets <= 16) {
			sc->req_ring_num_pages =
			    PVSCSI_DEFAULT_NUM_PAGES_REQ_RING;
		} else {
			sc->req_ring_num_pages = PVSCSI_MAX_NUM_PAGES_REQ_RING;
		}
	} else if (sc->req_ring_num_pages > PVSCSI_MAX_NUM_PAGES_REQ_RING) {
		sc->req_ring_num_pages = PVSCSI_MAX_NUM_PAGES_REQ_RING;
	}
	sc->cmp_ring_num_pages = sc->req_ring_num_pages;

	max_queue_depth = pvscsi_get_tunable(sc, "max_queue_depth",
	    pvscsi_max_queue_depth);

	adapter_queue_size = (sc->req_ring_num_pages * PAGE_SIZE) /
	    sizeof(struct pvscsi_ring_req_desc);
	if (max_queue_depth > 0) {
		adapter_queue_size = MIN(adapter_queue_size, max_queue_depth);
	}
	adapter_queue_size = MIN(adapter_queue_size,
	    PVSCSI_MAX_REQ_QUEUE_DEPTH);

	device_printf(sc->dev, "Use Msg: %d\n", sc->use_msg);
	device_printf(sc->dev, "REQ num pages: %d\n", sc->req_ring_num_pages);
	device_printf(sc->dev, "CMP num pages: %d\n", sc->cmp_ring_num_pages);
	device_printf(sc->dev, "MSG num pages: %d\n", sc->msg_ring_num_pages);
	device_printf(sc->dev, "Queue size: %d\n", adapter_queue_size);

	if (pvscsi_allocate_rings(sc)) {
		device_printf(dev, "ring allocation failed\n");
		pvscsi_free_all(sc);
		return (ENXIO);
	}

	sc->hcb_cnt = adapter_queue_size;
	sc->hcbs = malloc(sc->hcb_cnt * sizeof(*sc->hcbs), M_PVSCSI,
	    M_NOWAIT | M_ZERO);
	if (sc->hcbs == NULL) {
		device_printf(dev, "error allocating hcb array\n");
		pvscsi_free_all(sc);
		return (ENXIO);
	}

	if (pvscsi_dma_alloc_per_hcb(sc)) {
		device_printf(dev, "error allocating per hcb dma memory\n");
		pvscsi_free_all(sc);
		return (ENXIO);
	}

	pvscsi_adapter_reset(sc);

	devq = cam_simq_alloc(adapter_queue_size);
	if (devq == NULL) {
		device_printf(dev, "cam devq alloc failed\n");
		pvscsi_free_all(sc);
		return (ENXIO);
	}

	sc->sim = cam_sim_alloc(pvscsi_action, pvscsi_poll, "pvscsi", sc,
	    device_get_unit(dev), &sc->lock, 1, adapter_queue_size, devq);
	if (sc->sim == NULL) {
		device_printf(dev, "cam sim alloc failed\n");
		cam_simq_free(devq);
		pvscsi_free_all(sc);
		return (ENXIO);
	}

	mtx_lock(&sc->lock);

	if (xpt_bus_register(sc->sim, dev, 0) != CAM_SUCCESS) {
		device_printf(dev, "xpt bus register failed\n");
		pvscsi_free_all(sc);
		mtx_unlock(&sc->lock);
		return (ENXIO);
	}

	if (xpt_create_path(&sc->bus_path, NULL, cam_sim_path(sc->sim),
	    CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
		device_printf(dev, "xpt create path failed\n");
		pvscsi_free_all(sc);
		mtx_unlock(&sc->lock);
		return (ENXIO);
	}

	pvscsi_setup_rings(sc);
	if (sc->use_msg) {
		pvscsi_setup_msg_ring(sc);
	}

	sc->use_req_call_threshold = pvscsi_setup_req_call(sc, 1);

	pvscsi_intr_enable(sc);

	mtx_unlock(&sc->lock);

	return (0);
}

static int
pvscsi_detach(device_t dev)
{
	struct pvscsi_softc *sc;

	sc = device_get_softc(dev);

	pvscsi_intr_disable(sc);
	pvscsi_adapter_reset(sc);

	if (sc->irq_handler != NULL) {
		bus_teardown_intr(dev, sc->irq_res, sc->irq_handler);
	}

	mtx_lock(&sc->lock);
	pvscsi_free_all(sc);
	mtx_unlock(&sc->lock);

	mtx_destroy(&sc->lock);

	return (0);
}

static device_method_t pvscsi_methods[] = {
	DEVMETHOD(device_probe, pvscsi_probe),
	DEVMETHOD(device_shutdown, pvscsi_shutdown),
	DEVMETHOD(device_attach, pvscsi_attach),
	DEVMETHOD(device_detach, pvscsi_detach),
	DEVMETHOD_END
};

static driver_t pvscsi_driver = {
	"pvscsi", pvscsi_methods, sizeof(struct pvscsi_softc)
};

static devclass_t pvscsi_devclass;
DRIVER_MODULE(pvscsi, pci, pvscsi_driver, pvscsi_devclass, 0, 0);

MODULE_DEPEND(pvscsi, pci, 1, 1, 1);
MODULE_DEPEND(pvscsi, cam, 1, 1, 1);