xref: /freebsd/sys/dev/vmware/pvscsi/pvscsi.c (revision 031beb4e239bfce798af17f5fe8dba8bcaf13d99)
1 /*-
2  * Copyright (c) 2018 VMware, Inc.
3  *
4  * SPDX-License-Identifier: (BSD-2-Clause OR GPL-2.0)
5  */
6 
7 #include <sys/cdefs.h>
8 #include <sys/param.h>
9 #include <sys/bus.h>
10 #include <sys/errno.h>
11 #include <sys/kernel.h>
12 #include <sys/malloc.h>
13 #include <sys/module.h>
14 #include <sys/queue.h>
15 #include <sys/rman.h>
16 #include <sys/sysctl.h>
17 #include <sys/systm.h>
18 
19 #include <machine/bus.h>
20 #include <machine/resource.h>
21 
22 #include <dev/pci/pcireg.h>
23 #include <dev/pci/pcivar.h>
24 
25 #include <cam/cam.h>
26 #include <cam/cam_ccb.h>
27 #include <cam/cam_debug.h>
28 #include <cam/cam_sim.h>
29 #include <cam/cam_xpt_sim.h>
30 #include <cam/scsi/scsi_message.h>
31 
32 #include "pvscsi.h"
33 
/* NOTE(review): presumably the request ring size used when no tunable is set. */
#define	PVSCSI_DEFAULT_NUM_PAGES_REQ_RING	8
/* Bytes of sense buffer carved out per hcb (see pvscsi_dma_alloc_per_hcb). */
#define	PVSCSI_SENSE_LENGTH			256

/* malloc(9) type for this driver's allocations. */
MALLOC_DECLARE(M_PVSCSI);
MALLOC_DEFINE(M_PVSCSI, "pvscsi", "PVSCSI memory");

/*
 * DEBUG_PRINTF(level, dev, fmt, ...): device_printf() gated on the
 * pvscsi_log_level variable when PVSCSI_DEBUG_LOGGING is defined; otherwise
 * it expands to nothing, so its arguments are not evaluated.
 */
#ifdef PVSCSI_DEBUG_LOGGING
#define	DEBUG_PRINTF(level, dev, fmt, ...)				\
	do {								\
		if (pvscsi_log_level >= (level)) {			\
			device_printf((dev), (fmt), ##__VA_ARGS__);	\
		}							\
	} while(0)
#else
#define DEBUG_PRINTF(level, dev, fmt, ...)
#endif /* PVSCSI_DEBUG_LOGGING */

/*
 * Aliases for the two ccb_h private pointer slots this driver uses: one
 * holds the hcb, the other the softc.
 */
#define	ccb_pvscsi_hcb	spriv_ptr0
#define	ccb_pvscsi_sc	spriv_ptr1
53 
/* Types defined later in this file. */
struct pvscsi_softc;
struct pvscsi_hcb;
struct pvscsi_dma;

/* Register access, interrupt masking, and device command helpers. */
static inline uint32_t pvscsi_reg_read(struct pvscsi_softc *sc,
    uint32_t offset);
static inline void pvscsi_reg_write(struct pvscsi_softc *sc, uint32_t offset,
    uint32_t val);
static inline uint32_t pvscsi_read_intr_status(struct pvscsi_softc *sc);
static inline void pvscsi_write_intr_status(struct pvscsi_softc *sc,
    uint32_t val);
static inline void pvscsi_intr_enable(struct pvscsi_softc *sc);
static inline void pvscsi_intr_disable(struct pvscsi_softc *sc);
static void pvscsi_kick_io(struct pvscsi_softc *sc, uint8_t cdb0);
static void pvscsi_write_cmd(struct pvscsi_softc *sc, uint32_t cmd, void *data,
    uint32_t len);
static uint32_t pvscsi_get_max_targets(struct pvscsi_softc *sc);
static int pvscsi_setup_req_call(struct pvscsi_softc *sc, uint32_t enable);
static void pvscsi_setup_rings(struct pvscsi_softc *sc);
static void pvscsi_setup_msg_ring(struct pvscsi_softc *sc);
static int pvscsi_hw_supports_msg(struct pvscsi_softc *sc);

/* Timeout handling and the escalating recovery actions it triggers. */
static void pvscsi_timeout(void *arg);
static void pvscsi_freeze(struct pvscsi_softc *sc);
static void pvscsi_adapter_reset(struct pvscsi_softc *sc);
static void pvscsi_bus_reset(struct pvscsi_softc *sc);
static void pvscsi_device_reset(struct pvscsi_softc *sc, uint32_t target);
static void pvscsi_abort(struct pvscsi_softc *sc, uint32_t target,
    union ccb *ccb);

/* Completion ring and message ring processing. */
static void pvscsi_process_completion(struct pvscsi_softc *sc,
    struct pvscsi_ring_cmp_desc *e);
static void pvscsi_process_cmp_ring(struct pvscsi_softc *sc);
static void pvscsi_process_msg(struct pvscsi_softc *sc,
    struct pvscsi_ring_msg_desc *e);
static void pvscsi_process_msg_ring(struct pvscsi_softc *sc);

/* Interrupt and polling entry points. */
static void pvscsi_intr_locked(struct pvscsi_softc *sc);
static void pvscsi_intr(void *xsc);
static void pvscsi_poll(struct cam_sim *sim);

/* CCB submission. */
static void pvscsi_execute_ccb(void *arg, bus_dma_segment_t *segs, int nseg,
    int error);
static void pvscsi_action(struct cam_sim *sim, union ccb *ccb);

/* hcb <-> context conversion and hcb free-list management. */
static inline uint64_t pvscsi_hcb_to_context(struct pvscsi_softc *sc,
    struct pvscsi_hcb *hcb);
static inline struct pvscsi_hcb* pvscsi_context_to_hcb(struct pvscsi_softc *sc,
    uint64_t context);
static struct pvscsi_hcb * pvscsi_hcb_get(struct pvscsi_softc *sc);
static void pvscsi_hcb_put(struct pvscsi_softc *sc, struct pvscsi_hcb *hcb);

/* DMA memory, ring, and interrupt resource management. */
static void pvscsi_dma_cb(void *arg, bus_dma_segment_t *segs, int nseg,
    int error);
static void pvscsi_dma_free(struct pvscsi_softc *sc, struct pvscsi_dma *dma);
static int pvscsi_dma_alloc(struct pvscsi_softc *sc, struct pvscsi_dma *dma,
    bus_size_t size, bus_size_t alignment);
static int pvscsi_dma_alloc_ppns(struct pvscsi_softc *sc,
    struct pvscsi_dma *dma, uint64_t *ppn_list, uint32_t num_pages);
static void pvscsi_dma_free_per_hcb(struct pvscsi_softc *sc,
    uint32_t hcbs_allocated);
static int pvscsi_dma_alloc_per_hcb(struct pvscsi_softc *sc);
static void pvscsi_free_rings(struct pvscsi_softc *sc);
static int pvscsi_allocate_rings(struct pvscsi_softc *sc);
static void pvscsi_free_interrupts(struct pvscsi_softc *sc);
static int pvscsi_setup_interrupts(struct pvscsi_softc *sc);
static void pvscsi_free_all(struct pvscsi_softc *sc);

/* Device methods and the per-unit tunable lookup helper. */
static int pvscsi_attach(device_t dev);
static int pvscsi_detach(device_t dev);
static int pvscsi_probe(device_t dev);
static int pvscsi_shutdown(device_t dev);
static int pvscsi_get_tunable(struct pvscsi_softc *sc, char *name, int value);
127 
/* Verbosity threshold consulted by DEBUG_PRINTF; only built with debug logging. */
#ifdef PVSCSI_DEBUG_LOGGING
static int pvscsi_log_level = 0;
static SYSCTL_NODE(_hw, OID_AUTO, pvscsi, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "PVSCSI driver parameters");
SYSCTL_INT(_hw_pvscsi, OID_AUTO, log_level, CTLFLAG_RWTUN, &pvscsi_log_level,
    0, "PVSCSI debug log level");
#endif

/*
 * Driver-wide loader tunables.  The values below are the compiled-in
 * defaults; pvscsi_get_tunable() additionally allows a per-unit override of
 * the form hw.pvscsi.<unit>.<name>.
 */
/* NOTE(review): 0 presumably means "use the driver's default ring size". */
static int pvscsi_request_ring_pages = 0;
TUNABLE_INT("hw.pvscsi.request_ring_pages", &pvscsi_request_ring_pages);

static int pvscsi_use_msg = 1;
TUNABLE_INT("hw.pvscsi.use_msg", &pvscsi_use_msg);

static int pvscsi_use_msi = 1;
TUNABLE_INT("hw.pvscsi.use_msi", &pvscsi_use_msi);

static int pvscsi_use_msix = 1;
TUNABLE_INT("hw.pvscsi.use_msix", &pvscsi_use_msix);

/* Default passed to the per-unit lookup in pvscsi_setup_req_call(). */
static int pvscsi_use_req_call_threshold = 1;
TUNABLE_INT("hw.pvscsi.use_req_call_threshold", &pvscsi_use_req_call_threshold);

/* NOTE(review): 0 presumably means "no explicit queue depth limit". */
static int pvscsi_max_queue_depth = 0;
TUNABLE_INT("hw.pvscsi.max_queue_depth", &pvscsi_max_queue_depth);
153 
/*
 * Scatter/gather table for one command.  One of these is carved out of
 * sg_list_dma for every hcb in pvscsi_dma_alloc_per_hcb().
 */
struct pvscsi_sg_list {
	struct pvscsi_sg_element sge[PVSCSI_MAX_SG_ENTRIES_PER_SEGMENT];
};
157 
/* Seconds pvscsi_timeout() waits after an abort / after a reset. */
#define	PVSCSI_ABORT_TIMEOUT	2
#define	PVSCSI_RESET_TIMEOUT	10

/*
 * Values of pvscsi_hcb.recovery: the last recovery step pvscsi_timeout()
 * has taken for a command.  Each further timeout escalates to the next step.
 */
#define	PVSCSI_HCB_NONE		0
#define	PVSCSI_HCB_ABORT	1
#define	PVSCSI_HCB_DEVICE_RESET	2
#define	PVSCSI_HCB_BUS_RESET	3
165 
/*
 * Per-command control block.  hcbs live in the sc->hcbs array and are handed
 * out from / returned to sc->free_list by pvscsi_hcb_get()/pvscsi_hcb_put().
 */
struct pvscsi_hcb {
	/* CCB bound to this hcb; pvscsi_hcb_put() resets it to NULL. */
	union ccb			*ccb;
	/* Request ring descriptor for the command; reset to NULL on put. */
	struct pvscsi_ring_req_desc	*e;
	/* One of PVSCSI_HCB_*; advanced by pvscsi_timeout(). */
	int				 recovery;
	/* Free-list linkage. */
	SLIST_ENTRY(pvscsi_hcb)		 links;

	/* Timeout callout, initialized against sc->lock. */
	struct callout			 callout;
	/* Data buffer map created from sc->buffer_dmat. */
	bus_dmamap_t			 dma_map;
	/* This hcb's slice of sc->sense_buffer_dma (virtual / bus address). */
	void				*sense_buffer;
	bus_addr_t			 sense_buffer_paddr;
	/* This hcb's slice of sc->sg_list_dma (virtual / bus address). */
	struct pvscsi_sg_list		*sg_list;
	bus_addr_t			 sg_list_paddr;
};
179 
/*
 * Bookkeeping for one single-segment DMA allocation made by
 * pvscsi_dma_alloc() and released by pvscsi_dma_free().  An all-zero
 * structure means "nothing allocated".
 */
struct pvscsi_dma
{
	bus_dma_tag_t	 tag;	/* tag created for this allocation */
	bus_dmamap_t	 map;	/* map returned by bus_dmamem_alloc() */
	void		*vaddr;	/* kernel virtual address of the memory */
	bus_addr_t	 paddr;	/* bus address filled in by pvscsi_dma_cb() */
	bus_size_t	 size;	/* size in bytes, set once the load succeeded */
};
188 
/* Per-adapter driver state. */
struct pvscsi_softc {
	device_t		 dev;
	/* Mutex asserted by ring processing and hcb free-list handling. */
	struct mtx		 lock;
	struct cam_sim		*sim;
	struct cam_path		*bus_path;
	/* Non-zero while a SIM queue freeze from pvscsi_freeze() is pending. */
	int			 frozen;
	/* Kernel virtual addresses of the ring DMA areas allocated below. */
	struct pvscsi_rings_state	*rings_state;
	struct pvscsi_ring_req_desc	*req_ring;
	struct pvscsi_ring_cmp_desc	*cmp_ring;
	struct pvscsi_ring_msg_desc	*msg_ring;
	/* Number of entries in hcbs[], and the array itself. */
	uint32_t		 hcb_cnt;
	struct pvscsi_hcb	*hcbs;
	SLIST_HEAD(, pvscsi_hcb)	free_list;
	/* Parent tag for pvscsi_dma_alloc(); tag for per-hcb data maps. */
	bus_dma_tag_t		parent_dmat;
	bus_dma_tag_t		buffer_dmat;

	/* Whether the message ring is set up and serviced. */
	bool		 use_msg;
	uint32_t	 max_targets;
	/* Register resource accessed by pvscsi_reg_read()/pvscsi_reg_write(). */
	int		 mm_rid;
	struct resource	*mm_res;
	int		 irq_id;
	struct resource	*irq_res;
	void		*irq_handler;
	/* When set, pvscsi_kick_io() only kicks R/W I/O past the threshold. */
	int		 use_req_call_threshold;
	int		 use_msi_or_msix;

	/* Page numbers of the ring memory, handed to the device at setup. */
	uint64_t	rings_state_ppn;
	uint32_t	req_ring_num_pages;
	uint64_t	req_ring_ppn[PVSCSI_MAX_NUM_PAGES_REQ_RING];
	uint32_t	cmp_ring_num_pages;
	uint64_t	cmp_ring_ppn[PVSCSI_MAX_NUM_PAGES_CMP_RING];
	uint32_t	msg_ring_num_pages;
	uint64_t	msg_ring_ppn[PVSCSI_MAX_NUM_PAGES_MSG_RING];

	/* DMA allocations backing the rings. */
	struct	pvscsi_dma rings_state_dma;
	struct	pvscsi_dma req_ring_dma;
	struct	pvscsi_dma cmp_ring_dma;
	struct	pvscsi_dma msg_ring_dma;

	/* DMA allocations sliced up per hcb. */
	struct	pvscsi_dma sg_list_dma;
	struct	pvscsi_dma sense_buffer_dma;
};
231 
232 static int pvscsi_get_tunable(struct pvscsi_softc *sc, char *name, int value)
233 {
234 	char cfg[64];
235 
236 	snprintf(cfg, sizeof(cfg), "hw.pvscsi.%d.%s", device_get_unit(sc->dev),
237 	    name);
238 	TUNABLE_INT_FETCH(cfg, &value);
239 
240 	return (value);
241 }
242 
243 static void
244 pvscsi_freeze(struct pvscsi_softc *sc)
245 {
246 
247 	if (!sc->frozen) {
248 		xpt_freeze_simq(sc->sim, 1);
249 		sc->frozen = 1;
250 	}
251 }
252 
253 static inline uint32_t
254 pvscsi_reg_read(struct pvscsi_softc *sc, uint32_t offset)
255 {
256 
257 	return (bus_read_4(sc->mm_res, offset));
258 }
259 
260 static inline void
261 pvscsi_reg_write(struct pvscsi_softc *sc, uint32_t offset, uint32_t val)
262 {
263 
264 	bus_write_4(sc->mm_res, offset, val);
265 }
266 
267 static inline uint32_t
268 pvscsi_read_intr_status(struct pvscsi_softc *sc)
269 {
270 
271 	return (pvscsi_reg_read(sc, PVSCSI_REG_OFFSET_INTR_STATUS));
272 }
273 
274 static inline void
275 pvscsi_write_intr_status(struct pvscsi_softc *sc, uint32_t val)
276 {
277 
278 	pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_INTR_STATUS, val);
279 }
280 
281 static inline void
282 pvscsi_intr_enable(struct pvscsi_softc *sc)
283 {
284 	uint32_t mask;
285 
286 	mask = PVSCSI_INTR_CMPL_MASK;
287 	if (sc->use_msg) {
288 		mask |= PVSCSI_INTR_MSG_MASK;
289 	}
290 
291 	pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_INTR_MASK, mask);
292 }
293 
294 static inline void
295 pvscsi_intr_disable(struct pvscsi_softc *sc)
296 {
297 
298 	pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_INTR_MASK, 0);
299 }
300 
301 static void
302 pvscsi_kick_io(struct pvscsi_softc *sc, uint8_t cdb0)
303 {
304 	struct pvscsi_rings_state *s;
305 
306 	if (cdb0 == READ_6  || cdb0 == READ_10  ||
307 	    cdb0 == READ_12  || cdb0 == READ_16 ||
308 	    cdb0 == WRITE_6 || cdb0 == WRITE_10 ||
309 	    cdb0 == WRITE_12 || cdb0 == WRITE_16) {
310 		s = sc->rings_state;
311 
312 		if (!sc->use_req_call_threshold ||
313 		    (s->req_prod_idx - s->req_cons_idx) >=
314 		     s->req_call_threshold) {
315 			pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_KICK_RW_IO, 0);
316 		}
317 	} else {
318 		pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_KICK_NON_RW_IO, 0);
319 	}
320 }
321 
322 static void
323 pvscsi_write_cmd(struct pvscsi_softc *sc, uint32_t cmd, void *data,
324 		 uint32_t len)
325 {
326 	uint32_t *data_ptr;
327 	int i;
328 
329 	KASSERT(len % sizeof(uint32_t) == 0,
330 		("command size not a multiple of 4"));
331 
332 	data_ptr = data;
333 	len /= sizeof(uint32_t);
334 
335 	pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_COMMAND, cmd);
336 	for (i = 0; i < len; ++i) {
337 		pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_COMMAND_DATA,
338 		   data_ptr[i]);
339 	}
340 }
341 
342 static inline uint64_t pvscsi_hcb_to_context(struct pvscsi_softc *sc,
343     struct pvscsi_hcb *hcb)
344 {
345 
346 	/* Offset by 1 because context must not be 0 */
347 	return (hcb - sc->hcbs + 1);
348 }
349 
350 static inline struct pvscsi_hcb* pvscsi_context_to_hcb(struct pvscsi_softc *sc,
351     uint64_t context)
352 {
353 
354 	return (sc->hcbs + (context - 1));
355 }
356 
357 static struct pvscsi_hcb *
358 pvscsi_hcb_get(struct pvscsi_softc *sc)
359 {
360 	struct pvscsi_hcb *hcb;
361 
362 	mtx_assert(&sc->lock, MA_OWNED);
363 
364 	hcb = SLIST_FIRST(&sc->free_list);
365 	if (hcb) {
366 		SLIST_REMOVE_HEAD(&sc->free_list, links);
367 	}
368 
369 	return (hcb);
370 }
371 
372 static void
373 pvscsi_hcb_put(struct pvscsi_softc *sc, struct pvscsi_hcb *hcb)
374 {
375 
376 	mtx_assert(&sc->lock, MA_OWNED);
377 	hcb->ccb = NULL;
378 	hcb->e = NULL;
379 	hcb->recovery = PVSCSI_HCB_NONE;
380 	SLIST_INSERT_HEAD(&sc->free_list, hcb, links);
381 }
382 
383 static uint32_t
384 pvscsi_get_max_targets(struct pvscsi_softc *sc)
385 {
386 	uint32_t max_targets;
387 
388 	pvscsi_write_cmd(sc, PVSCSI_CMD_GET_MAX_TARGETS, NULL, 0);
389 
390 	max_targets = pvscsi_reg_read(sc, PVSCSI_REG_OFFSET_COMMAND_STATUS);
391 
392 	if (max_targets == ~0) {
393 		max_targets = 16;
394 	}
395 
396 	return (max_targets);
397 }
398 
399 static int pvscsi_setup_req_call(struct pvscsi_softc *sc, uint32_t enable)
400 {
401 	uint32_t status;
402 	struct pvscsi_cmd_desc_setup_req_call cmd;
403 
404 	if (!pvscsi_get_tunable(sc, "pvscsi_use_req_call_threshold",
405 	    pvscsi_use_req_call_threshold)) {
406 		return (0);
407 	}
408 
409 	pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_COMMAND,
410 	    PVSCSI_CMD_SETUP_REQCALLTHRESHOLD);
411 	status = pvscsi_reg_read(sc, PVSCSI_REG_OFFSET_COMMAND_STATUS);
412 
413 	if (status != -1) {
414 		bzero(&cmd, sizeof(cmd));
415 		cmd.enable = enable;
416 		pvscsi_write_cmd(sc, PVSCSI_CMD_SETUP_REQCALLTHRESHOLD,
417 		    &cmd, sizeof(cmd));
418 		status = pvscsi_reg_read(sc, PVSCSI_REG_OFFSET_COMMAND_STATUS);
419 
420 		return (status != 0);
421 	} else {
422 		return (0);
423 	}
424 }
425 
426 static void
427 pvscsi_dma_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
428 {
429 	bus_addr_t *dest;
430 
431 	KASSERT(nseg == 1, ("more than one segment"));
432 
433 	dest = arg;
434 
435 	if (!error) {
436 		*dest = segs->ds_addr;
437 	}
438 }
439 
440 static void
441 pvscsi_dma_free(struct pvscsi_softc *sc, struct pvscsi_dma *dma)
442 {
443 
444 	if (dma->tag != NULL) {
445 		if (dma->paddr != 0) {
446 			bus_dmamap_unload(dma->tag, dma->map);
447 		}
448 
449 		if (dma->vaddr != NULL) {
450 			bus_dmamem_free(dma->tag, dma->vaddr, dma->map);
451 		}
452 
453 		bus_dma_tag_destroy(dma->tag);
454 	}
455 
456 	bzero(dma, sizeof(*dma));
457 }
458 
459 static int
460 pvscsi_dma_alloc(struct pvscsi_softc *sc, struct pvscsi_dma *dma,
461     bus_size_t size, bus_size_t alignment)
462 {
463 	int error;
464 
465 	bzero(dma, sizeof(*dma));
466 
467 	error = bus_dma_tag_create(sc->parent_dmat, alignment, 0,
468 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, size, 1, size,
469 	    BUS_DMA_ALLOCNOW, NULL, NULL, &dma->tag);
470 	if (error) {
471 		device_printf(sc->dev, "error creating dma tag, error %d\n",
472 		    error);
473 		goto fail;
474 	}
475 
476 	error = bus_dmamem_alloc(dma->tag, &dma->vaddr,
477 	    BUS_DMA_NOWAIT | BUS_DMA_ZERO, &dma->map);
478 	if (error) {
479 		device_printf(sc->dev, "error allocating dma mem, error %d\n",
480 		    error);
481 		goto fail;
482 	}
483 
484 	error = bus_dmamap_load(dma->tag, dma->map, dma->vaddr, size,
485 	    pvscsi_dma_cb, &dma->paddr, BUS_DMA_NOWAIT);
486 	if (error) {
487 		device_printf(sc->dev, "error mapping dma mam, error %d\n",
488 		    error);
489 		goto fail;
490 	}
491 
492 	dma->size = size;
493 
494 fail:
495 	if (error) {
496 		pvscsi_dma_free(sc, dma);
497 	}
498 	return (error);
499 }
500 
501 static int
502 pvscsi_dma_alloc_ppns(struct pvscsi_softc *sc, struct pvscsi_dma *dma,
503     uint64_t *ppn_list, uint32_t num_pages)
504 {
505 	int error;
506 	uint32_t i;
507 	uint64_t ppn;
508 
509 	error = pvscsi_dma_alloc(sc, dma, num_pages * PAGE_SIZE, PAGE_SIZE);
510 	if (error) {
511 		device_printf(sc->dev, "Error allocating pages, error %d\n",
512 		    error);
513 		return (error);
514 	}
515 
516 	ppn = dma->paddr >> PAGE_SHIFT;
517 	for (i = 0; i < num_pages; i++) {
518 		ppn_list[i] = ppn + i;
519 	}
520 
521 	return (0);
522 }
523 
524 static void
525 pvscsi_dma_free_per_hcb(struct pvscsi_softc *sc, uint32_t hcbs_allocated)
526 {
527 	int i;
528 	int lock_owned;
529 	struct pvscsi_hcb *hcb;
530 
531 	lock_owned = mtx_owned(&sc->lock);
532 
533 	if (lock_owned) {
534 		mtx_unlock(&sc->lock);
535 	}
536 	for (i = 0; i < hcbs_allocated; ++i) {
537 		hcb = sc->hcbs + i;
538 		callout_drain(&hcb->callout);
539 	};
540 	if (lock_owned) {
541 		mtx_lock(&sc->lock);
542 	}
543 
544 	for (i = 0; i < hcbs_allocated; ++i) {
545 		hcb = sc->hcbs + i;
546 		bus_dmamap_destroy(sc->buffer_dmat, hcb->dma_map);
547 	};
548 
549 	pvscsi_dma_free(sc, &sc->sense_buffer_dma);
550 	pvscsi_dma_free(sc, &sc->sg_list_dma);
551 }
552 
553 static int
554 pvscsi_dma_alloc_per_hcb(struct pvscsi_softc *sc)
555 {
556 	int i;
557 	int error;
558 	struct pvscsi_hcb *hcb;
559 
560 	i = 0;
561 
562 	error = pvscsi_dma_alloc(sc, &sc->sg_list_dma,
563 	    sizeof(struct pvscsi_sg_list) * sc->hcb_cnt, 1);
564 	if (error) {
565 		device_printf(sc->dev,
566 		    "Error allocation sg list DMA memory, error %d\n", error);
567 		goto fail;
568 	}
569 
570 	error = pvscsi_dma_alloc(sc, &sc->sense_buffer_dma,
571 				 PVSCSI_SENSE_LENGTH * sc->hcb_cnt, 1);
572 	if (error) {
573 		device_printf(sc->dev,
574 		    "Error allocation sg list DMA memory, error %d\n", error);
575 		goto fail;
576 	}
577 
578 	for (i = 0; i < sc->hcb_cnt; ++i) {
579 		hcb = sc->hcbs + i;
580 
581 		error = bus_dmamap_create(sc->buffer_dmat, 0, &hcb->dma_map);
582 		if (error) {
583 			device_printf(sc->dev,
584 			    "Error creating dma map for hcb %d, error %d\n",
585 			    i, error);
586 			goto fail;
587 		}
588 
589 		hcb->sense_buffer =
590 		    (void *)((caddr_t)sc->sense_buffer_dma.vaddr +
591 		    PVSCSI_SENSE_LENGTH * i);
592 		hcb->sense_buffer_paddr =
593 		    sc->sense_buffer_dma.paddr + PVSCSI_SENSE_LENGTH * i;
594 
595 		hcb->sg_list =
596 		    (struct pvscsi_sg_list *)((caddr_t)sc->sg_list_dma.vaddr +
597 		    sizeof(struct pvscsi_sg_list) * i);
598 		hcb->sg_list_paddr =
599 		    sc->sg_list_dma.paddr + sizeof(struct pvscsi_sg_list) * i;
600 
601 		callout_init_mtx(&hcb->callout, &sc->lock, 0);
602 	}
603 
604 	SLIST_INIT(&sc->free_list);
605 	for (i = (sc->hcb_cnt - 1); i >= 0; --i) {
606 		hcb = sc->hcbs + i;
607 		SLIST_INSERT_HEAD(&sc->free_list, hcb, links);
608 	}
609 
610 fail:
611 	if (error) {
612 		pvscsi_dma_free_per_hcb(sc, i);
613 	}
614 
615 	return (error);
616 }
617 
618 static void
619 pvscsi_free_rings(struct pvscsi_softc *sc)
620 {
621 
622 	pvscsi_dma_free(sc, &sc->rings_state_dma);
623 	pvscsi_dma_free(sc, &sc->req_ring_dma);
624 	pvscsi_dma_free(sc, &sc->cmp_ring_dma);
625 	if (sc->use_msg) {
626 		pvscsi_dma_free(sc, &sc->msg_ring_dma);
627 	}
628 }
629 
630 static int
631 pvscsi_allocate_rings(struct pvscsi_softc *sc)
632 {
633 	int error;
634 
635 	error = pvscsi_dma_alloc_ppns(sc, &sc->rings_state_dma,
636 	    &sc->rings_state_ppn, 1);
637 	if (error) {
638 		device_printf(sc->dev,
639 		    "Error allocating rings state, error = %d\n", error);
640 		goto fail;
641 	}
642 	sc->rings_state = sc->rings_state_dma.vaddr;
643 
644 	error = pvscsi_dma_alloc_ppns(sc, &sc->req_ring_dma, sc->req_ring_ppn,
645 	    sc->req_ring_num_pages);
646 	if (error) {
647 		device_printf(sc->dev,
648 		    "Error allocating req ring pages, error = %d\n", error);
649 		goto fail;
650 	}
651 	sc->req_ring = sc->req_ring_dma.vaddr;
652 
653 	error = pvscsi_dma_alloc_ppns(sc, &sc->cmp_ring_dma, sc->cmp_ring_ppn,
654 	    sc->cmp_ring_num_pages);
655 	if (error) {
656 		device_printf(sc->dev,
657 		    "Error allocating cmp ring pages, error = %d\n", error);
658 		goto fail;
659 	}
660 	sc->cmp_ring = sc->cmp_ring_dma.vaddr;
661 
662 	sc->msg_ring = NULL;
663 	if (sc->use_msg) {
664 		error = pvscsi_dma_alloc_ppns(sc, &sc->msg_ring_dma,
665 		    sc->msg_ring_ppn, sc->msg_ring_num_pages);
666 		if (error) {
667 			device_printf(sc->dev,
668 			    "Error allocating cmp ring pages, error = %d\n",
669 			    error);
670 			goto fail;
671 		}
672 		sc->msg_ring = sc->msg_ring_dma.vaddr;
673 	}
674 
675 	DEBUG_PRINTF(1, sc->dev, "rings_state: %p\n", sc->rings_state);
676 	DEBUG_PRINTF(1, sc->dev, "req_ring: %p - %u pages\n", sc->req_ring,
677 	    sc->req_ring_num_pages);
678 	DEBUG_PRINTF(1, sc->dev, "cmp_ring: %p - %u pages\n", sc->cmp_ring,
679 	    sc->cmp_ring_num_pages);
680 	DEBUG_PRINTF(1, sc->dev, "msg_ring: %p - %u pages\n", sc->msg_ring,
681 	    sc->msg_ring_num_pages);
682 
683 fail:
684 	if (error) {
685 		pvscsi_free_rings(sc);
686 	}
687 	return (error);
688 }
689 
690 static void
691 pvscsi_setup_rings(struct pvscsi_softc *sc)
692 {
693 	struct pvscsi_cmd_desc_setup_rings cmd;
694 	uint32_t i;
695 
696 	bzero(&cmd, sizeof(cmd));
697 
698 	cmd.rings_state_ppn = sc->rings_state_ppn;
699 
700 	cmd.req_ring_num_pages = sc->req_ring_num_pages;
701 	for (i = 0; i < sc->req_ring_num_pages; ++i) {
702 		cmd.req_ring_ppns[i] = sc->req_ring_ppn[i];
703 	}
704 
705 	cmd.cmp_ring_num_pages = sc->cmp_ring_num_pages;
706 	for (i = 0; i < sc->cmp_ring_num_pages; ++i) {
707 		cmd.cmp_ring_ppns[i] = sc->cmp_ring_ppn[i];
708 	}
709 
710 	pvscsi_write_cmd(sc, PVSCSI_CMD_SETUP_RINGS, &cmd, sizeof(cmd));
711 }
712 
713 static int
714 pvscsi_hw_supports_msg(struct pvscsi_softc *sc)
715 {
716 	uint32_t status;
717 
718 	pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_COMMAND,
719 	    PVSCSI_CMD_SETUP_MSG_RING);
720 	status = pvscsi_reg_read(sc, PVSCSI_REG_OFFSET_COMMAND_STATUS);
721 
722 	return (status != -1);
723 }
724 
725 static void
726 pvscsi_setup_msg_ring(struct pvscsi_softc *sc)
727 {
728 	struct pvscsi_cmd_desc_setup_msg_ring cmd;
729 	uint32_t i;
730 
731 	KASSERT(sc->use_msg, ("msg is not being used"));
732 
733 	bzero(&cmd, sizeof(cmd));
734 
735 	cmd.num_pages = sc->msg_ring_num_pages;
736 	for (i = 0; i < sc->msg_ring_num_pages; ++i) {
737 		cmd.ring_ppns[i] = sc->msg_ring_ppn[i];
738 	}
739 
740 	pvscsi_write_cmd(sc, PVSCSI_CMD_SETUP_MSG_RING, &cmd, sizeof(cmd));
741 }
742 
743 static void
744 pvscsi_adapter_reset(struct pvscsi_softc *sc)
745 {
746 	uint32_t val __unused;
747 
748 	device_printf(sc->dev, "Adapter Reset\n");
749 
750 	pvscsi_write_cmd(sc, PVSCSI_CMD_ADAPTER_RESET, NULL, 0);
751 	val = pvscsi_read_intr_status(sc);
752 
753 	DEBUG_PRINTF(2, sc->dev, "adapter reset done: %u\n", val);
754 }
755 
756 static void
757 pvscsi_bus_reset(struct pvscsi_softc *sc)
758 {
759 
760 	device_printf(sc->dev, "Bus Reset\n");
761 
762 	pvscsi_write_cmd(sc, PVSCSI_CMD_RESET_BUS, NULL, 0);
763 	pvscsi_process_cmp_ring(sc);
764 
765 	DEBUG_PRINTF(2, sc->dev, "bus reset done\n");
766 }
767 
768 static void
769 pvscsi_device_reset(struct pvscsi_softc *sc, uint32_t target)
770 {
771 	struct pvscsi_cmd_desc_reset_device cmd;
772 
773 	memset(&cmd, 0, sizeof(cmd));
774 
775 	cmd.target = target;
776 
777 	device_printf(sc->dev, "Device reset for target %u\n", target);
778 
779 	pvscsi_write_cmd(sc, PVSCSI_CMD_RESET_DEVICE, &cmd, sizeof cmd);
780 	pvscsi_process_cmp_ring(sc);
781 
782 	DEBUG_PRINTF(2, sc->dev, "device reset done\n");
783 }
784 
785 static void
786 pvscsi_abort(struct pvscsi_softc *sc, uint32_t target, union ccb *ccb)
787 {
788 	struct pvscsi_cmd_desc_abort_cmd cmd;
789 	struct pvscsi_hcb *hcb;
790 	uint64_t context;
791 
792 	pvscsi_process_cmp_ring(sc);
793 
794 	hcb = ccb->ccb_h.ccb_pvscsi_hcb;
795 
796 	if (hcb != NULL) {
797 		context = pvscsi_hcb_to_context(sc, hcb);
798 
799 		memset(&cmd, 0, sizeof cmd);
800 		cmd.target = target;
801 		cmd.context = context;
802 
803 		device_printf(sc->dev, "Abort for target %u context %llx\n",
804 		    target, (unsigned long long)context);
805 
806 		pvscsi_write_cmd(sc, PVSCSI_CMD_ABORT_CMD, &cmd, sizeof(cmd));
807 		pvscsi_process_cmp_ring(sc);
808 
809 		DEBUG_PRINTF(2, sc->dev, "abort done\n");
810 	} else {
811 		DEBUG_PRINTF(1, sc->dev,
812 		    "Target %u ccb %p not found for abort\n", target, ccb);
813 	}
814 }
815 
816 static int
817 pvscsi_probe(device_t dev)
818 {
819 
820 	if (pci_get_vendor(dev) == PCI_VENDOR_ID_VMWARE &&
821 	    pci_get_device(dev) == PCI_DEVICE_ID_VMWARE_PVSCSI) {
822 		device_set_desc(dev, "VMware Paravirtual SCSI Controller");
823 		return (BUS_PROBE_DEFAULT);
824 	}
825 	return (ENXIO);
826 }
827 
828 static int
829 pvscsi_shutdown(device_t dev)
830 {
831 
832 	return (0);
833 }
834 
835 static void
836 pvscsi_timeout(void *arg)
837 {
838 	struct pvscsi_hcb *hcb;
839 	struct pvscsi_softc *sc;
840 	union ccb *ccb;
841 
842 	hcb = arg;
843 	ccb = hcb->ccb;
844 
845 	if (ccb == NULL) {
846 		/* Already completed */
847 		return;
848 	}
849 
850 	sc = ccb->ccb_h.ccb_pvscsi_sc;
851 	mtx_assert(&sc->lock, MA_OWNED);
852 
853 	device_printf(sc->dev, "Command timed out hcb=%p ccb=%p.\n", hcb, ccb);
854 
855 	switch (hcb->recovery) {
856 	case PVSCSI_HCB_NONE:
857 		hcb->recovery = PVSCSI_HCB_ABORT;
858 		pvscsi_abort(sc, ccb->ccb_h.target_id, ccb);
859 		callout_reset_sbt(&hcb->callout, PVSCSI_ABORT_TIMEOUT * SBT_1S,
860 		    0, pvscsi_timeout, hcb, 0);
861 		break;
862 	case PVSCSI_HCB_ABORT:
863 		hcb->recovery = PVSCSI_HCB_DEVICE_RESET;
864 		pvscsi_freeze(sc);
865 		pvscsi_device_reset(sc, ccb->ccb_h.target_id);
866 		callout_reset_sbt(&hcb->callout, PVSCSI_RESET_TIMEOUT * SBT_1S,
867 		    0, pvscsi_timeout, hcb, 0);
868 		break;
869 	case PVSCSI_HCB_DEVICE_RESET:
870 		hcb->recovery = PVSCSI_HCB_BUS_RESET;
871 		pvscsi_freeze(sc);
872 		pvscsi_bus_reset(sc);
873 		callout_reset_sbt(&hcb->callout, PVSCSI_RESET_TIMEOUT * SBT_1S,
874 		    0, pvscsi_timeout, hcb, 0);
875 		break;
876 	case PVSCSI_HCB_BUS_RESET:
877 		pvscsi_freeze(sc);
878 		pvscsi_adapter_reset(sc);
879 		break;
880 	};
881 }
882 
883 static void
884 pvscsi_process_completion(struct pvscsi_softc *sc,
885     struct pvscsi_ring_cmp_desc *e)
886 {
887 	struct pvscsi_hcb *hcb;
888 	union ccb *ccb;
889 	uint32_t status;
890 	uint32_t btstat;
891 	uint32_t sdstat;
892 	bus_dmasync_op_t op;
893 
894 	hcb = pvscsi_context_to_hcb(sc, e->context);
895 
896 	callout_stop(&hcb->callout);
897 
898 	ccb = hcb->ccb;
899 
900 	btstat = e->host_status;
901 	sdstat = e->scsi_status;
902 
903 	ccb->csio.scsi_status = sdstat;
904 	ccb->csio.resid = ccb->csio.dxfer_len - e->data_len;
905 
906 	if ((ccb->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_NONE) {
907 		if ((ccb->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN) {
908 			op = BUS_DMASYNC_POSTREAD;
909 		} else {
910 			op = BUS_DMASYNC_POSTWRITE;
911 		}
912 		bus_dmamap_sync(sc->buffer_dmat, hcb->dma_map, op);
913 		bus_dmamap_unload(sc->buffer_dmat, hcb->dma_map);
914 	}
915 
916 	if (btstat == BTSTAT_SUCCESS && sdstat == SCSI_STATUS_OK) {
917 		DEBUG_PRINTF(3, sc->dev,
918 		    "completing command context %llx success\n",
919 		    (unsigned long long)e->context);
920 		ccb->csio.resid = 0;
921 		status = CAM_REQ_CMP;
922 	} else {
923 		switch (btstat) {
924 		case BTSTAT_SUCCESS:
925 		case BTSTAT_LINKED_COMMAND_COMPLETED:
926 		case BTSTAT_LINKED_COMMAND_COMPLETED_WITH_FLAG:
927 			switch (sdstat) {
928 			case SCSI_STATUS_OK:
929 				ccb->csio.resid = 0;
930 				status = CAM_REQ_CMP;
931 				break;
932 			case SCSI_STATUS_CHECK_COND:
933 				status = CAM_SCSI_STATUS_ERROR;
934 
935 				if (ccb->csio.sense_len != 0) {
936 					status |= CAM_AUTOSNS_VALID;
937 
938 					memset(&ccb->csio.sense_data, 0,
939 					    sizeof(ccb->csio.sense_data));
940 					memcpy(&ccb->csio.sense_data,
941 					    hcb->sense_buffer,
942 					    MIN(ccb->csio.sense_len,
943 						e->sense_len));
944 				}
945 				break;
946 			case SCSI_STATUS_BUSY:
947 			case SCSI_STATUS_QUEUE_FULL:
948 				status = CAM_REQUEUE_REQ;
949 				break;
950 			case SCSI_STATUS_CMD_TERMINATED:
951 			case SCSI_STATUS_TASK_ABORTED:
952 				status = CAM_REQ_ABORTED;
953 				break;
954 			default:
955 				DEBUG_PRINTF(1, sc->dev,
956 				    "ccb: %p sdstat=0x%x\n", ccb, sdstat);
957 				status = CAM_SCSI_STATUS_ERROR;
958 				break;
959 			}
960 			break;
961 		case BTSTAT_SELTIMEO:
962 			status = CAM_SEL_TIMEOUT;
963 			break;
964 		case BTSTAT_DATARUN:
965 		case BTSTAT_DATA_UNDERRUN:
966 			status = CAM_DATA_RUN_ERR;
967 			break;
968 		case BTSTAT_ABORTQUEUE:
969 		case BTSTAT_HATIMEOUT:
970 			status = CAM_REQUEUE_REQ;
971 			break;
972 		case BTSTAT_NORESPONSE:
973 		case BTSTAT_SENTRST:
974 		case BTSTAT_RECVRST:
975 		case BTSTAT_BUSRESET:
976 			status = CAM_SCSI_BUS_RESET;
977 			break;
978 		case BTSTAT_SCSIPARITY:
979 			status = CAM_UNCOR_PARITY;
980 			break;
981 		case BTSTAT_BUSFREE:
982 			status = CAM_UNEXP_BUSFREE;
983 			break;
984 		case BTSTAT_INVPHASE:
985 			status = CAM_SEQUENCE_FAIL;
986 			break;
987 		case BTSTAT_SENSFAILED:
988 			status = CAM_AUTOSENSE_FAIL;
989 			break;
990 		case BTSTAT_LUNMISMATCH:
991 		case BTSTAT_TAGREJECT:
992 		case BTSTAT_DISCONNECT:
993 		case BTSTAT_BADMSG:
994 		case BTSTAT_INVPARAM:
995 			status = CAM_REQ_CMP_ERR;
996 			break;
997 		case BTSTAT_HASOFTWARE:
998 		case BTSTAT_HAHARDWARE:
999 			status = CAM_NO_HBA;
1000 			break;
1001 		default:
1002 			device_printf(sc->dev, "unknown hba status: 0x%x\n",
1003 			    btstat);
1004 			status = CAM_NO_HBA;
1005 			break;
1006 		}
1007 
1008 		DEBUG_PRINTF(3, sc->dev,
1009 		    "completing command context %llx btstat %x sdstat %x - status %x\n",
1010 		    (unsigned long long)e->context, btstat, sdstat, status);
1011 	}
1012 
1013 	ccb->ccb_h.ccb_pvscsi_hcb = NULL;
1014 	ccb->ccb_h.ccb_pvscsi_sc = NULL;
1015 	pvscsi_hcb_put(sc, hcb);
1016 
1017 	ccb->ccb_h.status =
1018 	    status | (ccb->ccb_h.status & ~(CAM_STATUS_MASK | CAM_SIM_QUEUED));
1019 
1020 	if (sc->frozen) {
1021 		ccb->ccb_h.status |= CAM_RELEASE_SIMQ;
1022 		sc->frozen = 0;
1023 	}
1024 
1025 	if (status != CAM_REQ_CMP) {
1026 		ccb->ccb_h.status |= CAM_DEV_QFRZN;
1027 		xpt_freeze_devq(ccb->ccb_h.path, /*count*/ 1);
1028 	}
1029 	xpt_done(ccb);
1030 }
1031 
1032 static void
1033 pvscsi_process_cmp_ring(struct pvscsi_softc *sc)
1034 {
1035 	struct pvscsi_ring_cmp_desc *ring;
1036 	struct pvscsi_rings_state *s;
1037 	struct pvscsi_ring_cmp_desc *e;
1038 	uint32_t mask;
1039 
1040 	mtx_assert(&sc->lock, MA_OWNED);
1041 
1042 	s = sc->rings_state;
1043 	ring = sc->cmp_ring;
1044 	mask = MASK(s->cmp_num_entries_log2);
1045 
1046 	while (s->cmp_cons_idx != s->cmp_prod_idx) {
1047 		e = ring + (s->cmp_cons_idx & mask);
1048 
1049 		pvscsi_process_completion(sc, e);
1050 
1051 		mb();
1052 		s->cmp_cons_idx++;
1053 	}
1054 }
1055 
1056 static void
1057 pvscsi_process_msg(struct pvscsi_softc *sc, struct pvscsi_ring_msg_desc *e)
1058 {
1059 	struct pvscsi_ring_msg_dev_status_changed *desc;
1060 
1061 	union ccb *ccb;
1062 	switch (e->type) {
1063 	case PVSCSI_MSG_DEV_ADDED:
1064 	case PVSCSI_MSG_DEV_REMOVED: {
1065 		desc = (struct pvscsi_ring_msg_dev_status_changed *)e;
1066 
1067 		device_printf(sc->dev, "MSG: device %s at scsi%u:%u:%u\n",
1068 		    desc->type == PVSCSI_MSG_DEV_ADDED ? "addition" : "removal",
1069 		    desc->bus, desc->target, desc->lun[1]);
1070 
1071 		ccb = xpt_alloc_ccb_nowait();
1072 		if (ccb == NULL) {
1073 			device_printf(sc->dev,
1074 			    "Error allocating CCB for dev change.\n");
1075 			break;
1076 		}
1077 
1078 		if (xpt_create_path(&ccb->ccb_h.path, NULL,
1079 		    cam_sim_path(sc->sim), desc->target, desc->lun[1])
1080 		    != CAM_REQ_CMP) {
1081 			device_printf(sc->dev,
1082 			    "Error creating path for dev change.\n");
1083 			xpt_free_ccb(ccb);
1084 			break;
1085 		}
1086 
1087 		xpt_rescan(ccb);
1088 	} break;
1089 	default:
1090 		device_printf(sc->dev, "Unknown msg type 0x%x\n", e->type);
1091 	};
1092 }
1093 
1094 static void
1095 pvscsi_process_msg_ring(struct pvscsi_softc *sc)
1096 {
1097 	struct pvscsi_ring_msg_desc *ring;
1098 	struct pvscsi_rings_state *s;
1099 	struct pvscsi_ring_msg_desc *e;
1100 	uint32_t mask;
1101 
1102 	mtx_assert(&sc->lock, MA_OWNED);
1103 
1104 	s = sc->rings_state;
1105 	ring = sc->msg_ring;
1106 	mask = MASK(s->msg_num_entries_log2);
1107 
1108 	while (s->msg_cons_idx != s->msg_prod_idx) {
1109 		e = ring + (s->msg_cons_idx & mask);
1110 
1111 		pvscsi_process_msg(sc, e);
1112 
1113 		mb();
1114 		s->msg_cons_idx++;
1115 	}
1116 }
1117 
1118 static void
1119 pvscsi_intr_locked(struct pvscsi_softc *sc)
1120 {
1121 	uint32_t val;
1122 
1123 	mtx_assert(&sc->lock, MA_OWNED);
1124 
1125 	val = pvscsi_read_intr_status(sc);
1126 
1127 	if ((val & PVSCSI_INTR_ALL_SUPPORTED) != 0) {
1128 		pvscsi_write_intr_status(sc, val & PVSCSI_INTR_ALL_SUPPORTED);
1129 		pvscsi_process_cmp_ring(sc);
1130 		if (sc->use_msg) {
1131 			pvscsi_process_msg_ring(sc);
1132 		}
1133 	}
1134 }
1135 
1136 static void
1137 pvscsi_intr(void *xsc)
1138 {
1139 	struct pvscsi_softc *sc;
1140 
1141 	sc = xsc;
1142 
1143 	mtx_assert(&sc->lock, MA_NOTOWNED);
1144 
1145 	mtx_lock(&sc->lock);
1146 	pvscsi_intr_locked(xsc);
1147 	mtx_unlock(&sc->lock);
1148 }
1149 
1150 static void
1151 pvscsi_poll(struct cam_sim *sim)
1152 {
1153 	struct pvscsi_softc *sc;
1154 
1155 	sc = cam_sim_softc(sim);
1156 
1157 	mtx_assert(&sc->lock, MA_OWNED);
1158 	pvscsi_intr_locked(sc);
1159 }
1160 
1161 static void
1162 pvscsi_execute_ccb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
1163 {
1164 	struct pvscsi_hcb *hcb;
1165 	struct pvscsi_ring_req_desc *e;
1166 	union ccb *ccb;
1167 	struct pvscsi_softc *sc;
1168 	struct pvscsi_rings_state *s;
1169 	uint8_t cdb0;
1170 	bus_dmasync_op_t op;
1171 
1172 	hcb = arg;
1173 	ccb = hcb->ccb;
1174 	e = hcb->e;
1175 	sc = ccb->ccb_h.ccb_pvscsi_sc;
1176 	s = sc->rings_state;
1177 
1178 	mtx_assert(&sc->lock, MA_OWNED);
1179 
1180 	if (error) {
1181 		device_printf(sc->dev, "pvscsi_execute_ccb error %d\n", error);
1182 
1183 		if (error == EFBIG) {
1184 			ccb->ccb_h.status = CAM_REQ_TOO_BIG;
1185 		} else {
1186 			ccb->ccb_h.status = CAM_REQ_CMP_ERR;
1187 		}
1188 
1189 		pvscsi_hcb_put(sc, hcb);
1190 		xpt_done(ccb);
1191 		return;
1192 	}
1193 
1194 	e->flags = 0;
1195 	op = 0;
1196 	switch (ccb->ccb_h.flags & CAM_DIR_MASK) {
1197 	case CAM_DIR_NONE:
1198 		e->flags |= PVSCSI_FLAG_CMD_DIR_NONE;
1199 		break;
1200 	case CAM_DIR_IN:
1201 		e->flags |= PVSCSI_FLAG_CMD_DIR_TOHOST;
1202 		op = BUS_DMASYNC_PREREAD;
1203 		break;
1204 	case CAM_DIR_OUT:
1205 		e->flags |= PVSCSI_FLAG_CMD_DIR_TODEVICE;
1206 		op = BUS_DMASYNC_PREWRITE;
1207 		break;
1208 	case CAM_DIR_BOTH:
1209 		/* TODO: does this need handling? */
1210 		break;
1211 	}
1212 
1213 	if (nseg != 0) {
1214 		if (nseg > 1) {
1215 			int i;
1216 			struct pvscsi_sg_element *sge;
1217 
1218 			KASSERT(nseg <= PVSCSI_MAX_SG_ENTRIES_PER_SEGMENT,
1219 			    ("too many sg segments"));
1220 
1221 			sge = hcb->sg_list->sge;
1222 			e->flags |= PVSCSI_FLAG_CMD_WITH_SG_LIST;
1223 
1224 			for (i = 0; i < nseg; ++i) {
1225 				sge[i].addr = segs[i].ds_addr;
1226 				sge[i].length = segs[i].ds_len;
1227 				sge[i].flags = 0;
1228 			}
1229 
1230 			e->data_addr = hcb->sg_list_paddr;
1231 		} else {
1232 			e->data_addr = segs->ds_addr;
1233 		}
1234 
1235 		bus_dmamap_sync(sc->buffer_dmat, hcb->dma_map, op);
1236 	} else {
1237 		e->data_addr = 0;
1238 	}
1239 
1240 	cdb0 = e->cdb[0];
1241 	ccb->ccb_h.status |= CAM_SIM_QUEUED;
1242 
1243 	if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) {
1244 		callout_reset_sbt(&hcb->callout, ccb->ccb_h.timeout * SBT_1MS,
1245 		    0, pvscsi_timeout, hcb, 0);
1246 	}
1247 
1248 	mb();
1249 	s->req_prod_idx++;
1250 	pvscsi_kick_io(sc, cdb0);
1251 }
1252 
1253 static void
1254 pvscsi_action(struct cam_sim *sim, union ccb *ccb)
1255 {
1256 	struct pvscsi_softc *sc;
1257 	struct ccb_hdr *ccb_h;
1258 
1259 	sc = cam_sim_softc(sim);
1260 	ccb_h = &ccb->ccb_h;
1261 
1262 	mtx_assert(&sc->lock, MA_OWNED);
1263 
1264 	switch (ccb_h->func_code) {
1265 	case XPT_SCSI_IO:
1266 	{
1267 		struct ccb_scsiio *csio;
1268 		uint32_t req_num_entries_log2;
1269 		struct pvscsi_ring_req_desc *ring;
1270 		struct pvscsi_ring_req_desc *e;
1271 		struct pvscsi_rings_state *s;
1272 		struct pvscsi_hcb *hcb;
1273 
1274 		csio = &ccb->csio;
1275 		ring = sc->req_ring;
1276 		s = sc->rings_state;
1277 
1278 		hcb = NULL;
1279 
1280 		/*
1281 		 * Check if it was completed already (such as aborted
1282 		 * by upper layers)
1283 		 */
1284 		if ((ccb_h->status & CAM_STATUS_MASK) != CAM_REQ_INPROG) {
1285 			xpt_done(ccb);
1286 			return;
1287 		}
1288 
1289 		req_num_entries_log2 = s->req_num_entries_log2;
1290 
1291 		if (s->req_prod_idx - s->cmp_cons_idx >=
1292 		    (1 << req_num_entries_log2)) {
1293 			device_printf(sc->dev,
1294 			    "Not enough room on completion ring.\n");
1295 			pvscsi_freeze(sc);
1296 			ccb_h->status = CAM_REQUEUE_REQ;
1297 			goto finish_ccb;
1298 		}
1299 
1300 		hcb = pvscsi_hcb_get(sc);
1301 		if (hcb == NULL) {
1302 			device_printf(sc->dev, "No free hcbs.\n");
1303 			pvscsi_freeze(sc);
1304 			ccb_h->status = CAM_REQUEUE_REQ;
1305 			goto finish_ccb;
1306 		}
1307 
1308 		hcb->ccb = ccb;
1309 		ccb_h->ccb_pvscsi_hcb = hcb;
1310 		ccb_h->ccb_pvscsi_sc = sc;
1311 
1312 		if (csio->cdb_len > sizeof(e->cdb)) {
1313 			DEBUG_PRINTF(2, sc->dev, "cdb length %u too large\n",
1314 			    csio->cdb_len);
1315 			ccb_h->status = CAM_REQ_INVALID;
1316 			goto finish_ccb;
1317 		}
1318 
1319 		if (ccb_h->flags & CAM_CDB_PHYS) {
1320 			DEBUG_PRINTF(2, sc->dev,
1321 			    "CAM_CDB_PHYS not implemented\n");
1322 			ccb_h->status = CAM_REQ_INVALID;
1323 			goto finish_ccb;
1324 		}
1325 
1326 		e = ring + (s->req_prod_idx & MASK(req_num_entries_log2));
1327 
1328 		e->bus = cam_sim_bus(sim);
1329 		e->target = ccb_h->target_id;
1330 		memset(e->lun, 0, sizeof(e->lun));
1331 		e->lun[1] = ccb_h->target_lun;
1332 		e->data_addr = 0;
1333 		e->data_len = csio->dxfer_len;
1334 		e->vcpu_hint = curcpu;
1335 
1336 		e->cdb_len = csio->cdb_len;
1337 		memcpy(e->cdb, scsiio_cdb_ptr(csio), csio->cdb_len);
1338 
1339 		e->sense_addr = 0;
1340 		e->sense_len = csio->sense_len;
1341 		if (e->sense_len > 0) {
1342 			e->sense_addr = hcb->sense_buffer_paddr;
1343 		}
1344 
1345 		e->tag = MSG_SIMPLE_Q_TAG;
1346 		if (ccb_h->flags & CAM_TAG_ACTION_VALID) {
1347 			e->tag = csio->tag_action;
1348 		}
1349 
1350 		e->context = pvscsi_hcb_to_context(sc, hcb);
1351 		hcb->e = e;
1352 
1353 		DEBUG_PRINTF(3, sc->dev,
1354 		    " queuing command %02x context %llx\n", e->cdb[0],
1355 		    (unsigned long long)e->context);
1356 		bus_dmamap_load_ccb(sc->buffer_dmat, hcb->dma_map, ccb,
1357 		    pvscsi_execute_ccb, hcb, 0);
1358 		break;
1359 
1360 finish_ccb:
1361 		if (hcb != NULL) {
1362 			pvscsi_hcb_put(sc, hcb);
1363 		}
1364 		xpt_done(ccb);
1365 	} break;
1366 	case XPT_ABORT:
1367 	{
1368 		struct pvscsi_hcb *abort_hcb;
1369 		union ccb *abort_ccb;
1370 
1371 		abort_ccb = ccb->cab.abort_ccb;
1372 		abort_hcb = abort_ccb->ccb_h.ccb_pvscsi_hcb;
1373 
1374 		if (abort_hcb->ccb != NULL && abort_hcb->ccb == abort_ccb) {
1375 			if (abort_ccb->ccb_h.func_code == XPT_SCSI_IO) {
1376 				pvscsi_abort(sc, ccb_h->target_id, abort_ccb);
1377 				ccb_h->status = CAM_REQ_CMP;
1378 			} else {
1379 				ccb_h->status = CAM_UA_ABORT;
1380 			}
1381 		} else {
1382 			device_printf(sc->dev,
1383 			    "Could not find hcb for ccb %p (tgt %u)\n",
1384 			    ccb, ccb_h->target_id);
1385 			ccb_h->status = CAM_REQ_CMP;
1386 		}
1387 		xpt_done(ccb);
1388 	} break;
1389 	case XPT_RESET_DEV:
1390 	{
1391 		pvscsi_device_reset(sc, ccb_h->target_id);
1392 		ccb_h->status = CAM_REQ_CMP;
1393 		xpt_done(ccb);
1394 	} break;
1395 	case XPT_RESET_BUS:
1396 	{
1397 		pvscsi_bus_reset(sc);
1398 		ccb_h->status = CAM_REQ_CMP;
1399 		xpt_done(ccb);
1400 	} break;
1401 	case XPT_PATH_INQ:
1402 	{
1403 		struct ccb_pathinq *cpi;
1404 
1405 		cpi = &ccb->cpi;
1406 
1407 		cpi->version_num = 1;
1408 		cpi->hba_inquiry = PI_TAG_ABLE;
1409 		cpi->target_sprt = 0;
1410 		cpi->hba_misc = PIM_NOBUSRESET | PIM_UNMAPPED;
1411 		cpi->hba_eng_cnt = 0;
1412 		/* cpi->vuhba_flags = 0; */
1413 		cpi->max_target = sc->max_targets - 1;
1414 		cpi->max_lun = 0;
1415 		cpi->async_flags = 0;
1416 		cpi->hpath_id = 0;
1417 		cpi->unit_number = cam_sim_unit(sim);
1418 		cpi->bus_id = cam_sim_bus(sim);
1419 		cpi->initiator_id = 7;
1420 		cpi->base_transfer_speed = 750000;
1421 		strlcpy(cpi->sim_vid, "VMware", SIM_IDLEN);
1422 		strlcpy(cpi->hba_vid, "VMware", HBA_IDLEN);
1423 		strlcpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN);
1424 		/* Limit I/O to 256k since we can't do 512k unaligned I/O */
1425 		cpi->maxio = (PVSCSI_MAX_SG_ENTRIES_PER_SEGMENT / 2) * PAGE_SIZE;
1426 		cpi->protocol = PROTO_SCSI;
1427 		cpi->protocol_version = SCSI_REV_SPC2;
1428 		cpi->transport = XPORT_SAS;
1429 		cpi->transport_version = 0;
1430 
1431 		ccb_h->status = CAM_REQ_CMP;
1432 		xpt_done(ccb);
1433 	} break;
1434 	case XPT_GET_TRAN_SETTINGS:
1435 	{
1436 		struct ccb_trans_settings *cts;
1437 
1438 		cts = &ccb->cts;
1439 
1440 		cts->protocol = PROTO_SCSI;
1441 		cts->protocol_version = SCSI_REV_SPC2;
1442 		cts->transport = XPORT_SAS;
1443 		cts->transport_version = 0;
1444 
1445 		cts->proto_specific.scsi.flags = CTS_SCSI_FLAGS_TAG_ENB;
1446 		cts->proto_specific.scsi.valid = CTS_SCSI_VALID_TQ;
1447 
1448 		ccb_h->status = CAM_REQ_CMP;
1449 		xpt_done(ccb);
1450 	} break;
1451 	case XPT_CALC_GEOMETRY:
1452 	{
1453 		cam_calc_geometry(&ccb->ccg, 1);
1454 		xpt_done(ccb);
1455 	} break;
1456 	default:
1457 		ccb_h->status = CAM_REQ_INVALID;
1458 		xpt_done(ccb);
1459 		break;
1460 	}
1461 }
1462 
1463 static void
1464 pvscsi_free_interrupts(struct pvscsi_softc *sc)
1465 {
1466 
1467 	if (sc->irq_handler != NULL) {
1468 		bus_teardown_intr(sc->dev, sc->irq_res, sc->irq_handler);
1469 	}
1470 	if (sc->irq_res != NULL) {
1471 		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_id,
1472 		    sc->irq_res);
1473 	}
1474 	if (sc->use_msi_or_msix) {
1475 		pci_release_msi(sc->dev);
1476 	}
1477 }
1478 
1479 static int
1480 pvscsi_setup_interrupts(struct pvscsi_softc *sc)
1481 {
1482 	int error;
1483 	int flags;
1484 	int use_msix;
1485 	int use_msi;
1486 	int count;
1487 
1488 	sc->use_msi_or_msix = 0;
1489 
1490 	use_msix = pvscsi_get_tunable(sc, "use_msix", pvscsi_use_msix);
1491 	use_msi = pvscsi_get_tunable(sc, "use_msi", pvscsi_use_msi);
1492 
1493 	if (use_msix && pci_msix_count(sc->dev) > 0) {
1494 		count = 1;
1495 		if (pci_alloc_msix(sc->dev, &count) == 0 && count == 1) {
1496 			sc->use_msi_or_msix = 1;
1497 			device_printf(sc->dev, "Interrupt: MSI-X\n");
1498 		} else {
1499 			pci_release_msi(sc->dev);
1500 		}
1501 	}
1502 
1503 	if (sc->use_msi_or_msix == 0 && use_msi && pci_msi_count(sc->dev) > 0) {
1504 		count = 1;
1505 		if (pci_alloc_msi(sc->dev, &count) == 0 && count == 1) {
1506 			sc->use_msi_or_msix = 1;
1507 			device_printf(sc->dev, "Interrupt: MSI\n");
1508 		} else {
1509 			pci_release_msi(sc->dev);
1510 		}
1511 	}
1512 
1513 	flags = RF_ACTIVE;
1514 	if (sc->use_msi_or_msix) {
1515 		sc->irq_id = 1;
1516 	} else {
1517 		device_printf(sc->dev, "Interrupt: INT\n");
1518 		sc->irq_id = 0;
1519 		flags |= RF_SHAREABLE;
1520 	}
1521 
1522 	sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_id,
1523 	    flags);
1524 	if (sc->irq_res == NULL) {
1525 		device_printf(sc->dev, "IRQ allocation failed\n");
1526 		if (sc->use_msi_or_msix) {
1527 			pci_release_msi(sc->dev);
1528 		}
1529 		return (ENXIO);
1530 	}
1531 
1532 	error = bus_setup_intr(sc->dev, sc->irq_res,
1533 	    INTR_TYPE_CAM | INTR_MPSAFE, NULL, pvscsi_intr, sc,
1534 	    &sc->irq_handler);
1535 	if (error) {
1536 		device_printf(sc->dev, "IRQ handler setup failed\n");
1537 		pvscsi_free_interrupts(sc);
1538 		return (error);
1539 	}
1540 
1541 	return (0);
1542 }
1543 
1544 static void
1545 pvscsi_free_all(struct pvscsi_softc *sc)
1546 {
1547 
1548 	if (sc->sim) {
1549 		int error;
1550 
1551 		if (sc->bus_path) {
1552 			xpt_free_path(sc->bus_path);
1553 		}
1554 
1555 		error = xpt_bus_deregister(cam_sim_path(sc->sim));
1556 		if (error != 0) {
1557 			device_printf(sc->dev,
1558 			    "Error deregistering bus, error %d\n", error);
1559 		}
1560 
1561 		cam_sim_free(sc->sim, TRUE);
1562 	}
1563 
1564 	pvscsi_dma_free_per_hcb(sc, sc->hcb_cnt);
1565 
1566 	if (sc->hcbs) {
1567 		free(sc->hcbs, M_PVSCSI);
1568 	}
1569 
1570 	pvscsi_free_rings(sc);
1571 
1572 	pvscsi_free_interrupts(sc);
1573 
1574 	if (sc->buffer_dmat != NULL) {
1575 		bus_dma_tag_destroy(sc->buffer_dmat);
1576 	}
1577 
1578 	if (sc->parent_dmat != NULL) {
1579 		bus_dma_tag_destroy(sc->parent_dmat);
1580 	}
1581 
1582 	if (sc->mm_res != NULL) {
1583 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->mm_rid,
1584 		    sc->mm_res);
1585 	}
1586 }
1587 
1588 static int
1589 pvscsi_attach(device_t dev)
1590 {
1591 	struct pvscsi_softc *sc;
1592 	int rid;
1593 	int barid;
1594 	int error;
1595 	int max_queue_depth;
1596 	int adapter_queue_size;
1597 	struct cam_devq *devq;
1598 
1599 	sc = device_get_softc(dev);
1600 	sc->dev = dev;
1601 
1602 	mtx_init(&sc->lock, "pvscsi", NULL, MTX_DEF);
1603 
1604 	pci_enable_busmaster(dev);
1605 
1606 	sc->mm_rid = -1;
1607 	for (barid = 0; barid <= PCIR_MAX_BAR_0; ++barid) {
1608 		rid = PCIR_BAR(barid);
1609 
1610 		sc->mm_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
1611 		    RF_ACTIVE);
1612 		if (sc->mm_res != NULL) {
1613 			sc->mm_rid = rid;
1614 			break;
1615 		}
1616 	}
1617 
1618 	if (sc->mm_res == NULL) {
1619 		device_printf(dev, "could not map device memory\n");
1620 		return (ENXIO);
1621 	}
1622 
1623 	error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0,
1624 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE,
1625 	    BUS_SPACE_UNRESTRICTED, BUS_SPACE_MAXSIZE, 0, NULL, NULL,
1626 	    &sc->parent_dmat);
1627 	if (error) {
1628 		device_printf(dev, "parent dma tag create failure, error %d\n",
1629 		    error);
1630 		pvscsi_free_all(sc);
1631 		return (ENXIO);
1632 	}
1633 
1634 	error = bus_dma_tag_create(sc->parent_dmat, 1, 0,
1635 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
1636 	    PVSCSI_MAX_SG_ENTRIES_PER_SEGMENT * PAGE_SIZE,
1637 	    PVSCSI_MAX_SG_ENTRIES_PER_SEGMENT, PAGE_SIZE, BUS_DMA_ALLOCNOW,
1638 	    NULL, NULL, &sc->buffer_dmat);
1639 	if (error) {
1640 		device_printf(dev, "parent dma tag create failure, error %d\n",
1641 		    error);
1642 		pvscsi_free_all(sc);
1643 		return (ENXIO);
1644 	}
1645 
1646 	error = pvscsi_setup_interrupts(sc);
1647 	if (error) {
1648 		device_printf(dev, "Interrupt setup failed\n");
1649 		pvscsi_free_all(sc);
1650 		return (error);
1651 	}
1652 
1653 	sc->max_targets = pvscsi_get_max_targets(sc);
1654 
1655 	sc->use_msg = pvscsi_get_tunable(sc, "use_msg", pvscsi_use_msg) &&
1656 	    pvscsi_hw_supports_msg(sc);
1657 	sc->msg_ring_num_pages = sc->use_msg ? 1 : 0;
1658 
1659 	sc->req_ring_num_pages = pvscsi_get_tunable(sc, "request_ring_pages",
1660 	    pvscsi_request_ring_pages);
1661 	if (sc->req_ring_num_pages <= 0) {
1662 		if (sc->max_targets <= 16) {
1663 			sc->req_ring_num_pages =
1664 			    PVSCSI_DEFAULT_NUM_PAGES_REQ_RING;
1665 		} else {
1666 			sc->req_ring_num_pages = PVSCSI_MAX_NUM_PAGES_REQ_RING;
1667 		}
1668 	} else if (sc->req_ring_num_pages > PVSCSI_MAX_NUM_PAGES_REQ_RING) {
1669 		sc->req_ring_num_pages = PVSCSI_MAX_NUM_PAGES_REQ_RING;
1670 	}
1671 	sc->cmp_ring_num_pages = sc->req_ring_num_pages;
1672 
1673 	max_queue_depth = pvscsi_get_tunable(sc, "max_queue_depth",
1674 	    pvscsi_max_queue_depth);
1675 
1676 	adapter_queue_size = (sc->req_ring_num_pages * PAGE_SIZE) /
1677 	    sizeof(struct pvscsi_ring_req_desc);
1678 	if (max_queue_depth > 0) {
1679 		adapter_queue_size = MIN(adapter_queue_size, max_queue_depth);
1680 	}
1681 	adapter_queue_size = MIN(adapter_queue_size,
1682 	    PVSCSI_MAX_REQ_QUEUE_DEPTH);
1683 
1684 	device_printf(sc->dev, "Use Msg: %d\n", sc->use_msg);
1685 	device_printf(sc->dev, "Max targets: %d\n", sc->max_targets);
1686 	device_printf(sc->dev, "REQ num pages: %d\n", sc->req_ring_num_pages);
1687 	device_printf(sc->dev, "CMP num pages: %d\n", sc->cmp_ring_num_pages);
1688 	device_printf(sc->dev, "MSG num pages: %d\n", sc->msg_ring_num_pages);
1689 	device_printf(sc->dev, "Queue size: %d\n", adapter_queue_size);
1690 
1691 	if (pvscsi_allocate_rings(sc)) {
1692 		device_printf(dev, "ring allocation failed\n");
1693 		pvscsi_free_all(sc);
1694 		return (ENXIO);
1695 	}
1696 
1697 	sc->hcb_cnt = adapter_queue_size;
1698 	sc->hcbs = malloc(sc->hcb_cnt * sizeof(*sc->hcbs), M_PVSCSI,
1699 	    M_NOWAIT | M_ZERO);
1700 	if (sc->hcbs == NULL) {
1701 		device_printf(dev, "error allocating hcb array\n");
1702 		pvscsi_free_all(sc);
1703 		return (ENXIO);
1704 	}
1705 
1706 	if (pvscsi_dma_alloc_per_hcb(sc)) {
1707 		device_printf(dev, "error allocating per hcb dma memory\n");
1708 		pvscsi_free_all(sc);
1709 		return (ENXIO);
1710 	}
1711 
1712 	pvscsi_adapter_reset(sc);
1713 
1714 	devq = cam_simq_alloc(adapter_queue_size);
1715 	if (devq == NULL) {
1716 		device_printf(dev, "cam devq alloc failed\n");
1717 		pvscsi_free_all(sc);
1718 		return (ENXIO);
1719 	}
1720 
1721 	sc->sim = cam_sim_alloc(pvscsi_action, pvscsi_poll, "pvscsi", sc,
1722 	    device_get_unit(dev), &sc->lock, 1, adapter_queue_size, devq);
1723 	if (sc->sim == NULL) {
1724 		device_printf(dev, "cam sim alloc failed\n");
1725 		cam_simq_free(devq);
1726 		pvscsi_free_all(sc);
1727 		return (ENXIO);
1728 	}
1729 
1730 	mtx_lock(&sc->lock);
1731 
1732 	if (xpt_bus_register(sc->sim, dev, 0) != CAM_SUCCESS) {
1733 		device_printf(dev, "xpt bus register failed\n");
1734 		pvscsi_free_all(sc);
1735 		mtx_unlock(&sc->lock);
1736 		return (ENXIO);
1737 	}
1738 
1739 	if (xpt_create_path(&sc->bus_path, NULL, cam_sim_path(sc->sim),
1740 	    CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
1741 		device_printf(dev, "xpt create path failed\n");
1742 		pvscsi_free_all(sc);
1743 		mtx_unlock(&sc->lock);
1744 		return (ENXIO);
1745 	}
1746 
1747 	pvscsi_setup_rings(sc);
1748 	if (sc->use_msg) {
1749 		pvscsi_setup_msg_ring(sc);
1750 	}
1751 
1752 	sc->use_req_call_threshold = pvscsi_setup_req_call(sc, 1);
1753 
1754 	pvscsi_intr_enable(sc);
1755 
1756 	mtx_unlock(&sc->lock);
1757 
1758 	return (0);
1759 }
1760 
1761 static int
1762 pvscsi_detach(device_t dev)
1763 {
1764 	struct pvscsi_softc *sc;
1765 
1766 	sc = device_get_softc(dev);
1767 
1768 	pvscsi_intr_disable(sc);
1769 	pvscsi_adapter_reset(sc);
1770 
1771 	if (sc->irq_handler != NULL) {
1772 		bus_teardown_intr(dev, sc->irq_res, sc->irq_handler);
1773 	}
1774 
1775 	mtx_lock(&sc->lock);
1776 	pvscsi_free_all(sc);
1777 	mtx_unlock(&sc->lock);
1778 
1779 	mtx_destroy(&sc->lock);
1780 
1781 	return (0);
1782 }
1783 
1784 static device_method_t pvscsi_methods[] = {
1785 	DEVMETHOD(device_probe, pvscsi_probe),
1786 	DEVMETHOD(device_shutdown, pvscsi_shutdown),
1787 	DEVMETHOD(device_attach, pvscsi_attach),
1788 	DEVMETHOD(device_detach, pvscsi_detach),
1789 	DEVMETHOD_END
1790 };
1791 
1792 static driver_t pvscsi_driver = {
1793 	"pvscsi", pvscsi_methods, sizeof(struct pvscsi_softc)
1794 };
1795 
1796 DRIVER_MODULE(pvscsi, pci, pvscsi_driver, 0, 0);
1797 
1798 MODULE_DEPEND(pvscsi, pci, 1, 1, 1);
1799 MODULE_DEPEND(pvscsi, cam, 1, 1, 1);
1800