xref: /freebsd/sys/dev/vmware/pvscsi/pvscsi.c (revision 2f513db72b034fd5ef7f080b11be5c711c15186a)
1 /*-
2  * Copyright (c) 2018 VMware, Inc.
3  *
4  * SPDX-License-Identifier: (BSD-2-Clause OR GPL-2.0)
5  */
6 
7 #include <sys/cdefs.h>
8 __FBSDID("$FreeBSD$");
9 
10 #include <sys/param.h>
11 #include <sys/bus.h>
12 #include <sys/errno.h>
13 #include <sys/kernel.h>
14 #include <sys/malloc.h>
15 #include <sys/module.h>
16 #include <sys/queue.h>
17 #include <sys/rman.h>
18 #include <sys/sysctl.h>
19 #include <sys/systm.h>
20 
21 #include <machine/bus.h>
22 #include <machine/resource.h>
23 
24 #include <dev/pci/pcireg.h>
25 #include <dev/pci/pcivar.h>
26 
27 #include <cam/cam.h>
28 #include <cam/cam_ccb.h>
29 #include <cam/cam_debug.h>
30 #include <cam/cam_sim.h>
31 #include <cam/cam_xpt_sim.h>
32 #include <cam/scsi/scsi_message.h>
33 
34 #include "pvscsi.h"
35 
/*
 * NOTE(review): not referenced in the visible part of this file; presumably
 * the default request ring size (in pages) chosen at attach time.
 */
#define	PVSCSI_DEFAULT_NUM_PAGES_REQ_RING	8
/* Bytes of the shared sense buffer DMA area reserved for each hcb. */
#define	PVSCSI_SENSE_LENGTH			256

/* malloc(9) type for this driver, described as "PVSCSI memory". */
MALLOC_DECLARE(M_PVSCSI);
MALLOC_DEFINE(M_PVSCSI, "pvscsi", "PVSCSI memory");

/*
 * DEBUG_PRINTF(level, dev, fmt, ...): device_printf() gated on
 * pvscsi_log_level >= level.  Expands to nothing unless the driver is built
 * with PVSCSI_DEBUG_LOGGING.
 */
#ifdef PVSCSI_DEBUG_LOGGING
#define	DEBUG_PRINTF(level, dev, fmt, ...)				\
	do {								\
		if (pvscsi_log_level >= (level)) {			\
			device_printf((dev), (fmt), ##__VA_ARGS__);	\
		}							\
	} while(0)
#else
#define DEBUG_PRINTF(level, dev, fmt, ...)
#endif /* PVSCSI_DEBUG_LOGGING */

/*
 * Aliases for the two SIM-private pointer slots of a CCB header: the hcb
 * servicing the CCB and the owning softc (accessed as ccb->ccb_h.<alias>).
 */
#define	ccb_pvscsi_hcb	spriv_ptr0
#define	ccb_pvscsi_sc	spriv_ptr1
55 
/* Forward declarations of the driver-private structures defined below. */
struct pvscsi_softc;
struct pvscsi_hcb;
struct pvscsi_dma;

/* Register access, interrupt masking and device command helpers. */
static inline uint32_t pvscsi_reg_read(struct pvscsi_softc *sc,
    uint32_t offset);
static inline void pvscsi_reg_write(struct pvscsi_softc *sc, uint32_t offset,
    uint32_t val);
static inline uint32_t pvscsi_read_intr_status(struct pvscsi_softc *sc);
static inline void pvscsi_write_intr_status(struct pvscsi_softc *sc,
    uint32_t val);
static inline void pvscsi_intr_enable(struct pvscsi_softc *sc);
static inline void pvscsi_intr_disable(struct pvscsi_softc *sc);
static void pvscsi_kick_io(struct pvscsi_softc *sc, uint8_t cdb0);
static void pvscsi_write_cmd(struct pvscsi_softc *sc, uint32_t cmd, void *data,
    uint32_t len);
static uint32_t pvscsi_get_max_targets(struct pvscsi_softc *sc);
static int pvscsi_setup_req_call(struct pvscsi_softc *sc, uint32_t enable);
static void pvscsi_setup_rings(struct pvscsi_softc *sc);
static void pvscsi_setup_msg_ring(struct pvscsi_softc *sc);
static int pvscsi_hw_supports_msg(struct pvscsi_softc *sc);

/* Command timeout handling and the escalating recovery actions. */
static void pvscsi_timeout(void *arg);
static void pvscsi_freeze(struct pvscsi_softc *sc);
static void pvscsi_adapter_reset(struct pvscsi_softc *sc);
static void pvscsi_bus_reset(struct pvscsi_softc *sc);
static void pvscsi_device_reset(struct pvscsi_softc *sc, uint32_t target);
static void pvscsi_abort(struct pvscsi_softc *sc, uint32_t target,
    union ccb *ccb);

/* Completion ring and message ring processing. */
static void pvscsi_process_completion(struct pvscsi_softc *sc,
    struct pvscsi_ring_cmp_desc *e);
static void pvscsi_process_cmp_ring(struct pvscsi_softc *sc);
static void pvscsi_process_msg(struct pvscsi_softc *sc,
    struct pvscsi_ring_msg_desc *e);
static void pvscsi_process_msg_ring(struct pvscsi_softc *sc);

/* Interrupt and polled entry points. */
static void pvscsi_intr_locked(struct pvscsi_softc *sc);
static void pvscsi_intr(void *xsc);
static void pvscsi_poll(struct cam_sim *sim);

/* CAM request submission. */
static void pvscsi_execute_ccb(void *arg, bus_dma_segment_t *segs, int nseg,
    int error);
static void pvscsi_action(struct cam_sim *sim, union ccb *ccb);

/* hcb <-> device context translation and free-list management. */
static inline uint64_t pvscsi_hcb_to_context(struct pvscsi_softc *sc,
    struct pvscsi_hcb *hcb);
static inline struct pvscsi_hcb* pvscsi_context_to_hcb(struct pvscsi_softc *sc,
    uint64_t context);
static struct pvscsi_hcb * pvscsi_hcb_get(struct pvscsi_softc *sc);
static void pvscsi_hcb_put(struct pvscsi_softc *sc, struct pvscsi_hcb *hcb);

/* DMA memory, ring and interrupt resource management. */
static void pvscsi_dma_cb(void *arg, bus_dma_segment_t *segs, int nseg,
    int error);
static void pvscsi_dma_free(struct pvscsi_softc *sc, struct pvscsi_dma *dma);
static int pvscsi_dma_alloc(struct pvscsi_softc *sc, struct pvscsi_dma *dma,
    bus_size_t size, bus_size_t alignment);
static int pvscsi_dma_alloc_ppns(struct pvscsi_softc *sc,
    struct pvscsi_dma *dma, uint64_t *ppn_list, uint32_t num_pages);
static void pvscsi_dma_free_per_hcb(struct pvscsi_softc *sc,
    uint32_t hcbs_allocated);
static int pvscsi_dma_alloc_per_hcb(struct pvscsi_softc *sc);
static void pvscsi_free_rings(struct pvscsi_softc *sc);
static int pvscsi_allocate_rings(struct pvscsi_softc *sc);
static void pvscsi_free_interrupts(struct pvscsi_softc *sc);
static int pvscsi_setup_interrupts(struct pvscsi_softc *sc);
static void pvscsi_free_all(struct pvscsi_softc *sc);

/* Device (newbus) methods and per-unit tunable lookup. */
static int pvscsi_attach(device_t dev);
static int pvscsi_detach(device_t dev);
static int pvscsi_probe(device_t dev);
static int pvscsi_shutdown(device_t dev);
static int pvscsi_get_tunable(struct pvscsi_softc *sc, char *name, int value);
129 
130 
/*
 * Debug builds only: hw.pvscsi.log_level selects how verbose DEBUG_PRINTF
 * is (messages with level <= log_level are printed).
 */
#ifdef PVSCSI_DEBUG_LOGGING
static int pvscsi_log_level = 0;
static SYSCTL_NODE(_hw, OID_AUTO, pvscsi, CTLFLAG_RD, 0,
    "PVSCSI driver parameters");
SYSCTL_INT(_hw_pvscsi, OID_AUTO, log_level, CTLFLAG_RWTUN, &pvscsi_log_level,
    0, "PVSCSI debug log level");
#endif

/*
 * Driver-wide loader tunables and their built-in defaults.
 * NOTE(review): the consumers of most of these are outside the visible part
 * of the file; a value of 0 presumably means "let the driver choose".
 */
static int pvscsi_request_ring_pages = 0;
TUNABLE_INT("hw.pvscsi.request_ring_pages", &pvscsi_request_ring_pages);

static int pvscsi_use_msg = 1;
TUNABLE_INT("hw.pvscsi.use_msg", &pvscsi_use_msg);

static int pvscsi_use_msi = 1;
TUNABLE_INT("hw.pvscsi.use_msi", &pvscsi_use_msi);

static int pvscsi_use_msix = 1;
TUNABLE_INT("hw.pvscsi.use_msix", &pvscsi_use_msix);

/* Default handed to pvscsi_get_tunable() by pvscsi_setup_req_call(). */
static int pvscsi_use_req_call_threshold = 1;
TUNABLE_INT("hw.pvscsi.use_req_call_threshold", &pvscsi_use_req_call_threshold);

static int pvscsi_max_queue_depth = 0;
TUNABLE_INT("hw.pvscsi.max_queue_depth", &pvscsi_max_queue_depth);
156 
157 
/*
 * Per-hcb scatter/gather table.  One of these is carved out of
 * sc->sg_list_dma for every hcb in pvscsi_dma_alloc_per_hcb().
 */
struct pvscsi_sg_list {
	struct pvscsi_sg_element sge[PVSCSI_MAX_SG_ENTRIES_PER_SEGMENT];
};
161 
162 
/*
 * Seconds to wait for an abort / a reset to take effect before
 * pvscsi_timeout() escalates (multiplied by SBT_1S when arming the callout).
 */
#define	PVSCSI_ABORT_TIMEOUT	2
#define	PVSCSI_RESET_TIMEOUT	10

/*
 * Values of pvscsi_hcb.recovery: the last recovery action pvscsi_timeout()
 * has already attempted for the command, in order of escalation.
 */
#define	PVSCSI_HCB_NONE		0
#define	PVSCSI_HCB_ABORT	1
#define	PVSCSI_HCB_DEVICE_RESET	2
#define	PVSCSI_HCB_BUS_RESET	3
170 
/*
 * Host control block: driver-side state for one in-flight command.
 * hcbs live in the sc->hcbs array and are recycled through sc->free_list.
 */
struct pvscsi_hcb {
	/* CCB being serviced; NULL while the hcb is on the free list. */
	union ccb			*ccb;
	/* Request ring descriptor for the command; cleared on release. */
	struct pvscsi_ring_req_desc	*e;
	/* PVSCSI_HCB_* recovery stage reached by pvscsi_timeout(). */
	int				 recovery;
	/* Free-list linkage. */
	SLIST_ENTRY(pvscsi_hcb)		 links;

	/* Timeout callout; initialised with sc->lock as its mutex. */
	struct callout			 callout;
	/* DMA map (from sc->buffer_dmat) for the command's data buffer. */
	bus_dmamap_t			 dma_map;
	/* This hcb's slice of sc->sense_buffer_dma: kernel and bus address. */
	void				*sense_buffer;
	bus_addr_t			 sense_buffer_paddr;
	/* This hcb's slice of sc->sg_list_dma: kernel and bus address. */
	struct pvscsi_sg_list		*sg_list;
	bus_addr_t			 sg_list_paddr;
};
184 
/*
 * One DMA allocation made by pvscsi_dma_alloc().  An all-zero structure
 * means "nothing allocated"; pvscsi_dma_free() leaves it in that state.
 */
struct pvscsi_dma
{
	bus_dma_tag_t	 tag;	/* tag created for this allocation */
	bus_dmamap_t	 map;	/* map returned by bus_dmamem_alloc() */
	void		*vaddr;	/* kernel virtual address of the memory */
	bus_addr_t	 paddr;	/* bus address; 0 while not loaded */
	bus_size_t	 size;	/* allocation size in bytes */
};
193 
/* Per-adapter driver state. */
struct pvscsi_softc {
	device_t		 dev;
	/* Protects the rings, the hcb free list and the hcb callouts. */
	struct mtx		 lock;
	struct cam_sim		*sim;
	struct cam_path		*bus_path;
	/* Non-zero while the SIM queue is frozen by pvscsi_freeze(). */
	int			 frozen;
	/* Kernel addresses of the ring memory (see the *_dma members). */
	struct pvscsi_rings_state	*rings_state;
	struct pvscsi_ring_req_desc	*req_ring;
	struct pvscsi_ring_cmp_desc	*cmp_ring;
	struct pvscsi_ring_msg_desc	*msg_ring;	/* NULL if !use_msg */
	/* Array of hcb_cnt host control blocks and the list of free ones. */
	uint32_t		 hcb_cnt;
	struct pvscsi_hcb	*hcbs;
	SLIST_HEAD(, pvscsi_hcb)	free_list;
	/* Parent of the tags created by pvscsi_dma_alloc(). */
	bus_dma_tag_t		parent_dmat;
	/* Tag used for the per-hcb data buffer maps. */
	bus_dma_tag_t		buffer_dmat;

	/* True when the message ring is in use. */
	bool		 use_msg;
	uint32_t	 max_targets;
	/* Resource used for device register access. */
	int		 mm_rid;
	struct resource	*mm_res;
	/* NOTE(review): interrupt resources; set up outside the visible code. */
	int		 irq_id;
	struct resource	*irq_res;
	void		*irq_handler;
	/* Consulted by pvscsi_kick_io() before kicking read/write I/O. */
	int		 use_req_call_threshold;
	int		 use_msi_or_msix;

	/* Physical page numbers of the ring memory, as handed to the device. */
	uint64_t	rings_state_ppn;
	uint32_t	req_ring_num_pages;
	uint64_t	req_ring_ppn[PVSCSI_MAX_NUM_PAGES_REQ_RING];
	uint32_t	cmp_ring_num_pages;
	uint64_t	cmp_ring_ppn[PVSCSI_MAX_NUM_PAGES_CMP_RING];
	uint32_t	msg_ring_num_pages;
	uint64_t	msg_ring_ppn[PVSCSI_MAX_NUM_PAGES_MSG_RING];

	/* DMA allocations backing the rings. */
	struct	pvscsi_dma rings_state_dma;
	struct	pvscsi_dma req_ring_dma;
	struct	pvscsi_dma cmp_ring_dma;
	struct	pvscsi_dma msg_ring_dma;

	/* DMA allocations shared out among the hcbs. */
	struct	pvscsi_dma sg_list_dma;
	struct	pvscsi_dma sense_buffer_dma;
};
236 
237 static int pvscsi_get_tunable(struct pvscsi_softc *sc, char *name, int value)
238 {
239 	char cfg[64];
240 
241 	snprintf(cfg, sizeof(cfg), "hw.pvscsi.%d.%s", device_get_unit(sc->dev),
242 	    name);
243 	TUNABLE_INT_FETCH(cfg, &value);
244 
245 	return (value);
246 }
247 
248 static void
249 pvscsi_freeze(struct pvscsi_softc *sc)
250 {
251 
252 	if (!sc->frozen) {
253 		xpt_freeze_simq(sc->sim, 1);
254 		sc->frozen = 1;
255 	}
256 }
257 
258 static inline uint32_t
259 pvscsi_reg_read(struct pvscsi_softc *sc, uint32_t offset)
260 {
261 
262 	return (bus_read_4(sc->mm_res, offset));
263 }
264 
265 static inline void
266 pvscsi_reg_write(struct pvscsi_softc *sc, uint32_t offset, uint32_t val)
267 {
268 
269 	bus_write_4(sc->mm_res, offset, val);
270 }
271 
272 static inline uint32_t
273 pvscsi_read_intr_status(struct pvscsi_softc *sc)
274 {
275 
276 	return (pvscsi_reg_read(sc, PVSCSI_REG_OFFSET_INTR_STATUS));
277 }
278 
279 static inline void
280 pvscsi_write_intr_status(struct pvscsi_softc *sc, uint32_t val)
281 {
282 
283 	pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_INTR_STATUS, val);
284 }
285 
286 static inline void
287 pvscsi_intr_enable(struct pvscsi_softc *sc)
288 {
289 	uint32_t mask;
290 
291 	mask = PVSCSI_INTR_CMPL_MASK;
292 	if (sc->use_msg) {
293 		mask |= PVSCSI_INTR_MSG_MASK;
294 	}
295 
296 	pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_INTR_MASK, mask);
297 }
298 
299 static inline void
300 pvscsi_intr_disable(struct pvscsi_softc *sc)
301 {
302 
303 	pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_INTR_MASK, 0);
304 }
305 
306 static void
307 pvscsi_kick_io(struct pvscsi_softc *sc, uint8_t cdb0)
308 {
309 	struct pvscsi_rings_state *s;
310 
311 	if (cdb0 == READ_6  || cdb0 == READ_10  ||
312 	    cdb0 == READ_12  || cdb0 == READ_16 ||
313 	    cdb0 == WRITE_6 || cdb0 == WRITE_10 ||
314 	    cdb0 == WRITE_12 || cdb0 == WRITE_16) {
315 		s = sc->rings_state;
316 
317 		if (!sc->use_req_call_threshold ||
318 		    (s->req_prod_idx - s->req_cons_idx) >=
319 		     s->req_call_threshold) {
320 			pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_KICK_RW_IO, 0);
321 		}
322 	} else {
323 		pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_KICK_NON_RW_IO, 0);
324 	}
325 }
326 
327 static void
328 pvscsi_write_cmd(struct pvscsi_softc *sc, uint32_t cmd, void *data,
329 		 uint32_t len)
330 {
331 	uint32_t *data_ptr;
332 	int i;
333 
334 	KASSERT(len % sizeof(uint32_t) == 0,
335 		("command size not a multiple of 4"));
336 
337 	data_ptr = data;
338 	len /= sizeof(uint32_t);
339 
340 	pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_COMMAND, cmd);
341 	for (i = 0; i < len; ++i) {
342 		pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_COMMAND_DATA,
343 		   data_ptr[i]);
344 	}
345 }
346 
347 static inline uint64_t pvscsi_hcb_to_context(struct pvscsi_softc *sc,
348     struct pvscsi_hcb *hcb)
349 {
350 
351 	/* Offset by 1 because context must not be 0 */
352 	return (hcb - sc->hcbs + 1);
353 }
354 
355 static inline struct pvscsi_hcb* pvscsi_context_to_hcb(struct pvscsi_softc *sc,
356     uint64_t context)
357 {
358 
359 	return (sc->hcbs + (context - 1));
360 }
361 
362 static struct pvscsi_hcb *
363 pvscsi_hcb_get(struct pvscsi_softc *sc)
364 {
365 	struct pvscsi_hcb *hcb;
366 
367 	mtx_assert(&sc->lock, MA_OWNED);
368 
369 	hcb = SLIST_FIRST(&sc->free_list);
370 	if (hcb) {
371 		SLIST_REMOVE_HEAD(&sc->free_list, links);
372 	}
373 
374 	return (hcb);
375 }
376 
377 static void
378 pvscsi_hcb_put(struct pvscsi_softc *sc, struct pvscsi_hcb *hcb)
379 {
380 
381 	mtx_assert(&sc->lock, MA_OWNED);
382 	hcb->ccb = NULL;
383 	hcb->e = NULL;
384 	hcb->recovery = PVSCSI_HCB_NONE;
385 	SLIST_INSERT_HEAD(&sc->free_list, hcb, links);
386 }
387 
388 static uint32_t
389 pvscsi_get_max_targets(struct pvscsi_softc *sc)
390 {
391 	uint32_t max_targets;
392 
393 	pvscsi_write_cmd(sc, PVSCSI_CMD_GET_MAX_TARGETS, NULL, 0);
394 
395 	max_targets = pvscsi_reg_read(sc, PVSCSI_REG_OFFSET_COMMAND_STATUS);
396 
397 	if (max_targets == ~0) {
398 		max_targets = 16;
399 	}
400 
401 	return (max_targets);
402 }
403 
404 static int pvscsi_setup_req_call(struct pvscsi_softc *sc, uint32_t enable)
405 {
406 	uint32_t status;
407 	struct pvscsi_cmd_desc_setup_req_call cmd;
408 
409 	if (!pvscsi_get_tunable(sc, "pvscsi_use_req_call_threshold",
410 	    pvscsi_use_req_call_threshold)) {
411 		return (0);
412 	}
413 
414 	pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_COMMAND,
415 	    PVSCSI_CMD_SETUP_REQCALLTHRESHOLD);
416 	status = pvscsi_reg_read(sc, PVSCSI_REG_OFFSET_COMMAND_STATUS);
417 
418 	if (status != -1) {
419 		bzero(&cmd, sizeof(cmd));
420 		cmd.enable = enable;
421 		pvscsi_write_cmd(sc, PVSCSI_CMD_SETUP_REQCALLTHRESHOLD,
422 		    &cmd, sizeof(cmd));
423 		status = pvscsi_reg_read(sc, PVSCSI_REG_OFFSET_COMMAND_STATUS);
424 
425 		return (status != 0);
426 	} else {
427 		return (0);
428 	}
429 }
430 
431 static void
432 pvscsi_dma_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
433 {
434 	bus_addr_t *dest;
435 
436 	KASSERT(nseg == 1, ("more than one segment"));
437 
438 	dest = arg;
439 
440 	if (!error) {
441 		*dest = segs->ds_addr;
442 	}
443 }
444 
445 static void
446 pvscsi_dma_free(struct pvscsi_softc *sc, struct pvscsi_dma *dma)
447 {
448 
449 	if (dma->tag != NULL) {
450 		if (dma->paddr != 0) {
451 			bus_dmamap_unload(dma->tag, dma->map);
452 		}
453 
454 		if (dma->vaddr != NULL) {
455 			bus_dmamem_free(dma->tag, dma->vaddr, dma->map);
456 		}
457 
458 		bus_dma_tag_destroy(dma->tag);
459 	}
460 
461 	bzero(dma, sizeof(*dma));
462 }
463 
464 static int
465 pvscsi_dma_alloc(struct pvscsi_softc *sc, struct pvscsi_dma *dma,
466     bus_size_t size, bus_size_t alignment)
467 {
468 	int error;
469 
470 	bzero(dma, sizeof(*dma));
471 
472 	error = bus_dma_tag_create(sc->parent_dmat, alignment, 0,
473 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, size, 1, size,
474 	    BUS_DMA_ALLOCNOW, NULL, NULL, &dma->tag);
475 	if (error) {
476 		device_printf(sc->dev, "error creating dma tag, error %d\n",
477 		    error);
478 		goto fail;
479 	}
480 
481 	error = bus_dmamem_alloc(dma->tag, &dma->vaddr,
482 	    BUS_DMA_NOWAIT | BUS_DMA_ZERO, &dma->map);
483 	if (error) {
484 		device_printf(sc->dev, "error allocating dma mem, error %d\n",
485 		    error);
486 		goto fail;
487 	}
488 
489 	error = bus_dmamap_load(dma->tag, dma->map, dma->vaddr, size,
490 	    pvscsi_dma_cb, &dma->paddr, BUS_DMA_NOWAIT);
491 	if (error) {
492 		device_printf(sc->dev, "error mapping dma mam, error %d\n",
493 		    error);
494 		goto fail;
495 	}
496 
497 	dma->size = size;
498 
499 fail:
500 	if (error) {
501 		pvscsi_dma_free(sc, dma);
502 	}
503 	return (error);
504 }
505 
506 static int
507 pvscsi_dma_alloc_ppns(struct pvscsi_softc *sc, struct pvscsi_dma *dma,
508     uint64_t *ppn_list, uint32_t num_pages)
509 {
510 	int error;
511 	uint32_t i;
512 	uint64_t ppn;
513 
514 	error = pvscsi_dma_alloc(sc, dma, num_pages * PAGE_SIZE, PAGE_SIZE);
515 	if (error) {
516 		device_printf(sc->dev, "Error allocating pages, error %d\n",
517 		    error);
518 		return (error);
519 	}
520 
521 	ppn = dma->paddr >> PAGE_SHIFT;
522 	for (i = 0; i < num_pages; i++) {
523 		ppn_list[i] = ppn + i;
524 	}
525 
526 	return (0);
527 }
528 
529 static void
530 pvscsi_dma_free_per_hcb(struct pvscsi_softc *sc, uint32_t hcbs_allocated)
531 {
532 	int i;
533 	int lock_owned;
534 	struct pvscsi_hcb *hcb;
535 
536 	lock_owned = mtx_owned(&sc->lock);
537 
538 	if (lock_owned) {
539 		mtx_unlock(&sc->lock);
540 	}
541 	for (i = 0; i < hcbs_allocated; ++i) {
542 		hcb = sc->hcbs + i;
543 		callout_drain(&hcb->callout);
544 	};
545 	if (lock_owned) {
546 		mtx_lock(&sc->lock);
547 	}
548 
549 	for (i = 0; i < hcbs_allocated; ++i) {
550 		hcb = sc->hcbs + i;
551 		bus_dmamap_destroy(sc->buffer_dmat, hcb->dma_map);
552 	};
553 
554 	pvscsi_dma_free(sc, &sc->sense_buffer_dma);
555 	pvscsi_dma_free(sc, &sc->sg_list_dma);
556 }
557 
558 static int
559 pvscsi_dma_alloc_per_hcb(struct pvscsi_softc *sc)
560 {
561 	int i;
562 	int error;
563 	struct pvscsi_hcb *hcb;
564 
565 	i = 0;
566 
567 	error = pvscsi_dma_alloc(sc, &sc->sg_list_dma,
568 	    sizeof(struct pvscsi_sg_list) * sc->hcb_cnt, 1);
569 	if (error) {
570 		device_printf(sc->dev,
571 		    "Error allocation sg list DMA memory, error %d\n", error);
572 		goto fail;
573 	}
574 
575 	error = pvscsi_dma_alloc(sc, &sc->sense_buffer_dma,
576 				 PVSCSI_SENSE_LENGTH * sc->hcb_cnt, 1);
577 	if (error) {
578 		device_printf(sc->dev,
579 		    "Error allocation sg list DMA memory, error %d\n", error);
580 		goto fail;
581 	}
582 
583 	for (i = 0; i < sc->hcb_cnt; ++i) {
584 		hcb = sc->hcbs + i;
585 
586 		error = bus_dmamap_create(sc->buffer_dmat, 0, &hcb->dma_map);
587 		if (error) {
588 			device_printf(sc->dev,
589 			    "Error creating dma map for hcb %d, error %d\n",
590 			    i, error);
591 			goto fail;
592 		}
593 
594 		hcb->sense_buffer =
595 		    (void *)((caddr_t)sc->sense_buffer_dma.vaddr +
596 		    PVSCSI_SENSE_LENGTH * i);
597 		hcb->sense_buffer_paddr =
598 		    sc->sense_buffer_dma.paddr + PVSCSI_SENSE_LENGTH * i;
599 
600 		hcb->sg_list =
601 		    (struct pvscsi_sg_list *)((caddr_t)sc->sg_list_dma.vaddr +
602 		    sizeof(struct pvscsi_sg_list) * i);
603 		hcb->sg_list_paddr =
604 		    sc->sg_list_dma.paddr + sizeof(struct pvscsi_sg_list) * i;
605 
606 		callout_init_mtx(&hcb->callout, &sc->lock, 0);
607 	}
608 
609 	SLIST_INIT(&sc->free_list);
610 	for (i = (sc->hcb_cnt - 1); i >= 0; --i) {
611 		hcb = sc->hcbs + i;
612 		SLIST_INSERT_HEAD(&sc->free_list, hcb, links);
613 	}
614 
615 fail:
616 	if (error) {
617 		pvscsi_dma_free_per_hcb(sc, i);
618 	}
619 
620 	return (error);
621 }
622 
623 static void
624 pvscsi_free_rings(struct pvscsi_softc *sc)
625 {
626 
627 	pvscsi_dma_free(sc, &sc->rings_state_dma);
628 	pvscsi_dma_free(sc, &sc->req_ring_dma);
629 	pvscsi_dma_free(sc, &sc->cmp_ring_dma);
630 	if (sc->use_msg) {
631 		pvscsi_dma_free(sc, &sc->msg_ring_dma);
632 	}
633 }
634 
635 static int
636 pvscsi_allocate_rings(struct pvscsi_softc *sc)
637 {
638 	int error;
639 
640 	error = pvscsi_dma_alloc_ppns(sc, &sc->rings_state_dma,
641 	    &sc->rings_state_ppn, 1);
642 	if (error) {
643 		device_printf(sc->dev,
644 		    "Error allocating rings state, error = %d\n", error);
645 		goto fail;
646 	}
647 	sc->rings_state = sc->rings_state_dma.vaddr;
648 
649 	error = pvscsi_dma_alloc_ppns(sc, &sc->req_ring_dma, sc->req_ring_ppn,
650 	    sc->req_ring_num_pages);
651 	if (error) {
652 		device_printf(sc->dev,
653 		    "Error allocating req ring pages, error = %d\n", error);
654 		goto fail;
655 	}
656 	sc->req_ring = sc->req_ring_dma.vaddr;
657 
658 	error = pvscsi_dma_alloc_ppns(sc, &sc->cmp_ring_dma, sc->cmp_ring_ppn,
659 	    sc->cmp_ring_num_pages);
660 	if (error) {
661 		device_printf(sc->dev,
662 		    "Error allocating cmp ring pages, error = %d\n", error);
663 		goto fail;
664 	}
665 	sc->cmp_ring = sc->cmp_ring_dma.vaddr;
666 
667 	sc->msg_ring = NULL;
668 	if (sc->use_msg) {
669 		error = pvscsi_dma_alloc_ppns(sc, &sc->msg_ring_dma,
670 		    sc->msg_ring_ppn, sc->msg_ring_num_pages);
671 		if (error) {
672 			device_printf(sc->dev,
673 			    "Error allocating cmp ring pages, error = %d\n",
674 			    error);
675 			goto fail;
676 		}
677 		sc->msg_ring = sc->msg_ring_dma.vaddr;
678 	}
679 
680 	DEBUG_PRINTF(1, sc->dev, "rings_state: %p\n", sc->rings_state);
681 	DEBUG_PRINTF(1, sc->dev, "req_ring: %p - %u pages\n", sc->req_ring,
682 	    sc->req_ring_num_pages);
683 	DEBUG_PRINTF(1, sc->dev, "cmp_ring: %p - %u pages\n", sc->cmp_ring,
684 	    sc->cmp_ring_num_pages);
685 	DEBUG_PRINTF(1, sc->dev, "msg_ring: %p - %u pages\n", sc->msg_ring,
686 	    sc->msg_ring_num_pages);
687 
688 fail:
689 	if (error) {
690 		pvscsi_free_rings(sc);
691 	}
692 	return (error);
693 }
694 
695 static void
696 pvscsi_setup_rings(struct pvscsi_softc *sc)
697 {
698 	struct pvscsi_cmd_desc_setup_rings cmd;
699 	uint32_t i;
700 
701 	bzero(&cmd, sizeof(cmd));
702 
703 	cmd.rings_state_ppn = sc->rings_state_ppn;
704 
705 	cmd.req_ring_num_pages = sc->req_ring_num_pages;
706 	for (i = 0; i < sc->req_ring_num_pages; ++i) {
707 		cmd.req_ring_ppns[i] = sc->req_ring_ppn[i];
708 	}
709 
710 	cmd.cmp_ring_num_pages = sc->cmp_ring_num_pages;
711 	for (i = 0; i < sc->cmp_ring_num_pages; ++i) {
712 		cmd.cmp_ring_ppns[i] = sc->cmp_ring_ppn[i];
713 	}
714 
715 	pvscsi_write_cmd(sc, PVSCSI_CMD_SETUP_RINGS, &cmd, sizeof(cmd));
716 }
717 
718 static int
719 pvscsi_hw_supports_msg(struct pvscsi_softc *sc)
720 {
721 	uint32_t status;
722 
723 	pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_COMMAND,
724 	    PVSCSI_CMD_SETUP_MSG_RING);
725 	status = pvscsi_reg_read(sc, PVSCSI_REG_OFFSET_COMMAND_STATUS);
726 
727 	return (status != -1);
728 }
729 
730 static void
731 pvscsi_setup_msg_ring(struct pvscsi_softc *sc)
732 {
733 	struct pvscsi_cmd_desc_setup_msg_ring cmd;
734 	uint32_t i;
735 
736 	KASSERT(sc->use_msg, ("msg is not being used"));
737 
738 	bzero(&cmd, sizeof(cmd));
739 
740 	cmd.num_pages = sc->msg_ring_num_pages;
741 	for (i = 0; i < sc->msg_ring_num_pages; ++i) {
742 		cmd.ring_ppns[i] = sc->msg_ring_ppn[i];
743 	}
744 
745 	pvscsi_write_cmd(sc, PVSCSI_CMD_SETUP_MSG_RING, &cmd, sizeof(cmd));
746 }
747 
748 static void
749 pvscsi_adapter_reset(struct pvscsi_softc *sc)
750 {
751 	uint32_t val;
752 
753 	device_printf(sc->dev, "Adapter Reset\n");
754 
755 	pvscsi_write_cmd(sc, PVSCSI_CMD_ADAPTER_RESET, NULL, 0);
756 	val = pvscsi_read_intr_status(sc);
757 
758 	DEBUG_PRINTF(2, sc->dev, "adapter reset done: %u\n", val);
759 }
760 
761 static void
762 pvscsi_bus_reset(struct pvscsi_softc *sc)
763 {
764 
765 	device_printf(sc->dev, "Bus Reset\n");
766 
767 	pvscsi_write_cmd(sc, PVSCSI_CMD_RESET_BUS, NULL, 0);
768 	pvscsi_process_cmp_ring(sc);
769 
770 	DEBUG_PRINTF(2, sc->dev, "bus reset done\n");
771 }
772 
773 static void
774 pvscsi_device_reset(struct pvscsi_softc *sc, uint32_t target)
775 {
776 	struct pvscsi_cmd_desc_reset_device cmd;
777 
778 	memset(&cmd, 0, sizeof(cmd));
779 
780 	cmd.target = target;
781 
782 	device_printf(sc->dev, "Device reset for target %u\n", target);
783 
784 	pvscsi_write_cmd(sc, PVSCSI_CMD_RESET_DEVICE, &cmd, sizeof cmd);
785 	pvscsi_process_cmp_ring(sc);
786 
787 	DEBUG_PRINTF(2, sc->dev, "device reset done\n");
788 }
789 
790 static void
791 pvscsi_abort(struct pvscsi_softc *sc, uint32_t target, union ccb *ccb)
792 {
793 	struct pvscsi_cmd_desc_abort_cmd cmd;
794 	struct pvscsi_hcb *hcb;
795 	uint64_t context;
796 
797 	pvscsi_process_cmp_ring(sc);
798 
799 	hcb = ccb->ccb_h.ccb_pvscsi_hcb;
800 
801 	if (hcb != NULL) {
802 		context = pvscsi_hcb_to_context(sc, hcb);
803 
804 		memset(&cmd, 0, sizeof cmd);
805 		cmd.target = target;
806 		cmd.context = context;
807 
808 		device_printf(sc->dev, "Abort for target %u context %llx\n",
809 		    target, (unsigned long long)context);
810 
811 		pvscsi_write_cmd(sc, PVSCSI_CMD_ABORT_CMD, &cmd, sizeof(cmd));
812 		pvscsi_process_cmp_ring(sc);
813 
814 		DEBUG_PRINTF(2, sc->dev, "abort done\n");
815 	} else {
816 		DEBUG_PRINTF(1, sc->dev,
817 		    "Target %u ccb %p not found for abort\n", target, ccb);
818 	}
819 }
820 
821 static int
822 pvscsi_probe(device_t dev)
823 {
824 
825 	if (pci_get_vendor(dev) == PCI_VENDOR_ID_VMWARE &&
826 	    pci_get_device(dev) == PCI_DEVICE_ID_VMWARE_PVSCSI) {
827 		device_set_desc(dev, "VMware Paravirtual SCSI Controller");
828 		return (BUS_PROBE_DEFAULT);
829 	}
830 	return (ENXIO);
831 }
832 
833 static int
834 pvscsi_shutdown(device_t dev)
835 {
836 
837 	return (0);
838 }
839 
840 static void
841 pvscsi_timeout(void *arg)
842 {
843 	struct pvscsi_hcb *hcb;
844 	struct pvscsi_softc *sc;
845 	union ccb *ccb;
846 
847 	hcb = arg;
848 	ccb = hcb->ccb;
849 
850 	if (ccb == NULL) {
851 		/* Already completed */
852 		return;
853 	}
854 
855 	sc = ccb->ccb_h.ccb_pvscsi_sc;
856 	mtx_assert(&sc->lock, MA_OWNED);
857 
858 	device_printf(sc->dev, "Command timed out hcb=%p ccb=%p.\n", hcb, ccb);
859 
860 	switch (hcb->recovery) {
861 	case PVSCSI_HCB_NONE:
862 		hcb->recovery = PVSCSI_HCB_ABORT;
863 		pvscsi_abort(sc, ccb->ccb_h.target_id, ccb);
864 		callout_reset_sbt(&hcb->callout, PVSCSI_ABORT_TIMEOUT * SBT_1S,
865 		    0, pvscsi_timeout, hcb, 0);
866 		break;
867 	case PVSCSI_HCB_ABORT:
868 		hcb->recovery = PVSCSI_HCB_DEVICE_RESET;
869 		pvscsi_freeze(sc);
870 		pvscsi_device_reset(sc, ccb->ccb_h.target_id);
871 		callout_reset_sbt(&hcb->callout, PVSCSI_RESET_TIMEOUT * SBT_1S,
872 		    0, pvscsi_timeout, hcb, 0);
873 		break;
874 	case PVSCSI_HCB_DEVICE_RESET:
875 		hcb->recovery = PVSCSI_HCB_BUS_RESET;
876 		pvscsi_freeze(sc);
877 		pvscsi_bus_reset(sc);
878 		callout_reset_sbt(&hcb->callout, PVSCSI_RESET_TIMEOUT * SBT_1S,
879 		    0, pvscsi_timeout, hcb, 0);
880 		break;
881 	case PVSCSI_HCB_BUS_RESET:
882 		pvscsi_freeze(sc);
883 		pvscsi_adapter_reset(sc);
884 		break;
885 	};
886 }
887 
888 static void
889 pvscsi_process_completion(struct pvscsi_softc *sc,
890     struct pvscsi_ring_cmp_desc *e)
891 {
892 	struct pvscsi_hcb *hcb;
893 	union ccb *ccb;
894 	uint32_t status;
895 	uint32_t btstat;
896 	uint32_t sdstat;
897 	bus_dmasync_op_t op;
898 
899 	hcb = pvscsi_context_to_hcb(sc, e->context);
900 
901 	callout_stop(&hcb->callout);
902 
903 	ccb = hcb->ccb;
904 
905 	btstat = e->host_status;
906 	sdstat = e->scsi_status;
907 
908 	ccb->csio.scsi_status = sdstat;
909 	ccb->csio.resid = ccb->csio.dxfer_len - e->data_len;
910 
911 	if ((ccb->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_NONE) {
912 		if ((ccb->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN) {
913 			op = BUS_DMASYNC_POSTREAD;
914 		} else {
915 			op = BUS_DMASYNC_POSTWRITE;
916 		}
917 		bus_dmamap_sync(sc->buffer_dmat, hcb->dma_map, op);
918 		bus_dmamap_unload(sc->buffer_dmat, hcb->dma_map);
919 	}
920 
921 	if (btstat == BTSTAT_SUCCESS && sdstat == SCSI_STATUS_OK) {
922 		DEBUG_PRINTF(3, sc->dev,
923 		    "completing command context %llx success\n",
924 		    (unsigned long long)e->context);
925 		ccb->csio.resid = 0;
926 		status = CAM_REQ_CMP;
927 	} else {
928 		switch (btstat) {
929 		case BTSTAT_SUCCESS:
930 		case BTSTAT_LINKED_COMMAND_COMPLETED:
931 		case BTSTAT_LINKED_COMMAND_COMPLETED_WITH_FLAG:
932 			switch (sdstat) {
933 			case SCSI_STATUS_OK:
934 				ccb->csio.resid = 0;
935 				status = CAM_REQ_CMP;
936 				break;
937 			case SCSI_STATUS_CHECK_COND:
938 				status = CAM_SCSI_STATUS_ERROR;
939 
940 				if (ccb->csio.sense_len != 0) {
941 					status |= CAM_AUTOSNS_VALID;
942 
943 					memset(&ccb->csio.sense_data, 0,
944 					    sizeof(ccb->csio.sense_data));
945 					memcpy(&ccb->csio.sense_data,
946 					    hcb->sense_buffer,
947 					    MIN(ccb->csio.sense_len,
948 						e->sense_len));
949 				}
950 				break;
951 			case SCSI_STATUS_BUSY:
952 			case SCSI_STATUS_QUEUE_FULL:
953 				status = CAM_REQUEUE_REQ;
954 				break;
955 			case SCSI_STATUS_CMD_TERMINATED:
956 			case SCSI_STATUS_TASK_ABORTED:
957 				status = CAM_REQ_ABORTED;
958 				break;
959 			default:
960 				DEBUG_PRINTF(1, sc->dev,
961 				    "ccb: %p sdstat=0x%x\n", ccb, sdstat);
962 				status = CAM_SCSI_STATUS_ERROR;
963 				break;
964 			}
965 			break;
966 		case BTSTAT_SELTIMEO:
967 			status = CAM_SEL_TIMEOUT;
968 			break;
969 		case BTSTAT_DATARUN:
970 		case BTSTAT_DATA_UNDERRUN:
971 			status = CAM_DATA_RUN_ERR;
972 			break;
973 		case BTSTAT_ABORTQUEUE:
974 		case BTSTAT_HATIMEOUT:
975 			status = CAM_REQUEUE_REQ;
976 			break;
977 		case BTSTAT_NORESPONSE:
978 		case BTSTAT_SENTRST:
979 		case BTSTAT_RECVRST:
980 		case BTSTAT_BUSRESET:
981 			status = CAM_SCSI_BUS_RESET;
982 			break;
983 		case BTSTAT_SCSIPARITY:
984 			status = CAM_UNCOR_PARITY;
985 			break;
986 		case BTSTAT_BUSFREE:
987 			status = CAM_UNEXP_BUSFREE;
988 			break;
989 		case BTSTAT_INVPHASE:
990 			status = CAM_SEQUENCE_FAIL;
991 			break;
992 		case BTSTAT_SENSFAILED:
993 			status = CAM_AUTOSENSE_FAIL;
994 			break;
995 		case BTSTAT_LUNMISMATCH:
996 		case BTSTAT_TAGREJECT:
997 		case BTSTAT_DISCONNECT:
998 		case BTSTAT_BADMSG:
999 		case BTSTAT_INVPARAM:
1000 			status = CAM_REQ_CMP_ERR;
1001 			break;
1002 		case BTSTAT_HASOFTWARE:
1003 		case BTSTAT_HAHARDWARE:
1004 			status = CAM_NO_HBA;
1005 			break;
1006 		default:
1007 			device_printf(sc->dev, "unknown hba status: 0x%x\n",
1008 			    btstat);
1009 			status = CAM_NO_HBA;
1010 			break;
1011 		}
1012 
1013 		DEBUG_PRINTF(3, sc->dev,
1014 		    "completing command context %llx btstat %x sdstat %x - status %x\n",
1015 		    (unsigned long long)e->context, btstat, sdstat, status);
1016 	}
1017 
1018 	ccb->ccb_h.ccb_pvscsi_hcb = NULL;
1019 	ccb->ccb_h.ccb_pvscsi_sc = NULL;
1020 	pvscsi_hcb_put(sc, hcb);
1021 
1022 	ccb->ccb_h.status =
1023 	    status | (ccb->ccb_h.status & ~(CAM_STATUS_MASK | CAM_SIM_QUEUED));
1024 
1025 	if (sc->frozen) {
1026 		ccb->ccb_h.status |= CAM_RELEASE_SIMQ;
1027 		sc->frozen = 0;
1028 	}
1029 
1030 	if (status != CAM_REQ_CMP) {
1031 		ccb->ccb_h.status |= CAM_DEV_QFRZN;
1032 		xpt_freeze_devq(ccb->ccb_h.path, /*count*/ 1);
1033 	}
1034 	xpt_done(ccb);
1035 }
1036 
1037 static void
1038 pvscsi_process_cmp_ring(struct pvscsi_softc *sc)
1039 {
1040 	struct pvscsi_ring_cmp_desc *ring;
1041 	struct pvscsi_rings_state *s;
1042 	struct pvscsi_ring_cmp_desc *e;
1043 	uint32_t mask;
1044 
1045 	mtx_assert(&sc->lock, MA_OWNED);
1046 
1047 	s = sc->rings_state;
1048 	ring = sc->cmp_ring;
1049 	mask = MASK(s->cmp_num_entries_log2);
1050 
1051 	while (s->cmp_cons_idx != s->cmp_prod_idx) {
1052 		e = ring + (s->cmp_cons_idx & mask);
1053 
1054 		pvscsi_process_completion(sc, e);
1055 
1056 		mb();
1057 		s->cmp_cons_idx++;
1058 	}
1059 }
1060 
1061 static void
1062 pvscsi_process_msg(struct pvscsi_softc *sc, struct pvscsi_ring_msg_desc *e)
1063 {
1064 	struct pvscsi_ring_msg_dev_status_changed *desc;
1065 
1066 	union ccb *ccb;
1067 	switch (e->type) {
1068 	case PVSCSI_MSG_DEV_ADDED:
1069 	case PVSCSI_MSG_DEV_REMOVED: {
1070 		desc = (struct pvscsi_ring_msg_dev_status_changed *)e;
1071 
1072 		device_printf(sc->dev, "MSG: device %s at scsi%u:%u:%u\n",
1073 		    desc->type == PVSCSI_MSG_DEV_ADDED ? "addition" : "removal",
1074 		    desc->bus, desc->target, desc->lun[1]);
1075 
1076 		ccb = xpt_alloc_ccb_nowait();
1077 		if (ccb == NULL) {
1078 			device_printf(sc->dev,
1079 			    "Error allocating CCB for dev change.\n");
1080 			break;
1081 		}
1082 
1083 		if (xpt_create_path(&ccb->ccb_h.path, NULL,
1084 		    cam_sim_path(sc->sim), desc->target, desc->lun[1])
1085 		    != CAM_REQ_CMP) {
1086 			device_printf(sc->dev,
1087 			    "Error creating path for dev change.\n");
1088 			xpt_free_ccb(ccb);
1089 			break;
1090 		}
1091 
1092 		xpt_rescan(ccb);
1093 	} break;
1094 	default:
1095 		device_printf(sc->dev, "Unknown msg type 0x%x\n", e->type);
1096 	};
1097 }
1098 
1099 static void
1100 pvscsi_process_msg_ring(struct pvscsi_softc *sc)
1101 {
1102 	struct pvscsi_ring_msg_desc *ring;
1103 	struct pvscsi_rings_state *s;
1104 	struct pvscsi_ring_msg_desc *e;
1105 	uint32_t mask;
1106 
1107 	mtx_assert(&sc->lock, MA_OWNED);
1108 
1109 	s = sc->rings_state;
1110 	ring = sc->msg_ring;
1111 	mask = MASK(s->msg_num_entries_log2);
1112 
1113 	while (s->msg_cons_idx != s->msg_prod_idx) {
1114 		e = ring + (s->msg_cons_idx & mask);
1115 
1116 		pvscsi_process_msg(sc, e);
1117 
1118 		mb();
1119 		s->msg_cons_idx++;
1120 	}
1121 }
1122 
1123 static void
1124 pvscsi_intr_locked(struct pvscsi_softc *sc)
1125 {
1126 	uint32_t val;
1127 
1128 	mtx_assert(&sc->lock, MA_OWNED);
1129 
1130 	val = pvscsi_read_intr_status(sc);
1131 
1132 	if ((val & PVSCSI_INTR_ALL_SUPPORTED) != 0) {
1133 		pvscsi_write_intr_status(sc, val & PVSCSI_INTR_ALL_SUPPORTED);
1134 		pvscsi_process_cmp_ring(sc);
1135 		if (sc->use_msg) {
1136 			pvscsi_process_msg_ring(sc);
1137 		}
1138 	}
1139 }
1140 
1141 static void
1142 pvscsi_intr(void *xsc)
1143 {
1144 	struct pvscsi_softc *sc;
1145 
1146 	sc = xsc;
1147 
1148 	mtx_assert(&sc->lock, MA_NOTOWNED);
1149 
1150 	mtx_lock(&sc->lock);
1151 	pvscsi_intr_locked(xsc);
1152 	mtx_unlock(&sc->lock);
1153 }
1154 
1155 static void
1156 pvscsi_poll(struct cam_sim *sim)
1157 {
1158 	struct pvscsi_softc *sc;
1159 
1160 	sc = cam_sim_softc(sim);
1161 
1162 	mtx_assert(&sc->lock, MA_OWNED);
1163 	pvscsi_intr_locked(sc);
1164 }
1165 
1166 static void
1167 pvscsi_execute_ccb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
1168 {
1169 	struct pvscsi_hcb *hcb;
1170 	struct pvscsi_ring_req_desc *e;
1171 	union ccb *ccb;
1172 	struct pvscsi_softc *sc;
1173 	struct pvscsi_rings_state *s;
1174 	uint8_t cdb0;
1175 	bus_dmasync_op_t op;
1176 
1177 	hcb = arg;
1178 	ccb = hcb->ccb;
1179 	e = hcb->e;
1180 	sc = ccb->ccb_h.ccb_pvscsi_sc;
1181 	s = sc->rings_state;
1182 
1183 	mtx_assert(&sc->lock, MA_OWNED);
1184 
1185 	if (error) {
1186 		device_printf(sc->dev, "pvscsi_execute_ccb error %d\n", error);
1187 
1188 		if (error == EFBIG) {
1189 			ccb->ccb_h.status = CAM_REQ_TOO_BIG;
1190 		} else {
1191 			ccb->ccb_h.status = CAM_REQ_CMP_ERR;
1192 		}
1193 
1194 		pvscsi_hcb_put(sc, hcb);
1195 		xpt_done(ccb);
1196 		return;
1197 	}
1198 
1199 	e->flags = 0;
1200 	op = 0;
1201 	switch (ccb->ccb_h.flags & CAM_DIR_MASK) {
1202 	case CAM_DIR_NONE:
1203 		e->flags |= PVSCSI_FLAG_CMD_DIR_NONE;
1204 		break;
1205 	case CAM_DIR_IN:
1206 		e->flags |= PVSCSI_FLAG_CMD_DIR_TOHOST;
1207 		op = BUS_DMASYNC_PREREAD;
1208 		break;
1209 	case CAM_DIR_OUT:
1210 		e->flags |= PVSCSI_FLAG_CMD_DIR_TODEVICE;
1211 		op = BUS_DMASYNC_PREWRITE;
1212 		break;
1213 	case CAM_DIR_BOTH:
1214 		/* TODO: does this need handling? */
1215 		break;
1216 	}
1217 
1218 	if (nseg != 0) {
1219 		if (nseg > 1) {
1220 			int i;
1221 			struct pvscsi_sg_element *sge;
1222 
1223 			KASSERT(nseg <= PVSCSI_MAX_SG_ENTRIES_PER_SEGMENT,
1224 			    ("too many sg segments"));
1225 
1226 			sge = hcb->sg_list->sge;
1227 			e->flags |= PVSCSI_FLAG_CMD_WITH_SG_LIST;
1228 
1229 			for (i = 0; i < nseg; ++i) {
1230 				sge[i].addr = segs[i].ds_addr;
1231 				sge[i].length = segs[i].ds_len;
1232 				sge[i].flags = 0;
1233 			}
1234 
1235 			e->data_addr = hcb->sg_list_paddr;
1236 		} else {
1237 			e->data_addr = segs->ds_addr;
1238 		}
1239 
1240 		bus_dmamap_sync(sc->buffer_dmat, hcb->dma_map, op);
1241 	} else {
1242 		e->data_addr = 0;
1243 	}
1244 
1245 	cdb0 = e->cdb[0];
1246 	ccb->ccb_h.status |= CAM_SIM_QUEUED;
1247 
1248 	if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) {
1249 		callout_reset_sbt(&hcb->callout, ccb->ccb_h.timeout * SBT_1MS,
1250 		    0, pvscsi_timeout, hcb, 0);
1251 	}
1252 
1253 	mb();
1254 	s->req_prod_idx++;
1255 	pvscsi_kick_io(sc, cdb0);
1256 }
1257 
1258 static void
1259 pvscsi_action(struct cam_sim *sim, union ccb *ccb)
1260 {
1261 	struct pvscsi_softc *sc;
1262 	struct ccb_hdr *ccb_h;
1263 
1264 	sc = cam_sim_softc(sim);
1265 	ccb_h = &ccb->ccb_h;
1266 
1267 	mtx_assert(&sc->lock, MA_OWNED);
1268 
1269 	switch (ccb_h->func_code) {
1270 	case XPT_SCSI_IO:
1271 	{
1272 		struct ccb_scsiio *csio;
1273 		uint32_t req_num_entries_log2;
1274 		struct pvscsi_ring_req_desc *ring;
1275 		struct pvscsi_ring_req_desc *e;
1276 		struct pvscsi_rings_state *s;
1277 		struct pvscsi_hcb *hcb;
1278 
1279 		csio = &ccb->csio;
1280 		ring = sc->req_ring;
1281 		s = sc->rings_state;
1282 
1283 		hcb = NULL;
1284 
1285 		/*
1286 		 * Check if it was completed already (such as aborted
1287 		 * by upper layers)
1288 		 */
1289 		if ((ccb_h->status & CAM_STATUS_MASK) != CAM_REQ_INPROG) {
1290 			xpt_done(ccb);
1291 			return;
1292 		}
1293 
1294 		req_num_entries_log2 = s->req_num_entries_log2;
1295 
1296 		if (s->req_prod_idx - s->cmp_cons_idx >=
1297 		    (1 << req_num_entries_log2)) {
1298 			device_printf(sc->dev,
1299 			    "Not enough room on completion ring.\n");
1300 			pvscsi_freeze(sc);
1301 			ccb_h->status = CAM_REQUEUE_REQ;
1302 			goto finish_ccb;
1303 		}
1304 
1305 		hcb = pvscsi_hcb_get(sc);
1306 		if (hcb == NULL) {
1307 			device_printf(sc->dev, "No free hcbs.\n");
1308 			pvscsi_freeze(sc);
1309 			ccb_h->status = CAM_REQUEUE_REQ;
1310 			goto finish_ccb;
1311 		}
1312 
1313 		hcb->ccb = ccb;
1314 		ccb_h->ccb_pvscsi_hcb = hcb;
1315 		ccb_h->ccb_pvscsi_sc = sc;
1316 
1317 		if (csio->cdb_len > sizeof(e->cdb)) {
1318 			DEBUG_PRINTF(2, sc->dev, "cdb length %u too large\n",
1319 			    csio->cdb_len);
1320 			ccb_h->status = CAM_REQ_INVALID;
1321 			goto finish_ccb;
1322 		}
1323 
1324 		if (ccb_h->flags & CAM_CDB_PHYS) {
1325 			DEBUG_PRINTF(2, sc->dev,
1326 			    "CAM_CDB_PHYS not implemented\n");
1327 			ccb_h->status = CAM_REQ_INVALID;
1328 			goto finish_ccb;
1329 		}
1330 
1331 		e = ring + (s->req_prod_idx & MASK(req_num_entries_log2));
1332 
1333 		e->bus = cam_sim_bus(sim);
1334 		e->target = ccb_h->target_id;
1335 		memset(e->lun, 0, sizeof(e->lun));
1336 		e->lun[1] = ccb_h->target_lun;
1337 		e->data_addr = 0;
1338 		e->data_len = csio->dxfer_len;
1339 		e->vcpu_hint = curcpu;
1340 
1341 		e->cdb_len = csio->cdb_len;
1342 		memcpy(e->cdb, scsiio_cdb_ptr(csio), csio->cdb_len);
1343 
1344 		e->sense_addr = 0;
1345 		e->sense_len = csio->sense_len;
1346 		if (e->sense_len > 0) {
1347 			e->sense_addr = hcb->sense_buffer_paddr;
1348 		}
1349 
1350 		e->tag = MSG_SIMPLE_Q_TAG;
1351 		if (ccb_h->flags & CAM_TAG_ACTION_VALID) {
1352 			e->tag = csio->tag_action;
1353 		}
1354 
1355 		e->context = pvscsi_hcb_to_context(sc, hcb);
1356 		hcb->e = e;
1357 
1358 		DEBUG_PRINTF(3, sc->dev,
1359 		    " queuing command %02x context %llx\n", e->cdb[0],
1360 		    (unsigned long long)e->context);
1361 		bus_dmamap_load_ccb(sc->buffer_dmat, hcb->dma_map, ccb,
1362 		    pvscsi_execute_ccb, hcb, 0);
1363 		break;
1364 
1365 finish_ccb:
1366 		if (hcb != NULL) {
1367 			pvscsi_hcb_put(sc, hcb);
1368 		}
1369 		xpt_done(ccb);
1370 	} break;
1371 	case XPT_ABORT:
1372 	{
1373 		struct pvscsi_hcb *abort_hcb;
1374 		union ccb *abort_ccb;
1375 
1376 		abort_ccb = ccb->cab.abort_ccb;
1377 		abort_hcb = abort_ccb->ccb_h.ccb_pvscsi_hcb;
1378 
1379 		if (abort_hcb->ccb != NULL && abort_hcb->ccb == abort_ccb) {
1380 			if (abort_ccb->ccb_h.func_code == XPT_SCSI_IO) {
1381 				pvscsi_abort(sc, ccb_h->target_id, abort_ccb);
1382 				ccb_h->status = CAM_REQ_CMP;
1383 			} else {
1384 				ccb_h->status = CAM_UA_ABORT;
1385 			}
1386 		} else {
1387 			device_printf(sc->dev,
1388 			    "Could not find hcb for ccb %p (tgt %u)\n",
1389 			    ccb, ccb_h->target_id);
1390 			ccb_h->status = CAM_REQ_CMP;
1391 		}
1392 		xpt_done(ccb);
1393 	} break;
1394 	case XPT_RESET_DEV:
1395 	{
1396 		pvscsi_device_reset(sc, ccb_h->target_id);
1397 		ccb_h->status = CAM_REQ_CMP;
1398 		xpt_done(ccb);
1399 	} break;
1400 	case XPT_RESET_BUS:
1401 	{
1402 		pvscsi_bus_reset(sc);
1403 		ccb_h->status = CAM_REQ_CMP;
1404 		xpt_done(ccb);
1405 	} break;
1406 	case XPT_PATH_INQ:
1407 	{
1408 		struct ccb_pathinq *cpi;
1409 
1410 		cpi = &ccb->cpi;
1411 
1412 		cpi->version_num = 1;
1413 		cpi->hba_inquiry = PI_TAG_ABLE;
1414 		cpi->target_sprt = 0;
1415 		cpi->hba_misc = PIM_NOBUSRESET | PIM_UNMAPPED;
1416 		cpi->hba_eng_cnt = 0;
1417 		/* cpi->vuhba_flags = 0; */
1418 		cpi->max_target = sc->max_targets;
1419 		cpi->max_lun = 0;
1420 		cpi->async_flags = 0;
1421 		cpi->hpath_id = 0;
1422 		cpi->unit_number = cam_sim_unit(sim);
1423 		cpi->bus_id = cam_sim_bus(sim);
1424 		cpi->initiator_id = 7;
1425 		cpi->base_transfer_speed = 750000;
1426 		strlcpy(cpi->sim_vid, "VMware", SIM_IDLEN);
1427 		strlcpy(cpi->hba_vid, "VMware", HBA_IDLEN);
1428 		strlcpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN);
1429 		cpi->maxio = PVSCSI_MAX_SG_ENTRIES_PER_SEGMENT * PAGE_SIZE;
1430 		cpi->protocol = PROTO_SCSI;
1431 		cpi->protocol_version = SCSI_REV_SPC2;
1432 		cpi->transport = XPORT_SAS;
1433 		cpi->transport_version = 0;
1434 
1435 		ccb_h->status = CAM_REQ_CMP;
1436 		xpt_done(ccb);
1437 	} break;
1438 	case XPT_GET_TRAN_SETTINGS:
1439 	{
1440 		struct ccb_trans_settings *cts;
1441 
1442 		cts = &ccb->cts;
1443 
1444 		cts->protocol = PROTO_SCSI;
1445 		cts->protocol_version = SCSI_REV_SPC2;
1446 		cts->transport = XPORT_SAS;
1447 		cts->transport_version = 0;
1448 
1449 		cts->proto_specific.scsi.flags = CTS_SCSI_FLAGS_TAG_ENB;
1450 		cts->proto_specific.scsi.valid = CTS_SCSI_VALID_TQ;
1451 
1452 		ccb_h->status = CAM_REQ_CMP;
1453 		xpt_done(ccb);
1454 	} break;
1455 	case XPT_CALC_GEOMETRY:
1456 	{
1457 		cam_calc_geometry(&ccb->ccg, 1);
1458 		xpt_done(ccb);
1459 	} break;
1460 	default:
1461 		ccb_h->status = CAM_REQ_INVALID;
1462 		xpt_done(ccb);
1463 		break;
1464 	}
1465 }
1466 
1467 static void
1468 pvscsi_free_interrupts(struct pvscsi_softc *sc)
1469 {
1470 
1471 	if (sc->irq_handler != NULL) {
1472 		bus_teardown_intr(sc->dev, sc->irq_res, sc->irq_handler);
1473 	}
1474 	if (sc->irq_res != NULL) {
1475 		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_id,
1476 		    sc->irq_res);
1477 	}
1478 	if (sc->use_msi_or_msix) {
1479 		pci_release_msi(sc->dev);
1480 	}
1481 }
1482 
1483 static int
1484 pvscsi_setup_interrupts(struct pvscsi_softc *sc)
1485 {
1486 	int error;
1487 	int flags;
1488 	int use_msix;
1489 	int use_msi;
1490 	int count;
1491 
1492 	sc->use_msi_or_msix = 0;
1493 
1494 	use_msix = pvscsi_get_tunable(sc, "use_msix", pvscsi_use_msix);
1495 	use_msi = pvscsi_get_tunable(sc, "use_msi", pvscsi_use_msi);
1496 
1497 	if (use_msix && pci_msix_count(sc->dev) > 0) {
1498 		count = 1;
1499 		if (pci_alloc_msix(sc->dev, &count) == 0 && count == 1) {
1500 			sc->use_msi_or_msix = 1;
1501 			device_printf(sc->dev, "Interrupt: MSI-X\n");
1502 		} else {
1503 			pci_release_msi(sc->dev);
1504 		}
1505 	}
1506 
1507 	if (sc->use_msi_or_msix == 0 && use_msi && pci_msi_count(sc->dev) > 0) {
1508 		count = 1;
1509 		if (pci_alloc_msi(sc->dev, &count) == 0 && count == 1) {
1510 			sc->use_msi_or_msix = 1;
1511 			device_printf(sc->dev, "Interrupt: MSI\n");
1512 		} else {
1513 			pci_release_msi(sc->dev);
1514 		}
1515 	}
1516 
1517 	flags = RF_ACTIVE;
1518 	if (sc->use_msi_or_msix) {
1519 		sc->irq_id = 1;
1520 	} else {
1521 		device_printf(sc->dev, "Interrupt: INT\n");
1522 		sc->irq_id = 0;
1523 		flags |= RF_SHAREABLE;
1524 	}
1525 
1526 	sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_id,
1527 	    flags);
1528 	if (sc->irq_res == NULL) {
1529 		device_printf(sc->dev, "IRQ allocation failed\n");
1530 		if (sc->use_msi_or_msix) {
1531 			pci_release_msi(sc->dev);
1532 		}
1533 		return (ENXIO);
1534 	}
1535 
1536 	error = bus_setup_intr(sc->dev, sc->irq_res,
1537 	    INTR_TYPE_CAM | INTR_MPSAFE, NULL, pvscsi_intr, sc,
1538 	    &sc->irq_handler);
1539 	if (error) {
1540 		device_printf(sc->dev, "IRQ handler setup failed\n");
1541 		pvscsi_free_interrupts(sc);
1542 		return (error);
1543 	}
1544 
1545 	return (0);
1546 }
1547 
1548 static void
1549 pvscsi_free_all(struct pvscsi_softc *sc)
1550 {
1551 
1552 	if (sc->sim) {
1553 		int32_t status;
1554 
1555 		if (sc->bus_path) {
1556 			xpt_free_path(sc->bus_path);
1557 		}
1558 
1559 		status = xpt_bus_deregister(cam_sim_path(sc->sim));
1560 		if (status != CAM_REQ_CMP) {
1561 			device_printf(sc->dev,
1562 			    "Error deregistering bus, status=%d\n", status);
1563 		}
1564 
1565 		cam_sim_free(sc->sim, TRUE);
1566 	}
1567 
1568 	pvscsi_dma_free_per_hcb(sc, sc->hcb_cnt);
1569 
1570 	if (sc->hcbs) {
1571 		free(sc->hcbs, M_PVSCSI);
1572 	}
1573 
1574 	pvscsi_free_rings(sc);
1575 
1576 	pvscsi_free_interrupts(sc);
1577 
1578 	if (sc->buffer_dmat != NULL) {
1579 		bus_dma_tag_destroy(sc->buffer_dmat);
1580 	}
1581 
1582 	if (sc->parent_dmat != NULL) {
1583 		bus_dma_tag_destroy(sc->parent_dmat);
1584 	}
1585 
1586 	if (sc->mm_res != NULL) {
1587 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->mm_rid,
1588 		    sc->mm_res);
1589 	}
1590 }
1591 
1592 static int
1593 pvscsi_attach(device_t dev)
1594 {
1595 	struct pvscsi_softc *sc;
1596 	int rid;
1597 	int barid;
1598 	int error;
1599 	int max_queue_depth;
1600 	int adapter_queue_size;
1601 	struct cam_devq *devq;
1602 
1603 	sc = device_get_softc(dev);
1604 	sc->dev = dev;
1605 
1606 	mtx_init(&sc->lock, "pvscsi", NULL, MTX_DEF);
1607 
1608 	pci_enable_busmaster(dev);
1609 
1610 	sc->mm_rid = -1;
1611 	for (barid = 0; barid <= PCIR_MAX_BAR_0; ++barid) {
1612 		rid = PCIR_BAR(barid);
1613 
1614 		sc->mm_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
1615 		    RF_ACTIVE);
1616 		if (sc->mm_res != NULL) {
1617 			sc->mm_rid = rid;
1618 			break;
1619 		}
1620 	}
1621 
1622 	if (sc->mm_res == NULL) {
1623 		device_printf(dev, "could not map device memory\n");
1624 		return (ENXIO);
1625 	}
1626 
1627 	error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0,
1628 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE,
1629 	    BUS_SPACE_UNRESTRICTED, BUS_SPACE_MAXSIZE, 0, NULL, NULL,
1630 	    &sc->parent_dmat);
1631 	if (error) {
1632 		device_printf(dev, "parent dma tag create failure, error %d\n",
1633 		    error);
1634 		pvscsi_free_all(sc);
1635 		return (ENXIO);
1636 	}
1637 
1638 	error = bus_dma_tag_create(sc->parent_dmat, 1, 0,
1639 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
1640 	    PVSCSI_MAX_SG_ENTRIES_PER_SEGMENT * PAGE_SIZE,
1641 	    PVSCSI_MAX_SG_ENTRIES_PER_SEGMENT, PAGE_SIZE, BUS_DMA_ALLOCNOW,
1642 	    NULL, NULL, &sc->buffer_dmat);
1643 	if (error) {
1644 		device_printf(dev, "parent dma tag create failure, error %d\n",
1645 		    error);
1646 		pvscsi_free_all(sc);
1647 		return (ENXIO);
1648 	}
1649 
1650 	error = pvscsi_setup_interrupts(sc);
1651 	if (error) {
1652 		device_printf(dev, "Interrupt setup failed\n");
1653 		pvscsi_free_all(sc);
1654 		return (error);
1655 	}
1656 
1657 	sc->max_targets = pvscsi_get_max_targets(sc);
1658 
1659 	sc->use_msg = pvscsi_get_tunable(sc, "use_msg", pvscsi_use_msg) &&
1660 	    pvscsi_hw_supports_msg(sc);
1661 	sc->msg_ring_num_pages = sc->use_msg ? 1 : 0;
1662 
1663 	sc->req_ring_num_pages = pvscsi_get_tunable(sc, "request_ring_pages",
1664 	    pvscsi_request_ring_pages);
1665 	if (sc->req_ring_num_pages <= 0) {
1666 		if (sc->max_targets <= 16) {
1667 			sc->req_ring_num_pages =
1668 			    PVSCSI_DEFAULT_NUM_PAGES_REQ_RING;
1669 		} else {
1670 			sc->req_ring_num_pages = PVSCSI_MAX_NUM_PAGES_REQ_RING;
1671 		}
1672 	} else if (sc->req_ring_num_pages > PVSCSI_MAX_NUM_PAGES_REQ_RING) {
1673 		sc->req_ring_num_pages = PVSCSI_MAX_NUM_PAGES_REQ_RING;
1674 	}
1675 	sc->cmp_ring_num_pages = sc->req_ring_num_pages;
1676 
1677 	max_queue_depth = pvscsi_get_tunable(sc, "max_queue_depth",
1678 	    pvscsi_max_queue_depth);
1679 
1680 	adapter_queue_size = (sc->req_ring_num_pages * PAGE_SIZE) /
1681 	    sizeof(struct pvscsi_ring_req_desc);
1682 	if (max_queue_depth > 0) {
1683 		adapter_queue_size = MIN(adapter_queue_size, max_queue_depth);
1684 	}
1685 	adapter_queue_size = MIN(adapter_queue_size,
1686 	    PVSCSI_MAX_REQ_QUEUE_DEPTH);
1687 
1688 	device_printf(sc->dev, "Use Msg: %d\n", sc->use_msg);
1689 	device_printf(sc->dev, "REQ num pages: %d\n", sc->req_ring_num_pages);
1690 	device_printf(sc->dev, "CMP num pages: %d\n", sc->cmp_ring_num_pages);
1691 	device_printf(sc->dev, "MSG num pages: %d\n", sc->msg_ring_num_pages);
1692 	device_printf(sc->dev, "Queue size: %d\n", adapter_queue_size);
1693 
1694 	if (pvscsi_allocate_rings(sc)) {
1695 		device_printf(dev, "ring allocation failed\n");
1696 		pvscsi_free_all(sc);
1697 		return (ENXIO);
1698 	}
1699 
1700 	sc->hcb_cnt = adapter_queue_size;
1701 	sc->hcbs = malloc(sc->hcb_cnt * sizeof(*sc->hcbs), M_PVSCSI,
1702 	    M_NOWAIT | M_ZERO);
1703 	if (sc->hcbs == NULL) {
1704 		device_printf(dev, "error allocating hcb array\n");
1705 		pvscsi_free_all(sc);
1706 		return (ENXIO);
1707 	}
1708 
1709 	if (pvscsi_dma_alloc_per_hcb(sc)) {
1710 		device_printf(dev, "error allocating per hcb dma memory\n");
1711 		pvscsi_free_all(sc);
1712 		return (ENXIO);
1713 	}
1714 
1715 	pvscsi_adapter_reset(sc);
1716 
1717 	devq = cam_simq_alloc(adapter_queue_size);
1718 	if (devq == NULL) {
1719 		device_printf(dev, "cam devq alloc failed\n");
1720 		pvscsi_free_all(sc);
1721 		return (ENXIO);
1722 	}
1723 
1724 	sc->sim = cam_sim_alloc(pvscsi_action, pvscsi_poll, "pvscsi", sc,
1725 	    device_get_unit(dev), &sc->lock, 1, adapter_queue_size, devq);
1726 	if (sc->sim == NULL) {
1727 		device_printf(dev, "cam sim alloc failed\n");
1728 		cam_simq_free(devq);
1729 		pvscsi_free_all(sc);
1730 		return (ENXIO);
1731 	}
1732 
1733 	mtx_lock(&sc->lock);
1734 
1735 	if (xpt_bus_register(sc->sim, dev, 0) != CAM_SUCCESS) {
1736 		device_printf(dev, "xpt bus register failed\n");
1737 		pvscsi_free_all(sc);
1738 		mtx_unlock(&sc->lock);
1739 		return (ENXIO);
1740 	}
1741 
1742 	if (xpt_create_path(&sc->bus_path, NULL, cam_sim_path(sc->sim),
1743 	    CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
1744 		device_printf(dev, "xpt create path failed\n");
1745 		pvscsi_free_all(sc);
1746 		mtx_unlock(&sc->lock);
1747 		return (ENXIO);
1748 	}
1749 
1750 	pvscsi_setup_rings(sc);
1751 	if (sc->use_msg) {
1752 		pvscsi_setup_msg_ring(sc);
1753 	}
1754 
1755 	sc->use_req_call_threshold = pvscsi_setup_req_call(sc, 1);
1756 
1757 	pvscsi_intr_enable(sc);
1758 
1759 	mtx_unlock(&sc->lock);
1760 
1761 	return (0);
1762 }
1763 
1764 static int
1765 pvscsi_detach(device_t dev)
1766 {
1767 	struct pvscsi_softc *sc;
1768 
1769 	sc = device_get_softc(dev);
1770 
1771 	pvscsi_intr_disable(sc);
1772 	pvscsi_adapter_reset(sc);
1773 
1774 	if (sc->irq_handler != NULL) {
1775 		bus_teardown_intr(dev, sc->irq_res, sc->irq_handler);
1776 	}
1777 
1778 	mtx_lock(&sc->lock);
1779 	pvscsi_free_all(sc);
1780 	mtx_unlock(&sc->lock);
1781 
1782 	mtx_destroy(&sc->lock);
1783 
1784 	return (0);
1785 }
1786 
1787 static device_method_t pvscsi_methods[] = {
1788 	DEVMETHOD(device_probe, pvscsi_probe),
1789 	DEVMETHOD(device_shutdown, pvscsi_shutdown),
1790 	DEVMETHOD(device_attach, pvscsi_attach),
1791 	DEVMETHOD(device_detach, pvscsi_detach),
1792 	DEVMETHOD_END
1793 };
1794 
1795 static driver_t pvscsi_driver = {
1796 	"pvscsi", pvscsi_methods, sizeof(struct pvscsi_softc)
1797 };
1798 
1799 static devclass_t pvscsi_devclass;
1800 DRIVER_MODULE(pvscsi, pci, pvscsi_driver, pvscsi_devclass, 0, 0);
1801 
1802 MODULE_DEPEND(pvscsi, pci, 1, 1, 1);
1803 MODULE_DEPEND(pvscsi, cam, 1, 1, 1);
1804