xref: /titanic_41/usr/src/uts/sun4v/io/ds_pri.c (revision bbb1277b6ec1b0daad4e3ed1a2b891d3e2ece2eb)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * sun4v domain services PRI driver
28  */
29 
30 #include <sys/types.h>
31 #include <sys/file.h>
32 #include <sys/errno.h>
33 #include <sys/open.h>
34 #include <sys/cred.h>
35 #include <sys/uio.h>
36 #include <sys/stat.h>
37 #include <sys/ksynch.h>
38 #include <sys/modctl.h>
39 #include <sys/conf.h>
40 #include <sys/devops.h>
41 #include <sys/debug.h>
42 #include <sys/cmn_err.h>
43 #include <sys/ddi.h>
44 #include <sys/sunddi.h>
45 #include <sys/ds.h>
46 #include <sys/hypervisor_api.h>
47 #include <sys/machsystm.h>
48 #include <sys/sysmacros.h>
49 #include <sys/hsvc.h>
50 #include <sys/bitmap.h>
51 #include <sys/ds_pri.h>
52 
/* Non-zero enables DS_PRI_DBG output; debugging is off by default. */
static uint_t ds_pri_debug = 0;
/*
 * Debug printf wrapper.  This expands to an unbraced "if", so it must
 * never be used as the sole body of an if/else without enclosing braces
 * (a following "else" would bind to the hidden "if").
 */
#define	DS_PRI_DBG	if (ds_pri_debug) printf

/* Driver name used for the minor node and in warning messages. */
#define	DS_PRI_NAME	"ds_pri"

/*
 * Test harness definitions.
 * NOTE(review): TEST_HARNESS is defined unconditionally, but nothing in
 * the visible part of this file references DS_PRI_MAX_PRI_SIZE, the
 * DSIOC_TEST_* commands or the test data structures - confirm whether
 * they are still needed.
 */
#define	TEST_HARNESS
#ifdef TEST_HARNESS
#define	DS_PRI_MAX_PRI_SIZE	(64 * 1024)

#define	DSIOC_TEST_REG	97
#define	DSIOC_TEST_UNREG	98
#define	DSIOC_TEST_DATA	99

/* Native-sized (size, data pointer) pair. */
struct ds_pri_test_data {
	size_t		size;
	void		*data;
};

/* Same pair expressed with 32-bit size and pointer types. */
struct ds_pri_test_data32 {
	size32_t	size;
	caddr32_t	data;
};
#endif /* TEST_HARNESS */
76 
/* Values carried in the "type" field of a ds_pri_msg_t header. */
typedef	enum {
	DS_PRI_REQUEST	= 0,	/* sent by this driver to ask for the PRI */
	DS_PRI_DATA	= 1,	/* PRI payload in response to a request */
	DS_PRI_UPDATE	= 2	/* asynchronous "PRI changed" notification */
} ds_pri_msg_type_t;

/*
 * Message exchanged with the PRI domain service: a fixed header followed
 * by a variable amount of payload.  Requests consist of the header only.
 */
typedef	struct {
	struct {
		uint64_t	seq_num;	/* request id; echoed in reply */
		uint64_t	type;		/* a ds_pri_msg_type_t value */
	} hdr;
	uint8_t		data[1];	/* first byte of variable payload */
} ds_pri_msg_t;

/*
 * The following are bit field flags. No service implies no DS PRI and
 * no outstanding request.
 */
typedef enum {
	DS_PRI_NO_SERVICE = 0x0,
	DS_PRI_HAS_SERVICE = 0x1,	/* DS registration callback has run */
	DS_PRI_REQUESTED = 0x2,		/* a PRI request is outstanding */
	DS_PRI_HAS_PRI = 0x4		/* ds_pri/ds_pri_len hold a PRI copy */
} ds_pri_flags_t;

/* Per-instance soft state. */
struct ds_pri_state {
	dev_info_t	*dip;		/* returned by DDI_INFO_DEVT2DEVINFO */
	int		instance;

	kmutex_t	lock;		/* protects the fields below */
	kcondvar_t	cv;		/* signalled on service/PRI arrival */

	/* PRI/DS */
	ds_pri_flags_t	state;		/* bitwise OR of ds_pri_flags_t */
	uint64_t	gencount;	/* bumped each time a PRI is stored */
	ds_svc_hdl_t	ds_pri_handle;	/* DS_INVALID_HDL until registered */
	void		*ds_pri;	/* kmem copy of the current PRI */
	size_t		ds_pri_len;	/* size of ds_pri in bytes */
	uint64_t	req_id;		/* last sequence number generated */
	uint64_t	last_req_id;	/* seq number of outstanding request */
	int		num_opens;	/* incremented by each open */
};

typedef struct ds_pri_state ds_pri_state_t;
121 
/* Opaque soft state anchor, indexed by instance number. */
static void *ds_pri_statep;

/* Internal helpers. */
static void request_pri(ds_pri_state_t *sp);
static uint64_t ds_get_hv_pri(ds_pri_state_t *sp);

/* Driver entry points. */
static int ds_pri_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int ds_pri_attach(dev_info_t *, ddi_attach_cmd_t);
static int ds_pri_detach(dev_info_t *, ddi_detach_cmd_t);
static int ds_pri_open(dev_t *, int, int, cred_t *);
static int ds_pri_close(dev_t, int, int, cred_t *);
static int ds_pri_read(dev_t, struct uio *, cred_t *);
static int ds_pri_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);

/*
 * DS Callbacks
 */
static void ds_pri_reg_handler(ds_cb_arg_t, ds_ver_t *, ds_svc_hdl_t);
static void ds_pri_unreg_handler(ds_cb_arg_t arg);
static void ds_pri_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen);

/*
 * PRI DS capability registration
 */

/* The only protocol version offered: 1.0. */
static ds_ver_t ds_pri_ver_1_0 = { 1, 0 };

/* Capability "pri" with a one-entry version list. */
static ds_capability_t ds_pri_cap = {
	"pri",
	&ds_pri_ver_1_0,
	1
};

/*
 * PRI DS Client callback vector
 *
 * cb_arg starts out NULL and is filled in with the attaching dip by
 * ds_pri_attach(); a non-NULL value there makes a further attach fail.
 */
static ds_clnt_ops_t ds_pri_ops = {
	ds_pri_reg_handler,	/* ds_reg_cb */
	ds_pri_unreg_handler,	/* ds_unreg_cb */
	ds_pri_data_handler,	/* ds_data_cb */
	NULL			/* cb_arg */
};
163 
/*
 * DS PRI driver Ops Vector
 *
 * Character device supporting open, close, read and ioctl only.
 */
static struct cb_ops ds_pri_cb_ops = {
	ds_pri_open,		/* cb_open */
	ds_pri_close,		/* cb_close */
	nodev,			/* cb_strategy */
	nodev,			/* cb_print */
	nodev,			/* cb_dump */
	ds_pri_read,		/* cb_read */
	nodev,			/* cb_write */
	ds_pri_ioctl,		/* cb_ioctl */
	nodev,			/* cb_devmap */
	nodev,			/* cb_mmap */
	nodev,			/* cb_segmap */
	nochpoll,		/* cb_chpoll */
	ddi_prop_op,		/* cb_prop_op */
	(struct streamtab *)NULL, /* cb_str */
	D_MP | D_64BIT,		/* cb_flag */
	CB_REV,			/* cb_rev */
	nodev,			/* cb_aread */
	nodev			/* cb_awrite */
};

/* Device operations: getinfo, attach and detach are implemented here. */
static struct dev_ops ds_pri_dev_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	ds_pri_getinfo,		/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	ds_pri_attach,		/* devo_attach */
	ds_pri_detach,		/* devo_detach */
	nodev,			/* devo_reset */
	&ds_pri_cb_ops,		/* devo_cb_ops */
	(struct bus_ops *)NULL,	/* devo_bus_ops */
	nulldev,		/* devo_power */
	ddi_quiesce_not_needed,		/* devo_quiesce */
};

/* Loadable module linkage used by _init(), _info() and _fini(). */
static struct modldrv modldrv = {
	&mod_driverops,
	"Domain Services PRI Driver",
	&ds_pri_dev_ops
};

static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	NULL
};

/*
 * Hypervisor PBOOT service group negotiation state.
 * hsvc_pboot_available is set by _init() when hsvc_register() succeeds
 * and gates the static HV PRI fetch in ds_pri_attach().
 */
static boolean_t hsvc_pboot_available = B_FALSE;
static hsvc_info_t pboot_hsvc = {
	HSVC_REV_1, NULL, HSVC_GROUP_PBOOT, 1, 0, NULL
};
219 
220 int
221 _init(void)
222 {
223 	int retval;
224 	uint64_t	hsvc_pboot_minor;
225 	uint64_t	status;
226 
227 	status = hsvc_register(&pboot_hsvc, &hsvc_pboot_minor);
228 	if (status == H_EOK) {
229 		hsvc_pboot_available = B_TRUE;
230 	} else {
231 		DS_PRI_DBG("hypervisor services not negotiated "
232 		    "for group number: 0x%lx errorno: 0x%lx\n",
233 		    pboot_hsvc.hsvc_group, status);
234 	}
235 
236 	retval = ddi_soft_state_init(&ds_pri_statep,
237 	    sizeof (ds_pri_state_t), 0);
238 	if (retval != 0)
239 		return (retval);
240 
241 	retval = mod_install(&modlinkage);
242 	if (retval != 0) {
243 		ddi_soft_state_fini(&ds_pri_statep);
244 		return (retval);
245 	}
246 
247 	return (retval);
248 }
249 
250 
251 int
252 _info(struct modinfo *modinfop)
253 {
254 	return (mod_info(&modlinkage, modinfop));
255 }
256 
257 
258 int
259 _fini(void)
260 {
261 	int retval;
262 
263 	if ((retval = mod_remove(&modlinkage)) != 0)
264 		return (retval);
265 
266 	ddi_soft_state_fini(&ds_pri_statep);
267 
268 	(void) hsvc_unregister(&pboot_hsvc);
269 
270 	return (retval);
271 }
272 
273 
274 /*ARGSUSED*/
275 static int
276 ds_pri_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp)
277 {
278 	ds_pri_state_t *sp;
279 	int retval = DDI_FAILURE;
280 
281 	ASSERT(resultp != NULL);
282 
283 	switch (cmd) {
284 	case DDI_INFO_DEVT2DEVINFO:
285 		sp = ddi_get_soft_state(ds_pri_statep, getminor((dev_t)arg));
286 		if (sp != NULL) {
287 			*resultp = sp->dip;
288 			retval = DDI_SUCCESS;
289 		} else
290 			*resultp = NULL;
291 		break;
292 
293 	case DDI_INFO_DEVT2INSTANCE:
294 		*resultp = (void *)(uintptr_t)getminor((dev_t)arg);
295 		retval = DDI_SUCCESS;
296 		break;
297 
298 	default:
299 		break;
300 	}
301 
302 	return (retval);
303 }
304 
305 
306 static int
307 ds_pri_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
308 {
309 	int instance;
310 	ds_pri_state_t *sp;
311 	int rv;
312 	uint64_t status;
313 
314 	switch (cmd) {
315 	case DDI_ATTACH:
316 		break;
317 
318 	case DDI_RESUME:
319 		return (DDI_SUCCESS);
320 
321 	default:
322 		return (DDI_FAILURE);
323 	}
324 
325 	instance = ddi_get_instance(dip);
326 
327 	if (ddi_soft_state_zalloc(ds_pri_statep, instance) !=
328 	    DDI_SUCCESS) {
329 		cmn_err(CE_WARN, "%s@%d: Unable to allocate state",
330 		    DS_PRI_NAME, instance);
331 		return (DDI_FAILURE);
332 	}
333 	sp = ddi_get_soft_state(ds_pri_statep, instance);
334 
335 	mutex_init(&sp->lock, NULL, MUTEX_DEFAULT, NULL);
336 	cv_init(&sp->cv, NULL, CV_DEFAULT, NULL);
337 
338 	if (ddi_create_minor_node(dip, DS_PRI_NAME, S_IFCHR, instance,
339 	    DDI_PSEUDO, 0) != DDI_SUCCESS) {
340 		cmn_err(CE_WARN, "%s@%d: Unable to create minor node",
341 		    DS_PRI_NAME, instance);
342 		goto fail;
343 	}
344 
345 	if (ds_pri_ops.cb_arg != NULL)
346 		goto fail;
347 	ds_pri_ops.cb_arg = dip;
348 
349 	sp->state = DS_PRI_NO_SERVICE;
350 
351 	/* Until the service registers the handle is invalid */
352 	sp->ds_pri_handle = DS_INVALID_HDL;
353 
354 	sp->ds_pri = NULL;
355 	sp->ds_pri_len = 0;
356 	sp->req_id = 0;
357 	sp->num_opens = 0;
358 
359 	/*
360 	 * See if we can get the static hv pri data. Static pri data
361 	 * is only available for privileged domains.
362 	 */
363 	if (hsvc_pboot_available == B_TRUE) {
364 		if ((status = ds_get_hv_pri(sp)) != 0) {
365 			cmn_err(CE_NOTE, "ds_get_hv_pri failed: 0x%lx", status);
366 		}
367 	}
368 
369 	if ((rv = ds_cap_init(&ds_pri_cap, &ds_pri_ops)) != 0) {
370 		cmn_err(CE_NOTE, "ds_cap_init failed: %d", rv);
371 		goto fail;
372 	}
373 
374 	ddi_report_dev(dip);
375 
376 	return (DDI_SUCCESS);
377 
378 fail:
379 	if (sp->ds_pri)
380 		kmem_free(sp->ds_pri, sp->ds_pri_len);
381 	ddi_remove_minor_node(dip, NULL);
382 	cv_destroy(&sp->cv);
383 	mutex_destroy(&sp->lock);
384 	ddi_soft_state_free(ds_pri_statep, instance);
385 	return (DDI_FAILURE);
386 
387 }
388 
389 
390 /*ARGSUSED*/
391 static int
392 ds_pri_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
393 {
394 	ds_pri_state_t *sp;
395 	int instance;
396 	int rv;
397 
398 	instance = ddi_get_instance(dip);
399 	sp = ddi_get_soft_state(ds_pri_statep, instance);
400 
401 	switch (cmd) {
402 	case DDI_DETACH:
403 		break;
404 
405 	case DDI_SUSPEND:
406 		return (DDI_SUCCESS);
407 
408 	default:
409 		return (DDI_FAILURE);
410 	}
411 
412 	/* This really shouldn't fail - but check anyway */
413 	if ((rv = ds_cap_fini(&ds_pri_cap)) != 0) {
414 		cmn_err(CE_WARN, "ds_cap_fini failed: %d", rv);
415 	}
416 
417 	if (sp != NULL && sp->ds_pri_len != 0)
418 		kmem_free(sp->ds_pri, sp->ds_pri_len);
419 
420 	ds_pri_ops.cb_arg = NULL;
421 
422 	ddi_remove_minor_node(dip, NULL);
423 	cv_destroy(&sp->cv);
424 	mutex_destroy(&sp->lock);
425 	ddi_soft_state_free(ds_pri_statep, instance);
426 
427 	return (DDI_SUCCESS);
428 }
429 
430 
431 /*ARGSUSED*/
432 static int
433 ds_pri_open(dev_t *devp, int flag, int otyp, cred_t *credp)
434 {
435 	ds_pri_state_t *sp;
436 	int instance;
437 
438 	if (otyp != OTYP_CHR)
439 		return (EINVAL);
440 
441 	instance = getminor(*devp);
442 	sp = ddi_get_soft_state(ds_pri_statep, instance);
443 	if (sp == NULL)
444 		return (ENXIO);
445 
446 	mutex_enter(&sp->lock);
447 
448 	/*
449 	 * Proceed if we have PRI data (possibly obtained from
450 	 * static HV PRI or last pushed DS PRI data update).
451 	 * If no PRI data and we have no DS PRI service then this
452 	 * means that PRI DS has never called the registration callback.
453 	 * A while loop is necessary as we might have been woken up
454 	 * prematurely, e.g., due to a debugger or "pstack" etc.
455 	 * Wait here and the callback will signal us when it has completed
456 	 * its work.
457 	 */
458 	if (!(sp->state & DS_PRI_HAS_PRI)) {
459 		while (!(sp->state & DS_PRI_HAS_SERVICE)) {
460 			if (cv_wait_sig(&sp->cv, &sp->lock) == 0) {
461 				mutex_exit(&sp->lock);
462 				return (EINTR);
463 			}
464 		}
465 	}
466 
467 	sp->num_opens++;
468 	mutex_exit(&sp->lock);
469 
470 	DS_PRI_DBG("ds_pri_open: state = 0x%x\n", sp->state);
471 
472 	return (0);
473 }
474 
475 
476 /*ARGSUSED*/
477 static int
478 ds_pri_close(dev_t dev, int flag, int otyp, cred_t *credp)
479 {
480 	int instance;
481 	ds_pri_state_t *sp;
482 
483 	if (otyp != OTYP_CHR)
484 		return (EINVAL);
485 
486 	DS_PRI_DBG("ds_pri_close\n");
487 
488 	instance = getminor(dev);
489 	if ((sp = ddi_get_soft_state(ds_pri_statep, instance)) == NULL)
490 		return (ENXIO);
491 
492 	mutex_enter(&sp->lock);
493 	if (!(sp->state & DS_PRI_HAS_SERVICE)) {
494 		mutex_exit(&sp->lock);
495 		return (0);
496 	}
497 
498 	if (--sp->num_opens > 0) {
499 		mutex_exit(&sp->lock);
500 		return (0);
501 	}
502 
503 	sp->state &= ~DS_PRI_REQUESTED;
504 	mutex_exit(&sp->lock);
505 	return (0);
506 }
507 
508 
509 /*ARGSUSED*/
510 static int
511 ds_pri_read(dev_t dev, struct uio *uiop, cred_t *credp)
512 {
513 	ds_pri_state_t *sp;
514 	int instance;
515 	size_t len;
516 	int retval;
517 	caddr_t tmpbufp;
518 	offset_t off = uiop->uio_offset;
519 
520 	instance = getminor(dev);
521 	if ((sp = ddi_get_soft_state(ds_pri_statep, instance)) == NULL)
522 		return (ENXIO);
523 
524 	len = uiop->uio_resid;
525 
526 	if (len == 0)
527 		return (0);
528 
529 	mutex_enter(&sp->lock);
530 
531 	DS_PRI_DBG("ds_pri_read: state = 0x%x\n", sp->state);
532 
533 	/* block or bail if there is no current PRI */
534 	if (!(sp->state & DS_PRI_HAS_PRI)) {
535 		DS_PRI_DBG("ds_pri_read: no PRI held\n");
536 
537 		if (uiop->uio_fmode & (FNDELAY | FNONBLOCK)) {
538 			mutex_exit(&sp->lock);
539 			return (EAGAIN);
540 		}
541 
542 		while (!(sp->state & DS_PRI_HAS_PRI)) {
543 			DS_PRI_DBG("ds_pri_read: state = 0x%x\n", sp->state);
544 			request_pri(sp);
545 			if (cv_wait_sig(&sp->cv, &sp->lock) == 0) {
546 				mutex_exit(&sp->lock);
547 				return (EINTR);
548 			}
549 		}
550 	}
551 
552 	if (len > sp->ds_pri_len)
553 		len = sp->ds_pri_len;
554 
555 	if (len == 0) {
556 		mutex_exit(&sp->lock);
557 		return (0);
558 	}
559 
560 	/*
561 	 * We're supposed to move the data out to userland, but
562 	 * that can suspend because of page faults etc., and meanwhile
563 	 * other parts of this driver want to update the PRI buffer ...
564 	 * we could hold the data buffer locked with a flag etc.,
565 	 * but that's still a lock ... a simpler mechanism - if not quite
566 	 * as performance efficient is to simply clone here the part of
567 	 * the buffer we care about and then the original can be released
568 	 * for further updates while the uiomove continues.
569 	 */
570 
571 	tmpbufp = kmem_alloc(len, KM_SLEEP);
572 	bcopy(((caddr_t)sp->ds_pri), tmpbufp, len);
573 	mutex_exit(&sp->lock);
574 
575 	retval = uiomove(tmpbufp, len, UIO_READ, uiop);
576 
577 	kmem_free(tmpbufp, len);
578 
579 	/*
580 	 * restore uio_offset after uiomove since the driver
581 	 * does not support the concept of position.
582 	 */
583 	uiop->uio_offset = off;
584 
585 	return (retval);
586 }
587 
588 
589 /*ARGSUSED*/
590 static int
591 ds_pri_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
592     int *rvalp)
593 {
594 	ds_pri_state_t *sp;
595 	int instance;
596 
597 	instance = getminor(dev);
598 	if ((sp = ddi_get_soft_state(ds_pri_statep, instance)) == NULL)
599 		return (ENXIO);
600 
601 	switch (cmd) {
602 	case DSPRI_GETINFO: {
603 		struct dspri_info info;
604 
605 		if (!(mode & FREAD))
606 			return (EACCES);
607 
608 		/*
609 		 * We are not guaranteed that ddi_copyout(9F) will read
610 		 * atomically anything larger than a byte.  Therefore we
611 		 * must duplicate the size before copying it out to the user.
612 		 */
613 		mutex_enter(&sp->lock);
614 
615 loop:;
616 		if (sp->state & DS_PRI_HAS_PRI) {
617 			/* If we have a PRI simply return the info */
618 			info.size = sp->ds_pri_len;
619 			info.token = sp->gencount;
620 		} else
621 		if (!(sp->state & DS_PRI_HAS_SERVICE)) {
622 			/* If we have no service return a nil response */
623 			info.size = 0;
624 			info.token = 0;
625 		} else {
626 			request_pri(sp);
627 			/* wait for something & check again */
628 			if (cv_wait_sig(&sp->cv, &sp->lock) == 0) {
629 				mutex_exit(&sp->lock);
630 				return (EINTR);
631 			}
632 			goto loop;
633 		}
634 		DS_PRI_DBG("ds_pri_ioctl: DSPRI_GETINFO sz=0x%lx tok=0x%lx\n",
635 		    info.size, info.token);
636 		mutex_exit(&sp->lock);
637 
638 		if (ddi_copyout(&info, (void *)arg, sizeof (info), mode) != 0)
639 			return (EFAULT);
640 		break;
641 	}
642 
643 	case DSPRI_WAIT: {
644 		uint64_t gencount;
645 
646 		if (ddi_copyin((void *)arg, &gencount, sizeof (gencount),
647 		    mode) != 0)
648 			return (EFAULT);
649 
650 		mutex_enter(&sp->lock);
651 
652 		DS_PRI_DBG("ds_pri_ioctl: DSPRI_WAIT gen=0x%lx sp->gen=0x%lx\n",
653 		    gencount, sp->gencount);
654 
655 		while ((sp->state & DS_PRI_HAS_PRI) == 0 ||
656 		    gencount == sp->gencount) {
657 			if ((sp->state & DS_PRI_HAS_PRI) == 0)
658 				request_pri(sp);
659 			if (cv_wait_sig(&sp->cv, &sp->lock) == 0) {
660 				mutex_exit(&sp->lock);
661 				return (EINTR);
662 			}
663 		}
664 		mutex_exit(&sp->lock);
665 		break;
666 	}
667 
668 	default:
669 		return (ENOTTY);
670 	}
671 	return (0);
672 }
673 
674 
675 	/* assumes sp->lock is held when called */
676 static void
677 request_pri(ds_pri_state_t *sp)
678 {
679 	ds_pri_msg_t reqmsg;
680 
681 	ASSERT(MUTEX_HELD(&sp->lock));
682 
683 	/* If a request is already pending we're done */
684 	if (!(sp->state & DS_PRI_HAS_SERVICE))
685 		return;
686 	if (sp->state & DS_PRI_REQUESTED)
687 		return;
688 
689 	/* If we have an old PRI - remove it */
690 	if (sp->state & DS_PRI_HAS_PRI) {
691 		ASSERT(sp->ds_pri_len != 0);
692 		ASSERT(sp->ds_pri != NULL);
693 
694 		/* remove the old data if we have an outstanding request */
695 		kmem_free(sp->ds_pri, sp->ds_pri_len);
696 		sp->ds_pri_len = 0;
697 		sp->ds_pri = NULL;
698 		sp->state &= ~DS_PRI_HAS_PRI;
699 	} else {
700 		ASSERT(sp->ds_pri == NULL);
701 		ASSERT(sp->ds_pri_len == 0);
702 	}
703 
704 	reqmsg.hdr.seq_num = ++(sp->req_id);
705 	reqmsg.hdr.type = DS_PRI_REQUEST;
706 
707 	DS_PRI_DBG("request_pri: request id 0x%lx\n", sp->req_id);
708 
709 		/*
710 		 * Request consists of header only.
711 		 * We don't care about fail status for ds_send;
712 		 * if it does fail we will get an unregister callback
713 		 * from the DS framework and we handle the state change
714 		 * there.
715 		 */
716 	(void) ds_cap_send(sp->ds_pri_handle, &reqmsg, sizeof (reqmsg.hdr));
717 
718 	sp->state |= DS_PRI_REQUESTED;
719 	sp->last_req_id = sp->req_id;
720 }
721 
722 /*
723  * DS Callbacks
724  */
725 /*ARGSUSED*/
726 static void
727 ds_pri_reg_handler(ds_cb_arg_t arg, ds_ver_t *ver, ds_svc_hdl_t hdl)
728 {
729 	dev_info_t *dip = arg;
730 	ds_pri_state_t *sp;
731 	int instance;
732 
733 	instance = ddi_get_instance(dip);
734 	if ((sp = ddi_get_soft_state(ds_pri_statep, instance)) == NULL)
735 		return;
736 
737 	DS_PRI_DBG("ds_pri_reg_handler: registering handle 0x%lx for version "
738 	    "0x%x:0x%x\n", (uint64_t)hdl, ver->major, ver->minor);
739 
740 	/* When the domain service comes up automatically update the state */
741 	mutex_enter(&sp->lock);
742 
743 	ASSERT(sp->ds_pri_handle == DS_INVALID_HDL);
744 	sp->ds_pri_handle = hdl;
745 
746 	ASSERT(!(sp->state & DS_PRI_HAS_SERVICE));
747 	sp->state |= DS_PRI_HAS_SERVICE;
748 
749 	/*
750 	 * Cannot request a PRI here, because the reg handler cannot
751 	 * do a DS send operation - we take care of this later.
752 	 * Static hv pri data might be available.
753 	 */
754 
755 	/* Wake up anyone waiting in open() */
756 	cv_broadcast(&sp->cv);
757 
758 	mutex_exit(&sp->lock);
759 }
760 
761 
762 static void
763 ds_pri_unreg_handler(ds_cb_arg_t arg)
764 {
765 	dev_info_t *dip = arg;
766 	ds_pri_state_t *sp;
767 	int instance;
768 
769 	instance = ddi_get_instance(dip);
770 	if ((sp = ddi_get_soft_state(ds_pri_statep, instance)) == NULL)
771 		return;
772 
773 	DS_PRI_DBG("ds_pri_unreg_handler: un-registering ds_pri service\n");
774 
775 	mutex_enter(&sp->lock);
776 
777 	/*
778 	 * Note that if the service goes offline, we don't
779 	 * free up the current PRI data at hand. It is assumed
780 	 * that PRI DS service will only push new update when
781 	 * it comes online. We mark the state to indicate no
782 	 * DS PRI service is available. The current PRI data if
783 	 * available is provided to the consumers.
784 	 */
785 	sp->ds_pri_handle = DS_INVALID_HDL;
786 	sp->state &= ~DS_PRI_HAS_SERVICE;
787 
788 	mutex_exit(&sp->lock);
789 }
790 
791 
792 static void
793 ds_pri_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen)
794 {
795 	dev_info_t *dip = arg;
796 	ds_pri_state_t *sp;
797 	int instance;
798 	void *data;
799 	ds_pri_msg_t	*msgp;
800 	size_t	pri_size;
801 
802 	msgp = (ds_pri_msg_t *)buf;
803 
804 	/* make sure the header is at least valid */
805 	if (buflen < sizeof (msgp->hdr))
806 		return;
807 
808 	DS_PRI_DBG("ds_pri_data_handler: msg buf len 0x%lx : type 0x%lx, "
809 	    "seqn 0x%lx\n", buflen, msgp->hdr.type, msgp->hdr.seq_num);
810 
811 	instance = ddi_get_instance(dip);
812 	if ((sp = ddi_get_soft_state(ds_pri_statep, instance)) == NULL)
813 		return;
814 
815 	mutex_enter(&sp->lock);
816 
817 	ASSERT(sp->state & DS_PRI_HAS_SERVICE);
818 
819 	switch (msgp->hdr.type) {
820 	case DS_PRI_DATA:	/* in response to a request from us */
821 		break;
822 	case DS_PRI_UPDATE:	/* aynch notification */
823 			/* our default response to this is to request the PRI */
824 		/* simply issue a request for the new PRI */
825 		request_pri(sp);
826 		goto done;
827 	default:	/* ignore garbage or unknown message types */
828 		goto done;
829 	}
830 
831 	/*
832 	 * If there is no pending PRI request, then we've received a
833 	 * bogus data message ... so ignore it.
834 	 */
835 
836 	if (!(sp->state & DS_PRI_REQUESTED)) {
837 		cmn_err(CE_WARN, "Received DS pri data without request");
838 		goto done;
839 	}
840 
841 	/* response to a request therefore old PRI must be gone */
842 	ASSERT(!(sp->state & DS_PRI_HAS_PRI));
843 	ASSERT(sp->ds_pri_len == 0);
844 	ASSERT(sp->ds_pri == NULL);
845 
846 	/* response seq_num should match our request seq_num */
847 	if (msgp->hdr.seq_num != sp->last_req_id) {
848 		cmn_err(CE_WARN, "Received DS pri data out of sequence with "
849 		    "request");
850 		goto done;
851 	}
852 
853 	pri_size = buflen - sizeof (msgp->hdr);
854 	if (pri_size == 0) {
855 		cmn_err(CE_WARN, "Received DS pri data of size 0");
856 		goto done;
857 	}
858 	data = kmem_alloc(pri_size, KM_SLEEP);
859 	sp->ds_pri = data;
860 	sp->ds_pri_len = pri_size;
861 	bcopy(msgp->data, data, sp->ds_pri_len);
862 	sp->state &= ~DS_PRI_REQUESTED;
863 	sp->state |= DS_PRI_HAS_PRI;
864 
865 	sp->gencount++;
866 	cv_broadcast(&sp->cv);
867 
868 done:;
869 	mutex_exit(&sp->lock);
870 }
871 
872 /*
873  * Routine to get static PRI data from the Hypervisor.
874  * If successful, this PRI data is the last known PRI
875  * data generated since the last poweron reset.
876  */
877 static uint64_t
878 ds_get_hv_pri(ds_pri_state_t *sp)
879 {
880 	uint64_t	status;
881 	uint64_t	pri_size;
882 	uint64_t	buf_size;
883 	uint64_t	buf_pa;
884 	caddr_t		buf_va = NULL;
885 	caddr_t		pri_data;
886 
887 	/*
888 	 * Get pri buffer size by calling hcall with buffer size 0.
889 	 */
890 	pri_size = 0LL;
891 	status = hv_mach_pri((uint64_t)0, &pri_size);
892 	DS_PRI_DBG("ds_get_hv_pri: hv_mach_pri pri size: 0x%lx\n", pri_size);
893 	if (pri_size == 0)
894 		return (1);
895 
896 	if (status == H_ENOTSUPPORTED || status == H_ENOACCESS) {
897 		DS_PRI_DBG("ds_get_hv_pri: hv_mach_pri service is not "
898 		    "available. errorno: 0x%lx\n", status);
899 		return (status);
900 	}
901 
902 	/*
903 	 * contig_mem_alloc requires size to be a power of 2.
904 	 * Increase size to next power of 2 if necessary.
905 	 */
906 	if ((pri_size & (pri_size - 1)) != 0)
907 		buf_size = 1 << highbit(pri_size);
908 	DS_PRI_DBG("ds_get_hv_pri: buf_size = 0x%lx\n", buf_size);
909 
910 	buf_va = contig_mem_alloc(buf_size);
911 	if (buf_va == NULL)
912 		return (1);
913 
914 	buf_pa = va_to_pa(buf_va);
915 	DS_PRI_DBG("ds_get_hv_pri: buf_pa 0x%lx\n", buf_pa);
916 	status = hv_mach_pri(buf_pa, &pri_size);
917 	DS_PRI_DBG("ds_get_hv_pri: hv_mach_pri status = 0x%lx\n", status);
918 
919 	if (status == H_EOK) {
920 		pri_data = kmem_alloc(pri_size, KM_SLEEP);
921 		sp->ds_pri = pri_data;
922 		sp->ds_pri_len = pri_size;
923 		bcopy(buf_va, pri_data, sp->ds_pri_len);
924 		sp->state |= DS_PRI_HAS_PRI;
925 		sp->gencount++;
926 	}
927 
928 	contig_mem_free(buf_va, buf_size);
929 
930 	return (status);
931 }
932