xref: /titanic_51/usr/src/uts/sun4u/starfire/io/idn_smr.c (revision e2e5537f18c3760ce88f9038f7fe8b59cdf3b1d5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Inter-Domain Network
27  *
28  * Shared Memory Region (SMR) supporting code.
29  */
30 
31 #include <sys/types.h>
32 #include <sys/param.h>
33 #include <sys/machparam.h>
34 #include <sys/debug.h>
35 #include <sys/cpuvar.h>
36 #include <sys/kmem.h>
37 #include <sys/mutex.h>
38 #include <sys/rwlock.h>
39 #include <sys/systm.h>
40 #include <sys/machlock.h>
41 #include <sys/membar.h>
42 #include <sys/mman.h>
43 #include <vm/hat.h>
44 #include <vm/as.h>
45 #include <vm/hat_sfmmu.h>
46 #include <sys/vm_machparam.h>
47 #include <sys/x_call.h>
48 
49 #include <sys/idn.h>
50 
#ifdef DEBUG
/*
 * Debug-only sanity check: emit a warning if the dio counter kept
 * for the given domain has gone negative.
 *
 * The body is wrapped in do/while(0) so that "DIOCHECK(id);" expands
 * to exactly one statement and therefore remains safe when used as
 * the body of an unbraced if/else.
 */
#define	DIOCHECK(domid) \
do { \
	int	_dio; \
	if ((_dio = idn_domain[(domid)].dio) < 0) { \
		cmn_err(CE_WARN, \
			">>>>> file %s, line %d: domain %d, dio = %d", \
			__FILE__, __LINE__, (domid), _dio); \
	} \
} while (0)
#else
/* Non-debug builds compile the check away entirely. */
#define	DIOCHECK(domid)
#endif /* DEBUG */
64 
65 static int	smr_slab_alloc_local(int domid, smr_slab_t **spp);
66 static int	smr_slab_alloc_remote(int domid, smr_slab_t **spp);
67 static void	smr_slab_free_local(int domid, smr_slab_t *sp);
68 static void	smr_slab_free_remote(int domid, smr_slab_t *sp);
69 static int 	smr_slabwaiter_register(int domid);
70 static int 	smr_slabwaiter_unregister(int domid, smr_slab_t **spp);
71 static int 	smr_slaballoc_wait(int domid, smr_slab_t **spp);
72 static smr_slab_t 	*smr_slab_reserve(int domid);
73 static void 	smr_slab_unreserve(int domid, smr_slab_t *sp);
74 static void	smr_slab_reap_global();
75 
76 /*
77  * Can only be called by the master.  Allocate a slab from the
78  * local pool representing the SMR, on behalf of the given
79  * domain.  Slab is either being requested for use by the
80  * local domain (i.e. domid == idn.localid), or it's being
81  * allocated to give to a remote domain which requested one.
82  * In the base of allocating on behalf of a remote domain,
83  * smr_slab_t structure is used simply to manage ownership.
84  *
85  * Returns:	smr_slaballoc_wait
86  * 		(EINVAL, ETIMEDOUT)
87  *		smr_slabwatier_unregister
88  *		(0, EINVAL, EBUSY, ENOMEM)
89  *		ENOLCK
90  */
91 static int
92 smr_slab_alloc_local(int domid, smr_slab_t **spp)
93 {
94 	int		serrno = 0;
95 	int		nwait;
96 	smr_slab_t	*sp;
97 	idn_domain_t	*dp;
98 
99 
100 	/*
101 	 * Only the master can make local allocations.
102 	 */
103 	ASSERT(IDN_GET_MASTERID() != IDN_NIL_DOMID);
104 	ASSERT(idn.localid == IDN_GET_MASTERID());
105 
106 	*spp = NULL;
107 
108 	dp = &idn_domain[domid];
109 	ASSERT(DSLAB_READ_HELD(domid));
110 	ASSERT(dp->dslab_state == DSLAB_STATE_LOCAL);
111 
112 	/*
113 	 * Register myself with the waiting list.
114 	 */
115 	nwait = smr_slabwaiter_register(domid);
116 
117 	if (nwait > 1) {
118 		/*
119 		 * XXX - old comment?
120 		 * Need to drop the read lock _after_ registering
121 		 * ourselves with the potential wait list for this allocation.
122 		 * Although this allocation is not a remote one, we could
123 		 * still have multiple threads on the master trying to
124 		 * satisfy (allocate) request on behalf of a remote domain.
125 		 */
126 		/*
127 		 * Somebody is already in the process of satisfying
128 		 * the allocation request for this respective
129 		 * domain.  All we need to do is wait and let
130 		 * it happen.
131 		 */
132 		serrno = smr_slaballoc_wait(domid, spp);
133 		return (serrno);
134 	}
135 	/*
136 	 * I'm the original slab requester for this domain.  It's local
137 	 * so go ahead and do the job.
138 	 */
139 
140 	if ((sp = smr_slab_reserve(domid)) == NULL)
141 		serrno = ENOMEM;
142 
143 	/*
144 	 * Allocation may have failed.  In either case we've
145 	 * got to do the put to at least wake potential waiters up.
146 	 */
147 	if (!serrno) {
148 		if (DSLAB_LOCK_TRYUPGRADE(domid) == 0) {
149 			DSLAB_UNLOCK(domid);
150 			DSLAB_LOCK_EXCL(domid);
151 		}
152 	}
153 
154 	(void) smr_slaballoc_put(domid, sp, 0, serrno);
155 
156 	/*
157 	 * If serrno is ENOLCK here, then we must have failed
158 	 * on the upgrade above, so lock already dropped.
159 	 */
160 	if (serrno != ENOLCK) {
161 		/*
162 		 * Need to drop since reaping may be recursive?
163 		 */
164 		DSLAB_UNLOCK(domid);
165 	}
166 
167 	/*
168 	 * Since we were the original requester but never went
169 	 * to sleep, we need to directly unregister ourselves
170 	 * from the waiting list.
171 	 */
172 	serrno = smr_slabwaiter_unregister(domid, spp);
173 
174 	/*
175 	 * Now that we've satisfied the request, let's check if any
176 	 * reaping is necessary.  Only the master does this and only
177 	 * when allocating slabs, an infrequent event :-o
178 	 */
179 	smr_slab_reap_global();
180 
181 	ASSERT((serrno == 0) ? (*spp != NULL) : (*spp == NULL));
182 
183 	DSLAB_LOCK_SHARED(domid);
184 
185 	return (serrno);
186 }
187 
188 /*
189  * Can only be called by a slave on behalf of himself.  Need to
190  * make a request to the master to allocate a slab of SMR buffers
191  * for the local domain.
192  *
193  * Returns:	smr_slaballoc_wait
194  *		(0, EINVAL, EBUSY, ENOMEM)
195  *		ENOLCK
196  *		ECANCELED
197  */
198 static int
199 smr_slab_alloc_remote(int domid, smr_slab_t **spp)
200 {
201 	int		nwait;
202 	int		serrno = 0;
203 	int		bailout = 0;
204 	int		masterid;
205 	idn_domain_t	*dp, *mdp = NULL;
206 	procname_t	proc = "smr_slab_alloc_remote";
207 
208 	/*
209 	 * Only slaves make remote allocations.
210 	 */
211 	ASSERT(idn.localid != IDN_GET_MASTERID());
212 	ASSERT(domid == idn.localid);
213 	ASSERT(IDN_GET_MASTERID() != IDN_NIL_DOMID);
214 
215 	*spp = NULL;
216 
217 	dp = &idn_domain[domid];
218 	ASSERT(DSLAB_READ_HELD(domid));
219 	ASSERT(dp->dslab_state == DSLAB_STATE_REMOTE);
220 
221 	/*
222 	 * Register myself with the slaballoc waiting list.
223 	 * Note that only allow one outstanding allocation
224 	 * request for the given domain.  Other callers which
225 	 * detect a slab is needed simply get stuck on the
226 	 * waiting list waiting for the original caller to
227 	 * get the job done.
228 	 * The waiter_register routine will allocate the necessary
229 	 * slab structure which will ultimately be inserted in
230 	 * the domain's slab list via smr_slaballoc_put().
231 	 */
232 	nwait = smr_slabwaiter_register(domid);
233 
234 	/*
235 	 * Make sure we have a connection with the master
236 	 * before we wait around for nothing and send a
237 	 * command off to nowhere.
238 	 * First do a quick (no lock) check for global okayness.
239 	 */
240 	if ((idn.state != IDNGS_ONLINE) ||
241 	    ((masterid = IDN_GET_MASTERID()) == IDN_NIL_DOMID)) {
242 		bailout = 1;
243 		serrno = ECANCELED;
244 	}
245 	/*
246 	 * We need to drop our read lock _before_ acquiring the
247 	 * slaballoc waiter lock.  This is necessary because the
248 	 * thread that receives the slab alloc response and fills
249 	 * in the slab structure will need to grab the domain write
250 	 * lock while holding onto the slaballoc waiter lock.
251 	 * Potentially could deadlock if we didn't drop our domain
252 	 * lock before.  Plus, we've registered.
253 	 *
254 	 * 4093209 - Note also that we do this _after_ the check for
255 	 *	idn.masterid where we grab the READER global
256 	 *	lock.  This is to prevent somebody from
257 	 *	changing our state after we drop the drwlock.
258 	 *	A deadlock can occur when shutting down a
259 	 *	domain we're holding the
260 	 */
261 
262 	if (!bailout) {
263 		mdp = &idn_domain[masterid];
264 		/*
265 		 * Global state is okay.  Let's double check the
266 		 * state of our actual target domain.
267 		 */
268 		if (mdp->dstate != IDNDS_CONNECTED) {
269 			bailout = 1;
270 			serrno = ECANCELED;
271 		} else if (IDN_DLOCK_TRY_SHARED(masterid)) {
272 			if (mdp->dstate != IDNDS_CONNECTED) {
273 				bailout = 1;
274 				serrno = ECANCELED;
275 				IDN_DUNLOCK(masterid);
276 			} else if (nwait != 1) {
277 				IDN_DUNLOCK(masterid);
278 			}
279 			/*
280 			 * Note that keep the drwlock(read) for
281 			 * the target (master) domain if it appears
282 			 * we're the lucky one to send the command.
283 			 * We hold onto the lock until we've actually
284 			 * sent the command out.
285 			 * We don't reach this place unless it
286 			 * appears everything is kosher with
287 			 * the target (master) domain.
288 			 */
289 		} else {
290 			bailout = 1;
291 			serrno = ENOLCK;
292 		}
293 	}
294 
295 	if (bailout) {
296 		ASSERT(serrno);
297 		/*
298 		 * Gotta bail.  Abort operation.  Error result
299 		 * will be picked up when we attempt to wait.
300 		 */
301 		PR_SMR("%s: BAILING OUT on behalf domain %d "
302 		    "(err=%d, gs=%s, ms=%s)\n",
303 		    proc, domid, serrno, idngs_str[idn.state],
304 		    (masterid == IDN_NIL_DOMID)
305 		    ? "unknown" : idnds_str[idn_domain[masterid].dstate]);
306 		(void) smr_slabwaiter_abort(domid, serrno);
307 
308 	} else if (nwait == 1) {
309 		/*
310 		 * We are the original requester.  Initiate the
311 		 * actual request to the master.
312 		 */
313 		idn_send_cmd(masterid, IDNCMD_SLABALLOC, IDN_SLAB_SIZE, 0, 0);
314 		ASSERT(mdp);
315 		IDN_DUNLOCK(masterid);
316 	}
317 
318 	/*
319 	 * Wait here for response.  Once awakened func returns
320 	 * with slab structure possibly filled with gifts!
321 	 */
322 	serrno = smr_slaballoc_wait(domid, spp);
323 
324 	return (serrno);
325 }
326 
327 /*
328  * Allocate a slab from the Master on behalf
329  * of the given domain.  Note that master uses
330  * this function to allocate slabs on behalf of
331  * remote domains also.
332  * Entered with drwlock held.
333  * Leaves with drwlock dropped.
334  * Returns:	EDQUOT
335  *		EINVAL
336  *		ENOLCK
337  *		smr_slab_alloc_local
338  *		smr_slab_alloc_remote
339  *		(0, EINVAL, EBUSY, ENOMEM)
340  */
341 int
342 smr_slab_alloc(int domid, smr_slab_t **spp)
343 {
344 	int		serrno = 0;
345 	idn_domain_t	*dp;
346 	procname_t	proc = "smr_slab_alloc";
347 
348 
349 	dp = &idn_domain[domid];
350 
351 	ASSERT(DSLAB_READ_HELD(domid));
352 	ASSERT(dp->dslab_state != DSLAB_STATE_UNKNOWN);
353 
354 	*spp = NULL;
355 
356 	switch (dp->dslab_state) {
357 	case DSLAB_STATE_UNKNOWN:
358 		cmn_err(CE_WARN,
359 		    "IDN: 300: no slab allocations without a master");
360 		serrno = EINVAL;
361 		break;
362 
363 	case DSLAB_STATE_LOCAL:
364 		/*
365 		 * If I'm the master, then get a slab
366 		 * from the local SMR pool, but only
367 		 * if the number of allocated slabs has
368 		 * not been exceeded.
369 		 */
370 		if (((int)dp->dnslabs < IDN_SLAB_MAXPERDOMAIN) ||
371 		    !IDN_SLAB_MAXPERDOMAIN)
372 			serrno = smr_slab_alloc_local(domid, spp);
373 		else
374 			serrno = EDQUOT;
375 		break;
376 
377 	case DSLAB_STATE_REMOTE:
378 		/*
379 		 * Have to make a remote request.
380 		 * In order to prevent overwhelming the master
381 		 * with a bunch of requests that he won't be able
382 		 * to handle we do a check to see if we're still
383 		 * under quota.  Note that the limit is known
384 		 * apriori based on the SMR/NWR size and
385 		 * IDN_SLAB_MINTOTAL.  Domains must have the same
386 		 * size SMR/NWR, however they can have different
387 		 * IDN_SLAB_MINTOTAL.  Thus a domain could throttle
388 		 * itself however it wishes.
389 		 */
390 		if (((int)dp->dnslabs < IDN_SLAB_MAXPERDOMAIN) ||
391 		    !IDN_SLAB_MAXPERDOMAIN)
392 			serrno = smr_slab_alloc_remote(domid, spp);
393 		else
394 			serrno = EDQUOT;
395 		break;
396 
397 	default:
398 		cmn_err(CE_WARN,
399 		    "IDN: 301: (ALLOC) unknown slab state (%d) "
400 		    "for domain %d", dp->dslab_state, domid);
401 		serrno = EINVAL;
402 		break;
403 	}
404 
405 	if (*spp == NULL) {
406 		PR_SMR("%s: failed to allocate %s slab [serrno = %d]\n",
407 		    proc, (idn.localid == IDN_GET_MASTERID()) ?
408 		    "local" : "remote", serrno);
409 	}
410 
411 	if (serrno) {
412 		IDN_GKSTAT_GLOBAL_EVENT(gk_slabfail, gk_slabfail_last);
413 	}
414 
415 	return (serrno);
416 }
417 
418 static void
419 smr_slab_free_local(int domid, smr_slab_t *sp)
420 {
421 	int	rv;
422 
423 	/*
424 	 * Do a slaballoc_put just in case there may have
425 	 * been waiters for slabs for this respective domain
426 	 * before we unreserve this slab.
427 	 */
428 	rv = smr_slaballoc_put(domid, sp, 0, 0);
429 
430 	if (rv == -1) {
431 		/*
432 		 * Put failed.  Must not have been any waiters.
433 		 * Go ahead and unreserve the space.
434 		 */
435 		smr_slab_unreserve(domid, sp);
436 	}
437 }
438 
439 static void
440 smr_slab_free_remote(int domid, smr_slab_t *sp)
441 {
442 	smr_offset_t	slab_offset;
443 	int		slab_size;
444 	int		rv;
445 	int		masterid;
446 
447 	ASSERT(domid == idn.localid);
448 	ASSERT(idn.localid != IDN_GET_MASTERID());
449 	ASSERT(DSLAB_WRITE_HELD(domid));
450 	ASSERT(idn_domain[domid].dslab_state == DSLAB_STATE_REMOTE);
451 
452 	masterid = IDN_GET_MASTERID();
453 
454 	ASSERT(masterid != IDN_NIL_DOMID);
455 
456 	slab_offset = IDN_ADDR2OFFSET(sp->sl_start);
457 	slab_size   = (int)(sp->sl_end - sp->sl_start);
458 
459 	/*
460 	 * Do a slaballoc_put just in case there may have
461 	 * been waiters for slabs for this domain before
462 	 * returning back to the master.
463 	 */
464 	rv = smr_slaballoc_put(domid, sp, 0, 0);
465 
466 	if ((rv == -1) && (masterid != IDN_NIL_DOMID)) {
467 		/*
468 		 * Put failed.  No waiters so free the local data
469 		 * structure ship the SMR range off to the master.
470 		 */
471 		smr_free_buflist(sp);
472 		FREESTRUCT(sp, smr_slab_t, 1);
473 
474 		IDN_DLOCK_SHARED(masterid);
475 		idn_send_cmd(masterid, IDNCMD_SLABFREE, slab_offset, slab_size,
476 		    0);
477 		IDN_DUNLOCK(masterid);
478 	}
479 }
480 
481 /*
482  * Free up the list of slabs passed
483  */
484 void
485 smr_slab_free(int domid, smr_slab_t *sp)
486 {
487 	smr_slab_t	*nsp = NULL;
488 
489 	ASSERT(DSLAB_WRITE_HELD(domid));
490 
491 	if (sp == NULL)
492 		return;
493 
494 	ASSERT(IDN_GET_MASTERID() != IDN_NIL_DOMID);
495 
496 	switch (idn_domain[domid].dslab_state) {
497 	case DSLAB_STATE_UNKNOWN:
498 		cmn_err(CE_WARN, "IDN: 302: no slab free without a master");
499 		break;
500 
501 	case DSLAB_STATE_LOCAL:
502 		/*
503 		 * If I'm the master then put the slabs
504 		 * back to the local SMR pool.
505 		 */
506 		for (; sp; sp = nsp) {
507 			nsp = sp->sl_next;
508 			smr_slab_free_local(domid, sp);
509 		}
510 		break;
511 
512 	case DSLAB_STATE_REMOTE:
513 		/*
514 		 * If the domid is my own then I'm freeing
515 		 * a slab back to the Master.
516 		 */
517 		for (; sp; sp = nsp) {
518 			nsp = sp->sl_next;
519 			smr_slab_free_remote(domid, sp);
520 		}
521 		break;
522 
523 	default:
524 		cmn_err(CE_WARN,
525 		    "IDN: 301: (FREE) unknown slab state (%d) for domain %d",
526 		    idn_domain[domid].dslab_state, domid);
527 		break;
528 	}
529 }
530 
531 /*
532  * Free up the list of slab data structures ONLY.
533  * This is called during a fatal shutdown of the master
534  * where we need to garbage collect the locally allocated
535  * data structures used to manage slabs allocated to the
536  * local domain.  Should never be called by a master since
537  * the master can do a regular smr_slab_free.
538  */
539 void
540 smr_slab_garbage_collection(smr_slab_t *sp)
541 {
542 	smr_slab_t	*nsp;
543 
544 	ASSERT(idn_domain[idn.localid].dvote.v.master == 0);
545 
546 	if (sp == NULL)
547 		return;
548 	/*
549 	 * Since this is only ever called by a slave,
550 	 * the slab structure size always contains a buflist.
551 	 */
552 	for (; sp; sp = nsp) {
553 		nsp = sp->sl_next;
554 		smr_free_buflist(sp);
555 		FREESTRUCT(sp, smr_slab_t, 1);
556 	}
557 }
558 
559 /*
560  * Allocate a SMR buffer on behalf of the local domain
561  * which is ultimately targeted for the given domain.
562  *
563  * IMPORTANT: This routine is going to drop the domain rwlock (drwlock)
564  *	      for the domain on whose behalf the request is being
565  *	      made.  This routine canNOT block on trying to
566  *	      reacquire the drwlock.  If he does block then somebody
567  *	      must have the write lock on the domain which most likely
568  *	      means the domain is going south anyway, so just bail on
569  *	      this buffer.  Higher levels will retry if needed.
570  *
571  * XXX - Support larger than IDN_SMR_BUFSIZE allocations?
572  *
573  * Returns:	A negative return value indicates lock lost on domid.
574  *		EINVAL, ENOLINK, ENOLCK(internal)
575  *		smr_slaballoc_wait
576  * 		(EINVAL, ETIMEDOUT)
577  *		smr_slabwatier_unregister
578  *		(0, EINVAL, EBUSY, ENOMEM)
579  */
580 int
581 smr_buf_alloc(int domid, uint_t len, caddr_t *bufpp)
582 {
583 	register idn_domain_t	*dp, *ldp;
584 	smr_slab_t	*sp;
585 	caddr_t		bufp = NULL;
586 	int		serrno;
587 	procname_t	proc = "smr_buf_alloc";
588 
589 	dp = &idn_domain[domid];
590 	/*
591 	 * Local domain can only allocate on behalf of
592 	 * itself if this is a priviledged call and the
593 	 * caller is the master.
594 	 */
595 	ASSERT((domid != idn.localid) && (domid != IDN_NIL_DOMID));
596 
597 	*bufpp = NULL;
598 
599 	if (len > IDN_DATA_SIZE) {
600 		cmn_err(CE_WARN,
601 		    "IDN: 303: buffer len %d > IDN_DATA_SIZE (%lu)",
602 		    len, IDN_DATA_SIZE);
603 		IDN_GKSTAT_GLOBAL_EVENT(gk_buffail, gk_buffail_last);
604 		return (EINVAL);
605 	}
606 
607 	/*
608 	 * Need to go to my local slab list to find
609 	 * a buffer.
610 	 */
611 	ldp = &idn_domain[idn.localid];
612 	/*
613 	 * Now we loop trying to locate a buffer out of our
614 	 * slabs.  We continue this until either we find a
615 	 * buffer or we're unable to allocate a slab.  Note
616 	 * that new slabs are allocated to the front.
617 	 */
618 	DSLAB_LOCK_SHARED(idn.localid);
619 	sp = ldp->dslab;
620 	do {
621 		int	spl, all_empty;
622 
623 		if (sp == NULL) {
624 			if ((serrno = smr_slab_alloc(idn.localid, &sp)) != 0) {
625 				PR_SMR("%s:%d: failed to allocate "
626 				    "slab [serrno = %d]",
627 				    proc, domid, serrno);
628 				DSLAB_UNLOCK(idn.localid);
629 				IDN_GKSTAT_GLOBAL_EVENT(gk_buffail,
630 				    gk_buffail_last);
631 				return (serrno);
632 			}
633 			/*
634 			 * Of course, the world may have changed while
635 			 * we dropped the lock.  Better make sure we're
636 			 * still established.
637 			 */
638 			if (dp->dstate != IDNDS_CONNECTED) {
639 				PR_SMR("%s:%d: state changed during slab "
640 				    "alloc (dstate = %s)\n",
641 				    proc, domid, idnds_str[dp->dstate]);
642 				DSLAB_UNLOCK(idn.localid);
643 				IDN_GKSTAT_GLOBAL_EVENT(gk_buffail,
644 				    gk_buffail_last);
645 				return (ENOLINK);
646 			}
647 			/*
648 			 * We were able to allocate a slab.  Should
649 			 * be at the front of the list, spin again.
650 			 */
651 			sp = ldp->dslab;
652 		}
653 		/*
654 		 * If we have reached here then we have a slab!
655 		 * Hopefully there are free bufs there :-o
656 		 */
657 		spl = splhi();
658 		all_empty = 1;
659 		for (; sp && !bufp; sp = sp->sl_next) {
660 			smr_slabbuf_t	*bp;
661 
662 			if (sp->sl_free == NULL)
663 				continue;
664 
665 			if (!lock_try(&sp->sl_lock)) {
666 				all_empty = 0;
667 				continue;
668 			}
669 
670 			if ((bp = sp->sl_free) == NULL) {
671 				lock_clear(&sp->sl_lock);
672 				continue;
673 			}
674 
675 			sp->sl_free = bp->sb_next;
676 			bp->sb_next = sp->sl_inuse;
677 			sp->sl_inuse = bp;
678 			/*
679 			 * Found a free buffer.
680 			 */
681 			bp->sb_domid = domid;
682 			bufp = bp->sb_bufp;
683 			lock_clear(&sp->sl_lock);
684 		}
685 		splx(spl);
686 
687 		if (!all_empty && !bufp) {
688 			/*
689 			 * If we still haven't found a buffer, but
690 			 * there's still possibly a buffer available,
691 			 * then try again.  Only if we're absolutely
692 			 * sure all slabs are empty do we attempt
693 			 * to allocate a new one.
694 			 */
695 			sp = ldp->dslab;
696 		}
697 	} while (bufp == NULL);
698 
699 	*bufpp = bufp;
700 
701 	ATOMIC_INC(dp->dio);
702 
703 	DSLAB_UNLOCK(idn.localid);
704 
705 	return (0);
706 }
707 
708 /*
709  * Free a buffer allocated to the local domain back to
710  * its respective slab.  Slabs are freed via the slab-reap command.
711  * XXX - Support larger than IDN_SMR_BUFSIZE allocations?
712  */
713 int
714 smr_buf_free(int domid, caddr_t bufp, uint_t len)
715 {
716 	register smr_slab_t	*sp;
717 	smr_slabbuf_t		*bp, **bpp;
718 	idn_domain_t		*ldp;
719 	int		buffreed;
720 	int		lockheld = (len == (uint_t)-1);
721 
722 	/*
723 	 * We should never be free'ing a buffer on
724 	 * behalf of ourselves as we are never the
725 	 * target for allocated SMR buffers.
726 	 */
727 	ASSERT(domid != idn.localid);
728 
729 	sp = NULL;
730 	buffreed = 0;
731 	ldp = &idn_domain[idn.localid];
732 
733 	DSLAB_LOCK_SHARED(idn.localid);
734 
735 	if (((uintptr_t)bufp & (IDN_SMR_BUFSIZE-1)) &&
736 	    (IDN_ADDR2OFFSET(bufp) % IDN_SMR_BUFSIZE)) {
737 		cmn_err(CE_WARN,
738 		    "IDN: 304: buffer (0x%p) from domain %d not on a "
739 		    "%d boundary", (void *)bufp, domid, IDN_SMR_BUFSIZE);
740 		goto bfdone;
741 	}
742 	if (!lockheld && (len > IDN_DATA_SIZE)) {
743 		cmn_err(CE_WARN,
744 		    "IDN: 305: buffer length (%d) from domain %d greater "
745 		    "than IDN_DATA_SIZE (%lu)",
746 		    len, domid, IDN_DATA_SIZE);
747 		goto bfdone;
748 	}
749 
750 	for (sp = ldp->dslab; sp; sp = sp->sl_next)
751 		if ((bufp >= sp->sl_start) && (bufp < sp->sl_end))
752 			break;
753 
754 	if (sp) {
755 		int spl;
756 
757 		spl = splhi();
758 		while (!lock_try(&sp->sl_lock))
759 			;
760 		bpp = &sp->sl_inuse;
761 		for (bp = *bpp; bp; bp = *bpp) {
762 			if (bp->sb_bufp == bufp)
763 				break;
764 			bpp = &bp->sb_next;
765 		}
766 		if (bp) {
767 			ASSERT(bp->sb_domid == domid);
768 			buffreed++;
769 			bp->sb_domid = IDN_NIL_DOMID;
770 			*bpp = bp->sb_next;
771 			bp->sb_next = sp->sl_free;
772 			sp->sl_free = bp;
773 		}
774 		lock_clear(&sp->sl_lock);
775 		splx(spl);
776 	}
777 bfdone:
778 	if (buffreed) {
779 		ATOMIC_DEC(idn_domain[domid].dio);
780 		DIOCHECK(domid);
781 	} else {
782 		cmn_err(CE_WARN,
783 		    "IDN: 306: unknown buffer (0x%p) from domain %d",
784 		    (void *)bufp, domid);
785 		ATOMIC_INC(idn_domain[domid].dioerr);
786 	}
787 
788 	DSLAB_UNLOCK(idn.localid);
789 
790 	return (sp ? 0 : -1);
791 }
792 
793 /*
794  * Alternative interface to smr_buf_free, but with local drwlock
795  * held.
796  */
797 /* ARGSUSED2 */
798 int
799 smr_buf_free_locked(int domid, caddr_t bufp, uint_t len)
800 {
801 	return (smr_buf_free(domid, bufp, (uint_t)-1));
802 }
803 
804 /*
805  * Free any and all buffers associated with the given domain.
806  * Assumption is that domain is dead and buffers are not in use.
807  * Returns:	Number of buffers freed.
808  *		-1 if error.
809  */
810 int
811 smr_buf_free_all(int domid)
812 {
813 	register smr_slab_t	*sp;
814 	register smr_slabbuf_t	*bp, **bpp;
815 	idn_domain_t		*ldp;
816 	int			nbufsfreed = 0;
817 	procname_t	proc = "smr_buf_free_all";
818 
819 	/*
820 	 * We should never be free'ing buffers on
821 	 * behalf of ourself
822 	 */
823 	ASSERT(domid != idn.localid);
824 
825 	if (!VALID_DOMAINID(domid)) {
826 		cmn_err(CE_WARN, "IDN: 307: domain ID (%d) invalid", domid);
827 		return (-1);
828 	}
829 
830 	ldp = &idn_domain[idn.localid];
831 
832 	/*
833 	 * We grab the writer lock so that we don't have any
834 	 * competition during a "free-all" call.
835 	 * No need to grab individual slab locks when holding
836 	 * dslab(writer).
837 	 */
838 	DSLAB_LOCK_EXCL(idn.localid);
839 
840 	for (sp = ldp->dslab; sp; sp = sp->sl_next) {
841 		bpp = &sp->sl_inuse;
842 		for (bp = *bpp; bp; bp = *bpp) {
843 			if (bp->sb_domid == domid) {
844 				bp->sb_domid = IDN_NIL_DOMID;
845 				*bpp = bp->sb_next;
846 				bp->sb_next = sp->sl_free;
847 				sp->sl_free = bp;
848 				nbufsfreed++;
849 			} else {
850 				bpp = &bp->sb_next;
851 			}
852 		}
853 	}
854 
855 	if (nbufsfreed > 0) {
856 		ATOMIC_SUB(idn_domain[domid].dio, nbufsfreed);
857 		idn_domain[domid].dioerr = 0;
858 		DIOCHECK(domid);
859 	}
860 
861 	DSLAB_UNLOCK(idn.localid);
862 
863 	PR_SMR("%s: freed %d buffers for domain %d\n", proc, nbufsfreed, domid);
864 
865 	return (nbufsfreed);
866 }
867 
868 int
869 smr_buf_reclaim(int domid, int nbufs)
870 {
871 	int		num_reclaimed = 0;
872 	idn_domain_t	*ldp, *dp;
873 	procname_t	proc = "smr_buf_reclaim";
874 
875 	ldp = &idn_domain[idn.localid];
876 	dp  = &idn_domain[domid];
877 
878 	ASSERT(domid != idn.localid);
879 
880 	if (ATOMIC_CAS(&dp->dreclaim_inprogress, 0, 1)) {
881 		/*
882 		 * Reclaim is already in progress, don't
883 		 * bother.
884 		 */
885 		PR_DATA("%s: reclaim already in progress\n", proc);
886 		return (0);
887 	}
888 
889 	PR_SMR("%s: requested %d buffers from domain %d\n", proc, nbufs, domid);
890 
891 	if (dp->dio && nbufs) {
892 		register smr_slab_t	*sp;
893 		int spl;
894 
895 		DSLAB_LOCK_SHARED(idn.localid);
896 		spl = splhi();
897 		for (sp = ldp->dslab; sp && nbufs; sp = sp->sl_next) {
898 			register smr_slabbuf_t	*bp, **bpp;
899 
900 			if (sp->sl_inuse == NULL)
901 				continue;
902 
903 			if (!lock_try(&sp->sl_lock))
904 				continue;
905 
906 			if (sp->sl_inuse == NULL) {
907 				lock_clear(&sp->sl_lock);
908 				continue;
909 			}
910 
911 			bpp = &sp->sl_inuse;
912 			for (bp = *bpp; bp && nbufs; bp = *bpp) {
913 				if (bp->sb_domid == domid) {
914 					/*
915 					 * Buffer no longer in use,
916 					 * reclaim it.
917 					 */
918 					bp->sb_domid = IDN_NIL_DOMID;
919 					*bpp = bp->sb_next;
920 					bp->sb_next = sp->sl_free;
921 					sp->sl_free = bp;
922 					num_reclaimed++;
923 					nbufs--;
924 				} else {
925 					bpp = &bp->sb_next;
926 				}
927 			}
928 			lock_clear(&sp->sl_lock);
929 		}
930 		splx(spl);
931 
932 		if (num_reclaimed > 0) {
933 			ATOMIC_SUB(dp->dio, num_reclaimed);
934 			DIOCHECK(domid);
935 		}
936 		DSLAB_UNLOCK(idn.localid);
937 	}
938 
939 	PR_SMR("%s: reclaimed %d buffers from domain %d\n",
940 	    proc, num_reclaimed, domid);
941 
942 	return (num_reclaimed);
943 }
944 
945 /*
946  * Returns 1	If any buffers are locked for the given slab.
947  *	   0	If all buffers are free for the given slab.
948  *
949  * The caller is assumed to have the slab protected so that no
950  * new allocations are attempted from it.  Also, this is only
951  * valid to be called with respect to slabs that were allocated
952  * on behalf of the local domain, i.e. the master is not expected
953  * to call this function with (slave) slab "representatives".
954  */
955 int
956 smr_slab_busy(smr_slab_t *sp)
957 {
958 	return ((sp && sp->sl_inuse) ? 1 : 0);
959 }
960 
961 int
962 smr_slabwaiter_init()
963 {
964 	register int		i;
965 	struct slabwaiter	*wp;
966 
967 	if (idn.slabwaiter != NULL)
968 		return (0);
969 
970 	/*
971 	 * Initialize the slab waiting area for MAX_DOMAINS.
972 	 */
973 	idn.slabwaiter = GETSTRUCT(struct slabwaiter, MAX_DOMAINS);
974 	wp = idn.slabwaiter;
975 	for (i = 0; i < MAX_DOMAINS; wp++, i++) {
976 		wp->w_closed = 0;
977 		mutex_init(&wp->w_mutex, NULL, MUTEX_DEFAULT, NULL);
978 		cv_init(&wp->w_cv, NULL, CV_DEFAULT, NULL);
979 	}
980 
981 	return (0);
982 }
983 
984 void
985 smr_slabwaiter_deinit()
986 {
987 	register int		i;
988 	struct slabwaiter	*wp;
989 
990 	if ((wp = idn.slabwaiter) == NULL)
991 		return;
992 
993 	for (i = 0; i < MAX_DOMAINS; wp++, i++) {
994 		ASSERT(wp->w_nwaiters == 0);
995 		ASSERT(wp->w_sp == NULL);
996 		cv_destroy(&wp->w_cv);
997 		mutex_destroy(&wp->w_mutex);
998 	}
999 
1000 	FREESTRUCT(idn.slabwaiter, struct slabwaiter, MAX_DOMAINS);
1001 	idn.slabwaiter = NULL;
1002 }
1003 
1004 void
1005 smr_slabwaiter_open(domainset_t domset)
1006 {
1007 	int			d;
1008 	struct slabwaiter	*wp;
1009 
1010 	if ((domset == 0) || !idn.slabwaiter)
1011 		return;
1012 
1013 	wp = idn.slabwaiter;
1014 
1015 	for (d = 0; d < MAX_DOMAINS; wp++, d++) {
1016 		if (!DOMAIN_IN_SET(domset, d))
1017 			continue;
1018 		mutex_enter(&wp->w_mutex);
1019 		wp->w_closed = 0;
1020 		mutex_exit(&wp->w_mutex);
1021 	}
1022 }
1023 
1024 void
1025 smr_slabwaiter_close(domainset_t domset)
1026 {
1027 	int			d;
1028 	struct slabwaiter	*wp;
1029 
1030 	if ((domset == 0) || !idn.slabwaiter)
1031 		return;
1032 
1033 	wp = idn.slabwaiter;
1034 
1035 	for (d = 0; d < MAX_DOMAINS; wp++, d++) {
1036 		if (!DOMAIN_IN_SET(domset, d))
1037 			continue;
1038 		mutex_enter(&wp->w_mutex);
1039 		wp->w_closed = 1;
1040 		cv_broadcast(&wp->w_cv);
1041 		mutex_exit(&wp->w_mutex);
1042 	}
1043 }
1044 
1045 /*
1046  * Register the caller with the waiting list for the
1047  * given domain.
1048  *
1049  * Protocol:
1050  *	1st Local requester:	register -> alloc ->
1051  *						put(wakeup|xdc) -> unregister
1052  *	Nth Local requester:	register -> wait
1053  *	1st Remote requester:	register -> xdc -> wait
1054  *	Nth Remote requester:	register -> wait
1055  *
1056  *	Remote Responder:	local alloc -> put(xdc)
1057  *	Local Handler:		xdc -> put(wakeup)
1058  *
1059  * E.g. A standard slave allocation request:
1060  *	slave			master
1061  *	-----			------
1062  *	idn_slab_alloc(remote)
1063  *	- register
1064  *	- xdc		->	idn_handler
1065  *	- wait			...
1066  *				idn_slab_alloc(local)
1067  *				- register
1068  *				- alloc
1069  *				- put
1070  *				  . wakeup [local]
1071  *				- unregister
1072  *	idn_handler    	<-	- xdc
1073  *	- put       		DONE
1074  *	  . wakeup [local]
1075  *	    |
1076  *	    V
1077  *      - wait
1078  *	  . unregister
1079  *	DONE
1080  */
1081 static int
1082 smr_slabwaiter_register(int domid)
1083 {
1084 	struct slabwaiter	*wp;
1085 	int		nwait;
1086 	procname_t	proc = "smr_slabwaiter_register";
1087 
1088 
1089 	ASSERT(domid != IDN_NIL_DOMID);
1090 
1091 	ASSERT(DSLAB_READ_HELD(domid));
1092 
1093 	wp = &idn.slabwaiter[domid];
1094 
1095 	ASSERT(MUTEX_NOT_HELD(&wp->w_mutex));
1096 
1097 	mutex_enter(&wp->w_mutex);
1098 
1099 	nwait = ++(wp->w_nwaiters);
1100 	ASSERT(nwait > 0);
1101 
1102 	PR_SMR("%s: domain = %d, (new)nwaiters = %d\n", proc, domid, nwait);
1103 
1104 	if (nwait > 1) {
1105 		/*
1106 		 * There are already waiters for slab allocations
1107 		 * with respect to this domain.
1108 		 */
1109 		PR_SMR("%s: existing waiters for slabs for domain %d\n",
1110 		    proc, domid);
1111 		mutex_exit(&wp->w_mutex);
1112 
1113 		return (nwait);
1114 	}
1115 	PR_SMR("%s: initial waiter for slabs for domain %d\n", proc, domid);
1116 	/*
1117 	 * We are the first requester of a slab allocation for this
1118 	 * respective domain.  Need to prep waiting area for
1119 	 * subsequent arrival of a slab.
1120 	 */
1121 	wp->w_sp = NULL;
1122 	wp->w_done = 0;
1123 	wp->w_serrno = 0;
1124 
1125 	mutex_exit(&wp->w_mutex);
1126 
1127 	return (nwait);
1128 }
1129 
1130 /*
1131  * It is assumed that the caller had previously registered,
1132  * but wakeup did not occur due to caller never waiting.
1133  * Thus, slaballoc mutex is still held by caller.
1134  *
1135  * Returns:	0
1136  *		EINVAL
1137  *		EBUSY
1138  *		w_serrno (smr_slaballoc_put)
1139  *		(0, ENOLCK, ENOMEM, EDQUOT, EBUSY, ECANCELED)
1140  */
1141 static int
1142 smr_slabwaiter_unregister(int domid, smr_slab_t **spp)
1143 {
1144 	struct slabwaiter	*wp;
1145 	int		serrno = 0;
1146 	procname_t	proc = "smr_slabwaiter_unregister";
1147 
1148 
1149 	ASSERT(domid != IDN_NIL_DOMID);
1150 
1151 	wp = &idn.slabwaiter[domid];
1152 
1153 	mutex_enter(&wp->w_mutex);
1154 
1155 	PR_SMR("%s: domain = %d, nwaiters = %d\n", proc, domid, wp->w_nwaiters);
1156 
1157 	if (wp->w_nwaiters <= 0) {
1158 		/*
1159 		 * Hmmm...nobody is registered!
1160 		 */
1161 		PR_SMR("%s: NO WAITERS (domid = %d)\n", proc, domid);
1162 		mutex_exit(&wp->w_mutex);
1163 		return (EINVAL);
1164 	}
1165 	(wp->w_nwaiters)--;
1166 	/*
1167 	 * Is our present under the tree?
1168 	 */
1169 	if (!wp->w_done) {
1170 		/*
1171 		 * Bummer...no presents.  Let the caller know
1172 		 * via a null slab pointer.
1173 		 * Note that we don't clean up immediately since
1174 		 * message might still come in for other waiters.
1175 		 * Thus, late sleepers may still get a chance.
1176 		 */
1177 		PR_SMR("%s: bummer no slab allocated for domain %d\n",
1178 		    proc, domid);
1179 		ASSERT(wp->w_sp == NULL);
1180 		(*spp) = NULL;
1181 		serrno = wp->w_closed ? ECANCELED : EBUSY;
1182 
1183 	} else {
1184 		(*spp) = wp->w_sp;
1185 		serrno = wp->w_serrno;
1186 
1187 #ifdef DEBUG
1188 		if (serrno == 0) {
1189 			register smr_slab_t	*sp;
1190 
1191 			ASSERT(wp->w_sp);
1192 			PR_SMR("%s: allocation succeeded (domain %d)\n",
1193 			    proc, domid);
1194 
1195 			DSLAB_LOCK_SHARED(domid);
1196 			for (sp = idn_domain[domid].dslab; sp; sp = sp->sl_next)
1197 				if (sp == wp->w_sp)
1198 					break;
1199 			if (sp == NULL)
1200 				cmn_err(CE_WARN,
1201 				    "%s:%d: slab ptr = NULL",
1202 				    proc, domid);
1203 			DSLAB_UNLOCK(domid);
1204 		} else {
1205 			PR_SMR("%s: allocation failed (domain %d) "
1206 			    "[serrno = %d]\n", proc, domid, serrno);
1207 		}
1208 #endif /* DEBUG */
1209 	}
1210 	if (wp->w_nwaiters == 0) {
1211 		/*
1212 		 * Last one turns out the lights.
1213 		 */
1214 		PR_SMR("%s: domain %d last waiter, turning out lights\n",
1215 		    proc, domid);
1216 		wp->w_sp = NULL;
1217 		wp->w_done = 0;
1218 		wp->w_serrno = 0;
1219 	}
1220 	mutex_exit(&wp->w_mutex);
1221 
1222 	return (serrno);
1223 }
1224 
1225 /*
1226  * Called to abort any slaballoc requests on behalf of the
1227  * given domain.
1228  */
1229 int
1230 smr_slabwaiter_abort(int domid, int serrno)
1231 {
1232 	ASSERT(serrno != 0);
1233 
1234 	return (smr_slaballoc_put(domid, NULL, 0, serrno));
1235 }
1236 
1237 /*
1238  * Put ourselves into a timedwait waiting for slab to be
1239  * allocated.
1240  * Returns with slaballoc mutex dropped.
1241  *
1242  * Returns:	EINVAL
1243  *		ETIMEDOUT
1244  *		smr_slabwatier_unregister
1245  *		(0, EINVAL, EBUSY, ENOMEM)
1246  */
1247 static int
1248 smr_slaballoc_wait(int domid, smr_slab_t **spp)
1249 {
1250 	struct slabwaiter	*wp;
1251 	int			serrno = 0, serrno_unreg;
1252 	procname_t		proc = "smr_slaballoc_wait";
1253 
1254 
1255 	wp = &idn.slabwaiter[domid];
1256 
1257 	ASSERT(MUTEX_NOT_HELD(&wp->w_mutex));
1258 
1259 	mutex_enter(&wp->w_mutex);
1260 
1261 	PR_SMR("%s: domain = %d, nwaiters = %d, wsp = 0x%p\n",
1262 	    proc, domid, wp->w_nwaiters, (void *)wp->w_sp);
1263 
1264 	if (wp->w_nwaiters <= 0) {
1265 		/*
1266 		 * Hmmm...no waiters registered.
1267 		 */
1268 		PR_SMR("%s: domain %d, no waiters!\n", proc, domid);
1269 		mutex_exit(&wp->w_mutex);
1270 		return (EINVAL);
1271 	}
1272 	ASSERT(DSLAB_READ_HELD(domid));
1273 	DSLAB_UNLOCK(domid);
1274 
1275 	if (!wp->w_done && !wp->w_closed) {
1276 		int	rv;
1277 
1278 		/*
1279 		 * Only wait if data hasn't arrived yet.
1280 		 */
1281 		PR_SMR("%s: domain %d, going to sleep...\n", proc, domid);
1282 
1283 		rv = cv_reltimedwait_sig(&wp->w_cv, &wp->w_mutex,
1284 		    IDN_SLABALLOC_WAITTIME, TR_CLOCK_TICK);
1285 		if (rv == -1)
1286 			serrno = ETIMEDOUT;
1287 
1288 		PR_SMR("%s: domain %d, awakened (reason = %s)\n",
1289 		    proc, domid, (rv == -1) ? "TIMEOUT" : "SIGNALED");
1290 	}
1291 	/*
1292 	 * We've awakened or request already filled!
1293 	 * Unregister ourselves.
1294 	 */
1295 	mutex_exit(&wp->w_mutex);
1296 
1297 	/*
1298 	 * Any gifts will be entered into spp.
1299 	 */
1300 	serrno_unreg = smr_slabwaiter_unregister(domid, spp);
1301 
1302 	/*
1303 	 * Leave with reader lock on dslab_lock.
1304 	 */
1305 	DSLAB_LOCK_SHARED(domid);
1306 
1307 	if ((serrno_unreg == EBUSY) && (serrno == ETIMEDOUT))
1308 		return (serrno);
1309 	else
1310 		return (serrno_unreg);
1311 }
1312 
1313 /*
1314  * A SMR slab was allocated on behalf of the given domain.
1315  * Wakeup anybody that may have been waiting for the allocation.
1316  * Note that if the domain is a remote one, i.e. master is allocating
1317  * on behalf of a slave, it's up to the caller to transmit the
1318  * allocation response to that domain.
1319  * The force flag indicates that we want to install the slab for
1320  * the given user regardless of whether there are waiters or not.
1321  * This is used primarily in situations where a slave may have timed
1322  * out before the response actually arrived.  In this situation we
1323  * don't want to send slab back to the master after we went through
1324  * the trouble of allocating one.  Master is _not_ allowed to do this
1325  * for remote domains.
1326  *
1327  * Returns:	-1	Non-registered waiter or waiting area garbaged.
1328  *		0	Successfully performed operation.
1329  */
1330 int
1331 smr_slaballoc_put(int domid, smr_slab_t *sp, int forceflag, int serrno)
1332 {
1333 	idn_domain_t		*dp;
1334 	struct slabwaiter	*wp;
1335 	procname_t		proc = "smr_slaballoc_put";
1336 
1337 
1338 	dp = &idn_domain[domid];
1339 
1340 	ASSERT(!serrno ? DSLAB_WRITE_HELD(domid) : 1);
1341 
1342 	if (domid == IDN_NIL_DOMID)
1343 		return (-1);
1344 
1345 	ASSERT(serrno ? (sp == NULL) : (sp != NULL));
1346 
1347 	wp = &idn.slabwaiter[domid];
1348 
1349 	mutex_enter(&wp->w_mutex);
1350 
1351 	PR_SMR("%s: domain = %d, bufp = 0x%p, ebufp = 0x%p, "
1352 	    "(f = %d, se = %d)\n", proc, domid,
1353 	    (sp ? (void *)sp->sl_start : 0),
1354 	    (sp ? (void *)sp->sl_end : 0), forceflag, serrno);
1355 
1356 	if (wp->w_nwaiters <= 0) {
1357 		/*
1358 		 * There are no waiters!!  Must have timed out
1359 		 * and left.  Oh well...
1360 		 */
1361 		PR_SMR("%s: no slaballoc waiters found for domain %d\n",
1362 		    proc, domid);
1363 		if (!forceflag || serrno || !sp) {
1364 			/*
1365 			 * No waiters and caller doesn't want to force it.
1366 			 */
1367 			mutex_exit(&wp->w_mutex);
1368 			return (-1);
1369 		}
1370 		PR_SMR("%s: forcing slab onto domain %d\n", proc, domid);
1371 		ASSERT(domid == idn.localid);
1372 		ASSERT(wp->w_sp == NULL);
1373 		wp->w_done = 0;
1374 		/*
1375 		 * Now we fall through and let it be added in the
1376 		 * regular manor.
1377 		 */
1378 	}
1379 	if (wp->w_done) {
1380 		/*
1381 		 * There's at least one waiter so there has
1382 		 * to be a slab structure waiting for us.
1383 		 * If everything is going smoothly, there should only
1384 		 * be one guy coming through the path of inserting
1385 		 * an error or good slab.  However, if a disconnect was
1386 		 * detected, you may get several guys coming through
1387 		 * trying to let everybody know.
1388 		 */
1389 		ASSERT(wp->w_serrno ?
1390 		    (wp->w_sp == NULL) : (wp->w_sp != NULL));
1391 
1392 		cv_broadcast(&wp->w_cv);
1393 		mutex_exit(&wp->w_mutex);
1394 
1395 		return (-1);
1396 	}
1397 	if (serrno != 0) {
1398 		/*
1399 		 * Bummer...allocation failed.  This call is simply
1400 		 * to wake up the sleepers and let them know.
1401 		 */
1402 		PR_SMR("%s: slaballoc failed for domain %d\n", proc, domid);
1403 		wp->w_serrno = serrno;
1404 		wp->w_done = 1;
1405 		cv_broadcast(&wp->w_cv);
1406 		mutex_exit(&wp->w_mutex);
1407 
1408 		return (0);
1409 	}
1410 	PR_SMR("%s: putting slab into struct (domid=%d, localid=%d)\n",
1411 	    proc, domid, idn.localid);
1412 	/*
1413 	 * Prep the slab structure.
1414 	 */
1415 
1416 	if (domid == idn.localid) {
1417 		/*
1418 		 * Allocation was indeed for me.
1419 		 * Slab may or may not be locked when
1420 		 * we reach.  Normally they will be locked
1421 		 * if we're being called on behalf of a
1422 		 * free, and not locked if on behalf of
1423 		 * a new allocation request.
1424 		 */
1425 		lock_clear(&sp->sl_lock);
1426 		smr_alloc_buflist(sp);
1427 #ifdef DEBUG
1428 	} else {
1429 		uint_t	rv;
1430 		/*
1431 		 * Slab was not allocated on my behalf.  Must be
1432 		 * a master request on behalf of some other domain.
1433 		 * Prep appropriately.  Slab should have been locked
1434 		 * by smr_slab_reserve.
1435 		 */
1436 		rv = lock_try(&sp->sl_lock);
1437 		ASSERT(!rv);
1438 		ASSERT(sp->sl_domid == (short)domid);
1439 #endif /* DEBUG */
1440 	}
1441 
1442 	/*
1443 	 * Slab is ready to go.  Insert it into the domain's
1444 	 * slab list so once we wake everybody up they'll find it.
1445 	 * You better have write lock if you're putting treasures
1446 	 * there.
1447 	 */
1448 	ASSERT(DSLAB_WRITE_HELD(domid));
1449 
1450 	sp->sl_next = dp->dslab;
1451 	dp->dslab  = sp;
1452 	dp->dnslabs++;
1453 
1454 	/*
1455 	 * It's possible to fall through here without waiters.
1456 	 * This is a case where forceflag was set.
1457 	 */
1458 	if (wp->w_nwaiters > 0) {
1459 		wp->w_sp = sp;
1460 		wp->w_serrno = serrno;
1461 		wp->w_done = 1;
1462 		cv_broadcast(&wp->w_cv);
1463 	} else {
1464 		ASSERT(forceflag);
1465 		wp->w_sp = NULL;
1466 		wp->w_serrno = 0;
1467 		wp->w_done = 0;
1468 	}
1469 	mutex_exit(&wp->w_mutex);
1470 
1471 	return (0);
1472 }
1473 
1474 /*
1475  * Get the slab representing [bufp,ebufp] from the respective
1476  * domain's pool if all the buffers are free.  Remove them from
1477  * the domain's list and return it.
1478  * If bufp == NULL, then return however many free ones you
1479  * can find.
1480  * List of slabs are returned locked (sl_lock).
1481  * XXX - Need minimum limit to make sure we don't free up _all_
1482  *	 of our slabs!  However, during a shutdown we will need
1483  *	 method to free them all up regardless of locking.
1484  */
1485 smr_slab_t *
1486 smr_slaballoc_get(int domid, caddr_t bufp, caddr_t ebufp)
1487 {
1488 	idn_domain_t	*dp;
1489 	smr_slab_t	*retsp, *sp, **psp;
1490 	int		foundit, islocal = 0;
1491 	int		nslabs;
1492 	procname_t	proc = "smr_slaballoc_get";
1493 
1494 	PR_SMR("%s: getting slab for domain %d [bufp=0x%p, ebufp=0x%p]\n",
1495 	    proc, domid, (void *)bufp, (void *)ebufp);
1496 
1497 	dp = &idn_domain[domid];
1498 
1499 	ASSERT(DSLAB_WRITE_HELD(domid));
1500 
1501 	if ((sp = dp->dslab) == NULL) {
1502 		PR_SMR("%s: oops, no slabs for domain %d\n", proc, domid);
1503 		return (NULL);
1504 	}
1505 	/*
1506 	 * If domid is myself then I'm trying to get a slab out
1507 	 * of my local pool.  Otherwise, I'm the master and
1508 	 * I'm trying to get the slab representative from the
1509 	 * global pool.
1510 	 */
1511 	if (domid == idn.localid)
1512 		islocal = 1;
1513 
1514 	if (bufp != NULL) {
1515 		nslabs = -1;
1516 	} else {
1517 		nslabs = *(int *)ebufp;
1518 		if (nslabs == 0) {
1519 			PR_SMR("%s: requested nslabs (%d) <= 0\n",
1520 			    proc, nslabs);
1521 			return (NULL);
1522 		} else if (nslabs < 0) {
1523 			/*
1524 			 * Caller wants them all!
1525 			 */
1526 			nslabs = (int)dp->dnslabs;
1527 		}
1528 	}
1529 
1530 	retsp = NULL;
1531 	foundit = 0;
1532 	for (psp = &dp->dslab; sp; sp = *psp) {
1533 		int	isbusy;
1534 
1535 		if (bufp && (sp->sl_start != bufp)) {
1536 			psp = &sp->sl_next;
1537 			continue;
1538 		}
1539 
1540 		if (bufp && (ebufp > sp->sl_end)) {
1541 			PR_SMR("%s: bufp/ebufp (0x%p/0x%p) "
1542 			    "expected (0x%p/0x%p)\n", proc, (void *)bufp,
1543 			    (void *)ebufp, (void *)sp->sl_start,
1544 			    (void *)sp->sl_end);
1545 			ASSERT(0);
1546 		}
1547 		/*
1548 		 * We found the desired slab.  Make sure
1549 		 * it's free.
1550 		 */
1551 		foundit++;
1552 		isbusy = 0;
1553 		if (islocal) {
1554 			int spl;
1555 
1556 			/*
1557 			 * Some of the buffers in the slab
1558 			 * are still in use.  Unlock the
1559 			 * buffers we locked and bail out.
1560 			 */
1561 			spl = splhi();
1562 			if (!lock_try(&sp->sl_lock)) {
1563 				isbusy = 1;
1564 				foundit--;
1565 			} else if (sp->sl_inuse) {
1566 				lock_clear(&sp->sl_lock);
1567 				isbusy = 1;
1568 				foundit--;
1569 			}
1570 			splx(spl);
1571 		} else {
1572 			/*
1573 			 * If not local, then I'm the master getting
1574 			 * a slab from one of the slaves.  In this case,
1575 			 * their slab structs will always be locked.
1576 			 */
1577 			ASSERT(!lock_try(&sp->sl_lock));
1578 		}
1579 		if (!isbusy) {
1580 			/*
1581 			 * Delete the entry from the list and slap
1582 			 * it onto our return list.
1583 			 */
1584 			*psp = sp->sl_next;
1585 			sp->sl_next = retsp;
1586 			retsp = sp;
1587 		} else {
1588 			psp = &sp->sl_next;
1589 		}
1590 		/*
1591 		 * If bufp == NULL (alternate interface) and we haven't
1592 		 * found the desired number of slabs yet, keep looking.
1593 		 */
1594 		if (bufp || (foundit == nslabs))
1595 			break;
1596 	}
1597 	dp->dnslabs -= (short)foundit;
1598 
1599 	if (foundit) {
1600 		PR_SMR("%s: found %d free slabs (domid = %d)\n", proc, foundit,
1601 		    domid);
1602 	} else {
1603 		PR_SMR("%s: no free slabs found (domid = %d)\n", proc, domid);
1604 	}
1605 
1606 	/*
1607 	 * If this is the alternate interface, need to return
1608 	 * the number of slabs found in the ebufp parameter.
1609 	 */
1610 	if (bufp == NULL)
1611 		*(int *)ebufp = foundit;
1612 
1613 	return (retsp);
1614 }
1615 
1616 /*
1617  * Wrapper to hide alternate interface to smr_slaballoc_get()
1618  */
1619 smr_slab_t *
1620 smr_slaballoc_get_n(int domid, int *nslabs)
1621 {
1622 	smr_slab_t	*sp;
1623 
1624 	ASSERT(DSLAB_WRITE_HELD(domid));
1625 
1626 	sp = smr_slaballoc_get(domid, NULL, (caddr_t)nslabs);
1627 
1628 	return (sp);
1629 }
1630 
1631 /*
1632  * Only called by master.  Initialize slab pool based on local SMR.
1633  * Returns number of slabs initialized.
1634  * reserved_size = Length of area at the front of the NWR portion
1635  *		   of the SMR to reserve and not make available for
1636  *		   slab allocations.  Must be a IDN_SMR_BUFSIZE multiple.
1637  * reserved_area = Pointer to reserved area, if any.
1638  */
1639 int
1640 smr_slabpool_init(size_t reserved_size, caddr_t *reserved_area)
1641 {
1642 	size_t			nwr_available;
1643 	int			minperpool, ntotslabs, nxslabs, nslabs;
1644 	register int		p, pp;
1645 	register caddr_t	bufp;
1646 	register smr_slab_t	*sp;
1647 
1648 	ASSERT(IDN_GLOCK_IS_EXCL());
1649 	ASSERT(IDN_GET_MASTERID() != IDN_NIL_DOMID);
1650 
1651 	*reserved_area = NULL;
1652 
1653 	nwr_available = MB2B(IDN_NWR_SIZE) - reserved_size;
1654 
1655 	if ((idn.localid != IDN_GET_MASTERID()) ||
1656 	    (nwr_available < IDN_SLAB_SIZE) ||
1657 	    (idn.slabpool != NULL) ||
1658 	    ((reserved_size != 0) && (reserved_size & (IDN_SMR_BUFSIZE-1)))) {
1659 		return (-1);
1660 	}
1661 
1662 	idn.slabpool = GETSTRUCT(struct slabpool, 1);
1663 	idn.slabpool->ntotslabs = ntotslabs = nwr_available / IDN_SLAB_SIZE;
1664 	ASSERT(ntotslabs > 0);
1665 	minperpool = (ntotslabs < IDN_SLAB_MINPERPOOL) ?
1666 	    1 : IDN_SLAB_MINPERPOOL;
1667 	idn.slabpool->npools = (ntotslabs + (minperpool - 1)) / minperpool;
1668 
1669 	if ((idn.slabpool->npools & 1) == 0) {
1670 		/*
1671 		 * npools needs to be odd for hashing algorithm.
1672 		 */
1673 		idn.slabpool->npools++;
1674 	}
1675 	ASSERT(idn.slabpool->npools > 0);
1676 	minperpool = (ntotslabs < idn.slabpool->npools) ?
1677 	    1 : (ntotslabs / idn.slabpool->npools);
1678 
1679 	/*
1680 	 * Calculate the number of extra slabs that will need to
1681 	 * be alloted to the pools.  This number will be less than
1682 	 * npools.  Only one extra slab is allocated to each pool
1683 	 * until we have assigned all the extra slabs.
1684 	 */
1685 	if (ntotslabs > (idn.slabpool->npools * minperpool))
1686 		nxslabs = ntotslabs - (idn.slabpool->npools * minperpool);
1687 	else
1688 		nxslabs = 0;
1689 	ASSERT((nxslabs >= 0) && (nxslabs < idn.slabpool->npools));
1690 
1691 	idn.slabpool->pool = GETSTRUCT(struct smr_slabtbl,
1692 	    idn.slabpool->npools);
1693 	sp = GETSTRUCT(smr_slab_t, idn.slabpool->ntotslabs);
1694 
1695 	idn.slabpool->savep = sp;
1696 	bufp = idn.smr.vaddr + reserved_size;
1697 
1698 	for (p = nslabs = 0;
1699 	    (p < idn.slabpool->npools) && (ntotslabs > 0);
1700 	    p++, ntotslabs -= nslabs) {
1701 
1702 		nslabs = (ntotslabs < minperpool) ? ntotslabs : minperpool;
1703 		if (nxslabs > 0) {
1704 			nslabs++;
1705 			nxslabs--;
1706 		}
1707 		idn.slabpool->pool[p].sarray = sp;
1708 		for (pp = 0; pp < nslabs; pp++) {
1709 
1710 			sp->sl_next  = NULL;
1711 			sp->sl_start = bufp;
1712 			sp->sl_end   = bufp = sp->sl_start + IDN_SLAB_SIZE;
1713 			sp->sl_lock  = 0;
1714 			sp->sl_domid = (short)IDN_NIL_DOMID;
1715 
1716 			sp++;
1717 		}
1718 		idn.slabpool->pool[p].nfree   = nslabs;
1719 		idn.slabpool->pool[p].nslabs  = nslabs;
1720 	}
1721 	ASSERT((ntotslabs == 0) && (nxslabs == 0));
1722 	/*
1723 	 * We should be at the end of the SMR at this point.
1724 	 */
1725 	ASSERT(bufp == (idn.smr.vaddr + reserved_size
1726 	    + (idn.slabpool->ntotslabs * IDN_SLAB_SIZE)));
1727 
1728 	if (reserved_size != 0)
1729 		*reserved_area = idn.smr.vaddr;
1730 
1731 	return (0);
1732 }
1733 
1734 void
1735 smr_slabpool_deinit()
1736 {
1737 	if (idn.slabpool == NULL)
1738 		return;
1739 
1740 	FREESTRUCT(idn.slabpool->savep, smr_slab_t, idn.slabpool->ntotslabs);
1741 	FREESTRUCT(idn.slabpool->pool, struct smr_slabtbl,
1742 	    idn.slabpool->npools);
1743 	FREESTRUCT(idn.slabpool, struct slabpool, 1);
1744 
1745 	idn.slabpool = NULL;
1746 }
1747 
1748 void
1749 smr_alloc_buflist(smr_slab_t *sp)
1750 {
1751 	int		n, nbufs;
1752 	caddr_t		sbufp;
1753 	smr_slabbuf_t	*hp, *bp;
1754 
1755 	if (sp->sl_head)
1756 		return;
1757 
1758 	nbufs = (sp->sl_end - sp->sl_start) / IDN_SMR_BUFSIZE;
1759 	ASSERT(nbufs > 0);
1760 	if (nbufs <= 0) {
1761 		sp->sl_head = sp->sl_free = sp->sl_inuse = NULL;
1762 		return;
1763 	}
1764 
1765 	hp = GETSTRUCT(smr_slabbuf_t, nbufs);
1766 
1767 	sbufp = sp->sl_start;
1768 	for (n = 0, bp = hp; n < nbufs; bp++, n++) {
1769 		bp->sb_bufp = sbufp;
1770 		bp->sb_domid = IDN_NIL_DOMID;
1771 		bp->sb_next = bp + 1;
1772 		sbufp += IDN_SMR_BUFSIZE;
1773 	}
1774 	(--bp)->sb_next = NULL;
1775 
1776 	sp->sl_head = sp->sl_free = hp;
1777 	sp->sl_inuse = NULL;
1778 }
1779 
1780 void
1781 smr_free_buflist(smr_slab_t *sp)
1782 {
1783 	int	nbufs;
1784 
1785 	if (sp->sl_head == NULL)
1786 		return;
1787 
1788 	nbufs = (sp->sl_end - sp->sl_start) / IDN_SMR_BUFSIZE;
1789 
1790 	FREESTRUCT(sp->sl_head, smr_slabbuf_t, nbufs);
1791 
1792 	sp->sl_head = sp->sl_free = sp->sl_inuse = NULL;
1793 }
1794 
1795 /*
1796  * Returns:	0 Successfully located a slab.
1797  *	       -1 Failure.
1798  */
1799 static smr_slab_t *
1800 smr_slab_reserve(int domid)
1801 {
1802 	register int		p, nextp, s, nexts;
1803 	register smr_slab_t	*spa;
1804 	int			startp, starts;
1805 	int			foundone = 0;
1806 	int			spl;
1807 	procname_t		proc = "smr_slab_reserve";
1808 
1809 	p = startp = SMR_SLABPOOL_HASH(domid);
1810 	nextp = -1;
1811 
1812 	spl = splhi();
1813 	while ((nextp != startp) && !foundone) {
1814 
1815 		s = starts = SMR_SLAB_HASH(p, domid);
1816 		nexts = -1;
1817 		spa = &(idn.slabpool->pool[p].sarray[0]);
1818 
1819 		while ((nexts != starts) && !foundone) {
1820 			if (lock_try(&spa[s].sl_lock)) {
1821 				foundone = 1;
1822 				break;
1823 			}
1824 			nexts = SMR_SLAB_HASHSTEP(p, s);
1825 			s = nexts;
1826 		}
1827 		if (foundone)
1828 			break;
1829 		nextp = SMR_SLABPOOL_HASHSTEP(p);
1830 		p = nextp;
1831 	}
1832 	splx(spl);
1833 
1834 	if (foundone) {
1835 		ASSERT((&spa[s] >= idn.slabpool->savep) &&
1836 		    (&spa[s] < (idn.slabpool->savep +
1837 		    idn.slabpool->ntotslabs)));
1838 
1839 		spa[s].sl_domid = (short)domid;
1840 
1841 		ATOMIC_DEC(idn.slabpool->pool[p].nfree);
1842 
1843 		if (domid == idn.localid) {
1844 			smr_slab_t	*nsp;
1845 			/*
1846 			 * Caller is actually reserving a slab for
1847 			 * themself which means they'll need the full
1848 			 * slab structure to represent all of the I/O
1849 			 * buffers.  The "spa" is just a representative
1850 			 * and doesn't contain the space to manage the
1851 			 * individual buffers.  Need to alloc a full-size
1852 			 * struct.
1853 			 * Note that this results in the returning
1854 			 * smr_slab_t structure being unlocked.
1855 			 */
1856 			ASSERT(idn.localid == IDN_GET_MASTERID());
1857 			nsp = GETSTRUCT(smr_slab_t, 1);
1858 			nsp->sl_start = spa[s].sl_start;
1859 			nsp->sl_end   = spa[s].sl_end;
1860 			smr_alloc_buflist(nsp);
1861 			spa = nsp;
1862 			PR_SMR("%s: allocated full slab struct for domain %d\n",
1863 			    proc, domid);
1864 		} else {
1865 			/*
1866 			 * Slab structure gets returned locked.
1867 			 */
1868 			spa += s;
1869 		}
1870 
1871 		PR_SMR("%s: allocated slab 0x%p (start=0x%p, size=%lu) for "
1872 		    "domain %d\n", proc, (void *)spa, (void *)spa->sl_start,
1873 		    spa->sl_end - spa->sl_start, domid);
1874 	} else {
1875 		PR_SMR("%s: FAILED to allocate for domain %d\n",
1876 		    proc, domid);
1877 		spa = NULL;
1878 	}
1879 
1880 	return (spa);
1881 }
1882 
1883 static void
1884 smr_slab_unreserve(int domid, smr_slab_t *sp)
1885 {
1886 	register int		p, nextp, s, nexts;
1887 	register smr_slab_t	*spa;
1888 	int			foundit = 0;
1889 	int			startp, starts;
1890 	caddr_t			bufp;
1891 	procname_t		proc = "smr_slab_unreserve";
1892 
1893 	bufp = sp->sl_start;
1894 	p = startp = SMR_SLABPOOL_HASH(domid);
1895 	nextp = -1;
1896 
1897 	while ((nextp != startp) && !foundit) {
1898 
1899 		s = starts = SMR_SLAB_HASH(p, domid);
1900 		nexts = -1;
1901 		spa = &(idn.slabpool->pool[p].sarray[0]);
1902 
1903 		while ((nexts != starts) && !foundit) {
1904 			if (spa[s].sl_start == bufp) {
1905 				foundit = 1;
1906 				break;
1907 			}
1908 			nexts = SMR_SLAB_HASHSTEP(p, s);
1909 			s = nexts;
1910 		}
1911 		if (foundit)
1912 			break;
1913 		nextp = SMR_SLABPOOL_HASHSTEP(p);
1914 		p = nextp;
1915 	}
1916 	if (foundit) {
1917 		ASSERT((&spa[s] >= idn.slabpool->savep) &&
1918 		    (&spa[s] < (idn.slabpool->savep +
1919 		    idn.slabpool->ntotslabs)));
1920 		ASSERT(!lock_try(&spa[s].sl_lock));
1921 		ASSERT(spa[s].sl_domid == (short)domid);
1922 
1923 		spa[s].sl_next = NULL;
1924 		spa[s].sl_domid = (short)IDN_NIL_DOMID;
1925 		lock_clear(&spa[s].sl_lock);
1926 
1927 		ATOMIC_INC(idn.slabpool->pool[p].nfree);
1928 
1929 		PR_SMR("%s: freed (bufp=0x%p) for domain %d\n",
1930 		    proc, (void *)bufp, domid);
1931 
1932 		if (domid == idn.localid) {
1933 			/*
1934 			 * Caller is actually unreserving a slab of their
1935 			 * own.  Note that only the master calls this
1936 			 * routine.  Since the master's local slab
1937 			 * structures do not get entered into the global
1938 			 * "representative" pool, we need to free up the
1939 			 * data structure that was passed in.
1940 			 */
1941 			ASSERT(idn.localid == IDN_GET_MASTERID());
1942 			ASSERT(sp != &spa[s]);
1943 
1944 			smr_free_buflist(sp);
1945 			FREESTRUCT(sp, smr_slab_t, 1);
1946 		} else {
1947 			ASSERT(sp == &spa[s]);
1948 		}
1949 	} else {
1950 		/*
1951 		 * Couldn't find slab entry for given buf!
1952 		 */
1953 		PR_SMR("%s: FAILED to free (bufp=0x%p) for domain %d\n",
1954 		    proc, (void *)bufp, domid);
1955 	}
1956 }
1957 
1958 /*
1959  * The Reap Protocol:
1960  *	master				   slave
1961  *	------				   -----
1962  *	smr_slab_reap_global
1963  *	- idn_broadcast_cmd(SLABREAP) ->   idn_recv_cmd(SLABREAP)
1964  *	  . idn_local_cmd(SLABREAP)        - idn_recv_slabreap_req
1965  *	    - smr_slab_reap	             . smr_slab_reap
1966  *	      . smr_slaballoc_get_n            - smr_slaballoc_get_n
1967  *	      . smr_slab_free		       - smr_slab_free
1968  *		- smr_slab_free_local		 . smr_slab_free_remote
1969  *		  . smr_slab_unreserve
1970  *				      <-	   - idn_send_cmd(SLABFREE)
1971  *	idn_recv_cmd(SLABFREE)
1972  *	- idn_recv_slabfree_req
1973  *	  . smr_slaballoc_get
1974  *	  . smr_slab_free
1975  *	    - smr_slab_free_local
1976  *	      . smr_slab_unreserve
1977  *        . idn_send_slabfree_resp    ->   idn_recv_cmd(SLABFREE | ack)
1978  *					   - idn_recv_slabfree_resp
1979  *
1980  *	idn_recv_cmd(SLABREAP | ack)  <-     . idn_send_slabreap_resp
1981  *	- idn_recv_slabreap_resp	   DONE
1982  *	DONE
1983  *
1984  * Check available slabs and if we're below the threshold, kick
1985  * off reaping to all remote domains.  There is no guarantee remote
1986  * domains will be able to free up any.
1987  */
1988 static void
1989 smr_slab_reap_global()
1990 {
1991 	register int	p, npools;
1992 	register int	total_free = 0;
1993 	register struct smr_slabtbl	*tblp;
1994 	static clock_t	reap_last = 0;
1995 	procname_t	proc = "smr_slab_reap_global";
1996 	clock_t		now;
1997 
1998 	ASSERT(IDN_GET_MASTERID() != IDN_NIL_DOMID);
1999 
2000 	DSLAB_LOCK_SHARED(idn.localid);
2001 	if (idn_domain[idn.localid].dslab_state != DSLAB_STATE_LOCAL) {
2002 		PR_SMR("%s: only allowed by master (%d)\n",
2003 		    proc, IDN_GET_MASTERID());
2004 		DSLAB_UNLOCK(idn.localid);
2005 		return;
2006 	}
2007 	DSLAB_UNLOCK(idn.localid);
2008 
2009 	now = ddi_get_lbolt();
2010 	if ((now > 0) && (now > reap_last) &&
2011 	    ((now - reap_last) < IDN_REAP_INTERVAL))
2012 		return;
2013 
2014 	reap_last = now;
2015 
2016 	ASSERT(idn.slabpool);
2017 
2018 	npools = idn.slabpool->npools;
2019 	tblp   = idn.slabpool->pool;
2020 
2021 	for (p = 0; p < npools; tblp++, p++)
2022 		total_free += tblp->nfree;
2023 
2024 	if (total_free <= IDN_SLAB_THRESHOLD) {
2025 		int	diff, reap_per_domain;
2026 
2027 		PR_SMR("%s: kicking off reaping "
2028 		    "(total_free = %d, min = %d)\n",
2029 		    proc, total_free, IDN_SLAB_THRESHOLD);
2030 
2031 		diff = IDN_SLAB_THRESHOLD - total_free;
2032 		reap_per_domain = (diff < idn.ndomains) ?
2033 		    1 : (diff / idn.ndomains);
2034 
2035 		idn_broadcast_cmd(IDNCMD_SLABREAP, reap_per_domain, 0, 0);
2036 	}
2037 }
2038 
2039 void
2040 smr_slab_reap(int domid, int *nslabs)
2041 {
2042 	register int	d;
2043 	int		nreclaimed;
2044 	smr_slab_t	*sp;
2045 	domainset_t	reapset;
2046 	procname_t	proc = "smr_slab_reap";
2047 
2048 	/*
2049 	 * Should only be called on behalf of local
2050 	 * domain.
2051 	 */
2052 	if (domid != idn.localid) {
2053 		PR_SMR("%s: called by domain %d, should only be local (%d)\n",
2054 		    proc, domid, idn.localid);
2055 		ASSERT(0);
2056 		return;
2057 	}
2058 	/*
2059 	 * Try and reclaim some buffers so we can possibly
2060 	 * free up some slabs.
2061 	 */
2062 	reapset = idn.domset.ds_connected;
2063 
2064 	IDN_GKSTAT_GLOBAL_EVENT(gk_reaps, gk_reap_last);
2065 
2066 	nreclaimed = 0;
2067 	for (d = 0; d < MAX_DOMAINS; d++) {
2068 		int		nr;
2069 		idn_domain_t	*dp;
2070 
2071 		if (!DOMAIN_IN_SET(reapset, d))
2072 			continue;
2073 
2074 		IDN_DLOCK_SHARED(d);
2075 
2076 		dp = &idn_domain[d];
2077 		if ((d == idn.localid) || (dp->dcpu < 0)) {
2078 			IDN_DUNLOCK(d);
2079 			continue;
2080 		}
2081 		/*
2082 		 * Clean up any dead I/O errors if possible.
2083 		 */
2084 		if (dp->dioerr > 0) {
2085 			idn_domain_t	*ldp;
2086 			register int	cnt;
2087 			register smr_slabbuf_t	*bp;
2088 			/*
2089 			 * We need to grab the writer lock to prevent
2090 			 * anybody from allocating buffers while we
2091 			 * traverse the slabs outstanding.
2092 			 */
2093 			cnt = 0;
2094 			ldp = &idn_domain[idn.localid];
2095 			IDN_DLOCK_EXCL(idn.localid);
2096 			DSLAB_LOCK_EXCL(idn.localid);
2097 			for (sp = ldp->dslab; sp; sp = sp->sl_next)
2098 				for (bp = sp->sl_inuse; bp; bp = bp->sb_next)
2099 					if (bp->sb_domid == d)
2100 						cnt++;
2101 			DSLAB_UNLOCK(idn.localid);
2102 			ASSERT((dp->dio + dp->dioerr) >= cnt);
2103 			dp->dio = cnt;
2104 			dp->dioerr = 0;
2105 			IDN_DUNLOCK(idn.localid);
2106 		}
2107 		if ((dp->dstate == IDNDS_CONNECTED) &&
2108 		    ((nr = idn_reclaim_mboxdata(d, 0, -1)) > 0))
2109 			nreclaimed += nr;
2110 
2111 		IDN_DUNLOCK(d);
2112 	}
2113 
2114 	DSLAB_LOCK_EXCL(domid);
2115 	sp = smr_slaballoc_get_n(domid, nslabs);
2116 	if (sp) {
2117 		IDN_GKSTAT_ADD(gk_reap_count, (ulong_t)(*nslabs));
2118 		smr_slab_free(domid, sp);
2119 	}
2120 	DSLAB_UNLOCK(domid);
2121 }
2122 
2123 /*
2124  * ---------------------------------------------------------------------
2125  * Remap the (IDN) shared memory region to a new physical address.
2126  * Caller is expected to have performed a ecache flush if needed.
2127  * ---------------------------------------------------------------------
2128  */
2129 void
2130 smr_remap(struct as *as, register caddr_t vaddr,
2131 		register pfn_t new_pfn, uint_t mblen)
2132 {
2133 	tte_t		tte;
2134 	size_t		blen;
2135 	pgcnt_t		p, npgs;
2136 	procname_t	proc = "smr_remap";
2137 
2138 	if (va_to_pfn(vaddr) == new_pfn) {
2139 		PR_REMAP("%s: vaddr (0x%p) already mapped to pfn (0x%lx)\n",
2140 		    proc, (void *)vaddr, new_pfn);
2141 		return;
2142 	}
2143 
2144 	blen = MB2B(mblen);
2145 	npgs = btopr(blen);
2146 	ASSERT(npgs != 0);
2147 
2148 	PR_REMAP("%s: va = 0x%p, pfn = 0x%lx, npgs = %ld, mb = %d MB (%ld)\n",
2149 	    proc, (void *)vaddr, new_pfn, npgs, mblen, blen);
2150 
2151 	/*
2152 	 * Unmap the SMR virtual address from it's current
2153 	 * mapping.
2154 	 */
2155 	hat_unload(as->a_hat, vaddr, blen, HAT_UNLOAD_UNLOCK);
2156 
2157 	if (new_pfn == PFN_INVALID)
2158 		return;
2159 
2160 	/*
2161 	 * Map the SMR to the new physical address space,
2162 	 * presumably a remote pfn.  Cannot use hat_devload
2163 	 * because it will think pfn represents non-memory,
2164 	 * i.e. space since it may beyond his physmax.
2165 	 */
2166 	for (p = 0; p < npgs; p++) {
2167 		sfmmu_memtte(&tte, new_pfn, PROT_READ | PROT_WRITE | HAT_NOSYNC,
2168 		    TTE8K);
2169 		sfmmu_tteload(as->a_hat, &tte, vaddr, NULL, HAT_LOAD_LOCK);
2170 
2171 		vaddr += MMU_PAGESIZE;
2172 		new_pfn++;
2173 	}
2174 
2175 	PR_REMAP("%s: remapped %ld pages (expected %ld)\n",
2176 	    proc, npgs, btopr(MB2B(mblen)));
2177 }
2178