xref: /titanic_41/usr/src/uts/sun4u/starfire/io/idn_smr.c (revision 55f5292c612446ce6f93ddd248c0019b5974618b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  *
25  * Inter-Domain Network
26  *
27  * Shared Memory Region (SMR) supporting code.
28  */
29 
30 #include <sys/types.h>
31 #include <sys/param.h>
32 #include <sys/machparam.h>
33 #include <sys/debug.h>
34 #include <sys/cpuvar.h>
35 #include <sys/kmem.h>
36 #include <sys/mutex.h>
37 #include <sys/rwlock.h>
38 #include <sys/systm.h>
39 #include <sys/machlock.h>
40 #include <sys/membar.h>
41 #include <sys/mman.h>
42 #include <vm/hat.h>
43 #include <vm/as.h>
44 #include <vm/hat_sfmmu.h>
45 #include <sys/vm_machparam.h>
46 #include <sys/x_call.h>
47 
48 #include <sys/idn.h>
49 
/*
 * DIOCHECK(domid)
 *
 * DEBUG-only sanity check: warn (with file and line of the call site)
 * if the outstanding-I/O counter (dio) of the given domain has gone
 * negative.  Expands to nothing in non-DEBUG builds.
 *
 * The body is wrapped in do { } while (0) so that "DIOCHECK(x);"
 * behaves as a single statement in every context, including an
 * unbraced if/else.
 */
#ifdef DEBUG
#define	DIOCHECK(domid) \
do { \
	int	_dio; \
	if ((_dio = idn_domain[(domid)].dio) < 0) { \
		cmn_err(CE_WARN, \
			">>>>> file %s, line %d: domain %d, dio = %d", \
			__FILE__, __LINE__, (domid), _dio); \
	} \
} while (0)
#else
#define	DIOCHECK(domid)
#endif /* DEBUG */
63 
64 static int	smr_slab_alloc_local(int domid, smr_slab_t **spp);
65 static int	smr_slab_alloc_remote(int domid, smr_slab_t **spp);
66 static void	smr_slab_free_local(int domid, smr_slab_t *sp);
67 static void	smr_slab_free_remote(int domid, smr_slab_t *sp);
68 static int 	smr_slabwaiter_register(int domid);
69 static int 	smr_slabwaiter_unregister(int domid, smr_slab_t **spp);
70 static int 	smr_slaballoc_wait(int domid, smr_slab_t **spp);
71 static smr_slab_t 	*smr_slab_reserve(int domid);
72 static void 	smr_slab_unreserve(int domid, smr_slab_t *sp);
73 static void	smr_slab_reap_global();
74 
/*
 * Can only be called by the master.  Allocate a slab from the
 * local pool representing the SMR, on behalf of the given
 * domain.  Slab is either being requested for use by the
 * local domain (i.e. domid == idn.localid), or it's being
 * allocated to give to a remote domain which requested one.
 * In the case of allocating on behalf of a remote domain, the
 * smr_slab_t structure is used simply to manage ownership.
 *
 * Locking: entered with the domain's slab lock held as reader
 * (asserted).  When this caller is the first registered waiter the
 * lock is upgraded/dropped internally and re-acquired as reader
 * (DSLAB_LOCK_SHARED) before returning.  When other waiters already
 * exist the result of smr_slaballoc_wait() is returned directly.
 *
 * On return *spp is the allocated slab, or NULL on failure.
 *
 * Returns:	smr_slaballoc_wait
 * 		(EINVAL, ETIMEDOUT)
 *		smr_slabwaiter_unregister
 *		(0, EINVAL, EBUSY, ENOMEM)
 *		ENOLCK
 */
static int
smr_slab_alloc_local(int domid, smr_slab_t **spp)
{
	int		serrno = 0;
	int		nwait;
	smr_slab_t	*sp;
	idn_domain_t	*dp;


	/*
	 * Only the master can make local allocations.
	 */
	ASSERT(IDN_GET_MASTERID() != IDN_NIL_DOMID);
	ASSERT(idn.localid == IDN_GET_MASTERID());

	*spp = NULL;

	dp = &idn_domain[domid];
	ASSERT(DSLAB_READ_HELD(domid));
	ASSERT(dp->dslab_state == DSLAB_STATE_LOCAL);

	/*
	 * Register myself with the waiting list.  The return value is
	 * the number of registered waiters including this caller.
	 */
	nwait = smr_slabwaiter_register(domid);

	if (nwait > 1) {
		/*
		 * XXX - old comment?
		 * Need to drop the read lock _after_ registering
		 * ourselves with the potential wait list for this allocation.
		 * Although this allocation is not a remote one, we could
		 * still have multiple threads on the master trying to
		 * satisfy (allocate) request on behalf of a remote domain.
		 */
		/*
		 * Somebody is already in the process of satisfying
		 * the allocation request for this respective
		 * domain.  All we need to do is wait and let
		 * it happen.
		 */
		serrno = smr_slaballoc_wait(domid, spp);
		return (serrno);
	}
	/*
	 * I'm the original slab requester for this domain.  It's local
	 * so go ahead and do the job.
	 */

	if ((sp = smr_slab_reserve(domid)) == NULL)
		serrno = ENOMEM;

	/*
	 * Allocation may have failed.  In either case we've
	 * got to do the put to at least wake potential waiters up.
	 *
	 * On success, try to upgrade the reader lock to a writer lock;
	 * if the upgrade is refused, fall back to dropping the lock and
	 * taking it exclusively.
	 */
	if (!serrno) {
		if (DSLAB_LOCK_TRYUPGRADE(domid) == 0) {
			DSLAB_UNLOCK(domid);
			DSLAB_LOCK_EXCL(domid);
		}
	}

	/*
	 * Hand the result (slab or error) to the waiting area.  Note
	 * that sp is NULL here when the reserve failed.
	 */
	(void) smr_slaballoc_put(domid, sp, 0, serrno);

	/*
	 * If serrno is ENOLCK here, then we must have failed
	 * on the upgrade above, so lock already dropped.
	 *
	 * NOTE(review): within this function serrno is only ever 0 or
	 * ENOMEM at this point, so this test is always true and the
	 * lock is always dropped here.
	 */
	if (serrno != ENOLCK) {
		/*
		 * Need to drop since reaping may be recursive?
		 */
		DSLAB_UNLOCK(domid);
	}

	/*
	 * Since we were the original requester but never went
	 * to sleep, we need to directly unregister ourselves
	 * from the waiting list.  This also picks up the final
	 * slab pointer and error code for the caller.
	 */
	serrno = smr_slabwaiter_unregister(domid, spp);

	/*
	 * Now that we've satisfied the request, let's check if any
	 * reaping is necessary.  Only the master does this and only
	 * when allocating slabs, an infrequent event :-o
	 */
	smr_slab_reap_global();

	ASSERT((serrno == 0) ? (*spp != NULL) : (*spp == NULL));

	/*
	 * Re-acquire the reader lock the caller entered with.
	 */
	DSLAB_LOCK_SHARED(domid);

	return (serrno);
}
186 
187 /*
188  * Can only be called by a slave on behalf of himself.  Need to
189  * make a request to the master to allocate a slab of SMR buffers
190  * for the local domain.
191  *
192  * Returns:	smr_slaballoc_wait
193  *		(0, EINVAL, EBUSY, ENOMEM)
194  *		ENOLCK
195  *		ECANCELED
196  */
197 static int
198 smr_slab_alloc_remote(int domid, smr_slab_t **spp)
199 {
200 	int		nwait;
201 	int		serrno = 0;
202 	int		bailout = 0;
203 	int		masterid;
204 	idn_domain_t	*dp, *mdp = NULL;
205 	procname_t	proc = "smr_slab_alloc_remote";
206 
207 	/*
208 	 * Only slaves make remote allocations.
209 	 */
210 	ASSERT(idn.localid != IDN_GET_MASTERID());
211 	ASSERT(domid == idn.localid);
212 	ASSERT(IDN_GET_MASTERID() != IDN_NIL_DOMID);
213 
214 	*spp = NULL;
215 
216 	dp = &idn_domain[domid];
217 	ASSERT(DSLAB_READ_HELD(domid));
218 	ASSERT(dp->dslab_state == DSLAB_STATE_REMOTE);
219 
220 	/*
221 	 * Register myself with the slaballoc waiting list.
222 	 * Note that only allow one outstanding allocation
223 	 * request for the given domain.  Other callers which
224 	 * detect a slab is needed simply get stuck on the
225 	 * waiting list waiting for the original caller to
226 	 * get the job done.
227 	 * The waiter_register routine will allocate the necessary
228 	 * slab structure which will ultimately be inserted in
229 	 * the domain's slab list via smr_slaballoc_put().
230 	 */
231 	nwait = smr_slabwaiter_register(domid);
232 
233 	/*
234 	 * Make sure we have a connection with the master
235 	 * before we wait around for nothing and send a
236 	 * command off to nowhere.
237 	 * First do a quick (no lock) check for global okayness.
238 	 */
239 	if ((idn.state != IDNGS_ONLINE) ||
240 	    ((masterid = IDN_GET_MASTERID()) == IDN_NIL_DOMID)) {
241 		bailout = 1;
242 		serrno = ECANCELED;
243 	}
244 	/*
245 	 * We need to drop our read lock _before_ acquiring the
246 	 * slaballoc waiter lock.  This is necessary because the
247 	 * thread that receives the slab alloc response and fills
248 	 * in the slab structure will need to grab the domain write
249 	 * lock while holding onto the slaballoc waiter lock.
250 	 * Potentially could deadlock if we didn't drop our domain
251 	 * lock before.  Plus, we've registered.
252 	 *
253 	 * 4093209 - Note also that we do this _after_ the check for
254 	 *	idn.masterid where we grab the READER global
255 	 *	lock.  This is to prevent somebody from
256 	 *	changing our state after we drop the drwlock.
257 	 *	A deadlock can occur when shutting down a
258 	 *	domain we're holding the
259 	 */
260 
261 	if (!bailout) {
262 		mdp = &idn_domain[masterid];
263 		/*
264 		 * Global state is okay.  Let's double check the
265 		 * state of our actual target domain.
266 		 */
267 		if (mdp->dstate != IDNDS_CONNECTED) {
268 			bailout = 1;
269 			serrno = ECANCELED;
270 		} else if (IDN_DLOCK_TRY_SHARED(masterid)) {
271 			if (mdp->dstate != IDNDS_CONNECTED) {
272 				bailout = 1;
273 				serrno = ECANCELED;
274 				IDN_DUNLOCK(masterid);
275 			} else if (nwait != 1) {
276 				IDN_DUNLOCK(masterid);
277 			}
278 			/*
279 			 * Note that keep the drwlock(read) for
280 			 * the target (master) domain if it appears
281 			 * we're the lucky one to send the command.
282 			 * We hold onto the lock until we've actually
283 			 * sent the command out.
284 			 * We don't reach this place unless it
285 			 * appears everything is kosher with
286 			 * the target (master) domain.
287 			 */
288 		} else {
289 			bailout = 1;
290 			serrno = ENOLCK;
291 		}
292 	}
293 
294 	if (bailout) {
295 		ASSERT(serrno);
296 		/*
297 		 * Gotta bail.  Abort operation.  Error result
298 		 * will be picked up when we attempt to wait.
299 		 */
300 		PR_SMR("%s: BAILING OUT on behalf domain %d "
301 		    "(err=%d, gs=%s, ms=%s)\n",
302 		    proc, domid, serrno, idngs_str[idn.state],
303 		    (masterid == IDN_NIL_DOMID)
304 		    ? "unknown" : idnds_str[idn_domain[masterid].dstate]);
305 		(void) smr_slabwaiter_abort(domid, serrno);
306 
307 	} else if (nwait == 1) {
308 		/*
309 		 * We are the original requester.  Initiate the
310 		 * actual request to the master.
311 		 */
312 		idn_send_cmd(masterid, IDNCMD_SLABALLOC, IDN_SLAB_SIZE, 0, 0);
313 		ASSERT(mdp);
314 		IDN_DUNLOCK(masterid);
315 	}
316 
317 	/*
318 	 * Wait here for response.  Once awakened func returns
319 	 * with slab structure possibly filled with gifts!
320 	 */
321 	serrno = smr_slaballoc_wait(domid, spp);
322 
323 	return (serrno);
324 }
325 
326 /*
327  * Allocate a slab from the Master on behalf
328  * of the given domain.  Note that master uses
329  * this function to allocate slabs on behalf of
330  * remote domains also.
331  * Entered with drwlock held.
332  * Leaves with drwlock dropped.
333  * Returns:	EDQUOT
334  *		EINVAL
335  *		ENOLCK
336  *		smr_slab_alloc_local
337  *		smr_slab_alloc_remote
338  *		(0, EINVAL, EBUSY, ENOMEM)
339  */
340 int
341 smr_slab_alloc(int domid, smr_slab_t **spp)
342 {
343 	int		serrno = 0;
344 	idn_domain_t	*dp;
345 	procname_t	proc = "smr_slab_alloc";
346 
347 
348 	dp = &idn_domain[domid];
349 
350 	ASSERT(DSLAB_READ_HELD(domid));
351 	ASSERT(dp->dslab_state != DSLAB_STATE_UNKNOWN);
352 
353 	*spp = NULL;
354 
355 	switch (dp->dslab_state) {
356 	case DSLAB_STATE_UNKNOWN:
357 		cmn_err(CE_WARN,
358 		    "IDN: 300: no slab allocations without a master");
359 		serrno = EINVAL;
360 		break;
361 
362 	case DSLAB_STATE_LOCAL:
363 		/*
364 		 * If I'm the master, then get a slab
365 		 * from the local SMR pool, but only
366 		 * if the number of allocated slabs has
367 		 * not been exceeded.
368 		 */
369 		if (((int)dp->dnslabs < IDN_SLAB_MAXPERDOMAIN) ||
370 		    !IDN_SLAB_MAXPERDOMAIN)
371 			serrno = smr_slab_alloc_local(domid, spp);
372 		else
373 			serrno = EDQUOT;
374 		break;
375 
376 	case DSLAB_STATE_REMOTE:
377 		/*
378 		 * Have to make a remote request.
379 		 * In order to prevent overwhelming the master
380 		 * with a bunch of requests that he won't be able
381 		 * to handle we do a check to see if we're still
382 		 * under quota.  Note that the limit is known
383 		 * apriori based on the SMR/NWR size and
384 		 * IDN_SLAB_MINTOTAL.  Domains must have the same
385 		 * size SMR/NWR, however they can have different
386 		 * IDN_SLAB_MINTOTAL.  Thus a domain could throttle
387 		 * itself however it wishes.
388 		 */
389 		if (((int)dp->dnslabs < IDN_SLAB_MAXPERDOMAIN) ||
390 		    !IDN_SLAB_MAXPERDOMAIN)
391 			serrno = smr_slab_alloc_remote(domid, spp);
392 		else
393 			serrno = EDQUOT;
394 		break;
395 
396 	default:
397 		cmn_err(CE_WARN,
398 		    "IDN: 301: (ALLOC) unknown slab state (%d) "
399 		    "for domain %d", dp->dslab_state, domid);
400 		serrno = EINVAL;
401 		break;
402 	}
403 
404 	if (*spp == NULL) {
405 		PR_SMR("%s: failed to allocate %s slab [serrno = %d]\n",
406 		    proc, (idn.localid == IDN_GET_MASTERID()) ?
407 		    "local" : "remote", serrno);
408 	}
409 
410 	if (serrno) {
411 		IDN_GKSTAT_GLOBAL_EVENT(gk_slabfail, gk_slabfail_last);
412 	}
413 
414 	return (serrno);
415 }
416 
417 static void
418 smr_slab_free_local(int domid, smr_slab_t *sp)
419 {
420 	int	rv;
421 
422 	/*
423 	 * Do a slaballoc_put just in case there may have
424 	 * been waiters for slabs for this respective domain
425 	 * before we unreserve this slab.
426 	 */
427 	rv = smr_slaballoc_put(domid, sp, 0, 0);
428 
429 	if (rv == -1) {
430 		/*
431 		 * Put failed.  Must not have been any waiters.
432 		 * Go ahead and unreserve the space.
433 		 */
434 		smr_slab_unreserve(domid, sp);
435 	}
436 }
437 
438 static void
439 smr_slab_free_remote(int domid, smr_slab_t *sp)
440 {
441 	smr_offset_t	slab_offset;
442 	int		slab_size;
443 	int		rv;
444 	int		masterid;
445 
446 	ASSERT(domid == idn.localid);
447 	ASSERT(idn.localid != IDN_GET_MASTERID());
448 	ASSERT(DSLAB_WRITE_HELD(domid));
449 	ASSERT(idn_domain[domid].dslab_state == DSLAB_STATE_REMOTE);
450 
451 	masterid = IDN_GET_MASTERID();
452 
453 	ASSERT(masterid != IDN_NIL_DOMID);
454 
455 	slab_offset = IDN_ADDR2OFFSET(sp->sl_start);
456 	slab_size   = (int)(sp->sl_end - sp->sl_start);
457 
458 	/*
459 	 * Do a slaballoc_put just in case there may have
460 	 * been waiters for slabs for this domain before
461 	 * returning back to the master.
462 	 */
463 	rv = smr_slaballoc_put(domid, sp, 0, 0);
464 
465 	if ((rv == -1) && (masterid != IDN_NIL_DOMID)) {
466 		/*
467 		 * Put failed.  No waiters so free the local data
468 		 * structure ship the SMR range off to the master.
469 		 */
470 		smr_free_buflist(sp);
471 		FREESTRUCT(sp, smr_slab_t, 1);
472 
473 		IDN_DLOCK_SHARED(masterid);
474 		idn_send_cmd(masterid, IDNCMD_SLABFREE, slab_offset, slab_size,
475 		    0);
476 		IDN_DUNLOCK(masterid);
477 	}
478 }
479 
480 /*
481  * Free up the list of slabs passed
482  */
483 void
484 smr_slab_free(int domid, smr_slab_t *sp)
485 {
486 	smr_slab_t	*nsp = NULL;
487 
488 	ASSERT(DSLAB_WRITE_HELD(domid));
489 
490 	if (sp == NULL)
491 		return;
492 
493 	ASSERT(IDN_GET_MASTERID() != IDN_NIL_DOMID);
494 
495 	switch (idn_domain[domid].dslab_state) {
496 	case DSLAB_STATE_UNKNOWN:
497 		cmn_err(CE_WARN, "IDN: 302: no slab free without a master");
498 		break;
499 
500 	case DSLAB_STATE_LOCAL:
501 		/*
502 		 * If I'm the master then put the slabs
503 		 * back to the local SMR pool.
504 		 */
505 		for (; sp; sp = nsp) {
506 			nsp = sp->sl_next;
507 			smr_slab_free_local(domid, sp);
508 		}
509 		break;
510 
511 	case DSLAB_STATE_REMOTE:
512 		/*
513 		 * If the domid is my own then I'm freeing
514 		 * a slab back to the Master.
515 		 */
516 		for (; sp; sp = nsp) {
517 			nsp = sp->sl_next;
518 			smr_slab_free_remote(domid, sp);
519 		}
520 		break;
521 
522 	default:
523 		cmn_err(CE_WARN,
524 		    "IDN: 301: (FREE) unknown slab state (%d) for domain %d",
525 		    idn_domain[domid].dslab_state, domid);
526 		break;
527 	}
528 }
529 
530 /*
531  * Free up the list of slab data structures ONLY.
532  * This is called during a fatal shutdown of the master
533  * where we need to garbage collect the locally allocated
534  * data structures used to manage slabs allocated to the
535  * local domain.  Should never be called by a master since
536  * the master can do a regular smr_slab_free.
537  */
538 void
539 smr_slab_garbage_collection(smr_slab_t *sp)
540 {
541 	smr_slab_t	*nsp;
542 
543 	ASSERT(idn_domain[idn.localid].dvote.v.master == 0);
544 
545 	if (sp == NULL)
546 		return;
547 	/*
548 	 * Since this is only ever called by a slave,
549 	 * the slab structure size always contains a buflist.
550 	 */
551 	for (; sp; sp = nsp) {
552 		nsp = sp->sl_next;
553 		smr_free_buflist(sp);
554 		FREESTRUCT(sp, smr_slab_t, 1);
555 	}
556 }
557 
/*
 * Allocate a SMR buffer on behalf of the local domain
 * which is ultimately targeted for the given domain.
 *
 * The buffer is taken from the free list of one of the local
 * domain's slabs.  When every slab is known to be empty a new slab
 * is requested via smr_slab_alloc().  On success the buffer is moved
 * to its slab's in-use list, tagged with the target domid, and the
 * target domain's outstanding I/O count (dio) is incremented.
 *
 * IMPORTANT: This routine is going to drop the domain rwlock (drwlock)
 *	      for the domain on whose behalf the request is being
 *	      made.  This routine canNOT block on trying to
 *	      reacquire the drwlock.  If he does block then somebody
 *	      must have the write lock on the domain which most likely
 *	      means the domain is going south anyway, so just bail on
 *	      this buffer.  Higher levels will retry if needed.
 *	      NOTE(review): no drwlock manipulation is visible in this
 *	      function itself; it only takes and releases the local
 *	      domain's slab lock.  Presumably the above refers to the
 *	      slab allocation path -- confirm.
 *
 * XXX - Support larger than IDN_SMR_BUFSIZE allocations?
 *
 * domid  - target domain; must not be the local domain nor
 *	    IDN_NIL_DOMID (asserted).
 * len    - requested length; must not exceed IDN_DATA_SIZE.
 * bufpp  - set to the buffer on success, NULL otherwise.
 *
 * Returns:	0 on success.
 *		EINVAL, ENOLINK, ENOLCK(internal)
 *		smr_slaballoc_wait
 * 		(EINVAL, ETIMEDOUT)
 *		smr_slabwaiter_unregister
 *		(0, EINVAL, EBUSY, ENOMEM)
 *		NOTE(review): the original header also stated that a
 *		negative return value indicates lock lost on domid; no
 *		negative value is produced by this function directly.
 */
int
smr_buf_alloc(int domid, uint_t len, caddr_t *bufpp)
{
	register idn_domain_t	*dp, *ldp;
	smr_slab_t	*sp;
	caddr_t		bufp = NULL;
	int		serrno;
	procname_t	proc = "smr_buf_alloc";

	dp = &idn_domain[domid];
	/*
	 * Buffers are always allocated on behalf of some other,
	 * valid, domain.
	 */
	ASSERT((domid != idn.localid) && (domid != IDN_NIL_DOMID));

	*bufpp = NULL;

	if (len > IDN_DATA_SIZE) {
		cmn_err(CE_WARN,
		    "IDN: 303: buffer len %d > IDN_DATA_SIZE (%lu)",
		    len, IDN_DATA_SIZE);
		IDN_GKSTAT_GLOBAL_EVENT(gk_buffail, gk_buffail_last);
		return (EINVAL);
	}

	/*
	 * Need to go to my local slab list to find
	 * a buffer.
	 */
	ldp = &idn_domain[idn.localid];
	/*
	 * Now we loop trying to locate a buffer out of our
	 * slabs.  We continue this until either we find a
	 * buffer or we're unable to allocate a slab.  Note
	 * that new slabs are allocated to the front.
	 */
	DSLAB_LOCK_SHARED(idn.localid);
	sp = ldp->dslab;
	do {
		int	spl, all_empty;

		if (sp == NULL) {
			/*
			 * Either there are no slabs at all or the
			 * previous pass found them all empty: get
			 * a new slab.
			 */
			if ((serrno = smr_slab_alloc(idn.localid, &sp)) != 0) {
				PR_SMR("%s:%d: failed to allocate "
				    "slab [serrno = %d]",
				    proc, domid, serrno);
				DSLAB_UNLOCK(idn.localid);
				IDN_GKSTAT_GLOBAL_EVENT(gk_buffail,
				    gk_buffail_last);
				return (serrno);
			}
			/*
			 * Of course, the world may have changed while
			 * we dropped the lock.  Better make sure we're
			 * still established.
			 */
			if (dp->dstate != IDNDS_CONNECTED) {
				PR_SMR("%s:%d: state changed during slab "
				    "alloc (dstate = %s)\n",
				    proc, domid, idnds_str[dp->dstate]);
				DSLAB_UNLOCK(idn.localid);
				IDN_GKSTAT_GLOBAL_EVENT(gk_buffail,
				    gk_buffail_last);
				return (ENOLINK);
			}
			/*
			 * We were able to allocate a slab.  Should
			 * be at the front of the list, spin again.
			 */
			sp = ldp->dslab;
		}
		/*
		 * If we have reached here then we have a slab!
		 * Hopefully there are free bufs there :-o
		 *
		 * Walk the slab list at high spl.  all_empty stays set
		 * unless a slab that appeared to have free buffers could
		 * not be locked, i.e. unless a buffer may still be
		 * available behind a contended slab lock.
		 */
		spl = splhi();
		all_empty = 1;
		for (; sp && !bufp; sp = sp->sl_next) {
			smr_slabbuf_t	*bp;

			/* Unlocked peek; skip slabs with nothing free. */
			if (sp->sl_free == NULL)
				continue;

			if (!lock_try(&sp->sl_lock)) {
				all_empty = 0;
				continue;
			}

			/* Re-check under the slab lock. */
			if ((bp = sp->sl_free) == NULL) {
				lock_clear(&sp->sl_lock);
				continue;
			}

			/* Move the buffer from the free to the in-use list. */
			sp->sl_free = bp->sb_next;
			bp->sb_next = sp->sl_inuse;
			sp->sl_inuse = bp;
			/*
			 * Found a free buffer.
			 */
			bp->sb_domid = domid;
			bufp = bp->sb_bufp;
			lock_clear(&sp->sl_lock);
		}
		splx(spl);

		if (!all_empty && !bufp) {
			/*
			 * If we still haven't found a buffer, but
			 * there's still possibly a buffer available,
			 * then try again.  Only if we're absolutely
			 * sure all slabs are empty do we attempt
			 * to allocate a new one.
			 */
			sp = ldp->dslab;
		}
	} while (bufp == NULL);

	*bufpp = bufp;

	/* One more buffer outstanding towards the target domain. */
	ATOMIC_INC(dp->dio);

	DSLAB_UNLOCK(idn.localid);

	return (0);
}
706 
/*
 * Free a buffer allocated to the local domain back to
 * its respective slab.  Slabs are freed via the slab-reap command.
 * XXX - Support larger than IDN_SMR_BUFSIZE allocations?
 *
 * domid - domain the buffer had been allocated for; must not be
 *	   the local domain (asserted).
 * bufp  - buffer address; looked up in the local domain's slabs.
 * len   - buffer length, checked against IDN_DATA_SIZE.  The special
 *	   value (uint_t)-1 (as passed by smr_buf_free_locked) skips
 *	   the length check.
 *
 * On success the buffer is moved from its slab's in-use list to the
 * free list and the domain's dio count is decremented.  Otherwise a
 * warning is issued and the domain's dioerr count is incremented.
 *
 * Returns:	0  if a slab containing bufp was found,
 *		-1 otherwise (including failed sanity checks).
 *		Note that 0 is also returned when the slab was found
 *		but the buffer was not on its in-use list, even though
 *		that case is warned about and counted as an error.
 */
int
smr_buf_free(int domid, caddr_t bufp, uint_t len)
{
	register smr_slab_t	*sp;
	smr_slabbuf_t		*bp, **bpp;
	idn_domain_t		*ldp;
	int		buffreed;
	int		lockheld = (len == (uint_t)-1);

	/*
	 * We should never be free'ing a buffer on
	 * behalf of ourselves as we are never the
	 * target for allocated SMR buffers.
	 */
	ASSERT(domid != idn.localid);

	sp = NULL;
	buffreed = 0;
	ldp = &idn_domain[idn.localid];

	DSLAB_LOCK_SHARED(idn.localid);

	/*
	 * Reject the buffer only if both its address and its SMR
	 * offset are not multiples of IDN_SMR_BUFSIZE.
	 */
	if (((uintptr_t)bufp & (IDN_SMR_BUFSIZE-1)) &&
	    (IDN_ADDR2OFFSET(bufp) % IDN_SMR_BUFSIZE)) {
		cmn_err(CE_WARN,
		    "IDN: 304: buffer (0x%p) from domain %d not on a "
		    "%d boundary", bufp, domid, IDN_SMR_BUFSIZE);
		goto bfdone;
	}
	if (!lockheld && (len > IDN_DATA_SIZE)) {
		cmn_err(CE_WARN,
		    "IDN: 305: buffer length (%d) from domain %d greater "
		    "than IDN_DATA_SIZE (%lu)",
		    len, domid, IDN_DATA_SIZE);
		goto bfdone;
	}

	/*
	 * Locate the slab whose address range contains the buffer.
	 */
	for (sp = ldp->dslab; sp; sp = sp->sl_next)
		if ((bufp >= sp->sl_start) && (bufp < sp->sl_end))
			break;

	if (sp) {
		int spl;

		/*
		 * Spin for the slab lock at high spl, then search the
		 * in-use list for the buffer.  bpp tracks the link that
		 * points at bp so the buffer can be unlinked in place.
		 */
		spl = splhi();
		while (!lock_try(&sp->sl_lock))
			;
		bpp = &sp->sl_inuse;
		for (bp = *bpp; bp; bp = *bpp) {
			if (bp->sb_bufp == bufp)
				break;
			bpp = &bp->sb_next;
		}
		if (bp) {
			ASSERT(bp->sb_domid == domid);
			buffreed++;
			/* Move from the in-use list to the free list. */
			bp->sb_domid = IDN_NIL_DOMID;
			*bpp = bp->sb_next;
			bp->sb_next = sp->sl_free;
			sp->sl_free = bp;
		}
		lock_clear(&sp->sl_lock);
		splx(spl);
	}
bfdone:
	if (buffreed) {
		ATOMIC_DEC(idn_domain[domid].dio);
		DIOCHECK(domid);
	} else {
		cmn_err(CE_WARN,
		    "IDN: 306: unknown buffer (0x%p) from domain %d",
		    bufp, domid);
		ATOMIC_INC(idn_domain[domid].dioerr);
	}

	DSLAB_UNLOCK(idn.localid);

	return (sp ? 0 : -1);
}
791 
792 /*
793  * Alternative interface to smr_buf_free, but with local drwlock
794  * held.
795  */
796 /* ARGSUSED2 */
797 int
798 smr_buf_free_locked(int domid, caddr_t bufp, uint_t len)
799 {
800 	return (smr_buf_free(domid, bufp, (uint_t)-1));
801 }
802 
803 /*
804  * Free any and all buffers associated with the given domain.
805  * Assumption is that domain is dead and buffers are not in use.
806  * Returns:	Number of buffers freed.
807  *		-1 if error.
808  */
809 int
810 smr_buf_free_all(int domid)
811 {
812 	register smr_slab_t	*sp;
813 	register smr_slabbuf_t	*bp, **bpp;
814 	idn_domain_t		*ldp;
815 	int			nbufsfreed = 0;
816 	procname_t	proc = "smr_buf_free_all";
817 
818 	/*
819 	 * We should never be free'ing buffers on
820 	 * behalf of ourself
821 	 */
822 	ASSERT(domid != idn.localid);
823 
824 	if (!VALID_DOMAINID(domid)) {
825 		cmn_err(CE_WARN, "IDN: 307: domain ID (%d) invalid", domid);
826 		return (-1);
827 	}
828 
829 	ldp = &idn_domain[idn.localid];
830 
831 	/*
832 	 * We grab the writer lock so that we don't have any
833 	 * competition during a "free-all" call.
834 	 * No need to grab individual slab locks when holding
835 	 * dslab(writer).
836 	 */
837 	DSLAB_LOCK_EXCL(idn.localid);
838 
839 	for (sp = ldp->dslab; sp; sp = sp->sl_next) {
840 		bpp = &sp->sl_inuse;
841 		for (bp = *bpp; bp; bp = *bpp) {
842 			if (bp->sb_domid == domid) {
843 				bp->sb_domid = IDN_NIL_DOMID;
844 				*bpp = bp->sb_next;
845 				bp->sb_next = sp->sl_free;
846 				sp->sl_free = bp;
847 				nbufsfreed++;
848 			} else {
849 				bpp = &bp->sb_next;
850 			}
851 		}
852 	}
853 
854 	if (nbufsfreed > 0) {
855 		ATOMIC_SUB(idn_domain[domid].dio, nbufsfreed);
856 		idn_domain[domid].dioerr = 0;
857 		DIOCHECK(domid);
858 	}
859 
860 	DSLAB_UNLOCK(idn.localid);
861 
862 	PR_SMR("%s: freed %d buffers for domain %d\n", proc, nbufsfreed, domid);
863 
864 	return (nbufsfreed);
865 }
866 
867 int
868 smr_buf_reclaim(int domid, int nbufs)
869 {
870 	int		num_reclaimed = 0;
871 	idn_domain_t	*ldp, *dp;
872 	procname_t	proc = "smr_buf_reclaim";
873 
874 	ldp = &idn_domain[idn.localid];
875 	dp  = &idn_domain[domid];
876 
877 	ASSERT(domid != idn.localid);
878 
879 	if (ATOMIC_CAS(&dp->dreclaim_inprogress, 0, 1)) {
880 		/*
881 		 * Reclaim is already in progress, don't
882 		 * bother.
883 		 */
884 		PR_DATA("%s: reclaim already in progress\n", proc);
885 		return (0);
886 	}
887 
888 	PR_SMR("%s: requested %d buffers from domain %d\n", proc, nbufs, domid);
889 
890 	if (dp->dio && nbufs) {
891 		register smr_slab_t	*sp;
892 		int spl;
893 
894 		DSLAB_LOCK_SHARED(idn.localid);
895 		spl = splhi();
896 		for (sp = ldp->dslab; sp && nbufs; sp = sp->sl_next) {
897 			register smr_slabbuf_t	*bp, **bpp;
898 
899 			if (sp->sl_inuse == NULL)
900 				continue;
901 
902 			if (!lock_try(&sp->sl_lock))
903 				continue;
904 
905 			if (sp->sl_inuse == NULL) {
906 				lock_clear(&sp->sl_lock);
907 				continue;
908 			}
909 
910 			bpp = &sp->sl_inuse;
911 			for (bp = *bpp; bp && nbufs; bp = *bpp) {
912 				if (bp->sb_domid == domid) {
913 					/*
914 					 * Buffer no longer in use,
915 					 * reclaim it.
916 					 */
917 					bp->sb_domid = IDN_NIL_DOMID;
918 					*bpp = bp->sb_next;
919 					bp->sb_next = sp->sl_free;
920 					sp->sl_free = bp;
921 					num_reclaimed++;
922 					nbufs--;
923 				} else {
924 					bpp = &bp->sb_next;
925 				}
926 			}
927 			lock_clear(&sp->sl_lock);
928 		}
929 		splx(spl);
930 
931 		if (num_reclaimed > 0) {
932 			ATOMIC_SUB(dp->dio, num_reclaimed);
933 			DIOCHECK(domid);
934 		}
935 		DSLAB_UNLOCK(idn.localid);
936 	}
937 
938 	PR_SMR("%s: reclaimed %d buffers from domain %d\n",
939 	    proc, num_reclaimed, domid);
940 
941 	return (num_reclaimed);
942 }
943 
944 /*
945  * Returns 1	If any buffers are locked for the given slab.
946  *	   0	If all buffers are free for the given slab.
947  *
948  * The caller is assumed to have the slab protected so that no
949  * new allocations are attempted from it.  Also, this is only
950  * valid to be called with respect to slabs that were allocated
951  * on behalf of the local domain, i.e. the master is not expected
952  * to call this function with (slave) slab "representatives".
953  */
954 int
955 smr_slab_busy(smr_slab_t *sp)
956 {
957 	return ((sp && sp->sl_inuse) ? 1 : 0);
958 }
959 
960 int
961 smr_slabwaiter_init()
962 {
963 	register int		i;
964 	struct slabwaiter	*wp;
965 
966 	if (idn.slabwaiter != NULL)
967 		return (0);
968 
969 	/*
970 	 * Initialize the slab waiting area for MAX_DOMAINS.
971 	 */
972 	idn.slabwaiter = GETSTRUCT(struct slabwaiter, MAX_DOMAINS);
973 	wp = idn.slabwaiter;
974 	for (i = 0; i < MAX_DOMAINS; wp++, i++) {
975 		wp->w_closed = 0;
976 		mutex_init(&wp->w_mutex, NULL, MUTEX_DEFAULT, NULL);
977 		cv_init(&wp->w_cv, NULL, CV_DEFAULT, NULL);
978 	}
979 
980 	return (0);
981 }
982 
983 void
984 smr_slabwaiter_deinit()
985 {
986 	register int		i;
987 	struct slabwaiter	*wp;
988 
989 	if ((wp = idn.slabwaiter) == NULL)
990 		return;
991 
992 	for (i = 0; i < MAX_DOMAINS; wp++, i++) {
993 		ASSERT(wp->w_nwaiters == 0);
994 		ASSERT(wp->w_sp == NULL);
995 		cv_destroy(&wp->w_cv);
996 		mutex_destroy(&wp->w_mutex);
997 	}
998 
999 	FREESTRUCT(idn.slabwaiter, struct slabwaiter, MAX_DOMAINS);
1000 	idn.slabwaiter = NULL;
1001 }
1002 
1003 void
1004 smr_slabwaiter_open(domainset_t domset)
1005 {
1006 	int			d;
1007 	struct slabwaiter	*wp;
1008 
1009 	if ((domset == 0) || !idn.slabwaiter)
1010 		return;
1011 
1012 	wp = idn.slabwaiter;
1013 
1014 	for (d = 0; d < MAX_DOMAINS; wp++, d++) {
1015 		if (!DOMAIN_IN_SET(domset, d))
1016 			continue;
1017 		mutex_enter(&wp->w_mutex);
1018 		wp->w_closed = 0;
1019 		mutex_exit(&wp->w_mutex);
1020 	}
1021 }
1022 
1023 void
1024 smr_slabwaiter_close(domainset_t domset)
1025 {
1026 	int			d;
1027 	struct slabwaiter	*wp;
1028 
1029 	if ((domset == 0) || !idn.slabwaiter)
1030 		return;
1031 
1032 	wp = idn.slabwaiter;
1033 
1034 	for (d = 0; d < MAX_DOMAINS; wp++, d++) {
1035 		if (!DOMAIN_IN_SET(domset, d))
1036 			continue;
1037 		mutex_enter(&wp->w_mutex);
1038 		wp->w_closed = 1;
1039 		cv_broadcast(&wp->w_cv);
1040 		mutex_exit(&wp->w_mutex);
1041 	}
1042 }
1043 
1044 /*
1045  * Register the caller with the waiting list for the
1046  * given domain.
1047  *
1048  * Protocol:
1049  *	1st Local requester:	register -> alloc ->
1050  *						put(wakeup|xdc) -> unregister
1051  *	Nth Local requester:	register -> wait
1052  *	1st Remote requester:	register -> xdc -> wait
1053  *	Nth Remote requester:	register -> wait
1054  *
1055  *	Remote Responder:	local alloc -> put(xdc)
1056  *	Local Handler:		xdc -> put(wakeup)
1057  *
1058  * E.g. A standard slave allocation request:
1059  *	slave			master
1060  *	-----			------
1061  *	idn_slab_alloc(remote)
1062  *	- register
1063  *	- xdc		->	idn_handler
1064  *	- wait			...
1065  *				idn_slab_alloc(local)
1066  *				- register
1067  *				- alloc
1068  *				- put
1069  *				  . wakeup [local]
1070  *				- unregister
1071  *	idn_handler    	<-	- xdc
1072  *	- put       		DONE
1073  *	  . wakeup [local]
1074  *	    |
1075  *	    V
1076  *      - wait
1077  *	  . unregister
1078  *	DONE
1079  */
1080 static int
1081 smr_slabwaiter_register(int domid)
1082 {
1083 	struct slabwaiter	*wp;
1084 	int		nwait;
1085 	procname_t	proc = "smr_slabwaiter_register";
1086 
1087 
1088 	ASSERT(domid != IDN_NIL_DOMID);
1089 
1090 	ASSERT(DSLAB_READ_HELD(domid));
1091 
1092 	wp = &idn.slabwaiter[domid];
1093 
1094 	ASSERT(MUTEX_NOT_HELD(&wp->w_mutex));
1095 
1096 	mutex_enter(&wp->w_mutex);
1097 
1098 	nwait = ++(wp->w_nwaiters);
1099 	ASSERT(nwait > 0);
1100 
1101 	PR_SMR("%s: domain = %d, (new)nwaiters = %d\n", proc, domid, nwait);
1102 
1103 	if (nwait > 1) {
1104 		/*
1105 		 * There are already waiters for slab allocations
1106 		 * with respect to this domain.
1107 		 */
1108 		PR_SMR("%s: existing waiters for slabs for domain %d\n",
1109 		    proc, domid);
1110 		mutex_exit(&wp->w_mutex);
1111 
1112 		return (nwait);
1113 	}
1114 	PR_SMR("%s: initial waiter for slabs for domain %d\n", proc, domid);
1115 	/*
1116 	 * We are the first requester of a slab allocation for this
1117 	 * respective domain.  Need to prep waiting area for
1118 	 * subsequent arrival of a slab.
1119 	 */
1120 	wp->w_sp = NULL;
1121 	wp->w_done = 0;
1122 	wp->w_serrno = 0;
1123 
1124 	mutex_exit(&wp->w_mutex);
1125 
1126 	return (nwait);
1127 }
1128 
1129 /*
1130  * It is assumed that the caller had previously registered,
1131  * but wakeup did not occur due to caller never waiting.
1132  * Thus, slaballoc mutex is still held by caller.
1133  *
1134  * Returns:	0
1135  *		EINVAL
1136  *		EBUSY
1137  *		w_serrno (smr_slaballoc_put)
1138  *		(0, ENOLCK, ENOMEM, EDQUOT, EBUSY, ECANCELED)
1139  */
1140 static int
1141 smr_slabwaiter_unregister(int domid, smr_slab_t **spp)
1142 {
1143 	struct slabwaiter	*wp;
1144 	int		serrno = 0;
1145 	procname_t	proc = "smr_slabwaiter_unregister";
1146 
1147 
1148 	ASSERT(domid != IDN_NIL_DOMID);
1149 
1150 	wp = &idn.slabwaiter[domid];
1151 
1152 	mutex_enter(&wp->w_mutex);
1153 
1154 	PR_SMR("%s: domain = %d, nwaiters = %d\n", proc, domid, wp->w_nwaiters);
1155 
1156 	if (wp->w_nwaiters <= 0) {
1157 		/*
1158 		 * Hmmm...nobody is registered!
1159 		 */
1160 		PR_SMR("%s: NO WAITERS (domid = %d)\n", proc, domid);
1161 		mutex_exit(&wp->w_mutex);
1162 		return (EINVAL);
1163 	}
1164 	(wp->w_nwaiters)--;
1165 	/*
1166 	 * Is our present under the tree?
1167 	 */
1168 	if (!wp->w_done) {
1169 		/*
1170 		 * Bummer...no presents.  Let the caller know
1171 		 * via a null slab pointer.
1172 		 * Note that we don't clean up immediately since
1173 		 * message might still come in for other waiters.
1174 		 * Thus, late sleepers may still get a chance.
1175 		 */
1176 		PR_SMR("%s: bummer no slab allocated for domain %d\n",
1177 		    proc, domid);
1178 		ASSERT(wp->w_sp == NULL);
1179 		(*spp) = NULL;
1180 		serrno = wp->w_closed ? ECANCELED : EBUSY;
1181 
1182 	} else {
1183 		(*spp) = wp->w_sp;
1184 		serrno = wp->w_serrno;
1185 
1186 #ifdef DEBUG
1187 		if (serrno == 0) {
1188 			register smr_slab_t	*sp;
1189 
1190 			ASSERT(wp->w_sp);
1191 			PR_SMR("%s: allocation succeeded (domain %d)\n",
1192 			    proc, domid);
1193 
1194 			DSLAB_LOCK_SHARED(domid);
1195 			for (sp = idn_domain[domid].dslab; sp; sp = sp->sl_next)
1196 				if (sp == wp->w_sp)
1197 					break;
1198 			if (sp == NULL)
1199 				cmn_err(CE_WARN,
1200 				    "%s:%d: slab ptr = NULL",
1201 				    proc, domid);
1202 			DSLAB_UNLOCK(domid);
1203 		} else {
1204 			PR_SMR("%s: allocation failed (domain %d) "
1205 			    "[serrno = %d]\n", proc, domid, serrno);
1206 		}
1207 #endif /* DEBUG */
1208 	}
1209 	if (wp->w_nwaiters == 0) {
1210 		/*
1211 		 * Last one turns out the lights.
1212 		 */
1213 		PR_SMR("%s: domain %d last waiter, turning out lights\n",
1214 		    proc, domid);
1215 		wp->w_sp = NULL;
1216 		wp->w_done = 0;
1217 		wp->w_serrno = 0;
1218 	}
1219 	mutex_exit(&wp->w_mutex);
1220 
1221 	return (serrno);
1222 }
1223 
1224 /*
1225  * Called to abort any slaballoc requests on behalf of the
1226  * given domain.
1227  */
1228 int
1229 smr_slabwaiter_abort(int domid, int serrno)
1230 {
1231 	ASSERT(serrno != 0);
1232 
1233 	return (smr_slaballoc_put(domid, NULL, 0, serrno));
1234 }
1235 
1236 /*
1237  * Put ourselves into a timedwait waiting for slab to be
1238  * allocated.
1239  * Returns with slaballoc mutex dropped.
1240  *
1241  * Returns:	EINVAL
1242  *		ETIMEDOUT
1243  *		smr_slabwatier_unregister
1244  *		(0, EINVAL, EBUSY, ENOMEM)
1245  */
1246 static int
1247 smr_slaballoc_wait(int domid, smr_slab_t **spp)
1248 {
1249 	struct slabwaiter	*wp;
1250 	int			serrno = 0, serrno_unreg;
1251 	procname_t		proc = "smr_slaballoc_wait";
1252 
1253 
1254 	wp = &idn.slabwaiter[domid];
1255 
1256 	ASSERT(MUTEX_NOT_HELD(&wp->w_mutex));
1257 
1258 	mutex_enter(&wp->w_mutex);
1259 
1260 	PR_SMR("%s: domain = %d, nwaiters = %d, wsp = 0x%p\n",
1261 	    proc, domid, wp->w_nwaiters, wp->w_sp);
1262 
1263 	if (wp->w_nwaiters <= 0) {
1264 		/*
1265 		 * Hmmm...no waiters registered.
1266 		 */
1267 		PR_SMR("%s: domain %d, no waiters!\n", proc, domid);
1268 		mutex_exit(&wp->w_mutex);
1269 		return (EINVAL);
1270 	}
1271 	ASSERT(DSLAB_READ_HELD(domid));
1272 	DSLAB_UNLOCK(domid);
1273 
1274 	if (!wp->w_done && !wp->w_closed) {
1275 		int	rv;
1276 
1277 		/*
1278 		 * Only wait if data hasn't arrived yet.
1279 		 */
1280 		PR_SMR("%s: domain %d, going to sleep...\n", proc, domid);
1281 
1282 		rv = cv_reltimedwait_sig(&wp->w_cv, &wp->w_mutex,
1283 		    IDN_SLABALLOC_WAITTIME, TR_CLOCK_TICK);
1284 		if (rv == -1)
1285 			serrno = ETIMEDOUT;
1286 
1287 		PR_SMR("%s: domain %d, awakened (reason = %s)\n",
1288 		    proc, domid, (rv == -1) ? "TIMEOUT" : "SIGNALED");
1289 	}
1290 	/*
1291 	 * We've awakened or request already filled!
1292 	 * Unregister ourselves.
1293 	 */
1294 	mutex_exit(&wp->w_mutex);
1295 
1296 	/*
1297 	 * Any gifts will be entered into spp.
1298 	 */
1299 	serrno_unreg = smr_slabwaiter_unregister(domid, spp);
1300 
1301 	/*
1302 	 * Leave with reader lock on dslab_lock.
1303 	 */
1304 	DSLAB_LOCK_SHARED(domid);
1305 
1306 	if ((serrno_unreg == EBUSY) && (serrno == ETIMEDOUT))
1307 		return (serrno);
1308 	else
1309 		return (serrno_unreg);
1310 }
1311 
1312 /*
1313  * A SMR slab was allocated on behalf of the given domain.
1314  * Wakeup anybody that may have been waiting for the allocation.
1315  * Note that if the domain is a remote one, i.e. master is allocating
1316  * on behalf of a slave, it's up to the caller to transmit the
1317  * allocation response to that domain.
1318  * The force flag indicates that we want to install the slab for
1319  * the given user regardless of whether there are waiters or not.
1320  * This is used primarily in situations where a slave may have timed
1321  * out before the response actually arrived.  In this situation we
1322  * don't want to send slab back to the master after we went through
1323  * the trouble of allocating one.  Master is _not_ allowed to do this
1324  * for remote domains.
1325  *
1326  * Returns:	-1	Non-registered waiter or waiting area garbaged.
1327  *		0	Successfully performed operation.
1328  */
1329 int
1330 smr_slaballoc_put(int domid, smr_slab_t *sp, int forceflag, int serrno)
1331 {
1332 	idn_domain_t		*dp;
1333 	struct slabwaiter	*wp;
1334 	procname_t		proc = "smr_slaballoc_put";
1335 
1336 
1337 	dp = &idn_domain[domid];
1338 
1339 	ASSERT(!serrno ? DSLAB_WRITE_HELD(domid) : 1);
1340 
1341 	if (domid == IDN_NIL_DOMID)
1342 		return (-1);
1343 
1344 	ASSERT(serrno ? (sp == NULL) : (sp != NULL));
1345 
1346 	wp = &idn.slabwaiter[domid];
1347 
1348 	mutex_enter(&wp->w_mutex);
1349 
1350 	PR_SMR("%s: domain = %d, bufp = 0x%p, ebufp = 0x%p, "
1351 	    "(f = %d, se = %d)\n", proc, domid,
1352 	    (sp ? sp->sl_start : 0),
1353 	    (sp ? sp->sl_end : 0), forceflag, serrno);
1354 
1355 	if (wp->w_nwaiters <= 0) {
1356 		/*
1357 		 * There are no waiters!!  Must have timed out
1358 		 * and left.  Oh well...
1359 		 */
1360 		PR_SMR("%s: no slaballoc waiters found for domain %d\n",
1361 		    proc, domid);
1362 		if (!forceflag || serrno || !sp) {
1363 			/*
1364 			 * No waiters and caller doesn't want to force it.
1365 			 */
1366 			mutex_exit(&wp->w_mutex);
1367 			return (-1);
1368 		}
1369 		PR_SMR("%s: forcing slab onto domain %d\n", proc, domid);
1370 		ASSERT(domid == idn.localid);
1371 		ASSERT(wp->w_sp == NULL);
1372 		wp->w_done = 0;
1373 		/*
1374 		 * Now we fall through and let it be added in the
1375 		 * regular manor.
1376 		 */
1377 	}
1378 	if (wp->w_done) {
1379 		/*
1380 		 * There's at least one waiter so there has
1381 		 * to be a slab structure waiting for us.
1382 		 * If everything is going smoothly, there should only
1383 		 * be one guy coming through the path of inserting
1384 		 * an error or good slab.  However, if a disconnect was
1385 		 * detected, you may get several guys coming through
1386 		 * trying to let everybody know.
1387 		 */
1388 		ASSERT(wp->w_serrno ?
1389 		    (wp->w_sp == NULL) : (wp->w_sp != NULL));
1390 
1391 		cv_broadcast(&wp->w_cv);
1392 		mutex_exit(&wp->w_mutex);
1393 
1394 		return (-1);
1395 	}
1396 	if (serrno != 0) {
1397 		/*
1398 		 * Bummer...allocation failed.  This call is simply
1399 		 * to wake up the sleepers and let them know.
1400 		 */
1401 		PR_SMR("%s: slaballoc failed for domain %d\n", proc, domid);
1402 		wp->w_serrno = serrno;
1403 		wp->w_done = 1;
1404 		cv_broadcast(&wp->w_cv);
1405 		mutex_exit(&wp->w_mutex);
1406 
1407 		return (0);
1408 	}
1409 	PR_SMR("%s: putting slab into struct (domid=%d, localid=%d)\n",
1410 	    proc, domid, idn.localid);
1411 	/*
1412 	 * Prep the slab structure.
1413 	 */
1414 
1415 	if (domid == idn.localid) {
1416 		/*
1417 		 * Allocation was indeed for me.
1418 		 * Slab may or may not be locked when
1419 		 * we reach.  Normally they will be locked
1420 		 * if we're being called on behalf of a
1421 		 * free, and not locked if on behalf of
1422 		 * a new allocation request.
1423 		 */
1424 		lock_clear(&sp->sl_lock);
1425 		smr_alloc_buflist(sp);
1426 #ifdef DEBUG
1427 	} else {
1428 		uint_t	rv;
1429 		/*
1430 		 * Slab was not allocated on my behalf.  Must be
1431 		 * a master request on behalf of some other domain.
1432 		 * Prep appropriately.  Slab should have been locked
1433 		 * by smr_slab_reserve.
1434 		 */
1435 		rv = lock_try(&sp->sl_lock);
1436 		ASSERT(!rv);
1437 		ASSERT(sp->sl_domid == (short)domid);
1438 #endif /* DEBUG */
1439 	}
1440 
1441 	/*
1442 	 * Slab is ready to go.  Insert it into the domain's
1443 	 * slab list so once we wake everybody up they'll find it.
1444 	 * You better have write lock if you're putting treasures
1445 	 * there.
1446 	 */
1447 	ASSERT(DSLAB_WRITE_HELD(domid));
1448 
1449 	sp->sl_next = dp->dslab;
1450 	dp->dslab  = sp;
1451 	dp->dnslabs++;
1452 
1453 	/*
1454 	 * It's possible to fall through here without waiters.
1455 	 * This is a case where forceflag was set.
1456 	 */
1457 	if (wp->w_nwaiters > 0) {
1458 		wp->w_sp = sp;
1459 		wp->w_serrno = serrno;
1460 		wp->w_done = 1;
1461 		cv_broadcast(&wp->w_cv);
1462 	} else {
1463 		ASSERT(forceflag);
1464 		wp->w_sp = NULL;
1465 		wp->w_serrno = 0;
1466 		wp->w_done = 0;
1467 	}
1468 	mutex_exit(&wp->w_mutex);
1469 
1470 	return (0);
1471 }
1472 
1473 /*
1474  * Get the slab representing [bufp,ebufp] from the respective
1475  * domain's pool if all the buffers are free.  Remove them from
1476  * the domain's list and return it.
1477  * If bufp == NULL, then return however many free ones you
1478  * can find.
1479  * List of slabs are returned locked (sl_lock).
1480  * XXX - Need minimum limit to make sure we don't free up _all_
1481  *	 of our slabs!  However, during a shutdown we will need
1482  *	 method to free them all up regardless of locking.
1483  */
1484 smr_slab_t *
1485 smr_slaballoc_get(int domid, caddr_t bufp, caddr_t ebufp)
1486 {
1487 	idn_domain_t	*dp;
1488 	smr_slab_t	*retsp, *sp, **psp;
1489 	int		foundit, islocal = 0;
1490 	int		nslabs;
1491 	procname_t	proc = "smr_slaballoc_get";
1492 
1493 	PR_SMR("%s: getting slab for domain %d [bufp=0x%p, ebufp=0x%p]\n",
1494 	    proc, domid, bufp, ebufp);
1495 
1496 	dp = &idn_domain[domid];
1497 
1498 	ASSERT(DSLAB_WRITE_HELD(domid));
1499 
1500 	if ((sp = dp->dslab) == NULL) {
1501 		PR_SMR("%s: oops, no slabs for domain %d\n", proc, domid);
1502 		return (NULL);
1503 	}
1504 	/*
1505 	 * If domid is myself then I'm trying to get a slab out
1506 	 * of my local pool.  Otherwise, I'm the master and
1507 	 * I'm trying to get the slab representative from the
1508 	 * global pool.
1509 	 */
1510 	if (domid == idn.localid)
1511 		islocal = 1;
1512 
1513 	if (bufp != NULL) {
1514 		nslabs = -1;
1515 	} else {
1516 		nslabs = *(int *)ebufp;
1517 		if (nslabs == 0) {
1518 			PR_SMR("%s: requested nslabs (%d) <= 0\n",
1519 			    proc, nslabs);
1520 			return (NULL);
1521 		} else if (nslabs < 0) {
1522 			/*
1523 			 * Caller wants them all!
1524 			 */
1525 			nslabs = (int)dp->dnslabs;
1526 		}
1527 	}
1528 
1529 	retsp = NULL;
1530 	foundit = 0;
1531 	for (psp = &dp->dslab; sp; sp = *psp) {
1532 		int	isbusy;
1533 
1534 		if (bufp && (sp->sl_start != bufp)) {
1535 			psp = &sp->sl_next;
1536 			continue;
1537 		}
1538 
1539 		if (bufp && (ebufp > sp->sl_end)) {
1540 			PR_SMR("%s: bufp/ebufp (0x%p/0x%p) "
1541 			    "expected (0x%p/0x%p)\n", proc, bufp, ebufp,
1542 			    sp->sl_start, sp->sl_end);
1543 			ASSERT(0);
1544 		}
1545 		/*
1546 		 * We found the desired slab.  Make sure
1547 		 * it's free.
1548 		 */
1549 		foundit++;
1550 		isbusy = 0;
1551 		if (islocal) {
1552 			int spl;
1553 
1554 			/*
1555 			 * Some of the buffers in the slab
1556 			 * are still in use.  Unlock the
1557 			 * buffers we locked and bail out.
1558 			 */
1559 			spl = splhi();
1560 			if (!lock_try(&sp->sl_lock)) {
1561 				isbusy = 1;
1562 				foundit--;
1563 			} else if (sp->sl_inuse) {
1564 				lock_clear(&sp->sl_lock);
1565 				isbusy = 1;
1566 				foundit--;
1567 			}
1568 			splx(spl);
1569 		} else {
1570 			/*
1571 			 * If not local, then I'm the master getting
1572 			 * a slab from one of the slaves.  In this case,
1573 			 * their slab structs will always be locked.
1574 			 */
1575 			ASSERT(!lock_try(&sp->sl_lock));
1576 		}
1577 		if (!isbusy) {
1578 			/*
1579 			 * Delete the entry from the list and slap
1580 			 * it onto our return list.
1581 			 */
1582 			*psp = sp->sl_next;
1583 			sp->sl_next = retsp;
1584 			retsp = sp;
1585 		} else {
1586 			psp = &sp->sl_next;
1587 		}
1588 		/*
1589 		 * If bufp == NULL (alternate interface) and we haven't
1590 		 * found the desired number of slabs yet, keep looking.
1591 		 */
1592 		if (bufp || (foundit == nslabs))
1593 			break;
1594 	}
1595 	dp->dnslabs -= (short)foundit;
1596 
1597 	if (foundit) {
1598 		PR_SMR("%s: found %d free slabs (domid = %d)\n", proc, foundit,
1599 		    domid);
1600 	} else {
1601 		PR_SMR("%s: no free slabs found (domid = %d)\n", proc, domid);
1602 	}
1603 
1604 	/*
1605 	 * If this is the alternate interface, need to return
1606 	 * the number of slabs found in the ebufp parameter.
1607 	 */
1608 	if (bufp == NULL)
1609 		*(int *)ebufp = foundit;
1610 
1611 	return (retsp);
1612 }
1613 
1614 /*
1615  * Wrapper to hide alternate interface to smr_slaballoc_get()
1616  */
1617 smr_slab_t *
1618 smr_slaballoc_get_n(int domid, int *nslabs)
1619 {
1620 	smr_slab_t	*sp;
1621 
1622 	ASSERT(DSLAB_WRITE_HELD(domid));
1623 
1624 	sp = smr_slaballoc_get(domid, NULL, (caddr_t)nslabs);
1625 
1626 	return (sp);
1627 }
1628 
1629 /*
1630  * Only called by master.  Initialize slab pool based on local SMR.
1631  * Returns number of slabs initialized.
1632  * reserved_size = Length of area at the front of the NWR portion
1633  *		   of the SMR to reserve and not make available for
1634  *		   slab allocations.  Must be a IDN_SMR_BUFSIZE multiple.
1635  * reserved_area = Pointer to reserved area, if any.
1636  */
1637 int
1638 smr_slabpool_init(size_t reserved_size, caddr_t *reserved_area)
1639 {
1640 	size_t			nwr_available;
1641 	int			minperpool, ntotslabs, nxslabs, nslabs;
1642 	register int		p, pp;
1643 	register caddr_t	bufp;
1644 	register smr_slab_t	*sp;
1645 
1646 	ASSERT(IDN_GLOCK_IS_EXCL());
1647 	ASSERT(IDN_GET_MASTERID() != IDN_NIL_DOMID);
1648 
1649 	*reserved_area = NULL;
1650 
1651 	nwr_available = MB2B(IDN_NWR_SIZE) - reserved_size;
1652 
1653 	if ((idn.localid != IDN_GET_MASTERID()) ||
1654 	    (nwr_available < IDN_SLAB_SIZE) ||
1655 	    (idn.slabpool != NULL) ||
1656 	    ((reserved_size != 0) && (reserved_size & (IDN_SMR_BUFSIZE-1)))) {
1657 		return (-1);
1658 	}
1659 
1660 	idn.slabpool = GETSTRUCT(struct slabpool, 1);
1661 	idn.slabpool->ntotslabs = ntotslabs = nwr_available / IDN_SLAB_SIZE;
1662 	ASSERT(ntotslabs > 0);
1663 	minperpool = (ntotslabs < IDN_SLAB_MINPERPOOL) ?
1664 	    1 : IDN_SLAB_MINPERPOOL;
1665 	idn.slabpool->npools = (ntotslabs + (minperpool - 1)) / minperpool;
1666 
1667 	if ((idn.slabpool->npools & 1) == 0) {
1668 		/*
1669 		 * npools needs to be odd for hashing algorithm.
1670 		 */
1671 		idn.slabpool->npools++;
1672 	}
1673 	ASSERT(idn.slabpool->npools > 0);
1674 	minperpool = (ntotslabs < idn.slabpool->npools) ?
1675 	    1 : (ntotslabs / idn.slabpool->npools);
1676 
1677 	/*
1678 	 * Calculate the number of extra slabs that will need to
1679 	 * be alloted to the pools.  This number will be less than
1680 	 * npools.  Only one extra slab is allocated to each pool
1681 	 * until we have assigned all the extra slabs.
1682 	 */
1683 	if (ntotslabs > (idn.slabpool->npools * minperpool))
1684 		nxslabs = ntotslabs - (idn.slabpool->npools * minperpool);
1685 	else
1686 		nxslabs = 0;
1687 	ASSERT((nxslabs >= 0) && (nxslabs < idn.slabpool->npools));
1688 
1689 	idn.slabpool->pool = GETSTRUCT(struct smr_slabtbl,
1690 	    idn.slabpool->npools);
1691 	sp = GETSTRUCT(smr_slab_t, idn.slabpool->ntotslabs);
1692 
1693 	idn.slabpool->savep = sp;
1694 	bufp = idn.smr.vaddr + reserved_size;
1695 
1696 	for (p = nslabs = 0;
1697 	    (p < idn.slabpool->npools) && (ntotslabs > 0);
1698 	    p++, ntotslabs -= nslabs) {
1699 
1700 		nslabs = (ntotslabs < minperpool) ? ntotslabs : minperpool;
1701 		if (nxslabs > 0) {
1702 			nslabs++;
1703 			nxslabs--;
1704 		}
1705 		idn.slabpool->pool[p].sarray = sp;
1706 		for (pp = 0; pp < nslabs; pp++) {
1707 
1708 			sp->sl_next  = NULL;
1709 			sp->sl_start = bufp;
1710 			sp->sl_end   = bufp = sp->sl_start + IDN_SLAB_SIZE;
1711 			sp->sl_lock  = 0;
1712 			sp->sl_domid = (short)IDN_NIL_DOMID;
1713 
1714 			sp++;
1715 		}
1716 		idn.slabpool->pool[p].nfree   = nslabs;
1717 		idn.slabpool->pool[p].nslabs  = nslabs;
1718 	}
1719 	ASSERT((ntotslabs == 0) && (nxslabs == 0));
1720 	/*
1721 	 * We should be at the end of the SMR at this point.
1722 	 */
1723 	ASSERT(bufp == (idn.smr.vaddr + reserved_size
1724 	    + (idn.slabpool->ntotslabs * IDN_SLAB_SIZE)));
1725 
1726 	if (reserved_size != 0)
1727 		*reserved_area = idn.smr.vaddr;
1728 
1729 	return (0);
1730 }
1731 
1732 void
1733 smr_slabpool_deinit()
1734 {
1735 	if (idn.slabpool == NULL)
1736 		return;
1737 
1738 	FREESTRUCT(idn.slabpool->savep, smr_slab_t, idn.slabpool->ntotslabs);
1739 	FREESTRUCT(idn.slabpool->pool, struct smr_slabtbl,
1740 	    idn.slabpool->npools);
1741 	FREESTRUCT(idn.slabpool, struct slabpool, 1);
1742 
1743 	idn.slabpool = NULL;
1744 }
1745 
1746 void
1747 smr_alloc_buflist(smr_slab_t *sp)
1748 {
1749 	int		n, nbufs;
1750 	caddr_t		sbufp;
1751 	smr_slabbuf_t	*hp, *bp;
1752 
1753 	if (sp->sl_head)
1754 		return;
1755 
1756 	nbufs = (sp->sl_end - sp->sl_start) / IDN_SMR_BUFSIZE;
1757 	ASSERT(nbufs > 0);
1758 	if (nbufs <= 0) {
1759 		sp->sl_head = sp->sl_free = sp->sl_inuse = NULL;
1760 		return;
1761 	}
1762 
1763 	hp = GETSTRUCT(smr_slabbuf_t, nbufs);
1764 
1765 	sbufp = sp->sl_start;
1766 	for (n = 0, bp = hp; n < nbufs; bp++, n++) {
1767 		bp->sb_bufp = sbufp;
1768 		bp->sb_domid = IDN_NIL_DOMID;
1769 		bp->sb_next = bp + 1;
1770 		sbufp += IDN_SMR_BUFSIZE;
1771 	}
1772 	(--bp)->sb_next = NULL;
1773 
1774 	sp->sl_head = sp->sl_free = hp;
1775 	sp->sl_inuse = NULL;
1776 }
1777 
1778 void
1779 smr_free_buflist(smr_slab_t *sp)
1780 {
1781 	int	nbufs;
1782 
1783 	if (sp->sl_head == NULL)
1784 		return;
1785 
1786 	nbufs = (sp->sl_end - sp->sl_start) / IDN_SMR_BUFSIZE;
1787 
1788 	FREESTRUCT(sp->sl_head, smr_slabbuf_t, nbufs);
1789 
1790 	sp->sl_head = sp->sl_free = sp->sl_inuse = NULL;
1791 }
1792 
1793 /*
1794  * Returns:	0 Successfully located a slab.
1795  *	       -1 Failure.
1796  */
1797 static smr_slab_t *
1798 smr_slab_reserve(int domid)
1799 {
1800 	register int		p, nextp, s, nexts;
1801 	register smr_slab_t	*spa;
1802 	int			startp, starts;
1803 	int			foundone = 0;
1804 	int			spl;
1805 	procname_t		proc = "smr_slab_reserve";
1806 
1807 	p = startp = SMR_SLABPOOL_HASH(domid);
1808 	nextp = -1;
1809 
1810 	spl = splhi();
1811 	while ((nextp != startp) && !foundone) {
1812 
1813 		s = starts = SMR_SLAB_HASH(p, domid);
1814 		nexts = -1;
1815 		spa = &(idn.slabpool->pool[p].sarray[0]);
1816 
1817 		while ((nexts != starts) && !foundone) {
1818 			if (lock_try(&spa[s].sl_lock)) {
1819 				foundone = 1;
1820 				break;
1821 			}
1822 			nexts = SMR_SLAB_HASHSTEP(p, s);
1823 			s = nexts;
1824 		}
1825 		if (foundone)
1826 			break;
1827 		nextp = SMR_SLABPOOL_HASHSTEP(p);
1828 		p = nextp;
1829 	}
1830 	splx(spl);
1831 
1832 	if (foundone) {
1833 		ASSERT((&spa[s] >= idn.slabpool->savep) &&
1834 		    (&spa[s] < (idn.slabpool->savep +
1835 		    idn.slabpool->ntotslabs)));
1836 
1837 		spa[s].sl_domid = (short)domid;
1838 
1839 		ATOMIC_DEC(idn.slabpool->pool[p].nfree);
1840 
1841 		if (domid == idn.localid) {
1842 			smr_slab_t	*nsp;
1843 			/*
1844 			 * Caller is actually reserving a slab for
1845 			 * themself which means they'll need the full
1846 			 * slab structure to represent all of the I/O
1847 			 * buffers.  The "spa" is just a representative
1848 			 * and doesn't contain the space to manage the
1849 			 * individual buffers.  Need to alloc a full-size
1850 			 * struct.
1851 			 * Note that this results in the returning
1852 			 * smr_slab_t structure being unlocked.
1853 			 */
1854 			ASSERT(idn.localid == IDN_GET_MASTERID());
1855 			nsp = GETSTRUCT(smr_slab_t, 1);
1856 			nsp->sl_start = spa[s].sl_start;
1857 			nsp->sl_end   = spa[s].sl_end;
1858 			smr_alloc_buflist(nsp);
1859 			spa = nsp;
1860 			PR_SMR("%s: allocated full slab struct for domain %d\n",
1861 			    proc, domid);
1862 		} else {
1863 			/*
1864 			 * Slab structure gets returned locked.
1865 			 */
1866 			spa += s;
1867 		}
1868 
1869 		PR_SMR("%s: allocated slab 0x%p (start=0x%p, size=%lu) for "
1870 		    "domain %d\n", proc, spa, spa->sl_start,
1871 		    spa->sl_end - spa->sl_start, domid);
1872 	} else {
1873 		PR_SMR("%s: FAILED to allocate for domain %d\n",
1874 		    proc, domid);
1875 		spa = NULL;
1876 	}
1877 
1878 	return (spa);
1879 }
1880 
1881 static void
1882 smr_slab_unreserve(int domid, smr_slab_t *sp)
1883 {
1884 	register int		p, nextp, s, nexts;
1885 	register smr_slab_t	*spa;
1886 	int			foundit = 0;
1887 	int			startp, starts;
1888 	caddr_t			bufp;
1889 	procname_t		proc = "smr_slab_unreserve";
1890 
1891 	bufp = sp->sl_start;
1892 	p = startp = SMR_SLABPOOL_HASH(domid);
1893 	nextp = -1;
1894 
1895 	while ((nextp != startp) && !foundit) {
1896 
1897 		s = starts = SMR_SLAB_HASH(p, domid);
1898 		nexts = -1;
1899 		spa = &(idn.slabpool->pool[p].sarray[0]);
1900 
1901 		while ((nexts != starts) && !foundit) {
1902 			if (spa[s].sl_start == bufp) {
1903 				foundit = 1;
1904 				break;
1905 			}
1906 			nexts = SMR_SLAB_HASHSTEP(p, s);
1907 			s = nexts;
1908 		}
1909 		if (foundit)
1910 			break;
1911 		nextp = SMR_SLABPOOL_HASHSTEP(p);
1912 		p = nextp;
1913 	}
1914 	if (foundit) {
1915 		ASSERT((&spa[s] >= idn.slabpool->savep) &&
1916 		    (&spa[s] < (idn.slabpool->savep +
1917 		    idn.slabpool->ntotslabs)));
1918 		ASSERT(!lock_try(&spa[s].sl_lock));
1919 		ASSERT(spa[s].sl_domid == (short)domid);
1920 
1921 		spa[s].sl_next = NULL;
1922 		spa[s].sl_domid = (short)IDN_NIL_DOMID;
1923 		lock_clear(&spa[s].sl_lock);
1924 
1925 		ATOMIC_INC(idn.slabpool->pool[p].nfree);
1926 
1927 		PR_SMR("%s: freed (bufp=0x%p) for domain %d\n",
1928 		    proc, bufp, domid);
1929 
1930 		if (domid == idn.localid) {
1931 			/*
1932 			 * Caller is actually unreserving a slab of their
1933 			 * own.  Note that only the master calls this
1934 			 * routine.  Since the master's local slab
1935 			 * structures do not get entered into the global
1936 			 * "representative" pool, we need to free up the
1937 			 * data structure that was passed in.
1938 			 */
1939 			ASSERT(idn.localid == IDN_GET_MASTERID());
1940 			ASSERT(sp != &spa[s]);
1941 
1942 			smr_free_buflist(sp);
1943 			FREESTRUCT(sp, smr_slab_t, 1);
1944 		} else {
1945 			ASSERT(sp == &spa[s]);
1946 		}
1947 	} else {
1948 		/*
1949 		 * Couldn't find slab entry for given buf!
1950 		 */
1951 		PR_SMR("%s: FAILED to free (bufp=0x%p) for domain %d\n",
1952 		    proc, bufp, domid);
1953 	}
1954 }
1955 
1956 /*
1957  * The Reap Protocol:
1958  *	master				   slave
1959  *	------				   -----
1960  *	smr_slab_reap_global
1961  *	- idn_broadcast_cmd(SLABREAP) ->   idn_recv_cmd(SLABREAP)
1962  *	  . idn_local_cmd(SLABREAP)        - idn_recv_slabreap_req
1963  *	    - smr_slab_reap	             . smr_slab_reap
1964  *	      . smr_slaballoc_get_n            - smr_slaballoc_get_n
1965  *	      . smr_slab_free		       - smr_slab_free
1966  *		- smr_slab_free_local		 . smr_slab_free_remote
1967  *		  . smr_slab_unreserve
1968  *				      <-	   - idn_send_cmd(SLABFREE)
1969  *	idn_recv_cmd(SLABFREE)
1970  *	- idn_recv_slabfree_req
1971  *	  . smr_slaballoc_get
1972  *	  . smr_slab_free
1973  *	    - smr_slab_free_local
1974  *	      . smr_slab_unreserve
1975  *        . idn_send_slabfree_resp    ->   idn_recv_cmd(SLABFREE | ack)
1976  *					   - idn_recv_slabfree_resp
1977  *
1978  *	idn_recv_cmd(SLABREAP | ack)  <-     . idn_send_slabreap_resp
1979  *	- idn_recv_slabreap_resp	   DONE
1980  *	DONE
1981  *
1982  * Check available slabs and if we're below the threshold, kick
1983  * off reaping to all remote domains.  There is no guarantee remote
1984  * domains will be able to free up any.
1985  */
1986 static void
1987 smr_slab_reap_global()
1988 {
1989 	register int	p, npools;
1990 	register int	total_free = 0;
1991 	register struct smr_slabtbl	*tblp;
1992 	static clock_t	reap_last = 0;
1993 	procname_t	proc = "smr_slab_reap_global";
1994 	clock_t		now;
1995 
1996 	ASSERT(IDN_GET_MASTERID() != IDN_NIL_DOMID);
1997 
1998 	DSLAB_LOCK_SHARED(idn.localid);
1999 	if (idn_domain[idn.localid].dslab_state != DSLAB_STATE_LOCAL) {
2000 		PR_SMR("%s: only allowed by master (%d)\n",
2001 		    proc, IDN_GET_MASTERID());
2002 		DSLAB_UNLOCK(idn.localid);
2003 		return;
2004 	}
2005 	DSLAB_UNLOCK(idn.localid);
2006 
2007 	now = ddi_get_lbolt();
2008 	if ((now > 0) && (now > reap_last) &&
2009 	    ((now - reap_last) < IDN_REAP_INTERVAL))
2010 		return;
2011 
2012 	reap_last = now;
2013 
2014 	ASSERT(idn.slabpool);
2015 
2016 	npools = idn.slabpool->npools;
2017 	tblp   = idn.slabpool->pool;
2018 
2019 	for (p = 0; p < npools; tblp++, p++)
2020 		total_free += tblp->nfree;
2021 
2022 	if (total_free <= IDN_SLAB_THRESHOLD) {
2023 		int	diff, reap_per_domain;
2024 
2025 		PR_SMR("%s: kicking off reaping "
2026 		    "(total_free = %d, min = %d)\n",
2027 		    proc, total_free, IDN_SLAB_THRESHOLD);
2028 
2029 		diff = IDN_SLAB_THRESHOLD - total_free;
2030 		reap_per_domain = (diff < idn.ndomains) ?
2031 		    1 : (diff / idn.ndomains);
2032 
2033 		idn_broadcast_cmd(IDNCMD_SLABREAP, reap_per_domain, 0, 0);
2034 	}
2035 }
2036 
2037 void
2038 smr_slab_reap(int domid, int *nslabs)
2039 {
2040 	register int	d;
2041 	int		nreclaimed;
2042 	smr_slab_t	*sp;
2043 	domainset_t	reapset;
2044 	procname_t	proc = "smr_slab_reap";
2045 
2046 	/*
2047 	 * Should only be called on behalf of local
2048 	 * domain.
2049 	 */
2050 	if (domid != idn.localid) {
2051 		PR_SMR("%s: called by domain %d, should only be local (%d)\n",
2052 		    proc, domid, idn.localid);
2053 		ASSERT(0);
2054 		return;
2055 	}
2056 	/*
2057 	 * Try and reclaim some buffers so we can possibly
2058 	 * free up some slabs.
2059 	 */
2060 	reapset = idn.domset.ds_connected;
2061 
2062 	IDN_GKSTAT_GLOBAL_EVENT(gk_reaps, gk_reap_last);
2063 
2064 	nreclaimed = 0;
2065 	for (d = 0; d < MAX_DOMAINS; d++) {
2066 		int		nr;
2067 		idn_domain_t	*dp;
2068 
2069 		if (!DOMAIN_IN_SET(reapset, d))
2070 			continue;
2071 
2072 		IDN_DLOCK_SHARED(d);
2073 
2074 		dp = &idn_domain[d];
2075 		if ((d == idn.localid) || (dp->dcpu < 0)) {
2076 			IDN_DUNLOCK(d);
2077 			continue;
2078 		}
2079 		/*
2080 		 * Clean up any dead I/O errors if possible.
2081 		 */
2082 		if (dp->dioerr > 0) {
2083 			idn_domain_t	*ldp;
2084 			register int	cnt;
2085 			register smr_slabbuf_t	*bp;
2086 			/*
2087 			 * We need to grab the writer lock to prevent
2088 			 * anybody from allocating buffers while we
2089 			 * traverse the slabs outstanding.
2090 			 */
2091 			cnt = 0;
2092 			ldp = &idn_domain[idn.localid];
2093 			IDN_DLOCK_EXCL(idn.localid);
2094 			DSLAB_LOCK_EXCL(idn.localid);
2095 			for (sp = ldp->dslab; sp; sp = sp->sl_next)
2096 				for (bp = sp->sl_inuse; bp; bp = bp->sb_next)
2097 					if (bp->sb_domid == d)
2098 						cnt++;
2099 			DSLAB_UNLOCK(idn.localid);
2100 			ASSERT((dp->dio + dp->dioerr) >= cnt);
2101 			dp->dio = cnt;
2102 			dp->dioerr = 0;
2103 			IDN_DUNLOCK(idn.localid);
2104 		}
2105 		if ((dp->dstate == IDNDS_CONNECTED) &&
2106 		    ((nr = idn_reclaim_mboxdata(d, 0, -1)) > 0))
2107 			nreclaimed += nr;
2108 
2109 		IDN_DUNLOCK(d);
2110 	}
2111 
2112 	DSLAB_LOCK_EXCL(domid);
2113 	sp = smr_slaballoc_get_n(domid, nslabs);
2114 	if (sp) {
2115 		IDN_GKSTAT_ADD(gk_reap_count, (ulong_t)(*nslabs));
2116 		smr_slab_free(domid, sp);
2117 	}
2118 	DSLAB_UNLOCK(domid);
2119 }
2120 
2121 /*
2122  * ---------------------------------------------------------------------
2123  * Remap the (IDN) shared memory region to a new physical address.
2124  * Caller is expected to have performed a ecache flush if needed.
2125  * ---------------------------------------------------------------------
2126  */
2127 void
2128 smr_remap(struct as *as, register caddr_t vaddr,
2129 		register pfn_t new_pfn, uint_t mblen)
2130 {
2131 	tte_t		tte;
2132 	size_t		blen;
2133 	pgcnt_t		p, npgs;
2134 	procname_t	proc = "smr_remap";
2135 
2136 	if (va_to_pfn(vaddr) == new_pfn) {
2137 		PR_REMAP("%s: vaddr (0x%p) already mapped to pfn (0x%lx)\n",
2138 		    proc, vaddr, new_pfn);
2139 		return;
2140 	}
2141 
2142 	blen = MB2B(mblen);
2143 	npgs = btopr(blen);
2144 	ASSERT(npgs != 0);
2145 
2146 	PR_REMAP("%s: va = 0x%p, pfn = 0x%lx, npgs = %ld, mb = %d MB (%ld)\n",
2147 	    proc, vaddr, new_pfn, npgs, mblen, blen);
2148 
2149 	/*
2150 	 * Unmap the SMR virtual address from it's current
2151 	 * mapping.
2152 	 */
2153 	hat_unload(as->a_hat, vaddr, blen, HAT_UNLOAD_UNLOCK);
2154 
2155 	if (new_pfn == PFN_INVALID)
2156 		return;
2157 
2158 	/*
2159 	 * Map the SMR to the new physical address space,
2160 	 * presumably a remote pfn.  Cannot use hat_devload
2161 	 * because it will think pfn represents non-memory,
2162 	 * i.e. space since it may beyond his physmax.
2163 	 */
2164 	for (p = 0; p < npgs; p++) {
2165 		sfmmu_memtte(&tte, new_pfn, PROT_READ | PROT_WRITE | HAT_NOSYNC,
2166 		    TTE8K);
2167 		sfmmu_tteload(as->a_hat, &tte, vaddr, NULL, HAT_LOAD_LOCK);
2168 
2169 		vaddr += MMU_PAGESIZE;
2170 		new_pfn++;
2171 	}
2172 
2173 	PR_REMAP("%s: remapped %ld pages (expected %ld)\n",
2174 	    proc, npgs, btopr(MB2B(mblen)));
2175 }
2176