xref: /titanic_52/usr/src/uts/sun4u/starfire/io/idn_smr.c (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Inter-Domain Network
27  *
28  * Shared Memory Region (SMR) supporting code.
29  */
30 
31 #pragma ident	"%Z%%M%	%I%	%E% SMI"
32 
33 #include <sys/types.h>
34 #include <sys/param.h>
35 #include <sys/machparam.h>
36 #include <sys/debug.h>
37 #include <sys/cpuvar.h>
38 #include <sys/kmem.h>
39 #include <sys/mutex.h>
40 #include <sys/rwlock.h>
41 #include <sys/systm.h>
42 #include <sys/machlock.h>
43 #include <sys/membar.h>
44 #include <sys/mman.h>
45 #include <vm/hat.h>
46 #include <vm/as.h>
47 #include <vm/hat_sfmmu.h>
48 #include <sys/vm_machparam.h>
49 #include <sys/x_call.h>
50 
51 #include <sys/idn.h>
52 
/*
 * DIOCHECK(domid)
 *
 * DEBUG-only sanity check: emits a warning (with file/line of the
 * call site) when the dio counter of the given domain has gone
 * negative.  Expands to nothing in non-DEBUG builds.
 *
 * The DEBUG form is wrapped in do { } while (0) so that
 * "DIOCHECK(d);" behaves as a single statement, e.g. as the body
 * of an unbraced if/else.
 */
#ifdef DEBUG
#define	DIOCHECK(domid) \
do { \
	int	_dio; \
	if ((_dio = idn_domain[(domid)].dio) < 0) { \
		cmn_err(CE_WARN, \
			">>>>> file %s, line %d: domain %d, dio = %d", \
			__FILE__, __LINE__, (domid), _dio); \
	} \
} while (0)
#else
#define	DIOCHECK(domid)
#endif /* DEBUG */
66 
67 static int	smr_slab_alloc_local(int domid, smr_slab_t **spp);
68 static int	smr_slab_alloc_remote(int domid, smr_slab_t **spp);
69 static void	smr_slab_free_local(int domid, smr_slab_t *sp);
70 static void	smr_slab_free_remote(int domid, smr_slab_t *sp);
71 static int 	smr_slabwaiter_register(int domid);
72 static int 	smr_slabwaiter_unregister(int domid, smr_slab_t **spp);
73 static int 	smr_slaballoc_wait(int domid, smr_slab_t **spp);
74 static smr_slab_t 	*smr_slab_reserve(int domid);
75 static void 	smr_slab_unreserve(int domid, smr_slab_t *sp);
76 static void	smr_slab_reap_global();
77 
78 /*
79  * Can only be called by the master.  Allocate a slab from the
80  * local pool representing the SMR, on behalf of the given
81  * domain.  Slab is either being requested for use by the
82  * local domain (i.e. domid == idn.localid), or it's being
83  * allocated to give to a remote domain which requested one.
84  * In the base of allocating on behalf of a remote domain,
85  * smr_slab_t structure is used simply to manage ownership.
86  *
87  * Returns:	smr_slaballoc_wait
88  * 		(EINVAL, ETIMEDOUT)
89  *		smr_slabwatier_unregister
90  *		(0, EINVAL, EBUSY, ENOMEM)
91  *		ENOLCK
92  */
93 static int
94 smr_slab_alloc_local(int domid, smr_slab_t **spp)
95 {
96 	int		serrno = 0;
97 	int		nwait;
98 	smr_slab_t	*sp;
99 	idn_domain_t	*dp;
100 
101 
102 	/*
103 	 * Only the master can make local allocations.
104 	 */
105 	ASSERT(IDN_GET_MASTERID() != IDN_NIL_DOMID);
106 	ASSERT(idn.localid == IDN_GET_MASTERID());
107 
108 	*spp = NULL;
109 
110 	dp = &idn_domain[domid];
111 	ASSERT(DSLAB_READ_HELD(domid));
112 	ASSERT(dp->dslab_state == DSLAB_STATE_LOCAL);
113 
114 	/*
115 	 * Register myself with the waiting list.
116 	 */
117 	nwait = smr_slabwaiter_register(domid);
118 
119 	if (nwait > 1) {
120 		/*
121 		 * XXX - old comment?
122 		 * Need to drop the read lock _after_ registering
123 		 * ourselves with the potential wait list for this allocation.
124 		 * Although this allocation is not a remote one, we could
125 		 * still have multiple threads on the master trying to
126 		 * satisfy (allocate) request on behalf of a remote domain.
127 		 */
128 		/*
129 		 * Somebody is already in the process of satisfying
130 		 * the allocation request for this respective
131 		 * domain.  All we need to do is wait and let
132 		 * it happen.
133 		 */
134 		serrno = smr_slaballoc_wait(domid, spp);
135 		return (serrno);
136 	}
137 	/*
138 	 * I'm the original slab requester for this domain.  It's local
139 	 * so go ahead and do the job.
140 	 */
141 
142 	if ((sp = smr_slab_reserve(domid)) == NULL)
143 		serrno = ENOMEM;
144 
145 	/*
146 	 * Allocation may have failed.  In either case we've
147 	 * got to do the put to at least wake potential waiters up.
148 	 */
149 	if (!serrno) {
150 		if (DSLAB_LOCK_TRYUPGRADE(domid) == 0) {
151 			DSLAB_UNLOCK(domid);
152 			DSLAB_LOCK_EXCL(domid);
153 		}
154 	}
155 
156 	(void) smr_slaballoc_put(domid, sp, 0, serrno);
157 
158 	/*
159 	 * If serrno is ENOLCK here, then we must have failed
160 	 * on the upgrade above, so lock already dropped.
161 	 */
162 	if (serrno != ENOLCK) {
163 		/*
164 		 * Need to drop since reaping may be recursive?
165 		 */
166 		DSLAB_UNLOCK(domid);
167 	}
168 
169 	/*
170 	 * Since we were the original requester but never went
171 	 * to sleep, we need to directly unregister ourselves
172 	 * from the waiting list.
173 	 */
174 	serrno = smr_slabwaiter_unregister(domid, spp);
175 
176 	/*
177 	 * Now that we've satisfied the request, let's check if any
178 	 * reaping is necessary.  Only the master does this and only
179 	 * when allocating slabs, an infrequent event :-o
180 	 */
181 	smr_slab_reap_global();
182 
183 	ASSERT((serrno == 0) ? (*spp != NULL) : (*spp == NULL));
184 
185 	DSLAB_LOCK_SHARED(domid);
186 
187 	return (serrno);
188 }
189 
190 /*
191  * Can only be called by a slave on behalf of himself.  Need to
192  * make a request to the master to allocate a slab of SMR buffers
193  * for the local domain.
194  *
195  * Returns:	smr_slaballoc_wait
196  *		(0, EINVAL, EBUSY, ENOMEM)
197  *		ENOLCK
198  *		ECANCELED
199  */
200 static int
201 smr_slab_alloc_remote(int domid, smr_slab_t **spp)
202 {
203 	int		nwait;
204 	int		serrno = 0;
205 	int		bailout = 0;
206 	int		masterid;
207 	idn_domain_t	*dp, *mdp = NULL;
208 	procname_t	proc = "smr_slab_alloc_remote";
209 
210 	/*
211 	 * Only slaves make remote allocations.
212 	 */
213 	ASSERT(idn.localid != IDN_GET_MASTERID());
214 	ASSERT(domid == idn.localid);
215 	ASSERT(IDN_GET_MASTERID() != IDN_NIL_DOMID);
216 
217 	*spp = NULL;
218 
219 	dp = &idn_domain[domid];
220 	ASSERT(DSLAB_READ_HELD(domid));
221 	ASSERT(dp->dslab_state == DSLAB_STATE_REMOTE);
222 
223 	/*
224 	 * Register myself with the slaballoc waiting list.
225 	 * Note that only allow one outstanding allocation
226 	 * request for the given domain.  Other callers which
227 	 * detect a slab is needed simply get stuck on the
228 	 * waiting list waiting for the original caller to
229 	 * get the job done.
230 	 * The waiter_register routine will allocate the necessary
231 	 * slab structure which will ultimately be inserted in
232 	 * the domain's slab list via smr_slaballoc_put().
233 	 */
234 	nwait = smr_slabwaiter_register(domid);
235 
236 	/*
237 	 * Make sure we have a connection with the master
238 	 * before we wait around for nothing and send a
239 	 * command off to nowhere.
240 	 * First do a quick (no lock) check for global okayness.
241 	 */
242 	if ((idn.state != IDNGS_ONLINE) ||
243 			((masterid = IDN_GET_MASTERID()) == IDN_NIL_DOMID)) {
244 		bailout = 1;
245 		serrno = ECANCELED;
246 	}
247 	/*
248 	 * We need to drop our read lock _before_ acquiring the
249 	 * slaballoc waiter lock.  This is necessary because the
250 	 * thread that receives the slab alloc response and fills
251 	 * in the slab structure will need to grab the domain write
252 	 * lock while holding onto the slaballoc waiter lock.
253 	 * Potentially could deadlock if we didn't drop our domain
254 	 * lock before.  Plus, we've registered.
255 	 *
256 	 * 4093209 - Note also that we do this _after_ the check for
257 	 *	idn.masterid where we grab the READER global
258 	 *	lock.  This is to prevent somebody from
259 	 *	changing our state after we drop the drwlock.
260 	 *	A deadlock can occur when shutting down a
261 	 *	domain we're holding the
262 	 */
263 
264 	if (!bailout) {
265 		mdp = &idn_domain[masterid];
266 		/*
267 		 * Global state is okay.  Let's double check the
268 		 * state of our actual target domain.
269 		 */
270 		if (mdp->dstate != IDNDS_CONNECTED) {
271 			bailout = 1;
272 			serrno = ECANCELED;
273 		} else if (IDN_DLOCK_TRY_SHARED(masterid)) {
274 			if (mdp->dstate != IDNDS_CONNECTED) {
275 				bailout = 1;
276 				serrno = ECANCELED;
277 				IDN_DUNLOCK(masterid);
278 			} else if (nwait != 1) {
279 				IDN_DUNLOCK(masterid);
280 			}
281 			/*
282 			 * Note that keep the drwlock(read) for
283 			 * the target (master) domain if it appears
284 			 * we're the lucky one to send the command.
285 			 * We hold onto the lock until we've actually
286 			 * sent the command out.
287 			 * We don't reach this place unless it
288 			 * appears everything is kosher with
289 			 * the target (master) domain.
290 			 */
291 		} else {
292 			bailout = 1;
293 			serrno = ENOLCK;
294 		}
295 	}
296 
297 	if (bailout) {
298 		ASSERT(serrno);
299 		/*
300 		 * Gotta bail.  Abort operation.  Error result
301 		 * will be picked up when we attempt to wait.
302 		 */
303 		PR_SMR("%s: BAILING OUT on behalf domain %d "
304 			"(err=%d, gs=%s, ms=%s)\n",
305 			proc, domid, serrno, idngs_str[idn.state],
306 			(masterid == IDN_NIL_DOMID)
307 			? "unknown" : idnds_str[idn_domain[masterid].dstate]);
308 		(void) smr_slabwaiter_abort(domid, serrno);
309 
310 	} else if (nwait == 1) {
311 		/*
312 		 * We are the original requester.  Initiate the
313 		 * actual request to the master.
314 		 */
315 		idn_send_cmd(masterid, IDNCMD_SLABALLOC,
316 				IDN_SLAB_SIZE, 0, 0);
317 		ASSERT(mdp);
318 		IDN_DUNLOCK(masterid);
319 	}
320 
321 	/*
322 	 * Wait here for response.  Once awakened func returns
323 	 * with slab structure possibly filled with gifts!
324 	 */
325 	serrno = smr_slaballoc_wait(domid, spp);
326 
327 	return (serrno);
328 }
329 
330 /*
331  * Allocate a slab from the Master on behalf
332  * of the given domain.  Note that master uses
333  * this function to allocate slabs on behalf of
334  * remote domains also.
335  * Entered with drwlock held.
336  * Leaves with drwlock dropped.
337  * Returns:	EDQUOT
338  *		EINVAL
339  *		ENOLCK
340  *		smr_slab_alloc_local
341  *		smr_slab_alloc_remote
342  *		(0, EINVAL, EBUSY, ENOMEM)
343  */
344 int
345 smr_slab_alloc(int domid, smr_slab_t **spp)
346 {
347 	int		serrno = 0;
348 	idn_domain_t	*dp;
349 	procname_t	proc = "smr_slab_alloc";
350 
351 
352 	dp = &idn_domain[domid];
353 
354 	ASSERT(DSLAB_READ_HELD(domid));
355 	ASSERT(dp->dslab_state != DSLAB_STATE_UNKNOWN);
356 
357 	*spp = NULL;
358 
359 	switch (dp->dslab_state) {
360 	case DSLAB_STATE_UNKNOWN:
361 		cmn_err(CE_WARN,
362 			"IDN: 300: no slab allocations without a master");
363 		serrno = EINVAL;
364 		break;
365 
366 	case DSLAB_STATE_LOCAL:
367 		/*
368 		 * If I'm the master, then get a slab
369 		 * from the local SMR pool, but only
370 		 * if the number of allocated slabs has
371 		 * not been exceeded.
372 		 */
373 		if (((int)dp->dnslabs < IDN_SLAB_MAXPERDOMAIN) ||
374 						!IDN_SLAB_MAXPERDOMAIN)
375 			serrno = smr_slab_alloc_local(domid, spp);
376 		else
377 			serrno = EDQUOT;
378 		break;
379 
380 	case DSLAB_STATE_REMOTE:
381 		/*
382 		 * Have to make a remote request.
383 		 * In order to prevent overwhelming the master
384 		 * with a bunch of requests that he won't be able
385 		 * to handle we do a check to see if we're still
386 		 * under quota.  Note that the limit is known
387 		 * apriori based on the SMR/NWR size and
388 		 * IDN_SLAB_MINTOTAL.  Domains must have the same
389 		 * size SMR/NWR, however they can have different
390 		 * IDN_SLAB_MINTOTAL.  Thus a domain could throttle
391 		 * itself however it wishes.
392 		 */
393 		if (((int)dp->dnslabs < IDN_SLAB_MAXPERDOMAIN) ||
394 						!IDN_SLAB_MAXPERDOMAIN)
395 			serrno = smr_slab_alloc_remote(domid, spp);
396 		else
397 			serrno = EDQUOT;
398 		break;
399 
400 	default:
401 		cmn_err(CE_WARN,
402 			"IDN: 301: (ALLOC) unknown slab state (%d) "
403 			"for domain %d", dp->dslab_state, domid);
404 		serrno = EINVAL;
405 		break;
406 	}
407 
408 	if (*spp == NULL) {
409 		PR_SMR("%s: failed to allocate %s slab [serrno = %d]\n",
410 			proc, (idn.localid == IDN_GET_MASTERID()) ?
411 			"local" : "remote", serrno);
412 	}
413 
414 	if (serrno) {
415 		IDN_GKSTAT_GLOBAL_EVENT(gk_slabfail, gk_slabfail_last);
416 	}
417 
418 	return (serrno);
419 }
420 
421 static void
422 smr_slab_free_local(int domid, smr_slab_t *sp)
423 {
424 	int	rv;
425 
426 	/*
427 	 * Do a slaballoc_put just in case there may have
428 	 * been waiters for slabs for this respective domain
429 	 * before we unreserve this slab.
430 	 */
431 	rv = smr_slaballoc_put(domid, sp, 0, 0);
432 
433 	if (rv == -1) {
434 		/*
435 		 * Put failed.  Must not have been any waiters.
436 		 * Go ahead and unreserve the space.
437 		 */
438 		smr_slab_unreserve(domid, sp);
439 	}
440 }
441 
442 static void
443 smr_slab_free_remote(int domid, smr_slab_t *sp)
444 {
445 	smr_offset_t	slab_offset;
446 	int		slab_size;
447 	int		rv;
448 	int		masterid;
449 
450 	ASSERT(domid == idn.localid);
451 	ASSERT(idn.localid != IDN_GET_MASTERID());
452 	ASSERT(DSLAB_WRITE_HELD(domid));
453 	ASSERT(idn_domain[domid].dslab_state == DSLAB_STATE_REMOTE);
454 
455 	masterid = IDN_GET_MASTERID();
456 
457 	ASSERT(masterid != IDN_NIL_DOMID);
458 
459 	slab_offset = IDN_ADDR2OFFSET(sp->sl_start);
460 	slab_size   = (int)(sp->sl_end - sp->sl_start);
461 
462 	/*
463 	 * Do a slaballoc_put just in case there may have
464 	 * been waiters for slabs for this domain before
465 	 * returning back to the master.
466 	 */
467 	rv = smr_slaballoc_put(domid, sp, 0, 0);
468 
469 	if ((rv == -1) && (masterid != IDN_NIL_DOMID)) {
470 		/*
471 		 * Put failed.  No waiters so free the local data
472 		 * structure ship the SMR range off to the master.
473 		 */
474 		smr_free_buflist(sp);
475 		FREESTRUCT(sp, smr_slab_t, 1);
476 
477 		IDN_DLOCK_SHARED(masterid);
478 		idn_send_cmd(masterid, IDNCMD_SLABFREE,
479 				slab_offset, slab_size, 0);
480 		IDN_DUNLOCK(masterid);
481 	}
482 }
483 
484 /*
485  * Free up the list of slabs passed
486  */
487 void
488 smr_slab_free(int domid, smr_slab_t *sp)
489 {
490 	smr_slab_t	*nsp = NULL;
491 
492 	ASSERT(DSLAB_WRITE_HELD(domid));
493 
494 	if (sp == NULL)
495 		return;
496 
497 	ASSERT(IDN_GET_MASTERID() != IDN_NIL_DOMID);
498 
499 	switch (idn_domain[domid].dslab_state) {
500 	case DSLAB_STATE_UNKNOWN:
501 		cmn_err(CE_WARN,
502 			"IDN: 302: no slab free without a master");
503 		break;
504 
505 	case DSLAB_STATE_LOCAL:
506 		/*
507 		 * If I'm the master then put the slabs
508 		 * back to the local SMR pool.
509 		 */
510 		for (; sp; sp = nsp) {
511 			nsp = sp->sl_next;
512 			smr_slab_free_local(domid, sp);
513 		}
514 		break;
515 
516 	case DSLAB_STATE_REMOTE:
517 		/*
518 		 * If the domid is my own then I'm freeing
519 		 * a slab back to the Master.
520 		 */
521 		for (; sp; sp = nsp) {
522 			nsp = sp->sl_next;
523 			smr_slab_free_remote(domid, sp);
524 		}
525 		break;
526 
527 	default:
528 		cmn_err(CE_WARN,
529 			"IDN: 301: (FREE) unknown slab state "
530 			"(%d) for domain %d",
531 			idn_domain[domid].dslab_state, domid);
532 		break;
533 	}
534 }
535 
536 /*
537  * Free up the list of slab data structures ONLY.
538  * This is called during a fatal shutdown of the master
539  * where we need to garbage collect the locally allocated
540  * data structures used to manage slabs allocated to the
541  * local domain.  Should never be called by a master since
542  * the master can do a regular smr_slab_free.
543  */
544 void
545 smr_slab_garbage_collection(smr_slab_t *sp)
546 {
547 	smr_slab_t	*nsp;
548 
549 	ASSERT(idn_domain[idn.localid].dvote.v.master == 0);
550 
551 	if (sp == NULL)
552 		return;
553 	/*
554 	 * Since this is only ever called by a slave,
555 	 * the slab structure size always contains a buflist.
556 	 */
557 	for (; sp; sp = nsp) {
558 		nsp = sp->sl_next;
559 		smr_free_buflist(sp);
560 		FREESTRUCT(sp, smr_slab_t, 1);
561 	}
562 }
563 
564 /*
565  * Allocate a SMR buffer on behalf of the local domain
566  * which is ultimately targeted for the given domain.
567  *
568  * IMPORTANT: This routine is going to drop the domain rwlock (drwlock)
569  *	      for the domain on whose behalf the request is being
570  *	      made.  This routine canNOT block on trying to
571  *	      reacquire the drwlock.  If he does block then somebody
572  *	      must have the write lock on the domain which most likely
573  *	      means the domain is going south anyway, so just bail on
574  *	      this buffer.  Higher levels will retry if needed.
575  *
576  * XXX - Support larger than IDN_SMR_BUFSIZE allocations?
577  *
578  * Returns:	A negative return value indicates lock lost on domid.
579  *		EINVAL, ENOLINK, ENOLCK(internal)
580  *		smr_slaballoc_wait
581  * 		(EINVAL, ETIMEDOUT)
582  *		smr_slabwatier_unregister
583  *		(0, EINVAL, EBUSY, ENOMEM)
584  */
585 int
586 smr_buf_alloc(int domid, uint_t len, caddr_t *bufpp)
587 {
588 	register idn_domain_t	*dp, *ldp;
589 	smr_slab_t	*sp;
590 	caddr_t		bufp = NULL;
591 	int		serrno;
592 	procname_t	proc = "smr_buf_alloc";
593 
594 	dp = &idn_domain[domid];
595 	/*
596 	 * Local domain can only allocate on behalf of
597 	 * itself if this is a priviledged call and the
598 	 * caller is the master.
599 	 */
600 	ASSERT((domid != idn.localid) && (domid != IDN_NIL_DOMID));
601 
602 	*bufpp = NULL;
603 
604 	if (len > IDN_DATA_SIZE) {
605 		cmn_err(CE_WARN,
606 			"IDN: 303: buffer len %d > IDN_DATA_SIZE (%d)",
607 			len, IDN_DATA_SIZE);
608 		IDN_GKSTAT_GLOBAL_EVENT(gk_buffail, gk_buffail_last);
609 		return (EINVAL);
610 	}
611 
612 	/*
613 	 * Need to go to my local slab list to find
614 	 * a buffer.
615 	 */
616 	ldp = &idn_domain[idn.localid];
617 	/*
618 	 * Now we loop trying to locate a buffer out of our
619 	 * slabs.  We continue this until either we find a
620 	 * buffer or we're unable to allocate a slab.  Note
621 	 * that new slabs are allocated to the front.
622 	 */
623 	DSLAB_LOCK_SHARED(idn.localid);
624 	sp = ldp->dslab;
625 	do {
626 		int	spl, all_empty;
627 
628 		if (sp == NULL) {
629 			if ((serrno = smr_slab_alloc(idn.localid, &sp)) != 0) {
630 				PR_SMR("%s:%d: failed to allocate "
631 					"slab [serrno = %d]",
632 					proc, domid, serrno);
633 				DSLAB_UNLOCK(idn.localid);
634 				IDN_GKSTAT_GLOBAL_EVENT(gk_buffail,
635 							gk_buffail_last);
636 				return (serrno);
637 			}
638 			/*
639 			 * Of course, the world may have changed while
640 			 * we dropped the lock.  Better make sure we're
641 			 * still established.
642 			 */
643 			if (dp->dstate != IDNDS_CONNECTED) {
644 				PR_SMR("%s:%d: state changed during slab "
645 					"alloc (dstate = %s)\n",
646 					proc, domid, idnds_str[dp->dstate]);
647 				DSLAB_UNLOCK(idn.localid);
648 				IDN_GKSTAT_GLOBAL_EVENT(gk_buffail,
649 							gk_buffail_last);
650 				return (ENOLINK);
651 			}
652 			/*
653 			 * We were able to allocate a slab.  Should
654 			 * be at the front of the list, spin again.
655 			 */
656 			sp = ldp->dslab;
657 		}
658 		/*
659 		 * If we have reached here then we have a slab!
660 		 * Hopefully there are free bufs there :-o
661 		 */
662 		spl = splhi();
663 		all_empty = 1;
664 		for (; sp && !bufp; sp = sp->sl_next) {
665 			smr_slabbuf_t	*bp;
666 
667 			if (sp->sl_free == NULL)
668 				continue;
669 
670 			if (!lock_try(&sp->sl_lock)) {
671 				all_empty = 0;
672 				continue;
673 			}
674 
675 			if ((bp = sp->sl_free) == NULL) {
676 				lock_clear(&sp->sl_lock);
677 				continue;
678 			}
679 
680 			sp->sl_free = bp->sb_next;
681 			bp->sb_next = sp->sl_inuse;
682 			sp->sl_inuse = bp;
683 			/*
684 			 * Found a free buffer.
685 			 */
686 			bp->sb_domid = domid;
687 			bufp = bp->sb_bufp;
688 			lock_clear(&sp->sl_lock);
689 		}
690 		splx(spl);
691 
692 		if (!all_empty && !bufp) {
693 			/*
694 			 * If we still haven't found a buffer, but
695 			 * there's still possibly a buffer available,
696 			 * then try again.  Only if we're absolutely
697 			 * sure all slabs are empty do we attempt
698 			 * to allocate a new one.
699 			 */
700 			sp = ldp->dslab;
701 		}
702 	} while (bufp == NULL);
703 
704 	*bufpp = bufp;
705 
706 	ATOMIC_INC(dp->dio);
707 
708 	DSLAB_UNLOCK(idn.localid);
709 
710 	return (0);
711 }
712 
713 /*
714  * Free a buffer allocated to the local domain back to
715  * its respective slab.  Slabs are freed via the slab-reap command.
716  * XXX - Support larger than IDN_SMR_BUFSIZE allocations?
717  */
718 int
719 smr_buf_free(int domid, caddr_t bufp, uint_t len)
720 {
721 	register smr_slab_t	*sp;
722 	smr_slabbuf_t		*bp, **bpp;
723 	idn_domain_t		*ldp;
724 	int		buffreed;
725 	int		lockheld = (len == (uint_t)-1);
726 
727 	/*
728 	 * We should never be free'ing a buffer on
729 	 * behalf of ourselves as we are never the
730 	 * target for allocated SMR buffers.
731 	 */
732 	ASSERT(domid != idn.localid);
733 
734 	sp = NULL;
735 	buffreed = 0;
736 	ldp = &idn_domain[idn.localid];
737 
738 	DSLAB_LOCK_SHARED(idn.localid);
739 
740 	if (((uintptr_t)bufp & (IDN_SMR_BUFSIZE-1)) &&
741 	    (IDN_ADDR2OFFSET(bufp) % IDN_SMR_BUFSIZE)) {
742 		cmn_err(CE_WARN,
743 			"IDN: 304: buffer (0x%x) from domain %d not on a "
744 			"%d boundary", (uintptr_t)bufp, domid,
745 			IDN_SMR_BUFSIZE);
746 		goto bfdone;
747 	}
748 	if (!lockheld && (len > IDN_DATA_SIZE)) {
749 		cmn_err(CE_WARN,
750 			"IDN: 305: buffer length (%d) from domain %d greater "
751 			"than IDN_DATA_SIZE (%d)",
752 			len, domid, IDN_DATA_SIZE);
753 		goto bfdone;
754 	}
755 
756 	for (sp = ldp->dslab; sp; sp = sp->sl_next)
757 		if ((bufp >= sp->sl_start) && (bufp < sp->sl_end))
758 			break;
759 
760 	if (sp) {
761 		int spl;
762 
763 		spl = splhi();
764 		while (!lock_try(&sp->sl_lock))
765 			;
766 		bpp = &sp->sl_inuse;
767 		for (bp = *bpp; bp; bp = *bpp) {
768 			if (bp->sb_bufp == bufp)
769 				break;
770 			bpp = &bp->sb_next;
771 		}
772 		if (bp) {
773 			ASSERT(bp->sb_domid == domid);
774 			buffreed++;
775 			bp->sb_domid = IDN_NIL_DOMID;
776 			*bpp = bp->sb_next;
777 			bp->sb_next = sp->sl_free;
778 			sp->sl_free = bp;
779 		}
780 		lock_clear(&sp->sl_lock);
781 		splx(spl);
782 	}
783 bfdone:
784 	if (buffreed) {
785 		ATOMIC_DEC(idn_domain[domid].dio);
786 		DIOCHECK(domid);
787 	} else {
788 		cmn_err(CE_WARN,
789 			"IDN: 306: unknown buffer (0x%p) from domain %d",
790 			bufp, domid);
791 		ATOMIC_INC(idn_domain[domid].dioerr);
792 	}
793 
794 	DSLAB_UNLOCK(idn.localid);
795 
796 	return (sp ? 0 : -1);
797 }
798 
799 /*
800  * Alternative interface to smr_buf_free, but with local drwlock
801  * held.
802  */
803 /* ARGSUSED2 */
804 int
805 smr_buf_free_locked(int domid, caddr_t bufp, uint_t len)
806 {
807 	return (smr_buf_free(domid, bufp, (uint_t)-1));
808 }
809 
810 /*
811  * Free any and all buffers associated with the given domain.
812  * Assumption is that domain is dead and buffers are not in use.
813  * Returns:	Number of buffers freed.
814  *		-1 if error.
815  */
816 int
817 smr_buf_free_all(int domid)
818 {
819 	register smr_slab_t	*sp;
820 	register smr_slabbuf_t	*bp, **bpp;
821 	idn_domain_t		*ldp;
822 	int			nbufsfreed = 0;
823 	procname_t	proc = "smr_buf_free_all";
824 
825 	/*
826 	 * We should never be free'ing buffers on
827 	 * behalf of ourself
828 	 */
829 	ASSERT(domid != idn.localid);
830 
831 	if (!VALID_DOMAINID(domid)) {
832 		cmn_err(CE_WARN,
833 			"IDN: 307: domain ID (%d) invalid", proc, domid);
834 		return (-1);
835 	}
836 
837 	ldp = &idn_domain[idn.localid];
838 
839 	/*
840 	 * We grab the writer lock so that we don't have any
841 	 * competition during a "free-all" call.
842 	 * No need to grab individual slab locks when holding
843 	 * dslab(writer).
844 	 */
845 	DSLAB_LOCK_EXCL(idn.localid);
846 
847 	for (sp = ldp->dslab; sp; sp = sp->sl_next) {
848 		bpp = &sp->sl_inuse;
849 		for (bp = *bpp; bp; bp = *bpp) {
850 			if (bp->sb_domid == domid) {
851 				bp->sb_domid = IDN_NIL_DOMID;
852 				*bpp = bp->sb_next;
853 				bp->sb_next = sp->sl_free;
854 				sp->sl_free = bp;
855 				nbufsfreed++;
856 			} else {
857 				bpp = &bp->sb_next;
858 			}
859 		}
860 	}
861 
862 	if (nbufsfreed > 0) {
863 		ATOMIC_SUB(idn_domain[domid].dio, nbufsfreed);
864 		idn_domain[domid].dioerr = 0;
865 		DIOCHECK(domid);
866 	}
867 
868 	DSLAB_UNLOCK(idn.localid);
869 
870 	PR_SMR("%s: freed %d buffers for domain %d\n",
871 		proc, nbufsfreed, domid);
872 
873 	return (nbufsfreed);
874 }
875 
876 int
877 smr_buf_reclaim(int domid, int nbufs)
878 {
879 	int		num_reclaimed = 0;
880 	idn_domain_t	*ldp, *dp;
881 	procname_t	proc = "smr_buf_reclaim";
882 
883 	ldp = &idn_domain[idn.localid];
884 	dp  = &idn_domain[domid];
885 
886 	ASSERT(domid != idn.localid);
887 
888 	if (ATOMIC_CAS(&dp->dreclaim_inprogress, 0, 1)) {
889 		/*
890 		 * Reclaim is already in progress, don't
891 		 * bother.
892 		 */
893 		PR_DATA("%s: reclaim already in progress\n", proc);
894 		return (0);
895 	}
896 
897 	PR_SMR("%s: requested %d buffers from domain %d\n",
898 		proc, nbufs, domid);
899 
900 	if (dp->dio && nbufs) {
901 		register smr_slab_t	*sp;
902 		int spl;
903 
904 		DSLAB_LOCK_SHARED(idn.localid);
905 		spl = splhi();
906 		for (sp = ldp->dslab; sp && nbufs; sp = sp->sl_next) {
907 			register smr_slabbuf_t	*bp, **bpp;
908 
909 			if (sp->sl_inuse == NULL)
910 				continue;
911 
912 			if (!lock_try(&sp->sl_lock))
913 				continue;
914 
915 			if (sp->sl_inuse == NULL) {
916 				lock_clear(&sp->sl_lock);
917 				continue;
918 			}
919 
920 			bpp = &sp->sl_inuse;
921 			for (bp = *bpp; bp && nbufs; bp = *bpp) {
922 				if (bp->sb_domid == domid) {
923 					/*
924 					 * Buffer no longer in use,
925 					 * reclaim it.
926 					 */
927 					bp->sb_domid = IDN_NIL_DOMID;
928 					*bpp = bp->sb_next;
929 					bp->sb_next = sp->sl_free;
930 					sp->sl_free = bp;
931 					num_reclaimed++;
932 					nbufs--;
933 				} else {
934 					bpp = &bp->sb_next;
935 				}
936 			}
937 			lock_clear(&sp->sl_lock);
938 		}
939 		splx(spl);
940 
941 		if (num_reclaimed > 0) {
942 			ATOMIC_SUB(dp->dio, num_reclaimed);
943 			DIOCHECK(domid);
944 		}
945 		DSLAB_UNLOCK(idn.localid);
946 	}
947 
948 	PR_SMR("%s: reclaimed %d buffers from domain %d\n",
949 		proc, num_reclaimed, domid);
950 
951 	return (num_reclaimed);
952 }
953 
954 /*
955  * Returns 1	If any buffers are locked for the given slab.
956  *	   0	If all buffers are free for the given slab.
957  *
958  * The caller is assumed to have the slab protected so that no
959  * new allocations are attempted from it.  Also, this is only
960  * valid to be called with respect to slabs that were allocated
961  * on behalf of the local domain, i.e. the master is not expected
962  * to call this function with (slave) slab "representatives".
963  */
964 int
965 smr_slab_busy(smr_slab_t *sp)
966 {
967 	return ((sp && sp->sl_inuse) ? 1 : 0);
968 }
969 
970 int
971 smr_slabwaiter_init()
972 {
973 	register int		i;
974 	struct slabwaiter	*wp;
975 
976 	if (idn.slabwaiter != NULL)
977 		return (0);
978 
979 	/*
980 	 * Initialize the slab waiting area for MAX_DOMAINS.
981 	 */
982 	idn.slabwaiter = GETSTRUCT(struct slabwaiter, MAX_DOMAINS);
983 	wp = idn.slabwaiter;
984 	for (i = 0; i < MAX_DOMAINS; wp++, i++) {
985 		wp->w_closed = 0;
986 		mutex_init(&wp->w_mutex, NULL, MUTEX_DEFAULT, NULL);
987 		cv_init(&wp->w_cv, NULL, CV_DEFAULT, NULL);
988 	}
989 
990 	return (0);
991 }
992 
993 void
994 smr_slabwaiter_deinit()
995 {
996 	register int		i;
997 	struct slabwaiter	*wp;
998 
999 	if ((wp = idn.slabwaiter) == NULL)
1000 		return;
1001 
1002 	for (i = 0; i < MAX_DOMAINS; wp++, i++) {
1003 		ASSERT(wp->w_nwaiters == 0);
1004 		ASSERT(wp->w_sp == NULL);
1005 		cv_destroy(&wp->w_cv);
1006 		mutex_destroy(&wp->w_mutex);
1007 	}
1008 
1009 	FREESTRUCT(idn.slabwaiter, struct slabwaiter, MAX_DOMAINS);
1010 	idn.slabwaiter = NULL;
1011 }
1012 
1013 void
1014 smr_slabwaiter_open(domainset_t domset)
1015 {
1016 	int			d;
1017 	struct slabwaiter	*wp;
1018 
1019 	if ((domset == 0) || !idn.slabwaiter)
1020 		return;
1021 
1022 	wp = idn.slabwaiter;
1023 
1024 	for (d = 0; d < MAX_DOMAINS; wp++, d++) {
1025 		if (!DOMAIN_IN_SET(domset, d))
1026 			continue;
1027 		mutex_enter(&wp->w_mutex);
1028 		wp->w_closed = 0;
1029 		mutex_exit(&wp->w_mutex);
1030 	}
1031 }
1032 
1033 void
1034 smr_slabwaiter_close(domainset_t domset)
1035 {
1036 	int			d;
1037 	struct slabwaiter	*wp;
1038 
1039 	if ((domset == 0) || !idn.slabwaiter)
1040 		return;
1041 
1042 	wp = idn.slabwaiter;
1043 
1044 	for (d = 0; d < MAX_DOMAINS; wp++, d++) {
1045 		if (!DOMAIN_IN_SET(domset, d))
1046 			continue;
1047 		mutex_enter(&wp->w_mutex);
1048 		wp->w_closed = 1;
1049 		cv_broadcast(&wp->w_cv);
1050 		mutex_exit(&wp->w_mutex);
1051 	}
1052 }
1053 
1054 /*
1055  * Register the caller with the waiting list for the
1056  * given domain.
1057  *
1058  * Protocol:
1059  *	1st Local requester:	register -> alloc ->
1060  *						put(wakeup|xdc) -> unregister
1061  *	Nth Local requester:	register -> wait
1062  *	1st Remote requester:	register -> xdc -> wait
1063  *	Nth Remote requester:	register -> wait
1064  *
1065  *	Remote Responder:	local alloc -> put(xdc)
1066  *	Local Handler:		xdc -> put(wakeup)
1067  *
1068  * E.g. A standard slave allocation request:
1069  *	slave			master
1070  *	-----			------
1071  *	idn_slab_alloc(remote)
1072  *	- register
1073  *	- xdc		->	idn_handler
1074  *	- wait			...
1075  *				idn_slab_alloc(local)
1076  *				- register
1077  *				- alloc
1078  *				- put
1079  *				  . wakeup [local]
1080  *				- unregister
1081  *	idn_handler    	<-	- xdc
1082  *	- put       		DONE
1083  *	  . wakeup [local]
1084  *	    |
1085  *	    V
1086  *      - wait
1087  *	  . unregister
1088  *	DONE
1089  */
1090 static int
1091 smr_slabwaiter_register(int domid)
1092 {
1093 	struct slabwaiter	*wp;
1094 	int		nwait;
1095 	procname_t	proc = "smr_slabwaiter_register";
1096 
1097 
1098 	ASSERT(domid != IDN_NIL_DOMID);
1099 
1100 	ASSERT(DSLAB_READ_HELD(domid));
1101 
1102 	wp = &idn.slabwaiter[domid];
1103 
1104 	ASSERT(MUTEX_NOT_HELD(&wp->w_mutex));
1105 
1106 	mutex_enter(&wp->w_mutex);
1107 
1108 	nwait = ++(wp->w_nwaiters);
1109 	ASSERT(nwait > 0);
1110 
1111 	PR_SMR("%s: domain = %d, (new)nwaiters = %d\n",
1112 		proc, domid, nwait);
1113 
1114 	if (nwait > 1) {
1115 		/*
1116 		 * There are already waiters for slab allocations
1117 		 * with respect to this domain.
1118 		 */
1119 		PR_SMR("%s: existing waiters for slabs for domain %d\n",
1120 			proc, domid);
1121 		mutex_exit(&wp->w_mutex);
1122 
1123 		return (nwait);
1124 	}
1125 	PR_SMR("%s: initial waiter for slabs for domain %d\n", proc, domid);
1126 	/*
1127 	 * We are the first requester of a slab allocation for this
1128 	 * respective domain.  Need to prep waiting area for
1129 	 * subsequent arrival of a slab.
1130 	 */
1131 	wp->w_sp = NULL;
1132 	wp->w_done = 0;
1133 	wp->w_serrno = 0;
1134 
1135 	mutex_exit(&wp->w_mutex);
1136 
1137 	return (nwait);
1138 }
1139 
1140 /*
1141  * It is assumed that the caller had previously registered,
1142  * but wakeup did not occur due to caller never waiting.
1143  * Thus, slaballoc mutex is still held by caller.
1144  *
1145  * Returns:	0
1146  *		EINVAL
1147  *		EBUSY
1148  *		w_serrno (smr_slaballoc_put)
1149  *		(0, ENOLCK, ENOMEM, EDQUOT, EBUSY, ECANCELED)
1150  */
1151 static int
1152 smr_slabwaiter_unregister(int domid, smr_slab_t **spp)
1153 {
1154 	struct slabwaiter	*wp;
1155 	int		serrno = 0;
1156 	procname_t	proc = "smr_slabwaiter_unregister";
1157 
1158 
1159 	ASSERT(domid != IDN_NIL_DOMID);
1160 
1161 	wp = &idn.slabwaiter[domid];
1162 
1163 	mutex_enter(&wp->w_mutex);
1164 
1165 	PR_SMR("%s: domain = %d, nwaiters = %d\n",
1166 		proc, domid, wp->w_nwaiters);
1167 
1168 	if (wp->w_nwaiters <= 0) {
1169 		/*
1170 		 * Hmmm...nobody is registered!
1171 		 */
1172 		PR_SMR("%s: NO WAITERS (domid = %d)\n", proc, domid);
1173 		mutex_exit(&wp->w_mutex);
1174 		return (EINVAL);
1175 	}
1176 	(wp->w_nwaiters)--;
1177 	/*
1178 	 * Is our present under the tree?
1179 	 */
1180 	if (!wp->w_done) {
1181 		/*
1182 		 * Bummer...no presents.  Let the caller know
1183 		 * via a null slab pointer.
1184 		 * Note that we don't clean up immediately since
1185 		 * message might still come in for other waiters.
1186 		 * Thus, late sleepers may still get a chance.
1187 		 */
1188 		PR_SMR("%s: bummer no slab allocated for domain %d\n",
1189 			proc, domid);
1190 		ASSERT(wp->w_sp == NULL);
1191 		(*spp) = NULL;
1192 		serrno = wp->w_closed ? ECANCELED : EBUSY;
1193 
1194 	} else {
1195 		(*spp) = wp->w_sp;
1196 		serrno = wp->w_serrno;
1197 
1198 #ifdef DEBUG
1199 		if (serrno == 0) {
1200 			register smr_slab_t	*sp;
1201 
1202 			ASSERT(wp->w_sp);
1203 			PR_SMR("%s: allocation succeeded (domain %d)\n",
1204 				proc, domid);
1205 
1206 			DSLAB_LOCK_SHARED(domid);
1207 			for (sp = idn_domain[domid].dslab; sp; sp = sp->sl_next)
1208 				if (sp == wp->w_sp)
1209 					break;
1210 			if (sp == NULL)
1211 				cmn_err(CE_WARN,
1212 					"%s:%d: slab ptr = NULL",
1213 					proc, domid);
1214 			DSLAB_UNLOCK(domid);
1215 		} else {
1216 			PR_SMR("%s: allocation failed (domain %d) "
1217 				"[serrno = %d]\n", proc, domid, serrno);
1218 		}
1219 #endif /* DEBUG */
1220 	}
1221 	if (wp->w_nwaiters == 0) {
1222 		/*
1223 		 * Last one turns out the lights.
1224 		 */
1225 		PR_SMR("%s: domain %d last waiter, turning out lights\n",
1226 			proc, domid);
1227 		wp->w_sp = NULL;
1228 		wp->w_done = 0;
1229 		wp->w_serrno = 0;
1230 	}
1231 	mutex_exit(&wp->w_mutex);
1232 
1233 	return (serrno);
1234 }
1235 
1236 /*
1237  * Called to abort any slaballoc requests on behalf of the
1238  * given domain.
1239  */
1240 int
1241 smr_slabwaiter_abort(int domid, int serrno)
1242 {
1243 	ASSERT(serrno != 0);
1244 
1245 	return (smr_slaballoc_put(domid, NULL, 0, serrno));
1246 }
1247 
1248 /*
1249  * Put ourselves into a timedwait waiting for slab to be
1250  * allocated.
1251  * Returns with slaballoc mutex dropped.
1252  *
1253  * Returns:	EINVAL
1254  *		ETIMEDOUT
1255  *		smr_slabwatier_unregister
1256  *		(0, EINVAL, EBUSY, ENOMEM)
1257  */
1258 static int
1259 smr_slaballoc_wait(int domid, smr_slab_t **spp)
1260 {
1261 	struct slabwaiter	*wp;
1262 	int			serrno = 0, serrno_unreg;
1263 	procname_t		proc = "smr_slaballoc_wait";
1264 
1265 
1266 	wp = &idn.slabwaiter[domid];
1267 
1268 	ASSERT(MUTEX_NOT_HELD(&wp->w_mutex));
1269 
1270 	mutex_enter(&wp->w_mutex);
1271 
1272 	PR_SMR("%s: domain = %d, nwaiters = %d, wsp = 0x%x\n",
1273 		proc, domid, wp->w_nwaiters, (uint_t)wp->w_sp);
1274 
1275 	if (wp->w_nwaiters <= 0) {
1276 		/*
1277 		 * Hmmm...no waiters registered.
1278 		 */
1279 		PR_SMR("%s: domain %d, no waiters!\n",
1280 			proc, domid);
1281 		mutex_exit(&wp->w_mutex);
1282 		return (EINVAL);
1283 	}
1284 	ASSERT(DSLAB_READ_HELD(domid));
1285 	DSLAB_UNLOCK(domid);
1286 
1287 	if (!wp->w_done && !wp->w_closed) {
1288 		int	rv;
1289 
1290 		/*
1291 		 * Only wait if data hasn't arrived yet.
1292 		 */
1293 		PR_SMR("%s: domain %d, going to sleep...\n",
1294 			proc, domid);
1295 
1296 
1297 		rv = cv_timedwait_sig(&wp->w_cv, &wp->w_mutex,
1298 				lbolt + IDN_SLABALLOC_WAITTIME);
1299 		if (rv == -1)
1300 			serrno = ETIMEDOUT;
1301 
1302 		PR_SMR("%s: domain %d, awakened (reason = %s)\n",
1303 			proc, domid, (rv == -1) ? "TIMEOUT" : "SIGNALED");
1304 	}
1305 	/*
1306 	 * We've awakened or request already filled!
1307 	 * Unregister ourselves.
1308 	 */
1309 	mutex_exit(&wp->w_mutex);
1310 
1311 	/*
1312 	 * Any gifts will be entered into spp.
1313 	 */
1314 	serrno_unreg = smr_slabwaiter_unregister(domid, spp);
1315 
1316 	/*
1317 	 * Leave with reader lock on dslab_lock.
1318 	 */
1319 	DSLAB_LOCK_SHARED(domid);
1320 
1321 	if ((serrno_unreg == EBUSY) && (serrno == ETIMEDOUT))
1322 		return (serrno);
1323 	else
1324 		return (serrno_unreg);
1325 }
1326 
1327 /*
1328  * A SMR slab was allocated on behalf of the given domain.
1329  * Wakeup anybody that may have been waiting for the allocation.
1330  * Note that if the domain is a remote one, i.e. master is allocating
1331  * on behalf of a slave, it's up to the caller to transmit the
1332  * allocation response to that domain.
1333  * The force flag indicates that we want to install the slab for
1334  * the given user regardless of whether there are waiters or not.
1335  * This is used primarily in situations where a slave may have timed
1336  * out before the response actually arrived.  In this situation we
1337  * don't want to send slab back to the master after we went through
1338  * the trouble of allocating one.  Master is _not_ allowed to do this
1339  * for remote domains.
1340  *
1341  * Returns:	-1	Non-registered waiter or waiting area garbaged.
1342  *		0	Successfully performed operation.
1343  */
1344 int
1345 smr_slaballoc_put(int domid, smr_slab_t *sp, int forceflag, int serrno)
1346 {
1347 	idn_domain_t		*dp;
1348 	struct slabwaiter	*wp;
1349 	procname_t		proc = "smr_slaballoc_put";
1350 
1351 
1352 	dp = &idn_domain[domid];
1353 
1354 	ASSERT(!serrno ? DSLAB_WRITE_HELD(domid) : 1);
1355 
1356 	if (domid == IDN_NIL_DOMID)
1357 		return (-1);
1358 
1359 	ASSERT(serrno ? (sp == NULL) : (sp != NULL));
1360 
1361 	wp = &idn.slabwaiter[domid];
1362 
1363 	mutex_enter(&wp->w_mutex);
1364 
1365 	PR_SMR("%s: domain = %d, bufp = 0x%x, ebufp = 0x%x, "
1366 		"(f = %d, se = %d)\n",
1367 		proc, domid, (uint_t)(sp ? sp->sl_start : 0),
1368 		(uint_t)(sp ? sp->sl_end : 0),
1369 		forceflag, serrno);
1370 
1371 	if (wp->w_nwaiters <= 0) {
1372 		/*
1373 		 * There are no waiters!!  Must have timed out
1374 		 * and left.  Oh well...
1375 		 */
1376 		PR_SMR("%s: no slaballoc waiters found for domain %d\n",
1377 			proc, domid);
1378 		if (!forceflag || serrno || !sp) {
1379 			/*
1380 			 * No waiters and caller doesn't want to force it.
1381 			 */
1382 			mutex_exit(&wp->w_mutex);
1383 			return (-1);
1384 		}
1385 		PR_SMR("%s: forcing slab onto domain %d\n", proc, domid);
1386 		ASSERT(domid == idn.localid);
1387 		ASSERT(wp->w_sp == NULL);
1388 		wp->w_done = 0;
1389 		/*
1390 		 * Now we fall through and let it be added in the
1391 		 * regular manor.
1392 		 */
1393 	}
1394 	if (wp->w_done) {
1395 		/*
1396 		 * There's at least one waiter so there has
1397 		 * to be a slab structure waiting for us.
1398 		 * If everything is going smoothly, there should only
1399 		 * be one guy coming through the path of inserting
1400 		 * an error or good slab.  However, if a disconnect was
1401 		 * detected, you may get several guys coming through
1402 		 * trying to let everybody know.
1403 		 */
1404 		ASSERT(wp->w_serrno ?
1405 			(wp->w_sp == NULL) : (wp->w_sp != NULL));
1406 
1407 		cv_broadcast(&wp->w_cv);
1408 		mutex_exit(&wp->w_mutex);
1409 
1410 		return (-1);
1411 	}
1412 	if (serrno != 0) {
1413 		/*
1414 		 * Bummer...allocation failed.  This call is simply
1415 		 * to wake up the sleepers and let them know.
1416 		 */
1417 		PR_SMR("%s: slaballoc failed for domain %d\n",
1418 			proc, domid);
1419 		wp->w_serrno = serrno;
1420 		wp->w_done = 1;
1421 		cv_broadcast(&wp->w_cv);
1422 		mutex_exit(&wp->w_mutex);
1423 
1424 		return (0);
1425 	}
1426 	PR_SMR("%s: putting slab into struct (domid=%d, localid=%d)\n",
1427 		proc, domid, idn.localid);
1428 	/*
1429 	 * Prep the slab structure.
1430 	 */
1431 
1432 	if (domid == idn.localid) {
1433 		/*
1434 		 * Allocation was indeed for me.
1435 		 * Slab may or may not be locked when
1436 		 * we reach.  Normally they will be locked
1437 		 * if we're being called on behalf of a
1438 		 * free, and not locked if on behalf of
1439 		 * a new allocation request.
1440 		 */
1441 		lock_clear(&sp->sl_lock);
1442 		smr_alloc_buflist(sp);
1443 #ifdef DEBUG
1444 	} else {
1445 		uint_t	rv;
1446 		/*
1447 		 * Slab was not allocated on my behalf.  Must be
1448 		 * a master request on behalf of some other domain.
1449 		 * Prep appropriately.  Slab should have been locked
1450 		 * by smr_slab_reserve.
1451 		 */
1452 		rv = lock_try(&sp->sl_lock);
1453 		ASSERT(!rv);
1454 		ASSERT(sp->sl_domid == (short)domid);
1455 #endif /* DEBUG */
1456 	}
1457 
1458 	/*
1459 	 * Slab is ready to go.  Insert it into the domain's
1460 	 * slab list so once we wake everybody up they'll find it.
1461 	 * You better have write lock if you're putting treasures
1462 	 * there.
1463 	 */
1464 	ASSERT(DSLAB_WRITE_HELD(domid));
1465 
1466 	sp->sl_next = dp->dslab;
1467 	dp->dslab  = sp;
1468 	dp->dnslabs++;
1469 
1470 	/*
1471 	 * It's possible to fall through here without waiters.
1472 	 * This is a case where forceflag was set.
1473 	 */
1474 	if (wp->w_nwaiters > 0) {
1475 		wp->w_sp = sp;
1476 		wp->w_serrno = serrno;
1477 		wp->w_done = 1;
1478 		cv_broadcast(&wp->w_cv);
1479 	} else {
1480 		ASSERT(forceflag);
1481 		wp->w_sp = NULL;
1482 		wp->w_serrno = 0;
1483 		wp->w_done = 0;
1484 	}
1485 	mutex_exit(&wp->w_mutex);
1486 
1487 	return (0);
1488 }
1489 
1490 /*
1491  * Get the slab representing [bufp,ebufp] from the respective
1492  * domain's pool if all the buffers are free.  Remove them from
1493  * the domain's list and return it.
1494  * If bufp == NULL, then return however many free ones you
1495  * can find.
1496  * List of slabs are returned locked (sl_lock).
1497  * XXX - Need minimum limit to make sure we don't free up _all_
1498  *	 of our slabs!  However, during a shutdown we will need
1499  *	 method to free them all up regardless of locking.
1500  */
1501 smr_slab_t *
1502 smr_slaballoc_get(int domid, caddr_t bufp, caddr_t ebufp)
1503 {
1504 	idn_domain_t	*dp;
1505 	smr_slab_t	*retsp, *sp, **psp;
1506 	int		foundit, islocal = 0;
1507 	int		nslabs;
1508 	procname_t	proc = "smr_slaballoc_get";
1509 
1510 	PR_SMR("%s: getting slab for domain %d [bufp=0x%x, ebufp=0x%x]\n",
1511 		proc, domid, (uintptr_t)bufp, (uint_t)ebufp);
1512 
1513 	dp = &idn_domain[domid];
1514 
1515 	ASSERT(DSLAB_WRITE_HELD(domid));
1516 
1517 	if ((sp = dp->dslab) == NULL) {
1518 		PR_SMR("%s: oops, no slabs for domain %d\n",
1519 			proc, domid);
1520 		return (NULL);
1521 	}
1522 	/*
1523 	 * If domid is myself then I'm trying to get a slab out
1524 	 * of my local pool.  Otherwise, I'm the master and
1525 	 * I'm trying to get the slab representative from the
1526 	 * global pool.
1527 	 */
1528 	if (domid == idn.localid)
1529 		islocal = 1;
1530 
1531 	if (bufp != NULL) {
1532 		nslabs = -1;
1533 	} else {
1534 		nslabs = *(int *)ebufp;
1535 		if (nslabs == 0) {
1536 			PR_SMR("%s: requested nslabs (%d) <= 0\n",
1537 				proc, nslabs);
1538 			return (NULL);
1539 		} else if (nslabs < 0) {
1540 			/*
1541 			 * Caller wants them all!
1542 			 */
1543 			nslabs = (int)dp->dnslabs;
1544 		}
1545 	}
1546 
1547 	retsp = NULL;
1548 	foundit = 0;
1549 	for (psp = &dp->dslab; sp; sp = *psp) {
1550 		int	isbusy;
1551 
1552 		if (bufp && (sp->sl_start != bufp)) {
1553 			psp = &sp->sl_next;
1554 			continue;
1555 		}
1556 
1557 		if (bufp && (ebufp > sp->sl_end)) {
1558 			PR_SMR("%s: bufp/ebufp (0x%x/0x%x) "
1559 				"expected (0x%x/0x%x)\n",
1560 				proc, (uintptr_t)bufp, (uintptr_t)ebufp,
1561 				(uintptr_t)sp->sl_start, (uintptr_t)sp->sl_end);
1562 			ASSERT(0);
1563 		}
1564 		/*
1565 		 * We found the desired slab.  Make sure
1566 		 * it's free.
1567 		 */
1568 		foundit++;
1569 		isbusy = 0;
1570 		if (islocal) {
1571 			int spl;
1572 
1573 			/*
1574 			 * Some of the buffers in the slab
1575 			 * are still in use.  Unlock the
1576 			 * buffers we locked and bail out.
1577 			 */
1578 			spl = splhi();
1579 			if (!lock_try(&sp->sl_lock)) {
1580 				isbusy = 1;
1581 				foundit--;
1582 			} else if (sp->sl_inuse) {
1583 				lock_clear(&sp->sl_lock);
1584 				isbusy = 1;
1585 				foundit--;
1586 			}
1587 			splx(spl);
1588 		} else {
1589 			/*
1590 			 * If not local, then I'm the master getting
1591 			 * a slab from one of the slaves.  In this case,
1592 			 * their slab structs will always be locked.
1593 			 */
1594 			ASSERT(!lock_try(&sp->sl_lock));
1595 		}
1596 		if (!isbusy) {
1597 			/*
1598 			 * Delete the entry from the list and slap
1599 			 * it onto our return list.
1600 			 */
1601 			*psp = sp->sl_next;
1602 			sp->sl_next = retsp;
1603 			retsp = sp;
1604 		} else {
1605 			psp = &sp->sl_next;
1606 		}
1607 		/*
1608 		 * If bufp == NULL (alternate interface) and we haven't
1609 		 * found the desired number of slabs yet, keep looking.
1610 		 */
1611 		if (bufp || (foundit == nslabs))
1612 			break;
1613 	}
1614 	dp->dnslabs -= (short)foundit;
1615 
1616 	if (foundit) {
1617 		PR_SMR("%s: found %d free slabs (domid = %d)\n",
1618 			proc, foundit, domid);
1619 	} else {
1620 		PR_SMR("%s: no free slabs found (domid = %d)\n",
1621 			proc, domid);
1622 	}
1623 
1624 	/*
1625 	 * If this is the alternate interface, need to return
1626 	 * the number of slabs found in the ebufp parameter.
1627 	 */
1628 	if (bufp == NULL)
1629 		*(int *)ebufp = foundit;
1630 
1631 	return (retsp);
1632 }
1633 
1634 /*
1635  * Wrapper to hide alternate interface to smr_slaballoc_get()
1636  */
1637 smr_slab_t *
1638 smr_slaballoc_get_n(int domid, int *nslabs)
1639 {
1640 	smr_slab_t	*sp;
1641 
1642 	ASSERT(DSLAB_WRITE_HELD(domid));
1643 
1644 	sp = smr_slaballoc_get(domid, NULL, (caddr_t)nslabs);
1645 
1646 	return (sp);
1647 }
1648 
1649 /*
1650  * Only called by master.  Initialize slab pool based on local SMR.
1651  * Returns number of slabs initialized.
1652  * reserved_size = Length of area at the front of the NWR portion
1653  *		   of the SMR to reserve and not make available for
1654  *		   slab allocations.  Must be a IDN_SMR_BUFSIZE multiple.
1655  * reserved_area = Pointer to reserved area, if any.
1656  */
1657 int
1658 smr_slabpool_init(size_t reserved_size, caddr_t *reserved_area)
1659 {
1660 	size_t			nwr_available;
1661 	int			minperpool, ntotslabs, nxslabs, nslabs;
1662 	register int		p, pp;
1663 	register caddr_t	bufp;
1664 	register smr_slab_t	*sp;
1665 
1666 	ASSERT(IDN_GLOCK_IS_EXCL());
1667 	ASSERT(IDN_GET_MASTERID() != IDN_NIL_DOMID);
1668 
1669 	*reserved_area = NULL;
1670 
1671 	nwr_available = MB2B(IDN_NWR_SIZE) - reserved_size;
1672 
1673 	if ((idn.localid != IDN_GET_MASTERID()) ||
1674 	    (nwr_available < IDN_SLAB_SIZE) ||
1675 	    (idn.slabpool != NULL) ||
1676 	    ((reserved_size != 0) && (reserved_size & (IDN_SMR_BUFSIZE-1)))) {
1677 		return (-1);
1678 	}
1679 
1680 	idn.slabpool = GETSTRUCT(struct slabpool, 1);
1681 	idn.slabpool->ntotslabs = ntotslabs = nwr_available / IDN_SLAB_SIZE;
1682 	ASSERT(ntotslabs > 0);
1683 	minperpool = (ntotslabs < IDN_SLAB_MINPERPOOL) ?
1684 						1 : IDN_SLAB_MINPERPOOL;
1685 	idn.slabpool->npools = (ntotslabs + (minperpool - 1)) / minperpool;
1686 
1687 	if ((idn.slabpool->npools & 1) == 0) {
1688 		/*
1689 		 * npools needs to be odd for hashing algorithm.
1690 		 */
1691 		idn.slabpool->npools++;
1692 	}
1693 	ASSERT(idn.slabpool->npools > 0);
1694 	minperpool = (ntotslabs < idn.slabpool->npools) ?
1695 				1 : (ntotslabs / idn.slabpool->npools);
1696 
1697 	/*
1698 	 * Calculate the number of extra slabs that will need to
1699 	 * be alloted to the pools.  This number will be less than
1700 	 * npools.  Only one extra slab is allocated to each pool
1701 	 * until we have assigned all the extra slabs.
1702 	 */
1703 	if (ntotslabs > (idn.slabpool->npools * minperpool))
1704 		nxslabs = ntotslabs - (idn.slabpool->npools * minperpool);
1705 	else
1706 		nxslabs = 0;
1707 	ASSERT((nxslabs >= 0) && (nxslabs < idn.slabpool->npools));
1708 
1709 	idn.slabpool->pool = GETSTRUCT(struct smr_slabtbl,
1710 					idn.slabpool->npools);
1711 	sp = GETSTRUCT(smr_slab_t, idn.slabpool->ntotslabs);
1712 
1713 	idn.slabpool->savep = sp;
1714 	bufp = idn.smr.vaddr + reserved_size;
1715 
1716 	for (p = nslabs = 0;
1717 	    (p < idn.slabpool->npools) && (ntotslabs > 0);
1718 	    p++, ntotslabs -= nslabs) {
1719 
1720 		nslabs = (ntotslabs < minperpool) ? ntotslabs : minperpool;
1721 		if (nxslabs > 0) {
1722 			nslabs++;
1723 			nxslabs--;
1724 		}
1725 		idn.slabpool->pool[p].sarray = sp;
1726 		for (pp = 0; pp < nslabs; pp++) {
1727 
1728 			sp->sl_next  = NULL;
1729 			sp->sl_start = bufp;
1730 			sp->sl_end   = bufp = sp->sl_start + IDN_SLAB_SIZE;
1731 			sp->sl_lock  = 0;
1732 			sp->sl_domid = (short)IDN_NIL_DOMID;
1733 
1734 			sp++;
1735 		}
1736 		idn.slabpool->pool[p].nfree   = nslabs;
1737 		idn.slabpool->pool[p].nslabs  = nslabs;
1738 	}
1739 	ASSERT((ntotslabs == 0) && (nxslabs == 0));
1740 	/*
1741 	 * We should be at the end of the SMR at this point.
1742 	 */
1743 	ASSERT(bufp == (idn.smr.vaddr
1744 			+ reserved_size
1745 			+ (idn.slabpool->ntotslabs * IDN_SLAB_SIZE)));
1746 
1747 	if (reserved_size != 0)
1748 		*reserved_area = idn.smr.vaddr;
1749 
1750 	return (0);
1751 }
1752 
1753 void
1754 smr_slabpool_deinit()
1755 {
1756 	if (idn.slabpool == NULL)
1757 		return;
1758 
1759 	FREESTRUCT(idn.slabpool->savep, smr_slab_t, idn.slabpool->ntotslabs);
1760 	FREESTRUCT(idn.slabpool->pool, struct smr_slabtbl,
1761 			idn.slabpool->npools);
1762 	FREESTRUCT(idn.slabpool, struct slabpool, 1);
1763 
1764 	idn.slabpool = NULL;
1765 }
1766 
1767 void
1768 smr_alloc_buflist(smr_slab_t *sp)
1769 {
1770 	int		n, nbufs;
1771 	caddr_t		sbufp;
1772 	smr_slabbuf_t	*hp, *bp;
1773 
1774 	if (sp->sl_head)
1775 		return;
1776 
1777 	nbufs = (sp->sl_end - sp->sl_start) / IDN_SMR_BUFSIZE;
1778 	ASSERT(nbufs > 0);
1779 	if (nbufs <= 0) {
1780 		sp->sl_head = sp->sl_free = sp->sl_inuse = NULL;
1781 		return;
1782 	}
1783 
1784 	hp = GETSTRUCT(smr_slabbuf_t, nbufs);
1785 
1786 	sbufp = sp->sl_start;
1787 	for (n = 0, bp = hp; n < nbufs; bp++, n++) {
1788 		bp->sb_bufp = sbufp;
1789 		bp->sb_domid = IDN_NIL_DOMID;
1790 		bp->sb_next = bp + 1;
1791 		sbufp += IDN_SMR_BUFSIZE;
1792 	}
1793 	(--bp)->sb_next = NULL;
1794 
1795 	sp->sl_head = sp->sl_free = hp;
1796 	sp->sl_inuse = NULL;
1797 }
1798 
1799 void
1800 smr_free_buflist(smr_slab_t *sp)
1801 {
1802 	int	nbufs;
1803 
1804 	if (sp->sl_head == NULL)
1805 		return;
1806 
1807 	nbufs = (sp->sl_end - sp->sl_start) / IDN_SMR_BUFSIZE;
1808 
1809 	FREESTRUCT(sp->sl_head, smr_slabbuf_t, nbufs);
1810 
1811 	sp->sl_head = sp->sl_free = sp->sl_inuse = NULL;
1812 }
1813 
/*
 * Reserve a slab from the global slab pool on behalf of the given
 * domain.
 *
 * The pools are probed starting at SMR_SLABPOOL_HASH(domid) and, within
 * each pool, starting at SMR_SLAB_HASH(p, domid).  The first entry whose
 * sl_lock can be grabbed with lock_try() is claimed: it is tagged with
 * the domain id and the pool's free count is decremented.
 *
 * Returns:	Pointer to the slab structure reserved for the domain.
 *		For a remote domain this is the (locked) pool entry
 *		itself.  For the local domain it is a separately
 *		allocated full slab structure.
 *		NULL if no slab could be reserved.
 */
static smr_slab_t *
smr_slab_reserve(int domid)
{
	register int		p, nextp, s, nexts;
	register smr_slab_t	*spa;
	int			startp, starts;
	int			foundone = 0;
	int			spl;
	procname_t		proc = "smr_slab_reserve";

	p = startp = SMR_SLABPOOL_HASH(domid);
	nextp = -1;

	/*
	 * Probe with interrupts blocked (splhi) until an entry is
	 * locked or the probe sequence comes back around to where
	 * it started.
	 */
	spl = splhi();
	while ((nextp != startp) && !foundone) {

		s = starts = SMR_SLAB_HASH(p, domid);
		nexts = -1;
		spa = &(idn.slabpool->pool[p].sarray[0]);

		while ((nexts != starts) && !foundone) {
			if (lock_try(&spa[s].sl_lock)) {
				/*
				 * Got it.  p, s and spa identify the
				 * entry for the code below.
				 */
				foundone = 1;
				break;
			}
			nexts = SMR_SLAB_HASHSTEP(p, s);
			s = nexts;
		}
		if (foundone)
			break;
		nextp = SMR_SLABPOOL_HASHSTEP(p);
		p = nextp;
	}
	splx(spl);

	if (foundone) {
		ASSERT((&spa[s] >= idn.slabpool->savep) &&
			(&spa[s] < (idn.slabpool->savep +
					idn.slabpool->ntotslabs)));

		spa[s].sl_domid = (short)domid;

		ATOMIC_DEC(idn.slabpool->pool[p].nfree);

		if (domid == idn.localid) {
			smr_slab_t	*nsp;
			/*
			 * Caller is actually reserving a slab for
			 * themself which means they'll need the full
			 * slab structure to represent all of the I/O
			 * buffers.  The "spa" is just a representative
			 * and doesn't contain the space to manage the
			 * individual buffers.  Need to alloc a full-size
			 * struct.
			 * Note that this results in the returning
			 * smr_slab_t structure being unlocked.
			 * The pool entry itself is not unlocked here.
			 */
			ASSERT(idn.localid == IDN_GET_MASTERID());
			nsp = GETSTRUCT(smr_slab_t, 1);
			nsp->sl_start = spa[s].sl_start;
			nsp->sl_end   = spa[s].sl_end;
			smr_alloc_buflist(nsp);
			spa = nsp;
			PR_SMR("%s: allocated full slab struct for domain %d\n",
				proc, domid);
		} else {
			/*
			 * Slab structure gets returned locked.
			 */
			spa += s;
		}

		PR_SMR("%s: allocated slab 0x%x (start=0x%x, size=%d) for "
			"domain %d\n", proc, (uint_t)spa, (uint_t)spa->sl_start,
			spa->sl_end - spa->sl_start, domid);
	} else {
		PR_SMR("%s: FAILED to allocate for domain %d\n",
			proc, domid);
		spa = NULL;
	}

	return (spa);
}
1901 
1902 static void
1903 smr_slab_unreserve(int domid, smr_slab_t *sp)
1904 {
1905 	register int		p, nextp, s, nexts;
1906 	register smr_slab_t	*spa;
1907 	int			foundit = 0;
1908 	int			startp, starts;
1909 	caddr_t			bufp;
1910 	procname_t		proc = "smr_slab_unreserve";
1911 
1912 	bufp = sp->sl_start;
1913 	p = startp = SMR_SLABPOOL_HASH(domid);
1914 	nextp = -1;
1915 
1916 	while ((nextp != startp) && !foundit) {
1917 
1918 		s = starts = SMR_SLAB_HASH(p, domid);
1919 		nexts = -1;
1920 		spa = &(idn.slabpool->pool[p].sarray[0]);
1921 
1922 		while ((nexts != starts) && !foundit) {
1923 			if (spa[s].sl_start == bufp) {
1924 				foundit = 1;
1925 				break;
1926 			}
1927 			nexts = SMR_SLAB_HASHSTEP(p, s);
1928 			s = nexts;
1929 		}
1930 		if (foundit)
1931 			break;
1932 		nextp = SMR_SLABPOOL_HASHSTEP(p);
1933 		p = nextp;
1934 	}
1935 	if (foundit) {
1936 		ASSERT((&spa[s] >= idn.slabpool->savep) &&
1937 			(&spa[s] < (idn.slabpool->savep +
1938 					idn.slabpool->ntotslabs)));
1939 		ASSERT(!lock_try(&spa[s].sl_lock));
1940 		ASSERT(spa[s].sl_domid == (short)domid);
1941 
1942 		spa[s].sl_next = NULL;
1943 		spa[s].sl_domid = (short)IDN_NIL_DOMID;
1944 		lock_clear(&spa[s].sl_lock);
1945 
1946 		ATOMIC_INC(idn.slabpool->pool[p].nfree);
1947 
1948 		PR_SMR("%s: freed (bufp=0x%x) for domain %d\n",
1949 			proc, (uintptr_t)bufp, domid);
1950 
1951 		if (domid == idn.localid) {
1952 			/*
1953 			 * Caller is actually unreserving a slab of their
1954 			 * own.  Note that only the master calls this
1955 			 * routine.  Since the master's local slab
1956 			 * structures do not get entered into the global
1957 			 * "representative" pool, we need to free up the
1958 			 * data structure that was passed in.
1959 			 */
1960 			ASSERT(idn.localid == IDN_GET_MASTERID());
1961 			ASSERT(sp != &spa[s]);
1962 
1963 			smr_free_buflist(sp);
1964 			FREESTRUCT(sp, smr_slab_t, 1);
1965 		} else {
1966 			ASSERT(sp == &spa[s]);
1967 		}
1968 	} else {
1969 		/*
1970 		 * Couldn't find slab entry for given buf!
1971 		 */
1972 		PR_SMR("%s: FAILED to free (bufp=0x%x) for domain %d\n",
1973 			proc, (uintptr_t)bufp, domid);
1974 	}
1975 }
1976 
1977 /*
1978  * The Reap Protocol:
1979  *	master				   slave
1980  *	------				   -----
1981  *	smr_slab_reap_global
1982  *	- idn_broadcast_cmd(SLABREAP) ->   idn_recv_cmd(SLABREAP)
1983  *	  . idn_local_cmd(SLABREAP)        - idn_recv_slabreap_req
1984  *	    - smr_slab_reap	             . smr_slab_reap
1985  *	      . smr_slaballoc_get_n            - smr_slaballoc_get_n
1986  *	      . smr_slab_free		       - smr_slab_free
1987  *		- smr_slab_free_local		 . smr_slab_free_remote
1988  *		  . smr_slab_unreserve
1989  *				      <-	   - idn_send_cmd(SLABFREE)
1990  *	idn_recv_cmd(SLABFREE)
1991  *	- idn_recv_slabfree_req
1992  *	  . smr_slaballoc_get
1993  *	  . smr_slab_free
1994  *	    - smr_slab_free_local
1995  *	      . smr_slab_unreserve
1996  *        . idn_send_slabfree_resp    ->   idn_recv_cmd(SLABFREE | ack)
1997  *					   - idn_recv_slabfree_resp
1998  *
1999  *	idn_recv_cmd(SLABREAP | ack)  <-     . idn_send_slabreap_resp
2000  *	- idn_recv_slabreap_resp	   DONE
2001  *	DONE
2002  *
2003  * Check available slabs and if we're below the threshold, kick
2004  * off reaping to all remote domains.  There is no guarantee remote
2005  * domains will be able to free up any.
2006  */
2007 static void
2008 smr_slab_reap_global()
2009 {
2010 	register int	p, npools;
2011 	register int	total_free = 0;
2012 	register struct smr_slabtbl	*tblp;
2013 	static clock_t	reap_last = 0;
2014 	procname_t	proc = "smr_slab_reap_global";
2015 
2016 	ASSERT(IDN_GET_MASTERID() != IDN_NIL_DOMID);
2017 
2018 	DSLAB_LOCK_SHARED(idn.localid);
2019 	if (idn_domain[idn.localid].dslab_state != DSLAB_STATE_LOCAL) {
2020 		PR_SMR("%s: only allowed by master (%d)\n",
2021 			proc, IDN_GET_MASTERID());
2022 		DSLAB_UNLOCK(idn.localid);
2023 		return;
2024 	}
2025 	DSLAB_UNLOCK(idn.localid);
2026 
2027 	if ((lbolt > 0) && (lbolt > reap_last) &&
2028 			((lbolt - reap_last) < IDN_REAP_INTERVAL))
2029 		return;
2030 
2031 	reap_last = lbolt;
2032 
2033 	ASSERT(idn.slabpool);
2034 
2035 	npools = idn.slabpool->npools;
2036 	tblp   = idn.slabpool->pool;
2037 
2038 	for (p = 0; p < npools; tblp++, p++)
2039 		total_free += tblp->nfree;
2040 
2041 	if (total_free <= IDN_SLAB_THRESHOLD) {
2042 		int	diff, reap_per_domain;
2043 
2044 		PR_SMR("%s: kicking off reaping "
2045 			"(total_free = %d, min = %ld)\n",
2046 			proc, total_free, IDN_SLAB_THRESHOLD);
2047 
2048 		diff = IDN_SLAB_THRESHOLD - total_free;
2049 		reap_per_domain = (diff < idn.ndomains)
2050 					? 1 : (diff / idn.ndomains);
2051 
2052 		idn_broadcast_cmd(IDNCMD_SLABREAP, reap_per_domain, 0, 0);
2053 	}
2054 }
2055 
/*
 * Reap slabs from the local domain.
 *
 * First walks the connected domains trying to reclaim outstanding
 * buffers (idn_reclaim_mboxdata) so that slabs may become free, then
 * pulls up to *nslabs free slabs off the local domain's slab list
 * (smr_slaballoc_get_n) and frees them (smr_slab_free).
 *
 * domid  = Must be the local domain; anything else is rejected.
 * nslabs = Passed through to smr_slaballoc_get_n() as the
 *	    requested/found slab count.
 */
void
smr_slab_reap(int domid, int *nslabs)
{
	register int	d;
	int		nreclaimed;
	smr_slab_t	*sp;
	domainset_t	reapset;
	procname_t	proc = "smr_slab_reap";

	/*
	 * Should only be called on behalf of local
	 * domain.
	 */
	if (domid != idn.localid) {
		PR_SMR("%s: called by domain %d, should only be local (%d)\n",
			proc, domid, idn.localid);
		ASSERT(0);
		return;
	}
	/*
	 * Try and reclaim some buffers so we can possibly
	 * free up some slabs.
	 * Work from a snapshot of the connected domain set.
	 */
	reapset = idn.domset.ds_connected;

	IDN_GKSTAT_GLOBAL_EVENT(gk_reaps, gk_reap_last);

	/*
	 * NOTE(review): nreclaimed is accumulated below but is not
	 * read anywhere else in this routine.
	 */
	nreclaimed = 0;
	for (d = 0; d < MAX_DOMAINS; d++) {
		int		nr;
		idn_domain_t	*dp;

		if (!DOMAIN_IN_SET(reapset, d))
			continue;

		IDN_DLOCK_SHARED(d);

		dp = &idn_domain[d];
		/*
		 * Skip ourselves and any domain with a negative dcpu.
		 */
		if ((d == idn.localid) || (dp->dcpu < 0)) {
			IDN_DUNLOCK(d);
			continue;
		}
		/*
		 * Clean up any dead I/O errors if possible.
		 */
		if (dp->dioerr > 0) {
			idn_domain_t	*ldp;
			register int	cnt;
			register smr_slabbuf_t	*bp;
			/*
			 * We need to grab the writer lock to prevent
			 * anybody from allocating buffers while we
			 * traverse the slabs outstanding.
			 */
			cnt = 0;
			ldp = &idn_domain[idn.localid];
			IDN_DLOCK_EXCL(idn.localid);
			DSLAB_LOCK_EXCL(idn.localid);
			/*
			 * Count the in-use buffers in our local slabs
			 * that are tagged with domain d.
			 */
			for (sp = ldp->dslab; sp; sp = sp->sl_next)
				for (bp = sp->sl_inuse; bp; bp = bp->sb_next)
					if (bp->sb_domid == d)
						cnt++;
			DSLAB_UNLOCK(idn.localid);
			ASSERT((dp->dio + dp->dioerr) >= cnt);
			/*
			 * Resync the domain's outstanding I/O count to
			 * what was actually found and clear the errors.
			 */
			dp->dio = cnt;
			dp->dioerr = 0;
			IDN_DUNLOCK(idn.localid);
		}
		if ((dp->dstate == IDNDS_CONNECTED) &&
				((nr = idn_reclaim_mboxdata(d, 0, -1)) > 0))
			nreclaimed += nr;

		IDN_DUNLOCK(d);
	}

	/*
	 * Now grab whatever free slabs we can, up to the number
	 * requested, and free them.  smr_slaballoc_get_n() requires
	 * the dslab lock to be held as writer.
	 */
	DSLAB_LOCK_EXCL(domid);
	sp = smr_slaballoc_get_n(domid, nslabs);
	if (sp) {
		IDN_GKSTAT_ADD(gk_reap_count, (ulong_t)(*nslabs));
		smr_slab_free(domid, sp);
	}
	DSLAB_UNLOCK(domid);
}
2139 
2140 /*
2141  * ---------------------------------------------------------------------
2142  * Remap the (IDN) shared memory region to a new physical address.
2143  * Caller is expected to have performed a ecache flush if needed.
2144  * ---------------------------------------------------------------------
2145  */
2146 void
2147 smr_remap(struct as *as, register caddr_t vaddr,
2148 		register pfn_t new_pfn, uint_t mblen)
2149 {
2150 	tte_t		tte;
2151 	size_t		blen;
2152 	pgcnt_t		p, npgs;
2153 	procname_t	proc = "smr_remap";
2154 
2155 	if (va_to_pfn(vaddr) == new_pfn) {
2156 		PR_REMAP("%s: vaddr (0x%x) already mapped to pfn (0x%x)\n",
2157 			proc, (uint_t)vaddr, new_pfn);
2158 		return;
2159 	}
2160 
2161 	blen = MB2B(mblen);
2162 	npgs = btopr(blen);
2163 	ASSERT(npgs != 0);
2164 
2165 	PR_REMAP("%s: va = 0x%x, pfn = 0x%x, npgs = %ld, mb = %d MB (%ld)\n",
2166 		proc, (uint_t)vaddr, new_pfn, npgs, mblen, blen);
2167 
2168 	/*
2169 	 * Unmap the SMR virtual address from it's current
2170 	 * mapping.
2171 	 */
2172 	hat_unload(as->a_hat, vaddr, blen, HAT_UNLOAD_UNLOCK);
2173 
2174 	if (new_pfn == PFN_INVALID)
2175 		return;
2176 
2177 	/*
2178 	 * Map the SMR to the new physical address space,
2179 	 * presumably a remote pfn.  Cannot use hat_devload
2180 	 * because it will think pfn represents non-memory,
2181 	 * i.e. space since it may beyond his physmax.
2182 	 */
2183 	for (p = 0; p < npgs; p++) {
2184 		sfmmu_memtte(&tte, new_pfn,
2185 				PROT_READ | PROT_WRITE | HAT_NOSYNC,
2186 				TTE8K);
2187 		sfmmu_tteload(as->a_hat, &tte, vaddr, NULL, HAT_LOAD_LOCK);
2188 
2189 		vaddr += MMU_PAGESIZE;
2190 		new_pfn++;
2191 	}
2192 
2193 	PR_REMAP("%s: remapped %ld pages (expected %ld)\n",
2194 		proc, npgs, btopr(MB2B(mblen)));
2195 }
2196