xref: /titanic_44/usr/src/uts/sun4u/starfire/io/idn_smr.c (revision 13faa91230bde46da937bf33010b9accc5bdeb59)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Inter-Domain Network
27  *
28  * Shared Memory Region (SMR) supporting code.
29  */
30 
31 #pragma ident	"%Z%%M%	%I%	%E% SMI"
32 
33 #include <sys/types.h>
34 #include <sys/param.h>
35 #include <sys/machparam.h>
36 #include <sys/debug.h>
37 #include <sys/cpuvar.h>
38 #include <sys/kmem.h>
39 #include <sys/mutex.h>
40 #include <sys/rwlock.h>
41 #include <sys/systm.h>
42 #include <sys/machlock.h>
43 #include <sys/membar.h>
44 #include <sys/mman.h>
45 #include <vm/hat.h>
46 #include <vm/as.h>
47 #include <vm/hat_sfmmu.h>
48 #include <sys/vm_machparam.h>
49 #include <sys/x_call.h>
50 
51 #include <sys/idn.h>
52 
/*
 * DIOCHECK(domid)
 *
 * DEBUG-only sanity check: emit a warning (tagged with file and line)
 * when the dio counter of the given domain has gone negative.  In this
 * file dio is bumped for every buffer handed out on behalf of a domain
 * and decremented when buffers are freed or reclaimed.
 * Expands to nothing in non-DEBUG builds.
 *
 * The DEBUG body is wrapped in do/while(0) so that "DIOCHECK(x);" is
 * exactly one statement and remains safe as the body of an unbraced
 * if/else.  The argument is parenthesized wherever it is used.
 */
#ifdef DEBUG
#define	DIOCHECK(domid) \
do { \
	int	_dio; \
	if ((_dio = idn_domain[(domid)].dio) < 0) { \
		cmn_err(CE_WARN, \
			">>>>> file %s, line %d: domain %d, dio = %d", \
			__FILE__, __LINE__, (domid), _dio); \
	} \
} while (0)
#else
#define	DIOCHECK(domid)
#endif /* DEBUG */
66 
67 static int	smr_slab_alloc_local(int domid, smr_slab_t **spp);
68 static int	smr_slab_alloc_remote(int domid, smr_slab_t **spp);
69 static void	smr_slab_free_local(int domid, smr_slab_t *sp);
70 static void	smr_slab_free_remote(int domid, smr_slab_t *sp);
71 static int 	smr_slabwaiter_register(int domid);
72 static int 	smr_slabwaiter_unregister(int domid, smr_slab_t **spp);
73 static int 	smr_slaballoc_wait(int domid, smr_slab_t **spp);
74 static smr_slab_t 	*smr_slab_reserve(int domid);
75 static void 	smr_slab_unreserve(int domid, smr_slab_t *sp);
76 static void	smr_slab_reap_global();
77 
78 /*
79  * Can only be called by the master.  Allocate a slab from the
80  * local pool representing the SMR, on behalf of the given
81  * domain.  Slab is either being requested for use by the
82  * local domain (i.e. domid == idn.localid), or it's being
83  * allocated to give to a remote domain which requested one.
84  * In the base of allocating on behalf of a remote domain,
85  * smr_slab_t structure is used simply to manage ownership.
86  *
87  * Returns:	smr_slaballoc_wait
88  * 		(EINVAL, ETIMEDOUT)
89  *		smr_slabwatier_unregister
90  *		(0, EINVAL, EBUSY, ENOMEM)
91  *		ENOLCK
92  */
93 static int
94 smr_slab_alloc_local(int domid, smr_slab_t **spp)
95 {
96 	int		serrno = 0;
97 	int		nwait;
98 	smr_slab_t	*sp;
99 	idn_domain_t	*dp;
100 
101 
102 	/*
103 	 * Only the master can make local allocations.
104 	 */
105 	ASSERT(IDN_GET_MASTERID() != IDN_NIL_DOMID);
106 	ASSERT(idn.localid == IDN_GET_MASTERID());
107 
108 	*spp = NULL;
109 
110 	dp = &idn_domain[domid];
111 	ASSERT(DSLAB_READ_HELD(domid));
112 	ASSERT(dp->dslab_state == DSLAB_STATE_LOCAL);
113 
114 	/*
115 	 * Register myself with the waiting list.
116 	 */
117 	nwait = smr_slabwaiter_register(domid);
118 
119 	if (nwait > 1) {
120 		/*
121 		 * XXX - old comment?
122 		 * Need to drop the read lock _after_ registering
123 		 * ourselves with the potential wait list for this allocation.
124 		 * Although this allocation is not a remote one, we could
125 		 * still have multiple threads on the master trying to
126 		 * satisfy (allocate) request on behalf of a remote domain.
127 		 */
128 		/*
129 		 * Somebody is already in the process of satisfying
130 		 * the allocation request for this respective
131 		 * domain.  All we need to do is wait and let
132 		 * it happen.
133 		 */
134 		serrno = smr_slaballoc_wait(domid, spp);
135 		return (serrno);
136 	}
137 	/*
138 	 * I'm the original slab requester for this domain.  It's local
139 	 * so go ahead and do the job.
140 	 */
141 
142 	if ((sp = smr_slab_reserve(domid)) == NULL)
143 		serrno = ENOMEM;
144 
145 	/*
146 	 * Allocation may have failed.  In either case we've
147 	 * got to do the put to at least wake potential waiters up.
148 	 */
149 	if (!serrno) {
150 		if (DSLAB_LOCK_TRYUPGRADE(domid) == 0) {
151 			DSLAB_UNLOCK(domid);
152 			DSLAB_LOCK_EXCL(domid);
153 		}
154 	}
155 
156 	(void) smr_slaballoc_put(domid, sp, 0, serrno);
157 
158 	/*
159 	 * If serrno is ENOLCK here, then we must have failed
160 	 * on the upgrade above, so lock already dropped.
161 	 */
162 	if (serrno != ENOLCK) {
163 		/*
164 		 * Need to drop since reaping may be recursive?
165 		 */
166 		DSLAB_UNLOCK(domid);
167 	}
168 
169 	/*
170 	 * Since we were the original requester but never went
171 	 * to sleep, we need to directly unregister ourselves
172 	 * from the waiting list.
173 	 */
174 	serrno = smr_slabwaiter_unregister(domid, spp);
175 
176 	/*
177 	 * Now that we've satisfied the request, let's check if any
178 	 * reaping is necessary.  Only the master does this and only
179 	 * when allocating slabs, an infrequent event :-o
180 	 */
181 	smr_slab_reap_global();
182 
183 	ASSERT((serrno == 0) ? (*spp != NULL) : (*spp == NULL));
184 
185 	DSLAB_LOCK_SHARED(domid);
186 
187 	return (serrno);
188 }
189 
190 /*
191  * Can only be called by a slave on behalf of himself.  Need to
192  * make a request to the master to allocate a slab of SMR buffers
193  * for the local domain.
194  *
195  * Returns:	smr_slaballoc_wait
196  *		(0, EINVAL, EBUSY, ENOMEM)
197  *		ENOLCK
198  *		ECANCELED
199  */
200 static int
201 smr_slab_alloc_remote(int domid, smr_slab_t **spp)
202 {
203 	int		nwait;
204 	int		serrno = 0;
205 	int		bailout = 0;
206 	int		masterid;
207 	idn_domain_t	*dp, *mdp = NULL;
208 	procname_t	proc = "smr_slab_alloc_remote";
209 
210 	/*
211 	 * Only slaves make remote allocations.
212 	 */
213 	ASSERT(idn.localid != IDN_GET_MASTERID());
214 	ASSERT(domid == idn.localid);
215 	ASSERT(IDN_GET_MASTERID() != IDN_NIL_DOMID);
216 
217 	*spp = NULL;
218 
219 	dp = &idn_domain[domid];
220 	ASSERT(DSLAB_READ_HELD(domid));
221 	ASSERT(dp->dslab_state == DSLAB_STATE_REMOTE);
222 
223 	/*
224 	 * Register myself with the slaballoc waiting list.
225 	 * Note that only allow one outstanding allocation
226 	 * request for the given domain.  Other callers which
227 	 * detect a slab is needed simply get stuck on the
228 	 * waiting list waiting for the original caller to
229 	 * get the job done.
230 	 * The waiter_register routine will allocate the necessary
231 	 * slab structure which will ultimately be inserted in
232 	 * the domain's slab list via smr_slaballoc_put().
233 	 */
234 	nwait = smr_slabwaiter_register(domid);
235 
236 	/*
237 	 * Make sure we have a connection with the master
238 	 * before we wait around for nothing and send a
239 	 * command off to nowhere.
240 	 * First do a quick (no lock) check for global okayness.
241 	 */
242 	if ((idn.state != IDNGS_ONLINE) ||
243 			((masterid = IDN_GET_MASTERID()) == IDN_NIL_DOMID)) {
244 		bailout = 1;
245 		serrno = ECANCELED;
246 	}
247 	/*
248 	 * We need to drop our read lock _before_ acquiring the
249 	 * slaballoc waiter lock.  This is necessary because the
250 	 * thread that receives the slab alloc response and fills
251 	 * in the slab structure will need to grab the domain write
252 	 * lock while holding onto the slaballoc waiter lock.
253 	 * Potentially could deadlock if we didn't drop our domain
254 	 * lock before.  Plus, we've registered.
255 	 *
256 	 * 4093209 - Note also that we do this _after_ the check for
257 	 *	idn.masterid where we grab the READER global
258 	 *	lock.  This is to prevent somebody from
259 	 *	changing our state after we drop the drwlock.
260 	 *	A deadlock can occur when shutting down a
261 	 *	domain we're holding the
262 	 */
263 
264 	if (!bailout) {
265 		mdp = &idn_domain[masterid];
266 		/*
267 		 * Global state is okay.  Let's double check the
268 		 * state of our actual target domain.
269 		 */
270 		if (mdp->dstate != IDNDS_CONNECTED) {
271 			bailout = 1;
272 			serrno = ECANCELED;
273 		} else if (IDN_DLOCK_TRY_SHARED(masterid)) {
274 			if (mdp->dstate != IDNDS_CONNECTED) {
275 				bailout = 1;
276 				serrno = ECANCELED;
277 				IDN_DUNLOCK(masterid);
278 			} else if (nwait != 1) {
279 				IDN_DUNLOCK(masterid);
280 			}
281 			/*
282 			 * Note that keep the drwlock(read) for
283 			 * the target (master) domain if it appears
284 			 * we're the lucky one to send the command.
285 			 * We hold onto the lock until we've actually
286 			 * sent the command out.
287 			 * We don't reach this place unless it
288 			 * appears everything is kosher with
289 			 * the target (master) domain.
290 			 */
291 		} else {
292 			bailout = 1;
293 			serrno = ENOLCK;
294 		}
295 	}
296 
297 	if (bailout) {
298 		ASSERT(serrno);
299 		/*
300 		 * Gotta bail.  Abort operation.  Error result
301 		 * will be picked up when we attempt to wait.
302 		 */
303 		PR_SMR("%s: BAILING OUT on behalf domain %d "
304 			"(err=%d, gs=%s, ms=%s)\n",
305 			proc, domid, serrno, idngs_str[idn.state],
306 			(masterid == IDN_NIL_DOMID)
307 			? "unknown" : idnds_str[idn_domain[masterid].dstate]);
308 		(void) smr_slabwaiter_abort(domid, serrno);
309 
310 	} else if (nwait == 1) {
311 		/*
312 		 * We are the original requester.  Initiate the
313 		 * actual request to the master.
314 		 */
315 		idn_send_cmd(masterid, IDNCMD_SLABALLOC,
316 				IDN_SLAB_SIZE, 0, 0);
317 		ASSERT(mdp);
318 		IDN_DUNLOCK(masterid);
319 	}
320 
321 	/*
322 	 * Wait here for response.  Once awakened func returns
323 	 * with slab structure possibly filled with gifts!
324 	 */
325 	serrno = smr_slaballoc_wait(domid, spp);
326 
327 	return (serrno);
328 }
329 
330 /*
331  * Allocate a slab from the Master on behalf
332  * of the given domain.  Note that master uses
333  * this function to allocate slabs on behalf of
334  * remote domains also.
335  * Entered with drwlock held.
336  * Leaves with drwlock dropped.
337  * Returns:	EDQUOT
338  *		EINVAL
339  *		ENOLCK
340  *		smr_slab_alloc_local
341  *		smr_slab_alloc_remote
342  *		(0, EINVAL, EBUSY, ENOMEM)
343  */
344 int
345 smr_slab_alloc(int domid, smr_slab_t **spp)
346 {
347 	int		serrno = 0;
348 	idn_domain_t	*dp;
349 	procname_t	proc = "smr_slab_alloc";
350 
351 
352 	dp = &idn_domain[domid];
353 
354 	ASSERT(DSLAB_READ_HELD(domid));
355 	ASSERT(dp->dslab_state != DSLAB_STATE_UNKNOWN);
356 
357 	*spp = NULL;
358 
359 	switch (dp->dslab_state) {
360 	case DSLAB_STATE_UNKNOWN:
361 		cmn_err(CE_WARN,
362 			"IDN: 300: no slab allocations without a master");
363 		serrno = EINVAL;
364 		break;
365 
366 	case DSLAB_STATE_LOCAL:
367 		/*
368 		 * If I'm the master, then get a slab
369 		 * from the local SMR pool, but only
370 		 * if the number of allocated slabs has
371 		 * not been exceeded.
372 		 */
373 		if (((int)dp->dnslabs < IDN_SLAB_MAXPERDOMAIN) ||
374 						!IDN_SLAB_MAXPERDOMAIN)
375 			serrno = smr_slab_alloc_local(domid, spp);
376 		else
377 			serrno = EDQUOT;
378 		break;
379 
380 	case DSLAB_STATE_REMOTE:
381 		/*
382 		 * Have to make a remote request.
383 		 * In order to prevent overwhelming the master
384 		 * with a bunch of requests that he won't be able
385 		 * to handle we do a check to see if we're still
386 		 * under quota.  Note that the limit is known
387 		 * apriori based on the SMR/NWR size and
388 		 * IDN_SLAB_MINTOTAL.  Domains must have the same
389 		 * size SMR/NWR, however they can have different
390 		 * IDN_SLAB_MINTOTAL.  Thus a domain could throttle
391 		 * itself however it wishes.
392 		 */
393 		if (((int)dp->dnslabs < IDN_SLAB_MAXPERDOMAIN) ||
394 						!IDN_SLAB_MAXPERDOMAIN)
395 			serrno = smr_slab_alloc_remote(domid, spp);
396 		else
397 			serrno = EDQUOT;
398 		break;
399 
400 	default:
401 		cmn_err(CE_WARN,
402 			"IDN: 301: (ALLOC) unknown slab state (%d) "
403 			"for domain %d", dp->dslab_state, domid);
404 		serrno = EINVAL;
405 		break;
406 	}
407 
408 	if (*spp == NULL) {
409 		PR_SMR("%s: failed to allocate %s slab [serrno = %d]\n",
410 			proc, (idn.localid == IDN_GET_MASTERID()) ?
411 			"local" : "remote", serrno);
412 	}
413 
414 	if (serrno) {
415 		IDN_GKSTAT_GLOBAL_EVENT(gk_slabfail, gk_slabfail_last);
416 	}
417 
418 	return (serrno);
419 }
420 
421 static void
422 smr_slab_free_local(int domid, smr_slab_t *sp)
423 {
424 	int	rv;
425 
426 	/*
427 	 * Do a slaballoc_put just in case there may have
428 	 * been waiters for slabs for this respective domain
429 	 * before we unreserve this slab.
430 	 */
431 	rv = smr_slaballoc_put(domid, sp, 0, 0);
432 
433 	if (rv == -1) {
434 		/*
435 		 * Put failed.  Must not have been any waiters.
436 		 * Go ahead and unreserve the space.
437 		 */
438 		smr_slab_unreserve(domid, sp);
439 	}
440 }
441 
442 static void
443 smr_slab_free_remote(int domid, smr_slab_t *sp)
444 {
445 	smr_offset_t	slab_offset;
446 	int		slab_size;
447 	int		rv;
448 	int		masterid;
449 
450 	ASSERT(domid == idn.localid);
451 	ASSERT(idn.localid != IDN_GET_MASTERID());
452 	ASSERT(DSLAB_WRITE_HELD(domid));
453 	ASSERT(idn_domain[domid].dslab_state == DSLAB_STATE_REMOTE);
454 
455 	masterid = IDN_GET_MASTERID();
456 
457 	ASSERT(masterid != IDN_NIL_DOMID);
458 
459 	slab_offset = IDN_ADDR2OFFSET(sp->sl_start);
460 	slab_size   = (int)(sp->sl_end - sp->sl_start);
461 
462 	/*
463 	 * Do a slaballoc_put just in case there may have
464 	 * been waiters for slabs for this domain before
465 	 * returning back to the master.
466 	 */
467 	rv = smr_slaballoc_put(domid, sp, 0, 0);
468 
469 	if ((rv == -1) && (masterid != IDN_NIL_DOMID)) {
470 		/*
471 		 * Put failed.  No waiters so free the local data
472 		 * structure ship the SMR range off to the master.
473 		 */
474 		smr_free_buflist(sp);
475 		FREESTRUCT(sp, smr_slab_t, 1);
476 
477 		IDN_DLOCK_SHARED(masterid);
478 		idn_send_cmd(masterid, IDNCMD_SLABFREE,
479 				slab_offset, slab_size, 0);
480 		IDN_DUNLOCK(masterid);
481 	}
482 }
483 
484 /*
485  * Free up the list of slabs passed
486  */
487 void
488 smr_slab_free(int domid, smr_slab_t *sp)
489 {
490 	smr_slab_t	*nsp = NULL;
491 
492 	ASSERT(DSLAB_WRITE_HELD(domid));
493 
494 	if (sp == NULL)
495 		return;
496 
497 	ASSERT(IDN_GET_MASTERID() != IDN_NIL_DOMID);
498 
499 	switch (idn_domain[domid].dslab_state) {
500 	case DSLAB_STATE_UNKNOWN:
501 		cmn_err(CE_WARN,
502 			"IDN: 302: no slab free without a master");
503 		break;
504 
505 	case DSLAB_STATE_LOCAL:
506 		/*
507 		 * If I'm the master then put the slabs
508 		 * back to the local SMR pool.
509 		 */
510 		for (; sp; sp = nsp) {
511 			nsp = sp->sl_next;
512 			smr_slab_free_local(domid, sp);
513 		}
514 		break;
515 
516 	case DSLAB_STATE_REMOTE:
517 		/*
518 		 * If the domid is my own then I'm freeing
519 		 * a slab back to the Master.
520 		 */
521 		for (; sp; sp = nsp) {
522 			nsp = sp->sl_next;
523 			smr_slab_free_remote(domid, sp);
524 		}
525 		break;
526 
527 	default:
528 		cmn_err(CE_WARN,
529 			"IDN: 301: (FREE) unknown slab state "
530 			"(%d) for domain %d",
531 			idn_domain[domid].dslab_state, domid);
532 		break;
533 	}
534 }
535 
536 /*
537  * Free up the list of slab data structures ONLY.
538  * This is called during a fatal shutdown of the master
539  * where we need to garbage collect the locally allocated
540  * data structures used to manage slabs allocated to the
541  * local domain.  Should never be called by a master since
542  * the master can do a regular smr_slab_free.
543  */
544 void
545 smr_slab_garbage_collection(smr_slab_t *sp)
546 {
547 	smr_slab_t	*nsp;
548 
549 	ASSERT(idn_domain[idn.localid].dvote.v.master == 0);
550 
551 	if (sp == NULL)
552 		return;
553 	/*
554 	 * Since this is only ever called by a slave,
555 	 * the slab structure size always contains a buflist.
556 	 */
557 	for (; sp; sp = nsp) {
558 		nsp = sp->sl_next;
559 		smr_free_buflist(sp);
560 		FREESTRUCT(sp, smr_slab_t, 1);
561 	}
562 }
563 
564 /*
565  * Allocate a SMR buffer on behalf of the local domain
566  * which is ultimately targeted for the given domain.
567  *
568  * IMPORTANT: This routine is going to drop the domain rwlock (drwlock)
569  *	      for the domain on whose behalf the request is being
570  *	      made.  This routine canNOT block on trying to
571  *	      reacquire the drwlock.  If he does block then somebody
572  *	      must have the write lock on the domain which most likely
573  *	      means the domain is going south anyway, so just bail on
574  *	      this buffer.  Higher levels will retry if needed.
575  *
576  * XXX - Support larger than IDN_SMR_BUFSIZE allocations?
577  *
578  * Returns:	A negative return value indicates lock lost on domid.
579  *		EINVAL, ENOLINK, ENOLCK(internal)
580  *		smr_slaballoc_wait
581  * 		(EINVAL, ETIMEDOUT)
582  *		smr_slabwatier_unregister
583  *		(0, EINVAL, EBUSY, ENOMEM)
584  */
585 int
586 smr_buf_alloc(int domid, uint_t len, caddr_t *bufpp)
587 {
588 	register idn_domain_t	*dp, *ldp;
589 	smr_slab_t	*sp;
590 	caddr_t		bufp = NULL;
591 	int		serrno;
592 	procname_t	proc = "smr_buf_alloc";
593 
594 	dp = &idn_domain[domid];
595 	/*
596 	 * Local domain can only allocate on behalf of
597 	 * itself if this is a priviledged call and the
598 	 * caller is the master.
599 	 */
600 	ASSERT((domid != idn.localid) && (domid != IDN_NIL_DOMID));
601 
602 	*bufpp = NULL;
603 
604 	if (len > IDN_DATA_SIZE) {
605 		cmn_err(CE_WARN,
606 			"IDN: 303: buffer len %d > IDN_DATA_SIZE (%lu)",
607 			len, IDN_DATA_SIZE);
608 		IDN_GKSTAT_GLOBAL_EVENT(gk_buffail, gk_buffail_last);
609 		return (EINVAL);
610 	}
611 
612 	/*
613 	 * Need to go to my local slab list to find
614 	 * a buffer.
615 	 */
616 	ldp = &idn_domain[idn.localid];
617 	/*
618 	 * Now we loop trying to locate a buffer out of our
619 	 * slabs.  We continue this until either we find a
620 	 * buffer or we're unable to allocate a slab.  Note
621 	 * that new slabs are allocated to the front.
622 	 */
623 	DSLAB_LOCK_SHARED(idn.localid);
624 	sp = ldp->dslab;
625 	do {
626 		int	spl, all_empty;
627 
628 		if (sp == NULL) {
629 			if ((serrno = smr_slab_alloc(idn.localid, &sp)) != 0) {
630 				PR_SMR("%s:%d: failed to allocate "
631 					"slab [serrno = %d]",
632 					proc, domid, serrno);
633 				DSLAB_UNLOCK(idn.localid);
634 				IDN_GKSTAT_GLOBAL_EVENT(gk_buffail,
635 							gk_buffail_last);
636 				return (serrno);
637 			}
638 			/*
639 			 * Of course, the world may have changed while
640 			 * we dropped the lock.  Better make sure we're
641 			 * still established.
642 			 */
643 			if (dp->dstate != IDNDS_CONNECTED) {
644 				PR_SMR("%s:%d: state changed during slab "
645 					"alloc (dstate = %s)\n",
646 					proc, domid, idnds_str[dp->dstate]);
647 				DSLAB_UNLOCK(idn.localid);
648 				IDN_GKSTAT_GLOBAL_EVENT(gk_buffail,
649 							gk_buffail_last);
650 				return (ENOLINK);
651 			}
652 			/*
653 			 * We were able to allocate a slab.  Should
654 			 * be at the front of the list, spin again.
655 			 */
656 			sp = ldp->dslab;
657 		}
658 		/*
659 		 * If we have reached here then we have a slab!
660 		 * Hopefully there are free bufs there :-o
661 		 */
662 		spl = splhi();
663 		all_empty = 1;
664 		for (; sp && !bufp; sp = sp->sl_next) {
665 			smr_slabbuf_t	*bp;
666 
667 			if (sp->sl_free == NULL)
668 				continue;
669 
670 			if (!lock_try(&sp->sl_lock)) {
671 				all_empty = 0;
672 				continue;
673 			}
674 
675 			if ((bp = sp->sl_free) == NULL) {
676 				lock_clear(&sp->sl_lock);
677 				continue;
678 			}
679 
680 			sp->sl_free = bp->sb_next;
681 			bp->sb_next = sp->sl_inuse;
682 			sp->sl_inuse = bp;
683 			/*
684 			 * Found a free buffer.
685 			 */
686 			bp->sb_domid = domid;
687 			bufp = bp->sb_bufp;
688 			lock_clear(&sp->sl_lock);
689 		}
690 		splx(spl);
691 
692 		if (!all_empty && !bufp) {
693 			/*
694 			 * If we still haven't found a buffer, but
695 			 * there's still possibly a buffer available,
696 			 * then try again.  Only if we're absolutely
697 			 * sure all slabs are empty do we attempt
698 			 * to allocate a new one.
699 			 */
700 			sp = ldp->dslab;
701 		}
702 	} while (bufp == NULL);
703 
704 	*bufpp = bufp;
705 
706 	ATOMIC_INC(dp->dio);
707 
708 	DSLAB_UNLOCK(idn.localid);
709 
710 	return (0);
711 }
712 
713 /*
714  * Free a buffer allocated to the local domain back to
715  * its respective slab.  Slabs are freed via the slab-reap command.
716  * XXX - Support larger than IDN_SMR_BUFSIZE allocations?
717  */
718 int
719 smr_buf_free(int domid, caddr_t bufp, uint_t len)
720 {
721 	register smr_slab_t	*sp;
722 	smr_slabbuf_t		*bp, **bpp;
723 	idn_domain_t		*ldp;
724 	int		buffreed;
725 	int		lockheld = (len == (uint_t)-1);
726 
727 	/*
728 	 * We should never be free'ing a buffer on
729 	 * behalf of ourselves as we are never the
730 	 * target for allocated SMR buffers.
731 	 */
732 	ASSERT(domid != idn.localid);
733 
734 	sp = NULL;
735 	buffreed = 0;
736 	ldp = &idn_domain[idn.localid];
737 
738 	DSLAB_LOCK_SHARED(idn.localid);
739 
740 	if (((uintptr_t)bufp & (IDN_SMR_BUFSIZE-1)) &&
741 	    (IDN_ADDR2OFFSET(bufp) % IDN_SMR_BUFSIZE)) {
742 		cmn_err(CE_WARN,
743 			"IDN: 304: buffer (0x%p) from domain %d not on a "
744 			"%d boundary", bufp, domid, IDN_SMR_BUFSIZE);
745 		goto bfdone;
746 	}
747 	if (!lockheld && (len > IDN_DATA_SIZE)) {
748 		cmn_err(CE_WARN,
749 			"IDN: 305: buffer length (%d) from domain %d greater "
750 			"than IDN_DATA_SIZE (%lu)",
751 			len, domid, IDN_DATA_SIZE);
752 		goto bfdone;
753 	}
754 
755 	for (sp = ldp->dslab; sp; sp = sp->sl_next)
756 		if ((bufp >= sp->sl_start) && (bufp < sp->sl_end))
757 			break;
758 
759 	if (sp) {
760 		int spl;
761 
762 		spl = splhi();
763 		while (!lock_try(&sp->sl_lock))
764 			;
765 		bpp = &sp->sl_inuse;
766 		for (bp = *bpp; bp; bp = *bpp) {
767 			if (bp->sb_bufp == bufp)
768 				break;
769 			bpp = &bp->sb_next;
770 		}
771 		if (bp) {
772 			ASSERT(bp->sb_domid == domid);
773 			buffreed++;
774 			bp->sb_domid = IDN_NIL_DOMID;
775 			*bpp = bp->sb_next;
776 			bp->sb_next = sp->sl_free;
777 			sp->sl_free = bp;
778 		}
779 		lock_clear(&sp->sl_lock);
780 		splx(spl);
781 	}
782 bfdone:
783 	if (buffreed) {
784 		ATOMIC_DEC(idn_domain[domid].dio);
785 		DIOCHECK(domid);
786 	} else {
787 		cmn_err(CE_WARN,
788 			"IDN: 306: unknown buffer (0x%p) from domain %d",
789 			bufp, domid);
790 		ATOMIC_INC(idn_domain[domid].dioerr);
791 	}
792 
793 	DSLAB_UNLOCK(idn.localid);
794 
795 	return (sp ? 0 : -1);
796 }
797 
798 /*
799  * Alternative interface to smr_buf_free, but with local drwlock
800  * held.
801  */
802 /* ARGSUSED2 */
803 int
804 smr_buf_free_locked(int domid, caddr_t bufp, uint_t len)
805 {
806 	return (smr_buf_free(domid, bufp, (uint_t)-1));
807 }
808 
809 /*
810  * Free any and all buffers associated with the given domain.
811  * Assumption is that domain is dead and buffers are not in use.
812  * Returns:	Number of buffers freed.
813  *		-1 if error.
814  */
815 int
816 smr_buf_free_all(int domid)
817 {
818 	register smr_slab_t	*sp;
819 	register smr_slabbuf_t	*bp, **bpp;
820 	idn_domain_t		*ldp;
821 	int			nbufsfreed = 0;
822 	procname_t	proc = "smr_buf_free_all";
823 
824 	/*
825 	 * We should never be free'ing buffers on
826 	 * behalf of ourself
827 	 */
828 	ASSERT(domid != idn.localid);
829 
830 	if (!VALID_DOMAINID(domid)) {
831 		cmn_err(CE_WARN,
832 			"IDN: 307: domain ID (%d) invalid", domid);
833 		return (-1);
834 	}
835 
836 	ldp = &idn_domain[idn.localid];
837 
838 	/*
839 	 * We grab the writer lock so that we don't have any
840 	 * competition during a "free-all" call.
841 	 * No need to grab individual slab locks when holding
842 	 * dslab(writer).
843 	 */
844 	DSLAB_LOCK_EXCL(idn.localid);
845 
846 	for (sp = ldp->dslab; sp; sp = sp->sl_next) {
847 		bpp = &sp->sl_inuse;
848 		for (bp = *bpp; bp; bp = *bpp) {
849 			if (bp->sb_domid == domid) {
850 				bp->sb_domid = IDN_NIL_DOMID;
851 				*bpp = bp->sb_next;
852 				bp->sb_next = sp->sl_free;
853 				sp->sl_free = bp;
854 				nbufsfreed++;
855 			} else {
856 				bpp = &bp->sb_next;
857 			}
858 		}
859 	}
860 
861 	if (nbufsfreed > 0) {
862 		ATOMIC_SUB(idn_domain[domid].dio, nbufsfreed);
863 		idn_domain[domid].dioerr = 0;
864 		DIOCHECK(domid);
865 	}
866 
867 	DSLAB_UNLOCK(idn.localid);
868 
869 	PR_SMR("%s: freed %d buffers for domain %d\n",
870 		proc, nbufsfreed, domid);
871 
872 	return (nbufsfreed);
873 }
874 
875 int
876 smr_buf_reclaim(int domid, int nbufs)
877 {
878 	int		num_reclaimed = 0;
879 	idn_domain_t	*ldp, *dp;
880 	procname_t	proc = "smr_buf_reclaim";
881 
882 	ldp = &idn_domain[idn.localid];
883 	dp  = &idn_domain[domid];
884 
885 	ASSERT(domid != idn.localid);
886 
887 	if (ATOMIC_CAS(&dp->dreclaim_inprogress, 0, 1)) {
888 		/*
889 		 * Reclaim is already in progress, don't
890 		 * bother.
891 		 */
892 		PR_DATA("%s: reclaim already in progress\n", proc);
893 		return (0);
894 	}
895 
896 	PR_SMR("%s: requested %d buffers from domain %d\n",
897 		proc, nbufs, domid);
898 
899 	if (dp->dio && nbufs) {
900 		register smr_slab_t	*sp;
901 		int spl;
902 
903 		DSLAB_LOCK_SHARED(idn.localid);
904 		spl = splhi();
905 		for (sp = ldp->dslab; sp && nbufs; sp = sp->sl_next) {
906 			register smr_slabbuf_t	*bp, **bpp;
907 
908 			if (sp->sl_inuse == NULL)
909 				continue;
910 
911 			if (!lock_try(&sp->sl_lock))
912 				continue;
913 
914 			if (sp->sl_inuse == NULL) {
915 				lock_clear(&sp->sl_lock);
916 				continue;
917 			}
918 
919 			bpp = &sp->sl_inuse;
920 			for (bp = *bpp; bp && nbufs; bp = *bpp) {
921 				if (bp->sb_domid == domid) {
922 					/*
923 					 * Buffer no longer in use,
924 					 * reclaim it.
925 					 */
926 					bp->sb_domid = IDN_NIL_DOMID;
927 					*bpp = bp->sb_next;
928 					bp->sb_next = sp->sl_free;
929 					sp->sl_free = bp;
930 					num_reclaimed++;
931 					nbufs--;
932 				} else {
933 					bpp = &bp->sb_next;
934 				}
935 			}
936 			lock_clear(&sp->sl_lock);
937 		}
938 		splx(spl);
939 
940 		if (num_reclaimed > 0) {
941 			ATOMIC_SUB(dp->dio, num_reclaimed);
942 			DIOCHECK(domid);
943 		}
944 		DSLAB_UNLOCK(idn.localid);
945 	}
946 
947 	PR_SMR("%s: reclaimed %d buffers from domain %d\n",
948 		proc, num_reclaimed, domid);
949 
950 	return (num_reclaimed);
951 }
952 
953 /*
954  * Returns 1	If any buffers are locked for the given slab.
955  *	   0	If all buffers are free for the given slab.
956  *
957  * The caller is assumed to have the slab protected so that no
958  * new allocations are attempted from it.  Also, this is only
959  * valid to be called with respect to slabs that were allocated
960  * on behalf of the local domain, i.e. the master is not expected
961  * to call this function with (slave) slab "representatives".
962  */
963 int
964 smr_slab_busy(smr_slab_t *sp)
965 {
966 	return ((sp && sp->sl_inuse) ? 1 : 0);
967 }
968 
969 int
970 smr_slabwaiter_init()
971 {
972 	register int		i;
973 	struct slabwaiter	*wp;
974 
975 	if (idn.slabwaiter != NULL)
976 		return (0);
977 
978 	/*
979 	 * Initialize the slab waiting area for MAX_DOMAINS.
980 	 */
981 	idn.slabwaiter = GETSTRUCT(struct slabwaiter, MAX_DOMAINS);
982 	wp = idn.slabwaiter;
983 	for (i = 0; i < MAX_DOMAINS; wp++, i++) {
984 		wp->w_closed = 0;
985 		mutex_init(&wp->w_mutex, NULL, MUTEX_DEFAULT, NULL);
986 		cv_init(&wp->w_cv, NULL, CV_DEFAULT, NULL);
987 	}
988 
989 	return (0);
990 }
991 
992 void
993 smr_slabwaiter_deinit()
994 {
995 	register int		i;
996 	struct slabwaiter	*wp;
997 
998 	if ((wp = idn.slabwaiter) == NULL)
999 		return;
1000 
1001 	for (i = 0; i < MAX_DOMAINS; wp++, i++) {
1002 		ASSERT(wp->w_nwaiters == 0);
1003 		ASSERT(wp->w_sp == NULL);
1004 		cv_destroy(&wp->w_cv);
1005 		mutex_destroy(&wp->w_mutex);
1006 	}
1007 
1008 	FREESTRUCT(idn.slabwaiter, struct slabwaiter, MAX_DOMAINS);
1009 	idn.slabwaiter = NULL;
1010 }
1011 
1012 void
1013 smr_slabwaiter_open(domainset_t domset)
1014 {
1015 	int			d;
1016 	struct slabwaiter	*wp;
1017 
1018 	if ((domset == 0) || !idn.slabwaiter)
1019 		return;
1020 
1021 	wp = idn.slabwaiter;
1022 
1023 	for (d = 0; d < MAX_DOMAINS; wp++, d++) {
1024 		if (!DOMAIN_IN_SET(domset, d))
1025 			continue;
1026 		mutex_enter(&wp->w_mutex);
1027 		wp->w_closed = 0;
1028 		mutex_exit(&wp->w_mutex);
1029 	}
1030 }
1031 
1032 void
1033 smr_slabwaiter_close(domainset_t domset)
1034 {
1035 	int			d;
1036 	struct slabwaiter	*wp;
1037 
1038 	if ((domset == 0) || !idn.slabwaiter)
1039 		return;
1040 
1041 	wp = idn.slabwaiter;
1042 
1043 	for (d = 0; d < MAX_DOMAINS; wp++, d++) {
1044 		if (!DOMAIN_IN_SET(domset, d))
1045 			continue;
1046 		mutex_enter(&wp->w_mutex);
1047 		wp->w_closed = 1;
1048 		cv_broadcast(&wp->w_cv);
1049 		mutex_exit(&wp->w_mutex);
1050 	}
1051 }
1052 
1053 /*
1054  * Register the caller with the waiting list for the
1055  * given domain.
1056  *
1057  * Protocol:
1058  *	1st Local requester:	register -> alloc ->
1059  *						put(wakeup|xdc) -> unregister
1060  *	Nth Local requester:	register -> wait
1061  *	1st Remote requester:	register -> xdc -> wait
1062  *	Nth Remote requester:	register -> wait
1063  *
1064  *	Remote Responder:	local alloc -> put(xdc)
1065  *	Local Handler:		xdc -> put(wakeup)
1066  *
1067  * E.g. A standard slave allocation request:
1068  *	slave			master
1069  *	-----			------
1070  *	idn_slab_alloc(remote)
1071  *	- register
1072  *	- xdc		->	idn_handler
1073  *	- wait			...
1074  *				idn_slab_alloc(local)
1075  *				- register
1076  *				- alloc
1077  *				- put
1078  *				  . wakeup [local]
1079  *				- unregister
1080  *	idn_handler    	<-	- xdc
1081  *	- put       		DONE
1082  *	  . wakeup [local]
1083  *	    |
1084  *	    V
1085  *      - wait
1086  *	  . unregister
1087  *	DONE
1088  */
1089 static int
1090 smr_slabwaiter_register(int domid)
1091 {
1092 	struct slabwaiter	*wp;
1093 	int		nwait;
1094 	procname_t	proc = "smr_slabwaiter_register";
1095 
1096 
1097 	ASSERT(domid != IDN_NIL_DOMID);
1098 
1099 	ASSERT(DSLAB_READ_HELD(domid));
1100 
1101 	wp = &idn.slabwaiter[domid];
1102 
1103 	ASSERT(MUTEX_NOT_HELD(&wp->w_mutex));
1104 
1105 	mutex_enter(&wp->w_mutex);
1106 
1107 	nwait = ++(wp->w_nwaiters);
1108 	ASSERT(nwait > 0);
1109 
1110 	PR_SMR("%s: domain = %d, (new)nwaiters = %d\n",
1111 		proc, domid, nwait);
1112 
1113 	if (nwait > 1) {
1114 		/*
1115 		 * There are already waiters for slab allocations
1116 		 * with respect to this domain.
1117 		 */
1118 		PR_SMR("%s: existing waiters for slabs for domain %d\n",
1119 			proc, domid);
1120 		mutex_exit(&wp->w_mutex);
1121 
1122 		return (nwait);
1123 	}
1124 	PR_SMR("%s: initial waiter for slabs for domain %d\n", proc, domid);
1125 	/*
1126 	 * We are the first requester of a slab allocation for this
1127 	 * respective domain.  Need to prep waiting area for
1128 	 * subsequent arrival of a slab.
1129 	 */
1130 	wp->w_sp = NULL;
1131 	wp->w_done = 0;
1132 	wp->w_serrno = 0;
1133 
1134 	mutex_exit(&wp->w_mutex);
1135 
1136 	return (nwait);
1137 }
1138 
1139 /*
1140  * It is assumed that the caller had previously registered,
1141  * but wakeup did not occur due to caller never waiting.
1142  * Thus, slaballoc mutex is still held by caller.
1143  *
1144  * Returns:	0
1145  *		EINVAL
1146  *		EBUSY
1147  *		w_serrno (smr_slaballoc_put)
1148  *		(0, ENOLCK, ENOMEM, EDQUOT, EBUSY, ECANCELED)
1149  */
1150 static int
1151 smr_slabwaiter_unregister(int domid, smr_slab_t **spp)
1152 {
1153 	struct slabwaiter	*wp;
1154 	int		serrno = 0;
1155 	procname_t	proc = "smr_slabwaiter_unregister";
1156 
1157 
1158 	ASSERT(domid != IDN_NIL_DOMID);
1159 
1160 	wp = &idn.slabwaiter[domid];
1161 
1162 	mutex_enter(&wp->w_mutex);
1163 
1164 	PR_SMR("%s: domain = %d, nwaiters = %d\n",
1165 		proc, domid, wp->w_nwaiters);
1166 
1167 	if (wp->w_nwaiters <= 0) {
1168 		/*
1169 		 * Hmmm...nobody is registered!
1170 		 */
1171 		PR_SMR("%s: NO WAITERS (domid = %d)\n", proc, domid);
1172 		mutex_exit(&wp->w_mutex);
1173 		return (EINVAL);
1174 	}
1175 	(wp->w_nwaiters)--;
1176 	/*
1177 	 * Is our present under the tree?
1178 	 */
1179 	if (!wp->w_done) {
1180 		/*
1181 		 * Bummer...no presents.  Let the caller know
1182 		 * via a null slab pointer.
1183 		 * Note that we don't clean up immediately since
1184 		 * message might still come in for other waiters.
1185 		 * Thus, late sleepers may still get a chance.
1186 		 */
1187 		PR_SMR("%s: bummer no slab allocated for domain %d\n",
1188 			proc, domid);
1189 		ASSERT(wp->w_sp == NULL);
1190 		(*spp) = NULL;
1191 		serrno = wp->w_closed ? ECANCELED : EBUSY;
1192 
1193 	} else {
1194 		(*spp) = wp->w_sp;
1195 		serrno = wp->w_serrno;
1196 
1197 #ifdef DEBUG
1198 		if (serrno == 0) {
1199 			register smr_slab_t	*sp;
1200 
1201 			ASSERT(wp->w_sp);
1202 			PR_SMR("%s: allocation succeeded (domain %d)\n",
1203 				proc, domid);
1204 
1205 			DSLAB_LOCK_SHARED(domid);
1206 			for (sp = idn_domain[domid].dslab; sp; sp = sp->sl_next)
1207 				if (sp == wp->w_sp)
1208 					break;
1209 			if (sp == NULL)
1210 				cmn_err(CE_WARN,
1211 					"%s:%d: slab ptr = NULL",
1212 					proc, domid);
1213 			DSLAB_UNLOCK(domid);
1214 		} else {
1215 			PR_SMR("%s: allocation failed (domain %d) "
1216 				"[serrno = %d]\n", proc, domid, serrno);
1217 		}
1218 #endif /* DEBUG */
1219 	}
1220 	if (wp->w_nwaiters == 0) {
1221 		/*
1222 		 * Last one turns out the lights.
1223 		 */
1224 		PR_SMR("%s: domain %d last waiter, turning out lights\n",
1225 			proc, domid);
1226 		wp->w_sp = NULL;
1227 		wp->w_done = 0;
1228 		wp->w_serrno = 0;
1229 	}
1230 	mutex_exit(&wp->w_mutex);
1231 
1232 	return (serrno);
1233 }
1234 
1235 /*
1236  * Called to abort any slaballoc requests on behalf of the
1237  * given domain.
1238  */
1239 int
1240 smr_slabwaiter_abort(int domid, int serrno)
1241 {
1242 	ASSERT(serrno != 0);
1243 
1244 	return (smr_slaballoc_put(domid, NULL, 0, serrno));
1245 }
1246 
1247 /*
1248  * Put ourselves into a timedwait waiting for slab to be
1249  * allocated.
1250  * Returns with slaballoc mutex dropped.
1251  *
1252  * Returns:	EINVAL
1253  *		ETIMEDOUT
1254  *		smr_slabwatier_unregister
1255  *		(0, EINVAL, EBUSY, ENOMEM)
1256  */
1257 static int
1258 smr_slaballoc_wait(int domid, smr_slab_t **spp)
1259 {
1260 	struct slabwaiter	*wp;
1261 	int			serrno = 0, serrno_unreg;
1262 	procname_t		proc = "smr_slaballoc_wait";
1263 
1264 
1265 	wp = &idn.slabwaiter[domid];
1266 
1267 	ASSERT(MUTEX_NOT_HELD(&wp->w_mutex));
1268 
1269 	mutex_enter(&wp->w_mutex);
1270 
1271 	PR_SMR("%s: domain = %d, nwaiters = %d, wsp = 0x%p\n",
1272 		proc, domid, wp->w_nwaiters, wp->w_sp);
1273 
1274 	if (wp->w_nwaiters <= 0) {
1275 		/*
1276 		 * Hmmm...no waiters registered.
1277 		 */
1278 		PR_SMR("%s: domain %d, no waiters!\n",
1279 			proc, domid);
1280 		mutex_exit(&wp->w_mutex);
1281 		return (EINVAL);
1282 	}
1283 	ASSERT(DSLAB_READ_HELD(domid));
1284 	DSLAB_UNLOCK(domid);
1285 
1286 	if (!wp->w_done && !wp->w_closed) {
1287 		int	rv;
1288 
1289 		/*
1290 		 * Only wait if data hasn't arrived yet.
1291 		 */
1292 		PR_SMR("%s: domain %d, going to sleep...\n",
1293 			proc, domid);
1294 
1295 
1296 		rv = cv_timedwait_sig(&wp->w_cv, &wp->w_mutex,
1297 				lbolt + IDN_SLABALLOC_WAITTIME);
1298 		if (rv == -1)
1299 			serrno = ETIMEDOUT;
1300 
1301 		PR_SMR("%s: domain %d, awakened (reason = %s)\n",
1302 			proc, domid, (rv == -1) ? "TIMEOUT" : "SIGNALED");
1303 	}
1304 	/*
1305 	 * We've awakened or request already filled!
1306 	 * Unregister ourselves.
1307 	 */
1308 	mutex_exit(&wp->w_mutex);
1309 
1310 	/*
1311 	 * Any gifts will be entered into spp.
1312 	 */
1313 	serrno_unreg = smr_slabwaiter_unregister(domid, spp);
1314 
1315 	/*
1316 	 * Leave with reader lock on dslab_lock.
1317 	 */
1318 	DSLAB_LOCK_SHARED(domid);
1319 
1320 	if ((serrno_unreg == EBUSY) && (serrno == ETIMEDOUT))
1321 		return (serrno);
1322 	else
1323 		return (serrno_unreg);
1324 }
1325 
1326 /*
1327  * A SMR slab was allocated on behalf of the given domain.
1328  * Wakeup anybody that may have been waiting for the allocation.
1329  * Note that if the domain is a remote one, i.e. master is allocating
1330  * on behalf of a slave, it's up to the caller to transmit the
1331  * allocation response to that domain.
1332  * The force flag indicates that we want to install the slab for
1333  * the given user regardless of whether there are waiters or not.
1334  * This is used primarily in situations where a slave may have timed
1335  * out before the response actually arrived.  In this situation we
1336  * don't want to send slab back to the master after we went through
1337  * the trouble of allocating one.  Master is _not_ allowed to do this
1338  * for remote domains.
1339  *
1340  * Returns:	-1	Non-registered waiter or waiting area garbaged.
1341  *		0	Successfully performed operation.
1342  */
1343 int
1344 smr_slaballoc_put(int domid, smr_slab_t *sp, int forceflag, int serrno)
1345 {
1346 	idn_domain_t		*dp;
1347 	struct slabwaiter	*wp;
1348 	procname_t		proc = "smr_slaballoc_put";
1349 
1350 
1351 	dp = &idn_domain[domid];
1352 
1353 	ASSERT(!serrno ? DSLAB_WRITE_HELD(domid) : 1);
1354 
1355 	if (domid == IDN_NIL_DOMID)
1356 		return (-1);
1357 
1358 	ASSERT(serrno ? (sp == NULL) : (sp != NULL));
1359 
1360 	wp = &idn.slabwaiter[domid];
1361 
1362 	mutex_enter(&wp->w_mutex);
1363 
1364 	PR_SMR("%s: domain = %d, bufp = 0x%p, ebufp = 0x%p, "
1365 		"(f = %d, se = %d)\n", proc, domid,
1366 		(sp ? sp->sl_start : 0),
1367 		(sp ? sp->sl_end : 0), forceflag, serrno);
1368 
1369 	if (wp->w_nwaiters <= 0) {
1370 		/*
1371 		 * There are no waiters!!  Must have timed out
1372 		 * and left.  Oh well...
1373 		 */
1374 		PR_SMR("%s: no slaballoc waiters found for domain %d\n",
1375 			proc, domid);
1376 		if (!forceflag || serrno || !sp) {
1377 			/*
1378 			 * No waiters and caller doesn't want to force it.
1379 			 */
1380 			mutex_exit(&wp->w_mutex);
1381 			return (-1);
1382 		}
1383 		PR_SMR("%s: forcing slab onto domain %d\n", proc, domid);
1384 		ASSERT(domid == idn.localid);
1385 		ASSERT(wp->w_sp == NULL);
1386 		wp->w_done = 0;
1387 		/*
1388 		 * Now we fall through and let it be added in the
1389 		 * regular manor.
1390 		 */
1391 	}
1392 	if (wp->w_done) {
1393 		/*
1394 		 * There's at least one waiter so there has
1395 		 * to be a slab structure waiting for us.
1396 		 * If everything is going smoothly, there should only
1397 		 * be one guy coming through the path of inserting
1398 		 * an error or good slab.  However, if a disconnect was
1399 		 * detected, you may get several guys coming through
1400 		 * trying to let everybody know.
1401 		 */
1402 		ASSERT(wp->w_serrno ?
1403 			(wp->w_sp == NULL) : (wp->w_sp != NULL));
1404 
1405 		cv_broadcast(&wp->w_cv);
1406 		mutex_exit(&wp->w_mutex);
1407 
1408 		return (-1);
1409 	}
1410 	if (serrno != 0) {
1411 		/*
1412 		 * Bummer...allocation failed.  This call is simply
1413 		 * to wake up the sleepers and let them know.
1414 		 */
1415 		PR_SMR("%s: slaballoc failed for domain %d\n",
1416 			proc, domid);
1417 		wp->w_serrno = serrno;
1418 		wp->w_done = 1;
1419 		cv_broadcast(&wp->w_cv);
1420 		mutex_exit(&wp->w_mutex);
1421 
1422 		return (0);
1423 	}
1424 	PR_SMR("%s: putting slab into struct (domid=%d, localid=%d)\n",
1425 		proc, domid, idn.localid);
1426 	/*
1427 	 * Prep the slab structure.
1428 	 */
1429 
1430 	if (domid == idn.localid) {
1431 		/*
1432 		 * Allocation was indeed for me.
1433 		 * Slab may or may not be locked when
1434 		 * we reach.  Normally they will be locked
1435 		 * if we're being called on behalf of a
1436 		 * free, and not locked if on behalf of
1437 		 * a new allocation request.
1438 		 */
1439 		lock_clear(&sp->sl_lock);
1440 		smr_alloc_buflist(sp);
1441 #ifdef DEBUG
1442 	} else {
1443 		uint_t	rv;
1444 		/*
1445 		 * Slab was not allocated on my behalf.  Must be
1446 		 * a master request on behalf of some other domain.
1447 		 * Prep appropriately.  Slab should have been locked
1448 		 * by smr_slab_reserve.
1449 		 */
1450 		rv = lock_try(&sp->sl_lock);
1451 		ASSERT(!rv);
1452 		ASSERT(sp->sl_domid == (short)domid);
1453 #endif /* DEBUG */
1454 	}
1455 
1456 	/*
1457 	 * Slab is ready to go.  Insert it into the domain's
1458 	 * slab list so once we wake everybody up they'll find it.
1459 	 * You better have write lock if you're putting treasures
1460 	 * there.
1461 	 */
1462 	ASSERT(DSLAB_WRITE_HELD(domid));
1463 
1464 	sp->sl_next = dp->dslab;
1465 	dp->dslab  = sp;
1466 	dp->dnslabs++;
1467 
1468 	/*
1469 	 * It's possible to fall through here without waiters.
1470 	 * This is a case where forceflag was set.
1471 	 */
1472 	if (wp->w_nwaiters > 0) {
1473 		wp->w_sp = sp;
1474 		wp->w_serrno = serrno;
1475 		wp->w_done = 1;
1476 		cv_broadcast(&wp->w_cv);
1477 	} else {
1478 		ASSERT(forceflag);
1479 		wp->w_sp = NULL;
1480 		wp->w_serrno = 0;
1481 		wp->w_done = 0;
1482 	}
1483 	mutex_exit(&wp->w_mutex);
1484 
1485 	return (0);
1486 }
1487 
1488 /*
1489  * Get the slab representing [bufp,ebufp] from the respective
1490  * domain's pool if all the buffers are free.  Remove them from
1491  * the domain's list and return it.
1492  * If bufp == NULL, then return however many free ones you
1493  * can find.
1494  * List of slabs are returned locked (sl_lock).
1495  * XXX - Need minimum limit to make sure we don't free up _all_
1496  *	 of our slabs!  However, during a shutdown we will need
1497  *	 method to free them all up regardless of locking.
1498  */
1499 smr_slab_t *
1500 smr_slaballoc_get(int domid, caddr_t bufp, caddr_t ebufp)
1501 {
1502 	idn_domain_t	*dp;
1503 	smr_slab_t	*retsp, *sp, **psp;
1504 	int		foundit, islocal = 0;
1505 	int		nslabs;
1506 	procname_t	proc = "smr_slaballoc_get";
1507 
1508 	PR_SMR("%s: getting slab for domain %d [bufp=0x%p, ebufp=0x%p]\n",
1509 		proc, domid, bufp, ebufp);
1510 
1511 	dp = &idn_domain[domid];
1512 
1513 	ASSERT(DSLAB_WRITE_HELD(domid));
1514 
1515 	if ((sp = dp->dslab) == NULL) {
1516 		PR_SMR("%s: oops, no slabs for domain %d\n",
1517 			proc, domid);
1518 		return (NULL);
1519 	}
1520 	/*
1521 	 * If domid is myself then I'm trying to get a slab out
1522 	 * of my local pool.  Otherwise, I'm the master and
1523 	 * I'm trying to get the slab representative from the
1524 	 * global pool.
1525 	 */
1526 	if (domid == idn.localid)
1527 		islocal = 1;
1528 
1529 	if (bufp != NULL) {
1530 		nslabs = -1;
1531 	} else {
1532 		nslabs = *(int *)ebufp;
1533 		if (nslabs == 0) {
1534 			PR_SMR("%s: requested nslabs (%d) <= 0\n",
1535 				proc, nslabs);
1536 			return (NULL);
1537 		} else if (nslabs < 0) {
1538 			/*
1539 			 * Caller wants them all!
1540 			 */
1541 			nslabs = (int)dp->dnslabs;
1542 		}
1543 	}
1544 
1545 	retsp = NULL;
1546 	foundit = 0;
1547 	for (psp = &dp->dslab; sp; sp = *psp) {
1548 		int	isbusy;
1549 
1550 		if (bufp && (sp->sl_start != bufp)) {
1551 			psp = &sp->sl_next;
1552 			continue;
1553 		}
1554 
1555 		if (bufp && (ebufp > sp->sl_end)) {
1556 			PR_SMR("%s: bufp/ebufp (0x%p/0x%p) "
1557 				"expected (0x%p/0x%p)\n", proc, bufp, ebufp,
1558 				sp->sl_start, sp->sl_end);
1559 			ASSERT(0);
1560 		}
1561 		/*
1562 		 * We found the desired slab.  Make sure
1563 		 * it's free.
1564 		 */
1565 		foundit++;
1566 		isbusy = 0;
1567 		if (islocal) {
1568 			int spl;
1569 
1570 			/*
1571 			 * Some of the buffers in the slab
1572 			 * are still in use.  Unlock the
1573 			 * buffers we locked and bail out.
1574 			 */
1575 			spl = splhi();
1576 			if (!lock_try(&sp->sl_lock)) {
1577 				isbusy = 1;
1578 				foundit--;
1579 			} else if (sp->sl_inuse) {
1580 				lock_clear(&sp->sl_lock);
1581 				isbusy = 1;
1582 				foundit--;
1583 			}
1584 			splx(spl);
1585 		} else {
1586 			/*
1587 			 * If not local, then I'm the master getting
1588 			 * a slab from one of the slaves.  In this case,
1589 			 * their slab structs will always be locked.
1590 			 */
1591 			ASSERT(!lock_try(&sp->sl_lock));
1592 		}
1593 		if (!isbusy) {
1594 			/*
1595 			 * Delete the entry from the list and slap
1596 			 * it onto our return list.
1597 			 */
1598 			*psp = sp->sl_next;
1599 			sp->sl_next = retsp;
1600 			retsp = sp;
1601 		} else {
1602 			psp = &sp->sl_next;
1603 		}
1604 		/*
1605 		 * If bufp == NULL (alternate interface) and we haven't
1606 		 * found the desired number of slabs yet, keep looking.
1607 		 */
1608 		if (bufp || (foundit == nslabs))
1609 			break;
1610 	}
1611 	dp->dnslabs -= (short)foundit;
1612 
1613 	if (foundit) {
1614 		PR_SMR("%s: found %d free slabs (domid = %d)\n",
1615 			proc, foundit, domid);
1616 	} else {
1617 		PR_SMR("%s: no free slabs found (domid = %d)\n",
1618 			proc, domid);
1619 	}
1620 
1621 	/*
1622 	 * If this is the alternate interface, need to return
1623 	 * the number of slabs found in the ebufp parameter.
1624 	 */
1625 	if (bufp == NULL)
1626 		*(int *)ebufp = foundit;
1627 
1628 	return (retsp);
1629 }
1630 
1631 /*
1632  * Wrapper to hide alternate interface to smr_slaballoc_get()
1633  */
1634 smr_slab_t *
1635 smr_slaballoc_get_n(int domid, int *nslabs)
1636 {
1637 	smr_slab_t	*sp;
1638 
1639 	ASSERT(DSLAB_WRITE_HELD(domid));
1640 
1641 	sp = smr_slaballoc_get(domid, NULL, (caddr_t)nslabs);
1642 
1643 	return (sp);
1644 }
1645 
1646 /*
1647  * Only called by master.  Initialize slab pool based on local SMR.
1648  * Returns number of slabs initialized.
1649  * reserved_size = Length of area at the front of the NWR portion
1650  *		   of the SMR to reserve and not make available for
1651  *		   slab allocations.  Must be a IDN_SMR_BUFSIZE multiple.
1652  * reserved_area = Pointer to reserved area, if any.
1653  */
1654 int
1655 smr_slabpool_init(size_t reserved_size, caddr_t *reserved_area)
1656 {
1657 	size_t			nwr_available;
1658 	int			minperpool, ntotslabs, nxslabs, nslabs;
1659 	register int		p, pp;
1660 	register caddr_t	bufp;
1661 	register smr_slab_t	*sp;
1662 
1663 	ASSERT(IDN_GLOCK_IS_EXCL());
1664 	ASSERT(IDN_GET_MASTERID() != IDN_NIL_DOMID);
1665 
1666 	*reserved_area = NULL;
1667 
1668 	nwr_available = MB2B(IDN_NWR_SIZE) - reserved_size;
1669 
1670 	if ((idn.localid != IDN_GET_MASTERID()) ||
1671 	    (nwr_available < IDN_SLAB_SIZE) ||
1672 	    (idn.slabpool != NULL) ||
1673 	    ((reserved_size != 0) && (reserved_size & (IDN_SMR_BUFSIZE-1)))) {
1674 		return (-1);
1675 	}
1676 
1677 	idn.slabpool = GETSTRUCT(struct slabpool, 1);
1678 	idn.slabpool->ntotslabs = ntotslabs = nwr_available / IDN_SLAB_SIZE;
1679 	ASSERT(ntotslabs > 0);
1680 	minperpool = (ntotslabs < IDN_SLAB_MINPERPOOL) ?
1681 						1 : IDN_SLAB_MINPERPOOL;
1682 	idn.slabpool->npools = (ntotslabs + (minperpool - 1)) / minperpool;
1683 
1684 	if ((idn.slabpool->npools & 1) == 0) {
1685 		/*
1686 		 * npools needs to be odd for hashing algorithm.
1687 		 */
1688 		idn.slabpool->npools++;
1689 	}
1690 	ASSERT(idn.slabpool->npools > 0);
1691 	minperpool = (ntotslabs < idn.slabpool->npools) ?
1692 				1 : (ntotslabs / idn.slabpool->npools);
1693 
1694 	/*
1695 	 * Calculate the number of extra slabs that will need to
1696 	 * be alloted to the pools.  This number will be less than
1697 	 * npools.  Only one extra slab is allocated to each pool
1698 	 * until we have assigned all the extra slabs.
1699 	 */
1700 	if (ntotslabs > (idn.slabpool->npools * minperpool))
1701 		nxslabs = ntotslabs - (idn.slabpool->npools * minperpool);
1702 	else
1703 		nxslabs = 0;
1704 	ASSERT((nxslabs >= 0) && (nxslabs < idn.slabpool->npools));
1705 
1706 	idn.slabpool->pool = GETSTRUCT(struct smr_slabtbl,
1707 					idn.slabpool->npools);
1708 	sp = GETSTRUCT(smr_slab_t, idn.slabpool->ntotslabs);
1709 
1710 	idn.slabpool->savep = sp;
1711 	bufp = idn.smr.vaddr + reserved_size;
1712 
1713 	for (p = nslabs = 0;
1714 	    (p < idn.slabpool->npools) && (ntotslabs > 0);
1715 	    p++, ntotslabs -= nslabs) {
1716 
1717 		nslabs = (ntotslabs < minperpool) ? ntotslabs : minperpool;
1718 		if (nxslabs > 0) {
1719 			nslabs++;
1720 			nxslabs--;
1721 		}
1722 		idn.slabpool->pool[p].sarray = sp;
1723 		for (pp = 0; pp < nslabs; pp++) {
1724 
1725 			sp->sl_next  = NULL;
1726 			sp->sl_start = bufp;
1727 			sp->sl_end   = bufp = sp->sl_start + IDN_SLAB_SIZE;
1728 			sp->sl_lock  = 0;
1729 			sp->sl_domid = (short)IDN_NIL_DOMID;
1730 
1731 			sp++;
1732 		}
1733 		idn.slabpool->pool[p].nfree   = nslabs;
1734 		idn.slabpool->pool[p].nslabs  = nslabs;
1735 	}
1736 	ASSERT((ntotslabs == 0) && (nxslabs == 0));
1737 	/*
1738 	 * We should be at the end of the SMR at this point.
1739 	 */
1740 	ASSERT(bufp == (idn.smr.vaddr
1741 			+ reserved_size
1742 			+ (idn.slabpool->ntotslabs * IDN_SLAB_SIZE)));
1743 
1744 	if (reserved_size != 0)
1745 		*reserved_area = idn.smr.vaddr;
1746 
1747 	return (0);
1748 }
1749 
1750 void
1751 smr_slabpool_deinit()
1752 {
1753 	if (idn.slabpool == NULL)
1754 		return;
1755 
1756 	FREESTRUCT(idn.slabpool->savep, smr_slab_t, idn.slabpool->ntotslabs);
1757 	FREESTRUCT(idn.slabpool->pool, struct smr_slabtbl,
1758 			idn.slabpool->npools);
1759 	FREESTRUCT(idn.slabpool, struct slabpool, 1);
1760 
1761 	idn.slabpool = NULL;
1762 }
1763 
1764 void
1765 smr_alloc_buflist(smr_slab_t *sp)
1766 {
1767 	int		n, nbufs;
1768 	caddr_t		sbufp;
1769 	smr_slabbuf_t	*hp, *bp;
1770 
1771 	if (sp->sl_head)
1772 		return;
1773 
1774 	nbufs = (sp->sl_end - sp->sl_start) / IDN_SMR_BUFSIZE;
1775 	ASSERT(nbufs > 0);
1776 	if (nbufs <= 0) {
1777 		sp->sl_head = sp->sl_free = sp->sl_inuse = NULL;
1778 		return;
1779 	}
1780 
1781 	hp = GETSTRUCT(smr_slabbuf_t, nbufs);
1782 
1783 	sbufp = sp->sl_start;
1784 	for (n = 0, bp = hp; n < nbufs; bp++, n++) {
1785 		bp->sb_bufp = sbufp;
1786 		bp->sb_domid = IDN_NIL_DOMID;
1787 		bp->sb_next = bp + 1;
1788 		sbufp += IDN_SMR_BUFSIZE;
1789 	}
1790 	(--bp)->sb_next = NULL;
1791 
1792 	sp->sl_head = sp->sl_free = hp;
1793 	sp->sl_inuse = NULL;
1794 }
1795 
1796 void
1797 smr_free_buflist(smr_slab_t *sp)
1798 {
1799 	int	nbufs;
1800 
1801 	if (sp->sl_head == NULL)
1802 		return;
1803 
1804 	nbufs = (sp->sl_end - sp->sl_start) / IDN_SMR_BUFSIZE;
1805 
1806 	FREESTRUCT(sp->sl_head, smr_slabbuf_t, nbufs);
1807 
1808 	sp->sl_head = sp->sl_free = sp->sl_inuse = NULL;
1809 }
1810 
1811 /*
1812  * Returns:	0 Successfully located a slab.
1813  *	       -1 Failure.
1814  */
1815 static smr_slab_t *
1816 smr_slab_reserve(int domid)
1817 {
1818 	register int		p, nextp, s, nexts;
1819 	register smr_slab_t	*spa;
1820 	int			startp, starts;
1821 	int			foundone = 0;
1822 	int			spl;
1823 	procname_t		proc = "smr_slab_reserve";
1824 
1825 	p = startp = SMR_SLABPOOL_HASH(domid);
1826 	nextp = -1;
1827 
1828 	spl = splhi();
1829 	while ((nextp != startp) && !foundone) {
1830 
1831 		s = starts = SMR_SLAB_HASH(p, domid);
1832 		nexts = -1;
1833 		spa = &(idn.slabpool->pool[p].sarray[0]);
1834 
1835 		while ((nexts != starts) && !foundone) {
1836 			if (lock_try(&spa[s].sl_lock)) {
1837 				foundone = 1;
1838 				break;
1839 			}
1840 			nexts = SMR_SLAB_HASHSTEP(p, s);
1841 			s = nexts;
1842 		}
1843 		if (foundone)
1844 			break;
1845 		nextp = SMR_SLABPOOL_HASHSTEP(p);
1846 		p = nextp;
1847 	}
1848 	splx(spl);
1849 
1850 	if (foundone) {
1851 		ASSERT((&spa[s] >= idn.slabpool->savep) &&
1852 			(&spa[s] < (idn.slabpool->savep +
1853 					idn.slabpool->ntotslabs)));
1854 
1855 		spa[s].sl_domid = (short)domid;
1856 
1857 		ATOMIC_DEC(idn.slabpool->pool[p].nfree);
1858 
1859 		if (domid == idn.localid) {
1860 			smr_slab_t	*nsp;
1861 			/*
1862 			 * Caller is actually reserving a slab for
1863 			 * themself which means they'll need the full
1864 			 * slab structure to represent all of the I/O
1865 			 * buffers.  The "spa" is just a representative
1866 			 * and doesn't contain the space to manage the
1867 			 * individual buffers.  Need to alloc a full-size
1868 			 * struct.
1869 			 * Note that this results in the returning
1870 			 * smr_slab_t structure being unlocked.
1871 			 */
1872 			ASSERT(idn.localid == IDN_GET_MASTERID());
1873 			nsp = GETSTRUCT(smr_slab_t, 1);
1874 			nsp->sl_start = spa[s].sl_start;
1875 			nsp->sl_end   = spa[s].sl_end;
1876 			smr_alloc_buflist(nsp);
1877 			spa = nsp;
1878 			PR_SMR("%s: allocated full slab struct for domain %d\n",
1879 				proc, domid);
1880 		} else {
1881 			/*
1882 			 * Slab structure gets returned locked.
1883 			 */
1884 			spa += s;
1885 		}
1886 
1887 		PR_SMR("%s: allocated slab 0x%p (start=0x%p, size=%lu) for "
1888 			"domain %d\n", proc, spa, spa->sl_start,
1889 			spa->sl_end - spa->sl_start, domid);
1890 	} else {
1891 		PR_SMR("%s: FAILED to allocate for domain %d\n",
1892 			proc, domid);
1893 		spa = NULL;
1894 	}
1895 
1896 	return (spa);
1897 }
1898 
1899 static void
1900 smr_slab_unreserve(int domid, smr_slab_t *sp)
1901 {
1902 	register int		p, nextp, s, nexts;
1903 	register smr_slab_t	*spa;
1904 	int			foundit = 0;
1905 	int			startp, starts;
1906 	caddr_t			bufp;
1907 	procname_t		proc = "smr_slab_unreserve";
1908 
1909 	bufp = sp->sl_start;
1910 	p = startp = SMR_SLABPOOL_HASH(domid);
1911 	nextp = -1;
1912 
1913 	while ((nextp != startp) && !foundit) {
1914 
1915 		s = starts = SMR_SLAB_HASH(p, domid);
1916 		nexts = -1;
1917 		spa = &(idn.slabpool->pool[p].sarray[0]);
1918 
1919 		while ((nexts != starts) && !foundit) {
1920 			if (spa[s].sl_start == bufp) {
1921 				foundit = 1;
1922 				break;
1923 			}
1924 			nexts = SMR_SLAB_HASHSTEP(p, s);
1925 			s = nexts;
1926 		}
1927 		if (foundit)
1928 			break;
1929 		nextp = SMR_SLABPOOL_HASHSTEP(p);
1930 		p = nextp;
1931 	}
1932 	if (foundit) {
1933 		ASSERT((&spa[s] >= idn.slabpool->savep) &&
1934 			(&spa[s] < (idn.slabpool->savep +
1935 					idn.slabpool->ntotslabs)));
1936 		ASSERT(!lock_try(&spa[s].sl_lock));
1937 		ASSERT(spa[s].sl_domid == (short)domid);
1938 
1939 		spa[s].sl_next = NULL;
1940 		spa[s].sl_domid = (short)IDN_NIL_DOMID;
1941 		lock_clear(&spa[s].sl_lock);
1942 
1943 		ATOMIC_INC(idn.slabpool->pool[p].nfree);
1944 
1945 		PR_SMR("%s: freed (bufp=0x%p) for domain %d\n",
1946 			proc, bufp, domid);
1947 
1948 		if (domid == idn.localid) {
1949 			/*
1950 			 * Caller is actually unreserving a slab of their
1951 			 * own.  Note that only the master calls this
1952 			 * routine.  Since the master's local slab
1953 			 * structures do not get entered into the global
1954 			 * "representative" pool, we need to free up the
1955 			 * data structure that was passed in.
1956 			 */
1957 			ASSERT(idn.localid == IDN_GET_MASTERID());
1958 			ASSERT(sp != &spa[s]);
1959 
1960 			smr_free_buflist(sp);
1961 			FREESTRUCT(sp, smr_slab_t, 1);
1962 		} else {
1963 			ASSERT(sp == &spa[s]);
1964 		}
1965 	} else {
1966 		/*
1967 		 * Couldn't find slab entry for given buf!
1968 		 */
1969 		PR_SMR("%s: FAILED to free (bufp=0x%p) for domain %d\n",
1970 			proc, bufp, domid);
1971 	}
1972 }
1973 
1974 /*
1975  * The Reap Protocol:
1976  *	master				   slave
1977  *	------				   -----
1978  *	smr_slab_reap_global
1979  *	- idn_broadcast_cmd(SLABREAP) ->   idn_recv_cmd(SLABREAP)
1980  *	  . idn_local_cmd(SLABREAP)        - idn_recv_slabreap_req
1981  *	    - smr_slab_reap	             . smr_slab_reap
1982  *	      . smr_slaballoc_get_n            - smr_slaballoc_get_n
1983  *	      . smr_slab_free		       - smr_slab_free
1984  *		- smr_slab_free_local		 . smr_slab_free_remote
1985  *		  . smr_slab_unreserve
1986  *				      <-	   - idn_send_cmd(SLABFREE)
1987  *	idn_recv_cmd(SLABFREE)
1988  *	- idn_recv_slabfree_req
1989  *	  . smr_slaballoc_get
1990  *	  . smr_slab_free
1991  *	    - smr_slab_free_local
1992  *	      . smr_slab_unreserve
1993  *        . idn_send_slabfree_resp    ->   idn_recv_cmd(SLABFREE | ack)
1994  *					   - idn_recv_slabfree_resp
1995  *
1996  *	idn_recv_cmd(SLABREAP | ack)  <-     . idn_send_slabreap_resp
1997  *	- idn_recv_slabreap_resp	   DONE
1998  *	DONE
1999  *
2000  * Check available slabs and if we're below the threshold, kick
2001  * off reaping to all remote domains.  There is no guarantee remote
2002  * domains will be able to free up any.
2003  */
2004 static void
2005 smr_slab_reap_global()
2006 {
2007 	register int	p, npools;
2008 	register int	total_free = 0;
2009 	register struct smr_slabtbl	*tblp;
2010 	static clock_t	reap_last = 0;
2011 	procname_t	proc = "smr_slab_reap_global";
2012 
2013 	ASSERT(IDN_GET_MASTERID() != IDN_NIL_DOMID);
2014 
2015 	DSLAB_LOCK_SHARED(idn.localid);
2016 	if (idn_domain[idn.localid].dslab_state != DSLAB_STATE_LOCAL) {
2017 		PR_SMR("%s: only allowed by master (%d)\n",
2018 			proc, IDN_GET_MASTERID());
2019 		DSLAB_UNLOCK(idn.localid);
2020 		return;
2021 	}
2022 	DSLAB_UNLOCK(idn.localid);
2023 
2024 	if ((lbolt > 0) && (lbolt > reap_last) &&
2025 			((lbolt - reap_last) < IDN_REAP_INTERVAL))
2026 		return;
2027 
2028 	reap_last = lbolt;
2029 
2030 	ASSERT(idn.slabpool);
2031 
2032 	npools = idn.slabpool->npools;
2033 	tblp   = idn.slabpool->pool;
2034 
2035 	for (p = 0; p < npools; tblp++, p++)
2036 		total_free += tblp->nfree;
2037 
2038 	if (total_free <= IDN_SLAB_THRESHOLD) {
2039 		int	diff, reap_per_domain;
2040 
2041 		PR_SMR("%s: kicking off reaping "
2042 			"(total_free = %d, min = %d)\n",
2043 			proc, total_free, IDN_SLAB_THRESHOLD);
2044 
2045 		diff = IDN_SLAB_THRESHOLD - total_free;
2046 		reap_per_domain = (diff < idn.ndomains)
2047 					? 1 : (diff / idn.ndomains);
2048 
2049 		idn_broadcast_cmd(IDNCMD_SLABREAP, reap_per_domain, 0, 0);
2050 	}
2051 }
2052 
2053 void
2054 smr_slab_reap(int domid, int *nslabs)
2055 {
2056 	register int	d;
2057 	int		nreclaimed;
2058 	smr_slab_t	*sp;
2059 	domainset_t	reapset;
2060 	procname_t	proc = "smr_slab_reap";
2061 
2062 	/*
2063 	 * Should only be called on behalf of local
2064 	 * domain.
2065 	 */
2066 	if (domid != idn.localid) {
2067 		PR_SMR("%s: called by domain %d, should only be local (%d)\n",
2068 			proc, domid, idn.localid);
2069 		ASSERT(0);
2070 		return;
2071 	}
2072 	/*
2073 	 * Try and reclaim some buffers so we can possibly
2074 	 * free up some slabs.
2075 	 */
2076 	reapset = idn.domset.ds_connected;
2077 
2078 	IDN_GKSTAT_GLOBAL_EVENT(gk_reaps, gk_reap_last);
2079 
2080 	nreclaimed = 0;
2081 	for (d = 0; d < MAX_DOMAINS; d++) {
2082 		int		nr;
2083 		idn_domain_t	*dp;
2084 
2085 		if (!DOMAIN_IN_SET(reapset, d))
2086 			continue;
2087 
2088 		IDN_DLOCK_SHARED(d);
2089 
2090 		dp = &idn_domain[d];
2091 		if ((d == idn.localid) || (dp->dcpu < 0)) {
2092 			IDN_DUNLOCK(d);
2093 			continue;
2094 		}
2095 		/*
2096 		 * Clean up any dead I/O errors if possible.
2097 		 */
2098 		if (dp->dioerr > 0) {
2099 			idn_domain_t	*ldp;
2100 			register int	cnt;
2101 			register smr_slabbuf_t	*bp;
2102 			/*
2103 			 * We need to grab the writer lock to prevent
2104 			 * anybody from allocating buffers while we
2105 			 * traverse the slabs outstanding.
2106 			 */
2107 			cnt = 0;
2108 			ldp = &idn_domain[idn.localid];
2109 			IDN_DLOCK_EXCL(idn.localid);
2110 			DSLAB_LOCK_EXCL(idn.localid);
2111 			for (sp = ldp->dslab; sp; sp = sp->sl_next)
2112 				for (bp = sp->sl_inuse; bp; bp = bp->sb_next)
2113 					if (bp->sb_domid == d)
2114 						cnt++;
2115 			DSLAB_UNLOCK(idn.localid);
2116 			ASSERT((dp->dio + dp->dioerr) >= cnt);
2117 			dp->dio = cnt;
2118 			dp->dioerr = 0;
2119 			IDN_DUNLOCK(idn.localid);
2120 		}
2121 		if ((dp->dstate == IDNDS_CONNECTED) &&
2122 				((nr = idn_reclaim_mboxdata(d, 0, -1)) > 0))
2123 			nreclaimed += nr;
2124 
2125 		IDN_DUNLOCK(d);
2126 	}
2127 
2128 	DSLAB_LOCK_EXCL(domid);
2129 	sp = smr_slaballoc_get_n(domid, nslabs);
2130 	if (sp) {
2131 		IDN_GKSTAT_ADD(gk_reap_count, (ulong_t)(*nslabs));
2132 		smr_slab_free(domid, sp);
2133 	}
2134 	DSLAB_UNLOCK(domid);
2135 }
2136 
2137 /*
2138  * ---------------------------------------------------------------------
2139  * Remap the (IDN) shared memory region to a new physical address.
2140  * Caller is expected to have performed a ecache flush if needed.
2141  * ---------------------------------------------------------------------
2142  */
2143 void
2144 smr_remap(struct as *as, register caddr_t vaddr,
2145 		register pfn_t new_pfn, uint_t mblen)
2146 {
2147 	tte_t		tte;
2148 	size_t		blen;
2149 	pgcnt_t		p, npgs;
2150 	procname_t	proc = "smr_remap";
2151 
2152 	if (va_to_pfn(vaddr) == new_pfn) {
2153 		PR_REMAP("%s: vaddr (0x%p) already mapped to pfn (0x%lx)\n",
2154 			proc, vaddr, new_pfn);
2155 		return;
2156 	}
2157 
2158 	blen = MB2B(mblen);
2159 	npgs = btopr(blen);
2160 	ASSERT(npgs != 0);
2161 
2162 	PR_REMAP("%s: va = 0x%p, pfn = 0x%lx, npgs = %ld, mb = %d MB (%ld)\n",
2163 		proc, vaddr, new_pfn, npgs, mblen, blen);
2164 
2165 	/*
2166 	 * Unmap the SMR virtual address from it's current
2167 	 * mapping.
2168 	 */
2169 	hat_unload(as->a_hat, vaddr, blen, HAT_UNLOAD_UNLOCK);
2170 
2171 	if (new_pfn == PFN_INVALID)
2172 		return;
2173 
2174 	/*
2175 	 * Map the SMR to the new physical address space,
2176 	 * presumably a remote pfn.  Cannot use hat_devload
2177 	 * because it will think pfn represents non-memory,
2178 	 * i.e. space since it may beyond his physmax.
2179 	 */
2180 	for (p = 0; p < npgs; p++) {
2181 		sfmmu_memtte(&tte, new_pfn,
2182 				PROT_READ | PROT_WRITE | HAT_NOSYNC,
2183 				TTE8K);
2184 		sfmmu_tteload(as->a_hat, &tte, vaddr, NULL, HAT_LOAD_LOCK);
2185 
2186 		vaddr += MMU_PAGESIZE;
2187 		new_pfn++;
2188 	}
2189 
2190 	PR_REMAP("%s: remapped %ld pages (expected %ld)\n",
2191 		proc, npgs, btopr(MB2B(mblen)));
2192 }
2193