xref: /titanic_51/usr/src/uts/i86pc/os/pmem.c (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
1*7c478bd9Sstevel@tonic-gate /*
2*7c478bd9Sstevel@tonic-gate  * CDDL HEADER START
3*7c478bd9Sstevel@tonic-gate  *
4*7c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*7c478bd9Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*7c478bd9Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*7c478bd9Sstevel@tonic-gate  * with the License.
8*7c478bd9Sstevel@tonic-gate  *
9*7c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*7c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*7c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*7c478bd9Sstevel@tonic-gate  * and limitations under the License.
13*7c478bd9Sstevel@tonic-gate  *
14*7c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*7c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*7c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*7c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*7c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*7c478bd9Sstevel@tonic-gate  *
20*7c478bd9Sstevel@tonic-gate  * CDDL HEADER END
21*7c478bd9Sstevel@tonic-gate  */
22*7c478bd9Sstevel@tonic-gate /*
23*7c478bd9Sstevel@tonic-gate  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24*7c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
25*7c478bd9Sstevel@tonic-gate  */
26*7c478bd9Sstevel@tonic-gate 
27*7c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
28*7c478bd9Sstevel@tonic-gate 
29*7c478bd9Sstevel@tonic-gate /*
30*7c478bd9Sstevel@tonic-gate  * PMEM - Direct mapping physical memory pages to userland process
31*7c478bd9Sstevel@tonic-gate  *
32*7c478bd9Sstevel@tonic-gate  * Provide functions used for directly (w/o occupying kernel virtual address
33*7c478bd9Sstevel@tonic-gate  * space) allocating and exporting physical memory pages to userland.
34*7c478bd9Sstevel@tonic-gate  */
35*7c478bd9Sstevel@tonic-gate 
36*7c478bd9Sstevel@tonic-gate #include <sys/types.h>
37*7c478bd9Sstevel@tonic-gate #include <sys/mutex.h>
38*7c478bd9Sstevel@tonic-gate #include <sys/sunddi.h>
39*7c478bd9Sstevel@tonic-gate #include <sys/ddidevmap.h>
40*7c478bd9Sstevel@tonic-gate #include <sys/vnode.h>
41*7c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
42*7c478bd9Sstevel@tonic-gate #include <sys/project.h>
43*7c478bd9Sstevel@tonic-gate #include <vm/seg_dev.h>
44*7c478bd9Sstevel@tonic-gate #include <sys/pmem.h>
45*7c478bd9Sstevel@tonic-gate #include <vm/hat_i86.h>
46*7c478bd9Sstevel@tonic-gate #include <sys/task.h>
47*7c478bd9Sstevel@tonic-gate #include <sys/sdt.h>
48*7c478bd9Sstevel@tonic-gate 
49*7c478bd9Sstevel@tonic-gate /*
50*7c478bd9Sstevel@tonic-gate  * The routines in this file allocate memory which will be accessed through
51*7c478bd9Sstevel@tonic-gate  * the AGP GART hardware.  The GART is programmed with the PFNs for this
52*7c478bd9Sstevel@tonic-gate  * memory, and the only mechanism for removing these entries is by an
53*7c478bd9Sstevel@tonic-gate  * explicit process operation (ioctl/close of the driver, or process exit).
54*7c478bd9Sstevel@tonic-gate  * As such, the pages need to remain locked to ensure that they won't be
55*7c478bd9Sstevel@tonic-gate  * relocated or paged out.
56*7c478bd9Sstevel@tonic-gate  *
57*7c478bd9Sstevel@tonic-gate  * To prevent these locked pages from getting in the way of page
58*7c478bd9Sstevel@tonic-gate  * coalescing, we try to allocate large pages from the system, and carve
59*7c478bd9Sstevel@tonic-gate  * them up to satisfy pmem allocation requests.  This will keep the locked
60*7c478bd9Sstevel@tonic-gate  * pages within a constrained area of physical memory, limiting the number
61*7c478bd9Sstevel@tonic-gate  * of large pages that would be pinned by our locked pages.  This is, of
62*7c478bd9Sstevel@tonic-gate  * course, another take on the infamous kernel cage, and it has many of the
63*7c478bd9Sstevel@tonic-gate  * downsides of the original cage.  It also interferes with system-wide
64*7c478bd9Sstevel@tonic-gate  * resource management decisions, as it maintains its own pool of unused
65*7c478bd9Sstevel@tonic-gate  * pages which can't be easily reclaimed and used during low-memory
66*7c478bd9Sstevel@tonic-gate  * situations.
67*7c478bd9Sstevel@tonic-gate  *
68*7c478bd9Sstevel@tonic-gate  * The right solution is for pmem to register a callback that the VM system
69*7c478bd9Sstevel@tonic-gate  * could call, which would temporarily remove any GART entries for pages
70*7c478bd9Sstevel@tonic-gate  * that were being relocated.  This would let us leave the pages unlocked,
71*7c478bd9Sstevel@tonic-gate  * which would remove the need for using large pages, which would simplify
72*7c478bd9Sstevel@tonic-gate  * this code a great deal.  Unfortunately, the support for these callbacks
73*7c478bd9Sstevel@tonic-gate  * only exists on some SPARC platforms right now.
74*7c478bd9Sstevel@tonic-gate  *
75*7c478bd9Sstevel@tonic-gate  * Note that this is the *only* reason that large pages are used here.  The
76*7c478bd9Sstevel@tonic-gate  * GART can't perform large-page translations, and the code appropriately
77*7c478bd9Sstevel@tonic-gate  * falls back to using small pages if page_create_va_large() fails.
78*7c478bd9Sstevel@tonic-gate  */
79*7c478bd9Sstevel@tonic-gate 
80*7c478bd9Sstevel@tonic-gate #define	HOLD_DHP_LOCK(dhp)  if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) \
81*7c478bd9Sstevel@tonic-gate 			{ mutex_enter(&dhp->dh_lock); }
82*7c478bd9Sstevel@tonic-gate 
83*7c478bd9Sstevel@tonic-gate #define	RELE_DHP_LOCK(dhp) if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) \
84*7c478bd9Sstevel@tonic-gate 			{ mutex_exit(&dhp->dh_lock); }
85*7c478bd9Sstevel@tonic-gate 
86*7c478bd9Sstevel@tonic-gate #define	FROM_LPG(pp) (pp->p_szc != 0)
87*7c478bd9Sstevel@tonic-gate #define	PFIND(pp) (page_pptonum(pp) & (pmem_pgcnt - 1))
88*7c478bd9Sstevel@tonic-gate 
89*7c478bd9Sstevel@tonic-gate /*
90*7c478bd9Sstevel@tonic-gate  * Structs and static variables used for pmem only.
91*7c478bd9Sstevel@tonic-gate  */
92*7c478bd9Sstevel@tonic-gate typedef struct pmem_lpg {
93*7c478bd9Sstevel@tonic-gate 	page_t	*pl_pp;		/* start pp */
94*7c478bd9Sstevel@tonic-gate 	ulong_t	*pl_bitmap;	/* allocation status for each page */
95*7c478bd9Sstevel@tonic-gate 	ushort_t pl_pfree;	/* this large page might be fully freed */
96*7c478bd9Sstevel@tonic-gate 	struct pmem_lpg *pl_next;
97*7c478bd9Sstevel@tonic-gate 	struct pmem_lpg *pl_prev;
98*7c478bd9Sstevel@tonic-gate } pmem_lpg_t;
99*7c478bd9Sstevel@tonic-gate 
100*7c478bd9Sstevel@tonic-gate static size_t	pmem_lpgsize;	/* the size of one large page */
101*7c478bd9Sstevel@tonic-gate static pgcnt_t	pmem_pgcnt;	/* the number of small pages in a large page */
102*7c478bd9Sstevel@tonic-gate static uint_t	pmem_lszc;	/* page size code of the large page */
103*7c478bd9Sstevel@tonic-gate /* The segment to be associated with all the allocated pages. */
104*7c478bd9Sstevel@tonic-gate static struct seg	pmem_seg;
105*7c478bd9Sstevel@tonic-gate /* Fully occupied large pages allocated for pmem. */
106*7c478bd9Sstevel@tonic-gate static pmem_lpg_t *pmem_occ_lpgs;
107*7c478bd9Sstevel@tonic-gate /* Memory pool to store residual small pages from large pages. */
108*7c478bd9Sstevel@tonic-gate static page_t	*pmem_mpool = NULL;
109*7c478bd9Sstevel@tonic-gate /* Number of small pages reside in pmem_mpool currently. */
110*7c478bd9Sstevel@tonic-gate static pgcnt_t	pmem_nmpages = 0;
111*7c478bd9Sstevel@tonic-gate /* To protect pmem_nmpages, pmem_mpool and pmem_occ_lpgs. */
112*7c478bd9Sstevel@tonic-gate kmutex_t	pmem_mutex;
113*7c478bd9Sstevel@tonic-gate 
114*7c478bd9Sstevel@tonic-gate static int lpg_isfree(pmem_lpg_t *);
115*7c478bd9Sstevel@tonic-gate static void pmem_lpg_sub(pmem_lpg_t **, pmem_lpg_t *);
116*7c478bd9Sstevel@tonic-gate static void pmem_lpg_concat(pmem_lpg_t **, pmem_lpg_t **);
117*7c478bd9Sstevel@tonic-gate static pmem_lpg_t *pmem_lpg_get(pmem_lpg_t *, page_t *, pmem_lpg_t **);
118*7c478bd9Sstevel@tonic-gate static pmem_lpg_t *pmem_lpg_alloc(uint_t);
119*7c478bd9Sstevel@tonic-gate static void pmem_lpg_free(pmem_lpg_t **, pmem_lpg_t *);
120*7c478bd9Sstevel@tonic-gate static void lpg_free(page_t *spp);
121*7c478bd9Sstevel@tonic-gate static pgcnt_t mpool_break(page_t **, pgcnt_t);
122*7c478bd9Sstevel@tonic-gate static void mpool_append(page_t **, pgcnt_t);
123*7c478bd9Sstevel@tonic-gate static void lpp_break(page_t **, pgcnt_t, pgcnt_t, pmem_lpg_t *);
124*7c478bd9Sstevel@tonic-gate static void lpp_free(page_t *, pgcnt_t, pmem_lpg_t **);
125*7c478bd9Sstevel@tonic-gate static int lpp_create(page_t **, pgcnt_t, pgcnt_t *, pmem_lpg_t **,
126*7c478bd9Sstevel@tonic-gate     vnode_t *, u_offset_t *, uint_t);
127*7c478bd9Sstevel@tonic-gate static void tlist_in(page_t *, pgcnt_t, vnode_t *, u_offset_t *);
128*7c478bd9Sstevel@tonic-gate static void tlist_out(page_t *, pgcnt_t);
129*7c478bd9Sstevel@tonic-gate static int pmem_cookie_alloc(struct devmap_pmem_cookie **, pgcnt_t, uint_t);
130*7c478bd9Sstevel@tonic-gate static int pmem_lock(pgcnt_t, kproject_t **);
131*7c478bd9Sstevel@tonic-gate 
132*7c478bd9Sstevel@tonic-gate /*
133*7c478bd9Sstevel@tonic-gate  * Called by driver devmap routine to pass physical memory mapping info to
134*7c478bd9Sstevel@tonic-gate  * seg_dev framework, used only for physical memory allocated from
135*7c478bd9Sstevel@tonic-gate  * devmap_pmem_alloc().
136*7c478bd9Sstevel@tonic-gate  */
137*7c478bd9Sstevel@tonic-gate /* ARGSUSED */
138*7c478bd9Sstevel@tonic-gate int
139*7c478bd9Sstevel@tonic-gate devmap_pmem_setup(devmap_cookie_t dhc, dev_info_t *dip,
140*7c478bd9Sstevel@tonic-gate     struct devmap_callback_ctl *callbackops, devmap_pmem_cookie_t cookie,
141*7c478bd9Sstevel@tonic-gate     offset_t off, size_t len, uint_t maxprot, uint_t flags,
142*7c478bd9Sstevel@tonic-gate     ddi_device_acc_attr_t *accattrp)
143*7c478bd9Sstevel@tonic-gate {
144*7c478bd9Sstevel@tonic-gate 	devmap_handle_t *dhp = (devmap_handle_t *)dhc;
145*7c478bd9Sstevel@tonic-gate 	struct devmap_pmem_cookie *pcp = (struct devmap_pmem_cookie *)cookie;
146*7c478bd9Sstevel@tonic-gate 
	/*
	 * The mapped range [off, off + len) must fall within the pages
	 * recorded in the pmem cookie.  NOTE(review): off + len is not
	 * checked for arithmetic overflow -- presumably callers pass sane
	 * values; confirm.
	 */
147*7c478bd9Sstevel@tonic-gate 	if (pcp == NULL || (off + len) > ptob(pcp->dp_npages))
148*7c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
149*7c478bd9Sstevel@tonic-gate 
150*7c478bd9Sstevel@tonic-gate 	/*
151*7c478bd9Sstevel@tonic-gate 	 * Fail if setup has already been done for this dhp.
152*7c478bd9Sstevel@tonic-gate 	 */
153*7c478bd9Sstevel@tonic-gate 	if (dhp->dh_flags & DEVMAP_SETUP_DONE)
154*7c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
155*7c478bd9Sstevel@tonic-gate 
	/* The requested protections must be a subset of what was granted. */
156*7c478bd9Sstevel@tonic-gate 	if ((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) != dhp->dh_prot)
157*7c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
158*7c478bd9Sstevel@tonic-gate 
159*7c478bd9Sstevel@tonic-gate 	if (flags & DEVMAP_MAPPING_INVALID) {
160*7c478bd9Sstevel@tonic-gate 		/*
161*7c478bd9Sstevel@tonic-gate 		 * If DEVMAP_MAPPING_INVALID is specified, we have to grant
162*7c478bd9Sstevel@tonic-gate 		 * remap permission.
163*7c478bd9Sstevel@tonic-gate 		 */
164*7c478bd9Sstevel@tonic-gate 		if (!(flags & DEVMAP_ALLOW_REMAP))
165*7c478bd9Sstevel@tonic-gate 			return (DDI_FAILURE);
166*7c478bd9Sstevel@tonic-gate 	} else {
167*7c478bd9Sstevel@tonic-gate 		dhp->dh_pcookie = (devmap_pmem_cookie_t)pcp;
168*7c478bd9Sstevel@tonic-gate 		/* dh_roff is the offset inside the dh_pcookie. */
169*7c478bd9Sstevel@tonic-gate 		dhp->dh_roff = ptob(btop(off));
170*7c478bd9Sstevel@tonic-gate 	}
171*7c478bd9Sstevel@tonic-gate 
172*7c478bd9Sstevel@tonic-gate 	/*
173*7c478bd9Sstevel@tonic-gate 	 * Only "No Cache" and "Write Combining" are supported. If any other
174*7c478bd9Sstevel@tonic-gate 	 * cache type is specified, override with "No Cache".
175*7c478bd9Sstevel@tonic-gate 	 */
176*7c478bd9Sstevel@tonic-gate 	if (accattrp->devacc_attr_dataorder == DDI_MERGING_OK_ACC)
177*7c478bd9Sstevel@tonic-gate 		dhp->dh_hat_attr = HAT_PLAT_NOCACHE | HAT_MERGING_OK;
178*7c478bd9Sstevel@tonic-gate 	else
179*7c478bd9Sstevel@tonic-gate 		dhp->dh_hat_attr = HAT_PLAT_NOCACHE | HAT_STRICTORDER;
180*7c478bd9Sstevel@tonic-gate 	dhp->dh_cookie = DEVMAP_PMEM_COOKIE;
181*7c478bd9Sstevel@tonic-gate 	dhp->dh_flags |= (flags & DEVMAP_SETUP_FLAGS);
182*7c478bd9Sstevel@tonic-gate 	dhp->dh_len = ptob(btopr(len));
183*7c478bd9Sstevel@tonic-gate 
184*7c478bd9Sstevel@tonic-gate 	dhp->dh_maxprot = maxprot & dhp->dh_orig_maxprot;
185*7c478bd9Sstevel@tonic-gate 	ASSERT((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) == dhp->dh_prot);
186*7c478bd9Sstevel@tonic-gate 
187*7c478bd9Sstevel@tonic-gate 	if (callbackops != NULL) {
188*7c478bd9Sstevel@tonic-gate 		bcopy(callbackops, &dhp->dh_callbackops,
189*7c478bd9Sstevel@tonic-gate 		    sizeof (struct devmap_callback_ctl));
190*7c478bd9Sstevel@tonic-gate 	}
191*7c478bd9Sstevel@tonic-gate 
192*7c478bd9Sstevel@tonic-gate 	/*
193*7c478bd9Sstevel@tonic-gate 	 * Initialize dh_lock if we want to do remap.
194*7c478bd9Sstevel@tonic-gate 	 */
195*7c478bd9Sstevel@tonic-gate 	if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) {
196*7c478bd9Sstevel@tonic-gate 		mutex_init(&dhp->dh_lock, NULL, MUTEX_DEFAULT, NULL);
197*7c478bd9Sstevel@tonic-gate 		dhp->dh_flags |= DEVMAP_LOCK_INITED;
198*7c478bd9Sstevel@tonic-gate 	}
199*7c478bd9Sstevel@tonic-gate 
200*7c478bd9Sstevel@tonic-gate 	dhp->dh_flags |= DEVMAP_SETUP_DONE;
201*7c478bd9Sstevel@tonic-gate 
202*7c478bd9Sstevel@tonic-gate 	return (DDI_SUCCESS);
203*7c478bd9Sstevel@tonic-gate }
204*7c478bd9Sstevel@tonic-gate 
205*7c478bd9Sstevel@tonic-gate /*
206*7c478bd9Sstevel@tonic-gate  * Replace existing mapping using a new cookie, mainly gets called when doing
207*7c478bd9Sstevel@tonic-gate  * fork(). Should be called in associated devmap_dup(9E).
208*7c478bd9Sstevel@tonic-gate  */
209*7c478bd9Sstevel@tonic-gate /* ARGSUSED */
210*7c478bd9Sstevel@tonic-gate int
211*7c478bd9Sstevel@tonic-gate devmap_pmem_remap(devmap_cookie_t dhc, dev_info_t *dip,
212*7c478bd9Sstevel@tonic-gate     devmap_pmem_cookie_t cookie, offset_t off, size_t len, uint_t maxprot,
213*7c478bd9Sstevel@tonic-gate     uint_t flags, ddi_device_acc_attr_t *accattrp)
214*7c478bd9Sstevel@tonic-gate {
215*7c478bd9Sstevel@tonic-gate 	devmap_handle_t *dhp = (devmap_handle_t *)dhc;
216*7c478bd9Sstevel@tonic-gate 	struct devmap_pmem_cookie *pcp = (struct devmap_pmem_cookie *)cookie;
217*7c478bd9Sstevel@tonic-gate 
218*7c478bd9Sstevel@tonic-gate 	/*
219*7c478bd9Sstevel@tonic-gate 	 * Return failure if setup has not been done or no remap permission
220*7c478bd9Sstevel@tonic-gate 	 * has been granted during the setup.
221*7c478bd9Sstevel@tonic-gate 	 */
222*7c478bd9Sstevel@tonic-gate 	if ((dhp->dh_flags & DEVMAP_SETUP_DONE) == 0 ||
223*7c478bd9Sstevel@tonic-gate 	    (dhp->dh_flags & DEVMAP_ALLOW_REMAP) == 0)
224*7c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
225*7c478bd9Sstevel@tonic-gate 
226*7c478bd9Sstevel@tonic-gate 	/* No flags supported for remap yet. */
227*7c478bd9Sstevel@tonic-gate 	if (flags != 0)
228*7c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
229*7c478bd9Sstevel@tonic-gate 
	/* The requested protections must be a subset of what was granted. */
230*7c478bd9Sstevel@tonic-gate 	if ((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) != dhp->dh_prot)
231*7c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
232*7c478bd9Sstevel@tonic-gate 
	/* The new range must lie within the pages of the new cookie. */
233*7c478bd9Sstevel@tonic-gate 	if (pcp == NULL || (off + len) > ptob(pcp->dp_npages))
234*7c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
235*7c478bd9Sstevel@tonic-gate 
236*7c478bd9Sstevel@tonic-gate 	HOLD_DHP_LOCK(dhp);
237*7c478bd9Sstevel@tonic-gate 	/*
238*7c478bd9Sstevel@tonic-gate 	 * Unload the old mapping of pages related with this dhp, so next
239*7c478bd9Sstevel@tonic-gate 	 * fault will setup the new mappings. It is in segdev_faultpage that
240*7c478bd9Sstevel@tonic-gate 	 * calls hat_devload to establish the mapping. Do this while holding
241*7c478bd9Sstevel@tonic-gate 	 * the dhp lock so other faults don't reestablish the mappings.
242*7c478bd9Sstevel@tonic-gate 	 */
243*7c478bd9Sstevel@tonic-gate 	hat_unload(dhp->dh_seg->s_as->a_hat, dhp->dh_uvaddr,
244*7c478bd9Sstevel@tonic-gate 	    dhp->dh_len, HAT_UNLOAD|HAT_UNLOAD_OTHER);
245*7c478bd9Sstevel@tonic-gate 
246*7c478bd9Sstevel@tonic-gate 	/*
247*7c478bd9Sstevel@tonic-gate 	 * Only "No Cache" and "Write Combining" are supported, if other cache
248*7c478bd9Sstevel@tonic-gate 	 * type is specified, override with "No Cache".
	 * NOTE(review): unlike devmap_pmem_setup(), HAT_PLAT_NOCACHE is not
	 * OR-ed into dh_hat_attr here -- confirm whether this difference is
	 * intentional.
249*7c478bd9Sstevel@tonic-gate 	 */
250*7c478bd9Sstevel@tonic-gate 	if (accattrp->devacc_attr_dataorder == DDI_MERGING_OK_ACC)
251*7c478bd9Sstevel@tonic-gate 		dhp->dh_hat_attr = HAT_MERGING_OK;
252*7c478bd9Sstevel@tonic-gate 	else
253*7c478bd9Sstevel@tonic-gate 		dhp->dh_hat_attr = HAT_STRICTORDER;
254*7c478bd9Sstevel@tonic-gate 	dhp->dh_pcookie = cookie;
255*7c478bd9Sstevel@tonic-gate 	dhp->dh_roff = ptob(btop(off));
256*7c478bd9Sstevel@tonic-gate 	dhp->dh_len = ptob(btopr(len));
257*7c478bd9Sstevel@tonic-gate 
258*7c478bd9Sstevel@tonic-gate 	/* Clear the large page size flag. */
259*7c478bd9Sstevel@tonic-gate 	dhp->dh_flags &= ~DEVMAP_FLAG_LARGE;
260*7c478bd9Sstevel@tonic-gate 
261*7c478bd9Sstevel@tonic-gate 	dhp->dh_maxprot = maxprot & dhp->dh_orig_maxprot;
262*7c478bd9Sstevel@tonic-gate 	ASSERT((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) == dhp->dh_prot);
263*7c478bd9Sstevel@tonic-gate 	RELE_DHP_LOCK(dhp);
264*7c478bd9Sstevel@tonic-gate 	return (DDI_SUCCESS);
265*7c478bd9Sstevel@tonic-gate }
266*7c478bd9Sstevel@tonic-gate 
267*7c478bd9Sstevel@tonic-gate /*
268*7c478bd9Sstevel@tonic-gate  * Directly (i.e., without occupying kernel virtual address space) allocate
269*7c478bd9Sstevel@tonic-gate  * 'npages' physical memory pages for exporting to user land. The allocated
270*7c478bd9Sstevel@tonic-gate  * page_t pointer will be recorded in cookie.
271*7c478bd9Sstevel@tonic-gate  */
272*7c478bd9Sstevel@tonic-gate int
273*7c478bd9Sstevel@tonic-gate devmap_pmem_alloc(size_t size, uint_t flags, devmap_pmem_cookie_t *cookiep)
274*7c478bd9Sstevel@tonic-gate {
275*7c478bd9Sstevel@tonic-gate 	u_offset_t	pmem_off = 0;
276*7c478bd9Sstevel@tonic-gate 	page_t		*pp = NULL;
277*7c478bd9Sstevel@tonic-gate 	page_t		*lpp = NULL;
278*7c478bd9Sstevel@tonic-gate 	page_t		*tlist = NULL;
279*7c478bd9Sstevel@tonic-gate 	pgcnt_t		i = 0;
280*7c478bd9Sstevel@tonic-gate 	pgcnt_t		rpages = 0;
281*7c478bd9Sstevel@tonic-gate 	pgcnt_t		lpages = 0;
282*7c478bd9Sstevel@tonic-gate 	pgcnt_t		tpages = 0;
283*7c478bd9Sstevel@tonic-gate 	pgcnt_t		npages = btopr(size);
284*7c478bd9Sstevel@tonic-gate 	pmem_lpg_t	*plp = NULL;
285*7c478bd9Sstevel@tonic-gate 	struct devmap_pmem_cookie	*pcp;
286*7c478bd9Sstevel@tonic-gate 	uint_t		reserved = 0;
287*7c478bd9Sstevel@tonic-gate 	uint_t		locked = 0;
288*7c478bd9Sstevel@tonic-gate 	uint_t		pflags, kflags;
289*7c478bd9Sstevel@tonic-gate 
290*7c478bd9Sstevel@tonic-gate 	*cookiep = NULL;
291*7c478bd9Sstevel@tonic-gate 
292*7c478bd9Sstevel@tonic-gate 	/*
293*7c478bd9Sstevel@tonic-gate 	 * Number larger than this will cause page_create_va() to loop
294*7c478bd9Sstevel@tonic-gate 	 * infinitely.
295*7c478bd9Sstevel@tonic-gate 	 */
296*7c478bd9Sstevel@tonic-gate 	if (npages == 0 || npages >= total_pages / 2)
297*7c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
298*7c478bd9Sstevel@tonic-gate 	if ((flags & (PMEM_SLEEP | PMEM_NOSLEEP)) == 0)
299*7c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
	/* Map the caller's sleep preference onto page/kmem allocation flags. */
300*7c478bd9Sstevel@tonic-gate 	pflags = flags & PMEM_NOSLEEP ? PG_EXCL : PG_WAIT;
301*7c478bd9Sstevel@tonic-gate 	kflags = flags & PMEM_NOSLEEP ? KM_NOSLEEP : KM_SLEEP;
302*7c478bd9Sstevel@tonic-gate 
303*7c478bd9Sstevel@tonic-gate 	/* Allocate pmem cookie. */
304*7c478bd9Sstevel@tonic-gate 	if (pmem_cookie_alloc(&pcp, npages, kflags) == DDI_FAILURE)
305*7c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
306*7c478bd9Sstevel@tonic-gate 	pcp->dp_npages = npages;
307*7c478bd9Sstevel@tonic-gate 
308*7c478bd9Sstevel@tonic-gate 	/*
309*7c478bd9Sstevel@tonic-gate 	 * See if the requested memory can be locked. Currently we do resource
310*7c478bd9Sstevel@tonic-gate 	 * controls on the project level only.
311*7c478bd9Sstevel@tonic-gate 	 */
312*7c478bd9Sstevel@tonic-gate 	if (pmem_lock(npages, &(pcp->dp_projp)) == DDI_FAILURE)
313*7c478bd9Sstevel@tonic-gate 		goto alloc_fail;
314*7c478bd9Sstevel@tonic-gate 	locked = 1;
315*7c478bd9Sstevel@tonic-gate 
316*7c478bd9Sstevel@tonic-gate 	/*
317*7c478bd9Sstevel@tonic-gate 	 * First, grab as many as possible from pmem_mpool. If pages in
318*7c478bd9Sstevel@tonic-gate 	 * pmem_mpool are enough for this request, we are done.
319*7c478bd9Sstevel@tonic-gate 	 */
320*7c478bd9Sstevel@tonic-gate 	mutex_enter(&pmem_mutex);
321*7c478bd9Sstevel@tonic-gate 	tpages = mpool_break(&tlist, npages);
322*7c478bd9Sstevel@tonic-gate 	/* IOlock and hashin them into the new offset. */
323*7c478bd9Sstevel@tonic-gate 	if (tpages)
324*7c478bd9Sstevel@tonic-gate 		tlist_in(tlist, tpages, pcp->dp_vnp, &pmem_off);
325*7c478bd9Sstevel@tonic-gate 	mutex_exit(&pmem_mutex);
326*7c478bd9Sstevel@tonic-gate 
327*7c478bd9Sstevel@tonic-gate 	if (tpages == npages)
328*7c478bd9Sstevel@tonic-gate 		goto done;
329*7c478bd9Sstevel@tonic-gate 
330*7c478bd9Sstevel@tonic-gate 	rpages = npages - tpages;
331*7c478bd9Sstevel@tonic-gate 	/* Quit now if memory cannot be reserved. */
332*7c478bd9Sstevel@tonic-gate 	if (!page_resv(rpages, kflags))
333*7c478bd9Sstevel@tonic-gate 		goto alloc_fail;
334*7c478bd9Sstevel@tonic-gate 	reserved = 1;
335*7c478bd9Sstevel@tonic-gate 
336*7c478bd9Sstevel@tonic-gate 	/* Try to allocate large pages first to decrease fragmentation. */
337*7c478bd9Sstevel@tonic-gate 	i = (rpages + (pmem_pgcnt - 1)) / pmem_pgcnt;
338*7c478bd9Sstevel@tonic-gate 	if (lpp_create(&lpp, i, &lpages, &plp, pcp->dp_vnp, &pmem_off,
339*7c478bd9Sstevel@tonic-gate 	    kflags) == DDI_FAILURE)
340*7c478bd9Sstevel@tonic-gate 		goto alloc_fail;
341*7c478bd9Sstevel@tonic-gate 	ASSERT(lpages == 0 ? lpp == NULL : 1);
342*7c478bd9Sstevel@tonic-gate 
343*7c478bd9Sstevel@tonic-gate 	/*
344*7c478bd9Sstevel@tonic-gate 	 * If the large pages contain more small pages than requested, put
345*7c478bd9Sstevel@tonic-gate 	 * the residual pages into pmem_mpool.
346*7c478bd9Sstevel@tonic-gate 	 */
347*7c478bd9Sstevel@tonic-gate 	if (lpages >= rpages) {
348*7c478bd9Sstevel@tonic-gate 		lpp_break(&lpp, lpages, lpages - rpages, plp);
349*7c478bd9Sstevel@tonic-gate 		goto done;
350*7c478bd9Sstevel@tonic-gate 	}
351*7c478bd9Sstevel@tonic-gate 
352*7c478bd9Sstevel@tonic-gate 	/* Allocate small pages if lpp+tlist cannot satisfy the request. */
353*7c478bd9Sstevel@tonic-gate 	i =  rpages - lpages;
354*7c478bd9Sstevel@tonic-gate 	if ((pp = page_create_va(pcp->dp_vnp, pmem_off, ptob(i),
355*7c478bd9Sstevel@tonic-gate 	    pflags, &pmem_seg, (caddr_t)pmem_off)) == NULL)
356*7c478bd9Sstevel@tonic-gate 		goto alloc_fail;
357*7c478bd9Sstevel@tonic-gate 
358*7c478bd9Sstevel@tonic-gate done:
359*7c478bd9Sstevel@tonic-gate 	page_list_concat(&tlist, &lpp);
360*7c478bd9Sstevel@tonic-gate 	page_list_concat(&tlist, &pp);
361*7c478bd9Sstevel@tonic-gate 	/* Set those small pages from large pages as allocated. */
362*7c478bd9Sstevel@tonic-gate 	mutex_enter(&pmem_mutex);
363*7c478bd9Sstevel@tonic-gate 	pmem_lpg_concat(&pmem_occ_lpgs, &plp);
364*7c478bd9Sstevel@tonic-gate 	mutex_exit(&pmem_mutex);
365*7c478bd9Sstevel@tonic-gate 
366*7c478bd9Sstevel@tonic-gate 	/*
367*7c478bd9Sstevel@tonic-gate 	 * Now tlist holds all the pages for this cookie. Record these pages in
368*7c478bd9Sstevel@tonic-gate 	 * pmem cookie.
369*7c478bd9Sstevel@tonic-gate 	 */
370*7c478bd9Sstevel@tonic-gate 	for (pp = tlist, i = 0; i < npages; i++) {
371*7c478bd9Sstevel@tonic-gate 		pcp->dp_pparray[i] = pp;
372*7c478bd9Sstevel@tonic-gate 		page_io_unlock(pp);
373*7c478bd9Sstevel@tonic-gate 		pp = pp->p_next;
		/* pp has already advanced, so pp->p_prev is the page just recorded. */
374*7c478bd9Sstevel@tonic-gate 		page_sub(&tlist, pp->p_prev);
375*7c478bd9Sstevel@tonic-gate 	}
376*7c478bd9Sstevel@tonic-gate 	ASSERT(tlist == NULL);
377*7c478bd9Sstevel@tonic-gate 	*cookiep = (devmap_pmem_cookie_t)pcp;
378*7c478bd9Sstevel@tonic-gate 
379*7c478bd9Sstevel@tonic-gate 	return (DDI_SUCCESS);
380*7c478bd9Sstevel@tonic-gate 
381*7c478bd9Sstevel@tonic-gate alloc_fail:
382*7c478bd9Sstevel@tonic-gate 	DTRACE_PROBE(pmem__alloc__fail);
383*7c478bd9Sstevel@tonic-gate 	/* Free large pages and the associated allocation records. */
384*7c478bd9Sstevel@tonic-gate 	if (lpp)
385*7c478bd9Sstevel@tonic-gate 		lpp_free(lpp, lpages / pmem_pgcnt, &plp);
386*7c478bd9Sstevel@tonic-gate 	if (reserved == 1)
387*7c478bd9Sstevel@tonic-gate 		page_unresv(rpages);
388*7c478bd9Sstevel@tonic-gate 	/* Put those pages in tlist back into pmem_mpool. */
389*7c478bd9Sstevel@tonic-gate 	if (tpages != 0) {
390*7c478bd9Sstevel@tonic-gate 		mutex_enter(&pmem_mutex);
391*7c478bd9Sstevel@tonic-gate 		/* IOunlock, hashout and update the allocation records. */
392*7c478bd9Sstevel@tonic-gate 		tlist_out(tlist, tpages);
393*7c478bd9Sstevel@tonic-gate 		mpool_append(&tlist, tpages);
394*7c478bd9Sstevel@tonic-gate 		mutex_exit(&pmem_mutex);
395*7c478bd9Sstevel@tonic-gate 	}
396*7c478bd9Sstevel@tonic-gate 	if (locked == 1)
397*7c478bd9Sstevel@tonic-gate 		i_ddi_decr_locked_memory(NULL, NULL, pcp->dp_projp, NULL,
398*7c478bd9Sstevel@tonic-gate 		    ptob(pcp->dp_npages));
399*7c478bd9Sstevel@tonic-gate 	/* Freeing pmem_cookie. */
400*7c478bd9Sstevel@tonic-gate 	kmem_free(pcp->dp_vnp, sizeof (vnode_t));
401*7c478bd9Sstevel@tonic-gate 	kmem_free(pcp->dp_pparray, npages * sizeof (page_t *));
402*7c478bd9Sstevel@tonic-gate 	kmem_free(pcp, sizeof (struct devmap_pmem_cookie));
403*7c478bd9Sstevel@tonic-gate 	return (DDI_FAILURE);
404*7c478bd9Sstevel@tonic-gate }
405*7c478bd9Sstevel@tonic-gate 
406*7c478bd9Sstevel@tonic-gate /*
407*7c478bd9Sstevel@tonic-gate  * Free all small pages inside cookie, and return pages from large pages into
408*7c478bd9Sstevel@tonic-gate  * mpool; if all the pages from one large page are in mpool, free it as a whole.
409*7c478bd9Sstevel@tonic-gate  */
410*7c478bd9Sstevel@tonic-gate void
411*7c478bd9Sstevel@tonic-gate devmap_pmem_free(devmap_pmem_cookie_t cookie)
412*7c478bd9Sstevel@tonic-gate {
413*7c478bd9Sstevel@tonic-gate 	struct	devmap_pmem_cookie *pcp = (struct devmap_pmem_cookie *)cookie;
414*7c478bd9Sstevel@tonic-gate 	pgcnt_t		i;
	/* NOTE(review): tpages is only decremented below and never read -- looks vestigial. */
415*7c478bd9Sstevel@tonic-gate 	pgcnt_t		tpages = 0;
416*7c478bd9Sstevel@tonic-gate 	page_t		*pp;
417*7c478bd9Sstevel@tonic-gate 	pmem_lpg_t 	*pl1, *plp;
418*7c478bd9Sstevel@tonic-gate 	pmem_lpg_t	*pf_lpgs = NULL;
419*7c478bd9Sstevel@tonic-gate 	uint_t		npls = 0;
420*7c478bd9Sstevel@tonic-gate 	pmem_lpg_t *last_pl = NULL;
421*7c478bd9Sstevel@tonic-gate 	pmem_lpg_t *plast_pl = NULL;
422*7c478bd9Sstevel@tonic-gate 
423*7c478bd9Sstevel@tonic-gate 	ASSERT(pcp);
424*7c478bd9Sstevel@tonic-gate 	mutex_enter(&pmem_mutex);
425*7c478bd9Sstevel@tonic-gate 	/* Free small pages and return them to memory pool. */
426*7c478bd9Sstevel@tonic-gate 	for (i = pcp->dp_npages; i > 0; i--) {
427*7c478bd9Sstevel@tonic-gate 		pp = pcp->dp_pparray[i - 1];
428*7c478bd9Sstevel@tonic-gate 		page_hashout(pp, NULL);
429*7c478bd9Sstevel@tonic-gate 		/*
430*7c478bd9Sstevel@tonic-gate 		 * Remove the mapping of this single page, this mapping is
431*7c478bd9Sstevel@tonic-gate 		 * created using hat_devload() in segdev_faultpage().
432*7c478bd9Sstevel@tonic-gate 		 */
433*7c478bd9Sstevel@tonic-gate 		(void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD);
434*7c478bd9Sstevel@tonic-gate 		if (!FROM_LPG(pp)) {
435*7c478bd9Sstevel@tonic-gate 			/* Normal small page. */
436*7c478bd9Sstevel@tonic-gate 			page_free(pp, 1);
437*7c478bd9Sstevel@tonic-gate 			page_unresv(1);
438*7c478bd9Sstevel@tonic-gate 		} else {
439*7c478bd9Sstevel@tonic-gate 			/* Small page from large pages. */
440*7c478bd9Sstevel@tonic-gate 			plp = pmem_lpg_get(pmem_occ_lpgs, pp, &last_pl);
441*7c478bd9Sstevel@tonic-gate 			if (plp && !(plp->pl_pfree)) {
442*7c478bd9Sstevel@tonic-gate 				/*
443*7c478bd9Sstevel@tonic-gate 				 * Move this record to pf_lpgs list, this large
444*7c478bd9Sstevel@tonic-gate 				 * page may be able to be freed as a whole.
445*7c478bd9Sstevel@tonic-gate 				 */
446*7c478bd9Sstevel@tonic-gate 				pmem_lpg_sub(&pmem_occ_lpgs, plp);
447*7c478bd9Sstevel@tonic-gate 				pmem_lpg_concat(&pf_lpgs, &plp);
448*7c478bd9Sstevel@tonic-gate 				plp->pl_pfree = 1;
449*7c478bd9Sstevel@tonic-gate 				npls++;
450*7c478bd9Sstevel@tonic-gate 				last_pl = NULL;
451*7c478bd9Sstevel@tonic-gate 			} else {
452*7c478bd9Sstevel@tonic-gate 				/* Search in pf_lpgs list. */
453*7c478bd9Sstevel@tonic-gate 				plp = pmem_lpg_get(pf_lpgs, pp, &plast_pl);
454*7c478bd9Sstevel@tonic-gate 			}
455*7c478bd9Sstevel@tonic-gate 			ASSERT(plp);
456*7c478bd9Sstevel@tonic-gate 			/* Mark this page as free. */
457*7c478bd9Sstevel@tonic-gate 			BT_SET(plp->pl_bitmap, PFIND(pp));
458*7c478bd9Sstevel@tonic-gate 			/* Record this page in pmem_mpool. */
459*7c478bd9Sstevel@tonic-gate 			mpool_append(&pp, 1);
460*7c478bd9Sstevel@tonic-gate 		}
461*7c478bd9Sstevel@tonic-gate 	}
462*7c478bd9Sstevel@tonic-gate 
463*7c478bd9Sstevel@tonic-gate 	/*
464*7c478bd9Sstevel@tonic-gate 	 * Find out the large pages whose pages have been freed, remove them
465*7c478bd9Sstevel@tonic-gate 	 * from plp list, free them and the associated pmem_lpg struct.
466*7c478bd9Sstevel@tonic-gate 	 */
467*7c478bd9Sstevel@tonic-gate 	for (plp = pf_lpgs; npls != 0; npls--) {
468*7c478bd9Sstevel@tonic-gate 		pl1 = plp;
469*7c478bd9Sstevel@tonic-gate 		plp = plp->pl_next;
470*7c478bd9Sstevel@tonic-gate 		if (lpg_isfree(pl1)) {
471*7c478bd9Sstevel@tonic-gate 			/*
472*7c478bd9Sstevel@tonic-gate 			 * Get one free large page.  Find all pages in this
473*7c478bd9Sstevel@tonic-gate 			 * large page and remove them from pmem_mpool.
474*7c478bd9Sstevel@tonic-gate 			 */
475*7c478bd9Sstevel@tonic-gate 			lpg_free(pl1->pl_pp);
476*7c478bd9Sstevel@tonic-gate 			/* Remove associated allocation records. */
477*7c478bd9Sstevel@tonic-gate 			pmem_lpg_sub(&pf_lpgs, pl1);
478*7c478bd9Sstevel@tonic-gate 			pmem_lpg_free(&pf_lpgs, pl1);
479*7c478bd9Sstevel@tonic-gate 			tpages -= pmem_pgcnt;
480*7c478bd9Sstevel@tonic-gate 		} else
481*7c478bd9Sstevel@tonic-gate 			pl1->pl_pfree = 0;
482*7c478bd9Sstevel@tonic-gate 	}
483*7c478bd9Sstevel@tonic-gate 	/* Update allocation records accordingly. */
484*7c478bd9Sstevel@tonic-gate 	pmem_lpg_concat(&pmem_occ_lpgs, &pf_lpgs);
485*7c478bd9Sstevel@tonic-gate 	mutex_exit(&pmem_mutex);
486*7c478bd9Sstevel@tonic-gate 
	/* Release the project-level locked-memory accounting taken at alloc time. */
487*7c478bd9Sstevel@tonic-gate 	i_ddi_decr_locked_memory(NULL, NULL, (kproject_t *)pcp->dp_projp, NULL,
488*7c478bd9Sstevel@tonic-gate 	    ptob(pcp->dp_npages));
489*7c478bd9Sstevel@tonic-gate 	kmem_free(pcp->dp_vnp, sizeof (vnode_t));
490*7c478bd9Sstevel@tonic-gate 	kmem_free(pcp->dp_pparray, pcp->dp_npages * sizeof (page_t *));
491*7c478bd9Sstevel@tonic-gate 	kmem_free(pcp, sizeof (struct devmap_pmem_cookie));
492*7c478bd9Sstevel@tonic-gate }
493*7c478bd9Sstevel@tonic-gate 
494*7c478bd9Sstevel@tonic-gate /*
495*7c478bd9Sstevel@tonic-gate  * To extract page frame number from specified range in a cookie.
496*7c478bd9Sstevel@tonic-gate  */
497*7c478bd9Sstevel@tonic-gate int
498*7c478bd9Sstevel@tonic-gate devmap_pmem_getpfns(devmap_pmem_cookie_t cookie, uint_t start, pgcnt_t npages,
499*7c478bd9Sstevel@tonic-gate     pfn_t *pfnarray)
500*7c478bd9Sstevel@tonic-gate {
501*7c478bd9Sstevel@tonic-gate 	struct devmap_pmem_cookie *pcp = (struct devmap_pmem_cookie *)cookie;
502*7c478bd9Sstevel@tonic-gate 	pgcnt_t i;
503*7c478bd9Sstevel@tonic-gate 
	/*
	 * The range [start, start + npages) must lie within the cookie's
	 * pages.  NOTE(review): start + npages is not checked for
	 * wraparound -- presumably callers pass sane values; confirm.
	 */
504*7c478bd9Sstevel@tonic-gate 	if (pcp == NULL || start + npages > pcp->dp_npages)
505*7c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
506*7c478bd9Sstevel@tonic-gate 
	/* Copy one PFN per page into the caller-supplied array. */
507*7c478bd9Sstevel@tonic-gate 	for (i = start; i < start + npages; i++)
508*7c478bd9Sstevel@tonic-gate 		pfnarray[i - start] = pcp->dp_pparray[i]->p_pagenum;
509*7c478bd9Sstevel@tonic-gate 	return (DDI_SUCCESS);
510*7c478bd9Sstevel@tonic-gate }
511*7c478bd9Sstevel@tonic-gate 
/*
 * One-time initialization of the pmem subsystem: set up the global
 * mutex, pick the large page size code used by the pool, and prepare
 * the dummy kernel segment passed to page_create_va_large().
 */
void
pmem_init()
{
	mutex_init(&pmem_mutex, NULL, MUTEX_DEFAULT, NULL);
	/*
	 * Use size code 1 (the first large page size) when the platform
	 * supports more than one page size, else fall back to code 0
	 * (the base page size).
	 */
	pmem_lszc = MIN(1, page_num_pagesizes() - 1);
	pmem_lpgsize = page_get_pagesize(pmem_lszc);
	/* Number of base pages per large page. */
	pmem_pgcnt = pmem_lpgsize >> PAGESHIFT;
	bzero(&pmem_seg, sizeof (struct seg));
	pmem_seg.s_as = &kas;
}
522*7c478bd9Sstevel@tonic-gate 
523*7c478bd9Sstevel@tonic-gate /* Allocate kernel memory for one pmem cookie with n pages. */
524*7c478bd9Sstevel@tonic-gate static int
525*7c478bd9Sstevel@tonic-gate pmem_cookie_alloc(struct devmap_pmem_cookie **pcpp, pgcnt_t n, uint_t kflags)
526*7c478bd9Sstevel@tonic-gate {
527*7c478bd9Sstevel@tonic-gate 	struct devmap_pmem_cookie *pcp;
528*7c478bd9Sstevel@tonic-gate 
529*7c478bd9Sstevel@tonic-gate 	if ((*pcpp = kmem_zalloc(sizeof (struct devmap_pmem_cookie),
530*7c478bd9Sstevel@tonic-gate 	    kflags)) == NULL)
531*7c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
532*7c478bd9Sstevel@tonic-gate 	pcp = *pcpp;
533*7c478bd9Sstevel@tonic-gate 	if ((pcp->dp_vnp =
534*7c478bd9Sstevel@tonic-gate 	    kmem_zalloc(sizeof (vnode_t), kflags)) == NULL) {
535*7c478bd9Sstevel@tonic-gate 		kmem_free(pcp, sizeof (struct devmap_pmem_cookie));
536*7c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
537*7c478bd9Sstevel@tonic-gate 	}
538*7c478bd9Sstevel@tonic-gate 	if ((pcp->dp_pparray =
539*7c478bd9Sstevel@tonic-gate 	    kmem_zalloc(n * sizeof (page_t *), kflags)) == NULL) {
540*7c478bd9Sstevel@tonic-gate 		kmem_free(pcp->dp_vnp, sizeof (vnode_t));
541*7c478bd9Sstevel@tonic-gate 		kmem_free(pcp, sizeof (struct devmap_pmem_cookie));
542*7c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
543*7c478bd9Sstevel@tonic-gate 	}
544*7c478bd9Sstevel@tonic-gate 	return (DDI_SUCCESS);
545*7c478bd9Sstevel@tonic-gate }
546*7c478bd9Sstevel@tonic-gate 
/*
 * Try to charge n pages of locked-down memory against the current
 * project's resource control.  On success the owning project is
 * returned through *prjpp so the matching i_ddi_decr_locked_memory()
 * call can later release exactly the same charge.
 */
static int
pmem_lock(pgcnt_t n, kproject_t **prjpp)
{
	/* p_lock protects the task/project linkage read below. */
	mutex_enter(&curproc->p_lock);
	if (i_ddi_incr_locked_memory(curproc, NULL, NULL, NULL,
	    ptob(n)) != 0) {
		/* The resource control rejected the increase. */
		mutex_exit(&curproc->p_lock);
		return (DDI_FAILURE);
	}
	/* Store this project in cookie for later lock/unlock. */
	*prjpp = curproc->p_task->tk_proj;
	mutex_exit(&curproc->p_lock);
	return (DDI_SUCCESS);
}
562*7c478bd9Sstevel@tonic-gate 
563*7c478bd9Sstevel@tonic-gate /* To check if all the pages in a large page are freed. */
564*7c478bd9Sstevel@tonic-gate static int
565*7c478bd9Sstevel@tonic-gate lpg_isfree(pmem_lpg_t *plp)
566*7c478bd9Sstevel@tonic-gate {
567*7c478bd9Sstevel@tonic-gate 	uint_t i;
568*7c478bd9Sstevel@tonic-gate 
569*7c478bd9Sstevel@tonic-gate 	for (i = 0; i < BT_BITOUL(pmem_pgcnt); i++)
570*7c478bd9Sstevel@tonic-gate 		if (plp->pl_bitmap[i] != BT_ULMAXMASK)
571*7c478bd9Sstevel@tonic-gate 			return (0);
572*7c478bd9Sstevel@tonic-gate 	/* All 1 means all pages are freed. */
573*7c478bd9Sstevel@tonic-gate 	return (1);
574*7c478bd9Sstevel@tonic-gate }
575*7c478bd9Sstevel@tonic-gate 
/*
 * Using pp to get the associated large page allocation record, searching in
 * the splp linked list with *last as the heuristic pointer. Return NULL if
 * not found.
 */
static pmem_lpg_t *
pmem_lpg_get(pmem_lpg_t *splp, page_t *pp, pmem_lpg_t **last)
{
	pmem_lpg_t *plp;
	pgcnt_t root_pfn;

	ASSERT(pp);
	if (splp == NULL)
		return (NULL);
	/*
	 * Round the pfn down to the large page boundary; each record's
	 * pl_pp is the first (root) constituent page of its large page.
	 */
	root_pfn = page_pptonum(pp) & ~(pmem_pgcnt - 1);

	/* Try last winner first. */
	if (*last && root_pfn == page_pptonum((*last)->pl_pp))
		goto pl_found;

	/* Else search the whole pmem_lpg list. */
	for (plp = splp; root_pfn != page_pptonum(plp->pl_pp); ) {
		plp = plp->pl_next;
		if (plp == splp) {
			/* Wrapped around the circular list: no match. */
			plp = NULL;
			break;
		}
		ASSERT(plp->pl_pp);
	}

	/* Cache the result (possibly NULL) for the next lookup. */
	*last = plp;

pl_found:
	return (*last);
}
611*7c478bd9Sstevel@tonic-gate 
612*7c478bd9Sstevel@tonic-gate /*
613*7c478bd9Sstevel@tonic-gate  *  Remove one pmem_lpg plp from the oplpp list.
614*7c478bd9Sstevel@tonic-gate  */
615*7c478bd9Sstevel@tonic-gate static void
616*7c478bd9Sstevel@tonic-gate pmem_lpg_sub(pmem_lpg_t **oplpp, pmem_lpg_t *plp)
617*7c478bd9Sstevel@tonic-gate {
618*7c478bd9Sstevel@tonic-gate 	if (*oplpp == plp)
619*7c478bd9Sstevel@tonic-gate 		*oplpp = plp->pl_next;		/* go to next pmem_lpg */
620*7c478bd9Sstevel@tonic-gate 
621*7c478bd9Sstevel@tonic-gate 	if (*oplpp == plp)
622*7c478bd9Sstevel@tonic-gate 		*oplpp = NULL;			/* pmem_lpg list is gone */
623*7c478bd9Sstevel@tonic-gate 	else {
624*7c478bd9Sstevel@tonic-gate 		plp->pl_prev->pl_next = plp->pl_next;
625*7c478bd9Sstevel@tonic-gate 		plp->pl_next->pl_prev = plp->pl_prev;
626*7c478bd9Sstevel@tonic-gate 	}
627*7c478bd9Sstevel@tonic-gate 	plp->pl_prev = plp->pl_next = plp;	/* make plp a list of one */
628*7c478bd9Sstevel@tonic-gate }
629*7c478bd9Sstevel@tonic-gate 
630*7c478bd9Sstevel@tonic-gate /*
631*7c478bd9Sstevel@tonic-gate  * Concatenate page list nplpp onto the end of list plpp.
632*7c478bd9Sstevel@tonic-gate  */
633*7c478bd9Sstevel@tonic-gate static void
634*7c478bd9Sstevel@tonic-gate pmem_lpg_concat(pmem_lpg_t **plpp, pmem_lpg_t **nplpp)
635*7c478bd9Sstevel@tonic-gate {
636*7c478bd9Sstevel@tonic-gate 	pmem_lpg_t *s1p, *s2p, *e1p, *e2p;
637*7c478bd9Sstevel@tonic-gate 
638*7c478bd9Sstevel@tonic-gate 	if (*nplpp == NULL) {
639*7c478bd9Sstevel@tonic-gate 		return;
640*7c478bd9Sstevel@tonic-gate 	}
641*7c478bd9Sstevel@tonic-gate 	if (*plpp == NULL) {
642*7c478bd9Sstevel@tonic-gate 		*plpp = *nplpp;
643*7c478bd9Sstevel@tonic-gate 		return;
644*7c478bd9Sstevel@tonic-gate 	}
645*7c478bd9Sstevel@tonic-gate 	s1p = *plpp;
646*7c478bd9Sstevel@tonic-gate 	e1p =  s1p->pl_prev;
647*7c478bd9Sstevel@tonic-gate 	s2p = *nplpp;
648*7c478bd9Sstevel@tonic-gate 	e2p = s2p->pl_prev;
649*7c478bd9Sstevel@tonic-gate 	s1p->pl_prev = e2p;
650*7c478bd9Sstevel@tonic-gate 	e2p->pl_next = s1p;
651*7c478bd9Sstevel@tonic-gate 	e1p->pl_next = s2p;
652*7c478bd9Sstevel@tonic-gate 	s2p->pl_prev = e1p;
653*7c478bd9Sstevel@tonic-gate }
654*7c478bd9Sstevel@tonic-gate 
655*7c478bd9Sstevel@tonic-gate /*
656*7c478bd9Sstevel@tonic-gate  * Allocate and initialize the allocation record of one large page, the init
657*7c478bd9Sstevel@tonic-gate  * value is 'allocated'.
658*7c478bd9Sstevel@tonic-gate  */
659*7c478bd9Sstevel@tonic-gate static pmem_lpg_t *
660*7c478bd9Sstevel@tonic-gate pmem_lpg_alloc(uint_t kflags)
661*7c478bd9Sstevel@tonic-gate {
662*7c478bd9Sstevel@tonic-gate 	pmem_lpg_t *plp;
663*7c478bd9Sstevel@tonic-gate 
664*7c478bd9Sstevel@tonic-gate 	ASSERT(pmem_pgcnt % BT_NBIPUL == 0);
665*7c478bd9Sstevel@tonic-gate 	plp = kmem_zalloc(sizeof (pmem_lpg_t), kflags);
666*7c478bd9Sstevel@tonic-gate 	if (plp == NULL)
667*7c478bd9Sstevel@tonic-gate 		return (NULL);
668*7c478bd9Sstevel@tonic-gate 	plp->pl_bitmap = kmem_zalloc(BT_SIZEOFMAP(pmem_pgcnt), kflags);
669*7c478bd9Sstevel@tonic-gate 	if (plp->pl_bitmap == NULL) {
670*7c478bd9Sstevel@tonic-gate 		kmem_free(plp, sizeof (*plp));
671*7c478bd9Sstevel@tonic-gate 		return (NULL);
672*7c478bd9Sstevel@tonic-gate 	}
673*7c478bd9Sstevel@tonic-gate 	plp->pl_next = plp->pl_prev = plp;
674*7c478bd9Sstevel@tonic-gate 	return (plp);
675*7c478bd9Sstevel@tonic-gate }
676*7c478bd9Sstevel@tonic-gate 
677*7c478bd9Sstevel@tonic-gate /* Free one allocation record pointed by oplp. */
678*7c478bd9Sstevel@tonic-gate static void
679*7c478bd9Sstevel@tonic-gate pmem_lpg_free(pmem_lpg_t **headp, pmem_lpg_t *plp)
680*7c478bd9Sstevel@tonic-gate {
681*7c478bd9Sstevel@tonic-gate 	if (*headp == plp)
682*7c478bd9Sstevel@tonic-gate 		*headp = plp->pl_next;		/* go to next pmem_lpg_t */
683*7c478bd9Sstevel@tonic-gate 
684*7c478bd9Sstevel@tonic-gate 	if (*headp == plp)
685*7c478bd9Sstevel@tonic-gate 		*headp = NULL;			/* this list is gone */
686*7c478bd9Sstevel@tonic-gate 	else {
687*7c478bd9Sstevel@tonic-gate 		plp->pl_prev->pl_next = plp->pl_next;
688*7c478bd9Sstevel@tonic-gate 		plp->pl_next->pl_prev = plp->pl_prev;
689*7c478bd9Sstevel@tonic-gate 	}
690*7c478bd9Sstevel@tonic-gate 	kmem_free(plp->pl_bitmap, BT_SIZEOFMAP(pmem_pgcnt));
691*7c478bd9Sstevel@tonic-gate 	kmem_free(plp, sizeof (*plp));
692*7c478bd9Sstevel@tonic-gate }
693*7c478bd9Sstevel@tonic-gate 
/* Free one large page headed by spp from pmem_mpool. */
static void
lpg_free(page_t *spp)
{
	page_t *pp1 = spp;
	uint_t i;

	ASSERT(MUTEX_HELD(&pmem_mutex));
	/*
	 * Unlink each constituent page from the pool list.  pp1++ steps
	 * through the page_t structs directly -- NOTE(review): this
	 * assumes the large page's constituent page_t's are laid out
	 * contiguously in memory; confirm against the page_t layout.
	 */
	for (i = 0; i < pmem_pgcnt; i++) {
		/* Break pp1 from pmem_mpool. */
		page_sub(&pmem_mpool, pp1);
		pp1++;
	}
	/* Free pages in this large page. */
	page_free_pages(spp);
	/* Return the page reservation and shrink the pool count. */
	page_unresv(pmem_pgcnt);
	pmem_nmpages -= pmem_pgcnt;
	/* Pool count and pool list must be empty or non-empty together. */
	ASSERT((pmem_nmpages && pmem_mpool) || (!pmem_nmpages && !pmem_mpool));
}
713*7c478bd9Sstevel@tonic-gate 
714*7c478bd9Sstevel@tonic-gate /* Put n pages in *ppp list back into pmem_mpool. */
715*7c478bd9Sstevel@tonic-gate static void
716*7c478bd9Sstevel@tonic-gate mpool_append(page_t **ppp, pgcnt_t n)
717*7c478bd9Sstevel@tonic-gate {
718*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&pmem_mutex));
719*7c478bd9Sstevel@tonic-gate 	/* Put back pages. */
720*7c478bd9Sstevel@tonic-gate 	page_list_concat(&pmem_mpool, ppp);
721*7c478bd9Sstevel@tonic-gate 	pmem_nmpages += n;
722*7c478bd9Sstevel@tonic-gate 	ASSERT((pmem_nmpages && pmem_mpool) || (!pmem_nmpages && !pmem_mpool));
723*7c478bd9Sstevel@tonic-gate }
724*7c478bd9Sstevel@tonic-gate 
725*7c478bd9Sstevel@tonic-gate /*
726*7c478bd9Sstevel@tonic-gate  * Try to grab MIN(pmem_nmpages, n) pages from pmem_mpool, put them into *ppp
727*7c478bd9Sstevel@tonic-gate  * list, and return the number of grabbed pages.
728*7c478bd9Sstevel@tonic-gate  */
729*7c478bd9Sstevel@tonic-gate static pgcnt_t
730*7c478bd9Sstevel@tonic-gate mpool_break(page_t **ppp, pgcnt_t n)
731*7c478bd9Sstevel@tonic-gate {
732*7c478bd9Sstevel@tonic-gate 	pgcnt_t i;
733*7c478bd9Sstevel@tonic-gate 
734*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&pmem_mutex));
735*7c478bd9Sstevel@tonic-gate 	/* Grab the pages. */
736*7c478bd9Sstevel@tonic-gate 	i = MIN(pmem_nmpages, n);
737*7c478bd9Sstevel@tonic-gate 	*ppp = pmem_mpool;
738*7c478bd9Sstevel@tonic-gate 	page_list_break(ppp, &pmem_mpool, i);
739*7c478bd9Sstevel@tonic-gate 	pmem_nmpages -= i;
740*7c478bd9Sstevel@tonic-gate 	ASSERT((pmem_nmpages && pmem_mpool) || (!pmem_nmpages && !pmem_mpool));
741*7c478bd9Sstevel@tonic-gate 	return (i);
742*7c478bd9Sstevel@tonic-gate }
743*7c478bd9Sstevel@tonic-gate 
/*
 * Create n large pages, lpages and plpp contains the number of small pages and
 * allocation records list respectively.  Fewer than n large pages may be
 * created if page_create_va_large() runs dry; *lpages reports how many
 * small pages actually exist.  On DDI_FAILURE the pages already placed on
 * *lppp remain for the caller to release -- presumably via lpp_free();
 * confirm against the callers.
 */
static int
lpp_create(page_t **lppp, pgcnt_t n, pgcnt_t *lpages, pmem_lpg_t **plpp,
    vnode_t *vnp, u_offset_t *offp, uint_t kflags)
{
	pgcnt_t i;
	pmem_lpg_t *plp;
	page_t *pp;

	for (i = 0, *lpages = 0; i < n; i++) {
		/* Allocate one large page each time. */
		pp = page_create_va_large(vnp, *offp, pmem_lpgsize,
		    PG_EXCL, &pmem_seg, (caddr_t)*offp, NULL);
		if (pp == NULL)
			break;
		/* Advance the identity offset for the next large page. */
		*offp += pmem_lpgsize;
		page_list_concat(lppp, &pp);
		*lpages += pmem_pgcnt;
		/* Add one allocation record for this large page. */
		if ((plp = pmem_lpg_alloc(kflags)) == NULL)
			return (DDI_FAILURE);
		plp->pl_pp = pp;
		pmem_lpg_concat(plpp, &plp);
	}
	return (DDI_SUCCESS);
}
773*7c478bd9Sstevel@tonic-gate 
/*
 * Break the last r small pages from the large page list *lppp (with totally n
 * small pages) and put them into pmem_mpool.
 */
static void
lpp_break(page_t **lppp, pgcnt_t n, pgcnt_t r, pmem_lpg_t *oplp)
{
	page_t *pp, *pp1;
	pgcnt_t i;
	pmem_lpg_t *plp;

	if (r == 0)
		return;
	ASSERT(*lppp != NULL && r < pmem_pgcnt);
	/* Split the trailing r pages off the list into pp. */
	page_list_break(lppp, &pp, n - r);

	/* The residual should reside in the last large page.  */
	plp = oplp->pl_prev;
	/* IOunlock and hashout the residual pages. */
	for (pp1 = pp, i = 0; i < r; i++) {
		page_io_unlock(pp1);
		page_hashout(pp1, NULL);
		/* Mark this page as free (set bit == free). */
		BT_SET(plp->pl_bitmap, PFIND(pp1));
		pp1 = pp1->p_next;
	}
	/* The walk must traverse the residual ring exactly once. */
	ASSERT(pp1 == pp);
	/* Put these residual pages into memory pool. */
	mutex_enter(&pmem_mutex);
	mpool_append(&pp, r);
	mutex_exit(&pmem_mutex);
}
806*7c478bd9Sstevel@tonic-gate 
/* Freeing large pages in lpp and the associated allocation records in plp. */
static void
lpp_free(page_t *lpp, pgcnt_t lpgs, pmem_lpg_t **plpp)
{
	pgcnt_t i, j;
	page_t *pp = lpp, *pp1;
	pmem_lpg_t *plp1, *plp2;

	/* Walk lpgs large pages, pmem_pgcnt small pages at a time. */
	for (i = 0; i < lpgs; i++) {
		for (j = 0; j < pmem_pgcnt; j++) {
			/* IO unlock and hashout this small page. */
			page_io_unlock(pp);
			page_hashout(pp, NULL);
			/*
			 * Detach pp from the ring (self-linked) before
			 * moving on, saving the next page first.
			 */
			pp1 = pp->p_next;
			pp->p_prev = pp->p_next = pp;
			pp = pp1;
		}
		/* Free one large page at one time. */
		page_free_pages(lpp);
		/* pp now heads the next large page. */
		lpp = pp;
	}
	/* Free associate pmem large page allocation records. */
	for (plp1 = *plpp; *plpp; plp1 = plp2) {
		plp2 = plp1->pl_next;
		pmem_lpg_free(plpp, plp1);
	}
}
834*7c478bd9Sstevel@tonic-gate 
/*
 * IOlock and hashin all pages in tlist, associate them with vnode *pvnp
 * and offset starting with *poffp. Update allocation records accordingly at
 * the same time.
 */
static void
tlist_in(page_t *tlist, pgcnt_t tpages, vnode_t *pvnp, u_offset_t *poffp)
{
	page_t *pp;
	pgcnt_t i = 0;
	pmem_lpg_t *plp, *last_pl = NULL;

	ASSERT(MUTEX_HELD(&pmem_mutex));
	for (pp = tlist; i < tpages; i++) {
		ASSERT(FROM_LPG(pp));
		page_io_lock(pp);
		(void) page_hashin(pp, pvnp, *poffp, NULL);
		/* last_pl speeds up lookups of pages in the same large page. */
		plp = pmem_lpg_get(pmem_occ_lpgs, pp, &last_pl);
		/* Mark this page as allocated (clear bit == allocated). */
		BT_CLEAR(plp->pl_bitmap, PFIND(pp));
		*poffp += PAGESIZE;
		pp = pp->p_next;
	}
	/* The walk must traverse the circular tlist exactly once. */
	ASSERT(pp == tlist);
}
860*7c478bd9Sstevel@tonic-gate 
/*
 * IOunlock and hashout all pages in tlist, update allocation records
 * accordingly at the same time.
 */
static void
tlist_out(page_t *tlist, pgcnt_t tpages)
{
	page_t *pp;
	pgcnt_t i = 0;
	pmem_lpg_t *plp, *last_pl = NULL;

	ASSERT(MUTEX_HELD(&pmem_mutex));
	for (pp = tlist; i < tpages; i++) {
		ASSERT(FROM_LPG(pp));
		page_io_unlock(pp);
		page_hashout(pp, NULL);
		/* last_pl speeds up lookups of pages in the same large page. */
		plp = pmem_lpg_get(pmem_occ_lpgs, pp, &last_pl);
		/* Mark this page as free (set bit == free). */
		BT_SET(plp->pl_bitmap, PFIND(pp));
		pp = pp->p_next;
	}
	/* The walk must traverse the circular tlist exactly once. */
	ASSERT(pp == tlist);
}
884