xref: /illumos-gate/usr/src/uts/sun4v/os/memseg.c (revision 66582b606a8194f7f3ba5b3a3a6dca5b0d346361)
1 /*
2  *
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License (the "License").
7  * You may not use this file except in compliance with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/cmn_err.h>
29 #include <sys/vm.h>
30 #include <sys/mman.h>
31 #include <vm/vm_dep.h>
32 #include <vm/seg_kmem.h>
33 #include <vm/seg_kpm.h>
34 #include <sys/mem_config.h>
35 #include <sys/sysmacros.h>
36 
37 extern pgcnt_t pp_dummy_npages;
38 extern pfn_t *pp_dummy_pfn;	/* Array of dummy pfns. */
39 
40 extern kmutex_t memseg_lists_lock;
41 extern struct memseg *memseg_va_avail;
42 extern struct memseg *memseg_alloc();
43 
44 extern page_t *ppvm_base;
45 extern pgcnt_t ppvm_size;
46 
/*
 * Non-zero enables the MEMSEG_DEBUG() trace output below.  The flag is
 * only consulted when the file is built with DEBUG defined.
 */
static int sun4v_memseg_debug;
48 
49 extern struct memseg *memseg_reuse(pgcnt_t);
50 extern void remap_to_dummy(caddr_t, pgcnt_t);
51 
52 /*
53  * The page_t memory for incoming pages is allocated from existing memory
54  * which can create a potential situation where memory addition fails
55  * because of shortage of existing memory.  To mitigate this situation
56  * some memory is always reserved ahead of time for page_t allocation.
57  * Each 4MB of reserved page_t's guarantees a 256MB (x64) addition without
 * page_t allocation.  The 256MB of added memory could theoretically
 * allow an addition of 16GB.
60  */
61 #define	RSV_SIZE	0x40000000	/* add size with rsrvd page_t's 1G */
62 
/*
 * Debug tracing.  In DEBUG builds the arguments are passed to printf()
 * when sun4v_memseg_debug is non-zero; otherwise the macro expands to
 * an empty statement and the arguments are not evaluated.
 *
 * The do/while (0) wrapper makes every use a single statement, so the
 * macro cannot capture the 'else' of an enclosing unbraced if/else.
 */
#ifdef	DEBUG
#define	MEMSEG_DEBUG(...)					\
	do {							\
		if (sun4v_memseg_debug)				\
			printf(__VA_ARGS__);			\
	} while (0)
#else
#define	MEMSEG_DEBUG(...)	do { } while (0)
#endif
68 
69 /*
70  * The page_t's for the incoming memory are allocated from
71  * existing pages.
72  */
73 /*ARGSUSED*/
74 int
75 memseg_alloc_meta(pfn_t base, pgcnt_t npgs, void **ptp, pgcnt_t *metap)
76 {
77 	page_t		*pp, *opp, *epp;
78 	pgcnt_t		metapgs;
79 	int		i;
80 	struct seg	kseg;
81 	caddr_t		vaddr;
82 
83 	/*
84 	 * Verify incoming memory is within supported DR range.
85 	 */
86 	if ((base + npgs) * sizeof (page_t) > ppvm_size)
87 		return (KPHYSM_ENOTSUP);
88 
89 	opp = pp = ppvm_base + base;
90 	epp = pp + npgs;
91 	metapgs = btopr(npgs * sizeof (page_t));
92 
93 	if (!IS_P2ALIGNED((uint64_t)pp, PAGESIZE) &&
94 	    page_find(&mpvp, (u_offset_t)pp)) {
95 		/*
96 		 * Another memseg has page_t's in the same
97 		 * page which 'pp' resides.  This would happen
98 		 * if PAGESIZE is not an integral multiple of
99 		 * sizeof (page_t) and therefore 'pp'
100 		 * does not start on a page boundry.
101 		 *
102 		 * Since the other memseg's pages_t's still
103 		 * map valid pages, skip allocation of this page.
104 		 * Advance 'pp' to the next page which should
105 		 * belong only to the incoming memseg.
106 		 *
107 		 * If the last page_t in the current page
108 		 * crosses a page boundary, this should still
109 		 * work.  The first part of the page_t is
110 		 * already allocated.  The second part of
111 		 * the page_t will be allocated below.
112 		 */
113 		ASSERT(PAGESIZE % sizeof (page_t));
114 		pp = (page_t *)P2ROUNDUP((uint64_t)pp, PAGESIZE);
115 		metapgs--;
116 	}
117 
118 	if (!IS_P2ALIGNED((uint64_t)epp, PAGESIZE) &&
119 	    page_find(&mpvp, (u_offset_t)epp)) {
120 		/*
121 		 * Another memseg has page_t's in the same
122 		 * page which 'epp' resides.  This would happen
123 		 * if PAGESIZE is not an integral multiple of
124 		 * sizeof (page_t) and therefore 'epp'
125 		 * does not start on a page boundry.
126 		 *
127 		 * Since the other memseg's pages_t's still
128 		 * map valid pages, skip allocation of this page.
129 		 */
130 		ASSERT(PAGESIZE % sizeof (page_t));
131 		metapgs--;
132 	}
133 
134 	ASSERT(IS_P2ALIGNED((uint64_t)pp, PAGESIZE));
135 
136 	/*
137 	 * Back metadata space with physical pages.
138 	 */
139 	kseg.s_as = &kas;
140 	vaddr = (caddr_t)pp;
141 
142 	for (i = 0; i < metapgs; i++)
143 		if (page_find(&mpvp, (u_offset_t)(vaddr + i * PAGESIZE)))
144 			panic("page_find(0x%p, %p)\n",
145 			    (void *)&mpvp, (void *)(vaddr + i * PAGESIZE));
146 
147 	/*
148 	 * Allocate the metadata pages; these are the pages that will
149 	 * contain the page_t's for the incoming memory.
150 	 */
151 	if ((page_create_va(&mpvp, (u_offset_t)pp, ptob(metapgs),
152 	    PG_NORELOC | PG_EXCL, &kseg, vaddr)) == NULL) {
153 		MEMSEG_DEBUG("memseg_alloc_meta: can't get 0x%ld metapgs",
154 		    metapgs);
155 		return (KPHYSM_ERESOURCE);
156 	}
157 
158 	ASSERT(ptp);
159 	ASSERT(metap);
160 
161 	*ptp = (void *)opp;
162 	*metap = metapgs;
163 
164 	return (KPHYSM_OK);
165 }
166 
167 void
168 memseg_free_meta(void *ptp, pgcnt_t metapgs)
169 {
170 	int i;
171 	page_t *pp;
172 	u_offset_t off;
173 
174 	if (!metapgs)
175 		return;
176 
177 	off = (u_offset_t)ptp;
178 
179 	ASSERT(off);
180 	ASSERT(IS_P2ALIGNED((uint64_t)off, PAGESIZE));
181 
182 	MEMSEG_DEBUG("memseg_free_meta: off=0x%lx metapgs=0x%lx\n",
183 	    (uint64_t)off, metapgs);
184 	/*
185 	 * Free pages allocated during add.
186 	 */
187 	for (i = 0; i < metapgs; i++) {
188 		pp = page_find(&mpvp, off);
189 		ASSERT(pp);
190 		ASSERT(pp->p_szc == 0);
191 		page_io_unlock(pp);
192 		page_destroy(pp, 0);
193 		off += PAGESIZE;
194 	}
195 }
196 
197 pfn_t
198 memseg_get_metapfn(void *ptp, pgcnt_t metapg)
199 {
200 	page_t *pp;
201 	u_offset_t off;
202 
203 	off = (u_offset_t)ptp + ptob(metapg);
204 
205 	ASSERT(off);
206 	ASSERT(IS_P2ALIGNED((uint64_t)off, PAGESIZE));
207 
208 	pp = page_find(&mpvp, off);
209 	ASSERT(pp);
210 	ASSERT(pp->p_szc == 0);
211 	ASSERT(pp->p_pagenum != PFN_INVALID);
212 
213 	return (pp->p_pagenum);
214 }
215 
/*
 * Remap a memseg's page_t's to dummy pages.  Skip the low/high
 * ends of the range if they are already in use.
 *
 * The virtual range holding the memseg's page_t array is handed to
 * remap_to_dummy(), and the pages that backed it are then looked up in
 * mpvp and destroyed.  The memseg itself is updated so that pages_end
 * equals pages_base.
 *
 * The low/high end skipping is implemented only in the '#if 0' sections
 * below, which are currently compiled out; the live code requires
 * seg->pages to be page aligned.
 */
void
memseg_remap_meta(struct memseg *seg)
{
	int i;
	u_offset_t off;
	page_t *pp;
#if 0
	page_t *epp;
#endif
	pgcnt_t metapgs;

	/*
	 * Number of pages occupied by this memseg's page_t's.  This is
	 * computed before pages_end is reset below.
	 * NOTE(review): MSEG_NPAGES() presumably derives the count from
	 * pages_base/pages_end, which would make this ordering
	 * significant -- confirm against the macro definition.
	 */
	metapgs = btopr(MSEG_NPAGES(seg) * sizeof (page_t));
	ASSERT(metapgs);
	pp = seg->pages;
	/* Collapse the memseg's pfn range: end == base. */
	seg->pages_end = seg->pages_base;
#if 0
	epp = seg->epages;

	/*
	 * This code cannot be tested as the kernel does not compile
	 * when page_t size is changed.  It is left here as a starting
	 * point if the unaligned page_t size needs to be supported.
	 */

	if (!IS_P2ALIGNED((uint64_t)pp, PAGESIZE) &&
	    page_find(&mpvp, (u_offset_t)(pp - 1)) && !page_deleted(pp - 1)) {
		/*
		 * Another memseg has page_t's in the same
		 * page in which 'pp' resides.  This would happen
		 * if PAGESIZE is not an integral multiple of
		 * sizeof (page_t) and therefore 'seg->pages'
		 * does not start on a page boundary.
		 *
		 * Since the other memseg's page_t's still
		 * map valid pages, skip remap of this page.
		 * Advance 'pp' to the next page which should
		 * belong only to the outgoing memseg.
		 *
		 * If the last page_t in the current page
		 * crosses a page boundary, this should still
		 * work.  The first part of the page_t is
		 * valid since memseg_lock_delete_all() has
		 * been called.  The second part of the page_t
		 * will be remapped to the corresponding
		 * dummy page below.
		 */
		ASSERT(PAGESIZE % sizeof (page_t));
		pp = (page_t *)P2ROUNDUP((uint64_t)pp, PAGESIZE);
		metapgs--;
	}

	if (!IS_P2ALIGNED((uint64_t)epp, PAGESIZE) &&
	    page_find(&mpvp, (u_offset_t)epp) && !page_deleted(epp)) {
		/*
		 * Another memseg has page_t's in the same
		 * page in which 'epp' resides.  This would happen
		 * if PAGESIZE is not an integral multiple of
		 * sizeof (page_t) and therefore 'seg->epages'
		 * does not start on a page boundary.
		 *
		 * Since the other memseg's page_t's still
		 * map valid pages, skip remap of this page.
		 */
		ASSERT(PAGESIZE % sizeof (page_t));
		metapgs--;
	}
#endif
	ASSERT(IS_P2ALIGNED((uint64_t)pp, PAGESIZE));

	/*
	 * Hand the page_t range to remap_to_dummy() first; the pages
	 * that were backing it are destroyed only afterwards.
	 */
	remap_to_dummy((caddr_t)pp, metapgs);

	/*
	 * From here on 'pp' is reused as the cursor for the backing
	 * pages, so remember the start of the range as an mpvp offset.
	 */
	off = (u_offset_t)pp;

	MEMSEG_DEBUG("memseg_remap_meta: off=0x%lx metapgs=0x%lx\n",
	    (uint64_t)off, metapgs);
	/*
	 * Free pages allocated during add.
	 */
	for (i = 0; i < metapgs; i++) {
		pp = page_find(&mpvp, off);
		ASSERT(pp);
		ASSERT(pp->p_szc == 0);
		page_io_unlock(pp);
		page_destroy(pp, 0);
		off += PAGESIZE;
	}
}
307