/*
 *
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/cmn_err.h>
#include <sys/vm.h>
#include <sys/mman.h>
#include <vm/vm_dep.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kpm.h>
#include <sys/mem_config.h>
#include <sys/sysmacros.h>

extern pgcnt_t pp_dummy_npages;
extern pfn_t *pp_dummy_pfn;	/* Array of dummy pfns. */

extern kmutex_t memseg_lists_lock;
extern struct memseg *memseg_va_avail;
extern struct memseg *memseg_alloc();

extern page_t *ppvm_base;
extern pgcnt_t ppvm_size;

static int sun4v_memseg_debug;

extern struct memseg *memseg_reuse(pgcnt_t);
extern void remap_to_dummy(caddr_t, pgcnt_t);
/*
 * The page_t memory for incoming pages is allocated from existing memory,
 * which can create a potential situation where memory addition fails
 * because of a shortage of existing memory. To mitigate this situation
 * some memory is always reserved ahead of time for page_t allocation.
 * Each 4MB of reserved page_t's guarantees a 256MB (x64) addition without
 * page_t allocation. The added 256MB of memory could theoretically
 * allow a further addition of 16GB.
 */
#define	RSV_SIZE	0x40000000	/* add size with rsrvd page_t's 1G */
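
/*
 * Illustrative arithmetic for the x64 factor above (the sizes used here
 * are assumptions for the example, not values taken from this file):
 * with 8K pages and a page_t of roughly PAGESIZE/64 bytes (~128 bytes),
 * one page of metadata describes 64 pages of memory. So 4MB of reserved
 * page_t's covers 4MB * 64 = 256MB of added memory, and 256MB worth of
 * page_t's would in turn describe 256MB * 64 = 16GB.
 */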

#ifdef	DEBUG
#define	MEMSEG_DEBUG(args...)	if (sun4v_memseg_debug) printf(args)
#else
#define	MEMSEG_DEBUG(...)
#endif

/*
 * The page_t's for the incoming memory are allocated from
 * existing pages.
 */
/*ARGSUSED*/
int
memseg_alloc_meta(pfn_t base, pgcnt_t npgs, void **ptp, pgcnt_t *metap)
{
	page_t *pp, *opp, *epp;
	pgcnt_t metapgs;
	int i;
	struct seg kseg;
	caddr_t vaddr;

	/*
	 * Verify incoming memory is within supported DR range.
	 */
	if ((base + npgs) * sizeof (page_t) > ppvm_size)
		return (KPHYSM_ENOTSUP);

	opp = pp = ppvm_base + base;
	epp = pp + npgs;
	metapgs = btopr(npgs * sizeof (page_t));

	if (!IS_P2ALIGNED((uint64_t)pp, PAGESIZE) &&
	    page_find(&mpvp, (u_offset_t)pp)) {
		/*
		 * Another memseg has page_t's in the same
		 * page in which 'pp' resides. This would happen
		 * if PAGESIZE is not an integral multiple of
		 * sizeof (page_t) and therefore 'pp'
		 * does not start on a page boundary.
		 *
		 * Since the other memseg's page_t's still
		 * map valid pages, skip allocation of this page.
		 * Advance 'pp' to the next page, which should
		 * belong only to the incoming memseg.
		 *
		 * If the last page_t in the current page
		 * crosses a page boundary, this should still
		 * work. The first part of the page_t is
		 * already allocated. The second part of
		 * the page_t will be allocated below.
		 */
		ASSERT(PAGESIZE % sizeof (page_t));
		pp = (page_t *)P2ROUNDUP((uint64_t)pp, PAGESIZE);
		metapgs--;
	}

	if (!IS_P2ALIGNED((uint64_t)epp, PAGESIZE) &&
	    page_find(&mpvp, (u_offset_t)epp)) {
		/*
		 * Another memseg has page_t's in the same
		 * page in which 'epp' resides. This would happen
		 * if PAGESIZE is not an integral multiple of
		 * sizeof (page_t) and therefore 'epp'
		 * does not start on a page boundary.
		 *
		 * Since the other memseg's page_t's still
		 * map valid pages, skip allocation of this page.
		 */
		ASSERT(PAGESIZE % sizeof (page_t));
		metapgs--;
	}

	ASSERT(IS_P2ALIGNED((uint64_t)pp, PAGESIZE));

	/*
	 * Back metadata space with physical pages.
	 */
	kseg.s_as = &kas;
	vaddr = (caddr_t)pp;

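	/*
	 * Sanity check: none of the pages that will hold this memseg's
	 * page_t's should already be hashed into mpvp.
	 */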
	for (i = 0; i < metapgs; i++)
		if (page_find(&mpvp, (u_offset_t)(vaddr + i * PAGESIZE)))
			panic("page_find(0x%p, %p)\n",
			    (void *)&mpvp, (void *)(vaddr + i * PAGESIZE));

	/*
	 * Allocate the metadata pages; these are the pages that will
	 * contain the page_t's for the incoming memory.
	 */
	if ((page_create_va(&mpvp, (u_offset_t)pp, ptob(metapgs),
	    PG_NORELOC | PG_EXCL, &kseg, vaddr)) == NULL) {
		MEMSEG_DEBUG("memseg_alloc_meta: can't get 0x%ld metapgs",
		    metapgs);
		return (KPHYSM_ERESOURCE);
	}

	ASSERT(ptp);
	ASSERT(metap);

	*ptp = (void *)opp;
	*metap = metapgs;

	return (KPHYSM_OK);
}

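/*
 * Free the metadata pages that back a memseg's page_t's, as allocated
 * above by memseg_alloc_meta().
 */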
void
memseg_free_meta(void *ptp, pgcnt_t metapgs)
{
	int i;
	page_t *pp;
	u_offset_t off;

	if (!metapgs)
		return;

	off = (u_offset_t)ptp;

	ASSERT(off);
	ASSERT(IS_P2ALIGNED((uint64_t)off, PAGESIZE));

	MEMSEG_DEBUG("memseg_free_meta: off=0x%lx metapgs=0x%lx\n",
	    (uint64_t)off, metapgs);
	/*
	 * Free pages allocated during add.
	 */
	for (i = 0; i < metapgs; i++) {
		pp = page_find(&mpvp, off);
		ASSERT(pp);
		ASSERT(pp->p_szc == 0);
		page_io_unlock(pp);
		page_destroy(pp, 0);
		off += PAGESIZE;
	}
}

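/*
 * Return the pfn of the physical page backing the metapg'th metadata
 * page of the page_t range that starts at ptp.
 */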
pfn_t
memseg_get_metapfn(void *ptp, pgcnt_t metapg)
{
	page_t *pp;
	u_offset_t off;

	off = (u_offset_t)ptp + ptob(metapg);

	ASSERT(off);
	ASSERT(IS_P2ALIGNED((uint64_t)off, PAGESIZE));

	pp = page_find(&mpvp, off);
	ASSERT(pp);
	ASSERT(pp->p_szc == 0);
	ASSERT(pp->p_pagenum != PFN_INVALID);

	return (pp->p_pagenum);
}

/*
 * Remap a memseg's page_t's to dummy pages. Skip the low/high
 * ends of the range if they are already in use.
 */
void
memseg_remap_meta(struct memseg *seg)
{
	int i;
	u_offset_t off;
	page_t *pp;
#if 0
	page_t *epp;
#endif
	pgcnt_t metapgs;

	metapgs = btopr(MSEG_NPAGES(seg) * sizeof (page_t));
	ASSERT(metapgs);
	pp = seg->pages;
	seg->pages_end = seg->pages_base;
#if 0
	epp = seg->epages;

	/*
	 * This code cannot be tested as the kernel does not compile
	 * when the page_t size is changed. It is left here as a starting
	 * point in case an unaligned page_t size needs to be supported.
	 */

	if (!IS_P2ALIGNED((uint64_t)pp, PAGESIZE) &&
	    page_find(&mpvp, (u_offset_t)(pp - 1)) && !page_deleted(pp - 1)) {
		/*
		 * Another memseg has page_t's in the same
		 * page in which 'pp' resides. This would happen
		 * if PAGESIZE is not an integral multiple of
		 * sizeof (page_t) and therefore 'seg->pages'
		 * does not start on a page boundary.
		 *
		 * Since the other memseg's page_t's still
		 * map valid pages, skip remap of this page.
		 * Advance 'pp' to the next page, which should
		 * belong only to the outgoing memseg.
		 *
		 * If the last page_t in the current page
		 * crosses a page boundary, this should still
		 * work. The first part of the page_t is
		 * valid since memseg_lock_delete_all() has
		 * been called. The second part of the page_t
		 * will be remapped to the corresponding
		 * dummy page below.
		 */
		ASSERT(PAGESIZE % sizeof (page_t));
		pp = (page_t *)P2ROUNDUP((uint64_t)pp, PAGESIZE);
		metapgs--;
	}

	if (!IS_P2ALIGNED((uint64_t)epp, PAGESIZE) &&
	    page_find(&mpvp, (u_offset_t)epp) && !page_deleted(epp)) {
		/*
		 * Another memseg has page_t's in the same
		 * page in which 'epp' resides. This would happen
		 * if PAGESIZE is not an integral multiple of
		 * sizeof (page_t) and therefore 'seg->epages'
		 * does not start on a page boundary.
		 *
		 * Since the other memseg's page_t's still
		 * map valid pages, skip remap of this page.
		 */
		ASSERT(PAGESIZE % sizeof (page_t));
		metapgs--;
	}
#endif
	ASSERT(IS_P2ALIGNED((uint64_t)pp, PAGESIZE));

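	/*
	 * Redirect the kernel mappings for this memseg's page_t pages to
	 * the shared dummy pages, so stale references to the outgoing
	 * page_t's no longer touch the physical pages freed below.
	 */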
	remap_to_dummy((caddr_t)pp, metapgs);

	off = (u_offset_t)pp;

	MEMSEG_DEBUG("memseg_remap_meta: off=0x%lx metapgs=0x%lx\n",
	    (uint64_t)off, metapgs);
	/*
	 * Free pages allocated during add.
	 */
	for (i = 0; i < metapgs; i++) {
		pp = page_find(&mpvp, off);
		ASSERT(pp);
		ASSERT(pp->p_szc == 0);
		page_io_unlock(pp);
		page_destroy(pp, 0);
		off += PAGESIZE;
	}
}