/*
 *
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/cmn_err.h>
#include <sys/vm.h>
#include <sys/mman.h>
#include <vm/vm_dep.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kpm.h>
#include <sys/mem_config.h>
#include <sys/sysmacros.h>

extern pgcnt_t pp_dummy_npages;
extern pfn_t *pp_dummy_pfn;	/* Array of dummy pfns. */

extern kmutex_t memseg_lists_lock;
extern struct memseg *memseg_va_avail;
extern struct memseg *memseg_alloc();

extern page_t *ppvm_base;
extern pgcnt_t ppvm_size;

static vnode_t pp_vn, rsv_vn;
static pgcnt_t rsv_metapgs;
static int meta_rsv_enable;
static int sun4v_memseg_debug;

extern struct memseg *memseg_reuse(pgcnt_t);
extern void remap_to_dummy(caddr_t, pgcnt_t);

/*
 * The page_t memory for incoming pages is allocated from existing memory,
 * which can create a situation where a memory addition fails because of
 * a shortage of existing memory.  To mitigate this, some memory is always
 * reserved ahead of time for page_t allocation.  Each 4MB of reserved
 * page_t's guarantees a 256MB (x64) addition without page_t allocation.
 * The added 256MB of memory could in turn theoretically allow a further
 * addition of 16GB.
 */
#define	RSV_SIZE	0x40000000	/* add size covered by rsvd page_t's: 1G */
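
/*
 * Worked example of the reservation sizing (illustrative only; it assumes
 * the 8K sun4v PAGESIZE and the 64:1 ratio of mapped memory to metadata
 * cited above, i.e. a sizeof (page_t) of roughly 128 bytes):
 *
 *	btop(RSV_SIZE) = 1GB / 8K = 131072 incoming pages
 *	131072 * sizeof (page_t) = ~16MB of page_t metadata
 *	btopr(~16MB) = 2048 reserved metadata pages
 *
 * So rsv_alloc() below keeps roughly 2048 pages hashed on rsv_vn, enough
 * to add RSV_SIZE (1GB) of memory without allocating any page_t's from
 * the existing free list.
 */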

#ifdef DEBUG
#define	MEMSEG_DEBUG(args...)	if (sun4v_memseg_debug) printf(args)
#else
#define	MEMSEG_DEBUG(args...)
#endif

/*
 * The page_t's for the incoming memory are allocated from
 * existing pages.
 */
/*ARGSUSED*/
int
memseg_alloc_meta(pfn_t base, pgcnt_t npgs, void **ptp, pgcnt_t *metap)
{
	page_t *pp, *opp, *epp, *pgpp;
	pgcnt_t metapgs;
	int i, rsv;
	struct seg kseg;
	caddr_t vaddr;
	u_offset_t off;

	/*
	 * Verify that the incoming memory is within the supported DR range.
	 */
	if ((base + npgs) * sizeof (page_t) > ppvm_size)
		return (KPHYSM_ENOTSUP);

	opp = pp = ppvm_base + base;
	epp = pp + npgs;
	metapgs = btopr(npgs * sizeof (page_t));

	if (!IS_P2ALIGNED((uint64_t)pp, PAGESIZE) &&
	    page_find(&pp_vn, (u_offset_t)pp)) {
		/*
		 * Another memseg has page_t's in the same
		 * page in which 'pp' resides.  This would happen
		 * if PAGESIZE is not an integral multiple of
		 * sizeof (page_t) and therefore 'pp'
		 * does not start on a page boundary.
		 *
		 * Since the other memseg's page_t's still
		 * map valid pages, skip allocation of this page.
		 * Advance 'pp' to the next page, which should
		 * belong only to the incoming memseg.
		 *
		 * If the last page_t in the current page
		 * crosses a page boundary, this should still
		 * work.  The first part of the page_t is
		 * already allocated.  The second part of
		 * the page_t will be allocated below.
		 */
		ASSERT(PAGESIZE % sizeof (page_t));
		pp = (page_t *)P2ROUNDUP((uint64_t)pp, PAGESIZE);
		metapgs--;
	}

	if (!IS_P2ALIGNED((uint64_t)epp, PAGESIZE) &&
	    page_find(&pp_vn, (u_offset_t)epp)) {
		/*
		 * Another memseg has page_t's in the same
		 * page in which 'epp' resides.  This would happen
		 * if PAGESIZE is not an integral multiple of
		 * sizeof (page_t) and therefore 'epp'
		 * does not start on a page boundary.
		 *
		 * Since the other memseg's page_t's still
		 * map valid pages, skip allocation of this page.
		 */
		ASSERT(PAGESIZE % sizeof (page_t));
		metapgs--;
	}

	ASSERT(IS_P2ALIGNED((uint64_t)pp, PAGESIZE));

	/*
	 * Back the metadata space with physical pages.  First verify
	 * that none of the pages in the range are already in use.
	 */
	kseg.s_as = &kas;
	vaddr = (caddr_t)pp;

	for (i = 0; i < metapgs; i++)
		if (page_find(&pp_vn, (u_offset_t)(vaddr + i * PAGESIZE)))
			panic("page_find(0x%p, 0x%p)\n",
			    (void *)&pp_vn, (void *)(vaddr + i * PAGESIZE));

	/*
	 * Allocate the metadata pages; these are the pages that will
	 * contain the page_t's for the incoming memory.
	 *
	 * If the request fits within the reserve, satisfy it from the
	 * reserved pages; otherwise allocate fresh pages (without
	 * PG_WAIT), failing the add with KPHYSM_ERESOURCE if the
	 * allocation cannot be satisfied.
	 */
	rsv = off = 0;
	if (metapgs <= rsv_metapgs) {
		MEMSEG_DEBUG("memseg_alloc_meta: use rsv 0x%lx metapgs\n",
		    metapgs);
		ASSERT(meta_rsv_enable);
		rsv = 1;
	} else if ((pgpp = page_create_va(&pp_vn, (u_offset_t)pp,
	    ptob(metapgs), PG_NORELOC | PG_EXCL, &kseg, vaddr)) == NULL) {
		cmn_err(CE_WARN, "memseg_alloc_meta: can't get 0x%lx metapgs",
		    metapgs);
		return (KPHYSM_ERESOURCE);
	}
	if (rsv) {
		/*
		 * The reserve pages must be hashed out of the reserve vnode
		 * and rehashed by <pp_vn, vaddr>.  The reserve must also be
		 * replenished immediately at the end of the add processing.
		 */
		for (i = 0; i < metapgs; i++) {
			pgpp = page_find(&rsv_vn, off);
			ASSERT(pgpp);
			page_hashout(pgpp, 0);
			hat_devload(kas.a_hat, vaddr, PAGESIZE,
			    page_pptonum(pgpp), PROT_READ | PROT_WRITE,
			    HAT_LOAD | HAT_LOAD_REMAP | HAT_LOAD_NOCONSIST);
			ASSERT(!page_find(&pp_vn, (u_offset_t)vaddr));
			if (!page_hashin(pgpp, &pp_vn, (u_offset_t)vaddr, 0))
				panic("memseg_alloc_meta: page_hashin(0x%p, "
				    "0x%p)", (void *)pgpp, (void *)vaddr);
			off += PAGESIZE;
			vaddr += PAGESIZE;
			rsv_metapgs--;
		}
	} else {
		for (i = 0; i < metapgs; i++) {
			hat_devload(kas.a_hat, vaddr, PAGESIZE,
			    page_pptonum(pgpp), PROT_READ | PROT_WRITE,
			    HAT_LOAD | HAT_LOAD_REMAP | HAT_LOAD_NOCONSIST);
			pgpp = pgpp->p_next;
			vaddr += PAGESIZE;
		}
	}

	ASSERT(ptp);
	ASSERT(metap);

	*ptp = (void *)opp;
	*metap = metapgs;

	return (KPHYSM_OK);
}
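
/*
 * Sketch of the expected call sequence from a DR add path (illustrative
 * only; the error-handling shape and the locals "err", "ptp", "metapgs"
 * and "pfn" are assumptions, not the actual mem_config consumer):
 *
 *	void *ptp;
 *	pgcnt_t metapgs, i;
 *	pfn_t pfn;
 *	int err;
 *
 *	if ((err = memseg_alloc_meta(base, npgs, &ptp, &metapgs)) !=
 *	    KPHYSM_OK)
 *		return (err);
 *	for (i = 0; i < metapgs; i++)
 *		pfn = memseg_get_metapfn(ptp, i);  backing pfn per page
 *	...
 *	if the add fails later:
 *		memseg_free_meta(ptp, metapgs);
 */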

void
memseg_free_meta(void *ptp, pgcnt_t metapgs)
{
	int i;
	page_t *pp;
	u_offset_t off;

	if (!metapgs)
		return;

	off = (u_offset_t)ptp;

	ASSERT(off);
	ASSERT(IS_P2ALIGNED((uint64_t)off, PAGESIZE));

	MEMSEG_DEBUG("memseg_free_meta: off=0x%lx metapgs=0x%lx\n",
	    (uint64_t)off, metapgs);
	/*
	 * Free the pages allocated during the add.
	 */
	for (i = 0; i < metapgs; i++) {
		pp = page_find(&pp_vn, off);
		ASSERT(pp);
		ASSERT(pp->p_szc == 0);
		page_io_unlock(pp);
		page_destroy(pp, 0);
		off += PAGESIZE;
	}
}

pfn_t
memseg_get_metapfn(void *ptp, pgcnt_t metapg)
{
	page_t *pp;
	u_offset_t off;

	off = (u_offset_t)ptp + ptob(metapg);

	ASSERT(off);
	ASSERT(IS_P2ALIGNED((uint64_t)off, PAGESIZE));

	pp = page_find(&pp_vn, off);
	ASSERT(pp);
	ASSERT(pp->p_szc == 0);
	ASSERT(pp->p_pagenum != PFN_INVALID);

	return (pp->p_pagenum);
}

/*
 * Remap a memseg's page_t's to dummy pages.  Skip the low/high
 * ends of the range if they are already in use.
 */
void
memseg_remap_meta(struct memseg *seg)
{
	int i;
	u_offset_t off;
	page_t *pp;
#if 0
	page_t *epp;
#endif
	pgcnt_t metapgs;

	metapgs = btopr(MSEG_NPAGES(seg) * sizeof (page_t));
	ASSERT(metapgs);
	pp = seg->pages;
	seg->pages_end = seg->pages_base;
#if 0
	epp = seg->epages;

	/*
	 * This code cannot be tested as the kernel does not compile
	 * when the page_t size is changed.  It is left here as a starting
	 * point if an unaligned page_t size needs to be supported.
	 */

	if (!IS_P2ALIGNED((uint64_t)pp, PAGESIZE) &&
	    page_find(&pp_vn, (u_offset_t)(pp - 1)) && !page_deleted(pp - 1)) {
		/*
		 * Another memseg has page_t's in the same
		 * page in which 'pp' resides.  This would happen
		 * if PAGESIZE is not an integral multiple of
		 * sizeof (page_t) and therefore 'seg->pages'
		 * does not start on a page boundary.
		 *
		 * Since the other memseg's page_t's still
		 * map valid pages, skip remap of this page.
		 * Advance 'pp' to the next page, which should
		 * belong only to the outgoing memseg.
		 *
		 * If the last page_t in the current page
		 * crosses a page boundary, this should still
		 * work.  The first part of the page_t is
		 * valid since memseg_lock_delete_all() has
		 * been called.  The second part of the page_t
		 * will be remapped to the corresponding
		 * dummy page below.
		 */
		ASSERT(PAGESIZE % sizeof (page_t));
		pp = (page_t *)P2ROUNDUP((uint64_t)pp, PAGESIZE);
		metapgs--;
	}

	if (!IS_P2ALIGNED((uint64_t)epp, PAGESIZE) &&
	    page_find(&pp_vn, (u_offset_t)epp) && !page_deleted(epp)) {
		/*
		 * Another memseg has page_t's in the same
		 * page in which 'epp' resides.  This would happen
		 * if PAGESIZE is not an integral multiple of
		 * sizeof (page_t) and therefore 'seg->epages'
		 * does not start on a page boundary.
		 *
		 * Since the other memseg's page_t's still
		 * map valid pages, skip remap of this page.
		 */
		ASSERT(PAGESIZE % sizeof (page_t));
		metapgs--;
	}
#endif
	ASSERT(IS_P2ALIGNED((uint64_t)pp, PAGESIZE));

	remap_to_dummy((caddr_t)pp, metapgs);

	off = (u_offset_t)pp;

	MEMSEG_DEBUG("memseg_remap_meta: off=0x%lx metapgs=0x%lx\n",
	    (uint64_t)off, metapgs);
	/*
	 * Free the pages allocated during the add.
	 */
	for (i = 0; i < metapgs; i++) {
		pp = page_find(&pp_vn, off);
		ASSERT(pp);
		ASSERT(pp->p_szc == 0);
		page_io_unlock(pp);
		page_destroy(pp, 0);
		off += PAGESIZE;
	}
}
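
/*
 * Note on the delete path above (a summary, not a new mechanism): once
 * remap_to_dummy() has pointed the metadata VA range at the shared dummy
 * pfns (pp_dummy_pfn), any stale reference into the deleted memseg's
 * page_t's lands on a dummy page rather than on memory that has been
 * returned to the system, which is what makes it safe for
 * memseg_remap_meta() to destroy the physical pages that backed the
 * metadata.
 */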

static void
rsv_alloc(void)
{
	int i;
	page_t *pp;
	pgcnt_t metapgs;
	u_offset_t off;
	struct seg kseg;

	kseg.s_as = &kas;

	/*
	 * Reserve enough page_t pages for an add request of
	 * RSV_SIZE bytes.  Only the shortfall is allocated, so this
	 * also serves to top the reserve back up after an add has
	 * consumed part of it.
	 */
	metapgs = btopr(btop(RSV_SIZE) * sizeof (page_t)) - rsv_metapgs;

	for (i = off = 0; i < metapgs; i++, off += PAGESIZE) {
		(void) page_create_va(&rsv_vn, off, PAGESIZE,
		    PG_NORELOC | PG_WAIT, &kseg, 0);
		pp = page_find(&rsv_vn, off);
		ASSERT(pp);
		ASSERT(PAGE_EXCL(pp));
		page_iolock_init(pp);
		rsv_metapgs++;
	}
}

void
i_dr_mem_init(size_t *hint)
{
	if (meta_rsv_enable) {
		rsv_alloc();
		if (hint)
			*hint = RSV_SIZE;
	}
}

void
i_dr_mem_fini(void)
{
	int i;
	page_t *pp;
	u_offset_t off;

	for (i = off = 0; i < rsv_metapgs; i++, off += PAGESIZE) {
		if ((pp = page_find(&rsv_vn, off)) != NULL) {
			ASSERT(PAGE_EXCL(pp));
			page_destroy(pp, 0);
		}
		ASSERT(!page_find(&rsv_vn, off));
	}
	rsv_metapgs = 0;
}

void
i_dr_mem_update(void)
{
	rsv_alloc();
}
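
/*
 * Illustrative replenish arithmetic (under the same assumed 64:1 sizing
 * as the RSV_SIZE example above): if an add consumes N reserved metadata
 * pages, rsv_metapgs drops from 2048 to 2048 - N, so the next
 * i_dr_mem_update() call has rsv_alloc() compute
 *
 *	metapgs = btopr(btop(RSV_SIZE) * sizeof (page_t)) - rsv_metapgs
 *		= 2048 - (2048 - N) = N
 *
 * and allocate exactly the N pages needed to restore full RSV_SIZE
 * coverage.
 */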