xref: /illumos-gate/usr/src/uts/sun4/os/memlist.c (revision e5803b76927480e8f9b67b22201c484ccf4c2bcf)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/param.h>
28 #include <sys/sysmacros.h>
29 #include <sys/signal.h>
30 #include <sys/systm.h>
31 #include <sys/user.h>
32 #include <sys/mman.h>
33 #include <sys/class.h>
34 #include <sys/proc.h>
35 #include <sys/procfs.h>
36 #include <sys/kmem.h>
37 #include <sys/cred.h>
38 #include <sys/archsystm.h>
39 #include <sys/machsystm.h>
40 
41 #include <sys/reboot.h>
42 #include <sys/uadmin.h>
43 
44 #include <sys/vfs.h>
45 #include <sys/vnode.h>
46 #include <sys/session.h>
47 #include <sys/ucontext.h>
48 
49 #include <sys/dnlc.h>
50 #include <sys/var.h>
51 #include <sys/cmn_err.h>
52 #include <sys/debug.h>
53 #include <sys/thread.h>
54 #include <sys/vtrace.h>
55 #include <sys/consdev.h>
56 #include <sys/frame.h>
57 #include <sys/stack.h>
58 #include <sys/swap.h>
59 #include <sys/vmparam.h>
60 #include <sys/cpuvar.h>
61 
62 #include <sys/privregs.h>
63 
64 #include <vm/hat.h>
65 #include <vm/anon.h>
66 #include <vm/as.h>
67 #include <vm/page.h>
68 #include <vm/seg.h>
69 #include <vm/seg_kmem.h>
70 #include <vm/seg_map.h>
71 #include <vm/seg_vn.h>
72 #include <vm/vm_dep.h>
73 
74 #include <sys/exec.h>
75 #include <sys/acct.h>
76 #include <sys/modctl.h>
77 #include <sys/tuneable.h>
78 
79 #include <c2/audit.h>
80 
81 #include <sys/trap.h>
82 #include <sys/sunddi.h>
83 #include <sys/bootconf.h>
84 #include <sys/memlist.h>
85 #include <sys/memlist_plat.h>
86 #include <sys/systeminfo.h>
87 #include <sys/promif.h>
88 #include <sys/prom_plat.h>
89 
/*
 * Bounds of the virtual address range [spec_hole_start, spec_hole_end)
 * that size_virtalloc() treats as the "hole": if a gap in the virt-avail
 * list covers this whole range, its pages are subtracted from the total.
 */
u_longlong_t	spec_hole_start = 0x80000000000ull;
u_longlong_t	spec_hole_end = 0xfffff80000000000ull;
92 
93 pgcnt_t
94 num_phys_pages()
95 {
96 	pgcnt_t npages = 0;
97 	struct memlist *mp;
98 
99 	for (mp = phys_install; mp != NULL; mp = mp->ml_next)
100 		npages += mp->ml_size >> PAGESHIFT;
101 
102 	return (npages);
103 }
104 
105 
106 pgcnt_t
107 size_virtalloc(prom_memlist_t *avail, size_t nelems)
108 {
109 
110 	u_longlong_t	start, end;
111 	pgcnt_t		allocpages = 0;
112 	uint_t		hole_allocated = 0;
113 	uint_t		i;
114 
115 	for (i = 0; i < nelems - 1; i++) {
116 
117 		start = avail[i].addr + avail[i].size;
118 		end = avail[i + 1].addr;
119 
120 		/*
121 		 * Notes:
122 		 *
123 		 * (1) OBP on platforms with US I/II pre-allocates the hole
124 		 * represented by [spec_hole_start, spec_hole_end);
125 		 * pre-allocation is done to make this range unavailable
126 		 * for any allocation.
127 		 *
128 		 * (2) OBP on starcat always pre-allocates the hole similar to
129 		 * platforms with US I/II.
130 		 *
131 		 * (3) OBP on serengeti does _not_ pre-allocate the hole.
132 		 *
133 		 * (4) OBP ignores Spitfire Errata #21; i.e. it does _not_
134 		 * fill up or pre-allocate an additional 4GB on both sides
135 		 * of the hole.
136 		 *
137 		 * (5) kernel virtual range [spec_hole_start, spec_hole_end)
138 		 * is _not_ used on any platform including those with
139 		 * UltraSPARC III where there is no hole.
140 		 *
141 		 * Algorithm:
142 		 *
143 		 * Check if range [spec_hole_start, spec_hole_end) is
144 		 * pre-allocated by OBP; if so, subtract that range from
145 		 * allocpages.
146 		 */
147 		if (end >= spec_hole_end && start <= spec_hole_start)
148 			hole_allocated = 1;
149 
150 		allocpages += btopr(end - start);
151 	}
152 
153 	if (hole_allocated)
154 		allocpages -= btop(spec_hole_end - spec_hole_start);
155 
156 	return (allocpages);
157 }
158 
159 /*
160  * Returns the max contiguous physical memory present in the
161  * memlist "physavail".
162  */
163 uint64_t
164 get_max_phys_size(
165 	struct memlist	*physavail)
166 {
167 	uint64_t	max_size = 0;
168 
169 	for (; physavail; physavail = physavail->ml_next) {
170 		if (physavail->ml_size > max_size)
171 			max_size = physavail->ml_size;
172 	}
173 
174 	return (max_size);
175 }
176 
177 
178 static void
179 more_pages(uint64_t base, uint64_t len)
180 {
181 	void kphysm_add();
182 
183 	kphysm_add(base, len, 1);
184 }
185 
186 static void
187 less_pages(uint64_t base, uint64_t len)
188 {
189 	uint64_t pa, end = base + len;
190 	extern int kcage_on;
191 
192 	for (pa = base; pa < end; pa += PAGESIZE) {
193 		pfn_t pfnum;
194 		page_t *pp;
195 
196 		pfnum = (pfn_t)(pa >> PAGESHIFT);
197 		if ((pp = page_numtopp_nolock(pfnum)) == NULL)
198 			cmn_err(CE_PANIC, "missing pfnum %lx", pfnum);
199 
200 		/*
201 		 * must break up any large pages that may have
202 		 * constituent pages being utilized for
203 		 * prom_alloc()'s. page_reclaim() can't handle
204 		 * large pages.
205 		 */
206 		if (pp->p_szc != 0)
207 			page_boot_demote(pp);
208 
209 		if (!PAGE_LOCKED(pp) && pp->p_lckcnt == 0) {
210 			/*
211 			 * Ahhh yes, a prom page,
212 			 * suck it off the freelist,
213 			 * lock it, and hashin on prom_pages vp.
214 			 */
215 			if (page_trylock(pp, SE_EXCL) == 0)
216 				cmn_err(CE_PANIC, "prom page locked");
217 
218 			(void) page_reclaim(pp, NULL);
219 			/*
220 			 * vnode offsets on the prom_ppages vnode
221 			 * are page numbers (gack) for >32 bit
222 			 * physical memory machines.
223 			 */
224 			(void) page_hashin(pp, &promvp,
225 			    (offset_t)pfnum, NULL);
226 
227 			if (kcage_on) {
228 				ASSERT(pp->p_szc == 0);
229 				if (PP_ISNORELOC(pp) == 0) {
230 					PP_SETNORELOC(pp);
231 					PLCNT_XFER_NORELOC(pp);
232 				}
233 			}
234 			(void) page_pp_lock(pp, 0, 1);
235 		}
236 	}
237 }
238 
239 void
240 diff_memlists(struct memlist *proto, struct memlist *diff, void (*func)())
241 {
242 	uint64_t p_base, p_end, d_base, d_end;
243 
244 	while (proto != NULL) {
245 		/*
246 		 * find diff item which may overlap with proto item
247 		 * if none, apply func to all of proto item
248 		 */
249 		while (diff != NULL &&
250 		    proto->ml_address >= diff->ml_address + diff->ml_size)
251 			diff = diff->ml_next;
252 		if (diff == NULL) {
253 			(*func)(proto->ml_address, proto->ml_size);
254 			proto = proto->ml_next;
255 			continue;
256 		}
257 		if (proto->ml_address == diff->ml_address &&
258 		    proto->ml_size == diff->ml_size) {
259 			proto = proto->ml_next;
260 			diff = diff->ml_next;
261 			continue;
262 		}
263 
264 		p_base = proto->ml_address;
265 		p_end = p_base + proto->ml_size;
266 		d_base = diff->ml_address;
267 		d_end = d_base + diff->ml_size;
268 		/*
269 		 * here p_base < d_end
270 		 * there are 5 cases
271 		 */
272 
273 		/*
274 		 *	d_end
275 		 *	d_base
276 		 *  p_end
277 		 *  p_base
278 		 *
279 		 * apply func to all of proto item
280 		 */
281 		if (p_end <= d_base) {
282 			(*func)(p_base, proto->ml_size);
283 			proto = proto->ml_next;
284 			continue;
285 		}
286 
287 		/*
288 		 * ...
289 		 *	d_base
290 		 *  p_base
291 		 *
292 		 * normalize by applying func from p_base to d_base
293 		 */
294 		if (p_base < d_base)
295 			(*func)(p_base, d_base - p_base);
296 
297 		if (p_end <= d_end) {
298 			/*
299 			 *	d_end
300 			 *  p_end
301 			 *	d_base
302 			 *  p_base
303 			 *
304 			 *	-or-
305 			 *
306 			 *	d_end
307 			 *  p_end
308 			 *  p_base
309 			 *	d_base
310 			 *
311 			 * any non-overlapping ranges applied above,
312 			 * so just continue
313 			 */
314 			proto = proto->ml_next;
315 			continue;
316 		}
317 
318 		/*
319 		 *  p_end
320 		 *	d_end
321 		 *	d_base
322 		 *  p_base
323 		 *
324 		 *	-or-
325 		 *
326 		 *  p_end
327 		 *	d_end
328 		 *  p_base
329 		 *	d_base
330 		 *
331 		 * Find overlapping d_base..d_end ranges, and apply func
332 		 * where no overlap occurs.  Stop when d_base is above
333 		 * p_end
334 		 */
335 		for (p_base = d_end, diff = diff->ml_next; diff != NULL;
336 		    p_base = d_end, diff = diff->ml_next) {
337 			d_base = diff->ml_address;
338 			d_end = d_base + diff->ml_size;
339 			if (p_end <= d_base) {
340 				(*func)(p_base, p_end - p_base);
341 				break;
342 			} else
343 				(*func)(p_base, d_base - p_base);
344 		}
345 		if (diff == NULL)
346 			(*func)(p_base, p_end - p_base);
347 		proto = proto->ml_next;
348 	}
349 }
350 
351 void
352 sync_memlists(struct memlist *orig, struct memlist *new)
353 {
354 
355 	/*
356 	 * Find pages allocated via prom by looking for
357 	 * pages on orig, but no on new.
358 	 */
359 	diff_memlists(orig, new, less_pages);
360 
361 	/*
362 	 * Find pages free'd via prom by looking for
363 	 * pages on new, but not on orig.
364 	 */
365 	diff_memlists(new, orig, more_pages);
366 }
367 
368 
369 /*
370  * Find the page number of the highest installed physical
371  * page and the number of pages installed (one cannot be
372  * calculated from the other because memory isn't necessarily
373  * contiguous).
374  */
375 void
376 installed_top_size_memlist_array(
377 	prom_memlist_t *list,	/* base of array */
378 	size_t	nelems,		/* number of elements */
379 	pfn_t *topp,		/* return ptr for top value */
380 	pgcnt_t *sumpagesp)	/* return prt for sum of installed pages */
381 {
382 	pfn_t top = 0;
383 	pgcnt_t sumpages = 0;
384 	pfn_t highp;		/* high page in a chunk */
385 	size_t i;
386 
387 	for (i = 0; i < nelems; list++, i++) {
388 		highp = (list->addr + list->size - 1) >> PAGESHIFT;
389 		if (top < highp)
390 			top = highp;
391 		sumpages += (list->size >> PAGESHIFT);
392 	}
393 
394 	*topp = top;
395 	*sumpagesp = sumpages;
396 }
397 
398 /*
399  * Copy a memory list.  Used in startup() to copy boot's
400  * memory lists to the kernel.
401  */
402 void
403 copy_memlist(
404 	prom_memlist_t	*src,
405 	size_t		nelems,
406 	struct memlist	**dstp)
407 {
408 	struct memlist *dst, *prev;
409 	size_t	i;
410 
411 	dst = *dstp;
412 	prev = dst;
413 
414 	for (i = 0; i < nelems; src++, i++) {
415 		dst->ml_address = src->addr;
416 		dst->ml_size = src->size;
417 		dst->ml_next = 0;
418 		if (prev == dst) {
419 			dst->ml_prev = 0;
420 			dst++;
421 		} else {
422 			dst->ml_prev = prev;
423 			prev->ml_next = dst;
424 			dst++;
425 			prev++;
426 		}
427 	}
428 
429 	*dstp = dst;
430 }
431 
432 
/*
 * Bookkeeping for the three prom memory lists handled by this file.
 * bootmem_props[] is indexed by PHYSINSTALLED, PHYSAVAIL and VIRTAVAIL;
 * the buffers are set up by init_boot_memlists().
 */
static struct bootmem_props {
	prom_memlist_t	*ptr;		/* start of this list's buffer */
	size_t		nelems;		/* actual number of elements */
	size_t		maxsize;	/* max buffer, in bytes */
} bootmem_props[3];

/* Indices into bootmem_props[] */
#define	PHYSINSTALLED	0
#define	PHYSAVAIL	1
#define	VIRTAVAIL	2
442 
443 /*
444  * Comapct contiguous memory list elements
445  */
446 static void
447 compact_promlist(struct bootmem_props *bpp)
448 {
449 	int i = 0, j;
450 	struct prom_memlist *pmp = bpp->ptr;
451 
452 	for (;;) {
453 		if (pmp[i].addr + pmp[i].size == pmp[i+1].addr) {
454 			pmp[i].size += pmp[i+1].size;
455 			bpp->nelems--;
456 			for (j = i + 1; j < bpp->nelems; j++)
457 				pmp[j] = pmp[j+1];
458 			pmp[j].addr = 0;
459 		} else
460 			i++;
461 		if (i == bpp->nelems)
462 			break;
463 	}
464 }
465 
466 /*
467  *  Sort prom memory lists into ascending order
468  */
469 static void
470 sort_promlist(struct bootmem_props *bpp)
471 {
472 	int i, j, min;
473 	struct prom_memlist *pmp = bpp->ptr;
474 	struct prom_memlist temp;
475 
476 	for (i = 0; i < bpp->nelems; i++) {
477 		min = i;
478 
479 		for (j = i+1; j < bpp->nelems; j++)  {
480 			if (pmp[j].addr < pmp[min].addr)
481 				min = j;
482 		}
483 
484 		if (i != min)  {
485 			/* Swap pmp[i] and pmp[min] */
486 			temp = pmp[min];
487 			pmp[min] = pmp[i];
488 			pmp[i] = temp;
489 		}
490 	}
491 }
492 
493 static int max_bootlist_sz;
494 
495 void
496 init_boot_memlists(void)
497 {
498 	size_t	size, len;
499 	char *start;
500 	struct bootmem_props *tmp;
501 
502 	/*
503 	 * These lists can get fragmented as the prom allocates
504 	 * memory, so generously round up.
505 	 */
506 	size = prom_phys_installed_len() + prom_phys_avail_len() +
507 	    prom_virt_avail_len();
508 	size *= 4;
509 	size = roundup(size, PAGESIZE);
510 	start = prom_alloc(0, size, BO_NO_ALIGN);
511 
512 	/*
513 	 * Get physinstalled
514 	 */
515 	tmp = &bootmem_props[PHYSINSTALLED];
516 	len = prom_phys_installed_len();
517 	if (len == 0)
518 		panic("no \"reg\" in /memory");
519 	tmp->nelems = len / sizeof (struct prom_memlist);
520 	tmp->maxsize = len;
521 	tmp->ptr = (prom_memlist_t *)start;
522 	start += len;
523 	size -= len;
524 	(void) prom_phys_installed((caddr_t)tmp->ptr);
525 	sort_promlist(tmp);
526 	compact_promlist(tmp);
527 
528 	/*
529 	 * Start out giving each half of available space
530 	 */
531 	max_bootlist_sz = size;
532 	len = size / 2;
533 	tmp = &bootmem_props[PHYSAVAIL];
534 	tmp->maxsize = len;
535 	tmp->ptr = (prom_memlist_t *)start;
536 	start += len;
537 
538 	tmp = &bootmem_props[VIRTAVAIL];
539 	tmp->maxsize = len;
540 	tmp->ptr = (prom_memlist_t *)start;
541 }
542 
543 
544 void
545 copy_boot_memlists(
546     prom_memlist_t **physinstalled, size_t *physinstalled_len,
547     prom_memlist_t **physavail, size_t *physavail_len,
548     prom_memlist_t **virtavail, size_t *virtavail_len)
549 {
550 	size_t	plen, vlen, move = 0;
551 	struct bootmem_props *il, *pl, *vl;
552 
553 	plen = prom_phys_avail_len();
554 	if (plen == 0)
555 		panic("no \"available\" in /memory");
556 	vlen = prom_virt_avail_len();
557 	if (vlen == 0)
558 		panic("no \"available\" in /virtual-memory");
559 	if (plen + vlen > max_bootlist_sz)
560 		panic("ran out of prom_memlist space");
561 
562 	pl = &bootmem_props[PHYSAVAIL];
563 	vl = &bootmem_props[VIRTAVAIL];
564 
565 	/*
566 	 * re-adjust ptrs if needed
567 	 */
568 	if (plen > pl->maxsize) {
569 		/* move virt avail up */
570 		move = plen - pl->maxsize;
571 		pl->maxsize = plen;
572 		vl->ptr += move / sizeof (struct prom_memlist);
573 		vl->maxsize -= move;
574 	} else if (vlen > vl->maxsize) {
575 		/* move virt avail down */
576 		move = vlen - vl->maxsize;
577 		vl->maxsize = vlen;
578 		vl->ptr -= move / sizeof (struct prom_memlist);
579 		pl->maxsize -= move;
580 	}
581 
582 	pl->nelems = plen / sizeof (struct prom_memlist);
583 	vl->nelems = vlen / sizeof (struct prom_memlist);
584 
585 	/* now we can retrieve the properties */
586 	(void) prom_phys_avail((caddr_t)pl->ptr);
587 	(void) prom_virt_avail((caddr_t)vl->ptr);
588 
589 	/* .. and sort them */
590 	sort_promlist(pl);
591 	sort_promlist(vl);
592 
593 	il = &bootmem_props[PHYSINSTALLED];
594 	*physinstalled = il->ptr;
595 	*physinstalled_len = il->nelems;
596 
597 	*physavail = pl->ptr;
598 	*physavail_len = pl->nelems;
599 
600 	*virtavail = vl->ptr;
601 	*virtavail_len = vl->nelems;
602 }
603 
604 
605 /*
606  * Find the page number of the highest installed physical
607  * page and the number of pages installed (one cannot be
608  * calculated from the other because memory isn't necessarily
609  * contiguous).
610  */
611 void
612 installed_top_size(
613 	struct memlist *list,	/* pointer to start of installed list */
614 	pfn_t *topp,		/* return ptr for top value */
615 	pgcnt_t *sumpagesp)	/* return prt for sum of installed pages */
616 {
617 	pfn_t top = 0;
618 	pfn_t highp;		/* high page in a chunk */
619 	pgcnt_t sumpages = 0;
620 
621 	for (; list; list = list->ml_next) {
622 		highp = (list->ml_address + list->ml_size - 1) >> PAGESHIFT;
623 		if (top < highp)
624 			top = highp;
625 		sumpages += (uint_t)(list->ml_size >> PAGESHIFT);
626 	}
627 
628 	*topp = top;
629 	*sumpagesp = sumpages;
630 }
631