xref: /titanic_52/usr/src/uts/sun4/os/memlist.c (revision e2529962e4cb04b49c12526895f0536d1d46daf6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/param.h>
30 #include <sys/sysmacros.h>
31 #include <sys/signal.h>
32 #include <sys/systm.h>
33 #include <sys/user.h>
34 #include <sys/mman.h>
35 #include <sys/class.h>
36 #include <sys/proc.h>
37 #include <sys/procfs.h>
38 #include <sys/kmem.h>
39 #include <sys/cred.h>
40 #include <sys/archsystm.h>
41 #include <sys/machsystm.h>
42 
43 #include <sys/reboot.h>
44 #include <sys/uadmin.h>
45 
46 #include <sys/vfs.h>
47 #include <sys/vnode.h>
48 #include <sys/session.h>
49 #include <sys/ucontext.h>
50 
51 #include <sys/dnlc.h>
52 #include <sys/var.h>
53 #include <sys/cmn_err.h>
54 #include <sys/debug.h>
55 #include <sys/thread.h>
56 #include <sys/vtrace.h>
57 #include <sys/consdev.h>
58 #include <sys/frame.h>
59 #include <sys/stack.h>
60 #include <sys/swap.h>
61 #include <sys/vmparam.h>
62 #include <sys/cpuvar.h>
63 
64 #include <sys/privregs.h>
65 
66 #include <vm/hat.h>
67 #include <vm/anon.h>
68 #include <vm/as.h>
69 #include <vm/page.h>
70 #include <vm/seg.h>
71 #include <vm/seg_kmem.h>
72 #include <vm/seg_map.h>
73 #include <vm/seg_vn.h>
74 
75 #include <sys/exec.h>
76 #include <sys/acct.h>
77 #include <sys/modctl.h>
78 #include <sys/tuneable.h>
79 
80 #include <c2/audit.h>
81 
82 #include <sys/trap.h>
83 #include <sys/sunddi.h>
84 #include <sys/bootconf.h>
85 #include <sys/memlist.h>
86 #include <sys/memlist_plat.h>
87 #include <sys/systeminfo.h>
88 #include <sys/promif.h>
89 #include <sys/prom_plat.h>
90 
91 u_longlong_t	spec_hole_start = 0x80000000000ull;
92 u_longlong_t	spec_hole_end = 0xfffff80000000000ull;
93 
94 pgcnt_t
95 num_phys_pages()
96 {
97 	pgcnt_t npages = 0;
98 	struct memlist *mp;
99 
100 	for (mp = phys_install; mp != NULL; mp = mp->next)
101 		npages += mp->size >> PAGESHIFT;
102 
103 	return (npages);
104 }
105 
106 
107 pgcnt_t
108 size_virtalloc(prom_memlist_t *avail, size_t nelems)
109 {
110 
111 	u_longlong_t	start, end;
112 	pgcnt_t		allocpages = 0;
113 	uint_t		hole_allocated = 0;
114 	uint_t		i;
115 
116 	for (i = 0; i < nelems - 1; i++) {
117 
118 		start = avail[i].addr + avail[i].size;
119 		end = avail[i + 1].addr;
120 
121 		/*
122 		 * Notes:
123 		 *
124 		 * (1) OBP on platforms with US I/II pre-allocates the hole
125 		 * represented by [spec_hole_start, spec_hole_end);
126 		 * pre-allocation is done to make this range unavailable
127 		 * for any allocation.
128 		 *
129 		 * (2) OBP on starcat always pre-allocates the hole similar to
130 		 * platforms with US I/II.
131 		 *
132 		 * (3) OBP on serengeti does _not_ pre-allocate the hole.
133 		 *
134 		 * (4) OBP ignores Spitfire Errata #21; i.e. it does _not_
135 		 * fill up or pre-allocate an additional 4GB on both sides
136 		 * of the hole.
137 		 *
138 		 * (5) kernel virtual range [spec_hole_start, spec_hole_end)
139 		 * is _not_ used on any platform including those with
140 		 * UltraSPARC III where there is no hole.
141 		 *
142 		 * Algorithm:
143 		 *
144 		 * Check if range [spec_hole_start, spec_hole_end) is
145 		 * pre-allocated by OBP; if so, subtract that range from
146 		 * allocpages.
147 		 */
148 		if (end >= spec_hole_end && start <= spec_hole_start)
149 			hole_allocated = 1;
150 
151 		allocpages += btopr(end - start);
152 	}
153 
154 	if (hole_allocated)
155 		allocpages -= btop(spec_hole_end - spec_hole_start);
156 
157 	return (allocpages);
158 }
159 
160 /*
161  * Returns the max contiguous physical memory present in the
162  * memlist "physavail".
163  */
164 uint64_t
165 get_max_phys_size(
166 	struct memlist	*physavail)
167 {
168 	uint64_t	max_size = 0;
169 
170 	for (; physavail; physavail = physavail->next) {
171 		if (physavail->size > max_size)
172 			max_size = physavail->size;
173 	}
174 
175 	return (max_size);
176 }
177 
178 
179 
180 struct vnode prom_ppages;
181 
182 static void
183 more_pages(uint64_t base, uint64_t len)
184 {
185 	void kphysm_add();
186 
187 	kphysm_add(base, len, 1);
188 }
189 
190 static void
191 less_pages(uint64_t base, uint64_t len)
192 {
193 	uint64_t pa, end = base + len;
194 	extern int kcage_on;
195 
196 	for (pa = base; pa < end; pa += PAGESIZE) {
197 		pfn_t pfnum;
198 		page_t *pp;
199 
200 		pfnum = (pfn_t)(pa >> PAGESHIFT);
201 		if ((pp = page_numtopp_nolock(pfnum)) == NULL)
202 			cmn_err(CE_PANIC, "missing pfnum %lx", pfnum);
203 
204 		/*
205 		 * must break up any large pages that may have
206 		 * constituent pages being utilized for
207 		 * prom_alloc()'s. page_reclaim() can't handle
208 		 * large pages.
209 		 */
210 		if (pp->p_szc != 0)
211 			page_boot_demote(pp);
212 
213 		if (!PAGE_LOCKED(pp) && pp->p_lckcnt == 0) {
214 			/*
215 			 * Ahhh yes, a prom page,
216 			 * suck it off the freelist,
217 			 * lock it, and hashin on prom_pages vp.
218 			 */
219 			if (page_trylock(pp, SE_EXCL) == 0)
220 				cmn_err(CE_PANIC, "prom page locked");
221 
222 			(void) page_reclaim(pp, NULL);
223 			/*
224 			 * vnode offsets on the prom_ppages vnode
225 			 * are page numbers (gack) for >32 bit
226 			 * physical memory machines.
227 			 */
228 			(void) page_hashin(pp, &prom_ppages,
229 			    (offset_t)pfnum, NULL);
230 
231 			if (kcage_on) {
232 				ASSERT(pp->p_szc == 0);
233 				PP_SETNORELOC(pp);
234 			}
235 			(void) page_pp_lock(pp, 0, 1);
236 		}
237 	}
238 }
239 
240 void
241 diff_memlists(struct memlist *proto, struct memlist *diff, void (*func)())
242 {
243 	uint64_t p_base, p_end, d_base, d_end;
244 
245 	while (proto != NULL) {
246 		/*
247 		 * find diff item which may overlap with proto item
248 		 * if none, apply func to all of proto item
249 		 */
250 		while (diff != NULL &&
251 		    proto->address >= diff->address + diff->size)
252 			diff = diff->next;
253 		if (diff == NULL) {
254 			(*func)(proto->address, proto->size);
255 			proto = proto->next;
256 			continue;
257 		}
258 		if (proto->address == diff->address &&
259 		    proto->size == diff->size) {
260 			proto = proto->next;
261 			diff = diff->next;
262 			continue;
263 		}
264 
265 		p_base = proto->address;
266 		p_end = p_base + proto->size;
267 		d_base = diff->address;
268 		d_end = d_base + diff->size;
269 		/*
270 		 * here p_base < d_end
271 		 * there are 5 cases
272 		 */
273 
274 		/*
275 		 *	d_end
276 		 *	d_base
277 		 *  p_end
278 		 *  p_base
279 		 *
280 		 * apply func to all of proto item
281 		 */
282 		if (p_end <= d_base) {
283 			(*func)(p_base, proto->size);
284 			proto = proto->next;
285 			continue;
286 		}
287 
288 		/*
289 		 * ...
290 		 *	d_base
291 		 *  p_base
292 		 *
293 		 * normalize by applying func from p_base to d_base
294 		 */
295 		if (p_base < d_base)
296 			(*func)(p_base, d_base - p_base);
297 
298 		if (p_end <= d_end) {
299 			/*
300 			 *	d_end
301 			 *  p_end
302 			 *	d_base
303 			 *  p_base
304 			 *
305 			 *	-or-
306 			 *
307 			 *	d_end
308 			 *  p_end
309 			 *  p_base
310 			 *	d_base
311 			 *
312 			 * any non-overlapping ranges applied above,
313 			 * so just continue
314 			 */
315 			proto = proto->next;
316 			continue;
317 		}
318 
319 		/*
320 		 *  p_end
321 		 *	d_end
322 		 *	d_base
323 		 *  p_base
324 		 *
325 		 *	-or-
326 		 *
327 		 *  p_end
328 		 *	d_end
329 		 *  p_base
330 		 *	d_base
331 		 *
332 		 * Find overlapping d_base..d_end ranges, and apply func
333 		 * where no overlap occurs.  Stop when d_base is above
334 		 * p_end
335 		 */
336 		for (p_base = d_end, diff = diff->next; diff != NULL;
337 		    p_base = d_end, diff = diff->next) {
338 			d_base = diff->address;
339 			d_end = d_base + diff->size;
340 			if (p_end <= d_base) {
341 				(*func)(p_base, p_end - p_base);
342 				break;
343 			} else
344 				(*func)(p_base, d_base - p_base);
345 		}
346 		if (diff == NULL)
347 			(*func)(p_base, p_end - p_base);
348 		proto = proto->next;
349 	}
350 }
351 
352 void
353 sync_memlists(struct memlist *orig, struct memlist *new)
354 {
355 
356 	/*
357 	 * Find pages allocated via prom by looking for
358 	 * pages on orig, but no on new.
359 	 */
360 	diff_memlists(orig, new, less_pages);
361 
362 	/*
363 	 * Find pages free'd via prom by looking for
364 	 * pages on new, but not on orig.
365 	 */
366 	diff_memlists(new, orig, more_pages);
367 }
368 
369 
370 /*
371  * Find the page number of the highest installed physical
372  * page and the number of pages installed (one cannot be
373  * calculated from the other because memory isn't necessarily
374  * contiguous).
375  */
376 void
377 installed_top_size_memlist_array(
378 	prom_memlist_t *list,	/* base of array */
379 	size_t	nelems,		/* number of elements */
380 	pfn_t *topp,		/* return ptr for top value */
381 	pgcnt_t *sumpagesp)	/* return prt for sum of installed pages */
382 {
383 	pfn_t top = 0;
384 	pgcnt_t sumpages = 0;
385 	pfn_t highp;		/* high page in a chunk */
386 	size_t i;
387 
388 	for (i = 0; i < nelems; list++, i++) {
389 		highp = (list->addr + list->size - 1) >> PAGESHIFT;
390 		if (top < highp)
391 			top = highp;
392 		sumpages += (list->size >> PAGESHIFT);
393 	}
394 
395 	*topp = top;
396 	*sumpagesp = sumpages;
397 }
398 
399 /*
400  * Copy a memory list.  Used in startup() to copy boot's
401  * memory lists to the kernel.
402  */
403 void
404 copy_memlist(
405 	prom_memlist_t	*src,
406 	size_t		nelems,
407 	struct memlist	**dstp)
408 {
409 	struct memlist *dst, *prev;
410 	size_t	i;
411 
412 	dst = *dstp;
413 	prev = dst;
414 
415 	for (i = 0; i < nelems; src++, i++) {
416 		dst->address = src->addr;
417 		dst->size = src->size;
418 		dst->next = 0;
419 		if (prev == dst) {
420 			dst->prev = 0;
421 			dst++;
422 		} else {
423 			dst->prev = prev;
424 			prev->next = dst;
425 			dst++;
426 			prev++;
427 		}
428 	}
429 
430 	*dstp = dst;
431 }
432 
433 
434 static struct bootmem_props {
435 	prom_memlist_t	*ptr;
436 	size_t		nelems;		/* actual number of elements */
437 	size_t		maxsize;	/* max buffer */
438 } bootmem_props[3];
439 
440 #define	PHYSINSTALLED	0
441 #define	PHYSAVAIL	1
442 #define	VIRTAVAIL	2
443 
444 /*
445  * Comapct contiguous memory list elements
446  */
447 static void
448 compact_promlist(struct bootmem_props *bpp)
449 {
450 	int i = 0, j;
451 	struct prom_memlist *pmp = bpp->ptr;
452 
453 	for (;;) {
454 		if (pmp[i].addr + pmp[i].size == pmp[i+1].addr) {
455 			pmp[i].size += pmp[i+1].size;
456 			bpp->nelems--;
457 			for (j = i + 1; j < bpp->nelems; j++)
458 				pmp[j] = pmp[j+1];
459 			pmp[j].addr = 0;
460 		} else
461 			i++;
462 		if (i == bpp->nelems)
463 			break;
464 	}
465 }
466 
467 /*
468  *  Sort prom memory lists into ascending order
469  */
470 static void
471 sort_promlist(struct bootmem_props *bpp)
472 {
473 	int i, j, min;
474 	struct prom_memlist *pmp = bpp->ptr;
475 	struct prom_memlist temp;
476 
477 	for (i = 0; i < bpp->nelems; i++) {
478 		min = i;
479 
480 		for (j = i+1; j < bpp->nelems; j++)  {
481 			if (pmp[j].addr < pmp[min].addr)
482 				min = j;
483 		}
484 
485 		if (i != min)  {
486 			/* Swap pmp[i] and pmp[min] */
487 			temp = pmp[min];
488 			pmp[min] = pmp[i];
489 			pmp[i] = temp;
490 		}
491 	}
492 }
493 
494 static int max_bootlist_sz;
495 
496 void
497 init_boot_memlists(void)
498 {
499 	size_t	size, len;
500 	char *start;
501 	struct bootmem_props *tmp;
502 
503 	/*
504 	 * These lists can get fragmented as the prom allocates
505 	 * memory, so generously round up.
506 	 */
507 	size = prom_phys_installed_len() + prom_phys_avail_len() +
508 	    prom_virt_avail_len();
509 	size *= 4;
510 	size = roundup(size, PAGESIZE);
511 	start = prom_alloc(0, size, BO_NO_ALIGN);
512 
513 	/*
514 	 * Get physinstalled
515 	 */
516 	tmp = &bootmem_props[PHYSINSTALLED];
517 	len = prom_phys_installed_len();
518 	if (len == 0)
519 		panic("no \"reg\" in /memory");
520 	tmp->nelems = len / sizeof (struct prom_memlist);
521 	tmp->maxsize = len;
522 	tmp->ptr = (prom_memlist_t *)start;
523 	start += len;
524 	size -= len;
525 	(void) prom_phys_installed((caddr_t)tmp->ptr);
526 	sort_promlist(tmp);
527 	compact_promlist(tmp);
528 
529 	/*
530 	 * Start out giving each half of available space
531 	 */
532 	max_bootlist_sz = size;
533 	len = size / 2;
534 	tmp = &bootmem_props[PHYSAVAIL];
535 	tmp->maxsize = len;
536 	tmp->ptr = (prom_memlist_t *)start;
537 	start += len;
538 
539 	tmp = &bootmem_props[VIRTAVAIL];
540 	tmp->maxsize = len;
541 	tmp->ptr = (prom_memlist_t *)start;
542 }
543 
544 
545 void
546 copy_boot_memlists(
547     prom_memlist_t **physinstalled, size_t *physinstalled_len,
548     prom_memlist_t **physavail, size_t *physavail_len,
549     prom_memlist_t **virtavail, size_t *virtavail_len)
550 {
551 	size_t	plen, vlen, move = 0;
552 	struct bootmem_props *il, *pl, *vl;
553 
554 	plen = prom_phys_avail_len();
555 	if (plen == 0)
556 		panic("no \"available\" in /memory");
557 	vlen = prom_virt_avail_len();
558 	if (vlen == 0)
559 		panic("no \"available\" in /virtual-memory");
560 	if (plen + vlen > max_bootlist_sz)
561 		panic("ran out of prom_memlist space");
562 
563 	pl = &bootmem_props[PHYSAVAIL];
564 	vl = &bootmem_props[VIRTAVAIL];
565 
566 	/*
567 	 * re-adjust ptrs if needed
568 	 */
569 	if (plen > pl->maxsize) {
570 		/* move virt avail up */
571 		move = plen - pl->maxsize;
572 		pl->maxsize = plen;
573 		vl->ptr += move / sizeof (struct prom_memlist);
574 		vl->maxsize -= move;
575 	} else if (vlen > vl->maxsize) {
576 		/* move virt avail down */
577 		move = vlen - vl->maxsize;
578 		vl->maxsize = vlen;
579 		vl->ptr -= move / sizeof (struct prom_memlist);
580 		pl->maxsize -= move;
581 	}
582 
583 	pl->nelems = plen / sizeof (struct prom_memlist);
584 	vl->nelems = vlen / sizeof (struct prom_memlist);
585 
586 	/* now we can retrieve the properties */
587 	(void) prom_phys_avail((caddr_t)pl->ptr);
588 	(void) prom_virt_avail((caddr_t)vl->ptr);
589 
590 	/* .. and sort them */
591 	sort_promlist(pl);
592 	sort_promlist(vl);
593 
594 	il = &bootmem_props[PHYSINSTALLED];
595 	*physinstalled = il->ptr;
596 	*physinstalled_len = il->nelems;
597 
598 	*physavail = pl->ptr;
599 	*physavail_len = pl->nelems;
600 
601 	*virtavail = vl->ptr;
602 	*virtavail_len = vl->nelems;
603 }
604 
605 
606 /*
607  * Find the page number of the highest installed physical
608  * page and the number of pages installed (one cannot be
609  * calculated from the other because memory isn't necessarily
610  * contiguous).
611  */
612 void
613 installed_top_size(
614 	struct memlist *list,	/* pointer to start of installed list */
615 	pfn_t *topp,		/* return ptr for top value */
616 	pgcnt_t *sumpagesp)	/* return prt for sum of installed pages */
617 {
618 	pfn_t top = 0;
619 	pfn_t highp;		/* high page in a chunk */
620 	pgcnt_t sumpages = 0;
621 
622 	for (; list; list = list->next) {
623 		highp = (list->address + list->size - 1) >> PAGESHIFT;
624 		if (top < highp)
625 			top = highp;
626 		sumpages += (uint_t)(list->size >> PAGESHIFT);
627 	}
628 
629 	*topp = top;
630 	*sumpagesp = sumpages;
631 }
632