xref: /illumos-gate/usr/src/uts/sun4/os/memnode.c (revision 24f5a37652e188ebdcdd6da454511686935025df)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/systm.h>
27 #include <sys/platform_module.h>
28 #include <sys/sysmacros.h>
29 #include <sys/atomic.h>
30 #include <sys/memlist.h>
31 #include <sys/memnode.h>
32 #include <vm/vm_dep.h>
33 
34 int max_mem_nodes = 1;		/* max memory nodes on this system */
35 
36 struct mem_node_conf mem_node_config[MAX_MEM_NODES];
37 int mem_node_pfn_shift;
/*
 * num_memnodes must be updated atomically and must always be >=
 * the number of bits set in memnodes_mask, or the algorithm may fail.
 */
42 uint16_t num_memnodes;
43 mnodeset_t memnodes_mask; /* assumes 8*(sizeof(mnodeset_t)) >= MAX_MEM_NODES */
44 
45 /*
46  * If set, mem_node_physalign should be a power of two, and
47  * should reflect the minimum address alignment of each node.
48  */
49 uint64_t mem_node_physalign;
50 
51 /*
52  * Platform hooks we will need.
53  */
54 
55 #pragma weak plat_build_mem_nodes
56 #pragma weak plat_slice_add
57 #pragma weak plat_slice_del
58 
59 /*
60  * Adjust the memnode config after a DR operation.
61  *
62  * It is rather tricky to do these updates since we can't
63  * protect the memnode structures with locks, so we must
64  * be mindful of the order in which updates and reads to
65  * these values can occur.
66  */
67 void
68 mem_node_add_slice(pfn_t start, pfn_t end)
69 {
70 	int mnode;
71 	mnodeset_t newmask, oldmask;
72 
73 	/*
74 	 * DR will pass us the first pfn that is allocatable.
75 	 * We need to round down to get the real start of
76 	 * the slice.
77 	 */
78 	if (mem_node_physalign) {
79 		start &= ~(btop(mem_node_physalign) - 1);
80 		end = roundup(end, btop(mem_node_physalign)) - 1;
81 	}
82 
83 	mnode = PFN_2_MEM_NODE(start);
84 	ASSERT(mnode < max_mem_nodes);
85 
86 	if (atomic_cas_32((uint32_t *)&mem_node_config[mnode].exists, 0, 1)) {
87 		/*
88 		 * Add slice to existing node.
89 		 */
90 		if (start < mem_node_config[mnode].physbase)
91 			mem_node_config[mnode].physbase = start;
92 		if (end > mem_node_config[mnode].physmax)
93 			mem_node_config[mnode].physmax = end;
94 	} else {
95 		mem_node_config[mnode].physbase = start;
96 		mem_node_config[mnode].physmax = end;
97 		atomic_inc_16(&num_memnodes);
98 		do {
99 			oldmask = memnodes_mask;
100 			newmask = memnodes_mask | (1ull << mnode);
101 		} while (atomic_cas_64(&memnodes_mask, oldmask, newmask) !=
102 			 oldmask);
103 	}
104 	/*
105 	 * Let the common lgrp framework know about the new memory
106 	 */
107 	lgrp_config(LGRP_CONFIG_MEM_ADD, mnode, MEM_NODE_2_LGRPHAND(mnode));
108 }
109 
110 /*
111  * Remove a PFN range from a memnode.  On some platforms,
112  * the memnode will be created with physbase at the first
113  * allocatable PFN, but later deleted with the MC slice
114  * base address converted to a PFN, in which case we need
115  * to assume physbase and up.
116  */
117 void
118 mem_node_del_slice(pfn_t start, pfn_t end)
119 {
120 	int mnode;
121 	pgcnt_t delta_pgcnt, node_size;
122 	mnodeset_t omask, nmask;
123 
124 	if (mem_node_physalign) {
125 		start &= ~(btop(mem_node_physalign) - 1);
126 		end = roundup(end, btop(mem_node_physalign)) - 1;
127 	}
128 	mnode = PFN_2_MEM_NODE(start);
129 
130 	ASSERT(mnode < max_mem_nodes);
131 	ASSERT(mem_node_config[mnode].exists == 1);
132 
133 	delta_pgcnt = end - start;
134 	node_size = mem_node_config[mnode].physmax -
135 	    mem_node_config[mnode].physbase;
136 
137 	if (node_size > delta_pgcnt) {
138 		/*
139 		 * Subtract the slice from the memnode.
140 		 */
141 		if (start <= mem_node_config[mnode].physbase)
142 			mem_node_config[mnode].physbase = end + 1;
143 		ASSERT(end <= mem_node_config[mnode].physmax);
144 		if (end == mem_node_config[mnode].physmax)
145 			mem_node_config[mnode].physmax = start - 1;
146 	} else {
147 
148 		/*
149 		 * Let the common lgrp framework know the mnode is
150 		 * leaving
151 		 */
152 		lgrp_config(LGRP_CONFIG_MEM_DEL, mnode,
153 		    MEM_NODE_2_LGRPHAND(mnode));
154 
155 		/*
156 		 * Delete the whole node.
157 		 */
158 		ASSERT(MNODE_PGCNT(mnode) == 0);
159 		do {
160 			omask = memnodes_mask;
161 			nmask = omask & ~(1ull << mnode);
162 		} while (atomic_cas_64(&memnodes_mask, omask, nmask) != omask);
163 		atomic_dec_16(&num_memnodes);
164 		mem_node_config[mnode].exists = 0;
165 	}
166 }
167 
168 void
169 mem_node_add_range(pfn_t start, pfn_t end)
170 {
171 	if (&plat_slice_add != NULL)
172 		plat_slice_add(start, end);
173 	else
174 		mem_node_add_slice(start, end);
175 }
176 
177 void
178 mem_node_del_range(pfn_t start, pfn_t end)
179 {
180 	if (&plat_slice_del != NULL)
181 		plat_slice_del(start, end);
182 	else
183 		mem_node_del_slice(start, end);
184 }
185 
186 void
187 startup_build_mem_nodes(prom_memlist_t *list, size_t nelems)
188 {
189 	size_t	elem;
190 	pfn_t	basepfn;
191 	pgcnt_t	npgs;
192 
193 	/* LINTED: ASSERT will always true or false */
194 	ASSERT(NBBY * sizeof (mnodeset_t) >= max_mem_nodes);
195 
196 	if (&plat_build_mem_nodes != NULL) {
197 		plat_build_mem_nodes(list, nelems);
198 	} else {
199 		/*
200 		 * Boot install lists are arranged <addr, len>, ...
201 		 */
202 		for (elem = 0; elem < nelems; list++, elem++) {
203 			basepfn = btop(list->addr);
204 			npgs = btop(list->size);
205 			mem_node_add_range(basepfn, basepfn + npgs - 1);
206 		}
207 	}
208 }
209 
210 /*
211  * Allocate an unassigned memnode.
212  */
213 int
214 mem_node_alloc()
215 {
216 	int mnode;
217 	mnodeset_t newmask, oldmask;
218 
219 	/*
220 	 * Find an unused memnode.  Update it atomically to prevent
221 	 * a first time memnode creation race.
222 	 */
223 	for (mnode = 0; mnode < max_mem_nodes; mnode++)
224 		if (atomic_cas_32((uint32_t *)&mem_node_config[mnode].exists,
225 		    0, 1) == 0)
226 			break;
227 
228 	if (mnode >= max_mem_nodes)
229 			panic("Out of free memnodes\n");
230 
231 	mem_node_config[mnode].physbase = (uint64_t)-1;
232 	mem_node_config[mnode].physmax = 0;
233 	atomic_inc_16(&num_memnodes);
234 	do {
235 		oldmask = memnodes_mask;
236 		newmask = memnodes_mask | (1ull << mnode);
237 	} while (atomic_cas_64(&memnodes_mask, oldmask, newmask) != oldmask);
238 
239 	return (mnode);
240 }
241 
242 /*
243  * Find the intersection between a memnode and a memlist
244  * and returns the number of pages that overlap.
245  *
246  * Grab the memlist lock to protect the list from DR operations.
247  */
248 pgcnt_t
249 mem_node_memlist_pages(int mnode, struct memlist *mlist)
250 {
251 	pfn_t		base, end;
252 	pfn_t		cur_base, cur_end;
253 	pgcnt_t		npgs = 0;
254 	pgcnt_t		pages;
255 	struct memlist	*pmem;
256 
257 	if (&plat_mem_node_intersect_range != NULL) {
258 		memlist_read_lock();
259 
260 		for (pmem = mlist; pmem; pmem = pmem->ml_next) {
261 			plat_mem_node_intersect_range(btop(pmem->ml_address),
262 			    btop(pmem->ml_size), mnode, &pages);
263 			npgs += pages;
264 		}
265 
266 		memlist_read_unlock();
267 		return (npgs);
268 	}
269 
270 	base = mem_node_config[mnode].physbase;
271 	end = mem_node_config[mnode].physmax;
272 
273 	memlist_read_lock();
274 
275 	for (pmem = mlist; pmem; pmem = pmem->ml_next) {
276 		cur_base = btop(pmem->ml_address);
277 		cur_end = cur_base + btop(pmem->ml_size) - 1;
278 		if (end < cur_base || base > cur_end)
279 			continue;
280 		npgs = npgs + (MIN(cur_end, end) -
281 		    MAX(cur_base, base)) + 1;
282 	}
283 
284 	memlist_read_unlock();
285 
286 	return (npgs);
287 }
288 
289 /*
290  * Find MIN(physbase) and MAX(physmax) over all mnodes
291  *
292  * Called during startup and DR to find hpm_counters limits when
293  * interleaved_mnodes is set.
294  * NOTE: there is a race condition with DR if it tries to change more than
295  * one mnode in parallel. Sizing shared hpm_counters depends on finding the
296  * min(physbase) and max(physmax) across all mnodes. Therefore, the caller of
297  * page_ctrs_adjust must ensure that mem_node_config does not change while it
298  * is running.
299  */
300 void
301 mem_node_max_range(pfn_t *basep, pfn_t *maxp)
302 {
303 	int mnode;
304 	pfn_t max = 0;
305 	pfn_t base = (pfn_t)-1;
306 
307 	for (mnode = 0; mnode < max_mem_nodes; mnode++) {
308 		if (mem_node_config[mnode].exists == 0)
309 			continue;
310 		if (max < mem_node_config[mnode].physmax)
311 			max = mem_node_config[mnode].physmax;
312 		if (base > mem_node_config[mnode].physbase)
313 			base = mem_node_config[mnode].physbase;
314 	}
315 	ASSERT(base != (pfn_t)-1 && max != 0);
316 	*basep = base;
317 	*maxp = max;
318 }
319