xref: /freebsd/sys/kern/subr_pcpu.c (revision 6f9c8e5b074419423648ffb89b83fd2f257e90b7)
1 /*-
2  * Copyright (c) 2001 Wind River Systems, Inc.
3  * All rights reserved.
4  * Written by: John Baldwin <jhb@FreeBSD.org>
5  *
6  * Copyright (c) 2009 Jeffrey Roberson <jeff@freebsd.org>
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 4. Neither the name of the author nor the names of any co-contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
34 /*
35  * This module provides MI support for per-cpu data.
36  *
37  * Each architecture determines the mapping of logical CPU IDs to physical
38  * CPUs.  The requirements of this mapping are as follows:
39  *  - Logical CPU IDs must reside in the range 0 ... MAXCPU - 1.
40  *  - The mapping is not required to be dense.  That is, there may be
41  *    gaps in the mappings.
42  *  - The platform sets the value of MAXCPU in <machine/param.h>.
43  *  - It is suggested, but not required, that in the non-SMP case, the
44  *    platform define MAXCPU to be 1 and define the logical ID of the
45  *    sole CPU as 0.
46  */
47 
48 #include <sys/cdefs.h>
49 __FBSDID("$FreeBSD$");
50 
51 #include "opt_ddb.h"
52 
53 #include <sys/param.h>
54 #include <sys/systm.h>
55 #include <sys/sysctl.h>
56 #include <sys/lock.h>
57 #include <sys/malloc.h>
58 #include <sys/pcpu.h>
59 #include <sys/proc.h>
60 #include <sys/smp.h>
61 #include <sys/sx.h>
62 #include <ddb/ddb.h>
63 
/* Malloc type charged for the struct dpcpu_free records allocated below. */
64 MALLOC_DEFINE(M_PCPU, "Per-cpu", "Per-cpu resource accouting.");
65 
/*
 * One free extent in the per-cpu space reserved for modules.  Extents
 * are linked on dpcpu_head and handed out by dpcpu_alloc().
 */
66 struct dpcpu_free {
67 	uintptr_t	df_start;	/* Address of the first free byte. */
68 	int		df_len;		/* Length of the extent in bytes. */
69 	TAILQ_ENTRY(dpcpu_free) df_link; /* Linkage on dpcpu_head. */
70 };
71 
/* Per-cpu area of DPCPU_MODMIN bytes that seeds the free list at startup. */
72 static DPCPU_DEFINE(char, modspace[DPCPU_MODMIN]);
/* Free extents; dpcpu_free() relies on the list being kept sorted. */
73 static TAILQ_HEAD(, dpcpu_free) dpcpu_head = TAILQ_HEAD_INITIALIZER(dpcpu_head);
/* Held exclusively while dpcpu_head is walked or modified. */
74 static struct sx dpcpu_lock;
/*
 * Per-CPU offset of the dynamic per-cpu region relative to DPCPU_START,
 * indexed by logical CPU id.  Zero means no region has been set up.
 */
75 uintptr_t dpcpu_off[MAXCPU];
/* Logical CPU id to struct pcpu lookup table; NULL when not registered. */
76 struct pcpu *cpuid_to_pcpu[MAXCPU];
/* List of every struct pcpu registered through pcpu_init(). */
77 struct cpuhead cpuhead = STAILQ_HEAD_INITIALIZER(cpuhead);
78 
79 /*
80  * Initialize the MI portions of a struct pcpu.
81  */
82 void
83 pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
84 {
85 
86 	bzero(pcpu, size);
87 	KASSERT(cpuid >= 0 && cpuid < MAXCPU,
88 	    ("pcpu_init: invalid cpuid %d", cpuid));
89 	pcpu->pc_cpuid = cpuid;
90 	CPU_SETOF(cpuid, &pcpu->pc_cpumask);
91 	cpuid_to_pcpu[cpuid] = pcpu;
92 	STAILQ_INSERT_TAIL(&cpuhead, pcpu, pc_allcpu);
93 	cpu_pcpu_init(pcpu, cpuid, size);
94 	pcpu->pc_rm_queue.rmq_next = &pcpu->pc_rm_queue;
95 	pcpu->pc_rm_queue.rmq_prev = &pcpu->pc_rm_queue;
96 #ifdef KTR
97 	snprintf(pcpu->pc_name, sizeof(pcpu->pc_name), "CPU %d", cpuid);
98 #endif
99 }
100 
101 void
102 dpcpu_init(void *dpcpu, int cpuid)
103 {
104 	struct pcpu *pcpu;
105 
106 	pcpu = pcpu_find(cpuid);
107 	pcpu->pc_dynamic = (uintptr_t)dpcpu - DPCPU_START;
108 
109 	/*
110 	 * Initialize defaults from our linker section.
111 	 */
112 	memcpy(dpcpu, (void *)DPCPU_START, DPCPU_BYTES);
113 
114 	/*
115 	 * Place it in the global pcpu offset array.
116 	 */
117 	dpcpu_off[cpuid] = pcpu->pc_dynamic;
118 }
119 
120 static void
121 dpcpu_startup(void *dummy __unused)
122 {
123 	struct dpcpu_free *df;
124 
125 	df = malloc(sizeof(*df), M_PCPU, M_WAITOK | M_ZERO);
126 	df->df_start = (uintptr_t)&DPCPU_NAME(modspace);
127 	df->df_len = DPCPU_MODMIN;
128 	TAILQ_INSERT_HEAD(&dpcpu_head, df, df_link);
129 	sx_init(&dpcpu_lock, "dpcpu alloc lock");
130 }
131 SYSINIT(dpcpu, SI_SUB_KLD, SI_ORDER_FIRST, dpcpu_startup, 0);
132 
133 /*
134  * First-fit extent based allocator for allocating space in the per-cpu
135  * region reserved for modules.  This is only intended for use by the
136  * kernel linkers to place module linker sets.
137  */
138 void *
139 dpcpu_alloc(int size)
140 {
141 	struct dpcpu_free *df;
142 	void *s;
143 
144 	s = NULL;
145 	size = roundup2(size, sizeof(void *));
146 	sx_xlock(&dpcpu_lock);
147 	TAILQ_FOREACH(df, &dpcpu_head, df_link) {
148 		if (df->df_len < size)
149 			continue;
150 		if (df->df_len == size) {
151 			s = (void *)df->df_start;
152 			TAILQ_REMOVE(&dpcpu_head, df, df_link);
153 			free(df, M_PCPU);
154 			break;
155 		}
156 		s = (void *)df->df_start;
157 		df->df_len -= size;
158 		df->df_start = df->df_start + size;
159 		break;
160 	}
161 	sx_xunlock(&dpcpu_lock);
162 
163 	return (s);
164 }
165 
166 /*
167  * Free dynamic per-cpu space at module unload time.
168  */
169 void
170 dpcpu_free(void *s, int size)
171 {
172 	struct dpcpu_free *df;
173 	struct dpcpu_free *dn;
174 	uintptr_t start;
175 	uintptr_t end;
176 
177 	size = roundup2(size, sizeof(void *));
178 	start = (uintptr_t)s;
179 	end = start + size;
180 	/*
181 	 * Free a region of space and merge it with as many neighbors as
182 	 * possible.  Keeping the list sorted simplifies this operation.
183 	 */
184 	sx_xlock(&dpcpu_lock);
185 	TAILQ_FOREACH(df, &dpcpu_head, df_link) {
186 		if (df->df_start > end)
187 			break;
188 		/*
189 		 * If we expand at the end of an entry we may have to
190 		 * merge it with the one following it as well.
191 		 */
192 		if (df->df_start + df->df_len == start) {
193 			df->df_len += size;
194 			dn = TAILQ_NEXT(df, df_link);
195 			if (df->df_start + df->df_len == dn->df_start) {
196 				df->df_len += dn->df_len;
197 				TAILQ_REMOVE(&dpcpu_head, dn, df_link);
198 				free(dn, M_PCPU);
199 			}
200 			sx_xunlock(&dpcpu_lock);
201 			return;
202 		}
203 		if (df->df_start == end) {
204 			df->df_start = start;
205 			df->df_len += size;
206 			sx_xunlock(&dpcpu_lock);
207 			return;
208 		}
209 	}
210 	dn = malloc(sizeof(*df), M_PCPU, M_WAITOK | M_ZERO);
211 	dn->df_start = start;
212 	dn->df_len = size;
213 	if (df)
214 		TAILQ_INSERT_BEFORE(df, dn, df_link);
215 	else
216 		TAILQ_INSERT_TAIL(&dpcpu_head, dn, df_link);
217 	sx_xunlock(&dpcpu_lock);
218 }
219 
220 /*
221  * Initialize the per-cpu storage from an updated linker-set region.
222  */
223 void
224 dpcpu_copy(void *s, int size)
225 {
226 #ifdef SMP
227 	uintptr_t dpcpu;
228 	int i;
229 
230 	for (i = 0; i < mp_ncpus; ++i) {
231 		dpcpu = dpcpu_off[i];
232 		if (dpcpu == 0)
233 			continue;
234 		memcpy((void *)(dpcpu + (uintptr_t)s), s, size);
235 	}
236 #else
237 	memcpy((void *)(dpcpu_off[0] + (uintptr_t)s), s, size);
238 #endif
239 }
240 
241 /*
242  * Destroy a struct pcpu.
243  */
244 void
245 pcpu_destroy(struct pcpu *pcpu)
246 {
247 
248 	STAILQ_REMOVE(&cpuhead, pcpu, pcpu, pc_allcpu);
249 	cpuid_to_pcpu[pcpu->pc_cpuid] = NULL;
250 	dpcpu_off[pcpu->pc_cpuid] = 0;
251 }
252 
253 /*
254  * Locate a struct pcpu by cpu id.
255  */
256 struct pcpu *
257 pcpu_find(u_int cpuid)
258 {
259 
260 	return (cpuid_to_pcpu[cpuid]);
261 }
262 
263 int
264 sysctl_dpcpu_quad(SYSCTL_HANDLER_ARGS)
265 {
266 	uintptr_t dpcpu;
267 	int64_t count;
268 	int i;
269 
270 	count = 0;
271 	for (i = 0; i < mp_ncpus; ++i) {
272 		dpcpu = dpcpu_off[i];
273 		if (dpcpu == 0)
274 			continue;
275 		count += *(int64_t *)(dpcpu + (uintptr_t)arg1);
276 	}
277 	return (SYSCTL_OUT(req, &count, sizeof(count)));
278 }
279 
280 int
281 sysctl_dpcpu_long(SYSCTL_HANDLER_ARGS)
282 {
283 	uintptr_t dpcpu;
284 	long count;
285 	int i;
286 
287 	count = 0;
288 	for (i = 0; i < mp_ncpus; ++i) {
289 		dpcpu = dpcpu_off[i];
290 		if (dpcpu == 0)
291 			continue;
292 		count += *(long *)(dpcpu + (uintptr_t)arg1);
293 	}
294 	return (SYSCTL_OUT(req, &count, sizeof(count)));
295 }
296 
297 int
298 sysctl_dpcpu_int(SYSCTL_HANDLER_ARGS)
299 {
300 	uintptr_t dpcpu;
301 	int count;
302 	int i;
303 
304 	count = 0;
305 	for (i = 0; i < mp_ncpus; ++i) {
306 		dpcpu = dpcpu_off[i];
307 		if (dpcpu == 0)
308 			continue;
309 		count += *(int *)(dpcpu + (uintptr_t)arg1);
310 	}
311 	return (SYSCTL_OUT(req, &count, sizeof(count)));
312 }
313 
314 #ifdef DDB
315 DB_SHOW_COMMAND(dpcpu_off, db_show_dpcpu_off)
316 {
317 	int id;
318 
319 	CPU_FOREACH(id) {
320 		db_printf("dpcpu_off[%2d] = 0x%jx (+ DPCPU_START = %p)\n",
321 		    id, (uintmax_t)dpcpu_off[id],
322 		    (void *)(uintptr_t)(dpcpu_off[id] + DPCPU_START));
323 	}
324 }
325 
326 static void
327 show_pcpu(struct pcpu *pc)
328 {
329 	struct thread *td;
330 
331 	db_printf("cpuid        = %d\n", pc->pc_cpuid);
332 	db_printf("dynamic pcpu = %p\n", (void *)pc->pc_dynamic);
333 	db_printf("curthread    = ");
334 	td = pc->pc_curthread;
335 	if (td != NULL)
336 		db_printf("%p: pid %d \"%s\"\n", td, td->td_proc->p_pid,
337 		    td->td_name);
338 	else
339 		db_printf("none\n");
340 	db_printf("curpcb       = %p\n", pc->pc_curpcb);
341 	db_printf("fpcurthread  = ");
342 	td = pc->pc_fpcurthread;
343 	if (td != NULL)
344 		db_printf("%p: pid %d \"%s\"\n", td, td->td_proc->p_pid,
345 		    td->td_name);
346 	else
347 		db_printf("none\n");
348 	db_printf("idlethread   = ");
349 	td = pc->pc_idlethread;
350 	if (td != NULL)
351 		db_printf("%p: tid %d \"%s\"\n", td, td->td_tid, td->td_name);
352 	else
353 		db_printf("none\n");
354 	db_show_mdpcpu(pc);
355 
356 #ifdef VIMAGE
357 	db_printf("curvnet      = %p\n", pc->pc_curthread->td_vnet);
358 #endif
359 
360 #ifdef WITNESS
361 	db_printf("spin locks held:\n");
362 	witness_list_locks(&pc->pc_spinlocks, db_printf);
363 #endif
364 }
365 
366 DB_SHOW_COMMAND(pcpu, db_show_pcpu)
367 {
368 	struct pcpu *pc;
369 	int id;
370 
371 	if (have_addr)
372 		id = ((addr >> 4) % 16) * 10 + (addr % 16);
373 	else
374 		id = PCPU_GET(cpuid);
375 	pc = pcpu_find(id);
376 	if (pc == NULL) {
377 		db_printf("CPU %d not found\n", id);
378 		return;
379 	}
380 	show_pcpu(pc);
381 }
382 
383 DB_SHOW_ALL_COMMAND(pcpu, db_show_cpu_all)
384 {
385 	struct pcpu *pc;
386 	int id;
387 
388 	db_printf("Current CPU: %d\n\n", PCPU_GET(cpuid));
389 	for (id = 0; id <= mp_maxid; id++) {
390 		pc = pcpu_find(id);
391 		if (pc != NULL) {
392 			show_pcpu(pc);
393 			db_printf("\n");
394 		}
395 	}
396 }
397 DB_SHOW_ALIAS(allpcpu, db_show_cpu_all);
398 #endif
399