xref: /freebsd/sys/kern/subr_pcpu.c (revision b9128a37faafede823eb456aa65a11ac69997284)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 2001 Wind River Systems, Inc.
5  * All rights reserved.
6  * Written by: John Baldwin <jhb@FreeBSD.org>
7  *
8  * Copyright (c) 2009 Jeffrey Roberson <jeff@freebsd.org>
9  * All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the author nor the names of any co-contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 /*
37  * This module provides MI support for per-cpu data.
38  *
39  * Each architecture determines the mapping of logical CPU IDs to physical
40  * CPUs.  The requirements of this mapping are as follows:
41  *  - Logical CPU IDs must reside in the range 0 ... MAXCPU - 1.
42  *  - The mapping is not required to be dense.  That is, there may be
43  *    gaps in the mappings.
44  *  - The platform sets the value of MAXCPU in <machine/param.h>.
45  *  - It is suggested, but not required, that in the non-SMP case, the
46  *    platform define MAXCPU to be 1 and define the logical ID of the
47  *    sole CPU as 0.
48  */
49 
50 #include <sys/cdefs.h>
51 #include "opt_ddb.h"
52 
53 #include <sys/param.h>
54 #include <sys/systm.h>
55 #include <sys/sysctl.h>
56 #include <sys/lock.h>
57 #include <sys/malloc.h>
58 #include <sys/pcpu.h>
59 #include <sys/proc.h>
60 #include <sys/smp.h>
61 #include <sys/sx.h>
62 #include <vm/uma.h>
63 #include <ddb/ddb.h>
64 
65 static MALLOC_DEFINE(M_PCPU, "Per-cpu", "Per-cpu resource accouting.");
66 
67 struct dpcpu_free {
68 	uintptr_t	df_start;
69 	int		df_len;
70 	TAILQ_ENTRY(dpcpu_free) df_link;
71 };
72 
73 DPCPU_DEFINE_STATIC(char, modspace[DPCPU_MODMIN] __aligned(__alignof(void *)));
74 static TAILQ_HEAD(, dpcpu_free) dpcpu_head = TAILQ_HEAD_INITIALIZER(dpcpu_head);
75 static struct sx dpcpu_lock;
76 uintptr_t dpcpu_off[MAXCPU];
77 struct pcpu *cpuid_to_pcpu[MAXCPU];
78 struct cpuhead cpuhead = STAILQ_HEAD_INITIALIZER(cpuhead);
79 
80 /*
81  * Initialize the MI portions of a struct pcpu.
82  */
83 void
84 pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
85 {
86 
87 	bzero(pcpu, size);
88 	KASSERT(cpuid >= 0 && cpuid < MAXCPU,
89 	    ("pcpu_init: invalid cpuid %d", cpuid));
90 	pcpu->pc_cpuid = cpuid;
91 	cpuid_to_pcpu[cpuid] = pcpu;
92 	STAILQ_INSERT_TAIL(&cpuhead, pcpu, pc_allcpu);
93 	cpu_pcpu_init(pcpu, cpuid, size);
94 	pcpu->pc_rm_queue.rmq_next = &pcpu->pc_rm_queue;
95 	pcpu->pc_rm_queue.rmq_prev = &pcpu->pc_rm_queue;
96 	pcpu->pc_zpcpu_offset = zpcpu_offset_cpu(cpuid);
97 }
98 
99 void
100 dpcpu_init(void *dpcpu, int cpuid)
101 {
102 	struct pcpu *pcpu;
103 
104 	TSENTER();
105 	pcpu = pcpu_find(cpuid);
106 	pcpu->pc_dynamic = (uintptr_t)dpcpu - DPCPU_START;
107 
108 	/*
109 	 * Initialize defaults from our linker section.
110 	 */
111 	memcpy(dpcpu, (void *)DPCPU_START, DPCPU_BYTES);
112 
113 	/*
114 	 * Place it in the global pcpu offset array.
115 	 */
116 	dpcpu_off[cpuid] = pcpu->pc_dynamic;
117 	TSEXIT();
118 }
119 
120 static void
121 dpcpu_startup(void *dummy __unused)
122 {
123 	struct dpcpu_free *df;
124 
125 	df = malloc(sizeof(*df), M_PCPU, M_WAITOK | M_ZERO);
126 	df->df_start = (uintptr_t)&DPCPU_NAME(modspace);
127 	df->df_len = DPCPU_MODMIN;
128 	TAILQ_INSERT_HEAD(&dpcpu_head, df, df_link);
129 	sx_init(&dpcpu_lock, "dpcpu alloc lock");
130 }
131 SYSINIT(dpcpu, SI_SUB_KLD, SI_ORDER_FIRST, dpcpu_startup, NULL);
132 
133 /*
134  * UMA_ZONE_PCPU zones for general kernel use.
135  */
136 uma_zone_t pcpu_zone_4;
137 uma_zone_t pcpu_zone_8;
138 uma_zone_t pcpu_zone_16;
139 uma_zone_t pcpu_zone_32;
140 uma_zone_t pcpu_zone_64;
141 
142 static void
143 pcpu_zones_startup(void)
144 {
145 
146 	pcpu_zone_4 = uma_zcreate("pcpu-4", 4,
147 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_PCPU);
148 	pcpu_zone_8 = uma_zcreate("pcpu-8", 8,
149 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_PCPU);
150 	pcpu_zone_16 = uma_zcreate("pcpu-16", 16,
151 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_PCPU);
152 	pcpu_zone_32 = uma_zcreate("pcpu-32", 32,
153 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_PCPU);
154 	pcpu_zone_64 = uma_zcreate("pcpu-64", 64,
155 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_PCPU);
156 }
157 SYSINIT(pcpu_zones, SI_SUB_COUNTER, SI_ORDER_FIRST, pcpu_zones_startup, NULL);
158 
159 /*
160  * First-fit extent based allocator for allocating space in the per-cpu
161  * region reserved for modules.  This is only intended for use by the
162  * kernel linkers to place module linker sets.
163  */
164 void *
165 dpcpu_alloc(int size)
166 {
167 	struct dpcpu_free *df;
168 	void *s;
169 
170 	s = NULL;
171 	size = roundup2(size, sizeof(void *));
172 	sx_xlock(&dpcpu_lock);
173 	TAILQ_FOREACH(df, &dpcpu_head, df_link) {
174 		if (df->df_len < size)
175 			continue;
176 		if (df->df_len == size) {
177 			s = (void *)df->df_start;
178 			TAILQ_REMOVE(&dpcpu_head, df, df_link);
179 			free(df, M_PCPU);
180 			break;
181 		}
182 		s = (void *)df->df_start;
183 		df->df_len -= size;
184 		df->df_start = df->df_start + size;
185 		break;
186 	}
187 	sx_xunlock(&dpcpu_lock);
188 
189 	return (s);
190 }
191 
192 /*
193  * Free dynamic per-cpu space at module unload time.
194  */
195 void
196 dpcpu_free(void *s, int size)
197 {
198 	struct dpcpu_free *df;
199 	struct dpcpu_free *dn;
200 	uintptr_t start;
201 	uintptr_t end;
202 
203 	size = roundup2(size, sizeof(void *));
204 	start = (uintptr_t)s;
205 	end = start + size;
206 	/*
207 	 * Free a region of space and merge it with as many neighbors as
208 	 * possible.  Keeping the list sorted simplifies this operation.
209 	 */
210 	sx_xlock(&dpcpu_lock);
211 	TAILQ_FOREACH(df, &dpcpu_head, df_link) {
212 		if (df->df_start > end)
213 			break;
214 		/*
215 		 * If we expand at the end of an entry we may have to
216 		 * merge it with the one following it as well.
217 		 */
218 		if (df->df_start + df->df_len == start) {
219 			df->df_len += size;
220 			dn = TAILQ_NEXT(df, df_link);
221 			if (df->df_start + df->df_len == dn->df_start) {
222 				df->df_len += dn->df_len;
223 				TAILQ_REMOVE(&dpcpu_head, dn, df_link);
224 				free(dn, M_PCPU);
225 			}
226 			sx_xunlock(&dpcpu_lock);
227 			return;
228 		}
229 		if (df->df_start == end) {
230 			df->df_start = start;
231 			df->df_len += size;
232 			sx_xunlock(&dpcpu_lock);
233 			return;
234 		}
235 	}
236 	dn = malloc(sizeof(*df), M_PCPU, M_WAITOK | M_ZERO);
237 	dn->df_start = start;
238 	dn->df_len = size;
239 	if (df)
240 		TAILQ_INSERT_BEFORE(df, dn, df_link);
241 	else
242 		TAILQ_INSERT_TAIL(&dpcpu_head, dn, df_link);
243 	sx_xunlock(&dpcpu_lock);
244 }
245 
246 /*
247  * Initialize the per-cpu storage from an updated linker-set region.
248  */
249 void
250 dpcpu_copy(void *s, int size)
251 {
252 #ifdef SMP
253 	uintptr_t dpcpu;
254 	int i;
255 
256 	CPU_FOREACH(i) {
257 		dpcpu = dpcpu_off[i];
258 		if (dpcpu == 0)
259 			continue;
260 		memcpy((void *)(dpcpu + (uintptr_t)s), s, size);
261 	}
262 #else
263 	memcpy((void *)(dpcpu_off[0] + (uintptr_t)s), s, size);
264 #endif
265 }
266 
267 /*
268  * Destroy a struct pcpu.
269  */
270 void
271 pcpu_destroy(struct pcpu *pcpu)
272 {
273 
274 	STAILQ_REMOVE(&cpuhead, pcpu, pcpu, pc_allcpu);
275 	cpuid_to_pcpu[pcpu->pc_cpuid] = NULL;
276 	dpcpu_off[pcpu->pc_cpuid] = 0;
277 }
278 
279 /*
280  * Locate a struct pcpu by cpu id.
281  */
282 struct pcpu *
283 pcpu_find(u_int cpuid)
284 {
285 
286 	return (cpuid_to_pcpu[cpuid]);
287 }
288 
289 int
290 sysctl_dpcpu_quad(SYSCTL_HANDLER_ARGS)
291 {
292 	uintptr_t dpcpu;
293 	int64_t count;
294 	int i;
295 
296 	count = 0;
297 	CPU_FOREACH(i) {
298 		dpcpu = dpcpu_off[i];
299 		if (dpcpu == 0)
300 			continue;
301 		count += *(int64_t *)(dpcpu + (uintptr_t)arg1);
302 	}
303 	return (SYSCTL_OUT(req, &count, sizeof(count)));
304 }
305 
306 int
307 sysctl_dpcpu_long(SYSCTL_HANDLER_ARGS)
308 {
309 	uintptr_t dpcpu;
310 	long count;
311 	int i;
312 
313 	count = 0;
314 	CPU_FOREACH(i) {
315 		dpcpu = dpcpu_off[i];
316 		if (dpcpu == 0)
317 			continue;
318 		count += *(long *)(dpcpu + (uintptr_t)arg1);
319 	}
320 	return (SYSCTL_OUT(req, &count, sizeof(count)));
321 }
322 
323 int
324 sysctl_dpcpu_int(SYSCTL_HANDLER_ARGS)
325 {
326 	uintptr_t dpcpu;
327 	int count;
328 	int i;
329 
330 	count = 0;
331 	CPU_FOREACH(i) {
332 		dpcpu = dpcpu_off[i];
333 		if (dpcpu == 0)
334 			continue;
335 		count += *(int *)(dpcpu + (uintptr_t)arg1);
336 	}
337 	return (SYSCTL_OUT(req, &count, sizeof(count)));
338 }
339 
340 #ifdef DDB
341 DB_SHOW_COMMAND_FLAGS(dpcpu_off, db_show_dpcpu_off, DB_CMD_MEMSAFE)
342 {
343 	int id;
344 
345 	CPU_FOREACH(id) {
346 		db_printf("dpcpu_off[%2d] = 0x%jx (+ DPCPU_START = %p)\n",
347 		    id, (uintmax_t)dpcpu_off[id],
348 		    (void *)(uintptr_t)(dpcpu_off[id] + DPCPU_START));
349 	}
350 }
351 
352 static void
353 show_pcpu(struct pcpu *pc)
354 {
355 	struct thread *td;
356 
357 	db_printf("cpuid        = %d\n", pc->pc_cpuid);
358 	db_printf("dynamic pcpu = %p\n", (void *)pc->pc_dynamic);
359 	db_printf("curthread    = ");
360 	td = pc->pc_curthread;
361 	if (td != NULL)
362 		db_printf("%p: pid %d tid %d critnest %d \"%s\"\n", td,
363 		    td->td_proc->p_pid, td->td_tid, td->td_critnest,
364 		    td->td_name);
365 	else
366 		db_printf("none\n");
367 	db_printf("curpcb       = %p\n", pc->pc_curpcb);
368 	db_printf("fpcurthread  = ");
369 	td = pc->pc_fpcurthread;
370 	if (td != NULL)
371 		db_printf("%p: pid %d \"%s\"\n", td, td->td_proc->p_pid,
372 		    td->td_name);
373 	else
374 		db_printf("none\n");
375 	db_printf("idlethread   = ");
376 	td = pc->pc_idlethread;
377 	if (td != NULL)
378 		db_printf("%p: tid %d \"%s\"\n", td, td->td_tid, td->td_name);
379 	else
380 		db_printf("none\n");
381 	db_show_mdpcpu(pc);
382 
383 #ifdef VIMAGE
384 	db_printf("curvnet      = %p\n", pc->pc_curthread->td_vnet);
385 #endif
386 
387 #ifdef WITNESS
388 	db_printf("spin locks held:\n");
389 	witness_list_locks(&pc->pc_spinlocks, db_printf);
390 #endif
391 }
392 
393 DB_SHOW_COMMAND_FLAGS(pcpu, db_show_pcpu, DB_CMD_MEMSAFE)
394 {
395 	struct pcpu *pc;
396 	int id;
397 
398 	if (have_addr)
399 		id = ((addr >> 4) % 16) * 10 + (addr % 16);
400 	else
401 		id = PCPU_GET(cpuid);
402 	pc = pcpu_find(id);
403 	if (pc == NULL) {
404 		db_printf("CPU %d not found\n", id);
405 		return;
406 	}
407 	show_pcpu(pc);
408 }
409 
410 DB_SHOW_ALL_COMMAND(pcpu, db_show_cpu_all)
411 {
412 	struct pcpu *pc;
413 	int id;
414 
415 	db_printf("Current CPU: %d\n\n", PCPU_GET(cpuid));
416 	CPU_FOREACH(id) {
417 		pc = pcpu_find(id);
418 		if (pc != NULL) {
419 			show_pcpu(pc);
420 			db_printf("\n");
421 		}
422 	}
423 }
424 DB_SHOW_ALIAS_FLAGS(allpcpu, db_show_cpu_all, DB_CMD_MEMSAFE);
425 #endif
426