1da5b7e90SMitchell Horne /*-
24d846d26SWarner Losh * SPDX-License-Identifier: BSD-2-Clause
3da5b7e90SMitchell Horne *
4da5b7e90SMitchell Horne * Copyright (c) 2008-2022 NetApp, Inc.
5da5b7e90SMitchell Horne *
6da5b7e90SMitchell Horne * Redistribution and use in source and binary forms, with or without
7da5b7e90SMitchell Horne * modification, are permitted provided that the following conditions
8da5b7e90SMitchell Horne * are met:
9da5b7e90SMitchell Horne * 1. Redistributions of source code must retain the above copyright
10da5b7e90SMitchell Horne * notice, this list of conditions and the following disclaimer.
11da5b7e90SMitchell Horne * 2. Redistributions in binary form must reproduce the above copyright
12da5b7e90SMitchell Horne * notice, this list of conditions and the following disclaimer in the
13da5b7e90SMitchell Horne * documentation and/or other materials provided with the distribution.
14da5b7e90SMitchell Horne *
15da5b7e90SMitchell Horne * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16da5b7e90SMitchell Horne * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17da5b7e90SMitchell Horne * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18da5b7e90SMitchell Horne * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19da5b7e90SMitchell Horne * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20da5b7e90SMitchell Horne * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21da5b7e90SMitchell Horne * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22da5b7e90SMitchell Horne * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23da5b7e90SMitchell Horne * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24da5b7e90SMitchell Horne * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25da5b7e90SMitchell Horne */
26da5b7e90SMitchell Horne
27da5b7e90SMitchell Horne #include <sys/param.h>
28da5b7e90SMitchell Horne #include <sys/systm.h>
29da5b7e90SMitchell Horne #include <sys/boottrace.h>
30da5b7e90SMitchell Horne #include <sys/kernel.h>
31da5b7e90SMitchell Horne #include <sys/malloc.h>
32da5b7e90SMitchell Horne #include <sys/pcpu.h>
33da5b7e90SMitchell Horne #include <sys/proc.h>
34da5b7e90SMitchell Horne #include <sys/resourcevar.h>
35da5b7e90SMitchell Horne #include <sys/sbuf.h>
36da5b7e90SMitchell Horne #include <sys/syscallsubr.h>
37da5b7e90SMitchell Horne #include <sys/sysctl.h>
38da5b7e90SMitchell Horne #include <sys/time.h>
39da5b7e90SMitchell Horne
40da5b7e90SMitchell Horne #include <machine/stdarg.h>
41da5b7e90SMitchell Horne
42da5b7e90SMitchell Horne #define dprintf(fmt, ...) \
43da5b7e90SMitchell Horne do { \
44da5b7e90SMitchell Horne if (dotrace_debugging) \
45da5b7e90SMitchell Horne printf(fmt, ##__VA_ARGS__); \
46da5b7e90SMitchell Horne } while (0);
47da5b7e90SMitchell Horne
48da5b7e90SMitchell Horne static MALLOC_DEFINE(M_BOOTTRACE, "boottrace", "memory for boot tracing");
49da5b7e90SMitchell Horne
50da5b7e90SMitchell Horne #define BT_TABLE_DEFSIZE 3000
51da5b7e90SMitchell Horne #define BT_TABLE_RUNSIZE 2000
52da5b7e90SMitchell Horne #define BT_TABLE_SHTSIZE 1000
53da5b7e90SMitchell Horne #define BT_TABLE_MINSIZE 500
54da5b7e90SMitchell Horne
55da5b7e90SMitchell Horne /*
56da5b7e90SMitchell Horne * Boot-time & shutdown-time event.
57da5b7e90SMitchell Horne */
58da5b7e90SMitchell Horne struct bt_event {
59da5b7e90SMitchell Horne uint64_t tsc; /* Timestamp */
60da5b7e90SMitchell Horne uint64_t tick; /* Kernel tick */
61da5b7e90SMitchell Horne uint64_t cputime; /* Microseconds of process CPU time */
62da5b7e90SMitchell Horne uint32_t cpuid; /* CPU on which the event ran */
63da5b7e90SMitchell Horne uint32_t inblock; /* # of blocks in */
64da5b7e90SMitchell Horne uint32_t oublock; /* # of blocks out */
65da5b7e90SMitchell Horne pid_t pid; /* Current PID */
66da5b7e90SMitchell Horne char name[BT_EVENT_NAMELEN]; /* Event name */
67da5b7e90SMitchell Horne char tdname[BT_EVENT_TDNAMELEN]; /* Thread name */
68da5b7e90SMitchell Horne };
69da5b7e90SMitchell Horne
70da5b7e90SMitchell Horne struct bt_table {
71da5b7e90SMitchell Horne uint32_t size; /* Trace table size */
72da5b7e90SMitchell Horne uint32_t curr; /* Trace entry to use */
73da5b7e90SMitchell Horne uint32_t wrap; /* Wrap-around, instead of dropping */
74da5b7e90SMitchell Horne uint32_t drops_early; /* Trace entries dropped before init */
75da5b7e90SMitchell Horne uint32_t drops_full; /* Trace entries dropped after full */
76da5b7e90SMitchell Horne struct bt_event *table;
77da5b7e90SMitchell Horne };
78da5b7e90SMitchell Horne
79da5b7e90SMitchell Horne /* Boot-time tracing */
80da5b7e90SMitchell Horne static struct bt_table bt;
81da5b7e90SMitchell Horne
82da5b7e90SMitchell Horne /* Run-time tracing */
83da5b7e90SMitchell Horne static struct bt_table rt;
84da5b7e90SMitchell Horne
85da5b7e90SMitchell Horne /* Shutdown-time tracing */
86da5b7e90SMitchell Horne static struct bt_table st;
87da5b7e90SMitchell Horne
88da5b7e90SMitchell Horne /* Set when system boot is complete, and we've switched to runtime tracing. */
89da5b7e90SMitchell Horne static bool bootdone;
90da5b7e90SMitchell Horne
91da5b7e90SMitchell Horne /* Set when system shutdown has started. */
92da5b7e90SMitchell Horne static bool shutdown;
93da5b7e90SMitchell Horne
94da5b7e90SMitchell Horne /* Turn on dotrace() debug logging to console. */
95da5b7e90SMitchell Horne static bool dotrace_debugging;
96da5b7e90SMitchell Horne TUNABLE_BOOL("debug.boottrace.dotrace_debugging", &dotrace_debugging);
97da5b7e90SMitchell Horne
98da5b7e90SMitchell Horne /* Trace kernel events */
99da5b7e90SMitchell Horne static bool dotrace_kernel = true;
100da5b7e90SMitchell Horne TUNABLE_BOOL("kern.boottrace.dotrace_kernel", &dotrace_kernel);
101da5b7e90SMitchell Horne
102da5b7e90SMitchell Horne /* Trace userspace events */
103da5b7e90SMitchell Horne static bool dotrace_user = true;
104da5b7e90SMitchell Horne TUNABLE_BOOL("kern.boottrace.dotrace_user", &dotrace_user);
105da5b7e90SMitchell Horne
106da5b7e90SMitchell Horne static int sysctl_log(SYSCTL_HANDLER_ARGS);
107da5b7e90SMitchell Horne static int sysctl_boottrace(SYSCTL_HANDLER_ARGS);
108da5b7e90SMitchell Horne static int sysctl_runtrace(SYSCTL_HANDLER_ARGS);
109da5b7e90SMitchell Horne static int sysctl_shuttrace(SYSCTL_HANDLER_ARGS);
110da5b7e90SMitchell Horne static int sysctl_boottrace_reset(SYSCTL_HANDLER_ARGS);
111da5b7e90SMitchell Horne
112da5b7e90SMitchell Horne SYSCTL_NODE(_kern, OID_AUTO, boottrace, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
113da5b7e90SMitchell Horne "boottrace statistics");
114da5b7e90SMitchell Horne
115da5b7e90SMitchell Horne SYSCTL_PROC(_kern_boottrace, OID_AUTO, log,
116da5b7e90SMitchell Horne CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_SKIP,
117da5b7e90SMitchell Horne 0, 0, sysctl_log, "A",
118da5b7e90SMitchell Horne "Print a log of the boottrace trace data");
119da5b7e90SMitchell Horne SYSCTL_PROC(_kern_boottrace, OID_AUTO, boottrace,
120da5b7e90SMitchell Horne CTLTYPE_STRING | CTLFLAG_WR | CTLFLAG_MPSAFE,
121da5b7e90SMitchell Horne 0, 0, sysctl_boottrace, "A",
122da5b7e90SMitchell Horne "Capture a boot-time trace event");
123da5b7e90SMitchell Horne SYSCTL_PROC(_kern_boottrace, OID_AUTO, runtrace,
124da5b7e90SMitchell Horne CTLTYPE_STRING | CTLFLAG_WR | CTLFLAG_MPSAFE,
125da5b7e90SMitchell Horne 0, 0, sysctl_runtrace, "A",
126da5b7e90SMitchell Horne "Capture a run-time trace event");
127da5b7e90SMitchell Horne SYSCTL_PROC(_kern_boottrace, OID_AUTO, shuttrace,
128da5b7e90SMitchell Horne CTLTYPE_STRING | CTLFLAG_WR | CTLFLAG_MPSAFE,
129da5b7e90SMitchell Horne 0, 0, sysctl_shuttrace, "A",
130da5b7e90SMitchell Horne "Capture a shutdown-time trace event");
131da5b7e90SMitchell Horne SYSCTL_PROC(_kern_boottrace, OID_AUTO, reset,
132da5b7e90SMitchell Horne CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_MPSAFE,
133da5b7e90SMitchell Horne 0, 0, sysctl_boottrace_reset, "A",
134da5b7e90SMitchell Horne "Reset run-time tracing table");
135da5b7e90SMitchell Horne
136da5b7e90SMitchell Horne /*
137da5b7e90SMitchell Horne * Global enable.
138da5b7e90SMitchell Horne */
139da5b7e90SMitchell Horne bool __read_mostly boottrace_enabled = false;
140da5b7e90SMitchell Horne SYSCTL_BOOL(_kern_boottrace, OID_AUTO, enabled, CTLFLAG_RDTUN,
141da5b7e90SMitchell Horne &boottrace_enabled, 0,
142da5b7e90SMitchell Horne "Boot-time and shutdown-time tracing enabled");
143da5b7e90SMitchell Horne
144da5b7e90SMitchell Horne /*
145da5b7e90SMitchell Horne * Enable dumping of the shutdown trace entries to console.
146da5b7e90SMitchell Horne */
147da5b7e90SMitchell Horne bool shutdown_trace = false;
148da5b7e90SMitchell Horne SYSCTL_BOOL(_kern_boottrace, OID_AUTO, shutdown_trace, CTLFLAG_RWTUN,
149da5b7e90SMitchell Horne &shutdown_trace, 0,
150da5b7e90SMitchell Horne "Enable kernel shutdown tracing to the console");
151da5b7e90SMitchell Horne
152da5b7e90SMitchell Horne /*
153da5b7e90SMitchell Horne * Set the delta threshold (ms) below which events are ignored, used in
154da5b7e90SMitchell Horne * determining what to dump to the console.
155da5b7e90SMitchell Horne */
156da5b7e90SMitchell Horne static int shutdown_trace_threshold;
157da5b7e90SMitchell Horne SYSCTL_INT(_kern_boottrace, OID_AUTO, shutdown_trace_threshold, CTLFLAG_RWTUN,
158da5b7e90SMitchell Horne &shutdown_trace_threshold, 0,
159da5b7e90SMitchell Horne "Tracing threshold (ms) below which tracing is ignored");
160da5b7e90SMitchell Horne
16151dc362dSZhenlei Huang SYSCTL_UINT(_kern_boottrace, OID_AUTO, table_size,
16251dc362dSZhenlei Huang CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &bt.size, 0,
16351dc362dSZhenlei Huang "Boot-time tracing table size");
16451dc362dSZhenlei Huang
165da5b7e90SMitchell Horne /*
166da5b7e90SMitchell Horne * Dump a trace to buffer or if buffer is NULL to console.
167da5b7e90SMitchell Horne *
168da5b7e90SMitchell Horne * Non-zero delta_threshold selectively prints entries based on delta
169da5b7e90SMitchell Horne * with current and previous entry. Otherwise, delta_threshold of 0
170da5b7e90SMitchell Horne * prints every trace entry and delta.
171da5b7e90SMitchell Horne *
172da5b7e90SMitchell Horne * Output something like this:
173da5b7e90SMitchell Horne *
174da5b7e90SMitchell Horne * CPU msecs delta process event
175da5b7e90SMitchell Horne * 11 1228262715 0 init shutdown pre sync begin
176da5b7e90SMitchell Horne * 3 1228265622 2907 init shutdown pre sync complete
177da5b7e90SMitchell Horne * 3 1228265623 0 init shutdown turned swap off
178da5b7e90SMitchell Horne * 18 1228266466 843 init shutdown unmounted all filesystems
179da5b7e90SMitchell Horne *
180da5b7e90SMitchell Horne * How to interpret:
181da5b7e90SMitchell Horne *
182da5b7e90SMitchell Horne * delta column represents the time in milliseconds between this event and the previous.
183da5b7e90SMitchell Horne * Usually that means you can take the previous event, current event, match them
184da5b7e90SMitchell Horne * up in the code, and whatever lies between is the culprit taking time.
185da5b7e90SMitchell Horne *
186da5b7e90SMitchell Horne * For example, above: Pre-Sync is taking 2907ms, and something between swap and unmount
187da5b7e90SMitchell Horne * filesystems is taking 843 milliseconds.
188da5b7e90SMitchell Horne *
189da5b7e90SMitchell Horne * An event with a delta of 0 are 'landmark' events that simply exist in the output
190da5b7e90SMitchell Horne * for the developer to know where the time measurement begins. The 0 is an arbitrary
191da5b7e90SMitchell Horne * number that can effectively be ignored.
192da5b7e90SMitchell Horne */
193da5b7e90SMitchell Horne #define BTC_DELTA_PRINT(bte, msecs, delta) do { \
194da5b7e90SMitchell Horne if (sbp) { \
195da5b7e90SMitchell Horne sbuf_printf(sbp, fmt, (bte)->cpuid, msecs, delta, \
196da5b7e90SMitchell Horne (bte)->tdname, (bte)->name, (bte)->pid, \
197da5b7e90SMitchell Horne (bte)->cputime / 1000000, \
198da5b7e90SMitchell Horne ((bte)->cputime % 1000000) / 10000, \
199da5b7e90SMitchell Horne (bte)->inblock, (bte)->oublock); \
200da5b7e90SMitchell Horne } else { \
201da5b7e90SMitchell Horne printf(fmt, (bte)->cpuid, msecs, delta, \
202da5b7e90SMitchell Horne (bte)->tdname, (bte)->name, (bte)->pid, \
203da5b7e90SMitchell Horne (bte)->cputime / 1000000, \
204da5b7e90SMitchell Horne ((bte)->cputime % 1000000) / 10000, \
205da5b7e90SMitchell Horne (bte)->inblock, (bte)->oublock); \
206da5b7e90SMitchell Horne } \
207da5b7e90SMitchell Horne } while (0)
208da5b7e90SMitchell Horne
209da5b7e90SMitchell Horne /*
210da5b7e90SMitchell Horne * Print the trace entries to the message buffer, or to an sbuf, if provided.
211da5b7e90SMitchell Horne *
212da5b7e90SMitchell Horne * Entries with a difference less than dthres will not be printed.
213da5b7e90SMitchell Horne */
214da5b7e90SMitchell Horne static void
boottrace_display(struct sbuf * sbp,struct bt_table * btp,uint64_t dthres)215da5b7e90SMitchell Horne boottrace_display(struct sbuf *sbp, struct bt_table *btp, uint64_t dthres)
216da5b7e90SMitchell Horne {
217da5b7e90SMitchell Horne struct bt_event *evtp;
218da5b7e90SMitchell Horne struct bt_event *last_evtp;
219da5b7e90SMitchell Horne uint64_t msecs;
220da5b7e90SMitchell Horne uint64_t first_msecs;
221da5b7e90SMitchell Horne uint64_t last_msecs;
222da5b7e90SMitchell Horne uint64_t dmsecs;
223da5b7e90SMitchell Horne uint64_t last_dmsecs;
224da5b7e90SMitchell Horne uint64_t total_dmsecs;
225da5b7e90SMitchell Horne uint32_t i;
226da5b7e90SMitchell Horne uint32_t curr;
227da5b7e90SMitchell Horne const char *fmt = "%3u %10llu %10llu %-24s %-40s %5d %4d.%02d %5u %5u\n";
228da5b7e90SMitchell Horne const char *hdr_fmt = "\n\n%3s %10s %10s %-24s %-40s %5s %6s %5s %5s\n";
229da5b7e90SMitchell Horne bool printed;
230da5b7e90SMitchell Horne bool last_printed;
231da5b7e90SMitchell Horne
232da5b7e90SMitchell Horne /* Print the header */
233da5b7e90SMitchell Horne if (sbp != NULL)
234da5b7e90SMitchell Horne sbuf_printf(sbp, hdr_fmt,
235da5b7e90SMitchell Horne "CPU", "msecs", "delta", "process",
236da5b7e90SMitchell Horne "event", "PID", "CPUtime", "IBlks", "OBlks");
237da5b7e90SMitchell Horne else
238da5b7e90SMitchell Horne printf(hdr_fmt,
239da5b7e90SMitchell Horne "CPU", "msecs", "delta", "process",
240da5b7e90SMitchell Horne "event", "PID", "CPUtime", "IBlks", "OBlks");
241da5b7e90SMitchell Horne
242da5b7e90SMitchell Horne first_msecs = 0;
243da5b7e90SMitchell Horne last_evtp = NULL;
244da5b7e90SMitchell Horne last_msecs = 0;
245da5b7e90SMitchell Horne last_dmsecs = 0;
246da5b7e90SMitchell Horne last_printed = false;
247da5b7e90SMitchell Horne i = curr = btp->curr;
248da5b7e90SMitchell Horne
249da5b7e90SMitchell Horne do {
250da5b7e90SMitchell Horne evtp = &btp->table[i];
251da5b7e90SMitchell Horne if (evtp->tsc == 0)
252da5b7e90SMitchell Horne goto next;
253da5b7e90SMitchell Horne
254da5b7e90SMitchell Horne msecs = cputick2usec(evtp->tick) / 1000;
255da5b7e90SMitchell Horne dmsecs = (last_msecs != 0 && msecs > last_msecs) ?
256da5b7e90SMitchell Horne msecs - last_msecs : 0;
257da5b7e90SMitchell Horne printed = false;
258da5b7e90SMitchell Horne
259da5b7e90SMitchell Horne /*
260da5b7e90SMitchell Horne * If a threshold is defined, start filtering events by the
261da5b7e90SMitchell Horne * delta of msecs.
262da5b7e90SMitchell Horne */
263da5b7e90SMitchell Horne if (dthres != 0 && (dmsecs > dthres)) {
264da5b7e90SMitchell Horne /*
265da5b7e90SMitchell Horne * Print the previous entry as a landmark, even if it
266da5b7e90SMitchell Horne * falls below the threshold.
267da5b7e90SMitchell Horne */
268da5b7e90SMitchell Horne if (last_evtp != NULL && !last_printed)
269da5b7e90SMitchell Horne BTC_DELTA_PRINT(last_evtp, last_msecs,
270da5b7e90SMitchell Horne last_dmsecs);
271da5b7e90SMitchell Horne BTC_DELTA_PRINT(evtp, msecs, dmsecs);
272da5b7e90SMitchell Horne printed = true;
273da5b7e90SMitchell Horne } else if (dthres == 0) {
274da5b7e90SMitchell Horne BTC_DELTA_PRINT(evtp, msecs, dmsecs);
275da5b7e90SMitchell Horne printed = true;
276da5b7e90SMitchell Horne }
277da5b7e90SMitchell Horne if (first_msecs == 0 || msecs < first_msecs)
278da5b7e90SMitchell Horne first_msecs = msecs;
279da5b7e90SMitchell Horne last_evtp = evtp;
280da5b7e90SMitchell Horne last_msecs = msecs;
281da5b7e90SMitchell Horne last_dmsecs = dmsecs;
282da5b7e90SMitchell Horne last_printed = printed;
283da5b7e90SMitchell Horne maybe_yield();
284da5b7e90SMitchell Horne next:
285da5b7e90SMitchell Horne i = (i + 1) % btp->size;
286da5b7e90SMitchell Horne } while (i != curr);
287da5b7e90SMitchell Horne
288da5b7e90SMitchell Horne total_dmsecs = last_msecs > first_msecs ?
289da5b7e90SMitchell Horne (last_msecs - first_msecs) : 0;
290da5b7e90SMitchell Horne if (sbp != NULL)
291da5b7e90SMitchell Horne sbuf_printf(sbp, "Total measured time: %ju msecs\n",
292da5b7e90SMitchell Horne (uintmax_t)total_dmsecs);
293da5b7e90SMitchell Horne else
294da5b7e90SMitchell Horne printf("Total measured time: %ju msecs\n",
295da5b7e90SMitchell Horne (uintmax_t)total_dmsecs);
296da5b7e90SMitchell Horne }
297da5b7e90SMitchell Horne
298da5b7e90SMitchell Horne /*
299da5b7e90SMitchell Horne * Dump trace table entries to the console, given a delta threshold.
300da5b7e90SMitchell Horne */
301da5b7e90SMitchell Horne void
boottrace_dump_console(void)302da5b7e90SMitchell Horne boottrace_dump_console(void)
303da5b7e90SMitchell Horne {
304da5b7e90SMitchell Horne if (!boottrace_enabled)
305da5b7e90SMitchell Horne return;
306da5b7e90SMitchell Horne
307da5b7e90SMitchell Horne if (shutdown || rebooting || KERNEL_PANICKED()) {
308da5b7e90SMitchell Horne boottrace_display(NULL, &st, shutdown_trace_threshold);
309da5b7e90SMitchell Horne } else {
310da5b7e90SMitchell Horne boottrace_display(NULL, &bt, 0);
311da5b7e90SMitchell Horne boottrace_display(NULL, &rt, 0);
312da5b7e90SMitchell Horne }
313da5b7e90SMitchell Horne }
314da5b7e90SMitchell Horne
315da5b7e90SMitchell Horne /*
316da5b7e90SMitchell Horne * Records a new tracing event to the specified table.
317da5b7e90SMitchell Horne *
318da5b7e90SMitchell Horne * We don't use a lock because we want this to be callable from interrupt
319da5b7e90SMitchell Horne * context.
320da5b7e90SMitchell Horne */
321da5b7e90SMitchell Horne static int
dotrace(struct bt_table * btp,const char * eventname,const char * tdname)322da5b7e90SMitchell Horne dotrace(struct bt_table *btp, const char *eventname, const char *tdname)
323da5b7e90SMitchell Horne {
324da5b7e90SMitchell Horne uint32_t idx, nxt;
325da5b7e90SMitchell Horne struct rusage usage;
326da5b7e90SMitchell Horne
327da5b7e90SMitchell Horne MPASS(boottrace_enabled);
328da5b7e90SMitchell Horne if (tdname == NULL)
329da5b7e90SMitchell Horne tdname = (curproc->p_flag & P_SYSTEM) ?
330da5b7e90SMitchell Horne curthread->td_name : curproc->p_comm;
331da5b7e90SMitchell Horne
332da5b7e90SMitchell Horne dprintf("dotrace[");
333da5b7e90SMitchell Horne dprintf("cpu=%u, pid=%d, tsc=%ju, tick=%d, td='%s', event='%s'",
334da5b7e90SMitchell Horne PCPU_GET(cpuid), curthread->td_proc->p_pid,
335da5b7e90SMitchell Horne (uintmax_t)get_cyclecount(), ticks, tdname, eventname);
336da5b7e90SMitchell Horne if (btp->table == NULL) {
337da5b7e90SMitchell Horne btp->drops_early++;
338da5b7e90SMitchell Horne dprintf(", return=ENOSPC_1]\n");
339da5b7e90SMitchell Horne return (ENOSPC);
340da5b7e90SMitchell Horne }
341da5b7e90SMitchell Horne
342da5b7e90SMitchell Horne /* Claim a slot in the table. */
343da5b7e90SMitchell Horne do {
344da5b7e90SMitchell Horne idx = btp->curr;
345da5b7e90SMitchell Horne nxt = (idx + 1) % btp->size;
346da5b7e90SMitchell Horne if (nxt == 0 && btp->wrap == 0) {
347da5b7e90SMitchell Horne btp->drops_full++;
348da5b7e90SMitchell Horne dprintf(", return=ENOSPC_2]\n");
349da5b7e90SMitchell Horne return (ENOSPC);
350da5b7e90SMitchell Horne }
351da5b7e90SMitchell Horne } while (!atomic_cmpset_int(&btp->curr, idx, nxt));
352da5b7e90SMitchell Horne
353da5b7e90SMitchell Horne btp->table[idx].cpuid = PCPU_GET(cpuid);
354da5b7e90SMitchell Horne btp->table[idx].tsc = get_cyclecount(),
355da5b7e90SMitchell Horne btp->table[idx].tick = cpu_ticks();
356da5b7e90SMitchell Horne btp->table[idx].pid = curthread->td_proc->p_pid;
357da5b7e90SMitchell Horne
358da5b7e90SMitchell Horne /*
359da5b7e90SMitchell Horne * Get the resource usage for the process. Don't attempt this for the
360da5b7e90SMitchell Horne * kernel proc0 or for critical section activities.
361da5b7e90SMitchell Horne */
362da5b7e90SMitchell Horne if ((curthread->td_proc == &proc0) || (curthread->td_critnest != 0)) {
363da5b7e90SMitchell Horne btp->table[idx].cputime = 0;
364da5b7e90SMitchell Horne btp->table[idx].inblock = 0;
365da5b7e90SMitchell Horne btp->table[idx].oublock = 0;
366da5b7e90SMitchell Horne } else {
367da5b7e90SMitchell Horne kern_getrusage(curthread, RUSAGE_CHILDREN, &usage);
368da5b7e90SMitchell Horne btp->table[idx].cputime =
369da5b7e90SMitchell Horne (uint64_t)(usage.ru_utime.tv_sec * 1000000 +
370da5b7e90SMitchell Horne usage.ru_utime.tv_usec + usage.ru_stime.tv_sec * 1000000 +
371da5b7e90SMitchell Horne usage.ru_stime.tv_usec);
372da5b7e90SMitchell Horne btp->table[idx].inblock = (uint32_t)usage.ru_inblock;
373da5b7e90SMitchell Horne btp->table[idx].oublock = (uint32_t)usage.ru_oublock;
374da5b7e90SMitchell Horne }
375da5b7e90SMitchell Horne strlcpy(btp->table[idx].name, eventname, BT_EVENT_NAMELEN);
376da5b7e90SMitchell Horne strlcpy(btp->table[idx].tdname, tdname, BT_EVENT_TDNAMELEN);
377da5b7e90SMitchell Horne
378da5b7e90SMitchell Horne dprintf(", return=0]\n");
379da5b7e90SMitchell Horne return (0);
380da5b7e90SMitchell Horne }
381da5b7e90SMitchell Horne
382da5b7e90SMitchell Horne /*
383da5b7e90SMitchell Horne * Log various boot-time events, with the trace message encoded using
384da5b7e90SMitchell Horne * printf-like arguments.
385da5b7e90SMitchell Horne */
386da5b7e90SMitchell Horne int
boottrace(const char * tdname,const char * fmt,...)387da5b7e90SMitchell Horne boottrace(const char *tdname, const char *fmt, ...)
388da5b7e90SMitchell Horne {
389da5b7e90SMitchell Horne char eventname[BT_EVENT_NAMELEN];
390da5b7e90SMitchell Horne struct bt_table *btp;
391da5b7e90SMitchell Horne va_list ap;
392da5b7e90SMitchell Horne
393da5b7e90SMitchell Horne if (!dotrace_kernel)
394da5b7e90SMitchell Horne return (ENXIO);
395da5b7e90SMitchell Horne
396da5b7e90SMitchell Horne va_start(ap, fmt);
397da5b7e90SMitchell Horne vsnprintf(eventname, sizeof(eventname), fmt, ap);
398da5b7e90SMitchell Horne va_end(ap);
399da5b7e90SMitchell Horne
400da5b7e90SMitchell Horne btp = &bt;
401da5b7e90SMitchell Horne if (bootdone)
402da5b7e90SMitchell Horne btp = &rt;
403da5b7e90SMitchell Horne if (shutdown || rebooting || KERNEL_PANICKED())
404da5b7e90SMitchell Horne btp = &st;
405da5b7e90SMitchell Horne
406da5b7e90SMitchell Horne return (dotrace(btp, eventname, tdname));
407da5b7e90SMitchell Horne }
408da5b7e90SMitchell Horne
409da5b7e90SMitchell Horne /*
410da5b7e90SMitchell Horne * Log a run-time event & switch over to run-time tracing mode.
411da5b7e90SMitchell Horne */
412da5b7e90SMitchell Horne static int
runtrace(const char * eventname,const char * tdname)413da5b7e90SMitchell Horne runtrace(const char *eventname, const char *tdname)
414da5b7e90SMitchell Horne {
415da5b7e90SMitchell Horne
416da5b7e90SMitchell Horne bootdone = true;
417da5b7e90SMitchell Horne return (dotrace(&rt, eventname, tdname));
418da5b7e90SMitchell Horne }
419da5b7e90SMitchell Horne
420da5b7e90SMitchell Horne /*
421da5b7e90SMitchell Horne * Parse a boottrace message from userspace.
422da5b7e90SMitchell Horne *
423da5b7e90SMitchell Horne * The input from may contain a ':' to denote tdname. If not, tdname is
424da5b7e90SMitchell Horne * inferred from the process' name.
425da5b7e90SMitchell Horne *
426da5b7e90SMitchell Horne * e.g. reboot(8):SIGINT to init(8)
427da5b7e90SMitchell Horne */
428da5b7e90SMitchell Horne static void
boottrace_parse_message(char * message,char ** eventname,char ** tdname)429da5b7e90SMitchell Horne boottrace_parse_message(char *message, char **eventname, char **tdname)
430da5b7e90SMitchell Horne {
431da5b7e90SMitchell Horne char *delim;
432da5b7e90SMitchell Horne
433da5b7e90SMitchell Horne delim = strchr(message, ':');
434da5b7e90SMitchell Horne if (delim != NULL) {
435da5b7e90SMitchell Horne *delim = '\0';
436da5b7e90SMitchell Horne *tdname = message;
437da5b7e90SMitchell Horne *eventname = ++delim;
438da5b7e90SMitchell Horne } else {
439da5b7e90SMitchell Horne *tdname = curproc->p_comm;
440da5b7e90SMitchell Horne *eventname = message;
441da5b7e90SMitchell Horne }
442da5b7e90SMitchell Horne }
443da5b7e90SMitchell Horne
444da5b7e90SMitchell Horne static int
_boottrace_sysctl(struct bt_table * btp,struct sysctl_oid * oidp,struct sysctl_req * req)445da5b7e90SMitchell Horne _boottrace_sysctl(struct bt_table *btp, struct sysctl_oid *oidp,
446da5b7e90SMitchell Horne struct sysctl_req *req)
447da5b7e90SMitchell Horne {
448da5b7e90SMitchell Horne char message[BT_MSGLEN];
449da5b7e90SMitchell Horne char *eventname, *tdname;
450da5b7e90SMitchell Horne int error;
451da5b7e90SMitchell Horne
452da5b7e90SMitchell Horne if (!dotrace_user)
453da5b7e90SMitchell Horne return (ENXIO);
454da5b7e90SMitchell Horne
455da5b7e90SMitchell Horne message[0] = '\0';
456da5b7e90SMitchell Horne error = sysctl_handle_string(oidp, message, sizeof(message), req);
457da5b7e90SMitchell Horne if (error)
458da5b7e90SMitchell Horne return (error);
459da5b7e90SMitchell Horne
460da5b7e90SMitchell Horne boottrace_parse_message(message, &eventname, &tdname);
461da5b7e90SMitchell Horne error = dotrace(btp, eventname, tdname);
462da5b7e90SMitchell Horne if (error == ENOSPC) {
463da5b7e90SMitchell Horne /* Ignore table full error. */
464da5b7e90SMitchell Horne error = 0;
465da5b7e90SMitchell Horne }
466da5b7e90SMitchell Horne return (error);
467da5b7e90SMitchell Horne }
468da5b7e90SMitchell Horne
469da5b7e90SMitchell Horne static int
sysctl_log(SYSCTL_HANDLER_ARGS)470da5b7e90SMitchell Horne sysctl_log(SYSCTL_HANDLER_ARGS)
471da5b7e90SMitchell Horne {
472da5b7e90SMitchell Horne struct sbuf *sbuf;
473da5b7e90SMitchell Horne int error;
474da5b7e90SMitchell Horne
475da5b7e90SMitchell Horne if (!boottrace_enabled || req->newptr != NULL)
476da5b7e90SMitchell Horne return (0);
477da5b7e90SMitchell Horne
478da5b7e90SMitchell Horne sbuf = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND);
479da5b7e90SMitchell Horne boottrace_display(sbuf, &bt, 0);
480da5b7e90SMitchell Horne boottrace_display(sbuf, &rt, 0);
481da5b7e90SMitchell Horne sbuf_finish(sbuf);
482da5b7e90SMitchell Horne error = SYSCTL_OUT(req, sbuf_data(sbuf), sbuf_len(sbuf));
483da5b7e90SMitchell Horne sbuf_delete(sbuf);
484da5b7e90SMitchell Horne
485da5b7e90SMitchell Horne return (error);
486da5b7e90SMitchell Horne }
487da5b7e90SMitchell Horne
488da5b7e90SMitchell Horne static int
sysctl_boottrace(SYSCTL_HANDLER_ARGS)489da5b7e90SMitchell Horne sysctl_boottrace(SYSCTL_HANDLER_ARGS)
490da5b7e90SMitchell Horne {
491da5b7e90SMitchell Horne
492da5b7e90SMitchell Horne if (!boottrace_enabled)
493da5b7e90SMitchell Horne return (0);
494da5b7e90SMitchell Horne
495da5b7e90SMitchell Horne /* No output */
496da5b7e90SMitchell Horne if (req->newptr == NULL)
497da5b7e90SMitchell Horne return (0);
498da5b7e90SMitchell Horne
499da5b7e90SMitchell Horne /* Trace to rt if we have reached multi-user. */
500da5b7e90SMitchell Horne return (_boottrace_sysctl(bootdone ? &rt : &bt, oidp, req));
501da5b7e90SMitchell Horne }
502da5b7e90SMitchell Horne
503da5b7e90SMitchell Horne /*
504da5b7e90SMitchell Horne * Log a run-time event and switch over to run-time tracing mode.
505da5b7e90SMitchell Horne */
506da5b7e90SMitchell Horne static int
sysctl_runtrace(SYSCTL_HANDLER_ARGS)507da5b7e90SMitchell Horne sysctl_runtrace(SYSCTL_HANDLER_ARGS)
508da5b7e90SMitchell Horne {
509da5b7e90SMitchell Horne
510da5b7e90SMitchell Horne if (!boottrace_enabled)
511da5b7e90SMitchell Horne return (0);
512da5b7e90SMitchell Horne
513da5b7e90SMitchell Horne /* No output */
514da5b7e90SMitchell Horne if (req->newptr == NULL)
515da5b7e90SMitchell Horne return (0);
516da5b7e90SMitchell Horne
517da5b7e90SMitchell Horne bootdone = true;
518da5b7e90SMitchell Horne return (_boottrace_sysctl(&rt, oidp, req));
519da5b7e90SMitchell Horne }
520da5b7e90SMitchell Horne
521da5b7e90SMitchell Horne /*
522da5b7e90SMitchell Horne * Log a shutdown-time event and switch over to shutdown tracing mode.
523da5b7e90SMitchell Horne */
524da5b7e90SMitchell Horne static int
sysctl_shuttrace(SYSCTL_HANDLER_ARGS)525da5b7e90SMitchell Horne sysctl_shuttrace(SYSCTL_HANDLER_ARGS)
526da5b7e90SMitchell Horne {
527da5b7e90SMitchell Horne
528da5b7e90SMitchell Horne if (!boottrace_enabled)
529da5b7e90SMitchell Horne return (0);
530da5b7e90SMitchell Horne
531da5b7e90SMitchell Horne /* No output */
532da5b7e90SMitchell Horne if (req->newptr == NULL)
533da5b7e90SMitchell Horne return (0);
534da5b7e90SMitchell Horne
535da5b7e90SMitchell Horne shutdown = true;
536da5b7e90SMitchell Horne return (_boottrace_sysctl(&st, oidp, req));
537da5b7e90SMitchell Horne }
538da5b7e90SMitchell Horne
539da5b7e90SMitchell Horne /*
540da5b7e90SMitchell Horne * Reset the runtime tracing buffer.
541da5b7e90SMitchell Horne */
542da5b7e90SMitchell Horne void
boottrace_reset(const char * actor)543da5b7e90SMitchell Horne boottrace_reset(const char *actor)
544da5b7e90SMitchell Horne {
545da5b7e90SMitchell Horne char tmpbuf[64];
546da5b7e90SMitchell Horne
547da5b7e90SMitchell Horne snprintf(tmpbuf, sizeof(tmpbuf), "reset: %s", actor);
548da5b7e90SMitchell Horne runtrace(tmpbuf, NULL);
549da5b7e90SMitchell Horne }
550da5b7e90SMitchell Horne
551da5b7e90SMitchell Horne /*
552da5b7e90SMitchell Horne * Note that a resize implies a reset, i.e., the index is reset to 0.
553da5b7e90SMitchell Horne * We never shrink the array; we can only increase its size.
554da5b7e90SMitchell Horne */
555da5b7e90SMitchell Horne int
boottrace_resize(u_int newsize)556da5b7e90SMitchell Horne boottrace_resize(u_int newsize)
557da5b7e90SMitchell Horne {
558da5b7e90SMitchell Horne
559da5b7e90SMitchell Horne if (newsize <= rt.size) {
560da5b7e90SMitchell Horne return (EINVAL);
561da5b7e90SMitchell Horne }
562da5b7e90SMitchell Horne rt.table = realloc(rt.table, newsize * sizeof(struct bt_event),
563da5b7e90SMitchell Horne M_BOOTTRACE, M_WAITOK | M_ZERO);
564da5b7e90SMitchell Horne rt.size = newsize;
565da5b7e90SMitchell Horne boottrace_reset("boottrace_resize");
566da5b7e90SMitchell Horne return (0);
567da5b7e90SMitchell Horne }
568da5b7e90SMitchell Horne
569da5b7e90SMitchell Horne static int
sysctl_boottrace_reset(SYSCTL_HANDLER_ARGS)570da5b7e90SMitchell Horne sysctl_boottrace_reset(SYSCTL_HANDLER_ARGS)
571da5b7e90SMitchell Horne {
572da5b7e90SMitchell Horne
573da5b7e90SMitchell Horne if (!boottrace_enabled)
574da5b7e90SMitchell Horne return (0);
575da5b7e90SMitchell Horne
576da5b7e90SMitchell Horne if (req->newptr != NULL)
577da5b7e90SMitchell Horne boottrace_reset("sysctl_boottrace_reset");
578da5b7e90SMitchell Horne
579da5b7e90SMitchell Horne return (0);
580da5b7e90SMitchell Horne }
581da5b7e90SMitchell Horne
582da5b7e90SMitchell Horne static void
boottrace_init(void)583da5b7e90SMitchell Horne boottrace_init(void)
584da5b7e90SMitchell Horne {
585da5b7e90SMitchell Horne
586da5b7e90SMitchell Horne if (!boottrace_enabled)
587da5b7e90SMitchell Horne return;
588da5b7e90SMitchell Horne
589da5b7e90SMitchell Horne /* Boottime trace table */
590da5b7e90SMitchell Horne bt.size = BT_TABLE_DEFSIZE;
591da5b7e90SMitchell Horne TUNABLE_INT_FETCH("kern.boottrace.table_size", &bt.size);
592da5b7e90SMitchell Horne bt.size = max(bt.size, BT_TABLE_MINSIZE);
593da5b7e90SMitchell Horne bt.table = malloc(bt.size * sizeof(struct bt_event), M_BOOTTRACE,
594da5b7e90SMitchell Horne M_WAITOK | M_ZERO);
595da5b7e90SMitchell Horne
596da5b7e90SMitchell Horne /* Stick in an initial entry. */
597da5b7e90SMitchell Horne bt.table[0].cpuid = PCPU_GET(cpuid);
598da5b7e90SMitchell Horne strlcpy(bt.table[0].tdname, "boottime", BT_EVENT_TDNAMELEN);
599da5b7e90SMitchell Horne strlcpy(bt.table[0].name, "initial event", BT_EVENT_NAMELEN);
600da5b7e90SMitchell Horne bt.curr = 1;
601da5b7e90SMitchell Horne
602da5b7e90SMitchell Horne /* Run-time trace table (may wrap-around). */
603da5b7e90SMitchell Horne rt.wrap = 1;
604da5b7e90SMitchell Horne rt.size = BT_TABLE_RUNSIZE;
605da5b7e90SMitchell Horne rt.table = malloc(rt.size * sizeof(struct bt_event), M_BOOTTRACE,
606da5b7e90SMitchell Horne M_WAITOK | M_ZERO);
607da5b7e90SMitchell Horne
608da5b7e90SMitchell Horne /* Shutdown trace table */
609da5b7e90SMitchell Horne st.size = BT_TABLE_SHTSIZE;
610da5b7e90SMitchell Horne st.table = malloc(st.size * sizeof(struct bt_event), M_BOOTTRACE,
611da5b7e90SMitchell Horne M_WAITOK | M_ZERO);
612da5b7e90SMitchell Horne }
613*d9f1f0a9SZhenlei Huang SYSINIT(boottrace, SI_SUB_CPU, SI_ORDER_ANY, boottrace_init, NULL);
614