xref: /illumos-gate/usr/src/man/man3kstat/kstat.3kstat (revision 0be687ea0c09cd50b4ae51df829900fea257d535)
1.\"
2.\" Copyright (c) 2007, Sun Microsystems, Inc.  All Rights Reserved.
3.\" Copyright 2021 Oxide Computer Company
4.\"
5.\" The contents of this file are subject to the terms of the
6.\" Common Development and Distribution License (the "License").
7.\" You may not use this file except in compliance with the License.
8.\"
9.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10.\" or http://www.opensolaris.org/os/licensing.
11.\" See the License for the specific language governing permissions
12.\" and limitations under the License.
13.\"
14.\" When distributing Covered Code, include this CDDL HEADER in each
15.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16.\" If applicable, add the following below this CDDL HEADER, with the
17.\" fields enclosed by brackets "[]" replaced with your own identifying
18.\" information: Portions Copyright [yyyy] [name of copyright owner]
19.\"
20.Dd November 29, 2021
21.Dt KSTAT 3KSTAT
22.Os
23.Sh NAME
24.Nm kstat
25.Nd kernel statistics facility
26.Sh LIBRARY
27.Lb libkstat
28.Sh DESCRIPTION
29The
30.Nm
31facility is a general-purpose mechanism for providing kernel statistics to
32users.
33.Ss The kstat model
34The kernel maintains a linked list of statistics structures, or kstats.
35Each kstat has a common header section and a type-specific data section.
36The header section is defined by the
37.Vt kstat_t
38structure:
39.Ss "kstat header"
40.Bd -literal -offset indent
41typedef   int kid_t;    /* unique kstat id */
42
43typedef struct kstat {
44   /*
45    * Fields relevant to both kernel and user
46    */
47   hrtime_t      ks_crtime;               /* creation time */
48   struct kstat  *ks_next;                /* kstat chain linkage */
49   kid_t         ks_kid;                  /* unique kstat ID */
50   char          ks_module[KSTAT_STRLEN]; /* module name */
51   uchar_t       ks_resv;                 /* reserved */
52   int           ks_instance;             /* module's instance */
53   char          ks_name[KSTAT_STRLEN];   /* kstat name */
54   uchar_t       ks_type;                 /* kstat data type */
55   char          ks_class[KSTAT_STRLEN];  /* kstat class */
56   uchar_t       ks_flags;                /* kstat flags */
57   void          *ks_data;                /* kstat type-specific
58                                             data */
59   uint_t        ks_ndata;                /* # of data records */
60   size_t        ks_data_size;            /* size of kstat data
61                                             section */
62   hrtime_t      ks_snaptime;             /* time of last data
63                                             snapshot */
64
65   /*
66    * Fields relevant to kernel only
67    */
68   int(*ks_update)(struct kstat *, int);
69   void  *ks_private;
70   int(*ks_snapshot)(struct kstat *, void *, int);
71   void  *ks_lock;
72} kstat_t;
73.Ed
74.Pp
75The fields that are of significance to the user are:
76.Bl -tag -width Ds
77.It Fa ks_crtime
78The time the kstat was created.
79This allows you to compute the rates of various counters since the kstat was
80created;
81.Dq rate since boot
82is replaced by the more general concept of
83.Dq rate since kstat creation .
84All times associated with kstats
85.Po
86such as creation time, last snapshot time,
87.Vt kstat_timer_t
88and
89.Vt kstat_io_t
90timestamps, and the like
91.Pc
92are 64-bit nanosecond values.
93The accuracy of kstat timestamps is machine dependent, but the precision
94.Pq units
95is the same across all platforms.
96See
97.Xr gethrtime 3C
98for general information about high-resolution timestamps.
99.It Fa ks_next
100kstats are stored as a linked list, or chain.
101.Fa ks_next
102points to the next kstat in the chain.
103.It Fa ks_kid
104A unique identifier for the kstat.
105.It Fa ks_module , Fa ks_instance
106contain the name and instance of the module that created the kstat.
107In cases where there can only be one instance,
108.Fa ks_instance
109is 0.
110.It Fa ks_name
111gives a meaningful name to a kstat.
112The full kstat namespace is
113.Ao
114.Fa ks_module ,
115.Fa ks_instance ,
116.Fa ks_name
117.Ac ,
118so the name only need be unique within a module.
119.It Fa ks_type
120The type of data in this kstat.
121kstat data types are discussed below.
122.It Fa ks_class
123Each kstat can be characterized as belonging to some broad class of statistics,
124such as disk, tape, net, vm, and streams.
125This field can be used as a filter to extract related kstats.
126The following values are currently in use: disk, tape, controller, net, rpc, vm,
127kvm, hat, streams, kmem, kmem_cache, kstat, and misc.
128.Po
129The kstat class encompasses things like
130.Fa kstat_types .
131.Pc
132.It Fa ks_data , Fa ks_ndata , Fa ks_data_size
133.Fa ks_data
134is a pointer to the kstat's data section.
135The type of data stored there depends on
136.Fa ks_type .
137.Fa ks_ndata
138indicates the number of data records.
139Only some kstat types support multiple data records.
140Currently,
141.Dv KSTAT_TYPE_RAW ,
142.Dv KSTAT_TYPE_NAMED ,
143and
144.Dv KSTAT_TYPE_TIMER
145kstats support multiple data records.
146.Dv KSTAT_TYPE_INTR
147and
148.Dv KSTAT_TYPE_IO
149kstats support only one data record.
150.Fa ks_data_size
151is the total size of the data section, in bytes.
152.It Fa ks_snaptime
153The timestamp for the last data snapshot.
154This allows you to compute activity rates:
155.Bd -literal
156rate = (new_count - old_count) / (new_snaptime - old_snaptime);
157.Ed
158.El
159.Ss kstat data types
160The following types of kstats are currently available:
161.Bd -literal -offset indent
162#define KSTAT_TYPE_RAW    0   /* can be anything */
163#define KSTAT_TYPE_NAMED  1   /* name/value pairs */
164#define KSTAT_TYPE_INTR   2   /* interrupt statistics */
165#define KSTAT_TYPE_IO     3   /* I/O statistics */
166#define KSTAT_TYPE_TIMER  4   /* event timers */
167.Ed
168.Pp
169To get a list of all kstat types currently supported in the system, tools can
170read out the standard system kstat
171.Fa kstat_types
172.Po
173full name spec is
174.Aq unix, 0, kstat_types
175.Pc .
176This is a
177.Dv KSTAT_TYPE_NAMED
178kstat in which the
179.Fa name
180field describes the type of kstat, and the
181.Fa value
182field is the kstat type number
183.Po
184for example,
185.Dv KSTAT_TYPE_IO
186is type 3 \(em see above
187.Pc .
188.Ss "Raw kstat"
189.Dv KSTAT_TYPE_RAW
190\(em raw data
191.Pp
192The
193.Dq raw
194kstat type is just treated as an array of bytes.
195This is generally used to export well-known structures, like
196.Vt sysinfo .
197.Ss "Name=value kstat"
198.Dv KSTAT_TYPE_NAMED
199\(em A list of arbitrary
200.Fa name=value
201statistics.
202.Bd -literal -offset indent
203typedef struct kstat_named {
204   char    name[KSTAT_STRLEN];    /* name of counter */
205   uchar_t data_type;             /* data type */
206   union {
207            char c[16];           /* enough for 128-bit ints */
208            struct {
209               union {
210                   char *ptr;    /* NULL-terminated string */
211               } addr;
212               uint32_t len;     /* length of string */
213            } str;
214            int32_t   i32;
215            uint32_t  ui32;
216            int64_t   i64;
217            uint64_t  ui64;
218
219  /* These structure members are obsolete */
220
221            int32_t   l;
222            uint32_t  ul;
223            int64_t   ll;
224            uint64_t  ull;
225         } value;                /* value of counter */
226} kstat_named_t;
227
228/* The following types are Stable */
229
230KSTAT_DATA_CHAR
231KSTAT_DATA_INT32
232KSTAT_DATA_LONG
233KSTAT_DATA_UINT32
234KSTAT_DATA_ULONG
235KSTAT_DATA_INT64
236KSTAT_DATA_UINT64
237
238/* The following type is Evolving */
239
240KSTAT_DATA_STRING
241
242/* The following types are Obsolete */
243
244KSTAT_DATA_LONGLONG
245KSTAT_DATA_ULONGLONG
246KSTAT_DATA_FLOAT
247KSTAT_DATA_DOUBLE
248.Ed
249.Pp
250Some devices need to publish strings that are longer than the maximum length of a
251.Dv KSTAT_DATA_CHAR
252value;
253.Dv KSTAT_DATA_STRING
254is a data type that allows arbitrary-length strings to be associated with a
255named kstat.
256The macros below are the supported means to read the pointer to the string and
257its length.
258.Bd -literal -offset indent
259#define KSTAT_NAMED_STR_PTR(knptr) ((knptr)->value.str.addr.ptr)
260#define KSTAT_NAMED_STR_BUFLEN(knptr) ((knptr)->value.str.len)
261.Ed
262.Pp
263.Fn KSTAT_NAMED_STR_BUFLEN
264returns the number of bytes required to store the string pointed to by
265.Fn KSTAT_NAMED_STR_PTR ;
266that is,
267.Fn strlen "KSTAT_NAMED_STR_PTR()"
268+ 1.
270.Ss "Interrupt kstat"
271.Dv KSTAT_TYPE_INTR
272\(em Interrupt statistics.
273.Pp
274An interrupt is a hard interrupt
275.Pq sourced from the hardware device itself ,
276a soft interrupt
277.Pq induced by the system via the use of some system interrupt source ,
278a watchdog interrupt
279.Pq induced by a periodic timer call ,
280spurious
281.Pq an interrupt entry point was entered but there was no interrupt to service ,
282or multiple service
283.Po
284an interrupt was detected and serviced just prior to returning from any of the
285other types
286.Pc .
287.Bd -literal -offset indent
288#define KSTAT_INTR_HARD      0
289#define KSTAT_INTR_SOFT      1
290#define KSTAT_INTR_WATCHDOG  2
291#define KSTAT_INTR_SPURIOUS  3
292#define KSTAT_INTR_MULTSVC   4
293#define KSTAT_NUM_INTRS      5
294
295typedef struct kstat_intr {
296   uint_t intrs[KSTAT_NUM_INTRS]; /* interrupt counters */
297} kstat_intr_t;
298.Ed
299.Ss Event timer kstat
300.Dv KSTAT_TYPE_TIMER
301\(em Event timer statistics.
302.Pp
303These provide basic counting and timing information for any type of event.
304.Bd -literal -offset indent
305typedef struct kstat_timer {
306   char         name[KSTAT_STRLEN]; /* event name */
307   uchar_t      resv;               /* reserved */
308   u_longlong_t num_events;         /* number of events */
309   hrtime_t     elapsed_time;       /* cumulative elapsed time */
310   hrtime_t     min_time;           /* shortest event duration */
311   hrtime_t     max_time;           /* longest event duration */
312   hrtime_t     start_time;         /* previous event start time */
313   hrtime_t     stop_time;          /* previous event stop time */
314} kstat_timer_t;
315.Ed
316.Ss I/O kstat
317.Dv KSTAT_TYPE_IO
318\(em I/O statistics.
319.Bd -literal -offset indent
320typedef struct kstat_io {
321/*
322 * Basic counters.
323 */
324   u_longlong_t     nread;      /* number of bytes read */
325   u_longlong_t     nwritten;   /* number of bytes written */
326   uint_t           reads;      /* number of read operations */
327   uint_t           writes;     /* number of write operations */
328/*
329 * Accumulated time and queue length statistics.
330 *
331 * Time statistics are kept as a running sum of "active" time.
332 * Queue length statistics are kept as a running sum of the
333 * product of queue length and elapsed time at that length --
334 * that is, a Riemann sum for queue length integrated against time.
335 *
336 *               ^
337 *               |                       _________
338 *               8                       | i4    |
339 *               |                       |       |
340 *       Queue   6                       |       |
341 *       Length  |       _________       |       |
342 *               4       | i2    |_______|       |
343 *               |       |       i3              |
344 *               2_______|                       |
345 *               |    i1                         |
346 *               |_______________________________|
347 *               Time->  t1      t2      t3      t4
348 *
349 * At each change of state (entry or exit from the queue),
350 * we add the elapsed time (since the previous state change)
351 * to the active time if the queue length was non-zero during
352 * that interval; and we add the product of the elapsed time
353 * times the queue length to the running length*time sum.
354 *
355 * This method is generalizable to measuring residency
356 * in any defined system: instead of queue lengths, think
357 * of "outstanding RPC calls to server X".
358 *
359 * A large number of I/O subsystems have at least two basic
360 * "lists" of transactions they manage: one for transactions
361 * that have been accepted for processing but for which processing
362 * has yet to begin, and one for transactions which are actively
363 * being processed (but not done). For this reason, two cumulative
364 * time statistics are defined here: pre-service (wait) time,
365 * and service (run) time.
366 *
367 * The units of cumulative busy time are accumulated nanoseconds.
368 * The units of cumulative length*time products are elapsed time
369 * times queue length.
370 */
371   hrtime_t   wtime;            /* cumulative wait (pre-service) time */
372   hrtime_t   wlentime;         /* cumulative wait length*time product*/
373   hrtime_t   wlastupdate;      /* last time wait queue changed */
374   hrtime_t   rtime;            /* cumulative run (service) time */
375   hrtime_t   rlentime;         /* cumulative run length*time product */
376   hrtime_t   rlastupdate;      /* last time run queue changed */
377   uint_t     wcnt;             /* count of elements in wait state */
378   uint_t     rcnt;             /* count of elements in run state */
379} kstat_io_t;
380.Ed
381.Ss Using libkstat
382The kstat library,
383.Sy libkstat ,
384defines the user interface
385.Pq API
386to the system's kstat facility.
387.Pp
388You begin by opening libkstat with
389.Xr kstat_open 3KSTAT ,
390which returns a pointer to a fully initialized kstat control structure.
391This is your ticket to subsequent libkstat operations:
392.Bd -literal -offset indent
393typedef struct kstat_ctl {
394   kid_t     kc_chain_id;    /* current kstat chain ID */
395   kstat_t   *kc_chain;      /* pointer to kstat chain */
396   int       kc_kd;          /* /dev/kstat descriptor */
397} kstat_ctl_t;
398.Ed
399.Pp
400Only the first two fields,
401.Fa kc_chain_id
402and
403.Fa kc_chain ,
404are of
405interest to
406libkstat clients.
407.Po
408.Fa kc_kd
409is the descriptor for
410.Pa /dev/kstat ,
411the kernel statistics driver.
412libkstat functions are built on top of
413.Pa /dev/kstat
414.Xr ioctl 2
415primitives.
416Direct interaction with
417.Pa /dev/kstat
418is strongly discouraged, since it is
419.Em not
420a public interface.
421.Pc
422.Pp
423.Fa kc_chain
424points to your copy of the kstat chain.
425You typically walk the chain to find and process a certain kind of kstat.
426For example, to display all
427I/O kstats:
428.Bd -literal -offset indent
429kstat_ctl_t    *kc;
430kstat_t        *ksp;
431kstat_io_t     kio;
432
433kc = kstat_open();
434for (ksp = kc->kc_chain; ksp != NULL; ksp = ksp->ks_next) {
435        if (ksp->ks_type == KSTAT_TYPE_IO) {
436                kstat_read(kc, ksp, &kio);
437                my_io_display(kio);
438        }
439}
440.Ed
441.Pp
442.Fa kc_chain_id
443is the kstat chain ID, or
444.Sy KCID ,
445of your copy of the kstat chain.
446See
447.Xr kstat_chain_update 3KSTAT
448for an explanation of KCIDs.
449.Sh FILES
450.Bl -tag -width Pa
451.It Pa /dev/kstat
452kernel statistics driver character device
453.It Pa /usr/include/kstat.h
454kstat library header file
455.It Pa /usr/include/sys/kstat.h
456system kstat header
457.El
458.Sh SEE ALSO
459.Xr ioctl 2 ,
460.Xr gethrtime 3C ,
461.Xr kstat_chain_update 3KSTAT ,
462.Xr kstat_close 3KSTAT ,
463.Xr kstat_data_lookup 3KSTAT ,
464.Xr kstat_lookup 3KSTAT ,
465.Xr kstat_open 3KSTAT ,
466.Xr kstat_read 3KSTAT ,
467.Xr kstat_write 3KSTAT ,
468.Xr attributes 5
469